From bdbc90fa55af632f8a883a3d93c54a08708ed80a Mon Sep 17 00:00:00 2001 From: Yunlong Song Date: Wed, 28 Feb 2018 20:31:52 +0800 Subject: [PATCH 01/50] f2fs: don't put dentry page in pagecache into highmem Previous dentry page uses highmem, which will cause panic in platforms using highmem (such as arm), since the address space of dentry pages from highmem directly goes into the decryption path via the function fscrypt_fname_disk_to_usr. But sg_init_one assumes the address is not from highmem, and then cause panic since it doesn't call kmap_high but kunmap_high is triggered at the end. To fix this problem in a simple way, this patch avoids to put dentry page in pagecache into highmem. Signed-off-by: Yunlong Song Reviewed-by: Chao Yu [Jaegeuk Kim: fix coding style] Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 23 +++++------------------ fs/f2fs/f2fs.h | 6 ------ fs/f2fs/inline.c | 3 +-- fs/f2fs/inode.c | 2 +- fs/f2fs/namei.c | 32 ++++++++------------------------ fs/f2fs/recovery.c | 11 +++++------ include/linux/f2fs_fs.h | 1 - 7 files changed, 20 insertions(+), 58 deletions(-) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index f00b5ed8c011..797eb05cb538 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -94,14 +94,12 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page, struct f2fs_dir_entry *de; struct f2fs_dentry_ptr d; - dentry_blk = (struct f2fs_dentry_block *)kmap(dentry_page); + dentry_blk = (struct f2fs_dentry_block *)page_address(dentry_page); make_dentry_ptr_block(NULL, &d, dentry_blk); de = find_target_dentry(fname, namehash, max_slots, &d); if (de) *res_page = dentry_page; - else - kunmap(dentry_page); return de; } @@ -287,7 +285,6 @@ ino_t f2fs_inode_by_name(struct inode *dir, const struct qstr *qstr, de = f2fs_find_entry(dir, qstr, page); if (de) { res = le32_to_cpu(de->ino); - f2fs_dentry_kunmap(dir, *page); f2fs_put_page(*page, 0); } @@ -302,7 +299,6 @@ void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de, f2fs_wait_on_page_writeback(page, type, true); de->ino = cpu_to_le32(inode->i_ino); set_de_type(de, inode->i_mode); - f2fs_dentry_kunmap(dir, page); set_page_dirty(page); dir->i_mtime = dir->i_ctime = current_time(dir); @@ -350,13 +346,11 @@ static int make_empty_dir(struct inode *inode, if (IS_ERR(dentry_page)) return PTR_ERR(dentry_page); - dentry_blk = kmap_atomic(dentry_page); + dentry_blk = page_address(dentry_page); make_dentry_ptr_block(NULL, &d, dentry_blk); do_make_empty_dir(inode, parent, &d); - kunmap_atomic(dentry_blk); - set_page_dirty(dentry_page); f2fs_put_page(dentry_page, 1); return 0; @@ -547,13 +541,12 @@ start: if (IS_ERR(dentry_page)) return PTR_ERR(dentry_page); - dentry_blk = kmap(dentry_page); + dentry_blk = page_address(dentry_page); bit_pos = room_for_filename(&dentry_blk->dentry_bitmap, slots, NR_DENTRY_IN_BLOCK); if (bit_pos < NR_DENTRY_IN_BLOCK) goto add_dentry; - kunmap(dentry_page); f2fs_put_page(dentry_page, 1); } @@ -588,7 +581,6 @@ fail: if (inode) up_write(&F2FS_I(inode)->i_sem); - kunmap(dentry_page); f2fs_put_page(dentry_page, 1); return err; @@ -642,7 +634,6 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name, F2FS_I(dir)->task = NULL; } if (de) { - f2fs_dentry_kunmap(dir, page); f2fs_put_page(page, 0); err = -EEXIST; } else if (IS_ERR(page)) { @@ -730,7 +721,6 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap, NR_DENTRY_IN_BLOCK, 0); - kunmap(page); /* kunmap - pair of f2fs_find_entry */ set_page_dirty(page); dir->i_ctime = dir->i_mtime = current_time(dir); @@ -775,7 +765,7 @@ bool f2fs_empty_dir(struct inode *dir) return false; } - dentry_blk = kmap_atomic(dentry_page); + dentry_blk = page_address(dentry_page); if (bidx == 0) bit_pos = 2; else @@ -783,7 +773,6 @@ bool f2fs_empty_dir(struct inode *dir) bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap, NR_DENTRY_IN_BLOCK, bit_pos); - kunmap_atomic(dentry_blk); f2fs_put_page(dentry_page, 1); @@ -901,19 +890,17 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx) } } - dentry_blk = kmap(dentry_page); + dentry_blk = page_address(dentry_page); make_dentry_ptr_block(inode, &d, dentry_blk); err = f2fs_fill_dentries(ctx, &d, n * NR_DENTRY_IN_BLOCK, &fstr); if (err) { - kunmap(dentry_page); f2fs_put_page(dentry_page, 1); break; } - kunmap(dentry_page); f2fs_put_page(dentry_page, 1); } out_free: diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 6300ac5bcbe4..8b652e000326 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2399,12 +2399,6 @@ static inline int f2fs_has_inline_dentry(struct inode *inode) return is_inode_flag_set(inode, FI_INLINE_DENTRY); } -static inline void f2fs_dentry_kunmap(struct inode *dir, struct page *page) -{ - if (!f2fs_has_inline_dentry(dir)) - kunmap(page); -} - static inline int is_file(struct inode *inode, int type) { return F2FS_I(inode)->i_advise & type; diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 90e38d8ea688..3b77d6421218 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -369,7 +369,7 @@ static int f2fs_move_inline_dirents(struct inode *dir, struct page *ipage, f2fs_wait_on_page_writeback(page, DATA, true); zero_user_segment(page, MAX_INLINE_DATA(dir), PAGE_SIZE); - dentry_blk = kmap_atomic(page); + dentry_blk = page_address(page); make_dentry_ptr_inline(dir, &src, inline_dentry); make_dentry_ptr_block(dir, &dst, dentry_blk); @@ -386,7 +386,6 @@ static int f2fs_move_inline_dirents(struct inode *dir, struct page *ipage, memcpy(dst.dentry, src.dentry, SIZE_OF_DIR_ENTRY * src.max); memcpy(dst.filename, src.filename, src.max * F2FS_SLOT_LEN); - kunmap_atomic(dentry_blk); if (!PageUptodate(page)) SetPageUptodate(page); set_page_dirty(page); diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 205add3d0f3a..4efc815129b1 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -328,7 +328,7 @@ make_now: inode->i_op = &f2fs_dir_inode_operations; inode->i_fop = &f2fs_dir_operations; inode->i_mapping->a_ops = &f2fs_dblock_aops; - mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_HIGH_ZERO); + inode_nohighmem(inode); } else if (S_ISLNK(inode->i_mode)) { if (f2fs_encrypted_inode(inode)) inode->i_op = &f2fs_encrypted_symlink_inode_operations; diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index b68e7b03959f..2ebf3d045dd2 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -317,7 +317,6 @@ static int __recover_dot_dentries(struct inode *dir, nid_t pino) de = f2fs_find_entry(dir, &dot, &page); if (de) { - f2fs_dentry_kunmap(dir, page); f2fs_put_page(page, 0); } else if (IS_ERR(page)) { err = PTR_ERR(page); @@ -329,14 +328,12 @@ static int __recover_dot_dentries(struct inode *dir, nid_t pino) } de = f2fs_find_entry(dir, &dotdot, &page); - if (de) { - f2fs_dentry_kunmap(dir, page); + if (de) f2fs_put_page(page, 0); - } else if (IS_ERR(page)) { + else if (IS_ERR(page)) err = PTR_ERR(page); - } else { + else err = __f2fs_add_link(dir, &dotdot, NULL, pino, S_IFDIR); - } out: if (!err) clear_inode_flag(dir, FI_INLINE_DOTS); @@ -377,7 +374,6 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, } ino = le32_to_cpu(de->ino); - f2fs_dentry_kunmap(dir, page); f2fs_put_page(page, 0); inode = f2fs_iget(dir->i_sb, ino); @@ -452,7 +448,6 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry) err = acquire_orphan_inode(sbi); if (err) { f2fs_unlock_op(sbi); - f2fs_dentry_kunmap(dir, page); f2fs_put_page(page, 0); goto fail; } @@ -579,7 +574,7 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) inode->i_op = &f2fs_dir_inode_operations; inode->i_fop = &f2fs_dir_operations; inode->i_mapping->a_ops = &f2fs_dblock_aops; - mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_HIGH_ZERO); + inode_nohighmem(inode); set_inode_flag(inode, FI_INC_LINK); f2fs_lock_op(sbi); @@ -893,13 +888,11 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, } if (old_dir_entry) { - if (old_dir != new_dir && !whiteout) { + if (old_dir != new_dir && !whiteout) f2fs_set_link(old_inode, old_dir_entry, old_dir_page, new_dir); - } else { - f2fs_dentry_kunmap(old_inode, old_dir_page); + else f2fs_put_page(old_dir_page, 0); - } f2fs_i_links_write(old_dir, false); } add_ino_entry(sbi, new_dir->i_ino, TRANS_DIR_INO); @@ -912,20 +905,15 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, put_out_dir: f2fs_unlock_op(sbi); - if (new_page) { - f2fs_dentry_kunmap(new_dir, new_page); + if (new_page) f2fs_put_page(new_page, 0); - } out_whiteout: if (whiteout) iput(whiteout); out_dir: - if (old_dir_entry) { - f2fs_dentry_kunmap(old_inode, old_dir_page); + if (old_dir_entry) f2fs_put_page(old_dir_page, 0); - } out_old: - f2fs_dentry_kunmap(old_dir, old_page); f2fs_put_page(old_page, 0); out: return err; @@ -1067,19 +1055,15 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, return 0; out_new_dir: if (new_dir_entry) { - f2fs_dentry_kunmap(new_inode, new_dir_page); f2fs_put_page(new_dir_page, 0); } out_old_dir: if (old_dir_entry) { - f2fs_dentry_kunmap(old_inode, old_dir_page); f2fs_put_page(old_dir_page, 0); } out_new: - f2fs_dentry_kunmap(new_dir, new_page); f2fs_put_page(new_page, 0); out_old: - f2fs_dentry_kunmap(old_dir, old_page); f2fs_put_page(old_page, 0); out: return err; diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 337f3363f48f..c5e5c45130b5 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -144,7 +144,7 @@ static int recover_dentry(struct inode *inode, struct page *ipage, retry: de = __f2fs_find_entry(dir, &fname, &page); if (de && inode->i_ino == le32_to_cpu(de->ino)) - goto out_unmap_put; + goto out_put; if (de) { einode = f2fs_iget_retry(inode->i_sb, le32_to_cpu(de->ino)); @@ -153,19 +153,19 @@ retry: err = PTR_ERR(einode); if (err == -ENOENT) err = -EEXIST; - goto out_unmap_put; + goto out_put; } err = dquot_initialize(einode); if (err) { iput(einode); - goto out_unmap_put; + goto out_put; } err = acquire_orphan_inode(F2FS_I_SB(inode)); if (err) { iput(einode); - goto out_unmap_put; + goto out_put; } f2fs_delete_entry(de, page, dir, einode); iput(einode); @@ -180,8 +180,7 @@ retry: goto retry; goto out; -out_unmap_put: - f2fs_dentry_kunmap(dir, page); +out_put: f2fs_put_page(page, 0); out: if (file_enc_name(inode)) diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 58aecb60ea51..393b880afc9a 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -46,7 +46,6 @@ /* This flag is used by node and meta inodes, and by recovery */ #define GFP_F2FS_ZERO (GFP_NOFS | __GFP_ZERO) -#define GFP_F2FS_HIGH_ZERO (GFP_NOFS | __GFP_ZERO | __GFP_HIGHMEM) /* * For further optimization on multi-head logs, on-disk layout supports maximum From 0964fc1a82a310c2effb6722446b9cc86db55aa3 Mon Sep 17 00:00:00 2001 From: Sheng Yong Date: Mon, 29 Jan 2018 19:13:15 +0800 Subject: [PATCH 02/50] f2fs: fix potential corruption in area before F2FS_SUPER_OFFSET sb_getblk does not guarantee the buffer head is uptodate. If bh is not uptodate, the data (may be used as boot code) in area before F2FS_SUPER_OFFSET may get corrupted when super block is committed. Signed-off-by: Sheng Yong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 8173ae688814..3db1f02ed8c2 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1894,7 +1894,6 @@ static int __f2fs_commit_super(struct buffer_head *bh, lock_buffer(bh); if (super) memcpy(bh->b_data + F2FS_SUPER_OFFSET, super, sizeof(*super)); - set_buffer_uptodate(bh); set_buffer_dirty(bh); unlock_buffer(bh); @@ -2334,7 +2333,7 @@ int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover) } /* write back-up superblock first */ - bh = sb_getblk(sbi->sb, sbi->valid_super_block ? 0: 1); + bh = sb_bread(sbi->sb, sbi->valid_super_block ? 0 : 1); if (!bh) return -EIO; err = __f2fs_commit_super(bh, F2FS_RAW_SUPER(sbi)); @@ -2345,7 +2344,7 @@ int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover) return err; /* write current valid superblock */ - bh = sb_getblk(sbi->sb, sbi->valid_super_block); + bh = sb_bread(sbi->sb, sbi->valid_super_block); if (!bh) return -EIO; err = __f2fs_commit_super(bh, F2FS_RAW_SUPER(sbi)); From b94929d975c8423defc9aededb0f499ff936b509 Mon Sep 17 00:00:00 2001 From: Yunlong Song Date: Mon, 29 Jan 2018 11:37:45 +0800 Subject: [PATCH 03/50] f2fs: fix heap mode to reset it back Commit 7a20b8a61eff81bdb7097a578752a74860e9d142 ("f2fs: allocate node and hot data in the beginning of partition") introduces another mount option, heap, to reset it back. But it does not do anything for heap mode, so fix it. Cc: stable@vger.kernel.org Signed-off-by: Yunlong Song Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 5 +++-- fs/f2fs/segment.c | 3 ++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index aa720cc44509..b9d93fd532a9 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -191,8 +191,9 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type, if (gc_type != FG_GC && p->max_search > sbi->max_victim_search) p->max_search = sbi->max_victim_search; - /* let's select beginning hot/small space first */ - if (type == CURSEG_HOT_DATA || IS_NODESEG(type)) + /* let's select beginning hot/small space first in no_heap mode*/ + if (test_opt(sbi, NOHEAP) && + (type == CURSEG_HOT_DATA || IS_NODESEG(type))) p->offset = 0; else p->offset = SIT_I(sbi)->last_victim[p->gc_mode]; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index b16a8e6625aa..205b0d934c44 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2164,7 +2164,8 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type) if (sbi->segs_per_sec != 1) return CURSEG_I(sbi, type)->segno; - if (type == CURSEG_HOT_DATA || IS_NODESEG(type)) + if (test_opt(sbi, NOHEAP) && + (type == CURSEG_HOT_DATA || IS_NODESEG(type))) return 0; if (SIT_I(sbi)->last_victim[ALLOC_NEXT]) From 4d817ae099883b08fddd46dee3a916bbf93e9161 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 27 Jan 2018 17:29:48 +0800 Subject: [PATCH 04/50] f2fs: restrict inline_xattr_size configuration This patch limits to enable inline_xattr_size mount option only if both extra_attr and flexible_inline_xattr feature is on in current image. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 3db1f02ed8c2..9587ca0bcd3e 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -699,6 +699,13 @@ static int parse_options(struct super_block *sb, char *options) } if (test_opt(sbi, INLINE_XATTR_SIZE)) { + if (!f2fs_sb_has_extra_attr(sb) || + !f2fs_sb_has_flexible_inline_xattr(sb)) { + f2fs_msg(sb, KERN_ERR, + "extra_attr or flexible_inline_xattr " + "feature is off"); + return -EINVAL; + } if (!test_opt(sbi, INLINE_XATTR)) { f2fs_msg(sb, KERN_ERR, "inline_xattr_size option should be " From bf617f7a92edc6bb2909db2bfa4576f50b280ee5 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 27 Jan 2018 17:29:49 +0800 Subject: [PATCH 05/50] f2fs: fix to check extent cache in f2fs_drop_extent_tree If noextent_cache mount option is on, we will never initialize extent tree in inode, but still we're going to access it in f2fs_drop_extent_tree, result in kernel panic as below: BUG: unable to handle kernel NULL pointer dereference at 0000000000000038 IP: _raw_write_lock+0xc/0x30 Call Trace: ? f2fs_drop_extent_tree+0x41/0x70 [f2fs] f2fs_fallocate+0x5a0/0xdd0 [f2fs] ? common_file_perm+0x47/0xc0 ? apparmor_file_permission+0x1a/0x20 vfs_fallocate+0x15b/0x290 SyS_fallocate+0x44/0x70 do_syscall_64+0x6e/0x160 entry_SYSCALL64_slow_path+0x25/0x25 This patch fixes to check extent cache status before using in f2fs_drop_extent_tree. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/extent_cache.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index ff2352a0ed15..aff6c2ed1c02 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -706,6 +706,9 @@ void f2fs_drop_extent_tree(struct inode *inode) struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct extent_tree *et = F2FS_I(inode)->extent_tree; + if (!f2fs_may_extent_tree(inode)) + return; + set_inode_flag(inode, FI_NO_EXTENT); write_lock(&et->lock); From 199bc3fef29cacf672e7e5cd49d296c1fdc1a891 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 25 Jan 2018 19:40:08 +0800 Subject: [PATCH 06/50] f2fs: support large nat bitmap Previously, we will store all nat version bitmap in checkpoint pack block, so our total node entry number has a limitation which caused total node number can not exceed (3900 * 8) block * 455 node/block = 14196000. So that once user wants to create more nodes in large size image, it becomes a bottleneck, that's unreasonable. This patch detects the new layout of nat/sit version bitmap in image in order to enable supporting large nat bitmap. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 6 ++++++ include/linux/f2fs_fs.h | 1 + 2 files changed, 7 insertions(+) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 8b652e000326..3f4dafecd910 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1762,6 +1762,12 @@ static inline void *__bitmap_ptr(struct f2fs_sb_info *sbi, int flag) struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); int offset; + if (is_set_ckpt_flags(sbi, CP_LARGE_NAT_BITMAP_FLAG)) { + offset = (flag == SIT_BITMAP) ? + le32_to_cpu(ckpt->nat_ver_bitmap_bytesize) : 0; + return &ckpt->sit_nat_version_bitmap + offset; + } + if (__cp_payload(sbi) > 0) { if (flag == NAT_BITMAP) return &ckpt->sit_nat_version_bitmap; diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 393b880afc9a..96c9bdbace50 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -116,6 +116,7 @@ struct f2fs_super_block { /* * For checkpoint */ +#define CP_LARGE_NAT_BITMAP_FLAG 0x00000400 #define CP_NOCRC_RECOVERY_FLAG 0x00000200 #define CP_TRIMMED_FLAG 0x00000100 #define CP_NAT_BITS_FLAG 0x00000080 From cd36d7a17f9da68be9aa67185ba3ad7969934a19 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 31 Jan 2018 09:30:34 +0800 Subject: [PATCH 07/50] f2fs: fix to clear CP_TRIMMED_FLAG Once CP_TRIMMED_FLAG is set, after a reboot, we will never issue discard before LBA becomes invalid again, fix it by clearing the flag in checkpoint without CP_TRIMMED reason. Fixes: 1f43e2ad7bff ("f2fs: introduce CP_TRIMMED_FLAG to avoid unneeded discard") Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 512dca8abc7d..8b0945ba284d 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1136,6 +1136,8 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc) if (cpc->reason & CP_TRIMMED) __set_ckpt_flags(ckpt, CP_TRIMMED_FLAG); + else + __clear_ckpt_flags(ckpt, CP_TRIMMED_FLAG); if (cpc->reason & CP_UMOUNT) __set_ckpt_flags(ckpt, CP_UMOUNT_FLAG); From 0cdd31953967b25c216ddcb630ec9a6bb8a91371 Mon Sep 17 00:00:00 2001 From: Hyunchul Lee Date: Wed, 31 Jan 2018 11:36:57 +0900 Subject: [PATCH 08/50] f2fs: support passing down write hints given by users to block layer Add the 'whint_mode' mount option that controls which write hints are passed down to block layer. There are "off" and "user-based" mode. The default mode is "off". 1) whint_mode=off. F2FS only passes down WRITE_LIFE_NOT_SET. 2) whint_mode=user-based. F2FS tries to pass down hints given by users. User F2FS Block ---- ---- ----- META WRITE_LIFE_NOT_SET HOT_NODE " WARM_NODE " COLD_NODE " ioctl(COLD) COLD_DATA WRITE_LIFE_EXTREME extension list " " -- buffered io WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_NOT_SET WRITE_LIFE_NONE " " WRITE_LIFE_MEDIUM " " WRITE_LIFE_LONG " " -- direct io WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_NOT_SET WRITE_LIFE_NONE " WRITE_LIFE_NONE WRITE_LIFE_MEDIUM " WRITE_LIFE_MEDIUM WRITE_LIFE_LONG " WRITE_LIFE_LONG Many thanks to Chao Yu and Jaegeuk Kim for comments to implement this patch. Signed-off-by: Hyunchul Lee Reviewed-by: Chao Yu [Jaegeuk Kim: avoid build warning] [Chao Yu: fix to restore whint_mode in ->remount_fs] Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 26 +++++++++++++++++---- fs/f2fs/f2fs.h | 9 ++++++++ fs/f2fs/segment.c | 59 +++++++++++++++++++++++++++++++++++++++++++++++ fs/f2fs/super.c | 31 ++++++++++++++++++++++++- 4 files changed, 119 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 7578ed1a85e0..0dae8561addb 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -175,15 +175,22 @@ static bool __same_bdev(struct f2fs_sb_info *sbi, */ static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr, struct writeback_control *wbc, - int npages, bool is_read) + int npages, bool is_read, + enum page_type type, enum temp_type temp) { struct bio *bio; bio = f2fs_bio_alloc(sbi, npages, true); f2fs_target_device(sbi, blk_addr, bio); - bio->bi_end_io = is_read ? f2fs_read_end_io : f2fs_write_end_io; - bio->bi_private = is_read ? NULL : sbi; + if (is_read) { + bio->bi_end_io = f2fs_read_end_io; + bio->bi_private = NULL; + } else { + bio->bi_end_io = f2fs_write_end_io; + bio->bi_private = sbi; + bio->bi_write_hint = io_type_to_rw_hint(sbi, type, temp); + } if (wbc) wbc_init_bio(wbc, bio); @@ -382,7 +389,7 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio) /* Allocate a new bio */ bio = __bio_alloc(fio->sbi, fio->new_blkaddr, fio->io_wbc, - 1, is_read_io(fio->op)); + 1, is_read_io(fio->op), fio->type, fio->temp); if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) { bio_put(bio); @@ -445,7 +452,8 @@ alloc_new: goto out_fail; } io->bio = __bio_alloc(sbi, fio->new_blkaddr, fio->io_wbc, - BIO_MAX_PAGES, false); + BIO_MAX_PAGES, false, + fio->type, fio->temp); io->fio = *fio; } @@ -2287,10 +2295,13 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) { struct address_space *mapping = iocb->ki_filp->f_mapping; struct inode *inode = mapping->host; + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); size_t count = iov_iter_count(iter); loff_t offset = iocb->ki_pos; int rw = iov_iter_rw(iter); int err; + enum rw_hint hint = iocb->ki_hint; + int whint_mode = sbi->whint_mode; err = check_direct_IO(inode, iter, offset); if (err) @@ -2301,11 +2312,16 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) trace_f2fs_direct_IO_enter(inode, offset, count, rw); + if (rw == WRITE && whint_mode == WHINT_MODE_OFF) + iocb->ki_hint = WRITE_LIFE_NOT_SET; + down_read(&F2FS_I(inode)->dio_rwsem[rw]); err = blockdev_direct_IO(iocb, inode, iter, get_data_block_dio); up_read(&F2FS_I(inode)->dio_rwsem[rw]); if (rw == WRITE) { + if (whint_mode == WHINT_MODE_OFF) + iocb->ki_hint = hint; if (err > 0) { f2fs_update_iostat(F2FS_I_SB(inode), APP_DIRECT_IO, err); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 3f4dafecd910..6c4236d348c6 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1037,6 +1037,11 @@ enum { MAX_TIME, }; +enum { + WHINT_MODE_OFF, /* not pass down write hints */ + WHINT_MODE_USER, /* try to pass down hints given by users */ +}; + struct f2fs_sb_info { struct super_block *sb; /* pointer to VFS super block */ struct proc_dir_entry *s_proc; /* proc entry */ @@ -1220,6 +1225,8 @@ struct f2fs_sb_info { char *s_qf_names[MAXQUOTAS]; int s_jquota_fmt; /* Format of quota to use */ #endif + /* For which write hints are passed down to block layer */ + int whint_mode; }; #ifdef CONFIG_F2FS_FAULT_INJECTION @@ -2768,6 +2775,8 @@ void destroy_segment_manager(struct f2fs_sb_info *sbi); int __init create_segment_manager_caches(void); void destroy_segment_manager_caches(void); int rw_hint_to_seg_type(enum rw_hint hint); +enum rw_hint io_type_to_rw_hint(struct f2fs_sb_info *sbi, enum page_type type, + enum temp_type temp); /* * checkpoint.c diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 205b0d934c44..c14d5a12b908 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2456,6 +2456,62 @@ int rw_hint_to_seg_type(enum rw_hint hint) } } +/* This returns write hints for each segment type. This hints will be + * passed down to block layer. There are mapping tables which depend on + * the mount option 'whint_mode'. + * + * 1) whint_mode=off. F2FS only passes down WRITE_LIFE_NOT_SET. + * + * 2) whint_mode=user-based. F2FS tries to pass down hints given by users. + * + * User F2FS Block + * ---- ---- ----- + * META WRITE_LIFE_NOT_SET + * HOT_NODE " + * WARM_NODE " + * COLD_NODE " + * ioctl(COLD) COLD_DATA WRITE_LIFE_EXTREME + * extension list " " + * + * -- buffered io + * WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME + * WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT + * WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_NOT_SET + * WRITE_LIFE_NONE " " + * WRITE_LIFE_MEDIUM " " + * WRITE_LIFE_LONG " " + * + * -- direct io + * WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME + * WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT + * WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_NOT_SET + * WRITE_LIFE_NONE " WRITE_LIFE_NONE + * WRITE_LIFE_MEDIUM " WRITE_LIFE_MEDIUM + * WRITE_LIFE_LONG " WRITE_LIFE_LONG + * + */ + +enum rw_hint io_type_to_rw_hint(struct f2fs_sb_info *sbi, + enum page_type type, enum temp_type temp) +{ + if (sbi->whint_mode == WHINT_MODE_USER) { + if (type == DATA) { + switch (temp) { + case COLD: + return WRITE_LIFE_EXTREME; + case HOT: + return WRITE_LIFE_SHORT; + default: + return WRITE_LIFE_NOT_SET; + } + } else { + return WRITE_LIFE_NOT_SET; + } + } else { + return WRITE_LIFE_NOT_SET; + } +} + static int __get_segment_type_2(struct f2fs_io_info *fio) { if (fio->type == DATA) @@ -2643,6 +2699,7 @@ void write_meta_page(struct f2fs_sb_info *sbi, struct page *page, struct f2fs_io_info fio = { .sbi = sbi, .type = META, + .temp = HOT, .op = REQ_OP_WRITE, .op_flags = REQ_SYNC | REQ_META | REQ_PRIO, .old_blkaddr = page->index, @@ -2691,6 +2748,8 @@ int rewrite_data_page(struct f2fs_io_info *fio) int err; fio->new_blkaddr = fio->old_blkaddr; + /* i/o temperature is needed for passing down write hints */ + __get_segment_type(fio); stat_inc_inplace_blocks(fio->sbi); err = f2fs_submit_page_bio(fio); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 9587ca0bcd3e..28b9d605d20f 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -129,6 +129,7 @@ enum { Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, + Opt_whint, Opt_err, }; @@ -182,6 +183,7 @@ static match_table_t f2fs_tokens = { {Opt_jqfmt_vfsold, "jqfmt=vfsold"}, {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"}, {Opt_jqfmt_vfsv1, "jqfmt=vfsv1"}, + {Opt_whint, "whint_mode=%s"}, {Opt_err, NULL}, }; @@ -679,6 +681,22 @@ static int parse_options(struct super_block *sb, char *options) "quota operations not supported"); break; #endif + case Opt_whint: + name = match_strdup(&args[0]); + if (!name) + return -ENOMEM; + if (strlen(name) == 10 && + !strncmp(name, "user-based", 10)) { + sbi->whint_mode = WHINT_MODE_USER; + } else if (strlen(name) == 3 && + !strncmp(name, "off", 3)) { + sbi->whint_mode = WHINT_MODE_OFF; + } else { + kfree(name); + return -EINVAL; + } + kfree(name); + break; default: f2fs_msg(sb, KERN_ERR, "Unrecognized mount option \"%s\" or missing value", @@ -722,6 +740,12 @@ static int parse_options(struct super_block *sb, char *options) return -EINVAL; } } + + /* Not pass down write hints if the number of active logs is lesser + * than NR_CURSEG_TYPE. + */ + if (sbi->active_logs != NR_CURSEG_TYPE) + sbi->whint_mode = WHINT_MODE_OFF; return 0; } @@ -1232,6 +1256,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) seq_puts(seq, ",prjquota"); #endif f2fs_show_quota_options(seq, sbi->sb); + if (sbi->whint_mode == WHINT_MODE_USER) + seq_printf(seq, ",whint_mode=%s", "user-based"); return 0; } @@ -1241,6 +1267,7 @@ static void default_options(struct f2fs_sb_info *sbi) /* init some FS parameters */ sbi->active_logs = NR_CURSEG_TYPE; sbi->inline_xattr_size = DEFAULT_INLINE_XATTR_ADDRS; + sbi->whint_mode = WHINT_MODE_OFF; set_opt(sbi, BG_GC); set_opt(sbi, INLINE_XATTR); @@ -1281,6 +1308,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) bool need_restart_gc = false; bool need_stop_gc = false; bool no_extent_cache = !test_opt(sbi, EXTENT_CACHE); + int old_whint_mode = sbi->whint_mode; #ifdef CONFIG_F2FS_FAULT_INJECTION struct f2fs_fault_info ffi = sbi->fault_info; #endif @@ -1380,7 +1408,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) need_stop_gc = true; } - if (*flags & SB_RDONLY) { + if (*flags & SB_RDONLY || sbi->whint_mode != old_whint_mode) { writeback_inodes_sb(sb, WB_REASON_SYNC); sync_inodes_sb(sb); @@ -1430,6 +1458,7 @@ restore_opts: sbi->s_qf_names[i] = s_qf_names[i]; } #endif + sbi->whint_mode = old_whint_mode; sbi->mount_opt = org_mount_opt; sbi->active_logs = active_logs; sb->s_flags = old_sb_flags; From f2e703f9a3caa91e4f3f0a8f5d3fe92dbea05623 Mon Sep 17 00:00:00 2001 From: Hyunchul Lee Date: Wed, 31 Jan 2018 11:36:58 +0900 Subject: [PATCH 09/50] f2fs: support passing down write hints to block layer with F2FS policy Add 'whint_mode=fs-based' mount option. In this mode, F2FS passes down write hints with its policy. * whint_mode=fs-based. F2FS passes down hints with its policy. User F2FS Block ---- ---- ----- META WRITE_LIFE_MEDIUM; HOT_NODE WRITE_LIFE_NOT_SET WARM_NODE " COLD_NODE WRITE_LIFE_NONE ioctl(COLD) COLD_DATA WRITE_LIFE_EXTREME extension list " " -- buffered io WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_LONG WRITE_LIFE_NONE " " WRITE_LIFE_MEDIUM " " WRITE_LIFE_LONG " " -- direct io WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_NOT_SET WRITE_LIFE_NONE " WRITE_LIFE_NONE WRITE_LIFE_MEDIUM " WRITE_LIFE_MEDIUM WRITE_LIFE_LONG " WRITE_LIFE_LONG Many thanks to Chao Yu and Jaegeuk Kim for comments to implement this patch. Signed-off-by: Hyunchul Lee Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 1 + fs/f2fs/segment.c | 57 +++++++++++++++++++++++++++++++++++++++-------- fs/f2fs/super.c | 5 +++++ 3 files changed, 54 insertions(+), 9 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 6c4236d348c6..d57b7dd8f1af 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1040,6 +1040,7 @@ enum { enum { WHINT_MODE_OFF, /* not pass down write hints */ WHINT_MODE_USER, /* try to pass down hints given by users */ + WHINT_MODE_FS, /* pass down hints with F2FS policy */ }; struct f2fs_sb_info { diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index c14d5a12b908..427b75ed1e29 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2489,6 +2489,32 @@ int rw_hint_to_seg_type(enum rw_hint hint) * WRITE_LIFE_MEDIUM " WRITE_LIFE_MEDIUM * WRITE_LIFE_LONG " WRITE_LIFE_LONG * + * 3) whint_mode=fs-based. F2FS passes down hints with its policy. + * + * User F2FS Block + * ---- ---- ----- + * META WRITE_LIFE_MEDIUM; + * HOT_NODE WRITE_LIFE_NOT_SET + * WARM_NODE " + * COLD_NODE WRITE_LIFE_NONE + * ioctl(COLD) COLD_DATA WRITE_LIFE_EXTREME + * extension list " " + * + * -- buffered io + * WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME + * WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT + * WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_LONG + * WRITE_LIFE_NONE " " + * WRITE_LIFE_MEDIUM " " + * WRITE_LIFE_LONG " " + * + * -- direct io + * WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME + * WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT + * WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_NOT_SET + * WRITE_LIFE_NONE " WRITE_LIFE_NONE + * WRITE_LIFE_MEDIUM " WRITE_LIFE_MEDIUM + * WRITE_LIFE_LONG " WRITE_LIFE_LONG */ enum rw_hint io_type_to_rw_hint(struct f2fs_sb_info *sbi, @@ -2496,20 +2522,33 @@ enum rw_hint io_type_to_rw_hint(struct f2fs_sb_info *sbi, { if (sbi->whint_mode == WHINT_MODE_USER) { if (type == DATA) { - switch (temp) { - case COLD: - return WRITE_LIFE_EXTREME; - case HOT: - return WRITE_LIFE_SHORT; - default: + if (temp == WARM) return WRITE_LIFE_NOT_SET; - } + else if (temp == HOT) + return WRITE_LIFE_SHORT; + else if (temp == COLD) + return WRITE_LIFE_EXTREME; } else { return WRITE_LIFE_NOT_SET; } - } else { - return WRITE_LIFE_NOT_SET; + } else if (sbi->whint_mode == WHINT_MODE_FS) { + if (type == DATA) { + if (temp == WARM) + return WRITE_LIFE_LONG; + else if (temp == HOT) + return WRITE_LIFE_SHORT; + else if (temp == COLD) + return WRITE_LIFE_EXTREME; + } else if (type == NODE) { + if (temp == WARM || temp == HOT) + return WRITE_LIFE_NOT_SET; + else if (temp == COLD) + return WRITE_LIFE_NONE; + } else if (type == META) { + return WRITE_LIFE_MEDIUM; + } } + return WRITE_LIFE_NOT_SET; } static int __get_segment_type_2(struct f2fs_io_info *fio) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 28b9d605d20f..2e427a15b37e 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -691,6 +691,9 @@ static int parse_options(struct super_block *sb, char *options) } else if (strlen(name) == 3 && !strncmp(name, "off", 3)) { sbi->whint_mode = WHINT_MODE_OFF; + } else if (strlen(name) == 8 && + !strncmp(name, "fs-based", 8)) { + sbi->whint_mode = WHINT_MODE_FS; } else { kfree(name); return -EINVAL; @@ -1258,6 +1261,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) f2fs_show_quota_options(seq, sbi->sb); if (sbi->whint_mode == WHINT_MODE_USER) seq_printf(seq, ",whint_mode=%s", "user-based"); + else if (sbi->whint_mode == WHINT_MODE_FS) + seq_printf(seq, ",whint_mode=%s", "fs-based"); return 0; } From 8b3a0ca0fde161c9716d4f0e8d81bf30fc004604 Mon Sep 17 00:00:00 2001 From: Hyunchul Lee Date: Wed, 31 Jan 2018 11:36:59 +0900 Subject: [PATCH 10/50] f2fs: Add the 'whint_mode' mount option to f2fs documentation Signed-off-by: Hyunchul Lee Reviewed-by: Chao Yu [Jaegeuk Kim: Add the write-hint policy in f2fs doc.] Signed-off-by: Jaegeuk Kim --- Documentation/filesystems/f2fs.txt | 66 ++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt index 13c2ff034348..0caf7da0a532 100644 --- a/Documentation/filesystems/f2fs.txt +++ b/Documentation/filesystems/f2fs.txt @@ -174,6 +174,12 @@ offgrpjquota Turn off group journelled quota. offprjjquota Turn off project journelled quota. quota Enable plain user disk quota accounting. noquota Disable all plain disk quota option. +whint_mode=%s Control which write hints are passed down to block + layer. This supports "off", "user-based", and + "fs-based". In "off" mode (default), f2fs does not pass + down hints. In "user-based" mode, f2fs tries to pass + down hints given by users. And in "fs-based" mode, f2fs + passes down hints with its policy. ================================================================================ DEBUGFS ENTRIES @@ -611,3 +617,63 @@ algorithm. In order to identify whether the data in the victim segment are valid or not, F2FS manages a bitmap. Each bit represents the validity of a block, and the bitmap is composed of a bit stream covering whole blocks in main area. + +Write-hint Policy +----------------- + +1) whint_mode=off. F2FS only passes down WRITE_LIFE_NOT_SET. + +2) whint_mode=user-based. F2FS tries to pass down hints given by +users. + +User F2FS Block +---- ---- ----- + META WRITE_LIFE_NOT_SET + HOT_NODE " + WARM_NODE " + COLD_NODE " +*ioctl(COLD) COLD_DATA WRITE_LIFE_EXTREME +*extension list " " + +-- buffered io +WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME +WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT +WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_NOT_SET +WRITE_LIFE_NONE " " +WRITE_LIFE_MEDIUM " " +WRITE_LIFE_LONG " " + +-- direct io +WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME +WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT +WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_NOT_SET +WRITE_LIFE_NONE " WRITE_LIFE_NONE +WRITE_LIFE_MEDIUM " WRITE_LIFE_MEDIUM +WRITE_LIFE_LONG " WRITE_LIFE_LONG + +3) whint_mode=fs-based. F2FS passes down hints with its policy. + +User F2FS Block +---- ---- ----- + META WRITE_LIFE_MEDIUM; + HOT_NODE WRITE_LIFE_NOT_SET + WARM_NODE " + COLD_NODE WRITE_LIFE_NONE +ioctl(COLD) COLD_DATA WRITE_LIFE_EXTREME +extension list " " + +-- buffered io +WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME +WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT +WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_LONG +WRITE_LIFE_NONE " " +WRITE_LIFE_MEDIUM " " +WRITE_LIFE_LONG " " + +-- direct io +WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME +WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT +WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_NOT_SET +WRITE_LIFE_NONE " WRITE_LIFE_NONE +WRITE_LIFE_MEDIUM " WRITE_LIFE_MEDIUM +WRITE_LIFE_LONG " WRITE_LIFE_LONG From 0f9ec2a8f65d72a454e99fb4f7351d7e01070385 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 7 Feb 2018 17:01:48 -0800 Subject: [PATCH 11/50] f2fs: handle quota for orphan inodes This is to detect dquot_initialize errors early from evict_inode for orphan inodes. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 8b0945ba284d..e3bf753a47be 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -569,13 +569,8 @@ static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) struct node_info ni; int err = acquire_orphan_inode(sbi); - if (err) { - set_sbi_flag(sbi, SBI_NEED_FSCK); - f2fs_msg(sbi->sb, KERN_WARNING, - "%s: orphan failed (ino=%x), run fsck to fix.", - __func__, ino); - return err; - } + if (err) + goto err_out; __add_ino_entry(sbi, ino, 0, ORPHAN_INO); @@ -589,6 +584,11 @@ static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) return PTR_ERR(inode); } + err = dquot_initialize(inode); + if (err) + goto err_out; + + dquot_initialize(inode); clear_nlink(inode); /* truncate all the data during iput */ @@ -598,14 +598,18 @@ static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) /* ENOMEM was fully retried in f2fs_evict_inode. */ if (ni.blk_addr != NULL_ADDR) { - set_sbi_flag(sbi, SBI_NEED_FSCK); - f2fs_msg(sbi->sb, KERN_WARNING, - "%s: orphan failed (ino=%x) by kernel, retry mount.", - __func__, ino); - return -EIO; + err = -EIO; + goto err_out; } __remove_ino_entry(sbi, ino, ORPHAN_INO); return 0; + +err_out: + set_sbi_flag(sbi, SBI_NEED_FSCK); + f2fs_msg(sbi->sb, KERN_WARNING, + "%s: orphan failed (ino=%x), run fsck to fix.", + __func__, ino); + return err; } int recover_orphan_inodes(struct f2fs_sb_info *sbi) From fb0e72c8b94903be3a7e742ab0a4b53b89e7ee35 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 3 Feb 2018 17:44:39 +0800 Subject: [PATCH 12/50] f2fs: fix to handle looped node chain during recovery There is no checksum in node block now, so bit-transition from hardware can make node_footer.next_blkaddr being corrupted w/o any detection, result in node chain becoming looped one. For this condition, during recovery, in order to avoid running into dead loop, let's detect it and just skip out. Signed-off-by: Yunlei He Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/recovery.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index c5e5c45130b5..1b23d3febe4c 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -242,6 +242,9 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head, struct curseg_info *curseg; struct page *page = NULL; block_t blkaddr; + unsigned int loop_cnt = 0; + unsigned int free_blocks = sbi->user_block_count - + valid_user_blocks(sbi); int err = 0; /* get node pages in the current segment */ @@ -294,6 +297,17 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head, if (IS_INODE(page) && is_dent_dnode(page)) entry->last_dentry = blkaddr; next: + /* sanity check in order to detect looped node chain */ + if (++loop_cnt >= free_blocks || + blkaddr == next_blkaddr_of_node(page)) { + f2fs_msg(sbi->sb, KERN_NOTICE, + "%s: detect looped node chain, " + "blkaddr:%u, next:%u", + __func__, blkaddr, next_blkaddr_of_node(page)); + err = -EINVAL; + break; + } + /* check next segment */ blkaddr = next_blkaddr_of_node(page); f2fs_put_page(page, 1); From 3bb09a0e7e807e2cb7a97e35d5075d0d82bd4085 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Tue, 6 Feb 2018 08:21:45 +0800 Subject: [PATCH 13/50] f2fs: remove redundant check of page type when submit bio This patch removes redundant check of page type when submit bio to make the logic more clear. Signed-off-by: Tiezhu Yang Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 0dae8561addb..1c5b050ef900 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -203,13 +203,12 @@ static inline void __submit_bio(struct f2fs_sb_info *sbi, if (!is_read_io(bio_op(bio))) { unsigned int start; - if (f2fs_sb_mounted_blkzoned(sbi->sb) && - current->plug && (type == DATA || type == NODE)) - blk_finish_plug(current->plug); - if (type != DATA && type != NODE) goto submit_io; + if (f2fs_sb_mounted_blkzoned(sbi->sb) && current->plug) + blk_finish_plug(current->plug); + start = bio->bi_iter.bi_size >> F2FS_BLKSIZE_BITS; start %= F2FS_IO_SIZE(sbi); From ccd31cb28ff2113a2de96ebf19a02b3c920b9fd1 Mon Sep 17 00:00:00 2001 From: Sheng Yong Date: Tue, 6 Feb 2018 12:31:17 +0800 Subject: [PATCH 14/50] f2fs: clean up f2fs_sb_has_xxx functions This patch introduces F2FS_FEATURE_FUNCS to clean up the definitions of different f2fs_sb_has_xxx functions. Signed-off-by: Sheng Yong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 2 +- fs/f2fs/f2fs.h | 51 ++++++++++++----------------------------------- fs/f2fs/file.c | 6 +++--- fs/f2fs/segment.c | 4 ++-- fs/f2fs/super.c | 14 ++++++------- fs/f2fs/sysfs.c | 4 ++-- 6 files changed, 28 insertions(+), 53 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 1c5b050ef900..6c3c9784de06 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -206,7 +206,7 @@ static inline void __submit_bio(struct f2fs_sb_info *sbi, if (type != DATA && type != NODE) goto submit_io; - if (f2fs_sb_mounted_blkzoned(sbi->sb) && current->plug) + if (f2fs_sb_has_blkzoned(sbi->sb) && current->plug) blk_finish_plug(current->plug); start = bio->bi_iter.bi_size >> F2FS_BLKSIZE_BITS; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index d57b7dd8f1af..9f3841be992f 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3182,45 +3182,20 @@ static inline bool f2fs_bio_encrypted(struct bio *bio) return bio->bi_private != NULL; } -static inline int f2fs_sb_has_crypto(struct super_block *sb) -{ - return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_ENCRYPT); +#define F2FS_FEATURE_FUNCS(name, flagname) \ +static inline int f2fs_sb_has_##name(struct super_block *sb) \ +{ \ + return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_##flagname); \ } -static inline int f2fs_sb_mounted_blkzoned(struct super_block *sb) -{ - return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_BLKZONED); -} - -static inline int f2fs_sb_has_extra_attr(struct super_block *sb) -{ - return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_EXTRA_ATTR); -} - -static inline int f2fs_sb_has_project_quota(struct super_block *sb) -{ - return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_PRJQUOTA); -} - -static inline int f2fs_sb_has_inode_chksum(struct super_block *sb) -{ - return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_INODE_CHKSUM); -} - -static inline int f2fs_sb_has_flexible_inline_xattr(struct super_block *sb) -{ - return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_FLEXIBLE_INLINE_XATTR); -} - -static inline int f2fs_sb_has_quota_ino(struct super_block *sb) -{ - return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_QUOTA_INO); -} - -static inline int f2fs_sb_has_inode_crtime(struct super_block *sb) -{ - return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_INODE_CRTIME); -} +F2FS_FEATURE_FUNCS(encrypt, ENCRYPT); +F2FS_FEATURE_FUNCS(blkzoned, BLKZONED); +F2FS_FEATURE_FUNCS(extra_attr, EXTRA_ATTR); +F2FS_FEATURE_FUNCS(project_quota, PRJQUOTA); +F2FS_FEATURE_FUNCS(inode_chksum, INODE_CHKSUM); +F2FS_FEATURE_FUNCS(flexible_inline_xattr, FLEXIBLE_INLINE_XATTR); +F2FS_FEATURE_FUNCS(quota_ino, QUOTA_INO); +F2FS_FEATURE_FUNCS(inode_crtime, INODE_CRTIME); #ifdef CONFIG_BLK_DEV_ZONED static inline int get_blkz_type(struct f2fs_sb_info *sbi, @@ -3240,7 +3215,7 @@ static inline bool f2fs_discard_en(struct f2fs_sb_info *sbi) { struct request_queue *q = bdev_get_queue(sbi->sb->s_bdev); - return blk_queue_discard(q) || f2fs_sb_mounted_blkzoned(sbi->sb); + return blk_queue_discard(q) || f2fs_sb_has_blkzoned(sbi->sb); } static inline void set_opt_mode(struct f2fs_sb_info *sbi, unsigned int mt) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 672a542e5464..29956e16c58c 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1938,7 +1938,7 @@ static int f2fs_ioc_set_encryption_policy(struct file *filp, unsigned long arg) { struct inode *inode = file_inode(filp); - if (!f2fs_sb_has_crypto(inode->i_sb)) + if (!f2fs_sb_has_encrypt(inode->i_sb)) return -EOPNOTSUPP; f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); @@ -1948,7 +1948,7 @@ static int f2fs_ioc_set_encryption_policy(struct file *filp, unsigned long arg) static int f2fs_ioc_get_encryption_policy(struct file *filp, unsigned long arg) { - if (!f2fs_sb_has_crypto(file_inode(filp)->i_sb)) + if (!f2fs_sb_has_encrypt(file_inode(filp)->i_sb)) return -EOPNOTSUPP; return fscrypt_ioctl_get_policy(filp, (void __user *)arg); } @@ -1959,7 +1959,7 @@ static int f2fs_ioc_get_encryption_pwsalt(struct file *filp, unsigned long arg) struct f2fs_sb_info *sbi = F2FS_I_SB(inode); int err; - if (!f2fs_sb_has_crypto(inode->i_sb)) + if (!f2fs_sb_has_encrypt(inode->i_sb)) return -EOPNOTSUPP; if (uuid_is_nonzero(sbi->raw_super->encrypt_pw_salt)) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 427b75ed1e29..0646d5de8cd0 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1485,7 +1485,7 @@ static int __issue_discard_async(struct f2fs_sb_info *sbi, struct block_device *bdev, block_t blkstart, block_t blklen) { #ifdef CONFIG_BLK_DEV_ZONED - if (f2fs_sb_mounted_blkzoned(sbi->sb) && + if (f2fs_sb_has_blkzoned(sbi->sb) && bdev_zoned_model(bdev) != BLK_ZONED_NONE) return __f2fs_issue_discard_zone(sbi, bdev, blkstart, blklen); #endif @@ -1683,7 +1683,7 @@ find_next: sbi->blocks_per_seg, cur_pos); len = next_pos - cur_pos; - if (f2fs_sb_mounted_blkzoned(sbi->sb) || + if (f2fs_sb_has_blkzoned(sbi->sb) || (force && len < cpc->trim_minlen)) goto skip; diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 2e427a15b37e..ff6da6e05ec9 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -405,14 +405,14 @@ static int parse_options(struct super_block *sb, char *options) q = bdev_get_queue(sb->s_bdev); if (blk_queue_discard(q)) { set_opt(sbi, DISCARD); - } else if (!f2fs_sb_mounted_blkzoned(sb)) { + } else if (!f2fs_sb_has_blkzoned(sb)) { f2fs_msg(sb, KERN_WARNING, "mounting with \"discard\" option, but " "the device does not support discard"); } break; case Opt_nodiscard: - if (f2fs_sb_mounted_blkzoned(sb)) { + if (f2fs_sb_has_blkzoned(sb)) { f2fs_msg(sb, KERN_WARNING, "discard is required for zoned block devices"); return -EINVAL; @@ -561,7 +561,7 @@ static int parse_options(struct super_block *sb, char *options) return -ENOMEM; if (strlen(name) == 8 && !strncmp(name, "adaptive", 8)) { - if (f2fs_sb_mounted_blkzoned(sb)) { + if (f2fs_sb_has_blkzoned(sb)) { f2fs_msg(sb, KERN_WARNING, "adaptive mode is not allowed with " "zoned block device feature"); @@ -1282,7 +1282,7 @@ static void default_options(struct f2fs_sb_info *sbi) set_opt(sbi, NOHEAP); sbi->sb->s_flags |= SB_LAZYTIME; set_opt(sbi, FLUSH_MERGE); - if (f2fs_sb_mounted_blkzoned(sbi->sb)) { + if (f2fs_sb_has_blkzoned(sbi->sb)) { set_opt_mode(sbi, F2FS_MOUNT_LFS); set_opt(sbi, DISCARD); } else { @@ -2246,7 +2246,7 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi) unsigned int n = 0; int err = -EIO; - if (!f2fs_sb_mounted_blkzoned(sbi->sb)) + if (!f2fs_sb_has_blkzoned(sbi->sb)) return 0; if (sbi->blocks_per_blkz && sbi->blocks_per_blkz != @@ -2453,7 +2453,7 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi) #ifdef CONFIG_BLK_DEV_ZONED if (bdev_zoned_model(FDEV(i).bdev) == BLK_ZONED_HM && - !f2fs_sb_mounted_blkzoned(sbi->sb)) { + !f2fs_sb_has_blkzoned(sbi->sb)) { f2fs_msg(sbi->sb, KERN_ERR, "Zoned block device feature not enabled\n"); return -EINVAL; @@ -2548,7 +2548,7 @@ try_onemore: * devices, but mandatory for host-managed zoned block devices. */ #ifndef CONFIG_BLK_DEV_ZONED - if (f2fs_sb_mounted_blkzoned(sb)) { + if (f2fs_sb_has_blkzoned(sb)) { f2fs_msg(sb, KERN_ERR, "Zoned block device support is not enabled\n"); err = -EOPNOTSUPP; diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index d978c7b6ea04..374ee5c82f94 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -92,10 +92,10 @@ static ssize_t features_show(struct f2fs_attr *a, if (!sb->s_bdev->bd_part) return snprintf(buf, PAGE_SIZE, "0\n"); - if (f2fs_sb_has_crypto(sb)) + if (f2fs_sb_has_encrypt(sb)) len += snprintf(buf, PAGE_SIZE - len, "%s", "encryption"); - if (f2fs_sb_mounted_blkzoned(sb)) + if (f2fs_sb_has_blkzoned(sb)) len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? ", " : "", "blkzoned"); if (f2fs_sb_has_extra_attr(sb)) From 46706d5917f4457a6befe7a39a15c89dbb1ce9ca Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Sat, 10 Feb 2018 12:12:51 +0800 Subject: [PATCH 15/50] f2fs: flush cp pack except cp pack 2 page at first Previously, we attempt to flush the whole cp pack in a single bio, however, when suddenly powering off at this time, we could get into an extreme scenario that cp pack 1 page and cp pack 2 page are updated and latest, but payload or current summaries are still partially outdated. (see reliable write in the UFS specification) This patch submits the whole cp pack except cp pack 2 page at first, and then writes the cp pack 2 page with an extra independent bio with pre-io barrier. Signed-off-by: Gao Xiang Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 69 +++++++++++++++++++++++++++++--------------- 1 file changed, 46 insertions(+), 23 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index e3bf753a47be..5b2db75ecd54 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1168,6 +1168,39 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc) spin_unlock_irqrestore(&sbi->cp_lock, flags); } +static void commit_checkpoint(struct f2fs_sb_info *sbi, + void *src, block_t blk_addr) +{ + struct writeback_control wbc = { + .for_reclaim = 0, + }; + + /* + * pagevec_lookup_tag and lock_page again will take + * some extra time. Therefore, update_meta_pages and + * sync_meta_pages are combined in this function. + */ + struct page *page = grab_meta_page(sbi, blk_addr); + int err; + + memcpy(page_address(page), src, PAGE_SIZE); + set_page_dirty(page); + + f2fs_wait_on_page_writeback(page, META, true); + f2fs_bug_on(sbi, PageWriteback(page)); + if (unlikely(!clear_page_dirty_for_io(page))) + f2fs_bug_on(sbi, 1); + + /* writeout cp pack 2 page */ + err = __f2fs_write_meta_page(page, &wbc, FS_CP_META_IO); + f2fs_bug_on(sbi, err); + + f2fs_put_page(page, 0); + + /* submit checkpoint (with barrier if NOBARRIER is not set) */ + f2fs_submit_merged_write(sbi, META_FLUSH); +} + static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) { struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); @@ -1270,16 +1303,6 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) } } - /* need to wait for end_io results */ - wait_on_all_pages_writeback(sbi); - if (unlikely(f2fs_cp_error(sbi))) - return -EIO; - - /* flush all device cache */ - err = f2fs_flush_device_cache(sbi); - if (err) - return err; - /* write out checkpoint buffer at block 0 */ update_meta_page(sbi, ckpt, start_blk++); @@ -1307,26 +1330,26 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) start_blk += NR_CURSEG_NODE_TYPE; } - /* writeout checkpoint block */ - update_meta_page(sbi, ckpt, start_blk); + /* update user_block_counts */ + sbi->last_valid_block_count = sbi->total_valid_block_count; + percpu_counter_set(&sbi->alloc_valid_block_count, 0); - /* wait for previous submitted node/meta pages writeback */ + /* Here, we have one bio having CP pack except cp pack 2 page */ + sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO); + + /* wait for previous submitted meta pages writeback */ wait_on_all_pages_writeback(sbi); if (unlikely(f2fs_cp_error(sbi))) return -EIO; - filemap_fdatawait_range(NODE_MAPPING(sbi), 0, LLONG_MAX); - filemap_fdatawait_range(META_MAPPING(sbi), 0, LLONG_MAX); + /* flush all device cache */ + err = f2fs_flush_device_cache(sbi); + if (err) + return err; - /* update user_block_counts */ - sbi->last_valid_block_count = sbi->total_valid_block_count; - percpu_counter_set(&sbi->alloc_valid_block_count, 0); - - /* Here, we only have one bio having CP pack */ - sync_meta_pages(sbi, META_FLUSH, LONG_MAX, FS_CP_META_IO); - - /* wait for previous submitted meta pages writeback */ + /* barrier and flush checkpoint cp pack 2 page if it can */ + commit_checkpoint(sbi, ckpt, start_blk); wait_on_all_pages_writeback(sbi); release_ino_entry(sbi, false); From 8fe326cb996f89bac50cee2fdb4febfcca6eb297 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 21 Feb 2018 18:13:40 +0000 Subject: [PATCH 16/50] f2fs: remove redundant initialization of pointer 'p' Pointer p is initialized with a value that is never read and is later re-assigned a new value, hence the initialization is redundant and can be removed. Cleans up clang warning: fs/f2fs/extent_cache.c:463:19: warning: Value stored to 'p' during its initialization is never read Signed-off-by: Colin Ian King Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/extent_cache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index aff6c2ed1c02..d5a861bf2b42 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -460,7 +460,7 @@ static struct extent_node *__insert_extent_tree(struct inode *inode, struct rb_node *insert_parent) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct rb_node **p = &et->root.rb_node; + struct rb_node **p; struct rb_node *parent = NULL; struct extent_node *en = NULL; From d0d3f1b329b01a9e16609eeadcc6d4fae0afc7c8 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sun, 11 Feb 2018 22:53:20 +0800 Subject: [PATCH 17/50] f2fs: introduce sb_lock to make encrypt pwsalt update exclusive f2fs_super_block.encrypt_pw_salt can be udpated and persisted concurrently, result in getting different pwsalt in separated threads, so let's introduce sb_lock to exclude concurrent accessers. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 1 + fs/f2fs/file.c | 19 +++++++++++-------- fs/f2fs/super.c | 2 ++ 3 files changed, 14 insertions(+), 8 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 9f3841be992f..6d0e7b185b39 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1047,6 +1047,7 @@ struct f2fs_sb_info { struct super_block *sb; /* pointer to VFS super block */ struct proc_dir_entry *s_proc; /* proc entry */ struct f2fs_super_block *raw_super; /* raw super block pointer */ + struct mutex sb_lock; /* lock for raw super block */ int valid_super_block; /* valid super block no */ unsigned long s_flag; /* flags for sbi */ diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 29956e16c58c..c355e4435b08 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1962,13 +1962,15 @@ static int f2fs_ioc_get_encryption_pwsalt(struct file *filp, unsigned long arg) if (!f2fs_sb_has_encrypt(inode->i_sb)) return -EOPNOTSUPP; - if (uuid_is_nonzero(sbi->raw_super->encrypt_pw_salt)) - goto got_it; - err = mnt_want_write_file(filp); if (err) return err; + mutex_lock(&sbi->sb_lock); + + if (uuid_is_nonzero(sbi->raw_super->encrypt_pw_salt)) + goto got_it; + /* update superblock with uuid */ generate_random_uuid(sbi->raw_super->encrypt_pw_salt); @@ -1976,15 +1978,16 @@ static int f2fs_ioc_get_encryption_pwsalt(struct file *filp, unsigned long arg) if (err) { /* undo new data */ memset(sbi->raw_super->encrypt_pw_salt, 0, 16); - mnt_drop_write_file(filp); - return err; + goto out_err; } - mnt_drop_write_file(filp); got_it: if (copy_to_user((__u8 __user *)arg, sbi->raw_super->encrypt_pw_salt, 16)) - return -EFAULT; - return 0; + err = -EFAULT; +out_err: + mutex_unlock(&sbi->sb_lock); + mnt_drop_write_file(filp); + return err; } static int f2fs_ioc_gc(struct file *filp, unsigned long arg) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index ff6da6e05ec9..d2833a232528 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -2221,6 +2221,8 @@ static void init_sb_info(struct f2fs_sb_info *sbi) sbi->dirty_device = 0; spin_lock_init(&sbi->dev_lock); + + mutex_init(&sbi->sb_lock); } static int init_percpu_info(struct f2fs_sb_info *sbi) From 17cd07ae95073c298af92c1ba14ac58ce84de33b Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sun, 25 Feb 2018 23:38:21 +0800 Subject: [PATCH 18/50] f2fs: fix to set KEEP_SIZE bit in f2fs_zero_range As Jayashree Mohan reported: A simple workload to reproduce this would be : 1. create foo 2. Write (8K - 16K) // foo size = 16K now 3. fsync() 4. falloc zero_range , keep_size (4202496 - 4210688) // foo size must be 16K 5. fdatasync() Crash now On recovery, we see that the file size is 4210688 and not 16K, which violates the semantics of keep_size flag. We have a test case to reproduce this using CrashMonkey on 4.15 kernel. Try this out by simply running : ./c_harness -f /dev/sda -d /dev/cow_ram0 -t f2fs -e 102400 -P -v tests/generic_468_zero.so The root cause is that we miss to set KEEP_SIZE bit correctly in zero_range when zeroing block cross EOF with FALLOC_FL_KEEP_SIZE, let's fix this missing case. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index c355e4435b08..db9dc9efafae 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1348,8 +1348,12 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len, } out: - if (!(mode & FALLOC_FL_KEEP_SIZE) && i_size_read(inode) < new_size) - f2fs_i_size_write(inode, new_size); + if (new_size > i_size_read(inode)) { + if (mode & FALLOC_FL_KEEP_SIZE) + file_set_keep_isize(inode); + else + f2fs_i_size_write(inode, new_size); + } out_sem: up_write(&F2FS_I(inode)->i_mmap_sem); From 846ae671ad368e344a2b141c0f19e1014b27a0dd Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 26 Feb 2018 22:04:13 +0800 Subject: [PATCH 19/50] f2fs: expose extension_list sysfs entry This patch adds a sysfs entry 'extension_list' to support query/add/del item in extension list. Query: cat /sys/fs/f2fs//extension_list Add: echo 'extension' > /sys/fs/f2fs//extension_list Del: echo '!extension' > /sys/fs/f2fs//extension_list Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/ABI/testing/sysfs-fs-f2fs | 9 ++++++ fs/f2fs/f2fs.h | 4 ++- fs/f2fs/file.c | 4 +-- fs/f2fs/namei.c | 42 +++++++++++++++++++++++-- fs/f2fs/super.c | 2 +- fs/f2fs/sysfs.c | 40 +++++++++++++++++++++++ include/linux/f2fs_fs.h | 3 +- 7 files changed, 96 insertions(+), 8 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index d870b5514d15..7fa84e349ad9 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -192,3 +192,12 @@ Date: November 2017 Contact: "Sheng Yong" Description: Controls readahead inode block in readdir. + +What: /sys/fs/f2fs//extension_list +Date: Feburary 2018 +Contact: "Chao Yu" +Description: + Used to control configure extension list: + - Query: cat /sys/fs/f2fs//extension_list + - Add: echo 'extension' > /sys/fs/f2fs//extension_list + - Del: echo '!extension' > /sys/fs/f2fs//extension_list diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 6d0e7b185b39..3bb4e943454a 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1047,7 +1047,7 @@ struct f2fs_sb_info { struct super_block *sb; /* pointer to VFS super block */ struct proc_dir_entry *s_proc; /* proc entry */ struct f2fs_super_block *raw_super; /* raw super block pointer */ - struct mutex sb_lock; /* lock for raw super block */ + struct rw_semaphore sb_lock; /* lock for raw super block */ int valid_super_block; /* valid super block no */ unsigned long s_flag; /* flags for sbi */ @@ -2605,6 +2605,8 @@ void handle_failed_inode(struct inode *inode); /* * namei.c */ +int update_extension_list(struct f2fs_sb_info *sbi, const char *name, + bool set); struct dentry *f2fs_get_parent(struct dentry *child); /* diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index db9dc9efafae..c0a80987e88e 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1970,7 +1970,7 @@ static int f2fs_ioc_get_encryption_pwsalt(struct file *filp, unsigned long arg) if (err) return err; - mutex_lock(&sbi->sb_lock); + down_write(&sbi->sb_lock); if (uuid_is_nonzero(sbi->raw_super->encrypt_pw_salt)) goto got_it; @@ -1989,7 +1989,7 @@ got_it: 16)) err = -EFAULT; out_err: - mutex_unlock(&sbi->sb_lock); + up_write(&sbi->sb_lock); mnt_drop_write_file(filp); return err; } diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 2ebf3d045dd2..2bd6e9f5746a 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -171,16 +171,52 @@ static int is_multimedia_file(const unsigned char *s, const char *sub) static inline void set_cold_files(struct f2fs_sb_info *sbi, struct inode *inode, const unsigned char *name) { - int i; - __u8 (*extlist)[8] = sbi->raw_super->extension_list; + __u8 (*extlist)[F2FS_EXTENSION_LEN] = sbi->raw_super->extension_list; + int i, count; + + down_read(&sbi->sb_lock); + + count = le32_to_cpu(sbi->raw_super->extension_count); - int count = le32_to_cpu(sbi->raw_super->extension_count); for (i = 0; i < count; i++) { if (is_multimedia_file(name, extlist[i])) { file_set_cold(inode); break; } } + + up_read(&sbi->sb_lock); +} + +int update_extension_list(struct f2fs_sb_info *sbi, const char *name, bool set) +{ + __u8 (*extlist)[F2FS_EXTENSION_LEN] = sbi->raw_super->extension_list; + int count = le32_to_cpu(sbi->raw_super->extension_count); + int i; + + for (i = 0; i < count; i++) { + if (strcmp(name, extlist[i])) + continue; + + if (set) + return -EINVAL; + + memcpy(extlist[i], extlist[i + 1], + F2FS_EXTENSION_LEN * (count - i - 1)); + memset(extlist[count - 1], 0, F2FS_EXTENSION_LEN); + sbi->raw_super->extension_count = cpu_to_le32(count - 1); + return 0; + } + + if (!set) + return -EINVAL; + + if (count == F2FS_MAX_EXTENSION) + return -EINVAL; + + strncpy(extlist[count], name, strlen(name)); + sbi->raw_super->extension_count = cpu_to_le32(count + 1); + return 0; } static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index d2833a232528..06da158d4263 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -2222,7 +2222,7 @@ static void init_sb_info(struct f2fs_sb_info *sbi) sbi->dirty_device = 0; spin_lock_init(&sbi->dev_lock); - mutex_init(&sbi->sb_lock); + init_rwsem(&sbi->sb_lock); } static int init_percpu_info(struct f2fs_sb_info *sbi) diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 374ee5c82f94..d27b28e602a6 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -136,6 +136,18 @@ static ssize_t f2fs_sbi_show(struct f2fs_attr *a, if (!ptr) return -EINVAL; + if (!strcmp(a->attr.name, "extension_list")) { + __u8 (*extlist)[F2FS_EXTENSION_LEN] = + sbi->raw_super->extension_list; + int count = le32_to_cpu(sbi->raw_super->extension_count); + int len = 0, i; + + for (i = 0; i < count; i++) + len += snprintf(buf + len, PAGE_SIZE - len, "%s\n", + extlist[i]); + return len; + } + ui = (unsigned int *)(ptr + a->offset); return snprintf(buf, PAGE_SIZE, "%u\n", *ui); @@ -154,6 +166,32 @@ static ssize_t f2fs_sbi_store(struct f2fs_attr *a, if (!ptr) return -EINVAL; + if (!strcmp(a->attr.name, "extension_list")) { + const char *name = strim((char *)buf); + bool set = true; + + if (name[0] == '!') { + name++; + set = false; + } + + if (strlen(name) >= F2FS_EXTENSION_LEN) + return -EINVAL; + + down_write(&sbi->sb_lock); + + ret = update_extension_list(sbi, name, set); + if (ret) + goto out; + + ret = f2fs_commit_super(sbi, false); + if (ret) + update_extension_list(sbi, name, !set); +out: + up_write(&sbi->sb_lock); + return ret ? ret : count; + } + ui = (unsigned int *)(ptr + a->offset); ret = kstrtoul(skip_spaces(buf), 0, &t); @@ -307,6 +345,7 @@ F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, idle_interval, interval_time[REQ_TIME]); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, iostat_enable, iostat_enable); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, readdir_ra, readdir_ra); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_pin_file_thresh, gc_pin_file_threshold); +F2FS_RW_ATTR(F2FS_SBI, f2fs_super_block, extension_list, extension_list); #ifdef CONFIG_F2FS_FAULT_INJECTION F2FS_RW_ATTR(FAULT_INFO_RATE, f2fs_fault_info, inject_rate, inject_rate); F2FS_RW_ATTR(FAULT_INFO_TYPE, f2fs_fault_info, inject_type, inject_type); @@ -357,6 +396,7 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(iostat_enable), ATTR_LIST(readdir_ra), ATTR_LIST(gc_pin_file_thresh), + ATTR_LIST(extension_list), #ifdef CONFIG_F2FS_FAULT_INJECTION ATTR_LIST(inject_rate), ATTR_LIST(inject_type), diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 96c9bdbace50..d8c241451712 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -21,6 +21,7 @@ #define F2FS_BLKSIZE 4096 /* support only 4KB block */ #define F2FS_BLKSIZE_BITS 12 /* bits for F2FS_BLKSIZE */ #define F2FS_MAX_EXTENSION 64 /* # of extension entries */ +#define F2FS_EXTENSION_LEN 8 /* max size of extension */ #define F2FS_BLK_ALIGN(x) (((x) + F2FS_BLKSIZE - 1) >> F2FS_BLKSIZE_BITS) #define NULL_ADDR ((block_t)0) /* used as block_t addresses */ @@ -101,7 +102,7 @@ struct f2fs_super_block { __u8 uuid[16]; /* 128-bit uuid for volume */ __le16 volume_name[MAX_VOLUME_NAME]; /* volume name */ __le32 extension_count; /* # of extensions below */ - __u8 extension_list[F2FS_MAX_EXTENSION][8]; /* extension array */ + __u8 extension_list[F2FS_MAX_EXTENSION][F2FS_EXTENSION_LEN];/* extension array */ __le32 cp_payload; __u8 version[VERSION_LEN]; /* the kernel version */ __u8 init_version[VERSION_LEN]; /* the initial kernel version */ From 69babac019337b35aae5644ccf8089243c749d4c Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 26 Feb 2018 09:19:47 -0800 Subject: [PATCH 20/50] f2fs: don't stop GC if GC is contended Let's do GC as much as possible, while gc_urgent is set. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index b9d93fd532a9..bc9420ce2275 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -76,14 +76,15 @@ static int gc_thread_func(void *data) * invalidated soon after by user update or deletion. * So, I'd like to wait some time to collect dirty segments. */ - if (!mutex_trylock(&sbi->gc_mutex)) - goto next; - if (gc_th->gc_urgent) { wait_ms = gc_th->urgent_sleep_time; + mutex_lock(&sbi->gc_mutex); goto do_gc; } + if (!mutex_trylock(&sbi->gc_mutex)) + goto next; + if (!is_idle(sbi)) { increase_sleep_time(gc_th, &wait_ms); mutex_unlock(&sbi->gc_mutex); From 079396270b0f1e98496ef7672dd754bdd42574dc Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sun, 18 Feb 2018 08:50:49 -0800 Subject: [PATCH 21/50] f2fs: add mount option for segment allocation policy This patch adds an mount option, "alloc_mode=%s" having two options, "default" and "reuse". In "alloc_mode=reuse" case, f2fs starts to allocate segments from 0'th segment all the time to reassign segments. It'd be useful for small-sized eMMC parts. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/filesystems/f2fs.txt | 2 ++ fs/f2fs/f2fs.h | 8 ++++++++ fs/f2fs/segment.c | 5 +++++ fs/f2fs/super.c | 26 ++++++++++++++++++++++++++ 4 files changed, 41 insertions(+) diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt index 0caf7da0a532..0409c47584ea 100644 --- a/Documentation/filesystems/f2fs.txt +++ b/Documentation/filesystems/f2fs.txt @@ -180,6 +180,8 @@ whint_mode=%s Control which write hints are passed down to block down hints. In "user-based" mode, f2fs tries to pass down hints given by users. And in "fs-based" mode, f2fs passes down hints with its policy. +alloc_mode=%s Adjust block allocation policy, which supports "reuse" + and "default". ================================================================================ DEBUGFS ENTRIES diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 3bb4e943454a..833c9bc06d03 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1043,6 +1043,11 @@ enum { WHINT_MODE_FS, /* pass down hints with F2FS policy */ }; +enum { + ALLOC_MODE_DEFAULT, /* stay default */ + ALLOC_MODE_REUSE, /* reuse segments as much as possible */ +}; + struct f2fs_sb_info { struct super_block *sb; /* pointer to VFS super block */ struct proc_dir_entry *s_proc; /* proc entry */ @@ -1229,6 +1234,9 @@ struct f2fs_sb_info { #endif /* For which write hints are passed down to block layer */ int whint_mode; + + /* segment allocation policy */ + int alloc_mode; }; #ifdef CONFIG_F2FS_FAULT_INJECTION diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 0646d5de8cd0..5e5e2936a26a 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2170,6 +2170,11 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type) if (SIT_I(sbi)->last_victim[ALLOC_NEXT]) return SIT_I(sbi)->last_victim[ALLOC_NEXT]; + + /* find segments from 0 to reuse freed segments */ + if (sbi->alloc_mode == ALLOC_MODE_REUSE) + return 0; + return CURSEG_I(sbi, type)->segno; } diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 06da158d4263..16d157e44184 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -130,6 +130,7 @@ enum { Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_whint, + Opt_alloc, Opt_err, }; @@ -184,6 +185,7 @@ static match_table_t f2fs_tokens = { {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"}, {Opt_jqfmt_vfsv1, "jqfmt=vfsv1"}, {Opt_whint, "whint_mode=%s"}, + {Opt_alloc, "alloc_mode=%s"}, {Opt_err, NULL}, }; @@ -700,6 +702,23 @@ static int parse_options(struct super_block *sb, char *options) } kfree(name); break; + case Opt_alloc: + name = match_strdup(&args[0]); + if (!name) + return -ENOMEM; + + if (strlen(name) == 7 && + !strncmp(name, "default", 7)) { + sbi->alloc_mode = ALLOC_MODE_DEFAULT; + } else if (strlen(name) == 5 && + !strncmp(name, "reuse", 5)) { + sbi->alloc_mode = ALLOC_MODE_REUSE; + } else { + kfree(name); + return -EINVAL; + } + kfree(name); + break; default: f2fs_msg(sb, KERN_ERR, "Unrecognized mount option \"%s\" or missing value", @@ -1264,6 +1283,10 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) else if (sbi->whint_mode == WHINT_MODE_FS) seq_printf(seq, ",whint_mode=%s", "fs-based"); + if (sbi->alloc_mode == ALLOC_MODE_DEFAULT) + seq_printf(seq, ",alloc_mode=%s", "default"); + else if (sbi->alloc_mode == ALLOC_MODE_REUSE) + seq_printf(seq, ",alloc_mode=%s", "reuse"); return 0; } @@ -1273,6 +1296,7 @@ static void default_options(struct f2fs_sb_info *sbi) sbi->active_logs = NR_CURSEG_TYPE; sbi->inline_xattr_size = DEFAULT_INLINE_XATTR_ADDRS; sbi->whint_mode = WHINT_MODE_OFF; + sbi->alloc_mode = ALLOC_MODE_DEFAULT; set_opt(sbi, BG_GC); set_opt(sbi, INLINE_XATTR); @@ -1314,6 +1338,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) bool need_stop_gc = false; bool no_extent_cache = !test_opt(sbi, EXTENT_CACHE); int old_whint_mode = sbi->whint_mode; + int old_alloc_mode = sbi->alloc_mode; #ifdef CONFIG_F2FS_FAULT_INJECTION struct f2fs_fault_info ffi = sbi->fault_info; #endif @@ -1463,6 +1488,7 @@ restore_opts: sbi->s_qf_names[i] = s_qf_names[i]; } #endif + sbi->alloc_mode = old_alloc_mode; sbi->whint_mode = old_whint_mode; sbi->mount_opt = org_mount_opt; sbi->active_logs = active_logs; From 84b89e5d943d8d6b997fcebb4609373e1b3e4b03 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 22 Feb 2018 14:09:30 -0800 Subject: [PATCH 22/50] f2fs: add auto tuning for small devices If f2fs is running on top of very small devices, it's worth to avoid abusing free LBAs. In order to achieve that, this patch introduces some parameter tuning. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.h | 2 ++ fs/f2fs/super.c | 14 ++++++++++++++ 2 files changed, 16 insertions(+) diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index f11c4bc82c78..dbb774aaf387 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -596,6 +596,8 @@ static inline int utilization(struct f2fs_sb_info *sbi) #define DEF_MIN_FSYNC_BLOCKS 8 #define DEF_MIN_HOT_BLOCKS 16 +#define SMALL_VOLUME_SEGMENTS (16 * 512) /* 16GB */ + enum { F2FS_IPU_FORCE, F2FS_IPU_SSR, diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 16d157e44184..9b6c53b69535 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -2515,6 +2515,18 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi) return 0; } +static void f2fs_tuning_parameters(struct f2fs_sb_info *sbi) +{ + struct f2fs_sm_info *sm_i = SM_I(sbi); + + /* adjust parameters according to the volume size */ + if (sm_i->main_segments <= SMALL_VOLUME_SEGMENTS) { + sbi->alloc_mode = ALLOC_MODE_REUSE; + sm_i->dcc_info->discard_granularity = 1; + sm_i->ipu_policy = 1 << F2FS_IPU_FORCE; + } +} + static int f2fs_fill_super(struct super_block *sb, void *data, int silent) { struct f2fs_sb_info *sbi; @@ -2867,6 +2879,8 @@ skip_recovery: f2fs_join_shrinker(sbi); + f2fs_tuning_parameters(sbi); + f2fs_msg(sbi->sb, KERN_NOTICE, "Mounted with checkpoint version = %llx", cur_cp_version(F2FS_CKPT(sbi))); f2fs_update_time(sbi, CP_TIME); From 782911f491e72857d67e93661580cf32e67df7b5 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sun, 25 Feb 2018 01:04:57 -0800 Subject: [PATCH 23/50] f2fs: set readdir_ra by default It gives general readdir improvement. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 9b6c53b69535..30b93ad44b9d 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1297,6 +1297,7 @@ static void default_options(struct f2fs_sb_info *sbi) sbi->inline_xattr_size = DEFAULT_INLINE_XATTR_ADDRS; sbi->whint_mode = WHINT_MODE_OFF; sbi->alloc_mode = ALLOC_MODE_DEFAULT; + sbi->readdir_ra = 1; set_opt(sbi, BG_GC); set_opt(sbi, INLINE_XATTR); From dee02f0d62fcb5033fe26f86941b9e3bdc450b74 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 22 Feb 2018 23:30:55 -0800 Subject: [PATCH 24/50] f2fs: issue discard aggressively in the gc_urgent mode This patch avoids to skip discard commands when user sets gc_urgent mode. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 5e5e2936a26a..1e3dd3de4ecc 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1411,12 +1411,11 @@ static int issue_discard_thread(void *data) if (kthread_should_stop()) return 0; - if (dcc->discard_wake) { + if (dcc->discard_wake) dcc->discard_wake = 0; - if (sbi->gc_thread && sbi->gc_thread->gc_urgent) - init_discard_policy(&dpolicy, - DPOLICY_FORCE, 1); - } + + if (sbi->gc_thread && sbi->gc_thread->gc_urgent) + init_discard_policy(&dpolicy, DPOLICY_FORCE, 1); sb_start_intwrite(sbi->sb); @@ -1727,7 +1726,7 @@ void init_discard_policy(struct discard_policy *dpolicy, } else if (discard_type == DPOLICY_FORCE) { dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME; dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME; - dpolicy->io_aware = true; + dpolicy->io_aware = false; } else if (discard_type == DPOLICY_FSTRIM) { dpolicy->io_aware = false; } else if (discard_type == DPOLICY_UMOUNT) { From b27bc8091ccf22f66ab105d10fa4f5cdeaef05a2 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 26 Feb 2018 15:40:30 -0800 Subject: [PATCH 25/50] f2fs: do gc in greedy mode for whole range if gc_urgent mode is set Otherwise, f2fs conducts GC on 8GB range only based on slow cost-benefit. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index bc9420ce2275..bfb7a4a3a929 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -162,12 +162,17 @@ static int select_gc_type(struct f2fs_gc_kthread *gc_th, int gc_type) { int gc_mode = (gc_type == BG_GC) ? GC_CB : GC_GREEDY; - if (gc_th && gc_th->gc_idle) { + if (!gc_th) + return gc_mode; + + if (gc_th->gc_idle) { if (gc_th->gc_idle == 1) gc_mode = GC_CB; else if (gc_th->gc_idle == 2) gc_mode = GC_GREEDY; } + if (gc_th->gc_urgent) + gc_mode = GC_GREEDY; return gc_mode; } @@ -189,7 +194,9 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type, } /* we need to check every dirty segments in the FG_GC case */ - if (gc_type != FG_GC && p->max_search > sbi->max_victim_search) + if (gc_type != FG_GC && + (sbi->gc_thread && !sbi->gc_thread->gc_urgent) && + p->max_search > sbi->max_victim_search) p->max_search = sbi->max_victim_search; /* let's select beginning hot/small space first in no_heap mode*/ From 1dc0f8991d4d93194fb7a3ac3164e05e8b24ad02 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 27 Feb 2018 22:45:24 +0800 Subject: [PATCH 26/50] f2fs: fix to avoid race in between atomic write and background GC Sqlite user Background GC - move_data_block : move page #1 - f2fs_is_atomic_file - f2fs_ioc_start_atomic_write - f2fs_ioc_commit_atomic_write - commit_inmem_pages : commit page #1 & set node #2 dirty - f2fs_submit_page_write - f2fs_update_data_blkaddr - set_page_dirty : set node #2 dirty - f2fs_do_sync_file - fsync_node_pages : commit node #1 & node #2, then sudden power-cut In a race case, we may check FI_ATOMIC_FILE flag before starting atomic write flow, then we will commit meta data before data with reversed order, after a sudden pow-cut, database transaction will be inconsistent. So we'd better to exclude gc/atomic_write to each other by using lock instead of flag checking. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index c0a80987e88e..6a202e5a6386 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1715,6 +1715,8 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp) inode_lock(inode); + down_write(&F2FS_I(inode)->dio_rwsem[WRITE]); + if (f2fs_is_volatile_file(inode)) goto err_out; @@ -1733,6 +1735,7 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp) ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 1, false); } err_out: + up_write(&F2FS_I(inode)->dio_rwsem[WRITE]); inode_unlock(inode); mnt_drop_write_file(filp); return ret; From b6a06cbbb5f7fd03589cff9178314af04c568826 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 28 Feb 2018 17:07:27 +0800 Subject: [PATCH 27/50] f2fs: support hot file extension This patch supports to recognize hot file extension in f2fs, so that we can allocate proper hot segment location for its data, which can lead to better hot/cold seperation in filesystem. In addition, we changes a bit on query/add/del operation method for extension_list sysfs entry as below: - Query: cat /sys/fs/f2fs//extension_list - Add: echo 'extension' > /sys/fs/f2fs//extension_list - Del: echo '!extension' > /sys/fs/f2fs//extension_list - Add: echo '[h/c]extension' > /sys/fs/f2fs//extension_list - Del: echo '[h/c]!extension' > /sys/fs/f2fs//extension_list - [h] means add/del hot file extension - [c] means add/del cold file extension Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/ABI/testing/sysfs-fs-f2fs | 6 +- fs/f2fs/f2fs.h | 6 +- fs/f2fs/namei.c | 77 +++++++++++++++++++------ fs/f2fs/segment.c | 3 +- fs/f2fs/sysfs.c | 30 ++++++++-- include/linux/f2fs_fs.h | 3 +- 6 files changed, 95 insertions(+), 30 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index 7fa84e349ad9..540553c933b6 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -199,5 +199,7 @@ Contact: "Chao Yu" Description: Used to control configure extension list: - Query: cat /sys/fs/f2fs//extension_list - - Add: echo 'extension' > /sys/fs/f2fs//extension_list - - Del: echo '!extension' > /sys/fs/f2fs//extension_list + - Add: echo '[h/c]extension' > /sys/fs/f2fs//extension_list + - Del: echo '[h/c]!extension' > /sys/fs/f2fs//extension_list + - [h] means add/del hot file extension + - [c] means add/del cold file extension diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 833c9bc06d03..f6dc70666ebb 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -576,6 +576,7 @@ enum { #define FADVISE_ENCRYPT_BIT 0x04 #define FADVISE_ENC_NAME_BIT 0x08 #define FADVISE_KEEP_SIZE_BIT 0x10 +#define FADVISE_HOT_BIT 0x20 #define file_is_cold(inode) is_file(inode, FADVISE_COLD_BIT) #define file_wrong_pino(inode) is_file(inode, FADVISE_LOST_PINO_BIT) @@ -590,6 +591,9 @@ enum { #define file_set_enc_name(inode) set_file(inode, FADVISE_ENC_NAME_BIT) #define file_keep_isize(inode) is_file(inode, FADVISE_KEEP_SIZE_BIT) #define file_set_keep_isize(inode) set_file(inode, FADVISE_KEEP_SIZE_BIT) +#define file_is_hot(inode) is_file(inode, FADVISE_HOT_BIT) +#define file_set_hot(inode) set_file(inode, FADVISE_HOT_BIT) +#define file_clear_hot(inode) clear_file(inode, FADVISE_HOT_BIT) #define DEF_DIR_LEVEL 0 @@ -2614,7 +2618,7 @@ void handle_failed_inode(struct inode *inode); * namei.c */ int update_extension_list(struct f2fs_sb_info *sbi, const char *name, - bool set); + bool hot, bool set); struct dentry *f2fs_get_parent(struct dentry *child); /* diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 2bd6e9f5746a..78ea0156f027 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -142,7 +142,7 @@ fail_drop: return ERR_PTR(err); } -static int is_multimedia_file(const unsigned char *s, const char *sub) +static int is_extension_exist(const unsigned char *s, const char *sub) { size_t slen = strlen(s); size_t sublen = strlen(sub); @@ -168,33 +168,59 @@ static int is_multimedia_file(const unsigned char *s, const char *sub) /* * Set multimedia files as cold files for hot/cold data separation */ -static inline void set_cold_files(struct f2fs_sb_info *sbi, struct inode *inode, +static inline void set_file_temperature(struct f2fs_sb_info *sbi, struct inode *inode, const unsigned char *name) { __u8 (*extlist)[F2FS_EXTENSION_LEN] = sbi->raw_super->extension_list; - int i, count; + int i, cold_count, hot_count; down_read(&sbi->sb_lock); - count = le32_to_cpu(sbi->raw_super->extension_count); + cold_count = le32_to_cpu(sbi->raw_super->extension_count); + hot_count = sbi->raw_super->hot_ext_count; - for (i = 0; i < count; i++) { - if (is_multimedia_file(name, extlist[i])) { + for (i = 0; i < cold_count + hot_count; i++) { + if (!is_extension_exist(name, extlist[i])) + continue; + if (i < cold_count) file_set_cold(inode); - break; - } + else + file_set_hot(inode); + break; } up_read(&sbi->sb_lock); } -int update_extension_list(struct f2fs_sb_info *sbi, const char *name, bool set) +int update_extension_list(struct f2fs_sb_info *sbi, const char *name, + bool hot, bool set) { __u8 (*extlist)[F2FS_EXTENSION_LEN] = sbi->raw_super->extension_list; - int count = le32_to_cpu(sbi->raw_super->extension_count); + int cold_count = le32_to_cpu(sbi->raw_super->extension_count); + int hot_count = sbi->raw_super->hot_ext_count; + int total_count = cold_count + hot_count; + int start, count; int i; - for (i = 0; i < count; i++) { + if (set) { + if (total_count == F2FS_MAX_EXTENSION) + return -EINVAL; + } else { + if (!hot && !cold_count) + return -EINVAL; + if (hot && !hot_count) + return -EINVAL; + } + + if (hot) { + start = cold_count; + count = total_count; + } else { + start = 0; + count = cold_count; + } + + for (i = start; i < count; i++) { if (strcmp(name, extlist[i])) continue; @@ -202,20 +228,33 @@ int update_extension_list(struct f2fs_sb_info *sbi, const char *name, bool set) return -EINVAL; memcpy(extlist[i], extlist[i + 1], - F2FS_EXTENSION_LEN * (count - i - 1)); - memset(extlist[count - 1], 0, F2FS_EXTENSION_LEN); - sbi->raw_super->extension_count = cpu_to_le32(count - 1); + F2FS_EXTENSION_LEN * (total_count - i - 1)); + memset(extlist[total_count - 1], 0, F2FS_EXTENSION_LEN); + if (hot) + sbi->raw_super->hot_ext_count = hot_count - 1; + else + sbi->raw_super->extension_count = + cpu_to_le32(cold_count - 1); return 0; } if (!set) return -EINVAL; - if (count == F2FS_MAX_EXTENSION) - return -EINVAL; + if (hot) { + strncpy(extlist[count], name, strlen(name)); + sbi->raw_super->hot_ext_count = hot_count + 1; + } else { + char buf[F2FS_MAX_EXTENSION][F2FS_EXTENSION_LEN]; - strncpy(extlist[count], name, strlen(name)); - sbi->raw_super->extension_count = cpu_to_le32(count + 1); + memcpy(buf, &extlist[cold_count], + F2FS_EXTENSION_LEN * hot_count); + memset(extlist[cold_count], 0, F2FS_EXTENSION_LEN); + strncpy(extlist[cold_count], name, strlen(name)); + memcpy(&extlist[cold_count + 1], buf, + F2FS_EXTENSION_LEN * hot_count); + sbi->raw_super->extension_count = cpu_to_le32(cold_count + 1); + } return 0; } @@ -239,7 +278,7 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, return PTR_ERR(inode); if (!test_opt(sbi, DISABLE_EXT_IDENTIFY)) - set_cold_files(sbi, inode, dentry->d_name.name); + set_file_temperature(sbi, inode, dentry->d_name.name); inode->i_op = &f2fs_file_inode_operations; inode->i_fop = &f2fs_file_operations; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 1e3dd3de4ecc..f61c77bd673c 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2587,7 +2587,8 @@ static int __get_segment_type_6(struct f2fs_io_info *fio) if (is_cold_data(fio->page) || file_is_cold(inode)) return CURSEG_COLD_DATA; - if (is_inode_flag_set(inode, FI_HOT_DATA)) + if (file_is_hot(inode) || + is_inode_flag_set(inode, FI_HOT_DATA)) return CURSEG_HOT_DATA; return rw_hint_to_seg_type(inode->i_write_hint); } else { diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index d27b28e602a6..23a2d8d66c43 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -139,10 +139,19 @@ static ssize_t f2fs_sbi_show(struct f2fs_attr *a, if (!strcmp(a->attr.name, "extension_list")) { __u8 (*extlist)[F2FS_EXTENSION_LEN] = sbi->raw_super->extension_list; - int count = le32_to_cpu(sbi->raw_super->extension_count); + int cold_count = le32_to_cpu(sbi->raw_super->extension_count); + int hot_count = sbi->raw_super->hot_ext_count; int len = 0, i; - for (i = 0; i < count; i++) + len += snprintf(buf + len, PAGE_SIZE - len, + "cold file extenstion:\n"); + for (i = 0; i < cold_count; i++) + len += snprintf(buf + len, PAGE_SIZE - len, "%s\n", + extlist[i]); + + len += snprintf(buf + len, PAGE_SIZE - len, + "hot file extenstion:\n"); + for (i = cold_count; i < cold_count + hot_count; i++) len += snprintf(buf + len, PAGE_SIZE - len, "%s\n", extlist[i]); return len; @@ -168,9 +177,18 @@ static ssize_t f2fs_sbi_store(struct f2fs_attr *a, if (!strcmp(a->attr.name, "extension_list")) { const char *name = strim((char *)buf); - bool set = true; + bool set = true, hot; - if (name[0] == '!') { + if (!strncmp(name, "[h]", 3)) + hot = true; + else if (!strncmp(name, "[c]", 3)) + hot = false; + else + return -EINVAL; + + name += 3; + + if (*name == '!') { name++; set = false; } @@ -180,13 +198,13 @@ static ssize_t f2fs_sbi_store(struct f2fs_attr *a, down_write(&sbi->sb_lock); - ret = update_extension_list(sbi, name, set); + ret = update_extension_list(sbi, name, hot, set); if (ret) goto out; ret = f2fs_commit_super(sbi, false); if (ret) - update_extension_list(sbi, name, !set); + update_extension_list(sbi, name, hot, !set); out: up_write(&sbi->sb_lock); return ret ? ret : count; diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index d8c241451712..b06ab1f04ff6 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -111,7 +111,8 @@ struct f2fs_super_block { __u8 encrypt_pw_salt[16]; /* Salt used for string2key algorithm */ struct f2fs_device devs[MAX_DEVICES]; /* device list */ __le32 qf_ino[F2FS_MAX_QUOTAS]; /* quota inode numbers */ - __u8 reserved[315]; /* valid reserved region */ + __u8 hot_ext_count; /* # of hot file extension */ + __u8 reserved[314]; /* valid reserved region */ } __packed; /* From 162b27aec9abe9d555b46e3a8e7ab06e7f603cbb Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 8 Mar 2018 20:47:33 -0800 Subject: [PATCH 28/50] f2fs: avoid selinux denial on CAP_SYS_RESOURCE This fixes CAP_SYS_RESOURCE denial of selinux when using resgid, since it seems selinux reports it at the first place, but mostly we don't need to check this condition first. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index f6dc70666ebb..3d12277fbe9e 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1607,13 +1607,13 @@ static inline bool __allow_reserved_blocks(struct f2fs_sb_info *sbi, return false; if (IS_NOQUOTA(inode)) return true; - if (capable(CAP_SYS_RESOURCE)) - return true; if (uid_eq(sbi->s_resuid, current_fsuid())) return true; if (!gid_eq(sbi->s_resgid, GLOBAL_ROOT_GID) && in_group_p(sbi->s_resgid)) return true; + if (capable(CAP_SYS_RESOURCE)) + return true; return false; } From deeedd71e3732fb72740eb41cf66214b2b13005e Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 1 Mar 2018 23:40:31 +0800 Subject: [PATCH 29/50] f2fs: wrap sb_rdonly with f2fs_readonly Use f2fs_readonly to wrap sb_rdonly for cleanup, and spread it in all places. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 4 ++-- fs/f2fs/super.c | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 3d12277fbe9e..0e47fe93c718 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2467,9 +2467,9 @@ static inline bool f2fs_skip_inode_update(struct inode *inode, int dsync) return ret; } -static inline int f2fs_readonly(struct super_block *sb) +static inline bool f2fs_readonly(struct super_block *sb) { - return sb->s_flags & SB_RDONLY; + return sb_rdonly(sb); } static inline bool f2fs_cp_error(struct f2fs_sb_info *sbi) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 30b93ad44b9d..d8357ba19159 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -336,7 +336,7 @@ static int f2fs_check_quota_options(struct f2fs_sb_info *sbi) "QUOTA feature is enabled, so ignore jquota_fmt"); sbi->s_jquota_fmt = 0; } - if (f2fs_sb_has_quota_ino(sbi->sb) && sb_rdonly(sbi->sb)) { + if (f2fs_sb_has_quota_ino(sbi->sb) && f2fs_readonly(sbi->sb)) { f2fs_msg(sbi->sb, KERN_INFO, "Filesystem with quota feature cannot be mounted RDWR " "without CONFIG_QUOTA"); @@ -2805,7 +2805,7 @@ try_onemore: * Turn on quotas which were not enabled for read-only mounts if * filesystem has quota feature, so that they are updated correctly. */ - if (f2fs_sb_has_quota_ino(sb) && !sb_rdonly(sb)) { + if (f2fs_sb_has_quota_ino(sb) && !f2fs_readonly(sb)) { err = f2fs_enable_quotas(sb); if (err) { f2fs_msg(sb, KERN_ERR, @@ -2890,7 +2890,7 @@ skip_recovery: free_meta: #ifdef CONFIG_QUOTA - if (f2fs_sb_has_quota_ino(sb) && !sb_rdonly(sb)) + if (f2fs_sb_has_quota_ino(sb) && !f2fs_readonly(sb)) f2fs_quota_off_umount(sbi->sb); #endif f2fs_sync_inode_meta(sbi); From eea52882d848827ca812282118cd17025a76a891 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 1 Mar 2018 23:40:32 +0800 Subject: [PATCH 30/50] f2fs: fix to restore old mount option in ->remount_fs This patch fixes to restore old mount option once we encounter failure in ->remount_fs. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index d8357ba19159..36c5e582c1f2 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1340,6 +1340,11 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) bool no_extent_cache = !test_opt(sbi, EXTENT_CACHE); int old_whint_mode = sbi->whint_mode; int old_alloc_mode = sbi->alloc_mode; + int old_inline_xattr_size = sbi->inline_xattr_size; + block_t old_root_reserved_blocks = sbi->root_reserved_blocks; + kuid_t old_resuid = sbi->s_resuid; + kgid_t old_resgid = sbi->s_resgid; + int old_write_io_size_bits = sbi->write_io_size_bits; #ifdef CONFIG_F2FS_FAULT_INJECTION struct f2fs_fault_info ffi = sbi->fault_info; #endif @@ -1489,6 +1494,11 @@ restore_opts: sbi->s_qf_names[i] = s_qf_names[i]; } #endif + sbi->write_io_size_bits = old_write_io_size_bits; + sbi->s_resgid = old_resgid; + sbi->s_resuid = old_resuid; + sbi->root_reserved_blocks = old_root_reserved_blocks; + sbi->inline_xattr_size = old_inline_xattr_size; sbi->alloc_mode = old_alloc_mode; sbi->whint_mode = old_whint_mode; sbi->mount_opt = org_mount_opt; From 93cf93f17c8ef7c03d828914eaef88d8e585490d Mon Sep 17 00:00:00 2001 From: Junling Zheng Date: Wed, 7 Mar 2018 12:07:49 +0800 Subject: [PATCH 31/50] f2fs: introduce mount option for fsync mode Commit "0a007b97aad6"(f2fs: recover directory operations by fsync) fixed xfstest generic/342 case, but it also increased the written data and caused the performance degradation. In most cases, there's no need to do so heavy fsync actually. So we introduce new mount option "fsync_mode={posix,strict}" to control the policy of fsync. "fsync_mode=posix" is set by default, and means that f2fs uses a light fsync, which follows POSIX semantics. And "fsync_mode=strict" means that it's a heavy fsync, which behaves in line with xfs, ext4 and btrfs, where generic/342 will pass, but the performance will regress. Signed-off-by: Junling Zheng Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/filesystems/f2fs.txt | 7 +++++++ fs/f2fs/dir.c | 3 ++- fs/f2fs/f2fs.h | 8 ++++++++ fs/f2fs/file.c | 3 ++- fs/f2fs/namei.c | 9 ++++++--- fs/f2fs/super.c | 26 ++++++++++++++++++++++++++ 6 files changed, 51 insertions(+), 5 deletions(-) diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt index 0409c47584ea..514e44983a8e 100644 --- a/Documentation/filesystems/f2fs.txt +++ b/Documentation/filesystems/f2fs.txt @@ -182,6 +182,13 @@ whint_mode=%s Control which write hints are passed down to block passes down hints with its policy. alloc_mode=%s Adjust block allocation policy, which supports "reuse" and "default". +fsync_mode=%s Control the policy of fsync. Currently supports "posix" + and "strict". In "posix" mode, which is default, fsync + will follow POSIX semantics and does a light operation + to improve the filesystem performance. In "strict" mode, + fsync will be heavy and behaves in line with xfs, ext4 + and btrfs, where xfstest generic/342 will pass, but the + performance will regress. ================================================================================ DEBUGFS ENTRIES diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 797eb05cb538..87c51709bc48 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -704,7 +704,8 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, f2fs_update_time(F2FS_I_SB(dir), REQ_TIME); - add_ino_entry(F2FS_I_SB(dir), dir->i_ino, TRANS_DIR_INO); + if (F2FS_I_SB(dir)->fsync_mode == FSYNC_MODE_STRICT) + add_ino_entry(F2FS_I_SB(dir), dir->i_ino, TRANS_DIR_INO); if (f2fs_has_inline_dentry(dir)) return f2fs_delete_inline_entry(dentry, page, dir, inode); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 0e47fe93c718..3c79b3565cb3 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1052,6 +1052,11 @@ enum { ALLOC_MODE_REUSE, /* reuse segments as much as possible */ }; +enum fsync_mode { + FSYNC_MODE_POSIX, /* fsync follows posix semantics */ + FSYNC_MODE_STRICT, /* fsync behaves in line with ext4 */ +}; + struct f2fs_sb_info { struct super_block *sb; /* pointer to VFS super block */ struct proc_dir_entry *s_proc; /* proc entry */ @@ -1241,6 +1246,9 @@ struct f2fs_sb_info { /* segment allocation policy */ int alloc_mode; + + /* fsync policy */ + int fsync_mode; }; #ifdef CONFIG_F2FS_FAULT_INJECTION diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 6a202e5a6386..99fa207fc310 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -165,7 +165,8 @@ static inline enum cp_reason_type need_do_checkpoint(struct inode *inode) cp_reason = CP_FASTBOOT_MODE; else if (sbi->active_logs == 2) cp_reason = CP_SPEC_LOG_NUM; - else if (need_dentry_mark(sbi, inode->i_ino) && + else if (sbi->fsync_mode == FSYNC_MODE_STRICT && + need_dentry_mark(sbi, inode->i_ino) && exist_written_data(sbi, F2FS_I(inode)->i_pino, TRANS_DIR_INO)) cp_reason = CP_RECOVER_DIR; diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 78ea0156f027..e9bb3a3a8b0a 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -970,7 +970,8 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, f2fs_put_page(old_dir_page, 0); f2fs_i_links_write(old_dir, false); } - add_ino_entry(sbi, new_dir->i_ino, TRANS_DIR_INO); + if (sbi->fsync_mode == FSYNC_MODE_STRICT) + add_ino_entry(sbi, new_dir->i_ino, TRANS_DIR_INO); f2fs_unlock_op(sbi); @@ -1120,8 +1121,10 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, } f2fs_mark_inode_dirty_sync(new_dir, false); - add_ino_entry(sbi, old_dir->i_ino, TRANS_DIR_INO); - add_ino_entry(sbi, new_dir->i_ino, TRANS_DIR_INO); + if (sbi->fsync_mode == FSYNC_MODE_STRICT) { + add_ino_entry(sbi, old_dir->i_ino, TRANS_DIR_INO); + add_ino_entry(sbi, new_dir->i_ino, TRANS_DIR_INO); + } f2fs_unlock_op(sbi); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 36c5e582c1f2..2516dfa26a98 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -131,6 +131,7 @@ enum { Opt_jqfmt_vfsv1, Opt_whint, Opt_alloc, + Opt_fsync, Opt_err, }; @@ -186,6 +187,7 @@ static match_table_t f2fs_tokens = { {Opt_jqfmt_vfsv1, "jqfmt=vfsv1"}, {Opt_whint, "whint_mode=%s"}, {Opt_alloc, "alloc_mode=%s"}, + {Opt_fsync, "fsync_mode=%s"}, {Opt_err, NULL}, }; @@ -719,6 +721,22 @@ static int parse_options(struct super_block *sb, char *options) } kfree(name); break; + case Opt_fsync: + name = match_strdup(&args[0]); + if (!name) + return -ENOMEM; + if (strlen(name) == 5 && + !strncmp(name, "posix", 5)) { + sbi->fsync_mode = FSYNC_MODE_POSIX; + } else if (strlen(name) == 6 && + !strncmp(name, "strict", 6)) { + sbi->fsync_mode = FSYNC_MODE_STRICT; + } else { + kfree(name); + return -EINVAL; + } + kfree(name); + break; default: f2fs_msg(sb, KERN_ERR, "Unrecognized mount option \"%s\" or missing value", @@ -1287,6 +1305,11 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) seq_printf(seq, ",alloc_mode=%s", "default"); else if (sbi->alloc_mode == ALLOC_MODE_REUSE) seq_printf(seq, ",alloc_mode=%s", "reuse"); + + if (sbi->fsync_mode == FSYNC_MODE_POSIX) + seq_printf(seq, ",fsync_mode=%s", "posix"); + else if (sbi->fsync_mode == FSYNC_MODE_STRICT) + seq_printf(seq, ",fsync_mode=%s", "strict"); return 0; } @@ -1297,6 +1320,7 @@ static void default_options(struct f2fs_sb_info *sbi) sbi->inline_xattr_size = DEFAULT_INLINE_XATTR_ADDRS; sbi->whint_mode = WHINT_MODE_OFF; sbi->alloc_mode = ALLOC_MODE_DEFAULT; + sbi->fsync_mode = FSYNC_MODE_POSIX; sbi->readdir_ra = 1; set_opt(sbi, BG_GC); @@ -1340,6 +1364,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) bool no_extent_cache = !test_opt(sbi, EXTENT_CACHE); int old_whint_mode = sbi->whint_mode; int old_alloc_mode = sbi->alloc_mode; + int old_fsync_mode = sbi->fsync_mode; int old_inline_xattr_size = sbi->inline_xattr_size; block_t old_root_reserved_blocks = sbi->root_reserved_blocks; kuid_t old_resuid = sbi->s_resuid; @@ -1500,6 +1525,7 @@ restore_opts: sbi->root_reserved_blocks = old_root_reserved_blocks; sbi->inline_xattr_size = old_inline_xattr_size; sbi->alloc_mode = old_alloc_mode; + sbi->fsync_mode = old_fsync_mode; sbi->whint_mode = old_whint_mode; sbi->mount_opt = org_mount_opt; sbi->active_logs = active_logs; From 5d7881cadf64277c1e212721073eaabfade12f66 Mon Sep 17 00:00:00 2001 From: Yunlei He Date: Wed, 7 Mar 2018 16:22:50 +0800 Subject: [PATCH 32/50] f2fs: Don't overwrite all types of node to keep node chain Currently, we enable node SSR by default, and mixed different types of node segment to do SSR more intensively. Although reuse warm node is not allowed, warm node chain will be destroyed by errors introduced by other types node chain. So we'd better forbid reusing all types of node to keep warm node chain. Signed-off-by: Yunlei He Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index f61c77bd673c..570e02d89cbc 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1862,7 +1862,7 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) sbi->discard_blks--; /* don't overwrite by SSR to keep node chain */ - if (se->type == CURSEG_WARM_NODE) { + if (IS_NODESEG(se->type)) { if (!f2fs_test_and_set_bit(offset, se->ckpt_valid_map)) se->ckpt_valid_blocks++; } From 63189b785960c3346d1af347516b7438f7ada8ec Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 8 Mar 2018 14:22:56 +0800 Subject: [PATCH 33/50] f2fs: wrap all options with f2fs_sb_info.mount_opt This patch merges miscellaneous mount options into struct f2fs_mount_info, After this patch, once we add new mount option, we don't need to worry about recovery of it in remount_fs(), since we will recover the f2fs_sb_info.mount_opt including all options. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 2 +- fs/f2fs/dir.c | 2 +- fs/f2fs/f2fs.h | 64 ++++++------ fs/f2fs/file.c | 4 +- fs/f2fs/namei.c | 6 +- fs/f2fs/segment.c | 8 +- fs/f2fs/super.c | 226 +++++++++++++++++++--------------------- fs/f2fs/sysfs.c | 4 +- include/linux/f2fs_fs.h | 8 +- 9 files changed, 154 insertions(+), 170 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 6c3c9784de06..3d6ae3173f98 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2300,7 +2300,7 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) int rw = iov_iter_rw(iter); int err; enum rw_hint hint = iocb->ki_hint; - int whint_mode = sbi->whint_mode; + int whint_mode = F2FS_OPTION(sbi).whint_mode; err = check_direct_IO(inode, iter, offset); if (err) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 87c51709bc48..3644db9177cb 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -704,7 +704,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, f2fs_update_time(F2FS_I_SB(dir), REQ_TIME); - if (F2FS_I_SB(dir)->fsync_mode == FSYNC_MODE_STRICT) + if (F2FS_OPTION(F2FS_I_SB(dir)).fsync_mode == FSYNC_MODE_STRICT) add_ino_entry(F2FS_I_SB(dir), dir->i_ino, TRANS_DIR_INO); if (f2fs_has_inline_dentry(dir)) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 3c79b3565cb3..93822634870d 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -98,9 +98,10 @@ extern char *fault_name[FAULT_MAX]; #define F2FS_MOUNT_INLINE_XATTR_SIZE 0x00800000 #define F2FS_MOUNT_RESERVE_ROOT 0x01000000 -#define clear_opt(sbi, option) ((sbi)->mount_opt.opt &= ~F2FS_MOUNT_##option) -#define set_opt(sbi, option) ((sbi)->mount_opt.opt |= F2FS_MOUNT_##option) -#define test_opt(sbi, option) ((sbi)->mount_opt.opt & F2FS_MOUNT_##option) +#define F2FS_OPTION(sbi) ((sbi)->mount_opt) +#define clear_opt(sbi, option) (F2FS_OPTION(sbi).opt &= ~F2FS_MOUNT_##option) +#define set_opt(sbi, option) (F2FS_OPTION(sbi).opt |= F2FS_MOUNT_##option) +#define test_opt(sbi, option) (F2FS_OPTION(sbi).opt & F2FS_MOUNT_##option) #define ver_after(a, b) (typecheck(unsigned long long, a) && \ typecheck(unsigned long long, b) && \ @@ -113,7 +114,25 @@ typedef u32 block_t; /* typedef u32 nid_t; struct f2fs_mount_info { - unsigned int opt; + unsigned int opt; + int write_io_size_bits; /* Write IO size bits */ + block_t root_reserved_blocks; /* root reserved blocks */ + kuid_t s_resuid; /* reserved blocks for uid */ + kgid_t s_resgid; /* reserved blocks for gid */ + int active_logs; /* # of active logs */ + int inline_xattr_size; /* inline xattr size */ +#ifdef CONFIG_F2FS_FAULT_INJECTION + struct f2fs_fault_info fault_info; /* For fault injection */ +#endif +#ifdef CONFIG_QUOTA + /* Names of quota files with journalled quota */ + char *s_qf_names[MAXQUOTAS]; + int s_jquota_fmt; /* Format of quota to use */ +#endif + /* For which write hints are passed down to block layer */ + int whint_mode; + int alloc_mode; /* segment allocation policy */ + int fsync_mode; /* fsync policy */ }; #define F2FS_FEATURE_ENCRYPT 0x0001 @@ -1081,7 +1100,6 @@ struct f2fs_sb_info { struct f2fs_bio_info *write_io[NR_PAGE_TYPE]; /* for write bios */ struct mutex wio_mutex[NR_PAGE_TYPE - 1][NR_TEMP_TYPE]; /* bio ordering for NODE/DATA */ - int write_io_size_bits; /* Write IO size bits */ mempool_t *write_io_dummy; /* Dummy pages */ /* for checkpoint */ @@ -1131,9 +1149,7 @@ struct f2fs_sb_info { unsigned int total_node_count; /* total node block count */ unsigned int total_valid_node_count; /* valid node block count */ loff_t max_file_blocks; /* max block index of file */ - int active_logs; /* # of active logs */ int dir_level; /* directory level */ - int inline_xattr_size; /* inline xattr size */ unsigned int trigger_ssr_threshold; /* threshold to trigger ssr */ int readdir_ra; /* readahead inode in readdir */ @@ -1143,9 +1159,6 @@ struct f2fs_sb_info { block_t last_valid_block_count; /* for recovery */ block_t reserved_blocks; /* configurable reserved blocks */ block_t current_reserved_blocks; /* current reserved blocks */ - block_t root_reserved_blocks; /* root reserved blocks */ - kuid_t s_resuid; /* reserved blocks for uid */ - kgid_t s_resgid; /* reserved blocks for gid */ unsigned int nquota_files; /* # of quota sysfile */ @@ -1230,25 +1243,6 @@ struct f2fs_sb_info { /* Precomputed FS UUID checksum for seeding other checksums */ __u32 s_chksum_seed; - - /* For fault injection */ -#ifdef CONFIG_F2FS_FAULT_INJECTION - struct f2fs_fault_info fault_info; -#endif - -#ifdef CONFIG_QUOTA - /* Names of quota files with journalled quota */ - char *s_qf_names[MAXQUOTAS]; - int s_jquota_fmt; /* Format of quota to use */ -#endif - /* For which write hints are passed down to block layer */ - int whint_mode; - - /* segment allocation policy */ - int alloc_mode; - - /* fsync policy */ - int fsync_mode; }; #ifdef CONFIG_F2FS_FAULT_INJECTION @@ -1258,7 +1252,7 @@ struct f2fs_sb_info { __func__, __builtin_return_address(0)) static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type) { - struct f2fs_fault_info *ffi = &sbi->fault_info; + struct f2fs_fault_info *ffi = &F2FS_OPTION(sbi).fault_info; if (!ffi->inject_rate) return false; @@ -1615,10 +1609,10 @@ static inline bool __allow_reserved_blocks(struct f2fs_sb_info *sbi, return false; if (IS_NOQUOTA(inode)) return true; - if (uid_eq(sbi->s_resuid, current_fsuid())) + if (uid_eq(F2FS_OPTION(sbi).s_resuid, current_fsuid())) return true; - if (!gid_eq(sbi->s_resgid, GLOBAL_ROOT_GID) && - in_group_p(sbi->s_resgid)) + if (!gid_eq(F2FS_OPTION(sbi).s_resgid, GLOBAL_ROOT_GID) && + in_group_p(F2FS_OPTION(sbi).s_resgid)) return true; if (capable(CAP_SYS_RESOURCE)) return true; @@ -1656,7 +1650,7 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi, sbi->current_reserved_blocks; if (!__allow_reserved_blocks(sbi, inode)) - avail_user_block_count -= sbi->root_reserved_blocks; + avail_user_block_count -= F2FS_OPTION(sbi).root_reserved_blocks; if (unlikely(sbi->total_valid_block_count > avail_user_block_count)) { diff = sbi->total_valid_block_count - avail_user_block_count; @@ -1863,7 +1857,7 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi, sbi->current_reserved_blocks + 1; if (!__allow_reserved_blocks(sbi, inode)) - valid_block_count += sbi->root_reserved_blocks; + valid_block_count += F2FS_OPTION(sbi).root_reserved_blocks; if (unlikely(valid_block_count > sbi->user_block_count)) { spin_unlock(&sbi->stat_lock); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 99fa207fc310..3072837744b9 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -163,9 +163,9 @@ static inline enum cp_reason_type need_do_checkpoint(struct inode *inode) cp_reason = CP_NODE_NEED_CP; else if (test_opt(sbi, FASTBOOT)) cp_reason = CP_FASTBOOT_MODE; - else if (sbi->active_logs == 2) + else if (F2FS_OPTION(sbi).active_logs == 2) cp_reason = CP_SPEC_LOG_NUM; - else if (sbi->fsync_mode == FSYNC_MODE_STRICT && + else if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT && need_dentry_mark(sbi, inode->i_ino) && exist_written_data(sbi, F2FS_I(inode)->i_pino, TRANS_DIR_INO)) cp_reason = CP_RECOVER_DIR; diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index e9bb3a3a8b0a..f4ae46282eef 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -97,7 +97,7 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) if (f2fs_sb_has_flexible_inline_xattr(sbi->sb)) { f2fs_bug_on(sbi, !f2fs_has_extra_attr(inode)); if (f2fs_has_inline_xattr(inode)) - xattr_size = sbi->inline_xattr_size; + xattr_size = F2FS_OPTION(sbi).inline_xattr_size; /* Otherwise, will be 0 */ } else if (f2fs_has_inline_xattr(inode) || f2fs_has_inline_dentry(inode)) { @@ -970,7 +970,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, f2fs_put_page(old_dir_page, 0); f2fs_i_links_write(old_dir, false); } - if (sbi->fsync_mode == FSYNC_MODE_STRICT) + if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT) add_ino_entry(sbi, new_dir->i_ino, TRANS_DIR_INO); f2fs_unlock_op(sbi); @@ -1121,7 +1121,7 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, } f2fs_mark_inode_dirty_sync(new_dir, false); - if (sbi->fsync_mode == FSYNC_MODE_STRICT) { + if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT) { add_ino_entry(sbi, old_dir->i_ino, TRANS_DIR_INO); add_ino_entry(sbi, new_dir->i_ino, TRANS_DIR_INO); } diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 570e02d89cbc..637980d04503 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2171,7 +2171,7 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type) return SIT_I(sbi)->last_victim[ALLOC_NEXT]; /* find segments from 0 to reuse freed segments */ - if (sbi->alloc_mode == ALLOC_MODE_REUSE) + if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_REUSE) return 0; return CURSEG_I(sbi, type)->segno; @@ -2524,7 +2524,7 @@ int rw_hint_to_seg_type(enum rw_hint hint) enum rw_hint io_type_to_rw_hint(struct f2fs_sb_info *sbi, enum page_type type, enum temp_type temp) { - if (sbi->whint_mode == WHINT_MODE_USER) { + if (F2FS_OPTION(sbi).whint_mode == WHINT_MODE_USER) { if (type == DATA) { if (temp == WARM) return WRITE_LIFE_NOT_SET; @@ -2535,7 +2535,7 @@ enum rw_hint io_type_to_rw_hint(struct f2fs_sb_info *sbi, } else { return WRITE_LIFE_NOT_SET; } - } else if (sbi->whint_mode == WHINT_MODE_FS) { + } else if (F2FS_OPTION(sbi).whint_mode == WHINT_MODE_FS) { if (type == DATA) { if (temp == WARM) return WRITE_LIFE_LONG; @@ -2603,7 +2603,7 @@ static int __get_segment_type(struct f2fs_io_info *fio) { int type = 0; - switch (fio->sbi->active_logs) { + switch (F2FS_OPTION(fio->sbi).active_logs) { case 2: type = __get_segment_type_2(fio); break; diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 2516dfa26a98..419eaacf3366 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -60,7 +60,7 @@ char *fault_name[FAULT_MAX] = { static void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate) { - struct f2fs_fault_info *ffi = &sbi->fault_info; + struct f2fs_fault_info *ffi = &F2FS_OPTION(sbi).fault_info; if (rate) { atomic_set(&ffi->inject_ops, 0); @@ -208,21 +208,24 @@ static inline void limit_reserve_root(struct f2fs_sb_info *sbi) block_t limit = (sbi->user_block_count << 1) / 1000; /* limit is 0.2% */ - if (test_opt(sbi, RESERVE_ROOT) && sbi->root_reserved_blocks > limit) { - sbi->root_reserved_blocks = limit; + if (test_opt(sbi, RESERVE_ROOT) && + F2FS_OPTION(sbi).root_reserved_blocks > limit) { + F2FS_OPTION(sbi).root_reserved_blocks = limit; f2fs_msg(sbi->sb, KERN_INFO, "Reduce reserved blocks for root = %u", - sbi->root_reserved_blocks); + F2FS_OPTION(sbi).root_reserved_blocks); } if (!test_opt(sbi, RESERVE_ROOT) && - (!uid_eq(sbi->s_resuid, + (!uid_eq(F2FS_OPTION(sbi).s_resuid, make_kuid(&init_user_ns, F2FS_DEF_RESUID)) || - !gid_eq(sbi->s_resgid, + !gid_eq(F2FS_OPTION(sbi).s_resgid, make_kgid(&init_user_ns, F2FS_DEF_RESGID)))) f2fs_msg(sbi->sb, KERN_INFO, "Ignore s_resuid=%u, s_resgid=%u w/o reserve_root", - from_kuid_munged(&init_user_ns, sbi->s_resuid), - from_kgid_munged(&init_user_ns, sbi->s_resgid)); + from_kuid_munged(&init_user_ns, + F2FS_OPTION(sbi).s_resuid), + from_kgid_munged(&init_user_ns, + F2FS_OPTION(sbi).s_resgid)); } static void init_once(void *foo) @@ -242,7 +245,7 @@ static int f2fs_set_qf_name(struct super_block *sb, int qtype, char *qname; int ret = -EINVAL; - if (sb_any_quota_loaded(sb) && !sbi->s_qf_names[qtype]) { + if (sb_any_quota_loaded(sb) && !F2FS_OPTION(sbi).s_qf_names[qtype]) { f2fs_msg(sb, KERN_ERR, "Cannot change journaled " "quota options when quota turned on"); @@ -260,8 +263,8 @@ static int f2fs_set_qf_name(struct super_block *sb, int qtype, "Not enough memory for storing quotafile name"); return -EINVAL; } - if (sbi->s_qf_names[qtype]) { - if (strcmp(sbi->s_qf_names[qtype], qname) == 0) + if (F2FS_OPTION(sbi).s_qf_names[qtype]) { + if (strcmp(F2FS_OPTION(sbi).s_qf_names[qtype], qname) == 0) ret = 0; else f2fs_msg(sb, KERN_ERR, @@ -274,7 +277,7 @@ static int f2fs_set_qf_name(struct super_block *sb, int qtype, "quotafile must be on filesystem root"); goto errout; } - sbi->s_qf_names[qtype] = qname; + F2FS_OPTION(sbi).s_qf_names[qtype] = qname; set_opt(sbi, QUOTA); return 0; errout: @@ -286,13 +289,13 @@ static int f2fs_clear_qf_name(struct super_block *sb, int qtype) { struct f2fs_sb_info *sbi = F2FS_SB(sb); - if (sb_any_quota_loaded(sb) && sbi->s_qf_names[qtype]) { + if (sb_any_quota_loaded(sb) && F2FS_OPTION(sbi).s_qf_names[qtype]) { f2fs_msg(sb, KERN_ERR, "Cannot change journaled quota options" " when quota turned on"); return -EINVAL; } - kfree(sbi->s_qf_names[qtype]); - sbi->s_qf_names[qtype] = NULL; + kfree(F2FS_OPTION(sbi).s_qf_names[qtype]); + F2FS_OPTION(sbi).s_qf_names[qtype] = NULL; return 0; } @@ -308,15 +311,19 @@ static int f2fs_check_quota_options(struct f2fs_sb_info *sbi) "Cannot enable project quota enforcement."); return -1; } - if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA] || - sbi->s_qf_names[PRJQUOTA]) { - if (test_opt(sbi, USRQUOTA) && sbi->s_qf_names[USRQUOTA]) + if (F2FS_OPTION(sbi).s_qf_names[USRQUOTA] || + F2FS_OPTION(sbi).s_qf_names[GRPQUOTA] || + F2FS_OPTION(sbi).s_qf_names[PRJQUOTA]) { + if (test_opt(sbi, USRQUOTA) && + F2FS_OPTION(sbi).s_qf_names[USRQUOTA]) clear_opt(sbi, USRQUOTA); - if (test_opt(sbi, GRPQUOTA) && sbi->s_qf_names[GRPQUOTA]) + if (test_opt(sbi, GRPQUOTA) && + F2FS_OPTION(sbi).s_qf_names[GRPQUOTA]) clear_opt(sbi, GRPQUOTA); - if (test_opt(sbi, PRJQUOTA) && sbi->s_qf_names[PRJQUOTA]) + if (test_opt(sbi, PRJQUOTA) && + F2FS_OPTION(sbi).s_qf_names[PRJQUOTA]) clear_opt(sbi, PRJQUOTA); if (test_opt(sbi, GRPQUOTA) || test_opt(sbi, USRQUOTA) || @@ -326,17 +333,17 @@ static int f2fs_check_quota_options(struct f2fs_sb_info *sbi) return -1; } - if (!sbi->s_jquota_fmt) { + if (!F2FS_OPTION(sbi).s_jquota_fmt) { f2fs_msg(sbi->sb, KERN_ERR, "journaled quota format " "not specified"); return -1; } } - if (f2fs_sb_has_quota_ino(sbi->sb) && sbi->s_jquota_fmt) { + if (f2fs_sb_has_quota_ino(sbi->sb) && F2FS_OPTION(sbi).s_jquota_fmt) { f2fs_msg(sbi->sb, KERN_INFO, "QUOTA feature is enabled, so ignore jquota_fmt"); - sbi->s_jquota_fmt = 0; + F2FS_OPTION(sbi).s_jquota_fmt = 0; } if (f2fs_sb_has_quota_ino(sbi->sb) && f2fs_readonly(sbi->sb)) { f2fs_msg(sbi->sb, KERN_INFO, @@ -446,7 +453,7 @@ static int parse_options(struct super_block *sb, char *options) if (args->from && match_int(args, &arg)) return -EINVAL; set_opt(sbi, INLINE_XATTR_SIZE); - sbi->inline_xattr_size = arg; + F2FS_OPTION(sbi).inline_xattr_size = arg; break; #else case Opt_user_xattr: @@ -486,7 +493,7 @@ static int parse_options(struct super_block *sb, char *options) return -EINVAL; if (arg != 2 && arg != 4 && arg != NR_CURSEG_TYPE) return -EINVAL; - sbi->active_logs = arg; + F2FS_OPTION(sbi).active_logs = arg; break; case Opt_disable_ext_identify: set_opt(sbi, DISABLE_EXT_IDENTIFY); @@ -530,9 +537,9 @@ static int parse_options(struct super_block *sb, char *options) if (test_opt(sbi, RESERVE_ROOT)) { f2fs_msg(sb, KERN_INFO, "Preserve previous reserve_root=%u", - sbi->root_reserved_blocks); + F2FS_OPTION(sbi).root_reserved_blocks); } else { - sbi->root_reserved_blocks = arg; + F2FS_OPTION(sbi).root_reserved_blocks = arg; set_opt(sbi, RESERVE_ROOT); } break; @@ -545,7 +552,7 @@ static int parse_options(struct super_block *sb, char *options) "Invalid uid value %d", arg); return -EINVAL; } - sbi->s_resuid = uid; + F2FS_OPTION(sbi).s_resuid = uid; break; case Opt_resgid: if (args->from && match_int(args, &arg)) @@ -556,7 +563,7 @@ static int parse_options(struct super_block *sb, char *options) "Invalid gid value %d", arg); return -EINVAL; } - sbi->s_resgid = gid; + F2FS_OPTION(sbi).s_resgid = gid; break; case Opt_mode: name = match_strdup(&args[0]); @@ -591,7 +598,7 @@ static int parse_options(struct super_block *sb, char *options) 1 << arg, BIO_MAX_PAGES); return -EINVAL; } - sbi->write_io_size_bits = arg; + F2FS_OPTION(sbi).write_io_size_bits = arg; break; case Opt_fault_injection: if (args->from && match_int(args, &arg)) @@ -652,13 +659,13 @@ static int parse_options(struct super_block *sb, char *options) return ret; break; case Opt_jqfmt_vfsold: - sbi->s_jquota_fmt = QFMT_VFS_OLD; + F2FS_OPTION(sbi).s_jquota_fmt = QFMT_VFS_OLD; break; case Opt_jqfmt_vfsv0: - sbi->s_jquota_fmt = QFMT_VFS_V0; + F2FS_OPTION(sbi).s_jquota_fmt = QFMT_VFS_V0; break; case Opt_jqfmt_vfsv1: - sbi->s_jquota_fmt = QFMT_VFS_V1; + F2FS_OPTION(sbi).s_jquota_fmt = QFMT_VFS_V1; break; case Opt_noquota: clear_opt(sbi, QUOTA); @@ -691,13 +698,13 @@ static int parse_options(struct super_block *sb, char *options) return -ENOMEM; if (strlen(name) == 10 && !strncmp(name, "user-based", 10)) { - sbi->whint_mode = WHINT_MODE_USER; + F2FS_OPTION(sbi).whint_mode = WHINT_MODE_USER; } else if (strlen(name) == 3 && !strncmp(name, "off", 3)) { - sbi->whint_mode = WHINT_MODE_OFF; + F2FS_OPTION(sbi).whint_mode = WHINT_MODE_OFF; } else if (strlen(name) == 8 && !strncmp(name, "fs-based", 8)) { - sbi->whint_mode = WHINT_MODE_FS; + F2FS_OPTION(sbi).whint_mode = WHINT_MODE_FS; } else { kfree(name); return -EINVAL; @@ -711,10 +718,10 @@ static int parse_options(struct super_block *sb, char *options) if (strlen(name) == 7 && !strncmp(name, "default", 7)) { - sbi->alloc_mode = ALLOC_MODE_DEFAULT; + F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_DEFAULT; } else if (strlen(name) == 5 && !strncmp(name, "reuse", 5)) { - sbi->alloc_mode = ALLOC_MODE_REUSE; + F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_REUSE; } else { kfree(name); return -EINVAL; @@ -727,10 +734,10 @@ static int parse_options(struct super_block *sb, char *options) return -ENOMEM; if (strlen(name) == 5 && !strncmp(name, "posix", 5)) { - sbi->fsync_mode = FSYNC_MODE_POSIX; + F2FS_OPTION(sbi).fsync_mode = FSYNC_MODE_POSIX; } else if (strlen(name) == 6 && !strncmp(name, "strict", 6)) { - sbi->fsync_mode = FSYNC_MODE_STRICT; + F2FS_OPTION(sbi).fsync_mode = FSYNC_MODE_STRICT; } else { kfree(name); return -EINVAL; @@ -770,8 +777,9 @@ static int parse_options(struct super_block *sb, char *options) "set with inline_xattr option"); return -EINVAL; } - if (!sbi->inline_xattr_size || - sbi->inline_xattr_size >= DEF_ADDRS_PER_INODE - + if (!F2FS_OPTION(sbi).inline_xattr_size || + F2FS_OPTION(sbi).inline_xattr_size >= + DEF_ADDRS_PER_INODE - F2FS_TOTAL_EXTRA_ATTR_SIZE - DEF_INLINE_RESERVED_SIZE - DEF_MIN_INLINE_SIZE) { @@ -784,8 +792,8 @@ static int parse_options(struct super_block *sb, char *options) /* Not pass down write hints if the number of active logs is lesser * than NR_CURSEG_TYPE. */ - if (sbi->active_logs != NR_CURSEG_TYPE) - sbi->whint_mode = WHINT_MODE_OFF; + if (F2FS_OPTION(sbi).active_logs != NR_CURSEG_TYPE) + F2FS_OPTION(sbi).whint_mode = WHINT_MODE_OFF; return 0; } @@ -1027,7 +1035,7 @@ static void f2fs_put_super(struct super_block *sb) mempool_destroy(sbi->write_io_dummy); #ifdef CONFIG_QUOTA for (i = 0; i < MAXQUOTAS; i++) - kfree(sbi->s_qf_names[i]); + kfree(F2FS_OPTION(sbi).s_qf_names[i]); #endif destroy_percpu_info(sbi); for (i = 0; i < NR_PAGE_TYPE; i++) @@ -1141,8 +1149,9 @@ static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_blocks = total_count - start_count; buf->f_bfree = user_block_count - valid_user_blocks(sbi) - sbi->current_reserved_blocks; - if (buf->f_bfree > sbi->root_reserved_blocks) - buf->f_bavail = buf->f_bfree - sbi->root_reserved_blocks; + if (buf->f_bfree > F2FS_OPTION(sbi).root_reserved_blocks) + buf->f_bavail = buf->f_bfree - + F2FS_OPTION(sbi).root_reserved_blocks; else buf->f_bavail = 0; @@ -1177,10 +1186,10 @@ static inline void f2fs_show_quota_options(struct seq_file *seq, #ifdef CONFIG_QUOTA struct f2fs_sb_info *sbi = F2FS_SB(sb); - if (sbi->s_jquota_fmt) { + if (F2FS_OPTION(sbi).s_jquota_fmt) { char *fmtname = ""; - switch (sbi->s_jquota_fmt) { + switch (F2FS_OPTION(sbi).s_jquota_fmt) { case QFMT_VFS_OLD: fmtname = "vfsold"; break; @@ -1194,14 +1203,17 @@ static inline void f2fs_show_quota_options(struct seq_file *seq, seq_printf(seq, ",jqfmt=%s", fmtname); } - if (sbi->s_qf_names[USRQUOTA]) - seq_show_option(seq, "usrjquota", sbi->s_qf_names[USRQUOTA]); + if (F2FS_OPTION(sbi).s_qf_names[USRQUOTA]) + seq_show_option(seq, "usrjquota", + F2FS_OPTION(sbi).s_qf_names[USRQUOTA]); - if (sbi->s_qf_names[GRPQUOTA]) - seq_show_option(seq, "grpjquota", sbi->s_qf_names[GRPQUOTA]); + if (F2FS_OPTION(sbi).s_qf_names[GRPQUOTA]) + seq_show_option(seq, "grpjquota", + F2FS_OPTION(sbi).s_qf_names[GRPQUOTA]); - if (sbi->s_qf_names[PRJQUOTA]) - seq_show_option(seq, "prjjquota", sbi->s_qf_names[PRJQUOTA]); + if (F2FS_OPTION(sbi).s_qf_names[PRJQUOTA]) + seq_show_option(seq, "prjjquota", + F2FS_OPTION(sbi).s_qf_names[PRJQUOTA]); #endif } @@ -1236,7 +1248,7 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) seq_puts(seq, ",noinline_xattr"); if (test_opt(sbi, INLINE_XATTR_SIZE)) seq_printf(seq, ",inline_xattr_size=%u", - sbi->inline_xattr_size); + F2FS_OPTION(sbi).inline_xattr_size); #endif #ifdef CONFIG_F2FS_FS_POSIX_ACL if (test_opt(sbi, POSIX_ACL)) @@ -1272,18 +1284,20 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) seq_puts(seq, "adaptive"); else if (test_opt(sbi, LFS)) seq_puts(seq, "lfs"); - seq_printf(seq, ",active_logs=%u", sbi->active_logs); + seq_printf(seq, ",active_logs=%u", F2FS_OPTION(sbi).active_logs); if (test_opt(sbi, RESERVE_ROOT)) seq_printf(seq, ",reserve_root=%u,resuid=%u,resgid=%u", - sbi->root_reserved_blocks, - from_kuid_munged(&init_user_ns, sbi->s_resuid), - from_kgid_munged(&init_user_ns, sbi->s_resgid)); + F2FS_OPTION(sbi).root_reserved_blocks, + from_kuid_munged(&init_user_ns, + F2FS_OPTION(sbi).s_resuid), + from_kgid_munged(&init_user_ns, + F2FS_OPTION(sbi).s_resgid)); if (F2FS_IO_SIZE_BITS(sbi)) seq_printf(seq, ",io_size=%uKB", F2FS_IO_SIZE_KB(sbi)); #ifdef CONFIG_F2FS_FAULT_INJECTION if (test_opt(sbi, FAULT_INJECTION)) seq_printf(seq, ",fault_injection=%u", - sbi->fault_info.inject_rate); + F2FS_OPTION(sbi).fault_info.inject_rate); #endif #ifdef CONFIG_QUOTA if (test_opt(sbi, QUOTA)) @@ -1296,19 +1310,19 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) seq_puts(seq, ",prjquota"); #endif f2fs_show_quota_options(seq, sbi->sb); - if (sbi->whint_mode == WHINT_MODE_USER) + if (F2FS_OPTION(sbi).whint_mode == WHINT_MODE_USER) seq_printf(seq, ",whint_mode=%s", "user-based"); - else if (sbi->whint_mode == WHINT_MODE_FS) + else if (F2FS_OPTION(sbi).whint_mode == WHINT_MODE_FS) seq_printf(seq, ",whint_mode=%s", "fs-based"); - if (sbi->alloc_mode == ALLOC_MODE_DEFAULT) + if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_DEFAULT) seq_printf(seq, ",alloc_mode=%s", "default"); - else if (sbi->alloc_mode == ALLOC_MODE_REUSE) + else if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_REUSE) seq_printf(seq, ",alloc_mode=%s", "reuse"); - if (sbi->fsync_mode == FSYNC_MODE_POSIX) + if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_POSIX) seq_printf(seq, ",fsync_mode=%s", "posix"); - else if (sbi->fsync_mode == FSYNC_MODE_STRICT) + else if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT) seq_printf(seq, ",fsync_mode=%s", "strict"); return 0; } @@ -1316,11 +1330,11 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) static void default_options(struct f2fs_sb_info *sbi) { /* init some FS parameters */ - sbi->active_logs = NR_CURSEG_TYPE; - sbi->inline_xattr_size = DEFAULT_INLINE_XATTR_ADDRS; - sbi->whint_mode = WHINT_MODE_OFF; - sbi->alloc_mode = ALLOC_MODE_DEFAULT; - sbi->fsync_mode = FSYNC_MODE_POSIX; + F2FS_OPTION(sbi).active_logs = NR_CURSEG_TYPE; + F2FS_OPTION(sbi).inline_xattr_size = DEFAULT_INLINE_XATTR_ADDRS; + F2FS_OPTION(sbi).whint_mode = WHINT_MODE_OFF; + F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_DEFAULT; + F2FS_OPTION(sbi).fsync_mode = FSYNC_MODE_POSIX; sbi->readdir_ra = 1; set_opt(sbi, BG_GC); @@ -1358,24 +1372,11 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) struct f2fs_sb_info *sbi = F2FS_SB(sb); struct f2fs_mount_info org_mount_opt; unsigned long old_sb_flags; - int err, active_logs; + int err; bool need_restart_gc = false; bool need_stop_gc = false; bool no_extent_cache = !test_opt(sbi, EXTENT_CACHE); - int old_whint_mode = sbi->whint_mode; - int old_alloc_mode = sbi->alloc_mode; - int old_fsync_mode = sbi->fsync_mode; - int old_inline_xattr_size = sbi->inline_xattr_size; - block_t old_root_reserved_blocks = sbi->root_reserved_blocks; - kuid_t old_resuid = sbi->s_resuid; - kgid_t old_resgid = sbi->s_resgid; - int old_write_io_size_bits = sbi->write_io_size_bits; -#ifdef CONFIG_F2FS_FAULT_INJECTION - struct f2fs_fault_info ffi = sbi->fault_info; -#endif #ifdef CONFIG_QUOTA - int s_jquota_fmt; - char *s_qf_names[MAXQUOTAS]; int i, j; #endif @@ -1385,21 +1386,21 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) */ org_mount_opt = sbi->mount_opt; old_sb_flags = sb->s_flags; - active_logs = sbi->active_logs; #ifdef CONFIG_QUOTA - s_jquota_fmt = sbi->s_jquota_fmt; + org_mount_opt.s_jquota_fmt = F2FS_OPTION(sbi).s_jquota_fmt; for (i = 0; i < MAXQUOTAS; i++) { - if (sbi->s_qf_names[i]) { - s_qf_names[i] = kstrdup(sbi->s_qf_names[i], - GFP_KERNEL); - if (!s_qf_names[i]) { + if (F2FS_OPTION(sbi).s_qf_names[i]) { + org_mount_opt.s_qf_names[i] = + kstrdup(F2FS_OPTION(sbi).s_qf_names[i], + GFP_KERNEL); + if (!org_mount_opt.s_qf_names[i]) { for (j = 0; j < i; j++) - kfree(s_qf_names[j]); + kfree(org_mount_opt.s_qf_names[j]); return -ENOMEM; } } else { - s_qf_names[i] = NULL; + org_mount_opt.s_qf_names[i] = NULL; } } #endif @@ -1469,7 +1470,8 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) need_stop_gc = true; } - if (*flags & SB_RDONLY || sbi->whint_mode != old_whint_mode) { + if (*flags & SB_RDONLY || + F2FS_OPTION(sbi).whint_mode != org_mount_opt.whint_mode) { writeback_inodes_sb(sb, WB_REASON_SYNC); sync_inodes_sb(sb); @@ -1495,7 +1497,7 @@ skip: #ifdef CONFIG_QUOTA /* Release old quota file names */ for (i = 0; i < MAXQUOTAS; i++) - kfree(s_qf_names[i]); + kfree(org_mount_opt.s_qf_names[i]); #endif /* Update the POSIXACL Flag */ sb->s_flags = (sb->s_flags & ~SB_POSIXACL) | @@ -1513,26 +1515,14 @@ restore_gc: } restore_opts: #ifdef CONFIG_QUOTA - sbi->s_jquota_fmt = s_jquota_fmt; + F2FS_OPTION(sbi).s_jquota_fmt = org_mount_opt.s_jquota_fmt; for (i = 0; i < MAXQUOTAS; i++) { - kfree(sbi->s_qf_names[i]); - sbi->s_qf_names[i] = s_qf_names[i]; + kfree(F2FS_OPTION(sbi).s_qf_names[i]); + F2FS_OPTION(sbi).s_qf_names[i] = org_mount_opt.s_qf_names[i]; } #endif - sbi->write_io_size_bits = old_write_io_size_bits; - sbi->s_resgid = old_resgid; - sbi->s_resuid = old_resuid; - sbi->root_reserved_blocks = old_root_reserved_blocks; - sbi->inline_xattr_size = old_inline_xattr_size; - sbi->alloc_mode = old_alloc_mode; - sbi->fsync_mode = old_fsync_mode; - sbi->whint_mode = old_whint_mode; sbi->mount_opt = org_mount_opt; - sbi->active_logs = active_logs; sb->s_flags = old_sb_flags; -#ifdef CONFIG_F2FS_FAULT_INJECTION - sbi->fault_info = ffi; -#endif return err; } @@ -1654,8 +1644,8 @@ static qsize_t *f2fs_get_reserved_space(struct inode *inode) static int f2fs_quota_on_mount(struct f2fs_sb_info *sbi, int type) { - return dquot_quota_on_mount(sbi->sb, sbi->s_qf_names[type], - sbi->s_jquota_fmt, type); + return dquot_quota_on_mount(sbi->sb, F2FS_OPTION(sbi).s_qf_names[type], + F2FS_OPTION(sbi).s_jquota_fmt, type); } int f2fs_enable_quota_files(struct f2fs_sb_info *sbi, bool rdonly) @@ -1674,7 +1664,7 @@ int f2fs_enable_quota_files(struct f2fs_sb_info *sbi, bool rdonly) } for (i = 0; i < MAXQUOTAS; i++) { - if (sbi->s_qf_names[i]) { + if (F2FS_OPTION(sbi).s_qf_names[i]) { err = f2fs_quota_on_mount(sbi, i); if (!err) { enabled = 1; @@ -2558,7 +2548,7 @@ static void f2fs_tuning_parameters(struct f2fs_sb_info *sbi) /* adjust parameters according to the volume size */ if (sm_i->main_segments <= SMALL_VOLUME_SEGMENTS) { - sbi->alloc_mode = ALLOC_MODE_REUSE; + F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_REUSE; sm_i->dcc_info->discard_granularity = 1; sm_i->ipu_policy = 1 << F2FS_IPU_FORCE; } @@ -2611,8 +2601,8 @@ try_onemore: sb->s_fs_info = sbi; sbi->raw_super = raw_super; - sbi->s_resuid = make_kuid(&init_user_ns, F2FS_DEF_RESUID); - sbi->s_resgid = make_kgid(&init_user_ns, F2FS_DEF_RESGID); + F2FS_OPTION(sbi).s_resuid = make_kuid(&init_user_ns, F2FS_DEF_RESUID); + F2FS_OPTION(sbi).s_resgid = make_kgid(&init_user_ns, F2FS_DEF_RESGID); /* precompute checksum seed for metadata */ if (f2fs_sb_has_inode_chksum(sb)) @@ -2970,7 +2960,7 @@ free_bio_info: free_options: #ifdef CONFIG_QUOTA for (i = 0; i < MAXQUOTAS; i++) - kfree(sbi->s_qf_names[i]); + kfree(F2FS_OPTION(sbi).s_qf_names[i]); #endif kfree(options); free_sb_buf: diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 23a2d8d66c43..7d983ad19da4 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -58,7 +58,7 @@ static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int struct_type) #ifdef CONFIG_F2FS_FAULT_INJECTION else if (struct_type == FAULT_INFO_RATE || struct_type == FAULT_INFO_TYPE) - return (unsigned char *)&sbi->fault_info; + return (unsigned char *)&F2FS_OPTION(sbi).fault_info; #endif return NULL; } @@ -222,7 +222,7 @@ out: if (a->struct_type == RESERVED_BLOCKS) { spin_lock(&sbi->stat_lock); if (t > (unsigned long)(sbi->user_block_count - - sbi->root_reserved_blocks)) { + F2FS_OPTION(sbi).root_reserved_blocks)) { spin_unlock(&sbi->stat_lock); return -EINVAL; } diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index b06ab1f04ff6..124787e8db58 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -39,10 +39,10 @@ #define F2FS_MAX_QUOTAS 3 -#define F2FS_IO_SIZE(sbi) (1 << (sbi)->write_io_size_bits) /* Blocks */ -#define F2FS_IO_SIZE_KB(sbi) (1 << ((sbi)->write_io_size_bits + 2)) /* KB */ -#define F2FS_IO_SIZE_BYTES(sbi) (1 << ((sbi)->write_io_size_bits + 12)) /* B */ -#define F2FS_IO_SIZE_BITS(sbi) ((sbi)->write_io_size_bits) /* power of 2 */ +#define F2FS_IO_SIZE(sbi) (1 << F2FS_OPTION(sbi).write_io_size_bits) /* Blocks */ +#define F2FS_IO_SIZE_KB(sbi) (1 << (F2FS_OPTION(sbi).write_io_size_bits + 2)) /* KB */ +#define F2FS_IO_SIZE_BYTES(sbi) (1 << (F2FS_OPTION(sbi).write_io_size_bits + 12)) /* B */ +#define F2FS_IO_SIZE_BITS(sbi) (F2FS_OPTION(sbi).write_io_size_bits) /* power of 2 */ #define F2FS_IO_SIZE_MASK(sbi) (F2FS_IO_SIZE(sbi) - 1) /* This flag is used by node and meta inodes, and by recovery */ From b91050a80cec3daf5a21f78274330df64a4936a3 Mon Sep 17 00:00:00 2001 From: Hyunchul Lee Date: Thu, 8 Mar 2018 19:34:38 +0900 Subject: [PATCH 34/50] f2fs: add nowait aio support This patch adds nowait aio support[1]. Return EAGAIN if any of the following checks fail for direct I/O: - i_rwsem is not lockable - Blocks are not allocated at the write location And xfstests generic/471 is passed. [1]: 6be96d "Introduce RWF_NOWAIT and FMODE_AIO_NOWAIT" Signed-off-by: Hyunchul Lee Reviewed-by: Goldwyn Rodrigues Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 47 +++++++++++++++++++++++++++++++++++++---------- fs/f2fs/f2fs.h | 8 ++++++++ fs/f2fs/file.c | 35 +++++++++++++++++++++++++++++------ 3 files changed, 74 insertions(+), 16 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 3d6ae3173f98..f6cf99a311c3 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -839,13 +839,6 @@ alloc: return 0; } -static inline bool __force_buffered_io(struct inode *inode, int rw) -{ - return (f2fs_encrypted_file(inode) || - (rw == WRITE && test_opt(F2FS_I_SB(inode), LFS)) || - F2FS_I_SB(inode)->s_ndevs); -} - int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from) { struct inode *inode = file_inode(iocb->ki_filp); @@ -877,7 +870,7 @@ int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from) if (direct_io) { map.m_seg_type = rw_hint_to_seg_type(iocb->ki_hint); - flag = __force_buffered_io(inode, WRITE) ? + flag = f2fs_force_buffered_io(inode, WRITE) ? F2FS_GET_BLOCK_PRE_AIO : F2FS_GET_BLOCK_PRE_DIO; goto map_blocks; @@ -1121,6 +1114,31 @@ out: return err; } +bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len) +{ + struct f2fs_map_blocks map; + block_t last_lblk; + int err; + + if (pos + len > i_size_read(inode)) + return false; + + map.m_lblk = F2FS_BYTES_TO_BLK(pos); + map.m_next_pgofs = NULL; + map.m_next_extent = NULL; + map.m_seg_type = NO_CHECK_TYPE; + last_lblk = F2FS_BLK_ALIGN(pos + len); + + while (map.m_lblk < last_lblk) { + map.m_len = last_lblk - map.m_lblk; + err = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_DEFAULT); + if (err || map.m_len == 0) + return false; + map.m_lblk += map.m_len; + } + return true; +} + static int __get_data_block(struct inode *inode, sector_t iblock, struct buffer_head *bh, int create, int flag, pgoff_t *next_pgofs, int seg_type) @@ -2306,7 +2324,7 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) if (err) return err; - if (__force_buffered_io(inode, rw)) + if (f2fs_force_buffered_io(inode, rw)) return 0; trace_f2fs_direct_IO_enter(inode, offset, count, rw); @@ -2314,7 +2332,15 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) if (rw == WRITE && whint_mode == WHINT_MODE_OFF) iocb->ki_hint = WRITE_LIFE_NOT_SET; - down_read(&F2FS_I(inode)->dio_rwsem[rw]); + if (!down_read_trylock(&F2FS_I(inode)->dio_rwsem[rw])) { + if (iocb->ki_flags & IOCB_NOWAIT) { + iocb->ki_hint = hint; + err = -EAGAIN; + goto out; + } + down_read(&F2FS_I(inode)->dio_rwsem[rw]); + } + err = blockdev_direct_IO(iocb, inode, iter, get_data_block_dio); up_read(&F2FS_I(inode)->dio_rwsem[rw]); @@ -2330,6 +2356,7 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) } } +out: trace_f2fs_direct_IO_exit(inode, offset, count, rw, err); return err; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 93822634870d..88f2b420de27 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2877,6 +2877,7 @@ int f2fs_release_page(struct page *page, gfp_t wait); int f2fs_migrate_page(struct address_space *mapping, struct page *newpage, struct page *page, enum migrate_mode mode); #endif +bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len); /* * gc.c @@ -3261,4 +3262,11 @@ static inline bool f2fs_may_encrypt(struct inode *inode) #endif } +static inline bool f2fs_force_buffered_io(struct inode *inode, int rw) +{ + return (f2fs_encrypted_file(inode) || + (rw == WRITE && test_opt(F2FS_I_SB(inode), LFS)) || + F2FS_I_SB(inode)->s_ndevs); +} + #endif diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 3072837744b9..c4c27e63daf1 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -480,6 +480,9 @@ static int f2fs_file_open(struct inode *inode, struct file *filp) if (err) return err; + + filp->f_mode |= FMODE_NOWAIT; + return dquot_file_open(inode, filp); } @@ -2896,7 +2899,15 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) return -EIO; - inode_lock(inode); + if ((iocb->ki_flags & IOCB_NOWAIT) && !(iocb->ki_flags & IOCB_DIRECT)) + return -EINVAL; + + if (!inode_trylock(inode)) { + if (iocb->ki_flags & IOCB_NOWAIT) + return -EAGAIN; + inode_lock(inode); + } + ret = generic_write_checks(iocb, from); if (ret > 0) { int err; @@ -2904,11 +2915,23 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) if (iov_iter_fault_in_readable(from, iov_iter_count(from))) set_inode_flag(inode, FI_NO_PREALLOC); - err = f2fs_preallocate_blocks(iocb, from); - if (err) { - clear_inode_flag(inode, FI_NO_PREALLOC); - inode_unlock(inode); - return err; + if ((iocb->ki_flags & IOCB_NOWAIT) && + (iocb->ki_flags & IOCB_DIRECT)) { + if (!f2fs_overwrite_io(inode, iocb->ki_pos, + iov_iter_count(from)) || + f2fs_has_inline_data(inode) || + f2fs_force_buffered_io(inode, WRITE)) { + inode_unlock(inode); + return -EAGAIN; + } + + } else { + err = f2fs_preallocate_blocks(iocb, from); + if (err) { + clear_inode_flag(inode, FI_NO_PREALLOC); + inode_unlock(inode); + return err; + } } blk_start_plug(&plug); ret = __generic_file_write_iter(iocb, from); From c56675750d7c45ce6cc21a67770629aaf41d1491 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 9 Mar 2018 14:24:22 +0800 Subject: [PATCH 35/50] f2fs: remove unneeded set_cold_node() When setting COLD_BIT_SHIFT flag in node block, we only need to call set_cold_node() in new_node_page() and recover_inode_page() during node page initialization. So remove unneeded set_cold_node() in other places. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 2 -- fs/f2fs/inode.c | 1 - fs/f2fs/node.c | 3 ++- fs/f2fs/node.h | 4 ++-- 4 files changed, 4 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 3644db9177cb..9a0d103b5052 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -396,8 +396,6 @@ struct page *init_inode_metadata(struct inode *inode, struct inode *dir, page = get_node_page(F2FS_I_SB(dir), inode->i_ino); if (IS_ERR(page)) return page; - - set_cold_node(inode, page); } if (new_name) { diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 4efc815129b1..401f09ccce7e 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -439,7 +439,6 @@ void update_inode(struct inode *inode, struct page *node_page) } __set_inode_rdev(inode, ri); - set_cold_node(inode, node_page); /* deleted inode */ if (inode->i_nlink == 0) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 177c438e4a56..b86e2b15b619 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1076,7 +1076,7 @@ struct page *new_node_page(struct dnode_of_data *dn, unsigned int ofs) f2fs_wait_on_page_writeback(page, NODE, true); fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true); - set_cold_node(dn->inode, page); + set_cold_node(page, S_ISDIR(dn->inode->i_mode)); if (!PageUptodate(page)) SetPageUptodate(page); if (set_page_dirty(page)) @@ -2291,6 +2291,7 @@ retry: if (!PageUptodate(ipage)) SetPageUptodate(ipage); fill_node_footer(ipage, ino, ino, 0, true); + set_cold_node(page, false); src = F2FS_INODE(page); dst = F2FS_INODE(ipage); diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 081ef0d672bf..e593b4d78be2 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -422,12 +422,12 @@ static inline void clear_inline_node(struct page *page) ClearPageChecked(page); } -static inline void set_cold_node(struct inode *inode, struct page *page) +static inline void set_cold_node(struct page *page, bool is_dir) { struct f2fs_node *rn = F2FS_NODE(page); unsigned int flag = le32_to_cpu(rn->footer.flag); - if (S_ISDIR(inode->i_mode)) + if (is_dir) flag &= ~(0x1 << COLD_BIT_SHIFT); else flag |= (0x1 << COLD_BIT_SHIFT); From bb1105e479fbb8b0edc6f35affec71b75e31c8c0 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 9 Mar 2018 17:42:28 -0800 Subject: [PATCH 36/50] f2fs: align memory boundary for bitops For example, in arm64, free_nid_bitmap should be aligned to word size in order to use bit operations. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 +- fs/f2fs/node.c | 20 +++++++++++++++++--- include/linux/f2fs_fs.h | 4 ++++ 3 files changed, 22 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 88f2b420de27..641b4b98d373 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -766,7 +766,7 @@ struct f2fs_nm_info { unsigned int nid_cnt[MAX_NID_STATE]; /* the number of free node id */ spinlock_t nid_list_lock; /* protect nid lists ops */ struct mutex build_lock; /* lock for build free nids */ - unsigned char (*free_nid_bitmap)[NAT_ENTRY_BITMAP_SIZE]; + unsigned char **free_nid_bitmap; unsigned char *nat_block_bitmap; unsigned short *free_nid_count; /* free nid count of NAT block */ diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index b86e2b15b619..f7886b46478d 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -2708,12 +2708,20 @@ static int init_node_manager(struct f2fs_sb_info *sbi) static int init_free_nid_cache(struct f2fs_sb_info *sbi) { struct f2fs_nm_info *nm_i = NM_I(sbi); + int i; - nm_i->free_nid_bitmap = f2fs_kvzalloc(sbi, nm_i->nat_blocks * - NAT_ENTRY_BITMAP_SIZE, GFP_KERNEL); + nm_i->free_nid_bitmap = f2fs_kzalloc(sbi, nm_i->nat_blocks * + sizeof(unsigned char *), GFP_KERNEL); if (!nm_i->free_nid_bitmap) return -ENOMEM; + for (i = 0; i < nm_i->nat_blocks; i++) { + nm_i->free_nid_bitmap[i] = f2fs_kvzalloc(sbi, + NAT_ENTRY_BITMAP_SIZE_ALIGNED, GFP_KERNEL); + if (!nm_i->free_nid_bitmap) + return -ENOMEM; + } + nm_i->nat_block_bitmap = f2fs_kvzalloc(sbi, nm_i->nat_blocks / 8, GFP_KERNEL); if (!nm_i->nat_block_bitmap) @@ -2804,7 +2812,13 @@ void destroy_node_manager(struct f2fs_sb_info *sbi) up_write(&nm_i->nat_tree_lock); kvfree(nm_i->nat_block_bitmap); - kvfree(nm_i->free_nid_bitmap); + if (nm_i->free_nid_bitmap) { + int i; + + for (i = 0; i < nm_i->nat_blocks; i++) + kvfree(nm_i->free_nid_bitmap[i]); + kfree(nm_i->free_nid_bitmap); + } kvfree(nm_i->free_nid_count); kfree(nm_i->nat_bitmap); diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 124787e8db58..aa5db8b5521a 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -305,6 +305,10 @@ struct f2fs_node { */ #define NAT_ENTRY_PER_BLOCK (PAGE_SIZE / sizeof(struct f2fs_nat_entry)) #define NAT_ENTRY_BITMAP_SIZE ((NAT_ENTRY_PER_BLOCK + 7) / 8) +#define NAT_ENTRY_BITMAP_SIZE_ALIGNED \ + ((NAT_ENTRY_BITMAP_SIZE + BITS_PER_LONG - 1) / \ + BITS_PER_LONG * BITS_PER_LONG) + struct f2fs_nat_entry { __u8 version; /* latest version of cached nat entry */ From da5ce874f8cabe18bc34b50856933108830663bc Mon Sep 17 00:00:00 2001 From: Qiuyang Sun Date: Tue, 13 Mar 2018 19:42:50 +0800 Subject: [PATCH 37/50] f2fs: release locks before return in f2fs_ioc_gc_range() Currently, we will leave the kernel with locks still held when the gc_range is invalid. This patch fixes the bug. Signed-off-by: Qiuyang Sun Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index c4c27e63daf1..ee88058acd63 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -2059,8 +2059,10 @@ static int f2fs_ioc_gc_range(struct file *filp, unsigned long arg) return ret; end = range.start + range.len; - if (range.start < MAIN_BLKADDR(sbi) || end >= MAX_BLKADDR(sbi)) - return -EINVAL; + if (range.start < MAIN_BLKADDR(sbi) || end >= MAX_BLKADDR(sbi)) { + ret = -EINVAL; + goto out; + } do_more: if (!range.sync) { if (!mutex_trylock(&sbi->gc_mutex)) { From b7c409deda6bcaaa6452b015937a013dd8b481b4 Mon Sep 17 00:00:00 2001 From: Sheng Yong Date: Thu, 15 Mar 2018 18:51:41 +0800 Subject: [PATCH 38/50] f2fs: introduce F2FS_FEATURE_LOST_FOUND feature This patch introduces a new feature, F2FS_FEATURE_LOST_FOUND, which is set by mkfs. mkfs creates a directory named lost+found, which saves unreachable files. If fsck finds a file which has no parent, or its parent is removed by fsck, the file will be placed under lost+found directory by fsck. lost+found directory could not be encrypted. As a result, the root directory cannot be encrypted too. So if LOST_FOUND feature is enabled, let's avoid to encrypt root directory. Signed-off-by: Sheng Yong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 ++ fs/f2fs/super.c | 12 ++++++++++++ fs/f2fs/sysfs.c | 7 +++++++ 3 files changed, 21 insertions(+) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 641b4b98d373..c0d74ff9a6d7 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -144,6 +144,7 @@ struct f2fs_mount_info { #define F2FS_FEATURE_FLEXIBLE_INLINE_XATTR 0x0040 #define F2FS_FEATURE_QUOTA_INO 0x0080 #define F2FS_FEATURE_INODE_CRTIME 0x0100 +#define F2FS_FEATURE_LOST_FOUND 0x0200 #define F2FS_HAS_FEATURE(sb, mask) \ ((F2FS_SB(sb)->raw_super->feature & cpu_to_le32(mask)) != 0) @@ -3214,6 +3215,7 @@ F2FS_FEATURE_FUNCS(inode_chksum, INODE_CHKSUM); F2FS_FEATURE_FUNCS(flexible_inline_xattr, FLEXIBLE_INLINE_XATTR); F2FS_FEATURE_FUNCS(quota_ino, QUOTA_INO); F2FS_FEATURE_FUNCS(inode_crtime, INODE_CRTIME); +F2FS_FEATURE_FUNCS(lost_found, LOST_FOUND); #ifdef CONFIG_BLK_DEV_ZONED static inline int get_blkz_type(struct f2fs_sb_info *sbi, diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 419eaacf3366..dcdae3f932e2 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1891,6 +1891,18 @@ static int f2fs_get_context(struct inode *inode, void *ctx, size_t len) static int f2fs_set_context(struct inode *inode, const void *ctx, size_t len, void *fs_data) { + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + + /* + * Encrypting the root directory is not allowed because fsck + * expects lost+found directory to exist and remain unencrypted + * if LOST_FOUND feature is enabled. + * + */ + if (f2fs_sb_has_lost_found(sbi->sb) && + inode->i_ino == F2FS_ROOT_INO(sbi)) + return -EPERM; + return f2fs_setxattr(inode, F2FS_XATTR_INDEX_ENCRYPTION, F2FS_XATTR_NAME_ENCRYPTION_CONTEXT, ctx, len, fs_data, XATTR_CREATE); diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 7d983ad19da4..f33a56d6e6dd 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -116,6 +116,9 @@ static ssize_t features_show(struct f2fs_attr *a, if (f2fs_sb_has_inode_crtime(sb)) len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? ", " : "", "inode_crtime"); + if (f2fs_sb_has_lost_found(sb)) + len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", + len ? ", " : "", "lost_found"); len += snprintf(buf + len, PAGE_SIZE - len, "\n"); return len; } @@ -292,6 +295,7 @@ enum feat_id { FEAT_FLEXIBLE_INLINE_XATTR, FEAT_QUOTA_INO, FEAT_INODE_CRTIME, + FEAT_LOST_FOUND, }; static ssize_t f2fs_feature_show(struct f2fs_attr *a, @@ -307,6 +311,7 @@ static ssize_t f2fs_feature_show(struct f2fs_attr *a, case FEAT_FLEXIBLE_INLINE_XATTR: case FEAT_QUOTA_INO: case FEAT_INODE_CRTIME: + case FEAT_LOST_FOUND: return snprintf(buf, PAGE_SIZE, "supported\n"); } return 0; @@ -386,6 +391,7 @@ F2FS_FEATURE_RO_ATTR(inode_checksum, FEAT_INODE_CHECKSUM); F2FS_FEATURE_RO_ATTR(flexible_inline_xattr, FEAT_FLEXIBLE_INLINE_XATTR); F2FS_FEATURE_RO_ATTR(quota_ino, FEAT_QUOTA_INO); F2FS_FEATURE_RO_ATTR(inode_crtime, FEAT_INODE_CRTIME); +F2FS_FEATURE_RO_ATTR(lost_found, FEAT_LOST_FOUND); #define ATTR_LIST(name) (&f2fs_attr_##name.attr) static struct attribute *f2fs_attrs[] = { @@ -441,6 +447,7 @@ static struct attribute *f2fs_feat_attrs[] = { ATTR_LIST(flexible_inline_xattr), ATTR_LIST(quota_ino), ATTR_LIST(inode_crtime), + ATTR_LIST(lost_found), NULL, }; From ff62af200b94e06b1430eac6e58f5f26d13c749d Mon Sep 17 00:00:00 2001 From: Sheng Yong Date: Thu, 15 Mar 2018 18:51:42 +0800 Subject: [PATCH 39/50] f2fs: introduce a new mount option test_dummy_encryption This patch introduces a new mount option `test_dummy_encryption' to allow fscrypt to create a fake fscrypt context. This is used by xfstests. Signed-off-by: Sheng Yong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/filesystems/f2fs.txt | 2 ++ fs/f2fs/dir.c | 4 +++- fs/f2fs/f2fs.h | 8 ++++++++ fs/f2fs/namei.c | 9 ++++++--- fs/f2fs/super.c | 28 ++++++++++++++++++++++++++++ 5 files changed, 47 insertions(+), 4 deletions(-) diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt index 514e44983a8e..12a147c9f87f 100644 --- a/Documentation/filesystems/f2fs.txt +++ b/Documentation/filesystems/f2fs.txt @@ -189,6 +189,8 @@ fsync_mode=%s Control the policy of fsync. Currently supports "posix" fsync will be heavy and behaves in line with xfs, ext4 and btrfs, where xfstest generic/342 will pass, but the performance will regress. +test_dummy_encryption Enable dummy encryption, which provides a fake fscrypt + context. The fake fscrypt context is used by xfstests. ================================================================================ DEBUGFS ENTRIES diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 9a0d103b5052..fe661274ff10 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -361,6 +361,7 @@ struct page *init_inode_metadata(struct inode *inode, struct inode *dir, struct page *dpage) { struct page *page; + int dummy_encrypt = DUMMY_ENCRYPTION_ENABLED(F2FS_I_SB(dir)); int err; if (is_inode_flag_set(inode, FI_NEW_INODE)) { @@ -387,7 +388,8 @@ struct page *init_inode_metadata(struct inode *inode, struct inode *dir, if (err) goto put_error; - if (f2fs_encrypted_inode(dir) && f2fs_may_encrypt(inode)) { + if ((f2fs_encrypted_inode(dir) || dummy_encrypt) && + f2fs_may_encrypt(inode)) { err = fscrypt_inherit_context(dir, inode, page, false); if (err) goto put_error; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index c0d74ff9a6d7..191ee5718369 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -133,6 +133,7 @@ struct f2fs_mount_info { int whint_mode; int alloc_mode; /* segment allocation policy */ int fsync_mode; /* fsync policy */ + bool test_dummy_encryption; /* test dummy encryption */ }; #define F2FS_FEATURE_ENCRYPT 0x0001 @@ -1077,6 +1078,13 @@ enum fsync_mode { FSYNC_MODE_STRICT, /* fsync behaves in line with ext4 */ }; +#ifdef CONFIG_F2FS_FS_ENCRYPTION +#define DUMMY_ENCRYPTION_ENABLED(sbi) \ + (unlikely(F2FS_OPTION(sbi).test_dummy_encryption)) +#else +#define DUMMY_ENCRYPTION_ENABLED(sbi) (0) +#endif + struct f2fs_sb_info { struct super_block *sb; /* pointer to VFS super block */ struct proc_dir_entry *s_proc; /* proc entry */ diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index f4ae46282eef..d5098efe577c 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -78,7 +78,8 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) set_inode_flag(inode, FI_NEW_INODE); /* If the directory encrypted, then we should encrypt the inode. */ - if (f2fs_encrypted_inode(dir) && f2fs_may_encrypt(inode)) + if ((f2fs_encrypted_inode(dir) || DUMMY_ENCRYPTION_ENABLED(sbi)) && + f2fs_may_encrypt(inode)) f2fs_set_encrypted_inode(inode); if (f2fs_sb_has_extra_attr(sbi->sb)) { @@ -787,10 +788,12 @@ out: static int f2fs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) { - if (unlikely(f2fs_cp_error(F2FS_I_SB(dir)))) + struct f2fs_sb_info *sbi = F2FS_I_SB(dir); + + if (unlikely(f2fs_cp_error(sbi))) return -EIO; - if (f2fs_encrypted_inode(dir)) { + if (f2fs_encrypted_inode(dir) || DUMMY_ENCRYPTION_ENABLED(sbi)) { int err = fscrypt_get_encryption_info(dir); if (err) return err; diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index dcdae3f932e2..98b2c7ff1804 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -132,6 +132,7 @@ enum { Opt_whint, Opt_alloc, Opt_fsync, + Opt_test_dummy_encryption, Opt_err, }; @@ -188,6 +189,7 @@ static match_table_t f2fs_tokens = { {Opt_whint, "whint_mode=%s"}, {Opt_alloc, "alloc_mode=%s"}, {Opt_fsync, "fsync_mode=%s"}, + {Opt_test_dummy_encryption, "test_dummy_encryption"}, {Opt_err, NULL}, }; @@ -744,6 +746,21 @@ static int parse_options(struct super_block *sb, char *options) } kfree(name); break; + case Opt_test_dummy_encryption: +#ifdef CONFIG_F2FS_FS_ENCRYPTION + if (!f2fs_sb_has_encrypt(sb)) { + f2fs_msg(sb, KERN_ERR, "Encrypt feature is off"); + return -EINVAL; + } + + F2FS_OPTION(sbi).test_dummy_encryption = true; + f2fs_msg(sb, KERN_INFO, + "Test dummy encryption mode enabled"); +#else + f2fs_msg(sb, KERN_INFO, + "Test dummy encryption mount option ignored"); +#endif + break; default: f2fs_msg(sb, KERN_ERR, "Unrecognized mount option \"%s\" or missing value", @@ -1314,6 +1331,10 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) seq_printf(seq, ",whint_mode=%s", "user-based"); else if (F2FS_OPTION(sbi).whint_mode == WHINT_MODE_FS) seq_printf(seq, ",whint_mode=%s", "fs-based"); +#ifdef CONFIG_F2FS_FS_ENCRYPTION + if (F2FS_OPTION(sbi).test_dummy_encryption) + seq_puts(seq, ",test_dummy_encryption"); +#endif if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_DEFAULT) seq_printf(seq, ",alloc_mode=%s", "default"); @@ -1335,6 +1356,7 @@ static void default_options(struct f2fs_sb_info *sbi) F2FS_OPTION(sbi).whint_mode = WHINT_MODE_OFF; F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_DEFAULT; F2FS_OPTION(sbi).fsync_mode = FSYNC_MODE_POSIX; + F2FS_OPTION(sbi).test_dummy_encryption = false; sbi->readdir_ra = 1; set_opt(sbi, BG_GC); @@ -1908,6 +1930,11 @@ static int f2fs_set_context(struct inode *inode, const void *ctx, size_t len, ctx, len, fs_data, XATTR_CREATE); } +static bool f2fs_dummy_context(struct inode *inode) +{ + return DUMMY_ENCRYPTION_ENABLED(F2FS_I_SB(inode)); +} + static unsigned f2fs_max_namelen(struct inode *inode) { return S_ISLNK(inode->i_mode) ? @@ -1918,6 +1945,7 @@ static const struct fscrypt_operations f2fs_cryptops = { .key_prefix = "f2fs:", .get_context = f2fs_get_context, .set_context = f2fs_set_context, + .dummy_context = f2fs_dummy_context, .empty_dir = f2fs_empty_dir, .max_namelen = f2fs_max_namelen, }; From 02117b8ae9c0549500119c42375f9e15602bf47e Mon Sep 17 00:00:00 2001 From: Ritesh Harjani Date: Fri, 16 Mar 2018 18:53:53 +0530 Subject: [PATCH 40/50] f2fs: Set GF_NOFS in read_cache_page_gfp while doing f2fs_quota_read Quota code itself is serializing the operations by taking mutex_lock. It seems a below deadlock can happen if GF_NOFS is not used in f2fs_quota_read __switch_to+0x88 __schedule+0x5b0 schedule+0x78 schedule_preempt_disabled+0x20 __mutex_lock_slowpath+0xdc //mutex owner is itself mutex_lock+0x2c dquot_commit+0x30 //mutex_lock(&dqopt->dqio_mutex); dqput+0xe0 __dquot_drop+0x80 dquot_drop+0x48 f2fs_evict_inode+0x218 evict+0xa8 dispose_list+0x3c prune_icache_sb+0x58 super_cache_scan+0xf4 do_shrink_slab+0x208 shrink_slab.part.40+0xac shrink_zone+0x1b0 do_try_to_free_pages+0x25c try_to_free_pages+0x164 __alloc_pages_nodemask+0x534 do_read_cache_page+0x6c read_cache_page+0x14 f2fs_quota_read+0xa4 read_blk+0x54 find_tree_dqentry+0xe4 find_tree_dqentry+0xb8 find_tree_dqentry+0xb8 find_tree_dqentry+0xb8 qtree_read_dquot+0x68 v2_read_dquot+0x24 dquot_acquire+0x5c // mutex_lock(&dqopt->dqio_mutex); dqget+0x238 __dquot_initialize+0xd4 dquot_initialize+0x10 dquot_file_open+0x34 f2fs_file_open+0x6c do_dentry_open+0x1e4 vfs_open+0x6c path_openat+0xa20 do_filp_open+0x4c do_sys_open+0x178 Signed-off-by: Ritesh Harjani Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 98b2c7ff1804..0c1fe9b87502 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1572,7 +1572,7 @@ static ssize_t f2fs_quota_read(struct super_block *sb, int type, char *data, while (toread > 0) { tocopy = min_t(unsigned long, sb->s_blocksize - offset, toread); repeat: - page = read_mapping_page(mapping, blkidx, NULL); + page = read_cache_page_gfp(mapping, blkidx, GFP_NOFS); if (IS_ERR(page)) { if (PTR_ERR(page) == -ENOMEM) { congestion_wait(BLK_RW_ASYNC, HZ/50); From 0833721ec3658a4e9d5e58b6fa82cf9edc431e59 Mon Sep 17 00:00:00 2001 From: Yunlei He Date: Thu, 8 Mar 2018 16:29:13 +0800 Subject: [PATCH 41/50] f2fs: check blkaddr more accuratly before issue a bio This patch check blkaddr more accuratly before issue a write or read bio. Signed-off-by: Yunlei He Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 2 ++ fs/f2fs/data.c | 5 +++-- fs/f2fs/f2fs.h | 1 + fs/f2fs/segment.h | 25 +++++++++++++++++++------ 4 files changed, 25 insertions(+), 8 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 5b2db75ecd54..bf779461df13 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -68,6 +68,7 @@ static struct page *__get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index, .old_blkaddr = index, .new_blkaddr = index, .encrypted_page = NULL, + .is_meta = is_meta, }; if (unlikely(!is_meta)) @@ -162,6 +163,7 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, .op_flags = sync ? (REQ_META | REQ_PRIO) : REQ_RAHEAD, .encrypted_page = NULL, .in_list = false, + .is_meta = (type != META_POR), }; struct blk_plug plug; diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index f6cf99a311c3..db50686f5096 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -383,6 +383,7 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio) struct page *page = fio->encrypted_page ? fio->encrypted_page : fio->page; + verify_block_addr(fio, fio->new_blkaddr); trace_f2fs_submit_page_bio(page, fio); f2fs_trace_ios(fio, 0); @@ -428,8 +429,8 @@ next: } if (fio->old_blkaddr != NEW_ADDR) - verify_block_addr(sbi, fio->old_blkaddr); - verify_block_addr(sbi, fio->new_blkaddr); + verify_block_addr(fio, fio->old_blkaddr); + verify_block_addr(fio, fio->new_blkaddr); bio_page = fio->encrypted_page ? fio->encrypted_page : fio->page; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 191ee5718369..ae69dc358401 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1001,6 +1001,7 @@ struct f2fs_io_info { bool submitted; /* indicate IO submission */ int need_lock; /* indicate we need to lock cp_rwsem */ bool in_list; /* indicate fio is in io_list */ + bool is_meta; /* indicate borrow meta inode mapping or not */ enum iostat_type io_type; /* io type */ struct writeback_control *io_wbc; /* writeback control */ }; diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index dbb774aaf387..3325d0769723 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -53,13 +53,19 @@ ((secno) == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno / \ (sbi)->segs_per_sec)) \ -#define MAIN_BLKADDR(sbi) (SM_I(sbi)->main_blkaddr) -#define SEG0_BLKADDR(sbi) (SM_I(sbi)->seg0_blkaddr) +#define MAIN_BLKADDR(sbi) \ + (SM_I(sbi) ? SM_I(sbi)->main_blkaddr : \ + le32_to_cpu(F2FS_RAW_SUPER(sbi)->main_blkaddr)) +#define SEG0_BLKADDR(sbi) \ + (SM_I(sbi) ? SM_I(sbi)->seg0_blkaddr : \ + le32_to_cpu(F2FS_RAW_SUPER(sbi)->segment0_blkaddr)) #define MAIN_SEGS(sbi) (SM_I(sbi)->main_segments) #define MAIN_SECS(sbi) ((sbi)->total_sections) -#define TOTAL_SEGS(sbi) (SM_I(sbi)->segment_count) +#define TOTAL_SEGS(sbi) \ + (SM_I(sbi) ? SM_I(sbi)->segment_count : \ + le32_to_cpu(F2FS_RAW_SUPER(sbi)->segment_count)) #define TOTAL_BLKS(sbi) (TOTAL_SEGS(sbi) << (sbi)->log_blocks_per_seg) #define MAX_BLKADDR(sbi) (SEG0_BLKADDR(sbi) + TOTAL_BLKS(sbi)) @@ -632,10 +638,17 @@ static inline void check_seg_range(struct f2fs_sb_info *sbi, unsigned int segno) f2fs_bug_on(sbi, segno > TOTAL_SEGS(sbi) - 1); } -static inline void verify_block_addr(struct f2fs_sb_info *sbi, block_t blk_addr) +static inline void verify_block_addr(struct f2fs_io_info *fio, block_t blk_addr) { - BUG_ON(blk_addr < SEG0_BLKADDR(sbi) - || blk_addr >= MAX_BLKADDR(sbi)); + struct f2fs_sb_info *sbi = fio->sbi; + + if (PAGE_TYPE_OF_BIO(fio->type) == META && + (!is_read_io(fio->op) || fio->is_meta)) + BUG_ON(blk_addr < SEG0_BLKADDR(sbi) || + blk_addr >= MAIN_BLKADDR(sbi)); + else + BUG_ON(blk_addr < MAIN_BLKADDR(sbi) || + blk_addr >= MAX_BLKADDR(sbi)); } /* From df033caf51c01edf3d2bfc61b41a9f1e566743ed Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 20 Mar 2018 23:08:29 +0800 Subject: [PATCH 42/50] f2fs: clean up with F2FS_BLK_ALIGN Clean up F2FS_BYTES_TO_BLK(x + F2FS_BLKSIZE - 1) with F2FS_BLK_ALIGN(x). Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 3 +-- fs/f2fs/node.c | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index ee88058acd63..8068b015ece5 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -573,7 +573,6 @@ truncate_out: int truncate_blocks(struct inode *inode, u64 from, bool lock) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - unsigned int blocksize = inode->i_sb->s_blocksize; struct dnode_of_data dn; pgoff_t free_from; int count = 0, err = 0; @@ -582,7 +581,7 @@ int truncate_blocks(struct inode *inode, u64 from, bool lock) trace_f2fs_truncate_blocks_enter(inode, from); - free_from = (pgoff_t)F2FS_BYTES_TO_BLK(from + blocksize - 1); + free_from = (pgoff_t)F2FS_BLK_ALIGN(from); if (free_from >= sbi->max_file_blocks) goto free_partial; diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index f7886b46478d..ecfc2465da9b 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -2581,8 +2581,7 @@ static int __get_nat_bitmaps(struct f2fs_sb_info *sbi) if (!enabled_nat_bits(sbi, NULL)) return 0; - nm_i->nat_bits_blocks = F2FS_BYTES_TO_BLK((nat_bits_bytes << 1) + 8 + - F2FS_BLKSIZE - 1); + nm_i->nat_bits_blocks = F2FS_BLK_ALIGN((nat_bits_bytes << 1) + 8); nm_i->nat_bits = f2fs_kzalloc(sbi, nm_i->nat_bits_blocks << F2FS_BLKSIZE_BITS, GFP_KERNEL); if (!nm_i->nat_bits) From 780de47cf6cb5f524cd98ec8ffbffc3da5696e17 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 20 Mar 2018 23:08:30 +0800 Subject: [PATCH 43/50] f2fs: don't track new nat entry in nat set Nat entry set is used only in checkpoint(), and during checkpoint() we won't flush new nat entry with unallocated address, so we don't need to add new nat entry into nat set, then nat_entry_set::entry_cnt can indicate actual entry count we need to flush in checkpoint(). Signed-off-by: Yunlei He Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 29 +++++++++++++++++++++++++---- fs/f2fs/node.h | 1 + 2 files changed, 26 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index ecfc2465da9b..9a99243054ba 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -193,8 +193,8 @@ static void __del_from_nat_cache(struct f2fs_nm_info *nm_i, struct nat_entry *e) __free_nat_entry(e); } -static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i, - struct nat_entry *ne) +static struct nat_entry_set *__grab_nat_entry_set(struct f2fs_nm_info *nm_i, + struct nat_entry *ne) { nid_t set = NAT_BLOCK_OFFSET(ne->ni.nid); struct nat_entry_set *head; @@ -209,15 +209,36 @@ static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i, head->entry_cnt = 0; f2fs_radix_tree_insert(&nm_i->nat_set_root, set, head); } + return head; +} + +static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i, + struct nat_entry *ne) +{ + struct nat_entry_set *head; + bool new_ne = nat_get_blkaddr(ne) == NEW_ADDR; + + if (!new_ne) + head = __grab_nat_entry_set(nm_i, ne); + + /* + * update entry_cnt in below condition: + * 1. update NEW_ADDR to valid block address; + * 2. update old block address to new one; + */ + if (!new_ne && (get_nat_flag(ne, IS_PREALLOC) || + !get_nat_flag(ne, IS_DIRTY))) + head->entry_cnt++; + + set_nat_flag(ne, IS_PREALLOC, new_ne); if (get_nat_flag(ne, IS_DIRTY)) goto refresh_list; nm_i->dirty_nat_cnt++; - head->entry_cnt++; set_nat_flag(ne, IS_DIRTY, true); refresh_list: - if (nat_get_blkaddr(ne) == NEW_ADDR) + if (new_ne) list_del_init(&ne->list); else list_move_tail(&ne->list, &head->entry_list); diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index e593b4d78be2..b95e49e4a928 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -44,6 +44,7 @@ enum { HAS_FSYNCED_INODE, /* is the inode fsynced before? */ HAS_LAST_FSYNC, /* has the latest node fsync mark? */ IS_DIRTY, /* this nat entry is dirty? */ + IS_PREALLOC, /* nat entry is preallocated */ }; /* From 2f52052929842ac21a958ab5f07aab01312cc0b2 Mon Sep 17 00:00:00 2001 From: Yunlong Song Date: Thu, 22 Mar 2018 10:08:40 +0800 Subject: [PATCH 44/50] f2fs: no need to initialize zero value for GFP_F2FS_ZERO Since f2fs_inode_info is allocated with flag GFP_F2FS_ZERO, so we do not need to initialize zero value for its member any more. Signed-off-by: Yunlong Song Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 0c1fe9b87502..42d564c5ccd0 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -827,7 +827,6 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb) /* Initialize f2fs-specific inode info */ atomic_set(&fi->dirty_pages, 0); fi->i_current_depth = 1; - fi->i_advise = 0; init_rwsem(&fi->i_sem); INIT_LIST_HEAD(&fi->dirty_list); INIT_LIST_HEAD(&fi->gdirty_list); @@ -839,10 +838,6 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb) init_rwsem(&fi->i_mmap_sem); init_rwsem(&fi->i_xattr_sem); -#ifdef CONFIG_QUOTA - memset(&fi->i_dquot, 0, sizeof(fi->i_dquot)); - fi->i_reserved_quota = 0; -#endif /* Will be used by directory only */ fi->i_dir_level = F2FS_SB(sb)->dir_level; From d21b0f238adf4641a72f4b0dd4a2b10aa36d09f5 Mon Sep 17 00:00:00 2001 From: Yunlei He Date: Mon, 26 Mar 2018 17:32:23 +0800 Subject: [PATCH 45/50] f2fs: Add a segment type check in inplace write This patch add a segment type check in IPU, in case of something wrong with blkadd in dnode. Signed-off-by: Yunlei He Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 637980d04503..5854cc4e1d67 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2790,10 +2790,15 @@ void write_data_page(struct dnode_of_data *dn, struct f2fs_io_info *fio) int rewrite_data_page(struct f2fs_io_info *fio) { int err; + struct f2fs_sb_info *sbi = fio->sbi; fio->new_blkaddr = fio->old_blkaddr; /* i/o temperature is needed for passing down write hints */ __get_segment_type(fio); + + f2fs_bug_on(sbi, !IS_DATASEG(get_seg_entry(sbi, + GET_SEGNO(sbi, fio->new_blkaddr))->type)); + stat_inc_inplace_blocks(fio->sbi); err = f2fs_submit_page_bio(fio); From 53fedcc09c66c68e0fa7ccef0fe9c15f96dca452 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 28 Mar 2018 11:15:09 -0700 Subject: [PATCH 46/50] f2fs: reserve bits for fs-verity Reserve an F2FS feature flag and inode flag for fs-verity. This is an in-development feature that is planned be discussed at LSF/MM 2018 [1]. It will provide file-based integrity and authenticity for read-only files. Most code will be in a filesystem-independent module, with smaller changes needed to individual filesystems that opt-in to supporting the feature. An early prototype supporting F2FS is available [2]. Reserving the F2FS on-disk bits for fs-verity will prevent users of the prototype from conflicting with other new F2FS features. Note that we're reserving the inode flag in f2fs_inode.i_advise, which isn't really appropriate since it's not a hint or advice. But ->i_advise is already being used to hold the 'encrypt' flag; and F2FS's ->i_flags uses the generic FS_* values, so it seems ->i_flags can't be used for an F2FS-specific flag without additional work to remove the assumption that ->i_flags uses the generic flags namespace. [1] https://marc.info/?l=linux-fsdevel&m=151690752225644 [2] https://git.kernel.org/pub/scm/linux/kernel/git/mhalcrow/linux.git/log/?h=fs-verity-dev Signed-off-by: Eric Biggers Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index ae69dc358401..96d7809c4541 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -146,6 +146,7 @@ struct f2fs_mount_info { #define F2FS_FEATURE_QUOTA_INO 0x0080 #define F2FS_FEATURE_INODE_CRTIME 0x0100 #define F2FS_FEATURE_LOST_FOUND 0x0200 +#define F2FS_FEATURE_VERITY 0x0400 /* reserved */ #define F2FS_HAS_FEATURE(sb, mask) \ ((F2FS_SB(sb)->raw_super->feature & cpu_to_le32(mask)) != 0) @@ -598,6 +599,7 @@ enum { #define FADVISE_ENC_NAME_BIT 0x08 #define FADVISE_KEEP_SIZE_BIT 0x10 #define FADVISE_HOT_BIT 0x20 +#define FADVISE_VERITY_BIT 0x40 /* reserved */ #define file_is_cold(inode) is_file(inode, FADVISE_COLD_BIT) #define file_wrong_pino(inode) is_file(inode, FADVISE_LOST_PINO_BIT) From 235831d7dd783ebd20e08ef4310dbdeeea46d87a Mon Sep 17 00:00:00 2001 From: Junling Zheng Date: Thu, 29 Mar 2018 19:27:12 +0800 Subject: [PATCH 47/50] f2fs: fix a wrong condition in f2fs_skip_inode_update Fix commit 97dd26ad8347 (f2fs: fix wrong AUTO_RECOVER condition). We should use ~PAGE_MASK to determine whether i_size is aligned to the f2fs's block size or not. Signed-off-by: Junling Zheng Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 96d7809c4541..000f93f6767e 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2471,7 +2471,7 @@ static inline bool f2fs_skip_inode_update(struct inode *inode, int dsync) } if (!is_inode_flag_set(inode, FI_AUTO_RECOVER) || file_keep_isize(inode) || - i_size_read(inode) & PAGE_MASK) + i_size_read(inode) & ~PAGE_MASK) return false; down_read(&F2FS_I(inode)->i_sem); From dc7a10ddee0c56c6d891dd18de5c4ee9869545e0 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 30 Mar 2018 17:58:13 -0700 Subject: [PATCH 48/50] f2fs: truncate preallocated blocks in error case If write is failed, we must deallocate the blocks that we couldn't write. Cc: stable@vger.kernel.org Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 8068b015ece5..6b94f19b3fa8 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -2911,6 +2911,8 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) ret = generic_write_checks(iocb, from); if (ret > 0) { + bool preallocated = false; + size_t target_size = 0; int err; if (iov_iter_fault_in_readable(from, iov_iter_count(from))) @@ -2927,6 +2929,9 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) } } else { + preallocated = true; + target_size = iocb->ki_pos + iov_iter_count(from); + err = f2fs_preallocate_blocks(iocb, from); if (err) { clear_inode_flag(inode, FI_NO_PREALLOC); @@ -2939,6 +2944,10 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) blk_finish_plug(&plug); clear_inode_flag(inode, FI_NO_PREALLOC); + /* if we couldn't write data, we should deallocate blocks. */ + if (preallocated && i_size_read(inode) < target_size) + f2fs_truncate(inode); + if (ret > 0) f2fs_update_iostat(F2FS_I_SB(inode), APP_WRITE_IO, ret); } From c79d152094d228080e9895a92feb44ba9d60c798 Mon Sep 17 00:00:00 2001 From: Yunlong Song Date: Mon, 2 Apr 2018 20:22:20 +0800 Subject: [PATCH 49/50] f2fs: make assignment of t->dentry_bitmap more readable In make_dentry_ptr_block, it is confused with "&" for t->dentry_bitmap but without "&" for t->dentry, so delete "&" to make code more readable. Signed-off-by: Yunlong Song Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 000f93f6767e..7102d3965fea 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -472,7 +472,7 @@ static inline void make_dentry_ptr_block(struct inode *inode, d->inode = inode; d->max = NR_DENTRY_IN_BLOCK; d->nr_bitmap = SIZE_OF_DENTRY_BITMAP; - d->bitmap = &t->dentry_bitmap; + d->bitmap = t->dentry_bitmap; d->dentry = t->dentry; d->filename = t->filename; } From 214c2461a864a46b11856426b80dc7db453043c5 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 29 Mar 2018 22:50:41 -0700 Subject: [PATCH 50/50] f2fs: remain written times to update inode during fsync This fixes xfstests/generic/392. The failure was caused by different times between 1) one marked in the last fsync(2) call and 2) the other given by roll-forward recovery after power-cut. The reason was that we skipped updating inode block at 1), since its i_size was recoverable along with 4KB-aligned data writes, which was fixed by: "f2fs: fix a wrong condition in f2fs_skip_inode_update" Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 11 +++++++++++ fs/f2fs/inode.c | 8 ++++++++ 2 files changed, 19 insertions(+) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 7102d3965fea..1df7f10476d6 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -664,6 +664,7 @@ struct f2fs_inode_info { kprojid_t i_projid; /* id for project quota */ int i_inline_xattr_size; /* inline xattr size */ struct timespec i_crtime; /* inode creation time */ + struct timespec i_disk_time[4]; /* inode disk times */ }; static inline void get_extent_info(struct extent_info *ext, @@ -2474,6 +2475,16 @@ static inline bool f2fs_skip_inode_update(struct inode *inode, int dsync) i_size_read(inode) & ~PAGE_MASK) return false; + if (!timespec_equal(F2FS_I(inode)->i_disk_time, &inode->i_atime)) + return false; + if (!timespec_equal(F2FS_I(inode)->i_disk_time + 1, &inode->i_ctime)) + return false; + if (!timespec_equal(F2FS_I(inode)->i_disk_time + 2, &inode->i_mtime)) + return false; + if (!timespec_equal(F2FS_I(inode)->i_disk_time + 3, + &F2FS_I(inode)->i_crtime)) + return false; + down_read(&F2FS_I(inode)->i_sem); ret = F2FS_I(inode)->last_disk_size == i_size_read(inode); up_read(&F2FS_I(inode)->i_sem); diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 401f09ccce7e..e0d9e8f27ed2 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -284,6 +284,10 @@ static int do_read_inode(struct inode *inode) fi->i_crtime.tv_nsec = le32_to_cpu(ri->i_crtime_nsec); } + F2FS_I(inode)->i_disk_time[0] = inode->i_atime; + F2FS_I(inode)->i_disk_time[1] = inode->i_ctime; + F2FS_I(inode)->i_disk_time[2] = inode->i_mtime; + F2FS_I(inode)->i_disk_time[3] = F2FS_I(inode)->i_crtime; f2fs_put_page(node_page, 1); stat_inc_inline_xattr(inode); @@ -444,6 +448,10 @@ void update_inode(struct inode *inode, struct page *node_page) if (inode->i_nlink == 0) clear_inline_node(node_page); + F2FS_I(inode)->i_disk_time[0] = inode->i_atime; + F2FS_I(inode)->i_disk_time[1] = inode->i_ctime; + F2FS_I(inode)->i_disk_time[2] = inode->i_mtime; + F2FS_I(inode)->i_disk_time[3] = F2FS_I(inode)->i_crtime; } void update_inode_page(struct inode *inode)