From fc01008c92f40015aeeced94750855a7111b6929 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 21 May 2024 14:23:17 +0800 Subject: [PATCH 01/26] f2fs: fix to do sanity check on F2FS_INLINE_DATA flag in inode during GC syzbot reports a f2fs bug as below: ------------[ cut here ]------------ kernel BUG at fs/f2fs/inline.c:258! CPU: 1 PID: 34 Comm: kworker/u8:2 Not tainted 6.9.0-rc6-syzkaller-00012-g9e4bc4bcae01 #0 RIP: 0010:f2fs_write_inline_data+0x781/0x790 fs/f2fs/inline.c:258 Call Trace: f2fs_write_single_data_page+0xb65/0x1d60 fs/f2fs/data.c:2834 f2fs_write_cache_pages fs/f2fs/data.c:3133 [inline] __f2fs_write_data_pages fs/f2fs/data.c:3288 [inline] f2fs_write_data_pages+0x1efe/0x3a90 fs/f2fs/data.c:3315 do_writepages+0x35b/0x870 mm/page-writeback.c:2612 __writeback_single_inode+0x165/0x10b0 fs/fs-writeback.c:1650 writeback_sb_inodes+0x905/0x1260 fs/fs-writeback.c:1941 wb_writeback+0x457/0xce0 fs/fs-writeback.c:2117 wb_do_writeback fs/fs-writeback.c:2264 [inline] wb_workfn+0x410/0x1090 fs/fs-writeback.c:2304 process_one_work kernel/workqueue.c:3254 [inline] process_scheduled_works+0xa12/0x17c0 kernel/workqueue.c:3335 worker_thread+0x86d/0xd70 kernel/workqueue.c:3416 kthread+0x2f2/0x390 kernel/kthread.c:388 ret_from_fork+0x4d/0x80 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:244 The root cause is: inline_data inode can be fuzzed, so that there may be valid blkaddr in its direct node, once f2fs triggers background GC to migrate the block, it will hit f2fs_bug_on() during dirty page writeback. Let's add sanity check on F2FS_INLINE_DATA flag in inode during GC, so that, it can forbid migrating inline_data inode's data block for fixing. Reported-by: syzbot+848062ba19c8782ca5c8@syzkaller.appspotmail.com Closes: https://lore.kernel.org/linux-f2fs-devel/000000000000d103ce06174d7ec3@google.com Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 6066c6eecf41..20e2f989013b 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1563,6 +1563,16 @@ next_step: continue; } + if (f2fs_has_inline_data(inode)) { + iput(inode); + set_sbi_flag(sbi, SBI_NEED_FSCK); + f2fs_err_ratelimited(sbi, + "inode %lx has both inline_data flag and " + "data block, nid=%u, ofs_in_node=%u", + inode->i_ino, dni.nid, ofs_in_node); + continue; + } + err = f2fs_gc_pinned_control(inode, gc_type, segno); if (err == -EAGAIN) { iput(inode); From c240c87bcd44a1a2375fc8ef8c645d1f1fe76466 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 21 May 2024 14:23:18 +0800 Subject: [PATCH 02/26] f2fs: fix to do sanity check on blocks for inline_data inode inode can be fuzzed, so it can has F2FS_INLINE_DATA flag and valid i_blocks/i_nid value, this patch supports to do extra sanity check to detect such corrupted state. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 +- fs/f2fs/inline.c | 20 +++++++++++++++++++- fs/f2fs/inode.c | 2 +- 3 files changed, 21 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 1974b6aff397..f463961b497c 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -4149,7 +4149,7 @@ extern struct kmem_cache *f2fs_inode_entry_slab; * inline.c */ bool f2fs_may_inline_data(struct inode *inode); -bool f2fs_sanity_check_inline_data(struct inode *inode); +bool f2fs_sanity_check_inline_data(struct inode *inode, struct page *ipage); bool f2fs_may_inline_dentry(struct inode *inode); void f2fs_do_read_inline_data(struct folio *folio, struct page *ipage); void f2fs_truncate_inline_inode(struct inode *inode, diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 7638d0d7b7ee..0203c3baabb6 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -33,11 +33,29 @@ bool f2fs_may_inline_data(struct inode *inode) return !f2fs_post_read_required(inode); } -bool f2fs_sanity_check_inline_data(struct inode *inode) +static bool inode_has_blocks(struct inode *inode, struct page *ipage) +{ + struct f2fs_inode *ri = F2FS_INODE(ipage); + int i; + + if (F2FS_HAS_BLOCKS(inode)) + return true; + + for (i = 0; i < DEF_NIDS_PER_INODE; i++) { + if (ri->i_nid[i]) + return true; + } + return false; +} + +bool f2fs_sanity_check_inline_data(struct inode *inode, struct page *ipage) { if (!f2fs_has_inline_data(inode)) return false; + if (inode_has_blocks(inode, ipage)) + return false; + if (!support_inline_data(inode)) return true; diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 005dde72aff3..33b2778d5452 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -344,7 +344,7 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page) } } - if (f2fs_sanity_check_inline_data(inode)) { + if (f2fs_sanity_check_inline_data(inode, node_page)) { f2fs_warn(sbi, "%s: inode (ino=%lx, mode=%u) should not have inline_data, run fsck to fix", __func__, inode->i_ino, inode->i_mode); return false; From cc260b66c4cd21a370b39f73c9034a420baf8f79 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 15 May 2024 16:12:33 +0800 Subject: [PATCH 03/26] f2fs: add support for FS_IOC_GETFSSYSFSPATH FS_IOC_GETFSSYSFSPATH ioctl expects sysfs sub-path of a filesystem, the format can be "$FSTYP/$SYSFS_IDENTIFIER" under /sys/fs, it can helps to standardizes exporting sysfs datas across filesystems. This patch wires up FS_IOC_GETFSSYSFSPATH for f2fs, it will output "f2fs/". Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 1f1b3647a998..4a1bc8f40f9a 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -4481,6 +4481,7 @@ try_onemore: sb->s_flags = (sb->s_flags & ~SB_POSIXACL) | (test_opt(sbi, POSIX_ACL) ? SB_POSIXACL : 0); super_set_uuid(sb, (void *) raw_super->uuid, sizeof(raw_super->uuid)); + super_set_sysfs_name_bdev(sb); sb->s_iflags |= SB_I_CGROUPWB; /* init f2fs-specific super block info */ From 76da333f4b935af65c9465277032a1e24405375c Mon Sep 17 00:00:00 2001 From: Sheng Yong Date: Thu, 30 May 2024 18:01:58 +0800 Subject: [PATCH 04/26] f2fs: alloc new section if curseg is not the first seg in its zone If curseg is not the first segment in its zone, the zone is not empty. A new section should be allocated and avoid resetting the old zone. Reviewed-by: Chao Yu Signed-off-by: Sheng Yong Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index a0ce3d080f80..c4d951ed54ff 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -5187,7 +5187,8 @@ static int fix_curseg_write_pointer(struct f2fs_sb_info *sbi, int type) } /* Allocate a new section if it's not new. */ - if (cs->next_blkoff) { + if (cs->next_blkoff || + cs->segno != GET_SEG_FROM_SEC(sbi, GET_ZONE_FROM_SEC(sbi, cs_section))) { unsigned int old_segno = cs->segno, old_blkoff = cs->next_blkoff; f2fs_allocate_new_section(sbi, type, true); From 6924c8b6fdf96db24edf8392c2dd00f79eeb8e4b Mon Sep 17 00:00:00 2001 From: Zhiguo Niu Date: Mon, 27 May 2024 19:12:37 +0800 Subject: [PATCH 05/26] f2fs: fix to remove redundant SBI_NEED_FSCK flag set Subsequent f2fs_stop_checkpoint will set cp_err, so this SBI_NEED_FSCK flag set action is invalid. Signed-off-by: Zhiguo Niu Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 20e2f989013b..ef667fec9a12 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1752,7 +1752,6 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, if (type != GET_SUM_TYPE((&sum->footer))) { f2fs_err(sbi, "Inconsistent segment (%u) type [%d, %d] in SSA and SIT", segno, type, GET_SUM_TYPE((&sum->footer))); - set_sbi_flag(sbi, SBI_NEED_FSCK); f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_CORRUPTED_SUMMARY); goto skip; From 5c8764f8679e659c5cb295af7d32279002d13735 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 23 May 2024 21:29:48 +0800 Subject: [PATCH 06/26] f2fs: fix to force buffered IO on inline_data inode It will return all zero data when DIO reading from inline_data inode, it is because f2fs_iomap_begin() assign iomap->type w/ IOMAP_HOLE incorrectly for this case. We can let iomap framework handle inline data via assigning iomap->type and iomap->inline_data correctly, however, it will be a little bit complicated when handling race case in between direct IO and buffered IO. So, let's force to use buffered IO to fix this issue. Cc: stable@vger.kernel.org Reported-by: Barry Song Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 5c0b281a70f3..0cbaeb97c7f5 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -825,6 +825,8 @@ static bool f2fs_force_buffered_io(struct inode *inode, int rw) return true; if (f2fs_compressed_file(inode)) return true; + if (f2fs_has_inline_data(inode)) + return true; /* disallow direct IO if any of devices has unaligned blksize */ if (f2fs_is_multi_device(sbi) && !sbi->aligned_blksize) From 21327a042dd94bc73181d7300e688699cb1f467e Mon Sep 17 00:00:00 2001 From: Zhiguo Niu Date: Wed, 29 May 2024 17:47:00 +0800 Subject: [PATCH 07/26] f2fs: fix to avoid use SSR allocate when do defragment SSR allocate mode will be used when doing file defragment if ATGC is working at the same time, that is because set_page_private_gcing may make CURSEG_ALL_DATA_ATGC segment type got in f2fs_allocate_data_block when defragment page is writeback, which may cause file fragmentation is worse. A file with 2 fragmentations is changed as following after defragment: ----------------file info------------------- sensorsdata : -------------------------------------------- dev [254:48] ino [0x 3029 : 12329] mode [0x 81b0 : 33200] nlink [0x 1 : 1] uid [0x 27e6 : 10214] gid [0x 27e6 : 10214] size [0x 242000 : 2367488] blksize [0x 1000 : 4096] blocks [0x 1210 : 4624] -------------------------------------------- file_pos start_blk end_blk blks 0 11361121 11361207 87 356352 11361215 11361216 2 364544 11361218 11361218 1 368640 11361220 11361221 2 376832 11361224 11361225 2 385024 11361227 11361238 12 434176 11361240 11361252 13 487424 11361254 11361254 1 491520 11361271 11361279 9 528384 3681794 3681795 2 536576 3681797 3681797 1 540672 3681799 3681799 1 544768 3681803 3681803 1 548864 3681805 3681805 1 552960 3681807 3681807 1 557056 3681809 3681809 1 Signed-off-by: Zhiguo Niu Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index c4d951ed54ff..6e8a4b332ad5 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -3483,7 +3483,8 @@ static int __get_segment_type_6(struct f2fs_io_info *fio) if (page_private_gcing(fio->page)) { if (fio->sbi->am.atgc_enabled && (fio->io_type == FS_DATA_IO) && - (fio->sbi->gc_mode != GC_URGENT_HIGH)) + (fio->sbi->gc_mode != GC_URGENT_HIGH) && + !is_inode_flag_set(inode, FI_OPU_WRITE)) return CURSEG_ALL_DATA_ATGC; else return CURSEG_COLD_DATA; From 192b8fb8d1c8ca3c87366ebbef599fa80bb626b8 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 4 Jun 2024 15:56:36 +0800 Subject: [PATCH 08/26] f2fs: fix to don't dirty inode for readonly filesystem syzbot reports f2fs bug as below: kernel BUG at fs/f2fs/inode.c:933! RIP: 0010:f2fs_evict_inode+0x1576/0x1590 fs/f2fs/inode.c:933 Call Trace: evict+0x2a4/0x620 fs/inode.c:664 dispose_list fs/inode.c:697 [inline] evict_inodes+0x5f8/0x690 fs/inode.c:747 generic_shutdown_super+0x9d/0x2c0 fs/super.c:675 kill_block_super+0x44/0x90 fs/super.c:1667 kill_f2fs_super+0x303/0x3b0 fs/f2fs/super.c:4894 deactivate_locked_super+0xc1/0x130 fs/super.c:484 cleanup_mnt+0x426/0x4c0 fs/namespace.c:1256 task_work_run+0x24a/0x300 kernel/task_work.c:180 ptrace_notify+0x2cd/0x380 kernel/signal.c:2399 ptrace_report_syscall include/linux/ptrace.h:411 [inline] ptrace_report_syscall_exit include/linux/ptrace.h:473 [inline] syscall_exit_work kernel/entry/common.c:251 [inline] syscall_exit_to_user_mode_prepare kernel/entry/common.c:278 [inline] __syscall_exit_to_user_mode_work kernel/entry/common.c:283 [inline] syscall_exit_to_user_mode+0x15c/0x280 kernel/entry/common.c:296 do_syscall_64+0x50/0x110 arch/x86/entry/common.c:88 entry_SYSCALL_64_after_hwframe+0x63/0x6b The root cause is: - do_sys_open - f2fs_lookup - __f2fs_find_entry - f2fs_i_depth_write - f2fs_mark_inode_dirty_sync - f2fs_dirty_inode - set_inode_flag(inode, FI_DIRTY_INODE) - umount - kill_f2fs_super - kill_block_super - generic_shutdown_super - sync_filesystem : sb is readonly, skip sync_filesystem() - evict_inodes - iput - f2fs_evict_inode - f2fs_bug_on(sbi, is_inode_flag_set(inode, FI_DIRTY_INODE)) : trigger kernel panic When we try to repair i_current_depth in readonly filesystem, let's skip dirty inode to avoid panic in later f2fs_evict_inode(). Cc: stable@vger.kernel.org Reported-by: syzbot+31e4659a3fe953aec2f4@syzkaller.appspotmail.com Closes: https://lore.kernel.org/linux-f2fs-devel/000000000000e890bc0609a55cff@google.com Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/inode.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 33b2778d5452..ffbcf3de2bdb 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -29,6 +29,9 @@ void f2fs_mark_inode_dirty_sync(struct inode *inode, bool sync) if (is_inode_flag_set(inode, FI_NEW_INODE)) return; + if (f2fs_readonly(F2FS_I_SB(inode)->sb)) + return; + if (f2fs_inode_dirtied(inode, sync)) return; From 270b09313b4e16cdfd01d8371023a1661dd1112e Mon Sep 17 00:00:00 2001 From: Zhiguo Niu Date: Mon, 3 Jun 2024 19:35:26 +0800 Subject: [PATCH 09/26] f2fs: use new ioprio Macro to get ckpt thread ioprio level IOPRIO_PRIO_DATA in the new kernel version includes level and hint, So Macro IOPRIO_PRIO_LEVEL is more accurate to get ckpt thread ioprio data/level, and it is also consisten with the way setting ckpt thread ioprio by IOPRIO_PRIO_VALUE(class, data/level). Besides, change variable name from "data" to "level" for more readable. Signed-off-by: Zhiguo Niu Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/sysfs.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 09d3ecfaa4f1..fee7ee45ceaa 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -340,13 +340,13 @@ static ssize_t f2fs_sbi_show(struct f2fs_attr *a, if (!strcmp(a->attr.name, "ckpt_thread_ioprio")) { struct ckpt_req_control *cprc = &sbi->cprc_info; int class = IOPRIO_PRIO_CLASS(cprc->ckpt_thread_ioprio); - int data = IOPRIO_PRIO_DATA(cprc->ckpt_thread_ioprio); + int level = IOPRIO_PRIO_LEVEL(cprc->ckpt_thread_ioprio); if (class != IOPRIO_CLASS_RT && class != IOPRIO_CLASS_BE) return -EINVAL; return sysfs_emit(buf, "%s,%d\n", - class == IOPRIO_CLASS_RT ? "rt" : "be", data); + class == IOPRIO_CLASS_RT ? "rt" : "be", level); } #ifdef CONFIG_F2FS_FS_COMPRESSION @@ -450,7 +450,7 @@ out: const char *name = strim((char *)buf); struct ckpt_req_control *cprc = &sbi->cprc_info; int class; - long data; + long level; int ret; if (!strncmp(name, "rt,", 3)) @@ -461,13 +461,13 @@ out: return -EINVAL; name += 3; - ret = kstrtol(name, 10, &data); + ret = kstrtol(name, 10, &level); if (ret) return ret; - if (data >= IOPRIO_NR_LEVELS || data < 0) + if (level >= IOPRIO_NR_LEVELS || level < 0) return -EINVAL; - cprc->ckpt_thread_ioprio = IOPRIO_PRIO_VALUE(class, data); + cprc->ckpt_thread_ioprio = IOPRIO_PRIO_VALUE(class, level); if (test_opt(sbi, MERGE_CHECKPOINT)) { ret = set_task_ioprio(cprc->f2fs_issue_ckpt, cprc->ckpt_thread_ioprio); From a8eb3de28e7a365690c61161e7a07a4fc7c60bbf Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 3 Jun 2024 09:07:45 +0800 Subject: [PATCH 10/26] f2fs: fix return value of f2fs_convert_inline_inode() If device is readonly, make f2fs_convert_inline_inode() return EROFS instead of zero, otherwise it may trigger panic during writeback of inline inode's dirty page as below: f2fs_write_single_data_page+0xbb6/0x1e90 fs/f2fs/data.c:2888 f2fs_write_cache_pages fs/f2fs/data.c:3187 [inline] __f2fs_write_data_pages fs/f2fs/data.c:3342 [inline] f2fs_write_data_pages+0x1efe/0x3a90 fs/f2fs/data.c:3369 do_writepages+0x359/0x870 mm/page-writeback.c:2634 filemap_fdatawrite_wbc+0x125/0x180 mm/filemap.c:397 __filemap_fdatawrite_range mm/filemap.c:430 [inline] file_write_and_wait_range+0x1aa/0x290 mm/filemap.c:788 f2fs_do_sync_file+0x68a/0x1ae0 fs/f2fs/file.c:276 generic_write_sync include/linux/fs.h:2806 [inline] f2fs_file_write_iter+0x7bd/0x24e0 fs/f2fs/file.c:4977 call_write_iter include/linux/fs.h:2114 [inline] new_sync_write fs/read_write.c:497 [inline] vfs_write+0xa72/0xc90 fs/read_write.c:590 ksys_write+0x1a0/0x2c0 fs/read_write.c:643 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf5/0x240 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f Cc: stable@vger.kernel.org Reported-by: syzbot+848062ba19c8782ca5c8@syzkaller.appspotmail.com Closes: https://lore.kernel.org/linux-f2fs-devel/000000000000d103ce06174d7ec3@google.com Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/inline.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 0203c3baabb6..1fba5728be70 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -221,8 +221,10 @@ int f2fs_convert_inline_inode(struct inode *inode) struct page *ipage, *page; int err = 0; - if (!f2fs_has_inline_data(inode) || - f2fs_hw_is_readonly(sbi) || f2fs_readonly(sbi->sb)) + if (f2fs_hw_is_readonly(sbi) || f2fs_readonly(sbi->sb)) + return -EROFS; + + if (!f2fs_has_inline_data(inode)) return 0; err = f2fs_dquot_initialize(inode); From d7409b05a64f212735f0d33f5f1602051a886eab Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 31 May 2024 10:00:32 +0800 Subject: [PATCH 11/26] f2fs: fix to cover read extent cache access with lock syzbot reports a f2fs bug as below: BUG: KASAN: slab-use-after-free in sanity_check_extent_cache+0x370/0x410 fs/f2fs/extent_cache.c:46 Read of size 4 at addr ffff8880739ab220 by task syz-executor200/5097 CPU: 0 PID: 5097 Comm: syz-executor200 Not tainted 6.9.0-rc6-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 03/27/2024 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0x241/0x360 lib/dump_stack.c:114 print_address_description mm/kasan/report.c:377 [inline] print_report+0x169/0x550 mm/kasan/report.c:488 kasan_report+0x143/0x180 mm/kasan/report.c:601 sanity_check_extent_cache+0x370/0x410 fs/f2fs/extent_cache.c:46 do_read_inode fs/f2fs/inode.c:509 [inline] f2fs_iget+0x33e1/0x46e0 fs/f2fs/inode.c:560 f2fs_nfs_get_inode+0x74/0x100 fs/f2fs/super.c:3237 generic_fh_to_dentry+0x9f/0xf0 fs/libfs.c:1413 exportfs_decode_fh_raw+0x152/0x5f0 fs/exportfs/expfs.c:444 exportfs_decode_fh+0x3c/0x80 fs/exportfs/expfs.c:584 do_handle_to_path fs/fhandle.c:155 [inline] handle_to_path fs/fhandle.c:210 [inline] do_handle_open+0x495/0x650 fs/fhandle.c:226 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf5/0x240 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f We missed to cover sanity_check_extent_cache() w/ extent cache lock, so, below race case may happen, result in use after free issue. - f2fs_iget - do_read_inode - f2fs_init_read_extent_tree : add largest extent entry in to cache - shrink - f2fs_shrink_read_extent_tree - __shrink_extent_tree - __detach_extent_node : drop largest extent entry - sanity_check_extent_cache : access et->largest w/o lock let's refactor sanity_check_extent_cache() to avoid extent cache access and call it before f2fs_init_read_extent_tree() to fix this issue. Reported-by: syzbot+74ebe2104433e9dc610d@syzkaller.appspotmail.com Closes: https://lore.kernel.org/linux-f2fs-devel/00000000000009beea061740a531@google.com Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/extent_cache.c | 50 +++++++++++++++++------------------------- fs/f2fs/f2fs.h | 2 +- fs/f2fs/inode.c | 10 ++++----- 3 files changed, 26 insertions(+), 36 deletions(-) diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index 48048fa36427..fd1fc06359ee 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -19,33 +19,23 @@ #include "node.h" #include -bool sanity_check_extent_cache(struct inode *inode) +bool sanity_check_extent_cache(struct inode *inode, struct page *ipage) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct f2fs_inode_info *fi = F2FS_I(inode); - struct extent_tree *et = fi->extent_tree[EX_READ]; - struct extent_info *ei; + struct f2fs_extent *i_ext = &F2FS_INODE(ipage)->i_ext; + struct extent_info ei; - if (!et) + get_read_extent_info(&ei, i_ext); + + if (!ei.len) return true; - ei = &et->largest; - if (!ei->len) - return true; - - /* Let's drop, if checkpoint got corrupted. */ - if (is_set_ckpt_flags(sbi, CP_ERROR_FLAG)) { - ei->len = 0; - et->largest_updated = true; - return true; - } - - if (!f2fs_is_valid_blkaddr(sbi, ei->blk, DATA_GENERIC_ENHANCE) || - !f2fs_is_valid_blkaddr(sbi, ei->blk + ei->len - 1, + if (!f2fs_is_valid_blkaddr(sbi, ei.blk, DATA_GENERIC_ENHANCE) || + !f2fs_is_valid_blkaddr(sbi, ei.blk + ei.len - 1, DATA_GENERIC_ENHANCE)) { f2fs_warn(sbi, "%s: inode (ino=%lx) extent info [%u, %u, %u] is incorrect, run fsck to fix", __func__, inode->i_ino, - ei->blk, ei->fofs, ei->len); + ei.blk, ei.fofs, ei.len); return false; } return true; @@ -394,24 +384,22 @@ void f2fs_init_read_extent_tree(struct inode *inode, struct page *ipage) if (!__may_extent_tree(inode, EX_READ)) { /* drop largest read extent */ - if (i_ext && i_ext->len) { + if (i_ext->len) { f2fs_wait_on_page_writeback(ipage, NODE, true, true); i_ext->len = 0; set_page_dirty(ipage); } - goto out; + set_inode_flag(inode, FI_NO_EXTENT); + return; } et = __grab_extent_tree(inode, EX_READ); - if (!i_ext || !i_ext->len) - goto out; - get_read_extent_info(&ei, i_ext); write_lock(&et->lock); - if (atomic_read(&et->node_cnt)) - goto unlock_out; + if (atomic_read(&et->node_cnt) || !ei.len) + goto skip; en = __attach_extent_node(sbi, et, &ei, NULL, &et->root.rb_root.rb_node, true); @@ -423,11 +411,13 @@ void f2fs_init_read_extent_tree(struct inode *inode, struct page *ipage) list_add_tail(&en->list, &eti->extent_list); spin_unlock(&eti->extent_lock); } -unlock_out: +skip: + /* Let's drop, if checkpoint got corrupted. */ + if (f2fs_cp_error(sbi)) { + et->largest.len = 0; + et->largest_updated = true; + } write_unlock(&et->lock); -out: - if (!F2FS_I(inode)->extent_tree[EX_READ]) - set_inode_flag(inode, FI_NO_EXTENT); } void f2fs_init_age_extent_tree(struct inode *inode) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index f463961b497c..86d4f7b19881 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -4190,7 +4190,7 @@ void f2fs_leave_shrinker(struct f2fs_sb_info *sbi); /* * extent_cache.c */ -bool sanity_check_extent_cache(struct inode *inode); +bool sanity_check_extent_cache(struct inode *inode, struct page *ipage); void f2fs_init_extent_tree(struct inode *inode); void f2fs_drop_extent_tree(struct inode *inode); void f2fs_destroy_extent_node(struct inode *inode); diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index ffbcf3de2bdb..9290b396e11c 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -511,16 +511,16 @@ static int do_read_inode(struct inode *inode) init_idisk_time(inode); - /* Need all the flag bits */ - f2fs_init_read_extent_tree(inode, node_page); - f2fs_init_age_extent_tree(inode); - - if (!sanity_check_extent_cache(inode)) { + if (!sanity_check_extent_cache(inode, node_page)) { f2fs_put_page(node_page, 1); f2fs_handle_error(sbi, ERROR_CORRUPTED_INODE); return -EFSCORRUPTED; } + /* Need all the flag bits */ + f2fs_init_read_extent_tree(inode, node_page); + f2fs_init_age_extent_tree(inode); + f2fs_put_page(node_page, 1); stat_inc_inline_xattr(inode); From 298b1e4182d657c3e388adcc29477904e9600ed5 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 29 May 2024 18:01:03 +0800 Subject: [PATCH 12/26] f2fs: fix to truncate preallocated blocks in f2fs_file_open() chenyuwen reports a f2fs bug as below: Unable to handle kernel NULL pointer dereference at virtual address 0000000000000011 fscrypt_set_bio_crypt_ctx+0x78/0x1e8 f2fs_grab_read_bio+0x78/0x208 f2fs_submit_page_read+0x44/0x154 f2fs_get_read_data_page+0x288/0x5f4 f2fs_get_lock_data_page+0x60/0x190 truncate_partial_data_page+0x108/0x4fc f2fs_do_truncate_blocks+0x344/0x5f0 f2fs_truncate_blocks+0x6c/0x134 f2fs_truncate+0xd8/0x200 f2fs_iget+0x20c/0x5ac do_garbage_collect+0x5d0/0xf6c f2fs_gc+0x22c/0x6a4 f2fs_disable_checkpoint+0xc8/0x310 f2fs_fill_super+0x14bc/0x1764 mount_bdev+0x1b4/0x21c f2fs_mount+0x20/0x30 legacy_get_tree+0x50/0xbc vfs_get_tree+0x5c/0x1b0 do_new_mount+0x298/0x4cc path_mount+0x33c/0x5fc __arm64_sys_mount+0xcc/0x15c invoke_syscall+0x60/0x150 el0_svc_common+0xb8/0xf8 do_el0_svc+0x28/0xa0 el0_svc+0x24/0x84 el0t_64_sync_handler+0x88/0xec It is because inode.i_crypt_info is not initialized during below path: - mount - f2fs_fill_super - f2fs_disable_checkpoint - f2fs_gc - f2fs_iget - f2fs_truncate So, let's relocate truncation of preallocated blocks to f2fs_file_open(), after fscrypt_file_open(). Fixes: d4dd19ec1ea0 ("f2fs: do not expose unwritten blocks to user by DIO") Reported-by: chenyuwen Closes: https://lore.kernel.org/linux-kernel/20240517085327.1188515-1-yuwen.chen@xjmz.com Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 1 + fs/f2fs/file.c | 42 +++++++++++++++++++++++++++++++++++++++++- fs/f2fs/inode.c | 8 -------- 3 files changed, 42 insertions(+), 9 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 86d4f7b19881..9688df332147 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -803,6 +803,7 @@ enum { FI_COW_FILE, /* indicate COW file */ FI_ATOMIC_COMMITTED, /* indicate atomic commit completed except disk sync */ FI_ATOMIC_REPLACE, /* indicate atomic replace */ + FI_OPENED_FILE, /* indicate file has been opened */ FI_MAX, /* max flag, never be used */ }; diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 0cbaeb97c7f5..c50213da474d 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -554,6 +554,42 @@ static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma) return 0; } +static int finish_preallocate_blocks(struct inode *inode) +{ + int ret; + + inode_lock(inode); + if (is_inode_flag_set(inode, FI_OPENED_FILE)) { + inode_unlock(inode); + return 0; + } + + if (!file_should_truncate(inode)) { + set_inode_flag(inode, FI_OPENED_FILE); + inode_unlock(inode); + return 0; + } + + f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + filemap_invalidate_lock(inode->i_mapping); + + truncate_setsize(inode, i_size_read(inode)); + ret = f2fs_truncate(inode); + + filemap_invalidate_unlock(inode->i_mapping); + f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + + if (!ret) + set_inode_flag(inode, FI_OPENED_FILE); + + inode_unlock(inode); + if (ret) + return ret; + + file_dont_truncate(inode); + return 0; +} + static int f2fs_file_open(struct inode *inode, struct file *filp) { int err = fscrypt_file_open(inode, filp); @@ -571,7 +607,11 @@ static int f2fs_file_open(struct inode *inode, struct file *filp) filp->f_mode |= FMODE_NOWAIT; filp->f_mode |= FMODE_CAN_ODIRECT; - return dquot_file_open(inode, filp); + err = dquot_file_open(inode, filp); + if (err) + return err; + + return finish_preallocate_blocks(inode); } void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count) diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 9290b396e11c..dbfebbddf675 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -613,14 +613,6 @@ make_now: } f2fs_set_inode_flags(inode); - if (file_should_truncate(inode) && - !is_sbi_flag_set(sbi, SBI_POR_DOING)) { - ret = f2fs_truncate(inode); - if (ret) - goto bad_inode; - file_dont_truncate(inode); - } - unlock_new_inode(inode); trace_f2fs_iget(inode); return inode; From 6efc3a05e6132812edf7eee0eb040eb88af34203 Mon Sep 17 00:00:00 2001 From: Zhiguo Niu Date: Fri, 7 Jun 2024 17:00:30 +0800 Subject: [PATCH 13/26] f2fs: enable atgc dynamically if conditions are met Now atgc can only be enabled when umounted->mounted device even related conditions have reached. If the device has not be umounted->mounted for a long time, atgc can not work. So enable atgc dynamically when atgc_age_threshold is less than elapsed_time and ATGC mount option is on. Signed-off-by: Chao Yu Signed-off-by: Zhiguo Niu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 1 + fs/f2fs/f2fs.h | 1 + fs/f2fs/segment.c | 27 ++++++++++++++++++++++++--- 3 files changed, 26 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 55d444bec5c0..7cfe4e01dd7e 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1718,6 +1718,7 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) } f2fs_restore_inmem_curseg(sbi); + f2fs_reinit_atgc_curseg(sbi); stat_inc_cp_count(sbi); stop: unblock_operations(sbi); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 9688df332147..8d385a1c75ad 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3693,6 +3693,7 @@ void f2fs_release_discard_addrs(struct f2fs_sb_info *sbi); int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra); bool f2fs_segment_has_free_slot(struct f2fs_sb_info *sbi, int segno); int f2fs_init_inmem_curseg(struct f2fs_sb_info *sbi); +int f2fs_reinit_atgc_curseg(struct f2fs_sb_info *sbi); void f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi); void f2fs_restore_inmem_curseg(struct f2fs_sb_info *sbi); int f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type, diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 6e8a4b332ad5..362cfb550408 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2931,12 +2931,12 @@ static int get_atssr_segment(struct f2fs_sb_info *sbi, int type, return ret; } -static int __f2fs_init_atgc_curseg(struct f2fs_sb_info *sbi) +static int __f2fs_init_atgc_curseg(struct f2fs_sb_info *sbi, bool force) { struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_ALL_DATA_ATGC); int ret = 0; - if (!sbi->am.atgc_enabled) + if (!sbi->am.atgc_enabled && !force) return 0; f2fs_down_read(&SM_I(sbi)->curseg_lock); @@ -2953,9 +2953,30 @@ static int __f2fs_init_atgc_curseg(struct f2fs_sb_info *sbi) f2fs_up_read(&SM_I(sbi)->curseg_lock); return ret; } + int f2fs_init_inmem_curseg(struct f2fs_sb_info *sbi) { - return __f2fs_init_atgc_curseg(sbi); + return __f2fs_init_atgc_curseg(sbi, false); +} + +int f2fs_reinit_atgc_curseg(struct f2fs_sb_info *sbi) +{ + int ret; + + if (!test_opt(sbi, ATGC)) + return 0; + if (sbi->am.atgc_enabled) + return 0; + if (le64_to_cpu(F2FS_CKPT(sbi)->elapsed_time) < + sbi->am.age_threshold) + return 0; + + ret = __f2fs_init_atgc_curseg(sbi, true); + if (!ret) { + sbi->am.atgc_enabled = true; + f2fs_info(sbi, "reenabled age threshold GC"); + } + return ret; } static void __f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi, int type) From 6aeb084fa0b1ada0290df08c5230e41881aef783 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 12 Jun 2024 18:21:56 +0000 Subject: [PATCH 14/26] f2fs: clean up set REQ_RAHEAD given rac Let's set REQ_RAHEAD per rac by single source. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 2 +- fs/f2fs/data.c | 17 +++++++++++------ fs/f2fs/f2fs.h | 2 +- 3 files changed, 13 insertions(+), 8 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 1ef82a546391..990b93689b46 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -1100,7 +1100,7 @@ retry: struct bio *bio = NULL; ret = f2fs_read_multi_pages(cc, &bio, cc->cluster_size, - &last_block_in_bio, false, true); + &last_block_in_bio, NULL, true); f2fs_put_rpages(cc); f2fs_destroy_compress_ctx(cc, true); if (ret) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index b9b0debc6b3d..b6dcb3bcaef7 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2067,12 +2067,17 @@ static inline loff_t f2fs_readpage_limit(struct inode *inode) return i_size_read(inode); } +static inline blk_opf_t f2fs_ra_op_flags(struct readahead_control *rac) +{ + return rac ? REQ_RAHEAD : 0; +} + static int f2fs_read_single_page(struct inode *inode, struct folio *folio, unsigned nr_pages, struct f2fs_map_blocks *map, struct bio **bio_ret, sector_t *last_block_in_bio, - bool is_readahead) + struct readahead_control *rac) { struct bio *bio = *bio_ret; const unsigned blocksize = blks_to_bytes(inode, 1); @@ -2148,7 +2153,7 @@ submit_and_realloc: } if (bio == NULL) { bio = f2fs_grab_read_bio(inode, block_nr, nr_pages, - is_readahead ? REQ_RAHEAD : 0, index, + f2fs_ra_op_flags(rac), index, false); if (IS_ERR(bio)) { ret = PTR_ERR(bio); @@ -2178,7 +2183,7 @@ out: #ifdef CONFIG_F2FS_FS_COMPRESSION int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret, unsigned nr_pages, sector_t *last_block_in_bio, - bool is_readahead, bool for_write) + struct readahead_control *rac, bool for_write) { struct dnode_of_data dn; struct inode *inode = cc->inode; @@ -2301,7 +2306,7 @@ submit_and_realloc: if (!bio) { bio = f2fs_grab_read_bio(inode, blkaddr, nr_pages, - is_readahead ? REQ_RAHEAD : 0, + f2fs_ra_op_flags(rac), page->index, for_write); if (IS_ERR(bio)) { ret = PTR_ERR(bio); @@ -2399,7 +2404,7 @@ static int f2fs_mpage_readpages(struct inode *inode, ret = f2fs_read_multi_pages(&cc, &bio, max_nr_pages, &last_block_in_bio, - rac != NULL, false); + rac, false); f2fs_destroy_compress_ctx(&cc, false); if (ret) goto set_error_page; @@ -2449,7 +2454,7 @@ next_page: ret = f2fs_read_multi_pages(&cc, &bio, max_nr_pages, &last_block_in_bio, - rac != NULL, false); + rac, false); f2fs_destroy_compress_ctx(&cc, false); } } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 8d385a1c75ad..f7ee6c5e371e 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -4298,7 +4298,7 @@ void f2fs_update_read_extent_tree_range_compressed(struct inode *inode, unsigned int llen, unsigned int c_len); int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret, unsigned nr_pages, sector_t *last_block_in_bio, - bool is_readahead, bool for_write); + struct readahead_control *rac, bool for_write); struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc); void f2fs_decompress_end_io(struct decompress_io_ctx *dic, bool failed, bool in_task); From 1efb7c8fd8bf6b9d6f10dd2a9e0fde9e7a650ab4 Mon Sep 17 00:00:00 2001 From: Zhiguo Niu Date: Thu, 13 Jun 2024 17:35:33 +0800 Subject: [PATCH 15/26] f2fs: fix to use mnt_{want,drop}_write_file replace file_{start,end}_wrtie mnt_{want,drop}_write_file is more suitable than file_{start,end}_wrtie and also is consistent with other ioctl operations. Signed-off-by: Zhiguo Niu Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index c50213da474d..e4a7cff00796 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -3935,7 +3935,9 @@ static int f2fs_sec_trim_file(struct file *filp, unsigned long arg) IS_ENCRYPTED(inode) && f2fs_is_multi_device(sbi))) return -EOPNOTSUPP; - file_start_write(filp); + ret = mnt_want_write_file(filp); + if (ret) + return ret; inode_lock(inode); if (f2fs_is_atomic_file(inode) || f2fs_compressed_file(inode) || @@ -4061,7 +4063,7 @@ out: f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); err: inode_unlock(inode); - file_end_write(filp); + mnt_drop_write_file(filp); return ret; } @@ -4115,7 +4117,9 @@ static int f2fs_ioc_set_compress_option(struct file *filp, unsigned long arg) option.algorithm >= COMPRESS_MAX) return -EINVAL; - file_start_write(filp); + ret = mnt_want_write_file(filp); + if (ret) + return ret; inode_lock(inode); f2fs_down_write(&F2FS_I(inode)->i_sem); @@ -4154,7 +4158,7 @@ static int f2fs_ioc_set_compress_option(struct file *filp, unsigned long arg) out: f2fs_up_write(&F2FS_I(inode)->i_sem); inode_unlock(inode); - file_end_write(filp); + mnt_drop_write_file(filp); return ret; } @@ -4211,7 +4215,9 @@ static int f2fs_ioc_decompress_file(struct file *filp) f2fs_balance_fs(sbi, true); - file_start_write(filp); + ret = mnt_want_write_file(filp); + if (ret) + return ret; inode_lock(inode); if (!f2fs_is_compress_backend_ready(inode)) { @@ -4266,7 +4272,7 @@ static int f2fs_ioc_decompress_file(struct file *filp) f2fs_update_time(sbi, REQ_TIME); out: inode_unlock(inode); - file_end_write(filp); + mnt_drop_write_file(filp); return ret; } @@ -4288,7 +4294,9 @@ static int f2fs_ioc_compress_file(struct file *filp) f2fs_balance_fs(sbi, true); - file_start_write(filp); + ret = mnt_want_write_file(filp); + if (ret) + return ret; inode_lock(inode); if (!f2fs_is_compress_backend_ready(inode)) { @@ -4344,7 +4352,7 @@ static int f2fs_ioc_compress_file(struct file *filp) f2fs_update_time(sbi, REQ_TIME); out: inode_unlock(inode); - file_end_write(filp); + mnt_drop_write_file(filp); return ret; } From 8cb1f4080dd91c6e6b01dbea013a3f42341cb6a1 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 18 Jun 2024 02:15:38 +0000 Subject: [PATCH 16/26] f2fs: assign CURSEG_ALL_DATA_ATGC if blkaddr is valid mkdir /mnt/test/comp f2fs_io setflags compression /mnt/test/comp dd if=/dev/zero of=/mnt/test/comp/testfile bs=16k count=1 truncate --size 13 /mnt/test/comp/testfile In the above scenario, we can get a BUG_ON. kernel BUG at fs/f2fs/segment.c:3589! Call Trace: do_write_page+0x78/0x390 [f2fs] f2fs_outplace_write_data+0x62/0xb0 [f2fs] f2fs_do_write_data_page+0x275/0x740 [f2fs] f2fs_write_single_data_page+0x1dc/0x8f0 [f2fs] f2fs_write_multi_pages+0x1e5/0xae0 [f2fs] f2fs_write_cache_pages+0xab1/0xc60 [f2fs] f2fs_write_data_pages+0x2d8/0x330 [f2fs] do_writepages+0xcf/0x270 __writeback_single_inode+0x44/0x350 writeback_sb_inodes+0x242/0x530 __writeback_inodes_wb+0x54/0xf0 wb_writeback+0x192/0x310 wb_workfn+0x30d/0x400 The reason is we gave CURSEG_ALL_DATA_ATGC to COMPR_ADDR where the page was set the gcing flag by set_cluster_dirty(). Cc: stable@vger.kernel.org Fixes: 4961acdd65c9 ("f2fs: fix to tag gcing flag on page during block migration") Reviewed-by: Chao Yu Tested-by: Will McVicker Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 362cfb550408..4db1add43e36 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -3505,6 +3505,7 @@ static int __get_segment_type_6(struct f2fs_io_info *fio) if (fio->sbi->am.atgc_enabled && (fio->io_type == FS_DATA_IO) && (fio->sbi->gc_mode != GC_URGENT_HIGH) && + __is_valid_data_blkaddr(fio->old_blkaddr) && !is_inode_flag_set(inode, FI_OPU_WRITE)) return CURSEG_ALL_DATA_ATGC; else From 388a2a0640e16a8887f0d47dab207f344fbdb913 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 6 Jun 2024 17:55:51 +0800 Subject: [PATCH 17/26] f2fs: remove redundant sanity check in sanity_check_inode() Commit f240d3aaf5a1 ("f2fs: do more sanity check on inode") missed to remove redundant sanity check on flexible_inline_xattr flag, fix it. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/inode.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index dbfebbddf675..7a3e2458b2d9 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -313,10 +313,6 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page) if (!sanity_check_compress_inode(inode, ri)) return false; } - } else if (f2fs_sb_has_flexible_inline_xattr(sbi)) { - f2fs_warn(sbi, "%s: corrupted inode ino=%lx, run fsck to fix.", - __func__, inode->i_ino); - return false; } if (!f2fs_sb_has_extra_attr(sbi)) { From 8c409989678e92e4a737e7cd2bb04f3efb81071a Mon Sep 17 00:00:00 2001 From: Sheng Yong Date: Mon, 8 Jul 2024 20:04:07 +0800 Subject: [PATCH 18/26] f2fs: fix start segno of large section get_ckpt_valid_blocks() checks valid ckpt blocks in current section. It counts all vblocks from the first to the last segment in the large section. However, START_SEGNO() is used to get the first segno in an SIT block. This patch fixes that to get the correct start segno. Fixes: 61461fc921b7 ("f2fs: fix to avoid touching checkpointed data in get_victim()") Signed-off-by: Sheng Yong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index e1c0f418aa11..bfc01a521cb9 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -347,7 +347,8 @@ static inline unsigned int get_ckpt_valid_blocks(struct f2fs_sb_info *sbi, unsigned int segno, bool use_section) { if (use_section && __is_large_section(sbi)) { - unsigned int start_segno = START_SEGNO(segno); + unsigned int secno = GET_SEC_FROM_SEG(sbi, segno); + unsigned int start_segno = GET_SEG_FROM_SEC(sbi, secno); unsigned int blocks = 0; int i; From c82bc1ab2a8a5e73d9728e80c4c2ed87e8921a38 Mon Sep 17 00:00:00 2001 From: Daejun Park Date: Thu, 4 Jul 2024 10:01:21 +0900 Subject: [PATCH 19/26] f2fs: fix null reference error when checking end of zone This patch fixes a potentially null pointer being accessed by is_end_zone_blkaddr() that checks the last block of a zone when f2fs is mounted as a single device. Fixes: e067dc3c6b9c ("f2fs: maintain six open zones for zoned devices") Signed-off-by: Daejun Park Reviewed-by: Chao Yu Reviewed-by: Daeho Jeong Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index b6dcb3bcaef7..1aa7eefa659c 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -925,6 +925,7 @@ alloc_new: #ifdef CONFIG_BLK_DEV_ZONED static bool is_end_zone_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr) { + struct block_device *bdev = sbi->sb->s_bdev; int devi = 0; if (f2fs_is_multi_device(sbi)) { @@ -935,8 +936,9 @@ static bool is_end_zone_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr) return false; } blkaddr -= FDEV(devi).start_blk; + bdev = FDEV(devi).bdev; } - return bdev_is_zoned(FDEV(devi).bdev) && + return bdev_is_zoned(bdev) && f2fs_blkz_is_seq(sbi, devi, blkaddr) && (blkaddr % sbi->blocks_per_blkz == sbi->blocks_per_blkz - 1); } From 54f43a10fa257ad4af02a1d157fefef6ebcfa7dc Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Fri, 28 Jun 2024 23:38:17 -0500 Subject: [PATCH 20/26] f2fs: remove unreachable lazytime mount option parsing The lazytime/nolazytime options are now handled in the VFS, and are never seen in filesystem parsers, so remove handling of these options from f2fs. Note: when lazytime support was added in 6d94c74ab85f it made lazytime the default in default_options() - as a result, lazytime cannot be disabled (because Opt_nolazytime is never seen in f2fs parsing). If lazytime is desired to be configurable, and default off is OK, default_options() could be updated to stop setting it by default and allow mount option control. Signed-off-by: Eric Sandeen Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 4a1bc8f40f9a..28335b5a7af6 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -151,8 +151,6 @@ enum { Opt_mode, Opt_fault_injection, Opt_fault_type, - Opt_lazytime, - Opt_nolazytime, Opt_quota, Opt_noquota, Opt_usrquota, @@ -229,8 +227,6 @@ static match_table_t f2fs_tokens = { {Opt_mode, "mode=%s"}, {Opt_fault_injection, "fault_injection=%u"}, {Opt_fault_type, "fault_type=%u"}, - {Opt_lazytime, "lazytime"}, - {Opt_nolazytime, "nolazytime"}, {Opt_quota, "quota"}, {Opt_noquota, "noquota"}, {Opt_usrquota, "usrquota"}, @@ -918,12 +914,6 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount) f2fs_info(sbi, "fault_type options not supported"); break; #endif - case Opt_lazytime: - sb->s_flags |= SB_LAZYTIME; - break; - case Opt_nolazytime: - sb->s_flags &= ~SB_LAZYTIME; - break; #ifdef CONFIG_QUOTA case Opt_quota: case Opt_usrquota: From f06c0f82e38bbda7264d6ef3c90045ad2810e0f3 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 25 Jun 2024 10:32:39 +0800 Subject: [PATCH 21/26] f2fs: fix to update user block counts in block_operations() Commit 59c9081bc86e ("f2fs: allow write page cache when writting cp") allows write() to write data to page cache during checkpoint, so block count fields like .total_valid_block_count, .alloc_valid_block_count and .rf_node_block_count may encounter race condition as below: CP Thread A - write_checkpoint - block_operations - f2fs_down_write(&sbi->node_change) - __prepare_cp_block : ckpt->valid_block_count = .total_valid_block_count - f2fs_up_write(&sbi->node_change) - write - f2fs_preallocate_blocks - f2fs_map_blocks(,F2FS_GET_BLOCK_PRE_AIO) - f2fs_map_lock - f2fs_down_read(&sbi->node_change) - f2fs_reserve_new_blocks - inc_valid_block_count : percpu_counter_add(&sbi->alloc_valid_block_count, count) : sbi->total_valid_block_count += count - f2fs_up_read(&sbi->node_change) - do_checkpoint : sbi->last_valid_block_count = sbi->total_valid_block_count : percpu_counter_set(&sbi->alloc_valid_block_count, 0) : percpu_counter_set(&sbi->rf_node_block_count, 0) - fsync - need_do_checkpoint - f2fs_space_for_roll_forward : alloc_valid_block_count was reset to zero, so, it may missed last data during checkpoint Let's change to update .total_valid_block_count, .alloc_valid_block_count and .rf_node_block_count in block_operations(), then their access can be protected by .node_change and .cp_rwsem lock, so that it can avoid above race condition. Fixes: 59c9081bc86e ("f2fs: allow write page cache when writting cp") Cc: Yunlei He Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 7cfe4e01dd7e..bdd96329dddd 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1186,6 +1186,11 @@ static void __prepare_cp_block(struct f2fs_sb_info *sbi) ckpt->valid_node_count = cpu_to_le32(valid_node_count(sbi)); ckpt->valid_inode_count = cpu_to_le32(valid_inode_count(sbi)); ckpt->next_free_nid = cpu_to_le32(last_nid); + + /* update user_block_counts */ + sbi->last_valid_block_count = sbi->total_valid_block_count; + percpu_counter_set(&sbi->alloc_valid_block_count, 0); + percpu_counter_set(&sbi->rf_node_block_count, 0); } static bool __need_flush_quota(struct f2fs_sb_info *sbi) @@ -1575,11 +1580,6 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) start_blk += NR_CURSEG_NODE_TYPE; } - /* update user_block_counts */ - sbi->last_valid_block_count = sbi->total_valid_block_count; - percpu_counter_set(&sbi->alloc_valid_block_count, 0); - percpu_counter_set(&sbi->rf_node_block_count, 0); - /* Here, we have one bio having CP pack except cp pack 2 page */ f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO); /* Wait for all dirty meta pages to be submitted for IO */ From e3a19972a49f61775e5e11ac43a663f28cdfb8b2 Mon Sep 17 00:00:00 2001 From: Sheng Yong Date: Wed, 10 Jul 2024 09:26:59 +0800 Subject: [PATCH 22/26] f2fs: only fragment segment in the same section When new_curseg() is allocating a new segment, if mode=fragment:xxx is switched on in large section scenario, __get_next_segno() will select the next segno randomly in the range of [0, maxsegno] in order to fragment segments. If the candidate segno is free, get_new_segment() will use it directly as the new segment. However, if the section of the candidate is not empty, and some other segments have already been used, and have a different type (e.g NODE) with the candidate (e.g DATA), GC will complain inconsistent segment type later. This could be reproduced by the following steps: dd if=/dev/zero of=test.img bs=1M count=10240 mkfs.f2fs -s 128 test.img mount -t f2fs test.img /mnt -o mode=fragment:block echo 1 > /sys/fs/f2fs/loop0/max_fragment_chunk echo 512 > /sys/fs/f2fs/loop0/max_fragment_hole dd if=/dev/zero of=/mnt/testfile bs=4K count=100 umount /mnt F2FS-fs (loop0): Inconsistent segment (4377) type [0, 1] in SSA and SIT In order to allow simulating segment fragmentation in large section scenario, this patch reduces the candidate range: * if curseg is the last segment in the section, return curseg->segno to make get_new_segment() itself find the next free segment. * if curseg is in the middle of the section, select candicate randomly in the range of [curseg + 1, last_seg_in_the_same_section] to keep type consistent. Reviewed-by: Chao Yu Signed-off-by: Sheng Yong Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 4db1add43e36..64b11d88a21c 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2784,11 +2784,19 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type) unsigned short seg_type = curseg->seg_type; sanity_check_seg_type(sbi, seg_type); - if (f2fs_need_rand_seg(sbi)) - return get_random_u32_below(MAIN_SECS(sbi) * SEGS_PER_SEC(sbi)); + if (__is_large_section(sbi)) { + if (f2fs_need_rand_seg(sbi)) { + unsigned int hint = GET_SEC_FROM_SEG(sbi, curseg->segno); - if (__is_large_section(sbi)) + if (GET_SEC_FROM_SEG(sbi, curseg->segno + 1) != hint) + return curseg->segno; + return get_random_u32_inclusive(curseg->segno + 1, + GET_SEG_FROM_SEC(sbi, hint + 1) - 1); + } return curseg->segno; + } else if (f2fs_need_rand_seg(sbi)) { + return get_random_u32_below(MAIN_SECS(sbi) * SEGS_PER_SEC(sbi)); + } /* inmem log may not locate on any segment after mount */ if (!curseg->inited) From b40a2b00370931b0c50148681dd7364573e52e6b Mon Sep 17 00:00:00 2001 From: Sunmin Jeong Date: Wed, 10 Jul 2024 20:51:17 +0900 Subject: [PATCH 23/26] f2fs: use meta inode for GC of atomic file The page cache of the atomic file keeps new data pages which will be stored in the COW file. It can also keep old data pages when GCing the atomic file. In this case, new data can be overwritten by old data if a GC thread sets the old data page as dirty after new data page was evicted. Also, since all writes to the atomic file are redirected to COW inodes, GC for the atomic file is not working well as below. f2fs_gc(gc_type=FG_GC) - select A as a victim segment do_garbage_collect - iget atomic file's inode for block B move_data_page f2fs_do_write_data_page - use dn of cow inode - set fio->old_blkaddr from cow inode - seg_freed is 0 since block B is still valid - goto gc_more and A is selected as victim again To solve the problem, let's separate GC writes and updates in the atomic file by using the meta inode for GC writes. Fixes: 3db1de0e582c ("f2fs: change the current atomic write way") Cc: stable@vger.kernel.org #v5.19+ Reviewed-by: Sungjong Seo Reviewed-by: Yeongjin Gil Signed-off-by: Sunmin Jeong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 4 ++-- fs/f2fs/f2fs.h | 7 ++++++- fs/f2fs/gc.c | 6 +++--- fs/f2fs/segment.c | 6 +++--- 4 files changed, 14 insertions(+), 9 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 1aa7eefa659c..1e4937af50f3 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2695,7 +2695,7 @@ got_it: } /* wait for GCed page writeback via META_MAPPING */ - if (fio->post_read) + if (fio->meta_gc) f2fs_wait_on_block_writeback(inode, fio->old_blkaddr); /* @@ -2790,7 +2790,7 @@ int f2fs_write_single_data_page(struct page *page, int *submitted, .submitted = 0, .compr_blocks = compr_blocks, .need_lock = compr_blocks ? LOCK_DONE : LOCK_RETRY, - .post_read = f2fs_post_read_required(inode) ? 1 : 0, + .meta_gc = f2fs_meta_inode_gc_required(inode) ? 1 : 0, .io_type = io_type, .io_wbc = wbc, .bio = bio, diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index f7ee6c5e371e..796ae11c0fa3 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1211,7 +1211,7 @@ struct f2fs_io_info { unsigned int in_list:1; /* indicate fio is in io_list */ unsigned int is_por:1; /* indicate IO is from recovery or not */ unsigned int encrypted:1; /* indicate file is encrypted */ - unsigned int post_read:1; /* require post read */ + unsigned int meta_gc:1; /* require meta inode GC */ enum iostat_type io_type; /* io type */ struct writeback_control *io_wbc; /* writeback control */ struct bio **bio; /* bio for ipu */ @@ -4263,6 +4263,11 @@ static inline bool f2fs_post_read_required(struct inode *inode) f2fs_compressed_file(inode); } +static inline bool f2fs_meta_inode_gc_required(struct inode *inode) +{ + return f2fs_post_read_required(inode) || f2fs_is_atomic_file(inode); +} + /* * compress.c */ diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index ef667fec9a12..cb3006551ab5 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1589,7 +1589,7 @@ next_step: start_bidx = f2fs_start_bidx_of_node(nofs, inode) + ofs_in_node; - if (f2fs_post_read_required(inode)) { + if (f2fs_meta_inode_gc_required(inode)) { int err = ra_data_block(inode, start_bidx); f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); @@ -1640,7 +1640,7 @@ next_step: start_bidx = f2fs_start_bidx_of_node(nofs, inode) + ofs_in_node; - if (f2fs_post_read_required(inode)) + if (f2fs_meta_inode_gc_required(inode)) err = move_data_block(inode, start_bidx, gc_type, segno, off); else @@ -1648,7 +1648,7 @@ next_step: segno, off); if (!err && (gc_type == FG_GC || - f2fs_post_read_required(inode))) + f2fs_meta_inode_gc_required(inode))) submitted++; if (locked) { diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 64b11d88a21c..78c3198a6308 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -3859,7 +3859,7 @@ int f2fs_inplace_write_data(struct f2fs_io_info *fio) goto drop_bio; } - if (fio->post_read) + if (fio->meta_gc) f2fs_truncate_meta_inode_pages(sbi, fio->new_blkaddr, 1); stat_inc_inplace_blocks(fio->sbi); @@ -4029,7 +4029,7 @@ void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr) struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct page *cpage; - if (!f2fs_post_read_required(inode)) + if (!f2fs_meta_inode_gc_required(inode)) return; if (!__is_valid_data_blkaddr(blkaddr)) @@ -4048,7 +4048,7 @@ void f2fs_wait_on_block_writeback_range(struct inode *inode, block_t blkaddr, struct f2fs_sb_info *sbi = F2FS_I_SB(inode); block_t i; - if (!f2fs_post_read_required(inode)) + if (!f2fs_meta_inode_gc_required(inode)) return; for (i = 0; i < len; i++) From f18d0076933689775fe7faeeb10ee93ff01be6ab Mon Sep 17 00:00:00 2001 From: Sunmin Jeong Date: Wed, 10 Jul 2024 20:51:43 +0900 Subject: [PATCH 24/26] f2fs: use meta inode for GC of COW file In case of the COW file, new updates and GC writes are already separated to page caches of the atomic file and COW file. As some cases that use the meta inode for GC, there are some race issues between a foreground thread and GC thread. To handle them, we need to take care when to invalidate and wait writeback of GC pages in COW files as the case of using the meta inode. Also, a pointer from the COW inode to the original inode is required to check the state of original pages. For the former, we can solve the problem by using the meta inode for GC of COW files. Then let's get a page from the original inode in move_data_block when GCing the COW file to avoid race condition. Fixes: 3db1de0e582c ("f2fs: change the current atomic write way") Cc: stable@vger.kernel.org #v5.19+ Reviewed-by: Sungjong Seo Reviewed-by: Yeongjin Gil Signed-off-by: Sunmin Jeong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 2 +- fs/f2fs/f2fs.h | 13 +++++++++++-- fs/f2fs/file.c | 3 +++ fs/f2fs/gc.c | 7 +++++-- fs/f2fs/inline.c | 2 +- fs/f2fs/inode.c | 3 ++- 6 files changed, 23 insertions(+), 7 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 1e4937af50f3..6457e5bca9c9 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2608,7 +2608,7 @@ bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio) return true; if (IS_NOQUOTA(inode)) return true; - if (f2fs_is_atomic_file(inode)) + if (f2fs_used_in_atomic_write(inode)) return true; /* rewrite low ratio compress data w/ OPU mode to avoid fragmentation */ if (f2fs_compressed_file(inode) && diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 796ae11c0fa3..4a8621e4a33a 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -843,7 +843,11 @@ struct f2fs_inode_info { struct task_struct *atomic_write_task; /* store atomic write task */ struct extent_tree *extent_tree[NR_EXTENT_CACHES]; /* cached extent_tree entry */ - struct inode *cow_inode; /* copy-on-write inode for atomic write */ + union { + struct inode *cow_inode; /* copy-on-write inode for atomic write */ + struct inode *atomic_inode; + /* point to atomic_inode, available only for cow_inode */ + }; /* avoid racing between foreground op and gc */ struct f2fs_rwsem i_gc_rwsem[2]; @@ -4263,9 +4267,14 @@ static inline bool f2fs_post_read_required(struct inode *inode) f2fs_compressed_file(inode); } +static inline bool f2fs_used_in_atomic_write(struct inode *inode) +{ + return f2fs_is_atomic_file(inode) || f2fs_is_cow_file(inode); +} + static inline bool f2fs_meta_inode_gc_required(struct inode *inode) { - return f2fs_post_read_required(inode) || f2fs_is_atomic_file(inode); + return f2fs_post_read_required(inode) || f2fs_used_in_atomic_write(inode); } /* diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index e4a7cff00796..547e7ec32b1f 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -2183,6 +2183,9 @@ static int f2fs_ioc_start_atomic_write(struct file *filp, bool truncate) set_inode_flag(fi->cow_inode, FI_COW_FILE); clear_inode_flag(fi->cow_inode, FI_INLINE_DATA); + + /* Set the COW inode's atomic_inode to the atomic inode */ + F2FS_I(fi->cow_inode)->atomic_inode = inode; } else { /* Reuse the already created COW inode */ ret = f2fs_do_truncate_blocks(fi->cow_inode, 0, true); diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index cb3006551ab5..724bbcb447d3 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1171,7 +1171,8 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, static int ra_data_block(struct inode *inode, pgoff_t index) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct address_space *mapping = inode->i_mapping; + struct address_space *mapping = f2fs_is_cow_file(inode) ? + F2FS_I(inode)->atomic_inode->i_mapping : inode->i_mapping; struct dnode_of_data dn; struct page *page; struct f2fs_io_info fio = { @@ -1260,6 +1261,8 @@ put_page: static int move_data_block(struct inode *inode, block_t bidx, int gc_type, unsigned int segno, int off) { + struct address_space *mapping = f2fs_is_cow_file(inode) ? + F2FS_I(inode)->atomic_inode->i_mapping : inode->i_mapping; struct f2fs_io_info fio = { .sbi = F2FS_I_SB(inode), .ino = inode->i_ino, @@ -1282,7 +1285,7 @@ static int move_data_block(struct inode *inode, block_t bidx, CURSEG_ALL_DATA_ATGC : CURSEG_COLD_DATA; /* do not read out */ - page = f2fs_grab_cache_page(inode->i_mapping, bidx, false); + page = f2fs_grab_cache_page(mapping, bidx, false); if (!page) return -ENOMEM; diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 1fba5728be70..cca7d448e55c 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -16,7 +16,7 @@ static bool support_inline_data(struct inode *inode) { - if (f2fs_is_atomic_file(inode)) + if (f2fs_used_in_atomic_write(inode)) return false; if (!S_ISREG(inode->i_mode) && !S_ISLNK(inode->i_mode)) return false; diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 7a3e2458b2d9..18dea43e694b 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -804,8 +804,9 @@ void f2fs_evict_inode(struct inode *inode) f2fs_abort_atomic_write(inode, true); - if (fi->cow_inode) { + if (fi->cow_inode && f2fs_is_cow_file(fi->cow_inode)) { clear_inode_flag(fi->cow_inode, FI_COW_FILE); + F2FS_I(fi->cow_inode)->atomic_inode = NULL; iput(fi->cow_inode); fi->cow_inode = NULL; } From 7309871c03be22bee843f6961dcce183f04d48a9 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 25 Jun 2024 11:16:02 +0800 Subject: [PATCH 25/26] f2fs: clean up F2FS_I() Use temporary variable instead of F2FS_I() for cleanup. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 32 ++++++++++------------- fs/f2fs/file.c | 64 ++++++++++++++++++++++++---------------------- fs/f2fs/inode.c | 54 +++++++++++++++++--------------------- fs/f2fs/namei.c | 20 ++++++++------- fs/f2fs/recovery.c | 11 ++++---- 5 files changed, 88 insertions(+), 93 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 4a8621e4a33a..5e414c8ed334 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -4403,22 +4403,18 @@ static inline int set_compress_context(struct inode *inode) { #ifdef CONFIG_F2FS_FS_COMPRESSION struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct f2fs_inode_info *fi = F2FS_I(inode); - F2FS_I(inode)->i_compress_algorithm = - F2FS_OPTION(sbi).compress_algorithm; - F2FS_I(inode)->i_log_cluster_size = - F2FS_OPTION(sbi).compress_log_size; - F2FS_I(inode)->i_compress_flag = - F2FS_OPTION(sbi).compress_chksum ? - BIT(COMPRESS_CHKSUM) : 0; - F2FS_I(inode)->i_cluster_size = - BIT(F2FS_I(inode)->i_log_cluster_size); - if ((F2FS_I(inode)->i_compress_algorithm == COMPRESS_LZ4 || - F2FS_I(inode)->i_compress_algorithm == COMPRESS_ZSTD) && + fi->i_compress_algorithm = F2FS_OPTION(sbi).compress_algorithm; + fi->i_log_cluster_size = F2FS_OPTION(sbi).compress_log_size; + fi->i_compress_flag = F2FS_OPTION(sbi).compress_chksum ? + BIT(COMPRESS_CHKSUM) : 0; + fi->i_cluster_size = BIT(fi->i_log_cluster_size); + if ((fi->i_compress_algorithm == COMPRESS_LZ4 || + fi->i_compress_algorithm == COMPRESS_ZSTD) && F2FS_OPTION(sbi).compress_level) - F2FS_I(inode)->i_compress_level = - F2FS_OPTION(sbi).compress_level; - F2FS_I(inode)->i_flags |= F2FS_COMPR_FL; + fi->i_compress_level = F2FS_OPTION(sbi).compress_level; + fi->i_flags |= F2FS_COMPR_FL; set_inode_flag(inode, FI_COMPRESSED_FILE); stat_inc_compr_inode(inode); inc_compr_inode_stat(inode); @@ -4433,15 +4429,15 @@ static inline bool f2fs_disable_compressed_file(struct inode *inode) { struct f2fs_inode_info *fi = F2FS_I(inode); - f2fs_down_write(&F2FS_I(inode)->i_sem); + f2fs_down_write(&fi->i_sem); if (!f2fs_compressed_file(inode)) { - f2fs_up_write(&F2FS_I(inode)->i_sem); + f2fs_up_write(&fi->i_sem); return true; } if (f2fs_is_mmap_file(inode) || (S_ISREG(inode->i_mode) && F2FS_HAS_BLOCKS(inode))) { - f2fs_up_write(&F2FS_I(inode)->i_sem); + f2fs_up_write(&fi->i_sem); return false; } @@ -4450,7 +4446,7 @@ static inline bool f2fs_disable_compressed_file(struct inode *inode) clear_inode_flag(inode, FI_COMPRESSED_FILE); f2fs_mark_inode_dirty_sync(inode, true); - f2fs_up_write(&F2FS_I(inode)->i_sem); + f2fs_up_write(&fi->i_sem); return true; } diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 547e7ec32b1f..effbaa606a4e 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -981,6 +981,7 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr) { struct inode *inode = d_inode(dentry); + struct f2fs_inode_info *fi = F2FS_I(inode); int err; if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) @@ -999,7 +1000,7 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, return -EOPNOTSUPP; if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED) && !IS_ALIGNED(attr->ia_size, - F2FS_BLK_TO_BYTES(F2FS_I(inode)->i_cluster_size))) + F2FS_BLK_TO_BYTES(fi->i_cluster_size))) return -EINVAL; } @@ -1053,7 +1054,7 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, return err; } - f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_down_write(&fi->i_gc_rwsem[WRITE]); filemap_invalidate_lock(inode->i_mapping); truncate_setsize(inode, attr->ia_size); @@ -1065,14 +1066,14 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, * larger than i_size. */ filemap_invalidate_unlock(inode->i_mapping); - f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_up_write(&fi->i_gc_rwsem[WRITE]); if (err) return err; - spin_lock(&F2FS_I(inode)->i_size_lock); + spin_lock(&fi->i_size_lock); inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); - F2FS_I(inode)->last_disk_size = i_size_read(inode); - spin_unlock(&F2FS_I(inode)->i_size_lock); + fi->last_disk_size = i_size_read(inode); + spin_unlock(&fi->i_size_lock); } __setattr_copy(idmap, inode, attr); @@ -1082,7 +1083,7 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, if (is_inode_flag_set(inode, FI_ACL_MODE)) { if (!err) - inode->i_mode = F2FS_I(inode)->i_acl_mode; + inode->i_mode = fi->i_acl_mode; clear_inode_flag(inode, FI_ACL_MODE); } } @@ -1990,15 +1991,15 @@ static int f2fs_setflags_common(struct inode *inode, u32 iflags, u32 mask) if (err) return err; - f2fs_down_write(&F2FS_I(inode)->i_sem); + f2fs_down_write(&fi->i_sem); if (!f2fs_may_compress(inode) || (S_ISREG(inode->i_mode) && F2FS_HAS_BLOCKS(inode))) { - f2fs_up_write(&F2FS_I(inode)->i_sem); + f2fs_up_write(&fi->i_sem); return -EINVAL; } err = set_compress_context(inode); - f2fs_up_write(&F2FS_I(inode)->i_sem); + f2fs_up_write(&fi->i_sem); if (err) return err; @@ -3588,6 +3589,7 @@ next: static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg) { struct inode *inode = file_inode(filp); + struct f2fs_inode_info *fi = F2FS_I(inode); struct f2fs_sb_info *sbi = F2FS_I_SB(inode); pgoff_t page_idx = 0, last_idx; unsigned int released_blocks = 0; @@ -3625,7 +3627,7 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg) if (ret) goto out; - if (!atomic_read(&F2FS_I(inode)->i_compr_blocks)) { + if (!atomic_read(&fi->i_compr_blocks)) { ret = -EPERM; goto out; } @@ -3634,7 +3636,7 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg) inode_set_ctime_current(inode); f2fs_mark_inode_dirty_sync(inode, true); - f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_down_write(&fi->i_gc_rwsem[WRITE]); filemap_invalidate_lock(inode->i_mapping); last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); @@ -3660,7 +3662,7 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg) end_offset = ADDRS_PER_PAGE(dn.node_page, inode); count = min(end_offset - dn.ofs_in_node, last_idx - page_idx); - count = round_up(count, F2FS_I(inode)->i_cluster_size); + count = round_up(count, fi->i_cluster_size); ret = release_compress_blocks(&dn, count); @@ -3676,7 +3678,7 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg) } filemap_invalidate_unlock(inode->i_mapping); - f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_up_write(&fi->i_gc_rwsem[WRITE]); out: if (released_blocks) f2fs_update_time(sbi, REQ_TIME); @@ -3687,14 +3689,14 @@ out: if (ret >= 0) { ret = put_user(released_blocks, (u64 __user *)arg); } else if (released_blocks && - atomic_read(&F2FS_I(inode)->i_compr_blocks)) { + atomic_read(&fi->i_compr_blocks)) { set_sbi_flag(sbi, SBI_NEED_FSCK); f2fs_warn(sbi, "%s: partial blocks were released i_ino=%lx " "iblocks=%llu, released=%u, compr_blocks=%u, " "run fsck to fix.", __func__, inode->i_ino, inode->i_blocks, released_blocks, - atomic_read(&F2FS_I(inode)->i_compr_blocks)); + atomic_read(&fi->i_compr_blocks)); } return ret; @@ -3783,6 +3785,7 @@ next: static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg) { struct inode *inode = file_inode(filp); + struct f2fs_inode_info *fi = F2FS_I(inode); struct f2fs_sb_info *sbi = F2FS_I_SB(inode); pgoff_t page_idx = 0, last_idx; unsigned int reserved_blocks = 0; @@ -3808,10 +3811,10 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg) goto unlock_inode; } - if (atomic_read(&F2FS_I(inode)->i_compr_blocks)) + if (atomic_read(&fi->i_compr_blocks)) goto unlock_inode; - f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_down_write(&fi->i_gc_rwsem[WRITE]); filemap_invalidate_lock(inode->i_mapping); last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); @@ -3837,7 +3840,7 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg) end_offset = ADDRS_PER_PAGE(dn.node_page, inode); count = min(end_offset - dn.ofs_in_node, last_idx - page_idx); - count = round_up(count, F2FS_I(inode)->i_cluster_size); + count = round_up(count, fi->i_cluster_size); ret = reserve_compress_blocks(&dn, count, &reserved_blocks); @@ -3852,7 +3855,7 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg) } filemap_invalidate_unlock(inode->i_mapping); - f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_up_write(&fi->i_gc_rwsem[WRITE]); if (!ret) { clear_inode_flag(inode, FI_COMPRESS_RELEASED); @@ -3868,14 +3871,14 @@ unlock_inode: if (!ret) { ret = put_user(reserved_blocks, (u64 __user *)arg); } else if (reserved_blocks && - atomic_read(&F2FS_I(inode)->i_compr_blocks)) { + atomic_read(&fi->i_compr_blocks)) { set_sbi_flag(sbi, SBI_NEED_FSCK); f2fs_warn(sbi, "%s: partial blocks were reserved i_ino=%lx " "iblocks=%llu, reserved=%u, compr_blocks=%u, " "run fsck to fix.", __func__, inode->i_ino, inode->i_blocks, reserved_blocks, - atomic_read(&F2FS_I(inode)->i_compr_blocks)); + atomic_read(&fi->i_compr_blocks)); } return ret; @@ -4101,6 +4104,7 @@ static int f2fs_ioc_get_compress_option(struct file *filp, unsigned long arg) static int f2fs_ioc_set_compress_option(struct file *filp, unsigned long arg) { struct inode *inode = file_inode(filp); + struct f2fs_inode_info *fi = F2FS_I(inode); struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_comp_option option; int ret = 0; @@ -4141,25 +4145,25 @@ static int f2fs_ioc_set_compress_option(struct file *filp, unsigned long arg) goto out; } - F2FS_I(inode)->i_compress_algorithm = option.algorithm; - F2FS_I(inode)->i_log_cluster_size = option.log_cluster_size; - F2FS_I(inode)->i_cluster_size = BIT(option.log_cluster_size); + fi->i_compress_algorithm = option.algorithm; + fi->i_log_cluster_size = option.log_cluster_size; + fi->i_cluster_size = BIT(option.log_cluster_size); /* Set default level */ - if (F2FS_I(inode)->i_compress_algorithm == COMPRESS_ZSTD) - F2FS_I(inode)->i_compress_level = F2FS_ZSTD_DEFAULT_CLEVEL; + if (fi->i_compress_algorithm == COMPRESS_ZSTD) + fi->i_compress_level = F2FS_ZSTD_DEFAULT_CLEVEL; else - F2FS_I(inode)->i_compress_level = 0; + fi->i_compress_level = 0; /* Adjust mount option level */ if (option.algorithm == F2FS_OPTION(sbi).compress_algorithm && F2FS_OPTION(sbi).compress_level) - F2FS_I(inode)->i_compress_level = F2FS_OPTION(sbi).compress_level; + fi->i_compress_level = F2FS_OPTION(sbi).compress_level; f2fs_mark_inode_dirty_sync(inode, true); if (!f2fs_is_compress_backend_ready(inode)) f2fs_warn(sbi, "compression algorithm is successfully set, " "but current kernel doesn't support this algorithm."); out: - f2fs_up_write(&F2FS_I(inode)->i_sem); + f2fs_up_write(&fi->i_sem); inode_unlock(inode); mnt_drop_write_file(filp); diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 18dea43e694b..aef57172014f 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -636,8 +636,9 @@ retry: void f2fs_update_inode(struct inode *inode, struct page *node_page) { + struct f2fs_inode_info *fi = F2FS_I(inode); struct f2fs_inode *ri; - struct extent_tree *et = F2FS_I(inode)->extent_tree[EX_READ]; + struct extent_tree *et = fi->extent_tree[EX_READ]; f2fs_wait_on_page_writeback(node_page, NODE, true, true); set_page_dirty(node_page); @@ -647,7 +648,7 @@ void f2fs_update_inode(struct inode *inode, struct page *node_page) ri = F2FS_INODE(node_page); ri->i_mode = cpu_to_le16(inode->i_mode); - ri->i_advise = F2FS_I(inode)->i_advise; + ri->i_advise = fi->i_advise; ri->i_uid = cpu_to_le32(i_uid_read(inode)); ri->i_gid = cpu_to_le32(i_gid_read(inode)); ri->i_links = cpu_to_le32(inode->i_nlink); @@ -673,58 +674,49 @@ void f2fs_update_inode(struct inode *inode, struct page *node_page) ri->i_ctime_nsec = cpu_to_le32(inode_get_ctime_nsec(inode)); ri->i_mtime_nsec = cpu_to_le32(inode_get_mtime_nsec(inode)); if (S_ISDIR(inode->i_mode)) - ri->i_current_depth = - cpu_to_le32(F2FS_I(inode)->i_current_depth); + ri->i_current_depth = cpu_to_le32(fi->i_current_depth); else if (S_ISREG(inode->i_mode)) - ri->i_gc_failures = cpu_to_le16(F2FS_I(inode)->i_gc_failures); - ri->i_xattr_nid = cpu_to_le32(F2FS_I(inode)->i_xattr_nid); - ri->i_flags = cpu_to_le32(F2FS_I(inode)->i_flags); - ri->i_pino = cpu_to_le32(F2FS_I(inode)->i_pino); + ri->i_gc_failures = cpu_to_le16(fi->i_gc_failures); + ri->i_xattr_nid = cpu_to_le32(fi->i_xattr_nid); + ri->i_flags = cpu_to_le32(fi->i_flags); + ri->i_pino = cpu_to_le32(fi->i_pino); ri->i_generation = cpu_to_le32(inode->i_generation); - ri->i_dir_level = F2FS_I(inode)->i_dir_level; + ri->i_dir_level = fi->i_dir_level; if (f2fs_has_extra_attr(inode)) { - ri->i_extra_isize = cpu_to_le16(F2FS_I(inode)->i_extra_isize); + ri->i_extra_isize = cpu_to_le16(fi->i_extra_isize); if (f2fs_sb_has_flexible_inline_xattr(F2FS_I_SB(inode))) ri->i_inline_xattr_size = - cpu_to_le16(F2FS_I(inode)->i_inline_xattr_size); + cpu_to_le16(fi->i_inline_xattr_size); if (f2fs_sb_has_project_quota(F2FS_I_SB(inode)) && - F2FS_FITS_IN_INODE(ri, F2FS_I(inode)->i_extra_isize, - i_projid)) { + F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, i_projid)) { projid_t i_projid; - i_projid = from_kprojid(&init_user_ns, - F2FS_I(inode)->i_projid); + i_projid = from_kprojid(&init_user_ns, fi->i_projid); ri->i_projid = cpu_to_le32(i_projid); } if (f2fs_sb_has_inode_crtime(F2FS_I_SB(inode)) && - F2FS_FITS_IN_INODE(ri, F2FS_I(inode)->i_extra_isize, - i_crtime)) { - ri->i_crtime = - cpu_to_le64(F2FS_I(inode)->i_crtime.tv_sec); - ri->i_crtime_nsec = - cpu_to_le32(F2FS_I(inode)->i_crtime.tv_nsec); + F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, i_crtime)) { + ri->i_crtime = cpu_to_le64(fi->i_crtime.tv_sec); + ri->i_crtime_nsec = cpu_to_le32(fi->i_crtime.tv_nsec); } if (f2fs_sb_has_compression(F2FS_I_SB(inode)) && - F2FS_FITS_IN_INODE(ri, F2FS_I(inode)->i_extra_isize, + F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, i_compress_flag)) { unsigned short compress_flag; - ri->i_compr_blocks = - cpu_to_le64(atomic_read( - &F2FS_I(inode)->i_compr_blocks)); - ri->i_compress_algorithm = - F2FS_I(inode)->i_compress_algorithm; - compress_flag = F2FS_I(inode)->i_compress_flag | - F2FS_I(inode)->i_compress_level << + ri->i_compr_blocks = cpu_to_le64( + atomic_read(&fi->i_compr_blocks)); + ri->i_compress_algorithm = fi->i_compress_algorithm; + compress_flag = fi->i_compress_flag | + fi->i_compress_level << COMPRESS_LEVEL_OFFSET; ri->i_compress_flag = cpu_to_le16(compress_flag); - ri->i_log_cluster_size = - F2FS_I(inode)->i_log_cluster_size; + ri->i_log_cluster_size = fi->i_log_cluster_size; } } diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index e54f8c08bda8..3d9d2b710796 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -221,6 +221,7 @@ static struct inode *f2fs_new_inode(struct mnt_idmap *idmap, const char *name) { struct f2fs_sb_info *sbi = F2FS_I_SB(dir); + struct f2fs_inode_info *fi; nid_t ino; struct inode *inode; bool nid_free = false; @@ -241,14 +242,15 @@ static struct inode *f2fs_new_inode(struct mnt_idmap *idmap, inode_init_owner(idmap, inode, dir, mode); + fi = F2FS_I(inode); inode->i_ino = ino; inode->i_blocks = 0; simple_inode_init_ts(inode); - F2FS_I(inode)->i_crtime = inode_get_mtime(inode); + fi->i_crtime = inode_get_mtime(inode); inode->i_generation = get_random_u32(); if (S_ISDIR(inode->i_mode)) - F2FS_I(inode)->i_current_depth = 1; + fi->i_current_depth = 1; err = insert_inode_locked(inode); if (err) { @@ -258,9 +260,9 @@ static struct inode *f2fs_new_inode(struct mnt_idmap *idmap, if (f2fs_sb_has_project_quota(sbi) && (F2FS_I(dir)->i_flags & F2FS_PROJINHERIT_FL)) - F2FS_I(inode)->i_projid = F2FS_I(dir)->i_projid; + fi->i_projid = F2FS_I(dir)->i_projid; else - F2FS_I(inode)->i_projid = make_kprojid(&init_user_ns, + fi->i_projid = make_kprojid(&init_user_ns, F2FS_DEF_PROJID); err = fscrypt_prepare_new_inode(dir, inode, &encrypt); @@ -278,7 +280,7 @@ static struct inode *f2fs_new_inode(struct mnt_idmap *idmap, if (f2fs_sb_has_extra_attr(sbi)) { set_inode_flag(inode, FI_EXTRA_ATTR); - F2FS_I(inode)->i_extra_isize = F2FS_TOTAL_EXTRA_ATTR_SIZE; + fi->i_extra_isize = F2FS_TOTAL_EXTRA_ATTR_SIZE; } if (test_opt(sbi, INLINE_XATTR)) @@ -296,15 +298,15 @@ static struct inode *f2fs_new_inode(struct mnt_idmap *idmap, f2fs_has_inline_dentry(inode)) { xattr_size = DEFAULT_INLINE_XATTR_ADDRS; } - F2FS_I(inode)->i_inline_xattr_size = xattr_size; + fi->i_inline_xattr_size = xattr_size; - F2FS_I(inode)->i_flags = + fi->i_flags = f2fs_mask_flags(mode, F2FS_I(dir)->i_flags & F2FS_FL_INHERITED); if (S_ISDIR(inode->i_mode)) - F2FS_I(inode)->i_flags |= F2FS_INDEX_FL; + fi->i_flags |= F2FS_INDEX_FL; - if (F2FS_I(inode)->i_flags & F2FS_PROJINHERIT_FL) + if (fi->i_flags & F2FS_PROJINHERIT_FL) set_inode_flag(inode, FI_PROJ_INHERIT); /* Check compression first. */ diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 496aee53c38a..c74ffdeedc4c 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -287,6 +287,7 @@ static void recover_inline_flags(struct inode *inode, struct f2fs_inode *ri) static int recover_inode(struct inode *inode, struct page *page) { struct f2fs_inode *raw = F2FS_INODE(page); + struct f2fs_inode_info *fi = F2FS_I(inode); char *name; int err; @@ -309,12 +310,12 @@ static int recover_inode(struct inode *inode, struct page *page) i_projid = (projid_t)le32_to_cpu(raw->i_projid); kprojid = make_kprojid(&init_user_ns, i_projid); - if (!projid_eq(kprojid, F2FS_I(inode)->i_projid)) { + if (!projid_eq(kprojid, fi->i_projid)) { err = f2fs_transfer_project_quota(inode, kprojid); if (err) return err; - F2FS_I(inode)->i_projid = kprojid; + fi->i_projid = kprojid; } } } @@ -327,10 +328,10 @@ static int recover_inode(struct inode *inode, struct page *page) inode_set_mtime(inode, le64_to_cpu(raw->i_mtime), le32_to_cpu(raw->i_mtime_nsec)); - F2FS_I(inode)->i_advise = raw->i_advise; - F2FS_I(inode)->i_flags = le32_to_cpu(raw->i_flags); + fi->i_advise = raw->i_advise; + fi->i_flags = le32_to_cpu(raw->i_flags); f2fs_set_inode_flags(inode); - F2FS_I(inode)->i_gc_failures = le16_to_cpu(raw->i_gc_failures); + fi->i_gc_failures = le16_to_cpu(raw->i_gc_failures); recover_inline_flags(inode, raw); From bed6b0317441d82c32506750ccd868d83850e6f4 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 25 Jun 2024 11:16:03 +0800 Subject: [PATCH 26/26] f2fs: clean up addrs_per_{inode,block}() Introduce a new help addrs_per_page() to wrap common code from addrs_per_inode() and addrs_per_block() for cleanup. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 20 +++++++------------- include/linux/f2fs_fs.h | 7 +++---- 2 files changed, 10 insertions(+), 17 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 5e414c8ed334..3948c181997f 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3227,21 +3227,15 @@ static inline bool f2fs_need_compress_data(struct inode *inode) return false; } -static inline unsigned int addrs_per_inode(struct inode *inode) +static inline unsigned int addrs_per_page(struct inode *inode, + bool is_inode) { - unsigned int addrs = CUR_ADDRS_PER_INODE(inode) - - get_inline_xattr_addrs(inode); + unsigned int addrs = is_inode ? (CUR_ADDRS_PER_INODE(inode) - + get_inline_xattr_addrs(inode)) : DEF_ADDRS_PER_BLOCK; - if (!f2fs_compressed_file(inode)) - return addrs; - return ALIGN_DOWN(addrs, F2FS_I(inode)->i_cluster_size); -} - -static inline unsigned int addrs_per_block(struct inode *inode) -{ - if (!f2fs_compressed_file(inode)) - return DEF_ADDRS_PER_BLOCK; - return ALIGN_DOWN(DEF_ADDRS_PER_BLOCK, F2FS_I(inode)->i_cluster_size); + if (f2fs_compressed_file(inode)) + return ALIGN_DOWN(addrs, F2FS_I(inode)->i_cluster_size); + return addrs; } static inline void *inline_xattr_addr(struct inode *inode, struct page *page) diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 41d1d71c36ff..01bee2b289c2 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -259,15 +259,14 @@ struct node_footer { #define CUR_ADDRS_PER_INODE(inode) (DEF_ADDRS_PER_INODE - \ get_extra_isize(inode)) #define DEF_NIDS_PER_INODE 5 /* Node IDs in an Inode */ -#define ADDRS_PER_INODE(inode) addrs_per_inode(inode) +#define ADDRS_PER_INODE(inode) addrs_per_page(inode, true) /* Address Pointers in a Direct Block */ #define DEF_ADDRS_PER_BLOCK ((F2FS_BLKSIZE - sizeof(struct node_footer)) / sizeof(__le32)) -#define ADDRS_PER_BLOCK(inode) addrs_per_block(inode) +#define ADDRS_PER_BLOCK(inode) addrs_per_page(inode, false) /* Node IDs in an Indirect Block */ #define NIDS_PER_BLOCK ((F2FS_BLKSIZE - sizeof(struct node_footer)) / sizeof(__le32)) -#define ADDRS_PER_PAGE(page, inode) \ - (IS_INODE(page) ? ADDRS_PER_INODE(inode) : ADDRS_PER_BLOCK(inode)) +#define ADDRS_PER_PAGE(page, inode) (addrs_per_page(inode, IS_INODE(page))) #define NODE_DIR1_BLOCK (DEF_ADDRS_PER_INODE + 1) #define NODE_DIR2_BLOCK (DEF_ADDRS_PER_INODE + 2)