From 579c7e41507e85dc3eedf998a3dca14a2a1526ad Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 4 Aug 2023 12:15:34 -0700 Subject: [PATCH 01/25] Revert "f2fs: clean up w/ sbi->log_sectors_per_block" This reverts commit bfd476623999118d9c509cb0fa9380f2912bc225. Shinichiro Kawasaki reported: When I ran workloads on f2fs using v6.5-rcX with fixes [1][2] and a zoned block device with 4kb logical block size, I observe a mount failure as follows. When I revert this commit, the failure goes away. [ 167.781975][ T1555] F2FS-fs (dm-0): IO Block Size: 4 KB [ 167.890728][ T1555] F2FS-fs (dm-0): Found nat_bits in checkpoint [ 171.482588][ T1555] F2FS-fs (dm-0): Zone without valid block has non-zero write pointer. Reset the write pointer: wp[0x1300,0x8] [ 171.496000][ T1555] F2FS-fs (dm-0): (0) : Unaligned zone reset attempted (block 280000 + 80000) [ 171.505037][ T1555] F2FS-fs (dm-0): Discard zone failed: (errno=-5) The patch replaced "sbi->log_blocksize - SECTOR_SHIFT" with "sbi->log_sectors_per_block". However, I think these two are not equal when the device has 4k logical block size. The former uses the Linux kernel sector size of 512 bytes. The latter uses a 512b sector size or a 4kb sector size depending on the device. mkfs.f2fs obtains the logical block size via the BLKSSZGET ioctl from the device and reflects it in the value sbi->log_sectors_per_block. This causes unexpected write pointer calculations in check_zone_write_pointer(). This resulted in an unexpected zone reset and the mount failure.
[1] https://lkml.kernel.org/linux-f2fs-devel/20230711050101.GA19128@lst.de/ [2] https://lore.kernel.org/linux-f2fs-devel/20230804091556.2372567-1-shinichiro.kawasaki@wdc.com/ Cc: stable@vger.kernel.org Reported-by: Shinichiro Kawasaki Fixes: bfd476623999 ("f2fs: clean up w/ sbi->log_sectors_per_block") Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 0457d620011f..cbb4bd95ea19 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -4846,17 +4846,17 @@ static int check_zone_write_pointer(struct f2fs_sb_info *sbi, { unsigned int wp_segno, wp_blkoff, zone_secno, zone_segno, segno; block_t zone_block, wp_block, last_valid_block; + unsigned int log_sectors_per_block = sbi->log_blocksize - SECTOR_SHIFT; int i, s, b, ret; struct seg_entry *se; if (zone->type != BLK_ZONE_TYPE_SEQWRITE_REQ) return 0; - wp_block = fdev->start_blk + (zone->wp >> sbi->log_sectors_per_block); + wp_block = fdev->start_blk + (zone->wp >> log_sectors_per_block); wp_segno = GET_SEGNO(sbi, wp_block); wp_blkoff = wp_block - START_BLOCK(sbi, wp_segno); - zone_block = fdev->start_blk + (zone->start >> - sbi->log_sectors_per_block); + zone_block = fdev->start_blk + (zone->start >> log_sectors_per_block); zone_segno = GET_SEGNO(sbi, zone_block); zone_secno = GET_SEC_FROM_SEG(sbi, zone_segno); @@ -4906,7 +4906,7 @@ static int check_zone_write_pointer(struct f2fs_sb_info *sbi, "pointer. 
Reset the write pointer: wp[0x%x,0x%x]", wp_segno, wp_blkoff); ret = __f2fs_issue_discard_zone(sbi, fdev->bdev, zone_block, - zone->len >> sbi->log_sectors_per_block); + zone->len >> log_sectors_per_block); if (ret) f2fs_err(sbi, "Discard zone failed: %s (errno=%d)", fdev->path, ret); @@ -4967,6 +4967,7 @@ static int fix_curseg_write_pointer(struct f2fs_sb_info *sbi, int type) struct blk_zone zone; unsigned int cs_section, wp_segno, wp_blkoff, wp_sector_off; block_t cs_zone_block, wp_block; + unsigned int log_sectors_per_block = sbi->log_blocksize - SECTOR_SHIFT; sector_t zone_sector; int err; @@ -4978,8 +4979,8 @@ static int fix_curseg_write_pointer(struct f2fs_sb_info *sbi, int type) return 0; /* report zone for the sector the curseg points to */ - zone_sector = (sector_t)(cs_zone_block - zbd->start_blk) << - sbi->log_sectors_per_block; + zone_sector = (sector_t)(cs_zone_block - zbd->start_blk) + << log_sectors_per_block; err = blkdev_report_zones(zbd->bdev, zone_sector, 1, report_one_zone_cb, &zone); if (err != 1) { @@ -4991,10 +4992,10 @@ static int fix_curseg_write_pointer(struct f2fs_sb_info *sbi, int type) if (zone.type != BLK_ZONE_TYPE_SEQWRITE_REQ) return 0; - wp_block = zbd->start_blk + (zone.wp >> sbi->log_sectors_per_block); + wp_block = zbd->start_blk + (zone.wp >> log_sectors_per_block); wp_segno = GET_SEGNO(sbi, wp_block); wp_blkoff = wp_block - START_BLOCK(sbi, wp_segno); - wp_sector_off = zone.wp & GENMASK(sbi->log_sectors_per_block - 1, 0); + wp_sector_off = zone.wp & GENMASK(log_sectors_per_block - 1, 0); if (cs->segno == wp_segno && cs->next_blkoff == wp_blkoff && wp_sector_off == 0) @@ -5021,8 +5022,8 @@ static int fix_curseg_write_pointer(struct f2fs_sb_info *sbi, int type) if (!zbd) return 0; - zone_sector = (sector_t)(cs_zone_block - zbd->start_blk) << - sbi->log_sectors_per_block; + zone_sector = (sector_t)(cs_zone_block - zbd->start_blk) + << log_sectors_per_block; err = blkdev_report_zones(zbd->bdev, zone_sector, 1, report_one_zone_cb, 
&zone); if (err != 1) { @@ -5040,7 +5041,7 @@ static int fix_curseg_write_pointer(struct f2fs_sb_info *sbi, int type) "Reset the zone: curseg[0x%x,0x%x]", type, cs->segno, cs->next_blkoff); err = __f2fs_issue_discard_zone(sbi, zbd->bdev, cs_zone_block, - zone.len >> sbi->log_sectors_per_block); + zone.len >> log_sectors_per_block); if (err) { f2fs_err(sbi, "Discard zone failed: %s (errno=%d)", zbd->path, err); From 863907a4f53ad567db0767391247d5d0ca398dea Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 10 Jul 2023 14:10:58 +0800 Subject: [PATCH 02/25] f2fs: don't handle error case of f2fs_compress_alloc_page() f2fs_compress_alloc_page() uses a mempool to allocate memory, so it never fails; don't handle the error case in its callers. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 236d890f560b..9662d635efbe 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -649,13 +649,8 @@ static int f2fs_compress_pages(struct compress_ctx *cc) goto destroy_compress_ctx; } - for (i = 0; i < cc->nr_cpages; i++) { + for (i = 0; i < cc->nr_cpages; i++) cc->cpages[i] = f2fs_compress_alloc_page(); - if (!cc->cpages[i]) { - ret = -ENOMEM; - goto out_free_cpages; - } - } cc->rbuf = f2fs_vmap(cc->rpages, cc->cluster_size); if (!cc->rbuf) { @@ -1574,8 +1569,6 @@ static int f2fs_prepare_decomp_mem(struct decompress_io_ctx *dic, } dic->tpages[i] = f2fs_compress_alloc_page(); - if (!dic->tpages[i]) - return -ENOMEM; } dic->rbuf = f2fs_vmap(dic->tpages, dic->cluster_size); @@ -1656,11 +1649,6 @@ struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc) struct page *page; page = f2fs_compress_alloc_page(); - if (!page) { - ret = -ENOMEM; - goto out_free; - } - f2fs_set_compressed_page(page, cc->inode, start_idx + i + 1, dic); dic->cpages[i] = page; From a3ab55746612247ce3dcaac6de66f5ffc055b9df Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim
Date: Fri, 7 Jul 2023 07:03:13 -0700 Subject: [PATCH 03/25] f2fs: flush inode if atomic file is aborted Let's flush the inode whose atomic operation is being aborted, to avoid a stale dirty inode during eviction in this call stack: f2fs_mark_inode_dirty_sync+0x22/0x40 [f2fs] f2fs_abort_atomic_write+0xc4/0xf0 [f2fs] f2fs_evict_inode+0x3f/0x690 [f2fs] ? sugov_start+0x140/0x140 evict+0xc3/0x1c0 evict_inodes+0x17b/0x210 generic_shutdown_super+0x32/0x120 kill_block_super+0x21/0x50 deactivate_locked_super+0x31/0x90 cleanup_mnt+0x100/0x160 task_work_run+0x59/0x90 do_exit+0x33b/0xa50 do_group_exit+0x2d/0x80 __x64_sys_exit_group+0x14/0x20 do_syscall_64+0x3b/0x90 entry_SYSCALL_64_after_hwframe+0x63/0xcd This triggers f2fs_bug_on() in f2fs_evict_inode: f2fs_bug_on(sbi, is_inode_flag_set(inode, FI_DIRTY_INODE)); This fixes the syzbot report: loop0: detected capacity change from 0 to 131072 F2FS-fs (loop0): invalid crc value F2FS-fs (loop0): Found nat_bits in checkpoint F2FS-fs (loop0): Mounted with checkpoint version = 48b305e4 ------------[ cut here ]------------ kernel BUG at fs/f2fs/inode.c:869!
invalid opcode: 0000 [#1] PREEMPT SMP KASAN CPU: 0 PID: 5014 Comm: syz-executor220 Not tainted 6.4.0-syzkaller-11479-g6cd06ab12d1a #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 05/27/2023 RIP: 0010:f2fs_evict_inode+0x172d/0x1e00 fs/f2fs/inode.c:869 Code: ff df 48 c1 ea 03 80 3c 02 00 0f 85 6a 06 00 00 8b 75 40 ba 01 00 00 00 4c 89 e7 e8 6d ce 06 00 e9 aa fc ff ff e8 63 22 e2 fd <0f> 0b e8 5c 22 e2 fd 48 c7 c0 a8 3a 18 8d 48 ba 00 00 00 00 00 fc RSP: 0018:ffffc90003a6fa00 EFLAGS: 00010293 RAX: 0000000000000000 RBX: 0000000000000001 RCX: 0000000000000000 RDX: ffff8880273b8000 RSI: ffffffff83a2bd0d RDI: 0000000000000007 RBP: ffff888077db91b0 R08: 0000000000000007 R09: 0000000000000000 R10: 0000000000000001 R11: 0000000000000001 R12: ffff888029a3c000 R13: ffff888077db9660 R14: ffff888029a3c0b8 R15: ffff888077db9c50 FS: 0000000000000000(0000) GS:ffff8880b9800000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f1909bb9000 CR3: 00000000276a9000 CR4: 0000000000350ef0 Call Trace: evict+0x2ed/0x6b0 fs/inode.c:665 dispose_list+0x117/0x1e0 fs/inode.c:698 evict_inodes+0x345/0x440 fs/inode.c:748 generic_shutdown_super+0xaf/0x480 fs/super.c:478 kill_block_super+0x64/0xb0 fs/super.c:1417 kill_f2fs_super+0x2af/0x3c0 fs/f2fs/super.c:4704 deactivate_locked_super+0x98/0x160 fs/super.c:330 deactivate_super+0xb1/0xd0 fs/super.c:361 cleanup_mnt+0x2ae/0x3d0 fs/namespace.c:1254 task_work_run+0x16f/0x270 kernel/task_work.c:179 exit_task_work include/linux/task_work.h:38 [inline] do_exit+0xa9a/0x29a0 kernel/exit.c:874 do_group_exit+0xd4/0x2a0 kernel/exit.c:1024 __do_sys_exit_group kernel/exit.c:1035 [inline] __se_sys_exit_group kernel/exit.c:1033 [inline] __x64_sys_exit_group+0x3e/0x50 kernel/exit.c:1033 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x39/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd RIP: 0033:0x7f309be71a09 Code: Unable to access opcode bytes at 
0x7f309be719df. RSP: 002b:00007fff171df518 EFLAGS: 00000246 ORIG_RAX: 00000000000000e7 RAX: ffffffffffffffda RBX: 00007f309bef7330 RCX: 00007f309be71a09 RDX: 000000000000003c RSI: 00000000000000e7 RDI: 0000000000000001 RBP: 0000000000000001 R08: ffffffffffffffc0 R09: 00007f309bef1e40 R10: 0000000000010600 R11: 0000000000000246 R12: 00007f309bef7330 R13: 0000000000000001 R14: 0000000000000000 R15: 0000000000000001 Modules linked in: ---[ end trace 0000000000000000 ]--- RIP: 0010:f2fs_evict_inode+0x172d/0x1e00 fs/f2fs/inode.c:869 Code: ff df 48 c1 ea 03 80 3c 02 00 0f 85 6a 06 00 00 8b 75 40 ba 01 00 00 00 4c 89 e7 e8 6d ce 06 00 e9 aa fc ff ff e8 63 22 e2 fd <0f> 0b e8 5c 22 e2 fd 48 c7 c0 a8 3a 18 8d 48 ba 00 00 00 00 00 fc RSP: 0018:ffffc90003a6fa00 EFLAGS: 00010293 RAX: 0000000000000000 RBX: 0000000000000001 RCX: 0000000000000000 RDX: ffff8880273b8000 RSI: ffffffff83a2bd0d RDI: 0000000000000007 RBP: ffff888077db91b0 R08: 0000000000000007 R09: 0000000000000000 R10: 0000000000000001 R11: 0000000000000001 R12: ffff888029a3c000 R13: ffff888077db9660 R14: ffff888029a3c0b8 R15: ffff888077db9c50 FS: 0000000000000000(0000) GS:ffff8880b9800000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f1909bb9000 CR3: 00000000276a9000 CR4: 0000000000350ef0 Cc: Reported-and-tested-by: syzbot+e1246909d526a9d470fa@syzkaller.appspotmail.com Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index cbb4bd95ea19..b254aaac3031 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -205,6 +205,8 @@ void f2fs_abort_atomic_write(struct inode *inode, bool clean) f2fs_i_size_write(inode, fi->original_i_size); fi->original_i_size = 0; } + /* avoid stale dirty inode during eviction */ + sync_inode_metadata(inode, 0); } static int __replace_atomic_write_block(struct inode *inode, pgoff_t index, From d2d9bb3b6d2fbccb5b33d3a85a2830971625a4ea Mon Sep 
17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 19 Jan 2023 10:47:00 -0800 Subject: [PATCH 04/25] f2fs: get out of a repeat loop when getting a locked data page https://bugzilla.kernel.org/show_bug.cgi?id=216050 Somehow we're getting a page which has a different mapping. Let's avoid the infinite loop. Cc: Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 5882afe71d82..ecebc3a139be 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1389,18 +1389,14 @@ struct page *f2fs_get_lock_data_page(struct inode *inode, pgoff_t index, { struct address_space *mapping = inode->i_mapping; struct page *page; -repeat: + page = f2fs_get_read_data_page(inode, index, 0, for_write, NULL); if (IS_ERR(page)) return page; /* wait for read completion */ lock_page(page); - if (unlikely(page->mapping != mapping)) { - f2fs_put_page(page, 1); - goto repeat; - } - if (unlikely(!PageUptodate(page))) { + if (unlikely(page->mapping != mapping || !PageUptodate(page))) { f2fs_put_page(page, 1); return ERR_PTR(-EIO); } From c709d099a0d2befa2b16c249ef8df722b43e6c28 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 9 Jul 2023 22:23:24 -0700 Subject: [PATCH 05/25] f2fs: fix spelling in ABI documentation Correct spelling problems as identified by codespell. 
Fixes: 9e615dbba41e ("f2fs: add missing description for ipu_policy node") Fixes: b2e4a2b300e5 ("f2fs: expose discard related parameters in sysfs") Fixes: 846ae671ad36 ("f2fs: expose extension_list sysfs entry") Signed-off-by: Randy Dunlap Cc: Jaegeuk Kim Cc: Chao Yu Cc: linux-f2fs-devel@lists.sourceforge.net Cc: Yangtao Li Cc: Konstantin Vyshetsky Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/ABI/testing/sysfs-fs-f2fs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index 8140fc98f5ae..ad3d76d37c8b 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -54,9 +54,9 @@ Description: Controls the in-place-update policy. 0x00 DISABLE disable IPU(=default option in LFS mode) 0x01 FORCE all the time 0x02 SSR if SSR mode is activated - 0x04 UTIL if FS utilization is over threashold + 0x04 UTIL if FS utilization is over threshold 0x08 SSR_UTIL if SSR mode is activated and FS utilization is over - threashold + threshold 0x10 FSYNC activated in fsync path only for high performance flash storages. IPU will be triggered only if the # of dirty pages over min_fsync_blocks. @@ -117,7 +117,7 @@ Date: December 2021 Contact: "Konstantin Vyshetsky" Description: Controls the number of discards a thread will issue at a time. Higher number will allow the discard thread to finish its work - faster, at the cost of higher latency for incomming I/O. + faster, at the cost of higher latency for incoming I/O. What: /sys/fs/f2fs//min_discard_issue_time Date: December 2021 @@ -334,7 +334,7 @@ Description: This indicates how many GC can be failed for the pinned state. 2048 trials is set by default. 
What: /sys/fs/f2fs//extension_list -Date: Feburary 2018 +Date: February 2018 Contact: "Chao Yu" Description: Used to control configure extension list: - Query: cat /sys/fs/f2fs//extension_list From b5ab3276eb69cacf44ecfb11b2bfab73096ff4e4 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 6 Jul 2023 10:06:14 +0800 Subject: [PATCH 06/25] f2fs: fix to avoid mmap vs set_compress_option case Compression option in inode should not be changed after they have been used, however, it may happen in below race case: Thread A Thread B - f2fs_ioc_set_compress_option - check f2fs_is_mmap_file() - check get_dirty_pages() - check F2FS_HAS_BLOCKS() - f2fs_file_mmap - set_inode_flag(FI_MMAP_FILE) - fault - do_page_mkwrite - f2fs_vm_page_mkwrite - f2fs_get_block_locked - fault_dirty_shared_page - set_page_dirty - update i_compress_algorithm - update i_log_cluster_size - update i_cluster_size Avoid such race condition by covering f2fs_file_mmap() w/ i_sem lock, meanwhile add mmap file check condition in f2fs_may_compress() as well. 
Fixes: e1e8debec656 ("f2fs: add F2FS_IOC_SET_COMPRESS_OPTION ioctl") Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 3 ++- fs/f2fs/file.c | 23 ++++++++++++++++++----- 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index c7cb2177b252..d372bedb0fe4 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -4483,7 +4483,8 @@ static inline bool f2fs_low_mem_mode(struct f2fs_sb_info *sbi) static inline bool f2fs_may_compress(struct inode *inode) { if (IS_SWAPFILE(inode) || f2fs_is_pinned_file(inode) || - f2fs_is_atomic_file(inode) || f2fs_has_inline_data(inode)) + f2fs_is_atomic_file(inode) || f2fs_has_inline_data(inode) || + f2fs_is_mmap_file(inode)) return false; return S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode); } diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 093039dee992..d9073afe021f 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -526,7 +526,11 @@ static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma) file_accessed(file); vma->vm_ops = &f2fs_file_vm_ops; + + f2fs_down_read(&F2FS_I(inode)->i_sem); set_inode_flag(inode, FI_MMAP_FILE); + f2fs_up_read(&F2FS_I(inode)->i_sem); + return 0; } @@ -1919,12 +1923,19 @@ static int f2fs_setflags_common(struct inode *inode, u32 iflags, u32 mask) int err = f2fs_convert_inline_inode(inode); if (err) return err; - if (!f2fs_may_compress(inode)) + + f2fs_down_write(&F2FS_I(inode)->i_sem); + if (!f2fs_may_compress(inode) || + (S_ISREG(inode->i_mode) && + F2FS_HAS_BLOCKS(inode))) { + f2fs_up_write(&F2FS_I(inode)->i_sem); return -EINVAL; - if (S_ISREG(inode->i_mode) && F2FS_HAS_BLOCKS(inode)) - return -EINVAL; - if (set_compress_context(inode)) - return -EOPNOTSUPP; + } + err = set_compress_context(inode); + f2fs_up_write(&F2FS_I(inode)->i_sem); + + if (err) + return err; } } @@ -3976,6 +3987,7 @@ static int f2fs_ioc_set_compress_option(struct file *filp, unsigned long arg) file_start_write(filp); inode_lock(inode); + 
f2fs_down_write(&F2FS_I(inode)->i_sem); if (f2fs_is_mmap_file(inode) || get_dirty_pages(inode)) { ret = -EBUSY; goto out; @@ -3995,6 +4007,7 @@ static int f2fs_ioc_set_compress_option(struct file *filp, unsigned long arg) f2fs_warn(sbi, "compression algorithm is successfully set, " "but current kernel doesn't support this algorithm."); out: + f2fs_up_write(&F2FS_I(inode)->i_sem); inode_unlock(inode); file_end_write(filp); From 51bf8d3c81992ae57beeaf22df78ed7c2782af9d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 7 Jul 2023 10:31:49 +0200 Subject: [PATCH 07/25] f2fs: don't reopen the main block device in f2fs_scan_devices f2fs_scan_devices reopens the main device since the very beginning, which has always been useless, and also means that we don't pass the right holder for the reopen, which now leads to a warning as the core super.c holder ops aren't passed in for the reopen. Fixes: 3c62be17d4f5 ("f2fs: support multiple devices") Fixes: 0718afd47f70 ("block: introduce holder ops") Signed-off-by: Christoph Hellwig Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index ca31163da00a..30883beb750a 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1561,7 +1561,8 @@ static void destroy_device_list(struct f2fs_sb_info *sbi) int i; for (i = 0; i < sbi->s_ndevs; i++) { - blkdev_put(FDEV(i).bdev, sbi->sb->s_type); + if (i > 0) + blkdev_put(FDEV(i).bdev, sbi->sb->s_type); #ifdef CONFIG_BLK_DEV_ZONED kvfree(FDEV(i).blkz_seq); #endif @@ -4190,16 +4191,12 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi) sbi->aligned_blksize = true; for (i = 0; i < max_devices; i++) { - - if (i > 0 && !RDEV(i).path[0]) + if (i == 0) + FDEV(0).bdev = sbi->sb->s_bdev; + else if (!RDEV(i).path[0]) break; - if (max_devices == 1) { - /* Single zoned block device mount */ - FDEV(0).bdev = - blkdev_get_by_dev(sbi->sb->s_bdev->bd_dev, 
mode, - sbi->sb->s_type, NULL); - } else { + if (max_devices > 1) { /* Multi-device mount */ memcpy(FDEV(i).path, RDEV(i).path, MAX_PATH_LEN); FDEV(i).total_segments = @@ -4215,10 +4212,9 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi) FDEV(i).end_blk = FDEV(i).start_blk + (FDEV(i).total_segments << sbi->log_blocks_per_seg) - 1; + FDEV(i).bdev = blkdev_get_by_path(FDEV(i).path, + mode, sbi->sb->s_type, NULL); } - FDEV(i).bdev = blkdev_get_by_path(FDEV(i).path, mode, - sbi->sb->s_type, - NULL); } if (IS_ERR(FDEV(i).bdev)) return PTR_ERR(FDEV(i).bdev); From 3a2c0e55f9bdeda9c3807d6ac23d62f027f6caa9 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 12 Jul 2023 04:08:05 +0800 Subject: [PATCH 08/25] f2fs: allow f2fs_ioc_{,de}compress_file to be interrupted This patch allows f2fs_ioc_{,de}compress_file() to be interrupted, so that, userspace won't be blocked when manual {,de}compression on large file is interrupted by signal. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index d9073afe021f..79cb6a41f128 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -4107,6 +4107,12 @@ static int f2fs_ioc_decompress_file(struct file *filp) count -= len; page_idx += len; + + cond_resched(); + if (fatal_signal_pending(current)) { + ret = -EINTR; + break; + } } if (!ret) @@ -4181,6 +4187,12 @@ static int f2fs_ioc_compress_file(struct file *filp) count -= len; page_idx += len; + + cond_resched(); + if (fatal_signal_pending(current)) { + ret = -EINTR; + break; + } } if (!ret) From 025b3602b5fa216fb87bbfa4bff8bb378fe589a0 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 12 Jul 2023 04:08:06 +0800 Subject: [PATCH 09/25] f2fs: compress: don't {,de}compress non-full cluster f2fs won't compress non-full cluster in tail of file, let's skip dirtying and rewrite such cluster during f2fs_ioc_{,de}compress_file. 
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 79cb6a41f128..74f79e7c8c02 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -4092,10 +4092,8 @@ static int f2fs_ioc_decompress_file(struct file *filp) last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); count = last_idx - page_idx; - while (count) { - int len = min(cluster_size, count); - - ret = redirty_blocks(inode, page_idx, len); + while (count && count >= cluster_size) { + ret = redirty_blocks(inode, page_idx, cluster_size); if (ret < 0) break; @@ -4105,8 +4103,8 @@ static int f2fs_ioc_decompress_file(struct file *filp) break; } - count -= len; - page_idx += len; + count -= cluster_size; + page_idx += cluster_size; cond_resched(); if (fatal_signal_pending(current)) { @@ -4172,10 +4170,8 @@ static int f2fs_ioc_compress_file(struct file *filp) last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); count = last_idx - page_idx; - while (count) { - int len = min(cluster_size, count); - - ret = redirty_blocks(inode, page_idx, len); + while (count && count >= cluster_size) { + ret = redirty_blocks(inode, page_idx, cluster_size); if (ret < 0) break; @@ -4185,8 +4181,8 @@ static int f2fs_ioc_compress_file(struct file *filp) break; } - count -= len; - page_idx += len; + count -= cluster_size; + page_idx += cluster_size; cond_resched(); if (fatal_signal_pending(current)) { From 3cb88bc15937990177df1f7eac6f22ebbed19312 Mon Sep 17 00:00:00 2001 From: Shin'ichiro Kawasaki Date: Fri, 4 Aug 2023 18:15:56 +0900 Subject: [PATCH 10/25] f2fs: check zone type before sending async reset zone command The commit 25f9080576b9 ("f2fs: add async reset zone command support") introduced "async reset zone commands" by calling __submit_zone_reset_cmd() in async discard operations. However, __submit_zone_reset_cmd() is called regardless of zone type of discard target zone. 
When devices have conventional zones, zone reset commands are sent to the conventional zones and cause I/O errors. Avoid the I/O errors by checking that the discard target zone type is sequential write required. If not, handle the discard operation in same manner as non-zoned, regular block devices. For that purpose, add a new helper function f2fs_bdev_index() which gets index of the zone reset target device. Fixes: 25f9080576b9 ("f2fs: add async reset zone command support") Signed-off-by: Shin'ichiro Kawasaki Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 16 ++++++++++++++++ fs/f2fs/segment.c | 39 ++++++++++++++++++++++++++++----------- 2 files changed, 44 insertions(+), 11 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index d372bedb0fe4..a52830927cb4 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -4423,6 +4423,22 @@ static inline bool f2fs_blkz_is_seq(struct f2fs_sb_info *sbi, int devi, } #endif +static inline int f2fs_bdev_index(struct f2fs_sb_info *sbi, + struct block_device *bdev) +{ + int i; + + if (!f2fs_is_multi_device(sbi)) + return 0; + + for (i = 0; i < sbi->s_ndevs; i++) + if (FDEV(i).bdev == bdev) + return i; + + WARN_ON(1); + return -1; +} + static inline bool f2fs_hw_should_discard(struct f2fs_sb_info *sbi) { return f2fs_sb_has_blkzoned(sbi); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index b254aaac3031..24596d0e6b61 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1260,8 +1260,16 @@ static int __submit_discard_cmd(struct f2fs_sb_info *sbi, #ifdef CONFIG_BLK_DEV_ZONED if (f2fs_sb_has_blkzoned(sbi) && bdev_is_zoned(bdev)) { - __submit_zone_reset_cmd(sbi, dc, flag, wait_list, issued); - return 0; + int devi = f2fs_bdev_index(sbi, bdev); + + if (devi < 0) + return -EINVAL; + + if (f2fs_blkz_is_seq(sbi, devi, dc->di.start)) { + __submit_zone_reset_cmd(sbi, dc, flag, + wait_list, issued); + return 0; + } } #endif @@ -1787,15 +1795,24 @@ static void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, 
block_t blkaddr) dc = __lookup_discard_cmd(sbi, blkaddr); #ifdef CONFIG_BLK_DEV_ZONED if (dc && f2fs_sb_has_blkzoned(sbi) && bdev_is_zoned(dc->bdev)) { - /* force submit zone reset */ - if (dc->state == D_PREP) - __submit_zone_reset_cmd(sbi, dc, REQ_SYNC, - &dcc->wait_list, NULL); - dc->ref++; - mutex_unlock(&dcc->cmd_lock); - /* wait zone reset */ - __wait_one_discard_bio(sbi, dc); - return; + int devi = f2fs_bdev_index(sbi, dc->bdev); + + if (devi < 0) { + mutex_unlock(&dcc->cmd_lock); + return; + } + + if (f2fs_blkz_is_seq(sbi, devi, dc->di.start)) { + /* force submit zone reset */ + if (dc->state == D_PREP) + __submit_zone_reset_cmd(sbi, dc, REQ_SYNC, + &dcc->wait_list, NULL); + dc->ref++; + mutex_unlock(&dcc->cmd_lock); + /* wait zone reset */ + __wait_one_discard_bio(sbi, dc); + return; + } } #endif if (dc) { From 2bd4df8fcbc72f58ce3c62ed021ab291ca42de0b Mon Sep 17 00:00:00 2001 From: Chunhai Guo Date: Thu, 3 Aug 2023 22:28:42 +0800 Subject: [PATCH 11/25] f2fs: Only lfs mode is allowed with zoned block device feature Now f2fs supports four block allocation modes: lfs, adaptive, fragment:segment, fragment:block. Only lfs mode is allowed with the zoned block device feature.
Fixes: 6691d940b0e0 ("f2fs: introduce fragment allocation mode mount option") Signed-off-by: Chunhai Guo Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 30883beb750a..26add77f9062 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -862,11 +862,6 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount) if (!name) return -ENOMEM; if (!strcmp(name, "adaptive")) { - if (f2fs_sb_has_blkzoned(sbi)) { - f2fs_warn(sbi, "adaptive mode is not allowed with zoned block device feature"); - kfree(name); - return -EINVAL; - } F2FS_OPTION(sbi).fs_mode = FS_MODE_ADAPTIVE; } else if (!strcmp(name, "lfs")) { F2FS_OPTION(sbi).fs_mode = FS_MODE_LFS; @@ -1331,6 +1326,11 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount) F2FS_OPTION(sbi).discard_unit = DISCARD_UNIT_SECTION; } + + if (F2FS_OPTION(sbi).fs_mode != FS_MODE_LFS) { + f2fs_info(sbi, "Only lfs mode is allowed with zoned block device feature"); + return -EINVAL; + } #else f2fs_err(sbi, "Zoned block device support is not enabled"); return -EINVAL; From a842a90926b6b96ef38d6a190c27a4a60531a633 Mon Sep 17 00:00:00 2001 From: Minjie Du Date: Mon, 17 Jul 2023 15:11:09 +0800 Subject: [PATCH 12/25] f2fs: increase usage of folio_next_index() helper Simplify code pattern of 'folio->index + folio_nr_pages(folio)' by using the existing helper folio_next_index(). 
Signed-off-by: Minjie Du Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index ecebc3a139be..5d9697717353 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -3232,8 +3232,7 @@ static int f2fs_write_cache_pages(struct address_space *mapping, } goto next; } - done_index = folio->index + - folio_nr_pages(folio); + done_index = folio_next_index(folio); done = 1; break; } From 958ccbbf1ce716d77c7cfa79ace50a421c1eed73 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 20 Jul 2023 19:29:53 +0800 Subject: [PATCH 13/25] Revert "f2fs: fix to do sanity check on extent cache correctly" syzbot reports a f2fs bug as below: UBSAN: array-index-out-of-bounds in fs/f2fs/f2fs.h:3275:19 index 1409 is out of range for type '__le32[923]' (aka 'unsigned int[923]') Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0x1e7/0x2d0 lib/dump_stack.c:106 ubsan_epilogue lib/ubsan.c:217 [inline] __ubsan_handle_out_of_bounds+0x11c/0x150 lib/ubsan.c:348 inline_data_addr fs/f2fs/f2fs.h:3275 [inline] __recover_inline_status fs/f2fs/inode.c:113 [inline] do_read_inode fs/f2fs/inode.c:480 [inline] f2fs_iget+0x4730/0x48b0 fs/f2fs/inode.c:604 f2fs_fill_super+0x640e/0x80c0 fs/f2fs/super.c:4601 mount_bdev+0x276/0x3b0 fs/super.c:1391 legacy_get_tree+0xef/0x190 fs/fs_context.c:611 vfs_get_tree+0x8c/0x270 fs/super.c:1519 do_new_mount+0x28f/0xae0 fs/namespace.c:3335 do_mount fs/namespace.c:3675 [inline] __do_sys_mount fs/namespace.c:3884 [inline] __se_sys_mount+0x2d9/0x3c0 fs/namespace.c:3861 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd The issue was bisected to: commit d48a7b3a72f121655d95b5157c32c7d555e44c05 Author: Chao Yu Date: Mon Jan 9 03:49:20 2023 +0000 f2fs: fix to do sanity check on extent cache correctly The root cause is we applied both v1 and v2 of the patch, v2 is the 
right fix, so v1 needs to be reverted in order to fix the reported issue. v1: commit d48a7b3a72f1 ("f2fs: fix to do sanity check on extent cache correctly") https://lore.kernel.org/lkml/20230109034920.492914-1-chao@kernel.org/ v2: commit 269d11948100 ("f2fs: fix to do sanity check on extent cache correctly") https://lore.kernel.org/lkml/20230207134808.1827869-1-chao@kernel.org/ Reported-by: syzbot+601018296973a481f302@syzkaller.appspotmail.com Closes: https://lore.kernel.org/linux-f2fs-devel/000000000000fcf0690600e4d04d@google.com/ Fixes: d48a7b3a72f1 ("f2fs: fix to do sanity check on extent cache correctly") Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/inode.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 09e986b050c6..e81725c922cd 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -475,6 +475,12 @@ static int do_read_inode(struct inode *inode) fi->i_inline_xattr_size = 0; } + if (!sanity_check_inode(inode, node_page)) { + f2fs_put_page(node_page, 1); + f2fs_handle_error(sbi, ERROR_CORRUPTED_INODE); + return -EFSCORRUPTED; + } + /* check data exist */ if (f2fs_has_inline_data(inode) && !f2fs_exist_data(inode)) __recover_inline_status(inode, node_page); @@ -544,12 +550,6 @@ static int do_read_inode(struct inode *inode) f2fs_init_read_extent_tree(inode, node_page); f2fs_init_age_extent_tree(inode); - if (!sanity_check_inode(inode, node_page)) { - f2fs_put_page(node_page, 1); - f2fs_handle_error(sbi, ERROR_CORRUPTED_INODE); - return -EFSCORRUPTED; - } - if (!sanity_check_extent_cache(inode)) { f2fs_put_page(node_page, 1); f2fs_handle_error(sbi, ERROR_CORRUPTED_INODE); From 8874ad7dae8d91d24cc87c545c0073b3b2da5688 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 19 Jul 2023 21:50:45 +0800 Subject: [PATCH 14/25] f2fs: fix to update i_ctime in __f2fs_setxattr() generic/728 - output mismatch (see /media/fstests/results//generic/728.out.bad) --- tests/generic/728.out 2023-07-19
07:10:48.362711407 +0000 +++ /media/fstests/results//generic/728.out.bad 2023-07-19 08:39:57.000000000 +0000 QA output created by 728 +Expected ctime to change after setxattr. +Expected ctime to change after removexattr. Silence is golden ... (Run 'diff -u /media/fstests/tests/generic/728.out /media/fstests/results//generic/728.out.bad' to see the entire diff) generic/729 1s It needs to update i_ctime after {set,remove}xattr, fix it. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/xattr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index 476b186b90a6..71bfa2391ab4 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -757,17 +757,17 @@ static int __f2fs_setxattr(struct inode *inode, int index, if (index == F2FS_XATTR_INDEX_ENCRYPTION && !strcmp(name, F2FS_XATTR_NAME_ENCRYPTION_CONTEXT)) f2fs_set_encrypted_inode(inode); - f2fs_mark_inode_dirty_sync(inode, true); if (!error && S_ISDIR(inode->i_mode)) set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_CP); same: if (is_inode_flag_set(inode, FI_ACL_MODE)) { inode->i_mode = F2FS_I(inode)->i_acl_mode; - inode->i_ctime = current_time(inode); clear_inode_flag(inode, FI_ACL_MODE); } + inode->i_ctime = current_time(inode); + f2fs_mark_inode_dirty_sync(inode, true); exit: kfree(base_addr); return error; From bc3994ffa4cf23f55171943c713366132c3ff45d Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 19 Jul 2023 21:50:46 +0800 Subject: [PATCH 15/25] f2fs: remove unneeded check condition in __f2fs_setxattr() It has checked return value of write_all_xattrs(), remove unneeded following check condition. 
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/xattr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index 71bfa2391ab4..9cc1ca75b2da 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -757,7 +757,7 @@ static int __f2fs_setxattr(struct inode *inode, int index, if (index == F2FS_XATTR_INDEX_ENCRYPTION && !strcmp(name, F2FS_XATTR_NAME_ENCRYPTION_CONTEXT)) f2fs_set_encrypted_inode(inode); - if (!error && S_ISDIR(inode->i_mode)) + if (S_ISDIR(inode->i_mode)) set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_CP); same: From 9bf1dcbdfdc8892d9cfeaeab02519c0ecf17fe51 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 8 Aug 2023 08:59:48 +0800 Subject: [PATCH 16/25] f2fs: fix to account gc stats correctly As reported, status debugfs entry shows inconsistent GC stats as below: GC calls: 6008 (BG: 6161) - data segments : 3053 (BG: 3053) - node segments : 2955 (BG: 2955) Total GC calls is smaller than BGGC calls, the reason is: - f2fs_stat_info.call_count accounts total migrated section count by f2fs_gc() - f2fs_stat_info.bg_gc accounts total call times of f2fs_gc() from background gc_thread Another issue is gc_foreground_calls sysfs entry shows total GC call count rather than FGGC call count. 
This patch changes as below for fix: - account GC calls and migrated segment count separately - support to account migrated section count if it enables large section mode - fix to show correct value in gc_foreground_calls sysfs entry Fixes: fc7100ea2a52 ("f2fs: Add f2fs stats to sysfs") Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/debug.c | 24 ++++++++++++++++++------ fs/f2fs/f2fs.h | 42 +++++++++++++++++++++--------------------- fs/f2fs/file.c | 4 ++++ fs/f2fs/gc.c | 13 +++++++------ fs/f2fs/segment.c | 1 + fs/f2fs/super.c | 1 + fs/f2fs/sysfs.c | 4 ++-- 7 files changed, 54 insertions(+), 35 deletions(-) diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 61c35b59126e..c7cf453dce83 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -511,12 +511,24 @@ static int stat_show(struct seq_file *s, void *v) seq_printf(s, " - Total : %4d\n", si->nr_total_ckpt); seq_printf(s, " - Cur time : %4d(ms)\n", si->cur_ckpt_time); seq_printf(s, " - Peak time : %4d(ms)\n", si->peak_ckpt_time); - seq_printf(s, "GC calls: %d (BG: %d)\n", - si->call_count, si->bg_gc); - seq_printf(s, " - data segments : %d (%d)\n", - si->data_segs, si->bg_data_segs); - seq_printf(s, " - node segments : %d (%d)\n", - si->node_segs, si->bg_node_segs); + seq_printf(s, "GC calls: %d (gc_thread: %d)\n", + si->gc_call_count[BACKGROUND] + + si->gc_call_count[FOREGROUND], + si->gc_call_count[BACKGROUND]); + if (__is_large_section(sbi)) { + seq_printf(s, " - data sections : %d (BG: %d)\n", + si->gc_secs[DATA][BG_GC] + si->gc_secs[DATA][FG_GC], + si->gc_secs[DATA][BG_GC]); + seq_printf(s, " - node sections : %d (BG: %d)\n", + si->gc_secs[NODE][BG_GC] + si->gc_secs[NODE][FG_GC], + si->gc_secs[NODE][BG_GC]); + } + seq_printf(s, " - data segments : %d (BG: %d)\n", + si->gc_segs[DATA][BG_GC] + si->gc_segs[DATA][FG_GC], + si->gc_segs[DATA][BG_GC]); + seq_printf(s, " - node segments : %d (BG: %d)\n", + si->gc_segs[NODE][BG_GC] + si->gc_segs[NODE][FG_GC], + si->gc_segs[NODE][BG_GC]); seq_puts(s, " - 
Reclaimed segs :\n"); seq_printf(s, " - Normal : %d\n", sbi->gc_reclaimed_segs[GC_NORMAL]); seq_printf(s, " - Idle CB : %d\n", sbi->gc_reclaimed_segs[GC_IDLE_CB]); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index a52830927cb4..6114babbb26a 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3860,6 +3860,12 @@ void f2fs_destroy_recovery_cache(void); /* * debug.c */ +enum { + BACKGROUND, + FOREGROUND, + MAX_CALL_TYPE +}; + #ifdef CONFIG_F2FS_STAT_FS struct f2fs_stat_info { struct list_head stat_list; @@ -3885,7 +3891,7 @@ struct f2fs_stat_info { int nats, dirty_nats, sits, dirty_sits; int free_nids, avail_nids, alloc_nids; int total_count, utilization; - int bg_gc, nr_wb_cp_data, nr_wb_data; + int nr_wb_cp_data, nr_wb_data; int nr_rd_data, nr_rd_node, nr_rd_meta; int nr_dio_read, nr_dio_write; unsigned int io_skip_bggc, other_skip_bggc; @@ -3905,9 +3911,11 @@ struct f2fs_stat_info { int rsvd_segs, overp_segs; int dirty_count, node_pages, meta_pages, compress_pages; int compress_page_hit; - int prefree_count, call_count, cp_count, bg_cp_count; - int tot_segs, node_segs, data_segs, free_segs, free_secs; - int bg_node_segs, bg_data_segs; + int prefree_count, free_segs, free_secs; + int cp_count, bg_cp_count; + int gc_call_count[MAX_CALL_TYPE]; + int gc_segs[2][2]; + int gc_secs[2][2]; int tot_blks, data_blks, node_blks; int bg_data_blks, bg_node_blks; int curseg[NR_CURSEG_TYPE]; @@ -3931,8 +3939,6 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) #define stat_inc_cp_count(si) ((si)->cp_count++) #define stat_inc_bg_cp_count(si) ((si)->bg_cp_count++) -#define stat_inc_call_count(si) ((si)->call_count++) -#define stat_inc_bggc_count(si) ((si)->bg_gc++) #define stat_io_skip_bggc_count(sbi) ((sbi)->io_skip_bggc++) #define stat_other_skip_bggc_count(sbi) ((sbi)->other_skip_bggc++) #define stat_inc_dirty_inode(sbi, type) ((sbi)->ndirty_inode[type]++) @@ -4017,18 +4023,12 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) if 
(cur > max) \ atomic_set(&F2FS_I_SB(inode)->max_aw_cnt, cur); \ } while (0) -#define stat_inc_seg_count(sbi, type, gc_type) \ - do { \ - struct f2fs_stat_info *si = F2FS_STAT(sbi); \ - si->tot_segs++; \ - if ((type) == SUM_TYPE_DATA) { \ - si->data_segs++; \ - si->bg_data_segs += (gc_type == BG_GC) ? 1 : 0; \ - } else { \ - si->node_segs++; \ - si->bg_node_segs += (gc_type == BG_GC) ? 1 : 0; \ - } \ - } while (0) +#define stat_inc_gc_call_count(sbi, foreground) \ + (F2FS_STAT(sbi)->gc_call_count[(foreground)]++) +#define stat_inc_gc_sec_count(sbi, type, gc_type) \ + (F2FS_STAT(sbi)->gc_secs[(type)][(gc_type)]++) +#define stat_inc_gc_seg_count(sbi, type, gc_type) \ + (F2FS_STAT(sbi)->gc_segs[(type)][(gc_type)]++) #define stat_inc_tot_blk_count(si, blks) \ ((si)->tot_blks += (blks)) @@ -4057,8 +4057,6 @@ void f2fs_update_sit_info(struct f2fs_sb_info *sbi); #else #define stat_inc_cp_count(si) do { } while (0) #define stat_inc_bg_cp_count(si) do { } while (0) -#define stat_inc_call_count(si) do { } while (0) -#define stat_inc_bggc_count(si) do { } while (0) #define stat_io_skip_bggc_count(sbi) do { } while (0) #define stat_other_skip_bggc_count(sbi) do { } while (0) #define stat_inc_dirty_inode(sbi, type) do { } while (0) @@ -4086,7 +4084,9 @@ void f2fs_update_sit_info(struct f2fs_sb_info *sbi); #define stat_inc_seg_type(sbi, curseg) do { } while (0) #define stat_inc_block_count(sbi, curseg) do { } while (0) #define stat_inc_inplace_blocks(sbi) do { } while (0) -#define stat_inc_seg_count(sbi, type, gc_type) do { } while (0) +#define stat_inc_gc_call_count(sbi, foreground) do { } while (0) +#define stat_inc_gc_sec_count(sbi, type, gc_type) do { } while (0) +#define stat_inc_gc_seg_count(sbi, type, gc_type) do { } while (0) #define stat_inc_tot_blk_count(si, blks) do { } while (0) #define stat_inc_data_blk_count(sbi, blks, gc_type) do { } while (0) #define stat_inc_node_blk_count(sbi, blks, gc_type) do { } while (0) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 
74f79e7c8c02..ff5494c255f6 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1728,6 +1728,7 @@ static int f2fs_expand_inode_data(struct inode *inode, loff_t offset, if (has_not_enough_free_secs(sbi, 0, GET_SEC_FROM_SEG(sbi, overprovision_segments(sbi)))) { f2fs_down_write(&sbi->gc_lock); + stat_inc_gc_call_count(sbi, FOREGROUND); err = f2fs_gc(sbi, &gc_control); if (err && err != -ENODATA) goto out_err; @@ -2476,6 +2477,7 @@ static int f2fs_ioc_gc(struct file *filp, unsigned long arg) gc_control.init_gc_type = sync ? FG_GC : BG_GC; gc_control.err_gc_skipped = sync; + stat_inc_gc_call_count(sbi, FOREGROUND); ret = f2fs_gc(sbi, &gc_control); out: mnt_drop_write_file(filp); @@ -2519,6 +2521,7 @@ static int __f2fs_ioc_gc_range(struct file *filp, struct f2fs_gc_range *range) } gc_control.victim_segno = GET_SEGNO(sbi, range->start); + stat_inc_gc_call_count(sbi, FOREGROUND); ret = f2fs_gc(sbi, &gc_control); if (ret) { if (ret == -EBUSY) @@ -3001,6 +3004,7 @@ static int f2fs_ioc_flush_device(struct file *filp, unsigned long arg) sm->last_victim[ALLOC_NEXT] = end_segno + 1; gc_control.victim_segno = start_segno; + stat_inc_gc_call_count(sbi, FOREGROUND); ret = f2fs_gc(sbi, &gc_control); if (ret == -EAGAIN) ret = 0; diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 01effd3fcb6c..68c3250fb3d2 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -121,8 +121,8 @@ static int gc_thread_func(void *data) else increase_sleep_time(gc_th, &wait_ms); do_gc: - if (!foreground) - stat_inc_bggc_count(sbi->stat_info); + stat_inc_gc_call_count(sbi, foreground ? + FOREGROUND : BACKGROUND); sync_mode = F2FS_OPTION(sbi).bggc_mode == BGGC_MODE_SYNC; @@ -1685,6 +1685,7 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, int seg_freed = 0, migrated = 0; unsigned char type = IS_DATASEG(get_seg_entry(sbi, segno)->type) ? SUM_TYPE_DATA : SUM_TYPE_NODE; + unsigned char data_type = (type == SUM_TYPE_DATA) ? 
DATA : NODE; int submitted = 0; if (__is_large_section(sbi)) @@ -1766,7 +1767,7 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, segno, gc_type, force_migrate); - stat_inc_seg_count(sbi, type, gc_type); + stat_inc_gc_seg_count(sbi, data_type, gc_type); sbi->gc_reclaimed_segs[sbi->gc_mode]++; migrated++; @@ -1783,12 +1784,12 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, } if (submitted) - f2fs_submit_merged_write(sbi, - (type == SUM_TYPE_NODE) ? NODE : DATA); + f2fs_submit_merged_write(sbi, data_type); blk_finish_plug(&plug); - stat_inc_call_count(sbi->stat_info); + if (migrated) + stat_inc_gc_sec_count(sbi, data_type, gc_type); return seg_freed; } diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 24596d0e6b61..d07e32e82aa9 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -435,6 +435,7 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need) .err_gc_skipped = false, .nr_free_secs = 1 }; f2fs_down_write(&sbi->gc_lock); + stat_inc_gc_call_count(sbi, FOREGROUND); f2fs_gc(sbi, &gc_control); } } diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 26add77f9062..2bbef48bc5a3 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -2206,6 +2206,7 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi) .nr_free_secs = 1 }; f2fs_down_write(&sbi->gc_lock); + stat_inc_gc_call_count(sbi, FOREGROUND); err = f2fs_gc(sbi, &gc_control); if (err == -ENODATA) { err = 0; diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 48b7e0073884..95a301581b91 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -974,8 +974,8 @@ F2FS_SBI_GENERAL_RO_ATTR(unusable_blocks_per_sec); #ifdef CONFIG_F2FS_STAT_FS STAT_INFO_RO_ATTR(cp_foreground_calls, cp_count); STAT_INFO_RO_ATTR(cp_background_calls, bg_cp_count); -STAT_INFO_RO_ATTR(gc_foreground_calls, call_count); -STAT_INFO_RO_ATTR(gc_background_calls, bg_gc); +STAT_INFO_RO_ATTR(gc_foreground_calls, gc_call_count[FOREGROUND]); +STAT_INFO_RO_ATTR(gc_background_calls, gc_call_count[BACKGROUND]); #endif 
/* FAULT_INFO ATTR */ From eb61c2cca2eb2110cc7b61a7bc15b3850977a778 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 8 Aug 2023 08:59:49 +0800 Subject: [PATCH 17/25] f2fs: fix to account cp stats correctly cp_foreground_calls sysfs entry shows total CP call count rather than foreground CP call count, fix it. Fixes: fc7100ea2a52 ("f2fs: Add f2fs stats to sysfs") Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 2 +- fs/f2fs/debug.c | 9 ++++++++- fs/f2fs/f2fs.h | 25 ++++++++++++++----------- fs/f2fs/gc.c | 5 +++++ fs/f2fs/recovery.c | 1 + fs/f2fs/segment.c | 3 ++- fs/f2fs/super.c | 8 +++++++- fs/f2fs/sysfs.c | 14 ++++++++++++-- 8 files changed, 50 insertions(+), 17 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 8fd3b7f9fb88..b0597a539fc5 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1701,9 +1701,9 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) } f2fs_restore_inmem_curseg(sbi); + stat_inc_cp_count(sbi); stop: unblock_operations(sbi); - stat_inc_cp_count(sbi->stat_info); if (cpc->reason & CP_RECOVERY) f2fs_notice(sbi, "checkpoint: version = %llx", ckpt_ver); diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index c7cf453dce83..fdbf994f1271 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -215,6 +215,9 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->valid_blks[type] += blks; } + for (i = 0; i < MAX_CALL_TYPE; i++) + si->cp_call_count[i] = atomic_read(&sbi->cp_call_count[i]); + for (i = 0; i < 2; i++) { si->segment_count[i] = sbi->segment_count[i]; si->block_count[i] = sbi->block_count[i]; @@ -497,7 +500,9 @@ static int stat_show(struct seq_file *s, void *v) seq_printf(s, " - Prefree: %d\n - Free: %d (%d)\n\n", si->prefree_count, si->free_segs, si->free_secs); seq_printf(s, "CP calls: %d (BG: %d)\n", - si->cp_count, si->bg_cp_count); + si->cp_call_count[TOTAL_CALL], + si->cp_call_count[BACKGROUND]); + seq_printf(s, "CP count: %d\n", 
si->cp_count); seq_printf(s, " - cp blocks : %u\n", si->meta_count[META_CP]); seq_printf(s, " - sit blocks : %u\n", si->meta_count[META_SIT]); @@ -699,6 +704,8 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi) atomic_set(&sbi->inplace_count, 0); for (i = META_CP; i < META_MAX; i++) atomic_set(&sbi->meta_count[i], 0); + for (i = 0; i < MAX_CALL_TYPE; i++) + atomic_set(&sbi->cp_call_count[i], 0); atomic_set(&sbi->max_aw_cnt, 0); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 6114babbb26a..c602ff2403b6 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1383,6 +1383,13 @@ enum errors_option { MOUNT_ERRORS_PANIC, /* panic on errors */ }; +enum { + BACKGROUND, + FOREGROUND, + MAX_CALL_TYPE, + TOTAL_CALL = FOREGROUND, +}; + static inline int f2fs_test_bit(unsigned int nr, char *addr); static inline void f2fs_set_bit(unsigned int nr, char *addr); static inline void f2fs_clear_bit(unsigned int nr, char *addr); @@ -1695,6 +1702,7 @@ struct f2fs_sb_info { unsigned int io_skip_bggc; /* skip background gc for in-flight IO */ unsigned int other_skip_bggc; /* skip background gc for other reasons */ unsigned int ndirty_inode[NR_INODE_TYPE]; /* # of dirty inodes */ + atomic_t cp_call_count[MAX_CALL_TYPE]; /* # of cp call */ #endif spinlock_t stat_lock; /* lock for stat operations */ @@ -3860,12 +3868,6 @@ void f2fs_destroy_recovery_cache(void); /* * debug.c */ -enum { - BACKGROUND, - FOREGROUND, - MAX_CALL_TYPE -}; - #ifdef CONFIG_F2FS_STAT_FS struct f2fs_stat_info { struct list_head stat_list; @@ -3912,7 +3914,7 @@ struct f2fs_stat_info { int dirty_count, node_pages, meta_pages, compress_pages; int compress_page_hit; int prefree_count, free_segs, free_secs; - int cp_count, bg_cp_count; + int cp_call_count[MAX_CALL_TYPE], cp_count; int gc_call_count[MAX_CALL_TYPE]; int gc_segs[2][2]; int gc_secs[2][2]; @@ -3937,8 +3939,9 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) return (struct f2fs_stat_info *)sbi->stat_info; } -#define stat_inc_cp_count(si) 
((si)->cp_count++) -#define stat_inc_bg_cp_count(si) ((si)->bg_cp_count++) +#define stat_inc_cp_call_count(sbi, foreground) \ + atomic_inc(&sbi->cp_call_count[(foreground)]) +#define stat_inc_cp_count(sbi) (F2FS_STAT(sbi)->cp_count++) #define stat_io_skip_bggc_count(sbi) ((sbi)->io_skip_bggc++) #define stat_other_skip_bggc_count(sbi) ((sbi)->other_skip_bggc++) #define stat_inc_dirty_inode(sbi, type) ((sbi)->ndirty_inode[type]++) @@ -4055,8 +4058,8 @@ void __init f2fs_create_root_stats(void); void f2fs_destroy_root_stats(void); void f2fs_update_sit_info(struct f2fs_sb_info *sbi); #else -#define stat_inc_cp_count(si) do { } while (0) -#define stat_inc_bg_cp_count(si) do { } while (0) +#define stat_inc_cp_call_count(sbi, foreground) do { } while (0) +#define stat_inc_cp_count(sbi) do { } while (0) #define stat_io_skip_bggc_count(sbi) do { } while (0) #define stat_other_skip_bggc_count(sbi) do { } while (0) #define stat_inc_dirty_inode(sbi, type) do { } while (0) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 68c3250fb3d2..6690323fff83 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1840,6 +1840,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi, struct f2fs_gc_control *gc_control) * secure free segments which doesn't need fggc any more. 
*/ if (prefree_segments(sbi)) { + stat_inc_cp_call_count(sbi, TOTAL_CALL); ret = f2fs_write_checkpoint(sbi, &cpc); if (ret) goto stop; @@ -1888,6 +1889,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi, struct f2fs_gc_control *gc_control) round++; if (skipped_round > MAX_SKIP_GC_COUNT && skipped_round * 2 >= round) { + stat_inc_cp_call_count(sbi, TOTAL_CALL); ret = f2fs_write_checkpoint(sbi, &cpc); goto stop; } @@ -1903,6 +1905,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi, struct f2fs_gc_control *gc_control) */ if (free_sections(sbi) <= upper_secs + NR_GC_CHECKPOINT_SECS && prefree_segments(sbi)) { + stat_inc_cp_call_count(sbi, TOTAL_CALL); ret = f2fs_write_checkpoint(sbi, &cpc); if (ret) goto stop; @@ -2030,6 +2033,7 @@ static int free_segment_range(struct f2fs_sb_info *sbi, if (gc_only) goto out; + stat_inc_cp_call_count(sbi, TOTAL_CALL); err = f2fs_write_checkpoint(sbi, &cpc); if (err) goto out; @@ -2222,6 +2226,7 @@ int f2fs_resize_fs(struct file *filp, __u64 block_count) clear_sbi_flag(sbi, SBI_IS_RESIZEFS); set_sbi_flag(sbi, SBI_IS_DIRTY); + stat_inc_cp_call_count(sbi, TOTAL_CALL); err = f2fs_write_checkpoint(sbi, &cpc); if (err) { update_fs_metadata(sbi, secs); diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 4e7d4ceeb084..e91f4619aa5b 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -924,6 +924,7 @@ int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only) struct cp_control cpc = { .reason = CP_RECOVERY, }; + stat_inc_cp_call_count(sbi, TOTAL_CALL); err = f2fs_write_checkpoint(sbi, &cpc); } } diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index d07e32e82aa9..35d1e1dd849f 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -513,8 +513,8 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi, bool from_bg) mutex_unlock(&sbi->flush_lock); } + stat_inc_cp_call_count(sbi, BACKGROUND); f2fs_sync_fs(sbi->sb, 1); - stat_inc_bg_cp_count(sbi->stat_info); } static int __submit_flush_wait(struct f2fs_sb_info *sbi, @@ -3248,6 +3248,7 @@ int 
f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) goto out; f2fs_down_write(&sbi->gc_lock); + stat_inc_cp_call_count(sbi, TOTAL_CALL); err = f2fs_write_checkpoint(sbi, &cpc); f2fs_up_write(&sbi->gc_lock); if (err) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 2bbef48bc5a3..a067466a694c 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1601,6 +1601,7 @@ static void f2fs_put_super(struct super_block *sb) struct cp_control cpc = { .reason = CP_UMOUNT, }; + stat_inc_cp_call_count(sbi, TOTAL_CALL); err = f2fs_write_checkpoint(sbi, &cpc); } @@ -1610,6 +1611,7 @@ static void f2fs_put_super(struct super_block *sb) struct cp_control cpc = { .reason = CP_UMOUNT | CP_TRIMMED, }; + stat_inc_cp_call_count(sbi, TOTAL_CALL); err = f2fs_write_checkpoint(sbi, &cpc); } @@ -1706,8 +1708,10 @@ int f2fs_sync_fs(struct super_block *sb, int sync) if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) return -EAGAIN; - if (sync) + if (sync) { + stat_inc_cp_call_count(sbi, TOTAL_CALL); err = f2fs_issue_checkpoint(sbi); + } return err; } @@ -2232,6 +2236,7 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi) f2fs_down_write(&sbi->gc_lock); cpc.reason = CP_PAUSE; set_sbi_flag(sbi, SBI_CP_DISABLED); + stat_inc_cp_call_count(sbi, TOTAL_CALL); err = f2fs_write_checkpoint(sbi, &cpc); if (err) goto out_unlock; @@ -4868,6 +4873,7 @@ static void kill_f2fs_super(struct super_block *sb) struct cp_control cpc = { .reason = CP_UMOUNT, }; + stat_inc_cp_call_count(sbi, TOTAL_CALL); f2fs_write_checkpoint(sbi, &cpc); } diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 95a301581b91..417fae96890f 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -356,6 +356,16 @@ static ssize_t f2fs_sbi_show(struct f2fs_attr *a, if (!strcmp(a->attr.name, "revoked_atomic_block")) return sysfs_emit(buf, "%llu\n", sbi->revoked_atomic_block); +#ifdef CONFIG_F2FS_STAT_FS + if (!strcmp(a->attr.name, "cp_foreground_calls")) + return sysfs_emit(buf, "%d\n", + 
atomic_read(&sbi->cp_call_count[TOTAL_CALL]) - + atomic_read(&sbi->cp_call_count[BACKGROUND])); + if (!strcmp(a->attr.name, "cp_background_calls")) + return sysfs_emit(buf, "%d\n", + atomic_read(&sbi->cp_call_count[BACKGROUND])); +#endif + ui = (unsigned int *)(ptr + a->offset); return sysfs_emit(buf, "%u\n", *ui); @@ -972,8 +982,8 @@ F2FS_SBI_GENERAL_RO_ATTR(unusable_blocks_per_sec); /* STAT_INFO ATTR */ #ifdef CONFIG_F2FS_STAT_FS -STAT_INFO_RO_ATTR(cp_foreground_calls, cp_count); -STAT_INFO_RO_ATTR(cp_background_calls, bg_cp_count); +STAT_INFO_RO_ATTR(cp_foreground_calls, cp_call_count[FOREGROUND]); +STAT_INFO_RO_ATTR(cp_background_calls, cp_call_count[BACKGROUND]); STAT_INFO_RO_ATTR(gc_foreground_calls, gc_call_count[FOREGROUND]); STAT_INFO_RO_ATTR(gc_background_calls, gc_call_count[BACKGROUND]); #endif From 0cc81b1ad51287847e494e055e5d3426f95e7921 Mon Sep 17 00:00:00 2001 From: Zhiguo Niu Date: Thu, 10 Aug 2023 16:40:00 +0800 Subject: [PATCH 18/25] f2fs: should update REQ_TIME for direct write The sending interval of discard and GC should also consider direct write requests; filesystem is not idle if there is direct write. Signed-off-by: Zhiguo Niu Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index ff5494c255f6..047942d2ec5d 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -4604,6 +4604,7 @@ static int f2fs_dio_write_end_io(struct kiocb *iocb, ssize_t size, int error, dec_page_count(sbi, F2FS_DIO_WRITE); if (error) return error; + f2fs_update_time(sbi, REQ_TIME); f2fs_update_iostat(sbi, NULL, APP_DIRECT_IO, size); return 0; } From 726865e69aa39d113471066acfdf2be3bbd96d50 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sun, 30 Jul 2023 22:25:51 +0800 Subject: [PATCH 19/25] f2fs: doc: fix description of max_small_discards The description of max_small_discards is out of date in the two aspects below, fix it. 
- it is disabled by default - small discards will be issued during checkpoint Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/ABI/testing/sysfs-fs-f2fs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index ad3d76d37c8b..36c3cb547901 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -102,9 +102,9 @@ What: /sys/fs/f2fs//max_small_discards Date: November 2013 Contact: "Jaegeuk Kim" Description: Controls the issue rate of discard commands that consist of small - blocks less than 2MB. The candidates to be discarded are cached until - checkpoint is triggered, and issued during the checkpoint. - By default, it is disabled with 0. + blocks less than 2MB. The candidates to be discarded are cached during + checkpoint, and issued by issue_discard thread after checkpoint. + It is enabled by default. What: /sys/fs/f2fs//max_ordered_discard Date: October 2022 From 005abf9e5e0d4dcfce318ae5dbcac32b7bf6b647 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sun, 30 Jul 2023 22:25:52 +0800 Subject: [PATCH 20/25] Revert "f2fs: do not issue small discard commands during checkpoint" Previously, we have two mechanisms to cache & submit small discards: a) set max small discard number in /sys/fs/f2fs/vdb/max_small_discards, and checkpoint will cache small discard candidates w/ configured maximum number. b) call FITRIM ioctl, also, checkpoint in f2fs_trim_fs() will cache small discard candidates w/ configured discard granularity, but w/o limitation of number. FSTRIM interface is asynchronous, so it won't submit discard directly. Finally, discard thread will submit them in background periodically. 
However, after commit 9ac00e7cef10 ("f2fs: do not issue small discard commands during checkpoint"), the mechanism a) is broken, since no matter how we configure the sysfs entry /sys/fs/f2fs/vdb/max_small_discards, checkpoint will not cache small discard candidates any more. echo 0 > /sys/fs/f2fs/vdb/max_small_discards xfs_io -f /mnt/f2fs/file -c "pwrite 0 2m" -c "fsync" xfs_io /mnt/f2fs/file -c "fpunch 0 4k" sync cat /proc/fs/f2fs/vdb/discard_plist_info |head -2 echo 100 > /sys/fs/f2fs/vdb/max_small_discards rm /mnt/f2fs/file xfs_io -f /mnt/f2fs/file -c "pwrite 0 2m" -c "fsync" xfs_io /mnt/f2fs/file -c "fpunch 0 4k" sync cat /proc/fs/f2fs/vdb/discard_plist_info |head -2 Before the patch: Discard pend list(Show diacrd_cmd count on each entry, .:not exist): 0 . . . . . . . . Discard pend list(Show diacrd_cmd count on each entry, .:not exist): 0 3 1 . . . . . . After the patch: Discard pend list(Show diacrd_cmd count on each entry, .:not exist): 0 . . . . . . . . Discard pend list(Show diacrd_cmd count on each entry, .:not exist): 0 . . . . . . . . This patch reverts commit 9ac00e7cef10 ("f2fs: do not issue small discard commands during checkpoint") in order to fix this issue. 
Fixes: 9ac00e7cef10 ("f2fs: do not issue small discard commands during checkpoint") Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 35d1e1dd849f..b38bf2b34490 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2213,7 +2213,7 @@ void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi, len = next_pos - cur_pos; if (f2fs_sb_has_blkzoned(sbi) || - !force || len < cpc->trim_minlen) + (force && len < cpc->trim_minlen)) goto skip; f2fs_issue_discard(sbi, entry->start_blkaddr + cur_pos, From 5c13e2388bf3426fd69a89eb46e50469e9624e56 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 18 Aug 2023 11:34:32 -0700 Subject: [PATCH 21/25] f2fs: avoid false alarm of circular locking ====================================================== WARNING: possible circular locking dependency detected 6.5.0-rc5-syzkaller-00353-gae545c3283dc #0 Not tainted ------------------------------------------------------ syz-executor273/5027 is trying to acquire lock: ffff888077fe1fb0 (&fi->i_sem){+.+.}-{3:3}, at: f2fs_down_write fs/f2fs/f2fs.h:2133 [inline] ffff888077fe1fb0 (&fi->i_sem){+.+.}-{3:3}, at: f2fs_add_inline_entry+0x300/0x6f0 fs/f2fs/inline.c:644 but task is already holding lock: ffff888077fe07c8 (&fi->i_xattr_sem){.+.+}-{3:3}, at: f2fs_down_read fs/f2fs/f2fs.h:2108 [inline] ffff888077fe07c8 (&fi->i_xattr_sem){.+.+}-{3:3}, at: f2fs_add_dentry+0x92/0x230 fs/f2fs/dir.c:783 which lock already depends on the new lock. 
the existing dependency chain (in reverse order) is: -> #1 (&fi->i_xattr_sem){.+.+}-{3:3}: down_read+0x9c/0x470 kernel/locking/rwsem.c:1520 f2fs_down_read fs/f2fs/f2fs.h:2108 [inline] f2fs_getxattr+0xb1e/0x12c0 fs/f2fs/xattr.c:532 __f2fs_get_acl+0x5a/0x900 fs/f2fs/acl.c:179 f2fs_acl_create fs/f2fs/acl.c:377 [inline] f2fs_init_acl+0x15c/0xb30 fs/f2fs/acl.c:420 f2fs_init_inode_metadata+0x159/0x1290 fs/f2fs/dir.c:558 f2fs_add_regular_entry+0x79e/0xb90 fs/f2fs/dir.c:740 f2fs_add_dentry+0x1de/0x230 fs/f2fs/dir.c:788 f2fs_do_add_link+0x190/0x280 fs/f2fs/dir.c:827 f2fs_add_link fs/f2fs/f2fs.h:3554 [inline] f2fs_mkdir+0x377/0x620 fs/f2fs/namei.c:781 vfs_mkdir+0x532/0x7e0 fs/namei.c:4117 do_mkdirat+0x2a9/0x330 fs/namei.c:4140 __do_sys_mkdir fs/namei.c:4160 [inline] __se_sys_mkdir fs/namei.c:4158 [inline] __x64_sys_mkdir+0xf2/0x140 fs/namei.c:4158 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x38/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd -> #0 (&fi->i_sem){+.+.}-{3:3}: check_prev_add kernel/locking/lockdep.c:3142 [inline] check_prevs_add kernel/locking/lockdep.c:3261 [inline] validate_chain kernel/locking/lockdep.c:3876 [inline] __lock_acquire+0x2e3d/0x5de0 kernel/locking/lockdep.c:5144 lock_acquire kernel/locking/lockdep.c:5761 [inline] lock_acquire+0x1ae/0x510 kernel/locking/lockdep.c:5726 down_write+0x93/0x200 kernel/locking/rwsem.c:1573 f2fs_down_write fs/f2fs/f2fs.h:2133 [inline] f2fs_add_inline_entry+0x300/0x6f0 fs/f2fs/inline.c:644 f2fs_add_dentry+0xa6/0x230 fs/f2fs/dir.c:784 f2fs_do_add_link+0x190/0x280 fs/f2fs/dir.c:827 f2fs_add_link fs/f2fs/f2fs.h:3554 [inline] f2fs_mkdir+0x377/0x620 fs/f2fs/namei.c:781 vfs_mkdir+0x532/0x7e0 fs/namei.c:4117 ovl_do_mkdir fs/overlayfs/overlayfs.h:196 [inline] ovl_mkdir_real+0xb5/0x370 fs/overlayfs/dir.c:146 ovl_workdir_create+0x3de/0x820 fs/overlayfs/super.c:309 ovl_make_workdir fs/overlayfs/super.c:711 [inline] ovl_get_workdir fs/overlayfs/super.c:864 [inline] 
ovl_fill_super+0xdab/0x6180 fs/overlayfs/super.c:1400 vfs_get_super+0xf9/0x290 fs/super.c:1152 vfs_get_tree+0x88/0x350 fs/super.c:1519 do_new_mount fs/namespace.c:3335 [inline] path_mount+0x1492/0x1ed0 fs/namespace.c:3662 do_mount fs/namespace.c:3675 [inline] __do_sys_mount fs/namespace.c:3884 [inline] __se_sys_mount fs/namespace.c:3861 [inline] __x64_sys_mount+0x293/0x310 fs/namespace.c:3861 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x38/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd other info that might help us debug this: Possible unsafe locking scenario: CPU0 CPU1 ---- ---- rlock(&fi->i_xattr_sem); lock(&fi->i_sem); lock(&fi->i_xattr_sem); lock(&fi->i_sem); Cc: Reported-and-tested-by: syzbot+e5600587fa9cbf8e3826@syzkaller.appspotmail.com Fixes: 5eda1ad1aaff "f2fs: fix deadlock in i_xattr_sem and inode page lock" Tested-by: Guenter Roeck Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 24 +++++++++++++++--------- fs/f2fs/inline.c | 3 ++- 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index c602ff2403b6..c0b7417f1d87 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2122,15 +2122,6 @@ static inline int f2fs_down_read_trylock(struct f2fs_rwsem *sem) return down_read_trylock(&sem->internal_rwsem); } -#ifdef CONFIG_DEBUG_LOCK_ALLOC -static inline void f2fs_down_read_nested(struct f2fs_rwsem *sem, int subclass) -{ - down_read_nested(&sem->internal_rwsem, subclass); -} -#else -#define f2fs_down_read_nested(sem, subclass) f2fs_down_read(sem) -#endif - static inline void f2fs_up_read(struct f2fs_rwsem *sem) { up_read(&sem->internal_rwsem); @@ -2141,6 +2132,21 @@ static inline void f2fs_down_write(struct f2fs_rwsem *sem) down_write(&sem->internal_rwsem); } +#ifdef CONFIG_DEBUG_LOCK_ALLOC +static inline void f2fs_down_read_nested(struct f2fs_rwsem *sem, int subclass) +{ + down_read_nested(&sem->internal_rwsem, subclass); +} + +static inline void 
f2fs_down_write_nested(struct f2fs_rwsem *sem, int subclass) +{ + down_write_nested(&sem->internal_rwsem, subclass); +} +#else +#define f2fs_down_read_nested(sem, subclass) f2fs_down_read(sem) +#define f2fs_down_write_nested(sem, subclass) f2fs_down_write(sem) +#endif + static inline int f2fs_down_write_trylock(struct f2fs_rwsem *sem) { return down_write_trylock(&sem->internal_rwsem); diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 4638fee16a91..0d185ad5e469 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -641,7 +641,8 @@ int f2fs_add_inline_entry(struct inode *dir, const struct f2fs_filename *fname, } if (inode) { - f2fs_down_write(&F2FS_I(inode)->i_sem); + f2fs_down_write_nested(&F2FS_I(inode)->i_sem, + SINGLE_DEPTH_NESTING); page = f2fs_init_inode_metadata(inode, dir, fname, ipage); if (IS_ERR(page)) { err = PTR_ERR(page); From c988794984135df31e6beedc53bd5d4ea8c8f788 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 21 Aug 2023 23:22:23 +0800 Subject: [PATCH 22/25] f2fs: clean up error handling in sanity_check_{compress_,}inode() In sanity_check_{compress_,}inode(), it doesn't need to set SBI_NEED_FSCK in each error case, instead, we can set the flag in do_read_inode() only once when sanity_check_inode() fails. 
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/inode.c | 23 ++++------------------- 1 file changed, 4 insertions(+), 19 deletions(-) diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index e81725c922cd..8ed5406ea204 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -214,7 +214,7 @@ static bool sanity_check_compress_inode(struct inode *inode, f2fs_warn(sbi, "%s: inode (ino=%lx) has unsupported compress algorithm: %u, run fsck to fix", __func__, inode->i_ino, ri->i_compress_algorithm); - goto err; + return false; } if (le64_to_cpu(ri->i_compr_blocks) > SECTOR_TO_BLOCK(inode->i_blocks)) { @@ -222,14 +222,14 @@ static bool sanity_check_compress_inode(struct inode *inode, "%s: inode (ino=%lx) has inconsistent i_compr_blocks:%llu, i_blocks:%llu, run fsck to fix", __func__, inode->i_ino, le64_to_cpu(ri->i_compr_blocks), SECTOR_TO_BLOCK(inode->i_blocks)); - goto err; + return false; } if (ri->i_log_cluster_size < MIN_COMPRESS_LOG_SIZE || ri->i_log_cluster_size > MAX_COMPRESS_LOG_SIZE) { f2fs_warn(sbi, "%s: inode (ino=%lx) has unsupported log cluster size: %u, run fsck to fix", __func__, inode->i_ino, ri->i_log_cluster_size); - goto err; + return false; } clevel = le16_to_cpu(ri->i_compress_flag) >> @@ -273,8 +273,6 @@ static bool sanity_check_compress_inode(struct inode *inode, err_level: f2fs_warn(sbi, "%s: inode (ino=%lx) has unsupported compress level: %u, run fsck to fix", __func__, inode->i_ino, clevel); -err: - set_sbi_flag(sbi, SBI_NEED_FSCK); return false; } @@ -287,14 +285,12 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page) iblocks = le64_to_cpu(F2FS_INODE(node_page)->i_blocks); if (!iblocks) { - set_sbi_flag(sbi, SBI_NEED_FSCK); f2fs_warn(sbi, "%s: corrupted inode i_blocks i_ino=%lx iblocks=%llu, run fsck to fix.", __func__, inode->i_ino, iblocks); return false; } if (ino_of_node(node_page) != nid_of_node(node_page)) { - set_sbi_flag(sbi, SBI_NEED_FSCK); f2fs_warn(sbi, "%s: corrupted inode footer i_ino=%lx, ino,nid: 
[%u, %u] run fsck to fix.", __func__, inode->i_ino, ino_of_node(node_page), nid_of_node(node_page)); @@ -303,7 +299,6 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page) if (f2fs_has_extra_attr(inode)) { if (!f2fs_sb_has_extra_attr(sbi)) { - set_sbi_flag(sbi, SBI_NEED_FSCK); f2fs_warn(sbi, "%s: inode (ino=%lx) is with extra_attr, but extra_attr feature is off", __func__, inode->i_ino); return false; @@ -311,7 +306,6 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page) if (fi->i_extra_isize > F2FS_TOTAL_EXTRA_ATTR_SIZE || fi->i_extra_isize < F2FS_MIN_EXTRA_ATTR_SIZE || fi->i_extra_isize % sizeof(__le32)) { - set_sbi_flag(sbi, SBI_NEED_FSCK); f2fs_warn(sbi, "%s: inode (ino=%lx) has corrupted i_extra_isize: %d, max: %zu", __func__, inode->i_ino, fi->i_extra_isize, F2FS_TOTAL_EXTRA_ATTR_SIZE); @@ -321,7 +315,6 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page) f2fs_has_inline_xattr(inode) && (!fi->i_inline_xattr_size || fi->i_inline_xattr_size > MAX_INLINE_XATTR_SIZE)) { - set_sbi_flag(sbi, SBI_NEED_FSCK); f2fs_warn(sbi, "%s: inode (ino=%lx) has corrupted i_inline_xattr_size: %d, max: %zu", __func__, inode->i_ino, fi->i_inline_xattr_size, MAX_INLINE_XATTR_SIZE); @@ -335,7 +328,6 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page) return false; } } else if (f2fs_sb_has_flexible_inline_xattr(sbi)) { - set_sbi_flag(sbi, SBI_NEED_FSCK); f2fs_warn(sbi, "%s: corrupted inode ino=%lx, run fsck to fix.", __func__, inode->i_ino); return false; @@ -343,31 +335,26 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page) if (!f2fs_sb_has_extra_attr(sbi)) { if (f2fs_sb_has_project_quota(sbi)) { - set_sbi_flag(sbi, SBI_NEED_FSCK); f2fs_warn(sbi, "%s: corrupted inode ino=%lx, wrong feature flag: %u, run fsck to fix.", __func__, inode->i_ino, F2FS_FEATURE_PRJQUOTA); return false; } if (f2fs_sb_has_inode_chksum(sbi)) { - set_sbi_flag(sbi, SBI_NEED_FSCK); 
f2fs_warn(sbi, "%s: corrupted inode ino=%lx, wrong feature flag: %u, run fsck to fix.", __func__, inode->i_ino, F2FS_FEATURE_INODE_CHKSUM); return false; } if (f2fs_sb_has_flexible_inline_xattr(sbi)) { - set_sbi_flag(sbi, SBI_NEED_FSCK); f2fs_warn(sbi, "%s: corrupted inode ino=%lx, wrong feature flag: %u, run fsck to fix.", __func__, inode->i_ino, F2FS_FEATURE_FLEXIBLE_INLINE_XATTR); return false; } if (f2fs_sb_has_inode_crtime(sbi)) { - set_sbi_flag(sbi, SBI_NEED_FSCK); f2fs_warn(sbi, "%s: corrupted inode ino=%lx, wrong feature flag: %u, run fsck to fix.", __func__, inode->i_ino, F2FS_FEATURE_INODE_CRTIME); return false; } if (f2fs_sb_has_compression(sbi)) { - set_sbi_flag(sbi, SBI_NEED_FSCK); f2fs_warn(sbi, "%s: corrupted inode ino=%lx, wrong feature flag: %u, run fsck to fix.", __func__, inode->i_ino, F2FS_FEATURE_COMPRESSION); return false; @@ -375,21 +362,18 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page) } if (f2fs_sanity_check_inline_data(inode)) { - set_sbi_flag(sbi, SBI_NEED_FSCK); f2fs_warn(sbi, "%s: inode (ino=%lx, mode=%u) should not have inline_data, run fsck to fix", __func__, inode->i_ino, inode->i_mode); return false; } if (f2fs_has_inline_dentry(inode) && !S_ISDIR(inode->i_mode)) { - set_sbi_flag(sbi, SBI_NEED_FSCK); f2fs_warn(sbi, "%s: inode (ino=%lx, mode=%u) should not have inline_dentry, run fsck to fix", __func__, inode->i_ino, inode->i_mode); return false; } if ((fi->i_flags & F2FS_CASEFOLD_FL) && !f2fs_sb_has_casefold(sbi)) { - set_sbi_flag(sbi, SBI_NEED_FSCK); f2fs_warn(sbi, "%s: inode (ino=%lx) has casefold flag, but casefold feature is off", __func__, inode->i_ino); return false; @@ -477,6 +461,7 @@ static int do_read_inode(struct inode *inode) if (!sanity_check_inode(inode, node_page)) { f2fs_put_page(node_page, 1); + set_sbi_flag(sbi, SBI_NEED_FSCK); f2fs_handle_error(sbi, ERROR_CORRUPTED_INODE); return -EFSCORRUPTED; } From 5118697f7215711f83c339cedab68399d6a01314 Mon Sep 17 00:00:00 2001 From: Chao Yu 
Date: Mon, 21 Aug 2023 23:22:24 +0800 Subject: [PATCH 23/25] f2fs: fix error path of f2fs_submit_page_read() In the error path of f2fs_submit_page_read(), the code failed to call iostat_update_and_unbind_ctx() and to free bio_post_read_ctx; fix it. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 5d9697717353..916e317ac925 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1167,6 +1167,9 @@ static int f2fs_submit_page_read(struct inode *inode, struct page *page, f2fs_wait_on_block_writeback(inode, blkaddr); if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) { + iostat_update_and_unbind_ctx(bio); + if (bio->bi_private) + mempool_free(bio->bi_private, bio_post_read_ctx_pool); bio_put(bio); return -EFAULT; } From 091a4dfbb1d32b06c031edbfe2a44af100c4604f Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 21 Aug 2023 23:22:25 +0800 Subject: [PATCH 24/25] f2fs: compress: fix to assign compress_level for lz4 correctly After remount, F2FS_OPTION().compress_level was incorrectly assigned LZ4HC_DEFAULT_CLEVEL, which resulted in lz4hc:9 being enabled; fix it. 1. mount /dev/vdb /dev/vdb on /mnt/f2fs type f2fs (...,compress_algorithm=lz4,compress_log_size=2,...) 2. mount -t f2fs -o remount,compress_log_size=3 /mnt/f2fs/ 3. mount|grep f2fs /dev/vdb on /mnt/f2fs type f2fs (...,compress_algorithm=lz4:9,compress_log_size=3,...) 
Fixes: 00e120b5e4b5 ("f2fs: assign default compression level") Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index a067466a694c..8d9d2ee7f3c7 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -591,7 +591,7 @@ static int f2fs_set_lz4hc_level(struct f2fs_sb_info *sbi, const char *str) unsigned int level; if (strlen(str) == 3) { - F2FS_OPTION(sbi).compress_level = LZ4HC_DEFAULT_CLEVEL; + F2FS_OPTION(sbi).compress_level = 0; return 0; } From 3b7166121402a5062d18dcf4e3bce083fb9e4201 Mon Sep 17 00:00:00 2001 From: Daeho Jeong Date: Thu, 24 Aug 2023 09:08:31 -0700 Subject: [PATCH 25/25] f2fs: use finish zone command when closing a zone Use the finish zone command first when a zone should be closed. Signed-off-by: Daeho Jeong Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index b38bf2b34490..d05b41608fc0 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -4948,12 +4948,19 @@ static int check_zone_write_pointer(struct f2fs_sb_info *sbi, GET_BLKOFF_FROM_SEG0(sbi, last_valid_block), wp_segno, wp_blkoff); - ret = blkdev_issue_zeroout(fdev->bdev, zone->wp, - zone->len - (zone->wp - zone->start), - GFP_NOFS, 0); - if (ret) - f2fs_err(sbi, "Fill up zone failed: %s (errno=%d)", - fdev->path, ret); + ret = blkdev_zone_mgmt(fdev->bdev, REQ_OP_ZONE_FINISH, + zone->start, zone->len, GFP_NOFS); + if (ret == -EOPNOTSUPP) { + ret = blkdev_issue_zeroout(fdev->bdev, zone->wp, + zone->len - (zone->wp - zone->start), + GFP_NOFS, 0); + if (ret) + f2fs_err(sbi, "Fill up zone failed: %s (errno=%d)", + fdev->path, ret); + } else if (ret) { + f2fs_err(sbi, "Finishing zone failed: %s (errno=%d)", + fdev->path, ret); + } return ret; }