From b62e71be2110d8b52bf5faf3c3ed7ca1a0c113a5 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sun, 23 Apr 2023 23:49:15 +0800 Subject: [PATCH 01/46] f2fs: support errors=remount-ro|continue|panic mountoption This patch supports errors=remount-ro|continue|panic mount option for f2fs. f2fs behaves as below in three different modes: mode continue remount-ro panic access ops normal normal N/A syscall errors -EIO -EROFS N/A mount option rw ro N/A pending dir write keep keep N/A pending non-dir write drop keep N/A pending node write drop keep N/A pending meta write keep keep N/A By default it uses "continue" mode. [Yangtao helps to clean up function's name] Signed-off-by: Yangtao Li Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/filesystems/f2fs.rst | 16 ++++ fs/f2fs/checkpoint.c | 7 +- fs/f2fs/data.c | 4 + fs/f2fs/f2fs.h | 20 ++++- fs/f2fs/file.c | 5 -- fs/f2fs/gc.c | 2 +- fs/f2fs/node.c | 3 + fs/f2fs/super.c | 134 ++++++++++++++++++++++++++--- 8 files changed, 167 insertions(+), 24 deletions(-) diff --git a/Documentation/filesystems/f2fs.rst b/Documentation/filesystems/f2fs.rst index c57745375edb..9359978a5af2 100644 --- a/Documentation/filesystems/f2fs.rst +++ b/Documentation/filesystems/f2fs.rst @@ -351,6 +351,22 @@ age_extent_cache Enable an age extent cache based on rb-tree. It records data block update frequency of the extent per inode, in order to provide better temperature hints for data block allocation. +errors=%s Specify f2fs behavior on critical errors. This supports modes: + "panic", "continue" and "remount-ro", respectively, trigger + panic immediately, continue without doing anything, and remount + the partition in read-only mode. By default it uses "continue" + mode.
+ ====================== =============== =============== ======== + mode continue remount-ro panic + ====================== =============== =============== ======== + access ops normal normal N/A + syscall errors -EIO -EROFS N/A + mount option rw ro N/A + pending dir write keep keep N/A + pending non-dir write drop keep N/A + pending node write drop keep N/A + pending meta write keep keep N/A + ====================== =============== =============== ======== ======================== ============================================================ Debugfs Entries diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 64b3860f50ee..8fd3b7f9fb88 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -30,12 +30,9 @@ void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io, unsigned char reason) { f2fs_build_fault_attr(sbi, 0, 0); - set_ckpt_flags(sbi, CP_ERROR_FLAG); - if (!end_io) { + if (!end_io) f2fs_flush_merged_writes(sbi); - - f2fs_handle_stop(sbi, reason); - } + f2fs_handle_critical_error(sbi, reason, end_io); } /* diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 7165b1202f53..f26eac327d6e 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2807,6 +2807,10 @@ int f2fs_write_single_data_page(struct page *page, int *submitted, if (S_ISDIR(inode->i_mode) && !is_sbi_flag_set(sbi, SBI_IS_CLOSE)) goto redirty_out; + + /* keep data pages in remount-ro mode */ + if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_READONLY) + goto redirty_out; goto out; } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index d211ee89c158..7afc9aef127a 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -162,6 +162,7 @@ struct f2fs_mount_info { int fs_mode; /* fs mode: LFS or ADAPTIVE */ int bggc_mode; /* bggc mode: off, on or sync */ int memory_mode; /* memory mode */ + int errors; /* errors parameter */ int discard_unit; /* * discard command's offset/size should * be aligned to this unit: block, @@ -1370,6 +1371,12 @@ enum { MEMORY_MODE_LOW, /* memory mode for low memry
devices */ }; +enum errors_option { + MOUNT_ERRORS_READONLY, /* remount fs ro on errors */ + MOUNT_ERRORS_CONTINUE, /* continue on errors */ + MOUNT_ERRORS_PANIC, /* panic on errors */ +}; + static inline int f2fs_test_bit(unsigned int nr, char *addr); static inline void f2fs_set_bit(unsigned int nr, char *addr); static inline void f2fs_clear_bit(unsigned int nr, char *addr); @@ -1721,8 +1728,14 @@ struct f2fs_sb_info { struct workqueue_struct *post_read_wq; /* post read workqueue */ - unsigned char errors[MAX_F2FS_ERRORS]; /* error flags */ - spinlock_t error_lock; /* protect errors array */ + /* + * If we are in irq context, let's update error information into + * on-disk superblock in the work. + */ + struct work_struct s_error_work; + unsigned char errors[MAX_F2FS_ERRORS]; /* error flags */ + unsigned char stop_reason[MAX_STOP_REASON]; /* stop reason */ + spinlock_t error_lock; /* protect errors/stop_reason array */ bool error_dirty; /* errors of sb is dirty */ struct kmem_cache *inline_xattr_slab; /* inline xattr entry */ @@ -3541,8 +3554,9 @@ int f2fs_enable_quota_files(struct f2fs_sb_info *sbi, bool rdonly); int f2fs_quota_sync(struct super_block *sb, int type); loff_t max_file_blocks(struct inode *inode); void f2fs_quota_off_umount(struct super_block *sb); -void f2fs_handle_stop(struct f2fs_sb_info *sbi, unsigned char reason); void f2fs_save_errors(struct f2fs_sb_info *sbi, unsigned char flag); +void f2fs_handle_critical_error(struct f2fs_sb_info *sbi, unsigned char reason, + bool irq_context); void f2fs_handle_error(struct f2fs_sb_info *sbi, unsigned char error); int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover); int f2fs_sync_fs(struct super_block *sb, int sync); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 5ac53d2627d2..9c9c3f660e01 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -2225,7 +2225,6 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg) ret = 0; f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN); - 
set_sbi_flag(sbi, SBI_IS_SHUTDOWN); trace_f2fs_shutdown(sbi, in, ret); } return ret; @@ -2238,7 +2237,6 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg) if (ret) goto out; f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN); - set_sbi_flag(sbi, SBI_IS_SHUTDOWN); thaw_bdev(sb->s_bdev); break; case F2FS_GOING_DOWN_METASYNC: @@ -2247,16 +2245,13 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg) if (ret) goto out; f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN); - set_sbi_flag(sbi, SBI_IS_SHUTDOWN); break; case F2FS_GOING_DOWN_NOSYNC: f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN); - set_sbi_flag(sbi, SBI_IS_SHUTDOWN); break; case F2FS_GOING_DOWN_METAFLUSH: f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_META_IO); f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN); - set_sbi_flag(sbi, SBI_IS_SHUTDOWN); break; case F2FS_GOING_DOWN_NEED_FSCK: set_sbi_flag(sbi, SBI_NEED_FSCK); diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 61c5f9d26018..d455140322a8 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -59,7 +59,7 @@ static int gc_thread_func(void *data) if (gc_th->gc_wake) gc_th->gc_wake = false; - if (try_to_freeze()) { + if (try_to_freeze() || f2fs_readonly(sbi->sb)) { stat_other_skip_bggc_count(sbi); continue; } diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index bd1dad523796..834c6f099c95 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1596,6 +1596,9 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, trace_f2fs_writepage(page, NODE); if (unlikely(f2fs_cp_error(sbi))) { + /* keep node pages in remount-ro mode */ + if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_READONLY) + goto redirty_out; ClearPageUptodate(page); dec_page_count(sbi, F2FS_DIRTY_NODES); unlock_page(page); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 9f15b03037db..51812f459581 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -164,6 +164,7 @@ enum { Opt_discard_unit, Opt_memory_mode, 
Opt_age_extent_cache, + Opt_errors, Opt_err, }; @@ -243,6 +244,7 @@ static match_table_t f2fs_tokens = { {Opt_discard_unit, "discard_unit=%s"}, {Opt_memory_mode, "memory=%s"}, {Opt_age_extent_cache, "age_extent_cache"}, + {Opt_errors, "errors=%s"}, {Opt_err, NULL}, }; @@ -1268,6 +1270,25 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount) case Opt_age_extent_cache: set_opt(sbi, AGE_EXTENT_CACHE); break; + case Opt_errors: + name = match_strdup(&args[0]); + if (!name) + return -ENOMEM; + if (!strcmp(name, "remount-ro")) { + F2FS_OPTION(sbi).errors = + MOUNT_ERRORS_READONLY; + } else if (!strcmp(name, "continue")) { + F2FS_OPTION(sbi).errors = + MOUNT_ERRORS_CONTINUE; + } else if (!strcmp(name, "panic")) { + F2FS_OPTION(sbi).errors = + MOUNT_ERRORS_PANIC; + } else { + kfree(name); + return -EINVAL; + } + kfree(name); + break; default: f2fs_err(sbi, "Unrecognized mount option \"%s\" or missing value", p); @@ -1622,6 +1643,9 @@ static void f2fs_put_super(struct super_block *sb) f2fs_destroy_node_manager(sbi); f2fs_destroy_segment_manager(sbi); + /* flush s_error_work before sbi destroy */ + flush_work(&sbi->s_error_work); + f2fs_destroy_post_read_wq(sbi); kvfree(sbi->ckpt); @@ -2052,6 +2076,13 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) else if (F2FS_OPTION(sbi).memory_mode == MEMORY_MODE_LOW) seq_printf(seq, ",memory=%s", "low"); + if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_READONLY) + seq_printf(seq, ",errors=%s", "remount-ro"); + else if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_CONTINUE) + seq_printf(seq, ",errors=%s", "continue"); + else if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_PANIC) + seq_printf(seq, ",errors=%s", "panic"); + return 0; } @@ -2080,6 +2111,7 @@ static void default_options(struct f2fs_sb_info *sbi) } F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_ON; F2FS_OPTION(sbi).memory_mode = MEMORY_MODE_NORMAL; + F2FS_OPTION(sbi).errors = MOUNT_ERRORS_CONTINUE; sbi->sb->s_flags &= ~SB_INLINECRYPT; @@ 
-2281,6 +2313,9 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) if (err) goto restore_opts; + /* flush outstanding errors before changing fs state */ + flush_work(&sbi->s_error_work); + /* * Previous and new state of filesystem is RO, * so skip checking GC and FLUSH_MERGE conditions. @@ -3926,45 +3961,60 @@ int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover) return err; } -void f2fs_handle_stop(struct f2fs_sb_info *sbi, unsigned char reason) +static void save_stop_reason(struct f2fs_sb_info *sbi, unsigned char reason) +{ + unsigned long flags; + + spin_lock_irqsave(&sbi->error_lock, flags); + if (sbi->stop_reason[reason] < GENMASK(BITS_PER_BYTE - 1, 0)) + sbi->stop_reason[reason]++; + spin_unlock_irqrestore(&sbi->error_lock, flags); +} + +static void f2fs_record_stop_reason(struct f2fs_sb_info *sbi) { struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); + unsigned long flags; int err; f2fs_down_write(&sbi->sb_lock); - if (raw_super->s_stop_reason[reason] < GENMASK(BITS_PER_BYTE - 1, 0)) - raw_super->s_stop_reason[reason]++; + spin_lock_irqsave(&sbi->error_lock, flags); + memcpy(raw_super->s_stop_reason, sbi->stop_reason, MAX_STOP_REASON); + spin_unlock_irqrestore(&sbi->error_lock, flags); err = f2fs_commit_super(sbi, false); - if (err) - f2fs_err(sbi, "f2fs_commit_super fails to record reason:%u err:%d", - reason, err); + f2fs_up_write(&sbi->sb_lock); + if (err) + f2fs_err(sbi, "f2fs_commit_super fails to record err:%d", err); } void f2fs_save_errors(struct f2fs_sb_info *sbi, unsigned char flag) { - spin_lock(&sbi->error_lock); + unsigned long flags; + + spin_lock_irqsave(&sbi->error_lock, flags); if (!test_bit(flag, (unsigned long *)sbi->errors)) { set_bit(flag, (unsigned long *)sbi->errors); sbi->error_dirty = true; } - spin_unlock(&sbi->error_lock); + spin_unlock_irqrestore(&sbi->error_lock, flags); } static bool f2fs_update_errors(struct f2fs_sb_info *sbi) { + unsigned long flags; bool need_update = false; - 
spin_lock(&sbi->error_lock); + spin_lock_irqsave(&sbi->error_lock, flags); if (sbi->error_dirty) { memcpy(F2FS_RAW_SUPER(sbi)->s_errors, sbi->errors, MAX_F2FS_ERRORS); sbi->error_dirty = false; need_update = true; } - spin_unlock(&sbi->error_lock); + spin_unlock_irqrestore(&sbi->error_lock, flags); return need_update; } @@ -3988,6 +4038,66 @@ void f2fs_handle_error(struct f2fs_sb_info *sbi, unsigned char error) f2fs_up_write(&sbi->sb_lock); } +static bool system_going_down(void) +{ + return system_state == SYSTEM_HALT || system_state == SYSTEM_POWER_OFF + || system_state == SYSTEM_RESTART; +} + +void f2fs_handle_critical_error(struct f2fs_sb_info *sbi, unsigned char reason, + bool irq_context) +{ + struct super_block *sb = sbi->sb; + bool shutdown = reason == STOP_CP_REASON_SHUTDOWN; + bool continue_fs = !shutdown && + F2FS_OPTION(sbi).errors == MOUNT_ERRORS_CONTINUE; + + set_ckpt_flags(sbi, CP_ERROR_FLAG); + + if (!f2fs_hw_is_readonly(sbi)) { + save_stop_reason(sbi, reason); + + if (irq_context && !shutdown) + schedule_work(&sbi->s_error_work); + else + f2fs_record_stop_reason(sbi); + } + + /* + * We force ERRORS_RO behavior when system is rebooting. Otherwise we + * could panic during 'reboot -f' as the underlying device got already + * disabled. 
+ */ + if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_PANIC && + !shutdown && !system_going_down() && + !is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN)) + panic("F2FS-fs (device %s): panic forced after error\n", + sb->s_id); + + if (shutdown) + set_sbi_flag(sbi, SBI_IS_SHUTDOWN); + + /* continue filesystem operators if errors=continue */ + if (continue_fs || f2fs_readonly(sb)) + return; + + f2fs_warn(sbi, "Remounting filesystem read-only"); + /* + * Make sure updated value of ->s_mount_flags will be visible before + * ->s_flags update + */ + smp_wmb(); + sb->s_flags |= SB_RDONLY; +} + +static void f2fs_record_error_work(struct work_struct *work) +{ + struct f2fs_sb_info *sbi = container_of(work, + struct f2fs_sb_info, s_error_work); + + f2fs_record_stop_reason(sbi); +} + static int f2fs_scan_devices(struct f2fs_sb_info *sbi) { struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); @@ -4218,7 +4328,9 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) sb->s_fs_info = sbi; sbi->raw_super = raw_super; + INIT_WORK(&sbi->s_error_work, f2fs_record_error_work); memcpy(sbi->errors, raw_super->s_errors, MAX_F2FS_ERRORS); + memcpy(sbi->stop_reason, raw_super->s_stop_reason, MAX_STOP_REASON); /* precompute checksum seed for metadata */ if (f2fs_sb_has_inode_chksum(sbi)) @@ -4615,6 +4727,8 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) f2fs_destroy_segment_manager(sbi); stop_ckpt_thread: f2fs_stop_ckpt_thread(sbi); + /* flush s_error_work before sbi destroy */ + flush_work(&sbi->s_error_work); f2fs_destroy_post_read_wq(sbi); free_devices: destroy_device_list(sbi); From 888ca6edac81e919fa7accb3b4f1d363e3c1e5f8 Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Wed, 26 Apr 2023 00:06:11 +0800 Subject: [PATCH 02/46] f2fs: add sanity check for proc_mkdir Return -ENOMEM when proc_mkdir failed. 
Signed-off-by: Yangtao Li Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/sysfs.c | 39 +++++++++++++++++++++++---------------- 1 file changed, 23 insertions(+), 16 deletions(-) diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 8ea05340bad9..467d743c801f 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -1386,12 +1386,19 @@ int __init f2fs_init_sysfs(void) ret = kobject_init_and_add(&f2fs_feat, &f2fs_feat_ktype, NULL, "features"); - if (ret) { - kobject_put(&f2fs_feat); - kset_unregister(&f2fs_kset); - } else { - f2fs_proc_root = proc_mkdir("fs/f2fs", NULL); + if (ret) + goto put_kobject; + + f2fs_proc_root = proc_mkdir("fs/f2fs", NULL); + if (!f2fs_proc_root) { + ret = -ENOMEM; + goto put_kobject; } + + return 0; +put_kobject: + kobject_put(&f2fs_feat); + kset_unregister(&f2fs_kset); return ret; } @@ -1430,23 +1437,24 @@ int f2fs_register_sysfs(struct f2fs_sb_info *sbi) if (err) goto put_feature_list_kobj; - if (f2fs_proc_root) - sbi->s_proc = proc_mkdir(sb->s_id, f2fs_proc_root); + sbi->s_proc = proc_mkdir(sb->s_id, f2fs_proc_root); + if (!sbi->s_proc) { + err = -ENOMEM; + goto put_feature_list_kobj; + } - if (sbi->s_proc) { - proc_create_single_data("segment_info", 0444, sbi->s_proc, + proc_create_single_data("segment_info", 0444, sbi->s_proc, segment_info_seq_show, sb); - proc_create_single_data("segment_bits", 0444, sbi->s_proc, + proc_create_single_data("segment_bits", 0444, sbi->s_proc, segment_bits_seq_show, sb); #ifdef CONFIG_F2FS_IOSTAT - proc_create_single_data("iostat_info", 0444, sbi->s_proc, + proc_create_single_data("iostat_info", 0444, sbi->s_proc, iostat_info_seq_show, sb); #endif - proc_create_single_data("victim_bits", 0444, sbi->s_proc, + proc_create_single_data("victim_bits", 0444, sbi->s_proc, victim_bits_seq_show, sb); - proc_create_single_data("discard_plist_info", 0444, sbi->s_proc, + proc_create_single_data("discard_plist_info", 0444, sbi->s_proc, discard_plist_seq_show, sb); - } return 0; put_feature_list_kobj: 
kobject_put(&sbi->s_feature_list_kobj); @@ -1462,8 +1470,7 @@ int f2fs_register_sysfs(struct f2fs_sb_info *sbi) void f2fs_unregister_sysfs(struct f2fs_sb_info *sbi) { - if (sbi->s_proc) - remove_proc_subtree(sbi->sb->s_id, f2fs_proc_root); + remove_proc_subtree(sbi->sb->s_id, f2fs_proc_root); kobject_put(&sbi->s_stat_kobj); wait_for_completion(&sbi->s_stat_kobj_unregister); From 7cd2e5f75b86a1befa99834f3ed1d735eeff69e6 Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Wed, 26 Apr 2023 00:47:11 +0800 Subject: [PATCH 03/46] f2fs: do not allow to defragment files have FI_COMPRESS_RELEASED If a file has FI_COMPRESS_RELEASED, all writes for it should not be allowed. Fixes: 5fdb322ff2c2 ("f2fs: add F2FS_IOC_DECOMPRESS_FILE and F2FS_IOC_COMPRESS_FILE") Signed-off-by: Qi Han Signed-off-by: Yangtao Li Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 9c9c3f660e01..78aa8cff4b41 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -2588,6 +2588,11 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, inode_lock(inode); + if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) { + err = -EINVAL; + goto unlock_out; + } + /* if in-place-update policy is enabled, don't waste time here */ set_inode_flag(inode, FI_OPU_WRITE); if (f2fs_should_update_inplace(inode, NULL)) { @@ -2712,6 +2717,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, clear_inode_flag(inode, FI_SKIP_WRITES); out: clear_inode_flag(inode, FI_OPU_WRITE); +unlock_out: inode_unlock(inode); if (!err) range->len = (u64)total << PAGE_SHIFT; From 1223e432d9e16df39ba51f496c6ad3d7d560f612 Mon Sep 17 00:00:00 2001 From: Li Zetao Date: Mon, 24 Apr 2023 23:46:48 +0000 Subject: [PATCH 04/46] f2fs: remove redundant goto statement in f2fs_read_single_page() After the commit "0a4ee518185", this "goto" statement was redundant, remove it for clean code.
Signed-off-by: Li Zetao Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index f26eac327d6e..7dd92a9028b1 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2173,7 +2173,6 @@ static int f2fs_read_single_page(struct inode *inode, struct page *page, f2fs_update_iostat(F2FS_I_SB(inode), NULL, FS_DATA_READ_IO, F2FS_BLKSIZE); *last_block_in_bio = block_nr; - goto out; out: *bio_ret = bio; return ret; From 08c3eab525efb31406494282552a23f33a8a921a Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Mon, 17 Apr 2023 22:51:46 +0200 Subject: [PATCH 05/46] f2fs: remove some dead code 'ret' is known to be 0 at the point. So these lines of code should just be removed. Signed-off-by: Christophe JAILLET Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 834c6f099c95..4a105a0cd794 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -2066,7 +2066,6 @@ int f2fs_wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, struct list_head *head = &sbi->fsync_node_list; unsigned long flags; unsigned int cur_seq_id = 0; - int ret2, ret = 0; while (seq_id && cur_seq_id < seq_id) { spin_lock_irqsave(&sbi->fsync_node_lock, flags); @@ -2087,16 +2086,9 @@ int f2fs_wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, f2fs_wait_on_page_writeback(page, NODE, true, false); put_page(page); - - if (ret) - break; } - ret2 = filemap_check_errors(NODE_MAPPING(sbi)); - if (!ret) - ret = ret2; - - return ret; + return filemap_check_errors(NODE_MAPPING(sbi)); } static int f2fs_write_node_pages(struct address_space *mapping, From e067dc3c6b9c419bac43c6a0be2d85f44681f863 Mon Sep 17 00:00:00 2001 From: Daeho Jeong Date: Wed, 3 May 2023 13:53:49 -0700 Subject: [PATCH 06/46] f2fs: maintain six open zones for zoned devices To keep six open zone constraints, make them not to be open 
over six open zones. Signed-off-by: Daeho Jeong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++ fs/f2fs/f2fs.h | 5 +++++ 2 files changed, 63 insertions(+) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 7dd92a9028b1..0990d7f05366 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -383,6 +383,17 @@ static void f2fs_write_end_io(struct bio *bio) bio_put(bio); } +#ifdef CONFIG_BLK_DEV_ZONED +static void f2fs_zone_write_end_io(struct bio *bio) +{ + struct f2fs_bio_info *io = (struct f2fs_bio_info *)bio->bi_private; + + bio->bi_private = io->bi_private; + complete(&io->zone_wait); + f2fs_write_end_io(bio); +} +#endif + struct block_device *f2fs_target_device(struct f2fs_sb_info *sbi, block_t blk_addr, sector_t *sector) { @@ -639,6 +650,11 @@ int f2fs_init_write_merge_io(struct f2fs_sb_info *sbi) INIT_LIST_HEAD(&sbi->write_io[i][j].io_list); INIT_LIST_HEAD(&sbi->write_io[i][j].bio_list); init_f2fs_rwsem(&sbi->write_io[i][j].bio_list_lock); +#ifdef CONFIG_BLK_DEV_ZONED + init_completion(&sbi->write_io[i][j].zone_wait); + sbi->write_io[i][j].zone_pending_bio = NULL; + sbi->write_io[i][j].bi_private = NULL; +#endif } } @@ -965,6 +981,26 @@ int f2fs_merge_page_bio(struct f2fs_io_info *fio) return 0; } +#ifdef CONFIG_BLK_DEV_ZONED +static bool is_end_zone_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr) +{ + int devi = 0; + + if (f2fs_is_multi_device(sbi)) { + devi = f2fs_target_device_index(sbi, blkaddr); + if (blkaddr < FDEV(devi).start_blk || + blkaddr > FDEV(devi).end_blk) { + f2fs_err(sbi, "Invalid block %x", blkaddr); + return false; + } + blkaddr -= FDEV(devi).start_blk; + } + return bdev_zoned_model(FDEV(devi).bdev) == BLK_ZONED_HM && + f2fs_blkz_is_seq(sbi, devi, blkaddr) && + (blkaddr % sbi->blocks_per_blkz == sbi->blocks_per_blkz - 1); +} +#endif + void f2fs_submit_page_write(struct f2fs_io_info *fio) { struct f2fs_sb_info *sbi = fio->sbi; @@ -975,6 +1011,16 @@ void 
f2fs_submit_page_write(struct f2fs_io_info *fio) f2fs_bug_on(sbi, is_read_io(fio->op)); f2fs_down_write(&io->io_rwsem); + +#ifdef CONFIG_BLK_DEV_ZONED + if (f2fs_sb_has_blkzoned(sbi) && btype < META && io->zone_pending_bio) { + wait_for_completion_io(&io->zone_wait); + bio_put(io->zone_pending_bio); + io->zone_pending_bio = NULL; + io->bi_private = NULL; + } +#endif + next: if (fio->in_list) { spin_lock(&io->io_lock); @@ -1038,6 +1084,18 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio) if (fio->in_list) goto next; out: +#ifdef CONFIG_BLK_DEV_ZONED + if (f2fs_sb_has_blkzoned(sbi) && btype < META && + is_end_zone_blkaddr(sbi, fio->new_blkaddr)) { + bio_get(io->bio); + reinit_completion(&io->zone_wait); + io->bi_private = io->bio->bi_private; + io->bio->bi_private = io; + io->bio->bi_end_io = f2fs_zone_write_end_io; + io->zone_pending_bio = io->bio; + __submit_merged_bio(io); + } +#endif if (is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN) || !f2fs_is_checkpoint_ready(sbi)) __submit_merged_bio(io); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 7afc9aef127a..0f05c1dd633f 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1218,6 +1218,11 @@ struct f2fs_bio_info { struct bio *bio; /* bios to merge */ sector_t last_block_in_bio; /* last block number */ struct f2fs_io_info fio; /* store buffered io info. */ +#ifdef CONFIG_BLK_DEV_ZONED + struct completion zone_wait; /* condition value for the previous open zone to close */ + struct bio *zone_pending_bio; /* pending bio for the previous zone */ + void *bi_private; /* previous bi_private for pending bio */ +#endif struct f2fs_rwsem io_rwsem; /* blocking op for bio */ spinlock_t io_lock; /* serialize DATA/NODE IOs */ struct list_head io_list; /* track fios */ From 633c8b9409f564ce4b7f7944c595ffac27ed1ff4 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 5 May 2023 12:16:54 -0700 Subject: [PATCH 07/46] f2fs: fix the wrong condition to determine atomic context Should use !in_task for irq context. 
Cc: stable@vger.kernel.org Fixes: 1aa161e43106 ("f2fs: fix scheduling while atomic in decompression path") Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 11653fa79289..10b545a1088e 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -743,7 +743,7 @@ void f2fs_decompress_cluster(struct decompress_io_ctx *dic, bool in_task) ret = -EFSCORRUPTED; /* Avoid f2fs_commit_super in irq context */ - if (in_task) + if (!in_task) f2fs_save_errors(sbi, ERROR_FAIL_DECOMPRESSION); else f2fs_handle_error(sbi, ERROR_FAIL_DECOMPRESSION); From 04abeb699ddce800837c4039ea1cc7d4d139bb36 Mon Sep 17 00:00:00 2001 From: Daeho Jeong Date: Fri, 5 May 2023 13:40:00 -0700 Subject: [PATCH 08/46] f2fs: close unused open zones while mounting Zoned UFS allows only 6 open zones at the same time, so we need to take care of the count of open zones while mounting. Signed-off-by: Daeho Jeong Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 53 +++++++++++++++++++++++++++-------------------- 1 file changed, 31 insertions(+), 22 deletions(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 6db410f1bb8c..43d537d29b52 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -4810,40 +4810,49 @@ static int check_zone_write_pointer(struct f2fs_sb_info *sbi, break; } - /* - * If last valid block is beyond the write pointer, report the - * inconsistency. This inconsistency does not cause write error - * because the zone will not be selected for write operation until - * it get discarded. Just report it. 
- */ - if (last_valid_block >= wp_block) { - f2fs_notice(sbi, "Valid block beyond write pointer: " - "valid block[0x%x,0x%x] wp[0x%x,0x%x]", - GET_SEGNO(sbi, last_valid_block), - GET_BLKOFF_FROM_SEG0(sbi, last_valid_block), - wp_segno, wp_blkoff); + // The write pointer matches with the valid blocks + if (last_valid_block + 1 == wp_block) return 0; - } - /* - * If there is no valid block in the zone and if write pointer is - * not at zone start, reset the write pointer. - */ - if (last_valid_block + 1 == zone_block && zone->wp != zone->start) { + if (last_valid_block + 1 == zone_block) { + /* + * If there is no valid block in the zone and if write pointer + * is not at zone start, reset the write pointer. + */ f2fs_notice(sbi, "Zone without valid block has non-zero write " "pointer. Reset the write pointer: wp[0x%x,0x%x]", wp_segno, wp_blkoff); ret = __f2fs_issue_discard_zone(sbi, fdev->bdev, zone_block, zone->len >> log_sectors_per_block); - if (ret) { + if (ret) f2fs_err(sbi, "Discard zone failed: %s (errno=%d)", fdev->path, ret); - return ret; - } + + return ret; } - return 0; + /* + * If there are valid blocks and the write pointer doesn't + * match with them, we need to report the inconsistency and + * fill the zone till the end to close the zone. This inconsistency + * does not cause write error because the zone will not be selected + * for write operation until it get discarded. 
+ */ + f2fs_notice(sbi, "Valid blocks are not aligned with write pointer: " + "valid block[0x%x,0x%x] wp[0x%x,0x%x]", + GET_SEGNO(sbi, last_valid_block), + GET_BLKOFF_FROM_SEG0(sbi, last_valid_block), + wp_segno, wp_blkoff); + + ret = blkdev_issue_zeroout(fdev->bdev, zone->wp, + zone->len - (zone->wp - zone->start), + GFP_NOFS, 0); + if (ret) + f2fs_err(sbi, "Fill up zone failed: %s (errno=%d)", + fdev->path, ret); + + return ret; } static struct f2fs_dev_info *get_target_zoned_dev(struct f2fs_sb_info *sbi, From 36ded4c106db2434754c9bdcabdbdb52117be35f Mon Sep 17 00:00:00 2001 From: Yonggil Song Date: Fri, 12 May 2023 13:16:10 +0900 Subject: [PATCH 09/46] f2fs: Fix over-estimating free section during FG GC There was a bug that finishing FG GC unconditionally because free sections are over-estimated after checkpoint in FG GC. This patch initializes sec_freed by every checkpoint in FG GC. Signed-off-by: Yonggil Song Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index d455140322a8..51d7e8d29bf1 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1797,7 +1797,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi, struct f2fs_gc_control *gc_control) { int gc_type = gc_control->init_gc_type; unsigned int segno = gc_control->victim_segno; - int sec_freed = 0, seg_freed = 0, total_freed = 0; + int sec_freed = 0, seg_freed = 0, total_freed = 0, total_sec_freed = 0; int ret = 0; struct cp_control cpc; struct gc_inode_list gc_list = { @@ -1842,6 +1842,8 @@ int f2fs_gc(struct f2fs_sb_info *sbi, struct f2fs_gc_control *gc_control) ret = f2fs_write_checkpoint(sbi, &cpc); if (ret) goto stop; + /* Reset due to checkpoint */ + sec_freed = 0; } } @@ -1866,15 +1868,17 @@ int f2fs_gc(struct f2fs_sb_info *sbi, struct f2fs_gc_control *gc_control) gc_control->should_migrate_blocks); total_freed += seg_freed; - if (seg_freed == f2fs_usable_segs_in_sec(sbi, segno)) + if 
(seg_freed == f2fs_usable_segs_in_sec(sbi, segno)) { sec_freed++; + total_sec_freed++; + } if (gc_type == FG_GC) { sbi->cur_victim_sec = NULL_SEGNO; if (has_enough_free_secs(sbi, sec_freed, 0)) { if (!gc_control->no_bg_gc && - sec_freed < gc_control->nr_free_secs) + total_sec_freed < gc_control->nr_free_secs) goto go_gc_more; goto stop; } @@ -1901,6 +1905,8 @@ int f2fs_gc(struct f2fs_sb_info *sbi, struct f2fs_gc_control *gc_control) ret = f2fs_write_checkpoint(sbi, &cpc); if (ret) goto stop; + /* Reset due to checkpoint */ + sec_freed = 0; } go_gc_more: segno = NULL_SEGNO; @@ -1913,7 +1919,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi, struct f2fs_gc_control *gc_control) if (gc_type == FG_GC) f2fs_unpin_all_sections(sbi, true); - trace_f2fs_gc_end(sbi->sb, ret, total_freed, sec_freed, + trace_f2fs_gc_end(sbi->sb, ret, total_freed, total_sec_freed, get_pages(sbi, F2FS_DIRTY_NODES), get_pages(sbi, F2FS_DIRTY_DENTS), get_pages(sbi, F2FS_DIRTY_IMETA), @@ -1927,7 +1933,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi, struct f2fs_gc_control *gc_control) put_gc_inode(&gc_list); if (gc_control->err_gc_skipped && !ret) - ret = sec_freed ? 0 : -EAGAIN; + ret = total_sec_freed ? 0 : -EAGAIN; return ret; } From f082c6b205a06953f26c40bdc7621cc5a58ceb7c Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sun, 14 May 2023 16:07:23 +0800 Subject: [PATCH 10/46] f2fs: fix potential deadlock due to unpaired node_write lock use If S_NOQUOTA is cleared from inode during data page writeback of quota file, it may miss to unlock node_write lock, result in potential deadlock, fix to use the lock in paired. 
Kworker Thread - writepage if (IS_NOQUOTA()) f2fs_down_read(&sbi->node_write); - vfs_cleanup_quota_inode - inode->i_flags &= ~S_NOQUOTA; if (IS_NOQUOTA()) f2fs_up_read(&sbi->node_write); Fixes: 79963d967b49 ("f2fs: shrink node_write lock coverage") Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 7 ++++--- fs/f2fs/data.c | 7 ++++--- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 10b545a1088e..905b7c39a2b3 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -1215,6 +1215,7 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc, unsigned int last_index = cc->cluster_size - 1; loff_t psize; int i, err; + bool quota_inode = IS_NOQUOTA(inode); /* we should bypass data pages to proceed the kworker jobs */ if (unlikely(f2fs_cp_error(sbi))) { @@ -1222,7 +1223,7 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc, goto out_free; } - if (IS_NOQUOTA(inode)) { + if (quota_inode) { /* * We need to wait for node_write to avoid block allocation during * checkpoint. 
This can only happen to quota writes which can cause @@ -1344,7 +1345,7 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc, set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN); f2fs_put_dnode(&dn); - if (IS_NOQUOTA(inode)) + if (quota_inode) f2fs_up_read(&sbi->node_write); else f2fs_unlock_op(sbi); @@ -1370,7 +1371,7 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc, out_put_dnode: f2fs_put_dnode(&dn); out_unlock_op: - if (IS_NOQUOTA(inode)) + if (quota_inode) f2fs_up_read(&sbi->node_write); else f2fs_unlock_op(sbi); diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 0990d7f05366..3fad7a23a507 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2832,6 +2832,7 @@ int f2fs_write_single_data_page(struct page *page, int *submitted, loff_t psize = (loff_t)(page->index + 1) << PAGE_SHIFT; unsigned offset = 0; bool need_balance_fs = false; + bool quota_inode = IS_NOQUOTA(inode); int err = 0; struct f2fs_io_info fio = { .sbi = sbi, @@ -2893,19 +2894,19 @@ int f2fs_write_single_data_page(struct page *page, int *submitted, goto out; /* Dentry/quota blocks are controlled by checkpoint */ - if (S_ISDIR(inode->i_mode) || IS_NOQUOTA(inode)) { + if (S_ISDIR(inode->i_mode) || quota_inode) { /* * We need to wait for node_write to avoid block allocation during * checkpoint. This can only happen to quota writes which can cause * the below discard race condition. */ - if (IS_NOQUOTA(inode)) + if (quota_inode) f2fs_down_read(&sbi->node_write); fio.need_lock = LOCK_DONE; err = f2fs_do_write_data_page(&fio); - if (IS_NOQUOTA(inode)) + if (quota_inode) f2fs_up_read(&sbi->node_write); goto done; From 478d7100f44b7d250272fb86d70c909045171c9e Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 17 May 2023 17:42:49 +0800 Subject: [PATCH 11/46] f2fs: renew value of F2FS_MOUNT_* Then we can just define a newly introduced mount option w/ the latest free number rather than a random free one. Just cleanup, no logic changes.
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 56 +++++++++++++++++++++++++------------------------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 0f05c1dd633f..c660618ba911 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -80,34 +80,34 @@ extern const char *f2fs_fault_name[FAULT_MAX]; /* * For mount options */ -#define F2FS_MOUNT_DISABLE_ROLL_FORWARD 0x00000002 -#define F2FS_MOUNT_DISCARD 0x00000004 -#define F2FS_MOUNT_NOHEAP 0x00000008 -#define F2FS_MOUNT_XATTR_USER 0x00000010 -#define F2FS_MOUNT_POSIX_ACL 0x00000020 -#define F2FS_MOUNT_DISABLE_EXT_IDENTIFY 0x00000040 -#define F2FS_MOUNT_INLINE_XATTR 0x00000080 -#define F2FS_MOUNT_INLINE_DATA 0x00000100 -#define F2FS_MOUNT_INLINE_DENTRY 0x00000200 -#define F2FS_MOUNT_FLUSH_MERGE 0x00000400 -#define F2FS_MOUNT_NOBARRIER 0x00000800 -#define F2FS_MOUNT_FASTBOOT 0x00001000 -#define F2FS_MOUNT_READ_EXTENT_CACHE 0x00002000 -#define F2FS_MOUNT_DATA_FLUSH 0x00008000 -#define F2FS_MOUNT_FAULT_INJECTION 0x00010000 -#define F2FS_MOUNT_USRQUOTA 0x00080000 -#define F2FS_MOUNT_GRPQUOTA 0x00100000 -#define F2FS_MOUNT_PRJQUOTA 0x00200000 -#define F2FS_MOUNT_QUOTA 0x00400000 -#define F2FS_MOUNT_INLINE_XATTR_SIZE 0x00800000 -#define F2FS_MOUNT_RESERVE_ROOT 0x01000000 -#define F2FS_MOUNT_DISABLE_CHECKPOINT 0x02000000 -#define F2FS_MOUNT_NORECOVERY 0x04000000 -#define F2FS_MOUNT_ATGC 0x08000000 -#define F2FS_MOUNT_MERGE_CHECKPOINT 0x10000000 -#define F2FS_MOUNT_GC_MERGE 0x20000000 -#define F2FS_MOUNT_COMPRESS_CACHE 0x40000000 -#define F2FS_MOUNT_AGE_EXTENT_CACHE 0x80000000 +#define F2FS_MOUNT_DISABLE_ROLL_FORWARD 0x00000001 +#define F2FS_MOUNT_DISCARD 0x00000002 +#define F2FS_MOUNT_NOHEAP 0x00000004 +#define F2FS_MOUNT_XATTR_USER 0x00000008 +#define F2FS_MOUNT_POSIX_ACL 0x00000010 +#define F2FS_MOUNT_DISABLE_EXT_IDENTIFY 0x00000020 +#define F2FS_MOUNT_INLINE_XATTR 0x00000040 +#define F2FS_MOUNT_INLINE_DATA 0x00000080 +#define F2FS_MOUNT_INLINE_DENTRY 
0x00000100 +#define F2FS_MOUNT_FLUSH_MERGE 0x00000200 +#define F2FS_MOUNT_NOBARRIER 0x00000400 +#define F2FS_MOUNT_FASTBOOT 0x00000800 +#define F2FS_MOUNT_READ_EXTENT_CACHE 0x00001000 +#define F2FS_MOUNT_DATA_FLUSH 0x00002000 +#define F2FS_MOUNT_FAULT_INJECTION 0x00004000 +#define F2FS_MOUNT_USRQUOTA 0x00008000 +#define F2FS_MOUNT_GRPQUOTA 0x00010000 +#define F2FS_MOUNT_PRJQUOTA 0x00020000 +#define F2FS_MOUNT_QUOTA 0x00040000 +#define F2FS_MOUNT_INLINE_XATTR_SIZE 0x00080000 +#define F2FS_MOUNT_RESERVE_ROOT 0x00100000 +#define F2FS_MOUNT_DISABLE_CHECKPOINT 0x00200000 +#define F2FS_MOUNT_NORECOVERY 0x00400000 +#define F2FS_MOUNT_ATGC 0x00800000 +#define F2FS_MOUNT_MERGE_CHECKPOINT 0x01000000 +#define F2FS_MOUNT_GC_MERGE 0x02000000 +#define F2FS_MOUNT_COMPRESS_CACHE 0x04000000 +#define F2FS_MOUNT_AGE_EXTENT_CACHE 0x08000000 #define F2FS_OPTION(sbi) ((sbi)->mount_opt) #define clear_opt(sbi, option) (F2FS_OPTION(sbi).opt &= ~F2FS_MOUNT_##option) From 77e820ea73a5b86f434a776d63e1e5f50a366c19 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 17 May 2023 17:42:50 +0800 Subject: [PATCH 12/46] f2fs: renew value of F2FS_FEATURE_* Define F2FS_FEATURE_* macro w/ 32-bits value rather than 16-bits value. No logic changes. 
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index c660618ba911..faa27f41f39d 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -186,21 +186,21 @@ struct f2fs_mount_info { unsigned char noextensions[COMPRESS_EXT_NUM][F2FS_EXTENSION_LEN]; /* extensions */ }; -#define F2FS_FEATURE_ENCRYPT 0x0001 -#define F2FS_FEATURE_BLKZONED 0x0002 -#define F2FS_FEATURE_ATOMIC_WRITE 0x0004 -#define F2FS_FEATURE_EXTRA_ATTR 0x0008 -#define F2FS_FEATURE_PRJQUOTA 0x0010 -#define F2FS_FEATURE_INODE_CHKSUM 0x0020 -#define F2FS_FEATURE_FLEXIBLE_INLINE_XATTR 0x0040 -#define F2FS_FEATURE_QUOTA_INO 0x0080 -#define F2FS_FEATURE_INODE_CRTIME 0x0100 -#define F2FS_FEATURE_LOST_FOUND 0x0200 -#define F2FS_FEATURE_VERITY 0x0400 -#define F2FS_FEATURE_SB_CHKSUM 0x0800 -#define F2FS_FEATURE_CASEFOLD 0x1000 -#define F2FS_FEATURE_COMPRESSION 0x2000 -#define F2FS_FEATURE_RO 0x4000 +#define F2FS_FEATURE_ENCRYPT 0x00000001 +#define F2FS_FEATURE_BLKZONED 0x00000002 +#define F2FS_FEATURE_ATOMIC_WRITE 0x00000004 +#define F2FS_FEATURE_EXTRA_ATTR 0x00000008 +#define F2FS_FEATURE_PRJQUOTA 0x00000010 +#define F2FS_FEATURE_INODE_CHKSUM 0x00000020 +#define F2FS_FEATURE_FLEXIBLE_INLINE_XATTR 0x00000040 +#define F2FS_FEATURE_QUOTA_INO 0x00000080 +#define F2FS_FEATURE_INODE_CRTIME 0x00000100 +#define F2FS_FEATURE_LOST_FOUND 0x00000200 +#define F2FS_FEATURE_VERITY 0x00000400 +#define F2FS_FEATURE_SB_CHKSUM 0x00000800 +#define F2FS_FEATURE_CASEFOLD 0x00001000 +#define F2FS_FEATURE_COMPRESSION 0x00002000 +#define F2FS_FEATURE_RO 0x00004000 #define __F2FS_HAS_FEATURE(raw_super, mask) \ ((raw_super->feature & cpu_to_le32(mask)) != 0) From 90b7c4b748d897226577abb14480ec61a7c2a1f7 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 27 May 2023 08:15:39 +0800 Subject: [PATCH 13/46] f2fs: fix to set noatime and immutable flag for quota file We should set noatime bit for quota 
files, since no one cares about atime of quota file, and we should set immutable bit as well, since nobody should write to the file through exported interfaces. Meanwhile this patch uses inode_lock to avoid a race condition while updating inode->i_flags and f2fs_inode->i_flags. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 51812f459581..8eb17cc73941 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -2748,6 +2748,7 @@ static int f2fs_quota_enable(struct super_block *sb, int type, int format_id, { struct inode *qf_inode; unsigned long qf_inum; + unsigned long qf_flag = F2FS_NOATIME_FL | F2FS_IMMUTABLE_FL; int err; BUG_ON(!f2fs_sb_has_quota_ino(F2FS_SB(sb))); @@ -2763,7 +2764,15 @@ static int f2fs_quota_enable(struct super_block *sb, int type, int format_id, } /* Don't account quota for quota files to avoid recursion */ + inode_lock(qf_inode); qf_inode->i_flags |= S_NOQUOTA; + + if ((F2FS_I(qf_inode)->i_flags & qf_flag) != qf_flag) { + F2FS_I(qf_inode)->i_flags |= qf_flag; + f2fs_set_inode_flags(qf_inode); + } + inode_unlock(qf_inode); + err = dquot_load_quota_inode(qf_inode, type, format_id, flags); iput(qf_inode); return err; From bfd476623999118d9c509cb0fa9380f2912bc225 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 23 May 2023 20:35:21 +0800 Subject: [PATCH 14/46] f2fs: clean up w/ sbi->log_sectors_per_block Use sbi->log_sectors_per_block to clean up below calculated one: unsigned int log_sectors_per_block = sbi->log_blocksize - SECTOR_SHIFT; Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 43d537d29b52..9282399cc810 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -4768,17 +4768,17 @@ static int check_zone_write_pointer(struct f2fs_sb_info *sbi, { unsigned int wp_segno,
wp_blkoff, zone_secno, zone_segno, segno; block_t zone_block, wp_block, last_valid_block; - unsigned int log_sectors_per_block = sbi->log_blocksize - SECTOR_SHIFT; int i, s, b, ret; struct seg_entry *se; if (zone->type != BLK_ZONE_TYPE_SEQWRITE_REQ) return 0; - wp_block = fdev->start_blk + (zone->wp >> log_sectors_per_block); + wp_block = fdev->start_blk + (zone->wp >> sbi->log_sectors_per_block); wp_segno = GET_SEGNO(sbi, wp_block); wp_blkoff = wp_block - START_BLOCK(sbi, wp_segno); - zone_block = fdev->start_blk + (zone->start >> log_sectors_per_block); + zone_block = fdev->start_blk + (zone->start >> + sbi->log_sectors_per_block); zone_segno = GET_SEGNO(sbi, zone_block); zone_secno = GET_SEC_FROM_SEG(sbi, zone_segno); @@ -4824,7 +4824,7 @@ static int check_zone_write_pointer(struct f2fs_sb_info *sbi, "pointer. Reset the write pointer: wp[0x%x,0x%x]", wp_segno, wp_blkoff); ret = __f2fs_issue_discard_zone(sbi, fdev->bdev, zone_block, - zone->len >> log_sectors_per_block); + zone->len >> sbi->log_sectors_per_block); if (ret) f2fs_err(sbi, "Discard zone failed: %s (errno=%d)", fdev->path, ret); @@ -4885,7 +4885,6 @@ static int fix_curseg_write_pointer(struct f2fs_sb_info *sbi, int type) struct blk_zone zone; unsigned int cs_section, wp_segno, wp_blkoff, wp_sector_off; block_t cs_zone_block, wp_block; - unsigned int log_sectors_per_block = sbi->log_blocksize - SECTOR_SHIFT; sector_t zone_sector; int err; @@ -4897,8 +4896,8 @@ static int fix_curseg_write_pointer(struct f2fs_sb_info *sbi, int type) return 0; /* report zone for the sector the curseg points to */ - zone_sector = (sector_t)(cs_zone_block - zbd->start_blk) - << log_sectors_per_block; + zone_sector = (sector_t)(cs_zone_block - zbd->start_blk) << + sbi->log_sectors_per_block; err = blkdev_report_zones(zbd->bdev, zone_sector, 1, report_one_zone_cb, &zone); if (err != 1) { @@ -4910,10 +4909,10 @@ static int fix_curseg_write_pointer(struct f2fs_sb_info *sbi, int type) if (zone.type != 
BLK_ZONE_TYPE_SEQWRITE_REQ) return 0; - wp_block = zbd->start_blk + (zone.wp >> log_sectors_per_block); + wp_block = zbd->start_blk + (zone.wp >> sbi->log_sectors_per_block); wp_segno = GET_SEGNO(sbi, wp_block); wp_blkoff = wp_block - START_BLOCK(sbi, wp_segno); - wp_sector_off = zone.wp & GENMASK(log_sectors_per_block - 1, 0); + wp_sector_off = zone.wp & GENMASK(sbi->log_sectors_per_block - 1, 0); if (cs->segno == wp_segno && cs->next_blkoff == wp_blkoff && wp_sector_off == 0) @@ -4940,8 +4939,8 @@ static int fix_curseg_write_pointer(struct f2fs_sb_info *sbi, int type) if (!zbd) return 0; - zone_sector = (sector_t)(cs_zone_block - zbd->start_blk) - << log_sectors_per_block; + zone_sector = (sector_t)(cs_zone_block - zbd->start_blk) << + sbi->log_sectors_per_block; err = blkdev_report_zones(zbd->bdev, zone_sector, 1, report_one_zone_cb, &zone); if (err != 1) { @@ -4959,7 +4958,7 @@ static int fix_curseg_write_pointer(struct f2fs_sb_info *sbi, int type) "Reset the zone: curseg[0x%x,0x%x]", type, cs->segno, cs->next_blkoff); err = __f2fs_issue_discard_zone(sbi, zbd->bdev, cs_zone_block, - zone.len >> log_sectors_per_block); + zone.len >> sbi->log_sectors_per_block); if (err) { f2fs_err(sbi, "Discard zone failed: %s (errno=%d)", zbd->path, err); From d8189834d4348ae608083e1f1f53792cfcc2a9bc Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 23 May 2023 14:17:25 +0800 Subject: [PATCH 15/46] f2fs: fix to avoid NULL pointer dereference f2fs_write_end_io() butt3rflyh4ck reports a bug as below: When a thread always calls F2FS_IOC_RESIZE_FS to resize fs, if resize fs is failed, f2fs kernel thread would invoke callback function to update f2fs io info, it would call f2fs_write_end_io and may trigger null-ptr-deref in NODE_MAPPING. 
general protection fault, probably for non-canonical address KASAN: null-ptr-deref in range [0x0000000000000030-0x0000000000000037] RIP: 0010:NODE_MAPPING fs/f2fs/f2fs.h:1972 [inline] RIP: 0010:f2fs_write_end_io+0x727/0x1050 fs/f2fs/data.c:370 bio_endio+0x5af/0x6c0 block/bio.c:1608 req_bio_endio block/blk-mq.c:761 [inline] blk_update_request+0x5cc/0x1690 block/blk-mq.c:906 blk_mq_end_request+0x59/0x4c0 block/blk-mq.c:1023 lo_complete_rq+0x1c6/0x280 drivers/block/loop.c:370 blk_complete_reqs+0xad/0xe0 block/blk-mq.c:1101 __do_softirq+0x1d4/0x8ef kernel/softirq.c:571 run_ksoftirqd kernel/softirq.c:939 [inline] run_ksoftirqd+0x31/0x60 kernel/softirq.c:931 smpboot_thread_fn+0x659/0x9e0 kernel/smpboot.c:164 kthread+0x33e/0x440 kernel/kthread.c:379 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:308 The root cause is that the race below can leave dirty metadata in f2fs after the filesystem is remounted read-only: Thread A Thread B - f2fs_ioc_resize_fs - f2fs_readonly --- return false - f2fs_resize_fs - f2fs_remount - write_checkpoint - set f2fs as ro - free_segment_range - update meta_inode's data Then, if f2fs_put_super() fails in write_checkpoint due to the read-only status, meta_inode's dirty data will be written back after node_inode is put, and finally f2fs_write_end_io will access a NULL pointer on sbi->node_inode.
Thread A IRQ context - f2fs_put_super - write_checkpoint fails - iput(node_inode) - node_inode = NULL - iput(meta_inode) - write_inode_now - f2fs_write_meta_page - f2fs_write_end_io - NODE_MAPPING(sbi) : access NULL pointer on node_inode Fixes: b4b10061ef98 ("f2fs: refactor resize_fs to avoid meta updates in progress") Reported-by: butt3rflyh4ck Closes: https://lore.kernel.org/r/1684480657-2375-1-git-send-email-yangtiezhu@loongson.cn Tested-by: butt3rflyh4ck Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 +- fs/f2fs/file.c | 2 +- fs/f2fs/gc.c | 21 ++++++++++++++++++--- 3 files changed, 20 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index faa27f41f39d..ae47f522d859 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3834,7 +3834,7 @@ void f2fs_stop_gc_thread(struct f2fs_sb_info *sbi); block_t f2fs_start_bidx_of_node(unsigned int node_ofs, struct inode *inode); int f2fs_gc(struct f2fs_sb_info *sbi, struct f2fs_gc_control *gc_control); void f2fs_build_gc_manager(struct f2fs_sb_info *sbi); -int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count); +int f2fs_resize_fs(struct file *filp, __u64 block_count); int __init f2fs_create_garbage_collection_cache(void); void f2fs_destroy_garbage_collection_cache(void); /* victim selection function for cleaning and SSR */ diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 78aa8cff4b41..015ed274dc31 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -3279,7 +3279,7 @@ static int f2fs_ioc_resize_fs(struct file *filp, unsigned long arg) sizeof(block_count))) return -EFAULT; - return f2fs_resize_fs(sbi, block_count); + return f2fs_resize_fs(filp, block_count); } static int f2fs_ioc_enable_verity(struct file *filp, unsigned long arg) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 51d7e8d29bf1..339c4ba67eb7 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -2105,8 +2105,9 @@ static void update_fs_metadata(struct f2fs_sb_info *sbi, int secs) } } -int f2fs_resize_fs(struct 
f2fs_sb_info *sbi, __u64 block_count) +int f2fs_resize_fs(struct file *filp, __u64 block_count) { + struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(filp)); __u64 old_block_count, shrunk_blocks; struct cp_control cpc = { CP_RESIZE, 0, 0, 0 }; unsigned int secs; @@ -2144,12 +2145,18 @@ int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count) return -EINVAL; } + err = mnt_want_write_file(filp); + if (err) + return err; + shrunk_blocks = old_block_count - block_count; secs = div_u64(shrunk_blocks, BLKS_PER_SEC(sbi)); /* stop other GC */ - if (!f2fs_down_write_trylock(&sbi->gc_lock)) - return -EAGAIN; + if (!f2fs_down_write_trylock(&sbi->gc_lock)) { + err = -EAGAIN; + goto out_drop_write; + } /* stop CP to protect MAIN_SEC in free_segment_range */ f2fs_lock_op(sbi); @@ -2169,10 +2176,18 @@ int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count) out_unlock: f2fs_unlock_op(sbi); f2fs_up_write(&sbi->gc_lock); +out_drop_write: + mnt_drop_write_file(filp); if (err) return err; freeze_super(sbi->sb); + + if (f2fs_readonly(sbi->sb)) { + thaw_super(sbi->sb); + return -EROFS; + } + f2fs_down_write(&sbi->gc_lock); f2fs_down_write(&sbi->cp_global_sem); From 458c15dfbce62c35fefd9ca637b20a051309c9f1 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 23 May 2023 11:58:22 +0800 Subject: [PATCH 16/46] f2fs: don't reset unchangable mount option in f2fs_remount() syzbot reports a bug as below: general protection fault, probably for non-canonical address 0xdffffc0000000009: 0000 [#1] PREEMPT SMP KASAN RIP: 0010:__lock_acquire+0x69/0x2000 kernel/locking/lockdep.c:4942 Call Trace: lock_acquire+0x1e3/0x520 kernel/locking/lockdep.c:5691 __raw_write_lock include/linux/rwlock_api_smp.h:209 [inline] _raw_write_lock+0x2e/0x40 kernel/locking/spinlock.c:300 __drop_extent_tree+0x3ac/0x660 fs/f2fs/extent_cache.c:1100 f2fs_drop_extent_tree+0x17/0x30 fs/f2fs/extent_cache.c:1116 f2fs_insert_range+0x2d5/0x3c0 fs/f2fs/file.c:1664 f2fs_fallocate+0x4e4/0x6d0 fs/f2fs/file.c:1838 
vfs_fallocate+0x54b/0x6b0 fs/open.c:324 ksys_fallocate fs/open.c:347 [inline] __do_sys_fallocate fs/open.c:355 [inline] __se_sys_fallocate fs/open.c:353 [inline] __x64_sys_fallocate+0xbd/0x100 fs/open.c:353 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd The root cause is race condition as below: - since it tries to remount rw filesystem, so that do_remount won't call sb_prepare_remount_readonly to block fallocate, there may be race condition in between remount and fallocate. - in f2fs_remount(), default_options() will reset mount option to default one, and then update it based on result of parse_options(), so there is a hole which race condition can happen. Thread A Thread B - f2fs_fill_super - parse_options - clear_opt(READ_EXTENT_CACHE) - f2fs_remount - default_options - set_opt(READ_EXTENT_CACHE) - f2fs_fallocate - f2fs_insert_range - f2fs_drop_extent_tree - __drop_extent_tree - __may_extent_tree - test_opt(READ_EXTENT_CACHE) return true - write_lock(&et->lock) access NULL pointer - parse_options - clear_opt(READ_EXTENT_CACHE) Cc: Reported-by: syzbot+d015b6c2fbb5c383bf08@syzkaller.appspotmail.com Closes: https://lore.kernel.org/linux-f2fs-devel/20230522124203.3838360-1-chao@kernel.org Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 8eb17cc73941..6e770f82d39f 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -2086,9 +2086,22 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) return 0; } -static void default_options(struct f2fs_sb_info *sbi) +static void default_options(struct f2fs_sb_info *sbi, bool remount) { /* init some FS parameters */ + if (!remount) { + set_opt(sbi, READ_EXTENT_CACHE); + clear_opt(sbi, DISABLE_CHECKPOINT); + + if (f2fs_hw_support_discard(sbi) || 
f2fs_hw_should_discard(sbi)) + set_opt(sbi, DISCARD); + + if (f2fs_sb_has_blkzoned(sbi)) + F2FS_OPTION(sbi).discard_unit = DISCARD_UNIT_SECTION; + else + F2FS_OPTION(sbi).discard_unit = DISCARD_UNIT_BLOCK; + } + if (f2fs_sb_has_readonly(sbi)) F2FS_OPTION(sbi).active_logs = NR_CURSEG_RO_TYPE; else @@ -2118,23 +2131,16 @@ static void default_options(struct f2fs_sb_info *sbi) set_opt(sbi, INLINE_XATTR); set_opt(sbi, INLINE_DATA); set_opt(sbi, INLINE_DENTRY); - set_opt(sbi, READ_EXTENT_CACHE); set_opt(sbi, NOHEAP); - clear_opt(sbi, DISABLE_CHECKPOINT); set_opt(sbi, MERGE_CHECKPOINT); F2FS_OPTION(sbi).unusable_cap = 0; sbi->sb->s_flags |= SB_LAZYTIME; if (!f2fs_is_readonly(sbi)) set_opt(sbi, FLUSH_MERGE); - if (f2fs_hw_support_discard(sbi) || f2fs_hw_should_discard(sbi)) - set_opt(sbi, DISCARD); - if (f2fs_sb_has_blkzoned(sbi)) { + if (f2fs_sb_has_blkzoned(sbi)) F2FS_OPTION(sbi).fs_mode = FS_MODE_LFS; - F2FS_OPTION(sbi).discard_unit = DISCARD_UNIT_SECTION; - } else { + else F2FS_OPTION(sbi).fs_mode = FS_MODE_ADAPTIVE; - F2FS_OPTION(sbi).discard_unit = DISCARD_UNIT_BLOCK; - } #ifdef CONFIG_F2FS_FS_XATTR set_opt(sbi, XATTR_USER); @@ -2306,7 +2312,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) clear_sbi_flag(sbi, SBI_NEED_SB_WRITE); } - default_options(sbi); + default_options(sbi, true); /* parse mount options */ err = parse_options(sb, data, true); @@ -4346,7 +4352,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) sbi->s_chksum_seed = f2fs_chksum(sbi, ~0, raw_super->uuid, sizeof(raw_super->uuid)); - default_options(sbi); + default_options(sbi, false); /* parse mount options */ options = kstrdup((const char *)data, GFP_KERNEL); if (data && !options) { From 901c12d144570ed2558f4a6806201453c5b01bea Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 18 May 2023 10:14:12 +0800 Subject: [PATCH 17/46] f2fs: flush error flags in workqueue In IRQ context, it wakes up workqueue to record errors into on-disk superblock 
fields rather than in-memory fields. Fixes: 1aa161e43106 ("f2fs: fix scheduling while atomic in decompression path") Fixes: 95fa90c9e5a7 ("f2fs: support recording errors into superblock") Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 2 +- fs/f2fs/f2fs.h | 1 + fs/f2fs/super.c | 26 +++++++++++++++++++++++--- 3 files changed, 25 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 905b7c39a2b3..1132d3cd8f33 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -744,7 +744,7 @@ void f2fs_decompress_cluster(struct decompress_io_ctx *dic, bool in_task) /* Avoid f2fs_commit_super in irq context */ if (!in_task) - f2fs_save_errors(sbi, ERROR_FAIL_DECOMPRESSION); + f2fs_handle_error_async(sbi, ERROR_FAIL_DECOMPRESSION); else f2fs_handle_error(sbi, ERROR_FAIL_DECOMPRESSION); goto out_release; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index ae47f522d859..86ac5f599575 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3563,6 +3563,7 @@ void f2fs_save_errors(struct f2fs_sb_info *sbi, unsigned char flag); void f2fs_handle_critical_error(struct f2fs_sb_info *sbi, unsigned char reason, bool irq_context); void f2fs_handle_error(struct f2fs_sb_info *sbi, unsigned char error); +void f2fs_handle_error_async(struct f2fs_sb_info *sbi, unsigned char error); int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover); int f2fs_sync_fs(struct super_block *sb, int sync); int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 6e770f82d39f..ee390c398e1c 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -3995,6 +3995,11 @@ static void f2fs_record_stop_reason(struct f2fs_sb_info *sbi) f2fs_down_write(&sbi->sb_lock); spin_lock_irqsave(&sbi->error_lock, flags); + if (sbi->error_dirty) { + memcpy(F2FS_RAW_SUPER(sbi)->s_errors, sbi->errors, + MAX_F2FS_ERRORS); + sbi->error_dirty = false; + } memcpy(raw_super->s_stop_reason, sbi->stop_reason, MAX_STOP_REASON); 
spin_unlock_irqrestore(&sbi->error_lock, flags); @@ -4034,12 +4039,10 @@ static bool f2fs_update_errors(struct f2fs_sb_info *sbi) return need_update; } -void f2fs_handle_error(struct f2fs_sb_info *sbi, unsigned char error) +static void f2fs_record_errors(struct f2fs_sb_info *sbi, unsigned char error) { int err; - f2fs_save_errors(sbi, error); - f2fs_down_write(&sbi->sb_lock); if (!f2fs_update_errors(sbi)) @@ -4053,6 +4056,23 @@ void f2fs_handle_error(struct f2fs_sb_info *sbi, unsigned char error) f2fs_up_write(&sbi->sb_lock); } +void f2fs_handle_error(struct f2fs_sb_info *sbi, unsigned char error) +{ + f2fs_save_errors(sbi, error); + f2fs_record_errors(sbi, error); +} + +void f2fs_handle_error_async(struct f2fs_sb_info *sbi, unsigned char error) +{ + f2fs_save_errors(sbi, error); + + if (!sbi->error_dirty) + return; + if (!test_bit(error, (unsigned long *)sbi->errors)) + return; + schedule_work(&sbi->s_error_work); +} + static bool system_going_down(void) { return system_state == SYSTEM_HALT || system_state == SYSTEM_POWER_OFF From 25f9080576b9549be9435123d7e45bfeebd2dc97 Mon Sep 17 00:00:00 2001 From: Daejun Park Date: Mon, 8 May 2023 17:10:42 +0900 Subject: [PATCH 18/46] f2fs: add async reset zone command support This patch enables submitting reset zone commands asynchronously. It helps decrease the average latency of write IOs in high-utilization scenarios through faster checkpointing.
Signed-off-by: Daejun Park Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 1 + fs/f2fs/iostat.c | 1 + fs/f2fs/segment.c | 84 +++++++++++++++++++++++++++++++++++-- include/trace/events/f2fs.h | 24 +++++++++-- 4 files changed, 104 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 86ac5f599575..4b249716ae7b 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1176,6 +1176,7 @@ enum iostat_type { /* other */ FS_DISCARD_IO, /* discard */ FS_FLUSH_IO, /* flush */ + FS_ZONE_RESET_IO, /* zone reset */ NR_IO_TYPE, }; diff --git a/fs/f2fs/iostat.c b/fs/f2fs/iostat.c index 3d5bfb1ad585..f8703038e1d8 100644 --- a/fs/f2fs/iostat.c +++ b/fs/f2fs/iostat.c @@ -80,6 +80,7 @@ int __maybe_unused iostat_info_seq_show(struct seq_file *seq, void *offset) seq_puts(seq, "[OTHER]\n"); IOSTAT_INFO_SHOW("fs discard", FS_DISCARD_IO); IOSTAT_INFO_SHOW("fs flush", FS_FLUSH_IO); + IOSTAT_INFO_SHOW("fs zone reset", FS_ZONE_RESET_IO); return 0; } diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 9282399cc810..0c0c033c4bdd 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1196,6 +1196,45 @@ static void __init_discard_policy(struct f2fs_sb_info *sbi, static void __update_discard_tree_range(struct f2fs_sb_info *sbi, struct block_device *bdev, block_t lstart, block_t start, block_t len); + +#ifdef CONFIG_BLK_DEV_ZONED +static void __submit_zone_reset_cmd(struct f2fs_sb_info *sbi, + struct discard_cmd *dc, blk_opf_t flag, + struct list_head *wait_list, + unsigned int *issued) +{ + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + struct block_device *bdev = dc->bdev; + struct bio *bio = bio_alloc(bdev, 0, REQ_OP_ZONE_RESET | flag, GFP_NOFS); + unsigned long flags; + + trace_f2fs_issue_reset_zone(bdev, dc->di.start); + + spin_lock_irqsave(&dc->lock, flags); + dc->state = D_SUBMIT; + dc->bio_ref++; + spin_unlock_irqrestore(&dc->lock, flags); + + if (issued) + (*issued)++; + + atomic_inc(&dcc->queued_discard); + dc->queued++; + 
list_move_tail(&dc->list, wait_list); + + /* sanity check on discard range */ + __check_sit_bitmap(sbi, dc->di.lstart, dc->di.lstart + dc->di.len); + + bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(dc->di.start); + bio->bi_private = dc; + bio->bi_end_io = f2fs_submit_discard_endio; + submit_bio(bio); + + atomic_inc(&dcc->issued_discard); + f2fs_update_iostat(sbi, NULL, FS_ZONE_RESET_IO, dc->di.len * F2FS_BLKSIZE); +} +#endif + /* this function is copied from blkdev_issue_discard from block/blk-lib.c */ static int __submit_discard_cmd(struct f2fs_sb_info *sbi, struct discard_policy *dpolicy, @@ -1217,6 +1256,13 @@ static int __submit_discard_cmd(struct f2fs_sb_info *sbi, if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) return 0; +#ifdef CONFIG_BLK_DEV_ZONED + if (f2fs_sb_has_blkzoned(sbi) && bdev_is_zoned(bdev)) { + __submit_zone_reset_cmd(sbi, dc, flag, wait_list, issued); + return 0; + } +#endif + trace_f2fs_issue_discard(bdev, dc->di.start, dc->di.len); lstart = dc->di.lstart; @@ -1461,6 +1507,19 @@ static void __update_discard_tree_range(struct f2fs_sb_info *sbi, } } +#ifdef CONFIG_BLK_DEV_ZONED +static void __queue_zone_reset_cmd(struct f2fs_sb_info *sbi, + struct block_device *bdev, block_t blkstart, block_t lblkstart, + block_t blklen) +{ + trace_f2fs_queue_reset_zone(bdev, blkstart); + + mutex_lock(&SM_I(sbi)->dcc_info->cmd_lock); + __insert_discard_cmd(sbi, bdev, lblkstart, blkstart, blklen); + mutex_unlock(&SM_I(sbi)->dcc_info->cmd_lock); +} +#endif + static void __queue_discard_cmd(struct f2fs_sb_info *sbi, struct block_device *bdev, block_t blkstart, block_t blklen) { @@ -1724,6 +1783,19 @@ static void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr) mutex_lock(&dcc->cmd_lock); dc = __lookup_discard_cmd(sbi, blkaddr); +#ifdef CONFIG_BLK_DEV_ZONED + if (dc && f2fs_sb_has_blkzoned(sbi) && bdev_is_zoned(dc->bdev)) { + /* force submit zone reset */ + if (dc->state == D_PREP) + __submit_zone_reset_cmd(sbi, dc, REQ_SYNC, + &dcc->wait_list, NULL); + 
dc->ref++; + mutex_unlock(&dcc->cmd_lock); + /* wait zone reset */ + __wait_one_discard_bio(sbi, dc); + return; + } +#endif if (dc) { if (dc->state == D_PREP) { __punch_discard_cmd(sbi, dc, blkaddr); @@ -1876,9 +1948,15 @@ static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi, blkstart, blklen); return -EIO; } - trace_f2fs_issue_reset_zone(bdev, blkstart); - return blkdev_zone_mgmt(bdev, REQ_OP_ZONE_RESET, - sector, nr_sects, GFP_NOFS); + + if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) { + trace_f2fs_issue_reset_zone(bdev, blkstart); + return blkdev_zone_mgmt(bdev, REQ_OP_ZONE_RESET, + sector, nr_sects, GFP_NOFS); + } + + __queue_zone_reset_cmd(sbi, bdev, blkstart, lblkstart, blklen); + return 0; } /* For conventional zones, use regular discard if supported */ diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 99cbc5949e3c..793f82cc1515 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -1512,7 +1512,7 @@ DEFINE_EVENT(f2fs_discard, f2fs_remove_discard, TP_ARGS(dev, blkstart, blklen) ); -TRACE_EVENT(f2fs_issue_reset_zone, +DECLARE_EVENT_CLASS(f2fs_reset_zone, TP_PROTO(struct block_device *dev, block_t blkstart), @@ -1528,11 +1528,25 @@ TRACE_EVENT(f2fs_issue_reset_zone, __entry->blkstart = blkstart; ), - TP_printk("dev = (%d,%d), reset zone at block = 0x%llx", + TP_printk("dev = (%d,%d), zone at block = 0x%llx", show_dev(__entry->dev), (unsigned long long)__entry->blkstart) ); +DEFINE_EVENT(f2fs_reset_zone, f2fs_queue_reset_zone, + + TP_PROTO(struct block_device *dev, block_t blkstart), + + TP_ARGS(dev, blkstart) +); + +DEFINE_EVENT(f2fs_reset_zone, f2fs_issue_reset_zone, + + TP_PROTO(struct block_device *dev, block_t blkstart), + + TP_ARGS(dev, blkstart) +); + TRACE_EVENT(f2fs_issue_flush, TP_PROTO(struct block_device *dev, unsigned int nobarrier, @@ -1979,6 +1993,7 @@ TRACE_EVENT(f2fs_iostat, __field(unsigned long long, fs_nrio) __field(unsigned long long, fs_mrio) __field(unsigned long long, 
fs_discard) + __field(unsigned long long, fs_reset_zone) ), TP_fast_assign( @@ -2010,12 +2025,14 @@ TRACE_EVENT(f2fs_iostat, __entry->fs_nrio = iostat[FS_NODE_READ_IO]; __entry->fs_mrio = iostat[FS_META_READ_IO]; __entry->fs_discard = iostat[FS_DISCARD_IO]; + __entry->fs_reset_zone = iostat[FS_ZONE_RESET_IO]; ), TP_printk("dev = (%d,%d), " "app [write=%llu (direct=%llu, buffered=%llu), mapped=%llu, " "compr(buffered=%llu, mapped=%llu)], " - "fs [data=%llu, cdata=%llu, node=%llu, meta=%llu, discard=%llu], " + "fs [data=%llu, cdata=%llu, node=%llu, meta=%llu, discard=%llu, " + "reset_zone=%llu], " "gc [data=%llu, node=%llu], " "cp [data=%llu, node=%llu, meta=%llu], " "app [read=%llu (direct=%llu, buffered=%llu), mapped=%llu], " @@ -2026,6 +2043,7 @@ TRACE_EVENT(f2fs_iostat, __entry->app_bio, __entry->app_mio, __entry->app_bcdio, __entry->app_mcdio, __entry->fs_dio, __entry->fs_cdio, __entry->fs_nio, __entry->fs_mio, __entry->fs_discard, + __entry->fs_reset_zone, __entry->fs_gc_dio, __entry->fs_gc_nio, __entry->fs_cp_dio, __entry->fs_cp_nio, __entry->fs_cp_mio, __entry->app_rio, __entry->app_drio, __entry->app_brio, From 38a4a330c8bf6498bde3be155485c9b44a517fb0 Mon Sep 17 00:00:00 2001 From: Chunhai Guo Date: Sun, 28 May 2023 01:06:40 +0800 Subject: [PATCH 19/46] f2fs: Detect looped node chain efficiently find_fsync_dnodes() detect the looped node chain by comparing the loop counter with free blocks. While it may take tens of seconds to quit when the free blocks are large enough. We can use Floyd's cycle detection algorithm to make the detection more efficient. 
Signed-off-by: Chunhai Guo Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/recovery.c | 71 +++++++++++++++++++++++++++++++++------------- 1 file changed, 51 insertions(+), 20 deletions(-) diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 58c1a0096f7d..f0cf1538389c 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -360,21 +360,63 @@ static unsigned int adjust_por_ra_blocks(struct f2fs_sb_info *sbi, return ra_blocks; } +/* Detect looped node chain with Floyd's cycle detection algorithm. */ +static int sanity_check_node_chain(struct f2fs_sb_info *sbi, block_t blkaddr, + block_t *blkaddr_fast, bool *is_detecting) +{ + unsigned int ra_blocks = RECOVERY_MAX_RA_BLOCKS; + struct page *page = NULL; + int i; + + if (!*is_detecting) + return 0; + + for (i = 0; i < 2; i++) { + if (!f2fs_is_valid_blkaddr(sbi, *blkaddr_fast, META_POR)) { + *is_detecting = false; + return 0; + } + + page = f2fs_get_tmp_page(sbi, *blkaddr_fast); + if (IS_ERR(page)) + return PTR_ERR(page); + + if (!is_recoverable_dnode(page)) { + f2fs_put_page(page, 1); + *is_detecting = false; + return 0; + } + + ra_blocks = adjust_por_ra_blocks(sbi, ra_blocks, *blkaddr_fast, + next_blkaddr_of_node(page)); + + *blkaddr_fast = next_blkaddr_of_node(page); + f2fs_put_page(page, 1); + + f2fs_ra_meta_pages_cond(sbi, *blkaddr_fast, ra_blocks); + } + + if (*blkaddr_fast == blkaddr) { + f2fs_notice(sbi, "%s: Detect looped node chain on blkaddr:%u." 
+ " Run fsck to fix it.", __func__, blkaddr); + return -EINVAL; + } + return 0; +} + static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head, bool check_only) { struct curseg_info *curseg; struct page *page = NULL; - block_t blkaddr; - unsigned int loop_cnt = 0; - unsigned int ra_blocks = RECOVERY_MAX_RA_BLOCKS; - unsigned int free_blocks = MAIN_SEGS(sbi) * sbi->blocks_per_seg - - valid_user_blocks(sbi); + block_t blkaddr, blkaddr_fast; + bool is_detecting = true; int err = 0; /* get node pages in the current segment */ curseg = CURSEG_I(sbi, CURSEG_WARM_NODE); blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); + blkaddr_fast = blkaddr; while (1) { struct fsync_inode_entry *entry; @@ -431,25 +473,14 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head, if (IS_INODE(page) && is_dent_dnode(page)) entry->last_dentry = blkaddr; next: - /* sanity check in order to detect looped node chain */ - if (++loop_cnt >= free_blocks || - blkaddr == next_blkaddr_of_node(page)) { - f2fs_notice(sbi, "%s: detect looped node chain, blkaddr:%u, next:%u", - __func__, blkaddr, - next_blkaddr_of_node(page)); - f2fs_put_page(page, 1); - err = -EINVAL; - break; - } - - ra_blocks = adjust_por_ra_blocks(sbi, ra_blocks, blkaddr, - next_blkaddr_of_node(page)); - /* check next segment */ blkaddr = next_blkaddr_of_node(page); f2fs_put_page(page, 1); - f2fs_ra_meta_pages_cond(sbi, blkaddr, ra_blocks); + err = sanity_check_node_chain(sbi, blkaddr, &blkaddr_fast, + &is_detecting); + if (err) + break; } return err; } From 20872584b8c0b006c007da9588a272c9e28d2e18 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sun, 28 May 2023 15:47:12 +0800 Subject: [PATCH 20/46] f2fs: fix to drop all dirty meta/node pages during umount() For cp error case, there will be dirty meta/node pages remained after f2fs_write_checkpoint() in f2fs_put_super(), drop them explicitly, and do sanity check on reference count of dirty pages and inflight IOs. 
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index ee390c398e1c..2936bc870f5c 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1571,6 +1571,7 @@ static void f2fs_put_super(struct super_block *sb) { struct f2fs_sb_info *sbi = F2FS_SB(sb); int i; + int err = 0; bool done; /* unregister procfs/sysfs entries in advance to avoid race case */ @@ -1597,7 +1598,7 @@ static void f2fs_put_super(struct super_block *sb) struct cp_control cpc = { .reason = CP_UMOUNT, }; - f2fs_write_checkpoint(sbi, &cpc); + err = f2fs_write_checkpoint(sbi, &cpc); } /* be sure to wait for any on-going discard commands */ @@ -1606,7 +1607,7 @@ static void f2fs_put_super(struct super_block *sb) struct cp_control cpc = { .reason = CP_UMOUNT | CP_TRIMMED, }; - f2fs_write_checkpoint(sbi, &cpc); + err = f2fs_write_checkpoint(sbi, &cpc); } /* @@ -1623,6 +1624,19 @@ static void f2fs_put_super(struct super_block *sb) f2fs_wait_on_all_pages(sbi, F2FS_WB_CP_DATA); + if (err) { + truncate_inode_pages_final(NODE_MAPPING(sbi)); + truncate_inode_pages_final(META_MAPPING(sbi)); + } + + for (i = 0; i < NR_COUNT_TYPE; i++) { + if (!get_pages(sbi, i)) + continue; + f2fs_err(sbi, "detect filesystem reference count leak during " + "umount, type: %d, count: %lld", i, get_pages(sbi, i)); + f2fs_bug_on(sbi, 1); + } + f2fs_bug_on(sbi, sbi->fsync_node_num); f2fs_destroy_compress_inode(sbi); From 38b57833de1d9716bf8134c6fefcc35d23d5b136 Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Wed, 31 May 2023 20:59:18 +0800 Subject: [PATCH 21/46] f2fs: flag as supporting buffered async reads The f2fs uses generic_file_buffered_read(), which supports buffered async reads since commit 1a0a7853b901 ("mm: support async buffered reads in generic_file_buffered_read()"). Let's enable it to match other file-systems. 
The read performance has been greatly improved under io_uring: 167M/s -> 234M/s, Increase ratio by 40% Test w/: ./fio --name=onessd --filename=/data/test/local/io_uring_test --size=256M --rw=randread --bs=4k --direct=0 --overwrite=0 --numjobs=1 --iodepth=1 --time_based=0 --runtime=10 --ioengine=io_uring --registerfiles --fixedbufs --gtod_reduce=1 --group_reporting --sqthread_poll=1 Signed-off-by: Lu Hongfei Signed-off-by: Yangtao Li Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 015ed274dc31..23c68ee946e5 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -546,7 +546,7 @@ static int f2fs_file_open(struct inode *inode, struct file *filp) if (err) return err; - filp->f_mode |= FMODE_NOWAIT; + filp->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC; return dquot_file_open(inode, filp); } From cadfc2f9f8c35ed429a64245a89766961dad49fa Mon Sep 17 00:00:00 2001 From: Wu Bo Date: Thu, 1 Jun 2023 09:37:59 +0800 Subject: [PATCH 22/46] f2fs: fix args passed to trace_f2fs_lookup_end The NULL return of 'd_splice_alias' doesn't mean error. Thus the successful case will also return NULL, which makes the tracepoint always print 'err=-ENOENT'.
And the different cases of 'new' & 'err' are listed as follows: 1) dentry exists: err(0) with new(NULL) --> dentry, err=0 2) dentry exists: err(0) with new(VALID) --> new, err=0 3) dentry exists: err(0) with new(ERR) --> dentry, err=ERR 4) no dentry exists: err(-ENOENT) with new(NULL) --> dentry, err=-ENOENT 5) no dentry exists: err(-ENOENT) with new(VALID) --> new, err=-ENOENT 6) no dentry exists: err(-ENOENT) with new(ERR) --> dentry, err=ERR Signed-off-by: Wu Bo Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/namei.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 77a71276ecb1..3e35eb7dbb8f 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -576,8 +576,8 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, } #endif new = d_splice_alias(inode, dentry); - err = PTR_ERR_OR_ZERO(new); - trace_f2fs_lookup_end(dir, dentry, ino, !new ? -ENOENT : err); + trace_f2fs_lookup_end(dir, !IS_ERR_OR_NULL(new) ? new : dentry, + ino, IS_ERR(new) ? 
PTR_ERR(new) : err); return new; out_iput: iput(inode); From 5079e1c0c879311668b77075de3e701869804adf Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 2 Jun 2023 16:36:05 +0800 Subject: [PATCH 23/46] f2fs: avoid dead loop in f2fs_issue_checkpoint() generic/082 reports a bug as below: __schedule+0x332/0xf60 schedule+0x6f/0xf0 schedule_timeout+0x23b/0x2a0 wait_for_completion+0x8f/0x140 f2fs_issue_checkpoint+0xfe/0x1b0 f2fs_sync_fs+0x9d/0xb0 sync_filesystem+0x87/0xb0 dquot_load_quota_sb+0x41b/0x460 dquot_load_quota_inode+0xa5/0x130 dquot_quota_on+0x4b/0x60 f2fs_quota_on+0xe3/0x1b0 do_quotactl+0x483/0x700 __x64_sys_quotactl+0x15c/0x310 do_syscall_64+0x3f/0x90 entry_SYSCALL_64_after_hwframe+0x72/0xdc The root cause is a race case as below: Thread A Kworker IRQ - write() : write data to quota.user file - writepages - f2fs_submit_page_write - __is_cp_guaranteed return false - inc_page_count(F2FS_WB_DATA) - submit_bio - quotactl(Q_QUOTAON) - f2fs_quota_on - dquot_quota_on - dquot_load_quota_inode - vfs_setup_quota_inode : inode->i_flags |= S_NOQUOTA - f2fs_write_end_io - __is_cp_guaranteed return true - dec_page_count(F2FS_WB_CP_DATA) - dquot_load_quota_sb - f2fs_sync_fs - f2fs_issue_checkpoint - do_checkpoint - f2fs_wait_on_all_pages(F2FS_WB_CP_DATA) : loop due to F2FS_WB_CP_DATA count is negative Fix this by calling filemap_fdatawrite() and filemap_fdatawait() to keep all data clean before quota file setup.
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 2936bc870f5c..8fd23caa1ed9 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -2923,16 +2923,27 @@ static int f2fs_quota_on(struct super_block *sb, int type, int format_id, return -EBUSY; } + if (path->dentry->d_sb != sb) + return -EXDEV; + err = f2fs_quota_sync(sb, type); if (err) return err; + inode = d_inode(path->dentry); + + err = filemap_fdatawrite(inode->i_mapping); + if (err) + return err; + + err = filemap_fdatawait(inode->i_mapping); + if (err) + return err; + err = dquot_quota_on(sb, type, format_id, path); if (err) return err; - inode = d_inode(path->dentry); - inode_lock(inode); F2FS_I(inode)->i_flags |= F2FS_NOATIME_FL | F2FS_IMMUTABLE_FL; f2fs_set_inode_flags(inode); From 8bec7dd1b3f7d7769d433d67bde404de948a2d95 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 6 Jun 2023 14:19:01 +0800 Subject: [PATCH 24/46] f2fs: check return value of freeze_super() freeze_super() can fail, it needs to check its return value and do error handling in f2fs_resize_fs(). 
Fixes: 04f0b2eaa3b3 ("f2fs: ioctl for removing a range from F2FS") Fixes: b4b10061ef98 ("f2fs: refactor resize_fs to avoid meta updates in progress") Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 339c4ba67eb7..01effd3fcb6c 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -2181,7 +2181,9 @@ int f2fs_resize_fs(struct file *filp, __u64 block_count) if (err) return err; - freeze_super(sbi->sb); + err = freeze_super(sbi->sb); + if (err) + return err; if (f2fs_readonly(sbi->sb)) { thaw_super(sbi->sb); From ccf3ff2b30edd52fb54e239da25758fb22acfb78 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 6 Jun 2023 14:18:22 +0800 Subject: [PATCH 25/46] f2fs: introduce F2FS_QUOTA_DEFAULT_FL for cleanup This patch adds F2FS_QUOTA_DEFAULT_FL to include two default flags: F2FS_NOATIME_FL and F2FS_IMMUTABLE_FL, and use it to clean up codes. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 ++ fs/f2fs/super.c | 6 +++--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 4b249716ae7b..5582a93d9190 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2960,6 +2960,8 @@ static inline void f2fs_change_bit(unsigned int nr, char *addr) #define F2FS_PROJINHERIT_FL 0x20000000 /* Create with parents projid */ #define F2FS_CASEFOLD_FL 0x40000000 /* Casefolded file */ +#define F2FS_QUOTA_DEFAULT_FL (F2FS_NOATIME_FL | F2FS_IMMUTABLE_FL) + /* Flags that should be inherited by new inodes from their parent. 
*/ #define F2FS_FL_INHERITED (F2FS_SYNC_FL | F2FS_NODUMP_FL | F2FS_NOATIME_FL | \ F2FS_DIRSYNC_FL | F2FS_PROJINHERIT_FL | \ diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 8fd23caa1ed9..8efbb2e1b8b1 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -2768,7 +2768,7 @@ static int f2fs_quota_enable(struct super_block *sb, int type, int format_id, { struct inode *qf_inode; unsigned long qf_inum; - unsigned long qf_flag = F2FS_NOATIME_FL | F2FS_IMMUTABLE_FL; + unsigned long qf_flag = F2FS_QUOTA_DEFAULT_FL; int err; BUG_ON(!f2fs_sb_has_quota_ino(F2FS_SB(sb))); @@ -2945,7 +2945,7 @@ static int f2fs_quota_on(struct super_block *sb, int type, int format_id, return err; inode_lock(inode); - F2FS_I(inode)->i_flags |= F2FS_NOATIME_FL | F2FS_IMMUTABLE_FL; + F2FS_I(inode)->i_flags |= F2FS_QUOTA_DEFAULT_FL; f2fs_set_inode_flags(inode); inode_unlock(inode); f2fs_mark_inode_dirty_sync(inode, false); @@ -2970,7 +2970,7 @@ static int __f2fs_quota_off(struct super_block *sb, int type) goto out_put; inode_lock(inode); - F2FS_I(inode)->i_flags &= ~(F2FS_NOATIME_FL | F2FS_IMMUTABLE_FL); + F2FS_I(inode)->i_flags &= ~F2FS_QUOTA_DEFAULT_FL; f2fs_set_inode_flags(inode); inode_unlock(inode); f2fs_mark_inode_dirty_sync(inode, false); From 00e120b5e4b5638cf19eee96d4332f2d100746ba Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 12 Jun 2023 12:58:34 -0700 Subject: [PATCH 26/46] f2fs: assign default compression level Let's avoid any confusion from assigning compress_level=0 for LZ4HC and ZSTD. 
Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 3 +-- fs/f2fs/f2fs.h | 2 ++ fs/f2fs/super.c | 12 +++++++----- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 1132d3cd8f33..438af59d3571 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -317,8 +317,6 @@ static const struct f2fs_compress_ops f2fs_lz4_ops = { #endif #ifdef CONFIG_F2FS_FS_ZSTD -#define F2FS_ZSTD_DEFAULT_CLEVEL 1 - static int zstd_init_compress_ctx(struct compress_ctx *cc) { zstd_parameters params; @@ -327,6 +325,7 @@ static int zstd_init_compress_ctx(struct compress_ctx *cc) unsigned int workspace_size; unsigned char level = F2FS_I(cc->inode)->i_compress_level; + /* Need to remain this for backward compatibility */ if (!level) level = F2FS_ZSTD_DEFAULT_CLEVEL; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 5582a93d9190..94811085f9f3 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1440,6 +1440,8 @@ struct compress_data { #define F2FS_COMPRESSED_PAGE_MAGIC 0xF5F2C000 +#define F2FS_ZSTD_DEFAULT_CLEVEL 1 + #define COMPRESS_LEVEL_OFFSET 8 /* compress context */ diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 8efbb2e1b8b1..a3695adad3d3 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -589,14 +589,12 @@ static int f2fs_set_lz4hc_level(struct f2fs_sb_info *sbi, const char *str) { #ifdef CONFIG_F2FS_FS_LZ4HC unsigned int level; -#endif if (strlen(str) == 3) { - F2FS_OPTION(sbi).compress_level = 0; + F2FS_OPTION(sbi).compress_level = LZ4HC_DEFAULT_CLEVEL; return 0; } -#ifdef CONFIG_F2FS_FS_LZ4HC str += 3; if (str[0] != ':') { @@ -614,6 +612,10 @@ static int f2fs_set_lz4hc_level(struct f2fs_sb_info *sbi, const char *str) F2FS_OPTION(sbi).compress_level = level; return 0; #else + if (strlen(str) == 3) { + F2FS_OPTION(sbi).compress_level = 0; + return 0; + } f2fs_info(sbi, "kernel doesn't support lz4hc compression"); return -EINVAL; #endif @@ -627,7 +629,7 @@ static int f2fs_set_zstd_level(struct f2fs_sb_info *sbi, const 
char *str) int len = 4; if (strlen(str) == len) { - F2FS_OPTION(sbi).compress_level = 0; + F2FS_OPTION(sbi).compress_level = F2FS_ZSTD_DEFAULT_CLEVEL; return 0; } @@ -640,7 +642,7 @@ static int f2fs_set_zstd_level(struct f2fs_sb_info *sbi, const char *str) if (kstrtouint(str + 1, 10, &level)) return -EINVAL; - if (!level || level > zstd_max_clevel()) { + if (level < zstd_min_clevel() || level > zstd_max_clevel()) { f2fs_info(sbi, "invalid zstd compress level: %d", level); return -EINVAL; } From 698a5c8c8e05590d92629ad5796a421e14218e20 Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Sat, 8 Apr 2023 02:31:47 +0800 Subject: [PATCH 27/46] f2fs: add sanity compress level check for compressed file Commit 3fde13f817e2 ("f2fs: compress: support compress level") forgot to do basic compress level check, let's add it. Signed-off-by: Yangtao Li Signed-off-by: Jaegeuk Kim --- fs/f2fs/inode.c | 107 +++++++++++++++++++++++++++++++++++------------- 1 file changed, 78 insertions(+), 29 deletions(-) diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index cf4327ad106c..bb6c3733d104 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -10,6 +10,8 @@ #include #include #include +#include +#include #include "f2fs.h" #include "node.h" @@ -202,6 +204,80 @@ void f2fs_inode_chksum_set(struct f2fs_sb_info *sbi, struct page *page) ri->i_inode_checksum = cpu_to_le32(f2fs_inode_chksum(sbi, page)); } +static bool sanity_check_compress_inode(struct inode *inode, + struct f2fs_inode *ri) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + unsigned char clevel; + + if (ri->i_compress_algorithm >= COMPRESS_MAX) { + f2fs_warn(sbi, + "%s: inode (ino=%lx) has unsupported compress algorithm: %u, run fsck to fix", + __func__, inode->i_ino, ri->i_compress_algorithm); + goto err; + } + if (le64_to_cpu(ri->i_compr_blocks) > + SECTOR_TO_BLOCK(inode->i_blocks)) { + f2fs_warn(sbi, + "%s: inode (ino=%lx) has inconsistent i_compr_blocks:%llu, i_blocks:%llu, run fsck to fix", + __func__, inode->i_ino, 
le64_to_cpu(ri->i_compr_blocks), + SECTOR_TO_BLOCK(inode->i_blocks)); + goto err; + } + if (ri->i_log_cluster_size < MIN_COMPRESS_LOG_SIZE || + ri->i_log_cluster_size > MAX_COMPRESS_LOG_SIZE) { + f2fs_warn(sbi, + "%s: inode (ino=%lx) has unsupported log cluster size: %u, run fsck to fix", + __func__, inode->i_ino, ri->i_log_cluster_size); + goto err; + } + + clevel = le16_to_cpu(ri->i_compress_flag) >> + COMPRESS_LEVEL_OFFSET; + switch (ri->i_compress_algorithm) { + case COMPRESS_LZO: +#ifdef CONFIG_F2FS_FS_LZO + if (clevel) + goto err_level; +#endif + break; + case COMPRESS_LZORLE: +#ifdef CONFIG_F2FS_FS_LZORLE + if (clevel) + goto err_level; +#endif + break; + case COMPRESS_LZ4: +#ifdef CONFIG_F2FS_FS_LZ4 +#ifdef CONFIG_F2FS_FS_LZ4HC + if (clevel && + (clevel < LZ4HC_MIN_CLEVEL || clevel > LZ4HC_MAX_CLEVEL)) + goto err_level; +#else + if (clevel) + goto err_level; +#endif +#endif + break; + case COMPRESS_ZSTD: +#ifdef CONFIG_F2FS_FS_ZSTD + if (clevel < zstd_min_clevel() || clevel > zstd_max_clevel()) + goto err_level; +#endif + break; + default: + goto err_level; + } + + return true; +err_level: + f2fs_warn(sbi, "%s: inode (ino=%lx) has unsupported compress level: %u, run fsck to fix", + __func__, inode->i_ino, clevel); +err: + set_sbi_flag(sbi, SBI_NEED_FSCK); + return false; +} + static bool sanity_check_inode(struct inode *inode, struct page *node_page) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); @@ -286,35 +362,8 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page) if (f2fs_has_extra_attr(inode) && f2fs_sb_has_compression(sbi) && fi->i_flags & F2FS_COMPR_FL && F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, - i_log_cluster_size)) { - if (ri->i_compress_algorithm >= COMPRESS_MAX) { - set_sbi_flag(sbi, SBI_NEED_FSCK); - f2fs_warn(sbi, "%s: inode (ino=%lx) has unsupported " - "compress algorithm: %u, run fsck to fix", - __func__, inode->i_ino, - ri->i_compress_algorithm); - return false; - } - if (le64_to_cpu(ri->i_compr_blocks) > - 
SECTOR_TO_BLOCK(inode->i_blocks)) { - set_sbi_flag(sbi, SBI_NEED_FSCK); - f2fs_warn(sbi, "%s: inode (ino=%lx) has inconsistent " - "i_compr_blocks:%llu, i_blocks:%llu, run fsck to fix", - __func__, inode->i_ino, - le64_to_cpu(ri->i_compr_blocks), - SECTOR_TO_BLOCK(inode->i_blocks)); - return false; - } - if (ri->i_log_cluster_size < MIN_COMPRESS_LOG_SIZE || - ri->i_log_cluster_size > MAX_COMPRESS_LOG_SIZE) { - set_sbi_flag(sbi, SBI_NEED_FSCK); - f2fs_warn(sbi, "%s: inode (ino=%lx) has unsupported " - "log cluster size: %u, run fsck to fix", - __func__, inode->i_ino, - ri->i_log_cluster_size); - return false; - } - } + i_log_cluster_size)) + return sanity_check_compress_inode(inode, ri); return true; } From 64ee9163fe1b911aa0476af06ee0afd23fdf7388 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 17 May 2023 11:41:39 +0800 Subject: [PATCH 28/46] f2fs: compress: fix to check validity of i_compress_flag field The last valid compress related field is i_compress_flag, check its validity instead of i_log_cluster_size. 
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/inode.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index bb6c3733d104..6cacc8c55dec 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -362,7 +362,7 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page) if (f2fs_has_extra_attr(inode) && f2fs_sb_has_compression(sbi) && fi->i_flags & F2FS_COMPR_FL && F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, - i_log_cluster_size)) + i_compress_flag)) return sanity_check_compress_inode(inode, ri); return true; @@ -491,7 +491,7 @@ static int do_read_inode(struct inode *inode) if (f2fs_has_extra_attr(inode) && f2fs_sb_has_compression(sbi) && (fi->i_flags & F2FS_COMPR_FL)) { if (F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, - i_log_cluster_size)) { + i_compress_flag)) { unsigned short compress_flag; atomic_set(&fi->i_compr_blocks, @@ -729,7 +729,7 @@ void f2fs_update_inode(struct inode *inode, struct page *node_page) if (f2fs_sb_has_compression(F2FS_I_SB(inode)) && F2FS_FITS_IN_INODE(ri, F2FS_I(inode)->i_extra_isize, - i_log_cluster_size)) { + i_compress_flag)) { unsigned short compress_flag; ri->i_compr_blocks = From f240d3aaf5a1552ecb75445b47b1ca957d5151d2 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 31 May 2023 09:40:55 +0800 Subject: [PATCH 29/46] f2fs: do more sanity check on inode There are several issues in sanity_check_inode(): - The code looks not clean, it checks extra_attr related condition dispersively. - It missed to check i_extra_isize w/ lower boundary - It missed to check feature dependency: prjquota, inode_chksum, inode_crtime, compression features rely on extra_attr feature. - It's not necessary to check i_extra_isize due to it will only be assigned to non-zero value if f2fs_has_extra_attr() is true in do_read_inode(). Fix them all in this patch. 
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 + fs/f2fs/inode.c | 100 +++++++++++++++++++++++++++++++----------------- 2 files changed, 67 insertions(+), 35 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 94811085f9f3..bd0edb619f40 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3417,6 +3417,8 @@ static inline int get_inline_xattr_addrs(struct inode *inode) ((is_inode_flag_set(i, FI_ACL_MODE)) ? \ (F2FS_I(i)->i_acl_mode) : ((i)->i_mode)) +#define F2FS_MIN_EXTRA_ATTR_SIZE (sizeof(__le32)) + #define F2FS_TOTAL_EXTRA_ATTR_SIZE \ (offsetof(struct f2fs_inode, i_extra_end) - \ offsetof(struct f2fs_inode, i_extra_isize)) \ diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 6cacc8c55dec..09e986b050c6 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -301,41 +301,77 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page) return false; } - if (f2fs_sb_has_flexible_inline_xattr(sbi) - && !f2fs_has_extra_attr(inode)) { + if (f2fs_has_extra_attr(inode)) { + if (!f2fs_sb_has_extra_attr(sbi)) { + set_sbi_flag(sbi, SBI_NEED_FSCK); + f2fs_warn(sbi, "%s: inode (ino=%lx) is with extra_attr, but extra_attr feature is off", + __func__, inode->i_ino); + return false; + } + if (fi->i_extra_isize > F2FS_TOTAL_EXTRA_ATTR_SIZE || + fi->i_extra_isize < F2FS_MIN_EXTRA_ATTR_SIZE || + fi->i_extra_isize % sizeof(__le32)) { + set_sbi_flag(sbi, SBI_NEED_FSCK); + f2fs_warn(sbi, "%s: inode (ino=%lx) has corrupted i_extra_isize: %d, max: %zu", + __func__, inode->i_ino, fi->i_extra_isize, + F2FS_TOTAL_EXTRA_ATTR_SIZE); + return false; + } + if (f2fs_sb_has_flexible_inline_xattr(sbi) && + f2fs_has_inline_xattr(inode) && + (!fi->i_inline_xattr_size || + fi->i_inline_xattr_size > MAX_INLINE_XATTR_SIZE)) { + set_sbi_flag(sbi, SBI_NEED_FSCK); + f2fs_warn(sbi, "%s: inode (ino=%lx) has corrupted i_inline_xattr_size: %d, max: %zu", + __func__, inode->i_ino, fi->i_inline_xattr_size, + MAX_INLINE_XATTR_SIZE); + return false; + } + if 
(f2fs_sb_has_compression(sbi) && + fi->i_flags & F2FS_COMPR_FL && + F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, + i_compress_flag)) { + if (!sanity_check_compress_inode(inode, ri)) + return false; + } + } else if (f2fs_sb_has_flexible_inline_xattr(sbi)) { set_sbi_flag(sbi, SBI_NEED_FSCK); f2fs_warn(sbi, "%s: corrupted inode ino=%lx, run fsck to fix.", __func__, inode->i_ino); return false; } - if (f2fs_has_extra_attr(inode) && - !f2fs_sb_has_extra_attr(sbi)) { - set_sbi_flag(sbi, SBI_NEED_FSCK); - f2fs_warn(sbi, "%s: inode (ino=%lx) is with extra_attr, but extra_attr feature is off", - __func__, inode->i_ino); - return false; - } - - if (fi->i_extra_isize > F2FS_TOTAL_EXTRA_ATTR_SIZE || - fi->i_extra_isize % sizeof(__le32)) { - set_sbi_flag(sbi, SBI_NEED_FSCK); - f2fs_warn(sbi, "%s: inode (ino=%lx) has corrupted i_extra_isize: %d, max: %zu", - __func__, inode->i_ino, fi->i_extra_isize, - F2FS_TOTAL_EXTRA_ATTR_SIZE); - return false; - } - - if (f2fs_has_extra_attr(inode) && - f2fs_sb_has_flexible_inline_xattr(sbi) && - f2fs_has_inline_xattr(inode) && - (!fi->i_inline_xattr_size || - fi->i_inline_xattr_size > MAX_INLINE_XATTR_SIZE)) { - set_sbi_flag(sbi, SBI_NEED_FSCK); - f2fs_warn(sbi, "%s: inode (ino=%lx) has corrupted i_inline_xattr_size: %d, max: %zu", - __func__, inode->i_ino, fi->i_inline_xattr_size, - MAX_INLINE_XATTR_SIZE); - return false; + if (!f2fs_sb_has_extra_attr(sbi)) { + if (f2fs_sb_has_project_quota(sbi)) { + set_sbi_flag(sbi, SBI_NEED_FSCK); + f2fs_warn(sbi, "%s: corrupted inode ino=%lx, wrong feature flag: %u, run fsck to fix.", + __func__, inode->i_ino, F2FS_FEATURE_PRJQUOTA); + return false; + } + if (f2fs_sb_has_inode_chksum(sbi)) { + set_sbi_flag(sbi, SBI_NEED_FSCK); + f2fs_warn(sbi, "%s: corrupted inode ino=%lx, wrong feature flag: %u, run fsck to fix.", + __func__, inode->i_ino, F2FS_FEATURE_INODE_CHKSUM); + return false; + } + if (f2fs_sb_has_flexible_inline_xattr(sbi)) { + set_sbi_flag(sbi, SBI_NEED_FSCK); + f2fs_warn(sbi, "%s: corrupted 
inode ino=%lx, wrong feature flag: %u, run fsck to fix.", + __func__, inode->i_ino, F2FS_FEATURE_FLEXIBLE_INLINE_XATTR); + return false; + } + if (f2fs_sb_has_inode_crtime(sbi)) { + set_sbi_flag(sbi, SBI_NEED_FSCK); + f2fs_warn(sbi, "%s: corrupted inode ino=%lx, wrong feature flag: %u, run fsck to fix.", + __func__, inode->i_ino, F2FS_FEATURE_INODE_CRTIME); + return false; + } + if (f2fs_sb_has_compression(sbi)) { + set_sbi_flag(sbi, SBI_NEED_FSCK); + f2fs_warn(sbi, "%s: corrupted inode ino=%lx, wrong feature flag: %u, run fsck to fix.", + __func__, inode->i_ino, F2FS_FEATURE_COMPRESSION); + return false; + } } if (f2fs_sanity_check_inline_data(inode)) { @@ -359,12 +395,6 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page) return false; } - if (f2fs_has_extra_attr(inode) && f2fs_sb_has_compression(sbi) && - fi->i_flags & F2FS_COMPR_FL && - F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, - i_compress_flag)) - return sanity_check_compress_inode(inode, ri); - return true; } From 94c8431fb46bfbe51bd3eb68687334797af0a221 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 12 Jun 2023 07:37:11 +0200 Subject: [PATCH 30/46] f2fs: set FMODE_CAN_ODIRECT instead of a dummy direct_IO method Since commit a2ad63daa88b ("VFS: add FMODE_CAN_ODIRECT file flag") file systems can just set the FMODE_CAN_ODIRECT flag at open time instead of wiring up a dummy direct_IO method to indicate support for direct I/O. Do that for f2fs so that noop_direct_IO can eventually be removed. 
Signed-off-by: Christoph Hellwig Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 1 - fs/f2fs/file.c | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 3fad7a23a507..5882afe71d82 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -4129,7 +4129,6 @@ const struct address_space_operations f2fs_dblock_aops = { .migrate_folio = filemap_migrate_folio, .invalidate_folio = f2fs_invalidate_folio, .release_folio = f2fs_release_folio, - .direct_IO = noop_direct_IO, .bmap = f2fs_bmap, .swap_activate = f2fs_swap_activate, .swap_deactivate = f2fs_swap_deactivate, diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 23c68ee946e5..b8a6267d9800 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -547,6 +547,7 @@ static int f2fs_file_open(struct inode *inode, struct file *filp) return err; filp->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC; + filp->f_mode |= FMODE_CAN_ODIRECT; return dquot_file_open(inode, filp); } From c571fbb5b59a3741e48014faa92c2f14bc59fe50 Mon Sep 17 00:00:00 2001 From: Sheng Yong Date: Mon, 12 Jun 2023 11:01:16 +0800 Subject: [PATCH 31/46] f2fs: add helper to check compression level This patch adds a helper function to check if compression level is valid. 
Signed-off-by: Sheng Yong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 27 +++++++++++++++++++++++++++ fs/f2fs/f2fs.h | 2 ++ fs/f2fs/super.c | 4 ++-- 3 files changed, 31 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 438af59d3571..236d890f560b 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -55,6 +55,7 @@ struct f2fs_compress_ops { int (*init_decompress_ctx)(struct decompress_io_ctx *dic); void (*destroy_decompress_ctx)(struct decompress_io_ctx *dic); int (*decompress_pages)(struct decompress_io_ctx *dic); + bool (*is_level_valid)(int level); }; static unsigned int offset_in_cluster(struct compress_ctx *cc, pgoff_t index) @@ -308,11 +309,21 @@ static int lz4_decompress_pages(struct decompress_io_ctx *dic) return 0; } +static bool lz4_is_level_valid(int lvl) +{ +#ifdef CONFIG_F2FS_FS_LZ4HC + return !lvl || (lvl >= LZ4HC_MIN_CLEVEL && lvl <= LZ4HC_MAX_CLEVEL); +#else + return lvl == 0; +#endif +} + static const struct f2fs_compress_ops f2fs_lz4_ops = { .init_compress_ctx = lz4_init_compress_ctx, .destroy_compress_ctx = lz4_destroy_compress_ctx, .compress_pages = lz4_compress_pages, .decompress_pages = lz4_decompress_pages, + .is_level_valid = lz4_is_level_valid, }; #endif @@ -476,6 +487,11 @@ static int zstd_decompress_pages(struct decompress_io_ctx *dic) return 0; } +static bool zstd_is_level_valid(int lvl) +{ + return lvl >= zstd_min_clevel() && lvl <= zstd_max_clevel(); +} + static const struct f2fs_compress_ops f2fs_zstd_ops = { .init_compress_ctx = zstd_init_compress_ctx, .destroy_compress_ctx = zstd_destroy_compress_ctx, @@ -483,6 +499,7 @@ static const struct f2fs_compress_ops f2fs_zstd_ops = { .init_decompress_ctx = zstd_init_decompress_ctx, .destroy_decompress_ctx = zstd_destroy_decompress_ctx, .decompress_pages = zstd_decompress_pages, + .is_level_valid = zstd_is_level_valid, }; #endif @@ -541,6 +558,16 @@ bool f2fs_is_compress_backend_ready(struct inode *inode) return 
f2fs_cops[F2FS_I(inode)->i_compress_algorithm]; } +bool f2fs_is_compress_level_valid(int alg, int lvl) +{ + const struct f2fs_compress_ops *cops = f2fs_cops[alg]; + + if (cops->is_level_valid) + return cops->is_level_valid(lvl); + + return lvl == 0; +} + static mempool_t *compress_page_pool; static int num_compress_pages = 512; module_param(num_compress_pages, uint, 0444); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index bd0edb619f40..2a6a6b1a0895 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -4240,6 +4240,7 @@ bool f2fs_compress_write_end(struct inode *inode, void *fsdata, int f2fs_truncate_partial_cluster(struct inode *inode, u64 from, bool lock); void f2fs_compress_write_end_io(struct bio *bio, struct page *page); bool f2fs_is_compress_backend_ready(struct inode *inode); +bool f2fs_is_compress_level_valid(int alg, int lvl); int __init f2fs_init_compress_mempool(void); void f2fs_destroy_compress_mempool(void); void f2fs_decompress_cluster(struct decompress_io_ctx *dic, bool in_task); @@ -4304,6 +4305,7 @@ static inline bool f2fs_is_compress_backend_ready(struct inode *inode) /* not support compression */ return false; } +static inline bool f2fs_is_compress_level_valid(int alg, int lvl) { return false; } static inline struct page *f2fs_compress_control_page(struct page *page) { WARN_ON_ONCE(1); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index a3695adad3d3..5b7d25fd4c08 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -604,7 +604,7 @@ static int f2fs_set_lz4hc_level(struct f2fs_sb_info *sbi, const char *str) if (kstrtouint(str + 1, 10, &level)) return -EINVAL; - if (level < LZ4HC_MIN_CLEVEL || level > LZ4HC_MAX_CLEVEL) { + if (!f2fs_is_compress_level_valid(COMPRESS_LZ4, level)) { f2fs_info(sbi, "invalid lz4hc compress level: %d", level); return -EINVAL; } @@ -642,7 +642,7 @@ static int f2fs_set_zstd_level(struct f2fs_sb_info *sbi, const char *str) if (kstrtouint(str + 1, 10, &level)) return -EINVAL; - if (level < zstd_min_clevel() || level > 
zstd_max_clevel()) { + if (!f2fs_is_compress_level_valid(COMPRESS_ZSTD, level)) { f2fs_info(sbi, "invalid zstd compress level: %d", level); return -EINVAL; } From dde38c03b351749f682db087df5202b55c7c1b40 Mon Sep 17 00:00:00 2001 From: Sheng Yong Date: Mon, 12 Jun 2023 11:01:17 +0800 Subject: [PATCH 32/46] f2fs: cleanup MIN_INLINE_XATTR_SIZE Signed-off-by: Sheng Yong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 2 +- fs/f2fs/xattr.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 5b7d25fd4c08..1b2c788ed80d 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1363,7 +1363,7 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount) return -EINVAL; } - min_size = sizeof(struct f2fs_xattr_header) / sizeof(__le32); + min_size = MIN_INLINE_XATTR_SIZE; max_size = MAX_INLINE_XATTR_SIZE; if (F2FS_OPTION(sbi).inline_xattr_size < min_size || diff --git a/fs/f2fs/xattr.h b/fs/f2fs/xattr.h index 416d652774a3..b1811c392e6f 100644 --- a/fs/f2fs/xattr.h +++ b/fs/f2fs/xattr.h @@ -83,6 +83,7 @@ struct f2fs_xattr_entry { sizeof(struct f2fs_xattr_header) - \ sizeof(struct f2fs_xattr_entry)) +#define MIN_INLINE_XATTR_SIZE (sizeof(struct f2fs_xattr_header) / sizeof(__le32)) #define MAX_INLINE_XATTR_SIZE \ (DEF_ADDRS_PER_INODE - \ F2FS_TOTAL_EXTRA_ATTR_SIZE / sizeof(__le32) - \ From ac1ee161dec5801d9bbd874ef69cd0ff1e8053b6 Mon Sep 17 00:00:00 2001 From: Sheng Yong Date: Mon, 12 Jun 2023 11:01:19 +0800 Subject: [PATCH 33/46] f2fs: add f2fs_ioc_get_compress_blocks This patch adds f2fs_ioc_get_compress_blocks() to provide a common f2fs_get_compress_blocks(). 
Signed-off-by: Sheng Yong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index b8a6267d9800..95b92a6ca19f 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -3377,18 +3377,29 @@ static int f2fs_ioc_setfslabel(struct file *filp, unsigned long arg) return err; } -static int f2fs_get_compress_blocks(struct file *filp, unsigned long arg) +static int f2fs_get_compress_blocks(struct inode *inode, __u64 *blocks) { - struct inode *inode = file_inode(filp); - __u64 blocks; - if (!f2fs_sb_has_compression(F2FS_I_SB(inode))) return -EOPNOTSUPP; if (!f2fs_compressed_file(inode)) return -EINVAL; - blocks = atomic_read(&F2FS_I(inode)->i_compr_blocks); + *blocks = atomic_read(&F2FS_I(inode)->i_compr_blocks); + + return 0; +} + +static int f2fs_ioc_get_compress_blocks(struct file *filp, unsigned long arg) +{ + struct inode *inode = file_inode(filp); + __u64 blocks; + int ret; + + ret = f2fs_get_compress_blocks(inode, &blocks); + if (ret < 0) + return ret; + return put_user(blocks, (u64 __user *)arg); } @@ -4240,7 +4251,7 @@ static long __f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) case FS_IOC_SETFSLABEL: return f2fs_ioc_setfslabel(filp, arg); case F2FS_IOC_GET_COMPRESS_BLOCKS: - return f2fs_get_compress_blocks(filp, arg); + return f2fs_ioc_get_compress_blocks(filp, arg); case F2FS_IOC_RELEASE_COMPRESS_BLOCKS: return f2fs_release_compress_blocks(filp, arg); case F2FS_IOC_RESERVE_COMPRESS_BLOCKS: From c9667b19e2cf13735fe2620f9d97b788897cd4af Mon Sep 17 00:00:00 2001 From: Daeho Jeong Date: Mon, 12 Jun 2023 16:32:03 -0700 Subject: [PATCH 34/46] f2fs: check zone write pointer points to the end of zone We don't need to report an issue, when the zone write pointer already points to the end of the zone, since the zone mismatch is already taken care. 
Signed-off-by: Daeho Jeong Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 0c0c033c4bdd..8c7af8b4fc47 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -4888,8 +4888,12 @@ static int check_zone_write_pointer(struct f2fs_sb_info *sbi, break; } - // The write pointer matches with the valid blocks - if (last_valid_block + 1 == wp_block) + /* + * The write pointer matches with the valid blocks or + * already points to the end of the zone. + */ + if ((last_valid_block + 1 == wp_block) || + (zone->wp == zone->start + zone->len)) return 0; if (last_valid_block + 1 == zone_block) { From 9ac00e7cef106b66611e131f59f61f5ae35cf726 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 13 Jun 2023 13:35:31 -0700 Subject: [PATCH 35/46] f2fs: do not issue small discard commands during checkpoint If there're huge # of small discards, this will increase checkpoint latency insanely. Let's issue small discards only by trim. Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 8c7af8b4fc47..0457d620011f 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2193,7 +2193,7 @@ void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi, len = next_pos - cur_pos; if (f2fs_sb_has_blkzoned(sbi) || - (force && len < cpc->trim_minlen)) + !force || len < cpc->trim_minlen) goto skip; f2fs_issue_discard(sbi, entry->start_blkaddr + cur_pos, From 3f8ac7da8c6efd72908e0a16d4a149e79f356a00 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 16 Jun 2023 15:20:09 +0100 Subject: [PATCH 36/46] f2fs: remove redundant assignment to variable err The assignment to variable err is redundant since the code jumps to label next and err is then re-assigned a new value on the call to sanity_check_node_chain. Remove the assignment. 
Cleans up clang scan build warning: fs/f2fs/recovery.c:464:6: warning: Value stored to 'err' is never read [deadcode.DeadStores] Signed-off-by: Colin Ian King Signed-off-by: Jaegeuk Kim --- fs/f2fs/recovery.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index f0cf1538389c..4e7d4ceeb084 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -460,10 +460,8 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head, quota_inode); if (IS_ERR(entry)) { err = PTR_ERR(entry); - if (err == -ENOENT) { - err = 0; + if (err == -ENOENT) goto next; - } f2fs_put_page(page, 1); break; } From c3355ea9d82fe6b1a4226c9a7d311f9c5715b456 Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Tue, 13 Jun 2023 15:51:57 +0800 Subject: [PATCH 37/46] f2fs: convert to use sbi directly F2FS_I_SB(inode) is redundant. Signed-off-by: Yangtao Li Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 95b92a6ca19f..12ad128aefb7 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -3468,7 +3468,7 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg) int ret; int writecount; - if (!f2fs_sb_has_compression(F2FS_I_SB(inode))) + if (!f2fs_sb_has_compression(sbi)) return -EOPNOTSUPP; if (!f2fs_compressed_file(inode)) @@ -3481,7 +3481,7 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg) if (ret) return ret; - f2fs_balance_fs(F2FS_I_SB(inode), true); + f2fs_balance_fs(sbi, true); inode_lock(inode); @@ -3638,7 +3638,7 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg) unsigned int reserved_blocks = 0; int ret; - if (!f2fs_sb_has_compression(F2FS_I_SB(inode))) + if (!f2fs_sb_has_compression(sbi)) return -EOPNOTSUPP; if (!f2fs_compressed_file(inode)) @@ -3654,7 +3654,7 @@ static int 
f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg) if (atomic_read(&F2FS_I(inode)->i_compr_blocks)) goto out; - f2fs_balance_fs(F2FS_I_SB(inode), true); + f2fs_balance_fs(sbi, true); inode_lock(inode); @@ -4048,7 +4048,7 @@ static int f2fs_ioc_decompress_file(struct file *filp) if (!f2fs_compressed_file(inode)) return -EINVAL; - f2fs_balance_fs(F2FS_I_SB(inode), true); + f2fs_balance_fs(sbi, true); file_start_write(filp); inode_lock(inode); @@ -4123,7 +4123,7 @@ static int f2fs_ioc_compress_file(struct file *filp) if (!f2fs_compressed_file(inode)) return -EINVAL; - f2fs_balance_fs(F2FS_I_SB(inode), true); + f2fs_balance_fs(sbi, true); file_start_write(filp); inode_lock(inode); From 6201c478dedcf7c50361b23b5c4d4f41a68921ac Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Sat, 6 May 2023 23:16:03 +0800 Subject: [PATCH 38/46] f2fs: refactor struct f2fs_attr macro This patch provides a large number of variants of F2FS_RW_ATTR and F2FS_RO_ATTR macros, reducing the number of parameters required to initialize the f2fs_attr structure. 
Reported-by: kernel test robot Link: https://lore.kernel.org/oe-kbuild-all/202304152234.wjaY3IYm-lkp@intel.com/ Signed-off-by: Yangtao Li Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/sysfs.c | 244 ++++++++++++++++++++++++++++++------------------ 1 file changed, 151 insertions(+), 93 deletions(-) diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 467d743c801f..48b7e0073884 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -842,68 +842,160 @@ static struct f2fs_attr f2fs_attr_##_name = { \ #define F2FS_GENERAL_RO_ATTR(name) \ static struct f2fs_attr f2fs_attr_##name = __ATTR(name, 0444, name##_show, NULL) -#define F2FS_STAT_ATTR(_struct_type, _struct_name, _name, _elname) \ -static struct f2fs_attr f2fs_attr_##_name = { \ - .attr = {.name = __stringify(_name), .mode = 0444 }, \ - .show = f2fs_sbi_show, \ - .struct_type = _struct_type, \ - .offset = offsetof(struct _struct_name, _elname), \ -} +#ifdef CONFIG_F2FS_STAT_FS +#define STAT_INFO_RO_ATTR(name, elname) \ + F2FS_RO_ATTR(STAT_INFO, f2fs_stat_info, name, elname) +#endif + +#define GC_THREAD_RW_ATTR(name, elname) \ + F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, name, elname) + +#define SM_INFO_RW_ATTR(name, elname) \ + F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, name, elname) + +#define SM_INFO_GENERAL_RW_ATTR(elname) \ + SM_INFO_RW_ATTR(elname, elname) + +#define DCC_INFO_RW_ATTR(name, elname) \ + F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, name, elname) + +#define DCC_INFO_GENERAL_RW_ATTR(elname) \ + DCC_INFO_RW_ATTR(elname, elname) + +#define NM_INFO_RW_ATTR(name, elname) \ + F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, name, elname) + +#define NM_INFO_GENERAL_RW_ATTR(elname) \ + NM_INFO_RW_ATTR(elname, elname) + +#define F2FS_SBI_RW_ATTR(name, elname) \ + F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, name, elname) + +#define F2FS_SBI_GENERAL_RW_ATTR(elname) \ + F2FS_SBI_RW_ATTR(elname, elname) + +#define F2FS_SBI_GENERAL_RO_ATTR(elname) \ + F2FS_RO_ATTR(F2FS_SBI, f2fs_sb_info, elname, elname) -F2FS_RW_ATTR(GC_THREAD, 
f2fs_gc_kthread, gc_urgent_sleep_time, - urgent_sleep_time); -F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_min_sleep_time, min_sleep_time); -F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_max_sleep_time, max_sleep_time); -F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_no_gc_sleep_time, no_gc_sleep_time); -F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_idle, gc_mode); -F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_urgent, gc_mode); -F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments); -F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, max_small_discards, max_discards); -F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, max_discard_request, max_discard_request); -F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, min_discard_issue_time, min_discard_issue_time); -F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, mid_discard_issue_time, mid_discard_issue_time); -F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, max_discard_issue_time, max_discard_issue_time); -F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, discard_io_aware_gran, discard_io_aware_gran); -F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, discard_urgent_util, discard_urgent_util); -F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, discard_granularity, discard_granularity); -F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, max_ordered_discard, max_ordered_discard); -F2FS_RW_ATTR(RESERVED_BLOCKS, f2fs_sb_info, reserved_blocks, reserved_blocks); -F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy); -F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util); -F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_fsync_blocks, min_fsync_blocks); -F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_seq_blocks, min_seq_blocks); -F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_hot_blocks, min_hot_blocks); -F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ssr_sections, min_ssr_sections); -F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh); -F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ra_nid_pages, ra_nid_pages); -F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, dirty_nats_ratio, dirty_nats_ratio); 
-F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, max_roll_forward_node_blocks, max_rf_node_blocks); -F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_victim_search, max_victim_search); -F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, migration_granularity, migration_granularity); -F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, dir_level, dir_level); -F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, cp_interval, interval_time[CP_TIME]); -F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, idle_interval, interval_time[REQ_TIME]); -F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, discard_idle_interval, - interval_time[DISCARD_TIME]); -F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_idle_interval, interval_time[GC_TIME]); -F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, - umount_discard_timeout, interval_time[UMOUNT_DISCARD_TIMEOUT]); -#ifdef CONFIG_F2FS_IOSTAT -F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, iostat_enable, iostat_enable); -F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, iostat_period_ms, iostat_period_ms); -#endif -F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, readdir_ra, readdir_ra); -F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_io_bytes, max_io_bytes); -F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_pin_file_thresh, gc_pin_file_threshold); -F2FS_RW_ATTR(F2FS_SBI, f2fs_super_block, extension_list, extension_list); #ifdef CONFIG_F2FS_FAULT_INJECTION -F2FS_RW_ATTR(FAULT_INFO_RATE, f2fs_fault_info, inject_rate, inject_rate); -F2FS_RW_ATTR(FAULT_INFO_TYPE, f2fs_fault_info, inject_type, inject_type); +#define FAULT_INFO_GENERAL_RW_ATTR(type, elname) \ + F2FS_RW_ATTR(type, f2fs_fault_info, elname, elname) #endif -F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, data_io_flag, data_io_flag); -F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, node_io_flag, node_io_flag); -F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_remaining_trials, gc_remaining_trials); -F2FS_RW_ATTR(CPRC_INFO, ckpt_req_control, ckpt_thread_ioprio, ckpt_thread_ioprio); + +#define RESERVED_BLOCKS_GENERAL_RW_ATTR(elname) \ + F2FS_RW_ATTR(RESERVED_BLOCKS, f2fs_sb_info, elname, elname) + +#define CPRC_INFO_GENERAL_RW_ATTR(elname) \ + F2FS_RW_ATTR(CPRC_INFO, 
ckpt_req_control, elname, elname) + +#define ATGC_INFO_RW_ATTR(name, elname) \ + F2FS_RW_ATTR(ATGC_INFO, atgc_management, name, elname) + +/* GC_THREAD ATTR */ +GC_THREAD_RW_ATTR(gc_urgent_sleep_time, urgent_sleep_time); +GC_THREAD_RW_ATTR(gc_min_sleep_time, min_sleep_time); +GC_THREAD_RW_ATTR(gc_max_sleep_time, max_sleep_time); +GC_THREAD_RW_ATTR(gc_no_gc_sleep_time, no_gc_sleep_time); + +/* SM_INFO ATTR */ +SM_INFO_RW_ATTR(reclaim_segments, rec_prefree_segments); +SM_INFO_GENERAL_RW_ATTR(ipu_policy); +SM_INFO_GENERAL_RW_ATTR(min_ipu_util); +SM_INFO_GENERAL_RW_ATTR(min_fsync_blocks); +SM_INFO_GENERAL_RW_ATTR(min_seq_blocks); +SM_INFO_GENERAL_RW_ATTR(min_hot_blocks); +SM_INFO_GENERAL_RW_ATTR(min_ssr_sections); + +/* DCC_INFO ATTR */ +DCC_INFO_RW_ATTR(max_small_discards, max_discards); +DCC_INFO_GENERAL_RW_ATTR(max_discard_request); +DCC_INFO_GENERAL_RW_ATTR(min_discard_issue_time); +DCC_INFO_GENERAL_RW_ATTR(mid_discard_issue_time); +DCC_INFO_GENERAL_RW_ATTR(max_discard_issue_time); +DCC_INFO_GENERAL_RW_ATTR(discard_io_aware_gran); +DCC_INFO_GENERAL_RW_ATTR(discard_urgent_util); +DCC_INFO_GENERAL_RW_ATTR(discard_granularity); +DCC_INFO_GENERAL_RW_ATTR(max_ordered_discard); + +/* NM_INFO ATTR */ +NM_INFO_RW_ATTR(max_roll_forward_node_blocks, max_rf_node_blocks); +NM_INFO_GENERAL_RW_ATTR(ram_thresh); +NM_INFO_GENERAL_RW_ATTR(ra_nid_pages); +NM_INFO_GENERAL_RW_ATTR(dirty_nats_ratio); + +/* F2FS_SBI ATTR */ +F2FS_RW_ATTR(F2FS_SBI, f2fs_super_block, extension_list, extension_list); +F2FS_SBI_RW_ATTR(gc_idle, gc_mode); +F2FS_SBI_RW_ATTR(gc_urgent, gc_mode); +F2FS_SBI_RW_ATTR(cp_interval, interval_time[CP_TIME]); +F2FS_SBI_RW_ATTR(idle_interval, interval_time[REQ_TIME]); +F2FS_SBI_RW_ATTR(discard_idle_interval, interval_time[DISCARD_TIME]); +F2FS_SBI_RW_ATTR(gc_idle_interval, interval_time[GC_TIME]); +F2FS_SBI_RW_ATTR(umount_discard_timeout, interval_time[UMOUNT_DISCARD_TIMEOUT]); +F2FS_SBI_RW_ATTR(gc_pin_file_thresh, gc_pin_file_threshold); 
+F2FS_SBI_RW_ATTR(gc_reclaimed_segments, gc_reclaimed_segs); +F2FS_SBI_GENERAL_RW_ATTR(max_victim_search); +F2FS_SBI_GENERAL_RW_ATTR(migration_granularity); +F2FS_SBI_GENERAL_RW_ATTR(dir_level); +#ifdef CONFIG_F2FS_IOSTAT +F2FS_SBI_GENERAL_RW_ATTR(iostat_enable); +F2FS_SBI_GENERAL_RW_ATTR(iostat_period_ms); +#endif +F2FS_SBI_GENERAL_RW_ATTR(readdir_ra); +F2FS_SBI_GENERAL_RW_ATTR(max_io_bytes); +F2FS_SBI_GENERAL_RW_ATTR(data_io_flag); +F2FS_SBI_GENERAL_RW_ATTR(node_io_flag); +F2FS_SBI_GENERAL_RW_ATTR(gc_remaining_trials); +F2FS_SBI_GENERAL_RW_ATTR(seq_file_ra_mul); +F2FS_SBI_GENERAL_RW_ATTR(gc_segment_mode); +F2FS_SBI_GENERAL_RW_ATTR(max_fragment_chunk); +F2FS_SBI_GENERAL_RW_ATTR(max_fragment_hole); +#ifdef CONFIG_F2FS_FS_COMPRESSION +F2FS_SBI_GENERAL_RW_ATTR(compr_written_block); +F2FS_SBI_GENERAL_RW_ATTR(compr_saved_block); +F2FS_SBI_GENERAL_RW_ATTR(compr_new_inode); +F2FS_SBI_GENERAL_RW_ATTR(compress_percent); +F2FS_SBI_GENERAL_RW_ATTR(compress_watermark); +#endif +/* atomic write */ +F2FS_SBI_GENERAL_RO_ATTR(current_atomic_write); +F2FS_SBI_GENERAL_RW_ATTR(peak_atomic_write); +F2FS_SBI_GENERAL_RW_ATTR(committed_atomic_block); +F2FS_SBI_GENERAL_RW_ATTR(revoked_atomic_block); +/* block age extent cache */ +F2FS_SBI_GENERAL_RW_ATTR(hot_data_age_threshold); +F2FS_SBI_GENERAL_RW_ATTR(warm_data_age_threshold); +F2FS_SBI_GENERAL_RW_ATTR(last_age_weight); +#ifdef CONFIG_BLK_DEV_ZONED +F2FS_SBI_GENERAL_RO_ATTR(unusable_blocks_per_sec); +#endif + +/* STAT_INFO ATTR */ +#ifdef CONFIG_F2FS_STAT_FS +STAT_INFO_RO_ATTR(cp_foreground_calls, cp_count); +STAT_INFO_RO_ATTR(cp_background_calls, bg_cp_count); +STAT_INFO_RO_ATTR(gc_foreground_calls, call_count); +STAT_INFO_RO_ATTR(gc_background_calls, bg_gc); +#endif + +/* FAULT_INFO ATTR */ +#ifdef CONFIG_F2FS_FAULT_INJECTION +FAULT_INFO_GENERAL_RW_ATTR(FAULT_INFO_RATE, inject_rate); +FAULT_INFO_GENERAL_RW_ATTR(FAULT_INFO_TYPE, inject_type); +#endif + +/* RESERVED_BLOCKS ATTR */ +RESERVED_BLOCKS_GENERAL_RW_ATTR(reserved_blocks); + 
+/* CPRC_INFO ATTR */ +CPRC_INFO_GENERAL_RW_ATTR(ckpt_thread_ioprio); + +/* ATGC_INFO ATTR */ +ATGC_INFO_RW_ATTR(atgc_candidate_ratio, candidate_ratio); +ATGC_INFO_RW_ATTR(atgc_candidate_count, max_candidate_count); +ATGC_INFO_RW_ATTR(atgc_age_weight, age_weight); +ATGC_INFO_RW_ATTR(atgc_age_threshold, age_threshold); + F2FS_GENERAL_RO_ATTR(dirty_segments); F2FS_GENERAL_RO_ATTR(free_segments); F2FS_GENERAL_RO_ATTR(ovp_segments); @@ -917,10 +1009,6 @@ F2FS_GENERAL_RO_ATTR(main_blkaddr); F2FS_GENERAL_RO_ATTR(pending_discard); F2FS_GENERAL_RO_ATTR(gc_mode); #ifdef CONFIG_F2FS_STAT_FS -F2FS_STAT_ATTR(STAT_INFO, f2fs_stat_info, cp_foreground_calls, cp_count); -F2FS_STAT_ATTR(STAT_INFO, f2fs_stat_info, cp_background_calls, bg_cp_count); -F2FS_STAT_ATTR(STAT_INFO, f2fs_stat_info, gc_foreground_calls, call_count); -F2FS_STAT_ATTR(STAT_INFO, f2fs_stat_info, gc_background_calls, bg_gc); F2FS_GENERAL_RO_ATTR(moved_blocks_background); F2FS_GENERAL_RO_ATTR(moved_blocks_foreground); F2FS_GENERAL_RO_ATTR(avg_vblocks); @@ -935,8 +1023,6 @@ F2FS_FEATURE_RO_ATTR(encrypted_casefold); #endif /* CONFIG_FS_ENCRYPTION */ #ifdef CONFIG_BLK_DEV_ZONED F2FS_FEATURE_RO_ATTR(block_zoned); -F2FS_RO_ATTR(F2FS_SBI, f2fs_sb_info, unusable_blocks_per_sec, - unusable_blocks_per_sec); #endif F2FS_FEATURE_RO_ATTR(atomic_write); F2FS_FEATURE_RO_ATTR(extra_attr); @@ -956,37 +1042,9 @@ F2FS_FEATURE_RO_ATTR(casefold); F2FS_FEATURE_RO_ATTR(readonly); #ifdef CONFIG_F2FS_FS_COMPRESSION F2FS_FEATURE_RO_ATTR(compression); -F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, compr_written_block, compr_written_block); -F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, compr_saved_block, compr_saved_block); -F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, compr_new_inode, compr_new_inode); -F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, compress_percent, compress_percent); -F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, compress_watermark, compress_watermark); #endif F2FS_FEATURE_RO_ATTR(pin_file); -/* For ATGC */ -F2FS_RW_ATTR(ATGC_INFO, atgc_management, 
atgc_candidate_ratio, candidate_ratio); -F2FS_RW_ATTR(ATGC_INFO, atgc_management, atgc_candidate_count, max_candidate_count); -F2FS_RW_ATTR(ATGC_INFO, atgc_management, atgc_age_weight, age_weight); -F2FS_RW_ATTR(ATGC_INFO, atgc_management, atgc_age_threshold, age_threshold); - -F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, seq_file_ra_mul, seq_file_ra_mul); -F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_segment_mode, gc_segment_mode); -F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_reclaimed_segments, gc_reclaimed_segs); -F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_fragment_chunk, max_fragment_chunk); -F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_fragment_hole, max_fragment_hole); - -/* For atomic write */ -F2FS_RO_ATTR(F2FS_SBI, f2fs_sb_info, current_atomic_write, current_atomic_write); -F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, peak_atomic_write, peak_atomic_write); -F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, committed_atomic_block, committed_atomic_block); -F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, revoked_atomic_block, revoked_atomic_block); - -/* For block age extent cache */ -F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, hot_data_age_threshold, hot_data_age_threshold); -F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, warm_data_age_threshold, warm_data_age_threshold); -F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, last_age_weight, last_age_weight); - #define ATTR_LIST(name) (&f2fs_attr_##name.attr) static struct attribute *f2fs_attrs[] = { ATTR_LIST(gc_urgent_sleep_time), From 2724daf6c24c58099a758d1e842d39b10133b065 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 6 Jun 2023 10:17:47 -0700 Subject: [PATCH 39/46] f2fs: compress tmp files given extension Let's compress tmp files for the given extension list. This patch does not change the previous behavior, but allow the cases as below. 
Extension example: "ext" - abc.ext : allow - abc.ext.abc : allow - abc.extm : not allow Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/namei.c | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 3e35eb7dbb8f..ff89de115272 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -23,7 +23,7 @@ #include static inline bool is_extension_exist(const unsigned char *s, const char *sub, - bool tmp_ext) + bool tmp_ext, bool tmp_dot) { size_t slen = strlen(s); size_t sublen = strlen(sub); @@ -49,13 +49,27 @@ static inline bool is_extension_exist(const unsigned char *s, const char *sub, for (i = 1; i < slen - sublen; i++) { if (s[i] != '.') continue; - if (!strncasecmp(s + i + 1, sub, sublen)) - return true; + if (!strncasecmp(s + i + 1, sub, sublen)) { + if (!tmp_dot) + return true; + if (i == slen - sublen - 1 || s[i + 1 + sublen] == '.') + return true; + } } return false; } +static inline bool is_temperature_extension(const unsigned char *s, const char *sub) +{ + return is_extension_exist(s, sub, true, false); +} + +static inline bool is_compress_extension(const unsigned char *s, const char *sub) +{ + return is_extension_exist(s, sub, true, true); +} + int f2fs_update_extension_list(struct f2fs_sb_info *sbi, const char *name, bool hot, bool set) { @@ -148,7 +162,7 @@ static void set_compress_new_inode(struct f2fs_sb_info *sbi, struct inode *dir, cold_count = le32_to_cpu(sbi->raw_super->extension_count); hot_count = sbi->raw_super->hot_ext_count; for (i = cold_count; i < cold_count + hot_count; i++) - if (is_extension_exist(name, extlist[i], false)) + if (is_temperature_extension(name, extlist[i])) break; f2fs_up_read(&sbi->sb_lock); if (i < (cold_count + hot_count)) @@ -156,12 +170,12 @@ static void set_compress_new_inode(struct f2fs_sb_info *sbi, struct inode *dir, /* Don't compress unallowed extension. 
*/ for (i = 0; i < noext_cnt; i++) - if (is_extension_exist(name, noext[i], false)) + if (is_compress_extension(name, noext[i])) return; /* Compress wanting extension. */ for (i = 0; i < ext_cnt; i++) { - if (is_extension_exist(name, ext[i], false)) { + if (is_compress_extension(name, ext[i])) { set_compress_context(inode); return; } @@ -189,7 +203,7 @@ static void set_file_temperature(struct f2fs_sb_info *sbi, struct inode *inode, cold_count = le32_to_cpu(sbi->raw_super->extension_count); hot_count = sbi->raw_super->hot_ext_count; for (i = 0; i < cold_count + hot_count; i++) - if (is_extension_exist(name, extlist[i], true)) + if (is_temperature_extension(name, extlist[i])) break; f2fs_up_read(&sbi->sb_lock); From 396d0a28836d42bef595a8843533285abaf64ff7 Mon Sep 17 00:00:00 2001 From: Yunlei He Date: Fri, 23 Jun 2023 00:16:46 +0800 Subject: [PATCH 40/46] f2fs: update mtime and ctime in move file range method Mtime and ctime keep their old values and are not updated after the move file range ioctl. This patch adds the time update. 
Signed-off-by: Yunlei He Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 12ad128aefb7..fd2cde9d21b0 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -2878,6 +2878,17 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in, f2fs_up_write(&F2FS_I(dst)->i_gc_rwsem[WRITE]); out_src: f2fs_up_write(&F2FS_I(src)->i_gc_rwsem[WRITE]); + if (ret) + goto out_unlock; + + src->i_mtime = src->i_ctime = current_time(src); + f2fs_mark_inode_dirty_sync(src, false); + if (src != dst) { + dst->i_mtime = dst->i_ctime = current_time(dst); + f2fs_mark_inode_dirty_sync(dst, false); + } + f2fs_update_time(sbi, REQ_TIME); + out_unlock: if (src != dst) inode_unlock(dst); From cf2423a7555c4b012576c7282fb495ce739d50d4 Mon Sep 17 00:00:00 2001 From: Yunlei He Date: Mon, 19 Jun 2023 23:13:53 +0800 Subject: [PATCH 41/46] f2fs: remove unneeded page uptodate check/set This patch removes the unneeded page uptodate check/set in f2fs_vm_page_mkwrite, which is already done in set_page_dirty. 
Signed-off-by: Yunlei He Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index fd2cde9d21b0..b1a4de3b53e0 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -149,8 +149,6 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf) zero_user_segment(page, offset, PAGE_SIZE); } set_page_dirty(page); - if (!PageUptodate(page)) - SetPageUptodate(page); f2fs_update_iostat(sbi, inode, APP_MAPPED_IO, F2FS_BLKSIZE); f2fs_update_time(sbi, REQ_TIME); From 5eda1ad1aaffdfebdecf7a164e586060a210f74f Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 28 Jun 2023 01:00:56 -0700 Subject: [PATCH 42/46] f2fs: fix deadlock in i_xattr_sem and inode page lock Thread #1: [122554.641906][ T92] f2fs_getxattr+0xd4/0x5fc -> waiting for f2fs_down_read(&F2FS_I(inode)->i_xattr_sem); [122554.641927][ T92] __f2fs_get_acl+0x50/0x284 [122554.641948][ T92] f2fs_init_acl+0x84/0x54c [122554.641969][ T92] f2fs_init_inode_metadata+0x460/0x5f0 [122554.641990][ T92] f2fs_add_inline_entry+0x11c/0x350 -> Locked dir->inode_page by f2fs_get_node_page() [122554.642009][ T92] f2fs_do_add_link+0x100/0x1e4 [122554.642025][ T92] f2fs_create+0xf4/0x22c [122554.642047][ T92] vfs_create+0x130/0x1f4 Thread #2: [123996.386358][ T92] __get_node_page+0x8c/0x504 -> waiting for dir->inode_page lock [123996.386383][ T92] read_all_xattrs+0x11c/0x1f4 [123996.386405][ T92] __f2fs_setxattr+0xcc/0x528 [123996.386424][ T92] f2fs_setxattr+0x158/0x1f4 -> f2fs_down_write(&F2FS_I(inode)->i_xattr_sem); [123996.386443][ T92] __f2fs_set_acl+0x328/0x430 [123996.386618][ T92] f2fs_set_acl+0x38/0x50 [123996.386642][ T92] posix_acl_chmod+0xc8/0x1c8 [123996.386669][ T92] f2fs_setattr+0x5e0/0x6bc [123996.386689][ T92] notify_change+0x4d8/0x580 [123996.386717][ T92] chmod_common+0xd8/0x184 [123996.386748][ T92] do_fchmodat+0x60/0x124 [123996.386766][ T92] __arm64_sys_fchmodat+0x28/0x3c Cc: Fixes: 27161f13e3c3 "f2fs: avoid race in 
between read xattr & write xattr" Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 9 ++++++++- fs/f2fs/xattr.c | 6 ++++-- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 887e55988450..d635c58cf5a3 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -775,8 +775,15 @@ int f2fs_add_dentry(struct inode *dir, const struct f2fs_filename *fname, { int err = -EAGAIN; - if (f2fs_has_inline_dentry(dir)) + if (f2fs_has_inline_dentry(dir)) { + /* + * Should get i_xattr_sem to keep the lock order: + * i_xattr_sem -> inode_page lock used by f2fs_setxattr. + */ + f2fs_down_read(&F2FS_I(dir)->i_xattr_sem); err = f2fs_add_inline_entry(dir, fname, inode, ino, mode); + f2fs_up_read(&F2FS_I(dir)->i_xattr_sem); + } if (err == -EAGAIN) err = f2fs_add_regular_entry(dir, fname, inode, ino, mode); diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index 213805d3592c..476b186b90a6 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -528,10 +528,12 @@ int f2fs_getxattr(struct inode *inode, int index, const char *name, if (len > F2FS_NAME_LEN) return -ERANGE; - f2fs_down_read(&F2FS_I(inode)->i_xattr_sem); + if (!ipage) + f2fs_down_read(&F2FS_I(inode)->i_xattr_sem); error = lookup_all_xattrs(inode, ipage, index, len, name, &entry, &base_addr, &base_size, &is_inline); - f2fs_up_read(&F2FS_I(inode)->i_xattr_sem); + if (!ipage) + f2fs_up_read(&F2FS_I(inode)->i_xattr_sem); if (error) return error; From 0135c482fa97e2fd8245cb462784112a00ed1211 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 29 Jun 2023 09:41:02 +0800 Subject: [PATCH 43/46] f2fs: fix error path handling in truncate_dnode() If truncate_node() fails in truncate_dnode(), the error path fails to call f2fs_put_page(); fix it. 
Fixes: 7735730d39d7 ("f2fs: fix to propagate error from __get_meta_page()") Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 4a105a0cd794..dadea6b01888 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -943,8 +943,10 @@ static int truncate_dnode(struct dnode_of_data *dn) dn->ofs_in_node = 0; f2fs_truncate_data_blocks(dn); err = truncate_node(dn); - if (err) + if (err) { + f2fs_put_page(page, 1); return err; + } return 1; } From c31e49615762a5fa0d14ffcfd5e2f1c206213a14 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 29 Jun 2023 09:41:34 +0800 Subject: [PATCH 44/46] f2fs: fix compile warning in f2fs_destroy_node_manager() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fs/f2fs/node.c: In function ‘f2fs_destroy_node_manager’: fs/f2fs/node.c:3390:1: warning: the frame size of 1048 bytes is larger than 1024 bytes [-Wframe-larger-than=] 3390 | } Merging below pointer arrays into common one, and reuse it by cast type. 
struct nat_entry *natvec[NATVEC_SIZE]; struct nat_entry_set *setvec[SETVEC_SIZE]; Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 14 ++++++++------ fs/f2fs/node.h | 3 +-- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index dadea6b01888..3e1fa564db8f 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -3062,7 +3062,7 @@ int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) struct f2fs_nm_info *nm_i = NM_I(sbi); struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); struct f2fs_journal *journal = curseg->journal; - struct nat_entry_set *setvec[SETVEC_SIZE]; + struct nat_entry_set *setvec[NAT_VEC_SIZE]; struct nat_entry_set *set, *tmp; unsigned int found; nid_t set_idx = 0; @@ -3095,7 +3095,7 @@ int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) remove_nats_in_journal(sbi); while ((found = __gang_lookup_nat_set(nm_i, - set_idx, SETVEC_SIZE, setvec))) { + set_idx, NAT_VEC_SIZE, setvec))) { unsigned idx; set_idx = setvec[found - 1]->set + 1; @@ -3316,8 +3316,9 @@ void f2fs_destroy_node_manager(struct f2fs_sb_info *sbi) { struct f2fs_nm_info *nm_i = NM_I(sbi); struct free_nid *i, *next_i; - struct nat_entry *natvec[NATVEC_SIZE]; - struct nat_entry_set *setvec[SETVEC_SIZE]; + void *vec[NAT_VEC_SIZE]; + struct nat_entry **natvec = (struct nat_entry **)vec; + struct nat_entry_set **setvec = (struct nat_entry_set **)vec; nid_t nid = 0; unsigned int found; @@ -3340,7 +3341,7 @@ void f2fs_destroy_node_manager(struct f2fs_sb_info *sbi) /* destroy nat cache */ f2fs_down_write(&nm_i->nat_tree_lock); while ((found = __gang_lookup_nat_cache(nm_i, - nid, NATVEC_SIZE, natvec))) { + nid, NAT_VEC_SIZE, natvec))) { unsigned idx; nid = nat_get_nid(natvec[found - 1]) + 1; @@ -3356,8 +3357,9 @@ void f2fs_destroy_node_manager(struct f2fs_sb_info *sbi) /* destroy nat set cache */ nid = 0; + memset(vec, 0, sizeof(void *) * NAT_VEC_SIZE); while ((found = 
__gang_lookup_nat_set(nm_i, - nid, SETVEC_SIZE, setvec))) { + nid, NAT_VEC_SIZE, setvec))) { unsigned idx; nid = setvec[found - 1]->set + 1; diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 906fb67a99da..5bd16a95eef8 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -35,8 +35,7 @@ #define DEF_RF_NODE_BLOCKS 0 /* vector size for gang look-up from nat cache that consists of radix tree */ -#define NATVEC_SIZE 64 -#define SETVEC_SIZE 32 +#define NAT_VEC_SIZE 32 /* return value for read_node_page */ #define LOCKED_PAGE 1 From 87a91a155902f2b652e272ad3ba4de3486af9229 Mon Sep 17 00:00:00 2001 From: Sheng Yong Date: Tue, 27 Jun 2023 20:21:53 +0800 Subject: [PATCH 45/46] f2fs: only set release for file that has compressed data If a file is not compressed yet or does not have compressed data, for example, its data has a very low compression ratio, do not set the FI_COMPRESS_RELEASED flag. Signed-off-by: Sheng Yong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index b1a4de3b53e0..0f54c1ff02f7 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -3510,13 +3510,15 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg) if (ret) goto out; + if (!atomic_read(&F2FS_I(inode)->i_compr_blocks)) { + ret = -EPERM; + goto out; + } + set_inode_flag(inode, FI_COMPRESS_RELEASED); inode->i_ctime = current_time(inode); f2fs_mark_inode_dirty_sync(inode, true); - if (!atomic_read(&F2FS_I(inode)->i_compr_blocks)) - goto out; - f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); filemap_invalidate_lock(inode->i_mapping); From a6ec83786ab9f13f25fb18166dee908845713a95 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 29 Jun 2023 19:11:44 +0800 Subject: [PATCH 46/46] f2fs: fix to do sanity check on direct node in truncate_dnode() syzbot reports below bug: BUG: KASAN: slab-use-after-free in f2fs_truncate_data_blocks_range+0x122a/0x14c0
fs/f2fs/file.c:574 Read of size 4 at addr ffff88802a25c000 by task syz-executor148/5000 CPU: 1 PID: 5000 Comm: syz-executor148 Not tainted 6.4.0-rc7-syzkaller-00041-ge660abd551f1 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 05/27/2023 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xd9/0x150 lib/dump_stack.c:106 print_address_description.constprop.0+0x2c/0x3c0 mm/kasan/report.c:351 print_report mm/kasan/report.c:462 [inline] kasan_report+0x11c/0x130 mm/kasan/report.c:572 f2fs_truncate_data_blocks_range+0x122a/0x14c0 fs/f2fs/file.c:574 truncate_dnode+0x229/0x2e0 fs/f2fs/node.c:944 f2fs_truncate_inode_blocks+0x64b/0xde0 fs/f2fs/node.c:1154 f2fs_do_truncate_blocks+0x4ac/0xf30 fs/f2fs/file.c:721 f2fs_truncate_blocks+0x7b/0x300 fs/f2fs/file.c:749 f2fs_truncate.part.0+0x4a5/0x630 fs/f2fs/file.c:799 f2fs_truncate include/linux/fs.h:825 [inline] f2fs_setattr+0x1738/0x2090 fs/f2fs/file.c:1006 notify_change+0xb2c/0x1180 fs/attr.c:483 do_truncate+0x143/0x200 fs/open.c:66 handle_truncate fs/namei.c:3295 [inline] do_open fs/namei.c:3640 [inline] path_openat+0x2083/0x2750 fs/namei.c:3791 do_filp_open+0x1ba/0x410 fs/namei.c:3818 do_sys_openat2+0x16d/0x4c0 fs/open.c:1356 do_sys_open fs/open.c:1372 [inline] __do_sys_creat fs/open.c:1448 [inline] __se_sys_creat fs/open.c:1442 [inline] __x64_sys_creat+0xcd/0x120 fs/open.c:1442 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x39/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd The root cause is that inodeA references inodeB via inodeB's ino. Once inodeA is truncated, it calls truncate_dnode() to truncate data blocks in inodeB's node page, and it traverses mapping data from node->i.i_addr[0] to node->i.i_addr[ADDRS_PER_BLOCK() - 1], resulting in an out-of-boundary access.
This patch adds a sanity check on the dnode page in truncate_dnode() to avoid triggering such an issue. Once it encounters such an issue, it records the newly introduced ERROR_INVALID_NODE_REFERENCE error into the superblock, so that fsck can later detect the issue and try to repair it. Also, it removes f2fs_truncate_data_blocks() as a cleanup because the function has only one caller, and uses f2fs_truncate_data_blocks_range() instead. Reported-and-tested-by: syzbot+12cb4425b22169b52036@syzkaller.appspotmail.com Closes: https://lore.kernel.org/linux-f2fs-devel/000000000000f3038a05fef867f8@google.com Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 1 - fs/f2fs/file.c | 5 ----- fs/f2fs/node.c | 14 ++++++++++++-- include/linux/f2fs_fs.h | 1 + 4 files changed, 13 insertions(+), 8 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 2a6a6b1a0895..c7cb2177b252 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3457,7 +3457,6 @@ static inline bool __is_valid_data_blkaddr(block_t blkaddr) * file.c */ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync); -void f2fs_truncate_data_blocks(struct dnode_of_data *dn); int f2fs_do_truncate_blocks(struct inode *inode, u64 from, bool lock); int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock); int f2fs_truncate(struct inode *inode); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 0f54c1ff02f7..861d7aaa4711 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -626,11 +626,6 @@ void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count) dn->ofs_in_node, nr_free); } -void f2fs_truncate_data_blocks(struct dnode_of_data *dn) -{ - f2fs_truncate_data_blocks_range(dn, ADDRS_PER_BLOCK(dn->inode)); -} - static int truncate_partial_data_page(struct inode *inode, u64 from, bool cache_only) { diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 3e1fa564db8f..ee2e1dd64f25 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -925,6 +925,7 @@ static int
truncate_node(struct dnode_of_data *dn) static int truncate_dnode(struct dnode_of_data *dn) { + struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); struct page *page; int err; @@ -932,16 +933,25 @@ static int truncate_dnode(struct dnode_of_data *dn) return 1; /* get direct node */ - page = f2fs_get_node_page(F2FS_I_SB(dn->inode), dn->nid); + page = f2fs_get_node_page(sbi, dn->nid); if (PTR_ERR(page) == -ENOENT) return 1; else if (IS_ERR(page)) return PTR_ERR(page); + if (IS_INODE(page) || ino_of_node(page) != dn->inode->i_ino) { + f2fs_err(sbi, "incorrect node reference, ino: %lu, nid: %u, ino_of_node: %u", + dn->inode->i_ino, dn->nid, ino_of_node(page)); + set_sbi_flag(sbi, SBI_NEED_FSCK); + f2fs_handle_error(sbi, ERROR_INVALID_NODE_REFERENCE); + f2fs_put_page(page, 1); + return -EFSCORRUPTED; + } + /* Make dnode_of_data for parameter */ dn->node_page = page; dn->ofs_in_node = 0; - f2fs_truncate_data_blocks(dn); + f2fs_truncate_data_blocks_range(dn, ADDRS_PER_BLOCK(dn->inode)); err = truncate_node(dn); if (err) { f2fs_put_page(page, 1); diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 1d6402529d10..a82a4bb6ce68 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -103,6 +103,7 @@ enum f2fs_error { ERROR_INCONSISTENT_SIT, ERROR_CORRUPTED_VERITY_XATTR, ERROR_CORRUPTED_XATTR, + ERROR_INVALID_NODE_REFERENCE, ERROR_MAX, };