mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2025-01-18 06:15:12 +00:00
d11cef14f8
When f2fs tries to checkpoint during foreground gc in LFS mode, a system
crash occurs due to lack of free space if the amount of dirty node and
dentry pages generated by data migration exceeds the free space.

The reproduction sequence is as follows:

 - 20GiB capacity block device (null_blk)
 - format and mount with LFS mode
 - create a file and write 20,000MiB
 - 4k random write on full range of the file

 RIP: 0010:new_curseg+0x48a/0x510 [f2fs]
 Code: 55 e7 f5 89 c0 48 0f af c3 48 8b 5d c0 48 c1 e8 20 83 c0 01 89 43 6c 48 83 c4 28 5b 41 5c 41 5d 41 5e 41 5f 5d c3 cc cc cc cc <0f> 0b f0 41 80 4f 48 04 45 85 f6 0f 84 ba fd ff ff e9 ef fe ff ff
 RSP: 0018:ffff977bc397b218 EFLAGS: 00010246
 RAX: 00000000000027b9 RBX: 0000000000000000 RCX: 00000000000027c0
 RDX: 0000000000000000 RSI: 00000000000027b9 RDI: ffff8c25ab4e74f8
 RBP: ffff977bc397b268 R08: 00000000000027b9 R09: ffff8c29e4a34b40
 R10: 0000000000000001 R11: ffff977bc397b0d8 R12: 0000000000000000
 R13: ffff8c25b4dd81a0 R14: 0000000000000000 R15: ffff8c2f667f9000
 FS: 0000000000000000(0000) GS:ffff8c344ec80000(0000) knlGS:0000000000000000
 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
 CR2: 000000c00055d000 CR3: 0000000e30810003 CR4: 00000000003706e0
 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
 Call Trace:
 <TASK>
 allocate_segment_by_default+0x9c/0x110 [f2fs]
 f2fs_allocate_data_block+0x243/0xa30 [f2fs]
 ? __mod_lruvec_page_state+0xa0/0x150
 do_write_page+0x80/0x160 [f2fs]
 f2fs_do_write_node_page+0x32/0x50 [f2fs]
 __write_node_page+0x339/0x730 [f2fs]
 f2fs_sync_node_pages+0x5a6/0x780 [f2fs]
 block_operations+0x257/0x340 [f2fs]
 f2fs_write_checkpoint+0x102/0x1050 [f2fs]
 f2fs_gc+0x27c/0x630 [f2fs]
 ? folio_mark_dirty+0x36/0x70
 f2fs_balance_fs+0x16f/0x180 [f2fs]

This patch adds a check that enough free sections are available before
checkpointing during gc.

Signed-off-by: Yonggil Song <yonggil.song@samsung.com>
[Jaegeuk Kim: code clean-up]
Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
169 lines
4.8 KiB
C
169 lines
4.8 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * fs/f2fs/gc.h
 *
 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
 *             http://www.samsung.com/
 */
|
|
/* Threshold used to determine whether the IO subsystem is idle or not. */
#define GC_THREAD_MIN_WB_PAGES			1

/* Default sleep intervals of the GC thread. */
#define DEF_GC_THREAD_URGENT_SLEEP_TIME		500	/* 500 ms */
#define DEF_GC_THREAD_MIN_SLEEP_TIME		30000	/* milliseconds */
#define DEF_GC_THREAD_MAX_SLEEP_TIME		60000
#define DEF_GC_THREAD_NOGC_SLEEP_TIME		300000	/* wait 5 min */

/* Candidates are chosen from sections whose age is more than 7 days. */
#define DEF_GC_THREAD_AGE_THRESHOLD		(60 * 60 * 24 * 7)
/* Select the 20% oldest sections as candidates. */
#define DEF_GC_THREAD_CANDIDATE_RATIO		20
/* Select at most 10 sections as candidates. */
#define DEF_GC_THREAD_MAX_CANDIDATE_COUNT	10
/* Age weight. */
#define DEF_GC_THREAD_AGE_WEIGHT		60
/* Accuracy class. */
#define DEFAULT_ACCURACY_CLASS			10000

/* Percentage over total user space. */
#define LIMIT_INVALID_BLOCK			40
/* Percentage over invalid + free space. */
#define LIMIT_FREE_BLOCK			40

#define DEF_GC_FAILED_PINNED_FILES		2048

/*
 * Maximum number of dirty segments searched when selecting a victim
 * segment (covers 8GB).
 */
#define DEF_MAX_VICTIM_SEARCH			4096

/* Data/node/dentry sections. */
#define NR_GC_CHECKPOINT_SECS			(3)
|
|
|
|
struct f2fs_gc_kthread {
|
|
struct task_struct *f2fs_gc_task;
|
|
wait_queue_head_t gc_wait_queue_head;
|
|
|
|
/* for gc sleep time */
|
|
unsigned int urgent_sleep_time;
|
|
unsigned int min_sleep_time;
|
|
unsigned int max_sleep_time;
|
|
unsigned int no_gc_sleep_time;
|
|
|
|
/* for changing gc mode */
|
|
bool gc_wake;
|
|
|
|
/* for GC_MERGE mount option */
|
|
wait_queue_head_t fggc_wq; /*
|
|
* caller of f2fs_balance_fs()
|
|
* will wait on this wait queue.
|
|
*/
|
|
};
|
|
|
|
struct gc_inode_list {
|
|
struct list_head ilist;
|
|
struct radix_tree_root iroot;
|
|
};
|
|
|
|
struct victim_entry {
|
|
struct rb_node rb_node; /* rb node located in rb-tree */
|
|
unsigned long long mtime; /* mtime of section */
|
|
unsigned int segno; /* segment No. */
|
|
struct list_head list;
|
|
};
|
|
|
|
/*
 * inline functions
 */
|
|
|
|
/*
|
|
* On a Zoned device zone-capacity can be less than zone-size and if
|
|
* zone-capacity is not aligned to f2fs segment size(2MB), then the segment
|
|
* starting just before zone-capacity has some blocks spanning across the
|
|
* zone-capacity, these blocks are not usable.
|
|
* Such spanning segments can be in free list so calculate the sum of usable
|
|
* blocks in currently free segments including normal and spanning segments.
|
|
*/
|
|
static inline block_t free_segs_blk_count_zoned(struct f2fs_sb_info *sbi)
|
|
{
|
|
block_t free_seg_blks = 0;
|
|
struct free_segmap_info *free_i = FREE_I(sbi);
|
|
int j;
|
|
|
|
spin_lock(&free_i->segmap_lock);
|
|
for (j = 0; j < MAIN_SEGS(sbi); j++)
|
|
if (!test_bit(j, free_i->free_segmap))
|
|
free_seg_blks += f2fs_usable_blks_in_seg(sbi, j);
|
|
spin_unlock(&free_i->segmap_lock);
|
|
|
|
return free_seg_blks;
|
|
}
|
|
|
|
static inline block_t free_segs_blk_count(struct f2fs_sb_info *sbi)
|
|
{
|
|
if (f2fs_sb_has_blkzoned(sbi))
|
|
return free_segs_blk_count_zoned(sbi);
|
|
|
|
return free_segments(sbi) << sbi->log_blocks_per_seg;
|
|
}
|
|
|
|
static inline block_t free_user_blocks(struct f2fs_sb_info *sbi)
|
|
{
|
|
block_t free_blks, ovp_blks;
|
|
|
|
free_blks = free_segs_blk_count(sbi);
|
|
ovp_blks = overprovision_segments(sbi) << sbi->log_blocks_per_seg;
|
|
|
|
if (free_blks < ovp_blks)
|
|
return 0;
|
|
|
|
return free_blks - ovp_blks;
|
|
}
|
|
|
|
/*
 * Return LIMIT_INVALID_BLOCK percent of @user_block_count, rounded down.
 *
 * The whole hundreds and the remainder of the count are scaled separately,
 * which gives exactly floor(count * pct / 100) without ever forming the
 * product count * pct.  Forming that product first is evaluated in block_t
 * arithmetic and wraps around for large counts before any widening cast
 * can help (NOTE(review): block_t is declared outside this header and is
 * assumed to be a 32-bit unsigned type - confirm).
 */
static inline block_t limit_invalid_user_blocks(block_t user_block_count)
{
	return user_block_count / 100 * LIMIT_INVALID_BLOCK +
		user_block_count % 100 * LIMIT_INVALID_BLOCK / 100;
}
|
|
|
|
/*
 * Return LIMIT_FREE_BLOCK percent of @reclaimable_user_blocks, rounded down.
 *
 * The whole hundreds and the remainder of the count are scaled separately,
 * which gives exactly floor(count * pct / 100) without ever forming the
 * product count * pct.  Forming that product first is evaluated in block_t
 * arithmetic and wraps around for large counts before any widening cast
 * can help (NOTE(review): block_t is declared outside this header and is
 * assumed to be a 32-bit unsigned type - confirm).
 */
static inline block_t limit_free_user_blocks(block_t reclaimable_user_blocks)
{
	return reclaimable_user_blocks / 100 * LIMIT_FREE_BLOCK +
		reclaimable_user_blocks % 100 * LIMIT_FREE_BLOCK / 100;
}
|
|
|
|
static inline void increase_sleep_time(struct f2fs_gc_kthread *gc_th,
|
|
unsigned int *wait)
|
|
{
|
|
unsigned int min_time = gc_th->min_sleep_time;
|
|
unsigned int max_time = gc_th->max_sleep_time;
|
|
|
|
if (*wait == gc_th->no_gc_sleep_time)
|
|
return;
|
|
|
|
if ((long long)*wait + (long long)min_time > (long long)max_time)
|
|
*wait = max_time;
|
|
else
|
|
*wait += min_time;
|
|
}
|
|
|
|
static inline void decrease_sleep_time(struct f2fs_gc_kthread *gc_th,
|
|
unsigned int *wait)
|
|
{
|
|
unsigned int min_time = gc_th->min_sleep_time;
|
|
|
|
if (*wait == gc_th->no_gc_sleep_time)
|
|
*wait = gc_th->max_sleep_time;
|
|
|
|
if ((long long)*wait - (long long)min_time < (long long)min_time)
|
|
*wait = min_time;
|
|
else
|
|
*wait -= min_time;
|
|
}
|
|
|
|
static inline bool has_enough_invalid_blocks(struct f2fs_sb_info *sbi)
|
|
{
|
|
block_t user_block_count = sbi->user_block_count;
|
|
block_t invalid_user_blocks = user_block_count -
|
|
written_block_count(sbi);
|
|
/*
|
|
* Background GC is triggered with the following conditions.
|
|
* 1. There are a number of invalid blocks.
|
|
* 2. There is not enough free space.
|
|
*/
|
|
return (invalid_user_blocks >
|
|
limit_invalid_user_blocks(user_block_count) &&
|
|
free_user_blocks(sbi) <
|
|
limit_free_user_blocks(invalid_user_blocks));
|
|
}
|