mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-01-09 06:43:09 +00:00
de881df977
NVMe Zoned Namespace devices can have zone-capacity less than zone-size. Zone-capacity indicates the maximum number of sectors that are usable in a zone, beginning from the first sector of the zone. This makes the sectors after the zone-capacity, up to zone-size, unusable. This patch set tracks zone-size and zone-capacity in zoned devices and calculates the usable blocks per segment and usable segments per section. If zone-capacity is less than zone-size, mark only those segments which start before zone-capacity as free segments. All segments at and beyond zone-capacity are treated as permanently used segments. In cases where zone-capacity does not align with segment size, the last segment will start before zone-capacity and end beyond the zone-capacity of the zone. For such spanning segments, only sectors within the zone-capacity are used. During writes and GC, manage the usable segments in a section and usable blocks per segment. Segments which are beyond zone-capacity are never allocated and do not need to be garbage collected; only the segments which are before zone-capacity need to be garbage collected. For spanning segments, based on the number of usable blocks in that segment, write to blocks only up to zone-capacity. Zone-capacity is device specific and cannot be configured by the user. Since NVMe ZNS device zones are sequential-write-only, a block device with conventional zones or any normal block device is needed along with the ZNS device for the metadata operations of F2FS. A typical nvme-cli output of a zoned device shows zone start, capacity and write pointer as below: SLBA: 0x0 WP: 0x0 Cap: 0x18800 State: EMPTY Type: SEQWRITE_REQ SLBA: 0x20000 WP: 0x20000 Cap: 0x18800 State: EMPTY Type: SEQWRITE_REQ SLBA: 0x40000 WP: 0x40000 Cap: 0x18800 State: EMPTY Type: SEQWRITE_REQ Here zone size is 64MB, capacity is 49MB, and WP is at zone start as the zones are in EMPTY state.
For each zone, only zone start + 49MB is usable area, any lba/sector after 49MB cannot be read or written to, the drive will fail any attempts to read/write. So, the second zone starts at 64MB and is usable till 113MB (64 + 49) and the range between 113 and 128MB is again unusable. The next zone starts at 128MB, and so on. Signed-off-by: Aravind Ramesh <aravind.ramesh@wdc.com> Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com> Signed-off-by: Niklas Cassel <niklas.cassel@wdc.com> Reviewed-by: Chao Yu <yuchao0@huawei.com> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
147 lines
4.0 KiB
C
147 lines
4.0 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
/*
|
|
* fs/f2fs/gc.h
|
|
*
|
|
* Copyright (c) 2012 Samsung Electronics Co., Ltd.
|
|
* http://www.samsung.com/
|
|
*/
|
|
/* Threshold used to determine whether the IO subsystem is idle or not. */
#define GC_THREAD_MIN_WB_PAGES		1

/* Default sleep intervals of the GC thread. */
#define DEF_GC_THREAD_URGENT_SLEEP_TIME	500	/* 500 ms */
#define DEF_GC_THREAD_MIN_SLEEP_TIME	30000	/* milliseconds */
#define DEF_GC_THREAD_MAX_SLEEP_TIME	60000	/* presumably milliseconds as well */
#define DEF_GC_THREAD_NOGC_SLEEP_TIME	300000	/* wait 5 min */

/* Thresholds, expressed as percentages. */
#define LIMIT_INVALID_BLOCK	40	/* percentage over total user space */
#define LIMIT_FREE_BLOCK	40	/* percentage over invalid + free space */

#define DEF_GC_FAILED_PINNED_FILES	2048

/* Search max. number of dirty segments to select a victim segment */
#define DEF_MAX_VICTIM_SEARCH	4096	/* covers 8GB */
|
|
|
|
struct f2fs_gc_kthread {
|
|
struct task_struct *f2fs_gc_task;
|
|
wait_queue_head_t gc_wait_queue_head;
|
|
|
|
/* for gc sleep time */
|
|
unsigned int urgent_sleep_time;
|
|
unsigned int min_sleep_time;
|
|
unsigned int max_sleep_time;
|
|
unsigned int no_gc_sleep_time;
|
|
|
|
/* for changing gc mode */
|
|
unsigned int gc_wake;
|
|
};
|
|
|
|
struct gc_inode_list {
|
|
struct list_head ilist;
|
|
struct radix_tree_root iroot;
|
|
};
|
|
|
|
/*
|
|
* inline functions
|
|
*/
|
|
|
|
/*
|
|
* On a Zoned device zone-capacity can be less than zone-size and if
|
|
* zone-capacity is not aligned to f2fs segment size(2MB), then the segment
|
|
* starting just before zone-capacity has some blocks spanning across the
|
|
* zone-capacity, these blocks are not usable.
|
|
* Such spanning segments can be in free list so calculate the sum of usable
|
|
* blocks in currently free segments including normal and spanning segments.
|
|
*/
|
|
static inline block_t free_segs_blk_count_zoned(struct f2fs_sb_info *sbi)
|
|
{
|
|
block_t free_seg_blks = 0;
|
|
struct free_segmap_info *free_i = FREE_I(sbi);
|
|
int j;
|
|
|
|
spin_lock(&free_i->segmap_lock);
|
|
for (j = 0; j < MAIN_SEGS(sbi); j++)
|
|
if (!test_bit(j, free_i->free_segmap))
|
|
free_seg_blks += f2fs_usable_blks_in_seg(sbi, j);
|
|
spin_unlock(&free_i->segmap_lock);
|
|
|
|
return free_seg_blks;
|
|
}
|
|
|
|
static inline block_t free_segs_blk_count(struct f2fs_sb_info *sbi)
|
|
{
|
|
if (f2fs_sb_has_blkzoned(sbi))
|
|
return free_segs_blk_count_zoned(sbi);
|
|
|
|
return free_segments(sbi) << sbi->log_blocks_per_seg;
|
|
}
|
|
|
|
static inline block_t free_user_blocks(struct f2fs_sb_info *sbi)
|
|
{
|
|
block_t free_blks, ovp_blks;
|
|
|
|
free_blks = free_segs_blk_count(sbi);
|
|
ovp_blks = overprovision_segments(sbi) << sbi->log_blocks_per_seg;
|
|
|
|
if (free_blks < ovp_blks)
|
|
return 0;
|
|
|
|
return free_blks - ovp_blks;
|
|
}
|
|
|
|
static inline block_t limit_invalid_user_blocks(struct f2fs_sb_info *sbi)
|
|
{
|
|
return (long)(sbi->user_block_count * LIMIT_INVALID_BLOCK) / 100;
|
|
}
|
|
|
|
static inline block_t limit_free_user_blocks(struct f2fs_sb_info *sbi)
|
|
{
|
|
block_t reclaimable_user_blocks = sbi->user_block_count -
|
|
written_block_count(sbi);
|
|
return (long)(reclaimable_user_blocks * LIMIT_FREE_BLOCK) / 100;
|
|
}
|
|
|
|
static inline void increase_sleep_time(struct f2fs_gc_kthread *gc_th,
|
|
unsigned int *wait)
|
|
{
|
|
unsigned int min_time = gc_th->min_sleep_time;
|
|
unsigned int max_time = gc_th->max_sleep_time;
|
|
|
|
if (*wait == gc_th->no_gc_sleep_time)
|
|
return;
|
|
|
|
if ((long long)*wait + (long long)min_time > (long long)max_time)
|
|
*wait = max_time;
|
|
else
|
|
*wait += min_time;
|
|
}
|
|
|
|
static inline void decrease_sleep_time(struct f2fs_gc_kthread *gc_th,
|
|
unsigned int *wait)
|
|
{
|
|
unsigned int min_time = gc_th->min_sleep_time;
|
|
|
|
if (*wait == gc_th->no_gc_sleep_time)
|
|
*wait = gc_th->max_sleep_time;
|
|
|
|
if ((long long)*wait - (long long)min_time < (long long)min_time)
|
|
*wait = min_time;
|
|
else
|
|
*wait -= min_time;
|
|
}
|
|
|
|
static inline bool has_enough_invalid_blocks(struct f2fs_sb_info *sbi)
|
|
{
|
|
block_t invalid_user_blocks = sbi->user_block_count -
|
|
written_block_count(sbi);
|
|
/*
|
|
* Background GC is triggered with the following conditions.
|
|
* 1. There are a number of invalid blocks.
|
|
* 2. There is not enough free space.
|
|
*/
|
|
if (invalid_user_blocks > limit_invalid_user_blocks(sbi) &&
|
|
free_user_blocks(sbi) < limit_free_user_blocks(sbi))
|
|
return true;
|
|
return false;
|
|
}
|