diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index 62dd72522d6e..6f9157f16725 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -44,6 +44,13 @@ Description: Controls the FS utilization condition for the in-place-update policies. +What: /sys/fs/f2fs//min_fsync_blocks +Date: September 2014 +Contact: "Jaegeuk Kim" +Description: + Controls the dirty page count condition for the in-place-update + policies. + What: /sys/fs/f2fs//max_small_discards Date: November 2013 Contact: "Jaegeuk Kim" diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt index a2046a7d0a9d..2cca5a25ef89 100644 --- a/Documentation/filesystems/f2fs.txt +++ b/Documentation/filesystems/f2fs.txt @@ -192,15 +192,22 @@ Files in /sys/fs/f2fs/ ipu_policy This parameter controls the policy of in-place updates in f2fs. There are five policies: - 0: F2FS_IPU_FORCE, 1: F2FS_IPU_SSR, - 2: F2FS_IPU_UTIL, 3: F2FS_IPU_SSR_UTIL, - 4: F2FS_IPU_DISABLE. + 0x01: F2FS_IPU_FORCE, 0x02: F2FS_IPU_SSR, + 0x04: F2FS_IPU_UTIL, 0x08: F2FS_IPU_SSR_UTIL, + 0x10: F2FS_IPU_FSYNC. min_ipu_util This parameter controls the threshold to trigger in-place-updates. The number indicates percentage of the filesystem utilization, and used by F2FS_IPU_UTIL and F2FS_IPU_SSR_UTIL policies. + min_fsync_blocks This parameter controls the threshold to trigger + in-place-updates when F2FS_IPU_FSYNC mode is set. + The number indicates the number of dirty pages + when fsync needs to flush on its call path. If + the number is less than this value, it triggers + in-place-updates. + max_victim_search This parameter controls the number of trials to find a victim segment when conducting SSR and cleaning operations. The default value is 4096 diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index ec3b7a5381fa..dd10a031c052 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -72,7 +72,22 @@ out: return page; } -static inline int get_max_meta_blks(struct f2fs_sb_info *sbi, int type) +struct page *get_meta_page_ra(struct f2fs_sb_info *sbi, pgoff_t index) +{ + bool readahead = false; + struct page *page; + + page = find_get_page(META_MAPPING(sbi), index); + if (!page || (page && !PageUptodate(page))) + readahead = true; + f2fs_put_page(page, 0); + + if (readahead) + ra_meta_pages(sbi, index, MAX_BIO_BLOCKS(sbi), META_POR); + return get_meta_page(sbi, index); +} + +static inline block_t get_max_meta_blks(struct f2fs_sb_info *sbi, int type) { switch (type) { case META_NAT: @@ -82,6 +97,8 @@ static inline int get_max_meta_blks(struct f2fs_sb_info *sbi, int type) case META_SSA: case META_CP: return 0; + case META_POR: + return MAX_BLKADDR(sbi); default: BUG(); } @@ -90,12 +107,12 @@ static inline int get_max_meta_blks(struct f2fs_sb_info *sbi, int type) /* * Readahead CP/NAT/SIT/SSA pages */ -int ra_meta_pages(struct f2fs_sb_info *sbi, int start, int nrpages, int type) +int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type) { block_t prev_blk_addr = 0; struct page *page; - int blkno = start; - int max_blks = get_max_meta_blks(sbi, type); + block_t blkno = start; + block_t max_blks = get_max_meta_blks(sbi, type); struct f2fs_io_info fio = { .type = META, @@ -125,7 +142,11 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, int start, int nrpages, int type) break; case META_SSA: case META_CP: - /* get ssa/cp block addr */ + case META_POR: + if (unlikely(blkno >= max_blks)) + goto out; + if (unlikely(blkno < SEG0_BLKADDR(sbi))) + goto out; blk_addr = blkno; break; default: @@ -151,8 +172,7 @@ out: static int f2fs_write_meta_page(struct page *page, struct writeback_control *wbc) { - struct inode *inode = page->mapping->host; - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct f2fs_sb_info *sbi = F2FS_P_SB(page); trace_f2fs_writepage(page, META); @@ -177,7 +197,7 @@ redirty_out: static int f2fs_write_meta_pages(struct address_space *mapping, struct writeback_control *wbc) { - struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb); + struct f2fs_sb_info *sbi = F2FS_M_SB(mapping); long diff, written; trace_f2fs_writepages(mapping->host, wbc, META); @@ -259,15 +279,12 @@ continue_unlock: static int f2fs_set_meta_page_dirty(struct page *page) { - struct address_space *mapping = page->mapping; - struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb); - trace_f2fs_set_page_dirty(page, META); SetPageUptodate(page); if (!PageDirty(page)) { __set_page_dirty_nobuffers(page); - inc_page_count(sbi, F2FS_DIRTY_META); + inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_META); return 1; } return 0; @@ -378,7 +395,7 @@ int acquire_orphan_inode(struct f2fs_sb_info *sbi) void release_orphan_inode(struct f2fs_sb_info *sbi) { spin_lock(&sbi->ino_lock[ORPHAN_INO]); - f2fs_bug_on(sbi->n_orphans == 0); + f2fs_bug_on(sbi, sbi->n_orphans == 0); sbi->n_orphans--; spin_unlock(&sbi->ino_lock[ORPHAN_INO]); } @@ -398,7 +415,7 @@ void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) static void recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) { struct inode *inode = f2fs_iget(sbi->sb, ino); - f2fs_bug_on(IS_ERR(inode)); + f2fs_bug_on(sbi, IS_ERR(inode)); clear_nlink(inode); /* truncate all the data during iput */ @@ -459,7 +476,7 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk) list_for_each_entry(orphan, head, list) { if (!page) { page = find_get_page(META_MAPPING(sbi), start_blk++); - f2fs_bug_on(!page); + f2fs_bug_on(sbi, !page); orphan_blk = (struct f2fs_orphan_block *)page_address(page); memset(orphan_blk, 0, sizeof(*orphan_blk)); @@ -619,7 +636,7 @@ fail_no_cp: static int __add_dirty_inode(struct inode *inode, struct dir_inode_entry *new) { - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); if (is_inode_flag_set(F2FS_I(inode), FI_DIRTY_DIR)) return -EEXIST; @@ -631,32 +648,38 @@ static int __add_dirty_inode(struct inode *inode, struct dir_inode_entry *new) return 0; } -void set_dirty_dir_page(struct inode *inode, struct page *page) +void update_dirty_page(struct inode *inode, struct page *page) { - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct dir_inode_entry *new; int ret = 0; - if (!S_ISDIR(inode->i_mode)) + if (!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode)) return; + if (!S_ISDIR(inode->i_mode)) { + inode_inc_dirty_pages(inode); + goto out; + } + new = f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS); new->inode = inode; INIT_LIST_HEAD(&new->list); spin_lock(&sbi->dir_inode_lock); ret = __add_dirty_inode(inode, new); - inode_inc_dirty_dents(inode); - SetPagePrivate(page); + inode_inc_dirty_pages(inode); spin_unlock(&sbi->dir_inode_lock); if (ret) kmem_cache_free(inode_entry_slab, new); +out: + SetPagePrivate(page); } void add_dirty_dir_inode(struct inode *inode) { - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct dir_inode_entry *new = f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS); int ret = 0; @@ -674,14 +697,14 @@ void add_dirty_dir_inode(struct inode *inode) void remove_dirty_dir_inode(struct inode *inode) { - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct dir_inode_entry *entry; if (!S_ISDIR(inode->i_mode)) return; spin_lock(&sbi->dir_inode_lock); - if (get_dirty_dents(inode) || + if (get_dirty_pages(inode) || !is_inode_flag_set(F2FS_I(inode), FI_DIRTY_DIR)) { spin_unlock(&sbi->dir_inode_lock); return; @@ -802,11 +825,12 @@ static void wait_on_all_pages_writeback(struct f2fs_sb_info *sbi) finish_wait(&sbi->cp_wait, &wait); } -static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) +static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) { struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE); - nid_t last_nid = 0; + struct f2fs_nm_info *nm_i = NM_I(sbi); + nid_t last_nid = nm_i->next_scan_nid; block_t start_blk; struct page *cp_page; unsigned int data_sum_blocks, orphan_blocks; @@ -869,7 +893,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) ckpt->cp_pack_start_sum = cpu_to_le32(1 + cp_payload_blks + orphan_blocks); - if (is_umount) { + if (cpc->reason == CP_UMOUNT) { set_ckpt_flags(ckpt, CP_UMOUNT_FLAG); ckpt->cp_pack_total_block_count = cpu_to_le32(F2FS_CP_PACKS+ cp_payload_blks + data_sum_blocks + @@ -886,6 +910,9 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) else clear_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG); + if (sbi->need_fsck) + set_ckpt_flags(ckpt, CP_FSCK_FLAG); + /* update SIT/NAT bitmap */ get_sit_bitmap(sbi, __bitmap_ptr(sbi, SIT_BITMAP)); get_nat_bitmap(sbi, __bitmap_ptr(sbi, NAT_BITMAP)); @@ -920,7 +947,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) write_data_summaries(sbi, start_blk); start_blk += data_sum_blocks; - if (is_umount) { + if (cpc->reason == CP_UMOUNT) { write_node_summaries(sbi, start_blk); start_blk += NR_CURSEG_NODE_TYPE; } @@ -960,23 +987,23 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) /* * We guarantee that this checkpoint procedure will not fail. */ -void write_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) +void write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) { struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); unsigned long long ckpt_ver; - trace_f2fs_write_checkpoint(sbi->sb, is_umount, "start block_ops"); + trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "start block_ops"); mutex_lock(&sbi->cp_mutex); - if (!sbi->s_dirty) + if (!sbi->s_dirty && cpc->reason != CP_DISCARD) goto out; if (unlikely(f2fs_cp_error(sbi))) goto out; if (block_operations(sbi)) goto out; - trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish block_ops"); + trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish block_ops"); f2fs_submit_merged_bio(sbi, DATA, WRITE); f2fs_submit_merged_bio(sbi, NODE, WRITE); @@ -992,16 +1019,16 @@ void write_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) /* write cached NAT/SIT entries to NAT/SIT area */ flush_nat_entries(sbi); - flush_sit_entries(sbi); + flush_sit_entries(sbi, cpc); /* unlock all the fs_lock[] in do_checkpoint() */ - do_checkpoint(sbi, is_umount); + do_checkpoint(sbi, cpc); unblock_operations(sbi); stat_inc_cp_count(sbi->stat_info); out: mutex_unlock(&sbi->cp_mutex); - trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish checkpoint"); + trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish checkpoint"); } void init_ino_entry_info(struct f2fs_sb_info *sbi) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 76de83e25a89..8e58c4cc2cb9 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -85,7 +85,7 @@ static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr, bio = bio_alloc(GFP_NOIO, npages); bio->bi_bdev = sbi->sb->s_bdev; - bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr); + bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(blk_addr); bio->bi_end_io = is_read ? f2fs_read_end_io : f2fs_write_end_io; bio->bi_private = sbi; @@ -193,7 +193,7 @@ void f2fs_submit_page_mbio(struct f2fs_sb_info *sbi, struct page *page, __submit_merged_bio(io); alloc_new: if (io->bio == NULL) { - int bio_blocks = MAX_BIO_BLOCKS(max_hw_blocks(sbi)); + int bio_blocks = MAX_BIO_BLOCKS(sbi); io->bio = __bio_alloc(sbi, blk_addr, bio_blocks, is_read); io->fio = *fio; @@ -236,7 +236,7 @@ static void __set_data_blkaddr(struct dnode_of_data *dn, block_t new_addr) int reserve_new_block(struct dnode_of_data *dn) { - struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); + struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC))) return -EPERM; @@ -258,7 +258,7 @@ int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index) int err; /* if inode_page exists, index should be zero */ - f2fs_bug_on(!need_put && index); + f2fs_bug_on(F2FS_I_SB(dn->inode), !need_put && index); err = get_dnode_of_data(dn, index, ALLOC_NODE); if (err) @@ -321,7 +321,7 @@ void update_extent_cache(block_t blk_addr, struct dnode_of_data *dn) block_t start_blkaddr, end_blkaddr; int need_update = true; - f2fs_bug_on(blk_addr == NEW_ADDR); + f2fs_bug_on(F2FS_I_SB(dn->inode), blk_addr == NEW_ADDR); fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) + dn->ofs_in_node; @@ -396,7 +396,6 @@ end_update: struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync) { - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); struct address_space *mapping = inode->i_mapping; struct dnode_of_data dn; struct page *page; @@ -429,7 +428,7 @@ struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync) return page; } - err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr, + err = f2fs_submit_page_bio(F2FS_I_SB(inode), page, dn.data_blkaddr, sync ? READ_SYNC : READA); if (err) return ERR_PTR(err); @@ -451,7 +450,6 @@ struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync) */ struct page *get_lock_data_page(struct inode *inode, pgoff_t index) { - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); struct address_space *mapping = inode->i_mapping; struct dnode_of_data dn; struct page *page; @@ -490,7 +488,8 @@ repeat: return page; } - err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr, READ_SYNC); + err = f2fs_submit_page_bio(F2FS_I_SB(inode), page, + dn.data_blkaddr, READ_SYNC); if (err) return ERR_PTR(err); @@ -517,7 +516,6 @@ repeat: struct page *get_new_data_page(struct inode *inode, struct page *ipage, pgoff_t index, bool new_i_size) { - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); struct address_space *mapping = inode->i_mapping; struct page *page; struct dnode_of_data dn; @@ -541,8 +539,8 @@ repeat: zero_user_segment(page, 0, PAGE_CACHE_SIZE); SetPageUptodate(page); } else { - err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr, - READ_SYNC); + err = f2fs_submit_page_bio(F2FS_I_SB(inode), page, + dn.data_blkaddr, READ_SYNC); if (err) goto put_err; @@ -573,10 +571,12 @@ put_err: static int __allocate_data_block(struct dnode_of_data *dn) { - struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); + struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); + struct f2fs_inode_info *fi = F2FS_I(dn->inode); struct f2fs_summary sum; block_t new_blkaddr; struct node_info ni; + pgoff_t fofs; int type; if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC))) @@ -599,6 +599,12 @@ static int __allocate_data_block(struct dnode_of_data *dn) update_extent_cache(new_blkaddr, dn); clear_inode_flag(F2FS_I(dn->inode), FI_NO_EXTENT); + /* update i_size */ + fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) + + dn->ofs_in_node; + if (i_size_read(dn->inode) < ((fofs + 1) << PAGE_CACHE_SHIFT)) + i_size_write(dn->inode, ((fofs + 1) << PAGE_CACHE_SHIFT)); + dn->data_blkaddr = new_blkaddr; return 0; } @@ -614,7 +620,6 @@ static int __allocate_data_block(struct dnode_of_data *dn) static int __get_data_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create, bool fiemap) { - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); unsigned int blkbits = inode->i_sb->s_blocksize_bits; unsigned maxblocks = bh_result->b_size >> blkbits; struct dnode_of_data dn; @@ -630,8 +635,8 @@ static int __get_data_block(struct inode *inode, sector_t iblock, goto out; if (create) { - f2fs_balance_fs(sbi); - f2fs_lock_op(sbi); + f2fs_balance_fs(F2FS_I_SB(inode)); + f2fs_lock_op(F2FS_I_SB(inode)); } /* When reading holes, we need its node page */ @@ -707,7 +712,7 @@ put_out: f2fs_put_dnode(&dn); unlock_out: if (create) - f2fs_unlock_op(sbi); + f2fs_unlock_op(F2FS_I_SB(inode)); out: trace_f2fs_get_data_block(inode, iblock, bh_result, err); return err; @@ -804,7 +809,7 @@ static int f2fs_write_data_page(struct page *page, struct writeback_control *wbc) { struct inode *inode = page->mapping->host; - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); loff_t i_size = i_size_read(inode); const pgoff_t end_index = ((unsigned long long) i_size) >> PAGE_CACHE_SHIFT; @@ -846,7 +851,7 @@ write: if (unlikely(f2fs_cp_error(sbi))) { SetPageError(page); unlock_page(page); - return 0; + goto out; } if (!wbc->for_reclaim) @@ -866,7 +871,7 @@ done: clear_cold_data(page); out: - inode_dec_dirty_dents(inode); + inode_dec_dirty_pages(inode); unlock_page(page); if (need_balance_fs) f2fs_balance_fs(sbi); @@ -892,7 +897,7 @@ static int f2fs_write_data_pages(struct address_space *mapping, struct writeback_control *wbc) { struct inode *inode = mapping->host; - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); bool locked = false; int ret; long diff; @@ -904,7 +909,7 @@ static int f2fs_write_data_pages(struct address_space *mapping, return 0; if (S_ISDIR(inode->i_mode) && wbc->sync_mode == WB_SYNC_NONE && - get_dirty_dents(inode) < nr_pages_to_skip(sbi, DATA) && + get_dirty_pages(inode) < nr_pages_to_skip(sbi, DATA) && available_free_memory(sbi, DIRTY_DENTS)) goto skip_write; @@ -926,7 +931,7 @@ static int f2fs_write_data_pages(struct address_space *mapping, return ret; skip_write: - wbc->pages_skipped += get_dirty_dents(inode); + wbc->pages_skipped += get_dirty_pages(inode); return 0; } @@ -945,7 +950,7 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, struct page **pagep, void **fsdata) { struct inode *inode = mapping->host; - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct page *page; pgoff_t index = ((unsigned long long) pos) >> PAGE_CACHE_SHIFT; struct dnode_of_data dn; @@ -1047,7 +1052,10 @@ static int f2fs_write_end(struct file *file, trace_f2fs_write_end(inode, pos, len, copied); - set_page_dirty(page); + if (f2fs_is_atomic_file(inode) || f2fs_is_volatile_file(inode)) + register_inmem_page(inode, page); + else + set_page_dirty(page); if (pos + copied > i_size_read(inode)) { i_size_write(inode, pos + copied); @@ -1092,9 +1100,6 @@ static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb, if (check_direct_IO(inode, rw, iter, offset)) return 0; - /* clear fsync mark to recover these blocks */ - fsync_mark_clear(F2FS_SB(inode->i_sb), inode->i_ino); - trace_f2fs_direct_IO_enter(inode, offset, count, rw); err = blockdev_direct_IO(rw, iocb, inode, iter, offset, get_data_block); @@ -1110,8 +1115,12 @@ static void f2fs_invalidate_data_page(struct page *page, unsigned int offset, unsigned int length) { struct inode *inode = page->mapping->host; + + if (offset % PAGE_CACHE_SIZE || length != PAGE_CACHE_SIZE) + return; + if (PageDirty(page)) - inode_dec_dirty_dents(inode); + inode_dec_dirty_pages(inode); ClearPagePrivate(page); } @@ -1133,7 +1142,7 @@ static int f2fs_set_data_page_dirty(struct page *page) if (!PageDirty(page)) { __set_page_dirty_nobuffers(page); - set_dirty_dir_page(inode, page); + update_dirty_page(inode, page); return 1; } return 0; diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index fecebdbfd781..0a91ab813a9e 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -93,7 +93,7 @@ static void update_sit_info(struct f2fs_sb_info *sbi) total_vblocks = 0; blks_per_sec = sbi->segs_per_sec * (1 << sbi->log_blocks_per_seg); hblks_per_sec = blks_per_sec / 2; - for (segno = 0; segno < TOTAL_SEGS(sbi); segno += sbi->segs_per_sec) { + for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) { vblocks = get_valid_blocks(sbi, segno, sbi->segs_per_sec); dist = abs(vblocks - hblks_per_sec); bimodal += dist * dist; @@ -103,7 +103,7 @@ static void update_sit_info(struct f2fs_sb_info *sbi) ndirty++; } } - dist = TOTAL_SECS(sbi) * hblks_per_sec * hblks_per_sec / 100; + dist = MAIN_SECS(sbi) * hblks_per_sec * hblks_per_sec / 100; si->bimodal = bimodal / dist; if (si->dirty_count) si->avg_vblocks = total_vblocks / ndirty; @@ -131,17 +131,17 @@ static void update_mem_info(struct f2fs_sb_info *sbi) /* build sit */ si->base_mem += sizeof(struct sit_info); - si->base_mem += TOTAL_SEGS(sbi) * sizeof(struct seg_entry); - si->base_mem += f2fs_bitmap_size(TOTAL_SEGS(sbi)); - si->base_mem += 2 * SIT_VBLOCK_MAP_SIZE * TOTAL_SEGS(sbi); + si->base_mem += MAIN_SEGS(sbi) * sizeof(struct seg_entry); + si->base_mem += f2fs_bitmap_size(MAIN_SEGS(sbi)); + si->base_mem += 2 * SIT_VBLOCK_MAP_SIZE * MAIN_SEGS(sbi); if (sbi->segs_per_sec > 1) - si->base_mem += TOTAL_SECS(sbi) * sizeof(struct sec_entry); + si->base_mem += MAIN_SECS(sbi) * sizeof(struct sec_entry); si->base_mem += __bitmap_size(sbi, SIT_BITMAP); /* build free segmap */ si->base_mem += sizeof(struct free_segmap_info); - si->base_mem += f2fs_bitmap_size(TOTAL_SEGS(sbi)); - si->base_mem += f2fs_bitmap_size(TOTAL_SECS(sbi)); + si->base_mem += f2fs_bitmap_size(MAIN_SEGS(sbi)); + si->base_mem += f2fs_bitmap_size(MAIN_SECS(sbi)); /* build curseg */ si->base_mem += sizeof(struct curseg_info) * NR_CURSEG_TYPE; @@ -149,8 +149,8 @@ static void update_mem_info(struct f2fs_sb_info *sbi) /* build dirty segmap */ si->base_mem += sizeof(struct dirty_seglist_info); - si->base_mem += NR_DIRTY_TYPE * f2fs_bitmap_size(TOTAL_SEGS(sbi)); - si->base_mem += f2fs_bitmap_size(TOTAL_SECS(sbi)); + si->base_mem += NR_DIRTY_TYPE * f2fs_bitmap_size(MAIN_SEGS(sbi)); + si->base_mem += f2fs_bitmap_size(MAIN_SECS(sbi)); /* build nm */ si->base_mem += sizeof(struct f2fs_nm_info); diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 155fb056b7f1..b54f87149c09 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -126,7 +126,7 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page, * For the most part, it should be a bug when name_len is zero. * We stop here for figuring out where the bugs has occurred. */ - f2fs_bug_on(!de->name_len); + f2fs_bug_on(F2FS_P_SB(dentry_page), !de->name_len); bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len)); } @@ -151,7 +151,7 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir, bool room = false; int max_slots = 0; - f2fs_bug_on(level > MAX_DIR_HASH_DEPTH); + f2fs_bug_on(F2FS_I_SB(dir), level > MAX_DIR_HASH_DEPTH); nbucket = dir_buckets(level, F2FS_I(dir)->i_dir_level); nblock = bucket_blocks(level); @@ -284,10 +284,9 @@ static void init_dent_inode(const struct qstr *name, struct page *ipage) int update_dent_inode(struct inode *inode, const struct qstr *name) { - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); struct page *page; - page = get_node_page(sbi, inode->i_ino); + page = get_node_page(F2FS_I_SB(inode), inode->i_ino); if (IS_ERR(page)) return PTR_ERR(page); @@ -337,7 +336,6 @@ static int make_empty_dir(struct inode *inode, static struct page *init_inode_metadata(struct inode *inode, struct inode *dir, const struct qstr *name) { - struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); struct page *page; int err; @@ -360,7 +358,7 @@ static struct page *init_inode_metadata(struct inode *inode, if (err) goto put_error; } else { - page = get_node_page(F2FS_SB(dir->i_sb), inode->i_ino); + page = get_node_page(F2FS_I_SB(dir), inode->i_ino); if (IS_ERR(page)) return page; @@ -381,7 +379,7 @@ static struct page *init_inode_metadata(struct inode *inode, * we should remove this inode from orphan list. */ if (inode->i_nlink == 0) - remove_orphan_inode(sbi, inode->i_ino); + remove_orphan_inode(F2FS_I_SB(dir), inode->i_ino); inc_nlink(inode); } return page; @@ -571,8 +569,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, { struct f2fs_dentry_block *dentry_blk; unsigned int bit_pos; - struct address_space *mapping = page->mapping; - struct inode *dir = mapping->host; + struct inode *dir = page->mapping->host; int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len)); int i; @@ -594,7 +591,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, dir->i_ctime = dir->i_mtime = CURRENT_TIME; if (inode) { - struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); + struct f2fs_sb_info *sbi = F2FS_I_SB(dir); down_write(&F2FS_I(inode)->i_sem); @@ -621,7 +618,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, truncate_hole(dir, page->index, page->index + 1); clear_page_dirty_for_io(page); ClearPageUptodate(page); - inode_dec_dirty_dents(dir); + inode_dec_dirty_pages(dir); } f2fs_put_page(page, 1); } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index e921242186f6..8171e80b2ee9 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -21,10 +21,16 @@ #include #ifdef CONFIG_F2FS_CHECK_FS -#define f2fs_bug_on(condition) BUG_ON(condition) +#define f2fs_bug_on(sbi, condition) BUG_ON(condition) #define f2fs_down_write(x, y) down_write_nest_lock(x, y) #else -#define f2fs_bug_on(condition) WARN_ON(condition) +#define f2fs_bug_on(sbi, condition) \ + do { \ + if (unlikely(condition)) { \ + WARN_ON(1); \ + sbi->need_fsck = true; \ + } \ + } while (0) #define f2fs_down_write(x, y) down_write(x) #endif @@ -90,6 +96,20 @@ enum { SIT_BITMAP }; +enum { + CP_UMOUNT, + CP_SYNC, + CP_DISCARD, +}; + +struct cp_control { + int reason; + __u64 trim_start; + __u64 trim_end; + __u64 trim_minlen; + __u64 trimmed; +}; + /* * For CP/NAT/SIT/SSA readahead */ @@ -97,7 +117,8 @@ enum { META_CP, META_NAT, META_SIT, - META_SSA + META_SSA, + META_POR, }; /* for the list of ino */ @@ -130,7 +151,9 @@ struct discard_entry { struct fsync_inode_entry { struct list_head list; /* list head */ struct inode *inode; /* vfs inode pointer */ - block_t blkaddr; /* block address locating the last inode */ + block_t blkaddr; /* block address locating the last fsync */ + block_t last_dentry; /* block address locating the last dentry */ + block_t last_inode; /* block address locating the last inode */ }; #define nats_in_cursum(sum) (le16_to_cpu(sum->n_nats)) @@ -141,6 +164,9 @@ struct fsync_inode_entry { #define sit_in_journal(sum, i) (sum->sit_j.entries[i].se) #define segno_in_journal(sum, i) (sum->sit_j.entries[i].segno) +#define MAX_NAT_JENTRIES(sum) (NAT_JOURNAL_ENTRIES - nats_in_cursum(sum)) +#define MAX_SIT_JENTRIES(sum) (SIT_JOURNAL_ENTRIES - sits_in_cursum(sum)) + static inline int update_nats_in_cursum(struct f2fs_summary_block *rs, int i) { int before = nats_in_cursum(rs); @@ -155,11 +181,24 @@ static inline int update_sits_in_cursum(struct f2fs_summary_block *rs, int i) return before; } +static inline bool __has_cursum_space(struct f2fs_summary_block *sum, int size, + int type) +{ + if (type == NAT_JOURNAL) + return size <= MAX_NAT_JENTRIES(sum); + return size <= MAX_SIT_JENTRIES(sum); +} + /* * ioctl commands */ -#define F2FS_IOC_GETFLAGS FS_IOC_GETFLAGS -#define F2FS_IOC_SETFLAGS FS_IOC_SETFLAGS +#define F2FS_IOC_GETFLAGS FS_IOC_GETFLAGS +#define F2FS_IOC_SETFLAGS FS_IOC_SETFLAGS + +#define F2FS_IOCTL_MAGIC 0xf5 +#define F2FS_IOC_START_ATOMIC_WRITE _IO(F2FS_IOCTL_MAGIC, 1) +#define F2FS_IOC_COMMIT_ATOMIC_WRITE _IO(F2FS_IOCTL_MAGIC, 2) +#define F2FS_IOC_START_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 3) #if defined(__KERNEL__) && defined(CONFIG_COMPAT) /* @@ -222,13 +261,16 @@ struct f2fs_inode_info { /* Use below internally in f2fs*/ unsigned long flags; /* use to pass per-file flags */ struct rw_semaphore i_sem; /* protect fi info */ - atomic_t dirty_dents; /* # of dirty dentry pages */ + atomic_t dirty_pages; /* # of dirty pages */ f2fs_hash_t chash; /* hash value of given file name */ unsigned int clevel; /* maximum level of given file name */ nid_t i_xattr_nid; /* node id that contains xattrs */ unsigned long long xattr_ver; /* cp version of xattr modification */ struct extent_info ext; /* in-memory extent cache entry */ struct dir_inode_entry *dirty_dir; /* the pointer of dirty dir */ + + struct list_head inmem_pages; /* inmemory pages managed by f2fs */ + struct mutex inmem_lock; /* lock for inmemory pages */ }; static inline void get_extent_info(struct extent_info *ext, @@ -260,11 +302,10 @@ struct f2fs_nm_info { /* NAT cache management */ struct radix_tree_root nat_root;/* root of the nat entry cache */ + struct radix_tree_root nat_set_root;/* root of the nat set cache */ rwlock_t nat_tree_lock; /* protect nat_tree_lock */ - unsigned int nat_cnt; /* the # of cached nat entries */ struct list_head nat_entries; /* cached nat entry list (clean) */ - struct list_head dirty_nat_entries; /* cached nat entry list (dirty) */ - struct list_head nat_entry_set; /* nat entry set list */ + unsigned int nat_cnt; /* the # of cached nat entries */ unsigned int dirty_nat_cnt; /* total num of nat entries in set */ /* free node ids management */ @@ -332,18 +373,16 @@ enum { }; struct flush_cmd { - struct flush_cmd *next; struct completion wait; + struct llist_node llnode; int ret; }; struct flush_cmd_control { struct task_struct *f2fs_issue_flush; /* flush thread */ wait_queue_head_t flush_wait_queue; /* waiting queue for wake-up */ - struct flush_cmd *issue_list; /* list for command issue */ - struct flush_cmd *dispatch_list; /* list for command dispatch */ - spinlock_t issue_lock; /* for issue list lock */ - struct flush_cmd *issue_tail; /* list tail of issue list */ + struct llist_head issue_list; /* list for command issue */ + struct llist_node *dispatch_list; /* list for command dispatch */ }; struct f2fs_sm_info { @@ -369,8 +408,11 @@ struct f2fs_sm_info { int nr_discards; /* # of discards in the list */ int max_discards; /* max. discards to be issued */ + struct list_head sit_entry_set; /* sit entry set list */ + unsigned int ipu_policy; /* in-place-update policy */ unsigned int min_ipu_util; /* in-place-update threshold */ + unsigned int min_fsync_blocks; /* threshold for fsync */ /* for flush command control */ struct flush_cmd_control *cmd_control_info; @@ -434,6 +476,7 @@ struct f2fs_sb_info { struct buffer_head *raw_super_buf; /* buffer head of raw sb */ struct f2fs_super_block *raw_super; /* raw super block pointer */ int s_dirty; /* dirty flag for checkpoint */ + bool need_fsck; /* need fsck.f2fs to fix */ /* for node-related operations */ struct f2fs_nm_info *nm_info; /* node manager */ @@ -539,6 +582,21 @@ static inline struct f2fs_sb_info *F2FS_SB(struct super_block *sb) return sb->s_fs_info; } +static inline struct f2fs_sb_info *F2FS_I_SB(struct inode *inode) +{ + return F2FS_SB(inode->i_sb); +} + +static inline struct f2fs_sb_info *F2FS_M_SB(struct address_space *mapping) +{ + return F2FS_I_SB(mapping->host); +} + +static inline struct f2fs_sb_info *F2FS_P_SB(struct page *page) +{ + return F2FS_M_SB(page->mapping); +} + static inline struct f2fs_super_block *F2FS_RAW_SUPER(struct f2fs_sb_info *sbi) { return (struct f2fs_super_block *)(sbi->raw_super); @@ -703,8 +761,8 @@ static inline void dec_valid_block_count(struct f2fs_sb_info *sbi, blkcnt_t count) { spin_lock(&sbi->stat_lock); - f2fs_bug_on(sbi->total_valid_block_count < (block_t) count); - f2fs_bug_on(inode->i_blocks < count); + f2fs_bug_on(sbi, sbi->total_valid_block_count < (block_t) count); + f2fs_bug_on(sbi, inode->i_blocks < count); inode->i_blocks -= count; sbi->total_valid_block_count -= (block_t)count; spin_unlock(&sbi->stat_lock); @@ -716,10 +774,11 @@ static inline void inc_page_count(struct f2fs_sb_info *sbi, int count_type) F2FS_SET_SB_DIRT(sbi); } -static inline void inode_inc_dirty_dents(struct inode *inode) +static inline void inode_inc_dirty_pages(struct inode *inode) { - inc_page_count(F2FS_SB(inode->i_sb), F2FS_DIRTY_DENTS); - atomic_inc(&F2FS_I(inode)->dirty_dents); + atomic_inc(&F2FS_I(inode)->dirty_pages); + if (S_ISDIR(inode->i_mode)) + inc_page_count(F2FS_I_SB(inode), F2FS_DIRTY_DENTS); } static inline void dec_page_count(struct f2fs_sb_info *sbi, int count_type) @@ -727,13 +786,15 @@ static inline void dec_page_count(struct f2fs_sb_info *sbi, int count_type) atomic_dec(&sbi->nr_pages[count_type]); } -static inline void inode_dec_dirty_dents(struct inode *inode) +static inline void inode_dec_dirty_pages(struct inode *inode) { - if (!S_ISDIR(inode->i_mode)) + if (!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode)) return; - dec_page_count(F2FS_SB(inode->i_sb), F2FS_DIRTY_DENTS); - atomic_dec(&F2FS_I(inode)->dirty_dents); + atomic_dec(&F2FS_I(inode)->dirty_pages); + + if (S_ISDIR(inode->i_mode)) + dec_page_count(F2FS_I_SB(inode), F2FS_DIRTY_DENTS); } static inline int get_pages(struct f2fs_sb_info *sbi, int count_type) @@ -741,9 +802,9 @@ static inline int get_pages(struct f2fs_sb_info *sbi, int count_type) return atomic_read(&sbi->nr_pages[count_type]); } -static inline int get_dirty_dents(struct inode *inode) +static inline int get_dirty_pages(struct inode *inode) { - return atomic_read(&F2FS_I(inode)->dirty_dents); + return atomic_read(&F2FS_I(inode)->dirty_pages); } static inline int get_blocktype_secs(struct f2fs_sb_info *sbi, int block_type) @@ -848,9 +909,9 @@ static inline void dec_valid_node_count(struct f2fs_sb_info *sbi, { spin_lock(&sbi->stat_lock); - f2fs_bug_on(!sbi->total_valid_block_count); - f2fs_bug_on(!sbi->total_valid_node_count); - f2fs_bug_on(!inode->i_blocks); + f2fs_bug_on(sbi, !sbi->total_valid_block_count); + f2fs_bug_on(sbi, !sbi->total_valid_node_count); + f2fs_bug_on(sbi, !inode->i_blocks); inode->i_blocks--; sbi->total_valid_node_count--; @@ -867,7 +928,7 @@ static inline unsigned int valid_node_count(struct f2fs_sb_info *sbi) static inline void inc_valid_inode_count(struct f2fs_sb_info *sbi) { spin_lock(&sbi->stat_lock); - f2fs_bug_on(sbi->total_valid_inode_count == sbi->total_node_count); + f2fs_bug_on(sbi, sbi->total_valid_inode_count == sbi->total_node_count); sbi->total_valid_inode_count++; spin_unlock(&sbi->stat_lock); } @@ -875,7 +936,7 @@ static inline void inc_valid_inode_count(struct f2fs_sb_info *sbi) static inline void dec_valid_inode_count(struct f2fs_sb_info *sbi) { spin_lock(&sbi->stat_lock); - f2fs_bug_on(!sbi->total_valid_inode_count); + f2fs_bug_on(sbi, !sbi->total_valid_inode_count); sbi->total_valid_inode_count--; spin_unlock(&sbi->stat_lock); } @@ -891,7 +952,7 @@ static inline void f2fs_put_page(struct page *page, int unlock) return; if (unlock) { - f2fs_bug_on(!PageLocked(page)); + f2fs_bug_on(F2FS_P_SB(page), !PageLocked(page)); unlock_page(page); } page_cache_release(page); @@ -998,7 +1059,9 @@ enum { FI_INLINE_DATA, /* used for inline data*/ FI_APPEND_WRITE, /* inode has appended data */ FI_UPDATE_WRITE, /* inode has in-place-update data */ - FI_NEED_IPU, /* used fo ipu for fdatasync */ + FI_NEED_IPU, /* used for ipu per file */ + FI_ATOMIC_FILE, /* indicate atomic file */ + FI_VOLATILE_FILE, /* indicate volatile file */ }; static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag) @@ -1085,6 +1148,16 @@ static inline int f2fs_has_inline_data(struct inode *inode) return is_inode_flag_set(F2FS_I(inode), FI_INLINE_DATA); } +static inline bool f2fs_is_atomic_file(struct inode *inode) +{ + return is_inode_flag_set(F2FS_I(inode), FI_ATOMIC_FILE); +} + +static inline bool f2fs_is_volatile_file(struct inode *inode) +{ + return is_inode_flag_set(F2FS_I(inode), FI_VOLATILE_FILE); +} + static inline void *inline_data_addr(struct page *page) { struct f2fs_inode *ri = F2FS_INODE(page); @@ -1141,6 +1214,7 @@ void update_inode(struct inode *, struct page *); void update_inode_page(struct inode *); int f2fs_write_inode(struct inode *, struct writeback_control *); void f2fs_evict_inode(struct inode *); +void handle_failed_inode(struct inode *); /* * namei.c @@ -1188,9 +1262,9 @@ struct dnode_of_data; struct node_info; bool available_free_memory(struct f2fs_sb_info *, int); -int is_checkpointed_node(struct f2fs_sb_info *, nid_t); -bool fsync_mark_done(struct f2fs_sb_info *, nid_t); -void fsync_mark_clear(struct f2fs_sb_info *, nid_t); +bool is_checkpointed_node(struct f2fs_sb_info *, nid_t); +bool has_fsynced_inode(struct f2fs_sb_info *, nid_t); +bool need_inode_block_update(struct f2fs_sb_info *, nid_t); void get_node_info(struct f2fs_sb_info *, nid_t, struct node_info *); int get_dnode_of_data(struct dnode_of_data *, pgoff_t, int); int truncate_inode_blocks(struct inode *, pgoff_t); @@ -1221,6 +1295,8 @@ void destroy_node_manager_caches(void); /* * segment.c */ +void register_inmem_page(struct inode *, struct page *); +void commit_inmem_pages(struct inode *, bool); void f2fs_balance_fs(struct f2fs_sb_info *); void f2fs_balance_fs_bg(struct f2fs_sb_info *); int f2fs_issue_flush(struct f2fs_sb_info *); @@ -1229,9 +1305,11 @@ void destroy_flush_cmd_control(struct f2fs_sb_info *); void invalidate_blocks(struct f2fs_sb_info *, block_t); void refresh_sit_entry(struct f2fs_sb_info *, block_t, block_t); void clear_prefree_segments(struct f2fs_sb_info *); +void release_discard_addrs(struct f2fs_sb_info *); void discard_next_dnode(struct f2fs_sb_info *, block_t); int npages_for_summary_flush(struct f2fs_sb_info *); void allocate_new_segments(struct f2fs_sb_info *); +int f2fs_trim_fs(struct f2fs_sb_info *, struct fstrim_range *); struct page *get_sum_page(struct f2fs_sb_info *, unsigned int); void write_meta_page(struct f2fs_sb_info *, struct page *); void write_node_page(struct f2fs_sb_info *, struct page *, @@ -1248,7 +1326,7 @@ void write_data_summaries(struct f2fs_sb_info *, block_t); void write_node_summaries(struct f2fs_sb_info *, block_t); int lookup_journal_in_cursum(struct f2fs_summary_block *, int, unsigned int, int); -void flush_sit_entries(struct f2fs_sb_info *); +void flush_sit_entries(struct f2fs_sb_info *, struct cp_control *); int build_segment_manager(struct f2fs_sb_info *); void destroy_segment_manager(struct f2fs_sb_info *); int __init create_segment_manager_caches(void); @@ -1259,7 +1337,8 @@ void destroy_segment_manager_caches(void); */ struct page *grab_meta_page(struct f2fs_sb_info *, pgoff_t); struct page *get_meta_page(struct f2fs_sb_info *, pgoff_t); -int ra_meta_pages(struct f2fs_sb_info *, int, int, int); +struct page *get_meta_page_ra(struct f2fs_sb_info *, pgoff_t); +int ra_meta_pages(struct f2fs_sb_info *, block_t, int, int); long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long); void add_dirty_inode(struct f2fs_sb_info *, nid_t, int type); void remove_dirty_inode(struct f2fs_sb_info *, nid_t, int type); @@ -1271,11 +1350,11 @@ void add_orphan_inode(struct f2fs_sb_info *, nid_t); void remove_orphan_inode(struct f2fs_sb_info *, nid_t); void recover_orphan_inodes(struct f2fs_sb_info *); int get_valid_checkpoint(struct f2fs_sb_info *); -void set_dirty_dir_page(struct inode *, struct page *); +void update_dirty_page(struct inode *, struct page *); void add_dirty_dir_inode(struct inode *); void remove_dirty_dir_inode(struct inode *); void sync_dirty_dir_inodes(struct f2fs_sb_info *); -void write_checkpoint(struct f2fs_sb_info *, bool); +void write_checkpoint(struct f2fs_sb_info *, struct cp_control *); void init_ino_entry_info(struct f2fs_sb_info *); int __init create_checkpoint_caches(void); void destroy_checkpoint_caches(void); @@ -1359,12 +1438,12 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) #define stat_inc_inline_inode(inode) \ do { \ if (f2fs_has_inline_data(inode)) \ - ((F2FS_SB(inode->i_sb))->inline_inode++); \ + ((F2FS_I_SB(inode))->inline_inode++); \ } while (0) #define stat_dec_inline_inode(inode) \ do { \ if (f2fs_has_inline_data(inode)) \ - ((F2FS_SB(inode->i_sb))->inline_inode--); \ + ((F2FS_I_SB(inode))->inline_inode--); \ } while (0) #define stat_inc_seg_type(sbi, curseg) \ diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 060aee65aee8..8e68bb64f835 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -33,7 +33,7 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, { struct page *page = vmf->page; struct inode *inode = file_inode(vma->vm_file); - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct dnode_of_data dn; int err; @@ -117,7 +117,7 @@ static int get_parent_ino(struct inode *inode, nid_t *pino) static inline bool need_do_checkpoint(struct inode *inode) { - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); bool need_cp = false; if (!S_ISREG(inode->i_mode) || inode->i_nlink != 1) @@ -138,7 +138,8 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) { struct inode *inode = file->f_mapping->host; struct f2fs_inode_info *fi = F2FS_I(inode); - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + nid_t ino = inode->i_ino; int ret = 0; bool need_cp = false; struct writeback_control wbc = { @@ -153,12 +154,11 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) trace_f2fs_sync_file_enter(inode); /* if fdatasync is triggered, let's do in-place-update */ - if (datasync) + if (get_dirty_pages(inode) <= SM_I(sbi)->min_fsync_blocks) set_inode_flag(fi, FI_NEED_IPU); - ret = filemap_write_and_wait_range(inode->i_mapping, start, end); - if (datasync) - clear_inode_flag(fi, FI_NEED_IPU); + clear_inode_flag(fi, FI_NEED_IPU); + if (ret) { trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret); return ret; @@ -168,13 +168,22 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) * if there is no written data, don't waste time to write recovery info. */ if (!is_inode_flag_set(fi, FI_APPEND_WRITE) && - !exist_written_data(sbi, inode->i_ino, APPEND_INO)) { + !exist_written_data(sbi, ino, APPEND_INO)) { + struct page *i = find_get_page(NODE_MAPPING(sbi), ino); + + /* But we need to avoid that there are some inode updates */ + if ((i && PageDirty(i)) || need_inode_block_update(sbi, ino)) { + f2fs_put_page(i, 0); + goto go_write; + } + f2fs_put_page(i, 0); + if (is_inode_flag_set(fi, FI_UPDATE_WRITE) || - exist_written_data(sbi, inode->i_ino, UPDATE_INO)) + exist_written_data(sbi, ino, UPDATE_INO)) goto flush_out; goto out; } - +go_write: /* guarantee free sections for fsync */ f2fs_balance_fs(sbi); @@ -207,26 +216,28 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) up_write(&fi->i_sem); } } else { - /* if there is no written node page, write its inode page */ - while (!sync_node_pages(sbi, inode->i_ino, &wbc)) { - if (fsync_mark_done(sbi, inode->i_ino)) - goto out; +sync_nodes: + sync_node_pages(sbi, ino, &wbc); + + if (need_inode_block_update(sbi, ino)) { mark_inode_dirty_sync(inode); ret = f2fs_write_inode(inode, NULL); if (ret) goto out; + goto sync_nodes; } - ret = wait_on_node_pages_writeback(sbi, inode->i_ino); + + ret = wait_on_node_pages_writeback(sbi, ino); if (ret) goto out; /* once recovery info is written, don't need to tack this */ - remove_dirty_inode(sbi, inode->i_ino, APPEND_INO); + remove_dirty_inode(sbi, ino, APPEND_INO); clear_inode_flag(fi, FI_APPEND_WRITE); flush_out: - remove_dirty_inode(sbi, inode->i_ino, UPDATE_INO); + remove_dirty_inode(sbi, ino, UPDATE_INO); clear_inode_flag(fi, FI_UPDATE_WRITE); - ret = f2fs_issue_flush(F2FS_SB(inode->i_sb)); + ret = f2fs_issue_flush(F2FS_I_SB(inode)); } out: trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret); @@ -353,6 +364,8 @@ static loff_t f2fs_llseek(struct file *file, loff_t offset, int whence) maxbytes, i_size_read(inode)); case SEEK_DATA: case SEEK_HOLE: + if (offset < 0) + return -ENXIO; return f2fs_seek_block(file, offset, whence); } @@ -369,7 +382,7 @@ static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma) int truncate_data_blocks_range(struct dnode_of_data *dn, int count) { int nr_free = 0, ofs = dn->ofs_in_node; - struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); + struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); struct f2fs_node *raw_node; __le32 *addr; @@ -432,7 +445,7 @@ out: int truncate_blocks(struct inode *inode, u64 from, bool lock) { - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); unsigned int blocksize = inode->i_sb->s_blocksize; struct dnode_of_data dn; pgoff_t free_from; @@ -463,7 +476,7 @@ int truncate_blocks(struct inode *inode, u64 from, bool lock) count = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode)); count -= dn.ofs_in_node; - f2fs_bug_on(count < 0); + f2fs_bug_on(sbi, count < 0); if (dn.ofs_in_node || IS_INODE(dn.node_page)) { truncate_data_blocks_range(&dn, count); @@ -547,15 +560,22 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr) if (err) return err; - if ((attr->ia_valid & ATTR_SIZE) && - attr->ia_size != i_size_read(inode)) { + if (attr->ia_valid & ATTR_SIZE) { err = f2fs_convert_inline_data(inode, attr->ia_size, NULL); if (err) return err; - truncate_setsize(inode, attr->ia_size); - f2fs_truncate(inode); - f2fs_balance_fs(F2FS_SB(inode->i_sb)); + if (attr->ia_size != i_size_read(inode)) { + truncate_setsize(inode, attr->ia_size); + f2fs_truncate(inode); + f2fs_balance_fs(F2FS_I_SB(inode)); + } else { + /* + * giving a chance to truncate blocks past EOF which + * are fallocated with FALLOC_FL_KEEP_SIZE. + */ + f2fs_truncate(inode); + } } __setattr_copy(inode, attr); @@ -589,7 +609,7 @@ const struct inode_operations f2fs_file_inode_operations = { static void fill_zero(struct inode *inode, pgoff_t index, loff_t start, loff_t len) { - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct page *page; if (!len) @@ -638,6 +658,13 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len) loff_t off_start, off_end; int ret = 0; + if (!S_ISREG(inode->i_mode)) + return -EOPNOTSUPP; + + /* skip punching hole beyond i_size */ + if (offset >= inode->i_size) + return ret; + ret = f2fs_convert_inline_data(inode, MAX_INLINE_DATA + 1, NULL); if (ret) return ret; @@ -661,7 +688,7 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len) if (pg_start < pg_end) { struct address_space *mapping = inode->i_mapping; loff_t blk_start, blk_end; - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); f2fs_balance_fs(sbi); @@ -682,7 +709,7 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len) static int expand_inode_data(struct inode *inode, loff_t offset, loff_t len, int mode) { - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); pgoff_t index, pg_start, pg_end; loff_t new_size = i_size_read(inode); loff_t off_start, off_end; @@ -778,61 +805,157 @@ static inline __u32 f2fs_mask_flags(umode_t mode, __u32 flags) return flags & F2FS_OTHER_FLMASK; } -long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) +static int f2fs_ioc_getflags(struct file *filp, unsigned long arg) { struct inode *inode = file_inode(filp); struct f2fs_inode_info *fi = F2FS_I(inode); - unsigned int flags; + unsigned int flags = fi->i_flags & FS_FL_USER_VISIBLE; + return put_user(flags, (int __user *)arg); +} + +static int f2fs_ioc_setflags(struct file *filp, unsigned long arg) +{ + struct inode *inode = file_inode(filp); + struct f2fs_inode_info *fi = F2FS_I(inode); + unsigned int flags = fi->i_flags & FS_FL_USER_VISIBLE; + unsigned int oldflags; int ret; + ret = mnt_want_write_file(filp); + if (ret) + return ret; + + if (!inode_owner_or_capable(inode)) { + ret = -EACCES; + goto out; + } + + if (get_user(flags, (int __user *)arg)) { + ret = -EFAULT; + goto out; + } + + flags = f2fs_mask_flags(inode->i_mode, flags); + + mutex_lock(&inode->i_mutex); + + oldflags = fi->i_flags; + + if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) { + if (!capable(CAP_LINUX_IMMUTABLE)) { + mutex_unlock(&inode->i_mutex); + ret = -EPERM; + goto out; + } + } + + flags = flags & FS_FL_USER_MODIFIABLE; + flags |= oldflags & ~FS_FL_USER_MODIFIABLE; + fi->i_flags = flags; + mutex_unlock(&inode->i_mutex); + + f2fs_set_inode_flags(inode); + inode->i_ctime = CURRENT_TIME; + mark_inode_dirty(inode); +out: + mnt_drop_write_file(filp); + return ret; +} + +static int f2fs_ioc_start_atomic_write(struct file *filp) +{ + struct inode *inode = file_inode(filp); + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + + if (!inode_owner_or_capable(inode)) + return -EACCES; + + f2fs_balance_fs(sbi); + + set_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE); + + return f2fs_convert_inline_data(inode, MAX_INLINE_DATA + 1, NULL); +} + +static int f2fs_ioc_commit_atomic_write(struct file *filp) +{ + struct inode *inode = file_inode(filp); + int ret; + + if (!inode_owner_or_capable(inode)) + return -EACCES; + + if (f2fs_is_volatile_file(inode)) + return 0; + + ret = mnt_want_write_file(filp); + if (ret) + return ret; + + if (f2fs_is_atomic_file(inode)) + commit_inmem_pages(inode, false); + + ret = f2fs_sync_file(filp, 0, LONG_MAX, 0); + mnt_drop_write_file(filp); + return ret; +} + +static int f2fs_ioc_start_volatile_write(struct file *filp) +{ + struct inode *inode = file_inode(filp); + + if (!inode_owner_or_capable(inode)) + return -EACCES; + + set_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE); + return 0; +} + +static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg) +{ + struct inode *inode = file_inode(filp); + struct super_block *sb = inode->i_sb; + struct request_queue *q = bdev_get_queue(sb->s_bdev); + struct fstrim_range range; + int ret; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (!blk_queue_discard(q)) + return -EOPNOTSUPP; + + if (copy_from_user(&range, (struct fstrim_range __user *)arg, + sizeof(range))) + return -EFAULT; + + range.minlen = max((unsigned int)range.minlen, + q->limits.discard_granularity); + ret = f2fs_trim_fs(F2FS_SB(sb), &range); + if (ret < 0) + return ret; + + if (copy_to_user((struct fstrim_range __user *)arg, &range, + sizeof(range))) + return -EFAULT; + return 0; +} + +long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) +{ switch (cmd) { case F2FS_IOC_GETFLAGS: - flags = fi->i_flags & FS_FL_USER_VISIBLE; - return put_user(flags, (int __user *) arg); + return f2fs_ioc_getflags(filp, arg); case F2FS_IOC_SETFLAGS: - { - unsigned int oldflags; - - ret = mnt_want_write_file(filp); - if (ret) - return ret; - - if (!inode_owner_or_capable(inode)) { - ret = -EACCES; - goto out; - } - - if (get_user(flags, (int __user *) arg)) { - ret = -EFAULT; - goto out; - } - - flags = f2fs_mask_flags(inode->i_mode, flags); - - mutex_lock(&inode->i_mutex); - - oldflags = fi->i_flags; - - if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) { - if (!capable(CAP_LINUX_IMMUTABLE)) { - mutex_unlock(&inode->i_mutex); - ret = -EPERM; - goto out; - } - } - - flags = flags & FS_FL_USER_MODIFIABLE; - flags |= oldflags & ~FS_FL_USER_MODIFIABLE; - fi->i_flags = flags; - mutex_unlock(&inode->i_mutex); - - f2fs_set_inode_flags(inode); - inode->i_ctime = CURRENT_TIME; - mark_inode_dirty(inode); -out: - mnt_drop_write_file(filp); - return ret; - } + return f2fs_ioc_setflags(filp, arg); + case F2FS_IOC_START_ATOMIC_WRITE: + return f2fs_ioc_start_atomic_write(filp); + case F2FS_IOC_COMMIT_ATOMIC_WRITE: + return f2fs_ioc_commit_atomic_write(filp); + case F2FS_IOC_START_VOLATILE_WRITE: + return f2fs_ioc_start_volatile_write(filp); + case FITRIM: + return f2fs_ioc_fitrim(filp, arg); default: return -ENOTTY; } diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 943a31db7cc3..2a8f4acdb86b 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -193,7 +193,7 @@ static unsigned int check_bg_victims(struct f2fs_sb_info *sbi) * selected by background GC before. * Those segments guarantee they have small valid blocks. */ - for_each_set_bit(secno, dirty_i->victim_secmap, TOTAL_SECS(sbi)) { + for_each_set_bit(secno, dirty_i->victim_secmap, MAIN_SECS(sbi)) { if (sec_usage_check(sbi, secno)) continue; clear_bit(secno, dirty_i->victim_secmap); @@ -263,14 +263,14 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi, unsigned int secno, max_cost; int nsearched = 0; + mutex_lock(&dirty_i->seglist_lock); + p.alloc_mode = alloc_mode; select_policy(sbi, gc_type, type, &p); p.min_segno = NULL_SEGNO; p.min_cost = max_cost = get_max_cost(sbi, &p); - mutex_lock(&dirty_i->seglist_lock); - if (p.alloc_mode == LFS && gc_type == FG_GC) { p.min_segno = check_bg_victims(sbi); if (p.min_segno != NULL_SEGNO) @@ -281,9 +281,8 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi, unsigned long cost; unsigned int segno; - segno = find_next_bit(p.dirty_segmap, - TOTAL_SEGS(sbi), p.offset); - if (segno >= TOTAL_SEGS(sbi)) { + segno = find_next_bit(p.dirty_segmap, MAIN_SEGS(sbi), p.offset); + if (segno >= MAIN_SEGS(sbi)) { if (sbi->last_victim[p.gc_mode]) { sbi->last_victim[p.gc_mode] = 0; p.offset = 0; @@ -423,6 +422,12 @@ next_step: if (IS_ERR(node_page)) continue; + /* block may become invalid during get_node_page */ + if (check_valid_map(sbi, segno, off) == 0) { + f2fs_put_page(node_page, 1); + continue; + } + /* set page dirty and write it */ if (gc_type == FG_GC) { f2fs_wait_on_page_writeback(node_page, NODE); @@ -531,7 +536,7 @@ static void move_data_page(struct inode *inode, struct page *page, int gc_type) f2fs_wait_on_page_writeback(page, DATA); if (clear_page_dirty_for_io(page)) - inode_dec_dirty_dents(inode); + inode_dec_dirty_pages(inode); set_cold_data(page); do_write_data_page(page, &fio); clear_cold_data(page); @@ -688,6 +693,9 @@ int f2fs_gc(struct f2fs_sb_info *sbi) int gc_type = BG_GC; int nfree = 0; int ret = -1; + struct cp_control cpc = { + .reason = CP_SYNC, + }; INIT_LIST_HEAD(&ilist); gc_more: @@ -698,7 +706,7 @@ gc_more: if (gc_type == BG_GC && has_not_enough_free_secs(sbi, nfree)) { gc_type = FG_GC; - write_checkpoint(sbi, false); + write_checkpoint(sbi, &cpc); } if (!__get_victim(sbi, &segno, gc_type, NO_CHECK_TYPE)) @@ -723,7 +731,7 @@ gc_more: goto gc_more; if (gc_type == FG_GC) - write_checkpoint(sbi, false); + write_checkpoint(sbi, &cpc); stop: mutex_unlock(&sbi->gc_mutex); diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 3e8ecdf3742b..88036fd75797 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -15,11 +15,13 @@ bool f2fs_may_inline(struct inode *inode) { - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); block_t nr_blocks; loff_t i_size; - if (!test_opt(sbi, INLINE_DATA)) + if (!test_opt(F2FS_I_SB(inode), INLINE_DATA)) + return false; + + if (f2fs_is_atomic_file(inode)) return false; nr_blocks = F2FS_I(inode)->i_xattr_nid ? 3 : 2; @@ -35,7 +37,6 @@ bool f2fs_may_inline(struct inode *inode) int f2fs_read_inline_data(struct inode *inode, struct page *page) { - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); struct page *ipage; void *src_addr, *dst_addr; @@ -44,7 +45,7 @@ int f2fs_read_inline_data(struct inode *inode, struct page *page) goto out; } - ipage = get_node_page(sbi, inode->i_ino); + ipage = get_node_page(F2FS_I_SB(inode), inode->i_ino); if (IS_ERR(ipage)) { unlock_page(page); return PTR_ERR(ipage); @@ -73,7 +74,7 @@ static int __f2fs_convert_inline_data(struct inode *inode, struct page *page) struct dnode_of_data dn; void *src_addr, *dst_addr; block_t new_blk_addr; - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_io_info fio = { .type = DATA, .rw = WRITE_SYNC | REQ_PRIO, @@ -189,13 +190,12 @@ int f2fs_write_inline_data(struct inode *inode, void truncate_inline_data(struct inode *inode, u64 from) { - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); struct page *ipage; if (from >= MAX_INLINE_DATA) return; - ipage = get_node_page(sbi, inode->i_ino); + ipage = get_node_page(F2FS_I_SB(inode), inode->i_ino); if (IS_ERR(ipage)) return; @@ -209,7 +209,7 @@ void truncate_inline_data(struct inode *inode, u64 from) bool recover_inline_data(struct inode *inode, struct page *npage) { - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_inode *ri = NULL; void *src_addr, *dst_addr; struct page *ipage; @@ -229,7 +229,7 @@ bool recover_inline_data(struct inode *inode, struct page *npage) ri && (ri->i_inline & F2FS_INLINE_DATA)) { process_inline: ipage = get_node_page(sbi, inode->i_ino); - f2fs_bug_on(IS_ERR(ipage)); + f2fs_bug_on(sbi, IS_ERR(ipage)); f2fs_wait_on_page_writeback(ipage, NODE); @@ -243,7 +243,7 @@ process_inline: if (f2fs_has_inline_data(inode)) { ipage = get_node_page(sbi, inode->i_ino); - f2fs_bug_on(IS_ERR(ipage)); + f2fs_bug_on(sbi, IS_ERR(ipage)); f2fs_wait_on_page_writeback(ipage, NODE); zero_user_segment(ipage, INLINE_DATA_OFFSET, INLINE_DATA_OFFSET + MAX_INLINE_DATA); diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 2c39999f3868..0deead4505e7 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -69,7 +69,7 @@ static void __set_inode_rdev(struct inode *inode, struct f2fs_inode *ri) static int do_read_inode(struct inode *inode) { - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_inode_info *fi = F2FS_I(inode); struct page *node_page; struct f2fs_inode *ri; @@ -218,7 +218,7 @@ void update_inode(struct inode *inode, struct page *node_page) void update_inode_page(struct inode *inode) { - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct page *node_page; retry: node_page = get_node_page(sbi, inode->i_ino); @@ -238,7 +238,7 @@ retry: int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc) { - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); if (inode->i_ino == F2FS_NODE_INO(sbi) || inode->i_ino == F2FS_META_INO(sbi)) @@ -266,9 +266,13 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc) */ void f2fs_evict_inode(struct inode *inode) { - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); nid_t xnid = F2FS_I(inode)->i_xattr_nid; + /* some remained atomic pages should discarded */ + if (f2fs_is_atomic_file(inode) || f2fs_is_volatile_file(inode)) + commit_inmem_pages(inode, true); + trace_f2fs_evict_inode(inode); truncate_inode_pages_final(&inode->i_data); @@ -276,7 +280,7 @@ void f2fs_evict_inode(struct inode *inode) inode->i_ino == F2FS_META_INO(sbi)) goto out_clear; - f2fs_bug_on(get_dirty_dents(inode)); + f2fs_bug_on(sbi, get_dirty_pages(inode)); remove_dirty_dir_inode(inode); if (inode->i_nlink || is_bad_inode(inode)) @@ -306,3 +310,26 @@ no_delete: out_clear: clear_inode(inode); } + +/* caller should call f2fs_lock_op() */ +void handle_failed_inode(struct inode *inode) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + + clear_nlink(inode); + make_bad_inode(inode); + unlock_new_inode(inode); + + i_size_write(inode, 0); + if (F2FS_HAS_BLOCKS(inode)) + f2fs_truncate(inode); + + remove_inode_page(inode); + stat_dec_inline_inode(inode); + + alloc_nid_failed(sbi, inode->i_ino); + f2fs_unlock_op(sbi); + + /* iput will drop the inode object */ + iput(inode); +} diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index ee103fd7283c..0d2526e5aa11 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -23,7 +23,7 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) { - struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); + struct f2fs_sb_info *sbi = F2FS_I_SB(dir); nid_t ino; struct inode *inode; bool nid_free = false; @@ -102,7 +102,7 @@ static inline void set_cold_files(struct f2fs_sb_info *sbi, struct inode *inode, static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool excl) { - struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); + struct f2fs_sb_info *sbi = F2FS_I_SB(dir); struct inode *inode; nid_t ino = 0; int err; @@ -123,9 +123,9 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, f2fs_lock_op(sbi); err = f2fs_add_link(dentry, inode); - f2fs_unlock_op(sbi); if (err) goto out; + f2fs_unlock_op(sbi); alloc_nid_done(sbi, ino); @@ -133,9 +133,7 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, unlock_new_inode(inode); return 0; out: - clear_nlink(inode); - iget_failed(inode); - alloc_nid_failed(sbi, ino); + handle_failed_inode(inode); return err; } @@ -143,7 +141,7 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) { struct inode *inode = old_dentry->d_inode; - struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); + struct f2fs_sb_info *sbi = F2FS_I_SB(dir); int err; f2fs_balance_fs(sbi); @@ -154,15 +152,16 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir, set_inode_flag(F2FS_I(inode), FI_INC_LINK); f2fs_lock_op(sbi); err = f2fs_add_link(dentry, inode); - f2fs_unlock_op(sbi); if (err) goto out; + f2fs_unlock_op(sbi); d_instantiate(dentry, inode); return 0; out: clear_inode_flag(F2FS_I(inode), FI_INC_LINK); iput(inode); + f2fs_unlock_op(sbi); return err; } @@ -203,7 +202,7 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, static int f2fs_unlink(struct inode *dir, struct dentry *dentry) { - struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); + struct f2fs_sb_info *sbi = F2FS_I_SB(dir); struct inode *inode = dentry->d_inode; struct f2fs_dir_entry *de; struct page *page; @@ -237,7 +236,7 @@ fail: static int f2fs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) { - struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); + struct f2fs_sb_info *sbi = F2FS_I_SB(dir); struct inode *inode; size_t symlen = strlen(symname) + 1; int err; @@ -253,9 +252,9 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry, f2fs_lock_op(sbi); err = f2fs_add_link(dentry, inode); - f2fs_unlock_op(sbi); if (err) goto out; + f2fs_unlock_op(sbi); err = page_symlink(inode, symname, symlen); alloc_nid_done(sbi, inode->i_ino); @@ -264,15 +263,13 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry, unlock_new_inode(inode); return err; out: - clear_nlink(inode); - iget_failed(inode); - alloc_nid_failed(sbi, inode->i_ino); + handle_failed_inode(inode); return err; } static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) { - struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); + struct f2fs_sb_info *sbi = F2FS_I_SB(dir); struct inode *inode; int err; @@ -290,9 +287,9 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) set_inode_flag(F2FS_I(inode), FI_INC_LINK); f2fs_lock_op(sbi); err = f2fs_add_link(dentry, inode); - f2fs_unlock_op(sbi); if (err) goto out_fail; + f2fs_unlock_op(sbi); alloc_nid_done(sbi, inode->i_ino); @@ -303,9 +300,7 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) out_fail: clear_inode_flag(F2FS_I(inode), FI_INC_LINK); - clear_nlink(inode); - iget_failed(inode); - alloc_nid_failed(sbi, inode->i_ino); + handle_failed_inode(inode); return err; } @@ -320,7 +315,7 @@ static int f2fs_rmdir(struct inode *dir, struct dentry *dentry) static int f2fs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev) { - struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); + struct f2fs_sb_info *sbi = F2FS_I_SB(dir); struct inode *inode; int err = 0; @@ -338,25 +333,23 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry, f2fs_lock_op(sbi); err = f2fs_add_link(dentry, inode); - f2fs_unlock_op(sbi); if (err) goto out; + f2fs_unlock_op(sbi); alloc_nid_done(sbi, inode->i_ino); d_instantiate(dentry, inode); unlock_new_inode(inode); return 0; out: - clear_nlink(inode); - iget_failed(inode); - alloc_nid_failed(sbi, inode->i_ino); + handle_failed_inode(inode); return err; } static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry) { - struct f2fs_sb_info *sbi = F2FS_SB(old_dir->i_sb); + struct f2fs_sb_info *sbi = F2FS_I_SB(old_dir); struct inode *old_inode = old_dentry->d_inode; struct inode *new_inode = new_dentry->d_inode; struct page *old_dir_page; @@ -480,8 +473,7 @@ out: static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry) { - struct super_block *sb = old_dir->i_sb; - struct f2fs_sb_info *sbi = F2FS_SB(sb); + struct f2fs_sb_info *sbi = F2FS_I_SB(old_dir); struct inode *old_inode = old_dentry->d_inode; struct inode *new_inode = new_dentry->d_inode; struct page *old_dir_page, *new_dir_page; @@ -642,7 +634,7 @@ static int f2fs_rename2(struct inode *old_dir, struct dentry *old_dentry, static int f2fs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) { - struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); + struct f2fs_sb_info *sbi = F2FS_I_SB(dir); struct inode *inode; int err; @@ -678,10 +670,7 @@ static int f2fs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) release_out: release_orphan_inode(sbi); out: - f2fs_unlock_op(sbi); - clear_nlink(inode); - iget_failed(inode); - alloc_nid_failed(sbi, inode->i_ino); + handle_failed_inode(inode); return err; } diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 45378196e19a..44b8afef43d9 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -54,7 +54,6 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type) static void clear_node_page_dirty(struct page *page) { struct address_space *mapping = page->mapping; - struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb); unsigned int long flags; if (PageDirty(page)) { @@ -65,7 +64,7 @@ static void clear_node_page_dirty(struct page *page) spin_unlock_irqrestore(&mapping->tree_lock, flags); clear_page_dirty_for_io(page); - dec_page_count(sbi, F2FS_DIRTY_NODES); + dec_page_count(F2FS_M_SB(mapping), F2FS_DIRTY_NODES); } ClearPageUptodate(page); } @@ -92,7 +91,7 @@ static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid) /* get current nat block page with lock */ src_page = get_meta_page(sbi, src_off); dst_page = grab_meta_page(sbi, dst_off); - f2fs_bug_on(PageDirty(src_page)); + f2fs_bug_on(sbi, PageDirty(src_page)); src_addr = page_address(src_page); dst_addr = page_address(dst_page); @@ -124,44 +123,99 @@ static void __del_from_nat_cache(struct f2fs_nm_info *nm_i, struct nat_entry *e) kmem_cache_free(nat_entry_slab, e); } -int is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid) +static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i, + struct nat_entry *ne) +{ + nid_t set = NAT_BLOCK_OFFSET(ne->ni.nid); + struct nat_entry_set *head; + + if (get_nat_flag(ne, IS_DIRTY)) + return; +retry: + head = radix_tree_lookup(&nm_i->nat_set_root, set); + if (!head) { + head = f2fs_kmem_cache_alloc(nat_entry_set_slab, GFP_ATOMIC); + + INIT_LIST_HEAD(&head->entry_list); + INIT_LIST_HEAD(&head->set_list); + head->set = set; + head->entry_cnt = 0; + + if (radix_tree_insert(&nm_i->nat_set_root, set, head)) { + cond_resched(); + goto retry; + } + } + list_move_tail(&ne->list, &head->entry_list); + nm_i->dirty_nat_cnt++; + head->entry_cnt++; + set_nat_flag(ne, IS_DIRTY, true); +} + +static void __clear_nat_cache_dirty(struct f2fs_nm_info *nm_i, + struct nat_entry *ne) +{ + nid_t set = ne->ni.nid / NAT_ENTRY_PER_BLOCK; + struct nat_entry_set *head; + + head = radix_tree_lookup(&nm_i->nat_set_root, set); + if (head) { + list_move_tail(&ne->list, &nm_i->nat_entries); + set_nat_flag(ne, IS_DIRTY, false); + head->entry_cnt--; + nm_i->dirty_nat_cnt--; + } +} + +static unsigned int __gang_lookup_nat_set(struct f2fs_nm_info *nm_i, + nid_t start, unsigned int nr, struct nat_entry_set **ep) +{ + return radix_tree_gang_lookup(&nm_i->nat_set_root, (void **)ep, + start, nr); +} + +bool is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid) { struct f2fs_nm_info *nm_i = NM_I(sbi); struct nat_entry *e; - int is_cp = 1; + bool is_cp = true; read_lock(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, nid); - if (e && !e->checkpointed) - is_cp = 0; + if (e && !get_nat_flag(e, IS_CHECKPOINTED)) + is_cp = false; read_unlock(&nm_i->nat_tree_lock); return is_cp; } -bool fsync_mark_done(struct f2fs_sb_info *sbi, nid_t nid) +bool has_fsynced_inode(struct f2fs_sb_info *sbi, nid_t ino) { struct f2fs_nm_info *nm_i = NM_I(sbi); struct nat_entry *e; - bool fsync_done = false; + bool fsynced = false; read_lock(&nm_i->nat_tree_lock); - e = __lookup_nat_cache(nm_i, nid); - if (e) - fsync_done = e->fsync_done; + e = __lookup_nat_cache(nm_i, ino); + if (e && get_nat_flag(e, HAS_FSYNCED_INODE)) + fsynced = true; read_unlock(&nm_i->nat_tree_lock); - return fsync_done; + return fsynced; } -void fsync_mark_clear(struct f2fs_sb_info *sbi, nid_t nid) +bool need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino) { struct f2fs_nm_info *nm_i = NM_I(sbi); struct nat_entry *e; + bool need_update = true; - write_lock(&nm_i->nat_tree_lock); - e = __lookup_nat_cache(nm_i, nid); - if (e) - e->fsync_done = false; - write_unlock(&nm_i->nat_tree_lock); + read_lock(&nm_i->nat_tree_lock); + e = __lookup_nat_cache(nm_i, ino); + if (e && get_nat_flag(e, HAS_LAST_FSYNC) && + (get_nat_flag(e, IS_CHECKPOINTED) || + get_nat_flag(e, HAS_FSYNCED_INODE))) + need_update = false; + read_unlock(&nm_i->nat_tree_lock); + return need_update; } static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid) @@ -177,7 +231,7 @@ static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid) } memset(new, 0, sizeof(struct nat_entry)); nat_set_nid(new, nid); - new->checkpointed = true; + nat_reset_flag(new); list_add_tail(&new->list, &nm_i->nat_entries); nm_i->nat_cnt++; return new; @@ -216,7 +270,7 @@ retry: goto retry; } e->ni = *ni; - f2fs_bug_on(ni->blk_addr == NEW_ADDR); + f2fs_bug_on(sbi, ni->blk_addr == NEW_ADDR); } else if (new_blkaddr == NEW_ADDR) { /* * when nid is reallocated, @@ -224,16 +278,16 @@ retry: * So, reinitialize it with new information. */ e->ni = *ni; - f2fs_bug_on(ni->blk_addr != NULL_ADDR); + f2fs_bug_on(sbi, ni->blk_addr != NULL_ADDR); } /* sanity check */ - f2fs_bug_on(nat_get_blkaddr(e) != ni->blk_addr); - f2fs_bug_on(nat_get_blkaddr(e) == NULL_ADDR && + f2fs_bug_on(sbi, nat_get_blkaddr(e) != ni->blk_addr); + f2fs_bug_on(sbi, nat_get_blkaddr(e) == NULL_ADDR && new_blkaddr == NULL_ADDR); - f2fs_bug_on(nat_get_blkaddr(e) == NEW_ADDR && + f2fs_bug_on(sbi, nat_get_blkaddr(e) == NEW_ADDR && new_blkaddr == NEW_ADDR); - f2fs_bug_on(nat_get_blkaddr(e) != NEW_ADDR && + f2fs_bug_on(sbi, nat_get_blkaddr(e) != NEW_ADDR && nat_get_blkaddr(e) != NULL_ADDR && new_blkaddr == NEW_ADDR); @@ -245,12 +299,17 @@ retry: /* change address */ nat_set_blkaddr(e, new_blkaddr); + if (new_blkaddr == NEW_ADDR || new_blkaddr == NULL_ADDR) + set_nat_flag(e, IS_CHECKPOINTED, false); __set_nat_cache_dirty(nm_i, e); /* update fsync_mark if its inode nat entry is still alive */ e = __lookup_nat_cache(nm_i, ni->ino); - if (e) - e->fsync_done = fsync_done; + if (e) { + if (fsync_done && ni->nid == ni->ino) + set_nat_flag(e, HAS_FSYNCED_INODE, true); + set_nat_flag(e, HAS_LAST_FSYNC, fsync_done); + } write_unlock(&nm_i->nat_tree_lock); } @@ -411,7 +470,7 @@ got: */ int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) { - struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); + struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); struct page *npage[4]; struct page *parent; int offset[4]; @@ -504,15 +563,15 @@ release_out: static void truncate_node(struct dnode_of_data *dn) { - struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); + struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); struct node_info ni; get_node_info(sbi, dn->nid, &ni); if (dn->inode->i_blocks == 0) { - f2fs_bug_on(ni.blk_addr != NULL_ADDR); + f2fs_bug_on(sbi, ni.blk_addr != NULL_ADDR); goto invalidate; } - f2fs_bug_on(ni.blk_addr == NULL_ADDR); + f2fs_bug_on(sbi, ni.blk_addr == NULL_ADDR); /* Deallocate node address */ invalidate_blocks(sbi, ni.blk_addr); @@ -540,14 +599,13 @@ invalidate: static int truncate_dnode(struct dnode_of_data *dn) { - struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); struct page *page; if (dn->nid == 0) return 1; /* get direct node */ - page = get_node_page(sbi, dn->nid); + page = get_node_page(F2FS_I_SB(dn->inode), dn->nid); if (IS_ERR(page) && PTR_ERR(page) == -ENOENT) return 1; else if (IS_ERR(page)) @@ -564,7 +622,6 @@ static int truncate_dnode(struct dnode_of_data *dn) static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs, int ofs, int depth) { - struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); struct dnode_of_data rdn = *dn; struct page *page; struct f2fs_node *rn; @@ -578,7 +635,7 @@ static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs, trace_f2fs_truncate_nodes_enter(dn->inode, dn->nid, dn->data_blkaddr); - page = get_node_page(sbi, dn->nid); + page = get_node_page(F2FS_I_SB(dn->inode), dn->nid); if (IS_ERR(page)) { trace_f2fs_truncate_nodes_exit(dn->inode, PTR_ERR(page)); return PTR_ERR(page); @@ -636,7 +693,6 @@ out_err: static int truncate_partial_nodes(struct dnode_of_data *dn, struct f2fs_inode *ri, int *offset, int depth) { - struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); struct page *pages[2]; nid_t nid[3]; nid_t child_nid; @@ -651,7 +707,7 @@ static int truncate_partial_nodes(struct dnode_of_data *dn, /* get indirect nodes in the path */ for (i = 0; i < idx + 1; i++) { /* reference count'll be increased */ - pages[i] = get_node_page(sbi, nid[i]); + pages[i] = get_node_page(F2FS_I_SB(dn->inode), nid[i]); if (IS_ERR(pages[i])) { err = PTR_ERR(pages[i]); idx = i - 1; @@ -696,7 +752,7 @@ fail: */ int truncate_inode_blocks(struct inode *inode, pgoff_t from) { - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); int err = 0, cont = 1; int level, offset[4], noffset[4]; unsigned int nofs = 0; @@ -792,7 +848,7 @@ fail: int truncate_xattr_node(struct inode *inode, struct page *page) { - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); nid_t nid = F2FS_I(inode)->i_xattr_nid; struct dnode_of_data dn; struct page *npage; @@ -840,7 +896,8 @@ void remove_inode_page(struct inode *inode) truncate_data_blocks_range(&dn, 1); /* 0 is possible, after f2fs_new_inode() has failed */ - f2fs_bug_on(inode->i_blocks != 0 && inode->i_blocks != 1); + f2fs_bug_on(F2FS_I_SB(inode), + inode->i_blocks != 0 && inode->i_blocks != 1); /* will put inode & node pages */ truncate_node(&dn); @@ -860,7 +917,7 @@ struct page *new_inode_page(struct inode *inode) struct page *new_node_page(struct dnode_of_data *dn, unsigned int ofs, struct page *ipage) { - struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); + struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); struct node_info old_ni, new_ni; struct page *page; int err; @@ -880,7 +937,7 @@ struct page *new_node_page(struct dnode_of_data *dn, get_node_info(sbi, dn->nid, &old_ni); /* Reinitialize old_ni with new node page */ - f2fs_bug_on(old_ni.blk_addr != NULL_ADDR); + f2fs_bug_on(sbi, old_ni.blk_addr != NULL_ADDR); new_ni = old_ni; new_ni.ino = dn->inode->i_ino; set_node_addr(sbi, &new_ni, NEW_ADDR, false); @@ -918,7 +975,7 @@ fail: */ static int read_node_page(struct page *page, int rw) { - struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb); + struct f2fs_sb_info *sbi = F2FS_P_SB(page); struct node_info ni; get_node_info(sbi, page->index, &ni); @@ -994,7 +1051,7 @@ got_it: */ struct page *get_node_page_ra(struct page *parent, int start) { - struct f2fs_sb_info *sbi = F2FS_SB(parent->mapping->host->i_sb); + struct f2fs_sb_info *sbi = F2FS_P_SB(parent); struct blk_plug plug; struct page *page; int err, i, end; @@ -1124,10 +1181,14 @@ continue_unlock: /* called by fsync() */ if (ino && IS_DNODE(page)) { - int mark = !is_checkpointed_node(sbi, ino); set_fsync_mark(page, 1); - if (IS_INODE(page)) - set_dentry_mark(page, mark); + if (IS_INODE(page)) { + if (!is_checkpointed_node(sbi, ino) && + !has_fsynced_inode(sbi, ino)) + set_dentry_mark(page, 1); + else + set_dentry_mark(page, 0); + } nwritten++; } else { set_fsync_mark(page, 0); @@ -1206,7 +1267,7 @@ int wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino) static int f2fs_write_node_page(struct page *page, struct writeback_control *wbc) { - struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb); + struct f2fs_sb_info *sbi = F2FS_P_SB(page); nid_t nid; block_t new_addr; struct node_info ni; @@ -1226,7 +1287,7 @@ static int f2fs_write_node_page(struct page *page, /* get old block addr of this node page */ nid = nid_of_node(page); - f2fs_bug_on(page->index != nid); + f2fs_bug_on(sbi, page->index != nid); get_node_info(sbi, nid, &ni); @@ -1257,7 +1318,7 @@ redirty_out: static int f2fs_write_node_pages(struct address_space *mapping, struct writeback_control *wbc) { - struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb); + struct f2fs_sb_info *sbi = F2FS_M_SB(mapping); long diff; trace_f2fs_writepages(mapping->host, wbc, NODE); @@ -1282,15 +1343,12 @@ skip_write: static int f2fs_set_node_page_dirty(struct page *page) { - struct address_space *mapping = page->mapping; - struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb); - trace_f2fs_set_page_dirty(page, NODE); SetPageUptodate(page); if (!PageDirty(page)) { __set_page_dirty_nobuffers(page); - inc_page_count(sbi, F2FS_DIRTY_NODES); + inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_NODES); SetPagePrivate(page); return 1; } @@ -1301,9 +1359,8 @@ static void f2fs_invalidate_node_page(struct page *page, unsigned int offset, unsigned int length) { struct inode *inode = page->mapping->host; - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); if (PageDirty(page)) - dec_page_count(sbi, F2FS_DIRTY_NODES); + dec_page_count(F2FS_I_SB(inode), F2FS_DIRTY_NODES); ClearPagePrivate(page); } @@ -1356,7 +1413,8 @@ static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build) read_lock(&nm_i->nat_tree_lock); ne = __lookup_nat_cache(nm_i, nid); if (ne && - (!ne->checkpointed || nat_get_blkaddr(ne) != NULL_ADDR)) + (!get_nat_flag(ne, IS_CHECKPOINTED) || + nat_get_blkaddr(ne) != NULL_ADDR)) allocated = true; read_unlock(&nm_i->nat_tree_lock); if (allocated) @@ -1413,7 +1471,7 @@ static void scan_nat_page(struct f2fs_sb_info *sbi, break; blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr); - f2fs_bug_on(blk_addr == NEW_ADDR); + f2fs_bug_on(sbi, blk_addr == NEW_ADDR); if (blk_addr == NULL_ADDR) { if (add_free_nid(sbi, start_nid, true) < 0) break; @@ -1483,12 +1541,12 @@ retry: /* We should not use stale free nids created by build_free_nids */ if (nm_i->fcnt && !on_build_free_nids(nm_i)) { - f2fs_bug_on(list_empty(&nm_i->free_nid_list)); + f2fs_bug_on(sbi, list_empty(&nm_i->free_nid_list)); list_for_each_entry(i, &nm_i->free_nid_list, list) if (i->state == NID_NEW) break; - f2fs_bug_on(i->state != NID_NEW); + f2fs_bug_on(sbi, i->state != NID_NEW); *nid = i->nid; i->state = NID_ALLOC; nm_i->fcnt--; @@ -1514,7 +1572,7 @@ void alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid) spin_lock(&nm_i->free_nid_list_lock); i = __lookup_free_nid_list(nm_i, nid); - f2fs_bug_on(!i || i->state != NID_ALLOC); + f2fs_bug_on(sbi, !i || i->state != NID_ALLOC); __del_from_free_nid_list(nm_i, i); spin_unlock(&nm_i->free_nid_list_lock); @@ -1535,7 +1593,7 @@ void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid) spin_lock(&nm_i->free_nid_list_lock); i = __lookup_free_nid_list(nm_i, nid); - f2fs_bug_on(!i || i->state != NID_ALLOC); + f2fs_bug_on(sbi, !i || i->state != NID_ALLOC); if (!available_free_memory(sbi, FREE_NIDS)) { __del_from_free_nid_list(nm_i, i); need_free = true; @@ -1551,14 +1609,13 @@ void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid) void recover_inline_xattr(struct inode *inode, struct page *page) { - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); void *src_addr, *dst_addr; size_t inline_size; struct page *ipage; struct f2fs_inode *ri; - ipage = get_node_page(sbi, inode->i_ino); - f2fs_bug_on(IS_ERR(ipage)); + ipage = get_node_page(F2FS_I_SB(inode), inode->i_ino); + f2fs_bug_on(F2FS_I_SB(inode), IS_ERR(ipage)); ri = F2FS_INODE(page); if (!(ri->i_inline & F2FS_INLINE_XATTR)) { @@ -1579,7 +1636,7 @@ update_inode: void recover_xattr_data(struct inode *inode, struct page *page, block_t blkaddr) { - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); nid_t prev_xnid = F2FS_I(inode)->i_xattr_nid; nid_t new_xnid = nid_of_node(page); struct node_info ni; @@ -1590,7 +1647,7 @@ void recover_xattr_data(struct inode *inode, struct page *page, block_t blkaddr) /* Deallocate node address */ get_node_info(sbi, prev_xnid, &ni); - f2fs_bug_on(ni.blk_addr == NULL_ADDR); + f2fs_bug_on(sbi, ni.blk_addr == NULL_ADDR); invalidate_blocks(sbi, ni.blk_addr); dec_valid_node_count(sbi, inode); set_node_addr(sbi, &ni, NULL_ADDR, false); @@ -1598,7 +1655,7 @@ void recover_xattr_data(struct inode *inode, struct page *page, block_t blkaddr) recover_xnid: /* 2: allocate new xattr nid */ if (unlikely(!inc_valid_node_count(sbi, inode))) - f2fs_bug_on(1); + f2fs_bug_on(sbi, 1); remove_free_nid(NM_I(sbi), new_xnid); get_node_info(sbi, new_xnid, &ni); @@ -1691,7 +1748,7 @@ int restore_node_summary(struct f2fs_sb_info *sbi, struct f2fs_summary *sum_entry; struct inode *inode = sbi->sb->s_bdev->bd_inode; block_t addr; - int bio_blocks = MAX_BIO_BLOCKS(max_hw_blocks(sbi)); + int bio_blocks = MAX_BIO_BLOCKS(sbi); struct page *pages[bio_blocks]; int i, idx, last_offset, nrpages, err = 0; @@ -1733,89 +1790,6 @@ skip: return err; } -static struct nat_entry_set *grab_nat_entry_set(void) -{ - struct nat_entry_set *nes = - f2fs_kmem_cache_alloc(nat_entry_set_slab, GFP_ATOMIC); - - nes->entry_cnt = 0; - INIT_LIST_HEAD(&nes->set_list); - INIT_LIST_HEAD(&nes->entry_list); - return nes; -} - -static void release_nat_entry_set(struct nat_entry_set *nes, - struct f2fs_nm_info *nm_i) -{ - f2fs_bug_on(!list_empty(&nes->entry_list)); - - nm_i->dirty_nat_cnt -= nes->entry_cnt; - list_del(&nes->set_list); - kmem_cache_free(nat_entry_set_slab, nes); -} - -static void adjust_nat_entry_set(struct nat_entry_set *nes, - struct list_head *head) -{ - struct nat_entry_set *next = nes; - - if (list_is_last(&nes->set_list, head)) - return; - - list_for_each_entry_continue(next, head, set_list) - if (nes->entry_cnt <= next->entry_cnt) - break; - - list_move_tail(&nes->set_list, &next->set_list); -} - -static void add_nat_entry(struct nat_entry *ne, struct list_head *head) -{ - struct nat_entry_set *nes; - nid_t start_nid = START_NID(ne->ni.nid); - - list_for_each_entry(nes, head, set_list) { - if (nes->start_nid == start_nid) { - list_move_tail(&ne->list, &nes->entry_list); - nes->entry_cnt++; - adjust_nat_entry_set(nes, head); - return; - } - } - - nes = grab_nat_entry_set(); - - nes->start_nid = start_nid; - list_move_tail(&ne->list, &nes->entry_list); - nes->entry_cnt++; - list_add(&nes->set_list, head); -} - -static void merge_nats_in_set(struct f2fs_sb_info *sbi) -{ - struct f2fs_nm_info *nm_i = NM_I(sbi); - struct list_head *dirty_list = &nm_i->dirty_nat_entries; - struct list_head *set_list = &nm_i->nat_entry_set; - struct nat_entry *ne, *tmp; - - write_lock(&nm_i->nat_tree_lock); - list_for_each_entry_safe(ne, tmp, dirty_list, list) { - if (nat_get_blkaddr(ne) == NEW_ADDR) - continue; - add_nat_entry(ne, set_list); - nm_i->dirty_nat_cnt++; - } - write_unlock(&nm_i->nat_tree_lock); -} - -static bool __has_cursum_space(struct f2fs_summary_block *sum, int size) -{ - if (nats_in_cursum(sum) + size <= NAT_JOURNAL_ENTRIES) - return true; - else - return false; -} - static void remove_nats_in_journal(struct f2fs_sb_info *sbi) { struct f2fs_nm_info *nm_i = NM_I(sbi); @@ -1850,6 +1824,91 @@ found: mutex_unlock(&curseg->curseg_mutex); } +static void __adjust_nat_entry_set(struct nat_entry_set *nes, + struct list_head *head, int max) +{ + struct nat_entry_set *cur; + + if (nes->entry_cnt >= max) + goto add_out; + + list_for_each_entry(cur, head, set_list) { + if (cur->entry_cnt >= nes->entry_cnt) { + list_add(&nes->set_list, cur->set_list.prev); + return; + } + } +add_out: + list_add_tail(&nes->set_list, head); +} + +static void __flush_nat_entry_set(struct f2fs_sb_info *sbi, + struct nat_entry_set *set) +{ + struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); + struct f2fs_summary_block *sum = curseg->sum_blk; + nid_t start_nid = set->set * NAT_ENTRY_PER_BLOCK; + bool to_journal = true; + struct f2fs_nat_block *nat_blk; + struct nat_entry *ne, *cur; + struct page *page = NULL; + + /* + * there are two steps to flush nat entries: + * #1, flush nat entries to journal in current hot data summary block. + * #2, flush nat entries to nat page. + */ + if (!__has_cursum_space(sum, set->entry_cnt, NAT_JOURNAL)) + to_journal = false; + + if (to_journal) { + mutex_lock(&curseg->curseg_mutex); + } else { + page = get_next_nat_page(sbi, start_nid); + nat_blk = page_address(page); + f2fs_bug_on(sbi, !nat_blk); + } + + /* flush dirty nats in nat entry set */ + list_for_each_entry_safe(ne, cur, &set->entry_list, list) { + struct f2fs_nat_entry *raw_ne; + nid_t nid = nat_get_nid(ne); + int offset; + + if (nat_get_blkaddr(ne) == NEW_ADDR) + continue; + + if (to_journal) { + offset = lookup_journal_in_cursum(sum, + NAT_JOURNAL, nid, 1); + f2fs_bug_on(sbi, offset < 0); + raw_ne = &nat_in_journal(sum, offset); + nid_in_journal(sum, offset) = cpu_to_le32(nid); + } else { + raw_ne = &nat_blk->entries[nid - start_nid]; + } + raw_nat_from_node_info(raw_ne, &ne->ni); + + write_lock(&NM_I(sbi)->nat_tree_lock); + nat_reset_flag(ne); + __clear_nat_cache_dirty(NM_I(sbi), ne); + write_unlock(&NM_I(sbi)->nat_tree_lock); + + if (nat_get_blkaddr(ne) == NULL_ADDR) + add_free_nid(sbi, nid, false); + } + + if (to_journal) + mutex_unlock(&curseg->curseg_mutex); + else + f2fs_put_page(page, 1); + + if (!set->entry_cnt) { + radix_tree_delete(&NM_I(sbi)->nat_set_root, set->set); + kmem_cache_free(nat_entry_set_slab, set); + } +} + /* * This function is called during the checkpointing process. */ @@ -1858,91 +1917,37 @@ void flush_nat_entries(struct f2fs_sb_info *sbi) struct f2fs_nm_info *nm_i = NM_I(sbi); struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); struct f2fs_summary_block *sum = curseg->sum_blk; - struct nat_entry_set *nes, *tmp; - struct list_head *head = &nm_i->nat_entry_set; - bool to_journal = true; - - /* merge nat entries of dirty list to nat entry set temporarily */ - merge_nats_in_set(sbi); + struct nat_entry_set *setvec[NATVEC_SIZE]; + struct nat_entry_set *set, *tmp; + unsigned int found; + nid_t set_idx = 0; + LIST_HEAD(sets); /* * if there are no enough space in journal to store dirty nat * entries, remove all entries from journal and merge them * into nat entry set. */ - if (!__has_cursum_space(sum, nm_i->dirty_nat_cnt)) { + if (!__has_cursum_space(sum, nm_i->dirty_nat_cnt, NAT_JOURNAL)) remove_nats_in_journal(sbi); - /* - * merge nat entries of dirty list to nat entry set temporarily - */ - merge_nats_in_set(sbi); - } - if (!nm_i->dirty_nat_cnt) return; - /* - * there are two steps to flush nat entries: - * #1, flush nat entries to journal in current hot data summary block. - * #2, flush nat entries to nat page. - */ - list_for_each_entry_safe(nes, tmp, head, set_list) { - struct f2fs_nat_block *nat_blk; - struct nat_entry *ne, *cur; - struct page *page; - nid_t start_nid = nes->start_nid; - - if (to_journal && !__has_cursum_space(sum, nes->entry_cnt)) - to_journal = false; - - if (to_journal) { - mutex_lock(&curseg->curseg_mutex); - } else { - page = get_next_nat_page(sbi, start_nid); - nat_blk = page_address(page); - f2fs_bug_on(!nat_blk); - } - - /* flush dirty nats in nat entry set */ - list_for_each_entry_safe(ne, cur, &nes->entry_list, list) { - struct f2fs_nat_entry *raw_ne; - nid_t nid = nat_get_nid(ne); - int offset; - - if (to_journal) { - offset = lookup_journal_in_cursum(sum, - NAT_JOURNAL, nid, 1); - f2fs_bug_on(offset < 0); - raw_ne = &nat_in_journal(sum, offset); - nid_in_journal(sum, offset) = cpu_to_le32(nid); - } else { - raw_ne = &nat_blk->entries[nid - start_nid]; - } - raw_nat_from_node_info(raw_ne, &ne->ni); - - if (nat_get_blkaddr(ne) == NULL_ADDR && - add_free_nid(sbi, nid, false) <= 0) { - write_lock(&nm_i->nat_tree_lock); - __del_from_nat_cache(nm_i, ne); - write_unlock(&nm_i->nat_tree_lock); - } else { - write_lock(&nm_i->nat_tree_lock); - __clear_nat_cache_dirty(nm_i, ne); - write_unlock(&nm_i->nat_tree_lock); - } - } - - if (to_journal) - mutex_unlock(&curseg->curseg_mutex); - else - f2fs_put_page(page, 1); - - release_nat_entry_set(nes, nm_i); + while ((found = __gang_lookup_nat_set(nm_i, + set_idx, NATVEC_SIZE, setvec))) { + unsigned idx; + set_idx = setvec[found - 1]->set + 1; + for (idx = 0; idx < found; idx++) + __adjust_nat_entry_set(setvec[idx], &sets, + MAX_NAT_JENTRIES(sum)); } - f2fs_bug_on(!list_empty(head)); - f2fs_bug_on(nm_i->dirty_nat_cnt); + /* flush dirty nats in nat entry set */ + list_for_each_entry_safe(set, tmp, &sets, set_list) + __flush_nat_entry_set(sbi, set); + + f2fs_bug_on(sbi, nm_i->dirty_nat_cnt); } static int init_node_manager(struct f2fs_sb_info *sbi) @@ -1969,9 +1974,8 @@ static int init_node_manager(struct f2fs_sb_info *sbi) INIT_RADIX_TREE(&nm_i->free_nid_root, GFP_ATOMIC); INIT_LIST_HEAD(&nm_i->free_nid_list); INIT_RADIX_TREE(&nm_i->nat_root, GFP_ATOMIC); + INIT_RADIX_TREE(&nm_i->nat_set_root, GFP_ATOMIC); INIT_LIST_HEAD(&nm_i->nat_entries); - INIT_LIST_HEAD(&nm_i->dirty_nat_entries); - INIT_LIST_HEAD(&nm_i->nat_entry_set); mutex_init(&nm_i->build_lock); spin_lock_init(&nm_i->free_nid_list_lock); @@ -2020,14 +2024,14 @@ void destroy_node_manager(struct f2fs_sb_info *sbi) /* destroy free nid list */ spin_lock(&nm_i->free_nid_list_lock); list_for_each_entry_safe(i, next_i, &nm_i->free_nid_list, list) { - f2fs_bug_on(i->state == NID_ALLOC); + f2fs_bug_on(sbi, i->state == NID_ALLOC); __del_from_free_nid_list(nm_i, i); nm_i->fcnt--; spin_unlock(&nm_i->free_nid_list_lock); kmem_cache_free(free_nid_slab, i); spin_lock(&nm_i->free_nid_list_lock); } - f2fs_bug_on(nm_i->fcnt); + f2fs_bug_on(sbi, nm_i->fcnt); spin_unlock(&nm_i->free_nid_list_lock); /* destroy nat cache */ @@ -2039,7 +2043,7 @@ void destroy_node_manager(struct f2fs_sb_info *sbi) for (idx = 0; idx < found; idx++) __del_from_nat_cache(nm_i, natvec[idx]); } - f2fs_bug_on(nm_i->nat_cnt); + f2fs_bug_on(sbi, nm_i->nat_cnt); write_unlock(&nm_i->nat_tree_lock); kfree(nm_i->nat_bitmap); diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 8a116a407599..8d5e6e0dd840 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -39,10 +39,16 @@ struct node_info { unsigned char version; /* version of the node */ }; +enum { + IS_CHECKPOINTED, /* is it checkpointed before? */ + HAS_FSYNCED_INODE, /* is the inode fsynced before? */ + HAS_LAST_FSYNC, /* has the latest node fsync mark? */ + IS_DIRTY, /* this nat entry is dirty? */ +}; + struct nat_entry { struct list_head list; /* for clean or dirty nat list */ - bool checkpointed; /* whether it is checkpointed or not */ - bool fsync_done; /* whether the latest node has fsync mark */ + unsigned char flag; /* for node information bits */ struct node_info ni; /* in-memory node information */ }; @@ -55,18 +61,32 @@ struct nat_entry { #define nat_get_version(nat) (nat->ni.version) #define nat_set_version(nat, v) (nat->ni.version = v) -#define __set_nat_cache_dirty(nm_i, ne) \ - do { \ - ne->checkpointed = false; \ - list_move_tail(&ne->list, &nm_i->dirty_nat_entries); \ - } while (0) -#define __clear_nat_cache_dirty(nm_i, ne) \ - do { \ - ne->checkpointed = true; \ - list_move_tail(&ne->list, &nm_i->nat_entries); \ - } while (0) #define inc_node_version(version) (++version) +static inline void set_nat_flag(struct nat_entry *ne, + unsigned int type, bool set) +{ + unsigned char mask = 0x01 << type; + if (set) + ne->flag |= mask; + else + ne->flag &= ~mask; +} + +static inline bool get_nat_flag(struct nat_entry *ne, unsigned int type) +{ + unsigned char mask = 0x01 << type; + return ne->flag & mask; +} + +static inline void nat_reset_flag(struct nat_entry *ne) +{ + /* these states can be set only after checkpoint was done */ + set_nat_flag(ne, IS_CHECKPOINTED, true); + set_nat_flag(ne, HAS_FSYNCED_INODE, false); + set_nat_flag(ne, HAS_LAST_FSYNC, true); +} + static inline void node_info_from_raw_nat(struct node_info *ni, struct f2fs_nat_entry *raw_ne) { @@ -90,9 +110,9 @@ enum mem_type { }; struct nat_entry_set { - struct list_head set_list; /* link with all nat sets */ + struct list_head set_list; /* link with other nat sets */ struct list_head entry_list; /* link with dirty nat entries */ - nid_t start_nid; /* start nid of nats in set */ + nid_t set; /* set number*/ unsigned int entry_cnt; /* the # of nat entries in set */ }; @@ -110,18 +130,19 @@ struct free_nid { int state; /* in use or not: NID_NEW or NID_ALLOC */ }; -static inline int next_free_nid(struct f2fs_sb_info *sbi, nid_t *nid) +static inline void next_free_nid(struct f2fs_sb_info *sbi, nid_t *nid) { struct f2fs_nm_info *nm_i = NM_I(sbi); struct free_nid *fnid; - if (nm_i->fcnt <= 0) - return -1; spin_lock(&nm_i->free_nid_list_lock); + if (nm_i->fcnt <= 0) { + spin_unlock(&nm_i->free_nid_list_lock); + return; + } fnid = list_entry(nm_i->free_nid_list.next, struct free_nid, list); *nid = fnid->nid; spin_unlock(&nm_i->free_nid_list_lock); - return 0; } /* @@ -197,8 +218,7 @@ static inline void copy_node_footer(struct page *dst, struct page *src) static inline void fill_node_footer_blkaddr(struct page *page, block_t blkaddr) { - struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb); - struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); + struct f2fs_checkpoint *ckpt = F2FS_CKPT(F2FS_P_SB(page)); struct f2fs_node *rn = F2FS_NODE(page); rn->footer.cp_ver = ckpt->checkpoint_ver; diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 756c41cd2582..ebd013225788 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -14,6 +14,37 @@ #include "node.h" #include "segment.h" +/* + * Roll forward recovery scenarios. + * + * [Term] F: fsync_mark, D: dentry_mark + * + * 1. inode(x) | CP | inode(x) | dnode(F) + * -> Update the latest inode(x). + * + * 2. inode(x) | CP | inode(F) | dnode(F) + * -> No problem. + * + * 3. inode(x) | CP | dnode(F) | inode(x) + * -> Recover to the latest dnode(F), and drop the last inode(x) + * + * 4. inode(x) | CP | dnode(F) | inode(F) + * -> No problem. + * + * 5. CP | inode(x) | dnode(F) + * -> The inode(DF) was missing. Should drop this dnode(F). + * + * 6. CP | inode(DF) | dnode(F) + * -> No problem. + * + * 7. CP | dnode(F) | inode(DF) + * -> If f2fs_iget fails, then goto next to find inode(DF). + * + * 8. CP | dnode(F) | inode(x) + * -> If f2fs_iget fails, then goto next to find inode(DF). + * But it will fail due to no inode(DF). + */ + static struct kmem_cache *fsync_entry_slab; bool space_for_roll_forward(struct f2fs_sb_info *sbi) @@ -36,7 +67,7 @@ static struct fsync_inode_entry *get_fsync_inode(struct list_head *head, return NULL; } -static int recover_dentry(struct page *ipage, struct inode *inode) +static int recover_dentry(struct inode *inode, struct page *ipage) { struct f2fs_inode *raw_inode = F2FS_INODE(ipage); nid_t pino = le32_to_cpu(raw_inode->i_pino); @@ -75,7 +106,7 @@ retry: err = -EEXIST; goto out_unmap_put; } - err = acquire_orphan_inode(F2FS_SB(inode->i_sb)); + err = acquire_orphan_inode(F2FS_I_SB(inode)); if (err) { iput(einode); goto out_unmap_put; @@ -110,35 +141,28 @@ out: return err; } -static int recover_inode(struct inode *inode, struct page *node_page) +static void recover_inode(struct inode *inode, struct page *page) { - struct f2fs_inode *raw_inode = F2FS_INODE(node_page); + struct f2fs_inode *raw = F2FS_INODE(page); - if (!IS_INODE(node_page)) - return 0; - - inode->i_mode = le16_to_cpu(raw_inode->i_mode); - i_size_write(inode, le64_to_cpu(raw_inode->i_size)); - inode->i_atime.tv_sec = le64_to_cpu(raw_inode->i_mtime); - inode->i_ctime.tv_sec = le64_to_cpu(raw_inode->i_ctime); - inode->i_mtime.tv_sec = le64_to_cpu(raw_inode->i_mtime); - inode->i_atime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec); - inode->i_ctime.tv_nsec = le32_to_cpu(raw_inode->i_ctime_nsec); - inode->i_mtime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec); - - if (is_dent_dnode(node_page)) - return recover_dentry(node_page, inode); + inode->i_mode = le16_to_cpu(raw->i_mode); + i_size_write(inode, le64_to_cpu(raw->i_size)); + inode->i_atime.tv_sec = le64_to_cpu(raw->i_mtime); + inode->i_ctime.tv_sec = le64_to_cpu(raw->i_ctime); + inode->i_mtime.tv_sec = le64_to_cpu(raw->i_mtime); + inode->i_atime.tv_nsec = le32_to_cpu(raw->i_mtime_nsec); + inode->i_ctime.tv_nsec = le32_to_cpu(raw->i_ctime_nsec); + inode->i_mtime.tv_nsec = le32_to_cpu(raw->i_mtime_nsec); f2fs_msg(inode->i_sb, KERN_NOTICE, "recover_inode: ino = %x, name = %s", - ino_of_node(node_page), raw_inode->i_name); - return 0; + ino_of_node(page), F2FS_INODE(page)->i_name); } static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) { unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi)); struct curseg_info *curseg; - struct page *page; + struct page *page = NULL; block_t blkaddr; int err = 0; @@ -146,20 +170,13 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) curseg = CURSEG_I(sbi, CURSEG_WARM_NODE); blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); - /* read node page */ - page = alloc_page(GFP_F2FS_ZERO); - if (!page) - return -ENOMEM; - lock_page(page); - while (1) { struct fsync_inode_entry *entry; - err = f2fs_submit_page_bio(sbi, page, blkaddr, READ_SYNC); - if (err) - return err; + if (blkaddr < MAIN_BLKADDR(sbi) || blkaddr >= MAX_BLKADDR(sbi)) + return 0; - lock_page(page); + page = get_meta_page_ra(sbi, blkaddr); if (cp_ver != cpver_of_node(page)) break; @@ -180,33 +197,38 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) } /* add this fsync inode to the list */ - entry = kmem_cache_alloc(fsync_entry_slab, GFP_NOFS); + entry = kmem_cache_alloc(fsync_entry_slab, GFP_F2FS_ZERO); if (!entry) { err = -ENOMEM; break; } - + /* + * CP | dnode(F) | inode(DF) + * For this case, we should not give up now. + */ entry->inode = f2fs_iget(sbi->sb, ino_of_node(page)); if (IS_ERR(entry->inode)) { err = PTR_ERR(entry->inode); kmem_cache_free(fsync_entry_slab, entry); + if (err == -ENOENT) + goto next; break; } list_add_tail(&entry->list, head); } entry->blkaddr = blkaddr; - err = recover_inode(entry->inode, page); - if (err && err != -ENOENT) - break; + if (IS_INODE(page)) { + entry->last_inode = blkaddr; + if (is_dent_dnode(page)) + entry->last_dentry = blkaddr; + } next: /* check next segment */ blkaddr = next_blkaddr_of_node(page); + f2fs_put_page(page, 1); } - - unlock_page(page); - __free_pages(page, 0); - + f2fs_put_page(page, 1); return err; } @@ -279,16 +301,30 @@ got_it: ino = ino_of_node(node_page); f2fs_put_page(node_page, 1); - /* Deallocate previous index in the node page */ - inode = f2fs_iget(sbi->sb, ino); - if (IS_ERR(inode)) - return PTR_ERR(inode); + if (ino != dn->inode->i_ino) { + /* Deallocate previous index in the node page */ + inode = f2fs_iget(sbi->sb, ino); + if (IS_ERR(inode)) + return PTR_ERR(inode); + } else { + inode = dn->inode; + } bidx = start_bidx_of_node(offset, F2FS_I(inode)) + - le16_to_cpu(sum.ofs_in_node); + le16_to_cpu(sum.ofs_in_node); - truncate_hole(inode, bidx, bidx + 1); - iput(inode); + if (ino != dn->inode->i_ino) { + truncate_hole(inode, bidx, bidx + 1); + iput(inode); + } else { + struct dnode_of_data tdn; + set_new_dnode(&tdn, inode, dn->inode_page, NULL, 0); + if (get_dnode_of_data(&tdn, bidx, LOOKUP_NODE)) + return 0; + if (tdn.data_blkaddr != NULL_ADDR) + truncate_data_blocks_range(&tdn, 1); + f2fs_put_page(tdn.node_page, 1); + } return 0; } @@ -331,8 +367,8 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, f2fs_wait_on_page_writeback(dn.node_page, NODE); get_node_info(sbi, dn.nid, &ni); - f2fs_bug_on(ni.ino != ino_of_node(page)); - f2fs_bug_on(ofs_of_node(dn.node_page) != ofs_of_node(page)); + f2fs_bug_on(sbi, ni.ino != ino_of_node(page)); + f2fs_bug_on(sbi, ofs_of_node(dn.node_page) != ofs_of_node(page)); for (; start < end; start++) { block_t src, dest; @@ -344,7 +380,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, if (src == NULL_ADDR) { err = reserve_new_block(&dn); /* We should not get -ENOSPC */ - f2fs_bug_on(err); + f2fs_bug_on(sbi, err); } /* Check the previous node page having this index */ @@ -386,7 +422,7 @@ static int recover_data(struct f2fs_sb_info *sbi, { unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi)); struct curseg_info *curseg; - struct page *page; + struct page *page = NULL; int err = 0; block_t blkaddr; @@ -394,32 +430,41 @@ static int recover_data(struct f2fs_sb_info *sbi, curseg = CURSEG_I(sbi, type); blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); - /* read node page */ - page = alloc_page(GFP_F2FS_ZERO); - if (!page) - return -ENOMEM; - - lock_page(page); - while (1) { struct fsync_inode_entry *entry; - err = f2fs_submit_page_bio(sbi, page, blkaddr, READ_SYNC); - if (err) - return err; - - lock_page(page); - - if (cp_ver != cpver_of_node(page)) + if (blkaddr < MAIN_BLKADDR(sbi) || blkaddr >= MAX_BLKADDR(sbi)) break; + page = get_meta_page_ra(sbi, blkaddr); + + if (cp_ver != cpver_of_node(page)) { + f2fs_put_page(page, 1); + break; + } + entry = get_fsync_inode(head, ino_of_node(page)); if (!entry) goto next; - + /* + * inode(x) | CP | inode(x) | dnode(F) + * In this case, we can lose the latest inode(x). + * So, call recover_inode for the inode update. + */ + if (entry->last_inode == blkaddr) + recover_inode(entry->inode, page); + if (entry->last_dentry == blkaddr) { + err = recover_dentry(entry->inode, page); + if (err) { + f2fs_put_page(page, 1); + break; + } + } err = do_recover_data(sbi, entry->inode, page, blkaddr); - if (err) + if (err) { + f2fs_put_page(page, 1); break; + } if (entry->blkaddr == blkaddr) { iput(entry->inode); @@ -429,11 +474,8 @@ static int recover_data(struct f2fs_sb_info *sbi, next: /* check next segment */ blkaddr = next_blkaddr_of_node(page); + f2fs_put_page(page, 1); } - - unlock_page(page); - __free_pages(page, 0); - if (!err) allocate_new_segments(sbi); return err; @@ -474,11 +516,15 @@ int recover_fsync_data(struct f2fs_sb_info *sbi) /* step #2: recover data */ err = recover_data(sbi, &inode_list, CURSEG_WARM_NODE); if (!err) - f2fs_bug_on(!list_empty(&inode_list)); + f2fs_bug_on(sbi, !list_empty(&inode_list)); out: destroy_fsync_dnodes(&inode_list); kmem_cache_destroy(fsync_entry_slab); + /* truncate meta pages to be used by the recovery */ + truncate_inode_pages_range(META_MAPPING(sbi), + MAIN_BLKADDR(sbi) << PAGE_CACHE_SHIFT, -1); + if (err) { truncate_inode_pages_final(NODE_MAPPING(sbi)); truncate_inode_pages_final(META_MAPPING(sbi)); @@ -494,8 +540,11 @@ out: set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG); mutex_unlock(&sbi->cp_mutex); } else if (need_writecp) { + struct cp_control cpc = { + .reason = CP_SYNC, + }; mutex_unlock(&sbi->cp_mutex); - write_checkpoint(sbi, false); + write_checkpoint(sbi, &cpc); } else { mutex_unlock(&sbi->cp_mutex); } diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 0aa337cd5bba..923cb76fdc46 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -25,6 +25,8 @@ #define __reverse_ffz(x) __reverse_ffs(~(x)) static struct kmem_cache *discard_entry_slab; +static struct kmem_cache *sit_entry_set_slab; +static struct kmem_cache *inmem_entry_slab; /* * __reverse_ffs is copied from include/asm-generic/bitops/__ffs.h since @@ -172,6 +174,60 @@ found_middle: return result + __reverse_ffz(tmp); } +void register_inmem_page(struct inode *inode, struct page *page) +{ + struct f2fs_inode_info *fi = F2FS_I(inode); + struct inmem_pages *new; + + new = f2fs_kmem_cache_alloc(inmem_entry_slab, GFP_NOFS); + + /* add atomic page indices to the list */ + new->page = page; + INIT_LIST_HEAD(&new->list); + + /* increase reference count with clean state */ + mutex_lock(&fi->inmem_lock); + get_page(page); + list_add_tail(&new->list, &fi->inmem_pages); + mutex_unlock(&fi->inmem_lock); +} + +void commit_inmem_pages(struct inode *inode, bool abort) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct f2fs_inode_info *fi = F2FS_I(inode); + struct inmem_pages *cur, *tmp; + bool submit_bio = false; + struct f2fs_io_info fio = { + .type = DATA, + .rw = WRITE_SYNC, + }; + + f2fs_balance_fs(sbi); + f2fs_lock_op(sbi); + + mutex_lock(&fi->inmem_lock); + list_for_each_entry_safe(cur, tmp, &fi->inmem_pages, list) { + lock_page(cur->page); + if (!abort && cur->page->mapping == inode->i_mapping) { + f2fs_wait_on_page_writeback(cur->page, DATA); + if (clear_page_dirty_for_io(cur->page)) + inode_dec_dirty_pages(inode); + do_write_data_page(cur->page, &fio); + submit_bio = true; + } + f2fs_put_page(cur->page, 1); + list_del(&cur->list); + kmem_cache_free(inmem_entry_slab, cur); + } + if (submit_bio) + f2fs_submit_merged_bio(sbi, DATA, WRITE); + mutex_unlock(&fi->inmem_lock); + + filemap_fdatawait_range(inode->i_mapping, 0, LLONG_MAX); + f2fs_unlock_op(sbi); +} + /* * This function balances dirty node and dentry pages. * In addition, it controls garbage collection. @@ -205,24 +261,20 @@ repeat: if (kthread_should_stop()) return 0; - spin_lock(&fcc->issue_lock); - if (fcc->issue_list) { - fcc->dispatch_list = fcc->issue_list; - fcc->issue_list = fcc->issue_tail = NULL; - } - spin_unlock(&fcc->issue_lock); - - if (fcc->dispatch_list) { + if (!llist_empty(&fcc->issue_list)) { struct bio *bio = bio_alloc(GFP_NOIO, 0); struct flush_cmd *cmd, *next; int ret; + fcc->dispatch_list = llist_del_all(&fcc->issue_list); + fcc->dispatch_list = llist_reverse_order(fcc->dispatch_list); + bio->bi_bdev = sbi->sb->s_bdev; ret = submit_bio_wait(WRITE_FLUSH, bio); - for (cmd = fcc->dispatch_list; cmd; cmd = next) { + llist_for_each_entry_safe(cmd, next, + fcc->dispatch_list, llnode) { cmd->ret = ret; - next = cmd->next; complete(&cmd->wait); } bio_put(bio); @@ -230,7 +282,7 @@ repeat: } wait_event_interruptible(*q, - kthread_should_stop() || fcc->issue_list); + kthread_should_stop() || !llist_empty(&fcc->issue_list)); goto repeat; } @@ -249,15 +301,8 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi) return blkdev_issue_flush(sbi->sb->s_bdev, GFP_KERNEL, NULL); init_completion(&cmd.wait); - cmd.next = NULL; - spin_lock(&fcc->issue_lock); - if (fcc->issue_list) - fcc->issue_tail->next = &cmd; - else - fcc->issue_list = &cmd; - fcc->issue_tail = &cmd; - spin_unlock(&fcc->issue_lock); + llist_add(&cmd.llnode, &fcc->issue_list); if (!fcc->dispatch_list) wake_up(&fcc->flush_wait_queue); @@ -276,8 +321,8 @@ int create_flush_cmd_control(struct f2fs_sb_info *sbi) fcc = kzalloc(sizeof(struct flush_cmd_control), GFP_KERNEL); if (!fcc) return -ENOMEM; - spin_lock_init(&fcc->issue_lock); init_waitqueue_head(&fcc->flush_wait_queue); + init_llist_head(&fcc->issue_list); SM_I(sbi)->cmd_control_info = fcc; fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi, "f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev)); @@ -317,6 +362,10 @@ static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno, struct seg_entry *sentry = get_seg_entry(sbi, segno); enum dirty_type t = sentry->type; + if (unlikely(t >= DIRTY)) { + f2fs_bug_on(sbi, 1); + return; + } if (!test_and_set_bit(segno, dirty_i->dirty_segmap[t])) dirty_i->nr_dirty[t]++; } @@ -376,8 +425,8 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno) static int f2fs_issue_discard(struct f2fs_sb_info *sbi, block_t blkstart, block_t blklen) { - sector_t start = SECTOR_FROM_BLOCK(sbi, blkstart); - sector_t len = SECTOR_FROM_BLOCK(sbi, blklen); + sector_t start = SECTOR_FROM_BLOCK(blkstart); + sector_t len = SECTOR_FROM_BLOCK(blklen); trace_f2fs_issue_discard(sbi->sb, blkstart, blklen); return blkdev_issue_discard(sbi->sb->s_bdev, start, len, GFP_NOFS, 0); } @@ -392,22 +441,48 @@ void discard_next_dnode(struct f2fs_sb_info *sbi, block_t blkaddr) } } -static void add_discard_addrs(struct f2fs_sb_info *sbi, - unsigned int segno, struct seg_entry *se) +static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc) { struct list_head *head = &SM_I(sbi)->discard_list; struct discard_entry *new; int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long); int max_blocks = sbi->blocks_per_seg; + struct seg_entry *se = get_seg_entry(sbi, cpc->trim_start); unsigned long *cur_map = (unsigned long *)se->cur_valid_map; unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map; unsigned long dmap[entries]; unsigned int start = 0, end = -1; + bool force = (cpc->reason == CP_DISCARD); int i; - if (!test_opt(sbi, DISCARD)) + if (!force && !test_opt(sbi, DISCARD)) return; + if (force && !se->valid_blocks) { + struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); + /* + * if this segment is registered in the prefree list, then + * we should skip adding a discard candidate, and let the + * checkpoint do that later. + */ + mutex_lock(&dirty_i->seglist_lock); + if (test_bit(cpc->trim_start, dirty_i->dirty_segmap[PRE])) { + mutex_unlock(&dirty_i->seglist_lock); + cpc->trimmed += sbi->blocks_per_seg; + return; + } + mutex_unlock(&dirty_i->seglist_lock); + + new = f2fs_kmem_cache_alloc(discard_entry_slab, GFP_NOFS); + INIT_LIST_HEAD(&new->list); + new->blkaddr = START_BLOCK(sbi, cpc->trim_start); + new->len = sbi->blocks_per_seg; + list_add_tail(&new->list, head); + SM_I(sbi)->nr_discards += sbi->blocks_per_seg; + cpc->trimmed += sbi->blocks_per_seg; + return; + } + /* zero block will be discarded through the prefree list */ if (!se->valid_blocks || se->valid_blocks == max_blocks) return; @@ -416,23 +491,39 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi, for (i = 0; i < entries; i++) dmap[i] = (cur_map[i] ^ ckpt_map[i]) & ckpt_map[i]; - while (SM_I(sbi)->nr_discards <= SM_I(sbi)->max_discards) { + while (force || SM_I(sbi)->nr_discards <= SM_I(sbi)->max_discards) { start = __find_rev_next_bit(dmap, max_blocks, end + 1); if (start >= max_blocks) break; end = __find_rev_next_zero_bit(dmap, max_blocks, start + 1); + if (end - start < cpc->trim_minlen) + continue; + new = f2fs_kmem_cache_alloc(discard_entry_slab, GFP_NOFS); INIT_LIST_HEAD(&new->list); - new->blkaddr = START_BLOCK(sbi, segno) + start; + new->blkaddr = START_BLOCK(sbi, cpc->trim_start) + start; new->len = end - start; + cpc->trimmed += end - start; list_add_tail(&new->list, head); SM_I(sbi)->nr_discards += end - start; } } +void release_discard_addrs(struct f2fs_sb_info *sbi) +{ + struct list_head *head = &(SM_I(sbi)->discard_list); + struct discard_entry *entry, *this; + + /* drop caches */ + list_for_each_entry_safe(entry, this, head, list) { + list_del(&entry->list); + kmem_cache_free(discard_entry_slab, entry); + } +} + /* * Should call clear_prefree_segments after checkpoint is done. */ @@ -440,10 +531,9 @@ static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi) { struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); unsigned int segno; - unsigned int total_segs = TOTAL_SEGS(sbi); mutex_lock(&dirty_i->seglist_lock); - for_each_set_bit(segno, dirty_i->dirty_segmap[PRE], total_segs) + for_each_set_bit(segno, dirty_i->dirty_segmap[PRE], MAIN_SEGS(sbi)) __set_test_and_free(sbi, segno); mutex_unlock(&dirty_i->seglist_lock); } @@ -454,17 +544,17 @@ void clear_prefree_segments(struct f2fs_sb_info *sbi) struct discard_entry *entry, *this; struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); unsigned long *prefree_map = dirty_i->dirty_segmap[PRE]; - unsigned int total_segs = TOTAL_SEGS(sbi); unsigned int start = 0, end = -1; mutex_lock(&dirty_i->seglist_lock); while (1) { int i; - start = find_next_bit(prefree_map, total_segs, end + 1); - if (start >= total_segs) + start = find_next_bit(prefree_map, MAIN_SEGS(sbi), end + 1); + if (start >= MAIN_SEGS(sbi)) break; - end = find_next_zero_bit(prefree_map, total_segs, start + 1); + end = find_next_zero_bit(prefree_map, MAIN_SEGS(sbi), + start + 1); for (i = start; i < end; i++) clear_bit(i, prefree_map); @@ -488,11 +578,16 @@ void clear_prefree_segments(struct f2fs_sb_info *sbi) } } -static void __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno) +static bool __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno) { struct sit_info *sit_i = SIT_I(sbi); - if (!__test_and_set_bit(segno, sit_i->dirty_sentries_bitmap)) + + if (!__test_and_set_bit(segno, sit_i->dirty_sentries_bitmap)) { sit_i->dirty_sentries++; + return false; + } + + return true; } static void __set_sit_entry_type(struct f2fs_sb_info *sbi, int type, @@ -516,7 +611,7 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) new_vblocks = se->valid_blocks + del; offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr); - f2fs_bug_on((new_vblocks >> (sizeof(unsigned short) << 3) || + f2fs_bug_on(sbi, (new_vblocks >> (sizeof(unsigned short) << 3) || (new_vblocks > sbi->blocks_per_seg))); se->valid_blocks = new_vblocks; @@ -526,10 +621,10 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) /* Update valid block bitmap */ if (del > 0) { if (f2fs_set_bit(offset, se->cur_valid_map)) - BUG(); + f2fs_bug_on(sbi, 1); } else { if (!f2fs_clear_bit(offset, se->cur_valid_map)) - BUG(); + f2fs_bug_on(sbi, 1); } if (!f2fs_test_bit(offset, se->ckpt_valid_map)) se->ckpt_valid_blocks += del; @@ -558,7 +653,7 @@ void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr) unsigned int segno = GET_SEGNO(sbi, addr); struct sit_info *sit_i = SIT_I(sbi); - f2fs_bug_on(addr == NULL_ADDR); + f2fs_bug_on(sbi, addr == NULL_ADDR); if (addr == NEW_ADDR) return; @@ -634,7 +729,7 @@ static int is_next_segment_free(struct f2fs_sb_info *sbi, int type) unsigned int segno = curseg->segno + 1; struct free_segmap_info *free_i = FREE_I(sbi); - if (segno < TOTAL_SEGS(sbi) && segno % sbi->segs_per_sec) + if (segno < MAIN_SEGS(sbi) && segno % sbi->segs_per_sec) return !test_bit(segno, free_i->free_segmap); return 0; } @@ -648,7 +743,7 @@ static void get_new_segment(struct f2fs_sb_info *sbi, { struct free_segmap_info *free_i = FREE_I(sbi); unsigned int segno, secno, zoneno; - unsigned int total_zones = TOTAL_SECS(sbi) / sbi->secs_per_zone; + unsigned int total_zones = MAIN_SECS(sbi) / sbi->secs_per_zone; unsigned int hint = *newseg / sbi->segs_per_sec; unsigned int old_zoneno = GET_ZONENO_FROM_SEGNO(sbi, *newseg); unsigned int left_start = hint; @@ -660,18 +755,18 @@ static void get_new_segment(struct f2fs_sb_info *sbi, if (!new_sec && ((*newseg + 1) % sbi->segs_per_sec)) { segno = find_next_zero_bit(free_i->free_segmap, - TOTAL_SEGS(sbi), *newseg + 1); + MAIN_SEGS(sbi), *newseg + 1); if (segno - *newseg < sbi->segs_per_sec - (*newseg % sbi->segs_per_sec)) goto got_it; } find_other_zone: - secno = find_next_zero_bit(free_i->free_secmap, TOTAL_SECS(sbi), hint); - if (secno >= TOTAL_SECS(sbi)) { + secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint); + if (secno >= MAIN_SECS(sbi)) { if (dir == ALLOC_RIGHT) { secno = find_next_zero_bit(free_i->free_secmap, - TOTAL_SECS(sbi), 0); - f2fs_bug_on(secno >= TOTAL_SECS(sbi)); + MAIN_SECS(sbi), 0); + f2fs_bug_on(sbi, secno >= MAIN_SECS(sbi)); } else { go_left = 1; left_start = hint - 1; @@ -686,8 +781,8 @@ find_other_zone: continue; } left_start = find_next_zero_bit(free_i->free_secmap, - TOTAL_SECS(sbi), 0); - f2fs_bug_on(left_start >= TOTAL_SECS(sbi)); + MAIN_SECS(sbi), 0); + f2fs_bug_on(sbi, left_start >= MAIN_SECS(sbi)); break; } secno = left_start; @@ -726,7 +821,7 @@ skip_left: } got_it: /* set it as dirty segment in free segmap */ - f2fs_bug_on(test_bit(segno, free_i->free_segmap)); + f2fs_bug_on(sbi, test_bit(segno, free_i->free_segmap)); __set_inuse(sbi, segno); *newseg = segno; write_unlock(&free_i->segmap_lock); @@ -898,6 +993,37 @@ static const struct segment_allocation default_salloc_ops = { .allocate_segment = allocate_segment_by_default, }; +int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) +{ + __u64 start = range->start >> sbi->log_blocksize; + __u64 end = start + (range->len >> sbi->log_blocksize) - 1; + unsigned int start_segno, end_segno; + struct cp_control cpc; + + if (range->minlen > SEGMENT_SIZE(sbi) || start >= MAX_BLKADDR(sbi) || + range->len < sbi->blocksize) + return -EINVAL; + + if (end <= MAIN_BLKADDR(sbi)) + goto out; + + /* start/end segment number in main_area */ + start_segno = (start <= MAIN_BLKADDR(sbi)) ? 0 : GET_SEGNO(sbi, start); + end_segno = (end >= MAX_BLKADDR(sbi)) ? MAIN_SEGS(sbi) - 1 : + GET_SEGNO(sbi, end); + cpc.reason = CP_DISCARD; + cpc.trim_start = start_segno; + cpc.trim_end = end_segno; + cpc.trim_minlen = range->minlen >> sbi->log_blocksize; + cpc.trimmed = 0; + + /* do checkpoint to issue discard commands safely */ + write_checkpoint(sbi, &cpc); +out: + range->len = cpc.trimmed << sbi->log_blocksize; + return 0; +} + static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type) { struct curseg_info *curseg = CURSEG_I(sbi, type); @@ -953,15 +1079,15 @@ static int __get_segment_type_6(struct page *page, enum page_type p_type) static int __get_segment_type(struct page *page, enum page_type p_type) { - struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb); - switch (sbi->active_logs) { + switch (F2FS_P_SB(page)->active_logs) { case 2: return __get_segment_type_2(page, p_type); case 4: return __get_segment_type_4(page, p_type); } /* NR_CURSEG_TYPE(6) logs by default */ - f2fs_bug_on(sbi->active_logs != NR_CURSEG_TYPE); + f2fs_bug_on(F2FS_P_SB(page), + F2FS_P_SB(page)->active_logs != NR_CURSEG_TYPE); return __get_segment_type_6(page, p_type); } @@ -1041,11 +1167,11 @@ void write_node_page(struct f2fs_sb_info *sbi, struct page *page, void write_data_page(struct page *page, struct dnode_of_data *dn, block_t *new_blkaddr, struct f2fs_io_info *fio) { - struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); + struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); struct f2fs_summary sum; struct node_info ni; - f2fs_bug_on(dn->data_blkaddr == NULL_ADDR); + f2fs_bug_on(sbi, dn->data_blkaddr == NULL_ADDR); get_node_info(sbi, dn->nid, &ni); set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version); @@ -1055,9 +1181,7 @@ void write_data_page(struct page *page, struct dnode_of_data *dn, void rewrite_data_page(struct page *page, block_t old_blkaddr, struct f2fs_io_info *fio) { - struct inode *inode = page->mapping->host; - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); - f2fs_submit_page_mbio(sbi, page, old_blkaddr, fio); + f2fs_submit_page_mbio(F2FS_P_SB(page), page, old_blkaddr, fio); } void recover_data_page(struct f2fs_sb_info *sbi, @@ -1130,8 +1254,9 @@ out: void f2fs_wait_on_page_writeback(struct page *page, enum page_type type) { - struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb); if (PageWriteback(page)) { + struct f2fs_sb_info *sbi = F2FS_P_SB(page); + if (is_merged_page(sbi, page, type)) f2fs_submit_merged_bio(sbi, type, WRITE); wait_on_page_writeback(page); @@ -1400,7 +1525,7 @@ static struct page *get_current_sit_page(struct f2fs_sb_info *sbi, unsigned int segno) { struct sit_info *sit_i = SIT_I(sbi); - unsigned int offset = SIT_BLOCK_OFFSET(sit_i, segno); + unsigned int offset = SIT_BLOCK_OFFSET(segno); block_t blk_addr = sit_i->sit_base_addr + offset; check_seg_range(sbi, segno); @@ -1426,7 +1551,7 @@ static struct page *get_next_sit_page(struct f2fs_sb_info *sbi, /* get current sit block page without lock */ src_page = get_meta_page(sbi, src_off); dst_page = grab_meta_page(sbi, dst_off); - f2fs_bug_on(PageDirty(src_page)); + f2fs_bug_on(sbi, PageDirty(src_page)); src_addr = page_address(src_page); dst_addr = page_address(dst_page); @@ -1440,101 +1565,192 @@ static struct page *get_next_sit_page(struct f2fs_sb_info *sbi, return dst_page; } -static bool flush_sits_in_journal(struct f2fs_sb_info *sbi) +static struct sit_entry_set *grab_sit_entry_set(void) +{ + struct sit_entry_set *ses = + f2fs_kmem_cache_alloc(sit_entry_set_slab, GFP_ATOMIC); + + ses->entry_cnt = 0; + INIT_LIST_HEAD(&ses->set_list); + return ses; +} + +static void release_sit_entry_set(struct sit_entry_set *ses) +{ + list_del(&ses->set_list); + kmem_cache_free(sit_entry_set_slab, ses); +} + +static void adjust_sit_entry_set(struct sit_entry_set *ses, + struct list_head *head) +{ + struct sit_entry_set *next = ses; + + if (list_is_last(&ses->set_list, head)) + return; + + list_for_each_entry_continue(next, head, set_list) + if (ses->entry_cnt <= next->entry_cnt) + break; + + list_move_tail(&ses->set_list, &next->set_list); +} + +static void add_sit_entry(unsigned int segno, struct list_head *head) +{ + struct sit_entry_set *ses; + unsigned int start_segno = START_SEGNO(segno); + + list_for_each_entry(ses, head, set_list) { + if (ses->start_segno == start_segno) { + ses->entry_cnt++; + adjust_sit_entry_set(ses, head); + return; + } + } + + ses = grab_sit_entry_set(); + + ses->start_segno = start_segno; + ses->entry_cnt++; + list_add(&ses->set_list, head); +} + +static void add_sits_in_set(struct f2fs_sb_info *sbi) +{ + struct f2fs_sm_info *sm_info = SM_I(sbi); + struct list_head *set_list = &sm_info->sit_entry_set; + unsigned long *bitmap = SIT_I(sbi)->dirty_sentries_bitmap; + unsigned int segno; + + for_each_set_bit(segno, bitmap, MAIN_SEGS(sbi)) + add_sit_entry(segno, set_list); +} + +static void remove_sits_in_journal(struct f2fs_sb_info *sbi) { struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA); struct f2fs_summary_block *sum = curseg->sum_blk; int i; - /* - * If the journal area in the current summary is full of sit entries, - * all the sit entries will be flushed. Otherwise the sit entries - * are not able to replace with newly hot sit entries. - */ - if (sits_in_cursum(sum) >= SIT_JOURNAL_ENTRIES) { - for (i = sits_in_cursum(sum) - 1; i >= 0; i--) { - unsigned int segno; - segno = le32_to_cpu(segno_in_journal(sum, i)); - __mark_sit_entry_dirty(sbi, segno); - } - update_sits_in_cursum(sum, -sits_in_cursum(sum)); - return true; + for (i = sits_in_cursum(sum) - 1; i >= 0; i--) { + unsigned int segno; + bool dirtied; + + segno = le32_to_cpu(segno_in_journal(sum, i)); + dirtied = __mark_sit_entry_dirty(sbi, segno); + + if (!dirtied) + add_sit_entry(segno, &SM_I(sbi)->sit_entry_set); } - return false; + update_sits_in_cursum(sum, -sits_in_cursum(sum)); } /* * CP calls this function, which flushes SIT entries including sit_journal, * and moves prefree segs to free segs. */ -void flush_sit_entries(struct f2fs_sb_info *sbi) +void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) { struct sit_info *sit_i = SIT_I(sbi); unsigned long *bitmap = sit_i->dirty_sentries_bitmap; struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA); struct f2fs_summary_block *sum = curseg->sum_blk; - unsigned long nsegs = TOTAL_SEGS(sbi); - struct page *page = NULL; - struct f2fs_sit_block *raw_sit = NULL; - unsigned int start = 0, end = 0; - unsigned int segno; - bool flushed; + struct sit_entry_set *ses, *tmp; + struct list_head *head = &SM_I(sbi)->sit_entry_set; + bool to_journal = true; + struct seg_entry *se; mutex_lock(&curseg->curseg_mutex); mutex_lock(&sit_i->sentry_lock); /* - * "flushed" indicates whether sit entries in journal are flushed - * to the SIT area or not. + * add and account sit entries of dirty bitmap in sit entry + * set temporarily */ - flushed = flush_sits_in_journal(sbi); + add_sits_in_set(sbi); - for_each_set_bit(segno, bitmap, nsegs) { - struct seg_entry *se = get_seg_entry(sbi, segno); - int sit_offset, offset; + /* + * if there are no enough space in journal to store dirty sit + * entries, remove all entries from journal and add and account + * them in sit entry set. + */ + if (!__has_cursum_space(sum, sit_i->dirty_sentries, SIT_JOURNAL)) + remove_sits_in_journal(sbi); - sit_offset = SIT_ENTRY_OFFSET(sit_i, segno); + if (!sit_i->dirty_sentries) + goto out; - /* add discard candidates */ - if (SM_I(sbi)->nr_discards < SM_I(sbi)->max_discards) - add_discard_addrs(sbi, segno, se); + /* + * there are two steps to flush sit entries: + * #1, flush sit entries to journal in current cold data summary block. + * #2, flush sit entries to sit page. + */ + list_for_each_entry_safe(ses, tmp, head, set_list) { + struct page *page; + struct f2fs_sit_block *raw_sit = NULL; + unsigned int start_segno = ses->start_segno; + unsigned int end = min(start_segno + SIT_ENTRY_PER_BLOCK, + (unsigned long)MAIN_SEGS(sbi)); + unsigned int segno = start_segno; - if (flushed) - goto to_sit_page; + if (to_journal && + !__has_cursum_space(sum, ses->entry_cnt, SIT_JOURNAL)) + to_journal = false; - offset = lookup_journal_in_cursum(sum, SIT_JOURNAL, segno, 1); - if (offset >= 0) { - segno_in_journal(sum, offset) = cpu_to_le32(segno); - seg_info_to_raw_sit(se, &sit_in_journal(sum, offset)); - goto flush_done; - } -to_sit_page: - if (!page || (start > segno) || (segno > end)) { - if (page) { - f2fs_put_page(page, 1); - page = NULL; - } - - start = START_SEGNO(sit_i, segno); - end = start + SIT_ENTRY_PER_BLOCK - 1; - - /* read sit block that will be updated */ - page = get_next_sit_page(sbi, start); + if (!to_journal) { + page = get_next_sit_page(sbi, start_segno); raw_sit = page_address(page); } - /* udpate entry in SIT block */ - seg_info_to_raw_sit(se, &raw_sit->entries[sit_offset]); -flush_done: - __clear_bit(segno, bitmap); - sit_i->dirty_sentries--; + /* flush dirty sit entries in region of current sit set */ + for_each_set_bit_from(segno, bitmap, end) { + int offset, sit_offset; + + se = get_seg_entry(sbi, segno); + + /* add discard candidates */ + if (SM_I(sbi)->nr_discards < SM_I(sbi)->max_discards) { + cpc->trim_start = segno; + add_discard_addrs(sbi, cpc); + } + + if (to_journal) { + offset = lookup_journal_in_cursum(sum, + SIT_JOURNAL, segno, 1); + f2fs_bug_on(sbi, offset < 0); + segno_in_journal(sum, offset) = + cpu_to_le32(segno); + seg_info_to_raw_sit(se, + &sit_in_journal(sum, offset)); + } else { + sit_offset = SIT_ENTRY_OFFSET(sit_i, segno); + seg_info_to_raw_sit(se, + &raw_sit->entries[sit_offset]); + } + + __clear_bit(segno, bitmap); + sit_i->dirty_sentries--; + ses->entry_cnt--; + } + + if (!to_journal) + f2fs_put_page(page, 1); + + f2fs_bug_on(sbi, ses->entry_cnt); + release_sit_entry_set(ses); + } + + f2fs_bug_on(sbi, !list_empty(head)); + f2fs_bug_on(sbi, sit_i->dirty_sentries); +out: + if (cpc->reason == CP_DISCARD) { + for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++) + add_discard_addrs(sbi, cpc); } mutex_unlock(&sit_i->sentry_lock); mutex_unlock(&curseg->curseg_mutex); - /* writeout last modified SIT block */ - f2fs_put_page(page, 1); - set_prefree_as_free_segments(sbi); } @@ -1554,16 +1770,16 @@ static int build_sit_info(struct f2fs_sb_info *sbi) SM_I(sbi)->sit_info = sit_i; - sit_i->sentries = vzalloc(TOTAL_SEGS(sbi) * sizeof(struct seg_entry)); + sit_i->sentries = vzalloc(MAIN_SEGS(sbi) * sizeof(struct seg_entry)); if (!sit_i->sentries) return -ENOMEM; - bitmap_size = f2fs_bitmap_size(TOTAL_SEGS(sbi)); + bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi)); sit_i->dirty_sentries_bitmap = kzalloc(bitmap_size, GFP_KERNEL); if (!sit_i->dirty_sentries_bitmap) return -ENOMEM; - for (start = 0; start < TOTAL_SEGS(sbi); start++) { + for (start = 0; start < MAIN_SEGS(sbi); start++) { sit_i->sentries[start].cur_valid_map = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL); sit_i->sentries[start].ckpt_valid_map @@ -1574,7 +1790,7 @@ static int build_sit_info(struct f2fs_sb_info *sbi) } if (sbi->segs_per_sec > 1) { - sit_i->sec_entries = vzalloc(TOTAL_SECS(sbi) * + sit_i->sec_entries = vzalloc(MAIN_SECS(sbi) * sizeof(struct sec_entry)); if (!sit_i->sec_entries) return -ENOMEM; @@ -1609,7 +1825,6 @@ static int build_sit_info(struct f2fs_sb_info *sbi) static int build_free_segmap(struct f2fs_sb_info *sbi) { - struct f2fs_sm_info *sm_info = SM_I(sbi); struct free_segmap_info *free_i; unsigned int bitmap_size, sec_bitmap_size; @@ -1620,12 +1835,12 @@ static int build_free_segmap(struct f2fs_sb_info *sbi) SM_I(sbi)->free_info = free_i; - bitmap_size = f2fs_bitmap_size(TOTAL_SEGS(sbi)); + bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi)); free_i->free_segmap = kmalloc(bitmap_size, GFP_KERNEL); if (!free_i->free_segmap) return -ENOMEM; - sec_bitmap_size = f2fs_bitmap_size(TOTAL_SECS(sbi)); + sec_bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi)); free_i->free_secmap = kmalloc(sec_bitmap_size, GFP_KERNEL); if (!free_i->free_secmap) return -ENOMEM; @@ -1635,8 +1850,7 @@ static int build_free_segmap(struct f2fs_sb_info *sbi) memset(free_i->free_secmap, 0xff, sec_bitmap_size); /* init free segmap information */ - free_i->start_segno = - (unsigned int) GET_SEGNO_FROM_SEG0(sbi, sm_info->main_blkaddr); + free_i->start_segno = GET_SEGNO_FROM_SEG0(sbi, MAIN_BLKADDR(sbi)); free_i->free_segments = 0; free_i->free_sections = 0; rwlock_init(&free_i->segmap_lock); @@ -1673,7 +1887,7 @@ static void build_sit_entries(struct f2fs_sb_info *sbi) int sit_blk_cnt = SIT_BLK_CNT(sbi); unsigned int i, start, end; unsigned int readed, start_blk = 0; - int nrpages = MAX_BIO_BLOCKS(max_hw_blocks(sbi)); + int nrpages = MAX_BIO_BLOCKS(sbi); do { readed = ra_meta_pages(sbi, start_blk, nrpages, META_SIT); @@ -1681,7 +1895,7 @@ static void build_sit_entries(struct f2fs_sb_info *sbi) start = start_blk * sit_i->sents_per_block; end = (start_blk + readed) * sit_i->sents_per_block; - for (; start < end && start < TOTAL_SEGS(sbi); start++) { + for (; start < end && start < MAIN_SEGS(sbi); start++) { struct seg_entry *se = &sit_i->sentries[start]; struct f2fs_sit_block *sit_blk; struct f2fs_sit_entry sit; @@ -1719,7 +1933,7 @@ static void init_free_segmap(struct f2fs_sb_info *sbi) unsigned int start; int type; - for (start = 0; start < TOTAL_SEGS(sbi); start++) { + for (start = 0; start < MAIN_SEGS(sbi); start++) { struct seg_entry *sentry = get_seg_entry(sbi, start); if (!sentry->valid_blocks) __set_free(sbi, start); @@ -1736,18 +1950,22 @@ static void init_dirty_segmap(struct f2fs_sb_info *sbi) { struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); struct free_segmap_info *free_i = FREE_I(sbi); - unsigned int segno = 0, offset = 0, total_segs = TOTAL_SEGS(sbi); + unsigned int segno = 0, offset = 0; unsigned short valid_blocks; while (1) { /* find dirty segment based on free segmap */ - segno = find_next_inuse(free_i, total_segs, offset); - if (segno >= total_segs) + segno = find_next_inuse(free_i, MAIN_SEGS(sbi), offset); + if (segno >= MAIN_SEGS(sbi)) break; offset = segno + 1; valid_blocks = get_valid_blocks(sbi, segno, 0); - if (valid_blocks >= sbi->blocks_per_seg || !valid_blocks) + if (valid_blocks == sbi->blocks_per_seg || !valid_blocks) continue; + if (valid_blocks > sbi->blocks_per_seg) { + f2fs_bug_on(sbi, 1); + continue; + } mutex_lock(&dirty_i->seglist_lock); __locate_dirty_segment(sbi, segno, DIRTY); mutex_unlock(&dirty_i->seglist_lock); @@ -1757,7 +1975,7 @@ static void init_dirty_segmap(struct f2fs_sb_info *sbi) static int init_victim_secmap(struct f2fs_sb_info *sbi) { struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); - unsigned int bitmap_size = f2fs_bitmap_size(TOTAL_SECS(sbi)); + unsigned int bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi)); dirty_i->victim_secmap = kzalloc(bitmap_size, GFP_KERNEL); if (!dirty_i->victim_secmap) @@ -1778,7 +1996,7 @@ static int build_dirty_segmap(struct f2fs_sb_info *sbi) SM_I(sbi)->dirty_info = dirty_i; mutex_init(&dirty_i->seglist_lock); - bitmap_size = f2fs_bitmap_size(TOTAL_SEGS(sbi)); + bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi)); for (i = 0; i < NR_DIRTY_TYPE; i++) { dirty_i->dirty_segmap[i] = kzalloc(bitmap_size, GFP_KERNEL); @@ -1802,7 +2020,7 @@ static void init_min_max_mtime(struct f2fs_sb_info *sbi) sit_i->min_mtime = LLONG_MAX; - for (segno = 0; segno < TOTAL_SEGS(sbi); segno += sbi->segs_per_sec) { + for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) { unsigned int i; unsigned long long mtime = 0; @@ -1840,13 +2058,16 @@ int build_segment_manager(struct f2fs_sb_info *sbi) sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr); sm_info->rec_prefree_segments = sm_info->main_segments * DEF_RECLAIM_PREFREE_SEGMENTS / 100; - sm_info->ipu_policy = F2FS_IPU_DISABLE; + sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC; sm_info->min_ipu_util = DEF_MIN_IPU_UTIL; + sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS; INIT_LIST_HEAD(&sm_info->discard_list); sm_info->nr_discards = 0; sm_info->max_discards = 0; + INIT_LIST_HEAD(&sm_info->sit_entry_set); + if (test_opt(sbi, FLUSH_MERGE) && !f2fs_readonly(sbi->sb)) { err = create_flush_cmd_control(sbi); if (err) @@ -1942,7 +2163,7 @@ static void destroy_sit_info(struct f2fs_sb_info *sbi) return; if (sit_i->sentries) { - for (start = 0; start < TOTAL_SEGS(sbi); start++) { + for (start = 0; start < MAIN_SEGS(sbi); start++) { kfree(sit_i->sentries[start].cur_valid_map); kfree(sit_i->sentries[start].ckpt_valid_map); } @@ -1976,11 +2197,30 @@ int __init create_segment_manager_caches(void) discard_entry_slab = f2fs_kmem_cache_create("discard_entry", sizeof(struct discard_entry)); if (!discard_entry_slab) - return -ENOMEM; + goto fail; + + sit_entry_set_slab = f2fs_kmem_cache_create("sit_entry_set", + sizeof(struct nat_entry_set)); + if (!sit_entry_set_slab) + goto destory_discard_entry; + + inmem_entry_slab = f2fs_kmem_cache_create("inmem_page_entry", + sizeof(struct inmem_pages)); + if (!inmem_entry_slab) + goto destroy_sit_entry_set; return 0; + +destroy_sit_entry_set: + kmem_cache_destroy(sit_entry_set_slab); +destory_discard_entry: + kmem_cache_destroy(discard_entry_slab); +fail: + return -ENOMEM; } void destroy_segment_manager_caches(void) { + kmem_cache_destroy(sit_entry_set_slab); kmem_cache_destroy(discard_entry_slab); + kmem_cache_destroy(inmem_entry_slab); } diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index ff483257283b..2495bec1c621 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -45,16 +45,26 @@ (secno == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno / \ sbi->segs_per_sec)) \ -#define START_BLOCK(sbi, segno) \ - (SM_I(sbi)->seg0_blkaddr + \ +#define MAIN_BLKADDR(sbi) (SM_I(sbi)->main_blkaddr) +#define SEG0_BLKADDR(sbi) (SM_I(sbi)->seg0_blkaddr) + +#define MAIN_SEGS(sbi) (SM_I(sbi)->main_segments) +#define MAIN_SECS(sbi) (sbi->total_sections) + +#define TOTAL_SEGS(sbi) (SM_I(sbi)->segment_count) +#define TOTAL_BLKS(sbi) (TOTAL_SEGS(sbi) << sbi->log_blocks_per_seg) + +#define MAX_BLKADDR(sbi) (SEG0_BLKADDR(sbi) + TOTAL_BLKS(sbi)) +#define SEGMENT_SIZE(sbi) (1ULL << (sbi->log_blocksize + \ + sbi->log_blocks_per_seg)) + +#define START_BLOCK(sbi, segno) (SEG0_BLKADDR(sbi) + \ (GET_R2L_SEGNO(FREE_I(sbi), segno) << sbi->log_blocks_per_seg)) + #define NEXT_FREE_BLKADDR(sbi, curseg) \ (START_BLOCK(sbi, curseg->segno) + curseg->next_blkoff) -#define MAIN_BASE_BLOCK(sbi) (SM_I(sbi)->main_blkaddr) - -#define GET_SEGOFF_FROM_SEG0(sbi, blk_addr) \ - ((blk_addr) - SM_I(sbi)->seg0_blkaddr) +#define GET_SEGOFF_FROM_SEG0(sbi, blk_addr) ((blk_addr) - SEG0_BLKADDR(sbi)) #define GET_SEGNO_FROM_SEG0(sbi, blk_addr) \ (GET_SEGOFF_FROM_SEG0(sbi, blk_addr) >> sbi->log_blocks_per_seg) #define GET_BLKOFF_FROM_SEG0(sbi, blk_addr) \ @@ -77,23 +87,21 @@ #define SIT_ENTRY_OFFSET(sit_i, segno) \ (segno % sit_i->sents_per_block) -#define SIT_BLOCK_OFFSET(sit_i, segno) \ +#define SIT_BLOCK_OFFSET(segno) \ (segno / SIT_ENTRY_PER_BLOCK) -#define START_SEGNO(sit_i, segno) \ - (SIT_BLOCK_OFFSET(sit_i, segno) * SIT_ENTRY_PER_BLOCK) +#define START_SEGNO(segno) \ + (SIT_BLOCK_OFFSET(segno) * SIT_ENTRY_PER_BLOCK) #define SIT_BLK_CNT(sbi) \ - ((TOTAL_SEGS(sbi) + SIT_ENTRY_PER_BLOCK - 1) / SIT_ENTRY_PER_BLOCK) + ((MAIN_SEGS(sbi) + SIT_ENTRY_PER_BLOCK - 1) / SIT_ENTRY_PER_BLOCK) #define f2fs_bitmap_size(nr) \ (BITS_TO_LONGS(nr) * sizeof(unsigned long)) -#define TOTAL_SEGS(sbi) (SM_I(sbi)->main_segments) -#define TOTAL_SECS(sbi) (sbi->total_sections) -#define SECTOR_FROM_BLOCK(sbi, blk_addr) \ - (((sector_t)blk_addr) << (sbi)->log_sectors_per_block) -#define SECTOR_TO_BLOCK(sbi, sectors) \ - (sectors >> (sbi)->log_sectors_per_block) -#define MAX_BIO_BLOCKS(max_hw_blocks) \ - (min((int)max_hw_blocks, BIO_MAX_PAGES)) +#define SECTOR_FROM_BLOCK(blk_addr) \ + (((sector_t)blk_addr) << F2FS_LOG_SECTORS_PER_BLOCK) +#define SECTOR_TO_BLOCK(sectors) \ + (sectors >> F2FS_LOG_SECTORS_PER_BLOCK) +#define MAX_BIO_BLOCKS(sbi) \ + ((int)min((int)max_hw_blocks(sbi), BIO_MAX_PAGES)) /* * indicate a block allocation direction: RIGHT and LEFT. @@ -167,6 +175,11 @@ struct segment_allocation { void (*allocate_segment)(struct f2fs_sb_info *, int, bool); }; +struct inmem_pages { + struct list_head list; + struct page *page; +}; + struct sit_info { const struct segment_allocation *s_ops; @@ -237,6 +250,12 @@ struct curseg_info { unsigned int next_segno; /* preallocated segment */ }; +struct sit_entry_set { + struct list_head set_list; /* link with all sit sets */ + unsigned int start_segno; /* start segno of sits in set */ + unsigned int entry_cnt; /* the # of sit entries in set */ +}; + /* * inline functions */ @@ -316,7 +335,7 @@ static inline void __set_free(struct f2fs_sb_info *sbi, unsigned int segno) clear_bit(segno, free_i->free_segmap); free_i->free_segments++; - next = find_next_bit(free_i->free_segmap, TOTAL_SEGS(sbi), start_segno); + next = find_next_bit(free_i->free_segmap, MAIN_SEGS(sbi), start_segno); if (next >= start_segno + sbi->segs_per_sec) { clear_bit(secno, free_i->free_secmap); free_i->free_sections++; @@ -430,8 +449,10 @@ static inline int reserved_sections(struct f2fs_sb_info *sbi) static inline bool need_SSR(struct f2fs_sb_info *sbi) { - return (prefree_segments(sbi) / sbi->segs_per_sec) - + free_sections(sbi) < overprovision_sections(sbi); + int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES); + int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS); + return free_sections(sbi) <= (node_secs + 2 * dent_secs + + reserved_sections(sbi) + 1); } static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, int freed) @@ -466,48 +487,47 @@ static inline int utilization(struct f2fs_sb_info *sbi) * F2FS_IPU_UTIL - if FS utilization is over threashold, * F2FS_IPU_SSR_UTIL - if SSR mode is activated and FS utilization is over * threashold, + * F2FS_IPU_FSYNC - activated in fsync path only for high performance flash + * storages. IPU will be triggered only if the # of dirty + * pages over min_fsync_blocks. * F2FS_IPUT_DISABLE - disable IPU. (=default option) */ #define DEF_MIN_IPU_UTIL 70 +#define DEF_MIN_FSYNC_BLOCKS 8 enum { F2FS_IPU_FORCE, F2FS_IPU_SSR, F2FS_IPU_UTIL, F2FS_IPU_SSR_UTIL, - F2FS_IPU_DISABLE, + F2FS_IPU_FSYNC, }; static inline bool need_inplace_update(struct inode *inode) { - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + unsigned int policy = SM_I(sbi)->ipu_policy; /* IPU can be done only for the user data */ - if (S_ISDIR(inode->i_mode)) + if (S_ISDIR(inode->i_mode) || f2fs_is_atomic_file(inode)) return false; - /* this is only set during fdatasync */ - if (is_inode_flag_set(F2FS_I(inode), FI_NEED_IPU)) + if (policy & (0x1 << F2FS_IPU_FORCE)) + return true; + if (policy & (0x1 << F2FS_IPU_SSR) && need_SSR(sbi)) + return true; + if (policy & (0x1 << F2FS_IPU_UTIL) && + utilization(sbi) > SM_I(sbi)->min_ipu_util) + return true; + if (policy & (0x1 << F2FS_IPU_SSR_UTIL) && need_SSR(sbi) && + utilization(sbi) > SM_I(sbi)->min_ipu_util) return true; - switch (SM_I(sbi)->ipu_policy) { - case F2FS_IPU_FORCE: + /* this is only set during fdatasync */ + if (policy & (0x1 << F2FS_IPU_FSYNC) && + is_inode_flag_set(F2FS_I(inode), FI_NEED_IPU)) return true; - case F2FS_IPU_SSR: - if (need_SSR(sbi)) - return true; - break; - case F2FS_IPU_UTIL: - if (utilization(sbi) > SM_I(sbi)->min_ipu_util) - return true; - break; - case F2FS_IPU_SSR_UTIL: - if (need_SSR(sbi) && utilization(sbi) > SM_I(sbi)->min_ipu_util) - return true; - break; - case F2FS_IPU_DISABLE: - break; - } + return false; } @@ -534,18 +554,13 @@ static inline unsigned short curseg_blkoff(struct f2fs_sb_info *sbi, int type) #ifdef CONFIG_F2FS_CHECK_FS static inline void check_seg_range(struct f2fs_sb_info *sbi, unsigned int segno) { - unsigned int end_segno = SM_I(sbi)->segment_count - 1; - BUG_ON(segno > end_segno); + BUG_ON(segno > TOTAL_SEGS(sbi) - 1); } static inline void verify_block_addr(struct f2fs_sb_info *sbi, block_t blk_addr) { - struct f2fs_sm_info *sm_info = SM_I(sbi); - block_t total_blks = sm_info->segment_count << sbi->log_blocks_per_seg; - block_t start_addr = sm_info->seg0_blkaddr; - block_t end_addr = start_addr + total_blks - 1; - BUG_ON(blk_addr < start_addr); - BUG_ON(blk_addr > end_addr); + BUG_ON(blk_addr < SEG0_BLKADDR(sbi)); + BUG_ON(blk_addr >= MAX_BLKADDR(sbi)); } /* @@ -554,8 +569,6 @@ static inline void verify_block_addr(struct f2fs_sb_info *sbi, block_t blk_addr) static inline void check_block_count(struct f2fs_sb_info *sbi, int segno, struct f2fs_sit_entry *raw_sit) { - struct f2fs_sm_info *sm_info = SM_I(sbi); - unsigned int end_segno = sm_info->segment_count - 1; bool is_valid = test_bit_le(0, raw_sit->valid_map) ? true : false; int valid_blocks = 0; int cur_pos = 0, next_pos; @@ -564,7 +577,7 @@ static inline void check_block_count(struct f2fs_sb_info *sbi, BUG_ON(GET_SIT_VBLOCKS(raw_sit) > sbi->blocks_per_seg); /* check boundary of a given segment number */ - BUG_ON(segno > end_segno); + BUG_ON(segno > TOTAL_SEGS(sbi) - 1); /* check bitmap with valid block count */ do { @@ -583,16 +596,39 @@ static inline void check_block_count(struct f2fs_sb_info *sbi, BUG_ON(GET_SIT_VBLOCKS(raw_sit) != valid_blocks); } #else -#define check_seg_range(sbi, segno) -#define verify_block_addr(sbi, blk_addr) -#define check_block_count(sbi, segno, raw_sit) +static inline void check_seg_range(struct f2fs_sb_info *sbi, unsigned int segno) +{ + if (segno > TOTAL_SEGS(sbi) - 1) + sbi->need_fsck = true; +} + +static inline void verify_block_addr(struct f2fs_sb_info *sbi, block_t blk_addr) +{ + if (blk_addr < SEG0_BLKADDR(sbi) || blk_addr >= MAX_BLKADDR(sbi)) + sbi->need_fsck = true; +} + +/* + * Summary block is always treated as an invalid block + */ +static inline void check_block_count(struct f2fs_sb_info *sbi, + int segno, struct f2fs_sit_entry *raw_sit) +{ + /* check segment usage */ + if (GET_SIT_VBLOCKS(raw_sit) > sbi->blocks_per_seg) + sbi->need_fsck = true; + + /* check boundary of a given segment number */ + if (segno > TOTAL_SEGS(sbi) - 1) + sbi->need_fsck = true; +} #endif static inline pgoff_t current_sit_addr(struct f2fs_sb_info *sbi, unsigned int start) { struct sit_info *sit_i = SIT_I(sbi); - unsigned int offset = SIT_BLOCK_OFFSET(sit_i, start); + unsigned int offset = SIT_BLOCK_OFFSET(start); block_t blk_addr = sit_i->sit_base_addr + offset; check_seg_range(sbi, start); @@ -619,7 +655,7 @@ static inline pgoff_t next_sit_addr(struct f2fs_sb_info *sbi, static inline void set_to_next_sit(struct sit_info *sit_i, unsigned int start) { - unsigned int block_off = SIT_BLOCK_OFFSET(sit_i, start); + unsigned int block_off = SIT_BLOCK_OFFSET(start); if (f2fs_test_bit(block_off, sit_i->sit_bitmap)) f2fs_clear_bit(block_off, sit_i->sit_bitmap); @@ -666,7 +702,7 @@ static inline unsigned int max_hw_blocks(struct f2fs_sb_info *sbi) { struct block_device *bdev = sbi->sb->s_bdev; struct request_queue *q = bdev_get_queue(bdev); - return SECTOR_TO_BLOCK(sbi, queue_max_sectors(q)); + return SECTOR_TO_BLOCK(queue_max_sectors(q)); } /* @@ -683,7 +719,7 @@ static inline int nr_pages_to_skip(struct f2fs_sb_info *sbi, int type) else if (type == NODE) return 3 * sbi->blocks_per_seg; else if (type == META) - return MAX_BIO_BLOCKS(max_hw_blocks(sbi)); + return MAX_BIO_BLOCKS(sbi); else return 0; } @@ -706,7 +742,7 @@ static inline long nr_pages_to_write(struct f2fs_sb_info *sbi, int type, else if (type == NODE) desired = 3 * max_hw_blocks(sbi); else - desired = MAX_BIO_BLOCKS(max_hw_blocks(sbi)); + desired = MAX_BIO_BLOCKS(sbi); wbc->nr_to_write = desired; return desired - nr_to_write; diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 41bdf511003d..41d6f700f4ee 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -190,6 +190,7 @@ F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments); F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, max_small_discards, max_discards); F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy); F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util); +F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_fsync_blocks, min_fsync_blocks); F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_victim_search, max_victim_search); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, dir_level, dir_level); @@ -204,6 +205,7 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(max_small_discards), ATTR_LIST(ipu_policy), ATTR_LIST(min_ipu_util), + ATTR_LIST(min_fsync_blocks), ATTR_LIST(max_victim_search), ATTR_LIST(dir_level), ATTR_LIST(ram_thresh), @@ -366,11 +368,13 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb) /* Initialize f2fs-specific inode info */ fi->vfs_inode.i_version = 1; - atomic_set(&fi->dirty_dents, 0); + atomic_set(&fi->dirty_pages, 0); fi->i_current_depth = 1; fi->i_advise = 0; rwlock_init(&fi->ext.ext_lock); init_rwsem(&fi->i_sem); + INIT_LIST_HEAD(&fi->inmem_pages); + mutex_init(&fi->inmem_lock); set_inode_flag(fi, FI_NEW_INODE); @@ -432,14 +436,19 @@ static void f2fs_put_super(struct super_block *sb) stop_gc_thread(sbi); /* We don't need to do checkpoint when it's clean */ - if (sbi->s_dirty) - write_checkpoint(sbi, true); + if (sbi->s_dirty) { + struct cp_control cpc = { + .reason = CP_UMOUNT, + }; + write_checkpoint(sbi, &cpc); + } /* * normally superblock is clean, so we need to release this. * In addition, EIO will skip do checkpoint, we need this as well. */ release_dirty_inode(sbi); + release_discard_addrs(sbi); iput(sbi->node_inode); iput(sbi->meta_inode); @@ -464,8 +473,11 @@ int f2fs_sync_fs(struct super_block *sb, int sync) trace_f2fs_sync_fs(sb, sync); if (sync) { + struct cp_control cpc = { + .reason = CP_SYNC, + }; mutex_lock(&sbi->gc_mutex); - write_checkpoint(sbi, false); + write_checkpoint(sbi, &cpc); mutex_unlock(&sbi->gc_mutex); } else { f2fs_balance_fs(sbi); @@ -616,6 +628,9 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) org_mount_opt = sbi->mount_opt; active_logs = sbi->active_logs; + sbi->mount_opt.opt = 0; + sbi->active_logs = NR_CURSEG_TYPE; + /* parse mount options */ err = parse_options(sb, data); if (err) @@ -786,14 +801,22 @@ static int sanity_check_raw_super(struct super_block *sb, return 1; } - if (le32_to_cpu(raw_super->log_sectorsize) != - F2FS_LOG_SECTOR_SIZE) { - f2fs_msg(sb, KERN_INFO, "Invalid log sectorsize"); + /* Currently, support 512/1024/2048/4096 bytes sector size */ + if (le32_to_cpu(raw_super->log_sectorsize) > + F2FS_MAX_LOG_SECTOR_SIZE || + le32_to_cpu(raw_super->log_sectorsize) < + F2FS_MIN_LOG_SECTOR_SIZE) { + f2fs_msg(sb, KERN_INFO, "Invalid log sectorsize (%u)", + le32_to_cpu(raw_super->log_sectorsize)); return 1; } - if (le32_to_cpu(raw_super->log_sectors_per_block) != - F2FS_LOG_SECTORS_PER_BLOCK) { - f2fs_msg(sb, KERN_INFO, "Invalid log sectors per block"); + if (le32_to_cpu(raw_super->log_sectors_per_block) + + le32_to_cpu(raw_super->log_sectorsize) != + F2FS_MAX_LOG_SECTOR_SIZE) { + f2fs_msg(sb, KERN_INFO, + "Invalid log sectors per block(%u) log sectorsize(%u)", + le32_to_cpu(raw_super->log_sectors_per_block), + le32_to_cpu(raw_super->log_sectorsize)); return 1; } return 0; @@ -849,6 +872,7 @@ static void init_sb_info(struct f2fs_sb_info *sbi) atomic_set(&sbi->nr_pages[i], 0); sbi->dir_level = DEF_DIR_LEVEL; + sbi->need_fsck = false; } /* @@ -1082,6 +1106,9 @@ try_onemore: if (err) goto free_proc; + if (!retry) + sbi->need_fsck = true; + /* recover fsynced data */ if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) { err = recover_fsync_data(sbi); diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index 728a5dc3dc16..deca8728117b 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -266,7 +266,7 @@ static struct f2fs_xattr_entry *__find_xattr(void *base_addr, int index, static void *read_all_xattrs(struct inode *inode, struct page *ipage) { - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_xattr_header *header; size_t size = PAGE_SIZE, inline_size = 0; void *txattr_addr; @@ -325,7 +325,7 @@ fail: static inline int write_all_xattrs(struct inode *inode, __u32 hsize, void *txattr_addr, struct page *ipage) { - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); size_t inline_size = 0; void *xattr_addr; struct page *xpage; @@ -373,7 +373,7 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize, alloc_nid_failed(sbi, new_nid); return PTR_ERR(xpage); } - f2fs_bug_on(new_nid); + f2fs_bug_on(sbi, new_nid); f2fs_wait_on_page_writeback(xpage, NODE); } else { struct dnode_of_data dn; @@ -596,7 +596,7 @@ int f2fs_setxattr(struct inode *inode, int index, const char *name, const void *value, size_t size, struct page *ipage, int flags) { - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); int err; /* this case is only from init_inode_metadata */ diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 08ed2b0a96e6..860313a33a43 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -15,8 +15,9 @@ #include #define F2FS_SUPER_OFFSET 1024 /* byte-size offset */ -#define F2FS_LOG_SECTOR_SIZE 9 /* 9 bits for 512 byte */ -#define F2FS_LOG_SECTORS_PER_BLOCK 3 /* 4KB: F2FS_BLKSIZE */ +#define F2FS_MIN_LOG_SECTOR_SIZE 9 /* 9 bits for 512 bytes */ +#define F2FS_MAX_LOG_SECTOR_SIZE 12 /* 12 bits for 4096 bytes */ +#define F2FS_LOG_SECTORS_PER_BLOCK 3 /* log number for sector/blk */ #define F2FS_BLKSIZE 4096 /* support only 4KB block */ #define F2FS_MAX_EXTENSION 64 /* # of extension entries */ #define F2FS_BLK_ALIGN(x) (((x) + F2FS_BLKSIZE - 1) / F2FS_BLKSIZE) @@ -85,6 +86,7 @@ struct f2fs_super_block { /* * For checkpoint */ +#define CP_FSCK_FLAG 0x00000010 #define CP_ERROR_FLAG 0x00000008 #define CP_COMPACT_SUM_FLAG 0x00000004 #define CP_ORPHAN_PRESENT_FLAG 0x00000002 diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index d06d44363fea..bbc4de9baef7 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -69,6 +69,12 @@ { GC_GREEDY, "Greedy" }, \ { GC_CB, "Cost-Benefit" }) +#define show_cpreason(type) \ + __print_symbolic(type, \ + { CP_UMOUNT, "Umount" }, \ + { CP_SYNC, "Sync" }, \ + { CP_DISCARD, "Discard" }) + struct victim_sel_policy; DECLARE_EVENT_CLASS(f2fs__inode, @@ -944,25 +950,25 @@ TRACE_EVENT(f2fs_submit_page_mbio, TRACE_EVENT(f2fs_write_checkpoint, - TP_PROTO(struct super_block *sb, bool is_umount, char *msg), + TP_PROTO(struct super_block *sb, int reason, char *msg), - TP_ARGS(sb, is_umount, msg), + TP_ARGS(sb, reason, msg), TP_STRUCT__entry( __field(dev_t, dev) - __field(bool, is_umount) + __field(int, reason) __field(char *, msg) ), TP_fast_assign( __entry->dev = sb->s_dev; - __entry->is_umount = is_umount; + __entry->reason = reason; __entry->msg = msg; ), TP_printk("dev = (%d,%d), checkpoint for %s, state = %s", show_dev(__entry), - __entry->is_umount ? "clean umount" : "consistency", + show_cpreason(__entry->reason), __entry->msg) );