From e820dbeb6ad1d66906663643302f2157347e1d8b Mon Sep 17 00:00:00 2001
From: Qu Wenruo
Date: Thu, 10 Oct 2024 15:16:13 +1030
Subject: [PATCH] btrfs: convert btrfs_buffered_write() to use folios

The buffered write path is still heavily utilizing the page interface.
Since we have converted it to do page-by-page copying, it's much easier
to convert all involved functions to the folio interface. This involves:

- btrfs_copy_from_user()
- btrfs_drop_folio()
- prepare_uptodate_page()
- prepare_one_page()
- lock_and_cleanup_extent_if_need()
- btrfs_dirty_page()

All functions are changed to accept a folio parameter, and if the word
"page" is in the function name, change that to "folio" too.

The function btrfs_dirty_page() is exported for the v1 space cache, so
convert the v1 cache call site to hand its page to the new interface as
a folio.

There is also a small enhancement for prepare_one_folio(): instead of
manually waiting for page writeback, let __filemap_get_folio() handle
that by using FGP_WRITEBEGIN, which implies (FGP_LOCK | FGP_WRITE |
FGP_CREAT | FGP_STABLE).

Signed-off-by: Qu Wenruo
Signed-off-by: David Sterba
---
 fs/btrfs/file.c             | 138 ++++++++++++++++--------------------
 fs/btrfs/file.h             |   4 +-
 fs/btrfs/free-space-cache.c |   4 +-
 3 files changed, 66 insertions(+), 80 deletions(-)
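Note, for reviewers unfamiliar with FGP_WRITEBEGIN (illustration only,
not part of the commit): FGP_STABLE makes __filemap_get_folio() wait for
writeback internally, which is why the explicit
wait_on_page_writeback() call below can be dropped. The helper name
get_locked_stable_folio() is hypothetical and only shows the shape of
the new lookup:

	static struct folio *get_locked_stable_folio(struct address_space *mapping,
						     pgoff_t index, bool nowait,
						     gfp_t gfp)
	{
		/* FGP_WRITEBEGIN == FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE */
		fgf_t fgp_flags = nowait ? FGP_WRITEBEGIN | FGP_NOWAIT : FGP_WRITEBEGIN;

		/* Returns a locked folio not under writeback, or an ERR_PTR(). */
		return __filemap_get_folio(mapping, index, fgp_flags, gfp);
	}
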
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 0c5fc0e73c55..5e0a1805e897 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -42,7 +42,7 @@
  * calls into generic code.
  */
 static noinline int btrfs_copy_from_user(loff_t pos, size_t write_bytes,
-					 struct page *page, struct iov_iter *i)
+					 struct folio *folio, struct iov_iter *i)
 {
 	size_t copied = 0;
 	size_t total_copied = 0;
@@ -53,10 +53,10 @@ static noinline int btrfs_copy_from_user(loff_t pos, size_t write_bytes,
 		/*
 		 * Copy data from userspace to the current page
 		 */
-		copied = copy_page_from_iter_atomic(page, offset, count, i);
+		copied = copy_folio_from_iter_atomic(folio, offset, count, i);
 
 		/* Flush processor's dcache for this page */
-		flush_dcache_page(page);
+		flush_dcache_folio(folio);
 
 		/*
 		 * if we get a partial write, we can end up with
@@ -68,7 +68,7 @@ static noinline int btrfs_copy_from_user(loff_t pos, size_t write_bytes,
 		 * back to page at a time copies after we return 0.
 		 */
 		if (unlikely(copied < count)) {
-			if (!PageUptodate(page)) {
+			if (!folio_test_uptodate(folio)) {
 				iov_iter_revert(i, copied);
 				copied = 0;
 			}
@@ -84,37 +84,36 @@ static noinline int btrfs_copy_from_user(loff_t pos, size_t write_bytes,
 }
 
 /*
- * unlocks pages after btrfs_file_write is done with them
+ * Unlock folio after btrfs_file_write() is done with it.
  */
-static void btrfs_drop_page(struct btrfs_fs_info *fs_info, struct page *page,
-			    u64 pos, u64 copied)
+static void btrfs_drop_folio(struct btrfs_fs_info *fs_info, struct folio *folio,
+			     u64 pos, u64 copied)
 {
 	u64 block_start = round_down(pos, fs_info->sectorsize);
 	u64 block_len = round_up(pos + copied, fs_info->sectorsize) - block_start;
 
 	ASSERT(block_len <= U32_MAX);
 	/*
-	 * Page checked is some magic around finding pages that have been
-	 * modified without going through btrfs_set_page_dirty clear it here.
+	 * Folio checked is some magic around finding folios that have been
+	 * modified without going through btrfs_dirty_folio(). Clear it here.
 	 * There should be no need to mark the pages accessed as
-	 * prepare_one_page() should have marked them accessed in
-	 * prepare_one_page() via find_or_create_page()
+	 * prepare_one_folio() should have marked them accessed in
+	 * prepare_one_folio() via find_or_create_page()
 	 */
-	btrfs_folio_clamp_clear_checked(fs_info, page_folio(page), block_start,
-					block_len);
-	unlock_page(page);
-	put_page(page);
+	btrfs_folio_clamp_clear_checked(fs_info, folio, block_start, block_len);
+	folio_unlock(folio);
+	folio_put(folio);
 }
 
 /*
  * After btrfs_copy_from_user(), update the following things for delalloc:
- * - Mark newly dirtied pages as DELALLOC in the io tree.
+ * - Mark newly dirtied folio as DELALLOC in the io tree.
  *   Used to advise which range is to be written back.
- * - Mark modified pages as Uptodate/Dirty and not needing COW fixup
+ * - Mark modified folio as Uptodate/Dirty and not needing COW fixup
  * - Update inode size for past EOF write
  */
-int btrfs_dirty_page(struct btrfs_inode *inode, struct page *page, loff_t pos,
-		     size_t write_bytes, struct extent_state **cached, bool noreserve)
+int btrfs_dirty_folio(struct btrfs_inode *inode, struct folio *folio, loff_t pos,
+		      size_t write_bytes, struct extent_state **cached, bool noreserve)
 {
 	struct btrfs_fs_info *fs_info = inode->root->fs_info;
 	int ret = 0;
@@ -122,7 +121,6 @@ int btrfs_dirty_page(struct btrfs_inode *inode, struct page *page, loff_t pos,
 	u64 start_pos;
 	u64 end_of_last_block;
 	u64 end_pos = pos + write_bytes;
-	struct folio *folio = page_folio(page);
 	loff_t isize = i_size_read(&inode->vfs_inode);
 	unsigned int extra_bits = 0;
 
@@ -835,14 +833,12 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
 }
 
 /*
- * on error we return an unlocked page and the error value
- * on success we return a locked page and 0
+ * On error return an unlocked folio and the error value
+ * On success return a locked folio and 0
  */
-static int prepare_uptodate_page(struct inode *inode,
-				 struct page *page, u64 pos,
-				 u64 len, bool force_uptodate)
+static int prepare_uptodate_folio(struct inode *inode, struct folio *folio, u64 pos,
+				  u64 len, bool force_uptodate)
 {
-	struct folio *folio = page_folio(page);
 	u64 clamp_start = max_t(u64, pos, folio_pos(folio));
 	u64 clamp_end = min_t(u64, pos + len, folio_pos(folio) + folio_size(folio));
 	int ret = 0;
@@ -873,23 +869,13 @@ static int prepare_uptodate_page(struct inode *inode,
 	 * The private flag check is essential for subpage as we need to store
 	 * extra bitmap using folio private.
 	 */
-	if (page->mapping != inode->i_mapping || !folio_test_private(folio)) {
+	if (folio->mapping != inode->i_mapping || !folio_test_private(folio)) {
 		folio_unlock(folio);
 		return -EAGAIN;
 	}
 	return 0;
 }
 
-static fgf_t get_prepare_fgp_flags(bool nowait)
-{
-	fgf_t fgp_flags = FGP_LOCK | FGP_ACCESSED | FGP_CREAT;
-
-	if (nowait)
-		fgp_flags |= FGP_NOWAIT;
-
-	return fgp_flags;
-}
-
 static gfp_t get_prepare_gfp_flags(struct inode *inode, bool nowait)
 {
 	gfp_t gfp;
@@ -904,60 +890,60 @@ static gfp_t get_prepare_gfp_flags(struct inode *inode, bool nowait)
 }
 
 /*
- * this just gets page into the page cache and locks them down.
+ * Get folio into the page cache and lock it.
  */
-static noinline int prepare_one_page(struct inode *inode, struct page **page_ret,
-				     loff_t pos, size_t write_bytes,
-				     bool force_uptodate, bool nowait)
+static noinline int prepare_one_folio(struct inode *inode, struct folio **folio_ret,
+				      loff_t pos, size_t write_bytes,
+				      bool force_uptodate, bool nowait)
 {
 	unsigned long index = pos >> PAGE_SHIFT;
 	gfp_t mask = get_prepare_gfp_flags(inode, nowait);
-	fgf_t fgp_flags = get_prepare_fgp_flags(nowait);
-	struct page *page;
+	fgf_t fgp_flags = (nowait ? FGP_WRITEBEGIN | FGP_NOWAIT : FGP_WRITEBEGIN);
+	struct folio *folio;
 	int ret = 0;
 
 again:
-	page = pagecache_get_page(inode->i_mapping, index, fgp_flags,
-				  mask | __GFP_WRITE);
-	if (!page) {
+	folio = __filemap_get_folio(inode->i_mapping, index, fgp_flags, mask);
+	if (IS_ERR(folio)) {
 		if (nowait)
 			ret = -EAGAIN;
 		else
-			ret = -ENOMEM;
+			ret = PTR_ERR(folio);
 		return ret;
 	}
-	ret = set_page_extent_mapped(page);
+	/* Only support page sized folio yet. */
+	ASSERT(folio_order(folio) == 0);
+	ret = set_folio_extent_mapped(folio);
 	if (ret < 0) {
-		unlock_page(page);
-		put_page(page);
+		folio_unlock(folio);
+		folio_put(folio);
 		return ret;
 	}
-	ret = prepare_uptodate_page(inode, page, pos, write_bytes, force_uptodate);
+	ret = prepare_uptodate_folio(inode, folio, pos, write_bytes, force_uptodate);
 	if (ret) {
-		/* The page is already unlocked. */
-		put_page(page);
+		/* The folio is already unlocked. */
+		folio_put(folio);
 		if (!nowait && ret == -EAGAIN) {
 			ret = 0;
 			goto again;
 		}
 		return ret;
 	}
-	wait_on_page_writeback(page);
-	*page_ret = page;
+	*folio_ret = folio;
 	return 0;
 }
 
 /*
- * This function locks the extent and properly waits for data=ordered extents
- * to finish before allowing the pages to be modified if need.
+ * Locks the extent and properly waits for data=ordered extents to finish
+ * before allowing the folios to be modified if need.
  *
- * The return value:
+ * Return:
  * 1 - the extent is locked
  * 0 - the extent is not locked, and everything is OK
- * -EAGAIN - need re-prepare the pages
+ * -EAGAIN - need to prepare the folios again
  */
 static noinline int
-lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page *page,
+lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct folio *folio,
 				loff_t pos, size_t write_bytes,
 				u64 *lockstart, u64 *lockend, bool nowait,
 				struct extent_state **cached_state)
@@ -976,8 +962,8 @@ lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page *page,
 	if (nowait) {
 		if (!try_lock_extent(&inode->io_tree, start_pos, last_pos,
 				     cached_state)) {
-			unlock_page(page);
-			put_page(page);
+			folio_unlock(folio);
+			folio_put(folio);
 			return -EAGAIN;
 		}
 	} else {
@@ -991,8 +977,8 @@ lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page *page,
 		    ordered->file_offset <= last_pos) {
 			unlock_extent(&inode->io_tree, start_pos, last_pos,
 				      cached_state);
-			unlock_page(page);
-			put_page(page);
+			folio_unlock(folio);
+			folio_put(folio);
 			btrfs_start_ordered_extent(ordered);
 			btrfs_put_ordered_extent(ordered);
 			return -EAGAIN;
@@ -1006,10 +992,10 @@ lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page *page,
 	}
 
 	/*
-	 * We should be called after prepare_one_page() which should have locked
+	 * We should be called after prepare_one_folio() which should have locked
 	 * all pages in the range.
 	 */
-	WARN_ON(!PageLocked(page));
+	WARN_ON(!folio_test_locked(folio));
 	return ret;
 }
 
@@ -1190,12 +1176,12 @@ ssize_t btrfs_buffered_write(struct kiocb *iocb, struct iov_iter *i)
 		size_t copied;
 		size_t dirty_sectors;
 		size_t num_sectors;
-		struct page *page = NULL;
+		struct folio *folio = NULL;
 		int extents_locked;
 		bool force_page_uptodate = false;
 
 		/*
-		 * Fault pages before locking them in prepare_one_page()
+		 * Fault pages before locking them in prepare_one_folio()
 		 * to avoid recursive lock
 		 */
 		if (unlikely(fault_in_iov_iter_readable(i, write_bytes))) {
@@ -1261,8 +1247,8 @@ ssize_t btrfs_buffered_write(struct kiocb *iocb, struct iov_iter *i)
 			break;
 		}
 
-		ret = prepare_one_page(inode, &page, pos, write_bytes,
-				       force_page_uptodate, false);
+		ret = prepare_one_folio(inode, &folio, pos, write_bytes,
+					force_page_uptodate, false);
 		if (ret) {
 			btrfs_delalloc_release_extents(BTRFS_I(inode),
 						       reserve_bytes);
@@ -1270,7 +1256,7 @@ ssize_t btrfs_buffered_write(struct kiocb *iocb, struct iov_iter *i)
 		}
 
 		extents_locked = lock_and_cleanup_extent_if_need(BTRFS_I(inode),
-						page, pos, write_bytes, &lockstart,
+						folio, pos, write_bytes, &lockstart,
 						&lockend, nowait, &cached_state);
 		if (extents_locked < 0) {
 			if (!nowait && extents_locked == -EAGAIN)
@@ -1282,7 +1268,7 @@ ssize_t btrfs_buffered_write(struct kiocb *iocb, struct iov_iter *i)
 			break;
 		}
 
-		copied = btrfs_copy_from_user(pos, write_bytes, page, i);
+		copied = btrfs_copy_from_user(pos, write_bytes, folio, i);
 
 		num_sectors = BTRFS_BYTES_TO_BLKS(fs_info, reserve_bytes);
 		dirty_sectors = round_up(copied + sector_offset,
@@ -1314,8 +1300,8 @@ ssize_t btrfs_buffered_write(struct kiocb *iocb, struct iov_iter *i)
 		release_bytes = round_up(copied + sector_offset,
 					 fs_info->sectorsize);
 
-		ret = btrfs_dirty_page(BTRFS_I(inode), page, pos, copied,
-				       &cached_state, only_release_metadata);
+		ret = btrfs_dirty_folio(BTRFS_I(inode), folio, pos, copied,
+					&cached_state, only_release_metadata);
 
 		/*
 		 * If we have not locked the extent range, because the range's
@@ -1332,7 +1318,7 @@ ssize_t btrfs_buffered_write(struct kiocb *iocb, struct iov_iter *i)
 
 		btrfs_delalloc_release_extents(BTRFS_I(inode), reserve_bytes);
 		if (ret) {
-			btrfs_drop_page(fs_info, page, pos, copied);
+			btrfs_drop_folio(fs_info, folio, pos, copied);
 			break;
 		}
 
@@ -1340,7 +1326,7 @@ ssize_t btrfs_buffered_write(struct kiocb *iocb, struct iov_iter *i)
 		if (only_release_metadata)
 			btrfs_check_nocow_unlock(BTRFS_I(inode));
 
-		btrfs_drop_page(fs_info, page, pos, copied);
+		btrfs_drop_folio(fs_info, folio, pos, copied);
 
 		cond_resched();
 
diff --git a/fs/btrfs/file.h b/fs/btrfs/file.h
index 69a7b78d99bb..de89e644be29 100644
--- a/fs/btrfs/file.h
+++ b/fs/btrfs/file.h
@@ -34,8 +34,8 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
 ssize_t btrfs_do_write_iter(struct kiocb *iocb, struct iov_iter *from,
 			    const struct btrfs_ioctl_encoded_io_args *encoded);
 int btrfs_release_file(struct inode *inode, struct file *file);
-int btrfs_dirty_page(struct btrfs_inode *inode, struct page *page, loff_t pos,
-		     size_t write_bytes, struct extent_state **cached, bool noreserve);
+int btrfs_dirty_folio(struct btrfs_inode *inode, struct folio *folio, loff_t pos,
+		      size_t write_bytes, struct extent_state **cached, bool noreserve);
 int btrfs_fdatawrite_range(struct btrfs_inode *inode, loff_t start, loff_t end);
 int btrfs_check_nocow_lock(struct btrfs_inode *inode, loff_t pos,
 			   size_t *write_bytes, bool nowait);
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 207aff5f1957..cfa52ef40b06 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -1464,8 +1464,8 @@ static int __btrfs_write_out_cache(struct inode *inode,
 		u64 dirty_start = i * PAGE_SIZE;
 		u64 dirty_len = min_t(u64, dirty_start + PAGE_SIZE, i_size) - dirty_start;
 
-		ret = btrfs_dirty_page(BTRFS_I(inode), io_ctl->pages[i],
-				       dirty_start, dirty_len, &cached_state, false);
+		ret = btrfs_dirty_folio(BTRFS_I(inode), page_folio(io_ctl->pages[i]),
+					dirty_start, dirty_len, &cached_state, false);
 		if (ret < 0)
 			goto out_nospc;
 	}
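
Illustration only, not part of the patch: a caller that still holds a
struct page, like the v1 free space cache above, bridges to the new
interface with page_folio(). The wrapper name dirty_cache_page() is
hypothetical:

	static int dirty_cache_page(struct btrfs_inode *inode, struct page *page,
				    loff_t pos, size_t len,
				    struct extent_state **cached)
	{
		/* page_folio() adapts not-yet-converted page-based callers. */
		return btrfs_dirty_folio(inode, page_folio(page), pos, len,
					 cached, false);
	}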