mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-01-10 15:19:51 +00:00
6bd68ec266
We're using more stack than we'd like in a number of functions, and btree_trans is the biggest object that we stack allocate. But we have to do a heap allocation to initialize it anyways, so there's no real downside to heap allocating the entire thing. Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
792 lines
18 KiB
C
792 lines
18 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
#ifndef NO_BCACHEFS_FS
|
|
|
|
#include "bcachefs.h"
|
|
#include "btree_iter.h"
|
|
#include "extents.h"
|
|
#include "fs-io.h"
|
|
#include "fs-io-pagecache.h"
|
|
#include "subvolume.h"
|
|
|
|
#include <linux/pagevec.h>
|
|
#include <linux/writeback.h>
|
|
|
|
int bch2_filemap_get_contig_folios_d(struct address_space *mapping,
|
|
loff_t start, u64 end,
|
|
int fgp_flags, gfp_t gfp,
|
|
folios *fs)
|
|
{
|
|
struct folio *f;
|
|
u64 pos = start;
|
|
int ret = 0;
|
|
|
|
while (pos < end) {
|
|
if ((u64) pos >= (u64) start + (1ULL << 20))
|
|
fgp_flags &= ~FGP_CREAT;
|
|
|
|
ret = darray_make_room_gfp(fs, 1, gfp & GFP_KERNEL);
|
|
if (ret)
|
|
break;
|
|
|
|
f = __filemap_get_folio(mapping, pos >> PAGE_SHIFT, fgp_flags, gfp);
|
|
if (IS_ERR_OR_NULL(f))
|
|
break;
|
|
|
|
BUG_ON(fs->nr && folio_pos(f) != pos);
|
|
|
|
pos = folio_end_pos(f);
|
|
darray_push(fs, f);
|
|
}
|
|
|
|
if (!fs->nr && !ret && (fgp_flags & FGP_CREAT))
|
|
ret = -ENOMEM;
|
|
|
|
return fs->nr ? 0 : ret;
|
|
}
|
|
|
|
/* pagecache_block must be held */
|
|
int bch2_write_invalidate_inode_pages_range(struct address_space *mapping,
|
|
loff_t start, loff_t end)
|
|
{
|
|
int ret;
|
|
|
|
/*
|
|
* XXX: the way this is currently implemented, we can spin if a process
|
|
* is continually redirtying a specific page
|
|
*/
|
|
do {
|
|
if (!mapping->nrpages)
|
|
return 0;
|
|
|
|
ret = filemap_write_and_wait_range(mapping, start, end);
|
|
if (ret)
|
|
break;
|
|
|
|
if (!mapping->nrpages)
|
|
return 0;
|
|
|
|
ret = invalidate_inode_pages2_range(mapping,
|
|
start >> PAGE_SHIFT,
|
|
end >> PAGE_SHIFT);
|
|
} while (ret == -EBUSY);
|
|
|
|
return ret;
|
|
}
|
|
|
|
#if 0
/*
 * Debug tracing aid (compiled out): string names generated from
 * BCH_FOLIO_SECTOR_STATE(), terminated by NULL.
 */
static const char * const bch2_folio_sector_states[] = {
#define x(name)	#name,
	BCH_FOLIO_SECTOR_STATE()
#undef x
	NULL
};
#endif
|
|
|
|
static inline enum bch_folio_sector_state
|
|
folio_sector_dirty(enum bch_folio_sector_state state)
|
|
{
|
|
switch (state) {
|
|
case SECTOR_unallocated:
|
|
return SECTOR_dirty;
|
|
case SECTOR_reserved:
|
|
return SECTOR_dirty_reserved;
|
|
default:
|
|
return state;
|
|
}
|
|
}
|
|
|
|
static inline enum bch_folio_sector_state
|
|
folio_sector_undirty(enum bch_folio_sector_state state)
|
|
{
|
|
switch (state) {
|
|
case SECTOR_dirty:
|
|
return SECTOR_unallocated;
|
|
case SECTOR_dirty_reserved:
|
|
return SECTOR_reserved;
|
|
default:
|
|
return state;
|
|
}
|
|
}
|
|
|
|
static inline enum bch_folio_sector_state
|
|
folio_sector_reserve(enum bch_folio_sector_state state)
|
|
{
|
|
switch (state) {
|
|
case SECTOR_unallocated:
|
|
return SECTOR_reserved;
|
|
case SECTOR_dirty:
|
|
return SECTOR_dirty_reserved;
|
|
default:
|
|
return state;
|
|
}
|
|
}
|
|
|
|
/* for newly allocated folios: */
|
|
struct bch_folio *__bch2_folio_create(struct folio *folio, gfp_t gfp)
|
|
{
|
|
struct bch_folio *s;
|
|
|
|
s = kzalloc(sizeof(*s) +
|
|
sizeof(struct bch_folio_sector) *
|
|
folio_sectors(folio), gfp);
|
|
if (!s)
|
|
return NULL;
|
|
|
|
spin_lock_init(&s->lock);
|
|
folio_attach_private(folio, s);
|
|
return s;
|
|
}
|
|
|
|
/*
 * Return the bch_folio state of @folio, creating it with @gfp if the folio
 * does not have one yet. Returns NULL only if creation fails.
 */
struct bch_folio *bch2_folio_create(struct folio *folio, gfp_t gfp)
{
	struct bch_folio *existing = bch2_folio(folio);

	if (existing)
		return existing;

	return __bch2_folio_create(folio, gfp);
}
|
|
|
|
static unsigned bkey_to_sector_state(struct bkey_s_c k)
|
|
{
|
|
if (bkey_extent_is_reservation(k))
|
|
return SECTOR_reserved;
|
|
if (bkey_extent_is_allocation(k.k))
|
|
return SECTOR_allocated;
|
|
return SECTOR_unallocated;
|
|
}
|
|
|
|
static void __bch2_folio_set(struct folio *folio,
|
|
unsigned pg_offset, unsigned pg_len,
|
|
unsigned nr_ptrs, unsigned state)
|
|
{
|
|
struct bch_folio *s = bch2_folio(folio);
|
|
unsigned i, sectors = folio_sectors(folio);
|
|
|
|
BUG_ON(pg_offset >= sectors);
|
|
BUG_ON(pg_offset + pg_len > sectors);
|
|
|
|
spin_lock(&s->lock);
|
|
|
|
for (i = pg_offset; i < pg_offset + pg_len; i++) {
|
|
s->s[i].nr_replicas = nr_ptrs;
|
|
bch2_folio_sector_set(folio, s, i, state);
|
|
}
|
|
|
|
if (i == sectors)
|
|
s->uptodate = true;
|
|
|
|
spin_unlock(&s->lock);
|
|
}
|
|
|
|
/*
|
|
* Initialize bch_folio state (allocated/unallocated, nr_replicas) from the
|
|
* extents btree:
|
|
*/
|
|
int bch2_folio_set(struct bch_fs *c, subvol_inum inum,
|
|
struct folio **fs, unsigned nr_folios)
|
|
{
|
|
struct btree_trans *trans;
|
|
struct btree_iter iter;
|
|
struct bkey_s_c k;
|
|
struct bch_folio *s;
|
|
u64 offset = folio_sector(fs[0]);
|
|
unsigned folio_idx;
|
|
u32 snapshot;
|
|
bool need_set = false;
|
|
int ret;
|
|
|
|
for (folio_idx = 0; folio_idx < nr_folios; folio_idx++) {
|
|
s = bch2_folio_create(fs[folio_idx], GFP_KERNEL);
|
|
if (!s)
|
|
return -ENOMEM;
|
|
|
|
need_set |= !s->uptodate;
|
|
}
|
|
|
|
if (!need_set)
|
|
return 0;
|
|
|
|
folio_idx = 0;
|
|
trans = bch2_trans_get(c);
|
|
retry:
|
|
bch2_trans_begin(trans);
|
|
|
|
ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
|
|
if (ret)
|
|
goto err;
|
|
|
|
for_each_btree_key_norestart(trans, iter, BTREE_ID_extents,
|
|
SPOS(inum.inum, offset, snapshot),
|
|
BTREE_ITER_SLOTS, k, ret) {
|
|
unsigned nr_ptrs = bch2_bkey_nr_ptrs_fully_allocated(k);
|
|
unsigned state = bkey_to_sector_state(k);
|
|
|
|
while (folio_idx < nr_folios) {
|
|
struct folio *folio = fs[folio_idx];
|
|
u64 folio_start = folio_sector(folio);
|
|
u64 folio_end = folio_end_sector(folio);
|
|
unsigned folio_offset = max(bkey_start_offset(k.k), folio_start) -
|
|
folio_start;
|
|
unsigned folio_len = min(k.k->p.offset, folio_end) -
|
|
folio_offset - folio_start;
|
|
|
|
BUG_ON(k.k->p.offset < folio_start);
|
|
BUG_ON(bkey_start_offset(k.k) > folio_end);
|
|
|
|
if (!bch2_folio(folio)->uptodate)
|
|
__bch2_folio_set(folio, folio_offset, folio_len, nr_ptrs, state);
|
|
|
|
if (k.k->p.offset < folio_end)
|
|
break;
|
|
folio_idx++;
|
|
}
|
|
|
|
if (folio_idx == nr_folios)
|
|
break;
|
|
}
|
|
|
|
offset = iter.pos.offset;
|
|
bch2_trans_iter_exit(trans, &iter);
|
|
err:
|
|
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
|
|
goto retry;
|
|
bch2_trans_put(trans);
|
|
|
|
return ret;
|
|
}
|
|
|
|
void bch2_bio_page_state_set(struct bio *bio, struct bkey_s_c k)
|
|
{
|
|
struct bvec_iter iter;
|
|
struct folio_vec fv;
|
|
unsigned nr_ptrs = k.k->type == KEY_TYPE_reflink_v
|
|
? 0 : bch2_bkey_nr_ptrs_fully_allocated(k);
|
|
unsigned state = bkey_to_sector_state(k);
|
|
|
|
bio_for_each_folio(fv, bio, iter)
|
|
__bch2_folio_set(fv.fv_folio,
|
|
fv.fv_offset >> 9,
|
|
fv.fv_len >> 9,
|
|
nr_ptrs, state);
|
|
}
|
|
|
|
/*
 * For every folio currently in @inode's pagecache that overlaps the sector
 * range [@start, @end), zero nr_replicas on the overlapping sectors.
 *
 * Each folio is locked while it is examined; folios without bch_folio state
 * are left alone. Does nothing if @end <= @start.
 */
void bch2_mark_pagecache_unallocated(struct bch_inode_info *inode,
				     u64 start, u64 end)
{
	struct folio_batch fbatch;
	pgoff_t index, end_index;
	unsigned i;

	if (end <= start)
		return;

	index		= start >> PAGE_SECTORS_SHIFT;
	end_index	= (end - 1) >> PAGE_SECTORS_SHIFT;

	folio_batch_init(&fbatch);

	while (filemap_get_folios(inode->v.i_mapping,
				  &index, end_index, &fbatch)) {
		for (i = 0; i < folio_batch_count(&fbatch); i++) {
			struct folio *folio = fbatch.folios[i];
			u64 folio_start = folio_sector(folio);
			u64 folio_end = folio_end_sector(folio);
			/* Overlap of [start, end) with this folio, in folio-relative sectors */
			unsigned folio_offset = max(start, folio_start) - folio_start;
			unsigned folio_len = min(end, folio_end) - folio_offset - folio_start;
			struct bch_folio *s;

			BUG_ON(end <= folio_start);

			folio_lock(folio);

			s = bch2_folio(folio);
			if (s) {
				unsigned sector = folio_offset;
				unsigned last = folio_offset + folio_len;

				spin_lock(&s->lock);
				while (sector < last)
					s->s[sector++].nr_replicas = 0;
				spin_unlock(&s->lock);
			}

			folio_unlock(folio);
		}

		folio_batch_release(&fbatch);
		cond_resched();
	}
}
|
|
|
|
/*
 * For every folio currently in @inode's pagecache that overlaps the sector
 * range [@start, @end), move the overlapping sectors to their reserved state
 * (see folio_sector_reserve()).
 *
 * Each sector that was SECTOR_dirty before the transition lowers the inode's
 * sector count by one; the total is applied with bch2_i_sectors_acct() at the
 * end. Does nothing if @end <= @start.
 */
void bch2_mark_pagecache_reserved(struct bch_inode_info *inode,
				  u64 start, u64 end)
{
	struct bch_fs *c = inode->v.i_sb->s_fs_info;
	struct folio_batch fbatch;
	pgoff_t index, end_index;
	s64 i_sectors_delta = 0;
	unsigned i;

	if (end <= start)
		return;

	index		= start >> PAGE_SECTORS_SHIFT;
	end_index	= (end - 1) >> PAGE_SECTORS_SHIFT;

	folio_batch_init(&fbatch);

	while (filemap_get_folios(inode->v.i_mapping,
				  &index, end_index, &fbatch)) {
		for (i = 0; i < folio_batch_count(&fbatch); i++) {
			struct folio *folio = fbatch.folios[i];
			u64 folio_start = folio_sector(folio);
			u64 folio_end = folio_end_sector(folio);
			/* Overlap of [start, end) with this folio, in folio-relative sectors */
			unsigned folio_offset = max(start, folio_start) - folio_start;
			unsigned folio_len = min(end, folio_end) - folio_offset - folio_start;
			struct bch_folio *s;

			BUG_ON(end <= folio_start);

			folio_lock(folio);

			s = bch2_folio(folio);
			if (s) {
				unsigned sector = folio_offset;
				unsigned last = folio_offset + folio_len;

				spin_lock(&s->lock);
				for (; sector < last; sector++) {
					if (s->s[sector].state == SECTOR_dirty)
						i_sectors_delta--;

					bch2_folio_sector_set(folio, s, sector,
						folio_sector_reserve(s->s[sector].state));
				}
				spin_unlock(&s->lock);
			}

			folio_unlock(folio);
		}

		folio_batch_release(&fbatch);
		cond_resched();
	}

	bch2_i_sectors_acct(c, inode, NULL, i_sectors_delta);
}
|
|
|
|
static inline unsigned sectors_to_reserve(struct bch_folio_sector *s,
|
|
unsigned nr_replicas)
|
|
{
|
|
return max(0, (int) nr_replicas -
|
|
s->nr_replicas -
|
|
s->replicas_reserved);
|
|
}
|
|
|
|
/*
 * Take a disk reservation covering every sector of @folio that is short of
 * the inode's replica count, and record it in each sector's
 * replicas_reserved.
 *
 * When @check_enospc is false the reservation is requested with
 * BCH_DISK_RESERVATION_NOFAIL.
 *
 * Returns 0 on success (including when nothing needed reserving), -ENOMEM if
 * folio state could not be created, or the error from
 * bch2_disk_reservation_get().
 */
int bch2_get_folio_disk_reservation(struct bch_fs *c,
				struct bch_inode_info *inode,
				struct folio *folio, bool check_enospc)
{
	struct bch_folio *s = bch2_folio_create(folio, 0);
	unsigned nr_replicas = inode_nr_replicas(c, inode);
	struct disk_reservation disk_res = { 0 };
	unsigned sectors = folio_sectors(folio);
	unsigned disk_res_sectors = 0;
	unsigned i;
	int ret;

	if (!s)
		return -ENOMEM;

	/* First pass: total up what is missing */
	i = 0;
	while (i < sectors) {
		disk_res_sectors += sectors_to_reserve(&s->s[i], nr_replicas);
		i++;
	}

	if (!disk_res_sectors)
		return 0;

	ret = bch2_disk_reservation_get(c, &disk_res,
					disk_res_sectors, 1,
					check_enospc
					? 0
					: BCH_DISK_RESERVATION_NOFAIL);
	if (unlikely(ret))
		return ret;

	/* Second pass: credit each sector with what was reserved for it */
	i = 0;
	while (i < sectors) {
		s->s[i].replicas_reserved +=
			sectors_to_reserve(&s->s[i], nr_replicas);
		i++;
	}

	return 0;
}
|
|
|
|
void bch2_folio_reservation_put(struct bch_fs *c,
|
|
struct bch_inode_info *inode,
|
|
struct bch2_folio_reservation *res)
|
|
{
|
|
bch2_disk_reservation_put(c, &res->disk);
|
|
bch2_quota_reservation_put(c, inode, &res->quota);
|
|
}
|
|
|
|
int bch2_folio_reservation_get(struct bch_fs *c,
|
|
struct bch_inode_info *inode,
|
|
struct folio *folio,
|
|
struct bch2_folio_reservation *res,
|
|
unsigned offset, unsigned len)
|
|
{
|
|
struct bch_folio *s = bch2_folio_create(folio, 0);
|
|
unsigned i, disk_sectors = 0, quota_sectors = 0;
|
|
int ret;
|
|
|
|
if (!s)
|
|
return -ENOMEM;
|
|
|
|
BUG_ON(!s->uptodate);
|
|
|
|
for (i = round_down(offset, block_bytes(c)) >> 9;
|
|
i < round_up(offset + len, block_bytes(c)) >> 9;
|
|
i++) {
|
|
disk_sectors += sectors_to_reserve(&s->s[i],
|
|
res->disk.nr_replicas);
|
|
quota_sectors += s->s[i].state == SECTOR_unallocated;
|
|
}
|
|
|
|
if (disk_sectors) {
|
|
ret = bch2_disk_reservation_add(c, &res->disk, disk_sectors, 0);
|
|
if (unlikely(ret))
|
|
return ret;
|
|
}
|
|
|
|
if (quota_sectors) {
|
|
ret = bch2_quota_reservation_add(c, inode, &res->quota,
|
|
quota_sectors, true);
|
|
if (unlikely(ret)) {
|
|
struct disk_reservation tmp = {
|
|
.sectors = disk_sectors
|
|
};
|
|
|
|
bch2_disk_reservation_put(c, &tmp);
|
|
res->disk.sectors -= disk_sectors;
|
|
return ret;
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static void bch2_clear_folio_bits(struct folio *folio)
|
|
{
|
|
struct bch_inode_info *inode = to_bch_ei(folio->mapping->host);
|
|
struct bch_fs *c = inode->v.i_sb->s_fs_info;
|
|
struct bch_folio *s = bch2_folio(folio);
|
|
struct disk_reservation disk_res = { 0 };
|
|
int i, sectors = folio_sectors(folio), dirty_sectors = 0;
|
|
|
|
if (!s)
|
|
return;
|
|
|
|
EBUG_ON(!folio_test_locked(folio));
|
|
EBUG_ON(folio_test_writeback(folio));
|
|
|
|
for (i = 0; i < sectors; i++) {
|
|
disk_res.sectors += s->s[i].replicas_reserved;
|
|
s->s[i].replicas_reserved = 0;
|
|
|
|
dirty_sectors -= s->s[i].state == SECTOR_dirty;
|
|
bch2_folio_sector_set(folio, s, i, folio_sector_undirty(s->s[i].state));
|
|
}
|
|
|
|
bch2_disk_reservation_put(c, &disk_res);
|
|
|
|
bch2_i_sectors_acct(c, inode, NULL, dirty_sectors);
|
|
|
|
bch2_folio_release(folio);
|
|
}
|
|
|
|
void bch2_set_folio_dirty(struct bch_fs *c,
|
|
struct bch_inode_info *inode,
|
|
struct folio *folio,
|
|
struct bch2_folio_reservation *res,
|
|
unsigned offset, unsigned len)
|
|
{
|
|
struct bch_folio *s = bch2_folio(folio);
|
|
unsigned i, dirty_sectors = 0;
|
|
|
|
WARN_ON((u64) folio_pos(folio) + offset + len >
|
|
round_up((u64) i_size_read(&inode->v), block_bytes(c)));
|
|
|
|
BUG_ON(!s->uptodate);
|
|
|
|
spin_lock(&s->lock);
|
|
|
|
for (i = round_down(offset, block_bytes(c)) >> 9;
|
|
i < round_up(offset + len, block_bytes(c)) >> 9;
|
|
i++) {
|
|
unsigned sectors = sectors_to_reserve(&s->s[i],
|
|
res->disk.nr_replicas);
|
|
|
|
/*
|
|
* This can happen if we race with the error path in
|
|
* bch2_writepage_io_done():
|
|
*/
|
|
sectors = min_t(unsigned, sectors, res->disk.sectors);
|
|
|
|
s->s[i].replicas_reserved += sectors;
|
|
res->disk.sectors -= sectors;
|
|
|
|
dirty_sectors += s->s[i].state == SECTOR_unallocated;
|
|
|
|
bch2_folio_sector_set(folio, s, i, folio_sector_dirty(s->s[i].state));
|
|
}
|
|
|
|
spin_unlock(&s->lock);
|
|
|
|
bch2_i_sectors_acct(c, inode, &res->quota, dirty_sectors);
|
|
|
|
if (!folio_test_dirty(folio))
|
|
filemap_dirty_folio(inode->v.i_mapping, folio);
|
|
}
|
|
|
|
vm_fault_t bch2_page_fault(struct vm_fault *vmf)
|
|
{
|
|
struct file *file = vmf->vma->vm_file;
|
|
struct address_space *mapping = file->f_mapping;
|
|
struct address_space *fdm = faults_disabled_mapping();
|
|
struct bch_inode_info *inode = file_bch_inode(file);
|
|
vm_fault_t ret;
|
|
|
|
if (fdm == mapping)
|
|
return VM_FAULT_SIGBUS;
|
|
|
|
/* Lock ordering: */
|
|
if (fdm > mapping) {
|
|
struct bch_inode_info *fdm_host = to_bch_ei(fdm->host);
|
|
|
|
if (bch2_pagecache_add_tryget(inode))
|
|
goto got_lock;
|
|
|
|
bch2_pagecache_block_put(fdm_host);
|
|
|
|
bch2_pagecache_add_get(inode);
|
|
bch2_pagecache_add_put(inode);
|
|
|
|
bch2_pagecache_block_get(fdm_host);
|
|
|
|
/* Signal that lock has been dropped: */
|
|
set_fdm_dropped_locks();
|
|
return VM_FAULT_SIGBUS;
|
|
}
|
|
|
|
bch2_pagecache_add_get(inode);
|
|
got_lock:
|
|
ret = filemap_fault(vmf);
|
|
bch2_pagecache_add_put(inode);
|
|
|
|
return ret;
|
|
}
|
|
|
|
vm_fault_t bch2_page_mkwrite(struct vm_fault *vmf)
|
|
{
|
|
struct folio *folio = page_folio(vmf->page);
|
|
struct file *file = vmf->vma->vm_file;
|
|
struct bch_inode_info *inode = file_bch_inode(file);
|
|
struct address_space *mapping = file->f_mapping;
|
|
struct bch_fs *c = inode->v.i_sb->s_fs_info;
|
|
struct bch2_folio_reservation res;
|
|
unsigned len;
|
|
loff_t isize;
|
|
vm_fault_t ret;
|
|
|
|
bch2_folio_reservation_init(c, inode, &res);
|
|
|
|
sb_start_pagefault(inode->v.i_sb);
|
|
file_update_time(file);
|
|
|
|
/*
|
|
* Not strictly necessary, but helps avoid dio writes livelocking in
|
|
* bch2_write_invalidate_inode_pages_range() - can drop this if/when we get
|
|
* a bch2_write_invalidate_inode_pages_range() that works without dropping
|
|
* page lock before invalidating page
|
|
*/
|
|
bch2_pagecache_add_get(inode);
|
|
|
|
folio_lock(folio);
|
|
isize = i_size_read(&inode->v);
|
|
|
|
if (folio->mapping != mapping || folio_pos(folio) >= isize) {
|
|
folio_unlock(folio);
|
|
ret = VM_FAULT_NOPAGE;
|
|
goto out;
|
|
}
|
|
|
|
len = min_t(loff_t, folio_size(folio), isize - folio_pos(folio));
|
|
|
|
if (bch2_folio_set(c, inode_inum(inode), &folio, 1) ?:
|
|
bch2_folio_reservation_get(c, inode, folio, &res, 0, len)) {
|
|
folio_unlock(folio);
|
|
ret = VM_FAULT_SIGBUS;
|
|
goto out;
|
|
}
|
|
|
|
bch2_set_folio_dirty(c, inode, folio, &res, 0, len);
|
|
bch2_folio_reservation_put(c, inode, &res);
|
|
|
|
folio_wait_stable(folio);
|
|
ret = VM_FAULT_LOCKED;
|
|
out:
|
|
bch2_pagecache_add_put(inode);
|
|
sb_end_pagefault(inode->v.i_sb);
|
|
|
|
return ret;
|
|
}
|
|
|
|
/*
 * Clear the folio's bcachefs state, but only when the invalidated range
 * starts at offset 0 and spans at least the whole folio; partial
 * invalidations are ignored.
 */
void bch2_invalidate_folio(struct folio *folio, size_t offset, size_t length)
{
	if (!offset && length >= folio_size(folio))
		bch2_clear_folio_bits(folio);
}
|
|
|
|
/*
 * Clear the folio's bcachefs state unless the folio is dirty or under
 * writeback. Returns true if the state was cleared, false if the folio was
 * busy. @gfp_mask is unused.
 */
bool bch2_release_folio(struct folio *folio, gfp_t gfp_mask)
{
	bool busy = folio_test_dirty(folio) || folio_test_writeback(folio);

	if (!busy)
		bch2_clear_folio_bits(folio);

	return !busy;
}
|
|
|
|
/* fseek: */
|
|
|
|
/*
 * Find the first sector of @folio, at or after file position @pos, whose state
 * is SECTOR_dirty or higher and whose existing plus reserved replicas reach
 * @min_replicas.
 *
 * Returns that sector's byte offset within the folio, or -1 if the folio has
 * no bch_folio state or no such sector.
 */
static int folio_data_offset(struct folio *folio, loff_t pos,
			     unsigned min_replicas)
{
	struct bch_folio *s = bch2_folio(folio);
	unsigned i, sectors;

	if (!s)
		return -1;

	sectors = folio_sectors(folio);

	for (i = folio_pos_to_s(folio, pos); i < sectors; i++) {
		if (s->s[i].state < SECTOR_dirty)
			continue;

		if (s->s[i].nr_replicas + s->s[i].replicas_reserved >= min_replicas)
			return i << SECTOR_SHIFT;
	}

	return -1;
}
|
|
|
|
loff_t bch2_seek_pagecache_data(struct inode *vinode,
|
|
loff_t start_offset,
|
|
loff_t end_offset,
|
|
unsigned min_replicas,
|
|
bool nonblock)
|
|
{
|
|
struct folio_batch fbatch;
|
|
pgoff_t start_index = start_offset >> PAGE_SHIFT;
|
|
pgoff_t end_index = end_offset >> PAGE_SHIFT;
|
|
pgoff_t index = start_index;
|
|
unsigned i;
|
|
loff_t ret;
|
|
int offset;
|
|
|
|
folio_batch_init(&fbatch);
|
|
|
|
while (filemap_get_folios(vinode->i_mapping,
|
|
&index, end_index, &fbatch)) {
|
|
for (i = 0; i < folio_batch_count(&fbatch); i++) {
|
|
struct folio *folio = fbatch.folios[i];
|
|
|
|
if (!nonblock) {
|
|
folio_lock(folio);
|
|
} else if (!folio_trylock(folio)) {
|
|
folio_batch_release(&fbatch);
|
|
return -EAGAIN;
|
|
}
|
|
|
|
offset = folio_data_offset(folio,
|
|
max(folio_pos(folio), start_offset),
|
|
min_replicas);
|
|
if (offset >= 0) {
|
|
ret = clamp(folio_pos(folio) + offset,
|
|
start_offset, end_offset);
|
|
folio_unlock(folio);
|
|
folio_batch_release(&fbatch);
|
|
return ret;
|
|
}
|
|
folio_unlock(folio);
|
|
}
|
|
folio_batch_release(&fbatch);
|
|
cond_resched();
|
|
}
|
|
|
|
return end_offset;
|
|
}
|
|
|
|
/*
|
|
* Search for a hole in a folio.
|
|
*
|
|
* The filemap layer returns -ENOENT if no folio exists, so reuse the same error
|
|
* code to indicate a pagecache hole exists at the returned offset. Otherwise
|
|
* return 0 if the folio is filled with data, or an error code. This function
|
|
* can return -EAGAIN if nonblock is specified.
|
|
*/
|
|
static int folio_hole_offset(struct address_space *mapping, loff_t *offset,
|
|
unsigned min_replicas, bool nonblock)
|
|
{
|
|
struct folio *folio;
|
|
struct bch_folio *s;
|
|
unsigned i, sectors;
|
|
int ret = -ENOENT;
|
|
|
|
folio = __filemap_get_folio(mapping, *offset >> PAGE_SHIFT,
|
|
FGP_LOCK|(nonblock ? FGP_NOWAIT : 0), 0);
|
|
if (IS_ERR(folio))
|
|
return PTR_ERR(folio);
|
|
|
|
s = bch2_folio(folio);
|
|
if (!s)
|
|
goto unlock;
|
|
|
|
sectors = folio_sectors(folio);
|
|
for (i = folio_pos_to_s(folio, *offset); i < sectors; i++)
|
|
if (s->s[i].state < SECTOR_dirty ||
|
|
s->s[i].nr_replicas + s->s[i].replicas_reserved < min_replicas) {
|
|
*offset = max(*offset,
|
|
folio_pos(folio) + (i << SECTOR_SHIFT));
|
|
goto unlock;
|
|
}
|
|
|
|
*offset = folio_end_pos(folio);
|
|
ret = 0;
|
|
unlock:
|
|
folio_unlock(folio);
|
|
folio_put(folio);
|
|
return ret;
|
|
}
|
|
|
|
loff_t bch2_seek_pagecache_hole(struct inode *vinode,
|
|
loff_t start_offset,
|
|
loff_t end_offset,
|
|
unsigned min_replicas,
|
|
bool nonblock)
|
|
{
|
|
struct address_space *mapping = vinode->i_mapping;
|
|
loff_t offset = start_offset;
|
|
loff_t ret = 0;
|
|
|
|
while (!ret && offset < end_offset)
|
|
ret = folio_hole_offset(mapping, &offset, min_replicas, nonblock);
|
|
|
|
if (ret && ret != -ENOENT)
|
|
return ret;
|
|
return min(offset, end_offset);
|
|
}
|
|
|
|
int bch2_clamp_data_hole(struct inode *inode,
|
|
u64 *hole_start,
|
|
u64 *hole_end,
|
|
unsigned min_replicas,
|
|
bool nonblock)
|
|
{
|
|
loff_t ret;
|
|
|
|
ret = bch2_seek_pagecache_hole(inode,
|
|
*hole_start << 9, *hole_end << 9, min_replicas, nonblock) >> 9;
|
|
if (ret < 0)
|
|
return ret;
|
|
|
|
*hole_start = ret;
|
|
|
|
if (*hole_start == *hole_end)
|
|
return 0;
|
|
|
|
ret = bch2_seek_pagecache_data(inode,
|
|
*hole_start << 9, *hole_end << 9, min_replicas, nonblock) >> 9;
|
|
if (ret < 0)
|
|
return ret;
|
|
|
|
*hole_end = ret;
|
|
return 0;
|
|
}
|
|
|
|
#endif /* NO_BCACHEFS_FS */
|