bcachefs fixes for 6.12-rc5

Lots of hotfixes:
 - transaction restart injection has been shaking out a few things
 
 - fix a data corruption in the buffered write path on -ENOSPC, found by
   xfstests generic/299
 
 - Some small show_options fixes
 
 - Repair mismatches in inode hash type, seed: different snapshot
   versions of an inode must have the same hash type and seed, used for
   directory entries and xattrs. We were checking the hash seed, but not
   the type, and a user contributed a filesystem where the hash type on
   one inode had somehow been flipped; these fixes allow his filesystem
   to repair.
 
   Additionally, the hash type flip made some directory entries
   invisible, which were then recreated by userspace; so the hash check
   code now checks for duplicate non-dangling dirents, and renames one of
   them if necessary.
 
 - Don't use wait_event_interruptible() in recovery: this fixes some
   filesystems failing to mount with -ERESTARTSYS
 
 - Workaround for kvmalloc not supporting > INT_MAX allocations, causing
   an -ENOMEM when allocating the sorted array of journal keys: this
   allows a 75 TB filesystem to mount
 
 - Make sure bch_inode_unpacked.bi_snapshot is set in the old inode
   compat path: this allows Marcin's filesystem (in use since before
   6.7) to repair and mount.
 -----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCgAdFiEEKnAFLkS8Qha+jvQrE6szbY3KbnYFAmcX4vYACgkQE6szbY3K
 bnbywxAArBfIJfshWq5Wk9WztenzUmyUmV2HIgntT/iN4ty4eIpZ26VSvHcGvgkU
 j3wx+OuxMTPBGc3fjUS+gALf/BGcQEgh6oPZCV+6M3kasTzNzG2jYOCkLqKbpcO1
 V5n/Le/SM1X2grkgTm/H+TulGHNgG9gJ2U4kjihroJrTbTesZhzcW/qlz6RWo7U1
 02NvLop4WE9M6WaW9RzsHK2llRUAl2Z3oRMuwNz3IIijCpm98STGD4gyvGoMV2b8
 qNsXjy7b2lkYObKI29yWF0caRzWK1LRz79afRlnNVSJb6DK1QB83ms5Qa8rprCU4
 uOq0wsGWyg6lzwQ19X+2TvUYABopVk2HXLlzTO/lJrWeMTuYJVPZ7KZi3l6ubw5T
 GIsAD5qMdCm8E5nXX8hG//0rOIl6QK288+zMQyRCvAkCL+iN2k0TU8qKAEEC44de
 vj6ZyNqbuLR39LLz9K09ZhzIZGk09ELpxOJ2Wwwj4ZFriwphWDtFgBtBUpNo/KWA
 inBfq2lZJsmNjfns9vCqOmNOStOJxXnyMOR25sTv7wM69QPGkl41dPY3oeuG8lRk
 cU/qJQKlpTKJbFeXiEKWKDnMzWxOnovqLFC0tKu2qAYM6vAz+AtwTXgthVFGh21U
 QoUDbsnQCCixMkS2AksCo7nivLrxmV/EeYm5pgeiU38VdA5ofBM=
 =OpYN
 -----END PGP SIGNATURE-----

Merge tag 'bcachefs-2024-10-22' of https://github.com/koverstreet/bcachefs

Pull bcachefs fixes from Kent Overstreet:
 "Lots of hotfixes:

   - transaction restart injection has been shaking out a few things

   - fix a data corruption in the buffered write path on -ENOSPC, found
     by xfstests generic/299

   - Some small show_options fixes

   - Repair mismatches in inode hash type, seed: different snapshot
     versions of an inode must have the same hash type and seed, used for
     directory entries and xattrs. We were checking the hash seed, but
     not the type, and a user contributed a filesystem where the hash
     type on one inode had somehow been flipped; these fixes allow his
     filesystem to repair.

     Additionally, the hash type flip made some directory entries
     invisible, which were then recreated by userspace; so the hash
     check code now checks for duplicate non-dangling dirents, and
     renames one of them if necessary.

   - Don't use wait_event_interruptible() in recovery: this fixes some
     filesystems failing to mount with -ERESTARTSYS

   - Workaround for kvmalloc not supporting > INT_MAX allocations,
     causing an -ENOMEM when allocating the sorted array of journal
     keys: this allows a 75 TB filesystem to mount

   - Make sure bch_inode_unpacked.bi_snapshot is set in the old inode
     compat path: this allows Marcin's filesystem (in use since before
     6.7) to repair and mount"

* tag 'bcachefs-2024-10-22' of https://github.com/koverstreet/bcachefs: (26 commits)
  bcachefs: Set bch_inode_unpacked.bi_snapshot in old inode path
  bcachefs: Mark more errors as AUTOFIX
  bcachefs: Workaround for kvmalloc() not supporting > INT_MAX allocations
  bcachefs: Don't use wait_event_interruptible() in recovery
  bcachefs: Fix __bch2_fsck_err() warning
  bcachefs: fsck: Improve hash_check_key()
  bcachefs: bch2_hash_set_or_get_in_snapshot()
  bcachefs: Repair mismatches in inode hash seed, type
  bcachefs: Add hash seed, type to inode_to_text()
  bcachefs: INODE_STR_HASH() for bch_inode_unpacked
  bcachefs: Run in-kernel offline fsck without ratelimit errors
  bcachefs: skip mount option handle for empty string.
  bcachefs: fix incorrect show_options results
  bcachefs: Fix data corruption on -ENOSPC in buffered write path
  bcachefs: bch2_folio_reservation_get_partial() is now better behaved
  bcachefs: fix disk reservation accounting in bch2_folio_reservation_get()
  bcachefS: ec: fix data type on stripe deletion
  bcachefs: Don't use commit_do() unnecessarily
  bcachefs: handle restarts in bch2_bucket_io_time_reset()
  bcachefs: fix restart handling in __bch2_resume_logged_op_finsert()
  ...
This commit is contained in:
Linus Torvalds 2024-10-24 12:38:59 -07:00
commit c1e822754c
41 changed files with 471 additions and 201 deletions

View File

@ -1977,7 +1977,7 @@ static void bch2_do_discards_fast_work(struct work_struct *work)
ca->mi.bucket_size,
GFP_KERNEL);
int ret = bch2_trans_do(c, NULL, NULL,
int ret = bch2_trans_commit_do(c, NULL, NULL,
BCH_WATERMARK_btree|
BCH_TRANS_COMMIT_no_enospc,
bch2_clear_bucket_needs_discard(trans, POS(ca->dev_idx, bucket)));
@ -2137,14 +2137,15 @@ static void bch2_do_invalidates_work(struct work_struct *work)
struct bkey_s_c k = next_lru_key(trans, &iter, ca, &wrapped);
ret = bkey_err(k);
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
continue;
if (ret)
break;
goto restart_err;
if (!k.k)
break;
ret = invalidate_one_bucket(trans, &iter, k, &nr_to_invalidate);
restart_err:
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
continue;
if (ret)
break;
@ -2350,24 +2351,19 @@ int bch2_dev_remove_alloc(struct bch_fs *c, struct bch_dev *ca)
/* Bucket IO clocks: */
int bch2_bucket_io_time_reset(struct btree_trans *trans, unsigned dev,
static int __bch2_bucket_io_time_reset(struct btree_trans *trans, unsigned dev,
size_t bucket_nr, int rw)
{
struct bch_fs *c = trans->c;
struct btree_iter iter;
struct bkey_i_alloc_v4 *a;
u64 now;
int ret = 0;
if (bch2_trans_relock(trans))
bch2_trans_begin(trans);
a = bch2_trans_start_alloc_update_noupdate(trans, &iter, POS(dev, bucket_nr));
ret = PTR_ERR_OR_ZERO(a);
struct bkey_i_alloc_v4 *a =
bch2_trans_start_alloc_update_noupdate(trans, &iter, POS(dev, bucket_nr));
int ret = PTR_ERR_OR_ZERO(a);
if (ret)
return ret;
now = bch2_current_io_time(c, rw);
u64 now = bch2_current_io_time(c, rw);
if (a->v.io_time[rw] == now)
goto out;
@ -2380,6 +2376,15 @@ out:
return ret;
}
/*
 * Public entry point for resetting a bucket's IO time: thin wrapper that
 * runs __bch2_bucket_io_time_reset() under nested_lockrestart_do().
 *
 * Returns whatever nested_lockrestart_do() evaluates to.
 * NOTE(review): presumably 0 on success or a negative error code, with
 * transaction restarts retried by the macro - confirm against its definition.
 */
int bch2_bucket_io_time_reset(struct btree_trans *trans, unsigned dev,
size_t bucket_nr, int rw)
{
/* If relocking reports failure, begin the transaction afresh before retrying */
if (bch2_trans_relock(trans))
bch2_trans_begin(trans);
return nested_lockrestart_do(trans, __bch2_bucket_io_time_reset(trans, dev, bucket_nr, rw));
}
/* Startup/shutdown (ro/rw): */
void bch2_recalc_capacity(struct bch_fs *c)

View File

@ -684,7 +684,7 @@ struct open_bucket *bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca,
struct bch_dev_usage usage;
struct open_bucket *ob;
bch2_trans_do(c, NULL, NULL, 0,
bch2_trans_do(c,
PTR_ERR_OR_ZERO(ob = bch2_bucket_alloc_trans(trans, ca, watermark,
data_type, cl, false, &usage)));
return ob;

View File

@ -820,12 +820,22 @@ static int bch2_alloc_write_key(struct btree_trans *trans,
* fix that here:
*/
alloc_data_type_set(&gc, gc.data_type);
if (gc.data_type != old_gc.data_type ||
gc.dirty_sectors != old_gc.dirty_sectors) {
ret = bch2_alloc_key_to_dev_counters(trans, ca, &old_gc, &gc, BTREE_TRIGGER_gc);
if (ret)
return ret;
/*
* Ugly: alloc_key_to_dev_counters(..., BTREE_TRIGGER_gc) is not
* safe w.r.t. transaction restarts, so fixup the gc_bucket so
* we don't run it twice:
*/
percpu_down_read(&c->mark_lock);
struct bucket *gc_m = gc_bucket(ca, iter->pos.offset);
gc_m->data_type = gc.data_type;
gc_m->dirty_sectors = gc.dirty_sectors;
percpu_up_read(&c->mark_lock);
}
if (fsck_err_on(new.data_type != gc.data_type,

View File

@ -1871,7 +1871,7 @@ static void btree_node_write_work(struct work_struct *work)
}
} else {
ret = bch2_trans_do(c, NULL, NULL, 0,
ret = bch2_trans_do(c,
bch2_btree_node_update_key_get_iter(trans, b, &wbio->key,
BCH_WATERMARK_interior_updates|
BCH_TRANS_COMMIT_journal_reclaim|

View File

@ -912,6 +912,8 @@ struct bkey_s_c bch2_btree_iter_peek_and_restart_outlined(struct btree_iter *);
_ret; \
})
#define bch2_trans_do(_c, _do) bch2_trans_run(_c, lockrestart_do(trans, _do))
struct btree_trans *__bch2_trans_get(struct bch_fs *, unsigned);
void bch2_trans_put(struct btree_trans *);

View File

@ -668,7 +668,7 @@ int bch2_btree_insert(struct bch_fs *c, enum btree_id id, struct bkey_i *k,
struct disk_reservation *disk_res, int flags,
enum btree_iter_update_trigger_flags iter_flags)
{
return bch2_trans_do(c, disk_res, NULL, flags,
return bch2_trans_commit_do(c, disk_res, NULL, flags,
bch2_btree_insert_trans(trans, id, k, iter_flags));
}
@ -865,7 +865,7 @@ __bch2_fs_log_msg(struct bch_fs *c, unsigned commit_flags, const char *fmt,
memcpy(l->d, buf.buf, buf.pos);
c->journal.early_journal_entries.nr += jset_u64s(u64s);
} else {
ret = bch2_trans_do(c, NULL, NULL,
ret = bch2_trans_commit_do(c, NULL, NULL,
BCH_TRANS_COMMIT_lazy_rw|commit_flags,
__bch2_trans_log_msg(trans, &buf, u64s));
}

View File

@ -192,7 +192,7 @@ static inline int bch2_trans_commit(struct btree_trans *trans,
nested_lockrestart_do(_trans, _do ?: bch2_trans_commit(_trans, (_disk_res),\
(_journal_seq), (_flags)))
#define bch2_trans_do(_c, _disk_res, _journal_seq, _flags, _do) \
#define bch2_trans_commit_do(_c, _disk_res, _journal_seq, _flags, _do) \
bch2_trans_run(_c, commit_do(trans, _disk_res, _journal_seq, _flags, _do))
#define trans_for_each_update(_trans, _i) \

View File

@ -2239,10 +2239,8 @@ static void async_btree_node_rewrite_work(struct work_struct *work)
struct async_btree_rewrite *a =
container_of(work, struct async_btree_rewrite, work);
struct bch_fs *c = a->c;
int ret;
ret = bch2_trans_do(c, NULL, NULL, 0,
async_btree_node_rewrite_trans(trans, a));
int ret = bch2_trans_do(c, async_btree_node_rewrite_trans(trans, a));
bch_err_fn_ratelimited(c, ret);
bch2_write_ref_put(c, BCH_WRITE_REF_node_rewrite);
kfree(a);

View File

@ -1160,11 +1160,11 @@ int bch2_trans_mark_dev_sbs(struct bch_fs *c)
#define SECTORS_CACHE 1024
int __bch2_disk_reservation_add(struct bch_fs *c, struct disk_reservation *res,
u64 sectors, int flags)
u64 sectors, enum bch_reservation_flags flags)
{
struct bch_fs_pcpu *pcpu;
u64 old, get;
s64 sectors_available;
u64 sectors_available;
int ret;
percpu_down_read(&c->mark_lock);
@ -1202,6 +1202,9 @@ recalculate:
percpu_u64_set(&c->pcpu->sectors_available, 0);
sectors_available = avail_factor(__bch2_fs_usage_read_short(c).free);
if (sectors_available && (flags & BCH_DISK_RESERVATION_PARTIAL))
sectors = min(sectors, sectors_available);
if (sectors <= sectors_available ||
(flags & BCH_DISK_RESERVATION_NOFAIL)) {
atomic64_set(&c->sectors_available,

View File

@ -344,14 +344,16 @@ static inline void bch2_disk_reservation_put(struct bch_fs *c,
}
}
#define BCH_DISK_RESERVATION_NOFAIL (1 << 0)
enum bch_reservation_flags {
BCH_DISK_RESERVATION_NOFAIL = 1 << 0,
BCH_DISK_RESERVATION_PARTIAL = 1 << 1,
};
int __bch2_disk_reservation_add(struct bch_fs *,
struct disk_reservation *,
u64, int);
int __bch2_disk_reservation_add(struct bch_fs *, struct disk_reservation *,
u64, enum bch_reservation_flags);
static inline int bch2_disk_reservation_add(struct bch_fs *c, struct disk_reservation *res,
u64 sectors, int flags)
u64 sectors, enum bch_reservation_flags flags)
{
#ifdef __KERNEL__
u64 old, new;

View File

@ -225,6 +225,7 @@ static long bch2_ioctl_fsck_offline(struct bch_ioctl_fsck_offline __user *user_a
opt_set(thr->opts, stdio, (u64)(unsigned long)&thr->thr.stdio);
opt_set(thr->opts, read_only, 1);
opt_set(thr->opts, ratelimit_errors, 0);
/* We need request_key() to be called before we punt to kthread: */
opt_set(thr->opts, nostart, true);

View File

@ -2,6 +2,7 @@
#include <linux/log2.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include "darray.h"
int __bch2_darray_resize_noprof(darray_char *d, size_t element_size, size_t new_size, gfp_t gfp)
@ -9,7 +10,19 @@ int __bch2_darray_resize_noprof(darray_char *d, size_t element_size, size_t new_
if (new_size > d->size) {
new_size = roundup_pow_of_two(new_size);
void *data = kvmalloc_array_noprof(new_size, element_size, gfp);
/*
* This is a workaround: kvmalloc() doesn't support > INT_MAX
* allocations, but vmalloc() does.
* The limit needs to be lifted from kvmalloc, and when it does
* we'll go back to just using that.
*/
size_t bytes;
if (unlikely(check_mul_overflow(new_size, element_size, &bytes)))
return -ENOMEM;
void *data = likely(bytes < INT_MAX)
? kvmalloc_noprof(bytes, gfp)
: vmalloc_noprof(bytes);
if (!data)
return -ENOMEM;

View File

@ -250,13 +250,6 @@ int bch2_dirent_create(struct btree_trans *trans, subvol_inum dir,
return ret;
}
static void dirent_copy_target(struct bkey_i_dirent *dst,
struct bkey_s_c_dirent src)
{
dst->v.d_inum = src.v->d_inum;
dst->v.d_type = src.v->d_type;
}
int bch2_dirent_read_target(struct btree_trans *trans, subvol_inum dir,
struct bkey_s_c_dirent d, subvol_inum *target)
{

View File

@ -34,6 +34,13 @@ static inline unsigned dirent_val_u64s(unsigned len)
int bch2_dirent_read_target(struct btree_trans *, subvol_inum,
struct bkey_s_c_dirent, subvol_inum *);
/*
 * Copy the target of a dirent - its d_inum and d_type fields - from @src
 * into @dst. No other field of @dst (key position, name) is touched.
 */
static inline void dirent_copy_target(struct bkey_i_dirent *dst,
struct bkey_s_c_dirent src)
{
dst->v.d_inum = src.v->d_inum;
dst->v.d_type = src.v->d_type;
}
int bch2_dirent_create_snapshot(struct btree_trans *, u32, u64, u32,
const struct bch_hash_info *, u8,
const struct qstr *, u64, u64 *,

View File

@ -856,8 +856,10 @@ int bch2_dev_usage_init(struct bch_dev *ca, bool gc)
};
u64 v[3] = { ca->mi.nbuckets - ca->mi.first_bucket, 0, 0 };
int ret = bch2_trans_do(c, NULL, NULL, 0,
bch2_disk_accounting_mod(trans, &acc, v, ARRAY_SIZE(v), gc));
int ret = bch2_trans_do(c, ({
bch2_disk_accounting_mod(trans, &acc, v, ARRAY_SIZE(v), gc) ?:
(!gc ? bch2_trans_commit(trans, NULL, NULL, 0) : 0);
}));
bch_err_fn(c, ret);
return ret;
}

View File

@ -266,12 +266,12 @@ static int __mark_stripe_bucket(struct btree_trans *trans,
if (!deleting) {
a->stripe = s.k->p.offset;
a->stripe_redundancy = s.v->nr_redundant;
alloc_data_type_set(a, data_type);
} else {
a->stripe = 0;
a->stripe_redundancy = 0;
alloc_data_type_set(a, BCH_DATA_user);
}
alloc_data_type_set(a, data_type);
err:
printbuf_exit(&buf);
return ret;
@ -1186,7 +1186,7 @@ static void ec_stripe_delete_work(struct work_struct *work)
if (!idx)
break;
int ret = bch2_trans_do(c, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
int ret = bch2_trans_commit_do(c, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
ec_stripe_delete(trans, idx));
bch_err_fn(c, ret);
if (ret)
@ -1519,7 +1519,7 @@ static void ec_stripe_create(struct ec_stripe_new *s)
goto err;
}
ret = bch2_trans_do(c, &s->res, NULL,
ret = bch2_trans_commit_do(c, &s->res, NULL,
BCH_TRANS_COMMIT_no_check_rw|
BCH_TRANS_COMMIT_no_enospc,
ec_stripe_key_update(trans,

View File

@ -251,7 +251,10 @@ int __bch2_fsck_err(struct bch_fs *c,
* delete the key)
* - and we don't need to warn if we're not prompting
*/
WARN_ON(!(flags & FSCK_AUTOFIX) && !trans && bch2_current_has_btree_trans(c));
WARN_ON((flags & FSCK_CAN_FIX) &&
!(flags & FSCK_AUTOFIX) &&
!trans &&
bch2_current_has_btree_trans(c));
if ((flags & FSCK_CAN_FIX) &&
test_bit(err, c->sb.errors_silent))

View File

@ -856,6 +856,12 @@ static int __bch2_buffered_write(struct bch_inode_info *inode,
folios_trunc(&fs, fi);
end = min(end, folio_end_pos(darray_last(fs)));
} else {
if (!folio_test_uptodate(f)) {
ret = bch2_read_single_folio(f, mapping);
if (ret)
goto out;
}
folios_trunc(&fs, fi + 1);
end = f_pos + f_reserved;
}

View File

@ -399,14 +399,17 @@ void bch2_folio_reservation_put(struct bch_fs *c,
bch2_quota_reservation_put(c, inode, &res->quota);
}
int bch2_folio_reservation_get(struct bch_fs *c,
static int __bch2_folio_reservation_get(struct bch_fs *c,
struct bch_inode_info *inode,
struct folio *folio,
struct bch2_folio_reservation *res,
size_t offset, size_t len)
size_t offset, size_t len,
bool partial)
{
struct bch_folio *s = bch2_folio_create(folio, 0);
unsigned i, disk_sectors = 0, quota_sectors = 0;
struct disk_reservation disk_res = {};
size_t reserved = len;
int ret;
if (!s)
@ -422,23 +425,56 @@ int bch2_folio_reservation_get(struct bch_fs *c,
}
if (disk_sectors) {
ret = bch2_disk_reservation_add(c, &res->disk, disk_sectors, 0);
ret = bch2_disk_reservation_add(c, &disk_res, disk_sectors,
partial ? BCH_DISK_RESERVATION_PARTIAL : 0);
if (unlikely(ret))
return ret;
if (unlikely(disk_res.sectors != disk_sectors)) {
disk_sectors = quota_sectors = 0;
for (i = round_down(offset, block_bytes(c)) >> 9;
i < round_up(offset + len, block_bytes(c)) >> 9;
i++) {
disk_sectors += sectors_to_reserve(&s->s[i], res->disk.nr_replicas);
if (disk_sectors > disk_res.sectors) {
/*
* Make sure to get a reservation that's
* aligned to the filesystem blocksize:
*/
unsigned reserved_offset = round_down(i << 9, block_bytes(c));
reserved = clamp(reserved_offset, offset, offset + len) - offset;
if (!reserved) {
bch2_disk_reservation_put(c, &disk_res);
return -BCH_ERR_ENOSPC_disk_reservation;
}
break;
}
quota_sectors += s->s[i].state == SECTOR_unallocated;
}
}
}
if (quota_sectors) {
ret = bch2_quota_reservation_add(c, inode, &res->quota, quota_sectors, true);
if (unlikely(ret)) {
struct disk_reservation tmp = { .sectors = disk_sectors };
bch2_disk_reservation_put(c, &tmp);
res->disk.sectors -= disk_sectors;
bch2_disk_reservation_put(c, &disk_res);
return ret;
}
}
return 0;
res->disk.sectors += disk_res.sectors;
return partial ? reserved : 0;
}
/*
 * All-or-nothing folio reservation: forwards to
 * __bch2_folio_reservation_get() with partial == false, so the helper is
 * never asked for a partial disk reservation.
 *
 * Returns the helper's result unchanged.
 */
int bch2_folio_reservation_get(struct bch_fs *c,
struct bch_inode_info *inode,
struct folio *folio,
struct bch2_folio_reservation *res,
size_t offset, size_t len)
{
return __bch2_folio_reservation_get(c, inode, folio, res, offset, len, false);
}
ssize_t bch2_folio_reservation_get_partial(struct bch_fs *c,
@ -447,23 +483,7 @@ ssize_t bch2_folio_reservation_get_partial(struct bch_fs *c,
struct bch2_folio_reservation *res,
size_t offset, size_t len)
{
size_t l, reserved = 0;
int ret;
while ((l = len - reserved)) {
while ((ret = bch2_folio_reservation_get(c, inode, folio, res, offset, l))) {
if ((offset & (block_bytes(c) - 1)) + l <= block_bytes(c))
return reserved ?: ret;
len = reserved + l;
l /= 2;
}
offset += l;
reserved += l;
}
return reserved;
return __bch2_folio_reservation_get(c, inode, folio, res, offset, len, true);
}
static void bch2_clear_folio_bits(struct folio *folio)

View File

@ -182,7 +182,7 @@ static int bch2_flush_inode(struct bch_fs *c,
struct bch_inode_unpacked u;
int ret = bch2_inode_find_by_inum(c, inode_inum(inode), &u) ?:
bch2_journal_flush_seq(&c->journal, u.bi_journal_seq) ?:
bch2_journal_flush_seq(&c->journal, u.bi_journal_seq, TASK_INTERRUPTIBLE) ?:
bch2_inode_flush_nocow_writes(c, inode);
bch2_write_ref_put(c, BCH_WRITE_REF_fsync);
return ret;

View File

@ -656,7 +656,7 @@ static struct dentry *bch2_lookup(struct inode *vdir, struct dentry *dentry,
struct bch_hash_info hash = bch2_hash_info_init(c, &dir->ei_inode);
struct bch_inode_info *inode;
bch2_trans_do(c, NULL, NULL, 0,
bch2_trans_do(c,
PTR_ERR_OR_ZERO(inode = bch2_lookup_trans(trans, inode_inum(dir),
&hash, &dentry->d_name)));
if (IS_ERR(inode))
@ -869,7 +869,7 @@ static int bch2_rename2(struct mnt_idmap *idmap,
ret = bch2_subvol_is_ro_trans(trans, src_dir->ei_inum.subvol) ?:
bch2_subvol_is_ro_trans(trans, dst_dir->ei_inum.subvol);
if (ret)
goto err;
goto err_tx_restart;
if (inode_attr_changing(dst_dir, src_inode, Inode_opt_project)) {
ret = bch2_fs_quota_transfer(c, src_inode,
@ -1266,7 +1266,7 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info,
bch2_trans_iter_init(trans, &iter, BTREE_ID_extents,
POS(ei->v.i_ino, start), 0);
while (true) {
while (!ret || bch2_err_matches(ret, BCH_ERR_transaction_restart)) {
enum btree_id data_btree = BTREE_ID_extents;
bch2_trans_begin(trans);
@ -1274,14 +1274,14 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info,
u32 snapshot;
ret = bch2_subvolume_get_snapshot(trans, ei->ei_inum.subvol, &snapshot);
if (ret)
goto err;
continue;
bch2_btree_iter_set_snapshot(&iter, snapshot);
k = bch2_btree_iter_peek_upto(&iter, end);
ret = bkey_err(k);
if (ret)
goto err;
continue;
if (!k.k)
break;
@ -1301,7 +1301,7 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info,
ret = bch2_read_indirect_extent(trans, &data_btree,
&offset_into_extent, &cur);
if (ret)
break;
continue;
k = bkey_i_to_s_c(cur.k);
bch2_bkey_buf_realloc(&prev, c, k.k->u64s);
@ -1329,10 +1329,6 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info,
bch2_btree_iter_set_pos(&iter,
POS(iter.pos.inode, iter.pos.offset + sectors));
err:
if (ret &&
!bch2_err_matches(ret, BCH_ERR_transaction_restart))
break;
}
bch2_trans_iter_exit(trans, &iter);
@ -2040,7 +2036,7 @@ static int bch2_show_options(struct seq_file *seq, struct dentry *root)
bch2_opts_to_text(&buf, c->opts, c, c->disk_sb.sb,
OPT_MOUNT, OPT_HIDDEN, OPT_SHOW_MOUNT_STYLE);
printbuf_nul_terminate(&buf);
seq_puts(seq, buf.buf);
seq_printf(seq, ",%s", buf.buf);
int ret = buf.allocation_failure ? -ENOMEM : 0;
printbuf_exit(&buf);

View File

@ -929,35 +929,138 @@ static int get_visible_inodes(struct btree_trans *trans,
return ret;
}
static int hash_redo_key(struct btree_trans *trans,
/*
 * Check whether the object a dirent points to actually exists.
 *
 * Returns:
 *   1   - target exists
 *   0   - target is missing (dangling dirent)
 *   any other nonzero value - error propagated from the lookup
 *        (callers in this file test for ret < 0)
 */
static int dirent_has_target(struct btree_trans *trans, struct bkey_s_c_dirent d)
{
if (d.v->d_type == DT_SUBVOL) {
/* Subvolume dirent: the target is the child subvolume */
u32 snap;
u64 inum;
int ret = subvol_lookup(trans, le32_to_cpu(d.v->d_child_subvol), &snap, &inum);
/* ENOENT just means "no target"; anything else is a real error */
if (ret && !bch2_err_matches(ret, ENOENT))
return ret;
return !ret;
} else {
/* Regular dirent: look up the inode key in the dirent's own snapshot */
struct btree_iter iter;
struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes,
SPOS(0, le64_to_cpu(d.v->d_inum), d.k->p.snapshot), 0);
int ret = bkey_err(k);
if (ret)
return ret;
/* Target exists only if the key found at that position is an inode key */
ret = bkey_is_inode(k.k);
bch2_trans_iter_exit(trans, &iter);
return ret;
}
}
/*
* Prefer to delete the first one, since that will be the one at the wrong
* offset:
* return value: 0 -> delete k1, 1 -> delete k2
*/
static int hash_pick_winner(struct btree_trans *trans,
const struct bch_hash_desc desc,
struct bch_hash_info *hash_info,
struct btree_iter *k_iter, struct bkey_s_c k)
struct bkey_s_c k1,
struct bkey_s_c k2)
{
struct bkey_i *delete;
struct bkey_i *tmp;
if (bkey_val_bytes(k1.k) == bkey_val_bytes(k2.k) &&
!memcmp(k1.v, k2.v, bkey_val_bytes(k1.k)))
return 0;
delete = bch2_trans_kmalloc(trans, sizeof(*delete));
if (IS_ERR(delete))
return PTR_ERR(delete);
switch (desc.btree_id) {
case BTREE_ID_dirents: {
int ret = dirent_has_target(trans, bkey_s_c_to_dirent(k1));
if (ret < 0)
return ret;
if (!ret)
return 0;
tmp = bch2_bkey_make_mut_noupdate(trans, k);
if (IS_ERR(tmp))
return PTR_ERR(tmp);
ret = dirent_has_target(trans, bkey_s_c_to_dirent(k2));
if (ret < 0)
return ret;
if (!ret)
return 1;
return 2;
}
default:
return 0;
}
}
bkey_init(&delete->k);
delete->k.p = k_iter->pos;
return bch2_btree_iter_traverse(k_iter) ?:
bch2_trans_update(trans, k_iter, delete, 0) ?:
bch2_hash_set_in_snapshot(trans, desc, hash_info,
(subvol_inum) { 0, k.k->p.inode },
k.k->p.snapshot, tmp,
STR_HASH_must_create|
BTREE_UPDATE_internal_snapshot_node) ?:
bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
/*
 * After a dirent has been written at a new position, update the
 * bi_dir_offset backpointer of the inode(s) it points to.
 *
 * Keys that are not dirents are ignored (returns 0). For dirents, every
 * inode returned by get_visible_inodes() for the target inode number has
 * bi_dir_offset set to the new dirent's offset and is written back.
 * Only bi_dir_offset is updated here.
 *
 * Subvolume dirents are not supported: this hits BUG().
 *
 * @desc and @hash_info are not used by the body.
 *
 * Returns 0 on success, or the error from get_visible_inodes() /
 * __bch2_fsck_write_inode().
 */
static int fsck_update_backpointers(struct btree_trans *trans,
struct snapshots_seen *s,
const struct bch_hash_desc desc,
struct bch_hash_info *hash_info,
struct bkey_i *new)
{
if (new->k.type != KEY_TYPE_dirent)
return 0;
struct bkey_i_dirent *d = bkey_i_to_dirent(new);
struct inode_walker target = inode_walker_init();
int ret = 0;
if (d->v.d_type == DT_SUBVOL) {
BUG();
} else {
ret = get_visible_inodes(trans, &target, s, le64_to_cpu(d->v.d_inum));
if (ret)
goto err;
darray_for_each(target.inodes, i) {
i->inode.bi_dir_offset = d->k.p.offset;
ret = __bch2_fsck_write_inode(trans, &i->inode);
if (ret)
goto err;
}
}
err:
/* Runs on both the success and the error path */
inode_walker_exit(&target);
return ret;
}
/*
 * Create a renamed copy of dirent @old, named
 * "<old name>.fsck_renamed-<N>", pointing at the same target.
 *
 * N runs from 0 to 999; the first name whose insertion does not fail with
 * EEXIST is used. If all 1000 candidates collide, the last EEXIST error is
 * returned. The old dirent is not deleted here.
 *
 * On successful insertion, the target inode backpointers are updated via
 * fsck_update_backpointers().
 *
 * Returns 0 on success, -EINVAL if the renamed key would exceed U8_MAX
 * u64s, or an error from allocation / insertion / backpointer update.
 */
static int fsck_rename_dirent(struct btree_trans *trans,
struct snapshots_seen *s,
const struct bch_hash_desc desc,
struct bch_hash_info *hash_info,
struct bkey_s_c_dirent old)
{
struct qstr old_name = bch2_dirent_get_name(old);
/* 32 extra bytes leave room for the ".fsck_renamed-<N>" suffix */
struct bkey_i_dirent *new = bch2_trans_kmalloc(trans, bkey_bytes(old.k) + 32);
int ret = PTR_ERR_OR_ZERO(new);
if (ret)
return ret;
bkey_dirent_init(&new->k_i);
dirent_copy_target(new, old);
new->k.p = old.k->p;
for (unsigned i = 0; i < 1000; i++) {
unsigned len = sprintf(new->v.d_name, "%.*s.fsck_renamed-%u",
old_name.len, old_name.name, i);
unsigned u64s = BKEY_U64s + dirent_val_u64s(len);
/* Bail out if the size does not fit the U8_MAX limit checked here */
if (u64s > U8_MAX)
return -EINVAL;
new->k.u64s = u64s;
ret = bch2_hash_set_in_snapshot(trans, bch2_dirent_hash_desc, hash_info,
(subvol_inum) { 0, old.k->p.inode },
old.k->p.snapshot, &new->k_i,
BTREE_UPDATE_internal_snapshot_node);
/* Only a name collision warrants trying the next suffix */
if (!bch2_err_matches(ret, EEXIST))
break;
}
if (ret)
return ret;
return fsck_update_backpointers(trans, s, desc, hash_info, &new->k_i);
}
static int hash_check_key(struct btree_trans *trans,
struct snapshots_seen *s,
const struct bch_hash_desc desc,
struct bch_hash_info *hash_info,
struct btree_iter *k_iter, struct bkey_s_c hash_k)
@ -986,16 +1089,9 @@ static int hash_check_key(struct btree_trans *trans,
if (bkey_eq(k.k->p, hash_k.k->p))
break;
if (fsck_err_on(k.k->type == desc.key_type &&
!desc.cmp_bkey(k, hash_k),
trans, hash_table_key_duplicate,
"duplicate hash table keys:\n%s",
(printbuf_reset(&buf),
bch2_bkey_val_to_text(&buf, c, hash_k),
buf.buf))) {
ret = bch2_hash_delete_at(trans, desc, hash_info, k_iter, 0) ?: 1;
break;
}
if (k.k->type == desc.key_type &&
!desc.cmp_bkey(k, hash_k))
goto duplicate_entries;
if (bkey_deleted(k.k)) {
bch2_trans_iter_exit(trans, &iter);
@ -1008,18 +1104,66 @@ out:
return ret;
bad_hash:
if (fsck_err(trans, hash_table_key_wrong_offset,
"hash table key at wrong offset: btree %s inode %llu offset %llu, hashed to %llu\n%s",
"hash table key at wrong offset: btree %s inode %llu offset %llu, hashed to %llu\n %s",
bch2_btree_id_str(desc.btree_id), hash_k.k->p.inode, hash_k.k->p.offset, hash,
(printbuf_reset(&buf),
bch2_bkey_val_to_text(&buf, c, hash_k), buf.buf))) {
ret = hash_redo_key(trans, desc, hash_info, k_iter, hash_k);
bch_err_fn(c, ret);
struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, hash_k);
if (IS_ERR(new))
return PTR_ERR(new);
k = bch2_hash_set_or_get_in_snapshot(trans, &iter, desc, hash_info,
(subvol_inum) { 0, hash_k.k->p.inode },
hash_k.k->p.snapshot, new,
STR_HASH_must_create|
BTREE_ITER_with_updates|
BTREE_UPDATE_internal_snapshot_node);
ret = bkey_err(k);
if (ret)
return ret;
ret = -BCH_ERR_transaction_restart_nested;
goto out;
if (k.k)
goto duplicate_entries;
ret = bch2_hash_delete_at(trans, desc, hash_info, k_iter,
BTREE_UPDATE_internal_snapshot_node) ?:
fsck_update_backpointers(trans, s, desc, hash_info, new) ?:
bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc) ?:
-BCH_ERR_transaction_restart_nested;
goto out;
}
fsck_err:
goto out;
duplicate_entries:
ret = hash_pick_winner(trans, desc, hash_info, hash_k, k);
if (ret < 0)
goto out;
if (!fsck_err(trans, hash_table_key_duplicate,
"duplicate hash table keys%s:\n%s",
ret != 2 ? "" : ", both point to valid inodes",
(printbuf_reset(&buf),
bch2_bkey_val_to_text(&buf, c, hash_k),
prt_newline(&buf),
bch2_bkey_val_to_text(&buf, c, k),
buf.buf)))
goto out;
switch (ret) {
case 0:
ret = bch2_hash_delete_at(trans, desc, hash_info, k_iter, 0);
break;
case 1:
ret = bch2_hash_delete_at(trans, desc, hash_info, &iter, 0);
break;
case 2:
ret = fsck_rename_dirent(trans, s, desc, hash_info, bkey_s_c_to_dirent(hash_k)) ?:
bch2_hash_delete_at(trans, desc, hash_info, k_iter, 0);
goto out;
}
ret = bch2_trans_commit(trans, NULL, NULL, 0) ?:
-BCH_ERR_transaction_restart_nested;
goto out;
}
static struct bkey_s_c_dirent dirent_get_by_pos(struct btree_trans *trans,
@ -1096,10 +1240,36 @@ fsck_err:
return ret;
}
/*
 * Find the version of inode @inum that fsck treats as the "snapshot root"
 * and unpack it into @root.
 *
 * Walks the inodes btree in reverse across all snapshots, starting from
 * SPOS(0, inum, U32_MAX), and takes the first key for @inum that is an
 * inode key. The walk stops once a key for a different inode number is
 * reached.
 *
 * Returns 0 on success or the iteration error. If iteration finishes
 * without error and no inode key was found, this hits BUG(); a failure to
 * unpack the found key is likewise fatal (BUG_ON).
 */
static int get_snapshot_root_inode(struct btree_trans *trans,
struct bch_inode_unpacked *root,
u64 inum)
{
struct btree_iter iter;
struct bkey_s_c k;
int ret = 0;
for_each_btree_key_reverse_norestart(trans, iter, BTREE_ID_inodes,
SPOS(0, inum, U32_MAX),
BTREE_ITER_all_snapshots, k, ret) {
/* Walked past the keys belonging to @inum */
if (k.k->p.offset != inum)
break;
if (bkey_is_inode(k.k))
goto found_root;
}
if (ret)
goto err;
/* No inode key for @inum at all */
BUG();
found_root:
BUG_ON(bch2_inode_unpack(k, root));
err:
bch2_trans_iter_exit(trans, &iter);
return ret;
}
static int check_inode(struct btree_trans *trans,
struct btree_iter *iter,
struct bkey_s_c k,
struct bch_inode_unpacked *prev,
struct bch_inode_unpacked *snapshot_root,
struct snapshots_seen *s)
{
struct bch_fs *c = trans->c;
@ -1123,16 +1293,19 @@ static int check_inode(struct btree_trans *trans,
BUG_ON(bch2_inode_unpack(k, &u));
if (prev->bi_inum != u.bi_inum)
*prev = u;
if (snapshot_root->bi_inum != u.bi_inum) {
ret = get_snapshot_root_inode(trans, snapshot_root, u.bi_inum);
if (ret)
goto err;
}
if (fsck_err_on(prev->bi_hash_seed != u.bi_hash_seed ||
inode_d_type(prev) != inode_d_type(&u),
if (fsck_err_on(u.bi_hash_seed != snapshot_root->bi_hash_seed ||
INODE_STR_HASH(&u) != INODE_STR_HASH(snapshot_root),
trans, inode_snapshot_mismatch,
"inodes in different snapshots don't match")) {
bch_err(c, "repair not implemented yet");
ret = -BCH_ERR_fsck_repair_unimplemented;
goto err_noprint;
u.bi_hash_seed = snapshot_root->bi_hash_seed;
SET_INODE_STR_HASH(&u, INODE_STR_HASH(snapshot_root));
do_update = true;
}
if (u.bi_dir || u.bi_dir_offset) {
@ -1285,7 +1458,7 @@ err_noprint:
int bch2_check_inodes(struct bch_fs *c)
{
struct bch_inode_unpacked prev = { 0 };
struct bch_inode_unpacked snapshot_root = {};
struct snapshots_seen s;
snapshots_seen_init(&s);
@ -1295,7 +1468,7 @@ int bch2_check_inodes(struct bch_fs *c)
POS_MIN,
BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k,
NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
check_inode(trans, &iter, k, &prev, &s)));
check_inode(trans, &iter, k, &snapshot_root, &s)));
snapshots_seen_exit(&s);
bch_err_fn(c, ret);
@ -2307,7 +2480,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
*hash_info = bch2_hash_info_init(c, &i->inode);
dir->first_this_inode = false;
ret = hash_check_key(trans, bch2_dirent_hash_desc, hash_info, iter, k);
ret = hash_check_key(trans, s, bch2_dirent_hash_desc, hash_info, iter, k);
if (ret < 0)
goto err;
if (ret) {
@ -2421,7 +2594,7 @@ static int check_xattr(struct btree_trans *trans, struct btree_iter *iter,
*hash_info = bch2_hash_info_init(c, &i->inode);
inode->first_this_inode = false;
ret = hash_check_key(trans, bch2_xattr_hash_desc, hash_info, iter, k);
ret = hash_check_key(trans, NULL, bch2_xattr_hash_desc, hash_info, iter, k);
bch_err_fn(c, ret);
return ret;
}
@ -2509,7 +2682,7 @@ fsck_err:
/* Get root directory, create if it doesn't exist: */
int bch2_check_root(struct bch_fs *c)
{
int ret = bch2_trans_do(c, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
int ret = bch2_trans_commit_do(c, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
check_root_trans(trans));
bch_err_fn(c, ret);
return ret;

View File

@ -164,7 +164,7 @@ static noinline int bch2_inode_unpack_v1(struct bkey_s_c_inode inode,
int ret;
#define x(_name, _bits) \
if (fieldnr++ == INODE_NR_FIELDS(inode.v)) { \
if (fieldnr++ == INODEv1_NR_FIELDS(inode.v)) { \
unsigned offset = offsetof(struct bch_inode_unpacked, _name);\
memset((void *) unpacked + offset, 0, \
sizeof(*unpacked) - offset); \
@ -283,6 +283,8 @@ static noinline int bch2_inode_unpack_slowpath(struct bkey_s_c k,
{
memset(unpacked, 0, sizeof(*unpacked));
unpacked->bi_snapshot = k.k->p.snapshot;
switch (k.k->type) {
case KEY_TYPE_inode: {
struct bkey_s_c_inode inode = bkey_s_c_to_inode(k);
@ -293,10 +295,10 @@ static noinline int bch2_inode_unpack_slowpath(struct bkey_s_c k,
unpacked->bi_flags = le32_to_cpu(inode.v->bi_flags);
unpacked->bi_mode = le16_to_cpu(inode.v->bi_mode);
if (INODE_NEW_VARINT(inode.v)) {
if (INODEv1_NEW_VARINT(inode.v)) {
return bch2_inode_unpack_v2(unpacked, inode.v->fields,
bkey_val_end(inode),
INODE_NR_FIELDS(inode.v));
INODEv1_NR_FIELDS(inode.v));
} else {
return bch2_inode_unpack_v1(inode, unpacked);
}
@ -471,10 +473,10 @@ int bch2_inode_validate(struct bch_fs *c, struct bkey_s_c k,
struct bkey_s_c_inode inode = bkey_s_c_to_inode(k);
int ret = 0;
bkey_fsck_err_on(INODE_STR_HASH(inode.v) >= BCH_STR_HASH_NR,
bkey_fsck_err_on(INODEv1_STR_HASH(inode.v) >= BCH_STR_HASH_NR,
c, inode_str_hash_invalid,
"invalid str hash type (%llu >= %u)",
INODE_STR_HASH(inode.v), BCH_STR_HASH_NR);
INODEv1_STR_HASH(inode.v), BCH_STR_HASH_NR);
ret = __bch2_inode_validate(c, k, flags);
fsck_err:
@ -533,6 +535,10 @@ static void __bch2_inode_unpacked_to_text(struct printbuf *out,
prt_printf(out, "(%x)\n", inode->bi_flags);
prt_printf(out, "journal_seq=%llu\n", inode->bi_journal_seq);
prt_printf(out, "hash_seed=%llx\n", inode->bi_hash_seed);
prt_printf(out, "hash_type=");
bch2_prt_str_hash_type(out, INODE_STR_HASH(inode));
prt_newline(out);
prt_printf(out, "bi_size=%llu\n", inode->bi_size);
prt_printf(out, "bi_sectors=%llu\n", inode->bi_sectors);
prt_printf(out, "bi_version=%llu\n", inode->bi_version);
@ -800,10 +806,8 @@ void bch2_inode_init_early(struct bch_fs *c,
memset(inode_u, 0, sizeof(*inode_u));
/* ick */
inode_u->bi_flags |= str_hash << INODE_STR_HASH_OFFSET;
get_random_bytes(&inode_u->bi_hash_seed,
sizeof(inode_u->bi_hash_seed));
SET_INODE_STR_HASH(inode_u, str_hash);
get_random_bytes(&inode_u->bi_hash_seed, sizeof(inode_u->bi_hash_seed));
}
void bch2_inode_init_late(struct bch_inode_unpacked *inode_u, u64 now,
@ -1087,8 +1091,7 @@ int bch2_inode_find_by_inum_trans(struct btree_trans *trans,
/*
 * Look up an inode by subvolume-qualified inode number, outside of an
 * existing transaction: wraps bch2_inode_find_by_inum_trans() in
 * bch2_trans_do() and returns whatever that evaluates to.
 *
 * NOTE(review): two forms of the return statement appear below - the
 * five-argument bch2_trans_do(c, NULL, NULL, 0, ...) call and the
 * two-argument bch2_trans_do(c, ...) call. Presumably these are the
 * removed and added lines of this diff hunk respectively; confirm which
 * one belongs in the tree.
 */
int bch2_inode_find_by_inum(struct bch_fs *c, subvol_inum inum,
struct bch_inode_unpacked *inode)
{
return bch2_trans_do(c, NULL, NULL, 0,
bch2_inode_find_by_inum_trans(trans, inum, inode));
return bch2_trans_do(c, bch2_inode_find_by_inum_trans(trans, inum, inode));
}
int bch2_inode_nlink_inc(struct bch_inode_unpacked *bi)

View File

@ -92,6 +92,7 @@ struct bch_inode_unpacked {
BCH_INODE_FIELDS_v3()
#undef x
};
BITMASK(INODE_STR_HASH, struct bch_inode_unpacked, bi_flags, 20, 24);
struct bkey_inode_buf {
struct bkey_i_inode_v3 inode;

View File

@ -150,9 +150,9 @@ enum __bch_inode_flags {
#undef x
};
LE32_BITMASK(INODE_STR_HASH, struct bch_inode, bi_flags, 20, 24);
LE32_BITMASK(INODE_NR_FIELDS, struct bch_inode, bi_flags, 24, 31);
LE32_BITMASK(INODE_NEW_VARINT, struct bch_inode, bi_flags, 31, 32);
LE32_BITMASK(INODEv1_STR_HASH, struct bch_inode, bi_flags, 20, 24);
LE32_BITMASK(INODEv1_NR_FIELDS, struct bch_inode, bi_flags, 24, 31);
LE32_BITMASK(INODEv1_NEW_VARINT,struct bch_inode, bi_flags, 31, 32);
LE64_BITMASK(INODEv2_STR_HASH, struct bch_inode_v2, bi_flags, 20, 24);
LE64_BITMASK(INODEv2_NR_FIELDS, struct bch_inode_v2, bi_flags, 24, 31);

View File

@ -377,7 +377,7 @@ static int __bch2_resume_logged_op_finsert(struct btree_trans *trans,
* check for missing subvolume before fpunch, as in resume we don't want
* it to be a fatal error
*/
ret = __bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot, warn_errors);
ret = lockrestart_do(trans, __bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot, warn_errors));
if (ret)
return ret;

View File

@ -409,8 +409,8 @@ retry:
bch2_trans_begin(trans);
rbio->bio.bi_status = 0;
k = bch2_btree_iter_peek_slot(&iter);
if (bkey_err(k))
ret = lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_slot(&iter)));
if (ret)
goto err;
bch2_bkey_buf_reassemble(&sk, c, k);
@ -557,7 +557,7 @@ out:
/*
 * Run __bch2_rbio_narrow_crcs() for @rbio in a transaction on rbio->c,
 * passing BCH_TRANS_COMMIT_no_enospc; the result is discarded (void).
 *
 * NOTE(review): the bch2_trans_do(...) and bch2_trans_commit_do(...)
 * lines below look like the removed and added forms of the same call
 * opener in this diff hunk - only one of them should be present in the
 * actual source; confirm against the tree.
 */
static noinline void bch2_rbio_narrow_crcs(struct bch_read_bio *rbio)
{
bch2_trans_do(rbio->c, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
bch2_trans_commit_do(rbio->c, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
__bch2_rbio_narrow_crcs(trans, rbio));
}

View File

@ -1437,7 +1437,7 @@ again:
* freeing up space on specific disks, which means that
* allocations for specific disks may hang arbitrarily long:
*/
ret = bch2_trans_do(c, NULL, NULL, 0,
ret = bch2_trans_run(c, lockrestart_do(trans,
bch2_alloc_sectors_start_trans(trans,
op->target,
op->opts.erasure_code && !(op->flags & BCH_WRITE_CACHED),
@ -1447,7 +1447,7 @@ again:
op->nr_replicas_required,
op->watermark,
op->flags,
&op->cl, &wp));
&op->cl, &wp)));
if (unlikely(ret)) {
if (bch2_err_matches(ret, BCH_ERR_operation_blocked))
break;

View File

@ -758,7 +758,7 @@ out:
return ret;
}
int bch2_journal_flush_seq(struct journal *j, u64 seq)
int bch2_journal_flush_seq(struct journal *j, u64 seq, unsigned task_state)
{
u64 start_time = local_clock();
int ret, ret2;
@ -769,7 +769,9 @@ int bch2_journal_flush_seq(struct journal *j, u64 seq)
if (seq <= j->flushed_seq_ondisk)
return 0;
ret = wait_event_interruptible(j->wait, (ret2 = bch2_journal_flush_seq_async(j, seq, NULL)));
ret = wait_event_state(j->wait,
(ret2 = bch2_journal_flush_seq_async(j, seq, NULL)),
task_state);
if (!ret)
bch2_time_stats_update(j->flush_seq_time, start_time);
@ -788,7 +790,7 @@ void bch2_journal_flush_async(struct journal *j, struct closure *parent)
/*
 * Flush the journal up to its current sequence number, read with
 * atomic64_read(&j->seq), by calling bch2_journal_flush_seq().
 *
 * NOTE(review): both a two-argument and a three-argument call appear
 * below. The three-argument form passes TASK_UNINTERRUPTIBLE as the
 * extra argument; presumably the two-argument line is the removed side
 * of this diff hunk and the three-argument line is its replacement -
 * confirm against the tree.
 */
int bch2_journal_flush(struct journal *j)
{
return bch2_journal_flush_seq(j, atomic64_read(&j->seq));
return bch2_journal_flush_seq(j, atomic64_read(&j->seq), TASK_UNINTERRUPTIBLE);
}
/*
@ -851,7 +853,7 @@ int bch2_journal_meta(struct journal *j)
bch2_journal_res_put(j, &res);
return bch2_journal_flush_seq(j, res.seq);
return bch2_journal_flush_seq(j, res.seq, TASK_UNINTERRUPTIBLE);
}
/* block/unlock the journal: */

View File

@ -401,7 +401,7 @@ void bch2_journal_entry_res_resize(struct journal *,
int bch2_journal_flush_seq_async(struct journal *, u64, struct closure *);
void bch2_journal_flush_async(struct journal *, struct closure *);
int bch2_journal_flush_seq(struct journal *, u64);
int bch2_journal_flush_seq(struct journal *, u64, unsigned);
int bch2_journal_flush(struct journal *);
bool bch2_journal_noflush_seq(struct journal *, u64);
int bch2_journal_meta(struct journal *);

View File

@ -63,7 +63,7 @@ const char * const bch2_compression_opts[] = {
NULL
};
const char * const bch2_str_hash_types[] = {
const char * const __bch2_str_hash_types[] = {
BCH_STR_HASH_TYPES()
NULL
};
@ -115,6 +115,7 @@ PRT_STR_OPT_BOUNDSCHECKED(fs_usage_type, enum bch_fs_usage_type);
PRT_STR_OPT_BOUNDSCHECKED(data_type, enum bch_data_type);
PRT_STR_OPT_BOUNDSCHECKED(csum_type, enum bch_csum_type);
PRT_STR_OPT_BOUNDSCHECKED(compression_type, enum bch_compression_type);
PRT_STR_OPT_BOUNDSCHECKED(str_hash_type, enum bch_str_hash_type);
static int bch2_opt_fix_errors_parse(struct bch_fs *c, const char *val, u64 *res,
struct printbuf *err)
@ -596,6 +597,9 @@ int bch2_parse_mount_opts(struct bch_fs *c, struct bch_opts *opts,
copied_opts_start = copied_opts;
while ((opt = strsep(&copied_opts, ",")) != NULL) {
if (!*opt)
continue;
name = strsep(&opt, "=");
val = opt;

View File

@ -18,7 +18,7 @@ extern const char * const bch2_sb_compat[];
extern const char * const __bch2_btree_ids[];
extern const char * const bch2_csum_opts[];
extern const char * const bch2_compression_opts[];
extern const char * const bch2_str_hash_types[];
extern const char * const __bch2_str_hash_types[];
extern const char * const bch2_str_hash_opts[];
extern const char * const __bch2_data_types[];
extern const char * const bch2_member_states[];
@ -29,6 +29,7 @@ void bch2_prt_fs_usage_type(struct printbuf *, enum bch_fs_usage_type);
void bch2_prt_data_type(struct printbuf *, enum bch_data_type);
void bch2_prt_csum_type(struct printbuf *, enum bch_csum_type);
void bch2_prt_compression_type(struct printbuf *, enum bch_compression_type);
void bch2_prt_str_hash_type(struct printbuf *, enum bch_str_hash_type);
static inline const char *bch2_d_type_str(unsigned d_type)
{

View File

@ -869,7 +869,7 @@ static int bch2_set_quota(struct super_block *sb, struct kqid qid,
bkey_quota_init(&new_quota.k_i);
new_quota.k.p = POS(qid.type, from_kqid(&init_user_ns, qid));
ret = bch2_trans_do(c, NULL, NULL, 0,
ret = bch2_trans_commit_do(c, NULL, NULL, 0,
bch2_set_quota_trans(trans, &new_quota, qdq)) ?:
__bch2_quota_set(c, bkey_i_to_s_c(&new_quota.k_i), qdq);

View File

@ -70,7 +70,9 @@ err:
int bch2_set_rebalance_needs_scan(struct bch_fs *c, u64 inum)
{
int ret = bch2_trans_do(c, NULL, NULL, BCH_TRANS_COMMIT_no_enospc|BCH_TRANS_COMMIT_lazy_rw,
int ret = bch2_trans_commit_do(c, NULL, NULL,
BCH_TRANS_COMMIT_no_enospc|
BCH_TRANS_COMMIT_lazy_rw,
__bch2_set_rebalance_needs_scan(trans, inum));
rebalance_wakeup(c);
return ret;

View File

@ -1091,7 +1091,7 @@ int bch2_fs_initialize(struct bch_fs *c)
bch2_inode_init_early(c, &lostfound_inode);
ret = bch2_trans_do(c, NULL, NULL, 0,
ret = bch2_trans_commit_do(c, NULL, NULL, 0,
bch2_create_trans(trans,
BCACHEFS_ROOT_SUBVOL_INUM,
&root_inode, &lostfound_inode,

View File

@ -267,8 +267,8 @@ enum bch_fsck_flags {
x(journal_entry_dup_same_device, 246, 0) \
x(inode_bi_subvol_missing, 247, 0) \
x(inode_bi_subvol_wrong, 248, 0) \
x(inode_points_to_missing_dirent, 249, 0) \
x(inode_points_to_wrong_dirent, 250, 0) \
x(inode_points_to_missing_dirent, 249, FSCK_AUTOFIX) \
x(inode_points_to_wrong_dirent, 250, FSCK_AUTOFIX) \
x(inode_bi_parent_nonzero, 251, 0) \
x(dirent_to_missing_parent_subvol, 252, 0) \
x(dirent_not_visible_in_parent_subvol, 253, 0) \

View File

@ -46,8 +46,7 @@ bch2_hash_info_init(struct bch_fs *c, const struct bch_inode_unpacked *bi)
{
/* XXX ick */
struct bch_hash_info info = {
.type = (bi->bi_flags >> INODE_STR_HASH_OFFSET) &
~(~0U << INODE_STR_HASH_BITS),
.type = INODE_STR_HASH(bi),
.siphash_key = { .k0 = bi->bi_hash_seed }
};
@ -253,19 +252,20 @@ int bch2_hash_needs_whiteout(struct btree_trans *trans,
}
static __always_inline
int bch2_hash_set_in_snapshot(struct btree_trans *trans,
struct bkey_s_c bch2_hash_set_or_get_in_snapshot(struct btree_trans *trans,
struct btree_iter *iter,
const struct bch_hash_desc desc,
const struct bch_hash_info *info,
subvol_inum inum, u32 snapshot,
struct bkey_i *insert,
enum btree_iter_update_trigger_flags flags)
{
struct btree_iter iter, slot = { NULL };
struct btree_iter slot = {};
struct bkey_s_c k;
bool found = false;
int ret;
for_each_btree_key_upto_norestart(trans, iter, desc.btree_id,
for_each_btree_key_upto_norestart(trans, *iter, desc.btree_id,
SPOS(insert->k.p.inode,
desc.hash_bkey(info, bkey_i_to_s_c(insert)),
snapshot),
@ -280,7 +280,7 @@ int bch2_hash_set_in_snapshot(struct btree_trans *trans,
}
if (!slot.path && !(flags & STR_HASH_must_replace))
bch2_trans_copy_iter(&slot, &iter);
bch2_trans_copy_iter(&slot, iter);
if (k.k->type != KEY_TYPE_hash_whiteout)
goto not_found;
@ -290,28 +290,49 @@ int bch2_hash_set_in_snapshot(struct btree_trans *trans,
ret = -BCH_ERR_ENOSPC_str_hash_create;
out:
bch2_trans_iter_exit(trans, &slot);
bch2_trans_iter_exit(trans, &iter);
return ret;
bch2_trans_iter_exit(trans, iter);
return ret ? bkey_s_c_err(ret) : bkey_s_c_null;
found:
found = true;
not_found:
if (!found && (flags & STR_HASH_must_replace)) {
if (found && (flags & STR_HASH_must_create)) {
bch2_trans_iter_exit(trans, &slot);
return k;
} else if (!found && (flags & STR_HASH_must_replace)) {
ret = -BCH_ERR_ENOENT_str_hash_set_must_replace;
} else if (found && (flags & STR_HASH_must_create)) {
ret = -BCH_ERR_EEXIST_str_hash_set;
} else {
if (!found && slot.path)
swap(iter, slot);
swap(*iter, slot);
insert->k.p = iter.pos;
ret = bch2_trans_update(trans, &iter, insert, flags);
insert->k.p = iter->pos;
ret = bch2_trans_update(trans, iter, insert, flags);
}
goto out;
}
/*
 * Insert @insert into the hash table described by @desc/@info, in the
 * given @snapshot.
 *
 * Thin wrapper around bch2_hash_set_or_get_in_snapshot() that collapses
 * its three outcomes into an errno-style int:
 *
 *  - the helper returned an error key: that error is returned as-is;
 *  - the helper returned a non-null key: the iterator it left behind is
 *    released here and -BCH_ERR_EEXIST_str_hash_set is returned;
 *  - the helper returned a null key: 0 is returned.
 */
static __always_inline
int bch2_hash_set_in_snapshot(struct btree_trans *trans,
			   const struct bch_hash_desc desc,
			   const struct bch_hash_info *info,
			   subvol_inum inum, u32 snapshot,
			   struct bkey_i *insert,
			   enum btree_iter_update_trigger_flags flags)
{
	struct btree_iter iter;
	struct bkey_s_c existing =
		bch2_hash_set_or_get_in_snapshot(trans, &iter, desc, info, inum,
						 snapshot, insert, flags);
	int err = bkey_err(existing);

	if (err)
		return err;

	/* Null key: nothing was handed back, so nothing to release. */
	if (!existing.k)
		return 0;

	/* A key was handed back: drop the iterator before reporting EEXIST. */
	bch2_trans_iter_exit(trans, &iter);
	return -BCH_ERR_EEXIST_str_hash_set;
}
static __always_inline
int bch2_hash_set(struct btree_trans *trans,
const struct bch_hash_desc desc,
@ -363,8 +384,11 @@ int bch2_hash_delete(struct btree_trans *trans,
struct btree_iter iter;
struct bkey_s_c k = bch2_hash_lookup(trans, &iter, desc, info, inum, key,
BTREE_ITER_intent);
int ret = bkey_err(k) ?:
bch2_hash_delete_at(trans, desc, info, &iter, 0);
int ret = bkey_err(k);
if (ret)
return ret;
ret = bch2_hash_delete_at(trans, desc, info, &iter, 0);
bch2_trans_iter_exit(trans, &iter);
return ret;
}

View File

@ -319,8 +319,7 @@ int bch2_subvol_is_ro_trans(struct btree_trans *trans, u32 subvol)
/*
 * Non-transactional entry point for bch2_subvol_is_ro_trans(): runs it
 * for @subvol inside bch2_trans_do() and returns whatever that
 * evaluates to.
 *
 * NOTE(review): two forms of the return statement appear below - the
 * five-argument bch2_trans_do(c, NULL, NULL, 0, ...) call and the
 * two-argument bch2_trans_do(c, ...) call. Presumably these are the
 * removed and added lines of this diff hunk respectively; confirm which
 * one belongs in the tree.
 */
int bch2_subvol_is_ro(struct bch_fs *c, u32 subvol)
{
return bch2_trans_do(c, NULL, NULL, 0,
bch2_subvol_is_ro_trans(trans, subvol));
return bch2_trans_do(c, bch2_subvol_is_ro_trans(trans, subvol));
}
int bch2_snapshot_get_subvol(struct btree_trans *trans, u32 snapshot,
@ -676,7 +675,7 @@ err:
/* set bi_subvol on root inode */
int bch2_fs_upgrade_for_subvolumes(struct bch_fs *c)
{
int ret = bch2_trans_do(c, NULL, NULL, BCH_TRANS_COMMIT_lazy_rw,
int ret = bch2_trans_commit_do(c, NULL, NULL, BCH_TRANS_COMMIT_lazy_rw,
__bch2_fs_upgrade_for_subvolumes(trans));
bch_err_fn(c, ret);
return ret;

View File

@ -1972,7 +1972,7 @@ int bch2_dev_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
};
u64 v[3] = { nbuckets - old_nbuckets, 0, 0 };
ret = bch2_trans_do(ca->fs, NULL, NULL, 0,
ret = bch2_trans_commit_do(ca->fs, NULL, NULL, 0,
bch2_disk_accounting_mod(trans, &acc, v, ARRAY_SIZE(v), false)) ?:
bch2_dev_freespace_init(c, ca, old_nbuckets, nbuckets);
if (ret)

View File

@ -450,7 +450,7 @@ static int insert_test_overlapping_extent(struct bch_fs *c, u64 inum, u64 start,
k.k_i.k.p.snapshot = snapid;
k.k_i.k.size = len;
ret = bch2_trans_do(c, NULL, NULL, 0,
ret = bch2_trans_commit_do(c, NULL, NULL, 0,
bch2_btree_insert_nonextent(trans, BTREE_ID_extents, &k.k_i,
BTREE_UPDATE_internal_snapshot_node));
bch_err_fn(c, ret);
@ -510,7 +510,7 @@ static int test_snapshots(struct bch_fs *c, u64 nr)
if (ret)
return ret;
ret = bch2_trans_do(c, NULL, NULL, 0,
ret = bch2_trans_commit_do(c, NULL, NULL, 0,
bch2_snapshot_node_create(trans, U32_MAX,
snapids,
snapid_subvols,

View File

@ -330,7 +330,7 @@ static int bch2_xattr_get_handler(const struct xattr_handler *handler,
{
struct bch_inode_info *inode = to_bch_ei(vinode);
struct bch_fs *c = inode->v.i_sb->s_fs_info;
int ret = bch2_trans_do(c, NULL, NULL, 0,
int ret = bch2_trans_do(c,
bch2_xattr_get_trans(trans, inode, name, buffer, size, handler->flags));
if (ret < 0 && bch2_err_matches(ret, ENOENT))