mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-01-17 02:15:57 +00:00
bcachefs fixes for 6.9-rc6
- fix a few more deadlocks in recovery - fix u32/u64 issues in mi_btree_bitmap - btree key cache shrinker now actually frees, with more instrumentation coming so we can verify that it's working correctly more easily in the future -----BEGIN PGP SIGNATURE----- iQIzBAABCgAdFiEEKnAFLkS8Qha+jvQrE6szbY3KbnYFAmYmveYACgkQE6szbY3K bnY+kA//dtdKfPliuQlfYhUvIZSvgEy+KxjDaDmeJFVMKFYHiip/JJt7V/YU7jWW DmWGtPqo1hoGZnic7h9fstbBCRgUdIYGBInqAWPzL3wmFYe5FPE02KN5fuZ+A+Mp sn0QFML4oA0uxD7TRXfCeNx3NRwXonztletVskXCuLa0T8iTOTdOuAH0MCGow3OC mlfRZyK05f6Q0UOIzvntfBl8Tkr+yk5g0GFz54U7s+qs/zJsYiYfruXuq6AxLTy2 xVMDj3Hc6W0vggVpv68HInluubl/b7rVSy+w59GG0D3iQ9/fBdqitFLdw49ReXzi J//ctZLb3n+IM4lA7t5ev0lY7bvI2FwFNkrL4qW41E4un5eQ3ghbyOmMoz87svyg 4JW/CPGP7uKNVmfRuHn1qhgJ9/vIXkObJVl9GKZF2BylaZwjMM5YrL5MZwrKFQYy 9BMgemgvHFK+wRi74q/OUu3PyH045AoJdIKI66ypFexhGi5YeNqtRHLUKdfSrJR+ eEkAUaHcgLLfxyk+fIRvcSK+Q9j3BibvsSkU3vLSnl2B+xdvfJqb+I/yvBt/SZQW P09JceDQABRBMu9beVbVqMED+PniSZVfG2eU2jBcZ+jhbGQgmiWfMNJVKS4/0uwz PRS33P2mViVZ6PJokWyecbgGtVxKrK2ruPdcu6/W05Qi0Vv58+k= =lCNd -----END PGP SIGNATURE----- Merge tag 'bcachefs-2024-04-22' of https://evilpiepirate.org/git/bcachefs Pull bcachefs fixes from Kent Overstreet: "Nothing too crazy in this one, and it looks like (fingers crossed) the recovery and repair issues are settling down - although there's going to be a long tail there, as we've still yet to really ramp up on error injection or syzbot. 
- fix a few more deadlocks in recovery - fix u32/u64 issues in mi_btree_bitmap - btree key cache shrinker now actually frees, with more instrumentation coming so we can verify that it's working correctly more easily in the future" * tag 'bcachefs-2024-04-22' of https://evilpiepirate.org/git/bcachefs: bcachefs: If we run merges at a lower watermark, they must be nonblocking bcachefs: Fix inode early destruction path bcachefs: Fix deadlock in journal write path bcachefs: Tweak btree key cache shrinker so it actually frees bcachefs: bkey_cached.btree_trans_barrier_seq needs to be a ulong bcachefs: Fix missing call to bch2_fs_allocator_background_exit() bcachefs: Check for journal entries overruning end of sb clean section bcachefs: Fix bio alloc in check_extent_checksum() bcachefs: fix leak in bch2_gc_write_reflink_key bcachefs: KEY_TYPE_error is allowed for reflink bcachefs: Fix bch2_dev_btree_bitmap_marked_sectors() shift bcachefs: make sure to release last journal pin in replay bcachefs: node scan: ignore multiple nodes with same seq if interior bcachefs: Fix format specifier in validate_bset_keys() bcachefs: Fix null ptr deref in twf from BCH_IOCTL_FSCK_OFFLINE
This commit is contained in:
commit
a2c63a3f3d
@ -470,7 +470,7 @@ found:
|
||||
goto err;
|
||||
}
|
||||
|
||||
bio = bio_alloc(ca->disk_sb.bdev, 1, REQ_OP_READ, GFP_KERNEL);
|
||||
bio = bio_alloc(ca->disk_sb.bdev, buf_pages(data_buf, bytes), REQ_OP_READ, GFP_KERNEL);
|
||||
bio->bi_iter.bi_sector = p.ptr.offset;
|
||||
bch2_bio_map(bio, data_buf, bytes);
|
||||
ret = submit_bio_wait(bio);
|
||||
|
@ -1504,7 +1504,8 @@ enum btree_id_flags {
|
||||
BIT_ULL(KEY_TYPE_stripe)) \
|
||||
x(reflink, 7, BTREE_ID_EXTENTS|BTREE_ID_DATA, \
|
||||
BIT_ULL(KEY_TYPE_reflink_v)| \
|
||||
BIT_ULL(KEY_TYPE_indirect_inline_data)) \
|
||||
BIT_ULL(KEY_TYPE_indirect_inline_data)| \
|
||||
BIT_ULL(KEY_TYPE_error)) \
|
||||
x(subvolumes, 8, 0, \
|
||||
BIT_ULL(KEY_TYPE_subvolume)) \
|
||||
x(snapshots, 9, 0, \
|
||||
|
@ -1587,7 +1587,7 @@ static int bch2_gc_write_reflink_key(struct btree_trans *trans,
|
||||
struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, k);
|
||||
ret = PTR_ERR_OR_ZERO(new);
|
||||
if (ret)
|
||||
return ret;
|
||||
goto out;
|
||||
|
||||
if (!r->refcount)
|
||||
new->k.type = KEY_TYPE_deleted;
|
||||
@ -1595,6 +1595,7 @@ static int bch2_gc_write_reflink_key(struct btree_trans *trans,
|
||||
*bkey_refcount(bkey_i_to_s(new)) = cpu_to_le64(r->refcount);
|
||||
ret = bch2_trans_update(trans, iter, new, 0);
|
||||
}
|
||||
out:
|
||||
fsck_err:
|
||||
printbuf_exit(&buf);
|
||||
return ret;
|
||||
|
@ -888,7 +888,7 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b,
|
||||
-BCH_ERR_btree_node_read_err_fixable,
|
||||
c, NULL, b, i,
|
||||
btree_node_bkey_bad_u64s,
|
||||
"bad k->u64s %u (min %u max %lu)", k->u64s,
|
||||
"bad k->u64s %u (min %u max %zu)", k->u64s,
|
||||
bkeyp_key_u64s(&b->format, k),
|
||||
U8_MAX - BKEY_U64s + bkeyp_key_u64s(&b->format, k)))
|
||||
goto drop_this_key;
|
||||
|
@ -842,8 +842,6 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
|
||||
* Newest freed entries are at the end of the list - once we hit one
|
||||
* that's too new to be freed, we can bail out:
|
||||
*/
|
||||
scanned += bc->nr_freed_nonpcpu;
|
||||
|
||||
list_for_each_entry_safe(ck, t, &bc->freed_nonpcpu, list) {
|
||||
if (!poll_state_synchronize_srcu(&c->btree_trans_barrier,
|
||||
ck->btree_trans_barrier_seq))
|
||||
@ -857,11 +855,6 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
|
||||
bc->nr_freed_nonpcpu--;
|
||||
}
|
||||
|
||||
if (scanned >= nr)
|
||||
goto out;
|
||||
|
||||
scanned += bc->nr_freed_pcpu;
|
||||
|
||||
list_for_each_entry_safe(ck, t, &bc->freed_pcpu, list) {
|
||||
if (!poll_state_synchronize_srcu(&c->btree_trans_barrier,
|
||||
ck->btree_trans_barrier_seq))
|
||||
@ -875,9 +868,6 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
|
||||
bc->nr_freed_pcpu--;
|
||||
}
|
||||
|
||||
if (scanned >= nr)
|
||||
goto out;
|
||||
|
||||
rcu_read_lock();
|
||||
tbl = rht_dereference_rcu(bc->table.tbl, &bc->table);
|
||||
if (bc->shrink_iter >= tbl->size)
|
||||
@ -893,12 +883,12 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
|
||||
next = rht_dereference_bucket_rcu(pos->next, tbl, bc->shrink_iter);
|
||||
ck = container_of(pos, struct bkey_cached, hash);
|
||||
|
||||
if (test_bit(BKEY_CACHED_DIRTY, &ck->flags))
|
||||
if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
|
||||
goto next;
|
||||
|
||||
if (test_bit(BKEY_CACHED_ACCESSED, &ck->flags))
|
||||
} else if (test_bit(BKEY_CACHED_ACCESSED, &ck->flags)) {
|
||||
clear_bit(BKEY_CACHED_ACCESSED, &ck->flags);
|
||||
else if (bkey_cached_lock_for_evict(ck)) {
|
||||
goto next;
|
||||
} else if (bkey_cached_lock_for_evict(ck)) {
|
||||
bkey_cached_evict(bc, ck);
|
||||
bkey_cached_free(bc, ck);
|
||||
}
|
||||
@ -916,7 +906,6 @@ next:
|
||||
} while (scanned < nr && bc->shrink_iter != start);
|
||||
|
||||
rcu_read_unlock();
|
||||
out:
|
||||
memalloc_nofs_restore(flags);
|
||||
srcu_read_unlock(&c->btree_trans_barrier, srcu_idx);
|
||||
mutex_unlock(&bc->lock);
|
||||
|
@ -302,6 +302,8 @@ again:
|
||||
|
||||
start->max_key = bpos_predecessor(n->min_key);
|
||||
start->range_updated = true;
|
||||
} else if (n->level) {
|
||||
n->overwritten = true;
|
||||
} else {
|
||||
struct printbuf buf = PRINTBUF;
|
||||
|
||||
|
@ -321,9 +321,9 @@ struct bkey_cached {
|
||||
struct btree_bkey_cached_common c;
|
||||
|
||||
unsigned long flags;
|
||||
unsigned long btree_trans_barrier_seq;
|
||||
u16 u64s;
|
||||
bool valid;
|
||||
u32 btree_trans_barrier_seq;
|
||||
struct bkey_cached_key key;
|
||||
|
||||
struct rhash_head hash;
|
||||
|
@ -1960,7 +1960,11 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans,
|
||||
if ((flags & BCH_WATERMARK_MASK) == BCH_WATERMARK_interior_updates)
|
||||
return 0;
|
||||
|
||||
if ((flags & BCH_WATERMARK_MASK) <= BCH_WATERMARK_reclaim) {
|
||||
flags &= ~BCH_WATERMARK_MASK;
|
||||
flags |= BCH_WATERMARK_btree;
|
||||
flags |= BCH_TRANS_COMMIT_journal_reclaim;
|
||||
}
|
||||
|
||||
b = trans->paths[path].l[level].b;
|
||||
|
||||
|
@ -232,13 +232,15 @@ static long bch2_ioctl_fsck_offline(struct bch_ioctl_fsck_offline __user *user_a
|
||||
/* We need request_key() to be called before we punt to kthread: */
|
||||
opt_set(thr->opts, nostart, true);
|
||||
|
||||
bch2_thread_with_stdio_init(&thr->thr, &bch2_offline_fsck_ops);
|
||||
|
||||
thr->c = bch2_fs_open(devs.data, arg.nr_devs, thr->opts);
|
||||
|
||||
if (!IS_ERR(thr->c) &&
|
||||
thr->c->opts.errors == BCH_ON_ERROR_panic)
|
||||
thr->c->opts.errors = BCH_ON_ERROR_ro;
|
||||
|
||||
ret = bch2_run_thread_with_stdio(&thr->thr, &bch2_offline_fsck_ops);
|
||||
ret = __bch2_run_thread_with_stdio(&thr->thr);
|
||||
out:
|
||||
darray_for_each(devs, i)
|
||||
kfree(*i);
|
||||
|
@ -188,7 +188,8 @@ static struct bch_inode_info *bch2_inode_insert(struct bch_fs *c, struct bch_ino
|
||||
BUG_ON(!old);
|
||||
|
||||
if (unlikely(old != inode)) {
|
||||
discard_new_inode(&inode->v);
|
||||
__destroy_inode(&inode->v);
|
||||
kmem_cache_free(bch2_inode_cache, inode);
|
||||
inode = old;
|
||||
} else {
|
||||
mutex_lock(&c->vfs_inodes_lock);
|
||||
@ -225,8 +226,10 @@ static struct bch_inode_info *bch2_new_inode(struct btree_trans *trans)
|
||||
|
||||
if (unlikely(!inode)) {
|
||||
int ret = drop_locks_do(trans, (inode = to_bch_ei(new_inode(c->vfs_sb))) ? 0 : -ENOMEM);
|
||||
if (ret && inode)
|
||||
discard_new_inode(&inode->v);
|
||||
if (ret && inode) {
|
||||
__destroy_inode(&inode->v);
|
||||
kmem_cache_free(bch2_inode_cache, inode);
|
||||
}
|
||||
if (ret)
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
|
@ -1723,7 +1723,7 @@ static void journal_write_endio(struct bio *bio)
|
||||
percpu_ref_put(&ca->io_ref);
|
||||
}
|
||||
|
||||
static CLOSURE_CALLBACK(do_journal_write)
|
||||
static CLOSURE_CALLBACK(journal_write_submit)
|
||||
{
|
||||
closure_type(w, struct journal_buf, io);
|
||||
struct journal *j = container_of(w, struct journal, buf[w->idx]);
|
||||
@ -1768,6 +1768,44 @@ static CLOSURE_CALLBACK(do_journal_write)
|
||||
continue_at(cl, journal_write_done, j->wq);
|
||||
}
|
||||
|
||||
/*
 * Closure callback for the preflush stage of a journal write.
 *
 * If this buffer is not yet the next sequence number to go to disk, the
 * closure is put on j->async_wait and this same callback is queued again on
 * j->wq. Otherwise, when w->separate_flush is set, a REQ_PREFLUSH write bio is
 * submitted to every rw member device and the closure continues at
 * journal_write_submit on j->wq; when it is not set, journal_write_submit is
 * called directly.
 */
static CLOSURE_CALLBACK(journal_write_preflush)
|
||||
{
|
||||
closure_type(w, struct journal_buf, io);
|
||||
struct journal *j = container_of(w, struct journal, buf[w->idx]);
|
||||
struct bch_fs *c = container_of(j, struct bch_fs, journal);
|
||||
|
||||
/*
 * Not our turn: an earlier sequence number has not reached disk yet.
 * NOTE(review): presumably the requeued callback only runs once
 * j->async_wait is woken - confirm against the closure API.
 */
if (j->seq_ondisk + 1 != le64_to_cpu(w->data->seq)) {
|
||||
spin_lock(&j->lock);
|
||||
closure_wait(&j->async_wait, cl);
|
||||
spin_unlock(&j->lock);
|
||||
|
||||
continue_at(cl, journal_write_preflush, j->wq);
|
||||
return;
|
||||
}
|
||||
|
||||
if (w->separate_flush) {
|
||||
for_each_rw_member(c, ca) {
|
||||
/* io_ref is dropped again in journal_write_endio, the bio's end_io */
percpu_ref_get(&ca->io_ref);
|
||||
|
||||
struct journal_device *ja = &ca->journal;
|
||||
struct bio *bio = &ja->bio[w->idx]->bio;
|
||||
bio_reset(bio, ca->disk_sb.bdev,
|
||||
REQ_OP_WRITE|REQ_SYNC|REQ_META|REQ_PREFLUSH);
|
||||
bio->bi_end_io = journal_write_endio;
|
||||
bio->bi_private = ca;
|
||||
closure_bio_submit(bio, cl);
|
||||
}
|
||||
|
||||
continue_at(cl, journal_write_submit, j->wq);
|
||||
} else {
|
||||
/*
|
||||
* no need to punt to another work item if we're not waiting on
|
||||
* preflushes
|
||||
*/
|
||||
journal_write_submit(&cl->work);
|
||||
}
|
||||
}
|
||||
|
||||
static int bch2_journal_write_prep(struct journal *j, struct journal_buf *w)
|
||||
{
|
||||
struct bch_fs *c = container_of(j, struct bch_fs, journal);
|
||||
@ -2033,23 +2071,9 @@ CLOSURE_CALLBACK(bch2_journal_write)
|
||||
goto err;
|
||||
|
||||
if (!JSET_NO_FLUSH(w->data))
|
||||
closure_wait_event(&j->async_wait, j->seq_ondisk + 1 == le64_to_cpu(w->data->seq));
|
||||
|
||||
if (!JSET_NO_FLUSH(w->data) && w->separate_flush) {
|
||||
for_each_rw_member(c, ca) {
|
||||
percpu_ref_get(&ca->io_ref);
|
||||
|
||||
struct journal_device *ja = &ca->journal;
|
||||
struct bio *bio = &ja->bio[w->idx]->bio;
|
||||
bio_reset(bio, ca->disk_sb.bdev,
|
||||
REQ_OP_WRITE|REQ_SYNC|REQ_META|REQ_PREFLUSH);
|
||||
bio->bi_end_io = journal_write_endio;
|
||||
bio->bi_private = ca;
|
||||
closure_bio_submit(bio, cl);
|
||||
}
|
||||
}
|
||||
|
||||
continue_at(cl, do_journal_write, j->wq);
|
||||
continue_at(cl, journal_write_preflush, j->wq);
|
||||
else
|
||||
continue_at(cl, journal_write_submit, j->wq);
|
||||
return;
|
||||
no_io:
|
||||
continue_at(cl, journal_write_done, j->wq);
|
||||
|
@ -249,7 +249,10 @@ int bch2_journal_replay(struct bch_fs *c)
|
||||
|
||||
struct journal_key *k = *kp;
|
||||
|
||||
if (k->journal_seq)
|
||||
replay_now_at(j, k->journal_seq);
|
||||
else
|
||||
replay_now_at(j, j->replay_journal_seq_end);
|
||||
|
||||
ret = commit_do(trans, NULL, NULL,
|
||||
BCH_TRANS_COMMIT_no_enospc|
|
||||
|
@ -29,6 +29,14 @@ int bch2_sb_clean_validate_late(struct bch_fs *c, struct bch_sb_field_clean *cle
|
||||
for (entry = clean->start;
|
||||
entry < (struct jset_entry *) vstruct_end(&clean->field);
|
||||
entry = vstruct_next(entry)) {
|
||||
if (vstruct_end(entry) > vstruct_end(&clean->field)) {
|
||||
bch_err(c, "journal entry (u64s %u) overran end of superblock clean section (u64s %u) by %zu",
|
||||
le16_to_cpu(entry->u64s), le32_to_cpu(clean->field.u64s),
|
||||
(u64 *) vstruct_end(entry) - (u64 *) vstruct_end(&clean->field));
|
||||
bch2_sb_error_count(c, BCH_FSCK_ERR_sb_clean_entry_overrun);
|
||||
return -BCH_ERR_fsck_repair_unimplemented;
|
||||
}
|
||||
|
||||
ret = bch2_journal_entry_validate(c, NULL, entry,
|
||||
le16_to_cpu(c->disk_sb.sb->version),
|
||||
BCH_SB_BIG_ENDIAN(c->disk_sb.sb),
|
||||
|
@ -271,7 +271,8 @@
|
||||
x(btree_root_unreadable_and_scan_found_nothing, 263) \
|
||||
x(snapshot_node_missing, 264) \
|
||||
x(dup_backpointer_to_bad_csum_extent, 265) \
|
||||
x(btree_bitmap_not_marked, 266)
|
||||
x(btree_bitmap_not_marked, 266) \
|
||||
x(sb_clean_entry_overrun, 267)
|
||||
|
||||
enum bch_sb_error_id {
|
||||
#define x(t, n) BCH_FSCK_ERR_##t = n,
|
||||
|
@ -463,8 +463,8 @@ static void __bch2_dev_btree_bitmap_mark(struct bch_sb_field_members_v2 *mi, uns
|
||||
m->btree_bitmap_shift += resize;
|
||||
}
|
||||
|
||||
for (unsigned bit = sectors >> m->btree_bitmap_shift;
|
||||
bit << m->btree_bitmap_shift < end;
|
||||
for (unsigned bit = start >> m->btree_bitmap_shift;
|
||||
(u64) bit << m->btree_bitmap_shift < end;
|
||||
bit++)
|
||||
bitmap |= BIT_ULL(bit);
|
||||
|
||||
|
@ -235,11 +235,11 @@ static inline bool bch2_dev_btree_bitmap_marked_sectors(struct bch_dev *ca, u64
|
||||
{
|
||||
u64 end = start + sectors;
|
||||
|
||||
if (end > 64 << ca->mi.btree_bitmap_shift)
|
||||
if (end > 64ULL << ca->mi.btree_bitmap_shift)
|
||||
return false;
|
||||
|
||||
for (unsigned bit = sectors >> ca->mi.btree_bitmap_shift;
|
||||
bit << ca->mi.btree_bitmap_shift < end;
|
||||
for (unsigned bit = start >> ca->mi.btree_bitmap_shift;
|
||||
(u64) bit << ca->mi.btree_bitmap_shift < end;
|
||||
bit++)
|
||||
if (!(ca->mi.btree_allocated_bitmap & BIT_ULL(bit)))
|
||||
return false;
|
||||
|
@ -544,6 +544,7 @@ static void __bch2_fs_free(struct bch_fs *c)
|
||||
|
||||
bch2_find_btree_nodes_exit(&c->found_btree_nodes);
|
||||
bch2_free_pending_node_rewrites(c);
|
||||
bch2_fs_allocator_background_exit(c);
|
||||
bch2_fs_sb_errors_exit(c);
|
||||
bch2_fs_counters_exit(c);
|
||||
bch2_fs_snapshots_exit(c);
|
||||
|
@ -294,16 +294,27 @@ static int thread_with_stdio_fn(void *arg)
|
||||
return 0;
|
||||
}
|
||||
|
||||
int bch2_run_thread_with_stdio(struct thread_with_stdio *thr,
|
||||
/*
 * Prepare @thr for use without starting anything: initialises the input and
 * output stdio buffers and records @ops in thr->ops.
 */
void bch2_thread_with_stdio_init(struct thread_with_stdio *thr,
|
||||
const struct thread_with_stdio_ops *ops)
|
||||
{
|
||||
stdio_buf_init(&thr->stdio.input);
|
||||
stdio_buf_init(&thr->stdio.output);
|
||||
thr->ops = ops;
|
||||
}
|
||||
|
||||
/*
 * Start the thread for @thr via bch2_run_thread_with_file(), using
 * thread_with_stdio_fops and thread_with_stdio_fn; returns that call's result.
 *
 * Does not touch the stdio buffers or thr->ops itself.
 * NOTE(review): presumably the caller must have run
 * bch2_thread_with_stdio_init() beforehand - confirm against callers.
 */
int __bch2_run_thread_with_stdio(struct thread_with_stdio *thr)
|
||||
{
|
||||
return bch2_run_thread_with_file(&thr->thr, &thread_with_stdio_fops, thread_with_stdio_fn);
|
||||
}
|
||||
|
||||
/*
 * Convenience wrapper: initialise @thr with @ops through
 * bch2_thread_with_stdio_init(), then start it with
 * __bch2_run_thread_with_stdio() and return that call's result.
 */
int bch2_run_thread_with_stdio(struct thread_with_stdio *thr,
|
||||
const struct thread_with_stdio_ops *ops)
|
||||
{
|
||||
bch2_thread_with_stdio_init(thr, ops);
|
||||
|
||||
return __bch2_run_thread_with_stdio(thr);
|
||||
}
|
||||
|
||||
int bch2_run_thread_with_stdout(struct thread_with_stdio *thr,
|
||||
const struct thread_with_stdio_ops *ops)
|
||||
{
|
||||
|
@ -63,6 +63,9 @@ struct thread_with_stdio {
|
||||
const struct thread_with_stdio_ops *ops;
|
||||
};
|
||||
|
||||
void bch2_thread_with_stdio_init(struct thread_with_stdio *,
|
||||
const struct thread_with_stdio_ops *);
|
||||
int __bch2_run_thread_with_stdio(struct thread_with_stdio *);
|
||||
int bch2_run_thread_with_stdio(struct thread_with_stdio *,
|
||||
const struct thread_with_stdio_ops *);
|
||||
int bch2_run_thread_with_stdout(struct thread_with_stdio *,
|
||||
|
Loading…
x
Reference in New Issue
Block a user