bcachefs: BTREE_ITER_FILTER_SNAPSHOTS

For snapshots, we need to implement btree lookups that return the first
key that's an ancestor of the snapshot ID the lookup is being done in -
and filter out keys in unrelated snapshots. This patch adds the btree
iterator flag BTREE_ITER_FILTER_SNAPSHOTS which does that filtering.

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
This commit is contained in:
Kent Overstreet 2021-03-04 22:29:25 -05:00 committed by Kent Overstreet
parent 284ae18c1d
commit c075ff700f
4 changed files with 166 additions and 15 deletions

View File

@ -13,6 +13,7 @@
#include "extents.h"
#include "journal.h"
#include "replicas.h"
#include "subvolume.h"
#include "trace.h"
#include <linux/prefetch.h>
@ -683,6 +684,55 @@ static void bch2_btree_iter_verify_entry_exit(struct btree_iter *iter)
bkey_cmp(iter->pos, iter->k.p) > 0);
}
static int bch2_btree_iter_verify_ret(struct btree_iter *iter, struct bkey_s_c k)
{
struct btree_trans *trans = iter->trans;
struct btree_iter copy;
struct bkey_s_c prev;
int ret = 0;
if (!bch2_debug_check_iterators)
return 0;
if (!(iter->flags & BTREE_ITER_FILTER_SNAPSHOTS))
return 0;
if (bkey_err(k) || !k.k)
return 0;
BUG_ON(!bch2_snapshot_is_ancestor(trans->c,
iter->snapshot,
k.k->p.snapshot));
bch2_trans_iter_init(trans, &copy, iter->btree_id, iter->pos,
BTREE_ITER_ALL_SNAPSHOTS);
prev = bch2_btree_iter_prev(&copy);
if (!prev.k)
goto out;
ret = bkey_err(prev);
if (ret)
goto out;
if (!bkey_cmp(prev.k->p, k.k->p) &&
bch2_snapshot_is_ancestor(trans->c, iter->snapshot,
prev.k->p.snapshot) > 0) {
char buf1[100], buf2[200];
bch2_bkey_to_text(&PBUF(buf1), k.k);
bch2_bkey_to_text(&PBUF(buf2), prev.k);
panic("iter snap %u\n"
"k %s\n"
"prev %s\n",
iter->snapshot,
buf1, buf2);
}
out:
bch2_trans_iter_exit(trans, &copy);
return ret;
}
#else
static inline void bch2_btree_path_verify_level(struct btree_trans *trans,
@ -691,6 +741,7 @@ static inline void bch2_btree_path_verify(struct btree_trans *trans,
struct btree_path *path) {}
static inline void bch2_btree_iter_verify(struct btree_iter *iter) {}
static inline void bch2_btree_iter_verify_entry_exit(struct btree_iter *iter) {}
static inline int bch2_btree_iter_verify_ret(struct btree_iter *iter, struct bkey_s_c k) { return 0; }
#endif
@ -2004,11 +2055,25 @@ struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
}
if (likely(k.k)) {
if (likely(!bkey_deleted(k.k)))
break;
/*
* We can never have a key in a leaf node at POS_MAX, so
* we don't have to check these successor() calls:
*/
if ((iter->flags & BTREE_ITER_FILTER_SNAPSHOTS) &&
!bch2_snapshot_is_ancestor(trans->c,
iter->snapshot,
k.k->p.snapshot)) {
search_key = bpos_successor(k.k->p);
continue;
}
/* Advance to next key: */
search_key = bkey_successor(iter, k.k->p);
if (bkey_whiteout(k.k) &&
!(iter->flags & BTREE_ITER_ALL_SNAPSHOTS)) {
search_key = bkey_successor(iter, k.k->p);
continue;
}
break;
} else if (likely(bpos_cmp(iter->path->l[0].b->key.k.p, SPOS_MAX))) {
/* Advance to next leaf node: */
search_key = bpos_successor(iter->path->l[0].b->key.k.p);
@ -2029,6 +2094,9 @@ struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
else if (bkey_cmp(bkey_start_pos(k.k), iter->pos) > 0)
iter->pos = bkey_start_pos(k.k);
if (iter->flags & BTREE_ITER_FILTER_SNAPSHOTS)
iter->pos.snapshot = iter->snapshot;
cmp = bpos_cmp(k.k->p, iter->path->pos);
if (cmp) {
iter->path = bch2_btree_path_make_mut(trans, iter->path,
@ -2041,6 +2109,10 @@ struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
bch2_btree_iter_verify_entry_exit(iter);
bch2_btree_iter_verify(iter);
ret = bch2_btree_iter_verify_ret(iter, k);
if (unlikely(ret))
return bkey_s_c_err(ret);
return k;
}
@ -2064,7 +2136,10 @@ struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter)
{
struct btree_trans *trans = iter->trans;
struct bpos search_key = iter->pos;
struct btree_path *saved_path = NULL;
struct bkey_s_c k;
struct bkey saved_k;
const struct bch_val *saved_v;
int ret;
EBUG_ON(iter->path->cached || iter->path->level);
@ -2072,6 +2147,9 @@ struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter)
bch2_btree_iter_verify(iter);
bch2_btree_iter_verify_entry_exit(iter);
if (iter->flags & BTREE_ITER_FILTER_SNAPSHOTS)
search_key.snapshot = U32_MAX;
while (1) {
iter->path = btree_path_set_pos(trans, iter->path, search_key,
iter->flags & BTREE_ITER_INTENT);
@ -2088,12 +2166,55 @@ struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter)
&iter->path->l[0], &iter->k);
if (!k.k ||
((iter->flags & BTREE_ITER_IS_EXTENTS)
? bkey_cmp(bkey_start_pos(k.k), iter->pos) >= 0
: bkey_cmp(k.k->p, iter->pos) > 0))
? bpos_cmp(bkey_start_pos(k.k), search_key) >= 0
: bpos_cmp(k.k->p, search_key) > 0))
k = btree_path_level_prev(trans, iter->path,
&iter->path->l[0], &iter->k);
if (likely(k.k)) {
if (iter->flags & BTREE_ITER_FILTER_SNAPSHOTS) {
if (k.k->p.snapshot == iter->snapshot)
goto got_key;
/*
* If we have a saved candidate, and we're no
* longer at the same _key_ (not pos), return
* that candidate
*/
if (saved_path && bkey_cmp(k.k->p, saved_k.p)) {
bch2_path_put(trans, iter->path,
iter->flags & BTREE_ITER_INTENT);
iter->path = saved_path;
saved_path = NULL;
iter->k = saved_k;
k.v = saved_v;
goto got_key;
}
if (bch2_snapshot_is_ancestor(iter->trans->c,
iter->snapshot,
k.k->p.snapshot)) {
if (saved_path)
bch2_path_put(trans, saved_path,
iter->flags & BTREE_ITER_INTENT);
saved_path = btree_path_clone(trans, iter->path,
iter->flags & BTREE_ITER_INTENT);
saved_k = *k.k;
saved_v = k.v;
}
search_key = bpos_predecessor(k.k->p);
continue;
}
got_key:
if (bkey_whiteout(k.k) &&
!(iter->flags & BTREE_ITER_ALL_SNAPSHOTS)) {
search_key = bkey_predecessor(iter, k.k->p);
if (iter->flags & BTREE_ITER_FILTER_SNAPSHOTS)
search_key.snapshot = U32_MAX;
continue;
}
break;
} else if (likely(bpos_cmp(iter->path->l[0].b->data->min_key, POS_MIN))) {
/* Advance to previous leaf node: */
@ -2111,7 +2232,12 @@ struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter)
/* Extents can straddle iter->pos: */
if (bkey_cmp(k.k->p, iter->pos) < 0)
iter->pos = k.k->p;
if (iter->flags & BTREE_ITER_FILTER_SNAPSHOTS)
iter->pos.snapshot = iter->snapshot;
out:
if (saved_path)
bch2_path_put(trans, saved_path, iter->flags & BTREE_ITER_INTENT);
iter->path->should_be_locked = true;
bch2_btree_iter_verify_entry_exit(iter);
@ -2160,7 +2286,8 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
if (unlikely(ret))
return bkey_s_c_err(ret);
if (!(iter->flags & BTREE_ITER_IS_EXTENTS)) {
if ((iter->flags & BTREE_ITER_CACHED) ||
!(iter->flags & (BTREE_ITER_IS_EXTENTS|BTREE_ITER_FILTER_SNAPSHOTS))) {
struct bkey_i *next_update;
next_update = btree_trans_peek_updates(iter);
@ -2209,15 +2336,18 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
if (bkey_cmp(iter->pos, next) < 0) {
bkey_init(&iter->k);
iter->k.p = iter->pos;
bch2_key_resize(&iter->k,
min_t(u64, KEY_SIZE_MAX,
(next.inode == iter->pos.inode
? next.offset
: KEY_OFFSET_MAX) -
iter->pos.offset));
if (iter->flags & BTREE_ITER_IS_EXTENTS) {
bch2_key_resize(&iter->k,
min_t(u64, KEY_SIZE_MAX,
(next.inode == iter->pos.inode
? next.offset
: KEY_OFFSET_MAX) -
iter->pos.offset));
EBUG_ON(!iter->k.size);
}
k = (struct bkey_s_c) { &iter->k, NULL };
EBUG_ON(!k.k->size);
}
}
@ -2225,6 +2355,9 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
bch2_btree_iter_verify_entry_exit(iter);
bch2_btree_iter_verify(iter);
ret = bch2_btree_iter_verify_ret(iter, k);
if (unlikely(ret))
return bkey_s_c_err(ret);
return k;
}
@ -2392,6 +2525,13 @@ static void __bch2_trans_iter_init(struct btree_trans *trans,
if (!btree_type_has_snapshots(btree_id) &&
!(flags & __BTREE_ITER_ALL_SNAPSHOTS))
flags &= ~BTREE_ITER_ALL_SNAPSHOTS;
#if 0
/* let's have this be explicitly set: */
if ((flags & BTREE_ITER_TYPE) != BTREE_ITER_NODES &&
btree_type_has_snapshots(btree_id) &&
!(flags & BTREE_ITER_ALL_SNAPSHOTS))
flags |= BTREE_ITER_FILTER_SNAPSHOTS;
#endif
if (!(flags & BTREE_ITER_ALL_SNAPSHOTS))
pos.snapshot = btree_type_has_snapshots(btree_id)

View File

@ -260,6 +260,15 @@ static inline void bch2_btree_iter_set_pos_to_extent_start(struct btree_iter *it
iter->pos = bkey_start_pos(&iter->k);
}
static inline void bch2_btree_iter_set_snapshot(struct btree_iter *iter, u32 snapshot)
{
struct bpos pos = iter->pos;
iter->snapshot = snapshot;
pos.snapshot = snapshot;
bch2_btree_iter_set_pos(iter, pos);
}
/*
* Unlocks before scheduling
* Note: does not revalidate iterator

View File

@ -372,7 +372,8 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans,
bch2_trans_iter_init(trans, &b_iter, key.btree_id, key.pos,
BTREE_ITER_SLOTS|
BTREE_ITER_INTENT);
BTREE_ITER_INTENT|
BTREE_ITER_ALL_SNAPSHOTS);
bch2_trans_iter_init(trans, &c_iter, key.btree_id, key.pos,
BTREE_ITER_CACHED|
BTREE_ITER_CACHED_NOFILL|

View File

@ -209,6 +209,7 @@ struct btree_node_iter {
#define BTREE_ITER_WITH_UPDATES (1 << 10)
#define __BTREE_ITER_ALL_SNAPSHOTS (1 << 11)
#define BTREE_ITER_ALL_SNAPSHOTS (1 << 12)
#define BTREE_ITER_FILTER_SNAPSHOTS (1 << 13)
enum btree_path_uptodate {
BTREE_ITER_UPTODATE = 0,