mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2025-01-04 04:06:26 +00:00
bcachefs: Use KEY_TYPE_deleted whiteouts for extents
Previously, partial overwrites of existing extents were handled implicitly by the btree code; when reading in a btree node, we'd do a mergesort of the different bsets and detect and fix partially overlapping extents during that mergesort. That approach won't work with snapshots: this changes extents to work like regular keys as far as the btree code is concerned, where a zero-size KEY_TYPE_deleted whiteout will completely overwrite an existing extent.
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
parent
1c3ff72c0f
commit
bcd6f3e06f
@ -1286,6 +1286,7 @@ LE64_BITMASK(BCH_SB_ERASURE_CODE, struct bch_sb, flags[3], 0, 16);
|
|||||||
* reflink: gates KEY_TYPE_reflink
|
* reflink: gates KEY_TYPE_reflink
|
||||||
* inline_data: gates KEY_TYPE_inline_data
|
* inline_data: gates KEY_TYPE_inline_data
|
||||||
* new_siphash: gates BCH_STR_HASH_SIPHASH
|
* new_siphash: gates BCH_STR_HASH_SIPHASH
|
||||||
|
* new_extent_overwrite: gates BTREE_NODE_NEW_EXTENT_OVERWRITE
|
||||||
*/
|
*/
|
||||||
#define BCH_SB_FEATURES() \
|
#define BCH_SB_FEATURES() \
|
||||||
x(lz4, 0) \
|
x(lz4, 0) \
|
||||||
@ -1296,7 +1297,8 @@ LE64_BITMASK(BCH_SB_ERASURE_CODE, struct bch_sb, flags[3], 0, 16);
|
|||||||
x(journal_seq_blacklist_v3, 5) \
|
x(journal_seq_blacklist_v3, 5) \
|
||||||
x(reflink, 6) \
|
x(reflink, 6) \
|
||||||
x(new_siphash, 7) \
|
x(new_siphash, 7) \
|
||||||
x(inline_data, 8)
|
x(inline_data, 8) \
|
||||||
|
x(new_extent_overwrite, 9)
|
||||||
|
|
||||||
enum bch_sb_feature {
|
enum bch_sb_feature {
|
||||||
#define x(f, n) BCH_FEATURE_##f,
|
#define x(f, n) BCH_FEATURE_##f,
|
||||||
@ -1620,7 +1622,9 @@ struct btree_node {
|
|||||||
|
|
||||||
LE64_BITMASK(BTREE_NODE_ID, struct btree_node, flags, 0, 4);
|
LE64_BITMASK(BTREE_NODE_ID, struct btree_node, flags, 0, 4);
|
||||||
LE64_BITMASK(BTREE_NODE_LEVEL, struct btree_node, flags, 4, 8);
|
LE64_BITMASK(BTREE_NODE_LEVEL, struct btree_node, flags, 4, 8);
|
||||||
/* 8-32 unused */
|
LE64_BITMASK(BTREE_NODE_NEW_EXTENT_OVERWRITE,
|
||||||
|
struct btree_node, flags, 8, 9);
|
||||||
|
/* 9-32 unused */
|
||||||
LE64_BITMASK(BTREE_NODE_SEQ, struct btree_node, flags, 32, 64);
|
LE64_BITMASK(BTREE_NODE_SEQ, struct btree_node, flags, 32, 64);
|
||||||
|
|
||||||
struct btree_node_entry {
|
struct btree_node_entry {
|
||||||
|
@ -130,24 +130,6 @@ bch2_key_sort_fix_overlapping(struct bch_fs *c, struct bset *dst,
|
|||||||
return nr;
|
return nr;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* If keys compare equal, compare by pointer order:
|
|
||||||
*
|
|
||||||
* Necessary for sort_fix_overlapping() - if there are multiple keys that
|
|
||||||
* compare equal in different sets, we have to process them newest to oldest.
|
|
||||||
*/
|
|
||||||
static inline int extent_sort_fix_overlapping_cmp(struct btree *b,
|
|
||||||
struct bkey_packed *l,
|
|
||||||
struct bkey_packed *r)
|
|
||||||
{
|
|
||||||
struct bkey ul = bkey_unpack_key(b, l);
|
|
||||||
struct bkey ur = bkey_unpack_key(b, r);
|
|
||||||
|
|
||||||
return bkey_cmp(bkey_start_pos(&ul),
|
|
||||||
bkey_start_pos(&ur)) ?:
|
|
||||||
cmp_int((unsigned long) r, (unsigned long) l);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void extent_sort_advance_prev(struct bkey_format *f,
|
static void extent_sort_advance_prev(struct bkey_format *f,
|
||||||
struct btree_nr_keys *nr,
|
struct btree_nr_keys *nr,
|
||||||
struct bkey_packed *start,
|
struct bkey_packed *start,
|
||||||
@ -188,6 +170,141 @@ static void extent_sort_append(struct bch_fs *c,
|
|||||||
bkey_reassemble((void *) *prev, k.s_c);
|
bkey_reassemble((void *) *prev, k.s_c);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Sort + repack in a new format: */
|
||||||
|
struct btree_nr_keys
|
||||||
|
bch2_sort_repack(struct bset *dst, struct btree *src,
|
||||||
|
struct btree_node_iter *src_iter,
|
||||||
|
struct bkey_format *out_f,
|
||||||
|
bool filter_whiteouts)
|
||||||
|
{
|
||||||
|
struct bkey_format *in_f = &src->format;
|
||||||
|
struct bkey_packed *in, *out = vstruct_last(dst);
|
||||||
|
struct btree_nr_keys nr;
|
||||||
|
|
||||||
|
memset(&nr, 0, sizeof(nr));
|
||||||
|
|
||||||
|
while ((in = bch2_btree_node_iter_next_all(src_iter, src))) {
|
||||||
|
if (filter_whiteouts && bkey_whiteout(in))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
if (bch2_bkey_transform(out_f, out, bkey_packed(in)
|
||||||
|
? in_f : &bch2_bkey_format_current, in))
|
||||||
|
out->format = KEY_FORMAT_LOCAL_BTREE;
|
||||||
|
else
|
||||||
|
bch2_bkey_unpack(src, (void *) out, in);
|
||||||
|
|
||||||
|
btree_keys_account_key_add(&nr, 0, out);
|
||||||
|
out = bkey_next(out);
|
||||||
|
}
|
||||||
|
|
||||||
|
dst->u64s = cpu_to_le16((u64 *) out - dst->_data);
|
||||||
|
return nr;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Sort, repack, and merge: */
|
||||||
|
struct btree_nr_keys
|
||||||
|
bch2_sort_repack_merge(struct bch_fs *c,
|
||||||
|
struct bset *dst, struct btree *src,
|
||||||
|
struct btree_node_iter *iter,
|
||||||
|
struct bkey_format *out_f,
|
||||||
|
bool filter_whiteouts)
|
||||||
|
{
|
||||||
|
struct bkey_packed *prev = NULL, *k_packed;
|
||||||
|
struct bkey_s k;
|
||||||
|
struct btree_nr_keys nr;
|
||||||
|
struct bkey unpacked;
|
||||||
|
|
||||||
|
memset(&nr, 0, sizeof(nr));
|
||||||
|
|
||||||
|
while ((k_packed = bch2_btree_node_iter_next_all(iter, src))) {
|
||||||
|
if (filter_whiteouts && bkey_whiteout(k_packed))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
k = __bkey_disassemble(src, k_packed, &unpacked);
|
||||||
|
|
||||||
|
if (filter_whiteouts &&
|
||||||
|
bch2_bkey_normalize(c, k))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
extent_sort_append(c, out_f, &nr, vstruct_last(dst), &prev, k);
|
||||||
|
}
|
||||||
|
|
||||||
|
extent_sort_advance_prev(out_f, &nr, vstruct_last(dst), &prev);
|
||||||
|
|
||||||
|
dst->u64s = cpu_to_le16((u64 *) prev - dst->_data);
|
||||||
|
return nr;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline int sort_keys_cmp(struct btree *b,
|
||||||
|
struct bkey_packed *l,
|
||||||
|
struct bkey_packed *r)
|
||||||
|
{
|
||||||
|
return bkey_cmp_packed(b, l, r) ?:
|
||||||
|
(int) bkey_deleted(r) - (int) bkey_deleted(l) ?:
|
||||||
|
(int) l->needs_whiteout - (int) r->needs_whiteout;
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned bch2_sort_keys(struct bkey_packed *dst,
|
||||||
|
struct sort_iter *iter,
|
||||||
|
bool filter_whiteouts)
|
||||||
|
{
|
||||||
|
const struct bkey_format *f = &iter->b->format;
|
||||||
|
struct bkey_packed *in, *next, *out = dst;
|
||||||
|
|
||||||
|
sort_iter_sort(iter, sort_keys_cmp);
|
||||||
|
|
||||||
|
while ((in = sort_iter_next(iter, sort_keys_cmp))) {
|
||||||
|
if (bkey_whiteout(in) &&
|
||||||
|
(filter_whiteouts || !in->needs_whiteout))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
if (bkey_whiteout(in) &&
|
||||||
|
(next = sort_iter_peek(iter)) &&
|
||||||
|
!bkey_cmp_packed(iter->b, in, next)) {
|
||||||
|
BUG_ON(in->needs_whiteout &&
|
||||||
|
next->needs_whiteout);
|
||||||
|
/*
|
||||||
|
* XXX racy, called with read lock from write path
|
||||||
|
*
|
||||||
|
* leads to spurious BUG_ON() in bkey_unpack_key() in
|
||||||
|
* debug mode
|
||||||
|
*/
|
||||||
|
next->needs_whiteout |= in->needs_whiteout;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (bkey_whiteout(in)) {
|
||||||
|
memcpy_u64s(out, in, bkeyp_key_u64s(f, in));
|
||||||
|
set_bkeyp_val_u64s(f, out, 0);
|
||||||
|
} else {
|
||||||
|
bkey_copy(out, in);
|
||||||
|
}
|
||||||
|
out = bkey_next(out);
|
||||||
|
}
|
||||||
|
|
||||||
|
return (u64 *) out - (u64 *) dst;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Compat code for btree_node_old_extent_overwrite: */
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If keys compare equal, compare by pointer order:
|
||||||
|
*
|
||||||
|
* Necessary for sort_fix_overlapping() - if there are multiple keys that
|
||||||
|
* compare equal in different sets, we have to process them newest to oldest.
|
||||||
|
*/
|
||||||
|
static inline int extent_sort_fix_overlapping_cmp(struct btree *b,
|
||||||
|
struct bkey_packed *l,
|
||||||
|
struct bkey_packed *r)
|
||||||
|
{
|
||||||
|
struct bkey ul = bkey_unpack_key(b, l);
|
||||||
|
struct bkey ur = bkey_unpack_key(b, r);
|
||||||
|
|
||||||
|
return bkey_cmp(bkey_start_pos(&ul),
|
||||||
|
bkey_start_pos(&ur)) ?:
|
||||||
|
cmp_int((unsigned long) r, (unsigned long) l);
|
||||||
|
}
|
||||||
|
|
||||||
struct btree_nr_keys
|
struct btree_nr_keys
|
||||||
bch2_extent_sort_fix_overlapping(struct bch_fs *c, struct bset *dst,
|
bch2_extent_sort_fix_overlapping(struct bch_fs *c, struct bset *dst,
|
||||||
struct sort_iter *iter)
|
struct sort_iter *iter)
|
||||||
@ -284,121 +401,6 @@ bch2_extent_sort_fix_overlapping(struct bch_fs *c, struct bset *dst,
|
|||||||
return nr;
|
return nr;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Sort + repack in a new format: */
|
|
||||||
struct btree_nr_keys
|
|
||||||
bch2_sort_repack(struct bset *dst, struct btree *src,
|
|
||||||
struct btree_node_iter *src_iter,
|
|
||||||
struct bkey_format *out_f,
|
|
||||||
bool filter_whiteouts)
|
|
||||||
{
|
|
||||||
struct bkey_format *in_f = &src->format;
|
|
||||||
struct bkey_packed *in, *out = vstruct_last(dst);
|
|
||||||
struct btree_nr_keys nr;
|
|
||||||
|
|
||||||
memset(&nr, 0, sizeof(nr));
|
|
||||||
|
|
||||||
while ((in = bch2_btree_node_iter_next_all(src_iter, src))) {
|
|
||||||
if (filter_whiteouts && bkey_whiteout(in))
|
|
||||||
continue;
|
|
||||||
|
|
||||||
if (bch2_bkey_transform(out_f, out, bkey_packed(in)
|
|
||||||
? in_f : &bch2_bkey_format_current, in))
|
|
||||||
out->format = KEY_FORMAT_LOCAL_BTREE;
|
|
||||||
else
|
|
||||||
bch2_bkey_unpack(src, (void *) out, in);
|
|
||||||
|
|
||||||
btree_keys_account_key_add(&nr, 0, out);
|
|
||||||
out = bkey_next(out);
|
|
||||||
}
|
|
||||||
|
|
||||||
dst->u64s = cpu_to_le16((u64 *) out - dst->_data);
|
|
||||||
return nr;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Sort, repack, and merge: */
|
|
||||||
struct btree_nr_keys
|
|
||||||
bch2_sort_repack_merge(struct bch_fs *c,
|
|
||||||
struct bset *dst, struct btree *src,
|
|
||||||
struct btree_node_iter *iter,
|
|
||||||
struct bkey_format *out_f,
|
|
||||||
bool filter_whiteouts)
|
|
||||||
{
|
|
||||||
struct bkey_packed *prev = NULL, *k_packed;
|
|
||||||
struct bkey_s k;
|
|
||||||
struct btree_nr_keys nr;
|
|
||||||
struct bkey unpacked;
|
|
||||||
|
|
||||||
memset(&nr, 0, sizeof(nr));
|
|
||||||
|
|
||||||
while ((k_packed = bch2_btree_node_iter_next_all(iter, src))) {
|
|
||||||
if (filter_whiteouts && bkey_whiteout(k_packed))
|
|
||||||
continue;
|
|
||||||
|
|
||||||
k = __bkey_disassemble(src, k_packed, &unpacked);
|
|
||||||
|
|
||||||
if (filter_whiteouts &&
|
|
||||||
bch2_bkey_normalize(c, k))
|
|
||||||
continue;
|
|
||||||
|
|
||||||
extent_sort_append(c, out_f, &nr, vstruct_last(dst), &prev, k);
|
|
||||||
}
|
|
||||||
|
|
||||||
extent_sort_advance_prev(out_f, &nr, vstruct_last(dst), &prev);
|
|
||||||
|
|
||||||
dst->u64s = cpu_to_le16((u64 *) prev - dst->_data);
|
|
||||||
return nr;
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline int sort_keys_cmp(struct btree *b,
|
|
||||||
struct bkey_packed *l,
|
|
||||||
struct bkey_packed *r)
|
|
||||||
{
|
|
||||||
return bkey_cmp_packed(b, l, r) ?:
|
|
||||||
(int) bkey_whiteout(r) - (int) bkey_whiteout(l) ?:
|
|
||||||
(int) l->needs_whiteout - (int) r->needs_whiteout;
|
|
||||||
}
|
|
||||||
|
|
||||||
unsigned bch2_sort_keys(struct bkey_packed *dst,
|
|
||||||
struct sort_iter *iter,
|
|
||||||
bool filter_whiteouts)
|
|
||||||
{
|
|
||||||
const struct bkey_format *f = &iter->b->format;
|
|
||||||
struct bkey_packed *in, *next, *out = dst;
|
|
||||||
|
|
||||||
sort_iter_sort(iter, sort_keys_cmp);
|
|
||||||
|
|
||||||
while ((in = sort_iter_next(iter, sort_keys_cmp))) {
|
|
||||||
if (bkey_whiteout(in) &&
|
|
||||||
(filter_whiteouts || !in->needs_whiteout))
|
|
||||||
continue;
|
|
||||||
|
|
||||||
if (bkey_whiteout(in) &&
|
|
||||||
(next = sort_iter_peek(iter)) &&
|
|
||||||
!bkey_cmp_packed(iter->b, in, next)) {
|
|
||||||
BUG_ON(in->needs_whiteout &&
|
|
||||||
next->needs_whiteout);
|
|
||||||
/*
|
|
||||||
* XXX racy, called with read lock from write path
|
|
||||||
*
|
|
||||||
* leads to spurious BUG_ON() in bkey_unpack_key() in
|
|
||||||
* debug mode
|
|
||||||
*/
|
|
||||||
next->needs_whiteout |= in->needs_whiteout;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (bkey_whiteout(in)) {
|
|
||||||
memcpy_u64s(out, in, bkeyp_key_u64s(f, in));
|
|
||||||
set_bkeyp_val_u64s(f, out, 0);
|
|
||||||
} else {
|
|
||||||
bkey_copy(out, in);
|
|
||||||
}
|
|
||||||
out = bkey_next(out);
|
|
||||||
}
|
|
||||||
|
|
||||||
return (u64 *) out - (u64 *) dst;
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline int sort_extents_cmp(struct btree *b,
|
static inline int sort_extents_cmp(struct btree *b,
|
||||||
struct bkey_packed *l,
|
struct bkey_packed *l,
|
||||||
struct bkey_packed *r)
|
struct bkey_packed *r)
|
||||||
|
@ -22,7 +22,8 @@
|
|||||||
|
|
||||||
static void verify_no_dups(struct btree *b,
|
static void verify_no_dups(struct btree *b,
|
||||||
struct bkey_packed *start,
|
struct bkey_packed *start,
|
||||||
struct bkey_packed *end)
|
struct bkey_packed *end,
|
||||||
|
bool extents)
|
||||||
{
|
{
|
||||||
#ifdef CONFIG_BCACHEFS_DEBUG
|
#ifdef CONFIG_BCACHEFS_DEBUG
|
||||||
struct bkey_packed *k, *p;
|
struct bkey_packed *k, *p;
|
||||||
@ -36,7 +37,7 @@ static void verify_no_dups(struct btree *b,
|
|||||||
struct bkey l = bkey_unpack_key(b, p);
|
struct bkey l = bkey_unpack_key(b, p);
|
||||||
struct bkey r = bkey_unpack_key(b, k);
|
struct bkey r = bkey_unpack_key(b, k);
|
||||||
|
|
||||||
BUG_ON(btree_node_is_extents(b)
|
BUG_ON(extents
|
||||||
? bkey_cmp(l.p, bkey_start_pos(&r)) > 0
|
? bkey_cmp(l.p, bkey_start_pos(&r)) > 0
|
||||||
: bkey_cmp(l.p, bkey_start_pos(&r)) >= 0);
|
: bkey_cmp(l.p, bkey_start_pos(&r)) >= 0);
|
||||||
//BUG_ON(bkey_cmp_packed(&b->format, p, k) >= 0);
|
//BUG_ON(bkey_cmp_packed(&b->format, p, k) >= 0);
|
||||||
@ -147,7 +148,8 @@ static void bch2_sort_whiteouts(struct bch_fs *c, struct btree *b)
|
|||||||
}
|
}
|
||||||
|
|
||||||
verify_no_dups(b, new_whiteouts,
|
verify_no_dups(b, new_whiteouts,
|
||||||
(void *) ((u64 *) new_whiteouts + b->whiteout_u64s));
|
(void *) ((u64 *) new_whiteouts + b->whiteout_u64s),
|
||||||
|
btree_node_old_extent_overwrite(b));
|
||||||
|
|
||||||
memcpy_u64s(unwritten_whiteouts_start(c, b),
|
memcpy_u64s(unwritten_whiteouts_start(c, b),
|
||||||
new_whiteouts, b->whiteout_u64s);
|
new_whiteouts, b->whiteout_u64s);
|
||||||
@ -297,7 +299,8 @@ static bool bch2_compact_extent_whiteouts(struct bch_fs *c,
|
|||||||
|
|
||||||
verify_no_dups(b,
|
verify_no_dups(b,
|
||||||
unwritten_whiteouts_start(c, b),
|
unwritten_whiteouts_start(c, b),
|
||||||
unwritten_whiteouts_end(c, b));
|
unwritten_whiteouts_end(c, b),
|
||||||
|
true);
|
||||||
|
|
||||||
btree_bounce_free(c, order, used_mempool, whiteouts);
|
btree_bounce_free(c, order, used_mempool, whiteouts);
|
||||||
|
|
||||||
@ -377,7 +380,7 @@ static bool bch2_drop_whiteouts(struct btree *b, enum compact_mode mode)
|
|||||||
bool bch2_compact_whiteouts(struct bch_fs *c, struct btree *b,
|
bool bch2_compact_whiteouts(struct bch_fs *c, struct btree *b,
|
||||||
enum compact_mode mode)
|
enum compact_mode mode)
|
||||||
{
|
{
|
||||||
return !btree_node_is_extents(b)
|
return !btree_node_old_extent_overwrite(b)
|
||||||
? bch2_drop_whiteouts(b, mode)
|
? bch2_drop_whiteouts(b, mode)
|
||||||
: bch2_compact_extent_whiteouts(c, b, mode);
|
: bch2_compact_extent_whiteouts(c, b, mode);
|
||||||
}
|
}
|
||||||
@ -417,10 +420,10 @@ static void btree_node_sort(struct bch_fs *c, struct btree *b,
|
|||||||
|
|
||||||
start_time = local_clock();
|
start_time = local_clock();
|
||||||
|
|
||||||
if (btree_node_is_extents(b))
|
if (btree_node_old_extent_overwrite(b))
|
||||||
filter_whiteouts = bset_written(b, start_bset);
|
filter_whiteouts = bset_written(b, start_bset);
|
||||||
|
|
||||||
u64s = (btree_node_is_extents(b)
|
u64s = (btree_node_old_extent_overwrite(b)
|
||||||
? bch2_sort_extents
|
? bch2_sort_extents
|
||||||
: bch2_sort_keys)(out->keys.start,
|
: bch2_sort_keys)(out->keys.start,
|
||||||
&sort_iter,
|
&sort_iter,
|
||||||
@ -706,7 +709,8 @@ static int validate_bset(struct bch_fs *c, struct btree *b,
|
|||||||
bool have_retry)
|
bool have_retry)
|
||||||
{
|
{
|
||||||
struct bkey_packed *k, *prev = NULL;
|
struct bkey_packed *k, *prev = NULL;
|
||||||
struct bpos prev_pos = POS_MIN;
|
struct bpos prev_pos = POS_MIN;
|
||||||
|
struct bpos prev_data = POS_MIN;
|
||||||
bool seen_non_whiteout = false;
|
bool seen_non_whiteout = false;
|
||||||
unsigned version;
|
unsigned version;
|
||||||
const char *err;
|
const char *err;
|
||||||
@ -839,7 +843,8 @@ static int validate_bset(struct bch_fs *c, struct btree *b,
|
|||||||
(bkey_cmp(prev_pos, bkey_start_pos(u.k)) > 0))) {
|
(bkey_cmp(prev_pos, bkey_start_pos(u.k)) > 0))) {
|
||||||
*whiteout_u64s = k->_data - i->_data;
|
*whiteout_u64s = k->_data - i->_data;
|
||||||
seen_non_whiteout = true;
|
seen_non_whiteout = true;
|
||||||
} else if (bkey_cmp(prev_pos, bkey_start_pos(u.k)) > 0) {
|
} else if (bkey_cmp(prev_data, bkey_start_pos(u.k)) > 0 ||
|
||||||
|
bkey_cmp(prev_pos, u.k->p) > 0) {
|
||||||
btree_err(BTREE_ERR_FATAL, c, b, i,
|
btree_err(BTREE_ERR_FATAL, c, b, i,
|
||||||
"keys out of order: %llu:%llu > %llu:%llu",
|
"keys out of order: %llu:%llu > %llu:%llu",
|
||||||
prev_pos.inode,
|
prev_pos.inode,
|
||||||
@ -849,7 +854,10 @@ static int validate_bset(struct bch_fs *c, struct btree *b,
|
|||||||
/* XXX: repair this */
|
/* XXX: repair this */
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!bkey_deleted(u.k))
|
||||||
|
prev_data = u.k->p;
|
||||||
prev_pos = u.k->p;
|
prev_pos = u.k->p;
|
||||||
|
|
||||||
prev = k;
|
prev = k;
|
||||||
k = bkey_next_skip_noops(k, vstruct_last(i));
|
k = bkey_next_skip_noops(k, vstruct_last(i));
|
||||||
}
|
}
|
||||||
@ -908,6 +916,10 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry
|
|||||||
|
|
||||||
bset_encrypt(c, i, b->written << 9);
|
bset_encrypt(c, i, b->written << 9);
|
||||||
|
|
||||||
|
if (btree_node_is_extents(b) &&
|
||||||
|
!BTREE_NODE_NEW_EXTENT_OVERWRITE(b->data))
|
||||||
|
set_btree_node_old_extent_overwrite(b);
|
||||||
|
|
||||||
sectors = vstruct_sectors(b->data, c->block_bits);
|
sectors = vstruct_sectors(b->data, c->block_bits);
|
||||||
|
|
||||||
btree_node_set_format(b, b->data->format);
|
btree_node_set_format(b, b->data->format);
|
||||||
@ -971,7 +983,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry
|
|||||||
|
|
||||||
set_btree_bset(b, b->set, &b->data->keys);
|
set_btree_bset(b, b->set, &b->data->keys);
|
||||||
|
|
||||||
b->nr = (btree_node_is_extents(b)
|
b->nr = (btree_node_old_extent_overwrite(b)
|
||||||
? bch2_extent_sort_fix_overlapping
|
? bch2_extent_sort_fix_overlapping
|
||||||
: bch2_key_sort_fix_overlapping)(c, &sorted->keys, iter);
|
: bch2_key_sort_fix_overlapping)(c, &sorted->keys, iter);
|
||||||
|
|
||||||
@ -1486,7 +1498,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
|
|||||||
i->journal_seq = cpu_to_le64(seq);
|
i->journal_seq = cpu_to_le64(seq);
|
||||||
i->u64s = 0;
|
i->u64s = 0;
|
||||||
|
|
||||||
if (!btree_node_is_extents(b)) {
|
if (!btree_node_old_extent_overwrite(b)) {
|
||||||
sort_iter_add(&sort_iter,
|
sort_iter_add(&sort_iter,
|
||||||
unwritten_whiteouts_start(c, b),
|
unwritten_whiteouts_start(c, b),
|
||||||
unwritten_whiteouts_end(c, b));
|
unwritten_whiteouts_end(c, b));
|
||||||
@ -1501,7 +1513,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
|
|||||||
|
|
||||||
b->whiteout_u64s = 0;
|
b->whiteout_u64s = 0;
|
||||||
|
|
||||||
u64s = btree_node_is_extents(b)
|
u64s = btree_node_old_extent_overwrite(b)
|
||||||
? bch2_sort_extents(vstruct_last(i), &sort_iter, false)
|
? bch2_sort_extents(vstruct_last(i), &sort_iter, false)
|
||||||
: bch2_sort_keys(i->start, &sort_iter, false);
|
: bch2_sort_keys(i->start, &sort_iter, false);
|
||||||
le16_add_cpu(&i->u64s, u64s);
|
le16_add_cpu(&i->u64s, u64s);
|
||||||
|
@ -311,6 +311,7 @@ enum btree_flags {
|
|||||||
BTREE_NODE_just_written,
|
BTREE_NODE_just_written,
|
||||||
BTREE_NODE_dying,
|
BTREE_NODE_dying,
|
||||||
BTREE_NODE_fake,
|
BTREE_NODE_fake,
|
||||||
|
BTREE_NODE_old_extent_overwrite,
|
||||||
};
|
};
|
||||||
|
|
||||||
BTREE_FLAG(read_in_flight);
|
BTREE_FLAG(read_in_flight);
|
||||||
@ -324,6 +325,7 @@ BTREE_FLAG(write_in_flight);
|
|||||||
BTREE_FLAG(just_written);
|
BTREE_FLAG(just_written);
|
||||||
BTREE_FLAG(dying);
|
BTREE_FLAG(dying);
|
||||||
BTREE_FLAG(fake);
|
BTREE_FLAG(fake);
|
||||||
|
BTREE_FLAG(old_extent_overwrite);
|
||||||
|
|
||||||
static inline struct btree_write *btree_current_write(struct btree *b)
|
static inline struct btree_write *btree_current_write(struct btree *b)
|
||||||
{
|
{
|
||||||
|
@ -374,6 +374,13 @@ static struct btree *bch2_btree_node_alloc(struct btree_update *as, unsigned lev
|
|||||||
SET_BTREE_NODE_LEVEL(b->data, level);
|
SET_BTREE_NODE_LEVEL(b->data, level);
|
||||||
b->data->ptr = bkey_i_to_btree_ptr(&b->key)->v.start[0];
|
b->data->ptr = bkey_i_to_btree_ptr(&b->key)->v.start[0];
|
||||||
|
|
||||||
|
if (c->sb.features & (1ULL << BCH_FEATURE_new_extent_overwrite))
|
||||||
|
SET_BTREE_NODE_NEW_EXTENT_OVERWRITE(b->data, true);
|
||||||
|
|
||||||
|
if (btree_node_is_extents(b) &&
|
||||||
|
!BTREE_NODE_NEW_EXTENT_OVERWRITE(b->data))
|
||||||
|
set_btree_node_old_extent_overwrite(b);
|
||||||
|
|
||||||
bch2_btree_build_aux_trees(b);
|
bch2_btree_build_aux_trees(b);
|
||||||
|
|
||||||
btree_node_will_make_reachable(as, b);
|
btree_node_will_make_reachable(as, b);
|
||||||
|
@ -267,6 +267,8 @@ static void btree_insert_key_leaf(struct btree_trans *trans,
|
|||||||
int old_live_u64s = b->nr.live_u64s;
|
int old_live_u64s = b->nr.live_u64s;
|
||||||
int live_u64s_added, u64s_added;
|
int live_u64s_added, u64s_added;
|
||||||
|
|
||||||
|
insert->k->k.needs_whiteout = false;
|
||||||
|
|
||||||
if (!btree_node_is_extents(b))
|
if (!btree_node_is_extents(b))
|
||||||
bch2_insert_fixup_key(trans, insert);
|
bch2_insert_fixup_key(trans, insert);
|
||||||
else
|
else
|
||||||
|
@ -186,11 +186,26 @@ bch2_extent_can_insert(struct btree_trans *trans,
|
|||||||
|
|
||||||
overlap = bch2_extent_overlap(&insert->k->k, k.k);
|
overlap = bch2_extent_overlap(&insert->k->k, k.k);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If we're overwriting an existing extent, we may need to emit
|
||||||
|
* a whiteout - unless we're inserting a new extent at the same
|
||||||
|
* position:
|
||||||
|
*/
|
||||||
|
if (k.k->needs_whiteout &&
|
||||||
|
(!bkey_whiteout(&insert->k->k) ||
|
||||||
|
bkey_cmp(k.k->p, insert->k->k.p)))
|
||||||
|
*u64s += BKEY_U64s;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If we're partially overwriting an existing extent which has
|
||||||
|
* been written out to disk, we'll need to emit a new version of
|
||||||
|
* that extent:
|
||||||
|
*/
|
||||||
if (bkey_written(l->b, _k) &&
|
if (bkey_written(l->b, _k) &&
|
||||||
overlap != BCH_EXTENT_OVERLAP_ALL)
|
overlap != BCH_EXTENT_OVERLAP_ALL)
|
||||||
*u64s += _k->u64s;
|
*u64s += _k->u64s;
|
||||||
|
|
||||||
/* account for having to split existing extent: */
|
/* And we may be splitting an existing extent: */
|
||||||
if (overlap == BCH_EXTENT_OVERLAP_MIDDLE)
|
if (overlap == BCH_EXTENT_OVERLAP_MIDDLE)
|
||||||
*u64s += _k->u64s;
|
*u64s += _k->u64s;
|
||||||
|
|
||||||
@ -286,6 +301,23 @@ static void extent_bset_insert(struct bch_fs *c, struct btree_iter *iter,
|
|||||||
bch2_btree_node_iter_fix(iter, l->b, &l->iter, k, 0, k->u64s);
|
bch2_btree_node_iter_fix(iter, l->b, &l->iter, k, 0, k->u64s);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void pack_push_whiteout(struct bch_fs *c, struct btree *b,
|
||||||
|
struct bpos pos)
|
||||||
|
{
|
||||||
|
struct bkey_packed k;
|
||||||
|
|
||||||
|
if (!bkey_pack_pos(&k, pos, b)) {
|
||||||
|
struct bkey_i tmp;
|
||||||
|
|
||||||
|
bkey_init(&tmp.k);
|
||||||
|
tmp.k.p = pos;
|
||||||
|
bkey_copy(&k, &tmp);
|
||||||
|
}
|
||||||
|
|
||||||
|
k.needs_whiteout = true;
|
||||||
|
push_whiteout(c, b, &k);
|
||||||
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
extent_drop(struct bch_fs *c, struct btree_iter *iter,
|
extent_drop(struct bch_fs *c, struct btree_iter *iter,
|
||||||
struct bkey_packed *_k, struct bkey_s k)
|
struct bkey_packed *_k, struct bkey_s k)
|
||||||
@ -297,7 +329,12 @@ extent_drop(struct bch_fs *c, struct btree_iter *iter,
|
|||||||
|
|
||||||
k.k->size = 0;
|
k.k->size = 0;
|
||||||
k.k->type = KEY_TYPE_deleted;
|
k.k->type = KEY_TYPE_deleted;
|
||||||
k.k->needs_whiteout = false;
|
|
||||||
|
if (!btree_node_old_extent_overwrite(l->b) &&
|
||||||
|
k.k->needs_whiteout) {
|
||||||
|
pack_push_whiteout(c, l->b, k.k->p);
|
||||||
|
k.k->needs_whiteout = false;
|
||||||
|
}
|
||||||
|
|
||||||
if (_k >= btree_bset_last(l->b)->start) {
|
if (_k >= btree_bset_last(l->b)->start) {
|
||||||
unsigned u64s = _k->u64s;
|
unsigned u64s = _k->u64s;
|
||||||
@ -322,12 +359,29 @@ extent_squash(struct bch_fs *c, struct btree_iter *iter,
|
|||||||
bkey_on_stack_init(&tmp);
|
bkey_on_stack_init(&tmp);
|
||||||
bkey_on_stack_init(&split);
|
bkey_on_stack_init(&split);
|
||||||
|
|
||||||
|
if (!btree_node_old_extent_overwrite(l->b)) {
|
||||||
|
if (!bkey_whiteout(&insert->k) &&
|
||||||
|
!bkey_cmp(k.k->p, insert->k.p)) {
|
||||||
|
insert->k.needs_whiteout = k.k->needs_whiteout;
|
||||||
|
k.k->needs_whiteout = false;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
insert->k.needs_whiteout |= k.k->needs_whiteout;
|
||||||
|
}
|
||||||
|
|
||||||
switch (overlap) {
|
switch (overlap) {
|
||||||
case BCH_EXTENT_OVERLAP_FRONT:
|
case BCH_EXTENT_OVERLAP_FRONT:
|
||||||
if (bkey_written(l->b, _k)) {
|
if (bkey_written(l->b, _k)) {
|
||||||
bkey_on_stack_reassemble(&tmp, c, k.s_c);
|
bkey_on_stack_reassemble(&tmp, c, k.s_c);
|
||||||
bch2_cut_front(insert->k.p, tmp.k);
|
bch2_cut_front(insert->k.p, tmp.k);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* needs_whiteout was propagated to new version of @k,
|
||||||
|
* @tmp:
|
||||||
|
*/
|
||||||
|
if (!btree_node_old_extent_overwrite(l->b))
|
||||||
|
k.k->needs_whiteout = false;
|
||||||
|
|
||||||
extent_drop(c, iter, _k, k);
|
extent_drop(c, iter, _k, k);
|
||||||
extent_bset_insert(c, iter, tmp.k);
|
extent_bset_insert(c, iter, tmp.k);
|
||||||
} else {
|
} else {
|
||||||
@ -348,9 +402,26 @@ extent_squash(struct bch_fs *c, struct btree_iter *iter,
|
|||||||
bkey_on_stack_reassemble(&tmp, c, k.s_c);
|
bkey_on_stack_reassemble(&tmp, c, k.s_c);
|
||||||
bch2_cut_back(bkey_start_pos(&insert->k), tmp.k);
|
bch2_cut_back(bkey_start_pos(&insert->k), tmp.k);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* @tmp has different position than @k, needs_whiteout
|
||||||
|
* should not be propagated:
|
||||||
|
*/
|
||||||
|
if (!btree_node_old_extent_overwrite(l->b))
|
||||||
|
tmp.k->k.needs_whiteout = false;
|
||||||
|
|
||||||
extent_drop(c, iter, _k, k);
|
extent_drop(c, iter, _k, k);
|
||||||
extent_bset_insert(c, iter, tmp.k);
|
extent_bset_insert(c, iter, tmp.k);
|
||||||
} else {
|
} else {
|
||||||
|
/*
|
||||||
|
* position of @k is changing, emit a whiteout if
|
||||||
|
* needs_whiteout is set:
|
||||||
|
*/
|
||||||
|
if (!btree_node_old_extent_overwrite(l->b) &&
|
||||||
|
k.k->needs_whiteout) {
|
||||||
|
pack_push_whiteout(c, l->b, k.k->p);
|
||||||
|
k.k->needs_whiteout = false;
|
||||||
|
}
|
||||||
|
|
||||||
btree_keys_account_val_delta(l->b, _k,
|
btree_keys_account_val_delta(l->b, _k,
|
||||||
bch2_cut_back_s(bkey_start_pos(&insert->k), k));
|
bch2_cut_back_s(bkey_start_pos(&insert->k), k));
|
||||||
extent_save(l->b, _k, k.k);
|
extent_save(l->b, _k, k.k);
|
||||||
@ -367,10 +438,17 @@ extent_squash(struct bch_fs *c, struct btree_iter *iter,
|
|||||||
bkey_on_stack_reassemble(&split, c, k.s_c);
|
bkey_on_stack_reassemble(&split, c, k.s_c);
|
||||||
bch2_cut_back(bkey_start_pos(&insert->k), split.k);
|
bch2_cut_back(bkey_start_pos(&insert->k), split.k);
|
||||||
|
|
||||||
|
if (!btree_node_old_extent_overwrite(l->b))
|
||||||
|
split.k->k.needs_whiteout = false;
|
||||||
|
|
||||||
|
/* this is identical to BCH_EXTENT_OVERLAP_FRONT: */
|
||||||
if (bkey_written(l->b, _k)) {
|
if (bkey_written(l->b, _k)) {
|
||||||
bkey_on_stack_reassemble(&tmp, c, k.s_c);
|
bkey_on_stack_reassemble(&tmp, c, k.s_c);
|
||||||
bch2_cut_front(insert->k.p, tmp.k);
|
bch2_cut_front(insert->k.p, tmp.k);
|
||||||
|
|
||||||
|
if (!btree_node_old_extent_overwrite(l->b))
|
||||||
|
k.k->needs_whiteout = false;
|
||||||
|
|
||||||
extent_drop(c, iter, _k, k);
|
extent_drop(c, iter, _k, k);
|
||||||
extent_bset_insert(c, iter, tmp.k);
|
extent_bset_insert(c, iter, tmp.k);
|
||||||
} else {
|
} else {
|
||||||
@ -462,7 +540,6 @@ void bch2_insert_fixup_extent(struct btree_trans *trans,
|
|||||||
bch2_cut_front(cur_end, insert);
|
bch2_cut_front(cur_end, insert);
|
||||||
bch2_btree_iter_set_pos_same_leaf(iter, cur_end);
|
bch2_btree_iter_set_pos_same_leaf(iter, cur_end);
|
||||||
} else {
|
} else {
|
||||||
insert->k.needs_whiteout |= k.k->needs_whiteout;
|
|
||||||
extent_squash(c, iter, insert, _k, k, overlap);
|
extent_squash(c, iter, insert, _k, k, overlap);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -480,7 +557,10 @@ void bch2_insert_fixup_extent(struct btree_trans *trans,
|
|||||||
if (insert->k.type == KEY_TYPE_deleted)
|
if (insert->k.type == KEY_TYPE_deleted)
|
||||||
insert->k.type = KEY_TYPE_discard;
|
insert->k.type = KEY_TYPE_discard;
|
||||||
|
|
||||||
extent_bset_insert(c, iter, insert);
|
if (!bkey_whiteout(&insert->k) ||
|
||||||
|
btree_node_old_extent_overwrite(l->b))
|
||||||
|
extent_bset_insert(c, iter, insert);
|
||||||
|
|
||||||
bch2_btree_journal_key(trans, iter, insert);
|
bch2_btree_journal_key(trans, iter, insert);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -908,6 +908,7 @@ int bch2_fs_recovery(struct bch_fs *c)
|
|||||||
le16_to_cpu(bcachefs_metadata_version_min);
|
le16_to_cpu(bcachefs_metadata_version_min);
|
||||||
c->disk_sb.sb->version = le16_to_cpu(bcachefs_metadata_version_current);
|
c->disk_sb.sb->version = le16_to_cpu(bcachefs_metadata_version_current);
|
||||||
c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_new_siphash;
|
c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_new_siphash;
|
||||||
|
c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_new_extent_overwrite;
|
||||||
write_sb = true;
|
write_sb = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1027,6 +1028,7 @@ int bch2_fs_initialize(struct bch_fs *c)
|
|||||||
le16_to_cpu(bcachefs_metadata_version_current);
|
le16_to_cpu(bcachefs_metadata_version_current);
|
||||||
c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_atomic_nlink;
|
c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_atomic_nlink;
|
||||||
c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_new_siphash;
|
c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_new_siphash;
|
||||||
|
c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_new_extent_overwrite;
|
||||||
|
|
||||||
SET_BCH_SB_INITIALIZED(c->disk_sb.sb, true);
|
SET_BCH_SB_INITIALIZED(c->disk_sb.sb, true);
|
||||||
SET_BCH_SB_CLEAN(c->disk_sb.sb, false);
|
SET_BCH_SB_CLEAN(c->disk_sb.sb, false);
|
||||||
|
Loading…
Reference in New Issue
Block a user