commit 9e779f3f24
For each bucket we track when the bucket became nonempty and when it became empty again: if we can ensure that there will be no journal flushes in the range [nonempty, empty) (possibly because both transitions occurred at the same journal sequence number), then it's safe to reuse the bucket without waiting for a journal commit.

This is a major performance optimization for erasure coding, where writes are initially replicated but the extra replicas are quickly dropped: if those buckets are reused and overwritten without issuing a cache flush to the underlying device, then they only cost bus bandwidth.

But there's a tricky corner case when there are multiple empty -> nonempty -> empty transitions in quick succession, i.e. when data is getting overwritten immediately as it's being written. If this happens and the previous empty transition hasn't been flushed, we need to continue tracking the previous nonempty transition - not start a new one.

Fixing this means we now need to track both the nonempty and empty transitions in bch_alloc_v4.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
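A minimal sketch of how these two sequence numbers could gate bucket reuse (not the actual bcachefs code path; bucket_reuse_needs_journal_flush and flushed_seq are hypothetical names, with flushed_seq standing for the newest journal sequence known to be flushed to disk, and kernel types bool/__u64 assumed to be available):

static inline bool bucket_reuse_needs_journal_flush(const struct bch_alloc_v4 *a,
						    __u64 flushed_seq)
{
	/*
	 * If the bucket went nonempty and empty at the same journal sequence,
	 * the range [nonempty, empty) is empty and no flush can fall inside
	 * it, so the bucket can be reused immediately.
	 */
	if (a->journal_seq_nonempty == a->journal_seq_empty)
		return false;

	/*
	 * Otherwise the empty transition itself must be committed before the
	 * bucket's contents may be overwritten.
	 */
	return a->journal_seq_empty > flushed_seq;
}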
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_ALLOC_BACKGROUND_FORMAT_H
#define _BCACHEFS_ALLOC_BACKGROUND_FORMAT_H

struct bch_alloc {
	struct bch_val v;
	__u8 fields;
	__u8 gen;
	__u8 data[];
} __packed __aligned(8);

#define BCH_ALLOC_FIELDS_V1() \
	x(read_time, 16) \
	x(write_time, 16) \
	x(data_type, 8) \
	x(dirty_sectors, 16) \
	x(cached_sectors, 16) \
	x(oldest_gen, 8) \
	x(stripe, 32) \
	x(stripe_redundancy, 8)

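/*
 * Expanding the field list with x() below yields one enum constant per field,
 * e.g. BCH_ALLOC_FIELD_V1_read_time, BCH_ALLOC_FIELD_V1_write_time, ...
 */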
enum {
#define x(name, _bits) BCH_ALLOC_FIELD_V1_##name,
	BCH_ALLOC_FIELDS_V1()
#undef x
};

struct bch_alloc_v2 {
	struct bch_val v;
	__u8 nr_fields;
	__u8 gen;
	__u8 oldest_gen;
	__u8 data_type;
	__u8 data[];
} __packed __aligned(8);

#define BCH_ALLOC_FIELDS_V2() \
	x(read_time, 64) \
	x(write_time, 64) \
	x(dirty_sectors, 32) \
	x(cached_sectors, 32) \
	x(stripe, 32) \
	x(stripe_redundancy, 8)

struct bch_alloc_v3 {
	struct bch_val v;
	__le64 journal_seq;
	__le32 flags;
	__u8 nr_fields;
	__u8 gen;
	__u8 oldest_gen;
	__u8 data_type;
	__u8 data[];
} __packed __aligned(8);

LE32_BITMASK(BCH_ALLOC_V3_NEED_DISCARD,struct bch_alloc_v3, flags, 0, 1)
LE32_BITMASK(BCH_ALLOC_V3_NEED_INC_GEN,struct bch_alloc_v3, flags, 1, 2)

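/*
 * journal_seq_nonempty and journal_seq_empty record the journal sequence
 * numbers at which this bucket last became nonempty and then empty again;
 * if no journal flush can fall in [nonempty, empty), the bucket may be
 * reused without waiting for a journal commit (see commit description above).
 */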
struct bch_alloc_v4 {
	struct bch_val v;
	__u64 journal_seq_nonempty;
	__u32 flags;
	__u8 gen;
	__u8 oldest_gen;
	__u8 data_type;
	__u8 stripe_redundancy;
	__u32 dirty_sectors;
	__u32 cached_sectors;
	__u64 io_time[2];
	__u32 stripe;
	__u32 nr_external_backpointers;
	/* end of fields in original version of alloc_v4 */
	__u64 journal_seq_empty;
	__u32 stripe_sectors;
	__u32 pad;
} __packed __aligned(8);

#define BCH_ALLOC_V4_U64s_V0 6
#define BCH_ALLOC_V4_U64s (sizeof(struct bch_alloc_v4) / sizeof(__u64))

BITMASK(BCH_ALLOC_V4_NEED_DISCARD, struct bch_alloc_v4, flags, 0, 1)
BITMASK(BCH_ALLOC_V4_NEED_INC_GEN, struct bch_alloc_v4, flags, 1, 2)
BITMASK(BCH_ALLOC_V4_BACKPOINTERS_START,struct bch_alloc_v4, flags, 2, 8)
BITMASK(BCH_ALLOC_V4_NR_BACKPOINTERS, struct bch_alloc_v4, flags, 8, 14)

#define KEY_TYPE_BUCKET_GENS_BITS 8
#define KEY_TYPE_BUCKET_GENS_NR (1U << KEY_TYPE_BUCKET_GENS_BITS)
#define KEY_TYPE_BUCKET_GENS_MASK (KEY_TYPE_BUCKET_GENS_NR - 1)

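/*
 * One bucket_gens key holds the generation numbers for
 * KEY_TYPE_BUCKET_GENS_NR (256) consecutive buckets; the low
 * KEY_TYPE_BUCKET_GENS_BITS bits of a bucket's offset select
 * its slot in gens[].
 */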
struct bch_bucket_gens {
	struct bch_val v;
	u8 gens[KEY_TYPE_BUCKET_GENS_NR];
} __packed __aligned(8);

#endif /* _BCACHEFS_ALLOC_BACKGROUND_FORMAT_H */