linux-next/fs/bcachefs/disk_accounting_format.h
Kent Overstreet 58474f76a7 bcachefs: bcachefs_metadata_version_disk_accounting_inum
This adds another disk accounting counter to track usage per inode
number (any snapshot ID).

This will be used for a couple things:

- It'll give us a way to tell the user how much space a given file ista
  consuming in all snapshots; i.e. how much extra space it's consuming
  due to snapshot versioning.

- It counts number of extents and total size of extents (both in btree
  keyspace sectors and actual disk usage), meaning it gives us average
  extent size: that is, it'll let us cheaply find fragmented files that
  should be defragmented.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
2024-08-13 23:00:50 -04:00

168 lines
4.2 KiB
C

/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_DISK_ACCOUNTING_FORMAT_H
#define _BCACHEFS_DISK_ACCOUNTING_FORMAT_H
#include "replicas_format.h"
/*
* Disk accounting - KEY_TYPE_accounting - on disk format:
*
* Here, the key has considerably more structure than a typical key (bpos); an
* accounting key is 'struct disk_accounting_pos', which is a union of bpos.
*
* More specifically: a key is just a muliword integer (where word endianness
* matches native byte order), so we're treating bpos as an opaque 20 byte
* integer and mapping bch_accounting_key to that.
*
* This is a type-tagged union of all our various subtypes; a disk accounting
* key can be device counters, replicas counters, et cetera - it's extensible.
*
* The value is a list of u64s or s64s; the number of counters is specific to a
* given accounting type.
*
* Unlike with other key types, updates are _deltas_, and the deltas are not
* resolved until the update to the underlying btree, done by btree write buffer
* flush or journal replay.
*
* Journal replay in particular requires special handling. The journal tracks a
* range of entries which may possibly have not yet been applied to the btree
* yet - it does not know definitively whether individual entries are dirty and
* still need to be applied.
*
* To handle this, we use the version field of struct bkey, and give every
* accounting update a unique version number - a total ordering in time; the
* version number is derived from the key's position in the journal. Then
* journal replay can compare the version number of the key from the journal
* with the version number of the key in the btree to determine if a key needs
* to be replayed.
*
* For this to work, we must maintain this strict time ordering of updates as
* they are flushed to the btree, both via write buffer flush and via journal
* replay. This has complications for the write buffer code while journal replay
* is still in progress; the write buffer cannot flush any accounting keys to
* the btree until journal replay has finished replaying its accounting keys, or
* the (newer) version number of the keys from the write buffer will cause
* updates from journal replay to be lost.
*/
struct bch_accounting {
struct bch_val v;
__u64 d[];
};
#define BCH_ACCOUNTING_MAX_COUNTERS 3
#define BCH_DATA_TYPES() \
x(free, 0) \
x(sb, 1) \
x(journal, 2) \
x(btree, 3) \
x(user, 4) \
x(cached, 5) \
x(parity, 6) \
x(stripe, 7) \
x(need_gc_gens, 8) \
x(need_discard, 9) \
x(unstriped, 10)
enum bch_data_type {
#define x(t, n) BCH_DATA_##t,
BCH_DATA_TYPES()
#undef x
BCH_DATA_NR
};
static inline bool data_type_is_empty(enum bch_data_type type)
{
switch (type) {
case BCH_DATA_free:
case BCH_DATA_need_gc_gens:
case BCH_DATA_need_discard:
return true;
default:
return false;
}
}
static inline bool data_type_is_hidden(enum bch_data_type type)
{
switch (type) {
case BCH_DATA_sb:
case BCH_DATA_journal:
return true;
default:
return false;
}
}
#define BCH_DISK_ACCOUNTING_TYPES() \
x(nr_inodes, 0) \
x(persistent_reserved, 1) \
x(replicas, 2) \
x(dev_data_type, 3) \
x(compression, 4) \
x(snapshot, 5) \
x(btree, 6) \
x(rebalance_work, 7) \
x(inum, 8)
enum disk_accounting_type {
#define x(f, nr) BCH_DISK_ACCOUNTING_##f = nr,
BCH_DISK_ACCOUNTING_TYPES()
#undef x
BCH_DISK_ACCOUNTING_TYPE_NR,
};
struct bch_nr_inodes {
};
struct bch_persistent_reserved {
__u8 nr_replicas;
};
struct bch_dev_data_type {
__u8 dev;
__u8 data_type;
};
struct bch_acct_compression {
__u8 type;
};
struct bch_acct_snapshot {
__u32 id;
} __packed;
struct bch_acct_btree {
__u32 id;
} __packed;
struct bch_acct_inum {
__u64 inum;
} __packed;
struct bch_acct_rebalance_work {
};
struct disk_accounting_pos {
union {
struct {
__u8 type;
union {
struct bch_nr_inodes nr_inodes;
struct bch_persistent_reserved persistent_reserved;
struct bch_replicas_entry_v1 replicas;
struct bch_dev_data_type dev_data_type;
struct bch_acct_compression compression;
struct bch_acct_snapshot snapshot;
struct bch_acct_btree btree;
struct bch_acct_rebalance_work rebalance_work;
struct bch_acct_inum inum;
} __packed;
} __packed;
struct bpos _pad;
};
};
#endif /* _BCACHEFS_DISK_ACCOUNTING_FORMAT_H */