bcachefs: Add a mechanism for blocking the journal

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
Kent Overstreet 2019-02-14 18:38:52 -05:00 committed by Kent Overstreet
parent 8fe826f90a
commit 768ac63924
13 changed files with 138 additions and 91 deletions

View File

@ -724,7 +724,7 @@ static struct write_point *__writepoint_find(struct hlist_head *head,
static inline bool too_many_writepoints(struct bch_fs *c, unsigned factor)
{
u64 stranded = c->write_points_nr * c->bucket_size_max;
u64 free = bch2_fs_sectors_free(c);
u64 free = bch2_fs_usage_read_short(c).free;
return stranded * factor > free;
}

View File

@ -612,11 +612,11 @@ static void bch2_gc_done(struct bch_fs *c, bool initial)
struct bch_fs_usage *src = (void *)
bch2_acc_percpu_u64s((void *) c->usage[1], nr);
copy_fs_field(s.hidden, "hidden");
copy_fs_field(s.data, "data");
copy_fs_field(s.cached, "cached");
copy_fs_field(s.reserved, "reserved");
copy_fs_field(s.nr_inodes, "nr_inodes");
copy_fs_field(hidden, "hidden");
copy_fs_field(data, "data");
copy_fs_field(cached, "cached");
copy_fs_field(reserved, "reserved");
copy_fs_field(nr_inodes, "nr_inodes");
for (i = 0; i < BCH_REPLICAS_MAX; i++)
copy_fs_field(persistent_reserved[i],
@ -629,7 +629,7 @@ static void bch2_gc_done(struct bch_fs *c, bool initial)
bch2_replicas_entry_to_text(&PBUF(buf), e);
copy_fs_field(data[i], "%s", buf);
copy_fs_field(replicas[i], "%s", buf);
}
}

View File

@ -124,7 +124,7 @@ void bch2_fs_usage_initialize(struct bch_fs *c)
usage = (void *) bch2_acc_percpu_u64s((void *) c->usage[0], nr);
for (i = 0; i < BCH_REPLICAS_MAX; i++)
usage->s.reserved += usage->persistent_reserved[i];
usage->reserved += usage->persistent_reserved[i];
for (i = 0; i < c->replicas.nr; i++) {
struct bch_replicas_entry *e =
@ -133,10 +133,10 @@ void bch2_fs_usage_initialize(struct bch_fs *c)
switch (e->data_type) {
case BCH_DATA_BTREE:
case BCH_DATA_USER:
usage->s.data += usage->data[i];
usage->data += usage->replicas[i];
break;
case BCH_DATA_CACHED:
usage->s.cached += usage->data[i];
usage->cached += usage->replicas[i];
break;
}
}
@ -144,21 +144,16 @@ void bch2_fs_usage_initialize(struct bch_fs *c)
percpu_up_write(&c->mark_lock);
}
#define bch2_usage_read_raw(_stats) \
({ \
typeof(*this_cpu_ptr(_stats)) _acc; \
\
memset(&_acc, 0, sizeof(_acc)); \
acc_u64s_percpu((u64 *) &_acc, \
(u64 __percpu *) _stats, \
sizeof(_acc) / sizeof(u64)); \
\
_acc; \
})
struct bch_dev_usage bch2_dev_usage_read(struct bch_fs *c, struct bch_dev *ca)
{
return bch2_usage_read_raw(ca->usage[0]);
struct bch_dev_usage ret;
memset(&ret, 0, sizeof(ret));
acc_u64s_percpu((u64 *) &ret,
(u64 __percpu *) ca->usage[0],
sizeof(ret) / sizeof(u64));
return ret;
}
struct bch_fs_usage *bch2_fs_usage_read(struct bch_fs *c)
@ -198,27 +193,44 @@ static u64 avail_factor(u64 r)
return (r << RESERVE_FACTOR) / ((1 << RESERVE_FACTOR) + 1);
}
u64 bch2_fs_sectors_used(struct bch_fs *c, struct bch_fs_usage fs_usage)
u64 bch2_fs_sectors_used(struct bch_fs *c, struct bch_fs_usage *fs_usage)
{
return min(fs_usage.s.hidden +
fs_usage.s.data +
reserve_factor(fs_usage.s.reserved +
fs_usage.s.online_reserved),
return min(fs_usage->hidden +
fs_usage->data +
reserve_factor(fs_usage->reserved +
fs_usage->online_reserved),
c->capacity);
}
/*
 * Build a condensed usage summary from the percpu counters in c->usage[0].
 *
 * capacity is c->capacity minus the "hidden" counter; used is data plus
 * reserve_factor() applied to (reserved + online_reserved), capped at
 * capacity; free is the remainder. This function takes no locks itself.
 *
 * NOTE(review): bch2_fs_usage_read_short() calls this between
 * percpu_down_read()/percpu_up_read() on c->mark_lock - presumably direct
 * callers of this variant are expected to hold mark_lock already; confirm.
 */
static struct bch_fs_usage_short
__bch2_fs_usage_read_short(struct bch_fs *c)
{
struct bch_fs_usage_short ret;
u64 data, reserved;
ret.capacity = c->capacity -
percpu_u64_get(&c->usage[0]->hidden);
data = percpu_u64_get(&c->usage[0]->data);
/* both persistent and in-flight (online) reservations count as reserved */
reserved = percpu_u64_get(&c->usage[0]->reserved) +
percpu_u64_get(&c->usage[0]->online_reserved);
/* cap at capacity so the subtraction for free below cannot wrap */
ret.used = min(ret.capacity, data + reserve_factor(reserved));
ret.free = ret.capacity - ret.used;
ret.nr_inodes = percpu_u64_get(&c->usage[0]->nr_inodes);
return ret;
}
struct bch_fs_usage_short
bch2_fs_usage_read_short(struct bch_fs *c)
{
struct bch_fs_usage_summarized usage =
bch2_usage_read_raw(&c->usage[0]->s);
struct bch_fs_usage_short ret;
ret.capacity = READ_ONCE(c->capacity) - usage.hidden;
ret.used = min(ret.capacity, usage.data +
reserve_factor(usage.reserved +
usage.online_reserved));
ret.nr_inodes = usage.nr_inodes;
percpu_down_read(&c->mark_lock);
ret = __bch2_fs_usage_read_short(c);
percpu_up_read(&c->mark_lock);
return ret;
}
@ -257,7 +269,7 @@ int bch2_fs_usage_apply(struct bch_fs *c,
struct bch_fs_usage *fs_usage,
struct disk_reservation *disk_res)
{
s64 added = fs_usage->s.data + fs_usage->s.reserved;
s64 added = fs_usage->data + fs_usage->reserved;
s64 should_not_have_added;
int ret = 0;
@ -277,7 +289,7 @@ int bch2_fs_usage_apply(struct bch_fs *c,
if (added > 0) {
disk_res->sectors -= added;
fs_usage->s.online_reserved -= added;
fs_usage->online_reserved -= added;
}
preempt_disable();
@ -295,7 +307,7 @@ static inline void account_bucket(struct bch_fs_usage *fs_usage,
int nr, s64 size)
{
if (type == BCH_DATA_SB || type == BCH_DATA_JOURNAL)
fs_usage->s.hidden += size;
fs_usage->hidden += size;
dev_usage->buckets[type] += nr;
}
@ -381,10 +393,10 @@ static inline void update_replicas(struct bch_fs *c,
BUG_ON(!sectors);
if (r->data_type == BCH_DATA_CACHED)
fs_usage->s.cached += sectors;
fs_usage->cached += sectors;
else
fs_usage->s.data += sectors;
fs_usage->data[idx] += sectors;
fs_usage->data += sectors;
fs_usage->replicas[idx] += sectors;
}
static inline void update_cached_sectors(struct bch_fs *c,
@ -911,9 +923,9 @@ static int __bch2_mark_key(struct bch_fs *c, struct bkey_s_c k,
fs_usage, journal_seq, flags, gc);
case KEY_TYPE_inode:
if (inserting)
fs_usage->s.nr_inodes++;
fs_usage->nr_inodes++;
else
fs_usage->s.nr_inodes--;
fs_usage->nr_inodes--;
return 0;
case KEY_TYPE_reservation: {
unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas;
@ -922,7 +934,7 @@ static int __bch2_mark_key(struct bch_fs *c, struct bkey_s_c k,
replicas = clamp_t(unsigned, replicas, 1,
ARRAY_SIZE(fs_usage->persistent_reserved));
fs_usage->s.reserved += sectors;
fs_usage->reserved += sectors;
fs_usage->persistent_reserved[replicas - 1] += sectors;
return 0;
}
@ -1074,13 +1086,13 @@ static u64 bch2_recalc_sectors_available(struct bch_fs *c)
{
percpu_u64_set(&c->pcpu->sectors_available, 0);
return avail_factor(bch2_fs_sectors_free(c));
return avail_factor(__bch2_fs_usage_read_short(c).free);
}
void __bch2_disk_reservation_put(struct bch_fs *c, struct disk_reservation *res)
{
percpu_down_read(&c->mark_lock);
this_cpu_sub(c->usage[0]->s.online_reserved, res->sectors);
this_cpu_sub(c->usage[0]->online_reserved, res->sectors);
percpu_up_read(&c->mark_lock);
res->sectors = 0;
@ -1120,7 +1132,7 @@ int bch2_disk_reservation_add(struct bch_fs *c, struct disk_reservation *res,
out:
pcpu->sectors_available -= sectors;
this_cpu_add(c->usage[0]->s.online_reserved, sectors);
this_cpu_add(c->usage[0]->online_reserved, sectors);
res->sectors += sectors;
preempt_enable();
@ -1136,7 +1148,7 @@ recalculate:
(flags & BCH_DISK_RESERVATION_NOFAIL)) {
atomic64_set(&c->sectors_available,
max_t(s64, 0, sectors_available - sectors));
this_cpu_add(c->usage[0]->s.online_reserved, sectors);
this_cpu_add(c->usage[0]->online_reserved, sectors);
res->sectors += sectors;
ret = 0;
} else {

View File

@ -225,18 +225,11 @@ static inline struct bch_fs_usage *bch2_fs_usage_get_scratch(struct bch_fs *c)
struct bch_fs_usage *bch2_fs_usage_read(struct bch_fs *);
u64 bch2_fs_sectors_used(struct bch_fs *, struct bch_fs_usage);
u64 bch2_fs_sectors_used(struct bch_fs *, struct bch_fs_usage *);
struct bch_fs_usage_short
bch2_fs_usage_read_short(struct bch_fs *);
static inline u64 bch2_fs_sectors_free(struct bch_fs *c)
{
struct bch_fs_usage_short usage = bch2_fs_usage_read_short(c);
return usage.capacity - usage.used;
}
/* key/bucket marking: */
void bch2_bucket_seq_cleanup(struct bch_fs *);

View File

@ -64,35 +64,33 @@ struct bch_dev_usage {
struct bch_fs_usage {
/* all fields are in units of 512 byte sectors: */
/* summarized: */
struct bch_fs_usage_summarized {
u64 online_reserved;
u64 online_reserved;
/* fields after online_reserved are cleared/recalculated by gc: */
u64 gc_start[0];
/* fields after online_reserved are cleared/recalculated by gc: */
u64 gc_start[0];
u64 hidden;
u64 data;
u64 cached;
u64 reserved;
u64 nr_inodes;
u64 hidden;
u64 data;
u64 cached;
u64 reserved;
u64 nr_inodes;
/* XXX: add stats for compression ratio */
/* XXX: add stats for compression ratio */
#if 0
u64 uncompressed;
u64 compressed;
u64 uncompressed;
u64 compressed;
#endif
} s;
/* broken out: */
u64 persistent_reserved[BCH_REPLICAS_MAX];
u64 data[];
u64 replicas[];
};
/*
 * Condensed filesystem usage summary, as filled in by
 * __bch2_fs_usage_read_short().
 */
struct bch_fs_usage_short {
/* c->capacity minus the hidden counter */
u64 capacity;
/* data + reserve_factor(reserved + online_reserved), capped at capacity */
u64 used;
/* capacity - used */
u64 free;
u64 nr_inodes;
};

View File

@ -403,10 +403,10 @@ static long bch2_ioctl_usage(struct bch_fs *c,
if (!src)
return -ENOMEM;
percpu_up_read(&c->mark_lock);
dst.used = bch2_fs_sectors_used(c, src);
dst.online_reserved = src->online_reserved;
dst.used = bch2_fs_sectors_used(c, *src);
dst.online_reserved = src->s.online_reserved;
percpu_up_read(&c->mark_lock);
for (i = 0; i < BCH_REPLICAS_MAX; i++) {
dst.persistent_reserved[i] =

View File

@ -212,6 +212,9 @@ static int journal_entry_open(struct journal *j)
lockdep_assert_held(&j->lock);
BUG_ON(journal_entry_is_open(j));
if (j->blocked)
return -EAGAIN;
if (!fifo_free(&j->pin))
return 0;
@ -287,7 +290,7 @@ static bool __journal_entry_close(struct journal *j)
spin_unlock(&j->lock);
fallthrough;
case JOURNAL_UNLOCKED:
return true;
return false;
}
}
@ -297,6 +300,22 @@ static bool journal_entry_close(struct journal *j)
return __journal_entry_close(j);
}
/*
 * Condition used by journal_quiesce(): with j->lock taken, sample whether no
 * previous journal buffer is still unwritten and no journal entry is open,
 * then call __journal_entry_close() and return the sampled value.
 *
 * The result is computed before the close is attempted, so a call that finds
 * an entry open returns false.
 */
static bool journal_quiesced(struct journal *j)
{
bool ret;
spin_lock(&j->lock);
ret = !j->reservations.prev_buf_unwritten &&
!journal_entry_is_open(j);
/*
 * NOTE(review): there is no spin_unlock() in this function - presumably
 * __journal_entry_close() drops j->lock on every return path; confirm.
 */
__journal_entry_close(j);
return ret;
}
/* Sleep on j->wait until journal_quiesced() returns true. */
static void journal_quiesce(struct journal *j)
{
wait_event(j->wait, journal_quiesced(j));
}
static void journal_write_work(struct work_struct *work)
{
struct journal *j = container_of(work, struct journal, write_work.work);
@ -722,6 +741,26 @@ int bch2_journal_flush(struct journal *j)
return bch2_journal_flush_seq(j, seq);
}
/* block/unblock the journal: */
/*
 * Undo one bch2_journal_block(): decrement the j->blocked counter under
 * j->lock, then call journal_wake().
 *
 * journal_entry_open() returns -EAGAIN while j->blocked is nonzero, so new
 * entries can only be opened again once every block has been dropped.
 * NOTE(review): the wakeup is presumably what lets waiters retry; confirm
 * against journal_wake().
 */
void bch2_journal_unblock(struct journal *j)
{
spin_lock(&j->lock);
j->blocked--;
spin_unlock(&j->lock);
journal_wake(j);
}
/*
 * Block the journal: increment the j->blocked counter under j->lock, which
 * makes journal_entry_open() return -EAGAIN, then wait in journal_quiesce()
 * until journal_quiesced() reports true.
 *
 * Each call is paired with a bch2_journal_unblock(), which decrements the
 * counter again.
 */
void bch2_journal_block(struct journal *j)
{
spin_lock(&j->lock);
j->blocked++;
spin_unlock(&j->lock);
journal_quiesce(j);
}
/* allocate journal on a device: */
static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
@ -931,8 +970,7 @@ void bch2_fs_journal_stop(struct journal *j)
c->btree_roots_dirty)
bch2_journal_meta(j);
BUG_ON(journal_entry_is_open(j) ||
j->reservations.prev_buf_unwritten);
journal_quiesce(j);
BUG_ON(!bch2_journal_error(j) &&
test_bit(JOURNAL_NOT_EMPTY, &j->flags));

View File

@ -370,6 +370,9 @@ static inline void bch2_journal_set_replay_done(struct journal *j)
set_bit(JOURNAL_REPLAY_DONE, &j->flags);
}
void bch2_journal_unblock(struct journal *);
void bch2_journal_block(struct journal *);
ssize_t bch2_journal_print_debug(struct journal *, char *);
ssize_t bch2_journal_print_pins(struct journal *, char *);

View File

@ -142,6 +142,9 @@ struct journal {
spinlock_t lock;
/* if nonzero, we may not open a new journal entry: */
unsigned blocked;
/* Used when waiting because the journal was full */
wait_queue_head_t wait;
struct closure_waitlist async_wait;

View File

@ -83,7 +83,7 @@ static int journal_replay_entry_early(struct bch_fs *c,
le64_to_cpu(u->v));
break;
case FS_USAGE_INODES:
percpu_u64_set(&c->usage[0]->s.nr_inodes,
percpu_u64_set(&c->usage[0]->nr_inodes,
le64_to_cpu(u->v));
break;
case FS_USAGE_KEY_VERSION:

View File

@ -245,14 +245,14 @@ static void __replicas_table_update(struct bch_fs_usage __percpu *dst_p,
*dst = *src;
for (src_idx = 0; src_idx < src_r->nr; src_idx++) {
if (!src->data[src_idx])
if (!src->replicas[src_idx])
continue;
dst_idx = __replicas_entry_idx(dst_r,
cpu_replicas_entry(src_r, src_idx));
BUG_ON(dst_idx < 0);
dst->data[dst_idx] = src->data[src_idx];
dst->replicas[dst_idx] = src->replicas[src_idx];
}
}
@ -457,7 +457,7 @@ int bch2_replicas_gc_end(struct bch_fs *c, int ret)
if (__replicas_has_entry(&c->replicas_gc, e))
continue;
v = percpu_u64_get(&c->usage[0]->data[i]);
v = percpu_u64_get(&c->usage[0]->replicas[i]);
if (!v)
continue;
@ -558,7 +558,7 @@ int bch2_replicas_set_usage(struct bch_fs *c,
BUG_ON(ret < 0);
}
percpu_u64_set(&c->usage[0]->data[idx], sectors);
percpu_u64_set(&c->usage[0]->replicas[idx], sectors);
return 0;
}

View File

@ -930,7 +930,7 @@ bch2_journal_super_entries_add_common(struct bch_fs *c,
percpu_down_write(&c->mark_lock);
{
u64 nr_inodes = percpu_u64_get(&c->usage[0]->s.nr_inodes);
u64 nr_inodes = percpu_u64_get(&c->usage[0]->nr_inodes);
struct jset_entry_usage *u =
container_of(entry, struct jset_entry_usage, entry);
@ -977,7 +977,7 @@ bch2_journal_super_entries_add_common(struct bch_fs *c,
for (i = 0; i < c->replicas.nr; i++) {
struct bch_replicas_entry *e =
cpu_replicas_entry(&c->replicas, i);
u64 sectors = percpu_u64_get(&c->usage[0]->data[i]);
u64 sectors = percpu_u64_get(&c->usage[0]->replicas[i]);
struct jset_entry_data_usage *u =
container_of(entry, struct jset_entry_data_usage, entry);

View File

@ -244,17 +244,17 @@ static ssize_t show_fs_alloc_debug(struct bch_fs *c, char *buf)
pr_buf(&out, "capacity:\t\t\t%llu\n", c->capacity);
pr_buf(&out, "hidden:\t\t\t\t%llu\n",
fs_usage->s.hidden);
fs_usage->hidden);
pr_buf(&out, "data:\t\t\t\t%llu\n",
fs_usage->s.data);
fs_usage->data);
pr_buf(&out, "cached:\t\t\t\t%llu\n",
fs_usage->s.cached);
fs_usage->cached);
pr_buf(&out, "reserved:\t\t\t%llu\n",
fs_usage->s.reserved);
fs_usage->reserved);
pr_buf(&out, "nr_inodes:\t\t\t%llu\n",
fs_usage->s.nr_inodes);
fs_usage->nr_inodes);
pr_buf(&out, "online reserved:\t\t%llu\n",
fs_usage->s.online_reserved);
fs_usage->online_reserved);
for (i = 0;
i < ARRAY_SIZE(fs_usage->persistent_reserved);
@ -270,7 +270,7 @@ static ssize_t show_fs_alloc_debug(struct bch_fs *c, char *buf)
pr_buf(&out, "\t");
bch2_replicas_entry_to_text(&out, e);
pr_buf(&out, ":\t%llu\n", fs_usage->data[i]);
pr_buf(&out, ":\t%llu\n", fs_usage->replicas[i]);
}
percpu_up_read(&c->mark_lock);