bcachefs: Switch fsync to use bi_journal_seq

Now that we're recording in each inode the journal sequence number of
the most recent update, fsync becomes a lot simpler and we can delete
all the plumbing for ei_journal_seq.

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
This commit is contained in:
Kent Overstreet 2021-11-05 15:17:13 -04:00 committed by Kent Overstreet
parent e15a57ac05
commit 68a2054d88
9 changed files with 65 additions and 96 deletions

View File

@ -330,8 +330,7 @@ int bch2_set_acl(struct mnt_idmap *idmap,
inode_u.bi_mode = mode;
ret = bch2_inode_write(&trans, &inode_iter, &inode_u) ?:
bch2_trans_commit(&trans, NULL,
&inode->ei_journal_seq, 0);
bch2_trans_commit(&trans, NULL, NULL, 0);
btree_err:
bch2_trans_iter_exit(&trans, &inode_iter);

View File

@ -1096,7 +1096,6 @@ static void bch2_writepage_io_alloc(struct bch_fs *c,
op = &w->io->op;
bch2_write_op_init(op, c, w->opts);
op->target = w->opts.foreground_target;
op_journal_seq_set(op, &inode->ei_journal_seq);
op->nr_replicas = nr_replicas;
op->res.nr_replicas = nr_replicas;
op->write_point = writepoint_hashed(inode->ei_last_dirtied);
@ -1947,7 +1946,6 @@ static long bch2_dio_write_loop(struct dio_write *dio)
bch2_write_op_init(&dio->op, c, io_opts(c, &inode->ei_inode));
dio->op.end_io = bch2_dio_write_loop_async;
dio->op.target = dio->op.opts.foreground_target;
op_journal_seq_set(&dio->op, &inode->ei_journal_seq);
dio->op.write_point = writepoint_hashed((unsigned long) current);
dio->op.nr_replicas = dio->op.opts.data_replicas;
dio->op.subvol = inode->ei_subvol;
@ -2164,29 +2162,36 @@ ssize_t bch2_write_iter(struct kiocb *iocb, struct iov_iter *from)
/* fsync: */
/*
 * Flush the journal up to the sequence number recorded in the btree copy of
 * the inode identified by @inum.
 *
 * inode->ei_inode.bi_journal_seq can't be used for this: the field is set in
 * an insert trigger, so the cached copy won't be up to date. Look up the
 * btree inode instead.
 *
 * Returns 0 immediately if the journal_flush_disabled option is set; the
 * error from bch2_inode_find_by_inum() if the lookup fails; otherwise
 * whatever bch2_journal_flush_seq() returns.
 */
static int bch2_flush_inode(struct bch_fs *c, subvol_inum inum)
{
	struct bch_inode_unpacked inode_u;

	if (c->opts.journal_flush_disabled)
		return 0;

	/* A nonzero lookup result short-circuits the flush and is returned. */
	return bch2_inode_find_by_inum(c, inum, &inode_u) ?:
		bch2_journal_flush_seq(&c->journal, inode_u.bi_journal_seq);
}
int bch2_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
struct bch_inode_info *inode = file_bch_inode(file);
struct bch_fs *c = inode->v.i_sb->s_fs_info;
int ret, ret2;
int ret, ret2, ret3;
ret = file_write_and_wait_range(file, start, end);
if (ret)
return ret;
ret2 = sync_inode_metadata(&inode->v, 1);
ret3 = bch2_flush_inode(c, inode_inum(inode));
if (datasync && !(inode->v.i_state & I_DIRTY_DATASYNC))
goto out;
ret = sync_inode_metadata(&inode->v, 1);
if (ret)
return ret;
out:
if (!c->opts.journal_flush_disabled)
ret = bch2_journal_flush_seq(&c->journal,
inode->ei_journal_seq);
ret2 = file_check_and_advance_wb_err(file);
return ret ?: ret2;
return ret ?: ret2 ?: ret3;
}
/* truncate: */
@ -2448,7 +2453,7 @@ int bch2_truncate(struct mnt_idmap *idmap,
ret = bch2_fpunch(c, inode_inum(inode),
round_up(iattr->ia_size, block_bytes(c)) >> 9,
U64_MAX, &inode->ei_journal_seq, &i_sectors_delta);
U64_MAX, &i_sectors_delta);
i_sectors_acct(c, inode, NULL, i_sectors_delta);
if (unlikely(ret))
@ -2508,7 +2513,6 @@ static long bchfs_fpunch(struct bch_inode_info *inode, loff_t offset, loff_t len
ret = bch2_fpunch(c, inode_inum(inode),
discard_start, discard_end,
&inode->ei_journal_seq,
&i_sectors_delta);
i_sectors_acct(c, inode, NULL, i_sectors_delta);
}
@ -2587,7 +2591,6 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode,
ret = bch2_fpunch(c, inode_inum(inode),
offset >> 9, (offset + len) >> 9,
&inode->ei_journal_seq,
&i_sectors_delta);
i_sectors_acct(c, inode, NULL, i_sectors_delta);
@ -2691,8 +2694,7 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode,
ret = bch2_btree_iter_traverse(&del) ?:
bch2_trans_update(&trans, &del, &delete, trigger_flags) ?:
bch2_trans_update(&trans, &dst, copy.k, trigger_flags) ?:
bch2_trans_commit(&trans, &disk_res,
&inode->ei_journal_seq,
bch2_trans_commit(&trans, &disk_res, NULL,
BTREE_INSERT_NOFAIL);
bch2_disk_reservation_put(c, &disk_res);
@ -2803,7 +2805,7 @@ static int __bchfs_fallocate(struct bch_inode_info *inode, int mode,
ret = bch2_extent_update(&trans, inode_inum(inode), &iter,
&reservation.k_i,
&disk_res, &inode->ei_journal_seq,
&disk_res, NULL,
0, &i_sectors_delta, true);
i_sectors_acct(c, inode, &quota_res, i_sectors_delta);
bkey_err:
@ -3003,7 +3005,6 @@ loff_t bch2_remap_file_range(struct file *file_src, loff_t pos_src,
inode_inum(dst), pos_dst >> 9,
inode_inum(src), pos_src >> 9,
aligned_len >> 9,
&dst->ei_journal_seq,
pos_dst + len, &i_sectors_delta);
if (ret < 0)
goto err;
@ -3021,10 +3022,9 @@ loff_t bch2_remap_file_range(struct file *file_src, loff_t pos_src,
i_size_write(&dst->v, pos_dst + ret);
spin_unlock(&dst->v.i_lock);
if (((file_dst->f_flags & (__O_SYNC | O_DSYNC)) ||
IS_SYNC(file_inode(file_dst))) &&
!c->opts.journal_flush_disabled)
ret = bch2_journal_flush_seq(&c->journal, dst->ei_journal_seq);
if ((file_dst->f_flags & (__O_SYNC | O_DSYNC)) ||
IS_SYNC(file_inode(file_dst)))
ret = bch2_flush_inode(c, inode_inum(dst));
err:
bch2_unlock_inodes(INODE_LOCK|INODE_PAGECACHE_BLOCK, src, dst);

View File

@ -41,25 +41,6 @@ static void bch2_vfs_inode_init(struct bch_fs *, subvol_inum,
struct bch_inode_info *,
struct bch_inode_unpacked *);
/*
 * Raise dst->ei_journal_seq to journal_seq if journal_seq is larger.
 *
 * The stored value is never lowered: the loop exits without writing as soon
 * as the value it last observed is >= journal_seq. The c argument is not
 * used in the body.
 */
static void journal_seq_copy(struct bch_fs *c,
struct bch_inode_info *dst,
u64 journal_seq)
{
/*
* atomic64_cmpxchg has a fallback for archs that don't support it,
* cmpxchg does not:
*/
atomic64_t *dst_seq = (void *) &dst->ei_journal_seq;
u64 old, v = READ_ONCE(dst->ei_journal_seq);
/*
* Retry until the compare-and-exchange returns the value we last read
* (i.e. our store went through), or the current value is already new
* enough.
*/
do {
old = v;
if (old >= journal_seq)
break;
} while ((v = atomic64_cmpxchg(dst_seq, old, journal_seq)) != old);
}
static void __pagecache_lock_put(struct pagecache_lock *lock, long i)
{
BUG_ON(atomic_long_read(&lock->v) == 0);
@ -152,9 +133,7 @@ int __must_check bch2_write_inode(struct bch_fs *c,
BTREE_ITER_INTENT) ?:
(set ? set(inode, &inode_u, p) : 0) ?:
bch2_inode_write(&trans, &iter, &inode_u) ?:
bch2_trans_commit(&trans, NULL,
&inode->ei_journal_seq,
BTREE_INSERT_NOFAIL);
bch2_trans_commit(&trans, NULL, NULL, BTREE_INSERT_NOFAIL);
/*
* the btree node lock protects inode->ei_inode, not ei_update_lock;
@ -329,7 +308,6 @@ __bch2_create(struct mnt_idmap *idmap,
if (!(flags & BCH_CREATE_TMPFILE)) {
bch2_inode_update_after_write(c, dir, &dir_u,
ATTR_MTIME|ATTR_CTIME);
journal_seq_copy(c, dir, journal_seq);
mutex_unlock(&dir->ei_update_lock);
}
@ -337,7 +315,6 @@ __bch2_create(struct mnt_idmap *idmap,
inum.inum = inode_u.bi_inum;
bch2_vfs_inode_init(c, inum, inode, &inode_u);
journal_seq_copy(c, inode, journal_seq);
set_cached_acl(&inode->v, ACL_TYPE_ACCESS, acl);
set_cached_acl(&inode->v, ACL_TYPE_DEFAULT, default_acl);
@ -362,7 +339,6 @@ __bch2_create(struct mnt_idmap *idmap,
* We raced, another process pulled the new inode into cache
* before us:
*/
journal_seq_copy(c, old, journal_seq);
make_bad_inode(&inode->v);
iput(&inode->v);
@ -446,7 +422,7 @@ static int __bch2_link(struct bch_fs *c,
mutex_lock(&inode->ei_update_lock);
bch2_trans_init(&trans, c, 4, 1024);
ret = __bch2_trans_do(&trans, NULL, &inode->ei_journal_seq, 0,
ret = __bch2_trans_do(&trans, NULL, NULL, 0,
bch2_link_trans(&trans,
inode_inum(dir), &dir_u,
inode_inum(inode), &inode_u,
@ -455,7 +431,6 @@ static int __bch2_link(struct bch_fs *c,
if (likely(!ret)) {
BUG_ON(inode_u.bi_inum != inode->v.i_ino);
journal_seq_copy(c, inode, dir->ei_journal_seq);
bch2_inode_update_after_write(c, dir, &dir_u,
ATTR_MTIME|ATTR_CTIME);
bch2_inode_update_after_write(c, inode, &inode_u, ATTR_CTIME);
@ -498,7 +473,7 @@ int __bch2_unlink(struct inode *vdir, struct dentry *dentry,
bch2_lock_inodes(INODE_UPDATE_LOCK, dir, inode);
bch2_trans_init(&trans, c, 4, 1024);
ret = __bch2_trans_do(&trans, NULL, &dir->ei_journal_seq,
ret = __bch2_trans_do(&trans, NULL, NULL,
BTREE_INSERT_NOFAIL,
bch2_unlink_trans(&trans,
inode_inum(dir), &dir_u,
@ -508,7 +483,6 @@ int __bch2_unlink(struct inode *vdir, struct dentry *dentry,
if (likely(!ret)) {
BUG_ON(inode_u.bi_inum != inode->v.i_ino);
journal_seq_copy(c, inode, dir->ei_journal_seq);
bch2_inode_update_after_write(c, dir, &dir_u,
ATTR_MTIME|ATTR_CTIME);
bch2_inode_update_after_write(c, inode, &inode_u,
@ -550,8 +524,6 @@ static int bch2_symlink(struct mnt_idmap *idmap,
if (unlikely(ret))
goto err;
journal_seq_copy(c, dir, inode->ei_journal_seq);
ret = __bch2_link(c, inode, dir, dentry);
if (unlikely(ret))
goto err;
@ -586,7 +558,6 @@ static int bch2_rename2(struct mnt_idmap *idmap,
? BCH_RENAME_EXCHANGE
: dst_dentry->d_inode
? BCH_RENAME_OVERWRITE : BCH_RENAME;
u64 journal_seq = 0;
int ret;
if (flags & ~(RENAME_NOREPLACE|RENAME_EXCHANGE))
@ -626,7 +597,7 @@ static int bch2_rename2(struct mnt_idmap *idmap,
goto err;
}
ret = __bch2_trans_do(&trans, NULL, &journal_seq, 0,
ret = __bch2_trans_do(&trans, NULL, NULL, 0,
bch2_rename_trans(&trans,
inode_inum(src_dir), &src_dir_u,
inode_inum(dst_dir), &dst_dir_u,
@ -644,23 +615,17 @@ static int bch2_rename2(struct mnt_idmap *idmap,
bch2_inode_update_after_write(c, src_dir, &src_dir_u,
ATTR_MTIME|ATTR_CTIME);
journal_seq_copy(c, src_dir, journal_seq);
if (src_dir != dst_dir) {
if (src_dir != dst_dir)
bch2_inode_update_after_write(c, dst_dir, &dst_dir_u,
ATTR_MTIME|ATTR_CTIME);
journal_seq_copy(c, dst_dir, journal_seq);
}
bch2_inode_update_after_write(c, src_inode, &src_inode_u,
ATTR_CTIME);
journal_seq_copy(c, src_inode, journal_seq);
if (dst_inode) {
if (dst_inode)
bch2_inode_update_after_write(c, dst_inode, &dst_inode_u,
ATTR_CTIME);
journal_seq_copy(c, dst_inode, journal_seq);
}
err:
bch2_trans_exit(&trans);
@ -767,8 +732,7 @@ int bch2_setattr_nonsize(struct mnt_idmap *idmap,
}
ret = bch2_inode_write(&trans, &inode_iter, &inode_u) ?:
bch2_trans_commit(&trans, NULL,
&inode->ei_journal_seq,
bch2_trans_commit(&trans, NULL, NULL,
BTREE_INSERT_NOFAIL);
btree_err:
bch2_trans_iter_exit(&trans, &inode_iter);
@ -1203,7 +1167,6 @@ static void bch2_vfs_inode_init(struct bch_fs *c, subvol_inum inum,
inode->v.i_size = bi->bi_size;
inode->ei_flags = 0;
inode->ei_journal_seq = bi->bi_journal_seq;
inode->ei_quota_reserved = 0;
inode->ei_qid = bch_qid(bi);
inode->ei_subvol = inum.subvol;
@ -1242,7 +1205,6 @@ static struct inode *bch2_alloc_inode(struct super_block *sb)
mutex_init(&inode->ei_update_lock);
pagecache_lock_init(&inode->ei_pagecache_lock);
mutex_init(&inode->ei_quota_lock);
inode->ei_journal_seq = 0;
return &inode->v;
}

View File

@ -36,7 +36,6 @@ struct bch_inode_info {
unsigned long ei_flags;
struct mutex ei_update_lock;
u64 ei_journal_seq;
u64 ei_quota_reserved;
unsigned long ei_last_dirtied;
struct pagecache_lock ei_pagecache_lock;

View File

@ -393,7 +393,7 @@ int bch2_extent_update(struct btree_trans *trans,
*/
int bch2_fpunch_at(struct btree_trans *trans, struct btree_iter *iter,
subvol_inum inum, u64 end,
u64 *journal_seq, s64 *i_sectors_delta)
s64 *i_sectors_delta)
{
struct bch_fs *c = trans->c;
unsigned max_sectors = KEY_SIZE_MAX & (~0 << c->block_bits);
@ -431,7 +431,7 @@ int bch2_fpunch_at(struct btree_trans *trans, struct btree_iter *iter,
bch2_cut_back(end_pos, &delete);
ret = bch2_extent_update(trans, inum, iter, &delete,
&disk_res, journal_seq,
&disk_res, NULL,
0, i_sectors_delta, false);
bch2_disk_reservation_put(c, &disk_res);
btree_err:
@ -450,7 +450,7 @@ int bch2_fpunch_at(struct btree_trans *trans, struct btree_iter *iter,
}
int bch2_fpunch(struct bch_fs *c, subvol_inum inum, u64 start, u64 end,
u64 *journal_seq, s64 *i_sectors_delta)
s64 *i_sectors_delta)
{
struct btree_trans trans;
struct btree_iter iter;
@ -461,8 +461,7 @@ int bch2_fpunch(struct bch_fs *c, subvol_inum inum, u64 start, u64 end,
POS(inum.inum, start),
BTREE_ITER_INTENT);
ret = bch2_fpunch_at(&trans, &iter, inum, end,
journal_seq, i_sectors_delta);
ret = bch2_fpunch_at(&trans, &iter, inum, end, i_sectors_delta);
bch2_trans_iter_exit(&trans, &iter);
bch2_trans_exit(&trans);

View File

@ -68,12 +68,6 @@ static inline u64 *op_journal_seq(struct bch_write_op *op)
? op->journal_seq_p : &op->journal_seq;
}
/*
 * Record a caller-supplied journal sequence location in the write op: set
 * BCH_WRITE_JOURNAL_SEQ_PTR in op->flags and store the pointer in
 * op->journal_seq_p.
 */
static inline void op_journal_seq_set(struct bch_write_op *op, u64 *journal_seq)
{
	op->flags |= BCH_WRITE_JOURNAL_SEQ_PTR;
	op->journal_seq_p = journal_seq;
}
static inline struct workqueue_struct *index_update_wq(struct bch_write_op *op)
{
return op->alloc_reserve == RESERVE_MOVINGGC
@ -88,8 +82,8 @@ int bch2_extent_update(struct btree_trans *, subvol_inum,
struct disk_reservation *, u64 *, u64, s64 *, bool);
int bch2_fpunch_at(struct btree_trans *, struct btree_iter *,
subvol_inum, u64, u64 *, s64 *);
int bch2_fpunch(struct bch_fs *c, subvol_inum, u64, u64, u64 *, s64 *);
subvol_inum, u64, s64 *);
int bch2_fpunch(struct bch_fs *c, subvol_inum, u64, u64, s64 *);
static inline void bch2_write_op_init(struct bch_write_op *op, struct bch_fs *c,
struct bch_io_opts opts)

View File

@ -210,7 +210,7 @@ static struct bkey_s_c get_next_src(struct btree_iter *iter, struct bpos end)
s64 bch2_remap_range(struct bch_fs *c,
subvol_inum dst_inum, u64 dst_offset,
subvol_inum src_inum, u64 src_offset,
u64 remap_sectors, u64 *journal_seq,
u64 remap_sectors,
u64 new_i_size, s64 *i_sectors_delta)
{
struct btree_trans trans;
@ -281,7 +281,7 @@ s64 bch2_remap_range(struct bch_fs *c,
min(dst_end.offset,
dst_iter.pos.offset +
src_iter.pos.offset - src_want.offset),
journal_seq, i_sectors_delta);
i_sectors_delta);
continue;
}
@ -320,7 +320,7 @@ s64 bch2_remap_range(struct bch_fs *c,
dst_end.offset - dst_iter.pos.offset));
ret = bch2_extent_update(&trans, dst_inum, &dst_iter,
new_dst.k, &disk_res, journal_seq,
new_dst.k, &disk_res, NULL,
new_i_size, i_sectors_delta,
true);
bch2_disk_reservation_put(c, &disk_res);
@ -347,7 +347,7 @@ s64 bch2_remap_range(struct bch_fs *c,
inode_u.bi_size < new_i_size) {
inode_u.bi_size = new_i_size;
ret2 = bch2_inode_write(&trans, &inode_iter, &inode_u) ?:
bch2_trans_commit(&trans, NULL, journal_seq, 0);
bch2_trans_commit(&trans, NULL, NULL, 0);
}
bch2_trans_iter_exit(&trans, &inode_iter);

View File

@ -58,6 +58,6 @@ static inline __le64 *bkey_refcount(struct bkey_i *k)
}
s64 bch2_remap_range(struct bch_fs *, subvol_inum, u64,
subvol_inum, u64, u64, u64 *, u64, s64 *);
subvol_inum, u64, u64, u64, s64 *);
#endif /* _BCACHEFS_REFLINK_H */

View File

@ -165,8 +165,24 @@ int bch2_xattr_set(struct btree_trans *trans, subvol_inum inum,
const char *name, const void *value, size_t size,
int type, int flags)
{
struct btree_iter inode_iter = { NULL };
struct bch_inode_unpacked inode_u;
int ret;
/*
* We need to do an inode update so that bi_journal_seq gets updated
* and fsync works:
*
* Perhaps we should be updating bi_mtime too?
*/
ret = bch2_inode_peek(trans, &inode_iter, &inode_u, inum, BTREE_ITER_INTENT) ?:
bch2_inode_write(trans, &inode_iter, &inode_u);
bch2_trans_iter_exit(trans, &inode_iter);
if (ret)
return ret;
if (value) {
struct bkey_i_xattr *xattr;
unsigned namelen = strlen(name);
@ -352,7 +368,7 @@ static int bch2_xattr_set_handler(const struct xattr_handler *handler,
struct bch_fs *c = inode->v.i_sb->s_fs_info;
struct bch_hash_info hash = bch2_hash_info_init(c, &inode->ei_inode);
return bch2_trans_do(c, NULL, &inode->ei_journal_seq, 0,
return bch2_trans_do(c, NULL, NULL, 0,
bch2_xattr_set(&trans, inode_inum(inode), &hash,
name, value, size,
handler->flags, flags));