bcachefs: Refactor dio write code to reinit bch_write_op

This fixes a bug where BCH_WRITE_SKIP_CLOSURE_PUT was set incorrectly,
causing the write completion to be delivered multiple times.

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
Author: Kent Overstreet, 2020-06-29 18:22:06 -04:00 (committed by Kent Overstreet)
Parent commit: 64f2a8803e
Commit: 042a1f268e
3 changed files with 35 additions and 47 deletions

View File

@ -63,6 +63,7 @@ struct dio_write {
sync:1, sync:1,
free_iov:1; free_iov:1;
struct quota_res quota_res; struct quota_res quota_res;
u64 written;
struct iov_iter iter; struct iov_iter iter;
struct iovec inline_vecs[2]; struct iovec inline_vecs[2];
@ -1776,18 +1777,19 @@ static noinline int bch2_dio_write_copy_iov(struct dio_write *dio)
return 0; return 0;
} }
static void bch2_dio_write_loop_async(struct bch_write_op *);
static long bch2_dio_write_loop(struct dio_write *dio) static long bch2_dio_write_loop(struct dio_write *dio)
{ {
bool kthread = (current->flags & PF_KTHREAD) != 0; bool kthread = (current->flags & PF_KTHREAD) != 0;
struct bch_fs *c = dio->op.c;
struct kiocb *req = dio->req; struct kiocb *req = dio->req;
struct address_space *mapping = req->ki_filp->f_mapping; struct address_space *mapping = req->ki_filp->f_mapping;
struct bch_inode_info *inode = file_bch_inode(req->ki_filp); struct bch_inode_info *inode = file_bch_inode(req->ki_filp);
struct bch_fs *c = inode->v.i_sb->s_fs_info;
struct bio *bio = &dio->op.wbio.bio; struct bio *bio = &dio->op.wbio.bio;
struct bvec_iter_all iter; struct bvec_iter_all iter;
struct bio_vec *bv; struct bio_vec *bv;
unsigned unaligned; unsigned unaligned;
u64 new_i_size;
bool sync = dio->sync; bool sync = dio->sync;
long ret; long ret;
@ -1834,8 +1836,24 @@ static long bch2_dio_write_loop(struct dio_write *dio)
goto err; goto err;
} }
dio->op.pos = POS(inode->v.i_ino, bch2_write_op_init(&dio->op, c, io_opts(c, &inode->ei_inode));
(req->ki_pos >> 9) + dio->op.written); dio->op.end_io = bch2_dio_write_loop_async;
dio->op.target = dio->op.opts.foreground_target;
op_journal_seq_set(&dio->op, &inode->ei_journal_seq);
dio->op.write_point = writepoint_hashed((unsigned long) current);
dio->op.nr_replicas = dio->op.opts.data_replicas;
dio->op.pos = POS(inode->v.i_ino, (u64) req->ki_pos >> 9);
if ((req->ki_flags & IOCB_DSYNC) &&
!c->opts.journal_flush_disabled)
dio->op.flags |= BCH_WRITE_FLUSH;
ret = bch2_disk_reservation_get(c, &dio->op.res, bio_sectors(bio),
dio->op.opts.data_replicas, 0);
if (unlikely(ret) &&
!bch2_check_range_allocated(c, dio->op.pos,
bio_sectors(bio), dio->op.opts.data_replicas))
goto err;
task_io_account_write(bio->bi_iter.bi_size); task_io_account_write(bio->bi_iter.bi_size);
@ -1856,13 +1874,12 @@ do_io:
loop: loop:
i_sectors_acct(c, inode, &dio->quota_res, i_sectors_acct(c, inode, &dio->quota_res,
dio->op.i_sectors_delta); dio->op.i_sectors_delta);
dio->op.i_sectors_delta = 0; req->ki_pos += (u64) dio->op.written << 9;
dio->written += dio->op.written;
new_i_size = req->ki_pos + ((u64) dio->op.written << 9);
spin_lock(&inode->v.i_lock); spin_lock(&inode->v.i_lock);
if (new_i_size > inode->v.i_size) if (req->ki_pos > inode->v.i_size)
i_size_write(&inode->v, new_i_size); i_size_write(&inode->v, req->ki_pos);
spin_unlock(&inode->v.i_lock); spin_unlock(&inode->v.i_lock);
bio_for_each_segment_all(bv, bio, iter) bio_for_each_segment_all(bv, bio, iter)
@ -1874,10 +1891,9 @@ loop:
reinit_completion(&dio->done); reinit_completion(&dio->done);
} }
ret = dio->op.error ?: ((long) dio->op.written << 9); ret = dio->op.error ?: ((long) dio->written << 9);
err: err:
bch2_pagecache_block_put(&inode->ei_pagecache_lock); bch2_pagecache_block_put(&inode->ei_pagecache_lock);
bch2_disk_reservation_put(c, &dio->op.res);
bch2_quota_reservation_put(c, inode, &dio->quota_res); bch2_quota_reservation_put(c, inode, &dio->quota_res);
if (dio->free_iov) if (dio->free_iov)
@ -1912,7 +1928,6 @@ ssize_t bch2_direct_write(struct kiocb *req, struct iov_iter *iter)
struct address_space *mapping = file->f_mapping; struct address_space *mapping = file->f_mapping;
struct bch_inode_info *inode = file_bch_inode(file); struct bch_inode_info *inode = file_bch_inode(file);
struct bch_fs *c = inode->v.i_sb->s_fs_info; struct bch_fs *c = inode->v.i_sb->s_fs_info;
struct bch_io_opts opts = io_opts(c, &inode->ei_inode);
struct dio_write *dio; struct dio_write *dio;
struct bio *bio; struct bio *bio;
bool locked = true, extending; bool locked = true, extending;
@ -1962,35 +1977,14 @@ ssize_t bch2_direct_write(struct kiocb *req, struct iov_iter *iter)
dio->sync = is_sync_kiocb(req) || extending; dio->sync = is_sync_kiocb(req) || extending;
dio->free_iov = false; dio->free_iov = false;
dio->quota_res.sectors = 0; dio->quota_res.sectors = 0;
dio->written = 0;
dio->iter = *iter; dio->iter = *iter;
bch2_write_op_init(&dio->op, c, opts);
dio->op.end_io = bch2_dio_write_loop_async;
dio->op.target = opts.foreground_target;
op_journal_seq_set(&dio->op, &inode->ei_journal_seq);
dio->op.write_point = writepoint_hashed((unsigned long) current);
dio->op.flags |= BCH_WRITE_NOPUT_RESERVATION;
if ((req->ki_flags & IOCB_DSYNC) &&
!c->opts.journal_flush_disabled)
dio->op.flags |= BCH_WRITE_FLUSH;
ret = bch2_quota_reservation_add(c, inode, &dio->quota_res, ret = bch2_quota_reservation_add(c, inode, &dio->quota_res,
iter->count >> 9, true); iter->count >> 9, true);
if (unlikely(ret)) if (unlikely(ret))
goto err_put_bio; goto err_put_bio;
dio->op.nr_replicas = dio->op.opts.data_replicas;
ret = bch2_disk_reservation_get(c, &dio->op.res, iter->count >> 9,
dio->op.opts.data_replicas, 0);
if (unlikely(ret) &&
!bch2_check_range_allocated(c, POS(inode->v.i_ino,
req->ki_pos >> 9),
iter->count >> 9,
dio->op.opts.data_replicas))
goto err_put_bio;
if (unlikely(mapping->nrpages)) { if (unlikely(mapping->nrpages)) {
ret = write_invalidate_inode_pages_range(mapping, ret = write_invalidate_inode_pages_range(mapping,
req->ki_pos, req->ki_pos,
@ -2003,12 +1997,9 @@ ssize_t bch2_direct_write(struct kiocb *req, struct iov_iter *iter)
err: err:
if (locked) if (locked)
inode_unlock(&inode->v); inode_unlock(&inode->v);
if (ret > 0)
req->ki_pos += ret;
return ret; return ret;
err_put_bio: err_put_bio:
bch2_pagecache_block_put(&inode->ei_pagecache_lock); bch2_pagecache_block_put(&inode->ei_pagecache_lock);
bch2_disk_reservation_put(c, &dio->op.res);
bch2_quota_reservation_put(c, inode, &dio->quota_res); bch2_quota_reservation_put(c, inode, &dio->quota_res);
bio_put(bio); bio_put(bio);
inode_dio_end(&inode->v); inode_dio_end(&inode->v);

View File

@ -509,8 +509,7 @@ static void bch2_write_done(struct closure *cl)
if (!op->error && (op->flags & BCH_WRITE_FLUSH)) if (!op->error && (op->flags & BCH_WRITE_FLUSH))
op->error = bch2_journal_error(&c->journal); op->error = bch2_journal_error(&c->journal);
if (!(op->flags & BCH_WRITE_NOPUT_RESERVATION)) bch2_disk_reservation_put(c, &op->res);
bch2_disk_reservation_put(c, &op->res);
percpu_ref_put(&c->writes); percpu_ref_put(&c->writes);
bch2_keylist_free(&op->insert_keys, op->inline_keys); bch2_keylist_free(&op->insert_keys, op->inline_keys);
@ -1273,8 +1272,7 @@ void bch2_write(struct closure *cl)
continue_at_nobarrier(cl, __bch2_write, NULL); continue_at_nobarrier(cl, __bch2_write, NULL);
return; return;
err: err:
if (!(op->flags & BCH_WRITE_NOPUT_RESERVATION)) bch2_disk_reservation_put(c, &op->res);
bch2_disk_reservation_put(c, &op->res);
if (op->end_io) { if (op->end_io) {
EBUG_ON(cl->parent); EBUG_ON(cl->parent);

View File

@ -34,14 +34,13 @@ enum bch_write_flags {
BCH_WRITE_PAGES_STABLE = (1 << 4), BCH_WRITE_PAGES_STABLE = (1 << 4),
BCH_WRITE_PAGES_OWNED = (1 << 5), BCH_WRITE_PAGES_OWNED = (1 << 5),
BCH_WRITE_ONLY_SPECIFIED_DEVS = (1 << 6), BCH_WRITE_ONLY_SPECIFIED_DEVS = (1 << 6),
BCH_WRITE_NOPUT_RESERVATION = (1 << 7), BCH_WRITE_WROTE_DATA_INLINE = (1 << 7),
BCH_WRITE_WROTE_DATA_INLINE = (1 << 8), BCH_WRITE_FROM_INTERNAL = (1 << 8),
BCH_WRITE_FROM_INTERNAL = (1 << 9),
/* Internal: */ /* Internal: */
BCH_WRITE_JOURNAL_SEQ_PTR = (1 << 10), BCH_WRITE_JOURNAL_SEQ_PTR = (1 << 9),
BCH_WRITE_SKIP_CLOSURE_PUT = (1 << 11), BCH_WRITE_SKIP_CLOSURE_PUT = (1 << 10),
BCH_WRITE_DONE = (1 << 12), BCH_WRITE_DONE = (1 << 11),
}; };
static inline u64 *op_journal_seq(struct bch_write_op *op) static inline u64 *op_journal_seq(struct bch_write_op *op)