io_uring: limit parallelism of buffered writes
All the popular filesystems need to grab the inode lock for buffered
writes. With io_uring punting buffered writes to async context, we
observe a lot of contention with all workers hammering this mutex.

For buffered writes, we generally don't need a lot of parallelism on
the submission side, as the flushing will take care of that for us.
Hence we don't need a deep queue on the write side, as long as we can
safely punt from the original submission context. Add a workqueue with
a limit of 2 that we can use for buffered writes. This greatly improves
the performance and efficiency of higher queue depth buffered async
writes with io_uring.

Reported-by: Andres Freund <andres@anarazel.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
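For context, a minimal userspace sketch (not part of this commit; it assumes liburing is installed, and the file name, queue depth, and block size are arbitrary) of the kind of high queue depth buffered-write workload described above. None of the writes use O_DIRECT, so every request the kernel punts to an async worker contends on the same inode lock:

/*
 * Illustrative only: drive QD buffered writes through io_uring at
 * once. Build with -luring.
 */
#include <fcntl.h>
#include <string.h>
#include <unistd.h>
#include <liburing.h>

#define QD      64      /* many buffered writes in flight at once */
#define BS      4096    /* bytes per write */

int main(void)
{
        static struct iovec iovs[QD];   /* must outlive submission */
        static char buf[BS];
        struct io_uring ring;
        struct io_uring_cqe *cqe;
        int fd, i;

        /* No O_DIRECT: these are buffered writes. */
        fd = open("testfile", O_WRONLY | O_CREAT | O_TRUNC, 0644);
        if (fd < 0 || io_uring_queue_init(QD, &ring, 0) < 0)
                return 1;

        memset(buf, 'a', BS);
        for (i = 0; i < QD; i++) {
                struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);

                iovs[i].iov_base = buf;
                iovs[i].iov_len = BS;
                io_uring_prep_writev(sqe, fd, &iovs[i], 1, (off_t)i * BS);
        }
        io_uring_submit(&ring);

        /* Reap all completions. */
        for (i = 0; i < QD; i++) {
                if (io_uring_wait_cqe(&ring, &cqe) < 0)
                        return 1;
                io_uring_cqe_seen(&ring, cqe);
        }
        io_uring_queue_exit(&ring);
        close(fd);
        return 0;
}

With this patch applied, such punted writes funnel through a workqueue capped at two concurrent workers, rather than one worker per in-flight request.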
commit 54a91f3bb9 (parent 18d9be1a97)
@@ -203,7 +203,7 @@ struct io_ring_ctx {
 	} ____cacheline_aligned_in_smp;
 
 	/* IO offload */
-	struct workqueue_struct	*sqo_wq;
+	struct workqueue_struct	*sqo_wq[2];
 	struct task_struct	*sqo_thread;	/* if using sq thread polling */
 	struct mm_struct	*sqo_mm;
 	wait_queue_head_t	sqo_wait;
@@ -446,7 +446,19 @@ static void __io_commit_cqring(struct io_ring_ctx *ctx)
 static inline void io_queue_async_work(struct io_ring_ctx *ctx,
 				       struct io_kiocb *req)
 {
-	queue_work(ctx->sqo_wq, &req->work);
+	int rw;
+
+	switch (req->submit.sqe->opcode) {
+	case IORING_OP_WRITEV:
+	case IORING_OP_WRITE_FIXED:
+		rw = !(req->rw.ki_flags & IOCB_DIRECT);
+		break;
+	default:
+		rw = 0;
+		break;
+	}
+
+	queue_work(ctx->sqo_wq[rw], &req->work);
 }
 
 static void io_commit_cqring(struct io_ring_ctx *ctx)
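The hunk above routes on opcode and IOCB_DIRECT: only non-direct (buffered) writes get index 1, the throttled queue, while direct writes and every other opcode stay on the wide default queue and keep full parallelism. A standalone restatement of that rule (helper name hypothetical, not from the commit):

/* Sketch only: index 1 is the narrow buffered-write queue, 0 the
 * wide default queue. */
static inline int io_select_wq(u8 opcode, int ki_flags)
{
	switch (opcode) {
	case IORING_OP_WRITEV:
	case IORING_OP_WRITE_FIXED:
		/* buffered write iff IOCB_DIRECT is not set */
		return !(ki_flags & IOCB_DIRECT);
	default:
		return 0;
	}
}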
@@ -2634,11 +2646,15 @@ static void io_sq_thread_stop(struct io_ring_ctx *ctx)
 
 static void io_finish_async(struct io_ring_ctx *ctx)
 {
+	int i;
+
 	io_sq_thread_stop(ctx);
 
-	if (ctx->sqo_wq) {
-		destroy_workqueue(ctx->sqo_wq);
-		ctx->sqo_wq = NULL;
+	for (i = 0; i < ARRAY_SIZE(ctx->sqo_wq); i++) {
+		if (ctx->sqo_wq[i]) {
+			destroy_workqueue(ctx->sqo_wq[i]);
+			ctx->sqo_wq[i] = NULL;
+		}
 	}
 }
 
@@ -2846,16 +2862,31 @@ static int io_sq_offload_start(struct io_ring_ctx *ctx,
 	}
 
 	/* Do QD, or 2 * CPUS, whatever is smallest */
-	ctx->sqo_wq = alloc_workqueue("io_ring-wq", WQ_UNBOUND | WQ_FREEZABLE,
+	ctx->sqo_wq[0] = alloc_workqueue("io_ring-wq",
+			WQ_UNBOUND | WQ_FREEZABLE,
 			min(ctx->sq_entries - 1, 2 * num_online_cpus()));
-	if (!ctx->sqo_wq) {
+	if (!ctx->sqo_wq[0]) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	/*
+	 * This is for buffered writes, where we want to limit the parallelism
+	 * due to file locking in file systems. As "normal" buffered writes
+	 * should parallelize on writeout quite nicely, limit us to having 2
+	 * pending. This avoids massive contention on the inode when doing
+	 * buffered async writes.
+	 */
+	ctx->sqo_wq[1] = alloc_workqueue("io_ring-write-wq",
+						WQ_UNBOUND | WQ_FREEZABLE, 2);
+	if (!ctx->sqo_wq[1]) {
 		ret = -ENOMEM;
 		goto err;
 	}
 
 	return 0;
 err:
-	io_sq_thread_stop(ctx);
+	io_finish_async(ctx);
 	mmdrop(ctx->sqo_mm);
 	ctx->sqo_mm = NULL;
 	return ret;
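The third argument to alloc_workqueue() is max_active, the cap on concurrently executing work items from that queue; the 2 passed for io_ring-write-wq is what holds buffered writes to two workers at a time. A self-contained module sketch of the same bounded-queue pattern (module and symbol names are illustrative, not from the commit):

#include <linux/init.h>
#include <linux/module.h>
#include <linux/workqueue.h>

static struct workqueue_struct *demo_wq;
static struct work_struct demo_work[4];

static void demo_fn(struct work_struct *work)
{
	pr_info("demo work running\n");
}

static int __init demo_init(void)
{
	int i;

	/* max_active = 2: at most two items execute concurrently. */
	demo_wq = alloc_workqueue("demo-write-wq",
				  WQ_UNBOUND | WQ_FREEZABLE, 2);
	if (!demo_wq)
		return -ENOMEM;

	/* Queue four items; the other two wait their turn. */
	for (i = 0; i < 4; i++) {
		INIT_WORK(&demo_work[i], demo_fn);
		queue_work(demo_wq, &demo_work[i]);
	}
	return 0;
}

static void __exit demo_exit(void)
{
	destroy_workqueue(demo_wq);	/* flushes pending work first */
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");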