io_uring/poll: get rid of unlocked cancel hash

io_uring maintains two hash lists of inflight requests:

1) ctx->cancel_table_locked. This is used when the caller has the
   ctx->uring_lock held already. This is only an issue side parameter,
   as removal or task_work will always have it held.

2) ctx->cancel_table. This is used when the issuer does NOT have the
   ctx->uring_lock held, and relies on the table spinlocks for access.

However, it's pretty trivial to simply grab the lock in the one spot
where we care about it, for insertion. With that, we can kill the
unlocked table (and get rid of the _locked postfix for the other one).

Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
Jens Axboe 2024-09-30 14:22:36 -06:00
parent 829ab73e7b
commit 085268829b
4 changed files with 36 additions and 127 deletions

View File

@ -291,7 +291,7 @@ struct io_ring_ctx {
struct xarray io_bl_xa; struct xarray io_bl_xa;
struct io_hash_table cancel_table_locked; struct io_hash_table cancel_table;
struct io_alloc_cache apoll_cache; struct io_alloc_cache apoll_cache;
struct io_alloc_cache netmsg_cache; struct io_alloc_cache netmsg_cache;
struct io_alloc_cache rw_cache; struct io_alloc_cache rw_cache;
@ -342,7 +342,6 @@ struct io_ring_ctx {
struct list_head io_buffers_comp; struct list_head io_buffers_comp;
struct list_head cq_overflow_list; struct list_head cq_overflow_list;
struct io_hash_table cancel_table;
struct hlist_head waitid_list; struct hlist_head waitid_list;
@ -459,7 +458,6 @@ enum {
REQ_F_DOUBLE_POLL_BIT, REQ_F_DOUBLE_POLL_BIT,
REQ_F_APOLL_MULTISHOT_BIT, REQ_F_APOLL_MULTISHOT_BIT,
REQ_F_CLEAR_POLLIN_BIT, REQ_F_CLEAR_POLLIN_BIT,
REQ_F_HASH_LOCKED_BIT,
/* keep async read/write and isreg together and in order */ /* keep async read/write and isreg together and in order */
REQ_F_SUPPORT_NOWAIT_BIT, REQ_F_SUPPORT_NOWAIT_BIT,
REQ_F_ISREG_BIT, REQ_F_ISREG_BIT,
@ -534,8 +532,6 @@ enum {
REQ_F_APOLL_MULTISHOT = IO_REQ_FLAG(REQ_F_APOLL_MULTISHOT_BIT), REQ_F_APOLL_MULTISHOT = IO_REQ_FLAG(REQ_F_APOLL_MULTISHOT_BIT),
/* recvmsg special flag, clear EPOLLIN */ /* recvmsg special flag, clear EPOLLIN */
REQ_F_CLEAR_POLLIN = IO_REQ_FLAG(REQ_F_CLEAR_POLLIN_BIT), REQ_F_CLEAR_POLLIN = IO_REQ_FLAG(REQ_F_CLEAR_POLLIN_BIT),
/* hashed into ->cancel_hash_locked, protected by ->uring_lock */
REQ_F_HASH_LOCKED = IO_REQ_FLAG(REQ_F_HASH_LOCKED_BIT),
/* don't use lazy poll wake for this request */ /* don't use lazy poll wake for this request */
REQ_F_POLL_NO_LAZY = IO_REQ_FLAG(REQ_F_POLL_NO_LAZY_BIT), REQ_F_POLL_NO_LAZY = IO_REQ_FLAG(REQ_F_POLL_NO_LAZY_BIT),
/* file is pollable */ /* file is pollable */

View File

@ -190,22 +190,13 @@ __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *file)
} }
seq_puts(m, "PollList:\n"); seq_puts(m, "PollList:\n");
for (i = 0; i < (1U << ctx->cancel_table.hash_bits); i++) { for (i = 0; has_lock && i < (1U << ctx->cancel_table.hash_bits); i++) {
struct io_hash_bucket *hb = &ctx->cancel_table.hbs[i]; struct io_hash_bucket *hb = &ctx->cancel_table.hbs[i];
struct io_hash_bucket *hbl = &ctx->cancel_table_locked.hbs[i];
struct io_kiocb *req; struct io_kiocb *req;
spin_lock(&hb->lock);
hlist_for_each_entry(req, &hb->list, hash_node) hlist_for_each_entry(req, &hb->list, hash_node)
seq_printf(m, " op=%d, task_works=%d\n", req->opcode, seq_printf(m, " op=%d, task_works=%d\n", req->opcode,
task_work_pending(req->task)); task_work_pending(req->task));
spin_unlock(&hb->lock);
if (!has_lock)
continue;
hlist_for_each_entry(req, &hbl->list, hash_node)
seq_printf(m, " op=%d, task_works=%d\n", req->opcode,
task_work_pending(req->task));
} }
if (has_lock) if (has_lock)

View File

@ -294,8 +294,6 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
hash_bits = clamp(hash_bits, 1, 8); hash_bits = clamp(hash_bits, 1, 8);
if (io_alloc_hash_table(&ctx->cancel_table, hash_bits)) if (io_alloc_hash_table(&ctx->cancel_table, hash_bits))
goto err; goto err;
if (io_alloc_hash_table(&ctx->cancel_table_locked, hash_bits))
goto err;
if (percpu_ref_init(&ctx->refs, io_ring_ctx_ref_free, if (percpu_ref_init(&ctx->refs, io_ring_ctx_ref_free,
0, GFP_KERNEL)) 0, GFP_KERNEL))
goto err; goto err;
@ -361,7 +359,6 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
io_alloc_cache_free(&ctx->msg_cache, io_msg_cache_free); io_alloc_cache_free(&ctx->msg_cache, io_msg_cache_free);
io_futex_cache_free(ctx); io_futex_cache_free(ctx);
kfree(ctx->cancel_table.hbs); kfree(ctx->cancel_table.hbs);
kfree(ctx->cancel_table_locked.hbs);
xa_destroy(&ctx->io_bl_xa); xa_destroy(&ctx->io_bl_xa);
kfree(ctx); kfree(ctx);
return NULL; return NULL;
@ -2774,7 +2771,6 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
io_wq_put_hash(ctx->hash_map); io_wq_put_hash(ctx->hash_map);
io_napi_free(ctx); io_napi_free(ctx);
kfree(ctx->cancel_table.hbs); kfree(ctx->cancel_table.hbs);
kfree(ctx->cancel_table_locked.hbs);
xa_destroy(&ctx->io_bl_xa); xa_destroy(&ctx->io_bl_xa);
kfree(ctx); kfree(ctx);
} }

View File

@ -122,28 +122,6 @@ static void io_poll_req_insert(struct io_kiocb *req)
{ {
struct io_hash_table *table = &req->ctx->cancel_table; struct io_hash_table *table = &req->ctx->cancel_table;
u32 index = hash_long(req->cqe.user_data, table->hash_bits); u32 index = hash_long(req->cqe.user_data, table->hash_bits);
struct io_hash_bucket *hb = &table->hbs[index];
spin_lock(&hb->lock);
hlist_add_head(&req->hash_node, &hb->list);
spin_unlock(&hb->lock);
}
static void io_poll_req_delete(struct io_kiocb *req)
{
struct io_hash_table *table = &req->ctx->cancel_table;
u32 index = hash_long(req->cqe.user_data, table->hash_bits);
spinlock_t *lock = &table->hbs[index].lock;
spin_lock(lock);
hash_del(&req->hash_node);
spin_unlock(lock);
}
static void io_poll_req_insert_locked(struct io_kiocb *req)
{
struct io_hash_table *table = &req->ctx->cancel_table_locked;
u32 index = hash_long(req->cqe.user_data, table->hash_bits);
lockdep_assert_held(&req->ctx->uring_lock); lockdep_assert_held(&req->ctx->uring_lock);
@ -154,19 +132,14 @@ static void io_poll_tw_hash_eject(struct io_kiocb *req, struct io_tw_state *ts)
{ {
struct io_ring_ctx *ctx = req->ctx; struct io_ring_ctx *ctx = req->ctx;
if (req->flags & REQ_F_HASH_LOCKED) { /*
/* * ->cancel_table_locked is protected by ->uring_lock in
* ->cancel_table_locked is protected by ->uring_lock in * contrast to per bucket spinlocks. Likely, tctx_task_work()
* contrast to per bucket spinlocks. Likely, tctx_task_work() * already grabbed the mutex for us, but there is a chance it
* already grabbed the mutex for us, but there is a chance it * failed.
* failed. */
*/ io_tw_lock(ctx, ts);
io_tw_lock(ctx, ts); hash_del(&req->hash_node);
hash_del(&req->hash_node);
req->flags &= ~REQ_F_HASH_LOCKED;
} else {
io_poll_req_delete(req);
}
} }
static void io_init_poll_iocb(struct io_poll *poll, __poll_t events) static void io_init_poll_iocb(struct io_poll *poll, __poll_t events)
@ -563,12 +536,13 @@ static bool io_poll_can_finish_inline(struct io_kiocb *req,
return pt->owning || io_poll_get_ownership(req); return pt->owning || io_poll_get_ownership(req);
} }
static void io_poll_add_hash(struct io_kiocb *req) static void io_poll_add_hash(struct io_kiocb *req, unsigned int issue_flags)
{ {
if (req->flags & REQ_F_HASH_LOCKED) struct io_ring_ctx *ctx = req->ctx;
io_poll_req_insert_locked(req);
else io_ring_submit_lock(ctx, issue_flags);
io_poll_req_insert(req); io_poll_req_insert(req);
io_ring_submit_unlock(ctx, issue_flags);
} }
/* /*
@ -605,11 +579,6 @@ static int __io_arm_poll_handler(struct io_kiocb *req,
ipt->owning = issue_flags & IO_URING_F_UNLOCKED; ipt->owning = issue_flags & IO_URING_F_UNLOCKED;
atomic_set(&req->poll_refs, (int)ipt->owning); atomic_set(&req->poll_refs, (int)ipt->owning);
/* io-wq doesn't hold uring_lock */
if (issue_flags & IO_URING_F_UNLOCKED)
req->flags &= ~REQ_F_HASH_LOCKED;
/* /*
* Exclusive waits may only wake a limited amount of entries * Exclusive waits may only wake a limited amount of entries
* rather than all of them, this may interfere with lazy * rather than all of them, this may interfere with lazy
@ -638,7 +607,7 @@ static int __io_arm_poll_handler(struct io_kiocb *req,
if (mask && if (mask &&
((poll->events & (EPOLLET|EPOLLONESHOT)) == (EPOLLET|EPOLLONESHOT))) { ((poll->events & (EPOLLET|EPOLLONESHOT)) == (EPOLLET|EPOLLONESHOT))) {
if (!io_poll_can_finish_inline(req, ipt)) { if (!io_poll_can_finish_inline(req, ipt)) {
io_poll_add_hash(req); io_poll_add_hash(req, issue_flags);
return 0; return 0;
} }
io_poll_remove_entries(req); io_poll_remove_entries(req);
@ -647,7 +616,7 @@ static int __io_arm_poll_handler(struct io_kiocb *req,
return 1; return 1;
} }
io_poll_add_hash(req); io_poll_add_hash(req, issue_flags);
if (mask && (poll->events & EPOLLET) && if (mask && (poll->events & EPOLLET) &&
io_poll_can_finish_inline(req, ipt)) { io_poll_can_finish_inline(req, ipt)) {
@ -720,12 +689,6 @@ int io_arm_poll_handler(struct io_kiocb *req, unsigned issue_flags)
__poll_t mask = POLLPRI | POLLERR | EPOLLET; __poll_t mask = POLLPRI | POLLERR | EPOLLET;
int ret; int ret;
/*
* apoll requests already grab the mutex to complete in the tw handler,
* so removal from the mutex-backed hash is free, use it by default.
*/
req->flags |= REQ_F_HASH_LOCKED;
if (!def->pollin && !def->pollout) if (!def->pollin && !def->pollout)
return IO_APOLL_ABORTED; return IO_APOLL_ABORTED;
if (!io_file_can_poll(req)) if (!io_file_can_poll(req))
@ -761,18 +724,22 @@ int io_arm_poll_handler(struct io_kiocb *req, unsigned issue_flags)
return IO_APOLL_OK; return IO_APOLL_OK;
} }
static __cold bool io_poll_remove_all_table(struct task_struct *tsk, /*
struct io_hash_table *table, * Returns true if we found and killed one or more poll requests
bool cancel_all) */
__cold bool io_poll_remove_all(struct io_ring_ctx *ctx, struct task_struct *tsk,
bool cancel_all)
{ {
unsigned nr_buckets = 1U << table->hash_bits; unsigned nr_buckets = 1U << ctx->cancel_table.hash_bits;
struct hlist_node *tmp; struct hlist_node *tmp;
struct io_kiocb *req; struct io_kiocb *req;
bool found = false; bool found = false;
int i; int i;
lockdep_assert_held(&ctx->uring_lock);
for (i = 0; i < nr_buckets; i++) { for (i = 0; i < nr_buckets; i++) {
struct io_hash_bucket *hb = &table->hbs[i]; struct io_hash_bucket *hb = &ctx->cancel_table.hbs[i];
spin_lock(&hb->lock); spin_lock(&hb->lock);
hlist_for_each_entry_safe(req, tmp, &hb->list, hash_node) { hlist_for_each_entry_safe(req, tmp, &hb->list, hash_node) {
@ -787,28 +754,13 @@ static __cold bool io_poll_remove_all_table(struct task_struct *tsk,
return found; return found;
} }
/*
* Returns true if we found and killed one or more poll requests
*/
__cold bool io_poll_remove_all(struct io_ring_ctx *ctx, struct task_struct *tsk,
bool cancel_all)
__must_hold(&ctx->uring_lock)
{
bool ret;
ret = io_poll_remove_all_table(tsk, &ctx->cancel_table, cancel_all);
ret |= io_poll_remove_all_table(tsk, &ctx->cancel_table_locked, cancel_all);
return ret;
}
static struct io_kiocb *io_poll_find(struct io_ring_ctx *ctx, bool poll_only, static struct io_kiocb *io_poll_find(struct io_ring_ctx *ctx, bool poll_only,
struct io_cancel_data *cd, struct io_cancel_data *cd,
struct io_hash_table *table,
struct io_hash_bucket **out_bucket) struct io_hash_bucket **out_bucket)
{ {
struct io_kiocb *req; struct io_kiocb *req;
u32 index = hash_long(cd->data, table->hash_bits); u32 index = hash_long(cd->data, ctx->cancel_table.hash_bits);
struct io_hash_bucket *hb = &table->hbs[index]; struct io_hash_bucket *hb = &ctx->cancel_table.hbs[index];
*out_bucket = NULL; *out_bucket = NULL;
@ -831,17 +783,16 @@ static struct io_kiocb *io_poll_find(struct io_ring_ctx *ctx, bool poll_only,
static struct io_kiocb *io_poll_file_find(struct io_ring_ctx *ctx, static struct io_kiocb *io_poll_file_find(struct io_ring_ctx *ctx,
struct io_cancel_data *cd, struct io_cancel_data *cd,
struct io_hash_table *table,
struct io_hash_bucket **out_bucket) struct io_hash_bucket **out_bucket)
{ {
unsigned nr_buckets = 1U << table->hash_bits; unsigned nr_buckets = 1U << ctx->cancel_table.hash_bits;
struct io_kiocb *req; struct io_kiocb *req;
int i; int i;
*out_bucket = NULL; *out_bucket = NULL;
for (i = 0; i < nr_buckets; i++) { for (i = 0; i < nr_buckets; i++) {
struct io_hash_bucket *hb = &table->hbs[i]; struct io_hash_bucket *hb = &ctx->cancel_table.hbs[i];
spin_lock(&hb->lock); spin_lock(&hb->lock);
hlist_for_each_entry(req, &hb->list, hash_node) { hlist_for_each_entry(req, &hb->list, hash_node) {
@ -866,17 +817,16 @@ static int io_poll_disarm(struct io_kiocb *req)
return 0; return 0;
} }
static int __io_poll_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd, static int __io_poll_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd)
struct io_hash_table *table)
{ {
struct io_hash_bucket *bucket; struct io_hash_bucket *bucket;
struct io_kiocb *req; struct io_kiocb *req;
if (cd->flags & (IORING_ASYNC_CANCEL_FD | IORING_ASYNC_CANCEL_OP | if (cd->flags & (IORING_ASYNC_CANCEL_FD | IORING_ASYNC_CANCEL_OP |
IORING_ASYNC_CANCEL_ANY)) IORING_ASYNC_CANCEL_ANY))
req = io_poll_file_find(ctx, cd, table, &bucket); req = io_poll_file_find(ctx, cd, &bucket);
else else
req = io_poll_find(ctx, false, cd, table, &bucket); req = io_poll_find(ctx, false, cd, &bucket);
if (req) if (req)
io_poll_cancel_req(req); io_poll_cancel_req(req);
@ -890,12 +840,8 @@ int io_poll_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd,
{ {
int ret; int ret;
ret = __io_poll_cancel(ctx, cd, &ctx->cancel_table);
if (ret != -ENOENT)
return ret;
io_ring_submit_lock(ctx, issue_flags); io_ring_submit_lock(ctx, issue_flags);
ret = __io_poll_cancel(ctx, cd, &ctx->cancel_table_locked); ret = __io_poll_cancel(ctx, cd);
io_ring_submit_unlock(ctx, issue_flags); io_ring_submit_unlock(ctx, issue_flags);
return ret; return ret;
} }
@ -972,13 +918,6 @@ int io_poll_add(struct io_kiocb *req, unsigned int issue_flags)
ipt.pt._qproc = io_poll_queue_proc; ipt.pt._qproc = io_poll_queue_proc;
/*
* If sqpoll or single issuer, there is no contention for ->uring_lock
* and we'll end up holding it in tw handlers anyway.
*/
if (req->ctx->flags & (IORING_SETUP_SQPOLL|IORING_SETUP_SINGLE_ISSUER))
req->flags |= REQ_F_HASH_LOCKED;
ret = __io_arm_poll_handler(req, poll, &ipt, poll->events, issue_flags); ret = __io_arm_poll_handler(req, poll, &ipt, poll->events, issue_flags);
if (ret > 0) { if (ret > 0) {
io_req_set_res(req, ipt.result_mask, 0); io_req_set_res(req, ipt.result_mask, 0);
@ -997,18 +936,7 @@ int io_poll_remove(struct io_kiocb *req, unsigned int issue_flags)
int ret2, ret = 0; int ret2, ret = 0;
io_ring_submit_lock(ctx, issue_flags); io_ring_submit_lock(ctx, issue_flags);
preq = io_poll_find(ctx, true, &cd, &ctx->cancel_table, &bucket); preq = io_poll_find(ctx, true, &cd, &bucket);
ret2 = io_poll_disarm(preq);
if (bucket)
spin_unlock(&bucket->lock);
if (!ret2)
goto found;
if (ret2 != -ENOENT) {
ret = ret2;
goto out;
}
preq = io_poll_find(ctx, true, &cd, &ctx->cancel_table_locked, &bucket);
ret2 = io_poll_disarm(preq); ret2 = io_poll_disarm(preq);
if (bucket) if (bucket)
spin_unlock(&bucket->lock); spin_unlock(&bucket->lock);
@ -1016,8 +944,6 @@ int io_poll_remove(struct io_kiocb *req, unsigned int issue_flags)
ret = ret2; ret = ret2;
goto out; goto out;
} }
found:
if (WARN_ON_ONCE(preq->opcode != IORING_OP_POLL_ADD)) { if (WARN_ON_ONCE(preq->opcode != IORING_OP_POLL_ADD)) {
ret = -EFAULT; ret = -EFAULT;
goto out; goto out;