io_uring: improve poll completion performance

For busy IORING_OP_POLL_ADD workloads, we can have enough contention
on the completion lock that the inline completion path fails quite
often, because the trylock on that lock fails. Add a list for deferred
completions that we can use in that case. This helps reduce the number
of async offloads we have to do: if we get multiple completions in
a row, we'll piggyback onto the poll_llist instead of having to queue
our own offload.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
Jens Axboe 2019-12-19 12:06:02 -07:00
parent ad3eb2c89f
commit e94f141bd2

View File

@ -286,7 +286,8 @@ struct io_ring_ctx {
struct {
spinlock_t completion_lock;
bool poll_multi_file;
struct llist_head poll_llist;
/*
* ->poll_list is protected by the ctx->uring_lock for
* io_uring instances that don't use IORING_SETUP_SQPOLL.
@ -296,6 +297,7 @@ struct io_ring_ctx {
struct list_head poll_list;
struct hlist_head *cancel_hash;
unsigned cancel_hash_bits;
bool poll_multi_file;
spinlock_t inflight_lock;
struct list_head inflight_list;
@ -453,7 +455,14 @@ struct io_kiocb {
};
struct io_async_ctx *io;
union {
/*
* ring_file is only used in the submission path, and
* llist_node is only used for poll deferred completions
*/
struct file *ring_file;
struct llist_node llist_node;
};
int ring_fd;
bool has_user;
bool in_async;
@ -725,6 +734,7 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
mutex_init(&ctx->uring_lock);
init_waitqueue_head(&ctx->wait);
spin_lock_init(&ctx->completion_lock);
init_llist_head(&ctx->poll_llist);
INIT_LIST_HEAD(&ctx->poll_list);
INIT_LIST_HEAD(&ctx->defer_list);
INIT_LIST_HEAD(&ctx->timeout_list);
@ -1320,6 +1330,20 @@ static inline unsigned int io_sqring_entries(struct io_ring_ctx *ctx)
return smp_load_acquire(&rings->sq.tail) - ctx->cached_sq_head;
}
/*
 * Decide whether a finished request may be handed to the batched free path.
 *
 * Requests that don't use fixed files must pair their completion with the
 * file put, so those go through the regular completion path. Only requests
 * that are fixed-file, not linked, not the fallback request and that carry
 * no async context (req->io) qualify for batch freeing.
 */
static inline bool io_req_multi_free(struct io_kiocb *req)
{
	/* must have FIXED_FILE set and LINK clear */
	if ((req->flags & (REQ_F_FIXED_FILE|REQ_F_LINK)) != REQ_F_FIXED_FILE)
		return false;
	if (io_is_fallback_req(req))
		return false;
	return !req->io;
}
/*
* Find and free completed poll iocbs
*/
@ -1339,14 +1363,7 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
(*nr_events)++;
if (refcount_dec_and_test(&req->refs)) {
/* If we're not using fixed files, we have to pair the
* completion part with the file put. Use regular
* completions for those, only batch free for fixed
* file and non-linked commands.
*/
if (((req->flags & (REQ_F_FIXED_FILE|REQ_F_LINK)) ==
REQ_F_FIXED_FILE) && !io_is_fallback_req(req) &&
!req->io) {
if (io_req_multi_free(req)) {
reqs[to_free++] = req;
if (to_free == ARRAY_SIZE(reqs))
io_free_req_many(ctx, reqs, &to_free);
@ -3081,6 +3098,44 @@ static void io_poll_complete_work(struct io_wq_work **workptr)
io_wq_assign_next(workptr, nxt);
}
/*
 * Complete every deferred poll request found on @nodes.
 *
 * @ctx:   ring context whose completion_lock is taken for the whole walk
 * @nodes: first llist_node of a chain of requests linked through
 *         io_kiocb->llist_node (io_poll_flush() passes in the result of
 *         llist_del_all() on ctx->poll_llist)
 *
 * For each request: unhash it, post its completion using the result that
 * was stashed in req->result, and drop one reference. Requests whose last
 * reference was dropped are either collected in a local batch (when
 * io_req_multi_free() allows it) or freed right away.
 */
static void __io_poll_flush(struct io_ring_ctx *ctx, struct llist_node *nodes)
{
/* batch of requests eligible for io_free_req_many() */
void *reqs[IO_IOPOLL_BATCH];
struct io_kiocb *req, *tmp;
int to_free = 0;
/* all completions below are posted under the completion lock, IRQs off */
spin_lock_irq(&ctx->completion_lock);
/* _safe variant: the request may be freed inside the loop body */
llist_for_each_entry_safe(req, tmp, nodes, llist_node) {
hash_del(&req->hash_node);
io_poll_complete(req, req->result, 0);
if (refcount_dec_and_test(&req->refs)) {
if (io_req_multi_free(req)) {
reqs[to_free++] = req;
/* batch is full: flush it now (still under the lock) */
if (to_free == ARRAY_SIZE(reqs))
io_free_req_many(ctx, reqs, &to_free);
} else {
/*
 * Not batchable: free individually. We hold completion_lock here, so
 * flag that on the request before freeing it.
 * NOTE(review): presumably the free path checks REQ_F_COMP_LOCKED to
 * avoid re-taking the lock - confirm against io_free_req().
 */
req->flags |= REQ_F_COMP_LOCKED;
io_free_req(req);
}
}
}
spin_unlock_irq(&ctx->completion_lock);
/* called after the lock has been dropped */
io_cqring_ev_posted(ctx);
/* free whatever is left in the partially filled batch */
io_free_req_many(ctx, reqs, &to_free);
}
static void io_poll_flush(struct io_wq_work **workptr)
{
struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work);
struct llist_node *nodes;
nodes = llist_del_all(&req->ctx->poll_llist);
if (nodes)
__io_poll_flush(req->ctx, nodes);
}
static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
void *key)
{
@ -3088,7 +3143,6 @@ static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
struct io_kiocb *req = container_of(poll, struct io_kiocb, poll);
struct io_ring_ctx *ctx = req->ctx;
__poll_t mask = key_to_poll(key);
unsigned long flags;
/* for instances that support it check for an event match first: */
if (mask && !(mask & poll->events))
@ -3102,7 +3156,11 @@ static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
* If we have a link timeout we're going to need the completion_lock
* for finalizing the request, mark us as having grabbed that already.
*/
if (mask && spin_trylock_irqsave(&ctx->completion_lock, flags)) {
if (mask) {
unsigned long flags;
if (llist_empty(&ctx->poll_llist) &&
spin_trylock_irqsave(&ctx->completion_lock, flags)) {
hash_del(&req->hash_node);
io_poll_complete(req, mask, 0);
req->flags |= REQ_F_COMP_LOCKED;
@ -3110,9 +3168,19 @@ static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
spin_unlock_irqrestore(&ctx->completion_lock, flags);
io_cqring_ev_posted(ctx);
req = NULL;
} else {
io_queue_async_work(req);
req->result = mask;
req->llist_node.next = NULL;
/* if the list wasn't empty, we're done */
if (!llist_add(&req->llist_node, &ctx->poll_llist))
req = NULL;
else
req->work.func = io_poll_flush;
}
}
if (req)
io_queue_async_work(req);
return 1;
}