io_uring-6.13-20241220

-----BEGIN PGP SIGNATURE-----
 
 iQJEBAABCAAuFiEEwPw5LcreJtl1+l5K99NY+ylx4KYFAmdllhgQHGF4Ym9lQGtl
 cm5lbC5kawAKCRD301j7KXHgpiAoD/kBxUPr8PcSzjdZ8f32/FbKOn77lOaHxvzk
 lWRsvgDYC/c0a9ugQk0cYNuzy4hADkapcxOlglQw80s0BLgKCESzBYgoamw5744g
 r4nYX9D7bMv+dh0N22mli925hPmc7Ifk3Gc4Icc4rXKHtcEr20wUe4/sM0wi6zDv
 cgnUMq4PltT8XLssimpEjuoakNejUL+787QkrEWyjWUXVwypeSGka7zPIFw0ASqy
 2ykULC9/lhw0sMzdpQcRWYCxVjfXDdSTMRLePQcyzAt7DtB67QGx8dW2/rn95HbV
 XqEPBYBDcC+GWRrpTC4zlgjTw64TAUrOjddHzYtOMTCULxslREJZLbwP4FOI4WKR
 5VEGfDkN3ZFh2DxQTt/VXs447bdH/l/ZzfI0FI/Si41zVdnyi5RklPYBSemH8noT
 PnB7XUABtbbUdp6CIf9oexlME0Wc4iYOCEC9CVX0R69R5UYH6liyUOTjqHHNyNA/
 uz46UX3J1281yaOb3cf7+eZbGIaeqL2SC4DoellNswx114B0SAkP7VuGKgISjLnJ
 B8h3cFRb3kR7GPIgtbDVPN8s2glHH77Y2/vGXSzbYsMIjv/svZjAEsRV/kuMaXCp
 tDEdf1miSSinjfV6uTKtj6FFfUYD+2u5075CqM+yGI8o9Alt+G62+yoWCMctKFio
 jXU3o1atwg==
 =2vyn
 -----END PGP SIGNATURE-----

Merge tag 'io_uring-6.13-20241220' of git://git.kernel.dk/linux

Pull io_uring fixes from Jens Axboe:

 - Fix for a file ref leak for registered ring fds

 - Turn the ->timeout_lock into a raw spinlock, as it nests under the
   io-wq lock which is a raw spinlock as it's called from the scheduler
   side

 - Limit ring resizing to DEFER_TASKRUN for now. We will broaden this in
   the future, but for now, ensure that it's only feasible on rings with
   a single user

 - Add sanity check for io-wq enqueuing

* tag 'io_uring-6.13-20241220' of git://git.kernel.dk/linux:
  io_uring: check if iowq is killed before queuing
  io_uring/register: limit ring resizing to DEFER_TASKRUN
  io_uring: Fix registered ring file refcount leak
  io_uring: make ctx->timeout_lock a raw spinlock
This commit is contained in:
Linus Torvalds 2024-12-20 13:32:43 -08:00
commit 7c05bd9230
5 changed files with 36 additions and 30 deletions

View File

@ -15,10 +15,8 @@ bool io_is_uring_fops(struct file *file);
static inline void io_uring_files_cancel(void)
{
if (current->io_uring) {
io_uring_unreg_ringfd();
if (current->io_uring)
__io_uring_cancel(false);
}
}
static inline void io_uring_task_cancel(void)
{

View File

@ -345,7 +345,7 @@ struct io_ring_ctx {
/* timeouts */
struct {
spinlock_t timeout_lock;
raw_spinlock_t timeout_lock;
struct list_head timeout_list;
struct list_head ltimeout_list;
unsigned cq_last_tm_flush;

View File

@ -215,9 +215,9 @@ bool io_match_task_safe(struct io_kiocb *head, struct io_uring_task *tctx,
struct io_ring_ctx *ctx = head->ctx;
/* protect against races with linked timeouts */
spin_lock_irq(&ctx->timeout_lock);
raw_spin_lock_irq(&ctx->timeout_lock);
matched = io_match_linked(head);
spin_unlock_irq(&ctx->timeout_lock);
raw_spin_unlock_irq(&ctx->timeout_lock);
} else {
matched = io_match_linked(head);
}
@ -333,7 +333,7 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
init_waitqueue_head(&ctx->cq_wait);
init_waitqueue_head(&ctx->poll_wq);
spin_lock_init(&ctx->completion_lock);
spin_lock_init(&ctx->timeout_lock);
raw_spin_lock_init(&ctx->timeout_lock);
INIT_WQ_LIST(&ctx->iopoll_list);
INIT_LIST_HEAD(&ctx->io_buffers_comp);
INIT_LIST_HEAD(&ctx->defer_list);
@ -498,10 +498,10 @@ static void io_prep_async_link(struct io_kiocb *req)
if (req->flags & REQ_F_LINK_TIMEOUT) {
struct io_ring_ctx *ctx = req->ctx;
spin_lock_irq(&ctx->timeout_lock);
raw_spin_lock_irq(&ctx->timeout_lock);
io_for_each_link(cur, req)
io_prep_async_work(cur);
spin_unlock_irq(&ctx->timeout_lock);
raw_spin_unlock_irq(&ctx->timeout_lock);
} else {
io_for_each_link(cur, req)
io_prep_async_work(cur);
@ -514,7 +514,11 @@ static void io_queue_iowq(struct io_kiocb *req)
struct io_uring_task *tctx = req->tctx;
BUG_ON(!tctx);
BUG_ON(!tctx->io_wq);
if ((current->flags & PF_KTHREAD) || !tctx->io_wq) {
io_req_task_queue_fail(req, -ECANCELED);
return;
}
/* init ->work of the whole link before punting */
io_prep_async_link(req);
@ -3214,6 +3218,7 @@ end_wait:
void __io_uring_cancel(bool cancel_all)
{
io_uring_unreg_ringfd();
io_uring_cancel_generic(cancel_all, NULL);
}

View File

@ -414,6 +414,9 @@ static int io_register_resize_rings(struct io_ring_ctx *ctx, void __user *arg)
if (ctx->flags & IORING_SETUP_SINGLE_ISSUER &&
current != ctx->submitter_task)
return -EEXIST;
/* limited to DEFER_TASKRUN for now */
if (!(ctx->flags & IORING_SETUP_DEFER_TASKRUN))
return -EINVAL;
if (copy_from_user(&p, arg, sizeof(p)))
return -EFAULT;
if (p.flags & ~RESIZE_FLAGS)

View File

@ -74,10 +74,10 @@ static void io_timeout_complete(struct io_kiocb *req, struct io_tw_state *ts)
if (!io_timeout_finish(timeout, data)) {
if (io_req_post_cqe(req, -ETIME, IORING_CQE_F_MORE)) {
/* re-arm timer */
spin_lock_irq(&ctx->timeout_lock);
raw_spin_lock_irq(&ctx->timeout_lock);
list_add(&timeout->list, ctx->timeout_list.prev);
hrtimer_start(&data->timer, timespec64_to_ktime(data->ts), data->mode);
spin_unlock_irq(&ctx->timeout_lock);
raw_spin_unlock_irq(&ctx->timeout_lock);
return;
}
}
@ -109,7 +109,7 @@ __cold void io_flush_timeouts(struct io_ring_ctx *ctx)
u32 seq;
struct io_timeout *timeout, *tmp;
spin_lock_irq(&ctx->timeout_lock);
raw_spin_lock_irq(&ctx->timeout_lock);
seq = ctx->cached_cq_tail - atomic_read(&ctx->cq_timeouts);
list_for_each_entry_safe(timeout, tmp, &ctx->timeout_list, list) {
@ -134,7 +134,7 @@ __cold void io_flush_timeouts(struct io_ring_ctx *ctx)
io_kill_timeout(req, 0);
}
ctx->cq_last_tm_flush = seq;
spin_unlock_irq(&ctx->timeout_lock);
raw_spin_unlock_irq(&ctx->timeout_lock);
}
static void io_req_tw_fail_links(struct io_kiocb *link, struct io_tw_state *ts)
@ -200,9 +200,9 @@ void io_disarm_next(struct io_kiocb *req)
} else if (req->flags & REQ_F_LINK_TIMEOUT) {
struct io_ring_ctx *ctx = req->ctx;
spin_lock_irq(&ctx->timeout_lock);
raw_spin_lock_irq(&ctx->timeout_lock);
link = io_disarm_linked_timeout(req);
spin_unlock_irq(&ctx->timeout_lock);
raw_spin_unlock_irq(&ctx->timeout_lock);
if (link)
io_req_queue_tw_complete(link, -ECANCELED);
}
@ -238,11 +238,11 @@ static enum hrtimer_restart io_timeout_fn(struct hrtimer *timer)
struct io_ring_ctx *ctx = req->ctx;
unsigned long flags;
spin_lock_irqsave(&ctx->timeout_lock, flags);
raw_spin_lock_irqsave(&ctx->timeout_lock, flags);
list_del_init(&timeout->list);
atomic_set(&req->ctx->cq_timeouts,
atomic_read(&req->ctx->cq_timeouts) + 1);
spin_unlock_irqrestore(&ctx->timeout_lock, flags);
raw_spin_unlock_irqrestore(&ctx->timeout_lock, flags);
if (!(data->flags & IORING_TIMEOUT_ETIME_SUCCESS))
req_set_fail(req);
@ -285,9 +285,9 @@ int io_timeout_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd)
{
struct io_kiocb *req;
spin_lock_irq(&ctx->timeout_lock);
raw_spin_lock_irq(&ctx->timeout_lock);
req = io_timeout_extract(ctx, cd);
spin_unlock_irq(&ctx->timeout_lock);
raw_spin_unlock_irq(&ctx->timeout_lock);
if (IS_ERR(req))
return PTR_ERR(req);
@ -330,7 +330,7 @@ static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer)
struct io_ring_ctx *ctx = req->ctx;
unsigned long flags;
spin_lock_irqsave(&ctx->timeout_lock, flags);
raw_spin_lock_irqsave(&ctx->timeout_lock, flags);
prev = timeout->head;
timeout->head = NULL;
@ -345,7 +345,7 @@ static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer)
}
list_del(&timeout->list);
timeout->prev = prev;
spin_unlock_irqrestore(&ctx->timeout_lock, flags);
raw_spin_unlock_irqrestore(&ctx->timeout_lock, flags);
req->io_task_work.func = io_req_task_link_timeout;
io_req_task_work_add(req);
@ -472,12 +472,12 @@ int io_timeout_remove(struct io_kiocb *req, unsigned int issue_flags)
} else {
enum hrtimer_mode mode = io_translate_timeout_mode(tr->flags);
spin_lock_irq(&ctx->timeout_lock);
raw_spin_lock_irq(&ctx->timeout_lock);
if (tr->ltimeout)
ret = io_linked_timeout_update(ctx, tr->addr, &tr->ts, mode);
else
ret = io_timeout_update(ctx, tr->addr, &tr->ts, mode);
spin_unlock_irq(&ctx->timeout_lock);
raw_spin_unlock_irq(&ctx->timeout_lock);
}
if (ret < 0)
@ -572,7 +572,7 @@ int io_timeout(struct io_kiocb *req, unsigned int issue_flags)
struct list_head *entry;
u32 tail, off = timeout->off;
spin_lock_irq(&ctx->timeout_lock);
raw_spin_lock_irq(&ctx->timeout_lock);
/*
* sqe->off holds how many events that need to occur for this
@ -611,7 +611,7 @@ add:
list_add(&timeout->list, entry);
data->timer.function = io_timeout_fn;
hrtimer_start(&data->timer, timespec64_to_ktime(data->ts), data->mode);
spin_unlock_irq(&ctx->timeout_lock);
raw_spin_unlock_irq(&ctx->timeout_lock);
return IOU_ISSUE_SKIP_COMPLETE;
}
@ -620,7 +620,7 @@ void io_queue_linked_timeout(struct io_kiocb *req)
struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);
struct io_ring_ctx *ctx = req->ctx;
spin_lock_irq(&ctx->timeout_lock);
raw_spin_lock_irq(&ctx->timeout_lock);
/*
* If the back reference is NULL, then our linked request finished
* before we got a chance to setup the timer
@ -633,7 +633,7 @@ void io_queue_linked_timeout(struct io_kiocb *req)
data->mode);
list_add_tail(&timeout->list, &ctx->ltimeout_list);
}
spin_unlock_irq(&ctx->timeout_lock);
raw_spin_unlock_irq(&ctx->timeout_lock);
/* drop submission reference */
io_put_req(req);
}
@ -668,7 +668,7 @@ __cold bool io_kill_timeouts(struct io_ring_ctx *ctx, struct io_uring_task *tctx
* timeout_lockfirst to keep locking ordering.
*/
spin_lock(&ctx->completion_lock);
spin_lock_irq(&ctx->timeout_lock);
raw_spin_lock_irq(&ctx->timeout_lock);
list_for_each_entry_safe(timeout, tmp, &ctx->timeout_list, list) {
struct io_kiocb *req = cmd_to_io_kiocb(timeout);
@ -676,7 +676,7 @@ __cold bool io_kill_timeouts(struct io_ring_ctx *ctx, struct io_uring_task *tctx
io_kill_timeout(req, -ECANCELED))
canceled++;
}
spin_unlock_irq(&ctx->timeout_lock);
raw_spin_unlock_irq(&ctx->timeout_lock);
spin_unlock(&ctx->completion_lock);
return canceled != 0;
}