From c83c846231db8b153bfcb44d552d373c34f78245 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sat, 4 Jan 2025 18:29:02 +0000 Subject: [PATCH 1/5] io_uring/timeout: fix multishot updates After update only the first shot of a multishot timeout request adheres to the new timeout value while all subsequent retries continue to use the old value. Don't forget to update the timeout stored in struct io_timeout_data. Cc: stable@vger.kernel.org Fixes: ea97f6c8558e8 ("io_uring: add support for multishot timeouts") Reported-by: Christian Mazakas Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/e6516c3304eb654ec234cfa65c88a9579861e597.1736015288.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/timeout.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/io_uring/timeout.c b/io_uring/timeout.c index 362689b17ccc..e9cec9e4dc2f 100644 --- a/io_uring/timeout.c +++ b/io_uring/timeout.c @@ -427,10 +427,12 @@ static int io_timeout_update(struct io_ring_ctx *ctx, __u64 user_data, timeout->off = 0; /* noseq */ data = req->async_data; + data->ts = *ts; + list_add_tail(&timeout->list, &ctx->timeout_list); hrtimer_init(&data->timer, io_timeout_get_clock(data), mode); data->timer.function = io_timeout_fn; - hrtimer_start(&data->timer, timespec64_to_ktime(*ts), mode); + hrtimer_start(&data->timer, timespec64_to_ktime(data->ts), mode); return 0; } From 60495b08cf7a6920035c5172a22655ca2001270b Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Tue, 7 Jan 2025 14:11:32 +0000 Subject: [PATCH 2/5] io_uring: silence false positive warnings If we kill a ring and then immediately exit the task, we'll get cancellattion running by the task and a kthread in io_ring_exit_work. For DEFER_TASKRUN, we do want to limit it to only one entity executing it, however it's currently not an issue as it's protected by uring_lock. Silence lockdep assertions for now, we'll return to it later. Reported-by: syzbot+1bcb75613069ad4957fc@syzkaller.appspotmail.com Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/7e5f68281acb0f081f65fde435833c68a3b7e02f.1736257837.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/io_uring.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h index 12abee607e4a..492cbbf2c23b 100644 --- a/io_uring/io_uring.h +++ b/io_uring/io_uring.h @@ -125,6 +125,9 @@ static inline void io_lockdep_assert_cq_locked(struct io_ring_ctx *ctx) #if defined(CONFIG_PROVE_LOCKING) lockdep_assert(in_task()); + if (ctx->flags & IORING_SETUP_DEFER_TASKRUN) + lockdep_assert_held(&ctx->uring_lock); + if (ctx->flags & IORING_SETUP_IOPOLL) { lockdep_assert_held(&ctx->uring_lock); } else if (!ctx->task_complete) { @@ -136,9 +139,7 @@ static inline void io_lockdep_assert_cq_locked(struct io_ring_ctx *ctx) * Not from an SQE, as those cannot be submitted, but via * updating tagged resources. */ - if (percpu_ref_is_dying(&ctx->refs)) - lockdep_assert(current_work()); - else + if (!percpu_ref_is_dying(&ctx->refs)) lockdep_assert(current == ctx->submitter_task); } #endif From c9a40292a44e78f71258b8522655bffaf5753bdb Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 8 Jan 2025 10:28:05 -0700 Subject: [PATCH 3/5] io_uring/eventfd: ensure io_eventfd_signal() defers another RCU period io_eventfd_do_signal() is invoked from an RCU callback, but when dropping the reference to the io_ev_fd, it calls io_eventfd_free() directly if the refcount drops to zero. This isn't correct, as any potential freeing of the io_ev_fd should be deferred another RCU grace period. Just call io_eventfd_put() rather than open-code the dec-and-test and free, which will correctly defer it another RCU grace period. Fixes: 21a091b970cd ("io_uring: signal registered eventfd to process deferred task work") Reported-by: Jann Horn Cc: stable@vger.kernel.org Tested-by: Li Zetao Reviewed-by: Li Zetao Reviewed-by: Prasanna Kumar T S M Signed-off-by: Jens Axboe --- io_uring/eventfd.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/io_uring/eventfd.c b/io_uring/eventfd.c index fab936d31ba8..100d5da94cb9 100644 --- a/io_uring/eventfd.c +++ b/io_uring/eventfd.c @@ -33,20 +33,18 @@ static void io_eventfd_free(struct rcu_head *rcu) kfree(ev_fd); } +static void io_eventfd_put(struct io_ev_fd *ev_fd) +{ + if (refcount_dec_and_test(&ev_fd->refs)) + call_rcu(&ev_fd->rcu, io_eventfd_free); +} + static void io_eventfd_do_signal(struct rcu_head *rcu) { struct io_ev_fd *ev_fd = container_of(rcu, struct io_ev_fd, rcu); eventfd_signal_mask(ev_fd->cq_ev_fd, EPOLL_URING_WAKE); - - if (refcount_dec_and_test(&ev_fd->refs)) - io_eventfd_free(rcu); -} - -static void io_eventfd_put(struct io_ev_fd *ev_fd) -{ - if (refcount_dec_and_test(&ev_fd->refs)) - call_rcu(&ev_fd->rcu, io_eventfd_free); + io_eventfd_put(ev_fd); } static void io_eventfd_release(struct io_ev_fd *ev_fd, bool put_ref) From 4b7cfa8b6c28a9fa22b86894166a1a34f6d630ba Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Fri, 10 Jan 2025 14:31:23 +0000 Subject: [PATCH 4/5] io_uring/sqpoll: zero sqd->thread on tctx errors Syzkeller reports: BUG: KASAN: slab-use-after-free in thread_group_cputime+0x409/0x700 kernel/sched/cputime.c:341 Read of size 8 at addr ffff88803578c510 by task syz.2.3223/27552 Call Trace: ... kasan_report+0x143/0x180 mm/kasan/report.c:602 thread_group_cputime+0x409/0x700 kernel/sched/cputime.c:341 thread_group_cputime_adjusted+0xa6/0x340 kernel/sched/cputime.c:639 getrusage+0x1000/0x1340 kernel/sys.c:1863 io_uring_show_fdinfo+0xdfe/0x1770 io_uring/fdinfo.c:197 seq_show+0x608/0x770 fs/proc/fd.c:68 ... That's due to sqd->task not being cleared properly in cases where SQPOLL task tctx setup fails, which can essentially only happen with fault injection to insert allocation errors. Cc: stable@vger.kernel.org Fixes: 1251d2025c3e1 ("io_uring/sqpoll: early exit thread if task_context wasn't allocated") Reported-by: syzbot+3d92cfcfa84070b0a470@syzkaller.appspotmail.com Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/efc7ec7010784463b2e7466d7b5c02c2cb381635.1736519461.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/sqpoll.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/io_uring/sqpoll.c b/io_uring/sqpoll.c index 9e5bd79fd2b5..8961a3c1e73c 100644 --- a/io_uring/sqpoll.c +++ b/io_uring/sqpoll.c @@ -268,8 +268,12 @@ static int io_sq_thread(void *data) DEFINE_WAIT(wait); /* offload context creation failed, just exit */ - if (!current->io_uring) + if (!current->io_uring) { + mutex_lock(&sqd->lock); + sqd->thread = NULL; + mutex_unlock(&sqd->lock); goto err_out; + } snprintf(buf, sizeof(buf), "iou-sqp-%d", sqd->task_pid); set_task_comm(current, buf); From bd2703b42decebdcddf76e277ba76b4c4a142d73 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Fri, 10 Jan 2025 20:36:45 +0000 Subject: [PATCH 5/5] io_uring: don't touch sqd->thread off tw add With IORING_SETUP_SQPOLL all requests are created by the SQPOLL task, which means that req->task should always match sqd->thread. Since accesses to sqd->thread should be separately protected, use req->task in io_req_normal_work_add() instead. Note, in the eyes of io_req_normal_work_add(), the SQPOLL task struct is always pinned and alive, and sqd->thread can either be the task or NULL. It's only problematic if the compiler decides to reload the value after the null check, which is not so likely. Cc: stable@vger.kernel.org Cc: Bui Quang Minh Reported-by: lizetao Fixes: 78f9b61bd8e54 ("io_uring: wake SQPOLL task when task_work is added to an empty queue") Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/1cbbe72cf32c45a8fee96026463024cd8564a7d7.1736541357.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/io_uring.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index d3403c8216db..5eb119002099 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -1226,10 +1226,7 @@ static void io_req_normal_work_add(struct io_kiocb *req) /* SQPOLL doesn't need the task_work added, it'll run it itself */ if (ctx->flags & IORING_SETUP_SQPOLL) { - struct io_sq_data *sqd = ctx->sq_data; - - if (sqd->thread) - __set_notify_signal(sqd->thread); + __set_notify_signal(tctx->task); return; }