From c83c846231db8b153bfcb44d552d373c34f78245 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Sat, 4 Jan 2025 18:29:02 +0000
Subject: [PATCH 1/5] io_uring/timeout: fix multishot updates

After update only the first shot of a multishot timeout request adheres
to the new timeout value while all subsequent retries continue to use
the old value. Don't forget to update the timeout stored in struct
io_timeout_data.

Cc: stable@vger.kernel.org
Fixes: ea97f6c8558e8 ("io_uring: add support for multishot timeouts")
Reported-by: Christian Mazakas <christian.mazakas@gmail.com>
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/e6516c3304eb654ec234cfa65c88a9579861e597.1736015288.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 io_uring/timeout.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/io_uring/timeout.c b/io_uring/timeout.c
index 362689b17ccc..e9cec9e4dc2f 100644
--- a/io_uring/timeout.c
+++ b/io_uring/timeout.c
@@ -427,10 +427,12 @@ static int io_timeout_update(struct io_ring_ctx *ctx, __u64 user_data,
 
 	timeout->off = 0; /* noseq */
 	data = req->async_data;
+	data->ts = *ts;
+
 	list_add_tail(&timeout->list, &ctx->timeout_list);
 	hrtimer_init(&data->timer, io_timeout_get_clock(data), mode);
 	data->timer.function = io_timeout_fn;
-	hrtimer_start(&data->timer, timespec64_to_ktime(*ts), mode);
+	hrtimer_start(&data->timer, timespec64_to_ktime(data->ts), mode);
 	return 0;
 }
 

From 60495b08cf7a6920035c5172a22655ca2001270b Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Tue, 7 Jan 2025 14:11:32 +0000
Subject: [PATCH 2/5] io_uring: silence false positive warnings

If we kill a ring and then immediately exit the task, we'll get
cancellattion running by the task and a kthread in io_ring_exit_work.
For DEFER_TASKRUN, we do want to limit it to only one entity executing
it, however it's currently not an issue as it's protected by uring_lock.

Silence lockdep assertions for now, we'll return to it later.

Reported-by: syzbot+1bcb75613069ad4957fc@syzkaller.appspotmail.com
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/7e5f68281acb0f081f65fde435833c68a3b7e02f.1736257837.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 io_uring/io_uring.h | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h
index 12abee607e4a..492cbbf2c23b 100644
--- a/io_uring/io_uring.h
+++ b/io_uring/io_uring.h
@@ -125,6 +125,9 @@ static inline void io_lockdep_assert_cq_locked(struct io_ring_ctx *ctx)
 #if defined(CONFIG_PROVE_LOCKING)
 	lockdep_assert(in_task());
 
+	if (ctx->flags & IORING_SETUP_DEFER_TASKRUN)
+		lockdep_assert_held(&ctx->uring_lock);
+
 	if (ctx->flags & IORING_SETUP_IOPOLL) {
 		lockdep_assert_held(&ctx->uring_lock);
 	} else if (!ctx->task_complete) {
@@ -136,9 +139,7 @@ static inline void io_lockdep_assert_cq_locked(struct io_ring_ctx *ctx)
 		 * Not from an SQE, as those cannot be submitted, but via
 		 * updating tagged resources.
 		 */
-		if (percpu_ref_is_dying(&ctx->refs))
-			lockdep_assert(current_work());
-		else
+		if (!percpu_ref_is_dying(&ctx->refs))
 			lockdep_assert(current == ctx->submitter_task);
 	}
 #endif

From c9a40292a44e78f71258b8522655bffaf5753bdb Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Wed, 8 Jan 2025 10:28:05 -0700
Subject: [PATCH 3/5] io_uring/eventfd: ensure io_eventfd_signal() defers
 another RCU period

io_eventfd_do_signal() is invoked from an RCU callback, but when
dropping the reference to the io_ev_fd, it calls io_eventfd_free()
directly if the refcount drops to zero. This isn't correct, as any
potential freeing of the io_ev_fd should be deferred another RCU grace
period.

Just call io_eventfd_put() rather than open-code the dec-and-test and
free, which will correctly defer it another RCU grace period.

Fixes: 21a091b970cd ("io_uring: signal registered eventfd to process deferred task work")
Reported-by: Jann Horn <jannh@google.com>
Cc: stable@vger.kernel.org
Tested-by: Li Zetao <lizetao1@huawei.com>
Reviewed-by: Li Zetao<lizetao1@huawei.com>
Reviewed-by: Prasanna Kumar T S M <ptsm@linux.microsoft.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 io_uring/eventfd.c | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/io_uring/eventfd.c b/io_uring/eventfd.c
index fab936d31ba8..100d5da94cb9 100644
--- a/io_uring/eventfd.c
+++ b/io_uring/eventfd.c
@@ -33,20 +33,18 @@ static void io_eventfd_free(struct rcu_head *rcu)
 	kfree(ev_fd);
 }
 
+static void io_eventfd_put(struct io_ev_fd *ev_fd)
+{
+	if (refcount_dec_and_test(&ev_fd->refs))
+		call_rcu(&ev_fd->rcu, io_eventfd_free);
+}
+
 static void io_eventfd_do_signal(struct rcu_head *rcu)
 {
 	struct io_ev_fd *ev_fd = container_of(rcu, struct io_ev_fd, rcu);
 
 	eventfd_signal_mask(ev_fd->cq_ev_fd, EPOLL_URING_WAKE);
-
-	if (refcount_dec_and_test(&ev_fd->refs))
-		io_eventfd_free(rcu);
-}
-
-static void io_eventfd_put(struct io_ev_fd *ev_fd)
-{
-	if (refcount_dec_and_test(&ev_fd->refs))
-		call_rcu(&ev_fd->rcu, io_eventfd_free);
+	io_eventfd_put(ev_fd);
 }
 
 static void io_eventfd_release(struct io_ev_fd *ev_fd, bool put_ref)

From 4b7cfa8b6c28a9fa22b86894166a1a34f6d630ba Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Fri, 10 Jan 2025 14:31:23 +0000
Subject: [PATCH 4/5] io_uring/sqpoll: zero sqd->thread on tctx errors

Syzkeller reports:

BUG: KASAN: slab-use-after-free in thread_group_cputime+0x409/0x700 kernel/sched/cputime.c:341
Read of size 8 at addr ffff88803578c510 by task syz.2.3223/27552
 Call Trace:
  <TASK>
  ...
  kasan_report+0x143/0x180 mm/kasan/report.c:602
  thread_group_cputime+0x409/0x700 kernel/sched/cputime.c:341
  thread_group_cputime_adjusted+0xa6/0x340 kernel/sched/cputime.c:639
  getrusage+0x1000/0x1340 kernel/sys.c:1863
  io_uring_show_fdinfo+0xdfe/0x1770 io_uring/fdinfo.c:197
  seq_show+0x608/0x770 fs/proc/fd.c:68
  ...

That's due to sqd->task not being cleared properly in cases where
SQPOLL task tctx setup fails, which can essentially only happen with
fault injection to insert allocation errors.

Cc: stable@vger.kernel.org
Fixes: 1251d2025c3e1 ("io_uring/sqpoll: early exit thread if task_context wasn't allocated")
Reported-by: syzbot+3d92cfcfa84070b0a470@syzkaller.appspotmail.com
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/efc7ec7010784463b2e7466d7b5c02c2cb381635.1736519461.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 io_uring/sqpoll.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/io_uring/sqpoll.c b/io_uring/sqpoll.c
index 9e5bd79fd2b5..8961a3c1e73c 100644
--- a/io_uring/sqpoll.c
+++ b/io_uring/sqpoll.c
@@ -268,8 +268,12 @@ static int io_sq_thread(void *data)
 	DEFINE_WAIT(wait);
 
 	/* offload context creation failed, just exit */
-	if (!current->io_uring)
+	if (!current->io_uring) {
+		mutex_lock(&sqd->lock);
+		sqd->thread = NULL;
+		mutex_unlock(&sqd->lock);
 		goto err_out;
+	}
 
 	snprintf(buf, sizeof(buf), "iou-sqp-%d", sqd->task_pid);
 	set_task_comm(current, buf);

From bd2703b42decebdcddf76e277ba76b4c4a142d73 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Fri, 10 Jan 2025 20:36:45 +0000
Subject: [PATCH 5/5] io_uring: don't touch sqd->thread off tw add

With IORING_SETUP_SQPOLL all requests are created by the SQPOLL task,
which means that req->task should always match sqd->thread. Since
accesses to sqd->thread should be separately protected, use req->task
in io_req_normal_work_add() instead.

Note, in the eyes of io_req_normal_work_add(), the SQPOLL task struct
is always pinned and alive, and sqd->thread can either be the task or
NULL. It's only problematic if the compiler decides to reload the value
after the null check, which is not so likely.

Cc: stable@vger.kernel.org
Cc: Bui Quang Minh <minhquangbui99@gmail.com>
Reported-by: lizetao <lizetao1@huawei.com>
Fixes: 78f9b61bd8e54 ("io_uring: wake SQPOLL task when task_work is added to an empty queue")
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/1cbbe72cf32c45a8fee96026463024cd8564a7d7.1736541357.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 io_uring/io_uring.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index d3403c8216db..5eb119002099 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -1226,10 +1226,7 @@ static void io_req_normal_work_add(struct io_kiocb *req)
 
 	/* SQPOLL doesn't need the task_work added, it'll run it itself */
 	if (ctx->flags & IORING_SETUP_SQPOLL) {
-		struct io_sq_data *sqd = ctx->sq_data;
-
-		if (sqd->thread)
-			__set_notify_signal(sqd->thread);
+		__set_notify_signal(tctx->task);
 		return;
 	}