eventpoll: add EPOLL_URING_WAKE poll wakeup flag

We can have dependencies between epoll and io_uring. Consider an epoll
context, identified by the epfd file descriptor, and an io_uring file
descriptor identified by iofd. If we add iofd to the epfd context, and
arm a multishot poll request for epfd with iofd, then the multishot
poll request will repeatedly trigger and generate events until terminated
by CQ ring overflow. This isn't a desired behavior.

Add EPOLL_URING_WAKE so that io_uring can pass it in as part of the poll
wakeup key, and so that io_uring can check for that flag to detect a
potential recursive invocation.

Cc: stable@vger.kernel.org # 6.0
Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
Jens Axboe 2022-11-20 10:10:53 -07:00
parent f9d567c75e
commit caf1aeaffc
2 changed files with 16 additions and 8 deletions

View File

@ -491,7 +491,8 @@ static inline void ep_set_busy_poll_napi_id(struct epitem *epi)
*/ */
#ifdef CONFIG_DEBUG_LOCK_ALLOC #ifdef CONFIG_DEBUG_LOCK_ALLOC
static void ep_poll_safewake(struct eventpoll *ep, struct epitem *epi) static void ep_poll_safewake(struct eventpoll *ep, struct epitem *epi,
unsigned pollflags)
{ {
struct eventpoll *ep_src; struct eventpoll *ep_src;
unsigned long flags; unsigned long flags;
@ -522,16 +523,17 @@ static void ep_poll_safewake(struct eventpoll *ep, struct epitem *epi)
} }
spin_lock_irqsave_nested(&ep->poll_wait.lock, flags, nests); spin_lock_irqsave_nested(&ep->poll_wait.lock, flags, nests);
ep->nests = nests + 1; ep->nests = nests + 1;
wake_up_locked_poll(&ep->poll_wait, EPOLLIN); wake_up_locked_poll(&ep->poll_wait, EPOLLIN | pollflags);
ep->nests = 0; ep->nests = 0;
spin_unlock_irqrestore(&ep->poll_wait.lock, flags); spin_unlock_irqrestore(&ep->poll_wait.lock, flags);
} }
#else #else
static void ep_poll_safewake(struct eventpoll *ep, struct epitem *epi) static void ep_poll_safewake(struct eventpoll *ep, struct epitem *epi,
unsigned pollflags)
{ {
wake_up_poll(&ep->poll_wait, EPOLLIN); wake_up_poll(&ep->poll_wait, EPOLLIN | pollflags);
} }
#endif #endif
@ -742,7 +744,7 @@ static void ep_free(struct eventpoll *ep)
/* We need to release all tasks waiting for these file */ /* We need to release all tasks waiting for these file */
if (waitqueue_active(&ep->poll_wait)) if (waitqueue_active(&ep->poll_wait))
ep_poll_safewake(ep, NULL); ep_poll_safewake(ep, NULL, 0);
/* /*
* We need to lock this because we could be hit by * We need to lock this because we could be hit by
@ -1208,7 +1210,7 @@ static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, v
/* We have to call this outside the lock */ /* We have to call this outside the lock */
if (pwake) if (pwake)
ep_poll_safewake(ep, epi); ep_poll_safewake(ep, epi, pollflags & EPOLL_URING_WAKE);
if (!(epi->event.events & EPOLLEXCLUSIVE)) if (!(epi->event.events & EPOLLEXCLUSIVE))
ewake = 1; ewake = 1;
@ -1553,7 +1555,7 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event,
/* We have to call this outside the lock */ /* We have to call this outside the lock */
if (pwake) if (pwake)
ep_poll_safewake(ep, NULL); ep_poll_safewake(ep, NULL, 0);
return 0; return 0;
} }
@ -1629,7 +1631,7 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi,
/* We have to call this outside the lock */ /* We have to call this outside the lock */
if (pwake) if (pwake)
ep_poll_safewake(ep, NULL); ep_poll_safewake(ep, NULL, 0);
return 0; return 0;
} }

View File

@ -41,6 +41,12 @@
#define EPOLLMSG (__force __poll_t)0x00000400 #define EPOLLMSG (__force __poll_t)0x00000400
#define EPOLLRDHUP (__force __poll_t)0x00002000 #define EPOLLRDHUP (__force __poll_t)0x00002000
/*
* Internal flag - wakeup generated by io_uring, used to detect recursion back
* into the io_uring poll handler.
*/
#define EPOLL_URING_WAKE ((__force __poll_t)(1U << 27))
/* Set exclusive wakeup mode for the target file descriptor */ /* Set exclusive wakeup mode for the target file descriptor */
#define EPOLLEXCLUSIVE ((__force __poll_t)(1U << 28)) #define EPOLLEXCLUSIVE ((__force __poll_t)(1U << 28))