mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2025-01-01 10:42:11 +00:00
f4bb2f65bb
Everything else about the io_uring eventfd support is nicely kept private to that code, except the cached_cq_tail tracking. With everything else in place, move io_eventfd_flush_signal() to using the ev_fd grab+release helpers, which then enables the direct use of io_ev_fd for this tracking too. Link: https://lore.kernel.org/r/20240921080307.185186-7-axboe@kernel.dk Signed-off-by: Jens Axboe <axboe@kernel.dk>
209 lines
4.7 KiB
C
209 lines
4.7 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
#include <linux/kernel.h>
|
|
#include <linux/errno.h>
|
|
#include <linux/mm.h>
|
|
#include <linux/slab.h>
|
|
#include <linux/eventfd.h>
|
|
#include <linux/eventpoll.h>
|
|
#include <linux/io_uring.h>
|
|
#include <linux/io_uring_types.h>
|
|
|
|
#include "io-wq.h"
|
|
#include "eventfd.h"
|
|
|
|
struct io_ev_fd {
|
|
struct eventfd_ctx *cq_ev_fd;
|
|
unsigned int eventfd_async;
|
|
/* protected by ->completion_lock */
|
|
unsigned last_cq_tail;
|
|
refcount_t refs;
|
|
atomic_t ops;
|
|
struct rcu_head rcu;
|
|
};
|
|
|
|
enum {
|
|
IO_EVENTFD_OP_SIGNAL_BIT,
|
|
};
|
|
|
|
static void io_eventfd_free(struct rcu_head *rcu)
|
|
{
|
|
struct io_ev_fd *ev_fd = container_of(rcu, struct io_ev_fd, rcu);
|
|
|
|
eventfd_ctx_put(ev_fd->cq_ev_fd);
|
|
kfree(ev_fd);
|
|
}
|
|
|
|
static void io_eventfd_do_signal(struct rcu_head *rcu)
|
|
{
|
|
struct io_ev_fd *ev_fd = container_of(rcu, struct io_ev_fd, rcu);
|
|
|
|
eventfd_signal_mask(ev_fd->cq_ev_fd, EPOLL_URING_WAKE);
|
|
|
|
if (refcount_dec_and_test(&ev_fd->refs))
|
|
io_eventfd_free(rcu);
|
|
}
|
|
|
|
static void io_eventfd_put(struct io_ev_fd *ev_fd)
|
|
{
|
|
if (refcount_dec_and_test(&ev_fd->refs))
|
|
call_rcu(&ev_fd->rcu, io_eventfd_free);
|
|
}
|
|
|
|
static void io_eventfd_release(struct io_ev_fd *ev_fd, bool put_ref)
|
|
{
|
|
if (put_ref)
|
|
io_eventfd_put(ev_fd);
|
|
rcu_read_unlock();
|
|
}
|
|
|
|
/*
|
|
* Returns true if the caller should put the ev_fd reference, false if not.
|
|
*/
|
|
static bool __io_eventfd_signal(struct io_ev_fd *ev_fd)
|
|
{
|
|
if (eventfd_signal_allowed()) {
|
|
eventfd_signal_mask(ev_fd->cq_ev_fd, EPOLL_URING_WAKE);
|
|
return true;
|
|
}
|
|
if (!atomic_fetch_or(BIT(IO_EVENTFD_OP_SIGNAL_BIT), &ev_fd->ops)) {
|
|
call_rcu_hurry(&ev_fd->rcu, io_eventfd_do_signal);
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
/*
|
|
* Trigger if eventfd_async isn't set, or if it's set and the caller is
|
|
* an async worker. If ev_fd isn't valid, obviously return false.
|
|
*/
|
|
static bool io_eventfd_trigger(struct io_ev_fd *ev_fd)
|
|
{
|
|
if (ev_fd)
|
|
return !ev_fd->eventfd_async || io_wq_current_is_worker();
|
|
return false;
|
|
}
|
|
|
|
/*
|
|
* On success, returns with an ev_fd reference grabbed and the RCU read
|
|
* lock held.
|
|
*/
|
|
static struct io_ev_fd *io_eventfd_grab(struct io_ring_ctx *ctx)
|
|
{
|
|
struct io_ev_fd *ev_fd;
|
|
|
|
if (READ_ONCE(ctx->rings->cq_flags) & IORING_CQ_EVENTFD_DISABLED)
|
|
return NULL;
|
|
|
|
rcu_read_lock();
|
|
|
|
/*
|
|
* rcu_dereference ctx->io_ev_fd once and use it for both for checking
|
|
* and eventfd_signal
|
|
*/
|
|
ev_fd = rcu_dereference(ctx->io_ev_fd);
|
|
|
|
/*
|
|
* Check again if ev_fd exists in case an io_eventfd_unregister call
|
|
* completed between the NULL check of ctx->io_ev_fd at the start of
|
|
* the function and rcu_read_lock.
|
|
*/
|
|
if (io_eventfd_trigger(ev_fd) && refcount_inc_not_zero(&ev_fd->refs))
|
|
return ev_fd;
|
|
|
|
rcu_read_unlock();
|
|
return NULL;
|
|
}
|
|
|
|
void io_eventfd_signal(struct io_ring_ctx *ctx)
|
|
{
|
|
struct io_ev_fd *ev_fd;
|
|
|
|
ev_fd = io_eventfd_grab(ctx);
|
|
if (ev_fd)
|
|
io_eventfd_release(ev_fd, __io_eventfd_signal(ev_fd));
|
|
}
|
|
|
|
void io_eventfd_flush_signal(struct io_ring_ctx *ctx)
|
|
{
|
|
struct io_ev_fd *ev_fd;
|
|
|
|
ev_fd = io_eventfd_grab(ctx);
|
|
if (ev_fd) {
|
|
bool skip, put_ref = true;
|
|
|
|
/*
|
|
* Eventfd should only get triggered when at least one event
|
|
* has been posted. Some applications rely on the eventfd
|
|
* notification count only changing IFF a new CQE has been
|
|
* added to the CQ ring. There's no dependency on 1:1
|
|
* relationship between how many times this function is called
|
|
* (and hence the eventfd count) and number of CQEs posted to
|
|
* the CQ ring.
|
|
*/
|
|
spin_lock(&ctx->completion_lock);
|
|
skip = ctx->cached_cq_tail == ev_fd->last_cq_tail;
|
|
ev_fd->last_cq_tail = ctx->cached_cq_tail;
|
|
spin_unlock(&ctx->completion_lock);
|
|
|
|
if (!skip)
|
|
put_ref = __io_eventfd_signal(ev_fd);
|
|
|
|
io_eventfd_release(ev_fd, put_ref);
|
|
}
|
|
}
|
|
|
|
int io_eventfd_register(struct io_ring_ctx *ctx, void __user *arg,
|
|
unsigned int eventfd_async)
|
|
{
|
|
struct io_ev_fd *ev_fd;
|
|
__s32 __user *fds = arg;
|
|
int fd;
|
|
|
|
ev_fd = rcu_dereference_protected(ctx->io_ev_fd,
|
|
lockdep_is_held(&ctx->uring_lock));
|
|
if (ev_fd)
|
|
return -EBUSY;
|
|
|
|
if (copy_from_user(&fd, fds, sizeof(*fds)))
|
|
return -EFAULT;
|
|
|
|
ev_fd = kmalloc(sizeof(*ev_fd), GFP_KERNEL);
|
|
if (!ev_fd)
|
|
return -ENOMEM;
|
|
|
|
ev_fd->cq_ev_fd = eventfd_ctx_fdget(fd);
|
|
if (IS_ERR(ev_fd->cq_ev_fd)) {
|
|
int ret = PTR_ERR(ev_fd->cq_ev_fd);
|
|
|
|
kfree(ev_fd);
|
|
return ret;
|
|
}
|
|
|
|
spin_lock(&ctx->completion_lock);
|
|
ev_fd->last_cq_tail = ctx->cached_cq_tail;
|
|
spin_unlock(&ctx->completion_lock);
|
|
|
|
ev_fd->eventfd_async = eventfd_async;
|
|
ctx->has_evfd = true;
|
|
refcount_set(&ev_fd->refs, 1);
|
|
atomic_set(&ev_fd->ops, 0);
|
|
rcu_assign_pointer(ctx->io_ev_fd, ev_fd);
|
|
return 0;
|
|
}
|
|
|
|
int io_eventfd_unregister(struct io_ring_ctx *ctx)
|
|
{
|
|
struct io_ev_fd *ev_fd;
|
|
|
|
ev_fd = rcu_dereference_protected(ctx->io_ev_fd,
|
|
lockdep_is_held(&ctx->uring_lock));
|
|
if (ev_fd) {
|
|
ctx->has_evfd = false;
|
|
rcu_assign_pointer(ctx->io_ev_fd, NULL);
|
|
io_eventfd_put(ev_fd);
|
|
return 0;
|
|
}
|
|
|
|
return -ENXIO;
|
|
}
|