2023-06-08 09:38:36 -07:00
|
|
|
// SPDX-License-Identifier: GPL-2.0
|
|
|
|
|
|
|
|
#include "io_uring.h"
|
|
|
|
#include "napi.h"
|
|
|
|
|
|
|
|
#ifdef CONFIG_NET_RX_BUSY_POLL
|
|
|
|
|
|
|
|
/*
 * Lifetime of a tracked entry: this value is added to jiffies whenever an
 * entry is created or refreshed, and entries past that point are cleaned out
 * as stale.
 */
#define NAPI_TIMEOUT		(60 * SEC_CONVERSION)
|
|
|
|
|
|
|
|
struct io_napi_entry {
|
|
|
|
unsigned int napi_id;
|
|
|
|
struct list_head list;
|
|
|
|
|
|
|
|
unsigned long timeout;
|
|
|
|
struct hlist_node node;
|
|
|
|
|
|
|
|
struct rcu_head rcu;
|
|
|
|
};
|
|
|
|
|
|
|
|
static struct io_napi_entry *io_napi_hash_find(struct hlist_head *hash_list,
|
|
|
|
unsigned int napi_id)
|
|
|
|
{
|
|
|
|
struct io_napi_entry *e;
|
|
|
|
|
|
|
|
hlist_for_each_entry_rcu(e, hash_list, node) {
|
|
|
|
if (e->napi_id != napi_id)
|
|
|
|
continue;
|
|
|
|
return e;
|
|
|
|
}
|
|
|
|
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2024-07-26 15:24:30 +01:00
|
|
|
/*
 * Convert a busy_loop_current_time() style timestamp to ktime_t by shifting
 * it left by 10 bits and treating the result as nanoseconds.
 */
static inline ktime_t net_to_ktime(unsigned long t)
{
	/* napi approximates usecs; this reverses busy_loop_current_time */
	unsigned long approx_ns = t << 10;

	return ns_to_ktime(approx_ns);
}
|
|
|
|
|
2024-10-13 14:28:50 -04:00
|
|
|
int __io_napi_add_id(struct io_ring_ctx *ctx, unsigned int napi_id)
|
2023-06-08 09:38:36 -07:00
|
|
|
{
|
|
|
|
struct hlist_head *hash_list;
|
|
|
|
struct io_napi_entry *e;
|
|
|
|
|
|
|
|
/* Non-NAPI IDs can be rejected. */
|
|
|
|
if (napi_id < MIN_NAPI_ID)
|
2024-10-13 14:28:50 -04:00
|
|
|
return -EINVAL;
|
2023-06-08 09:38:36 -07:00
|
|
|
|
|
|
|
hash_list = &ctx->napi_ht[hash_min(napi_id, HASH_BITS(ctx->napi_ht))];
|
|
|
|
|
2024-10-13 14:29:02 -04:00
|
|
|
scoped_guard(rcu) {
|
|
|
|
e = io_napi_hash_find(hash_list, napi_id);
|
|
|
|
if (e) {
|
|
|
|
WRITE_ONCE(e->timeout, jiffies + NAPI_TIMEOUT);
|
|
|
|
return -EEXIST;
|
|
|
|
}
|
2023-06-08 09:38:36 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
e = kmalloc(sizeof(*e), GFP_NOWAIT);
|
|
|
|
if (!e)
|
2024-10-13 14:28:50 -04:00
|
|
|
return -ENOMEM;
|
2023-06-08 09:38:36 -07:00
|
|
|
|
|
|
|
e->napi_id = napi_id;
|
|
|
|
e->timeout = jiffies + NAPI_TIMEOUT;
|
|
|
|
|
2024-10-13 14:29:02 -04:00
|
|
|
/*
|
|
|
|
* guard(spinlock) is not used to manually unlock it before calling
|
|
|
|
* kfree()
|
|
|
|
*/
|
2023-06-08 09:38:36 -07:00
|
|
|
spin_lock(&ctx->napi_lock);
|
|
|
|
if (unlikely(io_napi_hash_find(hash_list, napi_id))) {
|
|
|
|
spin_unlock(&ctx->napi_lock);
|
|
|
|
kfree(e);
|
2024-10-13 14:28:50 -04:00
|
|
|
return -EEXIST;
|
2023-06-08 09:38:36 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
hlist_add_tail_rcu(&e->node, hash_list);
|
2024-10-13 14:28:38 -04:00
|
|
|
list_add_tail_rcu(&e->list, &ctx->napi_list);
|
2023-06-08 09:38:36 -07:00
|
|
|
spin_unlock(&ctx->napi_lock);
|
2024-10-13 14:28:50 -04:00
|
|
|
return 0;
|
2023-06-08 09:38:36 -07:00
|
|
|
}
|
|
|
|
|
2024-10-13 14:29:24 -04:00
|
|
|
static int __io_napi_del_id(struct io_ring_ctx *ctx, unsigned int napi_id)
|
|
|
|
{
|
|
|
|
struct hlist_head *hash_list;
|
|
|
|
struct io_napi_entry *e;
|
|
|
|
|
|
|
|
/* Non-NAPI IDs can be rejected. */
|
|
|
|
if (napi_id < MIN_NAPI_ID)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
hash_list = &ctx->napi_ht[hash_min(napi_id, HASH_BITS(ctx->napi_ht))];
|
|
|
|
guard(spinlock)(&ctx->napi_lock);
|
|
|
|
e = io_napi_hash_find(hash_list, napi_id);
|
|
|
|
if (!e)
|
|
|
|
return -ENOENT;
|
|
|
|
|
|
|
|
list_del_rcu(&e->list);
|
|
|
|
hash_del_rcu(&e->node);
|
|
|
|
kfree_rcu(e, rcu);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2023-06-08 09:38:36 -07:00
|
|
|
static void __io_napi_remove_stale(struct io_ring_ctx *ctx)
|
|
|
|
{
|
|
|
|
struct io_napi_entry *e;
|
|
|
|
|
2024-10-13 14:29:02 -04:00
|
|
|
guard(spinlock)(&ctx->napi_lock);
|
2024-10-13 14:28:38 -04:00
|
|
|
/*
|
|
|
|
* list_for_each_entry_safe() is not required as long as:
|
|
|
|
* 1. list_del_rcu() does not reset the deleted node next pointer
|
|
|
|
* 2. kfree_rcu() delays the memory freeing until the next quiescent
|
|
|
|
* state
|
|
|
|
*/
|
|
|
|
list_for_each_entry(e, &ctx->napi_list, list) {
|
2024-10-13 14:28:24 -04:00
|
|
|
if (time_after(jiffies, READ_ONCE(e->timeout))) {
|
2024-10-13 14:28:38 -04:00
|
|
|
list_del_rcu(&e->list);
|
2023-06-08 09:38:36 -07:00
|
|
|
hash_del_rcu(&e->node);
|
|
|
|
kfree_rcu(e, rcu);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Run the stale-entry sweep only when the caller saw a stale entry. */
static inline void io_napi_remove_stale(struct io_ring_ctx *ctx, bool is_stale)
{
	if (!is_stale)
		return;

	__io_napi_remove_stale(ctx);
}
|
|
|
|
|
2024-07-26 15:24:30 +01:00
|
|
|
/*
 * Report whether the busy poll budget @bp, counted from @start_time, is used
 * up. A zero budget is always reported as expired.
 */
static inline bool io_napi_busy_loop_timeout(ktime_t start_time,
					     ktime_t bp)
{
	ktime_t deadline, now;

	if (!bp)
		return true;

	deadline = ktime_add(start_time, bp);
	now = net_to_ktime(busy_loop_current_time());

	return ktime_after(now, deadline);
}
|
|
|
|
|
|
|
|
static bool io_napi_busy_loop_should_end(void *data,
|
|
|
|
unsigned long start_time)
|
|
|
|
{
|
|
|
|
struct io_wait_queue *iowq = data;
|
|
|
|
|
|
|
|
if (signal_pending(current))
|
|
|
|
return true;
|
2024-02-14 12:59:36 -07:00
|
|
|
if (io_should_wake(iowq) || io_has_work(iowq->ctx))
|
2023-06-08 09:38:36 -07:00
|
|
|
return true;
|
2024-07-26 15:24:30 +01:00
|
|
|
if (io_napi_busy_loop_timeout(net_to_ktime(start_time),
|
|
|
|
iowq->napi_busy_poll_dt))
|
2023-06-08 09:38:36 -07:00
|
|
|
return true;
|
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2024-10-13 14:29:24 -04:00
|
|
|
/*
|
|
|
|
* never report stale entries
|
|
|
|
*/
|
|
|
|
static bool static_tracking_do_busy_loop(struct io_ring_ctx *ctx,
|
|
|
|
bool (*loop_end)(void *, unsigned long),
|
|
|
|
void *loop_end_arg)
|
|
|
|
{
|
|
|
|
struct io_napi_entry *e;
|
|
|
|
|
|
|
|
list_for_each_entry_rcu(e, &ctx->napi_list, list)
|
|
|
|
napi_busy_loop_rcu(e->napi_id, loop_end, loop_end_arg,
|
|
|
|
ctx->napi_prefer_busy_poll, BUSY_POLL_BUDGET);
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
static bool
|
|
|
|
dynamic_tracking_do_busy_loop(struct io_ring_ctx *ctx,
|
|
|
|
bool (*loop_end)(void *, unsigned long),
|
|
|
|
void *loop_end_arg)
|
2023-06-08 09:38:36 -07:00
|
|
|
{
|
|
|
|
struct io_napi_entry *e;
|
|
|
|
bool is_stale = false;
|
|
|
|
|
|
|
|
list_for_each_entry_rcu(e, &ctx->napi_list, list) {
|
|
|
|
napi_busy_loop_rcu(e->napi_id, loop_end, loop_end_arg,
|
|
|
|
ctx->napi_prefer_busy_poll, BUSY_POLL_BUDGET);
|
|
|
|
|
2024-10-13 14:28:24 -04:00
|
|
|
if (time_after(jiffies, READ_ONCE(e->timeout)))
|
2023-06-08 09:38:36 -07:00
|
|
|
is_stale = true;
|
|
|
|
}
|
|
|
|
|
|
|
|
return is_stale;
|
|
|
|
}
|
|
|
|
|
2024-10-13 14:29:24 -04:00
|
|
|
static inline bool
|
|
|
|
__io_napi_do_busy_loop(struct io_ring_ctx *ctx,
|
|
|
|
bool (*loop_end)(void *, unsigned long),
|
|
|
|
void *loop_end_arg)
|
|
|
|
{
|
|
|
|
if (READ_ONCE(ctx->napi_track_mode) == IO_URING_NAPI_TRACKING_STATIC)
|
|
|
|
return static_tracking_do_busy_loop(ctx, loop_end, loop_end_arg);
|
|
|
|
return dynamic_tracking_do_busy_loop(ctx, loop_end, loop_end_arg);
|
|
|
|
}
|
|
|
|
|
2023-06-08 09:38:36 -07:00
|
|
|
static void io_napi_blocking_busy_loop(struct io_ring_ctx *ctx,
|
|
|
|
struct io_wait_queue *iowq)
|
|
|
|
{
|
|
|
|
unsigned long start_time = busy_loop_current_time();
|
2024-10-13 14:29:12 -04:00
|
|
|
bool (*loop_end)(void *, unsigned long) = NULL;
|
2023-06-08 09:38:36 -07:00
|
|
|
void *loop_end_arg = NULL;
|
|
|
|
bool is_stale = false;
|
|
|
|
|
|
|
|
/* Singular lists use a different napi loop end check function and are
|
|
|
|
* only executed once.
|
|
|
|
*/
|
2024-10-13 14:29:12 -04:00
|
|
|
if (list_is_singular(&ctx->napi_list)) {
|
|
|
|
loop_end = io_napi_busy_loop_should_end;
|
2023-06-08 09:38:36 -07:00
|
|
|
loop_end_arg = iowq;
|
2024-10-13 14:29:12 -04:00
|
|
|
}
|
2023-06-08 09:38:36 -07:00
|
|
|
|
2024-10-13 14:29:02 -04:00
|
|
|
scoped_guard(rcu) {
|
|
|
|
do {
|
2024-10-13 14:29:12 -04:00
|
|
|
is_stale = __io_napi_do_busy_loop(ctx, loop_end,
|
|
|
|
loop_end_arg);
|
2024-10-13 14:29:02 -04:00
|
|
|
} while (!io_napi_busy_loop_should_end(iowq, start_time) &&
|
|
|
|
!loop_end_arg);
|
|
|
|
}
|
2023-06-08 09:38:36 -07:00
|
|
|
|
|
|
|
io_napi_remove_stale(ctx, is_stale);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* io_napi_init() - Init napi settings
|
|
|
|
* @ctx: pointer to io-uring context structure
|
|
|
|
*
|
|
|
|
* Init napi settings in the io-uring context.
|
|
|
|
*/
|
|
|
|
void io_napi_init(struct io_ring_ctx *ctx)
|
|
|
|
{
|
2024-07-26 15:24:30 +01:00
|
|
|
u64 sys_dt = READ_ONCE(sysctl_net_busy_poll) * NSEC_PER_USEC;
|
|
|
|
|
2023-06-08 09:38:36 -07:00
|
|
|
INIT_LIST_HEAD(&ctx->napi_list);
|
|
|
|
spin_lock_init(&ctx->napi_lock);
|
|
|
|
ctx->napi_prefer_busy_poll = false;
|
2024-07-26 15:24:30 +01:00
|
|
|
ctx->napi_busy_poll_dt = ns_to_ktime(sys_dt);
|
2024-10-13 14:29:24 -04:00
|
|
|
ctx->napi_track_mode = IO_URING_NAPI_TRACKING_INACTIVE;
|
2023-06-08 09:38:36 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* io_napi_free() - Deallocate napi
|
|
|
|
* @ctx: pointer to io-uring context structure
|
|
|
|
*
|
|
|
|
* Free the napi list and the hash table in the io-uring context.
|
|
|
|
*/
|
|
|
|
void io_napi_free(struct io_ring_ctx *ctx)
|
|
|
|
{
|
|
|
|
struct io_napi_entry *e;
|
|
|
|
|
2024-10-13 14:29:02 -04:00
|
|
|
guard(spinlock)(&ctx->napi_lock);
|
2024-10-13 14:28:38 -04:00
|
|
|
list_for_each_entry(e, &ctx->napi_list, list) {
|
2023-06-08 09:38:36 -07:00
|
|
|
hash_del_rcu(&e->node);
|
|
|
|
kfree_rcu(e, rcu);
|
|
|
|
}
|
2024-10-13 14:28:38 -04:00
|
|
|
INIT_LIST_HEAD_RCU(&ctx->napi_list);
|
2023-06-08 09:38:36 -07:00
|
|
|
}
|
|
|
|
|
2024-10-13 14:29:24 -04:00
|
|
|
static int io_napi_register_napi(struct io_ring_ctx *ctx,
|
|
|
|
struct io_uring_napi *napi)
|
|
|
|
{
|
|
|
|
switch (napi->op_param) {
|
|
|
|
case IO_URING_NAPI_TRACKING_DYNAMIC:
|
|
|
|
case IO_URING_NAPI_TRACKING_STATIC:
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
/* clean the napi list for new settings */
|
|
|
|
io_napi_free(ctx);
|
|
|
|
WRITE_ONCE(ctx->napi_track_mode, napi->op_param);
|
|
|
|
WRITE_ONCE(ctx->napi_busy_poll_dt, napi->busy_poll_to * NSEC_PER_USEC);
|
|
|
|
WRITE_ONCE(ctx->napi_prefer_busy_poll, !!napi->prefer_busy_poll);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2023-06-08 09:38:38 -07:00
|
|
|
/*
|
|
|
|
* io_napi_register() - Register napi with io-uring
|
|
|
|
* @ctx: pointer to io-uring context structure
|
|
|
|
* @arg: pointer to io_uring_napi structure
|
|
|
|
*
|
|
|
|
* Register napi in the io-uring context.
|
|
|
|
*/
|
|
|
|
int io_register_napi(struct io_ring_ctx *ctx, void __user *arg)
|
|
|
|
{
|
|
|
|
const struct io_uring_napi curr = {
|
2024-07-26 15:24:30 +01:00
|
|
|
.busy_poll_to = ktime_to_us(ctx->napi_busy_poll_dt),
|
2024-10-13 14:29:24 -04:00
|
|
|
.prefer_busy_poll = ctx->napi_prefer_busy_poll,
|
|
|
|
.op_param = ctx->napi_track_mode
|
2023-06-08 09:38:38 -07:00
|
|
|
};
|
|
|
|
struct io_uring_napi napi;
|
|
|
|
|
2024-07-24 12:16:17 +01:00
|
|
|
if (ctx->flags & IORING_SETUP_IOPOLL)
|
|
|
|
return -EINVAL;
|
2023-06-08 09:38:38 -07:00
|
|
|
if (copy_from_user(&napi, arg, sizeof(napi)))
|
|
|
|
return -EFAULT;
|
2024-10-13 14:29:24 -04:00
|
|
|
if (napi.pad[0] || napi.pad[1] || napi.resv)
|
2023-06-08 09:38:38 -07:00
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
if (copy_to_user(arg, &curr, sizeof(curr)))
|
|
|
|
return -EFAULT;
|
|
|
|
|
2024-10-13 14:29:24 -04:00
|
|
|
switch (napi.opcode) {
|
|
|
|
case IO_URING_NAPI_REGISTER_OP:
|
|
|
|
return io_napi_register_napi(ctx, &napi);
|
|
|
|
case IO_URING_NAPI_STATIC_ADD_ID:
|
|
|
|
if (curr.op_param != IO_URING_NAPI_TRACKING_STATIC)
|
|
|
|
return -EINVAL;
|
|
|
|
return __io_napi_add_id(ctx, napi.op_param);
|
|
|
|
case IO_URING_NAPI_STATIC_DEL_ID:
|
|
|
|
if (curr.op_param != IO_URING_NAPI_TRACKING_STATIC)
|
|
|
|
return -EINVAL;
|
|
|
|
return __io_napi_del_id(ctx, napi.op_param);
|
|
|
|
default:
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
2023-06-08 09:38:38 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* io_napi_unregister() - Unregister napi with io-uring
|
|
|
|
* @ctx: pointer to io-uring context structure
|
|
|
|
* @arg: pointer to io_uring_napi structure
|
|
|
|
*
|
|
|
|
* Unregister napi. If arg has been specified copy the busy poll timeout and
|
|
|
|
* prefer busy poll setting to the passed in structure.
|
|
|
|
*/
|
|
|
|
int io_unregister_napi(struct io_ring_ctx *ctx, void __user *arg)
|
|
|
|
{
|
|
|
|
const struct io_uring_napi curr = {
|
2024-07-26 15:24:30 +01:00
|
|
|
.busy_poll_to = ktime_to_us(ctx->napi_busy_poll_dt),
|
2023-06-08 09:38:38 -07:00
|
|
|
.prefer_busy_poll = ctx->napi_prefer_busy_poll
|
|
|
|
};
|
|
|
|
|
|
|
|
if (arg && copy_to_user(arg, &curr, sizeof(curr)))
|
|
|
|
return -EFAULT;
|
|
|
|
|
2024-07-26 15:24:30 +01:00
|
|
|
WRITE_ONCE(ctx->napi_busy_poll_dt, 0);
|
2023-06-08 09:38:38 -07:00
|
|
|
WRITE_ONCE(ctx->napi_prefer_busy_poll, false);
|
2024-10-13 14:29:24 -04:00
|
|
|
WRITE_ONCE(ctx->napi_track_mode, IO_URING_NAPI_TRACKING_INACTIVE);
|
2023-06-08 09:38:38 -07:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2023-06-08 09:38:36 -07:00
|
|
|
/*
|
|
|
|
* __io_napi_busy_loop() - execute busy poll loop
|
|
|
|
* @ctx: pointer to io-uring context structure
|
|
|
|
* @iowq: pointer to io wait queue
|
|
|
|
*
|
|
|
|
* Execute the busy poll loop and merge the spliced off list.
|
|
|
|
*/
|
|
|
|
void __io_napi_busy_loop(struct io_ring_ctx *ctx, struct io_wait_queue *iowq)
|
|
|
|
{
|
2024-08-07 15:18:11 +01:00
|
|
|
if (ctx->flags & IORING_SETUP_SQPOLL)
|
|
|
|
return;
|
2023-06-08 09:38:36 -07:00
|
|
|
|
2024-08-07 15:18:12 +01:00
|
|
|
iowq->napi_busy_poll_dt = READ_ONCE(ctx->napi_busy_poll_dt);
|
|
|
|
if (iowq->timeout != KTIME_MAX) {
|
2024-08-07 15:18:14 +01:00
|
|
|
ktime_t dt = ktime_sub(iowq->timeout, io_get_time(ctx));
|
2024-08-07 15:18:12 +01:00
|
|
|
|
|
|
|
iowq->napi_busy_poll_dt = min_t(u64, iowq->napi_busy_poll_dt, dt);
|
|
|
|
}
|
|
|
|
|
2024-08-07 15:18:11 +01:00
|
|
|
iowq->napi_prefer_busy_poll = READ_ONCE(ctx->napi_prefer_busy_poll);
|
|
|
|
io_napi_blocking_busy_loop(ctx, iowq);
|
2023-06-08 09:38:36 -07:00
|
|
|
}
|
|
|
|
|
2023-06-08 09:38:37 -07:00
|
|
|
/*
|
|
|
|
* io_napi_sqpoll_busy_poll() - busy poll loop for sqpoll
|
|
|
|
* @ctx: pointer to io-uring context structure
|
|
|
|
*
|
|
|
|
* Splice of the napi list and execute the napi busy poll loop.
|
|
|
|
*/
|
|
|
|
int io_napi_sqpoll_busy_poll(struct io_ring_ctx *ctx)
|
|
|
|
{
|
|
|
|
bool is_stale = false;
|
|
|
|
|
2024-07-26 15:24:30 +01:00
|
|
|
if (!READ_ONCE(ctx->napi_busy_poll_dt))
|
2023-06-08 09:38:37 -07:00
|
|
|
return 0;
|
|
|
|
if (list_empty_careful(&ctx->napi_list))
|
|
|
|
return 0;
|
|
|
|
|
2024-10-13 14:29:02 -04:00
|
|
|
scoped_guard(rcu) {
|
2024-10-13 14:29:12 -04:00
|
|
|
is_stale = __io_napi_do_busy_loop(ctx, NULL, NULL);
|
2024-10-13 14:29:02 -04:00
|
|
|
}
|
2023-06-08 09:38:37 -07:00
|
|
|
|
|
|
|
io_napi_remove_stale(ctx, is_stale);
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
2023-06-08 09:38:36 -07:00
|
|
|
#endif
|