// SPDX-License-Identifier: GPL-2.0

#include "io_uring.h"
#include "napi.h"

#ifdef CONFIG_NET_RX_BUSY_POLL

/* Timeout for cleanout of stale entries. */
#define NAPI_TIMEOUT		(60 * SEC_CONVERSION)

struct io_napi_entry {
	unsigned int		napi_id;
	struct list_head	list;

	unsigned long		timeout;
	struct hlist_node	node;

	struct rcu_head		rcu;
};

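/*
 * io_napi_hash_find() - Look up a napi id in the napi hash table
 * @hash_list: hash table bucket for the napi id
 * @napi_id: napi id to look up
 *
 * Walk the bucket under RCU and, if the id is found, refresh the entry's
 * stale timeout. Returns the entry or NULL.
 */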
static struct io_napi_entry *io_napi_hash_find(struct hlist_head *hash_list,
					       unsigned int napi_id)
{
	struct io_napi_entry *e;

	hlist_for_each_entry_rcu(e, hash_list, node) {
		if (e->napi_id != napi_id)
			continue;
		e->timeout = jiffies + NAPI_TIMEOUT;
		return e;
	}

	return NULL;
}

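/*
 * __io_napi_add() - Add napi id to the busy poll list
 * @ctx: pointer to io-uring context structure
 * @sock: pointer to socket to read the napi id from
 *
 * Add the napi id of the socket to the napi busy poll list and hash table.
 */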
void __io_napi_add(struct io_ring_ctx *ctx, struct socket *sock)
{
	struct hlist_head *hash_list;
	unsigned int napi_id;
	struct sock *sk;
	struct io_napi_entry *e;

	sk = sock->sk;
	if (!sk)
		return;

	napi_id = READ_ONCE(sk->sk_napi_id);

	/* Non-NAPI IDs can be rejected. */
	if (napi_id < MIN_NAPI_ID)
		return;

	hash_list = &ctx->napi_ht[hash_min(napi_id, HASH_BITS(ctx->napi_ht))];

	rcu_read_lock();
	e = io_napi_hash_find(hash_list, napi_id);
	if (e) {
		e->timeout = jiffies + NAPI_TIMEOUT;
		rcu_read_unlock();
		return;
	}
	rcu_read_unlock();

	e = kmalloc(sizeof(*e), GFP_NOWAIT);
	if (!e)
		return;

	e->napi_id = napi_id;
	e->timeout = jiffies + NAPI_TIMEOUT;

	spin_lock(&ctx->napi_lock);
	if (unlikely(io_napi_hash_find(hash_list, napi_id))) {
		spin_unlock(&ctx->napi_lock);
		kfree(e);
		return;
	}

	hlist_add_tail_rcu(&e->node, hash_list);
	list_add_tail(&e->list, &ctx->napi_list);
	spin_unlock(&ctx->napi_lock);
}

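/*
 * __io_napi_remove_stale() - Remove stale napi entries
 * @ctx: pointer to io-uring context structure
 *
 * Scan the napi hash table and remove entries whose timeout has expired,
 * freeing them after an RCU grace period.
 */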
static void __io_napi_remove_stale(struct io_ring_ctx *ctx)
{
	struct io_napi_entry *e;
	unsigned int i;

	spin_lock(&ctx->napi_lock);
	hash_for_each(ctx->napi_ht, i, e, node) {
		if (time_after(jiffies, e->timeout)) {
			list_del(&e->list);
			hash_del_rcu(&e->node);
			kfree_rcu(e, rcu);
		}
	}
	spin_unlock(&ctx->napi_lock);
}

static inline void io_napi_remove_stale(struct io_ring_ctx *ctx, bool is_stale)
{
	if (is_stale)
		__io_napi_remove_stale(ctx);
}

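/*
 * io_napi_busy_loop_timeout() - Check if the busy poll timeout has expired
 * @start_time: timestamp taken when busy polling started
 * @bp_usec: busy poll timeout in microseconds
 *
 * Returns true if the busy poll window has elapsed, or unconditionally if
 * no timeout is configured.
 */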
static inline bool io_napi_busy_loop_timeout(unsigned long start_time,
					     unsigned long bp_usec)
{
	if (bp_usec) {
		unsigned long end_time = start_time + bp_usec;
		unsigned long now = busy_loop_current_time();

		return time_after(now, end_time);
	}

	return true;
}

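/*
 * io_napi_busy_loop_should_end() - Check if the busy poll loop should end
 * @data: pointer to the io wait queue
 * @start_time: timestamp taken when busy polling started
 *
 * End busy polling when a signal is pending, when enough completions have
 * arrived to wake the waiter, or when the busy poll timeout has expired.
 */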
static bool io_napi_busy_loop_should_end(void *data,
					 unsigned long start_time)
{
	struct io_wait_queue *iowq = data;

	if (signal_pending(current))
		return true;
	if (io_should_wake(iowq))
		return true;
	if (io_napi_busy_loop_timeout(start_time, iowq->napi_busy_poll_to))
		return true;

	return false;
}

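/*
 * __io_napi_do_busy_loop() - Busy poll every napi id on the list
 * @ctx: pointer to io-uring context structure
 * @loop_end_arg: argument for the loop end callback, or NULL
 *
 * A non-NULL @loop_end_arg selects io_napi_busy_loop_should_end() as the
 * per-id loop end check. Returns true if any entry on the list has gone
 * stale. Must be called with the RCU read lock held.
 */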
static bool __io_napi_do_busy_loop(struct io_ring_ctx *ctx,
				   void *loop_end_arg)
{
	struct io_napi_entry *e;
	bool (*loop_end)(void *, unsigned long) = NULL;
	bool is_stale = false;

	if (loop_end_arg)
		loop_end = io_napi_busy_loop_should_end;

	list_for_each_entry_rcu(e, &ctx->napi_list, list) {
		napi_busy_loop_rcu(e->napi_id, loop_end, loop_end_arg,
				   ctx->napi_prefer_busy_poll, BUSY_POLL_BUDGET);

		if (time_after(jiffies, e->timeout))
			is_stale = true;
	}

	return is_stale;
}

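/*
 * io_napi_blocking_busy_loop() - Busy poll before blocking in cqring wait
 * @ctx: pointer to io-uring context structure
 * @iowq: pointer to io wait queue
 *
 * Run the busy poll loop until it should end, then remove any stale
 * entries detected along the way.
 */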
static void io_napi_blocking_busy_loop(struct io_ring_ctx *ctx,
				       struct io_wait_queue *iowq)
{
	unsigned long start_time = busy_loop_current_time();
	void *loop_end_arg = NULL;
	bool is_stale = false;

	/* Singular lists use a different napi loop end check function and are
	 * only executed once.
	 */
	if (list_is_singular(&ctx->napi_list))
		loop_end_arg = iowq;

	rcu_read_lock();
	do {
		is_stale = __io_napi_do_busy_loop(ctx, loop_end_arg);
	} while (!io_napi_busy_loop_should_end(iowq, start_time) && !loop_end_arg);
	rcu_read_unlock();

	io_napi_remove_stale(ctx, is_stale);
}

/*
 * io_napi_init() - Init napi settings
 * @ctx: pointer to io-uring context structure
 *
 * Init napi settings in the io-uring context.
 */
void io_napi_init(struct io_ring_ctx *ctx)
{
	INIT_LIST_HEAD(&ctx->napi_list);
	spin_lock_init(&ctx->napi_lock);
	ctx->napi_prefer_busy_poll = false;
	ctx->napi_busy_poll_to = READ_ONCE(sysctl_net_busy_poll);
}

/*
 * io_napi_free() - Deallocate napi
 * @ctx: pointer to io-uring context structure
 *
 * Free the napi list and the hash table in the io-uring context.
 */
void io_napi_free(struct io_ring_ctx *ctx)
{
	struct io_napi_entry *e;
	unsigned int i;

	spin_lock(&ctx->napi_lock);
	hash_for_each(ctx->napi_ht, i, e, node) {
		hash_del_rcu(&e->node);
		kfree_rcu(e, rcu);
	}
	spin_unlock(&ctx->napi_lock);
}

/*
 * __io_napi_adjust_timeout() - Adjust the napi busy poll timeout
 * @ctx: pointer to io-uring context structure
 * @iowq: pointer to io wait queue
 * @ts: pointer to timespec or NULL
 *
 * Adjust the busy loop timeout according to timespec and busy poll timeout.
 */
void __io_napi_adjust_timeout(struct io_ring_ctx *ctx, struct io_wait_queue *iowq,
			      struct timespec64 *ts)
{
	unsigned int poll_to = READ_ONCE(ctx->napi_busy_poll_to);

	if (ts) {
		struct timespec64 poll_to_ts = ns_to_timespec64(1000 * (s64)poll_to);

		if (timespec64_compare(ts, &poll_to_ts) > 0) {
			*ts = timespec64_sub(*ts, poll_to_ts);
		} else {
			u64 to = timespec64_to_ns(ts);

			/* The requested wait is shorter than the busy poll
			 * timeout; busy poll for the entire wait time and
			 * leave nothing for the blocking wait.
			 */
			do_div(to, 1000);
			poll_to = to;
			ts->tv_sec = 0;
			ts->tv_nsec = 0;
		}
	}

	iowq->napi_busy_poll_to = poll_to;
}

/*
 * __io_napi_busy_loop() - Execute busy poll loop
 * @ctx: pointer to io-uring context structure
 * @iowq: pointer to io wait queue
 *
 * Execute the busy poll loop if napi busy polling is enabled and the ring
 * is not in SQPOLL mode.
 */
void __io_napi_busy_loop(struct io_ring_ctx *ctx, struct io_wait_queue *iowq)
{
	iowq->napi_prefer_busy_poll = READ_ONCE(ctx->napi_prefer_busy_poll);

	if (!(ctx->flags & IORING_SETUP_SQPOLL) && iowq->napi_busy_poll_to)
		io_napi_blocking_busy_loop(ctx, iowq);
}

#endif