mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2024-12-29 09:13:38 +00:00
io_uring/notif: implement notification stacking
The network stack allows only one ubuf_info per skb, and unlike MSG_ZEROCOPY, each io_uring zerocopy send will carry a separate ubuf_info. That means that send requests can't reuse a previously allocated skb and need to get one or more new ones. That's fine for large sends, but otherwise it would spam the stack with lots of skbs carrying just a little data each. To help with that, implement linking notifications (i.e. an io_uring wrapper around ubuf_info) into a list. Each is refcounted by skbs and the stack as usual. Additionally, all non-head entries keep a reference to the head, which they put down when their refcount hits 0. When the head has no more users, it'll efficiently put all notifications in a batch. As mentioned previously about ->io_link_skb, the callback implementation always allows binding to an skb without a ubuf_info. Reviewed-by: Jens Axboe <axboe@kernel.dk> Signed-off-by: Pavel Begunkov <asml.silence@gmail.com> Link: https://lore.kernel.org/r/bf1e7f9b72f9ecc99999fdc0d2cded5eea87fd0b.1713369317.git.asml.silence@gmail.com Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
parent
5a569469b9
commit
6fe4220912
@ -9,10 +9,17 @@
|
|||||||
#include "notif.h"
|
#include "notif.h"
|
||||||
#include "rsrc.h"
|
#include "rsrc.h"
|
||||||
|
|
||||||
|
static const struct ubuf_info_ops io_ubuf_ops;
|
||||||
|
|
||||||
static void io_notif_tw_complete(struct io_kiocb *notif, struct io_tw_state *ts)
|
static void io_notif_tw_complete(struct io_kiocb *notif, struct io_tw_state *ts)
|
||||||
{
|
{
|
||||||
struct io_notif_data *nd = io_notif_to_data(notif);
|
struct io_notif_data *nd = io_notif_to_data(notif);
|
||||||
|
|
||||||
|
do {
|
||||||
|
notif = cmd_to_io_kiocb(nd);
|
||||||
|
|
||||||
|
lockdep_assert(refcount_read(&nd->uarg.refcnt) == 0);
|
||||||
|
|
||||||
if (unlikely(nd->zc_report) && (nd->zc_copied || !nd->zc_used))
|
if (unlikely(nd->zc_report) && (nd->zc_copied || !nd->zc_used))
|
||||||
notif->cqe.res |= IORING_NOTIF_USAGE_ZC_COPIED;
|
notif->cqe.res |= IORING_NOTIF_USAGE_ZC_COPIED;
|
||||||
|
|
||||||
@ -20,7 +27,10 @@ static void io_notif_tw_complete(struct io_kiocb *notif, struct io_tw_state *ts)
|
|||||||
__io_unaccount_mem(notif->ctx->user, nd->account_pages);
|
__io_unaccount_mem(notif->ctx->user, nd->account_pages);
|
||||||
nd->account_pages = 0;
|
nd->account_pages = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
nd = nd->next;
|
||||||
io_req_task_complete(notif, ts);
|
io_req_task_complete(notif, ts);
|
||||||
|
} while (nd);
|
||||||
}
|
}
|
||||||
|
|
||||||
void io_tx_ubuf_complete(struct sk_buff *skb, struct ubuf_info *uarg,
|
void io_tx_ubuf_complete(struct sk_buff *skb, struct ubuf_info *uarg,
|
||||||
@ -39,12 +49,56 @@ void io_tx_ubuf_complete(struct sk_buff *skb, struct ubuf_info *uarg,
|
|||||||
if (!refcount_dec_and_test(&uarg->refcnt))
|
if (!refcount_dec_and_test(&uarg->refcnt))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
if (nd->head != nd) {
|
||||||
|
io_tx_ubuf_complete(skb, &nd->head->uarg, success);
|
||||||
|
return;
|
||||||
|
}
|
||||||
notif->io_task_work.func = io_notif_tw_complete;
|
notif->io_task_work.func = io_notif_tw_complete;
|
||||||
__io_req_task_work_add(notif, IOU_F_TWQ_LAZY_WAKE);
|
__io_req_task_work_add(notif, IOU_F_TWQ_LAZY_WAKE);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int io_link_skb(struct sk_buff *skb, struct ubuf_info *uarg)
|
||||||
|
{
|
||||||
|
struct io_notif_data *nd, *prev_nd;
|
||||||
|
struct io_kiocb *prev_notif, *notif;
|
||||||
|
struct ubuf_info *prev_uarg = skb_zcopy(skb);
|
||||||
|
|
||||||
|
nd = container_of(uarg, struct io_notif_data, uarg);
|
||||||
|
notif = cmd_to_io_kiocb(nd);
|
||||||
|
|
||||||
|
if (!prev_uarg) {
|
||||||
|
net_zcopy_get(&nd->uarg);
|
||||||
|
skb_zcopy_init(skb, &nd->uarg);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
/* handle it separately as we can't link a notif to itself */
|
||||||
|
if (unlikely(prev_uarg == &nd->uarg))
|
||||||
|
return 0;
|
||||||
|
/* we can't join two links together, just request a fresh skb */
|
||||||
|
if (unlikely(nd->head != nd || nd->next))
|
||||||
|
return -EEXIST;
|
||||||
|
/* don't mix zc providers */
|
||||||
|
if (unlikely(prev_uarg->ops != &io_ubuf_ops))
|
||||||
|
return -EEXIST;
|
||||||
|
|
||||||
|
prev_nd = container_of(prev_uarg, struct io_notif_data, uarg);
|
||||||
|
prev_notif = cmd_to_io_kiocb(nd);
|
||||||
|
|
||||||
|
/* make sure all noifications can be finished in the same task_work */
|
||||||
|
if (unlikely(notif->ctx != prev_notif->ctx ||
|
||||||
|
notif->task != prev_notif->task))
|
||||||
|
return -EEXIST;
|
||||||
|
|
||||||
|
nd->head = prev_nd->head;
|
||||||
|
nd->next = prev_nd->next;
|
||||||
|
prev_nd->next = nd;
|
||||||
|
net_zcopy_get(&nd->head->uarg);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
static const struct ubuf_info_ops io_ubuf_ops = {
|
static const struct ubuf_info_ops io_ubuf_ops = {
|
||||||
.complete = io_tx_ubuf_complete,
|
.complete = io_tx_ubuf_complete,
|
||||||
|
.link_skb = io_link_skb,
|
||||||
};
|
};
|
||||||
|
|
||||||
struct io_kiocb *io_alloc_notif(struct io_ring_ctx *ctx)
|
struct io_kiocb *io_alloc_notif(struct io_ring_ctx *ctx)
|
||||||
@ -65,6 +119,9 @@ struct io_kiocb *io_alloc_notif(struct io_ring_ctx *ctx)
|
|||||||
nd = io_notif_to_data(notif);
|
nd = io_notif_to_data(notif);
|
||||||
nd->zc_report = false;
|
nd->zc_report = false;
|
||||||
nd->account_pages = 0;
|
nd->account_pages = 0;
|
||||||
|
nd->next = NULL;
|
||||||
|
nd->head = nd;
|
||||||
|
|
||||||
nd->uarg.flags = IO_NOTIF_UBUF_FLAGS;
|
nd->uarg.flags = IO_NOTIF_UBUF_FLAGS;
|
||||||
nd->uarg.ops = &io_ubuf_ops;
|
nd->uarg.ops = &io_ubuf_ops;
|
||||||
refcount_set(&nd->uarg.refcnt, 1);
|
refcount_set(&nd->uarg.refcnt, 1);
|
||||||
|
@ -14,6 +14,9 @@ struct io_notif_data {
|
|||||||
struct file *file;
|
struct file *file;
|
||||||
struct ubuf_info uarg;
|
struct ubuf_info uarg;
|
||||||
|
|
||||||
|
struct io_notif_data *next;
|
||||||
|
struct io_notif_data *head;
|
||||||
|
|
||||||
unsigned account_pages;
|
unsigned account_pages;
|
||||||
bool zc_report;
|
bool zc_report;
|
||||||
bool zc_used;
|
bool zc_used;
|
||||||
|
Loading…
Reference in New Issue
Block a user