mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2025-01-01 10:42:11 +00:00
io_uring: add support for hybrid IOPOLL
A new hybrid poll is implemented on the io_uring layer. Once an IO is issued, it will not poll immediately, but rather block first and re-run before the IO completes, then poll to reap the IO. While this poll method could be a suboptimal solution when running on a single thread, it offers performance lower than regular polling but higher than IRQ, and CPU utilization is also lower than polling. To use hybrid polling, the ring must be set up with both the IORING_SETUP_IOPOLL and IORING_SETUP_HYBRID_IOPOLL flags set. Hybrid polling has the same restrictions as IOPOLL, in that commands must explicitly support it. Signed-off-by: hexue <xue01.he@samsung.com> Link: https://lore.kernel.org/r/20241101091957.564220-2-xue01.he@samsung.com Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
parent
c1329532d5
commit
01ee194d1a
@ -298,6 +298,11 @@ struct io_ring_ctx {
|
|||||||
* ->uring_cmd() by io_uring_cmd_insert_cancelable()
|
* ->uring_cmd() by io_uring_cmd_insert_cancelable()
|
||||||
*/
|
*/
|
||||||
struct hlist_head cancelable_uring_cmd;
|
struct hlist_head cancelable_uring_cmd;
|
||||||
|
/*
|
||||||
|
* For Hybrid IOPOLL, runtime in hybrid polling, without
|
||||||
|
* scheduling time
|
||||||
|
*/
|
||||||
|
u64 hybrid_poll_time;
|
||||||
} ____cacheline_aligned_in_smp;
|
} ____cacheline_aligned_in_smp;
|
||||||
|
|
||||||
struct {
|
struct {
|
||||||
@ -449,6 +454,7 @@ enum {
|
|||||||
REQ_F_LINK_TIMEOUT_BIT,
|
REQ_F_LINK_TIMEOUT_BIT,
|
||||||
REQ_F_NEED_CLEANUP_BIT,
|
REQ_F_NEED_CLEANUP_BIT,
|
||||||
REQ_F_POLLED_BIT,
|
REQ_F_POLLED_BIT,
|
||||||
|
REQ_F_HYBRID_IOPOLL_STATE_BIT,
|
||||||
REQ_F_BUFFER_SELECTED_BIT,
|
REQ_F_BUFFER_SELECTED_BIT,
|
||||||
REQ_F_BUFFER_RING_BIT,
|
REQ_F_BUFFER_RING_BIT,
|
||||||
REQ_F_REISSUE_BIT,
|
REQ_F_REISSUE_BIT,
|
||||||
@ -507,6 +513,8 @@ enum {
|
|||||||
REQ_F_NEED_CLEANUP = IO_REQ_FLAG(REQ_F_NEED_CLEANUP_BIT),
|
REQ_F_NEED_CLEANUP = IO_REQ_FLAG(REQ_F_NEED_CLEANUP_BIT),
|
||||||
/* already went through poll handler */
|
/* already went through poll handler */
|
||||||
REQ_F_POLLED = IO_REQ_FLAG(REQ_F_POLLED_BIT),
|
REQ_F_POLLED = IO_REQ_FLAG(REQ_F_POLLED_BIT),
|
||||||
|
/* every req only blocks once in hybrid poll */
|
||||||
|
REQ_F_IOPOLL_STATE = IO_REQ_FLAG(REQ_F_HYBRID_IOPOLL_STATE_BIT),
|
||||||
/* buffer already selected */
|
/* buffer already selected */
|
||||||
REQ_F_BUFFER_SELECTED = IO_REQ_FLAG(REQ_F_BUFFER_SELECTED_BIT),
|
REQ_F_BUFFER_SELECTED = IO_REQ_FLAG(REQ_F_BUFFER_SELECTED_BIT),
|
||||||
/* buffer selected from ring, needs commit */
|
/* buffer selected from ring, needs commit */
|
||||||
@ -639,8 +647,15 @@ struct io_kiocb {
|
|||||||
atomic_t refs;
|
atomic_t refs;
|
||||||
bool cancel_seq_set;
|
bool cancel_seq_set;
|
||||||
struct io_task_work io_task_work;
|
struct io_task_work io_task_work;
|
||||||
/* for polled requests, i.e. IORING_OP_POLL_ADD and async armed poll */
|
union {
|
||||||
struct hlist_node hash_node;
|
/*
|
||||||
|
* for polled requests, i.e. IORING_OP_POLL_ADD and async armed
|
||||||
|
* poll
|
||||||
|
*/
|
||||||
|
struct hlist_node hash_node;
|
||||||
|
/* For IOPOLL setup queues, with hybrid polling */
|
||||||
|
u64 iopoll_start;
|
||||||
|
};
|
||||||
/* internal polling, see IORING_FEAT_FAST_POLL */
|
/* internal polling, see IORING_FEAT_FAST_POLL */
|
||||||
struct async_poll *apoll;
|
struct async_poll *apoll;
|
||||||
/* opcode allocated if it needs to store data for async defer */
|
/* opcode allocated if it needs to store data for async defer */
|
||||||
|
@ -200,6 +200,9 @@ enum io_uring_sqe_flags_bit {
|
|||||||
*/
|
*/
|
||||||
#define IORING_SETUP_NO_SQARRAY (1U << 16)
|
#define IORING_SETUP_NO_SQARRAY (1U << 16)
|
||||||
|
|
||||||
|
/* Use hybrid poll in iopoll process */
|
||||||
|
#define IORING_SETUP_HYBRID_IOPOLL (1U << 17)
|
||||||
|
|
||||||
enum io_uring_op {
|
enum io_uring_op {
|
||||||
IORING_OP_NOP,
|
IORING_OP_NOP,
|
||||||
IORING_OP_READV,
|
IORING_OP_READV,
|
||||||
|
@ -307,6 +307,7 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
|
|||||||
goto err;
|
goto err;
|
||||||
|
|
||||||
ctx->flags = p->flags;
|
ctx->flags = p->flags;
|
||||||
|
ctx->hybrid_poll_time = LLONG_MAX;
|
||||||
atomic_set(&ctx->cq_wait_nr, IO_CQ_WAKE_INIT);
|
atomic_set(&ctx->cq_wait_nr, IO_CQ_WAKE_INIT);
|
||||||
init_waitqueue_head(&ctx->sqo_sq_wait);
|
init_waitqueue_head(&ctx->sqo_sq_wait);
|
||||||
INIT_LIST_HEAD(&ctx->sqd_list);
|
INIT_LIST_HEAD(&ctx->sqd_list);
|
||||||
@ -3630,6 +3631,11 @@ static __cold int io_uring_create(unsigned entries, struct io_uring_params *p,
|
|||||||
if (!(ctx->flags & IORING_SETUP_NO_SQARRAY))
|
if (!(ctx->flags & IORING_SETUP_NO_SQARRAY))
|
||||||
static_branch_inc(&io_key_has_sqarray);
|
static_branch_inc(&io_key_has_sqarray);
|
||||||
|
|
||||||
|
/* HYBRID_IOPOLL only valid with IOPOLL */
|
||||||
|
if ((ctx->flags & (IORING_SETUP_IOPOLL|IORING_SETUP_HYBRID_IOPOLL)) ==
|
||||||
|
IORING_SETUP_HYBRID_IOPOLL)
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
if ((ctx->flags & IORING_SETUP_DEFER_TASKRUN) &&
|
if ((ctx->flags & IORING_SETUP_DEFER_TASKRUN) &&
|
||||||
!(ctx->flags & IORING_SETUP_IOPOLL) &&
|
!(ctx->flags & IORING_SETUP_IOPOLL) &&
|
||||||
!(ctx->flags & IORING_SETUP_SQPOLL))
|
!(ctx->flags & IORING_SETUP_SQPOLL))
|
||||||
@ -3785,7 +3791,7 @@ static long io_uring_setup(u32 entries, struct io_uring_params __user *params)
|
|||||||
IORING_SETUP_SQE128 | IORING_SETUP_CQE32 |
|
IORING_SETUP_SQE128 | IORING_SETUP_CQE32 |
|
||||||
IORING_SETUP_SINGLE_ISSUER | IORING_SETUP_DEFER_TASKRUN |
|
IORING_SETUP_SINGLE_ISSUER | IORING_SETUP_DEFER_TASKRUN |
|
||||||
IORING_SETUP_NO_MMAP | IORING_SETUP_REGISTERED_FD_ONLY |
|
IORING_SETUP_NO_MMAP | IORING_SETUP_REGISTERED_FD_ONLY |
|
||||||
IORING_SETUP_NO_SQARRAY))
|
IORING_SETUP_NO_SQARRAY | IORING_SETUP_HYBRID_IOPOLL))
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
|
|
||||||
return io_uring_create(entries, &p, params);
|
return io_uring_create(entries, &p, params);
|
||||||
|
@ -817,6 +817,11 @@ static int io_rw_init_file(struct io_kiocb *req, fmode_t mode, int rw_type)
|
|||||||
kiocb->ki_flags |= IOCB_HIPRI;
|
kiocb->ki_flags |= IOCB_HIPRI;
|
||||||
kiocb->ki_complete = io_complete_rw_iopoll;
|
kiocb->ki_complete = io_complete_rw_iopoll;
|
||||||
req->iopoll_completed = 0;
|
req->iopoll_completed = 0;
|
||||||
|
if (ctx->flags & IORING_SETUP_HYBRID_IOPOLL) {
|
||||||
|
/* make sure every req only blocks once*/
|
||||||
|
req->flags &= ~REQ_F_IOPOLL_STATE;
|
||||||
|
req->iopoll_start = ktime_get_ns();
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
if (kiocb->ki_flags & IOCB_HIPRI)
|
if (kiocb->ki_flags & IOCB_HIPRI)
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
@ -1115,6 +1120,78 @@ void io_rw_fail(struct io_kiocb *req)
|
|||||||
io_req_set_res(req, res, req->cqe.flags);
|
io_req_set_res(req, res, req->cqe.flags);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int io_uring_classic_poll(struct io_kiocb *req, struct io_comp_batch *iob,
|
||||||
|
unsigned int poll_flags)
|
||||||
|
{
|
||||||
|
struct file *file = req->file;
|
||||||
|
|
||||||
|
if (req->opcode == IORING_OP_URING_CMD) {
|
||||||
|
struct io_uring_cmd *ioucmd;
|
||||||
|
|
||||||
|
ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd);
|
||||||
|
return file->f_op->uring_cmd_iopoll(ioucmd, iob, poll_flags);
|
||||||
|
} else {
|
||||||
|
struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
|
||||||
|
|
||||||
|
return file->f_op->iopoll(&rw->kiocb, iob, poll_flags);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static u64 io_hybrid_iopoll_delay(struct io_ring_ctx *ctx, struct io_kiocb *req)
|
||||||
|
{
|
||||||
|
struct hrtimer_sleeper timer;
|
||||||
|
enum hrtimer_mode mode;
|
||||||
|
ktime_t kt;
|
||||||
|
u64 sleep_time;
|
||||||
|
|
||||||
|
if (req->flags & REQ_F_IOPOLL_STATE)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
if (ctx->hybrid_poll_time == LLONG_MAX)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
/* Using half the running time to do schedule */
|
||||||
|
sleep_time = ctx->hybrid_poll_time / 2;
|
||||||
|
|
||||||
|
kt = ktime_set(0, sleep_time);
|
||||||
|
req->flags |= REQ_F_IOPOLL_STATE;
|
||||||
|
|
||||||
|
mode = HRTIMER_MODE_REL;
|
||||||
|
hrtimer_init_sleeper_on_stack(&timer, CLOCK_MONOTONIC, mode);
|
||||||
|
hrtimer_set_expires(&timer.timer, kt);
|
||||||
|
set_current_state(TASK_INTERRUPTIBLE);
|
||||||
|
hrtimer_sleeper_start_expires(&timer, mode);
|
||||||
|
|
||||||
|
if (timer.task)
|
||||||
|
io_schedule();
|
||||||
|
|
||||||
|
hrtimer_cancel(&timer.timer);
|
||||||
|
__set_current_state(TASK_RUNNING);
|
||||||
|
destroy_hrtimer_on_stack(&timer.timer);
|
||||||
|
return sleep_time;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int io_uring_hybrid_poll(struct io_kiocb *req,
|
||||||
|
struct io_comp_batch *iob, unsigned int poll_flags)
|
||||||
|
{
|
||||||
|
struct io_ring_ctx *ctx = req->ctx;
|
||||||
|
u64 runtime, sleep_time;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
sleep_time = io_hybrid_iopoll_delay(ctx, req);
|
||||||
|
ret = io_uring_classic_poll(req, iob, poll_flags);
|
||||||
|
runtime = ktime_get_ns() - req->iopoll_start - sleep_time;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Use minimum sleep time if we're polling devices with different
|
||||||
|
* latencies. We could get more completions from the faster ones.
|
||||||
|
*/
|
||||||
|
if (ctx->hybrid_poll_time > runtime)
|
||||||
|
ctx->hybrid_poll_time = runtime;
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin)
|
int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin)
|
||||||
{
|
{
|
||||||
struct io_wq_work_node *pos, *start, *prev;
|
struct io_wq_work_node *pos, *start, *prev;
|
||||||
@ -1131,7 +1208,6 @@ int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin)
|
|||||||
|
|
||||||
wq_list_for_each(pos, start, &ctx->iopoll_list) {
|
wq_list_for_each(pos, start, &ctx->iopoll_list) {
|
||||||
struct io_kiocb *req = container_of(pos, struct io_kiocb, comp_list);
|
struct io_kiocb *req = container_of(pos, struct io_kiocb, comp_list);
|
||||||
struct file *file = req->file;
|
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -1142,17 +1218,11 @@ int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin)
|
|||||||
if (READ_ONCE(req->iopoll_completed))
|
if (READ_ONCE(req->iopoll_completed))
|
||||||
break;
|
break;
|
||||||
|
|
||||||
if (req->opcode == IORING_OP_URING_CMD) {
|
if (ctx->flags & IORING_SETUP_HYBRID_IOPOLL)
|
||||||
struct io_uring_cmd *ioucmd;
|
ret = io_uring_hybrid_poll(req, &iob, poll_flags);
|
||||||
|
else
|
||||||
|
ret = io_uring_classic_poll(req, &iob, poll_flags);
|
||||||
|
|
||||||
ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd);
|
|
||||||
ret = file->f_op->uring_cmd_iopoll(ioucmd, &iob,
|
|
||||||
poll_flags);
|
|
||||||
} else {
|
|
||||||
struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
|
|
||||||
|
|
||||||
ret = file->f_op->iopoll(&rw->kiocb, &iob, poll_flags);
|
|
||||||
}
|
|
||||||
if (unlikely(ret < 0))
|
if (unlikely(ret < 0))
|
||||||
return ret;
|
return ret;
|
||||||
else if (ret)
|
else if (ret)
|
||||||
|
Loading…
Reference in New Issue
Block a user