mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2025-01-16 05:26:07 +00:00
io_uring: use region api for CQ
Convert internal parts of the CQ/SQ array management to the region API. Signed-off-by: Pavel Begunkov <asml.silence@gmail.com> Link: https://lore.kernel.org/r/46fc3c801290d6b1ac16023d78f6b8e685c87fd6.1732886067.git.asml.silence@gmail.com Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
parent
55ea1ea1c8
commit
6be74cf5d7
@ -427,14 +427,8 @@ struct io_ring_ctx {
|
|||||||
*/
|
*/
|
||||||
struct mutex mmap_lock;
|
struct mutex mmap_lock;
|
||||||
|
|
||||||
/*
|
|
||||||
* If IORING_SETUP_NO_MMAP is used, then the below holds
|
|
||||||
* the gup'ed pages for the two rings, and the sqes.
|
|
||||||
*/
|
|
||||||
unsigned short n_ring_pages;
|
|
||||||
struct page **ring_pages;
|
|
||||||
|
|
||||||
struct io_mapped_region sq_region;
|
struct io_mapped_region sq_region;
|
||||||
|
struct io_mapped_region ring_region;
|
||||||
/* used for optimised request parameter and wait argument passing */
|
/* used for optimised request parameter and wait argument passing */
|
||||||
struct io_mapped_region param_region;
|
struct io_mapped_region param_region;
|
||||||
};
|
};
|
||||||
|
@ -2630,26 +2630,10 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, u32 flags,
|
|||||||
return READ_ONCE(rings->cq.head) == READ_ONCE(rings->cq.tail) ? ret : 0;
|
return READ_ONCE(rings->cq.head) == READ_ONCE(rings->cq.tail) ? ret : 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void *io_rings_map(struct io_ring_ctx *ctx, unsigned long uaddr,
|
|
||||||
size_t size)
|
|
||||||
{
|
|
||||||
return __io_uaddr_map(&ctx->ring_pages, &ctx->n_ring_pages, uaddr,
|
|
||||||
size);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void io_rings_free(struct io_ring_ctx *ctx)
|
static void io_rings_free(struct io_ring_ctx *ctx)
|
||||||
{
|
{
|
||||||
if (!(ctx->flags & IORING_SETUP_NO_MMAP)) {
|
|
||||||
io_pages_unmap(ctx->rings, &ctx->ring_pages, &ctx->n_ring_pages,
|
|
||||||
true);
|
|
||||||
} else {
|
|
||||||
io_pages_free(&ctx->ring_pages, ctx->n_ring_pages);
|
|
||||||
ctx->n_ring_pages = 0;
|
|
||||||
vunmap(ctx->rings);
|
|
||||||
}
|
|
||||||
|
|
||||||
io_free_region(ctx, &ctx->sq_region);
|
io_free_region(ctx, &ctx->sq_region);
|
||||||
|
io_free_region(ctx, &ctx->ring_region);
|
||||||
ctx->rings = NULL;
|
ctx->rings = NULL;
|
||||||
ctx->sq_sqes = NULL;
|
ctx->sq_sqes = NULL;
|
||||||
}
|
}
|
||||||
@ -3480,15 +3464,17 @@ static __cold int io_allocate_scq_urings(struct io_ring_ctx *ctx,
|
|||||||
if (size == SIZE_MAX)
|
if (size == SIZE_MAX)
|
||||||
return -EOVERFLOW;
|
return -EOVERFLOW;
|
||||||
|
|
||||||
if (!(ctx->flags & IORING_SETUP_NO_MMAP))
|
memset(&rd, 0, sizeof(rd));
|
||||||
rings = io_pages_map(&ctx->ring_pages, &ctx->n_ring_pages, size);
|
rd.size = PAGE_ALIGN(size);
|
||||||
else
|
if (ctx->flags & IORING_SETUP_NO_MMAP) {
|
||||||
rings = io_rings_map(ctx, p->cq_off.user_addr, size);
|
rd.user_addr = p->cq_off.user_addr;
|
||||||
|
rd.flags |= IORING_MEM_REGION_TYPE_USER;
|
||||||
|
}
|
||||||
|
ret = io_create_region(ctx, &ctx->ring_region, &rd, IORING_OFF_CQ_RING);
|
||||||
|
if (ret)
|
||||||
|
return ret;
|
||||||
|
ctx->rings = rings = io_region_get_ptr(&ctx->ring_region);
|
||||||
|
|
||||||
if (IS_ERR(rings))
|
|
||||||
return PTR_ERR(rings);
|
|
||||||
|
|
||||||
ctx->rings = rings;
|
|
||||||
if (!(ctx->flags & IORING_SETUP_NO_SQARRAY))
|
if (!(ctx->flags & IORING_SETUP_NO_SQARRAY))
|
||||||
ctx->sq_array = (u32 *)((char *)rings + sq_array_offset);
|
ctx->sq_array = (u32 *)((char *)rings + sq_array_offset);
|
||||||
rings->sq_ring_mask = p->sq_entries - 1;
|
rings->sq_ring_mask = p->sq_entries - 1;
|
||||||
|
@ -120,18 +120,6 @@ void io_pages_unmap(void *ptr, struct page ***pages, unsigned short *npages,
|
|||||||
*npages = 0;
|
*npages = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
void io_pages_free(struct page ***pages, int npages)
|
|
||||||
{
|
|
||||||
struct page **page_array = *pages;
|
|
||||||
|
|
||||||
if (!page_array)
|
|
||||||
return;
|
|
||||||
|
|
||||||
unpin_user_pages(page_array, npages);
|
|
||||||
kvfree(page_array);
|
|
||||||
*pages = NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
struct page **io_pin_pages(unsigned long uaddr, unsigned long len, int *npages)
|
struct page **io_pin_pages(unsigned long uaddr, unsigned long len, int *npages)
|
||||||
{
|
{
|
||||||
unsigned long start, end, nr_pages;
|
unsigned long start, end, nr_pages;
|
||||||
@ -174,34 +162,6 @@ struct page **io_pin_pages(unsigned long uaddr, unsigned long len, int *npages)
|
|||||||
return ERR_PTR(ret);
|
return ERR_PTR(ret);
|
||||||
}
|
}
|
||||||
|
|
||||||
void *__io_uaddr_map(struct page ***pages, unsigned short *npages,
|
|
||||||
unsigned long uaddr, size_t size)
|
|
||||||
{
|
|
||||||
struct page **page_array;
|
|
||||||
unsigned int nr_pages;
|
|
||||||
void *page_addr;
|
|
||||||
|
|
||||||
*npages = 0;
|
|
||||||
|
|
||||||
if (uaddr & (PAGE_SIZE - 1) || !size)
|
|
||||||
return ERR_PTR(-EINVAL);
|
|
||||||
|
|
||||||
nr_pages = 0;
|
|
||||||
page_array = io_pin_pages(uaddr, size, &nr_pages);
|
|
||||||
if (IS_ERR(page_array))
|
|
||||||
return page_array;
|
|
||||||
|
|
||||||
page_addr = vmap(page_array, nr_pages, VM_MAP, PAGE_KERNEL);
|
|
||||||
if (page_addr) {
|
|
||||||
*pages = page_array;
|
|
||||||
*npages = nr_pages;
|
|
||||||
return page_addr;
|
|
||||||
}
|
|
||||||
|
|
||||||
io_pages_free(&page_array, nr_pages);
|
|
||||||
return ERR_PTR(-ENOMEM);
|
|
||||||
}
|
|
||||||
|
|
||||||
enum {
|
enum {
|
||||||
/* memory was vmap'ed for the kernel, freeing the region vunmap's it */
|
/* memory was vmap'ed for the kernel, freeing the region vunmap's it */
|
||||||
IO_REGION_F_VMAP = 1,
|
IO_REGION_F_VMAP = 1,
|
||||||
@ -446,9 +406,10 @@ int io_uring_mmap_pages(struct io_ring_ctx *ctx, struct vm_area_struct *vma,
|
|||||||
|
|
||||||
static int io_region_mmap(struct io_ring_ctx *ctx,
|
static int io_region_mmap(struct io_ring_ctx *ctx,
|
||||||
struct io_mapped_region *mr,
|
struct io_mapped_region *mr,
|
||||||
struct vm_area_struct *vma)
|
struct vm_area_struct *vma,
|
||||||
|
unsigned max_pages)
|
||||||
{
|
{
|
||||||
unsigned long nr_pages = mr->nr_pages;
|
unsigned long nr_pages = min(mr->nr_pages, max_pages);
|
||||||
|
|
||||||
vm_flags_set(vma, VM_DONTEXPAND);
|
vm_flags_set(vma, VM_DONTEXPAND);
|
||||||
return vm_insert_pages(vma, vma->vm_start, mr->pages, &nr_pages);
|
return vm_insert_pages(vma, vma->vm_start, mr->pages, &nr_pages);
|
||||||
@ -459,7 +420,7 @@ __cold int io_uring_mmap(struct file *file, struct vm_area_struct *vma)
|
|||||||
struct io_ring_ctx *ctx = file->private_data;
|
struct io_ring_ctx *ctx = file->private_data;
|
||||||
size_t sz = vma->vm_end - vma->vm_start;
|
size_t sz = vma->vm_end - vma->vm_start;
|
||||||
long offset = vma->vm_pgoff << PAGE_SHIFT;
|
long offset = vma->vm_pgoff << PAGE_SHIFT;
|
||||||
unsigned int npages;
|
unsigned int page_limit;
|
||||||
void *ptr;
|
void *ptr;
|
||||||
|
|
||||||
guard(mutex)(&ctx->mmap_lock);
|
guard(mutex)(&ctx->mmap_lock);
|
||||||
@ -471,14 +432,14 @@ __cold int io_uring_mmap(struct file *file, struct vm_area_struct *vma)
|
|||||||
switch (offset & IORING_OFF_MMAP_MASK) {
|
switch (offset & IORING_OFF_MMAP_MASK) {
|
||||||
case IORING_OFF_SQ_RING:
|
case IORING_OFF_SQ_RING:
|
||||||
case IORING_OFF_CQ_RING:
|
case IORING_OFF_CQ_RING:
|
||||||
npages = min(ctx->n_ring_pages, (sz + PAGE_SIZE - 1) >> PAGE_SHIFT);
|
page_limit = (sz + PAGE_SIZE - 1) >> PAGE_SHIFT;
|
||||||
return io_uring_mmap_pages(ctx, vma, ctx->ring_pages, npages);
|
return io_region_mmap(ctx, &ctx->ring_region, vma, page_limit);
|
||||||
case IORING_OFF_SQES:
|
case IORING_OFF_SQES:
|
||||||
return io_region_mmap(ctx, &ctx->sq_region, vma);
|
return io_region_mmap(ctx, &ctx->sq_region, vma, UINT_MAX);
|
||||||
case IORING_OFF_PBUF_RING:
|
case IORING_OFF_PBUF_RING:
|
||||||
return io_pbuf_mmap(file, vma);
|
return io_pbuf_mmap(file, vma);
|
||||||
case IORING_MAP_OFF_PARAM_REGION:
|
case IORING_MAP_OFF_PARAM_REGION:
|
||||||
return io_region_mmap(ctx, &ctx->param_region, vma);
|
return io_region_mmap(ctx, &ctx->param_region, vma, UINT_MAX);
|
||||||
}
|
}
|
||||||
|
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
|
@ -4,7 +4,6 @@
|
|||||||
#define IORING_MAP_OFF_PARAM_REGION 0x20000000ULL
|
#define IORING_MAP_OFF_PARAM_REGION 0x20000000ULL
|
||||||
|
|
||||||
struct page **io_pin_pages(unsigned long ubuf, unsigned long len, int *npages);
|
struct page **io_pin_pages(unsigned long ubuf, unsigned long len, int *npages);
|
||||||
void io_pages_free(struct page ***pages, int npages);
|
|
||||||
int io_uring_mmap_pages(struct io_ring_ctx *ctx, struct vm_area_struct *vma,
|
int io_uring_mmap_pages(struct io_ring_ctx *ctx, struct vm_area_struct *vma,
|
||||||
struct page **pages, int npages);
|
struct page **pages, int npages);
|
||||||
|
|
||||||
@ -13,9 +12,6 @@ void *io_pages_map(struct page ***out_pages, unsigned short *npages,
|
|||||||
void io_pages_unmap(void *ptr, struct page ***pages, unsigned short *npages,
|
void io_pages_unmap(void *ptr, struct page ***pages, unsigned short *npages,
|
||||||
bool put_pages);
|
bool put_pages);
|
||||||
|
|
||||||
void *__io_uaddr_map(struct page ***pages, unsigned short *npages,
|
|
||||||
unsigned long uaddr, size_t size);
|
|
||||||
|
|
||||||
#ifndef CONFIG_MMU
|
#ifndef CONFIG_MMU
|
||||||
unsigned int io_uring_nommu_mmap_capabilities(struct file *file);
|
unsigned int io_uring_nommu_mmap_capabilities(struct file *file);
|
||||||
#endif
|
#endif
|
||||||
|
@ -367,26 +367,19 @@ static int io_register_clock(struct io_ring_ctx *ctx,
|
|||||||
* either mapping or freeing.
|
* either mapping or freeing.
|
||||||
*/
|
*/
|
||||||
struct io_ring_ctx_rings {
|
struct io_ring_ctx_rings {
|
||||||
unsigned short n_ring_pages;
|
|
||||||
struct page **ring_pages;
|
|
||||||
struct io_rings *rings;
|
struct io_rings *rings;
|
||||||
|
|
||||||
struct io_uring_sqe *sq_sqes;
|
struct io_uring_sqe *sq_sqes;
|
||||||
|
|
||||||
struct io_mapped_region sq_region;
|
struct io_mapped_region sq_region;
|
||||||
|
struct io_mapped_region ring_region;
|
||||||
};
|
};
|
||||||
|
|
||||||
static void io_register_free_rings(struct io_ring_ctx *ctx,
|
static void io_register_free_rings(struct io_ring_ctx *ctx,
|
||||||
struct io_uring_params *p,
|
struct io_uring_params *p,
|
||||||
struct io_ring_ctx_rings *r)
|
struct io_ring_ctx_rings *r)
|
||||||
{
|
{
|
||||||
if (!(p->flags & IORING_SETUP_NO_MMAP)) {
|
|
||||||
io_pages_unmap(r->rings, &r->ring_pages, &r->n_ring_pages,
|
|
||||||
true);
|
|
||||||
} else {
|
|
||||||
io_pages_free(&r->ring_pages, r->n_ring_pages);
|
|
||||||
vunmap(r->rings);
|
|
||||||
}
|
|
||||||
io_free_region(ctx, &r->sq_region);
|
io_free_region(ctx, &r->sq_region);
|
||||||
|
io_free_region(ctx, &r->ring_region);
|
||||||
}
|
}
|
||||||
|
|
||||||
#define swap_old(ctx, o, n, field) \
|
#define swap_old(ctx, o, n, field) \
|
||||||
@ -436,13 +429,18 @@ static int io_register_resize_rings(struct io_ring_ctx *ctx, void __user *arg)
|
|||||||
if (size == SIZE_MAX)
|
if (size == SIZE_MAX)
|
||||||
return -EOVERFLOW;
|
return -EOVERFLOW;
|
||||||
|
|
||||||
if (!(p.flags & IORING_SETUP_NO_MMAP))
|
memset(&rd, 0, sizeof(rd));
|
||||||
n.rings = io_pages_map(&n.ring_pages, &n.n_ring_pages, size);
|
rd.size = PAGE_ALIGN(size);
|
||||||
else
|
if (p.flags & IORING_SETUP_NO_MMAP) {
|
||||||
n.rings = __io_uaddr_map(&n.ring_pages, &n.n_ring_pages,
|
rd.user_addr = p.cq_off.user_addr;
|
||||||
p.cq_off.user_addr, size);
|
rd.flags |= IORING_MEM_REGION_TYPE_USER;
|
||||||
if (IS_ERR(n.rings))
|
}
|
||||||
return PTR_ERR(n.rings);
|
ret = io_create_region_mmap_safe(ctx, &n.ring_region, &rd, IORING_OFF_CQ_RING);
|
||||||
|
if (ret) {
|
||||||
|
io_register_free_rings(ctx, &p, &n);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
n.rings = io_region_get_ptr(&n.ring_region);
|
||||||
|
|
||||||
n.rings->sq_ring_mask = p.sq_entries - 1;
|
n.rings->sq_ring_mask = p.sq_entries - 1;
|
||||||
n.rings->cq_ring_mask = p.cq_entries - 1;
|
n.rings->cq_ring_mask = p.cq_entries - 1;
|
||||||
@ -552,8 +550,7 @@ overflow:
|
|||||||
|
|
||||||
ctx->rings = n.rings;
|
ctx->rings = n.rings;
|
||||||
ctx->sq_sqes = n.sq_sqes;
|
ctx->sq_sqes = n.sq_sqes;
|
||||||
swap_old(ctx, o, n, n_ring_pages);
|
swap_old(ctx, o, n, ring_region);
|
||||||
swap_old(ctx, o, n, ring_pages);
|
|
||||||
swap_old(ctx, o, n, sq_region);
|
swap_old(ctx, o, n, sq_region);
|
||||||
to_free = &o;
|
to_free = &o;
|
||||||
ret = 0;
|
ret = 0;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user