mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2025-01-15 11:47:47 +00:00
io_uring/kbuf: use region api for pbuf rings
Convert internal parts of the provided buffer ring managment to the region API. It's the last non-region mapped ring we have, so it also kills a bunch of now unused memmap.c helpers. Signed-off-by: Pavel Begunkov <asml.silence@gmail.com> Link: https://lore.kernel.org/r/6c40cf7beaa648558acd4d84bc0fb3279a35d74b.1732886067.git.asml.silence@gmail.com Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
parent
90175f3f50
commit
ef62de3c4a
172
io_uring/kbuf.c
172
io_uring/kbuf.c
@ -351,17 +351,7 @@ static int __io_remove_buffers(struct io_ring_ctx *ctx,
|
||||
|
||||
if (bl->flags & IOBL_BUF_RING) {
|
||||
i = bl->buf_ring->tail - bl->head;
|
||||
if (bl->buf_nr_pages) {
|
||||
int j;
|
||||
|
||||
if (!(bl->flags & IOBL_MMAP)) {
|
||||
for (j = 0; j < bl->buf_nr_pages; j++)
|
||||
unpin_user_page(bl->buf_pages[j]);
|
||||
}
|
||||
io_pages_unmap(bl->buf_ring, &bl->buf_pages,
|
||||
&bl->buf_nr_pages, bl->flags & IOBL_MMAP);
|
||||
bl->flags &= ~IOBL_MMAP;
|
||||
}
|
||||
io_free_region(ctx, &bl->region);
|
||||
/* make sure it's seen as empty */
|
||||
INIT_LIST_HEAD(&bl->buf_list);
|
||||
bl->flags &= ~IOBL_BUF_RING;
|
||||
@ -614,75 +604,14 @@ err:
|
||||
return IOU_OK;
|
||||
}
|
||||
|
||||
static int io_pin_pbuf_ring(struct io_uring_buf_reg *reg,
|
||||
struct io_buffer_list *bl)
|
||||
{
|
||||
struct io_uring_buf_ring *br = NULL;
|
||||
struct page **pages;
|
||||
int nr_pages, ret;
|
||||
|
||||
pages = io_pin_pages(reg->ring_addr,
|
||||
flex_array_size(br, bufs, reg->ring_entries),
|
||||
&nr_pages);
|
||||
if (IS_ERR(pages))
|
||||
return PTR_ERR(pages);
|
||||
|
||||
br = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL);
|
||||
if (!br) {
|
||||
ret = -ENOMEM;
|
||||
goto error_unpin;
|
||||
}
|
||||
|
||||
#ifdef SHM_COLOUR
|
||||
/*
|
||||
* On platforms that have specific aliasing requirements, SHM_COLOUR
|
||||
* is set and we must guarantee that the kernel and user side align
|
||||
* nicely. We cannot do that if IOU_PBUF_RING_MMAP isn't set and
|
||||
* the application mmap's the provided ring buffer. Fail the request
|
||||
* if we, by chance, don't end up with aligned addresses. The app
|
||||
* should use IOU_PBUF_RING_MMAP instead, and liburing will handle
|
||||
* this transparently.
|
||||
*/
|
||||
if ((reg->ring_addr | (unsigned long) br) & (SHM_COLOUR - 1)) {
|
||||
ret = -EINVAL;
|
||||
goto error_unpin;
|
||||
}
|
||||
#endif
|
||||
bl->buf_pages = pages;
|
||||
bl->buf_nr_pages = nr_pages;
|
||||
bl->buf_ring = br;
|
||||
bl->flags |= IOBL_BUF_RING;
|
||||
bl->flags &= ~IOBL_MMAP;
|
||||
return 0;
|
||||
error_unpin:
|
||||
unpin_user_pages(pages, nr_pages);
|
||||
kvfree(pages);
|
||||
vunmap(br);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int io_alloc_pbuf_ring(struct io_ring_ctx *ctx,
|
||||
struct io_uring_buf_reg *reg,
|
||||
struct io_buffer_list *bl)
|
||||
{
|
||||
size_t ring_size;
|
||||
|
||||
ring_size = reg->ring_entries * sizeof(struct io_uring_buf_ring);
|
||||
|
||||
bl->buf_ring = io_pages_map(&bl->buf_pages, &bl->buf_nr_pages, ring_size);
|
||||
if (IS_ERR(bl->buf_ring)) {
|
||||
bl->buf_ring = NULL;
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
bl->flags |= (IOBL_BUF_RING | IOBL_MMAP);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
|
||||
{
|
||||
struct io_uring_buf_reg reg;
|
||||
struct io_buffer_list *bl, *free_bl = NULL;
|
||||
struct io_uring_region_desc rd;
|
||||
struct io_uring_buf_ring *br;
|
||||
unsigned long mmap_offset;
|
||||
unsigned long ring_size;
|
||||
int ret;
|
||||
|
||||
lockdep_assert_held(&ctx->uring_lock);
|
||||
@ -694,19 +623,8 @@ int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
|
||||
return -EINVAL;
|
||||
if (reg.flags & ~(IOU_PBUF_RING_MMAP | IOU_PBUF_RING_INC))
|
||||
return -EINVAL;
|
||||
if (!(reg.flags & IOU_PBUF_RING_MMAP)) {
|
||||
if (!reg.ring_addr)
|
||||
return -EFAULT;
|
||||
if (reg.ring_addr & ~PAGE_MASK)
|
||||
return -EINVAL;
|
||||
} else {
|
||||
if (reg.ring_addr)
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (!is_power_of_2(reg.ring_entries))
|
||||
return -EINVAL;
|
||||
|
||||
/* cannot disambiguate full vs empty due to head/tail size */
|
||||
if (reg.ring_entries >= 65536)
|
||||
return -EINVAL;
|
||||
@ -722,21 +640,47 @@ int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
if (!(reg.flags & IOU_PBUF_RING_MMAP))
|
||||
ret = io_pin_pbuf_ring(®, bl);
|
||||
else
|
||||
ret = io_alloc_pbuf_ring(ctx, ®, bl);
|
||||
mmap_offset = reg.bgid << IORING_OFF_PBUF_SHIFT;
|
||||
ring_size = flex_array_size(br, bufs, reg.ring_entries);
|
||||
|
||||
if (!ret) {
|
||||
bl->nr_entries = reg.ring_entries;
|
||||
bl->mask = reg.ring_entries - 1;
|
||||
if (reg.flags & IOU_PBUF_RING_INC)
|
||||
bl->flags |= IOBL_INC;
|
||||
|
||||
io_buffer_add_list(ctx, bl, reg.bgid);
|
||||
return 0;
|
||||
memset(&rd, 0, sizeof(rd));
|
||||
rd.size = PAGE_ALIGN(ring_size);
|
||||
if (!(reg.flags & IOU_PBUF_RING_MMAP)) {
|
||||
rd.user_addr = reg.ring_addr;
|
||||
rd.flags |= IORING_MEM_REGION_TYPE_USER;
|
||||
}
|
||||
ret = io_create_region_mmap_safe(ctx, &bl->region, &rd, mmap_offset);
|
||||
if (ret)
|
||||
goto fail;
|
||||
br = io_region_get_ptr(&bl->region);
|
||||
|
||||
#ifdef SHM_COLOUR
|
||||
/*
|
||||
* On platforms that have specific aliasing requirements, SHM_COLOUR
|
||||
* is set and we must guarantee that the kernel and user side align
|
||||
* nicely. We cannot do that if IOU_PBUF_RING_MMAP isn't set and
|
||||
* the application mmap's the provided ring buffer. Fail the request
|
||||
* if we, by chance, don't end up with aligned addresses. The app
|
||||
* should use IOU_PBUF_RING_MMAP instead, and liburing will handle
|
||||
* this transparently.
|
||||
*/
|
||||
if (!(reg.flags & IOU_PBUF_RING_MMAP) &&
|
||||
((reg.ring_addr | (unsigned long)br) & (SHM_COLOUR - 1))) {
|
||||
ret = -EINVAL;
|
||||
goto fail;
|
||||
}
|
||||
#endif
|
||||
|
||||
bl->nr_entries = reg.ring_entries;
|
||||
bl->mask = reg.ring_entries - 1;
|
||||
bl->flags |= IOBL_BUF_RING;
|
||||
bl->buf_ring = br;
|
||||
if (reg.flags & IOU_PBUF_RING_INC)
|
||||
bl->flags |= IOBL_INC;
|
||||
io_buffer_add_list(ctx, bl, reg.bgid);
|
||||
return 0;
|
||||
fail:
|
||||
io_free_region(ctx, &bl->region);
|
||||
kfree(free_bl);
|
||||
return ret;
|
||||
}
|
||||
@ -794,32 +738,18 @@ int io_register_pbuf_status(struct io_ring_ctx *ctx, void __user *arg)
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct io_buffer_list *io_pbuf_get_bl(struct io_ring_ctx *ctx,
|
||||
unsigned long bgid)
|
||||
struct io_mapped_region *io_pbuf_get_region(struct io_ring_ctx *ctx,
|
||||
unsigned int bgid)
|
||||
{
|
||||
struct io_buffer_list *bl;
|
||||
|
||||
bl = xa_load(&ctx->io_bl_xa, bgid);
|
||||
/* must be a mmap'able buffer ring and have pages */
|
||||
if (bl && bl->flags & IOBL_MMAP)
|
||||
return bl;
|
||||
|
||||
return ERR_PTR(-EINVAL);
|
||||
}
|
||||
|
||||
int io_pbuf_mmap(struct file *file, struct vm_area_struct *vma)
|
||||
{
|
||||
struct io_ring_ctx *ctx = file->private_data;
|
||||
loff_t pgoff = vma->vm_pgoff << PAGE_SHIFT;
|
||||
struct io_buffer_list *bl;
|
||||
int bgid;
|
||||
|
||||
lockdep_assert_held(&ctx->mmap_lock);
|
||||
|
||||
bgid = (pgoff & ~IORING_OFF_MMAP_MASK) >> IORING_OFF_PBUF_SHIFT;
|
||||
bl = io_pbuf_get_bl(ctx, bgid);
|
||||
if (IS_ERR(bl))
|
||||
return PTR_ERR(bl);
|
||||
bl = xa_load(&ctx->io_bl_xa, bgid);
|
||||
if (!bl || !(bl->flags & IOBL_BUF_RING))
|
||||
return NULL;
|
||||
if (WARN_ON_ONCE(!io_region_is_set(&bl->region)))
|
||||
return NULL;
|
||||
|
||||
return io_uring_mmap_pages(ctx, vma, bl->buf_pages, bl->buf_nr_pages);
|
||||
return &bl->region;
|
||||
}
|
||||
|
@ -3,15 +3,13 @@
|
||||
#define IOU_KBUF_H
|
||||
|
||||
#include <uapi/linux/io_uring.h>
|
||||
#include <linux/io_uring_types.h>
|
||||
|
||||
enum {
|
||||
/* ring mapped provided buffers */
|
||||
IOBL_BUF_RING = 1,
|
||||
/* ring mapped provided buffers, but mmap'ed by application */
|
||||
IOBL_MMAP = 2,
|
||||
/* buffers are consumed incrementally rather than always fully */
|
||||
IOBL_INC = 4,
|
||||
|
||||
IOBL_INC = 2,
|
||||
};
|
||||
|
||||
struct io_buffer_list {
|
||||
@ -21,10 +19,7 @@ struct io_buffer_list {
|
||||
*/
|
||||
union {
|
||||
struct list_head buf_list;
|
||||
struct {
|
||||
struct page **buf_pages;
|
||||
struct io_uring_buf_ring *buf_ring;
|
||||
};
|
||||
struct io_uring_buf_ring *buf_ring;
|
||||
};
|
||||
__u16 bgid;
|
||||
|
||||
@ -35,6 +30,8 @@ struct io_buffer_list {
|
||||
__u16 mask;
|
||||
|
||||
__u16 flags;
|
||||
|
||||
struct io_mapped_region region;
|
||||
};
|
||||
|
||||
struct io_buffer {
|
||||
@ -81,9 +78,8 @@ void __io_put_kbuf(struct io_kiocb *req, int len, unsigned issue_flags);
|
||||
|
||||
bool io_kbuf_recycle_legacy(struct io_kiocb *req, unsigned issue_flags);
|
||||
|
||||
struct io_buffer_list *io_pbuf_get_bl(struct io_ring_ctx *ctx,
|
||||
unsigned long bgid);
|
||||
int io_pbuf_mmap(struct file *file, struct vm_area_struct *vma);
|
||||
struct io_mapped_region *io_pbuf_get_region(struct io_ring_ctx *ctx,
|
||||
unsigned int bgid);
|
||||
|
||||
static inline bool io_kbuf_recycle_ring(struct io_kiocb *req)
|
||||
{
|
||||
|
@ -36,90 +36,6 @@ static void *io_mem_alloc_compound(struct page **pages, int nr_pages,
|
||||
return page_address(page);
|
||||
}
|
||||
|
||||
static void *io_mem_alloc_single(struct page **pages, int nr_pages, size_t size,
|
||||
gfp_t gfp)
|
||||
{
|
||||
void *ret;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < nr_pages; i++) {
|
||||
pages[i] = alloc_page(gfp);
|
||||
if (!pages[i])
|
||||
goto err;
|
||||
}
|
||||
|
||||
ret = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL);
|
||||
if (ret)
|
||||
return ret;
|
||||
err:
|
||||
while (i--)
|
||||
put_page(pages[i]);
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
|
||||
void *io_pages_map(struct page ***out_pages, unsigned short *npages,
|
||||
size_t size)
|
||||
{
|
||||
gfp_t gfp = GFP_KERNEL_ACCOUNT | __GFP_ZERO | __GFP_NOWARN;
|
||||
struct page **pages;
|
||||
int nr_pages;
|
||||
void *ret;
|
||||
|
||||
nr_pages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
|
||||
pages = kvmalloc_array(nr_pages, sizeof(struct page *), gfp);
|
||||
if (!pages)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
ret = io_mem_alloc_compound(pages, nr_pages, size, gfp);
|
||||
if (!IS_ERR(ret))
|
||||
goto done;
|
||||
if (nr_pages == 1)
|
||||
goto fail;
|
||||
|
||||
ret = io_mem_alloc_single(pages, nr_pages, size, gfp);
|
||||
if (!IS_ERR(ret)) {
|
||||
done:
|
||||
*out_pages = pages;
|
||||
*npages = nr_pages;
|
||||
return ret;
|
||||
}
|
||||
fail:
|
||||
kvfree(pages);
|
||||
*out_pages = NULL;
|
||||
*npages = 0;
|
||||
return ret;
|
||||
}
|
||||
|
||||
void io_pages_unmap(void *ptr, struct page ***pages, unsigned short *npages,
|
||||
bool put_pages)
|
||||
{
|
||||
bool do_vunmap = false;
|
||||
|
||||
if (!ptr)
|
||||
return;
|
||||
|
||||
if (put_pages && *npages) {
|
||||
struct page **to_free = *pages;
|
||||
int i;
|
||||
|
||||
/*
|
||||
* Only did vmap for the non-compound multiple page case.
|
||||
* For the compound page, we just need to put the head.
|
||||
*/
|
||||
if (PageCompound(to_free[0]))
|
||||
*npages = 1;
|
||||
else if (*npages > 1)
|
||||
do_vunmap = true;
|
||||
for (i = 0; i < *npages; i++)
|
||||
put_page(to_free[i]);
|
||||
}
|
||||
if (do_vunmap)
|
||||
vunmap(ptr);
|
||||
kvfree(*pages);
|
||||
*pages = NULL;
|
||||
*npages = 0;
|
||||
}
|
||||
|
||||
struct page **io_pin_pages(unsigned long uaddr, unsigned long len, int *npages)
|
||||
{
|
||||
unsigned long start, end, nr_pages;
|
||||
@ -374,16 +290,14 @@ static void *io_uring_validate_mmap_request(struct file *file, loff_t pgoff,
|
||||
return ERR_PTR(-EFAULT);
|
||||
return ctx->sq_sqes;
|
||||
case IORING_OFF_PBUF_RING: {
|
||||
struct io_buffer_list *bl;
|
||||
struct io_mapped_region *region;
|
||||
unsigned int bgid;
|
||||
void *ptr;
|
||||
|
||||
bgid = (offset & ~IORING_OFF_MMAP_MASK) >> IORING_OFF_PBUF_SHIFT;
|
||||
bl = io_pbuf_get_bl(ctx, bgid);
|
||||
if (IS_ERR(bl))
|
||||
return bl;
|
||||
ptr = bl->buf_ring;
|
||||
return ptr;
|
||||
region = io_pbuf_get_region(ctx, bgid);
|
||||
if (!region)
|
||||
return ERR_PTR(-EINVAL);
|
||||
return io_region_validate_mmap(ctx, region);
|
||||
}
|
||||
case IORING_MAP_OFF_PARAM_REGION:
|
||||
return io_region_validate_mmap(ctx, &ctx->param_region);
|
||||
@ -392,15 +306,6 @@ static void *io_uring_validate_mmap_request(struct file *file, loff_t pgoff,
|
||||
return ERR_PTR(-EINVAL);
|
||||
}
|
||||
|
||||
int io_uring_mmap_pages(struct io_ring_ctx *ctx, struct vm_area_struct *vma,
|
||||
struct page **pages, int npages)
|
||||
{
|
||||
unsigned long nr_pages = npages;
|
||||
|
||||
vm_flags_set(vma, VM_DONTEXPAND);
|
||||
return vm_insert_pages(vma, vma->vm_start, pages, &nr_pages);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_MMU
|
||||
|
||||
static int io_region_mmap(struct io_ring_ctx *ctx,
|
||||
@ -435,8 +340,17 @@ __cold int io_uring_mmap(struct file *file, struct vm_area_struct *vma)
|
||||
return io_region_mmap(ctx, &ctx->ring_region, vma, page_limit);
|
||||
case IORING_OFF_SQES:
|
||||
return io_region_mmap(ctx, &ctx->sq_region, vma, UINT_MAX);
|
||||
case IORING_OFF_PBUF_RING:
|
||||
return io_pbuf_mmap(file, vma);
|
||||
case IORING_OFF_PBUF_RING: {
|
||||
struct io_mapped_region *region;
|
||||
unsigned int bgid;
|
||||
|
||||
bgid = (offset & ~IORING_OFF_MMAP_MASK) >> IORING_OFF_PBUF_SHIFT;
|
||||
region = io_pbuf_get_region(ctx, bgid);
|
||||
if (!region)
|
||||
return -EINVAL;
|
||||
|
||||
return io_region_mmap(ctx, region, vma, UINT_MAX);
|
||||
}
|
||||
case IORING_MAP_OFF_PARAM_REGION:
|
||||
return io_region_mmap(ctx, &ctx->param_region, vma, UINT_MAX);
|
||||
}
|
||||
|
@ -4,13 +4,6 @@
|
||||
#define IORING_MAP_OFF_PARAM_REGION 0x20000000ULL
|
||||
|
||||
struct page **io_pin_pages(unsigned long ubuf, unsigned long len, int *npages);
|
||||
int io_uring_mmap_pages(struct io_ring_ctx *ctx, struct vm_area_struct *vma,
|
||||
struct page **pages, int npages);
|
||||
|
||||
void *io_pages_map(struct page ***out_pages, unsigned short *npages,
|
||||
size_t size);
|
||||
void io_pages_unmap(void *ptr, struct page ***pages, unsigned short *npages,
|
||||
bool put_pages);
|
||||
|
||||
#ifndef CONFIG_MMU
|
||||
unsigned int io_uring_nommu_mmap_capabilities(struct file *file);
|
||||
|
Loading…
x
Reference in New Issue
Block a user