mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2025-01-16 13:34:30 +00:00
87585b0575
Rather than use remap_pfn_range() for this and manually free later, switch to using vm_insert_page() and have it Just Work. This requires a bit of effort on the mmap lookup side, as the ctx uring_lock isn't held, which otherwise protects buffer_lists from being torn down, and it's not safe to grab it from mmap context, as that would introduce an ABBA deadlock between the mmap lock and the ctx uring_lock. Instead, look up the buffer_list under RCU, as the list is RCU freed already. Use the existing reference count to determine whether it's possible to safely grab a reference to it (e.g. if it's not zero already), and drop that reference when done with the mapping. If the mmap reference is the last one, the buffer_list and the associated memory can go away, since the vma insertion has references to the inserted pages at that point. Signed-off-by: Jens Axboe <axboe@kernel.dk>
153 lines
4.0 KiB
C
153 lines
4.0 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
#ifndef IOU_KBUF_H
|
|
#define IOU_KBUF_H
|
|
|
|
#include <uapi/linux/io_uring.h>
|
|
|
|
struct io_buffer_list {
|
|
/*
|
|
* If ->buf_nr_pages is set, then buf_pages/buf_ring are used. If not,
|
|
* then these are classic provided buffers and ->buf_list is used.
|
|
*/
|
|
union {
|
|
struct list_head buf_list;
|
|
struct {
|
|
struct page **buf_pages;
|
|
struct io_uring_buf_ring *buf_ring;
|
|
};
|
|
struct rcu_head rcu;
|
|
};
|
|
__u16 bgid;
|
|
|
|
/* below is for ring provided buffers */
|
|
__u16 buf_nr_pages;
|
|
__u16 nr_entries;
|
|
__u16 head;
|
|
__u16 mask;
|
|
|
|
atomic_t refs;
|
|
|
|
/* ring mapped provided buffers */
|
|
__u8 is_buf_ring;
|
|
/* ring mapped provided buffers, but mmap'ed by application */
|
|
__u8 is_mmap;
|
|
};
|
|
|
|
struct io_buffer {
|
|
struct list_head list;
|
|
__u64 addr;
|
|
__u32 len;
|
|
__u16 bid;
|
|
__u16 bgid;
|
|
};
|
|
|
|
void __user *io_buffer_select(struct io_kiocb *req, size_t *len,
|
|
unsigned int issue_flags);
|
|
void io_destroy_buffers(struct io_ring_ctx *ctx);
|
|
|
|
int io_remove_buffers_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
|
|
int io_remove_buffers(struct io_kiocb *req, unsigned int issue_flags);
|
|
|
|
int io_provide_buffers_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
|
|
int io_provide_buffers(struct io_kiocb *req, unsigned int issue_flags);
|
|
|
|
int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg);
|
|
int io_unregister_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg);
|
|
int io_register_pbuf_status(struct io_ring_ctx *ctx, void __user *arg);
|
|
|
|
void __io_put_kbuf(struct io_kiocb *req, unsigned issue_flags);
|
|
|
|
bool io_kbuf_recycle_legacy(struct io_kiocb *req, unsigned issue_flags);
|
|
|
|
void io_put_bl(struct io_ring_ctx *ctx, struct io_buffer_list *bl);
|
|
struct io_buffer_list *io_pbuf_get_bl(struct io_ring_ctx *ctx,
|
|
unsigned long bgid);
|
|
int io_pbuf_mmap(struct file *file, struct vm_area_struct *vma);
|
|
|
|
static inline bool io_kbuf_recycle_ring(struct io_kiocb *req)
|
|
{
|
|
/*
|
|
* We don't need to recycle for REQ_F_BUFFER_RING, we can just clear
|
|
* the flag and hence ensure that bl->head doesn't get incremented.
|
|
* If the tail has already been incremented, hang on to it.
|
|
* The exception is partial io, that case we should increment bl->head
|
|
* to monopolize the buffer.
|
|
*/
|
|
if (req->buf_list) {
|
|
req->buf_index = req->buf_list->bgid;
|
|
req->flags &= ~REQ_F_BUFFER_RING;
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
static inline bool io_do_buffer_select(struct io_kiocb *req)
|
|
{
|
|
if (!(req->flags & REQ_F_BUFFER_SELECT))
|
|
return false;
|
|
return !(req->flags & (REQ_F_BUFFER_SELECTED|REQ_F_BUFFER_RING));
|
|
}
|
|
|
|
static inline bool io_kbuf_recycle(struct io_kiocb *req, unsigned issue_flags)
|
|
{
|
|
if (req->flags & REQ_F_BL_NO_RECYCLE)
|
|
return false;
|
|
if (req->flags & REQ_F_BUFFER_SELECTED)
|
|
return io_kbuf_recycle_legacy(req, issue_flags);
|
|
if (req->flags & REQ_F_BUFFER_RING)
|
|
return io_kbuf_recycle_ring(req);
|
|
return false;
|
|
}
|
|
|
|
static inline void __io_put_kbuf_ring(struct io_kiocb *req)
|
|
{
|
|
if (req->buf_list) {
|
|
req->buf_index = req->buf_list->bgid;
|
|
req->buf_list->head++;
|
|
}
|
|
req->flags &= ~REQ_F_BUFFER_RING;
|
|
}
|
|
|
|
static inline void __io_put_kbuf_list(struct io_kiocb *req,
|
|
struct list_head *list)
|
|
{
|
|
if (req->flags & REQ_F_BUFFER_RING) {
|
|
__io_put_kbuf_ring(req);
|
|
} else {
|
|
req->buf_index = req->kbuf->bgid;
|
|
list_add(&req->kbuf->list, list);
|
|
req->flags &= ~REQ_F_BUFFER_SELECTED;
|
|
}
|
|
}
|
|
|
|
static inline unsigned int io_put_kbuf_comp(struct io_kiocb *req)
|
|
{
|
|
unsigned int ret;
|
|
|
|
lockdep_assert_held(&req->ctx->completion_lock);
|
|
|
|
if (!(req->flags & (REQ_F_BUFFER_SELECTED|REQ_F_BUFFER_RING)))
|
|
return 0;
|
|
|
|
ret = IORING_CQE_F_BUFFER | (req->buf_index << IORING_CQE_BUFFER_SHIFT);
|
|
__io_put_kbuf_list(req, &req->ctx->io_buffers_comp);
|
|
return ret;
|
|
}
|
|
|
|
static inline unsigned int io_put_kbuf(struct io_kiocb *req,
|
|
unsigned issue_flags)
|
|
{
|
|
unsigned int ret;
|
|
|
|
if (!(req->flags & (REQ_F_BUFFER_RING | REQ_F_BUFFER_SELECTED)))
|
|
return 0;
|
|
|
|
ret = IORING_CQE_F_BUFFER | (req->buf_index << IORING_CQE_BUFFER_SHIFT);
|
|
if (req->flags & REQ_F_BUFFER_RING)
|
|
__io_put_kbuf_ring(req);
|
|
else
|
|
__io_put_kbuf(req, issue_flags);
|
|
return ret;
|
|
}
|
|
#endif
|