io_uring: introduce attributes for read/write and PI support

Add the ability to pass additional attributes along with read/write.
Application can prepare attribute-specific information and pass its
address using the SQE field:
	__u64	attr_ptr;

Along with setting a mask indicating attributes being passed:
	__u64	attr_type_mask;

Overall 64 attributes are allowed and currently one attribute
'IORING_RW_ATTR_FLAG_PI' is supported.

With the PI attribute, userspace can pass the following information:
- flags: integrity check flags IO_INTEGRITY_CHK_{GUARD/APPTAG/REFTAG}
- len: length of PI/metadata buffer
- addr: address of metadata buffer
- seed: seed value for reftag remapping
- app_tag: application defined 16b value

Process this information to prepare uio_meta_descriptor and pass it down
using kiocb->private.

PI attribute is supported only for direct IO.

Signed-off-by: Anuj Gupta <anuj20.g@samsung.com>
Signed-off-by: Kanchan Joshi <joshi.k@samsung.com>
Link: https://lore.kernel.org/r/20241128112240.8867-7-anuj20.g@samsung.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
Anuj Gupta 2024-11-28 16:52:36 +05:30 committed by Jens Axboe
parent 883a606457
commit 4dde0cc445
4 changed files with 112 additions and 3 deletions

View File

@ -98,6 +98,10 @@ struct io_uring_sqe {
__u64 addr3; __u64 addr3;
__u64 __pad2[1]; __u64 __pad2[1];
}; };
struct {
__u64 attr_ptr; /* pointer to attribute information */
__u64 attr_type_mask; /* bit mask of attributes */
};
__u64 optval; __u64 optval;
/* /*
* If the ring is initialized with IORING_SETUP_SQE128, then * If the ring is initialized with IORING_SETUP_SQE128, then
@ -107,6 +111,18 @@ struct io_uring_sqe {
}; };
}; };
/* sqe->attr_type_mask flags */
#define IORING_RW_ATTR_FLAG_PI (1U << 0)
/*
 * PI (protection information) attribute payload.
 *
 * Userspace fills this in and passes its address in sqe->attr_ptr, with
 * IORING_RW_ATTR_FLAG_PI set in sqe->attr_type_mask.
 */
struct io_uring_attr_pi {
__u16 flags; /* integrity check flags, IO_INTEGRITY_CHK_{GUARD,APPTAG,REFTAG} */
__u16 app_tag; /* application defined 16-bit value */
__u32 len; /* length of the PI/metadata buffer */
__u64 addr; /* user address of the PI/metadata buffer */
__u64 seed; /* seed value for reftag remapping */
__u64 rsvd; /* reserved, must be zero (non-zero is rejected with -EINVAL) */
};
/* /*
* If sqe->file_index is set to this for opcodes that instantiate a new * If sqe->file_index is set to this for opcodes that instantiate a new
* direct descriptor (like openat/openat2/accept), then io_uring will allocate * direct descriptor (like openat/openat2/accept), then io_uring will allocate

View File

@ -3871,6 +3871,8 @@ static int __init io_uring_init(void)
BUILD_BUG_SQE_ELEM(46, __u16, __pad3[0]); BUILD_BUG_SQE_ELEM(46, __u16, __pad3[0]);
BUILD_BUG_SQE_ELEM(48, __u64, addr3); BUILD_BUG_SQE_ELEM(48, __u64, addr3);
BUILD_BUG_SQE_ELEM_SIZE(48, 0, cmd); BUILD_BUG_SQE_ELEM_SIZE(48, 0, cmd);
BUILD_BUG_SQE_ELEM(48, __u64, attr_ptr);
BUILD_BUG_SQE_ELEM(56, __u64, attr_type_mask);
BUILD_BUG_SQE_ELEM(56, __u64, __pad2); BUILD_BUG_SQE_ELEM(56, __u64, __pad2);
BUILD_BUG_ON(sizeof(struct io_uring_files_update) != BUILD_BUG_ON(sizeof(struct io_uring_files_update) !=

View File

@ -257,11 +257,53 @@ static int io_prep_rw_setup(struct io_kiocb *req, int ddir, bool do_import)
return 0; return 0;
} }
static inline void io_meta_save_state(struct io_async_rw *io)
{
io->meta_state.seed = io->meta.seed;
iov_iter_save_state(&io->meta.iter, &io->meta_state.iter_meta);
}
/*
 * Rewind the metadata seed and iterator to the values captured by
 * io_meta_save_state(). Does nothing unless the kiocb carries
 * IOCB_HAS_METADATA.
 */
static inline void io_meta_restore(struct io_async_rw *io, struct kiocb *kiocb)
{
	if (!(kiocb->ki_flags & IOCB_HAS_METADATA))
		return;

	io->meta.seed = io->meta_state.seed;
	iov_iter_restore(&io->meta.iter, &io->meta_state.iter_meta);
}
/*
 * Set up the PI attribute for a read/write request.
 *
 * Copies a struct io_uring_attr_pi from the user address @attr_ptr, rejects
 * a non-zero reserved field, stores flags/app_tag/seed in the request's
 * async data, and imports the user metadata buffer as an iterator for
 * direction @ddir. On success the kiocb is flagged IOCB_HAS_METADATA and the
 * initial metadata state is saved for later restore.
 *
 * @attr_type_mask is accepted but not consulted here.
 *
 * Returns -EFAULT if the attribute cannot be copied from userspace, -EINVAL
 * if the reserved field is set, or otherwise the result of import_ubuf().
 */
static int io_prep_rw_pi(struct io_kiocb *req, struct io_rw *rw, int ddir,
			 u64 attr_ptr, u64 attr_type_mask)
{
	struct io_async_rw *io = req->async_data;
	struct io_uring_attr_pi pi;
	int err;

	if (copy_from_user(&pi, u64_to_user_ptr(attr_ptr), sizeof(pi)))
		return -EFAULT;

	/* reserved space must stay zero */
	if (pi.rsvd != 0)
		return -EINVAL;

	io->meta.flags = pi.flags;
	io->meta.app_tag = pi.app_tag;
	io->meta.seed = pi.seed;

	err = import_ubuf(ddir, u64_to_user_ptr(pi.addr), pi.len,
			  &io->meta.iter);
	if (unlikely(err < 0))
		return err;

	/* mark the kiocb only once the metadata iterator is valid */
	rw->kiocb.ki_flags |= IOCB_HAS_METADATA;
	io_meta_save_state(io);

	return err;
}
static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe, static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe,
int ddir, bool do_import) int ddir, bool do_import)
{ {
struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw); struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
unsigned ioprio; unsigned ioprio;
u64 attr_type_mask;
int ret; int ret;
rw->kiocb.ki_pos = READ_ONCE(sqe->off); rw->kiocb.ki_pos = READ_ONCE(sqe->off);
@ -279,11 +321,28 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe,
rw->kiocb.ki_ioprio = get_current_ioprio(); rw->kiocb.ki_ioprio = get_current_ioprio();
} }
rw->kiocb.dio_complete = NULL; rw->kiocb.dio_complete = NULL;
rw->kiocb.ki_flags = 0;
rw->addr = READ_ONCE(sqe->addr); rw->addr = READ_ONCE(sqe->addr);
rw->len = READ_ONCE(sqe->len); rw->len = READ_ONCE(sqe->len);
rw->flags = READ_ONCE(sqe->rw_flags); rw->flags = READ_ONCE(sqe->rw_flags);
return io_prep_rw_setup(req, ddir, do_import); ret = io_prep_rw_setup(req, ddir, do_import);
if (unlikely(ret))
return ret;
attr_type_mask = READ_ONCE(sqe->attr_type_mask);
if (attr_type_mask) {
u64 attr_ptr;
/* only PI attribute is supported currently */
if (attr_type_mask != IORING_RW_ATTR_FLAG_PI)
return -EINVAL;
attr_ptr = READ_ONCE(sqe->attr_ptr);
ret = io_prep_rw_pi(req, rw, ddir, attr_ptr, attr_type_mask);
}
return ret;
} }
int io_prep_read(struct io_kiocb *req, const struct io_uring_sqe *sqe) int io_prep_read(struct io_kiocb *req, const struct io_uring_sqe *sqe)
@ -409,7 +468,9 @@ static inline loff_t *io_kiocb_update_pos(struct io_kiocb *req)
static void io_resubmit_prep(struct io_kiocb *req) static void io_resubmit_prep(struct io_kiocb *req)
{ {
struct io_async_rw *io = req->async_data; struct io_async_rw *io = req->async_data;
struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
io_meta_restore(io, &rw->kiocb);
iov_iter_restore(&io->iter, &io->iter_state); iov_iter_restore(&io->iter, &io->iter_state);
} }
@ -744,6 +805,10 @@ static bool io_rw_should_retry(struct io_kiocb *req)
if (kiocb->ki_flags & (IOCB_DIRECT | IOCB_HIPRI)) if (kiocb->ki_flags & (IOCB_DIRECT | IOCB_HIPRI))
return false; return false;
/* never retry for meta io */
if (kiocb->ki_flags & IOCB_HAS_METADATA)
return false;
/* /*
* just use poll if we can, and don't attempt if the fs doesn't * just use poll if we can, and don't attempt if the fs doesn't
* support callback based unlocks * support callback based unlocks
@ -794,7 +859,7 @@ static int io_rw_init_file(struct io_kiocb *req, fmode_t mode, int rw_type)
if (!(req->flags & REQ_F_FIXED_FILE)) if (!(req->flags & REQ_F_FIXED_FILE))
req->flags |= io_file_get_flags(file); req->flags |= io_file_get_flags(file);
kiocb->ki_flags = file->f_iocb_flags; kiocb->ki_flags |= file->f_iocb_flags;
ret = kiocb_set_rw_flags(kiocb, rw->flags, rw_type); ret = kiocb_set_rw_flags(kiocb, rw->flags, rw_type);
if (unlikely(ret)) if (unlikely(ret))
return ret; return ret;
@ -828,6 +893,18 @@ static int io_rw_init_file(struct io_kiocb *req, fmode_t mode, int rw_type)
kiocb->ki_complete = io_complete_rw; kiocb->ki_complete = io_complete_rw;
} }
if (kiocb->ki_flags & IOCB_HAS_METADATA) {
struct io_async_rw *io = req->async_data;
/*
* We have a union of meta fields with wpq used for buffered-io
* in io_async_rw, so fail it here.
*/
if (!(req->file->f_flags & O_DIRECT))
return -EOPNOTSUPP;
kiocb->private = &io->meta;
}
return 0; return 0;
} }
@ -902,6 +979,7 @@ static int __io_read(struct io_kiocb *req, unsigned int issue_flags)
* manually if we need to. * manually if we need to.
*/ */
iov_iter_restore(&io->iter, &io->iter_state); iov_iter_restore(&io->iter, &io->iter_state);
io_meta_restore(io, kiocb);
do { do {
/* /*
@ -1125,6 +1203,7 @@ int io_write(struct io_kiocb *req, unsigned int issue_flags)
} else { } else {
ret_eagain: ret_eagain:
iov_iter_restore(&io->iter, &io->iter_state); iov_iter_restore(&io->iter, &io->iter_state);
io_meta_restore(io, kiocb);
if (kiocb->ki_flags & IOCB_WRITE) if (kiocb->ki_flags & IOCB_WRITE)
io_req_end_write(req); io_req_end_write(req);
return -EAGAIN; return -EAGAIN;

View File

@ -2,6 +2,11 @@
#include <linux/pagemap.h> #include <linux/pagemap.h>
struct io_meta_state {
u32 seed;
struct iov_iter_state iter_meta;
};
struct io_async_rw { struct io_async_rw {
size_t bytes_done; size_t bytes_done;
struct iov_iter iter; struct iov_iter iter;
@ -9,7 +14,14 @@ struct io_async_rw {
struct iovec fast_iov; struct iovec fast_iov;
struct iovec *free_iovec; struct iovec *free_iovec;
int free_iov_nr; int free_iov_nr;
struct wait_page_queue wpq; /* wpq is for buffered io, while meta fields are used with direct io */
union {
struct wait_page_queue wpq;
struct {
struct uio_meta meta;
struct io_meta_state meta_state;
};
};
}; };
int io_prep_read_fixed(struct io_kiocb *req, const struct io_uring_sqe *sqe); int io_prep_read_fixed(struct io_kiocb *req, const struct io_uring_sqe *sqe);