for-6.8/io_uring-2024-01-08
-----BEGIN PGP SIGNATURE-----

iQJEBAABCAAuFiEEwPw5LcreJtl1+l5K99NY+ylx4KYFAmWcOk0QHGF4Ym9lQGtl
cm5lbC5kawAKCRD301j7KXHgpq2wEACgP1Gvfb2cR65B/6tkLJ6neKnYOPVSYx9F
4Mv6KS306vmOsE67LqynhtAe/Hgfv0NKADN+mKLLV8IdwhneLAwFxRoHl8QPO2dW
DIxohDMLIqzY/SOUBqBHMS8b5lEr79UVh8vXjAuuYCBTcHBndi4hj0S0zkwf5YiK
Vma6ty+TnzmOv+c6+OCgZZlwFhkxD1pQBM5iOlFRAkdt3Er/2e8FqthK0/od7Fgy
Ii9xdxYZU9KTsM4R+IWhqZ1rj1qUdtMPSGzFRbzFt3b6NdANRZT9MUlxvSkuHPIE
P/9anpmgu41gd2JbHuyVnw+4jdZMhhZUPUYtphmQ5n35rcFZjv1gnJalH/Nw/DTE
78bDxxP0+35bkuq5MwHfvA3imVsGnvnFx5MlZBoqvv+VK4S3Q6E2+ARQZdiG+2Is
8Uyjzzt0nq2waUO3H1JLkgiM9J9LFqaYQLE68u569m4NCfRSpoZva9KVmNpre2K0
9WdGy2gfzaYAQkGud2TULzeLiEbWfvZAL0o43jYXkVPRKmnffCEphf2X6C2IDV/3
5ZR4bandRRC6DVnE+8n1MB06AYtpJPB/w3rplON+l/V5Gnb7wRNwiUrBr/F15OOy
OPbAnP6k56wY/JRpgzdbNqwGi8EvGWX6t3Kqjp2mczhs2as8M193FKS2xppl1TYl
BPyTsAfbdQ==
=5v/U
-----END PGP SIGNATURE-----

Merge tag 'for-6.8/io_uring-2024-01-08' of git://git.kernel.dk/linux

Pull io_uring updates from Jens Axboe:
 "Mostly just some fixes and cleanups, but one feature as well. In
  detail:

   - Harden the check for handling IOPOLL based on return (Pavel)

   - Various minor optimizations (Pavel)

   - Drop remnants of SCM_RIGHTS fd passing support, now that it's no
     longer supported since 6.7 (me)

   - Fix for a case where bytes_done wasn't initialized properly on a
     failure condition for read/write requests (me)

   - Move the register related code to a separate file (me)

   - Add support for returning the provided ring buffer head (me)

   - Add support for adding a direct descriptor to the normal file
     table (me, Christian Brauner)

   - Fix for ensuring pending task_work for a ring with DEFER_TASKRUN
     is run even if we timeout waiting (me)"

* tag 'for-6.8/io_uring-2024-01-08' of git://git.kernel.dk/linux:
  io_uring: ensure local task_work is run on wait timeout
  io_uring/kbuf: add method for returning provided buffer ring head
  io_uring/rw: ensure io->bytes_done is always initialized
  io_uring: drop any code related to SCM_RIGHTS
  io_uring/unix: drop usage of io_uring socket
  io_uring/register: move io_uring_register(2) related code to register.c
  io_uring/openclose: add support for IORING_OP_FIXED_FD_INSTALL
  io_uring/cmd: inline io_uring_cmd_get_task
  io_uring/cmd: inline io_uring_cmd_do_in_task_lazy
  io_uring: split out cmd api into a separate header
  io_uring: optimise ltimeout for inline execution
  io_uring: don't check iopoll if request completes
commit 4c72e2b8c4
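One of the features above is the new IORING_REGISTER_PBUF_STATUS opcode and struct io_uring_buf_status, added in the uapi hunks and in io_uring/kbuf.c below, which let userspace query how far the kernel has consumed a provided buffer ring. The following is a minimal sketch of how such a query might look from userspace via the raw io_uring_register(2) syscall; it assumes an already-set-up ring fd and registered buffer group, and uapi headers that include the definitions from this series (a liburing helper is not assumed).

```c
/*
 * Hedged sketch: query the head of a provided buffer ring using the new
 * IORING_REGISTER_PBUF_STATUS opcode. Requires <linux/io_uring.h> from a
 * kernel that carries this series; ring_fd/buf_group are assumed to exist.
 */
#include <linux/io_uring.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

static int pbuf_ring_head(int ring_fd, unsigned int buf_group, unsigned int *head)
{
	struct io_uring_buf_status st;

	memset(&st, 0, sizeof(st));
	st.buf_group = buf_group;	/* input: which buffer group to query */

	/* nr_args must be 1, matching the check added in __io_uring_register() */
	if (syscall(__NR_io_uring_register, ring_fd,
		    IORING_REGISTER_PBUF_STATUS, &st, 1) < 0)
		return -1;

	*head = st.head;		/* output: current ring head */
	return 0;
}
```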
@@ -11142,6 +11142,7 @@ L:	io-uring@vger.kernel.org
S:	Maintained
T:	git git://git.kernel.dk/linux-block
T:	git git://git.kernel.dk/liburing
F:	include/linux/io_uring/
F:	include/linux/io_uring.h
F:	include/linux/io_uring_types.h
F:	include/trace/events/io_uring.h
@@ -36,7 +36,7 @@
#include <linux/sched/mm.h>
#include <linux/uaccess.h>
#include <linux/cdev.h>
#include <linux/io_uring.h>
#include <linux/io_uring/cmd.h>
#include <linux/blk-mq.h>
#include <linux/delay.h>
#include <linux/mm.h>
@@ -5,7 +5,7 @@
 */
#include <linux/ptrace.h>	/* for force_successful_syscall_return */
#include <linux/nvme_ioctl.h>
#include <linux/io_uring.h>
#include <linux/io_uring/cmd.h>
#include "nvme.h"

enum {
@ -6,66 +6,13 @@
|
||||
#include <linux/xarray.h>
|
||||
#include <uapi/linux/io_uring.h>
|
||||
|
||||
enum io_uring_cmd_flags {
|
||||
IO_URING_F_COMPLETE_DEFER = 1,
|
||||
IO_URING_F_UNLOCKED = 2,
|
||||
/* the request is executed from poll, it should not be freed */
|
||||
IO_URING_F_MULTISHOT = 4,
|
||||
/* executed by io-wq */
|
||||
IO_URING_F_IOWQ = 8,
|
||||
/* int's last bit, sign checks are usually faster than a bit test */
|
||||
IO_URING_F_NONBLOCK = INT_MIN,
|
||||
|
||||
/* ctx state flags, for URING_CMD */
|
||||
IO_URING_F_SQE128 = (1 << 8),
|
||||
IO_URING_F_CQE32 = (1 << 9),
|
||||
IO_URING_F_IOPOLL = (1 << 10),
|
||||
|
||||
/* set when uring wants to cancel a previously issued command */
|
||||
IO_URING_F_CANCEL = (1 << 11),
|
||||
IO_URING_F_COMPAT = (1 << 12),
|
||||
};
|
||||
|
||||
/* only top 8 bits of sqe->uring_cmd_flags for kernel internal use */
|
||||
#define IORING_URING_CMD_CANCELABLE (1U << 30)
|
||||
|
||||
struct io_uring_cmd {
|
||||
struct file *file;
|
||||
const struct io_uring_sqe *sqe;
|
||||
/* callback to defer completions to task context */
|
||||
void (*task_work_cb)(struct io_uring_cmd *cmd, unsigned);
|
||||
u32 cmd_op;
|
||||
u32 flags;
|
||||
u8 pdu[32]; /* available inline for free use */
|
||||
};
|
||||
|
||||
static inline const void *io_uring_sqe_cmd(const struct io_uring_sqe *sqe)
|
||||
{
|
||||
return sqe->cmd;
|
||||
}
|
||||
|
||||
#if defined(CONFIG_IO_URING)
|
||||
int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw,
|
||||
struct iov_iter *iter, void *ioucmd);
|
||||
void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret, ssize_t res2,
|
||||
unsigned issue_flags);
|
||||
struct sock *io_uring_get_socket(struct file *file);
|
||||
void __io_uring_cancel(bool cancel_all);
|
||||
void __io_uring_free(struct task_struct *tsk);
|
||||
void io_uring_unreg_ringfd(void);
|
||||
const char *io_uring_get_opcode(u8 opcode);
|
||||
void __io_uring_cmd_do_in_task(struct io_uring_cmd *ioucmd,
|
||||
void (*task_work_cb)(struct io_uring_cmd *, unsigned),
|
||||
unsigned flags);
|
||||
/* users should follow semantics of IOU_F_TWQ_LAZY_WAKE */
|
||||
void io_uring_cmd_do_in_task_lazy(struct io_uring_cmd *ioucmd,
|
||||
void (*task_work_cb)(struct io_uring_cmd *, unsigned));
|
||||
|
||||
static inline void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd,
|
||||
void (*task_work_cb)(struct io_uring_cmd *, unsigned))
|
||||
{
|
||||
__io_uring_cmd_do_in_task(ioucmd, task_work_cb, 0);
|
||||
}
|
||||
int io_uring_cmd_sock(struct io_uring_cmd *cmd, unsigned int issue_flags);
|
||||
bool io_is_uring_fops(struct file *file);
|
||||
|
||||
static inline void io_uring_files_cancel(void)
|
||||
{
|
||||
@ -84,32 +31,7 @@ static inline void io_uring_free(struct task_struct *tsk)
|
||||
if (tsk->io_uring)
|
||||
__io_uring_free(tsk);
|
||||
}
|
||||
int io_uring_cmd_sock(struct io_uring_cmd *cmd, unsigned int issue_flags);
|
||||
void io_uring_cmd_mark_cancelable(struct io_uring_cmd *cmd,
|
||||
unsigned int issue_flags);
|
||||
struct task_struct *io_uring_cmd_get_task(struct io_uring_cmd *cmd);
|
||||
#else
|
||||
static inline int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw,
|
||||
struct iov_iter *iter, void *ioucmd)
|
||||
{
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
static inline void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret,
|
||||
ssize_t ret2, unsigned issue_flags)
|
||||
{
|
||||
}
|
||||
static inline void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd,
|
||||
void (*task_work_cb)(struct io_uring_cmd *, unsigned))
|
||||
{
|
||||
}
|
||||
static inline void io_uring_cmd_do_in_task_lazy(struct io_uring_cmd *ioucmd,
|
||||
void (*task_work_cb)(struct io_uring_cmd *, unsigned))
|
||||
{
|
||||
}
|
||||
static inline struct sock *io_uring_get_socket(struct file *file)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
static inline void io_uring_task_cancel(void)
|
||||
{
|
||||
}
|
||||
@ -128,13 +50,9 @@ static inline int io_uring_cmd_sock(struct io_uring_cmd *cmd,
|
||||
{
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
static inline void io_uring_cmd_mark_cancelable(struct io_uring_cmd *cmd,
|
||||
unsigned int issue_flags)
|
||||
static inline bool io_is_uring_fops(struct file *file)
|
||||
{
|
||||
}
|
||||
static inline struct task_struct *io_uring_cmd_get_task(struct io_uring_cmd *cmd)
|
||||
{
|
||||
return NULL;
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
include/linux/io_uring/cmd.h (new file, 77 lines)
@@ -0,0 +1,77 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0-or-later */
|
||||
#ifndef _LINUX_IO_URING_CMD_H
|
||||
#define _LINUX_IO_URING_CMD_H
|
||||
|
||||
#include <uapi/linux/io_uring.h>
|
||||
#include <linux/io_uring_types.h>
|
||||
|
||||
/* only top 8 bits of sqe->uring_cmd_flags for kernel internal use */
|
||||
#define IORING_URING_CMD_CANCELABLE (1U << 30)
|
||||
|
||||
struct io_uring_cmd {
|
||||
struct file *file;
|
||||
const struct io_uring_sqe *sqe;
|
||||
/* callback to defer completions to task context */
|
||||
void (*task_work_cb)(struct io_uring_cmd *cmd, unsigned);
|
||||
u32 cmd_op;
|
||||
u32 flags;
|
||||
u8 pdu[32]; /* available inline for free use */
|
||||
};
|
||||
|
||||
static inline const void *io_uring_sqe_cmd(const struct io_uring_sqe *sqe)
|
||||
{
|
||||
return sqe->cmd;
|
||||
}
|
||||
|
||||
#if defined(CONFIG_IO_URING)
|
||||
int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw,
|
||||
struct iov_iter *iter, void *ioucmd);
|
||||
void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret, ssize_t res2,
|
||||
unsigned issue_flags);
|
||||
void __io_uring_cmd_do_in_task(struct io_uring_cmd *ioucmd,
|
||||
void (*task_work_cb)(struct io_uring_cmd *, unsigned),
|
||||
unsigned flags);
|
||||
|
||||
void io_uring_cmd_mark_cancelable(struct io_uring_cmd *cmd,
|
||||
unsigned int issue_flags);
|
||||
|
||||
#else
|
||||
static inline int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw,
|
||||
struct iov_iter *iter, void *ioucmd)
|
||||
{
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
static inline void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret,
|
||||
ssize_t ret2, unsigned issue_flags)
|
||||
{
|
||||
}
|
||||
static inline void __io_uring_cmd_do_in_task(struct io_uring_cmd *ioucmd,
|
||||
void (*task_work_cb)(struct io_uring_cmd *, unsigned),
|
||||
unsigned flags)
|
||||
{
|
||||
}
|
||||
static inline void io_uring_cmd_mark_cancelable(struct io_uring_cmd *cmd,
|
||||
unsigned int issue_flags)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
|
||||
/* users must follow the IOU_F_TWQ_LAZY_WAKE semantics */
|
||||
static inline void io_uring_cmd_do_in_task_lazy(struct io_uring_cmd *ioucmd,
|
||||
void (*task_work_cb)(struct io_uring_cmd *, unsigned))
|
||||
{
|
||||
__io_uring_cmd_do_in_task(ioucmd, task_work_cb, IOU_F_TWQ_LAZY_WAKE);
|
||||
}
|
||||
|
||||
static inline void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd,
|
||||
void (*task_work_cb)(struct io_uring_cmd *, unsigned))
|
||||
{
|
||||
__io_uring_cmd_do_in_task(ioucmd, task_work_cb, 0);
|
||||
}
|
||||
|
||||
static inline struct task_struct *io_uring_cmd_get_task(struct io_uring_cmd *cmd)
|
||||
{
|
||||
return cmd_to_io_kiocb(cmd)->task;
|
||||
}
|
||||
|
||||
#endif /* _LINUX_IO_URING_CMD_H */
|
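The new header keeps io_uring_cmd_do_in_task_lazy() and io_uring_cmd_complete_in_task() as thin inline wrappers around __io_uring_cmd_do_in_task(). As a hedged illustration only (the driver, function, and callback names are hypothetical and not part of this series), a uring_cmd driver completing a command from interrupt context might defer the completion to task context like this:

```c
/* Hypothetical kernel-side driver snippet using <linux/io_uring/cmd.h>. */
#include <linux/io_uring/cmd.h>

static void my_drv_cmd_complete(struct io_uring_cmd *ioucmd, unsigned issue_flags)
{
	/* runs in task context; safe to post the completion here */
	io_uring_cmd_done(ioucmd, 0 /* res */, 0 /* res2 */, issue_flags);
}

static void my_drv_irq_done(struct io_uring_cmd *ioucmd)
{
	/*
	 * Each command posts a single CQE, so the lazy-wake variant is
	 * allowed (see the IOU_F_TWQ_LAZY_WAKE comment in io_uring_types.h).
	 */
	io_uring_cmd_do_in_task_lazy(ioucmd, my_drv_cmd_complete);
}
```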
@ -7,6 +7,37 @@
|
||||
#include <linux/llist.h>
|
||||
#include <uapi/linux/io_uring.h>
|
||||
|
||||
enum {
|
||||
/*
|
||||
* A hint to not wake right away but delay until there are enough of
|
||||
* tw's queued to match the number of CQEs the task is waiting for.
|
||||
*
|
||||
* Must not be used with requests generating more than one CQE.
|
||||
* It's also ignored unless IORING_SETUP_DEFER_TASKRUN is set.
|
||||
*/
|
||||
IOU_F_TWQ_LAZY_WAKE = 1,
|
||||
};
|
||||
|
||||
enum io_uring_cmd_flags {
|
||||
IO_URING_F_COMPLETE_DEFER = 1,
|
||||
IO_URING_F_UNLOCKED = 2,
|
||||
/* the request is executed from poll, it should not be freed */
|
||||
IO_URING_F_MULTISHOT = 4,
|
||||
/* executed by io-wq */
|
||||
IO_URING_F_IOWQ = 8,
|
||||
/* int's last bit, sign checks are usually faster than a bit test */
|
||||
IO_URING_F_NONBLOCK = INT_MIN,
|
||||
|
||||
/* ctx state flags, for URING_CMD */
|
||||
IO_URING_F_SQE128 = (1 << 8),
|
||||
IO_URING_F_CQE32 = (1 << 9),
|
||||
IO_URING_F_IOPOLL = (1 << 10),
|
||||
|
||||
/* set when uring wants to cancel a previously issued command */
|
||||
IO_URING_F_CANCEL = (1 << 11),
|
||||
IO_URING_F_COMPAT = (1 << 12),
|
||||
};
|
||||
|
||||
struct io_wq_work_node {
|
||||
struct io_wq_work_node *next;
|
||||
};
|
||||
@ -358,9 +389,6 @@ struct io_ring_ctx {
|
||||
struct wait_queue_head rsrc_quiesce_wq;
|
||||
unsigned rsrc_quiesce;
|
||||
|
||||
#if defined(CONFIG_UNIX)
|
||||
struct socket *ring_sock;
|
||||
#endif
|
||||
/* hashed buffered write serialization */
|
||||
struct io_wq_hash *hash_map;
|
||||
|
||||
|
@ -71,6 +71,7 @@ struct io_uring_sqe {
|
||||
__u32 uring_cmd_flags;
|
||||
__u32 waitid_flags;
|
||||
__u32 futex_flags;
|
||||
__u32 install_fd_flags;
|
||||
};
|
||||
__u64 user_data; /* data to be passed back at completion time */
|
||||
/* pack this to avoid bogus arm OABI complaints */
|
||||
@ -253,6 +254,7 @@ enum io_uring_op {
|
||||
IORING_OP_FUTEX_WAIT,
|
||||
IORING_OP_FUTEX_WAKE,
|
||||
IORING_OP_FUTEX_WAITV,
|
||||
IORING_OP_FIXED_FD_INSTALL,
|
||||
|
||||
/* this goes last, obviously */
|
||||
IORING_OP_LAST,
|
||||
@ -386,6 +388,13 @@ enum {
|
||||
/* Pass through the flags from sqe->file_index to cqe->flags */
|
||||
#define IORING_MSG_RING_FLAGS_PASS (1U << 1)
|
||||
|
||||
/*
|
||||
* IORING_OP_FIXED_FD_INSTALL flags (sqe->install_fd_flags)
|
||||
*
|
||||
* IORING_FIXED_FD_NO_CLOEXEC Don't mark the fd as O_CLOEXEC
|
||||
*/
|
||||
#define IORING_FIXED_FD_NO_CLOEXEC (1U << 0)
|
||||
|
||||
/*
|
||||
* IO completion data structure (Completion Queue Entry)
|
||||
*/
|
||||
@ -558,6 +567,9 @@ enum {
|
||||
/* register a range of fixed file slots for automatic slot allocation */
|
||||
IORING_REGISTER_FILE_ALLOC_RANGE = 25,
|
||||
|
||||
/* return status information for a buffer group */
|
||||
IORING_REGISTER_PBUF_STATUS = 26,
|
||||
|
||||
/* this goes last */
|
||||
IORING_REGISTER_LAST,
|
||||
|
||||
@ -684,6 +696,13 @@ struct io_uring_buf_reg {
|
||||
__u64 resv[3];
|
||||
};
|
||||
|
||||
/* argument for IORING_REGISTER_PBUF_STATUS */
|
||||
struct io_uring_buf_status {
|
||||
__u32 buf_group; /* input */
|
||||
__u32 head; /* output */
|
||||
__u32 resv[8];
|
||||
};
|
||||
|
||||
/*
|
||||
* io_uring_restriction->opcode values
|
||||
*/
|
||||
|
@@ -8,6 +8,6 @@ obj-$(CONFIG_IO_URING) += io_uring.o xattr.o nop.o fs.o splice.o \
				statx.o net.o msg_ring.o timeout.o \
				sqpoll.o fdinfo.o tctx.o poll.o \
				cancel.o kbuf.o rsrc.o rw.o opdef.o \
				notif.o waitid.o
				notif.o waitid.o register.o
obj-$(CONFIG_IO_WQ) += io-wq.o
obj-$(CONFIG_FUTEX) += futex.o
@ -87,13 +87,10 @@ static int io_install_fixed_file(struct io_ring_ctx *ctx, struct file *file,
|
||||
io_file_bitmap_clear(&ctx->file_table, slot_index);
|
||||
}
|
||||
|
||||
ret = io_scm_file_account(ctx, file);
|
||||
if (!ret) {
|
||||
*io_get_tag_slot(ctx->file_data, slot_index) = 0;
|
||||
io_fixed_file_set(file_slot, file);
|
||||
io_file_bitmap_set(&ctx->file_table, slot_index);
|
||||
}
|
||||
return ret;
|
||||
*io_get_tag_slot(ctx->file_data, slot_index) = 0;
|
||||
io_fixed_file_set(file_slot, file);
|
||||
io_file_bitmap_set(&ctx->file_table, slot_index);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int __io_fixed_fd_install(struct io_ring_ctx *ctx, struct file *file,
|
||||
|
@ -60,7 +60,6 @@
|
||||
#include <linux/net.h>
|
||||
#include <net/sock.h>
|
||||
#include <net/af_unix.h>
|
||||
#include <net/scm.h>
|
||||
#include <linux/anon_inodes.h>
|
||||
#include <linux/sched/mm.h>
|
||||
#include <linux/uaccess.h>
|
||||
@ -70,6 +69,7 @@
|
||||
#include <linux/fadvise.h>
|
||||
#include <linux/task_work.h>
|
||||
#include <linux/io_uring.h>
|
||||
#include <linux/io_uring/cmd.h>
|
||||
#include <linux/audit.h>
|
||||
#include <linux/security.h>
|
||||
#include <asm/shmparam.h>
|
||||
@ -85,6 +85,7 @@
|
||||
#include "opdef.h"
|
||||
#include "refs.h"
|
||||
#include "tctx.h"
|
||||
#include "register.h"
|
||||
#include "sqpoll.h"
|
||||
#include "fdinfo.h"
|
||||
#include "kbuf.h"
|
||||
@ -103,9 +104,6 @@
|
||||
#define IORING_MAX_ENTRIES 32768
|
||||
#define IORING_MAX_CQ_ENTRIES (2 * IORING_MAX_ENTRIES)
|
||||
|
||||
#define IORING_MAX_RESTRICTIONS (IORING_RESTRICTION_LAST + \
|
||||
IORING_REGISTER_LAST + IORING_OP_LAST)
|
||||
|
||||
#define SQE_COMMON_FLAGS (IOSQE_FIXED_FILE | IOSQE_IO_LINK | \
|
||||
IOSQE_IO_HARDLINK | IOSQE_ASYNC)
|
||||
|
||||
@ -129,11 +127,6 @@ enum {
|
||||
IO_CHECK_CQ_DROPPED_BIT,
|
||||
};
|
||||
|
||||
enum {
|
||||
IO_EVENTFD_OP_SIGNAL_BIT,
|
||||
IO_EVENTFD_OP_FREE_BIT,
|
||||
};
|
||||
|
||||
struct io_defer_entry {
|
||||
struct list_head list;
|
||||
struct io_kiocb *req;
|
||||
@ -177,19 +170,6 @@ static struct ctl_table kernel_io_uring_disabled_table[] = {
|
||||
};
|
||||
#endif
|
||||
|
||||
struct sock *io_uring_get_socket(struct file *file)
|
||||
{
|
||||
#if defined(CONFIG_UNIX)
|
||||
if (io_is_uring_fops(file)) {
|
||||
struct io_ring_ctx *ctx = file->private_data;
|
||||
|
||||
return ctx->ring_sock->sk;
|
||||
}
|
||||
#endif
|
||||
return NULL;
|
||||
}
|
||||
EXPORT_SYMBOL(io_uring_get_socket);
|
||||
|
||||
static inline void io_submit_flush_completions(struct io_ring_ctx *ctx)
|
||||
{
|
||||
if (!wq_list_empty(&ctx->submit_state.compl_reqs) ||
|
||||
@ -554,8 +534,7 @@ static __cold void io_queue_deferred(struct io_ring_ctx *ctx)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static void io_eventfd_ops(struct rcu_head *rcu)
|
||||
void io_eventfd_ops(struct rcu_head *rcu)
|
||||
{
|
||||
struct io_ev_fd *ev_fd = container_of(rcu, struct io_ev_fd, rcu);
|
||||
int ops = atomic_xchg(&ev_fd->ops, 0);
|
||||
@ -1898,14 +1877,19 @@ static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags)
|
||||
io_req_complete_defer(req);
|
||||
else
|
||||
io_req_complete_post(req, issue_flags);
|
||||
} else if (ret != IOU_ISSUE_SKIP_COMPLETE)
|
||||
return ret;
|
||||
|
||||
/* If the op doesn't have a file, we're not polling for it */
|
||||
if ((req->ctx->flags & IORING_SETUP_IOPOLL) && def->iopoll_queue)
|
||||
io_iopoll_req_issued(req, issue_flags);
|
||||
return 0;
|
||||
}
|
||||
|
||||
return 0;
|
||||
if (ret == IOU_ISSUE_SKIP_COMPLETE) {
|
||||
ret = 0;
|
||||
io_arm_ltimeout(req);
|
||||
|
||||
/* If the op doesn't have a file, we're not polling for it */
|
||||
if ((req->ctx->flags & IORING_SETUP_IOPOLL) && def->iopoll_queue)
|
||||
io_iopoll_req_issued(req, issue_flags);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
int io_poll_issue(struct io_kiocb *req, struct io_tw_state *ts)
|
||||
@ -2076,9 +2060,7 @@ static inline void io_queue_sqe(struct io_kiocb *req)
|
||||
* We async punt it if the file wasn't marked NOWAIT, or if the file
|
||||
* doesn't support non-blocking read/write attempts
|
||||
*/
|
||||
if (likely(!ret))
|
||||
io_arm_ltimeout(req);
|
||||
else
|
||||
if (unlikely(ret))
|
||||
io_queue_async(req, ret);
|
||||
}
|
||||
|
||||
@ -2633,8 +2615,6 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
|
||||
__set_current_state(TASK_RUNNING);
|
||||
atomic_set(&ctx->cq_wait_nr, 0);
|
||||
|
||||
if (ret < 0)
|
||||
break;
|
||||
/*
|
||||
* Run task_work after scheduling and before io_should_wake().
|
||||
* If we got woken because of task_work being processed, run it
|
||||
@ -2644,6 +2624,18 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
|
||||
if (!llist_empty(&ctx->work_llist))
|
||||
io_run_local_work(ctx);
|
||||
|
||||
/*
|
||||
* Non-local task_work will be run on exit to userspace, but
|
||||
* if we're using DEFER_TASKRUN, then we could have waited
|
||||
* with a timeout for a number of requests. If the timeout
|
||||
* hits, we could have some requests ready to process. Ensure
|
||||
* this break is _after_ we have run task_work, to avoid
|
||||
* deferring running potentially pending requests until the
|
||||
* next time we wait for events.
|
||||
*/
|
||||
if (ret < 0)
|
||||
break;
|
||||
|
||||
check_cq = READ_ONCE(ctx->check_cq);
|
||||
if (unlikely(check_cq)) {
|
||||
/* let the caller flush overflows, retry */
|
||||
@ -2831,61 +2823,6 @@ static unsigned long rings_size(struct io_ring_ctx *ctx, unsigned int sq_entries
|
||||
return off;
|
||||
}
|
||||
|
||||
static int io_eventfd_register(struct io_ring_ctx *ctx, void __user *arg,
|
||||
unsigned int eventfd_async)
|
||||
{
|
||||
struct io_ev_fd *ev_fd;
|
||||
__s32 __user *fds = arg;
|
||||
int fd;
|
||||
|
||||
ev_fd = rcu_dereference_protected(ctx->io_ev_fd,
|
||||
lockdep_is_held(&ctx->uring_lock));
|
||||
if (ev_fd)
|
||||
return -EBUSY;
|
||||
|
||||
if (copy_from_user(&fd, fds, sizeof(*fds)))
|
||||
return -EFAULT;
|
||||
|
||||
ev_fd = kmalloc(sizeof(*ev_fd), GFP_KERNEL);
|
||||
if (!ev_fd)
|
||||
return -ENOMEM;
|
||||
|
||||
ev_fd->cq_ev_fd = eventfd_ctx_fdget(fd);
|
||||
if (IS_ERR(ev_fd->cq_ev_fd)) {
|
||||
int ret = PTR_ERR(ev_fd->cq_ev_fd);
|
||||
kfree(ev_fd);
|
||||
return ret;
|
||||
}
|
||||
|
||||
spin_lock(&ctx->completion_lock);
|
||||
ctx->evfd_last_cq_tail = ctx->cached_cq_tail;
|
||||
spin_unlock(&ctx->completion_lock);
|
||||
|
||||
ev_fd->eventfd_async = eventfd_async;
|
||||
ctx->has_evfd = true;
|
||||
rcu_assign_pointer(ctx->io_ev_fd, ev_fd);
|
||||
atomic_set(&ev_fd->refs, 1);
|
||||
atomic_set(&ev_fd->ops, 0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int io_eventfd_unregister(struct io_ring_ctx *ctx)
|
||||
{
|
||||
struct io_ev_fd *ev_fd;
|
||||
|
||||
ev_fd = rcu_dereference_protected(ctx->io_ev_fd,
|
||||
lockdep_is_held(&ctx->uring_lock));
|
||||
if (ev_fd) {
|
||||
ctx->has_evfd = false;
|
||||
rcu_assign_pointer(ctx->io_ev_fd, NULL);
|
||||
if (!atomic_fetch_or(BIT(IO_EVENTFD_OP_FREE_BIT), &ev_fd->ops))
|
||||
call_rcu(&ev_fd->rcu, io_eventfd_ops);
|
||||
return 0;
|
||||
}
|
||||
|
||||
return -ENXIO;
|
||||
}
|
||||
|
||||
static void io_req_caches_free(struct io_ring_ctx *ctx)
|
||||
{
|
||||
struct io_kiocb *req;
|
||||
@ -2938,13 +2875,6 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
|
||||
io_rsrc_node_destroy(ctx, ctx->rsrc_node);
|
||||
|
||||
WARN_ON_ONCE(!list_empty(&ctx->rsrc_ref_list));
|
||||
|
||||
#if defined(CONFIG_UNIX)
|
||||
if (ctx->ring_sock) {
|
||||
ctx->ring_sock->file = NULL; /* so that iput() is called */
|
||||
sock_release(ctx->ring_sock);
|
||||
}
|
||||
#endif
|
||||
WARN_ON_ONCE(!list_empty(&ctx->ltimeout_list));
|
||||
|
||||
io_alloc_cache_free(&ctx->rsrc_node_cache, io_rsrc_node_cache_free);
|
||||
@ -2984,7 +2914,7 @@ static __cold void io_activate_pollwq_cb(struct callback_head *cb)
|
||||
percpu_ref_put(&ctx->refs);
|
||||
}
|
||||
|
||||
static __cold void io_activate_pollwq(struct io_ring_ctx *ctx)
|
||||
__cold void io_activate_pollwq(struct io_ring_ctx *ctx)
|
||||
{
|
||||
spin_lock(&ctx->completion_lock);
|
||||
/* already activated or in progress */
|
||||
@ -3043,19 +2973,6 @@ static __poll_t io_uring_poll(struct file *file, poll_table *wait)
|
||||
return mask;
|
||||
}
|
||||
|
||||
static int io_unregister_personality(struct io_ring_ctx *ctx, unsigned id)
|
||||
{
|
||||
const struct cred *creds;
|
||||
|
||||
creds = xa_erase(&ctx->personalities, id);
|
||||
if (creds) {
|
||||
put_cred(creds);
|
||||
return 0;
|
||||
}
|
||||
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
struct io_tctx_exit {
|
||||
struct callback_head task_work;
|
||||
struct completion completion;
|
||||
@ -3866,32 +3783,12 @@ static int io_uring_install_fd(struct file *file)
|
||||
/*
|
||||
* Allocate an anonymous fd, this is what constitutes the application
|
||||
* visible backing of an io_uring instance. The application mmaps this
|
||||
* fd to gain access to the SQ/CQ ring details. If UNIX sockets are enabled,
|
||||
* we have to tie this fd to a socket for file garbage collection purposes.
|
||||
* fd to gain access to the SQ/CQ ring details.
|
||||
*/
|
||||
static struct file *io_uring_get_file(struct io_ring_ctx *ctx)
|
||||
{
|
||||
struct file *file;
|
||||
#if defined(CONFIG_UNIX)
|
||||
int ret;
|
||||
|
||||
ret = sock_create_kern(&init_net, PF_UNIX, SOCK_RAW, IPPROTO_IP,
|
||||
&ctx->ring_sock);
|
||||
if (ret)
|
||||
return ERR_PTR(ret);
|
||||
#endif
|
||||
|
||||
file = anon_inode_getfile_secure("[io_uring]", &io_uring_fops, ctx,
|
||||
return anon_inode_getfile_secure("[io_uring]", &io_uring_fops, ctx,
|
||||
O_RDWR | O_CLOEXEC, NULL);
|
||||
#if defined(CONFIG_UNIX)
|
||||
if (IS_ERR(file)) {
|
||||
sock_release(ctx->ring_sock);
|
||||
ctx->ring_sock = NULL;
|
||||
} else {
|
||||
ctx->ring_sock->file = file;
|
||||
}
|
||||
#endif
|
||||
return file;
|
||||
}
|
||||
|
||||
static __cold int io_uring_create(unsigned entries, struct io_uring_params *p,
|
||||
@ -4158,506 +4055,6 @@ SYSCALL_DEFINE2(io_uring_setup, u32, entries,
|
||||
return io_uring_setup(entries, params);
|
||||
}
|
||||
|
||||
static __cold int io_probe(struct io_ring_ctx *ctx, void __user *arg,
|
||||
unsigned nr_args)
|
||||
{
|
||||
struct io_uring_probe *p;
|
||||
size_t size;
|
||||
int i, ret;
|
||||
|
||||
size = struct_size(p, ops, nr_args);
|
||||
if (size == SIZE_MAX)
|
||||
return -EOVERFLOW;
|
||||
p = kzalloc(size, GFP_KERNEL);
|
||||
if (!p)
|
||||
return -ENOMEM;
|
||||
|
||||
ret = -EFAULT;
|
||||
if (copy_from_user(p, arg, size))
|
||||
goto out;
|
||||
ret = -EINVAL;
|
||||
if (memchr_inv(p, 0, size))
|
||||
goto out;
|
||||
|
||||
p->last_op = IORING_OP_LAST - 1;
|
||||
if (nr_args > IORING_OP_LAST)
|
||||
nr_args = IORING_OP_LAST;
|
||||
|
||||
for (i = 0; i < nr_args; i++) {
|
||||
p->ops[i].op = i;
|
||||
if (!io_issue_defs[i].not_supported)
|
||||
p->ops[i].flags = IO_URING_OP_SUPPORTED;
|
||||
}
|
||||
p->ops_len = i;
|
||||
|
||||
ret = 0;
|
||||
if (copy_to_user(arg, p, size))
|
||||
ret = -EFAULT;
|
||||
out:
|
||||
kfree(p);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int io_register_personality(struct io_ring_ctx *ctx)
|
||||
{
|
||||
const struct cred *creds;
|
||||
u32 id;
|
||||
int ret;
|
||||
|
||||
creds = get_current_cred();
|
||||
|
||||
ret = xa_alloc_cyclic(&ctx->personalities, &id, (void *)creds,
|
||||
XA_LIMIT(0, USHRT_MAX), &ctx->pers_next, GFP_KERNEL);
|
||||
if (ret < 0) {
|
||||
put_cred(creds);
|
||||
return ret;
|
||||
}
|
||||
return id;
|
||||
}
|
||||
|
||||
static __cold int io_register_restrictions(struct io_ring_ctx *ctx,
|
||||
void __user *arg, unsigned int nr_args)
|
||||
{
|
||||
struct io_uring_restriction *res;
|
||||
size_t size;
|
||||
int i, ret;
|
||||
|
||||
/* Restrictions allowed only if rings started disabled */
|
||||
if (!(ctx->flags & IORING_SETUP_R_DISABLED))
|
||||
return -EBADFD;
|
||||
|
||||
/* We allow only a single restrictions registration */
|
||||
if (ctx->restrictions.registered)
|
||||
return -EBUSY;
|
||||
|
||||
if (!arg || nr_args > IORING_MAX_RESTRICTIONS)
|
||||
return -EINVAL;
|
||||
|
||||
size = array_size(nr_args, sizeof(*res));
|
||||
if (size == SIZE_MAX)
|
||||
return -EOVERFLOW;
|
||||
|
||||
res = memdup_user(arg, size);
|
||||
if (IS_ERR(res))
|
||||
return PTR_ERR(res);
|
||||
|
||||
ret = 0;
|
||||
|
||||
for (i = 0; i < nr_args; i++) {
|
||||
switch (res[i].opcode) {
|
||||
case IORING_RESTRICTION_REGISTER_OP:
|
||||
if (res[i].register_op >= IORING_REGISTER_LAST) {
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
__set_bit(res[i].register_op,
|
||||
ctx->restrictions.register_op);
|
||||
break;
|
||||
case IORING_RESTRICTION_SQE_OP:
|
||||
if (res[i].sqe_op >= IORING_OP_LAST) {
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
__set_bit(res[i].sqe_op, ctx->restrictions.sqe_op);
|
||||
break;
|
||||
case IORING_RESTRICTION_SQE_FLAGS_ALLOWED:
|
||||
ctx->restrictions.sqe_flags_allowed = res[i].sqe_flags;
|
||||
break;
|
||||
case IORING_RESTRICTION_SQE_FLAGS_REQUIRED:
|
||||
ctx->restrictions.sqe_flags_required = res[i].sqe_flags;
|
||||
break;
|
||||
default:
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
out:
|
||||
/* Reset all restrictions if an error happened */
|
||||
if (ret != 0)
|
||||
memset(&ctx->restrictions, 0, sizeof(ctx->restrictions));
|
||||
else
|
||||
ctx->restrictions.registered = true;
|
||||
|
||||
kfree(res);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int io_register_enable_rings(struct io_ring_ctx *ctx)
|
||||
{
|
||||
if (!(ctx->flags & IORING_SETUP_R_DISABLED))
|
||||
return -EBADFD;
|
||||
|
||||
if (ctx->flags & IORING_SETUP_SINGLE_ISSUER && !ctx->submitter_task) {
|
||||
WRITE_ONCE(ctx->submitter_task, get_task_struct(current));
|
||||
/*
|
||||
* Lazy activation attempts would fail if it was polled before
|
||||
* submitter_task is set.
|
||||
*/
|
||||
if (wq_has_sleeper(&ctx->poll_wq))
|
||||
io_activate_pollwq(ctx);
|
||||
}
|
||||
|
||||
if (ctx->restrictions.registered)
|
||||
ctx->restricted = 1;
|
||||
|
||||
ctx->flags &= ~IORING_SETUP_R_DISABLED;
|
||||
if (ctx->sq_data && wq_has_sleeper(&ctx->sq_data->wait))
|
||||
wake_up(&ctx->sq_data->wait);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static __cold int __io_register_iowq_aff(struct io_ring_ctx *ctx,
|
||||
cpumask_var_t new_mask)
|
||||
{
|
||||
int ret;
|
||||
|
||||
if (!(ctx->flags & IORING_SETUP_SQPOLL)) {
|
||||
ret = io_wq_cpu_affinity(current->io_uring, new_mask);
|
||||
} else {
|
||||
mutex_unlock(&ctx->uring_lock);
|
||||
ret = io_sqpoll_wq_cpu_affinity(ctx, new_mask);
|
||||
mutex_lock(&ctx->uring_lock);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static __cold int io_register_iowq_aff(struct io_ring_ctx *ctx,
|
||||
void __user *arg, unsigned len)
|
||||
{
|
||||
cpumask_var_t new_mask;
|
||||
int ret;
|
||||
|
||||
if (!alloc_cpumask_var(&new_mask, GFP_KERNEL))
|
||||
return -ENOMEM;
|
||||
|
||||
cpumask_clear(new_mask);
|
||||
if (len > cpumask_size())
|
||||
len = cpumask_size();
|
||||
|
||||
if (in_compat_syscall()) {
|
||||
ret = compat_get_bitmap(cpumask_bits(new_mask),
|
||||
(const compat_ulong_t __user *)arg,
|
||||
len * 8 /* CHAR_BIT */);
|
||||
} else {
|
||||
ret = copy_from_user(new_mask, arg, len);
|
||||
}
|
||||
|
||||
if (ret) {
|
||||
free_cpumask_var(new_mask);
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
ret = __io_register_iowq_aff(ctx, new_mask);
|
||||
free_cpumask_var(new_mask);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static __cold int io_unregister_iowq_aff(struct io_ring_ctx *ctx)
|
||||
{
|
||||
return __io_register_iowq_aff(ctx, NULL);
|
||||
}
|
||||
|
||||
static __cold int io_register_iowq_max_workers(struct io_ring_ctx *ctx,
|
||||
void __user *arg)
|
||||
__must_hold(&ctx->uring_lock)
|
||||
{
|
||||
struct io_tctx_node *node;
|
||||
struct io_uring_task *tctx = NULL;
|
||||
struct io_sq_data *sqd = NULL;
|
||||
__u32 new_count[2];
|
||||
int i, ret;
|
||||
|
||||
if (copy_from_user(new_count, arg, sizeof(new_count)))
|
||||
return -EFAULT;
|
||||
for (i = 0; i < ARRAY_SIZE(new_count); i++)
|
||||
if (new_count[i] > INT_MAX)
|
||||
return -EINVAL;
|
||||
|
||||
if (ctx->flags & IORING_SETUP_SQPOLL) {
|
||||
sqd = ctx->sq_data;
|
||||
if (sqd) {
|
||||
/*
|
||||
* Observe the correct sqd->lock -> ctx->uring_lock
|
||||
* ordering. Fine to drop uring_lock here, we hold
|
||||
* a ref to the ctx.
|
||||
*/
|
||||
refcount_inc(&sqd->refs);
|
||||
mutex_unlock(&ctx->uring_lock);
|
||||
mutex_lock(&sqd->lock);
|
||||
mutex_lock(&ctx->uring_lock);
|
||||
if (sqd->thread)
|
||||
tctx = sqd->thread->io_uring;
|
||||
}
|
||||
} else {
|
||||
tctx = current->io_uring;
|
||||
}
|
||||
|
||||
BUILD_BUG_ON(sizeof(new_count) != sizeof(ctx->iowq_limits));
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(new_count); i++)
|
||||
if (new_count[i])
|
||||
ctx->iowq_limits[i] = new_count[i];
|
||||
ctx->iowq_limits_set = true;
|
||||
|
||||
if (tctx && tctx->io_wq) {
|
||||
ret = io_wq_max_workers(tctx->io_wq, new_count);
|
||||
if (ret)
|
||||
goto err;
|
||||
} else {
|
||||
memset(new_count, 0, sizeof(new_count));
|
||||
}
|
||||
|
||||
if (sqd) {
|
||||
mutex_unlock(&sqd->lock);
|
||||
io_put_sq_data(sqd);
|
||||
}
|
||||
|
||||
if (copy_to_user(arg, new_count, sizeof(new_count)))
|
||||
return -EFAULT;
|
||||
|
||||
/* that's it for SQPOLL, only the SQPOLL task creates requests */
|
||||
if (sqd)
|
||||
return 0;
|
||||
|
||||
/* now propagate the restriction to all registered users */
|
||||
list_for_each_entry(node, &ctx->tctx_list, ctx_node) {
|
||||
struct io_uring_task *tctx = node->task->io_uring;
|
||||
|
||||
if (WARN_ON_ONCE(!tctx->io_wq))
|
||||
continue;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(new_count); i++)
|
||||
new_count[i] = ctx->iowq_limits[i];
|
||||
/* ignore errors, it always returns zero anyway */
|
||||
(void)io_wq_max_workers(tctx->io_wq, new_count);
|
||||
}
|
||||
return 0;
|
||||
err:
|
||||
if (sqd) {
|
||||
mutex_unlock(&sqd->lock);
|
||||
io_put_sq_data(sqd);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
|
||||
void __user *arg, unsigned nr_args)
|
||||
__releases(ctx->uring_lock)
|
||||
__acquires(ctx->uring_lock)
|
||||
{
|
||||
int ret;
|
||||
|
||||
/*
|
||||
* We don't quiesce the refs for register anymore and so it can't be
|
||||
* dying as we're holding a file ref here.
|
||||
*/
|
||||
if (WARN_ON_ONCE(percpu_ref_is_dying(&ctx->refs)))
|
||||
return -ENXIO;
|
||||
|
||||
if (ctx->submitter_task && ctx->submitter_task != current)
|
||||
return -EEXIST;
|
||||
|
||||
if (ctx->restricted) {
|
||||
opcode = array_index_nospec(opcode, IORING_REGISTER_LAST);
|
||||
if (!test_bit(opcode, ctx->restrictions.register_op))
|
||||
return -EACCES;
|
||||
}
|
||||
|
||||
switch (opcode) {
|
||||
case IORING_REGISTER_BUFFERS:
|
||||
ret = -EFAULT;
|
||||
if (!arg)
|
||||
break;
|
||||
ret = io_sqe_buffers_register(ctx, arg, nr_args, NULL);
|
||||
break;
|
||||
case IORING_UNREGISTER_BUFFERS:
|
||||
ret = -EINVAL;
|
||||
if (arg || nr_args)
|
||||
break;
|
||||
ret = io_sqe_buffers_unregister(ctx);
|
||||
break;
|
||||
case IORING_REGISTER_FILES:
|
||||
ret = -EFAULT;
|
||||
if (!arg)
|
||||
break;
|
||||
ret = io_sqe_files_register(ctx, arg, nr_args, NULL);
|
||||
break;
|
||||
case IORING_UNREGISTER_FILES:
|
||||
ret = -EINVAL;
|
||||
if (arg || nr_args)
|
||||
break;
|
||||
ret = io_sqe_files_unregister(ctx);
|
||||
break;
|
||||
case IORING_REGISTER_FILES_UPDATE:
|
||||
ret = io_register_files_update(ctx, arg, nr_args);
|
||||
break;
|
||||
case IORING_REGISTER_EVENTFD:
|
||||
ret = -EINVAL;
|
||||
if (nr_args != 1)
|
||||
break;
|
||||
ret = io_eventfd_register(ctx, arg, 0);
|
||||
break;
|
||||
case IORING_REGISTER_EVENTFD_ASYNC:
|
||||
ret = -EINVAL;
|
||||
if (nr_args != 1)
|
||||
break;
|
||||
ret = io_eventfd_register(ctx, arg, 1);
|
||||
break;
|
||||
case IORING_UNREGISTER_EVENTFD:
|
||||
ret = -EINVAL;
|
||||
if (arg || nr_args)
|
||||
break;
|
||||
ret = io_eventfd_unregister(ctx);
|
||||
break;
|
||||
case IORING_REGISTER_PROBE:
|
||||
ret = -EINVAL;
|
||||
if (!arg || nr_args > 256)
|
||||
break;
|
||||
ret = io_probe(ctx, arg, nr_args);
|
||||
break;
|
||||
case IORING_REGISTER_PERSONALITY:
|
||||
ret = -EINVAL;
|
||||
if (arg || nr_args)
|
||||
break;
|
||||
ret = io_register_personality(ctx);
|
||||
break;
|
||||
case IORING_UNREGISTER_PERSONALITY:
|
||||
ret = -EINVAL;
|
||||
if (arg)
|
||||
break;
|
||||
ret = io_unregister_personality(ctx, nr_args);
|
||||
break;
|
||||
case IORING_REGISTER_ENABLE_RINGS:
|
||||
ret = -EINVAL;
|
||||
if (arg || nr_args)
|
||||
break;
|
||||
ret = io_register_enable_rings(ctx);
|
||||
break;
|
||||
case IORING_REGISTER_RESTRICTIONS:
|
||||
ret = io_register_restrictions(ctx, arg, nr_args);
|
||||
break;
|
||||
case IORING_REGISTER_FILES2:
|
||||
ret = io_register_rsrc(ctx, arg, nr_args, IORING_RSRC_FILE);
|
||||
break;
|
||||
case IORING_REGISTER_FILES_UPDATE2:
|
||||
ret = io_register_rsrc_update(ctx, arg, nr_args,
|
||||
IORING_RSRC_FILE);
|
||||
break;
|
||||
case IORING_REGISTER_BUFFERS2:
|
||||
ret = io_register_rsrc(ctx, arg, nr_args, IORING_RSRC_BUFFER);
|
||||
break;
|
||||
case IORING_REGISTER_BUFFERS_UPDATE:
|
||||
ret = io_register_rsrc_update(ctx, arg, nr_args,
|
||||
IORING_RSRC_BUFFER);
|
||||
break;
|
||||
case IORING_REGISTER_IOWQ_AFF:
|
||||
ret = -EINVAL;
|
||||
if (!arg || !nr_args)
|
||||
break;
|
||||
ret = io_register_iowq_aff(ctx, arg, nr_args);
|
||||
break;
|
||||
case IORING_UNREGISTER_IOWQ_AFF:
|
||||
ret = -EINVAL;
|
||||
if (arg || nr_args)
|
||||
break;
|
||||
ret = io_unregister_iowq_aff(ctx);
|
||||
break;
|
||||
case IORING_REGISTER_IOWQ_MAX_WORKERS:
|
||||
ret = -EINVAL;
|
||||
if (!arg || nr_args != 2)
|
||||
break;
|
||||
ret = io_register_iowq_max_workers(ctx, arg);
|
||||
break;
|
||||
case IORING_REGISTER_RING_FDS:
|
||||
ret = io_ringfd_register(ctx, arg, nr_args);
|
||||
break;
|
||||
case IORING_UNREGISTER_RING_FDS:
|
||||
ret = io_ringfd_unregister(ctx, arg, nr_args);
|
||||
break;
|
||||
case IORING_REGISTER_PBUF_RING:
|
||||
ret = -EINVAL;
|
||||
if (!arg || nr_args != 1)
|
||||
break;
|
||||
ret = io_register_pbuf_ring(ctx, arg);
|
||||
break;
|
||||
case IORING_UNREGISTER_PBUF_RING:
|
||||
ret = -EINVAL;
|
||||
if (!arg || nr_args != 1)
|
||||
break;
|
||||
ret = io_unregister_pbuf_ring(ctx, arg);
|
||||
break;
|
||||
case IORING_REGISTER_SYNC_CANCEL:
|
||||
ret = -EINVAL;
|
||||
if (!arg || nr_args != 1)
|
||||
break;
|
||||
ret = io_sync_cancel(ctx, arg);
|
||||
break;
|
||||
case IORING_REGISTER_FILE_ALLOC_RANGE:
|
||||
ret = -EINVAL;
|
||||
if (!arg || nr_args)
|
||||
break;
|
||||
ret = io_register_file_alloc_range(ctx, arg);
|
||||
break;
|
||||
default:
|
||||
ret = -EINVAL;
|
||||
break;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
SYSCALL_DEFINE4(io_uring_register, unsigned int, fd, unsigned int, opcode,
|
||||
void __user *, arg, unsigned int, nr_args)
|
||||
{
|
||||
struct io_ring_ctx *ctx;
|
||||
long ret = -EBADF;
|
||||
struct file *file;
|
||||
bool use_registered_ring;
|
||||
|
||||
use_registered_ring = !!(opcode & IORING_REGISTER_USE_REGISTERED_RING);
|
||||
opcode &= ~IORING_REGISTER_USE_REGISTERED_RING;
|
||||
|
||||
if (opcode >= IORING_REGISTER_LAST)
|
||||
return -EINVAL;
|
||||
|
||||
if (use_registered_ring) {
|
||||
/*
|
||||
* Ring fd has been registered via IORING_REGISTER_RING_FDS, we
|
||||
* need only dereference our task private array to find it.
|
||||
*/
|
||||
struct io_uring_task *tctx = current->io_uring;
|
||||
|
||||
if (unlikely(!tctx || fd >= IO_RINGFD_REG_MAX))
|
||||
return -EINVAL;
|
||||
fd = array_index_nospec(fd, IO_RINGFD_REG_MAX);
|
||||
file = tctx->registered_rings[fd];
|
||||
if (unlikely(!file))
|
||||
return -EBADF;
|
||||
} else {
|
||||
file = fget(fd);
|
||||
if (unlikely(!file))
|
||||
return -EBADF;
|
||||
ret = -EOPNOTSUPP;
|
||||
if (!io_is_uring_fops(file))
|
||||
goto out_fput;
|
||||
}
|
||||
|
||||
ctx = file->private_data;
|
||||
|
||||
mutex_lock(&ctx->uring_lock);
|
||||
ret = __io_uring_register(ctx, opcode, arg, nr_args);
|
||||
mutex_unlock(&ctx->uring_lock);
|
||||
trace_io_uring_register(ctx, opcode, ctx->nr_user_files, ctx->nr_user_bufs, ret);
|
||||
out_fput:
|
||||
if (!use_registered_ring)
|
||||
fput(file);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int __init io_uring_init(void)
|
||||
{
|
||||
#define __BUILD_BUG_VERIFY_OFFSET_SIZE(stype, eoffset, esize, ename) do { \
|
||||
|
@ -15,16 +15,6 @@
|
||||
#include <trace/events/io_uring.h>
|
||||
#endif
|
||||
|
||||
enum {
|
||||
/*
|
||||
* A hint to not wake right away but delay until there are enough of
|
||||
* tw's queued to match the number of CQEs the task is waiting for.
|
||||
*
|
||||
* Must not be used with requests generating more than one CQE.
|
||||
* It's also ignored unless IORING_SETUP_DEFER_TASKRUN is set.
|
||||
*/
|
||||
IOU_F_TWQ_LAZY_WAKE = 1,
|
||||
};
|
||||
|
||||
enum {
|
||||
IOU_OK = 0,
|
||||
@ -54,7 +44,6 @@ struct file *io_file_get_fixed(struct io_kiocb *req, int fd,
|
||||
unsigned issue_flags);
|
||||
|
||||
void __io_req_task_work_add(struct io_kiocb *req, unsigned flags);
|
||||
bool io_is_uring_fops(struct file *file);
|
||||
bool io_alloc_async_data(struct io_kiocb *req);
|
||||
void io_req_task_queue(struct io_kiocb *req);
|
||||
void io_queue_iowq(struct io_kiocb *req, struct io_tw_state *ts_dont_use);
|
||||
@ -89,6 +78,14 @@ bool io_match_task_safe(struct io_kiocb *head, struct task_struct *task,
|
||||
void *io_mem_alloc(size_t size);
|
||||
void io_mem_free(void *ptr);
|
||||
|
||||
enum {
|
||||
IO_EVENTFD_OP_SIGNAL_BIT,
|
||||
IO_EVENTFD_OP_FREE_BIT,
|
||||
};
|
||||
|
||||
void io_eventfd_ops(struct rcu_head *rcu);
|
||||
void io_activate_pollwq(struct io_ring_ctx *ctx);
|
||||
|
||||
#if defined(CONFIG_PROVE_LOCKING)
|
||||
static inline void io_lockdep_assert_cq_locked(struct io_ring_ctx *ctx)
|
||||
{
|
||||
|
@ -750,6 +750,32 @@ int io_unregister_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
|
||||
return 0;
|
||||
}
|
||||
|
||||
int io_register_pbuf_status(struct io_ring_ctx *ctx, void __user *arg)
|
||||
{
|
||||
struct io_uring_buf_status buf_status;
|
||||
struct io_buffer_list *bl;
|
||||
int i;
|
||||
|
||||
if (copy_from_user(&buf_status, arg, sizeof(buf_status)))
|
||||
return -EFAULT;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(buf_status.resv); i++)
|
||||
if (buf_status.resv[i])
|
||||
return -EINVAL;
|
||||
|
||||
bl = io_buffer_get_list(ctx, buf_status.buf_group);
|
||||
if (!bl)
|
||||
return -ENOENT;
|
||||
if (!bl->is_mapped)
|
||||
return -EINVAL;
|
||||
|
||||
buf_status.head = bl->head;
|
||||
if (copy_to_user(arg, &buf_status, sizeof(buf_status)))
|
||||
return -EFAULT;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void *io_pbuf_get_address(struct io_ring_ctx *ctx, unsigned long bgid)
|
||||
{
|
||||
struct io_buffer_list *bl;
|
||||
|
@ -53,6 +53,7 @@ int io_provide_buffers(struct io_kiocb *req, unsigned int issue_flags);
|
||||
|
||||
int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg);
|
||||
int io_unregister_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg);
|
||||
int io_register_pbuf_status(struct io_ring_ctx *ctx, void __user *arg);
|
||||
|
||||
void io_kbuf_mmap_list_free(struct io_ring_ctx *ctx);
|
||||
|
||||
|
@ -469,6 +469,12 @@ const struct io_issue_def io_issue_defs[] = {
|
||||
.prep = io_eopnotsupp_prep,
|
||||
#endif
|
||||
},
|
||||
[IORING_OP_FIXED_FD_INSTALL] = {
|
||||
.needs_file = 1,
|
||||
.audit_skip = 1,
|
||||
.prep = io_install_fixed_fd_prep,
|
||||
.issue = io_install_fixed_fd,
|
||||
},
|
||||
};
|
||||
|
||||
const struct io_cold_def io_cold_defs[] = {
|
||||
@ -704,6 +710,9 @@ const struct io_cold_def io_cold_defs[] = {
|
||||
[IORING_OP_FUTEX_WAITV] = {
|
||||
.name = "FUTEX_WAITV",
|
||||
},
|
||||
[IORING_OP_FIXED_FD_INSTALL] = {
|
||||
.name = "FIXED_FD_INSTALL",
|
||||
},
|
||||
};
|
||||
|
||||
const char *io_uring_get_opcode(u8 opcode)
|
||||
|
@ -31,6 +31,11 @@ struct io_close {
|
||||
u32 file_slot;
|
||||
};
|
||||
|
||||
struct io_fixed_install {
|
||||
struct file *file;
|
||||
unsigned int o_flags;
|
||||
};
|
||||
|
||||
static bool io_openat_force_async(struct io_open *open)
|
||||
{
|
||||
/*
|
||||
@ -254,3 +259,42 @@ int io_close(struct io_kiocb *req, unsigned int issue_flags)
|
||||
io_req_set_res(req, ret, 0);
|
||||
return IOU_OK;
|
||||
}
|
||||
|
||||
int io_install_fixed_fd_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
|
||||
{
|
||||
struct io_fixed_install *ifi;
|
||||
unsigned int flags;
|
||||
|
||||
if (sqe->off || sqe->addr || sqe->len || sqe->buf_index ||
|
||||
sqe->splice_fd_in || sqe->addr3)
|
||||
return -EINVAL;
|
||||
|
||||
/* must be a fixed file */
|
||||
if (!(req->flags & REQ_F_FIXED_FILE))
|
||||
return -EBADF;
|
||||
|
||||
flags = READ_ONCE(sqe->install_fd_flags);
|
||||
if (flags & ~IORING_FIXED_FD_NO_CLOEXEC)
|
||||
return -EINVAL;
|
||||
|
||||
/* default to O_CLOEXEC, disable if IORING_FIXED_FD_NO_CLOEXEC is set */
|
||||
ifi = io_kiocb_to_cmd(req, struct io_fixed_install);
|
||||
ifi->o_flags = O_CLOEXEC;
|
||||
if (flags & IORING_FIXED_FD_NO_CLOEXEC)
|
||||
ifi->o_flags = 0;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int io_install_fixed_fd(struct io_kiocb *req, unsigned int issue_flags)
|
||||
{
|
||||
struct io_fixed_install *ifi;
|
||||
int ret;
|
||||
|
||||
ifi = io_kiocb_to_cmd(req, struct io_fixed_install);
|
||||
ret = receive_fd(req->file, NULL, ifi->o_flags);
|
||||
if (ret < 0)
|
||||
req_set_fail(req);
|
||||
io_req_set_res(req, ret, 0);
|
||||
return IOU_OK;
|
||||
}
|
||||
|
@@ -12,3 +12,6 @@ int io_openat2(struct io_kiocb *req, unsigned int issue_flags);

int io_close_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
int io_close(struct io_kiocb *req, unsigned int issue_flags);

int io_install_fixed_fd_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
int io_install_fixed_fd(struct io_kiocb *req, unsigned int issue_flags);
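io_install_fixed_fd_prep()/io_install_fixed_fd() above implement the new IORING_OP_FIXED_FD_INSTALL opcode: the request must target a fixed (direct) descriptor, and receive_fd() copies it into the task's normal file table, O_CLOEXEC by default unless IORING_FIXED_FD_NO_CLOEXEC is set. The sketch below is a hedged userspace illustration of issuing the opcode; it assumes liburing's basic ring helpers plus uapi headers carrying this series, and fills the SQE fields by hand since a dedicated prep helper is not assumed.

```c
/*
 * Hedged sketch: install direct descriptor slot 'slot' into the normal
 * file table. Assumes an initialized struct io_uring 'ring' and headers
 * that define IORING_OP_FIXED_FD_INSTALL and sqe->install_fd_flags.
 */
#include <liburing.h>
#include <string.h>

static int install_direct_fd(struct io_uring *ring, int slot)
{
	struct io_uring_sqe *sqe = io_uring_get_sqe(ring);
	struct io_uring_cqe *cqe;
	int ret;

	if (!sqe)
		return -EAGAIN;
	memset(sqe, 0, sizeof(*sqe));		/* prep rejects stray fields */
	sqe->opcode = IORING_OP_FIXED_FD_INSTALL;
	sqe->fd = slot;				/* index into the fixed file table */
	sqe->flags = IOSQE_FIXED_FILE;		/* prep requires a fixed file */
	sqe->install_fd_flags = 0;		/* default: new fd is O_CLOEXEC */

	io_uring_submit(ring);
	ret = io_uring_wait_cqe(ring, &cqe);
	if (ret < 0)
		return ret;
	ret = cqe->res;				/* >= 0: the newly installed fd */
	io_uring_cqe_seen(ring, cqe);
	return ret;
}
```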
io_uring/register.c (new file, 605 lines)
@@ -0,0 +1,605 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Code related to the io_uring_register() syscall
|
||||
*
|
||||
* Copyright (C) 2023 Jens Axboe
|
||||
*/
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/errno.h>
|
||||
#include <linux/syscalls.h>
|
||||
#include <linux/refcount.h>
|
||||
#include <linux/bits.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/file.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/uaccess.h>
|
||||
#include <linux/nospec.h>
|
||||
#include <linux/io_uring.h>
|
||||
#include <linux/io_uring_types.h>
|
||||
|
||||
#include "io_uring.h"
|
||||
#include "opdef.h"
|
||||
#include "tctx.h"
|
||||
#include "rsrc.h"
|
||||
#include "sqpoll.h"
|
||||
#include "register.h"
|
||||
#include "cancel.h"
|
||||
#include "kbuf.h"
|
||||
|
||||
#define IORING_MAX_RESTRICTIONS (IORING_RESTRICTION_LAST + \
|
||||
IORING_REGISTER_LAST + IORING_OP_LAST)
|
||||
|
||||
static int io_eventfd_register(struct io_ring_ctx *ctx, void __user *arg,
|
||||
unsigned int eventfd_async)
|
||||
{
|
||||
struct io_ev_fd *ev_fd;
|
||||
__s32 __user *fds = arg;
|
||||
int fd;
|
||||
|
||||
ev_fd = rcu_dereference_protected(ctx->io_ev_fd,
|
||||
lockdep_is_held(&ctx->uring_lock));
|
||||
if (ev_fd)
|
||||
return -EBUSY;
|
||||
|
||||
if (copy_from_user(&fd, fds, sizeof(*fds)))
|
||||
return -EFAULT;
|
||||
|
||||
ev_fd = kmalloc(sizeof(*ev_fd), GFP_KERNEL);
|
||||
if (!ev_fd)
|
||||
return -ENOMEM;
|
||||
|
||||
ev_fd->cq_ev_fd = eventfd_ctx_fdget(fd);
|
||||
if (IS_ERR(ev_fd->cq_ev_fd)) {
|
||||
int ret = PTR_ERR(ev_fd->cq_ev_fd);
|
||||
kfree(ev_fd);
|
||||
return ret;
|
||||
}
|
||||
|
||||
spin_lock(&ctx->completion_lock);
|
||||
ctx->evfd_last_cq_tail = ctx->cached_cq_tail;
|
||||
spin_unlock(&ctx->completion_lock);
|
||||
|
||||
ev_fd->eventfd_async = eventfd_async;
|
||||
ctx->has_evfd = true;
|
||||
rcu_assign_pointer(ctx->io_ev_fd, ev_fd);
|
||||
atomic_set(&ev_fd->refs, 1);
|
||||
atomic_set(&ev_fd->ops, 0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int io_eventfd_unregister(struct io_ring_ctx *ctx)
|
||||
{
|
||||
struct io_ev_fd *ev_fd;
|
||||
|
||||
ev_fd = rcu_dereference_protected(ctx->io_ev_fd,
|
||||
lockdep_is_held(&ctx->uring_lock));
|
||||
if (ev_fd) {
|
||||
ctx->has_evfd = false;
|
||||
rcu_assign_pointer(ctx->io_ev_fd, NULL);
|
||||
if (!atomic_fetch_or(BIT(IO_EVENTFD_OP_FREE_BIT), &ev_fd->ops))
|
||||
call_rcu(&ev_fd->rcu, io_eventfd_ops);
|
||||
return 0;
|
||||
}
|
||||
|
||||
return -ENXIO;
|
||||
}
|
||||
|
||||
static __cold int io_probe(struct io_ring_ctx *ctx, void __user *arg,
|
||||
unsigned nr_args)
|
||||
{
|
||||
struct io_uring_probe *p;
|
||||
size_t size;
|
||||
int i, ret;
|
||||
|
||||
size = struct_size(p, ops, nr_args);
|
||||
if (size == SIZE_MAX)
|
||||
return -EOVERFLOW;
|
||||
p = kzalloc(size, GFP_KERNEL);
|
||||
if (!p)
|
||||
return -ENOMEM;
|
||||
|
||||
ret = -EFAULT;
|
||||
if (copy_from_user(p, arg, size))
|
||||
goto out;
|
||||
ret = -EINVAL;
|
||||
if (memchr_inv(p, 0, size))
|
||||
goto out;
|
||||
|
||||
p->last_op = IORING_OP_LAST - 1;
|
||||
if (nr_args > IORING_OP_LAST)
|
||||
nr_args = IORING_OP_LAST;
|
||||
|
||||
for (i = 0; i < nr_args; i++) {
|
||||
p->ops[i].op = i;
|
||||
if (!io_issue_defs[i].not_supported)
|
||||
p->ops[i].flags = IO_URING_OP_SUPPORTED;
|
||||
}
|
||||
p->ops_len = i;
|
||||
|
||||
ret = 0;
|
||||
if (copy_to_user(arg, p, size))
|
||||
ret = -EFAULT;
|
||||
out:
|
||||
kfree(p);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int io_unregister_personality(struct io_ring_ctx *ctx, unsigned id)
|
||||
{
|
||||
const struct cred *creds;
|
||||
|
||||
creds = xa_erase(&ctx->personalities, id);
|
||||
if (creds) {
|
||||
put_cred(creds);
|
||||
return 0;
|
||||
}
|
||||
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
|
||||
static int io_register_personality(struct io_ring_ctx *ctx)
|
||||
{
|
||||
const struct cred *creds;
|
||||
u32 id;
|
||||
int ret;
|
||||
|
||||
creds = get_current_cred();
|
||||
|
||||
ret = xa_alloc_cyclic(&ctx->personalities, &id, (void *)creds,
|
||||
XA_LIMIT(0, USHRT_MAX), &ctx->pers_next, GFP_KERNEL);
|
||||
if (ret < 0) {
|
||||
put_cred(creds);
|
||||
return ret;
|
||||
}
|
||||
return id;
|
||||
}
|
||||
|
||||
static __cold int io_register_restrictions(struct io_ring_ctx *ctx,
|
||||
void __user *arg, unsigned int nr_args)
|
||||
{
|
||||
struct io_uring_restriction *res;
|
||||
size_t size;
|
||||
int i, ret;
|
||||
|
||||
/* Restrictions allowed only if rings started disabled */
|
||||
if (!(ctx->flags & IORING_SETUP_R_DISABLED))
|
||||
return -EBADFD;
|
||||
|
||||
/* We allow only a single restrictions registration */
|
||||
if (ctx->restrictions.registered)
|
||||
return -EBUSY;
|
||||
|
||||
if (!arg || nr_args > IORING_MAX_RESTRICTIONS)
|
||||
return -EINVAL;
|
||||
|
||||
size = array_size(nr_args, sizeof(*res));
|
||||
if (size == SIZE_MAX)
|
||||
return -EOVERFLOW;
|
||||
|
||||
res = memdup_user(arg, size);
|
||||
if (IS_ERR(res))
|
||||
return PTR_ERR(res);
|
||||
|
||||
ret = 0;
|
||||
|
||||
for (i = 0; i < nr_args; i++) {
|
||||
switch (res[i].opcode) {
|
||||
case IORING_RESTRICTION_REGISTER_OP:
|
||||
if (res[i].register_op >= IORING_REGISTER_LAST) {
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
__set_bit(res[i].register_op,
|
||||
ctx->restrictions.register_op);
|
||||
break;
|
||||
case IORING_RESTRICTION_SQE_OP:
|
||||
if (res[i].sqe_op >= IORING_OP_LAST) {
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
__set_bit(res[i].sqe_op, ctx->restrictions.sqe_op);
|
||||
break;
|
||||
case IORING_RESTRICTION_SQE_FLAGS_ALLOWED:
|
||||
ctx->restrictions.sqe_flags_allowed = res[i].sqe_flags;
|
||||
break;
|
||||
case IORING_RESTRICTION_SQE_FLAGS_REQUIRED:
|
||||
ctx->restrictions.sqe_flags_required = res[i].sqe_flags;
|
||||
break;
|
||||
default:
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
out:
|
||||
/* Reset all restrictions if an error happened */
|
||||
if (ret != 0)
|
||||
memset(&ctx->restrictions, 0, sizeof(ctx->restrictions));
|
||||
else
|
||||
ctx->restrictions.registered = true;
|
||||
|
||||
kfree(res);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int io_register_enable_rings(struct io_ring_ctx *ctx)
|
||||
{
|
||||
if (!(ctx->flags & IORING_SETUP_R_DISABLED))
|
||||
return -EBADFD;
|
||||
|
||||
if (ctx->flags & IORING_SETUP_SINGLE_ISSUER && !ctx->submitter_task) {
|
||||
WRITE_ONCE(ctx->submitter_task, get_task_struct(current));
|
||||
/*
|
||||
* Lazy activation attempts would fail if it was polled before
|
||||
* submitter_task is set.
|
||||
*/
|
||||
if (wq_has_sleeper(&ctx->poll_wq))
|
||||
io_activate_pollwq(ctx);
|
||||
}
|
||||
|
||||
if (ctx->restrictions.registered)
|
||||
ctx->restricted = 1;
|
||||
|
||||
ctx->flags &= ~IORING_SETUP_R_DISABLED;
|
||||
if (ctx->sq_data && wq_has_sleeper(&ctx->sq_data->wait))
|
||||
wake_up(&ctx->sq_data->wait);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static __cold int __io_register_iowq_aff(struct io_ring_ctx *ctx,
|
||||
cpumask_var_t new_mask)
|
||||
{
|
||||
int ret;
|
||||
|
||||
if (!(ctx->flags & IORING_SETUP_SQPOLL)) {
|
||||
ret = io_wq_cpu_affinity(current->io_uring, new_mask);
|
||||
} else {
|
||||
mutex_unlock(&ctx->uring_lock);
|
||||
ret = io_sqpoll_wq_cpu_affinity(ctx, new_mask);
|
||||
mutex_lock(&ctx->uring_lock);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static __cold int io_register_iowq_aff(struct io_ring_ctx *ctx,
|
||||
void __user *arg, unsigned len)
|
||||
{
|
||||
cpumask_var_t new_mask;
|
||||
int ret;
|
||||
|
||||
if (!alloc_cpumask_var(&new_mask, GFP_KERNEL))
|
||||
return -ENOMEM;
|
||||
|
||||
cpumask_clear(new_mask);
|
||||
if (len > cpumask_size())
|
||||
len = cpumask_size();
|
||||
|
||||
if (in_compat_syscall()) {
|
||||
ret = compat_get_bitmap(cpumask_bits(new_mask),
|
||||
(const compat_ulong_t __user *)arg,
|
||||
len * 8 /* CHAR_BIT */);
|
||||
} else {
|
||||
ret = copy_from_user(new_mask, arg, len);
|
||||
}
|
||||
|
||||
if (ret) {
|
||||
free_cpumask_var(new_mask);
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
ret = __io_register_iowq_aff(ctx, new_mask);
|
||||
free_cpumask_var(new_mask);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static __cold int io_unregister_iowq_aff(struct io_ring_ctx *ctx)
|
||||
{
|
||||
return __io_register_iowq_aff(ctx, NULL);
|
||||
}
|
||||
|
||||
static __cold int io_register_iowq_max_workers(struct io_ring_ctx *ctx,
|
||||
void __user *arg)
|
||||
__must_hold(&ctx->uring_lock)
|
||||
{
|
||||
struct io_tctx_node *node;
|
||||
struct io_uring_task *tctx = NULL;
|
||||
struct io_sq_data *sqd = NULL;
|
||||
__u32 new_count[2];
|
||||
int i, ret;
|
||||
|
||||
if (copy_from_user(new_count, arg, sizeof(new_count)))
|
||||
return -EFAULT;
|
||||
for (i = 0; i < ARRAY_SIZE(new_count); i++)
|
||||
if (new_count[i] > INT_MAX)
|
||||
return -EINVAL;
|
||||
|
||||
if (ctx->flags & IORING_SETUP_SQPOLL) {
|
||||
sqd = ctx->sq_data;
|
||||
if (sqd) {
|
||||
/*
|
||||
* Observe the correct sqd->lock -> ctx->uring_lock
|
||||
* ordering. Fine to drop uring_lock here, we hold
|
||||
* a ref to the ctx.
|
||||
*/
|
||||
refcount_inc(&sqd->refs);
|
||||
mutex_unlock(&ctx->uring_lock);
|
||||
mutex_lock(&sqd->lock);
|
||||
mutex_lock(&ctx->uring_lock);
|
||||
if (sqd->thread)
|
||||
tctx = sqd->thread->io_uring;
|
||||
}
|
||||
} else {
|
||||
tctx = current->io_uring;
|
||||
}
|
||||
|
||||
BUILD_BUG_ON(sizeof(new_count) != sizeof(ctx->iowq_limits));
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(new_count); i++)
|
||||
if (new_count[i])
|
||||
ctx->iowq_limits[i] = new_count[i];
|
||||
ctx->iowq_limits_set = true;
|
||||
|
||||
if (tctx && tctx->io_wq) {
|
||||
ret = io_wq_max_workers(tctx->io_wq, new_count);
|
||||
if (ret)
|
||||
goto err;
|
||||
} else {
|
||||
memset(new_count, 0, sizeof(new_count));
|
||||
}
|
||||
|
||||
if (sqd) {
|
||||
mutex_unlock(&sqd->lock);
|
||||
io_put_sq_data(sqd);
|
||||
}
|
||||
|
||||
if (copy_to_user(arg, new_count, sizeof(new_count)))
|
||||
return -EFAULT;
|
||||
|
||||
/* that's it for SQPOLL, only the SQPOLL task creates requests */
|
||||
if (sqd)
|
||||
return 0;
|
||||
|
||||
/* now propagate the restriction to all registered users */
|
||||
list_for_each_entry(node, &ctx->tctx_list, ctx_node) {
|
||||
struct io_uring_task *tctx = node->task->io_uring;
|
||||
|
||||
if (WARN_ON_ONCE(!tctx->io_wq))
|
||||
continue;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(new_count); i++)
|
||||
new_count[i] = ctx->iowq_limits[i];
|
||||
/* ignore errors, it always returns zero anyway */
|
||||
(void)io_wq_max_workers(tctx->io_wq, new_count);
|
||||
}
|
||||
return 0;
|
||||
err:
|
||||
if (sqd) {
|
||||
mutex_unlock(&sqd->lock);
|
||||
io_put_sq_data(sqd);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
			       void __user *arg, unsigned nr_args)
	__releases(ctx->uring_lock)
	__acquires(ctx->uring_lock)
{
	int ret;

	/*
	 * We don't quiesce the refs for register anymore and so it can't be
	 * dying as we're holding a file ref here.
	 */
	if (WARN_ON_ONCE(percpu_ref_is_dying(&ctx->refs)))
		return -ENXIO;

	if (ctx->submitter_task && ctx->submitter_task != current)
		return -EEXIST;

	if (ctx->restricted) {
		opcode = array_index_nospec(opcode, IORING_REGISTER_LAST);
		if (!test_bit(opcode, ctx->restrictions.register_op))
			return -EACCES;
	}

	switch (opcode) {
	case IORING_REGISTER_BUFFERS:
		ret = -EFAULT;
		if (!arg)
			break;
		ret = io_sqe_buffers_register(ctx, arg, nr_args, NULL);
		break;
	case IORING_UNREGISTER_BUFFERS:
		ret = -EINVAL;
		if (arg || nr_args)
			break;
		ret = io_sqe_buffers_unregister(ctx);
		break;
	case IORING_REGISTER_FILES:
		ret = -EFAULT;
		if (!arg)
			break;
		ret = io_sqe_files_register(ctx, arg, nr_args, NULL);
		break;
	case IORING_UNREGISTER_FILES:
		ret = -EINVAL;
		if (arg || nr_args)
			break;
		ret = io_sqe_files_unregister(ctx);
		break;
	case IORING_REGISTER_FILES_UPDATE:
		ret = io_register_files_update(ctx, arg, nr_args);
		break;
	case IORING_REGISTER_EVENTFD:
		ret = -EINVAL;
		if (nr_args != 1)
			break;
		ret = io_eventfd_register(ctx, arg, 0);
		break;
	case IORING_REGISTER_EVENTFD_ASYNC:
		ret = -EINVAL;
		if (nr_args != 1)
			break;
		ret = io_eventfd_register(ctx, arg, 1);
		break;
	case IORING_UNREGISTER_EVENTFD:
		ret = -EINVAL;
		if (arg || nr_args)
			break;
		ret = io_eventfd_unregister(ctx);
		break;
	case IORING_REGISTER_PROBE:
		ret = -EINVAL;
		if (!arg || nr_args > 256)
			break;
		ret = io_probe(ctx, arg, nr_args);
		break;
	case IORING_REGISTER_PERSONALITY:
		ret = -EINVAL;
		if (arg || nr_args)
			break;
		ret = io_register_personality(ctx);
		break;
	case IORING_UNREGISTER_PERSONALITY:
		ret = -EINVAL;
		if (arg)
			break;
		ret = io_unregister_personality(ctx, nr_args);
		break;
	case IORING_REGISTER_ENABLE_RINGS:
		ret = -EINVAL;
		if (arg || nr_args)
			break;
		ret = io_register_enable_rings(ctx);
		break;
	case IORING_REGISTER_RESTRICTIONS:
		ret = io_register_restrictions(ctx, arg, nr_args);
		break;
	case IORING_REGISTER_FILES2:
		ret = io_register_rsrc(ctx, arg, nr_args, IORING_RSRC_FILE);
		break;
	case IORING_REGISTER_FILES_UPDATE2:
		ret = io_register_rsrc_update(ctx, arg, nr_args,
					      IORING_RSRC_FILE);
		break;
	case IORING_REGISTER_BUFFERS2:
		ret = io_register_rsrc(ctx, arg, nr_args, IORING_RSRC_BUFFER);
		break;
	case IORING_REGISTER_BUFFERS_UPDATE:
		ret = io_register_rsrc_update(ctx, arg, nr_args,
					      IORING_RSRC_BUFFER);
		break;
	case IORING_REGISTER_IOWQ_AFF:
		ret = -EINVAL;
		if (!arg || !nr_args)
			break;
		ret = io_register_iowq_aff(ctx, arg, nr_args);
		break;
	case IORING_UNREGISTER_IOWQ_AFF:
		ret = -EINVAL;
		if (arg || nr_args)
			break;
		ret = io_unregister_iowq_aff(ctx);
		break;
	case IORING_REGISTER_IOWQ_MAX_WORKERS:
		ret = -EINVAL;
		if (!arg || nr_args != 2)
			break;
		ret = io_register_iowq_max_workers(ctx, arg);
		break;
	case IORING_REGISTER_RING_FDS:
		ret = io_ringfd_register(ctx, arg, nr_args);
		break;
	case IORING_UNREGISTER_RING_FDS:
		ret = io_ringfd_unregister(ctx, arg, nr_args);
		break;
	case IORING_REGISTER_PBUF_RING:
		ret = -EINVAL;
		if (!arg || nr_args != 1)
			break;
		ret = io_register_pbuf_ring(ctx, arg);
		break;
	case IORING_UNREGISTER_PBUF_RING:
		ret = -EINVAL;
		if (!arg || nr_args != 1)
			break;
		ret = io_unregister_pbuf_ring(ctx, arg);
		break;
	case IORING_REGISTER_SYNC_CANCEL:
		ret = -EINVAL;
		if (!arg || nr_args != 1)
			break;
		ret = io_sync_cancel(ctx, arg);
		break;
	case IORING_REGISTER_FILE_ALLOC_RANGE:
		ret = -EINVAL;
		if (!arg || nr_args)
			break;
		ret = io_register_file_alloc_range(ctx, arg);
		break;
	case IORING_REGISTER_PBUF_STATUS:
		ret = -EINVAL;
		if (!arg || nr_args != 1)
			break;
		ret = io_register_pbuf_status(ctx, arg);
		break;
	default:
		ret = -EINVAL;
		break;
	}

	return ret;
}

SYSCALL_DEFINE4(io_uring_register, unsigned int, fd, unsigned int, opcode,
		void __user *, arg, unsigned int, nr_args)
{
	struct io_ring_ctx *ctx;
	long ret = -EBADF;
	struct file *file;
	bool use_registered_ring;

	use_registered_ring = !!(opcode & IORING_REGISTER_USE_REGISTERED_RING);
	opcode &= ~IORING_REGISTER_USE_REGISTERED_RING;

	if (opcode >= IORING_REGISTER_LAST)
		return -EINVAL;

	if (use_registered_ring) {
		/*
		 * Ring fd has been registered via IORING_REGISTER_RING_FDS, we
		 * need only dereference our task private array to find it.
		 */
		struct io_uring_task *tctx = current->io_uring;

		if (unlikely(!tctx || fd >= IO_RINGFD_REG_MAX))
			return -EINVAL;
		fd = array_index_nospec(fd, IO_RINGFD_REG_MAX);
		file = tctx->registered_rings[fd];
		if (unlikely(!file))
			return -EBADF;
	} else {
		file = fget(fd);
		if (unlikely(!file))
			return -EBADF;
		ret = -EOPNOTSUPP;
		if (!io_is_uring_fops(file))
			goto out_fput;
	}

	ctx = file->private_data;

	mutex_lock(&ctx->uring_lock);
	ret = __io_uring_register(ctx, opcode, arg, nr_args);
	mutex_unlock(&ctx->uring_lock);
	trace_io_uring_register(ctx, opcode, ctx->nr_user_files, ctx->nr_user_bufs, ret);
out_fput:
	if (!use_registered_ring)
		fput(file);
	return ret;
}
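io_uring_register(2) has no glibc wrapper, so userspace either goes through liburing or calls the raw syscall. A minimal sketch of the direct call, using IORING_REGISTER_ENABLE_RINGS as the example opcode; the helper names below are illustrative only. When IORING_REGISTER_USE_REGISTERED_RING is or'ed into the opcode, "fd" is an index into the table set up by IORING_REGISTER_RING_FDS rather than a real file descriptor, as the code above shows.

/* Illustrative userspace sketch, not part of the kernel diff. */
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/io_uring.h>

int io_uring_register_sys(unsigned int fd, unsigned int opcode,
			  const void *arg, unsigned int nr_args)
{
	return (int)syscall(__NR_io_uring_register, fd, opcode, arg, nr_args);
}

/* e.g. enable a ring that was created with IORING_SETUP_R_DISABLED */
int enable_rings(unsigned int ring_fd)
{
	return io_uring_register_sys(ring_fd, IORING_REGISTER_ENABLE_RINGS,
				     NULL, 0);
}
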
io_uring/register.h (new file, 8 lines)
@ -0,0 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
#ifndef IORING_REGISTER_H
#define IORING_REGISTER_H

int io_eventfd_unregister(struct io_ring_ctx *ctx);
int io_unregister_personality(struct io_ring_ctx *ctx, unsigned id);

#endif
io_uring/rsrc.c (169 lines changed)
@ -24,7 +24,6 @@ struct io_rsrc_update {
};

static void io_rsrc_buf_put(struct io_ring_ctx *ctx, struct io_rsrc_put *prsrc);
static void io_rsrc_file_put(struct io_ring_ctx *ctx, struct io_rsrc_put *prsrc);
static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,
				  struct io_mapped_ubuf **pimu,
				  struct page **last_hpage);
@ -157,7 +156,7 @@ static void io_rsrc_put_work(struct io_rsrc_node *node)

	switch (node->type) {
	case IORING_RSRC_FILE:
		io_rsrc_file_put(node->ctx, prsrc);
		fput(prsrc->file);
		break;
	case IORING_RSRC_BUFFER:
		io_rsrc_buf_put(node->ctx, prsrc);
@ -402,23 +401,13 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
				break;
			}
			/*
			 * Don't allow io_uring instances to be registered. If
			 * UNIX isn't enabled, then this causes a reference
			 * cycle and this instance can never get freed. If UNIX
			 * is enabled we'll handle it just fine, but there's
			 * still no point in allowing a ring fd as it doesn't
			 * support regular read/write anyway.
			 * Don't allow io_uring instances to be registered.
			 */
			if (io_is_uring_fops(file)) {
				fput(file);
				err = -EBADF;
				break;
			}
			err = io_scm_file_account(ctx, file);
			if (err) {
				fput(file);
				break;
			}
			*io_get_tag_slot(data, i) = tag;
			io_fixed_file_set(file_slot, file);
			io_file_bitmap_set(&ctx->file_table, i);
@ -675,22 +664,12 @@ void __io_sqe_files_unregister(struct io_ring_ctx *ctx)
	for (i = 0; i < ctx->nr_user_files; i++) {
		struct file *file = io_file_from_index(&ctx->file_table, i);

		/* skip scm accounted files, they'll be freed by ->ring_sock */
		if (!file || io_file_need_scm(file))
		if (!file)
			continue;
		io_file_bitmap_clear(&ctx->file_table, i);
		fput(file);
	}

#if defined(CONFIG_UNIX)
	if (ctx->ring_sock) {
		struct sock *sock = ctx->ring_sock->sk;
		struct sk_buff *skb;

		while ((skb = skb_dequeue(&sock->sk_receive_queue)) != NULL)
			kfree_skb(skb);
	}
#endif
	io_free_file_tables(&ctx->file_table);
	io_file_table_set_alloc_range(ctx, 0, 0);
	io_rsrc_data_free(ctx->file_data);
@ -718,137 +697,6 @@ int io_sqe_files_unregister(struct io_ring_ctx *ctx)
	return ret;
}

/*
 * Ensure the UNIX gc is aware of our file set, so we are certain that
 * the io_uring can be safely unregistered on process exit, even if we have
 * loops in the file referencing. We account only files that can hold other
 * files because otherwise they can't form a loop and so are not interesting
 * for GC.
 */
int __io_scm_file_account(struct io_ring_ctx *ctx, struct file *file)
{
#if defined(CONFIG_UNIX)
	struct sock *sk = ctx->ring_sock->sk;
	struct sk_buff_head *head = &sk->sk_receive_queue;
	struct scm_fp_list *fpl;
	struct sk_buff *skb;

	if (likely(!io_file_need_scm(file)))
		return 0;

	/*
	 * See if we can merge this file into an existing skb SCM_RIGHTS
	 * file set. If there's no room, fall back to allocating a new skb
	 * and filling it in.
	 */
	spin_lock_irq(&head->lock);
	skb = skb_peek(head);
	if (skb && UNIXCB(skb).fp->count < SCM_MAX_FD)
		__skb_unlink(skb, head);
	else
		skb = NULL;
	spin_unlock_irq(&head->lock);

	if (!skb) {
		fpl = kzalloc(sizeof(*fpl), GFP_KERNEL);
		if (!fpl)
			return -ENOMEM;

		skb = alloc_skb(0, GFP_KERNEL);
		if (!skb) {
			kfree(fpl);
			return -ENOMEM;
		}

		fpl->user = get_uid(current_user());
		fpl->max = SCM_MAX_FD;
		fpl->count = 0;

		UNIXCB(skb).fp = fpl;
		skb->sk = sk;
		skb->destructor = io_uring_destruct_scm;
		refcount_add(skb->truesize, &sk->sk_wmem_alloc);
	}

	fpl = UNIXCB(skb).fp;
	fpl->fp[fpl->count++] = get_file(file);
	unix_inflight(fpl->user, file);
	skb_queue_head(head, skb);
	fput(file);
#endif
	return 0;
}

static __cold void io_rsrc_file_scm_put(struct io_ring_ctx *ctx, struct file *file)
{
#if defined(CONFIG_UNIX)
	struct sock *sock = ctx->ring_sock->sk;
	struct sk_buff_head list, *head = &sock->sk_receive_queue;
	struct sk_buff *skb;
	int i;

	__skb_queue_head_init(&list);

	/*
	 * Find the skb that holds this file in its SCM_RIGHTS. When found,
	 * remove this entry and rearrange the file array.
	 */
	skb = skb_dequeue(head);
	while (skb) {
		struct scm_fp_list *fp;

		fp = UNIXCB(skb).fp;
		for (i = 0; i < fp->count; i++) {
			int left;

			if (fp->fp[i] != file)
				continue;

			unix_notinflight(fp->user, fp->fp[i]);
			left = fp->count - 1 - i;
			if (left) {
				memmove(&fp->fp[i], &fp->fp[i + 1],
					left * sizeof(struct file *));
			}
			fp->count--;
			if (!fp->count) {
				kfree_skb(skb);
				skb = NULL;
			} else {
				__skb_queue_tail(&list, skb);
			}
			fput(file);
			file = NULL;
			break;
		}

		if (!file)
			break;

		__skb_queue_tail(&list, skb);

		skb = skb_dequeue(head);
	}

	if (skb_peek(&list)) {
		spin_lock_irq(&head->lock);
		while ((skb = __skb_dequeue(&list)) != NULL)
			__skb_queue_tail(head, skb);
		spin_unlock_irq(&head->lock);
	}
#endif
}

static void io_rsrc_file_put(struct io_ring_ctx *ctx, struct io_rsrc_put *prsrc)
{
	struct file *file = prsrc->file;

	if (likely(!io_file_need_scm(file)))
		fput(file);
	else
		io_rsrc_file_scm_put(ctx, file);
}

int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
			  unsigned nr_args, u64 __user *tags)
{
@ -897,21 +745,12 @@ int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
			goto fail;

		/*
		 * Don't allow io_uring instances to be registered. If UNIX
		 * isn't enabled, then this causes a reference cycle and this
		 * instance can never get freed. If UNIX is enabled we'll
		 * handle it just fine, but there's still no point in allowing
		 * a ring fd as it doesn't support regular read/write anyway.
		 * Don't allow io_uring instances to be registered.
		 */
		if (io_is_uring_fops(file)) {
			fput(file);
			goto fail;
		}
		ret = io_scm_file_account(ctx, file);
		if (ret) {
			fput(file);
			goto fail;
		}
		file_slot = io_fixed_file_slot(&ctx->file_table, i);
		io_fixed_file_set(file_slot, file);
		io_file_bitmap_set(&ctx->file_table, i);
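For reference, userspace normally reaches io_sqe_files_register() through liburing's io_uring_register_files(), which issues IORING_REGISTER_FILES. A minimal sketch, not part of the commit, with placeholder paths:

/* Illustrative userspace sketch. Registered files are later referenced
 * by table index together with IOSQE_FIXED_FILE instead of by fd. */
#include <fcntl.h>
#include <liburing.h>

int register_two_files(struct io_uring *ring)
{
	int fds[2];

	fds[0] = open("/tmp/a", O_RDONLY);	/* paths are placeholders */
	fds[1] = open("/tmp/b", O_RDONLY);
	if (fds[0] < 0 || fds[1] < 0)
		return -1;

	/* slots 0 and 1 of the ring's fixed file table now map to fds[] */
	return io_uring_register_files(ring, fds, 2);
}
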
@ -75,21 +75,6 @@ int io_sqe_files_unregister(struct io_ring_ctx *ctx);
int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
			  unsigned nr_args, u64 __user *tags);

int __io_scm_file_account(struct io_ring_ctx *ctx, struct file *file);

static inline bool io_file_need_scm(struct file *filp)
{
	return false;
}

static inline int io_scm_file_account(struct io_ring_ctx *ctx,
				      struct file *file)
{
	if (likely(!io_file_need_scm(file)))
		return 0;
	return __io_scm_file_account(ctx, file);
}

int io_register_files_update(struct io_ring_ctx *ctx, void __user *arg,
			     unsigned nr_args);
int io_register_rsrc_update(struct io_ring_ctx *ctx, void __user *arg,

@ -10,7 +10,7 @@
#include <linux/poll.h>
#include <linux/nospec.h>
#include <linux/compat.h>
#include <linux/io_uring.h>
#include <linux/io_uring/cmd.h>

#include <uapi/linux/io_uring.h>

@ -589,15 +589,19 @@ static inline int io_rw_prep_async(struct io_kiocb *req, int rw)
	struct iovec *iov;
	int ret;

	iorw->bytes_done = 0;
	iorw->free_iovec = NULL;

	/* submission path, ->uring_lock should already be taken */
	ret = io_import_iovec(rw, req, &iov, &iorw->s, 0);
	if (unlikely(ret < 0))
		return ret;

	iorw->bytes_done = 0;
	iorw->free_iovec = iov;
	if (iov)
	if (iov) {
		iorw->free_iovec = iov;
		req->flags |= REQ_F_NEED_CLEANUP;
	}

	return 0;
}

@ -2,7 +2,7 @@
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/file.h>
#include <linux/io_uring.h>
#include <linux/io_uring/cmd.h>
#include <linux/security.h>
#include <linux/nospec.h>

@ -52,12 +52,6 @@ void io_uring_cmd_mark_cancelable(struct io_uring_cmd *cmd,
}
EXPORT_SYMBOL_GPL(io_uring_cmd_mark_cancelable);

struct task_struct *io_uring_cmd_get_task(struct io_uring_cmd *cmd)
{
	return cmd_to_io_kiocb(cmd)->task;
}
EXPORT_SYMBOL_GPL(io_uring_cmd_get_task);

static void io_uring_cmd_work(struct io_kiocb *req, struct io_tw_state *ts)
{
	struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd);
@ -78,13 +72,6 @@ void __io_uring_cmd_do_in_task(struct io_uring_cmd *ioucmd,
}
EXPORT_SYMBOL_GPL(__io_uring_cmd_do_in_task);

void io_uring_cmd_do_in_task_lazy(struct io_uring_cmd *ioucmd,
			void (*task_work_cb)(struct io_uring_cmd *, unsigned))
{
	__io_uring_cmd_do_in_task(ioucmd, task_work_cb, IOU_F_TWQ_LAZY_WAKE);
}
EXPORT_SYMBOL_GPL(io_uring_cmd_do_in_task_lazy);

static inline void io_req_set_cqe32_extra(struct io_kiocb *req,
					  u64 extra1, u64 extra2)
{

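The two exported helpers removed above are inlined into the new include/linux/io_uring/cmd.h header; that hunk is not shown in this excerpt, so the following is only a sketch of the presumed inline form, reconstructed from the bodies deleted here:

/* Sketch of the assumed inlined replacements in include/linux/io_uring/cmd.h. */
static inline struct task_struct *io_uring_cmd_get_task(struct io_uring_cmd *cmd)
{
	return cmd_to_io_kiocb(cmd)->task;
}

static inline void io_uring_cmd_do_in_task_lazy(struct io_uring_cmd *ioucmd,
			void (*task_work_cb)(struct io_uring_cmd *, unsigned))
{
	/* lazy wake: batch task_work wakeups for DEFER_TASKRUN rings */
	__io_uring_cmd_do_in_task(ioucmd, task_work_cb, IOU_F_TWQ_LAZY_WAKE);
}
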
@ -105,7 +105,7 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp)
		if (fd < 0 || !(file = fget_raw(fd)))
			return -EBADF;
		/* don't allow io_uring files */
		if (io_uring_get_socket(file)) {
		if (io_is_uring_fops(file)) {
			fput(file);
			return -EINVAL;
		}

@ -35,10 +35,8 @@ struct sock *unix_get_socket(struct file *filp)
		/* PF_UNIX ? */
		if (s && ops && ops->family == PF_UNIX)
			u_sock = s;
	} else {
		/* Could be an io_uring instance */
		u_sock = io_uring_get_socket(filp);
	}

	return u_sock;
}
EXPORT_SYMBOL(unix_get_socket);

@ -92,7 +92,7 @@
#include <uapi/linux/mount.h>
#include <linux/fsnotify.h>
#include <linux/fanotify.h>
#include <linux/io_uring.h>
#include <linux/io_uring/cmd.h>
#include <uapi/linux/lsm.h>

#include "avc.h"

@ -43,7 +43,7 @@
#include <linux/fs_context.h>
#include <linux/fs_parser.h>
#include <linux/watch_queue.h>
#include <linux/io_uring.h>
#include <linux/io_uring/cmd.h>
#include <uapi/linux/lsm.h>
#include "smack.h"
