mirror of
https://github.com/Dasharo/linux.git
synced 2026-03-06 15:25:10 -08:00
Merge tag 'for-6.8/io_uring-2024-01-08' of git://git.kernel.dk/linux
Pull io_uring updates from Jens Axboe:
"Mostly just come fixes and cleanups, but one feature as well. In
detail:
- Harden the check for handling IOPOLL based on return (Pavel)
- Various minor optimizations (Pavel)
- Drop remnants of SCM_RIGHTS fd passing support, now that it's no
longer supported since 6.7 (me)
- Fix for a case where bytes_done wasn't initialized properly on a
failure condition for read/write requests (me)
- Move the register related code to a separate file (me)
- Add support for returning the provided ring buffer head (me)
- Add support for adding a direct descriptor to the normal file table
(me, Christian Brauner)
- Fix for ensuring pending task_work for a ring with DEFER_TASKRUN is
run even if we timeout waiting (me)"
* tag 'for-6.8/io_uring-2024-01-08' of git://git.kernel.dk/linux:
io_uring: ensure local task_work is run on wait timeout
io_uring/kbuf: add method for returning provided buffer ring head
io_uring/rw: ensure io->bytes_done is always initialized
io_uring: drop any code related to SCM_RIGHTS
io_uring/unix: drop usage of io_uring socket
io_uring/register: move io_uring_register(2) related code to register.c
io_uring/openclose: add support for IORING_OP_FIXED_FD_INSTALL
io_uring/cmd: inline io_uring_cmd_get_task
io_uring/cmd: inline io_uring_cmd_do_in_task_lazy
io_uring: split out cmd api into a separate header
io_uring: optimise ltimeout for inline execution
io_uring: don't check iopoll if request completes
This commit is contained in:
@@ -11142,6 +11142,7 @@ L: io-uring@vger.kernel.org
|
||||
S: Maintained
|
||||
T: git git://git.kernel.dk/linux-block
|
||||
T: git git://git.kernel.dk/liburing
|
||||
F: include/linux/io_uring/
|
||||
F: include/linux/io_uring.h
|
||||
F: include/linux/io_uring_types.h
|
||||
F: include/trace/events/io_uring.h
|
||||
|
||||
@@ -36,7 +36,7 @@
|
||||
#include <linux/sched/mm.h>
|
||||
#include <linux/uaccess.h>
|
||||
#include <linux/cdev.h>
|
||||
#include <linux/io_uring.h>
|
||||
#include <linux/io_uring/cmd.h>
|
||||
#include <linux/blk-mq.h>
|
||||
#include <linux/delay.h>
|
||||
#include <linux/mm.h>
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
*/
|
||||
#include <linux/ptrace.h> /* for force_successful_syscall_return */
|
||||
#include <linux/nvme_ioctl.h>
|
||||
#include <linux/io_uring.h>
|
||||
#include <linux/io_uring/cmd.h>
|
||||
#include "nvme.h"
|
||||
|
||||
enum {
|
||||
|
||||
@@ -6,66 +6,13 @@
|
||||
#include <linux/xarray.h>
|
||||
#include <uapi/linux/io_uring.h>
|
||||
|
||||
enum io_uring_cmd_flags {
|
||||
IO_URING_F_COMPLETE_DEFER = 1,
|
||||
IO_URING_F_UNLOCKED = 2,
|
||||
/* the request is executed from poll, it should not be freed */
|
||||
IO_URING_F_MULTISHOT = 4,
|
||||
/* executed by io-wq */
|
||||
IO_URING_F_IOWQ = 8,
|
||||
/* int's last bit, sign checks are usually faster than a bit test */
|
||||
IO_URING_F_NONBLOCK = INT_MIN,
|
||||
|
||||
/* ctx state flags, for URING_CMD */
|
||||
IO_URING_F_SQE128 = (1 << 8),
|
||||
IO_URING_F_CQE32 = (1 << 9),
|
||||
IO_URING_F_IOPOLL = (1 << 10),
|
||||
|
||||
/* set when uring wants to cancel a previously issued command */
|
||||
IO_URING_F_CANCEL = (1 << 11),
|
||||
IO_URING_F_COMPAT = (1 << 12),
|
||||
};
|
||||
|
||||
/* only top 8 bits of sqe->uring_cmd_flags for kernel internal use */
|
||||
#define IORING_URING_CMD_CANCELABLE (1U << 30)
|
||||
|
||||
struct io_uring_cmd {
|
||||
struct file *file;
|
||||
const struct io_uring_sqe *sqe;
|
||||
/* callback to defer completions to task context */
|
||||
void (*task_work_cb)(struct io_uring_cmd *cmd, unsigned);
|
||||
u32 cmd_op;
|
||||
u32 flags;
|
||||
u8 pdu[32]; /* available inline for free use */
|
||||
};
|
||||
|
||||
static inline const void *io_uring_sqe_cmd(const struct io_uring_sqe *sqe)
|
||||
{
|
||||
return sqe->cmd;
|
||||
}
|
||||
|
||||
#if defined(CONFIG_IO_URING)
|
||||
int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw,
|
||||
struct iov_iter *iter, void *ioucmd);
|
||||
void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret, ssize_t res2,
|
||||
unsigned issue_flags);
|
||||
struct sock *io_uring_get_socket(struct file *file);
|
||||
void __io_uring_cancel(bool cancel_all);
|
||||
void __io_uring_free(struct task_struct *tsk);
|
||||
void io_uring_unreg_ringfd(void);
|
||||
const char *io_uring_get_opcode(u8 opcode);
|
||||
void __io_uring_cmd_do_in_task(struct io_uring_cmd *ioucmd,
|
||||
void (*task_work_cb)(struct io_uring_cmd *, unsigned),
|
||||
unsigned flags);
|
||||
/* users should follow semantics of IOU_F_TWQ_LAZY_WAKE */
|
||||
void io_uring_cmd_do_in_task_lazy(struct io_uring_cmd *ioucmd,
|
||||
void (*task_work_cb)(struct io_uring_cmd *, unsigned));
|
||||
|
||||
static inline void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd,
|
||||
void (*task_work_cb)(struct io_uring_cmd *, unsigned))
|
||||
{
|
||||
__io_uring_cmd_do_in_task(ioucmd, task_work_cb, 0);
|
||||
}
|
||||
int io_uring_cmd_sock(struct io_uring_cmd *cmd, unsigned int issue_flags);
|
||||
bool io_is_uring_fops(struct file *file);
|
||||
|
||||
static inline void io_uring_files_cancel(void)
|
||||
{
|
||||
@@ -84,32 +31,7 @@ static inline void io_uring_free(struct task_struct *tsk)
|
||||
if (tsk->io_uring)
|
||||
__io_uring_free(tsk);
|
||||
}
|
||||
int io_uring_cmd_sock(struct io_uring_cmd *cmd, unsigned int issue_flags);
|
||||
void io_uring_cmd_mark_cancelable(struct io_uring_cmd *cmd,
|
||||
unsigned int issue_flags);
|
||||
struct task_struct *io_uring_cmd_get_task(struct io_uring_cmd *cmd);
|
||||
#else
|
||||
static inline int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw,
|
||||
struct iov_iter *iter, void *ioucmd)
|
||||
{
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
static inline void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret,
|
||||
ssize_t ret2, unsigned issue_flags)
|
||||
{
|
||||
}
|
||||
static inline void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd,
|
||||
void (*task_work_cb)(struct io_uring_cmd *, unsigned))
|
||||
{
|
||||
}
|
||||
static inline void io_uring_cmd_do_in_task_lazy(struct io_uring_cmd *ioucmd,
|
||||
void (*task_work_cb)(struct io_uring_cmd *, unsigned))
|
||||
{
|
||||
}
|
||||
static inline struct sock *io_uring_get_socket(struct file *file)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
static inline void io_uring_task_cancel(void)
|
||||
{
|
||||
}
|
||||
@@ -128,13 +50,9 @@ static inline int io_uring_cmd_sock(struct io_uring_cmd *cmd,
|
||||
{
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
static inline void io_uring_cmd_mark_cancelable(struct io_uring_cmd *cmd,
|
||||
unsigned int issue_flags)
|
||||
static inline bool io_is_uring_fops(struct file *file)
|
||||
{
|
||||
}
|
||||
static inline struct task_struct *io_uring_cmd_get_task(struct io_uring_cmd *cmd)
|
||||
{
|
||||
return NULL;
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
77
include/linux/io_uring/cmd.h
Normal file
77
include/linux/io_uring/cmd.h
Normal file
@@ -0,0 +1,77 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0-or-later */
|
||||
#ifndef _LINUX_IO_URING_CMD_H
|
||||
#define _LINUX_IO_URING_CMD_H
|
||||
|
||||
#include <uapi/linux/io_uring.h>
|
||||
#include <linux/io_uring_types.h>
|
||||
|
||||
/* only top 8 bits of sqe->uring_cmd_flags for kernel internal use */
|
||||
#define IORING_URING_CMD_CANCELABLE (1U << 30)
|
||||
|
||||
struct io_uring_cmd {
|
||||
struct file *file;
|
||||
const struct io_uring_sqe *sqe;
|
||||
/* callback to defer completions to task context */
|
||||
void (*task_work_cb)(struct io_uring_cmd *cmd, unsigned);
|
||||
u32 cmd_op;
|
||||
u32 flags;
|
||||
u8 pdu[32]; /* available inline for free use */
|
||||
};
|
||||
|
||||
static inline const void *io_uring_sqe_cmd(const struct io_uring_sqe *sqe)
|
||||
{
|
||||
return sqe->cmd;
|
||||
}
|
||||
|
||||
#if defined(CONFIG_IO_URING)
|
||||
int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw,
|
||||
struct iov_iter *iter, void *ioucmd);
|
||||
void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret, ssize_t res2,
|
||||
unsigned issue_flags);
|
||||
void __io_uring_cmd_do_in_task(struct io_uring_cmd *ioucmd,
|
||||
void (*task_work_cb)(struct io_uring_cmd *, unsigned),
|
||||
unsigned flags);
|
||||
|
||||
void io_uring_cmd_mark_cancelable(struct io_uring_cmd *cmd,
|
||||
unsigned int issue_flags);
|
||||
|
||||
#else
|
||||
static inline int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw,
|
||||
struct iov_iter *iter, void *ioucmd)
|
||||
{
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
static inline void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret,
|
||||
ssize_t ret2, unsigned issue_flags)
|
||||
{
|
||||
}
|
||||
static inline void __io_uring_cmd_do_in_task(struct io_uring_cmd *ioucmd,
|
||||
void (*task_work_cb)(struct io_uring_cmd *, unsigned),
|
||||
unsigned flags)
|
||||
{
|
||||
}
|
||||
static inline void io_uring_cmd_mark_cancelable(struct io_uring_cmd *cmd,
|
||||
unsigned int issue_flags)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
|
||||
/* users must follow the IOU_F_TWQ_LAZY_WAKE semantics */
|
||||
static inline void io_uring_cmd_do_in_task_lazy(struct io_uring_cmd *ioucmd,
|
||||
void (*task_work_cb)(struct io_uring_cmd *, unsigned))
|
||||
{
|
||||
__io_uring_cmd_do_in_task(ioucmd, task_work_cb, IOU_F_TWQ_LAZY_WAKE);
|
||||
}
|
||||
|
||||
static inline void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd,
|
||||
void (*task_work_cb)(struct io_uring_cmd *, unsigned))
|
||||
{
|
||||
__io_uring_cmd_do_in_task(ioucmd, task_work_cb, 0);
|
||||
}
|
||||
|
||||
static inline struct task_struct *io_uring_cmd_get_task(struct io_uring_cmd *cmd)
|
||||
{
|
||||
return cmd_to_io_kiocb(cmd)->task;
|
||||
}
|
||||
|
||||
#endif /* _LINUX_IO_URING_CMD_H */
|
||||
@@ -7,6 +7,37 @@
|
||||
#include <linux/llist.h>
|
||||
#include <uapi/linux/io_uring.h>
|
||||
|
||||
enum {
|
||||
/*
|
||||
* A hint to not wake right away but delay until there are enough of
|
||||
* tw's queued to match the number of CQEs the task is waiting for.
|
||||
*
|
||||
* Must not be used wirh requests generating more than one CQE.
|
||||
* It's also ignored unless IORING_SETUP_DEFER_TASKRUN is set.
|
||||
*/
|
||||
IOU_F_TWQ_LAZY_WAKE = 1,
|
||||
};
|
||||
|
||||
enum io_uring_cmd_flags {
|
||||
IO_URING_F_COMPLETE_DEFER = 1,
|
||||
IO_URING_F_UNLOCKED = 2,
|
||||
/* the request is executed from poll, it should not be freed */
|
||||
IO_URING_F_MULTISHOT = 4,
|
||||
/* executed by io-wq */
|
||||
IO_URING_F_IOWQ = 8,
|
||||
/* int's last bit, sign checks are usually faster than a bit test */
|
||||
IO_URING_F_NONBLOCK = INT_MIN,
|
||||
|
||||
/* ctx state flags, for URING_CMD */
|
||||
IO_URING_F_SQE128 = (1 << 8),
|
||||
IO_URING_F_CQE32 = (1 << 9),
|
||||
IO_URING_F_IOPOLL = (1 << 10),
|
||||
|
||||
/* set when uring wants to cancel a previously issued command */
|
||||
IO_URING_F_CANCEL = (1 << 11),
|
||||
IO_URING_F_COMPAT = (1 << 12),
|
||||
};
|
||||
|
||||
struct io_wq_work_node {
|
||||
struct io_wq_work_node *next;
|
||||
};
|
||||
@@ -358,9 +389,6 @@ struct io_ring_ctx {
|
||||
struct wait_queue_head rsrc_quiesce_wq;
|
||||
unsigned rsrc_quiesce;
|
||||
|
||||
#if defined(CONFIG_UNIX)
|
||||
struct socket *ring_sock;
|
||||
#endif
|
||||
/* hashed buffered write serialization */
|
||||
struct io_wq_hash *hash_map;
|
||||
|
||||
|
||||
@@ -71,6 +71,7 @@ struct io_uring_sqe {
|
||||
__u32 uring_cmd_flags;
|
||||
__u32 waitid_flags;
|
||||
__u32 futex_flags;
|
||||
__u32 install_fd_flags;
|
||||
};
|
||||
__u64 user_data; /* data to be passed back at completion time */
|
||||
/* pack this to avoid bogus arm OABI complaints */
|
||||
@@ -253,6 +254,7 @@ enum io_uring_op {
|
||||
IORING_OP_FUTEX_WAIT,
|
||||
IORING_OP_FUTEX_WAKE,
|
||||
IORING_OP_FUTEX_WAITV,
|
||||
IORING_OP_FIXED_FD_INSTALL,
|
||||
|
||||
/* this goes last, obviously */
|
||||
IORING_OP_LAST,
|
||||
@@ -386,6 +388,13 @@ enum {
|
||||
/* Pass through the flags from sqe->file_index to cqe->flags */
|
||||
#define IORING_MSG_RING_FLAGS_PASS (1U << 1)
|
||||
|
||||
/*
|
||||
* IORING_OP_FIXED_FD_INSTALL flags (sqe->install_fd_flags)
|
||||
*
|
||||
* IORING_FIXED_FD_NO_CLOEXEC Don't mark the fd as O_CLOEXEC
|
||||
*/
|
||||
#define IORING_FIXED_FD_NO_CLOEXEC (1U << 0)
|
||||
|
||||
/*
|
||||
* IO completion data structure (Completion Queue Entry)
|
||||
*/
|
||||
@@ -558,6 +567,9 @@ enum {
|
||||
/* register a range of fixed file slots for automatic slot allocation */
|
||||
IORING_REGISTER_FILE_ALLOC_RANGE = 25,
|
||||
|
||||
/* return status information for a buffer group */
|
||||
IORING_REGISTER_PBUF_STATUS = 26,
|
||||
|
||||
/* this goes last */
|
||||
IORING_REGISTER_LAST,
|
||||
|
||||
@@ -684,6 +696,13 @@ struct io_uring_buf_reg {
|
||||
__u64 resv[3];
|
||||
};
|
||||
|
||||
/* argument for IORING_REGISTER_PBUF_STATUS */
|
||||
struct io_uring_buf_status {
|
||||
__u32 buf_group; /* input */
|
||||
__u32 head; /* output */
|
||||
__u32 resv[8];
|
||||
};
|
||||
|
||||
/*
|
||||
* io_uring_restriction->opcode values
|
||||
*/
|
||||
|
||||
@@ -8,6 +8,6 @@ obj-$(CONFIG_IO_URING) += io_uring.o xattr.o nop.o fs.o splice.o \
|
||||
statx.o net.o msg_ring.o timeout.o \
|
||||
sqpoll.o fdinfo.o tctx.o poll.o \
|
||||
cancel.o kbuf.o rsrc.o rw.o opdef.o \
|
||||
notif.o waitid.o
|
||||
notif.o waitid.o register.o
|
||||
obj-$(CONFIG_IO_WQ) += io-wq.o
|
||||
obj-$(CONFIG_FUTEX) += futex.o
|
||||
|
||||
@@ -87,13 +87,10 @@ static int io_install_fixed_file(struct io_ring_ctx *ctx, struct file *file,
|
||||
io_file_bitmap_clear(&ctx->file_table, slot_index);
|
||||
}
|
||||
|
||||
ret = io_scm_file_account(ctx, file);
|
||||
if (!ret) {
|
||||
*io_get_tag_slot(ctx->file_data, slot_index) = 0;
|
||||
io_fixed_file_set(file_slot, file);
|
||||
io_file_bitmap_set(&ctx->file_table, slot_index);
|
||||
}
|
||||
return ret;
|
||||
*io_get_tag_slot(ctx->file_data, slot_index) = 0;
|
||||
io_fixed_file_set(file_slot, file);
|
||||
io_file_bitmap_set(&ctx->file_table, slot_index);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int __io_fixed_fd_install(struct io_ring_ctx *ctx, struct file *file,
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -15,16 +15,6 @@
|
||||
#include <trace/events/io_uring.h>
|
||||
#endif
|
||||
|
||||
enum {
|
||||
/*
|
||||
* A hint to not wake right away but delay until there are enough of
|
||||
* tw's queued to match the number of CQEs the task is waiting for.
|
||||
*
|
||||
* Must not be used wirh requests generating more than one CQE.
|
||||
* It's also ignored unless IORING_SETUP_DEFER_TASKRUN is set.
|
||||
*/
|
||||
IOU_F_TWQ_LAZY_WAKE = 1,
|
||||
};
|
||||
|
||||
enum {
|
||||
IOU_OK = 0,
|
||||
@@ -54,7 +44,6 @@ struct file *io_file_get_fixed(struct io_kiocb *req, int fd,
|
||||
unsigned issue_flags);
|
||||
|
||||
void __io_req_task_work_add(struct io_kiocb *req, unsigned flags);
|
||||
bool io_is_uring_fops(struct file *file);
|
||||
bool io_alloc_async_data(struct io_kiocb *req);
|
||||
void io_req_task_queue(struct io_kiocb *req);
|
||||
void io_queue_iowq(struct io_kiocb *req, struct io_tw_state *ts_dont_use);
|
||||
@@ -89,6 +78,14 @@ bool io_match_task_safe(struct io_kiocb *head, struct task_struct *task,
|
||||
void *io_mem_alloc(size_t size);
|
||||
void io_mem_free(void *ptr);
|
||||
|
||||
enum {
|
||||
IO_EVENTFD_OP_SIGNAL_BIT,
|
||||
IO_EVENTFD_OP_FREE_BIT,
|
||||
};
|
||||
|
||||
void io_eventfd_ops(struct rcu_head *rcu);
|
||||
void io_activate_pollwq(struct io_ring_ctx *ctx);
|
||||
|
||||
#if defined(CONFIG_PROVE_LOCKING)
|
||||
static inline void io_lockdep_assert_cq_locked(struct io_ring_ctx *ctx)
|
||||
{
|
||||
|
||||
@@ -750,6 +750,32 @@ int io_unregister_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
|
||||
return 0;
|
||||
}
|
||||
|
||||
int io_register_pbuf_status(struct io_ring_ctx *ctx, void __user *arg)
|
||||
{
|
||||
struct io_uring_buf_status buf_status;
|
||||
struct io_buffer_list *bl;
|
||||
int i;
|
||||
|
||||
if (copy_from_user(&buf_status, arg, sizeof(buf_status)))
|
||||
return -EFAULT;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(buf_status.resv); i++)
|
||||
if (buf_status.resv[i])
|
||||
return -EINVAL;
|
||||
|
||||
bl = io_buffer_get_list(ctx, buf_status.buf_group);
|
||||
if (!bl)
|
||||
return -ENOENT;
|
||||
if (!bl->is_mapped)
|
||||
return -EINVAL;
|
||||
|
||||
buf_status.head = bl->head;
|
||||
if (copy_to_user(arg, &buf_status, sizeof(buf_status)))
|
||||
return -EFAULT;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void *io_pbuf_get_address(struct io_ring_ctx *ctx, unsigned long bgid)
|
||||
{
|
||||
struct io_buffer_list *bl;
|
||||
|
||||
@@ -53,6 +53,7 @@ int io_provide_buffers(struct io_kiocb *req, unsigned int issue_flags);
|
||||
|
||||
int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg);
|
||||
int io_unregister_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg);
|
||||
int io_register_pbuf_status(struct io_ring_ctx *ctx, void __user *arg);
|
||||
|
||||
void io_kbuf_mmap_list_free(struct io_ring_ctx *ctx);
|
||||
|
||||
|
||||
@@ -469,6 +469,12 @@ const struct io_issue_def io_issue_defs[] = {
|
||||
.prep = io_eopnotsupp_prep,
|
||||
#endif
|
||||
},
|
||||
[IORING_OP_FIXED_FD_INSTALL] = {
|
||||
.needs_file = 1,
|
||||
.audit_skip = 1,
|
||||
.prep = io_install_fixed_fd_prep,
|
||||
.issue = io_install_fixed_fd,
|
||||
},
|
||||
};
|
||||
|
||||
const struct io_cold_def io_cold_defs[] = {
|
||||
@@ -704,6 +710,9 @@ const struct io_cold_def io_cold_defs[] = {
|
||||
[IORING_OP_FUTEX_WAITV] = {
|
||||
.name = "FUTEX_WAITV",
|
||||
},
|
||||
[IORING_OP_FIXED_FD_INSTALL] = {
|
||||
.name = "FIXED_FD_INSTALL",
|
||||
},
|
||||
};
|
||||
|
||||
const char *io_uring_get_opcode(u8 opcode)
|
||||
|
||||
@@ -31,6 +31,11 @@ struct io_close {
|
||||
u32 file_slot;
|
||||
};
|
||||
|
||||
struct io_fixed_install {
|
||||
struct file *file;
|
||||
unsigned int o_flags;
|
||||
};
|
||||
|
||||
static bool io_openat_force_async(struct io_open *open)
|
||||
{
|
||||
/*
|
||||
@@ -254,3 +259,42 @@ err:
|
||||
io_req_set_res(req, ret, 0);
|
||||
return IOU_OK;
|
||||
}
|
||||
|
||||
int io_install_fixed_fd_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
|
||||
{
|
||||
struct io_fixed_install *ifi;
|
||||
unsigned int flags;
|
||||
|
||||
if (sqe->off || sqe->addr || sqe->len || sqe->buf_index ||
|
||||
sqe->splice_fd_in || sqe->addr3)
|
||||
return -EINVAL;
|
||||
|
||||
/* must be a fixed file */
|
||||
if (!(req->flags & REQ_F_FIXED_FILE))
|
||||
return -EBADF;
|
||||
|
||||
flags = READ_ONCE(sqe->install_fd_flags);
|
||||
if (flags & ~IORING_FIXED_FD_NO_CLOEXEC)
|
||||
return -EINVAL;
|
||||
|
||||
/* default to O_CLOEXEC, disable if IORING_FIXED_FD_NO_CLOEXEC is set */
|
||||
ifi = io_kiocb_to_cmd(req, struct io_fixed_install);
|
||||
ifi->o_flags = O_CLOEXEC;
|
||||
if (flags & IORING_FIXED_FD_NO_CLOEXEC)
|
||||
ifi->o_flags = 0;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int io_install_fixed_fd(struct io_kiocb *req, unsigned int issue_flags)
|
||||
{
|
||||
struct io_fixed_install *ifi;
|
||||
int ret;
|
||||
|
||||
ifi = io_kiocb_to_cmd(req, struct io_fixed_install);
|
||||
ret = receive_fd(req->file, NULL, ifi->o_flags);
|
||||
if (ret < 0)
|
||||
req_set_fail(req);
|
||||
io_req_set_res(req, ret, 0);
|
||||
return IOU_OK;
|
||||
}
|
||||
|
||||
@@ -12,3 +12,6 @@ int io_openat2(struct io_kiocb *req, unsigned int issue_flags);
|
||||
|
||||
int io_close_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
|
||||
int io_close(struct io_kiocb *req, unsigned int issue_flags);
|
||||
|
||||
int io_install_fixed_fd_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
|
||||
int io_install_fixed_fd(struct io_kiocb *req, unsigned int issue_flags);
|
||||
|
||||
605
io_uring/register.c
Normal file
605
io_uring/register.c
Normal file
File diff suppressed because it is too large
Load Diff
8
io_uring/register.h
Normal file
8
io_uring/register.h
Normal file
@@ -0,0 +1,8 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
#ifndef IORING_REGISTER_H
|
||||
#define IORING_REGISTER_H
|
||||
|
||||
int io_eventfd_unregister(struct io_ring_ctx *ctx);
|
||||
int io_unregister_personality(struct io_ring_ctx *ctx, unsigned id);
|
||||
|
||||
#endif
|
||||
169
io_uring/rsrc.c
169
io_uring/rsrc.c
@@ -24,7 +24,6 @@ struct io_rsrc_update {
|
||||
};
|
||||
|
||||
static void io_rsrc_buf_put(struct io_ring_ctx *ctx, struct io_rsrc_put *prsrc);
|
||||
static void io_rsrc_file_put(struct io_ring_ctx *ctx, struct io_rsrc_put *prsrc);
|
||||
static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,
|
||||
struct io_mapped_ubuf **pimu,
|
||||
struct page **last_hpage);
|
||||
@@ -157,7 +156,7 @@ static void io_rsrc_put_work(struct io_rsrc_node *node)
|
||||
|
||||
switch (node->type) {
|
||||
case IORING_RSRC_FILE:
|
||||
io_rsrc_file_put(node->ctx, prsrc);
|
||||
fput(prsrc->file);
|
||||
break;
|
||||
case IORING_RSRC_BUFFER:
|
||||
io_rsrc_buf_put(node->ctx, prsrc);
|
||||
@@ -402,23 +401,13 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
|
||||
break;
|
||||
}
|
||||
/*
|
||||
* Don't allow io_uring instances to be registered. If
|
||||
* UNIX isn't enabled, then this causes a reference
|
||||
* cycle and this instance can never get freed. If UNIX
|
||||
* is enabled we'll handle it just fine, but there's
|
||||
* still no point in allowing a ring fd as it doesn't
|
||||
* support regular read/write anyway.
|
||||
* Don't allow io_uring instances to be registered.
|
||||
*/
|
||||
if (io_is_uring_fops(file)) {
|
||||
fput(file);
|
||||
err = -EBADF;
|
||||
break;
|
||||
}
|
||||
err = io_scm_file_account(ctx, file);
|
||||
if (err) {
|
||||
fput(file);
|
||||
break;
|
||||
}
|
||||
*io_get_tag_slot(data, i) = tag;
|
||||
io_fixed_file_set(file_slot, file);
|
||||
io_file_bitmap_set(&ctx->file_table, i);
|
||||
@@ -675,22 +664,12 @@ void __io_sqe_files_unregister(struct io_ring_ctx *ctx)
|
||||
for (i = 0; i < ctx->nr_user_files; i++) {
|
||||
struct file *file = io_file_from_index(&ctx->file_table, i);
|
||||
|
||||
/* skip scm accounted files, they'll be freed by ->ring_sock */
|
||||
if (!file || io_file_need_scm(file))
|
||||
if (!file)
|
||||
continue;
|
||||
io_file_bitmap_clear(&ctx->file_table, i);
|
||||
fput(file);
|
||||
}
|
||||
|
||||
#if defined(CONFIG_UNIX)
|
||||
if (ctx->ring_sock) {
|
||||
struct sock *sock = ctx->ring_sock->sk;
|
||||
struct sk_buff *skb;
|
||||
|
||||
while ((skb = skb_dequeue(&sock->sk_receive_queue)) != NULL)
|
||||
kfree_skb(skb);
|
||||
}
|
||||
#endif
|
||||
io_free_file_tables(&ctx->file_table);
|
||||
io_file_table_set_alloc_range(ctx, 0, 0);
|
||||
io_rsrc_data_free(ctx->file_data);
|
||||
@@ -718,137 +697,6 @@ int io_sqe_files_unregister(struct io_ring_ctx *ctx)
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Ensure the UNIX gc is aware of our file set, so we are certain that
|
||||
* the io_uring can be safely unregistered on process exit, even if we have
|
||||
* loops in the file referencing. We account only files that can hold other
|
||||
* files because otherwise they can't form a loop and so are not interesting
|
||||
* for GC.
|
||||
*/
|
||||
int __io_scm_file_account(struct io_ring_ctx *ctx, struct file *file)
|
||||
{
|
||||
#if defined(CONFIG_UNIX)
|
||||
struct sock *sk = ctx->ring_sock->sk;
|
||||
struct sk_buff_head *head = &sk->sk_receive_queue;
|
||||
struct scm_fp_list *fpl;
|
||||
struct sk_buff *skb;
|
||||
|
||||
if (likely(!io_file_need_scm(file)))
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* See if we can merge this file into an existing skb SCM_RIGHTS
|
||||
* file set. If there's no room, fall back to allocating a new skb
|
||||
* and filling it in.
|
||||
*/
|
||||
spin_lock_irq(&head->lock);
|
||||
skb = skb_peek(head);
|
||||
if (skb && UNIXCB(skb).fp->count < SCM_MAX_FD)
|
||||
__skb_unlink(skb, head);
|
||||
else
|
||||
skb = NULL;
|
||||
spin_unlock_irq(&head->lock);
|
||||
|
||||
if (!skb) {
|
||||
fpl = kzalloc(sizeof(*fpl), GFP_KERNEL);
|
||||
if (!fpl)
|
||||
return -ENOMEM;
|
||||
|
||||
skb = alloc_skb(0, GFP_KERNEL);
|
||||
if (!skb) {
|
||||
kfree(fpl);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
fpl->user = get_uid(current_user());
|
||||
fpl->max = SCM_MAX_FD;
|
||||
fpl->count = 0;
|
||||
|
||||
UNIXCB(skb).fp = fpl;
|
||||
skb->sk = sk;
|
||||
skb->destructor = io_uring_destruct_scm;
|
||||
refcount_add(skb->truesize, &sk->sk_wmem_alloc);
|
||||
}
|
||||
|
||||
fpl = UNIXCB(skb).fp;
|
||||
fpl->fp[fpl->count++] = get_file(file);
|
||||
unix_inflight(fpl->user, file);
|
||||
skb_queue_head(head, skb);
|
||||
fput(file);
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
|
||||
static __cold void io_rsrc_file_scm_put(struct io_ring_ctx *ctx, struct file *file)
|
||||
{
|
||||
#if defined(CONFIG_UNIX)
|
||||
struct sock *sock = ctx->ring_sock->sk;
|
||||
struct sk_buff_head list, *head = &sock->sk_receive_queue;
|
||||
struct sk_buff *skb;
|
||||
int i;
|
||||
|
||||
__skb_queue_head_init(&list);
|
||||
|
||||
/*
|
||||
* Find the skb that holds this file in its SCM_RIGHTS. When found,
|
||||
* remove this entry and rearrange the file array.
|
||||
*/
|
||||
skb = skb_dequeue(head);
|
||||
while (skb) {
|
||||
struct scm_fp_list *fp;
|
||||
|
||||
fp = UNIXCB(skb).fp;
|
||||
for (i = 0; i < fp->count; i++) {
|
||||
int left;
|
||||
|
||||
if (fp->fp[i] != file)
|
||||
continue;
|
||||
|
||||
unix_notinflight(fp->user, fp->fp[i]);
|
||||
left = fp->count - 1 - i;
|
||||
if (left) {
|
||||
memmove(&fp->fp[i], &fp->fp[i + 1],
|
||||
left * sizeof(struct file *));
|
||||
}
|
||||
fp->count--;
|
||||
if (!fp->count) {
|
||||
kfree_skb(skb);
|
||||
skb = NULL;
|
||||
} else {
|
||||
__skb_queue_tail(&list, skb);
|
||||
}
|
||||
fput(file);
|
||||
file = NULL;
|
||||
break;
|
||||
}
|
||||
|
||||
if (!file)
|
||||
break;
|
||||
|
||||
__skb_queue_tail(&list, skb);
|
||||
|
||||
skb = skb_dequeue(head);
|
||||
}
|
||||
|
||||
if (skb_peek(&list)) {
|
||||
spin_lock_irq(&head->lock);
|
||||
while ((skb = __skb_dequeue(&list)) != NULL)
|
||||
__skb_queue_tail(head, skb);
|
||||
spin_unlock_irq(&head->lock);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
static void io_rsrc_file_put(struct io_ring_ctx *ctx, struct io_rsrc_put *prsrc)
|
||||
{
|
||||
struct file *file = prsrc->file;
|
||||
|
||||
if (likely(!io_file_need_scm(file)))
|
||||
fput(file);
|
||||
else
|
||||
io_rsrc_file_scm_put(ctx, file);
|
||||
}
|
||||
|
||||
int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
|
||||
unsigned nr_args, u64 __user *tags)
|
||||
{
|
||||
@@ -897,21 +745,12 @@ int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
|
||||
goto fail;
|
||||
|
||||
/*
|
||||
* Don't allow io_uring instances to be registered. If UNIX
|
||||
* isn't enabled, then this causes a reference cycle and this
|
||||
* instance can never get freed. If UNIX is enabled we'll
|
||||
* handle it just fine, but there's still no point in allowing
|
||||
* a ring fd as it doesn't support regular read/write anyway.
|
||||
* Don't allow io_uring instances to be registered.
|
||||
*/
|
||||
if (io_is_uring_fops(file)) {
|
||||
fput(file);
|
||||
goto fail;
|
||||
}
|
||||
ret = io_scm_file_account(ctx, file);
|
||||
if (ret) {
|
||||
fput(file);
|
||||
goto fail;
|
||||
}
|
||||
file_slot = io_fixed_file_slot(&ctx->file_table, i);
|
||||
io_fixed_file_set(file_slot, file);
|
||||
io_file_bitmap_set(&ctx->file_table, i);
|
||||
|
||||
@@ -75,21 +75,6 @@ int io_sqe_files_unregister(struct io_ring_ctx *ctx);
|
||||
int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
|
||||
unsigned nr_args, u64 __user *tags);
|
||||
|
||||
int __io_scm_file_account(struct io_ring_ctx *ctx, struct file *file);
|
||||
|
||||
static inline bool io_file_need_scm(struct file *filp)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline int io_scm_file_account(struct io_ring_ctx *ctx,
|
||||
struct file *file)
|
||||
{
|
||||
if (likely(!io_file_need_scm(file)))
|
||||
return 0;
|
||||
return __io_scm_file_account(ctx, file);
|
||||
}
|
||||
|
||||
int io_register_files_update(struct io_ring_ctx *ctx, void __user *arg,
|
||||
unsigned nr_args);
|
||||
int io_register_rsrc_update(struct io_ring_ctx *ctx, void __user *arg,
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user