diff options
Diffstat (limited to 'include')
-rw-r--r-- | include/linux/io_uring.h | 6 | ||||
-rw-r--r-- | include/linux/io_uring/cmd.h | 24 | ||||
-rw-r--r-- | include/linux/io_uring/net.h | 18 | ||||
-rw-r--r-- | include/linux/io_uring_types.h | 19 | ||||
-rw-r--r-- | include/linux/skbuff.h | 21 | ||||
-rw-r--r-- | include/uapi/linux/io_uring.h | 38 |
6 files changed, 90 insertions, 36 deletions
diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h index 68ed6697fece..e123d5e17b52 100644 --- a/include/linux/io_uring.h +++ b/include/linux/io_uring.h @@ -11,7 +11,6 @@ void __io_uring_cancel(bool cancel_all); void __io_uring_free(struct task_struct *tsk); void io_uring_unreg_ringfd(void); const char *io_uring_get_opcode(u8 opcode); -int io_uring_cmd_sock(struct io_uring_cmd *cmd, unsigned int issue_flags); bool io_is_uring_fops(struct file *file); static inline void io_uring_files_cancel(void) @@ -45,11 +44,6 @@ static inline const char *io_uring_get_opcode(u8 opcode) { return ""; } -static inline int io_uring_cmd_sock(struct io_uring_cmd *cmd, - unsigned int issue_flags) -{ - return -EOPNOTSUPP; -} static inline bool io_is_uring_fops(struct file *file) { return false; diff --git a/include/linux/io_uring/cmd.h b/include/linux/io_uring/cmd.h index e453a997c060..447fbfd32215 100644 --- a/include/linux/io_uring/cmd.h +++ b/include/linux/io_uring/cmd.h @@ -26,12 +26,25 @@ static inline const void *io_uring_sqe_cmd(const struct io_uring_sqe *sqe) #if defined(CONFIG_IO_URING) int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw, struct iov_iter *iter, void *ioucmd); + +/* + * Completes the request, i.e. posts an io_uring CQE and deallocates @ioucmd + * and the corresponding io_uring request. + * + * Note: the caller should never hard code @issue_flags and is only allowed + * to pass the mask provided by the core io_uring code. + */ void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret, ssize_t res2, unsigned issue_flags); + void __io_uring_cmd_do_in_task(struct io_uring_cmd *ioucmd, void (*task_work_cb)(struct io_uring_cmd *, unsigned), unsigned flags); +/* + * Note: the caller should never hard code @issue_flags and only use the + * mask provided by the core io_uring code. + */ void io_uring_cmd_mark_cancelable(struct io_uring_cmd *cmd, unsigned int issue_flags); @@ -56,6 +69,17 @@ static inline void io_uring_cmd_mark_cancelable(struct io_uring_cmd *cmd, } #endif +/* + * Polled completions must ensure they are coming from a poll queue, and + * hence are completed inside the usual poll handling loops. + */ +static inline void io_uring_cmd_iopoll_done(struct io_uring_cmd *ioucmd, + ssize_t ret, ssize_t res2) +{ + lockdep_assert(in_task()); + io_uring_cmd_done(ioucmd, ret, res2, 0); +} + /* users must follow the IOU_F_TWQ_LAZY_WAKE semantics */ static inline void io_uring_cmd_do_in_task_lazy(struct io_uring_cmd *ioucmd, void (*task_work_cb)(struct io_uring_cmd *, unsigned)) diff --git a/include/linux/io_uring/net.h b/include/linux/io_uring/net.h new file mode 100644 index 000000000000..b58f39fed4d5 --- /dev/null +++ b/include/linux/io_uring/net.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _LINUX_IO_URING_NET_H +#define _LINUX_IO_URING_NET_H + +struct io_uring_cmd; + +#if defined(CONFIG_IO_URING) +int io_uring_cmd_sock(struct io_uring_cmd *cmd, unsigned int issue_flags); + +#else +static inline int io_uring_cmd_sock(struct io_uring_cmd *cmd, + unsigned int issue_flags) +{ + return -EOPNOTSUPP; +} +#endif + +#endif diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index ac333ea81d31..7a6b190c7da7 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -205,6 +205,7 @@ struct io_submit_state { bool plug_started; bool need_plug; + bool cq_flush; unsigned short submit_nr; unsigned int cqes_count; struct blk_plug plug; @@ -219,7 +220,7 @@ struct io_ev_fd { }; struct io_alloc_cache { - struct io_wq_work_node list; + void **entries; unsigned int nr_cached; unsigned int max_cached; size_t elem_size; @@ -299,6 +300,8 @@ struct io_ring_ctx { struct io_hash_table cancel_table_locked; struct io_alloc_cache apoll_cache; struct io_alloc_cache netmsg_cache; + struct io_alloc_cache rw_cache; + struct io_alloc_cache uring_cache; /* * Any cancelable uring_cmd is added to this list in @@ -341,14 +344,8 @@ struct io_ring_ctx { unsigned cq_last_tm_flush; } ____cacheline_aligned_in_smp; - struct io_uring_cqe completion_cqes[16]; - spinlock_t completion_lock; - /* IRQ completion list, under ->completion_lock */ - unsigned int locked_free_nr; - struct io_wq_work_list locked_free_list; - struct list_head io_buffers_comp; struct list_head cq_overflow_list; struct io_hash_table cancel_table; @@ -371,9 +368,6 @@ struct io_ring_ctx { struct list_head io_buffers_cache; - /* deferred free list, protected by ->uring_lock */ - struct hlist_head io_buf_list; - /* Keep this last, we don't need it for the fast path */ struct wait_queue_head poll_wq; struct io_restriction restrictions; @@ -438,8 +432,6 @@ struct io_ring_ctx { }; struct io_tw_state { - /* ->uring_lock is taken, callbacks can use io_tw_lock to lock it */ - bool locked; }; enum { @@ -480,6 +472,7 @@ enum { REQ_F_CAN_POLL_BIT, REQ_F_BL_EMPTY_BIT, REQ_F_BL_NO_RECYCLE_BIT, + REQ_F_BUFFERS_COMMIT_BIT, /* not a real bit, just to check we're not overflowing the space */ __REQ_F_LAST_BIT, @@ -558,6 +551,8 @@ enum { REQ_F_BL_EMPTY = IO_REQ_FLAG(REQ_F_BL_EMPTY_BIT), /* don't recycle provided buffers for this request */ REQ_F_BL_NO_RECYCLE = IO_REQ_FLAG(REQ_F_BL_NO_RECYCLE_BIT), + /* buffer ring head needs incrementing on put */ + REQ_F_BUFFERS_COMMIT = IO_REQ_FLAG(REQ_F_BUFFERS_COMMIT_BIT), }; typedef void (*io_req_tw_func_t)(struct io_kiocb *req, struct io_tw_state *ts); diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 4ff48eda3f64..1cc72c370190 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -527,6 +527,13 @@ enum { #define SKBFL_ALL_ZEROCOPY (SKBFL_ZEROCOPY_FRAG | SKBFL_PURE_ZEROCOPY | \ SKBFL_DONT_ORPHAN | SKBFL_MANAGED_FRAG_REFS) +struct ubuf_info_ops { + void (*complete)(struct sk_buff *, struct ubuf_info *, + bool zerocopy_success); + /* has to be compatible with skb_zcopy_set() */ + int (*link_skb)(struct sk_buff *skb, struct ubuf_info *uarg); +}; + /* * The callback notifies userspace to release buffers when skb DMA is done in * lower device, the skb last reference should be 0 when calling this. @@ -536,8 +543,7 @@ enum { * The desc field is used to track userspace buffer index. */ struct ubuf_info { - void (*callback)(struct sk_buff *, struct ubuf_info *, - bool zerocopy_success); + const struct ubuf_info_ops *ops; refcount_t refcnt; u8 flags; }; @@ -1662,14 +1668,13 @@ static inline void skb_set_end_offset(struct sk_buff *skb, unsigned int offset) } #endif +extern const struct ubuf_info_ops msg_zerocopy_ubuf_ops; + struct ubuf_info *msg_zerocopy_realloc(struct sock *sk, size_t size, struct ubuf_info *uarg); void msg_zerocopy_put_abort(struct ubuf_info *uarg, bool have_uref); -void msg_zerocopy_callback(struct sk_buff *skb, struct ubuf_info *uarg, - bool success); - int __zerocopy_sg_from_iter(struct msghdr *msg, struct sock *sk, struct sk_buff *skb, struct iov_iter *from, size_t length); @@ -1757,13 +1762,13 @@ static inline void *skb_zcopy_get_nouarg(struct sk_buff *skb) static inline void net_zcopy_put(struct ubuf_info *uarg) { if (uarg) - uarg->callback(NULL, uarg, true); + uarg->ops->complete(NULL, uarg, true); } static inline void net_zcopy_put_abort(struct ubuf_info *uarg, bool have_uref) { if (uarg) { - if (uarg->callback == msg_zerocopy_callback) + if (uarg->ops == &msg_zerocopy_ubuf_ops) msg_zerocopy_put_abort(uarg, have_uref); else if (have_uref) net_zcopy_put(uarg); @@ -1777,7 +1782,7 @@ static inline void skb_zcopy_clear(struct sk_buff *skb, bool zerocopy_success) if (uarg) { if (!skb_zcopy_is_nouarg(skb)) - uarg->callback(skb, uarg, zerocopy_success); + uarg->ops->complete(skb, uarg, zerocopy_success); skb_shinfo(skb)->flags &= ~SKBFL_ALL_ZEROCOPY; } diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 7bd10201a02b..994bf7af0efe 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -72,6 +72,7 @@ struct io_uring_sqe { __u32 waitid_flags; __u32 futex_flags; __u32 install_fd_flags; + __u32 nop_flags; }; __u64 user_data; /* data to be passed back at completion time */ /* pack this to avoid bogus arm OABI complaints */ @@ -115,7 +116,7 @@ struct io_uring_sqe { */ #define IORING_FILE_INDEX_ALLOC (~0U) -enum { +enum io_uring_sqe_flags_bit { IOSQE_FIXED_FILE_BIT, IOSQE_IO_DRAIN_BIT, IOSQE_IO_LINK_BIT, @@ -351,11 +352,20 @@ enum io_uring_op { * 0 is reported if zerocopy was actually possible. * IORING_NOTIF_USAGE_ZC_COPIED if data was copied * (at least partially). + * + * IORING_RECVSEND_BUNDLE Used with IOSQE_BUFFER_SELECT. If set, send or + * recv will grab as many buffers from the buffer + * group ID given and send them all. The completion + * result will be the number of buffers send, with + * the starting buffer ID in cqe->flags as per + * usual for provided buffer usage. The buffers + * will be contigious from the starting buffer ID. */ #define IORING_RECVSEND_POLL_FIRST (1U << 0) #define IORING_RECV_MULTISHOT (1U << 1) #define IORING_RECVSEND_FIXED_BUF (1U << 2) #define IORING_SEND_ZC_REPORT_USAGE (1U << 3) +#define IORING_RECVSEND_BUNDLE (1U << 4) /* * cqe.res for IORING_CQE_F_NOTIF if @@ -370,11 +380,13 @@ enum io_uring_op { * accept flags stored in sqe->ioprio */ #define IORING_ACCEPT_MULTISHOT (1U << 0) +#define IORING_ACCEPT_DONTWAIT (1U << 1) +#define IORING_ACCEPT_POLL_FIRST (1U << 2) /* * IORING_OP_MSG_RING command types, stored in sqe->addr */ -enum { +enum io_uring_msg_ring_flags { IORING_MSG_DATA, /* pass sqe->len as 'res' and off as user_data */ IORING_MSG_SEND_FD, /* send a registered fd to another ring */ }; @@ -397,6 +409,13 @@ enum { #define IORING_FIXED_FD_NO_CLOEXEC (1U << 0) /* + * IORING_OP_NOP flags (sqe->nop_flags) + * + * IORING_NOP_INJECT_RESULT Inject result from sqe->result + */ +#define IORING_NOP_INJECT_RESULT (1U << 0) + +/* * IO completion data structure (Completion Queue Entry) */ struct io_uring_cqe { @@ -425,9 +444,7 @@ struct io_uring_cqe { #define IORING_CQE_F_SOCK_NONEMPTY (1U << 2) #define IORING_CQE_F_NOTIF (1U << 3) -enum { - IORING_CQE_BUFFER_SHIFT = 16, -}; +#define IORING_CQE_BUFFER_SHIFT 16 /* * Magic offsets for the application to mmap the data it needs @@ -522,11 +539,12 @@ struct io_uring_params { #define IORING_FEAT_CQE_SKIP (1U << 11) #define IORING_FEAT_LINKED_FILE (1U << 12) #define IORING_FEAT_REG_REG_RING (1U << 13) +#define IORING_FEAT_RECVSEND_BUNDLE (1U << 14) /* * io_uring_register(2) opcodes and arguments */ -enum { +enum io_uring_register_op { IORING_REGISTER_BUFFERS = 0, IORING_UNREGISTER_BUFFERS = 1, IORING_REGISTER_FILES = 2, @@ -583,7 +601,7 @@ enum { }; /* io-wq worker categories */ -enum { +enum io_wq_type { IO_WQ_BOUND, IO_WQ_UNBOUND, }; @@ -688,7 +706,7 @@ struct io_uring_buf_ring { * IORING_OFF_PBUF_RING | (bgid << IORING_OFF_PBUF_SHIFT) * to get a virtual mapping for the ring. */ -enum { +enum io_uring_register_pbuf_ring_flags { IOU_PBUF_RING_MMAP = 1, }; @@ -719,7 +737,7 @@ struct io_uring_napi { /* * io_uring_restriction->opcode values */ -enum { +enum io_uring_register_restriction_op { /* Allow an io_uring_register(2) opcode */ IORING_RESTRICTION_REGISTER_OP = 0, @@ -775,7 +793,7 @@ struct io_uring_recvmsg_out { /* * Argument for IORING_OP_URING_CMD when file is a socket */ -enum { +enum io_uring_socket_op { SOCKET_URING_OP_SIOCINQ = 0, SOCKET_URING_OP_SIOCOUTQ, SOCKET_URING_OP_GETSOCKOPT, |