From 915967f69c591b34c5a18d6618af021a81ffd700 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 21 Nov 2019 09:01:20 -0700 Subject: io_uring: improve trace_io_uring_defer() trace point We don't have shadow requests anymore, so get rid of the shadow argument. Add the user_data argument, as that's often useful to easily match up requests, instead of having to look at request pointers. Signed-off-by: Jens Axboe --- include/trace/events/io_uring.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/trace/events/io_uring.h b/include/trace/events/io_uring.h index 72a4d0174b02..b352d66b5d51 100644 --- a/include/trace/events/io_uring.h +++ b/include/trace/events/io_uring.h @@ -163,35 +163,35 @@ TRACE_EVENT(io_uring_queue_async_work, ); /** - * io_uring_defer_list - called before the io_uring work added into defer_list + * io_uring_defer - called when an io_uring request is deferred * * @ctx: pointer to a ring context structure * @req: pointer to a deferred request - * @shadow: whether request is shadow or not + * @user_data: user data associated with the request * * Allows to track deferred requests, to get an insight about what requests are * not started immediately. */ TRACE_EVENT(io_uring_defer, - TP_PROTO(void *ctx, void *req, bool shadow), + TP_PROTO(void *ctx, void *req, unsigned long long user_data), - TP_ARGS(ctx, req, shadow), + TP_ARGS(ctx, req, user_data), TP_STRUCT__entry ( __field( void *, ctx ) __field( void *, req ) - __field( bool, shadow ) + __field( unsigned long long, data ) ), TP_fast_assign( __entry->ctx = ctx; __entry->req = req; - __entry->shadow = shadow; + __entry->data = user_data; ), - TP_printk("ring %p, request %p%s", __entry->ctx, __entry->req, - __entry->shadow ? ", shadow": "") + TP_printk("ring %p, request %p user_data %llu", __entry->ctx, + __entry->req, __entry->data) ); /** -- cgit v1.2.3 From bd3ded3146daa2cbb57ed353749ef99cf75371b0 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 23 Nov 2019 14:17:16 -0700 Subject: net: add __sys_connect_file() helper This is identical to __sys_connect(), except it takes a struct file instead of an fd, and it also allows passing in extra file->f_flags flags. The latter is done to support masking in O_NONBLOCK without manipulating the original file flags. No functional changes in this patch. Cc: netdev@vger.kernel.org Acked-by: David S. Miller Signed-off-by: Jens Axboe --- include/linux/socket.h | 3 +++ net/socket.c | 30 ++++++++++++++++++++++-------- 2 files changed, 25 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/socket.h b/include/linux/socket.h index 09c32a21555b..4bde63021c09 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -399,6 +399,9 @@ extern int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr, int __user *upeer_addrlen, int flags); extern int __sys_socket(int family, int type, int protocol); extern int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen); +extern int __sys_connect_file(struct file *file, + struct sockaddr __user *uservaddr, int addrlen, + int file_flags); extern int __sys_connect(int fd, struct sockaddr __user *uservaddr, int addrlen); extern int __sys_listen(int fd, int backlog); diff --git a/net/socket.c b/net/socket.c index 17bc1eee198a..274df4ddfc2c 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1825,32 +1825,46 @@ SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr, * include the -EINPROGRESS status for such sockets. */ -int __sys_connect(int fd, struct sockaddr __user *uservaddr, int addrlen) +int __sys_connect_file(struct file *file, struct sockaddr __user *uservaddr, + int addrlen, int file_flags) { struct socket *sock; struct sockaddr_storage address; - int err, fput_needed; + int err; - sock = sockfd_lookup_light(fd, &err, &fput_needed); + sock = sock_from_file(file, &err); if (!sock) goto out; err = move_addr_to_kernel(uservaddr, addrlen, &address); if (err < 0) - goto out_put; + goto out; err = security_socket_connect(sock, (struct sockaddr *)&address, addrlen); if (err) - goto out_put; + goto out; err = sock->ops->connect(sock, (struct sockaddr *)&address, addrlen, - sock->file->f_flags); -out_put: - fput_light(sock->file, fput_needed); + sock->file->f_flags | file_flags); out: return err; } +int __sys_connect(int fd, struct sockaddr __user *uservaddr, int addrlen) +{ + int ret = -EBADF; + struct fd f; + + f = fdget(fd); + if (f.file) { + ret = __sys_connect_file(f.file, uservaddr, addrlen, 0); + if (f.flags) + fput(f.file); + } + + return ret; +} + SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr, int, addrlen) { -- cgit v1.2.3 From f8e85cf255ad57d65eeb9a9d0e59e3dec55bdd9e Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 23 Nov 2019 14:24:24 -0700 Subject: io_uring: add support for IORING_OP_CONNECT This allows an application to call connect() in an async fashion. Like other opcodes, we first try a non-blocking connect, then punt to async context if we have to. Note that we can still return -EINPROGRESS, and in that case the caller should use IORING_OP_POLL_ADD to do an async wait for completion of the connect request (just like for regular connect(2), except we can do it async here too). Signed-off-by: Jens Axboe --- fs/io_uring.c | 36 ++++++++++++++++++++++++++++++++++++ include/uapi/linux/io_uring.h | 1 + 2 files changed, 37 insertions(+) (limited to 'include') diff --git a/fs/io_uring.c b/fs/io_uring.c index 129723087bad..02254929231b 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -550,6 +550,7 @@ static inline bool io_prep_async_work(struct io_kiocb *req, case IORING_OP_RECVMSG: case IORING_OP_ACCEPT: case IORING_OP_POLL_ADD: + case IORING_OP_CONNECT: /* * We know REQ_F_ISREG is not set on some of these * opcodes, but this enables us to keep the check in @@ -1974,6 +1975,38 @@ static int io_accept(struct io_kiocb *req, const struct io_uring_sqe *sqe, #endif } +static int io_connect(struct io_kiocb *req, const struct io_uring_sqe *sqe, + struct io_kiocb **nxt, bool force_nonblock) +{ +#if defined(CONFIG_NET) + struct sockaddr __user *addr; + unsigned file_flags; + int addr_len, ret; + + if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL|IORING_SETUP_SQPOLL))) + return -EINVAL; + if (sqe->ioprio || sqe->len || sqe->buf_index || sqe->rw_flags) + return -EINVAL; + + addr = (struct sockaddr __user *) (unsigned long) READ_ONCE(sqe->addr); + addr_len = READ_ONCE(sqe->addr2); + file_flags = force_nonblock ? O_NONBLOCK : 0; + + ret = __sys_connect_file(req->file, addr, addr_len, file_flags); + if (ret == -EAGAIN && force_nonblock) + return -EAGAIN; + if (ret == -ERESTARTSYS) + ret = -EINTR; + if (ret < 0 && (req->flags & REQ_F_LINK)) + req->flags |= REQ_F_FAIL_LINK; + io_cqring_add_event(req, ret); + io_put_req_find_next(req, nxt); + return 0; +#else + return -EOPNOTSUPP; +#endif +} + static inline void io_poll_remove_req(struct io_kiocb *req) { if (!RB_EMPTY_NODE(&req->rb_node)) { @@ -2637,6 +2670,9 @@ static int io_issue_sqe(struct io_kiocb *req, struct io_kiocb **nxt, case IORING_OP_ACCEPT: ret = io_accept(req, s->sqe, nxt, force_nonblock); break; + case IORING_OP_CONNECT: + ret = io_connect(req, s->sqe, nxt, force_nonblock); + break; case IORING_OP_ASYNC_CANCEL: ret = io_async_cancel(req, s->sqe, nxt); break; diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 2a1569211d87..4637ed1d9949 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -73,6 +73,7 @@ struct io_uring_sqe { #define IORING_OP_ACCEPT 13 #define IORING_OP_ASYNC_CANCEL 14 #define IORING_OP_LINK_TIMEOUT 15 +#define IORING_OP_CONNECT 16 /* * sqe->fsync_flags -- cgit v1.2.3