diff options
Diffstat (limited to 'fs/io_uring.c')
-rw-r--r-- | fs/io_uring.c | 79 |
1 files changed, 60 insertions, 19 deletions
diff --git a/fs/io_uring.c b/fs/io_uring.c index de9c9de90655..e54c4127422e 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1192,12 +1192,6 @@ static inline bool req_ref_put_and_test(struct io_kiocb *req) return atomic_dec_and_test(&req->refs); } -static inline void req_ref_put(struct io_kiocb *req) -{ - WARN_ON_ONCE(!(req->flags & REQ_F_REFCOUNT)); - WARN_ON_ONCE(req_ref_put_and_test(req)); -} - static inline void req_ref_get(struct io_kiocb *req) { WARN_ON_ONCE(!(req->flags & REQ_F_REFCOUNT)); @@ -5468,12 +5462,14 @@ static void io_init_poll_iocb(struct io_poll_iocb *poll, __poll_t events, static inline void io_poll_remove_entry(struct io_poll_iocb *poll) { - struct wait_queue_head *head = poll->head; + struct wait_queue_head *head = smp_load_acquire(&poll->head); - spin_lock_irq(&head->lock); - list_del_init(&poll->wait.entry); - poll->head = NULL; - spin_unlock_irq(&head->lock); + if (head) { + spin_lock_irq(&head->lock); + list_del_init(&poll->wait.entry); + poll->head = NULL; + spin_unlock_irq(&head->lock); + } } static void io_poll_remove_entries(struct io_kiocb *req) @@ -5481,10 +5477,26 @@ static void io_poll_remove_entries(struct io_kiocb *req) struct io_poll_iocb *poll = io_poll_get_single(req); struct io_poll_iocb *poll_double = io_poll_get_double(req); - if (poll->head) - io_poll_remove_entry(poll); - if (poll_double && poll_double->head) + /* + * While we hold the waitqueue lock and the waitqueue is nonempty, + * wake_up_pollfree() will wait for us. However, taking the waitqueue + * lock in the first place can race with the waitqueue being freed. + * + * We solve this as eventpoll does: by taking advantage of the fact that + * all users of wake_up_pollfree() will RCU-delay the actual free. If + * we enter rcu_read_lock() and see that the pointer to the queue is + * non-NULL, we can then lock it without the memory being freed out from + * under us. 
+ * + * Keep holding rcu_read_lock() as long as we hold the queue lock, in + * case the caller deletes the entry from the queue, leaving it empty. + * In that case, only RCU prevents the queue memory from being freed. + */ + rcu_read_lock(); + io_poll_remove_entry(poll); + if (poll_double) io_poll_remove_entry(poll_double); + rcu_read_unlock(); } /* @@ -5624,6 +5636,30 @@ static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync, wait); __poll_t mask = key_to_poll(key); + if (unlikely(mask & POLLFREE)) { + io_poll_mark_cancelled(req); + /* we have to kick tw in case it's not already */ + io_poll_execute(req, 0); + + /* + * If the waitqueue is being freed early but someone already + * holds ownership over it, we have to tear down the request as + * best we can. That means immediately removing the request from + * its waitqueue and preventing all further accesses to the + * waitqueue via the request. + */ + list_del_init(&poll->wait.entry); + + /* + * Careful: this *must* be the last step, since as soon + * as poll->head is NULL'ed out, the request can be + * completed and freed, since aio_poll_complete_work() + * will no longer need to take the waitqueue lock. + */ + smp_store_release(&poll->head, NULL); + return 1; + } + /* for instances that support it check for an event match first */ if (mask && !(mask & poll->events)) return 0; @@ -6350,16 +6386,21 @@ static int io_try_cancel_userdata(struct io_kiocb *req, u64 sqe_addr) WARN_ON_ONCE(!io_wq_current_is_worker() && req->task != current); ret = io_async_cancel_one(req->task->io_uring, sqe_addr, ctx); - if (ret != -ENOENT) - return ret; + /* + * Fall-through even for -EALREADY, as we may have a poll armed + * that needs unarming. 
+ */ + if (!ret) + return 0; spin_lock(&ctx->completion_lock); + ret = io_poll_cancel(ctx, sqe_addr, false); + if (ret != -ENOENT) + goto out; + spin_lock_irq(&ctx->timeout_lock); ret = io_timeout_cancel(ctx, sqe_addr); spin_unlock_irq(&ctx->timeout_lock); - if (ret != -ENOENT) - goto out; - ret = io_poll_cancel(ctx, sqe_addr, false); out: spin_unlock(&ctx->completion_lock); return ret; |