mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2025-01-06 05:02:31 +00:00
io_uring-5.17-2022-01-21
-----BEGIN PGP SIGNATURE----- iQJEBAABCAAuFiEEwPw5LcreJtl1+l5K99NY+ylx4KYFAmHqtf8QHGF4Ym9lQGtl cm5lbC5kawAKCRD301j7KXHgpoIbD/9pfiURokRzb/UncQoS+zX0mMQxToYWluMN xyyAHLRqAyylzdZykJFnxfGKMN4z/cfBKJugISEd+O+88wjNLUALUBzD56C6vbVq h+gPL+zBRNu/Bx5bfEYEI+zhUr7AE1gtlg+x/hzZy1x/Q2b2Fyuely+FaHY/ChUy +HUnLVP74Gj2H7XY6a1yEuEQRdRRBn0NjWVU0v8g8zNLjO+XUKJFE5se1EAjIcBy IzcMvbDbnfGnHQRp7saq3TaY8+OaADSch0QYS1pDyCRUcqMTBnONTgyOkwxNQA+z 58KHM7n9QDp3QzRPE4TbvjHuX1J9fbw9wekIQIm6eAQfGRa6IvoMbUaI1btAsFLU zsIRUWNPv2w7tzG4OMY438bISnUhnGKnNG4ZnWCx7oa+GF8SgOJ7D8EOGzqimh8n YBFfoLTkKfcVcrb+G/cfa5qWK35qv1nGTVk1A9V37eOi4YRBOFnbfmzMAb0CC5bw 8zpFyRS45VJLjnS+FUdvpqZeSuP0sZlKx6KH0hUa2Nd/7xSmsVkNuexCrttTWuLX sfHcXj3rMG7YBXRtxHnMH7rDsdph4h5CPPqqMXHD8ho4YYxvFNktxu8IJgJRV6+J cDH937ADsYNM8MBAgPZ3i9Kg8mVttnpHLMRhCeVpPKzZtr2tLoUh0mXLHb87docu L12jvdCS0A== =bFY0 -----END PGP SIGNATURE----- Merge tag 'io_uring-5.17-2022-01-21' of git://git.kernel.dk/linux-block Pull io_uring fixes from Jens Axboe: - Fix the io_uring POLLFREE handling, similarly to how it was done for aio (Pavel) - Remove (now) unused function (Jiapeng) - Small series fixing an issue with work cancelations. A window exists where work isn't locatable in the pending list, and isn't active in a worker yet either. (me) * tag 'io_uring-5.17-2022-01-21' of git://git.kernel.dk/linux-block: io-wq: delete dead lock shuffling code io_uring: perform poll removal even if async work removal is successful io-wq: add intermediate work step between pending list and active work io-wq: perform both unstarted and started work cancelations in one go io-wq: invoke work cancelation with wqe->lock held io-wq: make io_worker lock a raw spinlock io-wq: remove useless 'work' argument to __io_worker_busy() io_uring: fix UAF due to missing POLLFREE handling io_uring: Remove unused function req_ref_put
This commit is contained in:
commit
f3a78227ee
89
fs/io-wq.c
89
fs/io-wq.c
@ -48,7 +48,8 @@ struct io_worker {
|
||||
struct io_wqe *wqe;
|
||||
|
||||
struct io_wq_work *cur_work;
|
||||
spinlock_t lock;
|
||||
struct io_wq_work *next_work;
|
||||
raw_spinlock_t lock;
|
||||
|
||||
struct completion ref_done;
|
||||
|
||||
@ -405,8 +406,7 @@ static void io_wqe_dec_running(struct io_worker *worker)
|
||||
* Worker will start processing some work. Move it to the busy list, if
|
||||
* it's currently on the freelist
|
||||
*/
|
||||
static void __io_worker_busy(struct io_wqe *wqe, struct io_worker *worker,
|
||||
struct io_wq_work *work)
|
||||
static void __io_worker_busy(struct io_wqe *wqe, struct io_worker *worker)
|
||||
__must_hold(wqe->lock)
|
||||
{
|
||||
if (worker->flags & IO_WORKER_F_FREE) {
|
||||
@ -529,9 +529,10 @@ static void io_assign_current_work(struct io_worker *worker,
|
||||
cond_resched();
|
||||
}
|
||||
|
||||
spin_lock(&worker->lock);
|
||||
raw_spin_lock(&worker->lock);
|
||||
worker->cur_work = work;
|
||||
spin_unlock(&worker->lock);
|
||||
worker->next_work = NULL;
|
||||
raw_spin_unlock(&worker->lock);
|
||||
}
|
||||
|
||||
static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work);
|
||||
@ -546,7 +547,7 @@ static void io_worker_handle_work(struct io_worker *worker)
|
||||
|
||||
do {
|
||||
struct io_wq_work *work;
|
||||
get_next:
|
||||
|
||||
/*
|
||||
* If we got some work, mark us as busy. If we didn't, but
|
||||
* the list isn't empty, it means we stalled on hashed work.
|
||||
@ -555,9 +556,20 @@ static void io_worker_handle_work(struct io_worker *worker)
|
||||
* clear the stalled flag.
|
||||
*/
|
||||
work = io_get_next_work(acct, worker);
|
||||
if (work)
|
||||
__io_worker_busy(wqe, worker, work);
|
||||
if (work) {
|
||||
__io_worker_busy(wqe, worker);
|
||||
|
||||
/*
|
||||
* Make sure cancelation can find this, even before
|
||||
* it becomes the active work. That avoids a window
|
||||
* where the work has been removed from our general
|
||||
* work list, but isn't yet discoverable as the
|
||||
* current work item for this worker.
|
||||
*/
|
||||
raw_spin_lock(&worker->lock);
|
||||
worker->next_work = work;
|
||||
raw_spin_unlock(&worker->lock);
|
||||
}
|
||||
raw_spin_unlock(&wqe->lock);
|
||||
if (!work)
|
||||
break;
|
||||
@ -594,11 +606,6 @@ static void io_worker_handle_work(struct io_worker *worker)
|
||||
spin_unlock_irq(&wq->hash->wait.lock);
|
||||
if (wq_has_sleeper(&wq->hash->wait))
|
||||
wake_up(&wq->hash->wait);
|
||||
raw_spin_lock(&wqe->lock);
|
||||
/* skip unnecessary unlock-lock wqe->lock */
|
||||
if (!work)
|
||||
goto get_next;
|
||||
raw_spin_unlock(&wqe->lock);
|
||||
}
|
||||
} while (work);
|
||||
|
||||
@ -815,7 +822,7 @@ static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index)
|
||||
|
||||
refcount_set(&worker->ref, 1);
|
||||
worker->wqe = wqe;
|
||||
spin_lock_init(&worker->lock);
|
||||
raw_spin_lock_init(&worker->lock);
|
||||
init_completion(&worker->ref_done);
|
||||
|
||||
if (index == IO_WQ_ACCT_BOUND)
|
||||
@ -973,6 +980,19 @@ void io_wq_hash_work(struct io_wq_work *work, void *val)
|
||||
work->flags |= (IO_WQ_WORK_HASHED | (bit << IO_WQ_HASH_SHIFT));
|
||||
}
|
||||
|
||||
static bool __io_wq_worker_cancel(struct io_worker *worker,
|
||||
struct io_cb_cancel_data *match,
|
||||
struct io_wq_work *work)
|
||||
{
|
||||
if (work && match->fn(work, match->data)) {
|
||||
work->flags |= IO_WQ_WORK_CANCEL;
|
||||
set_notify_signal(worker->task);
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool io_wq_worker_cancel(struct io_worker *worker, void *data)
|
||||
{
|
||||
struct io_cb_cancel_data *match = data;
|
||||
@ -981,13 +1001,11 @@ static bool io_wq_worker_cancel(struct io_worker *worker, void *data)
|
||||
* Hold the lock to avoid ->cur_work going out of scope, caller
|
||||
* may dereference the passed in work.
|
||||
*/
|
||||
spin_lock(&worker->lock);
|
||||
if (worker->cur_work &&
|
||||
match->fn(worker->cur_work, match->data)) {
|
||||
set_notify_signal(worker->task);
|
||||
raw_spin_lock(&worker->lock);
|
||||
if (__io_wq_worker_cancel(worker, match, worker->cur_work) ||
|
||||
__io_wq_worker_cancel(worker, match, worker->next_work))
|
||||
match->nr_running++;
|
||||
}
|
||||
spin_unlock(&worker->lock);
|
||||
raw_spin_unlock(&worker->lock);
|
||||
|
||||
return match->nr_running && !match->cancel_all;
|
||||
}
|
||||
@ -1039,17 +1057,16 @@ static void io_wqe_cancel_pending_work(struct io_wqe *wqe,
|
||||
{
|
||||
int i;
|
||||
retry:
|
||||
raw_spin_lock(&wqe->lock);
|
||||
for (i = 0; i < IO_WQ_ACCT_NR; i++) {
|
||||
struct io_wqe_acct *acct = io_get_acct(wqe, i == 0);
|
||||
|
||||
if (io_acct_cancel_pending_work(wqe, acct, match)) {
|
||||
raw_spin_lock(&wqe->lock);
|
||||
if (match->cancel_all)
|
||||
goto retry;
|
||||
return;
|
||||
break;
|
||||
}
|
||||
}
|
||||
raw_spin_unlock(&wqe->lock);
|
||||
}
|
||||
|
||||
static void io_wqe_cancel_running_work(struct io_wqe *wqe,
|
||||
@ -1074,25 +1091,27 @@ enum io_wq_cancel io_wq_cancel_cb(struct io_wq *wq, work_cancel_fn *cancel,
|
||||
* First check pending list, if we're lucky we can just remove it
|
||||
* from there. CANCEL_OK means that the work is returned as-new,
|
||||
* no completion will be posted for it.
|
||||
*/
|
||||
for_each_node(node) {
|
||||
struct io_wqe *wqe = wq->wqes[node];
|
||||
|
||||
io_wqe_cancel_pending_work(wqe, &match);
|
||||
if (match.nr_pending && !match.cancel_all)
|
||||
return IO_WQ_CANCEL_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
* Now check if a free (going busy) or busy worker has the work
|
||||
*
|
||||
* Then check if a free (going busy) or busy worker has the work
|
||||
* currently running. If we find it there, we'll return CANCEL_RUNNING
|
||||
* as an indication that we attempt to signal cancellation. The
|
||||
* completion will run normally in this case.
|
||||
*
|
||||
* Do both of these while holding the wqe->lock, to ensure that
|
||||
* we'll find a work item regardless of state.
|
||||
*/
|
||||
for_each_node(node) {
|
||||
struct io_wqe *wqe = wq->wqes[node];
|
||||
|
||||
raw_spin_lock(&wqe->lock);
|
||||
io_wqe_cancel_pending_work(wqe, &match);
|
||||
if (match.nr_pending && !match.cancel_all) {
|
||||
raw_spin_unlock(&wqe->lock);
|
||||
return IO_WQ_CANCEL_OK;
|
||||
}
|
||||
|
||||
io_wqe_cancel_running_work(wqe, &match);
|
||||
raw_spin_unlock(&wqe->lock);
|
||||
if (match.nr_running && !match.cancel_all)
|
||||
return IO_WQ_CANCEL_RUNNING;
|
||||
}
|
||||
@ -1263,7 +1282,9 @@ static void io_wq_destroy(struct io_wq *wq)
|
||||
.fn = io_wq_work_match_all,
|
||||
.cancel_all = true,
|
||||
};
|
||||
raw_spin_lock(&wqe->lock);
|
||||
io_wqe_cancel_pending_work(wqe, &match);
|
||||
raw_spin_unlock(&wqe->lock);
|
||||
free_cpumask_var(wqe->cpu_mask);
|
||||
kfree(wqe);
|
||||
}
|
||||
|
@ -1192,12 +1192,6 @@ static inline bool req_ref_put_and_test(struct io_kiocb *req)
|
||||
return atomic_dec_and_test(&req->refs);
|
||||
}
|
||||
|
||||
static inline void req_ref_put(struct io_kiocb *req)
|
||||
{
|
||||
WARN_ON_ONCE(!(req->flags & REQ_F_REFCOUNT));
|
||||
WARN_ON_ONCE(req_ref_put_and_test(req));
|
||||
}
|
||||
|
||||
static inline void req_ref_get(struct io_kiocb *req)
|
||||
{
|
||||
WARN_ON_ONCE(!(req->flags & REQ_F_REFCOUNT));
|
||||
@ -5468,12 +5462,14 @@ static void io_init_poll_iocb(struct io_poll_iocb *poll, __poll_t events,
|
||||
|
||||
static inline void io_poll_remove_entry(struct io_poll_iocb *poll)
|
||||
{
|
||||
struct wait_queue_head *head = poll->head;
|
||||
struct wait_queue_head *head = smp_load_acquire(&poll->head);
|
||||
|
||||
spin_lock_irq(&head->lock);
|
||||
list_del_init(&poll->wait.entry);
|
||||
poll->head = NULL;
|
||||
spin_unlock_irq(&head->lock);
|
||||
if (head) {
|
||||
spin_lock_irq(&head->lock);
|
||||
list_del_init(&poll->wait.entry);
|
||||
poll->head = NULL;
|
||||
spin_unlock_irq(&head->lock);
|
||||
}
|
||||
}
|
||||
|
||||
static void io_poll_remove_entries(struct io_kiocb *req)
|
||||
@ -5481,10 +5477,26 @@ static void io_poll_remove_entries(struct io_kiocb *req)
|
||||
struct io_poll_iocb *poll = io_poll_get_single(req);
|
||||
struct io_poll_iocb *poll_double = io_poll_get_double(req);
|
||||
|
||||
if (poll->head)
|
||||
io_poll_remove_entry(poll);
|
||||
if (poll_double && poll_double->head)
|
||||
/*
|
||||
* While we hold the waitqueue lock and the waitqueue is nonempty,
|
||||
* wake_up_pollfree() will wait for us. However, taking the waitqueue
|
||||
* lock in the first place can race with the waitqueue being freed.
|
||||
*
|
||||
* We solve this as eventpoll does: by taking advantage of the fact that
|
||||
* all users of wake_up_pollfree() will RCU-delay the actual free. If
|
||||
* we enter rcu_read_lock() and see that the pointer to the queue is
|
||||
* non-NULL, we can then lock it without the memory being freed out from
|
||||
* under us.
|
||||
*
|
||||
* Keep holding rcu_read_lock() as long as we hold the queue lock, in
|
||||
* case the caller deletes the entry from the queue, leaving it empty.
|
||||
* In that case, only RCU prevents the queue memory from being freed.
|
||||
*/
|
||||
rcu_read_lock();
|
||||
io_poll_remove_entry(poll);
|
||||
if (poll_double)
|
||||
io_poll_remove_entry(poll_double);
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
/*
|
||||
@ -5624,6 +5636,30 @@ static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
|
||||
wait);
|
||||
__poll_t mask = key_to_poll(key);
|
||||
|
||||
if (unlikely(mask & POLLFREE)) {
|
||||
io_poll_mark_cancelled(req);
|
||||
/* we have to kick tw in case it's not already */
|
||||
io_poll_execute(req, 0);
|
||||
|
||||
/*
|
||||
* If the waitqueue is being freed early but someone is already
|
||||
* holds ownership over it, we have to tear down the request as
|
||||
* best we can. That means immediately removing the request from
|
||||
* its waitqueue and preventing all further accesses to the
|
||||
* waitqueue via the request.
|
||||
*/
|
||||
list_del_init(&poll->wait.entry);
|
||||
|
||||
/*
|
||||
* Careful: this *must* be the last step, since as soon
|
||||
* as req->head is NULL'ed out, the request can be
|
||||
* completed and freed, since aio_poll_complete_work()
|
||||
* will no longer need to take the waitqueue lock.
|
||||
*/
|
||||
smp_store_release(&poll->head, NULL);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* for instances that support it check for an event match first */
|
||||
if (mask && !(mask & poll->events))
|
||||
return 0;
|
||||
@ -6350,16 +6386,21 @@ static int io_try_cancel_userdata(struct io_kiocb *req, u64 sqe_addr)
|
||||
WARN_ON_ONCE(!io_wq_current_is_worker() && req->task != current);
|
||||
|
||||
ret = io_async_cancel_one(req->task->io_uring, sqe_addr, ctx);
|
||||
if (ret != -ENOENT)
|
||||
return ret;
|
||||
/*
|
||||
* Fall-through even for -EALREADY, as we may have poll armed
|
||||
* that need unarming.
|
||||
*/
|
||||
if (!ret)
|
||||
return 0;
|
||||
|
||||
spin_lock(&ctx->completion_lock);
|
||||
ret = io_poll_cancel(ctx, sqe_addr, false);
|
||||
if (ret != -ENOENT)
|
||||
goto out;
|
||||
|
||||
spin_lock_irq(&ctx->timeout_lock);
|
||||
ret = io_timeout_cancel(ctx, sqe_addr);
|
||||
spin_unlock_irq(&ctx->timeout_lock);
|
||||
if (ret != -ENOENT)
|
||||
goto out;
|
||||
ret = io_poll_cancel(ctx, sqe_addr, false);
|
||||
out:
|
||||
spin_unlock(&ctx->completion_lock);
|
||||
return ret;
|
||||
|
Loading…
Reference in New Issue
Block a user