io_uring/poll: don't enable lazy wake for POLLEXCLUSIVE
There are a few quirks around using lazy wake for poll unconditionally,
and one of them is related to EPOLLEXCLUSIVE. Those may trigger
exclusive wakeups, which wake a limited number of entries in the wait
queue. If that wake number is less than the number of entries someone is
waiting for (and that someone is also using DEFER_TASKRUN), then we can
get stuck waiting for more entries while we should be processing the ones
we already got.
If we're doing exclusive poll waits, flag the request as not being
compatible with lazy wakeups.
Reported-by: Pavel Begunkov <asml.silence@gmail.com>
Fixes: 6ce4a93dbb ("io_uring/poll: use IOU_F_TWQ_LAZY_WAKE for wakeups")
Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
parent
705318a99a
commit
595e52284d
@ -434,6 +434,7 @@ enum {
|
|||||||
/* keep async read/write and isreg together and in order */
|
/* keep async read/write and isreg together and in order */
|
||||||
REQ_F_SUPPORT_NOWAIT_BIT,
|
REQ_F_SUPPORT_NOWAIT_BIT,
|
||||||
REQ_F_ISREG_BIT,
|
REQ_F_ISREG_BIT,
|
||||||
|
REQ_F_POLL_NO_LAZY_BIT,
|
||||||
|
|
||||||
/* not a real bit, just to check we're not overflowing the space */
|
/* not a real bit, just to check we're not overflowing the space */
|
||||||
__REQ_F_LAST_BIT,
|
__REQ_F_LAST_BIT,
|
||||||
@ -501,6 +502,8 @@ enum {
|
|||||||
REQ_F_CLEAR_POLLIN = BIT(REQ_F_CLEAR_POLLIN_BIT),
|
REQ_F_CLEAR_POLLIN = BIT(REQ_F_CLEAR_POLLIN_BIT),
|
||||||
/* hashed into ->cancel_hash_locked, protected by ->uring_lock */
|
/* hashed into ->cancel_hash_locked, protected by ->uring_lock */
|
||||||
REQ_F_HASH_LOCKED = BIT(REQ_F_HASH_LOCKED_BIT),
|
REQ_F_HASH_LOCKED = BIT(REQ_F_HASH_LOCKED_BIT),
|
||||||
|
/* don't use lazy poll wake for this request */
|
||||||
|
REQ_F_POLL_NO_LAZY = BIT(REQ_F_POLL_NO_LAZY_BIT),
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef void (*io_req_tw_func_t)(struct io_kiocb *req, struct io_tw_state *ts);
|
typedef void (*io_req_tw_func_t)(struct io_kiocb *req, struct io_tw_state *ts);
|
||||||
|
@ -366,11 +366,16 @@ void io_poll_task_func(struct io_kiocb *req, struct io_tw_state *ts)
|
|||||||
|
|
||||||
static void __io_poll_execute(struct io_kiocb *req, int mask)
|
static void __io_poll_execute(struct io_kiocb *req, int mask)
|
||||||
{
|
{
|
||||||
|
unsigned flags = 0;
|
||||||
|
|
||||||
io_req_set_res(req, mask, 0);
|
io_req_set_res(req, mask, 0);
|
||||||
req->io_task_work.func = io_poll_task_func;
|
req->io_task_work.func = io_poll_task_func;
|
||||||
|
|
||||||
trace_io_uring_task_add(req, mask);
|
trace_io_uring_task_add(req, mask);
|
||||||
__io_req_task_work_add(req, IOU_F_TWQ_LAZY_WAKE);
|
|
||||||
|
if (!(req->flags & REQ_F_POLL_NO_LAZY))
|
||||||
|
flags = IOU_F_TWQ_LAZY_WAKE;
|
||||||
|
__io_req_task_work_add(req, flags);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void io_poll_execute(struct io_kiocb *req, int res)
|
static inline void io_poll_execute(struct io_kiocb *req, int res)
|
||||||
@ -526,10 +531,19 @@ static void __io_queue_proc(struct io_poll *poll, struct io_poll_table *pt,
|
|||||||
poll->head = head;
|
poll->head = head;
|
||||||
poll->wait.private = (void *) wqe_private;
|
poll->wait.private = (void *) wqe_private;
|
||||||
|
|
||||||
if (poll->events & EPOLLEXCLUSIVE)
|
if (poll->events & EPOLLEXCLUSIVE) {
|
||||||
|
/*
|
||||||
|
* Exclusive waits may only wake a limited amount of entries
|
||||||
|
* rather than all of them, this may interfere with lazy
|
||||||
|
* wake if someone does wait(events > 1). Ensure we don't do
|
||||||
|
* lazy wake for those, as we need to process each one as they
|
||||||
|
* come in.
|
||||||
|
*/
|
||||||
|
req->flags |= REQ_F_POLL_NO_LAZY;
|
||||||
add_wait_queue_exclusive(head, &poll->wait);
|
add_wait_queue_exclusive(head, &poll->wait);
|
||||||
else
|
} else {
|
||||||
add_wait_queue(head, &poll->wait);
|
add_wait_queue(head, &poll->wait);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void io_poll_queue_proc(struct file *file, struct wait_queue_head *head,
|
static void io_poll_queue_proc(struct file *file, struct wait_queue_head *head,
|
||||||
|
Loading…
Reference in New Issue
Block a user