for-6.11/io_uring-20240714
-----BEGIN PGP SIGNATURE----- iQJEBAABCAAuFiEEwPw5LcreJtl1+l5K99NY+ylx4KYFAmaTgusQHGF4Ym9lQGtl cm5lbC5kawAKCRD301j7KXHgpr+1EAC4I7pRAM341sfmhe/9QQKMM8VzGwy5Tlr1 AFLO3BujRTl6X8S9fQjIjN1coW6u4F42I19+vVlxqvB7CUnqt9VWpexEjxe4K0FR R+hIZW+fWV9K/eMrcsLcI7oReN5kIihHOzzy3wz0rENoGB5dCl6JAZMHDUCSqP0/ ZJJQ5ut8ah20Y/myHnzP5o4TfdE7nGo73Di2YoE2g3KqeX/dlAKW9+5hqKzzrHhM 2U25k/6KLy0ROzKpy2qW0QRE3pT5udoHLK2ue9+XwXF8JWVTlfVkHBzGY7NstyyT z07SEzW1q4xV1HdCwGDAU7cL2NJMRXSG0p2WZTm8QyaVTdsZQvEx08GLsVdLvFH5 Gg+oOaxVE+INzW+/Lwz7lFHgq6XEjdAlEAOXDtGkZoni6Rt6iCzFCW6RTf/guy8o Cub7tatMyegxai9+FTN/oFVoydRR0tsMf0OHrWnLOperh9CaxAwXvmKFeT/UTwiB KIuIOJop7aThJbiV42a/xwTrEjNMZRv6uVBBEtJX3rxpmIhqTbjcAv9rKMmgtLMk s6yX1MvYdOLhhEDyoUBX0dJdEETBf3KbnYIwi8kb4Sbkw/ZDgnkmSxFysom61wUF byAFEpah3ZFR8aES0uNKUE6UHK6i5qqp0Za/n6gA927E/WGCU9ndaS+01gyknog0 8FqFYwruHQ== =50CO -----END PGP SIGNATURE----- Merge tag 'for-6.11/io_uring-20240714' of git://git.kernel.dk/linux Pull io_uring updates from Jens Axboe: "Here are the io_uring updates queued up for 6.11. Nothing major this time around, various minor improvements and cleanups/fixes. This contains: - Add bind/listen opcodes. Main motivation is to support direct descriptors, to avoid needing a regular fd just for doing these two operations (Gabriel) - Probe fixes (Gabriel) - Treat io-wq work flags as atomics. Not fixing a real issue, but may as well and it silences a KCSAN warning (me) - Cleanup of rsrc __set_current_state() usage (me) - Add 64-bit for {m,f}advise operations (me) - Improve performance of data ring messages (me) - Fix for ring message overflow posting (Pavel) - Fix for freezer interaction with TWA_NOTIFY_SIGNAL. 
Not strictly an io_uring thing, but since TWA_NOTIFY_SIGNAL was originally added for faster task_work signaling for io_uring, bundling it with this pull (Pavel) - Add Pavel as a co-maintainer - Various cleanups (me, Thorsten)" * tag 'for-6.11/io_uring-20240714' of git://git.kernel.dk/linux: (28 commits) io_uring/net: check socket is valid in io_bind()/io_listen() kernel: rerun task_work while freezing in get_signal() io_uring/io-wq: limit retrying worker initialisation io_uring/napi: Remove unnecessary s64 cast io_uring/net: cleanup io_recv_finish() bundle handling io_uring/msg_ring: fix overflow posting MAINTAINERS: change Pavel Begunkov from io_uring reviewer to maintainer io_uring/msg_ring: use kmem_cache_free() to free request io_uring/msg_ring: check for dead submitter task io_uring/msg_ring: add an alloc cache for io_kiocb entries io_uring/msg_ring: improve handling of target CQE posting io_uring: add io_add_aux_cqe() helper io_uring: add remote task_work execution helper io_uring/msg_ring: tighten requirement for remote posting io_uring: Allocate only necessary memory in io_probe io_uring: Fix probe of disabled operations io_uring: Introduce IORING_OP_LISTEN io_uring: Introduce IORING_OP_BIND net: Split a __sys_listen helper for io_uring net: Split a __sys_bind helper for io_uring ...
This commit is contained in:
commit
3a56e24173
@ -11551,7 +11551,7 @@ F: include/linux/iosys-map.h
|
||||
|
||||
IO_URING
|
||||
M: Jens Axboe <axboe@kernel.dk>
|
||||
R: Pavel Begunkov <asml.silence@gmail.com>
|
||||
M: Pavel Begunkov <asml.silence@gmail.com>
|
||||
L: io-uring@vger.kernel.org
|
||||
S: Maintained
|
||||
T: git git://git.kernel.dk/linux-block
|
||||
|
@ -50,7 +50,7 @@ struct io_wq_work_list {
|
||||
|
||||
struct io_wq_work {
|
||||
struct io_wq_work_node list;
|
||||
unsigned flags;
|
||||
atomic_t flags;
|
||||
/* place it here instead of io_kiocb as it fills padding and saves 4B */
|
||||
int cancel_seq;
|
||||
};
|
||||
@ -210,14 +210,6 @@ struct io_submit_state {
|
||||
struct blk_plug plug;
|
||||
};
|
||||
|
||||
struct io_ev_fd {
|
||||
struct eventfd_ctx *cq_ev_fd;
|
||||
unsigned int eventfd_async: 1;
|
||||
struct rcu_head rcu;
|
||||
atomic_t refs;
|
||||
atomic_t ops;
|
||||
};
|
||||
|
||||
struct io_alloc_cache {
|
||||
void **entries;
|
||||
unsigned int nr_cached;
|
||||
@ -372,7 +364,6 @@ struct io_ring_ctx {
|
||||
struct io_restriction restrictions;
|
||||
|
||||
/* slow path rsrc auxiliary data, used by update/register */
|
||||
struct io_mapped_ubuf *dummy_ubuf;
|
||||
struct io_rsrc_data *file_data;
|
||||
struct io_rsrc_data *buf_data;
|
||||
|
||||
@ -405,6 +396,9 @@ struct io_ring_ctx {
|
||||
struct callback_head poll_wq_task_work;
|
||||
struct list_head defer_list;
|
||||
|
||||
struct io_alloc_cache msg_cache;
|
||||
spinlock_t msg_lock;
|
||||
|
||||
#ifdef CONFIG_NET_RX_BUSY_POLL
|
||||
struct list_head napi_list; /* track busy poll napi_id */
|
||||
spinlock_t napi_lock; /* napi_list lock */
|
||||
|
@ -442,11 +442,14 @@ extern int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
|
||||
extern int __sys_socket(int family, int type, int protocol);
|
||||
extern struct file *__sys_socket_file(int family, int type, int protocol);
|
||||
extern int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen);
|
||||
extern int __sys_bind_socket(struct socket *sock, struct sockaddr_storage *address,
|
||||
int addrlen);
|
||||
extern int __sys_connect_file(struct file *file, struct sockaddr_storage *addr,
|
||||
int addrlen, int file_flags);
|
||||
extern int __sys_connect(int fd, struct sockaddr __user *uservaddr,
|
||||
int addrlen);
|
||||
extern int __sys_listen(int fd, int backlog);
|
||||
extern int __sys_listen_socket(struct socket *sock, int backlog);
|
||||
extern int __sys_getsockname(int fd, struct sockaddr __user *usockaddr,
|
||||
int __user *usockaddr_len);
|
||||
extern int __sys_getpeername(int fd, struct sockaddr __user *usockaddr,
|
||||
|
@ -257,6 +257,8 @@ enum io_uring_op {
|
||||
IORING_OP_FUTEX_WAITV,
|
||||
IORING_OP_FIXED_FD_INSTALL,
|
||||
IORING_OP_FTRUNCATE,
|
||||
IORING_OP_BIND,
|
||||
IORING_OP_LISTEN,
|
||||
|
||||
/* this goes last, obviously */
|
||||
IORING_OP_LAST,
|
||||
|
@ -4,9 +4,9 @@
|
||||
|
||||
obj-$(CONFIG_IO_URING) += io_uring.o opdef.o kbuf.o rsrc.o notif.o \
|
||||
tctx.o filetable.o rw.o net.o poll.o \
|
||||
uring_cmd.o openclose.o sqpoll.o \
|
||||
xattr.o nop.o fs.o splice.o sync.o \
|
||||
msg_ring.o advise.o openclose.o \
|
||||
eventfd.o uring_cmd.o openclose.o \
|
||||
sqpoll.o xattr.o nop.o fs.o splice.o \
|
||||
sync.o msg_ring.o advise.o openclose.o \
|
||||
epoll.o statx.o timeout.o fdinfo.o \
|
||||
cancel.o waitid.o register.o \
|
||||
truncate.o memmap.o
|
||||
|
@ -17,14 +17,14 @@
|
||||
struct io_fadvise {
|
||||
struct file *file;
|
||||
u64 offset;
|
||||
u32 len;
|
||||
u64 len;
|
||||
u32 advice;
|
||||
};
|
||||
|
||||
struct io_madvise {
|
||||
struct file *file;
|
||||
u64 addr;
|
||||
u32 len;
|
||||
u64 len;
|
||||
u32 advice;
|
||||
};
|
||||
|
||||
@ -33,11 +33,13 @@ int io_madvise_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
|
||||
#if defined(CONFIG_ADVISE_SYSCALLS) && defined(CONFIG_MMU)
|
||||
struct io_madvise *ma = io_kiocb_to_cmd(req, struct io_madvise);
|
||||
|
||||
if (sqe->buf_index || sqe->off || sqe->splice_fd_in)
|
||||
if (sqe->buf_index || sqe->splice_fd_in)
|
||||
return -EINVAL;
|
||||
|
||||
ma->addr = READ_ONCE(sqe->addr);
|
||||
ma->len = READ_ONCE(sqe->len);
|
||||
ma->len = READ_ONCE(sqe->off);
|
||||
if (!ma->len)
|
||||
ma->len = READ_ONCE(sqe->len);
|
||||
ma->advice = READ_ONCE(sqe->fadvise_advice);
|
||||
req->flags |= REQ_F_FORCE_ASYNC;
|
||||
return 0;
|
||||
@ -78,11 +80,13 @@ int io_fadvise_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
|
||||
{
|
||||
struct io_fadvise *fa = io_kiocb_to_cmd(req, struct io_fadvise);
|
||||
|
||||
if (sqe->buf_index || sqe->addr || sqe->splice_fd_in)
|
||||
if (sqe->buf_index || sqe->splice_fd_in)
|
||||
return -EINVAL;
|
||||
|
||||
fa->offset = READ_ONCE(sqe->off);
|
||||
fa->len = READ_ONCE(sqe->len);
|
||||
fa->len = READ_ONCE(sqe->addr);
|
||||
if (!fa->len)
|
||||
fa->len = READ_ONCE(sqe->len);
|
||||
fa->advice = READ_ONCE(sqe->fadvise_advice);
|
||||
if (io_fadvise_force_async(fa))
|
||||
req->flags |= REQ_F_FORCE_ASYNC;
|
||||
|
160
io_uring/eventfd.c
Normal file
160
io_uring/eventfd.c
Normal file
@ -0,0 +1,160 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/errno.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/eventfd.h>
|
||||
#include <linux/eventpoll.h>
|
||||
#include <linux/io_uring.h>
|
||||
#include <linux/io_uring_types.h>
|
||||
|
||||
#include "io-wq.h"
|
||||
#include "eventfd.h"
|
||||
|
||||
struct io_ev_fd {
|
||||
struct eventfd_ctx *cq_ev_fd;
|
||||
unsigned int eventfd_async: 1;
|
||||
struct rcu_head rcu;
|
||||
atomic_t refs;
|
||||
atomic_t ops;
|
||||
};
|
||||
|
||||
/* Bit numbers used with BIT() in io_ev_fd::ops. */
enum {
	/* a deferred eventfd signal has been queued via call_rcu_hurry() */
	IO_EVENTFD_OP_SIGNAL_BIT = 0,
};
|
||||
|
||||
static void io_eventfd_free(struct rcu_head *rcu)
|
||||
{
|
||||
struct io_ev_fd *ev_fd = container_of(rcu, struct io_ev_fd, rcu);
|
||||
|
||||
eventfd_ctx_put(ev_fd->cq_ev_fd);
|
||||
kfree(ev_fd);
|
||||
}
|
||||
|
||||
static void io_eventfd_do_signal(struct rcu_head *rcu)
|
||||
{
|
||||
struct io_ev_fd *ev_fd = container_of(rcu, struct io_ev_fd, rcu);
|
||||
|
||||
eventfd_signal_mask(ev_fd->cq_ev_fd, EPOLL_URING_WAKE);
|
||||
|
||||
if (atomic_dec_and_test(&ev_fd->refs))
|
||||
io_eventfd_free(rcu);
|
||||
}
|
||||
|
||||
void io_eventfd_signal(struct io_ring_ctx *ctx)
|
||||
{
|
||||
struct io_ev_fd *ev_fd = NULL;
|
||||
|
||||
if (READ_ONCE(ctx->rings->cq_flags) & IORING_CQ_EVENTFD_DISABLED)
|
||||
return;
|
||||
|
||||
guard(rcu)();
|
||||
|
||||
/*
|
||||
* rcu_dereference ctx->io_ev_fd once and use it for both for checking
|
||||
* and eventfd_signal
|
||||
*/
|
||||
ev_fd = rcu_dereference(ctx->io_ev_fd);
|
||||
|
||||
/*
|
||||
* Check again if ev_fd exists incase an io_eventfd_unregister call
|
||||
* completed between the NULL check of ctx->io_ev_fd at the start of
|
||||
* the function and rcu_read_lock.
|
||||
*/
|
||||
if (unlikely(!ev_fd))
|
||||
return;
|
||||
if (!atomic_inc_not_zero(&ev_fd->refs))
|
||||
return;
|
||||
if (ev_fd->eventfd_async && !io_wq_current_is_worker())
|
||||
goto out;
|
||||
|
||||
if (likely(eventfd_signal_allowed())) {
|
||||
eventfd_signal_mask(ev_fd->cq_ev_fd, EPOLL_URING_WAKE);
|
||||
} else {
|
||||
if (!atomic_fetch_or(BIT(IO_EVENTFD_OP_SIGNAL_BIT), &ev_fd->ops)) {
|
||||
call_rcu_hurry(&ev_fd->rcu, io_eventfd_do_signal);
|
||||
return;
|
||||
}
|
||||
}
|
||||
out:
|
||||
if (atomic_dec_and_test(&ev_fd->refs))
|
||||
call_rcu(&ev_fd->rcu, io_eventfd_free);
|
||||
}
|
||||
|
||||
void io_eventfd_flush_signal(struct io_ring_ctx *ctx)
|
||||
{
|
||||
bool skip;
|
||||
|
||||
spin_lock(&ctx->completion_lock);
|
||||
|
||||
/*
|
||||
* Eventfd should only get triggered when at least one event has been
|
||||
* posted. Some applications rely on the eventfd notification count
|
||||
* only changing IFF a new CQE has been added to the CQ ring. There's
|
||||
* no depedency on 1:1 relationship between how many times this
|
||||
* function is called (and hence the eventfd count) and number of CQEs
|
||||
* posted to the CQ ring.
|
||||
*/
|
||||
skip = ctx->cached_cq_tail == ctx->evfd_last_cq_tail;
|
||||
ctx->evfd_last_cq_tail = ctx->cached_cq_tail;
|
||||
spin_unlock(&ctx->completion_lock);
|
||||
if (skip)
|
||||
return;
|
||||
|
||||
io_eventfd_signal(ctx);
|
||||
}
|
||||
|
||||
/*
 * Attach an eventfd to @ctx.
 *
 * @arg points to a userspace __s32 holding the eventfd file descriptor and
 * @eventfd_async is stored as the io_ev_fd's eventfd_async flag. The
 * lockdep annotation expects ctx->uring_lock to be held by the caller.
 *
 * Returns 0 on success, -EBUSY if an eventfd is already registered, -EFAULT
 * if the descriptor cannot be copied from userspace, -ENOMEM on allocation
 * failure, or the error reported by eventfd_ctx_fdget().
 */
int io_eventfd_register(struct io_ring_ctx *ctx, void __user *arg,
			unsigned int eventfd_async)
{
	__s32 __user *user_fd = arg;
	struct io_ev_fd *new_evfd;
	int fd;

	if (rcu_dereference_protected(ctx->io_ev_fd,
				      lockdep_is_held(&ctx->uring_lock)))
		return -EBUSY;

	if (copy_from_user(&fd, user_fd, sizeof(*user_fd)))
		return -EFAULT;

	new_evfd = kmalloc(sizeof(*new_evfd), GFP_KERNEL);
	if (!new_evfd)
		return -ENOMEM;

	new_evfd->cq_ev_fd = eventfd_ctx_fdget(fd);
	if (IS_ERR(new_evfd->cq_ev_fd)) {
		int err = PTR_ERR(new_evfd->cq_ev_fd);

		kfree(new_evfd);
		return err;
	}

	/* Start tracking CQ tail movement from the current position. */
	spin_lock(&ctx->completion_lock);
	ctx->evfd_last_cq_tail = ctx->cached_cq_tail;
	spin_unlock(&ctx->completion_lock);

	/* Fully initialise the object before publishing it to RCU readers. */
	new_evfd->eventfd_async = eventfd_async;
	ctx->has_evfd = true;
	atomic_set(&new_evfd->refs, 1);
	atomic_set(&new_evfd->ops, 0);
	rcu_assign_pointer(ctx->io_ev_fd, new_evfd);
	return 0;
}
|
||||
|
||||
int io_eventfd_unregister(struct io_ring_ctx *ctx)
|
||||
{
|
||||
struct io_ev_fd *ev_fd;
|
||||
|
||||
ev_fd = rcu_dereference_protected(ctx->io_ev_fd,
|
||||
lockdep_is_held(&ctx->uring_lock));
|
||||
if (ev_fd) {
|
||||
ctx->has_evfd = false;
|
||||
rcu_assign_pointer(ctx->io_ev_fd, NULL);
|
||||
if (atomic_dec_and_test(&ev_fd->refs))
|
||||
call_rcu(&ev_fd->rcu, io_eventfd_free);
|
||||
return 0;
|
||||
}
|
||||
|
||||
return -ENXIO;
|
||||
}
|
8
io_uring/eventfd.h
Normal file
8
io_uring/eventfd.h
Normal file
@ -0,0 +1,8 @@
|
||||
|
||||
struct io_ring_ctx;
|
||||
int io_eventfd_register(struct io_ring_ctx *ctx, void __user *arg,
|
||||
unsigned int eventfd_async);
|
||||
int io_eventfd_unregister(struct io_ring_ctx *ctx);
|
||||
|
||||
void io_eventfd_flush_signal(struct io_ring_ctx *ctx);
|
||||
void io_eventfd_signal(struct io_ring_ctx *ctx);
|
@ -23,6 +23,7 @@
|
||||
#include "io_uring.h"
|
||||
|
||||
#define WORKER_IDLE_TIMEOUT (5 * HZ)
|
||||
#define WORKER_INIT_LIMIT 3
|
||||
|
||||
enum {
|
||||
IO_WORKER_F_UP = 0, /* up and active */
|
||||
@ -58,6 +59,7 @@ struct io_worker {
|
||||
|
||||
unsigned long create_state;
|
||||
struct callback_head create_work;
|
||||
int init_retries;
|
||||
|
||||
union {
|
||||
struct rcu_head rcu;
|
||||
@ -159,7 +161,7 @@ static inline struct io_wq_acct *io_get_acct(struct io_wq *wq, bool bound)
|
||||
static inline struct io_wq_acct *io_work_get_acct(struct io_wq *wq,
|
||||
struct io_wq_work *work)
|
||||
{
|
||||
return io_get_acct(wq, !(work->flags & IO_WQ_WORK_UNBOUND));
|
||||
return io_get_acct(wq, !(atomic_read(&work->flags) & IO_WQ_WORK_UNBOUND));
|
||||
}
|
||||
|
||||
static inline struct io_wq_acct *io_wq_get_acct(struct io_worker *worker)
|
||||
@ -451,7 +453,7 @@ static void __io_worker_idle(struct io_wq *wq, struct io_worker *worker)
|
||||
|
||||
static inline unsigned int io_get_work_hash(struct io_wq_work *work)
|
||||
{
|
||||
return work->flags >> IO_WQ_HASH_SHIFT;
|
||||
return atomic_read(&work->flags) >> IO_WQ_HASH_SHIFT;
|
||||
}
|
||||
|
||||
static bool io_wait_on_hash(struct io_wq *wq, unsigned int hash)
|
||||
@ -592,8 +594,9 @@ static void io_worker_handle_work(struct io_wq_acct *acct,
|
||||
|
||||
next_hashed = wq_next_work(work);
|
||||
|
||||
if (unlikely(do_kill) && (work->flags & IO_WQ_WORK_UNBOUND))
|
||||
work->flags |= IO_WQ_WORK_CANCEL;
|
||||
if (do_kill &&
|
||||
(atomic_read(&work->flags) & IO_WQ_WORK_UNBOUND))
|
||||
atomic_or(IO_WQ_WORK_CANCEL, &work->flags);
|
||||
wq->do_work(work);
|
||||
io_assign_current_work(worker, NULL);
|
||||
|
||||
@ -744,7 +747,7 @@ static bool io_wq_work_match_all(struct io_wq_work *work, void *data)
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline bool io_should_retry_thread(long err)
|
||||
static inline bool io_should_retry_thread(struct io_worker *worker, long err)
|
||||
{
|
||||
/*
|
||||
* Prevent perpetual task_work retry, if the task (or its group) is
|
||||
@ -752,6 +755,8 @@ static inline bool io_should_retry_thread(long err)
|
||||
*/
|
||||
if (fatal_signal_pending(current))
|
||||
return false;
|
||||
if (worker->init_retries++ >= WORKER_INIT_LIMIT)
|
||||
return false;
|
||||
|
||||
switch (err) {
|
||||
case -EAGAIN:
|
||||
@ -778,7 +783,7 @@ static void create_worker_cont(struct callback_head *cb)
|
||||
io_init_new_worker(wq, worker, tsk);
|
||||
io_worker_release(worker);
|
||||
return;
|
||||
} else if (!io_should_retry_thread(PTR_ERR(tsk))) {
|
||||
} else if (!io_should_retry_thread(worker, PTR_ERR(tsk))) {
|
||||
struct io_wq_acct *acct = io_wq_get_acct(worker);
|
||||
|
||||
atomic_dec(&acct->nr_running);
|
||||
@ -845,7 +850,7 @@ fail:
|
||||
tsk = create_io_thread(io_wq_worker, worker, NUMA_NO_NODE);
|
||||
if (!IS_ERR(tsk)) {
|
||||
io_init_new_worker(wq, worker, tsk);
|
||||
} else if (!io_should_retry_thread(PTR_ERR(tsk))) {
|
||||
} else if (!io_should_retry_thread(worker, PTR_ERR(tsk))) {
|
||||
kfree(worker);
|
||||
goto fail;
|
||||
} else {
|
||||
@ -891,7 +896,7 @@ static bool io_wq_worker_wake(struct io_worker *worker, void *data)
|
||||
static void io_run_cancel(struct io_wq_work *work, struct io_wq *wq)
|
||||
{
|
||||
do {
|
||||
work->flags |= IO_WQ_WORK_CANCEL;
|
||||
atomic_or(IO_WQ_WORK_CANCEL, &work->flags);
|
||||
wq->do_work(work);
|
||||
work = wq->free_work(work);
|
||||
} while (work);
|
||||
@ -926,7 +931,7 @@ static bool io_wq_work_match_item(struct io_wq_work *work, void *data)
|
||||
void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work)
|
||||
{
|
||||
struct io_wq_acct *acct = io_work_get_acct(wq, work);
|
||||
unsigned long work_flags = work->flags;
|
||||
unsigned int work_flags = atomic_read(&work->flags);
|
||||
struct io_cb_cancel_data match = {
|
||||
.fn = io_wq_work_match_item,
|
||||
.data = work,
|
||||
@ -939,7 +944,7 @@ void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work)
|
||||
* been marked as one that should not get executed, cancel it here.
|
||||
*/
|
||||
if (test_bit(IO_WQ_BIT_EXIT, &wq->state) ||
|
||||
(work->flags & IO_WQ_WORK_CANCEL)) {
|
||||
(work_flags & IO_WQ_WORK_CANCEL)) {
|
||||
io_run_cancel(work, wq);
|
||||
return;
|
||||
}
|
||||
@ -982,7 +987,7 @@ void io_wq_hash_work(struct io_wq_work *work, void *val)
|
||||
unsigned int bit;
|
||||
|
||||
bit = hash_ptr(val, IO_WQ_HASH_ORDER);
|
||||
work->flags |= (IO_WQ_WORK_HASHED | (bit << IO_WQ_HASH_SHIFT));
|
||||
atomic_or(IO_WQ_WORK_HASHED | (bit << IO_WQ_HASH_SHIFT), &work->flags);
|
||||
}
|
||||
|
||||
static bool __io_wq_worker_cancel(struct io_worker *worker,
|
||||
@ -990,7 +995,7 @@ static bool __io_wq_worker_cancel(struct io_worker *worker,
|
||||
struct io_wq_work *work)
|
||||
{
|
||||
if (work && match->fn(work, match->data)) {
|
||||
work->flags |= IO_WQ_WORK_CANCEL;
|
||||
atomic_or(IO_WQ_WORK_CANCEL, &work->flags);
|
||||
__set_notify_signal(worker->task);
|
||||
return true;
|
||||
}
|
||||
|
@ -56,7 +56,7 @@ bool io_wq_worker_stopped(void);
|
||||
|
||||
static inline bool io_wq_is_hashed(struct io_wq_work *work)
|
||||
{
|
||||
return work->flags & IO_WQ_WORK_HASHED;
|
||||
return atomic_read(&work->flags) & IO_WQ_WORK_HASHED;
|
||||
}
|
||||
|
||||
typedef bool (work_cancel_fn)(struct io_wq_work *, void *);
|
||||
|
@ -95,12 +95,14 @@
|
||||
#include "futex.h"
|
||||
#include "napi.h"
|
||||
#include "uring_cmd.h"
|
||||
#include "msg_ring.h"
|
||||
#include "memmap.h"
|
||||
|
||||
#include "timeout.h"
|
||||
#include "poll.h"
|
||||
#include "rw.h"
|
||||
#include "alloc_cache.h"
|
||||
#include "eventfd.h"
|
||||
|
||||
#define IORING_MAX_ENTRIES 32768
|
||||
#define IORING_MAX_CQ_ENTRIES (2 * IORING_MAX_ENTRIES)
|
||||
@ -314,6 +316,9 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
|
||||
sizeof(struct io_async_rw));
|
||||
ret |= io_alloc_cache_init(&ctx->uring_cache, IO_ALLOC_CACHE_MAX,
|
||||
sizeof(struct uring_cache));
|
||||
spin_lock_init(&ctx->msg_lock);
|
||||
ret |= io_alloc_cache_init(&ctx->msg_cache, IO_ALLOC_CACHE_MAX,
|
||||
sizeof(struct io_kiocb));
|
||||
ret |= io_futex_cache_init(ctx);
|
||||
if (ret)
|
||||
goto err;
|
||||
@ -350,6 +355,7 @@ err:
|
||||
io_alloc_cache_free(&ctx->netmsg_cache, io_netmsg_cache_free);
|
||||
io_alloc_cache_free(&ctx->rw_cache, io_rw_cache_free);
|
||||
io_alloc_cache_free(&ctx->uring_cache, kfree);
|
||||
io_alloc_cache_free(&ctx->msg_cache, io_msg_cache_free);
|
||||
io_futex_cache_free(ctx);
|
||||
kfree(ctx->cancel_table.hbs);
|
||||
kfree(ctx->cancel_table_locked.hbs);
|
||||
@ -461,9 +467,9 @@ static void io_prep_async_work(struct io_kiocb *req)
|
||||
}
|
||||
|
||||
req->work.list.next = NULL;
|
||||
req->work.flags = 0;
|
||||
atomic_set(&req->work.flags, 0);
|
||||
if (req->flags & REQ_F_FORCE_ASYNC)
|
||||
req->work.flags |= IO_WQ_WORK_CONCURRENT;
|
||||
atomic_or(IO_WQ_WORK_CONCURRENT, &req->work.flags);
|
||||
|
||||
if (req->file && !(req->flags & REQ_F_FIXED_FILE))
|
||||
req->flags |= io_file_get_flags(req->file);
|
||||
@ -479,7 +485,7 @@ static void io_prep_async_work(struct io_kiocb *req)
|
||||
io_wq_hash_work(&req->work, file_inode(req->file));
|
||||
} else if (!req->file || !S_ISBLK(file_inode(req->file)->i_mode)) {
|
||||
if (def->unbound_nonreg_file)
|
||||
req->work.flags |= IO_WQ_WORK_UNBOUND;
|
||||
atomic_or(IO_WQ_WORK_UNBOUND, &req->work.flags);
|
||||
}
|
||||
}
|
||||
|
||||
@ -519,7 +525,7 @@ static void io_queue_iowq(struct io_kiocb *req)
|
||||
* worker for it).
|
||||
*/
|
||||
if (WARN_ON_ONCE(!same_thread_group(req->task, current)))
|
||||
req->work.flags |= IO_WQ_WORK_CANCEL;
|
||||
atomic_or(IO_WQ_WORK_CANCEL, &req->work.flags);
|
||||
|
||||
trace_io_uring_queue_async_work(req, io_wq_is_hashed(&req->work));
|
||||
io_wq_enqueue(tctx->io_wq, &req->work);
|
||||
@ -541,84 +547,6 @@ static __cold void io_queue_deferred(struct io_ring_ctx *ctx)
|
||||
}
|
||||
}
|
||||
|
||||
void io_eventfd_ops(struct rcu_head *rcu)
|
||||
{
|
||||
struct io_ev_fd *ev_fd = container_of(rcu, struct io_ev_fd, rcu);
|
||||
int ops = atomic_xchg(&ev_fd->ops, 0);
|
||||
|
||||
if (ops & BIT(IO_EVENTFD_OP_SIGNAL_BIT))
|
||||
eventfd_signal_mask(ev_fd->cq_ev_fd, EPOLL_URING_WAKE);
|
||||
|
||||
/* IO_EVENTFD_OP_FREE_BIT may not be set here depending on callback
|
||||
* ordering in a race but if references are 0 we know we have to free
|
||||
* it regardless.
|
||||
*/
|
||||
if (atomic_dec_and_test(&ev_fd->refs)) {
|
||||
eventfd_ctx_put(ev_fd->cq_ev_fd);
|
||||
kfree(ev_fd);
|
||||
}
|
||||
}
|
||||
|
||||
static void io_eventfd_signal(struct io_ring_ctx *ctx)
|
||||
{
|
||||
struct io_ev_fd *ev_fd = NULL;
|
||||
|
||||
rcu_read_lock();
|
||||
/*
|
||||
* rcu_dereference ctx->io_ev_fd once and use it for both for checking
|
||||
* and eventfd_signal
|
||||
*/
|
||||
ev_fd = rcu_dereference(ctx->io_ev_fd);
|
||||
|
||||
/*
|
||||
* Check again if ev_fd exists incase an io_eventfd_unregister call
|
||||
* completed between the NULL check of ctx->io_ev_fd at the start of
|
||||
* the function and rcu_read_lock.
|
||||
*/
|
||||
if (unlikely(!ev_fd))
|
||||
goto out;
|
||||
if (READ_ONCE(ctx->rings->cq_flags) & IORING_CQ_EVENTFD_DISABLED)
|
||||
goto out;
|
||||
if (ev_fd->eventfd_async && !io_wq_current_is_worker())
|
||||
goto out;
|
||||
|
||||
if (likely(eventfd_signal_allowed())) {
|
||||
eventfd_signal_mask(ev_fd->cq_ev_fd, EPOLL_URING_WAKE);
|
||||
} else {
|
||||
atomic_inc(&ev_fd->refs);
|
||||
if (!atomic_fetch_or(BIT(IO_EVENTFD_OP_SIGNAL_BIT), &ev_fd->ops))
|
||||
call_rcu_hurry(&ev_fd->rcu, io_eventfd_ops);
|
||||
else
|
||||
atomic_dec(&ev_fd->refs);
|
||||
}
|
||||
|
||||
out:
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
static void io_eventfd_flush_signal(struct io_ring_ctx *ctx)
|
||||
{
|
||||
bool skip;
|
||||
|
||||
spin_lock(&ctx->completion_lock);
|
||||
|
||||
/*
|
||||
* Eventfd should only get triggered when at least one event has been
|
||||
* posted. Some applications rely on the eventfd notification count
|
||||
* only changing IFF a new CQE has been added to the CQ ring. There's
|
||||
* no depedency on 1:1 relationship between how many times this
|
||||
* function is called (and hence the eventfd count) and number of CQEs
|
||||
* posted to the CQ ring.
|
||||
*/
|
||||
skip = ctx->cached_cq_tail == ctx->evfd_last_cq_tail;
|
||||
ctx->evfd_last_cq_tail = ctx->cached_cq_tail;
|
||||
spin_unlock(&ctx->completion_lock);
|
||||
if (skip)
|
||||
return;
|
||||
|
||||
io_eventfd_signal(ctx);
|
||||
}
|
||||
|
||||
void __io_commit_cqring_flush(struct io_ring_ctx *ctx)
|
||||
{
|
||||
if (ctx->poll_activated)
|
||||
@ -878,19 +806,42 @@ static bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data, s32 res,
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool __io_post_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res,
|
||||
u32 cflags)
|
||||
{
|
||||
bool filled;
|
||||
|
||||
filled = io_fill_cqe_aux(ctx, user_data, res, cflags);
|
||||
if (!filled)
|
||||
filled = io_cqring_event_overflow(ctx, user_data, res, cflags, 0, 0);
|
||||
|
||||
return filled;
|
||||
}
|
||||
|
||||
bool io_post_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags)
|
||||
{
|
||||
bool filled;
|
||||
|
||||
io_cq_lock(ctx);
|
||||
filled = io_fill_cqe_aux(ctx, user_data, res, cflags);
|
||||
if (!filled)
|
||||
filled = io_cqring_event_overflow(ctx, user_data, res, cflags, 0, 0);
|
||||
|
||||
filled = __io_post_aux_cqe(ctx, user_data, res, cflags);
|
||||
io_cq_unlock_post(ctx);
|
||||
return filled;
|
||||
}
|
||||
|
||||
/*
|
||||
* Must be called from inline task_work so we now a flush will happen later,
|
||||
* and obviously with ctx->uring_lock held (tw always has that).
|
||||
*/
|
||||
void io_add_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags)
|
||||
{
|
||||
if (!io_fill_cqe_aux(ctx, user_data, res, cflags)) {
|
||||
spin_lock(&ctx->completion_lock);
|
||||
io_cqring_event_overflow(ctx, user_data, res, cflags, 0, 0);
|
||||
spin_unlock(&ctx->completion_lock);
|
||||
}
|
||||
ctx->submit_state.cq_flush = true;
|
||||
}
|
||||
|
||||
/*
|
||||
* A helper for multishot requests posting additional CQEs.
|
||||
* Should only be used from a task_work including IO_URING_F_MULTISHOT.
|
||||
@ -1175,9 +1126,10 @@ void tctx_task_work(struct callback_head *cb)
|
||||
WARN_ON_ONCE(ret);
|
||||
}
|
||||
|
||||
static inline void io_req_local_work_add(struct io_kiocb *req, unsigned flags)
|
||||
static inline void io_req_local_work_add(struct io_kiocb *req,
|
||||
struct io_ring_ctx *ctx,
|
||||
unsigned flags)
|
||||
{
|
||||
struct io_ring_ctx *ctx = req->ctx;
|
||||
unsigned nr_wait, nr_tw, nr_tw_prev;
|
||||
struct llist_node *head;
|
||||
|
||||
@ -1191,6 +1143,8 @@ static inline void io_req_local_work_add(struct io_kiocb *req, unsigned flags)
|
||||
if (req->flags & (REQ_F_LINK | REQ_F_HARDLINK))
|
||||
flags &= ~IOU_F_TWQ_LAZY_WAKE;
|
||||
|
||||
guard(rcu)();
|
||||
|
||||
head = READ_ONCE(ctx->work_llist.first);
|
||||
do {
|
||||
nr_tw_prev = 0;
|
||||
@ -1272,13 +1226,18 @@ static void io_req_normal_work_add(struct io_kiocb *req)
|
||||
|
||||
void __io_req_task_work_add(struct io_kiocb *req, unsigned flags)
|
||||
{
|
||||
if (req->ctx->flags & IORING_SETUP_DEFER_TASKRUN) {
|
||||
rcu_read_lock();
|
||||
io_req_local_work_add(req, flags);
|
||||
rcu_read_unlock();
|
||||
} else {
|
||||
if (req->ctx->flags & IORING_SETUP_DEFER_TASKRUN)
|
||||
io_req_local_work_add(req, req->ctx, flags);
|
||||
else
|
||||
io_req_normal_work_add(req);
|
||||
}
|
||||
}
|
||||
|
||||
void io_req_task_work_add_remote(struct io_kiocb *req, struct io_ring_ctx *ctx,
|
||||
unsigned flags)
|
||||
{
|
||||
if (WARN_ON_ONCE(!(ctx->flags & IORING_SETUP_DEFER_TASKRUN)))
|
||||
return;
|
||||
io_req_local_work_add(req, ctx, flags);
|
||||
}
|
||||
|
||||
static void __cold io_move_task_work_from_local(struct io_ring_ctx *ctx)
|
||||
@ -1467,7 +1426,7 @@ void __io_submit_flush_completions(struct io_ring_ctx *ctx)
|
||||
}
|
||||
__io_cq_unlock_post(ctx);
|
||||
|
||||
if (!wq_list_empty(&ctx->submit_state.compl_reqs)) {
|
||||
if (!wq_list_empty(&state->compl_reqs)) {
|
||||
io_free_batch_list(ctx, state->compl_reqs.first);
|
||||
INIT_WQ_LIST(&state->compl_reqs);
|
||||
}
|
||||
@ -1813,14 +1772,14 @@ void io_wq_submit_work(struct io_wq_work *work)
|
||||
io_arm_ltimeout(req);
|
||||
|
||||
/* either cancelled or io-wq is dying, so don't touch tctx->iowq */
|
||||
if (work->flags & IO_WQ_WORK_CANCEL) {
|
||||
if (atomic_read(&work->flags) & IO_WQ_WORK_CANCEL) {
|
||||
fail:
|
||||
io_req_task_queue_fail(req, err);
|
||||
return;
|
||||
}
|
||||
if (!io_assign_file(req, def, issue_flags)) {
|
||||
err = -EBADF;
|
||||
work->flags |= IO_WQ_WORK_CANCEL;
|
||||
atomic_or(IO_WQ_WORK_CANCEL, &work->flags);
|
||||
goto fail;
|
||||
}
|
||||
|
||||
@ -2649,6 +2608,7 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
|
||||
io_alloc_cache_free(&ctx->netmsg_cache, io_netmsg_cache_free);
|
||||
io_alloc_cache_free(&ctx->rw_cache, io_rw_cache_free);
|
||||
io_alloc_cache_free(&ctx->uring_cache, kfree);
|
||||
io_alloc_cache_free(&ctx->msg_cache, io_msg_cache_free);
|
||||
io_futex_cache_free(ctx);
|
||||
io_destroy_buffers(ctx);
|
||||
mutex_unlock(&ctx->uring_lock);
|
||||
|
@ -65,6 +65,7 @@ bool io_cqe_cache_refill(struct io_ring_ctx *ctx, bool overflow);
|
||||
int io_run_task_work_sig(struct io_ring_ctx *ctx);
|
||||
void io_req_defer_failed(struct io_kiocb *req, s32 res);
|
||||
bool io_post_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags);
|
||||
void io_add_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags);
|
||||
bool io_req_post_cqe(struct io_kiocb *req, s32 res, u32 cflags);
|
||||
void __io_commit_cqring_flush(struct io_ring_ctx *ctx);
|
||||
|
||||
@ -73,6 +74,8 @@ struct file *io_file_get_fixed(struct io_kiocb *req, int fd,
|
||||
unsigned issue_flags);
|
||||
|
||||
void __io_req_task_work_add(struct io_kiocb *req, unsigned flags);
|
||||
void io_req_task_work_add_remote(struct io_kiocb *req, struct io_ring_ctx *ctx,
|
||||
unsigned flags);
|
||||
bool io_alloc_async_data(struct io_kiocb *req);
|
||||
void io_req_task_queue(struct io_kiocb *req);
|
||||
void io_req_task_complete(struct io_kiocb *req, struct io_tw_state *ts);
|
||||
@ -104,12 +107,6 @@ bool __io_alloc_req_refill(struct io_ring_ctx *ctx);
|
||||
bool io_match_task_safe(struct io_kiocb *head, struct task_struct *task,
|
||||
bool cancel_all);
|
||||
|
||||
enum {
|
||||
IO_EVENTFD_OP_SIGNAL_BIT,
|
||||
IO_EVENTFD_OP_FREE_BIT,
|
||||
};
|
||||
|
||||
void io_eventfd_ops(struct rcu_head *rcu);
|
||||
void io_activate_pollwq(struct io_ring_ctx *ctx);
|
||||
|
||||
static inline void io_lockdep_assert_cq_locked(struct io_ring_ctx *ctx)
|
||||
|
@ -11,9 +11,9 @@
|
||||
#include "io_uring.h"
|
||||
#include "rsrc.h"
|
||||
#include "filetable.h"
|
||||
#include "alloc_cache.h"
|
||||
#include "msg_ring.h"
|
||||
|
||||
|
||||
/* All valid masks for MSG_RING */
|
||||
#define IORING_MSG_RING_MASK (IORING_MSG_RING_CQE_SKIP | \
|
||||
IORING_MSG_RING_FLAGS_PASS)
|
||||
@ -68,59 +68,70 @@ void io_msg_ring_cleanup(struct io_kiocb *req)
|
||||
|
||||
static inline bool io_msg_need_remote(struct io_ring_ctx *target_ctx)
|
||||
{
|
||||
if (!target_ctx->task_complete)
|
||||
return false;
|
||||
return current != target_ctx->submitter_task;
|
||||
return target_ctx->task_complete;
|
||||
}
|
||||
|
||||
static int io_msg_exec_remote(struct io_kiocb *req, task_work_func_t func)
|
||||
static void io_msg_tw_complete(struct io_kiocb *req, struct io_tw_state *ts)
|
||||
{
|
||||
struct io_ring_ctx *ctx = req->file->private_data;
|
||||
struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
|
||||
struct task_struct *task = READ_ONCE(ctx->submitter_task);
|
||||
struct io_ring_ctx *ctx = req->ctx;
|
||||
|
||||
if (unlikely(!task))
|
||||
return -EOWNERDEAD;
|
||||
|
||||
init_task_work(&msg->tw, func);
|
||||
if (task_work_add(task, &msg->tw, TWA_SIGNAL))
|
||||
return -EOWNERDEAD;
|
||||
|
||||
return IOU_ISSUE_SKIP_COMPLETE;
|
||||
}
|
||||
|
||||
static void io_msg_tw_complete(struct callback_head *head)
|
||||
{
|
||||
struct io_msg *msg = container_of(head, struct io_msg, tw);
|
||||
struct io_kiocb *req = cmd_to_io_kiocb(msg);
|
||||
struct io_ring_ctx *target_ctx = req->file->private_data;
|
||||
int ret = 0;
|
||||
|
||||
if (current->flags & PF_EXITING) {
|
||||
ret = -EOWNERDEAD;
|
||||
} else {
|
||||
u32 flags = 0;
|
||||
|
||||
if (msg->flags & IORING_MSG_RING_FLAGS_PASS)
|
||||
flags = msg->cqe_flags;
|
||||
|
||||
/*
|
||||
* If the target ring is using IOPOLL mode, then we need to be
|
||||
* holding the uring_lock for posting completions. Other ring
|
||||
* types rely on the regular completion locking, which is
|
||||
* handled while posting.
|
||||
*/
|
||||
if (target_ctx->flags & IORING_SETUP_IOPOLL)
|
||||
mutex_lock(&target_ctx->uring_lock);
|
||||
if (!io_post_aux_cqe(target_ctx, msg->user_data, msg->len, flags))
|
||||
ret = -EOVERFLOW;
|
||||
if (target_ctx->flags & IORING_SETUP_IOPOLL)
|
||||
mutex_unlock(&target_ctx->uring_lock);
|
||||
io_add_aux_cqe(ctx, req->cqe.user_data, req->cqe.res, req->cqe.flags);
|
||||
if (spin_trylock(&ctx->msg_lock)) {
|
||||
if (io_alloc_cache_put(&ctx->msg_cache, req))
|
||||
req = NULL;
|
||||
spin_unlock(&ctx->msg_lock);
|
||||
}
|
||||
if (req)
|
||||
kmem_cache_free(req_cachep, req);
|
||||
percpu_ref_put(&ctx->refs);
|
||||
}
|
||||
|
||||
if (ret < 0)
|
||||
req_set_fail(req);
|
||||
io_req_queue_tw_complete(req, ret);
|
||||
/*
 * Queue @req as task_work on the target ring so that io_msg_tw_complete()
 * posts the message CQE from there.
 *
 * @ctx:       ring that should receive the completion
 * @req:       pre-allocated request that carries the CQE values
 * @res:       CQE result
 * @cflags:    CQE flags
 * @user_data: CQE user_data
 *
 * Returns 0 once the work has been queued. If @ctx has no submitter task,
 * @req is freed back to req_cachep and -EOWNERDEAD is returned.
 */
static int io_msg_remote_post(struct io_ring_ctx *ctx, struct io_kiocb *req,
			      int res, u32 cflags, u64 user_data)
{
	req->task = READ_ONCE(ctx->submitter_task);
	if (req->task) {
		/* CQE payload that the callback will post */
		io_req_set_res(req, res, cflags);
		req->cqe.user_data = user_data;

		/* ring reference is taken before the request is handed over */
		percpu_ref_get(&ctx->refs);
		req->ctx = ctx;
		req->io_task_work.func = io_msg_tw_complete;
		io_req_task_work_add_remote(req, ctx, IOU_F_TWQ_LAZY_WAKE);
		return 0;
	}

	/* nobody to run the task_work: drop the request again */
	kmem_cache_free(req_cachep, req);
	return -EOWNERDEAD;
}
|
||||
|
||||
static struct io_kiocb *io_msg_get_kiocb(struct io_ring_ctx *ctx)
|
||||
{
|
||||
struct io_kiocb *req = NULL;
|
||||
|
||||
if (spin_trylock(&ctx->msg_lock)) {
|
||||
req = io_alloc_cache_get(&ctx->msg_cache);
|
||||
spin_unlock(&ctx->msg_lock);
|
||||
}
|
||||
if (req)
|
||||
return req;
|
||||
return kmem_cache_alloc(req_cachep, GFP_KERNEL | __GFP_NOWARN);
|
||||
}
|
||||
|
||||
static int io_msg_data_remote(struct io_kiocb *req)
|
||||
{
|
||||
struct io_ring_ctx *target_ctx = req->file->private_data;
|
||||
struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
|
||||
struct io_kiocb *target;
|
||||
u32 flags = 0;
|
||||
|
||||
target = io_msg_get_kiocb(req->ctx);
|
||||
if (unlikely(!target))
|
||||
return -ENOMEM;
|
||||
|
||||
if (msg->flags & IORING_MSG_RING_FLAGS_PASS)
|
||||
flags = msg->cqe_flags;
|
||||
|
||||
return io_msg_remote_post(target_ctx, target, msg->len, flags,
|
||||
msg->user_data);
|
||||
}
|
||||
|
||||
static int io_msg_ring_data(struct io_kiocb *req, unsigned int issue_flags)
|
||||
@ -138,7 +149,7 @@ static int io_msg_ring_data(struct io_kiocb *req, unsigned int issue_flags)
|
||||
return -EBADFD;
|
||||
|
||||
if (io_msg_need_remote(target_ctx))
|
||||
return io_msg_exec_remote(req, io_msg_tw_complete);
|
||||
return io_msg_data_remote(req);
|
||||
|
||||
if (msg->flags & IORING_MSG_RING_FLAGS_PASS)
|
||||
flags = msg->cqe_flags;
|
||||
@ -218,6 +229,22 @@ static void io_msg_tw_fd_complete(struct callback_head *head)
|
||||
io_req_queue_tw_complete(req, ret);
|
||||
}
|
||||
|
||||
static int io_msg_fd_remote(struct io_kiocb *req)
|
||||
{
|
||||
struct io_ring_ctx *ctx = req->file->private_data;
|
||||
struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
|
||||
struct task_struct *task = READ_ONCE(ctx->submitter_task);
|
||||
|
||||
if (unlikely(!task))
|
||||
return -EOWNERDEAD;
|
||||
|
||||
init_task_work(&msg->tw, io_msg_tw_fd_complete);
|
||||
if (task_work_add(task, &msg->tw, TWA_SIGNAL))
|
||||
return -EOWNERDEAD;
|
||||
|
||||
return IOU_ISSUE_SKIP_COMPLETE;
|
||||
}
|
||||
|
||||
static int io_msg_send_fd(struct io_kiocb *req, unsigned int issue_flags)
|
||||
{
|
||||
struct io_ring_ctx *target_ctx = req->file->private_data;
|
||||
@ -240,7 +267,7 @@ static int io_msg_send_fd(struct io_kiocb *req, unsigned int issue_flags)
|
||||
}
|
||||
|
||||
if (io_msg_need_remote(target_ctx))
|
||||
return io_msg_exec_remote(req, io_msg_tw_fd_complete);
|
||||
return io_msg_fd_remote(req);
|
||||
return io_msg_install_complete(req, issue_flags);
|
||||
}
|
||||
|
||||
@ -294,3 +321,10 @@ done:
|
||||
io_req_set_res(req, ret, 0);
|
||||
return IOU_OK;
|
||||
}
|
||||
|
||||
void io_msg_cache_free(const void *entry)
|
||||
{
|
||||
struct io_kiocb *req = (struct io_kiocb *) entry;
|
||||
|
||||
kmem_cache_free(req_cachep, req);
|
||||
}
|
||||
|
@ -3,3 +3,4 @@
|
||||
int io_msg_ring_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
|
||||
int io_msg_ring(struct io_kiocb *req, unsigned int issue_flags);
|
||||
void io_msg_ring_cleanup(struct io_kiocb *req);
|
||||
void io_msg_cache_free(const void *entry);
|
||||
|
@ -283,7 +283,7 @@ void __io_napi_adjust_timeout(struct io_ring_ctx *ctx, struct io_wait_queue *iow
|
||||
s64 poll_to_ns = timespec64_to_ns(ts);
|
||||
if (poll_to_ns > 0) {
|
||||
u64 val = poll_to_ns + 999;
|
||||
do_div(val, (s64) 1000);
|
||||
do_div(val, 1000);
|
||||
poll_to = val;
|
||||
}
|
||||
}
|
||||
|
@ -51,6 +51,16 @@ struct io_connect {
|
||||
bool seen_econnaborted;
|
||||
};
|
||||
|
||||
/* Per-request command data for IORING_OP_BIND. */
struct io_bind {
	/* NOTE(review): presumably must stay first, as in the other command structs - confirm */
	struct file			*file;
	/* address length, read from sqe->addr2 in io_bind_prep() */
	int				addr_len;
};
|
||||
|
||||
/* Per-request command data for IORING_OP_LISTEN. */
struct io_listen {
	/* NOTE(review): presumably must stay first, as in the other command structs - confirm */
	struct file			*file;
	/* backlog value, read from sqe->len in io_listen_prep() */
	int				backlog;
};
|
||||
|
||||
struct io_sr_msg {
|
||||
struct file *file;
|
||||
union {
|
||||
@ -817,20 +827,20 @@ static inline bool io_recv_finish(struct io_kiocb *req, int *ret,
|
||||
bool mshot_finished, unsigned issue_flags)
|
||||
{
|
||||
struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
|
||||
unsigned int cflags;
|
||||
|
||||
if (sr->flags & IORING_RECVSEND_BUNDLE)
|
||||
cflags = io_put_kbufs(req, io_bundle_nbufs(kmsg, *ret),
|
||||
issue_flags);
|
||||
else
|
||||
cflags = io_put_kbuf(req, issue_flags);
|
||||
unsigned int cflags = 0;
|
||||
|
||||
if (kmsg->msg.msg_inq > 0)
|
||||
cflags |= IORING_CQE_F_SOCK_NONEMPTY;
|
||||
|
||||
/* bundle with no more immediate buffers, we're done */
|
||||
if (sr->flags & IORING_RECVSEND_BUNDLE && req->flags & REQ_F_BL_EMPTY)
|
||||
goto finish;
|
||||
if (sr->flags & IORING_RECVSEND_BUNDLE) {
|
||||
cflags |= io_put_kbufs(req, io_bundle_nbufs(kmsg, *ret),
|
||||
issue_flags);
|
||||
/* bundle with no more immediate buffers, we're done */
|
||||
if (req->flags & REQ_F_BL_EMPTY)
|
||||
goto finish;
|
||||
} else {
|
||||
cflags |= io_put_kbuf(req, issue_flags);
|
||||
}
|
||||
|
||||
/*
|
||||
* Fill CQE for this receive and see if we should keep trying to
|
||||
@ -1717,6 +1727,70 @@ out:
|
||||
return IOU_OK;
|
||||
}
|
||||
|
||||
int io_bind_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
|
||||
{
|
||||
struct io_bind *bind = io_kiocb_to_cmd(req, struct io_bind);
|
||||
struct sockaddr __user *uaddr;
|
||||
struct io_async_msghdr *io;
|
||||
|
||||
if (sqe->len || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in)
|
||||
return -EINVAL;
|
||||
|
||||
uaddr = u64_to_user_ptr(READ_ONCE(sqe->addr));
|
||||
bind->addr_len = READ_ONCE(sqe->addr2);
|
||||
|
||||
io = io_msg_alloc_async(req);
|
||||
if (unlikely(!io))
|
||||
return -ENOMEM;
|
||||
return move_addr_to_kernel(uaddr, bind->addr_len, &io->addr);
|
||||
}
|
||||
|
||||
int io_bind(struct io_kiocb *req, unsigned int issue_flags)
|
||||
{
|
||||
struct io_bind *bind = io_kiocb_to_cmd(req, struct io_bind);
|
||||
struct io_async_msghdr *io = req->async_data;
|
||||
struct socket *sock;
|
||||
int ret;
|
||||
|
||||
sock = sock_from_file(req->file);
|
||||
if (unlikely(!sock))
|
||||
return -ENOTSOCK;
|
||||
|
||||
ret = __sys_bind_socket(sock, &io->addr, bind->addr_len);
|
||||
if (ret < 0)
|
||||
req_set_fail(req);
|
||||
io_req_set_res(req, ret, 0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int io_listen_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
|
||||
{
|
||||
struct io_listen *listen = io_kiocb_to_cmd(req, struct io_listen);
|
||||
|
||||
if (sqe->addr || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in || sqe->addr2)
|
||||
return -EINVAL;
|
||||
|
||||
listen->backlog = READ_ONCE(sqe->len);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int io_listen(struct io_kiocb *req, unsigned int issue_flags)
|
||||
{
|
||||
struct io_listen *listen = io_kiocb_to_cmd(req, struct io_listen);
|
||||
struct socket *sock;
|
||||
int ret;
|
||||
|
||||
sock = sock_from_file(req->file);
|
||||
if (unlikely(!sock))
|
||||
return -ENOTSOCK;
|
||||
|
||||
ret = __sys_listen_socket(sock, listen->backlog);
|
||||
if (ret < 0)
|
||||
req_set_fail(req);
|
||||
io_req_set_res(req, ret, 0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
void io_netmsg_cache_free(const void *entry)
|
||||
{
|
||||
struct io_async_msghdr *kmsg = (struct io_async_msghdr *) entry;
|
||||
|
@ -49,6 +49,12 @@ int io_sendmsg_zc(struct io_kiocb *req, unsigned int issue_flags);
|
||||
int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
|
||||
void io_send_zc_cleanup(struct io_kiocb *req);
|
||||
|
||||
int io_bind_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
|
||||
int io_bind(struct io_kiocb *req, unsigned int issue_flags);
|
||||
|
||||
int io_listen_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
|
||||
int io_listen(struct io_kiocb *req, unsigned int issue_flags);
|
||||
|
||||
void io_netmsg_cache_free(const void *entry);
|
||||
#else
|
||||
static inline void io_netmsg_cache_free(const void *entry)
|
||||
|
@ -495,6 +495,26 @@ const struct io_issue_def io_issue_defs[] = {
|
||||
.prep = io_ftruncate_prep,
|
||||
.issue = io_ftruncate,
|
||||
},
|
||||
[IORING_OP_BIND] = {
|
||||
#if defined(CONFIG_NET)
|
||||
.needs_file = 1,
|
||||
.prep = io_bind_prep,
|
||||
.issue = io_bind,
|
||||
.async_size = sizeof(struct io_async_msghdr),
|
||||
#else
|
||||
.prep = io_eopnotsupp_prep,
|
||||
#endif
|
||||
},
|
||||
[IORING_OP_LISTEN] = {
|
||||
#if defined(CONFIG_NET)
|
||||
.needs_file = 1,
|
||||
.prep = io_listen_prep,
|
||||
.issue = io_listen,
|
||||
.async_size = sizeof(struct io_async_msghdr),
|
||||
#else
|
||||
.prep = io_eopnotsupp_prep,
|
||||
#endif
|
||||
},
|
||||
};
|
||||
|
||||
const struct io_cold_def io_cold_defs[] = {
|
||||
@ -716,6 +736,12 @@ const struct io_cold_def io_cold_defs[] = {
|
||||
[IORING_OP_FTRUNCATE] = {
|
||||
.name = "FTRUNCATE",
|
||||
},
|
||||
[IORING_OP_BIND] = {
|
||||
.name = "BIND",
|
||||
},
|
||||
[IORING_OP_LISTEN] = {
|
||||
.name = "LISTEN",
|
||||
},
|
||||
};
|
||||
|
||||
const char *io_uring_get_opcode(u8 opcode)
|
||||
@ -725,6 +751,14 @@ const char *io_uring_get_opcode(u8 opcode)
|
||||
return "INVALID";
|
||||
}
|
||||
|
||||
/*
 * Report whether @opcode is usable: it must be below IORING_OP_LAST and
 * its prep handler must not be the io_eopnotsupp_prep stub.
 */
bool io_uring_op_supported(u8 opcode)
{
	/* range check first, so the table is never indexed out of bounds */
	if (opcode >= IORING_OP_LAST)
		return false;

	return io_issue_defs[opcode].prep != io_eopnotsupp_prep;
}
|
||||
|
||||
void __init io_uring_optable_init(void)
|
||||
{
|
||||
int i;
|
||||
|
@ -17,8 +17,6 @@ struct io_issue_def {
|
||||
unsigned poll_exclusive : 1;
|
||||
/* op supports buffer selection */
|
||||
unsigned buffer_select : 1;
|
||||
/* opcode is not supported by this kernel */
|
||||
unsigned not_supported : 1;
|
||||
/* skip auditing */
|
||||
unsigned audit_skip : 1;
|
||||
/* supports ioprio */
|
||||
@ -47,5 +45,7 @@ struct io_cold_def {
|
||||
extern const struct io_issue_def io_issue_defs[];
|
||||
extern const struct io_cold_def io_cold_defs[];
|
||||
|
||||
bool io_uring_op_supported(u8 opcode);
|
||||
|
||||
void io_uring_optable_init(void);
|
||||
#endif
|
||||
|
@ -27,65 +27,11 @@
|
||||
#include "cancel.h"
|
||||
#include "kbuf.h"
|
||||
#include "napi.h"
|
||||
#include "eventfd.h"
|
||||
|
||||
#define IORING_MAX_RESTRICTIONS (IORING_RESTRICTION_LAST + \
|
||||
IORING_REGISTER_LAST + IORING_OP_LAST)
|
||||
|
||||
/*
 * Attach an eventfd to @ctx.
 *
 * @ctx:           ring to attach to; ctx->uring_lock is expected to be held
 *                 (see the lockdep expression below)
 * @arg:           user pointer to a single __s32 holding the eventfd
 * @eventfd_async: stored in the new context's eventfd_async field
 *
 * Returns 0 on success, -EBUSY if an eventfd is already registered,
 * -EFAULT if the fd cannot be copied from user space, -ENOMEM on
 * allocation failure, or the error from eventfd_ctx_fdget().
 */
static int io_eventfd_register(struct io_ring_ctx *ctx, void __user *arg,
			       unsigned int eventfd_async)
{
	struct io_ev_fd *ev_fd;
	__s32 __user *fds = arg;
	int fd;

	ev_fd = rcu_dereference_protected(ctx->io_ev_fd,
					lockdep_is_held(&ctx->uring_lock));
	if (ev_fd)
		return -EBUSY;

	if (copy_from_user(&fd, fds, sizeof(*fds)))
		return -EFAULT;

	ev_fd = kmalloc(sizeof(*ev_fd), GFP_KERNEL);
	if (!ev_fd)
		return -ENOMEM;

	ev_fd->cq_ev_fd = eventfd_ctx_fdget(fd);
	if (IS_ERR(ev_fd->cq_ev_fd)) {
		int ret = PTR_ERR(ev_fd->cq_ev_fd);

		kfree(ev_fd);
		return ret;
	}

	/* snapshot the current CQ tail under the completion lock */
	spin_lock(&ctx->completion_lock);
	ctx->evfd_last_cq_tail = ctx->cached_cq_tail;
	spin_unlock(&ctx->completion_lock);

	/*
	 * Fully initialise the object before publishing it. It comes from
	 * kmalloc(), so refs and ops hold garbage until they are set, and
	 * once rcu_assign_pointer() makes the pointer visible an RCU reader
	 * may start using them.
	 */
	ev_fd->eventfd_async = eventfd_async;
	atomic_set(&ev_fd->refs, 1);
	atomic_set(&ev_fd->ops, 0);
	ctx->has_evfd = true;
	rcu_assign_pointer(ctx->io_ev_fd, ev_fd);
	return 0;
}
|
||||
|
||||
int io_eventfd_unregister(struct io_ring_ctx *ctx)
|
||||
{
|
||||
struct io_ev_fd *ev_fd;
|
||||
|
||||
ev_fd = rcu_dereference_protected(ctx->io_ev_fd,
|
||||
lockdep_is_held(&ctx->uring_lock));
|
||||
if (ev_fd) {
|
||||
ctx->has_evfd = false;
|
||||
rcu_assign_pointer(ctx->io_ev_fd, NULL);
|
||||
if (!atomic_fetch_or(BIT(IO_EVENTFD_OP_FREE_BIT), &ev_fd->ops))
|
||||
call_rcu(&ev_fd->rcu, io_eventfd_ops);
|
||||
return 0;
|
||||
}
|
||||
|
||||
return -ENXIO;
|
||||
}
|
||||
|
||||
static __cold int io_probe(struct io_ring_ctx *ctx, void __user *arg,
|
||||
unsigned nr_args)
|
||||
{
|
||||
@ -93,9 +39,10 @@ static __cold int io_probe(struct io_ring_ctx *ctx, void __user *arg,
|
||||
size_t size;
|
||||
int i, ret;
|
||||
|
||||
if (nr_args > IORING_OP_LAST)
|
||||
nr_args = IORING_OP_LAST;
|
||||
|
||||
size = struct_size(p, ops, nr_args);
|
||||
if (size == SIZE_MAX)
|
||||
return -EOVERFLOW;
|
||||
p = kzalloc(size, GFP_KERNEL);
|
||||
if (!p)
|
||||
return -ENOMEM;
|
||||
@ -108,12 +55,10 @@ static __cold int io_probe(struct io_ring_ctx *ctx, void __user *arg,
|
||||
goto out;
|
||||
|
||||
p->last_op = IORING_OP_LAST - 1;
|
||||
if (nr_args > IORING_OP_LAST)
|
||||
nr_args = IORING_OP_LAST;
|
||||
|
||||
for (i = 0; i < nr_args; i++) {
|
||||
p->ops[i].op = i;
|
||||
if (!io_issue_defs[i].not_supported)
|
||||
if (io_uring_op_supported(i))
|
||||
p->ops[i].flags = IO_URING_OP_SUPPORTED;
|
||||
}
|
||||
p->ops_len = i;
|
||||
|
@ -85,31 +85,6 @@ static int io_account_mem(struct io_ring_ctx *ctx, unsigned long nr_pages)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int io_copy_iov(struct io_ring_ctx *ctx, struct iovec *dst,
|
||||
void __user *arg, unsigned index)
|
||||
{
|
||||
struct iovec __user *src;
|
||||
|
||||
#ifdef CONFIG_COMPAT
|
||||
if (ctx->compat) {
|
||||
struct compat_iovec __user *ciovs;
|
||||
struct compat_iovec ciov;
|
||||
|
||||
ciovs = (struct compat_iovec __user *) arg;
|
||||
if (copy_from_user(&ciov, &ciovs[index], sizeof(ciov)))
|
||||
return -EFAULT;
|
||||
|
||||
dst->iov_base = u64_to_user_ptr((u64)ciov.iov_base);
|
||||
dst->iov_len = ciov.iov_len;
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
src = (struct iovec __user *) arg;
|
||||
if (copy_from_user(dst, &src[index], sizeof(*dst)))
|
||||
return -EFAULT;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int io_buffer_validate(struct iovec *iov)
|
||||
{
|
||||
unsigned long tmp, acct_len = iov->iov_len + (PAGE_SIZE - 1);
|
||||
@ -249,7 +224,7 @@ __cold static int io_rsrc_ref_quiesce(struct io_rsrc_data *data,
|
||||
|
||||
ret = io_run_task_work_sig(ctx);
|
||||
if (ret < 0) {
|
||||
__set_current_state(TASK_RUNNING);
|
||||
finish_wait(&ctx->rsrc_quiesce_wq, &we);
|
||||
mutex_lock(&ctx->uring_lock);
|
||||
if (list_empty(&ctx->rsrc_ref_list))
|
||||
ret = 0;
|
||||
@ -257,7 +232,6 @@ __cold static int io_rsrc_ref_quiesce(struct io_rsrc_data *data,
|
||||
}
|
||||
|
||||
schedule();
|
||||
__set_current_state(TASK_RUNNING);
|
||||
mutex_lock(&ctx->uring_lock);
|
||||
ret = 0;
|
||||
} while (!list_empty(&ctx->rsrc_ref_list));
|
||||
@ -420,8 +394,9 @@ static int __io_sqe_buffers_update(struct io_ring_ctx *ctx,
|
||||
struct io_uring_rsrc_update2 *up,
|
||||
unsigned int nr_args)
|
||||
{
|
||||
struct iovec __user *uvec = u64_to_user_ptr(up->data);
|
||||
u64 __user *tags = u64_to_user_ptr(up->tags);
|
||||
struct iovec iov, __user *iovs = u64_to_user_ptr(up->data);
|
||||
struct iovec fast_iov, *iov;
|
||||
struct page *last_hpage = NULL;
|
||||
__u32 done;
|
||||
int i, err;
|
||||
@ -435,21 +410,23 @@ static int __io_sqe_buffers_update(struct io_ring_ctx *ctx,
|
||||
struct io_mapped_ubuf *imu;
|
||||
u64 tag = 0;
|
||||
|
||||
err = io_copy_iov(ctx, &iov, iovs, done);
|
||||
if (err)
|
||||
iov = iovec_from_user(&uvec[done], 1, 1, &fast_iov, ctx->compat);
|
||||
if (IS_ERR(iov)) {
|
||||
err = PTR_ERR(iov);
|
||||
break;
|
||||
}
|
||||
if (tags && copy_from_user(&tag, &tags[done], sizeof(tag))) {
|
||||
err = -EFAULT;
|
||||
break;
|
||||
}
|
||||
err = io_buffer_validate(&iov);
|
||||
err = io_buffer_validate(iov);
|
||||
if (err)
|
||||
break;
|
||||
if (!iov.iov_base && tag) {
|
||||
if (!iov->iov_base && tag) {
|
||||
err = -EINVAL;
|
||||
break;
|
||||
}
|
||||
err = io_sqe_buffer_register(ctx, &iov, &imu, &last_hpage);
|
||||
err = io_sqe_buffer_register(ctx, iov, &imu, &last_hpage);
|
||||
if (err)
|
||||
break;
|
||||
|
||||
@ -971,8 +948,9 @@ int io_sqe_buffers_register(struct io_ring_ctx *ctx, void __user *arg,
|
||||
{
|
||||
struct page *last_hpage = NULL;
|
||||
struct io_rsrc_data *data;
|
||||
struct iovec fast_iov, *iov = &fast_iov;
|
||||
const struct iovec __user *uvec = (struct iovec * __user) arg;
|
||||
int i, ret;
|
||||
struct iovec iov;
|
||||
|
||||
BUILD_BUG_ON(IORING_MAX_REG_BUFFERS >= (1u << 16));
|
||||
|
||||
@ -989,24 +967,27 @@ int io_sqe_buffers_register(struct io_ring_ctx *ctx, void __user *arg,
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (!arg)
|
||||
memset(iov, 0, sizeof(*iov));
|
||||
|
||||
for (i = 0; i < nr_args; i++, ctx->nr_user_bufs++) {
|
||||
if (arg) {
|
||||
ret = io_copy_iov(ctx, &iov, arg, i);
|
||||
iov = iovec_from_user(&uvec[i], 1, 1, &fast_iov, ctx->compat);
|
||||
if (IS_ERR(iov)) {
|
||||
ret = PTR_ERR(iov);
|
||||
break;
|
||||
}
|
||||
ret = io_buffer_validate(iov);
|
||||
if (ret)
|
||||
break;
|
||||
ret = io_buffer_validate(&iov);
|
||||
if (ret)
|
||||
break;
|
||||
} else {
|
||||
memset(&iov, 0, sizeof(iov));
|
||||
}
|
||||
|
||||
if (!iov.iov_base && *io_get_tag_slot(data, i)) {
|
||||
if (!iov->iov_base && *io_get_tag_slot(data, i)) {
|
||||
ret = -EINVAL;
|
||||
break;
|
||||
}
|
||||
|
||||
ret = io_sqe_buffer_register(ctx, &iov, &ctx->user_bufs[i],
|
||||
ret = io_sqe_buffer_register(ctx, iov, &ctx->user_bufs[i],
|
||||
&last_hpage);
|
||||
if (ret)
|
||||
break;
|
||||
|
@ -2600,6 +2600,14 @@ static void do_freezer_trap(void)
|
||||
spin_unlock_irq(&current->sighand->siglock);
|
||||
cgroup_enter_frozen();
|
||||
schedule();
|
||||
|
||||
/*
|
||||
* We could've been woken by task_work, run it to clear
|
||||
* TIF_NOTIFY_SIGNAL. The caller will retry if necessary.
|
||||
*/
|
||||
clear_notify_signal();
|
||||
if (unlikely(task_work_pending(current)))
|
||||
task_work_run();
|
||||
}
|
||||
|
||||
static int ptrace_signal(int signr, kernel_siginfo_t *info, enum pid_type type)
|
||||
|
48
net/socket.c
48
net/socket.c
@ -1822,6 +1822,20 @@ SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
|
||||
return __sys_socketpair(family, type, protocol, usockvec);
|
||||
}
|
||||
|
||||
int __sys_bind_socket(struct socket *sock, struct sockaddr_storage *address,
|
||||
int addrlen)
|
||||
{
|
||||
int err;
|
||||
|
||||
err = security_socket_bind(sock, (struct sockaddr *)address,
|
||||
addrlen);
|
||||
if (!err)
|
||||
err = READ_ONCE(sock->ops)->bind(sock,
|
||||
(struct sockaddr *)address,
|
||||
addrlen);
|
||||
return err;
|
||||
}
|
||||
|
||||
/*
|
||||
* Bind a name to a socket. Nothing much to do here since it's
|
||||
* the protocol's responsibility to handle the local address.
|
||||
@ -1839,15 +1853,8 @@ int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
|
||||
sock = sockfd_lookup_light(fd, &err, &fput_needed);
|
||||
if (sock) {
|
||||
err = move_addr_to_kernel(umyaddr, addrlen, &address);
|
||||
if (!err) {
|
||||
err = security_socket_bind(sock,
|
||||
(struct sockaddr *)&address,
|
||||
addrlen);
|
||||
if (!err)
|
||||
err = READ_ONCE(sock->ops)->bind(sock,
|
||||
(struct sockaddr *)
|
||||
&address, addrlen);
|
||||
}
|
||||
if (!err)
|
||||
err = __sys_bind_socket(sock, &address, addrlen);
|
||||
fput_light(sock->file, fput_needed);
|
||||
}
|
||||
return err;
|
||||
@ -1863,23 +1870,28 @@ SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
|
||||
* necessary for a listen, and if that works, we mark the socket as
|
||||
* ready for listening.
|
||||
*/
|
||||
int __sys_listen_socket(struct socket *sock, int backlog)
|
||||
{
|
||||
int somaxconn, err;
|
||||
|
||||
somaxconn = READ_ONCE(sock_net(sock->sk)->core.sysctl_somaxconn);
|
||||
if ((unsigned int)backlog > somaxconn)
|
||||
backlog = somaxconn;
|
||||
|
||||
err = security_socket_listen(sock, backlog);
|
||||
if (!err)
|
||||
err = READ_ONCE(sock->ops)->listen(sock, backlog);
|
||||
return err;
|
||||
}
|
||||
|
||||
int __sys_listen(int fd, int backlog)
|
||||
{
|
||||
struct socket *sock;
|
||||
int err, fput_needed;
|
||||
int somaxconn;
|
||||
|
||||
sock = sockfd_lookup_light(fd, &err, &fput_needed);
|
||||
if (sock) {
|
||||
somaxconn = READ_ONCE(sock_net(sock->sk)->core.sysctl_somaxconn);
|
||||
if ((unsigned int)backlog > somaxconn)
|
||||
backlog = somaxconn;
|
||||
|
||||
err = security_socket_listen(sock, backlog);
|
||||
if (!err)
|
||||
err = READ_ONCE(sock->ops)->listen(sock, backlog);
|
||||
|
||||
err = __sys_listen_socket(sock, backlog);
|
||||
fput_light(sock->file, fput_needed);
|
||||
}
|
||||
return err;
|
||||
|
Loading…
Reference in New Issue
Block a user