diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 3a216f0744dd..05f933dddfde 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -3498,9 +3498,9 @@ static void *io_uring_validate_mmap_request(struct file *file, unsigned int bgid; bgid = (offset & ~IORING_OFF_MMAP_MASK) >> IORING_OFF_PBUF_SHIFT; - mutex_lock(&ctx->uring_lock); + rcu_read_lock(); ptr = io_pbuf_get_address(ctx, bgid); - mutex_unlock(&ctx->uring_lock); + rcu_read_unlock(); if (!ptr) return ERR_PTR(-EINVAL); break; diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c index 39d15a27eb92..268788305b61 100644 --- a/io_uring/kbuf.c +++ b/io_uring/kbuf.c @@ -40,19 +40,35 @@ struct io_buf_free { int inuse; }; +static struct io_buffer_list *__io_buffer_get_list(struct io_ring_ctx *ctx, + struct io_buffer_list *bl, + unsigned int bgid) +{ + if (bl && bgid < BGID_ARRAY) + return &bl[bgid]; + + return xa_load(&ctx->io_bl_xa, bgid); +} + static inline struct io_buffer_list *io_buffer_get_list(struct io_ring_ctx *ctx, unsigned int bgid) { - if (ctx->io_bl && bgid < BGID_ARRAY) - return &ctx->io_bl[bgid]; + lockdep_assert_held(&ctx->uring_lock); - return xa_load(&ctx->io_bl_xa, bgid); + return __io_buffer_get_list(ctx, ctx->io_bl, bgid); } static int io_buffer_add_list(struct io_ring_ctx *ctx, struct io_buffer_list *bl, unsigned int bgid) { + /* + * Store buffer group ID and finally mark the list as visible. + * The normal lookup doesn't care about the visibility as we're + * always under the ->uring_lock, but the RCU lookup from mmap does. + */ bl->bgid = bgid; + smp_store_release(&bl->is_ready, 1); + if (bgid < BGID_ARRAY) return 0; @@ -203,18 +219,19 @@ void __user *io_buffer_select(struct io_kiocb *req, size_t *len, static __cold int io_init_bl_list(struct io_ring_ctx *ctx) { + struct io_buffer_list *bl; int i; - ctx->io_bl = kcalloc(BGID_ARRAY, sizeof(struct io_buffer_list), - GFP_KERNEL); - if (!ctx->io_bl) + bl = kcalloc(BGID_ARRAY, sizeof(struct io_buffer_list), GFP_KERNEL); + if (!bl) return -ENOMEM; for (i = 0; i < BGID_ARRAY; i++) { - INIT_LIST_HEAD(&ctx->io_bl[i].buf_list); - ctx->io_bl[i].bgid = i; + INIT_LIST_HEAD(&bl[i].buf_list); + bl[i].bgid = i; } + smp_store_release(&ctx->io_bl, bl); return 0; } @@ -303,7 +320,7 @@ void io_destroy_buffers(struct io_ring_ctx *ctx) xa_for_each(&ctx->io_bl_xa, index, bl) { xa_erase(&ctx->io_bl_xa, bl->bgid); __io_remove_buffers(ctx, bl, -1U); - kfree(bl); + kfree_rcu(bl, rcu); } /* @@ -497,7 +514,16 @@ int io_provide_buffers(struct io_kiocb *req, unsigned int issue_flags) INIT_LIST_HEAD(&bl->buf_list); ret = io_buffer_add_list(ctx, bl, p->bgid); if (ret) { - kfree(bl); + /* + * Doesn't need rcu free as it was never visible, but + * let's keep it consistent throughout. Also can't + * be a lower indexed array group, as adding one + * where lookup failed cannot happen. + */ + if (p->bgid >= BGID_ARRAY) + kfree_rcu(bl, rcu); + else + WARN_ON_ONCE(1); goto err; } } @@ -636,6 +662,8 @@ int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg) struct io_buffer_list *bl, *free_bl = NULL; int ret; + lockdep_assert_held(&ctx->uring_lock); + if (copy_from_user(®, arg, sizeof(reg))) return -EFAULT; @@ -690,7 +718,7 @@ int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg) return 0; } - kfree(free_bl); + kfree_rcu(free_bl, rcu); return ret; } @@ -699,6 +727,8 @@ int io_unregister_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg) struct io_uring_buf_reg reg; struct io_buffer_list *bl; + lockdep_assert_held(&ctx->uring_lock); + if (copy_from_user(®, arg, sizeof(reg))) return -EFAULT; if (reg.resv[0] || reg.resv[1] || reg.resv[2]) @@ -715,7 +745,7 @@ int io_unregister_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg) __io_remove_buffers(ctx, bl, -1U); if (bl->bgid >= BGID_ARRAY) { xa_erase(&ctx->io_bl_xa, bl->bgid); - kfree(bl); + kfree_rcu(bl, rcu); } return 0; } @@ -724,7 +754,15 @@ void *io_pbuf_get_address(struct io_ring_ctx *ctx, unsigned long bgid) { struct io_buffer_list *bl; - bl = io_buffer_get_list(ctx, bgid); + bl = __io_buffer_get_list(ctx, smp_load_acquire(&ctx->io_bl), bgid); + + /* + * Ensure the list is fully setup. Only strictly needed for RCU lookup + * via mmap, and in that case only for the array indexed groups. For + * the xarray lookups, it's either visible and ready, or not at all. + */ + if (!smp_load_acquire(&bl->is_ready)) + return NULL; if (!bl || !bl->is_mmap) return NULL; diff --git a/io_uring/kbuf.h b/io_uring/kbuf.h index 6c7646e6057c..9be5960817ea 100644 --- a/io_uring/kbuf.h +++ b/io_uring/kbuf.h @@ -15,6 +15,7 @@ struct io_buffer_list { struct page **buf_pages; struct io_uring_buf_ring *buf_ring; }; + struct rcu_head rcu; }; __u16 bgid; @@ -28,6 +29,8 @@ struct io_buffer_list { __u8 is_mapped; /* ring mapped provided buffers, but mmap'ed by application */ __u8 is_mmap; + /* bl is visible from an RCU point of view for lookup */ + __u8 is_ready; }; struct io_buffer {