1

nvme: use bio_integrity_map_user

Map user metadata buffers directly. Now that the bio tracks the
metadata, nvme doesn't need special metadata handling and tracking with
callbacks and additional fields in the pdu.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Keith Busch <kbusch@kernel.org>
Link: https://lore.kernel.org/r/20231130215309.2923568-3-kbusch@meta.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
Keith Busch 2023-11-30 13:53:07 -08:00 committed by Jens Axboe
parent 492c5d4559
commit d6aacee925

View File

@ -96,58 +96,6 @@ static void __user *nvme_to_user_ptr(uintptr_t ptrval)
return (void __user *)ptrval;
}
static void *nvme_add_user_metadata(struct request *req, void __user *ubuf,
unsigned len, u32 seed)
{
struct bio_integrity_payload *bip;
int ret = -ENOMEM;
void *buf;
struct bio *bio = req->bio;
buf = kmalloc(len, GFP_KERNEL);
if (!buf)
goto out;
if (req_op(req) == REQ_OP_DRV_OUT) {
ret = -EFAULT;
if (copy_from_user(buf, ubuf, len))
goto out_free_meta;
} else {
memset(buf, 0, len);
}
bip = bio_integrity_alloc(bio, GFP_KERNEL, 1);
if (IS_ERR(bip)) {
ret = PTR_ERR(bip);
goto out_free_meta;
}
bip->bip_iter.bi_sector = seed;
ret = bio_integrity_add_page(bio, virt_to_page(buf), len,
offset_in_page(buf));
if (ret != len) {
ret = -ENOMEM;
goto out_free_meta;
}
req->cmd_flags |= REQ_INTEGRITY;
return buf;
out_free_meta:
kfree(buf);
out:
return ERR_PTR(ret);
}
static int nvme_finish_user_metadata(struct request *req, void __user *ubuf,
void *meta, unsigned len, int ret)
{
if (!ret && req_op(req) == REQ_OP_DRV_IN &&
copy_to_user(ubuf, meta, len))
ret = -EFAULT;
kfree(meta);
return ret;
}
static struct request *nvme_alloc_user_request(struct request_queue *q,
struct nvme_command *cmd, blk_opf_t rq_flags,
blk_mq_req_flags_t blk_flags)
@ -164,14 +112,12 @@ static struct request *nvme_alloc_user_request(struct request_queue *q,
static int nvme_map_user_request(struct request *req, u64 ubuffer,
unsigned bufflen, void __user *meta_buffer, unsigned meta_len,
u32 meta_seed, void **metap, struct io_uring_cmd *ioucmd,
unsigned int flags)
u32 meta_seed, struct io_uring_cmd *ioucmd, unsigned int flags)
{
struct request_queue *q = req->q;
struct nvme_ns *ns = q->queuedata;
struct block_device *bdev = ns ? ns->disk->part0 : NULL;
struct bio *bio = NULL;
void *meta = NULL;
int ret;
if (ioucmd && (ioucmd->flags & IORING_URING_CMD_FIXED)) {
@ -193,18 +139,17 @@ static int nvme_map_user_request(struct request *req, u64 ubuffer,
if (ret)
goto out;
bio = req->bio;
if (bdev)
bio_set_dev(bio, bdev);
if (bdev && meta_buffer && meta_len) {
meta = nvme_add_user_metadata(req, meta_buffer, meta_len,
meta_seed);
if (IS_ERR(meta)) {
ret = PTR_ERR(meta);
goto out_unmap;
bio = req->bio;
if (bdev) {
bio_set_dev(bio, bdev);
if (meta_buffer && meta_len) {
ret = bio_integrity_map_user(bio, meta_buffer, meta_len,
meta_seed);
if (ret)
goto out_unmap;
req->cmd_flags |= REQ_INTEGRITY;
}
*metap = meta;
}
return ret;
@ -225,7 +170,6 @@ static int nvme_submit_user_cmd(struct request_queue *q,
struct nvme_ns *ns = q->queuedata;
struct nvme_ctrl *ctrl;
struct request *req;
void *meta = NULL;
struct bio *bio;
u32 effects;
int ret;
@ -237,7 +181,7 @@ static int nvme_submit_user_cmd(struct request_queue *q,
req->timeout = timeout;
if (ubuffer && bufflen) {
ret = nvme_map_user_request(req, ubuffer, bufflen, meta_buffer,
meta_len, meta_seed, &meta, NULL, flags);
meta_len, meta_seed, NULL, flags);
if (ret)
return ret;
}
@ -249,9 +193,6 @@ static int nvme_submit_user_cmd(struct request_queue *q,
ret = nvme_execute_rq(req, false);
if (result)
*result = le64_to_cpu(nvme_req(req)->result.u64);
if (meta)
ret = nvme_finish_user_metadata(req, meta_buffer, meta,
meta_len, ret);
if (bio)
blk_rq_unmap_user(bio);
blk_mq_free_request(req);
@ -446,19 +387,10 @@ struct nvme_uring_data {
* Expect build errors if this grows larger than that.
*/
struct nvme_uring_cmd_pdu {
union {
struct bio *bio;
struct request *req;
};
u32 meta_len;
u32 nvme_status;
union {
struct {
void *meta; /* kernel-resident buffer */
void __user *meta_buffer;
};
u64 result;
} u;
struct request *req;
struct bio *bio;
u64 result;
int status;
};
static inline struct nvme_uring_cmd_pdu *nvme_uring_cmd_pdu(
@ -467,31 +399,6 @@ static inline struct nvme_uring_cmd_pdu *nvme_uring_cmd_pdu(
return (struct nvme_uring_cmd_pdu *)&ioucmd->pdu;
}
static void nvme_uring_task_meta_cb(struct io_uring_cmd *ioucmd,
unsigned issue_flags)
{
struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd);
struct request *req = pdu->req;
int status;
u64 result;
if (nvme_req(req)->flags & NVME_REQ_CANCELLED)
status = -EINTR;
else
status = nvme_req(req)->status;
result = le64_to_cpu(nvme_req(req)->result.u64);
if (pdu->meta_len)
status = nvme_finish_user_metadata(req, pdu->u.meta_buffer,
pdu->u.meta, pdu->meta_len, status);
if (req->bio)
blk_rq_unmap_user(req->bio);
blk_mq_free_request(req);
io_uring_cmd_done(ioucmd, status, result, issue_flags);
}
static void nvme_uring_task_cb(struct io_uring_cmd *ioucmd,
unsigned issue_flags)
{
@ -499,8 +406,7 @@ static void nvme_uring_task_cb(struct io_uring_cmd *ioucmd,
if (pdu->bio)
blk_rq_unmap_user(pdu->bio);
io_uring_cmd_done(ioucmd, pdu->nvme_status, pdu->u.result, issue_flags);
io_uring_cmd_done(ioucmd, pdu->status, pdu->result, issue_flags);
}
static enum rq_end_io_ret nvme_uring_cmd_end_io(struct request *req,
@ -509,53 +415,24 @@ static enum rq_end_io_ret nvme_uring_cmd_end_io(struct request *req,
struct io_uring_cmd *ioucmd = req->end_io_data;
struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd);
req->bio = pdu->bio;
if (nvme_req(req)->flags & NVME_REQ_CANCELLED) {
pdu->nvme_status = -EINTR;
} else {
pdu->nvme_status = nvme_req(req)->status;
if (!pdu->nvme_status)
pdu->nvme_status = blk_status_to_errno(err);
}
pdu->u.result = le64_to_cpu(nvme_req(req)->result.u64);
if (nvme_req(req)->flags & NVME_REQ_CANCELLED)
pdu->status = -EINTR;
else
pdu->status = nvme_req(req)->status;
pdu->result = le64_to_cpu(nvme_req(req)->result.u64);
/*
* For iopoll, complete it directly.
* Otherwise, move the completion to task work.
*/
if (blk_rq_is_poll(req)) {
WRITE_ONCE(ioucmd->cookie, NULL);
if (blk_rq_is_poll(req))
nvme_uring_task_cb(ioucmd, IO_URING_F_UNLOCKED);
} else {
else
io_uring_cmd_do_in_task_lazy(ioucmd, nvme_uring_task_cb);
}
return RQ_END_IO_FREE;
}
static enum rq_end_io_ret nvme_uring_cmd_end_io_meta(struct request *req,
blk_status_t err)
{
struct io_uring_cmd *ioucmd = req->end_io_data;
struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd);
req->bio = pdu->bio;
pdu->req = req;
/*
* For iopoll, complete it directly.
* Otherwise, move the completion to task work.
*/
if (blk_rq_is_poll(req)) {
WRITE_ONCE(ioucmd->cookie, NULL);
nvme_uring_task_meta_cb(ioucmd, IO_URING_F_UNLOCKED);
} else {
io_uring_cmd_do_in_task_lazy(ioucmd, nvme_uring_task_meta_cb);
}
return RQ_END_IO_NONE;
}
static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
struct io_uring_cmd *ioucmd, unsigned int issue_flags, bool vec)
{
@ -567,7 +444,6 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
struct request *req;
blk_opf_t rq_flags = REQ_ALLOC_CACHE;
blk_mq_req_flags_t blk_flags = 0;
void *meta = NULL;
int ret;
c.common.opcode = READ_ONCE(cmd->opcode);
@ -615,27 +491,16 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
if (d.addr && d.data_len) {
ret = nvme_map_user_request(req, d.addr,
d.data_len, nvme_to_user_ptr(d.metadata),
d.metadata_len, 0, &meta, ioucmd, vec);
d.metadata_len, 0, ioucmd, vec);
if (ret)
return ret;
}
if (blk_rq_is_poll(req)) {
ioucmd->flags |= IORING_URING_CMD_POLLED;
WRITE_ONCE(ioucmd->cookie, req);
}
/* to free bio on completion, as req->bio will be null at that time */
pdu->bio = req->bio;
pdu->meta_len = d.metadata_len;
pdu->req = req;
req->end_io_data = ioucmd;
if (pdu->meta_len) {
pdu->u.meta = meta;
pdu->u.meta_buffer = nvme_to_user_ptr(d.metadata);
req->end_io = nvme_uring_cmd_end_io_meta;
} else {
req->end_io = nvme_uring_cmd_end_io;
}
req->end_io = nvme_uring_cmd_end_io;
blk_execute_rq_nowait(req, false);
return -EIOCBQUEUED;
}
@ -786,16 +651,12 @@ int nvme_ns_chr_uring_cmd_iopoll(struct io_uring_cmd *ioucmd,
struct io_comp_batch *iob,
unsigned int poll_flags)
{
struct request *req;
int ret = 0;
struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd);
struct request *req = pdu->req;
if (!(ioucmd->flags & IORING_URING_CMD_POLLED))
return 0;
req = READ_ONCE(ioucmd->cookie);
if (req && blk_rq_is_poll(req))
ret = blk_rq_poll(req, iob, poll_flags);
return ret;
return blk_rq_poll(req, iob, poll_flags);
return 0;
}
#ifdef CONFIG_NVME_MULTIPATH
static int nvme_ns_head_ctrl_ioctl(struct nvme_ns *ns, unsigned int cmd,