From f54f0d0e2b1f74de85ff02013fa4886e4154aca5 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 17 Oct 2024 10:45:24 -0700 Subject: [PATCH 1/4] nvme: enhance cns version checking The number of CNS bits in the command is specific to the nvme spec version compliance. The existing check is not sufficient for possible CNS values the driver uses that may create confusion between host and device, so enhance the check to consider the version and desired CNS value. Reviewed-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 37 +++++++++++++++++++++++++------------ 1 file changed, 25 insertions(+), 12 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 876c8e6311db..09466e208729 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1390,17 +1390,30 @@ static void nvme_update_keep_alive(struct nvme_ctrl *ctrl, nvme_start_keep_alive(ctrl); } -/* - * In NVMe 1.0 the CNS field was just a binary controller or namespace - * flag, thus sending any new CNS opcodes has a big chance of not working. - * Qemu unfortunately had that bug after reporting a 1.1 version compliance - * (but not for any later version). - */ -static bool nvme_ctrl_limited_cns(struct nvme_ctrl *ctrl) +static bool nvme_id_cns_ok(struct nvme_ctrl *ctrl, u8 cns) { - if (ctrl->quirks & NVME_QUIRK_IDENTIFY_CNS) - return ctrl->vs < NVME_VS(1, 2, 0); - return ctrl->vs < NVME_VS(1, 1, 0); + /* + * The CNS field occupies a full byte starting with NVMe 1.2 + */ + if (ctrl->vs >= NVME_VS(1, 2, 0)) + return true; + + /* + * NVMe 1.1 expanded the CNS value to two bits, which means values + * larger than that could get truncated and treated as an incorrect + * value. + * + * Qemu implemented 1.0 behavior for controllers claiming 1.1 + * compliance, so they need to be quirked here. + */ + if (ctrl->vs >= NVME_VS(1, 1, 0) && + !(ctrl->quirks & NVME_QUIRK_IDENTIFY_CNS)) + return cns <= 3; + + /* + * NVMe 1.0 used a single bit for the CNS value. + */ + return cns <= 1; } static int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id) @@ -3104,7 +3117,7 @@ static int nvme_init_non_mdts_limits(struct nvme_ctrl *ctrl) ctrl->max_zeroes_sectors = 0; if (ctrl->subsys->subtype != NVME_NQN_NVME || - nvme_ctrl_limited_cns(ctrl) || + !nvme_id_cns_ok(ctrl, NVME_ID_CNS_CS_CTRL) || test_bit(NVME_CTRL_SKIP_ID_CNS_CS, &ctrl->flags)) return 0; @@ -4200,7 +4213,7 @@ static void nvme_scan_work(struct work_struct *work) } mutex_lock(&ctrl->scan_lock); - if (nvme_ctrl_limited_cns(ctrl)) { + if (!nvme_id_cns_ok(ctrl, NVME_ID_CNS_NS_ACTIVE_LIST)) { nvme_scan_ns_sequential(ctrl); } else { /* From 42ab37eaad17aee458489c553a367621ee04e0bc Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 23 Oct 2024 08:40:26 -0700 Subject: [PATCH 2/4] nvme: module parameter to disable pi with offsets A recent commit enables integrity checks for formats the previous kernel versions registered with the "nop" integrity profile. This means namespaces using that format become unreadable when upgrading the kernel past that commit. Introduce a module parameter to restore the "nop" integrity profile so that storage can be readable once again. This could be a boot device, so the setting needs to happen at module load time. Fixes: 921e81db524d17 ("nvme: allow integrity when PI is not in first bytes") Reported-by: David Wei Reviewed-by: Christoph Hellwig Reviewed-by: Kanchan Joshi Reviewed-by: Chaitanya Kulkarni Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 09466e208729..ce20b916301a 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -91,6 +91,17 @@ module_param(apst_secondary_latency_tol_us, ulong, 0644); MODULE_PARM_DESC(apst_secondary_latency_tol_us, "secondary APST latency tolerance in us"); +/* + * Older kernels didn't enable protection information if it was at an offset. + * Newer kernels do, so it breaks reads on the upgrade if such formats were + * used in prior kernels since the metadata written did not contain a valid + * checksum. + */ +static bool disable_pi_offsets = false; +module_param(disable_pi_offsets, bool, 0444); +MODULE_PARM_DESC(disable_pi_offsets, + "disable protection information if it has an offset"); + /* * nvme_wq - hosts nvme related works that are not reset or delete * nvme_reset_wq - hosts nvme reset works @@ -1926,8 +1937,12 @@ static void nvme_configure_metadata(struct nvme_ctrl *ctrl, if (head->pi_size && head->ms >= head->pi_size) head->pi_type = id->dps & NVME_NS_DPS_PI_MASK; - if (!(id->dps & NVME_NS_DPS_PI_FIRST)) - info->pi_offset = head->ms - head->pi_size; + if (!(id->dps & NVME_NS_DPS_PI_FIRST)) { + if (disable_pi_offsets) + head->pi_type = 0; + else + info->pi_offset = head->ms - head->pi_size; + } if (ctrl->ops->flags & NVME_F_FABRICS) { /* From d2f551b1f72b4c508ab9298419f6feadc3b5d791 Mon Sep 17 00:00:00 2001 From: Vitaliy Shevtsov Date: Mon, 16 Sep 2024 22:41:37 +0500 Subject: [PATCH 3/4] nvmet-auth: assign dh_key to NULL after kfree_sensitive ctrl->dh_key might be used across multiple calls to nvmet_setup_dhgroup() for the same controller. So it's better to nullify it after release on error path in order to avoid double free later in nvmet_destroy_auth(). Found by Linux Verification Center (linuxtesting.org) with Svace. Fixes: 7a277c37d352 ("nvmet-auth: Diffie-Hellman key exchange support") Cc: stable@vger.kernel.org Signed-off-by: Vitaliy Shevtsov Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Signed-off-by: Keith Busch --- drivers/nvme/target/auth.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/nvme/target/auth.c b/drivers/nvme/target/auth.c index 7897d02c681d..b0fd211ec57e 100644 --- a/drivers/nvme/target/auth.c +++ b/drivers/nvme/target/auth.c @@ -115,6 +115,7 @@ int nvmet_setup_dhgroup(struct nvmet_ctrl *ctrl, u8 dhgroup_id) pr_debug("%s: ctrl %d failed to generate private key, err %d\n", __func__, ctrl->cntlid, ret); kfree_sensitive(ctrl->dh_key); + ctrl->dh_key = NULL; return ret; } ctrl->dh_keysize = crypto_kpp_maxsize(ctrl->dh_tfm); From 5eed4fb274cd6579f2fb4190b11c4c86c553cd06 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 28 Oct 2024 13:45:46 -0700 Subject: [PATCH 4/4] nvme: re-fix error-handling for io_uring nvme-passthrough This was previously fixed with commit 1147dd0503564fa0e0348 ("nvme: fix error-handling for io_uring nvme-passthrough"), but the change was mistakenly undone in a later commit. Fixes: d6aacee9255e7f ("nvme: use bio_integrity_map_user") Cc: stable@vger.kernel.org Reported-by: Jens Axboe Reviewed-by: Christoph Hellwig Reviewed-by: Anuj Gupta Reviewed-by: Kanchan Joshi Signed-off-by: Keith Busch --- drivers/nvme/host/ioctl.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c index b9b79ccfabf8..a96976b22fa7 100644 --- a/drivers/nvme/host/ioctl.c +++ b/drivers/nvme/host/ioctl.c @@ -421,10 +421,13 @@ static enum rq_end_io_ret nvme_uring_cmd_end_io(struct request *req, struct io_uring_cmd *ioucmd = req->end_io_data; struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd); - if (nvme_req(req)->flags & NVME_REQ_CANCELLED) + if (nvme_req(req)->flags & NVME_REQ_CANCELLED) { pdu->status = -EINTR; - else + } else { pdu->status = nvme_req(req)->status; + if (!pdu->status) + pdu->status = blk_status_to_errno(err); + } pdu->result = le64_to_cpu(nvme_req(req)->result.u64); /*