1

block-6.12-20241101

-----BEGIN PGP SIGNATURE-----
 
 iQJEBAABCAAuFiEEwPw5LcreJtl1+l5K99NY+ylx4KYFAmclGQUQHGF4Ym9lQGtl
 cm5lbC5kawAKCRD301j7KXHgpkKsEADWz2dU1Edfbg5lez9Kf2SZwLKUbh9VVBQY
 kuIeGI/lQ1eeIdIgEJtCmx6ePlbk9D5WYu7lJvXfODAh5gUBLMpI4t6p4L1TkU2o
 yZ9UDoy5SK8o0UAOZhrHw4fE70EdI31Olm2mz3ToIevmpKOabNr/joVEfpEYaoPV
 t9FIGH1DvUZQSWUgw6G2lxWA7gTwfzvAfOegXF0VlFyhCEXjLXwJSUq+lWaShL33
 oDxETRYGaoBVfNVLIaZ6LFVUrvmgzFQ1Q6q1iY+DAUgMh9NXV2mzQ+/17HZjWaeU
 jsEeeOMnvtxhvQQpRyEvGA93Fh+gDkx/wljB4EmK2oqETs+oDVzts+L+YJk2ef7K
 n71dhuj04BRfduu6DFNg6ufAcMJOTgd0odgN9h9hPa0z3y0sx9hOSW6XZfepABl1
 +XDDyI4p/lVTMH/vYlS2ay2RUo7KrxfX26qSYbz6UItzo6dMl/3YjtKbzYpP0mZM
 4+Yu5YxDXirbnxpk9uhzOA4CdwiXXWxLIVIX1KeUWkJZ5wzmH2rFqmpzH8OL91uH
 J4SIMLgMCm4e45nDOLH5ZSLIc1MO2df0sJ1gZZD6MoT0xbxULyIQuj19sBszeHwH
 YgV4sQlPead4NT2zoSDeXSpeIWmv5dd15To7wVpeInyj/DHyXSHzycHYSehgdmv1
 FBQ+LMffyA==
 =e3uK
 -----END PGP SIGNATURE-----

Merge tag 'block-6.12-20241101' of git://git.kernel.dk/linux

Pull block fixes from Jens Axboe:

 - Fixup for a recent blk_rq_map_user_bvec() patch

 - NVMe pull request via Keith:
     - Spec compliant identification fix (Keith)
     - Module parameter to enable backward compatibility on unusual
       namespace formats (Keith)
     - Target double free fix when using keys (Vitaliy)
     - Passthrough command error handling fix (Keith)

* tag 'block-6.12-20241101' of git://git.kernel.dk/linux:
  nvme: re-fix error-handling for io_uring nvme-passthrough
  nvmet-auth: assign dh_key to NULL after kfree_sensitive
  nvme: module parameter to disable pi with offsets
  block: fix queue limits checks in blk_rq_map_user_bvec for real
  nvme: enhance cns version checking
This commit is contained in:
Linus Torvalds 2024-11-01 13:41:55 -10:00
commit f4a1e8e369
4 changed files with 65 additions and 55 deletions

View File

@ -561,55 +561,33 @@ EXPORT_SYMBOL(blk_rq_append_bio);
/* Prepare bio for passthrough IO given ITER_BVEC iter */
static int blk_rq_map_user_bvec(struct request *rq, const struct iov_iter *iter)
{
struct request_queue *q = rq->q;
size_t nr_iter = iov_iter_count(iter);
size_t nr_segs = iter->nr_segs;
struct bio_vec *bvecs, *bvprvp = NULL;
const struct queue_limits *lim = &q->limits;
unsigned int nsegs = 0, bytes = 0;
const struct queue_limits *lim = &rq->q->limits;
unsigned int max_bytes = lim->max_hw_sectors << SECTOR_SHIFT;
unsigned int nsegs;
struct bio *bio;
size_t i;
int ret;
if (!nr_iter || (nr_iter >> SECTOR_SHIFT) > queue_max_hw_sectors(q))
return -EINVAL;
if (nr_segs > queue_max_segments(q))
if (!iov_iter_count(iter) || iov_iter_count(iter) > max_bytes)
return -EINVAL;
/* no iovecs to alloc, as we already have a BVEC iterator */
/* reuse the bvecs from the iterator instead of allocating new ones */
bio = blk_rq_map_bio_alloc(rq, 0, GFP_KERNEL);
if (bio == NULL)
if (!bio)
return -ENOMEM;
bio_iov_bvec_set(bio, (struct iov_iter *)iter);
blk_rq_bio_prep(rq, bio, nr_segs);
/* loop to perform a bunch of sanity checks */
bvecs = (struct bio_vec *)iter->bvec;
for (i = 0; i < nr_segs; i++) {
struct bio_vec *bv = &bvecs[i];
/*
* If the queue doesn't support SG gaps and adding this
* offset would create a gap, fallback to copy.
*/
if (bvprvp && bvec_gap_to_prev(lim, bvprvp, bv->bv_offset)) {
/* check that the data layout matches the hardware restrictions */
ret = bio_split_rw_at(bio, lim, &nsegs, max_bytes);
if (ret) {
/* if we would have to split the bio, copy instead */
if (ret > 0)
ret = -EREMOTEIO;
blk_mq_map_bio_put(bio);
return -EREMOTEIO;
return ret;
}
/* check full condition */
if (nsegs >= nr_segs || bytes > UINT_MAX - bv->bv_len)
goto put_bio;
if (bytes + bv->bv_len > nr_iter)
break;
nsegs++;
bytes += bv->bv_len;
bvprvp = bv;
}
blk_rq_bio_prep(rq, bio, nsegs);
return 0;
put_bio:
blk_mq_map_bio_put(bio);
return -EINVAL;
}
/**

View File

@ -91,6 +91,17 @@ module_param(apst_secondary_latency_tol_us, ulong, 0644);
MODULE_PARM_DESC(apst_secondary_latency_tol_us,
"secondary APST latency tolerance in us");
/*
* Older kernels didn't enable protection information if it was at an offset.
* Newer kernels do, so it breaks reads on the upgrade if such formats were
* used in prior kernels since the metadata written did not contain a valid
* checksum.
*/
static bool disable_pi_offsets = false;
module_param(disable_pi_offsets, bool, 0444);
MODULE_PARM_DESC(disable_pi_offsets,
"disable protection information if it has an offset");
/*
* nvme_wq - hosts nvme related works that are not reset or delete
* nvme_reset_wq - hosts nvme reset works
@ -1390,17 +1401,30 @@ static void nvme_update_keep_alive(struct nvme_ctrl *ctrl,
nvme_start_keep_alive(ctrl);
}
/*
* In NVMe 1.0 the CNS field was just a binary controller or namespace
* flag, thus sending any new CNS opcodes has a big chance of not working.
* Qemu unfortunately had that bug after reporting a 1.1 version compliance
* (but not for any later version).
*/
static bool nvme_ctrl_limited_cns(struct nvme_ctrl *ctrl)
static bool nvme_id_cns_ok(struct nvme_ctrl *ctrl, u8 cns)
{
if (ctrl->quirks & NVME_QUIRK_IDENTIFY_CNS)
return ctrl->vs < NVME_VS(1, 2, 0);
return ctrl->vs < NVME_VS(1, 1, 0);
/*
* The CNS field occupies a full byte starting with NVMe 1.2
*/
if (ctrl->vs >= NVME_VS(1, 2, 0))
return true;
/*
* NVMe 1.1 expanded the CNS value to two bits, which means values
* larger than that could get truncated and treated as an incorrect
* value.
*
* Qemu implemented 1.0 behavior for controllers claiming 1.1
* compliance, so they need to be quirked here.
*/
if (ctrl->vs >= NVME_VS(1, 1, 0) &&
!(ctrl->quirks & NVME_QUIRK_IDENTIFY_CNS))
return cns <= 3;
/*
* NVMe 1.0 used a single bit for the CNS value.
*/
return cns <= 1;
}
static int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id)
@ -1913,8 +1937,12 @@ static void nvme_configure_metadata(struct nvme_ctrl *ctrl,
if (head->pi_size && head->ms >= head->pi_size)
head->pi_type = id->dps & NVME_NS_DPS_PI_MASK;
if (!(id->dps & NVME_NS_DPS_PI_FIRST))
if (!(id->dps & NVME_NS_DPS_PI_FIRST)) {
if (disable_pi_offsets)
head->pi_type = 0;
else
info->pi_offset = head->ms - head->pi_size;
}
if (ctrl->ops->flags & NVME_F_FABRICS) {
/*
@ -3104,7 +3132,7 @@ static int nvme_init_non_mdts_limits(struct nvme_ctrl *ctrl)
ctrl->max_zeroes_sectors = 0;
if (ctrl->subsys->subtype != NVME_NQN_NVME ||
nvme_ctrl_limited_cns(ctrl) ||
!nvme_id_cns_ok(ctrl, NVME_ID_CNS_CS_CTRL) ||
test_bit(NVME_CTRL_SKIP_ID_CNS_CS, &ctrl->flags))
return 0;
@ -4200,7 +4228,7 @@ static void nvme_scan_work(struct work_struct *work)
}
mutex_lock(&ctrl->scan_lock);
if (nvme_ctrl_limited_cns(ctrl)) {
if (!nvme_id_cns_ok(ctrl, NVME_ID_CNS_NS_ACTIVE_LIST)) {
nvme_scan_ns_sequential(ctrl);
} else {
/*

View File

@ -421,10 +421,13 @@ static enum rq_end_io_ret nvme_uring_cmd_end_io(struct request *req,
struct io_uring_cmd *ioucmd = req->end_io_data;
struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd);
if (nvme_req(req)->flags & NVME_REQ_CANCELLED)
if (nvme_req(req)->flags & NVME_REQ_CANCELLED) {
pdu->status = -EINTR;
else
} else {
pdu->status = nvme_req(req)->status;
if (!pdu->status)
pdu->status = blk_status_to_errno(err);
}
pdu->result = le64_to_cpu(nvme_req(req)->result.u64);
/*

View File

@ -115,6 +115,7 @@ int nvmet_setup_dhgroup(struct nvmet_ctrl *ctrl, u8 dhgroup_id)
pr_debug("%s: ctrl %d failed to generate private key, err %d\n",
__func__, ctrl->cntlid, ret);
kfree_sensitive(ctrl->dh_key);
ctrl->dh_key = NULL;
return ret;
}
ctrl->dh_keysize = crypto_kpp_maxsize(ctrl->dh_tfm);