From fed13a5478680614ba97fc87e71f16e2e197912e Mon Sep 17 00:00:00 2001
From: Mikulas Patocka
Date: Mon, 7 Oct 2024 13:38:12 +0200
Subject: [PATCH 1/8] dm: fix a crash if blk_alloc_disk fails

If blk_alloc_disk fails, the variable md->disk is set to an error value.
cleanup_mapped_device will see that md->disk is non-NULL and it will
attempt to access it, causing a crash on the statement
"md->disk->private_data = NULL;".

Signed-off-by: Mikulas Patocka
Reported-by: Chenyuan Yang
Closes: https://marc.info/?l=dm-devel&m=172824125004329&w=2
Cc: stable@vger.kernel.org
Reviewed-by: Nitesh Shetty
---
 drivers/md/dm.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index ff4a6b570b76..19230404d8c2 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -2290,8 +2290,10 @@ static struct mapped_device *alloc_dev(int minor)
          * override accordingly.
          */
         md->disk = blk_alloc_disk(NULL, md->numa_node_id);
-        if (IS_ERR(md->disk))
+        if (IS_ERR(md->disk)) {
+                md->disk = NULL;
                 goto bad;
+        }
         md->queue = md->disk->queue;
 
         init_waitqueue_head(&md->wait);

From 5a4510c762fc04c74cff264cd4d9e9f5bf364bae Mon Sep 17 00:00:00 2001
From: Zichen Xie
Date: Mon, 21 Oct 2024 14:54:45 -0500
Subject: [PATCH 2/8] dm-unstriped: cast an operand to sector_t to prevent
 potential uint32_t overflow

This was found by a static analyzer. There is a potential integer
overflow in unstripe_ctr(): uc->unstripe_offset and uc->unstripe_width
are defined as sector_t (uint64_t), while uc->unstripe, uc->chunk_size
and uc->stripes are all defined as uint32_t. Without a cast, the
multiplications are performed in 32 bits and can wrap before the result
is assigned to the 64-bit fields. Add an explicit cast to sector_t to
prevent the overflow.

Fixes: 18a5bf270532 ("dm: add unstriped target")
Signed-off-by: Zichen Xie
Signed-off-by: Mikulas Patocka
Cc: stable@vger.kernel.org
---
 drivers/md/dm-unstripe.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/md/dm-unstripe.c b/drivers/md/dm-unstripe.c
index 48587c16c445..e8a9432057dc 100644
--- a/drivers/md/dm-unstripe.c
+++ b/drivers/md/dm-unstripe.c
@@ -85,8 +85,8 @@ static int unstripe_ctr(struct dm_target *ti, unsigned int argc, char **argv)
         }
         uc->physical_start = start;
 
-        uc->unstripe_offset = uc->unstripe * uc->chunk_size;
-        uc->unstripe_width = (uc->stripes - 1) * uc->chunk_size;
+        uc->unstripe_offset = (sector_t)uc->unstripe * uc->chunk_size;
+        uc->unstripe_width = (sector_t)(uc->stripes - 1) * uc->chunk_size;
         uc->chunk_shift = is_power_of_2(uc->chunk_size) ? fls(uc->chunk_size) - 1 : 0;
 
         tmp_len = ti->len;

From a674d0cd56f47628e8057232833cd0654c85d50b Mon Sep 17 00:00:00 2001
From: Mikulas Patocka
Date: Tue, 29 Oct 2024 12:17:13 +0100
Subject: [PATCH 3/8] dm-verity: don't crash if panic_on_corruption is not
 selected

If the user sets panic_on_error and doesn't set panic_on_corruption,
dm-verity should not panic on a data mismatch. But currently it panics,
because it treats the data mismatch as an I/O error.

This commit fixes the logic so that if there is a data mismatch and
neither restart_on_corruption nor panic_on_corruption is selected, the
system won't restart or panic.
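The intended policy can be summarised with a small user-space model
(an illustrative sketch only: verity_action() and its parameters are
stand-ins, not driver code; in the driver the distinction is carried by
the new io->had_mismatch flag added below):

  #include <stdbool.h>
  #include <stdio.h>

  /* model: *_on_error options act only on genuine I/O errors,
   * *_on_corruption options act only on hash mismatches
   */
  static const char *verity_action(bool io_error, bool had_mismatch,
                                   bool panic_on_error,
                                   bool panic_on_corruption)
  {
          if (had_mismatch)
                  return panic_on_corruption ? "panic" : "fail the bio";
          if (io_error)
                  return panic_on_error ? "panic" : "fail the bio";
          return "success";
  }

  int main(void)
  {
          /* the case fixed here: a mismatch with only panic_on_error set */
          printf("%s\n", verity_action(false, true, true, false));
          return 0;       /* prints "fail the bio", i.e. no panic */
  }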
Signed-off-by: Mikulas Patocka
Reviewed-by: Sami Tolvanen
Fixes: f811b83879fb ("dm-verity: introduce the options restart_on_error and panic_on_error")
---
 drivers/md/dm-verity-target.c | 9 ++++++---
 drivers/md/dm-verity.h        | 1 +
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c
index 7d4d90b4395a..c142ec5458b7 100644
--- a/drivers/md/dm-verity-target.c
+++ b/drivers/md/dm-verity-target.c
@@ -356,9 +356,9 @@ static int verity_verify_level(struct dm_verity *v, struct dm_verity_io *io,
                 else if (verity_handle_err(v,
                                            DM_VERITY_BLOCK_TYPE_METADATA,
                                            hash_block)) {
-                        struct bio *bio =
-                                dm_bio_from_per_bio_data(io,
-                                        v->ti->per_io_data_size);
+                        struct bio *bio;
+                        io->had_mismatch = true;
+                        bio = dm_bio_from_per_bio_data(io, v->ti->per_io_data_size);
                         dm_audit_log_bio(DM_MSG_PREFIX, "verify-metadata", bio,
                                          block, 0);
                         r = -EIO;
@@ -482,6 +482,7 @@ static int verity_handle_data_hash_mismatch(struct dm_verity *v,
                 return -EIO; /* Error correction failed; Just return error */
 
         if (verity_handle_err(v, DM_VERITY_BLOCK_TYPE_DATA, blkno)) {
+                io->had_mismatch = true;
                 dm_audit_log_bio(DM_MSG_PREFIX, "verify-data", bio, blkno, 0);
                 return -EIO;
         }
@@ -606,6 +607,7 @@ static void verity_finish_io(struct dm_verity_io *io, blk_status_t status)
 
         if (unlikely(status != BLK_STS_OK) &&
             unlikely(!(bio->bi_opf & REQ_RAHEAD)) &&
+            !io->had_mismatch &&
             !verity_is_system_shutting_down()) {
                 if (v->error_mode == DM_VERITY_MODE_PANIC) {
                         panic("dm-verity device has I/O error");
@@ -779,6 +781,7 @@ static int verity_map(struct dm_target *ti, struct bio *bio)
         io->orig_bi_end_io = bio->bi_end_io;
         io->block = bio->bi_iter.bi_sector >> (v->data_dev_block_bits - SECTOR_SHIFT);
         io->n_blocks = bio->bi_iter.bi_size >> v->data_dev_block_bits;
+        io->had_mismatch = false;
 
         bio->bi_end_io = verity_end_io;
         bio->bi_private = io;
diff --git a/drivers/md/dm-verity.h b/drivers/md/dm-verity.h
index 6b75159bf835..c996140bda94 100644
--- a/drivers/md/dm-verity.h
+++ b/drivers/md/dm-verity.h
@@ -92,6 +92,7 @@ struct dm_verity_io {
         sector_t block;
         unsigned int n_blocks;
         bool in_bh;
+        bool had_mismatch;
 
         struct work_struct work;
         struct work_struct bh_work;

From 235d2e739fcbe964c9ce179b4c991025662dcdb6 Mon Sep 17 00:00:00 2001
From: Ming-Hung Tsai
Date: Tue, 22 Oct 2024 15:12:22 +0800
Subject: [PATCH 4/8] dm cache: correct the number of origin blocks to match
 the target length

When creating a cache device, the actual size of the cache origin might
be greater than the specified cache target length. In such a case, the
number of origin blocks should match the cache target length, not the
full size of the origin device, since access beyond the cache target is
not possible. This issue occurs when reducing the origin device size
using lvm, as lvreduce preloads the new cache table before resuming the
cache origin, which can result in incorrect sizes for the discard bitset
and smq hotspot blocks.

Reproduce steps:

1. create a cache device consisting of 4096 origin blocks

  dmsetup create cmeta --table "0 8192 linear /dev/sdc 0"
  dmsetup create cdata --table "0 65536 linear /dev/sdc 8192"
  dmsetup create corig --table "0 524288 linear /dev/sdc 262144"
  dd if=/dev/zero of=/dev/mapper/cmeta bs=4k count=1 oflag=direct
  dmsetup create cache --table "0 524288 cache /dev/mapper/cmeta \
    /dev/mapper/cdata /dev/mapper/corig 128 2 metadata2 writethrough smq 0"
2. reduce the cache origin to 2048 origin blocks, following lvreduce's
   approach

  dmsetup reload corig --table "0 262144 linear /dev/sdc 262144"
  dmsetup reload cache --table "0 262144 cache /dev/mapper/cmeta \
    /dev/mapper/cdata /dev/mapper/corig 128 2 metadata2 writethrough smq 0"
  dmsetup suspend cache
  dmsetup suspend corig
  dmsetup suspend cdata
  dmsetup suspend cmeta
  dmsetup resume corig
  dmsetup resume cdata
  dmsetup resume cmeta
  dmsetup resume cache

3. shut down the cache and check the number of discard blocks in the
   superblock. The value is expected to be 2048, but actually is 4096.

  dmsetup remove cache corig cdata cmeta
  dd if=/dev/sdc bs=1c count=8 skip=224 2>/dev/null | hexdump -e '1/8 "%u\n"'

Fix by correcting the origin_blocks initialization in cache_create and
removing the unused origin_sectors from struct cache_args accordingly.

Signed-off-by: Ming-Hung Tsai
Fixes: c6b4fcbad044 ("dm: add cache target")
Cc: stable@vger.kernel.org
Signed-off-by: Mikulas Patocka
Acked-by: Joe Thornber
---
 drivers/md/dm-cache-target.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index aaeeabfab09b..90772b42c234 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -2003,7 +2003,6 @@ struct cache_args {
         sector_t cache_sectors;
 
         struct dm_dev *origin_dev;
-        sector_t origin_sectors;
 
         uint32_t block_size;
 
@@ -2084,6 +2083,7 @@ static int parse_cache_dev(struct cache_args *ca, struct dm_arg_set *as,
 static int parse_origin_dev(struct cache_args *ca, struct dm_arg_set *as,
                             char **error)
 {
+        sector_t origin_sectors;
         int r;
 
         if (!at_least_one_arg(as, error))
@@ -2096,8 +2096,8 @@ static int parse_origin_dev(struct cache_args *ca, struct dm_arg_set *as,
                 return r;
         }
 
-        ca->origin_sectors = get_dev_size(ca->origin_dev);
-        if (ca->ti->len > ca->origin_sectors) {
+        origin_sectors = get_dev_size(ca->origin_dev);
+        if (ca->ti->len > origin_sectors) {
                 *error = "Device size larger than cached device";
                 return -EINVAL;
         }
@@ -2407,7 +2407,7 @@ static int cache_create(struct cache_args *ca, struct cache **result)
 
         ca->metadata_dev = ca->origin_dev = ca->cache_dev = NULL;
 
-        origin_blocks = cache->origin_sectors = ca->origin_sectors;
+        origin_blocks = cache->origin_sectors = ti->len;
         origin_blocks = block_div(origin_blocks, ca->block_size);
         cache->origin_blocks = to_oblock(origin_blocks);

From 135496c208ba26fd68cdef10b64ed7a91ac9a7ff Mon Sep 17 00:00:00 2001
From: Ming-Hung Tsai
Date: Tue, 22 Oct 2024 15:12:49 +0800
Subject: [PATCH 5/8] dm cache: fix flushing uninitialized delayed_work on
 cache_ctr error

An unexpected WARN_ON from flush_work() may occur when cache creation
fails, caused by destroying the uninitialized delayed_work waker in the
error path of cache_create(). For example, the warning appears on a
superblock checksum error.

Reproduce steps:

  dmsetup create cmeta --table "0 8192 linear /dev/sdc 0"
  dmsetup create cdata --table "0 65536 linear /dev/sdc 8192"
  dmsetup create corig --table "0 524288 linear /dev/sdc 262144"
  dd if=/dev/urandom of=/dev/mapper/cmeta bs=4k count=1 oflag=direct
  dmsetup create cache --table "0 524288 cache /dev/mapper/cmeta \
    /dev/mapper/cdata /dev/mapper/corig 128 2 metadata2 writethrough smq 0"

Kernel logs:

  (snip)
  WARNING: CPU: 0 PID: 84 at kernel/workqueue.c:4178 __flush_work+0x5d4/0x890

Fix by pulling the cancel_delayed_work_sync() out of the constructor's
error path.
This patch doesn't affect the use-after-free fix for concurrent
dm_resume and dm_destroy (commit 6a459d8edbdb ("dm cache: Fix UAF in
destroy()")) as cache_dtr is not changed.

Signed-off-by: Ming-Hung Tsai
Fixes: 6a459d8edbdb ("dm cache: Fix UAF in destroy()")
Cc: stable@vger.kernel.org
Signed-off-by: Mikulas Patocka
Acked-by: Joe Thornber
---
 drivers/md/dm-cache-target.c | 24 +++++++++++++++---------
 1 file changed, 15 insertions(+), 9 deletions(-)

diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index 90772b42c234..68e9a1abd303 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -1905,16 +1905,13 @@ static void check_migrations(struct work_struct *ws)
  * This function gets called on the error paths of the constructor, so we
  * have to cope with a partially initialised struct.
  */
-static void destroy(struct cache *cache)
+static void __destroy(struct cache *cache)
 {
-        unsigned int i;
-
         mempool_exit(&cache->migration_pool);
 
         if (cache->prison)
                 dm_bio_prison_destroy_v2(cache->prison);
 
-        cancel_delayed_work_sync(&cache->waker);
         if (cache->wq)
                 destroy_workqueue(cache->wq);
 
@@ -1942,13 +1939,22 @@ static void destroy(struct cache *cache)
         if (cache->policy)
                 dm_cache_policy_destroy(cache->policy);
 
+        bioset_exit(&cache->bs);
+
+        kfree(cache);
+}
+
+static void destroy(struct cache *cache)
+{
+        unsigned int i;
+
+        cancel_delayed_work_sync(&cache->waker);
+
         for (i = 0; i < cache->nr_ctr_args ; i++)
                 kfree(cache->ctr_args[i]);
         kfree(cache->ctr_args);
 
-        bioset_exit(&cache->bs);
-
-        kfree(cache);
+        __destroy(cache);
 }
 
 static void cache_dtr(struct dm_target *ti)
@@ -2561,7 +2567,7 @@ static int cache_create(struct cache_args *ca, struct cache **result)
         *result = cache;
         return 0;
 
 bad:
-        destroy(cache);
+        __destroy(cache);
         return r;
 }
@@ -2612,7 +2618,7 @@ static int cache_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 
         r = copy_ctr_args(cache, argc - 3, (const char **)argv + 3);
         if (r) {
-                destroy(cache);
+                __destroy(cache);
                 goto out;
         }
 

From 792227719725497ce10a8039803bec13f89f8910 Mon Sep 17 00:00:00 2001
From: Ming-Hung Tsai
Date: Tue, 22 Oct 2024 15:13:16 +0800
Subject: [PATCH 6/8] dm cache: fix out-of-bounds access to the dirty bitset
 when resizing

dm-cache checks the dirty bits of the cache blocks to be dropped when
shrinking the fast device, but an index bug in bitset iteration causes
out-of-bounds access.

Reproduce steps:

1. create a cache device of 1024 cache blocks (128 bytes dirty bitset)

  dmsetup create cmeta --table "0 8192 linear /dev/sdc 0"
  dmsetup create cdata --table "0 131072 linear /dev/sdc 8192"
  dmsetup create corig --table "0 524288 linear /dev/sdc 262144"
  dd if=/dev/zero of=/dev/mapper/cmeta bs=4k count=1 oflag=direct
  dmsetup create cache --table "0 524288 cache /dev/mapper/cmeta \
    /dev/mapper/cdata /dev/mapper/corig 128 2 metadata2 writethrough smq 0"

2. shrink the fast device to 512 cache blocks, triggering out-of-bounds
   access to the dirty bitset (offset 0x80)

  dmsetup suspend cache
  dmsetup reload cdata --table "0 65536 linear /dev/sdc 8192"
  dmsetup resume cdata
  dmsetup resume cache

KASAN reports:

BUG: KASAN: vmalloc-out-of-bounds in cache_preresume+0x269/0x7b0
Read of size 8 at addr ffffc900000f3080 by task dmsetup/131

(...snip...)
The buggy address belongs to the virtual mapping at
 [ffffc900000f3000, ffffc900000f5000) created by:
 cache_ctr+0x176a/0x35f0

(...snip...)
Memory state around the buggy address:
 ffffc900000f2f80: f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8
 ffffc900000f3000: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
>ffffc900000f3080: f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8
                   ^
 ffffc900000f3100: f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8
 ffffc900000f3180: f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8

Fix by post-incrementing the index, so that the check covers the cache
blocks to be dropped without reading past the end of the dirty bitset.

Signed-off-by: Ming-Hung Tsai
Fixes: f494a9c6b1b6 ("dm cache: cache shrinking support")
Cc: stable@vger.kernel.org
Signed-off-by: Mikulas Patocka
Acked-by: Joe Thornber
---
 drivers/md/dm-cache-target.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index 68e9a1abd303..1fcd8c5c220e 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -2912,13 +2912,13 @@ static bool can_resize(struct cache *cache, dm_cblock_t new_size)
          * We can't drop a dirty block when shrinking the cache.
          */
         while (from_cblock(new_size) < from_cblock(cache->cache_size)) {
-                new_size = to_cblock(from_cblock(new_size) + 1);
                 if (is_dirty(cache, new_size)) {
                         DMERR("%s: unable to shrink cache; cache block %llu is dirty",
                               cache_device_name(cache),
                               (unsigned long long) from_cblock(new_size));
                         return false;
                 }
+                new_size = to_cblock(from_cblock(new_size) + 1);
         }
 
         return true;

From f484697e619a83ecc370443a34746379ad99d204 Mon Sep 17 00:00:00 2001
From: Ming-Hung Tsai
Date: Tue, 22 Oct 2024 15:13:39 +0800
Subject: [PATCH 7/8] dm cache: optimize dirty bit checking with find_next_bit
 when resizing

When shrinking the fast device, dm-cache iteratively searches for a
dirty bit among the cache blocks to be dropped, which is less efficient.
Use find_next_bit instead, as it is twice as fast as the iterative
approach with test_bit.

Signed-off-by: Ming-Hung Tsai
Fixes: f494a9c6b1b6 ("dm cache: cache shrinking support")
Cc: stable@vger.kernel.org
Signed-off-by: Mikulas Patocka
Acked-by: Joe Thornber
---
 drivers/md/dm-cache-target.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index 1fcd8c5c220e..fa8ef2c32af8 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -2911,14 +2911,14 @@ static bool can_resize(struct cache *cache, dm_cblock_t new_size)
         /*
          * We can't drop a dirty block when shrinking the cache.
          */
-        while (from_cblock(new_size) < from_cblock(cache->cache_size)) {
-                if (is_dirty(cache, new_size)) {
-                        DMERR("%s: unable to shrink cache; cache block %llu is dirty",
-                              cache_device_name(cache),
-                              (unsigned long long) from_cblock(new_size));
-                        return false;
-                }
-                new_size = to_cblock(from_cblock(new_size) + 1);
+        new_size = to_cblock(find_next_bit(cache->dirty_bitset,
+                                           from_cblock(cache->cache_size),
+                                           from_cblock(new_size)));
+        if (new_size != cache->cache_size) {
+                DMERR("%s: unable to shrink cache; cache block %llu is dirty",
+                      cache_device_name(cache),
+                      (unsigned long long) from_cblock(new_size));
+                return false;
         }
 
         return true;

From c0ade5d98979585d4f5a93e4514c2e9a65afa08d Mon Sep 17 00:00:00 2001
From: Ming-Hung Tsai
Date: Tue, 22 Oct 2024 15:13:54 +0800
Subject: [PATCH 8/8] dm cache: fix potential out-of-bounds access on the
 first resume

Out-of-bounds access occurs if the fast device is expanded unexpectedly
before the first-time resume of the cache table.
This happens because expanding the fast device requires reloading the
cache table, so that cache_create can allocate new in-core data
structures that fit the new size, but the check in cache_preresume is
not performed during the first resume, so the unexpected expansion goes
undetected.

Reproduce steps:

1. prepare component devices:

  dmsetup create cmeta --table "0 8192 linear /dev/sdc 0"
  dmsetup create cdata --table "0 65536 linear /dev/sdc 8192"
  dmsetup create corig --table "0 524288 linear /dev/sdc 262144"
  dd if=/dev/zero of=/dev/mapper/cmeta bs=4k count=1 oflag=direct

2. load a cache table of 512 cache blocks, and deliberately expand the
   fast device before resuming the cache, making the in-core data
   structures inadequate.

  dmsetup create cache --notable
  dmsetup reload cache --table "0 524288 cache /dev/mapper/cmeta \
    /dev/mapper/cdata /dev/mapper/corig 128 2 metadata2 writethrough smq 0"
  dmsetup reload cdata --table "0 131072 linear /dev/sdc 8192"
  dmsetup resume cdata
  dmsetup resume cache

3. suspend the cache to write out the in-core dirty bitset and hint
   array, leading to out-of-bounds access to the dirty bitset at offset
   0x40:

  dmsetup suspend cache

KASAN reports:

BUG: KASAN: vmalloc-out-of-bounds in is_dirty_callback+0x2b/0x80
Read of size 8 at addr ffffc90000085040 by task dmsetup/90

(...snip...)
The buggy address belongs to the virtual mapping at
 [ffffc90000085000, ffffc90000087000) created by:
 cache_ctr+0x176a/0x35f0

(...snip...)
Memory state around the buggy address:
 ffffc90000084f00: f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8
 ffffc90000084f80: f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8
>ffffc90000085000: 00 00 00 00 00 00 00 00 f8 f8 f8 f8 f8 f8 f8 f8
                                           ^
 ffffc90000085080: f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8
 ffffc90000085100: f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8

Fix by checking the size change on the first resume.

Signed-off-by: Ming-Hung Tsai
Fixes: f494a9c6b1b6 ("dm cache: cache shrinking support")
Cc: stable@vger.kernel.org
Signed-off-by: Mikulas Patocka
Acked-by: Joe Thornber
---
 drivers/md/dm-cache-target.c | 37 ++++++++++++++++--------------------
 1 file changed, 16 insertions(+), 21 deletions(-)

diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index fa8ef2c32af8..40709310e327 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -2901,24 +2901,24 @@ static dm_cblock_t get_cache_dev_size(struct cache *cache)
 static bool can_resize(struct cache *cache, dm_cblock_t new_size)
 {
         if (from_cblock(new_size) > from_cblock(cache->cache_size)) {
-                if (cache->sized) {
-                        DMERR("%s: unable to extend cache due to missing cache table reload",
-                              cache_device_name(cache));
-                        return false;
-                }
+                DMERR("%s: unable to extend cache due to missing cache table reload",
+                      cache_device_name(cache));
+                return false;
         }
 
         /*
          * We can't drop a dirty block when shrinking the cache.
          */
-        new_size = to_cblock(find_next_bit(cache->dirty_bitset,
-                                           from_cblock(cache->cache_size),
-                                           from_cblock(new_size)));
-        if (new_size != cache->cache_size) {
-                DMERR("%s: unable to shrink cache; cache block %llu is dirty",
-                      cache_device_name(cache),
-                      (unsigned long long) from_cblock(new_size));
-                return false;
+        if (cache->loaded_mappings) {
+                new_size = to_cblock(find_next_bit(cache->dirty_bitset,
+                                                   from_cblock(cache->cache_size),
+                                                   from_cblock(new_size)));
+                if (new_size != cache->cache_size) {
+                        DMERR("%s: unable to shrink cache; cache block %llu is dirty",
+                              cache_device_name(cache),
+                              (unsigned long long) from_cblock(new_size));
+                        return false;
+                }
         }
 
         return true;
@@ -2949,20 +2949,15 @@ static int cache_preresume(struct dm_target *ti)
         /*
          * Check to see if the cache has resized.
          */
-        if (!cache->sized) {
-                r = resize_cache_dev(cache, csize);
-                if (r)
-                        return r;
-
-                cache->sized = true;
-
-        } else if (csize != cache->cache_size) {
+        if (!cache->sized || csize != cache->cache_size) {
                 if (!can_resize(cache, csize))
                         return -EINVAL;
 
                 r = resize_cache_dev(cache, csize);
                 if (r)
                         return r;
+
+                cache->sized = true;
         }
 
         if (!cache->loaded_mappings) {