btrfs: raid56: extra debugging for raid6 syndrome generation

[BUG]
I have got at least two crash reports for RAID6 syndrome generation; no
matter whether it's AVX2 or SSE2, they all seem to have a similar call
trace with a corrupted RAX:

  BUG: kernel NULL pointer dereference, address: 0000000000000000
  #PF: supervisor read access in kernel mode
  #PF: error_code(0x0000) - not-present page
  PGD 0 P4D 0
  Oops: 0000 [#1] PREEMPT SMP PTI
  Workqueue: btrfs-rmw rmw_rbio_work [btrfs]
  RIP: 0010:raid6_sse21_gen_syndrome+0x9e/0x130 [raid6_pq]
  RAX: 0000000000000000 RBX: 0000000000001000 RCX: ffffa0ff4cfa3248
  RDX: 0000000000000000 RSI: ffffa0f74cfa3238 RDI: 0000000000000000
  Call Trace:
   <TASK>
   rmw_rbio+0x5c8/0xa80 [btrfs]
   process_one_work+0x1c7/0x3d0
   worker_thread+0x4d/0x380
   kthread+0xf3/0x120
   ret_from_fork+0x2c/0x50
   </TASK>

[CAUSE]
The cause is not known.  Recently I also hit this in the AVX512 path,
and that was even in a v5.15 backport, which doesn't have any of my
RAID56 rework.

Furthermore, according to the registers:

  RAX: 0000000000000000 RBX: 0000000000001000 RCX: ffffa0ff4cfa3248

The RAX register should hold the number of stripes (including P/Q), but
here it is 0, which is obviously not correct.  The remaining two
registers are sane (see the prototype sketch below the list):

- RBX is the sectorsize
  For x86_64 it should always be 4K and matches the output.

- RCX is the pointers array
  It comes from rbio->finish_pointers and looks like a sane kernel
  address.
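
For reference, all the RAID6 syndrome generators are called through the
raid6_calls table, whose gen_syndrome() hook takes (disks, bytes, ptrs)
in that order; that is how the RAX/RBX/RCX values above are being
interpreted.  A minimal sketch of the relevant declarations, paraphrased
from include/linux/raid/pq.h rather than copied verbatim:

  /* Each RAID6 implementation (SSE2, AVX2, AVX512, ...) fills in this table. */
  struct raid6_calls {
  	void (*gen_syndrome)(int disks, size_t bytes, void **ptrs);
  	/* ... other hooks such as xor_syndrome(), valid() and a name ... */
  };

  /* The fastest usable implementation is selected at init and used via this. */
  extern const struct raid6_calls raid6_call;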

[WORKAROUND]
For now, I can only add extra debug ASSERT()s before we call the raid6
gen_syndrome() helpers and hope to catch the problem.

The debug requires both CONFIG_BTRFS_DEBUG and CONFIG_BTRFS_ASSERT
enabled.
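
To illustrate why both options matter: the new assert_rbio() helper in
the diff below returns early unless both CONFIG_BTRFS_DEBUG and
CONFIG_BTRFS_ASSERT are enabled, and ASSERT() itself only expands to a
real check under CONFIG_BTRFS_ASSERT.  A simplified sketch of that
gating (not the exact btrfs macro):

  #ifdef CONFIG_BTRFS_ASSERT
  /* With asserts enabled, a false condition crashes loudly so we get a report. */
  #define ASSERT(cond)	BUG_ON(!(cond))
  #else
  /* Without CONFIG_BTRFS_ASSERT the check compiles down to nothing. */
  #define ASSERT(cond)	((void)0)
  #endif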

My current guess is some use-after-free, but every report shows only a
corrupted RAX while the pointers still look valid, which doesn't make
much sense for a typical use-after-free.

Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
Qu Wenruo 2024-01-26 13:51:32 +10:30 committed by David Sterba
parent 74cd8cac0b
commit b2324e08b8

@@ -917,6 +917,13 @@ static struct btrfs_raid_bio *alloc_rbio(struct btrfs_fs_info *fs_info,
 	 */
 	ASSERT(stripe_nsectors <= BITS_PER_LONG);
 
+	/*
+	 * Real stripes must be between 2 (2 disks RAID5, aka RAID1) and 256
+	 * (limited by u8).
+	 */
+	ASSERT(real_stripes >= 2);
+	ASSERT(real_stripes <= U8_MAX);
+
 	rbio = kzalloc(sizeof(*rbio), GFP_NOFS);
 	if (!rbio)
 		return ERR_PTR(-ENOMEM);
@@ -954,6 +961,7 @@ static struct btrfs_raid_bio *alloc_rbio(struct btrfs_fs_info *fs_info,
 
 	ASSERT(btrfs_nr_parity_stripes(bioc->map_type));
 	rbio->nr_data = real_stripes - btrfs_nr_parity_stripes(bioc->map_type);
+	ASSERT(rbio->nr_data > 0);
 
 	return rbio;
 }
@@ -1180,6 +1188,26 @@ static inline void bio_list_put(struct bio_list *bio_list)
 		bio_put(bio);
 }
 
+static void assert_rbio(struct btrfs_raid_bio *rbio)
+{
+	if (!IS_ENABLED(CONFIG_BTRFS_DEBUG) ||
+	    !IS_ENABLED(CONFIG_BTRFS_ASSERT))
+		return;
+
+	/*
+	 * At least two stripes (2 disks RAID5), and since real_stripes is U8,
+	 * we won't go beyond 256 disks anyway.
+	 */
+	ASSERT(rbio->real_stripes >= 2);
+	ASSERT(rbio->nr_data > 0);
+
+	/*
+	 * This is another check to make sure nr data stripes is smaller
+	 * than total stripes.
+	 */
+	ASSERT(rbio->nr_data < rbio->real_stripes);
+}
+
 /* Generate PQ for one vertical stripe. */
 static void generate_pq_vertical(struct btrfs_raid_bio *rbio, int sectornr)
 {
@@ -1211,6 +1239,7 @@ static void generate_pq_vertical(struct btrfs_raid_bio *rbio, int sectornr)
 		pointers[stripe++] = kmap_local_page(sector->page) +
 				     sector->pgoff;
 
+		assert_rbio(rbio);
 		raid6_call.gen_syndrome(rbio->real_stripes, sectorsize,
 					pointers);
 	} else {
@@ -2472,6 +2501,7 @@ static int finish_parity_scrub(struct btrfs_raid_bio *rbio)
 	}
 
 	if (has_qstripe) {
+		assert_rbio(rbio);
 		/* RAID6, call the library function to fill in our P/Q */
 		raid6_call.gen_syndrome(rbio->real_stripes, sectorsize,
 					pointers);