1
linux/block
Li Lingfeng b313a8c835 block: Fix lockdep warning in blk_mq_mark_tag_wait
Lockdep reported a warning in Linux version 6.6:

[  414.344659] ================================
[  414.345155] WARNING: inconsistent lock state
[  414.345658] 6.6.0-07439-gba2303cacfda #6 Not tainted
[  414.346221] --------------------------------
[  414.346712] inconsistent {IN-SOFTIRQ-W} -> {SOFTIRQ-ON-W} usage.
[  414.347545] kworker/u10:3/1152 [HC0[0]:SC0[0]:HE0:SE1] takes:
[  414.349245] ffff88810edd1098 (&sbq->ws[i].wait){+.?.}-{2:2}, at: blk_mq_dispatch_rq_list+0x131c/0x1ee0
[  414.351204] {IN-SOFTIRQ-W} state was registered at:
[  414.351751]   lock_acquire+0x18d/0x460
[  414.352218]   _raw_spin_lock_irqsave+0x39/0x60
[  414.352769]   __wake_up_common_lock+0x22/0x60
[  414.353289]   sbitmap_queue_wake_up+0x375/0x4f0
[  414.353829]   sbitmap_queue_clear+0xdd/0x270
[  414.354338]   blk_mq_put_tag+0xdf/0x170
[  414.354807]   __blk_mq_free_request+0x381/0x4d0
[  414.355335]   blk_mq_free_request+0x28b/0x3e0
[  414.355847]   __blk_mq_end_request+0x242/0xc30
[  414.356367]   scsi_end_request+0x2c1/0x830
[  414.345155] WARNING: inconsistent lock state
[  414.345658] 6.6.0-07439-gba2303cacfda #6 Not tainted
[  414.346221] --------------------------------
[  414.346712] inconsistent {IN-SOFTIRQ-W} -> {SOFTIRQ-ON-W} usage.
[  414.347545] kworker/u10:3/1152 [HC0[0]:SC0[0]:HE0:SE1] takes:
[  414.349245] ffff88810edd1098 (&sbq->ws[i].wait){+.?.}-{2:2}, at: blk_mq_dispatch_rq_list+0x131c/0x1ee0
[  414.351204] {IN-SOFTIRQ-W} state was registered at:
[  414.351751]   lock_acquire+0x18d/0x460
[  414.352218]   _raw_spin_lock_irqsave+0x39/0x60
[  414.352769]   __wake_up_common_lock+0x22/0x60
[  414.353289]   sbitmap_queue_wake_up+0x375/0x4f0
[  414.353829]   sbitmap_queue_clear+0xdd/0x270
[  414.354338]   blk_mq_put_tag+0xdf/0x170
[  414.354807]   __blk_mq_free_request+0x381/0x4d0
[  414.355335]   blk_mq_free_request+0x28b/0x3e0
[  414.355847]   __blk_mq_end_request+0x242/0xc30
[  414.356367]   scsi_end_request+0x2c1/0x830
[  414.356863]   scsi_io_completion+0x177/0x1610
[  414.357379]   scsi_complete+0x12f/0x260
[  414.357856]   blk_complete_reqs+0xba/0xf0
[  414.358338]   __do_softirq+0x1b0/0x7a2
[  414.358796]   irq_exit_rcu+0x14b/0x1a0
[  414.359262]   sysvec_call_function_single+0xaf/0xc0
[  414.359828]   asm_sysvec_call_function_single+0x1a/0x20
[  414.360426]   default_idle+0x1e/0x30
[  414.360873]   default_idle_call+0x9b/0x1f0
[  414.361390]   do_idle+0x2d2/0x3e0
[  414.361819]   cpu_startup_entry+0x55/0x60
[  414.362314]   start_secondary+0x235/0x2b0
[  414.362809]   secondary_startup_64_no_verify+0x18f/0x19b
[  414.363413] irq event stamp: 428794
[  414.363825] hardirqs last  enabled at (428793): [<ffffffff816bfd1c>] ktime_get+0x1dc/0x200
[  414.364694] hardirqs last disabled at (428794): [<ffffffff85470177>] _raw_spin_lock_irq+0x47/0x50
[  414.365629] softirqs last  enabled at (428444): [<ffffffff85474780>] __do_softirq+0x540/0x7a2
[  414.366522] softirqs last disabled at (428419): [<ffffffff813f65ab>] irq_exit_rcu+0x14b/0x1a0
[  414.367425]
               other info that might help us debug this:
[  414.368194]  Possible unsafe locking scenario:
[  414.368900]        CPU0
[  414.369225]        ----
[  414.369548]   lock(&sbq->ws[i].wait);
[  414.370000]   <Interrupt>
[  414.370342]     lock(&sbq->ws[i].wait);
[  414.370802]
                *** DEADLOCK ***
[  414.371569] 5 locks held by kworker/u10:3/1152:
[  414.372088]  #0: ffff88810130e938 ((wq_completion)writeback){+.+.}-{0:0}, at: process_scheduled_works+0x357/0x13f0
[  414.373180]  #1: ffff88810201fdb8 ((work_completion)(&(&wb->dwork)->work)){+.+.}-{0:0}, at: process_scheduled_works+0x3a3/0x13f0
[  414.374384]  #2: ffffffff86ffbdc0 (rcu_read_lock){....}-{1:2}, at: blk_mq_run_hw_queue+0x637/0xa00
[  414.375342]  #3: ffff88810edd1098 (&sbq->ws[i].wait){+.?.}-{2:2}, at: blk_mq_dispatch_rq_list+0x131c/0x1ee0
[  414.376377]  #4: ffff888106205a08 (&hctx->dispatch_wait_lock){+.-.}-{2:2}, at: blk_mq_dispatch_rq_list+0x1337/0x1ee0
[  414.378607]
               stack backtrace:
[  414.379177] CPU: 0 PID: 1152 Comm: kworker/u10:3 Not tainted 6.6.0-07439-gba2303cacfda #6
[  414.380032] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-0-g155821a1990b-prebuilt.qemu.org 04/01/2014
[  414.381177] Workqueue: writeback wb_workfn (flush-253:0)
[  414.381805] Call Trace:
[  414.382136]  <TASK>
[  414.382429]  dump_stack_lvl+0x91/0xf0
[  414.382884]  mark_lock_irq+0xb3b/0x1260
[  414.383367]  ? __pfx_mark_lock_irq+0x10/0x10
[  414.383889]  ? stack_trace_save+0x8e/0xc0
[  414.384373]  ? __pfx_stack_trace_save+0x10/0x10
[  414.384903]  ? graph_lock+0xcf/0x410
[  414.385350]  ? save_trace+0x3d/0xc70
[  414.385808]  mark_lock.part.20+0x56d/0xa90
[  414.386317]  mark_held_locks+0xb0/0x110
[  414.386791]  ? __pfx_do_raw_spin_lock+0x10/0x10
[  414.387320]  lockdep_hardirqs_on_prepare+0x297/0x3f0
[  414.387901]  ? _raw_spin_unlock_irq+0x28/0x50
[  414.388422]  trace_hardirqs_on+0x58/0x100
[  414.388917]  _raw_spin_unlock_irq+0x28/0x50
[  414.389422]  __blk_mq_tag_busy+0x1d6/0x2a0
[  414.389920]  __blk_mq_get_driver_tag+0x761/0x9f0
[  414.390899]  blk_mq_dispatch_rq_list+0x1780/0x1ee0
[  414.391473]  ? __pfx_blk_mq_dispatch_rq_list+0x10/0x10
[  414.392070]  ? sbitmap_get+0x2b8/0x450
[  414.392533]  ? __blk_mq_get_driver_tag+0x210/0x9f0
[  414.393095]  __blk_mq_sched_dispatch_requests+0xd99/0x1690
[  414.393730]  ? elv_attempt_insert_merge+0x1b1/0x420
[  414.394302]  ? __pfx___blk_mq_sched_dispatch_requests+0x10/0x10
[  414.394970]  ? lock_acquire+0x18d/0x460
[  414.395456]  ? blk_mq_run_hw_queue+0x637/0xa00
[  414.395986]  ? __pfx_lock_acquire+0x10/0x10
[  414.396499]  blk_mq_sched_dispatch_requests+0x109/0x190
[  414.397100]  blk_mq_run_hw_queue+0x66e/0xa00
[  414.397616]  blk_mq_flush_plug_list.part.17+0x614/0x2030
[  414.398244]  ? __pfx_blk_mq_flush_plug_list.part.17+0x10/0x10
[  414.398897]  ? writeback_sb_inodes+0x241/0xcc0
[  414.399429]  blk_mq_flush_plug_list+0x65/0x80
[  414.399957]  __blk_flush_plug+0x2f1/0x530
[  414.400458]  ? __pfx___blk_flush_plug+0x10/0x10
[  414.400999]  blk_finish_plug+0x59/0xa0
[  414.401467]  wb_writeback+0x7cc/0x920
[  414.401935]  ? __pfx_wb_writeback+0x10/0x10
[  414.402442]  ? mark_held_locks+0xb0/0x110
[  414.402931]  ? __pfx_do_raw_spin_lock+0x10/0x10
[  414.403462]  ? lockdep_hardirqs_on_prepare+0x297/0x3f0
[  414.404062]  wb_workfn+0x2b3/0xcf0
[  414.404500]  ? __pfx_wb_workfn+0x10/0x10
[  414.404989]  process_scheduled_works+0x432/0x13f0
[  414.405546]  ? __pfx_process_scheduled_works+0x10/0x10
[  414.406139]  ? do_raw_spin_lock+0x101/0x2a0
[  414.406641]  ? assign_work+0x19b/0x240
[  414.407106]  ? lock_is_held_type+0x9d/0x110
[  414.407604]  worker_thread+0x6f2/0x1160
[  414.408075]  ? __kthread_parkme+0x62/0x210
[  414.408572]  ? lockdep_hardirqs_on_prepare+0x297/0x3f0
[  414.409168]  ? __kthread_parkme+0x13c/0x210
[  414.409678]  ? __pfx_worker_thread+0x10/0x10
[  414.410191]  kthread+0x33c/0x440
[  414.410602]  ? __pfx_kthread+0x10/0x10
[  414.411068]  ret_from_fork+0x4d/0x80
[  414.411526]  ? __pfx_kthread+0x10/0x10
[  414.411993]  ret_from_fork_asm+0x1b/0x30
[  414.412489]  </TASK>

When interrupt is turned on while a lock holding by spin_lock_irq it
throws a warning because of potential deadlock.

blk_mq_prep_dispatch_rq
 blk_mq_get_driver_tag
  __blk_mq_get_driver_tag
   __blk_mq_alloc_driver_tag
    blk_mq_tag_busy -> tag is already busy
    // failed to get driver tag
 blk_mq_mark_tag_wait
  spin_lock_irq(&wq->lock) -> lock A (&sbq->ws[i].wait)
  __add_wait_queue(wq, wait) -> wait queue active
  blk_mq_get_driver_tag
  __blk_mq_tag_busy
-> 1) tag must be idle, which means there can't be inflight IO
   spin_lock_irq(&tags->lock) -> lock B (hctx->tags)
   spin_unlock_irq(&tags->lock) -> unlock B, turn on interrupt accidentally
-> 2) context must be preempt by IO interrupt to trigger deadlock.

As shown above, the deadlock is not possible in theory, but the warning
still need to be fixed.

Fix it by using spin_lock_irqsave to get lockB instead of spin_lock_irq.

Fixes: 4f1731df60 ("blk-mq: fix potential io hang by wrong 'wake_batch'")
Signed-off-by: Li Lingfeng <lilingfeng3@huawei.com>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Reviewed-by: Yu Kuai <yukuai3@huawei.com>
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Link: https://lore.kernel.org/r/20240815024736.2040971-1-lilingfeng@huaweicloud.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
2024-08-15 19:25:03 -06:00
..
partitions Compactifying bdev flags 2024-05-21 13:02:56 -07:00
badblocks.c
bdev.c for-6.11/block-20240710 2024-07-15 14:20:22 -07:00
bfq-cgroup.c block, bfq: remove blkg_path() 2024-06-18 09:22:45 -06:00
bfq-iosched.c block: BFQ: Refactor bfq_exit_icq() to silence sparse warning 2024-06-16 15:30:32 -06:00
bfq-iosched.h block, bfq: remove blkg_path() 2024-06-18 09:22:45 -06:00
bfq-wf2q.c
bio-integrity.c for-6.11/block-post-20240722 2024-07-22 11:04:09 -07:00
bio.c for-6.11/block-post-20240722 2024-07-22 11:04:09 -07:00
blk-cgroup-fc-appid.c
blk-cgroup-rwstat.c blk-cgroup: use group allocation/free of per-cpu counters API 2024-04-03 09:10:17 -06:00
blk-cgroup-rwstat.h
blk-cgroup.c blk-cgroup: move congestion_count to struct blkcg 2024-07-19 09:40:07 -06:00
blk-cgroup.h blk-cgroup: move congestion_count to struct blkcg 2024-07-19 09:40:07 -06:00
blk-core.c block: avoid polling configuration errors 2024-07-19 09:35:35 -06:00
blk-crypto-fallback.c block, fs: Restore the per-bio/request data lifetime fields 2024-02-06 14:31:05 +01:00
blk-crypto-internal.h
blk-crypto-profile.c
blk-crypto-sysfs.c
blk-crypto.c
blk-flush.c for-6.11/block-20240710 2024-07-15 14:20:22 -07:00
blk-ia-ranges.c
blk-integrity.c block: cleanup flag_{show,store} 2024-06-17 10:13:37 -06:00
blk-ioc.c
blk-iocost.c blk-iocost: do not WARN if iocg was already offlined 2024-04-19 08:06:24 -06:00
blk-iolatency.c block: add blk_time_get_ns() and blk_time_get() helpers 2024-02-05 10:07:22 -07:00
blk-ioprio.c
blk-ioprio.h
blk-lib.c blk-lib: check for kill signal in ioctl BLKZEROOUT 2024-07-05 00:53:15 -06:00
blk-map.c block: don't free the integrity payload in bio_integrity_unmap_free_user 2024-07-03 10:21:16 -06:00
blk-merge.c block: take offset into account in blk_bvec_map_sg again 2024-07-09 01:02:44 -06:00
blk-mq-cpumap.c
blk-mq-debugfs.c block: Catch possible entries missing from rqf_name[] 2024-07-19 09:32:49 -06:00
blk-mq-debugfs.h block: Replace zone_wlock debugfs entry with zone_wplugs entry 2024-04-17 08:44:03 -06:00
blk-mq-pci.c
blk-mq-sched.c
blk-mq-sched.h
blk-mq-sysfs.c
blk-mq-tag.c block: Fix lockdep warning in blk_mq_mark_tag_wait 2024-08-15 19:25:03 -06:00
blk-mq-virtio.c
blk-mq.c block: Call .limit_depth() after .hctx has been set 2024-07-02 08:47:45 -06:00
blk-mq.h block: Relocate BLK_MQ_CPU_WORK_BATCH 2024-07-19 09:32:48 -06:00
blk-pm.c
blk-pm.h
blk-rq-qos.c
blk-rq-qos.h
blk-settings.c block: Validate logical block size in blk_validate_limits() 2024-07-09 00:00:17 -06:00
blk-stat.c blk-throttle: remove CONFIG_BLK_DEV_THROTTLING_LOW 2024-05-09 09:44:55 -06:00
blk-stat.h block: delete redundant function declaration 2024-05-27 13:58:06 -06:00
blk-sysfs.c block: pass a gendisk to the queue_sysfs_entry methods 2024-06-28 15:06:16 -06:00
blk-throttle.c blk-throttle: remove more latency dead-code 2024-07-27 10:33:25 -06:00
blk-throttle.h blk-throttle: Fix incorrect display of io.max 2024-05-30 19:44:29 -06:00
blk-timeout.c
blk-wbt.c blk-wbt: don't throttle swap writes in direct reclaim 2024-07-01 06:51:53 -06:00
blk-wbt.h
blk-zoned.c for-6.11/block-20240710 2024-07-15 14:20:22 -07:00
blk.h for-6.11/block-post-20240722 2024-07-22 11:04:09 -07:00
bounce.c block: split integrity support out of bio.h 2024-07-03 10:21:15 -06:00
bsg-lib.c scsi: bsg: Pass dev to blk_mq_alloc_queue() 2024-05-30 20:22:15 -04:00
bsg.c
disk-events.c
early-lookup.c wrapper for access to ->bd_partno 2024-05-02 17:48:09 -04:00
elevator.c block: pass a gendisk to the queue_sysfs_entry methods 2024-06-28 15:06:16 -06:00
elevator.h block: pass a gendisk to the queue_sysfs_entry methods 2024-06-28 15:06:16 -06:00
fops.c block: clean up the check in blkdev_iomap_begin() 2024-06-27 05:56:35 -06:00
genhd.c block: fix deadlock between sd_remove & sd_release 2024-07-24 09:51:21 -06:00
holder.c block: fix deadlock between bd_link_disk_holder and partition scan 2024-02-23 07:44:19 -07:00
ioctl.c blk-lib: check for kill signal in ioctl BLKZEROOUT 2024-07-05 00:53:15 -06:00
ioprio.c
Kconfig block: remove the blk_integrity_profile structure 2024-06-14 10:20:06 -06:00
Kconfig.iosched
kyber-iosched.c
Makefile block: remove the blk_integrity_profile structure 2024-06-14 10:20:06 -06:00
mq-deadline.c block/mq-deadline: Fix the tag reservation code 2024-07-02 08:47:45 -06:00
opal_proto.h block: sed-opal: handle empty atoms when parsing response 2024-02-16 15:52:45 -07:00
sed-opal.c block: sed-opal: avoid possible wrong address reference in read_sed_opal_key() 2024-06-12 10:53:20 -06:00
t10-pi.c block: remove bio_integrity_process 2024-06-28 10:29:42 -06:00