1

Compactifying bdev flags

We can easily have up to 24 flags with sane
 atomicity, _without_ pushing anything out
 of the first cacheline of struct block_device.
 -----BEGIN PGP SIGNATURE-----
 
 iHUEABYIAB0WIQQqUNBr3gm4hGXdBJlZ7Krx/gZQ6wUCZkznRwAKCRBZ7Krx/gZQ
 69XpAQDOZCyvYOZ/dlMOKKLf2vAojC/h++E/NjvGt3erbvVN2wEArXMi13ECsoCw
 JYJA3MsmvjuY6VNcm24icf2/p4TMIgo=
 =JyYi
 -----END PGP SIGNATURE-----

Merge tag 'pull-bd_flags-2' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs

Pull bdev flags update from Al Viro:
 "Compactifying bdev flags.

  We can easily have up to 24 flags with sane atomicity, _without_
  pushing anything out of the first cacheline of struct block_device"

* tag 'pull-bd_flags-2' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs:
  bdev: move ->bd_make_it_fail to ->__bd_flags
  bdev: move ->bd_ro_warned to ->__bd_flags
  bdev: move ->bd_has_submit_bio to ->__bd_flags
  bdev: move ->bd_write_holder into ->__bd_flags
  bdev: move ->bd_read_only to ->__bd_flags
  bdev: infrastructure for flags
  wrapper for access to ->bd_partno
  Use bdev_is_partition() instead of open-coding it
This commit is contained in:
Linus Torvalds 2024-05-21 13:02:56 -07:00
commit 3413efa888
12 changed files with 77 additions and 46 deletions

View File

@ -422,13 +422,11 @@ struct block_device *bdev_alloc(struct gendisk *disk, u8 partno)
mutex_init(&bdev->bd_fsfreeze_mutex);
spin_lock_init(&bdev->bd_size_lock);
mutex_init(&bdev->bd_holder_lock);
bdev->bd_partno = partno;
atomic_set(&bdev->__bd_flags, partno);
bdev->bd_mapping = &inode->i_data;
bdev->bd_queue = disk->queue;
if (partno)
bdev->bd_has_submit_bio = disk->part0->bd_has_submit_bio;
else
bdev->bd_has_submit_bio = false;
if (partno && bdev_test_flag(disk->part0, BD_HAS_SUBMIT_BIO))
bdev_set_flag(bdev, BD_HAS_SUBMIT_BIO);
bdev->bd_stats = alloc_percpu(struct disk_stats);
if (!bdev->bd_stats) {
iput(inode);
@ -642,7 +640,7 @@ static void bd_end_claim(struct block_device *bdev, void *holder)
bdev->bd_holder = NULL;
bdev->bd_holder_ops = NULL;
mutex_unlock(&bdev->bd_holder_lock);
if (bdev->bd_write_holder)
if (bdev_test_flag(bdev, BD_WRITE_HOLDER))
unblock = true;
}
if (!whole->bd_holders)
@ -655,7 +653,7 @@ static void bd_end_claim(struct block_device *bdev, void *holder)
*/
if (unblock) {
disk_unblock_events(bdev->bd_disk);
bdev->bd_write_holder = false;
bdev_clear_flag(bdev, BD_WRITE_HOLDER);
}
}
@ -922,9 +920,10 @@ int bdev_open(struct block_device *bdev, blk_mode_t mode, void *holder,
* writeable reference is too fragile given the way @mode is
* used in blkdev_get/put().
*/
if ((mode & BLK_OPEN_WRITE) && !bdev->bd_write_holder &&
if ((mode & BLK_OPEN_WRITE) &&
!bdev_test_flag(bdev, BD_WRITE_HOLDER) &&
(disk->event_flags & DISK_EVENT_FLAG_BLOCK_ON_EXCL_WRITE)) {
bdev->bd_write_holder = true;
bdev_set_flag(bdev, BD_WRITE_HOLDER);
unblock_events = false;
}
}

View File

@ -496,7 +496,8 @@ __setup("fail_make_request=", setup_fail_make_request);
bool should_fail_request(struct block_device *part, unsigned int bytes)
{
return part->bd_make_it_fail && should_fail(&fail_make_request, bytes);
return bdev_test_flag(part, BD_MAKE_IT_FAIL) &&
should_fail(&fail_make_request, bytes);
}
static int __init fail_make_request_debugfs(void)
@ -516,10 +517,11 @@ static inline void bio_check_ro(struct bio *bio)
if (op_is_flush(bio->bi_opf) && !bio_sectors(bio))
return;
if (bio->bi_bdev->bd_ro_warned)
if (bdev_test_flag(bio->bi_bdev, BD_RO_WARNED))
return;
bio->bi_bdev->bd_ro_warned = true;
bdev_set_flag(bio->bi_bdev, BD_RO_WARNED);
/*
* Use ioctl to set underlying disk of raid/dm to read-only
* will trigger this.
@ -616,7 +618,7 @@ static void __submit_bio(struct bio *bio)
if (unlikely(!blk_crypto_bio_prep(&bio)))
return;
if (!bio->bi_bdev->bd_has_submit_bio) {
if (!bdev_test_flag(bio->bi_bdev, BD_HAS_SUBMIT_BIO)) {
blk_mq_submit_bio(bio);
} else if (likely(bio_queue_enter(bio) == 0)) {
struct gendisk *disk = bio->bi_bdev->bd_disk;
@ -730,7 +732,7 @@ void submit_bio_noacct_nocheck(struct bio *bio)
*/
if (current->bio_list)
bio_list_add(&current->bio_list[0], bio);
else if (!bio->bi_bdev->bd_has_submit_bio)
else if (!bdev_test_flag(bio->bi_bdev, BD_HAS_SUBMIT_BIO))
__submit_bio_noacct_mq(bio);
else
__submit_bio_noacct(bio);
@ -766,7 +768,8 @@ void submit_bio_noacct(struct bio *bio)
if (!bio_flagged(bio, BIO_REMAPPED)) {
if (unlikely(bio_check_eod(bio)))
goto end_io;
if (bdev->bd_partno && unlikely(blk_partition_remap(bio)))
if (bdev_is_partition(bdev) &&
unlikely(blk_partition_remap(bio)))
goto end_io;
}
@ -991,7 +994,7 @@ again:
(end || part_in_flight(part)))
__part_stat_add(part, io_ticks, now - stamp);
if (part->bd_partno) {
if (bdev_is_partition(part)) {
part = bdev_whole(part);
goto again;
}

View File

@ -93,7 +93,7 @@ static bool blk_mq_check_inflight(struct request *rq, void *priv)
struct mq_inflight *mi = priv;
if (rq->part && blk_do_io_stat(rq) &&
(!mi->part->bd_partno || rq->part == mi->part) &&
(!bdev_is_partition(mi->part) || rq->part == mi->part) &&
blk_mq_rq_state(rq) == MQ_RQ_IN_FLIGHT)
mi->inflight[rq_data_dir(rq)]++;

View File

@ -1257,7 +1257,7 @@ void blk_zone_write_plug_bio_endio(struct bio *bio)
* is not called. So we need to schedule execution of the next
* plugged BIO here.
*/
if (bio->bi_bdev->bd_has_submit_bio)
if (bdev_test_flag(bio->bi_bdev, BD_HAS_SUBMIT_BIO))
disk_zone_wplug_unplug_bio(disk, zwplug);
/* Drop the reference we took when entering this function. */
@ -1326,7 +1326,7 @@ static void blk_zone_wplug_bio_work(struct work_struct *work)
* path for BIO-based devices will not do that. So drop this extra
* reference here.
*/
if (bdev->bd_has_submit_bio)
if (bdev_test_flag(bdev, BD_HAS_SUBMIT_BIO))
blk_queue_exit(bdev->bd_disk->queue);
put_zwplug:

View File

@ -78,7 +78,7 @@ static int __init devt_from_partuuid(const char *uuid_str, dev_t *devt)
* to the partition number found by UUID.
*/
*devt = part_devt(dev_to_disk(dev),
dev_to_bdev(dev)->bd_partno + offset);
bdev_partno(dev_to_bdev(dev)) + offset);
} else {
*devt = dev->devt;
}

View File

@ -411,7 +411,8 @@ int __must_check device_add_disk(struct device *parent, struct gendisk *disk,
elevator_init_mq(disk->queue);
/* Mark bdev as having a submit_bio, if needed */
disk->part0->bd_has_submit_bio = disk->fops->submit_bio != NULL;
if (disk->fops->submit_bio)
bdev_set_flag(disk->part0, BD_HAS_SUBMIT_BIO);
/*
* If the driver provides an explicit major number it also must provide
@ -1064,7 +1065,8 @@ static DEVICE_ATTR(partscan, 0444, partscan_show, NULL);
ssize_t part_fail_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
return sprintf(buf, "%d\n", dev_to_bdev(dev)->bd_make_it_fail);
return sprintf(buf, "%d\n",
bdev_test_flag(dev_to_bdev(dev), BD_MAKE_IT_FAIL));
}
ssize_t part_fail_store(struct device *dev,
@ -1073,9 +1075,12 @@ ssize_t part_fail_store(struct device *dev,
{
int i;
if (count > 0 && sscanf(buf, "%d", &i) > 0)
dev_to_bdev(dev)->bd_make_it_fail = i;
if (count > 0 && sscanf(buf, "%d", &i) > 0) {
if (i)
bdev_set_flag(dev_to_bdev(dev), BD_MAKE_IT_FAIL);
else
bdev_clear_flag(dev_to_bdev(dev), BD_MAKE_IT_FAIL);
}
return count;
}

View File

@ -431,7 +431,10 @@ static int blkdev_roset(struct block_device *bdev, unsigned cmd,
if (ret)
return ret;
}
bdev->bd_read_only = n;
if (n)
bdev_set_flag(bdev, BD_READ_ONLY);
else
bdev_clear_flag(bdev, BD_READ_ONLY);
return 0;
}

View File

@ -173,7 +173,7 @@ static struct parsed_partitions *check_partition(struct gendisk *hd)
static ssize_t part_partition_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
return sprintf(buf, "%d\n", dev_to_bdev(dev)->bd_partno);
return sprintf(buf, "%d\n", bdev_partno(dev_to_bdev(dev)));
}
static ssize_t part_start_show(struct device *dev,
@ -250,7 +250,7 @@ static int part_uevent(const struct device *dev, struct kobj_uevent_env *env)
{
const struct block_device *part = dev_to_bdev(dev);
add_uevent_var(env, "PARTN=%u", part->bd_partno);
add_uevent_var(env, "PARTN=%u", bdev_partno(part));
if (part->bd_meta_info && part->bd_meta_info->volname[0])
add_uevent_var(env, "PARTNAME=%s", part->bd_meta_info->volname);
return 0;
@ -267,7 +267,7 @@ void drop_partition(struct block_device *part)
{
lockdep_assert_held(&part->bd_disk->open_mutex);
xa_erase(&part->bd_disk->part_tbl, part->bd_partno);
xa_erase(&part->bd_disk->part_tbl, bdev_partno(part));
kobject_put(part->bd_holder_dir);
device_del(&part->bd_device);
@ -338,8 +338,8 @@ static struct block_device *add_partition(struct gendisk *disk, int partno,
pdev->parent = ddev;
/* in consecutive minor range? */
if (bdev->bd_partno < disk->minors) {
devt = MKDEV(disk->major, disk->first_minor + bdev->bd_partno);
if (bdev_partno(bdev) < disk->minors) {
devt = MKDEV(disk->major, disk->first_minor + bdev_partno(bdev));
} else {
err = blk_alloc_ext_minor();
if (err < 0)
@ -404,7 +404,7 @@ static bool partition_overlaps(struct gendisk *disk, sector_t start,
rcu_read_lock();
xa_for_each_start(&disk->part_tbl, idx, part, 1) {
if (part->bd_partno != skip_partno &&
if (bdev_partno(part) != skip_partno &&
start < part->bd_start_sect + bdev_nr_sectors(part) &&
start + length > part->bd_start_sect) {
overlap = true;

View File

@ -45,10 +45,15 @@ struct block_device {
struct request_queue * bd_queue;
struct disk_stats __percpu *bd_stats;
unsigned long bd_stamp;
bool bd_read_only; /* read-only policy */
u8 bd_partno;
bool bd_write_holder;
bool bd_has_submit_bio;
atomic_t __bd_flags; // partition number + flags
#define BD_PARTNO 255 // lower 8 bits; assign-once
#define BD_READ_ONLY (1u<<8) // read-only policy
#define BD_WRITE_HOLDER (1u<<9)
#define BD_HAS_SUBMIT_BIO (1u<<10)
#define BD_RO_WARNED (1u<<11)
#ifdef CONFIG_FAIL_MAKE_REQUEST
#define BD_MAKE_IT_FAIL (1u<<12)
#endif
dev_t bd_dev;
struct address_space *bd_mapping; /* page cache */
@ -65,10 +70,6 @@ struct block_device {
struct mutex bd_fsfreeze_mutex; /* serialize freeze/thaw */
struct partition_meta_info *bd_meta_info;
#ifdef CONFIG_FAIL_MAKE_REQUEST
bool bd_make_it_fail;
#endif
bool bd_ro_warned;
int bd_writers;
/*
* keep this out-of-line as it's both big and not needed in the fast

View File

@ -718,15 +718,35 @@ void invalidate_disk(struct gendisk *disk);
void set_disk_ro(struct gendisk *disk, bool read_only);
void disk_uevent(struct gendisk *disk, enum kobject_action action);
/*
 * Return the partition number stored in @bdev: the bits of ->__bd_flags
 * selected by the BD_PARTNO mask, read with atomic_read().
 */
static inline u8 bdev_partno(const struct block_device *bdev)
{
return atomic_read(&bdev->__bd_flags) & BD_PARTNO;
}
/*
 * Test flag bits in @bdev->__bd_flags.
 *
 * Returns true if at least one bit of @flag is currently set, false
 * otherwise (the masked value is converted to bool on return).
 */
static inline bool bdev_test_flag(const struct block_device *bdev, unsigned flag)
{
return atomic_read(&bdev->__bd_flags) & flag;
}
/*
 * Set the bits of @flag in @bdev->__bd_flags via atomic_or().
 *
 * NOTE(review): @flag is not validated here; callers are presumably
 * expected to pass only BD_* flag bits and never bits inside the
 * BD_PARTNO mask - confirm against callers.
 */
static inline void bdev_set_flag(struct block_device *bdev, unsigned flag)
{
atomic_or(flag, &bdev->__bd_flags);
}
/*
 * Clear the bits of @flag in @bdev->__bd_flags via atomic_andnot().
 *
 * NOTE(review): @flag is not validated here; passing bits inside the
 * BD_PARTNO mask would alter the stored partition number - callers are
 * presumably expected to pass only BD_* flag bits.
 */
static inline void bdev_clear_flag(struct block_device *bdev, unsigned flag)
{
atomic_andnot(flag, &bdev->__bd_flags);
}
static inline int get_disk_ro(struct gendisk *disk)
{
return disk->part0->bd_read_only ||
return bdev_test_flag(disk->part0, BD_READ_ONLY) ||
test_bit(GD_READ_ONLY, &disk->state);
}
static inline int bdev_read_only(struct block_device *bdev)
{
return bdev->bd_read_only || get_disk_ro(bdev->bd_disk);
return bdev_test_flag(bdev, BD_READ_ONLY) || get_disk_ro(bdev->bd_disk);
}
bool set_capacity_and_notify(struct gendisk *disk, sector_t size);
@ -1086,7 +1106,7 @@ static inline int sb_issue_zeroout(struct super_block *sb, sector_t block,
static inline bool bdev_is_partition(struct block_device *bdev)
{
return bdev->bd_partno;
return bdev_partno(bdev) != 0;
}
enum blk_default_limits {

View File

@ -59,7 +59,7 @@ static inline void part_stat_set_all(struct block_device *part, int value)
#define part_stat_add(part, field, addnd) do { \
__part_stat_add((part), field, addnd); \
if ((part)->bd_partno) \
if (bdev_is_partition(part)) \
__part_stat_add(bdev_whole(part), field, addnd); \
} while (0)

View File

@ -966,13 +966,13 @@ char *bdev_name(char *buf, char *end, struct block_device *bdev,
hd = bdev->bd_disk;
buf = string(buf, end, hd->disk_name, spec);
if (bdev->bd_partno) {
if (bdev_is_partition(bdev)) {
if (isdigit(hd->disk_name[strlen(hd->disk_name)-1])) {
if (buf < end)
*buf = 'p';
buf++;
}
buf = number(buf, end, bdev->bd_partno, spec);
buf = number(buf, end, bdev_partno(bdev), spec);
}
return buf;
}