2a07bb64d8
This reverts commit bc792884b7
("s390/dasd: Establish DMA alignment"). Quoting the original commit: linux-next commit bf8d08532b
("iomap: add support for dma aligned direct-io") changes the alignment requirement to come from the block device rather than the block size, and the default alignment requirement is 512-byte boundaries. Since DASD I/O has page alignments for IDAW/TIDAW requests, let's override this value to restore the expected behavior. I mentioned TIDAW, but that was wrong. TIDAWs have no distinct alignment requirement (per p. 15-70 of POPS SA22-7832-13): Unless otherwise specified, TIDAWs may designate a block of main storage on any boundary and length up to 4K bytes, provided the specified block does not cross a 4 K-byte boundary. IDAWs do, but the original commit neglected that while ECKD DASD are typically formatted in 4096-byte blocks, they don't HAVE to be. Formatting an ECKD volume with smaller blocks is permitted (dasdfmt -b xxx), and the problematic commit enforces alignment properties to such a device that will result in errors, such as: [test@host ~]# lsdasd -l a367 | grep blksz blksz: 512 [test@host ~]# mkfs.xfs -f /dev/disk/by-path/ccw-0.0.a367-part1 meta-data=/dev/dasdc1 isize=512 agcount=4, agsize=230075 blks = sectsz=512 attr=2, projid32bit=1 = crc=1 finobt=1, sparse=1, rmapbt=1 = reflink=1 bigtime=1 inobtcount=1 nrext64=1 data = bsize=4096 blocks=920299, imaxpct=25 = sunit=0 swidth=0 blks naming =version 2 bsize=4096 ascii-ci=0, ftype=1 log =internal log bsize=4096 blocks=16384, version=2 = sectsz=512 sunit=0 blks, lazy-count=1 realtime =none extsz=4096 blocks=0, rtextents=0 error reading existing superblock: Invalid argument mkfs.xfs: pwrite failed: Invalid argument libxfs_bwrite: write failed on (unknown) bno 0x70565c/0x100, err=22 mkfs.xfs: Releasing dirty buffer to free list! found dirty buffer (bulk) on free list! mkfs.xfs: pwrite failed: Invalid argument ...snipped... The original commit omitted the FBA discipline for just this reason, but the formatted block size of the other disciplines was overlooked. 
The solution to all of this is to revert to the original behavior, such that the block size can be respected. There were two commits [1] that moved this code in the interim, so a straight git-revert is not possible, but the change is straightforward. But what of the original problem? That was manifested with a direct-io QEMU guest, where QEMU itself was changed a month or two later with commit 25474d90aa ("block: use the request length for iov alignment") such that the blamed kernel commit is unnecessary. [1] commit 0127a47f58
("dasd: move queue setup to common code") commit fde07a4d74
("dasd: use the atomic queue limits API") Fixes: bc792884b7
("s390/dasd: Establish DMA alignment") Reviewed-by: Stefan Haberland <sth@linux.ibm.com> Signed-off-by: Eric Farman <farman@linux.ibm.com> Signed-off-by: Stefan Haberland <sth@linux.ibm.com> Link: https://lore.kernel.org/r/20240812125733.126431-2-sth@linux.ibm.com Signed-off-by: Jens Axboe <axboe@kernel.dk>
207 lines
5.4 KiB
C
207 lines
5.4 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
/*
|
|
* Author(s)......: Holger Smolinski <Holger.Smolinski@de.ibm.com>
|
|
* Horst Hummel <Horst.Hummel@de.ibm.com>
|
|
* Carsten Otte <Cotte@de.ibm.com>
|
|
* Martin Schwidefsky <schwidefsky@de.ibm.com>
|
|
* Bugreports.to..: <Linux390@de.ibm.com>
|
|
* Copyright IBM Corp. 1999, 2001
|
|
*
|
|
* gendisk related functions for the dasd driver.
|
|
*
|
|
*/
|
|
|
|
#include <linux/interrupt.h>
|
|
#include <linux/major.h>
|
|
#include <linux/fs.h>
|
|
#include <linux/blkpg.h>
|
|
|
|
#include <linux/uaccess.h>
|
|
|
|
#include "dasd_int.h"
|
|
|
|
static unsigned int queue_depth = 32;
|
|
static unsigned int nr_hw_queues = 4;
|
|
|
|
module_param(queue_depth, uint, 0444);
|
|
MODULE_PARM_DESC(queue_depth, "Default queue depth for new DASD devices");
|
|
|
|
module_param(nr_hw_queues, uint, 0444);
|
|
MODULE_PARM_DESC(nr_hw_queues, "Default number of hardware queues for new DASD devices");
|
|
|
|
/*
|
|
* Allocate and register gendisk structure for device.
|
|
*/
|
|
int dasd_gendisk_alloc(struct dasd_block *block)
|
|
{
|
|
struct queue_limits lim = {
|
|
/*
|
|
* With page sized segments, each segment can be translated into
|
|
* one idaw/tidaw.
|
|
*/
|
|
.max_segment_size = PAGE_SIZE,
|
|
.seg_boundary_mask = PAGE_SIZE - 1,
|
|
.max_segments = USHRT_MAX,
|
|
};
|
|
struct gendisk *gdp;
|
|
struct dasd_device *base;
|
|
int len, rc;
|
|
|
|
/* Make sure the minor for this device exists. */
|
|
base = block->base;
|
|
if (base->devindex >= DASD_PER_MAJOR)
|
|
return -EBUSY;
|
|
|
|
block->tag_set.ops = &dasd_mq_ops;
|
|
block->tag_set.cmd_size = sizeof(struct dasd_ccw_req);
|
|
block->tag_set.nr_hw_queues = nr_hw_queues;
|
|
block->tag_set.queue_depth = queue_depth;
|
|
block->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
|
|
block->tag_set.numa_node = NUMA_NO_NODE;
|
|
rc = blk_mq_alloc_tag_set(&block->tag_set);
|
|
if (rc)
|
|
return rc;
|
|
|
|
gdp = blk_mq_alloc_disk(&block->tag_set, &lim, block);
|
|
if (IS_ERR(gdp)) {
|
|
blk_mq_free_tag_set(&block->tag_set);
|
|
return PTR_ERR(gdp);
|
|
}
|
|
|
|
/* Initialize gendisk structure. */
|
|
gdp->major = DASD_MAJOR;
|
|
gdp->first_minor = base->devindex << DASD_PARTN_BITS;
|
|
gdp->minors = 1 << DASD_PARTN_BITS;
|
|
gdp->fops = &dasd_device_operations;
|
|
|
|
/*
|
|
* Set device name.
|
|
* dasda - dasdz : 26 devices
|
|
* dasdaa - dasdzz : 676 devices, added up = 702
|
|
* dasdaaa - dasdzzz : 17576 devices, added up = 18278
|
|
* dasdaaaa - dasdzzzz : 456976 devices, added up = 475252
|
|
*/
|
|
len = sprintf(gdp->disk_name, "dasd");
|
|
if (base->devindex > 25) {
|
|
if (base->devindex > 701) {
|
|
if (base->devindex > 18277)
|
|
len += sprintf(gdp->disk_name + len, "%c",
|
|
'a'+(((base->devindex-18278)
|
|
/17576)%26));
|
|
len += sprintf(gdp->disk_name + len, "%c",
|
|
'a'+(((base->devindex-702)/676)%26));
|
|
}
|
|
len += sprintf(gdp->disk_name + len, "%c",
|
|
'a'+(((base->devindex-26)/26)%26));
|
|
}
|
|
len += sprintf(gdp->disk_name + len, "%c", 'a'+(base->devindex%26));
|
|
|
|
if (base->features & DASD_FEATURE_READONLY ||
|
|
test_bit(DASD_FLAG_DEVICE_RO, &base->flags))
|
|
set_disk_ro(gdp, 1);
|
|
dasd_add_link_to_gendisk(gdp, base);
|
|
block->gdp = gdp;
|
|
set_capacity(block->gdp, 0);
|
|
|
|
rc = device_add_disk(&base->cdev->dev, block->gdp, NULL);
|
|
if (rc) {
|
|
dasd_gendisk_free(block);
|
|
return rc;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Unregister and free gendisk structure for device.
|
|
*/
|
|
void dasd_gendisk_free(struct dasd_block *block)
|
|
{
|
|
if (block->gdp) {
|
|
del_gendisk(block->gdp);
|
|
block->gdp->private_data = NULL;
|
|
put_disk(block->gdp);
|
|
block->gdp = NULL;
|
|
blk_mq_free_tag_set(&block->tag_set);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Trigger a partition detection.
|
|
*/
|
|
int dasd_scan_partitions(struct dasd_block *block)
|
|
{
|
|
struct file *bdev_file;
|
|
int rc;
|
|
|
|
bdev_file = bdev_file_open_by_dev(disk_devt(block->gdp), BLK_OPEN_READ,
|
|
NULL, NULL);
|
|
if (IS_ERR(bdev_file)) {
|
|
DBF_DEV_EVENT(DBF_ERR, block->base,
|
|
"scan partitions error, blkdev_get returned %ld",
|
|
PTR_ERR(bdev_file));
|
|
return -ENODEV;
|
|
}
|
|
|
|
mutex_lock(&block->gdp->open_mutex);
|
|
rc = bdev_disk_changed(block->gdp, false);
|
|
mutex_unlock(&block->gdp->open_mutex);
|
|
if (rc)
|
|
DBF_DEV_EVENT(DBF_ERR, block->base,
|
|
"scan partitions error, rc %d", rc);
|
|
|
|
/*
|
|
* Since the matching fput() call to the
|
|
* bdev_file_open_by_path() in this function is not called before
|
|
* dasd_destroy_partitions the offline open_count limit needs to be
|
|
* increased from 0 to 1. This is done by setting device->bdev_file
|
|
* (see dasd_generic_set_offline). As long as the partition detection
|
|
* is running no offline should be allowed. That is why the assignment
|
|
* to block->bdev_file is done AFTER the BLKRRPART ioctl.
|
|
*/
|
|
block->bdev_file = bdev_file;
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Remove all inodes in the system for a device, delete the
|
|
* partitions and make device unusable by setting its size to zero.
|
|
*/
|
|
void dasd_destroy_partitions(struct dasd_block *block)
|
|
{
|
|
struct file *bdev_file;
|
|
|
|
/*
|
|
* Get the bdev_file pointer from the device structure and clear
|
|
* device->bdev_file to lower the offline open_count limit again.
|
|
*/
|
|
bdev_file = block->bdev_file;
|
|
block->bdev_file = NULL;
|
|
|
|
mutex_lock(&file_bdev(bdev_file)->bd_disk->open_mutex);
|
|
bdev_disk_changed(file_bdev(bdev_file)->bd_disk, true);
|
|
mutex_unlock(&file_bdev(bdev_file)->bd_disk->open_mutex);
|
|
|
|
/* Matching blkdev_put to the blkdev_get in dasd_scan_partitions. */
|
|
fput(bdev_file);
|
|
}
|
|
|
|
int dasd_gendisk_init(void)
|
|
{
|
|
int rc;
|
|
|
|
/* Register to static dasd major 94 */
|
|
rc = register_blkdev(DASD_MAJOR, "dasd");
|
|
if (rc != 0) {
|
|
pr_warn("Registering the device driver with major number %d failed\n",
|
|
DASD_MAJOR);
|
|
return rc;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
void dasd_gendisk_exit(void)
|
|
{
|
|
unregister_blkdev(DASD_MAJOR, "dasd");
|
|
}
|