bd4a633b6f
Move the nonrot flag into the queue_limits feature field so that it can be set atomically with the queue frozen. Use the chance to switch to defaulting to non-rotational and require the driver to opt into rotational, which matches the polarity of the sysfs interface. For the z2ram, ps3vram, 2x memstick, ubiblock and dcssblk the new rotational flag is not set as they clearly are not rotational despite this being a behavior change. There are some other drivers that unconditionally set the rotational flag to keep the existing behavior as they arguably can be used on rotational devices even if that is probably not their main use today (e.g. virtio_blk and drbd). The flag is automatically inherited in blk_stack_limits matching the existing behavior in dm and md. Signed-off-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Damien Le Moal <dlemoal@kernel.org> Reviewed-by: Hannes Reinecke <hare@suse.de> Link: https://lore.kernel.org/r/20240617060532.127975-15-hch@lst.de Signed-off-by: Jens Axboe <axboe@kernel.dk>
455 lines
11 KiB
C
455 lines
11 KiB
C
/* Copyright (c) 2013 Coraid, Inc. See COPYING for GPL terms. */
|
|
/*
|
|
* aoeblk.c
|
|
* block device routines
|
|
*/
|
|
|
|
#include <linux/kernel.h>
|
|
#include <linux/hdreg.h>
|
|
#include <linux/blk-mq.h>
|
|
#include <linux/backing-dev.h>
|
|
#include <linux/fs.h>
|
|
#include <linux/ioctl.h>
|
|
#include <linux/slab.h>
|
|
#include <linux/ratelimit.h>
|
|
#include <linux/netdevice.h>
|
|
#include <linux/mutex.h>
|
|
#include <linux/export.h>
|
|
#include <linux/moduleparam.h>
|
|
#include <linux/debugfs.h>
|
|
#include <scsi/sg.h>
|
|
#include "aoe.h"
|
|
|
|
static DEFINE_MUTEX(aoeblk_mutex);
|
|
static struct kmem_cache *buf_pool_cache;
|
|
static struct dentry *aoe_debugfs_dir;
|
|
|
|
/* random default picked from the historic block max_sectors cap */
|
|
static int aoe_maxsectors = 2560;
|
|
module_param(aoe_maxsectors, int, 0644);
|
|
MODULE_PARM_DESC(aoe_maxsectors,
|
|
"When nonzero, set the maximum number of sectors per I/O request");
|
|
|
|
static ssize_t aoedisk_show_state(struct device *dev,
|
|
struct device_attribute *attr, char *page)
|
|
{
|
|
struct gendisk *disk = dev_to_disk(dev);
|
|
struct aoedev *d = disk->private_data;
|
|
|
|
return sysfs_emit(page, "%s%s\n",
|
|
(d->flags & DEVFL_UP) ? "up" : "down",
|
|
(d->flags & DEVFL_KICKME) ? ",kickme" :
|
|
(d->nopen && !(d->flags & DEVFL_UP)) ? ",closewait" : "");
|
|
/* I'd rather see nopen exported so we can ditch closewait */
|
|
}
|
|
static ssize_t aoedisk_show_mac(struct device *dev,
|
|
struct device_attribute *attr, char *page)
|
|
{
|
|
struct gendisk *disk = dev_to_disk(dev);
|
|
struct aoedev *d = disk->private_data;
|
|
struct aoetgt *t = d->targets[0];
|
|
|
|
if (t == NULL)
|
|
return sysfs_emit(page, "none\n");
|
|
return sysfs_emit(page, "%pm\n", t->addr);
|
|
}
|
|
static ssize_t aoedisk_show_netif(struct device *dev,
|
|
struct device_attribute *attr, char *page)
|
|
{
|
|
struct gendisk *disk = dev_to_disk(dev);
|
|
struct aoedev *d = disk->private_data;
|
|
struct net_device *nds[8], **nd, **nnd, **ne;
|
|
struct aoetgt **t, **te;
|
|
struct aoeif *ifp, *e;
|
|
char *p;
|
|
|
|
memset(nds, 0, sizeof nds);
|
|
nd = nds;
|
|
ne = nd + ARRAY_SIZE(nds);
|
|
t = d->targets;
|
|
te = t + d->ntargets;
|
|
for (; t < te && *t; t++) {
|
|
ifp = (*t)->ifs;
|
|
e = ifp + NAOEIFS;
|
|
for (; ifp < e && ifp->nd; ifp++) {
|
|
for (nnd = nds; nnd < nd; nnd++)
|
|
if (*nnd == ifp->nd)
|
|
break;
|
|
if (nnd == nd && nd != ne)
|
|
*nd++ = ifp->nd;
|
|
}
|
|
}
|
|
|
|
ne = nd;
|
|
nd = nds;
|
|
if (*nd == NULL)
|
|
return sysfs_emit(page, "none\n");
|
|
for (p = page; nd < ne; nd++)
|
|
p += scnprintf(p, PAGE_SIZE - (p-page), "%s%s",
|
|
p == page ? "" : ",", (*nd)->name);
|
|
p += scnprintf(p, PAGE_SIZE - (p-page), "\n");
|
|
return p-page;
|
|
}
|
|
/* firmware version */
|
|
static ssize_t aoedisk_show_fwver(struct device *dev,
|
|
struct device_attribute *attr, char *page)
|
|
{
|
|
struct gendisk *disk = dev_to_disk(dev);
|
|
struct aoedev *d = disk->private_data;
|
|
|
|
return sysfs_emit(page, "0x%04x\n", (unsigned int) d->fw_ver);
|
|
}
|
|
static ssize_t aoedisk_show_payload(struct device *dev,
|
|
struct device_attribute *attr, char *page)
|
|
{
|
|
struct gendisk *disk = dev_to_disk(dev);
|
|
struct aoedev *d = disk->private_data;
|
|
|
|
return sysfs_emit(page, "%lu\n", d->maxbcnt);
|
|
}
|
|
|
|
static int aoe_debugfs_show(struct seq_file *s, void *ignored)
|
|
{
|
|
struct aoedev *d;
|
|
struct aoetgt **t, **te;
|
|
struct aoeif *ifp, *ife;
|
|
unsigned long flags;
|
|
char c;
|
|
|
|
d = s->private;
|
|
seq_printf(s, "rttavg: %d rttdev: %d\n",
|
|
d->rttavg >> RTTSCALE,
|
|
d->rttdev >> RTTDSCALE);
|
|
seq_printf(s, "nskbpool: %d\n", skb_queue_len(&d->skbpool));
|
|
seq_printf(s, "kicked: %ld\n", d->kicked);
|
|
seq_printf(s, "maxbcnt: %ld\n", d->maxbcnt);
|
|
seq_printf(s, "ref: %ld\n", d->ref);
|
|
|
|
spin_lock_irqsave(&d->lock, flags);
|
|
t = d->targets;
|
|
te = t + d->ntargets;
|
|
for (; t < te && *t; t++) {
|
|
c = '\t';
|
|
seq_printf(s, "falloc: %ld\n", (*t)->falloc);
|
|
seq_printf(s, "ffree: %p\n",
|
|
list_empty(&(*t)->ffree) ? NULL : (*t)->ffree.next);
|
|
seq_printf(s, "%pm:%d:%d:%d\n", (*t)->addr, (*t)->nout,
|
|
(*t)->maxout, (*t)->nframes);
|
|
seq_printf(s, "\tssthresh:%d\n", (*t)->ssthresh);
|
|
seq_printf(s, "\ttaint:%d\n", (*t)->taint);
|
|
seq_printf(s, "\tr:%d\n", (*t)->rpkts);
|
|
seq_printf(s, "\tw:%d\n", (*t)->wpkts);
|
|
ifp = (*t)->ifs;
|
|
ife = ifp + ARRAY_SIZE((*t)->ifs);
|
|
for (; ifp->nd && ifp < ife; ifp++) {
|
|
seq_printf(s, "%c%s", c, ifp->nd->name);
|
|
c = ',';
|
|
}
|
|
seq_puts(s, "\n");
|
|
}
|
|
spin_unlock_irqrestore(&d->lock, flags);
|
|
|
|
return 0;
|
|
}
|
|
DEFINE_SHOW_ATTRIBUTE(aoe_debugfs);
|
|
|
|
static DEVICE_ATTR(state, 0444, aoedisk_show_state, NULL);
|
|
static DEVICE_ATTR(mac, 0444, aoedisk_show_mac, NULL);
|
|
static DEVICE_ATTR(netif, 0444, aoedisk_show_netif, NULL);
|
|
static struct device_attribute dev_attr_firmware_version = {
|
|
.attr = { .name = "firmware-version", .mode = 0444 },
|
|
.show = aoedisk_show_fwver,
|
|
};
|
|
static DEVICE_ATTR(payload, 0444, aoedisk_show_payload, NULL);
|
|
|
|
static struct attribute *aoe_attrs[] = {
|
|
&dev_attr_state.attr,
|
|
&dev_attr_mac.attr,
|
|
&dev_attr_netif.attr,
|
|
&dev_attr_firmware_version.attr,
|
|
&dev_attr_payload.attr,
|
|
NULL,
|
|
};
|
|
|
|
static const struct attribute_group aoe_attr_group = {
|
|
.attrs = aoe_attrs,
|
|
};
|
|
|
|
static const struct attribute_group *aoe_attr_groups[] = {
|
|
&aoe_attr_group,
|
|
NULL,
|
|
};
|
|
|
|
static void
|
|
aoedisk_add_debugfs(struct aoedev *d)
|
|
{
|
|
char *p;
|
|
|
|
if (aoe_debugfs_dir == NULL)
|
|
return;
|
|
p = strchr(d->gd->disk_name, '/');
|
|
if (p == NULL)
|
|
p = d->gd->disk_name;
|
|
else
|
|
p++;
|
|
BUG_ON(*p == '\0');
|
|
d->debugfs = debugfs_create_file(p, 0444, aoe_debugfs_dir, d,
|
|
&aoe_debugfs_fops);
|
|
}
|
|
void
|
|
aoedisk_rm_debugfs(struct aoedev *d)
|
|
{
|
|
debugfs_remove(d->debugfs);
|
|
d->debugfs = NULL;
|
|
}
|
|
|
|
static int
|
|
aoeblk_open(struct gendisk *disk, blk_mode_t mode)
|
|
{
|
|
struct aoedev *d = disk->private_data;
|
|
ulong flags;
|
|
|
|
if (!virt_addr_valid(d)) {
|
|
pr_crit("aoe: invalid device pointer in %s\n",
|
|
__func__);
|
|
WARN_ON(1);
|
|
return -ENODEV;
|
|
}
|
|
if (!(d->flags & DEVFL_UP) || d->flags & DEVFL_TKILL)
|
|
return -ENODEV;
|
|
|
|
mutex_lock(&aoeblk_mutex);
|
|
spin_lock_irqsave(&d->lock, flags);
|
|
if (d->flags & DEVFL_UP && !(d->flags & DEVFL_TKILL)) {
|
|
d->nopen++;
|
|
spin_unlock_irqrestore(&d->lock, flags);
|
|
mutex_unlock(&aoeblk_mutex);
|
|
return 0;
|
|
}
|
|
spin_unlock_irqrestore(&d->lock, flags);
|
|
mutex_unlock(&aoeblk_mutex);
|
|
return -ENODEV;
|
|
}
|
|
|
|
static void
|
|
aoeblk_release(struct gendisk *disk)
|
|
{
|
|
struct aoedev *d = disk->private_data;
|
|
ulong flags;
|
|
|
|
spin_lock_irqsave(&d->lock, flags);
|
|
|
|
if (--d->nopen == 0) {
|
|
spin_unlock_irqrestore(&d->lock, flags);
|
|
aoecmd_cfg(d->aoemajor, d->aoeminor);
|
|
return;
|
|
}
|
|
spin_unlock_irqrestore(&d->lock, flags);
|
|
}
|
|
|
|
static blk_status_t aoeblk_queue_rq(struct blk_mq_hw_ctx *hctx,
|
|
const struct blk_mq_queue_data *bd)
|
|
{
|
|
struct aoedev *d = hctx->queue->queuedata;
|
|
|
|
spin_lock_irq(&d->lock);
|
|
|
|
if ((d->flags & DEVFL_UP) == 0) {
|
|
pr_info_ratelimited("aoe: device %ld.%d is not up\n",
|
|
d->aoemajor, d->aoeminor);
|
|
spin_unlock_irq(&d->lock);
|
|
blk_mq_start_request(bd->rq);
|
|
return BLK_STS_IOERR;
|
|
}
|
|
|
|
list_add_tail(&bd->rq->queuelist, &d->rq_list);
|
|
aoecmd_work(d);
|
|
spin_unlock_irq(&d->lock);
|
|
return BLK_STS_OK;
|
|
}
|
|
|
|
static int
|
|
aoeblk_getgeo(struct block_device *bdev, struct hd_geometry *geo)
|
|
{
|
|
struct aoedev *d = bdev->bd_disk->private_data;
|
|
|
|
if ((d->flags & DEVFL_UP) == 0) {
|
|
printk(KERN_ERR "aoe: disk not up\n");
|
|
return -ENODEV;
|
|
}
|
|
|
|
geo->cylinders = d->geo.cylinders;
|
|
geo->heads = d->geo.heads;
|
|
geo->sectors = d->geo.sectors;
|
|
return 0;
|
|
}
|
|
|
|
static int
|
|
aoeblk_ioctl(struct block_device *bdev, blk_mode_t mode, uint cmd, ulong arg)
|
|
{
|
|
struct aoedev *d;
|
|
|
|
if (!arg)
|
|
return -EINVAL;
|
|
|
|
d = bdev->bd_disk->private_data;
|
|
if ((d->flags & DEVFL_UP) == 0) {
|
|
pr_err("aoe: disk not up\n");
|
|
return -ENODEV;
|
|
}
|
|
|
|
if (cmd == HDIO_GET_IDENTITY) {
|
|
if (!copy_to_user((void __user *) arg, &d->ident,
|
|
sizeof(d->ident)))
|
|
return 0;
|
|
return -EFAULT;
|
|
}
|
|
|
|
/* udev calls scsi_id, which uses SG_IO, resulting in noise */
|
|
if (cmd != SG_IO)
|
|
pr_info("aoe: unknown ioctl 0x%x\n", cmd);
|
|
|
|
return -ENOTTY;
|
|
}
|
|
|
|
static const struct block_device_operations aoe_bdops = {
|
|
.open = aoeblk_open,
|
|
.release = aoeblk_release,
|
|
.ioctl = aoeblk_ioctl,
|
|
.compat_ioctl = blkdev_compat_ptr_ioctl,
|
|
.getgeo = aoeblk_getgeo,
|
|
.owner = THIS_MODULE,
|
|
};
|
|
|
|
static const struct blk_mq_ops aoeblk_mq_ops = {
|
|
.queue_rq = aoeblk_queue_rq,
|
|
};
|
|
|
|
/* blk_mq_alloc_disk and add_disk can sleep */
|
|
void
|
|
aoeblk_gdalloc(void *vp)
|
|
{
|
|
struct aoedev *d = vp;
|
|
struct gendisk *gd;
|
|
mempool_t *mp;
|
|
struct blk_mq_tag_set *set;
|
|
sector_t ssize;
|
|
struct queue_limits lim = {
|
|
.max_hw_sectors = aoe_maxsectors,
|
|
.io_opt = SZ_2M,
|
|
.features = BLK_FEAT_ROTATIONAL,
|
|
};
|
|
ulong flags;
|
|
int late = 0;
|
|
int err;
|
|
|
|
spin_lock_irqsave(&d->lock, flags);
|
|
if (d->flags & DEVFL_GDALLOC
|
|
&& !(d->flags & DEVFL_TKILL)
|
|
&& !(d->flags & DEVFL_GD_NOW))
|
|
d->flags |= DEVFL_GD_NOW;
|
|
else
|
|
late = 1;
|
|
spin_unlock_irqrestore(&d->lock, flags);
|
|
if (late)
|
|
return;
|
|
|
|
mp = mempool_create(MIN_BUFS, mempool_alloc_slab, mempool_free_slab,
|
|
buf_pool_cache);
|
|
if (mp == NULL) {
|
|
printk(KERN_ERR "aoe: cannot allocate bufpool for %ld.%d\n",
|
|
d->aoemajor, d->aoeminor);
|
|
goto err;
|
|
}
|
|
|
|
set = &d->tag_set;
|
|
set->ops = &aoeblk_mq_ops;
|
|
set->cmd_size = sizeof(struct aoe_req);
|
|
set->nr_hw_queues = 1;
|
|
set->queue_depth = 128;
|
|
set->numa_node = NUMA_NO_NODE;
|
|
set->flags = BLK_MQ_F_SHOULD_MERGE;
|
|
err = blk_mq_alloc_tag_set(set);
|
|
if (err) {
|
|
pr_err("aoe: cannot allocate tag set for %ld.%d\n",
|
|
d->aoemajor, d->aoeminor);
|
|
goto err_mempool;
|
|
}
|
|
|
|
gd = blk_mq_alloc_disk(set, &lim, d);
|
|
if (IS_ERR(gd)) {
|
|
pr_err("aoe: cannot allocate block queue for %ld.%d\n",
|
|
d->aoemajor, d->aoeminor);
|
|
goto err_tagset;
|
|
}
|
|
|
|
spin_lock_irqsave(&d->lock, flags);
|
|
WARN_ON(!(d->flags & DEVFL_GD_NOW));
|
|
WARN_ON(!(d->flags & DEVFL_GDALLOC));
|
|
WARN_ON(d->flags & DEVFL_TKILL);
|
|
WARN_ON(d->gd);
|
|
WARN_ON(d->flags & DEVFL_UP);
|
|
d->bufpool = mp;
|
|
d->blkq = gd->queue;
|
|
d->gd = gd;
|
|
gd->major = AOE_MAJOR;
|
|
gd->first_minor = d->sysminor;
|
|
gd->minors = AOE_PARTITIONS;
|
|
gd->fops = &aoe_bdops;
|
|
gd->private_data = d;
|
|
ssize = d->ssize;
|
|
snprintf(gd->disk_name, sizeof gd->disk_name, "etherd/e%ld.%d",
|
|
d->aoemajor, d->aoeminor);
|
|
|
|
d->flags &= ~DEVFL_GDALLOC;
|
|
d->flags |= DEVFL_UP;
|
|
|
|
spin_unlock_irqrestore(&d->lock, flags);
|
|
|
|
set_capacity(gd, ssize);
|
|
|
|
err = device_add_disk(NULL, gd, aoe_attr_groups);
|
|
if (err)
|
|
goto out_disk_cleanup;
|
|
aoedisk_add_debugfs(d);
|
|
|
|
spin_lock_irqsave(&d->lock, flags);
|
|
WARN_ON(!(d->flags & DEVFL_GD_NOW));
|
|
d->flags &= ~DEVFL_GD_NOW;
|
|
spin_unlock_irqrestore(&d->lock, flags);
|
|
return;
|
|
|
|
out_disk_cleanup:
|
|
put_disk(gd);
|
|
err_tagset:
|
|
blk_mq_free_tag_set(set);
|
|
err_mempool:
|
|
mempool_destroy(mp);
|
|
err:
|
|
spin_lock_irqsave(&d->lock, flags);
|
|
d->flags &= ~DEVFL_GD_NOW;
|
|
queue_work(aoe_wq, &d->work);
|
|
spin_unlock_irqrestore(&d->lock, flags);
|
|
}
|
|
|
|
void
|
|
aoeblk_exit(void)
|
|
{
|
|
debugfs_remove_recursive(aoe_debugfs_dir);
|
|
aoe_debugfs_dir = NULL;
|
|
kmem_cache_destroy(buf_pool_cache);
|
|
}
|
|
|
|
int __init
|
|
aoeblk_init(void)
|
|
{
|
|
buf_pool_cache = kmem_cache_create("aoe_bufs",
|
|
sizeof(struct buf),
|
|
0, 0, NULL);
|
|
if (buf_pool_cache == NULL)
|
|
return -ENOMEM;
|
|
aoe_debugfs_dir = debugfs_create_dir("aoe", NULL);
|
|
return 0;
|
|
}
|
|
|