Some md fixes for 4.2
Several are tagged for -stable. A few aren't because they are not very, serious or because they are in the 'experimental' cluster code. -----BEGIN PGP SIGNATURE----- Version: GnuPG v2 iQIcBAABCAAGBQJVsbRgAAoJEDnsnt1WYoG54BcQAJlVxcGdMXNAbFAfhToH+cwY DcCKmnXiu3TCcK/gtr0SlUKIhv7kPE2HaGbTko3g5/uTk7SCuMSWRbjpQJr1u98U 9VUPZV0RGLEcQXgsjG3sobEtdSYMSn1/BpdJpmsn/Q6cyTheiEJA9fEghn9F0Iw9 3Ctc56aL0nKsnpeRTvWmKR3F995L2Ene+pIHPbSqTlQcGU+DdxQL/iY9YspMzeih 6SWQICo59+pGSRQdKaU968nZ1a8hJCvhV2NbW0JsJMo9iGo5htCJLdxZatscZV6E xAppCRymW/Nl0n59wCOBhUp+0skVhtYZ1UmNdx/vUA7vcZJX85vIG7WGaaSQAmlF Y4nNMSaX6SWbt/oVgq+JiD39T1oFZN5N5Fh9juqcp3fshiWLO74cnTwea1LtkQZX LfW2HhajDUgoJqoXL6ppKDK8l7alQdcaoYn0C6SfqRZ+PLB5ERrSBHNZpKDbI9aw CFW93lHTtlwtwZn0S7k06mCG8ilO4iPthUVaJEeI1kUWVKY0Ju4xnVTt/7jCroUa Km14KdWeZsS8j9/xr6FYBjarjuh7M9SoflgUK84txE+PIZsSczyrErpBkMf2YBWQ 0KduqQabXa1XYWGtZ7Uhj6iOOGNBcTcZsww8cBEDOJEDyCtSs5w/iPmsFCEGUuo2 YTz6qKpYPgB1VzK2PoBG =6ZCK -----END PGP SIGNATURE----- Merge tag 'md/4.2-fixes' of git://neil.brown.name/md Pull md fixes from Neil Brown: "Some md fixes for 4.2 Several are tagged for -stable. A few aren't because they are not very, serious or because they are in the 'experimental' cluster code" * tag 'md/4.2-fixes' of git://neil.brown.name/md: md/raid5: clear R5_NeedReplace when no longer needed. Fix read-balancing during node failure md-cluster: fix bitmap sub-offset in bitmap_read_sb md: Return error if request_module fails and returns positive value md: Skip cluster setup in case of error while reading bitmap md/raid1: fix test for 'was read error from last working device'. md: Skip cluster setup for dm-raid md: flush ->event_work before stopping array. md/raid10: always set reshape_safe when initializing reshape_position. md/raid5: avoid races when changing cache size.
This commit is contained in:
commit
aca105a697
@ -494,7 +494,7 @@ static int bitmap_new_disk_sb(struct bitmap *bitmap)
|
||||
bitmap_super_t *sb;
|
||||
unsigned long chunksize, daemon_sleep, write_behind;
|
||||
|
||||
bitmap->storage.sb_page = alloc_page(GFP_KERNEL);
|
||||
bitmap->storage.sb_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
|
||||
if (bitmap->storage.sb_page == NULL)
|
||||
return -ENOMEM;
|
||||
bitmap->storage.sb_page->index = 0;
|
||||
@ -541,6 +541,7 @@ static int bitmap_new_disk_sb(struct bitmap *bitmap)
|
||||
sb->state = cpu_to_le32(bitmap->flags);
|
||||
bitmap->events_cleared = bitmap->mddev->events;
|
||||
sb->events_cleared = cpu_to_le64(bitmap->mddev->events);
|
||||
bitmap->mddev->bitmap_info.nodes = 0;
|
||||
|
||||
kunmap_atomic(sb);
|
||||
|
||||
@ -558,6 +559,7 @@ static int bitmap_read_sb(struct bitmap *bitmap)
|
||||
unsigned long sectors_reserved = 0;
|
||||
int err = -EINVAL;
|
||||
struct page *sb_page;
|
||||
loff_t offset = bitmap->mddev->bitmap_info.offset;
|
||||
|
||||
if (!bitmap->storage.file && !bitmap->mddev->bitmap_info.offset) {
|
||||
chunksize = 128 * 1024 * 1024;
|
||||
@ -584,9 +586,9 @@ re_read:
|
||||
bm_blocks = ((bm_blocks+7) >> 3) + sizeof(bitmap_super_t);
|
||||
/* to 4k blocks */
|
||||
bm_blocks = DIV_ROUND_UP_SECTOR_T(bm_blocks, 4096);
|
||||
bitmap->mddev->bitmap_info.offset += bitmap->cluster_slot * (bm_blocks << 3);
|
||||
offset = bitmap->mddev->bitmap_info.offset + (bitmap->cluster_slot * (bm_blocks << 3));
|
||||
pr_info("%s:%d bm slot: %d offset: %llu\n", __func__, __LINE__,
|
||||
bitmap->cluster_slot, (unsigned long long)bitmap->mddev->bitmap_info.offset);
|
||||
bitmap->cluster_slot, offset);
|
||||
}
|
||||
|
||||
if (bitmap->storage.file) {
|
||||
@ -597,7 +599,7 @@ re_read:
|
||||
bitmap, bytes, sb_page);
|
||||
} else {
|
||||
err = read_sb_page(bitmap->mddev,
|
||||
bitmap->mddev->bitmap_info.offset,
|
||||
offset,
|
||||
sb_page,
|
||||
0, sizeof(bitmap_super_t));
|
||||
}
|
||||
@ -611,8 +613,16 @@ re_read:
|
||||
daemon_sleep = le32_to_cpu(sb->daemon_sleep) * HZ;
|
||||
write_behind = le32_to_cpu(sb->write_behind);
|
||||
sectors_reserved = le32_to_cpu(sb->sectors_reserved);
|
||||
nodes = le32_to_cpu(sb->nodes);
|
||||
strlcpy(bitmap->mddev->bitmap_info.cluster_name, sb->cluster_name, 64);
|
||||
/* XXX: This is a hack to ensure that we don't use clustering
|
||||
* in case:
|
||||
* - dm-raid is in use and
|
||||
* - the 'nodes' value written in bitmap_sb is erroneous.
|
||||
*/
|
||||
if (!bitmap->mddev->sync_super) {
|
||||
nodes = le32_to_cpu(sb->nodes);
|
||||
strlcpy(bitmap->mddev->bitmap_info.cluster_name,
|
||||
sb->cluster_name, 64);
|
||||
}
|
||||
|
||||
/* verify that the bitmap-specific fields are valid */
|
||||
if (sb->magic != cpu_to_le32(BITMAP_MAGIC))
|
||||
@ -671,7 +681,7 @@ out:
|
||||
kunmap_atomic(sb);
|
||||
/* Assigning chunksize is required for "re_read" */
|
||||
bitmap->mddev->bitmap_info.chunksize = chunksize;
|
||||
if (nodes && (bitmap->cluster_slot < 0)) {
|
||||
if (err == 0 && nodes && (bitmap->cluster_slot < 0)) {
|
||||
err = md_setup_cluster(bitmap->mddev, nodes);
|
||||
if (err) {
|
||||
pr_err("%s: Could not setup cluster service (%d)\n",
|
||||
@ -1866,10 +1876,6 @@ int bitmap_copy_from_slot(struct mddev *mddev, int slot,
|
||||
if (IS_ERR(bitmap))
|
||||
return PTR_ERR(bitmap);
|
||||
|
||||
rv = bitmap_read_sb(bitmap);
|
||||
if (rv)
|
||||
goto err;
|
||||
|
||||
rv = bitmap_init_from_disk(bitmap, 0);
|
||||
if (rv)
|
||||
goto err;
|
||||
|
@ -44,6 +44,7 @@ struct resync_info {
|
||||
|
||||
/* md_cluster_info flags */
|
||||
#define MD_CLUSTER_WAITING_FOR_NEWDISK 1
|
||||
#define MD_CLUSTER_SUSPEND_READ_BALANCING 2
|
||||
|
||||
|
||||
struct md_cluster_info {
|
||||
@ -275,6 +276,9 @@ clear_bit:
|
||||
|
||||
static void recover_prep(void *arg)
|
||||
{
|
||||
struct mddev *mddev = arg;
|
||||
struct md_cluster_info *cinfo = mddev->cluster_info;
|
||||
set_bit(MD_CLUSTER_SUSPEND_READ_BALANCING, &cinfo->state);
|
||||
}
|
||||
|
||||
static void recover_slot(void *arg, struct dlm_slot *slot)
|
||||
@ -307,6 +311,7 @@ static void recover_done(void *arg, struct dlm_slot *slots,
|
||||
|
||||
cinfo->slot_number = our_slot;
|
||||
complete(&cinfo->completion);
|
||||
clear_bit(MD_CLUSTER_SUSPEND_READ_BALANCING, &cinfo->state);
|
||||
}
|
||||
|
||||
static const struct dlm_lockspace_ops md_ls_ops = {
|
||||
@ -816,12 +821,17 @@ static void resync_finish(struct mddev *mddev)
|
||||
resync_send(mddev, RESYNCING, 0, 0);
|
||||
}
|
||||
|
||||
static int area_resyncing(struct mddev *mddev, sector_t lo, sector_t hi)
|
||||
static int area_resyncing(struct mddev *mddev, int direction,
|
||||
sector_t lo, sector_t hi)
|
||||
{
|
||||
struct md_cluster_info *cinfo = mddev->cluster_info;
|
||||
int ret = 0;
|
||||
struct suspend_info *s;
|
||||
|
||||
if ((direction == READ) &&
|
||||
test_bit(MD_CLUSTER_SUSPEND_READ_BALANCING, &cinfo->state))
|
||||
return 1;
|
||||
|
||||
spin_lock_irq(&cinfo->suspend_lock);
|
||||
if (list_empty(&cinfo->suspend_list))
|
||||
goto out;
|
||||
|
@ -18,7 +18,7 @@ struct md_cluster_operations {
|
||||
int (*metadata_update_start)(struct mddev *mddev);
|
||||
int (*metadata_update_finish)(struct mddev *mddev);
|
||||
int (*metadata_update_cancel)(struct mddev *mddev);
|
||||
int (*area_resyncing)(struct mddev *mddev, sector_t lo, sector_t hi);
|
||||
int (*area_resyncing)(struct mddev *mddev, int direction, sector_t lo, sector_t hi);
|
||||
int (*add_new_disk_start)(struct mddev *mddev, struct md_rdev *rdev);
|
||||
int (*add_new_disk_finish)(struct mddev *mddev);
|
||||
int (*new_disk_ack)(struct mddev *mddev, bool ack);
|
||||
|
@ -5382,6 +5382,8 @@ static void __md_stop(struct mddev *mddev)
|
||||
{
|
||||
struct md_personality *pers = mddev->pers;
|
||||
mddev_detach(mddev);
|
||||
/* Ensure ->event_work is done */
|
||||
flush_workqueue(md_misc_wq);
|
||||
spin_lock(&mddev->lock);
|
||||
mddev->ready = 0;
|
||||
mddev->pers = NULL;
|
||||
@ -7437,7 +7439,7 @@ int md_setup_cluster(struct mddev *mddev, int nodes)
|
||||
err = request_module("md-cluster");
|
||||
if (err) {
|
||||
pr_err("md-cluster module not found.\n");
|
||||
return err;
|
||||
return -ENOENT;
|
||||
}
|
||||
|
||||
spin_lock(&pers_lock);
|
||||
|
@ -336,7 +336,7 @@ static void raid1_end_read_request(struct bio *bio, int error)
|
||||
spin_lock_irqsave(&conf->device_lock, flags);
|
||||
if (r1_bio->mddev->degraded == conf->raid_disks ||
|
||||
(r1_bio->mddev->degraded == conf->raid_disks-1 &&
|
||||
!test_bit(Faulty, &conf->mirrors[mirror].rdev->flags)))
|
||||
test_bit(In_sync, &conf->mirrors[mirror].rdev->flags)))
|
||||
uptodate = 1;
|
||||
spin_unlock_irqrestore(&conf->device_lock, flags);
|
||||
}
|
||||
@ -541,7 +541,7 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
|
||||
|
||||
if ((conf->mddev->recovery_cp < this_sector + sectors) ||
|
||||
(mddev_is_clustered(conf->mddev) &&
|
||||
md_cluster_ops->area_resyncing(conf->mddev, this_sector,
|
||||
md_cluster_ops->area_resyncing(conf->mddev, READ, this_sector,
|
||||
this_sector + sectors)))
|
||||
choose_first = 1;
|
||||
else
|
||||
@ -1111,7 +1111,8 @@ static void make_request(struct mddev *mddev, struct bio * bio)
|
||||
((bio_end_sector(bio) > mddev->suspend_lo &&
|
||||
bio->bi_iter.bi_sector < mddev->suspend_hi) ||
|
||||
(mddev_is_clustered(mddev) &&
|
||||
md_cluster_ops->area_resyncing(mddev, bio->bi_iter.bi_sector, bio_end_sector(bio))))) {
|
||||
md_cluster_ops->area_resyncing(mddev, WRITE,
|
||||
bio->bi_iter.bi_sector, bio_end_sector(bio))))) {
|
||||
/* As the suspend_* range is controlled by
|
||||
* userspace, we want an interruptible
|
||||
* wait.
|
||||
@ -1124,7 +1125,7 @@ static void make_request(struct mddev *mddev, struct bio * bio)
|
||||
if (bio_end_sector(bio) <= mddev->suspend_lo ||
|
||||
bio->bi_iter.bi_sector >= mddev->suspend_hi ||
|
||||
(mddev_is_clustered(mddev) &&
|
||||
!md_cluster_ops->area_resyncing(mddev,
|
||||
!md_cluster_ops->area_resyncing(mddev, WRITE,
|
||||
bio->bi_iter.bi_sector, bio_end_sector(bio))))
|
||||
break;
|
||||
schedule();
|
||||
|
@ -3556,6 +3556,7 @@ static struct r10conf *setup_conf(struct mddev *mddev)
|
||||
/* far_copies must be 1 */
|
||||
conf->prev.stride = conf->dev_sectors;
|
||||
}
|
||||
conf->reshape_safe = conf->reshape_progress;
|
||||
spin_lock_init(&conf->device_lock);
|
||||
INIT_LIST_HEAD(&conf->retry_list);
|
||||
|
||||
@ -3760,7 +3761,6 @@ static int run(struct mddev *mddev)
|
||||
}
|
||||
conf->offset_diff = min_offset_diff;
|
||||
|
||||
conf->reshape_safe = conf->reshape_progress;
|
||||
clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
|
||||
clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
|
||||
set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
|
||||
@ -4103,6 +4103,7 @@ static int raid10_start_reshape(struct mddev *mddev)
|
||||
conf->reshape_progress = size;
|
||||
} else
|
||||
conf->reshape_progress = 0;
|
||||
conf->reshape_safe = conf->reshape_progress;
|
||||
spin_unlock_irq(&conf->device_lock);
|
||||
|
||||
if (mddev->delta_disks && mddev->bitmap) {
|
||||
@ -4170,6 +4171,7 @@ abort:
|
||||
rdev->new_data_offset = rdev->data_offset;
|
||||
smp_wmb();
|
||||
conf->reshape_progress = MaxSector;
|
||||
conf->reshape_safe = MaxSector;
|
||||
mddev->reshape_position = MaxSector;
|
||||
spin_unlock_irq(&conf->device_lock);
|
||||
return ret;
|
||||
@ -4524,6 +4526,7 @@ static void end_reshape(struct r10conf *conf)
|
||||
md_finish_reshape(conf->mddev);
|
||||
smp_wmb();
|
||||
conf->reshape_progress = MaxSector;
|
||||
conf->reshape_safe = MaxSector;
|
||||
spin_unlock_irq(&conf->device_lock);
|
||||
|
||||
/* read-ahead size must cover two whole stripes, which is
|
||||
|
@ -2162,6 +2162,9 @@ static int resize_stripes(struct r5conf *conf, int newsize)
|
||||
if (!sc)
|
||||
return -ENOMEM;
|
||||
|
||||
/* Need to ensure auto-resizing doesn't interfere */
|
||||
mutex_lock(&conf->cache_size_mutex);
|
||||
|
||||
for (i = conf->max_nr_stripes; i; i--) {
|
||||
nsh = alloc_stripe(sc, GFP_KERNEL);
|
||||
if (!nsh)
|
||||
@ -2178,6 +2181,7 @@ static int resize_stripes(struct r5conf *conf, int newsize)
|
||||
kmem_cache_free(sc, nsh);
|
||||
}
|
||||
kmem_cache_destroy(sc);
|
||||
mutex_unlock(&conf->cache_size_mutex);
|
||||
return -ENOMEM;
|
||||
}
|
||||
/* Step 2 - Must use GFP_NOIO now.
|
||||
@ -2224,6 +2228,7 @@ static int resize_stripes(struct r5conf *conf, int newsize)
|
||||
} else
|
||||
err = -ENOMEM;
|
||||
|
||||
mutex_unlock(&conf->cache_size_mutex);
|
||||
/* Step 4, return new stripes to service */
|
||||
while(!list_empty(&newstripes)) {
|
||||
nsh = list_entry(newstripes.next, struct stripe_head, lru);
|
||||
@ -4061,8 +4066,10 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s)
|
||||
&first_bad, &bad_sectors))
|
||||
set_bit(R5_ReadRepl, &dev->flags);
|
||||
else {
|
||||
if (rdev)
|
||||
if (rdev && !test_bit(Faulty, &rdev->flags))
|
||||
set_bit(R5_NeedReplace, &dev->flags);
|
||||
else
|
||||
clear_bit(R5_NeedReplace, &dev->flags);
|
||||
rdev = rcu_dereference(conf->disks[i].rdev);
|
||||
clear_bit(R5_ReadRepl, &dev->flags);
|
||||
}
|
||||
@ -5857,12 +5864,14 @@ static void raid5d(struct md_thread *thread)
|
||||
pr_debug("%d stripes handled\n", handled);
|
||||
|
||||
spin_unlock_irq(&conf->device_lock);
|
||||
if (test_and_clear_bit(R5_ALLOC_MORE, &conf->cache_state)) {
|
||||
if (test_and_clear_bit(R5_ALLOC_MORE, &conf->cache_state) &&
|
||||
mutex_trylock(&conf->cache_size_mutex)) {
|
||||
grow_one_stripe(conf, __GFP_NOWARN);
|
||||
/* Set flag even if allocation failed. This helps
|
||||
* slow down allocation requests when mem is short
|
||||
*/
|
||||
set_bit(R5_DID_ALLOC, &conf->cache_state);
|
||||
mutex_unlock(&conf->cache_size_mutex);
|
||||
}
|
||||
|
||||
async_tx_issue_pending_all();
|
||||
@ -5894,18 +5903,22 @@ raid5_set_cache_size(struct mddev *mddev, int size)
|
||||
return -EINVAL;
|
||||
|
||||
conf->min_nr_stripes = size;
|
||||
mutex_lock(&conf->cache_size_mutex);
|
||||
while (size < conf->max_nr_stripes &&
|
||||
drop_one_stripe(conf))
|
||||
;
|
||||
mutex_unlock(&conf->cache_size_mutex);
|
||||
|
||||
|
||||
err = md_allow_write(mddev);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
mutex_lock(&conf->cache_size_mutex);
|
||||
while (size > conf->max_nr_stripes)
|
||||
if (!grow_one_stripe(conf, GFP_KERNEL))
|
||||
break;
|
||||
mutex_unlock(&conf->cache_size_mutex);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -6371,11 +6384,18 @@ static unsigned long raid5_cache_scan(struct shrinker *shrink,
|
||||
struct shrink_control *sc)
|
||||
{
|
||||
struct r5conf *conf = container_of(shrink, struct r5conf, shrinker);
|
||||
int ret = 0;
|
||||
while (ret < sc->nr_to_scan) {
|
||||
if (drop_one_stripe(conf) == 0)
|
||||
return SHRINK_STOP;
|
||||
ret++;
|
||||
unsigned long ret = SHRINK_STOP;
|
||||
|
||||
if (mutex_trylock(&conf->cache_size_mutex)) {
|
||||
ret= 0;
|
||||
while (ret < sc->nr_to_scan) {
|
||||
if (drop_one_stripe(conf) == 0) {
|
||||
ret = SHRINK_STOP;
|
||||
break;
|
||||
}
|
||||
ret++;
|
||||
}
|
||||
mutex_unlock(&conf->cache_size_mutex);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
@ -6444,6 +6464,7 @@ static struct r5conf *setup_conf(struct mddev *mddev)
|
||||
goto abort;
|
||||
spin_lock_init(&conf->device_lock);
|
||||
seqcount_init(&conf->gen_lock);
|
||||
mutex_init(&conf->cache_size_mutex);
|
||||
init_waitqueue_head(&conf->wait_for_quiescent);
|
||||
for (i = 0; i < NR_STRIPE_HASH_LOCKS; i++) {
|
||||
init_waitqueue_head(&conf->wait_for_stripe[i]);
|
||||
|
@ -482,7 +482,8 @@ struct r5conf {
|
||||
*/
|
||||
int active_name;
|
||||
char cache_name[2][32];
|
||||
struct kmem_cache *slab_cache; /* for allocating stripes */
|
||||
struct kmem_cache *slab_cache; /* for allocating stripes */
|
||||
struct mutex cache_size_mutex; /* Protect changes to cache size */
|
||||
|
||||
int seq_flush, seq_write;
|
||||
int quiesce;
|
||||
|
Loading…
Reference in New Issue
Block a user