Merge branch 'for-linus' of git://neil.brown.name/md

* 'for-linus' of git://neil.brown.name/md: (24 commits)
  md: clean up do_md_stop
  md: fix another deadlock with removing sysfs attributes.
  md: move revalidate_disk() back outside open_mutex
  md/raid10: fix deadlock with unaligned read during resync
  md/bitmap:  separate out loading a bitmap from initialising the structures.
  md/bitmap: prepare for storing write-intent-bitmap via dm-dirty-log.
  md/bitmap: optimise scanning of empty bitmaps.
  md/bitmap: clean up plugging calls.
  md/bitmap: reduce dependence on sysfs.
  md/bitmap: white space clean up and similar.
  md/raid5: export raid5 unplugging interface.
  md/plug: optionally use plugger to unplug an array during resync/recovery.
  md/raid5: add simple plugging infrastructure.
  md/raid5: export is_congested test
  raid5: Don't set read-ahead when there is no queue
  md: add support for raising dm events.
  md: export various start/stop interfaces
  md: split out md_rdev_init
  md: be more careful setting MD_CHANGE_CLEAN
  md/raid5: ensure we create a unique name for kmem_cache when mddev has no gendisk
  ...
This commit is contained in:
Linus Torvalds
2010-08-10 15:38:19 -07:00
25 changed files with 748 additions and 509 deletions
+108 -72
View File
@@ -201,11 +201,11 @@ static void __release_stripe(raid5_conf_t *conf, struct stripe_head *sh)
if (test_bit(STRIPE_HANDLE, &sh->state)) {
if (test_bit(STRIPE_DELAYED, &sh->state)) {
list_add_tail(&sh->lru, &conf->delayed_list);
blk_plug_device(conf->mddev->queue);
plugger_set_plug(&conf->plug);
} else if (test_bit(STRIPE_BIT_DELAY, &sh->state) &&
sh->bm_seq - conf->seq_write > 0) {
list_add_tail(&sh->lru, &conf->bitmap_list);
blk_plug_device(conf->mddev->queue);
plugger_set_plug(&conf->plug);
} else {
clear_bit(STRIPE_BIT_DELAY, &sh->state);
list_add_tail(&sh->lru, &conf->handle_list);
@@ -434,7 +434,6 @@ static int has_failed(raid5_conf_t *conf)
}
static void unplug_slaves(mddev_t *mddev);
static void raid5_unplug_device(struct request_queue *q);
static struct stripe_head *
get_active_stripe(raid5_conf_t *conf, sector_t sector,
@@ -464,7 +463,7 @@ get_active_stripe(raid5_conf_t *conf, sector_t sector,
< (conf->max_nr_stripes *3/4)
|| !conf->inactive_blocked),
conf->device_lock,
raid5_unplug_device(conf->mddev->queue)
md_raid5_unplug_device(conf)
);
conf->inactive_blocked = 0;
} else
@@ -1337,10 +1336,14 @@ static int grow_stripes(raid5_conf_t *conf, int num)
struct kmem_cache *sc;
int devs = max(conf->raid_disks, conf->previous_raid_disks);
sprintf(conf->cache_name[0],
"raid%d-%s", conf->level, mdname(conf->mddev));
sprintf(conf->cache_name[1],
"raid%d-%s-alt", conf->level, mdname(conf->mddev));
if (conf->mddev->gendisk)
sprintf(conf->cache_name[0],
"raid%d-%s", conf->level, mdname(conf->mddev));
else
sprintf(conf->cache_name[0],
"raid%d-%p", conf->level, conf->mddev);
sprintf(conf->cache_name[1], "%s-alt", conf->cache_name[0]);
conf->active_name = 0;
sc = kmem_cache_create(conf->cache_name[conf->active_name],
sizeof(struct stripe_head)+(devs-1)*sizeof(struct r5dev),
@@ -3614,7 +3617,7 @@ static void raid5_activate_delayed(raid5_conf_t *conf)
list_add_tail(&sh->lru, &conf->hold_list);
}
} else
blk_plug_device(conf->mddev->queue);
plugger_set_plug(&conf->plug);
}
static void activate_bit_delay(raid5_conf_t *conf)
@@ -3655,36 +3658,44 @@ static void unplug_slaves(mddev_t *mddev)
rcu_read_unlock();
}
static void raid5_unplug_device(struct request_queue *q)
void md_raid5_unplug_device(raid5_conf_t *conf)
{
mddev_t *mddev = q->queuedata;
raid5_conf_t *conf = mddev->private;
unsigned long flags;
spin_lock_irqsave(&conf->device_lock, flags);
if (blk_remove_plug(q)) {
if (plugger_remove_plug(&conf->plug)) {
conf->seq_flush++;
raid5_activate_delayed(conf);
}
md_wakeup_thread(mddev->thread);
md_wakeup_thread(conf->mddev->thread);
spin_unlock_irqrestore(&conf->device_lock, flags);
unplug_slaves(mddev);
unplug_slaves(conf->mddev);
}
EXPORT_SYMBOL_GPL(md_raid5_unplug_device);
static void raid5_unplug(struct plug_handle *plug)
{
raid5_conf_t *conf = container_of(plug, raid5_conf_t, plug);
md_raid5_unplug_device(conf);
}
static int raid5_congested(void *data, int bits)
static void raid5_unplug_queue(struct request_queue *q)
{
mddev_t *mddev = q->queuedata;
md_raid5_unplug_device(mddev->private);
}
int md_raid5_congested(mddev_t *mddev, int bits)
{
mddev_t *mddev = data;
raid5_conf_t *conf = mddev->private;
/* No difference between reads and writes. Just check
* how busy the stripe_cache is
*/
if (mddev_congested(mddev, bits))
return 1;
if (conf->inactive_blocked)
return 1;
if (conf->quiesce)
@@ -3694,6 +3705,15 @@ static int raid5_congested(void *data, int bits)
return 0;
}
EXPORT_SYMBOL_GPL(md_raid5_congested);
static int raid5_congested(void *data, int bits)
{
mddev_t *mddev = data;
return mddev_congested(mddev, bits) ||
md_raid5_congested(mddev, bits);
}
/* We want read requests to align with chunks where possible,
* but write requests don't need to.
@@ -4075,7 +4095,7 @@ static int make_request(mddev_t *mddev, struct bio * bi)
* add failed due to overlap. Flush everything
* and wait a while
*/
raid5_unplug_device(mddev->queue);
md_raid5_unplug_device(conf);
release_stripe(sh);
schedule();
goto retry;
@@ -4566,6 +4586,32 @@ raid5_show_stripe_cache_size(mddev_t *mddev, char *page)
return 0;
}
int
raid5_set_cache_size(mddev_t *mddev, int size)
{
raid5_conf_t *conf = mddev->private;
int err;
if (size <= 16 || size > 32768)
return -EINVAL;
while (size < conf->max_nr_stripes) {
if (drop_one_stripe(conf))
conf->max_nr_stripes--;
else
break;
}
err = md_allow_write(mddev);
if (err)
return err;
while (size > conf->max_nr_stripes) {
if (grow_one_stripe(conf))
conf->max_nr_stripes++;
else break;
}
return 0;
}
EXPORT_SYMBOL(raid5_set_cache_size);
static ssize_t
raid5_store_stripe_cache_size(mddev_t *mddev, const char *page, size_t len)
{
@@ -4580,22 +4626,9 @@ raid5_store_stripe_cache_size(mddev_t *mddev, const char *page, size_t len)
if (strict_strtoul(page, 10, &new))
return -EINVAL;
if (new <= 16 || new > 32768)
return -EINVAL;
while (new < conf->max_nr_stripes) {
if (drop_one_stripe(conf))
conf->max_nr_stripes--;
else
break;
}
err = md_allow_write(mddev);
err = raid5_set_cache_size(mddev, new);
if (err)
return err;
while (new > conf->max_nr_stripes) {
if (grow_one_stripe(conf))
conf->max_nr_stripes++;
else break;
}
return len;
}
@@ -4958,7 +4991,7 @@ static int only_parity(int raid_disk, int algo, int raid_disks, int max_degraded
static int run(mddev_t *mddev)
{
raid5_conf_t *conf;
int working_disks = 0, chunk_size;
int working_disks = 0;
int dirty_parity_disks = 0;
mdk_rdev_t *rdev;
sector_t reshape_offset = 0;
@@ -5144,43 +5177,48 @@ static int run(mddev_t *mddev)
"reshape");
}
/* read-ahead size must cover two whole stripes, which is
* 2 * (datadisks) * chunksize where 'n' is the number of raid devices
*/
{
/* Ok, everything is just fine now */
if (mddev->to_remove == &raid5_attrs_group)
mddev->to_remove = NULL;
else if (mddev->kobj.sd &&
sysfs_create_group(&mddev->kobj, &raid5_attrs_group))
printk(KERN_WARNING
"raid5: failed to create sysfs attributes for %s\n",
mdname(mddev));
md_set_array_sectors(mddev, raid5_size(mddev, 0, 0));
plugger_init(&conf->plug, raid5_unplug);
mddev->plug = &conf->plug;
if (mddev->queue) {
int chunk_size;
/* read-ahead size must cover two whole stripes, which
* is 2 * (datadisks) * chunksize where 'n' is the
* number of raid devices
*/
int data_disks = conf->previous_raid_disks - conf->max_degraded;
int stripe = data_disks *
((mddev->chunk_sectors << 9) / PAGE_SIZE);
if (mddev->queue->backing_dev_info.ra_pages < 2 * stripe)
mddev->queue->backing_dev_info.ra_pages = 2 * stripe;
blk_queue_merge_bvec(mddev->queue, raid5_mergeable_bvec);
mddev->queue->backing_dev_info.congested_data = mddev;
mddev->queue->backing_dev_info.congested_fn = raid5_congested;
mddev->queue->queue_lock = &conf->device_lock;
mddev->queue->unplug_fn = raid5_unplug_queue;
chunk_size = mddev->chunk_sectors << 9;
blk_queue_io_min(mddev->queue, chunk_size);
blk_queue_io_opt(mddev->queue, chunk_size *
(conf->raid_disks - conf->max_degraded));
list_for_each_entry(rdev, &mddev->disks, same_set)
disk_stack_limits(mddev->gendisk, rdev->bdev,
rdev->data_offset << 9);
}
/* Ok, everything is just fine now */
if (mddev->to_remove == &raid5_attrs_group)
mddev->to_remove = NULL;
else if (sysfs_create_group(&mddev->kobj, &raid5_attrs_group))
printk(KERN_WARNING
"md/raid:%s: failed to create sysfs attributes.\n",
mdname(mddev));
mddev->queue->queue_lock = &conf->device_lock;
mddev->queue->unplug_fn = raid5_unplug_device;
mddev->queue->backing_dev_info.congested_data = mddev;
mddev->queue->backing_dev_info.congested_fn = raid5_congested;
md_set_array_sectors(mddev, raid5_size(mddev, 0, 0));
blk_queue_merge_bvec(mddev->queue, raid5_mergeable_bvec);
chunk_size = mddev->chunk_sectors << 9;
blk_queue_io_min(mddev->queue, chunk_size);
blk_queue_io_opt(mddev->queue, chunk_size *
(conf->raid_disks - conf->max_degraded));
list_for_each_entry(rdev, &mddev->disks, same_set)
disk_stack_limits(mddev->gendisk, rdev->bdev,
rdev->data_offset << 9);
return 0;
abort:
md_unregister_thread(mddev->thread);
@@ -5200,8 +5238,9 @@ static int stop(mddev_t *mddev)
md_unregister_thread(mddev->thread);
mddev->thread = NULL;
mddev->queue->backing_dev_info.congested_fn = NULL;
blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/
if (mddev->queue)
mddev->queue->backing_dev_info.congested_fn = NULL;
plugger_flush(&conf->plug); /* the unplug fn references 'conf'*/
free_conf(conf);
mddev->private = NULL;
mddev->to_remove = &raid5_attrs_group;
@@ -5545,10 +5584,7 @@ static int raid5_start_reshape(mddev_t *mddev)
sprintf(nm, "rd%d", rdev->raid_disk);
if (sysfs_create_link(&mddev->kobj,
&rdev->kobj, nm))
printk(KERN_WARNING
"md/raid:%s: failed to create "
" link %s\n",
mdname(mddev), nm);
/* Failure here is OK */;
} else
break;
}
@@ -5603,7 +5639,7 @@ static void end_reshape(raid5_conf_t *conf)
/* read-ahead size must cover two whole stripes, which is
* 2 * (datadisks) * chunksize where 'n' is the number of raid devices
*/
{
if (conf->mddev->queue) {
int data_disks = conf->raid_disks - conf->max_degraded;
int stripe = data_disks * ((conf->chunk_sectors << 9)
/ PAGE_SIZE);