Merge branch 'for-3.2/core' of git://git.kernel.dk/linux-block

* 'for-3.2/core' of git://git.kernel.dk/linux-block: (29 commits)
  block: don't call blk_drain_queue() if elevator is not up
  blk-throttle: use queue_is_locked() instead of lockdep_is_held()
  blk-throttle: Take blkcg->lock while traversing blkcg->policy_list
  blk-throttle: Free up policy node associated with deleted rule
  block: warn if tag is greater than real_max_depth.
  block: make gendisk hold a reference to its queue
  blk-flush: move the queue kick into
  blk-flush: fix invalid BUG_ON in blk_insert_flush
  block: Remove the control of complete cpu from bio.
  block: fix a typo in the blk-cgroup.h file
  block: initialize the bounce pool if high memory may be added later
  block: fix request_queue lifetime handling by making blk_queue_cleanup() properly shutdown
  block: drop @tsk from attempt_plug_merge() and explain sync rules
  block: make get_request[_wait]() fail if queue is dead
  block: reorganize throtl_get_tg() and blk_throtl_bio()
  block: reorganize queue draining
  block: drop unnecessary blk_get/put_queue() in scsi_cmd_ioctl() and blk_get_tg()
  block: pass around REQ_* flags instead of broken down booleans during request alloc/free
  block: move blk_throtl prototypes to block/blk.h
  block: fix genhd refcounting in blkio_policy_parse_and_set()
  ...

Fix up trivial conflicts due to "mddev_t" -> "struct mddev" conversion
and making the request functions be of type "void" instead of "int" in
 - drivers/md/{faulty.c,linear.c,md.c,md.h,multipath.c,raid0.c,raid1.c,raid10.c,raid5.c}
 - drivers/staging/zram/zram_drv.c
This commit is contained in:
Linus Torvalds
2011-11-04 17:06:58 -07:00
42 changed files with 596 additions and 630 deletions
+64 -47
View File
@@ -768,25 +768,14 @@ static uint64_t blkio_get_stat(struct blkio_group *blkg,
return disk_total;
}
static int blkio_check_dev_num(dev_t dev)
{
int part = 0;
struct gendisk *disk;
disk = get_gendisk(dev, &part);
if (!disk || part)
return -ENODEV;
return 0;
}
static int blkio_policy_parse_and_set(char *buf,
struct blkio_policy_node *newpn, enum blkio_policy_id plid, int fileid)
{
struct gendisk *disk = NULL;
char *s[4], *p, *major_s = NULL, *minor_s = NULL;
int ret;
unsigned long major, minor;
int i = 0;
int i = 0, ret = -EINVAL;
int part;
dev_t dev;
u64 temp;
@@ -804,37 +793,36 @@ static int blkio_policy_parse_and_set(char *buf,
}
if (i != 2)
return -EINVAL;
goto out;
p = strsep(&s[0], ":");
if (p != NULL)
major_s = p;
else
return -EINVAL;
goto out;
minor_s = s[0];
if (!minor_s)
return -EINVAL;
goto out;
ret = strict_strtoul(major_s, 10, &major);
if (ret)
return -EINVAL;
if (strict_strtoul(major_s, 10, &major))
goto out;
ret = strict_strtoul(minor_s, 10, &minor);
if (ret)
return -EINVAL;
if (strict_strtoul(minor_s, 10, &minor))
goto out;
dev = MKDEV(major, minor);
ret = strict_strtoull(s[1], 10, &temp);
if (ret)
return -EINVAL;
if (strict_strtoull(s[1], 10, &temp))
goto out;
/* For rule removal, do not check for device presence. */
if (temp) {
ret = blkio_check_dev_num(dev);
if (ret)
return ret;
disk = get_gendisk(dev, &part);
if (!disk || part) {
ret = -ENODEV;
goto out;
}
}
newpn->dev = dev;
@@ -843,7 +831,7 @@ static int blkio_policy_parse_and_set(char *buf,
case BLKIO_POLICY_PROP:
if ((temp < BLKIO_WEIGHT_MIN && temp > 0) ||
temp > BLKIO_WEIGHT_MAX)
return -EINVAL;
goto out;
newpn->plid = plid;
newpn->fileid = fileid;
@@ -860,7 +848,7 @@ static int blkio_policy_parse_and_set(char *buf,
case BLKIO_THROTL_read_iops_device:
case BLKIO_THROTL_write_iops_device:
if (temp > THROTL_IOPS_MAX)
return -EINVAL;
goto out;
newpn->plid = plid;
newpn->fileid = fileid;
@@ -871,68 +859,96 @@ static int blkio_policy_parse_and_set(char *buf,
default:
BUG();
}
return 0;
ret = 0;
out:
put_disk(disk);
return ret;
}
unsigned int blkcg_get_weight(struct blkio_cgroup *blkcg,
dev_t dev)
{
struct blkio_policy_node *pn;
unsigned long flags;
unsigned int weight;
spin_lock_irqsave(&blkcg->lock, flags);
pn = blkio_policy_search_node(blkcg, dev, BLKIO_POLICY_PROP,
BLKIO_PROP_weight_device);
if (pn)
return pn->val.weight;
weight = pn->val.weight;
else
return blkcg->weight;
weight = blkcg->weight;
spin_unlock_irqrestore(&blkcg->lock, flags);
return weight;
}
EXPORT_SYMBOL_GPL(blkcg_get_weight);
uint64_t blkcg_get_read_bps(struct blkio_cgroup *blkcg, dev_t dev)
{
struct blkio_policy_node *pn;
unsigned long flags;
uint64_t bps = -1;
spin_lock_irqsave(&blkcg->lock, flags);
pn = blkio_policy_search_node(blkcg, dev, BLKIO_POLICY_THROTL,
BLKIO_THROTL_read_bps_device);
if (pn)
return pn->val.bps;
else
return -1;
bps = pn->val.bps;
spin_unlock_irqrestore(&blkcg->lock, flags);
return bps;
}
uint64_t blkcg_get_write_bps(struct blkio_cgroup *blkcg, dev_t dev)
{
struct blkio_policy_node *pn;
unsigned long flags;
uint64_t bps = -1;
spin_lock_irqsave(&blkcg->lock, flags);
pn = blkio_policy_search_node(blkcg, dev, BLKIO_POLICY_THROTL,
BLKIO_THROTL_write_bps_device);
if (pn)
return pn->val.bps;
else
return -1;
bps = pn->val.bps;
spin_unlock_irqrestore(&blkcg->lock, flags);
return bps;
}
unsigned int blkcg_get_read_iops(struct blkio_cgroup *blkcg, dev_t dev)
{
struct blkio_policy_node *pn;
unsigned long flags;
unsigned int iops = -1;
spin_lock_irqsave(&blkcg->lock, flags);
pn = blkio_policy_search_node(blkcg, dev, BLKIO_POLICY_THROTL,
BLKIO_THROTL_read_iops_device);
if (pn)
return pn->val.iops;
else
return -1;
iops = pn->val.iops;
spin_unlock_irqrestore(&blkcg->lock, flags);
return iops;
}
unsigned int blkcg_get_write_iops(struct blkio_cgroup *blkcg, dev_t dev)
{
struct blkio_policy_node *pn;
unsigned long flags;
unsigned int iops = -1;
spin_lock_irqsave(&blkcg->lock, flags);
pn = blkio_policy_search_node(blkcg, dev, BLKIO_POLICY_THROTL,
BLKIO_THROTL_write_iops_device);
if (pn)
return pn->val.iops;
else
return -1;
iops = pn->val.iops;
spin_unlock_irqrestore(&blkcg->lock, flags);
return iops;
}
/* Checks whether user asked for deleting a policy rule */
@@ -1085,6 +1101,7 @@ static int blkiocg_file_write(struct cgroup *cgrp, struct cftype *cft,
if (blkio_delete_rule_command(newpn)) {
blkio_policy_delete_node(pn);
kfree(pn);
spin_unlock_irq(&blkcg->lock);
goto update_io_group;
}
+1 -1
View File
@@ -188,7 +188,7 @@ struct blkio_policy_node {
union {
unsigned int weight;
/*
* Rate read/write in terms of byptes per second
* Rate read/write in terms of bytes per second
* Whether this rate represents read or write is determined
* by file type "fileid".
*/
+268 -203
View File
File diff suppressed because it is too large Load Diff
+1 -2
View File
@@ -320,7 +320,7 @@ void blk_insert_flush(struct request *rq)
return;
}
BUG_ON(!rq->bio || rq->bio != rq->biotail);
BUG_ON(rq->bio != rq->biotail); /*assumes zero or single bio rq */
/*
* If there's data but flush is not necessary, the request can be
@@ -330,7 +330,6 @@ void blk_insert_flush(struct request *rq)
if ((policy & REQ_FSEQ_DATA) &&
!(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) {
list_add_tail(&rq->queuelist, &q->queue_head);
blk_run_queue_async(q);
return;
}
+4 -3
View File
@@ -457,11 +457,11 @@ queue_attr_store(struct kobject *kobj, struct attribute *attr,
}
/**
* blk_cleanup_queue: - release a &struct request_queue when it is no longer needed
* @kobj: the kobj belonging of the request queue to be released
* blk_release_queue: - release a &struct request_queue when it is no longer needed
* @kobj: the kobj belonging to the request queue to be released
*
* Description:
* blk_cleanup_queue is the pair to blk_init_queue() or
* blk_release_queue is the pair to blk_init_queue() or
* blk_queue_make_request(). It should be called when a request queue is
* being released; typically when a block device is being de-registered.
* Currently, its primary task it to free all the &struct request
@@ -490,6 +490,7 @@ static void blk_release_queue(struct kobject *kobj)
if (q->queue_tags)
__blk_queue_free_tags(q);
blk_throtl_release(q);
blk_trace_shutdown(q);
bdi_destroy(&q->backing_dev_info);
+4 -2
View File
@@ -286,12 +286,14 @@ void blk_queue_end_tag(struct request_queue *q, struct request *rq)
BUG_ON(tag == -1);
if (unlikely(tag >= bqt->real_max_depth))
if (unlikely(tag >= bqt->max_depth)) {
/*
* This can happen after tag depth has been reduced.
* FIXME: how about a warning or info message here?
* But tag shouldn't be larger than real_max_depth.
*/
WARN_ON(tag >= bqt->real_max_depth);
return;
}
list_del_init(&rq->queuelist);
rq->cmd_flags &= ~REQ_QUEUED;
+63 -45
View File
@@ -10,6 +10,7 @@
#include <linux/bio.h>
#include <linux/blktrace_api.h>
#include "blk-cgroup.h"
#include "blk.h"
/* Max dispatch from a group in 1 round */
static int throtl_grp_quantum = 8;
@@ -302,16 +303,16 @@ throtl_grp *throtl_find_tg(struct throtl_data *td, struct blkio_cgroup *blkcg)
return tg;
}
/*
* This function returns with queue lock unlocked in case of error, like
* request queue is no more
*/
static struct throtl_grp * throtl_get_tg(struct throtl_data *td)
{
struct throtl_grp *tg = NULL, *__tg = NULL;
struct blkio_cgroup *blkcg;
struct request_queue *q = td->queue;
/* no throttling for dead queue */
if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)))
return NULL;
rcu_read_lock();
blkcg = task_blkio_cgroup(current);
tg = throtl_find_tg(td, blkcg);
@@ -323,32 +324,22 @@ static struct throtl_grp * throtl_get_tg(struct throtl_data *td)
/*
* Need to allocate a group. Allocation of group also needs allocation
* of per cpu stats which in-turn takes a mutex() and can block. Hence
* we need to drop rcu lock and queue_lock before we call alloc
*
* Take the request queue reference to make sure queue does not
* go away once we return from allocation.
* we need to drop rcu lock and queue_lock before we call alloc.
*/
blk_get_queue(q);
rcu_read_unlock();
spin_unlock_irq(q->queue_lock);
tg = throtl_alloc_tg(td);
/*
* We might have slept in group allocation. Make sure queue is not
* dead
*/
if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) {
blk_put_queue(q);
if (tg)
kfree(tg);
return ERR_PTR(-ENODEV);
}
blk_put_queue(q);
/* Group allocated and queue is still alive. take the lock */
spin_lock_irq(q->queue_lock);
/* Make sure @q is still alive */
if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) {
kfree(tg);
return NULL;
}
/*
* Initialize the new group. After sleeping, read the blkcg again.
*/
@@ -1014,11 +1005,6 @@ static void throtl_release_tgs(struct throtl_data *td)
}
}
static void throtl_td_free(struct throtl_data *td)
{
kfree(td);
}
/*
* Blk cgroup controller notification saying that blkio_group object is being
* delinked as associated cgroup object is going away. That also means that
@@ -1123,17 +1109,17 @@ static struct blkio_policy_type blkio_policy_throtl = {
.plid = BLKIO_POLICY_THROTL,
};
int blk_throtl_bio(struct request_queue *q, struct bio **biop)
bool blk_throtl_bio(struct request_queue *q, struct bio *bio)
{
struct throtl_data *td = q->td;
struct throtl_grp *tg;
struct bio *bio = *biop;
bool rw = bio_data_dir(bio), update_disptime = true;
struct blkio_cgroup *blkcg;
bool throttled = false;
if (bio->bi_rw & REQ_THROTTLED) {
bio->bi_rw &= ~REQ_THROTTLED;
return 0;
goto out;
}
/*
@@ -1152,7 +1138,7 @@ int blk_throtl_bio(struct request_queue *q, struct bio **biop)
blkiocg_update_dispatch_stats(&tg->blkg, bio->bi_size,
rw, rw_is_sync(bio->bi_rw));
rcu_read_unlock();
return 0;
goto out;
}
}
rcu_read_unlock();
@@ -1161,18 +1147,10 @@ int blk_throtl_bio(struct request_queue *q, struct bio **biop)
* Either group has not been allocated yet or it is not an unlimited
* IO group
*/
spin_lock_irq(q->queue_lock);
tg = throtl_get_tg(td);
if (IS_ERR(tg)) {
if (PTR_ERR(tg) == -ENODEV) {
/*
* Queue is gone. No queue lock held here.
*/
return -ENODEV;
}
}
if (unlikely(!tg))
goto out_unlock;
if (tg->nr_queued[rw]) {
/*
@@ -1200,7 +1178,7 @@ int blk_throtl_bio(struct request_queue *q, struct bio **biop)
* So keep on trimming slice even if bio is not queued.
*/
throtl_trim_slice(td, tg, rw);
goto out;
goto out_unlock;
}
queue_bio:
@@ -1212,16 +1190,52 @@ queue_bio:
tg->nr_queued[READ], tg->nr_queued[WRITE]);
throtl_add_bio_tg(q->td, tg, bio);
*biop = NULL;
throttled = true;
if (update_disptime) {
tg_update_disptime(td, tg);
throtl_schedule_next_dispatch(td);
}
out:
out_unlock:
spin_unlock_irq(q->queue_lock);
return 0;
out:
return throttled;
}
/**
* blk_throtl_drain - drain throttled bios
* @q: request_queue to drain throttled bios for
*
* Dispatch all currently throttled bios on @q through ->make_request_fn().
*/
void blk_throtl_drain(struct request_queue *q)
__releases(q->queue_lock) __acquires(q->queue_lock)
{
struct throtl_data *td = q->td;
struct throtl_rb_root *st = &td->tg_service_tree;
struct throtl_grp *tg;
struct bio_list bl;
struct bio *bio;
WARN_ON_ONCE(!queue_is_locked(q));
bio_list_init(&bl);
while ((tg = throtl_rb_first(st))) {
throtl_dequeue_tg(td, tg);
while ((bio = bio_list_peek(&tg->bio_lists[READ])))
tg_dispatch_one_bio(td, tg, bio_data_dir(bio), &bl);
while ((bio = bio_list_peek(&tg->bio_lists[WRITE])))
tg_dispatch_one_bio(td, tg, bio_data_dir(bio), &bl);
}
spin_unlock_irq(q->queue_lock);
while ((bio = bio_list_pop(&bl)))
generic_make_request(bio);
spin_lock_irq(q->queue_lock);
}
int blk_throtl_init(struct request_queue *q)
@@ -1296,7 +1310,11 @@ void blk_throtl_exit(struct request_queue *q)
* it.
*/
throtl_shutdown_wq(q);
throtl_td_free(td);
}
void blk_throtl_release(struct request_queue *q)
{
kfree(q->td);
}
static int __init throtl_init(void)
+19 -1
View File
@@ -15,6 +15,7 @@ void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
struct bio *bio);
int blk_rq_append_bio(struct request_queue *q, struct request *rq,
struct bio *bio);
void blk_drain_queue(struct request_queue *q, bool drain_all);
void blk_dequeue_request(struct request *rq);
void __blk_queue_free_tags(struct request_queue *q);
bool __blk_end_bidi_request(struct request *rq, int error,
@@ -188,4 +189,21 @@ static inline int blk_do_io_stat(struct request *rq)
(rq->cmd_flags & REQ_DISCARD));
}
#endif
#ifdef CONFIG_BLK_DEV_THROTTLING
extern bool blk_throtl_bio(struct request_queue *q, struct bio *bio);
extern void blk_throtl_drain(struct request_queue *q);
extern int blk_throtl_init(struct request_queue *q);
extern void blk_throtl_exit(struct request_queue *q);
extern void blk_throtl_release(struct request_queue *q);
#else /* CONFIG_BLK_DEV_THROTTLING */
static inline bool blk_throtl_bio(struct request_queue *q, struct bio *bio)
{
return false;
}
static inline void blk_throtl_drain(struct request_queue *q) { }
static inline int blk_throtl_init(struct request_queue *q) { return 0; }
static inline void blk_throtl_exit(struct request_queue *q) { }
static inline void blk_throtl_release(struct request_queue *q) { }
#endif /* CONFIG_BLK_DEV_THROTTLING */
#endif /* BLK_INTERNAL_H */
+12 -27
View File
@@ -31,7 +31,6 @@
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/compiler.h>
#include <linux/delay.h>
#include <linux/blktrace_api.h>
#include <linux/hash.h>
#include <linux/uaccess.h>
@@ -182,7 +181,7 @@ static void elevator_attach(struct request_queue *q, struct elevator_queue *eq,
eq->elevator_data = data;
}
static char chosen_elevator[16];
static char chosen_elevator[ELV_NAME_MAX];
static int __init elevator_setup(char *str)
{
@@ -606,43 +605,35 @@ void elv_requeue_request(struct request_queue *q, struct request *rq)
void elv_drain_elevator(struct request_queue *q)
{
static int printed;
lockdep_assert_held(q->queue_lock);
while (q->elevator->ops->elevator_dispatch_fn(q, 1))
;
if (q->nr_sorted == 0)
return;
if (printed++ < 10) {
if (q->nr_sorted && printed++ < 10) {
printk(KERN_ERR "%s: forced dispatching is broken "
"(nr_sorted=%u), please report this\n",
q->elevator->elevator_type->elevator_name, q->nr_sorted);
}
}
/*
* Call with queue lock held, interrupts disabled
*/
void elv_quiesce_start(struct request_queue *q)
{
if (!q->elevator)
return;
spin_lock_irq(q->queue_lock);
queue_flag_set(QUEUE_FLAG_ELVSWITCH, q);
spin_unlock_irq(q->queue_lock);
/*
* make sure we don't have any requests in flight
*/
elv_drain_elevator(q);
while (q->rq.elvpriv) {
__blk_run_queue(q);
spin_unlock_irq(q->queue_lock);
msleep(10);
spin_lock_irq(q->queue_lock);
elv_drain_elevator(q);
}
blk_drain_queue(q, false);
}
void elv_quiesce_end(struct request_queue *q)
{
spin_lock_irq(q->queue_lock);
queue_flag_clear(QUEUE_FLAG_ELVSWITCH, q);
spin_unlock_irq(q->queue_lock);
}
void __elv_add_request(struct request_queue *q, struct request *rq, int where)
@@ -972,7 +963,6 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
/*
* Turn on BYPASS and drain all requests w/ elevator private data
*/
spin_lock_irq(q->queue_lock);
elv_quiesce_start(q);
/*
@@ -983,8 +973,8 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
/*
* attach and start new elevator
*/
spin_lock_irq(q->queue_lock);
elevator_attach(q, e, data);
spin_unlock_irq(q->queue_lock);
if (old_elevator->registered) {
@@ -999,9 +989,7 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
* finally exit old elevator and turn off BYPASS.
*/
elevator_exit(old_elevator);
spin_lock_irq(q->queue_lock);
elv_quiesce_end(q);
spin_unlock_irq(q->queue_lock);
blk_add_trace_msg(q, "elv switch: %s", e->elevator_type->elevator_name);
@@ -1015,10 +1003,7 @@ fail_register:
elevator_exit(e);
q->elevator = old_elevator;
elv_register_queue(q);
spin_lock_irq(q->queue_lock);
queue_flag_clear(QUEUE_FLAG_ELVSWITCH, q);
spin_unlock_irq(q->queue_lock);
elv_quiesce_end(q);
return err;
}
+8
View File
@@ -612,6 +612,12 @@ void add_disk(struct gendisk *disk)
register_disk(disk);
blk_register_queue(disk);
/*
* Take an extra ref on queue which will be put on disk_release()
* so that it sticks around as long as @disk is there.
*/
WARN_ON_ONCE(blk_get_queue(disk->queue));
retval = sysfs_create_link(&disk_to_dev(disk)->kobj, &bdi->dev->kobj,
"bdi");
WARN_ON(retval);
@@ -1166,6 +1172,8 @@ static void disk_release(struct device *dev)
disk_replace_part_tbl(disk, NULL);
free_part_stats(&disk->part0);
free_part_info(&disk->part0);
if (disk->queue)
blk_put_queue(disk->queue);
kfree(disk);
}
struct class block_class = {
+1 -2
View File
@@ -565,7 +565,7 @@ int scsi_cmd_ioctl(struct request_queue *q, struct gendisk *bd_disk, fmode_t mod
{
int err;
if (!q || blk_get_queue(q))
if (!q)
return -ENXIO;
switch (cmd) {
@@ -686,7 +686,6 @@ int scsi_cmd_ioctl(struct request_queue *q, struct gendisk *bd_disk, fmode_t mod
err = -ENOTTY;
}
blk_put_queue(q);
return err;
}
EXPORT_SYMBOL(scsi_cmd_ioctl);