Merge branch 'for-2.6.39/core' of git://git.kernel.dk/linux-2.6-block
* 'for-2.6.39/core' of git://git.kernel.dk/linux-2.6-block: (65 commits)
Documentation/iostats.txt: bit-size reference etc.
cfq-iosched: removing unnecessary think time checking
cfq-iosched: Don't clear queue stats when preempt.
blk-throttle: Reset group slice when limits are changed
blk-cgroup: Only give unaccounted_time under debug
cfq-iosched: Don't set active queue in preempt
block: fix non-atomic access to genhd inflight structures
block: attempt to merge with existing requests on plug flush
block: NULL dereference on error path in __blkdev_get()
cfq-iosched: Don't update group weights when on service tree
fs: assign sb->s_bdi to default_backing_dev_info if the bdi is going away
block: Require subsystems to explicitly allocate bio_set integrity mempool
jbd2: finish conversion from WRITE_SYNC_PLUG to WRITE_SYNC and explicit plugging
jbd: finish conversion from WRITE_SYNC_PLUG to WRITE_SYNC and explicit plugging
fs: make fsync_buffers_list() plug
mm: make generic_writepages() use plugging
blk-cgroup: Add unaccounted time to timeslice_used.
block: fixup plugging stubs for !CONFIG_BLOCK
block: remove obsolete comments for blkdev_issue_zeroout.
blktrace: Use rq->cmd_flags directly in blk_add_trace_rq.
...
Fix up conflicts in fs/{aio.c,super.c}
@@ -963,11 +963,6 @@ elevator_dispatch_fn*		fills the dispatch queue with ready requests.
 
 elevator_add_req_fn*		called to add a new request into the scheduler
 
-elevator_queue_empty_fn	returns true if the merge queue is empty.
-			Drivers shouldn't use this, but rather check
-			if elv_next_request is NULL (without losing the
-			request if one exists!)
-
 elevator_former_req_fn
 elevator_latter_req_fn	These return the request before or after the
 			one specified in disk sort order. Used by the
@@ -140,7 +140,7 @@ Proportional weight policy files
 - Specifies per cgroup weight. This is default weight of the group
   on all the devices until and unless overridden by per device rule.
   (See blkio.weight_device).
-  Currently allowed range of weights is from 100 to 1000.
+  Currently allowed range of weights is from 10 to 1000.
 
 - blkio.weight_device
 	- One can specify per cgroup per device rules using this interface.
@@ -343,34 +343,6 @@ Common files among various policies
 
 CFQ sysfs tunable
 =================
-/sys/block/<disk>/queue/iosched/group_isolation
------------------------------------------------
-
-If group_isolation=1, it provides stronger isolation between groups at the
-expense of throughput. By default group_isolation is 0. In general that
-means that if group_isolation=0, expect fairness for sequential workload
-only. Set group_isolation=1 to see fairness for random IO workload also.
-
-Generally CFQ will put random seeky workload in sync-noidle category. CFQ
-will disable idling on these queues and it does a collective idling on group
-of such queues. Generally these are slow moving queues and if there is a
-sync-noidle service tree in each group, that group gets exclusive access to
-disk for certain period. That means it will bring the throughput down if
-group does not have enough IO to drive deeper queue depths and utilize disk
-capacity to the fullest in the slice allocated to it. But the flip side is
-that even a random reader should get better latencies and overall throughput
-if there are lots of sequential readers/sync-idle workload running in the
-system.
-
-If group_isolation=0, then CFQ automatically moves all the random seeky queues
-in the root group. That means there will be no service differentiation for
-that kind of workload. This leads to better throughput as we do collective
-idling on root sync-noidle tree.
-
-By default one should run with group_isolation=0. If that is not sufficient
-and one wants stronger isolation between groups, then set group_isolation=1
-but this will come at cost of reduced throughput.
-
 /sys/block/<disk>/queue/iosched/slice_idle
 ------------------------------------------
 On a faster hardware CFQ can be slow, especially with sequential workload.
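Note on the range change above: after this patch the kernel accepts weights down to 10 through the usual cgroup files. A minimal user-space sketch of setting a low weight (the v1 cgroup mount point and group name here are assumptions for illustration, not part of the patch):

#include <stdio.h>

int main(void)
{
	/* Hypothetical path; cgroup mount points vary by distribution. */
	const char *path = "/sys/fs/cgroup/blkio/mygroup/blkio.weight";
	FILE *f = fopen(path, "w");

	if (!f) {
		perror("fopen");
		return 1;
	}
	/* Any value in the new 10..1000 range is accepted; 10 would have
	 * been rejected before this change (the old minimum was 100). */
	fprintf(f, "%d\n", 10);
	fclose(f);
	return 0;
}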
@@ -1,8 +1,6 @@
 I/O statistics fields
 ---------------
 
-Last modified Sep 30, 2003
-
 Since 2.4.20 (and some versions before, with patches), and 2.5.45,
 more extensive disk statistics have been introduced to help measure disk
 activity. Tools such as sar and iostat typically interpret these and do
@@ -46,11 +44,12 @@ the above example, the first field of statistics would be 446216.
 By contrast, in 2.6 if you look at /sys/block/hda/stat, you'll
 find just the eleven fields, beginning with 446216. If you look at
 /proc/diskstats, the eleven fields will be preceded by the major and
-minor device numbers, and device name. Each of these formats provide
+minor device numbers, and device name. Each of these formats provides
 eleven fields of statistics, each meaning exactly the same things.
 All fields except field 9 are cumulative since boot. Field 9 should
-go to zero as I/Os complete; all others only increase. Yes, these are
-32 bit unsigned numbers, and on a very busy or long-lived system they
+go to zero as I/Os complete; all others only increase (unless they
+overflow and wrap). Yes, these are (32-bit or 64-bit) unsigned long
+(native word size) numbers, and on a very busy or long-lived system they
 may wrap. Applications should be prepared to deal with that; unless
 your observations are measured in large numbers of minutes or hours,
 they should not wrap twice before you notice them.
@@ -96,11 +95,11 @@ introduced when changes collide, so (for instance) adding up all the
 read I/Os issued per partition should equal those made to the disks ...
 but due to the lack of locking it may only be very close.
 
-In 2.6, there are counters for each cpu, which made the lack of locking
-almost a non-issue. When the statistics are read, the per-cpu counters
-are summed (possibly overflowing the unsigned 32-bit variable they are
+In 2.6, there are counters for each CPU, which make the lack of locking
+almost a non-issue. When the statistics are read, the per-CPU counters
+are summed (possibly overflowing the unsigned long variable they are
 summed to) and the result given to the user. There is no convenient
-user interface for accessing the per-cpu counters themselves.
+user interface for accessing the per-CPU counters themselves.
 
 Disks vs Partitions
 -------------------
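The wrap caveat matters to tools that sample these counters: with unsigned arithmetic, the delta between two samples stays correct across a single wrap. An illustrative sketch (not kernel code; a fixed 32-bit width is used so the demo behaves the same everywhere, while the kernel's counters are native word size per the text above):

#include <inttypes.h>
#include <stdio.h>

/* Wrap-safe delta between two samples of an unsigned counter: modular
 * subtraction gives the true difference as long as the counter wrapped
 * at most once between the two reads. */
static uint32_t counter_delta(uint32_t prev, uint32_t cur)
{
	return cur - prev;		/* modulo 2^32 */
}

int main(void)
{
	uint32_t prev = 4294967290u;	/* sampled just before a wrap */
	uint32_t cur = 6;		/* sampled just after it */

	/* Prints 12, the actual number of events between samples. */
	printf("I/Os between samples: %" PRIu32 "\n",
	       counter_delta(prev, cur));
	return 0;
}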
+15 -1
@@ -371,12 +371,14 @@ void blkiocg_update_io_remove_stats(struct blkio_group *blkg,
 }
 EXPORT_SYMBOL_GPL(blkiocg_update_io_remove_stats);
 
-void blkiocg_update_timeslice_used(struct blkio_group *blkg, unsigned long time)
+void blkiocg_update_timeslice_used(struct blkio_group *blkg, unsigned long time,
+				unsigned long unaccounted_time)
 {
 	unsigned long flags;
 
 	spin_lock_irqsave(&blkg->stats_lock, flags);
 	blkg->stats.time += time;
+	blkg->stats.unaccounted_time += unaccounted_time;
 	spin_unlock_irqrestore(&blkg->stats_lock, flags);
 }
 EXPORT_SYMBOL_GPL(blkiocg_update_timeslice_used);
@@ -604,6 +606,9 @@ static uint64_t blkio_get_stat(struct blkio_group *blkg,
 		return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
 					blkg->stats.sectors, cb, dev);
 #ifdef CONFIG_DEBUG_BLK_CGROUP
+	if (type == BLKIO_STAT_UNACCOUNTED_TIME)
+		return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
+					blkg->stats.unaccounted_time, cb, dev);
 	if (type == BLKIO_STAT_AVG_QUEUE_SIZE) {
 		uint64_t sum = blkg->stats.avg_queue_size_sum;
 		uint64_t samples = blkg->stats.avg_queue_size_samples;
@@ -1125,6 +1130,9 @@ static int blkiocg_file_read_map(struct cgroup *cgrp, struct cftype *cft,
 			return blkio_read_blkg_stats(blkcg, cft, cb,
 						BLKIO_STAT_QUEUED, 1);
 #ifdef CONFIG_DEBUG_BLK_CGROUP
+		case BLKIO_PROP_unaccounted_time:
+			return blkio_read_blkg_stats(blkcg, cft, cb,
+						BLKIO_STAT_UNACCOUNTED_TIME, 0);
 		case BLKIO_PROP_dequeue:
 			return blkio_read_blkg_stats(blkcg, cft, cb,
 						BLKIO_STAT_DEQUEUE, 0);
@@ -1382,6 +1390,12 @@ struct cftype blkio_files[] = {
 				BLKIO_PROP_dequeue),
 		.read_map = blkiocg_file_read_map,
 	},
+	{
+		.name = "unaccounted_time",
+		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
+				BLKIO_PROP_unaccounted_time),
+		.read_map = blkiocg_file_read_map,
+	},
 #endif
 };
+11 -3
@@ -49,6 +49,8 @@ enum stat_type {
 	/* All the single valued stats go below this */
 	BLKIO_STAT_TIME,
 	BLKIO_STAT_SECTORS,
+	/* Time not charged to this cgroup */
+	BLKIO_STAT_UNACCOUNTED_TIME,
 #ifdef CONFIG_DEBUG_BLK_CGROUP
 	BLKIO_STAT_AVG_QUEUE_SIZE,
 	BLKIO_STAT_IDLE_TIME,
@@ -81,6 +83,7 @@ enum blkcg_file_name_prop {
 	BLKIO_PROP_io_serviced,
 	BLKIO_PROP_time,
 	BLKIO_PROP_sectors,
+	BLKIO_PROP_unaccounted_time,
 	BLKIO_PROP_io_service_time,
 	BLKIO_PROP_io_wait_time,
 	BLKIO_PROP_io_merged,
@@ -114,6 +117,8 @@ struct blkio_group_stats {
 	/* total disk time and nr sectors dispatched by this group */
 	uint64_t time;
 	uint64_t sectors;
+	/* Time not charged to this cgroup */
+	uint64_t unaccounted_time;
 	uint64_t stat_arr[BLKIO_STAT_QUEUED + 1][BLKIO_STAT_TOTAL];
 #ifdef CONFIG_DEBUG_BLK_CGROUP
 	/* Sum of number of IOs queued across all samples */
@@ -240,7 +245,7 @@ static inline char *blkg_path(struct blkio_group *blkg) { return NULL; }
 
 #endif
 
-#define BLKIO_WEIGHT_MIN	100
+#define BLKIO_WEIGHT_MIN	10
 #define BLKIO_WEIGHT_MAX	1000
 #define BLKIO_WEIGHT_DEFAULT	500
 
@@ -293,7 +298,8 @@ extern int blkiocg_del_blkio_group(struct blkio_group *blkg);
 extern struct blkio_group *blkiocg_lookup_group(struct blkio_cgroup *blkcg,
 						void *key);
 void blkiocg_update_timeslice_used(struct blkio_group *blkg,
-					unsigned long time);
+					unsigned long time,
+					unsigned long unaccounted_time);
 void blkiocg_update_dispatch_stats(struct blkio_group *blkg, uint64_t bytes,
 					bool direction, bool sync);
 void blkiocg_update_completion_stats(struct blkio_group *blkg,
@@ -319,7 +325,9 @@ blkiocg_del_blkio_group(struct blkio_group *blkg) { return 0; }
 static inline struct blkio_group *
 blkiocg_lookup_group(struct blkio_cgroup *blkcg, void *key) { return NULL; }
 static inline void blkiocg_update_timeslice_used(struct blkio_group *blkg,
-						unsigned long time) {}
+						unsigned long time,
+						unsigned long unaccounted_time)
+{}
 static inline void blkiocg_update_dispatch_stats(struct blkio_group *blkg,
 					uint64_t bytes, bool direction, bool sync) {}
 static inline void blkiocg_update_completion_stats(struct blkio_group *blkg,
+380 -272
File diff suppressed because it is too large.
+2 -2
@@ -54,8 +54,8 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
 	rq->end_io = done;
 	WARN_ON(irqs_disabled());
 	spin_lock_irq(q->queue_lock);
-	__elv_add_request(q, rq, where, 1);
-	__generic_unplug_device(q);
+	__elv_add_request(q, rq, where);
+	__blk_run_queue(q, false);
 	/* the queue is stopped so it won't be plugged+unplugged */
 	if (rq->cmd_type == REQ_TYPE_PM_RESUME)
 		q->request_fn(q);
+319 -146
File diff suppressed because it is too large.
@@ -136,8 +136,6 @@ static void bio_batch_end_io(struct bio *bio, int err)
  *
  * Description:
  *  Generate and issue number of bios with zerofiled pages.
- *  Send barrier at the beginning and at the end if requested. This guarantie
- *  correct request ordering. Empty barrier allow us to avoid post queue flush.
  */
 
 int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
@@ -465,3 +465,9 @@ int attempt_front_merge(struct request_queue *q, struct request *rq)
 
 	return 0;
 }
+
+int blk_attempt_req_merge(struct request_queue *q, struct request *rq,
+			  struct request *next)
+{
+	return attempt_merge(q, rq, next);
+}
@@ -164,24 +164,9 @@ void blk_queue_make_request(struct request_queue *q, make_request_fn *mfn)
 	blk_queue_congestion_threshold(q);
 	q->nr_batching = BLK_BATCH_REQ;
 
-	q->unplug_thresh = 4;		/* hmm */
-	q->unplug_delay = msecs_to_jiffies(3);	/* 3 milliseconds */
-	if (q->unplug_delay == 0)
-		q->unplug_delay = 1;
-
-	q->unplug_timer.function = blk_unplug_timeout;
-	q->unplug_timer.data = (unsigned long)q;
-
 	blk_set_default_limits(&q->limits);
 	blk_queue_max_hw_sectors(q, BLK_SAFE_MAX_SECTORS);
 
-	/*
-	 * If the caller didn't supply a lock, fall back to our embedded
-	 * per-queue locks
-	 */
-	if (!q->queue_lock)
-		q->queue_lock = &q->__queue_lock;
-
 	/*
 	 * by default assume old behaviour and bounce for any highmem page
 	 */
@@ -471,8 +471,6 @@ static void blk_release_queue(struct kobject *kobj)
 
 	blk_sync_queue(q);
 
-	blk_throtl_exit(q);
-
 	if (rl->rq_pool)
 		mempool_destroy(rl->rq_pool);
+72 -69
@@ -102,7 +102,7 @@ struct throtl_data
 	/* Work for dispatching throttled bios */
 	struct delayed_work throtl_work;
 
-	atomic_t limits_changed;
+	bool limits_changed;
 };
 
 enum tg_state_flags {
@@ -201,6 +201,7 @@ static struct throtl_grp * throtl_find_alloc_tg(struct throtl_data *td,
 	RB_CLEAR_NODE(&tg->rb_node);
 	bio_list_init(&tg->bio_lists[0]);
 	bio_list_init(&tg->bio_lists[1]);
+	td->limits_changed = false;
 
 	/*
	 * Take the initial reference that will be released on destroy
@@ -737,34 +738,36 @@ static void throtl_process_limit_change(struct throtl_data *td)
 	struct throtl_grp *tg;
 	struct hlist_node *pos, *n;
 
-	if (!atomic_read(&td->limits_changed))
+	if (!td->limits_changed)
 		return;
 
-	throtl_log(td, "limit changed =%d", atomic_read(&td->limits_changed));
+	xchg(&td->limits_changed, false);
 
-	/*
-	 * Make sure updates from throtl_update_blkio_group_read_bps() group
-	 * of functions to tg->limits_changed are visible. We do not
-	 * want update td->limits_changed to be visible but update to
-	 * tg->limits_changed not being visible yet on this cpu. Hence
-	 * the read barrier.
-	 */
-	smp_rmb();
+	throtl_log(td, "limits changed");
 
 	hlist_for_each_entry_safe(tg, pos, n, &td->tg_list, tg_node) {
-		if (throtl_tg_on_rr(tg) && tg->limits_changed) {
-			throtl_log_tg(td, tg, "limit change rbps=%llu wbps=%llu"
-				" riops=%u wiops=%u", tg->bps[READ],
-				tg->bps[WRITE], tg->iops[READ],
-				tg->iops[WRITE]);
-			tg_update_disptime(td, tg);
-			tg->limits_changed = false;
-		}
-	}
+		if (!tg->limits_changed)
+			continue;
 
-	smp_mb__before_atomic_dec();
-	atomic_dec(&td->limits_changed);
-	smp_mb__after_atomic_dec();
+		if (!xchg(&tg->limits_changed, false))
+			continue;
+
+		throtl_log_tg(td, tg, "limit change rbps=%llu wbps=%llu"
+			" riops=%u wiops=%u", tg->bps[READ], tg->bps[WRITE],
+			tg->iops[READ], tg->iops[WRITE]);
+
+		/*
+		 * Restart the slices for both READ and WRITES. It
+		 * might happen that a group's limit are dropped
+		 * suddenly and we don't want to account recently
+		 * dispatched IO with new low rate
+		 */
+		throtl_start_new_slice(td, tg, 0);
+		throtl_start_new_slice(td, tg, 1);
+
+		if (throtl_tg_on_rr(tg))
+			tg_update_disptime(td, tg);
+	}
 }
 
 /* Dispatch throttled bios. Should be called without queue lock held. */
@@ -774,6 +777,7 @@ static int throtl_dispatch(struct request_queue *q)
 	unsigned int nr_disp = 0;
 	struct bio_list bio_list_on_stack;
 	struct bio *bio;
+	struct blk_plug plug;
 
 	spin_lock_irq(q->queue_lock);
 
@@ -802,9 +806,10 @@ out:
 	 * immediate dispatch
 	 */
 	if (nr_disp) {
+		blk_start_plug(&plug);
 		while((bio = bio_list_pop(&bio_list_on_stack)))
 			generic_make_request(bio);
-		blk_unplug(q);
+		blk_finish_plug(&plug);
 	}
 	return nr_disp;
 }
@@ -825,7 +830,8 @@ throtl_schedule_delayed_work(struct throtl_data *td, unsigned long delay)
 
 	struct delayed_work *dwork = &td->throtl_work;
 
-	if (total_nr_queued(td) > 0) {
+	/* schedule work if limits changed even if no bio is queued */
+	if (total_nr_queued(td) > 0 || td->limits_changed) {
 		/*
 		 * We might have a work scheduled to be executed in future.
 		 * Cancel that and schedule a new one.
@@ -898,6 +904,15 @@ void throtl_unlink_blkio_group(void *key, struct blkio_group *blkg)
 	spin_unlock_irqrestore(td->queue->queue_lock, flags);
 }
 
+static void throtl_update_blkio_group_common(struct throtl_data *td,
+				struct throtl_grp *tg)
+{
+	xchg(&tg->limits_changed, true);
+	xchg(&td->limits_changed, true);
+	/* Schedule a work now to process the limit change */
+	throtl_schedule_delayed_work(td, 0);
+}
+
 /*
  * For all update functions, key should be a valid pointer because these
  * update functions are called under blkcg_lock, that means, blkg is
@@ -911,64 +926,43 @@ static void throtl_update_blkio_group_read_bps(void *key,
 		struct blkio_group *blkg, u64 read_bps)
 {
 	struct throtl_data *td = key;
+	struct throtl_grp *tg = tg_of_blkg(blkg);
 
-	tg_of_blkg(blkg)->bps[READ] = read_bps;
-	/* Make sure read_bps is updated before setting limits_changed */
-	smp_wmb();
-	tg_of_blkg(blkg)->limits_changed = true;
-
-	/* Make sure tg->limits_changed is updated before td->limits_changed */
-	smp_mb__before_atomic_inc();
-	atomic_inc(&td->limits_changed);
-	smp_mb__after_atomic_inc();
-
-	/* Schedule a work now to process the limit change */
-	throtl_schedule_delayed_work(td, 0);
+	tg->bps[READ] = read_bps;
+	throtl_update_blkio_group_common(td, tg);
 }
 
 static void throtl_update_blkio_group_write_bps(void *key,
 		struct blkio_group *blkg, u64 write_bps)
 {
 	struct throtl_data *td = key;
+	struct throtl_grp *tg = tg_of_blkg(blkg);
 
-	tg_of_blkg(blkg)->bps[WRITE] = write_bps;
-	smp_wmb();
-	tg_of_blkg(blkg)->limits_changed = true;
-	smp_mb__before_atomic_inc();
-	atomic_inc(&td->limits_changed);
-	smp_mb__after_atomic_inc();
-	throtl_schedule_delayed_work(td, 0);
+	tg->bps[WRITE] = write_bps;
+	throtl_update_blkio_group_common(td, tg);
 }
 
 static void throtl_update_blkio_group_read_iops(void *key,
 		struct blkio_group *blkg, unsigned int read_iops)
 {
 	struct throtl_data *td = key;
+	struct throtl_grp *tg = tg_of_blkg(blkg);
 
-	tg_of_blkg(blkg)->iops[READ] = read_iops;
-	smp_wmb();
-	tg_of_blkg(blkg)->limits_changed = true;
-	smp_mb__before_atomic_inc();
-	atomic_inc(&td->limits_changed);
-	smp_mb__after_atomic_inc();
-	throtl_schedule_delayed_work(td, 0);
+	tg->iops[READ] = read_iops;
+	throtl_update_blkio_group_common(td, tg);
 }
 
 static void throtl_update_blkio_group_write_iops(void *key,
 		struct blkio_group *blkg, unsigned int write_iops)
 {
 	struct throtl_data *td = key;
+	struct throtl_grp *tg = tg_of_blkg(blkg);
 
-	tg_of_blkg(blkg)->iops[WRITE] = write_iops;
-	smp_wmb();
-	tg_of_blkg(blkg)->limits_changed = true;
-	smp_mb__before_atomic_inc();
-	atomic_inc(&td->limits_changed);
-	smp_mb__after_atomic_inc();
-	throtl_schedule_delayed_work(td, 0);
+	tg->iops[WRITE] = write_iops;
+	throtl_update_blkio_group_common(td, tg);
 }
 
-void throtl_shutdown_timer_wq(struct request_queue *q)
+static void throtl_shutdown_wq(struct request_queue *q)
 {
 	struct throtl_data *td = q->td;
 
@@ -1009,20 +1003,28 @@ int blk_throtl_bio(struct request_queue *q, struct bio **biop)
 		/*
 		 * There is already another bio queued in same dir. No
 		 * need to update dispatch time.
-		 * Still update the disptime if rate limits on this group
-		 * were changed.
 		 */
-		if (!tg->limits_changed)
-			update_disptime = false;
-		else
-			tg->limits_changed = false;
-
+		update_disptime = false;
 		goto queue_bio;
+
 	}
 
 	/* Bio is with-in rate limit of group */
 	if (tg_may_dispatch(td, tg, bio, NULL)) {
 		throtl_charge_bio(tg, bio);
+
+		/*
+		 * We need to trim slice even when bios are not being queued
+		 * otherwise it might happen that a bio is not queued for
+		 * a long time and slice keeps on extending and trim is not
+		 * called for a long time. Now if limits are reduced suddenly
+		 * we take into account all the IO dispatched so far at new
+		 * low rate and * newly queued IO gets a really long dispatch
+		 * time.
+		 *
+		 * So keep on trimming slice even if bio is not queued.
+		 */
+		throtl_trim_slice(td, tg, rw);
 		goto out;
 	}
@@ -1058,7 +1060,7 @@ int blk_throtl_init(struct request_queue *q)
 
 	INIT_HLIST_HEAD(&td->tg_list);
 	td->tg_service_tree = THROTL_RB_ROOT;
-	atomic_set(&td->limits_changed, 0);
+	td->limits_changed = false;
 
 	/* Init root group */
 	tg = &td->root_tg;
@@ -1070,6 +1072,7 @@ int blk_throtl_init(struct request_queue *q)
 	/* Practically unlimited BW */
 	tg->bps[0] = tg->bps[1] = -1;
 	tg->iops[0] = tg->iops[1] = -1;
+	td->limits_changed = false;
 
 	/*
	 * Set root group reference to 2. One reference will be dropped when
@@ -1102,7 +1105,7 @@ void blk_throtl_exit(struct request_queue *q)
 
 	BUG_ON(!td);
 
-	throtl_shutdown_timer_wq(q);
+	throtl_shutdown_wq(q);
 
 	spin_lock_irq(q->queue_lock);
 	throtl_release_tgs(td);
@@ -1132,7 +1135,7 @@ void blk_throtl_exit(struct request_queue *q)
 	 * update limits through cgroup and another work got queued, cancel
 	 * it.
 	 */
-	throtl_shutdown_timer_wq(q);
+	throtl_shutdown_wq(q);
 	throtl_td_free(td);
 }
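A note on the pattern above: the rewritten limit-update path relies on xchg() so that testing and clearing limits_changed is a single atomic step, and a concurrent updater's flag-set cannot be lost between the test and the clear. A user-space analogue of that claim/consume idiom, sketched with the GCC/Clang atomic builtin (the kernel's xchg() itself is arch-specific; this is an illustration, not the kernel's code):

#include <stdbool.h>
#include <stdio.h>

static bool limits_changed;	/* set by the updater, consumed below */

/* Updater side: publish new limits, then raise the flag atomically. */
static void update_limits(void)
{
	/* ... store new bps/iops values here ... */
	__atomic_exchange_n(&limits_changed, true, __ATOMIC_SEQ_CST);
}

/* Consumer side: atomically claim the flag; only the caller that
 * actually flips it from true to false processes the change. */
static void process_limit_change(void)
{
	if (!__atomic_exchange_n(&limits_changed, false, __ATOMIC_SEQ_CST))
		return;	/* nothing pending, or another consumer claimed it */

	printf("limits changed, restarting slices\n");
}

int main(void)
{
	update_limits();
	process_limit_change();	/* claims and processes the change */
	process_limit_change();	/* sees no pending change */
	return 0;
}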
+6 -10
@@ -18,8 +18,6 @@ int blk_rq_append_bio(struct request_queue *q, struct request *rq,
 void blk_dequeue_request(struct request *rq);
 void __blk_queue_free_tags(struct request_queue *q);
 
-void blk_unplug_work(struct work_struct *work);
-void blk_unplug_timeout(unsigned long data);
 void blk_rq_timed_out_timer(unsigned long data);
 void blk_delete_timer(struct request *);
 void blk_add_timer(struct request *);
@@ -51,21 +49,17 @@ static inline void blk_clear_rq_complete(struct request *rq)
  */
 #define ELV_ON_HASH(rq)		(!hlist_unhashed(&(rq)->hash))
 
-struct request *blk_do_flush(struct request_queue *q, struct request *rq);
+void blk_insert_flush(struct request *rq);
+void blk_abort_flushes(struct request_queue *q);
 
 static inline struct request *__elv_next_request(struct request_queue *q)
 {
 	struct request *rq;
 
 	while (1) {
-		while (!list_empty(&q->queue_head)) {
+		if (!list_empty(&q->queue_head)) {
 			rq = list_entry_rq(q->queue_head.next);
-			if (!(rq->cmd_flags & (REQ_FLUSH | REQ_FUA)) ||
-			    rq == &q->flush_rq)
-				return rq;
-			rq = blk_do_flush(q, rq);
-			if (rq)
-				return rq;
+			return rq;
 		}
 
 		if (!q->elevator->ops->elevator_dispatch_fn(q, 0))
@@ -109,6 +103,8 @@ int ll_front_merge_fn(struct request_queue *q, struct request *req,
 		      struct bio *bio);
 int attempt_back_merge(struct request_queue *q, struct request *rq);
 int attempt_front_merge(struct request_queue *q, struct request *rq);
+int blk_attempt_req_merge(struct request_queue *q, struct request *rq,
+				struct request *next);
 void blk_recalc_rq_segments(struct request *rq);
 void blk_rq_set_mixed_merge(struct request *rq);
+86 -79
@@ -54,9 +54,9 @@ static const int cfq_hist_divisor = 4;
 #define CFQQ_SEEKY(cfqq)	(hweight32(cfqq->seek_history) > 32/8)
 
 #define RQ_CIC(rq)		\
-	((struct cfq_io_context *) (rq)->elevator_private)
-#define RQ_CFQQ(rq)	(struct cfq_queue *) ((rq)->elevator_private2)
-#define RQ_CFQG(rq)	(struct cfq_group *) ((rq)->elevator_private3)
+	((struct cfq_io_context *) (rq)->elevator_private[0])
+#define RQ_CFQQ(rq)	(struct cfq_queue *) ((rq)->elevator_private[1])
+#define RQ_CFQG(rq)	(struct cfq_group *) ((rq)->elevator_private[2])
 
 static struct kmem_cache *cfq_pool;
 static struct kmem_cache *cfq_ioc_pool;
@@ -146,7 +146,6 @@ struct cfq_queue {
 	struct cfq_rb_root *service_tree;
 	struct cfq_queue *new_cfqq;
 	struct cfq_group *cfqg;
-	struct cfq_group *orig_cfqg;
 	/* Number of sectors dispatched from queue in single dispatch round */
 	unsigned long nr_sectors;
 };
@@ -179,6 +178,8 @@ struct cfq_group {
 	/* group service_tree key */
 	u64 vdisktime;
 	unsigned int weight;
+	unsigned int new_weight;
+	bool needs_update;
 
 	/* number of cfqq currently on this group */
 	int nr_cfqq;
@@ -238,6 +239,7 @@ struct cfq_data {
 	struct rb_root prio_trees[CFQ_PRIO_LISTS];
 
 	unsigned int busy_queues;
+	unsigned int busy_sync_queues;
 
 	int rq_in_driver;
 	int rq_in_flight[2];
@@ -285,7 +287,6 @@ struct cfq_data {
 	unsigned int cfq_slice_idle;
 	unsigned int cfq_group_idle;
 	unsigned int cfq_latency;
-	unsigned int cfq_group_isolation;
 
 	unsigned int cic_index;
 	struct list_head cic_list;
@@ -501,13 +502,6 @@ static inline void cfq_schedule_dispatch(struct cfq_data *cfqd)
 	}
 }
 
-static int cfq_queue_empty(struct request_queue *q)
-{
-	struct cfq_data *cfqd = q->elevator->elevator_data;
-
-	return !cfqd->rq_queued;
-}
-
 /*
  * Scale schedule slice based on io priority. Use the sync time slice only
  * if a queue is marked sync and has sync io queued. A sync queue with async
@@ -558,15 +552,13 @@ static inline u64 min_vdisktime(u64 min_vdisktime, u64 vdisktime)
 
 static void update_min_vdisktime(struct cfq_rb_root *st)
 {
-	u64 vdisktime = st->min_vdisktime;
 	struct cfq_group *cfqg;
 
 	if (st->left) {
 		cfqg = rb_entry_cfqg(st->left);
-		vdisktime = min_vdisktime(vdisktime, cfqg->vdisktime);
+		st->min_vdisktime = max_vdisktime(st->min_vdisktime,
+						  cfqg->vdisktime);
 	}
-
-	st->min_vdisktime = max_vdisktime(st->min_vdisktime, vdisktime);
 }
 
 /*
@@ -863,7 +855,27 @@ __cfq_group_service_tree_add(struct cfq_rb_root *st, struct cfq_group *cfqg)
 }
 
 static void
-cfq_group_service_tree_add(struct cfq_data *cfqd, struct cfq_group *cfqg)
+cfq_update_group_weight(struct cfq_group *cfqg)
+{
+	BUG_ON(!RB_EMPTY_NODE(&cfqg->rb_node));
+	if (cfqg->needs_update) {
+		cfqg->weight = cfqg->new_weight;
+		cfqg->needs_update = false;
+	}
+}
+
+static void
+cfq_group_service_tree_add(struct cfq_rb_root *st, struct cfq_group *cfqg)
+{
+	BUG_ON(!RB_EMPTY_NODE(&cfqg->rb_node));
+
+	cfq_update_group_weight(cfqg);
+	__cfq_group_service_tree_add(st, cfqg);
+	st->total_weight += cfqg->weight;
+}
+
+static void
+cfq_group_notify_queue_add(struct cfq_data *cfqd, struct cfq_group *cfqg)
 {
 	struct cfq_rb_root *st = &cfqd->grp_service_tree;
 	struct cfq_group *__cfqg;
@@ -884,13 +896,19 @@ cfq_group_service_tree_add(struct cfq_data *cfqd, struct cfq_group *cfqg)
 		cfqg->vdisktime = __cfqg->vdisktime + CFQ_IDLE_DELAY;
 	} else
 		cfqg->vdisktime = st->min_vdisktime;
-
-	__cfq_group_service_tree_add(st, cfqg);
-	st->total_weight += cfqg->weight;
+	cfq_group_service_tree_add(st, cfqg);
 }
 
 static void
-cfq_group_service_tree_del(struct cfq_data *cfqd, struct cfq_group *cfqg)
+cfq_group_service_tree_del(struct cfq_rb_root *st, struct cfq_group *cfqg)
+{
+	st->total_weight -= cfqg->weight;
+	if (!RB_EMPTY_NODE(&cfqg->rb_node))
+		cfq_rb_erase(&cfqg->rb_node, st);
+}
+
+static void
+cfq_group_notify_queue_del(struct cfq_data *cfqd, struct cfq_group *cfqg)
 {
 	struct cfq_rb_root *st = &cfqd->grp_service_tree;
 
@@ -902,14 +920,13 @@ cfq_group_service_tree_del(struct cfq_data *cfqd, struct cfq_group *cfqg)
 		return;
 
 	cfq_log_cfqg(cfqd, cfqg, "del_from_rr group");
-	st->total_weight -= cfqg->weight;
-	if (!RB_EMPTY_NODE(&cfqg->rb_node))
-		cfq_rb_erase(&cfqg->rb_node, st);
+	cfq_group_service_tree_del(st, cfqg);
 	cfqg->saved_workload_slice = 0;
 	cfq_blkiocg_update_dequeue_stats(&cfqg->blkg, 1);
 }
 
-static inline unsigned int cfq_cfqq_slice_usage(struct cfq_queue *cfqq)
+static inline unsigned int cfq_cfqq_slice_usage(struct cfq_queue *cfqq,
+						unsigned int *unaccounted_time)
 {
 	unsigned int slice_used;
 
@@ -928,8 +945,13 @@ static inline unsigned int cfq_cfqq_slice_usage(struct cfq_queue *cfqq)
 			     1);
 	} else {
 		slice_used = jiffies - cfqq->slice_start;
-		if (slice_used > cfqq->allocated_slice)
+		if (slice_used > cfqq->allocated_slice) {
+			*unaccounted_time = slice_used - cfqq->allocated_slice;
 			slice_used = cfqq->allocated_slice;
+		}
+		if (time_after(cfqq->slice_start, cfqq->dispatch_start))
+			*unaccounted_time += cfqq->slice_start -
+					cfqq->dispatch_start;
 	}
 
 	return slice_used;
@@ -939,12 +961,12 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg,
 				struct cfq_queue *cfqq)
 {
 	struct cfq_rb_root *st = &cfqd->grp_service_tree;
-	unsigned int used_sl, charge;
+	unsigned int used_sl, charge, unaccounted_sl = 0;
 	int nr_sync = cfqg->nr_cfqq - cfqg_busy_async_queues(cfqd, cfqg)
 			- cfqg->service_tree_idle.count;
 
 	BUG_ON(nr_sync < 0);
-	used_sl = charge = cfq_cfqq_slice_usage(cfqq);
+	used_sl = charge = cfq_cfqq_slice_usage(cfqq, &unaccounted_sl);
 
 	if (iops_mode(cfqd))
 		charge = cfqq->slice_dispatch;
@@ -952,9 +974,10 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg,
 		charge = cfqq->allocated_slice;
 
 	/* Can't update vdisktime while group is on service tree */
-	cfq_rb_erase(&cfqg->rb_node, st);
+	cfq_group_service_tree_del(st, cfqg);
 	cfqg->vdisktime += cfq_scale_slice(charge, cfqg);
-	__cfq_group_service_tree_add(st, cfqg);
+	/* If a new weight was requested, update now, off tree */
+	cfq_group_service_tree_add(st, cfqg);
 
 	/* This group is being expired. Save the context */
 	if (time_after(cfqd->workload_expires, jiffies)) {
@@ -970,7 +993,8 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg,
 	cfq_log_cfqq(cfqq->cfqd, cfqq, "sl_used=%u disp=%u charge=%u iops=%u"
 			" sect=%u", used_sl, cfqq->slice_dispatch, charge,
 			iops_mode(cfqd), cfqq->nr_sectors);
-	cfq_blkiocg_update_timeslice_used(&cfqg->blkg, used_sl);
+	cfq_blkiocg_update_timeslice_used(&cfqg->blkg, used_sl,
+					  unaccounted_sl);
 	cfq_blkiocg_set_start_empty_time(&cfqg->blkg);
 }
 
@@ -985,7 +1009,9 @@ static inline struct cfq_group *cfqg_of_blkg(struct blkio_group *blkg)
 void cfq_update_blkio_group_weight(void *key, struct blkio_group *blkg,
 					unsigned int weight)
 {
-	cfqg_of_blkg(blkg)->weight = weight;
+	struct cfq_group *cfqg = cfqg_of_blkg(blkg);
+	cfqg->new_weight = weight;
+	cfqg->needs_update = true;
 }
 
 static struct cfq_group *
@@ -1187,32 +1213,6 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 	int new_cfqq = 1;
 	int group_changed = 0;
 
-#ifdef CONFIG_CFQ_GROUP_IOSCHED
-	if (!cfqd->cfq_group_isolation
-	    && cfqq_type(cfqq) == SYNC_NOIDLE_WORKLOAD
-	    && cfqq->cfqg && cfqq->cfqg != &cfqd->root_group) {
-		/* Move this cfq to root group */
-		cfq_log_cfqq(cfqd, cfqq, "moving to root group");
-		if (!RB_EMPTY_NODE(&cfqq->rb_node))
-			cfq_group_service_tree_del(cfqd, cfqq->cfqg);
-		cfqq->orig_cfqg = cfqq->cfqg;
-		cfqq->cfqg = &cfqd->root_group;
-		cfqd->root_group.ref++;
-		group_changed = 1;
-	} else if (!cfqd->cfq_group_isolation
-		   && cfqq_type(cfqq) == SYNC_WORKLOAD && cfqq->orig_cfqg) {
-		/* cfqq is sequential now needs to go to its original group */
-		BUG_ON(cfqq->cfqg != &cfqd->root_group);
-		if (!RB_EMPTY_NODE(&cfqq->rb_node))
-			cfq_group_service_tree_del(cfqd, cfqq->cfqg);
-		cfq_put_cfqg(cfqq->cfqg);
-		cfqq->cfqg = cfqq->orig_cfqg;
-		cfqq->orig_cfqg = NULL;
-		group_changed = 1;
-		cfq_log_cfqq(cfqd, cfqq, "moved to origin group");
-	}
-#endif
-
 	service_tree = service_tree_for(cfqq->cfqg, cfqq_prio(cfqq),
 						cfqq_type(cfqq));
 	if (cfq_class_idle(cfqq)) {
@@ -1284,7 +1284,7 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 	service_tree->count++;
 	if ((add_front || !new_cfqq) && !group_changed)
 		return;
-	cfq_group_service_tree_add(cfqd, cfqq->cfqg);
+	cfq_group_notify_queue_add(cfqd, cfqq->cfqg);
 }
 
 static struct cfq_queue *
@@ -1372,6 +1372,8 @@ static void cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 	BUG_ON(cfq_cfqq_on_rr(cfqq));
 	cfq_mark_cfqq_on_rr(cfqq);
 	cfqd->busy_queues++;
+	if (cfq_cfqq_sync(cfqq))
+		cfqd->busy_sync_queues++;
 
 	cfq_resort_rr_list(cfqd, cfqq);
 }
@@ -1395,9 +1397,11 @@ static void cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 		cfqq->p_root = NULL;
 	}
 
-	cfq_group_service_tree_del(cfqd, cfqq->cfqg);
+	cfq_group_notify_queue_del(cfqd, cfqq->cfqg);
 	BUG_ON(!cfqd->busy_queues);
 	cfqd->busy_queues--;
+	if (cfq_cfqq_sync(cfqq))
+		cfqd->busy_sync_queues--;
 }
 
 /*
@@ -2405,22 +2409,34 @@ static bool cfq_may_dispatch(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 	 * Does this cfqq already have too much IO in flight?
 	 */
 	if (cfqq->dispatched >= max_dispatch) {
+		bool promote_sync = false;
 		/*
 		 * idle queue must always only have a single IO in flight
 		 */
 		if (cfq_class_idle(cfqq))
 			return false;
 
+		/*
+		 * If there is only one sync queue
+		 * we can ignore async queue here and give the sync
+		 * queue no dispatch limit. The reason is a sync queue can
+		 * preempt async queue, limiting the sync queue doesn't make
+		 * sense. This is useful for aiostress test.
+		 */
+		if (cfq_cfqq_sync(cfqq) && cfqd->busy_sync_queues == 1)
+			promote_sync = true;
+
 		/*
 		 * We have other queues, don't allow more IO from this one
 		 */
-		if (cfqd->busy_queues > 1 && cfq_slice_used_soon(cfqd, cfqq))
+		if (cfqd->busy_queues > 1 && cfq_slice_used_soon(cfqd, cfqq) &&
+				!promote_sync)
 			return false;
 
 		/*
 		 * Sole queue user, no limit
 		 */
-		if (cfqd->busy_queues == 1)
+		if (cfqd->busy_queues == 1 || promote_sync)
 			max_dispatch = -1;
 		else
 			/*
@@ -2542,7 +2558,7 @@ static int cfq_dispatch_requests(struct request_queue *q, int force)
 static void cfq_put_queue(struct cfq_queue *cfqq)
 {
 	struct cfq_data *cfqd = cfqq->cfqd;
-	struct cfq_group *cfqg, *orig_cfqg;
+	struct cfq_group *cfqg;
 
 	BUG_ON(cfqq->ref <= 0);
 
@@ -2554,7 +2570,6 @@ static void cfq_put_queue(struct cfq_queue *cfqq)
 	BUG_ON(rb_first(&cfqq->sort_list));
 	BUG_ON(cfqq->allocated[READ] + cfqq->allocated[WRITE]);
 	cfqg = cfqq->cfqg;
-	orig_cfqg = cfqq->orig_cfqg;
 
 	if (unlikely(cfqd->active_queue == cfqq)) {
 		__cfq_slice_expired(cfqd, cfqq, 0);
@@ -2564,8 +2579,6 @@ static void cfq_put_queue(struct cfq_queue *cfqq)
 	BUG_ON(cfq_cfqq_on_rr(cfqq));
 	kmem_cache_free(cfq_pool, cfqq);
 	cfq_put_cfqg(cfqg);
-	if (orig_cfqg)
-		cfq_put_cfqg(orig_cfqg);
 }
 
 /*
@@ -3613,12 +3626,12 @@ static void cfq_put_request(struct request *rq)
 
 		put_io_context(RQ_CIC(rq)->ioc);
 
-		rq->elevator_private = NULL;
-		rq->elevator_private2 = NULL;
+		rq->elevator_private[0] = NULL;
+		rq->elevator_private[1] = NULL;
 
 		/* Put down rq reference on cfqg */
 		cfq_put_cfqg(RQ_CFQG(rq));
-		rq->elevator_private3 = NULL;
+		rq->elevator_private[2] = NULL;
 
 		cfq_put_queue(cfqq);
 	}
@@ -3705,13 +3718,12 @@ new_queue:
 	}
 
 	cfqq->allocated[rw]++;
-
 	cfqq->ref++;
-	rq->elevator_private = cic;
-	rq->elevator_private2 = cfqq;
-	rq->elevator_private3 = cfq_ref_get_cfqg(cfqq->cfqg);
-
+	rq->elevator_private[0] = cic;
+	rq->elevator_private[1] = cfqq;
+	rq->elevator_private[2] = cfq_ref_get_cfqg(cfqq->cfqg);
 	spin_unlock_irqrestore(q->queue_lock, flags);
 
 	return 0;
 
 queue_fail:
@@ -3953,7 +3965,6 @@ static void *cfq_init_queue(struct request_queue *q)
 	cfqd->cfq_slice_idle = cfq_slice_idle;
 	cfqd->cfq_group_idle = cfq_group_idle;
 	cfqd->cfq_latency = 1;
-	cfqd->cfq_group_isolation = 0;
 	cfqd->hw_tag = -1;
 	/*
	 * we optimistically start assuming sync ops weren't delayed in last
@@ -4029,7 +4040,6 @@ SHOW_FUNCTION(cfq_slice_sync_show, cfqd->cfq_slice[1], 1);
 SHOW_FUNCTION(cfq_slice_async_show, cfqd->cfq_slice[0], 1);
 SHOW_FUNCTION(cfq_slice_async_rq_show, cfqd->cfq_slice_async_rq, 0);
 SHOW_FUNCTION(cfq_low_latency_show, cfqd->cfq_latency, 0);
-SHOW_FUNCTION(cfq_group_isolation_show, cfqd->cfq_group_isolation, 0);
 #undef SHOW_FUNCTION
 
 #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV)			\
@@ -4063,7 +4073,6 @@ STORE_FUNCTION(cfq_slice_async_store, &cfqd->cfq_slice[0], 1, UINT_MAX, 1);
 STORE_FUNCTION(cfq_slice_async_rq_store, &cfqd->cfq_slice_async_rq, 1,
 		UINT_MAX, 0);
 STORE_FUNCTION(cfq_low_latency_store, &cfqd->cfq_latency, 0, 1, 0);
-STORE_FUNCTION(cfq_group_isolation_store, &cfqd->cfq_group_isolation, 0, 1, 0);
 #undef STORE_FUNCTION
 
 #define CFQ_ATTR(name) \
@@ -4081,7 +4090,6 @@ static struct elv_fs_entry cfq_attrs[] = {
 	CFQ_ATTR(slice_idle),
 	CFQ_ATTR(group_idle),
 	CFQ_ATTR(low_latency),
-	CFQ_ATTR(group_isolation),
 	__ATTR_NULL
 };
 
@@ -4096,7 +4104,6 @@ static struct elevator_type iosched_cfq = {
 		.elevator_add_req_fn =		cfq_insert_request,
 		.elevator_activate_req_fn =	cfq_activate_request,
 		.elevator_deactivate_req_fn =	cfq_deactivate_request,
-		.elevator_queue_empty_fn =	cfq_queue_empty,
 		.elevator_completed_req_fn =	cfq_completed_request,
 		.elevator_former_req_fn =	elv_rb_former_request,
 		.elevator_latter_req_fn =	elv_rb_latter_request,
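The new_weight/needs_update pair introduced above is a deferred-update idiom: because the weight feeds the group's position in the service tree, it may only be applied while the group is off the tree, so the weight-update callback merely records the request and the tree-add path applies it. A generic sketch of the idiom (names are illustrative, not CFQ's):

#include <stdbool.h>
#include <stdio.h>

struct group {
	unsigned int weight;		/* only changed while off the tree */
	unsigned int new_weight;	/* requested by the writer */
	bool needs_update;
};

/* Writer: never touches 'weight' directly, only records the request. */
static void request_weight(struct group *g, unsigned int w)
{
	g->new_weight = w;
	g->needs_update = true;
}

/* Add path: the node is guaranteed off the tree here, so applying the
 * new weight cannot corrupt an existing sort order. */
static void tree_add(struct group *g)
{
	if (g->needs_update) {
		g->weight = g->new_weight;
		g->needs_update = false;
	}
	/* real code would now insert g into an rbtree keyed by weight */
}

int main(void)
{
	struct group g = { .weight = 500 };

	request_weight(&g, 300);	/* takes effect only at tree_add() */
	tree_add(&g);
	printf("applied weight: %u\n", g.weight);
	return 0;
}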
+3 -3
@@ -16,9 +16,9 @@ static inline void cfq_blkiocg_update_dequeue_stats(struct blkio_group *blkg,
 }
 
 static inline void cfq_blkiocg_update_timeslice_used(struct blkio_group *blkg,
-			unsigned long time)
+			unsigned long time, unsigned long unaccounted_time)
 {
-	blkiocg_update_timeslice_used(blkg, time);
+	blkiocg_update_timeslice_used(blkg, time, unaccounted_time);
 }
 
 static inline void cfq_blkiocg_set_start_empty_time(struct blkio_group *blkg)
@@ -85,7 +85,7 @@ static inline void cfq_blkiocg_update_dequeue_stats(struct blkio_group *blkg,
 			unsigned long dequeue) {}
 
 static inline void cfq_blkiocg_update_timeslice_used(struct blkio_group *blkg,
-			unsigned long time) {}
+			unsigned long time, unsigned long unaccounted_time) {}
 static inline void cfq_blkiocg_set_start_empty_time(struct blkio_group *blkg) {}
 static inline void cfq_blkiocg_update_io_remove_stats(struct blkio_group *blkg,
 			bool direction, bool sync) {}
@@ -326,14 +326,6 @@ dispatch_request:
 	return 1;
 }
 
-static int deadline_queue_empty(struct request_queue *q)
-{
-	struct deadline_data *dd = q->elevator->elevator_data;
-
-	return list_empty(&dd->fifo_list[WRITE])
-		&& list_empty(&dd->fifo_list[READ]);
-}
-
 static void deadline_exit_queue(struct elevator_queue *e)
 {
 	struct deadline_data *dd = e->elevator_data;
@@ -445,7 +437,6 @@ static struct elevator_type iosched_deadline = {
 	.elevator_merge_req_fn =	deadline_merged_requests,
 	.elevator_dispatch_fn =		deadline_dispatch_requests,
 	.elevator_add_req_fn =		deadline_add_request,
-	.elevator_queue_empty_fn =	deadline_queue_empty,
 	.elevator_former_req_fn =	elv_rb_former_request,
 	.elevator_latter_req_fn =	elv_rb_latter_request,
 	.elevator_init_fn =		deadline_init_queue,
+64 -44
@@ -113,7 +113,7 @@ int elv_rq_merge_ok(struct request *rq, struct bio *bio)
 }
 EXPORT_SYMBOL(elv_rq_merge_ok);
 
-static inline int elv_try_merge(struct request *__rq, struct bio *bio)
+int elv_try_merge(struct request *__rq, struct bio *bio)
 {
 	int ret = ELEVATOR_NO_MERGE;
 
@@ -421,6 +421,8 @@ void elv_dispatch_sort(struct request_queue *q, struct request *rq)
 	struct list_head *entry;
 	int stop_flags;
 
+	BUG_ON(rq->cmd_flags & REQ_ON_PLUG);
+
 	if (q->last_merge == rq)
 		q->last_merge = NULL;
 
@@ -519,6 +521,40 @@ int elv_merge(struct request_queue *q, struct request **req, struct bio *bio)
 	return ELEVATOR_NO_MERGE;
 }
 
+/*
+ * Attempt to do an insertion back merge. Only check for the case where
+ * we can append 'rq' to an existing request, so we can throw 'rq' away
+ * afterwards.
+ *
+ * Returns true if we merged, false otherwise
+ */
+static bool elv_attempt_insert_merge(struct request_queue *q,
+				     struct request *rq)
+{
+	struct request *__rq;
+
+	if (blk_queue_nomerges(q))
+		return false;
+
+	/*
+	 * First try one-hit cache.
+	 */
+	if (q->last_merge && blk_attempt_req_merge(q, q->last_merge, rq))
+		return true;
+
+	if (blk_queue_noxmerges(q))
+		return false;
+
+	/*
	 * See if our hash lookup can find a potential backmerge.
+	 */
+	__rq = elv_rqhash_find(q, blk_rq_pos(rq));
+	if (__rq && blk_attempt_req_merge(q, __rq, rq))
+		return true;
+
+	return false;
+}
+
 void elv_merged_request(struct request_queue *q, struct request *rq, int type)
 {
 	struct elevator_queue *e = q->elevator;
@@ -536,14 +572,18 @@ void elv_merge_requests(struct request_queue *q, struct request *rq,
 			     struct request *next)
 {
 	struct elevator_queue *e = q->elevator;
+	const int next_sorted = next->cmd_flags & REQ_SORTED;
 
-	if (e->ops->elevator_merge_req_fn)
+	if (next_sorted && e->ops->elevator_merge_req_fn)
 		e->ops->elevator_merge_req_fn(q, rq, next);
 
 	elv_rqhash_reposition(q, rq);
-	elv_rqhash_del(q, next);
 
-	q->nr_sorted--;
+	if (next_sorted) {
+		elv_rqhash_del(q, next);
+		q->nr_sorted--;
+	}
+
 	q->last_merge = rq;
 }
 
@@ -617,21 +657,12 @@ void elv_quiesce_end(struct request_queue *q)
 
 void elv_insert(struct request_queue *q, struct request *rq, int where)
 {
-	int unplug_it = 1;
-
 	trace_block_rq_insert(q, rq);
 
 	rq->q = q;
 
 	switch (where) {
-	case ELEVATOR_INSERT_REQUEUE:
-		/*
-		 * Most requeues happen because of a busy condition,
-		 * don't force unplug of the queue for that case.
-		 * Clear unplug_it and fall through.
-		 */
-		unplug_it = 0;
-
 	case ELEVATOR_INSERT_FRONT:
 		rq->cmd_flags |= REQ_SOFTBARRIER;
 		list_add(&rq->queuelist, &q->queue_head);
@@ -654,6 +685,14 @@ void elv_insert(struct request_queue *q, struct request *rq, int where)
 		__blk_run_queue(q, false);
 		break;
 
+	case ELEVATOR_INSERT_SORT_MERGE:
+		/*
+		 * If we succeed in merging this request with one in the
+		 * queue already, we are done - rq has now been freed,
+		 * so no need to do anything further.
+		 */
+		if (elv_attempt_insert_merge(q, rq))
+			break;
 	case ELEVATOR_INSERT_SORT:
 		BUG_ON(rq->cmd_type != REQ_TYPE_FS &&
 		       !(rq->cmd_flags & REQ_DISCARD));
@@ -673,24 +712,21 @@ void elv_insert(struct request_queue *q, struct request *rq, int where)
 		q->elevator->ops->elevator_add_req_fn(q, rq);
 		break;
 
+	case ELEVATOR_INSERT_FLUSH:
+		rq->cmd_flags |= REQ_SOFTBARRIER;
+		blk_insert_flush(rq);
+		break;
 	default:
 		printk(KERN_ERR "%s: bad insertion point %d\n",
 		       __func__, where);
 		BUG();
 	}
-
-	if (unplug_it && blk_queue_plugged(q)) {
-		int nrq = q->rq.count[BLK_RW_SYNC] + q->rq.count[BLK_RW_ASYNC]
-				- queue_in_flight(q);
-
-		if (nrq >= q->unplug_thresh)
-			__generic_unplug_device(q);
-	}
 }
 
-void __elv_add_request(struct request_queue *q, struct request *rq, int where,
-		       int plug)
+void __elv_add_request(struct request_queue *q, struct request *rq, int where)
 {
+	BUG_ON(rq->cmd_flags & REQ_ON_PLUG);
+
 	if (rq->cmd_flags & REQ_SOFTBARRIER) {
 		/* barriers are scheduling boundary, update end_sector */
 		if (rq->cmd_type == REQ_TYPE_FS ||
@@ -702,38 +738,20 @@ void __elv_add_request(struct request_queue *q, struct request *rq, int where,
 	    where == ELEVATOR_INSERT_SORT)
 		where = ELEVATOR_INSERT_BACK;
 
-	if (plug)
-		blk_plug_device(q);
-
 	elv_insert(q, rq, where);
 }
 EXPORT_SYMBOL(__elv_add_request);
 
-void elv_add_request(struct request_queue *q, struct request *rq, int where,
-		     int plug)
+void elv_add_request(struct request_queue *q, struct request *rq, int where)
 {
 	unsigned long flags;
 
 	spin_lock_irqsave(q->queue_lock, flags);
-	__elv_add_request(q, rq, where, plug);
+	__elv_add_request(q, rq, where);
 	spin_unlock_irqrestore(q->queue_lock, flags);
 }
 EXPORT_SYMBOL(elv_add_request);
 
-int elv_queue_empty(struct request_queue *q)
-{
-	struct elevator_queue *e = q->elevator;
-
-	if (!list_empty(&q->queue_head))
-		return 0;
-
-	if (e->ops->elevator_queue_empty_fn)
-		return e->ops->elevator_queue_empty_fn(q);
-
-	return 1;
-}
-EXPORT_SYMBOL(elv_queue_empty);
-
 struct request *elv_latter_request(struct request_queue *q, struct request *rq)
 {
 	struct elevator_queue *e = q->elevator;
@@ -759,7 +777,7 @@ int elv_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask)
 	if (e->ops->elevator_set_req_fn)
 		return e->ops->elevator_set_req_fn(q, rq, gfp_mask);
 
-	rq->elevator_private = NULL;
+	rq->elevator_private[0] = NULL;
 	return 0;
 }
 
@@ -785,6 +803,8 @@ void elv_abort_queue(struct request_queue *q)
 {
 	struct request *rq;
 
+	blk_abort_flushes(q);
+
 	while (!list_empty(&q->queue_head)) {
 		rq = list_entry_rq(q->queue_head.next);
 		rq->cmd_flags |= REQ_QUIET;
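elv_attempt_insert_merge() added above performs a two-step candidate lookup: the one-hit cache (q->last_merge) first, then a hash keyed by request position. A stripped-down user-space sketch of that lookup order (the merge test and hash table are simplified placeholders, not the block layer's implementation):

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct req { unsigned long long pos, len; };

/* Placeholder for blk_attempt_req_merge(): append b behind a when b
 * starts exactly where a ends. */
static bool try_merge(struct req *a, struct req *b)
{
	if (!a || a->pos + a->len != b->pos)
		return false;
	a->len += b->len;
	return true;
}

static struct req *last_merge;	/* one-hit cache, like q->last_merge */

/* Placeholder for elv_rqhash_find(): a real elevator hashes requests by
 * end position; a trivial direct-mapped table stands in here. */
static struct req *hash_find(struct req **table, size_t n,
			     unsigned long long pos)
{
	return table[pos % n];	/* may be NULL or a false hit */
}

static bool attempt_insert_merge(struct req **table, size_t n, struct req *rq)
{
	/* First try the one-hit cache. */
	if (try_merge(last_merge, rq))
		return true;

	/* Then see if the hash lookup finds a potential backmerge. */
	return try_merge(hash_find(table, n, rq->pos), rq);
}

int main(void)
{
	struct req a = { .pos = 0, .len = 8 };
	struct req *table[4] = { &a, NULL, NULL, NULL };	/* 8 % 4 == 0 */
	struct req b = { .pos = 8, .len = 4 };

	printf("merged: %d, new len: %llu\n",
	       attempt_insert_merge(table, 4, &b), a.len);
	return 0;
}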
+9 -9
@@ -1158,14 +1158,14 @@ static int diskstats_show(struct seq_file *seqf, void *v)
 			   "%u %lu %lu %llu %u %u %u %u\n",
 			   MAJOR(part_devt(hd)), MINOR(part_devt(hd)),
 			   disk_name(gp, hd->partno, buf),
-			   part_stat_read(hd, ios[0]),
-			   part_stat_read(hd, merges[0]),
-			   (unsigned long long)part_stat_read(hd, sectors[0]),
-			   jiffies_to_msecs(part_stat_read(hd, ticks[0])),
-			   part_stat_read(hd, ios[1]),
-			   part_stat_read(hd, merges[1]),
-			   (unsigned long long)part_stat_read(hd, sectors[1]),
-			   jiffies_to_msecs(part_stat_read(hd, ticks[1])),
+			   part_stat_read(hd, ios[READ]),
+			   part_stat_read(hd, merges[READ]),
+			   (unsigned long long)part_stat_read(hd, sectors[READ]),
+			   jiffies_to_msecs(part_stat_read(hd, ticks[READ])),
+			   part_stat_read(hd, ios[WRITE]),
+			   part_stat_read(hd, merges[WRITE]),
+			   (unsigned long long)part_stat_read(hd, sectors[WRITE]),
+			   jiffies_to_msecs(part_stat_read(hd, ticks[WRITE])),
 			   part_in_flight(hd),
 			   jiffies_to_msecs(part_stat_read(hd, io_ticks)),
 			   jiffies_to_msecs(part_stat_read(hd, time_in_queue))
@@ -1494,7 +1494,7 @@ void disk_block_events(struct gendisk *disk)
 void disk_unblock_events(struct gendisk *disk)
 {
 	if (disk->ev)
-		__disk_unblock_events(disk, true);
+		__disk_unblock_events(disk, false);
 }
 
 /**
@@ -39,13 +39,6 @@ static void noop_add_request(struct request_queue *q, struct request *rq)
 	list_add_tail(&rq->queuelist, &nd->queue);
 }
 
-static int noop_queue_empty(struct request_queue *q)
-{
-	struct noop_data *nd = q->elevator->elevator_data;
-
-	return list_empty(&nd->queue);
-}
-
 static struct request *
 noop_former_request(struct request_queue *q, struct request *rq)
 {
@@ -90,7 +83,6 @@ static struct elevator_type elevator_noop = {
 		.elevator_merge_req_fn =	noop_merged_requests,
 		.elevator_dispatch_fn =		noop_dispatch,
 		.elevator_add_req_fn =		noop_add_request,
-		.elevator_queue_empty_fn =	noop_queue_empty,
 		.elevator_former_req_fn =	noop_former_request,
 		.elevator_latter_req_fn =	noop_latter_request,
 		.elevator_init_fn =		noop_init_queue,
Some files were not shown because too many files have changed in this diff.