Merge tag 'for-5.5/block-20191121' of git://git.kernel.dk/linux-block
Pull core block updates from Jens Axboe:
"Due to more granular branches, this one is small and will be followed
by other core branches that add specific features. I meant to just
have a core branch and a drivers branch, but due to external
dependencies we ended up adding a few more that are also core.
The changes are:
- Fixes and improvements for the zoned device support (Ajay, Damien)
- sed-opal table writing and datastore UID (Revanth)
- blk-cgroup (and bfq) blk-cgroup stat fixes (Tejun)
- Improvements to the block stats tracking (Pavel)
- Fix for overrunning the sysfs buffer for large numbers of CPUs (Ming)
- Optimization for small IO (Ming, Christoph)
- Fix typo in RWH lifetime hint (Eugene)
- Dead code removal and documentation (Bart)
- Reduction in memory usage for queue and tag set (Bart)
- Kerneldoc header documentation (André)
- Device/partition revalidation fixes (Jan)
- Stats tracking for flush requests (Konstantin)
- Various other little fixes here and there (et al)"
* tag 'for-5.5/block-20191121' of git://git.kernel.dk/linux-block: (48 commits)
Revert "block: split bio if the only bvec's length is > SZ_4K"
block: add iostat counters for flush requests
block,bfq: Skip tracing hooks if possible
block: sed-opal: Introduce SUM_SET_LIST parameter and append it using 'add_token_u64'
blk-cgroup: cgroup_rstat_updated() shouldn't be called on cgroup1
block: Don't disable interrupts in trigger_softirq()
sbitmap: Delete sbitmap_any_bit_clear()
blk-mq: Delete blk_mq_has_free_tags() and blk_mq_can_queue()
block: split bio if the only bvec's length is > SZ_4K
block: still try to split bio if the bvec crosses pages
blk-cgroup: separate out blkg_rwstat under CONFIG_BLK_CGROUP_RWSTAT
blk-cgroup: reimplement basic IO stats using cgroup rstat
blk-cgroup: remove now unused blkg_print_stat_{bytes|ios}_recursive()
blk-throtl: stop using blkg->stat_bytes and ->stat_ios
bfq-iosched: stop using blkg->stat_bytes and ->stat_ios
bfq-iosched: relocate bfqg_*rwstat*() helpers
block: add zone open, close and finish ioctl support
block: add zone open, close and finish operations
block: Simplify REQ_OP_ZONE_RESET_ALL handling
block: Remove REQ_OP_ZONE_RESET plugging
...
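The flush counters added below show up as two extra columns in /proc/diskstats and in /sys/block/<dev>/stat. As a quick orientation, and not part of the commit itself, here is a minimal userspace sketch that prints the new fields; the field positions are taken from the updated procfs-diskstats ABI text in the first hunk, and devices on older kernels (fewer fields) are simply skipped:

/*
 * Sketch: print the 5.5+ flush fields from /proc/diskstats.
 * Per the ABI text below, field 19 is the number of flush requests
 * completed and field 20 is the time spent flushing in milliseconds.
 */
#include <stdio.h>
#include <string.h>

int main(void)
{
	char line[512];
	FILE *f = fopen("/proc/diskstats", "r");

	if (!f) {
		perror("/proc/diskstats");
		return 1;
	}

	while (fgets(line, sizeof(line), f)) {
		char *tok, *save, *field[32];
		int n = 0;

		for (tok = strtok_r(line, " \t\n", &save);
		     tok && n < 32;
		     tok = strtok_r(NULL, " \t\n", &save))
			field[n++] = tok;

		if (n < 20)	/* kernel without the flush fields */
			continue;

		/* field[2] is the device name; fields are 1-based in the doc */
		printf("%s: %s flushes, %s ms flushing\n",
		       field[2], field[18], field[19]);
	}
	fclose(f);
	return 0;
}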
@@ -29,4 +29,9 @@ Description:
		17 - sectors discarded
		18 - time spent discarding

		Kernel 5.5+ appends two more fields for flush requests:

		19 - flush requests completed successfully
		20 - time spent flushing

		For more details refer to Documentation/admin-guide/iostats.rst
@@ -15,6 +15,12 @@ Description:
		9 - I/Os currently in progress
		10 - time spent doing I/Os (ms)
		11 - weighted time spent doing I/Os (ms)
		12 - discards completed
		13 - discards merged
		14 - sectors discarded
		15 - time spent discarding (ms)
		16 - flush requests completed
		17 - time spent flushing (ms)
		For more details refer Documentation/admin-guide/iostats.rst
@@ -121,6 +121,15 @@ Field 15 -- # of milliseconds spent discarding
    This is the total number of milliseconds spent by all discards (as
    measured from __make_request() to end_that_request_last()).

Field 16 -- # of flush requests completed
    This is the total number of flush requests completed successfully.

    Block layer combines flush requests and executes at most one at a time.
    This counts flush requests executed by disk. Not tracked for partitions.

Field 17 -- # of milliseconds spent flushing
    This is the total number of milliseconds spent by all flush requests.

To avoid introducing performance bottlenecks, no locks are held while
modifying these counters. This implies that minor inaccuracies may be
introduced when changes collide, so (for instance) adding up all the
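To make Fields 16 and 17 concrete, a small sketch follows (again not part of the commit; "sda" is an assumed example device) that derives the lifetime average flush latency from /sys/block/<dev>/stat, where the flush count and flush milliseconds are fields 16 and 17 of the stat file. As the text above notes, the counters are updated without locks, so the result is only an estimate:

/*
 * Sketch: average flush latency from /sys/block/<dev>/stat.
 * Fields are 1-based per the sysfs-block hunk above; a pre-5.5
 * kernel exposes fewer than 17 fields and is reported as an error.
 */
#include <stdio.h>

static int read_flush(const char *path, unsigned long long *ios,
		      unsigned long long *ms)
{
	unsigned long long v[17];
	FILE *f = fopen(path, "r");
	int i, ok = 1;

	if (!f)
		return -1;
	for (i = 0; i < 17; i++)
		if (fscanf(f, "%llu", &v[i]) != 1)
			ok = 0;
	fclose(f);
	if (!ok)
		return -1;
	*ios = v[15];	/* field 16: flush requests completed */
	*ms  = v[16];	/* field 17: time spent flushing (ms) */
	return 0;
}

int main(void)
{
	const char *path = "/sys/block/sda/stat";	/* assumed device */
	unsigned long long ios, ms;

	if (read_flush(path, &ios, &ms))
		return 1;
	if (ios)
		printf("avg flush latency: %.2f ms over %llu flushes\n",
		       (double)ms / ios, ios);
	return 0;
}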
@@ -41,6 +41,8 @@ discard I/Os requests number of discard I/Os processed
discard merges  requests      number of discard I/Os merged with in-queue I/O
discard sectors sectors       number of sectors discarded
discard ticks   milliseconds  total wait time for discard requests
flush I/Os      requests      number of flush I/Os processed
flush ticks     milliseconds  total wait time for flush requests
=============== ============= =================================================

read I/Os, write I/Os, discard I/Os
@@ -48,6 +50,14 @@ read I/Os, write I/Os, discard I/Os

These values increment when an I/O request completes.

flush I/Os
==========

These values increment when a flush I/O request completes.

Block layer combines flush requests and executes at most one at a time.
This counts flush requests executed by disk. Not tracked for partitions.

read merges, write merges, discard merges
=========================================
@@ -62,8 +72,8 @@ discarded from this block device. The "sectors" in question are the
standard UNIX 512-byte sectors, not any device- or filesystem-specific
block size. The counters are incremented when the I/O completes.

read ticks, write ticks, discard ticks
======================================
read ticks, write ticks, discard ticks, flush ticks
===================================================

These values count the number of milliseconds that I/O requests have
waited on this block device. If there are multiple I/O requests waiting,
@@ -32,6 +32,9 @@ config BLK_RQ_ALLOC_TIME
config BLK_SCSI_REQUEST
	bool

config BLK_CGROUP_RWSTAT
	bool

config BLK_DEV_BSG
	bool "Block layer SG support v4"
	default y
@@ -86,6 +89,7 @@ config BLK_DEV_ZONED
config BLK_DEV_THROTTLING
	bool "Block layer bio throttling support"
	depends on BLK_CGROUP=y
	select BLK_CGROUP_RWSTAT
	---help---
	Block layer bio throttling support. It can be used to limit
	the IO rate to a device. IO rate policies are per cgroup and
@@ -31,6 +31,7 @@ config IOSCHED_BFQ
config BFQ_GROUP_IOSCHED
	bool "BFQ hierarchical scheduling support"
	depends on IOSCHED_BFQ && BLK_CGROUP
	select BLK_CGROUP_RWSTAT
	---help---

	Enable hierarchical scheduling in BFQ, using the blkio
@@ -16,6 +16,7 @@ obj-$(CONFIG_BLK_SCSI_REQUEST) += scsi_ioctl.o
obj-$(CONFIG_BLK_DEV_BSG) += bsg.o
obj-$(CONFIG_BLK_DEV_BSGLIB) += bsg-lib.o
obj-$(CONFIG_BLK_CGROUP) += blk-cgroup.o
obj-$(CONFIG_BLK_CGROUP_RWSTAT) += blk-cgroup-rwstat.o
obj-$(CONFIG_BLK_DEV_THROTTLING) += blk-throttle.o
obj-$(CONFIG_BLK_CGROUP_IOLATENCY) += blk-iolatency.o
obj-$(CONFIG_BLK_CGROUP_IOCOST) += blk-iocost.o
@@ -347,6 +347,14 @@ void bfqg_and_blkg_put(struct bfq_group *bfqg)
|
||||
bfqg_put(bfqg);
|
||||
}
|
||||
|
||||
void bfqg_stats_update_legacy_io(struct request_queue *q, struct request *rq)
|
||||
{
|
||||
struct bfq_group *bfqg = blkg_to_bfqg(rq->bio->bi_blkg);
|
||||
|
||||
blkg_rwstat_add(&bfqg->stats.bytes, rq->cmd_flags, blk_rq_bytes(rq));
|
||||
blkg_rwstat_add(&bfqg->stats.ios, rq->cmd_flags, 1);
|
||||
}
|
||||
|
||||
/* @stats = 0 */
|
||||
static void bfqg_stats_reset(struct bfqg_stats *stats)
|
||||
{
|
||||
@@ -431,6 +439,8 @@ void bfq_init_entity(struct bfq_entity *entity, struct bfq_group *bfqg)
|
||||
|
||||
static void bfqg_stats_exit(struct bfqg_stats *stats)
|
||||
{
|
||||
blkg_rwstat_exit(&stats->bytes);
|
||||
blkg_rwstat_exit(&stats->ios);
|
||||
#ifdef CONFIG_BFQ_CGROUP_DEBUG
|
||||
blkg_rwstat_exit(&stats->merged);
|
||||
blkg_rwstat_exit(&stats->service_time);
|
||||
@@ -448,6 +458,10 @@ static void bfqg_stats_exit(struct bfqg_stats *stats)
|
||||
|
||||
static int bfqg_stats_init(struct bfqg_stats *stats, gfp_t gfp)
|
||||
{
|
||||
if (blkg_rwstat_init(&stats->bytes, gfp) ||
|
||||
blkg_rwstat_init(&stats->ios, gfp))
|
||||
return -ENOMEM;
|
||||
|
||||
#ifdef CONFIG_BFQ_CGROUP_DEBUG
|
||||
if (blkg_rwstat_init(&stats->merged, gfp) ||
|
||||
blkg_rwstat_init(&stats->service_time, gfp) ||
|
||||
@@ -1057,6 +1071,30 @@ static ssize_t bfq_io_set_weight(struct kernfs_open_file *of,
|
||||
return bfq_io_set_device_weight(of, buf, nbytes, off);
|
||||
}
|
||||
|
||||
static int bfqg_print_rwstat(struct seq_file *sf, void *v)
|
||||
{
|
||||
blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), blkg_prfill_rwstat,
|
||||
&blkcg_policy_bfq, seq_cft(sf)->private, true);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static u64 bfqg_prfill_rwstat_recursive(struct seq_file *sf,
|
||||
struct blkg_policy_data *pd, int off)
|
||||
{
|
||||
struct blkg_rwstat_sample sum;
|
||||
|
||||
blkg_rwstat_recursive_sum(pd_to_blkg(pd), &blkcg_policy_bfq, off, &sum);
|
||||
return __blkg_prfill_rwstat(sf, pd, &sum);
|
||||
}
|
||||
|
||||
static int bfqg_print_rwstat_recursive(struct seq_file *sf, void *v)
|
||||
{
|
||||
blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
|
||||
bfqg_prfill_rwstat_recursive, &blkcg_policy_bfq,
|
||||
seq_cft(sf)->private, true);
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_BFQ_CGROUP_DEBUG
|
||||
static int bfqg_print_stat(struct seq_file *sf, void *v)
|
||||
{
|
||||
@@ -1065,13 +1103,6 @@ static int bfqg_print_stat(struct seq_file *sf, void *v)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int bfqg_print_rwstat(struct seq_file *sf, void *v)
|
||||
{
|
||||
blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), blkg_prfill_rwstat,
|
||||
&blkcg_policy_bfq, seq_cft(sf)->private, true);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static u64 bfqg_prfill_stat_recursive(struct seq_file *sf,
|
||||
struct blkg_policy_data *pd, int off)
|
||||
{
|
||||
@@ -1097,15 +1128,6 @@ static u64 bfqg_prfill_stat_recursive(struct seq_file *sf,
|
||||
return __blkg_prfill_u64(sf, pd, sum);
|
||||
}
|
||||
|
||||
static u64 bfqg_prfill_rwstat_recursive(struct seq_file *sf,
|
||||
struct blkg_policy_data *pd, int off)
|
||||
{
|
||||
struct blkg_rwstat_sample sum;
|
||||
|
||||
blkg_rwstat_recursive_sum(pd_to_blkg(pd), &blkcg_policy_bfq, off, &sum);
|
||||
return __blkg_prfill_rwstat(sf, pd, &sum);
|
||||
}
|
||||
|
||||
static int bfqg_print_stat_recursive(struct seq_file *sf, void *v)
|
||||
{
|
||||
blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
|
||||
@@ -1114,18 +1136,11 @@ static int bfqg_print_stat_recursive(struct seq_file *sf, void *v)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int bfqg_print_rwstat_recursive(struct seq_file *sf, void *v)
|
||||
{
|
||||
blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
|
||||
bfqg_prfill_rwstat_recursive, &blkcg_policy_bfq,
|
||||
seq_cft(sf)->private, true);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static u64 bfqg_prfill_sectors(struct seq_file *sf, struct blkg_policy_data *pd,
|
||||
int off)
|
||||
{
|
||||
u64 sum = blkg_rwstat_total(&pd->blkg->stat_bytes);
|
||||
struct bfq_group *bfqg = blkg_to_bfqg(pd->blkg);
|
||||
u64 sum = blkg_rwstat_total(&bfqg->stats.bytes);
|
||||
|
||||
return __blkg_prfill_u64(sf, pd, sum >> 9);
|
||||
}
|
||||
@@ -1142,8 +1157,8 @@ static u64 bfqg_prfill_sectors_recursive(struct seq_file *sf,
|
||||
{
|
||||
struct blkg_rwstat_sample tmp;
|
||||
|
||||
blkg_rwstat_recursive_sum(pd->blkg, NULL,
|
||||
offsetof(struct blkcg_gq, stat_bytes), &tmp);
|
||||
blkg_rwstat_recursive_sum(pd->blkg, &blkcg_policy_bfq,
|
||||
offsetof(struct bfq_group, stats.bytes), &tmp);
|
||||
|
||||
return __blkg_prfill_u64(sf, pd,
|
||||
(tmp.cnt[BLKG_RWSTAT_READ] + tmp.cnt[BLKG_RWSTAT_WRITE]) >> 9);
|
||||
@@ -1226,13 +1241,13 @@ struct cftype bfq_blkcg_legacy_files[] = {
|
||||
/* statistics, covers only the tasks in the bfqg */
|
||||
{
|
||||
.name = "bfq.io_service_bytes",
|
||||
.private = (unsigned long)&blkcg_policy_bfq,
|
||||
.seq_show = blkg_print_stat_bytes,
|
||||
.private = offsetof(struct bfq_group, stats.bytes),
|
||||
.seq_show = bfqg_print_rwstat,
|
||||
},
|
||||
{
|
||||
.name = "bfq.io_serviced",
|
||||
.private = (unsigned long)&blkcg_policy_bfq,
|
||||
.seq_show = blkg_print_stat_ios,
|
||||
.private = offsetof(struct bfq_group, stats.ios),
|
||||
.seq_show = bfqg_print_rwstat,
|
||||
},
|
||||
#ifdef CONFIG_BFQ_CGROUP_DEBUG
|
||||
{
|
||||
@@ -1269,13 +1284,13 @@ struct cftype bfq_blkcg_legacy_files[] = {
|
||||
/* the same statistics which cover the bfqg and its descendants */
|
||||
{
|
||||
.name = "bfq.io_service_bytes_recursive",
|
||||
.private = (unsigned long)&blkcg_policy_bfq,
|
||||
.seq_show = blkg_print_stat_bytes_recursive,
|
||||
.private = offsetof(struct bfq_group, stats.bytes),
|
||||
.seq_show = bfqg_print_rwstat_recursive,
|
||||
},
|
||||
{
|
||||
.name = "bfq.io_serviced_recursive",
|
||||
.private = (unsigned long)&blkcg_policy_bfq,
|
||||
.seq_show = blkg_print_stat_ios_recursive,
|
||||
.private = offsetof(struct bfq_group, stats.ios),
|
||||
.seq_show = bfqg_print_rwstat_recursive,
|
||||
},
|
||||
#ifdef CONFIG_BFQ_CGROUP_DEBUG
|
||||
{
|
||||
|
||||
@@ -5484,6 +5484,10 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
	bool idle_timer_disabled = false;
	unsigned int cmd_flags;

#ifdef CONFIG_BFQ_GROUP_IOSCHED
	if (!cgroup_subsys_on_dfl(io_cgrp_subsys) && rq->bio)
		bfqg_stats_update_legacy_io(q, rq);
#endif
	spin_lock_irq(&bfqd->lock);
	if (blk_mq_sched_try_insert_merge(q, rq)) {
		spin_unlock_irq(&bfqd->lock);
@@ -10,6 +10,8 @@
|
||||
#include <linux/hrtimer.h>
|
||||
#include <linux/blk-cgroup.h>
|
||||
|
||||
#include "blk-cgroup-rwstat.h"
|
||||
|
||||
#define BFQ_IOPRIO_CLASSES 3
|
||||
#define BFQ_CL_IDLE_TIMEOUT (HZ/5)
|
||||
|
||||
@@ -809,6 +811,9 @@ struct bfq_stat {
|
||||
};
|
||||
|
||||
struct bfqg_stats {
|
||||
/* basic stats */
|
||||
struct blkg_rwstat bytes;
|
||||
struct blkg_rwstat ios;
|
||||
#ifdef CONFIG_BFQ_CGROUP_DEBUG
|
||||
/* number of ios merged */
|
||||
struct blkg_rwstat merged;
|
||||
@@ -956,6 +961,7 @@ void bfq_put_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg);
|
||||
|
||||
/* ---------------- cgroups-support interface ---------------- */
|
||||
|
||||
void bfqg_stats_update_legacy_io(struct request_queue *q, struct request *rq);
|
||||
void bfqg_stats_update_io_add(struct bfq_group *bfqg, struct bfq_queue *bfqq,
|
||||
unsigned int op);
|
||||
void bfqg_stats_update_io_remove(struct bfq_group *bfqg, unsigned int op);
|
||||
@@ -1062,6 +1068,8 @@ struct bfq_group *bfqq_group(struct bfq_queue *bfqq);
|
||||
|
||||
#define bfq_log_bfqq(bfqd, bfqq, fmt, args...) do { \
|
||||
char pid_str[MAX_PID_STR_LENGTH]; \
|
||||
if (likely(!blk_trace_note_message_enabled((bfqd)->queue))) \
|
||||
break; \
|
||||
bfq_pid_to_str((bfqq)->pid, pid_str, MAX_PID_STR_LENGTH); \
|
||||
blk_add_cgroup_trace_msg((bfqd)->queue, \
|
||||
bfqg_to_blkg(bfqq_group(bfqq))->blkcg, \
|
||||
@@ -1078,6 +1086,8 @@ struct bfq_group *bfqq_group(struct bfq_queue *bfqq);
|
||||
|
||||
#define bfq_log_bfqq(bfqd, bfqq, fmt, args...) do { \
|
||||
char pid_str[MAX_PID_STR_LENGTH]; \
|
||||
if (likely(!blk_trace_note_message_enabled((bfqd)->queue))) \
|
||||
break; \
|
||||
bfq_pid_to_str((bfqq)->pid, pid_str, MAX_PID_STR_LENGTH); \
|
||||
blk_add_trace_msg((bfqd)->queue, "bfq%s%c " fmt, pid_str, \
|
||||
bfq_bfqq_sync((bfqq)) ? 'S' : 'A', \
|
||||
|
||||
block/blk-cgroup-rwstat.c (new file, 129 lines)
@@ -0,0 +1,129 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0
|
||||
*
|
||||
* Legacy blkg rwstat helpers enabled by CONFIG_BLK_CGROUP_RWSTAT.
|
||||
* Do not use in new code.
|
||||
*/
|
||||
#include "blk-cgroup-rwstat.h"
|
||||
|
||||
int blkg_rwstat_init(struct blkg_rwstat *rwstat, gfp_t gfp)
|
||||
{
|
||||
int i, ret;
|
||||
|
||||
for (i = 0; i < BLKG_RWSTAT_NR; i++) {
|
||||
ret = percpu_counter_init(&rwstat->cpu_cnt[i], 0, gfp);
|
||||
if (ret) {
|
||||
while (--i >= 0)
|
||||
percpu_counter_destroy(&rwstat->cpu_cnt[i]);
|
||||
return ret;
|
||||
}
|
||||
atomic64_set(&rwstat->aux_cnt[i], 0);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blkg_rwstat_init);
|
||||
|
||||
void blkg_rwstat_exit(struct blkg_rwstat *rwstat)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < BLKG_RWSTAT_NR; i++)
|
||||
percpu_counter_destroy(&rwstat->cpu_cnt[i]);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blkg_rwstat_exit);
|
||||
|
||||
/**
|
||||
* __blkg_prfill_rwstat - prfill helper for a blkg_rwstat
|
||||
* @sf: seq_file to print to
|
||||
* @pd: policy private data of interest
|
||||
* @rwstat: rwstat to print
|
||||
*
|
||||
* Print @rwstat to @sf for the device assocaited with @pd.
|
||||
*/
|
||||
u64 __blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
|
||||
const struct blkg_rwstat_sample *rwstat)
|
||||
{
|
||||
static const char *rwstr[] = {
|
||||
[BLKG_RWSTAT_READ] = "Read",
|
||||
[BLKG_RWSTAT_WRITE] = "Write",
|
||||
[BLKG_RWSTAT_SYNC] = "Sync",
|
||||
[BLKG_RWSTAT_ASYNC] = "Async",
|
||||
[BLKG_RWSTAT_DISCARD] = "Discard",
|
||||
};
|
||||
const char *dname = blkg_dev_name(pd->blkg);
|
||||
u64 v;
|
||||
int i;
|
||||
|
||||
if (!dname)
|
||||
return 0;
|
||||
|
||||
for (i = 0; i < BLKG_RWSTAT_NR; i++)
|
||||
seq_printf(sf, "%s %s %llu\n", dname, rwstr[i],
|
||||
rwstat->cnt[i]);
|
||||
|
||||
v = rwstat->cnt[BLKG_RWSTAT_READ] +
|
||||
rwstat->cnt[BLKG_RWSTAT_WRITE] +
|
||||
rwstat->cnt[BLKG_RWSTAT_DISCARD];
|
||||
seq_printf(sf, "%s Total %llu\n", dname, v);
|
||||
return v;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(__blkg_prfill_rwstat);
|
||||
|
||||
/**
|
||||
* blkg_prfill_rwstat - prfill callback for blkg_rwstat
|
||||
* @sf: seq_file to print to
|
||||
* @pd: policy private data of interest
|
||||
* @off: offset to the blkg_rwstat in @pd
|
||||
*
|
||||
* prfill callback for printing a blkg_rwstat.
|
||||
*/
|
||||
u64 blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
|
||||
int off)
|
||||
{
|
||||
struct blkg_rwstat_sample rwstat = { };
|
||||
|
||||
blkg_rwstat_read((void *)pd + off, &rwstat);
|
||||
return __blkg_prfill_rwstat(sf, pd, &rwstat);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blkg_prfill_rwstat);
|
||||
|
||||
/**
|
||||
* blkg_rwstat_recursive_sum - collect hierarchical blkg_rwstat
|
||||
* @blkg: blkg of interest
|
||||
* @pol: blkcg_policy which contains the blkg_rwstat
|
||||
* @off: offset to the blkg_rwstat in blkg_policy_data or @blkg
|
||||
* @sum: blkg_rwstat_sample structure containing the results
|
||||
*
|
||||
* Collect the blkg_rwstat specified by @blkg, @pol and @off and all its
|
||||
* online descendants and their aux counts. The caller must be holding the
|
||||
* queue lock for online tests.
|
||||
*
|
||||
* If @pol is NULL, blkg_rwstat is at @off bytes into @blkg; otherwise, it
|
||||
* is at @off bytes into @blkg's blkg_policy_data of the policy.
|
||||
*/
|
||||
void blkg_rwstat_recursive_sum(struct blkcg_gq *blkg, struct blkcg_policy *pol,
|
||||
int off, struct blkg_rwstat_sample *sum)
|
||||
{
|
||||
struct blkcg_gq *pos_blkg;
|
||||
struct cgroup_subsys_state *pos_css;
|
||||
unsigned int i;
|
||||
|
||||
lockdep_assert_held(&blkg->q->queue_lock);
|
||||
|
||||
rcu_read_lock();
|
||||
blkg_for_each_descendant_pre(pos_blkg, pos_css, blkg) {
|
||||
struct blkg_rwstat *rwstat;
|
||||
|
||||
if (!pos_blkg->online)
|
||||
continue;
|
||||
|
||||
if (pol)
|
||||
rwstat = (void *)blkg_to_pd(pos_blkg, pol) + off;
|
||||
else
|
||||
rwstat = (void *)pos_blkg + off;
|
||||
|
||||
for (i = 0; i < BLKG_RWSTAT_NR; i++)
|
||||
sum->cnt[i] = blkg_rwstat_read_counter(rwstat, i);
|
||||
}
|
||||
rcu_read_unlock();
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blkg_rwstat_recursive_sum);
|
||||
block/blk-cgroup-rwstat.h (new file, 149 lines)
@@ -0,0 +1,149 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0
|
||||
*
|
||||
* Legacy blkg rwstat helpers enabled by CONFIG_BLK_CGROUP_RWSTAT.
|
||||
* Do not use in new code.
|
||||
*/
|
||||
#ifndef _BLK_CGROUP_RWSTAT_H
|
||||
#define _BLK_CGROUP_RWSTAT_H
|
||||
|
||||
#include <linux/blk-cgroup.h>
|
||||
|
||||
enum blkg_rwstat_type {
|
||||
BLKG_RWSTAT_READ,
|
||||
BLKG_RWSTAT_WRITE,
|
||||
BLKG_RWSTAT_SYNC,
|
||||
BLKG_RWSTAT_ASYNC,
|
||||
BLKG_RWSTAT_DISCARD,
|
||||
|
||||
BLKG_RWSTAT_NR,
|
||||
BLKG_RWSTAT_TOTAL = BLKG_RWSTAT_NR,
|
||||
};
|
||||
|
||||
/*
|
||||
* blkg_[rw]stat->aux_cnt is excluded for local stats but included for
|
||||
* recursive. Used to carry stats of dead children.
|
||||
*/
|
||||
struct blkg_rwstat {
|
||||
struct percpu_counter cpu_cnt[BLKG_RWSTAT_NR];
|
||||
atomic64_t aux_cnt[BLKG_RWSTAT_NR];
|
||||
};
|
||||
|
||||
struct blkg_rwstat_sample {
|
||||
u64 cnt[BLKG_RWSTAT_NR];
|
||||
};
|
||||
|
||||
static inline u64 blkg_rwstat_read_counter(struct blkg_rwstat *rwstat,
|
||||
unsigned int idx)
|
||||
{
|
||||
return atomic64_read(&rwstat->aux_cnt[idx]) +
|
||||
percpu_counter_sum_positive(&rwstat->cpu_cnt[idx]);
|
||||
}
|
||||
|
||||
int blkg_rwstat_init(struct blkg_rwstat *rwstat, gfp_t gfp);
|
||||
void blkg_rwstat_exit(struct blkg_rwstat *rwstat);
|
||||
u64 __blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
|
||||
const struct blkg_rwstat_sample *rwstat);
|
||||
u64 blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
|
||||
int off);
|
||||
void blkg_rwstat_recursive_sum(struct blkcg_gq *blkg, struct blkcg_policy *pol,
|
||||
int off, struct blkg_rwstat_sample *sum);
|
||||
|
||||
|
||||
/**
|
||||
* blkg_rwstat_add - add a value to a blkg_rwstat
|
||||
* @rwstat: target blkg_rwstat
|
||||
* @op: REQ_OP and flags
|
||||
* @val: value to add
|
||||
*
|
||||
* Add @val to @rwstat. The counters are chosen according to @rw. The
|
||||
* caller is responsible for synchronizing calls to this function.
|
||||
*/
|
||||
static inline void blkg_rwstat_add(struct blkg_rwstat *rwstat,
|
||||
unsigned int op, uint64_t val)
|
||||
{
|
||||
struct percpu_counter *cnt;
|
||||
|
||||
if (op_is_discard(op))
|
||||
cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_DISCARD];
|
||||
else if (op_is_write(op))
|
||||
cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_WRITE];
|
||||
else
|
||||
cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_READ];
|
||||
|
||||
percpu_counter_add_batch(cnt, val, BLKG_STAT_CPU_BATCH);
|
||||
|
||||
if (op_is_sync(op))
|
||||
cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_SYNC];
|
||||
else
|
||||
cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_ASYNC];
|
||||
|
||||
percpu_counter_add_batch(cnt, val, BLKG_STAT_CPU_BATCH);
|
||||
}
|
||||
|
||||
/**
|
||||
* blkg_rwstat_read - read the current values of a blkg_rwstat
|
||||
* @rwstat: blkg_rwstat to read
|
||||
*
|
||||
* Read the current snapshot of @rwstat and return it in the aux counts.
|
||||
*/
|
||||
static inline void blkg_rwstat_read(struct blkg_rwstat *rwstat,
|
||||
struct blkg_rwstat_sample *result)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < BLKG_RWSTAT_NR; i++)
|
||||
result->cnt[i] =
|
||||
percpu_counter_sum_positive(&rwstat->cpu_cnt[i]);
|
||||
}
|
||||
|
||||
/**
|
||||
* blkg_rwstat_total - read the total count of a blkg_rwstat
|
||||
* @rwstat: blkg_rwstat to read
|
||||
*
|
||||
* Return the total count of @rwstat regardless of the IO direction. This
|
||||
* function can be called without synchronization and takes care of u64
|
||||
* atomicity.
|
||||
*/
|
||||
static inline uint64_t blkg_rwstat_total(struct blkg_rwstat *rwstat)
|
||||
{
|
||||
struct blkg_rwstat_sample tmp = { };
|
||||
|
||||
blkg_rwstat_read(rwstat, &tmp);
|
||||
return tmp.cnt[BLKG_RWSTAT_READ] + tmp.cnt[BLKG_RWSTAT_WRITE];
|
||||
}
|
||||
|
||||
/**
|
||||
* blkg_rwstat_reset - reset a blkg_rwstat
|
||||
* @rwstat: blkg_rwstat to reset
|
||||
*/
|
||||
static inline void blkg_rwstat_reset(struct blkg_rwstat *rwstat)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < BLKG_RWSTAT_NR; i++) {
|
||||
percpu_counter_set(&rwstat->cpu_cnt[i], 0);
|
||||
atomic64_set(&rwstat->aux_cnt[i], 0);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* blkg_rwstat_add_aux - add a blkg_rwstat into another's aux count
|
||||
* @to: the destination blkg_rwstat
|
||||
* @from: the source
|
||||
*
|
||||
* Add @from's count including the aux one to @to's aux count.
|
||||
*/
|
||||
static inline void blkg_rwstat_add_aux(struct blkg_rwstat *to,
|
||||
struct blkg_rwstat *from)
|
||||
{
|
||||
u64 sum[BLKG_RWSTAT_NR];
|
||||
int i;
|
||||
|
||||
for (i = 0; i < BLKG_RWSTAT_NR; i++)
|
||||
sum[i] = percpu_counter_sum_positive(&from->cpu_cnt[i]);
|
||||
|
||||
for (i = 0; i < BLKG_RWSTAT_NR; i++)
|
||||
atomic64_add(sum[i] + atomic64_read(&from->aux_cnt[i]),
|
||||
&to->aux_cnt[i]);
|
||||
}
|
||||
#endif /* _BLK_CGROUP_RWSTAT_H */
|
||||
@@ -80,8 +80,7 @@ static void blkg_free(struct blkcg_gq *blkg)
|
||||
if (blkg->pd[i])
|
||||
blkcg_policy[i]->pd_free_fn(blkg->pd[i]);
|
||||
|
||||
blkg_rwstat_exit(&blkg->stat_ios);
|
||||
blkg_rwstat_exit(&blkg->stat_bytes);
|
||||
free_percpu(blkg->iostat_cpu);
|
||||
percpu_ref_exit(&blkg->refcnt);
|
||||
kfree(blkg);
|
||||
}
|
||||
@@ -146,7 +145,7 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q,
|
||||
gfp_t gfp_mask)
|
||||
{
|
||||
struct blkcg_gq *blkg;
|
||||
int i;
|
||||
int i, cpu;
|
||||
|
||||
/* alloc and init base part */
|
||||
blkg = kzalloc_node(sizeof(*blkg), gfp_mask, q->node);
|
||||
@@ -156,8 +155,8 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q,
|
||||
if (percpu_ref_init(&blkg->refcnt, blkg_release, 0, gfp_mask))
|
||||
goto err_free;
|
||||
|
||||
if (blkg_rwstat_init(&blkg->stat_bytes, gfp_mask) ||
|
||||
blkg_rwstat_init(&blkg->stat_ios, gfp_mask))
|
||||
blkg->iostat_cpu = alloc_percpu_gfp(struct blkg_iostat_set, gfp_mask);
|
||||
if (!blkg->iostat_cpu)
|
||||
goto err_free;
|
||||
|
||||
blkg->q = q;
|
||||
@@ -167,6 +166,10 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q,
|
||||
INIT_WORK(&blkg->async_bio_work, blkg_async_bio_workfn);
|
||||
blkg->blkcg = blkcg;
|
||||
|
||||
u64_stats_init(&blkg->iostat.sync);
|
||||
for_each_possible_cpu(cpu)
|
||||
u64_stats_init(&per_cpu_ptr(blkg->iostat_cpu, cpu)->sync);
|
||||
|
||||
for (i = 0; i < BLKCG_MAX_POLS; i++) {
|
||||
struct blkcg_policy *pol = blkcg_policy[i];
|
||||
struct blkg_policy_data *pd;
|
||||
@@ -393,7 +396,6 @@ struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
|
||||
static void blkg_destroy(struct blkcg_gq *blkg)
|
||||
{
|
||||
struct blkcg *blkcg = blkg->blkcg;
|
||||
struct blkcg_gq *parent = blkg->parent;
|
||||
int i;
|
||||
|
||||
lockdep_assert_held(&blkg->q->queue_lock);
|
||||
@@ -410,11 +412,6 @@ static void blkg_destroy(struct blkcg_gq *blkg)
|
||||
pol->pd_offline_fn(blkg->pd[i]);
|
||||
}
|
||||
|
||||
if (parent) {
|
||||
blkg_rwstat_add_aux(&parent->stat_bytes, &blkg->stat_bytes);
|
||||
blkg_rwstat_add_aux(&parent->stat_ios, &blkg->stat_ios);
|
||||
}
|
||||
|
||||
blkg->online = false;
|
||||
|
||||
radix_tree_delete(&blkcg->blkg_tree, blkg->q->id);
|
||||
@@ -464,7 +461,7 @@ static int blkcg_reset_stats(struct cgroup_subsys_state *css,
|
||||
{
|
||||
struct blkcg *blkcg = css_to_blkcg(css);
|
||||
struct blkcg_gq *blkg;
|
||||
int i;
|
||||
int i, cpu;
|
||||
|
||||
mutex_lock(&blkcg_pol_mutex);
|
||||
spin_lock_irq(&blkcg->lock);
|
||||
@@ -475,8 +472,12 @@ static int blkcg_reset_stats(struct cgroup_subsys_state *css,
|
||||
* anyway. If you get hit by a race, retry.
|
||||
*/
|
||||
hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node) {
|
||||
blkg_rwstat_reset(&blkg->stat_bytes);
|
||||
blkg_rwstat_reset(&blkg->stat_ios);
|
||||
for_each_possible_cpu(cpu) {
|
||||
struct blkg_iostat_set *bis =
|
||||
per_cpu_ptr(blkg->iostat_cpu, cpu);
|
||||
memset(bis, 0, sizeof(*bis));
|
||||
}
|
||||
memset(&blkg->iostat, 0, sizeof(blkg->iostat));
|
||||
|
||||
for (i = 0; i < BLKCG_MAX_POLS; i++) {
|
||||
struct blkcg_policy *pol = blkcg_policy[i];
|
||||
@@ -560,186 +561,6 @@ u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(__blkg_prfill_u64);
|
||||
|
||||
/**
|
||||
* __blkg_prfill_rwstat - prfill helper for a blkg_rwstat
|
||||
* @sf: seq_file to print to
|
||||
* @pd: policy private data of interest
|
||||
* @rwstat: rwstat to print
|
||||
*
|
||||
* Print @rwstat to @sf for the device assocaited with @pd.
|
||||
*/
|
||||
u64 __blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
|
||||
const struct blkg_rwstat_sample *rwstat)
|
||||
{
|
||||
static const char *rwstr[] = {
|
||||
[BLKG_RWSTAT_READ] = "Read",
|
||||
[BLKG_RWSTAT_WRITE] = "Write",
|
||||
[BLKG_RWSTAT_SYNC] = "Sync",
|
||||
[BLKG_RWSTAT_ASYNC] = "Async",
|
||||
[BLKG_RWSTAT_DISCARD] = "Discard",
|
||||
};
|
||||
const char *dname = blkg_dev_name(pd->blkg);
|
||||
u64 v;
|
||||
int i;
|
||||
|
||||
if (!dname)
|
||||
return 0;
|
||||
|
||||
for (i = 0; i < BLKG_RWSTAT_NR; i++)
|
||||
seq_printf(sf, "%s %s %llu\n", dname, rwstr[i],
|
||||
rwstat->cnt[i]);
|
||||
|
||||
v = rwstat->cnt[BLKG_RWSTAT_READ] +
|
||||
rwstat->cnt[BLKG_RWSTAT_WRITE] +
|
||||
rwstat->cnt[BLKG_RWSTAT_DISCARD];
|
||||
seq_printf(sf, "%s Total %llu\n", dname, v);
|
||||
return v;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(__blkg_prfill_rwstat);
|
||||
|
||||
/**
|
||||
* blkg_prfill_rwstat - prfill callback for blkg_rwstat
|
||||
* @sf: seq_file to print to
|
||||
* @pd: policy private data of interest
|
||||
* @off: offset to the blkg_rwstat in @pd
|
||||
*
|
||||
* prfill callback for printing a blkg_rwstat.
|
||||
*/
|
||||
u64 blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
|
||||
int off)
|
||||
{
|
||||
struct blkg_rwstat_sample rwstat = { };
|
||||
|
||||
blkg_rwstat_read((void *)pd + off, &rwstat);
|
||||
return __blkg_prfill_rwstat(sf, pd, &rwstat);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blkg_prfill_rwstat);
|
||||
|
||||
static u64 blkg_prfill_rwstat_field(struct seq_file *sf,
|
||||
struct blkg_policy_data *pd, int off)
|
||||
{
|
||||
struct blkg_rwstat_sample rwstat = { };
|
||||
|
||||
blkg_rwstat_read((void *)pd->blkg + off, &rwstat);
|
||||
return __blkg_prfill_rwstat(sf, pd, &rwstat);
|
||||
}
|
||||
|
||||
/**
|
||||
* blkg_print_stat_bytes - seq_show callback for blkg->stat_bytes
|
||||
* @sf: seq_file to print to
|
||||
* @v: unused
|
||||
*
|
||||
* To be used as cftype->seq_show to print blkg->stat_bytes.
|
||||
* cftype->private must be set to the blkcg_policy.
|
||||
*/
|
||||
int blkg_print_stat_bytes(struct seq_file *sf, void *v)
|
||||
{
|
||||
blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
|
||||
blkg_prfill_rwstat_field, (void *)seq_cft(sf)->private,
|
||||
offsetof(struct blkcg_gq, stat_bytes), true);
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blkg_print_stat_bytes);
|
||||
|
||||
/**
|
||||
* blkg_print_stat_bytes - seq_show callback for blkg->stat_ios
|
||||
* @sf: seq_file to print to
|
||||
* @v: unused
|
||||
*
|
||||
* To be used as cftype->seq_show to print blkg->stat_ios. cftype->private
|
||||
* must be set to the blkcg_policy.
|
||||
*/
|
||||
int blkg_print_stat_ios(struct seq_file *sf, void *v)
|
||||
{
|
||||
blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
|
||||
blkg_prfill_rwstat_field, (void *)seq_cft(sf)->private,
|
||||
offsetof(struct blkcg_gq, stat_ios), true);
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blkg_print_stat_ios);
|
||||
|
||||
static u64 blkg_prfill_rwstat_field_recursive(struct seq_file *sf,
|
||||
struct blkg_policy_data *pd,
|
||||
int off)
|
||||
{
|
||||
struct blkg_rwstat_sample rwstat;
|
||||
|
||||
blkg_rwstat_recursive_sum(pd->blkg, NULL, off, &rwstat);
|
||||
return __blkg_prfill_rwstat(sf, pd, &rwstat);
|
||||
}
|
||||
|
||||
/**
|
||||
* blkg_print_stat_bytes_recursive - recursive version of blkg_print_stat_bytes
|
||||
* @sf: seq_file to print to
|
||||
* @v: unused
|
||||
*/
|
||||
int blkg_print_stat_bytes_recursive(struct seq_file *sf, void *v)
|
||||
{
|
||||
blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
|
||||
blkg_prfill_rwstat_field_recursive,
|
||||
(void *)seq_cft(sf)->private,
|
||||
offsetof(struct blkcg_gq, stat_bytes), true);
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blkg_print_stat_bytes_recursive);
|
||||
|
||||
/**
|
||||
* blkg_print_stat_ios_recursive - recursive version of blkg_print_stat_ios
|
||||
* @sf: seq_file to print to
|
||||
* @v: unused
|
||||
*/
|
||||
int blkg_print_stat_ios_recursive(struct seq_file *sf, void *v)
|
||||
{
|
||||
blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
|
||||
blkg_prfill_rwstat_field_recursive,
|
||||
(void *)seq_cft(sf)->private,
|
||||
offsetof(struct blkcg_gq, stat_ios), true);
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blkg_print_stat_ios_recursive);
|
||||
|
||||
/**
|
||||
* blkg_rwstat_recursive_sum - collect hierarchical blkg_rwstat
|
||||
* @blkg: blkg of interest
|
||||
* @pol: blkcg_policy which contains the blkg_rwstat
|
||||
* @off: offset to the blkg_rwstat in blkg_policy_data or @blkg
|
||||
* @sum: blkg_rwstat_sample structure containing the results
|
||||
*
|
||||
* Collect the blkg_rwstat specified by @blkg, @pol and @off and all its
|
||||
* online descendants and their aux counts. The caller must be holding the
|
||||
* queue lock for online tests.
|
||||
*
|
||||
* If @pol is NULL, blkg_rwstat is at @off bytes into @blkg; otherwise, it
|
||||
* is at @off bytes into @blkg's blkg_policy_data of the policy.
|
||||
*/
|
||||
void blkg_rwstat_recursive_sum(struct blkcg_gq *blkg, struct blkcg_policy *pol,
|
||||
int off, struct blkg_rwstat_sample *sum)
|
||||
{
|
||||
struct blkcg_gq *pos_blkg;
|
||||
struct cgroup_subsys_state *pos_css;
|
||||
unsigned int i;
|
||||
|
||||
lockdep_assert_held(&blkg->q->queue_lock);
|
||||
|
||||
rcu_read_lock();
|
||||
blkg_for_each_descendant_pre(pos_blkg, pos_css, blkg) {
|
||||
struct blkg_rwstat *rwstat;
|
||||
|
||||
if (!pos_blkg->online)
|
||||
continue;
|
||||
|
||||
if (pol)
|
||||
rwstat = (void *)blkg_to_pd(pos_blkg, pol) + off;
|
||||
else
|
||||
rwstat = (void *)pos_blkg + off;
|
||||
|
||||
for (i = 0; i < BLKG_RWSTAT_NR; i++)
|
||||
sum->cnt[i] = blkg_rwstat_read_counter(rwstat, i);
|
||||
}
|
||||
rcu_read_unlock();
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blkg_rwstat_recursive_sum);
|
||||
|
||||
/* Performs queue bypass and policy enabled checks then looks up blkg. */
|
||||
static struct blkcg_gq *blkg_lookup_check(struct blkcg *blkcg,
|
||||
const struct blkcg_policy *pol,
|
||||
@@ -923,16 +744,18 @@ static int blkcg_print_stat(struct seq_file *sf, void *v)
|
||||
struct blkcg *blkcg = css_to_blkcg(seq_css(sf));
|
||||
struct blkcg_gq *blkg;
|
||||
|
||||
cgroup_rstat_flush(blkcg->css.cgroup);
|
||||
rcu_read_lock();
|
||||
|
||||
hlist_for_each_entry_rcu(blkg, &blkcg->blkg_list, blkcg_node) {
|
||||
struct blkg_iostat_set *bis = &blkg->iostat;
|
||||
const char *dname;
|
||||
char *buf;
|
||||
struct blkg_rwstat_sample rwstat;
|
||||
u64 rbytes, wbytes, rios, wios, dbytes, dios;
|
||||
size_t size = seq_get_buf(sf, &buf), off = 0;
|
||||
int i;
|
||||
bool has_stats = false;
|
||||
unsigned seq;
|
||||
|
||||
spin_lock_irq(&blkg->q->queue_lock);
|
||||
|
||||
@@ -951,17 +774,16 @@ static int blkcg_print_stat(struct seq_file *sf, void *v)
|
||||
*/
|
||||
off += scnprintf(buf+off, size-off, "%s ", dname);
|
||||
|
||||
blkg_rwstat_recursive_sum(blkg, NULL,
|
||||
offsetof(struct blkcg_gq, stat_bytes), &rwstat);
|
||||
rbytes = rwstat.cnt[BLKG_RWSTAT_READ];
|
||||
wbytes = rwstat.cnt[BLKG_RWSTAT_WRITE];
|
||||
dbytes = rwstat.cnt[BLKG_RWSTAT_DISCARD];
|
||||
do {
|
||||
seq = u64_stats_fetch_begin(&bis->sync);
|
||||
|
||||
blkg_rwstat_recursive_sum(blkg, NULL,
|
||||
offsetof(struct blkcg_gq, stat_ios), &rwstat);
|
||||
rios = rwstat.cnt[BLKG_RWSTAT_READ];
|
||||
wios = rwstat.cnt[BLKG_RWSTAT_WRITE];
|
||||
dios = rwstat.cnt[BLKG_RWSTAT_DISCARD];
|
||||
rbytes = bis->cur.bytes[BLKG_IOSTAT_READ];
|
||||
wbytes = bis->cur.bytes[BLKG_IOSTAT_WRITE];
|
||||
dbytes = bis->cur.bytes[BLKG_IOSTAT_DISCARD];
|
||||
rios = bis->cur.ios[BLKG_IOSTAT_READ];
|
||||
wios = bis->cur.ios[BLKG_IOSTAT_WRITE];
|
||||
dios = bis->cur.ios[BLKG_IOSTAT_DISCARD];
|
||||
} while (u64_stats_fetch_retry(&bis->sync, seq));
|
||||
|
||||
if (rbytes || wbytes || rios || wios) {
|
||||
has_stats = true;
|
||||
@@ -1297,6 +1119,77 @@ static int blkcg_can_attach(struct cgroup_taskset *tset)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void blkg_iostat_set(struct blkg_iostat *dst, struct blkg_iostat *src)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < BLKG_IOSTAT_NR; i++) {
|
||||
dst->bytes[i] = src->bytes[i];
|
||||
dst->ios[i] = src->ios[i];
|
||||
}
|
||||
}
|
||||
|
||||
static void blkg_iostat_add(struct blkg_iostat *dst, struct blkg_iostat *src)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < BLKG_IOSTAT_NR; i++) {
|
||||
dst->bytes[i] += src->bytes[i];
|
||||
dst->ios[i] += src->ios[i];
|
||||
}
|
||||
}
|
||||
|
||||
static void blkg_iostat_sub(struct blkg_iostat *dst, struct blkg_iostat *src)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < BLKG_IOSTAT_NR; i++) {
|
||||
dst->bytes[i] -= src->bytes[i];
|
||||
dst->ios[i] -= src->ios[i];
|
||||
}
|
||||
}
|
||||
|
||||
static void blkcg_rstat_flush(struct cgroup_subsys_state *css, int cpu)
|
||||
{
|
||||
struct blkcg *blkcg = css_to_blkcg(css);
|
||||
struct blkcg_gq *blkg;
|
||||
|
||||
rcu_read_lock();
|
||||
|
||||
hlist_for_each_entry_rcu(blkg, &blkcg->blkg_list, blkcg_node) {
|
||||
struct blkcg_gq *parent = blkg->parent;
|
||||
struct blkg_iostat_set *bisc = per_cpu_ptr(blkg->iostat_cpu, cpu);
|
||||
struct blkg_iostat cur, delta;
|
||||
unsigned seq;
|
||||
|
||||
/* fetch the current per-cpu values */
|
||||
do {
|
||||
seq = u64_stats_fetch_begin(&bisc->sync);
|
||||
blkg_iostat_set(&cur, &bisc->cur);
|
||||
} while (u64_stats_fetch_retry(&bisc->sync, seq));
|
||||
|
||||
/* propagate percpu delta to global */
|
||||
u64_stats_update_begin(&blkg->iostat.sync);
|
||||
blkg_iostat_set(&delta, &cur);
|
||||
blkg_iostat_sub(&delta, &bisc->last);
|
||||
blkg_iostat_add(&blkg->iostat.cur, &delta);
|
||||
blkg_iostat_add(&bisc->last, &delta);
|
||||
u64_stats_update_end(&blkg->iostat.sync);
|
||||
|
||||
/* propagate global delta to parent */
|
||||
if (parent) {
|
||||
u64_stats_update_begin(&parent->iostat.sync);
|
||||
blkg_iostat_set(&delta, &blkg->iostat.cur);
|
||||
blkg_iostat_sub(&delta, &blkg->iostat.last);
|
||||
blkg_iostat_add(&parent->iostat.cur, &delta);
|
||||
blkg_iostat_add(&blkg->iostat.last, &delta);
|
||||
u64_stats_update_end(&parent->iostat.sync);
|
||||
}
|
||||
}
|
||||
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
static void blkcg_bind(struct cgroup_subsys_state *root_css)
|
||||
{
|
||||
int i;
|
||||
@@ -1329,6 +1222,7 @@ struct cgroup_subsys io_cgrp_subsys = {
|
||||
.css_offline = blkcg_css_offline,
|
||||
.css_free = blkcg_css_free,
|
||||
.can_attach = blkcg_can_attach,
|
||||
.css_rstat_flush = blkcg_rstat_flush,
|
||||
.bind = blkcg_bind,
|
||||
.dfl_cftypes = blkcg_files,
|
||||
.legacy_cftypes = blkcg_legacy_files,
|
||||
|
||||
@@ -132,6 +132,9 @@ static const char *const blk_op_name[] = {
	REQ_OP_NAME(SECURE_ERASE),
	REQ_OP_NAME(ZONE_RESET),
	REQ_OP_NAME(ZONE_RESET_ALL),
	REQ_OP_NAME(ZONE_OPEN),
	REQ_OP_NAME(ZONE_CLOSE),
	REQ_OP_NAME(ZONE_FINISH),
	REQ_OP_NAME(WRITE_SAME),
	REQ_OP_NAME(WRITE_ZEROES),
	REQ_OP_NAME(SCSI_IN),
@@ -336,14 +339,14 @@ EXPORT_SYMBOL_GPL(blk_set_queue_dying);
 */
void blk_cleanup_queue(struct request_queue *q)
{
	WARN_ON_ONCE(blk_queue_registered(q));

	/* mark @q DYING, no new request or merges will be allowed afterwards */
	mutex_lock(&q->sysfs_lock);
	blk_set_queue_dying(q);

	blk_queue_flag_set(QUEUE_FLAG_NOMERGES, q);
	blk_queue_flag_set(QUEUE_FLAG_NOXMERGES, q);
	blk_queue_flag_set(QUEUE_FLAG_DYING, q);
	mutex_unlock(&q->sysfs_lock);

	/*
	 * Drain all requests queued before DYING marking. Set DEAD flag to
@@ -849,10 +852,10 @@ static inline int blk_partition_remap(struct bio *bio)
		goto out;

	/*
	 * Zone reset does not include bi_size so bio_sectors() is always 0.
	 * Include a test for the reset op code and perform the remap if needed.
	 * Zone management bios do not have a sector count but they do have
	 * a start sector filled out and need to be remapped.
	 */
	if (bio_sectors(bio) || bio_op(bio) == REQ_OP_ZONE_RESET) {
	if (bio_sectors(bio) || op_is_zone_mgmt(bio_op(bio))) {
		if (bio_check_eod(bio, part_nr_sects_read(p)))
			goto out;
		bio->bi_iter.bi_sector += p->start_sect;
@@ -936,6 +939,9 @@ generic_make_request_checks(struct bio *bio)
			goto not_supported;
		break;
	case REQ_OP_ZONE_RESET:
	case REQ_OP_ZONE_OPEN:
	case REQ_OP_ZONE_CLOSE:
	case REQ_OP_ZONE_FINISH:
		if (!blk_queue_is_zoned(q))
			goto not_supported;
		break;
@@ -55,6 +55,8 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
	rq->rq_disk = bd_disk;
	rq->end_io = done;

	blk_account_io_start(rq, true);

	/*
	 * don't check dying flag for MQ because the request won't
	 * be reused after dying flag is set
@@ -136,6 +136,17 @@ static void blk_flush_queue_rq(struct request *rq, bool add_front)
	blk_mq_add_to_requeue_list(rq, add_front, true);
}

static void blk_account_io_flush(struct request *rq)
{
	struct hd_struct *part = &rq->rq_disk->part0;

	part_stat_lock();
	part_stat_inc(part, ios[STAT_FLUSH]);
	part_stat_add(part, nsecs[STAT_FLUSH],
		      ktime_get_ns() - rq->start_time_ns);
	part_stat_unlock();
}

/**
 * blk_flush_complete_seq - complete flush sequence
 * @rq: PREFLUSH/FUA request being sequenced
@@ -185,7 +196,7 @@ static void blk_flush_complete_seq(struct request *rq,

	case REQ_FSEQ_DONE:
		/*
		 * @rq was previously adjusted by blk_flush_issue() for
		 * @rq was previously adjusted by blk_insert_flush() for
		 * flush sequencing and may already have gone through the
		 * flush data request completion path. Restore @rq for
		 * normal completion and end it.
@@ -212,6 +223,8 @@ static void flush_end_io(struct request *flush_rq, blk_status_t error)
	struct blk_flush_queue *fq = blk_get_flush_queue(q, flush_rq->mq_ctx);
	struct blk_mq_hw_ctx *hctx;

	blk_account_io_flush(flush_rq);

	/* release the tag's ownership to the req cloned from */
	spin_lock_irqsave(&fq->mq_flush_lock, flags);
@@ -293,7 +293,7 @@ split:
void __blk_queue_split(struct request_queue *q, struct bio **bio,
		unsigned int *nr_segs)
{
	struct bio *split;
	struct bio *split = NULL;

	switch (bio_op(*bio)) {
	case REQ_OP_DISCARD:
@@ -309,6 +309,21 @@ void __blk_queue_split(struct request_queue *q, struct bio **bio,
				nr_segs);
		break;
	default:
		/*
		 * All drivers must accept single-segments bios that are <=
		 * PAGE_SIZE. This is a quick and dirty check that relies on
		 * the fact that bi_io_vec[0] is always valid if a bio has data.
		 * The check might lead to occasional false negatives when bios
		 * are cloned, but compared to the performance impact of cloned
		 * bios themselves the loop below doesn't matter anyway.
		 */
		if (!q->limits.chunk_sectors &&
		    (*bio)->bi_vcnt == 1 &&
		    ((*bio)->bi_io_vec[0].bv_len +
		     (*bio)->bi_io_vec[0].bv_offset) <= PAGE_SIZE) {
			*nr_segs = 1;
			break;
		}
		split = blk_bio_segment_split(q, *bio, &q->bio_split, nr_segs);
		break;
	}
@@ -74,10 +74,8 @@ static ssize_t blk_mq_sysfs_show(struct kobject *kobj, struct attribute *attr,
|
||||
if (!entry->show)
|
||||
return -EIO;
|
||||
|
||||
res = -ENOENT;
|
||||
mutex_lock(&q->sysfs_lock);
|
||||
if (!blk_queue_dying(q))
|
||||
res = entry->show(ctx, page);
|
||||
res = entry->show(ctx, page);
|
||||
mutex_unlock(&q->sysfs_lock);
|
||||
return res;
|
||||
}
|
||||
@@ -97,10 +95,8 @@ static ssize_t blk_mq_sysfs_store(struct kobject *kobj, struct attribute *attr,
|
||||
if (!entry->store)
|
||||
return -EIO;
|
||||
|
||||
res = -ENOENT;
|
||||
mutex_lock(&q->sysfs_lock);
|
||||
if (!blk_queue_dying(q))
|
||||
res = entry->store(ctx, page, length);
|
||||
res = entry->store(ctx, page, length);
|
||||
mutex_unlock(&q->sysfs_lock);
|
||||
return res;
|
||||
}
|
||||
@@ -120,10 +116,8 @@ static ssize_t blk_mq_hw_sysfs_show(struct kobject *kobj,
|
||||
if (!entry->show)
|
||||
return -EIO;
|
||||
|
||||
res = -ENOENT;
|
||||
mutex_lock(&q->sysfs_lock);
|
||||
if (!blk_queue_dying(q))
|
||||
res = entry->show(hctx, page);
|
||||
res = entry->show(hctx, page);
|
||||
mutex_unlock(&q->sysfs_lock);
|
||||
return res;
|
||||
}
|
||||
@@ -144,10 +138,8 @@ static ssize_t blk_mq_hw_sysfs_store(struct kobject *kobj,
|
||||
if (!entry->store)
|
||||
return -EIO;
|
||||
|
||||
res = -ENOENT;
|
||||
mutex_lock(&q->sysfs_lock);
|
||||
if (!blk_queue_dying(q))
|
||||
res = entry->store(hctx, page, length);
|
||||
res = entry->store(hctx, page, length);
|
||||
mutex_unlock(&q->sysfs_lock);
|
||||
return res;
|
||||
}
|
||||
@@ -166,20 +158,25 @@ static ssize_t blk_mq_hw_sysfs_nr_reserved_tags_show(struct blk_mq_hw_ctx *hctx,

static ssize_t blk_mq_hw_sysfs_cpus_show(struct blk_mq_hw_ctx *hctx, char *page)
{
	const size_t size = PAGE_SIZE - 1;
	unsigned int i, first = 1;
	ssize_t ret = 0;
	int ret = 0, pos = 0;

	for_each_cpu(i, hctx->cpumask) {
		if (first)
			ret += sprintf(ret + page, "%u", i);
			ret = snprintf(pos + page, size - pos, "%u", i);
		else
			ret += sprintf(ret + page, ", %u", i);
			ret = snprintf(pos + page, size - pos, ", %u", i);

		if (ret >= size - pos)
			break;

		first = 0;
		pos += ret;
	}

	ret += sprintf(ret + page, "\n");
	return ret;
	ret = snprintf(pos + page, size + 1 - pos, "\n");
	return pos + ret;
}

static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_nr_tags = {
@@ -15,14 +15,6 @@
#include "blk-mq.h"
#include "blk-mq-tag.h"

bool blk_mq_has_free_tags(struct blk_mq_tags *tags)
{
	if (!tags)
		return true;

	return sbitmap_any_bit_clear(&tags->bitmap_tags.sb);
}

/*
 * If a previously inactive queue goes active, bump the active user count.
 * We need to do this before try to allocate driver tag, then even if fail
@@ -28,7 +28,6 @@ extern void blk_mq_free_tags(struct blk_mq_tags *tags);
extern unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data);
extern void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, struct blk_mq_tags *tags,
			   struct blk_mq_ctx *ctx, unsigned int tag);
extern bool blk_mq_has_free_tags(struct blk_mq_tags *tags);
extern int blk_mq_tag_update_depth(struct blk_mq_hw_ctx *hctx,
					struct blk_mq_tags **tags,
					unsigned int depth, bool can_grow);
Some files were not shown because too many files have changed in this diff.