Merge tag 'for-5.19/block-2022-05-22' of git://git.kernel.dk/linux-block
Pull block updates from Jens Axboe:
"Here are the core block changes for 5.19. This contains:
- blk-throttle accounting fix (Laibin)
- Series removing redundant assignments (Michal)
- Expose bio cache via the bio_set, so that DM can use it (Mike)
- Finish off the bio allocation interface cleanups by dealing with
the weirdest member of the family. bio_kmalloc combines a kmalloc
for the bio and bio_vecs with a hidden bio_init call and magic
cleanup semantics (Christoph)
- Clean up the block layer API so that APIs consumed by file systems
are (almost) only struct block_device based, so that file systems
don't have to poke into block layer internals like the
request_queue (Christoph)
- Clean up the blk_execute_rq* API (Christoph)
- Clean up various loose ends in the blk-cgroup code to make it easier
to follow in preparation of reworking the blkcg assignment for bios
(Christoph)
- Fix use-after-free issues in BFQ when processes with merged queues
get moved to different cgroups (Jan)
- BFQ fixes (Jan)
- Various fixes and cleanups (Bart, Chengming, Fanjun, Julia, Ming,
Wolfgang, me)"
* tag 'for-5.19/block-2022-05-22' of git://git.kernel.dk/linux-block: (83 commits)
blk-mq: fix typo in comment
bfq: Remove bfq_requeue_request_body()
bfq: Remove superfluous conversion from RQ_BIC()
bfq: Allow current waker to defend against a tentative one
bfq: Relax waker detection for shared queues
blk-cgroup: delete rcu_read_lock_held() WARN_ON_ONCE()
blk-throttle: Set BIO_THROTTLED when bio has been throttled
blk-cgroup: Remove unnecessary rcu_read_lock/unlock()
blk-cgroup: always terminate io.stat lines
block, bfq: make bfq_has_work() more accurate
block, bfq: protect 'bfqd->queued' by 'bfqd->lock'
block: cleanup the VM accounting in submit_bio
block: Fix the bio.bi_opf comment
block: reorder the REQ_ flags
blk-iocost: combine local_stat and desc_stat to stat
block: improve the error message from bio_check_eod
block: allow passing a NULL bdev to bio_alloc_clone/bio_init_clone
block: remove superfluous calls to blkcg_bio_issue_init
kthread: unexport kthread_blkcg
blk-cgroup: cleanup blkcg_maybe_throttle_current
...
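The bio_kmalloc rework in this series changes its calling convention (see the block/bio.c hunks below): the function now only kmallocs the bio plus inline bvecs, and the caller owns initialization and teardown. A minimal sketch of the new contract, assuming only the bio API shown in this diff (the helper name is hypothetical):

	#include <linux/bio.h>

	/* Sketch: bio_kmalloc() no longer initializes the bio, and the
	 * final free is bio_uninit() + kfree(), never bio_put(). */
	static int example_kmalloc_bio(struct block_device *bdev,
				       unsigned short nr_vecs)
	{
		struct bio *bio;

		bio = bio_kmalloc(nr_vecs, GFP_KERNEL); /* not mempool-backed: may fail */
		if (!bio)
			return -ENOMEM;
		bio_init(bio, bdev, bio->bi_inline_vecs, nr_vecs, REQ_OP_READ);

		/* ... add pages, submit_bio_wait(bio), etc. ... */

		bio_uninit(bio);	/* undo bio_init() */
		kfree(bio);		/* plain kfree */
		return 0;
	}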
@@ -483,7 +483,6 @@ static void ubd_handler(void)
if ((io_req->error == BLK_STS_NOTSUPP) && (req_op(io_req->req) == REQ_OP_DISCARD)) {
blk_queue_max_discard_sectors(io_req->req->q, 0);
blk_queue_max_write_zeroes_sectors(io_req->req->q, 0);
blk_queue_flag_clear(QUEUE_FLAG_DISCARD, io_req->req->q);
}
blk_mq_end_request(io_req->req, io_req->error);
kfree(io_req);

@@ -803,7 +802,6 @@ static int ubd_open_dev(struct ubd *ubd_dev)
ubd_dev->queue->limits.discard_alignment = SECTOR_SIZE;
blk_queue_max_discard_sectors(ubd_dev->queue, UBD_MAX_REQUEST);
blk_queue_max_write_zeroes_sectors(ubd_dev->queue, UBD_MAX_REQUEST);
blk_queue_flag_set(QUEUE_FLAG_DISCARD, ubd_dev->queue);
}
blk_queue_flag_set(QUEUE_FLAG_NONROT, ubd_dev->queue);
return 0;
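These ubd hunks track the removal of QUEUE_FLAG_DISCARD from the core: discard support is now signalled purely by a non-zero max_discard_sectors limit (the submit_bio_noacct() hunk further down switches its check to bdev_max_discard_sectors()). A hedged sketch of the resulting driver-side idiom:

	#include <linux/blkdev.h>

	/* Sketch only: with QUEUE_FLAG_DISCARD gone, enabling/disabling
	 * discard is just setting or zeroing the limit; callers test the
	 * limit instead of a queue flag. */
	static void example_toggle_discard(struct request_queue *q,
					   struct block_device *bdev, bool enable)
	{
		blk_queue_max_discard_sectors(q, enable ? UINT_MAX : 0);

		if (bdev_max_discard_sectors(bdev))
			pr_info("discard supported\n");
	}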

@@ -16,6 +16,7 @@ obj-$(CONFIG_BLK_DEV_BSG_COMMON) += bsg.o
obj-$(CONFIG_BLK_DEV_BSGLIB) += bsg-lib.o
obj-$(CONFIG_BLK_CGROUP) += blk-cgroup.o
obj-$(CONFIG_BLK_CGROUP_RWSTAT) += blk-cgroup-rwstat.o
obj-$(CONFIG_BLK_CGROUP_FC_APPID) += blk-cgroup-fc-appid.o
obj-$(CONFIG_BLK_DEV_THROTTLING) += blk-throttle.o
obj-$(CONFIG_BLK_CGROUP_IOPRIO) += blk-ioprio.o
obj-$(CONFIG_BLK_CGROUP_IOLATENCY) += blk-iolatency.o

@@ -65,7 +65,6 @@ int badblocks_check(struct badblocks *bb, sector_t s, int sectors,
s >>= bb->shift;
target += (1<<bb->shift) - 1;
target >>= bb->shift;
sectors = target - s;
}
/* 'target' is now the first block after the bad range */

@@ -345,7 +344,6 @@ int badblocks_clear(struct badblocks *bb, sector_t s, int sectors)
s += (1<<bb->shift) - 1;
s >>= bb->shift;
target >>= bb->shift;
sectors = target - s;
}

write_seqlock_irq(&bb->lock);
@@ -557,6 +557,7 @@ static void bfq_pd_init(struct blkg_policy_data *pd)
*/
bfqg->bfqd = bfqd;
bfqg->active_entities = 0;
bfqg->online = true;
bfqg->rq_pos_tree = RB_ROOT;
}

@@ -585,28 +586,11 @@ static void bfq_group_set_parent(struct bfq_group *bfqg,
entity->sched_data = &parent->sched_data;
}

static struct bfq_group *bfq_lookup_bfqg(struct bfq_data *bfqd,
struct blkcg *blkcg)
static void bfq_link_bfqg(struct bfq_data *bfqd, struct bfq_group *bfqg)
{
struct blkcg_gq *blkg;

blkg = blkg_lookup(blkcg, bfqd->queue);
if (likely(blkg))
return blkg_to_bfqg(blkg);
return NULL;
}

struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd,
struct blkcg *blkcg)
{
struct bfq_group *bfqg, *parent;
struct bfq_group *parent;
struct bfq_entity *entity;

bfqg = bfq_lookup_bfqg(bfqd, blkcg);

if (unlikely(!bfqg))
return NULL;

/*
* Update chain of bfq_groups as we might be handling a leaf group
* which, along with some of its relatives, has not been hooked yet
@@ -623,8 +607,24 @@ struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd,
bfq_group_set_parent(curr_bfqg, parent);
}
}
}

return bfqg;
struct bfq_group *bfq_bio_bfqg(struct bfq_data *bfqd, struct bio *bio)
{
struct blkcg_gq *blkg = bio->bi_blkg;
struct bfq_group *bfqg;

while (blkg) {
bfqg = blkg_to_bfqg(blkg);
if (bfqg->online) {
bio_associate_blkg_from_css(bio, &blkg->blkcg->css);
return bfqg;
}
blkg = blkg->parent;
}
bio_associate_blkg_from_css(bio,
&bfqg_to_blkg(bfqd->root_group)->blkcg->css);
return bfqd->root_group;
}

/**
@@ -714,25 +714,15 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
* Move bic to blkcg, assuming that bfqd->lock is held; which makes
* sure that the reference to cgroup is valid across the call (see
* comments in bfq_bic_update_cgroup on this issue)
*
* NOTE: an alternative approach might have been to store the current
* cgroup in bfqq and getting a reference to it, reducing the lookup
* time here, at the price of slightly more complex code.
*/
static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd,
struct bfq_io_cq *bic,
struct blkcg *blkcg)
static void *__bfq_bic_change_cgroup(struct bfq_data *bfqd,
struct bfq_io_cq *bic,
struct bfq_group *bfqg)
{
struct bfq_queue *async_bfqq = bic_to_bfqq(bic, 0);
struct bfq_queue *sync_bfqq = bic_to_bfqq(bic, 1);
struct bfq_group *bfqg;
struct bfq_entity *entity;

bfqg = bfq_find_set_group(bfqd, blkcg);

if (unlikely(!bfqg))
bfqg = bfqd->root_group;

if (async_bfqq) {
entity = &async_bfqq->entity;

@@ -743,9 +733,39 @@ static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd,
}

if (sync_bfqq) {
entity = &sync_bfqq->entity;
if (entity->sched_data != &bfqg->sched_data)
bfq_bfqq_move(bfqd, sync_bfqq, bfqg);
if (!sync_bfqq->new_bfqq && !bfq_bfqq_coop(sync_bfqq)) {
/* We are the only user of this bfqq, just move it */
if (sync_bfqq->entity.sched_data != &bfqg->sched_data)
bfq_bfqq_move(bfqd, sync_bfqq, bfqg);
} else {
struct bfq_queue *bfqq;

/*
* The queue was merged to a different queue. Check
* that the merge chain still belongs to the same
* cgroup.
*/
for (bfqq = sync_bfqq; bfqq; bfqq = bfqq->new_bfqq)
if (bfqq->entity.sched_data !=
&bfqg->sched_data)
break;
if (bfqq) {
/*
* Some queue changed cgroup so the merge is
* not valid anymore. We cannot easily just
* cancel the merge (by clearing new_bfqq) as
* there may be other processes using this
* queue and holding refs to all queues below
* sync_bfqq->new_bfqq. Similarly if the merge
* already happened, we need to detach from
* bfqq now so that we cannot merge bio to a
* request from the old cgroup.
*/
bfq_put_cooperator(sync_bfqq);
bfq_release_process_ref(bfqd, sync_bfqq);
bic_set_bfqq(bic, NULL, 1);
}
}
}

return bfqg;
@@ -754,20 +774,24 @@ static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd,
void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio)
{
struct bfq_data *bfqd = bic_to_bfqd(bic);
struct bfq_group *bfqg = NULL;
struct bfq_group *bfqg = bfq_bio_bfqg(bfqd, bio);
uint64_t serial_nr;

rcu_read_lock();
serial_nr = __bio_blkcg(bio)->css.serial_nr;
serial_nr = bfqg_to_blkg(bfqg)->blkcg->css.serial_nr;

/*
* Check whether blkcg has changed. The condition may trigger
* spuriously on a newly created cic but there's no harm.
*/
if (unlikely(!bfqd) || likely(bic->blkcg_serial_nr == serial_nr))
goto out;
return;

bfqg = __bfq_bic_change_cgroup(bfqd, bic, __bio_blkcg(bio));
/*
* New cgroup for this process. Make sure it is linked to bfq internal
* cgroup hierarchy.
*/
bfq_link_bfqg(bfqd, bfqg);
__bfq_bic_change_cgroup(bfqd, bic, bfqg);
/*
* Update blkg_path for bfq_log_* functions. We cache this
* path, and update it here, for the following
@@ -820,8 +844,6 @@ void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio)
*/
blkg_path(bfqg_to_blkg(bfqg), bfqg->blkg_path, sizeof(bfqg->blkg_path));
bic->blkcg_serial_nr = serial_nr;
out:
rcu_read_unlock();
}

/**
@@ -949,6 +971,7 @@ static void bfq_pd_offline(struct blkg_policy_data *pd)

put_async_queues:
bfq_put_async_queues(bfqd, bfqg);
bfqg->online = false;

spin_unlock_irqrestore(&bfqd->lock, flags);
/*
@@ -1438,7 +1461,7 @@ void bfq_end_wr_async(struct bfq_data *bfqd)
bfq_end_wr_async_queues(bfqd, bfqd->root_group);
}

struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd, struct blkcg *blkcg)
struct bfq_group *bfq_bio_bfqg(struct bfq_data *bfqd, struct bio *bio)
{
return bfqd->root_group;
}
@@ -374,7 +374,7 @@ static const unsigned long bfq_activation_stable_merging = 600;
*/
static const unsigned long bfq_late_stable_merging = 600;

#define RQ_BIC(rq) icq_to_bic((rq)->elv.priv[0])
#define RQ_BIC(rq) ((struct bfq_io_cq *)((rq)->elv.priv[0]))
#define RQ_BFQQ(rq) ((rq)->elv.priv[1])

struct bfq_queue *bic_to_bfqq(struct bfq_io_cq *bic, bool is_sync)
@@ -456,6 +456,8 @@ static struct bfq_io_cq *bfq_bic_lookup(struct request_queue *q)
*/
void bfq_schedule_dispatch(struct bfq_data *bfqd)
{
lockdep_assert_held(&bfqd->lock);

if (bfqd->queued != 0) {
bfq_log(bfqd, "schedule dispatch");
blk_mq_run_hw_queues(bfqd->queue, true);
@@ -2133,9 +2135,7 @@ static void bfq_check_waker(struct bfq_data *bfqd, struct bfq_queue *bfqq,
if (!bfqd->last_completed_rq_bfqq ||
bfqd->last_completed_rq_bfqq == bfqq ||
bfq_bfqq_has_short_ttime(bfqq) ||
bfqq->dispatched > 0 ||
now_ns - bfqd->last_completion >= 4 * NSEC_PER_MSEC ||
bfqd->last_completed_rq_bfqq == bfqq->waker_bfqq)
now_ns - bfqd->last_completion >= 4 * NSEC_PER_MSEC)
return;

/*
@@ -2208,9 +2208,13 @@ static void bfq_add_request(struct request *rq)

bfq_log_bfqq(bfqd, bfqq, "add_request %d", rq_is_sync(rq));
bfqq->queued[rq_is_sync(rq)]++;
bfqd->queued++;
/*
* Updating of 'bfqd->queued' is protected by 'bfqd->lock', however, it
* may be read without holding the lock in bfq_has_work().
*/
WRITE_ONCE(bfqd->queued, bfqd->queued + 1);

if (RB_EMPTY_ROOT(&bfqq->sort_list) && bfq_bfqq_sync(bfqq)) {
if (bfq_bfqq_sync(bfqq) && RQ_BIC(rq)->requests <= 1) {
bfq_check_waker(bfqd, bfqq, now_ns);

/*
@@ -2400,7 +2404,11 @@ static void bfq_remove_request(struct request_queue *q,
if (rq->queuelist.prev != &rq->queuelist)
list_del_init(&rq->queuelist);
bfqq->queued[sync]--;
bfqd->queued--;
/*
* Updating of 'bfqd->queued' is protected by 'bfqd->lock', however, it
* may be read without holding the lock in bfq_has_work().
*/
WRITE_ONCE(bfqd->queued, bfqd->queued - 1);
elv_rb_del(&bfqq->sort_list, rq);

elv_rqhash_del(q, rq);
@@ -2463,10 +2471,17 @@ static bool bfq_bio_merge(struct request_queue *q, struct bio *bio,

spin_lock_irq(&bfqd->lock);

if (bic)
if (bic) {
/*
* Make sure cgroup info is uptodate for current process before
* considering the merge.
*/
bfq_bic_update_cgroup(bic, bio);

bfqd->bio_bfqq = bic_to_bfqq(bic, op_is_sync(bio->bi_opf));
else
} else {
bfqd->bio_bfqq = NULL;
}
bfqd->bio_bic = bic;

ret = blk_mq_sched_try_merge(q, bio, nr_segs, &free);
@@ -2496,8 +2511,6 @@ static int bfq_request_merge(struct request_queue *q, struct request **req,
return ELEVATOR_NO_MERGE;
}

static struct bfq_queue *bfq_init_rq(struct request *rq);

static void bfq_request_merged(struct request_queue *q, struct request *req,
enum elv_merge type)
{
@@ -2506,7 +2519,7 @@ static void bfq_request_merged(struct request_queue *q, struct request *req,
blk_rq_pos(req) <
blk_rq_pos(container_of(rb_prev(&req->rb_node),
struct request, rb_node))) {
struct bfq_queue *bfqq = bfq_init_rq(req);
struct bfq_queue *bfqq = RQ_BFQQ(req);
struct bfq_data *bfqd;
struct request *prev, *next_rq;

@@ -2558,8 +2571,8 @@ static void bfq_request_merged(struct request_queue *q, struct request *req,
static void bfq_requests_merged(struct request_queue *q, struct request *rq,
struct request *next)
{
struct bfq_queue *bfqq = bfq_init_rq(rq),
*next_bfqq = bfq_init_rq(next);
struct bfq_queue *bfqq = RQ_BFQQ(rq),
*next_bfqq = RQ_BFQQ(next);

if (!bfqq)
goto remove;
@@ -2764,6 +2777,14 @@ bfq_setup_merge(struct bfq_queue *bfqq, struct bfq_queue *new_bfqq)
if (process_refs == 0 || new_process_refs == 0)
return NULL;

/*
* Make sure merged queues belong to the same parent. Parents could
* have changed since the time we decided the two queues are suitable
* for merging.
*/
if (new_bfqq->entity.parent != bfqq->entity.parent)
return NULL;

bfq_log_bfqq(bfqq->bfqd, bfqq, "scheduling merge with queue %d",
new_bfqq->pid);

@@ -2901,9 +2922,12 @@ bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq,
struct bfq_queue *new_bfqq =
bfq_setup_merge(bfqq, stable_merge_bfqq);

bic->stably_merged = true;
if (new_bfqq && new_bfqq->bic)
new_bfqq->bic->stably_merged = true;
if (new_bfqq) {
bic->stably_merged = true;
if (new_bfqq->bic)
new_bfqq->bic->stably_merged =
true;
}
return new_bfqq;
} else
return NULL;
@@ -5045,11 +5069,11 @@ static bool bfq_has_work(struct blk_mq_hw_ctx *hctx)
struct bfq_data *bfqd = hctx->queue->elevator->elevator_data;

/*
* Avoiding lock: a race on bfqd->busy_queues should cause at
* Avoiding lock: a race on bfqd->queued should cause at
* most a call to dispatch for nothing
*/
return !list_empty_careful(&bfqd->dispatch) ||
bfq_tot_busy_queues(bfqd) > 0;
READ_ONCE(bfqd->queued);
}
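The bfqd->queued change above is a standard lockless-reader pattern: all writers hold bfqd->lock and use WRITE_ONCE(), while the one lockless reader (bfq_has_work()) uses READ_ONCE() so the compiler cannot tear or fuse the access. A minimal generic sketch of the same pattern (names hypothetical):

	#include <linux/spinlock.h>
	#include <linux/compiler.h>

	struct counter {
		spinlock_t lock;
		int queued;	/* written under lock, read locklessly */
	};

	static void counter_inc(struct counter *c)	/* writer: lock held */
	{
		lockdep_assert_held(&c->lock);
		WRITE_ONCE(c->queued, c->queued + 1);	/* plain read OK: lock held */
	}

	static bool counter_nonzero(struct counter *c)	/* lockless reader */
	{
		/* A race here is benign: at worst one spurious dispatch,
		 * as the comment in bfq_has_work() notes. */
		return READ_ONCE(c->queued) != 0;
	}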

static struct request *__bfq_dispatch_request(struct blk_mq_hw_ctx *hctx)
@@ -5310,7 +5334,7 @@ static void bfq_put_stable_ref(struct bfq_queue *bfqq)
bfq_put_queue(bfqq);
}

static void bfq_put_cooperator(struct bfq_queue *bfqq)
void bfq_put_cooperator(struct bfq_queue *bfqq)
{
struct bfq_queue *__bfqq, *next;

@@ -5716,14 +5740,7 @@ static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd,
struct bfq_queue *bfqq;
struct bfq_group *bfqg;

rcu_read_lock();

bfqg = bfq_find_set_group(bfqd, __bio_blkcg(bio));
if (!bfqg) {
bfqq = &bfqd->oom_bfqq;
goto out;
}

bfqg = bfq_bio_bfqg(bfqd, bio);
if (!is_sync) {
async_bfqq = bfq_async_queue_prio(bfqd, bfqg, ioprio_class,
ioprio);
@@ -5769,8 +5786,6 @@ out:

if (bfqq != &bfqd->oom_bfqq && is_sync && !respawn)
bfqq = bfq_do_or_sched_stable_merge(bfqd, bfqq, bic);

rcu_read_unlock();
return bfqq;
}

@@ -6117,6 +6132,8 @@ static inline void bfq_update_insert_stats(struct request_queue *q,
unsigned int cmd_flags) {}
#endif /* CONFIG_BFQ_CGROUP_DEBUG */

static struct bfq_queue *bfq_init_rq(struct request *rq);

static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
bool at_head)
{
@@ -6132,18 +6149,15 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
bfqg_stats_update_legacy_io(q, rq);
#endif
spin_lock_irq(&bfqd->lock);
bfqq = bfq_init_rq(rq);
if (blk_mq_sched_try_insert_merge(q, rq, &free)) {
spin_unlock_irq(&bfqd->lock);
blk_mq_free_requests(&free);
return;
}

spin_unlock_irq(&bfqd->lock);

trace_block_rq_insert(rq);

spin_lock_irq(&bfqd->lock);
bfqq = bfq_init_rq(rq);
if (!bfqq || at_head) {
if (at_head)
list_add(&rq->queuelist, &bfqd->dispatch);
@@ -6360,12 +6374,6 @@ static void bfq_completed_request(struct bfq_queue *bfqq, struct bfq_data *bfqd)
bfq_schedule_dispatch(bfqd);
}

static void bfq_finish_requeue_request_body(struct bfq_queue *bfqq)
{
bfqq_request_freed(bfqq);
bfq_put_queue(bfqq);
}

/*
* The processes associated with bfqq may happen to generate their
* cumulative I/O at a lower rate than the rate at which the device
@@ -6562,7 +6570,9 @@ static void bfq_finish_requeue_request(struct request *rq)

bfq_completed_request(bfqq, bfqd);
}
bfq_finish_requeue_request_body(bfqq);
bfqq_request_freed(bfqq);
bfq_put_queue(bfqq);
RQ_BIC(rq)->requests--;
spin_unlock_irqrestore(&bfqd->lock, flags);

/*
@@ -6796,6 +6806,7 @@ static struct bfq_queue *bfq_init_rq(struct request *rq)

bfqq_request_allocated(bfqq);
bfqq->ref++;
bic->requests++;
bfq_log_bfqq(bfqd, bfqq, "get_request %p: bfqq %p, %d",
rq, bfqq, bfqq->ref);

@@ -6892,8 +6903,8 @@ bfq_idle_slice_timer_body(struct bfq_data *bfqd, struct bfq_queue *bfqq)
bfq_bfqq_expire(bfqd, bfqq, true, reason);

schedule_dispatch:
spin_unlock_irqrestore(&bfqd->lock, flags);
bfq_schedule_dispatch(bfqd);
spin_unlock_irqrestore(&bfqd->lock, flags);
}

/*
@@ -468,6 +468,7 @@ struct bfq_io_cq {
struct bfq_queue *stable_merge_bfqq;

bool stably_merged; /* non splittable if true */
unsigned int requests; /* Number of requests this process has in flight */
};

/**
@@ -928,6 +929,8 @@ struct bfq_group {

/* reference counter (see comments in bfq_bic_update_cgroup) */
int ref;
/* Is bfq_group still online? */
bool online;

struct bfq_entity entity;
struct bfq_sched_data sched_data;
@@ -979,6 +982,7 @@ void bfq_weights_tree_remove(struct bfq_data *bfqd,
void bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq,
bool compensate, enum bfqq_expiration reason);
void bfq_put_queue(struct bfq_queue *bfqq);
void bfq_put_cooperator(struct bfq_queue *bfqq);
void bfq_end_wr_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg);
void bfq_release_process_ref(struct bfq_data *bfqd, struct bfq_queue *bfqq);
void bfq_schedule_dispatch(struct bfq_data *bfqd);
@@ -1006,8 +1010,7 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
void bfq_init_entity(struct bfq_entity *entity, struct bfq_group *bfqg);
void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio);
void bfq_end_wr_async(struct bfq_data *bfqd);
struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd,
struct blkcg *blkcg);
struct bfq_group *bfq_bio_bfqg(struct bfq_data *bfqd, struct bio *bio);
struct blkcg_gq *bfqg_to_blkg(struct bfq_group *bfqg);
struct bfq_group *bfqq_group(struct bfq_queue *bfqq);
struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node);
@@ -1100,13 +1103,13 @@ struct bfq_group *bfqq_group(struct bfq_queue *bfqq);
break; \
bfq_bfqq_name((bfqq), pid_str, MAX_BFQQ_NAME_LENGTH); \
blk_add_cgroup_trace_msg((bfqd)->queue, \
bfqg_to_blkg(bfqq_group(bfqq))->blkcg, \
&bfqg_to_blkg(bfqq_group(bfqq))->blkcg->css, \
"%s " fmt, pid_str, ##args); \
} while (0)

#define bfq_log_bfqg(bfqd, bfqg, fmt, args...) do { \
blk_add_cgroup_trace_msg((bfqd)->queue, \
bfqg_to_blkg(bfqg)->blkcg, fmt, ##args); \
&bfqg_to_blkg(bfqg)->blkcg->css, fmt, ##args); \
} while (0)

#else /* CONFIG_BFQ_GROUP_IOSCHED */
block/bio.c (146 lines changed)
@@ -224,24 +224,13 @@ EXPORT_SYMBOL(bio_uninit);
static void bio_free(struct bio *bio)
{
struct bio_set *bs = bio->bi_pool;
void *p;
void *p = bio;

WARN_ON_ONCE(!bs);

bio_uninit(bio);

if (bs) {
bvec_free(&bs->bvec_pool, bio->bi_io_vec, bio->bi_max_vecs);

/*
* If we have front padding, adjust the bio pointer before freeing
*/
p = bio;
p -= bs->front_pad;

mempool_free(p, &bs->bio_pool);
} else {
/* Bio was allocated by bio_kmalloc() */
kfree(bio);
}
bvec_free(&bs->bvec_pool, bio->bi_io_vec, bio->bi_max_vecs);
mempool_free(p - bs->front_pad, &bs->bio_pool);
}
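The simplified bio_free() works because kmalloc'ed bios no longer reach it, and every bio_set mempool element carries its front padding in front of the bio. A sketch of the layout assumed here (illustrative only):

	/*
	 * Layout of a bio_set mempool element (illustrative):
	 *
	 *   base = mempool element                 bio handed to callers
	 *   |<--- bs->front_pad --->|<- struct bio ->|<- inline bvecs ->|
	 *
	 * Freeing is therefore mempool_free(bio - bs->front_pad), exactly
	 * what the new bio_free() does; bio_kmalloc() bios have no pool
	 * and are freed with plain kfree() by their owner.
	 */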

/*
@@ -419,6 +408,28 @@ static void punt_bios_to_rescuer(struct bio_set *bs)
queue_work(bs->rescue_workqueue, &bs->rescue_work);
}

static struct bio *bio_alloc_percpu_cache(struct block_device *bdev,
unsigned short nr_vecs, unsigned int opf, gfp_t gfp,
struct bio_set *bs)
{
struct bio_alloc_cache *cache;
struct bio *bio;

cache = per_cpu_ptr(bs->cache, get_cpu());
if (!cache->free_list) {
put_cpu();
return NULL;
}
bio = cache->free_list;
cache->free_list = bio->bi_next;
cache->nr--;
put_cpu();

bio_init(bio, bdev, nr_vecs ? bio->bi_inline_vecs : NULL, nr_vecs, opf);
bio->bi_pool = bs;
return bio;
}

/**
* bio_alloc_bioset - allocate a bio for I/O
* @bdev: block device to allocate the bio for (can be %NULL)
@@ -451,6 +462,9 @@ static void punt_bios_to_rescuer(struct bio_set *bs)
* submit_bio_noacct() should be avoided - instead, use bio_set's front_pad
* for per bio allocations.
*
* If REQ_ALLOC_CACHE is set, the final put of the bio MUST be done from process
* context, not hard/soft IRQ.
*
* Returns: Pointer to new bio on success, NULL on failure.
*/
struct bio *bio_alloc_bioset(struct block_device *bdev, unsigned short nr_vecs,
@@ -465,6 +479,21 @@ struct bio *bio_alloc_bioset(struct block_device *bdev, unsigned short nr_vecs,
if (WARN_ON_ONCE(!mempool_initialized(&bs->bvec_pool) && nr_vecs > 0))
return NULL;

if (opf & REQ_ALLOC_CACHE) {
if (bs->cache && nr_vecs <= BIO_INLINE_VECS) {
bio = bio_alloc_percpu_cache(bdev, nr_vecs, opf,
gfp_mask, bs);
if (bio)
return bio;
/*
* No cached bio available, bio returned below marked with
* REQ_ALLOC_CACHE to participate in per-cpu alloc cache.
*/
} else {
opf &= ~REQ_ALLOC_CACHE;
}
}

/*
* submit_bio_noacct() converts recursion to iteration; this means if
* we're running beneath it, any bios we allocate and submit will not be
@@ -528,28 +557,28 @@ err_free:

EXPORT_SYMBOL(bio_alloc_bioset);

/**
* bio_kmalloc - kmalloc a bio for I/O
* bio_kmalloc - kmalloc a bio
* @nr_vecs: number of bio_vecs to allocate
* @gfp_mask: the GFP_* mask given to the slab allocator
* @nr_iovecs: number of iovecs to pre-allocate
*
* Use kmalloc to allocate and initialize a bio.
* Use kmalloc to allocate a bio (including bvecs). The bio must be initialized
* using bio_init() before use. To free a bio returned from this function use
* kfree() after calling bio_uninit(). A bio returned from this function can
* be reused by calling bio_uninit() before calling bio_init() again.
*
* Note that unlike bio_alloc() or bio_alloc_bioset() allocations from this
* function are not backed by a mempool and can fail. Do not use this function
* for allocations in the file system I/O path.
*
* Returns: Pointer to new bio on success, NULL on failure.
*/
struct bio *bio_kmalloc(gfp_t gfp_mask, unsigned short nr_iovecs)
struct bio *bio_kmalloc(unsigned short nr_vecs, gfp_t gfp_mask)
{
struct bio *bio;

if (nr_iovecs > UIO_MAXIOV)
if (nr_vecs > UIO_MAXIOV)
return NULL;

bio = kmalloc(struct_size(bio, bi_inline_vecs, nr_iovecs), gfp_mask);
if (unlikely(!bio))
return NULL;
bio_init(bio, NULL, nr_iovecs ? bio->bi_inline_vecs : NULL, nr_iovecs,
0);
bio->bi_pool = NULL;
return bio;
return kmalloc(struct_size(bio, bi_inline_vecs, nr_vecs), gfp_mask);
}
EXPORT_SYMBOL(bio_kmalloc);

@@ -711,7 +740,7 @@ void bio_put(struct bio *bio)
return;
}

if (bio_flagged(bio, BIO_PERCPU_CACHE)) {
if (bio->bi_opf & REQ_ALLOC_CACHE) {
struct bio_alloc_cache *cache;

bio_uninit(bio);
@@ -732,14 +761,15 @@ static int __bio_clone(struct bio *bio, struct bio *bio_src, gfp_t gfp)
bio_set_flag(bio, BIO_CLONED);
if (bio_flagged(bio_src, BIO_THROTTLED))
bio_set_flag(bio, BIO_THROTTLED);
if (bio->bi_bdev == bio_src->bi_bdev &&
bio_flagged(bio_src, BIO_REMAPPED))
bio_set_flag(bio, BIO_REMAPPED);
bio->bi_ioprio = bio_src->bi_ioprio;
bio->bi_iter = bio_src->bi_iter;

bio_clone_blkg_association(bio, bio_src);
blkcg_bio_issue_init(bio);
if (bio->bi_bdev) {
if (bio->bi_bdev == bio_src->bi_bdev &&
bio_flagged(bio_src, BIO_REMAPPED))
bio_set_flag(bio, BIO_REMAPPED);
bio_clone_blkg_association(bio, bio_src);
}

if (bio_crypt_clone(bio, bio_src, gfp) < 0)
return -ENOMEM;
@@ -1727,55 +1757,13 @@ int bioset_init_from_src(struct bio_set *bs, struct bio_set *src)
flags |= BIOSET_NEED_BVECS;
if (src->rescue_workqueue)
flags |= BIOSET_NEED_RESCUER;
if (src->cache)
flags |= BIOSET_PERCPU_CACHE;

return bioset_init(bs, src->bio_pool.min_nr, src->front_pad, flags);
}
EXPORT_SYMBOL(bioset_init_from_src);

/**
* bio_alloc_kiocb - Allocate a bio from bio_set based on kiocb
* @kiocb: kiocb describing the IO
* @bdev: block device to allocate the bio for (can be %NULL)
* @nr_vecs: number of iovecs to pre-allocate
* @opf: operation and flags for bio
* @bs: bio_set to allocate from
*
* Description:
* Like @bio_alloc_bioset, but pass in the kiocb. The kiocb is only
* used to check if we should dip into the per-cpu bio_set allocation
* cache. The allocation uses GFP_KERNEL internally. On return, the
* bio is marked BIO_PERCPU_CACHEABLE, and the final put of the bio
* MUST be done from process context, not hard/soft IRQ.
*
*/
struct bio *bio_alloc_kiocb(struct kiocb *kiocb, struct block_device *bdev,
unsigned short nr_vecs, unsigned int opf, struct bio_set *bs)
{
struct bio_alloc_cache *cache;
struct bio *bio;

if (!(kiocb->ki_flags & IOCB_ALLOC_CACHE) || nr_vecs > BIO_INLINE_VECS)
return bio_alloc_bioset(bdev, nr_vecs, opf, GFP_KERNEL, bs);

cache = per_cpu_ptr(bs->cache, get_cpu());
if (cache->free_list) {
bio = cache->free_list;
cache->free_list = bio->bi_next;
cache->nr--;
put_cpu();
bio_init(bio, bdev, nr_vecs ? bio->bi_inline_vecs : NULL,
nr_vecs, opf);
bio->bi_pool = bs;
bio_set_flag(bio, BIO_PERCPU_CACHE);
return bio;
}
put_cpu();
bio = bio_alloc_bioset(bdev, nr_vecs, opf, GFP_KERNEL, bs);
bio_set_flag(bio, BIO_PERCPU_CACHE);
return bio;
}
EXPORT_SYMBOL_GPL(bio_alloc_kiocb);
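bio_alloc_kiocb() is removed above because the per-cpu cache is now requested directly through the opf flags. A hedged caller-side sketch of the replacement (names other than the bio API are hypothetical):

	#include <linux/bio.h>

	/* Sketch: opt into the per-cpu bio cache with REQ_ALLOC_CACHE
	 * instead of passing a kiocb. The final bio_put() must then
	 * happen in process context, per the kdoc above. */
	static struct bio *example_alloc_cached(struct block_device *bdev,
						struct bio_set *bs, bool polled)
	{
		unsigned int opf = REQ_OP_READ;

		if (polled)	/* e.g. what io_uring-style callers want */
			opf |= REQ_ALLOC_CACHE;

		return bio_alloc_bioset(bdev, 1, opf, GFP_KERNEL, bs);
	}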

static int __init init_bio(void)
{
int i;
block/blk-cgroup-fc-appid.c (new file, 57 lines)
@@ -0,0 +1,57 @@
// SPDX-License-Identifier: GPL-2.0

#include "blk-cgroup.h"

/**
* blkcg_set_fc_appid - set the fc_app_id field associated to blkcg
* @app_id: application identifier
* @cgrp_id: cgroup id
* @app_id_len: size of application identifier
*/
int blkcg_set_fc_appid(char *app_id, u64 cgrp_id, size_t app_id_len)
{
struct cgroup *cgrp;
struct cgroup_subsys_state *css;
struct blkcg *blkcg;
int ret = 0;

if (app_id_len > FC_APPID_LEN)
return -EINVAL;

cgrp = cgroup_get_from_id(cgrp_id);
if (!cgrp)
return -ENOENT;
css = cgroup_get_e_css(cgrp, &io_cgrp_subsys);
if (!css) {
ret = -ENOENT;
goto out_cgrp_put;
}
blkcg = css_to_blkcg(css);
/*
* There is a slight race condition on setting the appid.
* Worst case an I/O may not find the right id.
* This is no different from the I/O we let pass while obtaining
* the vmid from the fabric.
* Adding the overhead of a lock is not necessary.
*/
strlcpy(blkcg->fc_app_id, app_id, app_id_len);
css_put(css);
out_cgrp_put:
cgroup_put(cgrp);
return ret;
}
EXPORT_SYMBOL_GPL(blkcg_set_fc_appid);

/**
* blkcg_get_fc_appid - get the fc app identifier associated with a bio
* @bio: target bio
*
* On success return the fc_app_id, on failure return NULL
*/
char *blkcg_get_fc_appid(struct bio *bio)
{
if (!bio->bi_blkg || bio->bi_blkg->blkcg->fc_app_id[0] == '\0')
return NULL;
return bio->bi_blkg->blkcg->fc_app_id;
}
EXPORT_SYMBOL_GPL(blkcg_get_fc_appid);
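A hedged sketch of how an FC low-level driver might consume the new helper when building a frame (the driver-side names are hypothetical; only blkcg_get_fc_appid() comes from this file):

	#include <linux/bio.h>
	#include <linux/blk-cgroup.h>

	/* Hypothetical: tag an outgoing FC frame with the cgroup's app id. */
	static void example_tag_frame(struct bio *bio, char *frame_appid,
				      size_t len)
	{
		char *app_id = blkcg_get_fc_appid(bio);

		if (app_id)	/* NULL when no blkg or no id was set */
			strscpy(frame_appid, app_id, len);
	}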

@@ -59,6 +59,23 @@ static struct workqueue_struct *blkcg_punt_bio_wq;

#define BLKG_DESTROY_BATCH_SIZE 64

/**
* blkcg_css - find the current css
*
* Find the css associated with either the kthread or the current task.
* This may return a dying css, so it is up to the caller to use tryget logic
* to confirm it is alive and well.
*/
static struct cgroup_subsys_state *blkcg_css(void)
{
struct cgroup_subsys_state *css;

css = kthread_blkcg();
if (css)
return css;
return task_css(current, io_cgrp_id);
}

static bool blkcg_policy_enabled(struct request_queue *q,
const struct blkcg_policy *pol)
{
@@ -155,6 +172,33 @@ static void blkg_async_bio_workfn(struct work_struct *work)
blk_finish_plug(&plug);
}

/**
* bio_blkcg_css - return the blkcg CSS associated with a bio
* @bio: target bio
*
* This returns the CSS for the blkcg associated with a bio, or %NULL if not
* associated. Callers are expected to either handle %NULL or know association
* has been done prior to calling this.
*/
struct cgroup_subsys_state *bio_blkcg_css(struct bio *bio)
{
if (!bio || !bio->bi_blkg)
return NULL;
return &bio->bi_blkg->blkcg->css;
}
EXPORT_SYMBOL_GPL(bio_blkcg_css);
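bio_blkcg_css() is the safe replacement for the removed __bio_blkcg() (see the blk-cgroup.h hunk below): it only reports an association already made and never falls back to task_css(). A hedged caller sketch:

	#include <linux/blk-cgroup.h>

	/* Sketch: read the bio's blkcg css, tolerating unassociated bios. */
	static void example_inspect_blkcg(struct bio *bio)
	{
		struct cgroup_subsys_state *css = bio_blkcg_css(bio);

		if (!css)	/* bio was never associated with a blkg */
			return;
		/* ... use css (e.g. css->serial_nr) while the bio holds
		 * its blkg reference ... */
	}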

/**
* blkcg_parent - get the parent of a blkcg
* @blkcg: blkcg of interest
*
* Return the parent blkcg of @blkcg. Can be called anytime.
*/
static inline struct blkcg *blkcg_parent(struct blkcg *blkcg)
{
return css_to_blkcg(blkcg->css.parent);
}

/**
* blkg_alloc - allocate a blkg
* @blkcg: block cgroup the new blkg is associated with
@@ -254,7 +298,6 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
struct blkcg_gq *blkg;
int i, ret;

WARN_ON_ONCE(!rcu_read_lock_held());
lockdep_assert_held(&q->queue_lock);

/* request_queue is dying, do not create/recreate a blkg */
@@ -905,7 +948,6 @@ static void blkcg_print_one_stat(struct blkcg_gq *blkg, struct seq_file *s)
{
struct blkg_iostat_set *bis = &blkg->iostat;
u64 rbytes, wbytes, rios, wios, dbytes, dios;
bool has_stats = false;
const char *dname;
unsigned seq;
int i;
@@ -931,14 +973,12 @@ static void blkcg_print_one_stat(struct blkcg_gq *blkg, struct seq_file *s)
} while (u64_stats_fetch_retry(&bis->sync, seq));

if (rbytes || wbytes || rios || wios) {
has_stats = true;
seq_printf(s, "rbytes=%llu wbytes=%llu rios=%llu wios=%llu dbytes=%llu dios=%llu",
rbytes, wbytes, rios, wios,
dbytes, dios);
}

if (blkcg_debug_stats && atomic_read(&blkg->use_delay)) {
has_stats = true;
seq_printf(s, " use_delay=%d delay_nsec=%llu",
atomic_read(&blkg->use_delay),
atomic64_read(&blkg->delay_nsec));
@@ -950,12 +990,10 @@ static void blkcg_print_one_stat(struct blkcg_gq *blkg, struct seq_file *s)
if (!blkg->pd[i] || !pol->pd_stat_fn)
continue;

if (pol->pd_stat_fn(blkg->pd[i], s))
has_stats = true;
pol->pd_stat_fn(blkg->pd[i], s);
}

if (has_stats)
seq_printf(s, "\n");
seq_puts(s, "\n");
}

static int blkcg_print_stat(struct seq_file *sf, void *v)
@@ -994,6 +1032,13 @@ static struct cftype blkcg_legacy_files[] = {
{ } /* terminate */
};

#ifdef CONFIG_CGROUP_WRITEBACK
struct list_head *blkcg_get_cgwb_list(struct cgroup_subsys_state *css)
{
return &css_to_blkcg(css)->cgwb_list;
}
#endif

/*
* blkcg destruction is a three-stage process.
*
@@ -1015,25 +1060,6 @@ static struct cftype blkcg_legacy_files[] = {
* This finally frees the blkcg.
*/

/**
* blkcg_css_offline - cgroup css_offline callback
* @css: css of interest
*
* This function is called when @css is about to go away. Here the cgwbs are
* offlined first and only once writeback associated with the blkcg has
* finished do we start step 2 (see above).
*/
static void blkcg_css_offline(struct cgroup_subsys_state *css)
{
struct blkcg *blkcg = css_to_blkcg(css);

/* this prevents anyone from attaching or migrating to this blkcg */
wb_blkcg_offline(blkcg);

/* put the base online pin allowing step 2 to be triggered */
blkcg_unpin_online(blkcg);
}

/**
* blkcg_destroy_blkgs - responsible for shooting down blkgs
* @blkcg: blkcg of interest
@@ -1045,7 +1071,7 @@ static void blkcg_css_offline(struct cgroup_subsys_state *css)
*
* This is the blkcg counterpart of ioc_release_fn().
*/
void blkcg_destroy_blkgs(struct blkcg *blkcg)
static void blkcg_destroy_blkgs(struct blkcg *blkcg)
{
might_sleep();

@@ -1075,6 +1101,57 @@ void blkcg_destroy_blkgs(struct blkcg *blkcg)
spin_unlock_irq(&blkcg->lock);
}

/**
* blkcg_pin_online - pin online state
* @blkcg_css: blkcg of interest
*
* While pinned, a blkcg is kept online. This is primarily used to
* impedance-match blkg and cgwb lifetimes so that blkg doesn't go offline
* while an associated cgwb is still active.
*/
void blkcg_pin_online(struct cgroup_subsys_state *blkcg_css)
{
refcount_inc(&css_to_blkcg(blkcg_css)->online_pin);
}

/**
* blkcg_unpin_online - unpin online state
* @blkcg_css: blkcg of interest
*
* This is primarily used to impedance-match blkg and cgwb lifetimes so
* that blkg doesn't go offline while an associated cgwb is still active.
* When this count goes to zero, all active cgwbs have finished so the
* blkcg can continue destruction by calling blkcg_destroy_blkgs().
*/
void blkcg_unpin_online(struct cgroup_subsys_state *blkcg_css)
{
struct blkcg *blkcg = css_to_blkcg(blkcg_css);

do {
if (!refcount_dec_and_test(&blkcg->online_pin))
break;
blkcg_destroy_blkgs(blkcg);
blkcg = blkcg_parent(blkcg);
} while (blkcg);
}
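blkcg_pin_online()/blkcg_unpin_online() now take a css, so the writeback code no longer needs blkcg internals. A hedged sketch of the intended pairing (the caller names are hypothetical):

	#include <linux/blk-cgroup.h>

	/* Sketch: keep a blkcg online for as long as a cgwb (cgroup
	 * writeback structure) references it; the last unpin cascades
	 * destruction up the hierarchy, as the loop above shows. */
	static void example_cgwb_create(struct cgroup_subsys_state *blkcg_css)
	{
		blkcg_pin_online(blkcg_css);	/* cgwb holds the blkcg online */
	}

	static void example_cgwb_release(struct cgroup_subsys_state *blkcg_css)
	{
		blkcg_unpin_online(blkcg_css);	/* may run blkcg_destroy_blkgs() */
	}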

/**
* blkcg_css_offline - cgroup css_offline callback
* @css: css of interest
*
* This function is called when @css is about to go away. Here the cgwbs are
* offlined first and only once writeback associated with the blkcg has
* finished do we start step 2 (see above).
*/
static void blkcg_css_offline(struct cgroup_subsys_state *css)
{
/* this prevents anyone from attaching or migrating to this blkcg */
wb_blkcg_offline(css);

/* put the base online pin allowing step 2 to be triggered */
blkcg_unpin_online(css);
}

static void blkcg_css_free(struct cgroup_subsys_state *css)
{
struct blkcg *blkcg = css_to_blkcg(css);
@@ -1163,8 +1240,7 @@ unlock:

static int blkcg_css_online(struct cgroup_subsys_state *css)
{
struct blkcg *blkcg = css_to_blkcg(css);
struct blkcg *parent = blkcg_parent(blkcg);
struct blkcg *parent = blkcg_parent(css_to_blkcg(css));

/*
* blkcg_pin_online() is used to delay blkcg offline so that blkgs
@@ -1172,7 +1248,7 @@ static int blkcg_css_online(struct cgroup_subsys_state *css)
* parent so that offline always happens towards the root.
*/
if (parent)
blkcg_pin_online(parent);
blkcg_pin_online(css);
return 0;
}

@@ -1201,14 +1277,13 @@ int blkcg_init_queue(struct request_queue *q)
preloaded = !radix_tree_preload(GFP_KERNEL);

/* Make sure the root blkg exists. */
rcu_read_lock();
/* spin_lock_irq can serve as RCU read-side critical section. */
spin_lock_irq(&q->queue_lock);
blkg = blkg_create(&blkcg_root, q, new_blkg);
if (IS_ERR(blkg))
goto err_unlock;
q->root_blkg = blkg;
spin_unlock_irq(&q->queue_lock);
rcu_read_unlock();

if (preloaded)
radix_tree_preload_end();
@@ -1234,7 +1309,6 @@ err_destroy_all:
return ret;
err_unlock:
spin_unlock_irq(&q->queue_lock);
rcu_read_unlock();
if (preloaded)
radix_tree_preload_end();
return PTR_ERR(blkg);
@@ -1726,7 +1800,6 @@ static void blkcg_maybe_throttle_blkg(struct blkcg_gq *blkg, bool use_memdelay)
void blkcg_maybe_throttle_current(void)
{
struct request_queue *q = current->throttle_queue;
struct cgroup_subsys_state *css;
struct blkcg *blkcg;
struct blkcg_gq *blkg;
bool use_memdelay = current->use_memdelay;
@@ -1738,12 +1811,7 @@ void blkcg_maybe_throttle_current(void)
current->use_memdelay = false;

rcu_read_lock();
css = kthread_blkcg();
if (css)
blkcg = css_to_blkcg(css);
else
blkcg = css_to_blkcg(task_css(current, io_cgrp_id));

blkcg = css_to_blkcg(blkcg_css());
if (!blkcg)
goto out;
blkg = blkg_lookup(blkcg, q);
@@ -1889,7 +1957,7 @@ void bio_associate_blkg(struct bio *bio)
rcu_read_lock();

if (bio->bi_blkg)
css = &bio_blkcg(bio)->css;
css = bio_blkcg_css(bio);
else
css = blkcg_css();

@@ -1950,6 +2018,22 @@ void blk_cgroup_bio_start(struct bio *bio)
put_cpu();
}

bool blk_cgroup_congested(void)
{
struct cgroup_subsys_state *css;
bool ret = false;

rcu_read_lock();
for (css = blkcg_css(); css; css = css->parent) {
if (atomic_read(&css->cgroup->congestion_count)) {
ret = true;
break;
}
}
rcu_read_unlock();
return ret;
}
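blk_cgroup_congested() moves out of line here; it walks from the current task's blkcg to the root and reports whether any level is congested. A hedged sketch of the kind of backoff a caller might build on it (the caller is hypothetical):

	#include <linux/blk-cgroup.h>
	#include <linux/delay.h>

	/* Sketch: throttle optional background I/O while the io cgroup
	 * hierarchy above the current task reports congestion. */
	static void example_backoff(void)
	{
		while (blk_cgroup_congested())
			msleep(10);	/* crude; real callers would punt or sleep on a waitqueue */
	}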

static int __init blkcg_init(void)
{
blkcg_punt_bio_wq = alloc_workqueue("blkcg_punt_bio",

@@ -15,13 +15,101 @@
*/

#include <linux/blk-cgroup.h>
#include <linux/cgroup.h>
#include <linux/kthread.h>
#include <linux/blk-mq.h>

struct blkcg_gq;
struct blkg_policy_data;

/* percpu_counter batch for blkg_[rw]stats, per-cpu drift doesn't matter */
#define BLKG_STAT_CPU_BATCH (INT_MAX / 2)

#ifdef CONFIG_BLK_CGROUP

enum blkg_iostat_type {
BLKG_IOSTAT_READ,
BLKG_IOSTAT_WRITE,
BLKG_IOSTAT_DISCARD,

BLKG_IOSTAT_NR,
};

struct blkg_iostat {
u64 bytes[BLKG_IOSTAT_NR];
u64 ios[BLKG_IOSTAT_NR];
};

struct blkg_iostat_set {
struct u64_stats_sync sync;
struct blkg_iostat cur;
struct blkg_iostat last;
};

/* association between a blk cgroup and a request queue */
struct blkcg_gq {
/* Pointer to the associated request_queue */
struct request_queue *q;
struct list_head q_node;
struct hlist_node blkcg_node;
struct blkcg *blkcg;

/* all non-root blkcg_gq's are guaranteed to have access to parent */
struct blkcg_gq *parent;

/* reference count */
struct percpu_ref refcnt;

/* is this blkg online? protected by both blkcg and q locks */
bool online;

struct blkg_iostat_set __percpu *iostat_cpu;
struct blkg_iostat_set iostat;

struct blkg_policy_data *pd[BLKCG_MAX_POLS];

spinlock_t async_bio_lock;
struct bio_list async_bios;
union {
struct work_struct async_bio_work;
struct work_struct free_work;
};

atomic_t use_delay;
atomic64_t delay_nsec;
atomic64_t delay_start;
u64 last_delay;
int last_use;

struct rcu_head rcu_head;
};

struct blkcg {
struct cgroup_subsys_state css;
spinlock_t lock;
refcount_t online_pin;

struct radix_tree_root blkg_tree;
struct blkcg_gq __rcu *blkg_hint;
struct hlist_head blkg_list;

struct blkcg_policy_data *cpd[BLKCG_MAX_POLS];

struct list_head all_blkcgs_node;
#ifdef CONFIG_BLK_CGROUP_FC_APPID
char fc_app_id[FC_APPID_LEN];
#endif
#ifdef CONFIG_CGROUP_WRITEBACK
struct list_head cgwb_list;
#endif
};

static inline struct blkcg *css_to_blkcg(struct cgroup_subsys_state *css)
{
return css ? container_of(css, struct blkcg, css) : NULL;
}

/*
* A blkcg_gq (blkg) is association between a block cgroup (blkcg) and a
* request_queue (q). This is used by blkcg policies which need to track
@@ -63,7 +151,7 @@ typedef void (blkcg_pol_online_pd_fn)(struct blkg_policy_data *pd);
typedef void (blkcg_pol_offline_pd_fn)(struct blkg_policy_data *pd);
typedef void (blkcg_pol_free_pd_fn)(struct blkg_policy_data *pd);
typedef void (blkcg_pol_reset_pd_stats_fn)(struct blkg_policy_data *pd);
typedef bool (blkcg_pol_stat_pd_fn)(struct blkg_policy_data *pd,
typedef void (blkcg_pol_stat_pd_fn)(struct blkg_policy_data *pd,
struct seq_file *s);

struct blkcg_policy {
@@ -122,53 +210,15 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
char *input, struct blkg_conf_ctx *ctx);
void blkg_conf_finish(struct blkg_conf_ctx *ctx);

/**
* blkcg_css - find the current css
*
* Find the css associated with either the kthread or the current task.
* This may return a dying css, so it is up to the caller to use tryget logic
* to confirm it is alive and well.
*/
static inline struct cgroup_subsys_state *blkcg_css(void)
{
struct cgroup_subsys_state *css;

css = kthread_blkcg();
if (css)
return css;
return task_css(current, io_cgrp_id);
}

/**
* __bio_blkcg - internal, inconsistent version to get blkcg
*
* DO NOT USE.
* This function is inconsistent and consequently is dangerous to use. The
* first part of the function returns a blkcg where a reference is owned by the
* bio. This means it does not need to be rcu protected as it cannot go away
* with the bio owning a reference to it. However, the latter potentially gets
* it from task_css(). This can race against task migration and the cgroup
* dying. It is also semantically different as it must be called rcu protected
* and is susceptible to failure when trying to get a reference to it.
* Therefore, it is not ok to assume that *_get() will always succeed on the
* blkcg returned here.
*/
static inline struct blkcg *__bio_blkcg(struct bio *bio)
{
if (bio && bio->bi_blkg)
return bio->bi_blkg->blkcg;
return css_to_blkcg(blkcg_css());
}

/**
* bio_issue_as_root_blkg - see if this bio needs to be issued as root blkg
* @return: true if this bio needs to be submitted with the root blkg context.
*
* In order to avoid priority inversions we sometimes need to issue a bio as if
* it were attached to the root blkg, and then backcharge to the actual owning
* blkg. The idea is we do bio_blkcg() to look up the actual context for the
* bio and attach the appropriate blkg to the bio. Then we call this helper and
* if it is true run with the root blkg for that queue and then do any
* blkg. The idea is we do bio_blkcg_css() to look up the actual context for
* the bio and attach the appropriate blkg to the bio. Then we call this helper
* and if it is true run with the root blkg for that queue and then do any
* backcharging to the originating cgroup once the io is complete.
*/
static inline bool bio_issue_as_root_blkg(struct bio *bio)
@@ -457,7 +507,8 @@ struct blkcg_policy_data {
struct blkcg_policy {
};

#ifdef CONFIG_BLOCK
struct blkcg {
};

static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, void *key) { return NULL; }
static inline struct blkcg_gq *blk_queue_root_blkg(struct request_queue *q)
@@ -471,8 +522,6 @@ static inline int blkcg_activate_policy(struct request_queue *q,
static inline void blkcg_deactivate_policy(struct request_queue *q,
const struct blkcg_policy *pol) { }

static inline struct blkcg *__bio_blkcg(struct bio *bio) { return NULL; }

static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg,
struct blkcg_policy *pol) { return NULL; }
static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd) { return NULL; }
@@ -488,7 +537,6 @@ static inline bool blk_cgroup_mergeable(struct request *rq, struct bio *bio) { r
#define blk_queue_for_each_rl(rl, q) \
for ((rl) = &(q)->root_rl; (rl); (rl) = NULL)

#endif /* CONFIG_BLOCK */
#endif /* CONFIG_BLK_CGROUP */

#endif /* _BLK_CGROUP_PRIVATE_H */
@@ -588,10 +588,9 @@ static inline int bio_check_eod(struct bio *bio)
(nr_sectors > maxsector ||
bio->bi_iter.bi_sector > maxsector - nr_sectors)) {
pr_info_ratelimited("%s: attempt to access beyond end of device\n"
"%pg: rw=%d, want=%llu, limit=%llu\n",
current->comm,
bio->bi_bdev, bio->bi_opf,
bio_end_sector(bio), maxsector);
"%pg: rw=%d, sector=%llu, nr_sectors = %u limit=%llu\n",
current->comm, bio->bi_bdev, bio->bi_opf,
bio->bi_iter.bi_sector, nr_sectors, maxsector);
return -EIO;
}
return 0;
@@ -816,11 +815,11 @@ void submit_bio_noacct(struct bio *bio)

switch (bio_op(bio)) {
case REQ_OP_DISCARD:
if (!blk_queue_discard(q))
if (!bdev_max_discard_sectors(bdev))
goto not_supported;
break;
case REQ_OP_SECURE_ERASE:
if (!blk_queue_secure_erase(q))
if (!bdev_max_secure_erase_sectors(bdev))
goto not_supported;
break;
case REQ_OP_ZONE_APPEND:
@@ -889,19 +888,11 @@ void submit_bio(struct bio *bio)
if (blkcg_punt_bio_submit(bio))
return;

/*
* If it's a regular read/write or a barrier with data attached,
* go through the normal accounting stuff before submission.
*/
if (bio_has_data(bio)) {
unsigned int count = bio_sectors(bio);

if (op_is_write(bio_op(bio))) {
count_vm_events(PGPGOUT, count);
} else {
task_io_account_read(bio->bi_iter.bi_size);
count_vm_events(PGPGIN, count);
}
if (bio_op(bio) == REQ_OP_READ) {
task_io_account_read(bio->bi_iter.bi_size);
count_vm_events(PGPGIN, bio_sectors(bio));
} else if (bio_op(bio) == REQ_OP_WRITE) {
count_vm_events(PGPGOUT, bio_sectors(bio));
}

/*
@@ -1018,21 +1009,22 @@ again:
}
}

static unsigned long __part_start_io_acct(struct block_device *part,
unsigned int sectors, unsigned int op,
unsigned long start_time)
unsigned long bdev_start_io_acct(struct block_device *bdev,
unsigned int sectors, unsigned int op,
unsigned long start_time)
{
const int sgrp = op_stat_group(op);

part_stat_lock();
update_io_ticks(part, start_time, false);
part_stat_inc(part, ios[sgrp]);
part_stat_add(part, sectors[sgrp], sectors);
part_stat_local_inc(part, in_flight[op_is_write(op)]);
update_io_ticks(bdev, start_time, false);
part_stat_inc(bdev, ios[sgrp]);
part_stat_add(bdev, sectors[sgrp], sectors);
part_stat_local_inc(bdev, in_flight[op_is_write(op)]);
part_stat_unlock();

return start_time;
}
EXPORT_SYMBOL(bdev_start_io_acct);

/**
* bio_start_io_acct_time - start I/O accounting for bio based drivers
@@ -1041,8 +1033,8 @@ static unsigned long __part_start_io_acct(struct block_device *part,
*/
void bio_start_io_acct_time(struct bio *bio, unsigned long start_time)
{
__part_start_io_acct(bio->bi_bdev, bio_sectors(bio),
bio_op(bio), start_time);
bdev_start_io_acct(bio->bi_bdev, bio_sectors(bio),
bio_op(bio), start_time);
}
EXPORT_SYMBOL_GPL(bio_start_io_acct_time);

@@ -1054,46 +1046,33 @@ EXPORT_SYMBOL_GPL(bio_start_io_acct_time);
*/
unsigned long bio_start_io_acct(struct bio *bio)
{
return __part_start_io_acct(bio->bi_bdev, bio_sectors(bio),
bio_op(bio), jiffies);
return bdev_start_io_acct(bio->bi_bdev, bio_sectors(bio),
bio_op(bio), jiffies);
}
EXPORT_SYMBOL_GPL(bio_start_io_acct);

unsigned long disk_start_io_acct(struct gendisk *disk, unsigned int sectors,
unsigned int op)
{
return __part_start_io_acct(disk->part0, sectors, op, jiffies);
}
EXPORT_SYMBOL(disk_start_io_acct);

static void __part_end_io_acct(struct block_device *part, unsigned int op,
unsigned long start_time)
void bdev_end_io_acct(struct block_device *bdev, unsigned int op,
unsigned long start_time)
{
const int sgrp = op_stat_group(op);
unsigned long now = READ_ONCE(jiffies);
unsigned long duration = now - start_time;

part_stat_lock();
update_io_ticks(part, now, true);
part_stat_add(part, nsecs[sgrp], jiffies_to_nsecs(duration));
part_stat_local_dec(part, in_flight[op_is_write(op)]);
update_io_ticks(bdev, now, true);
part_stat_add(bdev, nsecs[sgrp], jiffies_to_nsecs(duration));
part_stat_local_dec(bdev, in_flight[op_is_write(op)]);
part_stat_unlock();
}
EXPORT_SYMBOL(bdev_end_io_acct);

void bio_end_io_acct_remapped(struct bio *bio, unsigned long start_time,
struct block_device *orig_bdev)
struct block_device *orig_bdev)
{
__part_end_io_acct(orig_bdev, bio_op(bio), start_time);
bdev_end_io_acct(orig_bdev, bio_op(bio), start_time);
}
EXPORT_SYMBOL_GPL(bio_end_io_acct_remapped);

void disk_end_io_acct(struct gendisk *disk, unsigned int op,
unsigned long start_time)
{
__part_end_io_acct(disk->part0, op, start_time);
}
EXPORT_SYMBOL(disk_end_io_acct);
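With __part_start_io_acct()/__part_end_io_acct() exported as bdev_start_io_acct()/bdev_end_io_acct(), drivers that do their own accounting can call them directly on a block_device instead of going through the removed disk_* wrappers. A hedged sketch (the driver function is hypothetical; the signatures match this hunk):

	#include <linux/blkdev.h>
	#include <linux/part_stat.h>

	/* Sketch: manual I/O accounting around a driver-internal operation. */
	static void example_do_io(struct block_device *bdev, unsigned int sectors)
	{
		unsigned long start;

		start = bdev_start_io_acct(bdev, sectors, REQ_OP_WRITE, jiffies);
		/* ... perform the I/O ... */
		bdev_end_io_acct(bdev, REQ_OP_WRITE, start);
	}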
|
||||
|
||||
/**
|
||||
* blk_lld_busy - Check if underlying low-level drivers of a device are busy
|
||||
* @q : the queue of the device being checked
|
||||
|
||||
@@ -152,23 +152,25 @@ static void blk_crypto_fallback_encrypt_endio(struct bio *enc_bio)
 
 	src_bio->bi_status = enc_bio->bi_status;
 
-	bio_put(enc_bio);
+	bio_uninit(enc_bio);
+	kfree(enc_bio);
 	bio_endio(src_bio);
 }
 
 static struct bio *blk_crypto_fallback_clone_bio(struct bio *bio_src)
 {
+	unsigned int nr_segs = bio_segments(bio_src);
 	struct bvec_iter iter;
 	struct bio_vec bv;
 	struct bio *bio;
 
-	bio = bio_kmalloc(GFP_NOIO, bio_segments(bio_src));
+	bio = bio_kmalloc(nr_segs, GFP_NOIO);
 	if (!bio)
 		return NULL;
-	bio->bi_bdev		= bio_src->bi_bdev;
+	bio_init(bio, bio_src->bi_bdev, bio->bi_inline_vecs, nr_segs,
+		 bio_src->bi_opf);
 	if (bio_flagged(bio_src, BIO_REMAPPED))
 		bio_set_flag(bio, BIO_REMAPPED);
-	bio->bi_opf		= bio_src->bi_opf;
 	bio->bi_ioprio		= bio_src->bi_ioprio;
 	bio->bi_iter.bi_sector	= bio_src->bi_iter.bi_sector;
 	bio->bi_iter.bi_size	= bio_src->bi_iter.bi_size;
@@ -177,7 +179,6 @@ static struct bio *blk_crypto_fallback_clone_bio(struct bio *bio_src)
 		bio->bi_io_vec[bio->bi_vcnt++] = bv;
 
 	bio_clone_blkg_association(bio, bio_src);
-	blkcg_bio_issue_init(bio);
 
 	return bio;
 }
@@ -363,8 +364,8 @@ out_release_keyslot:
 	blk_crypto_put_keyslot(slot);
 out_put_enc_bio:
 	if (enc_bio)
-		bio_put(enc_bio);
-
+		bio_uninit(enc_bio);
+	kfree(enc_bio);
 	return ret;
 }
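
The pattern above recurs throughout this series: bio_kmalloc() is reduced to a bare allocation that takes the vector count first, the caller runs bio_init() against the inline vector table, and teardown is bio_uninit() plus kfree() rather than bio_put(). A condensed sketch of the new lifecycle; only the alloc_/free_ wrapper names are invented:

static struct bio *alloc_kmalloced_bio(struct block_device *bdev,
				       unsigned short nr_vecs, unsigned int opf)
{
	struct bio *bio;

	/* bare allocation: bio + nr_vecs inline bio_vecs, nothing initialized */
	bio = bio_kmalloc(nr_vecs, GFP_NOIO);
	if (!bio)
		return NULL;
	/* explicit init against the inline vector table */
	bio_init(bio, bdev, bio->bi_inline_vecs, nr_vecs, opf);
	return bio;
}

static void free_kmalloced_bio(struct bio *bio)
{
	bio_uninit(bio);	/* drops cgroup/integrity state, not the memory */
	kfree(bio);		/* the bio itself is plain kmalloc memory */
}
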
@@ -533,8 +533,7 @@ struct ioc_gq {
 
 	/* statistics */
 	struct iocg_pcpu_stat __percpu	*pcpu_stat;
-	struct iocg_stat		local_stat;
-	struct iocg_stat		desc_stat;
+	struct iocg_stat		stat;
 	struct iocg_stat		last_stat;
 	u64				last_stat_abs_vusage;
 	u64				usage_delta_us;
@@ -1371,7 +1370,7 @@ static bool iocg_kick_delay(struct ioc_gq *iocg, struct ioc_now *now)
 		return true;
 	} else {
 		if (iocg->indelay_since) {
-			iocg->local_stat.indelay_us += now->now - iocg->indelay_since;
+			iocg->stat.indelay_us += now->now - iocg->indelay_since;
 			iocg->indelay_since = 0;
 		}
 		iocg->delay = 0;
@@ -1419,7 +1418,7 @@ static void iocg_pay_debt(struct ioc_gq *iocg, u64 abs_vpay,
 
 	/* if debt is paid in full, restore inuse */
 	if (!iocg->abs_vdebt) {
-		iocg->local_stat.indebt_us += now->now - iocg->indebt_since;
+		iocg->stat.indebt_us += now->now - iocg->indebt_since;
 		iocg->indebt_since = 0;
 
 		propagate_weights(iocg, iocg->active, iocg->last_inuse,
@@ -1513,7 +1512,7 @@ static void iocg_kick_waitq(struct ioc_gq *iocg, bool pay_debt,
 
 	if (!waitqueue_active(&iocg->waitq)) {
 		if (iocg->wait_since) {
-			iocg->local_stat.wait_us += now->now - iocg->wait_since;
+			iocg->stat.wait_us += now->now - iocg->wait_since;
 			iocg->wait_since = 0;
 		}
 		return;
@@ -1641,11 +1640,30 @@ static void iocg_build_inner_walk(struct ioc_gq *iocg,
 	}
 }
 
+/* propagate the deltas to the parent */
+static void iocg_flush_stat_upward(struct ioc_gq *iocg)
+{
+	if (iocg->level > 0) {
+		struct iocg_stat *parent_stat =
+			&iocg->ancestors[iocg->level - 1]->stat;
+
+		parent_stat->usage_us +=
+			iocg->stat.usage_us - iocg->last_stat.usage_us;
+		parent_stat->wait_us +=
+			iocg->stat.wait_us - iocg->last_stat.wait_us;
+		parent_stat->indebt_us +=
+			iocg->stat.indebt_us - iocg->last_stat.indebt_us;
+		parent_stat->indelay_us +=
+			iocg->stat.indelay_us - iocg->last_stat.indelay_us;
+	}
+
+	iocg->last_stat = iocg->stat;
+}
+
 /* collect per-cpu counters and propagate the deltas to the parent */
-static void iocg_flush_stat_one(struct ioc_gq *iocg, struct ioc_now *now)
+static void iocg_flush_stat_leaf(struct ioc_gq *iocg, struct ioc_now *now)
 {
 	struct ioc *ioc = iocg->ioc;
-	struct iocg_stat new_stat;
 	u64 abs_vusage = 0;
 	u64 vusage_delta;
 	int cpu;
@@ -1661,34 +1679,9 @@ static void iocg_flush_stat_one(struct ioc_gq *iocg, struct ioc_now *now)
 	iocg->last_stat_abs_vusage = abs_vusage;
 
 	iocg->usage_delta_us = div64_u64(vusage_delta, ioc->vtime_base_rate);
-	iocg->local_stat.usage_us += iocg->usage_delta_us;
+	iocg->stat.usage_us += iocg->usage_delta_us;
 
-	/* propagate upwards */
-	new_stat.usage_us =
-		iocg->local_stat.usage_us + iocg->desc_stat.usage_us;
-	new_stat.wait_us =
-		iocg->local_stat.wait_us + iocg->desc_stat.wait_us;
-	new_stat.indebt_us =
-		iocg->local_stat.indebt_us + iocg->desc_stat.indebt_us;
-	new_stat.indelay_us =
-		iocg->local_stat.indelay_us + iocg->desc_stat.indelay_us;
-
-	/* propagate the deltas to the parent */
-	if (iocg->level > 0) {
-		struct iocg_stat *parent_stat =
-			&iocg->ancestors[iocg->level - 1]->desc_stat;
-
-		parent_stat->usage_us +=
-			new_stat.usage_us - iocg->last_stat.usage_us;
-		parent_stat->wait_us +=
-			new_stat.wait_us - iocg->last_stat.wait_us;
-		parent_stat->indebt_us +=
-			new_stat.indebt_us - iocg->last_stat.indebt_us;
-		parent_stat->indelay_us +=
-			new_stat.indelay_us - iocg->last_stat.indelay_us;
-	}
-
-	iocg->last_stat = new_stat;
+	iocg_flush_stat_upward(iocg);
 }
 
 /* get stat counters ready for reading on all active iocgs */
@@ -1699,13 +1692,13 @@ static void iocg_flush_stat(struct list_head *target_iocgs, struct ioc_now *now)
 
 	/* flush leaves and build inner node walk list */
 	list_for_each_entry(iocg, target_iocgs, active_list) {
-		iocg_flush_stat_one(iocg, now);
+		iocg_flush_stat_leaf(iocg, now);
 		iocg_build_inner_walk(iocg, &inner_walk);
 	}
 
 	/* keep flushing upwards by walking the inner list backwards */
 	list_for_each_entry_safe_reverse(iocg, tiocg, &inner_walk, walk_list) {
-		iocg_flush_stat_one(iocg, now);
+		iocg_flush_stat_upward(iocg);
 		list_del_init(&iocg->walk_list);
 	}
 }
@@ -2152,16 +2145,16 @@ static int ioc_check_iocgs(struct ioc *ioc, struct ioc_now *now)
 
 		/* flush wait and indebt stat deltas */
 		if (iocg->wait_since) {
-			iocg->local_stat.wait_us += now->now - iocg->wait_since;
+			iocg->stat.wait_us += now->now - iocg->wait_since;
 			iocg->wait_since = now->now;
 		}
 		if (iocg->indebt_since) {
-			iocg->local_stat.indebt_us +=
+			iocg->stat.indebt_us +=
 				now->now - iocg->indebt_since;
 			iocg->indebt_since = now->now;
 		}
 		if (iocg->indelay_since) {
-			iocg->local_stat.indelay_us +=
+			iocg->stat.indelay_us +=
 				now->now - iocg->indelay_since;
 			iocg->indelay_since = now->now;
 		}
@@ -3005,13 +2998,13 @@ static void ioc_pd_free(struct blkg_policy_data *pd)
 	kfree(iocg);
 }
 
-static bool ioc_pd_stat(struct blkg_policy_data *pd, struct seq_file *s)
+static void ioc_pd_stat(struct blkg_policy_data *pd, struct seq_file *s)
 {
 	struct ioc_gq *iocg = pd_to_iocg(pd);
 	struct ioc *ioc = iocg->ioc;
 
 	if (!ioc->enabled)
-		return false;
+		return;
 
 	if (iocg->level == 0) {
 		unsigned vp10k = DIV64_U64_ROUND_CLOSEST(
@@ -3027,7 +3020,6 @@ static bool ioc_pd_stat(struct blkg_policy_data *pd, struct seq_file *s)
 			iocg->last_stat.wait_us,
 			iocg->last_stat.indebt_us,
 			iocg->last_stat.indelay_us);
-	return true;
 }
 
 static u64 ioc_weight_prfill(struct seq_file *sf, struct blkg_policy_data *pd,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static bool iolatency_ssd_stat(struct iolatency_grp *iolat, struct seq_file *s)
|
||||
static void iolatency_ssd_stat(struct iolatency_grp *iolat, struct seq_file *s)
|
||||
{
|
||||
struct latency_stat stat;
|
||||
int cpu;
|
||||
@@ -914,17 +914,16 @@ static bool iolatency_ssd_stat(struct iolatency_grp *iolat, struct seq_file *s)
|
||||
(unsigned long long)stat.ps.missed,
|
||||
(unsigned long long)stat.ps.total,
|
||||
iolat->rq_depth.max_depth);
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool iolatency_pd_stat(struct blkg_policy_data *pd, struct seq_file *s)
|
||||
static void iolatency_pd_stat(struct blkg_policy_data *pd, struct seq_file *s)
|
||||
{
|
||||
struct iolatency_grp *iolat = pd_to_lat(pd);
|
||||
unsigned long long avg_lat;
|
||||
unsigned long long cur_win;
|
||||
|
||||
if (!blkcg_debug_stats)
|
||||
return false;
|
||||
return;
|
||||
|
||||
if (iolat->ssd)
|
||||
return iolatency_ssd_stat(iolat, s);
|
||||
@@ -937,7 +936,6 @@ static bool iolatency_pd_stat(struct blkg_policy_data *pd, struct seq_file *s)
|
||||
else
|
||||
seq_printf(s, " depth=%u avg_lat=%llu win=%llu",
|
||||
iolat->rq_depth.max_depth, avg_lat, cur_win);
|
||||
return true;
|
||||
}
|
||||
|
||||
static struct blkg_policy_data *iolatency_pd_alloc(gfp_t gfp,
|
||||
|
||||
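
With the bool return gone from pd_stat, a policy callback either prints its fields or prints nothing, and line termination is left to the blk-cgroup core (see "blk-cgroup: always terminate io.stat lines" in the shortlog). A minimal callback under the new signature; struct example_grp and pd_to_example() are hypothetical:

/* Hypothetical policy callback under the new void pd_stat signature. */
static void example_pd_stat(struct blkg_policy_data *pd, struct seq_file *s)
{
	struct example_grp *grp = pd_to_example(pd);	/* hypothetical */

	if (!grp->enabled)
		return;	/* print nothing; the core still terminates the line */
	seq_printf(s, " example_depth=%u", grp->depth);
}
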
126	block/blk-lib.c
@@ -10,30 +10,44 @@
 
 #include "blk.h"
 
-int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
-		sector_t nr_sects, gfp_t gfp_mask, int flags,
-		struct bio **biop)
+static sector_t bio_discard_limit(struct block_device *bdev, sector_t sector)
 {
+	unsigned int discard_granularity = bdev_discard_granularity(bdev);
+	sector_t granularity_aligned_sector;
+
+	if (bdev_is_partition(bdev))
+		sector += bdev->bd_start_sect;
+
+	granularity_aligned_sector =
+		round_up(sector, discard_granularity >> SECTOR_SHIFT);
+
+	/*
+	 * Make sure subsequent bios start aligned to the discard granularity if
+	 * it needs to be split.
+	 */
+	if (granularity_aligned_sector != sector)
+		return granularity_aligned_sector - sector;
+
+	/*
+	 * Align the bio size to the discard granularity to make splitting the bio
+	 * at discard granularity boundaries easier in the driver if needed.
+	 */
+	return round_down(UINT_MAX, discard_granularity) >> SECTOR_SHIFT;
+}
+
+int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
+		sector_t nr_sects, gfp_t gfp_mask, struct bio **biop)
+{
-	struct request_queue *q = bdev_get_queue(bdev);
 	struct bio *bio = *biop;
-	unsigned int op;
-	sector_t bs_mask, part_offset = 0;
+	sector_t bs_mask;
 
 	if (bdev_read_only(bdev))
 		return -EPERM;
-
-	if (flags & BLKDEV_DISCARD_SECURE) {
-		if (!blk_queue_secure_erase(q))
-			return -EOPNOTSUPP;
-		op = REQ_OP_SECURE_ERASE;
-	} else {
-		if (!blk_queue_discard(q))
-			return -EOPNOTSUPP;
-		op = REQ_OP_DISCARD;
-	}
+	if (!bdev_max_discard_sectors(bdev))
+		return -EOPNOTSUPP;
 
 	/* In case the discard granularity isn't set by buggy device driver */
-	if (WARN_ON_ONCE(!q->limits.discard_granularity)) {
+	if (WARN_ON_ONCE(!bdev_discard_granularity(bdev))) {
 		char dev_name[BDEVNAME_SIZE];
 
 		bdevname(bdev, dev_name);
@@ -48,38 +62,11 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
 	if (!nr_sects)
 		return -EINVAL;
 
-	/* In case the discard request is in a partition */
-	if (bdev_is_partition(bdev))
-		part_offset = bdev->bd_start_sect;
-
 	while (nr_sects) {
-		sector_t granularity_aligned_lba, req_sects;
-		sector_t sector_mapped = sector + part_offset;
-
-		granularity_aligned_lba = round_up(sector_mapped,
-			q->limits.discard_granularity >> SECTOR_SHIFT);
-
-		/*
-		 * Check whether the discard bio starts at a discard_granularity
-		 * aligned LBA,
-		 * - If no: set (granularity_aligned_lba - sector_mapped) to
-		 *   bi_size of the first split bio, then the second bio will
-		 *   start at a discard_granularity aligned LBA on the device.
-		 * - If yes: use bio_aligned_discard_max_sectors() as the max
-		 *   possible bi_size of the first split bio. Then when this bio
-		 *   is split in device drive, the split ones are very probably
-		 *   to be aligned to discard_granularity of the device's queue.
-		 */
-		if (granularity_aligned_lba == sector_mapped)
-			req_sects = min_t(sector_t, nr_sects,
-					  bio_aligned_discard_max_sectors(q));
-		else
-			req_sects = min_t(sector_t, nr_sects,
-					  granularity_aligned_lba - sector_mapped);
-
-		WARN_ON_ONCE((req_sects << 9) > UINT_MAX);
+		sector_t req_sects =
+			min(nr_sects, bio_discard_limit(bdev, sector));
 
-		bio = blk_next_bio(bio, bdev, 0, op, gfp_mask);
+		bio = blk_next_bio(bio, bdev, 0, REQ_OP_DISCARD, gfp_mask);
 		bio->bi_iter.bi_sector = sector;
 		bio->bi_iter.bi_size = req_sects << 9;
 		sector += req_sects;
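
A worked model of bio_discard_limit() may help here, since the helper now carries all of the alignment logic that the removed loop comment used to explain. The following standalone program mimics the arithmetic with an assumed 1 MiB granularity and a partition starting at LBA 34; it is a model of the helper, not the kernel code:

#include <stdint.h>
#include <stdio.h>

#define SECTOR_SHIFT 9

static uint64_t round_up_u64(uint64_t v, uint64_t m)
{
	return (v + m - 1) / m * m;
}

static uint64_t discard_limit(uint64_t sector, uint32_t granularity_bytes,
			      uint64_t part_start)
{
	uint64_t gran_sectors = granularity_bytes >> SECTOR_SHIFT;
	uint64_t aligned;

	sector += part_start;		/* map to a device-absolute LBA */
	aligned = round_up_u64(sector, gran_sectors);
	if (aligned != sector)		/* head fragment up to the boundary */
		return aligned - sector;
	/* aligned: largest granularity multiple that fits a 32-bit bi_size */
	return (uint64_t)(UINT32_MAX / granularity_bytes * granularity_bytes)
			>> SECTOR_SHIFT;
}

int main(void)
{
	/* partition at LBA 34, granularity 1 MiB: first bio is 2014 sectors */
	printf("%llu\n", (unsigned long long)discard_limit(0, 1 << 20, 34));
	/* the next request starts aligned and may be max-sized */
	printf("%llu\n", (unsigned long long)discard_limit(2014, 1 << 20, 34));
	return 0;
}

The first call returns the 2014-sector head fragment needed to reach a granularity boundary; once aligned, the limit becomes the largest granularity multiple that fits a 32-bit bi_size (8386560 sectors with these numbers).
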
@@ -105,21 +92,19 @@ EXPORT_SYMBOL(__blkdev_issue_discard);
  * @sector:	start sector
  * @nr_sects:	number of sectors to discard
  * @gfp_mask:	memory allocation flags (for bio_alloc)
- * @flags:	BLKDEV_DISCARD_* flags to control behaviour
  *
  * Description:
  *    Issue a discard request for the sectors in question.
  */
 int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
-		sector_t nr_sects, gfp_t gfp_mask, unsigned long flags)
+		sector_t nr_sects, gfp_t gfp_mask)
 {
 	struct bio *bio = NULL;
 	struct blk_plug plug;
 	int ret;
 
 	blk_start_plug(&plug);
-	ret = __blkdev_issue_discard(bdev, sector, nr_sects, gfp_mask, flags,
-			&bio);
+	ret = __blkdev_issue_discard(bdev, sector, nr_sects, gfp_mask, &bio);
 	if (!ret && bio) {
 		ret = submit_bio_wait(bio);
 		if (ret == -EOPNOTSUPP)
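
For callers, the migration implied by these hunks looks roughly as follows; bdev, sector, nr_sects and ret stand in for the caller's own values:

/* Before this series: one entry point, secure erase selected by flag. */
ret = blkdev_issue_discard(bdev, sector, nr_sects, GFP_KERNEL,
			   BLKDEV_DISCARD_SECURE);

/* After: the flags argument is gone and secure erase has its own helper. */
ret = blkdev_issue_discard(bdev, sector, nr_sects, GFP_KERNEL);
ret = blkdev_issue_secure_erase(bdev, sector, nr_sects, GFP_KERNEL);
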
@@ -316,3 +301,42 @@ retry:
 	return ret;
 }
 EXPORT_SYMBOL(blkdev_issue_zeroout);
+
+int blkdev_issue_secure_erase(struct block_device *bdev, sector_t sector,
+		sector_t nr_sects, gfp_t gfp)
+{
+	sector_t bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1;
+	unsigned int max_sectors = bdev_max_secure_erase_sectors(bdev);
+	struct bio *bio = NULL;
+	struct blk_plug plug;
+	int ret = 0;
+
+	if (max_sectors == 0)
+		return -EOPNOTSUPP;
+	if ((sector | nr_sects) & bs_mask)
+		return -EINVAL;
+	if (bdev_read_only(bdev))
+		return -EPERM;
+
+	blk_start_plug(&plug);
+	for (;;) {
+		unsigned int len = min_t(sector_t, nr_sects, max_sectors);
+
+		bio = blk_next_bio(bio, bdev, 0, REQ_OP_SECURE_ERASE, gfp);
+		bio->bi_iter.bi_sector = sector;
+		bio->bi_iter.bi_size = len;
+
+		sector += len << SECTOR_SHIFT;
+		nr_sects -= len << SECTOR_SHIFT;
+		if (!nr_sects) {
+			ret = submit_bio_wait(bio);
+			bio_put(bio);
+			break;
+		}
+		cond_resched();
+	}
+	blk_finish_plug(&plug);
+
+	return ret;
+}
+EXPORT_SYMBOL(blkdev_issue_secure_erase);
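
One detail worth flagging in the new function: bi_size is a byte count while len counts sectors, so bi_size = len together with the << SECTOR_SHIFT on the sector and nr_sects updates looks unit-swapped; a correction along these lines landed upstream after this merge. A unit-consistent form of the loop body, offered as a hedged sketch rather than the merged code:

/*
 * Unit-consistent sketch of the loop body: len counts sectors,
 * bi_size counts bytes (assumption based on the surrounding code).
 */
unsigned int len = min_t(sector_t, nr_sects, max_sectors);

bio = blk_next_bio(bio, bdev, 0, REQ_OP_SECURE_ERASE, gfp);
bio->bi_iter.bi_sector = sector;
bio->bi_iter.bi_size = len << SECTOR_SHIFT;	/* sectors to bytes */

sector += len;		/* both counters advance in sectors */
nr_sects -= len;
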
@@ -152,10 +152,10 @@ static int bio_copy_user_iov(struct request *rq, struct rq_map_data *map_data,
 	nr_pages = bio_max_segs(DIV_ROUND_UP(offset + len, PAGE_SIZE));
 
 	ret = -ENOMEM;
-	bio = bio_kmalloc(gfp_mask, nr_pages);
+	bio = bio_kmalloc(nr_pages, gfp_mask);
 	if (!bio)
 		goto out_bmd;
-	bio->bi_opf |= req_op(rq);
+	bio_init(bio, NULL, bio->bi_inline_vecs, nr_pages, req_op(rq));
 
 	if (map_data) {
 		nr_pages = 1 << map_data->page_order;
@@ -224,7 +224,8 @@ static int bio_copy_user_iov(struct request *rq, struct rq_map_data *map_data,
 cleanup:
 	if (!map_data)
 		bio_free_pages(bio);
-	bio_put(bio);
+	bio_uninit(bio);
+	kfree(bio);
 out_bmd:
 	kfree(bmd);
 	return ret;
@@ -234,6 +235,7 @@ static int bio_map_user_iov(struct request *rq, struct iov_iter *iter,
 		gfp_t gfp_mask)
 {
 	unsigned int max_sectors = queue_max_hw_sectors(rq->q);
+	unsigned int nr_vecs = iov_iter_npages(iter, BIO_MAX_VECS);
 	struct bio *bio;
 	int ret;
 	int j;
@@ -241,10 +243,10 @@ static int bio_map_user_iov(struct request *rq, struct iov_iter *iter,
 	if (!iov_iter_count(iter))
 		return -EINVAL;
 
-	bio = bio_kmalloc(gfp_mask, iov_iter_npages(iter, BIO_MAX_VECS));
+	bio = bio_kmalloc(nr_vecs, gfp_mask);
 	if (!bio)
 		return -ENOMEM;
-	bio->bi_opf |= req_op(rq);
+	bio_init(bio, NULL, bio->bi_inline_vecs, nr_vecs, req_op(rq));
 
 	while (iov_iter_count(iter)) {
 		struct page **pages;
@@ -260,10 +262,9 @@ static int bio_map_user_iov(struct request *rq, struct iov_iter *iter,
 
 		npages = DIV_ROUND_UP(offs + bytes, PAGE_SIZE);
 
-		if (unlikely(offs & queue_dma_alignment(rq->q))) {
-			ret = -EINVAL;
+		if (unlikely(offs & queue_dma_alignment(rq->q)))
 			j = 0;
-		} else {
+		else {
 			for (j = 0; j < npages; j++) {
 				struct page *page = pages[j];
 				unsigned int n = PAGE_SIZE - offs;
@@ -303,7 +304,8 @@ static int bio_map_user_iov(struct request *rq, struct iov_iter *iter,
 
 out_unmap:
 	bio_release_pages(bio, false);
-	bio_put(bio);
+	bio_uninit(bio);
+	kfree(bio);
 	return ret;
 }
 
@@ -323,7 +325,8 @@ static void bio_invalidate_vmalloc_pages(struct bio *bio)
 static void bio_map_kern_endio(struct bio *bio)
 {
 	bio_invalidate_vmalloc_pages(bio);
-	bio_put(bio);
+	bio_uninit(bio);
+	kfree(bio);
 }
 
 /**
@@ -348,9 +351,10 @@ static struct bio *bio_map_kern(struct request_queue *q, void *data,
 	int offset, i;
 	struct bio *bio;
 
-	bio = bio_kmalloc(gfp_mask, nr_pages);
+	bio = bio_kmalloc(nr_pages, gfp_mask);
 	if (!bio)
 		return ERR_PTR(-ENOMEM);
+	bio_init(bio, NULL, bio->bi_inline_vecs, nr_pages, 0);
 
 	if (is_vmalloc) {
 		flush_kernel_vmap_range(data, len);
@@ -374,7 +378,8 @@ static struct bio *bio_map_kern(struct request_queue *q, void *data,
 			if (bio_add_pc_page(q, bio, page, bytes,
 					    offset) < bytes) {
 				/* we don't support partial mappings */
-				bio_put(bio);
+				bio_uninit(bio);
+				kfree(bio);
 				return ERR_PTR(-EINVAL);
 			}
 
@@ -390,7 +395,8 @@ static struct bio *bio_map_kern(struct request_queue *q, void *data,
 static void bio_copy_kern_endio(struct bio *bio)
 {
 	bio_free_pages(bio);
-	bio_put(bio);
+	bio_uninit(bio);
+	kfree(bio);
 }
 
 static void bio_copy_kern_endio_read(struct bio *bio)
@@ -435,9 +441,10 @@ static struct bio *bio_copy_kern(struct request_queue *q, void *data,
 		return ERR_PTR(-EINVAL);
 
 	nr_pages = end - start;
-	bio = bio_kmalloc(gfp_mask, nr_pages);
+	bio = bio_kmalloc(nr_pages, gfp_mask);
 	if (!bio)
 		return ERR_PTR(-ENOMEM);
+	bio_init(bio, NULL, bio->bi_inline_vecs, nr_pages, 0);
 
 	while (len) {
 		struct page *page;
@@ -471,7 +478,8 @@ static struct bio *bio_copy_kern(struct request_queue *q, void *data,
 
 cleanup:
 	bio_free_pages(bio);
-	bio_put(bio);
+	bio_uninit(bio);
+	kfree(bio);
 	return ERR_PTR(-ENOMEM);
 }
 
@@ -602,7 +610,8 @@ int blk_rq_unmap_user(struct bio *bio)
 
 		next_bio = bio;
 		bio = bio->bi_next;
-		bio_put(next_bio);
+		bio_uninit(next_bio);
+		kfree(next_bio);
 	}
 
 	return ret;
@@ -648,8 +657,10 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
 	bio->bi_opf |= req_op(rq);
 
 	ret = blk_rq_append_bio(rq, bio);
-	if (unlikely(ret))
-		bio_put(bio);
+	if (unlikely(ret)) {
+		bio_uninit(bio);
+		kfree(bio);
+	}
 	return ret;
 }
 EXPORT_SYMBOL(blk_rq_map_kern);
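
After the blk-map.c conversion above there are effectively two bio allocation families, and their release paths must not be mixed. A compressed reminder of the pairing; the wrapper names are invented:

/* bioset-backed bio: refcounted, released with bio_put() */
static struct bio *alloc_refcounted(struct block_device *bdev,
				    unsigned short nr_vecs)
{
	return bio_alloc(bdev, nr_vecs, REQ_OP_READ, GFP_KERNEL);
}

/* kmalloc-backed bio: the caller owns the memory outright */
static void free_kmalloced(struct bio *bio)
{
	bio_uninit(bio);
	kfree(bio);	/* bio_put() now expects a bioset-backed bio */
}
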
@@ -113,10 +113,8 @@ static const char *const blk_queue_flag_name[] = {
 	QUEUE_FLAG_NAME(FAIL_IO),
 	QUEUE_FLAG_NAME(NONROT),
 	QUEUE_FLAG_NAME(IO_STAT),
-	QUEUE_FLAG_NAME(DISCARD),
 	QUEUE_FLAG_NAME(NOXMERGES),
 	QUEUE_FLAG_NAME(ADD_RANDOM),
-	QUEUE_FLAG_NAME(SECERASE),
 	QUEUE_FLAG_NAME(SAME_FORCE),
 	QUEUE_FLAG_NAME(DEAD),
 	QUEUE_FLAG_NAME(INIT_DONE),
|
||||
WRITE_ONCE(rq->state, MQ_RQ_COMPLETE);
|
||||
|
||||
/*
|
||||
* For a polled request, always complete locallly, it's pointless
|
||||
* For a polled request, always complete locally, it's pointless
|
||||
* to redirect the completion.
|
||||
*/
|
||||
if (rq->cmd_flags & REQ_POLLED)
|
||||
|
||||
@@ -46,6 +46,7 @@ void blk_set_default_limits(struct queue_limits *lim)
 	lim->max_zone_append_sectors = 0;
 	lim->max_discard_sectors = 0;
 	lim->max_hw_discard_sectors = 0;
+	lim->max_secure_erase_sectors = 0;
 	lim->discard_granularity = 0;
 	lim->discard_alignment = 0;
 	lim->discard_misaligned = 0;
@@ -176,6 +177,18 @@ void blk_queue_max_discard_sectors(struct request_queue *q,
 }
 EXPORT_SYMBOL(blk_queue_max_discard_sectors);
 
+/**
+ * blk_queue_max_secure_erase_sectors - set max sectors for a secure erase
+ * @q:  the request queue for the device
+ * @max_sectors: maximum number of sectors to secure_erase
+ **/
+void blk_queue_max_secure_erase_sectors(struct request_queue *q,
+		unsigned int max_sectors)
+{
+	q->limits.max_secure_erase_sectors = max_sectors;
+}
+EXPORT_SYMBOL(blk_queue_max_secure_erase_sectors);
+
 /**
  * blk_queue_max_write_zeroes_sectors - set max sectors for a single
  *	write zeroes
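
A driver opting into the new limit would do so from its queue setup path. A hypothetical sketch; the MY_DEV_* constants are illustrative:

static void my_dev_config_queue(struct request_queue *q, bool secure_erase)
{
	blk_queue_max_discard_sectors(q, MY_DEV_MAX_DISCARD_SECTORS);
	if (secure_erase)
		blk_queue_max_secure_erase_sectors(q,
				MY_DEV_MAX_SECURE_ERASE_SECTORS);
	/*
	 * Leaving max_secure_erase_sectors at its default of 0 makes
	 * blkdev_issue_secure_erase() fail with -EOPNOTSUPP, as in the
	 * blk-lib hunk above.
	 */
}
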
@@ -468,6 +481,40 @@ void blk_queue_io_opt(struct request_queue *q, unsigned int opt)
 }
 EXPORT_SYMBOL(blk_queue_io_opt);
 
+static int queue_limit_alignment_offset(struct queue_limits *lim,
+		sector_t sector)
+{
+	unsigned int granularity = max(lim->physical_block_size, lim->io_min);
+	unsigned int alignment = sector_div(sector, granularity >> SECTOR_SHIFT)
+		<< SECTOR_SHIFT;
+
+	return (granularity + lim->alignment_offset - alignment) % granularity;
+}
+
+static unsigned int queue_limit_discard_alignment(struct queue_limits *lim,
+		sector_t sector)
+{
+	unsigned int alignment, granularity, offset;
+
+	if (!lim->max_discard_sectors)
+		return 0;
+
+	/* Why are these in bytes, not sectors? */
+	alignment = lim->discard_alignment >> SECTOR_SHIFT;
+	granularity = lim->discard_granularity >> SECTOR_SHIFT;
+	if (!granularity)
+		return 0;
+
+	/* Offset of the partition start in 'granularity' sectors */
+	offset = sector_div(sector, granularity);
+
+	/* And why do we do this modulus *again* in blkdev_issue_discard()? */
+	offset = (granularity + alignment - offset) % granularity;
+
+	/* Turn it back into bytes, gaah */
+	return offset << SECTOR_SHIFT;
+}
+
 static unsigned int blk_round_down_sectors(unsigned int sectors, unsigned int lbs)
 {
 	sectors = round_down(sectors, lbs >> SECTOR_SHIFT);
@@ -627,7 +674,8 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
 		t->discard_alignment = lcm_not_zero(t->discard_alignment, alignment) %
 			t->discard_granularity;
 	}
-
+	t->max_secure_erase_sectors = min_not_zero(t->max_secure_erase_sectors,
+						   b->max_secure_erase_sectors);
 	t->zone_write_granularity = max(t->zone_write_granularity,
 					b->zone_write_granularity);
 	t->zoned = max(t->zoned, b->zoned);
@@ -901,3 +949,27 @@ void blk_queue_set_zoned(struct gendisk *disk, enum blk_zoned_model model)
 	}
 }
 EXPORT_SYMBOL_GPL(blk_queue_set_zoned);
+
+int bdev_alignment_offset(struct block_device *bdev)
+{
+	struct request_queue *q = bdev_get_queue(bdev);
+
+	if (q->limits.misaligned)
+		return -1;
+	if (bdev_is_partition(bdev))
+		return queue_limit_alignment_offset(&q->limits,
+				bdev->bd_start_sect);
+	return q->limits.alignment_offset;
+}
+EXPORT_SYMBOL_GPL(bdev_alignment_offset);
+
+unsigned int bdev_discard_alignment(struct block_device *bdev)
+{
+	struct request_queue *q = bdev_get_queue(bdev);
+
+	if (bdev_is_partition(bdev))
+		return queue_limit_discard_alignment(&q->limits,
+				bdev->bd_start_sect);
+	return q->limits.discard_alignment;
+}
+EXPORT_SYMBOL_GPL(bdev_discard_alignment);
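
A worked example for queue_limit_discard_alignment(), assuming a 1 MiB discard granularity (2048 sectors), discard_alignment 0, and a partition starting at sector 34: sector_div(34, 2048) leaves offset = 34, then (2048 + 0 - 34) % 2048 = 2014, and the function returns 2014 << SECTOR_SHIFT = 1031168 bytes. That is the same 2014-sector head fragment the bio_discard_limit() example earlier produces, just reported in bytes.
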
@@ -227,7 +227,7 @@ static unsigned int tg_iops_limit(struct throtl_grp *tg, int rw)
 		break;							\
 	if ((__tg)) {							\
 		blk_add_cgroup_trace_msg(__td->queue,			\
-			tg_to_blkg(__tg)->blkcg, "throtl " fmt, ##args);\
+			&tg_to_blkg(__tg)->blkcg->css, "throtl " fmt, ##args);\
 	} else {							\
 		blk_add_trace_msg(__td->queue, "throtl " fmt, ##args);	\
 	}								\
@@ -2189,13 +2189,14 @@ again:
 	}
 
 out_unlock:
-	spin_unlock_irq(&q->queue_lock);
 	bio_set_flag(bio, BIO_THROTTLED);
 
 #ifdef CONFIG_BLK_DEV_THROTTLING_LOW
 	if (throttled || !td->track_bio_latency)
 		bio->bi_issue.value |= BIO_ISSUE_THROTL_SKIP_LATENCY;
 #endif
+	spin_unlock_irq(&q->queue_lock);
+
 	rcu_read_unlock();
 	return throttled;
 }
Some files were not shown because too many files have changed in this diff