Merge branch 'for-4.15/block' of git://git.kernel.dk/linux-block
Pull core block layer updates from Jens Axboe:
"This is the main pull request for block storage for 4.15-rc1.
Nothing out of the ordinary in here, and no API changes or anything
like that. Just various new features for drivers, core changes, etc.
In particular, this pull request contains:
- A patch series from Bart, closing the hole in blk/scsi-mq queue
quiescing.
- A series from Christoph, building towards hidden gendisks (for
multipath) and ability to move bio chains around.
- NVMe
- Support for native multipath for NVMe (Christoph).
- Userspace notifications for AENs (Keith).
- Command side-effects support (Keith).
- SGL support (Chaitanya Kulkarni)
- FC fixes and improvements (James Smart)
- Lots of fixes and tweaks (Various)
- bcache
- New maintainer (Michael Lyle)
- Writeback control improvements (Michael)
- Various fixes (Coly, Elena, Eric, Liang, et al)
- lightnvm updates, mostly centered around the pblk interface
(Javier, Hans, and Rakesh).
- Removal of unused bio/bvec kmap atomic interfaces (me, Christoph)
- Writeback series that fix the much discussed hundreds of millions
of sync-all units. This goes all the way, as discussed previously
(me).
- Fix for missing wakeup on writeback timer adjustments (Yafang
Shao).
- Fix laptop mode on blk-mq (me).
- {mq,name} tuple lookup for IO schedulers, allowing us to have
alias names. This means you can use 'deadline' on both !mq and on
mq (where it's called mq-deadline). (me).
- blktrace race fix, oopsing on sg load (me).
- blk-mq optimizations (me).
- Obscure waitqueue race fix for kyber (Omar).
- NBD fixes (Josef).
- Disable writeback throttling by default on bfq, like we do on cfq
(Luca Miccio).
- Series from Ming that enables us to treat flush requests on blk-mq
like any other request. This is a really nice cleanup.
- Series from Ming that improves merging on blk-mq with schedulers,
getting us closer to flipping the switch on scsi-mq again.
- BFQ updates (Paolo).
- blk-mq atomic flags memory ordering fixes (Peter Z).
- Loop cgroup support (Shaohua).
- Lots of minor fixes from lots of different folks, both for core and
driver code"
* 'for-4.15/block' of git://git.kernel.dk/linux-block: (294 commits)
nvme: fix visibility of "uuid" ns attribute
blk-mq: fixup some comment typos and lengths
ide: ide-atapi: fix compile error with defining macro DEBUG
blk-mq: improve tag waiting setup for non-shared tags
brd: remove unused brd_mutex
blk-mq: only run the hardware queue if IO is pending
block: avoid null pointer dereference on null disk
fs: guard_bio_eod() needs to consider partitions
xtensa/simdisk: fix compile error
nvme: expose subsys attribute to sysfs
nvme: create 'slaves' and 'holders' entries for hidden controllers
block: create 'slaves' and 'holders' entries for hidden gendisks
nvme: also expose the namespace identification sysfs files for mpath nodes
nvme: implement multipath access to nvme subsystems
nvme: track shared namespaces
nvme: introduce a nvme_ns_ids structure
nvme: track subsystems
block, nvme: Introduce blk_mq_req_flags_t
block, scsi: Make SCSI quiesce and resume work reliably
block: Add the QUEUE_FLAG_PREEMPT_ONLY request queue flag
...
block/bfq-iosched.c: +159, -68

@@ -108,6 +108,7 @@
 #include "blk-mq-tag.h"
 #include "blk-mq-sched.h"
 #include "bfq-iosched.h"
+#include "blk-wbt.h"
 
 #define BFQ_BFQQ_FNS(name)						\
 void bfq_mark_bfqq_##name(struct bfq_queue *bfqq)			\
@@ -724,6 +725,44 @@ static void bfq_updated_next_req(struct bfq_data *bfqd,
 	}
 }
 
+static unsigned int bfq_wr_duration(struct bfq_data *bfqd)
+{
+	u64 dur;
+
+	if (bfqd->bfq_wr_max_time > 0)
+		return bfqd->bfq_wr_max_time;
+
+	dur = bfqd->RT_prod;
+	do_div(dur, bfqd->peak_rate);
+
+	/*
+	 * Limit duration between 3 and 13 seconds. Tests show that
+	 * higher values than 13 seconds often yield the opposite of
+	 * the desired result, i.e., worsen responsiveness by letting
+	 * non-interactive and non-soft-real-time applications
+	 * preserve weight raising for a too long time interval.
+	 *
+	 * On the other end, lower values than 3 seconds make it
+	 * difficult for most interactive tasks to complete their jobs
+	 * before weight-raising finishes.
+	 */
+	if (dur > msecs_to_jiffies(13000))
+		dur = msecs_to_jiffies(13000);
+	else if (dur < msecs_to_jiffies(3000))
+		dur = msecs_to_jiffies(3000);
+
+	return dur;
+}
+
+/* switch back from soft real-time to interactive weight raising */
+static void switch_back_to_interactive_wr(struct bfq_queue *bfqq,
+					  struct bfq_data *bfqd)
+{
+	bfqq->wr_coeff = bfqd->bfq_wr_coeff;
+	bfqq->wr_cur_max_time = bfq_wr_duration(bfqd);
+	bfqq->last_wr_start_finish = bfqq->wr_start_at_switch_to_srt;
+}
+
 static void
 bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_data *bfqd,
 		      struct bfq_io_cq *bic, bool bfq_already_existing)
@@ -750,10 +789,16 @@ bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_data *bfqd,
 	if (bfqq->wr_coeff > 1 && (bfq_bfqq_in_large_burst(bfqq) ||
 	    time_is_before_jiffies(bfqq->last_wr_start_finish +
 				   bfqq->wr_cur_max_time))) {
-		bfq_log_bfqq(bfqq->bfqd, bfqq,
-			     "resume state: switching off wr");
-
-		bfqq->wr_coeff = 1;
+		if (bfqq->wr_cur_max_time == bfqd->bfq_wr_rt_max_time &&
+		    !bfq_bfqq_in_large_burst(bfqq) &&
+		    time_is_after_eq_jiffies(bfqq->wr_start_at_switch_to_srt +
+					     bfq_wr_duration(bfqd))) {
+			switch_back_to_interactive_wr(bfqq, bfqd);
+		} else {
+			bfqq->wr_coeff = 1;
+			bfq_log_bfqq(bfqq->bfqd, bfqq,
+				     "resume state: switching off wr");
+		}
 	}
 
 	/* make sure weight will be updated, however we got here */
@@ -1173,33 +1218,22 @@ static bool bfq_bfqq_update_budg_for_activation(struct bfq_data *bfqd,
 	return wr_or_deserves_wr;
 }
 
-static unsigned int bfq_wr_duration(struct bfq_data *bfqd)
-{
-	u64 dur;
-
-	if (bfqd->bfq_wr_max_time > 0)
-		return bfqd->bfq_wr_max_time;
-
-	dur = bfqd->RT_prod;
-	do_div(dur, bfqd->peak_rate);
-
-	/*
-	 * Limit duration between 3 and 13 seconds. Tests show that
-	 * higher values than 13 seconds often yield the opposite of
-	 * the desired result, i.e., worsen responsiveness by letting
-	 * non-interactive and non-soft-real-time applications
-	 * preserve weight raising for a too long time interval.
-	 *
-	 * On the other end, lower values than 3 seconds make it
-	 * difficult for most interactive tasks to complete their jobs
-	 * before weight-raising finishes.
-	 */
-	if (dur > msecs_to_jiffies(13000))
-		dur = msecs_to_jiffies(13000);
-	else if (dur < msecs_to_jiffies(3000))
-		dur = msecs_to_jiffies(3000);
-
-	return dur;
-}
+/*
+ * Return the farthest future time instant according to jiffies
+ * macros.
+ */
+static unsigned long bfq_greatest_from_now(void)
+{
+	return jiffies + MAX_JIFFY_OFFSET;
+}
+
+/*
+ * Return the farthest past time instant according to jiffies
+ * macros.
+ */
+static unsigned long bfq_smallest_from_now(void)
+{
+	return jiffies - MAX_JIFFY_OFFSET;
+}
 
 static void bfq_update_bfqq_wr_on_rq_arrival(struct bfq_data *bfqd,
@@ -1216,7 +1250,19 @@ static void bfq_update_bfqq_wr_on_rq_arrival(struct bfq_data *bfqd,
 			bfqq->wr_coeff = bfqd->bfq_wr_coeff;
 			bfqq->wr_cur_max_time = bfq_wr_duration(bfqd);
 		} else {
-			bfqq->wr_start_at_switch_to_srt = jiffies;
+			/*
+			 * No interactive weight raising in progress
+			 * here: assign minus infinity to
+			 * wr_start_at_switch_to_srt, to make sure
+			 * that, at the end of the soft-real-time
+			 * weight raising periods that is starting
+			 * now, no interactive weight-raising period
+			 * may be wrongly considered as still in
+			 * progress (and thus actually started by
+			 * mistake).
+			 */
+			bfqq->wr_start_at_switch_to_srt =
+				bfq_smallest_from_now();
 			bfqq->wr_coeff = bfqd->bfq_wr_coeff *
 					 BFQ_SOFTRT_WEIGHT_FACTOR;
 			bfqq->wr_cur_max_time =
@@ -2016,10 +2062,27 @@ static void bfq_bfqq_save_state(struct bfq_queue *bfqq)
 	bic->saved_IO_bound = bfq_bfqq_IO_bound(bfqq);
 	bic->saved_in_large_burst = bfq_bfqq_in_large_burst(bfqq);
 	bic->was_in_burst_list = !hlist_unhashed(&bfqq->burst_list_node);
-	bic->saved_wr_coeff = bfqq->wr_coeff;
-	bic->saved_wr_start_at_switch_to_srt = bfqq->wr_start_at_switch_to_srt;
-	bic->saved_last_wr_start_finish = bfqq->last_wr_start_finish;
-	bic->saved_wr_cur_max_time = bfqq->wr_cur_max_time;
+	if (unlikely(bfq_bfqq_just_created(bfqq) &&
+		     !bfq_bfqq_in_large_burst(bfqq))) {
+		/*
+		 * bfqq being merged right after being created: bfqq
+		 * would have deserved interactive weight raising, but
+		 * did not make it to be set in a weight-raised state,
+		 * because of this early merge. Store directly the
+		 * weight-raising state that would have been assigned
+		 * to bfqq, so that to avoid that bfqq unjustly fails
+		 * to enjoy weight raising if split soon.
+		 */
+		bic->saved_wr_coeff = bfqq->bfqd->bfq_wr_coeff;
+		bic->saved_wr_cur_max_time = bfq_wr_duration(bfqq->bfqd);
+		bic->saved_last_wr_start_finish = jiffies;
+	} else {
+		bic->saved_wr_coeff = bfqq->wr_coeff;
+		bic->saved_wr_start_at_switch_to_srt =
+			bfqq->wr_start_at_switch_to_srt;
+		bic->saved_last_wr_start_finish = bfqq->last_wr_start_finish;
+		bic->saved_wr_cur_max_time = bfqq->wr_cur_max_time;
+	}
 }
 
 static void
@@ -2897,24 +2960,6 @@ static unsigned long bfq_bfqq_softrt_next_start(struct bfq_data *bfqd,
 		    jiffies + nsecs_to_jiffies(bfqq->bfqd->bfq_slice_idle) + 4);
 }
 
-/*
- * Return the farthest future time instant according to jiffies
- * macros.
- */
-static unsigned long bfq_greatest_from_now(void)
-{
-	return jiffies + MAX_JIFFY_OFFSET;
-}
-
-/*
- * Return the farthest past time instant according to jiffies
- * macros.
- */
-static unsigned long bfq_smallest_from_now(void)
-{
-	return jiffies - MAX_JIFFY_OFFSET;
-}
-
 /**
  * bfq_bfqq_expire - expire a queue.
  * @bfqd: device owning the queue.
@@ -3489,11 +3534,7 @@ static void bfq_update_wr_data(struct bfq_data *bfqd, struct bfq_queue *bfqq)
 					 bfq_wr_duration(bfqd)))
 				bfq_bfqq_end_wr(bfqq);
 			else {
-				/* switch back to interactive wr */
-				bfqq->wr_coeff = bfqd->bfq_wr_coeff;
-				bfqq->wr_cur_max_time = bfq_wr_duration(bfqd);
-				bfqq->last_wr_start_finish =
-					bfqq->wr_start_at_switch_to_srt;
+				switch_back_to_interactive_wr(bfqq, bfqd);
 				bfqq->entity.prio_changed = 1;
 			}
 		}
@@ -3685,16 +3726,37 @@ void bfq_put_queue(struct bfq_queue *bfqq)
 	if (bfqq->ref)
 		return;
 
 	if (bfq_bfqq_sync(bfqq))
-		/*
-		 * The fact that this queue is being destroyed does not
-		 * invalidate the fact that this queue may have been
-		 * activated during the current burst. As a consequence,
-		 * although the queue does not exist anymore, and hence
-		 * needs to be removed from the burst list if there,
-		 * the burst size has not to be decremented.
-		 */
-		hlist_del_init(&bfqq->burst_list_node);
+		if (!hlist_unhashed(&bfqq->burst_list_node)) {
+			hlist_del_init(&bfqq->burst_list_node);
+			/*
+			 * Decrement also burst size after the removal, if the
+			 * process associated with bfqq is exiting, and thus
+			 * does not contribute to the burst any longer. This
+			 * decrement helps filter out false positives of large
+			 * bursts, when some short-lived process (often due to
+			 * the execution of commands by some service) happens
+			 * to start and exit while a complex application is
+			 * starting, and thus spawning several processes that
+			 * do I/O (and that *must not* be treated as a large
+			 * burst, see comments on bfq_handle_burst).
+			 *
+			 * In particular, the decrement is performed only if:
+			 * 1) bfqq is not a merged queue, because, if it is,
+			 * then this free of bfqq is not triggered by the exit
+			 * of the process bfqq is associated with, but exactly
+			 * by the fact that bfqq has just been merged.
+			 * 2) burst_size is greater than 0, to handle
+			 * unbalanced decrements. Unbalanced decrements may
+			 * happen in te following case: bfqq is inserted into
+			 * the current burst list--without incrementing
+			 * bust_size--because of a split, but the current
+			 * burst list is not the burst list bfqq belonged to
+			 * (see comments on the case of a split in
+			 * bfq_set_request).
+			 */
+			if (bfqq->bic && bfqq->bfqd->burst_size > 0)
+				bfqq->bfqd->burst_size--;
+		}
 
 	kmem_cache_free(bfq_pool, bfqq);
 #ifdef CONFIG_BFQ_GROUP_IOSCHED
@@ -4127,7 +4189,6 @@ static void __bfq_insert_request(struct bfq_data *bfqd, struct request *rq)
 		new_bfqq->allocated++;
 		bfqq->allocated--;
 		new_bfqq->ref++;
-		bfq_clear_bfqq_just_created(bfqq);
 		/*
 		 * If the bic associated with the process
 		 * issuing this request still points to bfqq
@@ -4139,6 +4200,8 @@ static void __bfq_insert_request(struct bfq_data *bfqd, struct request *rq)
 		if (bic_to_bfqq(RQ_BIC(rq), 1) == bfqq)
 			bfq_merge_bfqqs(bfqd, RQ_BIC(rq),
 					bfqq, new_bfqq);
 
+		bfq_clear_bfqq_just_created(bfqq);
+
 		/*
 		 * rq is about to be enqueued into new_bfqq,
 		 * release rq reference on bfqq
@@ -4424,6 +4487,34 @@ static struct bfq_queue *bfq_get_bfqq_handle_split(struct bfq_data *bfqd,
 		else {
 			bfq_clear_bfqq_in_large_burst(bfqq);
 			if (bic->was_in_burst_list)
+				/*
+				 * If bfqq was in the current
+				 * burst list before being
+				 * merged, then we have to add
+				 * it back. And we do not need
+				 * to increase burst_size, as
+				 * we did not decrement
+				 * burst_size when we removed
+				 * bfqq from the burst list as
+				 * a consequence of a merge
+				 * (see comments in
+				 * bfq_put_queue). In this
+				 * respect, it would be rather
+				 * costly to know whether the
+				 * current burst list is still
+				 * the same burst list from
+				 * which bfqq was removed on
+				 * the merge. To avoid this
+				 * cost, if bfqq was in a
+				 * burst list, then we add
+				 * bfqq to the current burst
+				 * list without any further
+				 * check. This can cause
+				 * inappropriate insertions,
+				 * but rarely enough to not
+				 * harm the detection of large
+				 * bursts significantly.
+				 */
				hlist_add_head(&bfqq->burst_list_node,
					       &bfqd->burst_list);
 		}
@@ -4775,7 +4866,7 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e)
 	bfq_init_root_group(bfqd->root_group, bfqd);
 	bfq_init_entity(&bfqd->oom_bfqq.entity, bfqd->root_group);
 
+	wbt_disable_default(q);
 	return 0;
 
 out_free:
block/bio-integrity.c

@@ -485,11 +485,8 @@ EXPORT_SYMBOL(bioset_integrity_create);
 
 void bioset_integrity_free(struct bio_set *bs)
 {
-	if (bs->bio_integrity_pool)
-		mempool_destroy(bs->bio_integrity_pool);
-
-	if (bs->bvec_integrity_pool)
-		mempool_destroy(bs->bvec_integrity_pool);
+	mempool_destroy(bs->bio_integrity_pool);
+	mempool_destroy(bs->bvec_integrity_pool);
 }
 EXPORT_SYMBOL(bioset_integrity_free);
block/bio.c: +3, -37

@@ -400,7 +400,7 @@ static void punt_bios_to_rescuer(struct bio_set *bs)
 
 /**
  * bio_alloc_bioset - allocate a bio for I/O
- * @gfp_mask:   the GFP_ mask given to the slab allocator
+ * @gfp_mask:   the GFP_* mask given to the slab allocator
  * @nr_iovecs:	number of iovecs to pre-allocate
  * @bs:		the bio_set to allocate from.
 *
@@ -1931,11 +1931,8 @@ void bioset_free(struct bio_set *bs)
 	if (bs->rescue_workqueue)
 		destroy_workqueue(bs->rescue_workqueue);
 
-	if (bs->bio_pool)
-		mempool_destroy(bs->bio_pool);
-
-	if (bs->bvec_pool)
-		mempool_destroy(bs->bvec_pool);
+	mempool_destroy(bs->bio_pool);
+	mempool_destroy(bs->bvec_pool);
 
 	bioset_integrity_free(bs);
 	bio_put_slab(bs);
@@ -2035,37 +2032,6 @@ int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css)
 }
 EXPORT_SYMBOL_GPL(bio_associate_blkcg);
 
-/**
- * bio_associate_current - associate a bio with %current
- * @bio: target bio
- *
- * Associate @bio with %current if it hasn't been associated yet. Block
- * layer will treat @bio as if it were issued by %current no matter which
- * task actually issues it.
- *
- * This function takes an extra reference of @task's io_context and blkcg
- * which will be put when @bio is released. The caller must own @bio,
- * ensure %current->io_context exists, and is responsible for synchronizing
- * calls to this function.
- */
-int bio_associate_current(struct bio *bio)
-{
-	struct io_context *ioc;
-
-	if (bio->bi_css)
-		return -EBUSY;
-
-	ioc = current->io_context;
-	if (!ioc)
-		return -ENOENT;
-
-	get_io_context_active(ioc);
-	bio->bi_ioc = ioc;
-	bio->bi_css = task_get_css(current, io_cgrp_id);
-	return 0;
-}
-EXPORT_SYMBOL_GPL(bio_associate_current);
-
 /**
  * bio_disassociate_task - undo bio_associate_current()
  * @bio: target bio
block/blk-cgroup.c: +7, -2

@@ -1419,6 +1419,11 @@ int blkcg_policy_register(struct blkcg_policy *pol)
 	if (i >= BLKCG_MAX_POLS)
 		goto err_unlock;
 
+	/* Make sure cpd/pd_alloc_fn and cpd/pd_free_fn in pairs */
+	if ((!pol->cpd_alloc_fn ^ !pol->cpd_free_fn) ||
+	    (!pol->pd_alloc_fn ^ !pol->pd_free_fn))
+		goto err_unlock;
+
 	/* register @pol */
 	pol->plid = i;
 	blkcg_policy[pol->plid] = pol;
@@ -1452,7 +1457,7 @@ int blkcg_policy_register(struct blkcg_policy *pol)
 	return 0;
 
 err_free_cpds:
-	if (pol->cpd_alloc_fn) {
+	if (pol->cpd_free_fn) {
 		list_for_each_entry(blkcg, &all_blkcgs, all_blkcgs_node) {
 			if (blkcg->cpd[pol->plid]) {
 				pol->cpd_free_fn(blkcg->cpd[pol->plid]);
@@ -1492,7 +1497,7 @@ void blkcg_policy_unregister(struct blkcg_policy *pol)
 	/* remove cpds and unregister */
 	mutex_lock(&blkcg_pol_mutex);
 
-	if (pol->cpd_alloc_fn) {
+	if (pol->cpd_free_fn) {
 		list_for_each_entry(blkcg, &all_blkcgs, all_blkcgs_node) {
 			if (blkcg->cpd[pol->plid]) {
 				pol->cpd_free_fn(blkcg->cpd[pol->plid]);
block/blk-core.c: +236, -38

@@ -333,11 +333,13 @@ EXPORT_SYMBOL(blk_stop_queue);
 void blk_sync_queue(struct request_queue *q)
 {
 	del_timer_sync(&q->timeout);
+	cancel_work_sync(&q->timeout_work);
 
 	if (q->mq_ops) {
 		struct blk_mq_hw_ctx *hctx;
 		int i;
 
+		cancel_delayed_work_sync(&q->requeue_work);
 		queue_for_each_hw_ctx(q, hctx, i)
 			cancel_delayed_work_sync(&hctx->run_work);
 	} else {
@@ -346,6 +348,37 @@ void blk_sync_queue(struct request_queue *q)
 }
 EXPORT_SYMBOL(blk_sync_queue);
 
+/**
+ * blk_set_preempt_only - set QUEUE_FLAG_PREEMPT_ONLY
+ * @q: request queue pointer
+ *
+ * Returns the previous value of the PREEMPT_ONLY flag - 0 if the flag was not
+ * set and 1 if the flag was already set.
+ */
+int blk_set_preempt_only(struct request_queue *q)
+{
+	unsigned long flags;
+	int res;
+
+	spin_lock_irqsave(q->queue_lock, flags);
+	res = queue_flag_test_and_set(QUEUE_FLAG_PREEMPT_ONLY, q);
+	spin_unlock_irqrestore(q->queue_lock, flags);
+
+	return res;
+}
+EXPORT_SYMBOL_GPL(blk_set_preempt_only);
+
+void blk_clear_preempt_only(struct request_queue *q)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(q->queue_lock, flags);
+	queue_flag_clear(QUEUE_FLAG_PREEMPT_ONLY, q);
+	wake_up_all(&q->mq_freeze_wq);
+	spin_unlock_irqrestore(q->queue_lock, flags);
+}
+EXPORT_SYMBOL_GPL(blk_clear_preempt_only);
+
 /**
  * __blk_run_queue_uncond - run a queue whether or not it has been stopped
  * @q: The queue to run
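The PREEMPT_ONLY machinery above is the core of Bart's quiesce fix. A minimal sketch of the intended calling sequence, modeled on what the SCSI midlayer does in this series; the function name is illustrative and error handling is elided:

	/* Hedged sketch, not part of this diff: quiesce a queue reliably. */
	static int example_quiesce_queue(struct request_queue *q)
	{
		blk_set_preempt_only(q);

		/*
		 * Freezing and unfreezing forces a rendezvous with all
		 * concurrent blk_queue_enter() callers, so the flag is
		 * globally visible before any new non-preempt request
		 * can enter the queue.
		 */
		blk_mq_freeze_queue(q);
		blk_mq_unfreeze_queue(q);
		return 0;
	}

Once quiesced, only callers passing BLK_MQ_REQ_PREEMPT (e.g. power-management requests) get past blk_queue_enter(); blk_clear_preempt_only() reopens the queue and wakes all waiters.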
@@ -610,6 +643,9 @@ void blk_set_queue_dying(struct request_queue *q)
 		}
 		spin_unlock_irq(q->queue_lock);
 	}
+
+	/* Make blk_queue_enter() reexamine the DYING flag. */
+	wake_up_all(&q->mq_freeze_wq);
 }
 EXPORT_SYMBOL_GPL(blk_set_queue_dying);
 
@@ -718,7 +754,7 @@ static void free_request_size(void *element, void *data)
 int blk_init_rl(struct request_list *rl, struct request_queue *q,
 		gfp_t gfp_mask)
 {
-	if (unlikely(rl->rq_pool))
+	if (unlikely(rl->rq_pool) || q->mq_ops)
 		return 0;
 
 	rl->q = q;
@@ -760,15 +796,38 @@ struct request_queue *blk_alloc_queue(gfp_t gfp_mask)
 }
 EXPORT_SYMBOL(blk_alloc_queue);
 
-int blk_queue_enter(struct request_queue *q, bool nowait)
+/**
+ * blk_queue_enter() - try to increase q->q_usage_counter
+ * @q: request queue pointer
+ * @flags: BLK_MQ_REQ_NOWAIT and/or BLK_MQ_REQ_PREEMPT
+ */
+int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags)
 {
+	const bool preempt = flags & BLK_MQ_REQ_PREEMPT;
+
 	while (true) {
+		bool success = false;
 		int ret;
 
-		if (percpu_ref_tryget_live(&q->q_usage_counter))
+		rcu_read_lock_sched();
+		if (percpu_ref_tryget_live(&q->q_usage_counter)) {
+			/*
+			 * The code that sets the PREEMPT_ONLY flag is
+			 * responsible for ensuring that that flag is globally
+			 * visible before the queue is unfrozen.
+			 */
+			if (preempt || !blk_queue_preempt_only(q)) {
+				success = true;
+			} else {
+				percpu_ref_put(&q->q_usage_counter);
+			}
+		}
+		rcu_read_unlock_sched();
+
+		if (success)
 			return 0;
 
-		if (nowait)
+		if (flags & BLK_MQ_REQ_NOWAIT)
 			return -EBUSY;
 
 		/*
@@ -781,7 +840,8 @@ int blk_queue_enter(struct request_queue *q, bool nowait)
 		smp_rmb();
 
 		ret = wait_event_interruptible(q->mq_freeze_wq,
-				!atomic_read(&q->mq_freeze_depth) ||
+				(atomic_read(&q->mq_freeze_depth) == 0 &&
+				 (preempt || !blk_queue_preempt_only(q))) ||
 				blk_queue_dying(q));
 		if (blk_queue_dying(q))
 			return -ENODEV;
@@ -844,6 +904,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
 	setup_timer(&q->backing_dev_info->laptop_mode_wb_timer,
 		    laptop_mode_timer_fn, (unsigned long) q);
 	setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q);
+	INIT_WORK(&q->timeout_work, NULL);
 	INIT_LIST_HEAD(&q->queue_head);
 	INIT_LIST_HEAD(&q->timeout_list);
 	INIT_LIST_HEAD(&q->icq_list);
@@ -1154,7 +1215,7 @@ int blk_update_nr_requests(struct request_queue *q, unsigned int nr)
 * @rl: request list to allocate from
 * @op: operation and flags
 * @bio: bio to allocate request for (can be %NULL)
- * @gfp_mask: allocation mask
+ * @flags: BLQ_MQ_REQ_* flags
 *
 * Get a free request from @q. This function may fail under memory
 * pressure or if @q is dead.
@@ -1164,7 +1225,7 @@ int blk_update_nr_requests(struct request_queue *q, unsigned int nr)
 * Returns request pointer on success, with @q->queue_lock *not held*.
 */
 static struct request *__get_request(struct request_list *rl, unsigned int op,
-		struct bio *bio, gfp_t gfp_mask)
+		struct bio *bio, blk_mq_req_flags_t flags)
 {
 	struct request_queue *q = rl->q;
 	struct request *rq;
@@ -1173,6 +1234,8 @@ static struct request *__get_request(struct request_list *rl, unsigned int op,
 	struct io_cq *icq = NULL;
 	const bool is_sync = op_is_sync(op);
 	int may_queue;
+	gfp_t gfp_mask = flags & BLK_MQ_REQ_NOWAIT ? GFP_ATOMIC :
+			 __GFP_DIRECT_RECLAIM;
 	req_flags_t rq_flags = RQF_ALLOCED;
 
 	lockdep_assert_held(q->queue_lock);
@@ -1255,6 +1318,8 @@ static struct request *__get_request(struct request_list *rl, unsigned int op,
 	blk_rq_set_rl(rq, rl);
 	rq->cmd_flags = op;
 	rq->rq_flags = rq_flags;
+	if (flags & BLK_MQ_REQ_PREEMPT)
+		rq->rq_flags |= RQF_PREEMPT;
 
 	/* init elvpriv */
 	if (rq_flags & RQF_ELVPRIV) {
@@ -1333,7 +1398,7 @@ rq_starved:
 * @q: request_queue to allocate request from
 * @op: operation and flags
 * @bio: bio to allocate request for (can be %NULL)
- * @gfp_mask: allocation mask
+ * @flags: BLK_MQ_REQ_* flags.
 *
 * Get a free request from @q. If %__GFP_DIRECT_RECLAIM is set in @gfp_mask,
 * this function keeps retrying under memory pressure and fails iff @q is dead.
@@ -1343,7 +1408,7 @@ rq_starved:
 * Returns request pointer on success, with @q->queue_lock *not held*.
 */
 static struct request *get_request(struct request_queue *q, unsigned int op,
-		struct bio *bio, gfp_t gfp_mask)
+		struct bio *bio, blk_mq_req_flags_t flags)
 {
 	const bool is_sync = op_is_sync(op);
 	DEFINE_WAIT(wait);
@@ -1355,7 +1420,7 @@ static struct request *get_request(struct request_queue *q, unsigned int op,
 
 	rl = blk_get_rl(q, bio);	/* transferred to @rq on success */
retry:
-	rq = __get_request(rl, op, bio, gfp_mask);
+	rq = __get_request(rl, op, bio, flags);
 	if (!IS_ERR(rq))
 		return rq;
 
@@ -1364,7 +1429,7 @@ retry:
 		return ERR_PTR(-EAGAIN);
 	}
 
-	if (!gfpflags_allow_blocking(gfp_mask) || unlikely(blk_queue_dying(q))) {
+	if ((flags & BLK_MQ_REQ_NOWAIT) || unlikely(blk_queue_dying(q))) {
 		blk_put_rl(rl);
 		return rq;
 	}
@@ -1391,20 +1456,28 @@ retry:
 	goto retry;
 }
 
+/* flags: BLK_MQ_REQ_PREEMPT and/or BLK_MQ_REQ_NOWAIT. */
 static struct request *blk_old_get_request(struct request_queue *q,
-					   unsigned int op, gfp_t gfp_mask)
+				unsigned int op, blk_mq_req_flags_t flags)
 {
 	struct request *rq;
+	gfp_t gfp_mask = flags & BLK_MQ_REQ_NOWAIT ? GFP_ATOMIC :
+			 __GFP_DIRECT_RECLAIM;
+	int ret = 0;
 
 	WARN_ON_ONCE(q->mq_ops);
 
 	/* create ioc upfront */
 	create_io_context(gfp_mask, q->node);
 
+	ret = blk_queue_enter(q, flags);
+	if (ret)
+		return ERR_PTR(ret);
 	spin_lock_irq(q->queue_lock);
-	rq = get_request(q, op, NULL, gfp_mask);
+	rq = get_request(q, op, NULL, flags);
 	if (IS_ERR(rq)) {
 		spin_unlock_irq(q->queue_lock);
+		blk_queue_exit(q);
 		return rq;
 	}
 
@@ -1415,25 +1488,40 @@ static struct request *blk_old_get_request(struct request_queue *q,
 	return rq;
 }
 
-struct request *blk_get_request(struct request_queue *q, unsigned int op,
-				gfp_t gfp_mask)
+/**
+ * blk_get_request_flags - allocate a request
+ * @q: request queue to allocate a request for
+ * @op: operation (REQ_OP_*) and REQ_* flags, e.g. REQ_SYNC.
+ * @flags: BLK_MQ_REQ_* flags, e.g. BLK_MQ_REQ_NOWAIT.
+ */
+struct request *blk_get_request_flags(struct request_queue *q, unsigned int op,
+				      blk_mq_req_flags_t flags)
 {
 	struct request *req;
 
+	WARN_ON_ONCE(op & REQ_NOWAIT);
+	WARN_ON_ONCE(flags & ~(BLK_MQ_REQ_NOWAIT | BLK_MQ_REQ_PREEMPT));
+
 	if (q->mq_ops) {
-		req = blk_mq_alloc_request(q, op,
-			(gfp_mask & __GFP_DIRECT_RECLAIM) ?
-				0 : BLK_MQ_REQ_NOWAIT);
+		req = blk_mq_alloc_request(q, op, flags);
 		if (!IS_ERR(req) && q->mq_ops->initialize_rq_fn)
 			q->mq_ops->initialize_rq_fn(req);
 	} else {
-		req = blk_old_get_request(q, op, gfp_mask);
+		req = blk_old_get_request(q, op, flags);
 		if (!IS_ERR(req) && q->initialize_rq_fn)
 			q->initialize_rq_fn(req);
 	}
 
 	return req;
 }
+EXPORT_SYMBOL(blk_get_request_flags);
+
+struct request *blk_get_request(struct request_queue *q, unsigned int op,
+				gfp_t gfp_mask)
+{
+	return blk_get_request_flags(q, op, gfp_mask & __GFP_DIRECT_RECLAIM ?
+				     0 : BLK_MQ_REQ_NOWAIT);
+}
 EXPORT_SYMBOL(blk_get_request);
 
 /**
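A hedged usage sketch of the new allocator entry point, assuming a caller that must make progress while the queue is preempt-only (REQ_OP_DRV_IN stands in for whatever command the caller issues):

	struct request *rq;

	rq = blk_get_request_flags(q, REQ_OP_DRV_IN, BLK_MQ_REQ_PREEMPT);
	if (IS_ERR(rq))
		return PTR_ERR(rq);
	/* set up and issue rq, then release it */
	blk_put_request(rq);

The old blk_get_request() keeps its gfp_t signature and simply maps the absence of __GFP_DIRECT_RECLAIM to BLK_MQ_REQ_NOWAIT, as shown in the wrapper above.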
@@ -1576,6 +1664,7 @@ void __blk_put_request(struct request_queue *q, struct request *req)
 		blk_free_request(rl, req);
 		freed_request(rl, sync, rq_flags);
 		blk_put_rl(rl);
+		blk_queue_exit(q);
 	}
 }
 EXPORT_SYMBOL_GPL(__blk_put_request);
@@ -1857,8 +1946,10 @@ get_rq:
 	 * Grab a free request. This is might sleep but can not fail.
 	 * Returns with the queue unlocked.
 	 */
-	req = get_request(q, bio->bi_opf, bio, GFP_NOIO);
+	blk_queue_enter_live(q);
+	req = get_request(q, bio->bi_opf, bio, 0);
 	if (IS_ERR(req)) {
+		blk_queue_exit(q);
 		__wbt_done(q->rq_wb, wb_acct);
 		if (PTR_ERR(req) == -ENOMEM)
 			bio->bi_status = BLK_STS_RESOURCE;
@@ -2200,8 +2291,10 @@ blk_qc_t generic_make_request(struct bio *bio)
 	current->bio_list = bio_list_on_stack;
 	do {
 		struct request_queue *q = bio->bi_disk->queue;
+		blk_mq_req_flags_t flags = bio->bi_opf & REQ_NOWAIT ?
+			BLK_MQ_REQ_NOWAIT : 0;
 
-		if (likely(blk_queue_enter(q, bio->bi_opf & REQ_NOWAIT) == 0)) {
+		if (likely(blk_queue_enter(q, flags) == 0)) {
 			struct bio_list lower, same;
 
 			/* Create a fresh bio_list for all subordinate requests */
@@ -2241,6 +2334,40 @@ out:
 }
 EXPORT_SYMBOL(generic_make_request);
 
+/**
+ * direct_make_request - hand a buffer directly to its device driver for I/O
+ * @bio:  The bio describing the location in memory and on the device.
+ *
+ * This function behaves like generic_make_request(), but does not protect
+ * against recursion.  Must only be used if the called driver is known
+ * to not call generic_make_request (or direct_make_request) again from
+ * its make_request function.  (Calling direct_make_request again from
+ * a workqueue is perfectly fine as that doesn't recurse).
+ */
+blk_qc_t direct_make_request(struct bio *bio)
+{
+	struct request_queue *q = bio->bi_disk->queue;
+	bool nowait = bio->bi_opf & REQ_NOWAIT;
+	blk_qc_t ret;
+
+	if (!generic_make_request_checks(bio))
+		return BLK_QC_T_NONE;
+
+	if (unlikely(blk_queue_enter(q, nowait ? BLK_MQ_REQ_NOWAIT : 0))) {
+		if (nowait && !blk_queue_dying(q))
+			bio->bi_status = BLK_STS_AGAIN;
+		else
+			bio->bi_status = BLK_STS_IOERR;
+		bio_endio(bio);
+		return BLK_QC_T_NONE;
+	}
+
+	ret = q->make_request_fn(q, bio);
+	blk_queue_exit(q);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(direct_make_request);
+
 /**
  * submit_bio - submit a bio to the block device layer for I/O
  * @bio: The &struct bio which describes the I/O
@@ -2285,6 +2412,17 @@ blk_qc_t submit_bio(struct bio *bio)
 }
 EXPORT_SYMBOL(submit_bio);
 
+bool blk_poll(struct request_queue *q, blk_qc_t cookie)
+{
+	if (!q->poll_fn || !blk_qc_t_valid(cookie))
+		return false;
+
+	if (current->plug)
+		blk_flush_plug_list(current->plug, false);
+	return q->poll_fn(q, cookie);
+}
+EXPORT_SYMBOL_GPL(blk_poll);
+
 /**
  * blk_cloned_rq_check_limits - Helper function to check a cloned request
  *                              for new the queue limits
@@ -2350,7 +2488,7 @@ blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request *
 		 * bypass a potential scheduler on the bottom device for
 		 * insert.
 		 */
-		blk_mq_request_bypass_insert(rq);
+		blk_mq_request_bypass_insert(rq, true);
 		return BLK_STS_OK;
 	}
 
@@ -2464,20 +2602,22 @@ void blk_account_io_done(struct request *req)
 * Don't process normal requests when queue is suspended
 * or in the process of suspending/resuming
 */
-static struct request *blk_pm_peek_request(struct request_queue *q,
-					   struct request *rq)
+static bool blk_pm_allow_request(struct request *rq)
 {
-	if (q->dev && (q->rpm_status == RPM_SUSPENDED ||
-	    (q->rpm_status != RPM_ACTIVE && !(rq->rq_flags & RQF_PM))))
-		return NULL;
-	else
-		return rq;
+	switch (rq->q->rpm_status) {
+	case RPM_RESUMING:
+	case RPM_SUSPENDING:
+		return rq->rq_flags & RQF_PM;
+	case RPM_SUSPENDED:
+		return false;
+	}
+
+	return true;
 }
 #else
-static inline struct request *blk_pm_peek_request(struct request_queue *q,
-						  struct request *rq)
+static bool blk_pm_allow_request(struct request *rq)
 {
-	return rq;
+	return true;
 }
 #endif
 
@@ -2517,6 +2657,48 @@ void blk_account_io_start(struct request *rq, bool new_io)
 	part_stat_unlock();
 }
 
+static struct request *elv_next_request(struct request_queue *q)
+{
+	struct request *rq;
+	struct blk_flush_queue *fq = blk_get_flush_queue(q, NULL);
+
+	WARN_ON_ONCE(q->mq_ops);
+
+	while (1) {
+		list_for_each_entry(rq, &q->queue_head, queuelist) {
+			if (blk_pm_allow_request(rq))
+				return rq;
+
+			if (rq->rq_flags & RQF_SOFTBARRIER)
+				break;
+		}
+
+		/*
+		 * Flush request is running and flush request isn't queueable
+		 * in the drive, we can hold the queue till flush request is
+		 * finished. Even we don't do this, driver can't dispatch next
+		 * requests and will requeue them. And this can improve
+		 * throughput too. For example, we have request flush1, write1,
+		 * flush 2. flush1 is dispatched, then queue is hold, write1
+		 * isn't inserted to queue. After flush1 is finished, flush2
+		 * will be dispatched. Since disk cache is already clean,
+		 * flush2 will be finished very soon, so looks like flush2 is
+		 * folded to flush1.
+		 * Since the queue is hold, a flag is set to indicate the queue
+		 * should be restarted later. Please see flush_end_io() for
+		 * details.
+		 */
+		if (fq->flush_pending_idx != fq->flush_running_idx &&
+				!queue_flush_queueable(q)) {
+			fq->flush_queue_delayed = 1;
+			return NULL;
+		}
+		if (unlikely(blk_queue_bypass(q)) ||
+		    !q->elevator->type->ops.sq.elevator_dispatch_fn(q, 0))
+			return NULL;
+	}
+}
+
 /**
 * blk_peek_request - peek at the top of a request queue
 * @q: request queue to peek at
@@ -2538,12 +2720,7 @@ struct request *blk_peek_request(struct request_queue *q)
 	lockdep_assert_held(q->queue_lock);
 	WARN_ON_ONCE(q->mq_ops);
 
-	while ((rq = __elv_next_request(q)) != NULL) {
-
-		rq = blk_pm_peek_request(q, rq);
-		if (!rq)
-			break;
-
+	while ((rq = elv_next_request(q)) != NULL) {
 		if (!(rq->rq_flags & RQF_STARTED)) {
 			/*
 			 * This is the first time the device driver
@@ -2695,6 +2872,27 @@ struct request *blk_fetch_request(struct request_queue *q)
 }
 EXPORT_SYMBOL(blk_fetch_request);
 
+/*
+ * Steal bios from a request and add them to a bio list.
+ * The request must not have been partially completed before.
+ */
+void blk_steal_bios(struct bio_list *list, struct request *rq)
+{
+	if (rq->bio) {
+		if (list->tail)
+			list->tail->bi_next = rq->bio;
+		else
+			list->head = rq->bio;
+		list->tail = rq->biotail;
+
+		rq->bio = NULL;
+		rq->biotail = NULL;
+	}
+
+	rq->__data_len = 0;
+}
+EXPORT_SYMBOL_GPL(blk_steal_bios);
+
 /**
 * blk_update_request - Special helper function for request stacking drivers
 * @req:      the request being processed
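blk_steal_bios() and direct_make_request() above are the building blocks for the NVMe multipath failover in this pull. A hedged sketch of the pattern (example_pick_path() is hypothetical):

	static void example_failover(struct request *rq, struct bio_list *requeue)
	{
		/* Detach the bio chain so the failed request can complete. */
		blk_steal_bios(requeue, rq);
		blk_mq_end_request(rq, BLK_STS_IOERR);
	}

	static void example_resubmit(struct bio_list *requeue)
	{
		struct bio *bio;

		while ((bio = bio_list_pop(requeue))) {
			bio->bi_disk = example_pick_path();	/* hypothetical path selector */
			direct_make_request(bio);
		}
	}

direct_make_request() is safe here only because the lower device's make_request function is known not to recurse into generic_make_request().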
block/blk-flush.c: +27, -10

@@ -231,8 +231,13 @@ static void flush_end_io(struct request *flush_rq, blk_status_t error)
 		/* release the tag's ownership to the req cloned from */
 		spin_lock_irqsave(&fq->mq_flush_lock, flags);
 		hctx = blk_mq_map_queue(q, flush_rq->mq_ctx->cpu);
-		blk_mq_tag_set_rq(hctx, flush_rq->tag, fq->orig_rq);
-		flush_rq->tag = -1;
+		if (!q->elevator) {
+			blk_mq_tag_set_rq(hctx, flush_rq->tag, fq->orig_rq);
+			flush_rq->tag = -1;
+		} else {
+			blk_mq_put_driver_tag_hctx(hctx, flush_rq);
+			flush_rq->internal_tag = -1;
+		}
 	}
 
 	running = &fq->flush_queue[fq->flush_running_idx];
@@ -318,19 +323,26 @@ static bool blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq)
 	blk_rq_init(q, flush_rq);
 
 	/*
-	 * Borrow tag from the first request since they can't
-	 * be in flight at the same time. And acquire the tag's
-	 * ownership for flush req.
+	 * In case of none scheduler, borrow tag from the first request
+	 * since they can't be in flight at the same time. And acquire
+	 * the tag's ownership for flush req.
+	 *
+	 * In case of IO scheduler, flush rq need to borrow scheduler tag
+	 * just for cheating put/get driver tag.
 	 */
 	if (q->mq_ops) {
 		struct blk_mq_hw_ctx *hctx;
 
 		flush_rq->mq_ctx = first_rq->mq_ctx;
-		flush_rq->tag = first_rq->tag;
-		fq->orig_rq = first_rq;
-
-		hctx = blk_mq_map_queue(q, first_rq->mq_ctx->cpu);
-		blk_mq_tag_set_rq(hctx, first_rq->tag, flush_rq);
+
+		if (!q->elevator) {
+			fq->orig_rq = first_rq;
+			flush_rq->tag = first_rq->tag;
+			hctx = blk_mq_map_queue(q, first_rq->mq_ctx->cpu);
+			blk_mq_tag_set_rq(hctx, first_rq->tag, flush_rq);
+		} else {
+			flush_rq->internal_tag = first_rq->internal_tag;
+		}
 	}
 
 	flush_rq->cmd_flags = REQ_OP_FLUSH | REQ_PREFLUSH;
@@ -394,6 +406,11 @@ static void mq_flush_data_end_io(struct request *rq, blk_status_t error)
 
 	hctx = blk_mq_map_queue(q, ctx->cpu);
 
+	if (q->elevator) {
+		WARN_ON(rq->tag < 0);
+		blk_mq_put_driver_tag_hctx(hctx, rq);
+	}
+
 	/*
 	 * After populating an empty queue, kick it to avoid stall.  Read
 	 * the comment in flush_end_io().
@@ -463,7 +480,7 @@ void blk_insert_flush(struct request *rq)
 	if ((policy & REQ_FSEQ_DATA) &&
 	    !(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) {
 		if (q->mq_ops)
-			blk_mq_sched_insert_request(rq, false, true, false, false);
+			blk_mq_request_bypass_insert(rq, false);
 		else
 			list_add_tail(&rq->queuelist, &q->queue_head);
 		return;
block/blk-lib.c: +82, -46

@@ -275,51 +275,18 @@ static unsigned int __blkdev_sectors_to_bio_pages(sector_t nr_sects)
 	return min(pages, (sector_t)BIO_MAX_PAGES);
 }
 
-/**
- * __blkdev_issue_zeroout - generate number of zero filed write bios
- * @bdev:	blockdev to issue
- * @sector:	start sector
- * @nr_sects:	number of sectors to write
- * @gfp_mask:	memory allocation flags (for bio_alloc)
- * @biop:	pointer to anchor bio
- * @flags:	controls detailed behavior
- *
- * Description:
- *  Zero-fill a block range, either using hardware offload or by explicitly
- *  writing zeroes to the device.
- *
- *  Note that this function may fail with -EOPNOTSUPP if the driver signals
- *  zeroing offload support, but the device fails to process the command (for
- *  some devices there is no non-destructive way to verify whether this
- *  operation is actually supported). In this case the caller should call
- *  retry the call to blkdev_issue_zeroout() and the fallback path will be used.
- *
- *  If a device is using logical block provisioning, the underlying space will
- *  not be released if %flags contains BLKDEV_ZERO_NOUNMAP.
- *
- *  If %flags contains BLKDEV_ZERO_NOFALLBACK, the function will return
- *  -EOPNOTSUPP if no explicit hardware offload for zeroing is provided.
- */
-int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
-		sector_t nr_sects, gfp_t gfp_mask, struct bio **biop,
-		unsigned flags)
+static int __blkdev_issue_zero_pages(struct block_device *bdev,
+		sector_t sector, sector_t nr_sects, gfp_t gfp_mask,
+		struct bio **biop)
 {
-	int ret;
-	int bi_size = 0;
+	struct request_queue *q = bdev_get_queue(bdev);
 	struct bio *bio = *biop;
+	int bi_size = 0;
 	unsigned int sz;
-	sector_t bs_mask;
-
-	bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1;
-	if ((sector | nr_sects) & bs_mask)
-		return -EINVAL;
+
+	if (!q)
+		return -ENXIO;
 
-	ret = __blkdev_issue_write_zeroes(bdev, sector, nr_sects, gfp_mask,
-			biop, flags);
-	if (ret != -EOPNOTSUPP || (flags & BLKDEV_ZERO_NOFALLBACK))
-		goto out;
-
-	ret = 0;
 	while (nr_sects != 0) {
 		bio = next_bio(bio, __blkdev_sectors_to_bio_pages(nr_sects),
 			       gfp_mask);
@@ -339,8 +306,46 @@ int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
 	}
 
 	*biop = bio;
-out:
-	return ret;
+	return 0;
 }
 
+/**
+ * __blkdev_issue_zeroout - generate number of zero filed write bios
+ * @bdev:	blockdev to issue
+ * @sector:	start sector
+ * @nr_sects:	number of sectors to write
+ * @gfp_mask:	memory allocation flags (for bio_alloc)
+ * @biop:	pointer to anchor bio
+ * @flags:	controls detailed behavior
+ *
+ * Description:
+ *  Zero-fill a block range, either using hardware offload or by explicitly
+ *  writing zeroes to the device.
+ *
+ *  If a device is using logical block provisioning, the underlying space will
+ *  not be released if %flags contains BLKDEV_ZERO_NOUNMAP.
+ *
+ *  If %flags contains BLKDEV_ZERO_NOFALLBACK, the function will return
+ *  -EOPNOTSUPP if no explicit hardware offload for zeroing is provided.
+ */
+int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
+		sector_t nr_sects, gfp_t gfp_mask, struct bio **biop,
+		unsigned flags)
+{
+	int ret;
+	sector_t bs_mask;
+
+	bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1;
+	if ((sector | nr_sects) & bs_mask)
+		return -EINVAL;
+
+	ret = __blkdev_issue_write_zeroes(bdev, sector, nr_sects, gfp_mask,
+			biop, flags);
+	if (ret != -EOPNOTSUPP || (flags & BLKDEV_ZERO_NOFALLBACK))
+		return ret;
+
+	return __blkdev_issue_zero_pages(bdev, sector, nr_sects, gfp_mask,
+					 biop);
+}
 EXPORT_SYMBOL(__blkdev_issue_zeroout);
 
@@ -360,18 +365,49 @@ EXPORT_SYMBOL(__blkdev_issue_zeroout);
 int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
 		sector_t nr_sects, gfp_t gfp_mask, unsigned flags)
 {
-	int ret;
-	struct bio *bio = NULL;
+	int ret = 0;
+	sector_t bs_mask;
+	struct bio *bio;
 	struct blk_plug plug;
+	bool try_write_zeroes = !!bdev_write_zeroes_sectors(bdev);
 
+	bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1;
+	if ((sector | nr_sects) & bs_mask)
+		return -EINVAL;
+
+retry:
+	bio = NULL;
 	blk_start_plug(&plug);
-	ret = __blkdev_issue_zeroout(bdev, sector, nr_sects, gfp_mask,
-			&bio, flags);
+	if (try_write_zeroes) {
+		ret = __blkdev_issue_write_zeroes(bdev, sector, nr_sects,
+						  gfp_mask, &bio, flags);
+	} else if (!(flags & BLKDEV_ZERO_NOFALLBACK)) {
+		ret = __blkdev_issue_zero_pages(bdev, sector, nr_sects,
+						gfp_mask, &bio);
+	} else {
+		/* No zeroing offload support */
+		ret = -EOPNOTSUPP;
+	}
 	if (ret == 0 && bio) {
 		ret = submit_bio_wait(bio);
 		bio_put(bio);
 	}
 	blk_finish_plug(&plug);
+	if (ret && try_write_zeroes) {
+		if (!(flags & BLKDEV_ZERO_NOFALLBACK)) {
+			try_write_zeroes = false;
+			goto retry;
+		}
+		if (!bdev_write_zeroes_sectors(bdev)) {
+			/*
+			 * Zeroing offload support was indicated, but the
+			 * device reported ILLEGAL REQUEST (for some devices
+			 * there is no non-destructive way to verify whether
+			 * WRITE ZEROES is actually supported).
+			 */
+			ret = -EOPNOTSUPP;
+		}
+	}
 
 	return ret;
 }
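A hedged usage sketch of the reworked zeroout path; bdev and sector come from the caller:

	/* Zero 1 MiB (2048 sectors of 512 bytes), trying the WRITE ZEROES
	 * offload first and falling back to writing zero pages. */
	int err = blkdev_issue_zeroout(bdev, sector, 2048, GFP_KERNEL, 0);

	/* With BLKDEV_ZERO_NOFALLBACK, a device without a working offload
	 * now yields -EOPNOTSUPP instead of silently writing zero pages. */
	err = blkdev_issue_zeroout(bdev, sector, 2048, GFP_KERNEL,
				   BLKDEV_ZERO_NOFALLBACK);

Both the start sector and the length must be aligned to the logical block size, or -EINVAL is returned.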
block/blk-mq-debugfs.c

@@ -54,7 +54,6 @@ static const char *const blk_queue_flag_name[] = {
 	QUEUE_FLAG_NAME(NOMERGES),
 	QUEUE_FLAG_NAME(SAME_COMP),
 	QUEUE_FLAG_NAME(FAIL_IO),
-	QUEUE_FLAG_NAME(STACKABLE),
 	QUEUE_FLAG_NAME(NONROT),
 	QUEUE_FLAG_NAME(IO_STAT),
 	QUEUE_FLAG_NAME(DISCARD),
@@ -75,6 +74,7 @@ static const char *const blk_queue_flag_name[] = {
 	QUEUE_FLAG_NAME(REGISTERED),
 	QUEUE_FLAG_NAME(SCSI_PASSTHROUGH),
 	QUEUE_FLAG_NAME(QUIESCED),
+	QUEUE_FLAG_NAME(PREEMPT_ONLY),
 };
 #undef QUEUE_FLAG_NAME
 
@@ -180,7 +180,6 @@ static const char *const hctx_state_name[] = {
 	HCTX_STATE_NAME(STOPPED),
 	HCTX_STATE_NAME(TAG_ACTIVE),
 	HCTX_STATE_NAME(SCHED_RESTART),
-	HCTX_STATE_NAME(TAG_WAITING),
 	HCTX_STATE_NAME(START_ON_RUN),
 };
 #undef HCTX_STATE_NAME
block/blk-mq-sched.c: +132, -73

@@ -81,20 +81,103 @@ static bool blk_mq_sched_restart_hctx(struct blk_mq_hw_ctx *hctx)
 	} else
 		clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
 
-	if (blk_mq_hctx_has_pending(hctx)) {
-		blk_mq_run_hw_queue(hctx, true);
-		return true;
-	}
-
-	return false;
+	return blk_mq_run_hw_queue(hctx, true);
 }
 
+/*
+ * Only SCSI implements .get_budget and .put_budget, and SCSI restarts
+ * its queue by itself in its completion handler, so we don't need to
+ * restart queue if .get_budget() returns BLK_STS_NO_RESOURCE.
+ */
+static void blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
+{
+	struct request_queue *q = hctx->queue;
+	struct elevator_queue *e = q->elevator;
+	LIST_HEAD(rq_list);
+
+	do {
+		struct request *rq;
+
+		if (e->type->ops.mq.has_work &&
+				!e->type->ops.mq.has_work(hctx))
+			break;
+
+		if (!blk_mq_get_dispatch_budget(hctx))
+			break;
+
+		rq = e->type->ops.mq.dispatch_request(hctx);
+		if (!rq) {
+			blk_mq_put_dispatch_budget(hctx);
+			break;
+		}
+
+		/*
+		 * Now this rq owns the budget which has to be released
+		 * if this rq won't be queued to driver via .queue_rq()
+		 * in blk_mq_dispatch_rq_list().
+		 */
+		list_add(&rq->queuelist, &rq_list);
+	} while (blk_mq_dispatch_rq_list(q, &rq_list, true));
+}
+
+static struct blk_mq_ctx *blk_mq_next_ctx(struct blk_mq_hw_ctx *hctx,
+					  struct blk_mq_ctx *ctx)
+{
+	unsigned idx = ctx->index_hw;
+
+	if (++idx == hctx->nr_ctx)
+		idx = 0;
+
+	return hctx->ctxs[idx];
+}
+
+/*
+ * Only SCSI implements .get_budget and .put_budget, and SCSI restarts
+ * its queue by itself in its completion handler, so we don't need to
+ * restart queue if .get_budget() returns BLK_STS_NO_RESOURCE.
+ */
+static void blk_mq_do_dispatch_ctx(struct blk_mq_hw_ctx *hctx)
+{
+	struct request_queue *q = hctx->queue;
+	LIST_HEAD(rq_list);
+	struct blk_mq_ctx *ctx = READ_ONCE(hctx->dispatch_from);
+
+	do {
+		struct request *rq;
+
+		if (!sbitmap_any_bit_set(&hctx->ctx_map))
+			break;
+
+		if (!blk_mq_get_dispatch_budget(hctx))
+			break;
+
+		rq = blk_mq_dequeue_from_ctx(hctx, ctx);
+		if (!rq) {
+			blk_mq_put_dispatch_budget(hctx);
+			break;
+		}
+
+		/*
+		 * Now this rq owns the budget which has to be released
+		 * if this rq won't be queued to driver via .queue_rq()
+		 * in blk_mq_dispatch_rq_list().
+		 */
+		list_add(&rq->queuelist, &rq_list);
+
+		/* round robin for fair dispatch */
+		ctx = blk_mq_next_ctx(hctx, rq->mq_ctx);
+
+	} while (blk_mq_dispatch_rq_list(q, &rq_list, true));
+
+	WRITE_ONCE(hctx->dispatch_from, ctx);
+}
+
+/* return true if hw queue need to be run again */
 void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
 {
 	struct request_queue *q = hctx->queue;
 	struct elevator_queue *e = q->elevator;
 	const bool has_sched_dispatch = e && e->type->ops.mq.dispatch_request;
-	bool did_work = false;
 	LIST_HEAD(rq_list);
 
 	/* RCU or SRCU read lock is needed before checking quiesced flag */
@@ -122,29 +205,34 @@ void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
 	 * scheduler, we can no longer merge or sort them. So it's best to
 	 * leave them there for as long as we can. Mark the hw queue as
 	 * needing a restart in that case.
+	 *
+	 * We want to dispatch from the scheduler if there was nothing
+	 * on the dispatch list or we were able to dispatch from the
+	 * dispatch list.
 	 */
 	if (!list_empty(&rq_list)) {
 		blk_mq_sched_mark_restart_hctx(hctx);
-		did_work = blk_mq_dispatch_rq_list(q, &rq_list);
-	} else if (!has_sched_dispatch) {
+		if (blk_mq_dispatch_rq_list(q, &rq_list, false)) {
+			if (has_sched_dispatch)
+				blk_mq_do_dispatch_sched(hctx);
+			else
+				blk_mq_do_dispatch_ctx(hctx);
+		}
+	} else if (has_sched_dispatch) {
+		blk_mq_do_dispatch_sched(hctx);
+	} else if (q->mq_ops->get_budget) {
+		/*
+		 * If we need to get budget before queuing request, we
+		 * dequeue request one by one from sw queue for avoiding
+		 * to mess up I/O merge when dispatch runs out of resource.
+		 *
+		 * TODO: get more budgets, and dequeue more requests in
+		 * one time.
+		 */
+		blk_mq_do_dispatch_ctx(hctx);
+	} else {
 		blk_mq_flush_busy_ctxs(hctx, &rq_list);
-		blk_mq_dispatch_rq_list(q, &rq_list);
-	}
-
-	/*
-	 * We want to dispatch from the scheduler if we had no work left
-	 * on the dispatch list, OR if we did have work but weren't able
-	 * to make progress.
-	 */
-	if (!did_work && has_sched_dispatch) {
-		do {
-			struct request *rq;
-
-			rq = e->type->ops.mq.dispatch_request(hctx);
-			if (!rq)
-				break;
-			list_add(&rq->queuelist, &rq_list);
-		} while (blk_mq_dispatch_rq_list(q, &rq_list));
+		blk_mq_dispatch_rq_list(q, &rq_list, false);
 	}
 }
 
@@ -260,21 +348,21 @@ void blk_mq_sched_request_inserted(struct request *rq)
 EXPORT_SYMBOL_GPL(blk_mq_sched_request_inserted);
 
 static bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx,
+				       bool has_sched,
 				       struct request *rq)
 {
-	if (rq->tag == -1) {
-		rq->rq_flags |= RQF_SORTED;
-		return false;
+	/* dispatch flush rq directly */
+	if (rq->rq_flags & RQF_FLUSH_SEQ) {
+		spin_lock(&hctx->lock);
+		list_add(&rq->queuelist, &hctx->dispatch);
+		spin_unlock(&hctx->lock);
+		return true;
 	}
 
-	/*
-	 * If we already have a real request tag, send directly to
-	 * the dispatch list.
-	 */
-	spin_lock(&hctx->lock);
-	list_add(&rq->queuelist, &hctx->dispatch);
-	spin_unlock(&hctx->lock);
-	return true;
+	if (has_sched)
+		rq->rq_flags |= RQF_SORTED;
+
+	return false;
 }
 
 /**
@@ -339,21 +427,6 @@ done:
 	}
 }
 
-/*
- * Add flush/fua to the queue. If we fail getting a driver tag, then
- * punt to the requeue list. Requeue will re-invoke us from a context
- * that's safe to block from.
- */
-static void blk_mq_sched_insert_flush(struct blk_mq_hw_ctx *hctx,
-				      struct request *rq, bool can_block)
-{
-	if (blk_mq_get_driver_tag(rq, &hctx, can_block)) {
-		blk_insert_flush(rq);
-		blk_mq_run_hw_queue(hctx, true);
-	} else
-		blk_mq_add_to_requeue_list(rq, false, true);
-}
-
 void blk_mq_sched_insert_request(struct request *rq, bool at_head,
 				 bool run_queue, bool async, bool can_block)
 {
@@ -362,12 +435,15 @@ void blk_mq_sched_insert_request(struct request *rq, bool at_head,
 	struct blk_mq_ctx *ctx = rq->mq_ctx;
 	struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
 
-	if (rq->tag == -1 && op_is_flush(rq->cmd_flags)) {
-		blk_mq_sched_insert_flush(hctx, rq, can_block);
-		return;
+	/* flush rq in flush machinery need to be dispatched directly */
+	if (!(rq->rq_flags & RQF_FLUSH_SEQ) && op_is_flush(rq->cmd_flags)) {
+		blk_insert_flush(rq);
+		goto run;
 	}
 
-	if (e && blk_mq_sched_bypass_insert(hctx, rq))
+	WARN_ON(e && (rq->tag != -1));
+
+	if (blk_mq_sched_bypass_insert(hctx, !!e, rq))
 		goto run;
 
 	if (e && e->type->ops.mq.insert_requests) {
@@ -393,23 +469,6 @@ void blk_mq_sched_insert_requests(struct request_queue *q,
 	struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
 	struct elevator_queue *e = hctx->queue->elevator;
 
-	if (e) {
-		struct request *rq, *next;
-
-		/*
-		 * We bypass requests that already have a driver tag assigned,
-		 * which should only be flushes. Flushes are only ever inserted
-		 * as single requests, so we shouldn't ever hit the
-		 * WARN_ON_ONCE() below (but let's handle it just in case).
-		 */
-		list_for_each_entry_safe(rq, next, list, queuelist) {
-			if (WARN_ON_ONCE(rq->tag != -1)) {
-				list_del_init(&rq->queuelist);
-				blk_mq_sched_bypass_insert(hctx, rq);
-			}
-		}
-	}
-
 	if (e && e->type->ops.mq.insert_requests)
 		e->type->ops.mq.insert_requests(hctx, list, false);
 	else
block/blk-mq-tag.c: +5, -6

@@ -298,12 +298,12 @@ void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset,
 }
 EXPORT_SYMBOL(blk_mq_tagset_busy_iter);
 
-int blk_mq_reinit_tagset(struct blk_mq_tag_set *set,
-			 int (reinit_request)(void *, struct request *))
+int blk_mq_tagset_iter(struct blk_mq_tag_set *set, void *data,
+		       int (fn)(void *, struct request *))
 {
 	int i, j, ret = 0;
 
-	if (WARN_ON_ONCE(!reinit_request))
+	if (WARN_ON_ONCE(!fn))
 		goto out;
 
 	for (i = 0; i < set->nr_hw_queues; i++) {
@@ -316,8 +316,7 @@ int blk_mq_reinit_tagset(struct blk_mq_tag_set *set,
 			if (!tags->static_rqs[j])
 				continue;
 
-			ret = reinit_request(set->driver_data,
-					     tags->static_rqs[j]);
+			ret = fn(data, tags->static_rqs[j]);
 			if (ret)
 				goto out;
 		}
@@ -326,7 +325,7 @@ int blk_mq_reinit_tagset(struct blk_mq_tag_set *set,
 out:
 	return ret;
 }
-EXPORT_SYMBOL_GPL(blk_mq_reinit_tagset);
+EXPORT_SYMBOL_GPL(blk_mq_tagset_iter);
 
 void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn,
 		void *priv)
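A hedged sketch of the generalized iterator; the callback name is illustrative. The old blk_mq_reinit_tagset() behavior becomes:

	static int example_reinit_request(void *data, struct request *rq)
	{
		/* re-initialize per-request driver state here */
		return 0;
	}

	ret = blk_mq_tagset_iter(set, set->driver_data, example_reinit_request);

Iteration stops at the first callback that returns non-zero, and that value is propagated to the caller.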
block/blk-mq-tag.h: +1, -6

@@ -44,14 +44,9 @@ static inline struct sbq_wait_state *bt_wait_ptr(struct sbitmap_queue *bt,
 	return sbq_wait_ptr(bt, &hctx->wait_index);
 }
 
-enum {
-	BLK_MQ_TAG_CACHE_MIN	= 1,
-	BLK_MQ_TAG_CACHE_MAX	= 64,
-};
-
 enum {
 	BLK_MQ_TAG_FAIL		= -1U,
-	BLK_MQ_TAG_MIN		= BLK_MQ_TAG_CACHE_MIN,
+	BLK_MQ_TAG_MIN		= 1,
 	BLK_MQ_TAG_MAX		= BLK_MQ_TAG_FAIL - 1,
 };
+250 -174: File diff suppressed because it is too large
block/blk-mq.h: +55 -5
@@ -3,6 +3,7 @@
 #define INT_BLK_MQ_H
 
 #include "blk-stat.h"
+#include "blk-mq-tag.h"
 
 struct blk_mq_tag_set;
 
@@ -26,16 +27,16 @@ struct blk_mq_ctx {
 	struct kobject		kobj;
 } ____cacheline_aligned_in_smp;
 
 void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
 void blk_mq_freeze_queue(struct request_queue *q);
 void blk_mq_free_queue(struct request_queue *q);
 int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr);
 void blk_mq_wake_waiters(struct request_queue *q);
-bool blk_mq_dispatch_rq_list(struct request_queue *, struct list_head *);
+bool blk_mq_dispatch_rq_list(struct request_queue *, struct list_head *, bool);
 void blk_mq_flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list);
 bool blk_mq_hctx_has_pending(struct blk_mq_hw_ctx *hctx);
 bool blk_mq_get_driver_tag(struct request *rq, struct blk_mq_hw_ctx **hctx,
 				bool wait);
+struct request *blk_mq_dequeue_from_ctx(struct blk_mq_hw_ctx *hctx,
+					struct blk_mq_ctx *start);
 
 /*
  * Internal helpers for allocating/freeing the request map
@@ -55,7 +56,7 @@ int blk_mq_alloc_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
  */
 void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
 				bool at_head);
-void blk_mq_request_bypass_insert(struct request *rq);
+void blk_mq_request_bypass_insert(struct request *rq, bool run_queue);
 void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
 				struct list_head *list);
 
@@ -109,7 +110,7 @@ static inline void blk_mq_put_ctx(struct blk_mq_ctx *ctx)
 struct blk_mq_alloc_data {
 	/* input parameter */
 	struct request_queue *q;
-	unsigned int flags;
+	blk_mq_req_flags_t flags;
 	unsigned int shallow_depth;
 
 	/* input & output parameter */
@@ -138,4 +139,53 @@ static inline bool blk_mq_hw_queue_mapped(struct blk_mq_hw_ctx *hctx)
 void blk_mq_in_flight(struct request_queue *q, struct hd_struct *part,
 			unsigned int inflight[2]);
 
+static inline void blk_mq_put_dispatch_budget(struct blk_mq_hw_ctx *hctx)
+{
+	struct request_queue *q = hctx->queue;
+
+	if (q->mq_ops->put_budget)
+		q->mq_ops->put_budget(hctx);
+}
+
+static inline bool blk_mq_get_dispatch_budget(struct blk_mq_hw_ctx *hctx)
+{
+	struct request_queue *q = hctx->queue;
+
+	if (q->mq_ops->get_budget)
+		return q->mq_ops->get_budget(hctx);
+	return true;
+}
+
+static inline void __blk_mq_put_driver_tag(struct blk_mq_hw_ctx *hctx,
+					   struct request *rq)
+{
+	blk_mq_put_tag(hctx, hctx->tags, rq->mq_ctx, rq->tag);
+	rq->tag = -1;
+
+	if (rq->rq_flags & RQF_MQ_INFLIGHT) {
+		rq->rq_flags &= ~RQF_MQ_INFLIGHT;
+		atomic_dec(&hctx->nr_active);
+	}
+}
+
+static inline void blk_mq_put_driver_tag_hctx(struct blk_mq_hw_ctx *hctx,
+					      struct request *rq)
+{
+	if (rq->tag == -1 || rq->internal_tag == -1)
+		return;
+
+	__blk_mq_put_driver_tag(hctx, rq);
+}
+
+static inline void blk_mq_put_driver_tag(struct request *rq)
+{
+	struct blk_mq_hw_ctx *hctx;
+
+	if (rq->tag == -1 || rq->internal_tag == -1)
+		return;
+
+	hctx = blk_mq_map_queue(rq->q, rq->mq_ctx->cpu);
+	__blk_mq_put_driver_tag(hctx, rq);
+}
+
 #endif
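
The new budget helpers follow the optional-hook convention used throughout blk-mq: if a driver supplies ->get_budget/->put_budget they gate dispatch, and a missing hook means unlimited budget. A sketch of that convention with an invented toy driver; none of these names are the kernel's:

#include <stdio.h>
#include <stdbool.h>

struct hw_ctx;

struct queue_ops {
	bool (*get_budget)(struct hw_ctx *);	/* optional */
	void (*put_budget)(struct hw_ctx *);	/* optional */
};

struct hw_ctx {
	const struct queue_ops *ops;
	int budget;				/* toy per-device budget */
};

static bool get_dispatch_budget(struct hw_ctx *hctx)
{
	if (hctx->ops->get_budget)
		return hctx->ops->get_budget(hctx);
	return true;		/* no hook: dispatch is always allowed */
}

static void put_dispatch_budget(struct hw_ctx *hctx)
{
	if (hctx->ops->put_budget)
		hctx->ops->put_budget(hctx);
}

static bool toy_get(struct hw_ctx *h)
{
	if (!h->budget)
		return false;
	h->budget--;
	return true;
}

static void toy_put(struct hw_ctx *h)
{
	h->budget++;
}

int main(void)
{
	const struct queue_ops limited = { toy_get, toy_put };
	const struct queue_ops none = { NULL, NULL };
	struct hw_ctx a = { &limited, 1 }, b = { &none, 0 };

	bool first = get_dispatch_budget(&a);
	bool second = get_dispatch_budget(&a);
	printf("limited: %d then %d\n", first, second);	/* 1 then 0 */
	put_dispatch_budget(&a);	/* return the budget the successful get took */
	printf("no hook: %d\n", get_dispatch_budget(&b));	/* always 1 */
	return 0;
}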
block/blk-settings.c:
@@ -157,7 +157,7 @@ EXPORT_SYMBOL(blk_set_stacking_limits);
  * Caveat:
  * The driver that does this *must* be able to deal appropriately
  * with buffers in "highmemory". This can be accomplished by either calling
- * __bio_kmap_atomic() to get a temporary kernel mapping, or by calling
+ * kmap_atomic() to get a temporary kernel mapping, or by calling
  * blk_queue_bounce() to create a buffer in normal memory.
  **/
 void blk_queue_make_request(struct request_queue *q, make_request_fn *mfn)
block/blk-stat.c: +7 -38
@@ -11,8 +11,6 @@
 #include "blk-mq.h"
 #include "blk.h"
 
-#define BLK_RQ_STAT_BATCH	64
-
 struct blk_queue_stats {
 	struct list_head callbacks;
 	spinlock_t lock;
@@ -23,45 +21,21 @@ static void blk_stat_init(struct blk_rq_stat *stat)
 {
 	stat->min = -1ULL;
 	stat->max = stat->nr_samples = stat->mean = 0;
-	stat->batch = stat->nr_batch = 0;
-}
-
-static void blk_stat_flush_batch(struct blk_rq_stat *stat)
-{
-	const s32 nr_batch = READ_ONCE(stat->nr_batch);
-	const s32 nr_samples = READ_ONCE(stat->nr_samples);
-
-	if (!nr_batch)
-		return;
-	if (!nr_samples)
-		stat->mean = div64_s64(stat->batch, nr_batch);
-	else {
-		stat->mean = div64_s64((stat->mean * nr_samples) +
-					stat->batch,
-					nr_batch + nr_samples);
-	}
-
-	stat->nr_samples += nr_batch;
-	stat->nr_batch = stat->batch = 0;
+	stat->batch = 0;
 }
 
 /* src is a per-cpu stat, mean isn't initialized */
 static void blk_stat_sum(struct blk_rq_stat *dst, struct blk_rq_stat *src)
 {
-	blk_stat_flush_batch(src);
-
 	if (!src->nr_samples)
 		return;
 
 	dst->min = min(dst->min, src->min);
 	dst->max = max(dst->max, src->max);
 
-	if (!dst->nr_samples)
-		dst->mean = src->mean;
-	else {
-		dst->mean = div64_s64((src->mean * src->nr_samples) +
-					(dst->mean * dst->nr_samples),
-					dst->nr_samples + src->nr_samples);
-	}
+	dst->mean = div_u64(src->batch + dst->mean * dst->nr_samples,
+				dst->nr_samples + src->nr_samples);
 
 	dst->nr_samples += src->nr_samples;
 }
@@ -69,13 +43,8 @@ static void __blk_stat_add(struct blk_rq_stat *stat, u64 value)
 {
 	stat->min = min(stat->min, value);
 	stat->max = max(stat->max, value);
-
-	if (stat->batch + value < stat->batch ||
-	    stat->nr_batch + 1 == BLK_RQ_STAT_BATCH)
-		blk_stat_flush_batch(stat);
-
 	stat->batch += value;
-	stat->nr_batch++;
+	stat->nr_samples++;
 }
 
 void blk_stat_add(struct request *rq)
@@ -84,7 +53,7 @@ void blk_stat_add(struct request *rq)
 	struct blk_stat_callback *cb;
 	struct blk_rq_stat *stat;
 	int bucket;
-	s64 now, value;
+	u64 now, value;
 
 	now = __blk_stat_time(ktime_to_ns(ktime_get()));
 	if (now < blk_stat_time(&rq->issue_stat))
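
With the batching layer gone, each per-cpu stat keeps only a raw sum ("batch") and a sample count, and blk_stat_sum folds that sum into the running mean with a single u64 division, which is why BLK_RQ_STAT_BATCH and blk_stat_flush_batch() can go away entirely. A self-contained check of the merge arithmetic, using a simplified two-field stat rather than the kernel struct:

#include <inttypes.h>
#include <stdio.h>

struct rq_stat { uint64_t batch, mean, nr_samples; };

static void stat_add(struct rq_stat *s, uint64_t value)
{
	s->batch += value;	/* raw sum, no per-sample division */
	s->nr_samples++;
}

static void stat_sum(struct rq_stat *dst, const struct rq_stat *src)
{
	if (!src->nr_samples)
		return;
	/* (sum(src) + mean(dst) * n(dst)) / (n(dst) + n(src)) */
	dst->mean = (src->batch + dst->mean * dst->nr_samples) /
		    (dst->nr_samples + src->nr_samples);
	dst->nr_samples += src->nr_samples;
}

int main(void)
{
	struct rq_stat cpu0 = { 0 }, cpu1 = { 0 }, total = { 0 };

	stat_add(&cpu0, 10);
	stat_add(&cpu0, 20);	/* sum 30, n = 2 */
	stat_add(&cpu1, 60);	/* sum 60, n = 1 */

	stat_sum(&total, &cpu0);	/* mean 15, n = 2 */
	stat_sum(&total, &cpu1);	/* (60 + 15 * 2) / 3 = 30 */
	printf("mean=%" PRIu64 " n=%" PRIu64 "\n", total.mean, total.nr_samples);
	return 0;
}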
block/blk-throttle.c: +10 -2
@@ -2113,8 +2113,12 @@ static inline void throtl_update_latency_buckets(struct throtl_data *td)
 static void blk_throtl_assoc_bio(struct throtl_grp *tg, struct bio *bio)
 {
 #ifdef CONFIG_BLK_DEV_THROTTLING_LOW
-	if (bio->bi_css)
+	if (bio->bi_css) {
+		if (bio->bi_cg_private)
+			blkg_put(tg_to_blkg(bio->bi_cg_private));
 		bio->bi_cg_private = tg;
+		blkg_get(tg_to_blkg(tg));
+	}
 	blk_stat_set_issue(&bio->bi_issue_stat, bio_sectors(bio));
 #endif
 }
@@ -2284,8 +2288,10 @@ void blk_throtl_bio_endio(struct bio *bio)
 
 	start_time = blk_stat_time(&bio->bi_issue_stat) >> 10;
 	finish_time = __blk_stat_time(finish_time_ns) >> 10;
-	if (!start_time || finish_time <= start_time)
+	if (!start_time || finish_time <= start_time) {
+		blkg_put(tg_to_blkg(tg));
 		return;
+	}
 
 	lat = finish_time - start_time;
 	/* this is only for bio based driver */
@@ -2315,6 +2321,8 @@ void blk_throtl_bio_endio(struct bio *bio)
 		tg->bio_cnt /= 2;
 		tg->bad_bio_cnt /= 2;
 	}
+
+	blkg_put(tg_to_blkg(tg));
 }
 #endif
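
The throttling fix above pairs every blkg reference taken when a bio is associated with a group against a put on every exit from the endio path; note that the early return for bogus timestamps gets its own put. A toy refcount sketch of that invariant, with invented names:

#include <assert.h>
#include <stdio.h>

struct group { int refcnt; };

static void grp_get(struct group *g) { g->refcnt++; }
static void grp_put(struct group *g) { assert(g->refcnt > 0); g->refcnt--; }

static void bio_issue(struct group *g)
{
	grp_get(g);		/* the in-flight bio now pins the group */
}

static void bio_endio(struct group *g, unsigned long start, unsigned long end)
{
	if (!start || end <= start) {
		grp_put(g);	/* the early return still owes the put */
		return;
	}
	/* ... latency accounting with end - start ... */
	grp_put(g);
}

int main(void)
{
	struct group g = { 1 };	/* base reference held elsewhere */

	bio_issue(&g);
	bio_endio(&g, 0, 100);	/* bogus start time: early-return path */
	bio_issue(&g);
	bio_endio(&g, 50, 100);	/* normal accounting path */
	printf("refcnt=%d\n", g.refcnt);	/* back to 1: balanced */
	return 0;
}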
block/blk-timeout.c: +1 -4
@@ -134,8 +134,6 @@ void blk_timeout_work(struct work_struct *work)
 	struct request *rq, *tmp;
 	int next_set = 0;
 
-	if (blk_queue_enter(q, true))
-		return;
 	spin_lock_irqsave(q->queue_lock, flags);
 
 	list_for_each_entry_safe(rq, tmp, &q->timeout_list, timeout_list)
@@ -145,7 +143,6 @@
 		mod_timer(&q->timeout, round_jiffies_up(next));
 
 	spin_unlock_irqrestore(q->queue_lock, flags);
-	blk_queue_exit(q);
 }
 
 /**
@@ -211,7 +208,7 @@ void blk_add_timer(struct request *req)
 	if (!req->timeout)
 		req->timeout = q->rq_timeout;
 
-	req->deadline = jiffies + req->timeout;
+	WRITE_ONCE(req->deadline, jiffies + req->timeout);
 
 	/*
	 * Only the non-mq case needs to add the request to a protected list.
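
Switching the deadline store to WRITE_ONCE() tells the compiler it may not tear, fuse, or re-read that particular access, because the timeout path inspects req->deadline without holding a lock. A userspace analogue using C11 relaxed atomics (the kernel macro is a volatile access, not a C11 atomic, so this is an approximation):

#include <stdatomic.h>
#include <stdio.h>

static _Atomic unsigned long deadline;

static void add_timer(unsigned long jiffies, unsigned long timeout)
{
	/* one untearable store, like WRITE_ONCE(req->deadline, ...) */
	atomic_store_explicit(&deadline, jiffies + timeout,
			      memory_order_relaxed);
}

static int timed_out(unsigned long now)
{
	/* lockless reader, like a READ_ONCE() in the timeout handler */
	unsigned long d = atomic_load_explicit(&deadline,
					       memory_order_relaxed);
	return now >= d;
}

int main(void)
{
	add_timer(1000, 30);
	printf("at 1029: %d\n", timed_out(1029));	/* 0 */
	printf("at 1030: %d\n", timed_out(1030));	/* 1 */
	return 0;
}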
block/blk-wbt.c: +1 -1
@@ -654,7 +654,7 @@ void wbt_set_write_cache(struct rq_wb *rwb, bool write_cache_on)
 }
 
 /*
- * Disable wbt, if enabled by default. Only called from CFQ.
+ * Disable wbt, if enabled by default.
  */
 void wbt_disable_default(struct request_queue *q)
 {
block/blk.h: +7 -39
@@ -123,8 +123,15 @@ void blk_account_io_done(struct request *req);
  * Internal atomic flags for request handling
  */
 enum rq_atomic_flags {
+	/*
+	 * Keep these two bits first - not because we depend on the
+	 * value of them, but we do depend on them being in the same
+	 * byte of storage to ensure ordering on writes. Keeping them
+	 * first will achieve that nicely.
+	 */
 	REQ_ATOM_COMPLETE = 0,
 	REQ_ATOM_STARTED,
+
 	REQ_ATOM_POLL_SLEPT,
 };
 
@@ -149,45 +156,6 @@ static inline void blk_clear_rq_complete(struct request *rq)
 
 void blk_insert_flush(struct request *rq);
 
-static inline struct request *__elv_next_request(struct request_queue *q)
-{
-	struct request *rq;
-	struct blk_flush_queue *fq = blk_get_flush_queue(q, NULL);
-
-	WARN_ON_ONCE(q->mq_ops);
-
-	while (1) {
-		if (!list_empty(&q->queue_head)) {
-			rq = list_entry_rq(q->queue_head.next);
-			return rq;
-		}
-
-		/*
-		 * Flush request is running and flush request isn't queueable
-		 * in the drive, we can hold the queue till flush request is
-		 * finished. Even we don't do this, driver can't dispatch next
-		 * requests and will requeue them. And this can improve
-		 * throughput too. For example, we have request flush1, write1,
-		 * flush 2. flush1 is dispatched, then queue is hold, write1
-		 * isn't inserted to queue. After flush1 is finished, flush2
-		 * will be dispatched. Since disk cache is already clean,
-		 * flush2 will be finished very soon, so looks like flush2 is
-		 * folded to flush1.
-		 * Since the queue is hold, a flag is set to indicate the queue
-		 * should be restarted later. Please see flush_end_io() for
-		 * details.
-		 */
-		if (fq->flush_pending_idx != fq->flush_running_idx &&
-				!queue_flush_queueable(q)) {
-			fq->flush_queue_delayed = 1;
-			return NULL;
-		}
-		if (unlikely(blk_queue_bypass(q)) ||
-		    !q->elevator->type->ops.sq.elevator_dispatch_fn(q, 0))
-			return NULL;
-	}
-}
-
 static inline void elv_activate_rq(struct request_queue *q, struct request *rq)
 {
 	struct elevator_queue *e = q->elevator;
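
The comment added above encodes a positional guarantee: as long as REQ_ATOM_COMPLETE and REQ_ATOM_STARTED stay bits 0 and 1, they occupy the same byte of the flags word, which the atomic-flag ordering fixes in this series rely on. A tiny sketch of the bit-to-byte arithmetic; HYPOTHETICAL_FLAG is invented only to show how a later, larger bit number would land in a different byte:

#include <stdio.h>

enum {
	REQ_ATOM_COMPLETE = 0,
	REQ_ATOM_STARTED = 1,
	HYPOTHETICAL_FLAG = 9,	/* not a real kernel flag */
};

int main(void)
{
	/* byte index of a flag bit within the flags word */
	printf("COMPLETE     -> byte %d\n", REQ_ATOM_COMPLETE / 8);	/* 0 */
	printf("STARTED      -> byte %d\n", REQ_ATOM_STARTED / 8);	/* 0 */
	printf("HYPOTHETICAL -> byte %d\n", HYPOTHETICAL_FLAG / 8);	/* 1 */
	return 0;
}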
block/bsg.c: +8 -10
@@ -137,7 +137,7 @@ static inline struct hlist_head *bsg_dev_idx_hash(int index)
 
 static int blk_fill_sgv4_hdr_rq(struct request_queue *q, struct request *rq,
 				struct sg_io_v4 *hdr, struct bsg_device *bd,
-				fmode_t has_write_perm)
+				fmode_t mode)
 {
 	struct scsi_request *req = scsi_req(rq);
 
@@ -152,7 +152,7 @@ static int blk_fill_sgv4_hdr_rq(struct request_queue *q, struct request *rq,
 		return -EFAULT;
 
 	if (hdr->subprotocol == BSG_SUB_PROTOCOL_SCSI_CMD) {
-		if (blk_verify_command(req->cmd, has_write_perm))
+		if (blk_verify_command(req->cmd, mode))
 			return -EPERM;
 	} else if (!capable(CAP_SYS_RAWIO))
 		return -EPERM;
@@ -206,7 +206,7 @@ bsg_validate_sgv4_hdr(struct sg_io_v4 *hdr, int *op)
 * map sg_io_v4 to a request.
 */
 static struct request *
-bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, fmode_t has_write_perm)
+bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, fmode_t mode)
 {
 	struct request_queue *q = bd->queue;
 	struct request *rq, *next_rq = NULL;
@@ -237,7 +237,7 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, fmode_t has_write_perm)
 	if (IS_ERR(rq))
 		return rq;
 
-	ret = blk_fill_sgv4_hdr_rq(q, rq, hdr, bd, has_write_perm);
+	ret = blk_fill_sgv4_hdr_rq(q, rq, hdr, bd, mode);
 	if (ret)
 		goto out;
 
@@ -587,8 +587,7 @@ bsg_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
 }
 
 static int __bsg_write(struct bsg_device *bd, const char __user *buf,
-		       size_t count, ssize_t *bytes_written,
-		       fmode_t has_write_perm)
+		       size_t count, ssize_t *bytes_written, fmode_t mode)
 {
 	struct bsg_command *bc;
 	struct request *rq;
@@ -619,7 +618,7 @@ static int __bsg_write(struct bsg_device *bd, const char __user *buf,
 	/*
 	 * get a request, fill in the blanks, and add to request queue
 	 */
-	rq = bsg_map_hdr(bd, &bc->hdr, has_write_perm);
+	rq = bsg_map_hdr(bd, &bc->hdr, mode);
 	if (IS_ERR(rq)) {
 		ret = PTR_ERR(rq);
 		rq = NULL;
@@ -655,8 +654,7 @@ bsg_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
 	bsg_set_block(bd, file);
 
 	bytes_written = 0;
-	ret = __bsg_write(bd, buf, count, &bytes_written,
-			  file->f_mode & FMODE_WRITE);
+	ret = __bsg_write(bd, buf, count, &bytes_written, file->f_mode);
 
 	*ppos = bytes_written;
 
@@ -915,7 +913,7 @@ static long bsg_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	if (copy_from_user(&hdr, uarg, sizeof(hdr)))
 		return -EFAULT;
 
-	rq = bsg_map_hdr(bd, &hdr, file->f_mode & FMODE_WRITE);
+	rq = bsg_map_hdr(bd, &hdr, file->f_mode);
 	if (IS_ERR(rq))
 		return PTR_ERR(rq);
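
The bsg changes above stop collapsing the open mode to a single "has write permission" bool at every call site; the full fmode_t travels down and the command verifier masks out what it needs. A simplified sketch with invented constants (not the kernel's definitions):

#include <stdio.h>

typedef unsigned int fmode_t;
#define FMODE_READ	0x1u
#define FMODE_WRITE	0x2u

/* write_cmd stands in for a write-capable SCSI opcode */
static int verify_command(int write_cmd, fmode_t mode)
{
	if (write_cmd && !(mode & FMODE_WRITE))
		return -1;	/* -EPERM in the kernel */
	return 0;
}

int main(void)
{
	fmode_t ro = FMODE_READ;
	fmode_t rw = FMODE_READ | FMODE_WRITE;

	/* callers no longer pre-mask: pass file->f_mode through as-is */
	printf("ro, write cmd: %d\n", verify_command(1, ro));	/* -1 */
	printf("rw, write cmd: %d\n", verify_command(1, rw));	/* 0 */
	printf("ro, read cmd:  %d\n", verify_command(0, ro));	/* 0 */
	return 0;
}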
Some files were not shown because too many files have changed in this diff