Merge branch 'for-4.14/block' of git://git.kernel.dk/linux-block

Pull block layer updates from Jens Axboe:
 "This is the first pull request for 4.14, containing most of the code
  changes. It's a quiet series this round, which I think we needed after
  the churn of the last few series. This contains:

   - Fix for a registration race in loop, from Anton Volkov.

   - Overflow complaint fix from Arnd for DAC960.

   - Series of drbd changes from the usual suspects.

   - Conversion of the stec/skd driver to blk-mq. From Bart.

   - A few BFQ improvements/fixes from Paolo.

   - CFQ improvement from Ritesh, allowing idling for group idle.

   - A few fixes found by Dan's smatch, courtesy of Dan.

   - A warning fixup for a race between changing the IO scheduler and
     device removal. From David Jeffery.

   - A few nbd fixes from Josef.

   - Support for cgroup info in blktrace, from Shaohua.

   - Also from Shaohua, new features in the null_blk driver to allow it
     to actually hold data, among other things.

   - Various corner cases and error handling fixes from Weiping Zhang.

   - Improvements to the IO stats tracking for blk-mq from me. Can
     drastically improve performance for fast devices and/or big
     machines.

   - Series from Christoph removing bi_bdev as being needed for IO
     submission, in preparation for nvme multipathing code.

   - Series from Bart, including various cleanups and fixes for switch
     fall through case complaints"

* 'for-4.14/block' of git://git.kernel.dk/linux-block: (162 commits)
  kernfs: checking for IS_ERR() instead of NULL
  drbd: remove BIOSET_NEED_RESCUER flag from drbd_{md_,}io_bio_set
  drbd: Fix allyesconfig build, fix recent commit
  drbd: switch from kmalloc() to kmalloc_array()
  drbd: abort drbd_start_resync if there is no connection
  drbd: move global variables to drbd namespace and make some static
  drbd: rename "usermode_helper" to "drbd_usermode_helper"
  drbd: fix race between handshake and admin disconnect/down
  drbd: fix potential deadlock when trying to detach during handshake
  drbd: A single dot should be put into a sequence.
  drbd: fix rmmod cleanup, remove _all_ debugfs entries
  drbd: Use setup_timer() instead of init_timer() to simplify the code.
  drbd: fix potential get_ldev/put_ldev refcount imbalance during attach
  drbd: new disk-option disable-write-same
  drbd: Fix resource role for newly created resources in events2
  drbd: mark symbols static where possible
  drbd: Send P_NEG_ACK upon write error in protocol != C
  drbd: add explicit plugging when submitting batches
  drbd: change list_for_each_safe to while(list_first_entry_or_null)
  drbd: introduce drbd_recv_header_maybe_unplug
  ...
This commit is contained in:
Linus Torvalds
2017-09-07 11:59:42 -07:00
160 changed files with 3721 additions and 3523 deletions
+6
View File
@@ -12561,6 +12561,12 @@ M: Ion Badulescu <ionut@badula.org>
S: Odd Fixes
F: drivers/net/ethernet/adaptec/starfire*
STEC S1220 SKD DRIVER
M: Bart Van Assche <bart.vanassche@wdc.com>
L: linux-block@vger.kernel.org
S: Maintained
F: drivers/block/skd*[ch]
STI CEC DRIVER
M: Benjamin Gaignard <benjamin.gaignard@linaro.org>
S: Maintained
+1 -1
View File
@@ -110,7 +110,7 @@ axon_ram_irq_handler(int irq, void *dev)
static blk_qc_t
axon_ram_make_request(struct request_queue *queue, struct bio *bio)
{
struct axon_ram_bank *bank = bio->bi_bdev->bd_disk->private_data;
struct axon_ram_bank *bank = bio->bi_disk->private_data;
unsigned long phys_mem, phys_end;
void *user_mem;
struct bio_vec vec;
+100 -82
View File
@@ -128,7 +128,7 @@ BFQ_BFQQ_FNS(busy);
BFQ_BFQQ_FNS(wait_request);
BFQ_BFQQ_FNS(non_blocking_wait_rq);
BFQ_BFQQ_FNS(fifo_expire);
BFQ_BFQQ_FNS(idle_window);
BFQ_BFQQ_FNS(has_short_ttime);
BFQ_BFQQ_FNS(sync);
BFQ_BFQQ_FNS(IO_bound);
BFQ_BFQQ_FNS(in_large_burst);
@@ -731,10 +731,10 @@ bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_data *bfqd,
unsigned int old_wr_coeff = bfqq->wr_coeff;
bool busy = bfq_already_existing && bfq_bfqq_busy(bfqq);
if (bic->saved_idle_window)
bfq_mark_bfqq_idle_window(bfqq);
if (bic->saved_has_short_ttime)
bfq_mark_bfqq_has_short_ttime(bfqq);
else
bfq_clear_bfqq_idle_window(bfqq);
bfq_clear_bfqq_has_short_ttime(bfqq);
if (bic->saved_IO_bound)
bfq_mark_bfqq_IO_bound(bfqq);
@@ -2012,7 +2012,7 @@ static void bfq_bfqq_save_state(struct bfq_queue *bfqq)
return;
bic->saved_ttime = bfqq->ttime;
bic->saved_idle_window = bfq_bfqq_idle_window(bfqq);
bic->saved_has_short_ttime = bfq_bfqq_has_short_ttime(bfqq);
bic->saved_IO_bound = bfq_bfqq_IO_bound(bfqq);
bic->saved_in_large_burst = bfq_bfqq_in_large_burst(bfqq);
bic->was_in_burst_list = !hlist_unhashed(&bfqq->burst_list_node);
@@ -3038,8 +3038,8 @@ void bfq_bfqq_expire(struct bfq_data *bfqd,
}
bfq_log_bfqq(bfqd, bfqq,
"expire (%d, slow %d, num_disp %d, idle_win %d)", reason,
slow, bfqq->dispatched, bfq_bfqq_idle_window(bfqq));
"expire (%d, slow %d, num_disp %d, short_ttime %d)", reason,
slow, bfqq->dispatched, bfq_bfqq_has_short_ttime(bfqq));
/*
* Increase, decrease or leave budget unchanged according to
@@ -3114,35 +3114,56 @@ static bool bfq_may_expire_for_budg_timeout(struct bfq_queue *bfqq)
static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq)
{
struct bfq_data *bfqd = bfqq->bfqd;
bool idling_boosts_thr, idling_boosts_thr_without_issues,
bool rot_without_queueing =
!blk_queue_nonrot(bfqd->queue) && !bfqd->hw_tag,
bfqq_sequential_and_IO_bound,
idling_boosts_thr, idling_boosts_thr_without_issues,
idling_needed_for_service_guarantees,
asymmetric_scenario;
if (bfqd->strict_guarantees)
return true;
/*
* Idling is performed only if slice_idle > 0. In addition, we
* do not idle if
* (a) bfqq is async
* (b) bfqq is in the idle io prio class: in this case we do
* not idle because we want to minimize the bandwidth that
* queues in this class can steal from higher-priority queues
*/
if (bfqd->bfq_slice_idle == 0 || !bfq_bfqq_sync(bfqq) ||
bfq_class_idle(bfqq))
return false;
bfqq_sequential_and_IO_bound = !BFQQ_SEEKY(bfqq) &&
bfq_bfqq_IO_bound(bfqq) && bfq_bfqq_has_short_ttime(bfqq);
/*
* The next variable takes into account the cases where idling
* boosts the throughput.
*
* The value of the variable is computed considering, first, that
* idling is virtually always beneficial for the throughput if:
* (a) the device is not NCQ-capable, or
* (b) regardless of the presence of NCQ, the device is rotational
* and the request pattern for bfqq is I/O-bound and sequential.
* (a) the device is not NCQ-capable and rotational, or
* (b) regardless of the presence of NCQ, the device is rotational and
* the request pattern for bfqq is I/O-bound and sequential, or
* (c) regardless of whether it is rotational, the device is
* not NCQ-capable and the request pattern for bfqq is
* I/O-bound and sequential.
*
* Secondly, and in contrast to the above item (b), idling an
* NCQ-capable flash-based device would not boost the
* throughput even with sequential I/O; rather it would lower
* the throughput in proportion to how fast the device
* is. Accordingly, the next variable is true if any of the
* above conditions (a) and (b) is true, and, in particular,
* happens to be false if bfqd is an NCQ-capable flash-based
* device.
* above conditions (a), (b) or (c) is true, and, in
* particular, happens to be false if bfqd is an NCQ-capable
* flash-based device.
*/
idling_boosts_thr = !bfqd->hw_tag ||
(!blk_queue_nonrot(bfqd->queue) && bfq_bfqq_IO_bound(bfqq) &&
bfq_bfqq_idle_window(bfqq));
idling_boosts_thr = rot_without_queueing ||
((!blk_queue_nonrot(bfqd->queue) || !bfqd->hw_tag) &&
bfqq_sequential_and_IO_bound);
/*
* The value of the next variable,
@@ -3313,16 +3334,13 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq)
asymmetric_scenario && !bfq_bfqq_in_large_burst(bfqq);
/*
* We have now all the components we need to compute the return
* value of the function, which is true only if both the following
* conditions hold:
* 1) bfqq is sync, because idling make sense only for sync queues;
* 2) idling either boosts the throughput (without issues), or
* is necessary to preserve service guarantees.
* We have now all the components we need to compute the
* return value of the function, which is true only if idling
* either boosts the throughput (without issues), or is
* necessary to preserve service guarantees.
*/
return bfq_bfqq_sync(bfqq) &&
(idling_boosts_thr_without_issues ||
idling_needed_for_service_guarantees);
return idling_boosts_thr_without_issues ||
idling_needed_for_service_guarantees;
}
/*
@@ -3338,10 +3356,7 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq)
*/
static bool bfq_bfqq_must_idle(struct bfq_queue *bfqq)
{
struct bfq_data *bfqd = bfqq->bfqd;
return RB_EMPTY_ROOT(&bfqq->sort_list) && bfqd->bfq_slice_idle != 0 &&
bfq_bfqq_may_idle(bfqq);
return RB_EMPTY_ROOT(&bfqq->sort_list) && bfq_bfqq_may_idle(bfqq);
}
/*
@@ -3783,7 +3798,6 @@ bfq_set_next_ioprio_data(struct bfq_queue *bfqq, struct bfq_io_cq *bic)
case IOPRIO_CLASS_IDLE:
bfqq->new_ioprio_class = IOPRIO_CLASS_IDLE;
bfqq->new_ioprio = 7;
bfq_clear_bfqq_idle_window(bfqq);
break;
}
@@ -3843,8 +3857,14 @@ static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
bfq_set_next_ioprio_data(bfqq, bic);
if (is_sync) {
/*
* No need to mark as has_short_ttime if in
* idle_class, because no device idling is performed
* for queues in idle class
*/
if (!bfq_class_idle(bfqq))
bfq_mark_bfqq_idle_window(bfqq);
/* tentatively mark as has_short_ttime */
bfq_mark_bfqq_has_short_ttime(bfqq);
bfq_mark_bfqq_sync(bfqq);
bfq_mark_bfqq_just_created(bfqq);
} else
@@ -3985,18 +4005,19 @@ bfq_update_io_seektime(struct bfq_data *bfqd, struct bfq_queue *bfqq,
blk_rq_sectors(rq) < BFQQ_SECT_THR_NONROT);
}
/*
* Disable idle window if the process thinks too long or seeks so much that
* it doesn't matter.
*/
static void bfq_update_idle_window(struct bfq_data *bfqd,
struct bfq_queue *bfqq,
struct bfq_io_cq *bic)
static void bfq_update_has_short_ttime(struct bfq_data *bfqd,
struct bfq_queue *bfqq,
struct bfq_io_cq *bic)
{
int enable_idle;
bool has_short_ttime = true;
/* Don't idle for async or idle io prio class. */
if (!bfq_bfqq_sync(bfqq) || bfq_class_idle(bfqq))
/*
* No need to update has_short_ttime if bfqq is async or in
* idle io prio class, or if bfq_slice_idle is zero, because
* no device idling is performed for bfqq in this case.
*/
if (!bfq_bfqq_sync(bfqq) || bfq_class_idle(bfqq) ||
bfqd->bfq_slice_idle == 0)
return;
/* Idle window just restored, statistics are meaningless. */
@@ -4004,27 +4025,22 @@ static void bfq_update_idle_window(struct bfq_data *bfqd,
bfqd->bfq_wr_min_idle_time))
return;
enable_idle = bfq_bfqq_idle_window(bfqq);
/* Think time is infinite if no process is linked to
* bfqq. Otherwise check average think time to
* decide whether to mark as has_short_ttime
*/
if (atomic_read(&bic->icq.ioc->active_ref) == 0 ||
bfqd->bfq_slice_idle == 0 ||
(bfqd->hw_tag && BFQQ_SEEKY(bfqq) &&
bfqq->wr_coeff == 1))
enable_idle = 0;
else if (bfq_sample_valid(bfqq->ttime.ttime_samples)) {
if (bfqq->ttime.ttime_mean > bfqd->bfq_slice_idle &&
bfqq->wr_coeff == 1)
enable_idle = 0;
else
enable_idle = 1;
}
bfq_log_bfqq(bfqd, bfqq, "update_idle_window: enable_idle %d",
enable_idle);
(bfq_sample_valid(bfqq->ttime.ttime_samples) &&
bfqq->ttime.ttime_mean > bfqd->bfq_slice_idle))
has_short_ttime = false;
if (enable_idle)
bfq_mark_bfqq_idle_window(bfqq);
bfq_log_bfqq(bfqd, bfqq, "update_has_short_ttime: has_short_ttime %d",
has_short_ttime);
if (has_short_ttime)
bfq_mark_bfqq_has_short_ttime(bfqq);
else
bfq_clear_bfqq_idle_window(bfqq);
bfq_clear_bfqq_has_short_ttime(bfqq);
}
/*
@@ -4040,14 +4056,12 @@ static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq,
bfqq->meta_pending++;
bfq_update_io_thinktime(bfqd, bfqq);
bfq_update_has_short_ttime(bfqd, bfqq, bic);
bfq_update_io_seektime(bfqd, bfqq, rq);
if (bfqq->entity.service > bfq_max_budget(bfqd) / 8 ||
!BFQQ_SEEKY(bfqq))
bfq_update_idle_window(bfqd, bfqq, bic);
bfq_log_bfqq(bfqd, bfqq,
"rq_enqueued: idle_window=%d (seeky %d)",
bfq_bfqq_idle_window(bfqq), BFQQ_SEEKY(bfqq));
"rq_enqueued: has_short_ttime=%d (seeky %d)",
bfq_bfqq_has_short_ttime(bfqq), BFQQ_SEEKY(bfqq));
bfqq->last_request_pos = blk_rq_pos(rq) + blk_rq_sectors(rq);
@@ -4787,16 +4801,13 @@ static ssize_t bfq_var_show(unsigned int var, char *page)
return sprintf(page, "%u\n", var);
}
static ssize_t bfq_var_store(unsigned long *var, const char *page,
size_t count)
static void bfq_var_store(unsigned long *var, const char *page)
{
unsigned long new_val;
int ret = kstrtoul(page, 10, &new_val);
if (ret == 0)
*var = new_val;
return count;
}
#define SHOW_FUNCTION(__FUNC, __VAR, __CONV) \
@@ -4838,7 +4849,7 @@ __FUNC(struct elevator_queue *e, const char *page, size_t count) \
{ \
struct bfq_data *bfqd = e->elevator_data; \
unsigned long uninitialized_var(__data); \
int ret = bfq_var_store(&__data, (page), count); \
bfq_var_store(&__data, (page)); \
if (__data < (MIN)) \
__data = (MIN); \
else if (__data > (MAX)) \
@@ -4849,7 +4860,7 @@ __FUNC(struct elevator_queue *e, const char *page, size_t count) \
*(__PTR) = (u64)__data * NSEC_PER_MSEC; \
else \
*(__PTR) = __data; \
return ret; \
return count; \
}
STORE_FUNCTION(bfq_fifo_expire_sync_store, &bfqd->bfq_fifo_expire[1], 1,
INT_MAX, 2);
@@ -4866,13 +4877,13 @@ static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count)\
{ \
struct bfq_data *bfqd = e->elevator_data; \
unsigned long uninitialized_var(__data); \
int ret = bfq_var_store(&__data, (page), count); \
bfq_var_store(&__data, (page)); \
if (__data < (MIN)) \
__data = (MIN); \
else if (__data > (MAX)) \
__data = (MAX); \
*(__PTR) = (u64)__data * NSEC_PER_USEC; \
return ret; \
return count; \
}
USEC_STORE_FUNCTION(bfq_slice_idle_us_store, &bfqd->bfq_slice_idle, 0,
UINT_MAX);
@@ -4883,7 +4894,8 @@ static ssize_t bfq_max_budget_store(struct elevator_queue *e,
{
struct bfq_data *bfqd = e->elevator_data;
unsigned long uninitialized_var(__data);
int ret = bfq_var_store(&__data, (page), count);
bfq_var_store(&__data, (page));
if (__data == 0)
bfqd->bfq_max_budget = bfq_calc_max_budget(bfqd);
@@ -4895,7 +4907,7 @@ static ssize_t bfq_max_budget_store(struct elevator_queue *e,
bfqd->bfq_user_max_budget = __data;
return ret;
return count;
}
/*
@@ -4907,7 +4919,8 @@ static ssize_t bfq_timeout_sync_store(struct elevator_queue *e,
{
struct bfq_data *bfqd = e->elevator_data;
unsigned long uninitialized_var(__data);
int ret = bfq_var_store(&__data, (page), count);
bfq_var_store(&__data, (page));
if (__data < 1)
__data = 1;
@@ -4918,7 +4931,7 @@ static ssize_t bfq_timeout_sync_store(struct elevator_queue *e,
if (bfqd->bfq_user_max_budget == 0)
bfqd->bfq_max_budget = bfq_calc_max_budget(bfqd);
return ret;
return count;
}
static ssize_t bfq_strict_guarantees_store(struct elevator_queue *e,
@@ -4926,7 +4939,8 @@ static ssize_t bfq_strict_guarantees_store(struct elevator_queue *e,
{
struct bfq_data *bfqd = e->elevator_data;
unsigned long uninitialized_var(__data);
int ret = bfq_var_store(&__data, (page), count);
bfq_var_store(&__data, (page));
if (__data > 1)
__data = 1;
@@ -4936,7 +4950,7 @@ static ssize_t bfq_strict_guarantees_store(struct elevator_queue *e,
bfqd->strict_guarantees = __data;
return ret;
return count;
}
static ssize_t bfq_low_latency_store(struct elevator_queue *e,
@@ -4944,7 +4958,8 @@ static ssize_t bfq_low_latency_store(struct elevator_queue *e,
{
struct bfq_data *bfqd = e->elevator_data;
unsigned long uninitialized_var(__data);
int ret = bfq_var_store(&__data, (page), count);
bfq_var_store(&__data, (page));
if (__data > 1)
__data = 1;
@@ -4952,7 +4967,7 @@ static ssize_t bfq_low_latency_store(struct elevator_queue *e,
bfq_end_wr(bfqd);
bfqd->low_latency = __data;
return ret;
return count;
}
#define BFQ_ATTR(name) \
@@ -4998,6 +5013,7 @@ static struct elevator_type iosched_bfq_mq = {
.elevator_name = "bfq",
.elevator_owner = THIS_MODULE,
};
MODULE_ALIAS("bfq-iosched");
static int __init bfq_init(void)
{
@@ -5048,10 +5064,12 @@ static int __init bfq_init(void)
ret = elv_register(&iosched_bfq_mq);
if (ret)
goto err_pol_unreg;
goto slab_kill;
return 0;
slab_kill:
bfq_slab_kill();
err_pol_unreg:
#ifdef CONFIG_BFQ_GROUP_IOSCHED
blkcg_policy_unregister(&blkcg_policy_bfq);
+14 -11
View File
@@ -360,11 +360,11 @@ struct bfq_io_cq {
uint64_t blkcg_serial_nr; /* the current blkcg serial */
#endif
/*
* Snapshot of the idle window before merging; taken to
* remember this value while the queue is merged, so as to be
* able to restore it in case of split.
* Snapshot of the has_short_ttime flag before merging; taken
* to remember its value while the queue is merged, so as to
* be able to restore it in case of split.
*/
bool saved_idle_window;
bool saved_has_short_ttime;
/*
* Same purpose as the previous two fields for the I/O bound
* classification of a queue.
@@ -638,7 +638,7 @@ enum bfqq_state_flags {
* without idling the device
*/
BFQQF_fifo_expire, /* FIFO checked in this slice */
BFQQF_idle_window, /* slice idling enabled */
BFQQF_has_short_ttime, /* queue has a short think time */
BFQQF_sync, /* synchronous queue */
BFQQF_IO_bound, /*
* bfqq has timed-out at least once
@@ -667,7 +667,7 @@ BFQ_BFQQ_FNS(busy);
BFQ_BFQQ_FNS(wait_request);
BFQ_BFQQ_FNS(non_blocking_wait_rq);
BFQ_BFQQ_FNS(fifo_expire);
BFQ_BFQQ_FNS(idle_window);
BFQ_BFQQ_FNS(has_short_ttime);
BFQ_BFQQ_FNS(sync);
BFQ_BFQQ_FNS(IO_bound);
BFQ_BFQQ_FNS(in_large_burst);
@@ -929,13 +929,16 @@ void bfq_add_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq);
struct bfq_group *bfqq_group(struct bfq_queue *bfqq);
#define bfq_log_bfqq(bfqd, bfqq, fmt, args...) do { \
blk_add_trace_msg((bfqd)->queue, "bfq%d%c %s " fmt, (bfqq)->pid,\
bfq_bfqq_sync((bfqq)) ? 'S' : 'A', \
bfqq_group(bfqq)->blkg_path, ##args); \
blk_add_cgroup_trace_msg((bfqd)->queue, \
bfqg_to_blkg(bfqq_group(bfqq))->blkcg, \
"bfq%d%c " fmt, (bfqq)->pid, \
bfq_bfqq_sync((bfqq)) ? 'S' : 'A', ##args); \
} while (0)
#define bfq_log_bfqg(bfqd, bfqg, fmt, args...) \
blk_add_trace_msg((bfqd)->queue, "%s " fmt, (bfqg)->blkg_path, ##args)
#define bfq_log_bfqg(bfqd, bfqg, fmt, args...) do { \
blk_add_cgroup_trace_msg((bfqd)->queue, \
bfqg_to_blkg(bfqg)->blkcg, fmt, ##args); \
} while (0)
#else /* CONFIG_BFQ_GROUP_IOSCHED */
+12 -14
View File
@@ -146,7 +146,7 @@ int bio_integrity_add_page(struct bio *bio, struct page *page,
iv = bip->bip_vec + bip->bip_vcnt;
if (bip->bip_vcnt &&
bvec_gap_to_prev(bdev_get_queue(bio->bi_bdev),
bvec_gap_to_prev(bio->bi_disk->queue,
&bip->bip_vec[bip->bip_vcnt - 1], offset))
return 0;
@@ -190,7 +190,7 @@ static inline unsigned int bio_integrity_bytes(struct blk_integrity *bi,
static blk_status_t bio_integrity_process(struct bio *bio,
struct bvec_iter *proc_iter, integrity_processing_fn *proc_fn)
{
struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
struct blk_integrity *bi = blk_get_integrity(bio->bi_disk);
struct blk_integrity_iter iter;
struct bvec_iter bviter;
struct bio_vec bv;
@@ -199,7 +199,7 @@ static blk_status_t bio_integrity_process(struct bio *bio,
void *prot_buf = page_address(bip->bip_vec->bv_page) +
bip->bip_vec->bv_offset;
iter.disk_name = bio->bi_bdev->bd_disk->disk_name;
iter.disk_name = bio->bi_disk->disk_name;
iter.interval = 1 << bi->interval_exp;
iter.seed = proc_iter->bi_sector;
iter.prot_buf = prot_buf;
@@ -236,8 +236,8 @@ static blk_status_t bio_integrity_process(struct bio *bio,
bool bio_integrity_prep(struct bio *bio)
{
struct bio_integrity_payload *bip;
struct blk_integrity *bi;
struct request_queue *q;
struct blk_integrity *bi = blk_get_integrity(bio->bi_disk);
struct request_queue *q = bio->bi_disk->queue;
void *buf;
unsigned long start, end;
unsigned int len, nr_pages;
@@ -245,8 +245,9 @@ bool bio_integrity_prep(struct bio *bio)
unsigned int intervals;
blk_status_t status;
bi = bdev_get_integrity(bio->bi_bdev);
q = bdev_get_queue(bio->bi_bdev);
if (!bi)
return true;
if (bio_op(bio) != REQ_OP_READ && bio_op(bio) != REQ_OP_WRITE)
return true;
@@ -257,9 +258,6 @@ bool bio_integrity_prep(struct bio *bio)
if (bio_integrity(bio))
return true;
if (bi == NULL)
return true;
if (bio_data_dir(bio) == READ) {
if (!bi->profile->verify_fn ||
!(bi->flags & BLK_INTEGRITY_VERIFY))
@@ -354,7 +352,7 @@ static void bio_integrity_verify_fn(struct work_struct *work)
struct bio_integrity_payload *bip =
container_of(work, struct bio_integrity_payload, bip_work);
struct bio *bio = bip->bip_bio;
struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
struct blk_integrity *bi = blk_get_integrity(bio->bi_disk);
struct bvec_iter iter = bio->bi_iter;
/*
@@ -387,7 +385,7 @@ static void bio_integrity_verify_fn(struct work_struct *work)
*/
bool __bio_integrity_endio(struct bio *bio)
{
struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
struct blk_integrity *bi = blk_get_integrity(bio->bi_disk);
struct bio_integrity_payload *bip = bio_integrity(bio);
if (bio_op(bio) == REQ_OP_READ && !bio->bi_status &&
@@ -413,7 +411,7 @@ bool __bio_integrity_endio(struct bio *bio)
void bio_integrity_advance(struct bio *bio, unsigned int bytes_done)
{
struct bio_integrity_payload *bip = bio_integrity(bio);
struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
struct blk_integrity *bi = blk_get_integrity(bio->bi_disk);
unsigned bytes = bio_integrity_bytes(bi, bytes_done >> 9);
bip->bip_iter.bi_sector += bytes_done >> 9;
@@ -430,7 +428,7 @@ EXPORT_SYMBOL(bio_integrity_advance);
void bio_integrity_trim(struct bio *bio)
{
struct bio_integrity_payload *bip = bio_integrity(bio);
struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
struct blk_integrity *bi = blk_get_integrity(bio->bi_disk);
bip->bip_iter.bi_size = bio_integrity_bytes(bi, bio_sectors(bio));
}
+17 -13
View File
@@ -593,10 +593,10 @@ void __bio_clone_fast(struct bio *bio, struct bio *bio_src)
BUG_ON(bio->bi_pool && BVEC_POOL_IDX(bio));
/*
* most users will be overriding ->bi_bdev with a new target,
* most users will be overriding ->bi_disk with a new target,
* so we don't set nor calculate new physical/hw segment counts here
*/
bio->bi_bdev = bio_src->bi_bdev;
bio->bi_disk = bio_src->bi_disk;
bio_set_flag(bio, BIO_CLONED);
bio->bi_opf = bio_src->bi_opf;
bio->bi_write_hint = bio_src->bi_write_hint;
@@ -681,7 +681,7 @@ struct bio *bio_clone_bioset(struct bio *bio_src, gfp_t gfp_mask,
bio = bio_alloc_bioset(gfp_mask, bio_segments(bio_src), bs);
if (!bio)
return NULL;
bio->bi_bdev = bio_src->bi_bdev;
bio->bi_disk = bio_src->bi_disk;
bio->bi_opf = bio_src->bi_opf;
bio->bi_write_hint = bio_src->bi_write_hint;
bio->bi_iter.bi_sector = bio_src->bi_iter.bi_sector;
@@ -936,6 +936,10 @@ static void submit_bio_wait_endio(struct bio *bio)
*
* Simple wrapper around submit_bio(). Returns 0 on success, or the error from
* bio_endio() on failure.
*
* WARNING: Unlike how submit_bio() is usually used, this function does not
* consume the bio reference. The caller must drop the reference on its
* own.
*/
int submit_bio_wait(struct bio *bio)
{
@@ -1732,29 +1736,29 @@ void bio_check_pages_dirty(struct bio *bio)
}
}
void generic_start_io_acct(int rw, unsigned long sectors,
struct hd_struct *part)
void generic_start_io_acct(struct request_queue *q, int rw,
unsigned long sectors, struct hd_struct *part)
{
int cpu = part_stat_lock();
part_round_stats(cpu, part);
part_round_stats(q, cpu, part);
part_stat_inc(cpu, part, ios[rw]);
part_stat_add(cpu, part, sectors[rw], sectors);
part_inc_in_flight(part, rw);
part_inc_in_flight(q, part, rw);
part_stat_unlock();
}
EXPORT_SYMBOL(generic_start_io_acct);
void generic_end_io_acct(int rw, struct hd_struct *part,
unsigned long start_time)
void generic_end_io_acct(struct request_queue *q, int rw,
struct hd_struct *part, unsigned long start_time)
{
unsigned long duration = jiffies - start_time;
int cpu = part_stat_lock();
part_stat_add(cpu, part, ticks[rw], duration);
part_round_stats(cpu, part);
part_dec_in_flight(part, rw);
part_round_stats(q, cpu, part);
part_dec_in_flight(q, part, rw);
part_stat_unlock();
}
@@ -1826,8 +1830,8 @@ again:
goto again;
}
if (bio->bi_bdev && bio_flagged(bio, BIO_TRACE_COMPLETION)) {
trace_block_bio_complete(bdev_get_queue(bio->bi_bdev), bio,
if (bio->bi_disk && bio_flagged(bio, BIO_TRACE_COMPLETION)) {
trace_block_bio_complete(bio->bi_disk->queue, bio,
blk_status_to_errno(bio->bi_status));
bio_clear_flag(bio, BIO_TRACE_COMPLETION);
}
+5 -3
View File
@@ -1067,7 +1067,7 @@ blkcg_css_alloc(struct cgroup_subsys_state *parent_css)
blkcg = kzalloc(sizeof(*blkcg), GFP_KERNEL);
if (!blkcg) {
ret = ERR_PTR(-ENOMEM);
goto free_blkcg;
goto unlock;
}
}
@@ -1111,8 +1111,10 @@ free_pd_blkcg:
for (i--; i >= 0; i--)
if (blkcg->cpd[i])
blkcg_policy[i]->cpd_free_fn(blkcg->cpd[i]);
free_blkcg:
kfree(blkcg);
if (blkcg != &blkcg_root)
kfree(blkcg);
unlock:
mutex_unlock(&blkcg_pol_mutex);
return ret;
}
+81 -72
View File
@@ -280,7 +280,7 @@ EXPORT_SYMBOL(blk_start_queue_async);
void blk_start_queue(struct request_queue *q)
{
lockdep_assert_held(q->queue_lock);
WARN_ON(!irqs_disabled());
WARN_ON(!in_interrupt() && !irqs_disabled());
WARN_ON_ONCE(q->mq_ops);
queue_flag_clear(QUEUE_FLAG_STOPPED, q);
@@ -1469,15 +1469,10 @@ static void add_acct_request(struct request_queue *q, struct request *rq,
__elv_add_request(q, rq, where);
}
static void part_round_stats_single(int cpu, struct hd_struct *part,
unsigned long now)
static void part_round_stats_single(struct request_queue *q, int cpu,
struct hd_struct *part, unsigned long now,
unsigned int inflight)
{
int inflight;
if (now == part->stamp)
return;
inflight = part_in_flight(part);
if (inflight) {
__part_stat_add(cpu, part, time_in_queue,
inflight * (now - part->stamp));
@@ -1488,6 +1483,7 @@ static void part_round_stats_single(int cpu, struct hd_struct *part,
/**
* part_round_stats() - Round off the performance stats on a struct disk_stats.
* @q: target block queue
* @cpu: cpu number for stats access
* @part: target partition
*
@@ -1502,13 +1498,31 @@ static void part_round_stats_single(int cpu, struct hd_struct *part,
* /proc/diskstats. This accounts immediately for all queue usage up to
* the current jiffies and restarts the counters again.
*/
void part_round_stats(int cpu, struct hd_struct *part)
void part_round_stats(struct request_queue *q, int cpu, struct hd_struct *part)
{
struct hd_struct *part2 = NULL;
unsigned long now = jiffies;
unsigned int inflight[2];
int stats = 0;
if (part->partno)
part_round_stats_single(cpu, &part_to_disk(part)->part0, now);
part_round_stats_single(cpu, part, now);
if (part->stamp != now)
stats |= 1;
if (part->partno) {
part2 = &part_to_disk(part)->part0;
if (part2->stamp != now)
stats |= 2;
}
if (!stats)
return;
part_in_flight(q, part, inflight);
if (stats & 2)
part_round_stats_single(q, cpu, part2, now, inflight[1]);
if (stats & 1)
part_round_stats_single(q, cpu, part, now, inflight[0]);
}
EXPORT_SYMBOL_GPL(part_round_stats);
@@ -1896,40 +1910,15 @@ out_unlock:
return BLK_QC_T_NONE;
}
/*
* If bio->bi_dev is a partition, remap the location
*/
static inline void blk_partition_remap(struct bio *bio)
{
struct block_device *bdev = bio->bi_bdev;
/*
* Zone reset does not include bi_size so bio_sectors() is always 0.
* Include a test for the reset op code and perform the remap if needed.
*/
if (bdev != bdev->bd_contains &&
(bio_sectors(bio) || bio_op(bio) == REQ_OP_ZONE_RESET)) {
struct hd_struct *p = bdev->bd_part;
bio->bi_iter.bi_sector += p->start_sect;
bio->bi_bdev = bdev->bd_contains;
trace_block_bio_remap(bdev_get_queue(bio->bi_bdev), bio,
bdev->bd_dev,
bio->bi_iter.bi_sector - p->start_sect);
}
}
static void handle_bad_sector(struct bio *bio)
{
char b[BDEVNAME_SIZE];
printk(KERN_INFO "attempt to access beyond end of device\n");
printk(KERN_INFO "%s: rw=%d, want=%Lu, limit=%Lu\n",
bdevname(bio->bi_bdev, b),
bio->bi_opf,
bio_devname(bio, b), bio->bi_opf,
(unsigned long long)bio_end_sector(bio),
(long long)(i_size_read(bio->bi_bdev->bd_inode) >> 9));
(long long)get_capacity(bio->bi_disk));
}
#ifdef CONFIG_FAIL_MAKE_REQUEST
@@ -1967,6 +1956,38 @@ static inline bool should_fail_request(struct hd_struct *part,
#endif /* CONFIG_FAIL_MAKE_REQUEST */
/*
* Remap block n of partition p to block n+start(p) of the disk.
*/
static inline int blk_partition_remap(struct bio *bio)
{
struct hd_struct *p;
int ret = 0;
/*
* Zone reset does not include bi_size so bio_sectors() is always 0.
* Include a test for the reset op code and perform the remap if needed.
*/
if (!bio->bi_partno ||
(!bio_sectors(bio) && bio_op(bio) != REQ_OP_ZONE_RESET))
return 0;
rcu_read_lock();
p = __disk_get_part(bio->bi_disk, bio->bi_partno);
if (likely(p && !should_fail_request(p, bio->bi_iter.bi_size))) {
bio->bi_iter.bi_sector += p->start_sect;
bio->bi_partno = 0;
trace_block_bio_remap(bio->bi_disk->queue, bio, part_devt(p),
bio->bi_iter.bi_sector - p->start_sect);
} else {
printk("%s: fail for partition %d\n", __func__, bio->bi_partno);
ret = -EIO;
}
rcu_read_unlock();
return ret;
}
/*
* Check whether this bio extends beyond the end of the device.
*/
@@ -1978,7 +1999,7 @@ static inline int bio_check_eod(struct bio *bio, unsigned int nr_sectors)
return 0;
/* Test device or partition size, when known. */
maxsector = i_size_read(bio->bi_bdev->bd_inode) >> 9;
maxsector = get_capacity(bio->bi_disk);
if (maxsector) {
sector_t sector = bio->bi_iter.bi_sector;
@@ -2003,20 +2024,18 @@ generic_make_request_checks(struct bio *bio)
int nr_sectors = bio_sectors(bio);
blk_status_t status = BLK_STS_IOERR;
char b[BDEVNAME_SIZE];
struct hd_struct *part;
might_sleep();
if (bio_check_eod(bio, nr_sectors))
goto end_io;
q = bdev_get_queue(bio->bi_bdev);
q = bio->bi_disk->queue;
if (unlikely(!q)) {
printk(KERN_ERR
"generic_make_request: Trying to access "
"nonexistent block-device %s (%Lu)\n",
bdevname(bio->bi_bdev, b),
(long long) bio->bi_iter.bi_sector);
bio_devname(bio, b), (long long)bio->bi_iter.bi_sector);
goto end_io;
}
@@ -2028,17 +2047,11 @@ generic_make_request_checks(struct bio *bio)
if ((bio->bi_opf & REQ_NOWAIT) && !queue_is_rq_based(q))
goto not_supported;
part = bio->bi_bdev->bd_part;
if (should_fail_request(part, bio->bi_iter.bi_size) ||
should_fail_request(&part_to_disk(part)->part0,
bio->bi_iter.bi_size))
if (should_fail_request(&bio->bi_disk->part0, bio->bi_iter.bi_size))
goto end_io;
/*
* If this device has partitions, remap block n
* of partition p to block n+start(p) of the disk.
*/
blk_partition_remap(bio);
if (blk_partition_remap(bio))
goto end_io;
if (bio_check_eod(bio, nr_sectors))
goto end_io;
@@ -2067,16 +2080,16 @@ generic_make_request_checks(struct bio *bio)
goto not_supported;
break;
case REQ_OP_WRITE_SAME:
if (!bdev_write_same(bio->bi_bdev))
if (!q->limits.max_write_same_sectors)
goto not_supported;
break;
case REQ_OP_ZONE_REPORT:
case REQ_OP_ZONE_RESET:
if (!bdev_is_zoned(bio->bi_bdev))
if (!blk_queue_is_zoned(q))
goto not_supported;
break;
case REQ_OP_WRITE_ZEROES:
if (!bdev_write_zeroes_sectors(bio->bi_bdev))
if (!q->limits.max_write_zeroes_sectors)
goto not_supported;
break;
default:
@@ -2183,7 +2196,7 @@ blk_qc_t generic_make_request(struct bio *bio)
bio_list_init(&bio_list_on_stack[0]);
current->bio_list = bio_list_on_stack;
do {
struct request_queue *q = bdev_get_queue(bio->bi_bdev);
struct request_queue *q = bio->bi_disk->queue;
if (likely(blk_queue_enter(q, bio->bi_opf & REQ_NOWAIT) == 0)) {
struct bio_list lower, same;
@@ -2201,7 +2214,7 @@ blk_qc_t generic_make_request(struct bio *bio)
bio_list_init(&lower);
bio_list_init(&same);
while ((bio = bio_list_pop(&bio_list_on_stack[0])) != NULL)
if (q == bdev_get_queue(bio->bi_bdev))
if (q == bio->bi_disk->queue)
bio_list_add(&same, bio);
else
bio_list_add(&lower, bio);
@@ -2244,7 +2257,7 @@ blk_qc_t submit_bio(struct bio *bio)
unsigned int count;
if (unlikely(bio_op(bio) == REQ_OP_WRITE_SAME))
count = bdev_logical_block_size(bio->bi_bdev) >> 9;
count = queue_logical_block_size(bio->bi_disk->queue);
else
count = bio_sectors(bio);
@@ -2261,8 +2274,7 @@ blk_qc_t submit_bio(struct bio *bio)
current->comm, task_pid_nr(current),
op_is_write(bio_op(bio)) ? "WRITE" : "READ",
(unsigned long long)bio->bi_iter.bi_sector,
bdevname(bio->bi_bdev, b),
count);
bio_devname(bio, b), count);
}
}
@@ -2431,8 +2443,8 @@ void blk_account_io_done(struct request *req)
part_stat_inc(cpu, part, ios[rw]);
part_stat_add(cpu, part, ticks[rw], duration);
part_round_stats(cpu, part);
part_dec_in_flight(part, rw);
part_round_stats(req->q, cpu, part);
part_dec_in_flight(req->q, part, rw);
hd_struct_put(part);
part_stat_unlock();
@@ -2489,8 +2501,8 @@ void blk_account_io_start(struct request *rq, bool new_io)
part = &rq->rq_disk->part0;
hd_struct_get(part);
}
part_round_stats(cpu, part);
part_inc_in_flight(part, rw);
part_round_stats(rq->q, cpu, part);
part_inc_in_flight(rq->q, part, rw);
rq->part = part;
}
@@ -2603,7 +2615,7 @@ struct request *blk_peek_request(struct request_queue *q)
}
EXPORT_SYMBOL(blk_peek_request);
void blk_dequeue_request(struct request *rq)
static void blk_dequeue_request(struct request *rq)
{
struct request_queue *q = rq->q;
@@ -2630,9 +2642,6 @@ void blk_dequeue_request(struct request *rq)
* Description:
* Dequeue @req and start timeout timer on it. This hands off the
* request to the driver.
*
* Block internal functions which don't want to start timer should
* call blk_dequeue_request().
*/
void blk_start_request(struct request *req)
{
@@ -3035,8 +3044,8 @@ void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
rq->__data_len = bio->bi_iter.bi_size;
rq->bio = rq->biotail = bio;
if (bio->bi_bdev)
rq->rq_disk = bio->bi_bdev->bd_disk;
if (bio->bi_disk)
rq->rq_disk = bio->bi_disk;
}
#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
+13 -13
View File
@@ -1,12 +1,12 @@
/*
* Functions to sequence FLUSH and FUA writes.
* Functions to sequence PREFLUSH and FUA writes.
*
* Copyright (C) 2011 Max Planck Institute for Gravitational Physics
* Copyright (C) 2011 Tejun Heo <tj@kernel.org>
*
* This file is released under the GPLv2.
*
* REQ_{FLUSH|FUA} requests are decomposed to sequences consisted of three
* REQ_{PREFLUSH|FUA} requests are decomposed to sequences consisting of three
* optional steps - PREFLUSH, DATA and POSTFLUSH - according to the request
* properties and hardware capability.
*
@@ -16,9 +16,9 @@
* REQ_FUA means that the data must be on non-volatile media on request
* completion.
*
* If the device doesn't have writeback cache, FLUSH and FUA don't make any
* difference. The requests are either completed immediately if there's no
* data or executed as normal requests otherwise.
* If the device doesn't have writeback cache, PREFLUSH and FUA don't make any
* difference. The requests are either completed immediately if there's no data
* or executed as normal requests otherwise.
*
* If the device has writeback cache and supports FUA, REQ_PREFLUSH is
* translated to PREFLUSH but REQ_FUA is passed down directly with DATA.
@@ -31,7 +31,7 @@
* fq->flush_queue[fq->flush_pending_idx]. Once certain criteria are met, a
* REQ_OP_FLUSH is issued and the pending_idx is toggled. When the flush
* completes, all the requests which were pending are proceeded to the next
* step. This allows arbitrary merging of different types of FLUSH/FUA
* step. This allows arbitrary merging of different types of PREFLUSH/FUA
* requests.
*
* Currently, the following conditions are used to determine when to issue
@@ -47,19 +47,19 @@
* C3. The second condition is ignored if there is a request which has
* waited longer than FLUSH_PENDING_TIMEOUT. This is to avoid
* starvation in the unlikely case where there is a continuous stream of
* FUA (without FLUSH) requests.
* FUA (without PREFLUSH) requests.
*
* For devices which support FUA, it isn't clear whether C2 (and thus C3)
* is beneficial.
*
* Note that a sequenced FLUSH/FUA request with DATA is completed twice.
* Note that a sequenced PREFLUSH/FUA request with DATA is completed twice.
* Once while executing DATA and again after the whole sequence is
* complete. The first completion updates the contained bio but doesn't
* finish it so that the bio submitter is notified only after the whole
* sequence is complete. This is implemented by testing RQF_FLUSH_SEQ in
* req_bio_endio().
*
* The above peculiarity requires that each FLUSH/FUA request has only one
* The above peculiarity requires that each PREFLUSH/FUA request has only one
* bio attached to it, which is guaranteed as they aren't allowed to be
* merged in the usual way.
*/
@@ -76,7 +76,7 @@
#include "blk-mq-tag.h"
#include "blk-mq-sched.h"
/* FLUSH/FUA sequences */
/* PREFLUSH/FUA sequences */
enum {
REQ_FSEQ_PREFLUSH = (1 << 0), /* pre-flushing in progress */
REQ_FSEQ_DATA = (1 << 1), /* data write in progress */
@@ -148,7 +148,7 @@ static bool blk_flush_queue_rq(struct request *rq, bool add_front)
/**
* blk_flush_complete_seq - complete flush sequence
* @rq: FLUSH/FUA request being sequenced
* @rq: PREFLUSH/FUA request being sequenced
* @fq: flush queue
* @seq: sequences to complete (mask of %REQ_FSEQ_*, can be zero)
* @error: whether an error occurred
@@ -406,7 +406,7 @@ static void mq_flush_data_end_io(struct request *rq, blk_status_t error)
}
/**
* blk_insert_flush - insert a new FLUSH/FUA request
* blk_insert_flush - insert a new PREFLUSH/FUA request
* @rq: request to insert
*
* To be called from __elv_add_request() for %ELEVATOR_INSERT_FLUSH insertions.
@@ -525,7 +525,7 @@ int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask,
return -ENXIO;
bio = bio_alloc(gfp_mask, 0);
bio->bi_bdev = bdev;
bio_set_dev(bio, bdev);
bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
ret = submit_bio_wait(bio);
+4 -4
View File
@@ -77,7 +77,7 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
bio = next_bio(bio, 0, gfp_mask);
bio->bi_iter.bi_sector = sector;
bio->bi_bdev = bdev;
bio_set_dev(bio, bdev);
bio_set_op_attrs(bio, op, 0);
bio->bi_iter.bi_size = req_sects << 9;
@@ -168,7 +168,7 @@ static int __blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
while (nr_sects) {
bio = next_bio(bio, 1, gfp_mask);
bio->bi_iter.bi_sector = sector;
bio->bi_bdev = bdev;
bio_set_dev(bio, bdev);
bio->bi_vcnt = 1;
bio->bi_io_vec->bv_page = page;
bio->bi_io_vec->bv_offset = 0;
@@ -241,7 +241,7 @@ static int __blkdev_issue_write_zeroes(struct block_device *bdev,
while (nr_sects) {
bio = next_bio(bio, 0, gfp_mask);
bio->bi_iter.bi_sector = sector;
bio->bi_bdev = bdev;
bio_set_dev(bio, bdev);
bio->bi_opf = REQ_OP_WRITE_ZEROES;
if (flags & BLKDEV_ZERO_NOUNMAP)
bio->bi_opf |= REQ_NOUNMAP;
@@ -323,7 +323,7 @@ int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
bio = next_bio(bio, __blkdev_sectors_to_bio_pages(nr_sects),
gfp_mask);
bio->bi_iter.bi_sector = sector;
bio->bi_bdev = bdev;
bio_set_dev(bio, bdev);
bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
while (nr_sects != 0) {
+3 -3
View File
@@ -633,8 +633,8 @@ static void blk_account_io_merge(struct request *req)
cpu = part_stat_lock();
part = req->part;
part_round_stats(cpu, part);
part_dec_in_flight(part, rq_data_dir(req));
part_round_stats(req->q, cpu, part);
part_dec_in_flight(req->q, part, rq_data_dir(req));
hd_struct_put(part);
part_stat_unlock();
@@ -786,7 +786,7 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
return false;
/* must be same device and not a special request */
if (rq->rq_disk != bio->bi_bdev->bd_disk || req_no_special_merge(rq))
if (rq->rq_disk != bio->bi_disk || req_no_special_merge(rq))
return false;
/* only merge integrity protected bio into ditto rq */
+1 -3
View File
@@ -48,8 +48,6 @@ static int blk_flags_show(struct seq_file *m, const unsigned long flags,
static const char *const blk_queue_flag_name[] = {
QUEUE_FLAG_NAME(QUEUED),
QUEUE_FLAG_NAME(STOPPED),
QUEUE_FLAG_NAME(SYNCFULL),
QUEUE_FLAG_NAME(ASYNCFULL),
QUEUE_FLAG_NAME(DYING),
QUEUE_FLAG_NAME(BYPASS),
QUEUE_FLAG_NAME(BIDI),
@@ -744,7 +742,7 @@ static int blk_mq_debugfs_release(struct inode *inode, struct file *file)
return seq_release(inode, file);
}
const struct file_operations blk_mq_debugfs_fops = {
static const struct file_operations blk_mq_debugfs_fops = {
.open = blk_mq_debugfs_open,
.read = seq_read,
.write = blk_mq_debugfs_write,
+18 -7
View File
@@ -214,7 +214,11 @@ static bool bt_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
bitnr += tags->nr_reserved_tags;
rq = tags->rqs[bitnr];
if (rq->q == hctx->queue)
/*
* We can hit rq == NULL here, because the tagging functions
* test and set the bit before assigning ->rqs[].
*/
if (rq && rq->q == hctx->queue)
iter_data->fn(hctx, rq, iter_data->data, reserved);
return true;
}
@@ -248,9 +252,15 @@ static bool bt_tags_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
if (!reserved)
bitnr += tags->nr_reserved_tags;
rq = tags->rqs[bitnr];
iter_data->fn(rq, iter_data->data, reserved);
/*
* We can hit rq == NULL here, because the tagging functions
* test and set the bit before assigning ->rqs[].
*/
rq = tags->rqs[bitnr];
if (rq)
iter_data->fn(rq, iter_data->data, reserved);
return true;
}
@@ -288,11 +298,12 @@ void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset,
}
EXPORT_SYMBOL(blk_mq_tagset_busy_iter);
int blk_mq_reinit_tagset(struct blk_mq_tag_set *set)
int blk_mq_reinit_tagset(struct blk_mq_tag_set *set,
int (reinit_request)(void *, struct request *))
{
int i, j, ret = 0;
if (!set->ops->reinit_request)
if (WARN_ON_ONCE(!reinit_request))
goto out;
for (i = 0; i < set->nr_hw_queues; i++) {
@@ -305,8 +316,8 @@ int blk_mq_reinit_tagset(struct blk_mq_tag_set *set)
if (!tags->static_rqs[j])
continue;
ret = set->ops->reinit_request(set->driver_data,
tags->static_rqs[j]);
ret = reinit_request(set->driver_data,
tags->static_rqs[j]);
if (ret)
goto out;
}
+49 -5
View File
@@ -83,6 +83,41 @@ static void blk_mq_hctx_clear_pending(struct blk_mq_hw_ctx *hctx,
sbitmap_clear_bit(&hctx->ctx_map, ctx->index_hw);
}
/*
 * Context handed to blk_mq_check_inflight() through the tag iterator's
 * private pointer by blk_mq_in_flight().
 */
struct mq_inflight {
/* Partition the caller asked about; compared against rq->part. */
struct hd_struct *part;
/* Caller's two-entry counter array; entries [0] and [1] are incremented. */
unsigned int *inflight;
};
/*
 * Tag-iterator callback used by blk_mq_in_flight().
 *
 * @hctx:     hardware context the request was found on (unused here)
 * @rq:       request being visited
 * @priv:     struct mq_inflight describing what to count and where
 * @reserved: whether the tag is a reserved one (unused here)
 *
 * A request is counted only while it has REQ_ATOM_STARTED set and
 * REQ_ATOM_COMPLETE clear.
 */
static void blk_mq_check_inflight(struct blk_mq_hw_ctx *hctx,
				  struct request *rq, void *priv,
				  bool reserved)
{
	struct mq_inflight *ctx = priv;

	/* Not handed to the driver yet: nothing to account. */
	if (!test_bit(REQ_ATOM_STARTED, &rq->atomic_flags))
		return;

	/* Already completed: no longer in flight. */
	if (test_bit(REQ_ATOM_COMPLETE, &rq->atomic_flags))
		return;

	/* Slot 0: requests that belong to the partition that was asked for. */
	if (rq->part == ctx->part)
		ctx->inflight[0]++;

	/*
	 * Slot 1: every active request on the device, but only maintained
	 * when the partition asked for is a real partition (non-zero
	 * partno) rather than the whole device.
	 */
	if (ctx->part->partno)
		ctx->inflight[1]++;
}
/*
 * blk_mq_in_flight - count started, not yet completed requests
 * @q:        queue whose tags are walked
 * @part:     partition to account for
 * @inflight: two-entry result array, zeroed here before the walk
 *
 * Walks the queue's busy tags with blk_mq_check_inflight(), which fills
 * inflight[0] with the requests belonging to @part and, when @part has a
 * non-zero partno, inflight[1] with all in-flight requests it visits.
 */
void blk_mq_in_flight(struct request_queue *q, struct hd_struct *part,
		      unsigned int inflight[2])
{
	struct mq_inflight mi;

	mi.part = part;
	mi.inflight = inflight;

	/* Start both counters from zero; the callback only increments. */
	inflight[0] = 0;
	inflight[1] = 0;

	blk_mq_queue_tag_busy_iter(q, blk_mq_check_inflight, &mi);
}
void blk_freeze_queue_start(struct request_queue *q)
{
int freeze_depth;
@@ -624,11 +659,10 @@ static void blk_mq_requeue_work(struct work_struct *work)
container_of(work, struct request_queue, requeue_work.work);
LIST_HEAD(rq_list);
struct request *rq, *next;
unsigned long flags;
spin_lock_irqsave(&q->requeue_lock, flags);
spin_lock_irq(&q->requeue_lock);
list_splice_init(&q->requeue_list, &rq_list);
spin_unlock_irqrestore(&q->requeue_lock, flags);
spin_unlock_irq(&q->requeue_lock);
list_for_each_entry_safe(rq, next, &rq_list, queuelist) {
if (!(rq->rq_flags & RQF_SOFTBARRIER))
@@ -1102,9 +1136,19 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
{
int srcu_idx;
/*
* We should be running this queue from one of the CPUs that
* are mapped to it.
*/
WARN_ON(!cpumask_test_cpu(raw_smp_processor_id(), hctx->cpumask) &&
cpu_online(hctx->next_cpu));
/*
* We can't run the queue inline with ints disabled. Ensure that
* we catch bad users of this early.
*/
WARN_ON_ONCE(in_interrupt());
if (!(hctx->flags & BLK_MQ_F_BLOCKING)) {
rcu_read_lock();
blk_mq_sched_dispatch_requests(hctx);
@@ -1218,7 +1262,7 @@ EXPORT_SYMBOL(blk_mq_queue_stopped);
/*
* This function is often used for pausing .queue_rq() by driver when
* there isn't enough resource or some conditions aren't satisfied, and
* BLK_MQ_RQ_QUEUE_BUSY is usually returned.
* BLK_STS_RESOURCE is usually returned.
*
* We do not guarantee that dispatch can be drained or blocked
* after blk_mq_stop_hw_queue() returns. Please use
@@ -1235,7 +1279,7 @@ EXPORT_SYMBOL(blk_mq_stop_hw_queue);
/*
* This function is often used for pausing .queue_rq() by driver when
* there isn't enough resource or some conditions aren't satisfied, and
* BLK_MQ_RQ_QUEUE_BUSY is usually returned.
* BLK_STS_RESOURCE is usually returned.
*
* We do not guarantee that dispatch can be drained or blocked
* after blk_mq_stop_hw_queues() returns. Please use
+3
View File
@@ -133,4 +133,7 @@ static inline bool blk_mq_hw_queue_mapped(struct blk_mq_hw_ctx *hctx)
return hctx->nr_ctx && hctx->tags;
}
/*
 * Count requests visited on @q that are started but not yet completed.
 * inflight[0] receives those whose ->part is @part; inflight[1] receives
 * all of them when @part has a non-zero partno and stays 0 otherwise.
 */
void blk_mq_in_flight(struct request_queue *q, struct hd_struct *part,
unsigned int inflight[2]);
#endif
+1
View File
@@ -68,6 +68,7 @@ EXPORT_SYMBOL_GPL(blk_queue_rq_timeout);
/*
 * blk_queue_rq_timed_out - install a request timeout handler on a queue
 * @q:  queue to configure
 * @fn: handler stored in q->rq_timed_out_fn
 */
void blk_queue_rq_timed_out(struct request_queue *q, rq_timed_out_fn *fn)
{
/*
 * Warn once if this is called on a queue that has mq_ops set.
 * NOTE(review): presumably blk-mq queues are expected to supply their
 * timeout handler through mq_ops instead; confirm against callers.
 */
WARN_ON_ONCE(q->mq_ops);
q->rq_timed_out_fn = fn;
}
EXPORT_SYMBOL_GPL(blk_queue_rq_timed_out);
+2
View File
@@ -931,7 +931,9 @@ void blk_unregister_queue(struct gendisk *disk)
if (WARN_ON(!q))
return;
mutex_lock(&q->sysfs_lock);
queue_flag_clear_unlocked(QUEUE_FLAG_REGISTERED, q);
mutex_unlock(&q->sysfs_lock);
wbt_exit(q);
-1
View File
@@ -290,7 +290,6 @@ void blk_queue_end_tag(struct request_queue *q, struct request *rq)
*/
clear_bit_unlock(tag, bqt->tag_map);
}
EXPORT_SYMBOL(blk_queue_end_tag);
/**
* blk_queue_start_tag - find a free tag and assign it
+3 -10
View File
@@ -373,10 +373,8 @@ static unsigned int tg_iops_limit(struct throtl_grp *tg, int rw)
if (likely(!blk_trace_note_message_enabled(__td->queue))) \
break; \
if ((__tg)) { \
char __pbuf[128]; \
\
blkg_path(tg_to_blkg(__tg), __pbuf, sizeof(__pbuf)); \
blk_add_trace_msg(__td->queue, "throtl %s " fmt, __pbuf, ##args); \
blk_add_cgroup_trace_msg(__td->queue, \
tg_to_blkg(__tg)->blkcg, "throtl " fmt, ##args);\
} else { \
blk_add_trace_msg(__td->queue, "throtl " fmt, ##args); \
} \
@@ -2114,14 +2112,9 @@ static inline void throtl_update_latency_buckets(struct throtl_data *td)
static void blk_throtl_assoc_bio(struct throtl_grp *tg, struct bio *bio)
{
#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
int ret;
ret = bio_associate_current(bio);
if (ret == 0 || ret == -EBUSY)
if (bio->bi_css)
bio->bi_cg_private = tg;
blk_stat_set_issue(&bio->bi_issue_stat, bio_sectors(bio));
#else
bio_associate_current(bio);
#endif
}
+2 -2
View File
@@ -116,7 +116,7 @@ int blkdev_report_zones(struct block_device *bdev,
if (!bio)
return -ENOMEM;
bio->bi_bdev = bdev;
bio_set_dev(bio, bdev);
bio->bi_iter.bi_sector = blk_zone_start(q, sector);
bio_set_op_attrs(bio, REQ_OP_ZONE_REPORT, 0);
@@ -234,7 +234,7 @@ int blkdev_reset_zones(struct block_device *bdev,
bio = bio_alloc(gfp_mask, 0);
bio->bi_iter.bi_sector = sector;
bio->bi_bdev = bdev;
bio_set_dev(bio, bdev);
bio_set_op_attrs(bio, REQ_OP_ZONE_RESET, 0);
ret = submit_bio_wait(bio);

Some files were not shown because too many files have changed in this diff Show More