Merge tag 'for-4.18/block-20180603' of git://git.kernel.dk/linux-block

Pull block updates from Jens Axboe:

 - clean up how we pass around gfp_t and
   blk_mq_req_flags_t (Christoph)

 - prepare us to defer scheduler attach (Christoph)

 - clean up drivers' handling of bounce buffers (Christoph)

 - fix timeout handling corner cases (Christoph/Bart/Keith)

 - bcache fixes (Coly)

 - prep work for bcachefs and some block layer optimizations (Kent).

 - convert users of bio_sets to using embedded structs (Kent).

 - fixes for the BFQ io scheduler (Paolo/Davide/Filippo)

 - lightnvm fixes and improvements (Matias, with contributions from Hans
   and Javier)

 - adding discard throttling to blk-wbt (me)

 - sbitmap blk-mq-tag handling (me/Omar/Ming).

 - remove the sparc jsflash block driver, acked by DaveM.

 - Kyber scheduler improvement from Jianchao, making it more friendly
   wrt merging.

 - conversion of symbolic proc permissions to octal, from Joe Perches.
   Previously the block parts were a mix of both.

 - nbd fixes (Josef and Kevin Vigor)

 - unify how we handle the various kinds of timestamps that the block
   core and utility code uses (Omar)

 - three NVMe pull requests from Keith and Christoph, bringing AEN to
   feature completeness, file backed namespaces, cq/sq lock split, and
   various fixes

 - various little fixes and improvements all over the map

* tag 'for-4.18/block-20180603' of git://git.kernel.dk/linux-block: (196 commits)
  blk-mq: update nr_requests when switching to 'none' scheduler
  block: don't use blocking queue entered for recursive bio submits
  dm-crypt: fix warning in shutdown path
  lightnvm: pblk: take bitmap alloc. out of critical section
  lightnvm: pblk: kick writer on new flush points
  lightnvm: pblk: only try to recover lines with written smeta
  lightnvm: pblk: remove unnecessary bio_get/put
  lightnvm: pblk: add possibility to set write buffer size manually
  lightnvm: fix partial read error path
  lightnvm: proper error handling for pblk_bio_add_pages
  lightnvm: pblk: fix smeta write error path
  lightnvm: pblk: garbage collect lines with failed writes
  lightnvm: pblk: rework write error recovery path
  lightnvm: pblk: remove dead function
  lightnvm: pass flag on graceful teardown to targets
  lightnvm: pblk: check for chunk size before allocating it
  lightnvm: pblk: remove unnecessary argument
  lightnvm: pblk: remove unnecessary indirection
  lightnvm: pblk: return NVM_ error on failed submission
  lightnvm: pblk: warn in case of corrupted write buffer
  ...
This commit is contained in:
Linus Torvalds
2018-06-04 07:58:06 -07:00
212 changed files with 4033 additions and 3997 deletions
+6 -3
View File
@@ -71,13 +71,16 @@ use_per_node_hctx=[0/1]: Default: 0
1: The multi-queue block layer is instantiated with a hardware dispatch
queue for each CPU node in the system.
use_lightnvm=[0/1]: Default: 0
Register device with LightNVM. Requires blk-mq and CONFIG_NVM to be enabled.
no_sched=[0/1]: Default: 0
0: nullb* use default blk-mq io scheduler.
1: nullb* doesn't use io scheduler.
blocking=[0/1]: Default: 0
0: Register as a non-blocking blk-mq driver device.
1: Register as a blocking blk-mq driver device, null_blk will set
the BLK_MQ_F_BLOCKING flag, indicating that it sometimes/always
needs to block in its ->queue_rq() function.
shared_tags=[0/1]: Default: 0
0: Tag set is not shared.
1: Tag set shared between devices for blk-mq. Only makes sense with
+2 -13
View File
@@ -82,24 +82,13 @@ function
1. invokes optional hostt->eh_timed_out() callback. Return value can
be one of
- BLK_EH_HANDLED
This indicates that eh_timed_out() dealt with the timeout.
The command is passed back to the block layer and completed
via __blk_complete_requests().
*NOTE* After returning BLK_EH_HANDLED the SCSI layer is
assumed to be finished with the command, and no other
functions from the SCSI layer will be called. So this
should typically only be returned if the eh_timed_out()
handler raced with normal completion.
- BLK_EH_RESET_TIMER
This indicates that more time is required to finish the
command. Timer is restarted. This action is counted as a
retry and only allowed scmd->allowed + 1(!) times. Once the
limit is reached, action for BLK_EH_NOT_HANDLED is taken instead.
limit is reached, action for BLK_EH_DONE is taken instead.
- BLK_EH_NOT_HANDLED
- BLK_EH_DONE
eh_timed_out() callback did not handle the command.
Step #2 is taken.
+1 -1
View File
@@ -9700,7 +9700,7 @@ S: Maintained
F: drivers/net/ethernet/netronome/
NETWORK BLOCK DEVICE (NBD)
M: Josef Bacik <jbacik@fb.com>
M: Josef Bacik <josef@toxicpanda.com>
S: Maintained
L: linux-block@vger.kernel.org
L: nbd@other.debian.org
-40
View File
@@ -1,40 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
* jsflash.h: OS Flash SIMM support for JavaStations.
*
* Copyright (C) 1999 Pete Zaitcev
*/
#ifndef _SPARC_JSFLASH_H
#define _SPARC_JSFLASH_H
#ifndef _SPARC_TYPES_H
#include <linux/types.h>
#endif
/*
* Semantics of the offset is a full address.
* Hardcode it or get it from probe ioctl.
*
* We use full bus address, so that we would be
* automatically compatible with possible future systems.
*/
#define JSFLASH_IDENT (('F'<<8)|54)
struct jsflash_ident_arg {
__u64 off; /* 0x20000000 is included */
__u32 size;
char name[32]; /* With trailing zero */
};
#define JSFLASH_ERASE (('F'<<8)|55)
/* Put 0 as argument, may be flags or sector number... */
#define JSFLASH_PROGRAM (('F'<<8)|56)
struct jsflash_program_arg {
__u64 data; /* char* for sparc and sparc64 */
__u64 off;
__u32 size;
};
#endif /* _SPARC_JSFLASH_H */
+20 -20
View File
@@ -55,13 +55,13 @@ BFQG_FLAG_FNS(empty)
/* This should be called with the scheduler lock held. */
static void bfqg_stats_update_group_wait_time(struct bfqg_stats *stats)
{
unsigned long long now;
u64 now;
if (!bfqg_stats_waiting(stats))
return;
now = sched_clock();
if (time_after64(now, stats->start_group_wait_time))
now = ktime_get_ns();
if (now > stats->start_group_wait_time)
blkg_stat_add(&stats->group_wait_time,
now - stats->start_group_wait_time);
bfqg_stats_clear_waiting(stats);
@@ -77,20 +77,20 @@ static void bfqg_stats_set_start_group_wait_time(struct bfq_group *bfqg,
return;
if (bfqg == curr_bfqg)
return;
stats->start_group_wait_time = sched_clock();
stats->start_group_wait_time = ktime_get_ns();
bfqg_stats_mark_waiting(stats);
}
/* This should be called with the scheduler lock held. */
static void bfqg_stats_end_empty_time(struct bfqg_stats *stats)
{
unsigned long long now;
u64 now;
if (!bfqg_stats_empty(stats))
return;
now = sched_clock();
if (time_after64(now, stats->start_empty_time))
now = ktime_get_ns();
if (now > stats->start_empty_time)
blkg_stat_add(&stats->empty_time,
now - stats->start_empty_time);
bfqg_stats_clear_empty(stats);
@@ -116,7 +116,7 @@ void bfqg_stats_set_start_empty_time(struct bfq_group *bfqg)
if (bfqg_stats_empty(stats))
return;
stats->start_empty_time = sched_clock();
stats->start_empty_time = ktime_get_ns();
bfqg_stats_mark_empty(stats);
}
@@ -125,9 +125,9 @@ void bfqg_stats_update_idle_time(struct bfq_group *bfqg)
struct bfqg_stats *stats = &bfqg->stats;
if (bfqg_stats_idling(stats)) {
unsigned long long now = sched_clock();
u64 now = ktime_get_ns();
if (time_after64(now, stats->start_idle_time))
if (now > stats->start_idle_time)
blkg_stat_add(&stats->idle_time,
now - stats->start_idle_time);
bfqg_stats_clear_idling(stats);
@@ -138,7 +138,7 @@ void bfqg_stats_set_start_idle_time(struct bfq_group *bfqg)
{
struct bfqg_stats *stats = &bfqg->stats;
stats->start_idle_time = sched_clock();
stats->start_idle_time = ktime_get_ns();
bfqg_stats_mark_idling(stats);
}
@@ -171,18 +171,18 @@ void bfqg_stats_update_io_merged(struct bfq_group *bfqg, unsigned int op)
blkg_rwstat_add(&bfqg->stats.merged, op, 1);
}
void bfqg_stats_update_completion(struct bfq_group *bfqg, uint64_t start_time,
uint64_t io_start_time, unsigned int op)
void bfqg_stats_update_completion(struct bfq_group *bfqg, u64 start_time_ns,
u64 io_start_time_ns, unsigned int op)
{
struct bfqg_stats *stats = &bfqg->stats;
unsigned long long now = sched_clock();
u64 now = ktime_get_ns();
if (time_after64(now, io_start_time))
if (now > io_start_time_ns)
blkg_rwstat_add(&stats->service_time, op,
now - io_start_time);
if (time_after64(io_start_time, start_time))
now - io_start_time_ns);
if (io_start_time_ns > start_time_ns)
blkg_rwstat_add(&stats->wait_time, op,
io_start_time - start_time);
io_start_time_ns - start_time_ns);
}
#else /* CONFIG_BFQ_GROUP_IOSCHED && CONFIG_DEBUG_BLK_CGROUP */
@@ -191,8 +191,8 @@ void bfqg_stats_update_io_add(struct bfq_group *bfqg, struct bfq_queue *bfqq,
unsigned int op) { }
void bfqg_stats_update_io_remove(struct bfq_group *bfqg, unsigned int op) { }
void bfqg_stats_update_io_merged(struct bfq_group *bfqg, unsigned int op) { }
void bfqg_stats_update_completion(struct bfq_group *bfqg, uint64_t start_time,
uint64_t io_start_time, unsigned int op) { }
void bfqg_stats_update_completion(struct bfq_group *bfqg, u64 start_time_ns,
u64 io_start_time_ns, unsigned int op) { }
void bfqg_stats_update_dequeue(struct bfq_group *bfqg) { }
void bfqg_stats_set_start_empty_time(struct bfq_group *bfqg) { }
void bfqg_stats_update_idle_time(struct bfq_group *bfqg) { }
+237 -241
View File
File diff suppressed because it is too large Load Diff
+9 -21
View File
@@ -399,11 +399,6 @@ struct bfq_io_cq {
struct bfq_ttime saved_ttime;
};
enum bfq_device_speed {
BFQ_BFQD_FAST,
BFQ_BFQD_SLOW,
};
/**
* struct bfq_data - per-device data structure.
*
@@ -611,12 +606,11 @@ struct bfq_data {
/* Max service-rate for a soft real-time queue, in sectors/sec */
unsigned int bfq_wr_max_softrt_rate;
/*
* Cached value of the product R*T, used for computing the
* maximum duration of weight raising automatically.
* Cached value of the product ref_rate*ref_wr_duration, used
* for computing the maximum duration of weight raising
* automatically.
*/
u64 RT_prod;
/* device-speed class for the low-latency heuristic */
enum bfq_device_speed device_speed;
u64 rate_dur_prod;
/* fallback dummy bfqq for extreme OOM conditions */
struct bfq_queue oom_bfqq;
@@ -635,12 +629,6 @@ struct bfq_data {
/* bfqq associated with the task issuing current bio for merging */
struct bfq_queue *bio_bfqq;
/*
* Cached sbitmap shift, used to compute depth limits in
* bfq_update_depths.
*/
unsigned int sb_shift;
/*
* Depth limits used in bfq_limit_depth (see comments on the
* function)
@@ -732,9 +720,9 @@ struct bfqg_stats {
/* total time with empty current active q with other requests queued */
struct blkg_stat empty_time;
/* fields after this shouldn't be cleared on stat reset */
uint64_t start_group_wait_time;
uint64_t start_idle_time;
uint64_t start_empty_time;
u64 start_group_wait_time;
u64 start_idle_time;
u64 start_empty_time;
uint16_t flags;
#endif /* CONFIG_BFQ_GROUP_IOSCHED && CONFIG_DEBUG_BLK_CGROUP */
};
@@ -856,8 +844,8 @@ void bfqg_stats_update_io_add(struct bfq_group *bfqg, struct bfq_queue *bfqq,
unsigned int op);
void bfqg_stats_update_io_remove(struct bfq_group *bfqg, unsigned int op);
void bfqg_stats_update_io_merged(struct bfq_group *bfqg, unsigned int op);
void bfqg_stats_update_completion(struct bfq_group *bfqg, uint64_t start_time,
uint64_t io_start_time, unsigned int op);
void bfqg_stats_update_completion(struct bfq_group *bfqg, u64 start_time_ns,
u64 io_start_time_ns, unsigned int op);
void bfqg_stats_update_dequeue(struct bfq_group *bfqg);
void bfqg_stats_set_start_empty_time(struct bfq_group *bfqg);
void bfqg_stats_update_idle_time(struct bfq_group *bfqg);
+14 -15
View File
@@ -56,12 +56,12 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio,
struct bio_set *bs = bio->bi_pool;
unsigned inline_vecs;
if (!bs || !bs->bio_integrity_pool) {
if (!bs || !mempool_initialized(&bs->bio_integrity_pool)) {
bip = kmalloc(sizeof(struct bio_integrity_payload) +
sizeof(struct bio_vec) * nr_vecs, gfp_mask);
inline_vecs = nr_vecs;
} else {
bip = mempool_alloc(bs->bio_integrity_pool, gfp_mask);
bip = mempool_alloc(&bs->bio_integrity_pool, gfp_mask);
inline_vecs = BIP_INLINE_VECS;
}
@@ -74,7 +74,7 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio,
unsigned long idx = 0;
bip->bip_vec = bvec_alloc(gfp_mask, nr_vecs, &idx,
bs->bvec_integrity_pool);
&bs->bvec_integrity_pool);
if (!bip->bip_vec)
goto err;
bip->bip_max_vcnt = bvec_nr_vecs(idx);
@@ -90,7 +90,7 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio,
return bip;
err:
mempool_free(bip, bs->bio_integrity_pool);
mempool_free(bip, &bs->bio_integrity_pool);
return ERR_PTR(-ENOMEM);
}
EXPORT_SYMBOL(bio_integrity_alloc);
@@ -111,10 +111,10 @@ static void bio_integrity_free(struct bio *bio)
kfree(page_address(bip->bip_vec->bv_page) +
bip->bip_vec->bv_offset);
if (bs && bs->bio_integrity_pool) {
bvec_free(bs->bvec_integrity_pool, bip->bip_vec, bip->bip_slab);
if (bs && mempool_initialized(&bs->bio_integrity_pool)) {
bvec_free(&bs->bvec_integrity_pool, bip->bip_vec, bip->bip_slab);
mempool_free(bip, bs->bio_integrity_pool);
mempool_free(bip, &bs->bio_integrity_pool);
} else {
kfree(bip);
}
@@ -465,16 +465,15 @@ EXPORT_SYMBOL(bio_integrity_clone);
int bioset_integrity_create(struct bio_set *bs, int pool_size)
{
if (bs->bio_integrity_pool)
if (mempool_initialized(&bs->bio_integrity_pool))
return 0;
bs->bio_integrity_pool = mempool_create_slab_pool(pool_size, bip_slab);
if (!bs->bio_integrity_pool)
if (mempool_init_slab_pool(&bs->bio_integrity_pool,
pool_size, bip_slab))
return -1;
bs->bvec_integrity_pool = biovec_create_pool(pool_size);
if (!bs->bvec_integrity_pool) {
mempool_destroy(bs->bio_integrity_pool);
if (biovec_init_pool(&bs->bvec_integrity_pool, pool_size)) {
mempool_exit(&bs->bio_integrity_pool);
return -1;
}
@@ -484,8 +483,8 @@ EXPORT_SYMBOL(bioset_integrity_create);
void bioset_integrity_free(struct bio_set *bs)
{
mempool_destroy(bs->bio_integrity_pool);
mempool_destroy(bs->bvec_integrity_pool);
mempool_exit(&bs->bio_integrity_pool);
mempool_exit(&bs->bvec_integrity_pool);
}
EXPORT_SYMBOL(bioset_integrity_free);
+109 -80
View File
@@ -53,7 +53,7 @@ static struct biovec_slab bvec_slabs[BVEC_POOL_NR] __read_mostly = {
* fs_bio_set is the bio_set containing bio and iovec memory pools used by
* IO code that does not need private memory pools.
*/
struct bio_set *fs_bio_set;
struct bio_set fs_bio_set;
EXPORT_SYMBOL(fs_bio_set);
/*
@@ -254,7 +254,7 @@ static void bio_free(struct bio *bio)
bio_uninit(bio);
if (bs) {
bvec_free(bs->bvec_pool, bio->bi_io_vec, BVEC_POOL_IDX(bio));
bvec_free(&bs->bvec_pool, bio->bi_io_vec, BVEC_POOL_IDX(bio));
/*
* If we have front padding, adjust the bio pointer before freeing
@@ -262,7 +262,7 @@ static void bio_free(struct bio *bio)
p = bio;
p -= bs->front_pad;
mempool_free(p, bs->bio_pool);
mempool_free(p, &bs->bio_pool);
} else {
/* Bio was allocated by bio_kmalloc() */
kfree(bio);
@@ -454,7 +454,8 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, unsigned int nr_iovecs,
inline_vecs = nr_iovecs;
} else {
/* should not use nobvec bioset for nr_iovecs > 0 */
if (WARN_ON_ONCE(!bs->bvec_pool && nr_iovecs > 0))
if (WARN_ON_ONCE(!mempool_initialized(&bs->bvec_pool) &&
nr_iovecs > 0))
return NULL;
/*
* generic_make_request() converts recursion to iteration; this
@@ -483,11 +484,11 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, unsigned int nr_iovecs,
bs->rescue_workqueue)
gfp_mask &= ~__GFP_DIRECT_RECLAIM;
p = mempool_alloc(bs->bio_pool, gfp_mask);
p = mempool_alloc(&bs->bio_pool, gfp_mask);
if (!p && gfp_mask != saved_gfp) {
punt_bios_to_rescuer(bs);
gfp_mask = saved_gfp;
p = mempool_alloc(bs->bio_pool, gfp_mask);
p = mempool_alloc(&bs->bio_pool, gfp_mask);
}
front_pad = bs->front_pad;
@@ -503,11 +504,11 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, unsigned int nr_iovecs,
if (nr_iovecs > inline_vecs) {
unsigned long idx = 0;
bvl = bvec_alloc(gfp_mask, nr_iovecs, &idx, bs->bvec_pool);
bvl = bvec_alloc(gfp_mask, nr_iovecs, &idx, &bs->bvec_pool);
if (!bvl && gfp_mask != saved_gfp) {
punt_bios_to_rescuer(bs);
gfp_mask = saved_gfp;
bvl = bvec_alloc(gfp_mask, nr_iovecs, &idx, bs->bvec_pool);
bvl = bvec_alloc(gfp_mask, nr_iovecs, &idx, &bs->bvec_pool);
}
if (unlikely(!bvl))
@@ -524,25 +525,25 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, unsigned int nr_iovecs,
return bio;
err_free:
mempool_free(p, bs->bio_pool);
mempool_free(p, &bs->bio_pool);
return NULL;
}
EXPORT_SYMBOL(bio_alloc_bioset);
void zero_fill_bio(struct bio *bio)
void zero_fill_bio_iter(struct bio *bio, struct bvec_iter start)
{
unsigned long flags;
struct bio_vec bv;
struct bvec_iter iter;
bio_for_each_segment(bv, bio, iter) {
__bio_for_each_segment(bv, bio, iter, start) {
char *data = bvec_kmap_irq(&bv, &flags);
memset(data, 0, bv.bv_len);
flush_dcache_page(bv.bv_page);
bvec_kunmap_irq(data, &flags);
}
}
EXPORT_SYMBOL(zero_fill_bio);
EXPORT_SYMBOL(zero_fill_bio_iter);
/**
* bio_put - release a reference to a bio
@@ -970,27 +971,68 @@ void bio_advance(struct bio *bio, unsigned bytes)
}
EXPORT_SYMBOL(bio_advance);
void bio_copy_data_iter(struct bio *dst, struct bvec_iter *dst_iter,
struct bio *src, struct bvec_iter *src_iter)
{
struct bio_vec src_bv, dst_bv;
void *src_p, *dst_p;
unsigned bytes;
while (src_iter->bi_size && dst_iter->bi_size) {
src_bv = bio_iter_iovec(src, *src_iter);
dst_bv = bio_iter_iovec(dst, *dst_iter);
bytes = min(src_bv.bv_len, dst_bv.bv_len);
src_p = kmap_atomic(src_bv.bv_page);
dst_p = kmap_atomic(dst_bv.bv_page);
memcpy(dst_p + dst_bv.bv_offset,
src_p + src_bv.bv_offset,
bytes);
kunmap_atomic(dst_p);
kunmap_atomic(src_p);
flush_dcache_page(dst_bv.bv_page);
bio_advance_iter(src, src_iter, bytes);
bio_advance_iter(dst, dst_iter, bytes);
}
}
EXPORT_SYMBOL(bio_copy_data_iter);
/**
* bio_copy_data - copy contents of data buffers from one chain of bios to
* another
* @src: source bio list
* @dst: destination bio list
*
* If @src and @dst are single bios, bi_next must be NULL - otherwise, treats
* @src and @dst as linked lists of bios.
* bio_copy_data - copy contents of data buffers from one bio to another
* @src: source bio
* @dst: destination bio
*
* Stops when it reaches the end of either @src or @dst - that is, copies
* min(src->bi_size, dst->bi_size) bytes (or the equivalent for lists of bios).
*/
void bio_copy_data(struct bio *dst, struct bio *src)
{
struct bvec_iter src_iter, dst_iter;
struct bio_vec src_bv, dst_bv;
void *src_p, *dst_p;
unsigned bytes;
struct bvec_iter src_iter = src->bi_iter;
struct bvec_iter dst_iter = dst->bi_iter;
src_iter = src->bi_iter;
dst_iter = dst->bi_iter;
bio_copy_data_iter(dst, &dst_iter, src, &src_iter);
}
EXPORT_SYMBOL(bio_copy_data);
/**
* bio_list_copy_data - copy contents of data buffers from one chain of bios to
* another
* @src: source bio list
* @dst: destination bio list
*
* Stops when it reaches the end of either the @src list or @dst list - that is,
* copies min(src->bi_size, dst->bi_size) bytes (or the equivalent for lists of
* bios).
*/
void bio_list_copy_data(struct bio *dst, struct bio *src)
{
struct bvec_iter src_iter = src->bi_iter;
struct bvec_iter dst_iter = dst->bi_iter;
while (1) {
if (!src_iter.bi_size) {
@@ -1009,26 +1051,10 @@ void bio_copy_data(struct bio *dst, struct bio *src)
dst_iter = dst->bi_iter;
}
src_bv = bio_iter_iovec(src, src_iter);
dst_bv = bio_iter_iovec(dst, dst_iter);
bytes = min(src_bv.bv_len, dst_bv.bv_len);
src_p = kmap_atomic(src_bv.bv_page);
dst_p = kmap_atomic(dst_bv.bv_page);
memcpy(dst_p + dst_bv.bv_offset,
src_p + src_bv.bv_offset,
bytes);
kunmap_atomic(dst_p);
kunmap_atomic(src_p);
bio_advance_iter(src, &src_iter, bytes);
bio_advance_iter(dst, &dst_iter, bytes);
bio_copy_data_iter(dst, &dst_iter, src, &src_iter);
}
}
EXPORT_SYMBOL(bio_copy_data);
EXPORT_SYMBOL(bio_list_copy_data);
struct bio_map_data {
int is_our_pages;
@@ -1584,6 +1610,7 @@ void bio_set_pages_dirty(struct bio *bio)
set_page_dirty_lock(page);
}
}
EXPORT_SYMBOL_GPL(bio_set_pages_dirty);
static void bio_release_pages(struct bio *bio)
{
@@ -1667,6 +1694,7 @@ void bio_check_pages_dirty(struct bio *bio)
bio_put(bio);
}
}
EXPORT_SYMBOL_GPL(bio_check_pages_dirty);
void generic_start_io_acct(struct request_queue *q, int rw,
unsigned long sectors, struct hd_struct *part)
@@ -1749,6 +1777,9 @@ again:
if (!bio_integrity_endio(bio))
return;
if (WARN_ONCE(bio->bi_next, "driver left bi_next not NULL"))
bio->bi_next = NULL;
/*
* Need to have a real endio function for chained bios, otherwise
* various corner cases will break (like stacking block devices that
@@ -1848,30 +1879,38 @@ EXPORT_SYMBOL_GPL(bio_trim);
* create memory pools for biovec's in a bio_set.
* use the global biovec slabs created for general use.
*/
mempool_t *biovec_create_pool(int pool_entries)
int biovec_init_pool(mempool_t *pool, int pool_entries)
{
struct biovec_slab *bp = bvec_slabs + BVEC_POOL_MAX;
return mempool_create_slab_pool(pool_entries, bp->slab);
return mempool_init_slab_pool(pool, pool_entries, bp->slab);
}
void bioset_free(struct bio_set *bs)
/*
* bioset_exit - exit a bioset initialized with bioset_init()
*
* May be called on a zeroed but uninitialized bioset (i.e. allocated with
* kzalloc()).
*/
void bioset_exit(struct bio_set *bs)
{
if (bs->rescue_workqueue)
destroy_workqueue(bs->rescue_workqueue);
bs->rescue_workqueue = NULL;
mempool_destroy(bs->bio_pool);
mempool_destroy(bs->bvec_pool);
mempool_exit(&bs->bio_pool);
mempool_exit(&bs->bvec_pool);
bioset_integrity_free(bs);
bio_put_slab(bs);
kfree(bs);
if (bs->bio_slab)
bio_put_slab(bs);
bs->bio_slab = NULL;
}
EXPORT_SYMBOL(bioset_free);
EXPORT_SYMBOL(bioset_exit);
/**
* bioset_create - Create a bio_set
* bioset_init - Initialize a bio_set
* @bs: pool to initialize
* @pool_size: Number of bio and bio_vecs to cache in the mempool
* @front_pad: Number of bytes to allocate in front of the returned bio
* @flags: Flags to modify behavior, currently %BIOSET_NEED_BVECS
@@ -1890,16 +1929,12 @@ EXPORT_SYMBOL(bioset_free);
* dispatch queued requests when the mempool runs out of space.
*
*/
struct bio_set *bioset_create(unsigned int pool_size,
unsigned int front_pad,
int flags)
int bioset_init(struct bio_set *bs,
unsigned int pool_size,
unsigned int front_pad,
int flags)
{
unsigned int back_pad = BIO_INLINE_VECS * sizeof(struct bio_vec);
struct bio_set *bs;
bs = kzalloc(sizeof(*bs), GFP_KERNEL);
if (!bs)
return NULL;
bs->front_pad = front_pad;
@@ -1908,34 +1943,29 @@ struct bio_set *bioset_create(unsigned int pool_size,
INIT_WORK(&bs->rescue_work, bio_alloc_rescue);
bs->bio_slab = bio_find_or_create_slab(front_pad + back_pad);
if (!bs->bio_slab) {
kfree(bs);
return NULL;
}
if (!bs->bio_slab)
return -ENOMEM;
bs->bio_pool = mempool_create_slab_pool(pool_size, bs->bio_slab);
if (!bs->bio_pool)
if (mempool_init_slab_pool(&bs->bio_pool, pool_size, bs->bio_slab))
goto bad;
if (flags & BIOSET_NEED_BVECS) {
bs->bvec_pool = biovec_create_pool(pool_size);
if (!bs->bvec_pool)
goto bad;
}
if ((flags & BIOSET_NEED_BVECS) &&
biovec_init_pool(&bs->bvec_pool, pool_size))
goto bad;
if (!(flags & BIOSET_NEED_RESCUER))
return bs;
return 0;
bs->rescue_workqueue = alloc_workqueue("bioset", WQ_MEM_RECLAIM, 0);
if (!bs->rescue_workqueue)
goto bad;
return bs;
return 0;
bad:
bioset_free(bs);
return NULL;
bioset_exit(bs);
return -ENOMEM;
}
EXPORT_SYMBOL(bioset_create);
EXPORT_SYMBOL(bioset_init);
#ifdef CONFIG_BLK_CGROUP
@@ -2020,11 +2050,10 @@ static int __init init_bio(void)
bio_integrity_init();
biovec_init_slabs();
fs_bio_set = bioset_create(BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
if (!fs_bio_set)
if (bioset_init(&fs_bio_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS))
panic("bio: can't allocate bios\n");
if (bioset_integrity_create(fs_bio_set, BIO_POOL_SIZE))
if (bioset_integrity_create(&fs_bio_set, BIO_POOL_SIZE))
panic("bio: can't create integrity pool\n");
return 0;
+52 -68
View File
@@ -196,15 +196,8 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
RB_CLEAR_NODE(&rq->rb_node);
rq->tag = -1;
rq->internal_tag = -1;
rq->start_time = jiffies;
set_start_time_ns(rq);
rq->start_time_ns = ktime_get_ns();
rq->part = NULL;
seqcount_init(&rq->gstate_seq);
u64_stats_init(&rq->aborted_gstate_sync);
/*
* See comment of blk_mq_init_request
*/
WRITE_ONCE(rq->gstate, MQ_RQ_GEN_INC);
}
EXPORT_SYMBOL(blk_rq_init);
@@ -280,6 +273,10 @@ static void req_bio_endio(struct request *rq, struct bio *bio,
bio_advance(bio, nbytes);
/* don't actually finish bio if it's part of flush sequence */
/*
* XXX this code looks suspicious - it's not consistent with advancing
* req->bio in caller
*/
if (bio->bi_iter.bi_size == 0 && !(rq->rq_flags & RQF_FLUSH_SEQ))
bio_endio(bio);
}
@@ -360,7 +357,6 @@ EXPORT_SYMBOL(blk_start_queue_async);
void blk_start_queue(struct request_queue *q)
{
lockdep_assert_held(q->queue_lock);
WARN_ON(!in_interrupt() && !irqs_disabled());
WARN_ON_ONCE(q->mq_ops);
queue_flag_clear(QUEUE_FLAG_STOPPED, q);
@@ -996,18 +992,24 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id,
spinlock_t *lock)
{
struct request_queue *q;
int ret;
q = kmem_cache_alloc_node(blk_requestq_cachep,
gfp_mask | __GFP_ZERO, node_id);
if (!q)
return NULL;
INIT_LIST_HEAD(&q->queue_head);
q->last_merge = NULL;
q->end_sector = 0;
q->boundary_rq = NULL;
q->id = ida_simple_get(&blk_queue_ida, 0, 0, gfp_mask);
if (q->id < 0)
goto fail_q;
q->bio_split = bioset_create(BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
if (!q->bio_split)
ret = bioset_init(&q->bio_split, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
if (ret)
goto fail_id;
q->backing_dev_info = bdi_alloc_node(gfp_mask, node_id);
@@ -1079,7 +1081,7 @@ fail_bdi:
fail_stats:
bdi_put(q->backing_dev_info);
fail_split:
bioset_free(q->bio_split);
bioset_exit(&q->bio_split);
fail_id:
ida_simple_remove(&blk_queue_ida, q->id);
fail_q:
@@ -1173,16 +1175,8 @@ int blk_init_allocated_queue(struct request_queue *q)
q->sg_reserved_size = INT_MAX;
/* Protect q->elevator from elevator_change */
mutex_lock(&q->sysfs_lock);
/* init elevator */
if (elevator_init(q, NULL)) {
mutex_unlock(&q->sysfs_lock);
if (elevator_init(q))
goto out_exit_flush_rq;
}
mutex_unlock(&q->sysfs_lock);
return 0;
out_exit_flush_rq:
@@ -1334,6 +1328,7 @@ int blk_update_nr_requests(struct request_queue *q, unsigned int nr)
* @op: operation and flags
* @bio: bio to allocate request for (can be %NULL)
* @flags: BLK_MQ_REQ_* flags
* @gfp_mask: allocator flags
*
* Get a free request from @q. This function may fail under memory
* pressure or if @q is dead.
@@ -1343,7 +1338,7 @@ int blk_update_nr_requests(struct request_queue *q, unsigned int nr)
* Returns request pointer on success, with @q->queue_lock *not held*.
*/
static struct request *__get_request(struct request_list *rl, unsigned int op,
struct bio *bio, blk_mq_req_flags_t flags)
struct bio *bio, blk_mq_req_flags_t flags, gfp_t gfp_mask)
{
struct request_queue *q = rl->q;
struct request *rq;
@@ -1352,8 +1347,6 @@ static struct request *__get_request(struct request_list *rl, unsigned int op,
struct io_cq *icq = NULL;
const bool is_sync = op_is_sync(op);
int may_queue;
gfp_t gfp_mask = flags & BLK_MQ_REQ_NOWAIT ? GFP_ATOMIC :
__GFP_DIRECT_RECLAIM;
req_flags_t rq_flags = RQF_ALLOCED;
lockdep_assert_held(q->queue_lock);
@@ -1517,8 +1510,9 @@ rq_starved:
* @op: operation and flags
* @bio: bio to allocate request for (can be %NULL)
* @flags: BLK_MQ_REQ_* flags.
* @gfp: allocator flags
*
* Get a free request from @q. If %__GFP_DIRECT_RECLAIM is set in @gfp_mask,
* Get a free request from @q. If %BLK_MQ_REQ_NOWAIT is not set in @flags,
* this function keeps retrying under memory pressure and fails iff @q is dead.
*
* Must be called with @q->queue_lock held and,
@@ -1526,7 +1520,7 @@ rq_starved:
* Returns request pointer on success, with @q->queue_lock *not held*.
*/
static struct request *get_request(struct request_queue *q, unsigned int op,
struct bio *bio, blk_mq_req_flags_t flags)
struct bio *bio, blk_mq_req_flags_t flags, gfp_t gfp)
{
const bool is_sync = op_is_sync(op);
DEFINE_WAIT(wait);
@@ -1538,7 +1532,7 @@ static struct request *get_request(struct request_queue *q, unsigned int op,
rl = blk_get_rl(q, bio); /* transferred to @rq on success */
retry:
rq = __get_request(rl, op, bio, flags);
rq = __get_request(rl, op, bio, flags, gfp);
if (!IS_ERR(rq))
return rq;
@@ -1579,8 +1573,7 @@ static struct request *blk_old_get_request(struct request_queue *q,
unsigned int op, blk_mq_req_flags_t flags)
{
struct request *rq;
gfp_t gfp_mask = flags & BLK_MQ_REQ_NOWAIT ? GFP_ATOMIC :
__GFP_DIRECT_RECLAIM;
gfp_t gfp_mask = flags & BLK_MQ_REQ_NOWAIT ? GFP_ATOMIC : GFP_NOIO;
int ret = 0;
WARN_ON_ONCE(q->mq_ops);
@@ -1592,7 +1585,7 @@ static struct request *blk_old_get_request(struct request_queue *q,
if (ret)
return ERR_PTR(ret);
spin_lock_irq(q->queue_lock);
rq = get_request(q, op, NULL, flags);
rq = get_request(q, op, NULL, flags, gfp_mask);
if (IS_ERR(rq)) {
spin_unlock_irq(q->queue_lock);
blk_queue_exit(q);
@@ -1607,13 +1600,13 @@ static struct request *blk_old_get_request(struct request_queue *q,
}
/**
* blk_get_request_flags - allocate a request
* blk_get_request - allocate a request
* @q: request queue to allocate a request for
* @op: operation (REQ_OP_*) and REQ_* flags, e.g. REQ_SYNC.
* @flags: BLK_MQ_REQ_* flags, e.g. BLK_MQ_REQ_NOWAIT.
*/
struct request *blk_get_request_flags(struct request_queue *q, unsigned int op,
blk_mq_req_flags_t flags)
struct request *blk_get_request(struct request_queue *q, unsigned int op,
blk_mq_req_flags_t flags)
{
struct request *req;
@@ -1632,14 +1625,6 @@ struct request *blk_get_request_flags(struct request_queue *q, unsigned int op,
return req;
}
EXPORT_SYMBOL(blk_get_request_flags);
struct request *blk_get_request(struct request_queue *q, unsigned int op,
gfp_t gfp_mask)
{
return blk_get_request_flags(q, op, gfp_mask & __GFP_DIRECT_RECLAIM ?
0 : BLK_MQ_REQ_NOWAIT);
}
EXPORT_SYMBOL(blk_get_request);
/**
@@ -1660,7 +1645,7 @@ void blk_requeue_request(struct request_queue *q, struct request *rq)
blk_delete_timer(rq);
blk_clear_rq_complete(rq);
trace_block_rq_requeue(q, rq);
wbt_requeue(q->rq_wb, &rq->issue_stat);
wbt_requeue(q->rq_wb, rq);
if (rq->rq_flags & RQF_QUEUED)
blk_queue_end_tag(q, rq);
@@ -1767,7 +1752,7 @@ void __blk_put_request(struct request_queue *q, struct request *req)
/* this is a bio leak */
WARN_ON(req->bio != NULL);
wbt_done(q->rq_wb, &req->issue_stat);
wbt_done(q->rq_wb, req);
/*
* Request may not have originated from ll_rw_blk. if not,
@@ -2066,7 +2051,7 @@ get_rq:
* Returns with the queue unlocked.
*/
blk_queue_enter_live(q);
req = get_request(q, bio->bi_opf, bio, 0);
req = get_request(q, bio->bi_opf, bio, 0, GFP_NOIO);
if (IS_ERR(req)) {
blk_queue_exit(q);
__wbt_done(q->rq_wb, wb_acct);
@@ -2078,7 +2063,7 @@ get_rq:
goto out_unlock;
}
wbt_track(&req->issue_stat, wb_acct);
wbt_track(req, wb_acct);
/*
* After dropping the lock and possibly sleeping here, our request
@@ -2392,7 +2377,9 @@ blk_qc_t generic_make_request(struct bio *bio)
if (bio->bi_opf & REQ_NOWAIT)
flags = BLK_MQ_REQ_NOWAIT;
if (blk_queue_enter(q, flags) < 0) {
if (bio_flagged(bio, BIO_QUEUE_ENTERED))
blk_queue_enter_live(q);
else if (blk_queue_enter(q, flags) < 0) {
if (!blk_queue_dying(q) && (bio->bi_opf & REQ_NOWAIT))
bio_wouldblock_error(bio);
else
@@ -2727,7 +2714,7 @@ void blk_account_io_completion(struct request *req, unsigned int bytes)
}
}
void blk_account_io_done(struct request *req)
void blk_account_io_done(struct request *req, u64 now)
{
/*
* Account IO completion. flush_rq isn't accounted as a
@@ -2735,11 +2722,12 @@ void blk_account_io_done(struct request *req)
* containing request is enough.
*/
if (blk_do_io_stat(req) && !(req->rq_flags & RQF_FLUSH_SEQ)) {
unsigned long duration = jiffies - req->start_time;
unsigned long duration;
const int rw = rq_data_dir(req);
struct hd_struct *part;
int cpu;
duration = nsecs_to_jiffies(now - req->start_time_ns);
cpu = part_stat_lock();
part = req->part;
@@ -2970,10 +2958,8 @@ static void blk_dequeue_request(struct request *rq)
* and to it is freed is accounted as io that is in progress at
* the driver side.
*/
if (blk_account_rq(rq)) {
if (blk_account_rq(rq))
q->in_flight[rq_is_sync(rq)]++;
set_io_start_time_ns(rq);
}
}
/**
@@ -2992,9 +2978,12 @@ void blk_start_request(struct request *req)
blk_dequeue_request(req);
if (test_bit(QUEUE_FLAG_STATS, &req->q->queue_flags)) {
blk_stat_set_issue(&req->issue_stat, blk_rq_sectors(req));
req->io_start_time_ns = ktime_get_ns();
#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
req->throtl_size = blk_rq_sectors(req);
#endif
req->rq_flags |= RQF_STATS;
wbt_issue(req->q->rq_wb, &req->issue_stat);
wbt_issue(req->q->rq_wb, req);
}
BUG_ON(blk_rq_is_complete(req));
@@ -3092,8 +3081,10 @@ bool blk_update_request(struct request *req, blk_status_t error,
struct bio *bio = req->bio;
unsigned bio_bytes = min(bio->bi_iter.bi_size, nr_bytes);
if (bio_bytes == bio->bi_iter.bi_size)
if (bio_bytes == bio->bi_iter.bi_size) {
req->bio = bio->bi_next;
bio->bi_next = NULL;
}
/* Completion has already been traced */
bio_clear_flag(bio, BIO_TRACE_COMPLETION);
@@ -3190,12 +3181,13 @@ EXPORT_SYMBOL_GPL(blk_unprep_request);
void blk_finish_request(struct request *req, blk_status_t error)
{
struct request_queue *q = req->q;
u64 now = ktime_get_ns();
lockdep_assert_held(req->q->queue_lock);
WARN_ON_ONCE(q->mq_ops);
if (req->rq_flags & RQF_STATS)
blk_stat_add(req);
blk_stat_add(req, now);
if (req->rq_flags & RQF_QUEUED)
blk_queue_end_tag(q, req);
@@ -3210,10 +3202,10 @@ void blk_finish_request(struct request *req, blk_status_t error)
if (req->rq_flags & RQF_DONTPREP)
blk_unprep_request(req);
blk_account_io_done(req);
blk_account_io_done(req, now);
if (req->end_io) {
wbt_done(req->q->rq_wb, &req->issue_stat);
wbt_done(req->q->rq_wb, req);
req->end_io(req, error);
} else {
if (blk_bidi_rq(req))
@@ -3519,7 +3511,7 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
struct bio *bio, *bio_src;
if (!bs)
bs = fs_bio_set;
bs = &fs_bio_set;
__rq_for_each_bio(bio_src, rq_src) {
bio = bio_clone_fast(bio_src, gfp_mask, bs);
@@ -3630,7 +3622,7 @@ static void queue_unplugged(struct request_queue *q, unsigned int depth,
blk_run_queue_async(q);
else
__blk_run_queue(q);
spin_unlock(q->queue_lock);
spin_unlock_irq(q->queue_lock);
}
static void flush_plug_callbacks(struct blk_plug *plug, bool from_schedule)
@@ -3678,7 +3670,6 @@ EXPORT_SYMBOL(blk_check_plugged);
void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
{
struct request_queue *q;
unsigned long flags;
struct request *rq;
LIST_HEAD(list);
unsigned int depth;
@@ -3698,11 +3689,6 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
q = NULL;
depth = 0;
/*
* Save and disable interrupts here, to avoid doing it for every
* queue lock we have to take.
*/
local_irq_save(flags);
while (!list_empty(&list)) {
rq = list_entry_rq(list.next);
list_del_init(&rq->queuelist);
@@ -3715,7 +3701,7 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
queue_unplugged(q, depth, from_schedule);
q = rq->q;
depth = 0;
spin_lock(q->queue_lock);
spin_lock_irq(q->queue_lock);
}
/*
@@ -3742,8 +3728,6 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
*/
if (q)
queue_unplugged(q, depth, from_schedule);
local_irq_restore(flags);
}
void blk_finish_plug(struct blk_plug *plug)
+6 -6
View File
@@ -333,34 +333,34 @@ static ssize_t integrity_device_show(struct blk_integrity *bi, char *page)
}
static struct integrity_sysfs_entry integrity_format_entry = {
.attr = { .name = "format", .mode = S_IRUGO },
.attr = { .name = "format", .mode = 0444 },
.show = integrity_format_show,
};
static struct integrity_sysfs_entry integrity_tag_size_entry = {
.attr = { .name = "tag_size", .mode = S_IRUGO },
.attr = { .name = "tag_size", .mode = 0444 },
.show = integrity_tag_size_show,
};
static struct integrity_sysfs_entry integrity_interval_entry = {
.attr = { .name = "protection_interval_bytes", .mode = S_IRUGO },
.attr = { .name = "protection_interval_bytes", .mode = 0444 },
.show = integrity_interval_show,
};
static struct integrity_sysfs_entry integrity_verify_entry = {
.attr = { .name = "read_verify", .mode = S_IRUGO | S_IWUSR },
.attr = { .name = "read_verify", .mode = 0644 },
.show = integrity_verify_show,
.store = integrity_verify_store,
};
static struct integrity_sysfs_entry integrity_generate_entry = {
.attr = { .name = "write_generate", .mode = S_IRUGO | S_IWUSR },
.attr = { .name = "write_generate", .mode = 0644 },
.show = integrity_generate_show,
.store = integrity_generate_store,
};
static struct integrity_sysfs_entry integrity_device_entry = {
.attr = { .name = "device_is_integrity_capable", .mode = S_IRUGO },
.attr = { .name = "device_is_integrity_capable", .mode = 0444 },
.show = integrity_device_show,
};
+9 -3
View File
@@ -62,10 +62,16 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
unsigned int req_sects;
sector_t end_sect, tmp;
/* Make sure bi_size doesn't overflow */
req_sects = min_t(sector_t, nr_sects, UINT_MAX >> 9);
/*
* Issue in chunks of the user defined max discard setting,
* ensuring that bi_size doesn't overflow
*/
req_sects = min_t(sector_t, nr_sects,
q->limits.max_discard_sectors);
if (req_sects > UINT_MAX >> 9)
req_sects = UINT_MAX >> 9;
/**
/*
* If splitting a request, and the next starting sector would be
* misaligned, stop the discard at the previous aligned sector.
*/
+19 -10
View File
@@ -188,16 +188,16 @@ void blk_queue_split(struct request_queue *q, struct bio **bio)
switch (bio_op(*bio)) {
case REQ_OP_DISCARD:
case REQ_OP_SECURE_ERASE:
split = blk_bio_discard_split(q, *bio, q->bio_split, &nsegs);
split = blk_bio_discard_split(q, *bio, &q->bio_split, &nsegs);
break;
case REQ_OP_WRITE_ZEROES:
split = blk_bio_write_zeroes_split(q, *bio, q->bio_split, &nsegs);
split = blk_bio_write_zeroes_split(q, *bio, &q->bio_split, &nsegs);
break;
case REQ_OP_WRITE_SAME:
split = blk_bio_write_same_split(q, *bio, q->bio_split, &nsegs);
split = blk_bio_write_same_split(q, *bio, &q->bio_split, &nsegs);
break;
default:
split = blk_bio_segment_split(q, *bio, q->bio_split, &nsegs);
split = blk_bio_segment_split(q, *bio, &q->bio_split, &nsegs);
break;
}
@@ -210,6 +210,16 @@ void blk_queue_split(struct request_queue *q, struct bio **bio)
/* there is no chance to merge the split bio */
split->bi_opf |= REQ_NOMERGE;
/*
* Since we're recursing into make_request here, ensure
* that we mark this bio as already having entered the queue.
* If not, and the queue is going away, we can get stuck
* forever on waiting for the queue reference to drop. But
* that will never happen, as we're already holding a
* reference to it.
*/
bio_set_flag(*bio, BIO_QUEUE_ENTERED);
bio_chain(split, *bio);
trace_block_split(q, split, (*bio)->bi_iter.bi_sector);
generic_make_request(*bio);
@@ -724,13 +734,12 @@ static struct request *attempt_merge(struct request_queue *q,
}
/*
* At this point we have either done a back merge
* or front merge. We need the smaller start_time of
* the merged requests to be the current request
* for accounting purposes.
* At this point we have either done a back merge or front merge. We
* need the smaller start_time_ns of the merged requests to be the
* current request for accounting purposes.
*/
if (time_after(req->start_time, next->start_time))
req->start_time = next->start_time;
if (next->start_time_ns < req->start_time_ns)
req->start_time_ns = next->start_time_ns;
req->biotail->bi_next = next->bio;
req->biotail = next->biotail;
-1
View File
@@ -344,7 +344,6 @@ static const char *const rqf_name[] = {
RQF_NAME(STATS),
RQF_NAME(SPECIAL_PAYLOAD),
RQF_NAME(ZONE_WRITE_LOCKED),
RQF_NAME(MQ_TIMEOUT_EXPIRED),
RQF_NAME(MQ_POLL_SLEPT),
};
#undef RQF_NAME
+25 -21
View File
@@ -268,19 +268,16 @@ bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio,
EXPORT_SYMBOL_GPL(blk_mq_sched_try_merge);
/*
* Reverse check our software queue for entries that we could potentially
* merge with. Currently includes a hand-wavy stop count of 8, to not spend
* too much time checking for merges.
* Iterate list of requests and see if we can merge this bio with any
* of them.
*/
static bool blk_mq_attempt_merge(struct request_queue *q,
struct blk_mq_ctx *ctx, struct bio *bio)
bool blk_mq_bio_list_merge(struct request_queue *q, struct list_head *list,
struct bio *bio)
{
struct request *rq;
int checked = 8;
lockdep_assert_held(&ctx->lock);
list_for_each_entry_reverse(rq, &ctx->rq_list, queuelist) {
list_for_each_entry_reverse(rq, list, queuelist) {
bool merged = false;
if (!checked--)
@@ -305,13 +302,30 @@ static bool blk_mq_attempt_merge(struct request_queue *q,
continue;
}
if (merged)
ctx->rq_merged++;
return merged;
}
return false;
}
EXPORT_SYMBOL_GPL(blk_mq_bio_list_merge);
/*
 * Try to merge @bio into one of the requests queued on software queue @ctx.
 * The scan itself is done by blk_mq_bio_list_merge() on ctx->rq_list; a
 * successful merge is counted in ctx->rq_merged.
 *
 * The caller must hold ctx->lock. Returns true if @bio was merged.
 */
static bool blk_mq_attempt_merge(struct request_queue *q,
				 struct blk_mq_ctx *ctx, struct bio *bio)
{
	bool merged;

	lockdep_assert_held(&ctx->lock);

	merged = blk_mq_bio_list_merge(q, &ctx->rq_list, bio);
	if (merged)
		ctx->rq_merged++;

	return merged;
}
bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio)
{
@@ -571,6 +585,7 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
if (!e) {
q->elevator = NULL;
q->nr_requests = q->tag_set->queue_depth;
return 0;
}
@@ -633,14 +648,3 @@ void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e)
blk_mq_sched_tags_teardown(q);
q->elevator = NULL;
}
/*
 * Call elevator_init() for @q with a NULL second argument while holding
 * q->sysfs_lock, and pass its return value back to the caller.
 */
int blk_mq_sched_init(struct request_queue *q)
{
	int err;

	mutex_lock(&q->sysfs_lock);
	err = elevator_init(q, NULL);
	mutex_unlock(&q->sysfs_lock);

	return err;
}
-2
View File
@@ -33,8 +33,6 @@ int blk_mq_sched_init_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx,
void blk_mq_sched_exit_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx,
unsigned int hctx_idx);
int blk_mq_sched_init(struct request_queue *q);
static inline bool
blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio)
{
+3 -3
View File
@@ -166,15 +166,15 @@ static struct attribute *default_ctx_attrs[] = {
};
static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_nr_tags = {
.attr = {.name = "nr_tags", .mode = S_IRUGO },
.attr = {.name = "nr_tags", .mode = 0444 },
.show = blk_mq_hw_sysfs_nr_tags_show,
};
static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_nr_reserved_tags = {
.attr = {.name = "nr_reserved_tags", .mode = S_IRUGO },
.attr = {.name = "nr_reserved_tags", .mode = 0444 },
.show = blk_mq_hw_sysfs_nr_reserved_tags_show,
};
static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_cpus = {
.attr = {.name = "cpu_list", .mode = S_IRUGO },
.attr = {.name = "cpu_list", .mode = 0444 },
.show = blk_mq_hw_sysfs_cpus_show,
};
+13 -1
View File
@@ -134,6 +134,8 @@ unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
ws = bt_wait_ptr(bt, data->hctx);
drop_ctx = data->ctx == NULL;
do {
struct sbitmap_queue *bt_prev;
/*
* We're out of tags on this hardware queue, kick any
* pending IO submits before going to sleep waiting for
@@ -159,6 +161,7 @@ unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
if (data->ctx)
blk_mq_put_ctx(data->ctx);
bt_prev = bt;
io_schedule();
data->ctx = blk_mq_get_ctx(data->q);
@@ -170,6 +173,15 @@ unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
bt = &tags->bitmap_tags;
finish_wait(&ws->wait, &wait);
/*
* If the destination hw queue has changed, issue a fake wake up on
* the previous queue to compensate for the missed wake up, so that
* other allocations on the previous queue are not starved.
*/
if (bt != bt_prev)
sbitmap_queue_wake_up(bt_prev);
ws = bt_wait_ptr(bt, data->hctx);
} while (1);
@@ -259,7 +271,7 @@ static bool bt_tags_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
* test and set the bit before assigning ->rqs[].
*/
rq = tags->rqs[bitnr];
if (rq)
if (rq && blk_mq_rq_state(rq) == MQ_RQ_IN_FLIGHT)
iter_data->fn(rq, iter_data->data, reserved);
return true;
+110 -234
View File
File diff suppressed because it is too large Load Diff
+2 -40
View File
@@ -30,20 +30,6 @@ struct blk_mq_ctx {
struct kobject kobj;
} ____cacheline_aligned_in_smp;
/*
 * Bits for request->gstate. The lower two bits carry MQ_RQ_* state value
 * and the upper bits the generation number.
 *
 * The first three enumerators are the state values themselves; the
 * remaining three describe how the state and the generation number are
 * packed into the same word.
 */
enum mq_rq_state {
/* State values stored in the low MQ_RQ_STATE_BITS bits. */
MQ_RQ_IDLE = 0,
MQ_RQ_IN_FLIGHT = 1,
MQ_RQ_COMPLETE = 2,
/* Number of low bits reserved for the state value. */
MQ_RQ_STATE_BITS = 2,
/* Mask selecting the state bits; evaluates to 0x3. */
MQ_RQ_STATE_MASK = (1 << MQ_RQ_STATE_BITS) - 1,
/* Lowest bit above the state field (value 4): adding it bumps the generation by one. */
MQ_RQ_GEN_INC = 1 << MQ_RQ_STATE_BITS,
};
void blk_mq_freeze_queue(struct request_queue *q);
void blk_mq_free_queue(struct request_queue *q);
int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr);
@@ -107,33 +93,9 @@ void blk_mq_release(struct request_queue *q);
* blk_mq_rq_state() - read the current MQ_RQ_* state of a request
* @rq: target request.
*/
static inline int blk_mq_rq_state(struct request *rq)
static inline enum mq_rq_state blk_mq_rq_state(struct request *rq)
{
return READ_ONCE(rq->gstate) & MQ_RQ_STATE_MASK;
}
/**
* blk_mq_rq_update_state() - set the current MQ_RQ_* state of a request
* @rq: target request.
* @state: new state to set.
*
* Set @rq's state to @state. The caller is responsible for ensuring that
* there are no other updaters. A request can transition into IN_FLIGHT
* only from IDLE and doing so increments the generation number.
*/
static inline void blk_mq_rq_update_state(struct request *rq,
enum mq_rq_state state)
{
u64 old_val = READ_ONCE(rq->gstate);
u64 new_val = (old_val & ~MQ_RQ_STATE_MASK) | state;
if (state == MQ_RQ_IN_FLIGHT) {
WARN_ON_ONCE((old_val & MQ_RQ_STATE_MASK) != MQ_RQ_IDLE);
new_val += MQ_RQ_GEN_INC;
}
/* avoid exposing interim values */
WRITE_ONCE(rq->gstate, new_val);
return READ_ONCE(rq->state);
}
static inline struct blk_mq_ctx *__blk_mq_get_ctx(struct request_queue *q,

Some files were not shown because too many files have changed in this diff Show More