Merge tag 'for-5.13/block-2021-04-27' of git://git.kernel.dk/linux-block

Pull block updates from Jens Axboe: "Pretty quiet round this time, which is nice. In detail: - Series revamping bounce buffer support (Christoph) - Dead code removal (Christoph, Bart) - Partition iteration revamp, now using xarray (Christoph) - Passthrough request scheduler improvements (Lin) - Series of BFQ improvements (Paolo) - Fix ioprio task iteration (Peter) - Various little tweaks and fixes (Tejun, Saravanan, Bhaskar, Max, Nikolay)" * tag 'for-5.13/block-2021-04-27' of git://git.kernel.dk/linux-block: (41 commits) blk-iocost: don't ignore vrate_min on QD contention blk-mq: Fix spurious debugfs directory creation during initialization bfq/mq-deadline: remove redundant check for passthrough request blk-mq: bypass IO scheduler's limit_depth for passthrough request block: Remove an obsolete comment from sg_io() block: move bio_list_copy_data to pktcdvd block: remove zero_fill_bio_iter block: add queue_to_disk() to get gendisk from request_queue block: remove an incorrect check from blk_rq_append_bio block: initialize ret in bdev_disk_changed block: Fix sys_ioprio_set(.which=IOPRIO_WHO_PGRP) task iteration block: remove disk_part_iter block: simplify diskstats_show block: simplify show_partition block: simplify printk_all_partitions block: simplify partition_overlaps block: simplify partition removal block: take bd_mutex around delete_partitions in del_gendisk block: refactor blk_drop_partitions block: move more syncing and invalidation to delete_partition ...
2026-03-09 10:07:04 -07:00 · 2021-04-28 14:27:12 -07:00
parent 16b3d0cf5b f46ec84b5a
commit 6c00292113
53 changed files with 786 additions and 1265 deletions
--- a/Documentation/scsi/BusLogic.rst
+++ b/Documentation/scsi/BusLogic.rst
@@ -251,8 +251,6 @@ BT-445C	    VLB		Fast SCSI-2
 BT-747C	    EISA	Fast SCSI-2
 BT-757C	    EISA	Wide Fast SCSI-2
 BT-757CD    EISA	Wide Differential Fast SCSI-2
-BT-545C	    ISA		Fast SCSI-2
-BT-540CF    ISA		Fast SCSI-2
 ========    ====	==============================

 MultiMaster "S" Series Host Adapters:
@@ -263,17 +261,13 @@ BT-747S	    EISA	Fast SCSI-2
 BT-747D	    EISA	Differential Fast SCSI-2
 BT-757S	    EISA	Wide Fast SCSI-2
 BT-757D	    EISA	Wide Differential Fast SCSI-2
-BT-545S	    ISA		Fast SCSI-2
-BT-542D	    ISA		Differential Fast SCSI-2
 BT-742A	    EISA	SCSI-2 (742A revision H)
-BT-542B	    ISA		SCSI-2 (542B revision H)
 =======     ====	==============================

 MultiMaster "A" Series Host Adapters:

 =======     ====	==============================
 BT-742A	    EISA	SCSI-2 (742A revisions A - G)
-BT-542B	    ISA		SCSI-2 (542B revisions A - G)
 =======     ====	==============================

 AMI FastDisk Host Adapters that are true BusLogic MultiMaster clones are also
@@ -400,26 +394,11 @@ selected host adapter.

 The BusLogic Driver Probing Options comprise the following:

-IO:<integer>
-
-  The "IO:" option specifies an ISA I/O Address to be probed for a non-PCI
-  MultiMaster Host Adapter.  If neither "IO:" nor "NoProbeISA" options are
-  specified, then the standard list of BusLogic MultiMaster ISA I/O Addresses
-  will be probed (0x330, 0x334, 0x230, 0x234, 0x130, and 0x134).  Multiple
-  "IO:" options may be specified to precisely determine the I/O Addresses to
-  be probed, but the probe order will always follow the standard list.
-
 NoProbe

  The "NoProbe" option disables all probing and therefore no BusLogic Host
  Adapters will be detected.

-NoProbeISA
-
-  The "NoProbeISA" option disables probing of the standard BusLogic ISA I/O
-  Addresses and therefore only PCI MultiMaster and FlashPoint Host Adapters
-  will be detected.
-
 NoProbePCI

  The "NoProbePCI" options disables the interrogation of PCI Configuration
@@ -464,10 +443,7 @@ QueueDepth:<integer>
  Depth for devices that do not support Tagged Queuing.  If no Queue Depth
  option is provided, the Queue Depth will be determined automatically based
  on the Host Adapter's Total Queue Depth and the number, type, speed, and
-  capabilities of the detected Target Devices.  For Host Adapters that
-  require ISA Bounce Buffers, the Queue Depth is automatically set by default
-  to BusLogic_TaggedQueueDepthBB or BusLogic_UntaggedQueueDepthBB to avoid
-  excessive preallocation of DMA Bounce Buffer memory.  Target Devices that
+  capabilities of the detected Target Devices.  Target Devices that
  do not support Tagged Queuing always have their Queue Depth set to
  BusLogic_UntaggedQueueDepth or BusLogic_UntaggedQueueDepthBB, unless a
  lower Queue Depth option is provided.  A Queue Depth of 1 automatically
--- a/Documentation/scsi/scsi_mid_low_api.rst
+++ b/Documentation/scsi/scsi_mid_low_api.rst
@@ -1095,10 +1095,6 @@ of interest:
 		 - maximum number of commands that can be queued on devices
                   controlled by the host. Overridden by LLD calls to
                   scsi_change_queue_depth().
-    unchecked_isa_dma
-		 - 1=>only use bottom 16 MB of ram (ISA DMA addressing
-                   restriction), 0=>can use full 32 bit (or better) DMA
-                   address space
    no_async_abort
 		 - 1=>Asynchronous aborts are not supported
 		 - 0=>Timed-out commands will be aborted asynchronously
--- a/block/bfq-cgroup.c
+++ b/block/bfq-cgroup.c
@@ -547,6 +547,8 @@ static void bfq_pd_init(struct blkg_policy_data *pd)

 	entity->orig_weight = entity->weight = entity->new_weight = d->weight;
 	entity->my_sched_data = &bfqg->sched_data;
+	entity->last_bfqq_created = NULL;
+
 	bfqg->my_entity = entity; /*
 				   * the root_group's will be set to NULL
 				   * in bfq_init_queue()
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
--- a/block/bfq-iosched.h
+++ b/block/bfq-iosched.h
@@ -197,6 +197,9 @@ struct bfq_entity {

 	/* flag, set if the entity is counted in groups_with_pending_reqs */
 	bool in_groups_with_pending_reqs;
+
+	/* last child queue of entity created (for non-leaf entities) */
+	struct bfq_queue *last_bfqq_created;
 };

 struct bfq_group;
@@ -230,6 +233,8 @@ struct bfq_ttime {
 struct bfq_queue {
 	/* reference counter */
 	int ref;
+	/* counter of references from other queues for delayed stable merge */
+	int stable_ref;
 	/* parent bfq_data */
 	struct bfq_data *bfqd;

@@ -365,6 +370,8 @@ struct bfq_queue {

 	unsigned long first_IO_time; /* time of first I/O for this queue */

+	unsigned long creation_time; /* when this queue is created */
+
 	/* max service rate measured so far */
 	u32 max_service_rate;

@@ -454,6 +461,11 @@ struct bfq_io_cq {
 	u64 saved_last_serv_time_ns;
 	unsigned int saved_inject_limit;
 	unsigned long saved_decrease_time_jif;
+
+	/* candidate queue for a stable merge (due to close creation time) */
+	struct bfq_queue *stable_merge_bfqq;
+
+	bool stably_merged;	/* non splittable if true */
 };

 /**
@@ -578,6 +590,9 @@ struct bfq_data {
 	/* bfqq owning the last completed rq */
 	struct bfq_queue *last_completed_rq_bfqq;

+	/* last bfqq created, among those in the root group */
+	struct bfq_queue *last_bfqq_created;
+
 	/* time of last transition from empty to non-empty (ns) */
 	u64 last_empty_occupied_ns;

--- a/block/bfq-wf2q.c
+++ b/block/bfq-wf2q.c
@@ -1706,4 +1706,12 @@ void bfq_add_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq)

 	if (bfqq->wr_coeff > 1)
 		bfqd->wr_busy_queues++;
+
+	/* Move bfqq to the head of the woken list of its waker */
+	if (!hlist_unhashed(&bfqq->woken_list_node) &&
+	    &bfqq->woken_list_node != bfqq->waker_bfqq->woken_list.first) {
+		hlist_del_init(&bfqq->woken_list_node);
+		hlist_add_head(&bfqq->woken_list_node,
+			       &bfqq->waker_bfqq->woken_list);
+	}
 }
--- a/block/bio-integrity.c
+++ b/block/bio-integrity.c
@@ -204,7 +204,6 @@ bool bio_integrity_prep(struct bio *bio)
 {
 	struct bio_integrity_payload *bip;
 	struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
-	struct request_queue *q = bio->bi_bdev->bd_disk->queue;
 	void *buf;
 	unsigned long start, end;
 	unsigned int len, nr_pages;
@@ -238,7 +237,7 @@ bool bio_integrity_prep(struct bio *bio)

 	/* Allocate kernel buffer for protection data */
 	len = intervals * bi->tuple_size;
-	buf = kmalloc(len, GFP_NOIO | q->bounce_gfp);
+	buf = kmalloc(len, GFP_NOIO);
 	status = BLK_STS_RESOURCE;
 	if (unlikely(buf == NULL)) {
 		printk(KERN_ERR "could not allocate integrity buffer\n");
--- a/block/bio.c
+++ b/block/bio.c
@@ -493,20 +493,20 @@ struct bio *bio_kmalloc(gfp_t gfp_mask, unsigned short nr_iovecs)
 }
 EXPORT_SYMBOL(bio_kmalloc);

-void zero_fill_bio_iter(struct bio *bio, struct bvec_iter start)
+void zero_fill_bio(struct bio *bio)
 {
 	unsigned long flags;
 	struct bio_vec bv;
 	struct bvec_iter iter;

-	__bio_for_each_segment(bv, bio, iter, start) {
+	bio_for_each_segment(bv, bio, iter) {
 		char *data = bvec_kmap_irq(&bv, &flags);
 		memset(data, 0, bv.bv_len);
 		flush_dcache_page(bv.bv_page);
 		bvec_kunmap_irq(data, &flags);
 	}
 }
-EXPORT_SYMBOL(zero_fill_bio_iter);
+EXPORT_SYMBOL(zero_fill_bio);

 /**
 * bio_truncate - truncate the bio to small size of @new_size
@@ -1236,43 +1236,6 @@ void bio_copy_data(struct bio *dst, struct bio *src)
 }
 EXPORT_SYMBOL(bio_copy_data);

-/**
- * bio_list_copy_data - copy contents of data buffers from one chain of bios to
- * another
- * @src: source bio list
- * @dst: destination bio list
- *
- * Stops when it reaches the end of either the @src list or @dst list - that is,
- * copies min(src->bi_size, dst->bi_size) bytes (or the equivalent for lists of
- * bios).
- */
-void bio_list_copy_data(struct bio *dst, struct bio *src)
-{
-	struct bvec_iter src_iter = src->bi_iter;
-	struct bvec_iter dst_iter = dst->bi_iter;
-
-	while (1) {
-		if (!src_iter.bi_size) {
-			src = src->bi_next;
-			if (!src)
-				break;
-
-			src_iter = src->bi_iter;
-		}
-
-		if (!dst_iter.bi_size) {
-			dst = dst->bi_next;
-			if (!dst)
-				break;
-
-			dst_iter = dst->bi_iter;
-		}
-
-		bio_copy_data_iter(dst, &dst_iter, src, &src_iter);
-	}
-}
-EXPORT_SYMBOL(bio_list_copy_data);
-
 void bio_free_pages(struct bio *bio)
 {
 	struct bio_vec *bvec;
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1161,10 +1161,8 @@ static blk_status_t blk_cloned_rq_check_limits(struct request_queue *q,
 	}

 	/*
-	 * queue's settings related to segment counting like q->bounce_pfn
-	 * may differ from that of other stacking queues.
-	 * Recalculate it to check the request correctly on this queue's
-	 * limitation.
+	 * The queue settings related to segment counting may differ from the
+	 * original queue.
 	 */
 	rq->nr_phys_segments = blk_recalc_rq_segments(rq);
 	if (rq->nr_phys_segments > queue_max_segments(q)) {
--- a/block/blk-iocost.c
+++ b/block/blk-iocost.c
@@ -987,10 +987,6 @@ static void ioc_adjust_base_vrate(struct ioc *ioc, u32 rq_wait_pct,
 		return;
 	}

-	/* rq_wait signal is always reliable, ignore user vrate_min */
-	if (rq_wait_pct > RQ_WAIT_BUSY_PCT)
-		vrate_min = VRATE_MIN;
-
 	/*
 	 * If vrate is out of bounds, apply clamp gradually as the
 	 * bounds can change abruptly.  Otherwise, apply busy_level
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -123,7 +123,6 @@ static int bio_uncopy_user(struct bio *bio)
 			bio_free_pages(bio);
 	}
 	kfree(bmd);
-	bio_put(bio);
 	return ret;
 }

@@ -132,7 +131,7 @@ static int bio_copy_user_iov(struct request *rq, struct rq_map_data *map_data,
 {
 	struct bio_map_data *bmd;
 	struct page *page;
-	struct bio *bio, *bounce_bio;
+	struct bio *bio;
 	int i = 0, ret;
 	int nr_pages;
 	unsigned int len = iter->count;
@@ -181,7 +180,7 @@ static int bio_copy_user_iov(struct request *rq, struct rq_map_data *map_data,

 			i++;
 		} else {
-			page = alloc_page(rq->q->bounce_gfp | gfp_mask);
+			page = alloc_page(GFP_NOIO | gfp_mask);
 			if (!page) {
 				ret = -ENOMEM;
 				goto cleanup;
@@ -218,16 +217,9 @@ static int bio_copy_user_iov(struct request *rq, struct rq_map_data *map_data,

 	bio->bi_private = bmd;

-	bounce_bio = bio;
-	ret = blk_rq_append_bio(rq, &bounce_bio);
+	ret = blk_rq_append_bio(rq, bio);
 	if (ret)
 		goto cleanup;
-
-	/*
-	 * We link the bounce buffer in and could have to traverse it later, so
-	 * we have to get a ref to prevent it from being freed
-	 */
-	bio_get(bounce_bio);
 	return 0;
 cleanup:
 	if (!map_data)
@@ -242,7 +234,7 @@ static int bio_map_user_iov(struct request *rq, struct iov_iter *iter,
 		gfp_t gfp_mask)
 {
 	unsigned int max_sectors = queue_max_hw_sectors(rq->q);
-	struct bio *bio, *bounce_bio;
+	struct bio *bio;
 	int ret;
 	int j;

@@ -304,49 +296,17 @@ static int bio_map_user_iov(struct request *rq, struct iov_iter *iter,
 			break;
 	}

-	/*
-	 * Subtle: if we end up needing to bounce a bio, it would normally
-	 * disappear when its bi_end_io is run.  However, we need the original
-	 * bio for the unmap, so grab an extra reference to it
-	 */
-	bio_get(bio);
-
-	bounce_bio = bio;
-	ret = blk_rq_append_bio(rq, &bounce_bio);
+	ret = blk_rq_append_bio(rq, bio);
 	if (ret)
-		goto out_put_orig;
-
-	/*
-	 * We link the bounce buffer in and could have to traverse it
-	 * later, so we have to get a ref to prevent it from being freed
-	 */
-	bio_get(bounce_bio);
+		goto out_unmap;
 	return 0;

- out_put_orig:
-	bio_put(bio);
 out_unmap:
 	bio_release_pages(bio, false);
 	bio_put(bio);
 	return ret;
 }

-/**
- *	bio_unmap_user	-	unmap a bio
- *	@bio:		the bio being unmapped
- *
- *	Unmap a bio previously mapped by bio_map_user_iov(). Must be called from
- *	process context.
- *
- *	bio_unmap_user() may sleep.
- */
-static void bio_unmap_user(struct bio *bio)
-{
-	bio_release_pages(bio, bio_data_dir(bio) == READ);
-	bio_put(bio);
-	bio_put(bio);
-}
-
 static void bio_invalidate_vmalloc_pages(struct bio *bio)
 {
 #ifdef ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE
@@ -486,7 +446,7 @@ static struct bio *bio_copy_kern(struct request_queue *q, void *data,
 		if (bytes > len)
 			bytes = len;

-		page = alloc_page(q->bounce_gfp | gfp_mask);
+		page = alloc_page(GFP_NOIO | gfp_mask);
 		if (!page)
 			goto cleanup;

@@ -519,33 +479,24 @@ cleanup:
 * Append a bio to a passthrough request.  Only works if the bio can be merged
 * into the request based on the driver constraints.
 */
-int blk_rq_append_bio(struct request *rq, struct bio **bio)
+int blk_rq_append_bio(struct request *rq, struct bio *bio)
 {
-	struct bio *orig_bio = *bio;
 	struct bvec_iter iter;
 	struct bio_vec bv;
 	unsigned int nr_segs = 0;

-	blk_queue_bounce(rq->q, bio);
-
-	bio_for_each_bvec(bv, *bio, iter)
+	bio_for_each_bvec(bv, bio, iter)
 		nr_segs++;

 	if (!rq->bio) {
-		blk_rq_bio_prep(rq, *bio, nr_segs);
+		blk_rq_bio_prep(rq, bio, nr_segs);
 	} else {
-		if (!ll_back_merge_fn(rq, *bio, nr_segs)) {
-			if (orig_bio != *bio) {
-				bio_put(*bio);
-				*bio = orig_bio;
-			}
+		if (!ll_back_merge_fn(rq, bio, nr_segs))
 			return -EINVAL;
-		}
-
-		rq->biotail->bi_next = *bio;
-		rq->biotail = *bio;
-		rq->__data_len += (*bio)->bi_iter.bi_size;
-		bio_crypt_free_ctx(*bio);
+		rq->biotail->bi_next = bio;
+		rq->biotail = bio;
+		rq->__data_len += (bio)->bi_iter.bi_size;
+		bio_crypt_free_ctx(bio);
 	}

 	return 0;
@@ -566,12 +517,6 @@ EXPORT_SYMBOL(blk_rq_append_bio);
 *
 *    A matching blk_rq_unmap_user() must be issued at the end of I/O, while
 *    still in process context.
- *
- *    Note: The mapped bio may need to be bounced through blk_queue_bounce()
- *    before being submitted to the device, as pages mapped may be out of
- *    reach. It's the callers responsibility to make sure this happens. The
- *    original bio must be passed back in to blk_rq_unmap_user() for proper
- *    unmapping.
 */
 int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
 			struct rq_map_data *map_data,
@@ -588,6 +533,8 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,

 	if (map_data)
 		copy = true;
+	else if (blk_queue_may_bounce(q))
+		copy = true;
 	else if (iov_iter_alignment(iter) & align)
 		copy = true;
 	else if (queue_virt_boundary(q))
@@ -641,25 +588,21 @@ EXPORT_SYMBOL(blk_rq_map_user);
 */
 int blk_rq_unmap_user(struct bio *bio)
 {
-	struct bio *mapped_bio;
+	struct bio *next_bio;
 	int ret = 0, ret2;

 	while (bio) {
-		mapped_bio = bio;
-		if (unlikely(bio_flagged(bio, BIO_BOUNCED)))
-			mapped_bio = bio->bi_private;
-
 		if (bio->bi_private) {
-			ret2 = bio_uncopy_user(mapped_bio);
+			ret2 = bio_uncopy_user(bio);
 			if (ret2 && !ret)
 				ret = ret2;
 		} else {
-			bio_unmap_user(mapped_bio);
+			bio_release_pages(bio, bio_data_dir(bio) == READ);
 		}

-		mapped_bio = bio;
+		next_bio = bio;
 		bio = bio->bi_next;
-		bio_put(mapped_bio);
+		bio_put(next_bio);
 	}

 	return ret;
@@ -684,7 +627,7 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
 {
 	int reading = rq_data_dir(rq) == READ;
 	unsigned long addr = (unsigned long) kbuf;
-	struct bio *bio, *orig_bio;
+	struct bio *bio;
 	int ret;

 	if (len > (queue_max_hw_sectors(q) << 9))
@@ -692,7 +635,8 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
 	if (!len || !kbuf)
 		return -EINVAL;

-	if (!blk_rq_aligned(q, addr, len) || object_is_on_stack(kbuf))
+	if (!blk_rq_aligned(q, addr, len) || object_is_on_stack(kbuf) ||
+	    blk_queue_may_bounce(q))
 		bio = bio_copy_kern(q, kbuf, len, gfp_mask, reading);
 	else
 		bio = bio_map_kern(q, kbuf, len, gfp_mask);
@@ -703,14 +647,9 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
 	bio->bi_opf &= ~REQ_OP_MASK;
 	bio->bi_opf |= req_op(rq);

-	orig_bio = bio;
-	ret = blk_rq_append_bio(rq, &bio);
-	if (unlikely(ret)) {
-		/* request is too big */
-		bio_put(orig_bio);
-		return ret;
-	}
-
-	return 0;
+	ret = blk_rq_append_bio(rq, bio);
+	if (unlikely(ret))
+		bio_put(bio);
+	return ret;
 }
 EXPORT_SYMBOL(blk_rq_map_kern);
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -972,6 +972,14 @@ void blk_mq_debugfs_register_sched_hctx(struct request_queue *q,
 {
 	struct elevator_type *e = q->elevator->type;

+	/*
+	 * If the parent debugfs directory has not been created yet, return;
+	 * We will be called again later on with appropriate parent debugfs
+	 * directory from blk_register_queue()
+	 */
+	if (!hctx->debugfs_dir)
+		return;
+
 	if (!e->hctx_debugfs_attrs)
 		return;

--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -373,8 +373,8 @@ static bool blk_mq_tagset_count_completed_rqs(struct request *rq,
 }

 /**
- * blk_mq_tagset_wait_completed_request - wait until all completed req's
- * complete funtion is run
+ * blk_mq_tagset_wait_completed_request - Wait until all scheduled request
+ * completions have finished.
 * @tagset:	Tag set to drain completed request
 *
 * Note: This function has to be run after all IO queues are shutdown
@@ -517,7 +517,7 @@ struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags,
 	tags->nr_tags = total_tags;
 	tags->nr_reserved_tags = reserved_tags;

-	if (flags & BLK_MQ_F_TAG_HCTX_SHARED)
+	if (blk_mq_is_sbitmap_shared(flags))
 		return tags;

 	if (blk_mq_init_bitmap_tags(tags, node, alloc_policy) < 0) {
@@ -529,7 +529,7 @@ struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags,

 void blk_mq_free_tags(struct blk_mq_tags *tags, unsigned int flags)
 {
-	if (!(flags & BLK_MQ_F_TAG_HCTX_SHARED)) {
+	if (!blk_mq_is_sbitmap_shared(flags)) {
 		sbitmap_queue_free(tags->bitmap_tags);
 		sbitmap_queue_free(tags->breserved_tags);
 	}
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -361,11 +361,12 @@ static struct request *__blk_mq_alloc_request(struct blk_mq_alloc_data *data)

 	if (e) {
 		/*
-		 * Flush requests are special and go directly to the
+		 * Flush/passthrough requests are special and go directly to the
 		 * dispatch list. Don't include reserved tags in the
 		 * limiting, as it isn't useful.
 		 */
 		if (!op_is_flush(data->cmd_flags) &&
+		    !blk_op_is_passthrough(data->cmd_flags) &&
 		    e->type->ops.limit_depth &&
 		    !(data->flags & BLK_MQ_REQ_RESERVED))
 			e->type->ops.limit_depth(data->cmd_flags, data);
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -7,7 +7,6 @@
 #include <linux/init.h>
 #include <linux/bio.h>
 #include <linux/blkdev.h>
-#include <linux/memblock.h>	/* for max_pfn/max_low_pfn */
 #include <linux/gcd.h>
 #include <linux/lcm.h>
 #include <linux/jiffies.h>
@@ -17,11 +16,6 @@
 #include "blk.h"
 #include "blk-wbt.h"

-unsigned long blk_max_low_pfn;
-EXPORT_SYMBOL(blk_max_low_pfn);
-
-unsigned long blk_max_pfn;
-
 void blk_queue_rq_timeout(struct request_queue *q, unsigned int timeout)
 {
 	q->rq_timeout = timeout;
@@ -55,7 +49,7 @@ void blk_set_default_limits(struct queue_limits *lim)
 	lim->discard_alignment = 0;
 	lim->discard_misaligned = 0;
 	lim->logical_block_size = lim->physical_block_size = lim->io_min = 512;
-	lim->bounce_pfn = (unsigned long)(BLK_BOUNCE_ANY >> PAGE_SHIFT);
+	lim->bounce = BLK_BOUNCE_NONE;
 	lim->alignment_offset = 0;
 	lim->io_opt = 0;
 	lim->misaligned = 0;
@@ -92,39 +86,16 @@ EXPORT_SYMBOL(blk_set_stacking_limits);
 /**
 * blk_queue_bounce_limit - set bounce buffer limit for queue
 * @q: the request queue for the device
- * @max_addr: the maximum address the device can handle
+ * @bounce: bounce limit to enforce
 *
 * Description:
- *    Different hardware can have different requirements as to what pages
- *    it can do I/O directly to. A low level driver can call
- *    blk_queue_bounce_limit to have lower memory pages allocated as bounce
- *    buffers for doing I/O to pages residing above @max_addr.
+ *    Force bouncing for ISA DMA ranges or highmem.
+ *
+ *    DEPRECATED, don't use in new code.
 **/
-void blk_queue_bounce_limit(struct request_queue *q, u64 max_addr)
+void blk_queue_bounce_limit(struct request_queue *q, enum blk_bounce bounce)
 {
-	unsigned long b_pfn = max_addr >> PAGE_SHIFT;
-	int dma = 0;
-
-	q->bounce_gfp = GFP_NOIO;
-#if BITS_PER_LONG == 64
-	/*
-	 * Assume anything <= 4GB can be handled by IOMMU.  Actually
-	 * some IOMMUs can handle everything, but I don't know of a
-	 * way to test this here.
-	 */
-	if (b_pfn < (min_t(u64, 0xffffffffUL, BLK_BOUNCE_HIGH) >> PAGE_SHIFT))
-		dma = 1;
-	q->limits.bounce_pfn = max(max_low_pfn, b_pfn);
-#else
-	if (b_pfn < blk_max_low_pfn)
-		dma = 1;
-	q->limits.bounce_pfn = b_pfn;
-#endif
-	if (dma) {
-		init_emergency_isa_pool();
-		q->bounce_gfp = GFP_NOIO | GFP_DMA;
-		q->limits.bounce_pfn = b_pfn;
-	}
+	q->limits.bounce = bounce;
 }
 EXPORT_SYMBOL(blk_queue_bounce_limit);

@@ -547,7 +518,7 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
 					b->max_write_zeroes_sectors);
 	t->max_zone_append_sectors = min(t->max_zone_append_sectors,
 					b->max_zone_append_sectors);
-	t->bounce_pfn = min_not_zero(t->bounce_pfn, b->bounce_pfn);
+	t->bounce = max(t->bounce, b->bounce);

 	t->seg_boundary_mask = min_not_zero(t->seg_boundary_mask,
 					    b->seg_boundary_mask);
@@ -927,11 +898,3 @@ void blk_queue_set_zoned(struct gendisk *disk, enum blk_zoned_model model)
 	}
 }
 EXPORT_SYMBOL_GPL(blk_queue_set_zoned);
-
-static int __init blk_settings_init(void)
-{
-	blk_max_low_pfn = max_low_pfn - 1;
-	blk_max_pfn = max_pfn - 1;
-	return 0;
-}
-subsys_initcall(blk_settings_init);
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -60,7 +60,7 @@ static ssize_t queue_var_store64(s64 *var, const char *page)

 static ssize_t queue_requests_show(struct request_queue *q, char *page)
 {
-	return queue_var_show(q->nr_requests, (page));
+	return queue_var_show(q->nr_requests, page);
 }

 static ssize_t
@@ -264,6 +264,11 @@ static ssize_t queue_max_hw_sectors_show(struct request_queue *q, char *page)
 	return queue_var_show(max_hw_sectors_kb, (page));
 }

+static ssize_t queue_virt_boundary_mask_show(struct request_queue *q, char *page)
+{
+	return queue_var_show(q->limits.virt_boundary_mask, (page));
+}
+
 #define QUEUE_SYSFS_BIT_FNS(name, flag, neg)				\
 static ssize_t								\
 queue_##name##_show(struct request_queue *q, char *page)		\
@@ -610,6 +615,7 @@ QUEUE_RO_ENTRY(queue_fua, "fua");
 QUEUE_RO_ENTRY(queue_dax, "dax");
 QUEUE_RW_ENTRY(queue_io_timeout, "io_timeout");
 QUEUE_RW_ENTRY(queue_wb_lat, "wbt_lat_usec");
+QUEUE_RO_ENTRY(queue_virt_boundary_mask, "virt_boundary_mask");

 #ifdef CONFIG_BLK_DEV_THROTTLING_LOW
 QUEUE_RW_ENTRY(blk_throtl_sample_time, "throttle_sample_time");
@@ -670,6 +676,7 @@ static struct attribute *queue_attrs[] = {
 #ifdef CONFIG_BLK_DEV_THROTTLING_LOW
 	&blk_throtl_sample_time_entry.attr,
 #endif
+	&queue_virt_boundary_mask_entry.attr,
 	NULL,
 };

--- a/block/blk-zoned.c
+++ b/block/blk-zoned.c
@@ -52,14 +52,6 @@ const char *blk_zone_cond_str(enum blk_zone_cond zone_cond)
 }
 EXPORT_SYMBOL_GPL(blk_zone_cond_str);

-static inline sector_t blk_zone_start(struct request_queue *q,
-				      sector_t sector)
-{
-	sector_t zone_mask = blk_queue_zone_sectors(q) - 1;
-
-	return sector & ~zone_mask;
-}
-
 /*
 * Return true if a request is a write requests that needs zone write locking.
 */
--- a/block/blk.h
+++ b/block/blk.h
@@ -6,6 +6,7 @@
 #include <linux/blk-mq.h>
 #include <linux/part_stat.h>
 #include <linux/blk-crypto.h>
+#include <linux/memblock.h>	/* for max_pfn/max_low_pfn */
 #include <xen/xen.h>
 #include "blk-crypto-internal.h"
 #include "blk-mq.h"
@@ -311,18 +312,20 @@ static inline void blk_throtl_bio_endio(struct bio *bio) { }
 static inline void blk_throtl_stat_add(struct request *rq, u64 time) { }
 #endif

-#ifdef CONFIG_BOUNCE
-extern int init_emergency_isa_pool(void);
-extern void blk_queue_bounce(struct request_queue *q, struct bio **bio);
-#else
-static inline int init_emergency_isa_pool(void)
+void __blk_queue_bounce(struct request_queue *q, struct bio **bio);
+
+static inline bool blk_queue_may_bounce(struct request_queue *q)
 {
-	return 0;
+	return IS_ENABLED(CONFIG_BOUNCE) &&
+		q->limits.bounce == BLK_BOUNCE_HIGH &&
+		max_low_pfn >= max_pfn;
 }
+
 static inline void blk_queue_bounce(struct request_queue *q, struct bio **bio)
 {
+	if (unlikely(blk_queue_may_bounce(q) && bio_has_data(*bio)))
+		__blk_queue_bounce(q, bio);	
 }
-#endif /* CONFIG_BOUNCE */

 #ifdef CONFIG_BLK_CGROUP_IOLATENCY
 extern int blk_iolatency_init(struct request_queue *q);
@@ -346,7 +349,6 @@ char *disk_name(struct gendisk *hd, int partno, char *buf);
 #define ADDPART_FLAG_NONE	0
 #define ADDPART_FLAG_RAID	1
 #define ADDPART_FLAG_WHOLEDISK	2
-void delete_partition(struct block_device *part);
 int bdev_add_partition(struct block_device *bdev, int partno,
 		sector_t start, sector_t length);
 int bdev_del_partition(struct block_device *bdev, int partno);
--- a/block/bounce.c
+++ b/block/bounce.c
@@ -18,7 +18,6 @@
 #include <linux/init.h>
 #include <linux/hash.h>
 #include <linux/highmem.h>
-#include <linux/memblock.h>
 #include <linux/printk.h>
 #include <asm/tlbflush.h>

@@ -29,7 +28,7 @@
 #define ISA_POOL_SIZE	16

 static struct bio_set bounce_bio_set, bounce_bio_split;
-static mempool_t page_pool, isa_page_pool;
+static mempool_t page_pool;

 static void init_bounce_bioset(void)
 {
@@ -49,11 +48,11 @@ static void init_bounce_bioset(void)
 	bounce_bs_setup = true;
 }

-#if defined(CONFIG_HIGHMEM)
 static __init int init_emergency_pool(void)
 {
 	int ret;
-#if defined(CONFIG_HIGHMEM) && !defined(CONFIG_MEMORY_HOTPLUG)
+
+#ifndef CONFIG_MEMORY_HOTPLUG
 	if (max_pfn <= max_low_pfn)
 		return 0;
 #endif
@@ -67,9 +66,7 @@ static __init int init_emergency_pool(void)
 }

 __initcall(init_emergency_pool);
-#endif

-#ifdef CONFIG_HIGHMEM
 /*
 * highmem version, map in to vec
 */
@@ -82,48 +79,6 @@ static void bounce_copy_vec(struct bio_vec *to, unsigned char *vfrom)
 	kunmap_atomic(vto);
 }

-#else /* CONFIG_HIGHMEM */
-
-#define bounce_copy_vec(to, vfrom)	\
-	memcpy(page_address((to)->bv_page) + (to)->bv_offset, vfrom, (to)->bv_len)
-
-#endif /* CONFIG_HIGHMEM */
-
-/*
- * allocate pages in the DMA region for the ISA pool
- */
-static void *mempool_alloc_pages_isa(gfp_t gfp_mask, void *data)
-{
-	return mempool_alloc_pages(gfp_mask | GFP_DMA, data);
-}
-
-static DEFINE_MUTEX(isa_mutex);
-
-/*
- * gets called "every" time someone init's a queue with BLK_BOUNCE_ISA
- * as the max address, so check if the pool has already been created.
- */
-int init_emergency_isa_pool(void)
-{
-	int ret;
-
-	mutex_lock(&isa_mutex);
-
-	if (mempool_initialized(&isa_page_pool)) {
-		mutex_unlock(&isa_mutex);
-		return 0;
-	}
-
-	ret = mempool_init(&isa_page_pool, ISA_POOL_SIZE, mempool_alloc_pages_isa,
-			   mempool_free_pages, (void *) 0);
-	BUG_ON(ret);
-
-	pr_info("isa pool size: %d pages\n", ISA_POOL_SIZE);
-	init_bounce_bioset();
-	mutex_unlock(&isa_mutex);
-	return 0;
-}
-
 /*
 * Simple bounce buffer support for highmem pages. Depending on the
 * queue gfp mask set, *to may or may not be a highmem page. kmap it
@@ -159,7 +114,7 @@ static void copy_to_high_bio_irq(struct bio *to, struct bio *from)
 	}
 }

-static void bounce_end_io(struct bio *bio, mempool_t *pool)
+static void bounce_end_io(struct bio *bio)
 {
 	struct bio *bio_orig = bio->bi_private;
 	struct bio_vec *bvec, orig_vec;
@@ -173,7 +128,7 @@ static void bounce_end_io(struct bio *bio, mempool_t *pool)
 		orig_vec = bio_iter_iovec(bio_orig, orig_iter);
 		if (bvec->bv_page != orig_vec.bv_page) {
 			dec_zone_page_state(bvec->bv_page, NR_BOUNCE);
-			mempool_free(bvec->bv_page, pool);
+			mempool_free(bvec->bv_page, &page_pool);
 		}
 		bio_advance_iter(bio_orig, &orig_iter, orig_vec.bv_len);
 	}
@@ -185,33 +140,17 @@ static void bounce_end_io(struct bio *bio, mempool_t *pool)

 static void bounce_end_io_write(struct bio *bio)
 {
-	bounce_end_io(bio, &page_pool);
+	bounce_end_io(bio);
 }

-static void bounce_end_io_write_isa(struct bio *bio)
-{
-
-	bounce_end_io(bio, &isa_page_pool);
-}
-
-static void __bounce_end_io_read(struct bio *bio, mempool_t *pool)
+static void bounce_end_io_read(struct bio *bio)
 {
 	struct bio *bio_orig = bio->bi_private;

 	if (!bio->bi_status)
 		copy_to_high_bio_irq(bio_orig, bio);

-	bounce_end_io(bio, pool);
-}
-
-static void bounce_end_io_read(struct bio *bio)
-{
-	__bounce_end_io_read(bio, &page_pool);
-}
-
-static void bounce_end_io_read_isa(struct bio *bio)
-{
-	__bounce_end_io_read(bio, &isa_page_pool);
+	bounce_end_io(bio);
 }

 static struct bio *bounce_clone_bio(struct bio *bio_src)
@@ -241,12 +180,8 @@ static struct bio *bounce_clone_bio(struct bio *bio_src)
 	 *    asking for trouble and would force extra work on
 	 *    __bio_clone_fast() anyways.
 	 */
-	if (bio_is_passthrough(bio_src))
-		bio = bio_kmalloc(GFP_NOIO | __GFP_NOFAIL,
-				  bio_segments(bio_src));
-	else
-		bio = bio_alloc_bioset(GFP_NOIO, bio_segments(bio_src),
-				       &bounce_bio_set);
+	bio = bio_alloc_bioset(GFP_NOIO, bio_segments(bio_src),
+			       &bounce_bio_set);
 	bio->bi_bdev		= bio_src->bi_bdev;
 	if (bio_flagged(bio_src, BIO_REMAPPED))
 		bio_set_flag(bio, BIO_REMAPPED);
@@ -287,8 +222,7 @@ err_put:
 	return NULL;
 }

-static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig,
-			       mempool_t *pool)
+void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig)
 {
 	struct bio *bio;
 	int rw = bio_data_dir(*bio_orig);
@@ -301,14 +235,13 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig,
 	bio_for_each_segment(from, *bio_orig, iter) {
 		if (i++ < BIO_MAX_VECS)
 			sectors += from.bv_len >> 9;
-		if (page_to_pfn(from.bv_page) > q->limits.bounce_pfn)
+		if (PageHighMem(from.bv_page))
 			bounce = true;
 	}
 	if (!bounce)
 		return;

-	if (!bio_is_passthrough(*bio_orig) &&
-	    sectors < bio_sectors(*bio_orig)) {
+	if (sectors < bio_sectors(*bio_orig)) {
 		bio = bio_split(*bio_orig, sectors, GFP_NOIO, &bounce_bio_split);
 		bio_chain(bio, *bio_orig);
 		submit_bio_noacct(*bio_orig);
@@ -324,10 +257,10 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig,
 	for (i = 0, to = bio->bi_io_vec; i < bio->bi_vcnt; to++, i++) {
 		struct page *page = to->bv_page;

-		if (page_to_pfn(page) <= q->limits.bounce_pfn)
+		if (!PageHighMem(page))
 			continue;

-		to->bv_page = mempool_alloc(pool, q->bounce_gfp);
+		to->bv_page = mempool_alloc(&page_pool, GFP_NOIO);
 		inc_zone_page_state(to->bv_page, NR_BOUNCE);

 		if (rw == WRITE) {
@@ -346,46 +279,11 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig,

 	bio->bi_flags |= (1 << BIO_BOUNCED);

-	if (pool == &page_pool) {
+	if (rw == READ)
+		bio->bi_end_io = bounce_end_io_read;
+	else
 		bio->bi_end_io = bounce_end_io_write;
-		if (rw == READ)
-			bio->bi_end_io = bounce_end_io_read;
-	} else {
-		bio->bi_end_io = bounce_end_io_write_isa;
-		if (rw == READ)
-			bio->bi_end_io = bounce_end_io_read_isa;
-	}

 	bio->bi_private = *bio_orig;
 	*bio_orig = bio;
 }
-
-void blk_queue_bounce(struct request_queue *q, struct bio **bio_orig)
-{
-	mempool_t *pool;
-
-	/*
-	 * Data-less bio, nothing to bounce
-	 */
-	if (!bio_has_data(*bio_orig))
-		return;
-
-	/*
-	 * for non-isa bounce case, just check if the bounce pfn is equal
-	 * to or bigger than the highest pfn in the system -- in that case,
-	 * don't waste time iterating over bio segments
-	 */
-	if (!(q->bounce_gfp & GFP_DMA)) {
-		if (q->limits.bounce_pfn >= blk_max_pfn)
-			return;
-		pool = &page_pool;
-	} else {
-		BUG_ON(!mempool_initialized(&isa_page_pool));
-		pool = &isa_page_pool;
-	}
-
-	/*
-	 * slow path
-	 */
-	__blk_queue_bounce(q, bio_orig, pool);
-}
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -621,7 +621,8 @@ static inline bool elv_support_iosched(struct request_queue *q)
 */
 static struct elevator_type *elevator_get_default(struct request_queue *q)
 {
-	if (q->nr_hw_queues != 1)
+	if (q->nr_hw_queues != 1 &&
+			!blk_mq_is_sbitmap_shared(q->tag_set->flags))
 		return NULL;

 	return elevator_get(q, "mq-deadline", false);
--- a/Show More
+++ b/Show More