Merge tag 'block-5.12-2021-03-12-v2' of git://git.kernel.dk/linux-block

Pull block fixes from Jens Axboe:
 "Mostly just random fixes all over the map.

  The only odd-one-out change is finally getting the rename of
  BIO_MAX_PAGES to BIO_MAX_VECS done. This should've been done with the
  multipage bvec change, but it's been left.

  Do it now to avoid hassles around changes piling up for the next merge
  window.

  Summary:

   - NVMe pull request:
      - one more quirk (Dmitry Monakhov)
      - fix max_zone_append_sectors initialization (Chaitanya Kulkarni)
      - nvme-fc reset/create race fix (James Smart)
      - fix status code on aborts/resets (Hannes Reinecke)
      - fix the CSS check for ZNS namespaces (Chaitanya Kulkarni)
      - fix a use after free in a debug printk in nvme-rdma (Lv Yunlong)

   - Follow-up NVMe error fix for NULL 'id' (Christoph)

   - Fixup for the bd_size_lock being IRQ safe, now that the offending
     driver has been dropped (Damien).

   - rsxx probe failure error return (Jia-Ju)

   - umem probe failure error return (Wei)

   - s390/dasd unbind fixes (Stefan)

   - blk-cgroup stats summing fix (Xunlei)

   - zone reset handling fix (Damien)

   - Rename BIO_MAX_PAGES to BIO_MAX_VECS (Christoph)

   - Suppress uevent trigger for hidden devices (Daniel)

   - Fix handling of discard on busy device (Jan)

   - Fix stale cache issue with zone reset (Shin'ichiro)"

* tag 'block-5.12-2021-03-12-v2' of git://git.kernel.dk/linux-block:
  nvme: fix the nsid value to print in nvme_validate_or_alloc_ns
  block: Discard page cache of zone reset target range
  block: Suppress uevent for hidden device when removed
  block: rename BIO_MAX_PAGES to BIO_MAX_VECS
  nvme-pci: add the DISABLE_WRITE_ZEROES quirk for a Samsung PM1725a
  nvme-rdma: Fix a use after free in nvmet_rdma_write_data_done
  nvme-core: check ctrl css before setting up zns
  nvme-fc: fix racing controller reset and create association
  nvme-fc: return NVME_SC_HOST_ABORTED_CMD when a command has been aborted
  nvme-fc: set NVME_REQ_CANCELLED in nvme_fc_terminate_exchange()
  nvme: add NVME_REQ_CANCELLED flag in nvme_cancel_request()
  nvme: simplify error logic in nvme_validate_ns()
  nvme: set max_zone_append_sectors nvme_revalidate_zones
  block: rsxx: fix error return code of rsxx_pci_probe()
  block: Fix REQ_OP_ZONE_RESET_ALL handling
  umem: fix error return code in mm_pci_probe()
  blk-cgroup: Fix the recursive blkg rwstat
  s390/dasd: fix hanging IO request during DASD driver unbind
  s390/dasd: fix hanging DASD driver unbind
  block: Try to handle busy underlying device on discard
This commit is contained in:
Linus Torvalds
2021-03-12 13:25:49 -08:00
42 changed files with 138 additions and 79 deletions

View File

@@ -33,7 +33,7 @@ static struct biovec_slab {
{ .nr_vecs = 16, .name = "biovec-16" },
{ .nr_vecs = 64, .name = "biovec-64" },
{ .nr_vecs = 128, .name = "biovec-128" },
{ .nr_vecs = BIO_MAX_PAGES, .name = "biovec-max" },
{ .nr_vecs = BIO_MAX_VECS, .name = "biovec-max" },
};
static struct biovec_slab *biovec_slab(unsigned short nr_vecs)
@@ -46,7 +46,7 @@ static struct biovec_slab *biovec_slab(unsigned short nr_vecs)
return &bvec_slabs[1];
case 65 ... 128:
return &bvec_slabs[2];
case 129 ... BIO_MAX_PAGES:
case 129 ... BIO_MAX_VECS:
return &bvec_slabs[3];
default:
BUG();
@@ -151,9 +151,9 @@ out:
void bvec_free(mempool_t *pool, struct bio_vec *bv, unsigned short nr_vecs)
{
BIO_BUG_ON(nr_vecs > BIO_MAX_PAGES);
BIO_BUG_ON(nr_vecs > BIO_MAX_VECS);
if (nr_vecs == BIO_MAX_PAGES)
if (nr_vecs == BIO_MAX_VECS)
mempool_free(bv, pool);
else if (nr_vecs > BIO_INLINE_VECS)
kmem_cache_free(biovec_slab(nr_vecs)->slab, bv);
@@ -186,15 +186,15 @@ struct bio_vec *bvec_alloc(mempool_t *pool, unsigned short *nr_vecs,
/*
* Try a slab allocation first for all smaller allocations. If that
* fails and __GFP_DIRECT_RECLAIM is set retry with the mempool.
* The mempool is sized to handle up to BIO_MAX_PAGES entries.
* The mempool is sized to handle up to BIO_MAX_VECS entries.
*/
if (*nr_vecs < BIO_MAX_PAGES) {
if (*nr_vecs < BIO_MAX_VECS) {
struct bio_vec *bvl;
bvl = kmem_cache_alloc(bvs->slab, bvec_alloc_gfp(gfp_mask));
if (likely(bvl) || !(gfp_mask & __GFP_DIRECT_RECLAIM))
return bvl;
*nr_vecs = BIO_MAX_PAGES;
*nr_vecs = BIO_MAX_VECS;
}
return mempool_alloc(pool, gfp_mask);

View File

@@ -109,6 +109,7 @@ void blkg_rwstat_recursive_sum(struct blkcg_gq *blkg, struct blkcg_policy *pol,
lockdep_assert_held(&blkg->q->queue_lock);
memset(sum, 0, sizeof(*sum));
rcu_read_lock();
blkg_for_each_descendant_pre(pos_blkg, pos_css, blkg) {
struct blkg_rwstat *rwstat;
@@ -122,7 +123,7 @@ void blkg_rwstat_recursive_sum(struct blkcg_gq *blkg, struct blkcg_policy *pol,
rwstat = (void *)pos_blkg + off;
for (i = 0; i < BLKG_RWSTAT_NR; i++)
sum->cnt[i] = blkg_rwstat_read_counter(rwstat, i);
sum->cnt[i] += blkg_rwstat_read_counter(rwstat, i);
}
rcu_read_unlock();
}

View File

@@ -219,7 +219,7 @@ static bool blk_crypto_split_bio_if_needed(struct bio **bio_ptr)
bio_for_each_segment(bv, bio, iter) {
num_sectors += bv.bv_len >> SECTOR_SHIFT;
if (++i == BIO_MAX_PAGES)
if (++i == BIO_MAX_VECS)
break;
}
if (num_sectors < bio_sectors(bio)) {

View File

@@ -296,7 +296,7 @@ static unsigned int __blkdev_sectors_to_bio_pages(sector_t nr_sects)
{
sector_t pages = DIV_ROUND_UP_SECTOR_T(nr_sects, PAGE_SIZE / 512);
return min(pages, (sector_t)BIO_MAX_PAGES);
return min(pages, (sector_t)BIO_MAX_VECS);
}
static int __blkdev_issue_zero_pages(struct block_device *bdev,

View File

@@ -249,7 +249,7 @@ static int bio_map_user_iov(struct request *rq, struct iov_iter *iter,
if (!iov_iter_count(iter))
return -EINVAL;
bio = bio_kmalloc(gfp_mask, iov_iter_npages(iter, BIO_MAX_PAGES));
bio = bio_kmalloc(gfp_mask, iov_iter_npages(iter, BIO_MAX_VECS));
if (!bio)
return -ENOMEM;
bio->bi_opf |= req_op(rq);

View File

@@ -240,7 +240,7 @@ int blkdev_zone_mgmt(struct block_device *bdev, enum req_opf op,
*/
if (op == REQ_OP_ZONE_RESET &&
blkdev_allow_reset_all_zones(bdev, sector, nr_sectors)) {
bio->bi_opf = REQ_OP_ZONE_RESET_ALL;
bio->bi_opf = REQ_OP_ZONE_RESET_ALL | REQ_SYNC;
break;
}
@@ -318,6 +318,22 @@ int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode,
return 0;
}
static int blkdev_truncate_zone_range(struct block_device *bdev, fmode_t mode,
const struct blk_zone_range *zrange)
{
loff_t start, end;
if (zrange->sector + zrange->nr_sectors <= zrange->sector ||
zrange->sector + zrange->nr_sectors > get_capacity(bdev->bd_disk))
/* Out of range */
return -EINVAL;
start = zrange->sector << SECTOR_SHIFT;
end = ((zrange->sector + zrange->nr_sectors) << SECTOR_SHIFT) - 1;
return truncate_bdev_range(bdev, mode, start, end);
}
/*
* BLKRESETZONE, BLKOPENZONE, BLKCLOSEZONE and BLKFINISHZONE ioctl processing.
* Called from blkdev_ioctl.
@@ -329,6 +345,7 @@ int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode,
struct request_queue *q;
struct blk_zone_range zrange;
enum req_opf op;
int ret;
if (!argp)
return -EINVAL;
@@ -352,6 +369,11 @@ int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode,
switch (cmd) {
case BLKRESETZONE:
op = REQ_OP_ZONE_RESET;
/* Invalidate the page cache, including dirty pages. */
ret = blkdev_truncate_zone_range(bdev, mode, &zrange);
if (ret)
return ret;
break;
case BLKOPENZONE:
op = REQ_OP_ZONE_OPEN;
@@ -366,8 +388,20 @@ int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode,
return -ENOTTY;
}
return blkdev_zone_mgmt(bdev, op, zrange.sector, zrange.nr_sectors,
GFP_KERNEL);
ret = blkdev_zone_mgmt(bdev, op, zrange.sector, zrange.nr_sectors,
GFP_KERNEL);
/*
* Invalidate the page cache again for zone reset: writes can only be
* direct for zoned devices so concurrent writes would not add any page
* to the page cache after/during reset. The page cache may be filled
* again due to concurrent reads though and dropping the pages for
* these is fine.
*/
if (!ret && cmd == BLKRESETZONE)
ret = blkdev_truncate_zone_range(bdev, mode, &zrange);
return ret;
}
static inline unsigned long *blk_alloc_zone_bitmap(int node,

View File

@@ -229,10 +229,10 @@ static struct bio *bounce_clone_bio(struct bio *bio_src)
* - The point of cloning the biovec is to produce a bio with a biovec
* the caller can modify: bi_idx and bi_bvec_done should be 0.
*
* - The original bio could've had more than BIO_MAX_PAGES biovecs; if
* - The original bio could've had more than BIO_MAX_VECS biovecs; if
* we tried to clone the whole thing bio_alloc_bioset() would fail.
* But the clone should succeed as long as the number of biovecs we
* actually need to allocate is fewer than BIO_MAX_PAGES.
* actually need to allocate is fewer than BIO_MAX_VECS.
*
* - Lastly, bi_vcnt should not be looked at or relied upon by code
* that does not own the bio - reason being drivers don't use it for
@@ -299,7 +299,7 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig,
int sectors = 0;
bio_for_each_segment(from, *bio_orig, iter) {
if (i++ < BIO_MAX_PAGES)
if (i++ < BIO_MAX_VECS)
sectors += from.bv_len >> 9;
if (page_to_pfn(from.bv_page) > q->limits.bounce_pfn)
bounce = true;

View File

@@ -534,10 +534,8 @@ static void register_disk(struct device *parent, struct gendisk *disk,
kobject_create_and_add("holders", &ddev->kobj);
disk->slave_dir = kobject_create_and_add("slaves", &ddev->kobj);
if (disk->flags & GENHD_FL_HIDDEN) {
dev_set_uevent_suppress(ddev, 0);
if (disk->flags & GENHD_FL_HIDDEN)
return;
}
disk_scan_partitions(disk);

View File

@@ -1324,7 +1324,7 @@ struct bm_extent {
* A followup commit may allow even bigger BIO sizes,
* once we thought that through. */
#define DRBD_MAX_BIO_SIZE (1U << 20)
#if DRBD_MAX_BIO_SIZE > (BIO_MAX_PAGES << PAGE_SHIFT)
#if DRBD_MAX_BIO_SIZE > (BIO_MAX_VECS << PAGE_SHIFT)
#error Architecture not supported: DRBD_MAX_BIO_SIZE > BIO_MAX_SIZE
#endif
#define DRBD_MAX_BIO_SIZE_SAFE (1U << 12) /* Works always = 4k */

View File

@@ -871,6 +871,7 @@ static int rsxx_pci_probe(struct pci_dev *dev,
card->event_wq = create_singlethread_workqueue(DRIVER_NAME"_event");
if (!card->event_wq) {
dev_err(CARD_TO_DEV(card), "Failed card event setup.\n");
st = -ENOMEM;
goto failed_event_handler;
}

View File

@@ -877,6 +877,7 @@ static int mm_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
if (card->mm_pages[0].desc == NULL ||
card->mm_pages[1].desc == NULL) {
dev_printk(KERN_ERR, &card->dev->dev, "alloc failed\n");
ret = -ENOMEM;
goto failed_alloc;
}
reset_page(&card->mm_pages[0]);
@@ -888,8 +889,10 @@ static int mm_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
spin_lock_init(&card->lock);
card->queue = blk_alloc_queue(NUMA_NO_NODE);
if (!card->queue)
if (!card->queue) {
ret = -ENOMEM;
goto failed_alloc;
}
tasklet_init(&card->tasklet, process_page, (unsigned long)card);

View File

@@ -965,7 +965,7 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size,
q->limits.max_hw_sectors = UINT_MAX;
q->limits.max_sectors = UINT_MAX;
q->limits.max_segment_size = UINT_MAX;
q->limits.max_segments = BIO_MAX_PAGES;
q->limits.max_segments = BIO_MAX_VECS;
blk_queue_max_discard_sectors(q, UINT_MAX);
q->limits.discard_granularity = 512;
q->limits.io_min = block_size;

View File

@@ -229,7 +229,7 @@ static DEFINE_SPINLOCK(dm_crypt_clients_lock);
static unsigned dm_crypt_clients_n = 0;
static volatile unsigned long dm_crypt_pages_per_client;
#define DM_CRYPT_MEMORY_PERCENT 2
#define DM_CRYPT_MIN_PAGES_PER_CLIENT (BIO_MAX_PAGES * 16)
#define DM_CRYPT_MIN_PAGES_PER_CLIENT (BIO_MAX_VECS * 16)
static void clone_init(struct dm_crypt_io *, struct bio *);
static void kcryptd_queue_crypt(struct dm_crypt_io *io);
@@ -3246,7 +3246,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
ALIGN(sizeof(struct dm_crypt_io) + cc->dmreq_start + additional_req_size,
ARCH_KMALLOC_MINALIGN);
ret = mempool_init(&cc->page_pool, BIO_MAX_PAGES, crypt_page_alloc, crypt_page_free, cc);
ret = mempool_init(&cc->page_pool, BIO_MAX_VECS, crypt_page_alloc, crypt_page_free, cc);
if (ret) {
ti->error = "Cannot allocate page mempool";
goto bad;
@@ -3373,9 +3373,9 @@ static int crypt_map(struct dm_target *ti, struct bio *bio)
/*
* Check if bio is too large, split as needed.
*/
if (unlikely(bio->bi_iter.bi_size > (BIO_MAX_PAGES << PAGE_SHIFT)) &&
if (unlikely(bio->bi_iter.bi_size > (BIO_MAX_VECS << PAGE_SHIFT)) &&
(bio_data_dir(bio) == WRITE || cc->on_disk_tag_size))
dm_accept_partial_bio(bio, ((BIO_MAX_PAGES << PAGE_SHIFT) >> SECTOR_SHIFT));
dm_accept_partial_bio(bio, ((BIO_MAX_VECS << PAGE_SHIFT) >> SECTOR_SHIFT));
/*
* Ensure that bio is a multiple of internal sector encryption size

View File

@@ -1892,10 +1892,10 @@ restart:
list_add(&g->lru, &wbl.list);
wbl.size++;
g->write_in_progress = true;
g->wc_list_contiguous = BIO_MAX_PAGES;
g->wc_list_contiguous = BIO_MAX_VECS;
f = g;
e->wc_list_contiguous++;
if (unlikely(e->wc_list_contiguous == BIO_MAX_PAGES)) {
if (unlikely(e->wc_list_contiguous == BIO_MAX_VECS)) {
if (unlikely(wc->writeback_all)) {
next_node = rb_next(&f->rb_node);
if (likely(next_node))

View File

@@ -735,7 +735,7 @@ static void r5l_submit_current_io(struct r5l_log *log)
static struct bio *r5l_bio_alloc(struct r5l_log *log)
{
struct bio *bio = bio_alloc_bioset(GFP_NOIO, BIO_MAX_PAGES, &log->bs);
struct bio *bio = bio_alloc_bioset(GFP_NOIO, BIO_MAX_VECS, &log->bs);
bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
bio_set_dev(bio, log->rdev->bdev);
@@ -1634,7 +1634,7 @@ static int r5l_recovery_allocate_ra_pool(struct r5l_log *log,
{
struct page *page;
ctx->ra_bio = bio_alloc_bioset(GFP_KERNEL, BIO_MAX_PAGES, &log->bs);
ctx->ra_bio = bio_alloc_bioset(GFP_KERNEL, BIO_MAX_VECS, &log->bs);
if (!ctx->ra_bio)
return -ENOMEM;

View File

@@ -496,7 +496,7 @@ static void ppl_submit_iounit(struct ppl_io_unit *io)
if (!bio_add_page(bio, sh->ppl_page, PAGE_SIZE, 0)) {
struct bio *prev = bio;
bio = bio_alloc_bioset(GFP_NOIO, BIO_MAX_PAGES,
bio = bio_alloc_bioset(GFP_NOIO, BIO_MAX_VECS,
&ppl_conf->bs);
bio->bi_opf = prev->bi_opf;
bio->bi_write_hint = prev->bi_write_hint;

View File

@@ -380,6 +380,7 @@ bool nvme_cancel_request(struct request *req, void *data, bool reserved)
return true;
nvme_req(req)->status = NVME_SC_HOST_ABORTED_CMD;
nvme_req(req)->flags |= NVME_REQ_CANCELLED;
blk_mq_complete_request(req);
return true;
}
@@ -1440,7 +1441,7 @@ static int nvme_identify_ns(struct nvme_ctrl *ctrl, unsigned nsid,
goto out_free_id;
}
error = -ENODEV;
error = NVME_SC_INVALID_NS | NVME_SC_DNR;
if ((*id)->ncap == 0) /* namespace not allocated or attached */
goto out_free_id;
@@ -4038,7 +4039,7 @@ static void nvme_ns_remove_by_nsid(struct nvme_ctrl *ctrl, u32 nsid)
static void nvme_validate_ns(struct nvme_ns *ns, struct nvme_ns_ids *ids)
{
struct nvme_id_ns *id;
int ret = -ENODEV;
int ret = NVME_SC_INVALID_NS | NVME_SC_DNR;
if (test_bit(NVME_NS_DEAD, &ns->flags))
goto out;
@@ -4047,7 +4048,7 @@ static void nvme_validate_ns(struct nvme_ns *ns, struct nvme_ns_ids *ids)
if (ret)
goto out;
ret = -ENODEV;
ret = NVME_SC_INVALID_NS | NVME_SC_DNR;
if (!nvme_ns_ids_equal(&ns->head->ids, ids)) {
dev_err(ns->ctrl->device,
"identifiers changed for nsid %d\n", ns->head->ns_id);
@@ -4065,7 +4066,7 @@ out:
*
* TODO: we should probably schedule a delayed retry here.
*/
if (ret && ret != -ENOMEM && !(ret > 0 && !(ret & NVME_SC_DNR)))
if (ret > 0 && (ret & NVME_SC_DNR))
nvme_ns_remove(ns);
}
@@ -4095,6 +4096,12 @@ static void nvme_validate_or_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
nsid);
break;
}
if (!nvme_multi_css(ctrl)) {
dev_warn(ctrl->device,
"command set not reported for nsid: %d\n",
nsid);
break;
}
nvme_alloc_ns(ctrl, nsid, &ids);
break;
default:

View File

@@ -1956,7 +1956,7 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
sizeof(op->rsp_iu), DMA_FROM_DEVICE);
if (opstate == FCPOP_STATE_ABORTED)
status = cpu_to_le16(NVME_SC_HOST_PATH_ERROR << 1);
status = cpu_to_le16(NVME_SC_HOST_ABORTED_CMD << 1);
else if (freq->status) {
status = cpu_to_le16(NVME_SC_HOST_PATH_ERROR << 1);
dev_info(ctrl->ctrl.device,
@@ -2055,7 +2055,7 @@ done:
nvme_fc_complete_rq(rq);
check_error:
if (terminate_assoc)
if (terminate_assoc && ctrl->ctrl.state != NVME_CTRL_RESETTING)
queue_work(nvme_reset_wq, &ctrl->ioerr_work);
}
@@ -2443,6 +2443,7 @@ nvme_fc_terminate_exchange(struct request *req, void *data, bool reserved)
struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl);
struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(req);
op->nreq.flags |= NVME_REQ_CANCELLED;
__nvme_fc_abort_op(ctrl, op);
return true;
}

View File

@@ -3246,6 +3246,7 @@ static const struct pci_device_id nvme_id_table[] = {
.driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
{ PCI_DEVICE(0x144d, 0xa822), /* Samsung PM1725a */
.driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY |
NVME_QUIRK_DISABLE_WRITE_ZEROES|
NVME_QUIRK_IGNORE_DEV_SUBNQN, },
{ PCI_DEVICE(0x1987, 0x5016), /* Phison E16 */
.driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN, },

View File

@@ -9,7 +9,13 @@
int nvme_revalidate_zones(struct nvme_ns *ns)
{
return blk_revalidate_disk_zones(ns->disk, NULL);
struct request_queue *q = ns->queue;
int ret;
ret = blk_revalidate_disk_zones(ns->disk, NULL);
if (!ret)
blk_queue_max_zone_append_sectors(q, ns->ctrl->max_zone_append);
return ret;
}
static int nvme_set_max_append(struct nvme_ctrl *ctrl)
@@ -107,7 +113,6 @@ int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf)
blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);
blk_queue_max_open_zones(q, le32_to_cpu(id->mor) + 1);
blk_queue_max_active_zones(q, le32_to_cpu(id->mar) + 1);
blk_queue_max_zone_append_sectors(q, ns->ctrl->max_zone_append);
free_data:
kfree(id);
return status;

Some files were not shown because too many files have changed in this diff Show More