From f1ba4e674febf5c0e9f725a75ca43b7722b4e963 Mon Sep 17 00:00:00 2001 From: Dmitry Fomichev Date: Thu, 30 Mar 2023 17:49:52 -0400 Subject: [PATCH 1/7] virtio-blk: fix to match virtio spec The merged patch series to support zoned block devices in virtio-blk is not the most up to date version. The merged patch can be found at https://lore.kernel.org/linux-block/20221016034127.330942-3-dmitry.fomichev@wdc.com/ but the latest and reviewed version is https://lore.kernel.org/linux-block/20221110053952.3378990-3-dmitry.fomichev@wdc.com/ The reason is apparently that the correct mailing lists and maintainers were not copied. The differences between the two are mostly cleanups, but there is one change that is very important in terms of compatibility with the approved virtio-zbd specification. Before it was approved, the OASIS virtio spec had a change in VIRTIO_BLK_T_ZONE_APPEND request layout that is not reflected in the current virtio-blk driver code. In the running code, the status is the first byte of the in-header that is followed by some pad bytes and the u64 that carries the sector at which the data has been written to the zone back to the driver, aka the append sector. This layout turned out to be problematic for implementing in QEMU and the request status byte has been eventually made the last byte of the in-header. The current code doesn't expect that and this causes the append sector value always come as zero to the block layer. This needs to be fixed ASAP. Fixes: 95bfec41bd3d ("virtio-blk: add support for zoned block devices") Cc: stable@vger.kernel.org Signed-off-by: Dmitry Fomichev Reviewed-by: Stefan Hajnoczi Reviewed-by: Damien Le Moal Message-Id: <20230330214953.1088216-2-dmitry.fomichev@wdc.com> Signed-off-by: Michael S. Tsirkin --- drivers/block/virtio_blk.c | 238 +++++++++++++++++++++----------- include/uapi/linux/virtio_blk.h | 18 +-- 2 files changed, 166 insertions(+), 90 deletions(-) diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 2723eede6f21..4f0dbbb3d4a5 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -96,16 +96,14 @@ struct virtblk_req { /* * The zone append command has an extended in header. - * The status field in zone_append_in_hdr must have - * the same offset in virtblk_req as the non-zoned - * status field above. + * The status field in zone_append_in_hdr must always + * be the last byte. */ struct { + __virtio64 sector; u8 status; - u8 reserved[7]; - __le64 append_sector; - } zone_append_in_hdr; - }; + } zone_append; + } in_hdr; size_t in_hdr_len; @@ -154,7 +152,7 @@ static int virtblk_add_req(struct virtqueue *vq, struct virtblk_req *vbr) sgs[num_out + num_in++] = vbr->sg_table.sgl; } - sg_init_one(&in_hdr, &vbr->status, vbr->in_hdr_len); + sg_init_one(&in_hdr, &vbr->in_hdr.status, vbr->in_hdr_len); sgs[num_out + num_in++] = &in_hdr; return virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC); @@ -242,11 +240,14 @@ static blk_status_t virtblk_setup_cmd(struct virtio_device *vdev, struct request *req, struct virtblk_req *vbr) { - size_t in_hdr_len = sizeof(vbr->status); + size_t in_hdr_len = sizeof(vbr->in_hdr.status); bool unmap = false; u32 type; u64 sector = 0; + if (!IS_ENABLED(CONFIG_BLK_DEV_ZONED) && op_is_zone_mgmt(req_op(req))) + return BLK_STS_NOTSUPP; + /* Set fields for all request types */ vbr->out_hdr.ioprio = cpu_to_virtio32(vdev, req_get_ioprio(req)); @@ -287,7 +288,7 @@ static blk_status_t virtblk_setup_cmd(struct virtio_device *vdev, case REQ_OP_ZONE_APPEND: type = VIRTIO_BLK_T_ZONE_APPEND; sector = blk_rq_pos(req); - in_hdr_len = sizeof(vbr->zone_append_in_hdr); + in_hdr_len = sizeof(vbr->in_hdr.zone_append); break; case REQ_OP_ZONE_RESET: type = VIRTIO_BLK_T_ZONE_RESET; @@ -297,7 +298,10 @@ static blk_status_t virtblk_setup_cmd(struct virtio_device *vdev, type = VIRTIO_BLK_T_ZONE_RESET_ALL; break; case REQ_OP_DRV_IN: - /* Out header already filled in, nothing to do */ + /* + * Out header has already been prepared by the caller (virtblk_get_id() + * or virtblk_submit_zone_report()), nothing to do here. + */ return 0; default: WARN_ON_ONCE(1); @@ -318,16 +322,28 @@ static blk_status_t virtblk_setup_cmd(struct virtio_device *vdev, return 0; } +/* + * The status byte is always the last byte of the virtblk request + * in-header. This helper fetches its value for all in-header formats + * that are currently defined. + */ +static inline u8 virtblk_vbr_status(struct virtblk_req *vbr) +{ + return *((u8 *)&vbr->in_hdr + vbr->in_hdr_len - 1); +} + static inline void virtblk_request_done(struct request *req) { struct virtblk_req *vbr = blk_mq_rq_to_pdu(req); - blk_status_t status = virtblk_result(vbr->status); + blk_status_t status = virtblk_result(virtblk_vbr_status(vbr)); + struct virtio_blk *vblk = req->mq_hctx->queue->queuedata; virtblk_unmap_data(req, vbr); virtblk_cleanup_cmd(req); if (req_op(req) == REQ_OP_ZONE_APPEND) - req->__sector = le64_to_cpu(vbr->zone_append_in_hdr.append_sector); + req->__sector = virtio64_to_cpu(vblk->vdev, + vbr->in_hdr.zone_append.sector); blk_mq_end_request(req, status); } @@ -355,7 +371,7 @@ static int virtblk_handle_req(struct virtio_blk_vq *vq, if (likely(!blk_should_fake_timeout(req->q)) && !blk_mq_complete_request_remote(req) && - !blk_mq_add_to_batch(req, iob, vbr->status, + !blk_mq_add_to_batch(req, iob, virtblk_vbr_status(vbr), virtblk_complete_batch)) virtblk_request_done(req); req_done++; @@ -550,7 +566,6 @@ static void virtio_queue_rqs(struct request **rqlist) #ifdef CONFIG_BLK_DEV_ZONED static void *virtblk_alloc_report_buffer(struct virtio_blk *vblk, unsigned int nr_zones, - unsigned int zone_sectors, size_t *buflen) { struct request_queue *q = vblk->disk->queue; @@ -558,7 +573,7 @@ static void *virtblk_alloc_report_buffer(struct virtio_blk *vblk, void *buf; nr_zones = min_t(unsigned int, nr_zones, - get_capacity(vblk->disk) >> ilog2(zone_sectors)); + get_capacity(vblk->disk) >> ilog2(vblk->zone_sectors)); bufsize = sizeof(struct virtio_blk_zone_report) + nr_zones * sizeof(struct virtio_blk_zone_descriptor); @@ -592,7 +607,7 @@ static int virtblk_submit_zone_report(struct virtio_blk *vblk, return PTR_ERR(req); vbr = blk_mq_rq_to_pdu(req); - vbr->in_hdr_len = sizeof(vbr->status); + vbr->in_hdr_len = sizeof(vbr->in_hdr.status); vbr->out_hdr.type = cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_ZONE_REPORT); vbr->out_hdr.sector = cpu_to_virtio64(vblk->vdev, sector); @@ -601,7 +616,7 @@ static int virtblk_submit_zone_report(struct virtio_blk *vblk, goto out; blk_execute_rq(req, false); - err = blk_status_to_errno(virtblk_result(vbr->status)); + err = blk_status_to_errno(virtblk_result(vbr->in_hdr.status)); out: blk_mq_free_request(req); return err; @@ -609,29 +624,72 @@ out: static int virtblk_parse_zone(struct virtio_blk *vblk, struct virtio_blk_zone_descriptor *entry, - unsigned int idx, unsigned int zone_sectors, - report_zones_cb cb, void *data) + unsigned int idx, report_zones_cb cb, void *data) { struct blk_zone zone = { }; - if (entry->z_type != VIRTIO_BLK_ZT_SWR && - entry->z_type != VIRTIO_BLK_ZT_SWP && - entry->z_type != VIRTIO_BLK_ZT_CONV) { - dev_err(&vblk->vdev->dev, "invalid zone type %#x\n", - entry->z_type); - return -EINVAL; + zone.start = virtio64_to_cpu(vblk->vdev, entry->z_start); + if (zone.start + vblk->zone_sectors <= get_capacity(vblk->disk)) + zone.len = vblk->zone_sectors; + else + zone.len = get_capacity(vblk->disk) - zone.start; + zone.capacity = virtio64_to_cpu(vblk->vdev, entry->z_cap); + zone.wp = virtio64_to_cpu(vblk->vdev, entry->z_wp); + + switch (entry->z_type) { + case VIRTIO_BLK_ZT_SWR: + zone.type = BLK_ZONE_TYPE_SEQWRITE_REQ; + break; + case VIRTIO_BLK_ZT_SWP: + zone.type = BLK_ZONE_TYPE_SEQWRITE_PREF; + break; + case VIRTIO_BLK_ZT_CONV: + zone.type = BLK_ZONE_TYPE_CONVENTIONAL; + break; + default: + dev_err(&vblk->vdev->dev, "zone %llu: invalid type %#x\n", + zone.start, entry->z_type); + return -EIO; } - zone.type = entry->z_type; - zone.cond = entry->z_state; - zone.len = zone_sectors; - zone.capacity = le64_to_cpu(entry->z_cap); - zone.start = le64_to_cpu(entry->z_start); - if (zone.cond == BLK_ZONE_COND_FULL) + switch (entry->z_state) { + case VIRTIO_BLK_ZS_EMPTY: + zone.cond = BLK_ZONE_COND_EMPTY; + break; + case VIRTIO_BLK_ZS_CLOSED: + zone.cond = BLK_ZONE_COND_CLOSED; + break; + case VIRTIO_BLK_ZS_FULL: + zone.cond = BLK_ZONE_COND_FULL; zone.wp = zone.start + zone.len; - else - zone.wp = le64_to_cpu(entry->z_wp); + break; + case VIRTIO_BLK_ZS_EOPEN: + zone.cond = BLK_ZONE_COND_EXP_OPEN; + break; + case VIRTIO_BLK_ZS_IOPEN: + zone.cond = BLK_ZONE_COND_IMP_OPEN; + break; + case VIRTIO_BLK_ZS_NOT_WP: + zone.cond = BLK_ZONE_COND_NOT_WP; + break; + case VIRTIO_BLK_ZS_RDONLY: + zone.cond = BLK_ZONE_COND_READONLY; + zone.wp = ULONG_MAX; + break; + case VIRTIO_BLK_ZS_OFFLINE: + zone.cond = BLK_ZONE_COND_OFFLINE; + zone.wp = ULONG_MAX; + break; + default: + dev_err(&vblk->vdev->dev, "zone %llu: invalid condition %#x\n", + zone.start, entry->z_state); + return -EIO; + } + /* + * The callback below checks the validity of the reported + * entry data, no need to further validate it here. + */ return cb(&zone, idx, data); } @@ -641,39 +699,47 @@ static int virtblk_report_zones(struct gendisk *disk, sector_t sector, { struct virtio_blk *vblk = disk->private_data; struct virtio_blk_zone_report *report; - unsigned int zone_sectors = vblk->zone_sectors; - unsigned int nz, i; - int ret, zone_idx = 0; + unsigned long long nz, i; size_t buflen; + unsigned int zone_idx = 0; + int ret; if (WARN_ON_ONCE(!vblk->zone_sectors)) return -EOPNOTSUPP; - report = virtblk_alloc_report_buffer(vblk, nr_zones, - zone_sectors, &buflen); + report = virtblk_alloc_report_buffer(vblk, nr_zones, &buflen); if (!report) return -ENOMEM; + mutex_lock(&vblk->vdev_mutex); + + if (!vblk->vdev) { + ret = -ENXIO; + goto fail_report; + } + while (zone_idx < nr_zones && sector < get_capacity(vblk->disk)) { memset(report, 0, buflen); ret = virtblk_submit_zone_report(vblk, (char *)report, buflen, sector); - if (ret) { - if (ret > 0) - ret = -EIO; - goto out_free; - } - nz = min((unsigned int)le64_to_cpu(report->nr_zones), nr_zones); + if (ret) + goto fail_report; + + nz = min_t(u64, virtio64_to_cpu(vblk->vdev, report->nr_zones), + nr_zones); if (!nz) break; for (i = 0; i < nz && zone_idx < nr_zones; i++) { ret = virtblk_parse_zone(vblk, &report->zones[i], - zone_idx, zone_sectors, cb, data); + zone_idx, cb, data); if (ret) - goto out_free; - sector = le64_to_cpu(report->zones[i].z_start) + zone_sectors; + goto fail_report; + + sector = virtio64_to_cpu(vblk->vdev, + report->zones[i].z_start) + + vblk->zone_sectors; zone_idx++; } } @@ -682,7 +748,8 @@ static int virtblk_report_zones(struct gendisk *disk, sector_t sector, ret = zone_idx; else ret = -EINVAL; -out_free: +fail_report: + mutex_unlock(&vblk->vdev_mutex); kvfree(report); return ret; } @@ -691,20 +758,28 @@ static void virtblk_revalidate_zones(struct virtio_blk *vblk) { u8 model; - if (!vblk->zone_sectors) - return; - virtio_cread(vblk->vdev, struct virtio_blk_config, zoned.model, &model); - if (!blk_revalidate_disk_zones(vblk->disk, NULL)) - set_capacity_and_notify(vblk->disk, 0); + switch (model) { + default: + dev_err(&vblk->vdev->dev, "unknown zone model %d\n", model); + fallthrough; + case VIRTIO_BLK_Z_NONE: + case VIRTIO_BLK_Z_HA: + disk_set_zoned(vblk->disk, BLK_ZONED_NONE); + return; + case VIRTIO_BLK_Z_HM: + WARN_ON_ONCE(!vblk->zone_sectors); + if (!blk_revalidate_disk_zones(vblk->disk, NULL)) + set_capacity_and_notify(vblk->disk, 0); + } } static int virtblk_probe_zoned_device(struct virtio_device *vdev, struct virtio_blk *vblk, struct request_queue *q) { - u32 v; + u32 v, wg; u8 model; int ret; @@ -713,16 +788,11 @@ static int virtblk_probe_zoned_device(struct virtio_device *vdev, switch (model) { case VIRTIO_BLK_Z_NONE: + case VIRTIO_BLK_Z_HA: + /* Present the host-aware device as non-zoned */ return 0; case VIRTIO_BLK_Z_HM: break; - case VIRTIO_BLK_Z_HA: - /* - * Present the host-aware device as a regular drive. - * TODO It is possible to add an option to make it appear - * in the system as a zoned drive. - */ - return 0; default: dev_err(&vdev->dev, "unsupported zone model %d\n", model); return -EINVAL; @@ -735,32 +805,31 @@ static int virtblk_probe_zoned_device(struct virtio_device *vdev, virtio_cread(vdev, struct virtio_blk_config, zoned.max_open_zones, &v); - disk_set_max_open_zones(vblk->disk, le32_to_cpu(v)); - - dev_dbg(&vdev->dev, "max open zones = %u\n", le32_to_cpu(v)); + disk_set_max_open_zones(vblk->disk, v); + dev_dbg(&vdev->dev, "max open zones = %u\n", v); virtio_cread(vdev, struct virtio_blk_config, zoned.max_active_zones, &v); - disk_set_max_active_zones(vblk->disk, le32_to_cpu(v)); - dev_dbg(&vdev->dev, "max active zones = %u\n", le32_to_cpu(v)); + disk_set_max_active_zones(vblk->disk, v); + dev_dbg(&vdev->dev, "max active zones = %u\n", v); virtio_cread(vdev, struct virtio_blk_config, - zoned.write_granularity, &v); - if (!v) { + zoned.write_granularity, &wg); + if (!wg) { dev_warn(&vdev->dev, "zero write granularity reported\n"); return -ENODEV; } - blk_queue_physical_block_size(q, le32_to_cpu(v)); - blk_queue_io_min(q, le32_to_cpu(v)); + blk_queue_physical_block_size(q, wg); + blk_queue_io_min(q, wg); - dev_dbg(&vdev->dev, "write granularity = %u\n", le32_to_cpu(v)); + dev_dbg(&vdev->dev, "write granularity = %u\n", wg); /* * virtio ZBD specification doesn't require zones to be a power of * two sectors in size, but the code in this driver expects that. */ - virtio_cread(vdev, struct virtio_blk_config, zoned.zone_sectors, &v); - vblk->zone_sectors = le32_to_cpu(v); + virtio_cread(vdev, struct virtio_blk_config, zoned.zone_sectors, + &vblk->zone_sectors); if (vblk->zone_sectors == 0 || !is_power_of_2(vblk->zone_sectors)) { dev_err(&vdev->dev, "zoned device with non power of two zone size %u\n", @@ -783,8 +852,15 @@ static int virtblk_probe_zoned_device(struct virtio_device *vdev, dev_warn(&vdev->dev, "zero max_append_sectors reported\n"); return -ENODEV; } - blk_queue_max_zone_append_sectors(q, le32_to_cpu(v)); - dev_dbg(&vdev->dev, "max append sectors = %u\n", le32_to_cpu(v)); + if ((v << SECTOR_SHIFT) < wg) { + dev_err(&vdev->dev, + "write granularity %u exceeds max_append_sectors %u limit\n", + wg, v); + return -ENODEV; + } + + blk_queue_max_zone_append_sectors(q, v); + dev_dbg(&vdev->dev, "max append sectors = %u\n", v); } return ret; @@ -794,6 +870,7 @@ static inline bool virtblk_has_zoned_feature(struct virtio_device *vdev) { return virtio_has_feature(vdev, VIRTIO_BLK_F_ZONED); } + #else /* @@ -801,9 +878,11 @@ static inline bool virtblk_has_zoned_feature(struct virtio_device *vdev) * We only need to define a few symbols to avoid compilation errors. */ #define virtblk_report_zones NULL + static inline void virtblk_revalidate_zones(struct virtio_blk *vblk) { } + static inline int virtblk_probe_zoned_device(struct virtio_device *vdev, struct virtio_blk *vblk, struct request_queue *q) { @@ -831,7 +910,7 @@ static int virtblk_get_id(struct gendisk *disk, char *id_str) return PTR_ERR(req); vbr = blk_mq_rq_to_pdu(req); - vbr->in_hdr_len = sizeof(vbr->status); + vbr->in_hdr_len = sizeof(vbr->in_hdr.status); vbr->out_hdr.type = cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_GET_ID); vbr->out_hdr.sector = 0; @@ -840,7 +919,7 @@ static int virtblk_get_id(struct gendisk *disk, char *id_str) goto out; blk_execute_rq(req, false); - err = blk_status_to_errno(virtblk_result(vbr->status)); + err = blk_status_to_errno(virtblk_result(vbr->in_hdr.status)); out: blk_mq_free_request(req); return err; @@ -1504,9 +1583,6 @@ static int virtblk_probe(struct virtio_device *vdev) goto out_cleanup_disk; } - dev_info(&vdev->dev, "blk config size: %zu\n", - sizeof(struct virtio_blk_config)); - err = device_add_disk(&vdev->dev, vblk->disk, virtblk_attr_groups); if (err) goto out_cleanup_disk; diff --git a/include/uapi/linux/virtio_blk.h b/include/uapi/linux/virtio_blk.h index 5af2a0300bb9..3744e4da1b2a 100644 --- a/include/uapi/linux/virtio_blk.h +++ b/include/uapi/linux/virtio_blk.h @@ -140,11 +140,11 @@ struct virtio_blk_config { /* Zoned block device characteristics (if VIRTIO_BLK_F_ZONED) */ struct virtio_blk_zoned_characteristics { - __le32 zone_sectors; - __le32 max_open_zones; - __le32 max_active_zones; - __le32 max_append_sectors; - __le32 write_granularity; + __virtio32 zone_sectors; + __virtio32 max_open_zones; + __virtio32 max_active_zones; + __virtio32 max_append_sectors; + __virtio32 write_granularity; __u8 model; __u8 unused2[3]; } zoned; @@ -241,11 +241,11 @@ struct virtio_blk_outhdr { */ struct virtio_blk_zone_descriptor { /* Zone capacity */ - __le64 z_cap; + __virtio64 z_cap; /* The starting sector of the zone */ - __le64 z_start; + __virtio64 z_start; /* Zone write pointer position in sectors */ - __le64 z_wp; + __virtio64 z_wp; /* Zone type */ __u8 z_type; /* Zone state */ @@ -254,7 +254,7 @@ struct virtio_blk_zone_descriptor { }; struct virtio_blk_zone_report { - __le64 nr_zones; + __virtio64 nr_zones; __u8 reserved[56]; struct virtio_blk_zone_descriptor zones[]; }; From 10805eb5d6d15fd4f61b05cc7aa269e12ab99848 Mon Sep 17 00:00:00 2001 From: Dmitry Fomichev Date: Thu, 30 Mar 2023 17:49:53 -0400 Subject: [PATCH 2/7] virtio-blk: fix ZBD probe in kernels without ZBD support When the kernel is built without support for zoned block devices, virtio-blk probe needs to error out any host-managed device scans to prevent such devices from appearing in the system as non-zoned. The current virtio-blk code simply bypasses all ZBD checks if CONFIG_BLK_DEV_ZONED is not defined and this leads to host-managed block devices being presented as non-zoned in the OS. This is one of the main problems this patch series is aimed to fix. In this patch, make VIRTIO_BLK_F_ZONED feature defined even when CONFIG_BLK_DEV_ZONED is not. This change makes the code compliant with the voted revision of virtio-blk ZBD spec. Modify the probe code to look at the situation when VIRTIO_BLK_F_ZONED is negotiated in a kernel that is built without ZBD support. In this case, the code checks the zoned model of the device and fails the probe is the device is host-managed. The patch also adds the comment to clarify that the call to perform the zoned device probe is correctly placed after virtio_device ready(). Fixes: 95bfec41bd3d ("virtio-blk: add support for zoned block devices") Cc: stable@vger.kernel.org Signed-off-by: Dmitry Fomichev Reviewed-by: Stefan Hajnoczi Reviewed-by: Damien Le Moal Message-Id: <20230330214953.1088216-3-dmitry.fomichev@wdc.com> Signed-off-by: Michael S. Tsirkin --- drivers/block/virtio_blk.c | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 4f0dbbb3d4a5..2b918e28acaa 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -866,16 +866,12 @@ static int virtblk_probe_zoned_device(struct virtio_device *vdev, return ret; } -static inline bool virtblk_has_zoned_feature(struct virtio_device *vdev) -{ - return virtio_has_feature(vdev, VIRTIO_BLK_F_ZONED); -} - #else /* * Zoned block device support is not configured in this kernel. - * We only need to define a few symbols to avoid compilation errors. + * Host-managed zoned devices can't be supported, but others are + * good to go as regular block devices. */ #define virtblk_report_zones NULL @@ -886,12 +882,16 @@ static inline void virtblk_revalidate_zones(struct virtio_blk *vblk) static inline int virtblk_probe_zoned_device(struct virtio_device *vdev, struct virtio_blk *vblk, struct request_queue *q) { - return -EOPNOTSUPP; -} + u8 model; -static inline bool virtblk_has_zoned_feature(struct virtio_device *vdev) -{ - return false; + virtio_cread(vdev, struct virtio_blk_config, zoned.model, &model); + if (model == VIRTIO_BLK_Z_HM) { + dev_err(&vdev->dev, + "virtio_blk: zoned devices are not supported"); + return -EOPNOTSUPP; + } + + return 0; } #endif /* CONFIG_BLK_DEV_ZONED */ @@ -1577,7 +1577,11 @@ static int virtblk_probe(struct virtio_device *vdev) virtblk_update_capacity(vblk, false); virtio_device_ready(vdev); - if (virtblk_has_zoned_feature(vdev)) { + /* + * All steps that follow use the VQs therefore they need to be + * placed after the virtio_device_ready() call above. + */ + if (virtio_has_feature(vdev, VIRTIO_BLK_F_ZONED)) { err = virtblk_probe_zoned_device(vdev, vblk, q); if (err) goto out_cleanup_disk; @@ -1683,10 +1687,7 @@ static unsigned int features[] = { VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE, VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE, VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_DISCARD, VIRTIO_BLK_F_WRITE_ZEROES, - VIRTIO_BLK_F_SECURE_ERASE, -#ifdef CONFIG_BLK_DEV_ZONED - VIRTIO_BLK_F_ZONED, -#endif /* CONFIG_BLK_DEV_ZONED */ + VIRTIO_BLK_F_SECURE_ERASE, VIRTIO_BLK_F_ZONED, }; static struct virtio_driver virtio_blk = { From e508efc3ae7e44eb3caf595a086bfd3824da5b9a Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Mon, 20 Mar 2023 21:06:18 -0500 Subject: [PATCH 3/7] vhost-scsi: Fix vhost_scsi struct use after free If vhost_scsi_setup_vq_cmds fails we leave the tpg->vhost_scsi pointer set. If the device is freed and then the user unmaps the LUN, the call to vhost_scsi_port_unlink -> vhost_scsi_hotunplug will see the that tpg->vhost_scsi is still set and try to use it. This has us clear the vhost_scsi pointer in the failure path. It also has us take tv_tpg_mutex in this failure path, because tv_tpg_vhost_count is accessed under this mutex in vhost_scsi_drop_nexus and in the future we will want to serialize access to tpg->vhost_scsi with that mutex instead of the vhost_scsi_mutex. Signed-off-by: Mike Christie Message-Id: <20230321020624.13323-2-michael.christie@oracle.com> Signed-off-by: Michael S. Tsirkin --- drivers/vhost/scsi.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index b244e7c0f514..5875241e1654 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -1658,7 +1658,10 @@ undepend: for (i = 0; i < VHOST_SCSI_MAX_TARGET; i++) { tpg = vs_tpg[i]; if (tpg) { + mutex_lock(&tpg->tv_tpg_mutex); + tpg->vhost_scsi = NULL; tpg->tv_tpg_vhost_count--; + mutex_unlock(&tpg->tv_tpg_mutex); target_undepend_item(&tpg->se_tpg.tpg_group.cg_item); } } From 4c363c81f66c77e0a2394b9a4efa707d122dc544 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Mon, 20 Mar 2023 21:06:19 -0500 Subject: [PATCH 4/7] vhost-scsi: Fix crash during LUN unmapping We normally clear the endpoint then unmap LUNs so the devices are fully shutdown when the LUN is unmapped, but it's legal to unmap before clearing. If the user does that while TMFs are running then we can end up crashing. vhost_scsi_port_unlink assumes that the LUN's tmf struct will always be on the tmf_queue list. However, if a TMF is running then it will have been removed while it's executing. If we do a LUN unmap at this time, then we assume the entry is on the list and just start accessing it and free it. This fixes the bug by just allocating the vhost_scsi_tmf struct when it's needed like is done with the se_tmr struct that's needed when we submit the TMF. In this path perf is not an issue and we can use GFP_KERNEL since it won't swing directly back on us, so we don't need to preallocate the struct. Signed-off-by: Mike Christie Message-Id: <20230321020624.13323-3-michael.christie@oracle.com> Signed-off-by: Michael S. Tsirkin --- drivers/vhost/scsi.c | 36 ++++-------------------------------- 1 file changed, 4 insertions(+), 32 deletions(-) diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index 5875241e1654..32d0be968103 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -125,7 +125,6 @@ struct vhost_scsi_tpg { struct se_portal_group se_tpg; /* Pointer back to vhost_scsi, protected by tv_tpg_mutex */ struct vhost_scsi *vhost_scsi; - struct list_head tmf_queue; }; struct vhost_scsi_tport { @@ -206,10 +205,8 @@ struct vhost_scsi { struct vhost_scsi_tmf { struct vhost_work vwork; - struct vhost_scsi_tpg *tpg; struct vhost_scsi *vhost; struct vhost_scsi_virtqueue *svq; - struct list_head queue_entry; struct se_cmd se_cmd; u8 scsi_resp; @@ -352,12 +349,9 @@ static void vhost_scsi_release_cmd_res(struct se_cmd *se_cmd) static void vhost_scsi_release_tmf_res(struct vhost_scsi_tmf *tmf) { - struct vhost_scsi_tpg *tpg = tmf->tpg; struct vhost_scsi_inflight *inflight = tmf->inflight; - mutex_lock(&tpg->tv_tpg_mutex); - list_add_tail(&tpg->tmf_queue, &tmf->queue_entry); - mutex_unlock(&tpg->tv_tpg_mutex); + kfree(tmf); vhost_scsi_put_inflight(inflight); } @@ -1194,19 +1188,11 @@ vhost_scsi_handle_tmf(struct vhost_scsi *vs, struct vhost_scsi_tpg *tpg, goto send_reject; } - mutex_lock(&tpg->tv_tpg_mutex); - if (list_empty(&tpg->tmf_queue)) { - pr_err("Missing reserve TMF. Could not handle LUN RESET.\n"); - mutex_unlock(&tpg->tv_tpg_mutex); + tmf = kzalloc(sizeof(*tmf), GFP_KERNEL); + if (!tmf) goto send_reject; - } - tmf = list_first_entry(&tpg->tmf_queue, struct vhost_scsi_tmf, - queue_entry); - list_del_init(&tmf->queue_entry); - mutex_unlock(&tpg->tv_tpg_mutex); - - tmf->tpg = tpg; + vhost_work_init(&tmf->vwork, vhost_scsi_tmf_resp_work); tmf->vhost = vs; tmf->svq = svq; tmf->resp_iov = vq->iov[vc->out]; @@ -2035,19 +2021,11 @@ static int vhost_scsi_port_link(struct se_portal_group *se_tpg, { struct vhost_scsi_tpg *tpg = container_of(se_tpg, struct vhost_scsi_tpg, se_tpg); - struct vhost_scsi_tmf *tmf; - - tmf = kzalloc(sizeof(*tmf), GFP_KERNEL); - if (!tmf) - return -ENOMEM; - INIT_LIST_HEAD(&tmf->queue_entry); - vhost_work_init(&tmf->vwork, vhost_scsi_tmf_resp_work); mutex_lock(&vhost_scsi_mutex); mutex_lock(&tpg->tv_tpg_mutex); tpg->tv_tpg_port_count++; - list_add_tail(&tmf->queue_entry, &tpg->tmf_queue); mutex_unlock(&tpg->tv_tpg_mutex); vhost_scsi_hotplug(tpg, lun); @@ -2062,16 +2040,11 @@ static void vhost_scsi_port_unlink(struct se_portal_group *se_tpg, { struct vhost_scsi_tpg *tpg = container_of(se_tpg, struct vhost_scsi_tpg, se_tpg); - struct vhost_scsi_tmf *tmf; mutex_lock(&vhost_scsi_mutex); mutex_lock(&tpg->tv_tpg_mutex); tpg->tv_tpg_port_count--; - tmf = list_first_entry(&tpg->tmf_queue, struct vhost_scsi_tmf, - queue_entry); - list_del(&tmf->queue_entry); - kfree(tmf); mutex_unlock(&tpg->tv_tpg_mutex); vhost_scsi_hotunplug(tpg, lun); @@ -2332,7 +2305,6 @@ vhost_scsi_make_tpg(struct se_wwn *wwn, const char *name) } mutex_init(&tpg->tv_tpg_mutex); INIT_LIST_HEAD(&tpg->tv_tpg_list); - INIT_LIST_HEAD(&tpg->tmf_queue); tpg->tport = tport; tpg->tport_tpgt = tpgt; From 9513c55ce3e8f0f118c47423452c4a4bdaa80222 Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Wed, 15 Feb 2023 15:33:50 -0700 Subject: [PATCH 5/7] tools/virtio: fix typo in README instructions We need to have a unique chardev for each data path, else the chardevs will collide and qemu will die with this message: qemu-system-x86_64: -device virtserialport,bus=virtio-serial0.0,nr=2,chardev=charchannel0, id=channel1,name=trace-path-cpu0: Property 'virtserialport.chardev' can't take value 'charchannel0': Device 'charchannel0' is in use Signed-off-by: Ross Zwisler Message-Id: <20230215223350.2658616-7-zwisler@google.com> Signed-off-by: Michael S. Tsirkin --- tools/virtio/virtio-trace/README | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/virtio/virtio-trace/README b/tools/virtio/virtio-trace/README index b64845b823ab..4fb9368bf751 100644 --- a/tools/virtio/virtio-trace/README +++ b/tools/virtio/virtio-trace/README @@ -61,7 +61,7 @@ and id=channel0,name=agent-ctl-path\ ##data path## -chardev pipe,id=charchannel1,path=/tmp/virtio-trace/trace-path-cpu0\ - -device virtserialport,bus=virtio-serial0.0,nr=2,chardev=charchannel0,\ + -device virtserialport,bus=virtio-serial0.0,nr=2,chardev=charchannel1,\ id=channel1,name=trace-path-cpu0\ ... From f0417e72add5aed5997092b6eb76298290d866c9 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Mon, 3 Apr 2023 14:40:39 +0300 Subject: [PATCH 6/7] vdpa/mlx5: Add and remove debugfs in setup/teardown driver The right place to add the debugfs create is in setup_driver() and remove it in teardown_driver(). Current code adds the debugfs when creating the device but resetting a device will remove the debugfs subtree and subsequent set_driver will not be able to create the files since the debugfs pointer is NULL. Fixes: 294221004322 ("vdpa/mlx5: Add debugfs subtree") Signed-off-by: Eli Cohen v3 -> v4: Fix error flow in setup_driver() Message-Id: <20230403114039.11102-1-elic@nvidia.com> Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- drivers/vdpa/mlx5/net/mlx5_vnet.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index 520646ae7fa0..195963b82b63 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -2467,10 +2467,11 @@ static int setup_driver(struct mlx5_vdpa_dev *mvdev) err = 0; goto out; } + mlx5_vdpa_add_debugfs(ndev); err = setup_virtqueues(mvdev); if (err) { mlx5_vdpa_warn(mvdev, "setup_virtqueues\n"); - goto out; + goto err_setup; } err = create_rqt(ndev); @@ -2500,6 +2501,8 @@ err_tir: destroy_rqt(ndev); err_rqt: teardown_virtqueues(ndev); +err_setup: + mlx5_vdpa_remove_debugfs(ndev->debugfs); out: return err; } @@ -2513,6 +2516,8 @@ static void teardown_driver(struct mlx5_vdpa_net *ndev) if (!ndev->setup) return; + mlx5_vdpa_remove_debugfs(ndev->debugfs); + ndev->debugfs = NULL; teardown_steering(ndev); destroy_tir(ndev); destroy_rqt(ndev); @@ -3261,7 +3266,6 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name, if (err) goto err_reg; - mlx5_vdpa_add_debugfs(ndev); mgtdev->ndev = ndev; return 0; From 9da667e50c7e62266f3c2f8ad57b32fca40716b1 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Wed, 29 Mar 2023 18:03:21 +0200 Subject: [PATCH 7/7] vdpa_sim_net: complete the initialization before register the device Initialization must be completed before calling _vdpa_register_device() since it can connect the device to the vDPA bus, so requests can arrive after that call. So for example vdpasim_net_work(), which uses the net->*_stats variables, can be scheduled before they are initialized. Let's move _vdpa_register_device() to the end of vdpasim_net_dev_add() and add a comment to avoid future issues. Fixes: 0899774cb360 ("vdpa_sim_net: vendor satistics") Signed-off-by: Stefano Garzarella Message-Id: <20230329160321.187176-1-sgarzare@redhat.com> Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- drivers/vdpa/vdpa_sim/vdpa_sim_net.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim_net.c b/drivers/vdpa/vdpa_sim/vdpa_sim_net.c index 862f405362de..dfe2ce341803 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim_net.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim_net.c @@ -466,16 +466,21 @@ static int vdpasim_net_dev_add(struct vdpa_mgmt_dev *mdev, const char *name, vdpasim_net_setup_config(simdev, config); - ret = _vdpa_register_device(&simdev->vdpa, VDPASIM_NET_VQ_NUM); - if (ret) - goto reg_err; - net = sim_to_net(simdev); u64_stats_init(&net->tx_stats.syncp); u64_stats_init(&net->rx_stats.syncp); u64_stats_init(&net->cq_stats.syncp); + /* + * Initialization must be completed before this call, since it can + * connect the device to the vDPA bus, so requests can arrive after + * this call. + */ + ret = _vdpa_register_device(&simdev->vdpa, VDPASIM_NET_VQ_NUM); + if (ret) + goto reg_err; + return 0; reg_err: