Merge tag 'for-5.20/block-2022-07-29' of git://git.kernel.dk/linux-block

Pull block updates from Jens Axboe:

 - Improve the type checking of request flags (Bart)

 - Ensure queue mapping for a single queues always picks the right queue
   (Bart)

 - Sanitize the io priority handling (Jan)

 - rq-qos race fix (Jinke)

 - Reserved tags handling improvements (John)

 - Separate memory alignment from file/disk offset aligment for O_DIRECT
   (Keith)

 - Add new ublk driver, userspace block driver using io_uring for
   communication with the userspace backend (Ming)

 - Use try_cmpxchg() to cleanup the code in various spots (Uros)

 - Finally remove bdevname() (Christoph)

 - Clean up the zoned device handling (Christoph)

 - Clean up independent access range support (Christoph)

 - Clean up and improve block sysfs handling (Christoph)

 - Clean up and improve teardown of block devices.

   This turns the usual two step process into something that is simpler
   to implement and handle in block drivers (Christoph)

 - Clean up chunk size handling (Christoph)

 - Misc cleanups and fixes (Bart, Bo, Dan, GuoYong, Jason, Keith, Liu,
   Ming, Sebastian, Yang, Ying)

* tag 'for-5.20/block-2022-07-29' of git://git.kernel.dk/linux-block: (178 commits)
  ublk_drv: fix double shift bug
  ublk_drv: make sure that correct flags(features) returned to userspace
  ublk_drv: fix error handling of ublk_add_dev
  ublk_drv: fix lockdep warning
  block: remove __blk_get_queue
  block: call blk_mq_exit_queue from disk_release for never added disks
  blk-mq: fix error handling in __blk_mq_alloc_disk
  ublk: defer disk allocation
  ublk: rewrite ublk_ctrl_get_queue_affinity to not rely on hctx->cpumask
  ublk: fold __ublk_create_dev into ublk_ctrl_add_dev
  ublk: cleanup ublk_ctrl_uring_cmd
  ublk: simplify ublk_ch_open and ublk_ch_release
  ublk: remove the empty open and release block device operations
  ublk: remove UBLK_IO_F_PREFLUSH
  ublk: add a MAINTAINERS entry
  block: don't allow the same type rq_qos add more than once
  mmc: fix disk/queue leak in case of adding disk failure
  ublk_drv: fix an IS_ERR() vs NULL check
  ublk: remove UBLK_IO_F_INTEGRITY
  ublk_drv: remove unneeded semicolon
  ...
This commit is contained in:
Linus Torvalds
2022-08-02 13:46:35 -07:00
261 changed files with 3640 additions and 2163 deletions

View File

@@ -260,6 +260,15 @@ Description:
for discards, and don't read this file.
What: /sys/block/<disk>/queue/dma_alignment
Date: May 2022
Contact: linux-block@vger.kernel.org
Description:
Reports the alignment that user space addresses must have to be
used for raw block device access with O_DIRECT and other driver
specific passthrough mechanisms.
What: /sys/block/<disk>/queue/fua
Date: May 2018
Contact: linux-block@vger.kernel.org

View File

@@ -87,8 +87,7 @@ with the command.
1.2.2 Completing a scmd w/ timeout
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
The timeout handler is scsi_times_out(). When a timeout occurs, this
function
The timeout handler is scsi_timeout(). When a timeout occurs, this function
1. invokes optional hostt->eh_timed_out() callback. Return value can
be one of

View File

@@ -731,7 +731,7 @@ Details::
* Notes: If 'no_async_abort' is defined this callback
* will be invoked from scsi_eh thread. No other commands
* will then be queued on current host during eh.
* Otherwise it will be called whenever scsi_times_out()
* Otherwise it will be called whenever scsi_timeout()
* is called due to a command timeout.
*
* Optionally defined in: LLD

View File

@@ -20539,6 +20539,13 @@ F: Documentation/filesystems/ubifs-authentication.rst
F: Documentation/filesystems/ubifs.rst
F: fs/ubifs/
UBLK USERSPACE BLOCK DRIVER
M: Ming Lei <ming.lei@redhat.com>
L: linux-block@vger.kernel.org
S: Maintained
F: drivers/block/ublk_drv.c
F: include/uapi/linux/ublk_cmd.h
UCLINUX (M68KNOMMU AND COLDFIRE)
M: Greg Ungerer <gerg@linux-m68k.org>
L: linux-m68k@lists.linux-m68k.org

View File

@@ -138,7 +138,7 @@ static int __init nfhd_init_one(int id, u32 blocks, u32 bsize)
return 0;
out_cleanup_disk:
blk_cleanup_disk(dev->disk);
put_disk(dev->disk);
free_dev:
kfree(dev);
out:
@@ -180,7 +180,7 @@ static void __exit nfhd_exit(void)
list_for_each_entry_safe(dev, next, &nfhd_list, list) {
list_del(&dev->list);
del_gendisk(dev->disk);
blk_cleanup_disk(dev->disk);
put_disk(dev->disk);
kfree(dev);
}
unregister_blkdev(major_num, "nfhd");

View File

@@ -925,7 +925,7 @@ static int ubd_add(int n, char **error_out)
return 0;
out_cleanup_disk:
blk_cleanup_disk(disk);
put_disk(disk);
out_cleanup_tags:
blk_mq_free_tag_set(&ubd_dev->tag_set);
out:
@@ -1032,7 +1032,7 @@ static int ubd_remove(int n, char **error_out)
ubd_gendisk[n] = NULL;
if(disk != NULL){
del_gendisk(disk);
blk_cleanup_disk(disk);
put_disk(disk);
}
err = 0;
@@ -1262,7 +1262,7 @@ static void ubd_map_req(struct ubd *dev, struct io_thread_req *io_req,
struct req_iterator iter;
int i = 0;
unsigned long byte_offset = io_req->offset;
int op = req_op(req);
enum req_op op = req_op(req);
if (op == REQ_OP_WRITE_ZEROES || op == REQ_OP_DISCARD) {
io_req->io_desc[0].buffer = NULL;
@@ -1325,7 +1325,7 @@ static int ubd_submit_request(struct ubd *dev, struct request *req)
int segs = 0;
struct io_thread_req *io_req;
int ret;
int op = req_op(req);
enum req_op op = req_op(req);
if (op == REQ_OP_FLUSH)
segs = 0;

View File

@@ -290,7 +290,7 @@ static int __init simdisk_setup(struct simdisk *dev, int which,
return 0;
out_cleanup_disk:
blk_cleanup_disk(dev->gd);
put_disk(dev->gd);
out:
return err;
}
@@ -344,7 +344,7 @@ static void simdisk_teardown(struct simdisk *dev, int which,
simdisk_detach(dev);
if (dev->gd) {
del_gendisk(dev->gd);
blk_cleanup_disk(dev->gd);
put_disk(dev->gd);
}
remove_proc_entry(tmp, procdir);
}

View File

@@ -147,7 +147,6 @@ config BLK_CGROUP_FC_APPID
config BLK_CGROUP_IOCOST
bool "Enable support for cost model based cgroup IO controller"
depends on BLK_CGROUP
select BLK_RQ_IO_DATA_LEN
select BLK_RQ_ALLOC_TIME
help
Enabling this option enables the .weight interface for cost

View File

@@ -54,12 +54,10 @@ static void bdev_write_inode(struct block_device *bdev)
while (inode->i_state & I_DIRTY) {
spin_unlock(&inode->i_lock);
ret = write_inode_now(inode, true);
if (ret) {
char name[BDEVNAME_SIZE];
pr_warn_ratelimited("VFS: Dirty inode writeback failed "
"for block device %s (err=%d).\n",
bdevname(bdev, name), ret);
}
if (ret)
pr_warn_ratelimited(
"VFS: Dirty inode writeback failed for block device %pg (err=%d).\n",
bdev, ret);
spin_lock(&inode->i_lock);
}
spin_unlock(&inode->i_lock);

View File

@@ -220,46 +220,46 @@ void bfqg_stats_update_avg_queue_size(struct bfq_group *bfqg)
}
void bfqg_stats_update_io_add(struct bfq_group *bfqg, struct bfq_queue *bfqq,
unsigned int op)
blk_opf_t opf)
{
blkg_rwstat_add(&bfqg->stats.queued, op, 1);
blkg_rwstat_add(&bfqg->stats.queued, opf, 1);
bfqg_stats_end_empty_time(&bfqg->stats);
if (!(bfqq == ((struct bfq_data *)bfqg->bfqd)->in_service_queue))
bfqg_stats_set_start_group_wait_time(bfqg, bfqq_group(bfqq));
}
void bfqg_stats_update_io_remove(struct bfq_group *bfqg, unsigned int op)
void bfqg_stats_update_io_remove(struct bfq_group *bfqg, blk_opf_t opf)
{
blkg_rwstat_add(&bfqg->stats.queued, op, -1);
blkg_rwstat_add(&bfqg->stats.queued, opf, -1);
}
void bfqg_stats_update_io_merged(struct bfq_group *bfqg, unsigned int op)
void bfqg_stats_update_io_merged(struct bfq_group *bfqg, blk_opf_t opf)
{
blkg_rwstat_add(&bfqg->stats.merged, op, 1);
blkg_rwstat_add(&bfqg->stats.merged, opf, 1);
}
void bfqg_stats_update_completion(struct bfq_group *bfqg, u64 start_time_ns,
u64 io_start_time_ns, unsigned int op)
u64 io_start_time_ns, blk_opf_t opf)
{
struct bfqg_stats *stats = &bfqg->stats;
u64 now = ktime_get_ns();
if (now > io_start_time_ns)
blkg_rwstat_add(&stats->service_time, op,
blkg_rwstat_add(&stats->service_time, opf,
now - io_start_time_ns);
if (io_start_time_ns > start_time_ns)
blkg_rwstat_add(&stats->wait_time, op,
blkg_rwstat_add(&stats->wait_time, opf,
io_start_time_ns - start_time_ns);
}
#else /* CONFIG_BFQ_CGROUP_DEBUG */
void bfqg_stats_update_io_add(struct bfq_group *bfqg, struct bfq_queue *bfqq,
unsigned int op) { }
void bfqg_stats_update_io_remove(struct bfq_group *bfqg, unsigned int op) { }
void bfqg_stats_update_io_merged(struct bfq_group *bfqg, unsigned int op) { }
blk_opf_t opf) { }
void bfqg_stats_update_io_remove(struct bfq_group *bfqg, blk_opf_t opf) { }
void bfqg_stats_update_io_merged(struct bfq_group *bfqg, blk_opf_t opf) { }
void bfqg_stats_update_completion(struct bfq_group *bfqg, u64 start_time_ns,
u64 io_start_time_ns, unsigned int op) { }
u64 io_start_time_ns, blk_opf_t opf) { }
void bfqg_stats_update_dequeue(struct bfq_group *bfqg) { }
void bfqg_stats_set_start_empty_time(struct bfq_group *bfqg) { }
void bfqg_stats_update_idle_time(struct bfq_group *bfqg) { }
@@ -706,10 +706,10 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
}
/**
* __bfq_bic_change_cgroup - move @bic to @cgroup.
* __bfq_bic_change_cgroup - move @bic to @bfqg.
* @bfqd: the queue descriptor.
* @bic: the bic to move.
* @blkcg: the blk-cgroup to move to.
* @bfqg: the group to move to.
*
* Move bic to blkcg, assuming that bfqd->lock is held; which makes
* sure that the reference to cgroup is valid across the call (see
@@ -863,6 +863,7 @@ static void bfq_flush_idle_tree(struct bfq_service_tree *st)
* @bfqd: the device data structure with the root group.
* @entity: the entity to move, if entity is a leaf; or the parent entity
* of an active leaf entity to move, if entity is not a leaf.
* @ioprio_class: I/O priority class to reparent.
*/
static void bfq_reparent_leaf_entity(struct bfq_data *bfqd,
struct bfq_entity *entity,
@@ -892,6 +893,7 @@ static void bfq_reparent_leaf_entity(struct bfq_data *bfqd,
* @bfqd: the device data structure with the root group.
* @bfqg: the group to move from.
* @st: the service tree to start the search from.
* @ioprio_class: I/O priority class to reparent.
*/
static void bfq_reparent_active_queues(struct bfq_data *bfqd,
struct bfq_group *bfqg,
@@ -1471,8 +1473,6 @@ struct bfq_group *bfqq_group(struct bfq_queue *bfqq)
return bfqq->bfqd->root_group;
}
void bfqg_and_blkg_get(struct bfq_group *bfqg) {}
void bfqg_and_blkg_put(struct bfq_group *bfqg) {}
struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node)

View File

@@ -668,19 +668,19 @@ static bool bfqq_request_over_limit(struct bfq_queue *bfqq, int limit)
* significantly affect service guarantees coming from the BFQ scheduling
* algorithm.
*/
static void bfq_limit_depth(unsigned int op, struct blk_mq_alloc_data *data)
static void bfq_limit_depth(blk_opf_t opf, struct blk_mq_alloc_data *data)
{
struct bfq_data *bfqd = data->q->elevator->elevator_data;
struct bfq_io_cq *bic = bfq_bic_lookup(data->q);
struct bfq_queue *bfqq = bic ? bic_to_bfqq(bic, op_is_sync(op)) : NULL;
struct bfq_queue *bfqq = bic ? bic_to_bfqq(bic, op_is_sync(opf)) : NULL;
int depth;
unsigned limit = data->q->nr_requests;
/* Sync reads have full depth available */
if (op_is_sync(op) && !op_is_write(op)) {
if (op_is_sync(opf) && !op_is_write(opf)) {
depth = 0;
} else {
depth = bfqd->word_depths[!!bfqd->wr_busy_queues][op_is_sync(op)];
depth = bfqd->word_depths[!!bfqd->wr_busy_queues][op_is_sync(opf)];
limit = (limit * depth) >> bfqd->full_depth_shift;
}
@@ -693,7 +693,7 @@ static void bfq_limit_depth(unsigned int op, struct blk_mq_alloc_data *data)
depth = 1;
bfq_log(bfqd, "[%s] wr_busy %d sync %d depth %u",
__func__, bfqd->wr_busy_queues, op_is_sync(op), depth);
__func__, bfqd->wr_busy_queues, op_is_sync(opf), depth);
if (depth)
data->shallow_depth = depth;
}
@@ -6104,7 +6104,7 @@ static bool __bfq_insert_request(struct bfq_data *bfqd, struct request *rq)
static void bfq_update_insert_stats(struct request_queue *q,
struct bfq_queue *bfqq,
bool idle_timer_disabled,
unsigned int cmd_flags)
blk_opf_t cmd_flags)
{
if (!bfqq)
return;
@@ -6129,7 +6129,7 @@ static void bfq_update_insert_stats(struct request_queue *q,
static inline void bfq_update_insert_stats(struct request_queue *q,
struct bfq_queue *bfqq,
bool idle_timer_disabled,
unsigned int cmd_flags) {}
blk_opf_t cmd_flags) {}
#endif /* CONFIG_BFQ_CGROUP_DEBUG */
static struct bfq_queue *bfq_init_rq(struct request *rq);
@@ -6141,7 +6141,7 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
struct bfq_data *bfqd = q->elevator->elevator_data;
struct bfq_queue *bfqq;
bool idle_timer_disabled = false;
unsigned int cmd_flags;
blk_opf_t cmd_flags;
LIST_HEAD(free);
#ifdef CONFIG_BFQ_GROUP_IOSCHED

View File

@@ -994,11 +994,11 @@ void bfq_put_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg);
void bfqg_stats_update_legacy_io(struct request_queue *q, struct request *rq);
void bfqg_stats_update_io_add(struct bfq_group *bfqg, struct bfq_queue *bfqq,
unsigned int op);
void bfqg_stats_update_io_remove(struct bfq_group *bfqg, unsigned int op);
void bfqg_stats_update_io_merged(struct bfq_group *bfqg, unsigned int op);
blk_opf_t opf);
void bfqg_stats_update_io_remove(struct bfq_group *bfqg, blk_opf_t opf);
void bfqg_stats_update_io_merged(struct bfq_group *bfqg, blk_opf_t opf);
void bfqg_stats_update_completion(struct bfq_group *bfqg, u64 start_time_ns,
u64 io_start_time_ns, unsigned int op);
u64 io_start_time_ns, blk_opf_t opf);
void bfqg_stats_update_dequeue(struct bfq_group *bfqg);
void bfqg_stats_set_start_empty_time(struct bfq_group *bfqg);
void bfqg_stats_update_idle_time(struct bfq_group *bfqg);

View File

@@ -1360,6 +1360,8 @@ left:
/**
* __bfq_lookup_next_entity - return the first eligible entity in @st.
* @st: the service tree.
* @in_service: whether or not there is an in-service entity for the sched_data
* this active tree belongs to.
*
* If there is no in-service entity for the sched_data st belongs to,
* then return the entity that will be set in service if:
@@ -1472,9 +1474,6 @@ static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd,
break;
}
if (!entity)
return NULL;
return entity;
}

View File

@@ -239,7 +239,7 @@ static void bio_free(struct bio *bio)
* when IO has completed, or when the bio is released.
*/
void bio_init(struct bio *bio, struct block_device *bdev, struct bio_vec *table,
unsigned short max_vecs, unsigned int opf)
unsigned short max_vecs, blk_opf_t opf)
{
bio->bi_next = NULL;
bio->bi_bdev = bdev;
@@ -292,7 +292,7 @@ EXPORT_SYMBOL(bio_init);
* preserved are the ones that are initialized by bio_alloc_bioset(). See
* comment in struct bio.
*/
void bio_reset(struct bio *bio, struct block_device *bdev, unsigned int opf)
void bio_reset(struct bio *bio, struct block_device *bdev, blk_opf_t opf)
{
bio_uninit(bio);
memset(bio, 0, BIO_RESET_BYTES);
@@ -341,7 +341,7 @@ void bio_chain(struct bio *bio, struct bio *parent)
EXPORT_SYMBOL(bio_chain);
struct bio *blk_next_bio(struct bio *bio, struct block_device *bdev,
unsigned int nr_pages, unsigned int opf, gfp_t gfp)
unsigned int nr_pages, blk_opf_t opf, gfp_t gfp)
{
struct bio *new = bio_alloc(bdev, nr_pages, opf, gfp);
@@ -409,7 +409,7 @@ static void punt_bios_to_rescuer(struct bio_set *bs)
}
static struct bio *bio_alloc_percpu_cache(struct block_device *bdev,
unsigned short nr_vecs, unsigned int opf, gfp_t gfp,
unsigned short nr_vecs, blk_opf_t opf, gfp_t gfp,
struct bio_set *bs)
{
struct bio_alloc_cache *cache;
@@ -468,7 +468,7 @@ static struct bio *bio_alloc_percpu_cache(struct block_device *bdev,
* Returns: Pointer to new bio on success, NULL on failure.
*/
struct bio *bio_alloc_bioset(struct block_device *bdev, unsigned short nr_vecs,
unsigned int opf, gfp_t gfp_mask,
blk_opf_t opf, gfp_t gfp_mask,
struct bio_set *bs)
{
gfp_t saved_gfp = gfp_mask;
@@ -1033,7 +1033,7 @@ int bio_add_zone_append_page(struct bio *bio, struct page *page,
if (WARN_ON_ONCE(bio_op(bio) != REQ_OP_ZONE_APPEND))
return 0;
if (WARN_ON_ONCE(!blk_queue_is_zoned(q)))
if (WARN_ON_ONCE(!bdev_is_zoned(bio->bi_bdev)))
return 0;
return bio_add_hw_page(q, bio, page, len, offset,
@@ -1159,6 +1159,37 @@ static void bio_put_pages(struct page **pages, size_t size, size_t off)
put_page(pages[i]);
}
static int bio_iov_add_page(struct bio *bio, struct page *page,
unsigned int len, unsigned int offset)
{
bool same_page = false;
if (!__bio_try_merge_page(bio, page, len, offset, &same_page)) {
if (WARN_ON_ONCE(bio_full(bio, len)))
return -EINVAL;
__bio_add_page(bio, page, len, offset);
return 0;
}
if (same_page)
put_page(page);
return 0;
}
static int bio_iov_add_zone_append_page(struct bio *bio, struct page *page,
unsigned int len, unsigned int offset)
{
struct request_queue *q = bdev_get_queue(bio->bi_bdev);
bool same_page = false;
if (bio_add_hw_page(q, bio, page, len, offset,
queue_max_zone_append_sectors(q), &same_page) != len)
return -EINVAL;
if (same_page)
put_page(page);
return 0;
}
#define PAGE_PTRS_PER_BVEC (sizeof(struct bio_vec) / sizeof(struct page *))
/**
@@ -1177,61 +1208,10 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
unsigned short entries_left = bio->bi_max_vecs - bio->bi_vcnt;
struct bio_vec *bv = bio->bi_io_vec + bio->bi_vcnt;
struct page **pages = (struct page **)bv;
bool same_page = false;
ssize_t size, left;
unsigned len, i;
size_t offset;
/*
* Move page array up in the allocated memory for the bio vecs as far as
* possible so that we can start filling biovecs from the beginning
* without overwriting the temporary page array.
*/
BUILD_BUG_ON(PAGE_PTRS_PER_BVEC < 2);
pages += entries_left * (PAGE_PTRS_PER_BVEC - 1);
size = iov_iter_get_pages(iter, pages, LONG_MAX, nr_pages, &offset);
if (unlikely(size <= 0))
return size ? size : -EFAULT;
for (left = size, i = 0; left > 0; left -= len, i++) {
struct page *page = pages[i];
len = min_t(size_t, PAGE_SIZE - offset, left);
if (__bio_try_merge_page(bio, page, len, offset, &same_page)) {
if (same_page)
put_page(page);
} else {
if (WARN_ON_ONCE(bio_full(bio, len))) {
bio_put_pages(pages + i, left, offset);
return -EINVAL;
}
__bio_add_page(bio, page, len, offset);
}
offset = 0;
}
iov_iter_advance(iter, size);
return 0;
}
static int __bio_iov_append_get_pages(struct bio *bio, struct iov_iter *iter)
{
unsigned short nr_pages = bio->bi_max_vecs - bio->bi_vcnt;
unsigned short entries_left = bio->bi_max_vecs - bio->bi_vcnt;
struct request_queue *q = bdev_get_queue(bio->bi_bdev);
unsigned int max_append_sectors = queue_max_zone_append_sectors(q);
struct bio_vec *bv = bio->bi_io_vec + bio->bi_vcnt;
struct page **pages = (struct page **)bv;
ssize_t size, left;
unsigned len, i;
size_t offset;
int ret = 0;
if (WARN_ON_ONCE(!max_append_sectors))
return 0;
/*
* Move page array up in the allocated memory for the bio vecs as far as
* possible so that we can start filling biovecs from the beginning
@@ -1240,28 +1220,39 @@ static int __bio_iov_append_get_pages(struct bio *bio, struct iov_iter *iter)
BUILD_BUG_ON(PAGE_PTRS_PER_BVEC < 2);
pages += entries_left * (PAGE_PTRS_PER_BVEC - 1);
/*
* Each segment in the iov is required to be a block size multiple.
* However, we may not be able to get the entire segment if it spans
* more pages than bi_max_vecs allows, so we have to ALIGN_DOWN the
* result to ensure the bio's total size is correct. The remainder of
* the iov data will be picked up in the next bio iteration.
*/
size = iov_iter_get_pages(iter, pages, LONG_MAX, nr_pages, &offset);
if (size > 0)
size = ALIGN_DOWN(size, bdev_logical_block_size(bio->bi_bdev));
if (unlikely(size <= 0))
return size ? size : -EFAULT;
for (left = size, i = 0; left > 0; left -= len, i++) {
struct page *page = pages[i];
bool same_page = false;
int ret;
len = min_t(size_t, PAGE_SIZE - offset, left);
if (bio_add_hw_page(q, bio, page, len, offset,
max_append_sectors, &same_page) != len) {
if (bio_op(bio) == REQ_OP_ZONE_APPEND)
ret = bio_iov_add_zone_append_page(bio, page, len,
offset);
else
ret = bio_iov_add_page(bio, page, len, offset);
if (ret) {
bio_put_pages(pages + i, left, offset);
ret = -EINVAL;
break;
return ret;
}
if (same_page)
put_page(page);
offset = 0;
}
iov_iter_advance(iter, size - left);
return ret;
iov_iter_advance(iter, size);
return 0;
}
/**
@@ -1298,10 +1289,7 @@ int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
}
do {
if (bio_op(bio) == REQ_OP_ZONE_APPEND)
ret = __bio_iov_append_get_pages(bio, iter);
else
ret = __bio_iov_iter_get_pages(bio, iter);
ret = __bio_iov_iter_get_pages(bio, iter);
} while (!ret && iov_iter_count(iter) && !bio_full(bio, 0));
/* don't account direct I/O as memory stall */

View File

@@ -59,20 +59,20 @@ void blkg_rwstat_recursive_sum(struct blkcg_gq *blkg, struct blkcg_policy *pol,
* caller is responsible for synchronizing calls to this function.
*/
static inline void blkg_rwstat_add(struct blkg_rwstat *rwstat,
unsigned int op, uint64_t val)
blk_opf_t opf, uint64_t val)
{
struct percpu_counter *cnt;
if (op_is_discard(op))
if (op_is_discard(opf))
cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_DISCARD];
else if (op_is_write(op))
else if (op_is_write(opf))
cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_WRITE];
else
cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_READ];
percpu_counter_add_batch(cnt, val, BLKG_STAT_CPU_BATCH);
if (op_is_sync(op))
if (op_is_sync(opf))
cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_SYNC];
else
cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_ASYNC];

View File

@@ -846,6 +846,21 @@ static void blkg_iostat_sub(struct blkg_iostat *dst, struct blkg_iostat *src)
}
}
static void blkcg_iostat_update(struct blkcg_gq *blkg, struct blkg_iostat *cur,
struct blkg_iostat *last)
{
struct blkg_iostat delta;
unsigned long flags;
/* propagate percpu delta to global */
flags = u64_stats_update_begin_irqsave(&blkg->iostat.sync);
blkg_iostat_set(&delta, cur);
blkg_iostat_sub(&delta, last);
blkg_iostat_add(&blkg->iostat.cur, &delta);
blkg_iostat_add(last, &delta);
u64_stats_update_end_irqrestore(&blkg->iostat.sync, flags);
}
static void blkcg_rstat_flush(struct cgroup_subsys_state *css, int cpu)
{
struct blkcg *blkcg = css_to_blkcg(css);
@@ -860,8 +875,7 @@ static void blkcg_rstat_flush(struct cgroup_subsys_state *css, int cpu)
hlist_for_each_entry_rcu(blkg, &blkcg->blkg_list, blkcg_node) {
struct blkcg_gq *parent = blkg->parent;
struct blkg_iostat_set *bisc = per_cpu_ptr(blkg->iostat_cpu, cpu);
struct blkg_iostat cur, delta;
unsigned long flags;
struct blkg_iostat cur;
unsigned int seq;
/* fetch the current per-cpu values */
@@ -870,23 +884,12 @@ static void blkcg_rstat_flush(struct cgroup_subsys_state *css, int cpu)
blkg_iostat_set(&cur, &bisc->cur);
} while (u64_stats_fetch_retry(&bisc->sync, seq));
/* propagate percpu delta to global */
flags = u64_stats_update_begin_irqsave(&blkg->iostat.sync);
blkg_iostat_set(&delta, &cur);
blkg_iostat_sub(&delta, &bisc->last);
blkg_iostat_add(&blkg->iostat.cur, &delta);
blkg_iostat_add(&bisc->last, &delta);
u64_stats_update_end_irqrestore(&blkg->iostat.sync, flags);
blkcg_iostat_update(blkg, &cur, &bisc->last);
/* propagate global delta to parent (unless that's root) */
if (parent && parent->parent) {
flags = u64_stats_update_begin_irqsave(&parent->iostat.sync);
blkg_iostat_set(&delta, &blkg->iostat.cur);
blkg_iostat_sub(&delta, &blkg->iostat.last);
blkg_iostat_add(&parent->iostat.cur, &delta);
blkg_iostat_add(&blkg->iostat.last, &delta);
u64_stats_update_end_irqrestore(&parent->iostat.sync, flags);
}
if (parent && parent->parent)
blkcg_iostat_update(parent, &blkg->iostat.cur,
&blkg->iostat.last);
}
rcu_read_unlock();
@@ -1299,6 +1302,7 @@ int blkcg_init_queue(struct request_queue *q)
ret = blk_iolatency_init(q);
if (ret) {
blk_throtl_exit(q);
blk_ioprio_exit(q);
goto err_destroy_all;
}
@@ -1529,6 +1533,18 @@ void blkcg_deactivate_policy(struct request_queue *q,
}
EXPORT_SYMBOL_GPL(blkcg_deactivate_policy);
static void blkcg_free_all_cpd(struct blkcg_policy *pol)
{
struct blkcg *blkcg;
list_for_each_entry(blkcg, &all_blkcgs, all_blkcgs_node) {
if (blkcg->cpd[pol->plid]) {
pol->cpd_free_fn(blkcg->cpd[pol->plid]);
blkcg->cpd[pol->plid] = NULL;
}
}
}
/**
* blkcg_policy_register - register a blkcg policy
* @pol: blkcg policy to register
@@ -1593,14 +1609,9 @@ int blkcg_policy_register(struct blkcg_policy *pol)
return 0;
err_free_cpds:
if (pol->cpd_free_fn) {
list_for_each_entry(blkcg, &all_blkcgs, all_blkcgs_node) {
if (blkcg->cpd[pol->plid]) {
pol->cpd_free_fn(blkcg->cpd[pol->plid]);
blkcg->cpd[pol->plid] = NULL;
}
}
}
if (pol->cpd_free_fn)
blkcg_free_all_cpd(pol);
blkcg_policy[pol->plid] = NULL;
err_unlock:
mutex_unlock(&blkcg_pol_mutex);
@@ -1617,8 +1628,6 @@ EXPORT_SYMBOL_GPL(blkcg_policy_register);
*/
void blkcg_policy_unregister(struct blkcg_policy *pol)
{
struct blkcg *blkcg;
mutex_lock(&blkcg_pol_register_mutex);
if (WARN_ON(blkcg_policy[pol->plid] != pol))
@@ -1633,14 +1642,9 @@ void blkcg_policy_unregister(struct blkcg_policy *pol)
/* remove cpds and unregister */
mutex_lock(&blkcg_pol_mutex);
if (pol->cpd_free_fn) {
list_for_each_entry(blkcg, &all_blkcgs, all_blkcgs_node) {
if (blkcg->cpd[pol->plid]) {
pol->cpd_free_fn(blkcg->cpd[pol->plid]);
blkcg->cpd[pol->plid] = NULL;
}
}
}
if (pol->cpd_free_fn)
blkcg_free_all_cpd(pol);
blkcg_policy[pol->plid] = NULL;
mutex_unlock(&blkcg_pol_mutex);
@@ -1696,7 +1700,7 @@ static void blkcg_scale_delay(struct blkcg_gq *blkg, u64 now)
* everybody is happy with their IO latencies.
*/
if (time_before64(old + NSEC_PER_SEC, now) &&
atomic64_cmpxchg(&blkg->delay_start, old, now) == old) {
atomic64_try_cmpxchg(&blkg->delay_start, &old, now)) {
u64 cur = atomic64_read(&blkg->delay_nsec);
u64 sub = min_t(u64, blkg->last_delay, now - old);
int cur_use = atomic_read(&blkg->use_delay);

View File

@@ -430,12 +430,8 @@ static inline int blkcg_unuse_delay(struct blkcg_gq *blkg)
* then check to see if we were the last delay so we can drop the
* congestion count on the cgroup.
*/
while (old) {
int cur = atomic_cmpxchg(&blkg->use_delay, old, old - 1);
if (cur == old)
break;
old = cur;
}
while (old && !atomic_try_cmpxchg(&blkg->use_delay, &old, old - 1))
;
if (old == 0)
return 0;
@@ -458,7 +454,7 @@ static inline void blkcg_set_delay(struct blkcg_gq *blkg, u64 delay)
int old = atomic_read(&blkg->use_delay);
/* We only want 1 person setting the congestion count for this blkg. */
if (!old && atomic_cmpxchg(&blkg->use_delay, old, -1) == old)
if (!old && atomic_try_cmpxchg(&blkg->use_delay, &old, -1))
atomic_inc(&blkg->blkcg->css.cgroup->congestion_count);
atomic64_set(&blkg->delay_nsec, delay);
@@ -475,7 +471,7 @@ static inline void blkcg_clear_delay(struct blkcg_gq *blkg)
int old = atomic_read(&blkg->use_delay);
/* We only want 1 person clearing the congestion count for this blkg. */
if (old && atomic_cmpxchg(&blkg->use_delay, old, 0) == old)
if (old && atomic_try_cmpxchg(&blkg->use_delay, &old, 0))
atomic_dec(&blkg->blkcg->css.cgroup->congestion_count);
}

View File

@@ -136,7 +136,7 @@ static const char *const blk_op_name[] = {
* string format. Useful in the debugging and tracing bio or request. For
* invalid REQ_OP_XXX it returns string "UNKNOWN".
*/
inline const char *blk_op_str(unsigned int op)
inline const char *blk_op_str(enum req_op op)
{
const char *op_str = "UNKNOWN";
@@ -284,49 +284,6 @@ void blk_queue_start_drain(struct request_queue *q)
wake_up_all(&q->mq_freeze_wq);
}
/**
* blk_cleanup_queue - shutdown a request queue
* @q: request queue to shutdown
*
* Mark @q DYING, drain all pending requests, mark @q DEAD, destroy and
* put it. All future requests will be failed immediately with -ENODEV.
*
* Context: can sleep
*/
void blk_cleanup_queue(struct request_queue *q)
{
/* cannot be called from atomic context */
might_sleep();
WARN_ON_ONCE(blk_queue_registered(q));
/* mark @q DYING, no new request or merges will be allowed afterwards */
blk_queue_flag_set(QUEUE_FLAG_DYING, q);
blk_queue_start_drain(q);
blk_queue_flag_set(QUEUE_FLAG_NOMERGES, q);
blk_queue_flag_set(QUEUE_FLAG_NOXMERGES, q);
/*
* Drain all requests queued before DYING marking. Set DEAD flag to
* prevent that blk_mq_run_hw_queues() accesses the hardware queues
* after draining finished.
*/
blk_freeze_queue(q);
blk_queue_flag_set(QUEUE_FLAG_DEAD, q);
blk_sync_queue(q);
if (queue_is_mq(q)) {
blk_mq_cancel_work_sync(q);
blk_mq_exit_queue(q);
}
/* @q is and will stay empty, shutdown and put */
blk_put_queue(q);
}
EXPORT_SYMBOL(blk_cleanup_queue);
/**
* blk_queue_enter() - try to increase q->q_usage_counter
* @q: request queue pointer
@@ -435,7 +392,7 @@ struct request_queue *blk_alloc_queue(int node_id, bool alloc_srcu)
q->last_merge = NULL;
q->id = ida_simple_get(&blk_queue_ida, 0, 0, GFP_KERNEL);
q->id = ida_alloc(&blk_queue_ida, GFP_KERNEL);
if (q->id < 0)
goto fail_srcu;
@@ -485,7 +442,7 @@ fail_stats:
fail_split:
bioset_exit(&q->bio_split);
fail_id:
ida_simple_remove(&blk_queue_ida, q->id);
ida_free(&blk_queue_ida, q->id);
fail_srcu:
if (alloc_srcu)
cleanup_srcu_struct(q->srcu);
@@ -504,12 +461,10 @@ fail_q:
*/
bool blk_get_queue(struct request_queue *q)
{
if (likely(!blk_queue_dying(q))) {
__blk_get_queue(q);
return true;
}
return false;
if (unlikely(blk_queue_dying(q)))
return false;
kobject_get(&q->kobj);
return true;
}
EXPORT_SYMBOL(blk_get_queue);
@@ -608,16 +563,15 @@ static int blk_partition_remap(struct bio *bio)
static inline blk_status_t blk_check_zone_append(struct request_queue *q,
struct bio *bio)
{
sector_t pos = bio->bi_iter.bi_sector;
int nr_sectors = bio_sectors(bio);
/* Only applicable to zoned block devices */
if (!blk_queue_is_zoned(q))
if (!bdev_is_zoned(bio->bi_bdev))
return BLK_STS_NOTSUPP;
/* The bio sector must point to the start of a sequential zone */
if (pos & (blk_queue_zone_sectors(q) - 1) ||
!blk_queue_zone_is_seq(q, pos))
if (bio->bi_iter.bi_sector & (bdev_zone_sectors(bio->bi_bdev) - 1) ||
!bio_zone_is_seq(bio))
return BLK_STS_IOERR;
/*
@@ -762,7 +716,7 @@ void submit_bio_noacct(struct bio *bio)
might_sleep();
plug = blk_mq_plug(q, bio);
plug = blk_mq_plug(bio);
if (plug && plug->nowait)
bio->bi_opf |= REQ_NOWAIT;
@@ -818,11 +772,11 @@ void submit_bio_noacct(struct bio *bio)
case REQ_OP_ZONE_OPEN:
case REQ_OP_ZONE_CLOSE:
case REQ_OP_ZONE_FINISH:
if (!blk_queue_is_zoned(q))
if (!bdev_is_zoned(bio->bi_bdev))
goto not_supported;
break;
case REQ_OP_ZONE_RESET_ALL:
if (!blk_queue_is_zoned(q) || !blk_queue_zone_resetall(q))
if (!bdev_is_zoned(bio->bi_bdev) || !blk_queue_zone_resetall(q))
goto not_supported;
break;
case REQ_OP_WRITE_ZEROES:
@@ -987,7 +941,7 @@ void update_io_ticks(struct block_device *part, unsigned long now, bool end)
again:
stamp = READ_ONCE(part->bd_stamp);
if (unlikely(time_after(now, stamp))) {
if (likely(cmpxchg(&part->bd_stamp, stamp, now) == stamp))
if (likely(try_cmpxchg(&part->bd_stamp, &stamp, now)))
__part_stat_add(part, io_ticks, end ? now - stamp : 1);
}
if (part->bd_partno) {
@@ -997,7 +951,7 @@ again:
}
unsigned long bdev_start_io_acct(struct block_device *bdev,
unsigned int sectors, unsigned int op,
unsigned int sectors, enum req_op op,
unsigned long start_time)
{
const int sgrp = op_stat_group(op);
@@ -1038,7 +992,7 @@ unsigned long bio_start_io_acct(struct bio *bio)
}
EXPORT_SYMBOL_GPL(bio_start_io_acct);
void bdev_end_io_acct(struct block_device *bdev, unsigned int op,
void bdev_end_io_acct(struct block_device *bdev, enum req_op op,
unsigned long start_time)
{
const int sgrp = op_stat_group(op);
@@ -1247,7 +1201,7 @@ EXPORT_SYMBOL_GPL(blk_io_schedule);
int __init blk_dev_init(void)
{
BUILD_BUG_ON(REQ_OP_LAST >= (1 << REQ_OP_BITS));
BUILD_BUG_ON((__force u32)REQ_OP_LAST >= (1 << REQ_OP_BITS));
BUILD_BUG_ON(REQ_OP_BITS + REQ_FLAG_BITS > 8 *
sizeof_field(struct request, cmd_flags));
BUILD_BUG_ON(REQ_OP_BITS + REQ_FLAG_BITS > 8 *

View File

@@ -94,7 +94,7 @@ enum {
};
static void blk_kick_flush(struct request_queue *q,
struct blk_flush_queue *fq, unsigned int flags);
struct blk_flush_queue *fq, blk_opf_t flags);
static inline struct blk_flush_queue *
blk_get_flush_queue(struct request_queue *q, struct blk_mq_ctx *ctx)
@@ -173,7 +173,7 @@ static void blk_flush_complete_seq(struct request *rq,
{
struct request_queue *q = rq->q;
struct list_head *pending = &fq->flush_queue[fq->flush_pending_idx];
unsigned int cmd_flags;
blk_opf_t cmd_flags;
BUG_ON(rq->flush.seq & seq);
rq->flush.seq |= seq;
@@ -290,7 +290,7 @@ bool is_flush_rq(struct request *rq)
*
*/
static void blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq,
unsigned int flags)
blk_opf_t flags)
{
struct list_head *pending = &fq->flush_queue[fq->flush_pending_idx];
struct request *first_rq =

View File

@@ -102,31 +102,18 @@ static struct kobj_type blk_ia_ranges_ktype = {
* disk_register_independent_access_ranges - register with sysfs a set of
* independent access ranges
* @disk: Target disk
* @new_iars: New set of independent access ranges
*
* Register with sysfs a set of independent access ranges for @disk.
* If @new_iars is not NULL, this set of ranges is registered and the old set
* specified by q->ia_ranges is unregistered. Otherwise, q->ia_ranges is
* registered if it is not already.
*/
int disk_register_independent_access_ranges(struct gendisk *disk,
struct blk_independent_access_ranges *new_iars)
int disk_register_independent_access_ranges(struct gendisk *disk)
{
struct blk_independent_access_ranges *iars = disk->ia_ranges;
struct request_queue *q = disk->queue;
struct blk_independent_access_ranges *iars;
int i, ret;
lockdep_assert_held(&q->sysfs_dir_lock);
lockdep_assert_held(&q->sysfs_lock);
/* If a new range set is specified, unregister the old one */
if (new_iars) {
if (q->ia_ranges)
disk_unregister_independent_access_ranges(disk);
q->ia_ranges = new_iars;
}
iars = q->ia_ranges;
if (!iars)
return 0;
@@ -138,7 +125,7 @@ int disk_register_independent_access_ranges(struct gendisk *disk,
ret = kobject_init_and_add(&iars->kobj, &blk_ia_ranges_ktype,
&q->kobj, "%s", "independent_access_ranges");
if (ret) {
q->ia_ranges = NULL;
disk->ia_ranges = NULL;
kobject_put(&iars->kobj);
return ret;
}
@@ -164,7 +151,7 @@ int disk_register_independent_access_ranges(struct gendisk *disk,
void disk_unregister_independent_access_ranges(struct gendisk *disk)
{
struct request_queue *q = disk->queue;
struct blk_independent_access_ranges *iars = q->ia_ranges;
struct blk_independent_access_ranges *iars = disk->ia_ranges;
int i;
lockdep_assert_held(&q->sysfs_dir_lock);
@@ -182,7 +169,7 @@ void disk_unregister_independent_access_ranges(struct gendisk *disk)
kfree(iars);
}
q->ia_ranges = NULL;
disk->ia_ranges = NULL;
}
static struct blk_independent_access_range *
@@ -210,6 +197,9 @@ static bool disk_check_ia_ranges(struct gendisk *disk,
sector_t sector = 0;
int i;
if (WARN_ON_ONCE(!iars->nr_ia_ranges))
return false;
/*
* While sorting the ranges in increasing LBA order, check that the
* ranges do not overlap, that there are no sector holes and that all
@@ -242,7 +232,7 @@ static bool disk_check_ia_ranges(struct gendisk *disk,
static bool disk_ia_ranges_changed(struct gendisk *disk,
struct blk_independent_access_ranges *new)
{
struct blk_independent_access_ranges *old = disk->queue->ia_ranges;
struct blk_independent_access_ranges *old = disk->ia_ranges;
int i;
if (!old)
@@ -298,25 +288,15 @@ void disk_set_independent_access_ranges(struct gendisk *disk,
{
struct request_queue *q = disk->queue;
if (WARN_ON_ONCE(iars && !iars->nr_ia_ranges)) {
mutex_lock(&q->sysfs_dir_lock);
mutex_lock(&q->sysfs_lock);
if (iars && !disk_check_ia_ranges(disk, iars)) {
kfree(iars);
iars = NULL;
}
mutex_lock(&q->sysfs_dir_lock);
mutex_lock(&q->sysfs_lock);
if (iars) {
if (!disk_check_ia_ranges(disk, iars)) {
kfree(iars);
iars = NULL;
goto reg;
}
if (!disk_ia_ranges_changed(disk, iars)) {
kfree(iars);
goto unlock;
}
if (iars && !disk_ia_ranges_changed(disk, iars)) {
kfree(iars);
goto unlock;
}
/*
@@ -324,17 +304,12 @@ void disk_set_independent_access_ranges(struct gendisk *disk,
* revalidation. If that is the case, we need to unregister the old
* set of independent access ranges and register the new set. If the
* queue is not registered, registration of the device request queue
* will register the independent access ranges, so only swap in the
* new set and free the old one.
* will register the independent access ranges.
*/
reg:
if (blk_queue_registered(q)) {
disk_register_independent_access_ranges(disk, iars);
} else {
swap(q->ia_ranges, iars);
kfree(iars);
}
disk_unregister_independent_access_ranges(disk);
disk->ia_ranges = iars;
if (blk_queue_registered(q))
disk_register_independent_access_ranges(disk);
unlock:
mutex_unlock(&q->sysfs_lock);
mutex_unlock(&q->sysfs_dir_lock);

Some files were not shown because too many files have changed in this diff Show More