mirror of https://github.com/Dasharo/linux.git (synced 2026-03-06 15:25:10 -08:00)
Merge tag 'for-6.9/block-20240310' of git://git.kernel.dk/linux
Pull block updates from Jens Axboe:
- MD pull requests via Song:
    - Cleanup redundant checks (Yu Kuai)
    - Remove deprecated headers (Marc Zyngier, Song Liu)
    - Concurrency fixes (Li Lingfeng)
    - Memory leak fix (Li Nan)
    - Refactor raid1 read_balance (Yu Kuai, Paul Luse)
    - Clean up and fix for md_ioctl (Li Nan)
    - Other small fixes (Gui-Dong Han, Heming Zhao)
    - MD atomic limits (Christoph)
- NVMe pull request via Keith:
    - RDMA target enhancements (Max)
    - Fabrics fixes (Max, Guixin, Hannes)
    - Atomic queue_limits usage (Christoph)
    - Const use for class_register (Ricardo)
    - Identification error handling fixes (Shin'ichiro, Keith)
- Improvement and cleanup for cached request handling (Christoph)
- Moving towards atomic queue limits. Core changes and driver bits so
far (Christoph)
- Fix UAF issues in aoeblk (Chun-Yi)
- Zoned fix and cleanups (Damien)
- s390 dasd cleanups and fixes (Jan, Miroslav)
- Block issue timestamp caching (me)
- noio scope guarding for zoned IO (Johannes)
- block/nvme PI improvements (Kanchan)
- Ability to terminate long running discard loop (Keith)
- bdev revalidation fix (Li)
- Get rid of old nr_queues hack for kdump kernels (Ming)
- Support for async deletion of ublk (Ming)
- Improve IRQ bio recycling (Pavel)
- Factor in CPU capacity for remote vs local completion (Qais)
- Add shared_tags configfs entry for null_blk (Shin'ichiro)
- Fix for a regression in page refcounts introduced by the folio
unification (Tony)
- Misc fixes and cleanups (Arnd, Colin, John, Kunwu, Li, Navid,
Ricardo, Roman, Tang, Uwe)
* tag 'for-6.9/block-20240310' of git://git.kernel.dk/linux: (221 commits)
block: partitions: only define function mac_fix_string for CONFIG_PPC_PMAC
block/swim: Convert to platform remove callback returning void
cdrom: gdrom: Convert to platform remove callback returning void
block: remove disk_stack_limits
md: remove mddev->queue
md: don't initialize queue limits
md/raid10: use the atomic queue limit update APIs
md/raid5: use the atomic queue limit update APIs
md/raid1: use the atomic queue limit update APIs
md/raid0: use the atomic queue limit update APIs
md: add queue limit helpers
md: add a mddev_is_dm helper
md: add a mddev_add_trace_msg helper
md: add a mddev_trace_remap helper
bcache: move calculation of stripe_size and io_opt into bcache_device_init
virtio_blk: Do not use disk_set_max_open/active_zones()
aoe: fix the potential use-after-free problem in aoecmd_cfg_pkts
block: move capacity validation to blkpg_do_ioctl()
block: prevent division by zero in blk_rq_stat_sum()
drbd: atomically update queue limits in drbd_reconsider_queue_parameters
...
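The headline change here is the move to atomic queue limits: rather than allocating a disk and then adjusting it with individual blk_queue_*() setters, a driver fills in a struct queue_limits up front and passes it to the allocation helper, which validates and applies everything in one step. A minimal sketch of the new pattern, condensed from the nfhd and ubd conversions in the diff below (the my_* names are illustrative placeholders, not kernel symbols):

	static int my_probe(struct my_device *dev)
	{
		struct queue_limits lim = {
			.logical_block_size	= dev->block_size,
			.max_hw_sectors		= dev->max_sectors,
		};
		struct gendisk *disk;

		/* limits are validated and applied atomically at allocation time */
		disk = blk_mq_alloc_disk(&dev->tag_set, &lim, dev);
		if (IS_ERR(disk))
			return PTR_ERR(disk);
		dev->disk = disk;

		return add_disk(disk);
	}

bio-based drivers follow the same pattern through blk_alloc_disk(&lim, node), which now returns an ERR_PTR() on failure instead of NULL, as the nfhd and simdisk hunks show.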
@@ -96,6 +96,9 @@ static const struct block_device_operations nfhd_ops = {

static int __init nfhd_init_one(int id, u32 blocks, u32 bsize)
{
struct queue_limits lim = {
.logical_block_size = bsize,
};
struct nfhd_device *dev;
int dev_id = id - NFHD_DEV_OFFSET;
int err = -ENOMEM;
@@ -117,9 +120,11 @@ static int __init nfhd_init_one(int id, u32 blocks, u32 bsize)
dev->bsize = bsize;
dev->bshift = ffs(bsize) - 10;

dev->disk = blk_alloc_disk(NUMA_NO_NODE);
if (!dev->disk)
dev->disk = blk_alloc_disk(&lim, NUMA_NO_NODE);
if (IS_ERR(dev->disk)) {
err = PTR_ERR(dev->disk);
goto free_dev;
}

dev->disk->major = major_num;
dev->disk->first_minor = dev_id * 16;
@@ -128,7 +133,6 @@ static int __init nfhd_init_one(int id, u32 blocks, u32 bsize)
dev->disk->private_data = dev;
sprintf(dev->disk->disk_name, "nfhd%u", dev_id);
set_capacity(dev->disk, (sector_t)blocks * (bsize / 512));
blk_queue_logical_block_size(dev->disk->queue, bsize);
err = add_disk(dev->disk);
if (err)
goto out_cleanup_disk;

@@ -108,8 +108,6 @@ static inline void ubd_set_bit(__u64 bit, unsigned char *data)
|
||||
static DEFINE_MUTEX(ubd_lock);
|
||||
static DEFINE_MUTEX(ubd_mutex); /* replaces BKL, might not be needed */
|
||||
|
||||
static int ubd_open(struct gendisk *disk, blk_mode_t mode);
|
||||
static void ubd_release(struct gendisk *disk);
|
||||
static int ubd_ioctl(struct block_device *bdev, blk_mode_t mode,
|
||||
unsigned int cmd, unsigned long arg);
|
||||
static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo);
|
||||
@@ -118,16 +116,11 @@ static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo);
|
||||
|
||||
static const struct block_device_operations ubd_blops = {
|
||||
.owner = THIS_MODULE,
|
||||
.open = ubd_open,
|
||||
.release = ubd_release,
|
||||
.ioctl = ubd_ioctl,
|
||||
.compat_ioctl = blkdev_compat_ptr_ioctl,
|
||||
.getgeo = ubd_getgeo,
|
||||
};
|
||||
|
||||
/* Protected by ubd_lock */
|
||||
static struct gendisk *ubd_gendisk[MAX_DEV];
|
||||
|
||||
#ifdef CONFIG_BLK_DEV_UBD_SYNC
|
||||
#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 1, .c = 0, \
|
||||
.cl = 1 })
|
||||
@@ -155,7 +148,6 @@ struct ubd {
|
||||
* backing or the cow file. */
|
||||
char *file;
|
||||
char *serial;
|
||||
int count;
|
||||
int fd;
|
||||
__u64 size;
|
||||
struct openflags boot_openflags;
|
||||
@@ -165,7 +157,7 @@ struct ubd {
|
||||
unsigned no_trim:1;
|
||||
struct cow cow;
|
||||
struct platform_device pdev;
|
||||
struct request_queue *queue;
|
||||
struct gendisk *disk;
|
||||
struct blk_mq_tag_set tag_set;
|
||||
spinlock_t lock;
|
||||
};
|
||||
@@ -181,7 +173,6 @@ struct ubd {
|
||||
#define DEFAULT_UBD { \
|
||||
.file = NULL, \
|
||||
.serial = NULL, \
|
||||
.count = 0, \
|
||||
.fd = -1, \
|
||||
.size = -1, \
|
||||
.boot_openflags = OPEN_FLAGS, \
|
||||
@@ -774,8 +765,6 @@ static int ubd_open_dev(struct ubd *ubd_dev)
|
||||
ubd_dev->fd = fd;
|
||||
|
||||
if(ubd_dev->cow.file != NULL){
|
||||
blk_queue_max_hw_sectors(ubd_dev->queue, 8 * sizeof(long));
|
||||
|
||||
err = -ENOMEM;
|
||||
ubd_dev->cow.bitmap = vmalloc(ubd_dev->cow.bitmap_len);
|
||||
if(ubd_dev->cow.bitmap == NULL){
|
||||
@@ -797,11 +786,6 @@ static int ubd_open_dev(struct ubd *ubd_dev)
|
||||
if(err < 0) goto error;
|
||||
ubd_dev->cow.fd = err;
|
||||
}
|
||||
if (ubd_dev->no_trim == 0) {
|
||||
blk_queue_max_discard_sectors(ubd_dev->queue, UBD_MAX_REQUEST);
|
||||
blk_queue_max_write_zeroes_sectors(ubd_dev->queue, UBD_MAX_REQUEST);
|
||||
}
|
||||
blk_queue_flag_set(QUEUE_FLAG_NONROT, ubd_dev->queue);
|
||||
return 0;
|
||||
error:
|
||||
os_close_file(ubd_dev->fd);
|
||||
@@ -851,27 +835,6 @@ static const struct attribute_group *ubd_attr_groups[] = {
|
||||
NULL,
|
||||
};
|
||||
|
||||
static int ubd_disk_register(int major, u64 size, int unit,
|
||||
struct gendisk *disk)
|
||||
{
|
||||
disk->major = major;
|
||||
disk->first_minor = unit << UBD_SHIFT;
|
||||
disk->minors = 1 << UBD_SHIFT;
|
||||
disk->fops = &ubd_blops;
|
||||
set_capacity(disk, size / 512);
|
||||
sprintf(disk->disk_name, "ubd%c", 'a' + unit);
|
||||
|
||||
ubd_devs[unit].pdev.id = unit;
|
||||
ubd_devs[unit].pdev.name = DRIVER_NAME;
|
||||
ubd_devs[unit].pdev.dev.release = ubd_device_release;
|
||||
dev_set_drvdata(&ubd_devs[unit].pdev.dev, &ubd_devs[unit]);
|
||||
platform_device_register(&ubd_devs[unit].pdev);
|
||||
|
||||
disk->private_data = &ubd_devs[unit];
|
||||
disk->queue = ubd_devs[unit].queue;
|
||||
return device_add_disk(&ubd_devs[unit].pdev.dev, disk, ubd_attr_groups);
|
||||
}
|
||||
|
||||
#define ROUND_BLOCK(n) ((n + (SECTOR_SIZE - 1)) & (-SECTOR_SIZE))
|
||||
|
||||
static const struct blk_mq_ops ubd_mq_ops = {
|
||||
@@ -881,18 +844,36 @@ static const struct blk_mq_ops ubd_mq_ops = {
|
||||
static int ubd_add(int n, char **error_out)
|
||||
{
|
||||
struct ubd *ubd_dev = &ubd_devs[n];
|
||||
struct queue_limits lim = {
|
||||
.max_segments = MAX_SG,
|
||||
.seg_boundary_mask = PAGE_SIZE - 1,
|
||||
};
|
||||
struct gendisk *disk;
|
||||
int err = 0;
|
||||
|
||||
if(ubd_dev->file == NULL)
|
||||
goto out;
|
||||
|
||||
if (ubd_dev->cow.file)
|
||||
lim.max_hw_sectors = 8 * sizeof(long);
|
||||
if (!ubd_dev->no_trim) {
|
||||
lim.max_hw_discard_sectors = UBD_MAX_REQUEST;
|
||||
lim.max_write_zeroes_sectors = UBD_MAX_REQUEST;
|
||||
}
|
||||
|
||||
err = ubd_file_size(ubd_dev, &ubd_dev->size);
|
||||
if(err < 0){
|
||||
*error_out = "Couldn't determine size of device's file";
|
||||
goto out;
|
||||
}
|
||||
|
||||
err = ubd_open_dev(ubd_dev);
|
||||
if (err) {
|
||||
pr_err("ubd%c: Can't open \"%s\": errno = %d\n",
|
||||
'a' + n, ubd_dev->file, -err);
|
||||
goto out;
|
||||
}
|
||||
|
||||
ubd_dev->size = ROUND_BLOCK(ubd_dev->size);
|
||||
|
||||
ubd_dev->tag_set.ops = &ubd_mq_ops;
|
||||
@@ -904,29 +885,43 @@ static int ubd_add(int n, char **error_out)
|
||||
|
||||
err = blk_mq_alloc_tag_set(&ubd_dev->tag_set);
|
||||
if (err)
|
||||
goto out;
|
||||
goto out_close;
|
||||
|
||||
disk = blk_mq_alloc_disk(&ubd_dev->tag_set, ubd_dev);
|
||||
disk = blk_mq_alloc_disk(&ubd_dev->tag_set, &lim, ubd_dev);
|
||||
if (IS_ERR(disk)) {
|
||||
err = PTR_ERR(disk);
|
||||
goto out_cleanup_tags;
|
||||
}
|
||||
ubd_dev->queue = disk->queue;
|
||||
|
||||
blk_queue_write_cache(ubd_dev->queue, true, false);
|
||||
blk_queue_max_segments(ubd_dev->queue, MAX_SG);
|
||||
blk_queue_segment_boundary(ubd_dev->queue, PAGE_SIZE - 1);
|
||||
err = ubd_disk_register(UBD_MAJOR, ubd_dev->size, n, disk);
|
||||
blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue);
|
||||
blk_queue_write_cache(disk->queue, true, false);
|
||||
disk->major = UBD_MAJOR;
|
||||
disk->first_minor = n << UBD_SHIFT;
|
||||
disk->minors = 1 << UBD_SHIFT;
|
||||
disk->fops = &ubd_blops;
|
||||
set_capacity(disk, ubd_dev->size / 512);
|
||||
sprintf(disk->disk_name, "ubd%c", 'a' + n);
|
||||
disk->private_data = ubd_dev;
|
||||
set_disk_ro(disk, !ubd_dev->openflags.w);
|
||||
|
||||
ubd_dev->pdev.id = n;
|
||||
ubd_dev->pdev.name = DRIVER_NAME;
|
||||
ubd_dev->pdev.dev.release = ubd_device_release;
|
||||
dev_set_drvdata(&ubd_dev->pdev.dev, ubd_dev);
|
||||
platform_device_register(&ubd_dev->pdev);
|
||||
|
||||
err = device_add_disk(&ubd_dev->pdev.dev, disk, ubd_attr_groups);
|
||||
if (err)
|
||||
goto out_cleanup_disk;
|
||||
|
||||
ubd_gendisk[n] = disk;
|
||||
return 0;
|
||||
|
||||
out_cleanup_disk:
|
||||
put_disk(disk);
|
||||
out_cleanup_tags:
|
||||
blk_mq_free_tag_set(&ubd_dev->tag_set);
|
||||
out_close:
|
||||
ubd_close_dev(ubd_dev);
|
||||
out:
|
||||
return err;
|
||||
}
|
||||
@@ -1012,7 +1007,6 @@ static int ubd_id(char **str, int *start_out, int *end_out)
|
||||
|
||||
static int ubd_remove(int n, char **error_out)
|
||||
{
|
||||
struct gendisk *disk = ubd_gendisk[n];
|
||||
struct ubd *ubd_dev;
|
||||
int err = -ENODEV;
|
||||
|
||||
@@ -1023,15 +1017,15 @@ static int ubd_remove(int n, char **error_out)
|
||||
if(ubd_dev->file == NULL)
|
||||
goto out;
|
||||
|
||||
/* you cannot remove a open disk */
|
||||
err = -EBUSY;
|
||||
if(ubd_dev->count > 0)
|
||||
goto out;
|
||||
if (ubd_dev->disk) {
|
||||
/* you cannot remove a open disk */
|
||||
err = -EBUSY;
|
||||
if (disk_openers(ubd_dev->disk))
|
||||
goto out;
|
||||
|
||||
ubd_gendisk[n] = NULL;
|
||||
if(disk != NULL){
|
||||
del_gendisk(disk);
|
||||
put_disk(disk);
|
||||
del_gendisk(ubd_dev->disk);
|
||||
ubd_close_dev(ubd_dev);
|
||||
put_disk(ubd_dev->disk);
|
||||
}
|
||||
|
||||
err = 0;
|
||||
@@ -1153,37 +1147,6 @@ static int __init ubd_driver_init(void){
|
||||
|
||||
device_initcall(ubd_driver_init);
|
||||
|
||||
static int ubd_open(struct gendisk *disk, blk_mode_t mode)
|
||||
{
|
||||
struct ubd *ubd_dev = disk->private_data;
|
||||
int err = 0;
|
||||
|
||||
mutex_lock(&ubd_mutex);
|
||||
if(ubd_dev->count == 0){
|
||||
err = ubd_open_dev(ubd_dev);
|
||||
if(err){
|
||||
printk(KERN_ERR "%s: Can't open \"%s\": errno = %d\n",
|
||||
disk->disk_name, ubd_dev->file, -err);
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
ubd_dev->count++;
|
||||
set_disk_ro(disk, !ubd_dev->openflags.w);
|
||||
out:
|
||||
mutex_unlock(&ubd_mutex);
|
||||
return err;
|
||||
}
|
||||
|
||||
static void ubd_release(struct gendisk *disk)
|
||||
{
|
||||
struct ubd *ubd_dev = disk->private_data;
|
||||
|
||||
mutex_lock(&ubd_mutex);
|
||||
if(--ubd_dev->count == 0)
|
||||
ubd_close_dev(ubd_dev);
|
||||
mutex_unlock(&ubd_mutex);
|
||||
}
|
||||
|
||||
static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask,
|
||||
__u64 *cow_offset, unsigned long *bitmap,
|
||||
__u64 bitmap_offset, unsigned long *bitmap_words,
|
||||
|
||||
@@ -264,16 +264,18 @@ static int __init simdisk_setup(struct simdisk *dev, int which,
struct proc_dir_entry *procdir)
{
char tmp[2] = { '0' + which, 0 };
int err = -ENOMEM;
int err;

dev->fd = -1;
dev->filename = NULL;
spin_lock_init(&dev->lock);
dev->users = 0;

dev->gd = blk_alloc_disk(NUMA_NO_NODE);
if (!dev->gd)
dev->gd = blk_alloc_disk(NULL, NUMA_NO_NODE);
if (IS_ERR(dev->gd)) {
err = PTR_ERR(dev->gd);
goto out;
}
dev->gd->major = simdisk_major;
dev->gd->first_minor = which;
dev->gd->minors = SIMDISK_MINORS;

@@ -383,7 +383,7 @@ void __init bdev_cache_init(void)

bdev_cachep = kmem_cache_create("bdev_cache", sizeof(struct bdev_inode),
0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
SLAB_MEM_SPREAD|SLAB_ACCOUNT|SLAB_PANIC),
SLAB_ACCOUNT|SLAB_PANIC),
init_once);
err = register_filesystem(&bd_type);
if (err)

@@ -127,7 +127,7 @@ static void bfqg_stats_update_group_wait_time(struct bfqg_stats *stats)
|
||||
if (!bfqg_stats_waiting(stats))
|
||||
return;
|
||||
|
||||
now = ktime_get_ns();
|
||||
now = blk_time_get_ns();
|
||||
if (now > stats->start_group_wait_time)
|
||||
bfq_stat_add(&stats->group_wait_time,
|
||||
now - stats->start_group_wait_time);
|
||||
@@ -144,7 +144,7 @@ static void bfqg_stats_set_start_group_wait_time(struct bfq_group *bfqg,
|
||||
return;
|
||||
if (bfqg == curr_bfqg)
|
||||
return;
|
||||
stats->start_group_wait_time = ktime_get_ns();
|
||||
stats->start_group_wait_time = blk_time_get_ns();
|
||||
bfqg_stats_mark_waiting(stats);
|
||||
}
|
||||
|
||||
@@ -156,7 +156,7 @@ static void bfqg_stats_end_empty_time(struct bfqg_stats *stats)
|
||||
if (!bfqg_stats_empty(stats))
|
||||
return;
|
||||
|
||||
now = ktime_get_ns();
|
||||
now = blk_time_get_ns();
|
||||
if (now > stats->start_empty_time)
|
||||
bfq_stat_add(&stats->empty_time,
|
||||
now - stats->start_empty_time);
|
||||
@@ -183,7 +183,7 @@ void bfqg_stats_set_start_empty_time(struct bfq_group *bfqg)
|
||||
if (bfqg_stats_empty(stats))
|
||||
return;
|
||||
|
||||
stats->start_empty_time = ktime_get_ns();
|
||||
stats->start_empty_time = blk_time_get_ns();
|
||||
bfqg_stats_mark_empty(stats);
|
||||
}
|
||||
|
||||
@@ -192,7 +192,7 @@ void bfqg_stats_update_idle_time(struct bfq_group *bfqg)
|
||||
struct bfqg_stats *stats = &bfqg->stats;
|
||||
|
||||
if (bfqg_stats_idling(stats)) {
|
||||
u64 now = ktime_get_ns();
|
||||
u64 now = blk_time_get_ns();
|
||||
|
||||
if (now > stats->start_idle_time)
|
||||
bfq_stat_add(&stats->idle_time,
|
||||
@@ -205,7 +205,7 @@ void bfqg_stats_set_start_idle_time(struct bfq_group *bfqg)
|
||||
{
|
||||
struct bfqg_stats *stats = &bfqg->stats;
|
||||
|
||||
stats->start_idle_time = ktime_get_ns();
|
||||
stats->start_idle_time = blk_time_get_ns();
|
||||
bfqg_stats_mark_idling(stats);
|
||||
}
|
||||
|
||||
@@ -242,7 +242,7 @@ void bfqg_stats_update_completion(struct bfq_group *bfqg, u64 start_time_ns,
|
||||
u64 io_start_time_ns, blk_opf_t opf)
|
||||
{
|
||||
struct bfqg_stats *stats = &bfqg->stats;
|
||||
u64 now = ktime_get_ns();
|
||||
u64 now = blk_time_get_ns();
|
||||
|
||||
if (now > io_start_time_ns)
|
||||
blkg_rwstat_add(&stats->service_time, opf,
|
||||
|
||||
@@ -1005,7 +1005,7 @@ static struct request *bfq_check_fifo(struct bfq_queue *bfqq,
|
||||
|
||||
rq = rq_entry_fifo(bfqq->fifo.next);
|
||||
|
||||
if (rq == last || ktime_get_ns() < rq->fifo_time)
|
||||
if (rq == last || blk_time_get_ns() < rq->fifo_time)
|
||||
return NULL;
|
||||
|
||||
bfq_log_bfqq(bfqq->bfqd, bfqq, "check_fifo: returned %p", rq);
|
||||
@@ -1829,7 +1829,7 @@ static void bfq_bfqq_handle_idle_busy_switch(struct bfq_data *bfqd,
|
||||
* bfq_bfqq_update_budg_for_activation for
|
||||
* details on the usage of the next variable.
|
||||
*/
|
||||
arrived_in_time = ktime_get_ns() <=
|
||||
arrived_in_time = blk_time_get_ns() <=
|
||||
bfqq->ttime.last_end_request +
|
||||
bfqd->bfq_slice_idle * 3;
|
||||
unsigned int act_idx = bfq_actuator_index(bfqd, rq->bio);
|
||||
@@ -2208,7 +2208,7 @@ static void bfq_add_request(struct request *rq)
|
||||
struct request *next_rq, *prev;
|
||||
unsigned int old_wr_coeff = bfqq->wr_coeff;
|
||||
bool interactive = false;
|
||||
u64 now_ns = ktime_get_ns();
|
||||
u64 now_ns = blk_time_get_ns();
|
||||
|
||||
bfq_log_bfqq(bfqd, bfqq, "add_request %d", rq_is_sync(rq));
|
||||
bfqq->queued[rq_is_sync(rq)]++;
|
||||
@@ -2262,7 +2262,7 @@ static void bfq_add_request(struct request *rq)
|
||||
bfqd->rqs_injected && bfqd->tot_rq_in_driver > 0)) &&
|
||||
time_is_before_eq_jiffies(bfqq->decrease_time_jif +
|
||||
msecs_to_jiffies(10))) {
|
||||
bfqd->last_empty_occupied_ns = ktime_get_ns();
|
||||
bfqd->last_empty_occupied_ns = blk_time_get_ns();
|
||||
/*
|
||||
* Start the state machine for measuring the
|
||||
* total service time of rq: setting
|
||||
@@ -3294,7 +3294,7 @@ static void bfq_set_budget_timeout(struct bfq_data *bfqd,
|
||||
else
|
||||
timeout_coeff = bfqq->entity.weight / bfqq->entity.orig_weight;
|
||||
|
||||
bfqd->last_budget_start = ktime_get();
|
||||
bfqd->last_budget_start = blk_time_get();
|
||||
|
||||
bfqq->budget_timeout = jiffies +
|
||||
bfqd->bfq_timeout * timeout_coeff;
|
||||
@@ -3394,7 +3394,7 @@ static void bfq_arm_slice_timer(struct bfq_data *bfqd)
|
||||
else if (bfqq->wr_coeff > 1)
|
||||
sl = max_t(u32, sl, 20ULL * NSEC_PER_MSEC);
|
||||
|
||||
bfqd->last_idling_start = ktime_get();
|
||||
bfqd->last_idling_start = blk_time_get();
|
||||
bfqd->last_idling_start_jiffies = jiffies;
|
||||
|
||||
hrtimer_start(&bfqd->idle_slice_timer, ns_to_ktime(sl),
|
||||
@@ -3433,7 +3433,7 @@ static void bfq_reset_rate_computation(struct bfq_data *bfqd,
|
||||
struct request *rq)
|
||||
{
|
||||
if (rq != NULL) { /* new rq dispatch now, reset accordingly */
|
||||
bfqd->last_dispatch = bfqd->first_dispatch = ktime_get_ns();
|
||||
bfqd->last_dispatch = bfqd->first_dispatch = blk_time_get_ns();
|
||||
bfqd->peak_rate_samples = 1;
|
||||
bfqd->sequential_samples = 0;
|
||||
bfqd->tot_sectors_dispatched = bfqd->last_rq_max_size =
|
||||
@@ -3590,7 +3590,7 @@ reset_computation:
|
||||
*/
|
||||
static void bfq_update_peak_rate(struct bfq_data *bfqd, struct request *rq)
|
||||
{
|
||||
u64 now_ns = ktime_get_ns();
|
||||
u64 now_ns = blk_time_get_ns();
|
||||
|
||||
if (bfqd->peak_rate_samples == 0) { /* first dispatch */
|
||||
bfq_log(bfqd, "update_peak_rate: goto reset, samples %d",
|
||||
@@ -4162,7 +4162,7 @@ static bool bfq_bfqq_is_slow(struct bfq_data *bfqd, struct bfq_queue *bfqq,
|
||||
if (compensate)
|
||||
delta_ktime = bfqd->last_idling_start;
|
||||
else
|
||||
delta_ktime = ktime_get();
|
||||
delta_ktime = blk_time_get();
|
||||
delta_ktime = ktime_sub(delta_ktime, bfqd->last_budget_start);
|
||||
delta_usecs = ktime_to_us(delta_ktime);
|
||||
|
||||
@@ -5591,7 +5591,7 @@ static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
|
||||
struct bfq_io_cq *bic, pid_t pid, int is_sync,
|
||||
unsigned int act_idx)
|
||||
{
|
||||
u64 now_ns = ktime_get_ns();
|
||||
u64 now_ns = blk_time_get_ns();
|
||||
|
||||
bfqq->actuator_idx = act_idx;
|
||||
RB_CLEAR_NODE(&bfqq->entity.rb_node);
|
||||
@@ -5903,7 +5903,7 @@ static void bfq_update_io_thinktime(struct bfq_data *bfqd,
|
||||
*/
|
||||
if (bfqq->dispatched || bfq_bfqq_busy(bfqq))
|
||||
return;
|
||||
elapsed = ktime_get_ns() - bfqq->ttime.last_end_request;
|
||||
elapsed = blk_time_get_ns() - bfqq->ttime.last_end_request;
|
||||
elapsed = min_t(u64, elapsed, 2ULL * bfqd->bfq_slice_idle);
|
||||
|
||||
ttime->ttime_samples = (7*ttime->ttime_samples + 256) / 8;
|
||||
@@ -6194,7 +6194,7 @@ static bool __bfq_insert_request(struct bfq_data *bfqd, struct request *rq)
|
||||
bfq_add_request(rq);
|
||||
idle_timer_disabled = waiting && !bfq_bfqq_wait_request(bfqq);
|
||||
|
||||
rq->fifo_time = ktime_get_ns() + bfqd->bfq_fifo_expire[rq_is_sync(rq)];
|
||||
rq->fifo_time = blk_time_get_ns() + bfqd->bfq_fifo_expire[rq_is_sync(rq)];
|
||||
list_add_tail(&rq->queuelist, &bfqq->fifo);
|
||||
|
||||
bfq_rq_enqueued(bfqd, bfqq, rq);
|
||||
@@ -6370,7 +6370,7 @@ static void bfq_completed_request(struct bfq_queue *bfqq, struct bfq_data *bfqd)
|
||||
bfq_weights_tree_remove(bfqq);
|
||||
}
|
||||
|
||||
now_ns = ktime_get_ns();
|
||||
now_ns = blk_time_get_ns();
|
||||
|
||||
bfqq->ttime.last_end_request = now_ns;
|
||||
|
||||
@@ -6585,7 +6585,7 @@ static void bfq_completed_request(struct bfq_queue *bfqq, struct bfq_data *bfqd)
|
||||
static void bfq_update_inject_limit(struct bfq_data *bfqd,
|
||||
struct bfq_queue *bfqq)
|
||||
{
|
||||
u64 tot_time_ns = ktime_get_ns() - bfqd->last_empty_occupied_ns;
|
||||
u64 tot_time_ns = blk_time_get_ns() - bfqd->last_empty_occupied_ns;
|
||||
unsigned int old_limit = bfqq->inject_limit;
|
||||
|
||||
if (bfqq->last_serv_time_ns > 0 && bfqd->rqs_injected) {
|
||||
|
||||
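The ktime_get_ns() -> blk_time_get_ns() substitutions in the bfq hunks above (and in the blk-cgroup and blk-mq hunks further down) implement the "Block issue timestamp caching" item from the summary: within a plugged submission context the clock is read once and the cached value is reused for subsequent requests. A simplified sketch of the idea, not the exact helper from this series:

	static inline u64 cached_block_time_ns(void)	/* simplified sketch */
	{
		struct blk_plug *plug = current->plug;

		if (!plug || !in_task())
			return ktime_get_ns();
		if (!plug->cur_ktime) {
			plug->cur_ktime = ktime_get_ns();
			current->flags |= PF_BLOCK_TS;	/* cleared again in __blk_flush_plug() */
		}
		return plug->cur_ktime;
	}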
@@ -395,6 +395,7 @@ static blk_status_t bio_integrity_process(struct bio *bio,
|
||||
iter.tuple_size = bi->tuple_size;
|
||||
iter.seed = proc_iter->bi_sector;
|
||||
iter.prot_buf = bvec_virt(bip->bip_vec);
|
||||
iter.pi_offset = bi->pi_offset;
|
||||
|
||||
__bio_for_each_segment(bv, bio, bviter, *proc_iter) {
|
||||
void *kaddr = bvec_kmap_local(&bv);
|
||||
|
||||
block/bio.c (45 lines changed)
@@ -16,7 +16,6 @@
|
||||
#include <linux/workqueue.h>
|
||||
#include <linux/cgroup.h>
|
||||
#include <linux/highmem.h>
|
||||
#include <linux/sched/sysctl.h>
|
||||
#include <linux/blk-crypto.h>
|
||||
#include <linux/xarray.h>
|
||||
|
||||
@@ -763,29 +762,31 @@ static inline void bio_put_percpu_cache(struct bio *bio)
|
||||
struct bio_alloc_cache *cache;
|
||||
|
||||
cache = per_cpu_ptr(bio->bi_pool->cache, get_cpu());
|
||||
if (READ_ONCE(cache->nr_irq) + cache->nr > ALLOC_CACHE_MAX) {
|
||||
put_cpu();
|
||||
bio_free(bio);
|
||||
return;
|
||||
}
|
||||
if (READ_ONCE(cache->nr_irq) + cache->nr > ALLOC_CACHE_MAX)
|
||||
goto out_free;
|
||||
|
||||
bio_uninit(bio);
|
||||
|
||||
if ((bio->bi_opf & REQ_POLLED) && !WARN_ON_ONCE(in_interrupt())) {
|
||||
if (in_task()) {
|
||||
bio_uninit(bio);
|
||||
bio->bi_next = cache->free_list;
|
||||
/* Not necessary but helps not to iopoll already freed bios */
|
||||
bio->bi_bdev = NULL;
|
||||
cache->free_list = bio;
|
||||
cache->nr++;
|
||||
} else {
|
||||
unsigned long flags;
|
||||
} else if (in_hardirq()) {
|
||||
lockdep_assert_irqs_disabled();
|
||||
|
||||
local_irq_save(flags);
|
||||
bio_uninit(bio);
|
||||
bio->bi_next = cache->free_list_irq;
|
||||
cache->free_list_irq = bio;
|
||||
cache->nr_irq++;
|
||||
local_irq_restore(flags);
|
||||
} else {
|
||||
goto out_free;
|
||||
}
|
||||
put_cpu();
|
||||
return;
|
||||
out_free:
|
||||
put_cpu();
|
||||
bio_free(bio);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -1154,7 +1155,7 @@ void __bio_release_pages(struct bio *bio, bool mark_dirty)
|
||||
|
||||
bio_for_each_folio_all(fi, bio) {
|
||||
struct page *page;
|
||||
size_t done = 0;
|
||||
size_t nr_pages;
|
||||
|
||||
if (mark_dirty) {
|
||||
folio_lock(fi.folio);
|
||||
@@ -1162,10 +1163,11 @@ void __bio_release_pages(struct bio *bio, bool mark_dirty)
|
||||
folio_unlock(fi.folio);
|
||||
}
|
||||
page = folio_page(fi.folio, fi.offset / PAGE_SIZE);
|
||||
nr_pages = (fi.offset + fi.length - 1) / PAGE_SIZE -
|
||||
fi.offset / PAGE_SIZE + 1;
|
||||
do {
|
||||
bio_release_page(bio, page++);
|
||||
done += PAGE_SIZE;
|
||||
} while (done < fi.length);
|
||||
} while (--nr_pages != 0);
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(__bio_release_pages);
|
||||
@@ -1371,21 +1373,12 @@ int submit_bio_wait(struct bio *bio)
|
||||
{
|
||||
DECLARE_COMPLETION_ONSTACK_MAP(done,
|
||||
bio->bi_bdev->bd_disk->lockdep_map);
|
||||
unsigned long hang_check;
|
||||
|
||||
bio->bi_private = &done;
|
||||
bio->bi_end_io = submit_bio_wait_endio;
|
||||
bio->bi_opf |= REQ_SYNC;
|
||||
submit_bio(bio);
|
||||
|
||||
/* Prevent hang_check timer from firing at us during very long I/O */
|
||||
hang_check = sysctl_hung_task_timeout_secs;
|
||||
if (hang_check)
|
||||
while (!wait_for_completion_io_timeout(&done,
|
||||
hang_check * (HZ/2)))
|
||||
;
|
||||
else
|
||||
wait_for_completion_io(&done);
|
||||
blk_wait_io(&done);
|
||||
|
||||
return blk_status_to_errno(bio->bi_status);
|
||||
}
|
||||
|
||||
@@ -1846,7 +1846,7 @@ static void blkcg_maybe_throttle_blkg(struct blkcg_gq *blkg, bool use_memdelay)
|
||||
{
|
||||
unsigned long pflags;
|
||||
bool clamp;
|
||||
u64 now = ktime_to_ns(ktime_get());
|
||||
u64 now = blk_time_get_ns();
|
||||
u64 exp;
|
||||
u64 delay_nsec = 0;
|
||||
int tok;
|
||||
|
||||
@@ -19,6 +19,7 @@
|
||||
#include <linux/kthread.h>
|
||||
#include <linux/blk-mq.h>
|
||||
#include <linux/llist.h>
|
||||
#include "blk.h"
|
||||
|
||||
struct blkcg_gq;
|
||||
struct blkg_policy_data;
|
||||
|
||||
@@ -394,24 +394,34 @@ static void blk_timeout_work(struct work_struct *work)
|
||||
{
|
||||
}
|
||||
|
||||
struct request_queue *blk_alloc_queue(int node_id)
|
||||
struct request_queue *blk_alloc_queue(struct queue_limits *lim, int node_id)
|
||||
{
|
||||
struct request_queue *q;
|
||||
int error;
|
||||
|
||||
q = kmem_cache_alloc_node(blk_requestq_cachep, GFP_KERNEL | __GFP_ZERO,
|
||||
node_id);
|
||||
if (!q)
|
||||
return NULL;
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
q->last_merge = NULL;
|
||||
|
||||
q->id = ida_alloc(&blk_queue_ida, GFP_KERNEL);
|
||||
if (q->id < 0)
|
||||
if (q->id < 0) {
|
||||
error = q->id;
|
||||
goto fail_q;
|
||||
}
|
||||
|
||||
q->stats = blk_alloc_queue_stats();
|
||||
if (!q->stats)
|
||||
if (!q->stats) {
|
||||
error = -ENOMEM;
|
||||
goto fail_id;
|
||||
}
|
||||
|
||||
error = blk_set_default_limits(lim);
|
||||
if (error)
|
||||
goto fail_stats;
|
||||
q->limits = *lim;
|
||||
|
||||
q->node = node_id;
|
||||
|
||||
@@ -425,6 +435,7 @@ struct request_queue *blk_alloc_queue(int node_id)
|
||||
mutex_init(&q->debugfs_mutex);
|
||||
mutex_init(&q->sysfs_lock);
|
||||
mutex_init(&q->sysfs_dir_lock);
|
||||
mutex_init(&q->limits_lock);
|
||||
mutex_init(&q->rq_qos_mutex);
|
||||
spin_lock_init(&q->queue_lock);
|
||||
|
||||
@@ -435,12 +446,12 @@ struct request_queue *blk_alloc_queue(int node_id)
|
||||
* Init percpu_ref in atomic mode so that it's faster to shutdown.
|
||||
* See blk_register_queue() for details.
|
||||
*/
|
||||
if (percpu_ref_init(&q->q_usage_counter,
|
||||
error = percpu_ref_init(&q->q_usage_counter,
|
||||
blk_queue_usage_counter_release,
|
||||
PERCPU_REF_INIT_ATOMIC, GFP_KERNEL))
|
||||
PERCPU_REF_INIT_ATOMIC, GFP_KERNEL);
|
||||
if (error)
|
||||
goto fail_stats;
|
||||
|
||||
blk_set_default_limits(&q->limits);
|
||||
q->nr_requests = BLKDEV_DEFAULT_RQ;
|
||||
|
||||
return q;
|
||||
@@ -451,7 +462,7 @@ fail_id:
|
||||
ida_free(&blk_queue_ida, q->id);
|
||||
fail_q:
|
||||
kmem_cache_free(blk_requestq_cachep, q);
|
||||
return NULL;
|
||||
return ERR_PTR(error);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -1083,6 +1094,7 @@ void blk_start_plug_nr_ios(struct blk_plug *plug, unsigned short nr_ios)
|
||||
if (tsk->plug)
|
||||
return;
|
||||
|
||||
plug->cur_ktime = 0;
|
||||
plug->mq_list = NULL;
|
||||
plug->cached_rq = NULL;
|
||||
plug->nr_ios = min_t(unsigned short, nr_ios, BLK_MAX_REQUEST_COUNT);
|
||||
@@ -1182,6 +1194,8 @@ void __blk_flush_plug(struct blk_plug *plug, bool from_schedule)
|
||||
*/
|
||||
if (unlikely(!rq_list_empty(plug->cached_rq)))
|
||||
blk_mq_free_plug_rqs(plug);
|
||||
|
||||
current->flags &= ~PF_BLOCK_TS;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -1229,8 +1243,7 @@ int __init blk_dev_init(void)
|
||||
if (!kblockd_workqueue)
|
||||
panic("Failed to create kblockd\n");
|
||||
|
||||
blk_requestq_cachep = kmem_cache_create("request_queue",
|
||||
sizeof(struct request_queue), 0, SLAB_PANIC, NULL);
|
||||
blk_requestq_cachep = KMEM_CACHE(request_queue, SLAB_PANIC);
|
||||
|
||||
blk_debugfs_root = debugfs_create_dir("block", NULL);
|
||||
|
||||
|
||||
@@ -143,7 +143,7 @@ static void blk_account_io_flush(struct request *rq)
|
||||
part_stat_lock();
|
||||
part_stat_inc(part, ios[STAT_FLUSH]);
|
||||
part_stat_add(part, nsecs[STAT_FLUSH],
|
||||
ktime_get_ns() - rq->start_time_ns);
|
||||
blk_time_get_ns() - rq->start_time_ns);
|
||||
part_stat_unlock();
|
||||
}
|
||||
|
||||
|
||||
@@ -370,6 +370,7 @@ void blk_integrity_register(struct gendisk *disk, struct blk_integrity *template
|
||||
bi->profile = template->profile ? template->profile : &nop_profile;
|
||||
bi->tuple_size = template->tuple_size;
|
||||
bi->tag_size = template->tag_size;
|
||||
bi->pi_offset = template->pi_offset;
|
||||
|
||||
blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, disk->queue);
|
||||
|
||||
|
||||
@@ -829,7 +829,7 @@ static int ioc_autop_idx(struct ioc *ioc, struct gendisk *disk)
|
||||
|
||||
/* step up/down based on the vrate */
|
||||
vrate_pct = div64_u64(ioc->vtime_base_rate * 100, VTIME_PER_USEC);
|
||||
now_ns = ktime_get_ns();
|
||||
now_ns = blk_time_get_ns();
|
||||
|
||||
if (p->too_fast_vrate_pct && p->too_fast_vrate_pct <= vrate_pct) {
|
||||
if (!ioc->autop_too_fast_at)
|
||||
@@ -1044,7 +1044,7 @@ static void ioc_now(struct ioc *ioc, struct ioc_now *now)
|
||||
unsigned seq;
|
||||
u64 vrate;
|
||||
|
||||
now->now_ns = ktime_get();
|
||||
now->now_ns = blk_time_get_ns();
|
||||
now->now = ktime_to_us(now->now_ns);
|
||||
vrate = atomic64_read(&ioc->vtime_rate);
|
||||
|
||||
@@ -2817,7 +2817,7 @@ static void ioc_rqos_done(struct rq_qos *rqos, struct request *rq)
|
||||
return;
|
||||
}
|
||||
|
||||
on_q_ns = ktime_get_ns() - rq->alloc_time_ns;
|
||||
on_q_ns = blk_time_get_ns() - rq->alloc_time_ns;
|
||||
rq_wait_ns = rq->start_time_ns - rq->alloc_time_ns;
|
||||
size_nsec = div64_u64(calc_size_vtime_cost(rq, ioc), VTIME_PER_NSEC);
|
||||
|
||||
@@ -2900,7 +2900,7 @@ static int blk_iocost_init(struct gendisk *disk)
|
||||
ioc->vtime_base_rate = VTIME_PER_USEC;
|
||||
atomic64_set(&ioc->vtime_rate, VTIME_PER_USEC);
|
||||
seqcount_spinlock_init(&ioc->period_seqcount, &ioc->lock);
|
||||
ioc->period_at = ktime_to_us(ktime_get());
|
||||
ioc->period_at = ktime_to_us(blk_time_get());
|
||||
atomic64_set(&ioc->cur_period, 0);
|
||||
atomic_set(&ioc->hweight_gen, 0);
|
||||
|
||||
|
||||
@@ -609,7 +609,7 @@ static void blkcg_iolatency_done_bio(struct rq_qos *rqos, struct bio *bio)
|
||||
if (!iolat->blkiolat->enabled)
|
||||
return;
|
||||
|
||||
now = ktime_to_ns(ktime_get());
|
||||
now = blk_time_get_ns();
|
||||
while (blkg && blkg->parent) {
|
||||
iolat = blkg_to_lat(blkg);
|
||||
if (!iolat) {
|
||||
@@ -661,7 +661,7 @@ static void blkiolatency_timer_fn(struct timer_list *t)
|
||||
struct blk_iolatency *blkiolat = from_timer(blkiolat, t, timer);
|
||||
struct blkcg_gq *blkg;
|
||||
struct cgroup_subsys_state *pos_css;
|
||||
u64 now = ktime_to_ns(ktime_get());
|
||||
u64 now = blk_time_get_ns();
|
||||
|
||||
rcu_read_lock();
|
||||
blkg_for_each_descendant_pre(blkg, pos_css,
|
||||
@@ -985,7 +985,7 @@ static void iolatency_pd_init(struct blkg_policy_data *pd)
|
||||
struct blkcg_gq *blkg = lat_to_blkg(iolat);
|
||||
struct rq_qos *rqos = iolat_rq_qos(blkg->q);
|
||||
struct blk_iolatency *blkiolat = BLKIOLATENCY(rqos);
|
||||
u64 now = ktime_to_ns(ktime_get());
|
||||
u64 now = blk_time_get_ns();
|
||||
int cpu;
|
||||
|
||||
if (blk_queue_nonrot(blkg->q))
|
||||
|
||||
@@ -35,6 +35,26 @@ static sector_t bio_discard_limit(struct block_device *bdev, sector_t sector)
|
||||
return round_down(UINT_MAX, discard_granularity) >> SECTOR_SHIFT;
|
||||
}
|
||||
|
||||
static void await_bio_endio(struct bio *bio)
|
||||
{
|
||||
complete(bio->bi_private);
|
||||
bio_put(bio);
|
||||
}
|
||||
|
||||
/*
|
||||
* await_bio_chain - ends @bio and waits for every chained bio to complete
|
||||
*/
|
||||
static void await_bio_chain(struct bio *bio)
|
||||
{
|
||||
DECLARE_COMPLETION_ONSTACK_MAP(done,
|
||||
bio->bi_bdev->bd_disk->lockdep_map);
|
||||
|
||||
bio->bi_private = &done;
|
||||
bio->bi_end_io = await_bio_endio;
|
||||
bio_endio(bio);
|
||||
blk_wait_io(&done);
|
||||
}
|
||||
|
||||
int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
|
||||
sector_t nr_sects, gfp_t gfp_mask, struct bio **biop)
|
||||
{
|
||||
@@ -77,6 +97,10 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
|
||||
* is disabled.
|
||||
*/
|
||||
cond_resched();
|
||||
if (fatal_signal_pending(current)) {
|
||||
await_bio_chain(bio);
|
||||
return -EINTR;
|
||||
}
|
||||
}
|
||||
|
||||
*biop = bio;
|
||||
@@ -120,32 +144,33 @@ static int __blkdev_issue_write_zeroes(struct block_device *bdev,
|
||||
struct bio **biop, unsigned flags)
|
||||
{
|
||||
struct bio *bio = *biop;
|
||||
unsigned int max_write_zeroes_sectors;
|
||||
unsigned int max_sectors;
|
||||
|
||||
if (bdev_read_only(bdev))
|
||||
return -EPERM;
|
||||
|
||||
/* Ensure that max_write_zeroes_sectors doesn't overflow bi_size */
|
||||
max_write_zeroes_sectors = bdev_write_zeroes_sectors(bdev);
|
||||
/* Ensure that max_sectors doesn't overflow bi_size */
|
||||
max_sectors = bdev_write_zeroes_sectors(bdev);
|
||||
|
||||
if (max_write_zeroes_sectors == 0)
|
||||
if (max_sectors == 0)
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
while (nr_sects) {
|
||||
unsigned int len = min_t(sector_t, nr_sects, max_sectors);
|
||||
|
||||
bio = blk_next_bio(bio, bdev, 0, REQ_OP_WRITE_ZEROES, gfp_mask);
|
||||
bio->bi_iter.bi_sector = sector;
|
||||
if (flags & BLKDEV_ZERO_NOUNMAP)
|
||||
bio->bi_opf |= REQ_NOUNMAP;
|
||||
|
||||
if (nr_sects > max_write_zeroes_sectors) {
|
||||
bio->bi_iter.bi_size = max_write_zeroes_sectors << 9;
|
||||
nr_sects -= max_write_zeroes_sectors;
|
||||
sector += max_write_zeroes_sectors;
|
||||
} else {
|
||||
bio->bi_iter.bi_size = nr_sects << 9;
|
||||
nr_sects = 0;
|
||||
}
|
||||
bio->bi_iter.bi_size = len << SECTOR_SHIFT;
|
||||
nr_sects -= len;
|
||||
sector += len;
|
||||
cond_resched();
|
||||
if (fatal_signal_pending(current)) {
|
||||
await_bio_chain(bio);
|
||||
return -EINTR;
|
||||
}
|
||||
}
|
||||
|
||||
*biop = bio;
|
||||
@@ -190,6 +215,10 @@ static int __blkdev_issue_zero_pages(struct block_device *bdev,
|
||||
break;
|
||||
}
|
||||
cond_resched();
|
||||
if (fatal_signal_pending(current)) {
|
||||
await_bio_chain(bio);
|
||||
return -EINTR;
|
||||
}
|
||||
}
|
||||
|
||||
*biop = bio;
|
||||
@@ -280,7 +309,7 @@ retry:
|
||||
bio_put(bio);
|
||||
}
|
||||
blk_finish_plug(&plug);
|
||||
if (ret && try_write_zeroes) {
|
||||
if (ret && ret != -EINTR && try_write_zeroes) {
|
||||
if (!(flags & BLKDEV_ZERO_NOFALLBACK)) {
|
||||
try_write_zeroes = false;
|
||||
goto retry;
|
||||
@@ -322,7 +351,7 @@ int blkdev_issue_secure_erase(struct block_device *bdev, sector_t sector,
|
||||
return -EPERM;
|
||||
|
||||
blk_start_plug(&plug);
|
||||
for (;;) {
|
||||
while (nr_sects) {
|
||||
unsigned int len = min_t(sector_t, nr_sects, max_sectors);
|
||||
|
||||
bio = blk_next_bio(bio, bdev, 0, REQ_OP_SECURE_ERASE, gfp);
|
||||
@@ -331,12 +360,17 @@ int blkdev_issue_secure_erase(struct block_device *bdev, sector_t sector,
|
||||
|
||||
sector += len;
|
||||
nr_sects -= len;
|
||||
if (!nr_sects) {
|
||||
ret = submit_bio_wait(bio);
|
||||
bio_put(bio);
|
||||
cond_resched();
|
||||
if (fatal_signal_pending(current)) {
|
||||
await_bio_chain(bio);
|
||||
ret = -EINTR;
|
||||
bio = NULL;
|
||||
break;
|
||||
}
|
||||
cond_resched();
|
||||
}
|
||||
if (bio) {
|
||||
ret = submit_bio_wait(bio);
|
||||
bio_put(bio);
|
||||
}
|
||||
blk_finish_plug(&plug);
|
||||
|
||||
|
||||
block/blk-mq.c (186 lines changed)
@@ -21,7 +21,6 @@
|
||||
#include <linux/llist.h>
|
||||
#include <linux/cpu.h>
|
||||
#include <linux/cache.h>
|
||||
#include <linux/sched/sysctl.h>
|
||||
#include <linux/sched/topology.h>
|
||||
#include <linux/sched/signal.h>
|
||||
#include <linux/delay.h>
|
||||
@@ -322,7 +321,7 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
|
||||
RB_CLEAR_NODE(&rq->rb_node);
|
||||
rq->tag = BLK_MQ_NO_TAG;
|
||||
rq->internal_tag = BLK_MQ_NO_TAG;
|
||||
rq->start_time_ns = ktime_get_ns();
|
||||
rq->start_time_ns = blk_time_get_ns();
|
||||
rq->part = NULL;
|
||||
blk_crypto_rq_set_defaults(rq);
|
||||
}
|
||||
@@ -332,7 +331,7 @@ EXPORT_SYMBOL(blk_rq_init);
|
||||
static inline void blk_mq_rq_time_init(struct request *rq, u64 alloc_time_ns)
|
||||
{
|
||||
if (blk_mq_need_time_stamp(rq))
|
||||
rq->start_time_ns = ktime_get_ns();
|
||||
rq->start_time_ns = blk_time_get_ns();
|
||||
else
|
||||
rq->start_time_ns = 0;
|
||||
|
||||
@@ -443,7 +442,7 @@ static struct request *__blk_mq_alloc_requests(struct blk_mq_alloc_data *data)
|
||||
|
||||
/* alloc_time includes depth and tag waits */
|
||||
if (blk_queue_rq_alloc_time(q))
|
||||
alloc_time_ns = ktime_get_ns();
|
||||
alloc_time_ns = blk_time_get_ns();
|
||||
|
||||
if (data->cmd_flags & REQ_NOWAIT)
|
||||
data->flags |= BLK_MQ_REQ_NOWAIT;
|
||||
@@ -628,7 +627,7 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
|
||||
|
||||
/* alloc_time includes depth and tag waits */
|
||||
if (blk_queue_rq_alloc_time(q))
|
||||
alloc_time_ns = ktime_get_ns();
|
||||
alloc_time_ns = blk_time_get_ns();
|
||||
|
||||
/*
|
||||
* If the tag allocator sleeps we could get an allocation for a
|
||||
@@ -1041,7 +1040,7 @@ static inline void __blk_mq_end_request_acct(struct request *rq, u64 now)
|
||||
inline void __blk_mq_end_request(struct request *rq, blk_status_t error)
|
||||
{
|
||||
if (blk_mq_need_time_stamp(rq))
|
||||
__blk_mq_end_request_acct(rq, ktime_get_ns());
|
||||
__blk_mq_end_request_acct(rq, blk_time_get_ns());
|
||||
|
||||
blk_mq_finish_request(rq);
|
||||
|
||||
@@ -1084,7 +1083,7 @@ void blk_mq_end_request_batch(struct io_comp_batch *iob)
|
||||
u64 now = 0;
|
||||
|
||||
if (iob->need_ts)
|
||||
now = ktime_get_ns();
|
||||
now = blk_time_get_ns();
|
||||
|
||||
while ((rq = rq_list_pop(&iob->req_list)) != NULL) {
|
||||
prefetch(rq->bio);
|
||||
@@ -1167,10 +1166,11 @@ static inline bool blk_mq_complete_need_ipi(struct request *rq)
|
||||
if (force_irqthreads())
|
||||
return false;
|
||||
|
||||
/* same CPU or cache domain? Complete locally */
|
||||
/* same CPU or cache domain and capacity? Complete locally */
|
||||
if (cpu == rq->mq_ctx->cpu ||
|
||||
(!test_bit(QUEUE_FLAG_SAME_FORCE, &rq->q->queue_flags) &&
|
||||
cpus_share_cache(cpu, rq->mq_ctx->cpu)))
|
||||
cpus_share_cache(cpu, rq->mq_ctx->cpu) &&
|
||||
cpus_equal_capacity(cpu, rq->mq_ctx->cpu)))
|
||||
return false;
|
||||
|
||||
/* don't try to IPI to an offline CPU */
|
||||
@@ -1254,7 +1254,7 @@ void blk_mq_start_request(struct request *rq)
|
||||
|
||||
if (test_bit(QUEUE_FLAG_STATS, &q->queue_flags) &&
|
||||
!blk_rq_is_passthrough(rq)) {
|
||||
rq->io_start_time_ns = ktime_get_ns();
|
||||
rq->io_start_time_ns = blk_time_get_ns();
|
||||
rq->stats_sectors = blk_rq_sectors(rq);
|
||||
rq->rq_flags |= RQF_STATS;
|
||||
rq_qos_issue(q, rq);
|
||||
@@ -1409,22 +1409,10 @@ blk_status_t blk_execute_rq(struct request *rq, bool at_head)
|
||||
blk_mq_insert_request(rq, at_head ? BLK_MQ_INSERT_AT_HEAD : 0);
|
||||
blk_mq_run_hw_queue(hctx, false);
|
||||
|
||||
if (blk_rq_is_poll(rq)) {
|
||||
if (blk_rq_is_poll(rq))
|
||||
blk_rq_poll_completion(rq, &wait.done);
|
||||
} else {
|
||||
/*
|
||||
* Prevent hang_check timer from firing at us during very long
|
||||
* I/O
|
||||
*/
|
||||
unsigned long hang_check = sysctl_hung_task_timeout_secs;
|
||||
|
||||
if (hang_check)
|
||||
while (!wait_for_completion_io_timeout(&wait.done,
|
||||
hang_check * (HZ/2)))
|
||||
;
|
||||
else
|
||||
wait_for_completion_io(&wait.done);
|
||||
}
|
||||
else
|
||||
blk_wait_io(&wait.done);
|
||||
|
||||
return wait.ret;
|
||||
}
|
||||
@@ -2892,9 +2880,6 @@ static struct request *blk_mq_get_new_requests(struct request_queue *q,
|
||||
};
|
||||
struct request *rq;
|
||||
|
||||
if (blk_mq_attempt_bio_merge(q, bio, nsegs))
|
||||
return NULL;
|
||||
|
||||
rq_qos_throttle(q, bio);
|
||||
|
||||
if (plug) {
|
||||
@@ -2913,23 +2898,32 @@ static struct request *blk_mq_get_new_requests(struct request_queue *q,
|
||||
}
|
||||
|
||||
/*
|
||||
* Check if we can use the passed on request for submitting the passed in bio,
|
||||
* and remove it from the request list if it can be used.
|
||||
* Check if there is a suitable cached request and return it.
|
||||
*/
|
||||
static bool blk_mq_use_cached_rq(struct request *rq, struct blk_plug *plug,
|
||||
static struct request *blk_mq_peek_cached_request(struct blk_plug *plug,
|
||||
struct request_queue *q, blk_opf_t opf)
|
||||
{
|
||||
enum hctx_type type = blk_mq_get_hctx_type(opf);
|
||||
struct request *rq;
|
||||
|
||||
if (!plug)
|
||||
return NULL;
|
||||
rq = rq_list_peek(&plug->cached_rq);
|
||||
if (!rq || rq->q != q)
|
||||
return NULL;
|
||||
if (type != rq->mq_hctx->type &&
|
||||
(type != HCTX_TYPE_READ || rq->mq_hctx->type != HCTX_TYPE_DEFAULT))
|
||||
return NULL;
|
||||
if (op_is_flush(rq->cmd_flags) != op_is_flush(opf))
|
||||
return NULL;
|
||||
return rq;
|
||||
}
|
||||
|
||||
static void blk_mq_use_cached_rq(struct request *rq, struct blk_plug *plug,
|
||||
struct bio *bio)
|
||||
{
|
||||
enum hctx_type type = blk_mq_get_hctx_type(bio->bi_opf);
|
||||
enum hctx_type hctx_type = rq->mq_hctx->type;
|
||||
|
||||
WARN_ON_ONCE(rq_list_peek(&plug->cached_rq) != rq);
|
||||
|
||||
if (type != hctx_type &&
|
||||
!(type == HCTX_TYPE_READ && hctx_type == HCTX_TYPE_DEFAULT))
|
||||
return false;
|
||||
if (op_is_flush(rq->cmd_flags) != op_is_flush(bio->bi_opf))
|
||||
return false;
|
||||
|
||||
/*
|
||||
* If any qos ->throttle() end up blocking, we will have flushed the
|
||||
* plug and hence killed the cached_rq list as well. Pop this entry
|
||||
@@ -2941,7 +2935,6 @@ static bool blk_mq_use_cached_rq(struct request *rq, struct blk_plug *plug,
|
||||
blk_mq_rq_time_init(rq, 0);
|
||||
rq->cmd_flags = bio->bi_opf;
|
||||
INIT_LIST_HEAD(&rq->queuelist);
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -2963,50 +2956,43 @@ void blk_mq_submit_bio(struct bio *bio)
|
||||
struct blk_plug *plug = blk_mq_plug(bio);
|
||||
const int is_sync = op_is_sync(bio->bi_opf);
|
||||
struct blk_mq_hw_ctx *hctx;
|
||||
struct request *rq = NULL;
|
||||
unsigned int nr_segs = 1;
|
||||
struct request *rq;
|
||||
blk_status_t ret;
|
||||
|
||||
bio = blk_queue_bounce(bio, q);
|
||||
|
||||
if (plug) {
|
||||
rq = rq_list_peek(&plug->cached_rq);
|
||||
if (rq && rq->q != q)
|
||||
rq = NULL;
|
||||
}
|
||||
if (rq) {
|
||||
if (unlikely(bio_may_exceed_limits(bio, &q->limits))) {
|
||||
bio = __bio_split_to_limits(bio, &q->limits, &nr_segs);
|
||||
if (!bio)
|
||||
return;
|
||||
}
|
||||
if (!bio_integrity_prep(bio))
|
||||
return;
|
||||
if (blk_mq_attempt_bio_merge(q, bio, nr_segs))
|
||||
return;
|
||||
if (blk_mq_use_cached_rq(rq, plug, bio))
|
||||
goto done;
|
||||
percpu_ref_get(&q->q_usage_counter);
|
||||
} else {
|
||||
/*
|
||||
* If the plug has a cached request for this queue, try use it.
|
||||
*
|
||||
* The cached request already holds a q_usage_counter reference and we
|
||||
* don't have to acquire a new one if we use it.
|
||||
*/
|
||||
rq = blk_mq_peek_cached_request(plug, q, bio->bi_opf);
|
||||
if (!rq) {
|
||||
if (unlikely(bio_queue_enter(bio)))
|
||||
return;
|
||||
if (unlikely(bio_may_exceed_limits(bio, &q->limits))) {
|
||||
bio = __bio_split_to_limits(bio, &q->limits, &nr_segs);
|
||||
if (!bio)
|
||||
goto fail;
|
||||
}
|
||||
if (!bio_integrity_prep(bio))
|
||||
goto fail;
|
||||
}
|
||||
|
||||
rq = blk_mq_get_new_requests(q, plug, bio, nr_segs);
|
||||
if (unlikely(!rq)) {
|
||||
fail:
|
||||
blk_queue_exit(q);
|
||||
return;
|
||||
if (unlikely(bio_may_exceed_limits(bio, &q->limits))) {
|
||||
bio = __bio_split_to_limits(bio, &q->limits, &nr_segs);
|
||||
if (!bio)
|
||||
goto queue_exit;
|
||||
}
|
||||
if (!bio_integrity_prep(bio))
|
||||
goto queue_exit;
|
||||
|
||||
if (blk_mq_attempt_bio_merge(q, bio, nr_segs))
|
||||
goto queue_exit;
|
||||
|
||||
if (!rq) {
|
||||
rq = blk_mq_get_new_requests(q, plug, bio, nr_segs);
|
||||
if (unlikely(!rq))
|
||||
goto queue_exit;
|
||||
} else {
|
||||
blk_mq_use_cached_rq(rq, plug, bio);
|
||||
}
|
||||
|
||||
done:
|
||||
trace_block_getrq(bio);
|
||||
|
||||
rq_qos_track(q, rq, bio);
|
||||
@@ -3037,6 +3023,15 @@ done:
|
||||
} else {
|
||||
blk_mq_run_dispatch_ops(q, blk_mq_try_issue_directly(hctx, rq));
|
||||
}
|
||||
return;
|
||||
|
||||
queue_exit:
|
||||
/*
|
||||
* Don't drop the queue reference if we were trying to use a cached
|
||||
* request and thus didn't acquire one.
|
||||
*/
|
||||
if (!rq)
|
||||
blk_queue_exit(q);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_BLK_MQ_STACKING
|
||||
@@ -3098,7 +3093,7 @@ blk_status_t blk_insert_cloned_request(struct request *rq)
|
||||
blk_mq_run_dispatch_ops(q,
|
||||
ret = blk_mq_request_issue_directly(rq, true));
|
||||
if (ret)
|
||||
blk_account_io_done(rq, ktime_get_ns());
|
||||
blk_account_io_done(rq, blk_time_get_ns());
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_insert_cloned_request);
|
||||
@@ -4078,15 +4073,16 @@ void blk_mq_release(struct request_queue *q)
|
||||
blk_mq_sysfs_deinit(q);
|
||||
}
|
||||
|
||||
static struct request_queue *blk_mq_init_queue_data(struct blk_mq_tag_set *set,
|
||||
void *queuedata)
|
||||
struct request_queue *blk_mq_alloc_queue(struct blk_mq_tag_set *set,
|
||||
struct queue_limits *lim, void *queuedata)
|
||||
{
|
||||
struct queue_limits default_lim = { };
|
||||
struct request_queue *q;
|
||||
int ret;
|
||||
|
||||
q = blk_alloc_queue(set->numa_node);
|
||||
if (!q)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
q = blk_alloc_queue(lim ? lim : &default_lim, set->numa_node);
|
||||
if (IS_ERR(q))
|
||||
return q;
|
||||
q->queuedata = queuedata;
|
||||
ret = blk_mq_init_allocated_queue(set, q);
|
||||
if (ret) {
|
||||
@@ -4095,20 +4091,15 @@ static struct request_queue *blk_mq_init_queue_data(struct blk_mq_tag_set *set,
|
||||
}
|
||||
return q;
|
||||
}
|
||||
|
||||
struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
|
||||
{
|
||||
return blk_mq_init_queue_data(set, NULL);
|
||||
}
|
||||
EXPORT_SYMBOL(blk_mq_init_queue);
|
||||
EXPORT_SYMBOL(blk_mq_alloc_queue);
|
||||
|
||||
/**
|
||||
* blk_mq_destroy_queue - shutdown a request queue
|
||||
* @q: request queue to shutdown
|
||||
*
|
||||
* This shuts down a request queue allocated by blk_mq_init_queue(). All future
|
||||
* This shuts down a request queue allocated by blk_mq_alloc_queue(). All future
|
||||
* requests will be failed with -ENODEV. The caller is responsible for dropping
|
||||
* the reference from blk_mq_init_queue() by calling blk_put_queue().
|
||||
* the reference from blk_mq_alloc_queue() by calling blk_put_queue().
|
||||
*
|
||||
* Context: can sleep
|
||||
*/
|
||||
@@ -4129,13 +4120,14 @@ void blk_mq_destroy_queue(struct request_queue *q)
|
||||
}
|
||||
EXPORT_SYMBOL(blk_mq_destroy_queue);
|
||||
|
||||
struct gendisk *__blk_mq_alloc_disk(struct blk_mq_tag_set *set, void *queuedata,
|
||||
struct gendisk *__blk_mq_alloc_disk(struct blk_mq_tag_set *set,
|
||||
struct queue_limits *lim, void *queuedata,
|
||||
struct lock_class_key *lkclass)
|
||||
{
|
||||
struct request_queue *q;
|
||||
struct gendisk *disk;
|
||||
|
||||
q = blk_mq_init_queue_data(set, queuedata);
|
||||
q = blk_mq_alloc_queue(set, lim, queuedata);
|
||||
if (IS_ERR(q))
|
||||
return ERR_CAST(q);
|
||||
|
||||
@@ -4389,7 +4381,7 @@ static void blk_mq_update_queue_map(struct blk_mq_tag_set *set)
|
||||
if (set->nr_maps == 1)
|
||||
set->map[HCTX_TYPE_DEFAULT].nr_queues = set->nr_hw_queues;
|
||||
|
||||
if (set->ops->map_queues && !is_kdump_kernel()) {
|
||||
if (set->ops->map_queues) {
|
||||
int i;
|
||||
|
||||
/*
|
||||
@@ -4488,14 +4480,12 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
|
||||
|
||||
/*
|
||||
* If a crashdump is active, then we are potentially in a very
|
||||
* memory constrained environment. Limit us to 1 queue and
|
||||
* 64 tags to prevent using too much memory.
|
||||
* memory constrained environment. Limit us to 64 tags to prevent
|
||||
* using too much memory.
|
||||
*/
|
||||
if (is_kdump_kernel()) {
|
||||
set->nr_hw_queues = 1;
|
||||
set->nr_maps = 1;
|
||||
if (is_kdump_kernel())
|
||||
set->queue_depth = min(64U, set->queue_depth);
|
||||
}
|
||||
|
||||
/*
|
||||
* There is no use for more h/w queues than cpus if we just have
|
||||
* a single map
|
||||
@@ -4525,7 +4515,7 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
|
||||
GFP_KERNEL, set->numa_node);
|
||||
if (!set->map[i].mq_map)
|
||||
goto out_free_mq_map;
|
||||
set->map[i].nr_queues = is_kdump_kernel() ? 1 : set->nr_hw_queues;
|
||||
set->map[i].nr_queues = set->nr_hw_queues;
|
||||
}
|
||||
|
||||
blk_mq_update_queue_map(set);
|
||||
|
||||
@@ -25,53 +25,22 @@ void blk_queue_rq_timeout(struct request_queue *q, unsigned int timeout)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_queue_rq_timeout);
|
||||
|
||||
/**
|
||||
* blk_set_default_limits - reset limits to default values
|
||||
* @lim: the queue_limits structure to reset
|
||||
*
|
||||
* Description:
|
||||
* Returns a queue_limit struct to its default state.
|
||||
*/
|
||||
void blk_set_default_limits(struct queue_limits *lim)
|
||||
{
|
||||
lim->max_segments = BLK_MAX_SEGMENTS;
|
||||
lim->max_discard_segments = 1;
|
||||
lim->max_integrity_segments = 0;
|
||||
lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK;
|
||||
lim->virt_boundary_mask = 0;
|
||||
lim->max_segment_size = BLK_MAX_SEGMENT_SIZE;
|
||||
lim->max_sectors = lim->max_hw_sectors = BLK_SAFE_MAX_SECTORS;
|
||||
lim->max_user_sectors = lim->max_dev_sectors = 0;
|
||||
lim->chunk_sectors = 0;
|
||||
lim->max_write_zeroes_sectors = 0;
|
||||
lim->max_zone_append_sectors = 0;
|
||||
lim->max_discard_sectors = 0;
|
||||
lim->max_hw_discard_sectors = 0;
|
||||
lim->max_secure_erase_sectors = 0;
|
||||
lim->discard_granularity = 512;
|
||||
lim->discard_alignment = 0;
|
||||
lim->discard_misaligned = 0;
|
||||
lim->logical_block_size = lim->physical_block_size = lim->io_min = 512;
|
||||
lim->bounce = BLK_BOUNCE_NONE;
|
||||
lim->alignment_offset = 0;
|
||||
lim->io_opt = 0;
|
||||
lim->misaligned = 0;
|
||||
lim->zoned = false;
|
||||
lim->zone_write_granularity = 0;
|
||||
lim->dma_alignment = 511;
|
||||
}
|
||||
|
||||
/**
|
||||
* blk_set_stacking_limits - set default limits for stacking devices
|
||||
* @lim: the queue_limits structure to reset
|
||||
*
|
||||
* Description:
|
||||
* Returns a queue_limit struct to its default state. Should be used
|
||||
* by stacking drivers like DM that have no internal limits.
|
||||
* Prepare queue limits for applying limits from underlying devices using
|
||||
* blk_stack_limits().
|
||||
*/
|
||||
void blk_set_stacking_limits(struct queue_limits *lim)
|
||||
{
|
||||
blk_set_default_limits(lim);
|
||||
memset(lim, 0, sizeof(*lim));
|
||||
lim->logical_block_size = SECTOR_SIZE;
|
||||
lim->physical_block_size = SECTOR_SIZE;
|
||||
lim->io_min = SECTOR_SIZE;
|
||||
lim->discard_granularity = SECTOR_SIZE;
|
||||
lim->dma_alignment = SECTOR_SIZE - 1;
|
||||
lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK;
|
||||
|
||||
/* Inherit limits from component devices */
|
||||
lim->max_segments = USHRT_MAX;
|
||||
@@ -82,9 +51,239 @@ void blk_set_stacking_limits(struct queue_limits *lim)
|
||||
lim->max_dev_sectors = UINT_MAX;
|
||||
lim->max_write_zeroes_sectors = UINT_MAX;
|
||||
lim->max_zone_append_sectors = UINT_MAX;
|
||||
lim->max_user_discard_sectors = UINT_MAX;
|
||||
}
|
||||
EXPORT_SYMBOL(blk_set_stacking_limits);
|
||||
|
||||
static void blk_apply_bdi_limits(struct backing_dev_info *bdi,
|
||||
struct queue_limits *lim)
|
||||
{
|
||||
/*
|
||||
* For read-ahead of large files to be effective, we need to read ahead
|
||||
* at least twice the optimal I/O size.
|
||||
*/
|
||||
bdi->ra_pages = max(lim->io_opt * 2 / PAGE_SIZE, VM_READAHEAD_PAGES);
|
||||
bdi->io_pages = lim->max_sectors >> PAGE_SECTORS_SHIFT;
|
||||
}
|
||||
|
||||
static int blk_validate_zoned_limits(struct queue_limits *lim)
|
||||
{
|
||||
if (!lim->zoned) {
|
||||
if (WARN_ON_ONCE(lim->max_open_zones) ||
|
||||
WARN_ON_ONCE(lim->max_active_zones) ||
|
||||
WARN_ON_ONCE(lim->zone_write_granularity) ||
|
||||
WARN_ON_ONCE(lim->max_zone_append_sectors))
|
||||
return -EINVAL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (WARN_ON_ONCE(!IS_ENABLED(CONFIG_BLK_DEV_ZONED)))
|
||||
return -EINVAL;
|
||||
|
||||
if (lim->zone_write_granularity < lim->logical_block_size)
|
||||
lim->zone_write_granularity = lim->logical_block_size;
|
||||
|
||||
if (lim->max_zone_append_sectors) {
|
||||
/*
|
||||
* The Zone Append size is limited by the maximum I/O size
|
||||
* and the zone size given that it can't span zones.
|
||||
*/
|
||||
lim->max_zone_append_sectors =
|
||||
min3(lim->max_hw_sectors,
|
||||
lim->max_zone_append_sectors,
|
||||
lim->chunk_sectors);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check that the limits in lim are valid, initialize defaults for unset
|
||||
* values, and cap values based on others where needed.
|
||||
*/
|
||||
static int blk_validate_limits(struct queue_limits *lim)
|
||||
{
|
||||
unsigned int max_hw_sectors;
|
||||
|
||||
/*
|
||||
* Unless otherwise specified, default to 512 byte logical blocks and a
|
||||
* physical block size equal to the logical block size.
|
||||
*/
|
||||
if (!lim->logical_block_size)
|
||||
lim->logical_block_size = SECTOR_SIZE;
|
||||
if (lim->physical_block_size < lim->logical_block_size)
|
||||
lim->physical_block_size = lim->logical_block_size;
|
||||
|
||||
/*
|
||||
* The minimum I/O size defaults to the physical block size unless
|
||||
* explicitly overridden.
|
||||
*/
|
||||
if (lim->io_min < lim->physical_block_size)
|
||||
lim->io_min = lim->physical_block_size;
|
||||
|
||||
/*
|
||||
* max_hw_sectors has a somewhat weird default for historical reason,
|
||||
* but driver really should set their own instead of relying on this
|
||||
* value.
|
||||
*
|
||||
* The block layer relies on the fact that every driver can
|
||||
* handle at lest a page worth of data per I/O, and needs the value
|
||||
* aligned to the logical block size.
|
||||
*/
|
||||
if (!lim->max_hw_sectors)
|
||||
lim->max_hw_sectors = BLK_SAFE_MAX_SECTORS;
|
||||
if (WARN_ON_ONCE(lim->max_hw_sectors < PAGE_SECTORS))
|
||||
return -EINVAL;
|
||||
lim->max_hw_sectors = round_down(lim->max_hw_sectors,
|
||||
lim->logical_block_size >> SECTOR_SHIFT);
|
||||
|
||||
	/*
	 * The actual max_sectors value is a complex beast and also takes the
	 * max_dev_sectors value (set by SCSI ULPs) and a user configurable
	 * value into account. The ->max_sectors value is always calculated
	 * from these, so directly setting it won't have any effect.
	 */
	max_hw_sectors = min_not_zero(lim->max_hw_sectors,
				lim->max_dev_sectors);
	if (lim->max_user_sectors) {
		if (lim->max_user_sectors > max_hw_sectors ||
		    lim->max_user_sectors < PAGE_SIZE / SECTOR_SIZE)
			return -EINVAL;
		lim->max_sectors = min(max_hw_sectors, lim->max_user_sectors);
	} else {
		lim->max_sectors = min(max_hw_sectors, BLK_DEF_MAX_SECTORS_CAP);
	}
	lim->max_sectors = round_down(lim->max_sectors,
			lim->logical_block_size >> SECTOR_SHIFT);

	/*
	 * Random default for the maximum number of segments. Drivers should
	 * not rely on this and should set their own.
	 */
	if (!lim->max_segments)
		lim->max_segments = BLK_MAX_SEGMENTS;

	lim->max_discard_sectors =
		min(lim->max_hw_discard_sectors, lim->max_user_discard_sectors);

	if (!lim->max_discard_segments)
		lim->max_discard_segments = 1;

	if (lim->discard_granularity < lim->physical_block_size)
		lim->discard_granularity = lim->physical_block_size;

	/*
	 * By default there is no limit on the segment boundary alignment,
	 * but if there is one it can't be smaller than the page size as
	 * that would break all the normal I/O patterns.
	 */
	if (!lim->seg_boundary_mask)
		lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK;
	if (WARN_ON_ONCE(lim->seg_boundary_mask < PAGE_SIZE - 1))
		return -EINVAL;

	/*
	 * Devices that require a virtual boundary do not support scatter/gather
	 * I/O natively, but instead require a descriptor list entry for each
	 * page (which might not be identical to the Linux PAGE_SIZE). Because
	 * of that they are not limited by our notion of "segment size".
	 */
	if (lim->virt_boundary_mask) {
		if (WARN_ON_ONCE(lim->max_segment_size &&
				 lim->max_segment_size != UINT_MAX))
			return -EINVAL;
		lim->max_segment_size = UINT_MAX;
	} else {
		/*
		 * The maximum segment size has an odd historic 64k default that
		 * drivers probably should override. Just like the I/O size we
		 * require drivers to at least handle a full page per segment.
		 */
		if (!lim->max_segment_size)
			lim->max_segment_size = BLK_MAX_SEGMENT_SIZE;
		if (WARN_ON_ONCE(lim->max_segment_size < PAGE_SIZE))
			return -EINVAL;
	}

	/*
	 * We require drivers to at least do logical block aligned I/O, but
	 * historically could not check for that due to the separate calls
	 * to set the limits. Once the transition is finished the check
	 * below should be narrowed down to check the logical block size.
	 */
	if (!lim->dma_alignment)
		lim->dma_alignment = SECTOR_SIZE - 1;
	if (WARN_ON_ONCE(lim->dma_alignment > PAGE_SIZE))
		return -EINVAL;

	if (lim->alignment_offset) {
		lim->alignment_offset &= (lim->physical_block_size - 1);
		lim->misaligned = 0;
	}

	return blk_validate_zoned_limits(lim);
}

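For orientation, a sketch of what a completely zeroed queue_limits comes back with after the validation above. The field names are real; the values simply restate the defaults the code picks, assuming BLK_SAFE_MAX_SECTORS is the smaller of the two max_sectors caps, as it is in current kernels:

#include <linux/blkdev.h>

/* Illustration only: the defaults a zeroed queue_limits ends up with. */
static const struct queue_limits example_defaults = {
	.logical_block_size	= SECTOR_SIZE,
	.physical_block_size	= SECTOR_SIZE,
	.io_min			= SECTOR_SIZE,
	.max_hw_sectors		= BLK_SAFE_MAX_SECTORS,
	.max_sectors		= BLK_SAFE_MAX_SECTORS,
	.max_segments		= BLK_MAX_SEGMENTS,
	.max_discard_segments	= 1,
	.discard_granularity	= SECTOR_SIZE,
	.seg_boundary_mask	= BLK_SEG_BOUNDARY_MASK,
	.max_segment_size	= BLK_MAX_SEGMENT_SIZE,
	.dma_alignment		= SECTOR_SIZE - 1,
};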
/*
 * Set the default limits for a newly allocated queue.  @lim contains the
 * initial limits set by the driver, which could be no limit in which case
 * all fields are cleared to zero.
 */
int blk_set_default_limits(struct queue_limits *lim)
{
	/*
	 * Most defaults are set by capping the bounds in blk_validate_limits,
	 * but max_user_discard_sectors is special and needs an explicit
	 * initialization to the max value here.
	 */
	lim->max_user_discard_sectors = UINT_MAX;
	return blk_validate_limits(lim);
}

/**
 * queue_limits_commit_update - commit an atomic update of queue limits
 * @q: queue to update
 * @lim: limits to apply
 *
 * Apply the limits in @lim that were obtained from queue_limits_start_update()
 * and updated by the caller to @q.
 *
 * Returns 0 if successful, else a negative error code.
 */
int queue_limits_commit_update(struct request_queue *q,
		struct queue_limits *lim)
	__releases(q->limits_lock)
{
	int error = blk_validate_limits(lim);

	if (!error) {
		q->limits = *lim;
		if (q->disk)
			blk_apply_bdi_limits(q->disk->bdi, lim);
	}
	mutex_unlock(&q->limits_lock);
	return error;
}
EXPORT_SYMBOL_GPL(queue_limits_commit_update);

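A minimal sketch of the intended calling pattern, assuming a hypothetical driver that wants to tighten one limit at runtime (the helper name and trigger are made up; queue_limits_start_update() is the counterpart that takes q->limits_lock and hands back a copy of the current limits):

#include <linux/blkdev.h>

/* Hypothetical driver helper: shrink the discard limit on the fly. */
static int example_shrink_discard(struct request_queue *q,
				  unsigned int new_sectors)
{
	struct queue_limits lim;

	lim = queue_limits_start_update(q);	/* locks q->limits_lock */
	lim.max_hw_discard_sectors = new_sectors;

	/* Validates the whole set, applies it, and drops the lock. */
	return queue_limits_commit_update(q, &lim);
}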
/**
 * queue_limits_set - apply queue limits to queue
 * @q: queue to update
 * @lim: limits to apply
 *
 * Apply the limits in @lim that were freshly initialized to @q.
 * To update existing limits use queue_limits_start_update() and
 * queue_limits_commit_update() instead.
 *
 * Returns 0 if successful, else a negative error code.
 */
int queue_limits_set(struct request_queue *q, struct queue_limits *lim)
{
	mutex_lock(&q->limits_lock);
	return queue_limits_commit_update(q, lim);
}
EXPORT_SYMBOL_GPL(queue_limits_set);

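As a usage sketch (hypothetical driver, made-up values): a driver that describes its device once at setup time and lets validation fill in everything it leaves at zero:

#include <linux/blkdev.h>

/* Hypothetical setup-time helper applying freshly built limits. */
static int example_apply_limits(struct request_queue *q)
{
	struct queue_limits lim = {
		.logical_block_size	= 4096,
		.physical_block_size	= 4096,
		.max_hw_sectors		= 1024,	/* 512 KiB per request */
	};

	/* Unset fields are defaulted and capped by blk_validate_limits(). */
	return queue_limits_set(q, &lim);
}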
/**
 * blk_queue_bounce_limit - set bounce buffer limit for queue
 * @q: the request queue for the device
@@ -177,8 +376,11 @@ EXPORT_SYMBOL(blk_queue_chunk_sectors);
void blk_queue_max_discard_sectors(struct request_queue *q,
		unsigned int max_discard_sectors)
{
	q->limits.max_hw_discard_sectors = max_discard_sectors;
	q->limits.max_discard_sectors = max_discard_sectors;
	struct queue_limits *lim = &q->limits;

	lim->max_hw_discard_sectors = max_discard_sectors;
	lim->max_discard_sectors =
		min(max_discard_sectors, lim->max_user_discard_sectors);
}
EXPORT_SYMBOL(blk_queue_max_discard_sectors);

@@ -393,15 +595,7 @@ EXPORT_SYMBOL(blk_queue_alignment_offset);

void disk_update_readahead(struct gendisk *disk)
{
	struct request_queue *q = disk->queue;

	/*
	 * For read-ahead of large files to be effective, we need to read ahead
	 * at least twice the optimal I/O size.
	 */
	disk->bdi->ra_pages =
		max(queue_io_opt(q) * 2 / PAGE_SIZE, VM_READAHEAD_PAGES);
	disk->bdi->io_pages = queue_max_sectors(q) >> (PAGE_SHIFT - 9);
	blk_apply_bdi_limits(disk->bdi, &disk->queue->limits);
}
EXPORT_SYMBOL_GPL(disk_update_readahead);

@@ -689,33 +883,38 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
	t->zone_write_granularity = max(t->zone_write_granularity,
					b->zone_write_granularity);
	t->zoned = max(t->zoned, b->zoned);
	if (!t->zoned) {
		t->zone_write_granularity = 0;
		t->max_zone_append_sectors = 0;
	}
	return ret;
}
EXPORT_SYMBOL(blk_stack_limits);

/**
 * disk_stack_limits - adjust queue limits for stacked drivers
 * @disk: MD/DM gendisk (top)
 * queue_limits_stack_bdev - adjust queue_limits for stacked devices
 * @t: the stacking driver limits (top device)
 * @bdev: the underlying block device (bottom)
 * @offset: offset to beginning of data within component device
 * @pfx: prefix to use for warnings logged
 *
 * Description:
 *    Merges the limits for a top level gendisk and a bottom level
 *    block_device.
 *    This function is used by stacking drivers like MD and DM to ensure
 *    that all component devices have compatible block sizes and
 *    alignments.  The stacking driver must provide a queue_limits
 *    struct (top) and then iteratively call the stacking function for
 *    all component (bottom) devices.  The stacking function will
 *    attempt to combine the values and ensure proper alignment.
 */
void disk_stack_limits(struct gendisk *disk, struct block_device *bdev,
		sector_t offset)
void queue_limits_stack_bdev(struct queue_limits *t, struct block_device *bdev,
		sector_t offset, const char *pfx)
{
	struct request_queue *t = disk->queue;

	if (blk_stack_limits(&t->limits, &bdev_get_queue(bdev)->limits,
			get_start_sect(bdev) + (offset >> 9)) < 0)
	if (blk_stack_limits(t, &bdev_get_queue(bdev)->limits,
			get_start_sect(bdev) + offset))
		pr_notice("%s: Warning: Device %pg is misaligned\n",
			disk->disk_name, bdev);

	disk_update_readahead(disk);
			pfx, bdev);
}
EXPORT_SYMBOL(disk_stack_limits);
EXPORT_SYMBOL_GPL(queue_limits_stack_bdev);

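A sketch of how a stacking driver might use this, with two hypothetical component devices mapped at sector offset 0. blk_set_stacking_limits() seeds the top-level limits, and "exampledev" is just the warning prefix, typically the stacked disk's name:

#include <linux/blkdev.h>

/* Hypothetical stacking driver: combine limits of two component bdevs. */
static void example_stack(struct queue_limits *lim,
			  struct block_device *bdev_a,
			  struct block_device *bdev_b)
{
	blk_set_stacking_limits(lim);
	queue_limits_stack_bdev(lim, bdev_a, 0, "exampledev");
	queue_limits_stack_bdev(lim, bdev_b, 0, "exampledev");
	/* ...then apply via queue_limits_set()/queue_limits_commit_update(). */
}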
/**
 * blk_queue_update_dma_pad - update pad mask

@@ -27,7 +27,7 @@ void blk_rq_stat_init(struct blk_rq_stat *stat)
/* src is a per-cpu stat, mean isn't initialized */
void blk_rq_stat_sum(struct blk_rq_stat *dst, struct blk_rq_stat *src)
{
	if (!src->nr_samples)
	if (dst->nr_samples + src->nr_samples <= dst->nr_samples)
		return;

	dst->min = min(dst->min, src->min);

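The reworked check folds two cases into a single comparison: an empty source (the sum equals dst) and a wrapping 32-bit sample count (the sum drops below dst), either of which would otherwise feed a bogus total into the later mean calculation. A tiny userspace illustration with made-up counter values:

#include <limits.h>
#include <stdio.h>

/* Illustration only: "dst + src <= dst" on 32-bit counters. */
int main(void)
{
	unsigned int dst = UINT_MAX - 5;	/* made-up sample counts */
	unsigned int src = 10;

	/* src == 0:  dst + 0 == dst            -> skipped */
	/* overflow:  dst + src wraps below dst -> skipped */
	printf("%u + %u wraps to %u (<= %u)\n", dst, src, dst + src, dst);
	return 0;
}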
@@ -174,23 +174,29 @@ static ssize_t queue_discard_max_show(struct request_queue *q, char *page)
static ssize_t queue_discard_max_store(struct request_queue *q,
				       const char *page, size_t count)
{
	unsigned long max_discard;
	ssize_t ret = queue_var_store(&max_discard, page, count);
	unsigned long max_discard_bytes;
	struct queue_limits lim;
	ssize_t ret;
	int err;

	ret = queue_var_store(&max_discard_bytes, page, count);
	if (ret < 0)
		return ret;

	if (max_discard & (q->limits.discard_granularity - 1))
	if (max_discard_bytes & (q->limits.discard_granularity - 1))
		return -EINVAL;

	max_discard >>= 9;
	if (max_discard > UINT_MAX)
	if ((max_discard_bytes >> SECTOR_SHIFT) > UINT_MAX)
		return -EINVAL;

	if (max_discard > q->limits.max_hw_discard_sectors)
		max_discard = q->limits.max_hw_discard_sectors;
	blk_mq_freeze_queue(q);
	lim = queue_limits_start_update(q);
	lim.max_user_discard_sectors = max_discard_bytes >> SECTOR_SHIFT;
	err = queue_limits_commit_update(q, &lim);
	blk_mq_unfreeze_queue(q);

	q->limits.max_discard_sectors = max_discard;
	if (err)
		return err;
	return ret;
}

@@ -226,35 +232,22 @@ static ssize_t queue_zone_append_max_show(struct request_queue *q, char *page)
static ssize_t
queue_max_sectors_store(struct request_queue *q, const char *page, size_t count)
{
	unsigned long var;
	unsigned int max_sectors_kb,
		max_hw_sectors_kb = queue_max_hw_sectors(q) >> 1,
		page_kb = 1 << (PAGE_SHIFT - 10);
	ssize_t ret = queue_var_store(&var, page, count);
	unsigned long max_sectors_kb;
	struct queue_limits lim;
	ssize_t ret;
	int err;

	ret = queue_var_store(&max_sectors_kb, page, count);
	if (ret < 0)
		return ret;

	max_sectors_kb = (unsigned int)var;
	max_hw_sectors_kb = min_not_zero(max_hw_sectors_kb,
					 q->limits.max_dev_sectors >> 1);
	if (max_sectors_kb == 0) {
		q->limits.max_user_sectors = 0;
		max_sectors_kb = min(max_hw_sectors_kb,
				     BLK_DEF_MAX_SECTORS_CAP >> 1);
	} else {
		if (max_sectors_kb > max_hw_sectors_kb ||
		    max_sectors_kb < page_kb)
			return -EINVAL;
		q->limits.max_user_sectors = max_sectors_kb << 1;
	}

	spin_lock_irq(&q->queue_lock);
	q->limits.max_sectors = max_sectors_kb << 1;
	if (q->disk)
		q->disk->bdi->io_pages = max_sectors_kb >> (PAGE_SHIFT - 10);
	spin_unlock_irq(&q->queue_lock);

	blk_mq_freeze_queue(q);
	lim = queue_limits_start_update(q);
	lim.max_user_sectors = max_sectors_kb << 1;
	err = queue_limits_commit_update(q, &lim);
	blk_mq_unfreeze_queue(q);
	if (err)
		return err;
	return ret;
}
