Merge branch 'for-3.7/core' of git://git.kernel.dk/linux-block

Pull block IO update from Jens Axboe:
 "Core block IO bits for 3.7.  Not a huge round this time, it contains:

   - First series from Kent cleaning up and generalizing bio allocation
     and freeing.

   - WRITE_SAME support from Martin.

   - Mikulas' patches to prevent O_DIRECT crashes when someone changes
     the block size of a device.

   - Make bio_split() work on data-less bios (like trim/discards).

   - A few other minor fixups."

Fixed up a silent semantic mis-merge, as per Mikulas Patocka and Andrew
Morton.  It is due to the VM no longer using a prio-tree (see commit
6b2dbba8b6: "mm: replace vma prio_tree with an interval tree").

So make set_blocksize() use mapping_mapped() instead of open-coding the
internal VM knowledge that has changed.
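A hedged sketch of the shape of that fixup (the helper name here is
hypothetical; the real change lives inside set_blocksize() in
fs/block_dev.c):

#include <linux/fs.h>

/* Sketch only: mapping_mapped() asks the VM whether any VMA currently
 * maps the device inode, so set_blocksize() no longer needs to know
 * whether the VM tracks mappings in a prio-tree or an interval tree. */
static int bdev_may_change_blocksize(struct block_device *bdev)
{
	if (mapping_mapped(bdev->bd_inode->i_mapping))
		return -EBUSY;	/* device is memory-mapped somewhere */
	return 0;
}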

* 'for-3.7/core' of git://git.kernel.dk/linux-block: (26 commits)
  block: makes bio_split support bio without data
  scatterlist: refactor the sg_nents
  scatterlist: add sg_nents
  fs: fix include/percpu-rwsem.h export error
  percpu-rw-semaphore: fix documentation typos
  fs/block_dev.c:1644:5: sparse: symbol 'blkdev_mmap' was not declared
  blockdev: turn a rw semaphore into a percpu rw semaphore
  Fix a crash when block device is read and block size is changed at the same time
  block: fix request_queue->flags initialization
  block: lift the initial queue bypass mode on blk_register_queue() instead of blk_init_allocated_queue()
  block: ioctl to zero block ranges
  block: Make blkdev_issue_zeroout use WRITE SAME
  block: Implement support for WRITE SAME
  block: Consolidate command flag and queue limit checks for merges
  block: Clean up special command handling logic
  block/blk-tag.c: Remove useless kfree
  block: remove the duplicated setting for congestion_threshold
  block: reject invalid queue attribute values
  block: Add bio_clone_bioset(), bio_clone_kmalloc()
  block: Consolidate bio_alloc_bioset(), bio_kmalloc()
  ...
Author: Linus Torvalds
Date:   2012-10-11 09:04:23 +09:00

33 changed files with 771 additions and 465 deletions

View File

@@ -206,3 +206,17 @@ Description:
when a discarded area is read the discard_zeroes_data
parameter will be set to one. Otherwise it will be 0 and
the result of reading a discarded area is undefined.
What: /sys/block/<disk>/queue/write_same_max_bytes
Date: January 2012
Contact: Martin K. Petersen <martin.petersen@oracle.com>
Description:
Some devices support a write same operation in which a
single data block can be written to a range of several
contiguous blocks on storage. This can be used to wipe
areas on disk or to initialize drives in a RAID
configuration. write_same_max_bytes indicates how many
bytes can be written in a single write same command. If
write_same_max_bytes is 0, write same is not supported
by the device.
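For illustration, a minimal userspace reader of this attribute (a
sketch; the disk name sda is a placeholder, and a value of 0 means the
device does not support WRITE SAME):

#include <stdio.h>

int main(void)
{
	unsigned long long max_bytes;
	FILE *f = fopen("/sys/block/sda/queue/write_same_max_bytes", "r");

	if (!f) {
		perror("fopen");
		return 1;
	}
	if (fscanf(f, "%llu", &max_bytes) != 1) {
		fclose(f);
		return 1;
	}
	fclose(f);

	if (max_bytes == 0)
		printf("WRITE SAME not supported\n");
	else
		printf("up to %llu bytes per WRITE SAME command\n", max_bytes);
	return 0;
}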

View File

@@ -465,7 +465,6 @@ struct bio {
bio_end_io_t *bi_end_io; /* bi_end_io (bio) */
atomic_t bi_cnt; /* pin count: free when it hits zero */
void *bi_private;
bio_destructor_t *bi_destructor; /* bi_destructor (bio) */
};
With this multipage bio design:
@@ -647,10 +646,6 @@ for a non-clone bio. There are the 6 pools setup for different size biovecs,
so bio_alloc(gfp_mask, nr_iovecs) will allocate a vec_list of the
given size from these slabs.
The bi_destructor() routine takes into account the possibility of the bio
having originated from a different source (see later discussions on
n/w to block transfers and kvec_cb)
The bio_get() routine may be used to hold an extra reference on a bio prior
to i/o submission, if the bio fields are likely to be accessed after the
i/o is issued (since the bio may otherwise get freed in case i/o completion
happens in the meantime).
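A minimal sketch of that reference-holding pattern, using the
submit_bio(rw, bio) signature of this era:

#include <linux/bio.h>

/* Hold an extra reference across submission so the bio can still be
 * inspected even if the i/o completes (and drops its reference) first. */
static void submit_and_inspect(int rw, struct bio *bio)
{
	bio_get(bio);
	submit_bio(rw, bio);
	/* bio fields may safely be examined here */
	bio_put(bio);	/* release the extra reference */
}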

View File

@@ -0,0 +1,27 @@
Percpu rw semaphores
--------------------
Percpu rw semaphores are a new read-write semaphore design that is
optimized for locking for reading.

The problem with traditional read-write semaphores is that when multiple
cores take the lock for reading, the cache line containing the semaphore
bounces between the L1 caches of the cores, causing performance
degradation.

Locking for reading is very fast: it uses RCU and avoids any atomic
instruction in the lock and unlock paths. On the other hand, locking for
writing is very expensive: it calls synchronize_rcu(), which can take
hundreds of milliseconds.

The lock is declared with the "struct percpu_rw_semaphore" type.
The lock is initialized with percpu_init_rwsem(), which returns 0 on
success and -ENOMEM on allocation failure.
The lock must be freed with percpu_free_rwsem() to avoid a memory leak.
The lock is taken for read with percpu_down_read() / percpu_up_read(),
and for write with percpu_down_write() / percpu_up_write().
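A minimal usage sketch, using only the calls named above (kernel
context assumed; error handling trimmed):

#include <linux/percpu-rwsem.h>

static struct percpu_rw_semaphore sem;

static int example_init(void)
{
	return percpu_init_rwsem(&sem);	/* 0 on success, -ENOMEM on failure */
}

static void example_read_path(void)
{
	percpu_down_read(&sem);		/* fast: RCU, no atomic instructions */
	/* ... read-side critical section ... */
	percpu_up_read(&sem);
}

static void example_write_path(void)
{
	percpu_down_write(&sem);	/* slow: calls synchronize_rcu() */
	/* ... write-side critical section ... */
	percpu_up_write(&sem);
}

static void example_exit(void)
{
	percpu_free_rwsem(&sem);
}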
The idea of using RCU for an optimized rw-lock was introduced by
Eric Dumazet <eric.dumazet@gmail.com>. The code was written by
Mikulas Patocka <mpatocka@redhat.com>.

View File

@@ -606,8 +606,8 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
/*
* A queue starts its life with bypass turned on to avoid
* unnecessary bypass on/off overhead and nasty surprises during
* init. The initial bypass will be finished at the end of
* blk_init_allocated_queue().
* init. The initial bypass will be finished when the queue is
* registered by blk_register_queue().
*/
q->bypass_depth = 1;
__set_bit(QUEUE_FLAG_BYPASS, &q->queue_flags);
@@ -694,7 +694,7 @@ blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn,
q->request_fn = rfn;
q->prep_rq_fn = NULL;
q->unprep_rq_fn = NULL;
q->queue_flags = QUEUE_FLAG_DEFAULT;
q->queue_flags |= QUEUE_FLAG_DEFAULT;
/* Override internal queue lock with supplied lock pointer */
if (lock)
@@ -710,11 +710,6 @@ blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn,
/* init elevator */
if (elevator_init(q, NULL))
return NULL;
blk_queue_congestion_threshold(q);
/* all done, end the initial bypass */
blk_queue_bypass_end(q);
return q;
}
EXPORT_SYMBOL(blk_init_allocated_queue);
@@ -1657,8 +1652,8 @@ generic_make_request_checks(struct bio *bio)
goto end_io;
}
if (unlikely(!(bio->bi_rw & REQ_DISCARD) &&
nr_sectors > queue_max_hw_sectors(q))) {
if (likely(bio_is_rw(bio) &&
nr_sectors > queue_max_hw_sectors(q))) {
printk(KERN_ERR "bio too big device %s (%u > %u)\n",
bdevname(bio->bi_bdev, b),
bio_sectors(bio),
@@ -1699,8 +1694,12 @@ generic_make_request_checks(struct bio *bio)
if ((bio->bi_rw & REQ_DISCARD) &&
(!blk_queue_discard(q) ||
((bio->bi_rw & REQ_SECURE) &&
!blk_queue_secdiscard(q)))) {
((bio->bi_rw & REQ_SECURE) && !blk_queue_secdiscard(q)))) {
err = -EOPNOTSUPP;
goto end_io;
}
if (bio->bi_rw & REQ_WRITE_SAME && !bdev_write_same(bio->bi_bdev)) {
err = -EOPNOTSUPP;
goto end_io;
}
@@ -1810,15 +1809,20 @@ EXPORT_SYMBOL(generic_make_request);
*/
void submit_bio(int rw, struct bio *bio)
{
int count = bio_sectors(bio);
bio->bi_rw |= rw;
/*
* If it's a regular read/write or a barrier with data attached,
* go through the normal accounting stuff before submission.
*/
if (bio_has_data(bio) && !(rw & REQ_DISCARD)) {
if (bio_has_data(bio)) {
unsigned int count;
if (unlikely(rw & REQ_WRITE_SAME))
count = bdev_logical_block_size(bio->bi_bdev) >> 9;
else
count = bio_sectors(bio);
if (rw & WRITE) {
count_vm_events(PGPGOUT, count);
} else {
@@ -1864,11 +1868,10 @@ EXPORT_SYMBOL(submit_bio);
*/
int blk_rq_check_limits(struct request_queue *q, struct request *rq)
{
if (rq->cmd_flags & REQ_DISCARD)
if (!rq_mergeable(rq))
return 0;
if (blk_rq_sectors(rq) > queue_max_sectors(q) ||
blk_rq_bytes(rq) > queue_max_hw_sectors(q) << 9) {
if (blk_rq_sectors(rq) > blk_queue_get_max_sectors(q, rq->cmd_flags)) {
printk(KERN_ERR "%s: over max size limit.\n", __func__);
return -EIO;
}
@@ -2340,7 +2343,7 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
req->buffer = bio_data(req->bio);
/* update sector only for requests with clear definition of sector */
if (req->cmd_type == REQ_TYPE_FS || (req->cmd_flags & REQ_DISCARD))
if (req->cmd_type == REQ_TYPE_FS)
req->__sector += total_bytes >> 9;
/* mixed attributes always follow the first bio */
@@ -2781,16 +2784,10 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
blk_rq_init(NULL, rq);
__rq_for_each_bio(bio_src, rq_src) {
bio = bio_alloc_bioset(gfp_mask, bio_src->bi_max_vecs, bs);
bio = bio_clone_bioset(bio_src, gfp_mask, bs);
if (!bio)
goto free_and_out;
__bio_clone(bio, bio_src);
if (bio_integrity(bio_src) &&
bio_integrity_clone(bio, bio_src, gfp_mask, bs))
goto free_and_out;
if (bio_ctr && bio_ctr(bio, bio_src, data))
goto free_and_out;
@@ -2807,7 +2804,7 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
free_and_out:
if (bio)
bio_free(bio, bs);
bio_put(bio);
blk_rq_unprep_clone(rq);
return -ENOMEM;

View File

@@ -129,6 +129,80 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
}
EXPORT_SYMBOL(blkdev_issue_discard);
/**
* blkdev_issue_write_same - queue a write same operation
* @bdev: target blockdev
* @sector: start sector
* @nr_sects: number of sectors to write
* @gfp_mask: memory allocation flags (for bio_alloc)
* @page: page containing data to write
*
* Description:
* Issue a write same request for the sectors in question.
*/
int blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
sector_t nr_sects, gfp_t gfp_mask,
struct page *page)
{
DECLARE_COMPLETION_ONSTACK(wait);
struct request_queue *q = bdev_get_queue(bdev);
unsigned int max_write_same_sectors;
struct bio_batch bb;
struct bio *bio;
int ret = 0;
if (!q)
return -ENXIO;
max_write_same_sectors = q->limits.max_write_same_sectors;
if (max_write_same_sectors == 0)
return -EOPNOTSUPP;
atomic_set(&bb.done, 1);
bb.flags = 1 << BIO_UPTODATE;
bb.wait = &wait;
while (nr_sects) {
bio = bio_alloc(gfp_mask, 1);
if (!bio) {
ret = -ENOMEM;
break;
}
bio->bi_sector = sector;
bio->bi_end_io = bio_batch_end_io;
bio->bi_bdev = bdev;
bio->bi_private = &bb;
bio->bi_vcnt = 1;
bio->bi_io_vec->bv_page = page;
bio->bi_io_vec->bv_offset = 0;
bio->bi_io_vec->bv_len = bdev_logical_block_size(bdev);
if (nr_sects > max_write_same_sectors) {
bio->bi_size = max_write_same_sectors << 9;
nr_sects -= max_write_same_sectors;
sector += max_write_same_sectors;
} else {
bio->bi_size = nr_sects << 9;
nr_sects = 0;
}
atomic_inc(&bb.done);
submit_bio(REQ_WRITE | REQ_WRITE_SAME, bio);
}
/* Wait for bios in-flight */
if (!atomic_dec_and_test(&bb.done))
wait_for_completion(&wait);
if (!test_bit(BIO_UPTODATE, &bb.flags))
ret = -ENOTSUPP;
return ret;
}
EXPORT_SYMBOL(blkdev_issue_write_same);
/**
* blkdev_issue_zeroout - generate a number of zero-filled write bios
* @bdev: blockdev to issue
@@ -140,7 +214,7 @@ EXPORT_SYMBOL(blkdev_issue_discard);
* Generate and issue a number of bios with zero-filled pages.
*/
int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
sector_t nr_sects, gfp_t gfp_mask)
{
int ret;
@@ -190,4 +264,32 @@ int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
return ret;
}
/**
* blkdev_issue_zeroout - zero-fill a block range
* @bdev: blockdev to write
* @sector: start sector
* @nr_sects: number of sectors to write
* @gfp_mask: memory allocation flags (for bio_alloc)
*
* Description:
* Generate and issue a number of bios with zero-filled pages.
*/
int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
sector_t nr_sects, gfp_t gfp_mask)
{
if (bdev_write_same(bdev)) {
unsigned char bdn[BDEVNAME_SIZE];
if (!blkdev_issue_write_same(bdev, sector, nr_sects, gfp_mask,
ZERO_PAGE(0)))
return 0;
bdevname(bdev, bdn);
pr_err("%s: WRITE SAME failed. Manually zeroing.\n", bdn);
}
return __blkdev_issue_zeroout(bdev, sector, nr_sects, gfp_mask);
}
EXPORT_SYMBOL(blkdev_issue_zeroout);
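A usage sketch of the resulting behavior (the caller is hypothetical;
the signature is the one exported above):

#include <linux/blkdev.h>

/* Zero the first megabyte of a device. blkdev_issue_zeroout() now
 * tries WRITE SAME with ZERO_PAGE(0) first and falls back to plain
 * zero-filled writes if the device rejects it. */
static int wipe_first_mib(struct block_device *bdev)
{
	sector_t nr_sects = (1024 * 1024) >> 9;	/* bytes -> 512 B sectors */

	return blkdev_issue_zeroout(bdev, 0, nr_sects, GFP_KERNEL);
}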

View File

@@ -275,14 +275,8 @@ no_merge:
int ll_back_merge_fn(struct request_queue *q, struct request *req,
struct bio *bio)
{
unsigned short max_sectors;
if (unlikely(req->cmd_type == REQ_TYPE_BLOCK_PC))
max_sectors = queue_max_hw_sectors(q);
else
max_sectors = queue_max_sectors(q);
if (blk_rq_sectors(req) + bio_sectors(bio) > max_sectors) {
if (blk_rq_sectors(req) + bio_sectors(bio) >
blk_rq_get_max_sectors(req)) {
req->cmd_flags |= REQ_NOMERGE;
if (req == q->last_merge)
q->last_merge = NULL;
@@ -299,15 +293,8 @@ int ll_back_merge_fn(struct request_queue *q, struct request *req,
int ll_front_merge_fn(struct request_queue *q, struct request *req,
struct bio *bio)
{
unsigned short max_sectors;
if (unlikely(req->cmd_type == REQ_TYPE_BLOCK_PC))
max_sectors = queue_max_hw_sectors(q);
else
max_sectors = queue_max_sectors(q);
if (blk_rq_sectors(req) + bio_sectors(bio) > max_sectors) {
if (blk_rq_sectors(req) + bio_sectors(bio) >
blk_rq_get_max_sectors(req)) {
req->cmd_flags |= REQ_NOMERGE;
if (req == q->last_merge)
q->last_merge = NULL;
@@ -338,7 +325,8 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
/*
* Will it become too large?
*/
if ((blk_rq_sectors(req) + blk_rq_sectors(next)) > queue_max_sectors(q))
if ((blk_rq_sectors(req) + blk_rq_sectors(next)) >
blk_rq_get_max_sectors(req))
return 0;
total_phys_segments = req->nr_phys_segments + next->nr_phys_segments;
@@ -417,16 +405,7 @@ static int attempt_merge(struct request_queue *q, struct request *req,
if (!rq_mergeable(req) || !rq_mergeable(next))
return 0;
/*
* Don't merge file system requests and discard requests
*/
if ((req->cmd_flags & REQ_DISCARD) != (next->cmd_flags & REQ_DISCARD))
return 0;
/*
* Don't merge discard requests and secure discard requests
*/
if ((req->cmd_flags & REQ_SECURE) != (next->cmd_flags & REQ_SECURE))
if (!blk_check_merge_flags(req->cmd_flags, next->cmd_flags))
return 0;
/*
@@ -440,6 +419,10 @@ static int attempt_merge(struct request_queue *q, struct request *req,
|| next->special)
return 0;
if (req->cmd_flags & REQ_WRITE_SAME &&
!blk_write_same_mergeable(req->bio, next->bio))
return 0;
/*
* If we are allowed to merge, then append bio list
* from next to rq and release next. merge_requests_fn
@@ -521,15 +504,10 @@ int blk_attempt_req_merge(struct request_queue *q, struct request *rq,
bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
{
if (!rq_mergeable(rq))
if (!rq_mergeable(rq) || !bio_mergeable(bio))
return false;
/* don't merge file system requests and discard requests */
if ((bio->bi_rw & REQ_DISCARD) != (rq->bio->bi_rw & REQ_DISCARD))
return false;
/* don't merge discard requests and secure discard requests */
if ((bio->bi_rw & REQ_SECURE) != (rq->bio->bi_rw & REQ_SECURE))
if (!blk_check_merge_flags(rq->cmd_flags, bio->bi_rw))
return false;
/* different data direction or already started, don't merge */
@@ -544,6 +522,11 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
if (bio_integrity(bio) != blk_integrity_rq(rq))
return false;
/* must be using the same buffer */
if (rq->cmd_flags & REQ_WRITE_SAME &&
!blk_write_same_mergeable(rq->bio, bio))
return false;
return true;
}

View File

@@ -113,6 +113,7 @@ void blk_set_default_limits(struct queue_limits *lim)
lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK;
lim->max_segment_size = BLK_MAX_SEGMENT_SIZE;
lim->max_sectors = lim->max_hw_sectors = BLK_SAFE_MAX_SECTORS;
lim->max_write_same_sectors = 0;
lim->max_discard_sectors = 0;
lim->discard_granularity = 0;
lim->discard_alignment = 0;
@@ -144,6 +145,7 @@ void blk_set_stacking_limits(struct queue_limits *lim)
lim->max_segments = USHRT_MAX;
lim->max_hw_sectors = UINT_MAX;
lim->max_sectors = UINT_MAX;
lim->max_write_same_sectors = UINT_MAX;
}
EXPORT_SYMBOL(blk_set_stacking_limits);
@@ -285,6 +287,18 @@ void blk_queue_max_discard_sectors(struct request_queue *q,
}
EXPORT_SYMBOL(blk_queue_max_discard_sectors);
/**
* blk_queue_max_write_same_sectors - set max sectors for a single write same
* @q: the request queue for the device
* @max_write_same_sectors: maximum number of sectors to write per command
**/
void blk_queue_max_write_same_sectors(struct request_queue *q,
unsigned int max_write_same_sectors)
{
q->limits.max_write_same_sectors = max_write_same_sectors;
}
EXPORT_SYMBOL(blk_queue_max_write_same_sectors);
/**
* blk_queue_max_segments - set max hw segments for a request for this queue
* @q: the request queue for the device
@@ -510,6 +524,8 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
t->max_sectors = min_not_zero(t->max_sectors, b->max_sectors);
t->max_hw_sectors = min_not_zero(t->max_hw_sectors, b->max_hw_sectors);
t->max_write_same_sectors = min(t->max_write_same_sectors,
b->max_write_same_sectors);
t->bounce_pfn = min_not_zero(t->bounce_pfn, b->bounce_pfn);
t->seg_boundary_mask = min_not_zero(t->seg_boundary_mask,

View File

@@ -26,9 +26,15 @@ queue_var_show(unsigned long var, char *page)
static ssize_t
queue_var_store(unsigned long *var, const char *page, size_t count)
{
char *p = (char *) page;
int err;
unsigned long v;
err = strict_strtoul(page, 10, &v);
if (err || v > UINT_MAX)
return -EINVAL;
*var = v;
*var = simple_strtoul(p, &p, 10);
return count;
}
@@ -48,6 +54,9 @@ queue_requests_store(struct request_queue *q, const char *page, size_t count)
return -EINVAL;
ret = queue_var_store(&nr, page, count);
if (ret < 0)
return ret;
if (nr < BLKDEV_MIN_RQ)
nr = BLKDEV_MIN_RQ;
@@ -102,6 +111,9 @@ queue_ra_store(struct request_queue *q, const char *page, size_t count)
unsigned long ra_kb;
ssize_t ret = queue_var_store(&ra_kb, page, count);
if (ret < 0)
return ret;
q->backing_dev_info.ra_pages = ra_kb >> (PAGE_CACHE_SHIFT - 10);
return ret;
@@ -168,6 +180,13 @@ static ssize_t queue_discard_zeroes_data_show(struct request_queue *q, char *pag
return queue_var_show(queue_discard_zeroes_data(q), page);
}
static ssize_t queue_write_same_max_show(struct request_queue *q, char *page)
{
return sprintf(page, "%llu\n",
(unsigned long long)q->limits.max_write_same_sectors << 9);
}
static ssize_t
queue_max_sectors_store(struct request_queue *q, const char *page, size_t count)
{
@@ -176,6 +195,9 @@ queue_max_sectors_store(struct request_queue *q, const char *page, size_t count)
page_kb = 1 << (PAGE_CACHE_SHIFT - 10);
ssize_t ret = queue_var_store(&max_sectors_kb, page, count);
if (ret < 0)
return ret;
if (max_sectors_kb > max_hw_sectors_kb || max_sectors_kb < page_kb)
return -EINVAL;
@@ -236,6 +258,9 @@ static ssize_t queue_nomerges_store(struct request_queue *q, const char *page,
unsigned long nm;
ssize_t ret = queue_var_store(&nm, page, count);
if (ret < 0)
return ret;
spin_lock_irq(q->queue_lock);
queue_flag_clear(QUEUE_FLAG_NOMERGES, q);
queue_flag_clear(QUEUE_FLAG_NOXMERGES, q);
@@ -264,6 +289,9 @@ queue_rq_affinity_store(struct request_queue *q, const char *page, size_t count)
unsigned long val;
ret = queue_var_store(&val, page, count);
if (ret < 0)
return ret;
spin_lock_irq(q->queue_lock);
if (val == 2) {
queue_flag_set(QUEUE_FLAG_SAME_COMP, q);
@@ -364,6 +392,11 @@ static struct queue_sysfs_entry queue_discard_zeroes_data_entry = {
.show = queue_discard_zeroes_data_show,
};
static struct queue_sysfs_entry queue_write_same_max_entry = {
.attr = {.name = "write_same_max_bytes", .mode = S_IRUGO },
.show = queue_write_same_max_show,
};
static struct queue_sysfs_entry queue_nonrot_entry = {
.attr = {.name = "rotational", .mode = S_IRUGO | S_IWUSR },
.show = queue_show_nonrot,
@@ -411,6 +444,7 @@ static struct attribute *default_attrs[] = {
&queue_discard_granularity_entry.attr,
&queue_discard_max_entry.attr,
&queue_discard_zeroes_data_entry.attr,
&queue_write_same_max_entry.attr,
&queue_nonrot_entry.attr,
&queue_nomerges_entry.attr,
&queue_rq_affinity_entry.attr,
@@ -527,6 +561,12 @@ int blk_register_queue(struct gendisk *disk)
if (WARN_ON(!q))
return -ENXIO;
/*
* Initialization must be complete by now. Finish the initial
* bypass from queue allocation.
*/
blk_queue_bypass_end(q);
ret = blk_trace_init_sysfs(dev);
if (ret)
return ret;

View File

@@ -186,7 +186,8 @@ int blk_queue_init_tags(struct request_queue *q, int depth,
tags = __blk_queue_init_tags(q, depth);
if (!tags)
goto fail;
return -ENOMEM;
} else if (q->queue_tags) {
rc = blk_queue_resize_tags(q, depth);
if (rc)
@@ -203,9 +204,6 @@ int blk_queue_init_tags(struct request_queue *q, int depth,
queue_flag_set_unlocked(QUEUE_FLAG_QUEUED, q);
INIT_LIST_HEAD(&q->tag_busy_list);
return 0;
fail:
kfree(tags);
return -ENOMEM;
}
EXPORT_SYMBOL(blk_queue_init_tags);

View File

@@ -171,14 +171,13 @@ static inline int queue_congestion_off_threshold(struct request_queue *q)
*
* a) it's attached to a gendisk, and
* b) the queue had IO stats enabled when this request was started, and
* c) it's a file system request or a discard request
* c) it's a file system request
*/
static inline int blk_do_io_stat(struct request *rq)
{
return rq->rq_disk &&
(rq->cmd_flags & REQ_IO_STAT) &&
(rq->cmd_type == REQ_TYPE_FS ||
(rq->cmd_flags & REQ_DISCARD));
(rq->cmd_type == REQ_TYPE_FS);
}
/*

View File

@@ -562,8 +562,7 @@ void __elv_add_request(struct request_queue *q, struct request *rq, int where)
if (rq->cmd_flags & REQ_SOFTBARRIER) {
/* barriers are scheduling boundary, update end_sector */
if (rq->cmd_type == REQ_TYPE_FS ||
(rq->cmd_flags & REQ_DISCARD)) {
if (rq->cmd_type == REQ_TYPE_FS) {
q->end_sector = rq_end_sector(rq);
q->boundary_rq = rq;
}
@@ -605,8 +604,7 @@ void __elv_add_request(struct request_queue *q, struct request *rq, int where)
if (elv_attempt_insert_merge(q, rq))
break;
case ELEVATOR_INSERT_SORT:
BUG_ON(rq->cmd_type != REQ_TYPE_FS &&
!(rq->cmd_flags & REQ_DISCARD));
BUG_ON(rq->cmd_type != REQ_TYPE_FS);
rq->cmd_flags |= REQ_SORTED;
q->nr_sorted++;
if (rq_mergeable(rq)) {

View File

@@ -185,6 +185,22 @@ static int blk_ioctl_discard(struct block_device *bdev, uint64_t start,
return blkdev_issue_discard(bdev, start, len, GFP_KERNEL, flags);
}
static int blk_ioctl_zeroout(struct block_device *bdev, uint64_t start,
uint64_t len)
{
if (start & 511)
return -EINVAL;
if (len & 511)
return -EINVAL;
start >>= 9;
len >>= 9;
if (start + len > (i_size_read(bdev->bd_inode) >> 9))
return -EINVAL;
return blkdev_issue_zeroout(bdev, start, len, GFP_KERNEL);
}
static int put_ushort(unsigned long arg, unsigned short val)
{
return put_user(val, (unsigned short __user *)arg);
@@ -300,6 +316,17 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
return blk_ioctl_discard(bdev, range[0], range[1],
cmd == BLKSECDISCARD);
}
case BLKZEROOUT: {
uint64_t range[2];
if (!(mode & FMODE_WRITE))
return -EBADF;
if (copy_from_user(range, (void __user *)arg, sizeof(range)))
return -EFAULT;
return blk_ioctl_zeroout(bdev, range[0], range[1]);
}
case HDIO_GETGEO: {
struct hd_geometry geo;
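From userspace, the new BLKZEROOUT ioctl added above can be exercised
like this (a sketch; /dev/sdX is a placeholder, and both range fields
are byte counts that must be multiples of 512):

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/fs.h>		/* BLKZEROOUT */

int main(void)
{
	uint64_t range[2] = { 0, 1024 * 1024 };	/* start, length in bytes */
	int fd = open("/dev/sdX", O_WRONLY);	/* FMODE_WRITE is required */

	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (ioctl(fd, BLKZEROOUT, range) < 0)
		perror("BLKZEROOUT");
	close(fd);
	return 0;
}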

View File

@@ -162,23 +162,12 @@ static const struct block_device_operations drbd_ops = {
.release = drbd_release,
};
static void bio_destructor_drbd(struct bio *bio)
{
bio_free(bio, drbd_md_io_bio_set);
}
struct bio *bio_alloc_drbd(gfp_t gfp_mask)
{
struct bio *bio;
if (!drbd_md_io_bio_set)
return bio_alloc(gfp_mask, 1);
bio = bio_alloc_bioset(gfp_mask, 1, drbd_md_io_bio_set);
if (!bio)
return NULL;
bio->bi_destructor = bio_destructor_drbd;
return bio;
return bio_alloc_bioset(gfp_mask, 1, drbd_md_io_bio_set);
}
#ifdef __CHECKER__

View File

@@ -266,11 +266,10 @@ static struct bio *bio_chain_clone(struct bio *old_chain, gfp_t gfpmask)
struct bio *tmp, *new_chain = NULL, *tail = NULL;
while (old_chain) {
tmp = bio_kmalloc(gfpmask, old_chain->bi_max_vecs);
tmp = bio_clone_kmalloc(old_chain, gfpmask);
if (!tmp)
goto err_out;
__bio_clone(tmp, old_chain);
tmp->bi_bdev = NULL;
gfpmask &= ~__GFP_WAIT;
tmp->bi_next = NULL;

View File

@@ -522,38 +522,6 @@ static void pkt_bio_finished(struct pktcdvd_device *pd)
}
}
static void pkt_bio_destructor(struct bio *bio)
{
kfree(bio->bi_io_vec);
kfree(bio);
}
static struct bio *pkt_bio_alloc(int nr_iovecs)
{
struct bio_vec *bvl = NULL;
struct bio *bio;
bio = kmalloc(sizeof(struct bio), GFP_KERNEL);
if (!bio)
goto no_bio;
bio_init(bio);
bvl = kcalloc(nr_iovecs, sizeof(struct bio_vec), GFP_KERNEL);
if (!bvl)
goto no_bvl;
bio->bi_max_vecs = nr_iovecs;
bio->bi_io_vec = bvl;
bio->bi_destructor = pkt_bio_destructor;
return bio;
no_bvl:
kfree(bio);
no_bio:
return NULL;
}
/*
* Allocate a packet_data struct
*/
@@ -567,7 +535,7 @@ static struct packet_data *pkt_alloc_packet_data(int frames)
goto no_pkt;
pkt->frames = frames;
pkt->w_bio = pkt_bio_alloc(frames);
pkt->w_bio = bio_kmalloc(GFP_KERNEL, frames);
if (!pkt->w_bio)
goto no_bio;
@@ -581,9 +549,10 @@ static struct packet_data *pkt_alloc_packet_data(int frames)
bio_list_init(&pkt->orig_bios);
for (i = 0; i < frames; i++) {
struct bio *bio = pkt_bio_alloc(1);
struct bio *bio = bio_kmalloc(GFP_KERNEL, 1);
if (!bio)
goto no_rd_bio;
pkt->r_bios[i] = bio;
}
@@ -1111,21 +1080,17 @@ static void pkt_gather_data(struct pktcdvd_device *pd, struct packet_data *pkt)
* Schedule reads for missing parts of the packet.
*/
for (f = 0; f < pkt->frames; f++) {
struct bio_vec *vec;
int p, offset;
if (written[f])
continue;
bio = pkt->r_bios[f];
vec = bio->bi_io_vec;
bio_init(bio);
bio->bi_max_vecs = 1;
bio_reset(bio);
bio->bi_sector = pkt->sector + f * (CD_FRAMESIZE >> 9);
bio->bi_bdev = pd->bdev;
bio->bi_end_io = pkt_end_io_read;
bio->bi_private = pkt;
bio->bi_io_vec = vec;
bio->bi_destructor = pkt_bio_destructor;
p = (f * CD_FRAMESIZE) / PAGE_SIZE;
offset = (f * CD_FRAMESIZE) % PAGE_SIZE;
@@ -1418,14 +1383,11 @@ static void pkt_start_write(struct pktcdvd_device *pd, struct packet_data *pkt)
}
/* Start the write request */
bio_init(pkt->w_bio);
pkt->w_bio->bi_max_vecs = PACKET_MAX_SIZE;
bio_reset(pkt->w_bio);
pkt->w_bio->bi_sector = pkt->sector;
pkt->w_bio->bi_bdev = pd->bdev;
pkt->w_bio->bi_end_io = pkt_end_io_packet_write;
pkt->w_bio->bi_private = pkt;
pkt->w_bio->bi_io_vec = bvec;
pkt->w_bio->bi_destructor = pkt_bio_destructor;
for (f = 0; f < pkt->frames; f++)
if (!bio_add_page(pkt->w_bio, bvec[f].bv_page, CD_FRAMESIZE, bvec[f].bv_offset))
BUG();

View File

@@ -285,7 +285,7 @@ static long raw_ctl_compat_ioctl(struct file *file, unsigned int cmd,
static const struct file_operations raw_fops = {
.read = do_sync_read,
.aio_read = generic_file_aio_read,
.aio_read = blkdev_aio_read,
.write = do_sync_write,
.aio_write = blkdev_aio_write,
.fsync = blkdev_fsync,

View File

@@ -798,14 +798,6 @@ static int crypt_convert(struct crypt_config *cc,
return 0;
}
static void dm_crypt_bio_destructor(struct bio *bio)
{
struct dm_crypt_io *io = bio->bi_private;
struct crypt_config *cc = io->cc;
bio_free(bio, cc->bs);
}
/*
* Generate a new unfragmented bio with the given size
* This should never violate the device limitations
@@ -974,7 +966,6 @@ static void clone_init(struct dm_crypt_io *io, struct bio *clone)
clone->bi_end_io = crypt_endio;
clone->bi_bdev = cc->dev->bdev;
clone->bi_rw = io->base_bio->bi_rw;
clone->bi_destructor = dm_crypt_bio_destructor;
}
static int kcryptd_io_read(struct dm_crypt_io *io, gfp_t gfp)
@@ -988,19 +979,14 @@ static int kcryptd_io_read(struct dm_crypt_io *io, gfp_t gfp)
* copy the required bvecs because we need the original
* one in order to decrypt the whole bio data *afterwards*.
*/
clone = bio_alloc_bioset(gfp, bio_segments(base_bio), cc->bs);
clone = bio_clone_bioset(base_bio, gfp, cc->bs);
if (!clone)
return 1;
crypt_inc_pending(io);
clone_init(io, clone);
clone->bi_idx = 0;
clone->bi_vcnt = bio_segments(base_bio);
clone->bi_size = base_bio->bi_size;
clone->bi_sector = cc->start + io->sector;
memcpy(clone->bi_io_vec, bio_iovec(base_bio),
sizeof(struct bio_vec) * clone->bi_vcnt);
generic_make_request(clone);
return 0;

View File

@@ -249,16 +249,6 @@ static void vm_dp_init(struct dpages *dp, void *data)
dp->context_ptr = data;
}
static void dm_bio_destructor(struct bio *bio)
{
unsigned region;
struct io *io;
retrieve_io_and_region_from_bio(bio, &io, &region);
bio_free(bio, io->client->bios);
}
/*
* Functions for getting the pages from kernel memory.
*/
@@ -317,7 +307,6 @@ static void do_region(int rw, unsigned region, struct dm_io_region *where,
bio->bi_sector = where->sector + (where->count - remaining);
bio->bi_bdev = where->bdev;
bio->bi_end_io = endio;
bio->bi_destructor = dm_bio_destructor;
store_io_and_region_in_bio(bio, io, region);
if (rw & REQ_DISCARD) {

View File

@@ -86,12 +86,17 @@ struct dm_rq_target_io {
};
/*
* For request-based dm.
* One of these is allocated per bio.
* For request-based dm - the bio clones we allocate are embedded in these
* structs.
*
* We allocate these with bio_alloc_bioset, using the front_pad parameter when
* the bioset is created - this means the bio has to come at the end of the
* struct.
*/
struct dm_rq_clone_bio_info {
struct bio *orig;
struct dm_rq_target_io *tio;
struct bio clone;
};
union map_info *dm_get_mapinfo(struct bio *bio)
@@ -211,6 +216,11 @@ struct dm_md_mempools {
static struct kmem_cache *_io_cache;
static struct kmem_cache *_tio_cache;
static struct kmem_cache *_rq_tio_cache;
/*
* Unused now, and needs to be deleted. But since io_pool is overloaded and it's
* still used for _io_cache, I'm leaving this for a later cleanup
*/
static struct kmem_cache *_rq_bio_info_cache;
static int __init local_init(void)
@@ -467,16 +477,6 @@ static void free_rq_tio(struct dm_rq_target_io *tio)
mempool_free(tio, tio->md->tio_pool);
}
static struct dm_rq_clone_bio_info *alloc_bio_info(struct mapped_device *md)
{
return mempool_alloc(md->io_pool, GFP_ATOMIC);
}
static void free_bio_info(struct dm_rq_clone_bio_info *info)
{
mempool_free(info, info->tio->md->io_pool);
}
static int md_in_flight(struct mapped_device *md)
{
return atomic_read(&md->pending[READ]) +
@@ -681,11 +681,6 @@ static void clone_endio(struct bio *bio, int error)
}
}
/*
* Store md for cleanup instead of tio which is about to get freed.
*/
bio->bi_private = md->bs;
free_tio(md, tio);
bio_put(bio);
dec_pending(io, error);
@@ -1036,11 +1031,6 @@ static void __map_bio(struct dm_target *ti, struct bio *clone,
/* error the io and bail out, or requeue it if needed */
md = tio->io->md;
dec_pending(tio->io, r);
/*
* Store bio_set for cleanup.
*/
clone->bi_end_io = NULL;
clone->bi_private = md->bs;
bio_put(clone);
free_tio(md, tio);
} else if (r) {
@@ -1059,13 +1049,6 @@ struct clone_info {
unsigned short idx;
};
static void dm_bio_destructor(struct bio *bio)
{
struct bio_set *bs = bio->bi_private;
bio_free(bio, bs);
}
/*
* Creates a little bio that just does part of a bvec.
*/
@@ -1077,7 +1060,6 @@ static struct bio *split_bvec(struct bio *bio, sector_t sector,
struct bio_vec *bv = bio->bi_io_vec + idx;
clone = bio_alloc_bioset(GFP_NOIO, 1, bs);
clone->bi_destructor = dm_bio_destructor;
*clone->bi_io_vec = *bv;
clone->bi_sector = sector;
@@ -1090,7 +1072,7 @@ static struct bio *split_bvec(struct bio *bio, sector_t sector,
clone->bi_flags |= 1 << BIO_CLONED;
if (bio_integrity(bio)) {
bio_integrity_clone(clone, bio, GFP_NOIO, bs);
bio_integrity_clone(clone, bio, GFP_NOIO);
bio_integrity_trim(clone,
bio_sector_offset(bio, idx, offset), len);
}
@@ -1109,7 +1091,6 @@ static struct bio *clone_bio(struct bio *bio, sector_t sector,
clone = bio_alloc_bioset(GFP_NOIO, bio->bi_max_vecs, bs);
__bio_clone(clone, bio);
clone->bi_destructor = dm_bio_destructor;
clone->bi_sector = sector;
clone->bi_idx = idx;
clone->bi_vcnt = idx + bv_count;
@@ -1117,7 +1098,7 @@ static struct bio *clone_bio(struct bio *bio, sector_t sector,
clone->bi_flags &= ~(1 << BIO_SEG_VALID);
if (bio_integrity(bio)) {
bio_integrity_clone(clone, bio, GFP_NOIO, bs);
bio_integrity_clone(clone, bio, GFP_NOIO);
if (idx != bio->bi_idx || clone->bi_size < bio->bi_size)
bio_integrity_trim(clone,
@@ -1152,9 +1133,8 @@ static void __issue_target_request(struct clone_info *ci, struct dm_target *ti,
* ci->bio->bi_max_vecs is BIO_INLINE_VECS anyway, for both flush
* and discard, so no need for concern about wasted bvec allocations.
*/
clone = bio_alloc_bioset(GFP_NOIO, ci->bio->bi_max_vecs, ci->md->bs);
__bio_clone(clone, ci->bio);
clone->bi_destructor = dm_bio_destructor;
clone = bio_clone_bioset(ci->bio, GFP_NOIO, ci->md->bs);
if (len) {
clone->bi_sector = ci->sector;
clone->bi_size = to_bytes(len);
@@ -1484,30 +1464,17 @@ void dm_dispatch_request(struct request *rq)
}
EXPORT_SYMBOL_GPL(dm_dispatch_request);
static void dm_rq_bio_destructor(struct bio *bio)
{
struct dm_rq_clone_bio_info *info = bio->bi_private;
struct mapped_device *md = info->tio->md;
free_bio_info(info);
bio_free(bio, md->bs);
}
static int dm_rq_bio_constructor(struct bio *bio, struct bio *bio_orig,
void *data)
{
struct dm_rq_target_io *tio = data;
struct mapped_device *md = tio->md;
struct dm_rq_clone_bio_info *info = alloc_bio_info(md);
if (!info)
return -ENOMEM;
struct dm_rq_clone_bio_info *info =
container_of(bio, struct dm_rq_clone_bio_info, clone);
info->orig = bio_orig;
info->tio = tio;
bio->bi_end_io = end_clone_bio;
bio->bi_private = info;
bio->bi_destructor = dm_rq_bio_destructor;
return 0;
}
@@ -2771,7 +2738,10 @@ struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity)
if (!pools->tio_pool)
goto free_io_pool_and_out;
pools->bs = bioset_create(pool_size, 0);
pools->bs = (type == DM_TYPE_BIO_BASED) ?
bioset_create(pool_size, 0) :
bioset_create(pool_size,
offsetof(struct dm_rq_clone_bio_info, clone));
if (!pools->bs)
goto free_tio_pool_and_out;

View File

@@ -155,32 +155,17 @@ static int start_readonly;
* like bio_clone, but with a local bio set
*/
static void mddev_bio_destructor(struct bio *bio)
{
struct mddev *mddev, **mddevp;
mddevp = (void*)bio;
mddev = mddevp[-1];
bio_free(bio, mddev->bio_set);
}
struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs,
struct mddev *mddev)
{
struct bio *b;
struct mddev **mddevp;
if (!mddev || !mddev->bio_set)
return bio_alloc(gfp_mask, nr_iovecs);
b = bio_alloc_bioset(gfp_mask, nr_iovecs,
mddev->bio_set);
b = bio_alloc_bioset(gfp_mask, nr_iovecs, mddev->bio_set);
if (!b)
return NULL;
mddevp = (void*)b;
mddevp[-1] = mddev;
b->bi_destructor = mddev_bio_destructor;
return b;
}
EXPORT_SYMBOL_GPL(bio_alloc_mddev);
@@ -188,32 +173,10 @@ EXPORT_SYMBOL_GPL(bio_alloc_mddev);
struct bio *bio_clone_mddev(struct bio *bio, gfp_t gfp_mask,
struct mddev *mddev)
{
struct bio *b;
struct mddev **mddevp;
if (!mddev || !mddev->bio_set)
return bio_clone(bio, gfp_mask);
b = bio_alloc_bioset(gfp_mask, bio->bi_max_vecs,
mddev->bio_set);
if (!b)
return NULL;
mddevp = (void*)b;
mddevp[-1] = mddev;
b->bi_destructor = mddev_bio_destructor;
__bio_clone(b, bio);
if (bio_integrity(bio)) {
int ret;
ret = bio_integrity_clone(b, bio, gfp_mask, mddev->bio_set);
if (ret < 0) {
bio_put(b);
return NULL;
}
}
return b;
return bio_clone_bioset(bio, gfp_mask, mddev->bio_set);
}
EXPORT_SYMBOL_GPL(bio_clone_mddev);
@@ -5006,8 +4969,7 @@ int md_run(struct mddev *mddev)
}
if (mddev->bio_set == NULL)
mddev->bio_set = bioset_create(BIO_POOL_SIZE,
sizeof(struct mddev *));
mddev->bio_set = bioset_create(BIO_POOL_SIZE, 0);
spin_lock(&pers_lock);
pers = find_pers(mddev->level, mddev->clevel);

Some files were not shown because too many files have changed in this diff.