Merge branch 'for-2.6.28' of git://git.kernel.dk/linux-2.6-block

* 'for-2.6.28' of git://git.kernel.dk/linux-2.6-block: (132 commits)
  doc/cdrom: Trivial documentation error, file not present
  block_dev: fix kernel-doc in new functions
  block: add some comments around the bio read-write flags
  block: mark bio_split_pool static
  block: Find bio sector offset given idx and offset
  block: gendisk integrity wrapper
  block: Switch blk_integrity_compare from bdev to gendisk
  block: Fix double put in blk_integrity_unregister
  block: Introduce integrity data ownership flag
  block: revert part of d7533ad0e132f92e75c1b2eb7c26387b25a583c1
  bio.h: Remove unused conditional code
  block: remove end_{queued|dequeued}_request()
  block: change elevator to use __blk_end_request()
  gdrom: change to use __blk_end_request()
  memstick: change to use __blk_end_request()
  virtio_blk: change to use __blk_end_request()
  blktrace: use BLKTRACE_BDEV_SIZE as the name size for setup structure
  block: add lld busy state exporting interface
  block: Fix blk_start_queueing() to not kick a stopped queue
  include blktrace_api.h in headers_install
  ...
Linus Torvalds committed 2008-10-10 10:52:45 -07:00
124 changed files with 3862 additions and 2661 deletions
+1 -1
@@ -337,7 +337,7 @@ With scatterlists, you use the resulting mapping like this:
	int i, count = dma_map_sg(dev, sglist, nents, direction);
	struct scatterlist *sg;

-	for (i = 0, sg = sglist; i < count; i++, sg++) {
+	for_each_sg(sglist, sg, count, i) {
		hw_address[i] = sg_dma_address(sg);
		hw_len[i] = sg_dma_len(sg);
	}
+4
@@ -364,6 +364,10 @@ X!Edrivers/pnp/system.c
!Eblock/blk-barrier.c
!Eblock/blk-tag.c
!Iblock/blk-tag.c
+!Eblock/blk-integrity.c
+!Iblock/blktrace.c
+!Iblock/genhd.c
+!Eblock/genhd.c
  </chapter>

  <chapter id="chrdev">
+10 -4
@@ -30,12 +30,18 @@ write_expire (in ms)
Similar to read_expire mentioned above, but for writes.

-fifo_batch
+fifo_batch	(number of requests)
----------

-When a read request expires its deadline, we must move some requests from
-the sorted io scheduler list to the block device dispatch queue.  fifo_batch
-controls how many requests we move.
+Requests are grouped into ``batches'' of a particular data direction (read or
+write) which are serviced in increasing sector order.  To limit extra seeking,
+deadline expiries are only checked between batches.  fifo_batch controls the
+maximum number of requests per batch.
+
+This parameter tunes the balance between per-request latency and aggregate
+throughput.  When low latency is the primary concern, smaller is better (where
+a value of 1 yields first-come first-served behaviour).  Increasing fifo_batch
+generally improves throughput, at the cost of latency variation.

writes_starved	(number of dispatches)
+1 -2
@@ -145,8 +145,7 @@ useful for reading photocds.
To play an audio CD, you should first unmount and remove any data
CDROM.  Any of the CDROM player programs should then work (workman,
-workbone, cdplayer, etc.).  Lacking anything else, you could use the
-cdtester program in Documentation/cdrom/sbpcd.
+workbone, cdplayer, etc.).

On a few drives, you can read digital audio directly using a program
such as cdda2wav.  The only types of drive which I've heard support
+2 -2
@@ -4,8 +4,8 @@
obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \
			blk-barrier.o blk-settings.o blk-ioc.o blk-map.o \
-			blk-exec.o blk-merge.o ioctl.o genhd.o scsi_ioctl.o \
-			cmd-filter.o
+			blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \
+			ioctl.o genhd.o scsi_ioctl.o cmd-filter.o

obj-$(CONFIG_BLK_DEV_BSG)	+= bsg.o
obj-$(CONFIG_IOSCHED_NOOP)	+= noop-iosched.o
+11 -3
@@ -462,7 +462,7 @@ static void as_antic_stop(struct as_data *ad)
		del_timer(&ad->antic_timer);
		ad->antic_status = ANTIC_FINISHED;
		/* see as_work_handler */
-		kblockd_schedule_work(&ad->antic_work);
+		kblockd_schedule_work(ad->q, &ad->antic_work);
	}
}
@@ -483,7 +483,7 @@ static void as_antic_timeout(unsigned long data)
	aic = ad->io_context->aic;
	ad->antic_status = ANTIC_FINISHED;
-	kblockd_schedule_work(&ad->antic_work);
+	kblockd_schedule_work(q, &ad->antic_work);

	if (aic->ttime_samples == 0) {
		/* process anticipated on has exited or timed out*/
@@ -745,6 +745,14 @@ static int as_can_break_anticipation(struct as_data *ad, struct request *rq)
 */
static int as_can_anticipate(struct as_data *ad, struct request *rq)
{
+#if 0 /* disable for now, we need to check tag level as well */
+	/*
+	 * SSD device without seek penalty, disable idling
+	 */
+	if (blk_queue_nonrot(ad->q)) axman
+		return 0;
+#endif
+
	if (!ad->io_context)
		/*
		 * Last request submitted was a write
@@ -844,7 +852,7 @@ static void as_completed_request(struct request_queue *q, struct request *rq)
	if (ad->changed_batch && ad->nr_dispatched == 1) {
		ad->current_batch_expires = jiffies +
					ad->batch_expire[ad->batch_data_dir];
-		kblockd_schedule_work(&ad->antic_work);
+		kblockd_schedule_work(q, &ad->antic_work);
		ad->changed_batch = 0;

		if (ad->batch_data_dir == REQ_SYNC)
+71 -1
@@ -293,7 +293,7 @@ int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector)
	bio->bi_end_io = bio_end_empty_barrier;
	bio->bi_private = &wait;
	bio->bi_bdev = bdev;
-	submit_bio(1 << BIO_RW_BARRIER, bio);
+	submit_bio(WRITE_BARRIER, bio);

	wait_for_completion(&wait);
@@ -315,3 +315,73 @@ int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector)
	return ret;
}
EXPORT_SYMBOL(blkdev_issue_flush);
static void blkdev_discard_end_io(struct bio *bio, int err)
{
if (err) {
if (err == -EOPNOTSUPP)
set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
clear_bit(BIO_UPTODATE, &bio->bi_flags);
}
bio_put(bio);
}
/**
* blkdev_issue_discard - queue a discard
* @bdev: blockdev to issue discard for
* @sector: start sector
* @nr_sects: number of sectors to discard
* @gfp_mask: memory allocation flags (for bio_alloc)
*
* Description:
* Issue a discard request for the sectors in question. Does not wait.
*/
int blkdev_issue_discard(struct block_device *bdev,
sector_t sector, sector_t nr_sects, gfp_t gfp_mask)
{
struct request_queue *q;
struct bio *bio;
int ret = 0;
if (bdev->bd_disk == NULL)
return -ENXIO;
q = bdev_get_queue(bdev);
if (!q)
return -ENXIO;
if (!q->prepare_discard_fn)
return -EOPNOTSUPP;
while (nr_sects && !ret) {
bio = bio_alloc(gfp_mask, 0);
if (!bio)
return -ENOMEM;
bio->bi_end_io = blkdev_discard_end_io;
bio->bi_bdev = bdev;
bio->bi_sector = sector;
if (nr_sects > q->max_hw_sectors) {
bio->bi_size = q->max_hw_sectors << 9;
nr_sects -= q->max_hw_sectors;
sector += q->max_hw_sectors;
} else {
bio->bi_size = nr_sects << 9;
nr_sects = 0;
}
bio_get(bio);
submit_bio(DISCARD_BARRIER, bio);
/* Check if it failed immediately */
if (bio_flagged(bio, BIO_EOPNOTSUPP))
ret = -EOPNOTSUPP;
else if (!bio_flagged(bio, BIO_UPTODATE))
ret = -EIO;
bio_put(bio);
}
return ret;
}
EXPORT_SYMBOL(blkdev_issue_discard);
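
As a rough illustration of how a caller might use the new helper, here is a minimal hedged sketch (not part of the patch; the bdev variable and the error handling are assumed to come from the caller's own context):

	/* Ask the device to discard 8 sectors starting at sector 0.
	 * The call returns without waiting for the discard to complete;
	 * -EOPNOTSUPP means no prepare_discard_fn is registered on the queue. */
	int err = blkdev_issue_discard(bdev, 0, 8, GFP_KERNEL);

	if (err && err != -EOPNOTSUPP)
		printk(KERN_WARNING "discard failed: %d\n", err);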
+331 -288
File diff suppressed because it is too large.
+3 -3
@@ -16,7 +16,7 @@
/**
 * blk_end_sync_rq - executes a completion event on a request
 * @rq: request to complete
- * @error: end io status of the request
+ * @error: end I/O status of the request
 */
static void blk_end_sync_rq(struct request *rq, int error)
{
@@ -41,7 +41,7 @@ static void blk_end_sync_rq(struct request *rq, int error)
 * @done:	I/O completion handler
 *
 * Description:
- *    Insert a fully prepared request at the back of the io scheduler queue
+ *    Insert a fully prepared request at the back of the I/O scheduler queue
 *    for execution.  Don't wait for completion.
 */
void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
@@ -72,7 +72,7 @@ EXPORT_SYMBOL_GPL(blk_execute_rq_nowait);
 * @at_head:	insert request at head or tail of queue
 *
 * Description:
- *    Insert a fully prepared request at the back of the io scheduler queue
+ *    Insert a fully prepared request at the back of the I/O scheduler queue
 *    for execution and wait for completion.
 */
int blk_execute_rq(struct request_queue *q, struct gendisk *bd_disk,
+17 -16
@@ -108,51 +108,51 @@ new_segment:
EXPORT_SYMBOL(blk_rq_map_integrity_sg);

/**
- * blk_integrity_compare - Compare integrity profile of two block devices
- * @b1:		Device to compare
- * @b2:		Device to compare
+ * blk_integrity_compare - Compare integrity profile of two disks
+ * @gd1:	Disk to compare
+ * @gd2:	Disk to compare
 *
 * Description: Meta-devices like DM and MD need to verify that all
 * sub-devices use the same integrity format before advertising to
 * upper layers that they can send/receive integrity metadata.  This
- * function can be used to check whether two block devices have
+ * function can be used to check whether two gendisk devices have
 * compatible integrity formats.
 */
-int blk_integrity_compare(struct block_device *bd1, struct block_device *bd2)
+int blk_integrity_compare(struct gendisk *gd1, struct gendisk *gd2)
{
-	struct blk_integrity *b1 = bd1->bd_disk->integrity;
-	struct blk_integrity *b2 = bd2->bd_disk->integrity;
+	struct blk_integrity *b1 = gd1->integrity;
+	struct blk_integrity *b2 = gd2->integrity;

-	BUG_ON(bd1->bd_disk == NULL);
-	BUG_ON(bd2->bd_disk == NULL);
+	if (!b1 && !b2)
+		return 0;

	if (!b1 || !b2)
-		return 0;
+		return -1;

	if (b1->sector_size != b2->sector_size) {
		printk(KERN_ERR "%s: %s/%s sector sz %u != %u\n", __func__,
-		       bd1->bd_disk->disk_name, bd2->bd_disk->disk_name,
+		       gd1->disk_name, gd2->disk_name,
		       b1->sector_size, b2->sector_size);
		return -1;
	}

	if (b1->tuple_size != b2->tuple_size) {
		printk(KERN_ERR "%s: %s/%s tuple sz %u != %u\n", __func__,
-		       bd1->bd_disk->disk_name, bd2->bd_disk->disk_name,
+		       gd1->disk_name, gd2->disk_name,
		       b1->tuple_size, b2->tuple_size);
		return -1;
	}

	if (b1->tag_size && b2->tag_size && (b1->tag_size != b2->tag_size)) {
		printk(KERN_ERR "%s: %s/%s tag sz %u != %u\n", __func__,
-		       bd1->bd_disk->disk_name, bd2->bd_disk->disk_name,
+		       gd1->disk_name, gd2->disk_name,
		       b1->tag_size, b2->tag_size);
		return -1;
	}

	if (strcmp(b1->name, b2->name)) {
		printk(KERN_ERR "%s: %s/%s type %s != %s\n", __func__,
-		       bd1->bd_disk->disk_name, bd2->bd_disk->disk_name,
+		       gd1->disk_name, gd2->disk_name,
		       b1->name, b2->name);
		return -1;
	}
@@ -331,7 +331,8 @@ int blk_integrity_register(struct gendisk *disk, struct blk_integrity *template)
		return -1;

	if (kobject_init_and_add(&bi->kobj, &integrity_ktype,
-				 &disk->dev.kobj, "%s", "integrity")) {
+				 &disk_to_dev(disk)->kobj,
+				 "%s", "integrity")) {
		kmem_cache_free(integrity_cachep, bi);
		return -1;
	}
@@ -375,7 +376,7 @@ void blk_integrity_unregister(struct gendisk *disk)
	kobject_uevent(&bi->kobj, KOBJ_REMOVE);
	kobject_del(&bi->kobj);
-	kobject_put(&disk->dev.kobj);
	kmem_cache_free(integrity_cachep, bi);
+	disk->integrity = NULL;
}
EXPORT_SYMBOL(blk_integrity_unregister);
+39 -29
@@ -41,10 +41,10 @@ static int __blk_rq_unmap_user(struct bio *bio)
}

static int __blk_rq_map_user(struct request_queue *q, struct request *rq,
-			     void __user *ubuf, unsigned int len)
+			     struct rq_map_data *map_data, void __user *ubuf,
+			     unsigned int len, int null_mapped, gfp_t gfp_mask)
{
	unsigned long uaddr;
-	unsigned int alignment;
	struct bio *bio, *orig_bio;
	int reading, ret;
@@ -55,15 +55,17 @@ static int __blk_rq_map_user(struct request_queue *q, struct request *rq,
	 * direct dma. else, set up kernel bounce buffers
	 */
	uaddr = (unsigned long) ubuf;
-	alignment = queue_dma_alignment(q) | q->dma_pad_mask;
-	if (!(uaddr & alignment) && !(len & alignment))
-		bio = bio_map_user(q, NULL, uaddr, len, reading);
+	if (blk_rq_aligned(q, ubuf, len) && !map_data)
+		bio = bio_map_user(q, NULL, uaddr, len, reading, gfp_mask);
	else
-		bio = bio_copy_user(q, uaddr, len, reading);
+		bio = bio_copy_user(q, map_data, uaddr, len, reading, gfp_mask);

	if (IS_ERR(bio))
		return PTR_ERR(bio);

+	if (null_mapped)
+		bio->bi_flags |= (1 << BIO_NULL_MAPPED);
+
	orig_bio = bio;
	blk_queue_bounce(q, &bio);
@@ -85,17 +87,19 @@ static int __blk_rq_map_user(struct request_queue *q, struct request *rq,
}

/**
- * blk_rq_map_user - map user data to a request, for REQ_BLOCK_PC usage
+ * blk_rq_map_user - map user data to a request, for REQ_TYPE_BLOCK_PC usage
 * @q:		request queue where request should be inserted
 * @rq:		request structure to fill
+ * @map_data:	pointer to the rq_map_data holding pages (if necessary)
 * @ubuf:	the user buffer
 * @len:	length of user data
+ * @gfp_mask:	memory allocation flags
 *
 * Description:
- *    Data will be mapped directly for zero copy io, if possible. Otherwise
+ *    Data will be mapped directly for zero copy I/O, if possible. Otherwise
 *    a kernel bounce buffer is used.
 *
- *    A matching blk_rq_unmap_user() must be issued at the end of io, while
+ *    A matching blk_rq_unmap_user() must be issued at the end of I/O, while
 *    still in process context.
 *
 *    Note: The mapped bio may need to be bounced through blk_queue_bounce()
@@ -105,16 +109,22 @@ static int __blk_rq_map_user(struct request_queue *q, struct request *rq,
 *    unmapping.
 */
int blk_rq_map_user(struct request_queue *q, struct request *rq,
-		    void __user *ubuf, unsigned long len)
+		    struct rq_map_data *map_data, void __user *ubuf,
+		    unsigned long len, gfp_t gfp_mask)
{
	unsigned long bytes_read = 0;
	struct bio *bio = NULL;
-	int ret;
+	int ret, null_mapped = 0;

	if (len > (q->max_hw_sectors << 9))
		return -EINVAL;
-	if (!len || !ubuf)
+	if (!len)
		return -EINVAL;
+	if (!ubuf) {
+		if (!map_data || rq_data_dir(rq) != READ)
+			return -EINVAL;
+		null_mapped = 1;
+	}

	while (bytes_read != len) {
		unsigned long map_len, end, start;
@@ -132,7 +142,8 @@ int blk_rq_map_user(struct request_queue *q, struct request *rq,
		if (end - start > BIO_MAX_PAGES)
			map_len -= PAGE_SIZE;

-		ret = __blk_rq_map_user(q, rq, ubuf, map_len);
+		ret = __blk_rq_map_user(q, rq, map_data, ubuf, map_len,
+					null_mapped, gfp_mask);
		if (ret < 0)
			goto unmap_rq;
		if (!bio)
@@ -154,18 +165,20 @@ unmap_rq:
EXPORT_SYMBOL(blk_rq_map_user);

/**
- * blk_rq_map_user_iov - map user data to a request, for REQ_BLOCK_PC usage
+ * blk_rq_map_user_iov - map user data to a request, for REQ_TYPE_BLOCK_PC usage
 * @q:		request queue where request should be inserted
 * @rq:		request to map data to
+ * @map_data:	pointer to the rq_map_data holding pages (if necessary)
 * @iov:	pointer to the iovec
 * @iov_count:	number of elements in the iovec
 * @len:	I/O byte count
+ * @gfp_mask:	memory allocation flags
 *
 * Description:
- *    Data will be mapped directly for zero copy io, if possible. Otherwise
+ *    Data will be mapped directly for zero copy I/O, if possible. Otherwise
 *    a kernel bounce buffer is used.
 *
- *    A matching blk_rq_unmap_user() must be issued at the end of io, while
+ *    A matching blk_rq_unmap_user() must be issued at the end of I/O, while
 *    still in process context.
 *
 *    Note: The mapped bio may need to be bounced through blk_queue_bounce()
@@ -175,7 +188,8 @@ EXPORT_SYMBOL(blk_rq_map_user);
 *    unmapping.
 */
int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
-			struct sg_iovec *iov, int iov_count, unsigned int len)
+			struct rq_map_data *map_data, struct sg_iovec *iov,
+			int iov_count, unsigned int len, gfp_t gfp_mask)
{
	struct bio *bio;
	int i, read = rq_data_dir(rq) == READ;
@@ -193,10 +207,11 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
		}
	}

-	if (unaligned || (q->dma_pad_mask & len))
-		bio = bio_copy_user_iov(q, iov, iov_count, read);
+	if (unaligned || (q->dma_pad_mask & len) || map_data)
+		bio = bio_copy_user_iov(q, map_data, iov, iov_count, read,
+					gfp_mask);
	else
-		bio = bio_map_user_iov(q, NULL, iov, iov_count, read);
+		bio = bio_map_user_iov(q, NULL, iov, iov_count, read, gfp_mask);

	if (IS_ERR(bio))
		return PTR_ERR(bio);
@@ -216,6 +231,7 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
	rq->buffer = rq->data = NULL;
	return 0;
}
+EXPORT_SYMBOL(blk_rq_map_user_iov);

/**
 * blk_rq_unmap_user - unmap a request with user data
@@ -224,7 +240,7 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
 * Description:
 *    Unmap a rq previously mapped by blk_rq_map_user(). The caller must
 *    supply the original rq->bio from the blk_rq_map_user() return, since
- *    the io completion may have changed rq->bio.
+ *    the I/O completion may have changed rq->bio.
 */
int blk_rq_unmap_user(struct bio *bio)
{
@@ -250,7 +266,7 @@ int blk_rq_unmap_user(struct bio *bio)
EXPORT_SYMBOL(blk_rq_unmap_user);

/**
- * blk_rq_map_kern - map kernel data to a request, for REQ_BLOCK_PC usage
+ * blk_rq_map_kern - map kernel data to a request, for REQ_TYPE_BLOCK_PC usage
 * @q:		request queue where request should be inserted
 * @rq:		request to fill
 * @kbuf:	the kernel buffer
@@ -264,8 +280,6 @@ EXPORT_SYMBOL(blk_rq_unmap_user);
int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
		    unsigned int len, gfp_t gfp_mask)
{
-	unsigned long kaddr;
-	unsigned int alignment;
	int reading = rq_data_dir(rq) == READ;
	int do_copy = 0;
	struct bio *bio;
@@ -275,11 +289,7 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
	if (!len || !kbuf)
		return -EINVAL;

-	kaddr = (unsigned long)kbuf;
-	alignment = queue_dma_alignment(q) | q->dma_pad_mask;
-	do_copy = ((kaddr & alignment) || (len & alignment) ||
-		   object_is_on_stack(kbuf));
+	do_copy = !blk_rq_aligned(q, kbuf, len) || object_is_on_stack(kbuf);

	if (do_copy)
		bio = bio_copy_kern(q, kbuf, len, gfp_mask, reading);
	else
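
Before the next file, a hedged sketch of the widened blk_rq_map_user() calling convention introduced above (it mirrors the bsg.c update later in this diff; q, rq, ubuf and len are assumed to be set up by the caller, and the surrounding request execution is omitted):

	struct bio *bio;
	int ret;

	/* NULL map_data: no preallocated pages, so the block layer picks
	 * between a direct mapping and a bounce copy on its own. */
	ret = blk_rq_map_user(q, rq, NULL, ubuf, len, GFP_KERNEL);
	if (ret)
		return ret;

	bio = rq->bio;		/* save it: completion may change rq->bio */
	/* ... execute the request ... */
	ret = blk_rq_unmap_user(bio);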
+25 -104
@@ -11,7 +11,7 @@
void blk_recalc_rq_sectors(struct request *rq, int nsect)
{
-	if (blk_fs_request(rq)) {
+	if (blk_fs_request(rq) || blk_discard_rq(rq)) {
		rq->hard_sector += nsect;
		rq->hard_nr_sectors -= nsect;
@@ -41,12 +41,9 @@ void blk_recalc_rq_sectors(struct request *rq, int nsect)
void blk_recalc_rq_segments(struct request *rq)
{
	int nr_phys_segs;
-	int nr_hw_segs;
	unsigned int phys_size;
-	unsigned int hw_size;
	struct bio_vec *bv, *bvprv = NULL;
	int seg_size;
-	int hw_seg_size;
	int cluster;
	struct req_iterator iter;
	int high, highprv = 1;
@@ -56,8 +53,8 @@ void blk_recalc_rq_segments(struct request *rq)
		return;

	cluster = test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags);
-	hw_seg_size = seg_size = 0;
-	phys_size = hw_size = nr_phys_segs = nr_hw_segs = 0;
+	seg_size = 0;
+	phys_size = nr_phys_segs = 0;
	rq_for_each_segment(bv, rq, iter) {
		/*
		 * the trick here is making sure that a high page is never
@@ -66,7 +63,7 @@ void blk_recalc_rq_segments(struct request *rq)
		 */
		high = page_to_pfn(bv->bv_page) > q->bounce_pfn;
		if (high || highprv)
-			goto new_hw_segment;
+			goto new_segment;
		if (cluster) {
			if (seg_size + bv->bv_len > q->max_segment_size)
				goto new_segment;
@@ -74,40 +71,19 @@ void blk_recalc_rq_segments(struct request *rq)
				goto new_segment;
			if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bv))
				goto new_segment;
-			if (BIOVEC_VIRT_OVERSIZE(hw_seg_size + bv->bv_len))
-				goto new_hw_segment;

			seg_size += bv->bv_len;
-			hw_seg_size += bv->bv_len;
			bvprv = bv;
			continue;
		}
new_segment:
-		if (BIOVEC_VIRT_MERGEABLE(bvprv, bv) &&
-		    !BIOVEC_VIRT_OVERSIZE(hw_seg_size + bv->bv_len))
-			hw_seg_size += bv->bv_len;
-		else {
-new_hw_segment:
-			if (nr_hw_segs == 1 &&
-			    hw_seg_size > rq->bio->bi_hw_front_size)
-				rq->bio->bi_hw_front_size = hw_seg_size;
-			hw_seg_size = BIOVEC_VIRT_START_SIZE(bv) + bv->bv_len;
-			nr_hw_segs++;
-		}
-
		nr_phys_segs++;
		bvprv = bv;
		seg_size = bv->bv_len;
		highprv = high;
	}

-	if (nr_hw_segs == 1 &&
-	    hw_seg_size > rq->bio->bi_hw_front_size)
-		rq->bio->bi_hw_front_size = hw_seg_size;
-	if (hw_seg_size > rq->biotail->bi_hw_back_size)
-		rq->biotail->bi_hw_back_size = hw_seg_size;
	rq->nr_phys_segments = nr_phys_segs;
-	rq->nr_hw_segments = nr_hw_segs;
}

void blk_recount_segments(struct request_queue *q, struct bio *bio)
@@ -120,7 +96,6 @@ void blk_recount_segments(struct request_queue *q, struct bio *bio)
	blk_recalc_rq_segments(&rq);
	bio->bi_next = nxt;
	bio->bi_phys_segments = rq.nr_phys_segments;
-	bio->bi_hw_segments = rq.nr_hw_segments;
	bio->bi_flags |= (1 << BIO_SEG_VALID);
}
EXPORT_SYMBOL(blk_recount_segments);
@@ -131,13 +106,17 @@ static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio,
	if (!test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags))
		return 0;

-	if (!BIOVEC_PHYS_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt)))
-		return 0;
	if (bio->bi_size + nxt->bi_size > q->max_segment_size)
		return 0;

+	if (!bio_has_data(bio))
+		return 1;
+
+	if (!BIOVEC_PHYS_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt)))
+		return 0;
+
	/*
-	 * bio and nxt are contigous in memory, check if the queue allows
+	 * bio and nxt are contiguous in memory; check if the queue allows
	 * these two to be merged into one
	 */
	if (BIO_SEG_BOUNDARY(q, bio, nxt))
@@ -146,22 +125,6 @@ static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio,
	return 0;
}

-static int blk_hw_contig_segment(struct request_queue *q, struct bio *bio,
-				 struct bio *nxt)
-{
-	if (!bio_flagged(bio, BIO_SEG_VALID))
-		blk_recount_segments(q, bio);
-	if (!bio_flagged(nxt, BIO_SEG_VALID))
-		blk_recount_segments(q, nxt);
-	if (!BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt)) ||
-	    BIOVEC_VIRT_OVERSIZE(bio->bi_hw_back_size + nxt->bi_hw_front_size))
-		return 0;
-	if (bio->bi_hw_back_size + nxt->bi_hw_front_size > q->max_segment_size)
-		return 0;
-	return 1;
-}
-
/*
 * map a request to scatterlist, return number of sg entries setup. Caller
 * must make sure sg can hold rq->nr_phys_segments entries
@@ -275,10 +238,9 @@ static inline int ll_new_hw_segment(struct request_queue *q,
					    struct request *req,
					    struct bio *bio)
{
-	int nr_hw_segs = bio_hw_segments(q, bio);
	int nr_phys_segs = bio_phys_segments(q, bio);

-	if (req->nr_hw_segments + nr_hw_segs > q->max_hw_segments
+	if (req->nr_phys_segments + nr_phys_segs > q->max_hw_segments
	    || req->nr_phys_segments + nr_phys_segs > q->max_phys_segments) {
		req->cmd_flags |= REQ_NOMERGE;
		if (req == q->last_merge)
@@ -290,7 +252,6 @@ static inline int ll_new_hw_segment(struct request_queue *q,
	 * This will form the start of a new hw segment.  Bump both
	 * counters.
	 */
-	req->nr_hw_segments += nr_hw_segs;
	req->nr_phys_segments += nr_phys_segs;
	return 1;
}
@@ -299,7 +260,6 @@ int ll_back_merge_fn(struct request_queue *q, struct request *req,
		     struct bio *bio)
{
	unsigned short max_sectors;
-	int len;

	if (unlikely(blk_pc_request(req)))
		max_sectors = q->max_hw_sectors;
@@ -316,19 +276,6 @@ int ll_back_merge_fn(struct request_queue *q, struct request *req,
		blk_recount_segments(q, req->biotail);
	if (!bio_flagged(bio, BIO_SEG_VALID))
		blk_recount_segments(q, bio);
-	len = req->biotail->bi_hw_back_size + bio->bi_hw_front_size;
-	if (BIOVEC_VIRT_MERGEABLE(__BVEC_END(req->biotail), __BVEC_START(bio))
-	    && !BIOVEC_VIRT_OVERSIZE(len)) {
-		int mergeable =  ll_new_mergeable(q, req, bio);
-
-		if (mergeable) {
-			if (req->nr_hw_segments == 1)
-				req->bio->bi_hw_front_size = len;
-			if (bio->bi_hw_segments == 1)
-				bio->bi_hw_back_size = len;
-		}
-		return mergeable;
-	}

	return ll_new_hw_segment(q, req, bio);
}
@@ -337,7 +284,6 @@ int ll_front_merge_fn(struct request_queue *q, struct request *req,
		      struct bio *bio)
{
	unsigned short max_sectors;
-	int len;

	if (unlikely(blk_pc_request(req)))
		max_sectors = q->max_hw_sectors;
@@ -351,23 +297,10 @@ int ll_front_merge_fn(struct request_queue *q, struct request *req,
		q->last_merge = NULL;
		return 0;
	}
-	len = bio->bi_hw_back_size + req->bio->bi_hw_front_size;
	if (!bio_flagged(bio, BIO_SEG_VALID))
		blk_recount_segments(q, bio);
	if (!bio_flagged(req->bio, BIO_SEG_VALID))
		blk_recount_segments(q, req->bio);
-	if (BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(req->bio)) &&
-	    !BIOVEC_VIRT_OVERSIZE(len)) {
-		int mergeable =  ll_new_mergeable(q, req, bio);
-
-		if (mergeable) {
-			if (bio->bi_hw_segments == 1)
-				bio->bi_hw_front_size = len;
-			if (req->nr_hw_segments == 1)
-				req->biotail->bi_hw_back_size = len;
-		}
-		return mergeable;
-	}

	return ll_new_hw_segment(q, req, bio);
}
@@ -376,7 +309,6 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
				struct request *next)
{
	int total_phys_segments;
-	int total_hw_segments;

	/*
	 * First check if the either of the requests are re-queued
@@ -398,26 +330,11 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
	if (total_phys_segments > q->max_phys_segments)
		return 0;

-	total_hw_segments = req->nr_hw_segments + next->nr_hw_segments;
-	if (blk_hw_contig_segment(q, req->biotail, next->bio)) {
-		int len = req->biotail->bi_hw_back_size +
-				next->bio->bi_hw_front_size;
-		/*
-		 * propagate the combined length to the end of the requests
-		 */
-		if (req->nr_hw_segments == 1)
-			req->bio->bi_hw_front_size = len;
-		if (next->nr_hw_segments == 1)
-			next->biotail->bi_hw_back_size = len;
-		total_hw_segments--;
-	}
-
-	if (total_hw_segments > q->max_hw_segments)
+	if (total_phys_segments > q->max_hw_segments)
		return 0;

	/* Merge is OK... */
	req->nr_phys_segments = total_phys_segments;
-	req->nr_hw_segments = total_hw_segments;
	return 1;
}
@@ -470,17 +387,21 @@ static int attempt_merge(struct request_queue *q, struct request *req,
	elv_merge_requests(q, req, next);

	if (req->rq_disk) {
-		struct hd_struct *part
-			= get_part(req->rq_disk, req->sector);
-		disk_round_stats(req->rq_disk);
-		req->rq_disk->in_flight--;
-		if (part) {
-			part_round_stats(part);
-			part->in_flight--;
-		}
+		struct hd_struct *part;
+		int cpu;
+
+		cpu = part_stat_lock();
+		part = disk_map_sector_rcu(req->rq_disk, req->sector);
+
+		part_round_stats(cpu, part);
+		part_dec_in_flight(part);
+
+		part_stat_unlock();
	}

	req->ioprio = ioprio_best(req->ioprio, next->ioprio);
+	if (blk_rq_cpu_valid(next))
+		req->cpu = next->cpu;
+
	__blk_put_request(q, next);
	return 1;
+39 -4
@@ -32,6 +32,23 @@ void blk_queue_prep_rq(struct request_queue *q, prep_rq_fn *pfn)
}
EXPORT_SYMBOL(blk_queue_prep_rq);
/**
* blk_queue_set_discard - set a discard_sectors function for queue
* @q: queue
* @dfn: prepare_discard function
*
* It's possible for a queue to register a discard callback which is used
* to transform a discard request into the appropriate type for the
* hardware. If none is registered, then discard requests are failed
* with %EOPNOTSUPP.
*
*/
void blk_queue_set_discard(struct request_queue *q, prepare_discard_fn *dfn)
{
q->prepare_discard_fn = dfn;
}
EXPORT_SYMBOL(blk_queue_set_discard);
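
A hedged sketch of how a low-level driver might wire this up during queue setup (the prepare_discard_fn signature taking the queue and the request is an assumption based on this series, and my_prepare_discard() is a hypothetical driver hook):

	static int my_prepare_discard(struct request_queue *q, struct request *rq)
	{
		/* translate the discard request into a device-specific command */
		return 0;
	}

	/* during probe, after the queue has been allocated */
	blk_queue_set_discard(q, my_prepare_discard);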
/**
 * blk_queue_merge_bvec - set a merge_bvec function for queue
 * @q:	queue
@@ -60,6 +77,24 @@ void blk_queue_softirq_done(struct request_queue *q, softirq_done_fn *fn)
}
EXPORT_SYMBOL(blk_queue_softirq_done);
void blk_queue_rq_timeout(struct request_queue *q, unsigned int timeout)
{
q->rq_timeout = timeout;
}
EXPORT_SYMBOL_GPL(blk_queue_rq_timeout);
void blk_queue_rq_timed_out(struct request_queue *q, rq_timed_out_fn *fn)
{
q->rq_timed_out_fn = fn;
}
EXPORT_SYMBOL_GPL(blk_queue_rq_timed_out);
void blk_queue_lld_busy(struct request_queue *q, lld_busy_fn *fn)
{
q->lld_busy_fn = fn;
}
EXPORT_SYMBOL_GPL(blk_queue_lld_busy);
/**
 * blk_queue_make_request - define an alternate make_request function for a device
 * @q:  the request queue for the device to be affected
@@ -127,7 +162,7 @@ EXPORT_SYMBOL(blk_queue_make_request);
 *    Different hardware can have different requirements as to what pages
 *    it can do I/O directly to. A low level driver can call
 *    blk_queue_bounce_limit to have lower memory pages allocated as bounce
- *    buffers for doing I/O to pages residing above @page.
+ *    buffers for doing I/O to pages residing above @dma_addr.
 **/
void blk_queue_bounce_limit(struct request_queue *q, u64 dma_addr)
{
@@ -212,7 +247,7 @@ EXPORT_SYMBOL(blk_queue_max_phys_segments);
 * Description:
 *    Enables a low level driver to set an upper limit on the number of
 *    hw data segments in a request.  This would be the largest number of
- *    address/length pairs the host adapter can actually give as once
+ *    address/length pairs the host adapter can actually give at once
 *    to the device.
 **/
void blk_queue_max_hw_segments(struct request_queue *q,
@@ -393,7 +428,7 @@ EXPORT_SYMBOL(blk_queue_segment_boundary);
 * @mask:  alignment mask
 *
 * description:
- *    set required memory and length aligment for direct dma transactions.
+ *    set required memory and length alignment for direct dma transactions.
 *    this is used when buiding direct io requests for the queue.
 *
 **/
@@ -409,7 +444,7 @@ EXPORT_SYMBOL(blk_queue_dma_alignment);
 * @mask:  alignment mask
 *
 * description:
- *    update required memory and length aligment for direct dma transactions.
+ *    update required memory and length alignment for direct dma transactions.
 *    If the requested alignment is larger than the current alignment, then
 *    the current queue alignment is updated to the new value, otherwise it
 *    is left alone.  The design of this is to allow multiple objects
+175
@@ -0,0 +1,175 @@
/*
* Functions related to softirq rq completions
*/
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/interrupt.h>
#include <linux/cpu.h>
#include "blk.h"
static DEFINE_PER_CPU(struct list_head, blk_cpu_done);
/*
* Softirq action handler - move entries to local list and loop over them
* while passing them to the queue registered handler.
*/
static void blk_done_softirq(struct softirq_action *h)
{
struct list_head *cpu_list, local_list;
local_irq_disable();
cpu_list = &__get_cpu_var(blk_cpu_done);
list_replace_init(cpu_list, &local_list);
local_irq_enable();
while (!list_empty(&local_list)) {
struct request *rq;
rq = list_entry(local_list.next, struct request, csd.list);
list_del_init(&rq->csd.list);
rq->q->softirq_done_fn(rq);
}
}
#if defined(CONFIG_SMP) && defined(CONFIG_USE_GENERIC_SMP_HELPERS)
static void trigger_softirq(void *data)
{
struct request *rq = data;
unsigned long flags;
struct list_head *list;
local_irq_save(flags);
list = &__get_cpu_var(blk_cpu_done);
list_add_tail(&rq->csd.list, list);
if (list->next == &rq->csd.list)
raise_softirq_irqoff(BLOCK_SOFTIRQ);
local_irq_restore(flags);
}
/*
* Setup and invoke a run of 'trigger_softirq' on the given cpu.
*/
static int raise_blk_irq(int cpu, struct request *rq)
{
if (cpu_online(cpu)) {
struct call_single_data *data = &rq->csd;
data->func = trigger_softirq;
data->info = rq;
data->flags = 0;
__smp_call_function_single(cpu, data);
return 0;
}
return 1;
}
#else /* CONFIG_SMP && CONFIG_USE_GENERIC_SMP_HELPERS */
static int raise_blk_irq(int cpu, struct request *rq)
{
return 1;
}
#endif
static int __cpuinit blk_cpu_notify(struct notifier_block *self,
unsigned long action, void *hcpu)
{
/*
* If a CPU goes away, splice its entries to the current CPU
* and trigger a run of the softirq
*/
if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
int cpu = (unsigned long) hcpu;
local_irq_disable();
list_splice_init(&per_cpu(blk_cpu_done, cpu),
&__get_cpu_var(blk_cpu_done));
raise_softirq_irqoff(BLOCK_SOFTIRQ);
local_irq_enable();
}
return NOTIFY_OK;
}
static struct notifier_block __cpuinitdata blk_cpu_notifier = {
.notifier_call = blk_cpu_notify,
};
void __blk_complete_request(struct request *req)
{
struct request_queue *q = req->q;
unsigned long flags;
int ccpu, cpu, group_cpu;
BUG_ON(!q->softirq_done_fn);
local_irq_save(flags);
cpu = smp_processor_id();
group_cpu = blk_cpu_to_group(cpu);
/*
* Select completion CPU
*/
if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) && req->cpu != -1)
ccpu = req->cpu;
else
ccpu = cpu;
if (ccpu == cpu || ccpu == group_cpu) {
struct list_head *list;
do_local:
list = &__get_cpu_var(blk_cpu_done);
list_add_tail(&req->csd.list, list);
/*
* if the list only contains our just added request,
* signal a raise of the softirq. If there are already
* entries there, someone already raised the irq but it
* hasn't run yet.
*/
if (list->next == &req->csd.list)
raise_softirq_irqoff(BLOCK_SOFTIRQ);
} else if (raise_blk_irq(ccpu, req))
goto do_local;
local_irq_restore(flags);
}
/**
* blk_complete_request - end I/O on a request
* @req: the request being processed
*
* Description:
* Ends all I/O on a request. It does not handle partial completions,
* unless the driver actually implements this in its completion callback
* through requeueing. The actual completion happens out-of-order,
* through a softirq handler. The user must have registered a completion
* callback through blk_queue_softirq_done().
**/
void blk_complete_request(struct request *req)
{
if (unlikely(blk_should_fake_timeout(req->q)))
return;
if (!blk_mark_rq_complete(req))
__blk_complete_request(req);
}
EXPORT_SYMBOL(blk_complete_request);
__init int blk_softirq_init(void)
{
int i;
for_each_possible_cpu(i)
INIT_LIST_HEAD(&per_cpu(blk_cpu_done, i));
open_softirq(BLOCK_SOFTIRQ, blk_done_softirq);
register_hotcpu_notifier(&blk_cpu_notifier);
return 0;
}
subsys_initcall(blk_softirq_init);
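
To show how the pieces above fit together, a minimal hedged sketch of a driver opting in to softirq completions (my_softirq_done() is hypothetical, and the actual end-of-request bookkeeping is left to the driver):

	static void my_softirq_done(struct request *rq)
	{
		/* runs from BLOCK_SOFTIRQ context on the CPU selected by
		 * __blk_complete_request(); finish the request here, e.g.
		 * with __blk_end_request() under the queue lock. */
	}

	/* at initialization time */
	blk_queue_softirq_done(q, my_softirq_done);

	/* from the device interrupt handler, once per completed request */
	blk_complete_request(rq);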
+33 -2
@@ -156,6 +156,30 @@ static ssize_t queue_nomerges_store(struct request_queue *q, const char *page,
	return ret;
}
static ssize_t queue_rq_affinity_show(struct request_queue *q, char *page)
{
unsigned int set = test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags);
return queue_var_show(set != 0, page);
}
static ssize_t
queue_rq_affinity_store(struct request_queue *q, const char *page, size_t count)
{
ssize_t ret = -EINVAL;
#if defined(CONFIG_USE_GENERIC_SMP_HELPERS)
unsigned long val;
ret = queue_var_store(&val, page, count);
spin_lock_irq(q->queue_lock);
if (val)
queue_flag_set(QUEUE_FLAG_SAME_COMP, q);
else
queue_flag_clear(QUEUE_FLAG_SAME_COMP, q);
spin_unlock_irq(q->queue_lock);
#endif
return ret;
}
static struct queue_sysfs_entry queue_requests_entry = {
	.attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR },
@@ -197,6 +221,12 @@ static struct queue_sysfs_entry queue_nomerges_entry = {
	.store = queue_nomerges_store,
};
static struct queue_sysfs_entry queue_rq_affinity_entry = {
.attr = {.name = "rq_affinity", .mode = S_IRUGO | S_IWUSR },
.show = queue_rq_affinity_show,
.store = queue_rq_affinity_store,
};
static struct attribute *default_attrs[] = {
	&queue_requests_entry.attr,
	&queue_ra_entry.attr,
@@ -205,6 +235,7 @@ static struct attribute *default_attrs[] = {
	&queue_iosched_entry.attr,
	&queue_hw_sector_size_entry.attr,
	&queue_nomerges_entry.attr,
+	&queue_rq_affinity_entry.attr,
	NULL,
};
@@ -310,7 +341,7 @@ int blk_register_queue(struct gendisk *disk)
	if (!q->request_fn)
		return 0;

-	ret = kobject_add(&q->kobj, kobject_get(&disk->dev.kobj),
+	ret = kobject_add(&q->kobj, kobject_get(&disk_to_dev(disk)->kobj),
			  "%s", "queue");
	if (ret < 0)
		return ret;
@@ -339,6 +370,6 @@ void blk_unregister_queue(struct gendisk *disk)
		kobject_uevent(&q->kobj, KOBJ_REMOVE);
		kobject_del(&q->kobj);
-		kobject_put(&disk->dev.kobj);
+		kobject_put(&disk_to_dev(disk)->kobj);
	}
}
+16 -6
@@ -29,7 +29,7 @@ EXPORT_SYMBOL(blk_queue_find_tag);
 * __blk_free_tags - release a given set of tag maintenance info
 * @bqt:	the tag map to free
 *
- * Tries to free the specified @bqt@.  Returns true if it was
+ * Tries to free the specified @bqt.  Returns true if it was
 * actually freed and false if there are still references using it
 */
static int __blk_free_tags(struct blk_queue_tag *bqt)
@@ -78,7 +78,7 @@ void __blk_queue_free_tags(struct request_queue *q)
 * blk_free_tags - release a given set of tag maintenance info
 * @bqt:	the tag map to free
 *
- * For externally managed @bqt@ frees the map.  Callers of this
+ * For externally managed @bqt frees the map.  Callers of this
 * function must guarantee to have released all the queues that
 * might have been using this tag map.
 */
@@ -94,7 +94,7 @@ EXPORT_SYMBOL(blk_free_tags);
 * @q:  the request queue for the device
 *
 *  Notes:
- *	This is used to disabled tagged queuing to a device, yet leave
+ *	This is used to disable tagged queuing to a device, yet leave
 *	queue in function.
 **/
void blk_queue_free_tags(struct request_queue *q)
@@ -271,7 +271,7 @@ EXPORT_SYMBOL(blk_queue_resize_tags);
 * @rq: the request that has completed
 *
 *  Description:
- *    Typically called when end_that_request_first() returns 0, meaning
+ *    Typically called when end_that_request_first() returns %0, meaning
 *    all transfers have been done for a request. It's important to call
 *    this function before end_that_request_last(), as that will put the
 *    request back on the free list thus corrupting the internal tag list.
@@ -337,6 +337,7 @@ EXPORT_SYMBOL(blk_queue_end_tag);
int blk_queue_start_tag(struct request_queue *q, struct request *rq)
{
	struct blk_queue_tag *bqt = q->queue_tags;
+	unsigned max_depth, offset;
	int tag;

	if (unlikely((rq->cmd_flags & REQ_QUEUED))) {
@@ -350,10 +351,19 @@ int blk_queue_start_tag(struct request_queue *q, struct request *rq)
	/*
	 * Protect against shared tag maps, as we may not have exclusive
	 * access to the tag map.
+	 *
+	 * We reserve a few tags just for sync IO, since we don't want
+	 * to starve sync IO on behalf of flooding async IO.
	 */
+	max_depth = bqt->max_depth;
+	if (rq_is_sync(rq))
+		offset = 0;
+	else
+		offset = max_depth >> 2;
+
	do {
-		tag = find_first_zero_bit(bqt->tag_map, bqt->max_depth);
-		if (tag >= bqt->max_depth)
+		tag = find_next_zero_bit(bqt->tag_map, max_depth, offset);
+		if (tag >= max_depth)
			return 1;

	} while (test_and_set_bit_lock(tag, bqt->tag_map));
+238
@@ -0,0 +1,238 @@
/*
* Functions related to generic timeout handling of requests.
*/
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/blkdev.h>
#include <linux/fault-inject.h>
#include "blk.h"
#ifdef CONFIG_FAIL_IO_TIMEOUT
static DECLARE_FAULT_ATTR(fail_io_timeout);
static int __init setup_fail_io_timeout(char *str)
{
return setup_fault_attr(&fail_io_timeout, str);
}
__setup("fail_io_timeout=", setup_fail_io_timeout);
int blk_should_fake_timeout(struct request_queue *q)
{
if (!test_bit(QUEUE_FLAG_FAIL_IO, &q->queue_flags))
return 0;
return should_fail(&fail_io_timeout, 1);
}
static int __init fail_io_timeout_debugfs(void)
{
return init_fault_attr_dentries(&fail_io_timeout, "fail_io_timeout");
}
late_initcall(fail_io_timeout_debugfs);
ssize_t part_timeout_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
struct gendisk *disk = dev_to_disk(dev);
int set = test_bit(QUEUE_FLAG_FAIL_IO, &disk->queue->queue_flags);
return sprintf(buf, "%d\n", set != 0);
}
ssize_t part_timeout_store(struct device *dev, struct device_attribute *attr,
const char *buf, size_t count)
{
struct gendisk *disk = dev_to_disk(dev);
int val;
if (count) {
struct request_queue *q = disk->queue;
char *p = (char *) buf;
val = simple_strtoul(p, &p, 10);
spin_lock_irq(q->queue_lock);
if (val)
queue_flag_set(QUEUE_FLAG_FAIL_IO, q);
else
queue_flag_clear(QUEUE_FLAG_FAIL_IO, q);
spin_unlock_irq(q->queue_lock);
}
return count;
}
#endif /* CONFIG_FAIL_IO_TIMEOUT */
/*
* blk_delete_timer - Delete/cancel timer for a given function.
* @req: request that we are canceling timer for
*
*/
void blk_delete_timer(struct request *req)
{
struct request_queue *q = req->q;
/*
* Nothing to detach
*/
if (!q->rq_timed_out_fn || !req->deadline)
return;
list_del_init(&req->timeout_list);
if (list_empty(&q->timeout_list))
del_timer(&q->timeout);
}
static void blk_rq_timed_out(struct request *req)
{
struct request_queue *q = req->q;
enum blk_eh_timer_return ret;
ret = q->rq_timed_out_fn(req);
switch (ret) {
case BLK_EH_HANDLED:
__blk_complete_request(req);
break;
case BLK_EH_RESET_TIMER:
blk_clear_rq_complete(req);
blk_add_timer(req);
break;
case BLK_EH_NOT_HANDLED:
/*
* LLD handles this for now but in the future
* we can send a request msg to abort the command
* and we can move more of the generic scsi eh code to
* the blk layer.
*/
break;
default:
printk(KERN_ERR "block: bad eh return: %d\n", ret);
break;
}
}
void blk_rq_timed_out_timer(unsigned long data)
{
struct request_queue *q = (struct request_queue *) data;
unsigned long flags, uninitialized_var(next), next_set = 0;
struct request *rq, *tmp;
spin_lock_irqsave(q->queue_lock, flags);
list_for_each_entry_safe(rq, tmp, &q->timeout_list, timeout_list) {
if (time_after_eq(jiffies, rq->deadline)) {
list_del_init(&rq->timeout_list);
/*
* Check if we raced with end io completion
*/
if (blk_mark_rq_complete(rq))
continue;
blk_rq_timed_out(rq);
}
if (!next_set) {
next = rq->deadline;
next_set = 1;
} else if (time_after(next, rq->deadline))
next = rq->deadline;
}
if (next_set && !list_empty(&q->timeout_list))
mod_timer(&q->timeout, round_jiffies(next));
spin_unlock_irqrestore(q->queue_lock, flags);
}
/**
* blk_abort_request -- Request request recovery for the specified command
* @req: pointer to the request of interest
*
* This function requests that the block layer start recovery for the
* request by deleting the timer and calling the q's timeout function.
* LLDDs who implement their own error recovery MAY ignore the timeout
* event if they generated blk_abort_req. Must hold queue lock.
*/
void blk_abort_request(struct request *req)
{
if (blk_mark_rq_complete(req))
return;
blk_delete_timer(req);
blk_rq_timed_out(req);
}
EXPORT_SYMBOL_GPL(blk_abort_request);
/**
* blk_add_timer - Start timeout timer for a single request
* @req: request that is about to start running.
*
* Notes:
* Each request has its own timer, and as it is added to the queue, we
* set up the timer. When the request completes, we cancel the timer.
*/
void blk_add_timer(struct request *req)
{
struct request_queue *q = req->q;
unsigned long expiry;
if (!q->rq_timed_out_fn)
return;
BUG_ON(!list_empty(&req->timeout_list));
BUG_ON(test_bit(REQ_ATOM_COMPLETE, &req->atomic_flags));
if (req->timeout)
req->deadline = jiffies + req->timeout;
else {
req->deadline = jiffies + q->rq_timeout;
/*
* Some LLDs, like scsi, peek at the timeout to prevent
* a command from being retried forever.
*/
req->timeout = q->rq_timeout;
}
list_add_tail(&req->timeout_list, &q->timeout_list);
/*
* If the timer isn't already pending or this timeout is earlier
* than an existing one, modify the timer. Round to next nearest
* second.
*/
expiry = round_jiffies(req->deadline);
/*
* We use ->deadline == 0 to detect whether a timer was added or
* not, so just increase to next jiffy for that specific case
*/
if (unlikely(!req->deadline))
req->deadline = 1;
if (!timer_pending(&q->timeout) ||
time_before(expiry, q->timeout.expires))
mod_timer(&q->timeout, expiry);
}
/**
* blk_abort_queue -- Abort all request on given queue
* @queue: pointer to queue
*
*/
void blk_abort_queue(struct request_queue *q)
{
unsigned long flags;
struct request *rq, *tmp;
spin_lock_irqsave(q->queue_lock, flags);
elv_abort_queue(q);
list_for_each_entry_safe(rq, tmp, &q->timeout_list, timeout_list)
blk_abort_request(rq);
spin_unlock_irqrestore(q->queue_lock, flags);
}
EXPORT_SYMBOL_GPL(blk_abort_queue);
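
For reference, a hedged sketch of how a driver might opt in to the new timeout handling (my_timed_out() is hypothetical; the return values are the enum blk_eh_timer_return cases handled above):

	static enum blk_eh_timer_return my_timed_out(struct request *rq)
	{
		/* e.g. kick off a controller reset, then rearm the timer */
		return BLK_EH_RESET_TIMER;
	}

	/* during queue setup */
	blk_queue_rq_timed_out(q, my_timed_out);
	blk_queue_rq_timeout(q, 30 * HZ);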
+48
View File
@@ -17,6 +17,42 @@ void __blk_queue_free_tags(struct request_queue *q);
void blk_unplug_work(struct work_struct *work);
void blk_unplug_timeout(unsigned long data);
void blk_rq_timed_out_timer(unsigned long data);
void blk_delete_timer(struct request *);
void blk_add_timer(struct request *);
/*
* Internal atomic flags for request handling
*/
enum rq_atomic_flags {
REQ_ATOM_COMPLETE = 0,
};
/*
* EH timer and IO completion will both attempt to 'grab' the request, make
* sure that only one of them suceeds
*/
static inline int blk_mark_rq_complete(struct request *rq)
{
return test_and_set_bit(REQ_ATOM_COMPLETE, &rq->atomic_flags);
}
static inline void blk_clear_rq_complete(struct request *rq)
{
clear_bit(REQ_ATOM_COMPLETE, &rq->atomic_flags);
}
#ifdef CONFIG_FAIL_IO_TIMEOUT
int blk_should_fake_timeout(struct request_queue *);
ssize_t part_timeout_show(struct device *, struct device_attribute *, char *);
ssize_t part_timeout_store(struct device *, struct device_attribute *,
const char *, size_t);
#else
static inline int blk_should_fake_timeout(struct request_queue *q)
{
return 0;
}
#endif
struct io_context *current_io_context(gfp_t gfp_flags, int node);
@@ -59,4 +95,16 @@ static inline int queue_congestion_off_threshold(struct request_queue *q)
#endif /* BLK_DEV_INTEGRITY */
static inline int blk_cpu_to_group(int cpu)
{
#ifdef CONFIG_SCHED_MC
cpumask_t mask = cpu_coregroup_map(cpu);
return first_cpu(mask);
#elif defined(CONFIG_SCHED_SMT)
return first_cpu(per_cpu(cpu_sibling_map, cpu));
#else
return cpu;
#endif
}
#endif
+10 -22
@@ -111,23 +111,9 @@ static int act_log_check(struct blk_trace *bt, u32 what, sector_t sector,
 */
static u32 ddir_act[2] __read_mostly = { BLK_TC_ACT(BLK_TC_READ), BLK_TC_ACT(BLK_TC_WRITE) };

-/*
- * Bio action bits of interest
- */
-static u32 bio_act[9] __read_mostly = { 0, BLK_TC_ACT(BLK_TC_BARRIER), BLK_TC_ACT(BLK_TC_SYNC), 0, BLK_TC_ACT(BLK_TC_AHEAD), 0, 0, 0, BLK_TC_ACT(BLK_TC_META) };
-
-/*
- * More could be added as needed, taking care to increment the decrementer
- * to get correct indexing
- */
-#define trace_barrier_bit(rw)	\
-	(((rw) & (1 << BIO_RW_BARRIER)) >> (BIO_RW_BARRIER - 0))
-#define trace_sync_bit(rw)	\
-	(((rw) & (1 << BIO_RW_SYNC)) >> (BIO_RW_SYNC - 1))
-#define trace_ahead_bit(rw)	\
-	(((rw) & (1 << BIO_RW_AHEAD)) << (2 - BIO_RW_AHEAD))
-#define trace_meta_bit(rw)	\
-	(((rw) & (1 << BIO_RW_META)) >> (BIO_RW_META - 3))
+/* The ilog2() calls fall out because they're constant */
+#define MASK_TC_BIT(rw, __name) ( (rw & (1 << BIO_RW_ ## __name)) << \
+	  (ilog2(BLK_TC_ ## __name) + BLK_TC_SHIFT - BIO_RW_ ## __name) )

/*
 * The worker for the various blk_add_trace*() types. Fills out a
@@ -147,10 +133,11 @@ void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
		return;

	what |= ddir_act[rw & WRITE];
-	what |= bio_act[trace_barrier_bit(rw)];
-	what |= bio_act[trace_sync_bit(rw)];
-	what |= bio_act[trace_ahead_bit(rw)];
-	what |= bio_act[trace_meta_bit(rw)];
+	what |= MASK_TC_BIT(rw, BARRIER);
+	what |= MASK_TC_BIT(rw, SYNC);
+	what |= MASK_TC_BIT(rw, AHEAD);
+	what |= MASK_TC_BIT(rw, META);
+	what |= MASK_TC_BIT(rw, DISCARD);

	pid = tsk->pid;
	if (unlikely(act_log_check(bt, what, sector, pid)))
@@ -382,7 +369,8 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
	if (!buts->buf_size || !buts->buf_nr)
		return -EINVAL;

-	strcpy(buts->name, name);
+	strncpy(buts->name, name, BLKTRACE_BDEV_SIZE);
+	buts->name[BLKTRACE_BDEV_SIZE - 1] = '\0';

	/*
	 * some device names have larger paths - convert the slashes
+4 -2
@@ -283,7 +283,8 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, int has_write_perm)
		next_rq->cmd_type = rq->cmd_type;

		dxferp = (void*)(unsigned long)hdr->din_xferp;
-		ret = blk_rq_map_user(q, next_rq, dxferp, hdr->din_xfer_len);
+		ret = blk_rq_map_user(q, next_rq, NULL, dxferp,
+				      hdr->din_xfer_len, GFP_KERNEL);
		if (ret)
			goto out;
	}
@@ -298,7 +299,8 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, int has_write_perm)
		dxfer_len = 0;

	if (dxfer_len) {
-		ret = blk_rq_map_user(q, rq, dxferp, dxfer_len);
+		ret = blk_rq_map_user(q, rq, NULL, dxferp, dxfer_len,
+				      GFP_KERNEL);
		if (ret)
			goto out;
	}

Some files were not shown because too many files have changed in this diff.