Merge branch 'for-2.6.28' of git://git.kernel.dk/linux-2.6-block

* 'for-2.6.28' of git://git.kernel.dk/linux-2.6-block: (132 commits)
  doc/cdrom: Trivial documentation error, file not present
  block_dev: fix kernel-doc in new functions
  block: add some comments around the bio read-write flags
  block: mark bio_split_pool static
  block: Find bio sector offset given idx and offset
  block: gendisk integrity wrapper
  block: Switch blk_integrity_compare from bdev to gendisk
  block: Fix double put in blk_integrity_unregister
  block: Introduce integrity data ownership flag
  block: revert part of d7533ad0e132f92e75c1b2eb7c26387b25a583c1
  bio.h: Remove unused conditional code
  block: remove end_{queued|dequeued}_request()
  block: change elevator to use __blk_end_request()
  gdrom: change to use __blk_end_request()
  memstick: change to use __blk_end_request()
  virtio_blk: change to use __blk_end_request()
  blktrace: use BLKTRACE_BDEV_SIZE as the name size for setup structure
  block: add lld busy state exporting interface
  block: Fix blk_start_queueing() to not kick a stopped queue
  include blktrace_api.h in headers_install
  ...
Linus Torvalds committed 2008-10-10 10:52:45 -07:00
124 changed files with 3862 additions and 2661 deletions
+1 -1
@@ -337,7 +337,7 @@ With scatterlists, you use the resulting mapping like this:
	int i, count = dma_map_sg(dev, sglist, nents, direction);
	struct scatterlist *sg;

-	for (i = 0, sg = sglist; i < count; i++, sg++) {
+	for_each_sg(sglist, sg, count, i) {
		hw_address[i] = sg_dma_address(sg);
		hw_len[i] = sg_dma_len(sg);
	}
+4
@@ -364,6 +364,10 @@ X!Edrivers/pnp/system.c
!Eblock/blk-barrier.c
!Eblock/blk-tag.c
!Iblock/blk-tag.c
+!Eblock/blk-integrity.c
+!Iblock/blktrace.c
+!Iblock/genhd.c
+!Eblock/genhd.c
  </chapter>

  <chapter id="chrdev">
+10 -4
@@ -30,12 +30,18 @@ write_expire (in ms)
Similar to read_expire mentioned above, but for writes.

-fifo_batch
+fifo_batch	(number of requests)
----------

-When a read request expires its deadline, we must move some requests from
-the sorted io scheduler list to the block device dispatch queue.  fifo_batch
-controls how many requests we move.
+Requests are grouped into ``batches'' of a particular data direction (read or
+write) which are serviced in increasing sector order.  To limit extra seeking,
+deadline expiries are only checked between batches.  fifo_batch controls the
+maximum number of requests per batch.
+
+This parameter tunes the balance between per-request latency and aggregate
+throughput.  When low latency is the primary concern, smaller is better (where
+a value of 1 yields first-come first-served behaviour).  Increasing fifo_batch
+generally improves throughput, at the cost of latency variation.

writes_starved	(number of dispatches)
+1 -2
@@ -145,8 +145,7 @@ useful for reading photocds.
To play an audio CD, you should first unmount and remove any data
CDROM.  Any of the CDROM player programs should then work (workman,
-workbone, cdplayer, etc.).  Lacking anything else, you could use the
-cdtester program in Documentation/cdrom/sbpcd.
+workbone, cdplayer, etc.).

On a few drives, you can read digital audio directly using a program
such as cdda2wav.  The only types of drive which I've heard support
+2 -2
@@ -4,8 +4,8 @@
obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \
			blk-barrier.o blk-settings.o blk-ioc.o blk-map.o \
-			blk-exec.o blk-merge.o ioctl.o genhd.o scsi_ioctl.o \
-			cmd-filter.o
+			blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \
+			ioctl.o genhd.o scsi_ioctl.o cmd-filter.o

obj-$(CONFIG_BLK_DEV_BSG)	+= bsg.o
obj-$(CONFIG_IOSCHED_NOOP)	+= noop-iosched.o
+11 -3
@@ -462,7 +462,7 @@ static void as_antic_stop(struct as_data *ad)
		del_timer(&ad->antic_timer);
		ad->antic_status = ANTIC_FINISHED;
		/* see as_work_handler */
-		kblockd_schedule_work(&ad->antic_work);
+		kblockd_schedule_work(ad->q, &ad->antic_work);
	}
}
@@ -483,7 +483,7 @@ static void as_antic_timeout(unsigned long data)
	aic = ad->io_context->aic;
	ad->antic_status = ANTIC_FINISHED;
-	kblockd_schedule_work(&ad->antic_work);
+	kblockd_schedule_work(q, &ad->antic_work);

	if (aic->ttime_samples == 0) {
		/* process anticipated on has exited or timed out*/
@@ -745,6 +745,14 @@ static int as_can_break_anticipation(struct as_data *ad, struct request *rq)
 */
static int as_can_anticipate(struct as_data *ad, struct request *rq)
{
+#if 0 /* disable for now, we need to check tag level as well */
+	/*
+	 * SSD device without seek penalty, disable idling
+	 */
+	if (blk_queue_nonrot(ad->q)) axman
+		return 0;
+#endif
+
	if (!ad->io_context)
		/*
		 * Last request submitted was a write
@@ -844,7 +852,7 @@ static void as_completed_request(struct request_queue *q, struct request *rq)
	if (ad->changed_batch && ad->nr_dispatched == 1) {
		ad->current_batch_expires = jiffies +
					ad->batch_expire[ad->batch_data_dir];
-		kblockd_schedule_work(&ad->antic_work);
+		kblockd_schedule_work(q, &ad->antic_work);
		ad->changed_batch = 0;

		if (ad->batch_data_dir == REQ_SYNC)
+71 -1
@@ -293,7 +293,7 @@ int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector)
	bio->bi_end_io = bio_end_empty_barrier;
	bio->bi_private = &wait;
	bio->bi_bdev = bdev;
-	submit_bio(1 << BIO_RW_BARRIER, bio);
+	submit_bio(WRITE_BARRIER, bio);

	wait_for_completion(&wait);
@@ -315,3 +315,73 @@ int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector)
	return ret;
}
EXPORT_SYMBOL(blkdev_issue_flush);
static void blkdev_discard_end_io(struct bio *bio, int err)
{
if (err) {
if (err == -EOPNOTSUPP)
set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
clear_bit(BIO_UPTODATE, &bio->bi_flags);
}
bio_put(bio);
}
/**
* blkdev_issue_discard - queue a discard
* @bdev: blockdev to issue discard for
* @sector: start sector
* @nr_sects: number of sectors to discard
* @gfp_mask: memory allocation flags (for bio_alloc)
*
* Description:
* Issue a discard request for the sectors in question. Does not wait.
*/
int blkdev_issue_discard(struct block_device *bdev,
sector_t sector, sector_t nr_sects, gfp_t gfp_mask)
{
struct request_queue *q;
struct bio *bio;
int ret = 0;
if (bdev->bd_disk == NULL)
return -ENXIO;
q = bdev_get_queue(bdev);
if (!q)
return -ENXIO;
if (!q->prepare_discard_fn)
return -EOPNOTSUPP;
while (nr_sects && !ret) {
bio = bio_alloc(gfp_mask, 0);
if (!bio)
return -ENOMEM;
bio->bi_end_io = blkdev_discard_end_io;
bio->bi_bdev = bdev;
bio->bi_sector = sector;
if (nr_sects > q->max_hw_sectors) {
bio->bi_size = q->max_hw_sectors << 9;
nr_sects -= q->max_hw_sectors;
sector += q->max_hw_sectors;
} else {
bio->bi_size = nr_sects << 9;
nr_sects = 0;
}
bio_get(bio);
submit_bio(DISCARD_BARRIER, bio);
/* Check if it failed immediately */
if (bio_flagged(bio, BIO_EOPNOTSUPP))
ret = -EOPNOTSUPP;
else if (!bio_flagged(bio, BIO_UPTODATE))
ret = -EIO;
bio_put(bio);
}
return ret;
}
EXPORT_SYMBOL(blkdev_issue_discard);
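
As a rough illustration of how a caller might use the new helper, here is a minimal hedged sketch (not part of the patch; the bdev variable and the error handling are assumed to come from the caller's own context):

	/* Ask the device to discard 8 sectors starting at sector 0.
	 * The call returns without waiting for the discard to complete;
	 * -EOPNOTSUPP means no prepare_discard_fn is registered on the queue. */
	int err = blkdev_issue_discard(bdev, 0, 8, GFP_KERNEL);

	if (err && err != -EOPNOTSUPP)
		printk(KERN_WARNING "discard failed: %d\n", err);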
+331 -288
File diff suppressed because it is too large.
+3 -3
@@ -16,7 +16,7 @@
/**
 * blk_end_sync_rq - executes a completion event on a request
 * @rq: request to complete
- * @error: end io status of the request
+ * @error: end I/O status of the request
 */
static void blk_end_sync_rq(struct request *rq, int error)
{
@@ -41,7 +41,7 @@ static void blk_end_sync_rq(struct request *rq, int error)
 * @done:	I/O completion handler
 *
 * Description:
- *    Insert a fully prepared request at the back of the io scheduler queue
+ *    Insert a fully prepared request at the back of the I/O scheduler queue
 *    for execution.  Don't wait for completion.
 */
void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
@@ -72,7 +72,7 @@ EXPORT_SYMBOL_GPL(blk_execute_rq_nowait);
 * @at_head:	insert request at head or tail of queue
 *
 * Description:
- *    Insert a fully prepared request at the back of the io scheduler queue
+ *    Insert a fully prepared request at the back of the I/O scheduler queue
 *    for execution and wait for completion.
 */
int blk_execute_rq(struct request_queue *q, struct gendisk *bd_disk,
+17 -16
@@ -108,51 +108,51 @@ new_segment:
EXPORT_SYMBOL(blk_rq_map_integrity_sg);

/**
- * blk_integrity_compare - Compare integrity profile of two block devices
- * @b1:		Device to compare
- * @b2:		Device to compare
+ * blk_integrity_compare - Compare integrity profile of two disks
+ * @gd1:	Disk to compare
+ * @gd2:	Disk to compare
 *
 * Description: Meta-devices like DM and MD need to verify that all
 * sub-devices use the same integrity format before advertising to
 * upper layers that they can send/receive integrity metadata.  This
- * function can be used to check whether two block devices have
+ * function can be used to check whether two gendisk devices have
 * compatible integrity formats.
 */
-int blk_integrity_compare(struct block_device *bd1, struct block_device *bd2)
+int blk_integrity_compare(struct gendisk *gd1, struct gendisk *gd2)
{
-	struct blk_integrity *b1 = bd1->bd_disk->integrity;
-	struct blk_integrity *b2 = bd2->bd_disk->integrity;
+	struct blk_integrity *b1 = gd1->integrity;
+	struct blk_integrity *b2 = gd2->integrity;

-	BUG_ON(bd1->bd_disk == NULL);
-	BUG_ON(bd2->bd_disk == NULL);
+	if (!b1 && !b2)
+		return 0;

	if (!b1 || !b2)
-		return 0;
+		return -1;

	if (b1->sector_size != b2->sector_size) {
		printk(KERN_ERR "%s: %s/%s sector sz %u != %u\n", __func__,
-		       bd1->bd_disk->disk_name, bd2->bd_disk->disk_name,
+		       gd1->disk_name, gd2->disk_name,
		       b1->sector_size, b2->sector_size);
		return -1;
	}

	if (b1->tuple_size != b2->tuple_size) {
		printk(KERN_ERR "%s: %s/%s tuple sz %u != %u\n", __func__,
-		       bd1->bd_disk->disk_name, bd2->bd_disk->disk_name,
+		       gd1->disk_name, gd2->disk_name,
		       b1->tuple_size, b2->tuple_size);
		return -1;
	}

	if (b1->tag_size && b2->tag_size && (b1->tag_size != b2->tag_size)) {
		printk(KERN_ERR "%s: %s/%s tag sz %u != %u\n", __func__,
-		       bd1->bd_disk->disk_name, bd2->bd_disk->disk_name,
+		       gd1->disk_name, gd2->disk_name,
		       b1->tag_size, b2->tag_size);
		return -1;
	}

	if (strcmp(b1->name, b2->name)) {
		printk(KERN_ERR "%s: %s/%s type %s != %s\n", __func__,
-		       bd1->bd_disk->disk_name, bd2->bd_disk->disk_name,
+		       gd1->disk_name, gd2->disk_name,
		       b1->name, b2->name);
		return -1;
	}
@@ -331,7 +331,8 @@ int blk_integrity_register(struct gendisk *disk, struct blk_integrity *template)
		return -1;

	if (kobject_init_and_add(&bi->kobj, &integrity_ktype,
-				 &disk->dev.kobj, "%s", "integrity")) {
+				 &disk_to_dev(disk)->kobj,
+				 "%s", "integrity")) {
		kmem_cache_free(integrity_cachep, bi);
		return -1;
	}
@@ -375,7 +376,7 @@ void blk_integrity_unregister(struct gendisk *disk)
	kobject_uevent(&bi->kobj, KOBJ_REMOVE);
	kobject_del(&bi->kobj);
-	kobject_put(&disk->dev.kobj);
	kmem_cache_free(integrity_cachep, bi);
+	disk->integrity = NULL;
}
EXPORT_SYMBOL(blk_integrity_unregister);
+39 -29
@@ -41,10 +41,10 @@ static int __blk_rq_unmap_user(struct bio *bio)
}

static int __blk_rq_map_user(struct request_queue *q, struct request *rq,
-			     void __user *ubuf, unsigned int len)
+			     struct rq_map_data *map_data, void __user *ubuf,
+			     unsigned int len, int null_mapped, gfp_t gfp_mask)
{
	unsigned long uaddr;
-	unsigned int alignment;
	struct bio *bio, *orig_bio;
	int reading, ret;
@@ -55,15 +55,17 @@ static int __blk_rq_map_user(struct request_queue *q, struct request *rq,
	 * direct dma. else, set up kernel bounce buffers
	 */
	uaddr = (unsigned long) ubuf;
-	alignment = queue_dma_alignment(q) | q->dma_pad_mask;
-	if (!(uaddr & alignment) && !(len & alignment))
-		bio = bio_map_user(q, NULL, uaddr, len, reading);
+	if (blk_rq_aligned(q, ubuf, len) && !map_data)
+		bio = bio_map_user(q, NULL, uaddr, len, reading, gfp_mask);
	else
-		bio = bio_copy_user(q, uaddr, len, reading);
+		bio = bio_copy_user(q, map_data, uaddr, len, reading, gfp_mask);

	if (IS_ERR(bio))
		return PTR_ERR(bio);

+	if (null_mapped)
+		bio->bi_flags |= (1 << BIO_NULL_MAPPED);
+
	orig_bio = bio;
	blk_queue_bounce(q, &bio);
@@ -85,17 +87,19 @@ static int __blk_rq_map_user(struct request_queue *q, struct request *rq,
}

/**
- * blk_rq_map_user - map user data to a request, for REQ_BLOCK_PC usage
+ * blk_rq_map_user - map user data to a request, for REQ_TYPE_BLOCK_PC usage
 * @q:		request queue where request should be inserted
 * @rq:		request structure to fill
+ * @map_data:	pointer to the rq_map_data holding pages (if necessary)
 * @ubuf:	the user buffer
 * @len:	length of user data
+ * @gfp_mask:	memory allocation flags
 *
 * Description:
- *    Data will be mapped directly for zero copy io, if possible. Otherwise
+ *    Data will be mapped directly for zero copy I/O, if possible. Otherwise
 *    a kernel bounce buffer is used.
 *
- *    A matching blk_rq_unmap_user() must be issued at the end of io, while
+ *    A matching blk_rq_unmap_user() must be issued at the end of I/O, while
 *    still in process context.
 *
 *    Note: The mapped bio may need to be bounced through blk_queue_bounce()
@@ -105,16 +109,22 @@ static int __blk_rq_map_user(struct request_queue *q, struct request *rq,
 *    unmapping.
 */
int blk_rq_map_user(struct request_queue *q, struct request *rq,
-		    void __user *ubuf, unsigned long len)
+		    struct rq_map_data *map_data, void __user *ubuf,
+		    unsigned long len, gfp_t gfp_mask)
{
	unsigned long bytes_read = 0;
	struct bio *bio = NULL;
-	int ret;
+	int ret, null_mapped = 0;

	if (len > (q->max_hw_sectors << 9))
		return -EINVAL;
-	if (!len || !ubuf)
+	if (!len)
		return -EINVAL;
+	if (!ubuf) {
+		if (!map_data || rq_data_dir(rq) != READ)
+			return -EINVAL;
+		null_mapped = 1;
+	}

	while (bytes_read != len) {
		unsigned long map_len, end, start;
@@ -132,7 +142,8 @@ int blk_rq_map_user(struct request_queue *q, struct request *rq,
		if (end - start > BIO_MAX_PAGES)
			map_len -= PAGE_SIZE;

-		ret = __blk_rq_map_user(q, rq, ubuf, map_len);
+		ret = __blk_rq_map_user(q, rq, map_data, ubuf, map_len,
+					null_mapped, gfp_mask);
		if (ret < 0)
			goto unmap_rq;
		if (!bio)
@@ -154,18 +165,20 @@ unmap_rq:
EXPORT_SYMBOL(blk_rq_map_user);

/**
- * blk_rq_map_user_iov - map user data to a request, for REQ_BLOCK_PC usage
+ * blk_rq_map_user_iov - map user data to a request, for REQ_TYPE_BLOCK_PC usage
 * @q:		request queue where request should be inserted
 * @rq:		request to map data to
+ * @map_data:	pointer to the rq_map_data holding pages (if necessary)
 * @iov:	pointer to the iovec
 * @iov_count:	number of elements in the iovec
 * @len:	I/O byte count
+ * @gfp_mask:	memory allocation flags
 *
 * Description:
- *    Data will be mapped directly for zero copy io, if possible. Otherwise
+ *    Data will be mapped directly for zero copy I/O, if possible. Otherwise
 *    a kernel bounce buffer is used.
 *
- *    A matching blk_rq_unmap_user() must be issued at the end of io, while
+ *    A matching blk_rq_unmap_user() must be issued at the end of I/O, while
 *    still in process context.
 *
 *    Note: The mapped bio may need to be bounced through blk_queue_bounce()
@@ -175,7 +188,8 @@ EXPORT_SYMBOL(blk_rq_map_user);
 *    unmapping.
 */
int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
-			struct sg_iovec *iov, int iov_count, unsigned int len)
+			struct rq_map_data *map_data, struct sg_iovec *iov,
+			int iov_count, unsigned int len, gfp_t gfp_mask)
{
	struct bio *bio;
	int i, read = rq_data_dir(rq) == READ;
@@ -193,10 +207,11 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
		}
	}

-	if (unaligned || (q->dma_pad_mask & len))
-		bio = bio_copy_user_iov(q, iov, iov_count, read);
+	if (unaligned || (q->dma_pad_mask & len) || map_data)
+		bio = bio_copy_user_iov(q, map_data, iov, iov_count, read,
+					gfp_mask);
	else
-		bio = bio_map_user_iov(q, NULL, iov, iov_count, read);
+		bio = bio_map_user_iov(q, NULL, iov, iov_count, read, gfp_mask);

	if (IS_ERR(bio))
		return PTR_ERR(bio);
@@ -216,6 +231,7 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
	rq->buffer = rq->data = NULL;
	return 0;
}
+EXPORT_SYMBOL(blk_rq_map_user_iov);

/**
 * blk_rq_unmap_user - unmap a request with user data
@@ -224,7 +240,7 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
 * Description:
 *    Unmap a rq previously mapped by blk_rq_map_user(). The caller must
 *    supply the original rq->bio from the blk_rq_map_user() return, since
- *    the io completion may have changed rq->bio.
+ *    the I/O completion may have changed rq->bio.
 */
int blk_rq_unmap_user(struct bio *bio)
{
@@ -250,7 +266,7 @@ int blk_rq_unmap_user(struct bio *bio)
EXPORT_SYMBOL(blk_rq_unmap_user);

/**
- * blk_rq_map_kern - map kernel data to a request, for REQ_BLOCK_PC usage
+ * blk_rq_map_kern - map kernel data to a request, for REQ_TYPE_BLOCK_PC usage
 * @q:		request queue where request should be inserted
 * @rq:		request to fill
 * @kbuf:	the kernel buffer
@@ -264,8 +280,6 @@ EXPORT_SYMBOL(blk_rq_unmap_user);
int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
		    unsigned int len, gfp_t gfp_mask)
{
-	unsigned long kaddr;
-	unsigned int alignment;
	int reading = rq_data_dir(rq) == READ;
	int do_copy = 0;
	struct bio *bio;
@@ -275,11 +289,7 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
	if (!len || !kbuf)
		return -EINVAL;

-	kaddr = (unsigned long)kbuf;
-	alignment = queue_dma_alignment(q) | q->dma_pad_mask;
-	do_copy = ((kaddr & alignment) || (len & alignment) ||
-		   object_is_on_stack(kbuf));
+	do_copy = !blk_rq_aligned(q, kbuf, len) || object_is_on_stack(kbuf);

	if (do_copy)
		bio = bio_copy_kern(q, kbuf, len, gfp_mask, reading);
	else
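
Before the next file, a hedged sketch of the widened blk_rq_map_user() calling convention introduced above (it mirrors the bsg.c update later in this diff; q, rq, ubuf and len are assumed to be set up by the caller, and the surrounding request execution is omitted):

	struct bio *bio;
	int ret;

	/* NULL map_data: no preallocated pages, so the block layer picks
	 * between a direct mapping and a bounce copy on its own. */
	ret = blk_rq_map_user(q, rq, NULL, ubuf, len, GFP_KERNEL);
	if (ret)
		return ret;

	bio = rq->bio;		/* save it: completion may change rq->bio */
	/* ... execute the request ... */
	ret = blk_rq_unmap_user(bio);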
+25 -104
@@ -11,7 +11,7 @@
void blk_recalc_rq_sectors(struct request *rq, int nsect)
{
-	if (blk_fs_request(rq)) {
+	if (blk_fs_request(rq) || blk_discard_rq(rq)) {
		rq->hard_sector += nsect;
		rq->hard_nr_sectors -= nsect;
@@ -41,12 +41,9 @@ void blk_recalc_rq_sectors(struct request *rq, int nsect)
void blk_recalc_rq_segments(struct request *rq)
{
	int nr_phys_segs;
-	int nr_hw_segs;
	unsigned int phys_size;
-	unsigned int hw_size;
	struct bio_vec *bv, *bvprv = NULL;
	int seg_size;
-	int hw_seg_size;
	int cluster;
	struct req_iterator iter;
	int high, highprv = 1;
@@ -56,8 +53,8 @@ void blk_recalc_rq_segments(struct request *rq)
		return;

	cluster = test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags);
-	hw_seg_size = seg_size = 0;
-	phys_size = hw_size = nr_phys_segs = nr_hw_segs = 0;
+	seg_size = 0;
+	phys_size = nr_phys_segs = 0;
	rq_for_each_segment(bv, rq, iter) {
		/*
		 * the trick here is making sure that a high page is never
@@ -66,7 +63,7 @@ void blk_recalc_rq_segments(struct request *rq)
		 */
		high = page_to_pfn(bv->bv_page) > q->bounce_pfn;
		if (high || highprv)
-			goto new_hw_segment;
+			goto new_segment;
		if (cluster) {
			if (seg_size + bv->bv_len > q->max_segment_size)
				goto new_segment;
@@ -74,40 +71,19 @@ void blk_recalc_rq_segments(struct request *rq)
				goto new_segment;
			if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bv))
				goto new_segment;
-			if (BIOVEC_VIRT_OVERSIZE(hw_seg_size + bv->bv_len))
-				goto new_hw_segment;

			seg_size += bv->bv_len;
-			hw_seg_size += bv->bv_len;
			bvprv = bv;
			continue;
		}
new_segment:
-		if (BIOVEC_VIRT_MERGEABLE(bvprv, bv) &&
-		    !BIOVEC_VIRT_OVERSIZE(hw_seg_size + bv->bv_len))
-			hw_seg_size += bv->bv_len;
-		else {
-new_hw_segment:
-			if (nr_hw_segs == 1 &&
-			    hw_seg_size > rq->bio->bi_hw_front_size)
-				rq->bio->bi_hw_front_size = hw_seg_size;
-			hw_seg_size = BIOVEC_VIRT_START_SIZE(bv) + bv->bv_len;
-			nr_hw_segs++;
-		}
-
		nr_phys_segs++;
		bvprv = bv;
		seg_size = bv->bv_len;
		highprv = high;
	}

-	if (nr_hw_segs == 1 &&
-	    hw_seg_size > rq->bio->bi_hw_front_size)
-		rq->bio->bi_hw_front_size = hw_seg_size;
-	if (hw_seg_size > rq->biotail->bi_hw_back_size)
-		rq->biotail->bi_hw_back_size = hw_seg_size;
	rq->nr_phys_segments = nr_phys_segs;
-	rq->nr_hw_segments = nr_hw_segs;
}

void blk_recount_segments(struct request_queue *q, struct bio *bio)
@@ -120,7 +96,6 @@ void blk_recount_segments(struct request_queue *q, struct bio *bio)
	blk_recalc_rq_segments(&rq);
	bio->bi_next = nxt;
	bio->bi_phys_segments = rq.nr_phys_segments;
-	bio->bi_hw_segments = rq.nr_hw_segments;
	bio->bi_flags |= (1 << BIO_SEG_VALID);
}
EXPORT_SYMBOL(blk_recount_segments);
@@ -131,13 +106,17 @@ static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio,
	if (!test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags))
		return 0;

-	if (!BIOVEC_PHYS_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt)))
-		return 0;
	if (bio->bi_size + nxt->bi_size > q->max_segment_size)
		return 0;

+	if (!bio_has_data(bio))
+		return 1;
+
+	if (!BIOVEC_PHYS_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt)))
+		return 0;
+
	/*
-	 * bio and nxt are contigous in memory, check if the queue allows
+	 * bio and nxt are contiguous in memory; check if the queue allows
	 * these two to be merged into one
	 */
	if (BIO_SEG_BOUNDARY(q, bio, nxt))
@@ -146,22 +125,6 @@ static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio,
	return 0;
}

-static int blk_hw_contig_segment(struct request_queue *q, struct bio *bio,
-				 struct bio *nxt)
-{
-	if (!bio_flagged(bio, BIO_SEG_VALID))
-		blk_recount_segments(q, bio);
-	if (!bio_flagged(nxt, BIO_SEG_VALID))
-		blk_recount_segments(q, nxt);
-	if (!BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt)) ||
-	    BIOVEC_VIRT_OVERSIZE(bio->bi_hw_back_size + nxt->bi_hw_front_size))
-		return 0;
-	if (bio->bi_hw_back_size + nxt->bi_hw_front_size > q->max_segment_size)
-		return 0;
-	return 1;
-}
-
/*
 * map a request to scatterlist, return number of sg entries setup. Caller
 * must make sure sg can hold rq->nr_phys_segments entries
@@ -275,10 +238,9 @@ static inline int ll_new_hw_segment(struct request_queue *q,
					    struct request *req,
					    struct bio *bio)
{
-	int nr_hw_segs = bio_hw_segments(q, bio);
	int nr_phys_segs = bio_phys_segments(q, bio);

-	if (req->nr_hw_segments + nr_hw_segs > q->max_hw_segments
+	if (req->nr_phys_segments + nr_phys_segs > q->max_hw_segments
	    || req->nr_phys_segments + nr_phys_segs > q->max_phys_segments) {
		req->cmd_flags |= REQ_NOMERGE;
		if (req == q->last_merge)
@@ -290,7 +252,6 @@ static inline int ll_new_hw_segment(struct request_queue *q,
	 * This will form the start of a new hw segment.  Bump both
	 * counters.
	 */
-	req->nr_hw_segments += nr_hw_segs;
	req->nr_phys_segments += nr_phys_segs;
	return 1;
}
@@ -299,7 +260,6 @@ int ll_back_merge_fn(struct request_queue *q, struct request *req,
		     struct bio *bio)
{
	unsigned short max_sectors;
-	int len;

	if (unlikely(blk_pc_request(req)))
		max_sectors = q->max_hw_sectors;
@@ -316,19 +276,6 @@ int ll_back_merge_fn(struct request_queue *q, struct request *req,
		blk_recount_segments(q, req->biotail);
	if (!bio_flagged(bio, BIO_SEG_VALID))
		blk_recount_segments(q, bio);
-	len = req->biotail->bi_hw_back_size + bio->bi_hw_front_size;
-	if (BIOVEC_VIRT_MERGEABLE(__BVEC_END(req->biotail), __BVEC_START(bio))
-	    && !BIOVEC_VIRT_OVERSIZE(len)) {
-		int mergeable =  ll_new_mergeable(q, req, bio);
-
-		if (mergeable) {
-			if (req->nr_hw_segments == 1)
-				req->bio->bi_hw_front_size = len;
-			if (bio->bi_hw_segments == 1)
-				bio->bi_hw_back_size = len;
-		}
-		return mergeable;
-	}

	return ll_new_hw_segment(q, req, bio);
}
@@ -337,7 +284,6 @@ int ll_front_merge_fn(struct request_queue *q, struct request *req,
		      struct bio *bio)
{
	unsigned short max_sectors;
-	int len;

	if (unlikely(blk_pc_request(req)))
		max_sectors = q->max_hw_sectors;
@@ -351,23 +297,10 @@ int ll_front_merge_fn(struct request_queue *q, struct request *req,
		q->last_merge = NULL;
		return 0;
	}
-	len = bio->bi_hw_back_size + req->bio->bi_hw_front_size;
	if (!bio_flagged(bio, BIO_SEG_VALID))
		blk_recount_segments(q, bio);
	if (!bio_flagged(req->bio, BIO_SEG_VALID))
		blk_recount_segments(q, req->bio);
-	if (BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(req->bio)) &&
-	    !BIOVEC_VIRT_OVERSIZE(len)) {
-		int mergeable =  ll_new_mergeable(q, req, bio);
-
-		if (mergeable) {
-			if (bio->bi_hw_segments == 1)
-				bio->bi_hw_front_size = len;
-			if (req->nr_hw_segments == 1)
-				req->biotail->bi_hw_back_size = len;
-		}
-		return mergeable;
-	}

	return ll_new_hw_segment(q, req, bio);
}
@@ -376,7 +309,6 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
				struct request *next)
{
	int total_phys_segments;
-	int total_hw_segments;

	/*
	 * First check if the either of the requests are re-queued
@@ -398,26 +330,11 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
	if (total_phys_segments > q->max_phys_segments)
		return 0;

-	total_hw_segments = req->nr_hw_segments + next->nr_hw_segments;
-	if (blk_hw_contig_segment(q, req->biotail, next->bio)) {
-		int len = req->biotail->bi_hw_back_size +
-				next->bio->bi_hw_front_size;
-		/*
-		 * propagate the combined length to the end of the requests
-		 */
-		if (req->nr_hw_segments == 1)
-			req->bio->bi_hw_front_size = len;
-		if (next->nr_hw_segments == 1)
-			next->biotail->bi_hw_back_size = len;
-		total_hw_segments--;
-	}
-
-	if (total_hw_segments > q->max_hw_segments)
+	if (total_phys_segments > q->max_hw_segments)
		return 0;

	/* Merge is OK... */
	req->nr_phys_segments = total_phys_segments;
-	req->nr_hw_segments = total_hw_segments;
	return 1;
}
@@ -470,17 +387,21 @@ static int attempt_merge(struct request_queue *q, struct request *req,
	elv_merge_requests(q, req, next);

	if (req->rq_disk) {
-		struct hd_struct *part
-			= get_part(req->rq_disk, req->sector);
-		disk_round_stats(req->rq_disk);
-		req->rq_disk->in_flight--;
-		if (part) {
-			part_round_stats(part);
-			part->in_flight--;
-		}
+		struct hd_struct *part;
+		int cpu;
+
+		cpu = part_stat_lock();
+		part = disk_map_sector_rcu(req->rq_disk, req->sector);
+
+		part_round_stats(cpu, part);
+		part_dec_in_flight(part);
+
+		part_stat_unlock();
	}

	req->ioprio = ioprio_best(req->ioprio, next->ioprio);
+	if (blk_rq_cpu_valid(next))
+		req->cpu = next->cpu;
+
	__blk_put_request(q, next);
	return 1;
+39 -4
@@ -32,6 +32,23 @@ void blk_queue_prep_rq(struct request_queue *q, prep_rq_fn *pfn)
}
EXPORT_SYMBOL(blk_queue_prep_rq);
/**
* blk_queue_set_discard - set a discard_sectors function for queue
* @q: queue
* @dfn: prepare_discard function
*
* It's possible for a queue to register a discard callback which is used
* to transform a discard request into the appropriate type for the
* hardware. If none is registered, then discard requests are failed
* with %EOPNOTSUPP.
*
*/
void blk_queue_set_discard(struct request_queue *q, prepare_discard_fn *dfn)
{
q->prepare_discard_fn = dfn;
}
EXPORT_SYMBOL(blk_queue_set_discard);
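
A hedged sketch of how a low-level driver might wire this up during queue setup (the prepare_discard_fn signature taking the queue and the request is an assumption based on this series, and my_prepare_discard() is a hypothetical driver hook):

	static int my_prepare_discard(struct request_queue *q, struct request *rq)
	{
		/* translate the discard request into a device-specific command */
		return 0;
	}

	/* during probe, after the queue has been allocated */
	blk_queue_set_discard(q, my_prepare_discard);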
/**
 * blk_queue_merge_bvec - set a merge_bvec function for queue
 * @q:	queue
@@ -60,6 +77,24 @@ void blk_queue_softirq_done(struct request_queue *q, softirq_done_fn *fn)
}
EXPORT_SYMBOL(blk_queue_softirq_done);
void blk_queue_rq_timeout(struct request_queue *q, unsigned int timeout)
{
q->rq_timeout = timeout;
}
EXPORT_SYMBOL_GPL(blk_queue_rq_timeout);
void blk_queue_rq_timed_out(struct request_queue *q, rq_timed_out_fn *fn)
{
q->rq_timed_out_fn = fn;
}
EXPORT_SYMBOL_GPL(blk_queue_rq_timed_out);
void blk_queue_lld_busy(struct request_queue *q, lld_busy_fn *fn)
{
q->lld_busy_fn = fn;
}
EXPORT_SYMBOL_GPL(blk_queue_lld_busy);
/**
 * blk_queue_make_request - define an alternate make_request function for a device
 * @q:  the request queue for the device to be affected
@@ -127,7 +162,7 @@ EXPORT_SYMBOL(blk_queue_make_request);
 *    Different hardware can have different requirements as to what pages
 *    it can do I/O directly to. A low level driver can call
 *    blk_queue_bounce_limit to have lower memory pages allocated as bounce
- *    buffers for doing I/O to pages residing above @page.
+ *    buffers for doing I/O to pages residing above @dma_addr.
 **/
void blk_queue_bounce_limit(struct request_queue *q, u64 dma_addr)
{
@@ -212,7 +247,7 @@ EXPORT_SYMBOL(blk_queue_max_phys_segments);
 * Description:
 *    Enables a low level driver to set an upper limit on the number of
 *    hw data segments in a request.  This would be the largest number of
- *    address/length pairs the host adapter can actually give as once
+ *    address/length pairs the host adapter can actually give at once
 *    to the device.
 **/
void blk_queue_max_hw_segments(struct request_queue *q,
@@ -393,7 +428,7 @@ EXPORT_SYMBOL(blk_queue_segment_boundary);
 * @mask:  alignment mask
 *
 * description:
- *    set required memory and length aligment for direct dma transactions.
+ *    set required memory and length alignment for direct dma transactions.
 *    this is used when buiding direct io requests for the queue.
 *
 **/
@@ -409,7 +444,7 @@ EXPORT_SYMBOL(blk_queue_dma_alignment);
 * @mask:  alignment mask
 *
 * description:
- *    update required memory and length aligment for direct dma transactions.
+ *    update required memory and length alignment for direct dma transactions.
 *    If the requested alignment is larger than the current alignment, then
 *    the current queue alignment is updated to the new value, otherwise it
 *    is left alone.  The design of this is to allow multiple objects
+175
@@ -0,0 +1,175 @@
/*
* Functions related to softirq rq completions
*/
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/interrupt.h>
#include <linux/cpu.h>
#include "blk.h"
static DEFINE_PER_CPU(struct list_head, blk_cpu_done);
/*
* Softirq action handler - move entries to local list and loop over them
* while passing them to the queue registered handler.
*/
static void blk_done_softirq(struct softirq_action *h)
{
struct list_head *cpu_list, local_list;
local_irq_disable();
cpu_list = &__get_cpu_var(blk_cpu_done);
list_replace_init(cpu_list, &local_list);
local_irq_enable();
while (!list_empty(&local_list)) {
struct request *rq;
rq = list_entry(local_list.next, struct request, csd.list);
list_del_init(&rq->csd.list);
rq->q->softirq_done_fn(rq);
}
}
#if defined(CONFIG_SMP) && defined(CONFIG_USE_GENERIC_SMP_HELPERS)
static void trigger_softirq(void *data)
{
struct request *rq = data;
unsigned long flags;
struct list_head *list;
local_irq_save(flags);
list = &__get_cpu_var(blk_cpu_done);
list_add_tail(&rq->csd.list, list);
if (list->next == &rq->csd.list)
raise_softirq_irqoff(BLOCK_SOFTIRQ);
local_irq_restore(flags);
}
/*
* Setup and invoke a run of 'trigger_softirq' on the given cpu.
*/
static int raise_blk_irq(int cpu, struct request *rq)
{
if (cpu_online(cpu)) {
struct call_single_data *data = &rq->csd;
data->func = trigger_softirq;
data->info = rq;
data->flags = 0;
__smp_call_function_single(cpu, data);
return 0;
}
return 1;
}
#else /* CONFIG_SMP && CONFIG_USE_GENERIC_SMP_HELPERS */
static int raise_blk_irq(int cpu, struct request *rq)
{
return 1;
}
#endif
static int __cpuinit blk_cpu_notify(struct notifier_block *self,
unsigned long action, void *hcpu)
{
/*
* If a CPU goes away, splice its entries to the current CPU
* and trigger a run of the softirq
*/
if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
int cpu = (unsigned long) hcpu;
local_irq_disable();
list_splice_init(&per_cpu(blk_cpu_done, cpu),
&__get_cpu_var(blk_cpu_done));
raise_softirq_irqoff(BLOCK_SOFTIRQ);
local_irq_enable();
}
return NOTIFY_OK;
}
static struct notifier_block __cpuinitdata blk_cpu_notifier = {
.notifier_call = blk_cpu_notify,
};
void __blk_complete_request(struct request *req)
{
struct request_queue *q = req->q;
unsigned long flags;
int ccpu, cpu, group_cpu;
BUG_ON(!q->softirq_done_fn);
local_irq_save(flags);
cpu = smp_processor_id();
group_cpu = blk_cpu_to_group(cpu);
/*
* Select completion CPU
*/
if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) && req->cpu != -1)
ccpu = req->cpu;
else
ccpu = cpu;
if (ccpu == cpu || ccpu == group_cpu) {
struct list_head *list;
do_local:
list = &__get_cpu_var(blk_cpu_done);
list_add_tail(&req->csd.list, list);
/*
* if the list only contains our just added request,
* signal a raise of the softirq. If there are already
* entries there, someone already raised the irq but it
* hasn't run yet.
*/
if (list->next == &req->csd.list)
raise_softirq_irqoff(BLOCK_SOFTIRQ);
} else if (raise_blk_irq(ccpu, req))
goto do_local;
local_irq_restore(flags);
}
/**
* blk_complete_request - end I/O on a request
* @req: the request being processed
*
* Description:
* Ends all I/O on a request. It does not handle partial completions,
* unless the driver actually implements this in its completion callback
* through requeueing. The actual completion happens out-of-order,
* through a softirq handler. The user must have registered a completion
* callback through blk_queue_softirq_done().
**/
void blk_complete_request(struct request *req)
{
if (unlikely(blk_should_fake_timeout(req->q)))
return;
if (!blk_mark_rq_complete(req))
__blk_complete_request(req);
}
EXPORT_SYMBOL(blk_complete_request);
__init int blk_softirq_init(void)
{
int i;
for_each_possible_cpu(i)
INIT_LIST_HEAD(&per_cpu(blk_cpu_done, i));
open_softirq(BLOCK_SOFTIRQ, blk_done_softirq);
register_hotcpu_notifier(&blk_cpu_notifier);
return 0;
}
subsys_initcall(blk_softirq_init);
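
To show how the pieces above fit together, a minimal hedged sketch of a driver opting in to softirq completions (my_softirq_done() is hypothetical, and the actual end-of-request bookkeeping is left to the driver):

	static void my_softirq_done(struct request *rq)
	{
		/* runs from BLOCK_SOFTIRQ context on the CPU selected by
		 * __blk_complete_request(); finish the request here, e.g.
		 * with __blk_end_request() under the queue lock. */
	}

	/* at initialization time */
	blk_queue_softirq_done(q, my_softirq_done);

	/* from the device interrupt handler, once per completed request */
	blk_complete_request(rq);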
+33 -2
@@ -156,6 +156,30 @@ static ssize_t queue_nomerges_store(struct request_queue *q, const char *page,
	return ret;
}
static ssize_t queue_rq_affinity_show(struct request_queue *q, char *page)
{
unsigned int set = test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags);
return queue_var_show(set != 0, page);
}
static ssize_t
queue_rq_affinity_store(struct request_queue *q, const char *page, size_t count)
{
ssize_t ret = -EINVAL;
#if defined(CONFIG_USE_GENERIC_SMP_HELPERS)
unsigned long val;
ret = queue_var_store(&val, page, count);
spin_lock_irq(q->queue_lock);
if (val)
queue_flag_set(QUEUE_FLAG_SAME_COMP, q);
else
queue_flag_clear(QUEUE_FLAG_SAME_COMP, q);
spin_unlock_irq(q->queue_lock);
#endif
return ret;
}
static struct queue_sysfs_entry queue_requests_entry = {
	.attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR },
@@ -197,6 +221,12 @@ static struct queue_sysfs_entry queue_nomerges_entry = {
	.store = queue_nomerges_store,
};
static struct queue_sysfs_entry queue_rq_affinity_entry = {
.attr = {.name = "rq_affinity", .mode = S_IRUGO | S_IWUSR },
.show = queue_rq_affinity_show,
.store = queue_rq_affinity_store,
};
static struct attribute *default_attrs[] = {
	&queue_requests_entry.attr,
	&queue_ra_entry.attr,
@@ -205,6 +235,7 @@ static struct attribute *default_attrs[] = {
	&queue_iosched_entry.attr,
	&queue_hw_sector_size_entry.attr,
	&queue_nomerges_entry.attr,
+	&queue_rq_affinity_entry.attr,
	NULL,
};
@@ -310,7 +341,7 @@ int blk_register_queue(struct gendisk *disk)
	if (!q->request_fn)
		return 0;

-	ret = kobject_add(&q->kobj, kobject_get(&disk->dev.kobj),
+	ret = kobject_add(&q->kobj, kobject_get(&disk_to_dev(disk)->kobj),
			  "%s", "queue");
	if (ret < 0)
		return ret;
@@ -339,6 +370,6 @@ void blk_unregister_queue(struct gendisk *disk)
		kobject_uevent(&q->kobj, KOBJ_REMOVE);
		kobject_del(&q->kobj);
-		kobject_put(&disk->dev.kobj);
+		kobject_put(&disk_to_dev(disk)->kobj);
	}
}
+16 -6
@@ -29,7 +29,7 @@ EXPORT_SYMBOL(blk_queue_find_tag);
 * __blk_free_tags - release a given set of tag maintenance info
 * @bqt:	the tag map to free
 *
- * Tries to free the specified @bqt@.  Returns true if it was
+ * Tries to free the specified @bqt.  Returns true if it was
 * actually freed and false if there are still references using it
 */
static int __blk_free_tags(struct blk_queue_tag *bqt)
@@ -78,7 +78,7 @@ void __blk_queue_free_tags(struct request_queue *q)
 * blk_free_tags - release a given set of tag maintenance info
 * @bqt:	the tag map to free
 *
- * For externally managed @bqt@ frees the map.  Callers of this
+ * For externally managed @bqt frees the map.  Callers of this
 * function must guarantee to have released all the queues that
 * might have been using this tag map.
 */
@@ -94,7 +94,7 @@ EXPORT_SYMBOL(blk_free_tags);
 * @q:  the request queue for the device
 *
 *  Notes:
- *	This is used to disabled tagged queuing to a device, yet leave
+ *	This is used to disable tagged queuing to a device, yet leave
 *	queue in function.
 **/
void blk_queue_free_tags(struct request_queue *q)
@@ -271,7 +271,7 @@ EXPORT_SYMBOL(blk_queue_resize_tags);
 * @rq: the request that has completed
 *
 *  Description:
- *    Typically called when end_that_request_first() returns 0, meaning
+ *    Typically called when end_that_request_first() returns %0, meaning
 *    all transfers have been done for a request. It's important to call
 *    this function before end_that_request_last(), as that will put the
 *    request back on the free list thus corrupting the internal tag list.
@@ -337,6 +337,7 @@ EXPORT_SYMBOL(blk_queue_end_tag);
int blk_queue_start_tag(struct request_queue *q, struct request *rq)
{
	struct blk_queue_tag *bqt = q->queue_tags;
+	unsigned max_depth, offset;
	int tag;

	if (unlikely((rq->cmd_flags & REQ_QUEUED))) {
@@ -350,10 +351,19 @@ int blk_queue_start_tag(struct request_queue *q, struct request *rq)
	/*
	 * Protect against shared tag maps, as we may not have exclusive
	 * access to the tag map.
+	 *
+	 * We reserve a few tags just for sync IO, since we don't want
+	 * to starve sync IO on behalf of flooding async IO.
	 */
+	max_depth = bqt->max_depth;
+	if (rq_is_sync(rq))
+		offset = 0;
+	else
+		offset = max_depth >> 2;
+
	do {
-		tag = find_first_zero_bit(bqt->tag_map, bqt->max_depth);
-		if (tag >= bqt->max_depth)
+		tag = find_next_zero_bit(bqt->tag_map, max_depth, offset);
+		if (tag >= max_depth)
			return 1;

	} while (test_and_set_bit_lock(tag, bqt->tag_map));
+238
@@ -0,0 +1,238 @@
/*
* Functions related to generic timeout handling of requests.
*/
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/blkdev.h>
#include <linux/fault-inject.h>
#include "blk.h"
#ifdef CONFIG_FAIL_IO_TIMEOUT
static DECLARE_FAULT_ATTR(fail_io_timeout);
static int __init setup_fail_io_timeout(char *str)
{
return setup_fault_attr(&fail_io_timeout, str);
}
__setup("fail_io_timeout=", setup_fail_io_timeout);
int blk_should_fake_timeout(struct request_queue *q)
{
if (!test_bit(QUEUE_FLAG_FAIL_IO, &q->queue_flags))
return 0;
return should_fail(&fail_io_timeout, 1);
}
static int __init fail_io_timeout_debugfs(void)
{
return init_fault_attr_dentries(&fail_io_timeout, "fail_io_timeout");
}
late_initcall(fail_io_timeout_debugfs);
ssize_t part_timeout_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
struct gendisk *disk = dev_to_disk(dev);
int set = test_bit(QUEUE_FLAG_FAIL_IO, &disk->queue->queue_flags);
return sprintf(buf, "%d\n", set != 0);
}
ssize_t part_timeout_store(struct device *dev, struct device_attribute *attr,
const char *buf, size_t count)
{
struct gendisk *disk = dev_to_disk(dev);
int val;
if (count) {
struct request_queue *q = disk->queue;
char *p = (char *) buf;
val = simple_strtoul(p, &p, 10);
spin_lock_irq(q->queue_lock);
if (val)
queue_flag_set(QUEUE_FLAG_FAIL_IO, q);
else
queue_flag_clear(QUEUE_FLAG_FAIL_IO, q);
spin_unlock_irq(q->queue_lock);
}
return count;
}
#endif /* CONFIG_FAIL_IO_TIMEOUT */
/*
* blk_delete_timer - Delete/cancel timer for a given function.
* @req: request that we are canceling timer for
*
*/
void blk_delete_timer(struct request *req)
{
struct request_queue *q = req->q;
/*
* Nothing to detach
*/
if (!q->rq_timed_out_fn || !req->deadline)
return;
list_del_init(&req->timeout_list);
if (list_empty(&q->timeout_list))
del_timer(&q->timeout);
}
static void blk_rq_timed_out(struct request *req)
{
struct request_queue *q = req->q;
enum blk_eh_timer_return ret;
ret = q->rq_timed_out_fn(req);
switch (ret) {
case BLK_EH_HANDLED:
__blk_complete_request(req);
break;
case BLK_EH_RESET_TIMER:
blk_clear_rq_complete(req);
blk_add_timer(req);
break;
case BLK_EH_NOT_HANDLED:
/*
* LLD handles this for now but in the future
* we can send a request msg to abort the command
* and we can move more of the generic scsi eh code to
* the blk layer.
*/
break;
default:
printk(KERN_ERR "block: bad eh return: %d\n", ret);
break;
}
}
void blk_rq_timed_out_timer(unsigned long data)
{
struct request_queue *q = (struct request_queue *) data;
unsigned long flags, uninitialized_var(next), next_set = 0;
struct request *rq, *tmp;
spin_lock_irqsave(q->queue_lock, flags);
list_for_each_entry_safe(rq, tmp, &q->timeout_list, timeout_list) {
if (time_after_eq(jiffies, rq->deadline)) {
list_del_init(&rq->timeout_list);
/*
* Check if we raced with end io completion
*/
if (blk_mark_rq_complete(rq))
continue;
blk_rq_timed_out(rq);
}
if (!next_set) {
next = rq->deadline;
next_set = 1;
} else if (time_after(next, rq->deadline))
next = rq->deadline;
}
if (next_set && !list_empty(&q->timeout_list))
mod_timer(&q->timeout, round_jiffies(next));
spin_unlock_irqrestore(q->queue_lock, flags);
}
/**
* blk_abort_request -- Request request recovery for the specified command
* @req: pointer to the request of interest
*
* This function requests that the block layer start recovery for the
* request by deleting the timer and calling the q's timeout function.
* LLDDs who implement their own error recovery MAY ignore the timeout
* event if they generated blk_abort_req. Must hold queue lock.
*/
void blk_abort_request(struct request *req)
{
if (blk_mark_rq_complete(req))
return;
blk_delete_timer(req);
blk_rq_timed_out(req);
}
EXPORT_SYMBOL_GPL(blk_abort_request);
/**
* blk_add_timer - Start timeout timer for a single request
* @req: request that is about to start running.
*
* Notes:
* Each request has its own timer, and as it is added to the queue, we
* set up the timer. When the request completes, we cancel the timer.
*/
void blk_add_timer(struct request *req)
{
struct request_queue *q = req->q;
unsigned long expiry;
if (!q->rq_timed_out_fn)
return;
BUG_ON(!list_empty(&req->timeout_list));
BUG_ON(test_bit(REQ_ATOM_COMPLETE, &req->atomic_flags));
if (req->timeout)
req->deadline = jiffies + req->timeout;
else {
req->deadline = jiffies + q->rq_timeout;
/*
* Some LLDs, like scsi, peek at the timeout to prevent
* a command from being retried forever.
*/
req->timeout = q->rq_timeout;
}
list_add_tail(&req->timeout_list, &q->timeout_list);
/*
* If the timer isn't already pending or this timeout is earlier
* than an existing one, modify the timer. Round to next nearest
* second.
*/
expiry = round_jiffies(req->deadline);
/*
* We use ->deadline == 0 to detect whether a timer was added or
* not, so just increase to next jiffy for that specific case
*/
if (unlikely(!req->deadline))
req->deadline = 1;
if (!timer_pending(&q->timeout) ||
time_before(expiry, q->timeout.expires))
mod_timer(&q->timeout, expiry);
}
/**
* blk_abort_queue -- Abort all request on given queue
* @queue: pointer to queue
*
*/
void blk_abort_queue(struct request_queue *q)
{
unsigned long flags;
struct request *rq, *tmp;
spin_lock_irqsave(q->queue_lock, flags);
elv_abort_queue(q);
list_for_each_entry_safe(rq, tmp, &q->timeout_list, timeout_list)
blk_abort_request(rq);
spin_unlock_irqrestore(q->queue_lock, flags);
}
EXPORT_SYMBOL_GPL(blk_abort_queue);
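
For reference, a hedged sketch of how a driver might opt in to the new timeout handling (my_timed_out() is hypothetical; the return values are the enum blk_eh_timer_return cases handled above):

	static enum blk_eh_timer_return my_timed_out(struct request *rq)
	{
		/* e.g. kick off a controller reset, then rearm the timer */
		return BLK_EH_RESET_TIMER;
	}

	/* during queue setup */
	blk_queue_rq_timed_out(q, my_timed_out);
	blk_queue_rq_timeout(q, 30 * HZ);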
+48
View File
@@ -17,6 +17,42 @@ void __blk_queue_free_tags(struct request_queue *q);
void blk_unplug_work(struct work_struct *work);
void blk_unplug_timeout(unsigned long data);
void blk_rq_timed_out_timer(unsigned long data);
void blk_delete_timer(struct request *);
void blk_add_timer(struct request *);
/*
* Internal atomic flags for request handling
*/
enum rq_atomic_flags {
REQ_ATOM_COMPLETE = 0,
};
/*
* EH timer and IO completion will both attempt to 'grab' the request, make
* sure that only one of them suceeds
*/
static inline int blk_mark_rq_complete(struct request *rq)
{
return test_and_set_bit(REQ_ATOM_COMPLETE, &rq->atomic_flags);
}
static inline void blk_clear_rq_complete(struct request *rq)
{
clear_bit(REQ_ATOM_COMPLETE, &rq->atomic_flags);
}
#ifdef CONFIG_FAIL_IO_TIMEOUT
int blk_should_fake_timeout(struct request_queue *);
ssize_t part_timeout_show(struct device *, struct device_attribute *, char *);
ssize_t part_timeout_store(struct device *, struct device_attribute *,
const char *, size_t);
#else
static inline int blk_should_fake_timeout(struct request_queue *q)
{
return 0;
}
#endif
struct io_context *current_io_context(gfp_t gfp_flags, int node);
@@ -59,4 +95,16 @@ static inline int queue_congestion_off_threshold(struct request_queue *q)
#endif /* BLK_DEV_INTEGRITY */
static inline int blk_cpu_to_group(int cpu)
{
#ifdef CONFIG_SCHED_MC
cpumask_t mask = cpu_coregroup_map(cpu);
return first_cpu(mask);
#elif defined(CONFIG_SCHED_SMT)
return first_cpu(per_cpu(cpu_sibling_map, cpu));
#else
return cpu;
#endif
}
#endif
+10 -22
@@ -111,23 +111,9 @@ static int act_log_check(struct blk_trace *bt, u32 what, sector_t sector,
 */
static u32 ddir_act[2] __read_mostly = { BLK_TC_ACT(BLK_TC_READ), BLK_TC_ACT(BLK_TC_WRITE) };

-/*
- * Bio action bits of interest
- */
-static u32 bio_act[9] __read_mostly = { 0, BLK_TC_ACT(BLK_TC_BARRIER), BLK_TC_ACT(BLK_TC_SYNC), 0, BLK_TC_ACT(BLK_TC_AHEAD), 0, 0, 0, BLK_TC_ACT(BLK_TC_META) };
-
-/*
- * More could be added as needed, taking care to increment the decrementer
- * to get correct indexing
- */
-#define trace_barrier_bit(rw)	\
-	(((rw) & (1 << BIO_RW_BARRIER)) >> (BIO_RW_BARRIER - 0))
-#define trace_sync_bit(rw)	\
-	(((rw) & (1 << BIO_RW_SYNC)) >> (BIO_RW_SYNC - 1))
-#define trace_ahead_bit(rw)	\
-	(((rw) & (1 << BIO_RW_AHEAD)) << (2 - BIO_RW_AHEAD))
-#define trace_meta_bit(rw)	\
-	(((rw) & (1 << BIO_RW_META)) >> (BIO_RW_META - 3))
+/* The ilog2() calls fall out because they're constant */
+#define MASK_TC_BIT(rw, __name) ( (rw & (1 << BIO_RW_ ## __name)) << \
+	  (ilog2(BLK_TC_ ## __name) + BLK_TC_SHIFT - BIO_RW_ ## __name) )

/*
 * The worker for the various blk_add_trace*() types. Fills out a
@@ -147,10 +133,11 @@ void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
		return;

	what |= ddir_act[rw & WRITE];
-	what |= bio_act[trace_barrier_bit(rw)];
-	what |= bio_act[trace_sync_bit(rw)];
-	what |= bio_act[trace_ahead_bit(rw)];
-	what |= bio_act[trace_meta_bit(rw)];
+	what |= MASK_TC_BIT(rw, BARRIER);
+	what |= MASK_TC_BIT(rw, SYNC);
+	what |= MASK_TC_BIT(rw, AHEAD);
+	what |= MASK_TC_BIT(rw, META);
+	what |= MASK_TC_BIT(rw, DISCARD);

	pid = tsk->pid;
	if (unlikely(act_log_check(bt, what, sector, pid)))
@@ -382,7 +369,8 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
	if (!buts->buf_size || !buts->buf_nr)
		return -EINVAL;

-	strcpy(buts->name, name);
+	strncpy(buts->name, name, BLKTRACE_BDEV_SIZE);
+	buts->name[BLKTRACE_BDEV_SIZE - 1] = '\0';

	/*
	 * some device names have larger paths - convert the slashes
+4 -2
@@ -283,7 +283,8 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, int has_write_perm)
		next_rq->cmd_type = rq->cmd_type;

		dxferp = (void*)(unsigned long)hdr->din_xferp;
-		ret = blk_rq_map_user(q, next_rq, dxferp, hdr->din_xfer_len);
+		ret = blk_rq_map_user(q, next_rq, NULL, dxferp,
+				      hdr->din_xfer_len, GFP_KERNEL);
		if (ret)
			goto out;
	}
@@ -298,7 +299,8 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, int has_write_perm)
		dxfer_len = 0;

	if (dxfer_len) {
-		ret = blk_rq_map_user(q, rq, dxferp, dxfer_len);
+		ret = blk_rq_map_user(q, rq, NULL, dxferp, dxfer_len,
+				      GFP_KERNEL);
		if (ret)
			goto out;
	}

Some files were not shown because too many files have changed in this diff.