blk-mq: support per-distpatch_queue flush machinery

This patch supports to run one single flush machinery for
each blk-mq dispatch queue, so that:

- current init_request and exit_request callbacks can
cover flush request too, then the buggy copying way of
initializing flush request's pdu can be fixed

- flushing performance gets improved in case of multi hw-queue

In fio sync write test over virtio-blk(4 hw queues, ioengine=sync,
iodepth=64, numjobs=4, bs=4K), it is observed that througput gets
increased a lot over my test environment:
	- throughput: +70% in case of virtio-blk over null_blk
	- throughput: +30% in case of virtio-blk over SSD image

The multi virtqueue feature isn't merged to QEMU yet, and patches for
the feature can be found in below tree:

	git://kernel.ubuntu.com/ming/qemu.git  	v2.1.0-mq.4

And simply passing 'num_queues=4 vectors=5' should be enough to
enable multi queue(quad queue) feature for QEMU virtio-blk.

Suggested-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Ming Lei <ming.lei@canonical.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
This commit is contained in:
Ming Lei
2014-09-25 23:23:47 +08:00
committed by Jens Axboe
parent e97c293cdf
commit f70ced0917
6 changed files with 60 additions and 39 deletions
+24 -26
View File
@@ -281,26 +281,6 @@ void blk_mq_free_request(struct request *rq)
__blk_mq_free_request(hctx, ctx, rq);
}
/*
* Clone all relevant state from a request that has been put on hold in
* the flush state machine into the preallocated flush request that hangs
* off the request queue.
*
* For a driver the flush request should be invisible, that's why we are
* impersonating the original request here.
*/
void blk_mq_clone_flush_request(struct request *flush_rq,
struct request *orig_rq)
{
struct blk_mq_hw_ctx *hctx =
orig_rq->q->mq_ops->map_queue(orig_rq->q, orig_rq->mq_ctx->cpu);
flush_rq->mq_ctx = orig_rq->mq_ctx;
flush_rq->tag = orig_rq->tag;
memcpy(blk_mq_rq_to_pdu(flush_rq), blk_mq_rq_to_pdu(orig_rq),
hctx->cmd_size);
}
inline void __blk_mq_end_request(struct request *rq, int error)
{
blk_account_io_done(rq);
@@ -1516,12 +1496,20 @@ static void blk_mq_exit_hctx(struct request_queue *q,
struct blk_mq_tag_set *set,
struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
{
unsigned flush_start_tag = set->queue_depth;
blk_mq_tag_idle(hctx);
if (set->ops->exit_request)
set->ops->exit_request(set->driver_data,
hctx->fq->flush_rq, hctx_idx,
flush_start_tag + hctx_idx);
if (set->ops->exit_hctx)
set->ops->exit_hctx(hctx, hctx_idx);
blk_mq_unregister_cpu_notifier(&hctx->cpu_notifier);
blk_free_flush_queue(hctx->fq);
kfree(hctx->ctxs);
blk_mq_free_bitmap(&hctx->ctx_map);
}
@@ -1556,6 +1544,7 @@ static int blk_mq_init_hctx(struct request_queue *q,
struct blk_mq_hw_ctx *hctx, unsigned hctx_idx)
{
int node;
unsigned flush_start_tag = set->queue_depth;
node = hctx->numa_node;
if (node == NUMA_NO_NODE)
@@ -1594,8 +1583,23 @@ static int blk_mq_init_hctx(struct request_queue *q,
set->ops->init_hctx(hctx, set->driver_data, hctx_idx))
goto free_bitmap;
hctx->fq = blk_alloc_flush_queue(q, hctx->numa_node, set->cmd_size);
if (!hctx->fq)
goto exit_hctx;
if (set->ops->init_request &&
set->ops->init_request(set->driver_data,
hctx->fq->flush_rq, hctx_idx,
flush_start_tag + hctx_idx, node))
goto free_fq;
return 0;
free_fq:
kfree(hctx->fq);
exit_hctx:
if (set->ops->exit_hctx)
set->ops->exit_hctx(hctx, hctx_idx);
free_bitmap:
blk_mq_free_bitmap(&hctx->ctx_map);
free_ctxs:
@@ -1862,16 +1866,10 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
blk_mq_add_queue_tag_set(set, q);
q->fq = blk_alloc_flush_queue(q);
if (!q->fq)
goto err_hw_queues;
blk_mq_map_swqueue(q);
return q;
err_hw_queues:
blk_mq_exit_hw_queues(q, set, set->nr_hw_queues);
err_hw:
blk_cleanup_queue(q);
err_hctxs: