Merge branch 'for-2.6.37/barrier' of git://git.kernel.dk/linux-2.6-block

* 'for-2.6.37/barrier' of git://git.kernel.dk/linux-2.6-block: (46 commits)
  xen-blkfront: disable barrier/flush write support
  Added blk-lib.c and blk-barrier.c was renamed to blk-flush.c
  block: remove BLKDEV_IFL_WAIT
  aic7xxx_old: removed unused 'req' variable
  block: remove the BH_Eopnotsupp flag
  block: remove the BLKDEV_IFL_BARRIER flag
  block: remove the WRITE_BARRIER flag
  swap: do not send discards as barriers
  fat: do not send discards as barriers
  ext4: do not send discards as barriers
  jbd2: replace barriers with explicit flush / FUA usage
  jbd2: Modify ASYNC_COMMIT code to not rely on queue draining on barrier
  jbd: replace barriers with explicit flush / FUA usage
  nilfs2: replace barriers with explicit flush / FUA usage
  reiserfs: replace barriers with explicit flush / FUA usage
  gfs2: replace barriers with explicit flush / FUA usage
  btrfs: replace barriers with explicit flush / FUA usage
  xfs: replace barriers with explicit flush / FUA usage
  block: pass gfp_mask and flags to sb_issue_discard
  dm: convey that all flushes are processed as empty
  ...
This commit is contained in:
Linus Torvalds
2010-10-22 17:07:18 -07:00
80 changed files with 849 additions and 1925 deletions
+1 -1
View File
@@ -3,7 +3,7 @@
#
obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \
blk-barrier.o blk-settings.o blk-ioc.o blk-map.o \
blk-flush.o blk-settings.o blk-ioc.o blk-map.o \
blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \
blk-iopoll.o blk-lib.o ioctl.o genhd.o scsi_ioctl.o
-350
View File
@@ -1,350 +0,0 @@
/*
* Functions related to barrier IO handling
*/
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/gfp.h>
#include "blk.h"
/**
 * blk_queue_ordered - does this queue support ordered writes
 * @q:        the request queue
 * @ordered:  one of QUEUE_ORDERED_*
 *
 * Description:
 *   For journalled file systems, doing ordered writes on a commit
 *   block instead of explicitly doing wait_on_buffer (which is bad
 *   for performance) can be a big win. Block drivers supporting this
 *   feature should call this function and indicate so.
 *
 * Return:
 *   0 on success, or -EINVAL when @ordered is not one of the
 *   QUEUE_ORDERED_* modes checked below (the queue is then left
 *   unmodified).
 **/
int blk_queue_ordered(struct request_queue *q, unsigned ordered)
{
	if (ordered != QUEUE_ORDERED_NONE &&
	    ordered != QUEUE_ORDERED_DRAIN &&
	    ordered != QUEUE_ORDERED_DRAIN_FLUSH &&
	    ordered != QUEUE_ORDERED_DRAIN_FUA &&
	    ordered != QUEUE_ORDERED_TAG &&
	    ordered != QUEUE_ORDERED_TAG_FLUSH &&
	    ordered != QUEUE_ORDERED_TAG_FUA) {
		/* @ordered is unsigned, so it must be printed with %u */
		printk(KERN_ERR "blk_queue_ordered: bad value %u\n", ordered);
		return -EINVAL;
	}

	/* the new mode becomes both the current and the next one */
	q->ordered = ordered;
	q->next_ordered = ordered;

	return 0;
}
EXPORT_SYMBOL(blk_queue_ordered);
/*
* Cache flushing for ordered writes handling
*/
/*
 * Report the stage the ordered sequence is currently at: the lowest
 * bit that is still clear in q->ordseq, or 0 when q->ordseq is zero.
 */
unsigned blk_ordered_cur_seq(struct request_queue *q)
{
	return q->ordseq ? 1 << ffz(q->ordseq) : 0;
}
unsigned blk_ordered_req_seq(struct request *rq)
{
struct request_queue *q = rq->q;
BUG_ON(q->ordseq == 0);
if (rq == &q->pre_flush_rq)
return QUEUE_ORDSEQ_PREFLUSH;
if (rq == &q->bar_rq)
return QUEUE_ORDSEQ_BAR;
if (rq == &q->post_flush_rq)
return QUEUE_ORDSEQ_POSTFLUSH;
/*
* !fs requests don't need to follow barrier ordering. Always
* put them at the front. This fixes the following deadlock.
*
* http://thread.gmane.org/gmane.linux.kernel/537473
*/
if (rq->cmd_type != REQ_TYPE_FS)
return QUEUE_ORDSEQ_DRAIN;
if ((rq->cmd_flags & REQ_ORDERED_COLOR) ==
(q->orig_bar_rq->cmd_flags & REQ_ORDERED_COLOR))
return QUEUE_ORDSEQ_DRAIN;
else
return QUEUE_ORDSEQ_DONE;
}
bool blk_ordered_complete_seq(struct request_queue *q, unsigned seq, int error)
{
struct request *rq;
if (error && !q->orderr)
q->orderr = error;
BUG_ON(q->ordseq & seq);
q->ordseq |= seq;
if (blk_ordered_cur_seq(q) != QUEUE_ORDSEQ_DONE)
return false;
/*
* Okay, sequence complete.
*/
q->ordseq = 0;
rq = q->orig_bar_rq;
__blk_end_request_all(rq, q->orderr);
return true;
}
static void pre_flush_end_io(struct request *rq, int error)
{
elv_completed_request(rq->q, rq);
blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_PREFLUSH, error);
}
static void bar_end_io(struct request *rq, int error)
{
elv_completed_request(rq->q, rq);
blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_BAR, error);
}
static void post_flush_end_io(struct request *rq, int error)
{
elv_completed_request(rq->q, rq);
blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_POSTFLUSH, error);
}
static void queue_flush(struct request_queue *q, unsigned which)
{
struct request *rq;
rq_end_io_fn *end_io;
if (which == QUEUE_ORDERED_DO_PREFLUSH) {
rq = &q->pre_flush_rq;
end_io = pre_flush_end_io;
} else {
rq = &q->post_flush_rq;
end_io = post_flush_end_io;
}
blk_rq_init(q, rq);
rq->cmd_type = REQ_TYPE_FS;
rq->cmd_flags = REQ_HARDBARRIER | REQ_FLUSH;
rq->rq_disk = q->orig_bar_rq->rq_disk;
rq->end_io = end_io;
elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
}
/*
 * start_ordered - begin an ordered sequence for the barrier request *@rqp
 * @q: the request queue
 * @rqp: in: the barrier request; out: the request to issue first, or
 *       NULL when nothing may be issued yet
 *
 * Dequeues the barrier request and stashes it in q->orig_bar_rq, then
 * queues the needed proxy requests (post-flush, bar, pre-flush) at the
 * head of the queue.  Stages that are not needed are collected in a
 * skip mask and completed in one go at the end.
 *
 * Returns false if that completed the whole sequence (the original
 * request has been ended), true otherwise.
 */
static inline bool start_ordered(struct request_queue *q, struct request **rqp)
{
struct request *rq = *rqp;
unsigned skip = 0;
/* reset the error and latch the ordering mode for this sequence */
q->orderr = 0;
q->ordered = q->next_ordered;
q->ordseq |= QUEUE_ORDSEQ_STARTED;
/*
* For an empty barrier, there's no actual BAR request, which
* in turn makes POSTFLUSH unnecessary. Mask them off.
*/
if (!blk_rq_sectors(rq)) {
q->ordered &= ~(QUEUE_ORDERED_DO_BAR |
QUEUE_ORDERED_DO_POSTFLUSH);
/*
* Empty barrier on a write-through device w/ ordered
* tag has no command to issue and without any command
* to issue, ordering by tag can't be used. Drain
* instead.
*/
if ((q->ordered & QUEUE_ORDERED_BY_TAG) &&
!(q->ordered & QUEUE_ORDERED_DO_PREFLUSH)) {
q->ordered &= ~QUEUE_ORDERED_BY_TAG;
q->ordered |= QUEUE_ORDERED_BY_DRAIN;
}
}
/* stash away the original request */
blk_dequeue_request(rq);
q->orig_bar_rq = rq;
/* from here on rq tracks the last proxy queued, i.e. the new queue head */
rq = NULL;
/*
* Queue ordered sequence. As we stack them at the head, we
* need to queue in reverse order. Note that we rely on that
* no fs request uses ELEVATOR_INSERT_FRONT and thus no fs
* request gets in between ordered sequence.
*/
if (q->ordered & QUEUE_ORDERED_DO_POSTFLUSH) {
queue_flush(q, QUEUE_ORDERED_DO_POSTFLUSH);
rq = &q->post_flush_rq;
} else
skip |= QUEUE_ORDSEQ_POSTFLUSH;
if (q->ordered & QUEUE_ORDERED_DO_BAR) {
rq = &q->bar_rq;
/* initialize proxy request and queue it */
blk_rq_init(q, rq);
if (bio_data_dir(q->orig_bar_rq->bio) == WRITE)
rq->cmd_flags |= REQ_WRITE;
if (q->ordered & QUEUE_ORDERED_DO_FUA)
rq->cmd_flags |= REQ_FUA;
init_request_from_bio(rq, q->orig_bar_rq->bio);
rq->end_io = bar_end_io;
elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
} else
skip |= QUEUE_ORDSEQ_BAR;
if (q->ordered & QUEUE_ORDERED_DO_PREFLUSH) {
queue_flush(q, QUEUE_ORDERED_DO_PREFLUSH);
rq = &q->pre_flush_rq;
} else
skip |= QUEUE_ORDSEQ_PREFLUSH;
/*
* When ordering by drain and requests are still in flight, hand
* nothing back for now; otherwise the drain stage is skipped too.
*/
if ((q->ordered & QUEUE_ORDERED_BY_DRAIN) && queue_in_flight(q))
rq = NULL;
else
skip |= QUEUE_ORDSEQ_DRAIN;
*rqp = rq;
/*
* Complete skipped sequences. If whole sequence is complete,
* return false to tell elevator that this request is gone.
*/
return !blk_ordered_complete_seq(q, skip, 0);
}
/*
 * blk_do_ordered - apply barrier ordering rules to the request *@rqp
 * @q: the request queue
 * @rqp: in: the candidate request; out: unchanged if it may be issued
 *       now, or NULL if nothing may be issued at this point
 *
 * With no ordered sequence in progress, non-barrier requests pass
 * through untouched, and a barrier either starts a new sequence (see
 * start_ordered()) or, when the queue does not support ordering, is
 * ended with -EOPNOTSUPP.  With a sequence in progress, the request is
 * held back (*@rqp set to NULL) when the active ordering mode says it
 * is not its turn yet.
 *
 * Returns false only when the request was consumed here (ended with
 * -EOPNOTSUPP, or the sequence completed inside start_ordered()); true
 * otherwise.
 */
bool blk_do_ordered(struct request_queue *q, struct request **rqp)
{
struct request *rq = *rqp;
/* only fs requests flagged REQ_HARDBARRIER count as barriers */
const int is_barrier = rq->cmd_type == REQ_TYPE_FS &&
(rq->cmd_flags & REQ_HARDBARRIER);
if (!q->ordseq) {
/* no sequence in progress: ordinary requests are not affected */
if (!is_barrier)
return true;
if (q->next_ordered != QUEUE_ORDERED_NONE)
return start_ordered(q, rqp);
else {
/*
* Queue ordering not supported. Terminate
* with prejudice.
*/
blk_dequeue_request(rq);
__blk_end_request_all(rq, -EOPNOTSUPP);
*rqp = NULL;
return false;
}
}
/*
* Ordered sequence in progress
*/
/* Special requests are not subject to ordering rules. */
if (rq->cmd_type != REQ_TYPE_FS &&
rq != &q->pre_flush_rq && rq != &q->post_flush_rq)
return true;
if (q->ordered & QUEUE_ORDERED_BY_TAG) {
/* Ordered by tag. Blocking the next barrier is enough. */
if (is_barrier && rq != &q->bar_rq)
*rqp = NULL;
} else {
/* Ordered by draining. Wait for turn. */
/* a request belonging to an already finished stage is unexpected */
WARN_ON(blk_ordered_req_seq(rq) < blk_ordered_cur_seq(q));
if (blk_ordered_req_seq(rq) > blk_ordered_cur_seq(q))
*rqp = NULL;
}
return true;
}
static void bio_end_empty_barrier(struct bio *bio, int err)
{
if (err) {
if (err == -EOPNOTSUPP)
set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
clear_bit(BIO_UPTODATE, &bio->bi_flags);
}
if (bio->bi_private)
complete(bio->bi_private);
bio_put(bio);
}
/**
 * blkdev_issue_flush - queue a flush
 * @bdev:	blockdev to issue flush for
 * @gfp_mask:	memory allocation flags (for bio_alloc)
 * @error_sector:	error sector
 * @flags:	BLKDEV_IFL_* flags to control behaviour
 *
 * Description:
 *    Issue a flush for the block device in question. Caller can supply
 *    room for storing the error offset in case of a flush error, if they
 *    wish to. If WAIT flag is not passed then caller may check only what
 *    request was pushed in some internal queue for later handling.
 *
 * Return:
 *    0 on success, -ENXIO if the device has no disk, no queue or no
 *    make_request function, -ENOMEM if the bio could not be allocated,
 *    -EOPNOTSUPP if the bio got flagged BIO_EOPNOTSUPP, or -EIO if the
 *    bio is not flagged BIO_UPTODATE.
 */
int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask,
		       sector_t *error_sector, unsigned long flags)
{
	DECLARE_COMPLETION_ONSTACK(wait);
	struct request_queue *q;
	struct bio *bio;
	int ret = 0;

	if (bdev->bd_disk == NULL)
		return -ENXIO;

	q = bdev_get_queue(bdev);
	if (!q)
		return -ENXIO;

	/*
	 * some block devices may not have their queue correctly set up here
	 * (e.g. loop device without a backing file) and so issuing a flush
	 * here will panic. Ensure there is a request function before issuing
	 * the barrier.
	 */
	if (!q->make_request_fn)
		return -ENXIO;

	bio = bio_alloc(gfp_mask, 0);
	/*
	 * bio_alloc() can fail (e.g. when @gfp_mask does not allow
	 * sleeping); bail out instead of dereferencing NULL.
	 */
	if (!bio)
		return -ENOMEM;

	bio->bi_end_io = bio_end_empty_barrier;
	bio->bi_bdev = bdev;
	if (test_bit(BLKDEV_WAIT, &flags))
		bio->bi_private = &wait;

	/* extra reference: the end_io handler drops one on completion */
	bio_get(bio);
	submit_bio(WRITE_BARRIER, bio);
	if (test_bit(BLKDEV_WAIT, &flags)) {
		wait_for_completion(&wait);
		/*
		 * The driver must store the error location in ->bi_sector, if
		 * it supports it. For non-stacked drivers, this should be
		 * copied from blk_rq_pos(rq).
		 */
		if (error_sector)
			*error_sector = bio->bi_sector;
	}

	/*
	 * NOTE(review): without BLKDEV_WAIT nothing above waits for the
	 * bio, so the flags read here may predate its completion.
	 */
	if (bio_flagged(bio, BIO_EOPNOTSUPP))
		ret = -EOPNOTSUPP;
	else if (!bio_flagged(bio, BIO_UPTODATE))
		ret = -EIO;

	bio_put(bio);
	return ret;
}
EXPORT_SYMBOL(blkdev_issue_flush);
+39 -33
View File
@@ -139,7 +139,7 @@ static void req_bio_endio(struct request *rq, struct bio *bio,
{
struct request_queue *q = rq->q;
if (&q->bar_rq != rq) {
if (&q->flush_rq != rq) {
if (error)
clear_bit(BIO_UPTODATE, &bio->bi_flags);
else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
@@ -163,13 +163,12 @@ static void req_bio_endio(struct request *rq, struct bio *bio,
if (bio->bi_size == 0)
bio_endio(bio, error);
} else {
/*
* Okay, this is the barrier request in progress, just
* record the error;
* Okay, this is the sequenced flush request in
* progress, just record the error;
*/
if (error && !q->orderr)
q->orderr = error;
if (error && !q->flush_err)
q->flush_err = error;
}
}
@@ -531,6 +530,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
init_timer(&q->unplug_timer);
setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q);
INIT_LIST_HEAD(&q->timeout_list);
INIT_LIST_HEAD(&q->pending_flushes);
INIT_WORK(&q->unplug_work, blk_unplug_work);
kobject_init(&q->kobj, &blk_queue_ktype);
@@ -1053,22 +1053,6 @@ void blk_insert_request(struct request_queue *q, struct request *rq,
}
EXPORT_SYMBOL(blk_insert_request);
/*
* add-request adds a request to the linked list.
* queue lock is held and interrupts disabled, as we muck with the
* request queue list.
*/
static inline void add_request(struct request_queue *q, struct request *req)
{
drive_stat_acct(req, 1);
/*
* elevator indicated where it wants this request to be
* inserted at elevator_merge time
*/
__elv_add_request(q, req, ELEVATOR_INSERT_SORT, 0);
}
static void part_round_stats_single(int cpu, struct hd_struct *part,
unsigned long now)
{
@@ -1217,13 +1201,16 @@ static int __make_request(struct request_queue *q, struct bio *bio)
const bool sync = !!(bio->bi_rw & REQ_SYNC);
const bool unplug = !!(bio->bi_rw & REQ_UNPLUG);
const unsigned long ff = bio->bi_rw & REQ_FAILFAST_MASK;
int where = ELEVATOR_INSERT_SORT;
int rw_flags;
if ((bio->bi_rw & REQ_HARDBARRIER) &&
(q->next_ordered == QUEUE_ORDERED_NONE)) {
/* REQ_HARDBARRIER is no more */
if (WARN_ONCE(bio->bi_rw & REQ_HARDBARRIER,
"block: HARDBARRIER is deprecated, use FLUSH/FUA instead\n")) {
bio_endio(bio, -EOPNOTSUPP);
return 0;
}
/*
* low level driver can indicate that it wants pages above a
* certain limit bounced to low memory (ie for highmem, or even
@@ -1233,7 +1220,12 @@ static int __make_request(struct request_queue *q, struct bio *bio)
spin_lock_irq(q->queue_lock);
if (unlikely((bio->bi_rw & REQ_HARDBARRIER)) || elv_queue_empty(q))
if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) {
where = ELEVATOR_INSERT_FRONT;
goto get_rq;
}
if (elv_queue_empty(q))
goto get_rq;
el_ret = elv_merge(q, &req, bio);
@@ -1330,7 +1322,10 @@ get_rq:
req->cpu = blk_cpu_to_group(smp_processor_id());
if (queue_should_plug(q) && elv_queue_empty(q))
blk_plug_device(q);
add_request(q, req);
/* insert the request into the elevator */
drive_stat_acct(req, 1);
__elv_add_request(q, req, where, 0);
out:
if (unplug || !queue_should_plug(q))
__generic_unplug_device(q);
@@ -1530,6 +1525,19 @@ static inline void __generic_make_request(struct bio *bio)
if (bio_check_eod(bio, nr_sectors))
goto end_io;
/*
* Filter flush bio's early so that make_request based
* drivers without flush support don't have to worry
* about them.
*/
if ((bio->bi_rw & (REQ_FLUSH | REQ_FUA)) && !q->flush_flags) {
bio->bi_rw &= ~(REQ_FLUSH | REQ_FUA);
if (!nr_sectors) {
err = 0;
goto end_io;
}
}
if ((bio->bi_rw & REQ_DISCARD) &&
(!blk_queue_discard(q) ||
((bio->bi_rw & REQ_SECURE) &&
@@ -1794,11 +1802,11 @@ static void blk_account_io_completion(struct request *req, unsigned int bytes)
static void blk_account_io_done(struct request *req)
{
/*
* Account IO completion. bar_rq isn't accounted as a normal
* IO on queueing nor completion. Accounting the containing
* request is enough.
* Account IO completion. flush_rq isn't accounted as a
* normal IO on queueing nor completion. Accounting the
* containing request is enough.
*/
if (blk_do_io_stat(req) && req != &req->q->bar_rq) {
if (blk_do_io_stat(req) && req != &req->q->flush_rq) {
unsigned long duration = jiffies - req->start_time;
const int rw = rq_data_dir(req);
struct hd_struct *part;
@@ -2523,9 +2531,7 @@ EXPORT_SYMBOL_GPL(blk_rq_unprep_clone);
static void __blk_rq_prep_clone(struct request *dst, struct request *src)
{
dst->cpu = src->cpu;
dst->cmd_flags = (rq_data_dir(src) | REQ_NOMERGE);
if (src->cmd_flags & REQ_DISCARD)
dst->cmd_flags |= REQ_DISCARD;
dst->cmd_flags = (src->cmd_flags & REQ_CLONE_MASK) | REQ_NOMERGE;
dst->cmd_type = src->cmd_type;
dst->__sector = blk_rq_pos(src);
dst->__data_len = blk_rq_bytes(src);
+262
View File
@@ -0,0 +1,262 @@
/*
* Functions to sequence FLUSH and FUA writes.
*/
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/gfp.h>
#include "blk.h"
/*
 * FLUSH/FUA sequence stages, one bit each.
 *
 * q->flush_seq accumulates the bits of the stages that are finished or
 * skipped; blk_flush_cur_seq() reports the lowest bit still clear as
 * the current stage.
 */
enum {
	/* flushing in progress */
	QUEUE_FSEQ_STARTED	= 1 << 0,
	/* pre-flushing in progress */
	QUEUE_FSEQ_PREFLUSH	= 1 << 1,
	/* data write in progress */
	QUEUE_FSEQ_DATA		= 1 << 2,
	/* post-flushing in progress */
	QUEUE_FSEQ_POSTFLUSH	= 1 << 3,
	/* every stage above is finished */
	QUEUE_FSEQ_DONE		= 1 << 4,
};
static struct request *queue_next_fseq(struct request_queue *q);
/*
 * Report the stage the flush sequence is currently at: the lowest bit
 * that is still clear in q->flush_seq, or 0 when q->flush_seq is zero.
 */
unsigned blk_flush_cur_seq(struct request_queue *q)
{
	return q->flush_seq ? 1 << ffz(q->flush_seq) : 0;
}
static struct request *blk_flush_complete_seq(struct request_queue *q,
unsigned seq, int error)
{
struct request *next_rq = NULL;
if (error && !q->flush_err)
q->flush_err = error;
BUG_ON(q->flush_seq & seq);
q->flush_seq |= seq;
if (blk_flush_cur_seq(q) != QUEUE_FSEQ_DONE) {
/* not complete yet, queue the next flush sequence */
next_rq = queue_next_fseq(q);
} else {
/* complete this flush request */
__blk_end_request_all(q->orig_flush_rq, q->flush_err);
q->orig_flush_rq = NULL;
q->flush_seq = 0;
/* dispatch the next flush if there's one */
if (!list_empty(&q->pending_flushes)) {
next_rq = list_entry_rq(q->pending_flushes.next);
list_move(&next_rq->queuelist, &q->queue_head);
}
}
return next_rq;
}
static void blk_flush_complete_seq_end_io(struct request_queue *q,
unsigned seq, int error)
{
bool was_empty = elv_queue_empty(q);
struct request *next_rq;
next_rq = blk_flush_complete_seq(q, seq, error);
/*
* Moving a request silently to empty queue_head may stall the
* queue. Kick the queue in those cases.
*/
if (was_empty && next_rq)
__blk_run_queue(q);
}
static void pre_flush_end_io(struct request *rq, int error)
{
elv_completed_request(rq->q, rq);
blk_flush_complete_seq_end_io(rq->q, QUEUE_FSEQ_PREFLUSH, error);
}
static void flush_data_end_io(struct request *rq, int error)
{
elv_completed_request(rq->q, rq);
blk_flush_complete_seq_end_io(rq->q, QUEUE_FSEQ_DATA, error);
}
static void post_flush_end_io(struct request *rq, int error)
{
elv_completed_request(rq->q, rq);
blk_flush_complete_seq_end_io(rq->q, QUEUE_FSEQ_POSTFLUSH, error);
}
/* Turn @rq into a WRITE_FLUSH fs request aimed at @disk. */
static void init_flush_request(struct request *rq, struct gendisk *disk)
{
	rq->rq_disk = disk;
	rq->cmd_type = REQ_TYPE_FS;
	rq->cmd_flags = WRITE_FLUSH;
}
/*
 * Prepare the queue's embedded flush_rq for the stage the sequence is
 * currently at, insert it at the front of the queue and return it.
 * BUGs if the current stage is not PREFLUSH, DATA or POSTFLUSH.
 */
static struct request *queue_next_fseq(struct request_queue *q)
{
	struct request *orig = q->orig_flush_rq;
	struct request *seq_rq = &q->flush_rq;
	unsigned stage;

	blk_rq_init(q, seq_rq);

	stage = blk_flush_cur_seq(q);
	if (stage == QUEUE_FSEQ_PREFLUSH) {
		init_flush_request(seq_rq, orig->rq_disk);
		seq_rq->end_io = pre_flush_end_io;
	} else if (stage == QUEUE_FSEQ_DATA) {
		const unsigned int flush_bits = REQ_FLUSH | REQ_FUA;

		init_request_from_bio(seq_rq, orig->bio);
		/*
		 * orig->rq_disk may be different from
		 * bio->bi_bdev->bd_disk if orig got here through
		 * remapping drivers.  Make sure seq_rq->rq_disk points
		 * to the same one as orig.
		 */
		seq_rq->rq_disk = orig->rq_disk;
		/* take FLUSH/FUA from the original request, not from the bio */
		seq_rq->cmd_flags = (seq_rq->cmd_flags & ~flush_bits) |
				    (orig->cmd_flags & flush_bits);
		seq_rq->end_io = flush_data_end_io;
	} else if (stage == QUEUE_FSEQ_POSTFLUSH) {
		init_flush_request(seq_rq, orig->rq_disk);
		seq_rq->end_io = post_flush_end_io;
	} else {
		BUG();
	}

	elv_insert(q, seq_rq, ELEVATOR_INSERT_FRONT);
	return seq_rq;
}
/*
 * blk_do_flush - process a request carrying REQ_FLUSH and/or REQ_FUA
 * @q: the request queue
 * @rq: the request to process
 *
 * Returns the request to issue next: @rq itself when it needs no
 * flush sequencing, otherwise whatever blk_flush_complete_seq()
 * returns for the sequence started here.  Returns NULL when @rq was
 * parked on q->pending_flushes because another sequence is in
 * progress.
 */
struct request *blk_do_flush(struct request_queue *q, struct request *rq)
{
	const unsigned int caps = q->flush_flags; /* may change, cache it */
	const bool has_flush = caps & REQ_FLUSH;
	const bool has_fua = caps & REQ_FUA;
	const bool do_preflush = has_flush && (rq->cmd_flags & REQ_FLUSH);
	const bool do_postflush = has_flush && !has_fua &&
				  (rq->cmd_flags & REQ_FUA);
	unsigned skipped = 0;

	/*
	 * Special case.  If there's data but flush is not necessary,
	 * the request can be issued directly.
	 *
	 * Flush w/o data should be able to be issued directly too but
	 * currently some drivers assume that rq->bio contains
	 * non-zero data if it isn't NULL and empty FLUSH requests
	 * getting here usually have bio's without data.
	 */
	if (blk_rq_sectors(rq) && !(do_preflush || do_postflush)) {
		rq->cmd_flags &= ~REQ_FLUSH;
		if (!has_fua)
			rq->cmd_flags &= ~REQ_FUA;
		return rq;
	}

	/*
	 * Sequenced flushes can't be processed in parallel.  If
	 * another one is already in progress, queue for later
	 * processing.
	 */
	if (q->flush_seq) {
		list_move_tail(&rq->queuelist, &q->pending_flushes);
		return NULL;
	}

	/*
	 * Start a new flush sequence
	 */
	q->flush_err = 0;
	q->flush_seq |= QUEUE_FSEQ_STARTED;

	/* adjust FLUSH/FUA of the original request and stash it away */
	rq->cmd_flags &= ~REQ_FLUSH;
	if (!has_fua)
		rq->cmd_flags &= ~REQ_FUA;
	blk_dequeue_request(rq);
	q->orig_flush_rq = rq;

	/* skip unneeded sequences and return the first one */
	if (!do_preflush)
		skipped |= QUEUE_FSEQ_PREFLUSH;
	if (!blk_rq_sectors(rq))
		skipped |= QUEUE_FSEQ_DATA;
	if (!do_postflush)
		skipped |= QUEUE_FSEQ_POSTFLUSH;

	return blk_flush_complete_seq(q, skipped, 0);
}
static void bio_end_flush(struct bio *bio, int err)
{
if (err)
clear_bit(BIO_UPTODATE, &bio->bi_flags);
if (bio->bi_private)
complete(bio->bi_private);
bio_put(bio);
}
/**
 * blkdev_issue_flush - issue a flush and wait for it to complete
 * @bdev:	blockdev to issue flush for
 * @gfp_mask:	memory allocation flags (for bio_alloc)
 * @error_sector:	error sector
 *
 * Description:
 *    Issue a flush for the block device in question and wait until it
 *    has completed. Caller can supply room for storing the error
 *    offset in case of a flush error, if they wish to.
 *
 * Return:
 *    0 on success, -ENXIO if the device has no disk, no queue or no
 *    make_request function, -ENOMEM if the bio could not be allocated,
 *    or -EIO if the bio did not complete with BIO_UPTODATE set.
 */
int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask,
		       sector_t *error_sector)
{
	DECLARE_COMPLETION_ONSTACK(wait);
	struct request_queue *q;
	struct bio *bio;
	int ret = 0;

	if (bdev->bd_disk == NULL)
		return -ENXIO;

	q = bdev_get_queue(bdev);
	if (!q)
		return -ENXIO;

	/*
	 * some block devices may not have their queue correctly set up here
	 * (e.g. loop device without a backing file) and so issuing a flush
	 * here will panic. Ensure there is a request function before issuing
	 * the flush.
	 */
	if (!q->make_request_fn)
		return -ENXIO;

	bio = bio_alloc(gfp_mask, 0);
	/*
	 * bio_alloc() can fail (e.g. when @gfp_mask does not allow
	 * sleeping); bail out instead of dereferencing NULL.
	 */
	if (!bio)
		return -ENOMEM;

	bio->bi_end_io = bio_end_flush;
	bio->bi_bdev = bdev;
	bio->bi_private = &wait;

	/*
	 * Extra reference: bio_end_flush() drops one on completion, and
	 * the bio is still inspected below.
	 */
	bio_get(bio);
	submit_bio(WRITE_FLUSH, bio);
	wait_for_completion(&wait);

	/*
	 * The driver must store the error location in ->bi_sector, if
	 * it supports it. For non-stacked drivers, this should be
	 * copied from blk_rq_pos(rq).
	 */
	if (error_sector)
		*error_sector = bio->bi_sector;

	if (!bio_flagged(bio, BIO_UPTODATE))
		ret = -EIO;

	bio_put(bio);
	return ret;
}
EXPORT_SYMBOL(blkdev_issue_flush);
+10 -29
View File
@@ -39,8 +39,7 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
{
DECLARE_COMPLETION_ONSTACK(wait);
struct request_queue *q = bdev_get_queue(bdev);
int type = flags & BLKDEV_IFL_BARRIER ?
DISCARD_BARRIER : DISCARD_NOBARRIER;
int type = REQ_WRITE | REQ_DISCARD;
unsigned int max_discard_sectors;
struct bio *bio;
int ret = 0;
@@ -62,10 +61,10 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
max_discard_sectors &= ~(disc_sects - 1);
}
if (flags & BLKDEV_IFL_SECURE) {
if (flags & BLKDEV_DISCARD_SECURE) {
if (!blk_queue_secdiscard(q))
return -EOPNOTSUPP;
type |= DISCARD_SECURE;
type |= REQ_SECURE;
}
while (nr_sects && !ret) {
@@ -78,8 +77,7 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
bio->bi_sector = sector;
bio->bi_end_io = blkdev_discard_end_io;
bio->bi_bdev = bdev;
if (flags & BLKDEV_IFL_WAIT)
bio->bi_private = &wait;
bio->bi_private = &wait;
if (nr_sects > max_discard_sectors) {
bio->bi_size = max_discard_sectors << 9;
@@ -93,8 +91,7 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
bio_get(bio);
submit_bio(type, bio);
if (flags & BLKDEV_IFL_WAIT)
wait_for_completion(&wait);
wait_for_completion(&wait);
if (bio_flagged(bio, BIO_EOPNOTSUPP))
ret = -EOPNOTSUPP;
@@ -140,7 +137,6 @@ static void bio_batch_end_io(struct bio *bio, int err)
* @sector: start sector
* @nr_sects: number of sectors to write
* @gfp_mask: memory allocation flags (for bio_alloc)
* @flags: BLKDEV_IFL_* flags to control behaviour
*
* Description:
* Generate and issue a number of bios with zero-filled pages.
@@ -149,7 +145,7 @@ static void bio_batch_end_io(struct bio *bio, int err)
*/
int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
sector_t nr_sects, gfp_t gfp_mask, unsigned long flags)
sector_t nr_sects, gfp_t gfp_mask)
{
int ret;
struct bio *bio;
@@ -162,12 +158,6 @@ int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
bb.wait = &wait;
bb.end_io = NULL;
if (flags & BLKDEV_IFL_BARRIER) {
/* issue async barrier before the data */
ret = blkdev_issue_flush(bdev, gfp_mask, NULL, 0);
if (ret)
return ret;
}
submit:
ret = 0;
while (nr_sects != 0) {
@@ -181,8 +171,7 @@ submit:
bio->bi_sector = sector;
bio->bi_bdev = bdev;
bio->bi_end_io = bio_batch_end_io;
if (flags & BLKDEV_IFL_WAIT)
bio->bi_private = &bb;
bio->bi_private = &bb;
while (nr_sects != 0) {
sz = min((sector_t) PAGE_SIZE >> 9 , nr_sects);
@@ -199,18 +188,10 @@ submit:
issued++;
submit_bio(WRITE, bio);
}
/*
* When all data bios are in flight. Send final barrier if requeted.
*/
if (nr_sects == 0 && flags & BLKDEV_IFL_BARRIER)
ret = blkdev_issue_flush(bdev, gfp_mask, NULL,
flags & BLKDEV_IFL_WAIT);
if (flags & BLKDEV_IFL_WAIT)
/* Wait for bios in-flight */
while ( issued != atomic_read(&bb.done))
wait_for_completion(&wait);
/* Wait for bios in-flight */
while (issued != atomic_read(&bb.done))
wait_for_completion(&wait);
if (!test_bit(BIO_UPTODATE, &bb.flags))
/* One of bios in the batch was completed with error.*/
+20
View File
@@ -792,6 +792,26 @@ void blk_queue_update_dma_alignment(struct request_queue *q, int mask)
}
EXPORT_SYMBOL(blk_queue_update_dma_alignment);
/**
 * blk_queue_flush - configure queue's cache flush capability
 * @q:		the request queue for the device
 * @flush:	0, REQ_FLUSH or REQ_FLUSH | REQ_FUA
 *
 * Tell block layer cache flush capability of @q.  If it supports
 * flushing, REQ_FLUSH should be set.  If it supports bypassing
 * write cache for individual writes, REQ_FUA should be set.
 *
 * Bits other than REQ_FLUSH and REQ_FUA trigger a one-time warning and
 * are not stored.  REQ_FUA without REQ_FLUSH also triggers a one-time
 * warning, and REQ_FUA is dropped.
 */
void blk_queue_flush(struct request_queue *q, unsigned int flush)
{
	/* complain once about bits that are not flush capabilities */
	WARN_ON_ONCE(flush & ~(REQ_FLUSH | REQ_FUA));

	/* REQ_FUA is only accepted together with REQ_FLUSH */
	if (WARN_ON_ONCE((flush & REQ_FUA) && !(flush & REQ_FLUSH)))
		flush &= ~REQ_FUA;

	q->flush_flags = flush & (REQ_FLUSH | REQ_FUA);
}
EXPORT_SYMBOL_GPL(blk_queue_flush);
static int __init blk_settings_init(void)
{
blk_max_low_pfn = max_low_pfn - 1;
+7 -1
View File
@@ -51,6 +51,8 @@ static inline void blk_clear_rq_complete(struct request *rq)
*/
#define ELV_ON_HASH(rq) (!hlist_unhashed(&(rq)->hash))
struct request *blk_do_flush(struct request_queue *q, struct request *rq);
static inline struct request *__elv_next_request(struct request_queue *q)
{
struct request *rq;
@@ -58,7 +60,11 @@ static inline struct request *__elv_next_request(struct request_queue *q)
while (1) {
while (!list_empty(&q->queue_head)) {
rq = list_entry_rq(q->queue_head.next);
if (blk_do_ordered(q, &rq))
if (!(rq->cmd_flags & (REQ_FLUSH | REQ_FUA)) ||
rq == &q->flush_rq)
return rq;
rq = blk_do_flush(q, rq);
if (rq)
return rq;
}
+9 -70
View File
@@ -617,8 +617,6 @@ void elv_quiesce_end(struct request_queue *q)
void elv_insert(struct request_queue *q, struct request *rq, int where)
{
struct list_head *pos;
unsigned ordseq;
int unplug_it = 1;
trace_block_rq_insert(q, rq);
@@ -626,9 +624,16 @@ void elv_insert(struct request_queue *q, struct request *rq, int where)
rq->q = q;
switch (where) {
case ELEVATOR_INSERT_REQUEUE:
/*
* Most requeues happen because of a busy condition,
* don't force unplug of the queue for that case.
* Clear unplug_it and fall through.
*/
unplug_it = 0;
case ELEVATOR_INSERT_FRONT:
rq->cmd_flags |= REQ_SOFTBARRIER;
list_add(&rq->queuelist, &q->queue_head);
break;
@@ -668,36 +673,6 @@ void elv_insert(struct request_queue *q, struct request *rq, int where)
q->elevator->ops->elevator_add_req_fn(q, rq);
break;
case ELEVATOR_INSERT_REQUEUE:
/*
* If ordered flush isn't in progress, we do front
* insertion; otherwise, requests should be requeued
* in ordseq order.
*/
rq->cmd_flags |= REQ_SOFTBARRIER;
/*
* Most requeues happen because of a busy condition,
* don't force unplug of the queue for that case.
*/
unplug_it = 0;
if (q->ordseq == 0) {
list_add(&rq->queuelist, &q->queue_head);
break;
}
ordseq = blk_ordered_req_seq(rq);
list_for_each(pos, &q->queue_head) {
struct request *pos_rq = list_entry_rq(pos);
if (ordseq <= blk_ordered_req_seq(pos_rq))
break;
}
list_add_tail(&rq->queuelist, pos);
break;
default:
printk(KERN_ERR "%s: bad insertion point %d\n",
__func__, where);
@@ -716,26 +691,8 @@ void elv_insert(struct request_queue *q, struct request *rq, int where)
void __elv_add_request(struct request_queue *q, struct request *rq, int where,
int plug)
{
if (q->ordcolor)
rq->cmd_flags |= REQ_ORDERED_COLOR;
if (rq->cmd_flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) {
/*
* toggle ordered color
*/
if (rq->cmd_flags & REQ_HARDBARRIER)
q->ordcolor ^= 1;
/*
* barriers implicitly indicate back insertion
*/
if (where == ELEVATOR_INSERT_SORT)
where = ELEVATOR_INSERT_BACK;
/*
* this request is scheduling boundary, update
* end_sector
*/
/* barriers are scheduling boundary, update end_sector */
if (rq->cmd_type == REQ_TYPE_FS ||
(rq->cmd_flags & REQ_DISCARD)) {
q->end_sector = rq_end_sector(rq);
@@ -855,24 +812,6 @@ void elv_completed_request(struct request_queue *q, struct request *rq)
e->ops->elevator_completed_req_fn)
e->ops->elevator_completed_req_fn(q, rq);
}
/*
* Check if the queue is waiting for fs requests to be
* drained for flush sequence.
*/
if (unlikely(q->ordseq)) {
struct request *next = NULL;
if (!list_empty(&q->queue_head))
next = list_entry_rq(q->queue_head.next);
if (!queue_in_flight(q) &&
blk_ordered_cur_seq(q) == QUEUE_ORDSEQ_DRAIN &&
(!next || blk_ordered_req_seq(next) > QUEUE_ORDSEQ_DRAIN)) {
blk_ordered_complete_seq(q, QUEUE_ORDSEQ_DRAIN, 0);
__blk_run_queue(q);
}
}
}
#define to_elv(atr) container_of((atr), struct elv_fs_entry, attr)
+2 -2
View File
@@ -116,7 +116,7 @@ static int blkdev_reread_part(struct block_device *bdev)
static int blk_ioctl_discard(struct block_device *bdev, uint64_t start,
uint64_t len, int secure)
{
unsigned long flags = BLKDEV_IFL_WAIT;
unsigned long flags = 0;
if (start & 511)
return -EINVAL;
@@ -128,7 +128,7 @@ static int blk_ioctl_discard(struct block_device *bdev, uint64_t start,
if (start + len > (bdev->bd_inode->i_size >> 9))
return -EINVAL;
if (secure)
flags |= BLKDEV_IFL_SECURE;
flags |= BLKDEV_DISCARD_SECURE;
return blkdev_issue_discard(bdev, start, len, GFP_KERNEL, flags);
}