Merge branch 'for-2.6.36' of git://git.kernel.dk/linux-2.6-block

* 'for-2.6.36' of git://git.kernel.dk/linux-2.6-block: (149 commits)
  block: make sure that REQ_* types are seen even with CONFIG_BLOCK=n
  xen-blkfront: fix missing out label
  blkdev: fix blkdev_issue_zeroout return value
  block: update request stacking methods to support discards
  block: fix missing export of blk_types.h
  writeback: fix bad _bh spinlock nesting
  drbd: revert "delay probes", feature is being re-implemented differently
  drbd: Initialize all members of sync_conf to their defaults [Bugz 315]
  drbd: Disable delay probes for the upcomming release
  writeback: cleanup bdi_register
  writeback: add new tracepoints
  writeback: remove unnecessary init_timer call
  writeback: optimize periodic bdi thread wakeups
  writeback: prevent unnecessary bdi threads wakeups
  writeback: move bdi threads exiting logic to the forker thread
  writeback: restructure bdi forker loop a little
  writeback: move last_active to bdi
  writeback: do not remove bdi from bdi_list
  writeback: simplify bdi code a little
  writeback: do not lose wake-ups in bdi threads
  ...

Fixed up pretty trivial conflicts in drivers/block/virtio_blk.c and
drivers/scsi/scsi_error.c as per Jens.
This commit is contained in:
Linus Torvalds
2010-08-10 15:22:42 -07:00
154 changed files with 4309 additions and 3234 deletions
+104 -57
View File
@@ -26,15 +26,9 @@
#include <linux/blkdev.h>
#include <linux/backing-dev.h>
#include <linux/buffer_head.h>
#include <linux/tracepoint.h>
#include "internal.h"
#define inode_to_bdi(inode) ((inode)->i_mapping->backing_dev_info)
/*
* We don't actually have pdflush, but this one is exported though /proc...
*/
int nr_pdflush_threads;
/*
* Passed into wb_writeback(), essentially a subset of writeback_control
*/
@@ -50,6 +44,21 @@ struct wb_writeback_work {
struct completion *done; /* set if the caller waits */
};
/*
* Include the creation of the trace points after defining the
* wb_writeback_work structure so that the definition remains local to this
* file.
*/
#define CREATE_TRACE_POINTS
#include <trace/events/writeback.h>
#define inode_to_bdi(inode) ((inode)->i_mapping->backing_dev_info)
/*
* We don't actually have pdflush, but this one is exported though /proc...
*/
int nr_pdflush_threads;
/**
* writeback_in_progress - determine whether there is writeback in progress
* @bdi: the device's backing_dev_info structure.
@@ -65,22 +74,21 @@ int writeback_in_progress(struct backing_dev_info *bdi)
static void bdi_queue_work(struct backing_dev_info *bdi,
struct wb_writeback_work *work)
{
spin_lock(&bdi->wb_lock);
trace_writeback_queue(bdi, work);
spin_lock_bh(&bdi->wb_lock);
list_add_tail(&work->list, &bdi->work_list);
spin_unlock(&bdi->wb_lock);
/*
* If the default thread isn't there, make sure we add it. When
* it gets created and wakes up, we'll run this work.
*/
if (unlikely(list_empty_careful(&bdi->wb_list)))
if (bdi->wb.task) {
wake_up_process(bdi->wb.task);
} else {
/*
* The bdi thread isn't there, wake up the forker thread which
* will create and run it.
*/
trace_writeback_nothread(bdi, work);
wake_up_process(default_backing_dev_info.wb.task);
else {
struct bdi_writeback *wb = &bdi->wb;
if (wb->task)
wake_up_process(wb->task);
}
spin_unlock_bh(&bdi->wb_lock);
}
static void
@@ -95,8 +103,10 @@ __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
*/
work = kzalloc(sizeof(*work), GFP_ATOMIC);
if (!work) {
if (bdi->wb.task)
if (bdi->wb.task) {
trace_writeback_nowork(bdi);
wake_up_process(bdi->wb.task);
}
return;
}
@@ -643,10 +653,14 @@ static long wb_writeback(struct bdi_writeback *wb,
wbc.more_io = 0;
wbc.nr_to_write = MAX_WRITEBACK_PAGES;
wbc.pages_skipped = 0;
trace_wbc_writeback_start(&wbc, wb->bdi);
if (work->sb)
__writeback_inodes_sb(work->sb, wb, &wbc);
else
writeback_inodes_wb(wb, &wbc);
trace_wbc_writeback_written(&wbc, wb->bdi);
work->nr_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write;
wrote += MAX_WRITEBACK_PAGES - wbc.nr_to_write;
@@ -674,6 +688,7 @@ static long wb_writeback(struct bdi_writeback *wb,
if (!list_empty(&wb->b_more_io)) {
inode = list_entry(wb->b_more_io.prev,
struct inode, i_list);
trace_wbc_writeback_wait(&wbc, wb->bdi);
inode_wait_for_writeback(inode);
}
spin_unlock(&inode_lock);
@@ -686,17 +701,17 @@ static long wb_writeback(struct bdi_writeback *wb,
* Return the next wb_writeback_work struct that hasn't been processed yet.
*/
static struct wb_writeback_work *
get_next_work_item(struct backing_dev_info *bdi, struct bdi_writeback *wb)
get_next_work_item(struct backing_dev_info *bdi)
{
struct wb_writeback_work *work = NULL;
spin_lock(&bdi->wb_lock);
spin_lock_bh(&bdi->wb_lock);
if (!list_empty(&bdi->work_list)) {
work = list_entry(bdi->work_list.next,
struct wb_writeback_work, list);
list_del_init(&work->list);
}
spin_unlock(&bdi->wb_lock);
spin_unlock_bh(&bdi->wb_lock);
return work;
}
@@ -744,7 +759,7 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait)
struct wb_writeback_work *work;
long wrote = 0;
while ((work = get_next_work_item(bdi, wb)) != NULL) {
while ((work = get_next_work_item(bdi)) != NULL) {
/*
* Override sync mode, in case we must wait for completion
* because this thread is exiting now.
@@ -752,6 +767,8 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait)
if (force_wait)
work->sync_mode = WB_SYNC_ALL;
trace_writeback_exec(bdi, work);
wrote += wb_writeback(wb, work);
/*
@@ -776,47 +793,66 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait)
* Handle writeback of dirty data for the device backed by this bdi. Also
* wakes up periodically and does kupdated style flushing.
*/
int bdi_writeback_task(struct bdi_writeback *wb)
int bdi_writeback_thread(void *data)
{
unsigned long last_active = jiffies;
unsigned long wait_jiffies = -1UL;
struct bdi_writeback *wb = data;
struct backing_dev_info *bdi = wb->bdi;
long pages_written;
current->flags |= PF_FLUSHER | PF_SWAPWRITE;
set_freezable();
wb->last_active = jiffies;
/*
* Our parent may run at a different priority, just set us to normal
*/
set_user_nice(current, 0);
trace_writeback_thread_start(bdi);
while (!kthread_should_stop()) {
/*
* Remove own delayed wake-up timer, since we are already awake
* and we'll take care of the preriodic write-back.
*/
del_timer(&wb->wakeup_timer);
pages_written = wb_do_writeback(wb, 0);
if (pages_written)
last_active = jiffies;
else if (wait_jiffies != -1UL) {
unsigned long max_idle;
trace_writeback_pages_written(pages_written);
/*
* Longest period of inactivity that we tolerate. If we
* see dirty data again later, the task will get
* recreated automatically.
*/
max_idle = max(5UL * 60 * HZ, wait_jiffies);
if (time_after(jiffies, max_idle + last_active))
break;
if (pages_written)
wb->last_active = jiffies;
set_current_state(TASK_INTERRUPTIBLE);
if (!list_empty(&bdi->work_list)) {
__set_current_state(TASK_RUNNING);
continue;
}
if (dirty_writeback_interval) {
wait_jiffies = msecs_to_jiffies(dirty_writeback_interval * 10);
schedule_timeout_interruptible(wait_jiffies);
} else {
set_current_state(TASK_INTERRUPTIBLE);
if (list_empty_careful(&wb->bdi->work_list) &&
!kthread_should_stop())
schedule();
__set_current_state(TASK_RUNNING);
if (wb_has_dirty_io(wb) && dirty_writeback_interval)
schedule_timeout(msecs_to_jiffies(dirty_writeback_interval * 10));
else {
/*
* We have nothing to do, so can go sleep without any
* timeout and save power. When a work is queued or
* something is made dirty - we will be woken up.
*/
schedule();
}
try_to_freeze();
}
/* Flush any work that raced with us exiting */
if (!list_empty(&bdi->work_list))
wb_do_writeback(wb, 1);
trace_writeback_thread_stop(bdi);
return 0;
}
/*
* Start writeback of `nr_pages' pages. If `nr_pages' is zero, write back
* the whole world.
@@ -891,6 +927,8 @@ static noinline void block_dump___mark_inode_dirty(struct inode *inode)
void __mark_inode_dirty(struct inode *inode, int flags)
{
struct super_block *sb = inode->i_sb;
struct backing_dev_info *bdi = NULL;
bool wakeup_bdi = false;
/*
* Don't do this for I_DIRTY_PAGES - that doesn't actually
@@ -944,22 +982,31 @@ void __mark_inode_dirty(struct inode *inode, int flags)
* reposition it (that would break b_dirty time-ordering).
*/
if (!was_dirty) {
struct bdi_writeback *wb = &inode_to_bdi(inode)->wb;
struct backing_dev_info *bdi = wb->bdi;
bdi = inode_to_bdi(inode);
if (bdi_cap_writeback_dirty(bdi) &&
!test_bit(BDI_registered, &bdi->state)) {
WARN_ON(1);
printk(KERN_ERR "bdi-%s not registered\n",
bdi->name);
if (bdi_cap_writeback_dirty(bdi)) {
WARN(!test_bit(BDI_registered, &bdi->state),
"bdi-%s not registered\n", bdi->name);
/*
* If this is the first dirty inode for this
* bdi, we have to wake-up the corresponding
* bdi thread to make sure background
* write-back happens later.
*/
if (!wb_has_dirty_io(&bdi->wb))
wakeup_bdi = true;
}
inode->dirtied_when = jiffies;
list_move(&inode->i_list, &wb->b_dirty);
list_move(&inode->i_list, &bdi->wb.b_dirty);
}
}
out:
spin_unlock(&inode_lock);
if (wakeup_bdi)
bdi_wakeup_thread_delayed(bdi);
}
EXPORT_SYMBOL(__mark_inode_dirty);