Merge branch 'blktrace' of git://brick.kernel.dk/data/git/linux-2.6-block

* 'blktrace' of git://brick.kernel.dk/data/git/linux-2.6-block:
  [PATCH] Block queue IO tracing support (blktrace) as of 2006-03-23
  [PATCH] relay: consolidate sendfile() and read() code
  [PATCH] relay: add sendfile() support
  [PATCH] relay: migrate from relayfs to a generic relay API
This commit is contained in:
Linus Torvalds
2006-03-23 16:24:24 -08:00
29 changed files with 2221 additions and 1293 deletions
+12
View File
@@ -11,4 +11,16 @@ config LBD
your machine, or if you want to have a raid or loopback device
bigger than 2TB. Otherwise say N.
config BLK_DEV_IO_TRACE
bool "Support for tracing block io actions"
select RELAY
select DEBUG_FS
help
Say Y here, if you want to be able to trace the block layer actions
on a given queue. Tracing allows you to see any traffic happening
on a block device queue. For more information (and the user space
support tools needed), fetch the blktrace app from:
git://brick.kernel.dk/data/git/blktrace.git
source block/Kconfig.iosched
+2
View File
@@ -8,3 +8,5 @@ obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o
obj-$(CONFIG_IOSCHED_AS) += as-iosched.o
obj-$(CONFIG_IOSCHED_DEADLINE) += deadline-iosched.o
obj-$(CONFIG_IOSCHED_CFQ) += cfq-iosched.o
obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o
+538
View File
File diff suppressed because it is too large Load Diff
+4
View File
@@ -33,6 +33,7 @@
#include <linux/init.h>
#include <linux/compiler.h>
#include <linux/delay.h>
#include <linux/blktrace_api.h>
#include <asm/uaccess.h>
@@ -333,6 +334,8 @@ void elv_insert(request_queue_t *q, struct request *rq, int where)
struct list_head *pos;
unsigned ordseq;
blk_add_trace_rq(q, rq, BLK_TA_INSERT);
rq->q = q;
switch (where) {
@@ -499,6 +502,7 @@ struct request *elv_next_request(request_queue_t *q)
* not be passed by new incoming requests
*/
rq->flags |= REQ_STARTED;
blk_add_trace_rq(q, rq, BLK_TA_ISSUE);
}
if (!q->boundary_rq || q->boundary_rq == rq) {
+6
View File
@@ -5,6 +5,7 @@
#include <linux/backing-dev.h>
#include <linux/buffer_head.h>
#include <linux/smp_lock.h>
#include <linux/blktrace_api.h>
#include <asm/uaccess.h>
static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user *arg)
@@ -189,6 +190,11 @@ static int blkdev_locked_ioctl(struct file *file, struct block_device *bdev,
return put_ulong(arg, bdev->bd_inode->i_size >> 9);
case BLKGETSIZE64:
return put_u64(arg, bdev->bd_inode->i_size);
case BLKTRACESTART:
case BLKTRACESTOP:
case BLKTRACESETUP:
case BLKTRACETEARDOWN:
return blk_trace_ioctl(bdev, cmd, (char __user *) arg);
}
return -ENOIOCTLCMD;
}
+42 -2
View File
@@ -28,6 +28,7 @@
#include <linux/writeback.h>
#include <linux/interrupt.h>
#include <linux/cpu.h>
#include <linux/blktrace_api.h>
/*
* for max sense size
@@ -1556,8 +1557,10 @@ void blk_plug_device(request_queue_t *q)
if (test_bit(QUEUE_FLAG_STOPPED, &q->queue_flags))
return;
if (!test_and_set_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags))
if (!test_and_set_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) {
mod_timer(&q->unplug_timer, jiffies + q->unplug_delay);
blk_add_trace_generic(q, NULL, 0, BLK_TA_PLUG);
}
}
EXPORT_SYMBOL(blk_plug_device);
@@ -1621,14 +1624,21 @@ static void blk_backing_dev_unplug(struct backing_dev_info *bdi,
/*
* devices don't necessarily have an ->unplug_fn defined
*/
if (q->unplug_fn)
if (q->unplug_fn) {
blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_IO, NULL,
q->rq.count[READ] + q->rq.count[WRITE]);
q->unplug_fn(q);
}
}
static void blk_unplug_work(void *data)
{
request_queue_t *q = data;
blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_IO, NULL,
q->rq.count[READ] + q->rq.count[WRITE]);
q->unplug_fn(q);
}
@@ -1636,6 +1646,9 @@ static void blk_unplug_timeout(unsigned long data)
{
request_queue_t *q = (request_queue_t *)data;
blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_TIMER, NULL,
q->rq.count[READ] + q->rq.count[WRITE]);
kblockd_schedule_work(&q->unplug_work);
}
@@ -1753,6 +1766,9 @@ static void blk_release_queue(struct kobject *kobj)
if (q->queue_tags)
__blk_queue_free_tags(q);
if (q->blk_trace)
blk_trace_shutdown(q);
kmem_cache_free(requestq_cachep, q);
}
@@ -2129,6 +2145,8 @@ rq_starved:
rq_init(q, rq);
rq->rl = rl;
blk_add_trace_generic(q, bio, rw, BLK_TA_GETRQ);
out:
return rq;
}
@@ -2157,6 +2175,8 @@ static struct request *get_request_wait(request_queue_t *q, int rw,
if (!rq) {
struct io_context *ioc;
blk_add_trace_generic(q, bio, rw, BLK_TA_SLEEPRQ);
__generic_unplug_device(q);
spin_unlock_irq(q->queue_lock);
io_schedule();
@@ -2210,6 +2230,8 @@ EXPORT_SYMBOL(blk_get_request);
*/
void blk_requeue_request(request_queue_t *q, struct request *rq)
{
blk_add_trace_rq(q, rq, BLK_TA_REQUEUE);
if (blk_rq_tagged(rq))
blk_queue_end_tag(q, rq);
@@ -2844,6 +2866,8 @@ static int __make_request(request_queue_t *q, struct bio *bio)
if (!q->back_merge_fn(q, req, bio))
break;
blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE);
req->biotail->bi_next = bio;
req->biotail = bio;
req->nr_sectors = req->hard_nr_sectors += nr_sectors;
@@ -2859,6 +2883,8 @@ static int __make_request(request_queue_t *q, struct bio *bio)
if (!q->front_merge_fn(q, req, bio))
break;
blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE);
bio->bi_next = req->bio;
req->bio = bio;
@@ -2976,6 +3002,7 @@ void generic_make_request(struct bio *bio)
request_queue_t *q;
sector_t maxsector;
int ret, nr_sectors = bio_sectors(bio);
dev_t old_dev;
might_sleep();
/* Test device or partition size, when known. */
@@ -3002,6 +3029,8 @@ void generic_make_request(struct bio *bio)
* NOTE: we don't repeat the blk_size check for each new device.
* Stacking drivers are expected to know what they are doing.
*/
maxsector = -1;
old_dev = 0;
do {
char b[BDEVNAME_SIZE];
@@ -3034,6 +3063,15 @@ end_io:
*/
blk_partition_remap(bio);
if (maxsector != -1)
blk_add_trace_remap(q, bio, old_dev, bio->bi_sector,
maxsector);
blk_add_trace_bio(q, bio, BLK_TA_QUEUE);
maxsector = bio->bi_sector;
old_dev = bio->bi_bdev->bd_dev;
ret = q->make_request_fn(q, bio);
} while (ret);
}
@@ -3153,6 +3191,8 @@ static int __end_that_request_first(struct request *req, int uptodate,
int total_bytes, bio_nbytes, error, next_idx = 0;
struct bio *bio;
blk_add_trace_rq(req->q, req, BLK_TA_COMPLETE);
/*
* extend uptodate bool to allow < 0 value to be direct io error
*/
+2
View File
@@ -38,6 +38,7 @@
#include <linux/hdreg.h>
#include <linux/spinlock.h>
#include <linux/compat.h>
#include <linux/blktrace_api.h>
#include <asm/uaccess.h>
#include <asm/io.h>
@@ -2331,6 +2332,7 @@ static inline void complete_command( ctlr_info_t *h, CommandList_struct *cmd,
cmd->rq->completion_data = cmd;
cmd->rq->errors = status;
blk_add_trace_rq(cmd->rq->q, cmd->rq, BLK_TA_COMPLETE);
blk_complete_request(cmd->rq);
}
+12 -1
View File
@@ -17,6 +17,7 @@
#include <linux/mempool.h>
#include <linux/slab.h>
#include <linux/idr.h>
#include <linux/blktrace_api.h>
static const char *_name = DM_NAME;
@@ -334,6 +335,8 @@ static void dec_pending(struct dm_io *io, int error)
/* nudge anyone waiting on suspend queue */
wake_up(&io->md->wait);
blk_add_trace_bio(io->md->queue, io->bio, BLK_TA_COMPLETE);
bio_endio(io->bio, io->bio->bi_size, io->error);
free_io(io->md, io);
}
@@ -392,6 +395,7 @@ static void __map_bio(struct dm_target *ti, struct bio *clone,
struct target_io *tio)
{
int r;
sector_t sector;
/*
* Sanity checks.
@@ -407,10 +411,17 @@ static void __map_bio(struct dm_target *ti, struct bio *clone,
* this io.
*/
atomic_inc(&tio->io->io_count);
sector = clone->bi_sector;
r = ti->type->map(ti, clone, &tio->info);
if (r > 0)
if (r > 0) {
/* the bio has been remapped so dispatch it */
blk_add_trace_remap(bdev_get_queue(clone->bi_bdev), clone,
tio->io->bio->bi_bdev->bd_dev, sector,
clone->bi_sector);
generic_make_request(clone);
}
else if (r < 0) {
/* error the io and bail out */
-12
View File
@@ -859,18 +859,6 @@ config RAMFS
To compile this as a module, choose M here: the module will be called
ramfs.
config RELAYFS_FS
tristate "Relayfs file system support"
---help---
Relayfs is a high-speed data relay filesystem designed to provide
an efficient mechanism for tools and facilities to relay large
amounts of data from kernel space to user space.
To compile this code as a module, choose M here: the module will be
called relayfs.
If unsure, say N.
config CONFIGFS_FS
tristate "Userspace-driven configuration filesystem (EXPERIMENTAL)"
depends on EXPERIMENTAL
-1
View File
@@ -91,7 +91,6 @@ obj-$(CONFIG_AUTOFS4_FS) += autofs4/
obj-$(CONFIG_ADFS_FS) += adfs/
obj-$(CONFIG_FUSE_FS) += fuse/
obj-$(CONFIG_UDF_FS) += udf/
obj-$(CONFIG_RELAYFS_FS) += relayfs/
obj-$(CONFIG_SUN_OPENPROMFS) += openpromfs/
obj-$(CONFIG_JFS_FS) += jfs/
obj-$(CONFIG_XFS_FS) += xfs/
+4
View File
@@ -25,6 +25,7 @@
#include <linux/module.h>
#include <linux/mempool.h>
#include <linux/workqueue.h>
#include <linux/blktrace_api.h>
#include <scsi/sg.h> /* for struct sg_iovec */
#define BIO_POOL_SIZE 256
@@ -1095,6 +1096,9 @@ struct bio_pair *bio_split(struct bio *bi, mempool_t *pool, int first_sectors)
if (!bp)
return bp;
blk_add_trace_pdu_int(bdev_get_queue(bi->bi_bdev), BLK_TA_SPLIT, bi,
bi->bi_sector + first_sectors);
BUG_ON(bi->bi_vcnt != 1);
BUG_ON(bi->bi_idx != 0);
atomic_set(&bp->cnt, 3);
+1
View File
@@ -72,6 +72,7 @@
#include <linux/i2c-dev.h>
#include <linux/wireless.h>
#include <linux/atalk.h>
#include <linux/blktrace_api.h>
#include <net/sock.h> /* siocdevprivate_ioctl */
#include <net/bluetooth/bluetooth.h>
-4
View File
@@ -1,4 +0,0 @@
obj-$(CONFIG_RELAYFS_FS) += relayfs.o
relayfs-y := relay.o inode.o buffers.o
-190
View File
@@ -1,190 +0,0 @@
/*
* RelayFS buffer management code.
*
* Copyright (C) 2002-2005 - Tom Zanussi (zanussi@us.ibm.com), IBM Corp
* Copyright (C) 1999-2005 - Karim Yaghmour (karim@opersys.com)
*
* This file is released under the GPL.
*/
#include <linux/module.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>
#include <linux/relayfs_fs.h>
#include "relay.h"
#include "buffers.h"
/*
 * vm_ops ->close() handler for a relayfs buffer mapping.
 *
 * Looks up the channel buffer stashed in the vma's private data and
 * forwards the event to the owning channel's buf_unmapped() callback.
 */
static void relay_file_mmap_close(struct vm_area_struct *vma)
{
	struct rchan_buf *rbuf = vma->vm_private_data;

	rbuf->chan->cb->buf_unmapped(rbuf, vma->vm_file);
}
/*
 * vm_ops ->nopage() handler for a relayfs buffer mapping.
 *
 * Translates the faulting address into an offset inside the channel
 * buffer, takes a reference on the backing page and returns it.
 * Returns NOPAGE_SIGBUS for an address beyond the vma and NOPAGE_OOM
 * when there is no buffer or no page behind the offset.
 *
 * NOTE(review): the range check uses '>' rather than '>='; confirm
 * whether address == vma->vm_end can reach this handler.
 */
static struct page *relay_buf_nopage(struct vm_area_struct *vma,
				     unsigned long address,
				     int *type)
{
	struct rchan_buf *rbuf = vma->vm_private_data;
	unsigned long off = address - vma->vm_start;
	struct page *pg;

	/* Disallow mremap */
	if (address > vma->vm_end)
		return NOPAGE_SIGBUS;

	if (!rbuf)
		return NOPAGE_OOM;

	pg = vmalloc_to_page(rbuf->start + off);
	if (!pg)
		return NOPAGE_OOM;

	get_page(pg);
	if (type)
		*type = VM_FAULT_MINOR;

	return pg;
}
/*
 * VM operations installed on every relay file mapping by
 * relay_mmap_buf().
 */
static struct vm_operations_struct relay_file_mmap_ops = {
	.close	= relay_file_mmap_close,
	.nopage	= relay_buf_nopage,
};
/**
 *	relay_mmap_buf: - mmap channel buffer to process address space
 *	@buf: relay channel buffer
 *	@vma: vm_area_struct describing memory to be mapped
 *
 *	Returns 0 if ok, -EBADF when @buf is NULL and -EINVAL when the
 *	requested mapping length differs from the channel's alloc_size.
 *	On success the vma is wired to the relay vm_ops and the channel's
 *	buf_mapped() callback is invoked.
 *
 *	Caller should already have grabbed mmap_sem.
 */
int relay_mmap_buf(struct rchan_buf *buf, struct vm_area_struct *vma)
{
	unsigned long span = vma->vm_end - vma->vm_start;

	if (!buf)
		return -EBADF;

	/* only whole-buffer mappings are accepted */
	if (span != (unsigned long)buf->chan->alloc_size)
		return -EINVAL;

	vma->vm_ops = &relay_file_mmap_ops;
	vma->vm_private_data = buf;
	buf->chan->cb->buf_mapped(buf, vma->vm_file);

	return 0;
}
/**
 *	relay_alloc_buf - allocate a channel buffer
 *	@buf: the buffer struct
 *	@size: total size of the buffer
 *
 *	Rounds @size up to whole pages, allocates the pages one by one,
 *	maps them into one contiguous kernel range and zeroes it.
 *
 *	Returns a pointer to the resulting buffer, NULL if unsuccessful
 */
static void *relay_alloc_buf(struct rchan_buf *buf, unsigned long size)
{
	unsigned int nr_pages, got = 0, idx;
	void *vaddr;

	size = PAGE_ALIGN(size);
	nr_pages = size >> PAGE_SHIFT;

	buf->page_array = kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
	if (!buf->page_array)
		return NULL;

	while (got < nr_pages) {
		buf->page_array[got] = alloc_page(GFP_KERNEL);
		if (unlikely(!buf->page_array[got]))
			goto depopulate;
		got++;
	}

	vaddr = vmap(buf->page_array, nr_pages, VM_MAP, PAGE_KERNEL);
	if (!vaddr)
		goto depopulate;

	memset(vaddr, 0, size);
	buf->page_count = nr_pages;
	return vaddr;

depopulate:
	/* release only the pages that were actually obtained */
	for (idx = 0; idx < got; idx++)
		__free_page(buf->page_array[idx]);
	kfree(buf->page_array);
	return NULL;
}
/**
 *	relay_create_buf - allocate and initialize a channel buffer
 *	@chan: the channel the buffer will belong to; its n_subbufs and
 *	       alloc_size determine the allocation sizes
 *
 *	Allocates the rchan_buf, its per-sub-buffer padding array and the
 *	data area, then takes a reference on @chan for the new buffer.
 *
 *	Returns channel buffer if successful, NULL otherwise
 */
struct rchan_buf *relay_create_buf(struct rchan *chan)
{
	struct rchan_buf *buf = kcalloc(1, sizeof(*buf), GFP_KERNEL);

	if (!buf)
		return NULL;

	/* one padding slot per sub-buffer; size by element, not by pointer */
	buf->padding = kmalloc(chan->n_subbufs * sizeof(*buf->padding),
			       GFP_KERNEL);
	if (!buf->padding)
		goto free_buf;

	buf->start = relay_alloc_buf(buf, chan->alloc_size);
	if (!buf->start)
		goto free_buf;

	buf->chan = chan;
	kref_get(&buf->chan->kref);
	return buf;

free_buf:
	/* buf was zeroed, so padding is NULL or valid here; kfree(NULL) is a no-op */
	kfree(buf->padding);
	kfree(buf);
	return NULL;
}
/**
 *	relay_destroy_buf - destroy an rchan_buf struct and associated buffer
 *	@buf: the buffer struct
 *
 *	Unmaps and frees the data pages (if any were set up), frees the
 *	padding array and the struct itself, then drops the reference the
 *	buffer held on its channel.
 */
void relay_destroy_buf(struct rchan_buf *buf)
{
	/* remember the owner: buf is gone by the time we drop the ref */
	struct rchan *owner = buf->chan;

	if (likely(buf->start)) {
		unsigned int pg = 0;

		vunmap(buf->start);
		while (pg < buf->page_count)
			__free_page(buf->page_array[pg++]);
		kfree(buf->page_array);
	}

	kfree(buf->padding);
	kfree(buf);
	kref_put(&owner->kref, relay_destroy_channel);
}
/**
 *	relay_remove_buf - remove a channel buffer
 *	@kref: the kref embedded in the rchan_buf being released
 *
 *	Asks the channel's remove_buf_file() callback to remove the
 *	buffer's file, then frees the rchan_buf struct and the channel
 *	buffer.  Should only be called from kref_put().
 */
void relay_remove_buf(struct kref *kref)
{
	struct rchan_buf *rbuf = container_of(kref, struct rchan_buf, kref);
	struct dentry *file = rbuf->dentry;

	rbuf->chan->cb->remove_buf_file(file);
	relay_destroy_buf(rbuf);
}
-12
View File
@@ -1,12 +0,0 @@
#ifndef _BUFFERS_H
#define _BUFFERS_H
/*
 * Round x up to a multiple of PAGE_SIZE (assuming the usual
 * PAGE_MASK == ~(PAGE_SIZE - 1)).  This inspired by rtai/shmem.
 *
 * The whole expansion is parenthesized so the macro can be used inside
 * a larger expression, e.g. 2 * FIX_SIZE(x), without precedence
 * surprises.
 */
#define FIX_SIZE(x) ((((x) - 1) & PAGE_MASK) + PAGE_SIZE)
extern int relay_mmap_buf(struct rchan_buf *buf, struct vm_area_struct *vma);
extern struct rchan_buf *relay_create_buf(struct rchan *chan);
extern void relay_destroy_buf(struct rchan_buf *buf);
extern void relay_remove_buf(struct kref *kref);
#endif/* _BUFFERS_H */
-581
View File
File diff suppressed because it is too large Load Diff
-482
View File
@@ -1,482 +0,0 @@
/*
* Public API and common code for RelayFS.
*
* See Documentation/filesystems/relayfs.txt for an overview of relayfs.
*
* Copyright (C) 2002-2005 - Tom Zanussi (zanussi@us.ibm.com), IBM Corp
* Copyright (C) 1999-2005 - Karim Yaghmour (karim@opersys.com)
*
* This file is released under the GPL.
*/
#include <linux/errno.h>
#include <linux/stddef.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/string.h>
#include <linux/relayfs_fs.h>
#include "relay.h"
#include "buffers.h"
/**
 *	relay_buf_empty - boolean, is the channel buffer empty?
 *	@buf: channel buffer
 *
 *	Returns 1 if every produced sub-buffer has been consumed,
 *	0 otherwise.
 */
int relay_buf_empty(struct rchan_buf *buf)
{
	if (buf->subbufs_produced - buf->subbufs_consumed)
		return 0;

	return 1;
}
/**
 *	relay_buf_full - boolean, is the channel buffer full?
 *	@buf: channel buffer
 *
 *	Returns 1 if the number of produced-but-unconsumed sub-buffers
 *	has reached the channel's sub-buffer count, 0 otherwise.
 */
int relay_buf_full(struct rchan_buf *buf)
{
	size_t pending = buf->subbufs_produced - buf->subbufs_consumed;

	if (pending >= buf->chan->n_subbufs)
		return 1;

	return 0;
}
/*
* High-level relayfs kernel API and associated functions.
*/
/*
* rchan_callback implementations defining default channel behavior. Used
* in place of corresponding NULL values in client callback struct.
*/
/*
 * subbuf_start() default callback.  Refuses the switch (returns 0)
 * while the buffer is full, otherwise allows it (returns 1).
 */
static int subbuf_start_default_callback (struct rchan_buf *buf,
					  void *subbuf,
					  void *prev_subbuf,
					  size_t prev_padding)
{
	return relay_buf_full(buf) ? 0 : 1;
}
/*
 * buf_mapped() default callback. Does nothing.
 *
 * Installed by setup_callbacks() when the client leaves ->buf_mapped
 * unset, so callers can invoke the hook unconditionally.
 */
static void buf_mapped_default_callback(struct rchan_buf *buf,
struct file *filp)
{
}
/*
 * buf_unmapped() default callback. Does nothing.
 *
 * Installed by setup_callbacks() when the client leaves ->buf_unmapped
 * unset, so callers can invoke the hook unconditionally.
 */
static void buf_unmapped_default_callback(struct rchan_buf *buf,
struct file *filp)
{
}
/*
 * create_buf_file() default callback.  Creates the relayfs file that
 * represents @buf, using the standard relay file operations.
 * @is_global is left untouched.
 */
static struct dentry *create_buf_file_default_callback(const char *filename,
						       struct dentry *parent,
						       int mode,
						       struct rchan_buf *buf,
						       int *is_global)
{
	struct dentry *file;

	file = relayfs_create_file(filename, parent, mode,
				   &relay_file_operations, buf);
	return file;
}
/*
 * remove_buf_file() default callback.  Removes the relayfs file that
 * represents a relay buffer and passes relayfs_remove()'s result back.
 */
static int remove_buf_file_default_callback(struct dentry *dentry)
{
	int ret = relayfs_remove(dentry);

	return ret;
}
/*
 * Callback set used when a client passes no callbacks at all, and
 * re-installed on a channel when its buffers are closed.
 */
static struct rchan_callbacks default_channel_callbacks = {
	.create_buf_file	= create_buf_file_default_callback,
	.remove_buf_file	= remove_buf_file_default_callback,
	.subbuf_start		= subbuf_start_default_callback,
	.buf_mapped		= buf_mapped_default_callback,
	.buf_unmapped		= buf_unmapped_default_callback,
};
/**
* wakeup_readers - wake up readers waiting on a channel
* @private: the channel buffer
*
* This is the work function used to defer reader waking. The
* reason waking is deferred is that calling directly from write
* causes problems if you're writing from say the scheduler.
*/
static void wakeup_readers(void *private)
{
struct rchan_buf *buf = private;
wake_up_interruptible(&buf->read_wait);
}
/**
 * __relay_reset - reset a channel buffer
 * @buf: the channel buffer
 * @init: 1 if this is a first-time initialization
 *
 * On first-time initialization the wait queue, kref and work item are
 * set up; on a later reset any pending reader wakeup is cancelled and
 * flushed instead.  In both cases the produced/consumed counters,
 * write position and per-sub-buffer padding are cleared and the
 * channel's subbuf_start() callback is invoked for the first
 * sub-buffer.
 *
 * See relay_reset for description of effect.
 */
static inline void __relay_reset(struct rchan_buf *buf, unsigned int init)
{
size_t i;
if (init) {
init_waitqueue_head(&buf->read_wait);
kref_init(&buf->kref);
/* the work function is filled in later via PREPARE_WORK() */
INIT_WORK(&buf->wake_readers, NULL, NULL);
} else {
/* make sure no deferred wakeup is still pending or running */
cancel_delayed_work(&buf->wake_readers);
flush_scheduled_work();
}
buf->subbufs_produced = 0;
buf->subbufs_consumed = 0;
buf->bytes_consumed = 0;
buf->finalized = 0;
/* restart writing at the beginning of the first sub-buffer */
buf->data = buf->start;
buf->offset = 0;
for (i = 0; i < buf->chan->n_subbufs; i++)
buf->padding[i] = 0;
/* no previous sub-buffer on a reset; the return value is ignored */
buf->chan->cb->subbuf_start(buf, buf->data, NULL, 0);
}
/**
 *	relay_reset - reset the channel
 *	@chan: the channel
 *
 *	Erases all data from all channel buffers and restarts the channel
 *	in its initial state.  The buffers are not freed, so any mappings
 *	are still in effect.
 *
 *	NOTE: Care should be taken that the channel isn't actually
 *	being used by anything when this call is made.
 */
void relay_reset(struct rchan *chan)
{
	struct rchan_buf *last = NULL;
	unsigned int cpu;

	if (!chan)
		return;

	for (cpu = 0; cpu < NR_CPUS; cpu++) {
		struct rchan_buf *cur = chan->buf[cpu];

		/* stop at the first hole or at a repeated (shared) buffer */
		if (!cur || cur == last)
			break;

		__relay_reset(cur, 0);
		last = cur;
	}
}
/**
 *	relay_open_buf - create a new channel buffer in relayfs
 *	@chan: channel the buffer is created for
 *	@filename: name of the file that will represent the buffer
 *	@parent: parent directory handed to the create_buf_file() callback
 *	@is_global: in/out flag; when already set, buffer 0 is reused
 *
 *	Returns the buffer, or NULL if it could not be created.
 *
 *	Internal - used by relay_open().
 */
static struct rchan_buf *relay_open_buf(struct rchan *chan,
					const char *filename,
					struct dentry *parent,
					int *is_global)
{
	struct rchan_buf *rbuf;
	struct dentry *file;

	/* a global channel shares its first buffer between all cpus */
	if (*is_global)
		return chan->buf[0];

	rbuf = relay_create_buf(chan);
	if (!rbuf)
		return NULL;

	/* Create file in fs */
	file = chan->cb->create_buf_file(filename, parent, S_IRUSR,
					 rbuf, is_global);
	if (file) {
		rbuf->dentry = file;
		__relay_reset(rbuf, 1);
		return rbuf;
	}

	relay_destroy_buf(rbuf);
	return NULL;
}
/**
 * relay_close_buf - close a channel buffer
 * @buf: channel buffer
 *
 * Marks the buffer finalized and restores the default callbacks.
 * The channel buffer and channel buffer data structure are then freed
 * automatically when the last reference is given up.
 */
static inline void relay_close_buf(struct rchan_buf *buf)
{
buf->finalized = 1;
/* note: this swaps the callbacks of the whole channel, not just this buffer */
buf->chan->cb = &default_channel_callbacks;
/* make sure no deferred reader wakeup is pending before dropping our ref */
cancel_delayed_work(&buf->wake_readers);
flush_scheduled_work();
/* relay_remove_buf() runs once the last reference is gone */
kref_put(&buf->kref, relay_remove_buf);
}
/*
 * Attach the client's callbacks to the channel.  A NULL @cb selects the
 * built-in defaults wholesale; otherwise every hook the client left
 * unset is filled in (in the client's own struct) with its default.
 */
static inline void setup_callbacks(struct rchan *chan,
				   struct rchan_callbacks *cb)
{
	if (cb) {
		if (!cb->subbuf_start)
			cb->subbuf_start = subbuf_start_default_callback;
		if (!cb->buf_mapped)
			cb->buf_mapped = buf_mapped_default_callback;
		if (!cb->buf_unmapped)
			cb->buf_unmapped = buf_unmapped_default_callback;
		if (!cb->create_buf_file)
			cb->create_buf_file = create_buf_file_default_callback;
		if (!cb->remove_buf_file)
			cb->remove_buf_file = remove_buf_file_default_callback;
	}

	chan->cb = cb ? cb : &default_channel_callbacks;
}
/**
 *	relay_open - create a new relayfs channel
 *	@base_filename: base name of files to create
 *	@parent: dentry of parent directory, NULL for root directory
 *	@subbuf_size: size of sub-buffers
 *	@n_subbufs: number of sub-buffers
 *	@cb: client callback functions
 *
 *	Returns channel pointer if successful, NULL otherwise.
 *
 *	Creates a channel buffer for each online cpu using the sizes and
 *	attributes specified.  The created channel buffer files
 *	will be named base_filename0...base_filenameN-1.  File
 *	permissions will be S_IRUSR.
 *
 *	On failure every buffer opened so far is closed again and the
 *	channel reference is dropped.
 */
struct rchan *relay_open(const char *base_filename,
			 struct dentry *parent,
			 size_t subbuf_size,
			 size_t n_subbufs,
			 struct rchan_callbacks *cb)
{
	unsigned int i;
	struct rchan *chan;
	char *tmpname;
	int is_global = 0;

	if (!base_filename)
		return NULL;

	if (!(subbuf_size && n_subbufs))
		return NULL;

	chan = kcalloc(1, sizeof(struct rchan), GFP_KERNEL);
	if (!chan)
		return NULL;

	chan->version = RELAYFS_CHANNEL_VERSION;
	chan->n_subbufs = n_subbufs;
	chan->subbuf_size = subbuf_size;
	chan->alloc_size = FIX_SIZE(subbuf_size * n_subbufs);
	setup_callbacks(chan, cb);
	kref_init(&chan->kref);

	tmpname = kmalloc(NAME_MAX + 1, GFP_KERNEL);
	if (!tmpname)
		goto free_chan;

	for_each_online_cpu(i) {
		/* bounded: base_filename is caller supplied, tmpname is NAME_MAX + 1 */
		snprintf(tmpname, NAME_MAX + 1, "%s%d", base_filename, i);
		chan->buf[i] = relay_open_buf(chan, tmpname, parent,
					      &is_global);
		/* check before touching the buffer: relay_open_buf() may fail */
		if (!chan->buf[i])
			goto free_bufs;
		chan->buf[i]->cpu = i;
	}

	kfree(tmpname);
	return chan;

free_bufs:
	for (i = 0; i < NR_CPUS; i++) {
		if (!chan->buf[i])
			break;
		relay_close_buf(chan->buf[i]);
		/* a global channel has a single shared buffer: close it once */
		if (is_global)
			break;
	}
	kfree(tmpname);

free_chan:
	kref_put(&chan->kref, relay_destroy_channel);
	return NULL;
}
/**
 * relay_switch_subbuf - switch to a new sub-buffer
 * @buf: channel buffer
 * @length: size of current event
 *
 * Returns either the length passed in or 0 if full.
 * Performs sub-buffer-switch tasks such as invoking callbacks,
 * updating padding counts, waking up readers, etc.
 *
 * 0 is also returned when @length cannot fit in a sub-buffer; in that
 * case the length is remembered in the channel's last_toobig field.
 */
size_t relay_switch_subbuf(struct rchan_buf *buf, size_t length)
{
void *old, *new;
size_t old_subbuf, new_subbuf;
if (unlikely(length > buf->chan->subbuf_size))
goto toobig;
/*
 * offset == subbuf_size + 1 is the marker stored below when the
 * subbuf_start() callback refused the previous switch; in that case
 * the current sub-buffer has already been accounted for.
 */
if (buf->offset != buf->chan->subbuf_size + 1) {
/* record the unused tail of the sub-buffer being closed */
buf->prev_padding = buf->chan->subbuf_size - buf->offset;
old_subbuf = buf->subbufs_produced % buf->chan->n_subbufs;
buf->padding[old_subbuf] = buf->prev_padding;
buf->subbufs_produced++;
/* wake readers from a work item rather than directly from here */
if (waitqueue_active(&buf->read_wait)) {
PREPARE_WORK(&buf->wake_readers, wakeup_readers, buf);
schedule_delayed_work(&buf->wake_readers, 1);
}
}
old = buf->data;
/* sub-buffers are used round-robin */
new_subbuf = buf->subbufs_produced % buf->chan->n_subbufs;
new = buf->start + new_subbuf * buf->chan->subbuf_size;
buf->offset = 0;
if (!buf->chan->cb->subbuf_start(buf, new, old, buf->prev_padding)) {
/* callback declined the switch: leave the refusal marker in offset */
buf->offset = buf->chan->subbuf_size + 1;
return 0;
}
buf->data = new;
buf->padding[new_subbuf] = 0;
/* the subbuf_start() callback may have advanced offset; re-check the fit */
if (unlikely(length + buf->offset > buf->chan->subbuf_size))
goto toobig;
return length;
toobig:
buf->chan->last_toobig = length;
return 0;
}
/**
 *	relay_subbufs_consumed - update the buffer's sub-buffers-consumed count
 *	@chan: the channel
 *	@cpu: the cpu associated with the channel buffer to update
 *	@subbufs_consumed: number of sub-buffers to add to current buf's count
 *
 *	Adds to the channel buffer's consumed sub-buffer count.
 *	subbufs_consumed should be the number of sub-buffers newly consumed,
 *	not the total consumed.  The count is clamped so it never exceeds
 *	the number of sub-buffers produced.
 *
 *	NOTE: kernel clients don't need to call this function if the channel
 *	mode is 'overwrite'.
 */
void relay_subbufs_consumed(struct rchan *chan,
			    unsigned int cpu,
			    size_t subbufs_consumed)
{
	struct rchan_buf *rbuf;

	if (!chan || cpu >= NR_CPUS)
		return;

	rbuf = chan->buf[cpu];
	if (!rbuf)
		return;

	rbuf->subbufs_consumed += subbufs_consumed;
	if (rbuf->subbufs_consumed > rbuf->subbufs_produced)
		rbuf->subbufs_consumed = rbuf->subbufs_produced;
}
/**
 *	relay_destroy_channel - free the channel struct
 *	@kref: the kref embedded in the channel being released
 *
 *	Should only be called from kref_put().
 */
void relay_destroy_channel(struct kref *kref)
{
	kfree(container_of(kref, struct rchan, kref));
}
/**
 *	relay_close - close the channel
 *	@chan: the channel
 *
 *	Closes all channel buffers, warns if any item was too large to be
 *	logged, and drops the caller's reference on the channel.
 */
void relay_close(struct rchan *chan)
{
	struct rchan_buf *last = NULL;
	unsigned int cpu;

	if (!chan)
		return;

	for (cpu = 0; cpu < NR_CPUS; cpu++) {
		struct rchan_buf *cur = chan->buf[cpu];

		/* stop at the first hole or at a repeated (shared) buffer */
		if (!cur || cur == last)
			break;

		relay_close_buf(cur);
		last = cur;
	}

	if (chan->last_toobig)
		printk(KERN_WARNING "relayfs: one or more items not logged "
		       "[item size (%Zd) > sub-buffer size (%Zd)]\n",
		       chan->last_toobig, chan->subbuf_size);

	kref_put(&chan->kref, relay_destroy_channel);
}
/**
 *	relay_flush - flush the channel
 *	@chan: the channel
 *
 *	Flushes all channel buffers i.e. forces a sub-buffer switch on
 *	each of them.
 */
void relay_flush(struct rchan *chan)
{
	struct rchan_buf *last = NULL;
	unsigned int cpu;

	if (!chan)
		return;

	for (cpu = 0; cpu < NR_CPUS; cpu++) {
		struct rchan_buf *cur = chan->buf[cpu];

		/* stop at the first hole or at a repeated (shared) buffer */
		if (!cur || cur == last)
			break;

		relay_switch_subbuf(cur, 0);
		last = cur;
	}
}
EXPORT_SYMBOL_GPL(relay_open);
EXPORT_SYMBOL_GPL(relay_close);
EXPORT_SYMBOL_GPL(relay_flush);
EXPORT_SYMBOL_GPL(relay_reset);
EXPORT_SYMBOL_GPL(relay_subbufs_consumed);
EXPORT_SYMBOL_GPL(relay_switch_subbuf);
EXPORT_SYMBOL_GPL(relay_buf_full);
-8
View File
@@ -1,8 +0,0 @@
#ifndef _RELAY_H
#define _RELAY_H
extern int relayfs_remove(struct dentry *dentry);
extern int relay_buf_empty(struct rchan_buf *buf);
extern void relay_destroy_channel(struct kref *kref);
#endif /* _RELAY_H */
+3
View File
@@ -22,6 +22,7 @@ typedef struct request_queue request_queue_t;
struct elevator_queue;
typedef struct elevator_queue elevator_t;
struct request_pm_state;
struct blk_trace;
#define BLKDEV_MIN_RQ 4
#define BLKDEV_MAX_RQ 128 /* Default maximum */
@@ -416,6 +417,8 @@ struct request_queue
unsigned int sg_reserved_size;
int node;
struct blk_trace *blk_trace;
/*
* reserved for flush operations
*/
+277
View File
@@ -0,0 +1,277 @@
#ifndef BLKTRACE_H
#define BLKTRACE_H
#include <linux/config.h>
#include <linux/blkdev.h>
#include <linux/relay.h>
/*
* Trace categories
*/
enum blktrace_cat {
BLK_TC_READ = 1 << 0, /* reads */
BLK_TC_WRITE = 1 << 1, /* writes */
BLK_TC_BARRIER = 1 << 2, /* barrier */
BLK_TC_SYNC = 1 << 3, /* barrier */
BLK_TC_QUEUE = 1 << 4, /* queueing/merging */
BLK_TC_REQUEUE = 1 << 5, /* requeueing */
BLK_TC_ISSUE = 1 << 6, /* issue */
BLK_TC_COMPLETE = 1 << 7, /* completions */
BLK_TC_FS = 1 << 8, /* fs requests */
BLK_TC_PC = 1 << 9, /* pc requests */
BLK_TC_NOTIFY = 1 << 10, /* special message */
BLK_TC_END = 1 << 15, /* only 16-bits, reminder */
};
#define BLK_TC_SHIFT (16)
#define BLK_TC_ACT(act) ((act) << BLK_TC_SHIFT)
/*
* Basic trace actions
*/
enum blktrace_act {
__BLK_TA_QUEUE = 1, /* queued */
__BLK_TA_BACKMERGE, /* back merged to existing rq */
__BLK_TA_FRONTMERGE, /* front merge to existing rq */
__BLK_TA_GETRQ, /* allocated new request */
__BLK_TA_SLEEPRQ, /* sleeping on rq allocation */
__BLK_TA_REQUEUE, /* request requeued */
__BLK_TA_ISSUE, /* sent to driver */
__BLK_TA_COMPLETE, /* completed by driver */
__BLK_TA_PLUG, /* queue was plugged */
__BLK_TA_UNPLUG_IO, /* queue was unplugged by io */
__BLK_TA_UNPLUG_TIMER, /* queue was unplugged by timer */
__BLK_TA_INSERT, /* insert request */
__BLK_TA_SPLIT, /* bio was split */
__BLK_TA_BOUNCE, /* bio was bounced */
__BLK_TA_REMAP, /* bio was remapped */
};
/*
 * Trace actions in full: the basic action number in the low bits,
 * combined with its category bit(s) shifted up by BLK_TC_ACT().
 * Additionally, read or write is masked.
 *
 * Note that BLK_TA_SPLIT and BLK_TA_BOUNCE carry no category bits.
 */
#define BLK_TA_QUEUE (__BLK_TA_QUEUE | BLK_TC_ACT(BLK_TC_QUEUE))
#define BLK_TA_BACKMERGE (__BLK_TA_BACKMERGE | BLK_TC_ACT(BLK_TC_QUEUE))
#define BLK_TA_FRONTMERGE (__BLK_TA_FRONTMERGE | BLK_TC_ACT(BLK_TC_QUEUE))
#define BLK_TA_GETRQ (__BLK_TA_GETRQ | BLK_TC_ACT(BLK_TC_QUEUE))
#define BLK_TA_SLEEPRQ (__BLK_TA_SLEEPRQ | BLK_TC_ACT(BLK_TC_QUEUE))
#define BLK_TA_REQUEUE (__BLK_TA_REQUEUE | BLK_TC_ACT(BLK_TC_REQUEUE))
#define BLK_TA_ISSUE (__BLK_TA_ISSUE | BLK_TC_ACT(BLK_TC_ISSUE))
#define BLK_TA_COMPLETE (__BLK_TA_COMPLETE| BLK_TC_ACT(BLK_TC_COMPLETE))
#define BLK_TA_PLUG (__BLK_TA_PLUG | BLK_TC_ACT(BLK_TC_QUEUE))
#define BLK_TA_UNPLUG_IO (__BLK_TA_UNPLUG_IO | BLK_TC_ACT(BLK_TC_QUEUE))
#define BLK_TA_UNPLUG_TIMER (__BLK_TA_UNPLUG_TIMER | BLK_TC_ACT(BLK_TC_QUEUE))
#define BLK_TA_INSERT (__BLK_TA_INSERT | BLK_TC_ACT(BLK_TC_QUEUE))
#define BLK_TA_SPLIT (__BLK_TA_SPLIT)
#define BLK_TA_BOUNCE (__BLK_TA_BOUNCE)
#define BLK_TA_REMAP (__BLK_TA_REMAP | BLK_TC_ACT(BLK_TC_QUEUE))
#define BLK_IO_TRACE_MAGIC 0x65617400
#define BLK_IO_TRACE_VERSION 0x07
/*
* The trace itself
*/
struct blk_io_trace {
u32 magic; /* MAGIC << 8 | version */
u32 sequence; /* event number */
u64 time; /* in microseconds */
u64 sector; /* disk offset */
u32 bytes; /* transfer length */
u32 action; /* what happened */
u32 pid; /* who did it */
u32 device; /* device number */
u32 cpu; /* on what cpu did it happen */
u16 error; /* completion error */
u16 pdu_len; /* length of data after this trace */
};
/*
 * The remap event: payload appended to a BLK_TA_REMAP trace.
 * blk_add_trace_remap() stores device and sector in big-endian byte
 * order.
 */
struct blk_io_trace_remap {
u32 device;
u32 __pad;
u64 sector;
};
enum {
Blktrace_setup = 1,
Blktrace_running,
Blktrace_stopped,
};
struct blk_trace {
int trace_state;
struct rchan *rchan;
unsigned long *sequence;
u16 act_mask;
u64 start_lba;
u64 end_lba;
u32 pid;
u32 dev;
struct dentry *dir;
struct dentry *dropped_file;
atomic_t dropped;
};
/*
 * User setup structure passed with BLKTRACESTART
 *
 * NOTE(review): the ioctl list also has BLKTRACESETUP, which looks like
 * the more likely consumer of this structure - confirm and fix the line
 * above if so.
 */
struct blk_user_trace_setup {
char name[BDEVNAME_SIZE]; /* output */
u16 act_mask; /* input */
u32 buf_size; /* input */
u32 buf_nr; /* input */
u64 start_lba;
u64 end_lba;
u32 pid;
};
#if defined(CONFIG_BLK_DEV_IO_TRACE)
extern int blk_trace_ioctl(struct block_device *, unsigned, char __user *);
extern void blk_trace_shutdown(request_queue_t *);
extern void __blk_add_trace(struct blk_trace *, sector_t, int, int, u32, int, int, void *);
/**
 * blk_add_trace_rq - Add a trace for a request oriented action
 * @q:		queue the io is for
 * @rq:		the source request
 * @what:	the action
 *
 * Description:
 *     Records an action against a request.  Packet-command requests are
 *     tagged BLK_TC_PC and carry the command bytes as payload; all other
 *     requests are tagged BLK_TC_FS and log the request's start sector
 *     and size.  Does nothing when tracing is not set up on @q.
 *
 **/
static inline void blk_add_trace_rq(struct request_queue *q, struct request *rq,
				    u32 what)
{
	struct blk_trace *bt = q->blk_trace;
	int rw = rq->flags & 0x07;

	if (likely(!bt))
		return;

	if (!blk_pc_request(rq)) {
		what |= BLK_TC_ACT(BLK_TC_FS);
		__blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9,
				rw, what, rq->errors, 0, NULL);
		return;
	}

	what |= BLK_TC_ACT(BLK_TC_PC);
	__blk_add_trace(bt, 0, rq->data_len, rw, what, rq->errors,
			sizeof(rq->cmd), rq->cmd);
}
/**
 * blk_add_trace_bio - Add a trace for a bio oriented action
 * @q:		queue the io is for
 * @bio:	the source bio
 * @what:	the action
 *
 * Description:
 *     Records an action against a bio.  Will log the bio offset + size;
 *     the error field is set when the bio is not flagged BIO_UPTODATE.
 *     Does nothing when tracing is not set up on @q.
 *
 **/
static inline void blk_add_trace_bio(struct request_queue *q, struct bio *bio,
				     u32 what)
{
	struct blk_trace *bt = q->blk_trace;
	int error;

	if (likely(!bt))
		return;

	error = !bio_flagged(bio, BIO_UPTODATE);
	__blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, what,
			error, 0, NULL);
}
/**
 * blk_add_trace_generic - Add a trace for a generic action
 * @q:		queue the io is for
 * @bio:	the source bio, may be NULL
 * @rw:		the data direction (only used when @bio is NULL)
 * @what:	the action
 *
 * Description:
 *     Records a simple trace.  With a bio this is the same as
 *     blk_add_trace_bio(); without one, an event with zero sector and
 *     size is logged.
 *
 **/
static inline void blk_add_trace_generic(struct request_queue *q,
					 struct bio *bio, int rw, u32 what)
{
	struct blk_trace *bt = q->blk_trace;

	if (likely(!bt))
		return;

	if (!bio) {
		__blk_add_trace(bt, 0, 0, rw, what, 0, 0, NULL);
		return;
	}

	blk_add_trace_bio(q, bio, what);
}
/**
 * blk_add_trace_pdu_int - Add a trace for a bio with an integer payload
 * @q:		queue the io is for
 * @what:	the action
 * @bio:	the source bio, may be NULL
 * @pdu:	the integer payload
 *
 * Description:
 *     Adds a trace with some integer payload.  This might be an unplug
 *     option given as the action, with the depth at unplug time given
 *     as the payload.  The payload is widened to 64 bits and stored
 *     big-endian.
 *
 **/
static inline void blk_add_trace_pdu_int(struct request_queue *q, u32 what,
					 struct bio *bio, unsigned int pdu)
{
	struct blk_trace *bt = q->blk_trace;
	u64 rpdu = cpu_to_be64(pdu);

	if (likely(!bt))
		return;

	if (!bio) {
		__blk_add_trace(bt, 0, 0, 0, what, 0, sizeof(rpdu), &rpdu);
		return;
	}

	__blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, what,
			!bio_flagged(bio, BIO_UPTODATE), sizeof(rpdu), &rpdu);
}
/**
 * blk_add_trace_remap - Add a trace for a remap operation
 * @q:		queue the io is for
 * @bio:	the source bio
 * @dev:	target device
 * @from:	source sector
 * @to:		target sector
 *
 * Description:
 *     Device mapper or raid target sometimes need to split a bio because
 *     it spans a stripe (or similar). Add a trace for that action.
 *     The event is logged at sector @from and carries a
 *     struct blk_io_trace_remap payload holding @dev and @to in
 *     big-endian byte order.
 *
 **/
static inline void blk_add_trace_remap(struct request_queue *q, struct bio *bio,
				       dev_t dev, sector_t from, sector_t to)
{
	struct blk_trace *bt = q->blk_trace;
	struct blk_io_trace_remap r;

	if (likely(!bt))
		return;

	r.device = cpu_to_be32(dev);
	/*
	 * All sizeof(r) bytes are handed on as payload, so the padding
	 * must not carry stale stack contents.
	 */
	r.__pad = 0;
	r.sector = cpu_to_be64(to);

	__blk_add_trace(bt, from, bio->bi_size, bio->bi_rw, BLK_TA_REMAP,
			!bio_flagged(bio, BIO_UPTODATE), sizeof(r), &r);
}
#else /* !CONFIG_BLK_DEV_IO_TRACE */
#define blk_trace_ioctl(bdev, cmd, arg) (-ENOTTY)
#define blk_trace_shutdown(q) do { } while (0)
#define blk_add_trace_rq(q, rq, what) do { } while (0)
#define blk_add_trace_bio(q, rq, what) do { } while (0)
#define blk_add_trace_generic(q, rq, rw, what) do { } while (0)
#define blk_add_trace_pdu_int(q, what, bio, pdu) do { } while (0)
#define blk_add_trace_remap(q, bio, dev, f, t) do {} while (0)
#endif /* CONFIG_BLK_DEV_IO_TRACE */
#endif

Some files were not shown because too many files have changed in this diff Show More