You've already forked linux-apfs
mirror of
https://github.com/linux-apfs/linux-apfs.git
synced 2026-05-01 15:00:59 -07:00
Merge branch 'fixes' into devel
This commit is contained in:
@@ -174,7 +174,7 @@
|
||||
</para>
|
||||
<programlisting>
|
||||
static struct mtd_info *board_mtd;
|
||||
static unsigned long baseaddr;
|
||||
static void __iomem *baseaddr;
|
||||
</programlisting>
|
||||
<para>
|
||||
Static example
|
||||
@@ -182,7 +182,7 @@ static unsigned long baseaddr;
|
||||
<programlisting>
|
||||
static struct mtd_info board_mtd;
|
||||
static struct nand_chip board_chip;
|
||||
static unsigned long baseaddr;
|
||||
static void __iomem *baseaddr;
|
||||
</programlisting>
|
||||
</sect1>
|
||||
<sect1 id="Partition_defines">
|
||||
@@ -283,8 +283,8 @@ int __init board_init (void)
|
||||
}
|
||||
|
||||
/* map physical address */
|
||||
baseaddr = (unsigned long)ioremap(CHIP_PHYSICAL_ADDRESS, 1024);
|
||||
if(!baseaddr){
|
||||
baseaddr = ioremap(CHIP_PHYSICAL_ADDRESS, 1024);
|
||||
if (!baseaddr) {
|
||||
printk("Ioremap to access NAND chip failed\n");
|
||||
err = -EIO;
|
||||
goto out_mtd;
|
||||
@@ -316,7 +316,7 @@ int __init board_init (void)
|
||||
goto out;
|
||||
|
||||
out_ior:
|
||||
iounmap((void *)baseaddr);
|
||||
iounmap(baseaddr);
|
||||
out_mtd:
|
||||
kfree (board_mtd);
|
||||
out:
|
||||
@@ -341,7 +341,7 @@ static void __exit board_cleanup (void)
|
||||
nand_release (board_mtd);
|
||||
|
||||
/* unmap physical address */
|
||||
iounmap((void *)baseaddr);
|
||||
iounmap(baseaddr);
|
||||
|
||||
/* Free the MTD device structure */
|
||||
kfree (board_mtd);
|
||||
|
||||
@@ -157,7 +157,7 @@ For such memory, you can do things like
|
||||
* access only the 640k-1MB area, so anything else
|
||||
* has to be remapped.
|
||||
*/
|
||||
char * baseptr = ioremap(0xFC000000, 1024*1024);
|
||||
void __iomem *baseptr = ioremap(0xFC000000, 1024*1024);
|
||||
|
||||
/* write a 'A' to the offset 10 of the area */
|
||||
writeb('A',baseptr+10);
|
||||
|
||||
@@ -1,7 +1,5 @@
|
||||
00-INDEX
|
||||
- This file
|
||||
as-iosched.txt
|
||||
- Anticipatory IO scheduler
|
||||
barrier.txt
|
||||
- I/O Barriers
|
||||
biodoc.txt
|
||||
|
||||
@@ -1,172 +0,0 @@
|
||||
Anticipatory IO scheduler
|
||||
-------------------------
|
||||
Nick Piggin <piggin@cyberone.com.au> 13 Sep 2003
|
||||
|
||||
Attention! Database servers, especially those using "TCQ" disks should
|
||||
investigate performance with the 'deadline' IO scheduler. Any system with high
|
||||
disk performance requirements should do so, in fact.
|
||||
|
||||
If you see unusual performance characteristics of your disk systems, or you
|
||||
see big performance regressions versus the deadline scheduler, please email
|
||||
me. Database users don't bother unless you're willing to test a lot of patches
|
||||
from me ;) it's a known issue.
|
||||
|
||||
Also, users with hardware RAID controllers, doing striping, may find
|
||||
highly variable performance results with using the as-iosched. The
|
||||
as-iosched anticipatory implementation is based on the notion that a disk
|
||||
device has only one physical seeking head. A striped RAID controller
|
||||
actually has a head for each physical device in the logical RAID device.
|
||||
|
||||
However, setting the antic_expire (see tunable parameters below) produces
|
||||
very similar behavior to the deadline IO scheduler.
|
||||
|
||||
Selecting IO schedulers
|
||||
-----------------------
|
||||
Refer to Documentation/block/switching-sched.txt for information on
|
||||
selecting an io scheduler on a per-device basis.
|
||||
|
||||
Anticipatory IO scheduler Policies
|
||||
----------------------------------
|
||||
The as-iosched implementation implements several layers of policies
|
||||
to determine when an IO request is dispatched to the disk controller.
|
||||
Here are the policies outlined, in order of application.
|
||||
|
||||
1. one-way Elevator algorithm.
|
||||
|
||||
The elevator algorithm is similar to that used in deadline scheduler, with
|
||||
the addition that it allows limited backward movement of the elevator
|
||||
(i.e. seeks backwards). A seek backwards can occur when choosing between
|
||||
two IO requests where one is behind the elevator's current position, and
|
||||
the other is in front of the elevator's position. If the seek distance to
|
||||
the request in back of the elevator is less than half the seek distance to
|
||||
the request in front of the elevator, then the request in back can be chosen.
|
||||
Backward seeks are also limited to a maximum of MAXBACK (1024*1024) sectors.
|
||||
This favors forward movement of the elevator, while allowing opportunistic
|
||||
"short" backward seeks.
|
||||
|
||||
2. FIFO expiration times for reads and for writes.
|
||||
|
||||
This is again very similar to the deadline IO scheduler. The expiration
|
||||
times for requests on these lists is tunable using the parameters read_expire
|
||||
and write_expire discussed below. When a read or a write expires in this way,
|
||||
the IO scheduler will interrupt its current elevator sweep or read anticipation
|
||||
to service the expired request.
|
||||
|
||||
3. Read and write request batching
|
||||
|
||||
A batch is a collection of read requests or a collection of write
|
||||
requests. The as scheduler alternates dispatching read and write batches
|
||||
to the driver. In the case of a read batch, the scheduler submits read
|
||||
requests to the driver as long as there are read requests to submit, and
|
||||
the read batch time limit has not been exceeded (read_batch_expire).
|
||||
The read batch time limit begins counting down only when there are
|
||||
competing write requests pending.
|
||||
|
||||
In the case of a write batch, the scheduler submits write requests to
|
||||
the driver as long as there are write requests available, and the
|
||||
write batch time limit has not been exceeded (write_batch_expire).
|
||||
However, the length of write batches will be gradually shortened
|
||||
when read batches frequently exceed their time limit.
|
||||
|
||||
When changing between batch types, the scheduler waits for all requests
|
||||
from the previous batch to complete before scheduling requests for the
|
||||
next batch.
|
||||
|
||||
The read and write fifo expiration times described in policy 2 above
|
||||
are checked only when scheduling IO of a batch for the corresponding
|
||||
(read/write) type. So for example, the read FIFO timeout values are
|
||||
tested only during read batches. Likewise, the write FIFO timeout
|
||||
values are tested only during write batches. For this reason,
|
||||
it is generally not recommended for the read batch time
|
||||
to be longer than the write expiration time, nor for the write batch
|
||||
time to exceed the read expiration time (see tunable parameters below).
|
||||
|
||||
When the IO scheduler changes from a read to a write batch,
|
||||
it begins the elevator from the request that is on the head of the
|
||||
write expiration FIFO. Likewise, when changing from a write batch to
|
||||
a read batch, the scheduler begins the elevator from the first entry
|
||||
on the read expiration FIFO.
|
||||
|
||||
4. Read anticipation.
|
||||
|
||||
Read anticipation occurs only when scheduling a read batch.
|
||||
This implementation of read anticipation allows only one read request
|
||||
to be dispatched to the disk controller at a time. In
|
||||
contrast, many write requests may be dispatched to the disk controller
|
||||
at a time during a write batch. It is this characteristic that can make
|
||||
the anticipatory scheduler perform anomalously with controllers supporting
|
||||
TCQ, or with hardware striped RAID devices. Setting the antic_expire
|
||||
queue parameter (see below) to zero disables this behavior, and the
|
||||
anticipatory scheduler behaves essentially like the deadline scheduler.
|
||||
|
||||
When read anticipation is enabled (antic_expire is not zero), reads
|
||||
are dispatched to the disk controller one at a time.
|
||||
At the end of each read request, the IO scheduler examines its next
|
||||
candidate read request from its sorted read list. If that next request
|
||||
is from the same process as the request that just completed,
|
||||
or if the next request in the queue is "very close" to the
|
||||
just completed request, it is dispatched immediately. Otherwise,
|
||||
statistics (average think time, average seek distance) on the process
|
||||
that submitted the just completed request are examined. If it seems
|
||||
likely that that process will submit another request soon, and that
|
||||
request is likely to be near the just completed request, then the IO
|
||||
scheduler will stop dispatching more read requests for up to (antic_expire)
|
||||
milliseconds, hoping that process will submit a new request near the one
|
||||
that just completed. If such a request is made, then it is dispatched
|
||||
immediately. If the antic_expire wait time expires, then the IO scheduler
|
||||
will dispatch the next read request from the sorted read queue.
|
||||
|
||||
To decide whether an anticipatory wait is worthwhile, the scheduler
|
||||
maintains statistics for each process that can be used to compute
|
||||
mean "think time" (the time between read requests), and mean seek
|
||||
distance for that process. One observation is that these statistics
|
||||
are associated with each process, but those statistics are not associated
|
||||
with a specific IO device. So for example, if a process is doing IO
|
||||
on several file systems on separate devices, the statistics will be
|
||||
a combination of IO behavior from all those devices.
|
||||
|
||||
|
||||
Tuning the anticipatory IO scheduler
|
||||
------------------------------------
|
||||
When using 'as', the anticipatory IO scheduler, there are 5 parameters under
|
||||
/sys/block/*/queue/iosched/. All are in units of milliseconds.
|
||||
|
||||
The parameters are:
|
||||
* read_expire
|
||||
Controls how long until a read request becomes "expired". It also controls the
|
||||
interval between which expired requests are served, so set to 50, a request
|
||||
might take anywhere < 100ms to be serviced _if_ it is the next on the
|
||||
expired list. Obviously request expiration strategies won't make the disk
|
||||
go faster. The result basically equates to the timeslice a single reader
|
||||
gets in the presence of other IO. 100/((seek time / read_expire) + 1) is
|
||||
very roughly the % streaming read efficiency your disk should get with
|
||||
multiple readers.
|
||||
|
||||
* read_batch_expire
|
||||
Controls how much time a batch of reads is given before pending writes are
|
||||
served. A higher value is more efficient. This might be set below read_expire
|
||||
if writes are to be given higher priority than reads, but reads are to be
|
||||
as efficient as possible when there are no writes. Generally though, it
|
||||
should be some multiple of read_expire.
|
||||
|
||||
* write_expire, and
|
||||
* write_batch_expire are equivalent to the above, for writes.
|
||||
|
||||
* antic_expire
|
||||
Controls the maximum amount of time we can anticipate a good read (one
|
||||
with a short seek distance from the most recently completed request) before
|
||||
giving up. Many other factors may cause anticipation to be stopped early,
|
||||
or some processes will not be "anticipated" at all. Should be a bit higher
|
||||
for big seek time devices though not a linear correspondence - most
|
||||
processes have only a few ms thinktime.
|
||||
|
||||
In addition to the tunables above there is a read-only file named est_time
|
||||
which, when read, will show:
|
||||
|
||||
- The probability of a task exiting without a cooperating task
|
||||
submitting an anticipated IO.
|
||||
|
||||
- The current mean think time.
|
||||
|
||||
- The seek distance used to determine if an incoming IO is better.
|
||||
|
||||
@@ -186,7 +186,7 @@ a virtual address mapping (unlike the earlier scheme of virtual address
|
||||
do not have a corresponding kernel virtual address space mapping) and
|
||||
low-memory pages.
|
||||
|
||||
Note: Please refer to Documentation/DMA-mapping.txt for a discussion
|
||||
Note: Please refer to Documentation/PCI/PCI-DMA-mapping.txt for a discussion
|
||||
on PCI high mem DMA aspects and mapping of scatter gather lists, and support
|
||||
for 64 bit PCI.
|
||||
|
||||
|
||||
@@ -196,7 +196,7 @@ nobarrier This also requires an IO stack which can support
|
||||
also be used to enable or disable barriers, for
|
||||
consistency with other ext4 mount options.
|
||||
|
||||
inode_readahead=n This tuning parameter controls the maximum
|
||||
inode_readahead_blks=n This tuning parameter controls the maximum
|
||||
number of inode table blocks that ext4's inode
|
||||
table readahead algorithm will pre-read into
|
||||
the buffer cache. The default value is 32 blocks.
|
||||
|
||||
@@ -28,7 +28,7 @@ described in the man pages included in the package.
|
||||
Project web page: http://www.nilfs.org/en/
|
||||
Download page: http://www.nilfs.org/en/download.html
|
||||
Git tree web page: http://www.nilfs.org/git/
|
||||
NILFS mailing lists: http://www.nilfs.org/mailman/listinfo/users
|
||||
List info: http://vger.kernel.org/vger-lists.html#linux-nilfs
|
||||
|
||||
Caveats
|
||||
=======
|
||||
|
||||
@@ -240,7 +240,7 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||
|
||||
acpi_sleep= [HW,ACPI] Sleep options
|
||||
Format: { s3_bios, s3_mode, s3_beep, s4_nohwsig,
|
||||
old_ordering, s4_nonvs }
|
||||
old_ordering, s4_nonvs, sci_force_enable }
|
||||
See Documentation/power/video.txt for information on
|
||||
s3_bios and s3_mode.
|
||||
s3_beep is for debugging; it makes the PC's speaker beep
|
||||
@@ -253,6 +253,9 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||
of _PTS is used by default).
|
||||
s4_nonvs prevents the kernel from saving/restoring the
|
||||
ACPI NVS memory during hibernation.
|
||||
sci_force_enable causes the kernel to set SCI_EN directly
|
||||
on resume from S1/S3 (which is against the ACPI spec,
|
||||
but some broken systems don't work without it).
|
||||
|
||||
acpi_use_timer_override [HW,ACPI]
|
||||
Use timer override. For some broken Nvidia NF5 boards
|
||||
|
||||
@@ -685,7 +685,7 @@ struct kvm_vcpu_events {
|
||||
__u8 pad;
|
||||
} nmi;
|
||||
__u32 sipi_vector;
|
||||
__u32 flags; /* must be zero */
|
||||
__u32 flags;
|
||||
};
|
||||
|
||||
4.30 KVM_SET_VCPU_EVENTS
|
||||
@@ -701,6 +701,14 @@ vcpu.
|
||||
|
||||
See KVM_GET_VCPU_EVENTS for the data structure.
|
||||
|
||||
Fields that may be modified asynchronously by running VCPUs can be excluded
|
||||
from the update. These fields are nmi.pending and sipi_vector. Keep the
|
||||
corresponding bits in the flags field cleared to suppress overwriting the
|
||||
current in-kernel state. The bits are:
|
||||
|
||||
KVM_VCPUEVENT_VALID_NMI_PENDING - transfer nmi.pending to the kernel
|
||||
KVM_VCPUEVENT_VALID_SIPI_VECTOR - transfer sipi_vector
|
||||
|
||||
|
||||
5. The kvm_run structure
|
||||
|
||||
|
||||
@@ -1092,8 +1092,8 @@ WARNING:
|
||||
its level up and down at every change.
|
||||
|
||||
|
||||
Volume control
|
||||
--------------
|
||||
Volume control (Console Audio control)
|
||||
--------------------------------------
|
||||
|
||||
procfs: /proc/acpi/ibm/volume
|
||||
ALSA: "ThinkPad Console Audio Control", default ID: "ThinkPadEC"
|
||||
@@ -1110,9 +1110,53 @@ the desktop environment to just provide on-screen-display feedback.
|
||||
Software volume control should be done only in the main AC97/HDA
|
||||
mixer.
|
||||
|
||||
This feature allows volume control on ThinkPad models with a digital
|
||||
volume knob (when available, not all models have it), as well as
|
||||
mute/unmute control. The available commands are:
|
||||
|
||||
About the ThinkPad Console Audio control:
|
||||
|
||||
ThinkPads have a built-in amplifier and muting circuit that drives the
|
||||
console headphone and speakers. This circuit is after the main AC97
|
||||
or HDA mixer in the audio path, and under exclusive control of the
|
||||
firmware.
|
||||
|
||||
ThinkPads have three special hotkeys to interact with the console
|
||||
audio control: volume up, volume down and mute.
|
||||
|
||||
It is worth noting that the normal way the mute function works (on
|
||||
ThinkPads that do not have a "mute LED") is:
|
||||
|
||||
1. Press mute to mute. It will *always* mute, you can press it as
|
||||
many times as you want, and the sound will remain mute.
|
||||
|
||||
2. Press either volume key to unmute the ThinkPad (it will _not_
|
||||
change the volume, it will just unmute).
|
||||
|
||||
This is a very superior design when compared to the cheap software-only
|
||||
mute-toggle solution found on normal consumer laptops: you can be
|
||||
absolutely sure the ThinkPad will not make noise if you press the mute
|
||||
button, no matter the previous state.
|
||||
|
||||
The IBM ThinkPads, and the earlier Lenovo ThinkPads have variable-gain
|
||||
amplifiers driving the speakers and headphone output, and the firmware
|
||||
also handles volume control for the headphone and speakers on these
|
||||
ThinkPads without any help from the operating system (this volume
|
||||
control stage exists after the main AC97 or HDA mixer in the audio
|
||||
path).
|
||||
|
||||
The newer Lenovo models only have firmware mute control, and depend on
|
||||
the main HDA mixer to do volume control (which is done by the operating
|
||||
system). In this case, the volume keys are filtered out for unmute
|
||||
key press (there are some firmware bugs in this area) and delivered as
|
||||
normal key presses to the operating system (thinkpad-acpi is not
|
||||
involved).
|
||||
|
||||
|
||||
The ThinkPad-ACPI volume control:
|
||||
|
||||
The preferred way to interact with the Console Audio control is the
|
||||
ALSA interface.
|
||||
|
||||
The legacy procfs interface allows one to read the current state,
|
||||
and if volume control is enabled, accepts the following commands:
|
||||
|
||||
echo up >/proc/acpi/ibm/volume
|
||||
echo down >/proc/acpi/ibm/volume
|
||||
@@ -1121,12 +1165,10 @@ mute/unmute control. The available commands are:
|
||||
echo 'level <level>' >/proc/acpi/ibm/volume
|
||||
|
||||
The <level> number range is 0 to 14 although not all of them may be
|
||||
distinct. The unmute the volume after the mute command, use either the
|
||||
distinct. To unmute the volume after the mute command, use either the
|
||||
up or down command (the level command will not unmute the volume), or
|
||||
the unmute command.
|
||||
|
||||
The current volume level and mute state is shown in the file.
|
||||
|
||||
You can use the volume_capabilities parameter to tell the driver
|
||||
whether your thinkpad has volume control or mute-only control:
|
||||
volume_capabilities=1 for mixers with mute and volume control,
|
||||
|
||||
@@ -95,7 +95,7 @@ card*/pcm*/xrun_debug
|
||||
It takes an integer value, can be changed by writing to this
|
||||
file, such as
|
||||
|
||||
# cat 5 > /proc/asound/card0/pcm0p/xrun_debug
|
||||
# echo 5 > /proc/asound/card0/pcm0p/xrun_debug
|
||||
|
||||
The value consists of the following bit flags:
|
||||
bit 0 = Enable XRUN/jiffies debug messages
|
||||
|
||||
@@ -53,14 +53,14 @@ size of the mcount call that is embedded in the function).
|
||||
For example, if the function foo() calls bar(), when the bar() function calls
|
||||
mcount(), the arguments mcount() will pass to the tracer are:
|
||||
"frompc" - the address bar() will use to return to foo()
|
||||
"selfpc" - the address bar() (with _mcount() size adjustment)
|
||||
"selfpc" - the address bar() (with mcount() size adjustment)
|
||||
|
||||
Also keep in mind that this mcount function will be called *a lot*, so
|
||||
optimizing for the default case of no tracer will help the smooth running of
|
||||
your system when tracing is disabled. So the start of the mcount function is
|
||||
typically the bare min with checking things before returning. That also means
|
||||
the code flow should usually kept linear (i.e. no branching in the nop case).
|
||||
This is of course an optimization and not a hard requirement.
|
||||
typically the bare minimum with checking things before returning. That also
|
||||
means the code flow should usually be kept linear (i.e. no branching in the nop
|
||||
case). This is of course an optimization and not a hard requirement.
|
||||
|
||||
Here is some pseudo code that should help (these functions should actually be
|
||||
implemented in assembly):
|
||||
@@ -131,10 +131,10 @@ some functions to save (hijack) and restore the return address.
|
||||
|
||||
The mcount function should check the function pointers ftrace_graph_return
|
||||
(compare to ftrace_stub) and ftrace_graph_entry (compare to
|
||||
ftrace_graph_entry_stub). If either of those are not set to the relevant stub
|
||||
ftrace_graph_entry_stub). If either of those is not set to the relevant stub
|
||||
function, call the arch-specific function ftrace_graph_caller which in turn
|
||||
calls the arch-specific function prepare_ftrace_return. Neither of these
|
||||
function names are strictly required, but you should use them anyways to stay
|
||||
function names is strictly required, but you should use them anyway to stay
|
||||
consistent across the architecture ports -- easier to compare & contrast
|
||||
things.
|
||||
|
||||
@@ -144,7 +144,7 @@ but the first argument should be a pointer to the "frompc". Typically this is
|
||||
located on the stack. This allows the function to hijack the return address
|
||||
temporarily to have it point to the arch-specific function return_to_handler.
|
||||
That function will simply call the common ftrace_return_to_handler function and
|
||||
that will return the original return address with which, you can return to the
|
||||
that will return the original return address with which you can return to the
|
||||
original call site.
|
||||
|
||||
Here is the updated mcount pseudo code:
|
||||
|
||||
@@ -44,7 +44,8 @@ Check for lost events.
|
||||
Usage
|
||||
-----
|
||||
|
||||
Make sure debugfs is mounted to /sys/kernel/debug. If not, (requires root privileges)
|
||||
Make sure debugfs is mounted to /sys/kernel/debug.
|
||||
If not (requires root privileges):
|
||||
$ mount -t debugfs debugfs /sys/kernel/debug
|
||||
|
||||
Check that the driver you are about to trace is not loaded.
|
||||
@@ -91,7 +92,7 @@ $ dmesg > dmesg.txt
|
||||
$ tar zcf pciid-nick-mmiotrace.tar.gz mydump.txt lspci.txt dmesg.txt
|
||||
and then send the .tar.gz file. The trace compresses considerably. Replace
|
||||
"pciid" and "nick" with the PCI ID or model name of your piece of hardware
|
||||
under investigation and your nick name.
|
||||
under investigation and your nickname.
|
||||
|
||||
|
||||
How Mmiotrace Works
|
||||
@@ -100,7 +101,7 @@ How Mmiotrace Works
|
||||
Access to hardware IO-memory is gained by mapping addresses from PCI bus by
|
||||
calling one of the ioremap_*() functions. Mmiotrace is hooked into the
|
||||
__ioremap() function and gets called whenever a mapping is created. Mapping is
|
||||
an event that is recorded into the trace log. Note, that ISA range mappings
|
||||
an event that is recorded into the trace log. Note that ISA range mappings
|
||||
are not caught, since the mapping always exists and is returned directly.
|
||||
|
||||
MMIO accesses are recorded via page faults. Just before __ioremap() returns,
|
||||
@@ -122,11 +123,11 @@ Trace Log Format
|
||||
----------------
|
||||
|
||||
The raw log is text and easily filtered with e.g. grep and awk. One record is
|
||||
one line in the log. A record starts with a keyword, followed by keyword
|
||||
dependant arguments. Arguments are separated by a space, or continue until the
|
||||
one line in the log. A record starts with a keyword, followed by keyword-
|
||||
dependent arguments. Arguments are separated by a space, or continue until the
|
||||
end of line. The format for version 20070824 is as follows:
|
||||
|
||||
Explanation Keyword Space separated arguments
|
||||
Explanation Keyword Space-separated arguments
|
||||
---------------------------------------------------------------------------
|
||||
|
||||
read event R width, timestamp, map id, physical, value, PC, PID
|
||||
@@ -136,7 +137,7 @@ iounmap event UNMAP timestamp, map id, PC, PID
|
||||
marker MARK timestamp, text
|
||||
version VERSION the string "20070824"
|
||||
info for reader LSPCI one line from lspci -v
|
||||
PCI address map PCIDEV space separated /proc/bus/pci/devices data
|
||||
PCI address map PCIDEV space-separated /proc/bus/pci/devices data
|
||||
unk. opcode UNKNOWN timestamp, map id, physical, data, PC, PID
|
||||
|
||||
Timestamp is in seconds with decimals. Physical is a PCI bus address, virtual
|
||||
|
||||
@@ -10,8 +10,8 @@ Tracepoints (see Documentation/trace/tracepoints.txt) can be used without
|
||||
creating custom kernel modules to register probe functions using the event
|
||||
tracing infrastructure.
|
||||
|
||||
Simplistically, tracepoints will represent an important event that when can
|
||||
be taken in conjunction with other tracepoints to build a "Big Picture" of
|
||||
Simplistically, tracepoints represent important events that can be
|
||||
taken in conjunction with other tracepoints to build a "Big Picture" of
|
||||
what is going on within the system. There are a large number of methods for
|
||||
gathering and interpreting these events. Lacking any current Best Practises,
|
||||
this document describes some of the methods that can be used.
|
||||
@@ -33,12 +33,12 @@ calling
|
||||
|
||||
will give a fair indication of the number of events available.
|
||||
|
||||
2.2 PCL
|
||||
2.2 PCL (Performance Counters for Linux)
|
||||
-------
|
||||
|
||||
Discovery and enumeration of all counters and events, including tracepoints
|
||||
Discovery and enumeration of all counters and events, including tracepoints,
|
||||
are available with the perf tool. Getting a list of available events is a
|
||||
simple case of
|
||||
simple case of:
|
||||
|
||||
$ perf list 2>&1 | grep Tracepoint
|
||||
ext4:ext4_free_inode [Tracepoint event]
|
||||
@@ -49,19 +49,19 @@ simple case of
|
||||
[ .... remaining output snipped .... ]
|
||||
|
||||
|
||||
2. Enabling Events
|
||||
3. Enabling Events
|
||||
==================
|
||||
|
||||
2.1 System-Wide Event Enabling
|
||||
3.1 System-Wide Event Enabling
|
||||
------------------------------
|
||||
|
||||
See Documentation/trace/events.txt for a proper description on how events
|
||||
can be enabled system-wide. A short example of enabling all events related
|
||||
to page allocation would look something like
|
||||
to page allocation would look something like:
|
||||
|
||||
$ for i in `find /sys/kernel/debug/tracing/events -name "enable" | grep mm_`; do echo 1 > $i; done
|
||||
|
||||
2.2 System-Wide Event Enabling with SystemTap
|
||||
3.2 System-Wide Event Enabling with SystemTap
|
||||
---------------------------------------------
|
||||
|
||||
In SystemTap, tracepoints are accessible using the kernel.trace() function
|
||||
@@ -86,7 +86,7 @@ were allocating the pages.
|
||||
print_count()
|
||||
}
|
||||
|
||||
2.3 System-Wide Event Enabling with PCL
|
||||
3.3 System-Wide Event Enabling with PCL
|
||||
---------------------------------------
|
||||
|
||||
By specifying the -a switch and analysing sleep, the system-wide events
|
||||
@@ -107,16 +107,16 @@ for a duration of time can be examined.
|
||||
Similarly, one could execute a shell and exit it as desired to get a report
|
||||
at that point.
|
||||
|
||||
2.4 Local Event Enabling
|
||||
3.4 Local Event Enabling
|
||||
------------------------
|
||||
|
||||
Documentation/trace/ftrace.txt describes how to enable events on a per-thread
|
||||
basis using set_ftrace_pid.
|
||||
|
||||
2.5 Local Event Enablement with PCL
|
||||
3.5 Local Event Enablement with PCL
|
||||
-----------------------------------
|
||||
|
||||
Events can be activate and tracked for the duration of a process on a local
|
||||
Events can be activated and tracked for the duration of a process on a local
|
||||
basis using PCL as follows.
|
||||
|
||||
$ perf stat -e kmem:mm_page_alloc -e kmem:mm_page_free_direct \
|
||||
@@ -131,18 +131,18 @@ basis using PCL such as follows.
|
||||
|
||||
0.973913387 seconds time elapsed
|
||||
|
||||
3. Event Filtering
|
||||
4. Event Filtering
|
||||
==================
|
||||
|
||||
Documentation/trace/ftrace.txt covers in-depth how to filter events in
|
||||
ftrace. Obviously using grep and awk of trace_pipe is an option as well
|
||||
as any script reading trace_pipe.
|
||||
|
||||
4. Analysing Event Variances with PCL
|
||||
5. Analysing Event Variances with PCL
|
||||
=====================================
|
||||
|
||||
Any workload can exhibit variances between runs and it can be important
|
||||
to know what the standard deviation in. By and large, this is left to the
|
||||
to know what the standard deviation is. By and large, this is left to the
|
||||
performance analyst to do it by hand. In the event that the discrete event
|
||||
occurrences are useful to the performance analyst, then perf can be used.
|
||||
|
||||
@@ -166,7 +166,7 @@ In the event that some higher-level event is required that depends on some
|
||||
aggregation of discrete events, then a script would need to be developed.
|
||||
|
||||
Using --repeat, it is also possible to view how events are fluctuating over
|
||||
time on a system wide basis using -a and sleep.
|
||||
time on a system-wide basis using -a and sleep.
|
||||
|
||||
$ perf stat -e kmem:mm_page_alloc -e kmem:mm_page_free_direct \
|
||||
-e kmem:mm_pagevec_free \
|
||||
@@ -180,7 +180,7 @@ time on a system wide basis using -a and sleep.
|
||||
|
||||
1.002251757 seconds time elapsed ( +- 0.005% )
|
||||
|
||||
5. Higher-Level Analysis with Helper Scripts
|
||||
6. Higher-Level Analysis with Helper Scripts
|
||||
============================================
|
||||
|
||||
When events are enabled the events that are triggering can be read from
|
||||
@@ -190,11 +190,11 @@ be gathered on-line as appropriate. Examples of post-processing might include
|
||||
|
||||
o Reading information from /proc for the PID that triggered the event
|
||||
o Deriving a higher-level event from a series of lower-level events.
|
||||
o Calculate latencies between two events
|
||||
o Calculating latencies between two events
|
||||
|
||||
Documentation/trace/postprocess/trace-pagealloc-postprocess.pl is an example
|
||||
script that can read trace_pipe from STDIN or a copy of a trace. When used
|
||||
on-line, it can be interrupted once to generate a report without existing
|
||||
on-line, it can be interrupted once to generate a report without exiting
|
||||
and twice to exit.
|
||||
|
||||
Simplistically, the script just reads STDIN and counts up events but it
|
||||
@@ -212,12 +212,12 @@ also can do more such as
|
||||
processes, the parent process responsible for creating all the helpers
|
||||
can be identified
|
||||
|
||||
6. Lower-Level Analysis with PCL
|
||||
7. Lower-Level Analysis with PCL
|
||||
================================
|
||||
|
||||
There may also be a requirement to identify what functions with a program
|
||||
There may also be a requirement to identify what functions within a program
|
||||
were generating events within the kernel. To begin this sort of analysis, the
|
||||
data must be recorded. At the time of writing, this required root
|
||||
data must be recorded. At the time of writing, this required root:
|
||||
|
||||
$ perf record -c 1 \
|
||||
-e kmem:mm_page_alloc -e kmem:mm_page_free_direct \
|
||||
@@ -253,11 +253,11 @@ perf report.
|
||||
# (For more details, try: perf report --sort comm,dso,symbol)
|
||||
#
|
||||
|
||||
According to this, the vast majority of events occured triggered on events
|
||||
within the VDSO. With simple binaries, this will often be the case so lets
|
||||
According to this, the vast majority of events triggered on events
|
||||
within the VDSO. With simple binaries, this will often be the case so let's
|
||||
take a slightly different example. In the course of writing this, it was
|
||||
noticed that X was generating an insane amount of page allocations so lets look
|
||||
at it
|
||||
noticed that X was generating an insane amount of page allocations so let's look
|
||||
at it:
|
||||
|
||||
$ perf record -c 1 -f \
|
||||
-e kmem:mm_page_alloc -e kmem:mm_page_free_direct \
|
||||
@@ -280,8 +280,8 @@ This was interrupted after a few seconds and
|
||||
# (For more details, try: perf report --sort comm,dso,symbol)
|
||||
#
|
||||
|
||||
So, almost half of the events are occuring in a library. To get an idea which
|
||||
symbol.
|
||||
So, almost half of the events are occurring in a library. To get an idea which
|
||||
symbol:
|
||||
|
||||
$ perf report --sort comm,dso,symbol
|
||||
# Samples: 27666
|
||||
@@ -297,7 +297,7 @@ symbol.
|
||||
0.01% Xorg /opt/gfx-test/lib/libpixman-1.so.0.13.1 [.] get_fast_path
|
||||
0.00% Xorg [kernel] [k] ftrace_trace_userstack
|
||||
|
||||
To see where within the function pixmanFillsse2 things are going wrong
|
||||
To see where within the function pixmanFillsse2 things are going wrong:
|
||||
|
||||
$ perf annotate pixmanFillsse2
|
||||
[ ... ]
|
||||
|
||||
@@ -103,7 +103,7 @@ I.2 libpciaccess
|
||||
----------------
|
||||
|
||||
To use the vga arbiter char device, an API was implemented inside the
|
||||
libpciaccess library. One fieldd was added to struct pci_device (each device
|
||||
libpciaccess library. One field was added to struct pci_device (each device
|
||||
on the system):
|
||||
|
||||
/* the type of resource decoded by the device */
|
||||
|
||||
+4
-13
@@ -2169,10 +2169,9 @@ F: drivers/hwmon/f75375s.c
|
||||
F: include/linux/f75375s.h
|
||||
|
||||
FIREWIRE SUBSYSTEM
|
||||
M: Kristian Hoegsberg <krh@redhat.com>
|
||||
M: Stefan Richter <stefanr@s5r6.in-berlin.de>
|
||||
L: linux1394-devel@lists.sourceforge.net
|
||||
W: http://www.linux1394.org/
|
||||
W: http://ieee1394.wiki.kernel.org/
|
||||
T: git git://git.kernel.org/pub/scm/linux/kernel/git/ieee1394/linux1394-2.6.git
|
||||
S: Maintained
|
||||
F: drivers/firewire/
|
||||
@@ -2705,22 +2704,14 @@ S: Supported
|
||||
F: drivers/idle/i7300_idle.c
|
||||
|
||||
IEEE 1394 SUBSYSTEM
|
||||
M: Ben Collins <ben.collins@ubuntu.com>
|
||||
M: Stefan Richter <stefanr@s5r6.in-berlin.de>
|
||||
L: linux1394-devel@lists.sourceforge.net
|
||||
W: http://www.linux1394.org/
|
||||
W: http://ieee1394.wiki.kernel.org/
|
||||
T: git git://git.kernel.org/pub/scm/linux/kernel/git/ieee1394/linux1394-2.6.git
|
||||
S: Maintained
|
||||
S: Obsolete
|
||||
F: Documentation/debugging-via-ohci1394.txt
|
||||
F: drivers/ieee1394/
|
||||
|
||||
IEEE 1394 RAW I/O DRIVER
|
||||
M: Dan Dennedy <dan@dennedy.org>
|
||||
M: Stefan Richter <stefanr@s5r6.in-berlin.de>
|
||||
L: linux1394-devel@lists.sourceforge.net
|
||||
S: Maintained
|
||||
F: drivers/ieee1394/raw1394*
|
||||
|
||||
IEEE 802.15.4 SUBSYSTEM
|
||||
M: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
|
||||
M: Sergey Lapin <slapin@ossfans.org>
|
||||
@@ -3882,7 +3873,7 @@ F: drivers/net/ni5010.*
|
||||
|
||||
NILFS2 FILESYSTEM
|
||||
M: KONISHI Ryusuke <konishi.ryusuke@lab.ntt.co.jp>
|
||||
L: users@nilfs.org
|
||||
L: linux-nilfs@vger.kernel.org
|
||||
W: http://www.nilfs.org/en/
|
||||
S: Supported
|
||||
F: Documentation/filesystems/nilfs2.txt
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
VERSION = 2
|
||||
PATCHLEVEL = 6
|
||||
SUBLEVEL = 33
|
||||
EXTRAVERSION = -rc2
|
||||
EXTRAVERSION = -rc3
|
||||
NAME = Man-Eating Seals of Antiquity
|
||||
|
||||
# *DOCUMENTATION*
|
||||
|
||||
@@ -688,6 +688,7 @@ config ARCH_DAVINCI
|
||||
select HAVE_IDE
|
||||
select COMMON_CLKDEV
|
||||
select GENERIC_ALLOCATOR
|
||||
select ARCH_HAS_HOLES_MEMORYMODEL
|
||||
help
|
||||
Support for TI's DaVinci platform.
|
||||
|
||||
|
||||
@@ -236,6 +236,7 @@ static struct vpfe_subdev_info vpfe_sub_devs[] = {
|
||||
|
||||
static struct vpfe_config vpfe_cfg = {
|
||||
.num_subdevs = ARRAY_SIZE(vpfe_sub_devs),
|
||||
.i2c_adapter_id = 1,
|
||||
.sub_devs = vpfe_sub_devs,
|
||||
.card_name = "DM355 EVM",
|
||||
.ccdc = "DM355 CCDC",
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user