Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6

This commit is contained in:
David Woodhouse
2007-08-23 10:43:14 +01:00
682 changed files with 11797 additions and 8144 deletions
-4
View File
@@ -133,10 +133,6 @@ interested in translating it, please email me
<para>updates of your driver can take place without recompiling <para>updates of your driver can take place without recompiling
the kernel.</para> the kernel.</para>
</listitem> </listitem>
<listitem>
<para>if you need to keep some parts of your driver closed source,
you can do so without violating the GPL license on the kernel.</para>
</listitem>
</itemizedlist> </itemizedlist>
<sect1 id="how_uio_works"> <sect1 id="how_uio_works">
+1 -1
View File
@@ -196,7 +196,7 @@ void print_delayacct(struct taskstats *t)
"IO %15s%15s\n" "IO %15s%15s\n"
" %15llu%15llu\n" " %15llu%15llu\n"
"MEM %15s%15s\n" "MEM %15s%15s\n"
" %15llu%15llu\n" " %15llu%15llu\n",
"count", "real total", "virtual total", "delay total", "count", "real total", "virtual total", "delay total",
t->cpu_count, t->cpu_run_real_total, t->cpu_run_virtual_total, t->cpu_count, t->cpu_run_real_total, t->cpu_run_virtual_total,
t->cpu_delay_total, t->cpu_delay_total,
+12 -12
View File
@@ -111,21 +111,21 @@ sub tda10045 {
} }
sub tda10046 { sub tda10046 {
my $sourcefile = "tt_budget_217g.zip"; my $sourcefile = "TT_PCI_2.19h_28_11_2006.zip";
my $url = "http://www.technotrend.de/new/217g/$sourcefile"; my $url = "http://technotrend-online.com/download/software/219/$sourcefile";
my $hash = "6a7e1e2f2644b162ff0502367553c72d"; my $hash = "6a7e1e2f2644b162ff0502367553c72d";
my $outfile = "dvb-fe-tda10046.fw"; my $outfile = "dvb-fe-tda10046.fw";
my $tmpdir = tempdir(DIR => "/tmp", CLEANUP => 1); my $tmpdir = tempdir(DIR => "/tmp", CLEANUP => 1);
checkstandard(); checkstandard();
wgetfile($sourcefile, $url); wgetfile($sourcefile, $url);
unzip($sourcefile, $tmpdir); unzip($sourcefile, $tmpdir);
extract("$tmpdir/software/OEM/PCI/App/ttlcdacc.dll", 0x3f731, 24478, "$tmpdir/fwtmp"); extract("$tmpdir/TT_PCI_2.19h_28_11_2006/software/OEM/PCI/App/ttlcdacc.dll", 0x65389, 24478, "$tmpdir/fwtmp");
verify("$tmpdir/fwtmp", $hash); verify("$tmpdir/fwtmp", $hash);
copy("$tmpdir/fwtmp", $outfile); copy("$tmpdir/fwtmp", $outfile);
$outfile; $outfile;
} }
sub tda10046lifeview { sub tda10046lifeview {
+13 -9
View File
@@ -9,14 +9,13 @@ one found in the Dreamcast.
Advantages: Advantages:
* It provides a nice large console (128 cols + 48 lines with 1024x768) * It provides a nice large console (128 cols + 48 lines with 1024x768)
without using tiny, unreadable fonts. without using tiny, unreadable fonts (NOT on the Dreamcast)
* You can run XF86_FBDev on top of /dev/fb0 * You can run XF86_FBDev on top of /dev/fb0
* Most important: boot logo :-) * Most important: boot logo :-)
Disadvantages: Disadvantages:
* Driver is currently limited to the Dreamcast PowerVR 2 implementation * Driver is largely untested on non-Dreamcast systems.
at the time of this writing.
Configuration Configuration
============= =============
@@ -29,11 +28,16 @@ Accepted options:
font:X - default font to use. All fonts are supported, including the font:X - default font to use. All fonts are supported, including the
SUN12x22 font which is very nice at high resolutions. SUN12x22 font which is very nice at high resolutions.
mode:X - default video mode. The following video modes are supported:
640x240-60, 640x480-60.
mode:X - default video mode with format [xres]x[yres]-<bpp>@<refresh rate>
The following video modes are supported:
640x480-16@60, 640x480-24@60, 640x480-32@60. The Dreamcast
defaults to 640x480-16@60. At the time of writing the
24bpp and 32bpp modes function poorly. Work to fix that is
ongoing
Note: the 640x240 mode is currently broken, and should not be Note: the 640x240 mode is currently broken, and should not be
used for any reason. It is only mentioned as a reference. used for any reason. It is only mentioned here as a reference.
inverse - invert colors on screen (for LCD displays) inverse - invert colors on screen (for LCD displays)
@@ -52,10 +56,10 @@ output:X - output type. This can be any of the following: pal, ntsc, and
X11 X11
=== ===
XF86_FBDev should work, in theory. At the time of this writing it is XF86_FBDev has been shown to work on the Dreamcast in the past - though not yet
totally untested and may or may not even portray the beginnings of on any 2.6 series kernel.
working. If you end up testing this, please let me know!
-- --
Paul Mundt <lethal@linuxdc.org> Paul Mundt <lethal@linuxdc.org>
Updated by Adrian McMenamin <adrian@mcmen.demon.co.uk>
+10
View File
@@ -1,3 +1,13 @@
---------------------------------------------------------------------------
!!!!!!!!!!!!!!!WARNING!!!!!!!!
The zero page is a kernel internal data structure, not a stable ABI. It might change
without warning and the kernel has no way to detect old versions of it.
If you're writing some external code like a boot loader you should only use
the stable versioned real mode boot protocol described in boot.txt. Otherwise the kernel
might break you at any time.
!!!!!!!!!!!!!WARNING!!!!!!!!!!!
----------------------------------------------------------------------------
Summary of boot_params layout (kernel point of view) Summary of boot_params layout (kernel point of view)
( collected by Hans Lermen and Martin Mares ) ( collected by Hans Lermen and Martin Mares )
@@ -98,6 +98,15 @@ applicable everywhere (see syntax).
times, the limit is set to the largest selection. times, the limit is set to the largest selection.
Reverse dependencies can only be used with boolean or tristate Reverse dependencies can only be used with boolean or tristate
symbols. symbols.
Note:
select is evil.... select will by brute force set a symbol
equal to 'y' without visiting the dependencies. So abusing
select you are able to select a symbol FOO even if FOO depends
on BAR that is not set. In general use select only for
non-visible symbols (no prompts anywhere) and for symbols with
no dependencies. That will limit the usefulness but on the
other hand avoid the illegal configurations all over. kconfig
should one day warn about such things.
- numerical ranges: "range" <symbol> <symbol> ["if" <expr>] - numerical ranges: "range" <symbol> <symbol> ["if" <expr>]
This allows to limit the range of possible input values for int This allows to limit the range of possible input values for int
+23 -1
View File
@@ -163,6 +163,8 @@ and is between 256 and 4096 characters. It is defined in the file
acpi_irq_isa= [HW,ACPI] If irq_balance, mark listed IRQs used by ISA acpi_irq_isa= [HW,ACPI] If irq_balance, mark listed IRQs used by ISA
Format: <irq>,<irq>... Format: <irq>,<irq>...
acpi_no_auto_ssdt [HW,ACPI] Disable automatic loading of SSDT
acpi_os_name= [HW,ACPI] Tell ACPI BIOS the name of the OS acpi_os_name= [HW,ACPI] Tell ACPI BIOS the name of the OS
Format: To spoof as Windows 98: ="Microsoft Windows" Format: To spoof as Windows 98: ="Microsoft Windows"
@@ -1820,6 +1822,26 @@ and is between 256 and 4096 characters. It is defined in the file
thash_entries= [KNL,NET] thash_entries= [KNL,NET]
Set number of hash buckets for TCP connection Set number of hash buckets for TCP connection
thermal.act= [HW,ACPI]
-1: disable all active trip points in all thermal zones
<degrees C>: override all lowest active trip points
thermal.nocrt= [HW,ACPI]
Set to disable actions on ACPI thermal zone
critical and hot trip points.
thermal.off= [HW,ACPI]
1: disable ACPI thermal control
thermal.psv= [HW,ACPI]
-1: disable all passive trip points
<degrees C>: override all passive trip points to this value
thermal.tzp= [HW,ACPI]
Specify global default ACPI thermal zone polling rate
<deci-seconds>: poll all thermal zones at this interval
0: no polling (default)
time Show timing data prefixed to each printk message line time Show timing data prefixed to each printk message line
[deprecated, see 'printk.time'] [deprecated, see 'printk.time']
@@ -1922,7 +1944,7 @@ and is between 256 and 4096 characters. It is defined in the file
See header of drivers/scsi/wd7000.c. See header of drivers/scsi/wd7000.c.
wdt= [WDT] Watchdog wdt= [WDT] Watchdog
See Documentation/watchdog/watchdog.txt. See Documentation/watchdog/wdt.txt.
xd= [HW,XT] Original XT pre-IDE (RLL encoded) disks. xd= [HW,XT] Original XT pre-IDE (RLL encoded) disks.
xd_geo= See header of drivers/block/xd.c. xd_geo= See header of drivers/block/xd.c.
+3 -1
View File
@@ -13,7 +13,9 @@ LGUEST_GUEST_TOP := ($(CONFIG_PAGE_OFFSET) - 0x08000000)
CFLAGS:=-Wall -Wmissing-declarations -Wmissing-prototypes -O3 -Wl,-T,lguest.lds CFLAGS:=-Wall -Wmissing-declarations -Wmissing-prototypes -O3 -Wl,-T,lguest.lds
LDLIBS:=-lz LDLIBS:=-lz
# Removing this works for some versions of ld.so (eg. Ubuntu Feisty) and
# not others (eg. FC7).
LDFLAGS+=-static
all: lguest.lds lguest all: lguest.lds lguest
# The linker script on x86 is so complex the only way of creating one # The linker script on x86 is so complex the only way of creating one
+322
View File
@@ -0,0 +1,322 @@
==============
Memory Hotplug
==============
Last Updated: Jul 28 2007
This document is about memory hotplug including how-to-use and current status.
Because Memory Hotplug is still under development, contents of this text will
be changed often.
1. Introduction
1.1 purpose of memory hotplug
1.2. Phases of memory hotplug
1.3. Unit of Memory online/offline operation
2. Kernel Configuration
3. sysfs files for memory hotplug
4. Physical memory hot-add phase
4.1 Hardware(Firmware) Support
4.2 Notify memory hot-add event by hand
5. Logical Memory hot-add phase
5.1. State of memory
5.2. How to online memory
6. Logical memory remove
6.1 Memory offline and ZONE_MOVABLE
6.2. How to offline memory
7. Physical memory remove
8. Future Work List
Note(1): x86_64 has a special implementation for memory hotplug.
This text does not describe it.
Note(2): This text assumes that sysfs is mounted at /sys.
---------------
1. Introduction
---------------
1.1 purpose of memory hotplug
------------
Memory Hotplug allows users to increase/decrease the amount of memory.
Generally, there are two purposes.
(A) For changing the amount of memory.
This is to allow a feature like capacity on demand.
(B) For installing/removing DIMMs or NUMA-nodes physically.
This is to exchange DIMMs/NUMA-nodes, reduce power consumption, etc.
(A) is required by highly virtualized environments and (B) is required by
hardware which supports memory power management.
Linux memory hotplug is designed for both purposes.
1.2. Phases of memory hotplug
---------------
There are 2 phases in Memory Hotplug.
1) Physical Memory Hotplug phase
2) Logical Memory Hotplug phase.
The first phase is to communicate with hardware/firmware and make/erase
the environment for hotplugged memory. Basically, this phase is necessary
for purpose (B), but it is also a good phase for communication between
highly virtualized environments.
When memory is hotplugged, the kernel recognizes new memory, makes new memory
management tables, and makes sysfs files for new memory's operation.
If firmware supports notification of connection of new memory to OS,
this phase is triggered automatically. ACPI can notify this event. If not,
"probe" operation by system administration is used instead.
(see Section 4.).
Logical Memory Hotplug phase is to change memory state into
available/unavailable for users. Amount of memory from user's view is
changed by this phase. The kernel makes all memory in it as free pages
when a memory range is available.
In this document, this phase is described as online/offline.
Logical Memory Hotplug phase is triggered by write of sysfs file by system
administrator. For the hot-add case, it must be executed after Physical Hotplug
phase by hand.
(However, if you write udev's hotplug scripts for memory hotplug, these
phases can be executed in a seamless way.)
1.3. Unit of Memory online/offline operation
------------
Memory hotplug uses SPARSEMEM memory model. SPARSEMEM divides the whole memory
into chunks of the same size. The chunk is called a "section". The size of
a section is architecture dependent. For example, power uses 16MiB, ia64 uses
1GiB. The unit of online/offline operation is "one section". (see Section 3.)
To determine the size of sections, please read this file:
/sys/devices/system/memory/block_size_bytes
This file shows the size of sections in bytes.
-----------------------
2. Kernel Configuration
-----------------------
To use memory hotplug feature, kernel must be compiled with following
config options.
- For all memory hotplug
Memory model -> Sparse Memory (CONFIG_SPARSEMEM)
Allow for memory hot-add (CONFIG_MEMORY_HOTPLUG)
- To enable memory removal, the followings are also necessary
Allow for memory hot remove (CONFIG_MEMORY_HOTREMOVE)
Page Migration (CONFIG_MIGRATION)
- For ACPI memory hotplug, the followings are also necessary
Memory hotplug (under ACPI Support menu) (CONFIG_ACPI_HOTPLUG_MEMORY)
This option can be kernel module.
- As a related configuration, if your box has a feature of NUMA-node hotplug
via ACPI, then this option is necessary too.
ACPI0004,PNP0A05 and PNP0A06 Container Driver (under ACPI Support menu)
(CONFIG_ACPI_CONTAINER).
This option can be kernel module too.
--------------------------------
3 sysfs files for memory hotplug
--------------------------------
All sections have their device information under /sys/devices/system/memory as
/sys/devices/system/memory/memoryXXX
(XXX is section id.)
Now, XXX is defined as start_address_of_section / section_size.
For example, assume 1GiB section size. A device for a memory starting at
0x100000000 is /sys/devices/system/memory/memory4
(0x100000000 / 1GiB = 4)
This device covers address range [0x100000000 ... 0x140000000)
Under each section, you can see 3 files.
/sys/devices/system/memory/memoryXXX/phys_index
/sys/devices/system/memory/memoryXXX/phys_device
/sys/devices/system/memory/memoryXXX/state
'phys_index' : read-only and contains section id, same as XXX.
'state' : read-write
at read: contains online/offline state of memory.
at write: user can specify "online", "offline" command
'phys_device': read-only: designed to show the name of physical memory device.
This is not well implemented now.
NOTE:
These directories/files appear after physical memory hotplug phase.
--------------------------------
4. Physical memory hot-add phase
--------------------------------
4.1 Hardware(Firmware) Support
------------
On x86_64/ia64 platform, memory hotplug by ACPI is supported.
In general, the firmware (ACPI) which supports memory hotplug defines
memory class object of _HID "PNP0C80". When a notify is asserted to PNP0C80,
Linux's ACPI handler does hot-add memory to the system and calls a hotplug udev
script. This will be done automatically.
But scripts for memory hotplug are not contained in generic udev package(now).
You may have to write it by yourself or online/offline memory by hand.
Please see "How to online memory", "How to offline memory" in this text.
If firmware supports NUMA-node hotplug, and defines an object _HID "ACPI0004",
"PNP0A05", or "PNP0A06", notification is asserted to it, and ACPI handler
calls hotplug code for all of objects which are defined in it.
If memory device is found, memory hotplug code will be called.
4.2 Notify memory hot-add event by hand
------------
In some environments, especially virtualized environment, firmware will not
notify memory hotplug event to the kernel. For such environment, "probe"
interface is supported. This interface depends on CONFIG_ARCH_MEMORY_PROBE.
Now, CONFIG_ARCH_MEMORY_PROBE is supported only by powerpc, but it does not
contain highly architecture-specific code. Please add the config if you need the "probe"
interface.
Probe interface is located at
/sys/devices/system/memory/probe
You can tell the physical address of new memory to the kernel by
% echo start_address_of_new_memory > /sys/devices/system/memory/probe
Then, [start_address_of_new_memory, start_address_of_new_memory + section_size)
memory range is hot-added. In this case, hotplug script is not called (in
current implementation). You'll have to online memory by yourself.
Please see "How to online memory" in this text.
------------------------------
5. Logical Memory hot-add phase
------------------------------
5.1. State of memory
------------
To see (online/offline) state of memory section, read 'state' file.
% cat /sys/devices/system/memory/memoryXXX/state
If the memory section is online, you'll read "online".
If the memory section is offline, you'll read "offline".
5.2. How to online memory
------------
Even if the memory is hot-added, it is not at ready-to-use state.
For using newly added memory, you have to "online" the memory section.
For onlining, you have to write "online" to the section's state file as:
% echo online > /sys/devices/system/memory/memoryXXX/state
After this, section memoryXXX's state will be 'online' and the amount of
available memory will be increased.
Currently, newly added memory is added as ZONE_NORMAL (for powerpc, ZONE_DMA).
This may be changed in future.
------------------------
6. Logical memory remove
------------------------
6.1 Memory offline and ZONE_MOVABLE
------------
Memory offlining is more complicated than memory online. Because memory offline
has to make the whole memory section be unused, memory offline can fail if
the section includes memory which cannot be freed.
In general, memory offline can use 2 techniques.
(1) reclaim and free all memory in the section.
(2) migrate all pages in the section.
In the current implementation, Linux's memory offline uses method (2), freeing
all pages in the section by page migration. But not all pages are
migratable. Under current Linux, migratable pages are anonymous pages and
page caches. For offlining a section by migration, the kernel has to guarantee
that the section contains only migratable pages.
Now, a boot option for making a section which consists of migratable pages is
supported. By specifying "kernelcore=" or "movablecore=" boot option, you can
create ZONE_MOVABLE...a zone which is just used for movable pages.
(See also Documentation/kernel-parameters.txt)
Assume the system has "TOTAL" amount of memory at boot time, this boot option
creates ZONE_MOVABLE as following.
1) When kernelcore=YYYY boot option is used,
Size of memory not for movable pages (not for offline) is YYYY.
Size of memory for movable pages (for offline) is TOTAL-YYYY.
2) When movablecore=ZZZZ boot option is used,
Size of memory not for movable pages (not for offline) is TOTAL - ZZZZ.
Size of memory for movable pages (for offline) is ZZZZ.
Note) Unfortunately, there is no information to show which section belongs
to ZONE_MOVABLE. This is TBD.
6.2. How to offline memory
------------
You can offline a section by using the same sysfs interface that was used in
memory onlining.
% echo offline > /sys/devices/system/memory/memoryXXX/state
If offline succeeds, the state of the memory section is changed to be "offline".
If it fails, some error code (like -EBUSY) will be returned by the kernel.
Even if a section does not belong to ZONE_MOVABLE, you can try to offline it.
If it doesn't contain 'unmovable' memory, you'll get success.
A section under ZONE_MOVABLE is considered to be able to be offlined easily.
But under some busy state, it may return -EBUSY. Even if a memory section
cannot be offlined due to -EBUSY, you can retry offlining it and may be able to
offline it (or not).
(For example, a page is referred to by some kernel internal call and released
soon.)
Consideration:
Memory hotplug's design direction is to make the possibility of memory offlining
higher and to guarantee unplugging memory under any situation. But it needs
more work. Returning -EBUSY under some situation may be good because the user
can decide to retry more or not by himself. Currently, memory offlining code
does some amount of retry with 120 seconds timeout.
-------------------------
7. Physical memory remove
-------------------------
Need more implementation yet....
- Notifying the firmware when the OS has completed its removal work.
- Guarding against physical removal while that work is not yet complete.
--------------
8. Future Work
--------------
- allowing memory hot-add to ZONE_MOVABLE. maybe we need some switch like
sysctl or new control file.
- showing memory section and physical device relationship.
- showing memory section and node relationship (maybe good for NUMA)
- showing memory section is under ZONE_MOVABLE or not
- test memory offlining and make it better.
- support HugeTLB page migration and offlining.
- memmap removing at memory offline.
- physical remove memory.
+1 -1
View File
@@ -83,7 +83,7 @@ Some implementation details:
CFS uses nanosecond granularity accounting and does not rely on any CFS uses nanosecond granularity accounting and does not rely on any
jiffies or other HZ detail. Thus the CFS scheduler has no notion of jiffies or other HZ detail. Thus the CFS scheduler has no notion of
'timeslices' and has no heuristics whatsoever. There is only one 'timeslices' and has no heuristics whatsoever. There is only one
central tunable: central tunable (you have to switch on CONFIG_SCHED_DEBUG):
/proc/sys/kernel/sched_granularity_ns /proc/sys/kernel/sched_granularity_ns
+108
View File
@@ -0,0 +1,108 @@
This document explains the thinking about the revamped and streamlined
nice-levels implementation in the new Linux scheduler.
Nice levels were always pretty weak under Linux and people continuously
pestered us to make nice +19 tasks use up much less CPU time.
Unfortunately that was not that easy to implement under the old
scheduler, (otherwise we'd have done it long ago) because nice level
support was historically coupled to timeslice length, and timeslice
units were driven by the HZ tick, so the smallest timeslice was 1/HZ.
In the O(1) scheduler (in 2003) we changed negative nice levels to be
much stronger than they were before in 2.4 (and people were happy about
that change), and we also intentionally calibrated the linear timeslice
rule so that nice +19 level would be _exactly_ 1 jiffy. To better
understand it, the timeslice graph went like this (cheesy ASCII art
alert!):
A
\ | [timeslice length]
\ |
\ |
\ |
\ |
\|___100msecs
|^ . _
| ^ . _
| ^ . _
-*----------------------------------*-----> [nice level]
-20 | +19
|
|
So that if someone wanted to really renice tasks, +19 would give a much
bigger hit than the normal linear rule would do. (The solution of
changing the ABI to extend priorities was discarded early on.)
This approach worked to some degree for some time, but later on with
HZ=1000 it caused 1 jiffy to be 1 msec, which meant 0.1% CPU usage which
we felt to be a bit excessive. Excessive _not_ because it's too small of
a CPU utilization, but because it causes too frequent (once per
millisec) rescheduling. (and would thus trash the cache, etc. Remember,
this was long ago when hardware was weaker and caches were smaller, and
people were running number crunching apps at nice +19.)
So for HZ=1000 we changed nice +19 to 5msecs, because that felt like the
right minimal granularity - and this translates to 5% CPU utilization.
But the fundamental HZ-sensitive property for nice+19 still remained,
and we never got a single complaint about nice +19 being too _weak_ in
terms of CPU utilization, we only got complaints about it (still) being
too _strong_ :-)
To sum it up: we always wanted to make nice levels more consistent, but
within the constraints of HZ and jiffies and their nasty design level
coupling to timeslices and granularity it was not really viable.
The second (less frequent but still periodically occurring) complaint
about Linux's nice level support was its asymmetry around the origin
(which you can see demonstrated in the picture above), or more
accurately: the fact that nice level behavior depended on the _absolute_
nice level as well, while the nice API itself is fundamentally
"relative":
int nice(int inc);
asmlinkage long sys_nice(int increment)
(the first one is the glibc API, the second one is the syscall API.)
Note that the 'inc' is relative to the current nice level. Tools like
bash's "nice" command mirror this relative API.
With the old scheduler, if you for example started a niced task with +1
and another task with +2, the CPU split between the two tasks would
depend on the nice level of the parent shell - if it was at nice -10 the
CPU split was different than if it was at +5 or +10.
A third complaint against Linux's nice level support was that negative
nice levels were not 'punchy enough', so lots of people had to resort to
run audio (and other multimedia) apps under RT priorities such as
SCHED_FIFO. But this caused other problems: SCHED_FIFO is not starvation
proof, and a buggy SCHED_FIFO app can also lock up the system for good.
The new scheduler in v2.6.23 addresses all three types of complaints:
To address the first complaint (of nice levels being not "punchy"
enough), the scheduler was decoupled from 'time slice' and HZ concepts
(and granularity was made a separate concept from nice levels) and thus
it was possible to implement better and more consistent nice +19
support: with the new scheduler nice +19 tasks get a HZ-independent
1.5%, instead of the variable 3%-5%-9% range they got in the old
scheduler.
To address the second complaint (of nice levels not being consistent),
the new scheduler makes nice(1) have the same CPU utilization effect on
tasks, regardless of their absolute nice levels. So on the new
scheduler, running a nice +10 and a nice +11 task has the same CPU
utilization "split" between them as running a nice -5 and a nice -4
task. (one will get 55% of the CPU, the other 45%.) That is why nice
levels were changed to be "multiplicative" (or exponential) - that way
it does not matter which nice level you start out from, the 'relative
result' will always be the same.
The third complaint (of negative nice levels not being "punchy" enough
and forcing audio apps to run under the more dangerous SCHED_FIFO
scheduling policy) is addressed by the new scheduler almost
automatically: stronger negative nice levels are an automatic
side-effect of the recalibrated dynamic range of nice levels.
+2 -2
View File
@@ -1,6 +1,6 @@
Linux Magic System Request Key Hacks Linux Magic System Request Key Hacks
Documentation for sysrq.c Documentation for sysrq.c
Last update: 2007-MAR-14 Last update: 2007-AUG-04
* What is the magic SysRq key? * What is the magic SysRq key?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -78,7 +78,7 @@ On all - write a character to /proc/sysrq-trigger. e.g.:
'g' - Used by kgdb on ppc and sh platforms. 'g' - Used by kgdb on ppc and sh platforms.
'h' - Will display help (actually any other key than those listed 'h' - Will display help (actually any other key than those listed
above will display help. but 'h' is easy to remember :-) here will display help. but 'h' is easy to remember :-)
'i' - Send a SIGKILL to all processes, except for init. 'i' - Send a SIGKILL to all processes, except for init.
+2 -2
View File
@@ -105,10 +105,10 @@ The version of thinkpad-acpi's sysfs interface is exported by the driver
as a driver attribute (see below). as a driver attribute (see below).
Sysfs driver attributes are on the driver's sysfs attribute space, Sysfs driver attributes are on the driver's sysfs attribute space,
for 2.6.20 this is /sys/bus/platform/drivers/thinkpad-acpi/. for 2.6.20 this is /sys/bus/platform/drivers/thinkpad_acpi/.
Sysfs device attributes are on the driver's sysfs attribute space, Sysfs device attributes are on the driver's sysfs attribute space,
for 2.6.20 this is /sys/devices/platform/thinkpad-acpi/. for 2.6.20 this is /sys/devices/platform/thinkpad_acpi/.
Driver version Driver version
-------------- --------------
+332
View File
@@ -0,0 +1,332 @@
What is Linux Memory Policy?
In the Linux kernel, "memory policy" determines from which node the kernel will
allocate memory in a NUMA system or in an emulated NUMA system. Linux has
supported platforms with Non-Uniform Memory Access architectures since 2.4.?.
The current memory policy support was added to Linux 2.6 around May 2004. This
document attempts to describe the concepts and APIs of the 2.6 memory policy
support.
Memory policies should not be confused with cpusets (Documentation/cpusets.txt)
which is an administrative mechanism for restricting the nodes from which
memory may be allocated by a set of processes. Memory policies are a
programming interface that a NUMA-aware application can take advantage of. When
both cpusets and policies are applied to a task, the restrictions of the cpuset
take priority. See "MEMORY POLICIES AND CPUSETS" below for more details.
MEMORY POLICY CONCEPTS
Scope of Memory Policies
The Linux kernel supports _scopes_ of memory policy, described here from
most general to most specific:
System Default Policy: this policy is "hard coded" into the kernel. It
is the policy that governs all page allocations that aren't controlled
by one of the more specific policy scopes discussed below. When the
system is "up and running", the system default policy will use "local
allocation" described below. However, during boot up, the system
default policy will be set to interleave allocations across all nodes
with "sufficient" memory, so as not to overload the initial boot node
with boot-time allocations.
Task/Process Policy: this is an optional, per-task policy. When defined
for a specific task, this policy controls all page allocations made by or
on behalf of the task that aren't controlled by a more specific scope.
If a task does not define a task policy, then all page allocations that
would have been controlled by the task policy "fall back" to the System
Default Policy.
The task policy applies to the entire address space of a task. Thus,
it is inheritable, and indeed is inherited, across both fork()
[clone() w/o the CLONE_VM flag] and exec*(). This allows a parent task
to establish the task policy for a child task exec()'d from an
executable image that has no awareness of memory policy. See the
MEMORY POLICY APIS section, below, for an overview of the system call
that a task may use to set/change its task/process policy.
In a multi-threaded task, task policies apply only to the thread
[Linux kernel task] that installs the policy and any threads
subsequently created by that thread. Any sibling threads existing
at the time a new task policy is installed retain their current
policy.
A task policy applies only to pages allocated after the policy is
installed. Any pages already faulted in by the task when the task
changes its task policy remain where they were allocated based on
the policy at the time they were allocated.
VMA Policy: A "VMA" or "Virtual Memory Area" refers to a range of a task's
virtual address space. A task may define a specific policy for a range
of its virtual address space. See the MEMORY POLICIES APIS section,
below, for an overview of the mbind() system call used to set a VMA
policy.
A VMA policy will govern the allocation of pages that back this region of
the address space. Any regions of the task's address space that don't
have an explicit VMA policy will fall back to the task policy, which may
itself fall back to the System Default Policy.
VMA policies have a few complicating details:
VMA policy applies ONLY to anonymous pages. These include pages
allocated for anonymous segments, such as the task stack and heap, and
any regions of the address space mmap()ed with the MAP_ANONYMOUS flag.
If a VMA policy is applied to a file mapping, it will be ignored if
the mapping used the MAP_SHARED flag. If the file mapping used the
MAP_PRIVATE flag, the VMA policy will only be applied when an
anonymous page is allocated on an attempt to write to the mapping--
i.e., at Copy-On-Write.
VMA policies are shared between all tasks that share a virtual address
space--a.k.a. threads--independent of when the policy is installed; and
they are inherited across fork(). However, because VMA policies refer
to a specific region of a task's address space, and because the address
space is discarded and recreated on exec*(), VMA policies are NOT
inheritable across exec(). Thus, only NUMA-aware applications may
use VMA policies.
A task may install a new VMA policy on a sub-range of a previously
mmap()ed region. When this happens, Linux splits the existing virtual
memory area into 2 or 3 VMAs, each with its own policy.
By default, VMA policy applies only to pages allocated after the policy
is installed. Any pages already faulted into the VMA range remain
where they were allocated based on the policy at the time they were
allocated. However, since 2.6.16, Linux supports page migration via
the mbind() system call, so that page contents can be moved to match
a newly installed policy.
Shared Policy: Conceptually, shared policies apply to "memory objects"
mapped shared into one or more tasks' distinct address spaces. An
application installs shared policies the same way as VMA policies--using
the mbind() system call specifying a range of virtual addresses that map
the shared object. However, unlike VMA policies, which can be considered
to be an attribute of a range of a task's address space, shared policies
apply directly to the shared object. Thus, all tasks that attach to the
object share the policy, and all pages allocated for the shared object,
by any task, will obey the shared policy.
As of 2.6.22, only shared memory segments, created by shmget() or
mmap(MAP_ANONYMOUS|MAP_SHARED), support shared policy. When shared
policy support was added to Linux, the associated data structures were
added to hugetlbfs shmem segments. At the time, hugetlbfs did not
support allocation at fault time--a.k.a. lazy allocation--so hugetlbfs
shmem segments were never "hooked up" to the shared policy support.
Although hugetlbfs segments now support lazy allocation, their support
for shared policy has not been completed.
As mentioned above [re: VMA policies], allocations of page cache
pages for regular files mmap()ed with MAP_SHARED ignore any VMA
policy installed on the virtual address range backed by the shared
file mapping. Rather, shared page cache pages, including pages backing
private mappings that have not yet been written by the task, follow
task policy, if any, else System Default Policy.
The shared policy infrastructure supports different policies on subset
ranges of the shared object. However, Linux still splits the VMA of
the task that installs the policy for each range of distinct policy.
Thus, different tasks that attach to a shared memory segment can have
different VMA configurations mapping that one shared object. This
can be seen by examining the /proc/<pid>/numa_maps of tasks sharing
a shared memory region, when one task has installed shared policy on
one or more ranges of the region.
Components of Memory Policies
A Linux memory policy is a tuple consisting of a "mode" and an optional set
of nodes. The mode determines the behavior of the policy, while the
optional set of nodes can be viewed as the arguments to the behavior.
Internally, memory policies are implemented by a reference counted
structure, struct mempolicy. Details of this structure will be discussed
in context, below, as required to explain the behavior.
Note: in some functions AND in the struct mempolicy itself, the mode
is called "policy". However, to avoid confusion with the policy tuple,
this document will continue to use the term "mode".
Linux memory policy supports the following 4 behavioral modes:
Default Mode--MPOL_DEFAULT: The behavior specified by this mode is
context or scope dependent.
As mentioned in the Policy Scope section above, during normal
system operation, the System Default Policy is hard coded to
contain the Default mode.
In this context, default mode means "local" allocation--that is
attempt to allocate the page from the node associated with the cpu
where the fault occurs. If the "local" node has no memory, or the
node's memory is exhausted [no free pages available], local
allocation will "fallback to"--attempt to allocate pages from--
"nearby" nodes, in order of increasing "distance".
Implementation detail -- subject to change: "Fallback" uses
a per node list of sibling nodes--called zonelists--built at
boot time, or when nodes or memory are added or removed from
the system [memory hotplug]. These per node zonelists are
constructed with nodes in order of increasing distance based
on information provided by the platform firmware.
When a task/process policy or a shared policy contains the Default
mode, this also means "local allocation", as described above.
In the context of a VMA, Default mode means "fall back to task
policy"--which may or may not specify Default mode. Thus, Default
mode can not be counted on to mean local allocation when used
on a non-shared region of the address space. However, see
MPOL_PREFERRED below.
The Default mode does not use the optional set of nodes.
MPOL_BIND: This mode specifies that memory must come from the
set of nodes specified by the policy.
The memory policy APIs do not specify an order in which the nodes
will be searched. However, unlike "local allocation", the Bind
policy does not consider the distance between the nodes. Rather,
allocations will fallback to the nodes specified by the policy in
order of numeric node id. Like everything in Linux, this is subject
to change.
MPOL_PREFERRED: This mode specifies that the allocation should be
attempted from the single node specified in the policy. If that
allocation fails, the kernel will search other nodes, exactly as
it would for a local allocation that started at the preferred node
in increasing distance from the preferred node. "Local" allocation
policy can be viewed as a Preferred policy that starts at the node
containing the cpu where the allocation takes place.
Internally, the Preferred policy uses a single node--the
preferred_node member of struct mempolicy. A "distinguished"
value of this preferred_node, currently '-1', is interpreted
as "the node containing the cpu where the allocation takes
place"--local allocation. This is the way to specify
local allocation for a specific range of addresses--i.e. for
VMA policies.
MPOL_INTERLEAVE: This mode specifies that page allocations be
interleaved, on a page granularity, across the nodes specified in
the policy. This mode also behaves slightly differently, based on
the context where it is used:
For allocation of anonymous pages and shared memory pages,
Interleave mode indexes the set of nodes specified by the policy
using the page offset of the faulting address into the segment
[VMA] containing the address modulo the number of nodes specified
by the policy. It then attempts to allocate a page, starting at
the selected node, as if the node had been specified by a Preferred
policy or had been selected by a local allocation. That is,
allocation will follow the per node zonelist.
For allocation of page cache pages, Interleave mode indexes the set
of nodes specified by the policy using a node counter maintained
per task. This counter wraps around to the lowest specified node
after it reaches the highest specified node. This will tend to
spread the pages out over the nodes specified by the policy based
on the order in which they are allocated, rather than based on any
page offset into an address range or file. During system boot up,
the temporary interleaved system default policy works in this
mode.
MEMORY POLICY APIs
Linux supports 3 system calls for controlling memory policy. These APIs
always affect only the calling task, the calling task's address space, or
some shared object mapped into the calling task's address space.
Note: the headers that define these APIs and the parameter data types
for user space applications reside in a package that is not part of
the Linux kernel. The kernel system call interfaces, with the 'sys_'
prefix, are defined in <linux/syscalls.h>; the mode and flag
definitions are defined in <linux/mempolicy.h>.
Set [Task] Memory Policy:
long set_mempolicy(int mode, const unsigned long *nmask,
unsigned long maxnode);
Sets the calling task's "task/process memory policy" to the mode
specified by the 'mode' argument and the set of nodes defined
by 'nmask'. 'nmask' points to a bit mask of node ids containing
at least 'maxnode' ids.
See the set_mempolicy(2) man page for more details
Get [Task] Memory Policy or Related Information
long get_mempolicy(int *mode,
unsigned long *nmask, unsigned long maxnode,
void *addr, int flags);
Queries the "task/process memory policy" of the calling task, or
the policy or location of a specified virtual address, depending
on the 'flags' argument.
See the get_mempolicy(2) man page for more details
Install VMA/Shared Policy for a Range of Task's Address Space
long mbind(void *start, unsigned long len, int mode,
const unsigned long *nmask, unsigned long maxnode,
unsigned flags);
mbind() installs the policy specified by (mode, nmask, maxnode) as
a VMA policy for the range of the calling task's address space
specified by the 'start' and 'len' arguments. Additional actions
may be requested via the 'flags' argument.
See the mbind(2) man page for more details.
MEMORY POLICY COMMAND LINE INTERFACE
Although not strictly part of the Linux implementation of memory policy,
a command line tool, numactl(8), exists that allows one to:
+ set the task policy for a specified program via set_mempolicy(2), fork(2) and
exec(2)
+ set the shared policy for a shared memory segment via mbind(2)
The numactl(8) tool is packaged with the run-time version of the library
containing the memory policy system call wrappers. Some distributions
package the headers and compile-time libraries in a separate development
package.
MEMORY POLICIES AND CPUSETS
Memory policies work within cpusets as described above. For memory policies
that require a node or set of nodes, the nodes are restricted to the set of
nodes whose memories are allowed by the cpuset constraints. If the
intersection of the set of nodes specified for the policy and the set of nodes
allowed by the cpuset is the empty set, the policy is considered invalid and
cannot be installed.
The interaction of memory policies and cpusets can be problematic for a
couple of reasons:
1) the memory policy APIs take physical node ids as arguments. However, the
memory policy APIs do not provide a way to determine what nodes are valid
in the context where the application is running. An application MAY consult
the cpuset file system [directly or via an out of tree, and not generally
available, libcpuset API] to obtain this information, but then the
application must be aware that it is running in a cpuset and use what are
intended primarily as administrative APIs.
However, as long as the policy specifies at least one node that is valid
in the controlling cpuset, the policy can be used.
2) when tasks in two cpusets share access to a memory region, such as shared
memory segments created by shmget() or mmap() with the MAP_ANONYMOUS and
MAP_SHARED flags, and any of the tasks install shared policy on the region,
only nodes whose memories are allowed in both cpusets may be used in the
policies. Again, obtaining this information requires "stepping outside"
the memory policy APIs, as well as knowing in what cpusets other tasks might
be attaching to the shared region, to use the cpuset information.
Furthermore, if the cpusets' allowed memory sets are disjoint, "local"
allocation is the only valid policy.
+1 -1
View File
@@ -396,7 +396,7 @@ void report(struct slabinfo *s)
if (strcmp(s->name, "*") == 0) if (strcmp(s->name, "*") == 0)
return; return;
printf("\nSlabcache: %-20s Aliases: %2d Order : %2d Objects: %d\n", printf("\nSlabcache: %-20s Aliases: %2d Order : %2d Objects: %lu\n",
s->name, s->aliases, s->order, s->objects); s->name, s->aliases, s->order, s->objects);
if (s->hwcache_align) if (s->hwcache_align)
printf("** Hardware cacheline aligned\n"); printf("** Hardware cacheline aligned\n");
+10
View File
@@ -0,0 +1,10 @@
00-INDEX
- this file.
pcwd-watchdog.txt
- documentation for Berkshire Products PC Watchdog ISA cards.
src/
- directory holding watchdog related example programs.
watchdog-api.txt
- description of the Linux Watchdog driver API.
wdt.txt
- description of the Watchdog Timer Interfaces for Linux.
+28 -5
View File
@@ -97,6 +97,12 @@ M: philb@gnu.org
L: netdev@vger.kernel.org L: netdev@vger.kernel.org
S: Maintained S: Maintained
3C59X NETWORK DRIVER
P: Steffen Klassert
M: klassert@mathematik.tu-chemnitz.de
L: netdev@vger.kernel.org
S: Maintained
3CR990 NETWORK DRIVER 3CR990 NETWORK DRIVER
P: David Dillow P: David Dillow
M: dave@thedillows.org M: dave@thedillows.org
@@ -612,6 +618,15 @@ W: http://sourceforge.net/projects/acpi4asus
W: http://xf.iksaif.net/acpi4asus W: http://xf.iksaif.net/acpi4asus
S: Maintained S: Maintained
ASYNCHRONOUS TRANSFERS/TRANSFORMS API
P: Dan Williams
M: dan.j.williams@intel.com
P: Shannon Nelson
M: shannon.nelson@intel.com
L: linux-kernel@vger.kernel.org
W: http://sourceforge.net/projects/xscaleiop
S: Supported
ATA OVER ETHERNET DRIVER ATA OVER ETHERNET DRIVER
P: Ed L. Cashin P: Ed L. Cashin
M: ecashin@coraid.com M: ecashin@coraid.com
@@ -664,7 +679,7 @@ S: Maintained
AUDIT SUBSYSTEM AUDIT SUBSYSTEM
P: David Woodhouse P: David Woodhouse
M: dwmw2@infradead.org M: dwmw2@infradead.org
L: linux-audit@redhat.com L: linux-audit@redhat.com (subscribers-only)
W: http://people.redhat.com/sgrubb/audit/ W: http://people.redhat.com/sgrubb/audit/
T: git kernel.org:/pub/scm/linux/kernel/git/dwmw2/audit-2.6.git T: git kernel.org:/pub/scm/linux/kernel/git/dwmw2/audit-2.6.git
S: Maintained S: Maintained
@@ -994,7 +1009,7 @@ P: Steve French
M: sfrench@samba.org M: sfrench@samba.org
L: linux-cifs-client@lists.samba.org L: linux-cifs-client@lists.samba.org
L: samba-technical@lists.samba.org L: samba-technical@lists.samba.org
W: http://us1.samba.org/samba/Linux_CIFS_client.html W: http://linux-cifs.samba.org/
T: git kernel.org:/pub/scm/linux/kernel/git/sfrench/cifs-2.6.git T: git kernel.org:/pub/scm/linux/kernel/git/sfrench/cifs-2.6.git
S: Supported S: Supported
@@ -1277,11 +1292,13 @@ M: tori@unhappy.mine.nu
L: netdev@vger.kernel.org L: netdev@vger.kernel.org
S: Maintained S: Maintained
DMA GENERIC MEMCPY SUBSYSTEM DMA GENERIC OFFLOAD ENGINE SUBSYSTEM
P: Shannon Nelson P: Shannon Nelson
M: shannon.nelson@intel.com M: shannon.nelson@intel.com
P: Dan Williams
M: dan.j.williams@intel.com
L: linux-kernel@vger.kernel.org L: linux-kernel@vger.kernel.org
S: Maintained S: Supported
DME1737 HARDWARE MONITOR DRIVER DME1737 HARDWARE MONITOR DRIVER
P: Juerg Haefliger P: Juerg Haefliger
@@ -1958,6 +1975,12 @@ M: shannon.nelson@intel.com
L: linux-kernel@vger.kernel.org L: linux-kernel@vger.kernel.org
S: Supported S: Supported
INTEL IOP-ADMA DMA DRIVER
P: Dan Williams
M: dan.j.williams@intel.com
L: linux-kernel@vger.kernel.org
S: Supported
INTEL IXP4XX RANDOM NUMBER GENERATOR SUPPORT INTEL IXP4XX RANDOM NUMBER GENERATOR SUPPORT
P: Deepak Saxena P: Deepak Saxena
M: dsaxena@plexity.net M: dsaxena@plexity.net
@@ -3429,7 +3452,7 @@ S: Maintained
TPM DEVICE DRIVER TPM DEVICE DRIVER
P: Kylene Hall P: Kylene Hall
M: kjhall@us.ibm.com M: tpmdd-devel@lists.sourceforge.net
W: http://tpmdd.sourceforge.net W: http://tpmdd.sourceforge.net
P: Marcel Selhorst P: Marcel Selhorst
M: tpm@selhorst.net M: tpm@selhorst.net
+1 -1
View File
@@ -1,7 +1,7 @@
VERSION = 2 VERSION = 2
PATCHLEVEL = 6 PATCHLEVEL = 6
SUBLEVEL = 23 SUBLEVEL = 23
EXTRAVERSION =-rc1 EXTRAVERSION =-rc3
NAME = Holy Dancing Manatees, Batman! NAME = Holy Dancing Manatees, Batman!
# *DOCUMENTATION* # *DOCUMENTATION*
+20 -7
View File
@@ -270,6 +270,19 @@ titan_dispatch_irqs(u64 mask)
/* /*
* Titan Family * Titan Family
*/ */
/*
 * Best-effort wrapper around request_irq(): the arguments are passed
 * through unchanged, and a non-zero return is reported via printk()
 * and then ignored, so callers never see the failure.
 */
static void __init
titan_request_irq(unsigned int irq, irq_handler_t handler,
		  unsigned long irqflags, const char *devname,
		  void *dev_id)
{
	int err;

	err = request_irq(irq, handler, irqflags, devname, dev_id);
	if (err) {
		/* irq is unsigned int, so it is printed with %u, not %d. */
		printk("titan_request_irq for IRQ %u returned %d; ignoring\n",
		       irq, err);
	}
}
static void __init static void __init
titan_late_init(void) titan_late_init(void)
{ {
@@ -278,15 +291,15 @@ titan_late_init(void)
* all reported to the kernel as machine checks, so the handler * all reported to the kernel as machine checks, so the handler
* is a nop so it can be called to count the individual events. * is a nop so it can be called to count the individual events.
*/ */
request_irq(63+16, titan_intr_nop, IRQF_DISABLED, titan_request_irq(63+16, titan_intr_nop, IRQF_DISABLED,
"CChip Error", NULL); "CChip Error", NULL);
request_irq(62+16, titan_intr_nop, IRQF_DISABLED, titan_request_irq(62+16, titan_intr_nop, IRQF_DISABLED,
"PChip 0 H_Error", NULL); "PChip 0 H_Error", NULL);
request_irq(61+16, titan_intr_nop, IRQF_DISABLED, titan_request_irq(61+16, titan_intr_nop, IRQF_DISABLED,
"PChip 1 H_Error", NULL); "PChip 1 H_Error", NULL);
request_irq(60+16, titan_intr_nop, IRQF_DISABLED, titan_request_irq(60+16, titan_intr_nop, IRQF_DISABLED,
"PChip 0 C_Error", NULL); "PChip 0 C_Error", NULL);
request_irq(59+16, titan_intr_nop, IRQF_DISABLED, titan_request_irq(59+16, titan_intr_nop, IRQF_DISABLED,
"PChip 1 C_Error", NULL); "PChip 1 C_Error", NULL);
/* /*
@@ -345,9 +358,9 @@ privateer_init_pci(void)
* Hook a couple of extra err interrupts that the * Hook a couple of extra err interrupts that the
* common titan code won't. * common titan code won't.
*/ */
request_irq(53+16, titan_intr_nop, IRQF_DISABLED, titan_request_irq(53+16, titan_intr_nop, IRQF_DISABLED,
"NMI", NULL); "NMI", NULL);
request_irq(50+16, titan_intr_nop, IRQF_DISABLED, titan_request_irq(50+16, titan_intr_nop, IRQF_DISABLED,
"Temperature Warning", NULL); "Temperature Warning", NULL);
/* /*
File diff suppressed because it is too large Load Diff

Some files were not shown because too many files have changed in this diff Show More