Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6

Merge the BIOS workarounds from 2.6.32, and the swiotlb fallback on failure.
2026-05-01 15:00:59 -07:00 · 2009-12-08 09:58:33 +00:00
parent aa697079ee 7b626acb8f
commit ec20849193
3916 changed files with 177182 additions and 76637 deletions
@@ -25,6 +25,7 @@
 *.elf
 *.bin
 *.gz
+*.bz2
 *.lzma
 *.patch
 *.gcno
@@ -31,3 +31,31 @@ Date:		March 2009
 Kernel Version: 2.6.30
 Contact:	iss_storagedev@hp.com
 Description:	A symbolic link to /sys/block/cciss!cXdY
+
+Where:		/sys/bus/pci/devices/<dev>/ccissX/rescan
+Date:		August 2009
+Kernel Version:	2.6.31
+Contact:	iss_storagedev@hp.com
+Description:	Kicks of a rescan of the controller to discover logical
+		drive topology changes.
+
+Where:		/sys/bus/pci/devices/<dev>/ccissX/cXdY/lunid
+Date:		August 2009
+Kernel Version: 2.6.31
+Contact:	iss_storagedev@hp.com
+Description:	Displays the 8-byte LUN ID used to address logical
+		drive Y of controller X.
+
+Where:		/sys/bus/pci/devices/<dev>/ccissX/cXdY/raid_level
+Date:		August 2009
+Kernel Version: 2.6.31
+Contact:	iss_storagedev@hp.com
+Description:	Displays the RAID level of logical drive Y of
+		controller X.
+
+Where:		/sys/bus/pci/devices/<dev>/ccissX/cXdY/usage_count
+Date:		August 2009
+Kernel Version: 2.6.31
+Contact:	iss_storagedev@hp.com
+Description:	Displays the usage count (number of opens) of logical drive Y
+		of controller X.
@@ -1,4 +1,4 @@
-What:           /sys/class/usb_host/usb_hostN/wusb_chid
+What:           /sys/class/uwb_rc/uwbN/wusbhc/wusb_chid
 Date:           July 2008
 KernelVersion:  2.6.27
 Contact:        David Vrabel <david.vrabel@csr.com>
@@ -9,7 +9,7 @@ Description:

                Set an all zero CHID to stop the host controller.

-What:           /sys/class/usb_host/usb_hostN/wusb_trust_timeout
+What:           /sys/class/uwb_rc/uwbN/wusbhc/wusb_trust_timeout
 Date:           July 2008
 KernelVersion:  2.6.27
 Contact:        David Vrabel <david.vrabel@csr.com>
@@ -1,18 +0,0 @@
-What:      /sys/devices/system/cpu/cpu*/cache/index*/cache_disable_X
-Date:      August 2008
-KernelVersion:	2.6.27
-Contact:	mark.langsdorf@amd.com
-Description:	These files exist in every cpu's cache index directories.
-		There are currently 2 cache_disable_# files in each
-		directory.  Reading from these files on a supported
-		processor will return that cache disable index value
-		for that processor and node.  Writing to one of these
-		files will cause the specificed cache index to be disabled.
-
-		Currently, only AMD Family 10h Processors support cache index
-		disable, and only for their L3 caches.  See the BIOS and
-		Kernel Developer's Guide at
-		http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/31116-Public-GH-BKDG_3.20_2-4-09.pdf
-		for formatting information and other details on the
-		cache index disable.
-Users:    joachim.deguara@amd.com
@@ -0,0 +1,156 @@
+What:		/sys/devices/system/cpu/
+Date:		pre-git history
+Contact:	Linux kernel mailing list <linux-kernel@vger.kernel.org>
+Description:
+		A collection of both global and individual CPU attributes
+
+		Individual CPU attributes are contained in subdirectories
+		named by the kernel's logical CPU number, e.g.:
+
+		/sys/devices/system/cpu/cpu#/
+
+What:		/sys/devices/system/cpu/sched_mc_power_savings
+		/sys/devices/system/cpu/sched_smt_power_savings
+Date:		June 2006
+Contact:	Linux kernel mailing list <linux-kernel@vger.kernel.org>
+Description:	Discover and adjust the kernel's multi-core scheduler support.
+
+		Possible values are:
+
+		0 - No power saving load balance (default value)
+		1 - Fill one thread/core/package first for long running threads
+		2 - Also bias task wakeups to semi-idle cpu package for power
+		    savings
+
+		sched_mc_power_savings is dependent upon SCHED_MC, which is
+		itself architecture dependent.
+
+		sched_smt_power_savings is dependent upon SCHED_SMT, which
+		is itself architecture dependent.
+
+		The two files are independent of each other. It is possible
+		that one file may be present without the other.
+
+		Introduced by git commit 5c45bf27.
+
+
+What:		/sys/devices/system/cpu/kernel_max
+		/sys/devices/system/cpu/offline
+		/sys/devices/system/cpu/online
+		/sys/devices/system/cpu/possible
+		/sys/devices/system/cpu/present
+Date:		December 2008
+Contact:	Linux kernel mailing list <linux-kernel@vger.kernel.org>
+Description:	CPU topology files that describe kernel limits related to
+		hotplug. Briefly:
+
+		kernel_max: the maximum cpu index allowed by the kernel
+		configuration.
+
+		offline: cpus that are not online because they have been
+		HOTPLUGGED off or exceed the limit of cpus allowed by the
+		kernel configuration (kernel_max above).
+
+		online: cpus that are online and being scheduled.
+
+		possible: cpus that have been allocated resources and can be
+		brought online if they are present.
+
+		present: cpus that have been identified as being present in
+		the system.
+
+		See Documentation/cputopology.txt for more information.
+
+
+
+What:		/sys/devices/system/cpu/cpu#/node
+Date:		October 2009
+Contact:	Linux memory management mailing list <linux-mm@kvack.org>
+Description:	Discover NUMA node a CPU belongs to
+
+		When CONFIG_NUMA is enabled, a symbolic link that points
+		to the corresponding NUMA node directory.
+
+		For example, the following symlink is created for cpu42
+		in NUMA node 2:
+
+		/sys/devices/system/cpu/cpu42/node2 -> ../../node/node2
+
+
+What:		/sys/devices/system/cpu/cpu#/topology/core_id
+		/sys/devices/system/cpu/cpu#/topology/core_siblings
+		/sys/devices/system/cpu/cpu#/topology/core_siblings_list
+		/sys/devices/system/cpu/cpu#/topology/physical_package_id
+		/sys/devices/system/cpu/cpu#/topology/thread_siblings
+		/sys/devices/system/cpu/cpu#/topology/thread_siblings_list
+Date:		December 2008
+Contact:	Linux kernel mailing list <linux-kernel@vger.kernel.org>
+Description:	CPU topology files that describe a logical CPU's relationship
+		to other cores and threads in the same physical package.
+
+		One cpu# directory is created per logical CPU in the system,
+		e.g. /sys/devices/system/cpu/cpu42/.
+
+		Briefly, the files above are:
+
+		core_id: the CPU core ID of cpu#. Typically it is the
+		hardware platform's identifier (rather than the kernel's).
+		The actual value is architecture and platform dependent.
+
+		core_siblings: internal kernel map of cpu#'s hardware threads
+		within the same physical_package_id.
+
+		core_siblings_list: human-readable list of the logical CPU
+		numbers within the same physical_package_id as cpu#.
+
+		physical_package_id: physical package id of cpu#. Typically
+		corresponds to a physical socket number, but the actual value
+		is architecture and platform dependent.
+
+		thread_siblings: internel kernel map of cpu#'s hardware
+		threads within the same core as cpu#
+
+		thread_siblings_list: human-readable list of cpu#'s hardware
+		threads within the same core as cpu#
+
+		See Documentation/cputopology.txt for more information.
+
+
+What:		/sys/devices/system/cpu/cpuidle/current_driver
+		/sys/devices/system/cpu/cpuidle/current_governer_ro
+Date:		September 2007
+Contact:	Linux kernel mailing list <linux-kernel@vger.kernel.org>
+Description:	Discover cpuidle policy and mechanism
+
+		Various CPUs today support multiple idle levels that are
+		differentiated by varying exit latencies and power
+		consumption during idle.
+
+		Idle policy (governor) is differentiated from idle mechanism
+		(driver)
+
+		current_driver: displays current idle mechanism
+
+		current_governor_ro: displays current idle policy
+
+		See files in Documentation/cpuidle/ for more information.
+
+
+What:      /sys/devices/system/cpu/cpu*/cache/index*/cache_disable_X
+Date:      August 2008
+KernelVersion:	2.6.27
+Contact:	mark.langsdorf@amd.com
+Description:	These files exist in every cpu's cache index directories.
+		There are currently 2 cache_disable_# files in each
+		directory.  Reading from these files on a supported
+		processor will return that cache disable index value
+		for that processor and node.  Writing to one of these
+		files will cause the specificed cache index to be disabled.
+
+		Currently, only AMD Family 10h Processors support cache index
+		disable, and only for their L3 caches.  See the BIOS and
+		Kernel Developer's Guide at
+		http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/31116-Public-GH-BKDG_3.20_2-4-09.pdf
+		for formatting information and other details on the
+		cache index disable.
+Users:    joachim.deguara@amd.com
@@ -232,7 +232,7 @@ your e-mail client so that it sends your patches untouched.
 When sending patches to Linus, always follow step #7.

 Large changes are not appropriate for mailing lists, and some
-maintainers.  If your patch, uncompressed, exceeds 40 kB in size,
+maintainers.  If your patch, uncompressed, exceeds 300 kB in size,
 it is preferred that you store your patch on an Internet-accessible
 server, and provide instead a URL (link) pointing to your patch.

@@ -0,0 +1,147 @@
+ARM TCM (Tightly-Coupled Memory) handling in Linux
+----
+Written by Linus Walleij <linus.walleij@stericsson.com>
+
+Some ARM SoC:s have a so-called TCM (Tightly-Coupled Memory).
+This is usually just a few (4-64) KiB of RAM inside the ARM
+processor.
+
+Due to being embedded inside the CPU The TCM has a
+Harvard-architecture, so there is an ITCM (instruction TCM)
+and a DTCM (data TCM). The DTCM can not contain any
+instructions, but the ITCM can actually contain data.
+The size of DTCM or ITCM is minimum 4KiB so the typical
+minimum configuration is 4KiB ITCM and 4KiB DTCM.
+
+ARM CPU:s have special registers to read out status, physical
+location and size of TCM memories. arch/arm/include/asm/cputype.h
+defines a CPUID_TCM register that you can read out from the
+system control coprocessor. Documentation from ARM can be found
+at http://infocenter.arm.com, search for "TCM Status Register"
+to see documents for all CPUs. Reading this register you can
+determine if ITCM (bit 0) and/or DTCM (bit 16) is present in the
+machine.
+
+There is further a TCM region register (search for "TCM Region
+Registers" at the ARM site) that can report and modify the location
+size of TCM memories at runtime. This is used to read out and modify
+TCM location and size. Notice that this is not a MMU table: you
+actually move the physical location of the TCM around. At the
+place you put it, it will mask any underlying RAM from the
+CPU so it is usually wise not to overlap any physical RAM with
+the TCM.
+
+The TCM memory can then be remapped to another address again using
+the MMU, but notice that the TCM if often used in situations where
+the MMU is turned off. To avoid confusion the current Linux
+implementation will map the TCM 1 to 1 from physical to virtual
+memory in the location specified by the machine.
+
+TCM is used for a few things:
+
+- FIQ and other interrupt handlers that need deterministic
+  timing and cannot wait for cache misses.
+
+- Idle loops where all external RAM is set to self-refresh
+  retention mode, so only on-chip RAM is accessible by
+  the CPU and then we hang inside ITCM waiting for an
+  interrupt.
+
+- Other operations which implies shutting off or reconfiguring
+  the external RAM controller.
+
+There is an interface for using TCM on the ARM architecture
+in <asm/tcm.h>. Using this interface it is possible to:
+
+- Define the physical address and size of ITCM and DTCM.
+
+- Tag functions to be compiled into ITCM.
+
+- Tag data and constants to be allocated to DTCM and ITCM.
+
+- Have the remaining TCM RAM added to a special
+  allocation pool with gen_pool_create() and gen_pool_add()
+  and provice tcm_alloc() and tcm_free() for this
+  memory. Such a heap is great for things like saving
+  device state when shutting off device power domains.
+
+A machine that has TCM memory shall select HAVE_TCM in
+arch/arm/Kconfig for itself, and then the
+rest of the functionality will depend on the physical
+location and size of ITCM and DTCM to be defined in
+mach/memory.h for the machine. Code that needs to use
+TCM shall #include <asm/tcm.h> If the TCM is not located
+at the place given in memory.h it will be moved using
+the TCM Region registers.
+
+Functions to go into itcm can be tagged like this:
+int __tcmfunc foo(int bar);
+
+Variables to go into dtcm can be tagged like this:
+int __tcmdata foo;
+
+Constants can be tagged like this:
+int __tcmconst foo;
+
+To put assembler into TCM just use
+.section ".tcm.text" or .section ".tcm.data"
+respectively.
+
+Example code:
+
+#include <asm/tcm.h>
+
+/* Uninitialized data */
+static u32 __tcmdata tcmvar;
+/* Initialized data */
+static u32 __tcmdata tcmassigned = 0x2BADBABEU;
+/* Constant */
+static const u32 __tcmconst tcmconst = 0xCAFEBABEU;
+
+static void __tcmlocalfunc tcm_to_tcm(void)
+{
+	int i;
+	for (i = 0; i < 100; i++)
+		tcmvar ++;
+}
+
+static void __tcmfunc hello_tcm(void)
+{
+	/* Some abstract code that runs in ITCM */
+	int i;
+	for (i = 0; i < 100; i++) {
+		tcmvar ++;
+	}
+	tcm_to_tcm();
+}
+
+static void __init test_tcm(void)
+{
+	u32 *tcmem;
+	int i;
+
+	hello_tcm();
+	printk("Hello TCM executed from ITCM RAM\n");
+
+	printk("TCM variable from testrun: %u @ %p\n", tcmvar, &tcmvar);
+	tcmvar = 0xDEADBEEFU;
+	printk("TCM variable: 0x%x @ %p\n", tcmvar, &tcmvar);
+
+	printk("TCM assigned variable: 0x%x @ %p\n", tcmassigned, &tcmassigned);
+
+	printk("TCM constant: 0x%x @ %p\n", tcmconst, &tcmconst);
+
+	/* Allocate some TCM memory from the pool */
+	tcmem = tcm_alloc(20);
+	if (tcmem) {
+		printk("TCM Allocated 20 bytes of TCM @ %p\n", tcmem);
+		tcmem[0] = 0xDEADBEEFU;
+		tcmem[1] = 0x2BADBABEU;
+		tcmem[2] = 0xCAFEBABEU;
+		tcmem[3] = 0xDEADBEEFU;
+		tcmem[4] = 0x2BADBABEU;
+		for (i = 0; i < 5; i++)
+			printk("TCM tcmem[%d] = %08x\n", i, tcmem[i]);
+		tcm_free(tcmem, 20);
+	}
+}
@@ -194,7 +194,6 @@ static void cfag12864b_blit(void)
 */

 #include <stdio.h>
-#include <string.h>

 #define EXAMPLES	6

@@ -227,7 +227,14 @@ as the path relative to the root of the cgroup file system.
 Each cgroup is represented by a directory in the cgroup file system
 containing the following files describing that cgroup:

- - tasks: list of tasks (by pid) attached to that cgroup
+ - tasks: list of tasks (by pid) attached to that cgroup.  This list
+   is not guaranteed to be sorted.  Writing a thread id into this file
+   moves the thread into this cgroup.
+ - cgroup.procs: list of tgids in the cgroup.  This list is not
+   guaranteed to be sorted or free of duplicate tgids, and userspace
+   should sort/uniquify the list if this property is required.
+   Writing a tgid into this file moves all threads with that tgid into
+   this cgroup.
 - notify_on_release flag: run the release agent on exit?
 - release_agent: the path to use for release notifications (this file
   exists in the top cgroup only)
@@ -374,7 +381,7 @@ Now you want to do something with this cgroup.

 In this directory you can find several files:
 # ls
-notify_on_release tasks
+cgroup.procs notify_on_release tasks
 (plus whatever files added by the attached subsystems)

 Now attach your shell to this cgroup:
@@ -408,6 +415,26 @@ You can attach the current shell task by echoing 0:

 # echo 0 > tasks

+2.3 Mounting hierarchies by name
+--------------------------------
+
+Passing the name=<x> option when mounting a cgroups hierarchy
+associates the given name with the hierarchy.  This can be used when
+mounting a pre-existing hierarchy, in order to refer to it by name
+rather than by its set of active subsystems.  Each hierarchy is either
+nameless, or has a unique name.
+
+The name should match [\w.-]+
+
+When passing a name=<x> option for a new hierarchy, you need to
+specify subsystems manually; the legacy behaviour of mounting all
+subsystems when none are explicitly specified is not supported when
+you give a subsystem a name.
+
+The name of the subsystem appears as part of the hierarchy description
+in /proc/mounts and /proc/<pid>/cgroups.
+
+
 3. Kernel API
 =============

@@ -501,7 +528,7 @@ rmdir() will fail with it. From this behavior, pre_destroy() can be
 called multiple times against a cgroup.

 int can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
-	       struct task_struct *task)
+	       struct task_struct *task, bool threadgroup)
 (cgroup_mutex held by caller)

 Called prior to moving a task into a cgroup; if the subsystem
@@ -509,14 +536,20 @@ returns an error, this will abort the attach operation.  If a NULL
 task is passed, then a successful result indicates that *any*
 unspecified task can be moved into the cgroup. Note that this isn't
 called on a fork. If this method returns 0 (success) then this should
-remain valid while the caller holds cgroup_mutex.
+remain valid while the caller holds cgroup_mutex. If threadgroup is
+true, then a successful result indicates that all threads in the given
+thread's threadgroup can be moved together.

 void attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
-	    struct cgroup *old_cgrp, struct task_struct *task)
+	    struct cgroup *old_cgrp, struct task_struct *task,
+	    bool threadgroup)
 (cgroup_mutex held by caller)

 Called after the task has been attached to the cgroup, to allow any
 post-attachment activity that requires memory allocations or blocking.
+If threadgroup is true, the subsystem should take care of all threads
+in the specified thread's threadgroup. Currently does not support any
+subsystem that might need the old_cgrp for every thread in the group.

 void fork(struct cgroup_subsy *ss, struct task_struct *task)

@@ -179,6 +179,9 @@ The reclaim algorithm has not been modified for cgroups, except that
 pages that are selected for reclaiming come from the per cgroup LRU
 list.

+NOTE: Reclaim does not work for the root cgroup, since we cannot set any
+limits on the root cgroup.
+
 2. Locking

 The memory controller uses the following hierarchy
@@ -210,6 +213,7 @@ We can alter the memory limit:
 NOTE: We can use a suffix (k, K, m, M, g or G) to indicate values in kilo,
 mega or gigabytes.
 NOTE: We can write "-1" to reset the *.limit_in_bytes(unlimited).
+NOTE: We cannot set limits on the root cgroup any more.

 # cat /cgroups/0/memory.limit_in_bytes
 4194304
@@ -375,7 +379,42 @@ cgroups created below it.

 NOTE2: This feature can be enabled/disabled per subtree.

-7. TODO
+7. Soft limits
+
+Soft limits allow for greater sharing of memory. The idea behind soft limits
+is to allow control groups to use as much of the memory as needed, provided
+
+a. There is no memory contention
+b. They do not exceed their hard limit
+
+When the system detects memory contention or low memory control groups
+are pushed back to their soft limits. If the soft limit of each control
+group is very high, they are pushed back as much as possible to make
+sure that one control group does not starve the others of memory.
+
+Please note that soft limits is a best effort feature, it comes with
+no guarantees, but it does its best to make sure that when memory is
+heavily contended for, memory is allocated based on the soft limit
+hints/setup. Currently soft limit based reclaim is setup such that
+it gets invoked from balance_pgdat (kswapd).
+
+7.1 Interface
+
+Soft limits can be setup by using the following commands (in this example we
+assume a soft limit of 256 megabytes)
+
+# echo 256M > memory.soft_limit_in_bytes
+
+If we want to change this to 1G, we can at any time use
+
+# echo 1G > memory.soft_limit_in_bytes
+
+NOTE1: Soft limits take effect over a long period of time, since they involve
+       reclaiming memory for balancing between memory cgroups
+NOTE2: It is recommended to set the soft limit always below the hard limit,
+       otherwise the hard limit will take precedence.
+
+8. TODO

 1. Add support for accounting huge pages (as a separate controller)
 2. Make per-cgroup scanner reclaim not-shared pages first
@@ -34,7 +34,7 @@ static char cn_test_name[] = "cn_test";
 static struct sock *nls;
 static struct timer_list cn_test_timer;

-static void cn_test_callback(struct cn_msg *msg)
+static void cn_test_callback(struct cn_msg *msg, struct netlink_skb_parms *nsp)
 {
 	pr_info("%s: %lu: idx=%x, val=%x, seq=%u, ack=%u, len=%d: %s.\n",
 	        __func__, jiffies, msg->id.idx, msg->id.val,
@@ -23,7 +23,7 @@ handling, etc...  The Connector driver allows any kernelspace agents to use
 netlink based networking for inter-process communication in a significantly
 easier way:

-int cn_add_callback(struct cb_id *id, char *name, void (*callback) (void *));
+int cn_add_callback(struct cb_id *id, char *name, void (*callback) (struct cn_msg *, struct netlink_skb_parms *));
 void cn_netlink_send(struct cn_msg *msg, u32 __group, int gfp_mask);

 struct cb_id
@@ -53,15 +53,15 @@ struct cn_msg
 Connector interfaces.
 /*****************************************/

-int cn_add_callback(struct cb_id *id, char *name, void (*callback) (void *));
+int cn_add_callback(struct cb_id *id, char *name, void (*callback) (struct cn_msg *, struct netlink_skb_parms *));

 Registers new callback with connector core.

 struct cb_id *id		- unique connector's user identifier.
 				  It must be registered in connector.h for legal in-kernel users.
 char *name			- connector's callback symbolic name.
- void (*callback) (void *)	- connector's callback.
-				  Argument must be dereferenced to struct cn_msg *.
+ void (*callback) (struct cn..)	- connector's callback.
+				  cn_msg and the sender's credentials


 void cn_del_callback(struct cb_id *id);
@@ -1,15 +1,28 @@

-Export cpu topology info via sysfs. Items (attributes) are similar
+Export CPU topology info via sysfs. Items (attributes) are similar
 to /proc/cpuinfo.

 1) /sys/devices/system/cpu/cpuX/topology/physical_package_id:
-represent the physical package id of  cpu X;
+
+	physical package id of cpuX. Typically corresponds to a physical
+	socket number, but the actual value is architecture and platform
+	dependent.
+
 2) /sys/devices/system/cpu/cpuX/topology/core_id:
-represent the cpu core id to cpu X;
+
+	the CPU core ID of cpuX. Typically it is the hardware platform's
+	identifier (rather than the kernel's).  The actual value is
+	architecture and platform dependent.
+
 3) /sys/devices/system/cpu/cpuX/topology/thread_siblings:
-represent the thread siblings to cpu X in the same core;
+
+	internel kernel map of cpuX's hardware threads within the same
+	core as cpuX
+
 4) /sys/devices/system/cpu/cpuX/topology/core_siblings:
-represent the thread siblings to cpu X in the same physical package;
+
+	internal kernel map of cpuX's hardware threads within the same
+	physical_package_id.

 To implement it in an architecture-neutral way, a new source file,
 drivers/base/topology.c, is to export the 4 attributes.
@@ -32,32 +45,32 @@ not defined by include/asm-XXX/topology.h:
 3) thread_siblings: just the given CPU
 4) core_siblings: just the given CPU

-Additionally, cpu topology information is provided under
+Additionally, CPU topology information is provided under
 /sys/devices/system/cpu and includes these files.  The internal
 source for the output is in brackets ("[]").

-    kernel_max: the maximum cpu index allowed by the kernel configuration.
+    kernel_max: the maximum CPU index allowed by the kernel configuration.
 		[NR_CPUS-1]

-    offline:	cpus that are not online because they have been
+    offline:	CPUs that are not online because they have been
 		HOTPLUGGED off (see cpu-hotplug.txt) or exceed the limit
-		of cpus allowed by the kernel configuration (kernel_max
+		of CPUs allowed by the kernel configuration (kernel_max
 		above). [~cpu_online_mask + cpus >= NR_CPUS]

-    online:	cpus that are online and being scheduled [cpu_online_mask]
+    online:	CPUs that are online and being scheduled [cpu_online_mask]

-    possible:	cpus that have been allocated resources and can be
+    possible:	CPUs that have been allocated resources and can be
 		brought online if they are present. [cpu_possible_mask]

-    present:	cpus that have been identified as being present in the
+    present:	CPUs that have been identified as being present in the
 		system. [cpu_present_mask]

 The format for the above output is compatible with cpulist_parse()
 [see <linux/cpumask.h>].  Some examples follow.

-In this example, there are 64 cpus in the system but cpus 32-63 exceed
+In this example, there are 64 CPUs in the system but cpus 32-63 exceed
 the kernel max which is limited to 0..31 by the NR_CPUS config option
-being 32.  Note also that cpus 2 and 4-31 are not online but could be
+being 32.  Note also that CPUs 2 and 4-31 are not online but could be
 brought online as they are both present and possible.

     kernel_max: 31
@@ -67,8 +80,8 @@ brought online as they are both present and possible.
        present: 0-31

 In this example, the NR_CPUS config option is 128, but the kernel was
-started with possible_cpus=144.  There are 4 cpus in the system and cpu2
-was manually taken offline (and is the only cpu that can be brought
+started with possible_cpus=144.  There are 4 CPUs in the system and cpu2
+was manually taken offline (and is the only CPU that can be brought
 online.)

     kernel_max: 127
@@ -78,4 +91,4 @@ online.)
        present: 0-3

 See cpu-hotplug.txt for the possible_cpus=NUM kernel start parameter
-as well as more information on the various cpumask's.
+as well as more information on the various cpumasks.
@@ -54,20 +54,23 @@ features surfaced as a result:

 3.1 General format of the API:
 struct dma_async_tx_descriptor *
-async_<operation>(<op specific parameters>,
-		  enum async_tx_flags flags,
-        	  struct dma_async_tx_descriptor *dependency,
-        	  dma_async_tx_callback callback_routine,
-		  void *callback_parameter);
+async_<operation>(<op specific parameters>, struct async_submit ctl *submit)

 3.2 Supported operations:
-memcpy       - memory copy between a source and a destination buffer
-memset       - fill a destination buffer with a byte value
-xor          - xor a series of source buffers and write the result to a
-	       destination buffer
-xor_zero_sum - xor a series of source buffers and set a flag if the
-	       result is zero.  The implementation attempts to prevent
-	       writes to memory
+memcpy  - memory copy between a source and a destination buffer
+memset  - fill a destination buffer with a byte value
+xor     - xor a series of source buffers and write the result to a
+	  destination buffer
+xor_val - xor a series of source buffers and set a flag if the
+	  result is zero.  The implementation attempts to prevent
+	  writes to memory
+pq	- generate the p+q (raid6 syndrome) from a series of source buffers
+pq_val  - validate that a p and or q buffer are in sync with a given series of
+	  sources
+datap	- (raid6_datap_recov) recover a raid6 data block and the p block
+	  from the given sources
+2data	- (raid6_2data_recov) recover 2 raid6 data blocks from the given
+	  sources

 3.3 Descriptor management:
 The return value is non-NULL and points to a 'descriptor' when the operation
@@ -80,8 +83,8 @@ acknowledged by the application before the offload engine driver is allowed to
 recycle (or free) the descriptor.  A descriptor can be acked by one of the
 following methods:
 1/ setting the ASYNC_TX_ACK flag if no child operations are to be submitted
-2/ setting the ASYNC_TX_DEP_ACK flag to acknowledge the parent
-   descriptor of a new operation.
+2/ submitting an unacknowledged descriptor as a dependency to another
+   async_tx call will implicitly set the acknowledged state.
 3/ calling async_tx_ack() on the descriptor.

 3.4 When does the operation execute?
@@ -119,30 +122,42 @@ of an operation.
 Perform a xor->copy->xor operation where each operation depends on the
 result from the previous operation:

-void complete_xor_copy_xor(void *param)
+void callback(void *param)
 {
-	printk("complete\n");
+	struct completion *cmp = param;
+
+	complete(cmp);
 }

-int run_xor_copy_xor(struct page **xor_srcs,
-		     int xor_src_cnt,
-		     struct page *xor_dest,
-		     size_t xor_len,
-		     struct page *copy_src,
-		     struct page *copy_dest,
-		     size_t copy_len)
+void run_xor_copy_xor(struct page **xor_srcs,
+		      int xor_src_cnt,
+		      struct page *xor_dest,
+		      size_t xor_len,
+		      struct page *copy_src,
+		      struct page *copy_dest,
+		      size_t copy_len)
 {
 	struct dma_async_tx_descriptor *tx;
+	addr_conv_t addr_conv[xor_src_cnt];
+	struct async_submit_ctl submit;
+	addr_conv_t addr_conv[NDISKS];
+	struct completion cmp;

-	tx = async_xor(xor_dest, xor_srcs, 0, xor_src_cnt, xor_len,
-		       ASYNC_TX_XOR_DROP_DST, NULL, NULL, NULL);
-	tx = async_memcpy(copy_dest, copy_src, 0, 0, copy_len,
-			  ASYNC_TX_DEP_ACK, tx, NULL, NULL);
-	tx = async_xor(xor_dest, xor_srcs, 0, xor_src_cnt, xor_len,
-		       ASYNC_TX_XOR_DROP_DST | ASYNC_TX_DEP_ACK | ASYNC_TX_ACK,
-		       tx, complete_xor_copy_xor, NULL);
+	init_async_submit(&submit, ASYNC_TX_XOR_DROP_DST, NULL, NULL, NULL,
+			  addr_conv);
+	tx = async_xor(xor_dest, xor_srcs, 0, xor_src_cnt, xor_len, &submit)
+
+	submit->depend_tx = tx;
+	tx = async_memcpy(copy_dest, copy_src, 0, 0, copy_len, &submit);
+
+	init_completion(&cmp);
+	init_async_submit(&submit, ASYNC_TX_XOR_DROP_DST | ASYNC_TX_ACK, tx,
+			  callback, &cmp, addr_conv);
+	tx = async_xor(xor_dest, xor_srcs, 0, xor_src_cnt, xor_len, &submit);

 	async_tx_issue_pending_all();
+
+	wait_for_completion(&cmp);
 }

 See include/linux/async_tx.h for more information on the flags.  See the
@@ -64,14 +64,14 @@ be used to view the printk buffer of a remote machine, even with live update.

 Bernhard Kaindl enhanced firescope to support accessing 64-bit machines
 from 32-bit firescope and vice versa:
- ftp://ftp.suse.de/private/bk/firewire/tools/firescope-0.2.2.tar.bz2
+- http://halobates.de/firewire/firescope-0.2.2.tar.bz2

 and he implemented fast system dump (alpha version - read README.txt):
- ftp://ftp.suse.de/private/bk/firewire/tools/firedump-0.1.tar.bz2
+- http://halobates.de/firewire/firedump-0.1.tar.bz2

 There is also a gdb proxy for firewire which allows to use gdb to access
 data which can be referenced from symbols found by gdb in vmlinux:
- ftp://ftp.suse.de/private/bk/firewire/tools/fireproxy-0.33.tar.bz2
+- http://halobates.de/firewire/fireproxy-0.33.tar.bz2

 The latest version of this gdb proxy (fireproxy-0.34) can communicate (not
 yet stable) with kgdb over an memory-based communication module (kgdbom).
@@ -178,7 +178,7 @@ Step-by-step instructions for using firescope with early OHCI initialization:

 Notes
 -----
-Documentation and specifications: ftp://ftp.suse.de/private/bk/firewire/docs
+Documentation and specifications: http://halobates.de/firewire/

 FireWire is a trademark of Apple Inc. - for more information please refer to:
 http://en.wikipedia.org/wiki/FireWire
@@ -65,6 +65,7 @@ aicdb.h*
 asm-offsets.h
 asm_offsets.h
 autoconf.h*
+av_permissions.h
 bbootsect
 bin2c
 binkernel.spec
@@ -95,12 +96,14 @@ docproc
 elf2ecoff
 elfconfig.h*
 fixdep
+flask.h
 fore200e_mkfirm
 fore200e_pca_fw.c*
 gconf
 gen-devlist
 gen_crc32table
 gen_init_cpio
+genheaders
 genksyms
 *_gray256.c
 ihex2fw
@@ -312,10 +312,8 @@ and to the following documentation:
 8. Mailing list
 ---------------

-There are several frame buffer device related mailing lists at SourceForge:
-  - linux-fbdev-announce@lists.sourceforge.net, for announcements,
-  - linux-fbdev-user@lists.sourceforge.net, for generic user support,
-  - linux-fbdev-devel@lists.sourceforge.net, for project developers.
+There is a frame buffer device related mailing list at kernel.org:
+linux-fbdev@vger.kernel.org.

 Point your web browser to http://sourceforge.net/projects/linux-fbdev/ for
 subscription information and archive browsing.
@@ -354,14 +354,6 @@ Who:  Krzysztof Piotr Oledzki <ole@ans.pl>

 ---------------------------

-What:	fscher and fscpos drivers
-When:	June 2009
-Why:	Deprecated by the new fschmd driver.
-Who:	Hans de Goede <hdegoede@redhat.com>
-	Jean Delvare <khali@linux-fr.org>
-
---------------------------
-
 What:	sysfs ui for changing p4-clockmod parameters
 When:	September 2009
 Why:	See commits 129f8ae9b1b5be94517da76009ea956e89104ce8 and
@@ -426,6 +418,14 @@ When:	2.6.33
 Why:	Should be implemented in userspace, policy daemon.
 Who:	Johannes Berg <johannes@sipsolutions.net>

+---------------------------
+
+What:	CONFIG_INOTIFY
+When:	2.6.33
+Why:	last user (audit) will be converted to the newer more generic
+	and more easily maintained fsnotify subsystem
+Who:	Eric Paris <eparis@redhat.com>
+
 ----------------------------

 What:	lock_policy_rwsem_* and unlock_policy_rwsem_* will not be
@@ -459,3 +459,33 @@ Why:	OSS sound_core grabs all legacy minors (0-255) of SOUND_MAJOR
 	will also allow making ALSA OSS emulation independent of
 	sound_core.  The dependency will be broken then too.
 Who:	Tejun Heo <tj@kernel.org>
+
+----------------------------
+
+What:	Support for VMware's guest paravirtuliazation technique [VMI] will be
+	dropped.
+When:	2.6.37 or earlier.
+Why:	With the recent innovations in CPU hardware acceleration technologies
+	from Intel and AMD, VMware ran a few experiments to compare these
+	techniques to guest paravirtualization technique on VMware's platform.
+	These hardware assisted virtualization techniques have outperformed the
+	performance benefits provided by VMI in most of the workloads. VMware
+	expects that these hardware features will be ubiquitous in a couple of
+	years, as a result, VMware has started a phased retirement of this
+	feature from the hypervisor. We will be removing this feature from the
+	Kernel too. Right now we are targeting 2.6.37 but can retire earlier if
+	technical reasons (read opportunity to remove major chunk of pvops)
+	arise.
+
+	Please note that VMI has always been an optimization and non-VMI kernels
+	still work fine on VMware's platform.
+	Latest versions of VMware's product which support VMI are,
+	Workstation 7.0 and VSphere 4.0 on ESX side, future maintainence
+	releases for these products will continue supporting VMI.
+
+	For more details about VMI retirement take a look at this,
+	http://blogs.vmware.com/guestosguide/2009/09/vmi-retirement.html
+
+Who:	Alok N Kataria <akataria@vmware.com>
+
+----------------------------
@@ -18,11 +18,11 @@ the 9p client is available in the form of a USENIX paper:

 Other applications are described in the following papers:
 	* XCPU & Clustering
-		http://www.xcpu.org/xcpu-talk.pdf
+		http://xcpu.org/papers/xcpu-talk.pdf
 	* KVMFS: control file system for KVM
-		http://www.xcpu.org/kvmfs.pdf
-	* CellFS: A New ProgrammingModel for the Cell BE
-		http://www.xcpu.org/cellfs-talk.pdf
+		http://xcpu.org/papers/kvmfs.pdf
+	* CellFS: A New Programming Model for the Cell BE
+		http://xcpu.org/papers/cellfs-talk.pdf
 	* PROSE I/O: Using 9p to enable Application Partitions
 		http://plan9.escet.urjc.es/iwp9/cready/PROSE_iwp9_2006.pdf

@@ -48,6 +48,7 @@ OPTIONS
                                (see rfdno and wfdno)
 			virtio	- connect to the next virtio channel available
 				(from lguest or KVM with trans_virtio module)
+			rdma	- connect to a specified RDMA channel

  uname=name	user name to attempt mount as on the remote server.  The
  		server may override or ignore this value.  Certain user
@@ -59,16 +60,22 @@ OPTIONS
  cache=mode	specifies a caching policy.  By default, no caches are used.
 			loose = no attempts are made at consistency,
                                intended for exclusive, read-only mounts
+			fscache = use FS-Cache for a persistent, read-only
+				cache backend.

  debug=n	specifies debug level.  The debug level is a bitmask.
-  			0x01 = display verbose error messages
-			0x02 = developer debug (DEBUG_CURRENT)
-			0x04 = display 9p trace
-			0x08 = display VFS trace
-			0x10 = display Marshalling debug
-			0x20 = display RPC debug
-			0x40 = display transport debug
-			0x80 = display allocation debug
+			0x01  = display verbose error messages
+			0x02  = developer debug (DEBUG_CURRENT)
+			0x04  = display 9p trace
+			0x08  = display VFS trace
+			0x10  = display Marshalling debug
+			0x20  = display RPC debug
+			0x40  = display transport debug
+			0x80  = display allocation debug
+			0x100 = display protocol message debug
+			0x200 = display Fid debug
+			0x400 = display packet debug
+			0x800 = display fscache tracing debug

  rfdno=n	the file descriptor for reading with trans=fd

@@ -100,6 +107,10 @@ OPTIONS
 			any   = v9fs does single attach and performs all
 				operations as one user

+  cachetag	cache tag to use the specified persistent cache.
+		cache tags for existing cache sessions can be listed at
+		/sys/fs/9p/caches. (applies only to cache=fscache)
+
 RESOURCES
 =========

@@ -118,7 +129,7 @@ and export.
 A Linux version of the 9p server is now maintained under the npfs project
 on sourceforge (http://sourceforge.net/projects/npfs).  The currently
 maintained version is the single-threaded version of the server (named spfs)
-available from the same CVS repository.
+available from the same SVN repository.

 There are user and developer mailing lists available through the v9fs project
 on sourceforge (http://sourceforge.net/projects/v9fs).
@@ -126,7 +137,8 @@ on sourceforge (http://sourceforge.net/projects/v9fs).
 A stand-alone version of the module (which should build for any 2.6 kernel)
 is available via (http://github.com/ericvh/9p-sac/tree/master)

-News and other information is maintained on SWiK (http://swik.net/v9fs).
+News and other information is maintained on SWiK (http://swik.net/v9fs)
+and the Wiki (http://sf.net/apps/mediawiki/v9fs/index.php).

 Bug reports may be issued through the kernel.org bugzilla 
 (http://bugzilla.kernel.org)
@@ -235,6 +235,7 @@ proc files.
 		neg=N	Number of negative lookups made
 		pos=N	Number of positive lookups made
 		crt=N	Number of objects created by lookup
+		tmo=N	Number of lookups timed out and requeued
 	Updates	n=N	Number of update cookie requests seen
 		nul=N	Number of upd reqs given a NULL parent
 		run=N	Number of upd reqs granted CPU time
@@ -250,8 +251,10 @@ proc files.
 		ok=N	Number of successful alloc reqs
 		wt=N	Number of alloc reqs that waited on lookup completion
 		nbf=N	Number of alloc reqs rejected -ENOBUFS
+		int=N	Number of alloc reqs aborted -ERESTARTSYS
 		ops=N	Number of alloc reqs submitted
 		owt=N	Number of alloc reqs waited for CPU time
+		abt=N	Number of alloc reqs aborted due to object death
 	Retrvls	n=N	Number of retrieval (read) requests seen
 		ok=N	Number of successful retr reqs
 		wt=N	Number of retr reqs that waited on lookup completion
@@ -261,6 +264,7 @@ proc files.
 		oom=N	Number of retr reqs failed -ENOMEM
 		ops=N	Number of retr reqs submitted
 		owt=N	Number of retr reqs waited for CPU time
+		abt=N	Number of retr reqs aborted due to object death
 	Stores	n=N	Number of storage (write) requests seen
 		ok=N	Number of successful store reqs
 		agn=N	Number of store reqs on a page already pending storage
@@ -268,12 +272,37 @@ proc files.
 		oom=N	Number of store reqs failed -ENOMEM
 		ops=N	Number of store reqs submitted
 		run=N	Number of store reqs granted CPU time
+		pgs=N	Number of pages given store req processing time
+		rxd=N	Number of store reqs deleted from tracking tree
+		olm=N	Number of store reqs over store limit
+	VmScan	nos=N	Number of release reqs against pages with no pending store
+		gon=N	Number of release reqs against pages stored by time lock granted
+		bsy=N	Number of release reqs ignored due to in-progress store
+		can=N	Number of page stores cancelled due to release req
 	Ops	pend=N	Number of times async ops added to pending queues
 		run=N	Number of times async ops given CPU time
 		enq=N	Number of times async ops queued for processing
+		can=N	Number of async ops cancelled
+		rej=N	Number of async ops rejected due to object lookup/create failure
 		dfr=N	Number of async ops queued for deferred release
 		rel=N	Number of async ops released
 		gc=N	Number of deferred-release async ops garbage collected
+	CacheOp	alo=N	Number of in-progress alloc_object() cache ops
+		luo=N	Number of in-progress lookup_object() cache ops
+		luc=N	Number of in-progress lookup_complete() cache ops
+		gro=N	Number of in-progress grab_object() cache ops
+		upo=N	Number of in-progress update_object() cache ops
+		dro=N	Number of in-progress drop_object() cache ops
+		pto=N	Number of in-progress put_object() cache ops
+		syn=N	Number of in-progress sync_cache() cache ops
+		atc=N	Number of in-progress attr_changed() cache ops
+		rap=N	Number of in-progress read_or_alloc_page() cache ops
+		ras=N	Number of in-progress read_or_alloc_pages() cache ops
+		alp=N	Number of in-progress allocate_page() cache ops
+		als=N	Number of in-progress allocate_pages() cache ops
+		wrp=N	Number of in-progress write_page() cache ops
+		ucp=N	Number of in-progress uncache_page() cache ops
+		dsp=N	Number of in-progress dissociate_pages() cache ops


 (*) /proc/fs/fscache/histogram
@@ -299,6 +328,87 @@ proc files.
     jiffy range covered, and the SECS field the equivalent number of seconds.


+===========
+OBJECT LIST
+===========
+
+If CONFIG_FSCACHE_OBJECT_LIST is enabled, the FS-Cache facility will maintain a
+list of all the objects currently allocated and allow them to be viewed
+through:
+
+	/proc/fs/fscache/objects
+
+This will look something like:
+
+	[root@andromeda ~]# head /proc/fs/fscache/objects
+	OBJECT   PARENT   STAT CHLDN OPS OOP IPR EX READS EM EV F S | NETFS_COOKIE_DEF TY FL NETFS_DATA       OBJECT_KEY, AUX_DATA
+	======== ======== ==== ===== === === === == ===== == == = = | ================ == == ================ ================
+	   17e4b        2 ACTV     0   0   0   0  0     0 7b  4 0 8 | NFS.fh           DT  0 ffff88001dd82820 010006017edcf8bbc93b43298fdfbe71e50b57b13a172c0117f38472, e567634700000000000000000000000063f2404a000000000000000000000000c9030000000000000000000063f2404a
+	   1693a        2 ACTV     0   0   0   0  0     0 7b  4 0 8 | NFS.fh           DT  0 ffff88002db23380 010006017edcf8bbc93b43298fdfbe71e50b57b1e0162c01a2df0ea6, 420ebc4a000000000000000000000000420ebc4a0000000000000000000000000e1801000000000000000000420ebc4a
+
+where the first set of columns before the '|' describe the object:
+
+	COLUMN	DESCRIPTION
+	=======	===============================================================
+	OBJECT	Object debugging ID (appears as OBJ%x in some debug messages)
+	PARENT	Debugging ID of parent object
+	STAT	Object state
+	CHLDN	Number of child objects of this object
+	OPS	Number of outstanding operations on this object
+	OOP	Number of outstanding child object management operations
+	IPR
+	EX	Number of outstanding exclusive operations
+	READS	Number of outstanding read operations
+	EM	Object's event mask
+	EV	Events raised on this object
+	F	Object flags
+	S	Object slow-work work item flags
+
+and the second set of columns describe the object's cookie, if present:
+
+	COLUMN		DESCRIPTION
+	===============	=======================================================
+	NETFS_COOKIE_DEF Name of netfs cookie definition
+	TY		Cookie type (IX - index, DT - data, hex - special)
+	FL		Cookie flags
+	NETFS_DATA	Netfs private data stored in the cookie
+	OBJECT_KEY	Object key	} 1 column, with separating comma
+	AUX_DATA	Object aux data	} presence may be configured
+
+The data shown may be filtered by attaching the a key to an appropriate keyring
+before viewing the file.  Something like:
+
+		keyctl add user fscache:objlist <restrictions> @s
+
+where <restrictions> are a selection of the following letters:
+
+	K	Show hexdump of object key (don't show if not given)
+	A	Show hexdump of object aux data (don't show if not given)
+
+and the following paired letters:
+
+	C	Show objects that have a cookie
+	c	Show objects that don't have a cookie
+	B	Show objects that are busy
+	b	Show objects that aren't busy
+	W	Show objects that have pending writes
+	w	Show objects that don't have pending writes
+	R	Show objects that have outstanding reads
+	r	Show objects that don't have outstanding reads
+	S	Show objects that have slow work queued
+	s	Show objects that don't have slow work queued
+
+If neither side of a letter pair is given, then both are implied.  For example:
+
+	keyctl add user fscache:objlist KB @s
+
+shows objects that are busy, and lists their object keys, but does not dump
+their auxiliary data.  It also implies "CcWwRrSs", but as 'B' is given, 'b' is
+not implied.
+
+By default all objects and all fields will be shown.
+
+
 =========
 DEBUGGING
 =========
--- a/Show More
+++ b/Show More