Merge branch 'master' into next

2026-01-06 10:13:00 -08:00 · 2010-03-31 08:39:27 +11:00
parent 225a9be24d 2eaa9cfdf3
commit d25d6fa1a9
4041 changed files with 230073 additions and 108041 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -34,13 +34,18 @@ modules.builtin
 #
 # Top-level generic files
 #
-tags
-TAGS
-vmlinux
-vmlinuz
-System.map
-Module.markers
-Module.symvers
+/tags
+/TAGS
+/linux
+/vmlinux
+/vmlinuz
+/System.map
+/Module.markers
+/Module.symvers
+
+#
+# git files that we don't want to ignore even it they are dot-files
+#
 !.gitignore
 !.mailmap

--- a/Documentation/ABI/stable/sysfs-devices-node
+++ b/Documentation/ABI/stable/sysfs-devices-node
@@ -0,0 +1,7 @@
+What:		/sys/devices/system/node/nodeX
+Date:		October 2002
+Contact:	Linux Memory Management list <linux-mm@kvack.org>
+Description:
+		When CONFIG_NUMA is enabled, this is a directory containing
+		information on node X such as what CPUs are local to the
+		node.
--- a/Documentation/ABI/testing/sysfs-bus-usb
+++ b/Documentation/ABI/testing/sysfs-bus-usb
@@ -160,7 +160,7 @@ Description:
 		match the driver to the device.  For example:
 		# echo "046d c315" > /sys/bus/usb/drivers/foo/remove_id

-What:		/sys/bus/usb/device/.../avoid_reset
+What:		/sys/bus/usb/device/.../avoid_reset_quirk
 Date:		December 2009
 Contact:	Oliver Neukum <oliver@neukum.org>
 Description:
--- a/Documentation/PCI/PCI-DMA-mapping.txt
+++ b/Documentation/PCI/PCI-DMA-mapping.txt
--- a/Documentation/DMA-API.txt
+++ b/Documentation/DMA-API.txt
@@ -4,20 +4,18 @@
        James E.J. Bottomley <James.Bottomley@HansenPartnership.com>

 This document describes the DMA API.  For a more gentle introduction
-phrased in terms of the pci_ equivalents (and actual examples) see
-Documentation/PCI/PCI-DMA-mapping.txt.
+of the API (and actual examples) see
+Documentation/DMA-API-HOWTO.txt.

-This API is split into two pieces.  Part I describes the API and the
-corresponding pci_ API.  Part II describes the extensions to the API
-for supporting non-consistent memory machines.  Unless you know that
-your driver absolutely has to support non-consistent platforms (this
-is usually only legacy platforms) you should only use the API
-described in part I.
+This API is split into two pieces.  Part I describes the API.  Part II
+describes the extensions to the API for supporting non-consistent
+memory machines.  Unless you know that your driver absolutely has to
+support non-consistent platforms (this is usually only legacy
+platforms) you should only use the API described in part I.

-Part I - pci_ and dma_ Equivalent API 
+Part I - dma_ API
 -------------------------------------

-To get the pci_ API, you must #include <linux/pci.h>
 To get the dma_ API, you must #include <linux/dma-mapping.h>


@@ -27,9 +25,6 @@ Part Ia - Using large dma-coherent buffers
 void *
 dma_alloc_coherent(struct device *dev, size_t size,
 			     dma_addr_t *dma_handle, gfp_t flag)
-void *
-pci_alloc_consistent(struct pci_dev *dev, size_t size,
-			     dma_addr_t *dma_handle)

 Consistent memory is memory for which a write by either the device or
 the processor can immediately be read by the processor or device
@@ -53,15 +48,11 @@ The simplest way to do that is to use the dma_pool calls (see below).
 The flag parameter (dma_alloc_coherent only) allows the caller to
 specify the GFP_ flags (see kmalloc) for the allocation (the
 implementation may choose to ignore flags that affect the location of
-the returned memory, like GFP_DMA).  For pci_alloc_consistent, you
-must assume GFP_ATOMIC behaviour.
+the returned memory, like GFP_DMA).

 void
 dma_free_coherent(struct device *dev, size_t size, void *cpu_addr,
 			   dma_addr_t dma_handle)
-void
-pci_free_consistent(struct pci_dev *dev, size_t size, void *cpu_addr,
-			   dma_addr_t dma_handle)

 Free the region of consistent memory you previously allocated.  dev,
 size and dma_handle must all be the same as those passed into the
@@ -89,10 +80,6 @@ for alignment, like queue heads needing to be aligned on N-byte boundaries.
 	dma_pool_create(const char *name, struct device *dev,
 			size_t size, size_t align, size_t alloc);

-	struct pci_pool *
-	pci_pool_create(const char *name, struct pci_device *dev,
-			size_t size, size_t align, size_t alloc);
-
 The pool create() routines initialize a pool of dma-coherent buffers
 for use with a given device.  It must be called in a context which
 can sleep.
@@ -108,9 +95,6 @@ from this pool must not cross 4KByte boundaries.
 	void *dma_pool_alloc(struct dma_pool *pool, gfp_t gfp_flags,
 			dma_addr_t *dma_handle);

-	void *pci_pool_alloc(struct pci_pool *pool, gfp_t gfp_flags,
-			dma_addr_t *dma_handle);
-
 This allocates memory from the pool; the returned memory will meet the size
 and alignment requirements specified at creation time.  Pass GFP_ATOMIC to
 prevent blocking, or if it's permitted (not in_interrupt, not holding SMP locks),
@@ -122,9 +106,6 @@ pool's device.
 	void dma_pool_free(struct dma_pool *pool, void *vaddr,
 			dma_addr_t addr);

-	void pci_pool_free(struct pci_pool *pool, void *vaddr,
-			dma_addr_t addr);
-
 This puts memory back into the pool.  The pool is what was passed to
 the pool allocation routine; the cpu (vaddr) and dma addresses are what
 were returned when that routine allocated the memory being freed.
@@ -132,8 +113,6 @@ were returned when that routine allocated the memory being freed.

 	void dma_pool_destroy(struct dma_pool *pool);

-	void pci_pool_destroy(struct pci_pool *pool);
-
 The pool destroy() routines free the resources of the pool.  They must be
 called in a context which can sleep.  Make sure you've freed all allocated
 memory back to the pool before you destroy it.
@@ -144,8 +123,6 @@ Part Ic - DMA addressing limitations

 int
 dma_supported(struct device *dev, u64 mask)
-int
-pci_dma_supported(struct pci_dev *hwdev, u64 mask)

 Checks to see if the device can support DMA to the memory described by
 mask.
@@ -159,8 +136,14 @@ driver writers.

 int
 dma_set_mask(struct device *dev, u64 mask)
+
+Checks to see if the mask is possible and updates the device
+parameters if it is.
+
+Returns: 0 if successful and a negative error if not.
+
 int
-pci_set_dma_mask(struct pci_device *dev, u64 mask)
+dma_set_coherent_mask(struct device *dev, u64 mask)

 Checks to see if the mask is possible and updates the device
 parameters if it is.
@@ -187,9 +170,6 @@ Part Id - Streaming DMA mappings
 dma_addr_t
 dma_map_single(struct device *dev, void *cpu_addr, size_t size,
 		      enum dma_data_direction direction)
-dma_addr_t
-pci_map_single(struct pci_dev *hwdev, void *cpu_addr, size_t size,
-		      int direction)

 Maps a piece of processor virtual memory so it can be accessed by the
 device and returns the physical handle of the memory.
@@ -198,14 +178,10 @@ The direction for both api's may be converted freely by casting.
 However the dma_ API uses a strongly typed enumerator for its
 direction:

-DMA_NONE		= PCI_DMA_NONE		no direction (used for
-						debugging)
-DMA_TO_DEVICE		= PCI_DMA_TODEVICE	data is going from the
-						memory to the device
-DMA_FROM_DEVICE		= PCI_DMA_FROMDEVICE	data is coming from
-						the device to the
-						memory
-DMA_BIDIRECTIONAL	= PCI_DMA_BIDIRECTIONAL	direction isn't known
+DMA_NONE		no direction (used for debugging)
+DMA_TO_DEVICE		data is going from the memory to the device
+DMA_FROM_DEVICE		data is coming from the device to the memory
+DMA_BIDIRECTIONAL	direction isn't known

 Notes:  Not all memory regions in a machine can be mapped by this
 API.  Further, regions that appear to be physically contiguous in
@@ -268,9 +244,6 @@ cache lines are updated with data that the device may have changed).
 void
 dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
 		 enum dma_data_direction direction)
-void
-pci_unmap_single(struct pci_dev *hwdev, dma_addr_t dma_addr,
-		 size_t size, int direction)

 Unmaps the region previously mapped.  All the parameters passed in
 must be identical to those passed in (and returned) by the mapping
@@ -280,15 +253,9 @@ dma_addr_t
 dma_map_page(struct device *dev, struct page *page,
 		    unsigned long offset, size_t size,
 		    enum dma_data_direction direction)
-dma_addr_t
-pci_map_page(struct pci_dev *hwdev, struct page *page,
-		    unsigned long offset, size_t size, int direction)
 void
 dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size,
 	       enum dma_data_direction direction)
-void
-pci_unmap_page(struct pci_dev *hwdev, dma_addr_t dma_address,
-	       size_t size, int direction)

 API for mapping and unmapping for pages.  All the notes and warnings
 for the other mapping APIs apply here.  Also, although the <offset>
@@ -299,9 +266,6 @@ cache width is.
 int
 dma_mapping_error(struct device *dev, dma_addr_t dma_addr)

-int
-pci_dma_mapping_error(struct pci_dev *hwdev, dma_addr_t dma_addr)
-
 In some circumstances dma_map_single and dma_map_page will fail to create
 a mapping. A driver can check for these errors by testing the returned
 dma address with dma_mapping_error(). A non-zero return value means the mapping
@@ -311,9 +275,6 @@ reduce current DMA mapping usage or delay and try again later).
 	int
 	dma_map_sg(struct device *dev, struct scatterlist *sg,
 		int nents, enum dma_data_direction direction)
-	int
-	pci_map_sg(struct pci_dev *hwdev, struct scatterlist *sg,
-		int nents, int direction)

 Returns: the number of physical segments mapped (this may be shorter
 than <nents> passed in if some elements of the scatter/gather list are
@@ -353,9 +314,6 @@ accessed sg->address and sg->length as shown above.
 	void
 	dma_unmap_sg(struct device *dev, struct scatterlist *sg,
 		int nhwentries, enum dma_data_direction direction)
-	void
-	pci_unmap_sg(struct pci_dev *hwdev, struct scatterlist *sg,
-		int nents, int direction)

 Unmap the previously mapped scatter/gather list.  All the parameters
 must be the same as those and passed in to the scatter/gather mapping
@@ -365,21 +323,23 @@ Note: <nents> must be the number you passed in, *not* the number of
 physical entries returned.

 void
-dma_sync_single(struct device *dev, dma_addr_t dma_handle, size_t size,
-		enum dma_data_direction direction)
+dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size,
+			enum dma_data_direction direction)
 void
-pci_dma_sync_single(struct pci_dev *hwdev, dma_addr_t dma_handle,
-			   size_t size, int direction)
+dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, size_t size,
+			   enum dma_data_direction direction)
 void
-dma_sync_sg(struct device *dev, struct scatterlist *sg, int nelems,
-			  enum dma_data_direction direction)
+dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems,
+		    enum dma_data_direction direction)
 void
-pci_dma_sync_sg(struct pci_dev *hwdev, struct scatterlist *sg,
-		       int nelems, int direction)
+dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems,
+		       enum dma_data_direction direction)

-Synchronise a single contiguous or scatter/gather mapping.  All the
-parameters must be the same as those passed into the single mapping
-API.
+Synchronise a single contiguous or scatter/gather mapping for the cpu
+and device. With the sync_sg API, all the parameters must be the same
+as those passed into the single mapping API. With the sync_single API,
+you can use dma_handle and size parameters that aren't identical to
+those passed into the single mapping API to do a partial sync.

 Notes:  You must do this:

@@ -461,9 +421,9 @@ void whizco_dma_map_sg_attrs(struct device *dev, dma_addr_t dma_addr,
 Part II - Advanced dma_ usage
 -----------------------------

-Warning: These pieces of the DMA API have no PCI equivalent.  They
-should also not be used in the majority of cases, since they cater for
-unlikely corner cases that don't belong in usual drivers.
+Warning: These pieces of the DMA API should not be used in the
+majority of cases, since they cater for unlikely corner cases that
+don't belong in usual drivers.

 If you don't understand how cache line coherency works between a
 processor and an I/O device, you should not be using this part of the
@@ -513,16 +473,6 @@ line, but it will guarantee that one or more cache lines fit exactly
 into the width returned by this call.  It will also always be a power
 of two for easy alignment.

-void
-dma_sync_single_range(struct device *dev, dma_addr_t dma_handle,
-		      unsigned long offset, size_t size,
-		      enum dma_data_direction direction)
-
-Does a partial sync, starting at offset and continuing for size.  You
-must be careful to observe the cache alignment and width when doing
-anything like this.  You must also be extra careful about accessing
-memory you intend to sync partially.
-
 void
 dma_cache_sync(struct device *dev, void *vaddr, size_t size,
 	       enum dma_data_direction direction)
--- a/Documentation/DocBook/mtdnand.tmpl
+++ b/Documentation/DocBook/mtdnand.tmpl
@@ -488,7 +488,7 @@ static void board_select_chip (struct mtd_info *mtd, int chip)
 				The ECC bytes must be placed immidiately after the data
 				bytes in order to make the syndrome generator work. This
 				is contrary to the usual layout used by software ECC. The
-				seperation of data and out of band area is not longer
+				separation of data and out of band area is not longer
 				possible. The nand driver code handles this layout and
 				the remaining free bytes in the oob area are managed by 
 				the autoplacement code. Provide a matching oob-layout
@@ -560,7 +560,7 @@ static void board_select_chip (struct mtd_info *mtd, int chip)
 				bad blocks. They have factory marked good blocks. The marker pattern
 				is erased when the block is erased to be reused. So in case of
 				powerloss before writing the pattern back to the chip this block 
-				would be lost and added to the bad blocks. Therefor we scan the 
+				would be lost and added to the bad blocks. Therefore we scan the 
 				chip(s) when we detect them the first time for good blocks and 
 				store this information in a bad block table before erasing any 
 				of the blocks.
@@ -1094,7 +1094,7 @@ in this page</entry>
 		manufacturers specifications. This applies similar to the spare area. 
 	</para>
 	<para>
-		Therefor NAND aware filesystems must either write in page size chunks
+		Therefore NAND aware filesystems must either write in page size chunks
 		or hold a writebuffer to collect smaller writes until they sum up to 
 		pagesize. Available NAND aware filesystems: JFFS2, YAFFS. 		
 	</para>
--- a/Documentation/DocBook/v4l/common.xml
+++ b/Documentation/DocBook/v4l/common.xml
@@ -1170,7 +1170,7 @@ frames per second. If less than this number of frames is to be
 captured or output, applications can request frame skipping or
 duplicating on the driver side. This is especially useful when using
 the &func-read; or &func-write;, which are not augmented by timestamps
-or sequence counters, and to avoid unneccessary data copying.</para>
+or sequence counters, and to avoid unnecessary data copying.</para>

    <para>Finally these ioctls can be used to determine the number of
 buffers used internally by a driver in read/write mode. For
--- a/Documentation/DocBook/v4l/vidioc-g-parm.xml
+++ b/Documentation/DocBook/v4l/vidioc-g-parm.xml
@@ -55,7 +55,7 @@ captured or output, applications can request frame skipping or
 duplicating on the driver side. This is especially useful when using
 the <function>read()</function> or <function>write()</function>, which
 are not augmented by timestamps or sequence counters, and to avoid
-unneccessary data copying.</para>
+unnecessary data copying.</para>

    <para>Further these ioctls can be used to determine the number of
 buffers used internally by a driver in read/write mode. For
--- a/Documentation/HOWTO
+++ b/Documentation/HOWTO
@@ -221,8 +221,8 @@ branches.  These different branches are:
  - main 2.6.x kernel tree
  - 2.6.x.y -stable kernel tree
  - 2.6.x -git kernel patches
-  - 2.6.x -mm kernel patches
  - subsystem specific kernel trees and patches
+  - the 2.6.x -next kernel tree for integration tests

 2.6.x kernel tree
 -----------------
@@ -232,7 +232,7 @@ process is as follows:
  - As soon as a new kernel is released a two weeks window is open,
    during this period of time maintainers can submit big diffs to
    Linus, usually the patches that have already been included in the
-    -mm kernel for a few weeks.  The preferred way to submit big changes
+    -next kernel for a few weeks.  The preferred way to submit big changes
    is using git (the kernel's source management tool, more information
    can be found at http://git.or.cz/) but plain patches are also just
    fine.
@@ -293,84 +293,43 @@ daily and represent the current state of Linus' tree.  They are more
 experimental than -rc kernels since they are generated automatically
 without even a cursory glance to see if they are sane.

-2.6.x -mm kernel patches
------------------------
-These are experimental kernel patches released by Andrew Morton.  Andrew
-takes all of the different subsystem kernel trees and patches and mushes
-them together, along with a lot of patches that have been plucked from
-the linux-kernel mailing list.  This tree serves as a proving ground for
-new features and patches.  Once a patch has proved its worth in -mm for
-a while Andrew or the subsystem maintainer pushes it on to Linus for
-inclusion in mainline.
-
-It is heavily encouraged that all new patches get tested in the -mm tree
-before they are sent to Linus for inclusion in the main kernel tree.  Code
-which does not make an appearance in -mm before the opening of the merge
-window will prove hard to merge into the mainline.
-
-These kernels are not appropriate for use on systems that are supposed
-to be stable and they are more risky to run than any of the other
-branches.
-
-If you wish to help out with the kernel development process, please test
-and use these kernel releases and provide feedback to the linux-kernel
-mailing list if you have any problems, and if everything works properly.
-
-In addition to all the other experimental patches, these kernels usually
-also contain any changes in the mainline -git kernels available at the
-time of release.
-
-The -mm kernels are not released on a fixed schedule, but usually a few
-mm kernels are released in between each -rc kernel (1 to 3 is common).
-
 Subsystem Specific kernel trees and patches
 -------------------------------------------
-A number of the different kernel subsystem developers expose their
-development trees so that others can see what is happening in the
-different areas of the kernel.  These trees are pulled into the -mm
-kernel releases as described above.
+The maintainers of the various kernel subsystems --- and also many
+kernel subsystem developers --- expose their current state of
+development in source repositories.  That way, others can see what is
+happening in the different areas of the kernel.  In areas where
+development is rapid, a developer may be asked to base his submissions
+onto such a subsystem kernel tree so that conflicts between the
+submission and other already ongoing work are avoided.

-Here is a list of some of the different kernel trees available:
-  git trees:
-    - Kbuild development tree, Sam Ravnborg <sam@ravnborg.org>
-	git.kernel.org:/pub/scm/linux/kernel/git/sam/kbuild.git
+Most of these repositories are git trees, but there are also other SCMs
+in use, or patch queues being published as quilt series.  Addresses of
+these subsystem repositories are listed in the MAINTAINERS file.  Many
+of them can be browsed at http://git.kernel.org/.

-    - ACPI development tree, Len Brown <len.brown@intel.com>
-	git.kernel.org:/pub/scm/linux/kernel/git/lenb/linux-acpi-2.6.git
+Before a proposed patch is committed to such a subsystem tree, it is
+subject to review which primarily happens on mailing lists (see the
+respective section below).  For several kernel subsystems, this review
+process is tracked with the tool patchwork.  Patchwork offers a web
+interface which shows patch postings, any comments on a patch or
+revisions to it, and maintainers can mark patches as under review,
+accepted, or rejected.  Most of these patchwork sites are listed at
+http://patchwork.kernel.org/ or http://patchwork.ozlabs.org/.

-    - Block development tree, Jens Axboe <jens.axboe@oracle.com>
-	git.kernel.org:/pub/scm/linux/kernel/git/axboe/linux-2.6-block.git
+2.6.x -next kernel tree for integration tests
+---------------------------------------------
+Before updates from subsystem trees are merged into the mainline 2.6.x
+tree, they need to be integration-tested.  For this purpose, a special
+testing repository exists into which virtually all subsystem trees are
+pulled on an almost daily basis:
+	http://git.kernel.org/?p=linux/kernel/git/sfr/linux-next.git
+	http://linux.f-seidel.de/linux-next/pmwiki/

-    - DRM development tree, Dave Airlie <airlied@linux.ie>
-	git.kernel.org:/pub/scm/linux/kernel/git/airlied/drm-2.6.git
+This way, the -next kernel gives a summary outlook onto what will be
+expected to go into the mainline kernel at the next merge period.
+Adventurous testers are very welcome to runtime-test the -next kernel.

-    - ia64 development tree, Tony Luck <tony.luck@intel.com>
-	git.kernel.org:/pub/scm/linux/kernel/git/aegl/linux-2.6.git
-
-    - infiniband, Roland Dreier <rolandd@cisco.com>
-	git.kernel.org:/pub/scm/linux/kernel/git/roland/infiniband.git
-
-    - libata, Jeff Garzik <jgarzik@pobox.com>
-	git.kernel.org:/pub/scm/linux/kernel/git/jgarzik/libata-dev.git
-
-    - network drivers, Jeff Garzik <jgarzik@pobox.com>
-	git.kernel.org:/pub/scm/linux/kernel/git/jgarzik/netdev-2.6.git
-
-    - pcmcia, Dominik Brodowski <linux@dominikbrodowski.net>
-	git.kernel.org:/pub/scm/linux/kernel/git/brodo/pcmcia-2.6.git
-
-    - SCSI, James Bottomley <James.Bottomley@hansenpartnership.com>
-	git.kernel.org:/pub/scm/linux/kernel/git/jejb/scsi-misc-2.6.git
-
-    - x86, Ingo Molnar <mingo@elte.hu>
-	git://git.kernel.org/pub/scm/linux/kernel/git/x86/linux-2.6-x86.git
-
-  quilt trees:
-    - USB, Driver Core, and I2C, Greg Kroah-Hartman <gregkh@suse.de>
-	kernel.org/pub/linux/kernel/people/gregkh/gregkh-2.6/
-
-  Other kernel trees can be found listed at http://git.kernel.org/ and in
-  the MAINTAINERS file.

 Bug Reporting
 -------------
--- a/Documentation/IPMI.txt
+++ b/Documentation/IPMI.txt
@@ -365,6 +365,7 @@ You can change this at module load time (for a module) with:
       regshifts=<shift1>,<shift2>,...
       slave_addrs=<addr1>,<addr2>,...
       force_kipmid=<enable1>,<enable2>,...
+       kipmid_max_busy_us=<ustime1>,<ustime2>,...
       unload_when_empty=[0|1]

 Each of these except si_trydefaults is a list, the first item for the
@@ -433,6 +434,7 @@ kernel command line as:
       ipmi_si.regshifts=<shift1>,<shift2>,...
       ipmi_si.slave_addrs=<addr1>,<addr2>,...
       ipmi_si.force_kipmid=<enable1>,<enable2>,...
+       ipmi_si.kipmid_max_busy_us=<ustime1>,<ustime2>,...

 It works the same as the module parameters of the same names.

@@ -450,6 +452,16 @@ force this thread on or off.  If you force it off and don't have
 interrupts, the driver will run VERY slowly.  Don't blame me,
 these interfaces suck.

+Unfortunately, this thread can use a lot of CPU depending on the
+interface's performance.  This can waste a lot of CPU and cause
+various issues with detecting idle CPU and using extra power.  To
+avoid this, the kipmid_max_busy_us sets the maximum amount of time, in
+microseconds, that kipmid will spin before sleeping for a tick.  This
+value sets a balance between performance and CPU waste and needs to be
+tuned to your needs.  Maybe, someday, auto-tuning will be added, but
+that's not a simple thing and even the auto-tuning would need to be
+tuned to the user's desired performance.
+
 The driver supports a hot add and remove of interfaces.  This way,
 interfaces can be added or removed after the kernel is up and running.
 This is done using /sys/modules/ipmi_si/parameters/hotmod, which is a
--- a/Documentation/Makefile
+++ b/Documentation/Makefile
@@ -1,3 +1,3 @@
 obj-m := DocBook/ accounting/ auxdisplay/ connector/ \
-	filesystems/configfs/ ia64/ networking/ \
-	pcmcia/ spi/ video4linux/ vm/ watchdog/src/
+	filesystems/ filesystems/configfs/ ia64/ laptops/ networking/ \
+	pcmcia/ spi/ timers/ video4linux/ vm/ watchdog/src/
--- a/Documentation/SubmitChecklist
+++ b/Documentation/SubmitChecklist
@@ -9,10 +9,14 @@ Documentation/SubmittingPatches and elsewhere regarding submitting Linux
 kernel patches.


-1: Builds cleanly with applicable or modified CONFIG options =y, =m, and
+1: If you use a facility then #include the file that defines/declares
+   that facility.  Don't depend on other header files pulling in ones
+   that you use.
+
+2: Builds cleanly with applicable or modified CONFIG options =y, =m, and
   =n.  No gcc warnings/errors, no linker warnings/errors.

-2: Passes allnoconfig, allmodconfig
+2b: Passes allnoconfig, allmodconfig

 3: Builds on multiple CPU architectures by using local cross-compile tools
   or some other build farm.
--- a/Documentation/arm/Samsung-S3C24XX/CPUfreq.txt
+++ b/Documentation/arm/Samsung-S3C24XX/CPUfreq.txt
@@ -14,8 +14,8 @@ Introduction
 how the clocks are arranged. The first implementation used as single
 PLL to feed the ARM, memory and peripherals via a series of dividers
 and muxes and this is the implementation that is documented here. A
- newer version where there is a seperate PLL and clock divider for the
- ARM core is available as a seperate driver.
+ newer version where there is a separate PLL and clock divider for the
+ ARM core is available as a separate driver.


 Layout
--- a/Documentation/arm/Samsung/Overview.txt
+++ b/Documentation/arm/Samsung/Overview.txt
@@ -0,0 +1,86 @@
+		Samsung ARM Linux Overview
+		==========================
+
+Introduction
+------------
+
+  The Samsung range of ARM SoCs spans many similar devices, from the initial
+  ARM9 through to the newest ARM cores. This document shows an overview of
+  the current kernel support, how to use it and where to find the code
+  that supports this.
+
+  The currently supported SoCs are:
+
+  - S3C24XX: See Documentation/arm/Samsung-S3C24XX/Overview.txt for full list
+  - S3C64XX: S3C6400 and S3C6410
+  - S5PC6440
+
+  S5PC100 and S5PC110 support is currently being merged
+
+
+S3C24XX Systems
+---------------
+
+  There is still documentation in Documnetation/arm/Samsung-S3C24XX/ which
+  deals with the architecture and drivers specific to these devices.
+
+  See Documentation/arm/Samsung-S3C24XX/Overview.txt for more information
+  on the implementation details and specific support.
+
+
+Configuration
+-------------
+
+  A number of configurations are supplied, as there is no current way of
+  unifying all the SoCs into one kernel.
+
+  s5p6440_defconfig - S5P6440 specific default configuration
+  s5pc100_defconfig - S5PC100 specific default configuration
+
+
+Layout
+------
+
+  The directory layout is currently being restructured, and consists of
+  several platform directories and then the machine specific directories
+  of the CPUs being built for.
+
+  plat-samsung provides the base for all the implementations, and is the
+  last in the line of include directories that are processed for the build
+  specific information. It contains the base clock, GPIO and device definitions
+  to get the system running.
+
+  plat-s3c is the s3c24xx/s3c64xx platform directory, although it is currently
+  involved in other builds this will be phased out once the relevant code is
+  moved elsewhere.
+
+  plat-s3c24xx is for s3c24xx specific builds, see the S3C24XX docs.
+
+  plat-s3c64xx is for the s3c64xx specific bits, see the S3C24XX docs.
+
+  plat-s5p is for s5p specific builds, more to be added.
+
+
+  [ to finish ]
+
+
+Port Contributors
+-----------------
+
+  Ben Dooks (BJD)
+  Vincent Sanders
+  Herbert Potzl
+  Arnaud Patard (RTP)
+  Roc Wu
+  Klaus Fetscher
+  Dimitry Andric
+  Shannon Holland
+  Guillaume Gourat (NexVision)
+  Christer Weinigel (wingel) (Acer N30)
+  Lucas Correia Villa Real (S3C2400 port)
+
+
+Document Author
+---------------
+
+Copyright 2009-2010 Ben Dooks <ben-linux@fluff.org>
--- a/Documentation/arm/Samsung/clksrc-change-registers.awk
+++ b/Documentation/arm/Samsung/clksrc-change-registers.awk
@@ -0,0 +1,167 @@
+#!/usr/bin/awk -f
+#
+# Copyright 2010 Ben Dooks <ben-linux@fluff.org>
+#
+# Released under GPLv2
+
+# example usage
+# ./clksrc-change-registers.awk arch/arm/plat-s5pc1xx/include/plat/regs-clock.h < src > dst
+
+function extract_value(s)
+{
+    eqat = index(s, "=")
+    comat = index(s, ",")
+    return substr(s, eqat+2, (comat-eqat)-2)
+}
+
+function remove_brackets(b)
+{
+    return substr(b, 2, length(b)-2)
+}
+
+function splitdefine(l, p)
+{
+    r = split(l, tp)
+
+    p[0] = tp[2]
+    p[1] = remove_brackets(tp[3])
+}
+
+function find_length(f)
+{
+    if (0)
+	printf "find_length " f "\n" > "/dev/stderr"
+
+    if (f ~ /0x1/)
+	return 1
+    else if (f ~ /0x3/)
+	return 2
+    else if (f ~ /0x7/)
+	return 3
+    else if (f ~ /0xf/)
+	return 4
+
+    printf "unknown legnth " f "\n" > "/dev/stderr"
+    exit
+}
+
+function find_shift(s)
+{
+    id = index(s, "<")
+    if (id <= 0) {
+	printf "cannot find shift " s "\n" > "/dev/stderr"
+	exit
+    }
+
+    return substr(s, id+2)
+}
+
+
+BEGIN {
+    if (ARGC < 2) {
+	print "too few arguments" > "/dev/stderr"
+	exit
+    }
+
+# read the header file and find the mask values that we will need
+# to replace and create an associative array of values
+
+    while (getline line < ARGV[1] > 0) {
+	if (line ~ /\#define.*_MASK/ &&
+	    !(line ~ /S5PC100_EPLL_MASK/) &&
+	    !(line ~ /USB_SIG_MASK/)) {
+	    splitdefine(line, fields)
+	    name = fields[0]
+	    if (0)
+		printf "MASK " line "\n" > "/dev/stderr"
+	    dmask[name,0] = find_length(fields[1])
+	    dmask[name,1] = find_shift(fields[1])
+	    if (0)
+		printf "=> '" name "' LENGTH=" dmask[name,0] " SHIFT=" dmask[name,1] "\n" > "/dev/stderr"
+	} else {
+	}
+    }
+
+    delete ARGV[1]
+}
+
+/clksrc_clk.*=.*{/ {
+    shift=""
+    mask=""
+    divshift=""
+    reg_div=""
+    reg_src=""
+    indent=1
+
+    print $0
+
+    for(; indent >= 1;) {
+	if ((getline line) <= 0) {
+	    printf "unexpected end of file" > "/dev/stderr"
+	    exit 1;
+	}
+
+	if (line ~ /\.shift/) {
+	    shift = extract_value(line)
+	} else if (line ~ /\.mask/) {
+	    mask = extract_value(line)
+	} else if (line ~ /\.reg_divider/) {
+	    reg_div = extract_value(line)
+	} else if (line ~ /\.reg_source/) {
+	    reg_src = extract_value(line)
+	} else if (line ~ /\.divider_shift/) {
+	    divshift = extract_value(line)
+	} else if (line ~ /{/) {
+		indent++
+		print line
+	    } else if (line ~ /}/) {
+	    indent--
+
+	    if (indent == 0) {
+		if (0) {
+		    printf "shift '" shift   "' ='" dmask[shift,0] "'\n" > "/dev/stderr"
+		    printf "mask  '" mask    "'\n" > "/dev/stderr"
+		    printf "dshft '" divshift "'\n" > "/dev/stderr"
+		    printf "rdiv  '" reg_div "'\n" > "/dev/stderr"
+		    printf "rsrc  '" reg_src "'\n" > "/dev/stderr"
+		}
+
+		generated = mask
+		sub(reg_src, reg_div, generated)
+
+		if (0) {
+		    printf "/* rsrc " reg_src " */\n"
+		    printf "/* rdiv " reg_div " */\n"
+		    printf "/* shift " shift " */\n"
+		    printf "/* mask " mask " */\n"
+		    printf "/* generated " generated " */\n"
+		}
+
+		if (reg_div != "") {
+		    printf "\t.reg_div = { "
+		    printf ".reg = " reg_div ", "
+		    printf ".shift = " dmask[generated,1] ", "
+		    printf ".size = " dmask[generated,0] ", "
+		    printf "},\n"
+		}
+
+		printf "\t.reg_src = { "
+		printf ".reg = " reg_src ", "
+		printf ".shift = " dmask[mask,1] ", "
+		printf ".size = " dmask[mask,0] ", "
+
+		printf "},\n"
+
+	    }
+
+	    print line
+	} else {
+	    print line
+	}
+
+	if (0)
+	    printf indent ":" line "\n" > "/dev/stderr"
+    }
+}
+
+// && ! /clksrc_clk.*=.*{/ { print $0 }
--- a/Documentation/cdrom/ide-cd
+++ b/Documentation/cdrom/ide-cd
@@ -159,42 +159,7 @@ two arguments:  the CDROM device, and the slot number to which you wish
 to change.  If the slot number is -1, the drive is unloaded.


-4. Compilation options
----------------------
-
-There are a few additional options which can be set when compiling the
-driver.  Most people should not need to mess with any of these; they
-are listed here simply for completeness.  A compilation option can be
-enabled by adding a line of the form `#define <option> 1' to the top
-of ide-cd.c.  All these options are disabled by default.
-
-VERBOSE_IDE_CD_ERRORS
-  If this is set, ATAPI error codes will be translated into textual
-  descriptions.  In addition, a dump is made of the command which
-  provoked the error.  This is off by default to save the memory used
-  by the (somewhat long) table of error descriptions.  
-
-STANDARD_ATAPI
-  If this is set, the code needed to deal with certain drives which do
-  not properly implement the ATAPI spec will be disabled.  If you know
-  your drive implements ATAPI properly, you can turn this on to get a
-  slightly smaller kernel.
-
-NO_DOOR_LOCKING
-  If this is set, the driver will never attempt to lock the door of
-  the drive.
-
-CDROM_NBLOCKS_BUFFER
-  This sets the size of the buffer to be used for a CDROMREADAUDIO
-  ioctl.  The default is 8.
-
-TEST
-  This currently enables an additional ioctl which enables a user-mode
-  program to execute an arbitrary packet command.  See the source for
-  details.  This should be left off unless you know what you're doing.
-
-
-5. Common problems
+4. Common problems
 ------------------

 This section discusses some common problems encountered when trying to
@@ -371,7 +336,7 @@ f. Data corruption.
    expense of low system performance.


-6. cdchange.c
+5. cdchange.c
 -------------

 /*
--- a/Documentation/cgroups/cgroup_event_listener.c
+++ b/Documentation/cgroups/cgroup_event_listener.c
@@ -0,0 +1,110 @@
+/*
+ * cgroup_event_listener.c - Simple listener of cgroup events
+ *
+ * Copyright (C) Kirill A. Shutemov <kirill@shutemov.name>
+ */
+
+#include <assert.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <libgen.h>
+#include <limits.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <sys/eventfd.h>
+
+#define USAGE_STR "Usage: cgroup_event_listener <path-to-control-file> <args>\n"
+
+int main(int argc, char **argv)
+{
+	int efd = -1;
+	int cfd = -1;
+	int event_control = -1;
+	char event_control_path[PATH_MAX];
+	char line[LINE_MAX];
+	int ret;
+
+	if (argc != 3) {
+		fputs(USAGE_STR, stderr);
+		return 1;
+	}
+
+	cfd = open(argv[1], O_RDONLY);
+	if (cfd == -1) {
+		fprintf(stderr, "Cannot open %s: %s\n", argv[1],
+				strerror(errno));
+		goto out;
+	}
+
+	ret = snprintf(event_control_path, PATH_MAX, "%s/cgroup.event_control",
+			dirname(argv[1]));
+	if (ret >= PATH_MAX) {
+		fputs("Path to cgroup.event_control is too long\n", stderr);
+		goto out;
+	}
+
+	event_control = open(event_control_path, O_WRONLY);
+	if (event_control == -1) {
+		fprintf(stderr, "Cannot open %s: %s\n", event_control_path,
+				strerror(errno));
+		goto out;
+	}
+
+	efd = eventfd(0, 0);
+	if (efd == -1) {
+		perror("eventfd() failed");
+		goto out;
+	}
+
+	ret = snprintf(line, LINE_MAX, "%d %d %s", efd, cfd, argv[2]);
+	if (ret >= LINE_MAX) {
+		fputs("Arguments string is too long\n", stderr);
+		goto out;
+	}
+
+	ret = write(event_control, line, strlen(line) + 1);
+	if (ret == -1) {
+		perror("Cannot write to cgroup.event_control");
+		goto out;
+	}
+
+	while (1) {
+		uint64_t result;
+
+		ret = read(efd, &result, sizeof(result));
+		if (ret == -1) {
+			if (errno == EINTR)
+				continue;
+			perror("Cannot read from eventfd");
+			break;
+		}
+		assert(ret == sizeof(result));
+
+		ret = access(event_control_path, W_OK);
+		if ((ret == -1) && (errno == ENOENT)) {
+				puts("The cgroup seems to have removed.");
+				ret = 0;
+				break;
+		}
+
+		if (ret == -1) {
+			perror("cgroup.event_control "
+					"is not accessable any more");
+			break;
+		}
+
+		printf("%s %s: crossed\n", argv[1], argv[2]);
+	}
+
+out:
+	if (efd >= 0)
+		close(efd);
+	if (event_control >= 0)
+		close(event_control);
+	if (cfd >= 0)
+		close(cfd);
+
+	return (ret != 0);
+}
--- a/Documentation/cgroups/cgroups.txt
+++ b/Documentation/cgroups/cgroups.txt
@@ -22,6 +22,8 @@ CONTENTS:
 2. Usage Examples and Syntax
  2.1 Basic Usage
  2.2 Attaching processes
+  2.3 Mounting hierarchies by name
+  2.4 Notification API
 3. Kernel API
  3.1 Overview
  3.2 Synchronization
@@ -434,6 +436,25 @@ you give a subsystem a name.
 The name of the subsystem appears as part of the hierarchy description
 in /proc/mounts and /proc/<pid>/cgroups.

+2.4 Notification API
+--------------------
+
+There is mechanism which allows to get notifications about changing
+status of a cgroup.
+
+To register new notification handler you need:
+ - create a file descriptor for event notification using eventfd(2);
+ - open a control file to be monitored (e.g. memory.usage_in_bytes);
+ - write "<event_fd> <control_fd> <args>" to cgroup.event_control.
+   Interpretation of args is defined by control file implementation;
+
+eventfd will be woken up by control file implementation or when the
+cgroup is removed.
+
+To unregister notification handler just close eventfd.
+
+NOTE: Support of notifications should be implemented for the control
+file. See documentation for the subsystem.

 3. Kernel API
 =============
@@ -488,6 +509,11 @@ Each subsystem should:
 - add an entry in linux/cgroup_subsys.h
 - define a cgroup_subsys object called <name>_subsys

+If a subsystem can be compiled as a module, it should also have in its
+module initcall a call to cgroup_load_subsys(), and in its exitcall a
+call to cgroup_unload_subsys(). It should also set its_subsys.module =
+THIS_MODULE in its .c file.
+
 Each subsystem may export the following methods. The only mandatory
 methods are create/destroy. Any others that are null are presumed to
 be successful no-ops.
@@ -536,10 +562,21 @@ returns an error, this will abort the attach operation.  If a NULL
 task is passed, then a successful result indicates that *any*
 unspecified task can be moved into the cgroup. Note that this isn't
 called on a fork. If this method returns 0 (success) then this should
-remain valid while the caller holds cgroup_mutex. If threadgroup is
+remain valid while the caller holds cgroup_mutex and it is ensured that either
+attach() or cancel_attach() will be called in future. If threadgroup is
 true, then a successful result indicates that all threads in the given
 thread's threadgroup can be moved together.

+void cancel_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
+	       struct task_struct *task, bool threadgroup)
+(cgroup_mutex held by caller)
+
+Called when a task attach operation has failed after can_attach() has succeeded.
+A subsystem whose can_attach() has some side-effects should provide this
+function, so that the subsytem can implement a rollback. If not, not necessary.
+This will be called only about subsystems whose can_attach() operation have
+succeeded.
+
 void attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
 	    struct cgroup *old_cgrp, struct task_struct *task,
 	    bool threadgroup)
--- a/Documentation/cgroups/cpusets.txt
+++ b/Documentation/cgroups/cpusets.txt
@@ -168,20 +168,20 @@ Each cpuset is represented by a directory in the cgroup file system
 containing (on top of the standard cgroup files) the following
 files describing that cpuset:

- - cpus: list of CPUs in that cpuset
- - mems: list of Memory Nodes in that cpuset
- - memory_migrate flag: if set, move pages to cpusets nodes
- - cpu_exclusive flag: is cpu placement exclusive?
- - mem_exclusive flag: is memory placement exclusive?
- - mem_hardwall flag:  is memory allocation hardwalled
- - memory_pressure: measure of how much paging pressure in cpuset
- - memory_spread_page flag: if set, spread page cache evenly on allowed nodes
- - memory_spread_slab flag: if set, spread slab cache evenly on allowed nodes
- - sched_load_balance flag: if set, load balance within CPUs on that cpuset
- - sched_relax_domain_level: the searching range when migrating tasks
+ - cpuset.cpus: list of CPUs in that cpuset
+ - cpuset.mems: list of Memory Nodes in that cpuset
+ - cpuset.memory_migrate flag: if set, move pages to cpusets nodes
+ - cpuset.cpu_exclusive flag: is cpu placement exclusive?
+ - cpuset.mem_exclusive flag: is memory placement exclusive?
+ - cpuset.mem_hardwall flag:  is memory allocation hardwalled
+ - cpuset.memory_pressure: measure of how much paging pressure in cpuset
+ - cpuset.memory_spread_page flag: if set, spread page cache evenly on allowed nodes
+ - cpuset.memory_spread_slab flag: if set, spread slab cache evenly on allowed nodes
+ - cpuset.sched_load_balance flag: if set, load balance within CPUs on that cpuset
+ - cpuset.sched_relax_domain_level: the searching range when migrating tasks

 In addition, the root cpuset only has the following file:
- - memory_pressure_enabled flag: compute memory_pressure?
+ - cpuset.memory_pressure_enabled flag: compute memory_pressure?

 New cpusets are created using the mkdir system call or shell
 command.  The properties of a cpuset, such as its flags, allowed
@@ -229,7 +229,7 @@ If a cpuset is cpu or mem exclusive, no other cpuset, other than
 a direct ancestor or descendant, may share any of the same CPUs or
 Memory Nodes.

-A cpuset that is mem_exclusive *or* mem_hardwall is "hardwalled",
+A cpuset that is cpuset.mem_exclusive *or* cpuset.mem_hardwall is "hardwalled",
 i.e. it restricts kernel allocations for page, buffer and other data
 commonly shared by the kernel across multiple users.  All cpusets,
 whether hardwalled or not, restrict allocations of memory for user
@@ -304,15 +304,15 @@ times 1000.
 ---------------------------
 There are two boolean flag files per cpuset that control where the
 kernel allocates pages for the file system buffers and related in
-kernel data structures.  They are called 'memory_spread_page' and
-'memory_spread_slab'.
+kernel data structures.  They are called 'cpuset.memory_spread_page' and
+'cpuset.memory_spread_slab'.

-If the per-cpuset boolean flag file 'memory_spread_page' is set, then
+If the per-cpuset boolean flag file 'cpuset.memory_spread_page' is set, then
 the kernel will spread the file system buffers (page cache) evenly
 over all the nodes that the faulting task is allowed to use, instead
 of preferring to put those pages on the node where the task is running.

-If the per-cpuset boolean flag file 'memory_spread_slab' is set,
+If the per-cpuset boolean flag file 'cpuset.memory_spread_slab' is set,
 then the kernel will spread some file system related slab caches,
 such as for inodes and dentries evenly over all the nodes that the
 faulting task is allowed to use, instead of preferring to put those
@@ -337,21 +337,21 @@ their containing tasks memory spread settings.  If memory spreading
 is turned off, then the currently specified NUMA mempolicy once again
 applies to memory page allocations.

-Both 'memory_spread_page' and 'memory_spread_slab' are boolean flag
+Both 'cpuset.memory_spread_page' and 'cpuset.memory_spread_slab' are boolean flag
 files.  By default they contain "0", meaning that the feature is off
 for that cpuset.  If a "1" is written to that file, then that turns
 the named feature on.

 The implementation is simple.

-Setting the flag 'memory_spread_page' turns on a per-process flag
+Setting the flag 'cpuset.memory_spread_page' turns on a per-process flag
 PF_SPREAD_PAGE for each task that is in that cpuset or subsequently
 joins that cpuset.  The page allocation calls for the page cache
 is modified to perform an inline check for this PF_SPREAD_PAGE task
 flag, and if set, a call to a new routine cpuset_mem_spread_node()
 returns the node to prefer for the allocation.

-Similarly, setting 'memory_spread_slab' turns on the flag
+Similarly, setting 'cpuset.memory_spread_slab' turns on the flag
 PF_SPREAD_SLAB, and appropriately marked slab caches will allocate
 pages from the node returned by cpuset_mem_spread_node().

@@ -404,24 +404,24 @@ the following two situations:
    system overhead on those CPUs, including avoiding task load
    balancing if that is not needed.

-When the per-cpuset flag "sched_load_balance" is enabled (the default
-setting), it requests that all the CPUs in that cpusets allowed 'cpus'
+When the per-cpuset flag "cpuset.sched_load_balance" is enabled (the default
+setting), it requests that all the CPUs in that cpusets allowed 'cpuset.cpus'
 be contained in a single sched domain, ensuring that load balancing
 can move a task (not otherwised pinned, as by sched_setaffinity)
 from any CPU in that cpuset to any other.

-When the per-cpuset flag "sched_load_balance" is disabled, then the
+When the per-cpuset flag "cpuset.sched_load_balance" is disabled, then the
 scheduler will avoid load balancing across the CPUs in that cpuset,
 --except-- in so far as is necessary because some overlapping cpuset
 has "sched_load_balance" enabled.

-So, for example, if the top cpuset has the flag "sched_load_balance"
+So, for example, if the top cpuset has the flag "cpuset.sched_load_balance"
 enabled, then the scheduler will have one sched domain covering all
-CPUs, and the setting of the "sched_load_balance" flag in any other
+CPUs, and the setting of the "cpuset.sched_load_balance" flag in any other
 cpusets won't matter, as we're already fully load balancing.

 Therefore in the above two situations, the top cpuset flag
-"sched_load_balance" should be disabled, and only some of the smaller,
+"cpuset.sched_load_balance" should be disabled, and only some of the smaller,
 child cpusets have this flag enabled.

 When doing this, you don't usually want to leave any unpinned tasks in
@@ -433,7 +433,7 @@ scheduler might not consider the possibility of load balancing that
 task to that underused CPU.

 Of course, tasks pinned to a particular CPU can be left in a cpuset
-that disables "sched_load_balance" as those tasks aren't going anywhere
+that disables "cpuset.sched_load_balance" as those tasks aren't going anywhere
 else anyway.

 There is an impedance mismatch here, between cpusets and sched domains.
@@ -443,19 +443,19 @@ overlap and each CPU is in at most one sched domain.
 It is necessary for sched domains to be flat because load balancing
 across partially overlapping sets of CPUs would risk unstable dynamics
 that would be beyond our understanding.  So if each of two partially
-overlapping cpusets enables the flag 'sched_load_balance', then we
+overlapping cpusets enables the flag 'cpuset.sched_load_balance', then we
 form a single sched domain that is a superset of both.  We won't move
 a task to a CPU outside it cpuset, but the scheduler load balancing
 code might waste some compute cycles considering that possibility.

 This mismatch is why there is not a simple one-to-one relation
-between which cpusets have the flag "sched_load_balance" enabled,
+between which cpusets have the flag "cpuset.sched_load_balance" enabled,
 and the sched domain configuration.  If a cpuset enables the flag, it
 will get balancing across all its CPUs, but if it disables the flag,
 it will only be assured of no load balancing if no other overlapping
 cpuset enables the flag.

-If two cpusets have partially overlapping 'cpus' allowed, and only
+If two cpusets have partially overlapping 'cpuset.cpus' allowed, and only
 one of them has this flag enabled, then the other may find its
 tasks only partially load balanced, just on the overlapping CPUs.
 This is just the general case of the top_cpuset example given a few
@@ -468,23 +468,23 @@ load balancing to the other CPUs.
 1.7.1 sched_load_balance implementation details.
 ------------------------------------------------

-The per-cpuset flag 'sched_load_balance' defaults to enabled (contrary
+The per-cpuset flag 'cpuset.sched_load_balance' defaults to enabled (contrary
 to most cpuset flags.)  When enabled for a cpuset, the kernel will
 ensure that it can load balance across all the CPUs in that cpuset
 (makes sure that all the CPUs in the cpus_allowed of that cpuset are
 in the same sched domain.)

-If two overlapping cpusets both have 'sched_load_balance' enabled,
+If two overlapping cpusets both have 'cpuset.sched_load_balance' enabled,
 then they will be (must be) both in the same sched domain.

-If, as is the default, the top cpuset has 'sched_load_balance' enabled,
+If, as is the default, the top cpuset has 'cpuset.sched_load_balance' enabled,
 then by the above that means there is a single sched domain covering
 the whole system, regardless of any other cpuset settings.

 The kernel commits to user space that it will avoid load balancing
 where it can.  It will pick as fine a granularity partition of sched
 domains as it can while still providing load balancing for any set
-of CPUs allowed to a cpuset having 'sched_load_balance' enabled.
+of CPUs allowed to a cpuset having 'cpuset.sched_load_balance' enabled.

 The internal kernel cpuset to scheduler interface passes from the
 cpuset code to the scheduler code a partition of the load balanced
@@ -495,9 +495,9 @@ all the CPUs that must be load balanced.
 The cpuset code builds a new such partition and passes it to the
 scheduler sched domain setup code, to have the sched domains rebuilt
 as necessary, whenever:
- - the 'sched_load_balance' flag of a cpuset with non-empty CPUs changes,
+ - the 'cpuset.sched_load_balance' flag of a cpuset with non-empty CPUs changes,
 - or CPUs come or go from a cpuset with this flag enabled,
- - or 'sched_relax_domain_level' value of a cpuset with non-empty CPUs
+ - or 'cpuset.sched_relax_domain_level' value of a cpuset with non-empty CPUs
   and with this flag enabled changes,
 - or a cpuset with non-empty CPUs and with this flag enabled is removed,
 - or a cpu is offlined/onlined.
@@ -542,7 +542,7 @@ As the result, task B on CPU X need to wait task A or wait load balance
 on the next tick.  For some applications in special situation, waiting
 1 tick may be too long.

-The 'sched_relax_domain_level' file allows you to request changing
+The 'cpuset.sched_relax_domain_level' file allows you to request changing
 this searching range as you like.  This file takes int value which
 indicates size of searching range in levels ideally as follows,
 otherwise initial value -1 that indicates the cpuset has no request.
@@ -559,8 +559,8 @@ The system default is architecture dependent.  The system default
 can be changed using the relax_domain_level= boot parameter.

 This file is per-cpuset and affect the sched domain where the cpuset
-belongs to.  Therefore if the flag 'sched_load_balance' of a cpuset
-is disabled, then 'sched_relax_domain_level' have no effect since
+belongs to.  Therefore if the flag 'cpuset.sched_load_balance' of a cpuset
+is disabled, then 'cpuset.sched_relax_domain_level' have no effect since
 there is no sched domain belonging the cpuset.

 If multiple cpusets are overlapping and hence they form a single sched
@@ -607,9 +607,9 @@ from one cpuset to another, then the kernel will adjust the tasks
 memory placement, as above, the next time that the kernel attempts
 to allocate a page of memory for that task.

-If a cpuset has its 'cpus' modified, then each task in that cpuset
+If a cpuset has its 'cpuset.cpus' modified, then each task in that cpuset
 will have its allowed CPU placement changed immediately.  Similarly,
-if a tasks pid is written to another cpusets 'tasks' file, then its
+if a tasks pid is written to another cpusets 'cpuset.tasks' file, then its
 allowed CPU placement is changed immediately.  If such a task had been
 bound to some subset of its cpuset using the sched_setaffinity() call,
 the task will be allowed to run on any CPU allowed in its new cpuset,
@@ -622,8 +622,8 @@ and the processor placement is updated immediately.
 Normally, once a page is allocated (given a physical page
 of main memory) then that page stays on whatever node it
 was allocated, so long as it remains allocated, even if the
-cpusets memory placement policy 'mems' subsequently changes.
-If the cpuset flag file 'memory_migrate' is set true, then when
+cpusets memory placement policy 'cpuset.mems' subsequently changes.
+If the cpuset flag file 'cpuset.memory_migrate' is set true, then when
 tasks are attached to that cpuset, any pages that task had
 allocated to it on nodes in its previous cpuset are migrated
 to the tasks new cpuset. The relative placement of the page within
@@ -631,12 +631,12 @@ the cpuset is preserved during these migration operations if possible.
 For example if the page was on the second valid node of the prior cpuset
 then the page will be placed on the second valid node of the new cpuset.

-Also if 'memory_migrate' is set true, then if that cpusets
-'mems' file is modified, pages allocated to tasks in that
-cpuset, that were on nodes in the previous setting of 'mems',
+Also if 'cpuset.memory_migrate' is set true, then if that cpusets
+'cpuset.mems' file is modified, pages allocated to tasks in that
+cpuset, that were on nodes in the previous setting of 'cpuset.mems',
 will be moved to nodes in the new setting of 'mems.'
 Pages that were not in the tasks prior cpuset, or in the cpusets
-prior 'mems' setting, will not be moved.
+prior 'cpuset.mems' setting, will not be moved.

 There is an exception to the above.  If hotplug functionality is used
 to remove all the CPUs that are currently assigned to a cpuset,
@@ -678,8 +678,8 @@ and then start a subshell 'sh' in that cpuset:
  cd /dev/cpuset
  mkdir Charlie
  cd Charlie
-  /bin/echo 2-3 > cpus
-  /bin/echo 1 > mems
+  /bin/echo 2-3 > cpuset.cpus
+  /bin/echo 1 > cpuset.mems
  /bin/echo $$ > tasks
  sh
  # The subshell 'sh' is now running in cpuset Charlie
@@ -725,10 +725,13 @@ Now you want to do something with this cpuset.

 In this directory you can find several files:
 # ls
-cpu_exclusive  memory_migrate      mems                      tasks
-cpus           memory_pressure     notify_on_release
-mem_exclusive  memory_spread_page  sched_load_balance
-mem_hardwall   memory_spread_slab  sched_relax_domain_level
+cpuset.cpu_exclusive       cpuset.memory_spread_slab
+cpuset.cpus                cpuset.mems
+cpuset.mem_exclusive       cpuset.sched_load_balance
+cpuset.mem_hardwall        cpuset.sched_relax_domain_level
+cpuset.memory_migrate      notify_on_release
+cpuset.memory_pressure     tasks
+cpuset.memory_spread_page

 Reading them will give you information about the state of this cpuset:
 the CPUs and Memory Nodes it can use, the processes that are using
@@ -736,13 +739,13 @@ it, its properties.  By writing to these files you can manipulate
 the cpuset.

 Set some flags:
-# /bin/echo 1 > cpu_exclusive
+# /bin/echo 1 > cpuset.cpu_exclusive

 Add some cpus:
-# /bin/echo 0-7 > cpus
+# /bin/echo 0-7 > cpuset.cpus

 Add some mems:
-# /bin/echo 0-7 > mems
+# /bin/echo 0-7 > cpuset.mems

 Now attach your shell to this cpuset:
 # /bin/echo $$ > tasks
@@ -774,28 +777,28 @@ echo "/sbin/cpuset_release_agent" > /dev/cpuset/release_agent
 This is the syntax to use when writing in the cpus or mems files
 in cpuset directories:

-# /bin/echo 1-4 > cpus		-> set cpus list to cpus 1,2,3,4
-# /bin/echo 1,2,3,4 > cpus	-> set cpus list to cpus 1,2,3,4
+# /bin/echo 1-4 > cpuset.cpus		-> set cpus list to cpus 1,2,3,4
+# /bin/echo 1,2,3,4 > cpuset.cpus	-> set cpus list to cpus 1,2,3,4

 To add a CPU to a cpuset, write the new list of CPUs including the
 CPU to be added. To add 6 to the above cpuset:

-# /bin/echo 1-4,6 > cpus	-> set cpus list to cpus 1,2,3,4,6
+# /bin/echo 1-4,6 > cpuset.cpus	-> set cpus list to cpus 1,2,3,4,6

 Similarly to remove a CPU from a cpuset, write the new list of CPUs
 without the CPU to be removed.

 To remove all the CPUs:

-# /bin/echo "" > cpus		-> clear cpus list
+# /bin/echo "" > cpuset.cpus		-> clear cpus list

 2.3 Setting flags
 -----------------

 The syntax is very simple:

-# /bin/echo 1 > cpu_exclusive 	-> set flag 'cpu_exclusive'
-# /bin/echo 0 > cpu_exclusive 	-> unset flag 'cpu_exclusive'
+# /bin/echo 1 > cpuset.cpu_exclusive 	-> set flag 'cpuset.cpu_exclusive'
+# /bin/echo 0 > cpuset.cpu_exclusive 	-> unset flag 'cpuset.cpu_exclusive'

 2.4 Attaching processes
 -----------------------
--- a/Documentation/cgroups/memcg_test.txt
+++ b/Documentation/cgroups/memcg_test.txt
@@ -1,6 +1,6 @@
 Memory Resource Controller(Memcg)  Implementation Memo.
-Last Updated: 2009/1/20
-Base Kernel Version: based on 2.6.29-rc2.
+Last Updated: 2010/2
+Base Kernel Version: based on 2.6.33-rc7-mm(candidate for 34).

 Because VM is getting complex (one of reasons is memcg...), memcg's behavior
 is complex. This is a document for memcg's internal behavior.
@@ -337,7 +337,7 @@ Under below explanation, we assume CONFIG_MEM_RES_CTRL_SWAP=y.
 	race and lock dependency with other cgroup subsystems.

 	example)
-	# mount -t cgroup none /cgroup -t cpuset,memory,cpu,devices
+	# mount -t cgroup none /cgroup -o cpuset,memory,cpu,devices

 	and do task move, mkdir, rmdir etc...under this.

@@ -348,7 +348,7 @@ Under below explanation, we assume CONFIG_MEM_RES_CTRL_SWAP=y.

 	For example, test like following is good.
 	(Shell-A)
-	# mount -t cgroup none /cgroup -t memory
+	# mount -t cgroup none /cgroup -o memory
 	# mkdir /cgroup/test
 	# echo 40M > /cgroup/test/memory.limit_in_bytes
 	# echo 0 > /cgroup/test/tasks
@@ -378,3 +378,42 @@ Under below explanation, we assume CONFIG_MEM_RES_CTRL_SWAP=y.
 	#echo 50M > memory.limit_in_bytes
 	#echo 50M > memory.memsw.limit_in_bytes
 	run 51M of malloc
+
+ 9.9 Move charges at task migration
+	Charges associated with a task can be moved along with task migration.
+
+	(Shell-A)
+	#mkdir /cgroup/A
+	#echo $$ >/cgroup/A/tasks
+	run some programs which uses some amount of memory in /cgroup/A.
+
+	(Shell-B)
+	#mkdir /cgroup/B
+	#echo 1 >/cgroup/B/memory.move_charge_at_immigrate
+	#echo "pid of the program running in group A" >/cgroup/B/tasks
+
+	You can see charges have been moved by reading *.usage_in_bytes or
+	memory.stat of both A and B.
+	See 8.2 of Documentation/cgroups/memory.txt to see what value should be
+	written to move_charge_at_immigrate.
+
+ 9.10 Memory thresholds
+	Memory controler implements memory thresholds using cgroups notification
+	API. You can use Documentation/cgroups/cgroup_event_listener.c to test
+	it.
+
+	(Shell-A) Create cgroup and run event listener
+	# mkdir /cgroup/A
+	# ./cgroup_event_listener /cgroup/A/memory.usage_in_bytes 5M
+
+	(Shell-B) Add task to cgroup and try to allocate and free memory
+	# echo $$ >/cgroup/A/tasks
+	# a="$(dd if=/dev/zero bs=1M count=10)"
+	# a=
+
+	You will see message from cgroup_event_listener every time you cross
+	the thresholds.
+
+	Use /cgroup/A/memory.memsw.usage_in_bytes to test memsw thresholds.
+
+	It's good idea to test root cgroup as well.
--- a/Show More
+++ b/Show More