Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net

2026-05-01 15:00:59 -07:00 · 2012-04-10 14:30:45 -04:00
parent 32ed53b83e f68e556e23
commit 06eb4eafbd
4047 changed files with 104278 additions and 50193 deletions
@@ -55,7 +55,7 @@ What:		/sys/bus/usb/drivers/usbtmc/devices/*/auto_abort
 Date:		August 2008
 Contact:	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
 Description:
-		This file determines if the the transaction of the USB TMC
+		This file determines if the transaction of the USB TMC
 		device is to be automatically aborted if there is any error.
 		For more details about this, please see the document,
 		"Universal Serial Bus Test and Measurement Class Specification
@@ -0,0 +1,16 @@
+What:		/sys/kernel/debug/olpc-ec/cmd
+Date:		Dec 2011
+KernelVersion:	3.4
+Contact:	devel@lists.laptop.org
+Description:
+
+A generic interface for executing OLPC Embedded Controller commands and
+reading their responses.
+
+To execute a command, write data with the format: CC:N A A A A
+CC is the (hex) command, N is the count of expected reply bytes, and A A A A
+are optional (hex) arguments.
+
+To read the response (if any), read from the generic node after executing
+a command. Hex reply bytes will be returned, *whether or not* they came from
+the immediately previous command.
@@ -0,0 +1,25 @@
+What:		/sys/block/dm-<num>/dm/name
+Date:		January 2009
+KernelVersion:	2.6.29
+Contact:	dm-devel@redhat.com
+Description:	Device-mapper device name.
+		Read-only string containing mapped device name.
+Users:		util-linux, device-mapper udev rules
+
+What:		/sys/block/dm-<num>/dm/uuid
+Date:		January 2009
+KernelVersion:	2.6.29
+Contact:	dm-devel@redhat.com
+Description:	Device-mapper device UUID.
+		Read-only string containing DM-UUID or empty string
+		if DM-UUID is not set.
+Users:		util-linux, device-mapper udev rules
+
+What:		/sys/block/dm-<num>/dm/suspended
+Date:		June 2009
+KernelVersion:	2.6.31
+Contact:	dm-devel@redhat.com
+Description:	Device-mapper device suspend state.
+		Contains the value 1 while the device is suspended.
+		Otherwise it contains 0. Read-only attribute.
+Users:		util-linux, device-mapper udev rules
@@ -0,0 +1,14 @@
+Where:		/sys/bus/event_source/devices/<dev>/format
+Date:		January 2012
+Kernel Version: 3.3
+Contact:	Jiri Olsa <jolsa@redhat.com>
+Description:
+		Attribute group to describe the magic bits that go into
+		perf_event_attr::config[012] for a particular pmu.
+		Each attribute of this group defines the 'hardware' bitmask
+		we want to export, so that userspace can deal with sane
+		name/value pairs.
+
+		Example: 'config1:1,6-10,44'
+		Defines contents of attribute that occupies bits 1,6-10,44 of
+		perf_event_attr::config1.
@@ -17,3 +17,21 @@ Description:	Some Samsung laptops have different "performance levels"
 		Specifically, not all support the "overclock" option,
 		and it's still unknown if this value even changes
 		anything, other than making the user feel a bit better.
+
+What:		/sys/devices/platform/samsung/battery_life_extender
+Date:		December 1, 2011
+KernelVersion:	3.3
+Contact:	Corentin Chary <corentin.chary@gmail.com>
+Description:	Max battery charge level can be modified, battery cycle
+		life can be extended by reducing the max battery charge
+		level.
+		0 means normal battery mode (100% charge)
+		1 means battery life extender mode (80% charge)
+
+What:		/sys/devices/platform/samsung/usb_charge
+Date:		December 1, 2011
+KernelVersion:	3.3
+Contact:	Corentin Chary <corentin.chary@gmail.com>
+Description:	Use your USB ports to charge devices, even
+		when your laptop is powered off.
+		1 means enabled, 0 means disabled.
@@ -1,3 +1,23 @@
+What:		/sys/firmware/acpi/bgrt/
+Date:		January 2012
+Contact:	Matthew Garrett <mjg@redhat.com>
+Description:
+		The BGRT is an ACPI 5.0 feature that allows the OS
+		to obtain a copy of the firmware boot splash and
+		some associated metadata. This is intended to be used
+		by boot splash applications in order to interact with
+		the firmware boot splash in order to avoid jarring
+		transitions.
+
+		image: The image bitmap. Currently a 32-bit BMP.
+		status: 1 if the image is valid, 0 if firmware invalidated it.
+		type: 0 indicates image is in BMP format.
+		version: The version of the BGRT. Currently 1.
+		xoffset: The number of pixels between the left of the screen
+			 and the left edge of the image.
+		yoffset: The number of pixels between the top of the screen
+			 and the top edge of the image.
+
 What:		/sys/firmware/acpi/interrupts/
 Date:		February 2008
 Contact:	Len Brown <lenb@kernel.org>
@@ -793,6 +793,35 @@ own custom mode, or may have some other magic method for making indentation
 work correctly.


+		Chapter 19:  Inline assembly
+
+In architecture-specific code, you may need to use inline assembly to interface
+with CPU or platform functionality.  Don't hesitate to do so when necessary.
+However, don't use inline assembly gratuitously when C can do the job.  You can
+and should poke hardware from C when possible.
+
+Consider writing simple helper functions that wrap common bits of inline
+assembly, rather than repeatedly writing them with slight variations.  Remember
+that inline assembly can use C parameters.
+
+Large, non-trivial assembly functions should go in .S files, with corresponding
+C prototypes defined in C header files.  The C prototypes for assembly
+functions should use "asmlinkage".
+
+You may need to mark your asm statement as volatile, to prevent GCC from
+removing it if GCC doesn't notice any side effects.  You don't always need to
+do so, though, and doing so unnecessarily can limit optimization.
+
+When writing a single inline assembly statement containing multiple
+instructions, put each instruction on a separate line in a separate quoted
+string, and end each string except the last with \n\t to properly indent the
+next instruction in the assembly output:
+
+	asm ("magic %reg1, #42\n\t"
+	     "more_magic %reg2, %reg3"
+	     : /* outputs */ : /* inputs */ : /* clobbers */);
+
+

 		Appendix I: References

@@ -31,3 +31,21 @@ may be weakly ordered, that is that reads and writes may pass each other.
 Since it is optional for platforms to implement DMA_ATTR_WEAK_ORDERING,
 those that do not will simply ignore the attribute and exhibit default
 behavior.
+
+DMA_ATTR_WRITE_COMBINE
+----------------------
+
+DMA_ATTR_WRITE_COMBINE specifies that writes to the mapping may be
+buffered to improve performance.
+
+Since it is optional for platforms to implement DMA_ATTR_WRITE_COMBINE,
+those that do not will simply ignore the attribute and exhibit default
+behavior.
+
+DMA_ATTR_NON_CONSISTENT
+-----------------------
+
+DMA_ATTR_NON_CONSISTENT lets the platform to choose to return either
+consistent or non-consistent memory as it sees fit.  By using this API,
+you are guaranteeing to the platform that you have all the correct and
+necessary sync points for this memory in the driver.
@@ -446,4 +446,21 @@ X!Idrivers/video/console/fonts.c
 !Edrivers/i2c/i2c-core.c
  </chapter>

+  <chapter id="hsi">
+     <title>High Speed Synchronous Serial Interface (HSI)</title>
+
+     <para>
+	High Speed Synchronous Serial Interface (HSI) is a
+	serial interface mainly used for connecting application
+	engines (APE) with cellular modem engines (CMT) in cellular
+	handsets.
+
+	HSI provides multiplexing for up to 16 logical channels,
+	low-latency and full duplex communication.
+     </para>
+
+!Iinclude/linux/hsi/hsi.h
+!Edrivers/hsi/hsi.c
+  </chapter>
+
 </book>
@@ -1,3 +1,3 @@
 obj-m := DocBook/ accounting/ auxdisplay/ connector/ \
 	filesystems/ filesystems/configfs/ ia64/ laptops/ networking/ \
-	pcmcia/ spi/ timers/ vm/ watchdog/src/
+	pcmcia/ spi/ timers/ watchdog/src/
@@ -53,6 +53,14 @@ directory apei/einj. The following files are provided.
  This file is used to set the second error parameter value. Effect of
  parameter depends on error_type specified.

+- notrigger
+  The EINJ mechanism is a two step process. First inject the error, then
+  perform some actions to trigger it. Setting "notrigger" to 1 skips the
+  trigger phase, which *may* allow the user to cause the error in some other
+  context by a simple access to the cpu, memory location, or device that is
+  the target of the error injection. Whether this actually works depends
+  on what operations the BIOS actually includes in the trigger phase.
+
 BIOS versions based in the ACPI 4.0 specification have limited options
 to control where the errors are injected.  Your BIOS may support an
 extension (enabled with the param_extension=1 module parameter, or
@@ -35,7 +35,7 @@ CREATING DEVICE NODES
    sh Documentation/aoe/mkshelf.sh /dev/etherd 0

  There is also an autoload script that shows how to edit
-  /etc/modprobe.conf to ensure that the aoe module is loaded when
+  /etc/modprobe.d/aoe.conf to ensure that the aoe module is loaded when
  necessary.

 USING DEVICE NODES
@@ -1,8 +1,8 @@
 #!/bin/sh
 # set aoe to autoload by installing the
-# aliases in /etc/modprobe.conf
+# aliases in /etc/modprobe.d/

-f=/etc/modprobe.conf
+f=/etc/modprobe.d/aoe.conf

 if test ! -r $f || test ! -w $f; then
 	echo "cannot configure $f for module autoloading" 1>&2
@@ -49,7 +49,7 @@ you can put:

 options floppy omnibook messages

-in /etc/modprobe.conf.
+in a configuration file in /etc/modprobe.d/.


 The floppy driver related options are:
@@ -217,7 +217,7 @@ and name space for cpusets, with a minimum of additional kernel code.

 The cpus and mems files in the root (top_cpuset) cpuset are
 read-only.  The cpus file automatically tracks the value of
-cpu_online_map using a CPU hotplug notifier, and the mems file
+cpu_online_mask using a CPU hotplug notifier, and the mems file
 automatically tracks the value of node_states[N_HIGH_MEMORY]--i.e.,
 nodes with memory--using the cpuset_track_online_nodes() hook.

@@ -0,0 +1,233 @@
+		The Common Clk Framework
+		Mike Turquette <mturquette@ti.com>
+
+This document endeavours to explain the common clk framework details,
+and how to port a platform over to this framework.  It is not yet a
+detailed explanation of the clock api in include/linux/clk.h, but
+perhaps someday it will include that information.
+
+	Part 1 - introduction and interface split
+
+The common clk framework is an interface to control the clock nodes
+available on various devices today.  This may come in the form of clock
+gating, rate adjustment, muxing or other operations.  This framework is
+enabled with the CONFIG_COMMON_CLK option.
+
+The interface itself is divided into two halves, each shielded from the
+details of its counterpart.  First is the common definition of struct
+clk which unifies the framework-level accounting and infrastructure that
+has traditionally been duplicated across a variety of platforms.  Second
+is a common implementation of the clk.h api, defined in
+drivers/clk/clk.c.  Finally there is struct clk_ops, whose operations
+are invoked by the clk api implementation.
+
+The second half of the interface is comprised of the hardware-specific
+callbacks registered with struct clk_ops and the corresponding
+hardware-specific structures needed to model a particular clock.  For
+the remainder of this document any reference to a callback in struct
+clk_ops, such as .enable or .set_rate, implies the hardware-specific
+implementation of that code.  Likewise, references to struct clk_foo
+serve as a convenient shorthand for the implementation of the
+hardware-specific bits for the hypothetical "foo" hardware.
+
+Tying the two halves of this interface together is struct clk_hw, which
+is defined in struct clk_foo and pointed to within struct clk.  This
+allows easy for navigation between the two discrete halves of the common
+clock interface.
+
+	Part 2 - common data structures and api
+
+Below is the common struct clk definition from
+include/linux/clk-private.h, modified for brevity:
+
+	struct clk {
+		const char		*name;
+		const struct clk_ops	*ops;
+		struct clk_hw		*hw;
+		char			**parent_names;
+		struct clk		**parents;
+		struct clk		*parent;
+		struct hlist_head	children;
+		struct hlist_node	child_node;
+		...
+	};
+
+The members above make up the core of the clk tree topology.  The clk
+api itself defines several driver-facing functions which operate on
+struct clk.  That api is documented in include/linux/clk.h.
+
+Platforms and devices utilizing the common struct clk use the struct
+clk_ops pointer in struct clk to perform the hardware-specific parts of
+the operations defined in clk.h:
+
+	struct clk_ops {
+		int		(*prepare)(struct clk_hw *hw);
+		void		(*unprepare)(struct clk_hw *hw);
+		int		(*enable)(struct clk_hw *hw);
+		void		(*disable)(struct clk_hw *hw);
+		int		(*is_enabled)(struct clk_hw *hw);
+		unsigned long	(*recalc_rate)(struct clk_hw *hw,
+						unsigned long parent_rate);
+		long		(*round_rate)(struct clk_hw *hw, unsigned long,
+						unsigned long *);
+		int		(*set_parent)(struct clk_hw *hw, u8 index);
+		u8		(*get_parent)(struct clk_hw *hw);
+		int		(*set_rate)(struct clk_hw *hw, unsigned long);
+		void		(*init)(struct clk_hw *hw);
+	};
+
+	Part 3 - hardware clk implementations
+
+The strength of the common struct clk comes from its .ops and .hw pointers
+which abstract the details of struct clk from the hardware-specific bits, and
+vice versa.  To illustrate consider the simple gateable clk implementation in
+drivers/clk/clk-gate.c:
+
+struct clk_gate {
+	struct clk_hw	hw;
+	void __iomem    *reg;
+	u8              bit_idx;
+	...
+};
+
+struct clk_gate contains struct clk_hw hw as well as hardware-specific
+knowledge about which register and bit controls this clk's gating.
+Nothing about clock topology or accounting, such as enable_count or
+notifier_count, is needed here.  That is all handled by the common
+framework code and struct clk.
+
+Let's walk through enabling this clk from driver code:
+
+	struct clk *clk;
+	clk = clk_get(NULL, "my_gateable_clk");
+
+	clk_prepare(clk);
+	clk_enable(clk);
+
+The call graph for clk_enable is very simple:
+
+clk_enable(clk);
+	clk->ops->enable(clk->hw);
+	[resolves to...]
+		clk_gate_enable(hw);
+		[resolves struct clk gate with to_clk_gate(hw)]
+			clk_gate_set_bit(gate);
+
+And the definition of clk_gate_set_bit:
+
+static void clk_gate_set_bit(struct clk_gate *gate)
+{
+	u32 reg;
+
+	reg = __raw_readl(gate->reg);
+	reg |= BIT(gate->bit_idx);
+	writel(reg, gate->reg);
+}
+
+Note that to_clk_gate is defined as:
+
+#define to_clk_gate(_hw) container_of(_hw, struct clk_gate, clk)
+
+This pattern of abstraction is used for every clock hardware
+representation.
+
+	Part 4 - supporting your own clk hardware
+
+When implementing support for a new type of clock it only necessary to
+include the following header:
+
+#include <linux/clk-provider.h>
+
+include/linux/clk.h is included within that header and clk-private.h
+must never be included from the code which implements the operations for
+a clock.  More on that below in Part 5.
+
+To construct a clk hardware structure for your platform you must define
+the following:
+
+struct clk_foo {
+	struct clk_hw hw;
+	... hardware specific data goes here ...
+};
+
+To take advantage of your data you'll need to support valid operations
+for your clk:
+
+struct clk_ops clk_foo_ops {
+	.enable		= &clk_foo_enable;
+	.disable	= &clk_foo_disable;
+};
+
+Implement the above functions using container_of:
+
+#define to_clk_foo(_hw) container_of(_hw, struct clk_foo, hw)
+
+int clk_foo_enable(struct clk_hw *hw)
+{
+	struct clk_foo *foo;
+
+	foo = to_clk_foo(hw);
+
+	... perform magic on foo ...
+
+	return 0;
+};
+
+Below is a matrix detailing which clk_ops are mandatory based upon the
+hardware capbilities of that clock.  A cell marked as "y" means
+mandatory, a cell marked as "n" implies that either including that
+callback is invalid or otherwise uneccesary.  Empty cells are either
+optional or must be evaluated on a case-by-case basis.
+
+                           clock hardware characteristics
+	     -----------------------------------------------------------
+             | gate | change rate | single parent | multiplexer | root |
+             |------|-------------|---------------|-------------|------|
+.prepare     |      |             |               |             |      |
+.unprepare   |      |             |               |             |      |
+             |      |             |               |             |      |
+.enable      | y    |             |               |             |      |
+.disable     | y    |             |               |             |      |
+.is_enabled  | y    |             |               |             |      |
+             |      |             |               |             |      |
+.recalc_rate |      | y           |               |             |      |
+.round_rate  |      | y           |               |             |      |
+.set_rate    |      | y           |               |             |      |
+             |      |             |               |             |      |
+.set_parent  |      |             | n             | y           | n    |
+.get_parent  |      |             | n             | y           | n    |
+             |      |             |               |             |      |
+.init        |      |             |               |             |      |
+	     -----------------------------------------------------------
+
+Finally, register your clock at run-time with a hardware-specific
+registration function.  This function simply populates struct clk_foo's
+data and then passes the common struct clk parameters to the framework
+with a call to:
+
+clk_register(...)
+
+See the basic clock types in drivers/clk/clk-*.c for examples.
+
+	Part 5 - static initialization of clock data
+
+For platforms with many clocks (often numbering into the hundreds) it
+may be desirable to statically initialize some clock data.  This
+presents a problem since the definition of struct clk should be hidden
+from everyone except for the clock core in drivers/clk/clk.c.
+
+To get around this problem struct clk's definition is exposed in
+include/linux/clk-private.h along with some macros for more easily
+initializing instances of the basic clock types.  These clocks must
+still be initialized with the common clock framework via a call to
+__clk_init.
+
+clk-private.h must NEVER be included by code which implements struct
+clk_ops callbacks, nor must it be included by any logic which pokes
+around inside of struct clk at run-time.  To do so is a layering
+violation.
+
+To better enforce this policy, always follow this simple rule: any
+statically initialized clock data MUST be defined in a separate file
+from the logic that implements its ops.  Basically separate the logic
+from the data and all is well.
@@ -47,7 +47,7 @@ maxcpus=n    Restrict boot time cpus to n. Say if you have 4 cpus, using
             other cpus later online, read FAQ's for more info.

 additional_cpus=n (*)	Use this to limit hotpluggable cpus. This option sets
-  			cpu_possible_map = cpu_present_map + additional_cpus
+  			cpu_possible_mask = cpu_present_mask + additional_cpus

 cede_offline={"off","on"}  Use this option to disable/enable putting offlined
 		            processors to an extended H_CEDE state on
@@ -64,11 +64,11 @@ should only rely on this to count the # of cpus, but *MUST* not rely
 on the apicid values in those tables for disabled apics. In the event
 BIOS doesn't mark such hot-pluggable cpus as disabled entries, one could
 use this parameter "additional_cpus=x" to represent those cpus in the
-cpu_possible_map.
+cpu_possible_mask.

 possible_cpus=n		[s390,x86_64] use this to set hotpluggable cpus.
 			This option sets possible_cpus bits in
-			cpu_possible_map. Thus keeping the numbers of bits set
+			cpu_possible_mask. Thus keeping the numbers of bits set
 			constant even if the machine gets rebooted.

 CPU maps and such
@@ -76,7 +76,7 @@ CPU maps and such
 [More on cpumaps and primitive to manipulate, please check
 include/linux/cpumask.h that has more descriptive text.]

-cpu_possible_map: Bitmap of possible CPUs that can ever be available in the
+cpu_possible_mask: Bitmap of possible CPUs that can ever be available in the
 system. This is used to allocate some boot time memory for per_cpu variables
 that aren't designed to grow/shrink as CPUs are made available or removed.
 Once set during boot time discovery phase, the map is static, i.e no bits
@@ -84,13 +84,13 @@ are added or removed anytime.  Trimming it accurately for your system needs
 upfront can save some boot time memory. See below for how we use heuristics
 in x86_64 case to keep this under check.

-cpu_online_map: Bitmap of all CPUs currently online. Its set in __cpu_up()
+cpu_online_mask: Bitmap of all CPUs currently online. Its set in __cpu_up()
 after a cpu is available for kernel scheduling and ready to receive
 interrupts from devices. Its cleared when a cpu is brought down using
 __cpu_disable(), before which all OS services including interrupts are
 migrated to another target CPU.

-cpu_present_map: Bitmap of CPUs currently present in the system. Not all
+cpu_present_mask: Bitmap of CPUs currently present in the system. Not all
 of them may be online. When physical hotplug is processed by the relevant
 subsystem (e.g ACPI) can change and new bit either be added or removed
 from the map depending on the event is hot-add/hot-remove. There are currently
@@ -99,22 +99,22 @@ at which time hotplug is disabled.

 You really dont need to manipulate any of the system cpu maps. They should
 be read-only for most use. When setting up per-cpu resources almost always use
-cpu_possible_map/for_each_possible_cpu() to iterate.
+cpu_possible_mask/for_each_possible_cpu() to iterate.

 Never use anything other than cpumask_t to represent bitmap of CPUs.

 	#include <linux/cpumask.h>

-	for_each_possible_cpu     - Iterate over cpu_possible_map
-	for_each_online_cpu       - Iterate over cpu_online_map
-	for_each_present_cpu      - Iterate over cpu_present_map
+	for_each_possible_cpu     - Iterate over cpu_possible_mask
+	for_each_online_cpu       - Iterate over cpu_online_mask
+	for_each_present_cpu      - Iterate over cpu_present_mask
 	for_each_cpu_mask(x,mask) - Iterate over some random collection of cpu mask.

 	#include <linux/cpu.h>
 	get_online_cpus() and put_online_cpus():

 The above calls are used to inhibit cpu hotplug operations. While the
-cpu_hotplug.refcount is non zero, the cpu_online_map will not change.
+cpu_hotplug.refcount is non zero, the cpu_online_mask will not change.
 If you merely need to avoid cpus going away, you could also use
 preempt_disable() and preempt_enable() for those sections.
 Just remember the critical section cannot call any
@@ -36,6 +36,7 @@ drwxr-xr-x 2 root root 0 Feb  8 10:42 state3
 /sys/devices/system/cpu/cpu0/cpuidle/state0:
 total 0
 -r--r--r-- 1 root root 4096 Feb  8 10:42 desc
+-rw-r--r-- 1 root root 4096 Feb  8 10:42 disable
 -r--r--r-- 1 root root 4096 Feb  8 10:42 latency
 -r--r--r-- 1 root root 4096 Feb  8 10:42 name
 -r--r--r-- 1 root root 4096 Feb  8 10:42 power
@@ -45,6 +46,7 @@ total 0
 /sys/devices/system/cpu/cpu0/cpuidle/state1:
 total 0
 -r--r--r-- 1 root root 4096 Feb  8 10:42 desc
+-rw-r--r-- 1 root root 4096 Feb  8 10:42 disable
 -r--r--r-- 1 root root 4096 Feb  8 10:42 latency
 -r--r--r-- 1 root root 4096 Feb  8 10:42 name
 -r--r--r-- 1 root root 4096 Feb  8 10:42 power
@@ -54,6 +56,7 @@ total 0
 /sys/devices/system/cpu/cpu0/cpuidle/state2:
 total 0
 -r--r--r-- 1 root root 4096 Feb  8 10:42 desc
+-rw-r--r-- 1 root root 4096 Feb  8 10:42 disable
 -r--r--r-- 1 root root 4096 Feb  8 10:42 latency
 -r--r--r-- 1 root root 4096 Feb  8 10:42 name
 -r--r--r-- 1 root root 4096 Feb  8 10:42 power
@@ -63,6 +66,7 @@ total 0
 /sys/devices/system/cpu/cpu0/cpuidle/state3:
 total 0
 -r--r--r-- 1 root root 4096 Feb  8 10:42 desc
+-rw-r--r-- 1 root root 4096 Feb  8 10:42 disable
 -r--r--r-- 1 root root 4096 Feb  8 10:42 latency
 -r--r--r-- 1 root root 4096 Feb  8 10:42 name
 -r--r--r-- 1 root root 4096 Feb  8 10:42 power
@@ -72,6 +76,7 @@ total 0


 * desc : Small description about the idle state (string)
+* disable : Option to disable this idle state (bool)
 * latency : Latency to exit out of this idle state (in microseconds)
 * name : Name of the idle state (string)
 * power : Power consumed while in this idle state (in milliwatts)
@@ -75,10 +75,12 @@ less sharing than average you'll need a larger-than-average metadata device.

 As a guide, we suggest you calculate the number of bytes to use in the
 metadata device as 48 * $data_dev_size / $data_block_size but round it up
-to 2MB if the answer is smaller.  The largest size supported is 16GB.
+to 2MB if the answer is smaller.  If you're creating large numbers of
+snapshots which are recording large amounts of change, you may find you
+need to increase this.

-If you're creating large numbers of snapshots which are recording large
-amounts of change, you may need find you need to increase this.
+The largest size supported is 16GB: If the device is larger,
+a warning will be issued and the excess space will not be used.

 Reloading a pool table
 ----------------------
@@ -167,6 +169,38 @@ ii) Using an internal snapshot.

    dmsetup create snap --table "0 2097152 thin /dev/mapper/pool 1"

+External snapshots
+------------------
+
+You can use an external _read only_ device as an origin for a
+thinly-provisioned volume.  Any read to an unprovisioned area of the
+thin device will be passed through to the origin.  Writes trigger
+the allocation of new blocks as usual.
+
+One use case for this is VM hosts that want to run guests on
+thinly-provisioned volumes but have the base image on another device
+(possibly shared between many VMs).
+
+You must not write to the origin device if you use this technique!
+Of course, you may write to the thin device and take internal snapshots
+of the thin volume.
+
+i) Creating a snapshot of an external device
+
+  This is the same as creating a thin device.
+  You don't mention the origin at this stage.
+
+    dmsetup message /dev/mapper/pool 0 "create_thin 0"
+
+ii) Using a snapshot of an external device.
+
+  Append an extra parameter to the thin target specifying the origin:
+
+    dmsetup create snap --table "0 2097152 thin /dev/mapper/pool 0 /dev/image"
+
+  N.B. All descendants (internal snapshots) of this snapshot require the
+  same extra origin parameter.
+
 Deactivation
 ------------

@@ -189,7 +223,13 @@ i) Constructor
 	      <low water mark (blocks)> [<number of feature args> [<arg>]*]

    Optional feature arguments:
-    - 'skip_block_zeroing': skips the zeroing of newly-provisioned blocks.
+
+      skip_block_zeroing: Skip the zeroing of newly-provisioned blocks.
+
+      ignore_discard: Disable discard support.
+
+      no_discard_passdown: Don't pass discards down to the underlying
+			   data device, but just remove the mapping.

    Data block size must be between 64KB (128 sectors) and 1GB
    (2097152 sectors) inclusive.
@@ -237,16 +277,6 @@ iii) Messages

 	Deletes a thin device.  Irreversible.

-    trim <dev id> <new size in sectors>
-
-	Delete mappings from the end of a thin device.  Irreversible.
-	You might want to use this if you're reducing the size of
-	your thinly-provisioned device.  In many cases, due to the
-	sharing of blocks between devices, it is not possible to
-	determine in advance how much space 'trim' will release.  (In
-	future a userspace tool might be able to perform this
-	calculation.)
-
    set_transaction_id <current id> <new id>

 	Userland volume managers, such as LVM, need a way to
@@ -262,7 +292,7 @@ iii) Messages

 i) Constructor

-    thin <pool dev> <dev id>
+    thin <pool dev> <dev id> [<external origin dev>]

    pool dev:
 	the thin-pool device, e.g. /dev/mapper/my_pool or 253:0
@@ -271,6 +301,11 @@ i) Constructor
 	the internal device identifier of the device to be
 	activated.

+    external origin dev:
+	an optional block device outside the pool to be treated as a
+	read-only snapshot origin: reads to unprovisioned areas of the
+	thin target will be mapped to this device.
+
 The pool doesn't store any size against the thin devices.  If you
 load a thin target that is smaller than you've been using previously,
 then you'll have no access to blocks mapped beyond the end.  If you
@@ -0,0 +1,194 @@
+dm-verity
+==========
+
+Device-Mapper's "verity" target provides transparent integrity checking of
+block devices using a cryptographic digest provided by the kernel crypto API.
+This target is read-only.
+
+Construction Parameters
+=======================
+    <version> <dev> <hash_dev> <hash_start>
+    <data_block_size> <hash_block_size>
+    <num_data_blocks> <hash_start_block>
+    <algorithm> <digest> <salt>
+
+<version>
+    This is the version number of the on-disk format.
+
+    0 is the original format used in the Chromium OS.
+	The salt is appended when hashing, digests are stored continuously and
+	the rest of the block is padded with zeros.
+
+    1 is the current format that should be used for new devices.
+	The salt is prepended when hashing and each digest is
+	padded with zeros to the power of two.
+
+<dev>
+    This is the device containing the data the integrity of which needs to be
+    checked.  It may be specified as a path, like /dev/sdaX, or a device number,
+    <major>:<minor>.
+
+<hash_dev>
+    This is the device that that supplies the hash tree data.  It may be
+    specified similarly to the device path and may be the same device.  If the
+    same device is used, the hash_start should be outside of the dm-verity
+    configured device size.
+
+<data_block_size>
+    The block size on a data device.  Each block corresponds to one digest on
+    the hash device.
+
+<hash_block_size>
+    The size of a hash block.
+
+<num_data_blocks>
+    The number of data blocks on the data device.  Additional blocks are
+    inaccessible.  You can place hashes to the same partition as data, in this
+    case hashes are placed after <num_data_blocks>.
+
+<hash_start_block>
+    This is the offset, in <hash_block_size>-blocks, from the start of hash_dev
+    to the root block of the hash tree.
+
+<algorithm>
+    The cryptographic hash algorithm used for this device.  This should
+    be the name of the algorithm, like "sha1".
+
+<digest>
+    The hexadecimal encoding of the cryptographic hash of the root hash block
+    and the salt.  This hash should be trusted as there is no other authenticity
+    beyond this point.
+
+<salt>
+    The hexadecimal encoding of the salt value.
+
+Theory of operation
+===================
+
+dm-verity is meant to be setup as part of a verified boot path.  This
+may be anything ranging from a boot using tboot or trustedgrub to just
+booting from a known-good device (like a USB drive or CD).
+
+When a dm-verity device is configured, it is expected that the caller
+has been authenticated in some way (cryptographic signatures, etc).
+After instantiation, all hashes will be verified on-demand during
+disk access.  If they cannot be verified up to the root node of the
+tree, the root hash, then the I/O will fail.  This should identify
+tampering with any data on the device and the hash data.
+
+Cryptographic hashes are used to assert the integrity of the device on a
+per-block basis.  This allows for a lightweight hash computation on first read
+into the page cache.  Block hashes are stored linearly-aligned to the nearest
+block the size of a page.
+
+Hash Tree
+---------
+
+Each node in the tree is a cryptographic hash.  If it is a leaf node, the hash
+is of some block data on disk.  If it is an intermediary node, then the hash is
+of a number of child nodes.
+
+Each entry in the tree is a collection of neighboring nodes that fit in one
+block.  The number is determined based on block_size and the size of the
+selected cryptographic digest algorithm.  The hashes are linearly-ordered in
+this entry and any unaligned trailing space is ignored but included when
+calculating the parent node.
+
+The tree looks something like:
+
+alg = sha256, num_blocks = 32768, block_size = 4096
+
+                                 [   root    ]
+                                /    . . .    \
+                     [entry_0]                 [entry_1]
+                    /  . . .  \                 . . .   \
+         [entry_0_0]   . . .  [entry_0_127]    . . . .  [entry_1_127]
+           / ... \             /   . . .  \             /           \
+     blk_0 ... blk_127  blk_16256   blk_16383      blk_32640 . . . blk_32767
+
+
+On-disk format
+==============
+
+Below is the recommended on-disk format. The verity kernel code does not
+read the on-disk header. It only reads the hash blocks which directly
+follow the header. It is expected that a user-space tool will verify the
+integrity of the verity_header and then call dmsetup with the correct
+parameters. Alternatively, the header can be omitted and the dmsetup
+parameters can be passed via the kernel command-line in a rooted chain
+of trust where the command-line is verified.
+
+The on-disk format is especially useful in cases where the hash blocks
+are on a separate partition. The magic number allows easy identification
+of the partition contents. Alternatively, the hash blocks can be stored
+in the same partition as the data to be verified. In such a configuration
+the filesystem on the partition would be sized a little smaller than
+the full-partition, leaving room for the hash blocks.
+
+struct superblock {
+	uint8_t signature[8]
+		"verity\0\0";
+
+	uint8_t version;
+		1 - current format
+
+	uint8_t data_block_bits;
+		log2(data block size)
+
+	uint8_t hash_block_bits;
+		log2(hash block size)
+
+	uint8_t pad1[1];
+		zero padding
+
+	uint16_t salt_size;
+		big-endian salt size
+
+	uint8_t pad2[2];
+		zero padding
+
+	uint32_t data_blocks_hi;
+		big-endian high 32 bits of the 64-bit number of data blocks
+
+	uint32_t data_blocks_lo;
+		big-endian low 32 bits of the 64-bit number of data blocks
+
+	uint8_t algorithm[16];
+		cryptographic algorithm
+
+	uint8_t salt[384];
+		salt (the salt size is specified above)
+
+	uint8_t pad3[88];
+		zero padding to 512-byte boundary
+}
+
+Directly following the header (and with sector number padded to the next hash
+block boundary) are the hash blocks which are stored a depth at a time
+(starting from the root), sorted in order of increasing index.
+
+Status
+======
+V (for Valid) is returned if every check performed so far was valid.
+If any check failed, C (for Corruption) is returned.
+
+Example
+=======
+
+Setup a device:
+  dmsetup create vroot --table \
+    "0 2097152 "\
+    "verity 1 /dev/sda1 /dev/sda2 4096 4096 2097152 1 "\
+    "4392712ba01368efdf14b05c76f9e4df0d53664630b5d48632ed17a137f39076 "\
+    "1234000000000000000000000000000000000000000000000000000000000000"
+
+A command line tool veritysetup is available to compute or verify
+the hash tree or activate the kernel driver.  This is available from
+the LVM2 upstream repository and may be supplied as a package called
+device-mapper-verity-tools:
+    git://sources.redhat.com/git/lvm2
+    http://sourceware.org/git/?p=lvm2.git
+    http://sourceware.org/cgi-bin/cvsweb.cgi/LVM2/verity?cvsroot=lvm2
+
+veritysetup -a vroot /dev/sda1 /dev/sda2 \
+	4392712ba01368efdf14b05c76f9e4df0d53664630b5d48632ed17a137f39076
--- a/Show More
+++ b/Show More