Merge branch 'linus' into x86/core

2026-05-01 15:00:59 -07:00 · 2009-03-28 22:27:45 +01:00
parent 93394a761d 7c730ccdc1
commit 3fab191002
3171 changed files with 253006 additions and 129563 deletions
@@ -0,0 +1,61 @@
+What:		security/ima/policy
+Date:		May 2008
+Contact:	Mimi Zohar <zohar@us.ibm.com>
+Description:
+		The Trusted Computing Group(TCG) runtime Integrity
+		Measurement Architecture(IMA) maintains a list of hash
+		values of executables and other sensitive system files
+		loaded into the run-time of this system.  At runtime,
+		the policy can be constrained based on LSM specific data.
+		Policies are loaded into the securityfs file ima/policy
+		by opening the file, writing the rules one at a time and
+		then closing the file.  The new policy takes effect after
+		the file ima/policy is closed.
+
+		rule format: action [condition ...]
+
+		action: measure | dont_measure
+		condition:= base | lsm
+			base:	[[func=] [mask=] [fsmagic=] [uid=]]
+			lsm:	[[subj_user=] [subj_role=] [subj_type=]
+				 [obj_user=] [obj_role=] [obj_type=]]
+
+		base: 	func:= [BPRM_CHECK][FILE_MMAP][INODE_PERMISSION]
+			mask:= [MAY_READ] [MAY_WRITE] [MAY_APPEND] [MAY_EXEC]
+			fsmagic:= hex value
+			uid:= decimal value
+		lsm:  	are LSM specific
+
+		default policy:
+			# PROC_SUPER_MAGIC
+			dont_measure fsmagic=0x9fa0
+			# SYSFS_MAGIC
+			dont_measure fsmagic=0x62656572
+			# DEBUGFS_MAGIC
+			dont_measure fsmagic=0x64626720
+			# TMPFS_MAGIC
+			dont_measure fsmagic=0x01021994
+			# SECURITYFS_MAGIC
+			dont_measure fsmagic=0x73636673
+
+			measure func=BPRM_CHECK
+			measure func=FILE_MMAP mask=MAY_EXEC
+			measure func=INODE_PERM mask=MAY_READ uid=0
+
+		The default policy measures all executables in bprm_check,
+		all files mmapped executable in file_mmap, and all files
+		open for read by root in inode_permission.
+
+		Examples of LSM specific definitions:
+
+		SELinux:
+			# SELINUX_MAGIC
+			dont_measure fsmagic=0xF97CFF8C
+
+			dont_measure obj_type=var_log_t
+			dont_measure obj_type=auditd_log_t
+			measure subj_user=system_u func=INODE_PERM mask=MAY_READ
+			measure subj_role=system_r func=INODE_PERM mask=MAY_READ
+
+		Smack:
+			measure subj_user=_ func=INODE_PERM mask=MAY_READ
@@ -12,7 +12,8 @@ DOCBOOKS := z8530book.xml mcabook.xml device-drivers.xml \
 	    kernel-api.xml filesystems.xml lsm.xml usb.xml kgdb.xml \
 	    gadget.xml libata.xml mtdnand.xml librs.xml rapidio.xml \
 	    genericirq.xml s390-drivers.xml uio-howto.xml scsi.xml \
-	    mac80211.xml debugobjects.xml sh.xml regulator.xml
+	    mac80211.xml debugobjects.xml sh.xml regulator.xml \
+	    alsa-driver-api.xml writing-an-alsa-driver.xml

 ###
 # The build process is as follows (targets):
@@ -1,11 +1,11 @@
-<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook V4.1//EN">
-
-<book>
-<?dbhtml filename="index.html">
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
+	"http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>

 <!-- ****************************************************** -->
 <!-- Header  -->
 <!-- ****************************************************** -->
+<book id="ALSA-Driver-API">
  <bookinfo>
    <title>The ALSA Driver API</title>

@@ -35,6 +35,8 @@

  </bookinfo>

+<toc></toc>
+
  <chapter><title>Management of Cards and Devices</title>
     <sect1><title>Card Management</title>
 !Esound/core/init.c
@@ -71,6 +73,10 @@
 !Esound/pci/ac97/ac97_codec.c
 !Esound/pci/ac97/ac97_pcm.c
     </sect1>
+     <sect1><title>Virtual Master Control API</title>
+!Esound/core/vmaster.c
+!Iinclude/sound/control.h
+     </sect1>
  </chapter>
  <chapter><title>MIDI API</title>
     <sect1><title>Raw MIDI API</title>
@@ -88,6 +94,9 @@
  <chapter><title>Miscellaneous Functions</title>
     <sect1><title>Hardware-Dependent Devices API</title>
 !Esound/core/hwdep.c
+     </sect1>
+     <sect1><title>Jack Abstraction Layer API</title>
+!Esound/core/jack.c
     </sect1>
     <sect1><title>ISA DMA Helpers</title>
 !Esound/core/isadma.c
@@ -440,6 +440,7 @@ desc->chip->end();
     used in the generic IRQ layer.
     </para>
 !Iinclude/linux/irq.h
+!Iinclude/linux/interrupt.h
  </chapter>

  <chapter id="pubfunctions">
@@ -17,8 +17,7 @@
    </authorgroup>

    <copyright>
-      <year>2007</year>
-      <year>2008</year>
+      <year>2007-2009</year>
      <holder>Johannes Berg</holder>
    </copyright>

@@ -165,8 +164,8 @@ usage should require reading the full document.
 !Pinclude/net/mac80211.h Frame format
      </sect1>
      <sect1>
-        <title>Alignment issues</title>
-        <para>TBD</para>
+        <title>Packet alignment</title>
+!Pnet/mac80211/rx.c Packet alignment
      </sect1>
      <sect1>
        <title>Calling into mac80211 from interrupts</title>
@@ -223,6 +222,17 @@ usage should require reading the full document.
 !Finclude/net/mac80211.h ieee80211_key_flags
    </chapter>

+    <chapter id="powersave">
+      <title>Powersave support</title>
+!Pinclude/net/mac80211.h Powersave support
+    </chapter>
+
+    <chapter id="beacon-filter">
+      <title>Beacon filter support</title>
+!Pinclude/net/mac80211.h Beacon filter support
+!Finclude/net/mac80211.h ieee80211_beacon_loss
+    </chapter>
+
    <chapter id="qos">
      <title>Multiple queues and QoS support</title>
      <para>TBD</para>
@@ -41,6 +41,13 @@ GPL version 2.
 </abstract>

 <revhistory>
+	<revision>
+	<revnumber>0.8</revnumber>
+	<date>2008-12-24</date>
+	<authorinitials>hjk</authorinitials>
+	<revremark>Added name attributes in mem and portio sysfs directories.
+		</revremark>
+	</revision>
 	<revision>
 	<revnumber>0.7</revnumber>
 	<date>2008-12-23</date>
@@ -303,10 +310,17 @@ interested in translating it, please email me
 	appear if the size of the mapping is not 0.
 </para>
 <para>
-	Each <filename>mapX/</filename> directory contains two read-only files
-	that show start address and size of the memory:
+	Each <filename>mapX/</filename> directory contains four read-only files
+	that show attributes of the memory:
 </para>
 <itemizedlist>
+<listitem>
+	<para>
+	<filename>name</filename>: A string identifier for this mapping. This
+	is optional, the string can be empty. Drivers can set this to make it
+	easier for userspace to find the correct mapping.
+	</para>
+</listitem>
 <listitem>
 	<para>
 	<filename>addr</filename>: The address of memory that can be mapped.
@@ -366,10 +380,17 @@ offset = N * getpagesize();
 	<filename>/sys/class/uio/uioX/portio/</filename>.
 </para>
 <para>
-	Each <filename>portX/</filename> directory contains three read-only
-	files that show start, size, and type of the port region:
+	Each <filename>portX/</filename> directory contains four read-only
+	files that show name, start, size, and type of the port region:
 </para>
 <itemizedlist>
+<listitem>
+	<para>
+	<filename>name</filename>: A string identifier for this port region.
+	The string is optional and can be empty. Drivers can set it to make it
+	easier for userspace to find a certain port region.
+	</para>
+</listitem>
 <listitem>
 	<para>
 	<filename>start</filename>: The first port of this region.
@@ -1,11 +1,11 @@
-<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook V4.1//EN">
-
-<book>
-<?dbhtml filename="index.html">
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
+	"http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>

 <!-- ****************************************************** -->
 <!-- Header  -->
 <!-- ****************************************************** -->
+<book id="Writing-an-ALSA-Driver">
  <bookinfo>
    <title>Writing an ALSA Driver</title>
    <author>
@@ -492,9 +492,9 @@
          }

          /* (2) */
-          card = snd_card_new(index[dev], id[dev], THIS_MODULE, 0);
-          if (card == NULL)
-                  return -ENOMEM;
+          err = snd_card_create(index[dev], id[dev], THIS_MODULE, 0, &card);
+          if (err < 0)
+                  return err;

          /* (3) */
          err = snd_mychip_create(card, pci, &chip);
@@ -590,8 +590,9 @@
            <programlisting>
 <![CDATA[
  struct snd_card *card;
+  int err;
  ....
-  card = snd_card_new(index[dev], id[dev], THIS_MODULE, 0);
+  err = snd_card_create(index[dev], id[dev], THIS_MODULE, 0, &card);
 ]]>
            </programlisting>
          </informalexample>
@@ -809,26 +810,28 @@

      <para>
        As mentioned above, to create a card instance, call
-      <function>snd_card_new()</function>.
+      <function>snd_card_create()</function>.

        <informalexample>
          <programlisting>
 <![CDATA[
  struct snd_card *card;
-  card = snd_card_new(index, id, module, extra_size);
+  int err;
+  err = snd_card_create(index, id, module, extra_size, &card);
 ]]>
          </programlisting>
        </informalexample>
      </para>

      <para>
-        The function takes four arguments, the card-index number, the
+        The function takes five arguments, the card-index number, the
        id string, the module pointer (usually
        <constant>THIS_MODULE</constant>),
-        and the size of extra-data space.  The last argument is used to
+        the size of extra-data space, and the pointer to return the
+        card instance.  The extra_size argument is used to
        allocate card-&gt;private_data for the
        chip-specific data.  Note that these data
-        are allocated by <function>snd_card_new()</function>.
+        are allocated by <function>snd_card_create()</function>.
      </para>
    </section>

@@ -915,15 +918,16 @@
      </para>

      <section id="card-management-chip-specific-snd-card-new">
-        <title>1. Allocating via <function>snd_card_new()</function>.</title>
+        <title>1. Allocating via <function>snd_card_create()</function>.</title>
        <para>
          As mentioned above, you can pass the extra-data-length
-	  to the 4th argument of <function>snd_card_new()</function>, i.e.
+	  to the 4th argument of <function>snd_card_create()</function>, i.e.

          <informalexample>
            <programlisting>
 <![CDATA[
-  card = snd_card_new(index[dev], id[dev], THIS_MODULE, sizeof(struct mychip));
+  err = snd_card_create(index[dev], id[dev], THIS_MODULE,
+                        sizeof(struct mychip), &card);
 ]]>
            </programlisting>
          </informalexample>
@@ -952,8 +956,8 @@

        <para>
          After allocating a card instance via
-          <function>snd_card_new()</function> (with
-          <constant>NULL</constant> on the 4th arg), call
+          <function>snd_card_create()</function> (with
+          <constant>0</constant> on the 4th arg), call
          <function>kzalloc()</function>. 

          <informalexample>
@@ -961,7 +965,7 @@
 <![CDATA[
  struct snd_card *card;
  struct mychip *chip;
-  card = snd_card_new(index[dev], id[dev], THIS_MODULE, NULL);
+  err = snd_card_create(index[dev], id[dev], THIS_MODULE, 0, &card);
  .....
  chip = kzalloc(sizeof(*chip), GFP_KERNEL);
 ]]>
@@ -5750,8 +5754,9 @@ struct _snd_pcm_runtime {
          ....
          struct snd_card *card;
          struct mychip *chip;
+          int err;
          ....
-          card = snd_card_new(index[dev], id[dev], THIS_MODULE, NULL);
+          err = snd_card_create(index[dev], id[dev], THIS_MODULE, 0, &card);
          ....
          chip = kzalloc(sizeof(*chip), GFP_KERNEL);
          ....
@@ -5763,7 +5768,7 @@ struct _snd_pcm_runtime {
      </informalexample>

 	When you created the chip data with
-	<function>snd_card_new()</function>, it's anyway accessible
+	<function>snd_card_create()</function>, it's anyway accessible
 	via <structfield>private_data</structfield> field.

      <informalexample>
@@ -5775,9 +5780,10 @@ struct _snd_pcm_runtime {
          ....
          struct snd_card *card;
          struct mychip *chip;
+          int err;
          ....
-          card = snd_card_new(index[dev], id[dev], THIS_MODULE,
-                              sizeof(struct mychip));
+          err = snd_card_create(index[dev], id[dev], THIS_MODULE,
+                                sizeof(struct mychip), &card);
          ....
          chip = card->private_data;
          ....
@@ -35,9 +35,3 @@ noop anticipatory deadline [cfq]
 # echo anticipatory > /sys/block/hda/queue/scheduler
 # cat /sys/block/hda/queue/scheduler
 noop [anticipatory] deadline cfq
-
-Each io queue has a set of io scheduler tunables associated with it. These
-tunables control how the io scheduler works. You can find these entries
-in:
-
-/sys/block/<device>/queue/iosched
@@ -117,10 +117,28 @@ accessible parameters:
 sampling_rate: measured in uS (10^-6 seconds), this is how often you
 want the kernel to look at the CPU usage and to make decisions on
 what to do about the frequency.  Typically this is set to values of
-around '10000' or more.
+around '10000' or more. It's default value is (cmp. with users-guide.txt):
+transition_latency * 1000
+The lowest value you can set is:
+transition_latency * 100 or it may get restricted to a value where it
+makes not sense for the kernel anymore to poll that often which depends
+on your HZ config variable (HZ=1000: max=20000us, HZ=250: max=5000).
+Be aware that transition latency is in ns and sampling_rate is in us, so you
+get the same sysfs value by default.
+Sampling rate should always get adjusted considering the transition latency
+To set the sampling rate 750 times as high as the transition latency
+in the bash (as said, 1000 is default), do:
+echo `$(($(cat cpuinfo_transition_latency) * 750 / 1000)) \
+    >ondemand/sampling_rate

-show_sampling_rate_(min|max): the minimum and maximum sampling rates
-available that you may set 'sampling_rate' to.
+show_sampling_rate_(min|max): THIS INTERFACE IS DEPRECATED, DON'T USE IT.
+You can use wider ranges now and the general
+cpuinfo_transition_latency variable (cmp. with user-guide.txt) can be
+used to obtain exactly the same info:
+show_sampling_rate_min = transtition_latency * 500    / 1000
+show_sampling_rate_max = transtition_latency * 500000 / 1000
+(divided by 1000 is to illustrate that sampling rate is in us and
+transition latency is exported ns).

 up_threshold: defines what the average CPU usage between the samplings
 of 'sampling_rate' needs to be for the kernel to make a decision on
@@ -152,6 +152,18 @@ cpuinfo_min_freq :		this file shows the minimum operating
 				frequency the processor can run at(in kHz) 
 cpuinfo_max_freq :		this file shows the maximum operating
 				frequency the processor can run at(in kHz) 
+cpuinfo_transition_latency	The time it takes on this CPU to
+				switch between two frequencies in nano
+				seconds. If unknown or known to be
+				that high that the driver does not
+				work with the ondemand governor, -1
+				(CPUFREQ_ETERNAL) will be returned.
+				Using this information can be useful
+				to choose an appropriate polling
+				frequency for a kernel governor or
+				userspace daemon. Make sure to not
+				switch the frequency too often
+				resulting in performance loss.
 scaling_driver :		this file shows what cpufreq driver is
 				used to set the frequency on this CPU

@@ -3145,6 +3145,12 @@ Your cooperation is appreciated.
 		  1 = /dev/blockrom1	Second ROM card's translation layer interface
 		  ...

+260 char	OSD (Object-based-device) SCSI Device
+		  0 = /dev/osd0		First OSD Device
+		  1 = /dev/osd1		Second OSD Device
+		  ...
+		  255 = /dev/osd255	256th OSD Device
+
 ****	ADDITIONAL /dev DIRECTORY ENTRIES

 This section details additional entries that should or may exist in
@@ -62,7 +62,6 @@ aic7*reg_print.c*
 aic7*seq.h*
 aicasm
 aicdb.h*
-asm
 asm-offsets.h
 asm_offsets.h
 autoconf.h*
@@ -0,0 +1,240 @@
+
+Introduction
+============
+
+This document describes how to use the dynamic debug (ddebug) feature.
+
+Dynamic debug is designed to allow you to dynamically enable/disable kernel
+code to obtain additional kernel information. Currently, if
+CONFIG_DYNAMIC_DEBUG is set, then all pr_debug()/dev_debug() calls can be
+dynamically enabled per-callsite.
+
+Dynamic debug has even more useful features:
+
+ * Simple query language allows turning on and off debugging statements by
+   matching any combination of:
+
+   - source filename
+   - function name
+   - line number (including ranges of line numbers)
+   - module name
+   - format string
+
+ * Provides a debugfs control file: <debugfs>/dynamic_debug/control which can be
+   read to display the complete list of known debug statements, to help guide you
+
+Controlling dynamic debug Behaviour
+===============================
+
+The behaviour of pr_debug()/dev_debug()s are controlled via writing to a
+control file in the 'debugfs' filesystem. Thus, you must first mount the debugfs
+filesystem, in order to make use of this feature. Subsequently, we refer to the
+control file as: <debugfs>/dynamic_debug/control. For example, if you want to
+enable printing from source file 'svcsock.c', line 1603 you simply do:
+
+nullarbor:~ # echo 'file svcsock.c line 1603 +p' >
+				<debugfs>/dynamic_debug/control
+
+If you make a mistake with the syntax, the write will fail thus:
+
+nullarbor:~ # echo 'file svcsock.c wtf 1 +p' >
+				<debugfs>/dynamic_debug/control
+-bash: echo: write error: Invalid argument
+
+Viewing Dynamic Debug Behaviour
+===========================
+
+You can view the currently configured behaviour of all the debug statements
+via:
+
+nullarbor:~ # cat <debugfs>/dynamic_debug/control
+# filename:lineno [module]function flags format
+/usr/src/packages/BUILD/sgi-enhancednfs-1.4/default/net/sunrpc/svc_rdma.c:323 [svcxprt_rdma]svc_rdma_cleanup - "SVCRDMA Module Removed, deregister RPC RDMA transport\012"
+/usr/src/packages/BUILD/sgi-enhancednfs-1.4/default/net/sunrpc/svc_rdma.c:341 [svcxprt_rdma]svc_rdma_init - "\011max_inline       : %d\012"
+/usr/src/packages/BUILD/sgi-enhancednfs-1.4/default/net/sunrpc/svc_rdma.c:340 [svcxprt_rdma]svc_rdma_init - "\011sq_depth         : %d\012"
+/usr/src/packages/BUILD/sgi-enhancednfs-1.4/default/net/sunrpc/svc_rdma.c:338 [svcxprt_rdma]svc_rdma_init - "\011max_requests     : %d\012"
+...
+
+
+You can also apply standard Unix text manipulation filters to this
+data, e.g.
+
+nullarbor:~ # grep -i rdma <debugfs>/dynamic_debug/control  | wc -l
+62
+
+nullarbor:~ # grep -i tcp <debugfs>/dynamic_debug/control | wc -l
+42
+
+Note in particular that the third column shows the enabled behaviour
+flags for each debug statement callsite (see below for definitions of the
+flags).  The default value, no extra behaviour enabled, is "-".  So
+you can view all the debug statement callsites with any non-default flags:
+
+nullarbor:~ # awk '$3 != "-"' <debugfs>/dynamic_debug/control
+# filename:lineno [module]function flags format
+/usr/src/packages/BUILD/sgi-enhancednfs-1.4/default/net/sunrpc/svcsock.c:1603 [sunrpc]svc_send p "svc_process: st_sendto returned %d\012"
+
+
+Command Language Reference
+==========================
+
+At the lexical level, a command comprises a sequence of words separated
+by whitespace characters.  Note that newlines are treated as word
+separators and do *not* end a command or allow multiple commands to
+be done together.  So these are all equivalent:
+
+nullarbor:~ # echo -c 'file svcsock.c line 1603 +p' >
+				<debugfs>/dynamic_debug/control
+nullarbor:~ # echo -c '  file   svcsock.c     line  1603 +p  ' >
+				<debugfs>/dynamic_debug/control
+nullarbor:~ # echo -c 'file svcsock.c\nline 1603 +p' >
+				<debugfs>/dynamic_debug/control
+nullarbor:~ # echo -n 'file svcsock.c line 1603 +p' >
+				<debugfs>/dynamic_debug/control
+
+Commands are bounded by a write() system call.  If you want to do
+multiple commands you need to do a separate "echo" for each, like:
+
+nullarbor:~ # echo 'file svcsock.c line 1603 +p' > /proc/dprintk ;\
+> echo 'file svcsock.c line 1563 +p' > /proc/dprintk
+
+or even like:
+
+nullarbor:~ # (
+> echo 'file svcsock.c line 1603 +p' ;\
+> echo 'file svcsock.c line 1563 +p' ;\
+> ) > /proc/dprintk
+
+At the syntactical level, a command comprises a sequence of match
+specifications, followed by a flags change specification.
+
+command ::= match-spec* flags-spec
+
+The match-spec's are used to choose a subset of the known dprintk()
+callsites to which to apply the flags-spec.  Think of them as a query
+with implicit ANDs between each pair.  Note that an empty list of
+match-specs is possible, but is not very useful because it will not
+match any debug statement callsites.
+
+A match specification comprises a keyword, which controls the attribute
+of the callsite to be compared, and a value to compare against.  Possible
+keywords are:
+
+match-spec ::= 'func' string |
+	       'file' string |
+	       'module' string |
+	       'format' string |
+	       'line' line-range
+
+line-range ::= lineno |
+	       '-'lineno |
+	       lineno'-' |
+	       lineno'-'lineno
+// Note: line-range cannot contain space, e.g.
+// "1-30" is valid range but "1 - 30" is not.
+
+lineno ::= unsigned-int
+
+The meanings of each keyword are:
+
+func
+    The given string is compared against the function name
+    of each callsite.  Example:
+
+    func svc_tcp_accept
+
+file
+    The given string is compared against either the full
+    pathname or the basename of the source file of each
+    callsite.  Examples:
+
+    file svcsock.c
+    file /usr/src/packages/BUILD/sgi-enhancednfs-1.4/default/net/sunrpc/svcsock.c
+
+module
+    The given string is compared against the module name
+    of each callsite.  The module name is the string as
+    seen in "lsmod", i.e. without the directory or the .ko
+    suffix and with '-' changed to '_'.  Examples:
+
+    module sunrpc
+    module nfsd
+
+format
+    The given string is searched for in the dynamic debug format
+    string.  Note that the string does not need to match the
+    entire format, only some part.  Whitespace and other
+    special characters can be escaped using C octal character
+    escape \ooo notation, e.g. the space character is \040.
+    Alternatively, the string can be enclosed in double quote
+    characters (") or single quote characters (').
+    Examples:
+
+    format svcrdma:	    // many of the NFS/RDMA server dprintks
+    format readahead	    // some dprintks in the readahead cache
+    format nfsd:\040SETATTR // one way to match a format with whitespace
+    format "nfsd: SETATTR"  // a neater way to match a format with whitespace
+    format 'nfsd: SETATTR'  // yet another way to match a format with whitespace
+
+line
+    The given line number or range of line numbers is compared
+    against the line number of each dprintk() callsite.  A single
+    line number matches the callsite line number exactly.  A
+    range of line numbers matches any callsite between the first
+    and last line number inclusive.  An empty first number means
+    the first line in the file, an empty line number means the
+    last number in the file.  Examples:
+
+    line 1603	    // exactly line 1603
+    line 1600-1605  // the six lines from line 1600 to line 1605
+    line -1605	    // the 1605 lines from line 1 to line 1605
+    line 1600-	    // all lines from line 1600 to the end of the file
+
+The flags specification comprises a change operation followed
+by one or more flag characters.  The change operation is one
+of the characters:
+
+-
+    remove the given flags
+
+
+    add the given flags
+
+=
+    set the flags to the given flags
+
+The flags are:
+
+p
+    Causes a printk() message to be emitted to dmesg
+
+Note the regexp ^[-+=][scp]+$ matches a flags specification.
+Note also that there is no convenient syntax to remove all
+the flags at once, you need to use "-psc".
+
+Examples
+========
+
+// enable the message at line 1603 of file svcsock.c
+nullarbor:~ # echo -n 'file svcsock.c line 1603 +p' >
+				<debugfs>/dynamic_debug/control
+
+// enable all the messages in file svcsock.c
+nullarbor:~ # echo -n 'file svcsock.c +p' >
+				<debugfs>/dynamic_debug/control
+
+// enable all the messages in the NFS server module
+nullarbor:~ # echo -n 'module nfsd +p' >
+				<debugfs>/dynamic_debug/control
+
+// enable all 12 messages in the function svc_process()
+nullarbor:~ # echo -n 'func svc_process +p' >
+				<debugfs>/dynamic_debug/control
+
+// disable all 12 messages in the function svc_process()
+nullarbor:~ # echo -n 'func svc_process -p' >
+				<debugfs>/dynamic_debug/control
+
+// enable messages for NFS calls READ, READLINK, READDIR and READDIR+.
+nullarbor:~ # echo -n 'format "nfsd: READ" +p' >
+				<debugfs>/dynamic_debug/control
@@ -6,20 +6,47 @@ be removed from this file.

 ---------------------------

-What:	old static regulatory information and ieee80211_regdom module parameter
-When:	2.6.29
+What:	The ieee80211_regdom module parameter
+When:	March 2010 / desktop catchup
+
+Why:	This was inherited by the CONFIG_WIRELESS_OLD_REGULATORY code,
+	and currently serves as an option for users to define an
+	ISO / IEC 3166 alpha2 code for the country they are currently
+	present in. Although there are userspace API replacements for this
+	through nl80211 distributions haven't yet caught up with implementing
+	decent alternatives through standard GUIs. Although available as an
+	option through iw or wpa_supplicant its just a matter of time before
+	distributions pick up good GUI options for this. The ideal solution
+	would actually consist of intelligent designs which would do this for
+	the user automatically even when travelling through different countries.
+	Until then we leave this module parameter as a compromise.
+
+	When userspace improves with reasonable widely-available alternatives for
+	this we will no longer need this module parameter. This entry hopes that
+	by the super-futuristically looking date of "March 2010" we will have
+	such replacements widely available.
+
+Who:	Luis R. Rodriguez <lrodriguez@atheros.com>
+
+---------------------------
+
+What:	CONFIG_WIRELESS_OLD_REGULATORY - old static regulatory information
+When:	March 2010 / desktop catchup
+
 Why:	The old regulatory infrastructure has been replaced with a new one
 	which does not require statically defined regulatory domains. We do
 	not want to keep static regulatory domains in the kernel due to the
 	the dynamic nature of regulatory law and localization. We kept around
 	the old static definitions for the regulatory domains of:
+
 		* US
 		* JP
 		* EU
+
 	and used by default the US when CONFIG_WIRELESS_OLD_REGULATORY was
-	set. We also kept around the ieee80211_regdom module parameter in case
-	some applications were relying on it. Changing regulatory domains
-	can now be done instead by using nl80211, as is done with iw.
+	set. We will remove this option once the standard Linux desktop catches
+	up with the new userspace APIs we have implemented.
+
 Who:	Luis R. Rodriguez <lrodriguez@atheros.com>

 ---------------------------
@@ -229,7 +256,9 @@ Who:	Jan Engelhardt <jengelh@computergmbh.de>
 ---------------------------

 What:	b43 support for firmware revision < 410
-When:	July 2008
+When:	The schedule was July 2008, but it was decided that we are going to keep the
+        code as long as there are no major maintanance headaches.
+	So it _could_ be removed _any_ time now, if it conflicts with something new.
 Why:	The support code for the old firmware hurts code readability/maintainability
 	and slightly hurts runtime performance. Bugfixes for the old firmware
 	are not provided by Broadcom anymore.
@@ -344,3 +373,20 @@ Why:	See commits 129f8ae9b1b5be94517da76009ea956e89104ce8 and
 	Removal is subject to fixing any remaining bugs in ACPI which may
 	cause the thermal throttling not to happen at the right time.
 Who:	Dave Jones <davej@redhat.com>, Matthew Garrett <mjg@redhat.com>
+
+-----------------------------
+
+What:	__do_IRQ all in one fits nothing interrupt handler
+When:	2.6.32
+Why:	__do_IRQ was kept for easy migration to the type flow handlers.
+	More than two years of migration time is enough.
+Who:	Thomas Gleixner <tglx@linutronix.de>
+
+-----------------------------
+
+What:	obsolete generic irq defines and typedefs
+When:	2.6.30
+Why:	The defines and typedefs (hw_interrupt_type, no_irq_type, irq_desc_t)
+	have been kept around for migration reasons. After more than two years
+	it's time to remove them finally
+Who:	Thomas Gleixner <tglx@linutronix.de>
@@ -437,8 +437,11 @@ grab BKL for cases when we close a file that had been opened r/w, but that
 can and should be done using the internal locking with smaller critical areas).
 Current worst offender is ext2_get_block()...

->fasync() is a mess. This area needs a big cleanup and that will probably
-affect locking.
+->fasync() is called without BKL protection, and is responsible for
+maintaining the FASYNC bit in filp->f_flags.  Most instances call
+fasync_helper(), which does that maintenance, so it's not normally
+something one needs to worry about.  Return values > 0 will be mapped to
+zero in the VFS layer.

 ->readdir() and ->ioctl() on directories must be changed. Ideally we would
 move ->readdir() to inode_operations and use a separate method for directory
@@ -44,6 +44,7 @@ parameter is applicable:
 	FB	The frame buffer device is enabled.
 	HW	Appropriate hardware is enabled.
 	IA-64	IA-64 architecture is enabled.
+	IMA     Integrity measurement architecture is enabled.
 	IOSCHED	More than one I/O scheduler is enabled.
 	IP_PNP	IP DHCP, BOOTP, or RARP is enabled.
 	ISAPNP	ISA PnP code is enabled.
@@ -492,10 +493,12 @@ and is between 256 and 4096 characters. It is defined in the file
 			Default: 64

 	hpet=		[X86-32,HPET] option to control HPET usage
-			Format: { enable (default) | disable | force }
+			Format: { enable (default) | disable | force |
+				verbose }
 			disable: disable HPET and use PIT instead
 			force: allow force enabled of undocumented chips (ICH4,
 			VIA, nVidia)
+			verbose: show contents of HPET registers during setup

 	com20020=	[HW,NET] ARCnet - COM20020 chipset
 			Format:
@@ -829,6 +832,9 @@ and is between 256 and 4096 characters. It is defined in the file

 	hvc_iucv=	[S390] Number of z/VM IUCV hypervisor console (HVC)
 			       terminal devices. Valid values: 0..8
+	hvc_iucv_allow=	[S390] Comma-separated list of z/VM user IDs.
+			       If specified, z/VM IUCV HVC accepts connections
+			       from listed z/VM user IDs only.

 	i8042.debug	[HW] Toggle i8042 debug mode
 	i8042.direct	[HW] Put keyboard port into non-translated mode
@@ -902,6 +908,15 @@ and is between 256 and 4096 characters. It is defined in the file
 	ihash_entries=	[KNL]
 			Set number of hash buckets for inode cache.

+	ima_audit=	[IMA]
+			Format: { "0" | "1" }
+			0 -- integrity auditing messages. (Default)
+			1 -- enable informational integrity auditing messages.
+
+	ima_hash=	[IMA]
+			Formt: { "sha1" | "md5" }
+			default: "sha1"
+
 	in2000=		[HW,SCSI]
 			See header of drivers/scsi/in2000.c.

@@ -1821,11 +1836,6 @@ and is between 256 and 4096 characters. It is defined in the file
 			autoconfiguration.
 			Ranges are in pairs (memory base and size).

-	dynamic_printk	Enables pr_debug()/dev_dbg() calls if
-			CONFIG_DYNAMIC_PRINTK_DEBUG has been enabled.
-			These can also be switched on/off via
-			<debugfs>/dynamic_printk/modules
-
 	print-fatal-signals=
 			[KNL] debug: print fatal signals
 			print-fatal-signals=1: print segfault info to
@@ -141,7 +141,8 @@ rx_ccid = 2
 	Default CCID for the receiver-sender half-connection; see tx_ccid.

 seq_window = 100
-	The initial sequence window (sec. 7.5.2).
+	The initial sequence window (sec. 7.5.2) of the sender. This influences
+	the local ackno validity and the remote seqno validity windows (7.5.1).

 tx_qlen = 5
 	The size of the transmit buffer in packets. A value of 0 corresponds
@@ -2,7 +2,7 @@

 ip_forward - BOOLEAN
 	0 - disabled (default)
-	not 0 - enabled 
+	not 0 - enabled

 	Forward Packets between interfaces.

@@ -36,49 +36,49 @@ rt_cache_rebuild_count - INTEGER
 IP Fragmentation:

 ipfrag_high_thresh - INTEGER
-	Maximum memory used to reassemble IP fragments. When 
+	Maximum memory used to reassemble IP fragments. When
 	ipfrag_high_thresh bytes of memory is allocated for this purpose,
 	the fragment handler will toss packets until ipfrag_low_thresh
 	is reached.
-	
+
 ipfrag_low_thresh - INTEGER
-	See ipfrag_high_thresh	
+	See ipfrag_high_thresh

 ipfrag_time - INTEGER
-	Time in seconds to keep an IP fragment in memory.	
+	Time in seconds to keep an IP fragment in memory.

 ipfrag_secret_interval - INTEGER
-	Regeneration interval (in seconds) of the hash secret (or lifetime 
+	Regeneration interval (in seconds) of the hash secret (or lifetime
 	for the hash secret) for IP fragments.
 	Default: 600

 ipfrag_max_dist - INTEGER
-	ipfrag_max_dist is a non-negative integer value which defines the 
-	maximum "disorder" which is allowed among fragments which share a 
-	common IP source address. Note that reordering of packets is 
-	not unusual, but if a large number of fragments arrive from a source 
-	IP address while a particular fragment queue remains incomplete, it 
-	probably indicates that one or more fragments belonging to that queue 
-	have been lost. When ipfrag_max_dist is positive, an additional check 
-	is done on fragments before they are added to a reassembly queue - if 
-	ipfrag_max_dist (or more) fragments have arrived from a particular IP 
-	address between additions to any IP fragment queue using that source 
-	address, it's presumed that one or more fragments in the queue are 
-	lost. The existing fragment queue will be dropped, and a new one 
+	ipfrag_max_dist is a non-negative integer value which defines the
+	maximum "disorder" which is allowed among fragments which share a
+	common IP source address. Note that reordering of packets is
+	not unusual, but if a large number of fragments arrive from a source
+	IP address while a particular fragment queue remains incomplete, it
+	probably indicates that one or more fragments belonging to that queue
+	have been lost. When ipfrag_max_dist is positive, an additional check
+	is done on fragments before they are added to a reassembly queue - if
+	ipfrag_max_dist (or more) fragments have arrived from a particular IP
+	address between additions to any IP fragment queue using that source
+	address, it's presumed that one or more fragments in the queue are
+	lost. The existing fragment queue will be dropped, and a new one
 	started. An ipfrag_max_dist value of zero disables this check.

 	Using a very small value, e.g. 1 or 2, for ipfrag_max_dist can
 	result in unnecessarily dropping fragment queues when normal
-	reordering of packets occurs, which could lead to poor application 
-	performance. Using a very large value, e.g. 50000, increases the 
-	likelihood of incorrectly reassembling IP fragments that originate 
+	reordering of packets occurs, which could lead to poor application
+	performance. Using a very large value, e.g. 50000, increases the
+	likelihood of incorrectly reassembling IP fragments that originate
 	from different IP datagrams, which could result in data corruption.
 	Default: 64

 INET peer storage:

 inet_peer_threshold - INTEGER
-	The approximate size of the storage.  Starting from this threshold	
+	The approximate size of the storage.  Starting from this threshold
 	entries will be thrown aggressively.  This threshold also determines
 	entries' time-to-live and time intervals between garbage collection
 	passes.  More entries, less time-to-live, less GC interval.
@@ -105,7 +105,7 @@ inet_peer_gc_maxtime - INTEGER
 	in effect under low (or absent) memory pressure on the pool.
 	Measured in seconds.

-TCP variables: 
+TCP variables:

 somaxconn - INTEGER
 	Limit of socket listen() backlog, known in userspace as SOMAXCONN.
@@ -310,7 +310,7 @@ tcp_orphan_retries - INTEGER

 tcp_reordering - INTEGER
 	Maximal reordering of packets in a TCP stream.
-	Default: 3	
+	Default: 3

 tcp_retrans_collapse - BOOLEAN
 	Bug-to-bug compatibility with some broken printers.
@@ -521,7 +521,7 @@ IP Variables:

 ip_local_port_range - 2 INTEGERS
 	Defines the local port range that is used by TCP and UDP to
-	choose the local port. The first number is the first, the 
+	choose the local port. The first number is the first, the
 	second the last local port number. Default value depends on
 	amount of memory available on the system:
 	> 128Mb 32768-61000
@@ -594,12 +594,12 @@ icmp_errors_use_inbound_ifaddr - BOOLEAN

 	If zero, icmp error messages are sent with the primary address of
 	the exiting interface.
- 
+
 	If non-zero, the message will be sent with the primary address of
 	the interface that received the packet that caused the icmp error.
 	This is the behaviour network many administrators will expect from
 	a router. And it can make debugging complicated network layouts
-	much easier. 
+	much easier.

 	Note that if no primary address exists for the interface selected,
 	then the primary address of the first non-loopback interface that
@@ -611,7 +611,7 @@ igmp_max_memberships - INTEGER
 	Change the maximum number of multicast groups we can subscribe to.
 	Default: 20

-conf/interface/*  changes special settings per interface (where "interface" is 
+conf/interface/*  changes special settings per interface (where "interface" is
 		  the name of your network interface)
 conf/all/*	  is special, changes the settings for all interfaces

@@ -625,11 +625,11 @@ log_martians - BOOLEAN
 accept_redirects - BOOLEAN
 	Accept ICMP redirect messages.
 	accept_redirects for the interface will be enabled if:
-	- both conf/{all,interface}/accept_redirects are TRUE in the case forwarding
-	  for the interface is enabled
+	- both conf/{all,interface}/accept_redirects are TRUE in the case
+	  forwarding for the interface is enabled
 	or
-	- at least one of conf/{all,interface}/accept_redirects is TRUE in the case
-	  forwarding for the interface is disabled
+	- at least one of conf/{all,interface}/accept_redirects is TRUE in the
+	  case forwarding for the interface is disabled
 	accept_redirects for the interface will be disabled otherwise
 	default TRUE (host)
 		FALSE (router)
@@ -640,8 +640,8 @@ forwarding - BOOLEAN
 mc_forwarding - BOOLEAN
 	Do multicast routing. The kernel needs to be compiled with CONFIG_MROUTE
 	and a multicast routing daemon is required.
-	conf/all/mc_forwarding must also be set to TRUE to enable multicast routing
-	for the interface
+	conf/all/mc_forwarding must also be set to TRUE to enable multicast
+	routing	for the interface

 medium_id - INTEGER
 	Integer value used to differentiate the devices by the medium they
@@ -649,7 +649,7 @@ medium_id - INTEGER
 	the broadcast packets are received only on one of them.
 	The default value 0 means that the device is the only interface
 	to its medium, value of -1 means that medium is not known.
-	
+
 	Currently, it is used to change the proxy_arp behavior:
 	the proxy_arp feature is enabled for packets forwarded between
 	two devices attached to different media.
@@ -699,16 +699,22 @@ accept_source_route - BOOLEAN
 	default TRUE (router)
 		FALSE (host)

-rp_filter - BOOLEAN
-	1 - do source validation by reversed path, as specified in RFC1812
-	    Recommended option for single homed hosts and stub network
-	    routers. Could cause troubles for complicated (not loop free)
-	    networks running a slow unreliable protocol (sort of RIP),
-	    or using static routes.
-
+rp_filter - INTEGER
 	0 - No source validation.
+	1 - Strict mode as defined in RFC3704 Strict Reverse Path
+	    Each incoming packet is tested against the FIB and if the interface
+	    is not the best reverse path the packet check will fail.
+	    By default failed packets are discarded.
+	2 - Loose mode as defined in RFC3704 Loose Reverse Path
+	    Each incoming packet's source address is also tested against the FIB
+	    and if the source address is not reachable via any interface
+	    the packet check will fail.

-	conf/all/rp_filter must also be set to TRUE to do source validation
+	Current recommended practice in RFC3704 is to enable strict mode
+	to prevent IP spoofing from DDos attacks. If using asymmetric routing
+	or other complicated routing, then loose mode is recommended.
+
+	conf/all/rp_filter must also be set to non-zero to do source validation
 	on the interface

 	Default value is 0. Note that some distributions enable it
@@ -782,6 +788,12 @@ arp_ignore - INTEGER
 	The max value from conf/{all,interface}/arp_ignore is used
 	when ARP request is received on the {interface}

+arp_notify - BOOLEAN
+	Define mode for notification of address and device changes.
+	0 - (default): do nothing
+	1 - Generate gratuitous arp replies when device is brought up
+	    or hardware address changes.
+
 arp_accept - BOOLEAN
 	Define behavior when gratuitous arp replies are received:
 	0 - drop gratuitous arp frames
@@ -823,7 +835,7 @@ apply to IPv6 [XXX?].

 bindv6only - BOOLEAN
 	Default value for IPV6_V6ONLY socket option,
-	which restricts use of the IPv6 socket to IPv6 communication 
+	which restricts use of the IPv6 socket to IPv6 communication
 	only.
 		TRUE: disable IPv4-mapped address feature
 		FALSE: enable IPv4-mapped address feature
@@ -833,19 +845,19 @@ bindv6only - BOOLEAN
 IPv6 Fragmentation:

 ip6frag_high_thresh - INTEGER
-	Maximum memory used to reassemble IPv6 fragments. When 
+	Maximum memory used to reassemble IPv6 fragments. When
 	ip6frag_high_thresh bytes of memory is allocated for this purpose,
 	the fragment handler will toss packets until ip6frag_low_thresh
 	is reached.
-	
+
 ip6frag_low_thresh - INTEGER
-	See ip6frag_high_thresh	
+	See ip6frag_high_thresh

 ip6frag_time - INTEGER
 	Time in seconds to keep an IPv6 fragment in memory.

 ip6frag_secret_interval - INTEGER
-	Regeneration interval (in seconds) of the hash secret (or lifetime 
+	Regeneration interval (in seconds) of the hash secret (or lifetime
 	for the hash secret) for IPv6 fragments.
 	Default: 600

@@ -854,17 +866,17 @@ conf/default/*:


 conf/all/*:
-	Change all the interface-specific settings.  
+	Change all the interface-specific settings.

 	[XXX:  Other special features than forwarding?]

 conf/all/forwarding - BOOLEAN
-	Enable global IPv6 forwarding between all interfaces.  
+	Enable global IPv6 forwarding between all interfaces.

-	IPv4 and IPv6 work differently here; e.g. netfilter must be used 
+	IPv4 and IPv6 work differently here; e.g. netfilter must be used
 	to control which interfaces may forward packets and which not.

-	This also sets all interfaces' Host/Router setting 
+	This also sets all interfaces' Host/Router setting
 	'forwarding' to the specified value.  See below for details.

 	This referred to as global forwarding.
@@ -875,12 +887,12 @@ proxy_ndp - BOOLEAN
 conf/interface/*:
 	Change special settings per interface.

-	The functional behaviour for certain settings is different 
+	The functional behaviour for certain settings is different
 	depending on whether local forwarding is enabled or not.

 accept_ra - BOOLEAN
 	Accept Router Advertisements; autoconfigure using them.
-	
+
 	Functional default: enabled if local forwarding is disabled.
 			    disabled if local forwarding is enabled.

@@ -926,7 +938,7 @@ accept_source_route - INTEGER
 	Default: 0

 autoconf - BOOLEAN
-	Autoconfigure addresses using Prefix Information in Router 
+	Autoconfigure addresses using Prefix Information in Router
 	Advertisements.

 	Functional default: enabled if accept_ra_pinfo is enabled.
@@ -935,11 +947,11 @@ autoconf - BOOLEAN
 dad_transmits - INTEGER
 	The amount of Duplicate Address Detection probes to send.
 	Default: 1
-	
-forwarding - BOOLEAN
-	Configure interface-specific Host/Router behaviour.  

-	Note: It is recommended to have the same setting on all 
+forwarding - BOOLEAN
+	Configure interface-specific Host/Router behaviour.
+
+	Note: It is recommended to have the same setting on all
 	interfaces; mixed router/host scenarios are rather uncommon.

 	FALSE:
@@ -948,13 +960,13 @@ forwarding - BOOLEAN

 	1. IsRouter flag is not set in Neighbour Advertisements.
 	2. Router Solicitations are being sent when necessary.
-	3. If accept_ra is TRUE (default), accept Router 
+	3. If accept_ra is TRUE (default), accept Router
 	   Advertisements (and do autoconfiguration).
 	4. If accept_redirects is TRUE (default), accept Redirects.

 	TRUE:

-	If local forwarding is enabled, Router behaviour is assumed. 
+	If local forwarding is enabled, Router behaviour is assumed.
 	This means exactly the reverse from the above:

 	1. IsRouter flag is set in Neighbour Advertisements.
@@ -989,7 +1001,7 @@ router_solicitation_interval - INTEGER
 	Default: 4

 router_solicitations - INTEGER
-	Number of Router Solicitations to send until assuming no 
+	Number of Router Solicitations to send until assuming no
 	routers are present.
 	Default: 3

@@ -1013,11 +1025,11 @@ temp_prefered_lft - INTEGER

 max_desync_factor - INTEGER
 	Maximum value for DESYNC_FACTOR, which is a random value
-	that ensures that clients don't synchronize with each 
+	that ensures that clients don't synchronize with each
 	other and generate new addresses at exactly the same time.
 	value is in seconds.
 	Default: 600
-	
+
 regen_max_retry - INTEGER
 	Number of attempts before give up attempting to generate
 	valid temporary addresses.
@@ -1025,13 +1037,15 @@ regen_max_retry - INTEGER

 max_addresses - INTEGER
 	Number of maximum addresses per interface.  0 disables limitation.
-	It is recommended not set too large value (or 0) because it would 
-	be too easy way to crash kernel to allow to create too much of 
+	It is recommended not set too large value (or 0) because it would
+	be too easy way to crash kernel to allow to create too much of
 	autoconfigured addresses.
 	Default: 16

 disable_ipv6 - BOOLEAN
-	Disable IPv6 operation.
+	Disable IPv6 operation.  If accept_dad is set to 2, this value
+	will be dynamically set to TRUE if DAD fails for the link-local
+	address.
 	Default: FALSE (enable IPv6 operation)

 accept_dad - INTEGER
@@ -0,0 +1,199 @@
+Linux Base Driver for 10 Gigabit PCI Express Intel(R) Network Connection
+========================================================================
+
+March 10, 2009
+
+
+Contents
+========
+
+- In This Release
+- Identifying Your Adapter
+- Building and Installation
+- Additional Configurations
+- Support
+
+
+
+In This Release
+===============
+
+This file describes the ixgbe Linux Base Driver for the 10 Gigabit PCI
+Express Intel(R) Network Connection.  This driver includes support for
+Itanium(R)2-based systems.
+
+For questions related to hardware requirements, refer to the documentation
+supplied with your 10 Gigabit adapter.  All hardware requirements listed apply
+to use with Linux.
+
+The following features are available in this kernel:
+ - Native VLANs
+ - Channel Bonding (teaming)
+ - SNMP
+ - Generic Receive Offload
+ - Data Center Bridging
+
+Channel Bonding documentation can be found in the Linux kernel source:
+/Documentation/networking/bonding.txt
+
+Ethtool, lspci, and ifconfig can be used to display device and driver
+specific information.
+
+
+Identifying Your Adapter
+========================
+
+This driver supports devices based on the 82598 controller and the 82599
+controller.
+
+For specific information on identifying which adapter you have, please visit:
+
+    http://support.intel.com/support/network/sb/CS-008441.htm
+
+
+Building and Installation
+=========================
+
+select m for "Intel(R) 10GbE PCI Express adapters support" located at:
+      Location:
+        -> Device Drivers
+          -> Network device support (NETDEVICES [=y])
+            -> Ethernet (10000 Mbit) (NETDEV_10000 [=y])
+
+1. make modules & make modules_install
+
+2. Load the module:
+
+# modprobe ixgbe
+
+   The insmod command can be used if the full
+   path to the driver module is specified.  For example:
+
+     insmod /lib/modules/<KERNEL VERSION>/kernel/drivers/net/ixgbe/ixgbe.ko
+
+   With 2.6 based kernels also make sure that older ixgbe drivers are
+   removed from the kernel, before loading the new module:
+
+     rmmod ixgbe; modprobe ixgbe
+
+3. Assign an IP address to the interface by entering the following, where
+   x is the interface number:
+
+     ifconfig ethx <IP_address>
+
+4. Verify that the interface works. Enter the following, where <IP_address>
+   is the IP address for another machine on the same subnet as the interface
+   that is being tested:
+
+     ping  <IP_address>
+
+
+Additional Configurations
+=========================
+
+  Viewing Link Messages
+  ---------------------
+  Link messages will not be displayed to the console if the distribution is
+  restricting system messages. In order to see network driver link messages on
+  your console, set dmesg to eight by entering the following:
+
+       dmesg -n 8
+
+  NOTE: This setting is not saved across reboots.
+
+
+  Jumbo Frames
+  ------------
+  The driver supports Jumbo Frames for all adapters. Jumbo Frames support is
+  enabled by changing the MTU to a value larger than the default of 1500.
+  The maximum value for the MTU is 16110.  Use the ifconfig command to
+  increase the MTU size.  For example:
+
+        ifconfig ethx mtu 9000 up
+
+  The maximum MTU setting for Jumbo Frames is 16110.  This value coincides
+  with the maximum Jumbo Frames size of 16128.
+
+  Generic Receive Offload, aka GRO
+  --------------------------------
+  The driver supports the in-kernel software implementation of GRO.  GRO has
+  shown that by coalescing Rx traffic into larger chunks of data, CPU
+  utilization can be significantly reduced when under large Rx load.  GRO is an
+  evolution of the previously-used LRO interface.  GRO is able to coalesce
+  other protocols besides TCP.  It's also safe to use with configurations that
+  are problematic for LRO, namely bridging and iSCSI.
+
+  GRO is enabled by default in the driver.  Future versions of ethtool will
+  support disabling and re-enabling GRO on the fly.
+
+
+  Data Center Bridging, aka DCB
+  -----------------------------
+
+  DCB is a configuration Quality of Service implementation in hardware.
+  It uses the VLAN priority tag (802.1p) to filter traffic.  That means
+  that there are 8 different priorities that traffic can be filtered into.
+  It also enables priority flow control which can limit or eliminate the
+  number of dropped packets during network stress.  Bandwidth can be
+  allocated to each of these priorities, which is enforced at the hardware
+  level.
+
+  To enable DCB support in ixgbe, you must enable the DCB netlink layer to
+  allow the userspace tools (see below) to communicate with the driver.
+  This can be found in the kernel configuration here:
+
+        -> Networking support
+          -> Networking options
+            -> Data Center Bridging support
+
+  Once this is selected, DCB support must be selected for ixgbe.  This can
+  be found here:
+
+        -> Device Drivers
+          -> Network device support (NETDEVICES [=y])
+            -> Ethernet (10000 Mbit) (NETDEV_10000 [=y])
+              -> Intel(R) 10GbE PCI Express adapters support
+                -> Data Center Bridging (DCB) Support
+
+  After these options are selected, you must rebuild your kernel and your
+  modules.
+
+  In order to use DCB, userspace tools must be downloaded and installed.
+  The dcbd tools can be found at:
+
+        http://e1000.sf.net
+
+
+  Ethtool
+  -------
+  The driver utilizes the ethtool interface for driver configuration and
+  diagnostics, as well as displaying statistical information.  Ethtool
+  version 3.0 or later is required for this functionality.
+
+  The latest release of ethtool can be found from
+  http://sourceforge.net/projects/gkernel.
+
+
+  NAPI
+  ----
+
+  NAPI (Rx polling mode) is supported in the ixgbe driver.  NAPI is enabled
+  by default in the driver.
+
+  See www.cyberus.ca/~hadi/usenix-paper.tgz for more information on NAPI.
+
+
+Support
+=======
+
+For general information, go to the Intel support website at:
+
+    http://support.intel.com
+
+or the Intel Wired Networking project hosted by Sourceforge at:
+
+    http://e1000.sourceforge.net
+
+If an issue is identified with the released source code on the supported
+kernel with a supported adapter, email the specific information related
+to the issue to e1000-devel@lists.sf.net
@@ -0,0 +1,356 @@
+
+Overview
+========
+
+This readme tries to provide some background on the hows and whys of RDS,
+and will hopefully help you find your way around the code.
+
+In addition, please see this email about RDS origins:
+http://oss.oracle.com/pipermail/rds-devel/2007-November/000228.html
+
+RDS Architecture
+================
+
+RDS provides reliable, ordered datagram delivery by using a single
+reliable connection between any two nodes in the cluster. This allows
+applications to use a single socket to talk to any other process in the
+cluster - so in a cluster with N processes you need N sockets, in contrast
+to N*N if you use a connection-oriented socket transport like TCP.
+
+RDS is not Infiniband-specific; it was designed to support different
+transports.  The current implementation used to support RDS over TCP as well
+as IB. Work is in progress to support RDS over iWARP, and using DCE to
+guarantee no dropped packets on Ethernet, it may be possible to use RDS over
+UDP in the future.
+
+The high-level semantics of RDS from the application's point of view are
+
+ *	Addressing
+        RDS uses IPv4 addresses and 16bit port numbers to identify
+        the end point of a connection. All socket operations that involve
+        passing addresses between kernel and user space generally
+        use a struct sockaddr_in.
+
+        The fact that IPv4 addresses are used does not mean the underlying
+        transport has to be IP-based. In fact, RDS over IB uses a
+        reliable IB connection; the IP address is used exclusively to
+        locate the remote node's GID (by ARPing for the given IP).
+
+        The port space is entirely independent of UDP, TCP or any other
+        protocol.
+
+ *	Socket interface
+        RDS sockets work *mostly* as you would expect from a BSD
+        socket. The next section will cover the details. At any rate,
+        all I/O is performed through the standard BSD socket API.
+        Some additions like zerocopy support are implemented through
+        control messages, while other extensions use the getsockopt/
+        setsockopt calls.
+
+        Sockets must be bound before you can send or receive data.
+        This is needed because binding also selects a transport and
+        attaches it to the socket. Once bound, the transport assignment
+        does not change. RDS will tolerate IPs moving around (eg in
+        a active-active HA scenario), but only as long as the address
+        doesn't move to a different transport.
+
+ *	sysctls
+        RDS supports a number of sysctls in /proc/sys/net/rds
+
+
+Socket Interface
+================
+
+  AF_RDS, PF_RDS, SOL_RDS
+        These constants haven't been assigned yet, because RDS isn't in
+        mainline yet. Currently, the kernel module assigns some constant
+        and publishes it to user space through two sysctl files
+                /proc/sys/net/rds/pf_rds
+                /proc/sys/net/rds/sol_rds
+
+  fd = socket(PF_RDS, SOCK_SEQPACKET, 0);
+        This creates a new, unbound RDS socket.
+
+  setsockopt(SOL_SOCKET): send and receive buffer size
+        RDS honors the send and receive buffer size socket options.
+        You are not allowed to queue more than SO_SNDSIZE bytes to
+        a socket. A message is queued when sendmsg is called, and
+        it leaves the queue when the remote system acknowledges
+        its arrival.
+
+        The SO_RCVSIZE option controls the maximum receive queue length.
+        This is a soft limit rather than a hard limit - RDS will
+        continue to accept and queue incoming messages, even if that
+        takes the queue length over the limit. However, it will also
+        mark the port as "congested" and send a congestion update to
+        the source node. The source node is supposed to throttle any
+        processes sending to this congested port.
+
+  bind(fd, &sockaddr_in, ...)
+        This binds the socket to a local IP address and port, and a
+        transport.
+
+  sendmsg(fd, ...)
+        Sends a message to the indicated recipient. The kernel will
+        transparently establish the underlying reliable connection
+        if it isn't up yet.
+
+        An attempt to send a message that exceeds SO_SNDSIZE will
+        return with -EMSGSIZE
+
+        An attempt to send a message that would take the total number
+        of queued bytes over the SO_SNDSIZE threshold will return
+        EAGAIN.
+
+        An attempt to send a message to a destination that is marked
+        as "congested" will return ENOBUFS.
+
+  recvmsg(fd, ...)
+        Receives a message that was queued to this socket. The sockets
+        recv queue accounting is adjusted, and if the queue length
+        drops below SO_SNDSIZE, the port is marked uncongested, and
+        a congestion update is sent to all peers.
+
+        Applications can ask the RDS kernel module to receive
+        notifications via control messages (for instance, there is a
+        notification when a congestion update arrived, or when a RDMA
+        operation completes). These notifications are received through
+        the msg.msg_control buffer of struct msghdr. The format of the
+        messages is described in manpages.
+
+  poll(fd)
+        RDS supports the poll interface to allow the application
+        to implement async I/O.
+
+        POLLIN handling is pretty straightforward. When there's an
+        incoming message queued to the socket, or a pending notification,
+        we signal POLLIN.
+
+        POLLOUT is a little harder. Since you can essentially send
+        to any destination, RDS will always signal POLLOUT as long as
+        there's room on the send queue (ie the number of bytes queued
+        is less than the sendbuf size).
+
+        However, the kernel will refuse to accept messages to
+        a destination marked congested - in this case you will loop
+        forever if you rely on poll to tell you what to do.
+        This isn't a trivial problem, but applications can deal with
+        this - by using congestion notifications, and by checking for
+        ENOBUFS errors returned by sendmsg.
+
+  setsockopt(SOL_RDS, RDS_CANCEL_SENT_TO, &sockaddr_in)
+        This allows the application to discard all messages queued to a
+        specific destination on this particular socket.
+
+        This allows the application to cancel outstanding messages if
+        it detects a timeout. For instance, if it tried to send a message,
+        and the remote host is unreachable, RDS will keep trying forever.
+        The application may decide it's not worth it, and cancel the
+        operation. In this case, it would use RDS_CANCEL_SENT_TO to
+        nuke any pending messages.
+
+
+RDMA for RDS
+============
+
+  see rds-rdma(7) manpage (available in rds-tools)
+
+
+Congestion Notifications
+========================
+
+  see rds(7) manpage
+
+
+RDS Protocol
+============
+
+  Message header
+
+    The message header is a 'struct rds_header' (see rds.h):
+    Fields:
+      h_sequence:
+          per-packet sequence number
+      h_ack:
+          piggybacked acknowledgment of last packet received
+      h_len:
+          length of data, not including header
+      h_sport:
+          source port
+      h_dport:
+          destination port
+      h_flags:
+          CONG_BITMAP - this is a congestion update bitmap
+          ACK_REQUIRED - receiver must ack this packet
+          RETRANSMITTED - packet has previously been sent
+      h_credit:
+          indicate to other end of connection that
+          it has more credits available (i.e. there is
+          more send room)
+      h_padding[4]:
+          unused, for future use
+      h_csum:
+          header checksum
+      h_exthdr:
+          optional data can be passed here. This is currently used for
+          passing RDMA-related information.
+
+  ACK and retransmit handling
+
+      One might think that with reliable IB connections you wouldn't need
+      to ack messages that have been received.  The problem is that IB
+      hardware generates an ack message before it has DMAed the message
+      into memory.  This creates a potential message loss if the HCA is
+      disabled for any reason between when it sends the ack and before
+      the message is DMAed and processed.  This is only a potential issue
+      if another HCA is available for fail-over.
+
+      Sending an ack immediately would allow the sender to free the sent
+      message from their send queue quickly, but could cause excessive
+      traffic to be used for acks. RDS piggybacks acks on sent data
+      packets.  Ack-only packets are reduced by only allowing one to be
+      in flight at a time, and by the sender only asking for acks when
+      its send buffers start to fill up. All retransmissions are also
+      acked.
+
+  Flow Control
+
+      RDS's IB transport uses a credit-based mechanism to verify that
+      there is space in the peer's receive buffers for more data. This
+      eliminates the need for hardware retries on the connection.
+
+  Congestion
+
+      Messages waiting in the receive queue on the receiving socket
+      are accounted against the sockets SO_RCVBUF option value.  Only
+      the payload bytes in the message are accounted for.  If the
+      number of bytes queued equals or exceeds rcvbuf then the socket
+      is congested.  All sends attempted to this socket's address
+      should return block or return -EWOULDBLOCK.
+
+      Applications are expected to be reasonably tuned such that this
+      situation very rarely occurs.  An application encountering this
+      "back-pressure" is considered a bug.
+
+      This is implemented by having each node maintain bitmaps which
+      indicate which ports on bound addresses are congested.  As the
+      bitmap changes it is sent through all the connections which
+      terminate in the local address of the bitmap which changed.
+
+      The bitmaps are allocated as connections are brought up.  This
+      avoids allocation in the interrupt handling path which queues
+      sages on sockets.  The dense bitmaps let transports send the
+      entire bitmap on any bitmap change reasonably efficiently.  This
+      is much easier to implement than some finer-grained
+      communication of per-port congestion.  The sender does a very
+      inexpensive bit test to test if the port it's about to send to
+      is congested or not.
+
+
+RDS Transport Layer
+==================
+
+  As mentioned above, RDS is not IB-specific. Its code is divided
+  into a general RDS layer and a transport layer.
+
+  The general layer handles the socket API, congestion handling,
+  loopback, stats, usermem pinning, and the connection state machine.
+
+  The transport layer handles the details of the transport. The IB
+  transport, for example, handles all the queue pairs, work requests,
+  CM event handlers, and other Infiniband details.
+
+
+RDS Kernel Structures
+=====================
+
+  struct rds_message
+    aka possibly "rds_outgoing", the generic RDS layer copies data to
+    be sent and sets header fields as needed, based on the socket API.
+    This is then queued for the individual connection and sent by the
+    connection's transport.
+  struct rds_incoming
+    a generic struct referring to incoming data that can be handed from
+    the transport to the general code and queued by the general code
+    while the socket is awoken. It is then passed back to the transport
+    code to handle the actual copy-to-user.
+  struct rds_socket
+    per-socket information
+  struct rds_connection
+    per-connection information
+  struct rds_transport
+    pointers to transport-specific functions
+  struct rds_statistics
+    non-transport-specific statistics
+  struct rds_cong_map
+    wraps the raw congestion bitmap, contains rbnode, waitq, etc.
+
+Connection management
+=====================
+
+  Connections may be in UP, DOWN, CONNECTING, DISCONNECTING, and
+  ERROR states.
+
+  The first time an attempt is made by an RDS socket to send data to
+  a node, a connection is allocated and connected. That connection is
+  then maintained forever -- if there are transport errors, the
+  connection will be dropped and re-established.
+
+  Dropping a connection while packets are queued will cause queued or
+  partially-sent datagrams to be retransmitted when the connection is
+  re-established.
+
+
+The send path
+=============
+
+  rds_sendmsg()
+    struct rds_message built from incoming data
+    CMSGs parsed (e.g. RDMA ops)
+    transport connection alloced and connected if not already
+    rds_message placed on send queue
+    send worker awoken
+  rds_send_worker()
+    calls rds_send_xmit() until queue is empty
+  rds_send_xmit()
+    transmits congestion map if one is pending
+    may set ACK_REQUIRED
+    calls transport to send either non-RDMA or RDMA message
+    (RDMA ops never retransmitted)
+  rds_ib_xmit()
+    allocs work requests from send ring
+    adds any new send credits available to peer (h_credits)
+    maps the rds_message's sg list
+    piggybacks ack
+    populates work requests
+    post send to connection's queue pair
+
+The recv path
+=============
+
+  rds_ib_recv_cq_comp_handler()
+    looks at write completions
+    unmaps recv buffer from device
+    no errors, call rds_ib_process_recv()
+    refill recv ring
+  rds_ib_process_recv()
+    validate header checksum
+    copy header to rds_ib_incoming struct if start of a new datagram
+    add to ibinc's fraglist
+    if competed datagram:
+      update cong map if datagram was cong update
+      call rds_recv_incoming() otherwise
+      note if ack is required
+  rds_recv_incoming()
+    drop duplicate packets
+    respond to pings
+    find the sock associated with this datagram
+    add to sock queue
+    wake up sock
+    do some congestion calculations
+  rds_recvmsg
+    copy data into user iovec
+    handle CMSGs
+    return to application
+
+
--- a/Show More
+++ b/Show More