You've already forked linux-apfs
mirror of
https://github.com/linux-apfs/linux-apfs.git
synced 2026-05-01 15:00:59 -07:00
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next-2.6
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next-2.6: (1750 commits) ixgbe: Allow Priority Flow Control settings to survive a device reset net: core: remove unneeded include in net/core/utils.c. e1000e: update version number e1000e: fix close interrupt race e1000e: fix loss of multicast packets e1000e: commonize tx cleanup routine to match e1000 & igb netfilter: fix nf_logger name in ebt_ulog. netfilter: fix warning in ebt_ulog init function. netfilter: fix warning about invalid const usage e1000: fix close race with interrupt e1000: cleanup clean_tx_irq routine so that it completely cleans ring e1000: fix tx hang detect logic and address dma mapping issues bridge: bad error handling when adding invalid ether address bonding: select current active slave when enslaving device for mode tlb and alb gianfar: reallocate skb when headroom is not enough for fcb Bump release date to 25Mar2009 and version to 0.22 r6040: Fix second PHY address qeth: fix wait_event_timeout handling qeth: check for completion of a running recovery qeth: unregister MAC addresses during recovery. ... Manually fixed up conflicts in: drivers/infiniband/hw/cxgb3/cxio_hal.h drivers/infiniband/hw/nes/nes_nic.c
This commit is contained in:
@@ -17,8 +17,7 @@
|
||||
</authorgroup>
|
||||
|
||||
<copyright>
|
||||
<year>2007</year>
|
||||
<year>2008</year>
|
||||
<year>2007-2009</year>
|
||||
<holder>Johannes Berg</holder>
|
||||
</copyright>
|
||||
|
||||
@@ -165,8 +164,8 @@ usage should require reading the full document.
|
||||
!Pinclude/net/mac80211.h Frame format
|
||||
</sect1>
|
||||
<sect1>
|
||||
<title>Alignment issues</title>
|
||||
<para>TBD</para>
|
||||
<title>Packet alignment</title>
|
||||
!Pnet/mac80211/rx.c Packet alignment
|
||||
</sect1>
|
||||
<sect1>
|
||||
<title>Calling into mac80211 from interrupts</title>
|
||||
@@ -223,6 +222,11 @@ usage should require reading the full document.
|
||||
!Finclude/net/mac80211.h ieee80211_key_flags
|
||||
</chapter>
|
||||
|
||||
<chapter id="powersave">
|
||||
<title>Powersave support</title>
|
||||
!Pinclude/net/mac80211.h Powersave support
|
||||
</chapter>
|
||||
|
||||
<chapter id="qos">
|
||||
<title>Multiple queues and QoS support</title>
|
||||
<para>TBD</para>
|
||||
|
||||
@@ -229,7 +229,9 @@ Who: Jan Engelhardt <jengelh@computergmbh.de>
|
||||
---------------------------
|
||||
|
||||
What: b43 support for firmware revision < 410
|
||||
When: July 2008
|
||||
When: The schedule was July 2008, but it was decided that we are going to keep the
|
||||
code as long as there are no major maintanance headaches.
|
||||
So it _could_ be removed _any_ time now, if it conflicts with something new.
|
||||
Why: The support code for the old firmware hurts code readability/maintainability
|
||||
and slightly hurts runtime performance. Bugfixes for the old firmware
|
||||
are not provided by Broadcom anymore.
|
||||
|
||||
@@ -141,7 +141,8 @@ rx_ccid = 2
|
||||
Default CCID for the receiver-sender half-connection; see tx_ccid.
|
||||
|
||||
seq_window = 100
|
||||
The initial sequence window (sec. 7.5.2).
|
||||
The initial sequence window (sec. 7.5.2) of the sender. This influences
|
||||
the local ackno validity and the remote seqno validity windows (7.5.1).
|
||||
|
||||
tx_qlen = 5
|
||||
The size of the transmit buffer in packets. A value of 0 corresponds
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
ip_forward - BOOLEAN
|
||||
0 - disabled (default)
|
||||
not 0 - enabled
|
||||
not 0 - enabled
|
||||
|
||||
Forward Packets between interfaces.
|
||||
|
||||
@@ -36,49 +36,49 @@ rt_cache_rebuild_count - INTEGER
|
||||
IP Fragmentation:
|
||||
|
||||
ipfrag_high_thresh - INTEGER
|
||||
Maximum memory used to reassemble IP fragments. When
|
||||
Maximum memory used to reassemble IP fragments. When
|
||||
ipfrag_high_thresh bytes of memory is allocated for this purpose,
|
||||
the fragment handler will toss packets until ipfrag_low_thresh
|
||||
is reached.
|
||||
|
||||
|
||||
ipfrag_low_thresh - INTEGER
|
||||
See ipfrag_high_thresh
|
||||
See ipfrag_high_thresh
|
||||
|
||||
ipfrag_time - INTEGER
|
||||
Time in seconds to keep an IP fragment in memory.
|
||||
Time in seconds to keep an IP fragment in memory.
|
||||
|
||||
ipfrag_secret_interval - INTEGER
|
||||
Regeneration interval (in seconds) of the hash secret (or lifetime
|
||||
Regeneration interval (in seconds) of the hash secret (or lifetime
|
||||
for the hash secret) for IP fragments.
|
||||
Default: 600
|
||||
|
||||
ipfrag_max_dist - INTEGER
|
||||
ipfrag_max_dist is a non-negative integer value which defines the
|
||||
maximum "disorder" which is allowed among fragments which share a
|
||||
common IP source address. Note that reordering of packets is
|
||||
not unusual, but if a large number of fragments arrive from a source
|
||||
IP address while a particular fragment queue remains incomplete, it
|
||||
probably indicates that one or more fragments belonging to that queue
|
||||
have been lost. When ipfrag_max_dist is positive, an additional check
|
||||
is done on fragments before they are added to a reassembly queue - if
|
||||
ipfrag_max_dist (or more) fragments have arrived from a particular IP
|
||||
address between additions to any IP fragment queue using that source
|
||||
address, it's presumed that one or more fragments in the queue are
|
||||
lost. The existing fragment queue will be dropped, and a new one
|
||||
ipfrag_max_dist is a non-negative integer value which defines the
|
||||
maximum "disorder" which is allowed among fragments which share a
|
||||
common IP source address. Note that reordering of packets is
|
||||
not unusual, but if a large number of fragments arrive from a source
|
||||
IP address while a particular fragment queue remains incomplete, it
|
||||
probably indicates that one or more fragments belonging to that queue
|
||||
have been lost. When ipfrag_max_dist is positive, an additional check
|
||||
is done on fragments before they are added to a reassembly queue - if
|
||||
ipfrag_max_dist (or more) fragments have arrived from a particular IP
|
||||
address between additions to any IP fragment queue using that source
|
||||
address, it's presumed that one or more fragments in the queue are
|
||||
lost. The existing fragment queue will be dropped, and a new one
|
||||
started. An ipfrag_max_dist value of zero disables this check.
|
||||
|
||||
Using a very small value, e.g. 1 or 2, for ipfrag_max_dist can
|
||||
result in unnecessarily dropping fragment queues when normal
|
||||
reordering of packets occurs, which could lead to poor application
|
||||
performance. Using a very large value, e.g. 50000, increases the
|
||||
likelihood of incorrectly reassembling IP fragments that originate
|
||||
reordering of packets occurs, which could lead to poor application
|
||||
performance. Using a very large value, e.g. 50000, increases the
|
||||
likelihood of incorrectly reassembling IP fragments that originate
|
||||
from different IP datagrams, which could result in data corruption.
|
||||
Default: 64
|
||||
|
||||
INET peer storage:
|
||||
|
||||
inet_peer_threshold - INTEGER
|
||||
The approximate size of the storage. Starting from this threshold
|
||||
The approximate size of the storage. Starting from this threshold
|
||||
entries will be thrown aggressively. This threshold also determines
|
||||
entries' time-to-live and time intervals between garbage collection
|
||||
passes. More entries, less time-to-live, less GC interval.
|
||||
@@ -105,7 +105,7 @@ inet_peer_gc_maxtime - INTEGER
|
||||
in effect under low (or absent) memory pressure on the pool.
|
||||
Measured in seconds.
|
||||
|
||||
TCP variables:
|
||||
TCP variables:
|
||||
|
||||
somaxconn - INTEGER
|
||||
Limit of socket listen() backlog, known in userspace as SOMAXCONN.
|
||||
@@ -310,7 +310,7 @@ tcp_orphan_retries - INTEGER
|
||||
|
||||
tcp_reordering - INTEGER
|
||||
Maximal reordering of packets in a TCP stream.
|
||||
Default: 3
|
||||
Default: 3
|
||||
|
||||
tcp_retrans_collapse - BOOLEAN
|
||||
Bug-to-bug compatibility with some broken printers.
|
||||
@@ -521,7 +521,7 @@ IP Variables:
|
||||
|
||||
ip_local_port_range - 2 INTEGERS
|
||||
Defines the local port range that is used by TCP and UDP to
|
||||
choose the local port. The first number is the first, the
|
||||
choose the local port. The first number is the first, the
|
||||
second the last local port number. Default value depends on
|
||||
amount of memory available on the system:
|
||||
> 128Mb 32768-61000
|
||||
@@ -594,12 +594,12 @@ icmp_errors_use_inbound_ifaddr - BOOLEAN
|
||||
|
||||
If zero, icmp error messages are sent with the primary address of
|
||||
the exiting interface.
|
||||
|
||||
|
||||
If non-zero, the message will be sent with the primary address of
|
||||
the interface that received the packet that caused the icmp error.
|
||||
This is the behaviour network many administrators will expect from
|
||||
a router. And it can make debugging complicated network layouts
|
||||
much easier.
|
||||
much easier.
|
||||
|
||||
Note that if no primary address exists for the interface selected,
|
||||
then the primary address of the first non-loopback interface that
|
||||
@@ -611,7 +611,7 @@ igmp_max_memberships - INTEGER
|
||||
Change the maximum number of multicast groups we can subscribe to.
|
||||
Default: 20
|
||||
|
||||
conf/interface/* changes special settings per interface (where "interface" is
|
||||
conf/interface/* changes special settings per interface (where "interface" is
|
||||
the name of your network interface)
|
||||
conf/all/* is special, changes the settings for all interfaces
|
||||
|
||||
@@ -625,11 +625,11 @@ log_martians - BOOLEAN
|
||||
accept_redirects - BOOLEAN
|
||||
Accept ICMP redirect messages.
|
||||
accept_redirects for the interface will be enabled if:
|
||||
- both conf/{all,interface}/accept_redirects are TRUE in the case forwarding
|
||||
for the interface is enabled
|
||||
- both conf/{all,interface}/accept_redirects are TRUE in the case
|
||||
forwarding for the interface is enabled
|
||||
or
|
||||
- at least one of conf/{all,interface}/accept_redirects is TRUE in the case
|
||||
forwarding for the interface is disabled
|
||||
- at least one of conf/{all,interface}/accept_redirects is TRUE in the
|
||||
case forwarding for the interface is disabled
|
||||
accept_redirects for the interface will be disabled otherwise
|
||||
default TRUE (host)
|
||||
FALSE (router)
|
||||
@@ -640,8 +640,8 @@ forwarding - BOOLEAN
|
||||
mc_forwarding - BOOLEAN
|
||||
Do multicast routing. The kernel needs to be compiled with CONFIG_MROUTE
|
||||
and a multicast routing daemon is required.
|
||||
conf/all/mc_forwarding must also be set to TRUE to enable multicast routing
|
||||
for the interface
|
||||
conf/all/mc_forwarding must also be set to TRUE to enable multicast
|
||||
routing for the interface
|
||||
|
||||
medium_id - INTEGER
|
||||
Integer value used to differentiate the devices by the medium they
|
||||
@@ -649,7 +649,7 @@ medium_id - INTEGER
|
||||
the broadcast packets are received only on one of them.
|
||||
The default value 0 means that the device is the only interface
|
||||
to its medium, value of -1 means that medium is not known.
|
||||
|
||||
|
||||
Currently, it is used to change the proxy_arp behavior:
|
||||
the proxy_arp feature is enabled for packets forwarded between
|
||||
two devices attached to different media.
|
||||
@@ -699,16 +699,22 @@ accept_source_route - BOOLEAN
|
||||
default TRUE (router)
|
||||
FALSE (host)
|
||||
|
||||
rp_filter - BOOLEAN
|
||||
1 - do source validation by reversed path, as specified in RFC1812
|
||||
Recommended option for single homed hosts and stub network
|
||||
routers. Could cause troubles for complicated (not loop free)
|
||||
networks running a slow unreliable protocol (sort of RIP),
|
||||
or using static routes.
|
||||
|
||||
rp_filter - INTEGER
|
||||
0 - No source validation.
|
||||
1 - Strict mode as defined in RFC3704 Strict Reverse Path
|
||||
Each incoming packet is tested against the FIB and if the interface
|
||||
is not the best reverse path the packet check will fail.
|
||||
By default failed packets are discarded.
|
||||
2 - Loose mode as defined in RFC3704 Loose Reverse Path
|
||||
Each incoming packet's source address is also tested against the FIB
|
||||
and if the source address is not reachable via any interface
|
||||
the packet check will fail.
|
||||
|
||||
conf/all/rp_filter must also be set to TRUE to do source validation
|
||||
Current recommended practice in RFC3704 is to enable strict mode
|
||||
to prevent IP spoofing from DDos attacks. If using asymmetric routing
|
||||
or other complicated routing, then loose mode is recommended.
|
||||
|
||||
conf/all/rp_filter must also be set to non-zero to do source validation
|
||||
on the interface
|
||||
|
||||
Default value is 0. Note that some distributions enable it
|
||||
@@ -782,6 +788,12 @@ arp_ignore - INTEGER
|
||||
The max value from conf/{all,interface}/arp_ignore is used
|
||||
when ARP request is received on the {interface}
|
||||
|
||||
arp_notify - BOOLEAN
|
||||
Define mode for notification of address and device changes.
|
||||
0 - (default): do nothing
|
||||
1 - Generate gratuitous arp replies when device is brought up
|
||||
or hardware address changes.
|
||||
|
||||
arp_accept - BOOLEAN
|
||||
Define behavior when gratuitous arp replies are received:
|
||||
0 - drop gratuitous arp frames
|
||||
@@ -823,7 +835,7 @@ apply to IPv6 [XXX?].
|
||||
|
||||
bindv6only - BOOLEAN
|
||||
Default value for IPV6_V6ONLY socket option,
|
||||
which restricts use of the IPv6 socket to IPv6 communication
|
||||
which restricts use of the IPv6 socket to IPv6 communication
|
||||
only.
|
||||
TRUE: disable IPv4-mapped address feature
|
||||
FALSE: enable IPv4-mapped address feature
|
||||
@@ -833,19 +845,19 @@ bindv6only - BOOLEAN
|
||||
IPv6 Fragmentation:
|
||||
|
||||
ip6frag_high_thresh - INTEGER
|
||||
Maximum memory used to reassemble IPv6 fragments. When
|
||||
Maximum memory used to reassemble IPv6 fragments. When
|
||||
ip6frag_high_thresh bytes of memory is allocated for this purpose,
|
||||
the fragment handler will toss packets until ip6frag_low_thresh
|
||||
is reached.
|
||||
|
||||
|
||||
ip6frag_low_thresh - INTEGER
|
||||
See ip6frag_high_thresh
|
||||
See ip6frag_high_thresh
|
||||
|
||||
ip6frag_time - INTEGER
|
||||
Time in seconds to keep an IPv6 fragment in memory.
|
||||
|
||||
ip6frag_secret_interval - INTEGER
|
||||
Regeneration interval (in seconds) of the hash secret (or lifetime
|
||||
Regeneration interval (in seconds) of the hash secret (or lifetime
|
||||
for the hash secret) for IPv6 fragments.
|
||||
Default: 600
|
||||
|
||||
@@ -854,17 +866,17 @@ conf/default/*:
|
||||
|
||||
|
||||
conf/all/*:
|
||||
Change all the interface-specific settings.
|
||||
Change all the interface-specific settings.
|
||||
|
||||
[XXX: Other special features than forwarding?]
|
||||
|
||||
conf/all/forwarding - BOOLEAN
|
||||
Enable global IPv6 forwarding between all interfaces.
|
||||
Enable global IPv6 forwarding between all interfaces.
|
||||
|
||||
IPv4 and IPv6 work differently here; e.g. netfilter must be used
|
||||
IPv4 and IPv6 work differently here; e.g. netfilter must be used
|
||||
to control which interfaces may forward packets and which not.
|
||||
|
||||
This also sets all interfaces' Host/Router setting
|
||||
This also sets all interfaces' Host/Router setting
|
||||
'forwarding' to the specified value. See below for details.
|
||||
|
||||
This referred to as global forwarding.
|
||||
@@ -875,12 +887,12 @@ proxy_ndp - BOOLEAN
|
||||
conf/interface/*:
|
||||
Change special settings per interface.
|
||||
|
||||
The functional behaviour for certain settings is different
|
||||
The functional behaviour for certain settings is different
|
||||
depending on whether local forwarding is enabled or not.
|
||||
|
||||
accept_ra - BOOLEAN
|
||||
Accept Router Advertisements; autoconfigure using them.
|
||||
|
||||
|
||||
Functional default: enabled if local forwarding is disabled.
|
||||
disabled if local forwarding is enabled.
|
||||
|
||||
@@ -926,7 +938,7 @@ accept_source_route - INTEGER
|
||||
Default: 0
|
||||
|
||||
autoconf - BOOLEAN
|
||||
Autoconfigure addresses using Prefix Information in Router
|
||||
Autoconfigure addresses using Prefix Information in Router
|
||||
Advertisements.
|
||||
|
||||
Functional default: enabled if accept_ra_pinfo is enabled.
|
||||
@@ -935,11 +947,11 @@ autoconf - BOOLEAN
|
||||
dad_transmits - INTEGER
|
||||
The amount of Duplicate Address Detection probes to send.
|
||||
Default: 1
|
||||
|
||||
forwarding - BOOLEAN
|
||||
Configure interface-specific Host/Router behaviour.
|
||||
|
||||
Note: It is recommended to have the same setting on all
|
||||
forwarding - BOOLEAN
|
||||
Configure interface-specific Host/Router behaviour.
|
||||
|
||||
Note: It is recommended to have the same setting on all
|
||||
interfaces; mixed router/host scenarios are rather uncommon.
|
||||
|
||||
FALSE:
|
||||
@@ -948,13 +960,13 @@ forwarding - BOOLEAN
|
||||
|
||||
1. IsRouter flag is not set in Neighbour Advertisements.
|
||||
2. Router Solicitations are being sent when necessary.
|
||||
3. If accept_ra is TRUE (default), accept Router
|
||||
3. If accept_ra is TRUE (default), accept Router
|
||||
Advertisements (and do autoconfiguration).
|
||||
4. If accept_redirects is TRUE (default), accept Redirects.
|
||||
|
||||
TRUE:
|
||||
|
||||
If local forwarding is enabled, Router behaviour is assumed.
|
||||
If local forwarding is enabled, Router behaviour is assumed.
|
||||
This means exactly the reverse from the above:
|
||||
|
||||
1. IsRouter flag is set in Neighbour Advertisements.
|
||||
@@ -989,7 +1001,7 @@ router_solicitation_interval - INTEGER
|
||||
Default: 4
|
||||
|
||||
router_solicitations - INTEGER
|
||||
Number of Router Solicitations to send until assuming no
|
||||
Number of Router Solicitations to send until assuming no
|
||||
routers are present.
|
||||
Default: 3
|
||||
|
||||
@@ -1013,11 +1025,11 @@ temp_prefered_lft - INTEGER
|
||||
|
||||
max_desync_factor - INTEGER
|
||||
Maximum value for DESYNC_FACTOR, which is a random value
|
||||
that ensures that clients don't synchronize with each
|
||||
that ensures that clients don't synchronize with each
|
||||
other and generate new addresses at exactly the same time.
|
||||
value is in seconds.
|
||||
Default: 600
|
||||
|
||||
|
||||
regen_max_retry - INTEGER
|
||||
Number of attempts before give up attempting to generate
|
||||
valid temporary addresses.
|
||||
@@ -1025,13 +1037,15 @@ regen_max_retry - INTEGER
|
||||
|
||||
max_addresses - INTEGER
|
||||
Number of maximum addresses per interface. 0 disables limitation.
|
||||
It is recommended not set too large value (or 0) because it would
|
||||
be too easy way to crash kernel to allow to create too much of
|
||||
It is recommended not set too large value (or 0) because it would
|
||||
be too easy way to crash kernel to allow to create too much of
|
||||
autoconfigured addresses.
|
||||
Default: 16
|
||||
|
||||
disable_ipv6 - BOOLEAN
|
||||
Disable IPv6 operation.
|
||||
Disable IPv6 operation. If accept_dad is set to 2, this value
|
||||
will be dynamically set to TRUE if DAD fails for the link-local
|
||||
address.
|
||||
Default: FALSE (enable IPv6 operation)
|
||||
|
||||
accept_dad - INTEGER
|
||||
|
||||
@@ -0,0 +1,199 @@
|
||||
Linux Base Driver for 10 Gigabit PCI Express Intel(R) Network Connection
|
||||
========================================================================
|
||||
|
||||
March 10, 2009
|
||||
|
||||
|
||||
Contents
|
||||
========
|
||||
|
||||
- In This Release
|
||||
- Identifying Your Adapter
|
||||
- Building and Installation
|
||||
- Additional Configurations
|
||||
- Support
|
||||
|
||||
|
||||
|
||||
In This Release
|
||||
===============
|
||||
|
||||
This file describes the ixgbe Linux Base Driver for the 10 Gigabit PCI
|
||||
Express Intel(R) Network Connection. This driver includes support for
|
||||
Itanium(R)2-based systems.
|
||||
|
||||
For questions related to hardware requirements, refer to the documentation
|
||||
supplied with your 10 Gigabit adapter. All hardware requirements listed apply
|
||||
to use with Linux.
|
||||
|
||||
The following features are available in this kernel:
|
||||
- Native VLANs
|
||||
- Channel Bonding (teaming)
|
||||
- SNMP
|
||||
- Generic Receive Offload
|
||||
- Data Center Bridging
|
||||
|
||||
Channel Bonding documentation can be found in the Linux kernel source:
|
||||
/Documentation/networking/bonding.txt
|
||||
|
||||
Ethtool, lspci, and ifconfig can be used to display device and driver
|
||||
specific information.
|
||||
|
||||
|
||||
Identifying Your Adapter
|
||||
========================
|
||||
|
||||
This driver supports devices based on the 82598 controller and the 82599
|
||||
controller.
|
||||
|
||||
For specific information on identifying which adapter you have, please visit:
|
||||
|
||||
http://support.intel.com/support/network/sb/CS-008441.htm
|
||||
|
||||
|
||||
Building and Installation
|
||||
=========================
|
||||
|
||||
select m for "Intel(R) 10GbE PCI Express adapters support" located at:
|
||||
Location:
|
||||
-> Device Drivers
|
||||
-> Network device support (NETDEVICES [=y])
|
||||
-> Ethernet (10000 Mbit) (NETDEV_10000 [=y])
|
||||
|
||||
1. make modules & make modules_install
|
||||
|
||||
2. Load the module:
|
||||
|
||||
# modprobe ixgbe
|
||||
|
||||
The insmod command can be used if the full
|
||||
path to the driver module is specified. For example:
|
||||
|
||||
insmod /lib/modules/<KERNEL VERSION>/kernel/drivers/net/ixgbe/ixgbe.ko
|
||||
|
||||
With 2.6 based kernels also make sure that older ixgbe drivers are
|
||||
removed from the kernel, before loading the new module:
|
||||
|
||||
rmmod ixgbe; modprobe ixgbe
|
||||
|
||||
3. Assign an IP address to the interface by entering the following, where
|
||||
x is the interface number:
|
||||
|
||||
ifconfig ethx <IP_address>
|
||||
|
||||
4. Verify that the interface works. Enter the following, where <IP_address>
|
||||
is the IP address for another machine on the same subnet as the interface
|
||||
that is being tested:
|
||||
|
||||
ping <IP_address>
|
||||
|
||||
|
||||
Additional Configurations
|
||||
=========================
|
||||
|
||||
Viewing Link Messages
|
||||
---------------------
|
||||
Link messages will not be displayed to the console if the distribution is
|
||||
restricting system messages. In order to see network driver link messages on
|
||||
your console, set dmesg to eight by entering the following:
|
||||
|
||||
dmesg -n 8
|
||||
|
||||
NOTE: This setting is not saved across reboots.
|
||||
|
||||
|
||||
Jumbo Frames
|
||||
------------
|
||||
The driver supports Jumbo Frames for all adapters. Jumbo Frames support is
|
||||
enabled by changing the MTU to a value larger than the default of 1500.
|
||||
The maximum value for the MTU is 16110. Use the ifconfig command to
|
||||
increase the MTU size. For example:
|
||||
|
||||
ifconfig ethx mtu 9000 up
|
||||
|
||||
The maximum MTU setting for Jumbo Frames is 16110. This value coincides
|
||||
with the maximum Jumbo Frames size of 16128.
|
||||
|
||||
Generic Receive Offload, aka GRO
|
||||
--------------------------------
|
||||
The driver supports the in-kernel software implementation of GRO. GRO has
|
||||
shown that by coalescing Rx traffic into larger chunks of data, CPU
|
||||
utilization can be significantly reduced when under large Rx load. GRO is an
|
||||
evolution of the previously-used LRO interface. GRO is able to coalesce
|
||||
other protocols besides TCP. It's also safe to use with configurations that
|
||||
are problematic for LRO, namely bridging and iSCSI.
|
||||
|
||||
GRO is enabled by default in the driver. Future versions of ethtool will
|
||||
support disabling and re-enabling GRO on the fly.
|
||||
|
||||
|
||||
Data Center Bridging, aka DCB
|
||||
-----------------------------
|
||||
|
||||
DCB is a configuration Quality of Service implementation in hardware.
|
||||
It uses the VLAN priority tag (802.1p) to filter traffic. That means
|
||||
that there are 8 different priorities that traffic can be filtered into.
|
||||
It also enables priority flow control which can limit or eliminate the
|
||||
number of dropped packets during network stress. Bandwidth can be
|
||||
allocated to each of these priorities, which is enforced at the hardware
|
||||
level.
|
||||
|
||||
To enable DCB support in ixgbe, you must enable the DCB netlink layer to
|
||||
allow the userspace tools (see below) to communicate with the driver.
|
||||
This can be found in the kernel configuration here:
|
||||
|
||||
-> Networking support
|
||||
-> Networking options
|
||||
-> Data Center Bridging support
|
||||
|
||||
Once this is selected, DCB support must be selected for ixgbe. This can
|
||||
be found here:
|
||||
|
||||
-> Device Drivers
|
||||
-> Network device support (NETDEVICES [=y])
|
||||
-> Ethernet (10000 Mbit) (NETDEV_10000 [=y])
|
||||
-> Intel(R) 10GbE PCI Express adapters support
|
||||
-> Data Center Bridging (DCB) Support
|
||||
|
||||
After these options are selected, you must rebuild your kernel and your
|
||||
modules.
|
||||
|
||||
In order to use DCB, userspace tools must be downloaded and installed.
|
||||
The dcbd tools can be found at:
|
||||
|
||||
http://e1000.sf.net
|
||||
|
||||
|
||||
Ethtool
|
||||
-------
|
||||
The driver utilizes the ethtool interface for driver configuration and
|
||||
diagnostics, as well as displaying statistical information. Ethtool
|
||||
version 3.0 or later is required for this functionality.
|
||||
|
||||
The latest release of ethtool can be found from
|
||||
http://sourceforge.net/projects/gkernel.
|
||||
|
||||
|
||||
NAPI
|
||||
----
|
||||
|
||||
NAPI (Rx polling mode) is supported in the ixgbe driver. NAPI is enabled
|
||||
by default in the driver.
|
||||
|
||||
See www.cyberus.ca/~hadi/usenix-paper.tgz for more information on NAPI.
|
||||
|
||||
|
||||
Support
|
||||
=======
|
||||
|
||||
For general information, go to the Intel support website at:
|
||||
|
||||
http://support.intel.com
|
||||
|
||||
or the Intel Wired Networking project hosted by Sourceforge at:
|
||||
|
||||
http://e1000.sourceforge.net
|
||||
|
||||
If an issue is identified with the released source code on the supported
|
||||
kernel with a supported adapter, email the specific information related
|
||||
to the issue to e1000-devel@lists.sf.net
|
||||
@@ -0,0 +1,356 @@
|
||||
|
||||
Overview
|
||||
========
|
||||
|
||||
This readme tries to provide some background on the hows and whys of RDS,
|
||||
and will hopefully help you find your way around the code.
|
||||
|
||||
In addition, please see this email about RDS origins:
|
||||
http://oss.oracle.com/pipermail/rds-devel/2007-November/000228.html
|
||||
|
||||
RDS Architecture
|
||||
================
|
||||
|
||||
RDS provides reliable, ordered datagram delivery by using a single
|
||||
reliable connection between any two nodes in the cluster. This allows
|
||||
applications to use a single socket to talk to any other process in the
|
||||
cluster - so in a cluster with N processes you need N sockets, in contrast
|
||||
to N*N if you use a connection-oriented socket transport like TCP.
|
||||
|
||||
RDS is not Infiniband-specific; it was designed to support different
|
||||
transports. The current implementation used to support RDS over TCP as well
|
||||
as IB. Work is in progress to support RDS over iWARP, and using DCE to
|
||||
guarantee no dropped packets on Ethernet, it may be possible to use RDS over
|
||||
UDP in the future.
|
||||
|
||||
The high-level semantics of RDS from the application's point of view are
|
||||
|
||||
* Addressing
|
||||
RDS uses IPv4 addresses and 16bit port numbers to identify
|
||||
the end point of a connection. All socket operations that involve
|
||||
passing addresses between kernel and user space generally
|
||||
use a struct sockaddr_in.
|
||||
|
||||
The fact that IPv4 addresses are used does not mean the underlying
|
||||
transport has to be IP-based. In fact, RDS over IB uses a
|
||||
reliable IB connection; the IP address is used exclusively to
|
||||
locate the remote node's GID (by ARPing for the given IP).
|
||||
|
||||
The port space is entirely independent of UDP, TCP or any other
|
||||
protocol.
|
||||
|
||||
* Socket interface
|
||||
RDS sockets work *mostly* as you would expect from a BSD
|
||||
socket. The next section will cover the details. At any rate,
|
||||
all I/O is performed through the standard BSD socket API.
|
||||
Some additions like zerocopy support are implemented through
|
||||
control messages, while other extensions use the getsockopt/
|
||||
setsockopt calls.
|
||||
|
||||
Sockets must be bound before you can send or receive data.
|
||||
This is needed because binding also selects a transport and
|
||||
attaches it to the socket. Once bound, the transport assignment
|
||||
does not change. RDS will tolerate IPs moving around (eg in
|
||||
a active-active HA scenario), but only as long as the address
|
||||
doesn't move to a different transport.
|
||||
|
||||
* sysctls
|
||||
RDS supports a number of sysctls in /proc/sys/net/rds
|
||||
|
||||
|
||||
Socket Interface
|
||||
================
|
||||
|
||||
AF_RDS, PF_RDS, SOL_RDS
|
||||
These constants haven't been assigned yet, because RDS isn't in
|
||||
mainline yet. Currently, the kernel module assigns some constant
|
||||
and publishes it to user space through two sysctl files
|
||||
/proc/sys/net/rds/pf_rds
|
||||
/proc/sys/net/rds/sol_rds
|
||||
|
||||
fd = socket(PF_RDS, SOCK_SEQPACKET, 0);
|
||||
This creates a new, unbound RDS socket.
|
||||
|
||||
setsockopt(SOL_SOCKET): send and receive buffer size
|
||||
RDS honors the send and receive buffer size socket options.
|
||||
You are not allowed to queue more than SO_SNDSIZE bytes to
|
||||
a socket. A message is queued when sendmsg is called, and
|
||||
it leaves the queue when the remote system acknowledges
|
||||
its arrival.
|
||||
|
||||
The SO_RCVSIZE option controls the maximum receive queue length.
|
||||
This is a soft limit rather than a hard limit - RDS will
|
||||
continue to accept and queue incoming messages, even if that
|
||||
takes the queue length over the limit. However, it will also
|
||||
mark the port as "congested" and send a congestion update to
|
||||
the source node. The source node is supposed to throttle any
|
||||
processes sending to this congested port.
|
||||
|
||||
bind(fd, &sockaddr_in, ...)
|
||||
This binds the socket to a local IP address and port, and a
|
||||
transport.
|
||||
|
||||
sendmsg(fd, ...)
|
||||
Sends a message to the indicated recipient. The kernel will
|
||||
transparently establish the underlying reliable connection
|
||||
if it isn't up yet.
|
||||
|
||||
An attempt to send a message that exceeds SO_SNDSIZE will
|
||||
return with -EMSGSIZE
|
||||
|
||||
An attempt to send a message that would take the total number
|
||||
of queued bytes over the SO_SNDSIZE threshold will return
|
||||
EAGAIN.
|
||||
|
||||
An attempt to send a message to a destination that is marked
|
||||
as "congested" will return ENOBUFS.
|
||||
|
||||
recvmsg(fd, ...)
|
||||
Receives a message that was queued to this socket. The sockets
|
||||
recv queue accounting is adjusted, and if the queue length
|
||||
drops below SO_SNDSIZE, the port is marked uncongested, and
|
||||
a congestion update is sent to all peers.
|
||||
|
||||
Applications can ask the RDS kernel module to receive
|
||||
notifications via control messages (for instance, there is a
|
||||
notification when a congestion update arrived, or when a RDMA
|
||||
operation completes). These notifications are received through
|
||||
the msg.msg_control buffer of struct msghdr. The format of the
|
||||
messages is described in manpages.
|
||||
|
||||
poll(fd)
|
||||
RDS supports the poll interface to allow the application
|
||||
to implement async I/O.
|
||||
|
||||
POLLIN handling is pretty straightforward. When there's an
|
||||
incoming message queued to the socket, or a pending notification,
|
||||
we signal POLLIN.
|
||||
|
||||
POLLOUT is a little harder. Since you can essentially send
|
||||
to any destination, RDS will always signal POLLOUT as long as
|
||||
there's room on the send queue (ie the number of bytes queued
|
||||
is less than the sendbuf size).
|
||||
|
||||
However, the kernel will refuse to accept messages to
|
||||
a destination marked congested - in this case you will loop
|
||||
forever if you rely on poll to tell you what to do.
|
||||
This isn't a trivial problem, but applications can deal with
|
||||
this - by using congestion notifications, and by checking for
|
||||
ENOBUFS errors returned by sendmsg.
|
||||
|
||||
setsockopt(SOL_RDS, RDS_CANCEL_SENT_TO, &sockaddr_in)
|
||||
This allows the application to discard all messages queued to a
|
||||
specific destination on this particular socket.
|
||||
|
||||
This allows the application to cancel outstanding messages if
|
||||
it detects a timeout. For instance, if it tried to send a message,
|
||||
and the remote host is unreachable, RDS will keep trying forever.
|
||||
The application may decide it's not worth it, and cancel the
|
||||
operation. In this case, it would use RDS_CANCEL_SENT_TO to
|
||||
nuke any pending messages.
|
||||
|
||||
|
||||
RDMA for RDS
|
||||
============
|
||||
|
||||
see rds-rdma(7) manpage (available in rds-tools)
|
||||
|
||||
|
||||
Congestion Notifications
|
||||
========================
|
||||
|
||||
see rds(7) manpage
|
||||
|
||||
|
||||
RDS Protocol
|
||||
============
|
||||
|
||||
Message header
|
||||
|
||||
The message header is a 'struct rds_header' (see rds.h):
|
||||
Fields:
|
||||
h_sequence:
|
||||
per-packet sequence number
|
||||
h_ack:
|
||||
piggybacked acknowledgment of last packet received
|
||||
h_len:
|
||||
length of data, not including header
|
||||
h_sport:
|
||||
source port
|
||||
h_dport:
|
||||
destination port
|
||||
h_flags:
|
||||
CONG_BITMAP - this is a congestion update bitmap
|
||||
ACK_REQUIRED - receiver must ack this packet
|
||||
RETRANSMITTED - packet has previously been sent
|
||||
h_credit:
|
||||
indicate to other end of connection that
|
||||
it has more credits available (i.e. there is
|
||||
more send room)
|
||||
h_padding[4]:
|
||||
unused, for future use
|
||||
h_csum:
|
||||
header checksum
|
||||
h_exthdr:
|
||||
optional data can be passed here. This is currently used for
|
||||
passing RDMA-related information.
|
||||
|
||||
ACK and retransmit handling
|
||||
|
||||
One might think that with reliable IB connections you wouldn't need
|
||||
to ack messages that have been received. The problem is that IB
|
||||
hardware generates an ack message before it has DMAed the message
|
||||
into memory. This creates a potential message loss if the HCA is
|
||||
disabled for any reason between when it sends the ack and before
|
||||
the message is DMAed and processed. This is only a potential issue
|
||||
if another HCA is available for fail-over.
|
||||
|
||||
Sending an ack immediately would allow the sender to free the sent
|
||||
message from their send queue quickly, but could cause excessive
|
||||
traffic to be used for acks. RDS piggybacks acks on sent data
|
||||
packets. Ack-only packets are reduced by only allowing one to be
|
||||
in flight at a time, and by the sender only asking for acks when
|
||||
its send buffers start to fill up. All retransmissions are also
|
||||
acked.
|
||||
|
||||
Flow Control
|
||||
|
||||
RDS's IB transport uses a credit-based mechanism to verify that
|
||||
there is space in the peer's receive buffers for more data. This
|
||||
eliminates the need for hardware retries on the connection.
|
||||
|
||||
Congestion
|
||||
|
||||
Messages waiting in the receive queue on the receiving socket
|
||||
are accounted against the sockets SO_RCVBUF option value. Only
|
||||
the payload bytes in the message are accounted for. If the
|
||||
number of bytes queued equals or exceeds rcvbuf then the socket
|
||||
is congested. All sends attempted to this socket's address
|
||||
should return block or return -EWOULDBLOCK.
|
||||
|
||||
Applications are expected to be reasonably tuned such that this
|
||||
situation very rarely occurs. An application encountering this
|
||||
"back-pressure" is considered a bug.
|
||||
|
||||
This is implemented by having each node maintain bitmaps which
|
||||
indicate which ports on bound addresses are congested. As the
|
||||
bitmap changes it is sent through all the connections which
|
||||
terminate in the local address of the bitmap which changed.
|
||||
|
||||
The bitmaps are allocated as connections are brought up. This
|
||||
avoids allocation in the interrupt handling path which queues
|
||||
sages on sockets. The dense bitmaps let transports send the
|
||||
entire bitmap on any bitmap change reasonably efficiently. This
|
||||
is much easier to implement than some finer-grained
|
||||
communication of per-port congestion. The sender does a very
|
||||
inexpensive bit test to test if the port it's about to send to
|
||||
is congested or not.
|
||||
|
||||
|
||||
RDS Transport Layer
|
||||
==================
|
||||
|
||||
As mentioned above, RDS is not IB-specific. Its code is divided
|
||||
into a general RDS layer and a transport layer.
|
||||
|
||||
The general layer handles the socket API, congestion handling,
|
||||
loopback, stats, usermem pinning, and the connection state machine.
|
||||
|
||||
The transport layer handles the details of the transport. The IB
|
||||
transport, for example, handles all the queue pairs, work requests,
|
||||
CM event handlers, and other Infiniband details.
|
||||
|
||||
|
||||
RDS Kernel Structures
|
||||
=====================
|
||||
|
||||
struct rds_message
|
||||
aka possibly "rds_outgoing", the generic RDS layer copies data to
|
||||
be sent and sets header fields as needed, based on the socket API.
|
||||
This is then queued for the individual connection and sent by the
|
||||
connection's transport.
|
||||
struct rds_incoming
|
||||
a generic struct referring to incoming data that can be handed from
|
||||
the transport to the general code and queued by the general code
|
||||
while the socket is awoken. It is then passed back to the transport
|
||||
code to handle the actual copy-to-user.
|
||||
struct rds_socket
|
||||
per-socket information
|
||||
struct rds_connection
|
||||
per-connection information
|
||||
struct rds_transport
|
||||
pointers to transport-specific functions
|
||||
struct rds_statistics
|
||||
non-transport-specific statistics
|
||||
struct rds_cong_map
|
||||
wraps the raw congestion bitmap, contains rbnode, waitq, etc.
|
||||
|
||||
Connection management
|
||||
=====================
|
||||
|
||||
Connections may be in UP, DOWN, CONNECTING, DISCONNECTING, and
|
||||
ERROR states.
|
||||
|
||||
The first time an attempt is made by an RDS socket to send data to
|
||||
a node, a connection is allocated and connected. That connection is
|
||||
then maintained forever -- if there are transport errors, the
|
||||
connection will be dropped and re-established.
|
||||
|
||||
Dropping a connection while packets are queued will cause queued or
|
||||
partially-sent datagrams to be retransmitted when the connection is
|
||||
re-established.
|
||||
|
||||
|
||||
The send path
|
||||
=============
|
||||
|
||||
rds_sendmsg()
|
||||
struct rds_message built from incoming data
|
||||
CMSGs parsed (e.g. RDMA ops)
|
||||
transport connection alloced and connected if not already
|
||||
rds_message placed on send queue
|
||||
send worker awoken
|
||||
rds_send_worker()
|
||||
calls rds_send_xmit() until queue is empty
|
||||
rds_send_xmit()
|
||||
transmits congestion map if one is pending
|
||||
may set ACK_REQUIRED
|
||||
calls transport to send either non-RDMA or RDMA message
|
||||
(RDMA ops never retransmitted)
|
||||
rds_ib_xmit()
|
||||
allocs work requests from send ring
|
||||
adds any new send credits available to peer (h_credits)
|
||||
maps the rds_message's sg list
|
||||
piggybacks ack
|
||||
populates work requests
|
||||
post send to connection's queue pair
|
||||
|
||||
The recv path
|
||||
=============
|
||||
|
||||
rds_ib_recv_cq_comp_handler()
|
||||
looks at write completions
|
||||
unmaps recv buffer from device
|
||||
no errors, call rds_ib_process_recv()
|
||||
refill recv ring
|
||||
rds_ib_process_recv()
|
||||
validate header checksum
|
||||
copy header to rds_ib_incoming struct if start of a new datagram
|
||||
add to ibinc's fraglist
|
||||
if competed datagram:
|
||||
update cong map if datagram was cong update
|
||||
call rds_recv_incoming() otherwise
|
||||
note if ack is required
|
||||
rds_recv_incoming()
|
||||
drop duplicate packets
|
||||
respond to pings
|
||||
find the sock associated with this datagram
|
||||
add to sock queue
|
||||
wake up sock
|
||||
do some congestion calculations
|
||||
rds_recvmsg
|
||||
copy data into user iovec
|
||||
handle CMSGs
|
||||
return to application
|
||||
|
||||
|
||||
@@ -0,0 +1,180 @@
|
||||
The existing interfaces for getting network packages time stamped are:
|
||||
|
||||
* SO_TIMESTAMP
|
||||
Generate time stamp for each incoming packet using the (not necessarily
|
||||
monotonous!) system time. Result is returned via recv_msg() in a
|
||||
control message as timeval (usec resolution).
|
||||
|
||||
* SO_TIMESTAMPNS
|
||||
Same time stamping mechanism as SO_TIMESTAMP, but returns result as
|
||||
timespec (nsec resolution).
|
||||
|
||||
* IP_MULTICAST_LOOP + SO_TIMESTAMP[NS]
|
||||
Only for multicasts: approximate send time stamp by receiving the looped
|
||||
packet and using its receive time stamp.
|
||||
|
||||
The following interface complements the existing ones: receive time
|
||||
stamps can be generated and returned for arbitrary packets and much
|
||||
closer to the point where the packet is really sent. Time stamps can
|
||||
be generated in software (as before) or in hardware (if the hardware
|
||||
has such a feature).
|
||||
|
||||
SO_TIMESTAMPING:
|
||||
|
||||
Instructs the socket layer which kind of information is wanted. The
|
||||
parameter is an integer with some of the following bits set. Setting
|
||||
other bits is an error and doesn't change the current state.
|
||||
|
||||
SOF_TIMESTAMPING_TX_HARDWARE: try to obtain send time stamp in hardware
|
||||
SOF_TIMESTAMPING_TX_SOFTWARE: if SOF_TIMESTAMPING_TX_HARDWARE is off or
|
||||
fails, then do it in software
|
||||
SOF_TIMESTAMPING_RX_HARDWARE: return the original, unmodified time stamp
|
||||
as generated by the hardware
|
||||
SOF_TIMESTAMPING_RX_SOFTWARE: if SOF_TIMESTAMPING_RX_HARDWARE is off or
|
||||
fails, then do it in software
|
||||
SOF_TIMESTAMPING_RAW_HARDWARE: return original raw hardware time stamp
|
||||
SOF_TIMESTAMPING_SYS_HARDWARE: return hardware time stamp transformed to
|
||||
the system time base
|
||||
SOF_TIMESTAMPING_SOFTWARE: return system time stamp generated in
|
||||
software
|
||||
|
||||
SOF_TIMESTAMPING_TX/RX determine how time stamps are generated.
|
||||
SOF_TIMESTAMPING_RAW/SYS determine how they are reported in the
|
||||
following control message:
|
||||
struct scm_timestamping {
|
||||
struct timespec systime;
|
||||
struct timespec hwtimetrans;
|
||||
struct timespec hwtimeraw;
|
||||
};
|
||||
|
||||
recvmsg() can be used to get this control message for regular incoming
|
||||
packets. For send time stamps the outgoing packet is looped back to
|
||||
the socket's error queue with the send time stamp(s) attached. It can
|
||||
be received with recvmsg(flags=MSG_ERRQUEUE). The call returns the
|
||||
original outgoing packet data including all headers preprended down to
|
||||
and including the link layer, the scm_timestamping control message and
|
||||
a sock_extended_err control message with ee_errno==ENOMSG and
|
||||
ee_origin==SO_EE_ORIGIN_TIMESTAMPING. A socket with such a pending
|
||||
bounced packet is ready for reading as far as select() is concerned.
|
||||
If the outgoing packet has to be fragmented, then only the first
|
||||
fragment is time stamped and returned to the sending socket.
|
||||
|
||||
All three values correspond to the same event in time, but were
|
||||
generated in different ways. Each of these values may be empty (= all
|
||||
zero), in which case no such value was available. If the application
|
||||
is not interested in some of these values, they can be left blank to
|
||||
avoid the potential overhead of calculating them.
|
||||
|
||||
systime is the value of the system time at that moment. This
|
||||
corresponds to the value also returned via SO_TIMESTAMP[NS]. If the
|
||||
time stamp was generated by hardware, then this field is
|
||||
empty. Otherwise it is filled in if SOF_TIMESTAMPING_SOFTWARE is
|
||||
set.
|
||||
|
||||
hwtimeraw is the original hardware time stamp. Filled in if
|
||||
SOF_TIMESTAMPING_RAW_HARDWARE is set. No assumptions about its
|
||||
relation to system time should be made.
|
||||
|
||||
hwtimetrans is the hardware time stamp transformed so that it
|
||||
corresponds as good as possible to system time. This correlation is
|
||||
not perfect; as a consequence, sorting packets received via different
|
||||
NICs by their hwtimetrans may differ from the order in which they were
|
||||
received. hwtimetrans may be non-monotonic even for the same NIC.
|
||||
Filled in if SOF_TIMESTAMPING_SYS_HARDWARE is set. Requires support
|
||||
by the network device and will be empty without that support.
|
||||
|
||||
|
||||
SIOCSHWTSTAMP:
|
||||
|
||||
Hardware time stamping must also be initialized for each device driver
|
||||
that is expected to do hardware time stamping. The parameter is:
|
||||
|
||||
struct hwtstamp_config {
|
||||
int flags; /* no flags defined right now, must be zero */
|
||||
int tx_type; /* HWTSTAMP_TX_* */
|
||||
int rx_filter; /* HWTSTAMP_FILTER_* */
|
||||
};
|
||||
|
||||
Desired behavior is passed into the kernel and to a specific device by
|
||||
calling ioctl(SIOCSHWTSTAMP) with a pointer to a struct ifreq whose
|
||||
ifr_data points to a struct hwtstamp_config. The tx_type and
|
||||
rx_filter are hints to the driver what it is expected to do. If
|
||||
the requested fine-grained filtering for incoming packets is not
|
||||
supported, the driver may time stamp more than just the requested types
|
||||
of packets.
|
||||
|
||||
A driver which supports hardware time stamping shall update the struct
|
||||
with the actual, possibly more permissive configuration. If the
|
||||
requested packets cannot be time stamped, then nothing should be
|
||||
changed and ERANGE shall be returned (in contrast to EINVAL, which
|
||||
indicates that SIOCSHWTSTAMP is not supported at all).
|
||||
|
||||
Only a processes with admin rights may change the configuration. User
|
||||
space is responsible to ensure that multiple processes don't interfere
|
||||
with each other and that the settings are reset.
|
||||
|
||||
/* possible values for hwtstamp_config->tx_type */
|
||||
enum {
|
||||
/*
|
||||
* no outgoing packet will need hardware time stamping;
|
||||
* should a packet arrive which asks for it, no hardware
|
||||
* time stamping will be done
|
||||
*/
|
||||
HWTSTAMP_TX_OFF,
|
||||
|
||||
/*
|
||||
* enables hardware time stamping for outgoing packets;
|
||||
* the sender of the packet decides which are to be
|
||||
* time stamped by setting SOF_TIMESTAMPING_TX_SOFTWARE
|
||||
* before sending the packet
|
||||
*/
|
||||
HWTSTAMP_TX_ON,
|
||||
};
|
||||
|
||||
/* possible values for hwtstamp_config->rx_filter */
|
||||
enum {
|
||||
/* time stamp no incoming packet at all */
|
||||
HWTSTAMP_FILTER_NONE,
|
||||
|
||||
/* time stamp any incoming packet */
|
||||
HWTSTAMP_FILTER_ALL,
|
||||
|
||||
/* return value: time stamp all packets requested plus some others */
|
||||
HWTSTAMP_FILTER_SOME,
|
||||
|
||||
/* PTP v1, UDP, any kind of event packet */
|
||||
HWTSTAMP_FILTER_PTP_V1_L4_EVENT,
|
||||
|
||||
...
|
||||
};
|
||||
|
||||
|
||||
DEVICE IMPLEMENTATION
|
||||
|
||||
A driver which supports hardware time stamping must support the
|
||||
SIOCSHWTSTAMP ioctl. Time stamps for received packets must be stored
|
||||
in the skb with skb_hwtstamp_set().
|
||||
|
||||
Time stamps for outgoing packets are to be generated as follows:
|
||||
- In hard_start_xmit(), check if skb_hwtstamp_check_tx_hardware()
|
||||
returns non-zero. If yes, then the driver is expected
|
||||
to do hardware time stamping.
|
||||
- If this is possible for the skb and requested, then declare
|
||||
that the driver is doing the time stamping by calling
|
||||
skb_hwtstamp_tx_in_progress(). A driver not supporting
|
||||
hardware time stamping doesn't do that. A driver must never
|
||||
touch sk_buff::tstamp! It is used to store how time stamping
|
||||
for an outgoing packets is to be done.
|
||||
- As soon as the driver has sent the packet and/or obtained a
|
||||
hardware time stamp for it, it passes the time stamp back by
|
||||
calling skb_hwtstamp_tx() with the original skb, the raw
|
||||
hardware time stamp and a handle to the device (necessary
|
||||
to convert the hardware time stamp to system time). If obtaining
|
||||
the hardware time stamp somehow fails, then the driver should
|
||||
not fall back to software time stamping. The rationale is that
|
||||
this would occur at a later time in the processing pipeline
|
||||
than other software time stamping and therefore could lead
|
||||
to unexpected deltas between time stamps.
|
||||
- If the driver did not call skb_hwtstamp_tx_in_progress(), then
|
||||
dev_hard_start_xmit() checks whether software time stamping
|
||||
is wanted as fallback and potentially generates the time stamp.
|
||||
@@ -0,0 +1 @@
|
||||
timestamping
|
||||
@@ -0,0 +1,6 @@
|
||||
CPPFLAGS = -I../../../include
|
||||
|
||||
timestamping: timestamping.c
|
||||
|
||||
clean:
|
||||
rm -f timestamping
|
||||
File diff suppressed because it is too large
Load Diff
@@ -56,6 +56,12 @@ Properties:
|
||||
hardware.
|
||||
- fsl,magic-packet : If present, indicates that the hardware supports
|
||||
waking up via magic packet.
|
||||
- bd-stash : If present, indicates that the hardware supports stashing
|
||||
buffer descriptors in the L2.
|
||||
- rx-stash-len : Denotes the number of bytes of a received buffer to stash
|
||||
in the L2.
|
||||
- rx-stash-idx : Denotes the index of the first byte from the received
|
||||
buffer to stash in the L2.
|
||||
|
||||
Example:
|
||||
ethernet@24000 {
|
||||
|
||||
@@ -1011,6 +1011,8 @@ L: netdev@vger.kernel.org
|
||||
S: Supported
|
||||
|
||||
BROADCOM TG3 GIGABIT ETHERNET DRIVER
|
||||
P: Matt Carlson
|
||||
M: mcarlson@broadcom.com
|
||||
P: Michael Chan
|
||||
M: mchan@broadcom.com
|
||||
L: netdev@vger.kernel.org
|
||||
@@ -3646,6 +3648,12 @@ M: florian.fainelli@telecomint.eu
|
||||
L: netdev@vger.kernel.org
|
||||
S: Maintained
|
||||
|
||||
RDS - RELIABLE DATAGRAM SOCKETS
|
||||
P: Andy Grover
|
||||
M: andy.grover@oracle.com
|
||||
L: rds-devel@oss.oracle.com
|
||||
S: Supported
|
||||
|
||||
READ-COPY UPDATE (RCU)
|
||||
P: Dipankar Sarma
|
||||
M: dipankar@in.ibm.com
|
||||
|
||||
@@ -62,6 +62,9 @@
|
||||
|
||||
#define SO_MARK 36
|
||||
|
||||
#define SO_TIMESTAMPING 37
|
||||
#define SCM_TIMESTAMPING SO_TIMESTAMPING
|
||||
|
||||
/* O_NONBLOCK clashes with the bits used for socket types. Therefore we
|
||||
* have to define SOCK_NONBLOCK to a different value here.
|
||||
*/
|
||||
|
||||
@@ -54,4 +54,7 @@
|
||||
|
||||
#define SO_MARK 36
|
||||
|
||||
#define SO_TIMESTAMPING 37
|
||||
#define SCM_TIMESTAMPING SO_TIMESTAMPING
|
||||
|
||||
#endif /* _ASM_SOCKET_H */
|
||||
|
||||
@@ -231,14 +231,17 @@ static struct platform_device kirkwood_switch_device = {
|
||||
|
||||
void __init kirkwood_ge00_switch_init(struct dsa_platform_data *d, int irq)
|
||||
{
|
||||
int i;
|
||||
|
||||
if (irq != NO_IRQ) {
|
||||
kirkwood_switch_resources[0].start = irq;
|
||||
kirkwood_switch_resources[0].end = irq;
|
||||
kirkwood_switch_device.num_resources = 1;
|
||||
}
|
||||
|
||||
d->mii_bus = &kirkwood_ge00_shared.dev;
|
||||
d->netdev = &kirkwood_ge00.dev;
|
||||
for (i = 0; i < d->nr_chips; i++)
|
||||
d->chip[i].mii_bus = &kirkwood_ge00_shared.dev;
|
||||
kirkwood_switch_device.dev.platform_data = d;
|
||||
|
||||
platform_device_register(&kirkwood_switch_device);
|
||||
|
||||
@@ -75,7 +75,7 @@ static struct mv643xx_eth_platform_data rd88f6281_ge00_data = {
|
||||
.duplex = DUPLEX_FULL,
|
||||
};
|
||||
|
||||
static struct dsa_platform_data rd88f6281_switch_data = {
|
||||
static struct dsa_chip_data rd88f6281_switch_chip_data = {
|
||||
.port_names[0] = "lan1",
|
||||
.port_names[1] = "lan2",
|
||||
.port_names[2] = "lan3",
|
||||
@@ -83,6 +83,11 @@ static struct dsa_platform_data rd88f6281_switch_data = {
|
||||
.port_names[5] = "cpu",
|
||||
};
|
||||
|
||||
static struct dsa_platform_data rd88f6281_switch_plat_data = {
|
||||
.nr_chips = 1,
|
||||
.chip = &rd88f6281_switch_chip_data,
|
||||
};
|
||||
|
||||
static struct mv643xx_eth_platform_data rd88f6281_ge01_data = {
|
||||
.phy_addr = MV643XX_ETH_PHY_ADDR(11),
|
||||
};
|
||||
@@ -105,12 +110,12 @@ static void __init rd88f6281_init(void)
|
||||
kirkwood_ge00_init(&rd88f6281_ge00_data);
|
||||
kirkwood_pcie_id(&dev, &rev);
|
||||
if (rev == MV88F6281_REV_A0) {
|
||||
rd88f6281_switch_data.sw_addr = 10;
|
||||
rd88f6281_switch_chip_data.sw_addr = 10;
|
||||
kirkwood_ge01_init(&rd88f6281_ge01_data);
|
||||
} else {
|
||||
rd88f6281_switch_data.port_names[4] = "wan";
|
||||
rd88f6281_switch_chip_data.port_names[4] = "wan";
|
||||
}
|
||||
kirkwood_ge00_switch_init(&rd88f6281_switch_data, NO_IRQ);
|
||||
kirkwood_ge00_switch_init(&rd88f6281_switch_plat_data, NO_IRQ);
|
||||
|
||||
kirkwood_rtc_init();
|
||||
kirkwood_sata_init(&rd88f6281_sata_data);
|
||||
|
||||
@@ -220,14 +220,17 @@ static struct platform_device orion5x_switch_device = {
|
||||
|
||||
void __init orion5x_eth_switch_init(struct dsa_platform_data *d, int irq)
|
||||
{
|
||||
int i;
|
||||
|
||||
if (irq != NO_IRQ) {
|
||||
orion5x_switch_resources[0].start = irq;
|
||||
orion5x_switch_resources[0].end = irq;
|
||||
orion5x_switch_device.num_resources = 1;
|
||||
}
|
||||
|
||||
d->mii_bus = &orion5x_eth_shared.dev;
|
||||
d->netdev = &orion5x_eth.dev;
|
||||
for (i = 0; i < d->nr_chips; i++)
|
||||
d->chip[i].mii_bus = &orion5x_eth_shared.dev;
|
||||
orion5x_switch_device.dev.platform_data = d;
|
||||
|
||||
platform_device_register(&orion5x_switch_device);
|
||||
|
||||
@@ -94,7 +94,7 @@ static struct mv643xx_eth_platform_data rd88f5181l_fxo_eth_data = {
|
||||
.duplex = DUPLEX_FULL,
|
||||
};
|
||||
|
||||
static struct dsa_platform_data rd88f5181l_fxo_switch_data = {
|
||||
static struct dsa_chip_data rd88f5181l_fxo_switch_chip_data = {
|
||||
.port_names[0] = "lan2",
|
||||
.port_names[1] = "lan1",
|
||||
.port_names[2] = "wan",
|
||||
@@ -103,6 +103,11 @@ static struct dsa_platform_data rd88f5181l_fxo_switch_data = {
|
||||
.port_names[7] = "lan3",
|
||||
};
|
||||
|
||||
static struct dsa_platform_data rd88f5181l_fxo_switch_plat_data = {
|
||||
.nr_chips = 1,
|
||||
.chip = &rd88f5181l_fxo_switch_chip_data,
|
||||
};
|
||||
|
||||
static void __init rd88f5181l_fxo_init(void)
|
||||
{
|
||||
/*
|
||||
@@ -117,7 +122,7 @@ static void __init rd88f5181l_fxo_init(void)
|
||||
*/
|
||||
orion5x_ehci0_init();
|
||||
orion5x_eth_init(&rd88f5181l_fxo_eth_data);
|
||||
orion5x_eth_switch_init(&rd88f5181l_fxo_switch_data, NO_IRQ);
|
||||
orion5x_eth_switch_init(&rd88f5181l_fxo_switch_plat_data, NO_IRQ);
|
||||
orion5x_uart0_init();
|
||||
|
||||
orion5x_setup_dev_boot_win(RD88F5181L_FXO_NOR_BOOT_BASE,
|
||||
|
||||
@@ -95,7 +95,7 @@ static struct mv643xx_eth_platform_data rd88f5181l_ge_eth_data = {
|
||||
.duplex = DUPLEX_FULL,
|
||||
};
|
||||
|
||||
static struct dsa_platform_data rd88f5181l_ge_switch_data = {
|
||||
static struct dsa_chip_data rd88f5181l_ge_switch_chip_data = {
|
||||
.port_names[0] = "lan2",
|
||||
.port_names[1] = "lan1",
|
||||
.port_names[2] = "wan",
|
||||
@@ -104,6 +104,11 @@ static struct dsa_platform_data rd88f5181l_ge_switch_data = {
|
||||
.port_names[7] = "lan3",
|
||||
};
|
||||
|
||||
static struct dsa_platform_data rd88f5181l_ge_switch_plat_data = {
|
||||
.nr_chips = 1,
|
||||
.chip = &rd88f5181l_ge_switch_chip_data,
|
||||
};
|
||||
|
||||
static struct i2c_board_info __initdata rd88f5181l_ge_i2c_rtc = {
|
||||
I2C_BOARD_INFO("ds1338", 0x68),
|
||||
};
|
||||
@@ -122,7 +127,8 @@ static void __init rd88f5181l_ge_init(void)
|
||||
*/
|
||||
orion5x_ehci0_init();
|
||||
orion5x_eth_init(&rd88f5181l_ge_eth_data);
|
||||
orion5x_eth_switch_init(&rd88f5181l_ge_switch_data, gpio_to_irq(8));
|
||||
orion5x_eth_switch_init(&rd88f5181l_ge_switch_plat_data,
|
||||
gpio_to_irq(8));
|
||||
orion5x_i2c_init();
|
||||
orion5x_uart0_init();
|
||||
|
||||
|
||||
@@ -35,7 +35,7 @@ static struct mv643xx_eth_platform_data rd88f6183ap_ge_eth_data = {
|
||||
.duplex = DUPLEX_FULL,
|
||||
};
|
||||
|
||||
static struct dsa_platform_data rd88f6183ap_ge_switch_data = {
|
||||
static struct dsa_chip_data rd88f6183ap_ge_switch_chip_data = {
|
||||
.port_names[0] = "lan1",
|
||||
.port_names[1] = "lan2",
|
||||
.port_names[2] = "lan3",
|
||||
@@ -44,6 +44,11 @@ static struct dsa_platform_data rd88f6183ap_ge_switch_data = {
|
||||
.port_names[5] = "cpu",
|
||||
};
|
||||
|
||||
static struct dsa_platform_data rd88f6183ap_ge_switch_plat_data = {
|
||||
.nr_chips = 1,
|
||||
.chip = &rd88f6183ap_ge_switch_chip_data,
|
||||
};
|
||||
|
||||
static struct mtd_partition rd88f6183ap_ge_partitions[] = {
|
||||
{
|
||||
.name = "kernel",
|
||||
@@ -89,7 +94,8 @@ static void __init rd88f6183ap_ge_init(void)
|
||||
*/
|
||||
orion5x_ehci0_init();
|
||||
orion5x_eth_init(&rd88f6183ap_ge_eth_data);
|
||||
orion5x_eth_switch_init(&rd88f6183ap_ge_switch_data, gpio_to_irq(3));
|
||||
orion5x_eth_switch_init(&rd88f6183ap_ge_switch_plat_data,
|
||||
gpio_to_irq(3));
|
||||
spi_register_board_info(rd88f6183ap_ge_spi_slave_info,
|
||||
ARRAY_SIZE(rd88f6183ap_ge_spi_slave_info));
|
||||
orion5x_spi_init();
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user