Merge master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6

* master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6: (25 commits)
  [XFRM]: Fix OOPSes in xfrm_audit_log().
  [TCP]: cleanup of htcp (resend)
  [TCP]: Use read mostly for CUBIC parameters.
  [NETFILTER]: nf_conntrack_tcp: make sysctl variables static
  [NETFILTER]: ip6t_mh: drop piggyback payload packet on MH packets
  [NETFILTER]: Fix whitespace errors
  [NETFILTER]: Kconfig: improve dependency handling
  [NETFILTER]: xt_mac/xt_CLASSIFY: use IPv6 hook names for IPv6 registration
  [NETFILTER]: nf_conntrack: change nf_conntrack_l[34]proto_unregister to void
  [NETFILTER]: nf_conntrack: properly use RCU for nf_conntrack_destroyed callback
  [NETFILTER]: ip_conntrack: properly use RCU for ip_conntrack_destroyed callback
  [NETFILTER]: nf_conntrack: fix invalid conntrack statistics RCU assumption
  [NETFILTER]: ip_conntrack: fix invalid conntrack statistics RCU assumption
  [NETFILTER]: nf_conntrack: properly use RCU API for nf_ct_protos/nf_ct_l3protos arrays
  [NETFILTER]: ip_conntrack: properly use RCU API for ip_ct_protos array
  [NETFILTER]: nf_nat: properly use RCU API for nf_nat_protos array
  [NETFILTER]: ip_nat: properly use RCU API for ip_nat_protos array
  [NETFILTER]: nf_log: minor cleanups
  [NETFILTER]: nf_log: switch logger registration/unregistration to mutex
  [NETFILTER]: nf_log: make nf_log_unregister_pf return void
  ...
This commit is contained in:
Linus Torvalds
2007-02-12 15:34:17 -08:00
71 changed files with 768 additions and 709 deletions
+2 -2
View File
@@ -172,8 +172,8 @@ struct nf_logger {
/* Function to register/unregister log function. */
int nf_log_register(int pf, struct nf_logger *logger);
int nf_log_unregister_pf(int pf);
void nf_log_unregister_logger(struct nf_logger *logger);
void nf_log_unregister(struct nf_logger *logger);
void nf_log_unregister_pf(int pf);
/* Calls the registered backend logging function */
void nf_log_packet(int pf,
@@ -301,6 +301,12 @@ extern unsigned int ip_conntrack_htable_size;
extern int ip_conntrack_checksum;
#define CONNTRACK_STAT_INC(count) (__get_cpu_var(ip_conntrack_stat).count++)
#define CONNTRACK_STAT_INC_ATOMIC(count) \
do { \
local_bh_disable(); \
__get_cpu_var(ip_conntrack_stat).count++; \
local_bh_enable(); \
} while (0)
#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
#include <linux/notifier.h>
+6
View File
@@ -257,6 +257,12 @@ extern int nf_conntrack_max;
DECLARE_PER_CPU(struct ip_conntrack_stat, nf_conntrack_stat);
#define NF_CT_STAT_INC(count) (__get_cpu_var(nf_conntrack_stat).count++)
#define NF_CT_STAT_INC_ATOMIC(count) \
do { \
local_bh_disable(); \
__get_cpu_var(nf_conntrack_stat).count++; \
local_bh_enable(); \
} while (0)
/* no helper, no nat */
#define NF_CT_F_BASIC 0
+2 -2
View File
@@ -89,7 +89,7 @@ extern struct nf_conntrack_l3proto *nf_ct_l3protos[AF_MAX];
/* Protocol registration. */
extern int nf_conntrack_l3proto_register(struct nf_conntrack_l3proto *proto);
extern int nf_conntrack_l3proto_unregister(struct nf_conntrack_l3proto *proto);
extern void nf_conntrack_l3proto_unregister(struct nf_conntrack_l3proto *proto);
extern struct nf_conntrack_l3proto *
nf_ct_l3proto_find_get(u_int16_t l3proto);
@@ -106,7 +106,7 @@ __nf_ct_l3proto_find(u_int16_t l3proto)
{
if (unlikely(l3proto >= AF_MAX))
return &nf_conntrack_l3proto_generic;
return nf_ct_l3protos[l3proto];
return rcu_dereference(nf_ct_l3protos[l3proto]);
}
#endif /*_NF_CONNTRACK_L3PROTO_H*/
+1 -1
View File
@@ -109,7 +109,7 @@ extern void nf_ct_l4proto_put(struct nf_conntrack_l4proto *p);
/* Protocol registration. */
extern int nf_conntrack_l4proto_register(struct nf_conntrack_l4proto *proto);
extern int nf_conntrack_l4proto_unregister(struct nf_conntrack_l4proto *proto);
extern void nf_conntrack_l4proto_unregister(struct nf_conntrack_l4proto *proto);
/* Generic netlink helpers */
extern int nf_ct_port_tuple_to_nfattr(struct sk_buff *skb,
+1 -1
View File
@@ -208,7 +208,7 @@ static int __init ebt_log_init(void)
static void __exit ebt_log_fini(void)
{
nf_log_unregister_logger(&ebt_log_logger);
nf_log_unregister(&ebt_log_logger);
ebt_unregister_watcher(&log);
}
+1 -1
View File
@@ -323,7 +323,7 @@ static void __exit ebt_ulog_fini(void)
ebt_ulog_buff_t *ub;
int i;
nf_log_unregister_logger(&ebt_ulog_logger);
nf_log_unregister(&ebt_ulog_logger);
ebt_unregister_watcher(&ulog);
for (i = 0; i < EBT_ULOG_MAXNLGROUPS; i++) {
ub = &ulog_buffers[i];
+5 -3
View File
@@ -226,7 +226,7 @@ config IP_NF_QUEUE
config IP_NF_IPTABLES
tristate "IP tables support (required for filtering/masq/NAT)"
depends on NETFILTER_XTABLES
select NETFILTER_XTABLES
help
iptables is a general, extensible packet identification framework.
The packet filtering and full NAT (masquerading, port forwarding,
@@ -606,7 +606,9 @@ config IP_NF_TARGET_TTL
config IP_NF_TARGET_CLUSTERIP
tristate "CLUSTERIP target support (EXPERIMENTAL)"
depends on IP_NF_MANGLE && EXPERIMENTAL
depends on (IP_NF_CONNTRACK && IP_NF_CONNTRACK_MARK) || (NF_CONNTRACK_MARK && NF_CONNTRACK_IPV4)
depends on IP_NF_CONNTRACK || NF_CONNTRACK_IPV4
select IP_NF_CONNTRACK_MARK if IP_NF_CONNTRACK
select NF_CONNTRACK_MARK if NF_CONNTRACK_IPV4
help
The CLUSTERIP target allows you to build load-balancing clusters of
network servers without having a dedicated load-balancing
@@ -629,7 +631,7 @@ config IP_NF_RAW
# ARP tables
config IP_NF_ARPTABLES
tristate "ARP tables support"
depends on NETFILTER_XTABLES
select NETFILTER_XTABLES
help
arptables is a general, extensible packet identification framework.
The ARP packet filtering and mangling (manipulation)subsystems
+32 -19
View File
@@ -303,6 +303,7 @@ destroy_conntrack(struct nf_conntrack *nfct)
struct ip_conntrack *ct = (struct ip_conntrack *)nfct;
struct ip_conntrack_protocol *proto;
struct ip_conntrack_helper *helper;
typeof(ip_conntrack_destroyed) destroyed;
DEBUGP("destroy_conntrack(%p)\n", ct);
IP_NF_ASSERT(atomic_read(&nfct->use) == 0);
@@ -318,12 +319,16 @@ destroy_conntrack(struct nf_conntrack *nfct)
/* To make sure we don't get any weird locking issues here:
* destroy_conntrack() MUST NOT be called with a write lock
* to ip_conntrack_lock!!! -HW */
rcu_read_lock();
proto = __ip_conntrack_proto_find(ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum);
if (proto && proto->destroy)
proto->destroy(ct);
if (ip_conntrack_destroyed)
ip_conntrack_destroyed(ct);
destroyed = rcu_dereference(ip_conntrack_destroyed);
if (destroyed)
destroyed(ct);
rcu_read_unlock();
write_lock_bh(&ip_conntrack_lock);
/* Expectations will have been removed in clean_from_lists,
@@ -536,7 +541,7 @@ static int early_drop(struct list_head *chain)
if (del_timer(&ct->timeout)) {
death_by_timeout((unsigned long)ct);
dropped = 1;
CONNTRACK_STAT_INC(early_drop);
CONNTRACK_STAT_INC_ATOMIC(early_drop);
}
ip_conntrack_put(ct);
return dropped;
@@ -595,13 +600,13 @@ ip_conntrack_proto_find_get(u_int8_t protocol)
{
struct ip_conntrack_protocol *p;
preempt_disable();
rcu_read_lock();
p = __ip_conntrack_proto_find(protocol);
if (p) {
if (!try_module_get(p->me))
p = &ip_conntrack_generic_protocol;
}
preempt_enable();
rcu_read_unlock();
return p;
}
@@ -802,7 +807,7 @@ unsigned int ip_conntrack_in(unsigned int hooknum,
/* Previously seen (loopback or untracked)? Ignore. */
if ((*pskb)->nfct) {
CONNTRACK_STAT_INC(ignore);
CONNTRACK_STAT_INC_ATOMIC(ignore);
return NF_ACCEPT;
}
@@ -830,6 +835,7 @@ unsigned int ip_conntrack_in(unsigned int hooknum,
}
#endif
/* rcu_read_lock()ed by nf_hook_slow */
proto = __ip_conntrack_proto_find((*pskb)->nh.iph->protocol);
/* It may be an special packet, error, unclean...
@@ -837,20 +843,20 @@ unsigned int ip_conntrack_in(unsigned int hooknum,
* core what to do with the packet. */
if (proto->error != NULL
&& (ret = proto->error(*pskb, &ctinfo, hooknum)) <= 0) {
CONNTRACK_STAT_INC(error);
CONNTRACK_STAT_INC(invalid);
CONNTRACK_STAT_INC_ATOMIC(error);
CONNTRACK_STAT_INC_ATOMIC(invalid);
return -ret;
}
if (!(ct = resolve_normal_ct(*pskb, proto,&set_reply,hooknum,&ctinfo))) {
/* Not valid part of a connection */
CONNTRACK_STAT_INC(invalid);
CONNTRACK_STAT_INC_ATOMIC(invalid);
return NF_ACCEPT;
}
if (IS_ERR(ct)) {
/* Too stressed to deal. */
CONNTRACK_STAT_INC(drop);
CONNTRACK_STAT_INC_ATOMIC(drop);
return NF_DROP;
}
@@ -862,7 +868,7 @@ unsigned int ip_conntrack_in(unsigned int hooknum,
* the netfilter core what to do*/
nf_conntrack_put((*pskb)->nfct);
(*pskb)->nfct = NULL;
CONNTRACK_STAT_INC(invalid);
CONNTRACK_STAT_INC_ATOMIC(invalid);
return -ret;
}
@@ -875,8 +881,15 @@ unsigned int ip_conntrack_in(unsigned int hooknum,
int invert_tuplepr(struct ip_conntrack_tuple *inverse,
const struct ip_conntrack_tuple *orig)
{
return ip_ct_invert_tuple(inverse, orig,
__ip_conntrack_proto_find(orig->dst.protonum));
struct ip_conntrack_protocol *proto;
int ret;
rcu_read_lock();
proto = __ip_conntrack_proto_find(orig->dst.protonum);
ret = ip_ct_invert_tuple(inverse, orig, proto);
rcu_read_unlock();
return ret;
}
/* Would two expected things clash? */
@@ -1354,7 +1367,7 @@ static void free_conntrack_hash(struct list_head *hash, int vmalloced,int size)
supposed to kill the mall. */
void ip_conntrack_cleanup(void)
{
ip_ct_attach = NULL;
rcu_assign_pointer(ip_ct_attach, NULL);
/* This makes sure all current packets have passed through
netfilter framework. Roll on, two-stage module
@@ -1507,15 +1520,15 @@ int __init ip_conntrack_init(void)
/* Don't NEED lock here, but good form anyway. */
write_lock_bh(&ip_conntrack_lock);
for (i = 0; i < MAX_IP_CT_PROTO; i++)
ip_ct_protos[i] = &ip_conntrack_generic_protocol;
rcu_assign_pointer(ip_ct_protos[i], &ip_conntrack_generic_protocol);
/* Sew in builtin protocols. */
ip_ct_protos[IPPROTO_TCP] = &ip_conntrack_protocol_tcp;
ip_ct_protos[IPPROTO_UDP] = &ip_conntrack_protocol_udp;
ip_ct_protos[IPPROTO_ICMP] = &ip_conntrack_protocol_icmp;
rcu_assign_pointer(ip_ct_protos[IPPROTO_TCP], &ip_conntrack_protocol_tcp);
rcu_assign_pointer(ip_ct_protos[IPPROTO_UDP], &ip_conntrack_protocol_udp);
rcu_assign_pointer(ip_ct_protos[IPPROTO_ICMP], &ip_conntrack_protocol_icmp);
write_unlock_bh(&ip_conntrack_lock);
/* For use by ipt_REJECT */
ip_ct_attach = ip_conntrack_attach;
rcu_assign_pointer(ip_ct_attach, ip_conntrack_attach);
/* Set up fake conntrack:
- to never be deleted, not in any hashes */
+4 -5
View File
@@ -796,7 +796,7 @@ int ip_conntrack_protocol_register(struct ip_conntrack_protocol *proto)
ret = -EBUSY;
goto out;
}
ip_ct_protos[proto->proto] = proto;
rcu_assign_pointer(ip_ct_protos[proto->proto], proto);
out:
write_unlock_bh(&ip_conntrack_lock);
return ret;
@@ -805,11 +805,10 @@ int ip_conntrack_protocol_register(struct ip_conntrack_protocol *proto)
void ip_conntrack_protocol_unregister(struct ip_conntrack_protocol *proto)
{
write_lock_bh(&ip_conntrack_lock);
ip_ct_protos[proto->proto] = &ip_conntrack_generic_protocol;
rcu_assign_pointer(ip_ct_protos[proto->proto],
&ip_conntrack_generic_protocol);
write_unlock_bh(&ip_conntrack_lock);
/* Somebody could be still looking at the proto in bh. */
synchronize_net();
synchronize_rcu();
/* Remove all contrack entries for this protocol */
ip_ct_iterate_cleanup(kill_proto, &proto->proto);
+37 -37
View File
@@ -50,7 +50,7 @@ static struct ip_nat_protocol *ip_nat_protos[MAX_IP_NAT_PROTO];
static inline struct ip_nat_protocol *
__ip_nat_proto_find(u_int8_t protonum)
{
return ip_nat_protos[protonum];
return rcu_dereference(ip_nat_protos[protonum]);
}
struct ip_nat_protocol *
@@ -58,13 +58,11 @@ ip_nat_proto_find_get(u_int8_t protonum)
{
struct ip_nat_protocol *p;
/* we need to disable preemption to make sure 'p' doesn't get
* removed until we've grabbed the reference */
preempt_disable();
rcu_read_lock();
p = __ip_nat_proto_find(protonum);
if (!try_module_get(p->me))
p = &ip_nat_unknown_protocol;
preempt_enable();
rcu_read_unlock();
return p;
}
@@ -120,8 +118,8 @@ static int
in_range(const struct ip_conntrack_tuple *tuple,
const struct ip_nat_range *range)
{
struct ip_nat_protocol *proto =
__ip_nat_proto_find(tuple->dst.protonum);
struct ip_nat_protocol *proto;
int ret = 0;
/* If we are supposed to map IPs, then we must be in the
range specified, otherwise let this drag us onto a new src IP. */
@@ -131,12 +129,15 @@ in_range(const struct ip_conntrack_tuple *tuple,
return 0;
}
rcu_read_lock();
proto = __ip_nat_proto_find(tuple->dst.protonum);
if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)
|| proto->in_range(tuple, IP_NAT_MANIP_SRC,
&range->min, &range->max))
return 1;
ret = 1;
rcu_read_unlock();
return 0;
return ret;
}
static inline int
@@ -260,27 +261,25 @@ get_unique_tuple(struct ip_conntrack_tuple *tuple,
/* 3) The per-protocol part of the manip is made to map into
the range to make a unique tuple. */
proto = ip_nat_proto_find_get(orig_tuple->dst.protonum);
rcu_read_lock();
proto = __ip_nat_proto_find(orig_tuple->dst.protonum);
/* Change protocol info to have some randomization */
if (range->flags & IP_NAT_RANGE_PROTO_RANDOM) {
proto->unique_tuple(tuple, range, maniptype, conntrack);
ip_nat_proto_put(proto);
return;
goto out;
}
/* Only bother mapping if it's not already in range and unique */
if ((!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)
|| proto->in_range(tuple, maniptype, &range->min, &range->max))
&& !ip_nat_used_tuple(tuple, conntrack)) {
ip_nat_proto_put(proto);
return;
}
&& !ip_nat_used_tuple(tuple, conntrack))
goto out;
/* Last change: get protocol to try to obtain unique tuple. */
proto->unique_tuple(tuple, range, maniptype, conntrack);
ip_nat_proto_put(proto);
out:
rcu_read_unlock();
}
unsigned int
@@ -360,12 +359,11 @@ manip_pkt(u_int16_t proto,
iph = (void *)(*pskb)->data + iphdroff;
/* Manipulate protcol part. */
p = ip_nat_proto_find_get(proto);
if (!p->manip_pkt(pskb, iphdroff, target, maniptype)) {
ip_nat_proto_put(p);
/* rcu_read_lock()ed by nf_hook_slow */
p = __ip_nat_proto_find(proto);
if (!p->manip_pkt(pskb, iphdroff, target, maniptype))
return 0;
}
ip_nat_proto_put(p);
iph = (void *)(*pskb)->data + iphdroff;
@@ -422,6 +420,7 @@ int ip_nat_icmp_reply_translation(struct ip_conntrack *ct,
struct icmphdr icmp;
struct iphdr ip;
} *inside;
struct ip_conntrack_protocol *proto;
struct ip_conntrack_tuple inner, target;
int hdrlen = (*pskb)->nh.iph->ihl * 4;
enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
@@ -457,10 +456,11 @@ int ip_nat_icmp_reply_translation(struct ip_conntrack *ct,
DEBUGP("icmp_reply_translation: translating error %p manp %u dir %s\n",
*pskb, manip, dir == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY");
/* rcu_read_lock()ed by nf_hook_slow */
proto = __ip_conntrack_proto_find(inside->ip.protocol);
if (!ip_ct_get_tuple(&inside->ip, *pskb, (*pskb)->nh.iph->ihl*4 +
sizeof(struct icmphdr) + inside->ip.ihl*4,
&inner,
__ip_conntrack_proto_find(inside->ip.protocol)))
&inner, proto))
return 0;
/* Change inner back to look like incoming packet. We do the
@@ -515,7 +515,7 @@ int ip_nat_protocol_register(struct ip_nat_protocol *proto)
ret = -EBUSY;
goto out;
}
ip_nat_protos[proto->protonum] = proto;
rcu_assign_pointer(ip_nat_protos[proto->protonum], proto);
out:
write_unlock_bh(&ip_nat_lock);
return ret;
@@ -526,11 +526,10 @@ EXPORT_SYMBOL(ip_nat_protocol_register);
void ip_nat_protocol_unregister(struct ip_nat_protocol *proto)
{
write_lock_bh(&ip_nat_lock);
ip_nat_protos[proto->protonum] = &ip_nat_unknown_protocol;
rcu_assign_pointer(ip_nat_protos[proto->protonum],
&ip_nat_unknown_protocol);
write_unlock_bh(&ip_nat_lock);
/* Someone could be still looking at the proto in a bh. */
synchronize_net();
synchronize_rcu();
}
EXPORT_SYMBOL(ip_nat_protocol_unregister);
@@ -594,10 +593,10 @@ static int __init ip_nat_init(void)
/* Sew in builtin protocols. */
write_lock_bh(&ip_nat_lock);
for (i = 0; i < MAX_IP_NAT_PROTO; i++)
ip_nat_protos[i] = &ip_nat_unknown_protocol;
ip_nat_protos[IPPROTO_TCP] = &ip_nat_protocol_tcp;
ip_nat_protos[IPPROTO_UDP] = &ip_nat_protocol_udp;
ip_nat_protos[IPPROTO_ICMP] = &ip_nat_protocol_icmp;
rcu_assign_pointer(ip_nat_protos[i], &ip_nat_unknown_protocol);
rcu_assign_pointer(ip_nat_protos[IPPROTO_TCP], &ip_nat_protocol_tcp);
rcu_assign_pointer(ip_nat_protos[IPPROTO_UDP], &ip_nat_protocol_udp);
rcu_assign_pointer(ip_nat_protos[IPPROTO_ICMP], &ip_nat_protocol_icmp);
write_unlock_bh(&ip_nat_lock);
for (i = 0; i < ip_nat_htable_size; i++) {
@@ -605,8 +604,8 @@ static int __init ip_nat_init(void)
}
/* FIXME: Man, this is a hack. <SIGH> */
IP_NF_ASSERT(ip_conntrack_destroyed == NULL);
ip_conntrack_destroyed = &ip_nat_cleanup_conntrack;
IP_NF_ASSERT(rcu_dereference(ip_conntrack_destroyed) == NULL);
rcu_assign_pointer(ip_conntrack_destroyed, ip_nat_cleanup_conntrack);
/* Initialize fake conntrack so that NAT will skip it */
ip_conntrack_untracked.status |= IPS_NAT_DONE_MASK;
@@ -624,7 +623,8 @@ static int clean_nat(struct ip_conntrack *i, void *data)
static void __exit ip_nat_cleanup(void)
{
ip_ct_iterate_cleanup(&clean_nat, NULL);
ip_conntrack_destroyed = NULL;
rcu_assign_pointer(ip_conntrack_destroyed, NULL);
synchronize_rcu();
vfree(bysource);
}
+1 -1
View File
@@ -489,7 +489,7 @@ static int __init ipt_log_init(void)
static void __exit ipt_log_fini(void)
{
nf_log_unregister_logger(&ipt_log_logger);
nf_log_unregister(&ipt_log_logger);
xt_unregister_target(&ipt_log_reg);
}
+1 -1
View File
@@ -419,7 +419,7 @@ static void __exit ipt_ulog_fini(void)
DEBUGP("ipt_ULOG: cleanup_module\n");
if (nflog)
nf_log_unregister_logger(&ipt_ulog_logger);
nf_log_unregister(&ipt_ulog_logger);
xt_unregister_target(&ipt_ulog_reg);
sock_release(nflognl->sk_socket);
@@ -170,7 +170,9 @@ icmp_error_message(struct sk_buff *skb,
return -NF_ACCEPT;
}
/* rcu_read_lock()ed by nf_hook_slow */
innerproto = __nf_ct_l4proto_find(PF_INET, inside->ip.protocol);
dataoff = skb->nh.iph->ihl*4 + sizeof(inside->icmp);
/* Are they talking about one of our connections? */
if (!nf_ct_get_tuple(skb, dataoff, dataoff + inside->ip.ihl*4, PF_INET,
+37 -38
View File
@@ -53,7 +53,7 @@ static struct nf_nat_protocol *nf_nat_protos[MAX_IP_NAT_PROTO];
static inline struct nf_nat_protocol *
__nf_nat_proto_find(u_int8_t protonum)
{
return nf_nat_protos[protonum];
return rcu_dereference(nf_nat_protos[protonum]);
}
struct nf_nat_protocol *
@@ -61,13 +61,11 @@ nf_nat_proto_find_get(u_int8_t protonum)
{
struct nf_nat_protocol *p;
/* we need to disable preemption to make sure 'p' doesn't get
* removed until we've grabbed the reference */
preempt_disable();
rcu_read_lock();
p = __nf_nat_proto_find(protonum);
if (!try_module_get(p->me))
p = &nf_nat_unknown_protocol;
preempt_enable();
rcu_read_unlock();
return p;
}
@@ -126,8 +124,8 @@ in_range(const struct nf_conntrack_tuple *tuple,
const struct nf_nat_range *range)
{
struct nf_nat_protocol *proto;
int ret = 0;
proto = __nf_nat_proto_find(tuple->dst.protonum);
/* If we are supposed to map IPs, then we must be in the
range specified, otherwise let this drag us onto a new src IP. */
if (range->flags & IP_NAT_RANGE_MAP_IPS) {
@@ -136,12 +134,15 @@ in_range(const struct nf_conntrack_tuple *tuple,
return 0;
}
rcu_read_lock();
proto = __nf_nat_proto_find(tuple->dst.protonum);
if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED) ||
proto->in_range(tuple, IP_NAT_MANIP_SRC,
&range->min, &range->max))
return 1;
ret = 1;
rcu_read_unlock();
return 0;
return ret;
}
static inline int
@@ -268,27 +269,25 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
/* 3) The per-protocol part of the manip is made to map into
the range to make a unique tuple. */
proto = nf_nat_proto_find_get(orig_tuple->dst.protonum);
rcu_read_lock();
proto = __nf_nat_proto_find(orig_tuple->dst.protonum);
/* Change protocol info to have some randomization */
if (range->flags & IP_NAT_RANGE_PROTO_RANDOM) {
proto->unique_tuple(tuple, range, maniptype, ct);
nf_nat_proto_put(proto);
return;
goto out;
}
/* Only bother mapping if it's not already in range and unique */
if ((!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED) ||
proto->in_range(tuple, maniptype, &range->min, &range->max)) &&
!nf_nat_used_tuple(tuple, ct)) {
nf_nat_proto_put(proto);
return;
}
!nf_nat_used_tuple(tuple, ct))
goto out;
/* Last change: get protocol to try to obtain unique tuple. */
proto->unique_tuple(tuple, range, maniptype, ct);
nf_nat_proto_put(proto);
out:
rcu_read_unlock();
}
unsigned int
@@ -369,12 +368,11 @@ manip_pkt(u_int16_t proto,
iph = (void *)(*pskb)->data + iphdroff;
/* Manipulate protcol part. */
p = nf_nat_proto_find_get(proto);
if (!p->manip_pkt(pskb, iphdroff, target, maniptype)) {
nf_nat_proto_put(p);
/* rcu_read_lock()ed by nf_hook_slow */
p = __nf_nat_proto_find(proto);
if (!p->manip_pkt(pskb, iphdroff, target, maniptype))
return 0;
}
nf_nat_proto_put(p);
iph = (void *)(*pskb)->data + iphdroff;
@@ -431,6 +429,7 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct,
struct icmphdr icmp;
struct iphdr ip;
} *inside;
struct nf_conntrack_l4proto *l4proto;
struct nf_conntrack_tuple inner, target;
int hdrlen = (*pskb)->nh.iph->ihl * 4;
enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
@@ -466,16 +465,16 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct,
DEBUGP("icmp_reply_translation: translating error %p manp %u dir %s\n",
*pskb, manip, dir == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY");
/* rcu_read_lock()ed by nf_hook_slow */
l4proto = __nf_ct_l4proto_find(PF_INET, inside->ip.protocol);
if (!nf_ct_get_tuple(*pskb,
(*pskb)->nh.iph->ihl*4 + sizeof(struct icmphdr),
(*pskb)->nh.iph->ihl*4 +
sizeof(struct icmphdr) + inside->ip.ihl*4,
(u_int16_t)AF_INET,
inside->ip.protocol,
&inner,
l3proto,
__nf_ct_l4proto_find((u_int16_t)PF_INET,
inside->ip.protocol)))
&inner, l3proto, l4proto))
return 0;
/* Change inner back to look like incoming packet. We do the
@@ -529,7 +528,7 @@ int nf_nat_protocol_register(struct nf_nat_protocol *proto)
ret = -EBUSY;
goto out;
}
nf_nat_protos[proto->protonum] = proto;
rcu_assign_pointer(nf_nat_protos[proto->protonum], proto);
out:
write_unlock_bh(&nf_nat_lock);
return ret;
@@ -540,11 +539,10 @@ EXPORT_SYMBOL(nf_nat_protocol_register);
void nf_nat_protocol_unregister(struct nf_nat_protocol *proto)
{
write_lock_bh(&nf_nat_lock);
nf_nat_protos[proto->protonum] = &nf_nat_unknown_protocol;
rcu_assign_pointer(nf_nat_protos[proto->protonum],
&nf_nat_unknown_protocol);
write_unlock_bh(&nf_nat_lock);
/* Someone could be still looking at the proto in a bh. */
synchronize_net();
synchronize_rcu();
}
EXPORT_SYMBOL(nf_nat_protocol_unregister);
@@ -608,10 +606,10 @@ static int __init nf_nat_init(void)
/* Sew in builtin protocols. */
write_lock_bh(&nf_nat_lock);
for (i = 0; i < MAX_IP_NAT_PROTO; i++)
nf_nat_protos[i] = &nf_nat_unknown_protocol;
nf_nat_protos[IPPROTO_TCP] = &nf_nat_protocol_tcp;
nf_nat_protos[IPPROTO_UDP] = &nf_nat_protocol_udp;
nf_nat_protos[IPPROTO_ICMP] = &nf_nat_protocol_icmp;
rcu_assign_pointer(nf_nat_protos[i], &nf_nat_unknown_protocol);
rcu_assign_pointer(nf_nat_protos[IPPROTO_TCP], &nf_nat_protocol_tcp);
rcu_assign_pointer(nf_nat_protos[IPPROTO_UDP], &nf_nat_protocol_udp);
rcu_assign_pointer(nf_nat_protos[IPPROTO_ICMP], &nf_nat_protocol_icmp);
write_unlock_bh(&nf_nat_lock);
for (i = 0; i < nf_nat_htable_size; i++) {
@@ -619,8 +617,8 @@ static int __init nf_nat_init(void)
}
/* FIXME: Man, this is a hack. <SIGH> */
NF_CT_ASSERT(nf_conntrack_destroyed == NULL);
nf_conntrack_destroyed = &nf_nat_cleanup_conntrack;
NF_CT_ASSERT(rcu_dereference(nf_conntrack_destroyed) == NULL);
rcu_assign_pointer(nf_conntrack_destroyed, nf_nat_cleanup_conntrack);
/* Initialize fake conntrack so that NAT will skip it */
nf_conntrack_untracked.status |= IPS_NAT_DONE_MASK;
@@ -644,7 +642,8 @@ static int clean_nat(struct nf_conn *i, void *data)
static void __exit nf_nat_cleanup(void)
{
nf_ct_iterate_cleanup(&clean_nat, NULL);
nf_conntrack_destroyed = NULL;
rcu_assign_pointer(nf_conntrack_destroyed, NULL);
synchronize_rcu();
vfree(bysource);
nf_ct_l3proto_put(l3proto);
}
+9 -9
View File
@@ -26,16 +26,16 @@
*/
#define BICTCP_HZ 10 /* BIC HZ 2^10 = 1024 */
static int fast_convergence = 1;
static int max_increment = 16;
static int beta = 819; /* = 819/1024 (BICTCP_BETA_SCALE) */
static int initial_ssthresh = 100;
static int bic_scale = 41;
static int tcp_friendliness = 1;
static int fast_convergence __read_mostly = 1;
static int max_increment __read_mostly = 16;
static int beta __read_mostly = 819; /* = 819/1024 (BICTCP_BETA_SCALE) */
static int initial_ssthresh __read_mostly = 100;
static int bic_scale __read_mostly = 41;
static int tcp_friendliness __read_mostly = 1;
static u32 cube_rtt_scale;
static u32 beta_scale;
static u64 cube_factor;
static u32 cube_rtt_scale __read_mostly;
static u32 beta_scale __read_mostly;
static u64 cube_factor __read_mostly;
/* Note parameters that are used for precomputing scale factors are read-only */
module_param(fast_convergence, int, 0644);
+33 -24
View File
@@ -14,18 +14,19 @@
#define BETA_MIN (1<<6) /* 0.5 with shift << 7 */
#define BETA_MAX 102 /* 0.8 with shift << 7 */
static int use_rtt_scaling = 1;
static int use_rtt_scaling __read_mostly = 1;
module_param(use_rtt_scaling, int, 0644);
MODULE_PARM_DESC(use_rtt_scaling, "turn on/off RTT scaling");
static int use_bandwidth_switch = 1;
static int use_bandwidth_switch __read_mostly = 1;
module_param(use_bandwidth_switch, int, 0644);
MODULE_PARM_DESC(use_bandwidth_switch, "turn on/off bandwidth switcher");
struct htcp {
u32 alpha; /* Fixed point arith, << 7 */
u8 beta; /* Fixed point arith, << 7 */
u8 modeswitch; /* Delay modeswitch until we had at least one congestion event */
u8 modeswitch; /* Delay modeswitch
until we had at least one congestion event */
u16 pkts_acked;
u32 packetcount;
u32 minRTT;
@@ -44,14 +45,14 @@ struct htcp {
u32 lasttime;
};
static inline u32 htcp_cong_time(struct htcp *ca)
static inline u32 htcp_cong_time(const struct htcp *ca)
{
return jiffies - ca->last_cong;
}
static inline u32 htcp_ccount(struct htcp *ca)
static inline u32 htcp_ccount(const struct htcp *ca)
{
return htcp_cong_time(ca)/ca->minRTT;
return htcp_cong_time(ca) / ca->minRTT;
}
static inline void htcp_reset(struct htcp *ca)
@@ -67,10 +68,12 @@ static u32 htcp_cwnd_undo(struct sock *sk)
{
const struct tcp_sock *tp = tcp_sk(sk);
struct htcp *ca = inet_csk_ca(sk);
ca->last_cong = ca->undo_last_cong;
ca->maxRTT = ca->undo_maxRTT;
ca->old_maxB = ca->undo_old_maxB;
return max(tp->snd_cwnd, (tp->snd_ssthresh<<7)/ca->beta);
return max(tp->snd_cwnd, (tp->snd_ssthresh << 7) / ca->beta);
}
static inline void measure_rtt(struct sock *sk)
@@ -78,17 +81,19 @@ static inline void measure_rtt(struct sock *sk)
const struct inet_connection_sock *icsk = inet_csk(sk);
const struct tcp_sock *tp = tcp_sk(sk);
struct htcp *ca = inet_csk_ca(sk);
u32 srtt = tp->srtt>>3;
u32 srtt = tp->srtt >> 3;
/* keep track of minimum RTT seen so far, minRTT is zero at first */
if (ca->minRTT > srtt || !ca->minRTT)
ca->minRTT = srtt;
/* max RTT */
if (icsk->icsk_ca_state == TCP_CA_Open && tp->snd_ssthresh < 0xFFFF && htcp_ccount(ca) > 3) {
if (icsk->icsk_ca_state == TCP_CA_Open
&& tp->snd_ssthresh < 0xFFFF && htcp_ccount(ca) > 3) {
if (ca->maxRTT < ca->minRTT)
ca->maxRTT = ca->minRTT;
if (ca->maxRTT < srtt && srtt <= ca->maxRTT+msecs_to_jiffies(20))
if (ca->maxRTT < srtt
&& srtt <= ca->maxRTT + msecs_to_jiffies(20))
ca->maxRTT = srtt;
}
}
@@ -116,15 +121,16 @@ static void measure_achieved_throughput(struct sock *sk, u32 pkts_acked)
ca->packetcount += pkts_acked;
if (ca->packetcount >= tp->snd_cwnd - (ca->alpha>>7? : 1)
if (ca->packetcount >= tp->snd_cwnd - (ca->alpha >> 7 ? : 1)
&& now - ca->lasttime >= ca->minRTT
&& ca->minRTT > 0) {
__u32 cur_Bi = ca->packetcount*HZ/(now - ca->lasttime);
__u32 cur_Bi = ca->packetcount * HZ / (now - ca->lasttime);
if (htcp_ccount(ca) <= 3) {
/* just after backoff */
ca->minB = ca->maxB = ca->Bi = cur_Bi;
} else {
ca->Bi = (3*ca->Bi + cur_Bi)/4;
ca->Bi = (3 * ca->Bi + cur_Bi) / 4;
if (ca->Bi > ca->maxB)
ca->maxB = ca->Bi;
if (ca->minB > ca->maxB)
@@ -142,7 +148,7 @@ static inline void htcp_beta_update(struct htcp *ca, u32 minRTT, u32 maxRTT)
u32 old_maxB = ca->old_maxB;
ca->old_maxB = ca->maxB;
if (!between(5*maxB, 4*old_maxB, 6*old_maxB)) {
if (!between(5 * maxB, 4 * old_maxB, 6 * old_maxB)) {
ca->beta = BETA_MIN;
ca->modeswitch = 0;
return;
@@ -150,7 +156,7 @@ static inline void htcp_beta_update(struct htcp *ca, u32 minRTT, u32 maxRTT)
}
if (ca->modeswitch && minRTT > msecs_to_jiffies(10) && maxRTT) {
ca->beta = (minRTT<<7)/maxRTT;
ca->beta = (minRTT << 7) / maxRTT;
if (ca->beta < BETA_MIN)
ca->beta = BETA_MIN;
else if (ca->beta > BETA_MAX)
@@ -169,23 +175,26 @@ static inline void htcp_alpha_update(struct htcp *ca)
if (diff > HZ) {
diff -= HZ;
factor = 1+ ( 10*diff + ((diff/2)*(diff/2)/HZ) )/HZ;
factor = 1 + (10 * diff + ((diff / 2) * (diff / 2) / HZ)) / HZ;
}
if (use_rtt_scaling && minRTT) {
u32 scale = (HZ<<3)/(10*minRTT);
scale = min(max(scale, 1U<<2), 10U<<3); /* clamping ratio to interval [0.5,10]<<3 */
factor = (factor<<3)/scale;
u32 scale = (HZ << 3) / (10 * minRTT);
/* clamping ratio to interval [0.5,10]<<3 */
scale = min(max(scale, 1U << 2), 10U << 3);
factor = (factor << 3) / scale;
if (!factor)
factor = 1;
}
ca->alpha = 2*factor*((1<<7)-ca->beta);
ca->alpha = 2 * factor * ((1 << 7) - ca->beta);
if (!ca->alpha)
ca->alpha = ALPHA_BASE;
}
/* After we have the rtt data to calculate beta, we'd still prefer to wait one
/*
* After we have the rtt data to calculate beta, we'd still prefer to wait one
* rtt before we adjust our beta to ensure we are working from a consistent
* data.
*
@@ -202,15 +211,16 @@ static void htcp_param_update(struct sock *sk)
htcp_beta_update(ca, minRTT, maxRTT);
htcp_alpha_update(ca);
/* add slowly fading memory for maxRTT to accommodate routing changes etc */
/* add slowly fading memory for maxRTT to accommodate routing changes */
if (minRTT > 0 && maxRTT > minRTT)
ca->maxRTT = minRTT + ((maxRTT-minRTT)*95)/100;
ca->maxRTT = minRTT + ((maxRTT - minRTT) * 95) / 100;
}
static u32 htcp_recalc_ssthresh(struct sock *sk)
{
const struct tcp_sock *tp = tcp_sk(sk);
const struct htcp *ca = inet_csk_ca(sk);
htcp_param_update(sk);
return max((tp->snd_cwnd * ca->beta) >> 7, 2U);
}
@@ -227,7 +237,6 @@ static void htcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
if (tp->snd_cwnd <= tp->snd_ssthresh)
tcp_slow_start(tp);
else {
measure_rtt(sk);
/* In dangerous area, increase slowly.
+2 -1
View File
@@ -42,7 +42,8 @@ config IP6_NF_QUEUE
config IP6_NF_IPTABLES
tristate "IP6 tables support (required for filtering)"
depends on INET && IPV6 && EXPERIMENTAL && NETFILTER_XTABLES
depends on INET && IPV6 && EXPERIMENTAL
select NETFILTER_XTABLES
help
ip6tables is a general, extensible packet identification framework.
Currently only the packet filtering and packet mangling subsystem
+1 -1
View File
@@ -501,7 +501,7 @@ static int __init ip6t_log_init(void)
static void __exit ip6t_log_fini(void)
{
nf_log_unregister_logger(&ip6t_logger);
nf_log_unregister(&ip6t_logger);
xt_unregister_target(&ip6t_log_reg);
}
+7
View File
@@ -66,6 +66,13 @@ match(const struct sk_buff *skb,
return 0;
}
if (mh->ip6mh_proto != IPPROTO_NONE) {
duprintf("Dropping invalid MH Payload Proto: %u\n",
mh->ip6mh_proto);
*hotdrop = 1;
return 0;
}
return type_match(mhinfo->types[0], mhinfo->types[1], mh->ip6mh_type,
!!(mhinfo->invflags & IP6T_MH_INV_TYPE));
}

Some files were not shown because too many files have changed in this diff Show More