netfilter: ipset: Introduce RCU locking in hash:* types

Three types of data need to be protected in the case of the hash types:

a. The hash buckets: standard rcu pointer operations are used.
b. The element blobs in the hash buckets are stored in an array and
   a bitmap is used for book-keeping to tell which elements in the array
   are used or free.
c. Networks per cidr values and the cidr values themselves are stored
   in fix sized arrays and need no protection. The values are modified
   in such an order that in the worst case an element testing is repeated
   once with the same cidr value.

The ipset hash approach uses arrays instead of lists and therefore is
incompatible with rhashtable.

Performance is tested by Jesper Dangaard Brouer:

Simple drop in FORWARD
~~~~~~~~~~~~~~~~~~~~~~

Dropping via simple iptables net-mask match::

 iptables -t raw -N simple || iptables -t raw -F simple
 iptables -t raw -I simple  -s 198.18.0.0/15 -j DROP
 iptables -t raw -D PREROUTING -j simple
 iptables -t raw -I PREROUTING -j simple

Drop performance in "raw": 11.3Mpps

Generator: sending 12.2Mpps (tx:12264083 pps)

Drop via original ipset in RAW table
~~~~~~~~~~~~~~~~~~~~~~~~~~~

Create a set with lots of elements::

 sudo ./ipset destroy test
 echo "create test hash:ip hashsize 65536" > test.set
 for x in `seq 0 255`; do
    for y in `seq 0 255`; do
        echo "add test 198.18.$x.$y" >> test.set
    done
 done
 sudo ./ipset restore < test.set

Dropping via ipset::

 iptables -t raw -F
 iptables -t raw -N net198 || iptables -t raw -F net198
 iptables -t raw -I net198 -m set --match-set test src -j DROP
 iptables -t raw -I PREROUTING -j net198

Drop performance in "raw" with ipset: 8Mpps

Perf report numbers ipset drop in "raw"::

 +   24.65%  ksoftirqd/1  [ip_set]           [k] ip_set_test
 -   21.42%  ksoftirqd/1  [kernel.kallsyms]  [k] _raw_read_lock_bh
    - _raw_read_lock_bh
       + 99.88% ip_set_test
 -   19.42%  ksoftirqd/1  [kernel.kallsyms]  [k] _raw_read_unlock_bh
    - _raw_read_unlock_bh
       + 99.72% ip_set_test
 +    4.31%  ksoftirqd/1  [ip_set_hash_ip]   [k] hash_ip4_kadt
 +    2.27%  ksoftirqd/1  [ixgbe]            [k] ixgbe_fetch_rx_buffer
 +    2.18%  ksoftirqd/1  [ip_tables]        [k] ipt_do_table
 +    1.81%  ksoftirqd/1  [ip_set_hash_ip]   [k] hash_ip4_test
 +    1.61%  ksoftirqd/1  [kernel.kallsyms]  [k] __netif_receive_skb_core
 +    1.44%  ksoftirqd/1  [kernel.kallsyms]  [k] build_skb
 +    1.42%  ksoftirqd/1  [kernel.kallsyms]  [k] ip_rcv
 +    1.36%  ksoftirqd/1  [kernel.kallsyms]  [k] __local_bh_enable_ip
 +    1.16%  ksoftirqd/1  [kernel.kallsyms]  [k] dev_gro_receive
 +    1.09%  ksoftirqd/1  [kernel.kallsyms]  [k] __rcu_read_unlock
 +    0.96%  ksoftirqd/1  [ixgbe]            [k] ixgbe_clean_rx_irq
 +    0.95%  ksoftirqd/1  [kernel.kallsyms]  [k] __netdev_alloc_frag
 +    0.88%  ksoftirqd/1  [kernel.kallsyms]  [k] kmem_cache_alloc
 +    0.87%  ksoftirqd/1  [xt_set]           [k] set_match_v3
 +    0.85%  ksoftirqd/1  [kernel.kallsyms]  [k] inet_gro_receive
 +    0.83%  ksoftirqd/1  [kernel.kallsyms]  [k] nf_iterate
 +    0.76%  ksoftirqd/1  [kernel.kallsyms]  [k] put_compound_page
 +    0.75%  ksoftirqd/1  [kernel.kallsyms]  [k] __rcu_read_lock

Drop via ipset in RAW table with RCU-locking
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

With RCU locking, the RW-lock is gone.

Drop performance in "raw" with ipset with RCU-locking: 11.3Mpps

Performance-tested-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
This commit is contained in:
Jozsef Kadlecsik
2015-06-13 17:29:56 +02:00
parent 96f51428c4
commit 18f84d41d3
12 changed files with 369 additions and 241 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -315,6 +315,7 @@ hash_ip_init(void)
static void __exit
hash_ip_fini(void)
{
rcu_barrier();
ip_set_type_unregister(&hash_ip_type);
}

View File

@@ -319,6 +319,7 @@ hash_ipmark_init(void)
static void __exit
hash_ipmark_fini(void)
{
rcu_barrier();
ip_set_type_unregister(&hash_ipmark_type);
}

View File

@@ -382,6 +382,7 @@ hash_ipport_init(void)
static void __exit
hash_ipport_fini(void)
{
rcu_barrier();
ip_set_type_unregister(&hash_ipport_type);
}

View File

@@ -397,6 +397,7 @@ hash_ipportip_init(void)
static void __exit
hash_ipportip_fini(void)
{
rcu_barrier();
ip_set_type_unregister(&hash_ipportip_type);
}

View File

@@ -554,6 +554,7 @@ hash_ipportnet_init(void)
static void __exit
hash_ipportnet_fini(void)
{
rcu_barrier();
ip_set_type_unregister(&hash_ipportnet_type);
}

View File

@@ -165,6 +165,7 @@ hash_mac_init(void)
static void __exit
hash_mac_fini(void)
{
rcu_barrier();
ip_set_type_unregister(&hash_mac_type);
}

View File

@@ -392,6 +392,7 @@ hash_net_init(void)
static void __exit
hash_net_fini(void)
{
rcu_barrier();
ip_set_type_unregister(&hash_net_type);
}

View File

@@ -500,6 +500,7 @@ hash_netiface_init(void)
static void __exit
hash_netiface_fini(void)
{
rcu_barrier();
ip_set_type_unregister(&hash_netiface_type);
}

View File

@@ -480,6 +480,7 @@ hash_netnet_init(void)
static void __exit
hash_netnet_fini(void)
{
rcu_barrier();
ip_set_type_unregister(&hash_netnet_type);
}

View File

@@ -498,6 +498,7 @@ hash_netport_init(void)
static void __exit
hash_netport_fini(void)
{
rcu_barrier();
ip_set_type_unregister(&hash_netport_type);
}

View File

@@ -581,6 +581,7 @@ hash_netportnet_init(void)
static void __exit
hash_netportnet_fini(void)
{
rcu_barrier();
ip_set_type_unregister(&hash_netportnet_type);
}