mirror of
https://github.com/armbian/linux-cix.git
synced 2026-01-06 12:30:45 -08:00
net: filter: Just In Time compiler for x86-64
In order to speed up packet filtering, here is an implementation of a JIT compiler for x86_64. It is disabled by default, and must be enabled by the admin: echo 1 >/proc/sys/net/core/bpf_jit_enable It uses module_alloc() and module_free() to get memory in the 2GB kernel text range, since we call helper functions from the generated code. EAX : BPF A accumulator EBX : BPF X accumulator RDI : pointer to skb (first argument given to JIT function) RBP : frame pointer (even if CONFIG_FRAME_POINTER=n) r9d : skb->len - skb->data_len (headlen) r8 : skb->data To get a trace of generated code, use : echo 2 >/proc/sys/net/core/bpf_jit_enable Example of generated code : # tcpdump -p -n -s 0 -i eth1 host 192.168.20.0/24 flen=18 proglen=147 pass=3 image=ffffffffa00b5000 JIT code: ffffffffa00b5000: 55 48 89 e5 48 83 ec 60 48 89 5d f8 44 8b 4f 60 JIT code: ffffffffa00b5010: 44 2b 4f 64 4c 8b 87 b8 00 00 00 be 0c 00 00 00 JIT code: ffffffffa00b5020: e8 24 7b f7 e0 3d 00 08 00 00 75 28 be 1a 00 00 JIT code: ffffffffa00b5030: 00 e8 fe 7a f7 e0 24 00 3d 00 14 a8 c0 74 49 be JIT code: ffffffffa00b5040: 1e 00 00 00 e8 eb 7a f7 e0 24 00 3d 00 14 a8 c0 JIT code: ffffffffa00b5050: 74 36 eb 3b 3d 06 08 00 00 74 07 3d 35 80 00 00 JIT code: ffffffffa00b5060: 75 2d be 1c 00 00 00 e8 c8 7a f7 e0 24 00 3d 00 JIT code: ffffffffa00b5070: 14 a8 c0 74 13 be 26 00 00 00 e8 b5 7a f7 e0 24 JIT code: ffffffffa00b5080: 00 3d 00 14 a8 c0 75 07 b8 ff ff 00 00 eb 02 31 JIT code: ffffffffa00b5090: c0 c9 c3 The BPF program is 144 bytes long, so the native program is almost the same size ;) (000) ldh [12] (001) jeq #0x800 jt 2 jf 8 (002) ld [26] (003) and #0xffffff00 (004) jeq #0xc0a81400 jt 16 jf 5 (005) ld [30] (006) and #0xffffff00 (007) jeq #0xc0a81400 jt 16 jf 17 (008) jeq #0x806 jt 10 jf 9 (009) jeq #0x8035 jt 10 jf 17 (010) ld [28] (011) and #0xffffff00 (012) jeq #0xc0a81400 jt 16 jf 13 (013) ld [38] (014) and #0xffffff00 (015) jeq #0xc0a81400 jt 16 jf 17 (016) ret #65535 (017) ret #0 Signed-off-by: Eric Dumazet 
<eric.dumazet@gmail.com> Cc: Arnaldo Carvalho de Melo <acme@infradead.org> Cc: Ben Hutchings <bhutchings@solarflare.com> Cc: Hagen Paul Pfeifer <hagen@jauu.net> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
committed by
David S. Miller
parent
b678027cb7
commit
0a14842f5a
@@ -32,6 +32,17 @@ Table : Subdirectories in /proc/sys/net
|
||||
1. /proc/sys/net/core - Network core options
|
||||
-------------------------------------------------------
|
||||
|
||||
bpf_jit_enable
|
||||
--------------
|
||||
|
||||
This enables Berkeley Packet Filter Just in Time compiler.
|
||||
Currently supported on x86_64 architecture, bpf_jit provides a framework
|
||||
to speed up packet filtering, such as the filtering used by tcpdump/libpcap.
|
||||
Values :
|
||||
0 - disable the JIT (default value)
|
||||
1 - enable the JIT
|
||||
2 - enable the JIT and ask the compiler to emit traces to the kernel log.
|
||||
|
||||
rmem_default
|
||||
------------
|
||||
|
||||
|
||||
@@ -4372,6 +4372,7 @@ S: Maintained
|
||||
F: net/ipv4/
|
||||
F: net/ipv6/
|
||||
F: include/net/ip*
|
||||
F: arch/x86/net/*
|
||||
|
||||
NETWORKING [LABELED] (NetLabel, CIPSO, Labeled IPsec, SECMARK)
|
||||
M: Paul Moore <paul.moore@hp.com>
|
||||
|
||||
@@ -15,3 +15,4 @@ obj-y += vdso/
|
||||
obj-$(CONFIG_IA32_EMULATION) += ia32/
|
||||
|
||||
obj-y += platform/
|
||||
obj-y += net/
|
||||
|
||||
@@ -72,6 +72,7 @@ config X86
|
||||
select IRQ_FORCED_THREADING
|
||||
select USE_GENERIC_SMP_HELPERS if SMP
|
||||
select ARCH_NO_SYSDEV_OPS
|
||||
select HAVE_BPF_JIT if X86_64
|
||||
|
||||
config INSTRUCTION_DECODER
|
||||
def_bool (KPROBES || PERF_EVENTS)
|
||||
|
||||
4
arch/x86/net/Makefile
Normal file
4
arch/x86/net/Makefile
Normal file
@@ -0,0 +1,4 @@
|
||||
#
|
||||
# Arch-specific network modules
|
||||
#
|
||||
obj-$(CONFIG_BPF_JIT) += bpf_jit.o bpf_jit_comp.o
|
||||
140
arch/x86/net/bpf_jit.S
Normal file
140
arch/x86/net/bpf_jit.S
Normal file
@@ -0,0 +1,140 @@
|
||||
/* bpf_jit.S : BPF JIT helper functions
|
||||
*
|
||||
* Copyright (C) 2011 Eric Dumazet (eric.dumazet@gmail.com)
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; version 2
|
||||
* of the License.
|
||||
*/
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/dwarf2.h>
|
||||
|
||||
/*
|
||||
* Calling convention :
|
||||
* rdi : skb pointer
|
||||
* esi : offset of byte(s) to fetch in skb (can be scratched)
|
||||
* r8 : copy of skb->data
|
||||
* r9d : hlen = skb->len - skb->data_len
|
||||
*/
|
||||
#define SKBDATA %r8
|
||||
|
||||
/*
 * sk_load_word_ind: indirect 32-bit load.
 * Adds X (%ebx) to the offset in %esi, jumps to bpf_error when the sum is
 * negative, and otherwise falls through into sk_load_word below.
 */
sk_load_word_ind:
|
||||
.globl sk_load_word_ind
|
||||
|
||||
add %ebx,%esi /* offset += X */
|
||||
# test %esi,%esi /* if (offset < 0) goto bpf_error; */
|
||||
/* no explicit test is needed: js consumes the sign flag left by the add */
js bpf_error
|
||||
|
||||
/*
 * sk_load_word: 32-bit load at offset %esi.
 * Fast path reads 4 bytes at SKBDATA + offset and byte-swaps them into %eax.
 * When fewer than 4 bytes lie between offset and hlen (%r9d), control goes
 * to bpf_slow_path_word instead.
 */
sk_load_word:
|
||||
.globl sk_load_word
|
||||
|
||||
mov %r9d,%eax # hlen
|
||||
sub %esi,%eax # hlen - offset
|
||||
/* hlen - offset <= 3 (signed): not enough bytes for a direct 4-byte read */
cmp $3,%eax
|
||||
jle bpf_slow_path_word
|
||||
mov (SKBDATA,%rsi),%eax
|
||||
bswap %eax /* ntohl() */
|
||||
ret
|
||||
|
||||
|
||||
/*
 * sk_load_half_ind: indirect 16-bit load.
 * Adds X (%ebx) to the offset in %esi, jumps to bpf_error when the sum is
 * negative, and otherwise falls through into sk_load_half below.
 */
sk_load_half_ind:
|
||||
.globl sk_load_half_ind
|
||||
|
||||
add %ebx,%esi /* offset += X */
|
||||
/* js consumes the sign flag left by the add: negative offset is an error */
js bpf_error
|
||||
|
||||
/*
 * sk_load_half: 16-bit load at offset %esi.
 * Fast path reads 2 bytes at SKBDATA + offset, zero-extended into %eax, then
 * swaps the two bytes. When fewer than 2 bytes lie between offset and hlen
 * (%r9d), control goes to bpf_slow_path_half instead.
 */
sk_load_half:
|
||||
.globl sk_load_half
|
||||
|
||||
mov %r9d,%eax
|
||||
sub %esi,%eax # hlen - offset
|
||||
/* hlen - offset <= 1 (signed): not enough bytes for a direct 2-byte read */
cmp $1,%eax
|
||||
jle bpf_slow_path_half
|
||||
movzwl (SKBDATA,%rsi),%eax
|
||||
rol $8,%ax # ntohs()
|
||||
ret
|
||||
|
||||
/*
 * sk_load_byte_ind: indirect 8-bit load.
 * Adds X (%ebx) to the offset in %esi, jumps to bpf_error when the sum is
 * negative, and otherwise falls through into sk_load_byte below.
 */
sk_load_byte_ind:
|
||||
.globl sk_load_byte_ind
|
||||
add %ebx,%esi /* offset += X */
|
||||
/* js consumes the sign flag left by the add: negative offset is an error */
js bpf_error
|
||||
|
||||
/*
 * sk_load_byte: 8-bit load at offset %esi.
 * Fast path reads the byte at SKBDATA + offset, zero-extended into %eax.
 * When offset is not below hlen (%r9d), control goes to bpf_slow_path_byte.
 */
sk_load_byte:
|
||||
.globl sk_load_byte
|
||||
|
||||
cmp %esi,%r9d /* if (offset >= hlen) goto bpf_slow_path_byte */
|
||||
jle bpf_slow_path_byte
|
||||
movzbl (SKBDATA,%rsi),%eax
|
||||
ret
|
||||
|
||||
/**
|
||||
* sk_load_byte_msh - BPF_S_LDX_B_MSH helper
|
||||
*
|
||||
* Implements BPF_S_LDX_B_MSH : ldxb 4*([offset]&0xf)
|
||||
* Must preserve A accumulator (%eax)
|
||||
* Inputs : %esi is the offset value, already known positive
* Output : X (%ebx) = 4 * (byte at offset & 0xf)
* Falls back to bpf_slow_path_byte_msh when offset is not below hlen (%r9d)
|
||||
*/
|
||||
ENTRY(sk_load_byte_msh)
|
||||
CFI_STARTPROC
|
||||
cmp %esi,%r9d /* if (offset >= hlen) goto bpf_slow_path_byte_msh */
|
||||
jle bpf_slow_path_byte_msh
|
||||
/* fast path: load the byte zero-extended straight into X, leaving %eax alone */
movzbl (SKBDATA,%rsi),%ebx
|
||||
/* keep the low nibble ... */
and $15,%bl
|
||||
/* ... and multiply it by 4 */
shl $2,%bl
|
||||
ret
|
||||
CFI_ENDPROC
|
||||
ENDPROC(sk_load_byte_msh)
|
||||
|
||||
/*
 * bpf_error: common failure exit used by the load helpers above and the slow
 * paths below. It does not return to the helper's caller: leaveq unwinds the
 * %rbp frame, so the final ret leaves the JIT handler itself with 0 in %eax.
 */
bpf_error:
|
||||
# force a return 0 from jit handler
|
||||
xor %eax,%eax
|
||||
/* reload %rbx from the frame slot at -8(%rbp) -- presumably where the JIT
 * prologue saved it; confirm against the generated prologue */
mov -8(%rbp),%rbx
|
||||
/* leaveq = mov %rbp,%rsp; pop %rbp -- discards the helper's return address */
leaveq
|
||||
ret
|
||||
|
||||
/*
 * bpf_slow_path_common(LEN): copy LEN bytes at the offset in %esi into the
 * scratch slot at -12(%rbp) by calling skb_copy_bits.
 *
 * Arguments are set up in %rdi (skb, already in place), %rsi (offset, already
 * in place), %rdx (destination = -12(%rbp)) and %ecx (LEN).
 * %rdi, %r9 and SKBDATA are pushed before the call and popped afterwards so
 * the JIT's cached values survive it.
 * The test of %eax comes before the pops; pop does not modify the flags, so
 * the code following the macro can branch on the sign of the return value
 * (each user below does "js bpf_error").
 */
/* rsi contains offset and can be scratched */
|
||||
#define bpf_slow_path_common(LEN) \
|
||||
push %rdi; /* save skb */ \
|
||||
push %r9; \
|
||||
push SKBDATA; \
|
||||
/* rsi already has offset */ \
|
||||
mov $LEN,%ecx; /* len */ \
|
||||
lea -12(%rbp),%rdx; \
|
||||
call skb_copy_bits; \
|
||||
test %eax,%eax; \
|
||||
pop SKBDATA; \
|
||||
pop %r9; \
|
||||
pop %rdi
|
||||
|
||||
|
||||
/*
 * bpf_slow_path_word: slow path of sk_load_word.
 * Copies 4 bytes via skb_copy_bits into -12(%rbp); on a negative return value
 * goes to bpf_error, otherwise returns the byte-swapped word in %eax.
 */
bpf_slow_path_word:
|
||||
bpf_slow_path_common(4)
|
||||
/* flags still reflect skb_copy_bits' return value */
js bpf_error
|
||||
mov -12(%rbp),%eax
|
||||
bswap %eax
|
||||
ret
|
||||
|
||||
/*
 * bpf_slow_path_half: slow path of sk_load_half.
 * Copies 2 bytes via skb_copy_bits into -12(%rbp); on a negative return value
 * goes to bpf_error, otherwise returns the byte-swapped, zero-extended
 * halfword in %eax.
 */
bpf_slow_path_half:
|
||||
bpf_slow_path_common(2)
|
||||
/* flags still reflect skb_copy_bits' return value */
js bpf_error
|
||||
mov -12(%rbp),%ax
|
||||
/* swap the two bytes of %ax */
rol $8,%ax
|
||||
/* clear the upper 16 bits of %eax */
movzwl %ax,%eax
|
||||
ret
|
||||
|
||||
/*
 * bpf_slow_path_byte: slow path of sk_load_byte.
 * Copies 1 byte via skb_copy_bits into -12(%rbp); on a negative return value
 * goes to bpf_error, otherwise returns the zero-extended byte in %eax.
 */
bpf_slow_path_byte:
|
||||
bpf_slow_path_common(1)
|
||||
/* flags still reflect skb_copy_bits' return value */
js bpf_error
|
||||
movzbl -12(%rbp),%eax
|
||||
ret
|
||||
|
||||
/*
 * bpf_slow_path_byte_msh: slow path of sk_load_byte_msh.
 * Copies 1 byte via skb_copy_bits into -12(%rbp) and sets
 * X (%ebx) = 4 * (byte & 0xf) while keeping A (%eax) intact.
 * A is parked in %ebx across the call (the call's return value lands in
 * %eax) and swapped back at the end. On a negative return value control goes
 * to bpf_error, which overwrites both %eax and %rbx anyway.
 */
bpf_slow_path_byte_msh:
|
||||
xchg %eax,%ebx /* don't lose A; X is about to be scratched */
|
||||
bpf_slow_path_common(1)
|
||||
/* flags still reflect skb_copy_bits' return value */
js bpf_error
|
||||
movzbl -12(%rbp),%eax
|
||||
/* keep the low nibble ... */
and $15,%al
|
||||
/* ... and multiply it by 4 */
shl $2,%al
|
||||
/* new X value goes to %ebx, saved A comes back into %eax */
xchg %eax,%ebx
|
||||
ret
|
||||
654
arch/x86/net/bpf_jit_comp.c
Normal file
654
arch/x86/net/bpf_jit_comp.c
Normal file
File diff suppressed because it is too large
Load Diff
@@ -135,6 +135,8 @@ struct sk_filter
|
||||
{
|
||||
atomic_t refcnt;
|
||||
unsigned int len; /* Number of filter blocks */
|
||||
unsigned int (*bpf_func)(const struct sk_buff *skb,
|
||||
const struct sock_filter *filter);
|
||||
struct rcu_head rcu;
|
||||
struct sock_filter insns[0];
|
||||
};
|
||||
@@ -153,6 +155,80 @@ extern unsigned int sk_run_filter(const struct sk_buff *skb,
|
||||
extern int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk);
|
||||
extern int sk_detach_filter(struct sock *sk);
|
||||
extern int sk_chk_filter(struct sock_filter *filter, int flen);
|
||||
|
||||
/*
 * JIT hooks for socket filters.
 *
 * With CONFIG_BPF_JIT, bpf_jit_compile() and bpf_jit_free() are defined
 * elsewhere (only declared here) and SK_RUN_FILTER() dispatches through the
 * filter's bpf_func pointer. Without it, both hooks are empty inline stubs
 * and SK_RUN_FILTER() calls sk_run_filter() directly.
 *
 * SK_RUN_FILTER(FILTER, SKB):
 *   FILTER - pointer to the struct sk_filter to run
 *   SKB    - the sk_buff handed to the filter
 * Both arguments are parenthesized in the expansion so that arbitrary
 * expressions can be passed without operator-precedence surprises.
 */
#ifdef CONFIG_BPF_JIT
|
||||
extern void bpf_jit_compile(struct sk_filter *fp);
|
||||
extern void bpf_jit_free(struct sk_filter *fp);
|
||||
#define SK_RUN_FILTER(FILTER, SKB) (*(FILTER)->bpf_func)((SKB), (FILTER)->insns)
|
||||
#else
|
||||
static inline void bpf_jit_compile(struct sk_filter *fp)
|
||||
{
|
||||
}
|
||||
static inline void bpf_jit_free(struct sk_filter *fp)
|
||||
{
|
||||
}
|
||||
#define SK_RUN_FILTER(FILTER, SKB) sk_run_filter((SKB), (FILTER)->insns)
|
||||
#endif
|
||||
|
||||
/*
 * BPF_S_* opcode identifiers.
 *
 * Numbering starts at 1 (BPF_S_RET_K) and increases by one per entry, so 0 is
 * not a valid BPF_S_* value. The order below fixes the numeric values; insert
 * new entries with care.
 * NOTE(review): presumably these are the internal codes the filter checker
 * stores in sock_filter.code -- confirm against net/core/filter.c.
 */
enum {
|
||||
BPF_S_RET_K = 1,
|
||||
BPF_S_RET_A,
|
||||
BPF_S_ALU_ADD_K,
|
||||
BPF_S_ALU_ADD_X,
|
||||
BPF_S_ALU_SUB_K,
|
||||
BPF_S_ALU_SUB_X,
|
||||
BPF_S_ALU_MUL_K,
|
||||
BPF_S_ALU_MUL_X,
|
||||
BPF_S_ALU_DIV_X,
|
||||
BPF_S_ALU_AND_K,
|
||||
BPF_S_ALU_AND_X,
|
||||
BPF_S_ALU_OR_K,
|
||||
BPF_S_ALU_OR_X,
|
||||
BPF_S_ALU_LSH_K,
|
||||
BPF_S_ALU_LSH_X,
|
||||
BPF_S_ALU_RSH_K,
|
||||
BPF_S_ALU_RSH_X,
|
||||
BPF_S_ALU_NEG,
|
||||
BPF_S_LD_W_ABS,
|
||||
BPF_S_LD_H_ABS,
|
||||
BPF_S_LD_B_ABS,
|
||||
BPF_S_LD_W_LEN,
|
||||
BPF_S_LD_W_IND,
|
||||
BPF_S_LD_H_IND,
|
||||
BPF_S_LD_B_IND,
|
||||
BPF_S_LD_IMM,
|
||||
BPF_S_LDX_W_LEN,
|
||||
BPF_S_LDX_B_MSH,
|
||||
BPF_S_LDX_IMM,
|
||||
BPF_S_MISC_TAX,
|
||||
BPF_S_MISC_TXA,
|
||||
BPF_S_ALU_DIV_K,
|
||||
BPF_S_LD_MEM,
|
||||
BPF_S_LDX_MEM,
|
||||
BPF_S_ST,
|
||||
BPF_S_STX,
|
||||
BPF_S_JMP_JA,
|
||||
BPF_S_JMP_JEQ_K,
|
||||
BPF_S_JMP_JEQ_X,
|
||||
BPF_S_JMP_JGE_K,
|
||||
BPF_S_JMP_JGE_X,
|
||||
BPF_S_JMP_JGT_K,
|
||||
BPF_S_JMP_JGT_X,
|
||||
BPF_S_JMP_JSET_K,
|
||||
BPF_S_JMP_JSET_X,
|
||||
/* Ancillary data */
|
||||
BPF_S_ANC_PROTOCOL,
|
||||
BPF_S_ANC_PKTTYPE,
|
||||
BPF_S_ANC_IFINDEX,
|
||||
BPF_S_ANC_NLATTR,
|
||||
BPF_S_ANC_NLATTR_NEST,
|
||||
BPF_S_ANC_MARK,
|
||||
BPF_S_ANC_QUEUE,
|
||||
BPF_S_ANC_HATYPE,
|
||||
BPF_S_ANC_RXHASH,
|
||||
BPF_S_ANC_CPU,
|
||||
};
|
||||
|
||||
#endif /* __KERNEL__ */
|
||||
|
||||
#endif /* __LINUX_FILTER_H__ */
|
||||
|
||||
@@ -2514,6 +2514,7 @@ extern struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
|
||||
extern int netdev_max_backlog;
|
||||
extern int netdev_tstamp_prequeue;
|
||||
extern int weight_p;
|
||||
extern int bpf_jit_enable;
|
||||
extern int netdev_set_master(struct net_device *dev, struct net_device *master);
|
||||
extern int netdev_set_bond_master(struct net_device *dev,
|
||||
struct net_device *master);
|
||||
|
||||
@@ -391,8 +391,8 @@ struct sk_buff {
|
||||
|
||||
__u32 rxhash;
|
||||
|
||||
__u16 queue_mapping;
|
||||
kmemcheck_bitfield_begin(flags2);
|
||||
__u16 queue_mapping:16;
|
||||
#ifdef CONFIG_IPV6_NDISC_NODETYPE
|
||||
__u8 ndisc_nodetype:2;
|
||||
#endif
|
||||
|
||||
13
net/Kconfig
13
net/Kconfig
@@ -232,6 +232,19 @@ config XPS
|
||||
depends on SMP && SYSFS && USE_GENERIC_SMP_HELPERS
|
||||
default y
|
||||
|
||||
config HAVE_BPF_JIT
|
||||
bool
|
||||
|
||||
config BPF_JIT
|
||||
bool "enable BPF Just In Time compiler"
|
||||
depends on HAVE_BPF_JIT
|
||||
---help---
|
||||
Berkeley Packet Filter filtering capabilities are normally handled
|
||||
by an interpreter. This option allows the kernel to generate native
|
||||
code when a filter is loaded in memory. This should speed up
|
||||
packet sniffing (libpcap/tcpdump). Note: the admin must enable
|
||||
this feature by changing /proc/sys/net/core/bpf_jit_enable
|
||||
|
||||
menu "Network testing"
|
||||
|
||||
config NET_PKTGEN
|
||||
|
||||
@@ -39,65 +39,6 @@
|
||||
#include <linux/filter.h>
|
||||
#include <linux/reciprocal_div.h>
|
||||
|
||||
enum {
|
||||
BPF_S_RET_K = 1,
|
||||
BPF_S_RET_A,
|
||||
BPF_S_ALU_ADD_K,
|
||||
BPF_S_ALU_ADD_X,
|
||||
BPF_S_ALU_SUB_K,
|
||||
BPF_S_ALU_SUB_X,
|
||||
BPF_S_ALU_MUL_K,
|
||||
BPF_S_ALU_MUL_X,
|
||||
BPF_S_ALU_DIV_X,
|
||||
BPF_S_ALU_AND_K,
|
||||
BPF_S_ALU_AND_X,
|
||||
BPF_S_ALU_OR_K,
|
||||
BPF_S_ALU_OR_X,
|
||||
BPF_S_ALU_LSH_K,
|
||||
BPF_S_ALU_LSH_X,
|
||||
BPF_S_ALU_RSH_K,
|
||||
BPF_S_ALU_RSH_X,
|
||||
BPF_S_ALU_NEG,
|
||||
BPF_S_LD_W_ABS,
|
||||
BPF_S_LD_H_ABS,
|
||||
BPF_S_LD_B_ABS,
|
||||
BPF_S_LD_W_LEN,
|
||||
BPF_S_LD_W_IND,
|
||||
BPF_S_LD_H_IND,
|
||||
BPF_S_LD_B_IND,
|
||||
BPF_S_LD_IMM,
|
||||
BPF_S_LDX_W_LEN,
|
||||
BPF_S_LDX_B_MSH,
|
||||
BPF_S_LDX_IMM,
|
||||
BPF_S_MISC_TAX,
|
||||
BPF_S_MISC_TXA,
|
||||
BPF_S_ALU_DIV_K,
|
||||
BPF_S_LD_MEM,
|
||||
BPF_S_LDX_MEM,
|
||||
BPF_S_ST,
|
||||
BPF_S_STX,
|
||||
BPF_S_JMP_JA,
|
||||
BPF_S_JMP_JEQ_K,
|
||||
BPF_S_JMP_JEQ_X,
|
||||
BPF_S_JMP_JGE_K,
|
||||
BPF_S_JMP_JGE_X,
|
||||
BPF_S_JMP_JGT_K,
|
||||
BPF_S_JMP_JGT_X,
|
||||
BPF_S_JMP_JSET_K,
|
||||
BPF_S_JMP_JSET_X,
|
||||
/* Ancillary data */
|
||||
BPF_S_ANC_PROTOCOL,
|
||||
BPF_S_ANC_PKTTYPE,
|
||||
BPF_S_ANC_IFINDEX,
|
||||
BPF_S_ANC_NLATTR,
|
||||
BPF_S_ANC_NLATTR_NEST,
|
||||
BPF_S_ANC_MARK,
|
||||
BPF_S_ANC_QUEUE,
|
||||
BPF_S_ANC_HATYPE,
|
||||
BPF_S_ANC_RXHASH,
|
||||
BPF_S_ANC_CPU,
|
||||
};
|
||||
|
||||
/* No hurry in this branch */
|
||||
static void *__load_pointer(const struct sk_buff *skb, int k, unsigned int size)
|
||||
{
|
||||
@@ -145,7 +86,7 @@ int sk_filter(struct sock *sk, struct sk_buff *skb)
|
||||
rcu_read_lock();
|
||||
filter = rcu_dereference(sk->sk_filter);
|
||||
if (filter) {
|
||||
unsigned int pkt_len = sk_run_filter(skb, filter->insns);
|
||||
unsigned int pkt_len = SK_RUN_FILTER(filter, skb);
|
||||
|
||||
err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM;
|
||||
}
|
||||
@@ -638,6 +579,7 @@ void sk_filter_release_rcu(struct rcu_head *rcu)
|
||||
{
|
||||
struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu);
|
||||
|
||||
bpf_jit_free(fp);
|
||||
kfree(fp);
|
||||
}
|
||||
EXPORT_SYMBOL(sk_filter_release_rcu);
|
||||
@@ -672,6 +614,7 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
|
||||
|
||||
atomic_set(&fp->refcnt, 1);
|
||||
fp->len = fprog->len;
|
||||
fp->bpf_func = sk_run_filter;
|
||||
|
||||
err = sk_chk_filter(fp->insns, fp->len);
|
||||
if (err) {
|
||||
@@ -679,6 +622,8 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
|
||||
return err;
|
||||
}
|
||||
|
||||
bpf_jit_compile(fp);
|
||||
|
||||
old_fp = rcu_dereference_protected(sk->sk_filter,
|
||||
sock_owned_by_user(sk));
|
||||
rcu_assign_pointer(sk->sk_filter, fp);
|
||||
|
||||
@@ -122,6 +122,15 @@ static struct ctl_table net_core_table[] = {
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec
|
||||
},
|
||||
#ifdef CONFIG_BPF_JIT
|
||||
{
|
||||
.procname = "bpf_jit_enable",
|
||||
.data = &bpf_jit_enable,
|
||||
.maxlen = sizeof(int),
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec
|
||||
},
|
||||
#endif
|
||||
{
|
||||
.procname = "netdev_tstamp_prequeue",
|
||||
.data = &netdev_tstamp_prequeue,
|
||||
|
||||
@@ -538,7 +538,7 @@ static inline unsigned int run_filter(const struct sk_buff *skb,
|
||||
rcu_read_lock();
|
||||
filter = rcu_dereference(sk->sk_filter);
|
||||
if (filter != NULL)
|
||||
res = sk_run_filter(skb, filter->insns);
|
||||
res = SK_RUN_FILTER(filter, skb);
|
||||
rcu_read_unlock();
|
||||
|
||||
return res;
|
||||
|
||||
Reference in New Issue
Block a user