Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next

Alexei Starovoitov says:

====================
pull-request: bpf-next 2019-06-19

The following pull-request contains BPF updates for your *net-next* tree.

The main changes are:

1) new SO_REUSEPORT_DETACH_BPF setsocktopt, from Martin.

2) BTF based map definition, from Andrii.

3) support bpf_map_lookup_elem for xskmap, from Jonathan.

4) bounded loops and scalar precision logic in the verifier, from Alexei.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller
2019-06-20 00:06:27 -04:00
101 changed files with 4048 additions and 860 deletions

View File

@@ -122,6 +122,8 @@
#define SO_RCVTIMEO_NEW 66
#define SO_SNDTIMEO_NEW 67
#define SO_DETACH_REUSEPORT_BPF 68
#if !defined(__KERNEL__)
#if __BITS_PER_LONG == 64

View File

@@ -133,6 +133,8 @@
#define SO_RCVTIMEO_NEW 66
#define SO_SNDTIMEO_NEW 67
#define SO_DETACH_REUSEPORT_BPF 68
#if !defined(__KERNEL__)
#if __BITS_PER_LONG == 64

View File

@@ -114,6 +114,8 @@
#define SO_RCVTIMEO_NEW 0x4040
#define SO_SNDTIMEO_NEW 0x4041
#define SO_DETACH_REUSEPORT_BPF 0x4042
#if !defined(__KERNEL__)
#if __BITS_PER_LONG == 64

View File

@@ -115,6 +115,8 @@
#define SO_RCVTIMEO_NEW 0x0044
#define SO_SNDTIMEO_NEW 0x0045
#define SO_DETACH_REUSEPORT_BPF 0x0047
#if !defined(__KERNEL__)

View File

@@ -277,6 +277,7 @@ enum bpf_reg_type {
PTR_TO_TCP_SOCK, /* reg points to struct tcp_sock */
PTR_TO_TCP_SOCK_OR_NULL, /* reg points to struct tcp_sock or NULL */
PTR_TO_TP_BUFFER, /* reg points to a writable raw tp's buffer */
PTR_TO_XDP_SOCK, /* reg points to struct xdp_sock */
};
/* The information passed from prog-specific *_is_valid_access
@@ -1098,6 +1099,15 @@ u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type,
struct bpf_insn *insn_buf,
struct bpf_prog *prog,
u32 *target_size);
bool bpf_xdp_sock_is_valid_access(int off, int size, enum bpf_access_type type,
struct bpf_insn_access_aux *info);
u32 bpf_xdp_sock_convert_ctx_access(enum bpf_access_type type,
const struct bpf_insn *si,
struct bpf_insn *insn_buf,
struct bpf_prog *prog,
u32 *target_size);
#else
static inline bool bpf_tcp_sock_is_valid_access(int off, int size,
enum bpf_access_type type,
@@ -1114,6 +1124,21 @@ static inline u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type,
{
return 0;
}
static inline bool bpf_xdp_sock_is_valid_access(int off, int size,
enum bpf_access_type type,
struct bpf_insn_access_aux *info)
{
return false;
}
static inline u32 bpf_xdp_sock_convert_ctx_access(enum bpf_access_type type,
const struct bpf_insn *si,
struct bpf_insn *insn_buf,
struct bpf_prog *prog,
u32 *target_size)
{
return 0;
}
#endif /* CONFIG_INET */
#endif /* _LINUX_BPF_H */

View File

@@ -136,6 +136,8 @@ struct bpf_reg_state {
*/
s32 subreg_def;
enum bpf_reg_liveness live;
/* if (!precise && SCALAR_VALUE) min/max/tnum don't affect safety */
bool precise;
};
enum bpf_stack_slot_type {
@@ -187,14 +189,77 @@ struct bpf_func_state {
struct bpf_stack_state *stack;
};
struct bpf_idx_pair {
u32 prev_idx;
u32 idx;
};
#define MAX_CALL_FRAMES 8
struct bpf_verifier_state {
/* call stack tracking */
struct bpf_func_state *frame[MAX_CALL_FRAMES];
struct bpf_verifier_state *parent;
/*
* 'branches' field is the number of branches left to explore:
* 0 - all possible paths from this state reached bpf_exit or
* were safely pruned
* 1 - at least one path is being explored.
* This state hasn't reached bpf_exit
* 2 - at least two paths are being explored.
* This state is an immediate parent of two children.
* One is fallthrough branch with branches==1 and another
* state is pushed into stack (to be explored later) also with
* branches==1. The parent of this state has branches==1.
* The verifier state tree connected via 'parent' pointer looks like:
* 1
* 1
* 2 -> 1 (first 'if' pushed into stack)
* 1
* 2 -> 1 (second 'if' pushed into stack)
* 1
* 1
* 1 bpf_exit.
*
* Once do_check() reaches bpf_exit, it calls update_branch_counts()
* and the verifier state tree will look:
* 1
* 1
* 2 -> 1 (first 'if' pushed into stack)
* 1
* 1 -> 1 (second 'if' pushed into stack)
* 0
* 0
* 0 bpf_exit.
* After pop_stack() the do_check() will resume at second 'if'.
*
* If is_state_visited() sees a state with branches > 0 it means
* there is a loop. If such state is exactly equal to the current state
* it's an infinite loop. Note states_equal() checks for states
* equvalency, so two states being 'states_equal' does not mean
* infinite loop. The exact comparison is provided by
* states_maybe_looping() function. It's a stronger pre-check and
* much faster than states_equal().
*
* This algorithm may not find all possible infinite loops or
* loop iteration count may be too high.
* In such cases BPF_COMPLEXITY_LIMIT_INSNS limit kicks in.
*/
u32 branches;
u32 insn_idx;
u32 curframe;
u32 active_spin_lock;
bool speculative;
/* first and last insn idx of this verifier state */
u32 first_insn_idx;
u32 last_insn_idx;
/* jmp history recorded from first to last.
* backtracking is using it to go from last to first.
* For most states jmp_history_cnt is [0-3].
* For loops can go up to ~40.
*/
struct bpf_idx_pair *jmp_history;
u32 jmp_history_cnt;
};
#define bpf_get_spilled_reg(slot, frame) \
@@ -309,7 +374,9 @@ struct bpf_verifier_env {
} cfg;
u32 subprog_cnt;
/* number of instructions analyzed by the verifier */
u32 insn_processed;
u32 prev_insn_processed, insn_processed;
/* number of jmps, calls, exits analyzed so far */
u32 prev_jmps_processed, jmps_processed;
/* total verification time */
u64 verification_time;
/* maximum number of verifier states kept in 'branching' instructions */

View File

@@ -35,6 +35,8 @@ extern struct sock *reuseport_select_sock(struct sock *sk,
struct sk_buff *skb,
int hdr_len);
extern int reuseport_attach_prog(struct sock *sk, struct bpf_prog *prog);
extern int reuseport_detach_prog(struct sock *sk);
int reuseport_get_id(struct sock_reuseport *reuse);
#endif /* _SOCK_REUSEPORT_H */

View File

@@ -58,11 +58,11 @@ struct xdp_sock {
struct xdp_umem *umem;
struct list_head flush_node;
u16 queue_id;
struct xsk_queue *tx ____cacheline_aligned_in_smp;
struct list_head list;
bool zc;
/* Protects multiple processes in the control path */
struct mutex mutex;
struct xsk_queue *tx ____cacheline_aligned_in_smp;
struct list_head list;
/* Mutual exclusion of NAPI TX thread and sendmsg error paths
* in the SKB destructor callback.
*/

View File

@@ -117,6 +117,8 @@
#define SO_RCVTIMEO_NEW 66
#define SO_SNDTIMEO_NEW 67
#define SO_DETACH_REUSEPORT_BPF 68
#if !defined(__KERNEL__)
#if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__))

View File

@@ -3085,6 +3085,10 @@ struct bpf_sock_tuple {
};
};
struct bpf_xdp_sock {
__u32 queue_id;
};
#define XDP_PACKET_HEADROOM 256
/* User return codes for XDP prog type.
@@ -3245,6 +3249,7 @@ struct bpf_sock_addr {
__u32 msg_src_ip6[4]; /* Allows 1,2,4-byte read an 4-byte write.
* Stored in network byte order.
*/
__bpf_md_ptr(struct bpf_sock *, sk);
};
/* User bpf_sock_ops struct to access socket values and specify request ops
@@ -3296,6 +3301,7 @@ struct bpf_sock_ops {
__u32 sk_txhash;
__u64 bytes_received;
__u64 bytes_acked;
__bpf_md_ptr(struct bpf_sock *, sk);
};
/* Definitions for bpf_sock_ops_cb_flags */

View File

@@ -1,5 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
obj-y := core.o
CFLAGS_core.o += $(call cc-disable-warning, override-init)
obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o
obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o

View File

@@ -80,8 +80,8 @@ static u64 dev_map_bitmap_size(const union bpf_attr *attr)
static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
{
struct bpf_dtab *dtab;
int err = -EINVAL;
u64 cost;
int err;
if (!capable(CAP_NET_ADMIN))
return ERR_PTR(-EPERM);

File diff suppressed because it is too large Load Diff

View File

@@ -17,8 +17,8 @@ struct xsk_map {
static struct bpf_map *xsk_map_alloc(union bpf_attr *attr)
{
int cpu, err = -EINVAL;
struct xsk_map *m;
int cpu, err;
u64 cost;
if (!capable(CAP_NET_ADMIN))
@@ -151,6 +151,12 @@ void __xsk_map_flush(struct bpf_map *map)
}
static void *xsk_map_lookup_elem(struct bpf_map *map, void *key)
{
WARN_ON_ONCE(!rcu_read_lock_held());
return __xsk_map_lookup_elem(map, *(u32 *)key);
}
static void *xsk_map_lookup_elem_sys_only(struct bpf_map *map, void *key)
{
return ERR_PTR(-EOPNOTSUPP);
}
@@ -218,6 +224,7 @@ const struct bpf_map_ops xsk_map_ops = {
.map_free = xsk_map_free,
.map_get_next_key = xsk_map_get_next_key,
.map_lookup_elem = xsk_map_lookup_elem,
.map_lookup_elem_sys_only = xsk_map_lookup_elem_sys_only,
.map_update_elem = xsk_map_update_elem,
.map_delete_elem = xsk_map_delete_elem,
.map_check_btf = map_check_no_btf,

View File

@@ -5695,6 +5695,46 @@ BPF_CALL_1(bpf_skb_ecn_set_ce, struct sk_buff *, skb)
return INET_ECN_set_ce(skb);
}
bool bpf_xdp_sock_is_valid_access(int off, int size, enum bpf_access_type type,
struct bpf_insn_access_aux *info)
{
if (off < 0 || off >= offsetofend(struct bpf_xdp_sock, queue_id))
return false;
if (off % size != 0)
return false;
switch (off) {
default:
return size == sizeof(__u32);
}
}
u32 bpf_xdp_sock_convert_ctx_access(enum bpf_access_type type,
const struct bpf_insn *si,
struct bpf_insn *insn_buf,
struct bpf_prog *prog, u32 *target_size)
{
struct bpf_insn *insn = insn_buf;
#define BPF_XDP_SOCK_GET(FIELD) \
do { \
BUILD_BUG_ON(FIELD_SIZEOF(struct xdp_sock, FIELD) > \
FIELD_SIZEOF(struct bpf_xdp_sock, FIELD)); \
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_sock, FIELD),\
si->dst_reg, si->src_reg, \
offsetof(struct xdp_sock, FIELD)); \
} while (0)
switch (si->off) {
case offsetof(struct bpf_xdp_sock, queue_id):
BPF_XDP_SOCK_GET(queue_id);
break;
}
return insn - insn_buf;
}
static const struct bpf_func_proto bpf_skb_ecn_set_ce_proto = {
.func = bpf_skb_ecn_set_ce,
.gpl_only = false,
@@ -5897,6 +5937,10 @@ sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_skc_lookup_tcp:
return &bpf_sock_addr_skc_lookup_tcp_proto;
#endif /* CONFIG_INET */
case BPF_FUNC_sk_storage_get:
return &bpf_sk_storage_get_proto;
case BPF_FUNC_sk_storage_delete:
return &bpf_sk_storage_delete_proto;
default:
return bpf_base_func_proto(func_id);
}
@@ -5934,6 +5978,10 @@ cg_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_sk_storage_get_proto;
case BPF_FUNC_sk_storage_delete:
return &bpf_sk_storage_delete_proto;
#ifdef CONFIG_SOCK_CGROUP_DATA
case BPF_FUNC_skb_cgroup_id:
return &bpf_skb_cgroup_id_proto;
#endif
#ifdef CONFIG_INET
case BPF_FUNC_tcp_sock:
return &bpf_tcp_sock_proto;
@@ -6114,6 +6162,14 @@ sock_ops_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_get_local_storage_proto;
case BPF_FUNC_perf_event_output:
return &bpf_sockopt_event_output_proto;
case BPF_FUNC_sk_storage_get:
return &bpf_sk_storage_get_proto;
case BPF_FUNC_sk_storage_delete:
return &bpf_sk_storage_delete_proto;
#ifdef CONFIG_INET
case BPF_FUNC_tcp_sock:
return &bpf_tcp_sock_proto;
#endif /* CONFIG_INET */
default:
return bpf_base_func_proto(func_id);
}
@@ -6801,6 +6857,13 @@ static bool sock_addr_is_valid_access(int off, int size,
if (size != size_default)
return false;
break;
case offsetof(struct bpf_sock_addr, sk):
if (type != BPF_READ)
return false;
if (size != sizeof(__u64))
return false;
info->reg_type = PTR_TO_SOCKET;
break;
default:
if (type == BPF_READ) {
if (size != size_default)
@@ -6844,6 +6907,11 @@ static bool sock_ops_is_valid_access(int off, int size,
if (size != sizeof(__u64))
return false;
break;
case offsetof(struct bpf_sock_ops, sk):
if (size != sizeof(__u64))
return false;
info->reg_type = PTR_TO_SOCKET_OR_NULL;
break;
default:
if (size != size_default)
return false;
@@ -7751,6 +7819,11 @@ static u32 sock_addr_convert_ctx_access(enum bpf_access_type type,
struct bpf_sock_addr_kern, struct in6_addr, t_ctx,
s6_addr32[0], BPF_SIZE(si->code), off, tmp_reg);
break;
case offsetof(struct bpf_sock_addr, sk):
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sock_addr_kern, sk),
si->dst_reg, si->src_reg,
offsetof(struct bpf_sock_addr_kern, sk));
break;
}
return insn - insn_buf;
@@ -8010,6 +8083,19 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
SOCK_OPS_GET_OR_SET_FIELD(sk_txhash, sk_txhash,
struct sock, type);
break;
case offsetof(struct bpf_sock_ops, sk):
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
struct bpf_sock_ops_kern,
is_fullsock),
si->dst_reg, si->src_reg,
offsetof(struct bpf_sock_ops_kern,
is_fullsock));
*insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1);
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
struct bpf_sock_ops_kern, sk),
si->dst_reg, si->src_reg,
offsetof(struct bpf_sock_ops_kern, sk));
break;
}
return insn - insn_buf;
}

View File

@@ -1039,6 +1039,10 @@ set_rcvbuf:
}
break;
case SO_DETACH_REUSEPORT_BPF:
ret = reuseport_detach_prog(sk);
break;
case SO_DETACH_FILTER:
ret = sk_detach_filter(sk);
break;

View File

@@ -332,3 +332,27 @@ int reuseport_attach_prog(struct sock *sk, struct bpf_prog *prog)
return 0;
}
EXPORT_SYMBOL(reuseport_attach_prog);
int reuseport_detach_prog(struct sock *sk)
{
struct sock_reuseport *reuse;
struct bpf_prog *old_prog;
if (!rcu_access_pointer(sk->sk_reuseport_cb))
return sk->sk_reuseport ? -ENOENT : -EINVAL;
old_prog = NULL;
spin_lock_bh(&reuseport_lock);
reuse = rcu_dereference_protected(sk->sk_reuseport_cb,
lockdep_is_held(&reuseport_lock));
rcu_swap_protected(reuse->prog, old_prog,
lockdep_is_held(&reuseport_lock));
spin_unlock_bh(&reuseport_lock);
if (!old_prog)
return -ENOENT;
sk_reuseport_prog_free(old_prog);
return 0;
}
EXPORT_SYMBOL(reuseport_detach_prog);

View File

@@ -170,21 +170,12 @@ always += ibumad_kern.o
always += hbm_out_kern.o
KBUILD_HOSTCFLAGS += -I$(objtree)/usr/include
KBUILD_HOSTCFLAGS += -I$(srctree)/tools/lib/
KBUILD_HOSTCFLAGS += -I$(srctree)/tools/lib/bpf/
KBUILD_HOSTCFLAGS += -I$(srctree)/tools/testing/selftests/bpf/
KBUILD_HOSTCFLAGS += -I$(srctree)/tools/lib/ -I$(srctree)/tools/include
KBUILD_HOSTCFLAGS += -I$(srctree)/tools/perf
HOSTCFLAGS_bpf_load.o += -I$(objtree)/usr/include -Wno-unused-variable
HOSTCFLAGS_trace_helpers.o += -I$(srctree)/tools/lib/bpf/
HOSTCFLAGS_trace_output_user.o += -I$(srctree)/tools/lib/bpf/
HOSTCFLAGS_offwaketime_user.o += -I$(srctree)/tools/lib/bpf/
HOSTCFLAGS_spintest_user.o += -I$(srctree)/tools/lib/bpf/
HOSTCFLAGS_trace_event_user.o += -I$(srctree)/tools/lib/bpf/
HOSTCFLAGS_sampleip_user.o += -I$(srctree)/tools/lib/bpf/
HOSTCFLAGS_task_fd_query_user.o += -I$(srctree)/tools/lib/bpf/
HOSTCFLAGS_xdp_sample_pkts_user.o += -I$(srctree)/tools/lib/bpf/
KBUILD_HOSTLDLIBS += $(LIBBPF) -lelf
HOSTLDLIBS_tracex4 += -lrt
@@ -206,6 +197,17 @@ HOSTCC = $(CROSS_COMPILE)gcc
CLANG_ARCH_ARGS = -target $(ARCH)
endif
# Don't evaluate probes and warnings if we need to run make recursively
ifneq ($(src),)
HDR_PROBE := $(shell echo "\#include <linux/types.h>\n struct list_head { int a; }; int main() { return 0; }" | \
$(HOSTCC) $(KBUILD_HOSTCFLAGS) -x c - -o /dev/null 2>/dev/null && \
echo okay)
ifeq ($(HDR_PROBE),)
$(warning WARNING: Detected possible issues with include path.)
$(warning WARNING: Please install kernel headers locally (make headers_install).)
endif
BTF_LLC_PROBE := $(shell $(LLC) -march=bpf -mattr=help 2>&1 | grep dwarfris)
BTF_PAHOLE_PROBE := $(shell $(BTF_PAHOLE) --help 2>&1 | grep BTF)
BTF_OBJCOPY_PROBE := $(shell $(LLVM_OBJCOPY) --help 2>&1 | grep -i 'usage.*llvm')
@@ -223,6 +225,7 @@ ifneq ($(and $(BTF_LLC_PROBE),$(BTF_PAHOLE_PROBE),$(BTF_OBJCOPY_PROBE)),)
DWARF2BTF = y
endif
endif
endif
# Trick to allow make to be run from this directory
all:

View File

@@ -14,7 +14,7 @@
#include <bpf/bpf.h>
#include "bpf/libbpf.h"
#include "libbpf.h"
#include "bpf_insn.h"
#include "sock_example.h"

View File

@@ -50,8 +50,8 @@
#include "cgroup_helpers.h"
#include "hbm.h"
#include "bpf_util.h"
#include "bpf/bpf.h"
#include "bpf/libbpf.h"
#include "bpf.h"
#include "libbpf.h"
bool outFlag = true;
int minRate = 1000; /* cgroup rate limit in Mbps */
@@ -411,7 +411,7 @@ static void Usage(void)
" -l also limit flows using loopback\n"
" -n <#> to create cgroup \"/hbm#\" and attach prog\n"
" Default is /hbm1\n"
" --no_cn disable CN notifcations\n"
" --no_cn disable CN notifications\n"
" -r <rate> Rate in Mbps\n"
" -s Update HBM stats\n"
" -t <time> Exit after specified seconds (default is 0)\n"

Some files were not shown because too many files have changed in this diff Show More