Daniel Borkmann says:

====================
pull-request: bpf-next 2022-03-04

We've added 32 non-merge commits during the last 14 day(s) which contain
a total of 59 files changed, 1038 insertions(+), 473 deletions(-).

The main changes are:

1) Optimize BPF stackmap's build_id retrieval by caching last valid build_id,
   as consecutive stack frames are likely to be in the same VMA and therefore
   have the same build id, from Hao Luo.

2) Several improvements to arm64 BPF JIT, that is, support for JITing
   the atomic[64]_fetch_add, atomic[64]_[fetch_]{and,or,xor} and lastly
   atomic[64]_{xchg|cmpxchg}. Also fix the BTF line info dump for JITed
   programs, from Hou Tao.

3) Optimize generic BPF map batch deletion by only enforcing synchronize_rcu()
   barrier once upon return to user space, from Eric Dumazet.

4) For kernel build parse DWARF and generate BTF through pahole with enabled
   multithreading, from Kui-Feng Lee.

5) BPF verifier usability improvements by making log info more concise and
   replacing inv with scalar type name, from Mykola Lysenko.

6) Two follow-up fixes for BPF prog JIT pack allocator, from Song Liu.

7) Add a new Kconfig to allow for loading kernel modules with non-matching
   BTF type info; their BTF info is then removed on load, from Connor O'Brien.

8) Remove reallocarray() usage from bpftool and switch to libbpf_reallocarray()
   in order to fix compilation errors for older glibc, from Mauricio Vásquez.

9) Fix libbpf to error on conflicting name in BTF when type declaration
   appears before the definition, from Xu Kuohai.

10) Fix issue in BPF preload for in-kernel light skeleton where loaded BPF
    program fds prevent init process from setting up fd 0-2, from Yucong Sun.

11) Fix libbpf reuse of pinned perf RB map when max_entries is auto-determined
    by libbpf, from Stijn Tintel.

12) Several cleanups for libbpf and a fix to enforce perf RB map #pages to be
    non-zero, from Yuntao Wang.

* https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next: (32 commits)
  bpf: Small BPF verifier log improvements
  libbpf: Add a check to ensure that page_cnt is non-zero
  bpf, x86: Set header->size properly before freeing it
  x86: Disable HAVE_ARCH_HUGE_VMALLOC on 32-bit x86
  bpf, test_run: Fix overflow in XDP frags bpf_test_finish
  selftests/bpf: Update btf_dump case for conflicting names
  libbpf: Skip forward declaration when counting duplicated type names
  bpf: Add some description about BPF_JIT_ALWAYS_ON in Kconfig
  bpf, docs: Add a missing colon in verifier.rst
  bpf: Cache the last valid build_id
  libbpf: Fix BPF_MAP_TYPE_PERF_EVENT_ARRAY auto-pinning
  bpf, selftests: Use raw_tp program for atomic test
  bpf, arm64: Support more atomic operations
  bpftool: Remove redundant slashes
  bpf: Add config to allow loading modules with BTF mismatches
  bpf, arm64: Feed byte-offset into bpf line info
  bpf, arm64: Call build_prologue() first in first JIT pass
  bpf: Fix issue with bpf preload module taking over stdout/stdin of kernel.
  bpftool: Bpf skeletons assert type sizes
  bpf: Cleanup comments
  ...
====================

Link: https://lore.kernel.org/r/20220304164313.31675-1-daniel@iogearbox.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
Jakub Kicinski
2022-03-04 19:28:17 -08:00
59 changed files with 1039 additions and 474 deletions

View File

@@ -329,7 +329,7 @@ Program with unreachable instructions::
BPF_EXIT_INSN(),
};
Error:
Error::
unreachable insn 1

View File

@@ -34,18 +34,6 @@
*/
#define BREAK_INSTR_SIZE AARCH64_INSN_SIZE
/*
* BRK instruction encoding
* The #imm16 value should be placed at bits[20:5] within BRK ins
*/
#define AARCH64_BREAK_MON 0xd4200000
/*
* BRK instruction for provoking a fault on purpose
* Unlike kgdb, #imm16 value with unallocated handler is used for faulting.
*/
#define AARCH64_BREAK_FAULT (AARCH64_BREAK_MON | (FAULT_BRK_IMM << 5))
#define AARCH64_BREAK_KGDB_DYN_DBG \
(AARCH64_BREAK_MON | (KGDB_DYN_DBG_BRK_IMM << 5))

View File

@@ -3,7 +3,21 @@
#ifndef __ASM_INSN_DEF_H
#define __ASM_INSN_DEF_H
#include <asm/brk-imm.h>
/* A64 instructions are always 32 bits. */
#define AARCH64_INSN_SIZE 4
/*
* BRK instruction encoding
* The #imm16 value should be placed at bits[20:5] within BRK ins
*/
#define AARCH64_BREAK_MON 0xd4200000
/*
* BRK instruction for provoking a fault on purpose
* Unlike kgdb, #imm16 value with unallocated handler is used for faulting.
*/
#define AARCH64_BREAK_FAULT (AARCH64_BREAK_MON | (FAULT_BRK_IMM << 5))
#endif /* __ASM_INSN_DEF_H */

View File

@@ -205,7 +205,9 @@ enum aarch64_insn_ldst_type {
AARCH64_INSN_LDST_LOAD_PAIR_POST_INDEX,
AARCH64_INSN_LDST_STORE_PAIR_POST_INDEX,
AARCH64_INSN_LDST_LOAD_EX,
AARCH64_INSN_LDST_LOAD_ACQ_EX,
AARCH64_INSN_LDST_STORE_EX,
AARCH64_INSN_LDST_STORE_REL_EX,
};
enum aarch64_insn_adsb_type {
@@ -280,6 +282,36 @@ enum aarch64_insn_adr_type {
AARCH64_INSN_ADR_TYPE_ADR,
};
enum aarch64_insn_mem_atomic_op {
AARCH64_INSN_MEM_ATOMIC_ADD,
AARCH64_INSN_MEM_ATOMIC_CLR,
AARCH64_INSN_MEM_ATOMIC_EOR,
AARCH64_INSN_MEM_ATOMIC_SET,
AARCH64_INSN_MEM_ATOMIC_SWP,
};
enum aarch64_insn_mem_order_type {
AARCH64_INSN_MEM_ORDER_NONE,
AARCH64_INSN_MEM_ORDER_ACQ,
AARCH64_INSN_MEM_ORDER_REL,
AARCH64_INSN_MEM_ORDER_ACQREL,
};
enum aarch64_insn_mb_type {
AARCH64_INSN_MB_SY,
AARCH64_INSN_MB_ST,
AARCH64_INSN_MB_LD,
AARCH64_INSN_MB_ISH,
AARCH64_INSN_MB_ISHST,
AARCH64_INSN_MB_ISHLD,
AARCH64_INSN_MB_NSH,
AARCH64_INSN_MB_NSHST,
AARCH64_INSN_MB_NSHLD,
AARCH64_INSN_MB_OSH,
AARCH64_INSN_MB_OSHST,
AARCH64_INSN_MB_OSHLD,
};
#define __AARCH64_INSN_FUNCS(abbr, mask, val) \
static __always_inline bool aarch64_insn_is_##abbr(u32 code) \
{ \
@@ -303,6 +335,11 @@ __AARCH64_INSN_FUNCS(store_post, 0x3FE00C00, 0x38000400)
__AARCH64_INSN_FUNCS(load_post, 0x3FE00C00, 0x38400400)
__AARCH64_INSN_FUNCS(str_reg, 0x3FE0EC00, 0x38206800)
__AARCH64_INSN_FUNCS(ldadd, 0x3F20FC00, 0x38200000)
__AARCH64_INSN_FUNCS(ldclr, 0x3F20FC00, 0x38201000)
__AARCH64_INSN_FUNCS(ldeor, 0x3F20FC00, 0x38202000)
__AARCH64_INSN_FUNCS(ldset, 0x3F20FC00, 0x38203000)
__AARCH64_INSN_FUNCS(swp, 0x3F20FC00, 0x38208000)
__AARCH64_INSN_FUNCS(cas, 0x3FA07C00, 0x08A07C00)
__AARCH64_INSN_FUNCS(ldr_reg, 0x3FE0EC00, 0x38606800)
__AARCH64_INSN_FUNCS(ldr_lit, 0xBF000000, 0x18000000)
__AARCH64_INSN_FUNCS(ldrsw_lit, 0xFF000000, 0x98000000)
@@ -474,13 +511,6 @@ u32 aarch64_insn_gen_load_store_ex(enum aarch64_insn_register reg,
enum aarch64_insn_register state,
enum aarch64_insn_size_type size,
enum aarch64_insn_ldst_type type);
u32 aarch64_insn_gen_ldadd(enum aarch64_insn_register result,
enum aarch64_insn_register address,
enum aarch64_insn_register value,
enum aarch64_insn_size_type size);
u32 aarch64_insn_gen_stadd(enum aarch64_insn_register address,
enum aarch64_insn_register value,
enum aarch64_insn_size_type size);
u32 aarch64_insn_gen_add_sub_imm(enum aarch64_insn_register dst,
enum aarch64_insn_register src,
int imm, enum aarch64_insn_variant variant,
@@ -541,6 +571,42 @@ u32 aarch64_insn_gen_prefetch(enum aarch64_insn_register base,
enum aarch64_insn_prfm_type type,
enum aarch64_insn_prfm_target target,
enum aarch64_insn_prfm_policy policy);
#ifdef CONFIG_ARM64_LSE_ATOMICS
u32 aarch64_insn_gen_atomic_ld_op(enum aarch64_insn_register result,
enum aarch64_insn_register address,
enum aarch64_insn_register value,
enum aarch64_insn_size_type size,
enum aarch64_insn_mem_atomic_op op,
enum aarch64_insn_mem_order_type order);
u32 aarch64_insn_gen_cas(enum aarch64_insn_register result,
enum aarch64_insn_register address,
enum aarch64_insn_register value,
enum aarch64_insn_size_type size,
enum aarch64_insn_mem_order_type order);
#else
static inline
u32 aarch64_insn_gen_atomic_ld_op(enum aarch64_insn_register result,
enum aarch64_insn_register address,
enum aarch64_insn_register value,
enum aarch64_insn_size_type size,
enum aarch64_insn_mem_atomic_op op,
enum aarch64_insn_mem_order_type order)
{
return AARCH64_BREAK_FAULT;
}
static inline
u32 aarch64_insn_gen_cas(enum aarch64_insn_register result,
enum aarch64_insn_register address,
enum aarch64_insn_register value,
enum aarch64_insn_size_type size,
enum aarch64_insn_mem_order_type order)
{
return AARCH64_BREAK_FAULT;
}
#endif
u32 aarch64_insn_gen_dmb(enum aarch64_insn_mb_type type);
s32 aarch64_get_branch_offset(u32 insn);
u32 aarch64_set_branch_offset(u32 insn, s32 offset);

View File

@@ -578,10 +578,16 @@ u32 aarch64_insn_gen_load_store_ex(enum aarch64_insn_register reg,
switch (type) {
case AARCH64_INSN_LDST_LOAD_EX:
case AARCH64_INSN_LDST_LOAD_ACQ_EX:
insn = aarch64_insn_get_load_ex_value();
if (type == AARCH64_INSN_LDST_LOAD_ACQ_EX)
insn |= BIT(15);
break;
case AARCH64_INSN_LDST_STORE_EX:
case AARCH64_INSN_LDST_STORE_REL_EX:
insn = aarch64_insn_get_store_ex_value();
if (type == AARCH64_INSN_LDST_STORE_REL_EX)
insn |= BIT(15);
break;
default:
pr_err("%s: unknown load/store exclusive encoding %d\n", __func__, type);
@@ -603,12 +609,65 @@ u32 aarch64_insn_gen_load_store_ex(enum aarch64_insn_register reg,
state);
}
u32 aarch64_insn_gen_ldadd(enum aarch64_insn_register result,
enum aarch64_insn_register address,
enum aarch64_insn_register value,
enum aarch64_insn_size_type size)
#ifdef CONFIG_ARM64_LSE_ATOMICS
static u32 aarch64_insn_encode_ldst_order(enum aarch64_insn_mem_order_type type,
u32 insn)
{
u32 insn = aarch64_insn_get_ldadd_value();
u32 order;
switch (type) {
case AARCH64_INSN_MEM_ORDER_NONE:
order = 0;
break;
case AARCH64_INSN_MEM_ORDER_ACQ:
order = 2;
break;
case AARCH64_INSN_MEM_ORDER_REL:
order = 1;
break;
case AARCH64_INSN_MEM_ORDER_ACQREL:
order = 3;
break;
default:
pr_err("%s: unknown mem order %d\n", __func__, type);
return AARCH64_BREAK_FAULT;
}
insn &= ~GENMASK(23, 22);
insn |= order << 22;
return insn;
}
u32 aarch64_insn_gen_atomic_ld_op(enum aarch64_insn_register result,
enum aarch64_insn_register address,
enum aarch64_insn_register value,
enum aarch64_insn_size_type size,
enum aarch64_insn_mem_atomic_op op,
enum aarch64_insn_mem_order_type order)
{
u32 insn;
switch (op) {
case AARCH64_INSN_MEM_ATOMIC_ADD:
insn = aarch64_insn_get_ldadd_value();
break;
case AARCH64_INSN_MEM_ATOMIC_CLR:
insn = aarch64_insn_get_ldclr_value();
break;
case AARCH64_INSN_MEM_ATOMIC_EOR:
insn = aarch64_insn_get_ldeor_value();
break;
case AARCH64_INSN_MEM_ATOMIC_SET:
insn = aarch64_insn_get_ldset_value();
break;
case AARCH64_INSN_MEM_ATOMIC_SWP:
insn = aarch64_insn_get_swp_value();
break;
default:
pr_err("%s: unimplemented mem atomic op %d\n", __func__, op);
return AARCH64_BREAK_FAULT;
}
switch (size) {
case AARCH64_INSN_SIZE_32:
@@ -621,6 +680,8 @@ u32 aarch64_insn_gen_ldadd(enum aarch64_insn_register result,
insn = aarch64_insn_encode_ldst_size(size, insn);
insn = aarch64_insn_encode_ldst_order(order, insn);
insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RT, insn,
result);
@@ -631,18 +692,69 @@ u32 aarch64_insn_gen_ldadd(enum aarch64_insn_register result,
value);
}
u32 aarch64_insn_gen_stadd(enum aarch64_insn_register address,
enum aarch64_insn_register value,
enum aarch64_insn_size_type size)
static u32 aarch64_insn_encode_cas_order(enum aarch64_insn_mem_order_type type,
u32 insn)
{
/*
* STADD is simply encoded as an alias for LDADD with XZR as
* the destination register.
*/
return aarch64_insn_gen_ldadd(AARCH64_INSN_REG_ZR, address,
value, size);
u32 order;
switch (type) {
case AARCH64_INSN_MEM_ORDER_NONE:
order = 0;
break;
case AARCH64_INSN_MEM_ORDER_ACQ:
order = BIT(22);
break;
case AARCH64_INSN_MEM_ORDER_REL:
order = BIT(15);
break;
case AARCH64_INSN_MEM_ORDER_ACQREL:
order = BIT(15) | BIT(22);
break;
default:
pr_err("%s: unknown mem order %d\n", __func__, type);
return AARCH64_BREAK_FAULT;
}
insn &= ~(BIT(15) | BIT(22));
insn |= order;
return insn;
}
u32 aarch64_insn_gen_cas(enum aarch64_insn_register result,
enum aarch64_insn_register address,
enum aarch64_insn_register value,
enum aarch64_insn_size_type size,
enum aarch64_insn_mem_order_type order)
{
u32 insn;
switch (size) {
case AARCH64_INSN_SIZE_32:
case AARCH64_INSN_SIZE_64:
break;
default:
pr_err("%s: unimplemented size encoding %d\n", __func__, size);
return AARCH64_BREAK_FAULT;
}
insn = aarch64_insn_get_cas_value();
insn = aarch64_insn_encode_ldst_size(size, insn);
insn = aarch64_insn_encode_cas_order(order, insn);
insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RT, insn,
result);
insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn,
address);
return aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RS, insn,
value);
}
#endif
static u32 aarch64_insn_encode_prfm_imm(enum aarch64_insn_prfm_type type,
enum aarch64_insn_prfm_target target,
enum aarch64_insn_prfm_policy policy,
@@ -1379,7 +1491,7 @@ static u32 aarch64_encode_immediate(u64 imm,
* Compute the rotation to get a continuous set of
* ones, with the first bit set at position 0
*/
ror = fls(~imm);
ror = fls64(~imm);
}
/*
@@ -1456,3 +1568,48 @@ u32 aarch64_insn_gen_extr(enum aarch64_insn_variant variant,
insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn, Rn);
return aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RM, insn, Rm);
}
u32 aarch64_insn_gen_dmb(enum aarch64_insn_mb_type type)
{
u32 opt;
u32 insn;
switch (type) {
case AARCH64_INSN_MB_SY:
opt = 0xf;
break;
case AARCH64_INSN_MB_ST:
opt = 0xe;
break;
case AARCH64_INSN_MB_LD:
opt = 0xd;
break;
case AARCH64_INSN_MB_ISH:
opt = 0xb;
break;
case AARCH64_INSN_MB_ISHST:
opt = 0xa;
break;
case AARCH64_INSN_MB_ISHLD:
opt = 0x9;
break;
case AARCH64_INSN_MB_NSH:
opt = 0x7;
break;
case AARCH64_INSN_MB_NSHST:
opt = 0x6;
break;
case AARCH64_INSN_MB_NSHLD:
opt = 0x5;
break;
default:
pr_err("%s: unknown dmb type %d\n", __func__, type);
return AARCH64_BREAK_FAULT;
}
insn = aarch64_insn_get_dmb_value();
insn &= ~GENMASK(11, 8);
insn |= (opt << 8);
return insn;
}

View File

@@ -88,10 +88,42 @@
/* [Rn] = Rt; (atomic) Rs = [state] */
#define A64_STXR(sf, Rt, Rn, Rs) \
A64_LSX(sf, Rt, Rn, Rs, STORE_EX)
/* [Rn] = Rt (store release); (atomic) Rs = [state] */
#define A64_STLXR(sf, Rt, Rn, Rs) \
aarch64_insn_gen_load_store_ex(Rt, Rn, Rs, A64_SIZE(sf), \
AARCH64_INSN_LDST_STORE_REL_EX)
/* LSE atomics */
#define A64_STADD(sf, Rn, Rs) \
aarch64_insn_gen_stadd(Rn, Rs, A64_SIZE(sf))
/*
* LSE atomics
*
* ST{ADD,CLR,SET,EOR} is simply encoded as an alias for
* LDD{ADD,CLR,SET,EOR} with XZR as the destination register.
*/
#define A64_ST_OP(sf, Rn, Rs, op) \
aarch64_insn_gen_atomic_ld_op(A64_ZR, Rn, Rs, \
A64_SIZE(sf), AARCH64_INSN_MEM_ATOMIC_##op, \
AARCH64_INSN_MEM_ORDER_NONE)
/* [Rn] <op>= Rs */
#define A64_STADD(sf, Rn, Rs) A64_ST_OP(sf, Rn, Rs, ADD)
#define A64_STCLR(sf, Rn, Rs) A64_ST_OP(sf, Rn, Rs, CLR)
#define A64_STEOR(sf, Rn, Rs) A64_ST_OP(sf, Rn, Rs, EOR)
#define A64_STSET(sf, Rn, Rs) A64_ST_OP(sf, Rn, Rs, SET)
#define A64_LD_OP_AL(sf, Rt, Rn, Rs, op) \
aarch64_insn_gen_atomic_ld_op(Rt, Rn, Rs, \
A64_SIZE(sf), AARCH64_INSN_MEM_ATOMIC_##op, \
AARCH64_INSN_MEM_ORDER_ACQREL)
/* Rt = [Rn] (load acquire); [Rn] <op>= Rs (store release) */
#define A64_LDADDAL(sf, Rt, Rn, Rs) A64_LD_OP_AL(sf, Rt, Rn, Rs, ADD)
#define A64_LDCLRAL(sf, Rt, Rn, Rs) A64_LD_OP_AL(sf, Rt, Rn, Rs, CLR)
#define A64_LDEORAL(sf, Rt, Rn, Rs) A64_LD_OP_AL(sf, Rt, Rn, Rs, EOR)
#define A64_LDSETAL(sf, Rt, Rn, Rs) A64_LD_OP_AL(sf, Rt, Rn, Rs, SET)
/* Rt = [Rn] (load acquire); [Rn] = Rs (store release) */
#define A64_SWPAL(sf, Rt, Rn, Rs) A64_LD_OP_AL(sf, Rt, Rn, Rs, SWP)
/* Rs = CAS(Rn, Rs, Rt) (load acquire & store release) */
#define A64_CASAL(sf, Rt, Rn, Rs) \
aarch64_insn_gen_cas(Rt, Rn, Rs, A64_SIZE(sf), \
AARCH64_INSN_MEM_ORDER_ACQREL)
/* Add/subtract (immediate) */
#define A64_ADDSUB_IMM(sf, Rd, Rn, imm12, type) \
@@ -196,6 +228,9 @@
#define A64_ANDS(sf, Rd, Rn, Rm) A64_LOGIC_SREG(sf, Rd, Rn, Rm, AND_SETFLAGS)
/* Rn & Rm; set condition flags */
#define A64_TST(sf, Rn, Rm) A64_ANDS(sf, A64_ZR, Rn, Rm)
/* Rd = ~Rm (alias of ORN with A64_ZR as Rn) */
#define A64_MVN(sf, Rd, Rm) \
A64_LOGIC_SREG(sf, Rd, A64_ZR, Rm, ORN)
/* Logical (immediate) */
#define A64_LOGIC_IMM(sf, Rd, Rn, imm, type) ({ \
@@ -219,4 +254,7 @@
#define A64_BTI_J A64_HINT(AARCH64_INSN_HINT_BTIJ)
#define A64_BTI_JC A64_HINT(AARCH64_INSN_HINT_BTIJC)
/* DMB */
#define A64_DMB_ISH aarch64_insn_gen_dmb(AARCH64_INSN_MB_ISH)
#endif /* _BPF_JIT_H */

View File

@@ -27,6 +27,17 @@
#define TCALL_CNT (MAX_BPF_JIT_REG + 2)
#define TMP_REG_3 (MAX_BPF_JIT_REG + 3)
#define check_imm(bits, imm) do { \
if ((((imm) > 0) && ((imm) >> (bits))) || \
(((imm) < 0) && (~(imm) >> (bits)))) { \
pr_info("[%2d] imm=%d(0x%x) out of range\n", \
i, imm, imm); \
return -EINVAL; \
} \
} while (0)
#define check_imm19(imm) check_imm(19, imm)
#define check_imm26(imm) check_imm(26, imm)
/* Map BPF registers to A64 registers */
static const int bpf2a64[] = {
/* return value from in-kernel function, and exit value from eBPF */
@@ -329,6 +340,170 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)
#undef jmp_offset
}
#ifdef CONFIG_ARM64_LSE_ATOMICS
static int emit_lse_atomic(const struct bpf_insn *insn, struct jit_ctx *ctx)
{
const u8 code = insn->code;
const u8 dst = bpf2a64[insn->dst_reg];
const u8 src = bpf2a64[insn->src_reg];
const u8 tmp = bpf2a64[TMP_REG_1];
const u8 tmp2 = bpf2a64[TMP_REG_2];
const bool isdw = BPF_SIZE(code) == BPF_DW;
const s16 off = insn->off;
u8 reg;
if (!off) {
reg = dst;
} else {
emit_a64_mov_i(1, tmp, off, ctx);
emit(A64_ADD(1, tmp, tmp, dst), ctx);
reg = tmp;
}
switch (insn->imm) {
/* lock *(u32/u64 *)(dst_reg + off) <op>= src_reg */
case BPF_ADD:
emit(A64_STADD(isdw, reg, src), ctx);
break;
case BPF_AND:
emit(A64_MVN(isdw, tmp2, src), ctx);
emit(A64_STCLR(isdw, reg, tmp2), ctx);
break;
case BPF_OR:
emit(A64_STSET(isdw, reg, src), ctx);
break;
case BPF_XOR:
emit(A64_STEOR(isdw, reg, src), ctx);
break;
/* src_reg = atomic_fetch_<op>(dst_reg + off, src_reg) */
case BPF_ADD | BPF_FETCH:
emit(A64_LDADDAL(isdw, src, reg, src), ctx);
break;
case BPF_AND | BPF_FETCH:
emit(A64_MVN(isdw, tmp2, src), ctx);
emit(A64_LDCLRAL(isdw, src, reg, tmp2), ctx);
break;
case BPF_OR | BPF_FETCH:
emit(A64_LDSETAL(isdw, src, reg, src), ctx);
break;
case BPF_XOR | BPF_FETCH:
emit(A64_LDEORAL(isdw, src, reg, src), ctx);
break;
/* src_reg = atomic_xchg(dst_reg + off, src_reg); */
case BPF_XCHG:
emit(A64_SWPAL(isdw, src, reg, src), ctx);
break;
/* r0 = atomic_cmpxchg(dst_reg + off, r0, src_reg); */
case BPF_CMPXCHG:
emit(A64_CASAL(isdw, src, reg, bpf2a64[BPF_REG_0]), ctx);
break;
default:
pr_err_once("unknown atomic op code %02x\n", insn->imm);
return -EINVAL;
}
return 0;
}
#else
static inline int emit_lse_atomic(const struct bpf_insn *insn, struct jit_ctx *ctx)
{
return -EINVAL;
}
#endif
static int emit_ll_sc_atomic(const struct bpf_insn *insn, struct jit_ctx *ctx)
{
const u8 code = insn->code;
const u8 dst = bpf2a64[insn->dst_reg];
const u8 src = bpf2a64[insn->src_reg];
const u8 tmp = bpf2a64[TMP_REG_1];
const u8 tmp2 = bpf2a64[TMP_REG_2];
const u8 tmp3 = bpf2a64[TMP_REG_3];
const int i = insn - ctx->prog->insnsi;
const s32 imm = insn->imm;
const s16 off = insn->off;
const bool isdw = BPF_SIZE(code) == BPF_DW;
u8 reg;
s32 jmp_offset;
if (!off) {
reg = dst;
} else {
emit_a64_mov_i(1, tmp, off, ctx);
emit(A64_ADD(1, tmp, tmp, dst), ctx);
reg = tmp;
}
if (imm == BPF_ADD || imm == BPF_AND ||
imm == BPF_OR || imm == BPF_XOR) {
/* lock *(u32/u64 *)(dst_reg + off) <op>= src_reg */
emit(A64_LDXR(isdw, tmp2, reg), ctx);
if (imm == BPF_ADD)
emit(A64_ADD(isdw, tmp2, tmp2, src), ctx);
else if (imm == BPF_AND)
emit(A64_AND(isdw, tmp2, tmp2, src), ctx);
else if (imm == BPF_OR)
emit(A64_ORR(isdw, tmp2, tmp2, src), ctx);
else
emit(A64_EOR(isdw, tmp2, tmp2, src), ctx);
emit(A64_STXR(isdw, tmp2, reg, tmp3), ctx);
jmp_offset = -3;
check_imm19(jmp_offset);
emit(A64_CBNZ(0, tmp3, jmp_offset), ctx);
} else if (imm == (BPF_ADD | BPF_FETCH) ||
imm == (BPF_AND | BPF_FETCH) ||
imm == (BPF_OR | BPF_FETCH) ||
imm == (BPF_XOR | BPF_FETCH)) {
/* src_reg = atomic_fetch_<op>(dst_reg + off, src_reg) */
const u8 ax = bpf2a64[BPF_REG_AX];
emit(A64_MOV(isdw, ax, src), ctx);
emit(A64_LDXR(isdw, src, reg), ctx);
if (imm == (BPF_ADD | BPF_FETCH))
emit(A64_ADD(isdw, tmp2, src, ax), ctx);
else if (imm == (BPF_AND | BPF_FETCH))
emit(A64_AND(isdw, tmp2, src, ax), ctx);
else if (imm == (BPF_OR | BPF_FETCH))
emit(A64_ORR(isdw, tmp2, src, ax), ctx);
else
emit(A64_EOR(isdw, tmp2, src, ax), ctx);
emit(A64_STLXR(isdw, tmp2, reg, tmp3), ctx);
jmp_offset = -3;
check_imm19(jmp_offset);
emit(A64_CBNZ(0, tmp3, jmp_offset), ctx);
emit(A64_DMB_ISH, ctx);
} else if (imm == BPF_XCHG) {
/* src_reg = atomic_xchg(dst_reg + off, src_reg); */
emit(A64_MOV(isdw, tmp2, src), ctx);
emit(A64_LDXR(isdw, src, reg), ctx);
emit(A64_STLXR(isdw, tmp2, reg, tmp3), ctx);
jmp_offset = -2;
check_imm19(jmp_offset);
emit(A64_CBNZ(0, tmp3, jmp_offset), ctx);
emit(A64_DMB_ISH, ctx);
} else if (imm == BPF_CMPXCHG) {
/* r0 = atomic_cmpxchg(dst_reg + off, r0, src_reg); */
const u8 r0 = bpf2a64[BPF_REG_0];
emit(A64_MOV(isdw, tmp2, r0), ctx);
emit(A64_LDXR(isdw, r0, reg), ctx);
emit(A64_EOR(isdw, tmp3, r0, tmp2), ctx);
jmp_offset = 4;
check_imm19(jmp_offset);
emit(A64_CBNZ(isdw, tmp3, jmp_offset), ctx);
emit(A64_STLXR(isdw, src, reg, tmp3), ctx);
jmp_offset = -4;
check_imm19(jmp_offset);
emit(A64_CBNZ(0, tmp3, jmp_offset), ctx);
emit(A64_DMB_ISH, ctx);
} else {
pr_err_once("unknown atomic op code %02x\n", imm);
return -EINVAL;
}
return 0;
}
static void build_epilogue(struct jit_ctx *ctx)
{
const u8 r0 = bpf2a64[BPF_REG_0];
@@ -434,29 +609,16 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
const u8 src = bpf2a64[insn->src_reg];
const u8 tmp = bpf2a64[TMP_REG_1];
const u8 tmp2 = bpf2a64[TMP_REG_2];
const u8 tmp3 = bpf2a64[TMP_REG_3];
const s16 off = insn->off;
const s32 imm = insn->imm;
const int i = insn - ctx->prog->insnsi;
const bool is64 = BPF_CLASS(code) == BPF_ALU64 ||
BPF_CLASS(code) == BPF_JMP;
const bool isdw = BPF_SIZE(code) == BPF_DW;
u8 jmp_cond, reg;
u8 jmp_cond;
s32 jmp_offset;
u32 a64_insn;
int ret;
#define check_imm(bits, imm) do { \
if ((((imm) > 0) && ((imm) >> (bits))) || \
(((imm) < 0) && (~(imm) >> (bits)))) { \
pr_info("[%2d] imm=%d(0x%x) out of range\n", \
i, imm, imm); \
return -EINVAL; \
} \
} while (0)
#define check_imm19(imm) check_imm(19, imm)
#define check_imm26(imm) check_imm(26, imm)
switch (code) {
/* dst = src */
case BPF_ALU | BPF_MOV | BPF_X:
@@ -891,33 +1053,12 @@ emit_cond_jmp:
case BPF_STX | BPF_ATOMIC | BPF_W:
case BPF_STX | BPF_ATOMIC | BPF_DW:
if (insn->imm != BPF_ADD) {
pr_err_once("unknown atomic op code %02x\n", insn->imm);
return -EINVAL;
}
/* STX XADD: lock *(u32 *)(dst + off) += src
* and
* STX XADD: lock *(u64 *)(dst + off) += src
*/
if (!off) {
reg = dst;
} else {
emit_a64_mov_i(1, tmp, off, ctx);
emit(A64_ADD(1, tmp, tmp, dst), ctx);
reg = tmp;
}
if (cpus_have_cap(ARM64_HAS_LSE_ATOMICS)) {
emit(A64_STADD(isdw, reg, src), ctx);
} else {
emit(A64_LDXR(isdw, tmp2, reg), ctx);
emit(A64_ADD(isdw, tmp2, tmp2, src), ctx);
emit(A64_STXR(isdw, tmp2, reg, tmp3), ctx);
jmp_offset = -3;
check_imm19(jmp_offset);
emit(A64_CBNZ(0, tmp3, jmp_offset), ctx);
}
if (cpus_have_cap(ARM64_HAS_LSE_ATOMICS))
ret = emit_lse_atomic(insn, ctx);
else
ret = emit_ll_sc_atomic(insn, ctx);
if (ret)
return ret;
break;
default:
@@ -1049,15 +1190,18 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
goto out_off;
}
/* 1. Initial fake pass to compute ctx->idx. */
/* Fake pass to fill in ctx->offset. */
if (build_body(&ctx, extra_pass)) {
/*
* 1. Initial fake pass to compute ctx->idx and ctx->offset.
*
* BPF line info needs ctx->offset[i] to be the offset of
* instruction[i] in jited image, so build prologue first.
*/
if (build_prologue(&ctx, was_classic)) {
prog = orig_prog;
goto out_off;
}
if (build_prologue(&ctx, was_classic)) {
if (build_body(&ctx, extra_pass)) {
prog = orig_prog;
goto out_off;
}
@@ -1130,6 +1274,11 @@ skip_init_ctx:
prog->jited_len = prog_size;
if (!prog->is_func || extra_pass) {
int i;
/* offset[prog->len] is the size of program */
for (i = 0; i <= prog->len; i++)
ctx.offset[i] *= AARCH64_INSN_SIZE;
bpf_prog_fill_jited_linfo(prog, ctx.offset + 1);
out_off:
kfree(ctx.offset);

View File

@@ -158,7 +158,7 @@ config X86
select HAVE_ALIGNED_STRUCT_PAGE if SLUB
select HAVE_ARCH_AUDITSYSCALL
select HAVE_ARCH_HUGE_VMAP if X86_64 || X86_PAE
select HAVE_ARCH_HUGE_VMALLOC if HAVE_ARCH_HUGE_VMAP
select HAVE_ARCH_HUGE_VMALLOC if X86_64
select HAVE_ARCH_JUMP_LABEL
select HAVE_ARCH_JUMP_LABEL_RELATIVE
select HAVE_ARCH_KASAN if X86_64

View File

@@ -2330,8 +2330,11 @@ skip_init_addrs:
if (proglen <= 0) {
out_image:
image = NULL;
if (header)
if (header) {
bpf_arch_text_copy(&header->size, &rw_header->size,
sizeof(rw_header->size));
bpf_jit_binary_pack_free(header, rw_header);
}
prog = orig_prog;
goto out_addrs;
}

View File

@@ -58,6 +58,10 @@ config BPF_JIT_ALWAYS_ON
Enables BPF JIT and removes BPF interpreter to avoid speculative
execution of BPF instructions by the interpreter.
When CONFIG_BPF_JIT_ALWAYS_ON is enabled, /proc/sys/net/core/bpf_jit_enable
is permanently set to 1 and setting any other value than that will
return failure.
config BPF_JIT_DEFAULT_ON
def_bool ARCH_WANT_DEFAULT_BPF_JIT || BPF_JIT_ALWAYS_ON
depends on HAVE_EBPF_JIT && BPF_JIT

View File

@@ -136,7 +136,7 @@ bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_storage,
* will be done by the caller.
*
* Although the unlock will be done under
* rcu_read_lock(), it is more intutivie to
* rcu_read_lock(), it is more intuitive to
* read if the freeing of the storage is done
* after the raw_spin_unlock_bh(&local_storage->lock).
*

View File

@@ -1,4 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2018 Facebook */
#include <uapi/linux/btf.h>
@@ -2547,7 +2547,7 @@ static int btf_ptr_resolve(struct btf_verifier_env *env,
*
* We now need to continue from the last-resolved-ptr to
* ensure the last-resolved-ptr will not referring back to
* the currenct ptr (t).
* the current ptr (t).
*/
if (btf_type_is_modifier(next_type)) {
const struct btf_type *resolved_type;
@@ -6148,7 +6148,7 @@ int btf_type_snprintf_show(const struct btf *btf, u32 type_id, void *obj,
btf_type_show(btf, type_id, obj, (struct btf_show *)&ssnprintf);
/* If we encontered an error, return it. */
/* If we encountered an error, return it. */
if (ssnprintf.show.state.status)
return ssnprintf.show.state.status;
@@ -6398,7 +6398,8 @@ static int btf_module_notify(struct notifier_block *nb, unsigned long op,
pr_warn("failed to validate module [%s] BTF: %ld\n",
mod->name, PTR_ERR(btf));
kfree(btf_mod);
err = PTR_ERR(btf);
if (!IS_ENABLED(CONFIG_MODULE_ALLOW_BTF_MISMATCH))
err = PTR_ERR(btf);
goto out;
}
err = btf_alloc_id(btf);
@@ -6706,7 +6707,7 @@ static int btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook,
const struct btf_kfunc_id_set *kset)
{
bool vmlinux_set = !btf_is_module(btf);
int type, ret;
int type, ret = 0;
for (type = 0; type < ARRAY_SIZE(kset->sets); type++) {
if (!kset->sets[type])

View File

@@ -1031,7 +1031,7 @@ int cgroup_bpf_prog_query(const union bpf_attr *attr,
* __cgroup_bpf_run_filter_skb() - Run a program for packet filtering
* @sk: The socket sending or receiving traffic
* @skb: The skb that is being sent or received
* @type: The type of program to be exectuted
* @type: The type of program to be executed
*
* If no socket is passed, or the socket is not of type INET or INET6,
* this function does nothing and returns 0.
@@ -1094,7 +1094,7 @@ EXPORT_SYMBOL(__cgroup_bpf_run_filter_skb);
/**
* __cgroup_bpf_run_filter_sk() - Run a program on a sock
* @sk: sock structure to manipulate
* @type: The type of program to be exectuted
* @type: The type of program to be executed
*
* socket is passed is expected to be of type INET or INET6.
*
@@ -1119,7 +1119,7 @@ EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk);
* provided by user sockaddr
* @sk: sock struct that will use sockaddr
* @uaddr: sockaddr struct provided by user
* @type: The type of program to be exectuted
* @type: The type of program to be executed
* @t_ctx: Pointer to attach type specific context
* @flags: Pointer to u32 which contains higher bits of BPF program
* return value (OR'ed together).
@@ -1166,7 +1166,7 @@ EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_addr);
* @sock_ops: bpf_sock_ops_kern struct to pass to program. Contains
* sk with connection information (IP addresses, etc.) May not contain
* cgroup info if it is a req sock.
* @type: The type of program to be exectuted
* @type: The type of program to be executed
*
* socket passed is expected to be of type INET or INET6.
*

View File

@@ -1112,13 +1112,16 @@ int bpf_jit_binary_pack_finalize(struct bpf_prog *prog,
* 1) when the program is freed after;
* 2) when the JIT engine fails (before bpf_jit_binary_pack_finalize).
* For case 2), we need to free both the RO memory and the RW buffer.
* Also, ro_header->size in 2) is not properly set yet, so rw_header->size
* is used for uncharge.
*
* bpf_jit_binary_pack_free requires proper ro_header->size. However,
* bpf_jit_binary_pack_alloc does not set it. Therefore, ro_header->size
* must be set with either bpf_jit_binary_pack_finalize (normal path) or
* bpf_arch_text_copy (when jit fails).
*/
void bpf_jit_binary_pack_free(struct bpf_binary_header *ro_header,
struct bpf_binary_header *rw_header)
{
u32 size = rw_header ? rw_header->size : ro_header->size;
u32 size = ro_header->size;
bpf_prog_pack_free(ro_header);
kvfree(rw_header);

View File

@@ -1636,7 +1636,7 @@ __htab_map_lookup_and_delete_batch(struct bpf_map *map,
value_size = size * num_possible_cpus();
total = 0;
/* while experimenting with hash tables with sizes ranging from 10 to
* 1000, it was observed that a bucket can have upto 5 entries.
* 1000, it was observed that a bucket can have up to 5 entries.
*/
bucket_size = 5;

View File

@@ -1093,7 +1093,7 @@ struct bpf_hrtimer {
struct bpf_timer_kern {
struct bpf_hrtimer *timer;
/* bpf_spin_lock is used here instead of spinlock_t to make
* sure that it always fits into space resereved by struct bpf_timer
* sure that it always fits into space reserved by struct bpf_timer
* regardless of LOCKDEP and spinlock debug flags.
*/
struct bpf_spin_lock lock;

View File

@@ -1,4 +1,4 @@
//SPDX-License-Identifier: GPL-2.0
// SPDX-License-Identifier: GPL-2.0
#include <linux/bpf-cgroup.h>
#include <linux/bpf.h>
#include <linux/bpf_local_storage.h>

View File

@@ -54,6 +54,13 @@ static int load_skel(void)
err = PTR_ERR(progs_link);
goto out;
}
/* Avoid taking over stdin/stdout/stderr of init process. Zeroing out
* makes skel_closenz() a no-op later in iterators_bpf__destroy().
*/
close_fd(skel->links.dump_bpf_map_fd);
skel->links.dump_bpf_map_fd = 0;
close_fd(skel->links.dump_bpf_prog_fd);
skel->links.dump_bpf_prog_fd = 0;
return 0;
out:
free_links_and_skel();

View File

@@ -143,7 +143,7 @@ static void reuseport_array_free(struct bpf_map *map)
/*
* Once reaching here, all sk->sk_user_data is not
* referenceing this "array". "array" can be freed now.
* referencing this "array". "array" can be freed now.
*/
bpf_map_area_free(array);
}

View File

@@ -132,7 +132,8 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
int i;
struct mmap_unlock_irq_work *work = NULL;
bool irq_work_busy = bpf_mmap_unlock_get_irq_work(&work);
struct vm_area_struct *vma;
struct vm_area_struct *vma, *prev_vma = NULL;
const char *prev_build_id;
/* If the irq_work is in use, fall back to report ips. Same
* fallback is used for kernel stack (!user) on a stackmap with
@@ -150,6 +151,12 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
}
for (i = 0; i < trace_nr; i++) {
if (range_in_vma(prev_vma, ips[i], ips[i])) {
vma = prev_vma;
memcpy(id_offs[i].build_id, prev_build_id,
BUILD_ID_SIZE_MAX);
goto build_id_valid;
}
vma = find_vma(current->mm, ips[i]);
if (!vma || build_id_parse(vma, id_offs[i].build_id, NULL)) {
/* per entry fall back to ips */
@@ -158,9 +165,12 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
memset(id_offs[i].build_id, 0, BUILD_ID_SIZE_MAX);
continue;
}
build_id_valid:
id_offs[i].offset = (vma->vm_pgoff << PAGE_SHIFT) + ips[i]
- vma->vm_start;
id_offs[i].status = BPF_STACK_BUILD_ID_VALID;
prev_vma = vma;
prev_build_id = id_offs[i].build_id;
}
bpf_mmap_unlock_mm(work, current->mm);
}

Some files were not shown because too many files have changed in this diff Show More