Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next

Daniel Borkmann says: ==================== pull-request: bpf-next 2018-08-07 The following pull-request contains BPF updates for your *net-next* tree. The main changes are: 1) Add cgroup local storage for BPF programs, which provides a fast accessible memory for storing various per-cgroup data like number of transmitted packets, etc, from Roman. 2) Support bpf_get_socket_cookie() BPF helper in several more program types that have a full socket available, from Andrey. 3) Significantly improve the performance of perf events which are reported from BPF offload. Also convert a couple of BPF AF_XDP samples over to use libbpf, both from Jakub. 4) seg6local LWT provides the End.DT6 action, which allows decapsulating an outer IPv6 header containing a Segment Routing Header. Add this action now to the seg6local BPF interface, from Mathieu. 5) Do not mark dst register as unbounded in MOV64 instruction when both src and dst register are the same, from Arthur. 6) Define u_smp_rmb() and u_smp_wmb() to their respective barrier instructions on arm64 for the AF_XDP sample code, from Brian. 7) Convert the tcp_client.py and tcp_server.py BPF selftest scripts over from Python 2 to Python 3, from Jeremy. 8) Enable BTF build flags to the BPF sample code Makefile, from Taeung. 9) Remove an unnecessary rcu_read_lock() in run_lwt_bpf(), from Taehee. 10) Several improvements to the README.rst from the BPF documentation to make it more consistent with RST format, from Tobin. 11) Replace all occurrences of strerror() by calls to strerror_r() in libbpf and fix a FORTIFY_SOURCE build error along with it, from Thomas. 12) Fix a bug in bpftool's get_btf() function to correctly propagate an error via PTR_ERR(), from Yue. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
2026-05-01 15:00:59 -07:00 · 2018-08-07 11:02:05 -07:00
parent c5d99d2b35 85fc4b16aa
commit 1ba982806c
50 changed files with 1896 additions and 303 deletions
@@ -1,5 +1,5 @@
 =================
-BPF documentation
+BPF Documentation
 =================

 This directory contains documentation for the BPF (Berkeley Packet
@@ -22,14 +22,14 @@ Frequently asked questions (FAQ)

 Two sets of Questions and Answers (Q&A) are maintained.

-* QA for common questions about BPF see: bpf_design_QA_
+.. toctree::
+   :maxdepth: 1

-* QA for developers interacting with BPF subsystem: bpf_devel_QA_
+   bpf_design_QA
+   bpf_devel_QA


 .. Links:
-.. _bpf_design_QA: bpf_design_QA.rst
-.. _bpf_devel_QA:  bpf_devel_QA.rst
 .. _Documentation/networking/filter.txt: ../networking/filter.txt
 .. _man-pages: https://www.kernel.org/doc/man-pages/
 .. _bpf(2): http://man7.org/linux/man-pages/man2/bpf.2.html
@@ -90,6 +90,7 @@ needed).
   crypto/index
   filesystems/index
   vm/index
+   bpf/index

 Architecture-specific documentation
 -----------------------------------
@@ -196,14 +196,16 @@ void lirc_bpf_run(struct rc_dev *rcdev, u32 sample)
 */
 void lirc_bpf_free(struct rc_dev *rcdev)
 {
-	struct bpf_prog **progs;
+	struct bpf_prog_array_item *item;

 	if (!rcdev->raw->progs)
 		return;

-	progs = rcu_dereference(rcdev->raw->progs)->progs;
-	while (*progs)
-		bpf_prog_put(*progs++);
+	item = rcu_dereference(rcdev->raw->progs)->items;
+	while (item->prog) {
+		bpf_prog_put(item->prog);
+		item++;
+	}

 	bpf_prog_array_free(rcdev->raw->progs);
 }
@@ -43,8 +43,6 @@
 #include "fw.h"
 #include "main.h"

-#define cmsg_warn(bpf, msg...)	nn_dp_warn(&(bpf)->app->ctrl->dp, msg)
-
 #define NFP_BPF_TAG_ALLOC_SPAN	(U16_MAX / 4)

 static bool nfp_bpf_all_tags_busy(struct nfp_app_bpf *bpf)
@@ -441,7 +439,10 @@ void nfp_bpf_ctrl_msg_rx(struct nfp_app *app, struct sk_buff *skb)
 	}

 	if (nfp_bpf_cmsg_get_type(skb) == CMSG_TYPE_BPF_EVENT) {
-		nfp_bpf_event_output(bpf, skb);
+		if (!nfp_bpf_event_output(bpf, skb->data, skb->len))
+			dev_consume_skb_any(skb);
+		else
+			dev_kfree_skb_any(skb);
 		return;
 	}

@@ -465,3 +466,21 @@ err_unlock:
 err_free:
 	dev_kfree_skb_any(skb);
 }
+
+void
+nfp_bpf_ctrl_msg_rx_raw(struct nfp_app *app, const void *data, unsigned int len)
+{
+	struct nfp_app_bpf *bpf = app->priv;
+	const struct cmsg_hdr *hdr = data;
+
+	if (unlikely(len < sizeof(struct cmsg_reply_map_simple))) {
+		cmsg_warn(bpf, "cmsg drop - too short %d!\n", len);
+		return;
+	}
+
+	if (hdr->type == CMSG_TYPE_BPF_EVENT)
+		nfp_bpf_event_output(bpf, data, len);
+	else
+		cmsg_warn(bpf, "cmsg drop - msg type %d with raw buffer!\n",
+			  hdr->type);
+}
@@ -51,6 +51,7 @@ enum bpf_cap_tlv_type {
 	NFP_BPF_CAP_TYPE_MAPS		= 3,
 	NFP_BPF_CAP_TYPE_RANDOM		= 4,
 	NFP_BPF_CAP_TYPE_QUEUE_SELECT	= 5,
+	NFP_BPF_CAP_TYPE_ADJUST_TAIL	= 6,
 };

 struct nfp_bpf_cap_tlv_func {
@@ -1642,6 +1642,51 @@ static int adjust_head(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 	return 0;
 }

+static int adjust_tail(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	u32 ret_einval, end;
+	swreg plen, delta;
+
+	BUILD_BUG_ON(plen_reg(nfp_prog) != reg_b(STATIC_REG_PKT_LEN));
+
+	plen = imm_a(nfp_prog);
+	delta = reg_a(2 * 2);
+
+	ret_einval = nfp_prog_current_offset(nfp_prog) + 9;
+	end = nfp_prog_current_offset(nfp_prog) + 11;
+
+	/* Calculate resulting length */
+	emit_alu(nfp_prog, plen, plen_reg(nfp_prog), ALU_OP_ADD, delta);
+	/* delta == 0 is not allowed by the kernel, add must overflow to make
+	 * length smaller.
+	 */
+	emit_br(nfp_prog, BR_BCC, ret_einval, 0);
+
+	/* if (new_len < 14) then -EINVAL */
+	emit_alu(nfp_prog, reg_none(), plen, ALU_OP_SUB, reg_imm(ETH_HLEN));
+	emit_br(nfp_prog, BR_BMI, ret_einval, 0);
+
+	emit_alu(nfp_prog, plen_reg(nfp_prog),
+		 plen_reg(nfp_prog), ALU_OP_ADD, delta);
+	emit_alu(nfp_prog, pv_len(nfp_prog),
+		 pv_len(nfp_prog), ALU_OP_ADD, delta);
+
+	emit_br(nfp_prog, BR_UNC, end, 2);
+	wrp_immed(nfp_prog, reg_both(0), 0);
+	wrp_immed(nfp_prog, reg_both(1), 0);
+
+	if (!nfp_prog_confirm_current_offset(nfp_prog, ret_einval))
+		return -EINVAL;
+
+	wrp_immed(nfp_prog, reg_both(0), -22);
+	wrp_immed(nfp_prog, reg_both(1), ~0);
+
+	if (!nfp_prog_confirm_current_offset(nfp_prog, end))
+		return -EINVAL;
+
+	return 0;
+}
+
 static int
 map_call_stack_common(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 {
@@ -3041,6 +3086,8 @@ static int call(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 	switch (meta->insn.imm) {
 	case BPF_FUNC_xdp_adjust_head:
 		return adjust_head(nfp_prog, meta);
+	case BPF_FUNC_xdp_adjust_tail:
+		return adjust_tail(nfp_prog, meta);
 	case BPF_FUNC_map_lookup_elem:
 	case BPF_FUNC_map_update_elem:
 	case BPF_FUNC_map_delete_elem:
@@ -3883,6 +3930,7 @@ static int nfp_bpf_replace_map_ptrs(struct nfp_prog *nfp_prog)
 	struct nfp_insn_meta *meta1, *meta2;
 	struct nfp_bpf_map *nfp_map;
 	struct bpf_map *map;
+	u32 id;

 	nfp_for_each_insn_walk2(nfp_prog, meta1, meta2) {
 		if (meta1->skip || meta2->skip)
@@ -3894,11 +3942,14 @@ static int nfp_bpf_replace_map_ptrs(struct nfp_prog *nfp_prog)

 		map = (void *)(unsigned long)((u32)meta1->insn.imm |
 					      (u64)meta2->insn.imm << 32);
-		if (bpf_map_offload_neutral(map))
-			continue;
-		nfp_map = map_to_offmap(map)->dev_priv;
+		if (bpf_map_offload_neutral(map)) {
+			id = map->id;
+		} else {
+			nfp_map = map_to_offmap(map)->dev_priv;
+			id = nfp_map->tid;
+		}

-		meta1->insn.imm = nfp_map->tid;
+		meta1->insn.imm = id;
 		meta2->insn.imm = 0;
 	}

@@ -45,8 +45,8 @@

 const struct rhashtable_params nfp_bpf_maps_neutral_params = {
 	.nelem_hint		= 4,
-	.key_len		= FIELD_SIZEOF(struct nfp_bpf_neutral_map, ptr),
-	.key_offset		= offsetof(struct nfp_bpf_neutral_map, ptr),
+	.key_len		= FIELD_SIZEOF(struct bpf_map, id),
+	.key_offset		= offsetof(struct nfp_bpf_neutral_map, map_id),
 	.head_offset		= offsetof(struct nfp_bpf_neutral_map, l),
 	.automatic_shrinking	= true,
 };
@@ -334,6 +334,14 @@ nfp_bpf_parse_cap_qsel(struct nfp_app_bpf *bpf, void __iomem *value, u32 length)
 	return 0;
 }

+static int
+nfp_bpf_parse_cap_adjust_tail(struct nfp_app_bpf *bpf, void __iomem *value,
+			      u32 length)
+{
+	bpf->adjust_tail = true;
+	return 0;
+}
+
 static int nfp_bpf_parse_capabilities(struct nfp_app *app)
 {
 	struct nfp_cpp *cpp = app->pf->cpp;
@@ -380,6 +388,11 @@ static int nfp_bpf_parse_capabilities(struct nfp_app *app)
 			if (nfp_bpf_parse_cap_qsel(app->priv, value, length))
 				goto err_release_free;
 			break;
+		case NFP_BPF_CAP_TYPE_ADJUST_TAIL:
+			if (nfp_bpf_parse_cap_adjust_tail(app->priv, value,
+							  length))
+				goto err_release_free;
+			break;
 		default:
 			nfp_dbg(cpp, "unknown BPF capability: %d\n", type);
 			break;
@@ -490,6 +503,7 @@ const struct nfp_app_type app_bpf = {
 	.vnic_free	= nfp_bpf_vnic_free,

 	.ctrl_msg_rx	= nfp_bpf_ctrl_msg_rx,
+	.ctrl_msg_rx_raw	= nfp_bpf_ctrl_msg_rx_raw,

 	.setup_tc	= nfp_bpf_setup_tc,
 	.bpf		= nfp_ndo_bpf,
@@ -47,6 +47,8 @@
 #include "../nfp_asm.h"
 #include "fw.h"

+#define cmsg_warn(bpf, msg...)	nn_dp_warn(&(bpf)->app->ctrl->dp, msg)
+
 /* For relocation logic use up-most byte of branch instruction as scratch
 * area.  Remember to clear this before sending instructions to HW!
 */
@@ -148,6 +150,7 @@ enum pkt_vec {
 *
 * @pseudo_random:	FW initialized the pseudo-random machinery (CSRs)
 * @queue_select:	BPF can set the RX queue ID in packet vector
+ * @adjust_tail:	BPF can simply truncate packet size for adjust tail
 */
 struct nfp_app_bpf {
 	struct nfp_app *app;
@@ -193,6 +196,7 @@ struct nfp_app_bpf {

 	bool pseudo_random;
 	bool queue_select;
+	bool adjust_tail;
 };

 enum nfp_bpf_map_use {
@@ -221,6 +225,7 @@ struct nfp_bpf_map {
 struct nfp_bpf_neutral_map {
 	struct rhash_head l;
 	struct bpf_map *ptr;
+	u32 map_id;
 	u32 count;
 };

@@ -501,7 +506,11 @@ int nfp_bpf_ctrl_lookup_entry(struct bpf_offloaded_map *offmap,
 int nfp_bpf_ctrl_getnext_entry(struct bpf_offloaded_map *offmap,
 			       void *key, void *next_key);

-int nfp_bpf_event_output(struct nfp_app_bpf *bpf, struct sk_buff *skb);
+int nfp_bpf_event_output(struct nfp_app_bpf *bpf, const void *data,
+			 unsigned int len);

 void nfp_bpf_ctrl_msg_rx(struct nfp_app *app, struct sk_buff *skb);
+void
+nfp_bpf_ctrl_msg_rx_raw(struct nfp_app *app, const void *data,
+			unsigned int len);
 #endif
@@ -67,7 +67,7 @@ nfp_map_ptr_record(struct nfp_app_bpf *bpf, struct nfp_prog *nfp_prog,
 	ASSERT_RTNL();

 	/* Reuse path - other offloaded program is already tracking this map. */
-	record = rhashtable_lookup_fast(&bpf->maps_neutral, &map,
+	record = rhashtable_lookup_fast(&bpf->maps_neutral, &map->id,
 					nfp_bpf_maps_neutral_params);
 	if (record) {
 		nfp_prog->map_records[nfp_prog->map_records_cnt++] = record;
@@ -89,6 +89,7 @@ nfp_map_ptr_record(struct nfp_app_bpf *bpf, struct nfp_prog *nfp_prog,
 	}

 	record->ptr = map;
+	record->map_id = map->id;
 	record->count = 1;

 	err = rhashtable_insert_fast(&bpf->maps_neutral, &record->l,
@@ -379,11 +380,23 @@ nfp_bpf_map_alloc(struct nfp_app_bpf *bpf, struct bpf_offloaded_map *offmap)
 			bpf->maps.max_elems - bpf->map_elems_in_use);
 		return -ENOMEM;
 	}
-	if (offmap->map.key_size > bpf->maps.max_key_sz ||
-	    offmap->map.value_size > bpf->maps.max_val_sz ||
-	    round_up(offmap->map.key_size, 8) +
+
+	if (round_up(offmap->map.key_size, 8) +
 	    round_up(offmap->map.value_size, 8) > bpf->maps.max_elem_sz) {
-		pr_info("elements don't fit in device constraints\n");
+		pr_info("map elements too large: %u, FW max element size (key+value): %u\n",
+			round_up(offmap->map.key_size, 8) +
+			round_up(offmap->map.value_size, 8),
+			bpf->maps.max_elem_sz);
+		return -ENOMEM;
+	}
+	if (offmap->map.key_size > bpf->maps.max_key_sz) {
+		pr_info("map key size %u, FW max is %u\n",
+			offmap->map.key_size, bpf->maps.max_key_sz);
+		return -ENOMEM;
+	}
+	if (offmap->map.value_size > bpf->maps.max_val_sz) {
+		pr_info("map value size %u, FW max is %u\n",
+			offmap->map.value_size, bpf->maps.max_val_sz);
 		return -ENOMEM;
 	}

@@ -453,43 +466,43 @@ nfp_bpf_perf_event_copy(void *dst, const void *src,
 	return 0;
 }

-int nfp_bpf_event_output(struct nfp_app_bpf *bpf, struct sk_buff *skb)
+int nfp_bpf_event_output(struct nfp_app_bpf *bpf, const void *data,
+			 unsigned int len)
 {
-	struct cmsg_bpf_event *cbe = (void *)skb->data;
-	u32 pkt_size, data_size;
-	struct bpf_map *map;
+	struct cmsg_bpf_event *cbe = (void *)data;
+	struct nfp_bpf_neutral_map *record;
+	u32 pkt_size, data_size, map_id;
+	u64 map_id_full;

-	if (skb->len < sizeof(struct cmsg_bpf_event))
-		goto err_drop;
+	if (len < sizeof(struct cmsg_bpf_event))
+		return -EINVAL;

 	pkt_size = be32_to_cpu(cbe->pkt_size);
 	data_size = be32_to_cpu(cbe->data_size);
-	map = (void *)(unsigned long)be64_to_cpu(cbe->map_ptr);
+	map_id_full = be64_to_cpu(cbe->map_ptr);
+	map_id = map_id_full;

-	if (skb->len < sizeof(struct cmsg_bpf_event) + pkt_size + data_size)
-		goto err_drop;
+	if (len < sizeof(struct cmsg_bpf_event) + pkt_size + data_size)
+		return -EINVAL;
 	if (cbe->hdr.ver != CMSG_MAP_ABI_VERSION)
-		goto err_drop;
+		return -EINVAL;

 	rcu_read_lock();
-	if (!rhashtable_lookup_fast(&bpf->maps_neutral, &map,
-				    nfp_bpf_maps_neutral_params)) {
+	record = rhashtable_lookup_fast(&bpf->maps_neutral, &map_id,
+					nfp_bpf_maps_neutral_params);
+	if (!record || map_id_full > U32_MAX) {
 		rcu_read_unlock();
-		pr_warn("perf event: dest map pointer %px not recognized, dropping event\n",
-			map);
-		goto err_drop;
+		cmsg_warn(bpf, "perf event: map id %lld (0x%llx) not recognized, dropping event\n",
+			  map_id_full, map_id_full);
+		return -EINVAL;
 	}

-	bpf_event_output(map, be32_to_cpu(cbe->cpu_id),
+	bpf_event_output(record->ptr, be32_to_cpu(cbe->cpu_id),
 			 &cbe->data[round_up(pkt_size, 4)], data_size,
 			 cbe->data, pkt_size, nfp_bpf_perf_event_copy);
 	rcu_read_unlock();

-	dev_consume_skb_any(skb);
 	return 0;
-err_drop:
-	dev_kfree_skb_any(skb);
-	return -EINVAL;
 }

 static int
@@ -178,6 +178,13 @@ nfp_bpf_check_call(struct nfp_prog *nfp_prog, struct bpf_verifier_env *env,
 		nfp_record_adjust_head(bpf, nfp_prog, meta, reg2);
 		break;

+	case BPF_FUNC_xdp_adjust_tail:
+		if (!bpf->adjust_tail) {
+			pr_vlog(env, "adjust_tail not supported by FW\n");
+			return -EOPNOTSUPP;
+		}
+		break;
+
 	case BPF_FUNC_map_lookup_elem:
 		if (!nfp_bpf_map_call_ok("map_lookup", env, meta,
 					 bpf->helpers.map_lookup, reg1) ||
@@ -172,6 +172,8 @@ struct nfp_app *nfp_app_alloc(struct nfp_pf *pf, enum nfp_app_id id)

 	if (WARN_ON(!apps[id]->name || !apps[id]->vnic_alloc))
 		return ERR_PTR(-EINVAL);
+	if (WARN_ON(!apps[id]->ctrl_msg_rx && apps[id]->ctrl_msg_rx_raw))
+		return ERR_PTR(-EINVAL);

 	app = kzalloc(sizeof(*app), GFP_KERNEL);
 	if (!app)
@@ -98,6 +98,7 @@ extern const struct nfp_app_type app_abm;
 * @start:	start application logic
 * @stop:	stop application logic
 * @ctrl_msg_rx:    control message handler
+ * @ctrl_msg_rx_raw:	handler for control messages from data queues
 * @setup_tc:	setup TC ndo
 * @bpf:	BPF ndo offload-related calls
 * @xdp_offload:    offload an XDP program
@@ -150,6 +151,8 @@ struct nfp_app_type {
 	void (*stop)(struct nfp_app *app);

 	void (*ctrl_msg_rx)(struct nfp_app *app, struct sk_buff *skb);
+	void (*ctrl_msg_rx_raw)(struct nfp_app *app, const void *data,
+				unsigned int len);

 	int (*setup_tc)(struct nfp_app *app, struct net_device *netdev,
 			enum tc_setup_type type, void *type_data);
@@ -318,6 +321,11 @@ static inline bool nfp_app_ctrl_has_meta(struct nfp_app *app)
 	return app->type->ctrl_has_meta;
 }

+static inline bool nfp_app_ctrl_uses_data_vnics(struct nfp_app *app)
+{
+	return app && app->type->ctrl_msg_rx_raw;
+}
+
 static inline const char *nfp_app_extra_cap(struct nfp_app *app,
 					    struct nfp_net *nn)
 {
@@ -381,6 +389,16 @@ static inline void nfp_app_ctrl_rx(struct nfp_app *app, struct sk_buff *skb)
 	app->type->ctrl_msg_rx(app, skb);
 }

+static inline void
+nfp_app_ctrl_rx_raw(struct nfp_app *app, const void *data, unsigned int len)
+{
+	if (!app || !app->type->ctrl_msg_rx_raw)
+		return;
+
+	trace_devlink_hwmsg(priv_to_devlink(app->pf), true, 0, data, len);
+	app->type->ctrl_msg_rx_raw(app, data, len);
+}
+
 static inline int nfp_app_eswitch_mode_get(struct nfp_app *app, u16 *mode)
 {
 	if (!app->type->eswitch_mode_get)
@@ -93,6 +93,7 @@ enum br_mask {
 	BR_BNE = 0x01,
 	BR_BMI = 0x02,
 	BR_BHS = 0x04,
+	BR_BCC = 0x05,
 	BR_BLO = 0x05,
 	BR_BGE = 0x08,
 	BR_BLT = 0x09,
@@ -1759,6 +1759,29 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget)
 			}
 		}

+		if (likely(!meta.portid)) {
+			netdev = dp->netdev;
+		} else if (meta.portid == NFP_META_PORT_ID_CTRL) {
+			struct nfp_net *nn = netdev_priv(dp->netdev);
+
+			nfp_app_ctrl_rx_raw(nn->app, rxbuf->frag + pkt_off,
+					    pkt_len);
+			nfp_net_rx_give_one(dp, rx_ring, rxbuf->frag,
+					    rxbuf->dma_addr);
+			continue;
+		} else {
+			struct nfp_net *nn;
+
+			nn = netdev_priv(dp->netdev);
+			netdev = nfp_app_repr_get(nn->app, meta.portid);
+			if (unlikely(!netdev)) {
+				nfp_net_rx_drop(dp, r_vec, rx_ring, rxbuf,
+						NULL);
+				continue;
+			}
+			nfp_repr_inc_rx_stats(netdev, pkt_len);
+		}
+
 		skb = build_skb(rxbuf->frag, true_bufsz);
 		if (unlikely(!skb)) {
 			nfp_net_rx_drop(dp, r_vec, rx_ring, rxbuf, NULL);
@@ -1774,20 +1797,6 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget)

 		nfp_net_rx_give_one(dp, rx_ring, new_frag, new_dma_addr);

-		if (likely(!meta.portid)) {
-			netdev = dp->netdev;
-		} else {
-			struct nfp_net *nn;
-
-			nn = netdev_priv(dp->netdev);
-			netdev = nfp_app_repr_get(nn->app, meta.portid);
-			if (unlikely(!netdev)) {
-				nfp_net_rx_drop(dp, r_vec, rx_ring, NULL, skb);
-				continue;
-			}
-			nfp_repr_inc_rx_stats(netdev, pkt_len);
-		}
-
 		skb_reserve(skb, pkt_off);
 		skb_put(skb, pkt_len);

@@ -3857,6 +3866,9 @@ int nfp_net_init(struct nfp_net *nn)
 		nn->dp.mtu = NFP_NET_DEFAULT_MTU;
 	nn->dp.fl_bufsz = nfp_net_calc_fl_bufsz(&nn->dp);

+	if (nfp_app_ctrl_uses_data_vnics(nn->app))
+		nn->dp.ctrl |= nn->cap & NFP_NET_CFG_CTRL_CMSG_DATA;
+
 	if (nn->cap & NFP_NET_CFG_CTRL_RSS_ANY) {
 		nfp_net_rss_init(nn);
 		nn->dp.ctrl |= nn->cap & NFP_NET_CFG_CTRL_RSS2 ?:
@@ -127,6 +127,7 @@
 #define   NFP_NET_CFG_CTRL_GATHER	  (0x1 <<  9) /* Gather DMA */
 #define   NFP_NET_CFG_CTRL_LSO		  (0x1 << 10) /* LSO/TSO (version 1) */
 #define   NFP_NET_CFG_CTRL_CTAG_FILTER	  (0x1 << 11) /* VLAN CTAG filtering */
+#define   NFP_NET_CFG_CTRL_CMSG_DATA	  (0x1 << 12) /* RX cmsgs on data Qs */
 #define   NFP_NET_CFG_CTRL_RINGCFG	  (0x1 << 16) /* Ring runtime changes */
 #define   NFP_NET_CFG_CTRL_RSS		  (0x1 << 17) /* RSS (version 1) */
 #define   NFP_NET_CFG_CTRL_IRQMOD	  (0x1 << 18) /* Interrupt moderation */
@@ -4,22 +4,46 @@

 #include <linux/errno.h>
 #include <linux/jump_label.h>
+#include <linux/percpu.h>
+#include <linux/rbtree.h>
 #include <uapi/linux/bpf.h>

 struct sock;
 struct sockaddr;
 struct cgroup;
 struct sk_buff;
+struct bpf_map;
+struct bpf_prog;
 struct bpf_sock_ops_kern;
+struct bpf_cgroup_storage;

 #ifdef CONFIG_CGROUP_BPF

 extern struct static_key_false cgroup_bpf_enabled_key;
 #define cgroup_bpf_enabled static_branch_unlikely(&cgroup_bpf_enabled_key)

+DECLARE_PER_CPU(void*, bpf_cgroup_storage);
+
+struct bpf_cgroup_storage_map;
+
+struct bpf_storage_buffer {
+	struct rcu_head rcu;
+	char data[0];
+};
+
+struct bpf_cgroup_storage {
+	struct bpf_storage_buffer *buf;
+	struct bpf_cgroup_storage_map *map;
+	struct bpf_cgroup_storage_key key;
+	struct list_head list;
+	struct rb_node node;
+	struct rcu_head rcu;
+};
+
 struct bpf_prog_list {
 	struct list_head node;
 	struct bpf_prog *prog;
+	struct bpf_cgroup_storage *storage;
 };

 struct bpf_prog_array;
@@ -77,6 +101,26 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
 int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
 				      short access, enum bpf_attach_type type);

+static inline void bpf_cgroup_storage_set(struct bpf_cgroup_storage *storage)
+{
+	struct bpf_storage_buffer *buf;
+
+	if (!storage)
+		return;
+
+	buf = READ_ONCE(storage->buf);
+	this_cpu_write(bpf_cgroup_storage, &buf->data[0]);
+}
+
+struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog);
+void bpf_cgroup_storage_free(struct bpf_cgroup_storage *storage);
+void bpf_cgroup_storage_link(struct bpf_cgroup_storage *storage,
+			     struct cgroup *cgroup,
+			     enum bpf_attach_type type);
+void bpf_cgroup_storage_unlink(struct bpf_cgroup_storage *storage);
+int bpf_cgroup_storage_assign(struct bpf_prog *prog, struct bpf_map *map);
+void bpf_cgroup_storage_release(struct bpf_prog *prog, struct bpf_map *map);
+
 /* Wrappers for __cgroup_bpf_run_filter_skb() guarded by cgroup_bpf_enabled. */
 #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb)			      \
 ({									      \
@@ -221,6 +265,16 @@ static inline int cgroup_bpf_prog_query(const union bpf_attr *attr,
 	return -EINVAL;
 }

+static inline void bpf_cgroup_storage_set(struct bpf_cgroup_storage *storage) {}
+static inline int bpf_cgroup_storage_assign(struct bpf_prog *prog,
+					    struct bpf_map *map) { return 0; }
+static inline void bpf_cgroup_storage_release(struct bpf_prog *prog,
+					      struct bpf_map *map) {}
+static inline struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(
+	struct bpf_prog *prog) { return 0; }
+static inline void bpf_cgroup_storage_free(
+	struct bpf_cgroup_storage *storage) {}
+
 #define cgroup_bpf_enabled (0)
 #define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (0)
 #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; })
@@ -155,6 +155,7 @@ enum bpf_arg_type {
 enum bpf_return_type {
 	RET_INTEGER,			/* function returns integer */
 	RET_VOID,			/* function doesn't return anything */
+	RET_PTR_TO_MAP_VALUE,		/* returns a pointer to map elem value */
 	RET_PTR_TO_MAP_VALUE_OR_NULL,	/* returns a pointer to map elem value or NULL */
 };

@@ -282,6 +283,7 @@ struct bpf_prog_aux {
 	struct bpf_prog *prog;
 	struct user_struct *user;
 	u64 load_time; /* ns since boottime */
+	struct bpf_map *cgroup_storage;
 	char name[BPF_OBJ_NAME_LEN];
 #ifdef CONFIG_SECURITY
 	void *security;
@@ -348,9 +350,14 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
 * The 'struct bpf_prog_array *' should only be replaced with xchg()
 * since other cpus are walking the array of pointers in parallel.
 */
+struct bpf_prog_array_item {
+	struct bpf_prog *prog;
+	struct bpf_cgroup_storage *cgroup_storage;
+};
+
 struct bpf_prog_array {
 	struct rcu_head rcu;
-	struct bpf_prog *progs[0];
+	struct bpf_prog_array_item items[0];
 };

 struct bpf_prog_array *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags);
@@ -371,7 +378,8 @@ int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array,

 #define __BPF_PROG_RUN_ARRAY(array, ctx, func, check_non_null)	\
 	({						\
-		struct bpf_prog **_prog, *__prog;	\
+		struct bpf_prog_array_item *_item;	\
+		struct bpf_prog *_prog;			\
 		struct bpf_prog_array *_array;		\
 		u32 _ret = 1;				\
 		preempt_disable();			\
@@ -379,10 +387,11 @@ int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array,
 		_array = rcu_dereference(array);	\
 		if (unlikely(check_non_null && !_array))\
 			goto _out;			\
-		_prog = _array->progs;			\
-		while ((__prog = READ_ONCE(*_prog))) {	\
-			_ret &= func(__prog, ctx);	\
-			_prog++;			\
+		_item = &_array->items[0];		\
+		while ((_prog = READ_ONCE(_item->prog))) {		\
+			bpf_cgroup_storage_set(_item->cgroup_storage);	\
+			_ret &= func(_prog, ctx);	\
+			_item++;			\
 		}					\
 _out:							\
 		rcu_read_unlock();			\
@@ -435,6 +444,8 @@ struct bpf_map * __must_check bpf_map_inc(struct bpf_map *map, bool uref);
 void bpf_map_put_with_uref(struct bpf_map *map);
 void bpf_map_put(struct bpf_map *map);
 int bpf_map_precharge_memlock(u32 pages);
+int bpf_map_charge_memlock(struct bpf_map *map, u32 pages);
+void bpf_map_uncharge_memlock(struct bpf_map *map, u32 pages);
 void *bpf_map_area_alloc(size_t size, int numa_node);
 void bpf_map_area_free(void *base);
 void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr);
@@ -777,6 +788,8 @@ extern const struct bpf_func_proto bpf_sock_map_update_proto;
 extern const struct bpf_func_proto bpf_sock_hash_update_proto;
 extern const struct bpf_func_proto bpf_get_current_cgroup_id_proto;

+extern const struct bpf_func_proto bpf_get_local_storage_proto;
+
 /* Shared helpers among cBPF and eBPF. */
 void bpf_user_rnd_init_once(void);
 u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
@@ -37,6 +37,9 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_PERF_EVENT_ARRAY, perf_event_array_map_ops)
 #ifdef CONFIG_CGROUPS
 BPF_MAP_TYPE(BPF_MAP_TYPE_CGROUP_ARRAY, cgroup_array_map_ops)
 #endif
+#ifdef CONFIG_CGROUP_BPF
+BPF_MAP_TYPE(BPF_MAP_TYPE_CGROUP_STORAGE, cgroup_storage_map_ops)
+#endif
 BPF_MAP_TYPE(BPF_MAP_TYPE_HASH, htab_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_HASH, htab_percpu_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_LRU_HASH, htab_lru_map_ops)
@@ -21,10 +21,12 @@

 extern int seg6_lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr,
 			       u32 tbl_id);
+extern bool seg6_bpf_has_valid_srh(struct sk_buff *skb);

 struct seg6_bpf_srh_state {
-	bool valid;
+	struct ipv6_sr_hdr *srh;
 	u16 hdrlen;
+	bool valid;
 };

 DECLARE_PER_CPU(struct seg6_bpf_srh_state, seg6_bpf_srh_states);
@@ -75,6 +75,11 @@ struct bpf_lpm_trie_key {
 	__u8	data[0];	/* Arbitrary size */
 };

+struct bpf_cgroup_storage_key {
+	__u64	cgroup_inode_id;	/* cgroup inode id */
+	__u32	attach_type;		/* program attach type */
+};
+
 /* BPF syscall commands, see bpf(2) man-page for details. */
 enum bpf_cmd {
 	BPF_MAP_CREATE,
@@ -120,6 +125,7 @@ enum bpf_map_type {
 	BPF_MAP_TYPE_CPUMAP,
 	BPF_MAP_TYPE_XSKMAP,
 	BPF_MAP_TYPE_SOCKHASH,
+	BPF_MAP_TYPE_CGROUP_STORAGE,
 };

 enum bpf_prog_type {
@@ -1371,6 +1377,20 @@ union bpf_attr {
 * 		A 8-byte long non-decreasing number on success, or 0 if the
 * 		socket field is missing inside *skb*.
 *
+ * u64 bpf_get_socket_cookie(struct bpf_sock_addr *ctx)
+ * 	Description
+ * 		Equivalent to bpf_get_socket_cookie() helper that accepts
+ * 		*skb*, but gets socket from **struct bpf_sock_addr** context.
+ * 	Return
+ * 		A 8-byte long non-decreasing number.
+ *
+ * u64 bpf_get_socket_cookie(struct bpf_sock_ops *ctx)
+ * 	Description
+ * 		Equivalent to bpf_get_socket_cookie() helper that accepts
+ * 		*skb*, but gets socket from **struct bpf_sock_ops** context.
+ * 	Return
+ * 		A 8-byte long non-decreasing number.
+ *
 * u32 bpf_get_socket_uid(struct sk_buff *skb)
 * 	Return
 * 		The owner UID of the socket associated to *skb*. If the socket
@@ -2075,6 +2095,24 @@ union bpf_attr {
 * 	Return
 * 		A 64-bit integer containing the current cgroup id based
 * 		on the cgroup within which the current task is running.
+ *
+ * void *bpf_get_local_storage(void *map, u64 flags)
+ *	Description
+ *		Get the pointer to the local storage area.
+ *		The type and the size of the local storage is defined
+ *		by the *map* argument.
+ *		The *flags* meaning is specific for each map type,
+ *		and has to be 0 for cgroup local storage.
+ *
+ *		Depending on the bpf program type, a local storage area
+ *		can be shared between multiple instances of the bpf program,
+ *		running simultaneously.
+ *
+ *		The user is responsible for synchronizing such accesses.
+ *		For example, by using the BPF_STX_XADD instruction to alter
+ *		the shared data.
+ *	Return
+ *		Pointer to the local storage area.
 */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -2157,7 +2195,8 @@ union bpf_attr {
 	FN(rc_repeat),			\
 	FN(rc_keydown),			\
 	FN(skb_cgroup_id),		\
-	FN(get_current_cgroup_id),
+	FN(get_current_cgroup_id),	\
+	FN(get_local_storage),

 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
 * function eBPF program intends to call
--- a/Show More
+++ b/Show More