Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Daniel Borkmann says:

====================
pull-request: bpf-next 2020-11-14

1) Add BTF generation for kernel modules and extend BTF infra in kernel
   e.g. support for split BTF loading and validation, from Andrii Nakryiko.

2) Support for pointers beyond pkt_end to recognize LLVM generated patterns
   on inlined branch conditions, from Alexei Starovoitov.

3) Implements bpf_local_storage for task_struct for BPF LSM, from KP Singh.

4) Enable FENTRY/FEXIT/RAW_TP tracing program to use the bpf_sk_storage
   infra, from Martin KaFai Lau.

5) Add XDP bulk APIs that introduce a defer/flush mechanism to optimize the
   XDP_REDIRECT path, from Lorenzo Bianconi.

6) Fix a potential (although rather theoretical) deadlock of hashtab in NMI
   context, from Song Liu.

7) Fixes for cross and out-of-tree build of bpftool and runqslower allowing
   build for different target archs on same source tree, from Jean-Philippe
   Brucker.

8) Fix error path in htab_map_alloc() triggered from syzbot, from Eric
   Dumazet.

9) Move functionality from test_tcpbpf_user into the test_progs framework
   so it can run in BPF CI, from Alexander Duyck.

10) Lift hashtab key_size limit to be larger than MAX_BPF_STACK, from
    Florian Lehner.

Note that for the fix from Song we have seen a sparse report on context
imbalance which requires changes in sparse itself for proper annotation
detection where this is currently being discussed on linux-sparse among
developers [0]. Once we have more clarification/guidance after their fix,
Song will follow-up.

  [0] https://lore.kernel.org/linux-sparse/CAHk-=wh4bx8A8dHnX612MsDO13st6uzAz1mJ1PaHHVevJx_ZCw@mail.gmail.com/T/
      https://lore.kernel.org/linux-sparse/20201109221345.uklbp3lzgq6g42zb@ltop.local/T/

* git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next: (66 commits)
  net: mlx5: Add xdp tx return bulking support
  net: mvpp2: Add xdp tx return bulking support
  net: mvneta: Add xdp tx return bulking support
  net: page_pool: Add bulk support for ptr_ring
  net: xdp: Introduce bulking for xdp tx return path
  bpf: Expose bpf_d_path helper to sleepable LSM hooks
  bpf: Augment the set of sleepable LSM hooks
  bpf: selftest: Use bpf_sk_storage in FENTRY/FEXIT/RAW_TP
  bpf: Allow using bpf_sk_storage in FENTRY/FEXIT/RAW_TP
  bpf: Rename some functions in bpf_sk_storage
  bpf: Folding omem_charge() into sk_storage_charge()
  selftests/bpf: Add asm tests for pkt vs pkt_end comparison.
  selftests/bpf: Add skb_pkt_end test
  bpf: Support for pointers beyond pkt_end.
  tools/bpf: Always run the *-clean recipes
  tools/bpf: Add bootstrap/ to .gitignore
  bpf: Fix NULL dereference in bpf_task_storage
  tools/bpftool: Fix build slowdown
  tools/runqslower: Build bpftool using HOSTCC
  tools/runqslower: Enable out-of-tree build
  ...
====================

Link: https://lore.kernel.org/r/20201114020819.29584-1-daniel@iogearbox.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Documentation/ABI/testing/sysfs-kernel-btf
@@ -15,3 +15,11 @@ Description:
 		information with description of all internal kernel types. See
 		Documentation/bpf/btf.rst for detailed description of format
 		itself.
+
+What:		/sys/kernel/btf/<module-name>
+Date:		Nov 2020
+KernelVersion:	5.11
+Contact:	bpf@vger.kernel.org
+Description:
+		Read-only binary attribute exposing kernel module's BTF type
+		information as an add-on to the kernel's BTF (/sys/kernel/btf/vmlinux).
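The new per-module attribute pairs with the split-BTF loading that this pull
request adds to libbpf. Below is a minimal userspace sketch, not part of this
diff: it assumes a libbpf with split-BTF support and that the running kernel
exposes BTF for an nf_tables module (both are assumptions for illustration).

    #include <stdio.h>
    #include <bpf/btf.h>
    #include <bpf/libbpf.h>

    int main(void)
    {
        struct btf *base, *mod;

        /* base BTF: all vmlinux types */
        base = btf__parse("/sys/kernel/btf/vmlinux", NULL);
        if (libbpf_get_error(base))
            return 1;

        /* split BTF: module-only types, resolved against the base */
        mod = btf__parse_split("/sys/kernel/btf/nf_tables", base);
        if (libbpf_get_error(mod)) {
            btf__free(base);
            return 1;
        }

        printf("nf_tables BTF carries %u types\n", btf__get_nr_types(mod));

        btf__free(mod);
        btf__free(base);
        return 0;
    }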
drivers/net/ethernet/marvell/mvneta.c
@@ -1834,8 +1834,13 @@ static void mvneta_txq_bufs_free(struct mvneta_port *pp,
 				 struct netdev_queue *nq, bool napi)
 {
 	unsigned int bytes_compl = 0, pkts_compl = 0;
+	struct xdp_frame_bulk bq;
 	int i;
 
+	xdp_frame_bulk_init(&bq);
+
+	rcu_read_lock(); /* need for xdp_return_frame_bulk */
+
 	for (i = 0; i < num; i++) {
 		struct mvneta_tx_buf *buf = &txq->buf[txq->txq_get_index];
 		struct mvneta_tx_desc *tx_desc = txq->descs +
@@ -1857,9 +1862,12 @@ static void mvneta_txq_bufs_free(struct mvneta_port *pp,
 			if (napi && buf->type == MVNETA_TYPE_XDP_TX)
 				xdp_return_frame_rx_napi(buf->xdpf);
 			else
-				xdp_return_frame(buf->xdpf);
+				xdp_return_frame_bulk(buf->xdpf, &bq);
 		}
 	}
+	xdp_flush_frame_bulk(&bq);
+
+	rcu_read_unlock();
 
 	netdev_tx_completed_queue(nq, pkts_compl, bytes_compl);
 }
drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
@@ -2440,8 +2440,13 @@ static void mvpp2_txq_bufs_free(struct mvpp2_port *port,
 				struct mvpp2_tx_queue *txq,
 				struct mvpp2_txq_pcpu *txq_pcpu, int num)
 {
+	struct xdp_frame_bulk bq;
 	int i;
 
+	xdp_frame_bulk_init(&bq);
+
+	rcu_read_lock(); /* need for xdp_return_frame_bulk */
+
 	for (i = 0; i < num; i++) {
 		struct mvpp2_txq_pcpu_buf *tx_buf =
 			txq_pcpu->buffs + txq_pcpu->txq_get_index;
@@ -2454,10 +2459,13 @@ static void mvpp2_txq_bufs_free(struct mvpp2_port *port,
 			dev_kfree_skb_any(tx_buf->skb);
 		else if (tx_buf->type == MVPP2_TYPE_XDP_TX ||
 			 tx_buf->type == MVPP2_TYPE_XDP_NDO)
-			xdp_return_frame(tx_buf->xdpf);
+			xdp_return_frame_bulk(tx_buf->xdpf, &bq);
 
 		mvpp2_txq_inc_get(txq_pcpu);
 	}
+	xdp_flush_frame_bulk(&bq);
+
+	rcu_read_unlock();
 }
 
 static inline struct mvpp2_rx_queue *mvpp2_get_rx_queue(struct mvpp2_port *port,
drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
@@ -366,7 +366,8 @@ mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd,
 static void mlx5e_free_xdpsq_desc(struct mlx5e_xdpsq *sq,
 				  struct mlx5e_xdp_wqe_info *wi,
 				  u32 *xsk_frames,
-				  bool recycle)
+				  bool recycle,
+				  struct xdp_frame_bulk *bq)
 {
 	struct mlx5e_xdp_info_fifo *xdpi_fifo = &sq->db.xdpi_fifo;
 	u16 i;
@@ -379,7 +380,7 @@ static void mlx5e_free_xdpsq_desc(struct mlx5e_xdpsq *sq,
 			/* XDP_TX from the XSK RQ and XDP_REDIRECT */
 			dma_unmap_single(sq->pdev, xdpi.frame.dma_addr,
 					 xdpi.frame.xdpf->len, DMA_TO_DEVICE);
-			xdp_return_frame(xdpi.frame.xdpf);
+			xdp_return_frame_bulk(xdpi.frame.xdpf, bq);
 			break;
 		case MLX5E_XDP_XMIT_MODE_PAGE:
 			/* XDP_TX from the regular RQ */
@@ -397,12 +398,15 @@ static void mlx5e_free_xdpsq_desc(struct mlx5e_xdpsq *sq,
 
 bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq)
 {
+	struct xdp_frame_bulk bq;
 	struct mlx5e_xdpsq *sq;
 	struct mlx5_cqe64 *cqe;
 	u32 xsk_frames = 0;
 	u16 sqcc;
 	int i;
 
+	xdp_frame_bulk_init(&bq);
+
 	sq = container_of(cq, struct mlx5e_xdpsq, cq);
 
 	if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state)))
@@ -434,7 +438,7 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq)
 
 		sqcc += wi->num_wqebbs;
 
-		mlx5e_free_xdpsq_desc(sq, wi, &xsk_frames, true);
+		mlx5e_free_xdpsq_desc(sq, wi, &xsk_frames, true, &bq);
 	} while (!last_wqe);
 
 	if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ)) {
@@ -447,6 +451,8 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq)
 		}
 	} while ((++i < MLX5E_TX_CQ_POLL_BUDGET) && (cqe = mlx5_cqwq_get_cqe(&cq->wq)));
 
+	xdp_flush_frame_bulk(&bq);
+
 	if (xsk_frames)
 		xsk_tx_completed(sq->xsk_pool, xsk_frames);
 
@@ -463,8 +469,13 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq)
 
 void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq)
 {
+	struct xdp_frame_bulk bq;
 	u32 xsk_frames = 0;
 
+	xdp_frame_bulk_init(&bq);
+
+	rcu_read_lock(); /* need for xdp_return_frame_bulk */
+
 	while (sq->cc != sq->pc) {
 		struct mlx5e_xdp_wqe_info *wi;
 		u16 ci;
@@ -474,9 +485,12 @@ void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq)
 
 		sq->cc += wi->num_wqebbs;
 
-		mlx5e_free_xdpsq_desc(sq, wi, &xsk_frames, false);
+		mlx5e_free_xdpsq_desc(sq, wi, &xsk_frames, false, &bq);
 	}
 
+	xdp_flush_frame_bulk(&bq);
+	rcu_read_unlock();
+
 	if (xsk_frames)
 		xsk_tx_completed(sq->xsk_pool, xsk_frames);
 }
include/linux/bpf.h
@@ -36,9 +36,11 @@ struct seq_operations;
 struct bpf_iter_aux_info;
 struct bpf_local_storage;
 struct bpf_local_storage_map;
+struct kobject;
 
 extern struct idr btf_idr;
 extern spinlock_t btf_idr_lock;
+extern struct kobject *btf_kobj;
 
 typedef int (*bpf_iter_init_seq_priv_t)(void *private_data,
 					struct bpf_iter_aux_info *aux);
@@ -310,6 +312,7 @@ enum bpf_return_type {
 	RET_PTR_TO_BTF_ID_OR_NULL,	/* returns a pointer to a btf_id or NULL */
 	RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL, /* returns a pointer to a valid memory or a btf_id or NULL */
 	RET_PTR_TO_MEM_OR_BTF_ID,	/* returns a pointer to a valid memory or a btf_id */
+	RET_PTR_TO_BTF_ID,		/* returns a pointer to a btf_id */
 };
 
 /* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs
@@ -1294,6 +1297,10 @@ typedef void (*bpf_iter_show_fdinfo_t) (const struct bpf_iter_aux_info *aux,
 typedef int (*bpf_iter_fill_link_info_t)(const struct bpf_iter_aux_info *aux,
 					 struct bpf_link_info *info);
 
+enum bpf_iter_feature {
+	BPF_ITER_RESCHED	= BIT(0),
+};
+
 #define BPF_ITER_CTX_ARG_MAX 2
 struct bpf_iter_reg {
 	const char *target;
@@ -1302,6 +1309,7 @@ struct bpf_iter_reg {
 	bpf_iter_show_fdinfo_t show_fdinfo;
 	bpf_iter_fill_link_info_t fill_link_info;
 	u32 ctx_arg_info_size;
+	u32 feature;
 	struct bpf_ctx_arg_aux ctx_arg_info[BPF_ITER_CTX_ARG_MAX];
 	const struct bpf_iter_seq_info *seq_info;
 };
include/linux/bpf_lsm.h
@@ -7,6 +7,7 @@
 #ifndef _LINUX_BPF_LSM_H
 #define _LINUX_BPF_LSM_H
 
+#include <linux/sched.h>
 #include <linux/bpf.h>
 #include <linux/lsm_hooks.h>
 
@@ -26,6 +27,8 @@ extern struct lsm_blob_sizes bpf_lsm_blob_sizes;
 int bpf_lsm_verify_prog(struct bpf_verifier_log *vlog,
 			const struct bpf_prog *prog);
 
+bool bpf_lsm_is_sleepable_hook(u32 btf_id);
+
 static inline struct bpf_storage_blob *bpf_inode(
 	const struct inode *inode)
 {
@@ -35,12 +38,29 @@ static inline struct bpf_storage_blob *bpf_inode(
 	return inode->i_security + bpf_lsm_blob_sizes.lbs_inode;
 }
 
+static inline struct bpf_storage_blob *bpf_task(
+	const struct task_struct *task)
+{
+	if (unlikely(!task->security))
+		return NULL;
+
+	return task->security + bpf_lsm_blob_sizes.lbs_task;
+}
+
 extern const struct bpf_func_proto bpf_inode_storage_get_proto;
 extern const struct bpf_func_proto bpf_inode_storage_delete_proto;
+extern const struct bpf_func_proto bpf_task_storage_get_proto;
+extern const struct bpf_func_proto bpf_task_storage_delete_proto;
 void bpf_inode_storage_free(struct inode *inode);
+void bpf_task_storage_free(struct task_struct *task);
 
 #else /* !CONFIG_BPF_LSM */
 
+static inline bool bpf_lsm_is_sleepable_hook(u32 btf_id)
+{
+	return false;
+}
+
 static inline int bpf_lsm_verify_prog(struct bpf_verifier_log *vlog,
 				      const struct bpf_prog *prog)
 {
@@ -53,10 +73,20 @@ static inline struct bpf_storage_blob *bpf_inode(
 	return NULL;
 }
 
+static inline struct bpf_storage_blob *bpf_task(
+	const struct task_struct *task)
+{
+	return NULL;
+}
+
 static inline void bpf_inode_storage_free(struct inode *inode)
 {
 }
 
+static inline void bpf_task_storage_free(struct task_struct *task)
+{
+}
+
 #endif /* CONFIG_BPF_LSM */
 
 #endif /* _LINUX_BPF_LSM_H */
include/linux/bpf_types.h
@@ -109,6 +109,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKHASH, sock_hash_ops)
 #endif
 #ifdef CONFIG_BPF_LSM
 BPF_MAP_TYPE(BPF_MAP_TYPE_INODE_STORAGE, inode_storage_map_ops)
+BPF_MAP_TYPE(BPF_MAP_TYPE_TASK_STORAGE, task_storage_map_ops)
 #endif
 BPF_MAP_TYPE(BPF_MAP_TYPE_CPUMAP, cpu_map_ops)
 #if defined(CONFIG_XDP_SOCKETS)
include/linux/bpf_verifier.h
@@ -45,7 +45,7 @@ struct bpf_reg_state {
 	enum bpf_reg_type type;
 	union {
 		/* valid when type == PTR_TO_PACKET */
-		u16 range;
+		int range;
 
 		/* valid when type == CONST_PTR_TO_MAP | PTR_TO_MAP_VALUE |
 		 *   PTR_TO_MAP_VALUE_OR_NULL
include/linux/module.h
@@ -475,6 +475,10 @@ struct module {
 	unsigned int num_bpf_raw_events;
 	struct bpf_raw_event_map *bpf_raw_events;
 #endif
+#ifdef CONFIG_DEBUG_INFO_BTF_MODULES
+	unsigned int btf_data_size;
+	void *btf_data;
+#endif
 #ifdef CONFIG_JUMP_LABEL
 	struct jump_entry *jump_entries;
 	unsigned int num_jump_entries;
include/net/bpf_sk_storage.h
@@ -20,6 +20,8 @@ void bpf_sk_storage_free(struct sock *sk);
 
 extern const struct bpf_func_proto bpf_sk_storage_get_proto;
 extern const struct bpf_func_proto bpf_sk_storage_delete_proto;
+extern const struct bpf_func_proto bpf_sk_storage_get_tracing_proto;
+extern const struct bpf_func_proto bpf_sk_storage_delete_tracing_proto;
 
 struct bpf_local_storage_elem;
 struct bpf_sk_storage_diag;
include/net/page_pool.h
@@ -152,6 +152,8 @@ struct page_pool *page_pool_create(const struct page_pool_params *params);
 void page_pool_destroy(struct page_pool *pool);
 void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *));
 void page_pool_release_page(struct page_pool *pool, struct page *page);
+void page_pool_put_page_bulk(struct page_pool *pool, void **data,
+			     int count);
 #else
 static inline void page_pool_destroy(struct page_pool *pool)
 {
@@ -165,6 +167,11 @@ static inline void page_pool_release_page(struct page_pool *pool,
 					  struct page *page)
 {
 }
+
+static inline void page_pool_put_page_bulk(struct page_pool *pool, void **data,
+					   int count)
+{
+}
 #endif
 
 void page_pool_put_page(struct page_pool *pool, struct page *page,
@@ -215,4 +222,23 @@ static inline void page_pool_nid_changed(struct page_pool *pool, int new_nid)
 	if (unlikely(pool->p.nid != new_nid))
 		page_pool_update_nid(pool, new_nid);
 }
+
+static inline void page_pool_ring_lock(struct page_pool *pool)
+	__acquires(&pool->ring.producer_lock)
+{
+	if (in_serving_softirq())
+		spin_lock(&pool->ring.producer_lock);
+	else
+		spin_lock_bh(&pool->ring.producer_lock);
+}
+
+static inline void page_pool_ring_unlock(struct page_pool *pool)
+	__releases(&pool->ring.producer_lock)
+{
+	if (in_serving_softirq())
+		spin_unlock(&pool->ring.producer_lock);
+	else
+		spin_unlock_bh(&pool->ring.producer_lock);
+}
+
 #endif /* _NET_PAGE_POOL_H */
include/net/xdp.h
@@ -104,6 +104,18 @@ struct xdp_frame {
 	struct net_device *dev_rx; /* used by cpumap */
 };
 
+#define XDP_BULK_QUEUE_SIZE	16
+struct xdp_frame_bulk {
+	int count;
+	void *xa;
+	void *q[XDP_BULK_QUEUE_SIZE];
+};
+
+static __always_inline void xdp_frame_bulk_init(struct xdp_frame_bulk *bq)
+{
+	/* bq->count will be zero'ed when bq->xa gets updated */
+	bq->xa = NULL;
+}
 
 static inline struct skb_shared_info *
 xdp_get_shared_info_from_frame(struct xdp_frame *frame)
@@ -194,6 +206,9 @@ struct xdp_frame *xdp_convert_buff_to_frame(struct xdp_buff *xdp)
 void xdp_return_frame(struct xdp_frame *xdpf);
 void xdp_return_frame_rx_napi(struct xdp_frame *xdpf);
 void xdp_return_buff(struct xdp_buff *xdp);
+void xdp_flush_frame_bulk(struct xdp_frame_bulk *bq);
+void xdp_return_frame_bulk(struct xdp_frame *xdpf,
+			   struct xdp_frame_bulk *bq);
 
 /* When sending xdp_frame into the network stack, then there is no
  * return point callback, which is needed to release e.g. DMA-mapping
@@ -245,6 +260,6 @@ bool xdp_attachment_flags_ok(struct xdp_attachment_info *info,
 void xdp_attachment_setup(struct xdp_attachment_info *info,
 			  struct netdev_bpf *bpf);
 
-#define DEV_MAP_BULK_SIZE 16
+#define DEV_MAP_BULK_SIZE XDP_BULK_QUEUE_SIZE
 
 #endif /* __LINUX_NET_XDP_H__ */
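For reference, a minimal sketch of the calling convention this API expects in
a driver's TX-completion path; it mirrors the pattern the three driver diffs
above adopt. The example_* names are hypothetical and only for illustration.

    /* Hypothetical driver structure, for illustration only. */
    struct example_txq {
        struct xdp_frame **frames;
    };

    static void example_txq_clean(struct example_txq *txq, int num)
    {
        struct xdp_frame_bulk bq;
        int i;

        xdp_frame_bulk_init(&bq);

        rcu_read_lock(); /* needed for xdp_return_frame_bulk() */

        /* frames accumulate in bq and are recycled in batches of up to
         * XDP_BULK_QUEUE_SIZE (or earlier, when the memory allocator
         * backing the frames changes)
         */
        for (i = 0; i < num; i++)
            xdp_return_frame_bulk(txq->frames[i], &bq);

        /* release whatever is still queued */
        xdp_flush_frame_bulk(&bq);

        rcu_read_unlock();
    }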
include/uapi/linux/bpf.h
@@ -157,6 +157,7 @@ enum bpf_map_type {
 	BPF_MAP_TYPE_STRUCT_OPS,
 	BPF_MAP_TYPE_RINGBUF,
 	BPF_MAP_TYPE_INODE_STORAGE,
+	BPF_MAP_TYPE_TASK_STORAGE,
 };
 
 /* Note that tracing related programs such as
@@ -3742,6 +3743,50 @@ union bpf_attr {
 *	Return
 *		The helper returns **TC_ACT_REDIRECT** on success or
 *		**TC_ACT_SHOT** on error.
+*
+* void *bpf_task_storage_get(struct bpf_map *map, struct task_struct *task, void *value, u64 flags)
+*	Description
+*		Get a bpf_local_storage from the *task*.
+*
+*		Logically, it could be thought of as getting the value from
+*		a *map* with *task* as the **key**.  From this
+*		perspective, the usage is not much different from
+*		**bpf_map_lookup_elem**\ (*map*, **&**\ *task*) except this
+*		helper enforces the key must be an task_struct and the map must also
+*		be a **BPF_MAP_TYPE_TASK_STORAGE**.
+*
+*		Underneath, the value is stored locally at *task* instead of
+*		the *map*.  The *map* is used as the bpf-local-storage
+*		"type". The bpf-local-storage "type" (i.e. the *map*) is
+*		searched against all bpf_local_storage residing at *task*.
+*
+*		An optional *flags* (**BPF_LOCAL_STORAGE_GET_F_CREATE**) can be
+*		used such that a new bpf_local_storage will be
+*		created if one does not exist.  *value* can be used
+*		together with **BPF_LOCAL_STORAGE_GET_F_CREATE** to specify
+*		the initial value of a bpf_local_storage.  If *value* is
+*		**NULL**, the new bpf_local_storage will be zero initialized.
+*	Return
+*		A bpf_local_storage pointer is returned on success.
+*
+*		**NULL** if not found or there was an error in adding
+*		a new bpf_local_storage.
+*
+* long bpf_task_storage_delete(struct bpf_map *map, struct task_struct *task)
+*	Description
+*		Delete a bpf_local_storage from a *task*.
+*	Return
+*		0 on success.
+*
+*		**-ENOENT** if the bpf_local_storage cannot be found.
+*
+* struct task_struct *bpf_get_current_task_btf(void)
+*	Description
+*		Return a BTF pointer to the "current" task.
+*		This pointer can also be used in helpers that accept an
+*		*ARG_PTR_TO_BTF_ID* of type *task_struct*.
+*	Return
+*		Pointer to the current task.
 */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3900,6 +3945,9 @@ union bpf_attr {
 	FN(bpf_per_cpu_ptr),		\
 	FN(bpf_this_cpu_ptr),		\
 	FN(redirect_peer),		\
+	FN(task_storage_get),		\
+	FN(task_storage_delete),	\
+	FN(get_current_task_btf),	\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
@@ -4418,6 +4466,9 @@ struct bpf_btf_info {
 	__aligned_u64 btf;
 	__u32 btf_size;
 	__u32 id;
+	__aligned_u64 name;
+	__u32 name_len;
+	__u32 kernel_btf;
 } __attribute__((aligned(8)));
 
 struct bpf_link_info {
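To make the helper documentation above concrete, here is an illustrative
BPF-side user of the new map type and helpers: it counts execs per task from
an LSM hook. The program, map, and section names are ours, not part of this
merge; this is a sketch, not the series' own selftest.

    // SPDX-License-Identifier: GPL-2.0
    #include "vmlinux.h"
    #include <bpf/bpf_helpers.h>
    #include <bpf/bpf_tracing.h>

    struct {
        __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
        __uint(map_flags, BPF_F_NO_PREALLOC);
        __type(key, int);
        __type(value, __u64);
    } exec_count SEC(".maps");

    SEC("lsm/bprm_committed_creds")
    int BPF_PROG(count_execs, struct linux_binprm *bprm)
    {
        struct task_struct *task = bpf_get_current_task_btf();
        __u64 *cnt;

        /* create-on-miss, zero initialized since value is NULL */
        cnt = bpf_task_storage_get(&exec_count, task, 0,
                                   BPF_LOCAL_STORAGE_GET_F_CREATE);
        if (cnt)
            (*cnt)++;
        return 0;
    }

    char LICENSE[] SEC("license") = "GPL";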
kernel/bpf/Makefile
@@ -10,6 +10,7 @@ obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o bpf_i
 obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o
 obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o
 obj-${CONFIG_BPF_LSM} += bpf_inode_storage.o
+obj-${CONFIG_BPF_LSM} += bpf_task_storage.o
 obj-$(CONFIG_BPF_SYSCALL) += disasm.o
 obj-$(CONFIG_BPF_JIT) += trampoline.o
 obj-$(CONFIG_BPF_SYSCALL) += btf.o
kernel/bpf/bpf_iter.c
@@ -67,6 +67,15 @@ static void bpf_iter_done_stop(struct seq_file *seq)
 	iter_priv->done_stop = true;
 }
 
+static bool bpf_iter_support_resched(struct seq_file *seq)
+{
+	struct bpf_iter_priv_data *iter_priv;
+
+	iter_priv = container_of(seq->private, struct bpf_iter_priv_data,
+				 target_private);
+	return iter_priv->tinfo->reg_info->feature & BPF_ITER_RESCHED;
+}
+
 /* maximum visited objects before bailing out */
 #define MAX_ITER_OBJECTS	1000000
 
@@ -83,6 +92,7 @@ static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size,
 	struct seq_file *seq = file->private_data;
 	size_t n, offs, copied = 0;
 	int err = 0, num_objs = 0;
+	bool can_resched;
 	void *p;
 
 	mutex_lock(&seq->lock);
@@ -135,6 +145,7 @@ static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size,
 		goto done;
 	}
 
+	can_resched = bpf_iter_support_resched(seq);
 	while (1) {
 		loff_t pos = seq->index;
 
@@ -180,6 +191,9 @@ static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size,
 			}
 			break;
 		}
+
+		if (can_resched)
+			cond_resched();
 	}
 stop:
 	offs = seq->count;
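An iterator target opts in to the new cond_resched() behavior by setting the
feature field at registration time. A hedged sketch follows; the example_*
symbols are placeholders, not names from this merge.

    static const struct bpf_iter_seq_info example_seq_info; /* placeholder */

    static struct bpf_iter_reg example_reg_info = {
        .target   = "example",
        /* allow bpf_seq_read() to cond_resched() between objects */
        .feature  = BPF_ITER_RESCHED,
        .seq_info = &example_seq_info,
    };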
kernel/bpf/bpf_lsm.c
@@ -63,11 +63,99 @@ bpf_lsm_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_sk_storage_get_proto;
 	case BPF_FUNC_sk_storage_delete:
 		return &bpf_sk_storage_delete_proto;
+	case BPF_FUNC_spin_lock:
+		return &bpf_spin_lock_proto;
+	case BPF_FUNC_spin_unlock:
+		return &bpf_spin_unlock_proto;
+	case BPF_FUNC_task_storage_get:
+		return &bpf_task_storage_get_proto;
+	case BPF_FUNC_task_storage_delete:
+		return &bpf_task_storage_delete_proto;
 	default:
 		return tracing_prog_func_proto(func_id, prog);
 	}
 }
 
+/* The set of hooks which are called without pagefaults disabled and are allowed
+ * to "sleep" and thus can be used for sleepable BPF programs.
+ */
+BTF_SET_START(sleepable_lsm_hooks)
+BTF_ID(func, bpf_lsm_bpf)
+BTF_ID(func, bpf_lsm_bpf_map)
+BTF_ID(func, bpf_lsm_bpf_map_alloc_security)
+BTF_ID(func, bpf_lsm_bpf_map_free_security)
+BTF_ID(func, bpf_lsm_bpf_prog)
+BTF_ID(func, bpf_lsm_bprm_check_security)
+BTF_ID(func, bpf_lsm_bprm_committed_creds)
+BTF_ID(func, bpf_lsm_bprm_committing_creds)
+BTF_ID(func, bpf_lsm_bprm_creds_for_exec)
+BTF_ID(func, bpf_lsm_bprm_creds_from_file)
+BTF_ID(func, bpf_lsm_capget)
+BTF_ID(func, bpf_lsm_capset)
+BTF_ID(func, bpf_lsm_cred_prepare)
+BTF_ID(func, bpf_lsm_file_ioctl)
+BTF_ID(func, bpf_lsm_file_lock)
+BTF_ID(func, bpf_lsm_file_open)
+BTF_ID(func, bpf_lsm_file_receive)
+BTF_ID(func, bpf_lsm_inet_conn_established)
+BTF_ID(func, bpf_lsm_inode_create)
+BTF_ID(func, bpf_lsm_inode_free_security)
+BTF_ID(func, bpf_lsm_inode_getattr)
+BTF_ID(func, bpf_lsm_inode_getxattr)
+BTF_ID(func, bpf_lsm_inode_mknod)
+BTF_ID(func, bpf_lsm_inode_need_killpriv)
+BTF_ID(func, bpf_lsm_inode_post_setxattr)
+BTF_ID(func, bpf_lsm_inode_readlink)
+BTF_ID(func, bpf_lsm_inode_rename)
+BTF_ID(func, bpf_lsm_inode_rmdir)
+BTF_ID(func, bpf_lsm_inode_setattr)
+BTF_ID(func, bpf_lsm_inode_setxattr)
+BTF_ID(func, bpf_lsm_inode_symlink)
+BTF_ID(func, bpf_lsm_inode_unlink)
+BTF_ID(func, bpf_lsm_kernel_module_request)
+BTF_ID(func, bpf_lsm_kernfs_init_security)
+BTF_ID(func, bpf_lsm_key_free)
+BTF_ID(func, bpf_lsm_mmap_file)
+BTF_ID(func, bpf_lsm_netlink_send)
+BTF_ID(func, bpf_lsm_path_notify)
+BTF_ID(func, bpf_lsm_release_secctx)
+BTF_ID(func, bpf_lsm_sb_alloc_security)
+BTF_ID(func, bpf_lsm_sb_eat_lsm_opts)
+BTF_ID(func, bpf_lsm_sb_kern_mount)
+BTF_ID(func, bpf_lsm_sb_mount)
+BTF_ID(func, bpf_lsm_sb_remount)
+BTF_ID(func, bpf_lsm_sb_set_mnt_opts)
+BTF_ID(func, bpf_lsm_sb_show_options)
+BTF_ID(func, bpf_lsm_sb_statfs)
+BTF_ID(func, bpf_lsm_sb_umount)
+BTF_ID(func, bpf_lsm_settime)
+BTF_ID(func, bpf_lsm_socket_accept)
+BTF_ID(func, bpf_lsm_socket_bind)
+BTF_ID(func, bpf_lsm_socket_connect)
+BTF_ID(func, bpf_lsm_socket_create)
+BTF_ID(func, bpf_lsm_socket_getpeername)
+BTF_ID(func, bpf_lsm_socket_getpeersec_dgram)
+BTF_ID(func, bpf_lsm_socket_getsockname)
+BTF_ID(func, bpf_lsm_socket_getsockopt)
+BTF_ID(func, bpf_lsm_socket_listen)
+BTF_ID(func, bpf_lsm_socket_post_create)
+BTF_ID(func, bpf_lsm_socket_recvmsg)
+BTF_ID(func, bpf_lsm_socket_sendmsg)
+BTF_ID(func, bpf_lsm_socket_shutdown)
+BTF_ID(func, bpf_lsm_socket_socketpair)
+BTF_ID(func, bpf_lsm_syslog)
+BTF_ID(func, bpf_lsm_task_alloc)
+BTF_ID(func, bpf_lsm_task_getsecid)
+BTF_ID(func, bpf_lsm_task_prctl)
+BTF_ID(func, bpf_lsm_task_setscheduler)
+BTF_ID(func, bpf_lsm_task_to_inode)
+BTF_SET_END(sleepable_lsm_hooks)
+
+bool bpf_lsm_is_sleepable_hook(u32 btf_id)
+{
+	return btf_id_set_contains(&sleepable_lsm_hooks, btf_id);
+}
+
 const struct bpf_prog_ops lsm_prog_ops = {
 };
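Since file_open is in the set above, a sleepable (lsm.s/) program can combine
it with bpf_d_path(), which "bpf: Expose bpf_d_path helper to sleepable LSM
hooks" in this same series makes available there. An illustrative sketch with
our own program names, not code from this merge:

    #include "vmlinux.h"
    #include <bpf/bpf_helpers.h>
    #include <bpf/bpf_tracing.h>

    char LICENSE[] SEC("license") = "GPL";

    SEC("lsm.s/file_open")
    int BPF_PROG(log_open, struct file *file)
    {
        char path[64];

        /* bpf_d_path() may fault; a sleepable hook tolerates that */
        if (bpf_d_path(&file->f_path, path, sizeof(path)) > 0)
            bpf_printk("open: %s", path);
        return 0;
    }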
kernel/bpf/bpf_task_storage.c (new file, 315 lines, all added)
@@ -0,0 +1,315 @@
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2020 Facebook
 * Copyright 2020 Google LLC.
 */

#include <linux/pid.h>
#include <linux/sched.h>
#include <linux/rculist.h>
#include <linux/list.h>
#include <linux/hash.h>
#include <linux/types.h>
#include <linux/spinlock.h>
#include <linux/bpf.h>
#include <linux/bpf_local_storage.h>
#include <linux/filter.h>
#include <uapi/linux/btf.h>
#include <linux/bpf_lsm.h>
#include <linux/btf_ids.h>
#include <linux/fdtable.h>

DEFINE_BPF_STORAGE_CACHE(task_cache);

static struct bpf_local_storage __rcu **task_storage_ptr(void *owner)
{
	struct task_struct *task = owner;
	struct bpf_storage_blob *bsb;

	bsb = bpf_task(task);
	if (!bsb)
		return NULL;
	return &bsb->storage;
}

static struct bpf_local_storage_data *
task_storage_lookup(struct task_struct *task, struct bpf_map *map,
		    bool cacheit_lockit)
{
	struct bpf_local_storage *task_storage;
	struct bpf_local_storage_map *smap;
	struct bpf_storage_blob *bsb;

	bsb = bpf_task(task);
	if (!bsb)
		return NULL;

	task_storage = rcu_dereference(bsb->storage);
	if (!task_storage)
		return NULL;

	smap = (struct bpf_local_storage_map *)map;
	return bpf_local_storage_lookup(task_storage, smap, cacheit_lockit);
}

void bpf_task_storage_free(struct task_struct *task)
{
	struct bpf_local_storage_elem *selem;
	struct bpf_local_storage *local_storage;
	bool free_task_storage = false;
	struct bpf_storage_blob *bsb;
	struct hlist_node *n;

	bsb = bpf_task(task);
	if (!bsb)
		return;

	rcu_read_lock();

	local_storage = rcu_dereference(bsb->storage);
	if (!local_storage) {
		rcu_read_unlock();
		return;
	}

	/* Neither the bpf_prog nor the bpf-map's syscall
	 * could be modifying the local_storage->list now.
	 * Thus, no elem can be added-to or deleted-from the
	 * local_storage->list by the bpf_prog or by the bpf-map's syscall.
	 *
	 * It is racing with bpf_local_storage_map_free() alone
	 * when unlinking elem from the local_storage->list and
	 * the map's bucket->list.
	 */
	raw_spin_lock_bh(&local_storage->lock);
	hlist_for_each_entry_safe(selem, n, &local_storage->list, snode) {
		/* Always unlink from map before unlinking from
		 * local_storage.
		 */
		bpf_selem_unlink_map(selem);
		free_task_storage = bpf_selem_unlink_storage_nolock(
			local_storage, selem, false);
	}
	raw_spin_unlock_bh(&local_storage->lock);
	rcu_read_unlock();

	/* free_task_storage should always be true as long as
	 * local_storage->list was non-empty.
	 */
	if (free_task_storage)
		kfree_rcu(local_storage, rcu);
}

static void *bpf_pid_task_storage_lookup_elem(struct bpf_map *map, void *key)
{
	struct bpf_local_storage_data *sdata;
	struct task_struct *task;
	unsigned int f_flags;
	struct pid *pid;
	int fd, err;

	fd = *(int *)key;
	pid = pidfd_get_pid(fd, &f_flags);
	if (IS_ERR(pid))
		return ERR_CAST(pid);

	/* We should be in an RCU read side critical section, it should be safe
	 * to call pid_task.
	 */
	WARN_ON_ONCE(!rcu_read_lock_held());
	task = pid_task(pid, PIDTYPE_PID);
	if (!task) {
		err = -ENOENT;
		goto out;
	}

	sdata = task_storage_lookup(task, map, true);
	put_pid(pid);
	return sdata ? sdata->data : NULL;
out:
	put_pid(pid);
	return ERR_PTR(err);
}

static int bpf_pid_task_storage_update_elem(struct bpf_map *map, void *key,
					    void *value, u64 map_flags)
{
	struct bpf_local_storage_data *sdata;
	struct task_struct *task;
	unsigned int f_flags;
	struct pid *pid;
	int fd, err;

	fd = *(int *)key;
	pid = pidfd_get_pid(fd, &f_flags);
	if (IS_ERR(pid))
		return PTR_ERR(pid);

	/* We should be in an RCU read side critical section, it should be safe
	 * to call pid_task.
	 */
	WARN_ON_ONCE(!rcu_read_lock_held());
	task = pid_task(pid, PIDTYPE_PID);
	if (!task || !task_storage_ptr(task)) {
		err = -ENOENT;
		goto out;
	}

	sdata = bpf_local_storage_update(
		task, (struct bpf_local_storage_map *)map, value, map_flags);

	err = PTR_ERR_OR_ZERO(sdata);
out:
	put_pid(pid);
	return err;
}

static int task_storage_delete(struct task_struct *task, struct bpf_map *map)
{
	struct bpf_local_storage_data *sdata;

	sdata = task_storage_lookup(task, map, false);
	if (!sdata)
		return -ENOENT;

	bpf_selem_unlink(SELEM(sdata));

	return 0;
}

static int bpf_pid_task_storage_delete_elem(struct bpf_map *map, void *key)
{
	struct task_struct *task;
	unsigned int f_flags;
	struct pid *pid;
	int fd, err;

	fd = *(int *)key;
	pid = pidfd_get_pid(fd, &f_flags);
	if (IS_ERR(pid))
		return PTR_ERR(pid);

	/* We should be in an RCU read side critical section, it should be safe
	 * to call pid_task.
	 */
	WARN_ON_ONCE(!rcu_read_lock_held());
	task = pid_task(pid, PIDTYPE_PID);
	if (!task) {
		err = -ENOENT;
		goto out;
	}

	err = task_storage_delete(task, map);
out:
	put_pid(pid);
	return err;
}

BPF_CALL_4(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *,
	   task, void *, value, u64, flags)
{
	struct bpf_local_storage_data *sdata;

	if (flags & ~(BPF_LOCAL_STORAGE_GET_F_CREATE))
		return (unsigned long)NULL;

	/* explicitly check that the task_storage_ptr is not
	 * NULL as task_storage_lookup returns NULL in this case and
	 * bpf_local_storage_update expects the owner to have a
	 * valid storage pointer.
	 */
	if (!task_storage_ptr(task))
		return (unsigned long)NULL;

	sdata = task_storage_lookup(task, map, true);
	if (sdata)
		return (unsigned long)sdata->data;

	/* This helper must only be called from places where the lifetime of the task
	 * is guaranteed. Either by being refcounted or by being protected
	 * by an RCU read-side critical section.
	 */
	if (flags & BPF_LOCAL_STORAGE_GET_F_CREATE) {
		sdata = bpf_local_storage_update(
			task, (struct bpf_local_storage_map *)map, value,
			BPF_NOEXIST);
		return IS_ERR(sdata) ? (unsigned long)NULL :
					     (unsigned long)sdata->data;
	}

	return (unsigned long)NULL;
}

BPF_CALL_2(bpf_task_storage_delete, struct bpf_map *, map, struct task_struct *,
	   task)
{
	/* This helper must only be called from places where the lifetime of the task
	 * is guaranteed. Either by being refcounted or by being protected
	 * by an RCU read-side critical section.
	 */
	return task_storage_delete(task, map);
}

static int notsupp_get_next_key(struct bpf_map *map, void *key, void *next_key)
{
	return -ENOTSUPP;
}

static struct bpf_map *task_storage_map_alloc(union bpf_attr *attr)
{
	struct bpf_local_storage_map *smap;

	smap = bpf_local_storage_map_alloc(attr);
	if (IS_ERR(smap))
		return ERR_CAST(smap);

	smap->cache_idx = bpf_local_storage_cache_idx_get(&task_cache);
	return &smap->map;
}

static void task_storage_map_free(struct bpf_map *map)
{
	struct bpf_local_storage_map *smap;

	smap = (struct bpf_local_storage_map *)map;
	bpf_local_storage_cache_idx_free(&task_cache, smap->cache_idx);
	bpf_local_storage_map_free(smap);
}

static int task_storage_map_btf_id;
const struct bpf_map_ops task_storage_map_ops = {
	.map_meta_equal = bpf_map_meta_equal,
	.map_alloc_check = bpf_local_storage_map_alloc_check,
	.map_alloc = task_storage_map_alloc,
	.map_free = task_storage_map_free,
	.map_get_next_key = notsupp_get_next_key,
	.map_lookup_elem = bpf_pid_task_storage_lookup_elem,
	.map_update_elem = bpf_pid_task_storage_update_elem,
	.map_delete_elem = bpf_pid_task_storage_delete_elem,
	.map_check_btf = bpf_local_storage_map_check_btf,
	.map_btf_name = "bpf_local_storage_map",
	.map_btf_id = &task_storage_map_btf_id,
	.map_owner_storage_ptr = task_storage_ptr,
};

BTF_ID_LIST_SINGLE(bpf_task_storage_btf_ids, struct, task_struct)

const struct bpf_func_proto bpf_task_storage_get_proto = {
	.func = bpf_task_storage_get,
	.gpl_only = false,
	.ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
	.arg1_type = ARG_CONST_MAP_PTR,
	.arg2_type = ARG_PTR_TO_BTF_ID,
	.arg2_btf_id = &bpf_task_storage_btf_ids[0],
	.arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL,
	.arg4_type = ARG_ANYTHING,
};

const struct bpf_func_proto bpf_task_storage_delete_proto = {
	.func = bpf_task_storage_delete,
	.gpl_only = false,
	.ret_type = RET_INTEGER,
	.arg1_type = ARG_CONST_MAP_PTR,
	.arg2_type = ARG_PTR_TO_BTF_ID,
	.arg2_btf_id = &bpf_task_storage_btf_ids[0],
};
kernel/bpf/btf.c (413 lines changed; diff suppressed because it is too large)
kernel/bpf/hashtab.c
@@ -86,6 +86,9 @@ struct bucket {
 	};
 };
 
+#define HASHTAB_MAP_LOCK_COUNT 8
+#define HASHTAB_MAP_LOCK_MASK (HASHTAB_MAP_LOCK_COUNT - 1)
+
 struct bpf_htab {
 	struct bpf_map map;
 	struct bucket *buckets;
@@ -99,6 +102,8 @@ struct bpf_htab {
 	u32 n_buckets;	/* number of hash buckets */
 	u32 elem_size;	/* size of each element in bytes */
 	u32 hashrnd;
+	struct lock_class_key lockdep_key;
+	int __percpu *map_locked[HASHTAB_MAP_LOCK_COUNT];
 };
 
 /* each htab element is struct htab_elem + key + value */
@@ -138,33 +143,53 @@ static void htab_init_buckets(struct bpf_htab *htab)
 
 	for (i = 0; i < htab->n_buckets; i++) {
 		INIT_HLIST_NULLS_HEAD(&htab->buckets[i].head, i);
-		if (htab_use_raw_lock(htab))
+		if (htab_use_raw_lock(htab)) {
 			raw_spin_lock_init(&htab->buckets[i].raw_lock);
-		else
+			lockdep_set_class(&htab->buckets[i].raw_lock,
+					  &htab->lockdep_key);
+		} else {
 			spin_lock_init(&htab->buckets[i].lock);
+			lockdep_set_class(&htab->buckets[i].lock,
+					  &htab->lockdep_key);
+		}
 	}
 }
 
-static inline unsigned long htab_lock_bucket(const struct bpf_htab *htab,
-					     struct bucket *b)
+static inline int htab_lock_bucket(const struct bpf_htab *htab,
+				   struct bucket *b, u32 hash,
+				   unsigned long *pflags)
 {
 	unsigned long flags;
 
+	hash = hash & HASHTAB_MAP_LOCK_MASK;
+
+	migrate_disable();
+	if (unlikely(__this_cpu_inc_return(*(htab->map_locked[hash])) != 1)) {
+		__this_cpu_dec(*(htab->map_locked[hash]));
+		migrate_enable();
+		return -EBUSY;
+	}
+
 	if (htab_use_raw_lock(htab))
 		raw_spin_lock_irqsave(&b->raw_lock, flags);
 	else
 		spin_lock_irqsave(&b->lock, flags);
-	return flags;
+	*pflags = flags;
+
+	return 0;
 }
 
 static inline void htab_unlock_bucket(const struct bpf_htab *htab,
-				      struct bucket *b,
+				      struct bucket *b, u32 hash,
 				      unsigned long flags)
 {
+	hash = hash & HASHTAB_MAP_LOCK_MASK;
 	if (htab_use_raw_lock(htab))
 		raw_spin_unlock_irqrestore(&b->raw_lock, flags);
 	else
 		spin_unlock_irqrestore(&b->lock, flags);
+	__this_cpu_dec(*(htab->map_locked[hash]));
+	migrate_enable();
 }
 
 static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node);
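The guard above is the heart of the NMI-deadlock fix: a per-CPU nesting
counter is taken before the bucket lock, so a re-entrant update on the same
CPU (e.g. from an NMI-time BPF program hitting the same lock class) bails out
with -EBUSY instead of spinning on a lock its own CPU already holds. A
userspace analogy of the same pattern, with a thread-local counter standing
in for the per-CPU one; the names here are ours, not the kernel's:

    #include <errno.h>
    #include <pthread.h>

    static pthread_mutex_t bucket_lock = PTHREAD_MUTEX_INITIALIZER;
    static _Thread_local int lock_nesting; /* analogue of htab->map_locked[] */

    static int guarded_lock(void)
    {
        if (++lock_nesting != 1) {   /* re-entered on this thread */
            --lock_nesting;
            return -EBUSY;           /* bail out instead of deadlocking */
        }
        pthread_mutex_lock(&bucket_lock);
        return 0;
    }

    static void guarded_unlock(void)
    {
        pthread_mutex_unlock(&bucket_lock);
        --lock_nesting;
    }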
@@ -390,17 +415,11 @@ static int htab_map_alloc_check(union bpf_attr *attr)
 	    attr->value_size == 0)
 		return -EINVAL;
 
-	if (attr->key_size > MAX_BPF_STACK)
-		/* eBPF programs initialize keys on stack, so they cannot be
-		 * larger than max stack size
-		 */
-		return -E2BIG;
-
-	if (attr->value_size >= KMALLOC_MAX_SIZE -
-	    MAX_BPF_STACK - sizeof(struct htab_elem))
-		/* if value_size is bigger, the user space won't be able to
-		 * access the elements via bpf syscall. This check also makes
-		 * sure that the elem_size doesn't overflow and it's
+	if ((u64)attr->key_size + attr->value_size >= KMALLOC_MAX_SIZE -
+	   sizeof(struct htab_elem))
+		/* if key_size + value_size is bigger, the user space won't be
+		 * able to access the elements via bpf syscall. This check
+		 * also makes sure that the elem_size doesn't overflow and it's
 		 * kmalloc-able later in htab_map_update_elem()
 		 */
 		return -E2BIG;
@@ -422,13 +441,15 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
 	bool percpu_lru = (attr->map_flags & BPF_F_NO_COMMON_LRU);
 	bool prealloc = !(attr->map_flags & BPF_F_NO_PREALLOC);
 	struct bpf_htab *htab;
+	int err, i;
 	u64 cost;
-	int err;
 
 	htab = kzalloc(sizeof(*htab), GFP_USER);
 	if (!htab)
 		return ERR_PTR(-ENOMEM);
 
+	lockdep_register_key(&htab->lockdep_key);
+
 	bpf_map_init_from_attr(&htab->map, attr);
 
 	if (percpu_lru) {
@@ -480,6 +501,13 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
 	if (!htab->buckets)
 		goto free_charge;
 
+	for (i = 0; i < HASHTAB_MAP_LOCK_COUNT; i++) {
+		htab->map_locked[i] = __alloc_percpu_gfp(sizeof(int),
+							 sizeof(int), GFP_USER);
+		if (!htab->map_locked[i])
+			goto free_map_locked;
+	}
+
 	if (htab->map.map_flags & BPF_F_ZERO_SEED)
 		htab->hashrnd = 0;
 	else
@@ -490,7 +518,7 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
 	if (prealloc) {
 		err = prealloc_init(htab);
 		if (err)
-			goto free_buckets;
+			goto free_map_locked;
 
 		if (!percpu && !lru) {
 			/* lru itself can remove the least used element, so
@@ -506,11 +534,14 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
 
 free_prealloc:
 	prealloc_destroy(htab);
-free_buckets:
+free_map_locked:
+	for (i = 0; i < HASHTAB_MAP_LOCK_COUNT; i++)
+		free_percpu(htab->map_locked[i]);
 	bpf_map_area_free(htab->buckets);
 free_charge:
 	bpf_map_charge_finish(&htab->map.memory);
 free_htab:
+	lockdep_unregister_key(&htab->lockdep_key);
 	kfree(htab);
 	return ERR_PTR(err);
 }
@@ -687,12 +718,15 @@ static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node)
 	struct hlist_nulls_node *n;
 	unsigned long flags;
 	struct bucket *b;
+	int ret;
 
 	tgt_l = container_of(node, struct htab_elem, lru_node);
 	b = __select_bucket(htab, tgt_l->hash);
 	head = &b->head;
 
-	flags = htab_lock_bucket(htab, b);
+	ret = htab_lock_bucket(htab, b, tgt_l->hash, &flags);
+	if (ret)
+		return false;
 
 	hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
 		if (l == tgt_l) {
@@ -700,7 +734,7 @@ static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node)
 			break;
 		}
 
-	htab_unlock_bucket(htab, b, flags);
+	htab_unlock_bucket(htab, b, tgt_l->hash, flags);
 
 	return l == tgt_l;
 }
@@ -998,7 +1032,9 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
 		 */
 	}
 
-	flags = htab_lock_bucket(htab, b);
+	ret = htab_lock_bucket(htab, b, hash, &flags);
+	if (ret)
+		return ret;
 
 	l_old = lookup_elem_raw(head, hash, key, key_size);
 
@@ -1039,7 +1075,7 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
 	}
 	ret = 0;
 err:
-	htab_unlock_bucket(htab, b, flags);
+	htab_unlock_bucket(htab, b, hash, flags);
 	return ret;
 }
 
@@ -1077,7 +1113,9 @@ static int htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value,
 		return -ENOMEM;
 	memcpy(l_new->key + round_up(map->key_size, 8), value, map->value_size);
 
-	flags = htab_lock_bucket(htab, b);
+	ret = htab_lock_bucket(htab, b, hash, &flags);
+	if (ret)
+		return ret;
 
 	l_old = lookup_elem_raw(head, hash, key, key_size);
 
@@ -1096,7 +1134,7 @@ static int htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value,
 	ret = 0;
 
 err:
-	htab_unlock_bucket(htab, b, flags);
+	htab_unlock_bucket(htab, b, hash, flags);
 
 	if (ret)
 		bpf_lru_push_free(&htab->lru, &l_new->lru_node);
@@ -1131,7 +1169,9 @@ static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key,
 	b = __select_bucket(htab, hash);
 	head = &b->head;
 
-	flags = htab_lock_bucket(htab, b);
+	ret = htab_lock_bucket(htab, b, hash, &flags);
+	if (ret)
+		return ret;
 
 	l_old = lookup_elem_raw(head, hash, key, key_size);
 
@@ -1154,7 +1194,7 @@ static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key,
 	}
 	ret = 0;
 err:
-	htab_unlock_bucket(htab, b, flags);
+	htab_unlock_bucket(htab, b, hash, flags);
 	return ret;
 }
 
@@ -1194,7 +1234,9 @@ static int __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
 		return -ENOMEM;
 	}
 
-	flags = htab_lock_bucket(htab, b);
+	ret = htab_lock_bucket(htab, b, hash, &flags);
+	if (ret)
+		return ret;
 
 	l_old = lookup_elem_raw(head, hash, key, key_size);
 
@@ -1216,7 +1258,7 @@ static int __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
 	}
 	ret = 0;
 err:
-	htab_unlock_bucket(htab, b, flags);
+	htab_unlock_bucket(htab, b, hash, flags);
 	if (l_new)
 		bpf_lru_push_free(&htab->lru, &l_new->lru_node);
 	return ret;
@@ -1244,7 +1286,7 @@ static int htab_map_delete_elem(struct bpf_map *map, void *key)
 	struct htab_elem *l;
 	unsigned long flags;
 	u32 hash, key_size;
-	int ret = -ENOENT;
+	int ret;
 
 	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held());
 
@@ -1254,17 +1296,20 @@ static int htab_map_delete_elem(struct bpf_map *map, void *key)
 	b = __select_bucket(htab, hash);
 	head = &b->head;
 
-	flags = htab_lock_bucket(htab, b);
+	ret = htab_lock_bucket(htab, b, hash, &flags);
+	if (ret)
+		return ret;
 
 	l = lookup_elem_raw(head, hash, key, key_size);
 
 	if (l) {
 		hlist_nulls_del_rcu(&l->hash_node);
 		free_htab_elem(htab, l);
-	}
+		ret = 0;
+	} else {
+		ret = -ENOENT;
+	}
 
-	htab_unlock_bucket(htab, b, flags);
+	htab_unlock_bucket(htab, b, hash, flags);
 	return ret;
 }
 
@@ -1276,7 +1321,7 @@ static int htab_lru_map_delete_elem(struct bpf_map *map, void *key)
 	struct htab_elem *l;
 	unsigned long flags;
 	u32 hash, key_size;
-	int ret = -ENOENT;
+	int ret;
 
 	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held());
 
@@ -1286,16 +1331,18 @@ static int htab_lru_map_delete_elem(struct bpf_map *map, void *key)
 	b = __select_bucket(htab, hash);
 	head = &b->head;
 
-	flags = htab_lock_bucket(htab, b);
+	ret = htab_lock_bucket(htab, b, hash, &flags);
+	if (ret)
+		return ret;
 
 	l = lookup_elem_raw(head, hash, key, key_size);
 
-	if (l) {
+	if (l)
 		hlist_nulls_del_rcu(&l->hash_node);
-		ret = 0;
-	}
+	else
+		ret = -ENOENT;
 
-	htab_unlock_bucket(htab, b, flags);
+	htab_unlock_bucket(htab, b, hash, flags);
 	if (l)
 		bpf_lru_push_free(&htab->lru, &l->lru_node);
 	return ret;
@@ -1321,6 +1368,7 @@ static void delete_all_elements(struct bpf_htab *htab)
 static void htab_map_free(struct bpf_map *map)
 {
 	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+	int i;
 
 	/* bpf_free_used_maps() or close(map_fd) will trigger this map_free callback.
 	 * bpf_free_used_maps() is called after bpf prog is no longer executing.
@@ -1338,6 +1386,9 @@ static void htab_map_free(struct bpf_map *map)
 
 	free_percpu(htab->extra_elems);
 	bpf_map_area_free(htab->buckets);
+	for (i = 0; i < HASHTAB_MAP_LOCK_COUNT; i++)
+		free_percpu(htab->map_locked[i]);
+	lockdep_unregister_key(&htab->lockdep_key);
 	kfree(htab);
 }
 
@@ -1441,8 +1492,11 @@ again_nocopy:
 	b = &htab->buckets[batch];
 	head = &b->head;
 	/* do not grab the lock unless need it (bucket_cnt > 0). */
-	if (locked)
-		flags = htab_lock_bucket(htab, b);
+	if (locked) {
+		ret = htab_lock_bucket(htab, b, batch, &flags);
+		if (ret)
+			goto next_batch;
+	}
 
 	bucket_cnt = 0;
 	hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
@@ -1459,7 +1513,7 @@ again_nocopy:
 		/* Note that since bucket_cnt > 0 here, it is implicit
 		 * that the locked was grabbed, so release it.
 		 */
-		htab_unlock_bucket(htab, b, flags);
+		htab_unlock_bucket(htab, b, batch, flags);
 		rcu_read_unlock();
 		bpf_enable_instrumentation();
 		goto after_loop;
@@ -1470,7 +1524,7 @@ again_nocopy:
 		/* Note that since bucket_cnt > 0 here, it is implicit
 		 * that the locked was grabbed, so release it.
 		 */
-		htab_unlock_bucket(htab, b, flags);
+		htab_unlock_bucket(htab, b, batch, flags);
 		rcu_read_unlock();
 		bpf_enable_instrumentation();
 		kvfree(keys);
@@ -1523,7 +1577,7 @@ again_nocopy:
 		dst_val += value_size;
 	}
 
-	htab_unlock_bucket(htab, b, flags);
+	htab_unlock_bucket(htab, b, batch, flags);
 	locked = false;
 
 	while (node_to_free) {
kernel/bpf/syscall.c
@@ -773,7 +773,8 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf,
 		    map->map_type != BPF_MAP_TYPE_ARRAY &&
 		    map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE &&
 		    map->map_type != BPF_MAP_TYPE_SK_STORAGE &&
-		    map->map_type != BPF_MAP_TYPE_INODE_STORAGE)
+		    map->map_type != BPF_MAP_TYPE_INODE_STORAGE &&
+		    map->map_type != BPF_MAP_TYPE_TASK_STORAGE)
 			return -ENOTSUPP;
 		if (map->spin_lock_off + sizeof(struct bpf_spin_lock) >
 		    map->value_size) {
Some files were not shown because too many files have changed in this diff.