You've already forked linux-rockchip
mirror of
https://github.com/armbian/linux-rockchip.git
synced 2026-01-06 11:08:10 -08:00
perf stat: Introduce 'bperf' to share hardware PMCs with BPF
The perf tool uses performance monitoring counters (PMCs) to monitor
system performance. The PMCs are limited hardware resources. For
example, Intel CPUs have 3x fixed PMCs and 4x programmable PMCs per cpu.
Modern data center systems use these PMCs in many different ways: system
level monitoring, (maybe nested) container level monitoring, per process
monitoring, profiling (in sample mode), etc. In some cases, there are
more active perf_events than available hardware PMCs. To allow all
perf_events to have a chance to run, it is necessary to do expensive
time multiplexing of events.
On the other hand, many monitoring tools count the common metrics
(cycles, instructions). It is a waste to have multiple tools create
multiple perf_events of "cycles" and occupy multiple PMCs.
bperf tries to reduce such waste by allowing multiple perf_events of
"cycles" or "instructions" (at different scopes) to share PMCs. Instead
of having each perf-stat session read its own perf_events, bperf uses
BPF programs to read the perf_events and aggregate readings to BPF maps.
Then, the perf-stat session(s) reads the values from these BPF maps.
Please refer to the comment before the definition of bperf_ops for the
description of bperf architecture.
bperf is off by default. To enable it, pass the --bpf-counters option to
perf-stat. bperf uses a BPF hashmap to share information about BPF
programs and maps used by bperf. This map is pinned to bpffs. The
default path is /sys/fs/bpf/perf_attr_map. The user can change the
path with the --bpf-attr-map option.
Committer testing:
# dmesg|grep "Performance Events" -A5
[ 0.225277] Performance Events: Fam17h+ core perfctr, AMD PMU driver.
[ 0.225280] ... version: 0
[ 0.225280] ... bit width: 48
[ 0.225281] ... generic registers: 6
[ 0.225281] ... value mask: 0000ffffffffffff
[ 0.225281] ... max period: 00007fffffffffff
#
# for a in $(seq 6) ; do perf stat -a -e cycles,instructions sleep 100000 & done
[1] 2436231
[2] 2436232
[3] 2436233
[4] 2436234
[5] 2436235
[6] 2436236
# perf stat -a -e cycles,instructions sleep 0.1
Performance counter stats for 'system wide':
310,326,987 cycles (41.87%)
236,143,290 instructions # 0.76 insn per cycle (41.87%)
0.100800885 seconds time elapsed
#
We can see that the counters were enabled for this workload 41.87% of
the time.
Now with --bpf-counters:
# for a in $(seq 32) ; do perf stat --bpf-counters -a -e cycles,instructions sleep 100000 & done
[1] 2436514
[2] 2436515
[3] 2436516
[4] 2436517
[5] 2436518
[6] 2436519
[7] 2436520
[8] 2436521
[9] 2436522
[10] 2436523
[11] 2436524
[12] 2436525
[13] 2436526
[14] 2436527
[15] 2436528
[16] 2436529
[17] 2436530
[18] 2436531
[19] 2436532
[20] 2436533
[21] 2436534
[22] 2436535
[23] 2436536
[24] 2436537
[25] 2436538
[26] 2436539
[27] 2436540
[28] 2436541
[29] 2436542
[30] 2436543
[31] 2436544
[32] 2436545
#
# ls -la /sys/fs/bpf/perf_attr_map
-rw-------. 1 root root 0 Mar 23 14:53 /sys/fs/bpf/perf_attr_map
# bpftool map | grep bperf | wc -l
64
#
# bpftool map | tail
1265: percpu_array name accum_readings flags 0x0
key 4B value 24B max_entries 1 memlock 4096B
1266: hash name filter flags 0x0
key 4B value 4B max_entries 1 memlock 4096B
1267: array name bperf_fo.bss flags 0x400
key 4B value 8B max_entries 1 memlock 4096B
btf_id 996
pids perf(2436545)
1268: percpu_array name accum_readings flags 0x0
key 4B value 24B max_entries 1 memlock 4096B
1269: hash name filter flags 0x0
key 4B value 4B max_entries 1 memlock 4096B
1270: array name bperf_fo.bss flags 0x400
key 4B value 8B max_entries 1 memlock 4096B
btf_id 997
pids perf(2436541)
1285: array name pid_iter.rodata flags 0x480
key 4B value 4B max_entries 1 memlock 4096B
btf_id 1017 frozen
pids bpftool(2437504)
1286: array flags 0x0
key 4B value 32B max_entries 1 memlock 4096B
#
# bpftool map dump id 1268 | tail
value (CPU 21):
8f f3 bc ca 00 00 00 00 80 fd 2a d1 4d 00 00 00
80 fd 2a d1 4d 00 00 00
value (CPU 22):
7e d5 64 4d 00 00 00 00 a4 8a 2e ee 4d 00 00 00
a4 8a 2e ee 4d 00 00 00
value (CPU 23):
a7 78 3e 06 01 00 00 00 b2 34 94 f6 4d 00 00 00
b2 34 94 f6 4d 00 00 00
Found 1 element
# bpftool map dump id 1268 | tail
value (CPU 21):
c6 8b d9 ca 00 00 00 00 20 c6 fc 83 4e 00 00 00
20 c6 fc 83 4e 00 00 00
value (CPU 22):
9c b4 d2 4d 00 00 00 00 3e 0c df 89 4e 00 00 00
3e 0c df 89 4e 00 00 00
value (CPU 23):
18 43 66 06 01 00 00 00 5b 69 ed 83 4e 00 00 00
5b 69 ed 83 4e 00 00 00
Found 1 element
# bpftool map dump id 1268 | tail
value (CPU 21):
f2 6e db ca 00 00 00 00 92 67 4c ba 4e 00 00 00
92 67 4c ba 4e 00 00 00
value (CPU 22):
dc 8e e1 4d 00 00 00 00 d9 32 7a c5 4e 00 00 00
d9 32 7a c5 4e 00 00 00
value (CPU 23):
bd 2b 73 06 01 00 00 00 7c 73 87 bf 4e 00 00 00
7c 73 87 bf 4e 00 00 00
Found 1 element
#
# perf stat --bpf-counters -a -e cycles,instructions sleep 0.1
Performance counter stats for 'system wide':
119,410,122 cycles
152,105,479 instructions # 1.27 insn per cycle
0.101395093 seconds time elapsed
#
See? We had the counters enabled all the time.
Signed-off-by: Song Liu <songliubraving@fb.com>
Reviewed-by: Jiri Olsa <jolsa@kernel.org>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: kernel-team@fb.com
Link: http://lore.kernel.org/lkml/20210316211837.910506-2-songliubraving@fb.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
This commit is contained in:
committed by
Arnaldo Carvalho de Melo
parent
4d39c89f0b
commit
7fac83aaf2
@@ -93,6 +93,17 @@ report::
|
||||
|
||||
1.102235068 seconds time elapsed
|
||||
|
||||
--bpf-counters::
|
||||
Use BPF programs to aggregate readings from perf_events. This
|
||||
allows multiple perf-stat sessions that are counting the same metric (cycles,
|
||||
instructions, etc.) to share hardware counters.
|
||||
|
||||
--bpf-attr-map::
|
||||
With option "--bpf-counters", different perf-stat sessions share
|
||||
information about shared BPF programs and maps via a pinned hashmap.
|
||||
Use "--bpf-attr-map" to specify the path of this pinned hashmap.
|
||||
The default path is /sys/fs/bpf/perf_attr_map.
|
||||
|
||||
ifdef::HAVE_LIBPFM[]
|
||||
--pfm-events events::
|
||||
Select a PMU event using libpfm4 syntax (see http://perfmon2.sf.net)
|
||||
|
||||
@@ -1007,6 +1007,7 @@ python-clean:
|
||||
SKEL_OUT := $(abspath $(OUTPUT)util/bpf_skel)
|
||||
SKEL_TMP_OUT := $(abspath $(SKEL_OUT)/.tmp)
|
||||
SKELETONS := $(SKEL_OUT)/bpf_prog_profiler.skel.h
|
||||
SKELETONS += $(SKEL_OUT)/bperf_leader.skel.h $(SKEL_OUT)/bperf_follower.skel.h
|
||||
|
||||
ifdef BUILD_BPF_SKEL
|
||||
BPFTOOL := $(SKEL_TMP_OUT)/bootstrap/bpftool
|
||||
|
||||
@@ -792,6 +792,12 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)
|
||||
}
|
||||
|
||||
evlist__for_each_cpu (evsel_list, i, cpu) {
|
||||
/*
|
||||
* bperf calls evsel__open_per_cpu() in bperf__load(), so
|
||||
* no need to call it again here.
|
||||
*/
|
||||
if (target.use_bpf)
|
||||
break;
|
||||
affinity__set(&affinity, cpu);
|
||||
|
||||
evlist__for_each_entry(evsel_list, counter) {
|
||||
@@ -1146,6 +1152,10 @@ static struct option stat_options[] = {
|
||||
#ifdef HAVE_BPF_SKEL
|
||||
OPT_STRING('b', "bpf-prog", &target.bpf_str, "bpf-prog-id",
|
||||
"stat events on existing bpf program id"),
|
||||
OPT_BOOLEAN(0, "bpf-counters", &target.use_bpf,
|
||||
"use bpf program to count events"),
|
||||
OPT_STRING(0, "bpf-attr-map", &target.attr_map, "attr-map-path",
|
||||
"path to perf_event_attr map"),
|
||||
#endif
|
||||
OPT_BOOLEAN('a', "all-cpus", &target.system_wide,
|
||||
"system-wide collection from all CPUs"),
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
14
tools/perf/util/bpf_skel/bperf.h
Normal file
14
tools/perf/util/bpf_skel/bperf.h
Normal file
@@ -0,0 +1,14 @@
|
||||
// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
|
||||
// Copyright (c) 2021 Facebook
|
||||
|
||||
#ifndef __BPERF_STAT_H
|
||||
#define __BPERF_STAT_H
|
||||
|
||||
/*
 * reading_map - map definition type for the bperf "reading" maps.
 *
 * A per-CPU array with a __u32 key whose value is one
 * struct bpf_perf_event_value (counter / enabled / running).
 * The definition declares a single entry.
 */
typedef struct {
|
||||
__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
|
||||
__uint(key_size, sizeof(__u32));
|
||||
__uint(value_size, sizeof(struct bpf_perf_event_value));
|
||||
/* NOTE(review): userspace may resize this before load - confirm in the loader */
__uint(max_entries, 1);
|
||||
} reading_map;
|
||||
|
||||
#endif /* __BPERF_STAT_H */
|
||||
69
tools/perf/util/bpf_skel/bperf_follower.bpf.c
Normal file
69
tools/perf/util/bpf_skel/bperf_follower.bpf.c
Normal file
@@ -0,0 +1,69 @@
|
||||
// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
|
||||
// Copyright (c) 2021 Facebook
|
||||
#include <linux/bpf.h>
|
||||
#include <linux/perf_event.h>
|
||||
#include <bpf/bpf_helpers.h>
|
||||
#include <bpf/bpf_tracing.h>
|
||||
#include "bperf.h"
|
||||
#include "bperf_u.h"
|
||||
|
||||
/*
 * Source of the deltas that fexit_XXX adds up; it is only read in this
 * file, at key 0.
 * NOTE(review): presumably the same map the leader program fills -
 * confirm in the userspace loader.
 */
reading_map diff_readings SEC(".maps");
|
||||
/*
 * Running totals updated by fexit_XXX. Indexed by 0 for the global
 * filter type, otherwise by the value looked up in the filter map.
 */
reading_map accum_readings SEC(".maps");
|
||||
|
||||
/*
 * filter - hash map from a __u32 key to a __u32 value.
 *
 * fexit_XXX looks up a key derived from the current CPU id, pid or tgid
 * (depending on `type`) and uses the stored value as the index into
 * accum_readings. A missing key means the event is not accumulated.
 *
 * No max_entries is declared here.
 * NOTE(review): presumably set by userspace before load - confirm.
 */
struct {
|
||||
__uint(type, BPF_MAP_TYPE_HASH);
|
||||
__uint(key_size, sizeof(__u32));
|
||||
__uint(value_size, sizeof(__u32));
|
||||
} filter SEC(".maps");
|
||||
|
||||
/*
 * Selects how fexit_XXX derives its filter key. The initial value 0
 * matches no case in the switch, so the program returns without doing
 * anything.
 * NOTE(review): presumably written by the userspace loader - confirm.
 */
enum bperf_filter_type type = 0;
|
||||
/* Gate for fexit_XXX: while this is 0 the program returns immediately. */
int enabled = 0;
|
||||
|
||||
/*
 * fexit_XXX - add the latest delta in diff_readings to accum_readings.
 *
 * Does nothing unless `enabled` is non-zero. The accumulation slot is
 * chosen by `type`:
 *   BPERF_FILTER_GLOBAL - slot 0, no filter lookup;
 *   BPERF_FILTER_CPU    - filter[current CPU id];
 *   BPERF_FILTER_PID    - filter[low 32 bits of bpf_get_current_pid_tgid()];
 *   BPERF_FILTER_TGID   - filter[high 32 bits of bpf_get_current_pid_tgid()];
 *   anything else       - return without accumulating.
 *
 * Always returns 0, including when any map lookup fails.
 *
 * NOTE(review): "XXX" in the section name looks like a placeholder for
 * the real attach target - confirm how the loader sets it.
 */
SEC("fexit/XXX")
|
||||
int BPF_PROG(fexit_XXX)
|
||||
{
|
||||
struct bpf_perf_event_value *diff_val, *accum_val;
|
||||
__u32 filter_key, zero = 0;
|
||||
__u32 *accum_key;
|
||||
|
||||
if (!enabled)
|
||||
return 0;
|
||||
|
||||
switch (type) {
|
||||
case BPERF_FILTER_GLOBAL:
|
||||
/* global counting always accumulates into slot 0; skip the filter */
accum_key = &zero;
|
||||
goto do_add;
|
||||
case BPERF_FILTER_CPU:
|
||||
filter_key = bpf_get_smp_processor_id();
|
||||
break;
|
||||
case BPERF_FILTER_PID:
|
||||
/* low 32 bits of the helper's return value */
filter_key = bpf_get_current_pid_tgid() & 0xffffffff;
|
||||
break;
|
||||
case BPERF_FILTER_TGID:
|
||||
/* high 32 bits of the helper's return value */
filter_key = bpf_get_current_pid_tgid() >> 32;
|
||||
break;
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* the value stored in filter is the index into accum_readings */
accum_key = bpf_map_lookup_elem(&filter, &filter_key);
|
||||
if (!accum_key)
|
||||
return 0;
|
||||
|
||||
do_add:
|
||||
diff_val = bpf_map_lookup_elem(&diff_readings, &zero);
|
||||
if (!diff_val)
|
||||
return 0;
|
||||
|
||||
accum_val = bpf_map_lookup_elem(&accum_readings, accum_key);
|
||||
if (!accum_val)
|
||||
return 0;
|
||||
|
||||
/* fold the delta into the running totals */
accum_val->counter += diff_val->counter;
|
||||
accum_val->enabled += diff_val->enabled;
|
||||
accum_val->running += diff_val->running;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
char LICENSE[] SEC("license") = "Dual BSD/GPL";
|
||||
46
tools/perf/util/bpf_skel/bperf_leader.bpf.c
Normal file
46
tools/perf/util/bpf_skel/bperf_leader.bpf.c
Normal file
@@ -0,0 +1,46 @@
|
||||
// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
|
||||
// Copyright (c) 2021 Facebook
|
||||
#include <linux/bpf.h>
|
||||
#include <linux/perf_event.h>
|
||||
#include <bpf/bpf_helpers.h>
|
||||
#include <bpf/bpf_tracing.h>
|
||||
#include "bperf.h"
|
||||
|
||||
/*
 * events - perf event array read by on_switch.
 *
 * Keyed by __u32 with int-sized values; on_switch indexes it with the
 * current CPU id. Created with the BPF_F_PRESERVE_ELEMS map flag.
 * No max_entries is declared here.
 * NOTE(review): presumably sized and populated by the userspace
 * loader - confirm.
 */
struct {
|
||||
__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
|
||||
__uint(key_size, sizeof(__u32));
|
||||
__uint(value_size, sizeof(int));
|
||||
__uint(map_flags, BPF_F_PRESERVE_ELEMS);
|
||||
} events SEC(".maps");
|
||||
|
||||
/* Last reading taken by on_switch; overwritten on every successful read. */
reading_map prev_readings SEC(".maps");
|
||||
/* Delta between the newest reading and prev_readings, written by on_switch. */
reading_map diff_readings SEC(".maps");
|
||||
|
||||
/*
 * on_switch - sample the perf event and publish the delta since the
 * previous sample.
 *
 * Attached to the sched_switch raw tracepoint. Reads the entry of
 * `events` indexed by the current CPU id, stores (new - previous) for
 * counter/enabled/running into diff_readings[0], then saves the new
 * reading in prev_readings[0].
 *
 * Always returns 0. If either map lookup fails or the perf event read
 * returns an error, neither map is modified.
 */
SEC("raw_tp/sched_switch")
|
||||
int BPF_PROG(on_switch)
|
||||
{
|
||||
struct bpf_perf_event_value val, *prev_val, *diff_val;
|
||||
__u32 key = bpf_get_smp_processor_id();
|
||||
__u32 zero = 0;
|
||||
long err;
|
||||
|
||||
prev_val = bpf_map_lookup_elem(&prev_readings, &zero);
|
||||
if (!prev_val)
|
||||
return 0;
|
||||
|
||||
diff_val = bpf_map_lookup_elem(&diff_readings, &zero);
|
||||
if (!diff_val)
|
||||
return 0;
|
||||
|
||||
/* val is filled in by the helper; a non-zero return means the read failed */
err = bpf_perf_event_read_value(&events, key, &val, sizeof(val));
|
||||
if (err)
|
||||
return 0;
|
||||
|
||||
diff_val->counter = val.counter - prev_val->counter;
|
||||
diff_val->enabled = val.enabled - prev_val->enabled;
|
||||
diff_val->running = val.running - prev_val->running;
|
||||
/* remember this reading as the baseline for the next invocation */
*prev_val = val;
|
||||
return 0;
|
||||
}
|
||||
|
||||
char LICENSE[] SEC("license") = "Dual BSD/GPL";
|
||||
14
tools/perf/util/bpf_skel/bperf_u.h
Normal file
14
tools/perf/util/bpf_skel/bperf_u.h
Normal file
@@ -0,0 +1,14 @@
|
||||
// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
|
||||
// Copyright (c) 2021 Facebook
|
||||
|
||||
#ifndef __BPERF_STAT_U_H
|
||||
#define __BPERF_STAT_U_H
|
||||
|
||||
/*
 * bperf_filter_type - how a bperf follower program selects which events
 * to accumulate. Values start at 1, so 0 names no filter type.
 */
enum bperf_filter_type {
|
||||
/* no filtering */
BPERF_FILTER_GLOBAL = 1,
|
||||
/* filter by CPU id */
BPERF_FILTER_CPU,
|
||||
/* filter by pid */
BPERF_FILTER_PID,
|
||||
/* filter by tgid */
BPERF_FILTER_TGID,
|
||||
};
|
||||
|
||||
#endif /* __BPERF_STAT_U_H */
|
||||
@@ -20,6 +20,8 @@ union perf_event;
|
||||
struct bpf_counter_ops;
|
||||
struct target;
|
||||
struct hashmap;
|
||||
struct bperf_leader_bpf;
|
||||
struct bperf_follower_bpf;
|
||||
|
||||
typedef int (evsel__sb_cb_t)(union perf_event *event, void *data);
|
||||
|
||||
@@ -130,8 +132,24 @@ struct evsel {
|
||||
* See also evsel__has_callchain().
|
||||
*/
|
||||
__u64 synth_sample_type;
|
||||
struct list_head bpf_counter_list;
|
||||
|
||||
/*
|
||||
* bpf_counter_ops serves two use cases:
|
||||
* 1. perf-stat -b counting events used by BPF programs
|
||||
* 2. perf-stat --bpf-counters use BPF programs to aggregate counts
|
||||
*/
|
||||
struct bpf_counter_ops *bpf_counter_ops;
|
||||
|
||||
/* for perf-stat -b */
|
||||
struct list_head bpf_counter_list;
|
||||
|
||||
/* for perf-stat --bpf-counters */
|
||||
int bperf_leader_prog_fd;
|
||||
int bperf_leader_link_fd;
|
||||
union {
|
||||
struct bperf_leader_bpf *leader_skel;
|
||||
struct bperf_follower_bpf *follower_skel;
|
||||
};
|
||||
};
|
||||
|
||||
struct perf_missing_features {
|
||||
|
||||
@@ -16,6 +16,8 @@ struct target {
|
||||
bool uses_mmap;
|
||||
bool default_per_cpu;
|
||||
bool per_thread;
|
||||
bool use_bpf;
|
||||
const char *attr_map;
|
||||
};
|
||||
|
||||
enum target_errno {
|
||||
@@ -66,7 +68,7 @@ static inline bool target__has_cpu(struct target *target)
|
||||
|
||||
static inline bool target__has_bpf(struct target *target)
|
||||
{
|
||||
return target->bpf_str;
|
||||
return target->bpf_str || target->use_bpf;
|
||||
}
|
||||
|
||||
static inline bool target__none(struct target *target)
|
||||
|
||||
Reference in New Issue
Block a user