mirror of
https://github.com/ukui/kernel.git
synced 2026-03-09 10:07:04 -07:00
Merge tag 'perf-tools-for-v6.1-2-2022-10-16' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux
Pull more perf tools updates from Arnaldo Carvalho de Melo:
- Use BPF CO-RE (Compile Once, Run Everywhere) to support old kernels
when using bperf (perf BPF based counters) with cgroups.
- Support HiSilicon PCIe Performance Monitoring Unit (PMU), that
monitors bandwidth, latency, bus utilization and buffer occupancy.
Documented in Documentation/admin-guide/perf/hisi-pcie-pmu.rst.
- User space tasks can migrate between CPUs, so when tracing selected
CPUs, system-wide sideband is still needed, fix it in the setup of
Intel PT on hybrid systems.
- Fix metricgroups title message in 'perf list', it should state that
the metrics groups are to be used with the '-M' option, not '-e'.
- Sync the msr-index.h copy with the kernel sources, adding support for
using "AMD64_TSC_RATIO" in filter expressions in 'perf trace' as well
as decoding it when printing the MSR tracepoint arguments.
- Fix program header size and alignment when generating a JIT ELF in
'perf inject'.
- Add multiple new Intel PT 'perf test' entries, including a jitdump
one.
- Fix the 'perf test' entries for 'perf stat' CSV and JSON output when
running on PowerPC due to an invalid topology number in that arch.
- Fix the 'perf test' for arm_coresight failures on the ARM Juno
system.
- Fix the 'perf test' attr entry for PERF_FORMAT_LOST, adding this
option to the or expression expected in the intercepted
perf_event_open() syscall.
- Add missing condition flags ('hs', 'lo', 'vc', 'vs') for arm64 in the
'perf annotate' asm parser.
- Fix 'perf mem record -C' option processing, it was being chopped up
when preparing the underlying 'perf record -e mem-events' and thus
being ignored, requiring using '-- -C CPUs' as a workaround.
- Improvements and tidy ups for 'perf test' shell infra.
- Fix Intel PT information printing segfault in uClibc, where a NULL
format was being passed to fprintf.
* tag 'perf-tools-for-v6.1-2-2022-10-16' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux: (23 commits)
tools arch x86: Sync the msr-index.h copy with the kernel sources
perf auxtrace arm64: Add support for parsing HiSilicon PCIe Trace packet
perf auxtrace arm64: Add support for HiSilicon PCIe Tune and Trace device driver
perf auxtrace arm: Refactor event list iteration in auxtrace_record__init()
perf tests stat+json_output: Include sanity check for topology
perf tests stat+csv_output: Include sanity check for topology
perf intel-pt: Fix system_wide dummy event for hybrid
perf intel-pt: Fix segfault in intel_pt_print_info() with uClibc
perf test: Fix attr tests for PERF_FORMAT_LOST
perf test: test_intel_pt.sh: Add 9 tests
perf inject: Fix GEN_ELF_TEXT_OFFSET for jit
perf test: test_intel_pt.sh: Add jitdump test
perf test: test_intel_pt.sh: Tidy some alignment
perf test: test_intel_pt.sh: Print a message when skipping kernel tracing
perf test: test_intel_pt.sh: Tidy some perf record options
perf test: test_intel_pt.sh: Fix return checking again
perf: Skip and warn on unknown format 'configN' attrs
perf list: Fix metricgroups title message
perf mem: Fix -C option behavior for perf mem record
perf annotate: Add missing condition flags for arm64
...
This commit is contained in:
@@ -155,6 +155,11 @@
|
||||
* Return Stack Buffer Predictions.
|
||||
*/
|
||||
|
||||
#define ARCH_CAP_XAPIC_DISABLE BIT(21) /*
|
||||
* IA32_XAPIC_DISABLE_STATUS MSR
|
||||
* supported
|
||||
*/
|
||||
|
||||
#define MSR_IA32_FLUSH_CMD 0x0000010b
|
||||
#define L1D_FLUSH BIT(0) /*
|
||||
* Writeback and invalidate the
|
||||
@@ -585,6 +590,9 @@
|
||||
#define MSR_AMD64_PERF_CNTR_GLOBAL_CTL 0xc0000301
|
||||
#define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR 0xc0000302
|
||||
|
||||
/* AMD Last Branch Record MSRs */
|
||||
#define MSR_AMD64_LBR_SELECT 0xc000010e
|
||||
|
||||
/* Fam 17h MSRs */
|
||||
#define MSR_F17H_IRPERF 0xc00000e9
|
||||
|
||||
@@ -756,6 +764,8 @@
|
||||
#define MSR_AMD_DBG_EXTN_CFG 0xc000010f
|
||||
#define MSR_AMD_SAMP_BR_FROM 0xc0010300
|
||||
|
||||
#define DBG_EXTN_CFG_LBRV2EN BIT_ULL(6)
|
||||
|
||||
#define MSR_IA32_MPERF 0x000000e7
|
||||
#define MSR_IA32_APERF 0x000000e8
|
||||
|
||||
@@ -1054,4 +1064,12 @@
|
||||
#define MSR_IA32_HW_FEEDBACK_PTR 0x17d0
|
||||
#define MSR_IA32_HW_FEEDBACK_CONFIG 0x17d1
|
||||
|
||||
/* x2APIC locked status */
|
||||
#define MSR_IA32_XAPIC_DISABLE_STATUS 0xBD
|
||||
#define LEGACY_XAPIC_DISABLED BIT(0) /*
|
||||
* x2APIC mode is locked and
|
||||
* disabling x2APIC will cause
|
||||
* a #GP
|
||||
*/
|
||||
|
||||
#endif /* _ASM_X86_MSR_INDEX_H */
|
||||
|
||||
@@ -6,7 +6,6 @@
|
||||
#include <linux/types.h>
|
||||
#include <linux/limits.h>
|
||||
#include <linux/bpf.h>
|
||||
#include <linux/compiler.h>
|
||||
#include <sys/types.h> /* pid_t */
|
||||
|
||||
#define event_contains(obj, mem) ((obj).header.size > offsetof(typeof(obj), mem))
|
||||
@@ -207,7 +206,7 @@ struct perf_record_range_cpu_map {
|
||||
__u16 end_cpu;
|
||||
};
|
||||
|
||||
struct __packed perf_record_cpu_map_data {
|
||||
struct perf_record_cpu_map_data {
|
||||
__u16 type;
|
||||
union {
|
||||
/* Used when type == PERF_CPU_MAP__CPUS. */
|
||||
@@ -219,7 +218,7 @@ struct __packed perf_record_cpu_map_data {
|
||||
/* Used when type == PERF_CPU_MAP__RANGE_CPUS. */
|
||||
struct perf_record_range_cpu_map range_cpu_data;
|
||||
};
|
||||
};
|
||||
} __attribute__((packed));
|
||||
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
|
||||
@@ -4,9 +4,11 @@
|
||||
* Author: Mathieu Poirier <mathieu.poirier@linaro.org>
|
||||
*/
|
||||
|
||||
#include <dirent.h>
|
||||
#include <stdbool.h>
|
||||
#include <linux/coresight-pmu.h>
|
||||
#include <linux/zalloc.h>
|
||||
#include <api/fs/fs.h>
|
||||
|
||||
#include "../../../util/auxtrace.h"
|
||||
#include "../../../util/debug.h"
|
||||
@@ -14,6 +16,7 @@
|
||||
#include "../../../util/pmu.h"
|
||||
#include "cs-etm.h"
|
||||
#include "arm-spe.h"
|
||||
#include "hisi-ptt.h"
|
||||
|
||||
static struct perf_pmu **find_all_arm_spe_pmus(int *nr_spes, int *err)
|
||||
{
|
||||
@@ -50,42 +53,114 @@ static struct perf_pmu **find_all_arm_spe_pmus(int *nr_spes, int *err)
|
||||
return arm_spe_pmus;
|
||||
}
|
||||
|
||||
static struct perf_pmu **find_all_hisi_ptt_pmus(int *nr_ptts, int *err)
|
||||
{
|
||||
const char *sysfs = sysfs__mountpoint();
|
||||
struct perf_pmu **hisi_ptt_pmus = NULL;
|
||||
struct dirent *dent;
|
||||
char path[PATH_MAX];
|
||||
DIR *dir = NULL;
|
||||
int idx = 0;
|
||||
|
||||
snprintf(path, PATH_MAX, "%s" EVENT_SOURCE_DEVICE_PATH, sysfs);
|
||||
dir = opendir(path);
|
||||
if (!dir) {
|
||||
pr_err("can't read directory '%s'\n", EVENT_SOURCE_DEVICE_PATH);
|
||||
*err = -EINVAL;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
while ((dent = readdir(dir))) {
|
||||
if (strstr(dent->d_name, HISI_PTT_PMU_NAME))
|
||||
(*nr_ptts)++;
|
||||
}
|
||||
|
||||
if (!(*nr_ptts))
|
||||
goto out;
|
||||
|
||||
hisi_ptt_pmus = zalloc(sizeof(struct perf_pmu *) * (*nr_ptts));
|
||||
if (!hisi_ptt_pmus) {
|
||||
pr_err("hisi_ptt alloc failed\n");
|
||||
*err = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
rewinddir(dir);
|
||||
while ((dent = readdir(dir))) {
|
||||
if (strstr(dent->d_name, HISI_PTT_PMU_NAME) && idx < *nr_ptts) {
|
||||
hisi_ptt_pmus[idx] = perf_pmu__find(dent->d_name);
|
||||
if (hisi_ptt_pmus[idx])
|
||||
idx++;
|
||||
}
|
||||
}
|
||||
|
||||
out:
|
||||
closedir(dir);
|
||||
return hisi_ptt_pmus;
|
||||
}
|
||||
|
||||
static struct perf_pmu *find_pmu_for_event(struct perf_pmu **pmus,
|
||||
int pmu_nr, struct evsel *evsel)
|
||||
{
|
||||
int i;
|
||||
|
||||
if (!pmus)
|
||||
return NULL;
|
||||
|
||||
for (i = 0; i < pmu_nr; i++) {
|
||||
if (evsel->core.attr.type == pmus[i]->type)
|
||||
return pmus[i];
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
struct auxtrace_record
|
||||
*auxtrace_record__init(struct evlist *evlist, int *err)
|
||||
{
|
||||
struct perf_pmu *cs_etm_pmu;
|
||||
struct evsel *evsel;
|
||||
bool found_etm = false;
|
||||
struct perf_pmu *found_spe = NULL;
|
||||
struct perf_pmu *cs_etm_pmu = NULL;
|
||||
struct perf_pmu **arm_spe_pmus = NULL;
|
||||
struct perf_pmu **hisi_ptt_pmus = NULL;
|
||||
struct evsel *evsel;
|
||||
struct perf_pmu *found_etm = NULL;
|
||||
struct perf_pmu *found_spe = NULL;
|
||||
struct perf_pmu *found_ptt = NULL;
|
||||
int auxtrace_event_cnt = 0;
|
||||
int nr_spes = 0;
|
||||
int i = 0;
|
||||
int nr_ptts = 0;
|
||||
|
||||
if (!evlist)
|
||||
return NULL;
|
||||
|
||||
cs_etm_pmu = perf_pmu__find(CORESIGHT_ETM_PMU_NAME);
|
||||
arm_spe_pmus = find_all_arm_spe_pmus(&nr_spes, err);
|
||||
hisi_ptt_pmus = find_all_hisi_ptt_pmus(&nr_ptts, err);
|
||||
|
||||
evlist__for_each_entry(evlist, evsel) {
|
||||
if (cs_etm_pmu &&
|
||||
evsel->core.attr.type == cs_etm_pmu->type)
|
||||
found_etm = true;
|
||||
if (cs_etm_pmu && !found_etm)
|
||||
found_etm = find_pmu_for_event(&cs_etm_pmu, 1, evsel);
|
||||
|
||||
if (!nr_spes || found_spe)
|
||||
continue;
|
||||
if (arm_spe_pmus && !found_spe)
|
||||
found_spe = find_pmu_for_event(arm_spe_pmus, nr_spes, evsel);
|
||||
|
||||
for (i = 0; i < nr_spes; i++) {
|
||||
if (evsel->core.attr.type == arm_spe_pmus[i]->type) {
|
||||
found_spe = arm_spe_pmus[i];
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (hisi_ptt_pmus && !found_ptt)
|
||||
found_ptt = find_pmu_for_event(hisi_ptt_pmus, nr_ptts, evsel);
|
||||
}
|
||||
free(arm_spe_pmus);
|
||||
|
||||
if (found_etm && found_spe) {
|
||||
pr_err("Concurrent ARM Coresight ETM and SPE operation not currently supported\n");
|
||||
free(arm_spe_pmus);
|
||||
free(hisi_ptt_pmus);
|
||||
|
||||
if (found_etm)
|
||||
auxtrace_event_cnt++;
|
||||
|
||||
if (found_spe)
|
||||
auxtrace_event_cnt++;
|
||||
|
||||
if (found_ptt)
|
||||
auxtrace_event_cnt++;
|
||||
|
||||
if (auxtrace_event_cnt > 1) {
|
||||
pr_err("Concurrent AUX trace operation not currently supported\n");
|
||||
*err = -EOPNOTSUPP;
|
||||
return NULL;
|
||||
}
|
||||
@@ -96,6 +171,9 @@ struct auxtrace_record
|
||||
#if defined(__aarch64__)
|
||||
if (found_spe)
|
||||
return arm_spe_recording_init(err, found_spe);
|
||||
|
||||
if (found_ptt)
|
||||
return hisi_ptt_recording_init(err, found_ptt);
|
||||
#endif
|
||||
|
||||
/*
|
||||
|
||||
@@ -10,6 +10,7 @@
|
||||
#include <linux/string.h>
|
||||
|
||||
#include "arm-spe.h"
|
||||
#include "hisi-ptt.h"
|
||||
#include "../../../util/pmu.h"
|
||||
|
||||
struct perf_event_attr
|
||||
@@ -22,6 +23,8 @@ struct perf_event_attr
|
||||
#if defined(__aarch64__)
|
||||
} else if (strstarts(pmu->name, ARM_SPE_PMU_NAME)) {
|
||||
return arm_spe_pmu_default_config(pmu);
|
||||
} else if (strstarts(pmu->name, HISI_PTT_PMU_NAME)) {
|
||||
pmu->selectable = true;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
@@ -102,7 +102,7 @@ static int arm64__annotate_init(struct arch *arch, char *cpuid __maybe_unused)
|
||||
if (err)
|
||||
goto out_free_arm;
|
||||
/* b, b.cond, br, cbz/cbnz, tbz/tbnz */
|
||||
err = regcomp(&arm->jump_insn, "^[ct]?br?\\.?(cc|cs|eq|ge|gt|hi|le|ls|lt|mi|ne|pl)?n?z?$",
|
||||
err = regcomp(&arm->jump_insn, "^[ct]?br?\\.?(cc|cs|eq|ge|gt|hi|hs|le|lo|ls|lt|mi|ne|pl|vc|vs)?n?z?$",
|
||||
REG_EXTENDED);
|
||||
if (err)
|
||||
goto out_free_call;
|
||||
|
||||
@@ -11,4 +11,4 @@ perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
|
||||
perf-$(CONFIG_AUXTRACE) += ../../arm/util/pmu.o \
|
||||
../../arm/util/auxtrace.o \
|
||||
../../arm/util/cs-etm.o \
|
||||
arm-spe.o mem-events.o
|
||||
arm-spe.o mem-events.o hisi-ptt.o
|
||||
|
||||
188
tools/perf/arch/arm64/util/hisi-ptt.c
Normal file
188
tools/perf/arch/arm64/util/hisi-ptt.c
Normal file
@@ -0,0 +1,188 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* HiSilicon PCIe Trace and Tuning (PTT) support
|
||||
* Copyright (c) 2022 HiSilicon Technologies Co., Ltd.
|
||||
*/
|
||||
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/bitops.h>
|
||||
#include <linux/log2.h>
|
||||
#include <linux/zalloc.h>
|
||||
#include <time.h>
|
||||
|
||||
#include <internal/lib.h> // page_size
|
||||
#include "../../../util/auxtrace.h"
|
||||
#include "../../../util/cpumap.h"
|
||||
#include "../../../util/debug.h"
|
||||
#include "../../../util/event.h"
|
||||
#include "../../../util/evlist.h"
|
||||
#include "../../../util/evsel.h"
|
||||
#include "../../../util/hisi-ptt.h"
|
||||
#include "../../../util/pmu.h"
|
||||
#include "../../../util/record.h"
|
||||
#include "../../../util/session.h"
|
||||
#include "../../../util/tsc.h"
|
||||
|
||||
#define KiB(x) ((x) * 1024)
|
||||
#define MiB(x) ((x) * 1024 * 1024)
|
||||
|
||||
struct hisi_ptt_recording {
|
||||
struct auxtrace_record itr;
|
||||
struct perf_pmu *hisi_ptt_pmu;
|
||||
struct evlist *evlist;
|
||||
};
|
||||
|
||||
static size_t
|
||||
hisi_ptt_info_priv_size(struct auxtrace_record *itr __maybe_unused,
|
||||
struct evlist *evlist __maybe_unused)
|
||||
{
|
||||
return HISI_PTT_AUXTRACE_PRIV_SIZE;
|
||||
}
|
||||
|
||||
static int hisi_ptt_info_fill(struct auxtrace_record *itr,
|
||||
struct perf_session *session,
|
||||
struct perf_record_auxtrace_info *auxtrace_info,
|
||||
size_t priv_size)
|
||||
{
|
||||
struct hisi_ptt_recording *pttr =
|
||||
container_of(itr, struct hisi_ptt_recording, itr);
|
||||
struct perf_pmu *hisi_ptt_pmu = pttr->hisi_ptt_pmu;
|
||||
|
||||
if (priv_size != HISI_PTT_AUXTRACE_PRIV_SIZE)
|
||||
return -EINVAL;
|
||||
|
||||
if (!session->evlist->core.nr_mmaps)
|
||||
return -EINVAL;
|
||||
|
||||
auxtrace_info->type = PERF_AUXTRACE_HISI_PTT;
|
||||
auxtrace_info->priv[0] = hisi_ptt_pmu->type;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int hisi_ptt_set_auxtrace_mmap_page(struct record_opts *opts)
|
||||
{
|
||||
bool privileged = perf_event_paranoid_check(-1);
|
||||
|
||||
if (!opts->full_auxtrace)
|
||||
return 0;
|
||||
|
||||
if (opts->full_auxtrace && !opts->auxtrace_mmap_pages) {
|
||||
if (privileged) {
|
||||
opts->auxtrace_mmap_pages = MiB(16) / page_size;
|
||||
} else {
|
||||
opts->auxtrace_mmap_pages = KiB(128) / page_size;
|
||||
if (opts->mmap_pages == UINT_MAX)
|
||||
opts->mmap_pages = KiB(256) / page_size;
|
||||
}
|
||||
}
|
||||
|
||||
/* Validate auxtrace_mmap_pages */
|
||||
if (opts->auxtrace_mmap_pages) {
|
||||
size_t sz = opts->auxtrace_mmap_pages * (size_t)page_size;
|
||||
size_t min_sz = KiB(8);
|
||||
|
||||
if (sz < min_sz || !is_power_of_2(sz)) {
|
||||
pr_err("Invalid mmap size for HISI PTT: must be at least %zuKiB and a power of 2\n",
|
||||
min_sz / 1024);
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int hisi_ptt_recording_options(struct auxtrace_record *itr,
|
||||
struct evlist *evlist,
|
||||
struct record_opts *opts)
|
||||
{
|
||||
struct hisi_ptt_recording *pttr =
|
||||
container_of(itr, struct hisi_ptt_recording, itr);
|
||||
struct perf_pmu *hisi_ptt_pmu = pttr->hisi_ptt_pmu;
|
||||
struct evsel *evsel, *hisi_ptt_evsel = NULL;
|
||||
struct evsel *tracking_evsel;
|
||||
int err;
|
||||
|
||||
pttr->evlist = evlist;
|
||||
evlist__for_each_entry(evlist, evsel) {
|
||||
if (evsel->core.attr.type == hisi_ptt_pmu->type) {
|
||||
if (hisi_ptt_evsel) {
|
||||
pr_err("There may be only one " HISI_PTT_PMU_NAME "x event\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
evsel->core.attr.freq = 0;
|
||||
evsel->core.attr.sample_period = 1;
|
||||
evsel->needs_auxtrace_mmap = true;
|
||||
hisi_ptt_evsel = evsel;
|
||||
opts->full_auxtrace = true;
|
||||
}
|
||||
}
|
||||
|
||||
err = hisi_ptt_set_auxtrace_mmap_page(opts);
|
||||
if (err)
|
||||
return err;
|
||||
/*
|
||||
* To obtain the auxtrace buffer file descriptor, the auxtrace event
|
||||
* must come first.
|
||||
*/
|
||||
evlist__to_front(evlist, hisi_ptt_evsel);
|
||||
evsel__set_sample_bit(hisi_ptt_evsel, TIME);
|
||||
|
||||
/* Add dummy event to keep tracking */
|
||||
err = parse_event(evlist, "dummy:u");
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
tracking_evsel = evlist__last(evlist);
|
||||
evlist__set_tracking_event(evlist, tracking_evsel);
|
||||
|
||||
tracking_evsel->core.attr.freq = 0;
|
||||
tracking_evsel->core.attr.sample_period = 1;
|
||||
evsel__set_sample_bit(tracking_evsel, TIME);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Reference value for an auxtrace record: whatever rdtsc() returns now. */
static u64 hisi_ptt_reference(struct auxtrace_record *itr __maybe_unused)
{
	u64 ref = rdtsc();

	return ref;
}
|
||||
|
||||
static void hisi_ptt_recording_free(struct auxtrace_record *itr)
|
||||
{
|
||||
struct hisi_ptt_recording *pttr =
|
||||
container_of(itr, struct hisi_ptt_recording, itr);
|
||||
|
||||
free(pttr);
|
||||
}
|
||||
|
||||
struct auxtrace_record *hisi_ptt_recording_init(int *err,
|
||||
struct perf_pmu *hisi_ptt_pmu)
|
||||
{
|
||||
struct hisi_ptt_recording *pttr;
|
||||
|
||||
if (!hisi_ptt_pmu) {
|
||||
*err = -ENODEV;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
pttr = zalloc(sizeof(*pttr));
|
||||
if (!pttr) {
|
||||
*err = -ENOMEM;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
pttr->hisi_ptt_pmu = hisi_ptt_pmu;
|
||||
pttr->itr.pmu = hisi_ptt_pmu;
|
||||
pttr->itr.recording_options = hisi_ptt_recording_options;
|
||||
pttr->itr.info_priv_size = hisi_ptt_info_priv_size;
|
||||
pttr->itr.info_fill = hisi_ptt_info_fill;
|
||||
pttr->itr.free = hisi_ptt_recording_free;
|
||||
pttr->itr.reference = hisi_ptt_reference;
|
||||
pttr->itr.read_finish = auxtrace_record__read_finish;
|
||||
pttr->itr.alignment = 0;
|
||||
|
||||
*err = 0;
|
||||
return &pttr->itr;
|
||||
}
|
||||
@@ -866,7 +866,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
|
||||
* User space tasks can migrate between CPUs, so when tracing
|
||||
* selected CPUs, sideband for all CPUs is still needed.
|
||||
*/
|
||||
need_system_wide_tracking = evlist->core.has_user_cpus &&
|
||||
need_system_wide_tracking = opts->target.cpu_list &&
|
||||
!intel_pt_evsel->core.attr.exclude_user;
|
||||
|
||||
tracking_evsel = evlist__add_aux_dummy(evlist, need_system_wide_tracking);
|
||||
|
||||
@@ -60,7 +60,7 @@ int cmd_list(int argc, const char **argv)
|
||||
setup_pager();
|
||||
|
||||
if (!raw_dump && pager_in_use())
|
||||
printf("\nList of pre-defined events (to be used in -e):\n\n");
|
||||
printf("\nList of pre-defined events (to be used in -e or -M):\n\n");
|
||||
|
||||
if (hybrid_type) {
|
||||
pmu_name = perf_pmu__hybrid_type_to_pmu(hybrid_type);
|
||||
|
||||
@@ -97,6 +97,9 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem)
|
||||
else
|
||||
rec_argc = argc + 9 * perf_pmu__hybrid_pmu_num();
|
||||
|
||||
if (mem->cpu_list)
|
||||
rec_argc += 2;
|
||||
|
||||
rec_argv = calloc(rec_argc + 1, sizeof(char *));
|
||||
if (!rec_argv)
|
||||
return -1;
|
||||
@@ -159,6 +162,11 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem)
|
||||
if (all_kernel)
|
||||
rec_argv[i++] = "--all-kernel";
|
||||
|
||||
if (mem->cpu_list) {
|
||||
rec_argv[i++] = "-C";
|
||||
rec_argv[i++] = mem->cpu_list;
|
||||
}
|
||||
|
||||
for (j = 0; j < argc; j++, i++)
|
||||
rec_argv[i] = argv[j];
|
||||
|
||||
|
||||
@@ -9,7 +9,7 @@ size=128
|
||||
config=0
|
||||
sample_period=*
|
||||
sample_type=263
|
||||
read_format=0|4
|
||||
read_format=0|4|20
|
||||
disabled=1
|
||||
inherit=1
|
||||
pinned=0
|
||||
|
||||
@@ -11,7 +11,7 @@ size=128
|
||||
config=9
|
||||
sample_period=4000
|
||||
sample_type=455
|
||||
read_format=4
|
||||
read_format=4|20
|
||||
# Event will be enabled right away.
|
||||
disabled=0
|
||||
inherit=1
|
||||
|
||||
@@ -7,14 +7,14 @@ ret = 1
|
||||
fd=1
|
||||
group_fd=-1
|
||||
sample_type=327
|
||||
read_format=4
|
||||
read_format=4|20
|
||||
|
||||
[event-2:base-record]
|
||||
fd=2
|
||||
group_fd=1
|
||||
config=1
|
||||
sample_type=327
|
||||
read_format=4
|
||||
read_format=4|20
|
||||
mmap=0
|
||||
comm=0
|
||||
task=0
|
||||
|
||||
@@ -7,7 +7,7 @@ ret = 1
|
||||
fd=1
|
||||
group_fd=-1
|
||||
sample_type=343
|
||||
read_format=12
|
||||
read_format=12|28
|
||||
inherit=0
|
||||
|
||||
[event-2:base-record]
|
||||
@@ -21,8 +21,8 @@ config=3
|
||||
# default | PERF_SAMPLE_READ
|
||||
sample_type=343
|
||||
|
||||
# PERF_FORMAT_ID | PERF_FORMAT_GROUP
|
||||
read_format=12
|
||||
# PERF_FORMAT_ID | PERF_FORMAT_GROUP | PERF_FORMAT_LOST
|
||||
read_format=12|28
|
||||
task=0
|
||||
mmap=0
|
||||
comm=0
|
||||
|
||||
@@ -7,7 +7,7 @@ ret = 1
|
||||
fd=1
|
||||
group_fd=-1
|
||||
sample_type=327
|
||||
read_format=4
|
||||
read_format=4|20
|
||||
|
||||
[event-2:base-record]
|
||||
fd=2
|
||||
@@ -15,7 +15,7 @@ group_fd=1
|
||||
type=0
|
||||
config=1
|
||||
sample_type=327
|
||||
read_format=4
|
||||
read_format=4|20
|
||||
mmap=0
|
||||
comm=0
|
||||
task=0
|
||||
|
||||
@@ -9,7 +9,7 @@ group_fd=-1
|
||||
config=0|1
|
||||
sample_period=1234000
|
||||
sample_type=87
|
||||
read_format=12
|
||||
read_format=12|28
|
||||
inherit=0
|
||||
freq=0
|
||||
|
||||
@@ -19,7 +19,7 @@ group_fd=1
|
||||
config=0|1
|
||||
sample_period=6789000
|
||||
sample_type=87
|
||||
read_format=12
|
||||
read_format=12|28
|
||||
disabled=0
|
||||
inherit=0
|
||||
mmap=0
|
||||
|
||||
@@ -6,6 +6,8 @@
|
||||
|
||||
set -e
|
||||
|
||||
skip_test=0
|
||||
|
||||
function commachecker()
|
||||
{
|
||||
local -i cnt=0
|
||||
@@ -156,14 +158,47 @@ check_per_socket()
|
||||
echo "[Success]"
|
||||
}
|
||||
|
||||
# The perf stat options for per-socket, per-core, per-die
|
||||
# and -A ( no_aggr mode ) uses the info fetched from this
|
||||
# directory: "/sys/devices/system/cpu/cpu*/topology". For
|
||||
# example, socket value is fetched from "physical_package_id"
|
||||
# file in topology directory.
|
||||
# Reference: cpu__get_topology_int in util/cpumap.c
|
||||
# If the platform doesn't expose topology information, values
|
||||
# will be set to -1. For example, in case of pSeries platform
|
||||
# of powerpc, value for "physical_package_id" is restricted
|
||||
# and set to -1. Check here validates the socket-id read from
|
||||
# topology file before proceeding further
|
||||
|
||||
FILE_LOC="/sys/devices/system/cpu/cpu*/topology/"
|
||||
FILE_NAME="physical_package_id"
|
||||
|
||||
# Set skip_test=1 when the first physical_package_id file found under
# $FILE_LOC holds -1, i.e. the socket id exposed via topology is invalid.
# The check is only made when "ParanoidAndNotRoot 0" (a helper defined
# outside this function) returns non-zero. Always returns 0 unless reading
# the topology file itself fails.
check_for_topology()
{
	if ! ParanoidAndNotRoot 0
	then
		# FILE_LOC contains a glob (cpu*), so it must stay unquoted here.
		socket_file=$(ls $FILE_LOC/$FILE_NAME 2>/dev/null | head -n 1)
		# Quote the expansions so empty values reach test as real operands.
		[ -z "$socket_file" ] && return 0
		socket_id=$(cat "$socket_file")
		[ "$socket_id" = "-1" ] && skip_test=1
		return 0
	fi
}
|
||||
|
||||
check_for_topology
|
||||
check_no_args
|
||||
check_system_wide
|
||||
check_system_wide_no_aggr
|
||||
check_interval
|
||||
check_event
|
||||
check_per_core
|
||||
check_per_thread
|
||||
check_per_die
|
||||
check_per_node
|
||||
check_per_socket
|
||||
if [ $skip_test -ne 1 ]
|
||||
then
|
||||
check_system_wide_no_aggr
|
||||
check_per_core
|
||||
check_per_die
|
||||
check_per_socket
|
||||
else
|
||||
echo "[Skip] Skipping tests for system_wide_no_aggr, per_core, per_die and per_socket since socket id exposed via topology is invalid"
|
||||
fi
|
||||
exit 0
|
||||
|
||||
@@ -6,6 +6,8 @@
|
||||
|
||||
set -e
|
||||
|
||||
skip_test=0
|
||||
|
||||
pythonchecker=$(dirname $0)/lib/perf_json_output_lint.py
|
||||
if [ "x$PYTHON" == "x" ]
|
||||
then
|
||||
@@ -134,14 +136,47 @@ check_per_socket()
|
||||
echo "[Success]"
|
||||
}
|
||||
|
||||
# The perf stat options for per-socket, per-core, per-die
|
||||
# and -A ( no_aggr mode ) uses the info fetched from this
|
||||
# directory: "/sys/devices/system/cpu/cpu*/topology". For
|
||||
# example, socket value is fetched from "physical_package_id"
|
||||
# file in topology directory.
|
||||
# Reference: cpu__get_topology_int in util/cpumap.c
|
||||
# If the platform doesn't expose topology information, values
|
||||
# will be set to -1. For example, in case of pSeries platform
|
||||
# of powerpc, value for "physical_package_id" is restricted
|
||||
# and set to -1. Check here validates the socket-id read from
|
||||
# topology file before proceeding further
|
||||
|
||||
FILE_LOC="/sys/devices/system/cpu/cpu*/topology/"
|
||||
FILE_NAME="physical_package_id"
|
||||
|
||||
# Set skip_test=1 when the first physical_package_id file found under
# $FILE_LOC holds -1, i.e. the socket id exposed via topology is invalid.
# The check is only made when "ParanoidAndNotRoot 0" (a helper defined
# outside this function) returns non-zero. Always returns 0 unless reading
# the topology file itself fails.
check_for_topology()
{
	if ! ParanoidAndNotRoot 0
	then
		# FILE_LOC contains a glob (cpu*), so it must stay unquoted here.
		socket_file=$(ls $FILE_LOC/$FILE_NAME 2>/dev/null | head -n 1)
		# Quote the expansions so empty values reach test as real operands.
		[ -z "$socket_file" ] && return 0
		socket_id=$(cat "$socket_file")
		[ "$socket_id" = "-1" ] && skip_test=1
		return 0
	fi
}
|
||||
|
||||
check_for_topology
|
||||
check_no_args
|
||||
check_system_wide
|
||||
check_system_wide_no_aggr
|
||||
check_interval
|
||||
check_event
|
||||
check_per_core
|
||||
check_per_thread
|
||||
check_per_die
|
||||
check_per_node
|
||||
check_per_socket
|
||||
if [ $skip_test -ne 1 ]
|
||||
then
|
||||
check_system_wide_no_aggr
|
||||
check_per_core
|
||||
check_per_die
|
||||
check_per_socket
|
||||
else
|
||||
echo "[Skip] Skipping tests for system_wide_no_aggr, per_core, per_die and per_socket since socket id exposed via topology is invalid"
|
||||
fi
|
||||
exit 0
|
||||
|
||||
@@ -70,7 +70,7 @@ perf_report_instruction_samples() {
|
||||
# 68.12% touch libc-2.27.so [.] _dl_addr
|
||||
# 5.80% touch libc-2.27.so [.] getenv
|
||||
# 4.35% touch ld-2.27.so [.] _dl_fixup
|
||||
perf report --itrace=i1000i --stdio -i ${perfdata} 2>&1 | \
|
||||
perf report --itrace=i20i --stdio -i ${perfdata} 2>&1 | \
|
||||
egrep " +[0-9]+\.[0-9]+% +$1" > /dev/null 2>&1
|
||||
}
|
||||
|
||||
|
||||
@@ -22,6 +22,8 @@ outfile="${temp_dir}/test-out.txt"
|
||||
errfile="${temp_dir}/test-err.txt"
|
||||
workload="${temp_dir}/workload"
|
||||
awkscript="${temp_dir}/awkscript"
|
||||
jitdump_workload="${temp_dir}/jitdump_workload"
|
||||
maxbrstack="${temp_dir}/maxbrstack.py"
|
||||
|
||||
cleanup()
|
||||
{
|
||||
@@ -42,6 +44,21 @@ trap_cleanup()
|
||||
|
||||
trap trap_cleanup EXIT TERM INT
|
||||
|
||||
# perf record for testing without decoding
|
||||
perf_record_no_decode()
{
	# Wrapper around 'perf record' for tests that do not decode the trace.
	# The fixed options (-B -N --no-bpf-event) speed recording up by
	# dropping post-processing, the build-id cache update and BPF events;
	# the caller's arguments are passed through untouched.
	perf record -B -N --no-bpf-event "$@"
}
|
||||
|
||||
# perf record for testing should not need BPF events
|
||||
perf_record_no_bpf()
{
	# Wrapper around 'perf record' that only disables BPF events; the
	# caller's arguments are passed through untouched.
	perf record --no-bpf-event "$@"
}
|
||||
|
||||
have_workload=false
|
||||
cat << _end_of_file_ | /usr/bin/cc -o "${workload}" -xc - -pthread && have_workload=true
|
||||
#include <time.h>
|
||||
@@ -76,7 +93,7 @@ _end_of_file_
|
||||
can_cpu_wide()
|
||||
{
|
||||
echo "Checking for CPU-wide recording on CPU $1"
|
||||
if ! perf record -o "${tmpfile}" -B -N --no-bpf-event -e dummy:u -C "$1" true >/dev/null 2>&1 ; then
|
||||
if ! perf_record_no_decode -o "${tmpfile}" -e dummy:u -C "$1" true >/dev/null 2>&1 ; then
|
||||
echo "No so skipping"
|
||||
return 2
|
||||
fi
|
||||
@@ -93,7 +110,7 @@ test_system_wide_side_band()
|
||||
can_cpu_wide 1 || return $?
|
||||
|
||||
# Record on CPU 0 a task running on CPU 1
|
||||
perf record -B -N --no-bpf-event -o "${perfdatafile}" -e intel_pt//u -C 0 -- taskset --cpu-list 1 uname
|
||||
perf_record_no_decode -o "${perfdatafile}" -e intel_pt//u -C 0 -- taskset --cpu-list 1 uname
|
||||
|
||||
# Should get MMAP events from CPU 1 because they can be needed to decode
|
||||
mmap_cnt=$(perf script -i "${perfdatafile}" --no-itrace --show-mmap-events -C 1 2>/dev/null | grep -c MMAP)
|
||||
@@ -109,7 +126,14 @@ test_system_wide_side_band()
|
||||
|
||||
can_kernel()
|
||||
{
|
||||
perf record -o "${tmpfile}" -B -N --no-bpf-event -e dummy:k true >/dev/null 2>&1 || return 2
|
||||
if [ -z "${can_kernel_trace}" ] ; then
|
||||
can_kernel_trace=0
|
||||
perf_record_no_decode -o "${tmpfile}" -e dummy:k true >/dev/null 2>&1 && can_kernel_trace=1
|
||||
fi
|
||||
if [ ${can_kernel_trace} -eq 0 ] ; then
|
||||
echo "SKIP: no kernel tracing"
|
||||
return 2
|
||||
fi
|
||||
return 0
|
||||
}
|
||||
|
||||
@@ -235,7 +259,7 @@ test_per_thread()
|
||||
wait_for_threads ${w1} 2
|
||||
wait_for_threads ${w2} 2
|
||||
|
||||
perf record -B -N --no-bpf-event -o "${perfdatafile}" -e intel_pt//u"${k}" -vvv --per-thread -p "${w1},${w2}" 2>"${errfile}" >"${outfile}" &
|
||||
perf_record_no_decode -o "${perfdatafile}" -e intel_pt//u"${k}" -vvv --per-thread -p "${w1},${w2}" 2>"${errfile}" >"${outfile}" &
|
||||
ppid=$!
|
||||
echo "perf PID is $ppid"
|
||||
wait_for_perf_to_start ${ppid} "${errfile}" || return 1
|
||||
@@ -254,6 +278,342 @@ test_per_thread()
|
||||
return 0
|
||||
}
|
||||
|
||||
test_jitdump()
|
||||
{
|
||||
echo "--- Test tracing self-modifying code that uses jitdump ---"
|
||||
|
||||
script_path=$(realpath "$0")
|
||||
script_dir=$(dirname "$script_path")
|
||||
jitdump_incl_dir="${script_dir}/../../util"
|
||||
jitdump_h="${jitdump_incl_dir}/jitdump.h"
|
||||
|
||||
if [ ! -e "${jitdump_h}" ] ; then
|
||||
echo "SKIP: Include file jitdump.h not found"
|
||||
return 2
|
||||
fi
|
||||
|
||||
if [ -z "${have_jitdump_workload}" ] ; then
|
||||
have_jitdump_workload=false
|
||||
# Create a workload that uses self-modifying code and generates its own jitdump file
|
||||
cat <<- "_end_of_file_" | /usr/bin/cc -o "${jitdump_workload}" -I "${jitdump_incl_dir}" -xc - -pthread && have_jitdump_workload=true
|
||||
#define _GNU_SOURCE
|
||||
#include <sys/mman.h>
|
||||
#include <sys/types.h>
|
||||
#include <stddef.h>
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
#include <unistd.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "jitdump.h"
|
||||
|
||||
#define CHK_BYTE 0x5a
|
||||
|
||||
static inline uint64_t rdtsc(void)
|
||||
{
|
||||
unsigned int low, high;
|
||||
|
||||
asm volatile("rdtsc" : "=a" (low), "=d" (high));
|
||||
|
||||
return low | ((uint64_t)high) << 32;
|
||||
}
|
||||
|
||||
static FILE *open_jitdump(void)
|
||||
{
|
||||
struct jitheader header = {
|
||||
.magic = JITHEADER_MAGIC,
|
||||
.version = JITHEADER_VERSION,
|
||||
.total_size = sizeof(header),
|
||||
.pid = getpid(),
|
||||
.timestamp = rdtsc(),
|
||||
.flags = JITDUMP_FLAGS_ARCH_TIMESTAMP,
|
||||
};
|
||||
char filename[256];
|
||||
FILE *f;
|
||||
void *m;
|
||||
|
||||
snprintf(filename, sizeof(filename), "jit-%d.dump", getpid());
|
||||
f = fopen(filename, "w+");
|
||||
if (!f)
|
||||
goto err;
|
||||
/* Create an MMAP event for the jitdump file. That is how perf tool finds it. */
|
||||
m = mmap(0, 4096, PROT_READ | PROT_EXEC, MAP_PRIVATE, fileno(f), 0);
|
||||
if (m == MAP_FAILED)
|
||||
goto err_close;
|
||||
munmap(m, 4096);
|
||||
if (fwrite(&header,sizeof(header),1,f) != 1)
|
||||
goto err_close;
|
||||
return f;
|
||||
|
||||
err_close:
|
||||
fclose(f);
|
||||
err:
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static int write_jitdump(FILE *f, void *addr, const uint8_t *dat, size_t sz, uint64_t *idx)
|
||||
{
|
||||
struct jr_code_load rec = {
|
||||
.p.id = JIT_CODE_LOAD,
|
||||
.p.total_size = sizeof(rec) + sz,
|
||||
.p.timestamp = rdtsc(),
|
||||
.pid = getpid(),
|
||||
.tid = gettid(),
|
||||
.vma = (unsigned long)addr,
|
||||
.code_addr = (unsigned long)addr,
|
||||
.code_size = sz,
|
||||
.code_index = ++*idx,
|
||||
};
|
||||
|
||||
if (fwrite(&rec,sizeof(rec),1,f) != 1 ||
|
||||
fwrite(dat, sz, 1, f) != 1)
|
||||
return -1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Close the jitdump stream; the result of fclose() is not examined. */
static void close_jitdump(FILE *f)
{
	(void)fclose(f);
}
|
||||
|
||||
int main()
|
||||
{
|
||||
/* Get a memory page to store executable code */
|
||||
void *addr = mmap(0, 4096, PROT_WRITE | PROT_EXEC, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
|
||||
/* Code to execute: mov CHK_BYTE, %eax ; ret */
|
||||
uint8_t dat[] = {0xb8, CHK_BYTE, 0x00, 0x00, 0x00, 0xc3};
|
||||
FILE *f = open_jitdump();
|
||||
uint64_t idx = 0;
|
||||
int ret = 1;
|
||||
|
||||
if (!f)
|
||||
return 1;
|
||||
/* Copy executable code to executable memory page */
|
||||
memcpy(addr, dat, sizeof(dat));
|
||||
/* Record it in the jitdump file */
|
||||
if (write_jitdump(f, addr, dat, sizeof(dat), &idx))
|
||||
goto out_close;
|
||||
/* Call it */
|
||||
ret = ((int (*)(void))addr)() - CHK_BYTE;
|
||||
out_close:
|
||||
close_jitdump(f);
|
||||
return ret;
|
||||
}
|
||||
_end_of_file_
|
||||
fi
|
||||
|
||||
if ! $have_jitdump_workload ; then
|
||||
echo "SKIP: No jitdump workload"
|
||||
return 2
|
||||
fi
|
||||
|
||||
# Change to temp_dir so jitdump collateral files go there
|
||||
cd "${temp_dir}"
|
||||
perf_record_no_bpf -o "${tmpfile}" -e intel_pt//u "${jitdump_workload}"
|
||||
perf inject -i "${tmpfile}" -o "${perfdatafile}" --jit
|
||||
decode_br_cnt=$(perf script -i "${perfdatafile}" --itrace=b | wc -l)
|
||||
# Note that overflow and lost errors are suppressed for the error count
|
||||
decode_err_cnt=$(perf script -i "${perfdatafile}" --itrace=e-o-l | grep -ci error)
|
||||
cd -
|
||||
# Should be thousands of branches
|
||||
if [ "${decode_br_cnt}" -lt 1000 ] ; then
|
||||
echo "Decode failed, only ${decode_br_cnt} branches"
|
||||
return 1
|
||||
fi
|
||||
# Should be no errors
|
||||
if [ "${decode_err_cnt}" -ne 0 ] ; then
|
||||
echo "Decode failed, ${decode_err_cnt} errors"
|
||||
perf script -i "${perfdatafile}" --itrace=e-o-l --show-mmap-events | cat
|
||||
return 1
|
||||
fi
|
||||
|
||||
echo OK
|
||||
return 0
|
||||
}
|
||||
|
||||
# Record uname with intel_pt mtc=0,tsc=0 and check that the raw dump
# (perf script -D) contains neither "MTC 0x" nor "TSC 0x" lines.
# Returns 0 on success, 1 if either kind of packet is still present.
test_packet_filter()
{
	echo "--- Test with MTC and TSC disabled ---"
	# Disable MTC and TSC
	perf_record_no_decode -o "${perfdatafile}" -e intel_pt/mtc=0,tsc=0/u uname
	# Should not get MTC packet
	mtc_cnt=$(perf script -i "${perfdatafile}" -D 2>/dev/null | grep -c "MTC 0x")
	if [ "${mtc_cnt}" -ne 0 ] ; then
		echo "Failed to filter with mtc=0"
		return 1
	fi
	# Should not get TSC packet
	tsc_cnt=$(perf script -i "${perfdatafile}" -D 2>/dev/null | grep -c "TSC 0x")
	if [ "${tsc_cnt}" -ne 0 ] ; then
		echo "Failed to filter with tsc=0"
		return 1
	fi
	echo OK
	return 0
}
|
||||
|
||||
# Record uname with intel_pt branch=0 and check that the raw dump
# (perf script -D) contains no "TNT 0x", "TIP 0x" or "FUP 0x" lines.
# Returns 0 on success, 1 if any of those packets is present.
test_disable_branch()
{
	echo "--- Test with branches disabled ---"
	# Disable branch
	perf_record_no_decode -o "${perfdatafile}" -e intel_pt/branch=0/u uname
	# Should not get branch related packets
	tnt_cnt=$(perf script -i "${perfdatafile}" -D 2>/dev/null | grep -c "TNT 0x")
	tip_cnt=$(perf script -i "${perfdatafile}" -D 2>/dev/null | grep -c "TIP 0x")
	fup_cnt=$(perf script -i "${perfdatafile}" -D 2>/dev/null | grep -c "FUP 0x")
	if [ "${tnt_cnt}" -ne 0 ] || [ "${tip_cnt}" -ne 0 ] || [ "${fup_cnt}" -ne 0 ] ; then
		echo "Failed to disable branches"
		return 1
	fi
	echo OK
	return 0
}
|
||||
|
||||
# Check that "CYC 0x" packets appear in the raw dump when recording with
# intel_pt/cyc/ and are absent when recording with plain intel_pt//.
# Returns 2 (reported as SKIP) when the psb_cyc capability file does not
# read "1", 1 on failure and 0 on success.
test_time_cyc()
{
	echo "--- Test with/without CYC ---"
	# Check if CYC is supported
	cyc=$(cat /sys/bus/event_source/devices/intel_pt/caps/psb_cyc)
	if [ "${cyc}" != "1" ] ; then
		echo "SKIP: CYC is not supported"
		return 2
	fi
	# Enable CYC
	perf_record_no_decode -o "${perfdatafile}" -e intel_pt/cyc/u uname
	# Should get CYC packets
	cyc_cnt=$(perf script -i "${perfdatafile}" -D 2>/dev/null | grep -c "CYC 0x")
	if [ "${cyc_cnt}" = "0" ] ; then
		echo "Failed to get CYC packet"
		return 1
	fi
	# Without CYC
	perf_record_no_decode -o "${perfdatafile}" -e intel_pt//u uname
	# Should not get CYC packets
	cyc_cnt=$(perf script -i "${perfdatafile}" -D 2>/dev/null | grep -c "CYC 0x")
	if [ "${cyc_cnt}" -gt 0 ] ; then
		echo "Still get CYC packet without cyc"
		return 1
	fi
	echo OK
	return 0
}
|
||||
|
||||
# Record uname with --aux-sample=8192 on the event group
# {intel_pt//u,branch-misses:u}.
# Returns 1 if the record command fails, 0 otherwise.
test_sample()
{
	echo "--- Test recording with sample mode ---"
	# Check if recording with sample mode is working
	if ! perf_record_no_decode -o "${perfdatafile}" --aux-sample=8192 -e '{intel_pt//u,branch-misses:u}' uname ; then
		echo "perf record failed with --aux-sample"
		return 1
	fi
	echo OK
	return 0
}
|
||||
|
||||
# Record uname with intel_pt//k and -m1,128.
# Returns 2 when can_kernel fails (the other tests use 2 to mean SKIP),
# 1 if the record command fails, 0 otherwise.
test_kernel_trace()
{
	echo "--- Test with kernel trace ---"
	# Check if recording with kernel trace is working
	can_kernel || return 2
	if ! perf_record_no_decode -o "${perfdatafile}" -e intel_pt//k -m1,128 uname ; then
		echo "perf record failed with intel_pt//k"
		return 1
	fi
	echo OK
	return 0
}
|
||||
|
||||
# Record uname with --aux-sample on {intel_pt//,cycles}:u, then run a small
# Python perf-script over the data with --itrace=L to find the largest branch
# stack seen in any sample.
# Returns 0 when that maximum is at least 2, 1 otherwise.
test_virtual_lbr()
{
	echo "--- Test virtual LBR ---"

	# Python script to determine the maximum size of branch stacks.
	# The heredoc delimiter is quoted, so the body is written verbatim and
	# its indentation is significant to Python.
	cat << "_end_of_file_" > "${maxbrstack}"
from __future__ import print_function

bmax = 0

def process_event(param_dict):
	if "brstack" in param_dict:
		brstack = param_dict["brstack"]
		n = len(brstack)
		global bmax
		if n > bmax:
			bmax = n

def trace_end():
	print("max brstack", bmax)
_end_of_file_

	# Check if virtual lbr is working
	perf_record_no_bpf -o "${perfdatafile}" --aux-sample -e '{intel_pt//,cycles}:u' uname
	times_val=$(perf script -i "${perfdatafile}" --itrace=L -s "${maxbrstack}" 2>/dev/null | grep "max brstack " | cut -d " " -f 3)
	# Treat anything that is not a plain run of digits (including empty
	# output) as 0, so the numeric comparison below cannot error out and be
	# mistaken for success.
	case "${times_val}" in
		''|*[!0-9]*) times_val=0;;
	esac
	if [ "${times_val}" -lt 2 ] ; then
		echo "Failed with virtual lbr"
		return 1
	fi
	echo OK
	return 0
}
|
||||
|
||||
# Record uname with -a and intel_pt/pwr_evt/u.
# Returns 2 (reported as SKIP) when the power_event_trace capability file
# does not read "1", 1 if the record command fails, 0 otherwise.
test_power_event()
{
	echo "--- Test power events ---"
	# Check if power events are supported
	power_event=$(cat /sys/bus/event_source/devices/intel_pt/caps/power_event_trace)
	if [ "${power_event}" != "1" ] ; then
		echo "SKIP: power_event_trace is not supported"
		return 2
	fi
	if ! perf_record_no_decode -o "${perfdatafile}" -a -e intel_pt/pwr_evt/u uname ; then
		echo "perf record failed with pwr_evt"
		return 1
	fi
	echo OK
	return 0
}
|
||||
|
||||
# Record uname with intel_pt/notnt/u and check that the raw dump
# (perf script -D) contains no line mentioning TNT.
# Returns 2 (reported as SKIP) when the tnt_disable capability file does not
# read "1", 1 if TNT is still present, 0 otherwise.
test_no_tnt()
{
	echo "--- Test with TNT packets disabled ---"
	# Check if TNT disable is supported
	notnt=$(cat /sys/bus/event_source/devices/intel_pt/caps/tnt_disable)
	if [ "${notnt}" != "1" ] ; then
		echo "SKIP: tnt_disable is not supported"
		return 2
	fi
	perf_record_no_decode -o "${perfdatafile}" -e intel_pt/notnt/u uname
	# Should be no TNT packets.
	# stderr of the dump is discarded, as in the other packet-count tests,
	# so only the count decides the result.
	tnt_cnt=$(perf script -i "${perfdatafile}" -D 2>/dev/null | grep -c TNT)
	if [ "${tnt_cnt}" -ne 0 ] ; then
		echo "TNT packets still there after notnt"
		return 1
	fi
	echo OK
	return 0
}
|
||||
|
||||
# Record uname with intel_pt/event/u.
# Returns 2 (reported as SKIP) when the event_trace capability file does not
# read "1", 1 if the record command fails, 0 otherwise.
test_event_trace()
{
	echo "--- Test with event_trace ---"
	# Check if event_trace is supported
	event_trace=$(cat /sys/bus/event_source/devices/intel_pt/caps/event_trace)
	if [ "${event_trace}" != 1 ] ; then
		echo "SKIP: event_trace is not supported"
		return 2
	fi
	if ! perf_record_no_decode -o "${perfdatafile}" -e intel_pt/event/u uname ; then
		echo "perf record failed with event trace"
		return 1
	fi
	echo OK
	return 0
}
|
||||
|
||||
count_result()
|
||||
{
|
||||
if [ "$1" -eq 2 ] ; then
|
||||
@@ -265,13 +625,22 @@ count_result()
|
||||
return
|
||||
fi
|
||||
err_cnt=$((err_cnt + 1))
|
||||
ret=0
|
||||
}
|
||||
|
||||
ret=0
|
||||
test_system_wide_side_band || ret=$? ; count_result $ret
|
||||
test_per_thread "" "" || ret=$? ; count_result $ret
|
||||
test_per_thread "k" "(incl. kernel) " || ret=$? ; count_result $ret
|
||||
test_system_wide_side_band || ret=$? ; count_result $ret ; ret=0
|
||||
test_per_thread "" "" || ret=$? ; count_result $ret ; ret=0
|
||||
test_per_thread "k" "(incl. kernel) " || ret=$? ; count_result $ret ; ret=0
|
||||
test_jitdump || ret=$? ; count_result $ret ; ret=0
|
||||
test_packet_filter || ret=$? ; count_result $ret ; ret=0
|
||||
test_disable_branch || ret=$? ; count_result $ret ; ret=0
|
||||
test_time_cyc || ret=$? ; count_result $ret ; ret=0
|
||||
test_sample || ret=$? ; count_result $ret ; ret=0
|
||||
test_kernel_trace || ret=$? ; count_result $ret ; ret=0
|
||||
test_virtual_lbr || ret=$? ; count_result $ret ; ret=0
|
||||
test_power_event || ret=$? ; count_result $ret ; ret=0
|
||||
test_no_tnt || ret=$? ; count_result $ret ; ret=0
|
||||
test_event_trace || ret=$? ; count_result $ret ; ret=0
|
||||
|
||||
cleanup
|
||||
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user