Merge tag 'perf-tools-fixes-for-v6.0-2022-08-13' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux

Pull more perf tool updates from Arnaldo Carvalho de Melo:

 - 'perf c2c' now supports ARM64, adjust its output to cope with
   differences with what is in x86_64. Now go find false sharing on
   ARM64 (at least Neoverse) as well!

 - Refactor the JSON processing, making the output more compact and thus
   reducing the size of the resulting perf binary

 - Improvements for 'perf offcpu' profiling, including tracking child
   processes

 - Update Intel JSON metrics and events files for broadwellde,
   broadwellx, cascadelakex, haswellx, icelakex, ivytown, jaketown,
   knightslanding, sapphirerapids, skylakex and snowridgex

 - Add 'perf stat' JSON output and a 'perf test' entry for it

 - Ignore memfd and anonymous mmap events if jitdump present

 - Refactor 'perf test' shell tests allowing subdirs

 - Fix an error handling path in 'parse_perf_probe_command()'

 - Fixes for the guest Intel PT tracing patchkit in the 1st batch of
   this merge window

 - Print debuginfod queries if -v option is used, to explain delays in
   processing when debuginfo servers are enabled to fetch DSOs with
   richer symbol tables

 - Improve error message for 'perf record -p not_existing_pid'

 - Fix openssl and libbpf feature detection

 - Add PMU pai_crypto event description for IBM z16 on 'perf list'

 - Fix typos and duplicated words on comments in various places

* tag 'perf-tools-fixes-for-v6.0-2022-08-13' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux: (81 commits)
  perf test: Refactor shell tests allowing subdirs
  perf vendor events: Update events for snowridgex
  perf vendor events: Update events and metrics for skylakex
  perf vendor events: Update metrics for sapphirerapids
  perf vendor events: Update events for knightslanding
  perf vendor events: Update metrics for jaketown
  perf vendor events: Update metrics for ivytown
  perf vendor events: Update events and metrics for icelakex
  perf vendor events: Update events and metrics for haswellx
  perf vendor events: Update events and metrics for cascadelakex
  perf vendor events: Update events and metrics for broadwellx
  perf vendor events: Update metrics for broadwellde
  perf jevents: Fold strings optimization
  perf jevents: Compress the pmu_events_table
  perf metrics: Copy entire pmu_event in find metric
  perf pmu-events: Hide the pmu_events
  perf pmu-events: Don't assume pmu_event is an array
  perf pmu-events: Move test events/metrics to JSON
  perf test: Use full metric resolution
  perf pmu-events: Hide pmu_events_map
  ...
This commit is contained in:
Linus Torvalds
2022-08-14 09:22:11 -07:00
112 changed files with 95927 additions and 7120 deletions

View File

@@ -93,9 +93,11 @@ INSTALL ?= install
RM ?= rm -f
FEATURE_USER = .bpftool
FEATURE_TESTS = libbfd disassembler-four-args disassembler-init-styled libcap \
FEATURE_TESTS = libbfd libbfd-liberty libbfd-liberty-z \
disassembler-four-args disassembler-init-styled libcap \
clang-bpf-co-re
FEATURE_DISPLAY = libbfd libcap clang-bpf-co-re
FEATURE_DISPLAY = libbfd libbfd-liberty libbfd-liberty-z \
libcap clang-bpf-co-re
check_feat := 1
NON_CHECK_FEAT_TARGETS := clean uninstall doc doc-clean doc-install doc-uninstall

View File

@@ -90,6 +90,8 @@ all: $(FILES)
__BUILD = $(CC) $(CFLAGS) -MD -Wall -Werror -o $@ $(patsubst %.bin,%.c,$(@F)) $(LDFLAGS)
BUILD = $(__BUILD) > $(@:.bin=.make.output) 2>&1
BUILD_BFD = $(BUILD) -DPACKAGE='"perf"' -lbfd -ldl
BUILD_ALL = $(BUILD) -fstack-protector-all -O2 -D_FORTIFY_SOURCE=2 -ldw -lelf -lnuma -lelf -lslang $(FLAGS_PERL_EMBED) $(FLAGS_PYTHON_EMBED) -DPACKAGE='"perf"' -lbfd -ldl -lz -llzma -lzstd -lcap
__BUILDXX = $(CXX) $(CXXFLAGS) -MD -Wall -Werror -o $@ $(patsubst %.bin,%.cpp,$(@F)) $(LDFLAGS)
BUILDXX = $(__BUILDXX) > $(@:.bin=.make.output) 2>&1
@@ -97,7 +99,7 @@ __BUILDXX = $(CXX) $(CXXFLAGS) -MD -Wall -Werror -o $@ $(patsubst %.bin,%.cpp,$(
###############################
$(OUTPUT)test-all.bin:
$(BUILD) -fstack-protector-all -O2 -D_FORTIFY_SOURCE=2 -ldw -lelf -lnuma -lelf -lslang $(FLAGS_PERL_EMBED) $(FLAGS_PYTHON_EMBED) -DPACKAGE='"perf"' -lbfd -ldl -lz -llzma -lzstd -lcap
$(BUILD_ALL) || $(BUILD_ALL) -lopcodes -liberty
$(OUTPUT)test-hello.bin:
$(BUILD)
@@ -241,16 +243,18 @@ $(OUTPUT)test-libpython.bin:
$(BUILD) $(FLAGS_PYTHON_EMBED)
$(OUTPUT)test-libbfd.bin:
$(BUILD) -DPACKAGE='"perf"' -lbfd -ldl
$(BUILD_BFD)
$(OUTPUT)test-libbfd-buildid.bin:
$(BUILD) -DPACKAGE='"perf"' -lbfd -ldl
$(BUILD_BFD) || $(BUILD_BFD) -liberty || $(BUILD_BFD) -liberty -lz
$(OUTPUT)test-disassembler-four-args.bin:
$(BUILD) -DPACKAGE='"perf"' -lbfd -lopcodes
$(BUILD_BFD) -lopcodes || $(BUILD_BFD) -lopcodes -liberty || \
$(BUILD_BFD) -lopcodes -liberty -lz
$(OUTPUT)test-disassembler-init-styled.bin:
$(BUILD) -DPACKAGE='"perf"' -lbfd -lopcodes
$(BUILD_BFD) -lopcodes || $(BUILD_BFD) -lopcodes -liberty || \
$(BUILD_BFD) -lopcodes -liberty -lz
$(OUTPUT)test-reallocarray.bin:
$(BUILD)

View File

@@ -1,22 +1,23 @@
// SPDX-License-Identifier: GPL-2.0
#include <openssl/evp.h>
#include <openssl/sha.h>
#include <openssl/md5.h>
/*
* The MD5_* API have been deprecated since OpenSSL 3.0, which causes the
* feature test to fail silently. This is a workaround.
*/
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
int main(void)
{
MD5_CTX context;
EVP_MD_CTX *mdctx;
unsigned char md[MD5_DIGEST_LENGTH + SHA_DIGEST_LENGTH];
unsigned char dat[] = "12345";
unsigned int digest_len;
MD5_Init(&context);
MD5_Update(&context, &dat[0], sizeof(dat));
MD5_Final(&md[0], &context);
mdctx = EVP_MD_CTX_new();
if (!mdctx)
return 0;
EVP_DigestInit_ex(mdctx, EVP_md5(), NULL);
EVP_DigestUpdate(mdctx, &dat[0], sizeof(dat));
EVP_DigestFinal_ex(mdctx, &md[0], &digest_len);
EVP_MD_CTX_free(mdctx);
SHA1(&dat[0], sizeof(dat), &md[0]);

View File

@@ -1310,7 +1310,7 @@ union perf_mem_data_src {
#define PERF_MEM_SNOOP_SHIFT 19
#define PERF_MEM_SNOOPX_FWD 0x01 /* forward */
/* 1 free */
#define PERF_MEM_SNOOPX_PEER 0x02 /* xfer from peer */
#define PERF_MEM_SNOOPX_SHIFT 38
/* locked instruction */

View File

@@ -0,0 +1,16 @@
include::guestmount.txt[]
--guestkallsyms=<path>::
Guest OS /proc/kallsyms file copy. perf reads it to get guest
kernel symbols. Users copy it out from guest OS.
--guestmodules=<path>::
Guest OS /proc/modules file copy. perf reads it to get guest
kernel module information. Users copy it out from guest OS.
--guestvmlinux=<path>::
Guest OS kernel vmlinux.
--guest-code::
Indicate that guest code can be found in the hypervisor process,
which is a common case for KVM test programs.

View File

@@ -0,0 +1,11 @@
--guestmount=<path>::
Guest OS root file system mount directory. Users mount guest OS
root directories under <path> by a specific filesystem access method,
typically, sshfs.
For example, start 2 guest OS, one's pid is 8888 and the other's is 9999:
[verse]
$ mkdir \~/guestmount
$ cd \~/guestmount
$ sshfs -o allow_other,direct_io -p 5551 localhost:/ 8888/
$ sshfs -o allow_other,direct_io -p 5552 localhost:/ 9999/
$ perf {GMEXAMPLECMD} --guestmount=~/guestmount {GMEXAMPLESUBCMD}

View File

@@ -109,7 +109,9 @@ REPORT OPTIONS
-d::
--display::
Switch to HITM type (rmt, lcl) to display and sort on. Total HITMs as default.
Switch to HITM type (rmt, lcl) or peer snooping type (peer) to display
and sort on. Total HITMs (tot) as default, except Arm64 uses peer mode
as default.
--stitch-lbr::
Show callgraph with stitched LBRs, which may have more complete
@@ -174,12 +176,18 @@ For each cacheline in the 1) list we display following data:
Cacheline
- cacheline address (hex number)
Rmt/Lcl Hitm
Rmt/Lcl Hitm (Display with HITM types)
- cacheline percentage of all Remote/Local HITM accesses
LLC Load Hitm - Total, LclHitm, RmtHitm
Peer Snoop (Display with peer type)
- cacheline percentage of all peer accesses
LLC Load Hitm - Total, LclHitm, RmtHitm (For display with HITM types)
- count of Total/Local/Remote load HITMs
Load Peer - Total, Local, Remote (For display with peer type)
- count of Total/Local/Remote load from peer cache or DRAM
Total records
- sum of all cachelines accesses
@@ -201,16 +209,21 @@ For each cacheline in the 1) list we display following data:
- count of LLC load accesses, includes LLC hits and LLC HITMs
RMT Load Hit - RmtHit, RmtHitm
- count of remote load accesses, includes remote hits and remote HITMs
- count of remote load accesses, includes remote hits and remote HITMs;
on Arm neoverse cores, RmtHit is used to account remote accesses,
includes remote DRAM or any upward cache level in remote node
Load Dram - Lcl, Rmt
- count of local and remote DRAM accesses
For each offset in the 2) list we display following data:
HITM - Rmt, Lcl
HITM - Rmt, Lcl (Display with HITM types)
- % of Remote/Local HITM accesses for given offset within cacheline
Peer Snoop - Rmt, Lcl (Display with peer type)
- % of Remote/Local peer accesses for given offset within cacheline
Store Refs - L1 Hit, L1 Miss, N/A
- % of store accesses that hit L1, missed L1 and N/A (no available) memory
level for given offset within cacheline
@@ -227,9 +240,12 @@ For each offset in the 2) list we display following data:
Code address
- code address responsible for the accesses
cycles - rmt hitm, lcl hitm, load
cycles - rmt hitm, lcl hitm, load (Display with HITM types)
- sum of cycles for given accesses - Remote/Local HITM and generic load
cycles - rmt peer, lcl peer, load (Display with peer type)
- sum of cycles for given accesses - Remote/Local peer load and generic load
cpu cnt
- number of cpus that participated on the access
@@ -251,7 +267,8 @@ The 'Node' field displays nodes that accesses given cacheline
offset. Its output comes in 3 flavors:
- node IDs separated by ','
- node IDs with stats for each ID, in following format:
Node{cpus %hitms %stores}
Node{cpus %hitms %stores} (Display with HITM types)
Node{cpus %peers %stores} (Display with peer type)
- node IDs with list of affected CPUs in following format:
Node{cpu list}

View File

@@ -102,6 +102,10 @@ include::itrace.txt[]
should be used, and also --buildid-all and --switch-events may be
useful.
:GMEXAMPLECMD: inject
:GMEXAMPLESUBCMD:
include::guestmount.txt[]
SEE ALSO
--------
linkperf:perf-record[1], linkperf:perf-report[1], linkperf:perf-archive[1],

View File

@@ -77,26 +77,11 @@ OPTIONS
Collect host side performance profile.
--guest::
Collect guest side performance profile.
--guestmount=<path>::
Guest os root file system mount directory. Users mounts guest os
root directories under <path> by a specific filesystem access method,
typically, sshfs. For example, start 2 guest os. The one's pid is 8888
and the other's is 9999.
#mkdir ~/guestmount; cd ~/guestmount
#sshfs -o allow_other,direct_io -p 5551 localhost:/ 8888/
#sshfs -o allow_other,direct_io -p 5552 localhost:/ 9999/
#perf kvm --host --guest --guestmount=~/guestmount top
--guestkallsyms=<path>::
Guest os /proc/kallsyms file copy. 'perf' kvm' reads it to get guest
kernel symbols. Users copy it out from guest os.
--guestmodules=<path>::
Guest os /proc/modules file copy. 'perf' kvm' reads it to get guest
kernel module information. Users copy it out from guest os.
--guestvmlinux=<path>::
Guest os kernel vmlinux.
--guest-code::
Indicate that guest code can be found in the hypervisor process,
which is a common case for KVM test programs.
:GMEXAMPLECMD: kvm --host --guest
:GMEXAMPLESUBCMD: top
include::guest-files.txt[]
-v::
--verbose::
Be more verbose (show counter open errors, etc).

View File

@@ -228,7 +228,7 @@ OPTIONS
Instruction Trace decoding.
The machine_pid and vcpu fields are derived from data resulting from using
perf insert to insert a perf.data file recorded inside a virtual machine into
perf inject to insert a perf.data file recorded inside a virtual machine into
a perf.data file recorded on the host at the same time.
Finally, a user may not set fields to none for all event types.
@@ -507,9 +507,9 @@ include::itrace.txt[]
The known limitations include exception handing such as
setjmp/longjmp will have calls/returns not match.
--guest-code::
Indicate that guest code can be found in the hypervisor process,
which is a common case for KVM test programs.
:GMEXAMPLECMD: script
:GMEXAMPLESUBCMD:
include::guest-files.txt[]
SEE ALSO
--------

View File

@@ -570,6 +570,27 @@ Additional metrics may be printed with all earlier fields being empty.
include::intel-hybrid.txt[]
JSON FORMAT
-----------
With -j, perf stat is able to print out a JSON format output
that can be used for parsing.
- timestamp : optional usec time stamp in fractions of second (with -I)
- optional aggregate options:
- core : core identifier (with --per-core)
- die : die identifier (with --per-die)
- socket : socket identifier (with --per-socket)
- node : node identifier (with --per-node)
- thread : thread identifier (with --per-thread)
- counter-value : counter value
- unit : unit of the counter value or empty
- event : event name
- variance : optional variance if multiple values are collected (with -r)
- runtime : run time of counter
- metric-value : optional metric value
- metric-unit : optional unit of metric
SEE ALSO
--------
linkperf:perf-top[1], linkperf:perf-list[1]

View File

@@ -297,9 +297,6 @@ FEATURE_CHECK_LDFLAGS-libpython := $(PYTHON_EMBED_LDOPTS)
FEATURE_CHECK_LDFLAGS-libaio = -lrt
FEATURE_CHECK_LDFLAGS-disassembler-four-args = -lbfd -lopcodes -ldl
FEATURE_CHECK_LDFLAGS-disassembler-init-styled = -lbfd -lopcodes -ldl
CORE_CFLAGS += -fno-omit-frame-pointer
CORE_CFLAGS += -ggdb3
CORE_CFLAGS += -funwind-tables
@@ -329,8 +326,8 @@ ifneq ($(TCMALLOC),)
endif
ifeq ($(FEATURES_DUMP),)
# We will display at the end of this Makefile.config, using $(call feature_display_entries)
# As we may retry some feature detection here, see the disassembler-four-args case, for instance
# We will display at the end of this Makefile.config, using $(call feature_display_entries),
# as we may retry some feature detection here.
FEATURE_DISPLAY_DEFERRED := 1
include $(srctree)/tools/build/Makefile.feature
else
@@ -924,13 +921,9 @@ ifndef NO_LIBBFD
ifeq ($(feature-libbfd-liberty), 1)
EXTLIBS += -lbfd -lopcodes -liberty
FEATURE_CHECK_LDFLAGS-disassembler-four-args += -liberty -ldl
FEATURE_CHECK_LDFLAGS-disassembler-init-styled += -liberty -ldl
else
ifeq ($(feature-libbfd-liberty-z), 1)
EXTLIBS += -lbfd -lopcodes -liberty -lz
FEATURE_CHECK_LDFLAGS-disassembler-four-args += -liberty -lz -ldl
FEATURE_CHECK_LDFLAGS-disassembler-init-styled += -liberty -lz -ldl
endif
endif
$(call feature_check,disassembler-four-args)
@@ -1356,7 +1349,7 @@ endif
# re-generate FEATURE-DUMP as we may have called feature_check, found out
# extra libraries to add to LDFLAGS of some other test and then redo those
# tests, see the block about libbfd, disassembler-four-args, for instance.
# tests.
$(shell rm -f $(FEATURE_DUMP_FILENAME))
$(foreach feat,$(FEATURE_TESTS),$(shell echo "$(call feature_assign,$(feat))" >> $(FEATURE_DUMP_FILENAME)))

View File

@@ -1005,7 +1005,8 @@ install-tests: all install-gtk
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell'; \
$(INSTALL) tests/shell/*.sh '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell'; \
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell/lib'; \
$(INSTALL) tests/shell/lib/*.sh '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell/lib'
$(INSTALL) tests/shell/lib/*.sh '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell/lib'; \
$(INSTALL) tests/shell/lib/*.py '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell/lib'
install-bin: install-tools install-tests install-traceevent-plugins

View File

@@ -438,7 +438,7 @@ static int cs_etm_recording_options(struct auxtrace_record *itr,
if (opts->full_auxtrace) {
struct evsel *tracking_evsel;
err = parse_events(evlist, "dummy:u", NULL);
err = parse_event(evlist, "dummy:u");
if (err)
goto out;

View File

@@ -257,7 +257,7 @@ static int arm_spe_recording_options(struct auxtrace_record *itr,
evsel__set_sample_bit(arm_spe_evsel, PHYS_ADDR);
/* Add dummy event to keep tracking */
err = parse_events(evlist, "dummy:u", NULL);
err = parse_event(evlist, "dummy:u");
if (err)
return err;

View File

@@ -3,7 +3,7 @@
#include "../../../util/cpumap.h"
#include "../../../util/pmu.h"
const struct pmu_events_map *pmu_events_map__find(void)
const struct pmu_events_table *pmu_events_table__find(void)
{
struct perf_pmu *pmu = NULL;
@@ -18,7 +18,7 @@ const struct pmu_events_map *pmu_events_map__find(void)
if (pmu->cpus->nr != cpu__max_cpu().cpu)
return NULL;
return perf_pmu__find_map(pmu);
return perf_pmu__find_table(pmu);
}
return NULL;

View File

@@ -56,7 +56,7 @@ int test__intel_cqm_count_nmi_context(struct test_suite *test __maybe_unused, in
return TEST_FAIL;
}
ret = parse_events(evlist, "intel_cqm/llc_occupancy/", NULL);
ret = parse_event(evlist, "intel_cqm/llc_occupancy/");
if (ret) {
pr_debug("parse_events failed, is \"intel_cqm/llc_occupancy/\" available?\n");
err = TEST_SKIP;

View File

@@ -233,7 +233,7 @@ static int intel_bts_recording_options(struct auxtrace_record *itr,
struct evsel *tracking_evsel;
int err;
err = parse_events(evlist, "dummy:u", NULL);
err = parse_event(evlist, "dummy:u");
if (err)
return err;

View File

@@ -426,7 +426,7 @@ static int intel_pt_track_switches(struct evlist *evlist)
if (!evlist__can_select_event(evlist, sched_switch))
return -EPERM;
err = parse_events(evlist, sched_switch, NULL);
err = parse_event(evlist, sched_switch);
if (err) {
pr_debug2("%s: failed to parse %s, error %d\n",
__func__, sched_switch, err);

View File

@@ -316,7 +316,7 @@ static int iostat_event_group(struct evlist *evl,
sprintf(iostat_cmd, iostat_cmd_template,
list->rps[idx]->pmu_idx, list->rps[idx]->pmu_idx,
list->rps[idx]->pmu_idx, list->rps[idx]->pmu_idx);
ret = parse_events(evl, iostat_cmd, NULL);
ret = parse_event(evl, iostat_cmd);
if (ret)
goto err;
}

Some files were not shown because too many files have changed in this diff Show More