mirror of
https://github.com/Dasharo/linux.git
synced 2026-03-06 15:25:10 -08:00
Merge tag 'perf-tools-for-v6.5-1-2023-06-28' of git://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools-next
Pull perf tools updates from Namhyung Kim:
"Internal cleanup:
- Refactor PMU data management to handle hybrid systems in a generic
way.
Do more work in the lexer so that legacy event types parse more
easily. A side-effect of this is that if a PMU is specified,
scanning sysfs is avoided improving start-up time.
- Fix hybrid metrics, for example, the TopdownL1 works for both
performance and efficiency cores on Intel machines. To support
this, sort and regroup events after parsing.
- Add reference count checking for the 'thread' data structure.
- Lots of fixes for memory leaks in various places thanks to the ASAN
and Ian's refcount checker.
- Reduce the binary size by replacing static variables with local or
dynamically allocated memory.
- Introduce shared_mutex for annotate data to reduce memory
footprint.
- Make filesystem access library functions more thread safe.
Test:
- Organize cpu_map tests into a single suite.
- Add metric value validation test to check if the values are within
correct value ranges.
- Add perf stat stdio output test to check if event and metric names
match.
- Add perf data converter JSON output test.
- Fix a lot of issues reported by shellcheck(1). This is a
preparation to enable shellcheck by default.
- Make the large x86 new instructions test optional at build time
using EXTRA_TESTS=1.
- Add a test for libpfm4 events.
perf script:
- Add 'dsoff' output field to display offset from the DSO.
$ perf script -F comm,pid,event,ip,dsoff
ls 2695501 cycles: 152cc73ef4b5 (/usr/lib/x86_64-linux-gnu/ld-2.31.so+0x1c4b5)
ls 2695501 cycles: ffffffff99045b3e ([kernel.kallsyms])
ls 2695501 cycles: ffffffff9968e107 ([kernel.kallsyms])
ls 2695501 cycles: ffffffffc1f54afb ([kernel.kallsyms])
ls 2695501 cycles: ffffffff9968382f ([kernel.kallsyms])
ls 2695501 cycles: ffffffff99e00094 ([kernel.kallsyms])
ls 2695501 cycles: 152cc718a8d0 (/usr/lib/x86_64-linux-gnu/libselinux.so.1+0x68d0)
ls 2695501 cycles: ffffffff992a6db0 ([kernel.kallsyms])
- Adjust width for large PID/TID values.
perf report:
- Robustify reading addr2line output for srcline by checking sentinel
output before the actual data and by using timeout of 1 second.
- Allow config terms (like 'name=ABC') with breakpoint events.
$ perf record -e mem:0x55feb98dd169:x/name=breakpoint/ -p 19646 -- sleep 1
perf annotate:
- Handle x86 instruction suffix like 'l' in 'movl' generally.
- Parse instruction operands properly even with a whitespace. This is
needed for llvm-objdump output.
- Support RISC-V binutils lookup using the triplet prefixes.
- Add '<' and '>' key to navigate to prev/next symbols in TUI.
- Fix instruction association and parsing for LoongArch.
perf stat:
- Add --per-cache aggregation option, optionally specify a cache
level like `--per-cache=L2`.
$ sudo perf stat --per-cache -a -e ls_dmnd_fills_from_sys.ext_cache_remote --\
taskset -c 0-15,64-79,128-143,192-207\
perf bench sched messaging -p -t -l 100000 -g 8
# Running 'sched/messaging' benchmark:
# 20 sender and receiver threads per group
# 8 groups == 320 threads run
Total time: 7.648 [sec]
Performance counter stats for 'system wide':
S0-D0-L3-ID0 16 17,145,912 ls_dmnd_fills_from_sys.ext_cache_remote
S0-D0-L3-ID8 16 14,977,628 ls_dmnd_fills_from_sys.ext_cache_remote
S0-D0-L3-ID16 16 262,539 ls_dmnd_fills_from_sys.ext_cache_remote
S0-D0-L3-ID24 16 3,140 ls_dmnd_fills_from_sys.ext_cache_remote
S0-D0-L3-ID32 16 27,403 ls_dmnd_fills_from_sys.ext_cache_remote
S0-D0-L3-ID40 16 17,026 ls_dmnd_fills_from_sys.ext_cache_remote
S0-D0-L3-ID48 16 7,292 ls_dmnd_fills_from_sys.ext_cache_remote
S0-D0-L3-ID56 16 2,464 ls_dmnd_fills_from_sys.ext_cache_remote
S1-D1-L3-ID64 16 22,489,306 ls_dmnd_fills_from_sys.ext_cache_remote
S1-D1-L3-ID72 16 21,455,257 ls_dmnd_fills_from_sys.ext_cache_remote
S1-D1-L3-ID80 16 11,619 ls_dmnd_fills_from_sys.ext_cache_remote
S1-D1-L3-ID88 16 30,978 ls_dmnd_fills_from_sys.ext_cache_remote
S1-D1-L3-ID96 16 37,628 ls_dmnd_fills_from_sys.ext_cache_remote
S1-D1-L3-ID104 16 13,594 ls_dmnd_fills_from_sys.ext_cache_remote
S1-D1-L3-ID112 16 10,164 ls_dmnd_fills_from_sys.ext_cache_remote
S1-D1-L3-ID120 16 11,259 ls_dmnd_fills_from_sys.ext_cache_remote
7.779171484 seconds time elapsed
- Change default (no event/metric) formatting for default metrics so
that events are hidden and the metric and group appear.
Performance counter stats for 'ls /':
1.85 msec task-clock # 0.594 CPUs utilized
0 context-switches # 0.000 /sec
0 cpu-migrations # 0.000 /sec
97 page-faults # 52.517 K/sec
2,187,173 cycles # 1.184 GHz
2,474,459 instructions # 1.13 insn per cycle
531,584 branches # 287.805 M/sec
13,626 branch-misses # 2.56% of all branches
TopdownL1 # 23.5 % tma_backend_bound
# 11.5 % tma_bad_speculation
# 39.1 % tma_frontend_bound
# 25.9 % tma_retiring
- Allow --cputype option to have any PMU name (not just hybrid).
- Fix output value not to be added when it runs multiple times with -r
option.
perf list:
- Show metricgroup description from JSON file called
metricgroups.json.
- Allow 'pfm' argument to list only libpfm4 events and check each
event is supported before showing it.
JSON vendor events:
- Avoid event grouping using "NO_GROUP_EVENTS" constraints. The
topdown events are correctly grouped even if no group exists.
- Add "Default" metric group to print it in the default output. And
use "DefaultMetricgroupName" to indicate the real metric group
name.
- Add AmpereOne core PMU events.
Misc:
- Define man page date correctly.
- Track exception level properly on ARM CoreSight ETM.
- Allow anonymous struct, union or enum when retrieving type names
from DWARF.
- Fix incorrect filename when calling `perf inject --jit`.
- Handle PLT size correctly on LoongArch"
* tag 'perf-tools-for-v6.5-1-2023-06-28' of git://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools-next: (269 commits)
perf test: Skip metrics w/o event name in stat STD output linter
perf test: Reorder event name checks in stat STD output linter
perf pmu: Remove a hard coded cpu PMU assumption
perf pmus: Add notion of default PMU for JSON events
perf unwind: Fix map reference counts
perf test: Set PERF_EXEC_PATH for script execution
perf script: Initialize buffer for regs_map()
perf tests: Fix test_arm_callgraph_fp variable expansion
perf symbol: Add LoongArch case in get_plt_sizes()
perf test: Remove x permission from lib/stat_output.sh
perf test: Rerun failed metrics with longer workload
perf test: Add skip list for metrics known would fail
perf test: Add metric value validation test
perf jit: Fix incorrect file name in DWARF line table
perf annotate: Fix instruction association and parsing for LoongArch
perf annotation: Switch lock from a mutex to a sharded_mutex
perf sharded_mutex: Introduce sharded_mutex
tools: Fix incorrect calculation of object size by sizeof
perf subcmd: Fix missing check for return value of malloc() in add_cmdname()
perf parse-events: Remove unneeded semicolon
...
This commit is contained in:
@@ -14,7 +14,7 @@ struct cgroupfs_cache_entry {
|
||||
};
|
||||
|
||||
/* just cache last used one */
|
||||
static struct cgroupfs_cache_entry cached;
|
||||
static struct cgroupfs_cache_entry *cached;
|
||||
|
||||
int cgroupfs_find_mountpoint(char *buf, size_t maxlen, const char *subsys)
|
||||
{
|
||||
@@ -24,9 +24,9 @@ int cgroupfs_find_mountpoint(char *buf, size_t maxlen, const char *subsys)
|
||||
char *p, *path;
|
||||
char mountpoint[PATH_MAX];
|
||||
|
||||
if (!strcmp(cached.subsys, subsys)) {
|
||||
if (strlen(cached.mountpoint) < maxlen) {
|
||||
strcpy(buf, cached.mountpoint);
|
||||
if (cached && !strcmp(cached->subsys, subsys)) {
|
||||
if (strlen(cached->mountpoint) < maxlen) {
|
||||
strcpy(buf, cached->mountpoint);
|
||||
return 0;
|
||||
}
|
||||
return -1;
|
||||
@@ -91,8 +91,13 @@ int cgroupfs_find_mountpoint(char *buf, size_t maxlen, const char *subsys)
|
||||
free(line);
|
||||
fclose(fp);
|
||||
|
||||
strncpy(cached.subsys, subsys, sizeof(cached.subsys) - 1);
|
||||
strcpy(cached.mountpoint, mountpoint);
|
||||
if (!cached)
|
||||
cached = calloc(1, sizeof(*cached));
|
||||
|
||||
if (cached) {
|
||||
strncpy(cached->subsys, subsys, sizeof(cached->subsys) - 1);
|
||||
strcpy(cached->mountpoint, mountpoint);
|
||||
}
|
||||
|
||||
if (mountpoint[0] && strlen(mountpoint) < maxlen) {
|
||||
strcpy(buf, mountpoint);
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
#include <assert.h>
|
||||
#include <ctype.h>
|
||||
#include <errno.h>
|
||||
#include <limits.h>
|
||||
@@ -10,6 +11,7 @@
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <fcntl.h>
|
||||
#include <pthread.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/mount.h>
|
||||
|
||||
@@ -43,7 +45,7 @@
|
||||
#define BPF_FS_MAGIC 0xcafe4a11
|
||||
#endif
|
||||
|
||||
static const char * const sysfs__fs_known_mountpoints[] = {
|
||||
static const char * const sysfs__known_mountpoints[] = {
|
||||
"/sys",
|
||||
0,
|
||||
};
|
||||
@@ -86,87 +88,89 @@ static const char * const bpf_fs__known_mountpoints[] = {
|
||||
};
|
||||
|
||||
struct fs {
|
||||
const char *name;
|
||||
const char * const *mounts;
|
||||
char path[PATH_MAX];
|
||||
bool found;
|
||||
bool checked;
|
||||
long magic;
|
||||
};
|
||||
|
||||
enum {
|
||||
FS__SYSFS = 0,
|
||||
FS__PROCFS = 1,
|
||||
FS__DEBUGFS = 2,
|
||||
FS__TRACEFS = 3,
|
||||
FS__HUGETLBFS = 4,
|
||||
FS__BPF_FS = 5,
|
||||
const char * const name;
|
||||
const char * const * const mounts;
|
||||
char *path;
|
||||
pthread_mutex_t mount_mutex;
|
||||
const long magic;
|
||||
};
|
||||
|
||||
#ifndef TRACEFS_MAGIC
|
||||
#define TRACEFS_MAGIC 0x74726163
|
||||
#endif
|
||||
|
||||
static struct fs fs__entries[] = {
|
||||
[FS__SYSFS] = {
|
||||
.name = "sysfs",
|
||||
.mounts = sysfs__fs_known_mountpoints,
|
||||
.magic = SYSFS_MAGIC,
|
||||
.checked = false,
|
||||
},
|
||||
[FS__PROCFS] = {
|
||||
.name = "proc",
|
||||
.mounts = procfs__known_mountpoints,
|
||||
.magic = PROC_SUPER_MAGIC,
|
||||
.checked = false,
|
||||
},
|
||||
[FS__DEBUGFS] = {
|
||||
.name = "debugfs",
|
||||
.mounts = debugfs__known_mountpoints,
|
||||
.magic = DEBUGFS_MAGIC,
|
||||
.checked = false,
|
||||
},
|
||||
[FS__TRACEFS] = {
|
||||
.name = "tracefs",
|
||||
.mounts = tracefs__known_mountpoints,
|
||||
.magic = TRACEFS_MAGIC,
|
||||
.checked = false,
|
||||
},
|
||||
[FS__HUGETLBFS] = {
|
||||
.name = "hugetlbfs",
|
||||
.mounts = hugetlbfs__known_mountpoints,
|
||||
.magic = HUGETLBFS_MAGIC,
|
||||
.checked = false,
|
||||
},
|
||||
[FS__BPF_FS] = {
|
||||
.name = "bpf",
|
||||
.mounts = bpf_fs__known_mountpoints,
|
||||
.magic = BPF_FS_MAGIC,
|
||||
.checked = false,
|
||||
},
|
||||
};
|
||||
static void fs__init_once(struct fs *fs);
|
||||
static const char *fs__mountpoint(const struct fs *fs);
|
||||
static const char *fs__mount(struct fs *fs);
|
||||
|
||||
#define FS(lower_name, fs_name, upper_name) \
|
||||
static struct fs fs__##lower_name = { \
|
||||
.name = #fs_name, \
|
||||
.mounts = lower_name##__known_mountpoints, \
|
||||
.magic = upper_name##_MAGIC, \
|
||||
.mount_mutex = PTHREAD_MUTEX_INITIALIZER, \
|
||||
}; \
|
||||
\
|
||||
static void lower_name##_init_once(void) \
|
||||
{ \
|
||||
struct fs *fs = &fs__##lower_name; \
|
||||
\
|
||||
fs__init_once(fs); \
|
||||
} \
|
||||
\
|
||||
const char *lower_name##__mountpoint(void) \
|
||||
{ \
|
||||
static pthread_once_t init_once = PTHREAD_ONCE_INIT; \
|
||||
struct fs *fs = &fs__##lower_name; \
|
||||
\
|
||||
pthread_once(&init_once, lower_name##_init_once); \
|
||||
return fs__mountpoint(fs); \
|
||||
} \
|
||||
\
|
||||
const char *lower_name##__mount(void) \
|
||||
{ \
|
||||
const char *mountpoint = lower_name##__mountpoint(); \
|
||||
struct fs *fs = &fs__##lower_name; \
|
||||
\
|
||||
if (mountpoint) \
|
||||
return mountpoint; \
|
||||
\
|
||||
return fs__mount(fs); \
|
||||
} \
|
||||
\
|
||||
bool lower_name##__configured(void) \
|
||||
{ \
|
||||
return lower_name##__mountpoint() != NULL; \
|
||||
}
|
||||
|
||||
FS(sysfs, sysfs, SYSFS);
|
||||
FS(procfs, procfs, PROC_SUPER);
|
||||
FS(debugfs, debugfs, DEBUGFS);
|
||||
FS(tracefs, tracefs, TRACEFS);
|
||||
FS(hugetlbfs, hugetlbfs, HUGETLBFS);
|
||||
FS(bpf_fs, bpf, BPF_FS);
|
||||
|
||||
static bool fs__read_mounts(struct fs *fs)
|
||||
{
|
||||
bool found = false;
|
||||
char type[100];
|
||||
FILE *fp;
|
||||
char path[PATH_MAX + 1];
|
||||
|
||||
fp = fopen("/proc/mounts", "r");
|
||||
if (fp == NULL)
|
||||
return NULL;
|
||||
return false;
|
||||
|
||||
while (!found &&
|
||||
fscanf(fp, "%*s %" STR(PATH_MAX) "s %99s %*s %*d %*d\n",
|
||||
fs->path, type) == 2) {
|
||||
while (fscanf(fp, "%*s %" STR(PATH_MAX) "s %99s %*s %*d %*d\n",
|
||||
path, type) == 2) {
|
||||
|
||||
if (strcmp(type, fs->name) == 0)
|
||||
found = true;
|
||||
if (strcmp(type, fs->name) == 0) {
|
||||
fs->path = strdup(path);
|
||||
fclose(fp);
|
||||
return fs->path != NULL;
|
||||
}
|
||||
}
|
||||
|
||||
fclose(fp);
|
||||
fs->checked = true;
|
||||
return fs->found = found;
|
||||
return false;
|
||||
}
|
||||
|
||||
static int fs__valid_mount(const char *fs, long magic)
|
||||
@@ -188,8 +192,9 @@ static bool fs__check_mounts(struct fs *fs)
|
||||
ptr = fs->mounts;
|
||||
while (*ptr) {
|
||||
if (fs__valid_mount(*ptr, fs->magic) == 0) {
|
||||
fs->found = true;
|
||||
strcpy(fs->path, *ptr);
|
||||
fs->path = strdup(*ptr);
|
||||
if (!fs->path)
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
ptr++;
|
||||
@@ -227,43 +232,26 @@ static bool fs__env_override(struct fs *fs)
|
||||
if (!override_path)
|
||||
return false;
|
||||
|
||||
fs->found = true;
|
||||
fs->checked = true;
|
||||
strncpy(fs->path, override_path, sizeof(fs->path) - 1);
|
||||
fs->path[sizeof(fs->path) - 1] = '\0';
|
||||
fs->path = strdup(override_path);
|
||||
if (!fs->path)
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
static const char *fs__get_mountpoint(struct fs *fs)
|
||||
static void fs__init_once(struct fs *fs)
|
||||
{
|
||||
if (fs__env_override(fs))
|
||||
return fs->path;
|
||||
|
||||
if (fs__check_mounts(fs))
|
||||
return fs->path;
|
||||
|
||||
if (fs__read_mounts(fs))
|
||||
return fs->path;
|
||||
|
||||
return NULL;
|
||||
if (!fs__env_override(fs) &&
|
||||
!fs__check_mounts(fs) &&
|
||||
!fs__read_mounts(fs)) {
|
||||
assert(!fs->path);
|
||||
} else {
|
||||
assert(fs->path);
|
||||
}
|
||||
}
|
||||
|
||||
static const char *fs__mountpoint(int idx)
|
||||
static const char *fs__mountpoint(const struct fs *fs)
|
||||
{
|
||||
struct fs *fs = &fs__entries[idx];
|
||||
|
||||
if (fs->found)
|
||||
return (const char *)fs->path;
|
||||
|
||||
/* the mount point was already checked for the mount point
|
||||
* but and did not exist, so return NULL to avoid scanning again.
|
||||
* This makes the found and not found paths cost equivalent
|
||||
* in case of multiple calls.
|
||||
*/
|
||||
if (fs->checked)
|
||||
return NULL;
|
||||
|
||||
return fs__get_mountpoint(fs);
|
||||
return fs->path;
|
||||
}
|
||||
|
||||
static const char *mount_overload(struct fs *fs)
|
||||
@@ -278,45 +266,29 @@ static const char *mount_overload(struct fs *fs)
|
||||
return getenv(upper_name) ?: *fs->mounts;
|
||||
}
|
||||
|
||||
static const char *fs__mount(int idx)
|
||||
static const char *fs__mount(struct fs *fs)
|
||||
{
|
||||
struct fs *fs = &fs__entries[idx];
|
||||
const char *mountpoint;
|
||||
|
||||
if (fs__mountpoint(idx))
|
||||
return (const char *)fs->path;
|
||||
pthread_mutex_lock(&fs->mount_mutex);
|
||||
|
||||
/* Check if path found inside the mutex to avoid races with other callers of mount. */
|
||||
mountpoint = fs__mountpoint(fs);
|
||||
if (mountpoint)
|
||||
goto out;
|
||||
|
||||
mountpoint = mount_overload(fs);
|
||||
|
||||
if (mount(NULL, mountpoint, fs->name, 0, NULL) < 0)
|
||||
return NULL;
|
||||
|
||||
return fs__check_mounts(fs) ? fs->path : NULL;
|
||||
if (mount(NULL, mountpoint, fs->name, 0, NULL) == 0 &&
|
||||
fs__valid_mount(mountpoint, fs->magic) == 0) {
|
||||
fs->path = strdup(mountpoint);
|
||||
mountpoint = fs->path;
|
||||
}
|
||||
out:
|
||||
pthread_mutex_unlock(&fs->mount_mutex);
|
||||
return mountpoint;
|
||||
}
|
||||
|
||||
#define FS(name, idx) \
|
||||
const char *name##__mountpoint(void) \
|
||||
{ \
|
||||
return fs__mountpoint(idx); \
|
||||
} \
|
||||
\
|
||||
const char *name##__mount(void) \
|
||||
{ \
|
||||
return fs__mount(idx); \
|
||||
} \
|
||||
\
|
||||
bool name##__configured(void) \
|
||||
{ \
|
||||
return name##__mountpoint() != NULL; \
|
||||
}
|
||||
|
||||
FS(sysfs, FS__SYSFS);
|
||||
FS(procfs, FS__PROCFS);
|
||||
FS(debugfs, FS__DEBUGFS);
|
||||
FS(tracefs, FS__TRACEFS);
|
||||
FS(hugetlbfs, FS__HUGETLBFS);
|
||||
FS(bpf_fs, FS__BPF_FS);
|
||||
|
||||
int filename__read_int(const char *filename, int *value)
|
||||
{
|
||||
char line[64];
|
||||
|
||||
@@ -13,17 +13,12 @@
|
||||
|
||||
#include "tracing_path.h"
|
||||
|
||||
static char tracing_mnt[PATH_MAX] = "/sys/kernel/debug";
|
||||
static char tracing_path[PATH_MAX] = "/sys/kernel/tracing";
|
||||
static char tracing_events_path[PATH_MAX] = "/sys/kernel/tracing/events";
|
||||
|
||||
static void __tracing_path_set(const char *tracing, const char *mountpoint)
|
||||
{
|
||||
snprintf(tracing_mnt, sizeof(tracing_mnt), "%s", mountpoint);
|
||||
snprintf(tracing_path, sizeof(tracing_path), "%s/%s",
|
||||
mountpoint, tracing);
|
||||
snprintf(tracing_events_path, sizeof(tracing_events_path), "%s/%s%s",
|
||||
mountpoint, tracing, "events");
|
||||
}
|
||||
|
||||
static const char *tracing_path_tracefs_mount(void)
|
||||
@@ -149,15 +144,15 @@ int tracing_path__strerror_open_tp(int err, char *buf, size_t size,
|
||||
/* sdt markers */
|
||||
if (!strncmp(filename, "sdt_", 4)) {
|
||||
snprintf(buf, size,
|
||||
"Error:\tFile %s/%s not found.\n"
|
||||
"Error:\tFile %s/events/%s not found.\n"
|
||||
"Hint:\tSDT event cannot be directly recorded on.\n"
|
||||
"\tPlease first use 'perf probe %s:%s' before recording it.\n",
|
||||
tracing_events_path, filename, sys, name);
|
||||
tracing_path, filename, sys, name);
|
||||
} else {
|
||||
snprintf(buf, size,
|
||||
"Error:\tFile %s/%s not found.\n"
|
||||
"Error:\tFile %s/events/%s not found.\n"
|
||||
"Hint:\tPerhaps this kernel misses some CONFIG_ setting to enable this feature?.\n",
|
||||
tracing_events_path, filename);
|
||||
tracing_path, filename);
|
||||
}
|
||||
break;
|
||||
}
|
||||
@@ -169,9 +164,9 @@ int tracing_path__strerror_open_tp(int err, char *buf, size_t size,
|
||||
break;
|
||||
case EACCES: {
|
||||
snprintf(buf, size,
|
||||
"Error:\tNo permissions to read %s/%s\n"
|
||||
"Error:\tNo permissions to read %s/events/%s\n"
|
||||
"Hint:\tTry 'sudo mount -o remount,mode=755 %s'\n",
|
||||
tracing_events_path, filename, tracing_path_mount());
|
||||
tracing_path, filename, tracing_path_mount());
|
||||
}
|
||||
break;
|
||||
default:
|
||||
|
||||
@@ -8,6 +8,7 @@
|
||||
#define __API_IO__
|
||||
|
||||
#include <errno.h>
|
||||
#include <poll.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
@@ -23,6 +24,8 @@ struct io {
|
||||
char *end;
|
||||
/* Currently accessed data pointer. */
|
||||
char *data;
|
||||
/* Read timeout, 0 implies no timeout. */
|
||||
int timeout_ms;
|
||||
/* Set true on when the end of file on read error. */
|
||||
bool eof;
|
||||
};
|
||||
@@ -35,6 +38,7 @@ static inline void io__init(struct io *io, int fd,
|
||||
io->buf = buf;
|
||||
io->end = buf;
|
||||
io->data = buf;
|
||||
io->timeout_ms = 0;
|
||||
io->eof = false;
|
||||
}
|
||||
|
||||
@@ -47,7 +51,29 @@ static inline int io__get_char(struct io *io)
|
||||
return -1;
|
||||
|
||||
if (ptr == io->end) {
|
||||
ssize_t n = read(io->fd, io->buf, io->buf_len);
|
||||
ssize_t n;
|
||||
|
||||
if (io->timeout_ms != 0) {
|
||||
struct pollfd pfds[] = {
|
||||
{
|
||||
.fd = io->fd,
|
||||
.events = POLLIN,
|
||||
},
|
||||
};
|
||||
|
||||
n = poll(pfds, 1, io->timeout_ms);
|
||||
if (n == 0)
|
||||
errno = ETIMEDOUT;
|
||||
if (n > 0 && !(pfds[0].revents & POLLIN)) {
|
||||
errno = EIO;
|
||||
n = -1;
|
||||
}
|
||||
if (n <= 0) {
|
||||
io->eof = true;
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
n = read(io->fd, io->buf, io->buf_len);
|
||||
|
||||
if (n <= 0) {
|
||||
io->eof = true;
|
||||
|
||||
@@ -99,6 +99,11 @@ static int cmp_cpu(const void *a, const void *b)
|
||||
return cpu_a->cpu - cpu_b->cpu;
|
||||
}
|
||||
|
||||
static struct perf_cpu __perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx)
|
||||
{
|
||||
return RC_CHK_ACCESS(cpus)->map[idx];
|
||||
}
|
||||
|
||||
static struct perf_cpu_map *cpu_map__trim_new(int nr_cpus, const struct perf_cpu *tmp_cpus)
|
||||
{
|
||||
size_t payload_size = nr_cpus * sizeof(struct perf_cpu);
|
||||
@@ -111,8 +116,12 @@ static struct perf_cpu_map *cpu_map__trim_new(int nr_cpus, const struct perf_cpu
|
||||
/* Remove dups */
|
||||
j = 0;
|
||||
for (i = 0; i < nr_cpus; i++) {
|
||||
if (i == 0 || RC_CHK_ACCESS(cpus)->map[i].cpu != RC_CHK_ACCESS(cpus)->map[i - 1].cpu)
|
||||
RC_CHK_ACCESS(cpus)->map[j++].cpu = RC_CHK_ACCESS(cpus)->map[i].cpu;
|
||||
if (i == 0 ||
|
||||
__perf_cpu_map__cpu(cpus, i).cpu !=
|
||||
__perf_cpu_map__cpu(cpus, i - 1).cpu) {
|
||||
RC_CHK_ACCESS(cpus)->map[j++].cpu =
|
||||
__perf_cpu_map__cpu(cpus, i).cpu;
|
||||
}
|
||||
}
|
||||
perf_cpu_map__set_nr(cpus, j);
|
||||
assert(j <= nr_cpus);
|
||||
@@ -269,26 +278,31 @@ out:
|
||||
return cpus;
|
||||
}
|
||||
|
||||
static int __perf_cpu_map__nr(const struct perf_cpu_map *cpus)
|
||||
{
|
||||
return RC_CHK_ACCESS(cpus)->nr;
|
||||
}
|
||||
|
||||
struct perf_cpu perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx)
|
||||
{
|
||||
struct perf_cpu result = {
|
||||
.cpu = -1
|
||||
};
|
||||
|
||||
if (cpus && idx < RC_CHK_ACCESS(cpus)->nr)
|
||||
return RC_CHK_ACCESS(cpus)->map[idx];
|
||||
if (cpus && idx < __perf_cpu_map__nr(cpus))
|
||||
return __perf_cpu_map__cpu(cpus, idx);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
int perf_cpu_map__nr(const struct perf_cpu_map *cpus)
|
||||
{
|
||||
return cpus ? RC_CHK_ACCESS(cpus)->nr : 1;
|
||||
return cpus ? __perf_cpu_map__nr(cpus) : 1;
|
||||
}
|
||||
|
||||
bool perf_cpu_map__empty(const struct perf_cpu_map *map)
|
||||
{
|
||||
return map ? RC_CHK_ACCESS(map)->map[0].cpu == -1 : true;
|
||||
return map ? __perf_cpu_map__cpu(map, 0).cpu == -1 : true;
|
||||
}
|
||||
|
||||
int perf_cpu_map__idx(const struct perf_cpu_map *cpus, struct perf_cpu cpu)
|
||||
@@ -299,10 +313,10 @@ int perf_cpu_map__idx(const struct perf_cpu_map *cpus, struct perf_cpu cpu)
|
||||
return -1;
|
||||
|
||||
low = 0;
|
||||
high = RC_CHK_ACCESS(cpus)->nr;
|
||||
high = __perf_cpu_map__nr(cpus);
|
||||
while (low < high) {
|
||||
int idx = (low + high) / 2;
|
||||
struct perf_cpu cpu_at_idx = RC_CHK_ACCESS(cpus)->map[idx];
|
||||
struct perf_cpu cpu_at_idx = __perf_cpu_map__cpu(cpus, idx);
|
||||
|
||||
if (cpu_at_idx.cpu == cpu.cpu)
|
||||
return idx;
|
||||
@@ -321,6 +335,32 @@ bool perf_cpu_map__has(const struct perf_cpu_map *cpus, struct perf_cpu cpu)
|
||||
return perf_cpu_map__idx(cpus, cpu) != -1;
|
||||
}
|
||||
|
||||
bool perf_cpu_map__equal(const struct perf_cpu_map *lhs, const struct perf_cpu_map *rhs)
|
||||
{
|
||||
int nr;
|
||||
|
||||
if (lhs == rhs)
|
||||
return true;
|
||||
|
||||
if (!lhs || !rhs)
|
||||
return false;
|
||||
|
||||
nr = __perf_cpu_map__nr(lhs);
|
||||
if (nr != __perf_cpu_map__nr(rhs))
|
||||
return false;
|
||||
|
||||
for (int idx = 0; idx < nr; idx++) {
|
||||
if (__perf_cpu_map__cpu(lhs, idx).cpu != __perf_cpu_map__cpu(rhs, idx).cpu)
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool perf_cpu_map__has_any_cpu(const struct perf_cpu_map *map)
|
||||
{
|
||||
return map && __perf_cpu_map__cpu(map, 0).cpu == -1;
|
||||
}
|
||||
|
||||
struct perf_cpu perf_cpu_map__max(const struct perf_cpu_map *map)
|
||||
{
|
||||
struct perf_cpu result = {
|
||||
@@ -328,7 +368,9 @@ struct perf_cpu perf_cpu_map__max(const struct perf_cpu_map *map)
|
||||
};
|
||||
|
||||
// cpu_map__trim_new() qsort()s it, cpu_map__default_new() sorts it as well.
|
||||
return RC_CHK_ACCESS(map)->nr > 0 ? RC_CHK_ACCESS(map)->map[RC_CHK_ACCESS(map)->nr - 1] : result;
|
||||
return __perf_cpu_map__nr(map) > 0
|
||||
? __perf_cpu_map__cpu(map, __perf_cpu_map__nr(map) - 1)
|
||||
: result;
|
||||
}
|
||||
|
||||
/** Is 'b' a subset of 'a'. */
|
||||
@@ -336,15 +378,15 @@ bool perf_cpu_map__is_subset(const struct perf_cpu_map *a, const struct perf_cpu
|
||||
{
|
||||
if (a == b || !b)
|
||||
return true;
|
||||
if (!a || RC_CHK_ACCESS(b)->nr > RC_CHK_ACCESS(a)->nr)
|
||||
if (!a || __perf_cpu_map__nr(b) > __perf_cpu_map__nr(a))
|
||||
return false;
|
||||
|
||||
for (int i = 0, j = 0; i < RC_CHK_ACCESS(a)->nr; i++) {
|
||||
if (RC_CHK_ACCESS(a)->map[i].cpu > RC_CHK_ACCESS(b)->map[j].cpu)
|
||||
for (int i = 0, j = 0; i < __perf_cpu_map__nr(a); i++) {
|
||||
if (__perf_cpu_map__cpu(a, i).cpu > __perf_cpu_map__cpu(b, j).cpu)
|
||||
return false;
|
||||
if (RC_CHK_ACCESS(a)->map[i].cpu == RC_CHK_ACCESS(b)->map[j].cpu) {
|
||||
if (__perf_cpu_map__cpu(a, i).cpu == __perf_cpu_map__cpu(b, j).cpu) {
|
||||
j++;
|
||||
if (j == RC_CHK_ACCESS(b)->nr)
|
||||
if (j == __perf_cpu_map__nr(b))
|
||||
return true;
|
||||
}
|
||||
}
|
||||
@@ -374,27 +416,27 @@ struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig,
|
||||
return perf_cpu_map__get(other);
|
||||
}
|
||||
|
||||
tmp_len = RC_CHK_ACCESS(orig)->nr + RC_CHK_ACCESS(other)->nr;
|
||||
tmp_len = __perf_cpu_map__nr(orig) + __perf_cpu_map__nr(other);
|
||||
tmp_cpus = malloc(tmp_len * sizeof(struct perf_cpu));
|
||||
if (!tmp_cpus)
|
||||
return NULL;
|
||||
|
||||
/* Standard merge algorithm from wikipedia */
|
||||
i = j = k = 0;
|
||||
while (i < RC_CHK_ACCESS(orig)->nr && j < RC_CHK_ACCESS(other)->nr) {
|
||||
if (RC_CHK_ACCESS(orig)->map[i].cpu <= RC_CHK_ACCESS(other)->map[j].cpu) {
|
||||
if (RC_CHK_ACCESS(orig)->map[i].cpu == RC_CHK_ACCESS(other)->map[j].cpu)
|
||||
while (i < __perf_cpu_map__nr(orig) && j < __perf_cpu_map__nr(other)) {
|
||||
if (__perf_cpu_map__cpu(orig, i).cpu <= __perf_cpu_map__cpu(other, j).cpu) {
|
||||
if (__perf_cpu_map__cpu(orig, i).cpu == __perf_cpu_map__cpu(other, j).cpu)
|
||||
j++;
|
||||
tmp_cpus[k++] = RC_CHK_ACCESS(orig)->map[i++];
|
||||
tmp_cpus[k++] = __perf_cpu_map__cpu(orig, i++);
|
||||
} else
|
||||
tmp_cpus[k++] = RC_CHK_ACCESS(other)->map[j++];
|
||||
tmp_cpus[k++] = __perf_cpu_map__cpu(other, j++);
|
||||
}
|
||||
|
||||
while (i < RC_CHK_ACCESS(orig)->nr)
|
||||
tmp_cpus[k++] = RC_CHK_ACCESS(orig)->map[i++];
|
||||
while (i < __perf_cpu_map__nr(orig))
|
||||
tmp_cpus[k++] = __perf_cpu_map__cpu(orig, i++);
|
||||
|
||||
while (j < RC_CHK_ACCESS(other)->nr)
|
||||
tmp_cpus[k++] = RC_CHK_ACCESS(other)->map[j++];
|
||||
while (j < __perf_cpu_map__nr(other))
|
||||
tmp_cpus[k++] = __perf_cpu_map__cpu(other, j++);
|
||||
assert(k <= tmp_len);
|
||||
|
||||
merged = cpu_map__trim_new(k, tmp_cpus);
|
||||
@@ -402,3 +444,38 @@ struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig,
|
||||
perf_cpu_map__put(orig);
|
||||
return merged;
|
||||
}
|
||||
|
||||
struct perf_cpu_map *perf_cpu_map__intersect(struct perf_cpu_map *orig,
|
||||
struct perf_cpu_map *other)
|
||||
{
|
||||
struct perf_cpu *tmp_cpus;
|
||||
int tmp_len;
|
||||
int i, j, k;
|
||||
struct perf_cpu_map *merged = NULL;
|
||||
|
||||
if (perf_cpu_map__is_subset(other, orig))
|
||||
return perf_cpu_map__get(orig);
|
||||
if (perf_cpu_map__is_subset(orig, other))
|
||||
return perf_cpu_map__get(other);
|
||||
|
||||
tmp_len = max(__perf_cpu_map__nr(orig), __perf_cpu_map__nr(other));
|
||||
tmp_cpus = malloc(tmp_len * sizeof(struct perf_cpu));
|
||||
if (!tmp_cpus)
|
||||
return NULL;
|
||||
|
||||
i = j = k = 0;
|
||||
while (i < __perf_cpu_map__nr(orig) && j < __perf_cpu_map__nr(other)) {
|
||||
if (__perf_cpu_map__cpu(orig, i).cpu < __perf_cpu_map__cpu(other, j).cpu)
|
||||
i++;
|
||||
else if (__perf_cpu_map__cpu(orig, i).cpu > __perf_cpu_map__cpu(other, j).cpu)
|
||||
j++;
|
||||
else {
|
||||
j++;
|
||||
tmp_cpus[k++] = __perf_cpu_map__cpu(orig, i++);
|
||||
}
|
||||
}
|
||||
if (k)
|
||||
merged = cpu_map__trim_new(k, tmp_cpus);
|
||||
free(tmp_cpus);
|
||||
return merged;
|
||||
}
|
||||
|
||||
@@ -36,18 +36,33 @@ void perf_evlist__init(struct perf_evlist *evlist)
|
||||
static void __perf_evlist__propagate_maps(struct perf_evlist *evlist,
|
||||
struct perf_evsel *evsel)
|
||||
{
|
||||
/*
|
||||
* We already have cpus for evsel (via PMU sysfs) so
|
||||
* keep it, if there's no target cpu list defined.
|
||||
*/
|
||||
if (evsel->system_wide) {
|
||||
/* System wide: set the cpu map of the evsel to all online CPUs. */
|
||||
perf_cpu_map__put(evsel->cpus);
|
||||
evsel->cpus = perf_cpu_map__new(NULL);
|
||||
} else if (evlist->has_user_cpus && evsel->is_pmu_core) {
|
||||
/*
|
||||
* User requested CPUs on a core PMU, ensure the requested CPUs
|
||||
* are valid by intersecting with those of the PMU.
|
||||
*/
|
||||
perf_cpu_map__put(evsel->cpus);
|
||||
evsel->cpus = perf_cpu_map__intersect(evlist->user_requested_cpus, evsel->own_cpus);
|
||||
} else if (!evsel->own_cpus || evlist->has_user_cpus ||
|
||||
(!evsel->requires_cpu && perf_cpu_map__empty(evlist->user_requested_cpus))) {
|
||||
(!evsel->requires_cpu && perf_cpu_map__has_any_cpu(evlist->user_requested_cpus))) {
|
||||
/*
|
||||
* The PMU didn't specify a default cpu map, this isn't a core
|
||||
* event and the user requested CPUs or the evlist user
|
||||
* requested CPUs have the "any CPU" (aka dummy) CPU value. In
|
||||
* which case use the user requested CPUs rather than the PMU
|
||||
* ones.
|
||||
*/
|
||||
perf_cpu_map__put(evsel->cpus);
|
||||
evsel->cpus = perf_cpu_map__get(evlist->user_requested_cpus);
|
||||
} else if (evsel->cpus != evsel->own_cpus) {
|
||||
/*
|
||||
* No user requested cpu map but the PMU cpu map doesn't match
|
||||
* the evsel's. Reset it back to the PMU cpu map.
|
||||
*/
|
||||
perf_cpu_map__put(evsel->cpus);
|
||||
evsel->cpus = perf_cpu_map__get(evsel->own_cpus);
|
||||
}
|
||||
|
||||
@@ -41,7 +41,14 @@ struct perf_sample_id {
|
||||
struct perf_evsel {
|
||||
struct list_head node;
|
||||
struct perf_event_attr attr;
|
||||
/** The commonly used cpu map of CPUs the event should be opened upon, etc. */
|
||||
struct perf_cpu_map *cpus;
|
||||
/**
|
||||
* The cpu map read from the PMU. For core PMUs this is the list of all
|
||||
* CPUs the event can be opened upon. For other PMUs this is the default
|
||||
* cpu map for opening the event on, for example, the first CPU on a
|
||||
* socket for an uncore event.
|
||||
*/
|
||||
struct perf_cpu_map *own_cpus;
|
||||
struct perf_thread_map *threads;
|
||||
struct xyarray *fd;
|
||||
@@ -55,9 +62,9 @@ struct perf_evsel {
|
||||
int nr_members;
|
||||
/*
|
||||
* system_wide is for events that need to be on every CPU, irrespective
|
||||
* of user requested CPUs or threads. Map propagation will set cpus to
|
||||
* this event's own_cpus, whereby they will contribute to evlist
|
||||
* all_cpus.
|
||||
* of user requested CPUs or threads. Tha main example of this is the
|
||||
* dummy event. Map propagation will set cpus for this event to all CPUs
|
||||
* as software PMU events like dummy, have a CPU map that is empty.
|
||||
*/
|
||||
bool system_wide;
|
||||
/*
|
||||
@@ -65,6 +72,8 @@ struct perf_evsel {
|
||||
* i.e. it cannot be the 'any CPU' value of -1.
|
||||
*/
|
||||
bool requires_cpu;
|
||||
/** Is the PMU for the event a core one? Effects the handling of own_cpus. */
|
||||
bool is_pmu_core;
|
||||
int idx;
|
||||
};
|
||||
|
||||
|
||||
@@ -11,8 +11,16 @@ struct perf_cpu {
|
||||
int cpu;
|
||||
};
|
||||
|
||||
struct perf_cache {
|
||||
int cache_lvl;
|
||||
int cache;
|
||||
};
|
||||
|
||||
struct perf_cpu_map;
|
||||
|
||||
/**
|
||||
* perf_cpu_map__dummy_new - a map with a singular "any CPU"/dummy -1 value.
|
||||
*/
|
||||
LIBPERF_API struct perf_cpu_map *perf_cpu_map__dummy_new(void);
|
||||
LIBPERF_API struct perf_cpu_map *perf_cpu_map__default_new(void);
|
||||
LIBPERF_API struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list);
|
||||
@@ -20,12 +28,23 @@ LIBPERF_API struct perf_cpu_map *perf_cpu_map__read(FILE *file);
|
||||
LIBPERF_API struct perf_cpu_map *perf_cpu_map__get(struct perf_cpu_map *map);
|
||||
LIBPERF_API struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig,
|
||||
struct perf_cpu_map *other);
|
||||
LIBPERF_API struct perf_cpu_map *perf_cpu_map__intersect(struct perf_cpu_map *orig,
|
||||
struct perf_cpu_map *other);
|
||||
LIBPERF_API void perf_cpu_map__put(struct perf_cpu_map *map);
|
||||
LIBPERF_API struct perf_cpu perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx);
|
||||
LIBPERF_API int perf_cpu_map__nr(const struct perf_cpu_map *cpus);
|
||||
/**
|
||||
* perf_cpu_map__empty - is map either empty or the "any CPU"/dummy value.
|
||||
*/
|
||||
LIBPERF_API bool perf_cpu_map__empty(const struct perf_cpu_map *map);
|
||||
LIBPERF_API struct perf_cpu perf_cpu_map__max(const struct perf_cpu_map *map);
|
||||
LIBPERF_API bool perf_cpu_map__has(const struct perf_cpu_map *map, struct perf_cpu cpu);
|
||||
LIBPERF_API bool perf_cpu_map__equal(const struct perf_cpu_map *lhs,
|
||||
const struct perf_cpu_map *rhs);
|
||||
/**
|
||||
* perf_cpu_map__any_cpu - Does the map contain the "any CPU"/dummy -1 value?
|
||||
*/
|
||||
LIBPERF_API bool perf_cpu_map__has_any_cpu(const struct perf_cpu_map *map);
|
||||
|
||||
#define perf_cpu_map__for_each_cpu(cpu, idx, cpus) \
|
||||
for ((idx) = 0, (cpu) = perf_cpu_map__cpu(cpus, idx); \
|
||||
|
||||
@@ -380,7 +380,8 @@ enum {
|
||||
PERF_STAT_CONFIG_TERM__AGGR_MODE = 0,
|
||||
PERF_STAT_CONFIG_TERM__INTERVAL = 1,
|
||||
PERF_STAT_CONFIG_TERM__SCALE = 2,
|
||||
PERF_STAT_CONFIG_TERM__MAX = 3,
|
||||
PERF_STAT_CONFIG_TERM__AGGR_LEVEL = 3,
|
||||
PERF_STAT_CONFIG_TERM__MAX = 4,
|
||||
};
|
||||
|
||||
struct perf_record_stat_config_entry {
|
||||
|
||||
@@ -36,38 +36,40 @@ static int is_absolute_path(const char *path)
|
||||
return path[0] == '/';
|
||||
}
|
||||
|
||||
static const char *get_pwd_cwd(void)
|
||||
static const char *get_pwd_cwd(char *buf, size_t sz)
|
||||
{
|
||||
static char cwd[PATH_MAX + 1];
|
||||
char *pwd;
|
||||
struct stat cwd_stat, pwd_stat;
|
||||
if (getcwd(cwd, PATH_MAX) == NULL)
|
||||
if (getcwd(buf, sz) == NULL)
|
||||
return NULL;
|
||||
pwd = getenv("PWD");
|
||||
if (pwd && strcmp(pwd, cwd)) {
|
||||
stat(cwd, &cwd_stat);
|
||||
if (pwd && strcmp(pwd, buf)) {
|
||||
stat(buf, &cwd_stat);
|
||||
if (!stat(pwd, &pwd_stat) &&
|
||||
pwd_stat.st_dev == cwd_stat.st_dev &&
|
||||
pwd_stat.st_ino == cwd_stat.st_ino) {
|
||||
strlcpy(cwd, pwd, PATH_MAX);
|
||||
strlcpy(buf, pwd, sz);
|
||||
}
|
||||
}
|
||||
return cwd;
|
||||
return buf;
|
||||
}
|
||||
|
||||
static const char *make_nonrelative_path(const char *path)
|
||||
static const char *make_nonrelative_path(char *buf, size_t sz, const char *path)
|
||||
{
|
||||
static char buf[PATH_MAX + 1];
|
||||
|
||||
if (is_absolute_path(path)) {
|
||||
if (strlcpy(buf, path, PATH_MAX) >= PATH_MAX)
|
||||
if (strlcpy(buf, path, sz) >= sz)
|
||||
die("Too long path: %.*s", 60, path);
|
||||
} else {
|
||||
const char *cwd = get_pwd_cwd();
|
||||
const char *cwd = get_pwd_cwd(buf, sz);
|
||||
|
||||
if (!cwd)
|
||||
die("Cannot determine the current working directory");
|
||||
if (snprintf(buf, PATH_MAX, "%s/%s", cwd, path) >= PATH_MAX)
|
||||
|
||||
if (strlen(cwd) + strlen(path) + 2 >= sz)
|
||||
die("Too long path: %.*s", 60, path);
|
||||
|
||||
strcat(buf, "/");
|
||||
strcat(buf, path);
|
||||
}
|
||||
return buf;
|
||||
}
|
||||
@@ -133,8 +135,11 @@ static void add_path(char **out, const char *path)
|
||||
if (path && *path) {
|
||||
if (is_absolute_path(path))
|
||||
astrcat(out, path);
|
||||
else
|
||||
astrcat(out, make_nonrelative_path(path));
|
||||
else {
|
||||
char buf[PATH_MAX];
|
||||
|
||||
astrcat(out, make_nonrelative_path(buf, sizeof(buf), path));
|
||||
}
|
||||
|
||||
astrcat(out, ":");
|
||||
}
|
||||
|
||||
@@ -16,6 +16,8 @@
|
||||
void add_cmdname(struct cmdnames *cmds, const char *name, size_t len)
|
||||
{
|
||||
struct cmdname *ent = malloc(sizeof(*ent) + len + 1);
|
||||
if (!ent)
|
||||
return;
|
||||
|
||||
ent->len = len;
|
||||
memcpy(ent->name, name, len);
|
||||
@@ -66,6 +68,7 @@ void exclude_cmds(struct cmdnames *cmds, struct cmdnames *excludes)
|
||||
while (ci < cmds->cnt && ei < excludes->cnt) {
|
||||
cmp = strcmp(cmds->names[ci]->name, excludes->names[ei]->name);
|
||||
if (cmp < 0) {
|
||||
zfree(&cmds->names[cj]);
|
||||
cmds->names[cj++] = cmds->names[ci++];
|
||||
} else if (cmp == 0) {
|
||||
ci++;
|
||||
@@ -75,9 +78,12 @@ void exclude_cmds(struct cmdnames *cmds, struct cmdnames *excludes)
|
||||
}
|
||||
}
|
||||
|
||||
while (ci < cmds->cnt)
|
||||
while (ci < cmds->cnt) {
|
||||
zfree(&cmds->names[cj]);
|
||||
cmds->names[cj++] = cmds->names[ci++];
|
||||
|
||||
}
|
||||
for (ci = cj; ci < cmds->cnt; ci++)
|
||||
zfree(&cmds->names[ci]);
|
||||
cmds->cnt = cj;
|
||||
}
|
||||
|
||||
|
||||
@@ -250,11 +250,20 @@ $(MAN_HTML): $(OUTPUT)%.html : %.txt
|
||||
$(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) -o $@+ $< && \
|
||||
mv $@+ $@
|
||||
|
||||
# Generate date from either KBUILD_BUILD_TIMESTAMP or git log of
|
||||
# the doc input file
|
||||
PERF_DATE = $(strip \
|
||||
$(if $(KBUILD_BUILD_TIMESTAMP), \
|
||||
$(shell date -u -d '$(KBUILD_BUILD_TIMESTAMP)' +%Y-%m-%d), \
|
||||
$(shell git log -1 --pretty="format:%cd" \
|
||||
--date=short --no-show-signature $<)))
|
||||
|
||||
ifdef USE_ASCIIDOCTOR
|
||||
$(OUTPUT)%.1 $(OUTPUT)%.5 $(OUTPUT)%.7 : %.txt
|
||||
$(QUIET_ASCIIDOC)$(RM) $@+ $@ && \
|
||||
$(ASCIIDOC) -b manpage -d manpage \
|
||||
$(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) -o $@+ $< && \
|
||||
$(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) \
|
||||
-adocdate=$(PERF_DATE) -o $@+ $< && \
|
||||
mv $@+ $@
|
||||
endif
|
||||
|
||||
@@ -266,9 +275,7 @@ $(OUTPUT)%.xml : %.txt
|
||||
$(QUIET_ASCIIDOC)$(RM) $@+ $@ && \
|
||||
$(ASCIIDOC) -b docbook -d manpage \
|
||||
$(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) \
|
||||
-aperf_date=$(shell git log -1 --pretty="format:%cd" \
|
||||
--date=short --no-show-signature $<) \
|
||||
-o $@+ $< && \
|
||||
-aperf_date=$(PERF_DATE) -o $@+ $< && \
|
||||
mv $@+ $@
|
||||
|
||||
XSLT = docbook.xsl
|
||||
|
||||
@@ -130,7 +130,7 @@ OPTIONS
|
||||
-F::
|
||||
--fields::
|
||||
Comma separated list of fields to print. Options are:
|
||||
comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff,
|
||||
comm, tid, pid, time, cpu, event, trace, ip, sym, dso, dsoff, addr, symoff,
|
||||
srcline, period, iregs, uregs, brstack, brstacksym, flags, bpf-output,
|
||||
brstackinsn, brstackinsnlen, brstackoff, callindent, insn, insnlen, synth,
|
||||
phys_addr, metric, misc, srccode, ipc, data_page_size, code_page_size, ins_lat,
|
||||
|
||||
@@ -308,6 +308,14 @@ use --per-die in addition to -a. (system-wide). The output includes the
|
||||
die number and the number of online processors on that die. This is
|
||||
useful to gauge the amount of aggregation.
|
||||
|
||||
--per-cache::
|
||||
Aggregate counts per cache instance for system-wide mode measurements. By
|
||||
default, the aggregation happens for the cache level at the highest index
|
||||
in the system. To specify a particular level, mention the cache level
|
||||
alongside the option in the format [Ll][1-9][0-9]*. For example:
|
||||
Using option "--per-cache=l3" or "--per-cache=L3" will aggregate the
|
||||
information at the boundary of the level 3 cache in the system.
|
||||
|
||||
--per-core::
|
||||
Aggregate counts per physical processor for system-wide mode measurements. This
|
||||
is a useful mode to detect imbalance between physical cores. To enable this mode,
|
||||
@@ -353,6 +361,15 @@ small group that need not have multiplexing is lowered. This option
|
||||
forbids the event merging logic from sharing events between groups and
|
||||
may be used to increase accuracy in this case.
|
||||
|
||||
--metric-no-threshold::
|
||||
Metric thresholds may increase the number of events necessary to
|
||||
compute whether a metric has exceeded its threshold expression. This
|
||||
may not be desirable, for example, as the events can introduce
|
||||
multiplexing. This option disables the adding of threshold expression
|
||||
events for a metric. However, if there are sufficient events to
|
||||
compute the threshold then the threshold is still computed and used to
|
||||
color the metric's computed value.
|
||||
|
||||
--quiet::
|
||||
Don't print output, warnings or messages. This is useful with perf stat
|
||||
record below to only write data to the perf.data file.
|
||||
@@ -379,6 +396,14 @@ Aggregate counts per processor socket for system-wide mode measurements.
|
||||
--per-die::
|
||||
Aggregate counts per processor die for system-wide mode measurements.
|
||||
|
||||
--per-cache::
|
||||
Aggregate counts per cache instance for system-wide mode measurements. By
|
||||
default, the aggregation happens for the cache level at the highest index
|
||||
in the system. To specify a particular level, mention the cache level
|
||||
alongside the option in the format [Ll][1-9][0-9]*. For example: Using
|
||||
option "--per-cache=l3" or "--per-cache=L3" will aggregate the
|
||||
information at the boundary of the level 3 cache in the system.
|
||||
|
||||
--per-core::
|
||||
Aggregate counts per physical processor for system-wide mode measurements.
|
||||
|
||||
@@ -389,6 +414,12 @@ For a group all metrics from the group are added.
|
||||
The events from the metrics are automatically measured.
|
||||
See perf list output for the possible metrics and metricgroups.
|
||||
|
||||
When threshold information is available for a metric, the
|
||||
color red is used to signify a metric has exceeded a threshold
|
||||
while green shows it hasn't. The default color means that
|
||||
no threshold information was available or the threshold
|
||||
couldn't be computed.
|
||||
|
||||
-A::
|
||||
--no-aggr::
|
||||
Do not aggregate counts across all monitored CPUs.
|
||||
|
||||
@@ -1075,6 +1075,11 @@ ifndef NO_AUXTRACE
|
||||
endif
|
||||
endif
|
||||
|
||||
ifdef EXTRA_TESTS
|
||||
$(call detected,CONFIG_EXTRA_TESTS)
|
||||
CFLAGS += -DHAVE_EXTRA_TESTS
|
||||
endif
|
||||
|
||||
ifndef NO_JVMTI
|
||||
ifneq (,$(wildcard /usr/sbin/update-java-alternatives))
|
||||
JDIR=$(shell /usr/sbin/update-java-alternatives -l | head -1 | awk '{print $$3}')
|
||||
|
||||
@@ -128,6 +128,10 @@ include ../scripts/utilities.mak
|
||||
#
|
||||
# Define BUILD_NONDISTRO to enable building an linking against libbfd and
|
||||
# libiberty distribution license incompatible libraries.
|
||||
#
|
||||
# Define EXTRA_TESTS to enable building extra tests useful mainly to perf
|
||||
# developers, such as:
|
||||
# x86 instruction decoder - new instructions test
|
||||
|
||||
# As per kernel Makefile, avoid funny character set dependencies
|
||||
unexport LC_ALL
|
||||
|
||||
@@ -26,7 +26,7 @@ static int sample_ustack(struct perf_sample *sample,
|
||||
|
||||
sp = (unsigned long) regs[PERF_REG_ARM_SP];
|
||||
|
||||
map = maps__find(thread->maps, (u64)sp);
|
||||
map = maps__find(thread__maps(thread), (u64)sp);
|
||||
if (!map) {
|
||||
pr_debug("failed to get stack map\n");
|
||||
free(buf);
|
||||
|
||||
@@ -14,6 +14,7 @@
|
||||
#include "../../../util/debug.h"
|
||||
#include "../../../util/evlist.h"
|
||||
#include "../../../util/pmu.h"
|
||||
#include "../../../util/pmus.h"
|
||||
#include "cs-etm.h"
|
||||
#include "arm-spe.h"
|
||||
#include "hisi-ptt.h"
|
||||
@@ -40,7 +41,7 @@ static struct perf_pmu **find_all_arm_spe_pmus(int *nr_spes, int *err)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
arm_spe_pmus[*nr_spes] = perf_pmu__find(arm_spe_pmu_name);
|
||||
arm_spe_pmus[*nr_spes] = perf_pmus__find(arm_spe_pmu_name);
|
||||
if (arm_spe_pmus[*nr_spes]) {
|
||||
pr_debug2("%s %d: arm_spe_pmu %d type %d name %s\n",
|
||||
__func__, __LINE__, *nr_spes,
|
||||
@@ -87,7 +88,7 @@ static struct perf_pmu **find_all_hisi_ptt_pmus(int *nr_ptts, int *err)
|
||||
rewinddir(dir);
|
||||
while ((dent = readdir(dir))) {
|
||||
if (strstr(dent->d_name, HISI_PTT_PMU_NAME) && idx < *nr_ptts) {
|
||||
hisi_ptt_pmus[idx] = perf_pmu__find(dent->d_name);
|
||||
hisi_ptt_pmus[idx] = perf_pmus__find(dent->d_name);
|
||||
if (hisi_ptt_pmus[idx])
|
||||
idx++;
|
||||
}
|
||||
@@ -131,7 +132,7 @@ struct auxtrace_record
|
||||
if (!evlist)
|
||||
return NULL;
|
||||
|
||||
cs_etm_pmu = perf_pmu__find(CORESIGHT_ETM_PMU_NAME);
|
||||
cs_etm_pmu = perf_pmus__find(CORESIGHT_ETM_PMU_NAME);
|
||||
arm_spe_pmus = find_all_arm_spe_pmus(&nr_spes, err);
|
||||
hisi_ptt_pmus = find_all_hisi_ptt_pmus(&nr_ptts, err);
|
||||
|
||||
|
||||
@@ -25,7 +25,7 @@
|
||||
#include "../../../util/evsel.h"
|
||||
#include "../../../util/perf_api_probe.h"
|
||||
#include "../../../util/evsel_config.h"
|
||||
#include "../../../util/pmu.h"
|
||||
#include "../../../util/pmus.h"
|
||||
#include "../../../util/cs-etm.h"
|
||||
#include <internal/lib.h> // page_size
|
||||
#include "../../../util/session.h"
|
||||
@@ -881,7 +881,7 @@ struct auxtrace_record *cs_etm_record_init(int *err)
|
||||
struct perf_pmu *cs_etm_pmu;
|
||||
struct cs_etm_recording *ptr;
|
||||
|
||||
cs_etm_pmu = perf_pmu__find(CORESIGHT_ETM_PMU_NAME);
|
||||
cs_etm_pmu = perf_pmus__find(CORESIGHT_ETM_PMU_NAME);
|
||||
|
||||
if (!cs_etm_pmu) {
|
||||
*err = -EINVAL;
|
||||
|
||||
@@ -19,27 +19,28 @@ if ! test -r $input; then
|
||||
exit 1
|
||||
fi
|
||||
|
||||
create_table_from_c()
|
||||
create_sc_table()
|
||||
{
|
||||
local sc nr last_sc
|
||||
local sc nr max_nr
|
||||
|
||||
while read sc nr; do
|
||||
printf "%s\n" " [$nr] = \"$sc\","
|
||||
last_sc=$sc
|
||||
max_nr=$nr
|
||||
done
|
||||
|
||||
printf "%s\n" "#define SYSCALLTBL_ARM64_MAX_ID __NR_$last_sc"
|
||||
echo "#define SYSCALLTBL_ARM64_MAX_ID $max_nr"
|
||||
}
|
||||
|
||||
create_table()
|
||||
{
|
||||
echo "#include \"$input\""
|
||||
echo "static const char *syscalltbl_arm64[] = {"
|
||||
create_table_from_c
|
||||
echo "static const char *const syscalltbl_arm64[] = {"
|
||||
create_sc_table
|
||||
echo "};"
|
||||
}
|
||||
|
||||
$gcc -E -dM -x c -I $incpath/include/uapi $input \
|
||||
|sed -ne 's/^#define __NR_//p' \
|
||||
|sort -t' ' -k2 -n \
|
||||
|awk '$2 ~ "__NR" && $3 !~ "__NR3264_" {
|
||||
sub("^#define __NR(3264)?_", "");
|
||||
print | "sort -k2 -n"}' \
|
||||
|create_table
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user