mirror of
https://github.com/Dasharo/linux.git
synced 2026-03-06 15:25:10 -08:00
Merge tag 'perf-tools-for-v5.16-2021-11-13' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux
Pull more perf tools updates from Arnaldo Carvalho de Melo:
"Hardware tracing:
- ARM:
* Print the buffer size consistently in hexadecimal in
ARM Coresight.
* Add Coresight snapshot mode support.
* Update --switch-events docs in 'perf record'.
* Support hardware-based PID tracing.
* Track task context switch for cpu-mode events.
- Vendor events:
* Add metric events JSON file for power10 platform
perf test:
- Get 'perf test' unit tests closer to kunit.
- Topology tests improvements.
- Remove bashisms from some tests.
perf bench:
- Fix memory leak of perf_cpu_map__new() in the futex benchmarks.
libbpf:
- Add some more weak libbpf functions to allow building with the
libbpf versions, old ones, present in distros.
libbeauty:
- Translate [gs]etsockopt 'level' argument integer values to
strings.
tools headers UAPI:
- Sync futex_waitv, arch prctl, sound, i915_drm and msr-index files
with the kernel sources.
Documentation:
- Add documentation to 'struct symbol'.
- Synchronize the definition of enum perf_hw_id with code in
tools/perf/design.txt"
* tag 'perf-tools-for-v5.16-2021-11-13' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux: (67 commits)
perf tests: Remove bash constructs from stat_all_pmu.sh
perf tests: Remove bash construct from record+zstd_comp_decomp.sh
perf test: Remove bash construct from stat_bpf_counters.sh test
perf bench futex: Fix memory leak of perf_cpu_map__new()
tools arch x86: Sync the msr-index.h copy with the kernel sources
tools headers UAPI: Sync drm/i915_drm.h with the kernel sources
tools headers UAPI: Sync sound/asound.h with the kernel sources
tools headers UAPI: Sync linux/prctl.h with the kernel sources
tools headers UAPI: Sync arch prctl headers with the kernel sources
perf tools: Add more weak libbpf functions
perf bpf: Avoid memory leak from perf_env__insert_btf()
perf symbols: Factor out annotation init/exit
perf symbols: Bit pack to save a byte
perf symbols: Add documentation to 'struct symbol'
tools headers UAPI: Sync files changed by new futex_waitv syscall
perf test bpf: Use ARRAY_CHECK() instead of ad-hoc equivalent, addressing array_size.cocci warning
perf arm-spe: Support hardware-based PID tracing
perf arm-spe: Save context ID in record
perf arm-spe: Update --switch-events docs in 'perf record'
perf arm-spe: Track task context switch for cpu-mode events
...
This commit is contained in:
@@ -625,6 +625,8 @@
|
||||
|
||||
#define MSR_IA32_BNDCFGS_RSVD 0x00000ffc
|
||||
|
||||
#define MSR_IA32_XFD 0x000001c4
|
||||
#define MSR_IA32_XFD_ERR 0x000001c5
|
||||
#define MSR_IA32_XSS 0x00000da0
|
||||
|
||||
#define MSR_IA32_APICBASE 0x0000001b
|
||||
|
||||
@@ -10,6 +10,10 @@
|
||||
#define ARCH_GET_CPUID 0x1011
|
||||
#define ARCH_SET_CPUID 0x1012
|
||||
|
||||
#define ARCH_GET_XCOMP_SUPP 0x1021
|
||||
#define ARCH_GET_XCOMP_PERM 0x1022
|
||||
#define ARCH_REQ_XCOMP_PERM 0x1023
|
||||
|
||||
#define ARCH_MAP_VDSO_X32 0x2001
|
||||
#define ARCH_MAP_VDSO_32 0x2002
|
||||
#define ARCH_MAP_VDSO_64 0x2003
|
||||
|
||||
@@ -880,8 +880,11 @@ __SYSCALL(__NR_memfd_secret, sys_memfd_secret)
|
||||
#define __NR_process_mrelease 448
|
||||
__SYSCALL(__NR_process_mrelease, sys_process_mrelease)
|
||||
|
||||
#define __NR_futex_waitv 449
|
||||
__SYSCALL(__NR_futex_waitv, sys_futex_waitv)
|
||||
|
||||
#undef __NR_syscalls
|
||||
#define __NR_syscalls 449
|
||||
#define __NR_syscalls 450
|
||||
|
||||
/*
|
||||
* 32 bit systems traditionally used different
|
||||
|
||||
@@ -1522,6 +1522,12 @@ struct drm_i915_gem_caching {
|
||||
#define I915_TILING_NONE 0
|
||||
#define I915_TILING_X 1
|
||||
#define I915_TILING_Y 2
|
||||
/*
|
||||
* Do not add new tiling types here. The I915_TILING_* values are for
|
||||
* de-tiling fence registers that no longer exist on modern platforms. Although
|
||||
* the hardware may support new types of tiling in general (e.g., Tile4), we
|
||||
* do not need to add them to the uapi that is specific to now-defunct ioctls.
|
||||
*/
|
||||
#define I915_TILING_LAST I915_TILING_Y
|
||||
|
||||
#define I915_BIT_6_SWIZZLE_NONE 0
|
||||
@@ -1824,6 +1830,7 @@ struct drm_i915_gem_context_param {
|
||||
* Extensions:
|
||||
* i915_context_engines_load_balance (I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE)
|
||||
* i915_context_engines_bond (I915_CONTEXT_ENGINES_EXT_BOND)
|
||||
* i915_context_engines_parallel_submit (I915_CONTEXT_ENGINES_EXT_PARALLEL_SUBMIT)
|
||||
*/
|
||||
#define I915_CONTEXT_PARAM_ENGINES 0xa
|
||||
|
||||
@@ -1846,6 +1853,55 @@ struct drm_i915_gem_context_param {
|
||||
* attempted to use it, never re-use this context param number.
|
||||
*/
|
||||
#define I915_CONTEXT_PARAM_RINGSIZE 0xc
|
||||
|
||||
/*
|
||||
* I915_CONTEXT_PARAM_PROTECTED_CONTENT:
|
||||
*
|
||||
* Mark that the context makes use of protected content, which will result
|
||||
* in the context being invalidated when the protected content session is.
|
||||
* Given that the protected content session is killed on suspend, the device
|
||||
* is kept awake for the lifetime of a protected context, so the user should
|
||||
* make sure to dispose of them once done.
|
||||
* This flag can only be set at context creation time and, when set to true,
|
||||
* must be preceded by an explicit setting of I915_CONTEXT_PARAM_RECOVERABLE
|
||||
* to false. This flag can't be set to true in conjunction with setting the
|
||||
* I915_CONTEXT_PARAM_BANNABLE flag to false. Creation example:
|
||||
*
|
||||
* .. code-block:: C
|
||||
*
|
||||
* struct drm_i915_gem_context_create_ext_setparam p_protected = {
|
||||
* .base = {
|
||||
* .name = I915_CONTEXT_CREATE_EXT_SETPARAM,
|
||||
* },
|
||||
* .param = {
|
||||
* .param = I915_CONTEXT_PARAM_PROTECTED_CONTENT,
|
||||
* .value = 1,
|
||||
* }
|
||||
* };
|
||||
* struct drm_i915_gem_context_create_ext_setparam p_norecover = {
|
||||
* .base = {
|
||||
* .name = I915_CONTEXT_CREATE_EXT_SETPARAM,
|
||||
* .next_extension = to_user_pointer(&p_protected),
|
||||
* },
|
||||
* .param = {
|
||||
* .param = I915_CONTEXT_PARAM_RECOVERABLE,
|
||||
* .value = 0,
|
||||
* }
|
||||
* };
|
||||
* struct drm_i915_gem_context_create_ext create = {
|
||||
* .flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS,
|
||||
* .extensions = to_user_pointer(&p_norecover);
|
||||
* };
|
||||
*
|
||||
* ctx_id = gem_context_create_ext(drm_fd, &create);
|
||||
*
|
||||
* In addition to the normal failure cases, setting this flag during context
|
||||
* creation can result in the following errors:
|
||||
*
|
||||
* -ENODEV: feature not available
|
||||
* -EPERM: trying to mark a recoverable or not bannable context as protected
|
||||
*/
|
||||
#define I915_CONTEXT_PARAM_PROTECTED_CONTENT 0xd
|
||||
/* Must be kept compact -- no holes and well documented */
|
||||
|
||||
__u64 value;
|
||||
@@ -2049,6 +2105,135 @@ struct i915_context_engines_bond {
|
||||
struct i915_engine_class_instance engines[N__]; \
|
||||
} __attribute__((packed)) name__
|
||||
|
||||
/**
|
||||
* struct i915_context_engines_parallel_submit - Configure engine for
|
||||
* parallel submission.
|
||||
*
|
||||
* Setup a slot in the context engine map to allow multiple BBs to be submitted
|
||||
* in a single execbuf IOCTL. Those BBs will then be scheduled to run on the GPU
|
||||
* in parallel. Multiple hardware contexts are created internally in the i915 to
|
||||
* run these BBs. Once a slot is configured for N BBs only N BBs can be
|
||||
* submitted in each execbuf IOCTL and this is implicit behavior e.g. The user
|
||||
* doesn't tell the execbuf IOCTL there are N BBs, the execbuf IOCTL knows how
|
||||
* many BBs there are based on the slot's configuration. The N BBs are the last
|
||||
* N buffer objects or first N if I915_EXEC_BATCH_FIRST is set.
|
||||
*
|
||||
* The default placement behavior is to create implicit bonds between each
|
||||
* context if each context maps to more than 1 physical engine (e.g. context is
|
||||
* a virtual engine). Also we only allow contexts of same engine class and these
|
||||
* contexts must be in logically contiguous order. Examples of the placement
|
||||
* behavior are described below. Lastly, the default is to not allow BBs to be
|
||||
* preempted mid-batch. Rather insert coordinated preemption points on all
|
||||
* hardware contexts between each set of BBs. Flags could be added in the future
|
||||
* to change both of these default behaviors.
|
||||
*
|
||||
* Returns -EINVAL if hardware context placement configuration is invalid or if
|
||||
* the placement configuration isn't supported on the platform / submission
|
||||
* interface.
|
||||
* Returns -ENODEV if extension isn't supported on the platform / submission
|
||||
* interface.
|
||||
*
|
||||
* .. code-block:: none
|
||||
*
|
||||
* Examples syntax:
|
||||
* CS[X] = generic engine of same class, logical instance X
|
||||
* INVALID = I915_ENGINE_CLASS_INVALID, I915_ENGINE_CLASS_INVALID_NONE
|
||||
*
|
||||
* Example 1 pseudo code:
|
||||
* set_engines(INVALID)
|
||||
* set_parallel(engine_index=0, width=2, num_siblings=1,
|
||||
* engines=CS[0],CS[1])
|
||||
*
|
||||
* Results in the following valid placement:
|
||||
* CS[0], CS[1]
|
||||
*
|
||||
* Example 2 pseudo code:
|
||||
* set_engines(INVALID)
|
||||
* set_parallel(engine_index=0, width=2, num_siblings=2,
|
||||
* engines=CS[0],CS[2],CS[1],CS[3])
|
||||
*
|
||||
* Results in the following valid placements:
|
||||
* CS[0], CS[1]
|
||||
* CS[2], CS[3]
|
||||
*
|
||||
* This can be thought of as two virtual engines, each containing two
|
||||
* engines thereby making a 2D array. However, there are bonds tying the
|
||||
* entries together and placing restrictions on how they can be scheduled.
|
||||
* Specifically, the scheduler can choose only vertical columns from the 2D
|
||||
* array. That is, CS[0] is bonded to CS[1] and CS[2] to CS[3]. So if the
|
||||
* scheduler wants to submit to CS[0], it must also choose CS[1] and vice
|
||||
* versa. Same for CS[2] requires also using CS[3].
|
||||
* VE[0] = CS[0], CS[2]
|
||||
* VE[1] = CS[1], CS[3]
|
||||
*
|
||||
* Example 3 pseudo code:
|
||||
* set_engines(INVALID)
|
||||
* set_parallel(engine_index=0, width=2, num_siblings=2,
|
||||
* engines=CS[0],CS[1],CS[1],CS[3])
|
||||
*
|
||||
* Results in the following valid and invalid placements:
|
||||
* CS[0], CS[1]
|
||||
* CS[1], CS[3] - Not logically contiguous, return -EINVAL
|
||||
*/
|
||||
struct i915_context_engines_parallel_submit {
|
||||
/**
|
||||
* @base: base user extension.
|
||||
*/
|
||||
struct i915_user_extension base;
|
||||
|
||||
/**
|
||||
* @engine_index: slot for parallel engine
|
||||
*/
|
||||
__u16 engine_index;
|
||||
|
||||
/**
|
||||
* @width: number of contexts per parallel engine or in other words the
|
||||
* number of batches in each submission
|
||||
*/
|
||||
__u16 width;
|
||||
|
||||
/**
|
||||
* @num_siblings: number of siblings per context or in other words the
|
||||
* number of possible placements for each submission
|
||||
*/
|
||||
__u16 num_siblings;
|
||||
|
||||
/**
|
||||
* @mbz16: reserved for future use; must be zero
|
||||
*/
|
||||
__u16 mbz16;
|
||||
|
||||
/**
|
||||
* @flags: all undefined flags must be zero, currently not defined flags
|
||||
*/
|
||||
__u64 flags;
|
||||
|
||||
/**
|
||||
* @mbz64: reserved for future use; must be zero
|
||||
*/
|
||||
__u64 mbz64[3];
|
||||
|
||||
/**
|
||||
* @engines: 2-d array of engine instances to configure parallel engine
|
||||
*
|
||||
* length = width (i) * num_siblings (j)
|
||||
* index = j + i * num_siblings
|
||||
*/
|
||||
struct i915_engine_class_instance engines[0];
|
||||
|
||||
} __packed;
|
||||
|
||||
#define I915_DEFINE_CONTEXT_ENGINES_PARALLEL_SUBMIT(name__, N__) struct { \
|
||||
struct i915_user_extension base; \
|
||||
__u16 engine_index; \
|
||||
__u16 width; \
|
||||
__u16 num_siblings; \
|
||||
__u16 mbz16; \
|
||||
__u64 flags; \
|
||||
__u64 mbz64[3]; \
|
||||
struct i915_engine_class_instance engines[N__]; \
|
||||
} __attribute__((packed)) name__
|
||||
|
||||
/**
|
||||
* DOC: Context Engine Map uAPI
|
||||
*
|
||||
@@ -2108,6 +2293,7 @@ struct i915_context_param_engines {
|
||||
__u64 extensions; /* linked chain of extension blocks, 0 terminates */
|
||||
#define I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE 0 /* see i915_context_engines_load_balance */
|
||||
#define I915_CONTEXT_ENGINES_EXT_BOND 1 /* see i915_context_engines_bond */
|
||||
#define I915_CONTEXT_ENGINES_EXT_PARALLEL_SUBMIT 2 /* see i915_context_engines_parallel_submit */
|
||||
struct i915_engine_class_instance engines[0];
|
||||
} __attribute__((packed));
|
||||
|
||||
@@ -2726,14 +2912,20 @@ struct drm_i915_engine_info {
|
||||
|
||||
/** @flags: Engine flags. */
|
||||
__u64 flags;
|
||||
#define I915_ENGINE_INFO_HAS_LOGICAL_INSTANCE (1 << 0)
|
||||
|
||||
/** @capabilities: Capabilities of this engine. */
|
||||
__u64 capabilities;
|
||||
#define I915_VIDEO_CLASS_CAPABILITY_HEVC (1 << 0)
|
||||
#define I915_VIDEO_AND_ENHANCE_CLASS_CAPABILITY_SFC (1 << 1)
|
||||
|
||||
/** @logical_instance: Logical instance of engine */
|
||||
__u16 logical_instance;
|
||||
|
||||
/** @rsvd1: Reserved fields. */
|
||||
__u64 rsvd1[4];
|
||||
__u16 rsvd1[3];
|
||||
/** @rsvd2: Reserved fields. */
|
||||
__u64 rsvd2[3];
|
||||
};
|
||||
|
||||
/**
|
||||
@@ -2979,8 +3171,12 @@ struct drm_i915_gem_create_ext {
|
||||
*
|
||||
* For I915_GEM_CREATE_EXT_MEMORY_REGIONS usage see
|
||||
* struct drm_i915_gem_create_ext_memory_regions.
|
||||
*
|
||||
* For I915_GEM_CREATE_EXT_PROTECTED_CONTENT usage see
|
||||
* struct drm_i915_gem_create_ext_protected_content.
|
||||
*/
|
||||
#define I915_GEM_CREATE_EXT_MEMORY_REGIONS 0
|
||||
#define I915_GEM_CREATE_EXT_PROTECTED_CONTENT 1
|
||||
__u64 extensions;
|
||||
};
|
||||
|
||||
@@ -3038,6 +3234,50 @@ struct drm_i915_gem_create_ext_memory_regions {
|
||||
__u64 regions;
|
||||
};
|
||||
|
||||
/**
|
||||
* struct drm_i915_gem_create_ext_protected_content - The
|
||||
* I915_OBJECT_PARAM_PROTECTED_CONTENT extension.
|
||||
*
|
||||
* If this extension is provided, buffer contents are expected to be protected
|
||||
* by PXP encryption and require decryption for scan out and processing. This
|
||||
* is only possible on platforms that have PXP enabled, on all other scenarios
|
||||
* using this extension will cause the ioctl to fail and return -ENODEV. The
|
||||
* flags parameter is reserved for future expansion and must currently be set
|
||||
* to zero.
|
||||
*
|
||||
* The buffer contents are considered invalid after a PXP session teardown.
|
||||
*
|
||||
* The encryption is guaranteed to be processed correctly only if the object
|
||||
* is submitted with a context created using the
|
||||
* I915_CONTEXT_PARAM_PROTECTED_CONTENT flag. This will also enable extra checks
|
||||
* at submission time on the validity of the objects involved.
|
||||
*
|
||||
* Below is an example on how to create a protected object:
|
||||
*
|
||||
* .. code-block:: C
|
||||
*
|
||||
* struct drm_i915_gem_create_ext_protected_content protected_ext = {
|
||||
* .base = { .name = I915_GEM_CREATE_EXT_PROTECTED_CONTENT },
|
||||
* .flags = 0,
|
||||
* };
|
||||
* struct drm_i915_gem_create_ext create_ext = {
|
||||
* .size = PAGE_SIZE,
|
||||
* .extensions = (uintptr_t)&protected_ext,
|
||||
* };
|
||||
*
|
||||
* int err = ioctl(fd, DRM_IOCTL_I915_GEM_CREATE_EXT, &create_ext);
|
||||
* if (err) ...
|
||||
*/
|
||||
struct drm_i915_gem_create_ext_protected_content {
|
||||
/** @base: Extension link. See struct i915_user_extension. */
|
||||
struct i915_user_extension base;
|
||||
/** @flags: reserved for future usage, currently MBZ */
|
||||
__u32 flags;
|
||||
};
|
||||
|
||||
/* ID of the protected content session managed by i915 when PXP is active */
|
||||
#define I915_PROTECTED_CONTENT_DEFAULT_SESSION 0xf
|
||||
|
||||
#if defined(__cplusplus)
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -268,5 +268,8 @@ struct prctl_mm_map {
|
||||
# define PR_SCHED_CORE_SHARE_TO 2 /* push core_sched cookie to pid */
|
||||
# define PR_SCHED_CORE_SHARE_FROM 3 /* pull core_sched cookie to pid */
|
||||
# define PR_SCHED_CORE_MAX 4
|
||||
# define PR_SCHED_CORE_SCOPE_THREAD 0
|
||||
# define PR_SCHED_CORE_SCOPE_THREAD_GROUP 1
|
||||
# define PR_SCHED_CORE_SCOPE_PROCESS_GROUP 2
|
||||
|
||||
#endif /* _LINUX_PRCTL_H */
|
||||
|
||||
@@ -1002,7 +1002,7 @@ typedef int __bitwise snd_ctl_elem_iface_t;
|
||||
#define SNDRV_CTL_ELEM_ACCESS_WRITE (1<<1)
|
||||
#define SNDRV_CTL_ELEM_ACCESS_READWRITE (SNDRV_CTL_ELEM_ACCESS_READ|SNDRV_CTL_ELEM_ACCESS_WRITE)
|
||||
#define SNDRV_CTL_ELEM_ACCESS_VOLATILE (1<<2) /* control value may be changed without a notification */
|
||||
// (1 << 3) is unused.
|
||||
/* (1 << 3) is unused. */
|
||||
#define SNDRV_CTL_ELEM_ACCESS_TLV_READ (1<<4) /* TLV read is possible */
|
||||
#define SNDRV_CTL_ELEM_ACCESS_TLV_WRITE (1<<5) /* TLV write is possible */
|
||||
#define SNDRV_CTL_ELEM_ACCESS_TLV_READWRITE (SNDRV_CTL_ELEM_ACCESS_TLV_READ|SNDRV_CTL_ELEM_ACCESS_TLV_WRITE)
|
||||
|
||||
@@ -469,7 +469,7 @@ This option sets the time out limit. The default value is 500 ms.
|
||||
|
||||
--switch-events::
|
||||
Record context switch events i.e. events of type PERF_RECORD_SWITCH or
|
||||
PERF_RECORD_SWITCH_CPU_WIDE. In some cases (e.g. Intel PT or CoreSight)
|
||||
PERF_RECORD_SWITCH_CPU_WIDE. In some cases (e.g. Intel PT, CoreSight or Arm SPE)
|
||||
switch events will be enabled automatically, which can be suppressed by
|
||||
the option --no-switch-events.
|
||||
|
||||
|
||||
@@ -516,17 +516,17 @@ kvm_ioctl_tbl := $(srctree)/tools/perf/trace/beauty/kvm_ioctl.sh
|
||||
$(kvm_ioctl_array): $(kvm_hdr_dir)/kvm.h $(kvm_ioctl_tbl)
|
||||
$(Q)$(SHELL) '$(kvm_ioctl_tbl)' $(kvm_hdr_dir) > $@
|
||||
|
||||
socket_ipproto_array := $(beauty_outdir)/socket_ipproto_array.c
|
||||
socket_ipproto_tbl := $(srctree)/tools/perf/trace/beauty/socket_ipproto.sh
|
||||
|
||||
$(socket_ipproto_array): $(linux_uapi_dir)/in.h $(socket_ipproto_tbl)
|
||||
$(Q)$(SHELL) '$(socket_ipproto_tbl)' $(linux_uapi_dir) > $@
|
||||
|
||||
socket_arrays := $(beauty_outdir)/socket_arrays.c
|
||||
socket_arrays := $(beauty_outdir)/socket.c
|
||||
socket_tbl := $(srctree)/tools/perf/trace/beauty/socket.sh
|
||||
|
||||
$(socket_arrays): $(beauty_linux_dir)/socket.h $(socket_tbl)
|
||||
$(Q)$(SHELL) '$(socket_tbl)' $(beauty_linux_dir) > $@
|
||||
$(socket_arrays): $(linux_uapi_dir)/in.h $(beauty_linux_dir)/socket.h $(socket_tbl)
|
||||
$(Q)$(SHELL) '$(socket_tbl)' $(linux_uapi_dir) $(beauty_linux_dir) > $@
|
||||
|
||||
sockaddr_arrays := $(beauty_outdir)/sockaddr.c
|
||||
sockaddr_tbl := $(srctree)/tools/perf/trace/beauty/sockaddr.sh
|
||||
|
||||
$(sockaddr_arrays): $(beauty_linux_dir)/socket.h $(sockaddr_tbl)
|
||||
$(Q)$(SHELL) '$(sockaddr_tbl)' $(beauty_linux_dir) > $@
|
||||
|
||||
vhost_virtio_ioctl_array := $(beauty_ioctl_outdir)/vhost_virtio_ioctl_array.c
|
||||
vhost_virtio_hdr_dir := $(srctree)/tools/include/uapi/linux
|
||||
@@ -736,8 +736,8 @@ prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h archheaders $(drm_ioc
|
||||
$(sndrv_ctl_ioctl_array) \
|
||||
$(kcmp_type_array) \
|
||||
$(kvm_ioctl_array) \
|
||||
$(socket_ipproto_array) \
|
||||
$(socket_arrays) \
|
||||
$(sockaddr_arrays) \
|
||||
$(vhost_virtio_ioctl_array) \
|
||||
$(madvise_behavior_array) \
|
||||
$(mmap_flags_array) \
|
||||
@@ -1113,8 +1113,8 @@ clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clea
|
||||
$(OUTPUT)$(sndrv_pcm_ioctl_array) \
|
||||
$(OUTPUT)$(kvm_ioctl_array) \
|
||||
$(OUTPUT)$(kcmp_type_array) \
|
||||
$(OUTPUT)$(socket_ipproto_array) \
|
||||
$(OUTPUT)$(socket_arrays) \
|
||||
$(OUTPUT)$(sockaddr_arrays) \
|
||||
$(OUTPUT)$(vhost_virtio_ioctl_array) \
|
||||
$(OUTPUT)$(perf_ioctl_array) \
|
||||
$(OUTPUT)$(prctl_option_array) \
|
||||
|
||||
@@ -2,6 +2,6 @@
|
||||
#ifndef ARCH_TESTS_H
|
||||
#define ARCH_TESTS_H
|
||||
|
||||
extern struct test arch_tests[];
|
||||
extern struct test_suite *arch_tests[];
|
||||
|
||||
#endif
|
||||
|
||||
@@ -3,18 +3,10 @@
|
||||
#include "tests/tests.h"
|
||||
#include "arch-tests.h"
|
||||
|
||||
struct test arch_tests[] = {
|
||||
struct test_suite *arch_tests[] = {
|
||||
#ifdef HAVE_DWARF_UNWIND_SUPPORT
|
||||
{
|
||||
.desc = "DWARF unwind",
|
||||
.func = test__dwarf_unwind,
|
||||
},
|
||||
&suite__dwarf_unwind,
|
||||
#endif
|
||||
{
|
||||
.desc = "Vectors page",
|
||||
.func = test__vectors_page,
|
||||
},
|
||||
{
|
||||
.func = NULL,
|
||||
},
|
||||
&suite__vectors_page,
|
||||
NULL,
|
||||
};
|
||||
|
||||
@@ -9,8 +9,7 @@
|
||||
|
||||
#define VECTORS__MAP_NAME "[vectors]"
|
||||
|
||||
int test__vectors_page(struct test *test __maybe_unused,
|
||||
int subtest __maybe_unused)
|
||||
static int test__vectors_page(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
|
||||
{
|
||||
void *start, *end;
|
||||
|
||||
@@ -22,3 +21,5 @@ int test__vectors_page(struct test *test __maybe_unused,
|
||||
|
||||
return TEST_OK;
|
||||
}
|
||||
|
||||
DEFINE_SUITE("Vectors page", vectors_page);
|
||||
|
||||
@@ -2,6 +2,6 @@
|
||||
#ifndef ARCH_TESTS_H
|
||||
#define ARCH_TESTS_H
|
||||
|
||||
extern struct test arch_tests[];
|
||||
extern struct test_suite *arch_tests[];
|
||||
|
||||
#endif
|
||||
|
||||
@@ -3,14 +3,9 @@
|
||||
#include "tests/tests.h"
|
||||
#include "arch-tests.h"
|
||||
|
||||
struct test arch_tests[] = {
|
||||
struct test_suite *arch_tests[] = {
|
||||
#ifdef HAVE_DWARF_UNWIND_SUPPORT
|
||||
{
|
||||
.desc = "DWARF unwind",
|
||||
.func = test__dwarf_unwind,
|
||||
},
|
||||
&suite__dwarf_unwind,
|
||||
#endif
|
||||
{
|
||||
.func = NULL,
|
||||
},
|
||||
NULL,
|
||||
};
|
||||
|
||||
@@ -23,6 +23,7 @@
|
||||
#include "../../../util/auxtrace.h"
|
||||
#include "../../../util/record.h"
|
||||
#include "../../../util/arm-spe.h"
|
||||
#include <tools/libc_compat.h> // reallocarray
|
||||
|
||||
#define KiB(x) ((x) * 1024)
|
||||
#define MiB(x) ((x) * 1024 * 1024)
|
||||
@@ -31,6 +32,8 @@ struct arm_spe_recording {
|
||||
struct auxtrace_record itr;
|
||||
struct perf_pmu *arm_spe_pmu;
|
||||
struct evlist *evlist;
|
||||
int wrapped_cnt;
|
||||
bool *wrapped;
|
||||
};
|
||||
|
||||
static void arm_spe_set_timestamp(struct auxtrace_record *itr,
|
||||
@@ -84,6 +87,55 @@ static int arm_spe_info_fill(struct auxtrace_record *itr,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void
|
||||
arm_spe_snapshot_resolve_auxtrace_defaults(struct record_opts *opts,
|
||||
bool privileged)
|
||||
{
|
||||
/*
|
||||
* The default snapshot size is the auxtrace mmap size. If neither auxtrace mmap size nor
|
||||
* snapshot size is specified, then the default is 4MiB for privileged users, 128KiB for
|
||||
* unprivileged users.
|
||||
*
|
||||
* The default auxtrace mmap size is 4MiB/page_size for privileged users, 128KiB for
|
||||
* unprivileged users. If an unprivileged user does not specify mmap pages, the mmap pages
|
||||
* will be reduced from the default 512KiB/page_size to 256KiB/page_size, otherwise the
|
||||
* user is likely to get an error as they exceed their mlock limmit.
|
||||
*/
|
||||
|
||||
/*
|
||||
* No size were given to '-S' or '-m,', so go with the default
|
||||
*/
|
||||
if (!opts->auxtrace_snapshot_size && !opts->auxtrace_mmap_pages) {
|
||||
if (privileged) {
|
||||
opts->auxtrace_mmap_pages = MiB(4) / page_size;
|
||||
} else {
|
||||
opts->auxtrace_mmap_pages = KiB(128) / page_size;
|
||||
if (opts->mmap_pages == UINT_MAX)
|
||||
opts->mmap_pages = KiB(256) / page_size;
|
||||
}
|
||||
} else if (!opts->auxtrace_mmap_pages && !privileged && opts->mmap_pages == UINT_MAX) {
|
||||
opts->mmap_pages = KiB(256) / page_size;
|
||||
}
|
||||
|
||||
/*
|
||||
* '-m,xyz' was specified but no snapshot size, so make the snapshot size as big as the
|
||||
* auxtrace mmap area.
|
||||
*/
|
||||
if (!opts->auxtrace_snapshot_size)
|
||||
opts->auxtrace_snapshot_size = opts->auxtrace_mmap_pages * (size_t)page_size;
|
||||
|
||||
/*
|
||||
* '-Sxyz' was specified but no auxtrace mmap area, so make the auxtrace mmap area big
|
||||
* enough to fit the requested snapshot size.
|
||||
*/
|
||||
if (!opts->auxtrace_mmap_pages) {
|
||||
size_t sz = opts->auxtrace_snapshot_size;
|
||||
|
||||
sz = round_up(sz, page_size) / page_size;
|
||||
opts->auxtrace_mmap_pages = roundup_pow_of_two(sz);
|
||||
}
|
||||
}
|
||||
|
||||
static int arm_spe_recording_options(struct auxtrace_record *itr,
|
||||
struct evlist *evlist,
|
||||
struct record_opts *opts)
|
||||
@@ -115,6 +167,36 @@ static int arm_spe_recording_options(struct auxtrace_record *itr,
|
||||
if (!opts->full_auxtrace)
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* we are in snapshot mode.
|
||||
*/
|
||||
if (opts->auxtrace_snapshot_mode) {
|
||||
/*
|
||||
* Command arguments '-Sxyz' and/or '-m,xyz' are missing, so fill those in with
|
||||
* default values.
|
||||
*/
|
||||
if (!opts->auxtrace_snapshot_size || !opts->auxtrace_mmap_pages)
|
||||
arm_spe_snapshot_resolve_auxtrace_defaults(opts, privileged);
|
||||
|
||||
/*
|
||||
* Snapshot size can't be bigger than the auxtrace area.
|
||||
*/
|
||||
if (opts->auxtrace_snapshot_size > opts->auxtrace_mmap_pages * (size_t)page_size) {
|
||||
pr_err("Snapshot size %zu must not be greater than AUX area tracing mmap size %zu\n",
|
||||
opts->auxtrace_snapshot_size,
|
||||
opts->auxtrace_mmap_pages * (size_t)page_size);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Something went wrong somewhere - this shouldn't happen.
|
||||
*/
|
||||
if (!opts->auxtrace_snapshot_size || !opts->auxtrace_mmap_pages) {
|
||||
pr_err("Failed to calculate default snapshot size and/or AUX area tracing mmap pages\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
|
||||
/* We are in full trace mode but '-m,xyz' wasn't specified */
|
||||
if (!opts->auxtrace_mmap_pages) {
|
||||
if (privileged) {
|
||||
@@ -138,6 +220,9 @@ static int arm_spe_recording_options(struct auxtrace_record *itr,
|
||||
}
|
||||
}
|
||||
|
||||
if (opts->auxtrace_snapshot_mode)
|
||||
pr_debug2("%sx snapshot size: %zu\n", ARM_SPE_PMU_NAME,
|
||||
opts->auxtrace_snapshot_size);
|
||||
|
||||
/*
|
||||
* To obtain the auxtrace buffer file descriptor, the auxtrace event
|
||||
@@ -166,8 +251,199 @@ static int arm_spe_recording_options(struct auxtrace_record *itr,
|
||||
tracking_evsel->core.attr.sample_period = 1;
|
||||
|
||||
/* In per-cpu case, always need the time of mmap events etc */
|
||||
if (!perf_cpu_map__empty(cpus))
|
||||
if (!perf_cpu_map__empty(cpus)) {
|
||||
evsel__set_sample_bit(tracking_evsel, TIME);
|
||||
evsel__set_sample_bit(tracking_evsel, CPU);
|
||||
|
||||
/* also track task context switch */
|
||||
if (!record_opts__no_switch_events(opts))
|
||||
tracking_evsel->core.attr.context_switch = 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Parse the optional size argument of the snapshot option and switch @opts
 * into snapshot mode.  A NULL @str leaves the size at 0.
 *
 * Returns 0 on success, -1 if @str has trailing characters after the number
 * or the value does not fit in a size_t.
 */
static int arm_spe_parse_snapshot_options(struct auxtrace_record *itr __maybe_unused,
					  struct record_opts *opts,
					  const char *str)
{
	unsigned long long size = 0;

	if (str != NULL) {
		char *rest;

		/* Base 0: accept decimal, octal and hexadecimal notation. */
		size = strtoull(str, &rest, 0);
		if (*rest != '\0' || size > SIZE_MAX)
			return -1;
	}

	opts->auxtrace_snapshot_size = size;
	opts->auxtrace_snapshot_mode = true;

	return 0;
}
|
||||
|
||||
static int arm_spe_snapshot_start(struct auxtrace_record *itr)
|
||||
{
|
||||
struct arm_spe_recording *ptr =
|
||||
container_of(itr, struct arm_spe_recording, itr);
|
||||
struct evsel *evsel;
|
||||
|
||||
evlist__for_each_entry(ptr->evlist, evsel) {
|
||||
if (evsel->core.attr.type == ptr->arm_spe_pmu->type)
|
||||
return evsel__disable(evsel);
|
||||
}
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
static int arm_spe_snapshot_finish(struct auxtrace_record *itr)
|
||||
{
|
||||
struct arm_spe_recording *ptr =
|
||||
container_of(itr, struct arm_spe_recording, itr);
|
||||
struct evsel *evsel;
|
||||
|
||||
evlist__for_each_entry(ptr->evlist, evsel) {
|
||||
if (evsel->core.attr.type == ptr->arm_spe_pmu->type)
|
||||
return evsel__enable(evsel);
|
||||
}
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
static int arm_spe_alloc_wrapped_array(struct arm_spe_recording *ptr, int idx)
|
||||
{
|
||||
bool *wrapped;
|
||||
int cnt = ptr->wrapped_cnt, new_cnt, i;
|
||||
|
||||
/*
|
||||
* No need to allocate, so return early.
|
||||
*/
|
||||
if (idx < cnt)
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* Make ptr->wrapped as big as idx.
|
||||
*/
|
||||
new_cnt = idx + 1;
|
||||
|
||||
/*
|
||||
* Free'ed in arm_spe_recording_free().
|
||||
*/
|
||||
wrapped = reallocarray(ptr->wrapped, new_cnt, sizeof(bool));
|
||||
if (!wrapped)
|
||||
return -ENOMEM;
|
||||
|
||||
/*
|
||||
* init new allocated values.
|
||||
*/
|
||||
for (i = cnt; i < new_cnt; i++)
|
||||
wrapped[i] = false;
|
||||
|
||||
ptr->wrapped_cnt = new_cnt;
|
||||
ptr->wrapped = wrapped;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Decide whether the write pointer @head has already gone around the ring
 * buffer @buffer of @buffer_size bytes at least once.
 *
 * A head at or beyond the buffer size has trivially wrapped.  Otherwise the
 * tail end of the buffer is inspected: any non-zero data there means head
 * has been there before.  This relies on mmap'ed pages starting out zeroed
 * and on a fresh mapping being made for every session.
 */
static bool arm_spe_buffer_has_wrapped(unsigned char *buffer,
				       size_t buffer_size, u64 head)
{
	/* Scan in 64-bit words rather than bytes to speed things up. */
	const u64 *words = (u64 *)buffer;
	u64 first, last, pos;

	/* Head keeps increasing and is past the end: it must have wrapped. */
	if (head >= buffer_size)
		return true;

	/*
	 * Look at the final 512 bytes (an arbitrary choice) of the buffer,
	 * or at whatever lies beyond head when head is already inside that
	 * window.
	 */
	first = buffer_size - 512;
	if (head > first)
		first = head;

	/* Convert byte offsets into u64 indices. */
	first /= sizeof(u64);
	last = buffer_size / sizeof(u64);

	for (pos = first; pos < last; pos++) {
		if (words[pos])
			return true;
	}

	return false;
}
|
||||
|
||||
/*
 * find_snapshot hook: adjust *head and *old so that they describe the data
 * to be captured from AUX mmap @mm (index @idx, contents at @data).
 *
 * Until a wrap has been detected for this mmap, *head and *old are left as
 * they are so that only the data between them is snapshot'ed, which avoids
 * bloating the perf.data file with zeros.  Once a wrap has been seen it is
 * remembered per index, and the entire AUX ring buffer is taken from then on.
 *
 * Returns 0 on success or the error from growing the wrap-tracking array.
 */
static int arm_spe_find_snapshot(struct auxtrace_record *itr, int idx,
				 struct auxtrace_mmap *mm, unsigned char *data,
				 u64 *head, u64 *old)
{
	struct arm_spe_recording *sper =
			container_of(itr, struct arm_spe_recording, itr);
	bool seen_wrap;
	int ret;

	/* First time this mmap index is seen: make room to track it. */
	if (idx >= sper->wrapped_cnt) {
		ret = arm_spe_alloc_wrapped_array(sper, idx);
		if (ret)
			return ret;
	}

	/* The buffer only needs inspecting until a wrap has been recorded. */
	seen_wrap = sper->wrapped[idx];
	if (!seen_wrap && arm_spe_buffer_has_wrapped(data, mm->len, *head)) {
		sper->wrapped[idx] = true;
		seen_wrap = true;
	}

	pr_debug3("%s: mmap index %d old head %zu new head %zu size %zu\n",
		  __func__, idx, (size_t)*old, (size_t)*head, mm->len);

	if (seen_wrap) {
		/*
		 * Pick up the whole AUX buffer: make sure *head is at least
		 * one buffer length, then place *old exactly one buffer
		 * length behind it.
		 */
		if (*head < mm->len)
			*head += mm->len;
		*old = *head - mm->len;
	}

	return 0;
}
|
||||
@@ -186,6 +462,7 @@ static void arm_spe_recording_free(struct auxtrace_record *itr)
|
||||
struct arm_spe_recording *sper =
|
||||
container_of(itr, struct arm_spe_recording, itr);
|
||||
|
||||
free(sper->wrapped);
|
||||
free(sper);
|
||||
}
|
||||
|
||||
@@ -207,6 +484,10 @@ struct auxtrace_record *arm_spe_recording_init(int *err,
|
||||
|
||||
sper->arm_spe_pmu = arm_spe_pmu;
|
||||
sper->itr.pmu = arm_spe_pmu;
|
||||
sper->itr.snapshot_start = arm_spe_snapshot_start;
|
||||
sper->itr.snapshot_finish = arm_spe_snapshot_finish;
|
||||
sper->itr.find_snapshot = arm_spe_find_snapshot;
|
||||
sper->itr.parse_snapshot_options = arm_spe_parse_snapshot_options;
|
||||
sper->itr.recording_options = arm_spe_recording_options;
|
||||
sper->itr.info_priv_size = arm_spe_info_priv_size;
|
||||
sper->itr.info_fill = arm_spe_info_fill;
|
||||
|
||||
@@ -2,6 +2,6 @@
|
||||
#ifndef ARCH_TESTS_H
|
||||
#define ARCH_TESTS_H
|
||||
|
||||
extern struct test arch_tests[];
|
||||
extern struct test_suite *arch_tests[];
|
||||
|
||||
#endif
|
||||
|
||||
@@ -3,14 +3,10 @@
|
||||
#include "tests/tests.h"
|
||||
#include "arch-tests.h"
|
||||
|
||||
struct test arch_tests[] = {
|
||||
|
||||
struct test_suite *arch_tests[] = {
|
||||
#ifdef HAVE_DWARF_UNWIND_SUPPORT
|
||||
{
|
||||
.desc = "Test dwarf unwind",
|
||||
.func = test__dwarf_unwind,
|
||||
},
|
||||
&suite__dwarf_unwind,
|
||||
#endif
|
||||
{
|
||||
.func = NULL,
|
||||
},
|
||||
NULL,
|
||||
};
|
||||
|
||||
@@ -370,6 +370,7 @@
|
||||
446 common landlock_restrict_self sys_landlock_restrict_self
|
||||
447 common memfd_secret sys_memfd_secret
|
||||
448 common process_mrelease sys_process_mrelease
|
||||
449 common futex_waitv sys_futex_waitv
|
||||
|
||||
#
|
||||
# Due to a historical design error, certain syscalls are numbered differently
|
||||
|
||||
@@ -2,15 +2,15 @@
|
||||
#ifndef ARCH_TESTS_H
|
||||
#define ARCH_TESTS_H
|
||||
|
||||
struct test;
|
||||
struct test_suite;
|
||||
|
||||
/* Tests */
|
||||
int test__rdpmc(struct test *test, int subtest);
|
||||
int test__insn_x86(struct test *test, int subtest);
|
||||
int test__intel_pt_pkt_decoder(struct test *test, int subtest);
|
||||
int test__bp_modify(struct test *test, int subtest);
|
||||
int test__x86_sample_parsing(struct test *test, int subtest);
|
||||
int test__rdpmc(struct test_suite *test, int subtest);
|
||||
int test__insn_x86(struct test_suite *test, int subtest);
|
||||
int test__intel_pt_pkt_decoder(struct test_suite *test, int subtest);
|
||||
int test__bp_modify(struct test_suite *test, int subtest);
|
||||
int test__x86_sample_parsing(struct test_suite *test, int subtest);
|
||||
|
||||
extern struct test arch_tests[];
|
||||
extern struct test_suite *arch_tests[];
|
||||
|
||||
#endif
|
||||
|
||||
@@ -3,39 +3,28 @@
|
||||
#include "tests/tests.h"
|
||||
#include "arch-tests.h"
|
||||
|
||||
struct test arch_tests[] = {
|
||||
{
|
||||
.desc = "x86 rdpmc",
|
||||
.func = test__rdpmc,
|
||||
},
|
||||
#ifdef HAVE_DWARF_UNWIND_SUPPORT
|
||||
{
|
||||
.desc = "DWARF unwind",
|
||||
.func = test__dwarf_unwind,
|
||||
},
|
||||
#endif
|
||||
DEFINE_SUITE("x86 rdpmc", rdpmc);
|
||||
#ifdef HAVE_AUXTRACE_SUPPORT
|
||||
{
|
||||
.desc = "x86 instruction decoder - new instructions",
|
||||
.func = test__insn_x86,
|
||||
},
|
||||
{
|
||||
.desc = "Intel PT packet decoder",
|
||||
.func = test__intel_pt_pkt_decoder,
|
||||
},
|
||||
DEFINE_SUITE("x86 instruction decoder - new instructions", insn_x86);
|
||||
DEFINE_SUITE("Intel PT packet decoder", intel_pt_pkt_decoder);
|
||||
#endif
|
||||
#if defined(__x86_64__)
|
||||
{
|
||||
.desc = "x86 bp modify",
|
||||
.func = test__bp_modify,
|
||||
},
|
||||
DEFINE_SUITE("x86 bp modify", bp_modify);
|
||||
#endif
|
||||
{
|
||||
.desc = "x86 Sample parsing",
|
||||
.func = test__x86_sample_parsing,
|
||||
},
|
||||
{
|
||||
.func = NULL,
|
||||
},
|
||||
DEFINE_SUITE("x86 Sample parsing", x86_sample_parsing);
|
||||
|
||||
struct test_suite *arch_tests[] = {
|
||||
&suite__rdpmc,
|
||||
#ifdef HAVE_DWARF_UNWIND_SUPPORT
|
||||
&suite__dwarf_unwind,
|
||||
#endif
|
||||
#ifdef HAVE_AUXTRACE_SUPPORT
|
||||
&suite__insn_x86,
|
||||
&suite__intel_pt_pkt_decoder,
|
||||
#endif
|
||||
#if defined(__x86_64__)
|
||||
&suite__bp_modify,
|
||||
#endif
|
||||
&suite__x86_sample_parsing,
|
||||
NULL,
|
||||
};
|
||||
|
||||
@@ -204,7 +204,7 @@ out:
|
||||
return rip == (unsigned long) bp_1 ? TEST_OK : TEST_FAIL;
|
||||
}
|
||||
|
||||
int test__bp_modify(struct test *test __maybe_unused,
|
||||
int test__bp_modify(struct test_suite *test __maybe_unused,
|
||||
int subtest __maybe_unused)
|
||||
{
|
||||
TEST_ASSERT_VAL("modify test 1 failed\n", !bp_modify1());
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user