Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf updates from Ingo Molnar:
"Kernel improvements:
- watchdog driver improvements by Li Zefan
- Power7 CPI stack events related improvements by Sukadev Bhattiprolu
- event multiplexing via hrtimers and other improvements by Stephane
Eranian
- kernel stack use optimization by Andrew Hunter
- AMD IOMMU uncore PMU support by Suravee Suthikulpanit
- NMI handling rate-limits by Dave Hansen
- various hw_breakpoint fixes by Oleg Nesterov
- hw_breakpoint overflow period sampling and related signal handling
fixes by Jiri Olsa
- Intel Haswell PMU support by Andi Kleen
Tooling improvements:
- Reset SIGTERM handler in workload child process, fix from David
Ahern.
- Makefile reorganization, prep work for Kconfig patches, from Jiri
Olsa.
- Add automated make test suite, from Jiri Olsa.
- Add --percent-limit option to 'top' and 'report', from Namhyung
Kim.
- Sorting improvements, from Namhyung Kim.
- Expand definition of sysfs format attribute, from Michael Ellerman.
Tooling fixes:
- 'perf tests' fixes from Jiri Olsa.
- Make Power7 CPI stack events available in sysfs, from Sukadev
Bhattiprolu.
- Handle death by SIGTERM in 'perf record', fix from David Ahern.
- Fix printing of perf_event_paranoid message, from David Ahern.
- Handle realloc failures in 'perf kvm', from David Ahern.
- Fix divide by 0 in variance, from David Ahern.
- Save parent pid in thread struct, from David Ahern.
- Handle JITed code in shared memory, from Andi Kleen.
- Fixes for 'perf diff', from Jiri Olsa.
- Remove some unused struct members, from Jiri Olsa.
- Add missing liblk.a dependency for python/perf.so, fix from Jiri
Olsa.
- Respect CROSS_COMPILE in liblk.a, from Rabin Vincent.
- No need to do locking when adding hists in perf report, only 'top'
needs that, from Namhyung Kim.
 - Fix alignment of symbol column in the hists browser (top,
   report) when -v is given, from Namhyung Kim.
- Fix 'perf top' -E option behavior, from Namhyung Kim.
- Fix bug in isupper() and islower(), from Sukadev Bhattiprolu.
- Fix compile errors in bp_signal 'perf test', from Sukadev
Bhattiprolu.
... and more things"
* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (102 commits)
perf/x86: Disable PEBS-LL in intel_pmu_pebs_disable()
perf/x86: Fix shared register mutual exclusion enforcement
perf/x86/intel: Support full width counting
x86: Add NMI duration tracepoints
perf: Drop sample rate when sampling is too slow
x86: Warn when NMI handlers take large amounts of time
hw_breakpoint: Introduce "struct bp_cpuinfo"
hw_breakpoint: Simplify *register_wide_hw_breakpoint()
hw_breakpoint: Introduce cpumask_of_bp()
hw_breakpoint: Simplify the "weight" usage in toggle_bp_slot() paths
hw_breakpoint: Simplify list/idx mess in toggle_bp_slot() paths
perf/x86/intel: Add mem-loads/stores support for Haswell
perf/x86/intel: Support Haswell/v4 LBR format
perf/x86/intel: Move NMI clearing to end of PMI handler
perf/x86/intel: Add Haswell PEBS support
perf/x86/intel: Add simple Haswell PMU support
perf/x86/intel: Add Haswell PEBS record support
perf/x86/intel: Fix sparse warning
perf/x86/amd: AMD IOMMU Performance Counter PERF uncore PMU implementation
perf/x86/amd: Add IOMMU Performance Counter resource management
...
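As a quick illustration of the new '--percent-limit' option mentioned
above (a usage sketch, not part of the commit; exact output depends on
your workload and perf version):

    # Record system-wide, then hide hist entries below 5% overhead
    $ perf record -a -- sleep 5
    $ perf report --percent-limit 5

The same option is accepted by 'perf top'.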
@@ -27,14 +27,36 @@ Description:	Generic performance monitoring events
		"basename".

What:		/sys/devices/cpu/events/PM_LD_MISS_L1
		/sys/devices/cpu/events/PM_LD_REF_L1
		/sys/devices/cpu/events/PM_CYC
What:		/sys/devices/cpu/events/PM_1PLUS_PPC_CMPL
		/sys/devices/cpu/events/PM_BRU_FIN
		/sys/devices/cpu/events/PM_GCT_NOSLOT_CYC
		/sys/devices/cpu/events/PM_BRU_MPRED
		/sys/devices/cpu/events/PM_INST_CMPL
		/sys/devices/cpu/events/PM_CMPLU_STALL
		/sys/devices/cpu/events/PM_CMPLU_STALL_BRU
		/sys/devices/cpu/events/PM_CMPLU_STALL_DCACHE_MISS
		/sys/devices/cpu/events/PM_CMPLU_STALL_DFU
		/sys/devices/cpu/events/PM_CMPLU_STALL_DIV
		/sys/devices/cpu/events/PM_CMPLU_STALL_ERAT_MISS
		/sys/devices/cpu/events/PM_CMPLU_STALL_FXU
		/sys/devices/cpu/events/PM_CMPLU_STALL_IFU
		/sys/devices/cpu/events/PM_CMPLU_STALL_LSU
		/sys/devices/cpu/events/PM_CMPLU_STALL_REJECT
		/sys/devices/cpu/events/PM_CMPLU_STALL_SCALAR
		/sys/devices/cpu/events/PM_CMPLU_STALL_SCALAR_LONG
		/sys/devices/cpu/events/PM_CMPLU_STALL_STORE
		/sys/devices/cpu/events/PM_CMPLU_STALL_THRD
		/sys/devices/cpu/events/PM_CMPLU_STALL_VECTOR
		/sys/devices/cpu/events/PM_CMPLU_STALL_VECTOR_LONG
		/sys/devices/cpu/events/PM_CYC
		/sys/devices/cpu/events/PM_GCT_NOSLOT_BR_MPRED
		/sys/devices/cpu/events/PM_GCT_NOSLOT_BR_MPRED_IC_MISS
		/sys/devices/cpu/events/PM_GCT_NOSLOT_CYC
		/sys/devices/cpu/events/PM_GCT_NOSLOT_IC_MISS
		/sys/devices/cpu/events/PM_GRP_CMPL
		/sys/devices/cpu/events/PM_INST_CMPL
		/sys/devices/cpu/events/PM_LD_MISS_L1
		/sys/devices/cpu/events/PM_LD_REF_L1
		/sys/devices/cpu/events/PM_RUN_CYC
		/sys/devices/cpu/events/PM_RUN_INST_CMPL

Date:		2013/01/08
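Once these aliases are exported, tools can refer to the events by name
instead of by raw code. A usage sketch (assumes a POWER7 machine with
this patch set; the event names are the ones listed above):

    # List the events the CPU PMU exports via sysfs
    $ ls /sys/devices/cpu/events/
    # Count completion-stall cycles using the symbolic name
    $ perf stat -e cpu/PM_CMPLU_STALL/ -a sleep 1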
@@ -9,6 +9,12 @@ Description:
		we want to export, so that userspace can deal with sane
		name/value pairs.

		Userspace must be prepared for the possibility that attributes
		define overlapping bit ranges. For example:
			attr1 = 'config:0-23'
			attr2 = 'config:0-7'
			attr3 = 'config:12-35'

		Example: 'config1:1,6-10,44'
		Defines contents of attribute that occupies bits 1,6-10,44 of
		perf_event_attr::config1.
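For reference, format attributes can be inspected directly from sysfs.
The two values below match the x86 PMU_FORMAT_ATTR definitions that
appear later in this diff; the exact set of attributes varies by PMU:

    $ cat /sys/bus/event_source/devices/cpu/format/event
    config:0-7
    $ cat /sys/bus/event_source/devices/cpu/format/umask
    config:8-15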
@@ -70,12 +70,12 @@ show up in /proc/sys/kernel:
- shmall
- shmmax                      [ sysv ipc ]
- shmmni
- softlockup_thresh
- stop-a                      [ SPARC only ]
- sysrq                       ==> Documentation/sysrq.txt
- tainted
- threads-max
- unknown_nmi_panic
- watchdog_thresh
- version

==============================================================
@@ -427,6 +427,32 @@ This file shows up if CONFIG_DEBUG_STACKOVERFLOW is enabled.

==============================================================

perf_cpu_time_max_percent:

Hints to the kernel how much CPU time it should be allowed to
use to handle perf sampling events.  If the perf subsystem
is informed that its samples are exceeding this limit, it
will drop its sampling frequency to attempt to reduce its CPU
usage.

Some perf sampling happens in NMIs.  If these samples
unexpectedly take too long to execute, the NMIs can become
stacked up next to each other so much that nothing else is
allowed to execute.

0: disable the mechanism.  Do not monitor or correct perf's
   sampling rate no matter how much CPU time it takes.

1-100: attempt to throttle perf's sample rate to this
   percentage of CPU.  Note: the kernel calculates an
   "expected" length of each sample event.  100 here means
   100% of that expected length.  Even if this is set to
   100, you may still see sample throttling if this
   length is exceeded.  Set to 0 if you truly do not care
   how much CPU is consumed.

==============================================================


pid_max:
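As a usage illustration (not part of the patch itself): the knob is an
ordinary sysctl, so it can be adjusted at runtime, e.g.:

    # Throttle perf sampling to ~25% of its expected CPU budget
    $ echo 25 > /proc/sys/kernel/perf_cpu_time_max_percent
    # Or equivalently:
    $ sysctl -w kernel.perf_cpu_time_max_percent=25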
@@ -604,15 +630,6 @@ without users and with a dead originative process will be destroyed.

==============================================================

softlockup_thresh:

This value can be used to lower the softlockup tolerance threshold.  The
default threshold is 60 seconds.  If a cpu is locked up for 60 seconds,
the kernel complains.  Valid values are 1-60 seconds.  Setting this
tunable to zero will disable the softlockup detection altogether.

==============================================================

tainted:

Non-zero if the kernel has been tainted.  Numeric values, which
@@ -648,3 +665,16 @@ that time, kernel debugging information is displayed on console.

NMI switch that most IA32 servers have fires unknown NMI up, for
example.  If a system hangs up, try pressing the NMI switch.

==============================================================

watchdog_thresh:

This value can be used to control the frequency of hrtimer and NMI
events and the soft and hard lockup thresholds.  The default threshold
is 10 seconds.

The softlockup threshold is (2 * watchdog_thresh).  Setting this
tunable to zero will disable lockup detection altogether.

==============================================================
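An illustrative adjustment of the unified threshold (a sketch; values
are examples only):

    # Raise the hard lockup threshold to 20s (softlockup becomes 2*20 = 40s)
    $ echo 20 > /proc/sys/kernel/watchdog_thresh
    # Setting it to zero disables lockup detection altogether
    $ echo 0 > /proc/sys/kernel/watchdog_thresh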
43	Documentation/trace/events-nmi.txt (new file)
@@ -0,0 +1,43 @@
NMI Trace Events

These events normally show up here:

	/sys/kernel/debug/tracing/events/nmi

--

nmi_handler:

You might want to use this tracepoint if you suspect that your
NMI handlers are hogging large amounts of CPU time.  The kernel
will warn if it sees long-running handlers:

	INFO: NMI handler took too long to run: 9.207 msecs

and this tracepoint will allow you to drill down and get some
more details.

Let's say you suspect that perf_event_nmi_handler() is causing
you some problems and you only want to trace that handler
specifically.  You need to find its address:

	$ grep perf_event_nmi_handler /proc/kallsyms
	ffffffff81625600 t perf_event_nmi_handler

Let's also say you are only interested in when that function is
really hogging a lot of CPU time, like a millisecond at a time.
Note that the kernel's output is in milliseconds, but the input
to the filter is in nanoseconds!  You can filter on 'delta_ns':

	cd /sys/kernel/debug/tracing/events/nmi/nmi_handler
	echo 'handler==0xffffffff81625600 && delta_ns>1000000' > filter
	echo 1 > enable

Your output would then look like:

	$ cat /sys/kernel/debug/tracing/trace_pipe
	<idle>-0  [000] d.h3  505.397558: nmi_handler: perf_event_nmi_handler() delta_ns: 3236765 handled: 1
	<idle>-0  [000] d.h3  505.805893: nmi_handler: perf_event_nmi_handler() delta_ns: 3174234 handled: 1
	<idle>-0  [000] d.h3  506.158206: nmi_handler: perf_event_nmi_handler() delta_ns: 3084642 handled: 1
	<idle>-0  [000] d.h3  506.334346: nmi_handler: perf_event_nmi_handler() delta_ns: 3080351 handled: 1
@@ -882,7 +882,7 @@ static int __init init_hw_perf_events(void)
    }

    register_cpu_notifier(&metag_pmu_notifier);
    ret = perf_pmu_register(&pmu, (char *)metag_pmu->name, PERF_TYPE_RAW);
    ret = perf_pmu_register(&pmu, metag_pmu->name, PERF_TYPE_RAW);
out:
    return ret;
}
@@ -62,6 +62,29 @@
#define PME_PM_BRU_FIN				0x10068
#define PME_PM_BRU_MPRED			0x400f6

#define PME_PM_CMPLU_STALL_FXU			0x20014
#define PME_PM_CMPLU_STALL_DIV			0x40014
#define PME_PM_CMPLU_STALL_SCALAR		0x40012
#define PME_PM_CMPLU_STALL_SCALAR_LONG		0x20018
#define PME_PM_CMPLU_STALL_VECTOR		0x2001c
#define PME_PM_CMPLU_STALL_VECTOR_LONG		0x4004a
#define PME_PM_CMPLU_STALL_LSU			0x20012
#define PME_PM_CMPLU_STALL_REJECT		0x40016
#define PME_PM_CMPLU_STALL_ERAT_MISS		0x40018
#define PME_PM_CMPLU_STALL_DCACHE_MISS		0x20016
#define PME_PM_CMPLU_STALL_STORE		0x2004a
#define PME_PM_CMPLU_STALL_THRD			0x1001c
#define PME_PM_CMPLU_STALL_IFU			0x4004c
#define PME_PM_CMPLU_STALL_BRU			0x4004e
#define PME_PM_GCT_NOSLOT_IC_MISS		0x2001a
#define PME_PM_GCT_NOSLOT_BR_MPRED		0x4001a
#define PME_PM_GCT_NOSLOT_BR_MPRED_IC_MISS	0x4001c
#define PME_PM_GRP_CMPL				0x30004
#define PME_PM_1PLUS_PPC_CMPL			0x100f2
#define PME_PM_CMPLU_STALL_DFU			0x2003c
#define PME_PM_RUN_CYC				0x200f4
#define PME_PM_RUN_INST_CMPL			0x400fa

/*
 * Layout of constraint bits:
 * 6666555555555544444444443333333333222222222211111111110000000000
@@ -393,6 +416,31 @@ POWER_EVENT_ATTR(LD_MISS_L1, LD_MISS_L1);
POWER_EVENT_ATTR(BRU_FIN, BRU_FIN)
POWER_EVENT_ATTR(BRU_MPRED, BRU_MPRED);

POWER_EVENT_ATTR(CMPLU_STALL_FXU, CMPLU_STALL_FXU);
POWER_EVENT_ATTR(CMPLU_STALL_DIV, CMPLU_STALL_DIV);
POWER_EVENT_ATTR(CMPLU_STALL_SCALAR, CMPLU_STALL_SCALAR);
POWER_EVENT_ATTR(CMPLU_STALL_SCALAR_LONG, CMPLU_STALL_SCALAR_LONG);
POWER_EVENT_ATTR(CMPLU_STALL_VECTOR, CMPLU_STALL_VECTOR);
POWER_EVENT_ATTR(CMPLU_STALL_VECTOR_LONG, CMPLU_STALL_VECTOR_LONG);
POWER_EVENT_ATTR(CMPLU_STALL_LSU, CMPLU_STALL_LSU);
POWER_EVENT_ATTR(CMPLU_STALL_REJECT, CMPLU_STALL_REJECT);

POWER_EVENT_ATTR(CMPLU_STALL_ERAT_MISS, CMPLU_STALL_ERAT_MISS);
POWER_EVENT_ATTR(CMPLU_STALL_DCACHE_MISS, CMPLU_STALL_DCACHE_MISS);
POWER_EVENT_ATTR(CMPLU_STALL_STORE, CMPLU_STALL_STORE);
POWER_EVENT_ATTR(CMPLU_STALL_THRD, CMPLU_STALL_THRD);
POWER_EVENT_ATTR(CMPLU_STALL_IFU, CMPLU_STALL_IFU);
POWER_EVENT_ATTR(CMPLU_STALL_BRU, CMPLU_STALL_BRU);
POWER_EVENT_ATTR(GCT_NOSLOT_IC_MISS, GCT_NOSLOT_IC_MISS);

POWER_EVENT_ATTR(GCT_NOSLOT_BR_MPRED, GCT_NOSLOT_BR_MPRED);
POWER_EVENT_ATTR(GCT_NOSLOT_BR_MPRED_IC_MISS, GCT_NOSLOT_BR_MPRED_IC_MISS);
POWER_EVENT_ATTR(GRP_CMPL, GRP_CMPL);
POWER_EVENT_ATTR(1PLUS_PPC_CMPL, 1PLUS_PPC_CMPL);
POWER_EVENT_ATTR(CMPLU_STALL_DFU, CMPLU_STALL_DFU);
POWER_EVENT_ATTR(RUN_CYC, RUN_CYC);
POWER_EVENT_ATTR(RUN_INST_CMPL, RUN_INST_CMPL);

static struct attribute *power7_events_attr[] = {
    GENERIC_EVENT_PTR(CYC),
    GENERIC_EVENT_PTR(GCT_NOSLOT_CYC),
@@ -411,6 +459,31 @@ static struct attribute *power7_events_attr[] = {
    POWER_EVENT_PTR(LD_MISS_L1),
    POWER_EVENT_PTR(BRU_FIN),
    POWER_EVENT_PTR(BRU_MPRED),

    POWER_EVENT_PTR(CMPLU_STALL_FXU),
    POWER_EVENT_PTR(CMPLU_STALL_DIV),
    POWER_EVENT_PTR(CMPLU_STALL_SCALAR),
    POWER_EVENT_PTR(CMPLU_STALL_SCALAR_LONG),
    POWER_EVENT_PTR(CMPLU_STALL_VECTOR),
    POWER_EVENT_PTR(CMPLU_STALL_VECTOR_LONG),
    POWER_EVENT_PTR(CMPLU_STALL_LSU),
    POWER_EVENT_PTR(CMPLU_STALL_REJECT),

    POWER_EVENT_PTR(CMPLU_STALL_ERAT_MISS),
    POWER_EVENT_PTR(CMPLU_STALL_DCACHE_MISS),
    POWER_EVENT_PTR(CMPLU_STALL_STORE),
    POWER_EVENT_PTR(CMPLU_STALL_THRD),
    POWER_EVENT_PTR(CMPLU_STALL_IFU),
    POWER_EVENT_PTR(CMPLU_STALL_BRU),
    POWER_EVENT_PTR(GCT_NOSLOT_IC_MISS),
    POWER_EVENT_PTR(GCT_NOSLOT_BR_MPRED),

    POWER_EVENT_PTR(GCT_NOSLOT_BR_MPRED_IC_MISS),
    POWER_EVENT_PTR(GRP_CMPL),
    POWER_EVENT_PTR(1PLUS_PPC_CMPL),
    POWER_EVENT_PTR(CMPLU_STALL_DFU),
    POWER_EVENT_PTR(RUN_CYC),
    POWER_EVENT_PTR(RUN_INST_CMPL),
    NULL
};

@@ -34,8 +34,6 @@
#include <asm/sys_ia32.h>
#include <asm/smap.h>

#define FIX_EFLAGS	__FIX_EFLAGS

int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from)
{
    int err = 0;
@@ -29,6 +29,9 @@
#define ARCH_PERFMON_EVENTSEL_INV			(1ULL << 23)
#define ARCH_PERFMON_EVENTSEL_CMASK			0xFF000000ULL

#define HSW_IN_TX					(1ULL << 32)
#define HSW_IN_TX_CHECKPOINTED				(1ULL << 33)

#define AMD64_EVENTSEL_INT_CORE_ENABLE			(1ULL << 36)
#define AMD64_EVENTSEL_GUESTONLY			(1ULL << 40)
#define AMD64_EVENTSEL_HOSTONLY				(1ULL << 41)
@@ -7,10 +7,10 @@

#include <asm/processor-flags.h>

#define __FIX_EFLAGS	(X86_EFLAGS_AC | X86_EFLAGS_OF | \
#define FIX_EFLAGS	(X86_EFLAGS_AC | X86_EFLAGS_OF | \
			 X86_EFLAGS_DF | X86_EFLAGS_TF | X86_EFLAGS_SF | \
			 X86_EFLAGS_ZF | X86_EFLAGS_AF | X86_EFLAGS_PF | \
			 X86_EFLAGS_CF)
			 X86_EFLAGS_CF | X86_EFLAGS_RF)

void signal_fault(struct pt_regs *regs, void __user *frame, char *where);
@@ -170,6 +170,9 @@
#define MSR_KNC_EVNTSEL0		0x00000028
#define MSR_KNC_EVNTSEL1		0x00000029

/* Alternative perfctr range with full access. */
#define MSR_IA32_PMC0			0x000004c1

/* AMD64 MSRs. Not complete. See the architecture manual for a more
   complete list. */
@@ -31,11 +31,15 @@ obj-$(CONFIG_PERF_EVENTS) += perf_event.o

ifdef CONFIG_PERF_EVENTS
obj-$(CONFIG_CPU_SUP_AMD)	+= perf_event_amd.o perf_event_amd_uncore.o
ifdef CONFIG_AMD_IOMMU
obj-$(CONFIG_CPU_SUP_AMD)	+= perf_event_amd_iommu.o
endif
obj-$(CONFIG_CPU_SUP_INTEL)	+= perf_event_p6.o perf_event_knc.o perf_event_p4.o
obj-$(CONFIG_CPU_SUP_INTEL)	+= perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o
obj-$(CONFIG_CPU_SUP_INTEL)	+= perf_event_intel_uncore.o
endif

obj-$(CONFIG_X86_MCE)		+= mcheck/
obj-$(CONFIG_MTRR)		+= mtrr/
@@ -403,7 +403,8 @@ int x86_pmu_hw_config(struct perf_event *event)
         * check that PEBS LBR correction does not conflict with
         * whatever the user is asking with attr->branch_sample_type
         */
        if (event->attr.precise_ip > 1) {
        if (event->attr.precise_ip > 1 &&
            x86_pmu.intel_cap.pebs_format < 2) {
            u64 *br_type = &event->attr.branch_sample_type;

            if (has_branch_stack(event)) {
@@ -568,7 +569,7 @@ struct sched_state {
struct perf_sched {
    int			max_weight;
    int			max_events;
    struct event_constraint	**constraints;
    struct perf_event	**events;
    struct sched_state	state;
    int			saved_states;
    struct sched_state	saved[SCHED_STATES_MAX];
@@ -577,7 +578,7 @@ struct perf_sched {
/*
 * Initialize iterator that runs through all events and counters.
 */
static void perf_sched_init(struct perf_sched *sched, struct event_constraint **c,
static void perf_sched_init(struct perf_sched *sched, struct perf_event **events,
                            int num, int wmin, int wmax)
{
    int idx;
@@ -585,10 +586,10 @@ static void perf_sched_init(struct perf_sched *sched, struct event_constraint **
    memset(sched, 0, sizeof(*sched));
    sched->max_events	= num;
    sched->max_weight	= wmax;
    sched->constraints	= c;
    sched->events	= events;

    for (idx = 0; idx < num; idx++) {
        if (c[idx]->weight == wmin)
        if (events[idx]->hw.constraint->weight == wmin)
            break;
    }
@@ -635,8 +636,7 @@ static bool __perf_sched_find_counter(struct perf_sched *sched)
    if (sched->state.event >= sched->max_events)
        return false;

    c = sched->constraints[sched->state.event];

    c = sched->events[sched->state.event]->hw.constraint;
    /* Prefer fixed purpose counters */
    if (c->idxmsk64 & (~0ULL << INTEL_PMC_IDX_FIXED)) {
        idx = INTEL_PMC_IDX_FIXED;
@@ -694,7 +694,7 @@ static bool perf_sched_next_event(struct perf_sched *sched)
            if (sched->state.weight > sched->max_weight)
                return false;
        }
        c = sched->constraints[sched->state.event];
        c = sched->events[sched->state.event]->hw.constraint;
    } while (c->weight != sched->state.weight);

    sched->state.counter = 0;	/* start with first counter */
@@ -705,12 +705,12 @@ static bool perf_sched_next_event(struct perf_sched *sched)
/*
 * Assign a counter for each event.
 */
int perf_assign_events(struct event_constraint **constraints, int n,
int perf_assign_events(struct perf_event **events, int n,
                       int wmin, int wmax, int *assign)
{
    struct perf_sched sched;

    perf_sched_init(&sched, constraints, n, wmin, wmax);
    perf_sched_init(&sched, events, n, wmin, wmax);

    do {
        if (!perf_sched_find_counter(&sched))
@@ -724,16 +724,19 @@ int perf_assign_events(struct event_constraint **constraints, int n,

int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
{
    struct event_constraint *c, *constraints[X86_PMC_IDX_MAX];
    struct event_constraint *c;
    unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
    struct perf_event *e;
    int i, wmin, wmax, num = 0;
    struct hw_perf_event *hwc;

    bitmap_zero(used_mask, X86_PMC_IDX_MAX);

    for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) {
        hwc = &cpuc->event_list[i]->hw;
        c = x86_pmu.get_event_constraints(cpuc, cpuc->event_list[i]);
        constraints[i] = c;
        hwc->constraint = c;

        wmin = min(wmin, c->weight);
        wmax = max(wmax, c->weight);
    }
@@ -743,7 +746,7 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
     */
    for (i = 0; i < n; i++) {
        hwc = &cpuc->event_list[i]->hw;
        c = constraints[i];
        c = hwc->constraint;

        /* never assigned */
        if (hwc->idx == -1)
@@ -764,16 +767,35 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)

    /* slow path */
    if (i != n)
        num = perf_assign_events(constraints, n, wmin, wmax, assign);
        num = perf_assign_events(cpuc->event_list, n, wmin,
                                 wmax, assign);

    /*
     * Mark the event as committed, so we do not put_constraint()
     * in case new events are added and fail scheduling.
     */
    if (!num && assign) {
        for (i = 0; i < n; i++) {
            e = cpuc->event_list[i];
            e->hw.flags |= PERF_X86_EVENT_COMMITTED;
        }
    }
    /*
     * scheduling failed or is just a simulation,
     * free resources if necessary
     */
    if (!assign || num) {
        for (i = 0; i < n; i++) {
            e = cpuc->event_list[i];
            /*
             * do not put_constraint() on committed events,
             * because they are good to go
             */
            if ((e->hw.flags & PERF_X86_EVENT_COMMITTED))
                continue;

            if (x86_pmu.put_event_constraints)
                x86_pmu.put_event_constraints(cpuc, cpuc->event_list[i]);
                x86_pmu.put_event_constraints(cpuc, e);
        }
    }
    return num ? -EINVAL : 0;
@@ -1152,6 +1174,11 @@ static void x86_pmu_del(struct perf_event *event, int flags)
    struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
    int i;

    /*
     * event is descheduled
     */
    event->hw.flags &= ~PERF_X86_EVENT_COMMITTED;

    /*
     * If we're called during a txn, we don't need to do anything.
     * The events never got scheduled and ->cancel_txn will truncate
@@ -1249,10 +1276,20 @@ void perf_events_lapic_init(void)
static int __kprobes
perf_event_nmi_handler(unsigned int cmd, struct pt_regs *regs)
{
    int ret;
    u64 start_clock;
    u64 finish_clock;

    if (!atomic_read(&active_events))
        return NMI_DONE;

    return x86_pmu.handle_irq(regs);
    start_clock = local_clock();
    ret = x86_pmu.handle_irq(regs);
    finish_clock = local_clock();

    perf_sample_event_took(finish_clock - start_clock);

    return ret;
}

struct event_constraint emptyconstraint;
@@ -63,10 +63,12 @@ struct event_constraint {
    int	flags;
};
/*
 * struct event_constraint flags
 * struct hw_perf_event.flags flags
 */
#define PERF_X86_EVENT_PEBS_LDLAT	0x1 /* ld+ldlat data address sampling */
#define PERF_X86_EVENT_PEBS_ST		0x2 /* st data address sampling */
#define PERF_X86_EVENT_PEBS_ST_HSW	0x4 /* haswell style st data sampling */
#define PERF_X86_EVENT_COMMITTED	0x8 /* event passed commit_txn */

struct amd_nb {
    int nb_id;  /* NorthBridge id */
@@ -227,11 +229,14 @@ struct cpu_hw_events {
 *  - inv
 *  - edge
 *  - cnt-mask
 *  - in_tx
 *  - in_tx_checkpointed
 *  The other filters are supported by fixed counters.
 *  The any-thread option is supported starting with v3.
 */
#define FIXED_EVENT_FLAGS (X86_RAW_EVENT_MASK|HSW_IN_TX|HSW_IN_TX_CHECKPOINTED)
#define FIXED_EVENT_CONSTRAINT(c, n)	\
	EVENT_CONSTRAINT(c, (1ULL << (32+n)), X86_RAW_EVENT_MASK)
	EVENT_CONSTRAINT(c, (1ULL << (32+n)), FIXED_EVENT_FLAGS)

/*
 * Constraint on the Event code + UMask
@@ -247,6 +252,11 @@ struct cpu_hw_events {
	__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
			  HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST)

/* DataLA version of store sampling without extra enable bit. */
#define INTEL_PST_HSW_CONSTRAINT(c, n)	\
	__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
			  HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST_HSW)

#define EVENT_CONSTRAINT_END		\
	EVENT_CONSTRAINT(0, 0, 0)

@@ -301,6 +311,11 @@ union perf_capabilities {
		u64	pebs_arch_reg:1;
		u64	pebs_format:4;
		u64	smm_freeze:1;
		/*
		 * PMU supports separate counter range for writing
		 * values > 32bit.
		 */
		u64	full_width_write:1;
	};
	u64	capabilities;
};
@@ -375,6 +390,7 @@ struct x86_pmu {
	struct event_constraint *event_constraints;
	struct x86_pmu_quirk *quirks;
	int		perfctr_second_write;
	bool		late_ack;

	/*
	 * sysfs attrs
@@ -528,7 +544,7 @@ static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,

void x86_pmu_enable_all(int added);

int perf_assign_events(struct event_constraint **constraints, int n,
int perf_assign_events(struct perf_event **events, int n,
			int wmin, int wmax, int *assign);
int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign);

@@ -633,6 +649,8 @@ extern struct event_constraint intel_snb_pebs_event_constraints[];

extern struct event_constraint intel_ivb_pebs_event_constraints[];

extern struct event_constraint intel_hsw_pebs_event_constraints[];

struct event_constraint *intel_pebs_constraints(struct perf_event *event);

void intel_pmu_pebs_enable(struct perf_event *event);
@@ -648,48 +648,48 @@ static __initconst const struct x86_pmu amd_pmu = {
    .cpu_dead		= amd_pmu_cpu_dead,
};

static int setup_event_constraints(void)
static int __init amd_core_pmu_init(void)
{
    if (boot_cpu_data.x86 == 0x15)
        x86_pmu.get_event_constraints = amd_get_event_constraints_f15h;
    return 0;
}
    if (!cpu_has_perfctr_core)
        return 0;

static int setup_perfctr_core(void)
{
    if (!cpu_has_perfctr_core) {
        WARN(x86_pmu.get_event_constraints == amd_get_event_constraints_f15h,
             KERN_ERR "Odd, counter constraints enabled but no core perfctrs detected!");
    switch (boot_cpu_data.x86) {
    case 0x15:
        pr_cont("Fam15h ");
        x86_pmu.get_event_constraints = amd_get_event_constraints_f15h;
        break;

    default:
        pr_err("core perfctr but no constraints; unknown hardware!\n");
        return -ENODEV;
    }

    WARN(x86_pmu.get_event_constraints == amd_get_event_constraints,
         KERN_ERR "hw perf events core counters need constraints handler!");

    /*
     * If core performance counter extensions exists, we must use
     * MSR_F15H_PERF_CTL/MSR_F15H_PERF_CTR msrs. See also
     * x86_pmu_addr_offset().
     * amd_pmu_addr_offset().
     */
    x86_pmu.eventsel	= MSR_F15H_PERF_CTL;
    x86_pmu.perfctr	= MSR_F15H_PERF_CTR;
    x86_pmu.num_counters = AMD64_NUM_COUNTERS_CORE;

    printk(KERN_INFO "perf: AMD core performance counters detected\n");

    pr_cont("core perfctr, ");
    return 0;
}

__init int amd_pmu_init(void)
{
    int ret;

    /* Performance-monitoring supported from K7 and later: */
    if (boot_cpu_data.x86 < 6)
        return -ENODEV;

    x86_pmu = amd_pmu;

    setup_event_constraints();
    setup_perfctr_core();
    ret = amd_core_pmu_init();
    if (ret)
        return ret;

    /* Events are common for all AMDs */
    memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
504	arch/x86/kernel/cpu/perf_event_amd_iommu.c (new file; diff suppressed because it is too large)
40	arch/x86/kernel/cpu/perf_event_amd_iommu.h (new file)
@@ -0,0 +1,40 @@
/*
 * Copyright (C) 2013 Advanced Micro Devices, Inc.
 *
 * Author: Steven Kinney <Steven.Kinney@amd.com>
 * Author: Suravee Suthikulpanit <Suraveee.Suthikulpanit@amd.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#ifndef _PERF_EVENT_AMD_IOMMU_H_
#define _PERF_EVENT_AMD_IOMMU_H_

/* iommu pc mmio region register indexes */
#define IOMMU_PC_COUNTER_REG			0x00
#define IOMMU_PC_COUNTER_SRC_REG		0x08
#define IOMMU_PC_PASID_MATCH_REG		0x10
#define IOMMU_PC_DOMID_MATCH_REG		0x18
#define IOMMU_PC_DEVID_MATCH_REG		0x20
#define IOMMU_PC_COUNTER_REPORT_REG		0x28

/* maximum specified bank/counters */
#define PC_MAX_SPEC_BNKS			64
#define PC_MAX_SPEC_CNTRS			16

/* iommu pc reg masks */
#define IOMMU_BASE_DEVID			0x0000

/* amd_iommu_init.c external support functions */
extern bool amd_iommu_pc_supported(void);

extern u8 amd_iommu_pc_get_max_banks(u16 devid);

extern u8 amd_iommu_pc_get_max_counters(u16 devid);

extern int amd_iommu_pc_get_set_reg_val(u16 devid, u8 bank, u8 cntr,
			u8 fxn, u64 *value, bool is_write);

#endif /*_PERF_EVENT_AMD_IOMMU_H_*/
@@ -13,6 +13,7 @@
#include <linux/slab.h>
#include <linux/export.h>

#include <asm/cpufeature.h>
#include <asm/hardirq.h>
#include <asm/apic.h>

@@ -190,6 +191,22 @@ struct attribute *snb_events_attrs[] = {
    NULL,
};

static struct event_constraint intel_hsw_event_constraints[] = {
    FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
    FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
    FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
    INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.* */
    INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
    INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
    /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */
    INTEL_EVENT_CONSTRAINT(0x08a3, 0x4),
    /* CYCLE_ACTIVITY.STALLS_L1D_PENDING */
    INTEL_EVENT_CONSTRAINT(0x0ca3, 0x4),
    /* CYCLE_ACTIVITY.CYCLES_NO_EXECUTE */
    INTEL_EVENT_CONSTRAINT(0x04a3, 0xf),
    EVENT_CONSTRAINT_END
};

static u64 intel_pmu_event_map(int hw_event)
{
    return intel_perfmon_event_map[hw_event];
@@ -872,7 +889,8 @@ static inline bool intel_pmu_needs_lbr_smpl(struct perf_event *event)
        return true;

    /* implicit branch sampling to correct PEBS skid */
    if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1)
    if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1 &&
        x86_pmu.intel_cap.pebs_format < 2)
        return true;

    return false;
@@ -1167,15 +1185,11 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
    cpuc = &__get_cpu_var(cpu_hw_events);

    /*
     * Some chipsets need to unmask the LVTPC in a particular spot
     * inside the nmi handler.  As a result, the unmasking was pushed
     * into all the nmi handlers.
     *
     * This handler doesn't seem to have any issues with the unmasking
     * so it was left at the top.
     * No known reason to not always do late ACK,
     * but just in case do it opt-in.
     */
    apic_write(APIC_LVTPC, APIC_DM_NMI);

    if (!x86_pmu.late_ack)
        apic_write(APIC_LVTPC, APIC_DM_NMI);
    intel_pmu_disable_all();
    handled = intel_pmu_drain_bts_buffer();
    status = intel_pmu_get_status();
@@ -1188,8 +1202,12 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
again:
    intel_pmu_ack_status(status);
    if (++loops > 100) {
        WARN_ONCE(1, "perfevents: irq loop stuck!\n");
        perf_event_print_debug();
        static bool warned = false;
        if (!warned) {
            WARN(1, "perfevents: irq loop stuck!\n");
            perf_event_print_debug();
            warned = true;
        }
        intel_pmu_reset();
        goto done;
    }
@@ -1235,6 +1253,13 @@ again:

done:
    intel_pmu_enable_all(0);
    /*
     * Only unmask the NMI after the overflow counters
     * have been reset. This avoids spurious NMIs on
     * Haswell CPUs.
     */
    if (x86_pmu.late_ack)
        apic_write(APIC_LVTPC, APIC_DM_NMI);
    return handled;
}

@@ -1425,7 +1450,6 @@ x86_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
    if (x86_pmu.event_constraints) {
        for_each_event_constraint(c, x86_pmu.event_constraints) {
            if ((event->hw.config & c->cmask) == c->code) {
                /* hw.flags zeroed at initialization */
                event->hw.flags |= c->flags;
                return c;
            }
@@ -1473,7 +1497,6 @@ intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc,
static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
                                        struct perf_event *event)
{
    event->hw.flags = 0;
    intel_put_shared_regs_event_constraints(cpuc, event);
}

@@ -1646,6 +1669,47 @@ static void core_pmu_enable_all(int added)
    }
}

static int hsw_hw_config(struct perf_event *event)
{
    int ret = intel_pmu_hw_config(event);

    if (ret)
        return ret;
    if (!boot_cpu_has(X86_FEATURE_RTM) && !boot_cpu_has(X86_FEATURE_HLE))
        return 0;
    event->hw.config |= event->attr.config & (HSW_IN_TX|HSW_IN_TX_CHECKPOINTED);

    /*
     * IN_TX/IN_TX-CP filters are not supported by the Haswell PMU with
     * PEBS or in ANY thread mode. Since the results are non-sensical forbid
     * this combination.
     */
    if ((event->hw.config & (HSW_IN_TX|HSW_IN_TX_CHECKPOINTED)) &&
        ((event->hw.config & ARCH_PERFMON_EVENTSEL_ANY) ||
         event->attr.precise_ip > 0))
        return -EOPNOTSUPP;

    return 0;
}

static struct event_constraint counter2_constraint =
    EVENT_CONSTRAINT(0, 0x4, 0);

static struct event_constraint *
hsw_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
{
    struct event_constraint *c = intel_get_event_constraints(cpuc, event);

    /* Handle special quirk on in_tx_checkpointed only in counter 2 */
    if (event->hw.config & HSW_IN_TX_CHECKPOINTED) {
        if (c->idxmsk64 & (1U << 2))
            return &counter2_constraint;
        return &emptyconstraint;
    }

    return c;
}

PMU_FORMAT_ATTR(event,	"config:0-7"	);
PMU_FORMAT_ATTR(umask,	"config:8-15"	);
PMU_FORMAT_ATTR(edge,	"config:18"	);
@@ -1653,6 +1717,8 @@ PMU_FORMAT_ATTR(pc,	"config:19"	);
PMU_FORMAT_ATTR(any,	"config:21"	); /* v3 + */
PMU_FORMAT_ATTR(inv,	"config:23"	);
PMU_FORMAT_ATTR(cmask,	"config:24-31"	);
PMU_FORMAT_ATTR(in_tx,  "config:32");
PMU_FORMAT_ATTR(in_tx_cp, "config:33");

static struct attribute *intel_arch_formats_attr[] = {
    &format_attr_event.attr,
@@ -1807,6 +1873,8 @@ static struct attribute *intel_arch3_formats_attr[] = {
    &format_attr_any.attr,
    &format_attr_inv.attr,
    &format_attr_cmask.attr,
    &format_attr_in_tx.attr,
    &format_attr_in_tx_cp.attr,

    &format_attr_offcore_rsp.attr, /* XXX do NHM/WSM + SNB breakout */
    &format_attr_ldlat.attr, /* PEBS load latency */
@@ -1966,6 +2034,15 @@ static __init void intel_nehalem_quirk(void)
    }
}

EVENT_ATTR_STR(mem-loads,	mem_ld_hsw,	"event=0xcd,umask=0x1,ldlat=3");
EVENT_ATTR_STR(mem-stores,	mem_st_hsw,	"event=0xd0,umask=0x82")

static struct attribute *hsw_events_attrs[] = {
    EVENT_PTR(mem_ld_hsw),
    EVENT_PTR(mem_st_hsw),
    NULL
};

__init int intel_pmu_init(void)
{
    union cpuid10_edx edx;
@@ -2189,6 +2266,30 @@ __init int intel_pmu_init(void)
        break;

    case 60: /* Haswell Client */
    case 70:
    case 71:
    case 63:
        x86_pmu.late_ack = true;
        memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, sizeof(hw_cache_event_ids));
        memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));

        intel_pmu_lbr_init_snb();

        x86_pmu.event_constraints = intel_hsw_event_constraints;
        x86_pmu.pebs_constraints = intel_hsw_pebs_event_constraints;
        x86_pmu.extra_regs = intel_snb_extra_regs;
        x86_pmu.pebs_aliases = intel_pebs_aliases_snb;
        /* all extra regs are per-cpu when HT is on */
        x86_pmu.er_flags |= ERF_HAS_RSP_1;
        x86_pmu.er_flags |= ERF_NO_HT_SHARING;

        x86_pmu.hw_config = hsw_hw_config;
        x86_pmu.get_event_constraints = hsw_get_event_constraints;
        x86_pmu.cpu_events = hsw_events_attrs;
        pr_cont("Haswell events, ");
        break;

    default:
        switch (x86_pmu.version) {
        case 1:
@@ -2227,7 +2328,7 @@ __init int intel_pmu_init(void)
         * counter, so do not extend mask to generic counters
         */
        for_each_event_constraint(c, x86_pmu.event_constraints) {
            if (c->cmask != X86_RAW_EVENT_MASK
            if (c->cmask != FIXED_EVENT_FLAGS
                || c->idxmsk64 == INTEL_PMC_MSK_FIXED_REF_CYCLES) {
                continue;
            }
@@ -2237,5 +2338,12 @@ __init int intel_pmu_init(void)
        }
    }

    /* Support full width counters using alternative MSR range */
    if (x86_pmu.intel_cap.full_width_write) {
        x86_pmu.max_period = x86_pmu.cntval_mask;
        x86_pmu.perfctr = MSR_IA32_PMC0;
        pr_cont("full-width counters, ");
    }

    return 0;
}
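The mem-loads/mem-stores aliases defined above show up under
/sys/devices/cpu/events/ on Haswell. A hedged usage sketch (requires a
Haswell CPU with PEBS; the exact modifier spelling may differ by perf
version):

    # Sample precise loads via the new Haswell alias
    $ perf record -e cpu/mem-loads/pp -a -- sleep 1
    $ perf report --stdio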
@@ -107,6 +107,19 @@ static u64 precise_store_data(u64 status)
    return val;
}

static u64 precise_store_data_hsw(u64 status)
{
    union perf_mem_data_src dse;

    dse.val = 0;
    dse.mem_op = PERF_MEM_OP_STORE;
    dse.mem_lvl = PERF_MEM_LVL_NA;
    if (status & 1)
        dse.mem_lvl = PERF_MEM_LVL_L1;
    /* Nothing else supported. Sorry. */
    return dse.val;
}

static u64 load_latency_data(u64 status)
{
    union intel_x86_pebs_dse dse;
@@ -165,6 +178,22 @@ struct pebs_record_nhm {
    u64 status, dla, dse, lat;
};

/*
 * Same as pebs_record_nhm, with two additional fields.
 */
struct pebs_record_hsw {
    struct pebs_record_nhm nhm;
    /*
     * Real IP of the event. In the Intel documentation this
     * is called eventingrip.
     */
    u64 real_ip;
    /*
     * TSX tuning information field: abort cycles and abort flags.
     */
    u64 tsx_tuning;
};

void init_debug_store_on_cpu(int cpu)
{
    struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
@@ -548,6 +577,42 @@ struct event_constraint intel_ivb_pebs_event_constraints[] = {
    EVENT_CONSTRAINT_END
};

struct event_constraint intel_hsw_pebs_event_constraints[] = {
    INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
    INTEL_PST_HSW_CONSTRAINT(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
    INTEL_UEVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */
    INTEL_EVENT_CONSTRAINT(0xc4, 0xf),    /* BR_INST_RETIRED.* */
    INTEL_UEVENT_CONSTRAINT(0x01c5, 0xf), /* BR_MISP_RETIRED.CONDITIONAL */
    INTEL_UEVENT_CONSTRAINT(0x04c5, 0xf), /* BR_MISP_RETIRED.ALL_BRANCHES */
    INTEL_UEVENT_CONSTRAINT(0x20c5, 0xf), /* BR_MISP_RETIRED.NEAR_TAKEN */
    INTEL_PLD_CONSTRAINT(0x01cd, 0x8),    /* MEM_TRANS_RETIRED.* */
    /* MEM_UOPS_RETIRED.STLB_MISS_LOADS */
    INTEL_UEVENT_CONSTRAINT(0x11d0, 0xf),
    /* MEM_UOPS_RETIRED.STLB_MISS_STORES */
    INTEL_UEVENT_CONSTRAINT(0x12d0, 0xf),
    INTEL_UEVENT_CONSTRAINT(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */
    INTEL_UEVENT_CONSTRAINT(0x41d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_LOADS */
    /* MEM_UOPS_RETIRED.SPLIT_STORES */
    INTEL_UEVENT_CONSTRAINT(0x42d0, 0xf),
    INTEL_UEVENT_CONSTRAINT(0x81d0, 0xf), /* MEM_UOPS_RETIRED.ALL_LOADS */
    INTEL_PST_HSW_CONSTRAINT(0x82d0, 0xf), /* MEM_UOPS_RETIRED.ALL_STORES */
    INTEL_UEVENT_CONSTRAINT(0x01d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L1_HIT */
    INTEL_UEVENT_CONSTRAINT(0x02d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L2_HIT */
    INTEL_UEVENT_CONSTRAINT(0x04d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L3_HIT */
    /* MEM_LOAD_UOPS_RETIRED.HIT_LFB */
    INTEL_UEVENT_CONSTRAINT(0x40d1, 0xf),
    /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS */
    INTEL_UEVENT_CONSTRAINT(0x01d2, 0xf),
    /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT */
    INTEL_UEVENT_CONSTRAINT(0x02d2, 0xf),
    /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM */
    INTEL_UEVENT_CONSTRAINT(0x01d3, 0xf),
    INTEL_UEVENT_CONSTRAINT(0x04c8, 0xf), /* HLE_RETIRED.Abort */
    INTEL_UEVENT_CONSTRAINT(0x04c9, 0xf), /* RTM_RETIRED.Abort */

    EVENT_CONSTRAINT_END
};

struct event_constraint *intel_pebs_constraints(struct perf_event *event)
{
    struct event_constraint *c;
@@ -588,6 +653,12 @@ void intel_pmu_pebs_disable(struct perf_event *event)
    struct hw_perf_event *hwc = &event->hw;

    cpuc->pebs_enabled &= ~(1ULL << hwc->idx);

    if (event->hw.constraint->flags & PERF_X86_EVENT_PEBS_LDLAT)
        cpuc->pebs_enabled &= ~(1ULL << (hwc->idx + 32));
    else if (event->hw.constraint->flags & PERF_X86_EVENT_PEBS_ST)
        cpuc->pebs_enabled &= ~(1ULL << 63);

    if (cpuc->enabled)
        wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);

@@ -697,6 +768,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
     */
    struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
    struct pebs_record_nhm *pebs = __pebs;
    struct pebs_record_hsw *pebs_hsw = __pebs;
    struct perf_sample_data data;
    struct pt_regs regs;
    u64 sample_type;
@@ -706,7 +778,8 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
        return;

    fll = event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT;
    fst = event->hw.flags & PERF_X86_EVENT_PEBS_ST;
    fst = event->hw.flags & (PERF_X86_EVENT_PEBS_ST |
                             PERF_X86_EVENT_PEBS_ST_HSW);

    perf_sample_data_init(&data, 0, event->hw.last_period);

@@ -717,9 +790,6 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
     * if PEBS-LL or PreciseStore
     */
    if (fll || fst) {
        if (sample_type & PERF_SAMPLE_ADDR)
            data.addr = pebs->dla;

        /*
         * Use latency for weight (only avail with PEBS-LL)
         */
@@ -732,6 +802,9 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
        if (sample_type & PERF_SAMPLE_DATA_SRC) {
            if (fll)
                data.data_src.val = load_latency_data(pebs->dse);
            else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW)
                data.data_src.val =
                    precise_store_data_hsw(pebs->dse);
            else
                data.data_src.val = precise_store_data(pebs->dse);
        }
@@ -753,11 +826,18 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
    regs.bp = pebs->bp;
    regs.sp = pebs->sp;

    if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(&regs))
    if (event->attr.precise_ip > 1 && x86_pmu.intel_cap.pebs_format >= 2) {
        regs.ip = pebs_hsw->real_ip;
        regs.flags |= PERF_EFLAGS_EXACT;
    } else if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(&regs))
        regs.flags |= PERF_EFLAGS_EXACT;
    else
        regs.flags &= ~PERF_EFLAGS_EXACT;

    if ((event->attr.sample_type & PERF_SAMPLE_ADDR) &&
        x86_pmu.intel_cap.pebs_format >= 1)
        data.addr = pebs->dla;

    if (has_branch_stack(event))
        data.br_stack = &cpuc->lbr_stack;

@@ -806,35 +886,22 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
    __intel_pmu_pebs_event(event, iregs, at);
}

static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
static void __intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, void *at,
                                       void *top)
{
    struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
    struct debug_store *ds = cpuc->ds;
    struct pebs_record_nhm *at, *top;
    struct perf_event *event = NULL;
    u64 status = 0;
    int bit, n;

    if (!x86_pmu.pebs_active)
        return;

    at  = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
    top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;
    int bit;

    ds->pebs_index = ds->pebs_buffer_base;

    n = top - at;
    if (n <= 0)
        return;
    for (; at < top; at += x86_pmu.pebs_record_size) {
        struct pebs_record_nhm *p = at;

    /*
     * Should not happen, we program the threshold at 1 and do not
     * set a reset value.
     */
    WARN_ONCE(n > x86_pmu.max_pebs_events, "Unexpected number of pebs records %d\n", n);

    for ( ; at < top; at++) {
        for_each_set_bit(bit, (unsigned long *)&at->status, x86_pmu.max_pebs_events) {
        for_each_set_bit(bit, (unsigned long *)&p->status,
                         x86_pmu.max_pebs_events) {
            event = cpuc->events[bit];
            if (!test_bit(bit, cpuc->active_mask))
                continue;
@@ -857,6 +924,61 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
    }
}

static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
{
    struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
    struct debug_store *ds = cpuc->ds;
    struct pebs_record_nhm *at, *top;
    int n;

    if (!x86_pmu.pebs_active)
        return;

    at  = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
    top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;

    ds->pebs_index = ds->pebs_buffer_base;

    n = top - at;
    if (n <= 0)
        return;

    /*
     * Should not happen, we program the threshold at 1 and do not
     * set a reset value.
     */
    WARN_ONCE(n > x86_pmu.max_pebs_events,
              "Unexpected number of pebs records %d\n", n);

    return __intel_pmu_drain_pebs_nhm(iregs, at, top);
}

static void intel_pmu_drain_pebs_hsw(struct pt_regs *iregs)
{
    struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
    struct debug_store *ds = cpuc->ds;
    struct pebs_record_hsw *at, *top;
    int n;

    if (!x86_pmu.pebs_active)
        return;

    at  = (struct pebs_record_hsw *)(unsigned long)ds->pebs_buffer_base;
    top = (struct pebs_record_hsw *)(unsigned long)ds->pebs_index;

    n = top - at;
    if (n <= 0)
        return;
    /*
     * Should not happen, we program the threshold at 1 and do not
     * set a reset value.
     */
    WARN_ONCE(n > x86_pmu.max_pebs_events,
              "Unexpected number of pebs records %d\n", n);

    return __intel_pmu_drain_pebs_nhm(iregs, at, top);
}

/*
 * BTS, PEBS probe and setup
 */
@@ -888,6 +1010,12 @@ void intel_ds_init(void)
            x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
            break;

        case 2:
            pr_cont("PEBS fmt2%c, ", pebs_type);
            x86_pmu.pebs_record_size = sizeof(struct pebs_record_hsw);
            x86_pmu.drain_pebs = intel_pmu_drain_pebs_hsw;
            break;

        default:
            printk(KERN_CONT "no PEBS fmt%d%c, ", format, pebs_type);
            x86_pmu.pebs = 0;
@@ -12,6 +12,16 @@ enum {
    LBR_FORMAT_LIP		= 0x01,
    LBR_FORMAT_EIP		= 0x02,
    LBR_FORMAT_EIP_FLAGS	= 0x03,
    LBR_FORMAT_EIP_FLAGS2	= 0x04,
    LBR_FORMAT_MAX_KNOWN	= LBR_FORMAT_EIP_FLAGS2,
};

static enum {
    LBR_EIP_FLAGS		= 1,
    LBR_TSX			= 2,
} lbr_desc[LBR_FORMAT_MAX_KNOWN + 1] = {
    [LBR_FORMAT_EIP_FLAGS]  = LBR_EIP_FLAGS,
    [LBR_FORMAT_EIP_FLAGS2] = LBR_EIP_FLAGS | LBR_TSX,
};

/*
@@ -56,6 +66,8 @@ enum {
     LBR_FAR)

#define LBR_FROM_FLAG_MISPRED	(1ULL << 63)
#define LBR_FROM_FLAG_IN_TX	(1ULL << 62)
#define LBR_FROM_FLAG_ABORT	(1ULL << 61)

#define for_each_branch_sample_type(x) \
	for ((x) = PERF_SAMPLE_BRANCH_USER; \
@@ -81,9 +93,13 @@ enum {
    X86_BR_JMP      = 1 << 9, /* jump */
    X86_BR_IRQ      = 1 << 10,/* hw interrupt or trap or fault */
    X86_BR_IND_CALL = 1 << 11,/* indirect calls */
    X86_BR_ABORT    = 1 << 12,/* transaction abort */
    X86_BR_IN_TX    = 1 << 13,/* in transaction */
    X86_BR_NO_TX    = 1 << 14,/* not in transaction */
};

#define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL)
#define X86_BR_ANYTX (X86_BR_NO_TX | X86_BR_IN_TX)

#define X86_BR_ANY       \
	(X86_BR_CALL    |\
@@ -95,6 +111,7 @@ enum {
	 X86_BR_JCC     |\
	 X86_BR_JMP	|\
	 X86_BR_IRQ	|\
	 X86_BR_ABORT	|\
	 X86_BR_IND_CALL)

#define X86_BR_ALL (X86_BR_PLM | X86_BR_ANY)
@@ -270,21 +287,31 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)

    for (i = 0; i < x86_pmu.lbr_nr; i++) {
        unsigned long lbr_idx = (tos - i) & mask;
        u64 from, to, mis = 0, pred = 0;
        u64 from, to, mis = 0, pred = 0, in_tx = 0, abort = 0;
        int skip = 0;
        int lbr_flags = lbr_desc[lbr_format];

        rdmsrl(x86_pmu.lbr_from + lbr_idx, from);
        rdmsrl(x86_pmu.lbr_to   + lbr_idx, to);

        if (lbr_format == LBR_FORMAT_EIP_FLAGS) {
        if (lbr_flags & LBR_EIP_FLAGS) {
            mis = !!(from & LBR_FROM_FLAG_MISPRED);
            pred = !mis;
            from = (u64)((((s64)from) << 1) >> 1);
            skip = 1;
        }
        if (lbr_flags & LBR_TSX) {
            in_tx = !!(from & LBR_FROM_FLAG_IN_TX);
            abort = !!(from & LBR_FROM_FLAG_ABORT);
            skip = 3;
        }
        from = (u64)((((s64)from) << skip) >> skip);

        cpuc->lbr_entries[i].from	= from;
        cpuc->lbr_entries[i].to		= to;
        cpuc->lbr_entries[i].mispred	= mis;
        cpuc->lbr_entries[i].predicted	= pred;
        cpuc->lbr_entries[i].in_tx	= in_tx;
        cpuc->lbr_entries[i].abort	= abort;
        cpuc->lbr_entries[i].reserved	= 0;
    }
    cpuc->lbr_stack.nr = i;
@@ -310,7 +337,7 @@ void intel_pmu_lbr_read(void)
 * - in case there is no HW filter
 * - in case the HW filter has errata or limitations
 */
static int intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
static void intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
{
    u64 br_type = event->attr.branch_sample_type;
    int mask = 0;
@@ -318,11 +345,8 @@ static int intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
    if (br_type & PERF_SAMPLE_BRANCH_USER)
        mask |= X86_BR_USER;

    if (br_type & PERF_SAMPLE_BRANCH_KERNEL) {
        if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN))
            return -EACCES;
    if (br_type & PERF_SAMPLE_BRANCH_KERNEL)
        mask |= X86_BR_KERNEL;
    }

    /* we ignore BRANCH_HV here */

@@ -337,13 +361,21 @@ static int intel_pmu_setup_sw_lbr_filter(struct perf_event *event)

    if (br_type & PERF_SAMPLE_BRANCH_IND_CALL)
        mask |= X86_BR_IND_CALL;

    if (br_type & PERF_SAMPLE_BRANCH_ABORT_TX)
        mask |= X86_BR_ABORT;

    if (br_type & PERF_SAMPLE_BRANCH_IN_TX)
        mask |= X86_BR_IN_TX;

    if (br_type & PERF_SAMPLE_BRANCH_NO_TX)
        mask |= X86_BR_NO_TX;

    /*
     * stash actual user request into reg, it may
     * be used by fixup code for some CPU
     */
    event->hw.branch_reg.reg = mask;

    return 0;
}

/*
@@ -391,9 +423,7 @@ int intel_pmu_setup_lbr_filter(struct perf_event *event)
    /*
     * setup SW LBR filter
     */
    ret = intel_pmu_setup_sw_lbr_filter(event);
    if (ret)
        return ret;
    intel_pmu_setup_sw_lbr_filter(event);

    /*
     * setup HW LBR filter, if any
@@ -415,7 +445,7 @@ int intel_pmu_setup_lbr_filter(struct perf_event *event)
 * decoded (e.g., text page not present), then X86_BR_NONE is
 * returned.
 */
static int branch_type(unsigned long from, unsigned long to)
static int branch_type(unsigned long from, unsigned long to, int abort)
{
    struct insn insn;
    void *addr;
@@ -435,6 +465,9 @@ static int branch_type(unsigned long from, unsigned long to)
    if (from == 0 || to == 0)
        return X86_BR_NONE;

    if (abort)
        return X86_BR_ABORT | to_plm;

    if (from_plm == X86_BR_USER) {
        /*
         * can happen if measuring at the user level only
@@ -581,7 +614,13 @@ intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
        from = cpuc->lbr_entries[i].from;
        to = cpuc->lbr_entries[i].to;

        type = branch_type(from, to);
        type = branch_type(from, to, cpuc->lbr_entries[i].abort);
        if (type != X86_BR_NONE && (br_sel & X86_BR_ANYTX)) {
            if (cpuc->lbr_entries[i].in_tx)
                type |= X86_BR_IN_TX;
            else
                type |= X86_BR_NO_TX;
        }

        /* if type does not correspond, then discard */
        if (type == X86_BR_NONE || (br_sel & type) != type) {
@@ -536,7 +536,7 @@ __snbep_cbox_get_constraint(struct intel_uncore_box *box, struct perf_event *eve
    if (!uncore_box_is_fake(box))
        reg1->alloc |= alloc;

    return 0;
    return NULL;
fail:
    for (; i >= 0; i--) {
        if (alloc & (0x1 << i))
@@ -644,7 +644,7 @@ snbep_pcu_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
        (!uncore_box_is_fake(box) && reg1->alloc))
        return NULL;
again:
    mask = 0xff << (idx * 8);
    mask = 0xffULL << (idx * 8);
    raw_spin_lock_irqsave(&er->lock, flags);
    if (!__BITS_VALUE(atomic_read(&er->ref), idx, 8) ||
        !((config1 ^ er->config) & mask)) {
@@ -1923,7 +1923,7 @@ static u64 nhmex_mbox_alter_er(struct perf_event *event, int new_idx, bool modif
{
    struct hw_perf_event *hwc = &event->hw;
    struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
    int idx, orig_idx = __BITS_VALUE(reg1->idx, 0, 8);
    u64 idx, orig_idx = __BITS_VALUE(reg1->idx, 0, 8);
    u64 config = reg1->config;

    /* get the non-shared control bits and shift them */
@@ -2723,15 +2723,16 @@ static void uncore_put_event_constraint(struct intel_uncore_box *box, struct per
static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int n)
{
    unsigned long used_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)];
    struct event_constraint *c, *constraints[UNCORE_PMC_IDX_MAX];
    struct event_constraint *c;
    int i, wmin, wmax, ret = 0;
    struct hw_perf_event *hwc;

    bitmap_zero(used_mask, UNCORE_PMC_IDX_MAX);

    for (i = 0, wmin = UNCORE_PMC_IDX_MAX, wmax = 0; i < n; i++) {
        hwc = &box->event_list[i]->hw;
        c = uncore_get_event_constraint(box, box->event_list[i]);
        constraints[i] = c;
        hwc->constraint = c;
        wmin = min(wmin, c->weight);
        wmax = max(wmax, c->weight);
    }
@@ -2739,7 +2740,7 @@ static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int
    /* fastpath, try to reuse previous register */
    for (i = 0; i < n; i++) {
        hwc = &box->event_list[i]->hw;
        c = constraints[i];
        c = hwc->constraint;

        /* never assigned */
        if (hwc->idx == -1)
@@ -2759,7 +2760,8 @@ static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int
    }
    /* slow path */
    if (i != n)
        ret = perf_assign_events(constraints, n, wmin, wmax, assign);
        ret = perf_assign_events(box->event_list, n,
                                 wmin, wmax, assign);

    if (!assign || ret) {
        for (i = 0; i < n; i++)
Some files were not shown because too many files have changed in this diff.