Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf updates from Ingo Molnar:
"Features:
- Add "uretprobes" - an optimization to uprobes, like kretprobes are
an optimization to kprobes. "perf probe -x file sym%return" now
works like kretprobes. By Oleg Nesterov.
- Introduce per core aggregation in 'perf stat', from Stephane
Eranian.
- Add memory profiling via PEBS, from Stephane Eranian.
- Event group view for 'annotate' in --stdio, --tui and --gtk, from
Namhyung Kim.
- Add support for AMD NB and L2I "uncore" counters, by Jacob Shin.
- Add Ivy Bridge-EP uncore support, by Zheng Yan.
- IBM zEnterprise EC12 oprofile support patchlet from Robert Richter.
- Add perf test entries for checking breakpoint overflow signal
handler issues, from Jiri Olsa.
- Add perf test entry for checking the number of EXIT events, from
Namhyung Kim.
- Add perf test entries for checking --cpu in record and stat, from
Jiri Olsa.
- Introduce perf stat --repeat forever, from Frederik Deweerdt.
- Add --no-demangle to report/top, from Namhyung Kim.
- PowerPC fixes plus a couple of cleanups/optimizations in uprobes
and trace_uprobes, by Oleg Nesterov.
Various fixes and refactorings:
- Fix dependency of the python binding wrt libtraceevent, from
Naohiro Aota.
- Simplify some perf_evlist methods to allow 'stat' to share code
with 'record' and 'trace', by Arnaldo Carvalho de Melo.
- Remove dead code related to libtraceevent integration, from
Namhyung Kim.
- Revert "perf sched: Handle PERF_RECORD_EXIT events" to get 'perf
sched lat' back working, by Arnaldo Carvalho de Melo
- We don't use Newt anymore, just plain libslang, by Arnaldo Carvalho
de Melo.
- Kill a bunch of die() calls, from Namhyung Kim.
- Fix build on non-glibc systems due to libio.h absence, from Cody P
Schafer.
- Remove some perf_session and tracing dead code, from David Ahern.
- Honor parallel jobs, fix from Borislav Petkov.
- Introduce tools/lib/lk library, initially just removing duplication
among tools/perf and tools/vm, from Borislav Petkov.
... and many more I missed to list, see the shortlog and git log for
more details."
* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (136 commits)
perf/x86/intel/P4: Robistify P4 PMU types
perf/x86/amd: Fix AMD NB and L2I "uncore" support
perf/x86/amd: Remove old-style NB counter support from perf_event_amd.c
perf/x86: Check all MSRs before passing hw check
perf/x86/amd: Add support for AMD NB and L2I "uncore" counters
perf/x86/intel: Add Ivy Bridge-EP uncore support
perf/x86/intel: Fix SNB-EP CBO and PCU uncore PMU filter management
perf/x86: Avoid kfree() in CPU_{STARTING,DYING}
uprobes/perf: Avoid perf_trace_buf_prepare/submit if ->perf_events is empty
uprobes/tracing: Don't pass addr=ip to perf_trace_buf_submit()
uprobes/tracing: Change create_trace_uprobe() to support uretprobes
uprobes/tracing: Make seq_printf() code uretprobe-friendly
uprobes/tracing: Make register_uprobe_event() paths uretprobe-friendly
uprobes/tracing: Make uprobe_{trace,perf}_print() uretprobe-friendly
uprobes/tracing: Introduce is_ret_probe() and uretprobe_dispatcher()
uprobes/tracing: Introduce uprobe_{trace,perf}_print() helpers
uprobes/tracing: Generalize struct uprobe_trace_entry_head
uprobes/tracing: Kill the pointless local_save_flags/preempt_count calls
uprobes/tracing: Kill the pointless seq_print_ip_sym() call
uprobes/tracing: Kill the pointless task_pt_regs() calls
...
Documentation/trace/uprobetracer.txt
@@ -1,6 +1,8 @@
 Uprobe-tracer: Uprobe-based Event Tracing
 =========================================
 
 Documentation written by Srikar Dronamraju
 
@@ -13,78 +15,94 @@ current_tracer. Instead of that, add probe points via
 /sys/kernel/debug/tracing/events/uprobes/<EVENT>/enabled.
 
 However unlike kprobe-event tracer, the uprobe event interface expects the
-user to calculate the offset of the probepoint in the object
+user to calculate the offset of the probepoint in the object.
 
 Synopsis of uprobe_tracer
 -------------------------
-  p[:[GRP/]EVENT] PATH:SYMBOL[+offs] [FETCHARGS] : Set a probe
+  p[:[GRP/]EVENT] PATH:SYMBOL[+offs] [FETCHARGS] : Set a uprobe
+  r[:[GRP/]EVENT] PATH:SYMBOL[+offs] [FETCHARGS] : Set a return uprobe (uretprobe)
+  -:[GRP/]EVENT                                  : Clear uprobe or uretprobe event
 
-  GRP           : Group name. If omitted, use "uprobes" for it.
-  EVENT         : Event name. If omitted, the event name is generated
-                  based on SYMBOL+offs.
-  PATH          : path to an executable or a library.
-  SYMBOL[+offs] : Symbol+offset where the probe is inserted.
+  GRP           : Group name. If omitted, "uprobes" is the default value.
+  EVENT         : Event name. If omitted, the event name is generated based
+                  on SYMBOL+offs.
+  PATH          : Path to an executable or a library.
+  SYMBOL[+offs] : Symbol+offset where the probe is inserted.
 
   FETCHARGS     : Arguments. Each probe can have up to 128 args.
    %REG         : Fetch register REG
 
 Event Profiling
 ---------------
 You can check the total number of probe hits and probe miss-hits via
 /sys/kernel/debug/tracing/uprobe_profile.
 The first column is event name, the second is the number of probe hits,
 the third is the number of probe miss-hits.
 
 Usage examples
 --------------
-To add a probe as a new event, write a new definition to uprobe_events
-as below.
+ * Add a probe as a new uprobe event, write a new definition to uprobe_events
+as below: (sets a uprobe at an offset of 0x4245c0 in the executable /bin/bash)
 
   echo 'p: /bin/bash:0x4245c0' > /sys/kernel/debug/tracing/uprobe_events
 
-This sets a uprobe at an offset of 0x4245c0 in the executable /bin/bash
+ * Add a probe as a new uretprobe event:
 
-  echo > /sys/kernel/debug/tracing/uprobe_events
+  echo 'r: /bin/bash:0x4245c0' > /sys/kernel/debug/tracing/uprobe_events
 
-This clears all probe points.
+ * Unset registered event:
 
-The following example shows how to dump the instruction pointer and %ax
-a register at the probed text address. Here we are trying to probe
-function zfree in /bin/zsh
+  echo '-:bash_0x4245c0' >> /sys/kernel/debug/tracing/uprobe_events
+
+ * Print out the events that are registered:
+
+  cat /sys/kernel/debug/tracing/uprobe_events
+
+ * Clear all events:
+
+  echo > /sys/kernel/debug/tracing/uprobe_events
+
+Following example shows how to dump the instruction pointer and %ax register
+at the probed text address. Probe zfree function in /bin/zsh:
 
   # cd /sys/kernel/debug/tracing/
   # cat /proc/`pgrep zsh`/maps | grep /bin/zsh | grep r-xp
   00400000-0048a000 r-xp 00000000 08:03 130904 /bin/zsh
   # objdump -T /bin/zsh | grep -w zfree
   0000000000446420 g    DF .text  0000000000000012  Base  zfree
 
 0x46420 is the offset of zfree in object /bin/zsh that is loaded at
-0x00400000. Hence the command to probe would be :
+0x00400000. Hence the command to uprobe would be:
 
-  # echo 'p /bin/zsh:0x46420 %ip %ax' > uprobe_events
+  # echo 'p:zfree_entry /bin/zsh:0x46420 %ip %ax' > uprobe_events
+
+And the same for the uretprobe would be:
+
+  # echo 'r:zfree_exit /bin/zsh:0x46420 %ip %ax' >> uprobe_events
 
-Please note: User has to explicitly calculate the offset of the probepoint
+Please note: User has to explicitly calculate the offset of the probe-point
 in the object. We can see the events that are registered by looking at the
 uprobe_events file.
 
   # cat uprobe_events
-  p:uprobes/p_zsh_0x46420 /bin/zsh:0x00046420 arg1=%ip arg2=%ax
+  p:uprobes/zfree_entry /bin/zsh:0x00046420 arg1=%ip arg2=%ax
+  r:uprobes/zfree_exit /bin/zsh:0x00046420 arg1=%ip arg2=%ax
 
-The format of events can be seen by viewing the file events/uprobes/p_zsh_0x46420/format
+Format of events can be seen by viewing the file events/uprobes/zfree_entry/format
 
-  # cat events/uprobes/p_zsh_0x46420/format
-  name: p_zsh_0x46420
+  # cat events/uprobes/zfree_entry/format
+  name: zfree_entry
   ID: 922
   format:
         field:unsigned short common_type;       offset:0;  size:2; signed:0;
         field:unsigned char common_flags;       offset:2;  size:1; signed:0;
         field:unsigned char common_preempt_count;       offset:3;  size:1; signed:0;
         field:int common_pid;   offset:4;  size:4; signed:1;
         field:int common_padding;       offset:8;  size:4; signed:1;
 
         field:unsigned long __probe_ip; offset:12; size:4; signed:0;
         field:u32 arg1; offset:16; size:4; signed:0;
         field:u32 arg2; offset:20; size:4; signed:0;
 
  print fmt: "(%lx) arg1=%lx arg2=%lx", REC->__probe_ip, REC->arg1, REC->arg2
 
@@ -94,6 +112,7 @@ events, you need to enable it by:
 
   # echo 1 > events/uprobes/enable
 
 Lets disable the event after sleeping for some time.
 
   # sleep 20
   # echo 0 > events/uprobes/enable
 
@@ -104,10 +123,11 @@ And you can see the traced information via /sys/kernel/debug/tracing/trace.
 #
 #           TASK-PID      CPU#   TIMESTAMP  FUNCTION
 #              | |          |        |          |
-     zsh-24842 [006] 258544.995456: p_zsh_0x46420: (0x446420) arg1=446421 arg2=79
-     zsh-24842 [007] 258545.000270: p_zsh_0x46420: (0x446420) arg1=446421 arg2=79
-     zsh-24842 [002] 258545.043929: p_zsh_0x46420: (0x446420) arg1=446421 arg2=79
-     zsh-24842 [004] 258547.046129: p_zsh_0x46420: (0x446420) arg1=446421 arg2=79
+     zsh-24842 [006] 258544.995456: zfree_entry: (0x446420) arg1=446420 arg2=79
+     zsh-24842 [007] 258545.000270: zfree_exit:  (0x446540 <- 0x446420) arg1=446540 arg2=0
+     zsh-24842 [002] 258545.043929: zfree_entry: (0x446420) arg1=446420 arg2=79
+     zsh-24842 [004] 258547.046129: zfree_exit:  (0x446540 <- 0x446420) arg1=446540 arg2=0
 
-Each line shows us probes were triggered for a pid 24842 with ip being
-0x446421 and contents of ax register being 79.
+Output shows us uprobe was triggered for a pid 24842 with ip being 0x446420
+and contents of ax register being 79. And uretprobe was triggered with ip at
+0x446540 with counterpart function entry at 0x446420.
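Putting the updated documentation together, an end-to-end uretprobe session based on the zfree example above looks like this (the 0x46420 offset is specific to that particular /bin/zsh binary and must be recomputed as shown in the documentation):

  # cd /sys/kernel/debug/tracing
  # echo 'p:zfree_entry /bin/zsh:0x46420 %ip %ax' > uprobe_events
  # echo 'r:zfree_exit /bin/zsh:0x46420 %ip %ax' >> uprobe_events
  # echo 1 > events/uprobes/enable
  # sleep 20
  # echo 0 > events/uprobes/enable
  # cat trace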
Makefile
@@ -1332,11 +1332,11 @@ kernelversion:
 # Clear a bunch of variables before executing the submake
 tools/: FORCE
 	$(Q)mkdir -p $(objtree)/tools
-	$(Q)$(MAKE) LDFLAGS= MAKEFLAGS= O=$(objtree) subdir=tools -C $(src)/tools/
+	$(Q)$(MAKE) LDFLAGS= MAKEFLAGS="$(filter --j% -j,$(MAKEFLAGS))" O=$(objtree) subdir=tools -C $(src)/tools/
 
 tools/%: FORCE
 	$(Q)mkdir -p $(objtree)/tools
-	$(Q)$(MAKE) LDFLAGS= MAKEFLAGS= O=$(objtree) subdir=tools -C $(src)/tools/ $*
+	$(Q)$(MAKE) LDFLAGS= MAKEFLAGS="$(filter --j% -j,$(MAKEFLAGS))" O=$(objtree) subdir=tools -C $(src)/tools/ $*
 
 # Single targets
 # ---------------------------------------------------------------------------
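The effect of the Makefile change above is that the tools/ submake no longer clears MAKEFLAGS wholesale; $(filter --j% -j,$(MAKEFLAGS)) keeps just the -j/--jobserver options, so a parallel top-level invocation now builds the tools directory in parallel too. A hypothetical invocation:

  make -j8 tools/perf    # parallelism now propagates into the tools/ build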
arch/powerpc/include/asm/uprobes.h
@@ -51,4 +51,5 @@ extern int arch_uprobe_post_xol(struct arch_uprobe *aup, struct pt_regs *regs);
 extern bool arch_uprobe_xol_was_trapped(struct task_struct *tsk);
 extern int arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val, void *data);
 extern void arch_uprobe_abort_xol(struct arch_uprobe *aup, struct pt_regs *regs);
+extern unsigned long arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs *regs);
 #endif /* _ASM_UPROBES_H */

arch/powerpc/kernel/uprobes.c
@@ -30,6 +30,16 @@
 
 #define UPROBE_TRAP_NR	UINT_MAX
 
+/**
+ * is_trap_insn - check if the instruction is a trap variant
+ * @insn: instruction to be checked.
+ * Returns true if @insn is a trap variant.
+ */
+bool is_trap_insn(uprobe_opcode_t *insn)
+{
+	return (is_trap(*insn));
+}
+
 /**
  * arch_uprobe_analyze_insn
  * @mm: the probed address space.
@@ -43,12 +53,6 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe,
 	if (addr & 0x03)
 		return -EINVAL;
 
-	/*
-	 * We currently don't support a uprobe on an already
-	 * existing breakpoint instruction underneath
-	 */
-	if (is_trap(auprobe->ainsn))
-		return -ENOTSUPP;
 	return 0;
 }
@@ -188,3 +192,16 @@ bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
 
 	return false;
 }
+
+unsigned long
+arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs *regs)
+{
+	unsigned long orig_ret_vaddr;
+
+	orig_ret_vaddr = regs->link;
+
+	/* Replace the return addr with trampoline addr */
+	regs->link = trampoline_vaddr;
+
+	return orig_ret_vaddr;
+}
arch/s390/oprofile/init.c
@@ -440,6 +440,7 @@ static int oprofile_hwsampler_init(struct oprofile_operations *ops)
 		switch (id.machine) {
 		case 0x2097: case 0x2098: ops->cpu_type = "s390/z10"; break;
 		case 0x2817: case 0x2818: ops->cpu_type = "s390/z196"; break;
+		case 0x2827:              ops->cpu_type = "s390/zEC12"; break;
 		default: return -ENODEV;
 		}
 	}
arch/x86/include/asm/cpufeature.h
@@ -168,6 +168,7 @@
 #define X86_FEATURE_TOPOEXT	(6*32+22) /* topology extensions CPUID leafs */
 #define X86_FEATURE_PERFCTR_CORE (6*32+23) /* core performance counter extensions */
 #define X86_FEATURE_PERFCTR_NB	(6*32+24) /* NB performance counter extensions */
+#define X86_FEATURE_PERFCTR_L2	(6*32+28) /* L2 performance counter extensions */
 
 /*
  * Auxiliary flags: Linux defined - For features scattered in various
@@ -311,6 +312,7 @@ extern const char * const x86_power_flags[32];
 #define cpu_has_pclmulqdq	boot_cpu_has(X86_FEATURE_PCLMULQDQ)
 #define cpu_has_perfctr_core	boot_cpu_has(X86_FEATURE_PERFCTR_CORE)
 #define cpu_has_perfctr_nb	boot_cpu_has(X86_FEATURE_PERFCTR_NB)
+#define cpu_has_perfctr_l2	boot_cpu_has(X86_FEATURE_PERFCTR_L2)
 #define cpu_has_cx8		boot_cpu_has(X86_FEATURE_CX8)
 #define cpu_has_cx16		boot_cpu_has(X86_FEATURE_CX16)
 #define cpu_has_eager_fpu	boot_cpu_has(X86_FEATURE_EAGER_FPU)
arch/x86/include/asm/perf_event_p4.h
@@ -24,45 +24,45 @@
 #define ARCH_P4_CNTRVAL_MASK	((1ULL << ARCH_P4_CNTRVAL_BITS) - 1)
 #define ARCH_P4_UNFLAGGED_BIT	((1ULL) << (ARCH_P4_CNTRVAL_BITS - 1))
 
-#define P4_ESCR_EVENT_MASK	0x7e000000U
+#define P4_ESCR_EVENT_MASK	0x7e000000ULL
 #define P4_ESCR_EVENT_SHIFT	25
-#define P4_ESCR_EVENTMASK_MASK	0x01fffe00U
+#define P4_ESCR_EVENTMASK_MASK	0x01fffe00ULL
 #define P4_ESCR_EVENTMASK_SHIFT	9
-#define P4_ESCR_TAG_MASK	0x000001e0U
+#define P4_ESCR_TAG_MASK	0x000001e0ULL
 #define P4_ESCR_TAG_SHIFT	5
-#define P4_ESCR_TAG_ENABLE	0x00000010U
-#define P4_ESCR_T0_OS		0x00000008U
-#define P4_ESCR_T0_USR		0x00000004U
-#define P4_ESCR_T1_OS		0x00000002U
-#define P4_ESCR_T1_USR		0x00000001U
+#define P4_ESCR_TAG_ENABLE	0x00000010ULL
+#define P4_ESCR_T0_OS		0x00000008ULL
+#define P4_ESCR_T0_USR		0x00000004ULL
+#define P4_ESCR_T1_OS		0x00000002ULL
+#define P4_ESCR_T1_USR		0x00000001ULL
 
 #define P4_ESCR_EVENT(v)	((v) << P4_ESCR_EVENT_SHIFT)
 #define P4_ESCR_EMASK(v)	((v) << P4_ESCR_EVENTMASK_SHIFT)
 #define P4_ESCR_TAG(v)		((v) << P4_ESCR_TAG_SHIFT)
 
-#define P4_CCCR_OVF		0x80000000U
-#define P4_CCCR_CASCADE		0x40000000U
-#define P4_CCCR_OVF_PMI_T0	0x04000000U
-#define P4_CCCR_OVF_PMI_T1	0x08000000U
-#define P4_CCCR_FORCE_OVF	0x02000000U
-#define P4_CCCR_EDGE		0x01000000U
-#define P4_CCCR_THRESHOLD_MASK	0x00f00000U
+#define P4_CCCR_OVF		0x80000000ULL
+#define P4_CCCR_CASCADE		0x40000000ULL
+#define P4_CCCR_OVF_PMI_T0	0x04000000ULL
+#define P4_CCCR_OVF_PMI_T1	0x08000000ULL
+#define P4_CCCR_FORCE_OVF	0x02000000ULL
+#define P4_CCCR_EDGE		0x01000000ULL
+#define P4_CCCR_THRESHOLD_MASK	0x00f00000ULL
 #define P4_CCCR_THRESHOLD_SHIFT	20
-#define P4_CCCR_COMPLEMENT	0x00080000U
-#define P4_CCCR_COMPARE		0x00040000U
-#define P4_CCCR_ESCR_SELECT_MASK	0x0000e000U
+#define P4_CCCR_COMPLEMENT	0x00080000ULL
+#define P4_CCCR_COMPARE		0x00040000ULL
+#define P4_CCCR_ESCR_SELECT_MASK	0x0000e000ULL
 #define P4_CCCR_ESCR_SELECT_SHIFT	13
-#define P4_CCCR_ENABLE		0x00001000U
-#define P4_CCCR_THREAD_SINGLE	0x00010000U
-#define P4_CCCR_THREAD_BOTH	0x00020000U
-#define P4_CCCR_THREAD_ANY	0x00030000U
-#define P4_CCCR_RESERVED	0x00000fffU
+#define P4_CCCR_ENABLE		0x00001000ULL
+#define P4_CCCR_THREAD_SINGLE	0x00010000ULL
+#define P4_CCCR_THREAD_BOTH	0x00020000ULL
+#define P4_CCCR_THREAD_ANY	0x00030000ULL
+#define P4_CCCR_RESERVED	0x00000fffULL
 
 #define P4_CCCR_THRESHOLD(v)	((v) << P4_CCCR_THRESHOLD_SHIFT)
 #define P4_CCCR_ESEL(v)		((v) << P4_CCCR_ESCR_SELECT_SHIFT)
 
 #define P4_GEN_ESCR_EMASK(class, name, bit) \
-	class##__##name = ((1 << bit) << P4_ESCR_EVENTMASK_SHIFT)
+	class##__##name = ((1ULL << bit) << P4_ESCR_EVENTMASK_SHIFT)
 #define P4_ESCR_EMASK_BIT(class, name)	class##__##name
 
 /*
@@ -107,7 +107,7 @@
  *	P4_PEBS_CONFIG_MASK and related bits on
  *	modification.)
  */
-#define P4_CONFIG_ALIASABLE		(1 << 9)
+#define P4_CONFIG_ALIASABLE		(1ULL << 9)
 
 /*
  * The bits we allow to pass for RAW events
@@ -784,17 +784,17 @@ enum P4_ESCR_EMASKS {
  * Note we have UOP and PEBS bits reserved for now
  * just in case if we will need them once
  */
-#define P4_PEBS_CONFIG_ENABLE		(1 << 7)
-#define P4_PEBS_CONFIG_UOP_TAG		(1 << 8)
-#define P4_PEBS_CONFIG_METRIC_MASK	0x3f
-#define P4_PEBS_CONFIG_MASK		0xff
+#define P4_PEBS_CONFIG_ENABLE		(1ULL << 7)
+#define P4_PEBS_CONFIG_UOP_TAG		(1ULL << 8)
+#define P4_PEBS_CONFIG_METRIC_MASK	0x3FLL
+#define P4_PEBS_CONFIG_MASK		0xFFLL
 
 /*
  * mem: Only counters MSR_IQ_COUNTER4 (16) and
  * MSR_IQ_COUNTER5 (17) are allowed for PEBS sampling
  */
-#define P4_PEBS_ENABLE			0x02000000U
-#define P4_PEBS_ENABLE_UOP_TAG		0x01000000U
+#define P4_PEBS_ENABLE			0x02000000ULL
+#define P4_PEBS_ENABLE_UOP_TAG		0x01000000ULL
 
 #define p4_config_unpack_metric(v)	(((u64)(v)) & P4_PEBS_CONFIG_METRIC_MASK)
 #define p4_config_unpack_pebs(v)	(((u64)(v)) & P4_PEBS_CONFIG_MASK)
arch/x86/include/asm/uprobes.h
@@ -55,4 +55,5 @@ extern int arch_uprobe_post_xol(struct arch_uprobe *aup, struct pt_regs *regs);
 extern bool arch_uprobe_xol_was_trapped(struct task_struct *tsk);
 extern int arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val, void *data);
 extern void arch_uprobe_abort_xol(struct arch_uprobe *aup, struct pt_regs *regs);
+extern unsigned long arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs *regs);
 #endif /* _ASM_UPROBES_H */
arch/x86/include/uapi/asm/msr-index.h
@@ -72,6 +72,7 @@
 #define MSR_IA32_PEBS_ENABLE		0x000003f1
 #define MSR_IA32_DS_AREA		0x00000600
 #define MSR_IA32_PERF_CAPABILITIES	0x00000345
+#define MSR_PEBS_LD_LAT_THRESHOLD	0x000003f6
 
 #define MSR_MTRRfix64K_00000		0x00000250
 #define MSR_MTRRfix16K_80000		0x00000258
@@ -195,6 +196,10 @@
 #define MSR_AMD64_IBSBRTARGET		0xc001103b
 #define MSR_AMD64_IBS_REG_COUNT_MAX	8 /* includes MSR_AMD64_IBSBRTARGET */
 
+/* Fam 16h MSRs */
+#define MSR_F16H_L2I_PERF_CTL		0xc0010230
+#define MSR_F16H_L2I_PERF_CTR		0xc0010231
+
 /* Fam 15h MSRs */
 #define MSR_F15H_PERF_CTL		0xc0010200
 #define MSR_F15H_PERF_CTR		0xc0010201
arch/x86/kernel/cpu/Makefile
@@ -31,7 +31,7 @@ obj-$(CONFIG_CPU_SUP_UMC_32)		+= umc.o
 obj-$(CONFIG_PERF_EVENTS)		+= perf_event.o
 
 ifdef CONFIG_PERF_EVENTS
-obj-$(CONFIG_CPU_SUP_AMD)		+= perf_event_amd.o
+obj-$(CONFIG_CPU_SUP_AMD)		+= perf_event_amd.o perf_event_amd_uncore.o
 obj-$(CONFIG_CPU_SUP_INTEL)		+= perf_event_p6.o perf_event_knc.o perf_event_p4.o
 obj-$(CONFIG_CPU_SUP_INTEL)		+= perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o
 obj-$(CONFIG_CPU_SUP_INTEL)		+= perf_event_intel_uncore.o
arch/x86/kernel/cpu/perf_event.c
@@ -180,8 +180,9 @@ static void release_pmc_hardware(void) {}
 
 static bool check_hw_exists(void)
 {
-	u64 val, val_new = ~0;
-	int i, reg, ret = 0;
+	u64 val, val_fail, val_new = ~0;
+	int i, reg, reg_fail, ret = 0;
+	int bios_fail = 0;
 
 	/*
 	 * Check to see if the BIOS enabled any of the counters, if so
@@ -192,8 +193,11 @@ static bool check_hw_exists(void)
 		ret = rdmsrl_safe(reg, &val);
 		if (ret)
 			goto msr_fail;
-		if (val & ARCH_PERFMON_EVENTSEL_ENABLE)
-			goto bios_fail;
+		if (val & ARCH_PERFMON_EVENTSEL_ENABLE) {
+			bios_fail = 1;
+			val_fail = val;
+			reg_fail = reg;
+		}
 	}
 
 	if (x86_pmu.num_counters_fixed) {
@@ -202,8 +206,11 @@ static bool check_hw_exists(void)
 		if (ret)
 			goto msr_fail;
 		for (i = 0; i < x86_pmu.num_counters_fixed; i++) {
-			if (val & (0x03 << i*4))
-				goto bios_fail;
+			if (val & (0x03 << i*4)) {
+				bios_fail = 1;
+				val_fail = val;
+				reg_fail = reg;
+			}
 		}
 	}
 
@@ -221,14 +228,13 @@ static bool check_hw_exists(void)
 	if (ret || val != val_new)
 		goto msr_fail;
 
-	return true;
-
-bios_fail:
 	/*
 	 * We still allow the PMU driver to operate:
 	 */
-	printk(KERN_CONT "Broken BIOS detected, complain to your hardware vendor.\n");
-	printk(KERN_ERR FW_BUG "the BIOS has corrupted hw-PMU resources (MSR %x is %Lx)\n", reg, val);
+	if (bios_fail) {
+		printk(KERN_CONT "Broken BIOS detected, complain to your hardware vendor.\n");
+		printk(KERN_ERR FW_BUG "the BIOS has corrupted hw-PMU resources (MSR %x is %Lx)\n", reg_fail, val_fail);
+	}
 
 	return true;
@@ -1316,9 +1322,16 @@ static struct attribute_group x86_pmu_format_group = {
  */
 static void __init filter_events(struct attribute **attrs)
 {
+	struct device_attribute *d;
+	struct perf_pmu_events_attr *pmu_attr;
 	int i, j;
 
 	for (i = 0; attrs[i]; i++) {
+		d = (struct device_attribute *)attrs[i];
+		pmu_attr = container_of(d, struct perf_pmu_events_attr, attr);
+		/* str trumps id */
+		if (pmu_attr->event_str)
+			continue;
 		if (x86_pmu.event_map(i))
 			continue;
@@ -1330,23 +1343,46 @@ static void __init filter_events(struct attribute **attrs)
 	}
 }
 
-static ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr,
+/* Merge two pointer arrays */
+static __init struct attribute **merge_attr(struct attribute **a, struct attribute **b)
+{
+	struct attribute **new;
+	int j, i;
+
+	for (j = 0; a[j]; j++)
+		;
+	for (i = 0; b[i]; i++)
+		j++;
+	j++;
+
+	new = kmalloc(sizeof(struct attribute *) * j, GFP_KERNEL);
+	if (!new)
+		return NULL;
+
+	j = 0;
+	for (i = 0; a[i]; i++)
+		new[j++] = a[i];
+	for (i = 0; b[i]; i++)
+		new[j++] = b[i];
+	new[j] = NULL;
+
+	return new;
+}
+
+ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr,
 			  char *page)
 {
 	struct perf_pmu_events_attr *pmu_attr = \
 		container_of(attr, struct perf_pmu_events_attr, attr);
-
 	u64 config = x86_pmu.event_map(pmu_attr->id);
+
+	/* string trumps id */
+	if (pmu_attr->event_str)
+		return sprintf(page, "%s", pmu_attr->event_str);
+
 	return x86_pmu.events_sysfs_show(page, config);
 }
 
-#define EVENT_VAR(_id)  event_attr_##_id
-#define EVENT_PTR(_id) &event_attr_##_id.attr.attr
-
-#define EVENT_ATTR(_name, _id)						\
-	PMU_EVENT_ATTR(_name, EVENT_VAR(_id), PERF_COUNT_HW_##_id,	\
-		       events_sysfs_show)
-
 EVENT_ATTR(cpu-cycles,			CPU_CYCLES		);
 EVENT_ATTR(instructions,		INSTRUCTIONS		);
 EVENT_ATTR(cache-references,		CACHE_REFERENCES	);
@@ -1459,16 +1495,27 @@ static int __init init_hw_perf_events(void)
 
 	unconstrained = (struct event_constraint)
 		__EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1,
-				   0, x86_pmu.num_counters, 0);
+				   0, x86_pmu.num_counters, 0, 0);
 
 	x86_pmu.attr_rdpmc = 1; /* enable userspace RDPMC usage by default */
 	x86_pmu_format_group.attrs = x86_pmu.format_attrs;
 
+	if (x86_pmu.event_attrs)
+		x86_pmu_events_group.attrs = x86_pmu.event_attrs;
+
 	if (!x86_pmu.events_sysfs_show)
 		x86_pmu_events_group.attrs = &empty_attrs;
 	else
 		filter_events(x86_pmu_events_group.attrs);
 
+	if (x86_pmu.cpu_events) {
+		struct attribute **tmp;
+
+		tmp = merge_attr(x86_pmu_events_group.attrs, x86_pmu.cpu_events);
+		if (!WARN_ON(!tmp))
+			x86_pmu_events_group.attrs = tmp;
+	}
+
 	pr_info("... version:                %d\n",     x86_pmu.version);
 	pr_info("... bit width:              %d\n",     x86_pmu.cntval_bits);
 	pr_info("... generic registers:      %d\n",     x86_pmu.num_counters);
arch/x86/kernel/cpu/perf_event.h
@@ -46,6 +46,7 @@ enum extra_reg_type {
 	EXTRA_REG_RSP_0 = 0,	/* offcore_response_0 */
 	EXTRA_REG_RSP_1 = 1,	/* offcore_response_1 */
 	EXTRA_REG_LBR   = 2,	/* lbr_select */
+	EXTRA_REG_LDLAT = 3,	/* ld_lat_threshold */
 
 	EXTRA_REG_MAX		/* number of entries needed */
 };
@@ -59,7 +60,13 @@ struct event_constraint {
 	u64	cmask;
 	int	weight;
 	int	overlap;
+	int	flags;
 };
+/*
+ * struct event_constraint flags
+ */
+#define PERF_X86_EVENT_PEBS_LDLAT	0x1 /* ld+ldlat data address sampling */
+#define PERF_X86_EVENT_PEBS_ST		0x2 /* st data address sampling */
 
 struct amd_nb {
 	int nb_id;  /* NorthBridge id */
@@ -170,16 +177,17 @@ struct cpu_hw_events {
 	void				*kfree_on_online;
 };
 
-#define __EVENT_CONSTRAINT(c, n, m, w, o) {\
+#define __EVENT_CONSTRAINT(c, n, m, w, o, f) {\
 	{ .idxmsk64 = (n) },		\
 	.code = (c),			\
 	.cmask = (m),			\
 	.weight = (w),			\
 	.overlap = (o),			\
+	.flags = f,			\
 }
 
 #define EVENT_CONSTRAINT(c, n, m)	\
-	__EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 0)
+	__EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 0, 0)
 
 /*
  * The overlap flag marks event constraints with overlapping counter
@@ -203,7 +211,7 @@ struct cpu_hw_events {
  * and its counter masks must be kept at a minimum.
  */
 #define EVENT_CONSTRAINT_OVERLAP(c, n, m)	\
-	__EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 1)
+	__EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 1, 0)
 
 /*
  * Constraint on the Event code.
@@ -231,6 +239,14 @@ struct cpu_hw_events {
 #define INTEL_UEVENT_CONSTRAINT(c, n)	\
 	EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
 
+#define INTEL_PLD_CONSTRAINT(c, n)	\
+	__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
+			   HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LDLAT)
+
+#define INTEL_PST_CONSTRAINT(c, n)	\
+	__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
+			   HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST)
+
 #define EVENT_CONSTRAINT_END		\
 	EVENT_CONSTRAINT(0, 0, 0)
@@ -260,12 +276,22 @@ struct extra_reg {
 	.msr = (ms),		\
 	.config_mask = (m),	\
 	.valid_mask = (vm),	\
-	.idx = EXTRA_REG_##i	\
+	.idx = EXTRA_REG_##i,	\
 	}
 
 #define INTEL_EVENT_EXTRA_REG(event, msr, vm, idx)	\
 	EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm, idx)
 
+#define INTEL_UEVENT_EXTRA_REG(event, msr, vm, idx) \
+	EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT | \
+			ARCH_PERFMON_EVENTSEL_UMASK, vm, idx)
+
+#define INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(c) \
+	INTEL_UEVENT_EXTRA_REG(c, \
+			       MSR_PEBS_LD_LAT_THRESHOLD, \
+			       0xffff, \
+			       LDLAT)
+
 #define EVENT_EXTRA_END EVENT_EXTRA_REG(0, 0, 0, 0, RSP_0)
 
 union perf_capabilities {
@@ -355,8 +381,10 @@ struct x86_pmu {
 	 */
 	int		attr_rdpmc;
 	struct attribute **format_attrs;
+	struct attribute **event_attrs;
 
 	ssize_t		(*events_sysfs_show)(char *page, u64 config);
+	struct attribute **cpu_events;
 
 	/*
 	 * CPU Hotplug hooks
@@ -421,6 +449,23 @@ do {									\
 #define ERF_NO_HT_SHARING	1
 #define ERF_HAS_RSP_1		2
 
+#define EVENT_VAR(_id)  event_attr_##_id
+#define EVENT_PTR(_id) &event_attr_##_id.attr.attr
+
+#define EVENT_ATTR(_name, _id)						\
+static struct perf_pmu_events_attr EVENT_VAR(_id) = {			\
+	.attr		= __ATTR(_name, 0444, events_sysfs_show, NULL),	\
+	.id		= PERF_COUNT_HW_##_id,				\
+	.event_str	= NULL,						\
+};
+
+#define EVENT_ATTR_STR(_name, v, str)					\
+static struct perf_pmu_events_attr event_attr_##v = {			\
+	.attr		= __ATTR(_name, 0444, events_sysfs_show, NULL),	\
+	.id		= 0,						\
+	.event_str	= str,						\
+};
+
 extern struct x86_pmu x86_pmu __read_mostly;
 
 DECLARE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
@@ -628,6 +673,9 @@ int p6_pmu_init(void);
 
 int knc_pmu_init(void);
 
+ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr,
+			  char *page);
+
 #else /* CONFIG_CPU_SUP_INTEL */
 
 static inline void reserve_ds_buffers(void)
arch/x86/kernel/cpu/perf_event_amd.c
@@ -132,14 +132,11 @@ static u64 amd_pmu_event_map(int hw_event)
 	return amd_perfmon_event_map[hw_event];
 }
 
-static struct event_constraint *amd_nb_event_constraint;
-
 /*
  * Previously calculated offsets
  */
 static unsigned int event_offsets[X86_PMC_IDX_MAX] __read_mostly;
 static unsigned int count_offsets[X86_PMC_IDX_MAX] __read_mostly;
-static unsigned int rdpmc_indexes[X86_PMC_IDX_MAX] __read_mostly;
 
 /*
  * Legacy CPUs:
@@ -147,14 +144,10 @@ static unsigned int rdpmc_indexes[X86_PMC_IDX_MAX] __read_mostly;
  *
  * CPUs with core performance counter extensions:
  *   6 counters starting at 0xc0010200 each offset by 2
- *
- * CPUs with north bridge performance counter extensions:
- *   4 additional counters starting at 0xc0010240 each offset by 2
- *   (indexed right above either one of the above core counters)
  */
 static inline int amd_pmu_addr_offset(int index, bool eventsel)
 {
-	int offset, first, base;
+	int offset;
 
 	if (!index)
 		return index;
@@ -167,23 +160,7 @@ static inline int amd_pmu_addr_offset(int index, bool eventsel)
 	if (offset)
 		return offset;
 
-	if (amd_nb_event_constraint &&
-	    test_bit(index, amd_nb_event_constraint->idxmsk)) {
-		/*
-		 * calculate the offset of NB counters with respect to
-		 * base eventsel or perfctr
-		 */
-
-		first = find_first_bit(amd_nb_event_constraint->idxmsk,
-				       X86_PMC_IDX_MAX);
-
-		if (eventsel)
-			base = MSR_F15H_NB_PERF_CTL - x86_pmu.eventsel;
-		else
-			base = MSR_F15H_NB_PERF_CTR - x86_pmu.perfctr;
-
-		offset = base + ((index - first) << 1);
-	} else if (!cpu_has_perfctr_core)
+	if (!cpu_has_perfctr_core)
 		offset = index;
 	else
 		offset = index << 1;
@@ -196,36 +173,6 @@ static inline int amd_pmu_addr_offset(int index, bool eventsel)
 	return offset;
 }
 
-static inline int amd_pmu_rdpmc_index(int index)
-{
-	int ret, first;
-
-	if (!index)
-		return index;
-
-	ret = rdpmc_indexes[index];
-
-	if (ret)
-		return ret;
-
-	if (amd_nb_event_constraint &&
-	    test_bit(index, amd_nb_event_constraint->idxmsk)) {
-		/*
-		 * according to the mnual, ECX value of the NB counters is
-		 * the index of the NB counter (0, 1, 2 or 3) plus 6
-		 */
-
-		first = find_first_bit(amd_nb_event_constraint->idxmsk,
-				       X86_PMC_IDX_MAX);
-		ret = index - first + 6;
-	} else
-		ret = index;
-
-	rdpmc_indexes[index] = ret;
-
-	return ret;
-}
-
 static int amd_core_hw_config(struct perf_event *event)
 {
 	if (event->attr.exclude_host && event->attr.exclude_guest)
@@ -244,34 +191,6 @@ static int amd_core_hw_config(struct perf_event *event)
 	return 0;
 }
 
-/*
- * NB counters do not support the following event select bits:
- *   Host/Guest only
- *   Counter mask
- *   Invert counter mask
- *   Edge detect
- *   OS/User mode
- */
-static int amd_nb_hw_config(struct perf_event *event)
-{
-	/* for NB, we only allow system wide counting mode */
-	if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
-		return -EINVAL;
-
-	if (event->attr.exclude_user || event->attr.exclude_kernel ||
-	    event->attr.exclude_host || event->attr.exclude_guest)
-		return -EINVAL;
-
-	event->hw.config &= ~(ARCH_PERFMON_EVENTSEL_USR |
-			      ARCH_PERFMON_EVENTSEL_OS);
-
-	if (event->hw.config & ~(AMD64_RAW_EVENT_MASK_NB |
-				 ARCH_PERFMON_EVENTSEL_INT))
-		return -EINVAL;
-
-	return 0;
-}
-
 /*
  * AMD64 events are detected based on their event codes.
  */
@@ -285,11 +204,6 @@ static inline int amd_is_nb_event(struct hw_perf_event *hwc)
 	return (hwc->config & 0xe0) == 0xe0;
 }
 
-static inline int amd_is_perfctr_nb_event(struct hw_perf_event *hwc)
-{
-	return amd_nb_event_constraint && amd_is_nb_event(hwc);
-}
-
 static inline int amd_has_nb(struct cpu_hw_events *cpuc)
 {
 	struct amd_nb *nb = cpuc->amd_nb;
@@ -315,9 +229,6 @@ static int amd_pmu_hw_config(struct perf_event *event)
 	if (event->attr.type == PERF_TYPE_RAW)
 		event->hw.config |= event->attr.config & AMD64_RAW_EVENT_MASK;
 
-	if (amd_is_perfctr_nb_event(&event->hw))
-		return amd_nb_hw_config(event);
-
 	return amd_core_hw_config(event);
 }
@@ -341,19 +252,6 @@ static void __amd_put_nb_event_constraints(struct cpu_hw_events *cpuc,
 	}
 }
 
-static void amd_nb_interrupt_hw_config(struct hw_perf_event *hwc)
-{
-	int core_id = cpu_data(smp_processor_id()).cpu_core_id;
-
-	/* deliver interrupts only to this core */
-	if (hwc->config & ARCH_PERFMON_EVENTSEL_INT) {
-		hwc->config |= AMD64_EVENTSEL_INT_CORE_ENABLE;
-		hwc->config &= ~AMD64_EVENTSEL_INT_CORE_SEL_MASK;
-		hwc->config |= (u64)(core_id) <<
-			AMD64_EVENTSEL_INT_CORE_SEL_SHIFT;
-	}
-}
-
 /*
  * AMD64 NorthBridge events need special treatment because
  * counter access needs to be synchronized across all cores
@@ -441,9 +339,6 @@ __amd_get_nb_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *ev
 	if (new == -1)
 		return &emptyconstraint;
 
-	if (amd_is_perfctr_nb_event(hwc))
-		amd_nb_interrupt_hw_config(hwc);
-
 	return &nb->event_constraints[new];
 }
@@ -543,8 +438,7 @@ amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
 	if (!(amd_has_nb(cpuc) && amd_is_nb_event(&event->hw)))
 		return &unconstrained;
 
-	return __amd_get_nb_event_constraints(cpuc, event,
-					      amd_nb_event_constraint);
+	return __amd_get_nb_event_constraints(cpuc, event, NULL);
 }
 
 static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
@@ -643,9 +537,6 @@ static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT_OVERLAP(0, 0x09,
 static struct event_constraint amd_f15_PMC50 = EVENT_CONSTRAINT(0, 0x3F, 0);
 static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0);
 
-static struct event_constraint amd_NBPMC96 = EVENT_CONSTRAINT(0, 0x3C0, 0);
-static struct event_constraint amd_NBPMC74 = EVENT_CONSTRAINT(0, 0xF0, 0);
-
 static struct event_constraint *
 amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, struct perf_event *event)
 {
@@ -711,8 +602,8 @@ amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, struct perf_event *ev
 		return &amd_f15_PMC20;
 	}
 	case AMD_EVENT_NB:
-		return __amd_get_nb_event_constraints(cpuc, event,
-						      amd_nb_event_constraint);
+		/* moved to perf_event_amd_uncore.c */
+		return &emptyconstraint;
 	default:
 		return &emptyconstraint;
 	}
@@ -738,7 +629,6 @@ static __initconst const struct x86_pmu amd_pmu = {
 	.eventsel		= MSR_K7_EVNTSEL0,
 	.perfctr		= MSR_K7_PERFCTR0,
 	.addr_offset            = amd_pmu_addr_offset,
-	.rdpmc_index		= amd_pmu_rdpmc_index,
 	.event_map		= amd_pmu_event_map,
 	.max_events		= ARRAY_SIZE(amd_perfmon_event_map),
 	.num_counters		= AMD64_NUM_COUNTERS,
@@ -790,23 +680,6 @@ static int setup_perfctr_core(void)
 	return 0;
 }
 
-static int setup_perfctr_nb(void)
-{
-	if (!cpu_has_perfctr_nb)
-		return -ENODEV;
-
-	x86_pmu.num_counters += AMD64_NUM_COUNTERS_NB;
-
-	if (cpu_has_perfctr_core)
-		amd_nb_event_constraint = &amd_NBPMC96;
-	else
-		amd_nb_event_constraint = &amd_NBPMC74;
-
-	printk(KERN_INFO "perf: AMD northbridge performance counters detected\n");
-
-	return 0;
-}
-
 __init int amd_pmu_init(void)
 {
 	/* Performance-monitoring supported from K7 and later: */
@@ -817,7 +690,6 @@ __init int amd_pmu_init(void)
 
 	setup_event_constraints();
 	setup_perfctr_core();
-	setup_perfctr_nb();
 
 	/* Events are common for all AMDs */
 	memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
(File diff suppressed because it is too large: arch/x86/kernel/cpu/perf_event_amd_uncore.c, new file.)
arch/x86/kernel/cpu/perf_event_intel.c
@@ -81,6 +81,7 @@ static struct event_constraint intel_nehalem_event_constraints[] __read_mostly =
 static struct extra_reg intel_nehalem_extra_regs[] __read_mostly =
 {
 	INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0),
+	INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x100b),
 	EVENT_EXTRA_END
 };
@@ -108,6 +109,8 @@ static struct event_constraint intel_snb_event_constraints[] __read_mostly =
 	INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.PENDING */
 	INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
 	INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
+	INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_NO_DISPATCH */
+	INTEL_UEVENT_CONSTRAINT(0x02a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */
 	EVENT_CONSTRAINT_END
 };
@@ -136,6 +139,7 @@ static struct extra_reg intel_westmere_extra_regs[] __read_mostly =
 {
 	INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0),
 	INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0xffff, RSP_1),
+	INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x100b),
 	EVENT_EXTRA_END
 };
@@ -155,6 +159,8 @@ static struct event_constraint intel_gen_event_constraints[] __read_mostly =
 static struct extra_reg intel_snb_extra_regs[] __read_mostly = {
 	INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0x3f807f8fffull, RSP_0),
 	INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0x3f807f8fffull, RSP_1),
+	INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
 	EVENT_EXTRA_END
 };
@@ -164,6 +170,21 @@ static struct extra_reg intel_snbep_extra_regs[] __read_mostly = {
+	INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
 	EVENT_EXTRA_END
 };
 
+EVENT_ATTR_STR(mem-loads, mem_ld_nhm, "event=0x0b,umask=0x10,ldlat=3");
+EVENT_ATTR_STR(mem-loads, mem_ld_snb, "event=0xcd,umask=0x1,ldlat=3");
+EVENT_ATTR_STR(mem-stores, mem_st_snb, "event=0xcd,umask=0x2");
+
+struct attribute *nhm_events_attrs[] = {
+	EVENT_PTR(mem_ld_nhm),
+	NULL,
+};
+
+struct attribute *snb_events_attrs[] = {
+	EVENT_PTR(mem_ld_snb),
+	EVENT_PTR(mem_st_snb),
+	NULL,
+};
+
 static u64 intel_pmu_event_map(int hw_event)
 {
 	return intel_perfmon_event_map[hw_event];
@@ -1398,8 +1419,11 @@ x86_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
 
 	if (x86_pmu.event_constraints) {
 		for_each_event_constraint(c, x86_pmu.event_constraints) {
-			if ((event->hw.config & c->cmask) == c->code)
+			if ((event->hw.config & c->cmask) == c->code) {
+				/* hw.flags zeroed at initialization */
+				event->hw.flags |= c->flags;
 				return c;
+			}
 		}
 	}
 
@@ -1444,6 +1468,7 @@ intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc,
 static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
 					struct perf_event *event)
 {
+	event->hw.flags = 0;
 	intel_put_shared_regs_event_constraints(cpuc, event);
 }
@@ -1767,6 +1792,8 @@ static void intel_pmu_flush_branch_stack(void)
 
 PMU_FORMAT_ATTR(offcore_rsp, "config1:0-63");
 
+PMU_FORMAT_ATTR(ldlat, "config1:0-15");
+
 static struct attribute *intel_arch3_formats_attr[] = {
 	&format_attr_event.attr,
 	&format_attr_umask.attr,
@@ -1777,6 +1804,7 @@ static struct attribute *intel_arch3_formats_attr[] = {
 	&format_attr_cmask.attr,
 
 	&format_attr_offcore_rsp.attr, /* XXX do NHM/WSM + SNB breakout */
+	&format_attr_ldlat.attr, /* PEBS load latency */
 	NULL,
 };
@@ -2037,6 +2065,8 @@ __init int intel_pmu_init(void)
 		x86_pmu.enable_all = intel_pmu_nhm_enable_all;
 		x86_pmu.extra_regs = intel_nehalem_extra_regs;
 
+		x86_pmu.cpu_events = nhm_events_attrs;
+
 		/* UOPS_ISSUED.STALLED_CYCLES */
 		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
 			X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
@@ -2080,6 +2110,8 @@ __init int intel_pmu_init(void)
 		x86_pmu.extra_regs = intel_westmere_extra_regs;
 		x86_pmu.er_flags |= ERF_HAS_RSP_1;
 
+		x86_pmu.cpu_events = nhm_events_attrs;
+
 		/* UOPS_ISSUED.STALLED_CYCLES */
 		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
 			X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
@@ -2111,6 +2143,8 @@ __init int intel_pmu_init(void)
 		x86_pmu.er_flags |= ERF_HAS_RSP_1;
 		x86_pmu.er_flags |= ERF_NO_HT_SHARING;
 
+		x86_pmu.cpu_events = snb_events_attrs;
+
 		/* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
 		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
 			X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
@@ -2140,6 +2174,8 @@ __init int intel_pmu_init(void)
 		x86_pmu.er_flags |= ERF_HAS_RSP_1;
 		x86_pmu.er_flags |= ERF_NO_HT_SHARING;
 
+		x86_pmu.cpu_events = snb_events_attrs;
+
 		/* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
 		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
 			X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
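The EVENT_ATTR_STR() entries added above are what the core perf code exports through the per-PMU events directory in sysfs, so tools can resolve the model-specific encodings by name. Roughly, on a Sandy Bridge system (the output line is the event string from the diff; paths follow the standard perf_events sysfs layout):

  # cat /sys/bus/event_source/devices/cpu/events/mem-loads
  event=0xcd,umask=0x1,ldlat=3
  # perf stat -e cpu/mem-loads/ -a sleep 1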
arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -24,6 +24,130 @@ struct pebs_record_32 {
 
  */
 
+union intel_x86_pebs_dse {
+	u64 val;
+	struct {
+		unsigned int ld_dse:4;
+		unsigned int ld_stlb_miss:1;
+		unsigned int ld_locked:1;
+		unsigned int ld_reserved:26;
+	};
+	struct {
+		unsigned int st_l1d_hit:1;
+		unsigned int st_reserved1:3;
+		unsigned int st_stlb_miss:1;
+		unsigned int st_locked:1;
+		unsigned int st_reserved2:26;
+	};
+};
+
+
+/*
+ * Map PEBS Load Latency Data Source encodings to generic
+ * memory data source information
+ */
+#define P(a, b) PERF_MEM_S(a, b)
+#define OP_LH (P(OP, LOAD) | P(LVL, HIT))
+#define SNOOP_NONE_MISS (P(SNOOP, NONE) | P(SNOOP, MISS))
+
+static const u64 pebs_data_source[] = {
+	P(OP, LOAD) | P(LVL, MISS) | P(LVL, L3) | P(SNOOP, NA),/* 0x00:ukn L3 */
+	OP_LH | P(LVL, L1)  | P(SNOOP, NONE),	/* 0x01: L1 local */
+	OP_LH | P(LVL, LFB) | P(SNOOP, NONE),	/* 0x02: LFB hit */
+	OP_LH | P(LVL, L2)  | P(SNOOP, NONE),	/* 0x03: L2 hit */
+	OP_LH | P(LVL, L3)  | P(SNOOP, NONE),	/* 0x04: L3 hit */
+	OP_LH | P(LVL, L3)  | P(SNOOP, MISS),	/* 0x05: L3 hit, snoop miss */
+	OP_LH | P(LVL, L3)  | P(SNOOP, HIT),	/* 0x06: L3 hit, snoop hit */
+	OP_LH | P(LVL, L3)  | P(SNOOP, HITM),	/* 0x07: L3 hit, snoop hitm */
+	OP_LH | P(LVL, REM_CCE1) | P(SNOOP, HIT),  /* 0x08: L3 miss snoop hit */
+	OP_LH | P(LVL, REM_CCE1) | P(SNOOP, HITM), /* 0x09: L3 miss snoop hitm*/
+	OP_LH | P(LVL, LOC_RAM)  | P(SNOOP, HIT),  /* 0x0a: L3 miss, shared */
+	OP_LH | P(LVL, REM_RAM1) | P(SNOOP, HIT),  /* 0x0b: L3 miss, shared */
+	OP_LH | P(LVL, LOC_RAM)  | SNOOP_NONE_MISS,/* 0x0c: L3 miss, excl */
+	OP_LH | P(LVL, REM_RAM1) | SNOOP_NONE_MISS,/* 0x0d: L3 miss, excl */
+	OP_LH | P(LVL, IO)  | P(SNOOP, NONE), /* 0x0e: I/O */
+	OP_LH | P(LVL, UNC) | P(SNOOP, NONE), /* 0x0f: uncached */
+};
+
+static u64 precise_store_data(u64 status)
+{
+	union intel_x86_pebs_dse dse;
+	u64 val = P(OP, STORE) | P(SNOOP, NA) | P(LVL, L1) | P(TLB, L2);
+
+	dse.val = status;
+
+	/*
+	 * bit 4: TLB access
+	 * 1 = stored missed 2nd level TLB
+	 *
+	 * so it either hit the walker or the OS
+	 * otherwise hit 2nd level TLB
+	 */
+	if (dse.st_stlb_miss)
+		val |= P(TLB, MISS);
+	else
+		val |= P(TLB, HIT);
+
+	/*
+	 * bit 0: hit L1 data cache
+	 * if not set, then all we know is that
+	 * it missed L1D
+	 */
+	if (dse.st_l1d_hit)
+		val |= P(LVL, HIT);
+	else
+		val |= P(LVL, MISS);
+
+	/*
+	 * bit 5: Locked prefix
+	 */
+	if (dse.st_locked)
+		val |= P(LOCK, LOCKED);
+
+	return val;
+}
+
+static u64 load_latency_data(u64 status)
+{
+	union intel_x86_pebs_dse dse;
+	u64 val;
+	int model = boot_cpu_data.x86_model;
+	int fam = boot_cpu_data.x86;
+
+	dse.val = status;
+
+	/*
+	 * use the mapping table for bit 0-3
+	 */
+	val = pebs_data_source[dse.ld_dse];
+
+	/*
+	 * Nehalem models do not support TLB, Lock infos
+	 */
+	if (fam == 0x6 && (model == 26 || model == 30
+	    || model == 31 || model == 46)) {
+		val |= P(TLB, NA) | P(LOCK, NA);
+		return val;
+	}
+	/*
+	 * bit 4: TLB access
+	 * 0 = did not miss 2nd level TLB
+	 * 1 = missed 2nd level TLB
+	 */
+	if (dse.ld_stlb_miss)
+		val |= P(TLB, MISS) | P(TLB, L2);
+	else
+		val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2);
+
+	/*
+	 * bit 5: locked prefix
+	 */
+	if (dse.ld_locked)
+		val |= P(LOCK, LOCKED);
+
+	return val;
+}
+
 struct pebs_record_core {
 	u64 flags, ip;
 	u64 ax, bx, cx, dx;
@@ -365,7 +489,7 @@ struct event_constraint intel_atom_pebs_event_constraints[] = {
 };
 
 struct event_constraint intel_nehalem_pebs_event_constraints[] = {
-	INTEL_EVENT_CONSTRAINT(0x0b, 0xf),    /* MEM_INST_RETIRED.* */
+	INTEL_PLD_CONSTRAINT(0x100b, 0xf),    /* MEM_INST_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0x0f, 0xf),    /* MEM_UNCORE_RETIRED.* */
 	INTEL_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
 	INTEL_EVENT_CONSTRAINT(0xc0, 0xf),    /* INST_RETIRED.ANY */
@@ -380,7 +504,7 @@ struct event_constraint intel_nehalem_pebs_event_constraints[] = {
 };
 
 struct event_constraint intel_westmere_pebs_event_constraints[] = {
-	INTEL_EVENT_CONSTRAINT(0x0b, 0xf),    /* MEM_INST_RETIRED.* */
+	INTEL_PLD_CONSTRAINT(0x100b, 0xf),    /* MEM_INST_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0x0f, 0xf),    /* MEM_UNCORE_RETIRED.* */
 	INTEL_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
 	INTEL_EVENT_CONSTRAINT(0xc0, 0xf),    /* INSTR_RETIRED.* */
@@ -400,7 +524,8 @@ struct event_constraint intel_snb_pebs_event_constraints[] = {
 	INTEL_UEVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */
 	INTEL_EVENT_CONSTRAINT(0xc4, 0xf),    /* BR_INST_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0xc5, 0xf),    /* BR_MISP_RETIRED.* */
-	INTEL_EVENT_CONSTRAINT(0xcd, 0x8),    /* MEM_TRANS_RETIRED.* */
+	INTEL_PLD_CONSTRAINT(0x01cd, 0x8),    /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
+	INTEL_PST_CONSTRAINT(0x02cd, 0x8),    /* MEM_TRANS_RETIRED.PRECISE_STORES */
 	INTEL_EVENT_CONSTRAINT(0xd0, 0xf),    /* MEM_UOP_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0xd2, 0xf),    /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
@@ -414,7 +539,8 @@ struct event_constraint intel_ivb_pebs_event_constraints[] = {
 	INTEL_UEVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */
 	INTEL_EVENT_CONSTRAINT(0xc4, 0xf),    /* BR_INST_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0xc5, 0xf),    /* BR_MISP_RETIRED.* */
-	INTEL_EVENT_CONSTRAINT(0xcd, 0x8),    /* MEM_TRANS_RETIRED.* */
+	INTEL_PLD_CONSTRAINT(0x01cd, 0x8),    /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
+	INTEL_PST_CONSTRAINT(0x02cd, 0x8),    /* MEM_TRANS_RETIRED.PRECISE_STORES */
 	INTEL_EVENT_CONSTRAINT(0xd0, 0xf),    /* MEM_UOP_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0xd2, 0xf),    /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
@@ -431,8 +557,10 @@ struct event_constraint *intel_pebs_constraints(struct perf_event *event)
 
 	if (x86_pmu.pebs_constraints) {
 		for_each_event_constraint(c, x86_pmu.pebs_constraints) {
-			if ((event->hw.config & c->cmask) == c->code)
+			if ((event->hw.config & c->cmask) == c->code) {
+				event->hw.flags |= c->flags;
 				return c;
+			}
 		}
 	}
@@ -447,6 +575,11 @@ void intel_pmu_pebs_enable(struct perf_event *event)
 	hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
 
 	cpuc->pebs_enabled |= 1ULL << hwc->idx;
+
+	if (event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT)
+		cpuc->pebs_enabled |= 1ULL << (hwc->idx + 32);
+	else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
+		cpuc->pebs_enabled |= 1ULL << 63;
 }
 
 void intel_pmu_pebs_disable(struct perf_event *event)
@@ -559,20 +692,51 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
 				   struct pt_regs *iregs, void *__pebs)
 {
 	/*
-	 * We cast to pebs_record_core since that is a subset of
-	 * both formats and we don't use the other fields in this
-	 * routine.
+	 * We cast to pebs_record_nhm to get the load latency data
+	 * if extra_reg MSR_PEBS_LD_LAT_THRESHOLD used
 	 */
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
-	struct pebs_record_core *pebs = __pebs;
+	struct pebs_record_nhm *pebs = __pebs;
 	struct perf_sample_data data;
 	struct pt_regs regs;
+	u64 sample_type;
+	int fll, fst;
 
 	if (!intel_pmu_save_and_restart(event))
 		return;
 
+	fll = event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT;
+	fst = event->hw.flags & PERF_X86_EVENT_PEBS_ST;
+
 	perf_sample_data_init(&data, 0, event->hw.last_period);
 
 	data.period = event->hw.last_period;
+	sample_type = event->attr.sample_type;
+
+	/*
+	 * if PEBS-LL or PreciseStore
+	 */
+	if (fll || fst) {
+		if (sample_type & PERF_SAMPLE_ADDR)
+			data.addr = pebs->dla;
+
+		/*
+		 * Use latency for weight (only avail with PEBS-LL)
+		 */
+		if (fll && (sample_type & PERF_SAMPLE_WEIGHT))
+			data.weight = pebs->lat;
+
+		/*
+		 * data.data_src encodes the data source
+		 */
+		if (sample_type & PERF_SAMPLE_DATA_SRC) {
+			if (fll)
+				data.data_src.val = load_latency_data(pebs->dse);
+			else
+				data.data_src.val = precise_store_data(pebs->dse);
+		}
+	}
+
 	/*
 	 * We use the interrupt regs as a base because the PEBS record
 	 * does not contain a full regs set, specifically it seems to
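The precise_store_data()/load_latency_data() decoding above is what lands in data.data_src (PERF_SAMPLE_DATA_SRC) in __intel_pmu_pebs_event(); on the tooling side, the new memory profiling mode consumes it roughly as follows (the workload is a placeholder):

  # perf mem record ./workload     # PEBS-sampled loads and stores
  # perf mem report                # data source: cache level, TLB, snoop, lock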
(File diff suppressed because it is too large: arch/x86/kernel/cpu/perf_event_intel_uncore.c.)
arch/x86/kernel/cpu/perf_event_intel_uncore.h
@@ -76,7 +76,7 @@
 #define SNBEP_PMON_CTL_UMASK_MASK	0x0000ff00
 #define SNBEP_PMON_CTL_RST		(1 << 17)
 #define SNBEP_PMON_CTL_EDGE_DET		(1 << 18)
-#define SNBEP_PMON_CTL_EV_SEL_EXT	(1 << 21)	/* only for QPI */
+#define SNBEP_PMON_CTL_EV_SEL_EXT	(1 << 21)
 #define SNBEP_PMON_CTL_EN		(1 << 22)
 #define SNBEP_PMON_CTL_INVERT		(1 << 23)
 #define SNBEP_PMON_CTL_TRESH_MASK	0xff000000
@@ -148,9 +148,20 @@
 #define SNBEP_C0_MSR_PMON_CTL0			0xd10
 #define SNBEP_C0_MSR_PMON_BOX_CTL		0xd04
 #define SNBEP_C0_MSR_PMON_BOX_FILTER		0xd14
-#define SNBEP_CB0_MSR_PMON_BOX_FILTER_MASK	0xfffffc1f
 #define SNBEP_CBO_MSR_OFFSET			0x20
 
+#define SNBEP_CB0_MSR_PMON_BOX_FILTER_TID	0x1f
+#define SNBEP_CB0_MSR_PMON_BOX_FILTER_NID	0x3fc00
+#define SNBEP_CB0_MSR_PMON_BOX_FILTER_STATE	0x7c0000
+#define SNBEP_CB0_MSR_PMON_BOX_FILTER_OPC	0xff800000
+
+#define SNBEP_CBO_EVENT_EXTRA_REG(e, m, i) {	\
+	.event = (e),				\
+	.msr = SNBEP_C0_MSR_PMON_BOX_FILTER,	\
+	.config_mask = (m),			\
+	.idx = (i)				\
+}
+
 /* SNB-EP PCU register */
 #define SNBEP_PCU_MSR_PMON_CTR0			0xc36
 #define SNBEP_PCU_MSR_PMON_CTL0			0xc30
@@ -160,6 +171,55 @@
 #define SNBEP_PCU_MSR_CORE_C3_CTR		0x3fc
 #define SNBEP_PCU_MSR_CORE_C6_CTR		0x3fd
 
+/* IVT event control */
+#define IVT_PMON_BOX_CTL_INT		(SNBEP_PMON_BOX_CTL_RST_CTRL | \
+					 SNBEP_PMON_BOX_CTL_RST_CTRS)
+#define IVT_PMON_RAW_EVENT_MASK		(SNBEP_PMON_CTL_EV_SEL_MASK | \
+					 SNBEP_PMON_CTL_UMASK_MASK | \
+					 SNBEP_PMON_CTL_EDGE_DET | \
+					 SNBEP_PMON_CTL_TRESH_MASK)
+/* IVT Ubox */
+#define IVT_U_MSR_PMON_GLOBAL_CTL		0xc00
+#define IVT_U_PMON_GLOBAL_FRZ_ALL		(1 << 31)
+#define IVT_U_PMON_GLOBAL_UNFRZ_ALL		(1 << 29)
+
+#define IVT_U_MSR_PMON_RAW_EVENT_MASK	\
+				(SNBEP_PMON_CTL_EV_SEL_MASK | \
+				 SNBEP_PMON_CTL_UMASK_MASK | \
+				 SNBEP_PMON_CTL_EDGE_DET | \
+				 SNBEP_U_MSR_PMON_CTL_TRESH_MASK)
+/* IVT Cbo */
+#define IVT_CBO_MSR_PMON_RAW_EVENT_MASK		(IVT_PMON_RAW_EVENT_MASK | \
+						 SNBEP_CBO_PMON_CTL_TID_EN)
+
+#define IVT_CB0_MSR_PMON_BOX_FILTER_TID		(0x1fULL << 0)
+#define IVT_CB0_MSR_PMON_BOX_FILTER_LINK	(0xfULL << 5)
+#define IVT_CB0_MSR_PMON_BOX_FILTER_STATE	(0x3fULL << 17)
+#define IVT_CB0_MSR_PMON_BOX_FILTER_NID		(0xffffULL << 32)
+#define IVT_CB0_MSR_PMON_BOX_FILTER_OPC		(0x1ffULL << 52)
+#define IVT_CB0_MSR_PMON_BOX_FILTER_C6		(0x1ULL << 61)
+#define IVT_CB0_MSR_PMON_BOX_FILTER_NC		(0x1ULL << 62)
+#define IVT_CB0_MSR_PMON_BOX_FILTER_IOSC	(0x1ULL << 63)
+
+/* IVT home agent */
+#define IVT_HA_PCI_PMON_CTL_Q_OCC_RST		(1 << 16)
+#define IVT_HA_PCI_PMON_RAW_EVENT_MASK		\
+				(IVT_PMON_RAW_EVENT_MASK | \
+				 IVT_HA_PCI_PMON_CTL_Q_OCC_RST)
+/* IVT PCU */
+#define IVT_PCU_MSR_PMON_RAW_EVENT_MASK	\
+				(SNBEP_PMON_CTL_EV_SEL_MASK | \
+				 SNBEP_PMON_CTL_EV_SEL_EXT | \
+				 SNBEP_PCU_MSR_PMON_CTL_OCC_SEL_MASK | \
+				 SNBEP_PMON_CTL_EDGE_DET | \
+				 SNBEP_PCU_MSR_PMON_CTL_TRESH_MASK | \
+				 SNBEP_PCU_MSR_PMON_CTL_OCC_INVERT | \
+				 SNBEP_PCU_MSR_PMON_CTL_OCC_EDGE_DET)
+/* IVT QPI */
+#define IVT_QPI_PCI_PMON_RAW_EVENT_MASK	\
+				(IVT_PMON_RAW_EVENT_MASK | \
+				 SNBEP_PMON_CTL_EV_SEL_EXT)
+
 /* NHM-EX event control */
 #define NHMEX_PMON_CTL_EV_SEL_MASK	0x000000ff
 #define NHMEX_PMON_CTL_UMASK_MASK	0x0000ff00
arch/x86/kernel/cpu/perf_event_p4.c
@@ -895,8 +895,8 @@ static void p4_pmu_disable_pebs(void)
  * So at moment let leave metrics turned on forever -- it's
  * ok for now but need to be revisited!
  *
- * (void)wrmsrl_safe(MSR_IA32_PEBS_ENABLE, (u64)0);
- * (void)wrmsrl_safe(MSR_P4_PEBS_MATRIX_VERT, (u64)0);
+ * (void)wrmsrl_safe(MSR_IA32_PEBS_ENABLE, 0);
+ * (void)wrmsrl_safe(MSR_P4_PEBS_MATRIX_VERT, 0);
  */
 }
 
@@ -910,8 +910,7 @@ static inline void p4_pmu_disable_event(struct perf_event *event)
 	 * asserted again and again
 	 */
 	(void)wrmsrl_safe(hwc->config_base,
-		(u64)(p4_config_unpack_cccr(hwc->config)) &
-			~P4_CCCR_ENABLE & ~P4_CCCR_OVF & ~P4_CCCR_RESERVED);
+		p4_config_unpack_cccr(hwc->config) & ~P4_CCCR_ENABLE & ~P4_CCCR_OVF & ~P4_CCCR_RESERVED);
 }
 
 static void p4_pmu_disable_all(void)
@@ -957,7 +956,7 @@ static void p4_pmu_enable_event(struct perf_event *event)
 	u64 escr_addr, cccr;
 
 	bind = &p4_event_bind_map[idx];
-	escr_addr = (u64)bind->escr_msr[thread];
+	escr_addr = bind->escr_msr[thread];
 
 	/*
 	 * - we dont support cascaded counters yet
@@ -353,7 +353,11 @@ int __kprobes __copy_instruction(u8 *dest, u8 *src)
 		 * have given.
 		 */
 		newdisp = (u8 *) src + (s64) insn.displacement.value - (u8 *) dest;
-		BUG_ON((s64) (s32) newdisp != newdisp); /* Sanity check.  */
+		if ((s64) (s32) newdisp != newdisp) {
+			pr_err("Kprobes error: new displacement does not fit into s32 (%llx)\n", newdisp);
+			pr_err("\tSrc: %p, Dest: %p, old disp: %x\n", src, dest, insn.displacement.value);
+			return 0;
+		}
 		disp = (u8 *) dest + insn_offset_displacement(&insn);
 		*(s32 *) disp = (s32) newdisp;
 	}
(Some files were not shown because too many files have changed in this diff.)