Merge branch 'perf/hw-branch-sampling' into perf/core

Merge reason: The 'perf record -b' hardware branch sampling feature is ready for upstream.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
This commit is contained in:
Ingo Molnar
2012-03-12 20:46:35 +01:00
33 changed files with 2019 additions and 245 deletions

View File

@@ -685,6 +685,10 @@ static int alpha_pmu_event_init(struct perf_event *event)
{
int err;
/* does not support taken branch sampling */
if (has_branch_stack(event))
return -EOPNOTSUPP;
switch (event->attr.type) {
case PERF_TYPE_RAW:
case PERF_TYPE_HARDWARE:

View File

@@ -539,6 +539,10 @@ static int armpmu_event_init(struct perf_event *event)
int err = 0;
atomic_t *active_events = &armpmu->active_events;
/* does not support taken branch sampling */
if (has_branch_stack(event))
return -EOPNOTSUPP;
if (armpmu->map_event(event) == -ENOENT)
return -ENOENT;

View File

@@ -606,6 +606,10 @@ static int mipspmu_event_init(struct perf_event *event)
{
int err = 0;
/* does not support taken branch sampling */
if (has_branch_stack(event))
return -EOPNOTSUPP;
switch (event->attr.type) {
case PERF_TYPE_RAW:
case PERF_TYPE_HARDWARE:

View File

@@ -1084,6 +1084,10 @@ static int power_pmu_event_init(struct perf_event *event)
if (!ppmu)
return -ENOENT;
/* does not support taken branch sampling */
if (has_branch_stack(event))
return -EOPNOTSUPP;
switch (event->attr.type) {
case PERF_TYPE_HARDWARE:
ev = event->attr.config;

View File

@@ -310,6 +310,10 @@ static int sh_pmu_event_init(struct perf_event *event)
{
int err;
/* does not support taken branch sampling */
if (has_branch_stack(event))
return -EOPNOTSUPP;
switch (event->attr.type) {
case PERF_TYPE_RAW:
case PERF_TYPE_HW_CACHE:

View File

@@ -1105,6 +1105,10 @@ static int sparc_pmu_event_init(struct perf_event *event)
if (atomic_read(&nmi_active) < 0)
return -ENODEV;
/* does not support taken branch sampling */
if (has_branch_stack(event))
return -EOPNOTSUPP;
switch (attr->type) {
case PERF_TYPE_HARDWARE:
if (attr->config >= sparc_pmu->max_events)

View File

@@ -56,6 +56,13 @@
#define MSR_OFFCORE_RSP_0 0x000001a6
#define MSR_OFFCORE_RSP_1 0x000001a7
#define MSR_LBR_SELECT 0x000001c8
#define MSR_LBR_TOS 0x000001c9
#define MSR_LBR_NHM_FROM 0x00000680
#define MSR_LBR_NHM_TO 0x000006c0
#define MSR_LBR_CORE_FROM 0x00000040
#define MSR_LBR_CORE_TO 0x00000060
#define MSR_IA32_PEBS_ENABLE 0x000003f1
#define MSR_IA32_DS_AREA 0x00000600
#define MSR_IA32_PERF_CAPABILITIES 0x00000345

View File

@@ -353,6 +353,36 @@ int x86_setup_perfctr(struct perf_event *event)
return 0;
}
/*
* check that branch_sample_type is compatible with
* settings needed for precise_ip > 1 which implies
* using the LBR to capture ALL taken branches at the
* priv levels of the measurement
*/
static inline int precise_br_compat(struct perf_event *event)
{
u64 m = event->attr.branch_sample_type;
u64 b = 0;
/* must capture all branches */
if (!(m & PERF_SAMPLE_BRANCH_ANY))
return 0;
m &= PERF_SAMPLE_BRANCH_KERNEL | PERF_SAMPLE_BRANCH_USER;
if (!event->attr.exclude_user)
b |= PERF_SAMPLE_BRANCH_USER;
if (!event->attr.exclude_kernel)
b |= PERF_SAMPLE_BRANCH_KERNEL;
/*
* ignore PERF_SAMPLE_BRANCH_HV, not supported on x86
*/
return m == b;
}
int x86_pmu_hw_config(struct perf_event *event)
{
if (event->attr.precise_ip) {
@@ -369,6 +399,36 @@ int x86_pmu_hw_config(struct perf_event *event)
if (event->attr.precise_ip > precise)
return -EOPNOTSUPP;
/*
* check that PEBS LBR correction does not conflict with
* whatever the user is asking with attr->branch_sample_type
*/
if (event->attr.precise_ip > 1) {
u64 *br_type = &event->attr.branch_sample_type;
if (has_branch_stack(event)) {
if (!precise_br_compat(event))
return -EOPNOTSUPP;
/* branch_sample_type is compatible */
} else {
/*
* user did not specify branch_sample_type
*
* For PEBS fixups, we capture all
* the branches at the priv level of the
* event.
*/
*br_type = PERF_SAMPLE_BRANCH_ANY;
if (!event->attr.exclude_user)
*br_type |= PERF_SAMPLE_BRANCH_USER;
if (!event->attr.exclude_kernel)
*br_type |= PERF_SAMPLE_BRANCH_KERNEL;
}
}
}
/*
@@ -426,6 +486,10 @@ static int __x86_pmu_event_init(struct perf_event *event)
/* mark unused */
event->hw.extra_reg.idx = EXTRA_REG_NONE;
/* mark not used */
event->hw.extra_reg.idx = EXTRA_REG_NONE;
event->hw.branch_reg.idx = EXTRA_REG_NONE;
return x86_pmu.hw_config(event);
}
@@ -1607,25 +1671,32 @@ static const struct attribute_group *x86_pmu_attr_groups[] = {
NULL,
};
static void x86_pmu_flush_branch_stack(void)
{
if (x86_pmu.flush_branch_stack)
x86_pmu.flush_branch_stack();
}
static struct pmu pmu = {
.pmu_enable = x86_pmu_enable,
.pmu_disable = x86_pmu_disable,
.pmu_enable = x86_pmu_enable,
.pmu_disable = x86_pmu_disable,
.attr_groups = x86_pmu_attr_groups,
.event_init = x86_pmu_event_init,
.add = x86_pmu_add,
.del = x86_pmu_del,
.start = x86_pmu_start,
.stop = x86_pmu_stop,
.read = x86_pmu_read,
.add = x86_pmu_add,
.del = x86_pmu_del,
.start = x86_pmu_start,
.stop = x86_pmu_stop,
.read = x86_pmu_read,
.start_txn = x86_pmu_start_txn,
.cancel_txn = x86_pmu_cancel_txn,
.commit_txn = x86_pmu_commit_txn,
.event_idx = x86_pmu_event_idx,
.flush_branch_stack = x86_pmu_flush_branch_stack,
};
void perf_update_user_clock(struct perf_event_mmap_page *userpg, u64 now)

View File

@@ -33,6 +33,7 @@ enum extra_reg_type {
EXTRA_REG_RSP_0 = 0, /* offcore_response_0 */
EXTRA_REG_RSP_1 = 1, /* offcore_response_1 */
EXTRA_REG_LBR = 2, /* lbr_select */
EXTRA_REG_MAX /* number of entries needed */
};
@@ -130,6 +131,8 @@ struct cpu_hw_events {
void *lbr_context;
struct perf_branch_stack lbr_stack;
struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES];
struct er_account *lbr_sel;
u64 br_sel;
/*
* Intel host/guest exclude bits
@@ -344,6 +347,7 @@ struct x86_pmu {
void (*cpu_starting)(int cpu);
void (*cpu_dying)(int cpu);
void (*cpu_dead)(int cpu);
void (*flush_branch_stack)(void);
/*
* Intel Arch Perfmon v2+
@@ -365,6 +369,8 @@ struct x86_pmu {
*/
unsigned long lbr_tos, lbr_from, lbr_to; /* MSR base regs */
int lbr_nr; /* hardware stack size */
u64 lbr_sel_mask; /* LBR_SELECT valid bits */
const int *lbr_sel_map; /* lbr_select mappings */
/*
* Extra registers for events
@@ -478,6 +484,15 @@ extern struct event_constraint emptyconstraint;
extern struct event_constraint unconstrained;
static inline bool kernel_ip(unsigned long ip)
{
#ifdef CONFIG_X86_32
return ip > PAGE_OFFSET;
#else
return (long)ip < 0;
#endif
}
#ifdef CONFIG_CPU_SUP_AMD
int amd_pmu_init(void);
@@ -558,6 +573,10 @@ void intel_pmu_lbr_init_nhm(void);
void intel_pmu_lbr_init_atom(void);
void intel_pmu_lbr_init_snb(void);
int intel_pmu_setup_lbr_filter(struct perf_event *event);
int p4_pmu_init(void);
int p6_pmu_init(void);

View File

@@ -139,6 +139,9 @@ static int amd_pmu_hw_config(struct perf_event *event)
if (ret)
return ret;
if (has_branch_stack(event))
return -EOPNOTSUPP;
if (event->attr.exclude_host && event->attr.exclude_guest)
/*
* When HO == GO == 1 the hardware treats that as GO == HO == 0

View File

@@ -728,6 +728,19 @@ static __initconst const u64 atom_hw_cache_event_ids
},
};
static inline bool intel_pmu_needs_lbr_smpl(struct perf_event *event)
{
/* user explicitly requested branch sampling */
if (has_branch_stack(event))
return true;
/* implicit branch sampling to correct PEBS skid */
if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1)
return true;
return false;
}
static void intel_pmu_disable_all(void)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
@@ -882,6 +895,13 @@ static void intel_pmu_disable_event(struct perf_event *event)
cpuc->intel_ctrl_guest_mask &= ~(1ull << hwc->idx);
cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx);
/*
* must disable before any actual event
* because any event may be combined with LBR
*/
if (intel_pmu_needs_lbr_smpl(event))
intel_pmu_lbr_disable(event);
if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
intel_pmu_disable_fixed(hwc);
return;
@@ -936,6 +956,12 @@ static void intel_pmu_enable_event(struct perf_event *event)
intel_pmu_enable_bts(hwc->config);
return;
}
/*
* must enabled before any actual event
* because any event may be combined with LBR
*/
if (intel_pmu_needs_lbr_smpl(event))
intel_pmu_lbr_enable(event);
if (event->attr.exclude_host)
cpuc->intel_ctrl_guest_mask |= (1ull << hwc->idx);
@@ -1058,6 +1084,9 @@ again:
data.period = event->hw.last_period;
if (has_branch_stack(event))
data.br_stack = &cpuc->lbr_stack;
if (perf_event_overflow(event, &data, regs))
x86_pmu_stop(event, 0);
}
@@ -1124,17 +1153,17 @@ static bool intel_try_alt_er(struct perf_event *event, int orig_idx)
*/
static struct event_constraint *
__intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
struct perf_event *event)
struct perf_event *event,
struct hw_perf_event_extra *reg)
{
struct event_constraint *c = &emptyconstraint;
struct hw_perf_event_extra *reg = &event->hw.extra_reg;
struct er_account *era;
unsigned long flags;
int orig_idx = reg->idx;
/* already allocated shared msr */
if (reg->alloc)
return &unconstrained;
return NULL; /* call x86_get_event_constraint() */
again:
era = &cpuc->shared_regs->regs[reg->idx];
@@ -1157,14 +1186,10 @@ again:
reg->alloc = 1;
/*
* All events using extra_reg are unconstrained.
* Avoids calling x86_get_event_constraints()
*
* Must revisit if extra_reg controlling events
* ever have constraints. Worst case we go through
* the regular event constraint table.
* need to call x86_get_event_constraint()
* to check if associated event has constraints
*/
c = &unconstrained;
c = NULL;
} else if (intel_try_alt_er(event, orig_idx)) {
raw_spin_unlock_irqrestore(&era->lock, flags);
goto again;
@@ -1201,11 +1226,23 @@ static struct event_constraint *
intel_shared_regs_constraints(struct cpu_hw_events *cpuc,
struct perf_event *event)
{
struct event_constraint *c = NULL;
if (event->hw.extra_reg.idx != EXTRA_REG_NONE)
c = __intel_shared_reg_get_constraints(cpuc, event);
struct event_constraint *c = NULL, *d;
struct hw_perf_event_extra *xreg, *breg;
xreg = &event->hw.extra_reg;
if (xreg->idx != EXTRA_REG_NONE) {
c = __intel_shared_reg_get_constraints(cpuc, event, xreg);
if (c == &emptyconstraint)
return c;
}
breg = &event->hw.branch_reg;
if (breg->idx != EXTRA_REG_NONE) {
d = __intel_shared_reg_get_constraints(cpuc, event, breg);
if (d == &emptyconstraint) {
__intel_shared_reg_put_constraints(cpuc, xreg);
c = d;
}
}
return c;
}
@@ -1253,6 +1290,10 @@ intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc,
reg = &event->hw.extra_reg;
if (reg->idx != EXTRA_REG_NONE)
__intel_shared_reg_put_constraints(cpuc, reg);
reg = &event->hw.branch_reg;
if (reg->idx != EXTRA_REG_NONE)
__intel_shared_reg_put_constraints(cpuc, reg);
}
static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
@@ -1295,6 +1336,12 @@ static int intel_pmu_hw_config(struct perf_event *event)
event->hw.config = alt_config;
}
if (intel_pmu_needs_lbr_smpl(event)) {
ret = intel_pmu_setup_lbr_filter(event);
if (ret)
return ret;
}
if (event->attr.type != PERF_TYPE_RAW)
return 0;
@@ -1433,7 +1480,7 @@ static int intel_pmu_cpu_prepare(int cpu)
{
struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
if (!x86_pmu.extra_regs)
if (!(x86_pmu.extra_regs || x86_pmu.lbr_sel_map))
return NOTIFY_OK;
cpuc->shared_regs = allocate_shared_regs(cpu);
@@ -1455,22 +1502,28 @@ static void intel_pmu_cpu_starting(int cpu)
*/
intel_pmu_lbr_reset();
if (!cpuc->shared_regs || (x86_pmu.er_flags & ERF_NO_HT_SHARING))
cpuc->lbr_sel = NULL;
if (!cpuc->shared_regs)
return;
for_each_cpu(i, topology_thread_cpumask(cpu)) {
struct intel_shared_regs *pc;
if (!(x86_pmu.er_flags & ERF_NO_HT_SHARING)) {
for_each_cpu(i, topology_thread_cpumask(cpu)) {
struct intel_shared_regs *pc;
pc = per_cpu(cpu_hw_events, i).shared_regs;
if (pc && pc->core_id == core_id) {
cpuc->kfree_on_online = cpuc->shared_regs;
cpuc->shared_regs = pc;
break;
pc = per_cpu(cpu_hw_events, i).shared_regs;
if (pc && pc->core_id == core_id) {
cpuc->kfree_on_online = cpuc->shared_regs;
cpuc->shared_regs = pc;
break;
}
}
cpuc->shared_regs->core_id = core_id;
cpuc->shared_regs->refcnt++;
}
cpuc->shared_regs->core_id = core_id;
cpuc->shared_regs->refcnt++;
if (x86_pmu.lbr_sel_map)
cpuc->lbr_sel = &cpuc->shared_regs->regs[EXTRA_REG_LBR];
}
static void intel_pmu_cpu_dying(int cpu)
@@ -1488,6 +1541,18 @@ static void intel_pmu_cpu_dying(int cpu)
fini_debug_store_on_cpu(cpu);
}
static void intel_pmu_flush_branch_stack(void)
{
/*
* Intel LBR does not tag entries with the
* PID of the current task, then we need to
* flush it on ctxsw
* For now, we simply reset it
*/
if (x86_pmu.lbr_nr)
intel_pmu_lbr_reset();
}
static __initconst const struct x86_pmu intel_pmu = {
.name = "Intel",
.handle_irq = intel_pmu_handle_irq,
@@ -1515,6 +1580,7 @@ static __initconst const struct x86_pmu intel_pmu = {
.cpu_starting = intel_pmu_cpu_starting,
.cpu_dying = intel_pmu_cpu_dying,
.guest_get_msrs = intel_guest_get_msrs,
.flush_branch_stack = intel_pmu_flush_branch_stack,
};
static __init void intel_clovertown_quirk(void)
@@ -1745,7 +1811,7 @@ __init int intel_pmu_init(void)
memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
intel_pmu_lbr_init_nhm();
intel_pmu_lbr_init_snb();
x86_pmu.event_constraints = intel_snb_event_constraints;
x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints;

View File

@@ -3,6 +3,7 @@
#include <linux/slab.h>
#include <asm/perf_event.h>
#include <asm/insn.h>
#include "perf_event.h"
@@ -439,9 +440,6 @@ void intel_pmu_pebs_enable(struct perf_event *event)
hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
cpuc->pebs_enabled |= 1ULL << hwc->idx;
if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1)
intel_pmu_lbr_enable(event);
}
void intel_pmu_pebs_disable(struct perf_event *event)
@@ -454,9 +452,6 @@ void intel_pmu_pebs_disable(struct perf_event *event)
wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
hwc->config |= ARCH_PERFMON_EVENTSEL_INT;
if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1)
intel_pmu_lbr_disable(event);
}
void intel_pmu_pebs_enable_all(void)
@@ -475,17 +470,6 @@ void intel_pmu_pebs_disable_all(void)
wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
}
#include <asm/insn.h>
static inline bool kernel_ip(unsigned long ip)
{
#ifdef CONFIG_X86_32
return ip > PAGE_OFFSET;
#else
return (long)ip < 0;
#endif
}
static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
@@ -572,6 +556,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
* both formats and we don't use the other fields in this
* routine.
*/
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
struct pebs_record_core *pebs = __pebs;
struct perf_sample_data data;
struct pt_regs regs;
@@ -602,6 +587,9 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
else
regs.flags &= ~PERF_EFLAGS_EXACT;
if (has_branch_stack(event))
data.br_stack = &cpuc->lbr_stack;
if (perf_event_overflow(event, &data, &regs))
x86_pmu_stop(event, 0);
}

File diff suppressed because it is too large Load Diff

View File

@@ -129,10 +129,39 @@ enum perf_event_sample_format {
PERF_SAMPLE_PERIOD = 1U << 8,
PERF_SAMPLE_STREAM_ID = 1U << 9,
PERF_SAMPLE_RAW = 1U << 10,
PERF_SAMPLE_BRANCH_STACK = 1U << 11,
PERF_SAMPLE_MAX = 1U << 11, /* non-ABI */
PERF_SAMPLE_MAX = 1U << 12, /* non-ABI */
};
/*
* values to program into branch_sample_type when PERF_SAMPLE_BRANCH is set
*
* If the user does not pass priv level information via branch_sample_type,
* the kernel uses the event's priv level. Branch and event priv levels do
* not have to match. Branch priv level is checked for permissions.
*
* The branch types can be combined, however BRANCH_ANY covers all types
* of branches and therefore it supersedes all the other types.
*/
enum perf_branch_sample_type {
PERF_SAMPLE_BRANCH_USER = 1U << 0, /* user branches */
PERF_SAMPLE_BRANCH_KERNEL = 1U << 1, /* kernel branches */
PERF_SAMPLE_BRANCH_HV = 1U << 2, /* hypervisor branches */
PERF_SAMPLE_BRANCH_ANY = 1U << 3, /* any branch types */
PERF_SAMPLE_BRANCH_ANY_CALL = 1U << 4, /* any call branch */
PERF_SAMPLE_BRANCH_ANY_RETURN = 1U << 5, /* any return branch */
PERF_SAMPLE_BRANCH_IND_CALL = 1U << 6, /* indirect calls */
PERF_SAMPLE_BRANCH_MAX = 1U << 7, /* non-ABI */
};
#define PERF_SAMPLE_BRANCH_PLM_ALL \
(PERF_SAMPLE_BRANCH_USER|\
PERF_SAMPLE_BRANCH_KERNEL|\
PERF_SAMPLE_BRANCH_HV)
/*
* The format of the data returned by read() on a perf event fd,
* as specified by attr.read_format:
@@ -163,6 +192,8 @@ enum perf_event_read_format {
};
#define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */
#define PERF_ATTR_SIZE_VER1 72 /* add: config2 */
#define PERF_ATTR_SIZE_VER2 80 /* add: branch_sample_type */
/*
* Hardware event_id to monitor via a performance monitoring event:
@@ -240,6 +271,7 @@ struct perf_event_attr {
__u64 bp_len;
__u64 config2; /* extension of config1 */
};
__u64 branch_sample_type; /* enum branch_sample_type */
};
/*
@@ -458,6 +490,8 @@ enum perf_event_type {
*
* { u32 size;
* char data[size];}&& PERF_SAMPLE_RAW
*
* { u64 from, to, flags } lbr[nr];} && PERF_SAMPLE_BRANCH_STACK
* };
*/
PERF_RECORD_SAMPLE = 9,
@@ -530,12 +564,34 @@ struct perf_raw_record {
void *data;
};
/*
* single taken branch record layout:
*
* from: source instruction (may not always be a branch insn)
* to: branch target
* mispred: branch target was mispredicted
* predicted: branch target was predicted
*
* support for mispred, predicted is optional. In case it
* is not supported mispred = predicted = 0.
*/
struct perf_branch_entry {
__u64 from;
__u64 to;
__u64 flags;
__u64 from;
__u64 to;
__u64 mispred:1, /* target mispredicted */
predicted:1,/* target predicted */
reserved:62;
};
/*
* branch stack layout:
* nr: number of taken branches stored in entries[]
*
* Note that nr can vary from sample to sample
* branches (to, from) are stored from most recent
* to least recent, i.e., entries[0] contains the most
* recent branch.
*/
struct perf_branch_stack {
__u64 nr;
struct perf_branch_entry entries[0];
@@ -566,7 +622,9 @@ struct hw_perf_event {
unsigned long event_base;
int idx;
int last_cpu;
struct hw_perf_event_extra extra_reg;
struct hw_perf_event_extra branch_reg;
};
struct { /* software */
struct hrtimer hrtimer;
@@ -690,6 +748,11 @@ struct pmu {
* if no implementation is provided it will default to: event->hw.idx + 1.
*/
int (*event_idx) (struct perf_event *event); /*optional */
/*
* flush branch stack on context-switches (needed in cpu-wide mode)
*/
void (*flush_branch_stack) (void);
};
/**
@@ -923,7 +986,8 @@ struct perf_event_context {
u64 parent_gen;
u64 generation;
int pin_count;
int nr_cgroups; /* cgroup events present */
int nr_cgroups; /* cgroup evts */
int nr_branch_stack; /* branch_stack evt */
struct rcu_head rcu_head;
};
@@ -988,6 +1052,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr,
extern u64 perf_event_read_value(struct perf_event *event,
u64 *enabled, u64 *running);
struct perf_sample_data {
u64 type;
@@ -1007,12 +1072,14 @@ struct perf_sample_data {
u64 period;
struct perf_callchain_entry *callchain;
struct perf_raw_record *raw;
struct perf_branch_stack *br_stack;
};
static inline void perf_sample_data_init(struct perf_sample_data *data, u64 addr)
{
data->addr = addr;
data->raw = NULL;
data->br_stack = NULL;
}
extern void perf_output_sample(struct perf_output_handle *handle,
@@ -1151,6 +1218,11 @@ extern void perf_bp_event(struct perf_event *event, void *data);
# define perf_instruction_pointer(regs) instruction_pointer(regs)
#endif
static inline bool has_branch_stack(struct perf_event *event)
{
return event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK;
}
extern int perf_output_begin(struct perf_output_handle *handle,
struct perf_event *event, unsigned int size);
extern void perf_output_end(struct perf_output_handle *handle);

View File

@@ -118,6 +118,13 @@ static int cpu_function_call(int cpu, int (*func) (void *info), void *info)
PERF_FLAG_FD_OUTPUT |\
PERF_FLAG_PID_CGROUP)
/*
* branch priv levels that need permission checks
*/
#define PERF_SAMPLE_BRANCH_PERM_PLM \
(PERF_SAMPLE_BRANCH_KERNEL |\
PERF_SAMPLE_BRANCH_HV)
enum event_type_t {
EVENT_FLEXIBLE = 0x1,
EVENT_PINNED = 0x2,
@@ -130,6 +137,7 @@ enum event_type_t {
*/
struct static_key_deferred perf_sched_events __read_mostly;
static DEFINE_PER_CPU(atomic_t, perf_cgroup_events);
static DEFINE_PER_CPU(atomic_t, perf_branch_stack_events);
static atomic_t nr_mmap_events __read_mostly;
static atomic_t nr_comm_events __read_mostly;
@@ -881,6 +889,9 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
if (is_cgroup_event(event))
ctx->nr_cgroups++;
if (has_branch_stack(event))
ctx->nr_branch_stack++;
list_add_rcu(&event->event_entry, &ctx->event_list);
if (!ctx->nr_events)
perf_pmu_rotate_start(ctx->pmu);
@@ -1020,6 +1031,9 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx)
cpuctx->cgrp = NULL;
}
if (has_branch_stack(event))
ctx->nr_branch_stack--;
ctx->nr_events--;
if (event->attr.inherit_stat)
ctx->nr_stat--;
@@ -2194,6 +2208,66 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx,
perf_pmu_rotate_start(ctx->pmu);
}
/*
* When sampling the branck stack in system-wide, it may be necessary
* to flush the stack on context switch. This happens when the branch
* stack does not tag its entries with the pid of the current task.
* Otherwise it becomes impossible to associate a branch entry with a
* task. This ambiguity is more likely to appear when the branch stack
* supports priv level filtering and the user sets it to monitor only
* at the user level (which could be a useful measurement in system-wide
* mode). In that case, the risk is high of having a branch stack with
* branch from multiple tasks. Flushing may mean dropping the existing
* entries or stashing them somewhere in the PMU specific code layer.
*
* This function provides the context switch callback to the lower code
* layer. It is invoked ONLY when there is at least one system-wide context
* with at least one active event using taken branch sampling.
*/
static void perf_branch_stack_sched_in(struct task_struct *prev,
struct task_struct *task)
{
struct perf_cpu_context *cpuctx;
struct pmu *pmu;
unsigned long flags;
/* no need to flush branch stack if not changing task */
if (prev == task)
return;
local_irq_save(flags);
rcu_read_lock();
list_for_each_entry_rcu(pmu, &pmus, entry) {
cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
/*
* check if the context has at least one
* event using PERF_SAMPLE_BRANCH_STACK
*/
if (cpuctx->ctx.nr_branch_stack > 0
&& pmu->flush_branch_stack) {
pmu = cpuctx->ctx.pmu;
perf_ctx_lock(cpuctx, cpuctx->task_ctx);
perf_pmu_disable(pmu);
pmu->flush_branch_stack();
perf_pmu_enable(pmu);
perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
}
}
rcu_read_unlock();
local_irq_restore(flags);
}
/*
* Called from scheduler to add the events of the current task
* with interrupts disabled.
@@ -2225,6 +2299,10 @@ void __perf_event_task_sched_in(struct task_struct *prev,
*/
if (atomic_read(&__get_cpu_var(perf_cgroup_events)))
perf_cgroup_sched_in(prev, task);
/* check for system-wide branch_stack events */
if (atomic_read(&__get_cpu_var(perf_branch_stack_events)))
perf_branch_stack_sched_in(prev, task);
}
static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count)
@@ -2791,6 +2869,14 @@ static void free_event(struct perf_event *event)
atomic_dec(&per_cpu(perf_cgroup_events, event->cpu));
static_key_slow_dec_deferred(&perf_sched_events);
}
if (has_branch_stack(event)) {
static_key_slow_dec_deferred(&perf_sched_events);
/* is system-wide event */
if (!(event->attach_state & PERF_ATTACH_TASK))
atomic_dec(&per_cpu(perf_branch_stack_events,
event->cpu));
}
}
if (event->rb) {
@@ -3907,6 +3993,24 @@ void perf_output_sample(struct perf_output_handle *handle,
}
}
}
if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
if (data->br_stack) {
size_t size;
size = data->br_stack->nr
* sizeof(struct perf_branch_entry);
perf_output_put(handle, data->br_stack->nr);
perf_output_copy(handle, data->br_stack->entries, size);
} else {
/*
* we always store at least the value of nr
*/
u64 nr = 0;
perf_output_put(handle, nr);
}
}
}
void perf_prepare_sample(struct perf_event_header *header,
@@ -3949,6 +4053,15 @@ void perf_prepare_sample(struct perf_event_header *header,
WARN_ON_ONCE(size & (sizeof(u64)-1));
header->size += size;
}
if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
int size = sizeof(u64); /* nr */
if (data->br_stack) {
size += data->br_stack->nr
* sizeof(struct perf_branch_entry);
}
header->size += size;
}
}
static void perf_event_output(struct perf_event *event,
@@ -5010,6 +5123,12 @@ static int perf_swevent_init(struct perf_event *event)
if (event->attr.type != PERF_TYPE_SOFTWARE)
return -ENOENT;
/*
* no branch sampling for software events
*/
if (has_branch_stack(event))
return -EOPNOTSUPP;
switch (event_id) {
case PERF_COUNT_SW_CPU_CLOCK:
case PERF_COUNT_SW_TASK_CLOCK:
@@ -5120,6 +5239,12 @@ static int perf_tp_event_init(struct perf_event *event)
if (event->attr.type != PERF_TYPE_TRACEPOINT)
return -ENOENT;
/*
* no branch sampling for tracepoint events
*/
if (has_branch_stack(event))
return -EOPNOTSUPP;
err = perf_trace_init(event);
if (err)
return err;
@@ -5345,6 +5470,12 @@ static int cpu_clock_event_init(struct perf_event *event)
if (event->attr.config != PERF_COUNT_SW_CPU_CLOCK)
return -ENOENT;
/*
* no branch sampling for software events
*/
if (has_branch_stack(event))
return -EOPNOTSUPP;
perf_swevent_init_hrtimer(event);
return 0;
@@ -5419,6 +5550,12 @@ static int task_clock_event_init(struct perf_event *event)
if (event->attr.config != PERF_COUNT_SW_TASK_CLOCK)
return -ENOENT;
/*
* no branch sampling for software events
*/
if (has_branch_stack(event))
return -EOPNOTSUPP;
perf_swevent_init_hrtimer(event);
return 0;
@@ -5866,6 +6003,12 @@ done:
return ERR_PTR(err);
}
}
if (has_branch_stack(event)) {
static_key_slow_inc(&perf_sched_events.key);
if (!(event->attach_state & PERF_ATTACH_TASK))
atomic_inc(&per_cpu(perf_branch_stack_events,
event->cpu));
}
}
return event;
@@ -5935,6 +6078,40 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr,
if (attr->read_format & ~(PERF_FORMAT_MAX-1))
return -EINVAL;
if (attr->sample_type & PERF_SAMPLE_BRANCH_STACK) {
u64 mask = attr->branch_sample_type;
/* only using defined bits */
if (mask & ~(PERF_SAMPLE_BRANCH_MAX-1))
return -EINVAL;
/* at least one branch bit must be set */
if (!(mask & ~PERF_SAMPLE_BRANCH_PLM_ALL))
return -EINVAL;
/* kernel level capture: check permissions */
if ((mask & PERF_SAMPLE_BRANCH_PERM_PLM)
&& perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN))
return -EACCES;
/* propagate priv level, when not set for branch */
if (!(mask & PERF_SAMPLE_BRANCH_PLM_ALL)) {
/* exclude_kernel checked on syscall entry */
if (!attr->exclude_kernel)
mask |= PERF_SAMPLE_BRANCH_KERNEL;
if (!attr->exclude_user)
mask |= PERF_SAMPLE_BRANCH_USER;
if (!attr->exclude_hv)
mask |= PERF_SAMPLE_BRANCH_HV;
/*
* adjust user setting (for HW filter setup)
*/
attr->branch_sample_type = mask;
}
}
out:
return ret;

View File

@@ -581,6 +581,12 @@ static int hw_breakpoint_event_init(struct perf_event *bp)
if (bp->attr.type != PERF_TYPE_BREAKPOINT)
return -ENOENT;
/*
* no branch sampling for breakpoint events
*/
if (has_branch_stack(bp))
return -EOPNOTSUPP;
err = register_perf_hw_breakpoint(bp);
if (err)
return err;

View File

@@ -152,6 +152,36 @@ an empty cgroup (monitor all the time) using, e.g., -G foo,,bar. Cgroups must ha
corresponding events, i.e., they always refer to events defined earlier on the command
line.
-b::
--branch-any::
Enable taken branch stack sampling. Any type of taken branch may be sampled.
This is a shortcut for --branch-filter any. See --branch-filter for more infos.
-j::
--branch-filter::
Enable taken branch stack sampling. Each sample captures a series of consecutive
taken branches. The number of branches captured with each sample depends on the
underlying hardware, the type of branches of interest, and the executed code.
It is possible to select the types of branches captured by enabling filters. The
following filters are defined:
- any: any type of branches
- any_call: any function call or system call
- any_ret: any function return or system call return
- any_ind: any indirect branch
- u: only when the branch target is at the user level
- k: only when the branch target is in the kernel
- hv: only when the target is at the hypervisor level
+
The option requires at least one branch type among any, any_call, any_ret, ind_call.
The privilege levels may be ommitted, in which case, the privilege levels of the associated
event are applied to the branch filter. Both kernel (k) and hypervisor (hv) privilege
levels are subject to permissions. When sampling on multiple events, branch stack sampling
is enabled for all the sampling events. The sampled branch type is the same for all events.
The various filters must be specified as a comma separated list: --branch-filter any_ret,u,k
Note that this feature may not be available on all processors.
SEE ALSO
--------
linkperf:perf-stat[1], linkperf:perf-list[1]

View File

@@ -153,6 +153,16 @@ OPTIONS
information which may be very large and thus may clutter the display.
It currently includes: cpu and numa topology of the host system.
-b::
--branch-stack::
Use the addresses of sampled taken branches instead of the instruction
address to build the histograms. To generate meaningful output, the
perf.data file must have been obtained using perf record -b or
perf record --branch-filter xxx where xxx is a branch filter option.
perf report is able to auto-detect whether a perf.data file contains
branch stacks and it will automatically switch to the branch view mode,
unless --no-branch-stack is used.
SEE ALSO
--------
linkperf:perf-stat[1], linkperf:perf-annotate[1]

View File

@@ -473,6 +473,9 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
if (!have_tracepoints(&evsel_list->entries))
perf_header__clear_feat(&session->header, HEADER_TRACE_INFO);
if (!rec->opts.branch_stack)
perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
if (!rec->file_new) {
err = perf_session__read_header(session, output);
if (err < 0)
@@ -638,6 +641,90 @@ out_delete_session:
return err;
}
#define BRANCH_OPT(n, m) \
{ .name = n, .mode = (m) }
#define BRANCH_END { .name = NULL }
struct branch_mode {
const char *name;
int mode;
};
static const struct branch_mode branch_modes[] = {
BRANCH_OPT("u", PERF_SAMPLE_BRANCH_USER),
BRANCH_OPT("k", PERF_SAMPLE_BRANCH_KERNEL),
BRANCH_OPT("hv", PERF_SAMPLE_BRANCH_HV),
BRANCH_OPT("any", PERF_SAMPLE_BRANCH_ANY),
BRANCH_OPT("any_call", PERF_SAMPLE_BRANCH_ANY_CALL),
BRANCH_OPT("any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN),
BRANCH_OPT("ind_call", PERF_SAMPLE_BRANCH_IND_CALL),
BRANCH_END
};
static int
parse_branch_stack(const struct option *opt, const char *str, int unset)
{
#define ONLY_PLM \
(PERF_SAMPLE_BRANCH_USER |\
PERF_SAMPLE_BRANCH_KERNEL |\
PERF_SAMPLE_BRANCH_HV)
uint64_t *mode = (uint64_t *)opt->value;
const struct branch_mode *br;
char *s, *os = NULL, *p;
int ret = -1;
if (unset)
return 0;
/*
* cannot set it twice, -b + --branch-filter for instance
*/
if (*mode)
return -1;
/* str may be NULL in case no arg is passed to -b */
if (str) {
/* because str is read-only */
s = os = strdup(str);
if (!s)
return -1;
for (;;) {
p = strchr(s, ',');
if (p)
*p = '\0';
for (br = branch_modes; br->name; br++) {
if (!strcasecmp(s, br->name))
break;
}
if (!br->name) {
ui__warning("unknown branch filter %s,"
" check man page\n", s);
goto error;
}
*mode |= br->mode;
if (!p)
break;
s = p + 1;
}
}
ret = 0;
/* default to any branch */
if ((*mode & ~ONLY_PLM) == 0) {
*mode = PERF_SAMPLE_BRANCH_ANY;
}
error:
free(os);
return ret;
}
static const char * const record_usage[] = {
"perf record [<options>] [<command>]",
"perf record [<options>] -- <command> [<options>]",
@@ -727,6 +814,14 @@ const struct option record_options[] = {
"monitor event in cgroup name only",
parse_cgroups),
OPT_STRING('u', "uid", &record.uid_str, "user", "user to profile"),
OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
"branch any", "sample any taken branches",
parse_branch_stack),
OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
"branch filter mask", "branch stack filter modes",
parse_branch_stack),
OPT_END()
};

View File

@@ -53,6 +53,82 @@ struct perf_report {
DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
};
static int perf_report__add_branch_hist_entry(struct perf_tool *tool,
struct addr_location *al,
struct perf_sample *sample,
struct perf_evsel *evsel,
struct machine *machine)
{
struct perf_report *rep = container_of(tool, struct perf_report, tool);
struct symbol *parent = NULL;
int err = 0;
unsigned i;
struct hist_entry *he;
struct branch_info *bi, *bx;
if ((sort__has_parent || symbol_conf.use_callchain)
&& sample->callchain) {
err = machine__resolve_callchain(machine, evsel, al->thread,
sample->callchain, &parent);
if (err)
return err;
}
bi = machine__resolve_bstack(machine, al->thread,
sample->branch_stack);
if (!bi)
return -ENOMEM;
for (i = 0; i < sample->branch_stack->nr; i++) {
if (rep->hide_unresolved && !(bi[i].from.sym && bi[i].to.sym))
continue;
/*
* The report shows the percentage of total branches captured
* and not events sampled. Thus we use a pseudo period of 1.
*/
he = __hists__add_branch_entry(&evsel->hists, al, parent,
&bi[i], 1);
if (he) {
struct annotation *notes;
err = -ENOMEM;
bx = he->branch_info;
if (bx->from.sym && use_browser > 0) {
notes = symbol__annotation(bx->from.sym);
if (!notes->src
&& symbol__alloc_hist(bx->from.sym) < 0)
goto out;
err = symbol__inc_addr_samples(bx->from.sym,
bx->from.map,
evsel->idx,
bx->from.al_addr);
if (err)
goto out;
}
if (bx->to.sym && use_browser > 0) {
notes = symbol__annotation(bx->to.sym);
if (!notes->src
&& symbol__alloc_hist(bx->to.sym) < 0)
goto out;
err = symbol__inc_addr_samples(bx->to.sym,
bx->to.map,
evsel->idx,
bx->to.al_addr);
if (err)
goto out;
}
evsel->hists.stats.total_period += 1;
hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE);
err = 0;
} else
return -ENOMEM;
}
out:
return err;
}
static int perf_evsel__add_hist_entry(struct perf_evsel *evsel,
struct addr_location *al,
struct perf_sample *sample,
@@ -126,14 +202,21 @@ static int process_sample_event(struct perf_tool *tool,
if (rep->cpu_list && !test_bit(sample->cpu, rep->cpu_bitmap))
return 0;
if (al.map != NULL)
al.map->dso->hit = 1;
if (sort__branch_mode == 1) {
if (perf_report__add_branch_hist_entry(tool, &al, sample,
evsel, machine)) {
pr_debug("problem adding lbr entry, skipping event\n");
return -1;
}
} else {
if (al.map != NULL)
al.map->dso->hit = 1;
if (perf_evsel__add_hist_entry(evsel, &al, sample, machine)) {
pr_debug("problem incrementing symbol period, skipping event\n");
return -1;
if (perf_evsel__add_hist_entry(evsel, &al, sample, machine)) {
pr_debug("problem incrementing symbol period, skipping event\n");
return -1;
}
}
return 0;
}
@@ -188,6 +271,15 @@ static int perf_report__setup_sample_type(struct perf_report *rep)
}
}
if (sort__branch_mode == 1) {
if (!(self->sample_type & PERF_SAMPLE_BRANCH_STACK)) {
fprintf(stderr, "selected -b but no branch data."
" Did you call perf record without"
" -b?\n");
return -1;
}
}
return 0;
}
@@ -246,7 +338,7 @@ static int __cmd_report(struct perf_report *rep)
{
int ret = -EINVAL;
u64 nr_samples;
struct perf_session *session;
struct perf_session *session = rep->session;
struct perf_evsel *pos;
struct map *kernel_map;
struct kmap *kernel_kmap;
@@ -254,13 +346,6 @@ static int __cmd_report(struct perf_report *rep)
signal(SIGINT, sig_handler);
session = perf_session__new(rep->input_name, O_RDONLY,
rep->force, false, &rep->tool);
if (session == NULL)
return -ENOMEM;
rep->session = session;
if (rep->cpu_list) {
ret = perf_session__cpu_bitmap(session, rep->cpu_list,
rep->cpu_bitmap);
@@ -427,9 +512,19 @@ setup:
return 0;
}
static int
parse_branch_mode(const struct option *opt __used, const char *str __used, int unset)
{
sort__branch_mode = !unset;
return 0;
}
int cmd_report(int argc, const char **argv, const char *prefix __used)
{
struct perf_session *session;
struct stat st;
bool has_br_stack = false;
int ret = -1;
char callchain_default_opt[] = "fractal,0.5,callee";
const char * const report_usage[] = {
"perf report [<options>]",
@@ -477,7 +572,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __used)
OPT_BOOLEAN(0, "stdio", &report.use_stdio,
"Use the stdio interface"),
OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
"sort by key(s): pid, comm, dso, symbol, parent"),
"sort by key(s): pid, comm, dso, symbol, parent, dso_to,"
" dso_from, symbol_to, symbol_from, mispredict"),
OPT_BOOLEAN(0, "showcpuutilization", &symbol_conf.show_cpu_utilization,
"Show sample percentage for different cpu modes"),
OPT_STRING('p', "parent", &parent_pattern, "regex",
@@ -517,6 +613,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __used)
"Specify disassembler style (e.g. -M intel for intel syntax)"),
OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period,
"Show a column with the sum of periods"),
OPT_CALLBACK_NOOPT('b', "branch-stack", &sort__branch_mode, "",
"use branch records for histogram filling", parse_branch_mode),
OPT_END()
};
@@ -536,11 +634,36 @@ int cmd_report(int argc, const char **argv, const char *prefix __used)
else
report.input_name = "perf.data";
}
session = perf_session__new(report.input_name, O_RDONLY,
report.force, false, &report.tool);
if (session == NULL)
return -ENOMEM;
if (strcmp(report.input_name, "-") != 0)
report.session = session;
has_br_stack = perf_header__has_feat(&session->header,
HEADER_BRANCH_STACK);
if (sort__branch_mode == -1 && has_br_stack)
sort__branch_mode = 1;
/* sort__branch_mode could be 0 if --no-branch-stack */
if (sort__branch_mode == 1) {
/*
* if no sort_order is provided, then specify
* branch-mode specific order
*/
if (sort_order == default_sort_order)
sort_order = "comm,dso_from,symbol_from,"
"dso_to,symbol_to";
}
if (strcmp(report.input_name, "-") != 0) {
setup_browser(true);
else
} else {
use_browser = 0;
}
/*
* Only in the newt browser we are doing integrated annotation,
@@ -568,13 +691,13 @@ int cmd_report(int argc, const char **argv, const char *prefix __used)
}
if (symbol__init() < 0)
return -1;
goto error;
setup_sorting(report_usage, options);
if (parent_pattern != default_parent_pattern) {
if (sort_dimension__add("parent") < 0)
return -1;
goto error;
/*
* Only show the parent fields if we explicitly
@@ -592,9 +715,20 @@ int cmd_report(int argc, const char **argv, const char *prefix __used)
if (argc)
usage_with_options(report_usage, options);
sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "dso", stdout);
sort_entry__setup_elide(&sort_comm, symbol_conf.comm_list, "comm", stdout);
sort_entry__setup_elide(&sort_sym, symbol_conf.sym_list, "symbol", stdout);
return __cmd_report(&report);
if (sort__branch_mode == 1) {
sort_entry__setup_elide(&sort_dso_from, symbol_conf.dso_from_list, "dso_from", stdout);
sort_entry__setup_elide(&sort_dso_to, symbol_conf.dso_to_list, "dso_to", stdout);
sort_entry__setup_elide(&sort_sym_from, symbol_conf.sym_from_list, "sym_from", stdout);
sort_entry__setup_elide(&sort_sym_to, symbol_conf.sym_to_list, "sym_to", stdout);
} else {
sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "dso", stdout);
sort_entry__setup_elide(&sort_sym, symbol_conf.sym_list, "symbol", stdout);
}
ret = __cmd_report(&report);
error:
perf_session__delete(session);
return ret;
}

Some files were not shown because too many files have changed in this diff Show More