You've already forked linux-apfs
mirror of
https://github.com/linux-apfs/linux-apfs.git
synced 2026-05-01 15:00:59 -07:00
Merge tag 'perf-core-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core
Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:
* Revert "perf sched: Handle PERF_RECORD_EXIT events" to get 'perf sched lat'
back working.
* We don't use Newt anymore, just plain libslang.
* Kill a bunch of die() calls, from Namhyung Kim.
* Add --no-demangle to report/top, from Namhyung Kim.
* Fix dependency of the python binding wrt libtraceevent, from Naohiro Aota.
* Introduce per core aggregation in 'perf stat', from Stephane Eranian.
* Add memory profiling via PEBS, from Stephane Eranian.
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
@@ -71,6 +71,7 @@
|
||||
#define MSR_IA32_PEBS_ENABLE 0x000003f1
|
||||
#define MSR_IA32_DS_AREA 0x00000600
|
||||
#define MSR_IA32_PERF_CAPABILITIES 0x00000345
|
||||
#define MSR_PEBS_LD_LAT_THRESHOLD 0x000003f6
|
||||
|
||||
#define MSR_MTRRfix64K_00000 0x00000250
|
||||
#define MSR_MTRRfix16K_80000 0x00000258
|
||||
|
||||
@@ -1316,9 +1316,16 @@ static struct attribute_group x86_pmu_format_group = {
|
||||
*/
|
||||
static void __init filter_events(struct attribute **attrs)
|
||||
{
|
||||
struct device_attribute *d;
|
||||
struct perf_pmu_events_attr *pmu_attr;
|
||||
int i, j;
|
||||
|
||||
for (i = 0; attrs[i]; i++) {
|
||||
d = (struct device_attribute *)attrs[i];
|
||||
pmu_attr = container_of(d, struct perf_pmu_events_attr, attr);
|
||||
/* str trumps id */
|
||||
if (pmu_attr->event_str)
|
||||
continue;
|
||||
if (x86_pmu.event_map(i))
|
||||
continue;
|
||||
|
||||
@@ -1330,23 +1337,46 @@ static void __init filter_events(struct attribute **attrs)
|
||||
}
|
||||
}
|
||||
|
||||
static ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr,
|
||||
/* Merge two pointer arrays */
|
||||
static __init struct attribute **merge_attr(struct attribute **a, struct attribute **b)
|
||||
{
|
||||
struct attribute **new;
|
||||
int j, i;
|
||||
|
||||
for (j = 0; a[j]; j++)
|
||||
;
|
||||
for (i = 0; b[i]; i++)
|
||||
j++;
|
||||
j++;
|
||||
|
||||
new = kmalloc(sizeof(struct attribute *) * j, GFP_KERNEL);
|
||||
if (!new)
|
||||
return NULL;
|
||||
|
||||
j = 0;
|
||||
for (i = 0; a[i]; i++)
|
||||
new[j++] = a[i];
|
||||
for (i = 0; b[i]; i++)
|
||||
new[j++] = b[i];
|
||||
new[j] = NULL;
|
||||
|
||||
return new;
|
||||
}
|
||||
|
||||
ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr,
|
||||
char *page)
|
||||
{
|
||||
struct perf_pmu_events_attr *pmu_attr = \
|
||||
container_of(attr, struct perf_pmu_events_attr, attr);
|
||||
|
||||
u64 config = x86_pmu.event_map(pmu_attr->id);
|
||||
|
||||
/* string trumps id */
|
||||
if (pmu_attr->event_str)
|
||||
return sprintf(page, "%s", pmu_attr->event_str);
|
||||
|
||||
return x86_pmu.events_sysfs_show(page, config);
|
||||
}
|
||||
|
||||
#define EVENT_VAR(_id) event_attr_##_id
|
||||
#define EVENT_PTR(_id) &event_attr_##_id.attr.attr
|
||||
|
||||
#define EVENT_ATTR(_name, _id) \
|
||||
PMU_EVENT_ATTR(_name, EVENT_VAR(_id), PERF_COUNT_HW_##_id, \
|
||||
events_sysfs_show)
|
||||
|
||||
EVENT_ATTR(cpu-cycles, CPU_CYCLES );
|
||||
EVENT_ATTR(instructions, INSTRUCTIONS );
|
||||
EVENT_ATTR(cache-references, CACHE_REFERENCES );
|
||||
@@ -1459,16 +1489,27 @@ static int __init init_hw_perf_events(void)
|
||||
|
||||
unconstrained = (struct event_constraint)
|
||||
__EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1,
|
||||
0, x86_pmu.num_counters, 0);
|
||||
0, x86_pmu.num_counters, 0, 0);
|
||||
|
||||
x86_pmu.attr_rdpmc = 1; /* enable userspace RDPMC usage by default */
|
||||
x86_pmu_format_group.attrs = x86_pmu.format_attrs;
|
||||
|
||||
if (x86_pmu.event_attrs)
|
||||
x86_pmu_events_group.attrs = x86_pmu.event_attrs;
|
||||
|
||||
if (!x86_pmu.events_sysfs_show)
|
||||
x86_pmu_events_group.attrs = &empty_attrs;
|
||||
else
|
||||
filter_events(x86_pmu_events_group.attrs);
|
||||
|
||||
if (x86_pmu.cpu_events) {
|
||||
struct attribute **tmp;
|
||||
|
||||
tmp = merge_attr(x86_pmu_events_group.attrs, x86_pmu.cpu_events);
|
||||
if (!WARN_ON(!tmp))
|
||||
x86_pmu_events_group.attrs = tmp;
|
||||
}
|
||||
|
||||
pr_info("... version: %d\n", x86_pmu.version);
|
||||
pr_info("... bit width: %d\n", x86_pmu.cntval_bits);
|
||||
pr_info("... generic registers: %d\n", x86_pmu.num_counters);
|
||||
|
||||
@@ -46,6 +46,7 @@ enum extra_reg_type {
|
||||
EXTRA_REG_RSP_0 = 0, /* offcore_response_0 */
|
||||
EXTRA_REG_RSP_1 = 1, /* offcore_response_1 */
|
||||
EXTRA_REG_LBR = 2, /* lbr_select */
|
||||
EXTRA_REG_LDLAT = 3, /* ld_lat_threshold */
|
||||
|
||||
EXTRA_REG_MAX /* number of entries needed */
|
||||
};
|
||||
@@ -59,7 +60,13 @@ struct event_constraint {
|
||||
u64 cmask;
|
||||
int weight;
|
||||
int overlap;
|
||||
int flags;
|
||||
};
|
||||
/*
|
||||
* struct event_constraint flags
|
||||
*/
|
||||
#define PERF_X86_EVENT_PEBS_LDLAT 0x1 /* ld+ldlat data address sampling */
|
||||
#define PERF_X86_EVENT_PEBS_ST 0x2 /* st data address sampling */
|
||||
|
||||
struct amd_nb {
|
||||
int nb_id; /* NorthBridge id */
|
||||
@@ -170,16 +177,17 @@ struct cpu_hw_events {
|
||||
void *kfree_on_online;
|
||||
};
|
||||
|
||||
#define __EVENT_CONSTRAINT(c, n, m, w, o) {\
|
||||
/*
 * Build a struct event_constraint initializer.
 *
 *   c - event code to match
 *   n - counter index bitmask (stored in idxmsk64)
 *   m - mask applied to the event config before comparing with c
 *   w - weight
 *   o - overlap flag
 *   f - PERF_X86_EVENT_* flags
 *
 * Every argument is parenthesized in the expansion so that any
 * expression can be passed safely.
 */
#define __EVENT_CONSTRAINT(c, n, m, w, o, f) {\
	{ .idxmsk64 = (n) },		\
	.code = (c),			\
	.cmask = (m),			\
	.weight = (w),			\
	.overlap = (o),			\
	.flags = (f),			\
}
|
||||
|
||||
#define EVENT_CONSTRAINT(c, n, m) \
|
||||
__EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 0)
|
||||
__EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 0, 0)
|
||||
|
||||
/*
|
||||
* The overlap flag marks event constraints with overlapping counter
|
||||
@@ -203,7 +211,7 @@ struct cpu_hw_events {
|
||||
* and its counter masks must be kept at a minimum.
|
||||
*/
|
||||
#define EVENT_CONSTRAINT_OVERLAP(c, n, m) \
|
||||
__EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 1)
|
||||
__EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 1, 0)
|
||||
|
||||
/*
|
||||
* Constraint on the Event code.
|
||||
@@ -231,6 +239,14 @@ struct cpu_hw_events {
|
||||
#define INTEL_UEVENT_CONSTRAINT(c, n) \
|
||||
EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
|
||||
|
||||
#define INTEL_PLD_CONSTRAINT(c, n) \
|
||||
__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
|
||||
HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LDLAT)
|
||||
|
||||
#define INTEL_PST_CONSTRAINT(c, n) \
|
||||
__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
|
||||
HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST)
|
||||
|
||||
#define EVENT_CONSTRAINT_END \
|
||||
EVENT_CONSTRAINT(0, 0, 0)
|
||||
|
||||
@@ -260,12 +276,22 @@ struct extra_reg {
|
||||
.msr = (ms), \
|
||||
.config_mask = (m), \
|
||||
.valid_mask = (vm), \
|
||||
.idx = EXTRA_REG_##i \
|
||||
.idx = EXTRA_REG_##i, \
|
||||
}
|
||||
|
||||
#define INTEL_EVENT_EXTRA_REG(event, msr, vm, idx) \
|
||||
EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm, idx)
|
||||
|
||||
#define INTEL_UEVENT_EXTRA_REG(event, msr, vm, idx) \
|
||||
EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT | \
|
||||
ARCH_PERFMON_EVENTSEL_UMASK, vm, idx)
|
||||
|
||||
#define INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(c) \
|
||||
INTEL_UEVENT_EXTRA_REG(c, \
|
||||
MSR_PEBS_LD_LAT_THRESHOLD, \
|
||||
0xffff, \
|
||||
LDLAT)
|
||||
|
||||
#define EVENT_EXTRA_END EVENT_EXTRA_REG(0, 0, 0, 0, RSP_0)
|
||||
|
||||
union perf_capabilities {
|
||||
@@ -355,8 +381,10 @@ struct x86_pmu {
|
||||
*/
|
||||
int attr_rdpmc;
|
||||
struct attribute **format_attrs;
|
||||
struct attribute **event_attrs;
|
||||
|
||||
ssize_t (*events_sysfs_show)(char *page, u64 config);
|
||||
struct attribute **cpu_events;
|
||||
|
||||
/*
|
||||
* CPU Hotplug hooks
|
||||
@@ -421,6 +449,23 @@ do { \
|
||||
#define ERF_NO_HT_SHARING 1
|
||||
#define ERF_HAS_RSP_1 2
|
||||
|
||||
#define EVENT_VAR(_id) event_attr_##_id
|
||||
#define EVENT_PTR(_id) &event_attr_##_id.attr.attr
|
||||
|
||||
#define EVENT_ATTR(_name, _id) \
|
||||
static struct perf_pmu_events_attr EVENT_VAR(_id) = { \
|
||||
.attr = __ATTR(_name, 0444, events_sysfs_show, NULL), \
|
||||
.id = PERF_COUNT_HW_##_id, \
|
||||
.event_str = NULL, \
|
||||
};
|
||||
|
||||
#define EVENT_ATTR_STR(_name, v, str) \
|
||||
static struct perf_pmu_events_attr event_attr_##v = { \
|
||||
.attr = __ATTR(_name, 0444, events_sysfs_show, NULL), \
|
||||
.id = 0, \
|
||||
.event_str = str, \
|
||||
};
|
||||
|
||||
extern struct x86_pmu x86_pmu __read_mostly;
|
||||
|
||||
DECLARE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
|
||||
@@ -628,6 +673,9 @@ int p6_pmu_init(void);
|
||||
|
||||
int knc_pmu_init(void);
|
||||
|
||||
ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr,
|
||||
char *page);
|
||||
|
||||
#else /* CONFIG_CPU_SUP_INTEL */
|
||||
|
||||
static inline void reserve_ds_buffers(void)
|
||||
|
||||
@@ -81,6 +81,7 @@ static struct event_constraint intel_nehalem_event_constraints[] __read_mostly =
|
||||
static struct extra_reg intel_nehalem_extra_regs[] __read_mostly =
|
||||
{
|
||||
INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0),
|
||||
INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x100b),
|
||||
EVENT_EXTRA_END
|
||||
};
|
||||
|
||||
@@ -136,6 +137,7 @@ static struct extra_reg intel_westmere_extra_regs[] __read_mostly =
|
||||
{
|
||||
INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0),
|
||||
INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0xffff, RSP_1),
|
||||
INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x100b),
|
||||
EVENT_EXTRA_END
|
||||
};
|
||||
|
||||
@@ -155,9 +157,25 @@ static struct event_constraint intel_gen_event_constraints[] __read_mostly =
|
||||
static struct extra_reg intel_snb_extra_regs[] __read_mostly = {
|
||||
INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0x3fffffffffull, RSP_0),
|
||||
INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0x3fffffffffull, RSP_1),
|
||||
INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
|
||||
EVENT_EXTRA_END
|
||||
};
|
||||
|
||||
EVENT_ATTR_STR(mem-loads, mem_ld_nhm, "event=0x0b,umask=0x10,ldlat=3");
|
||||
EVENT_ATTR_STR(mem-loads, mem_ld_snb, "event=0xcd,umask=0x1,ldlat=3");
|
||||
EVENT_ATTR_STR(mem-stores, mem_st_snb, "event=0xcd,umask=0x2");
|
||||
|
||||
struct attribute *nhm_events_attrs[] = {
|
||||
EVENT_PTR(mem_ld_nhm),
|
||||
NULL,
|
||||
};
|
||||
|
||||
struct attribute *snb_events_attrs[] = {
|
||||
EVENT_PTR(mem_ld_snb),
|
||||
EVENT_PTR(mem_st_snb),
|
||||
NULL,
|
||||
};
|
||||
|
||||
static u64 intel_pmu_event_map(int hw_event)
|
||||
{
|
||||
return intel_perfmon_event_map[hw_event];
|
||||
@@ -1392,8 +1410,11 @@ x86_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
|
||||
|
||||
if (x86_pmu.event_constraints) {
|
||||
for_each_event_constraint(c, x86_pmu.event_constraints) {
|
||||
if ((event->hw.config & c->cmask) == c->code)
|
||||
if ((event->hw.config & c->cmask) == c->code) {
|
||||
/* hw.flags zeroed at initialization */
|
||||
event->hw.flags |= c->flags;
|
||||
return c;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1438,6 +1459,7 @@ intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc,
|
||||
/*
 * Release the constraint state held by @event: clear the per-event
 * hw.flags and hand the event to
 * intel_put_shared_regs_event_constraints().
 */
static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
|
||||
struct perf_event *event)
|
||||
{
|
||||
/* the constraint lookups OR c->flags into hw.flags, so start clean next time */
event->hw.flags = 0;
|
||||
intel_put_shared_regs_event_constraints(cpuc, event);
|
||||
}
|
||||
|
||||
@@ -1761,6 +1783,8 @@ static void intel_pmu_flush_branch_stack(void)
|
||||
|
||||
PMU_FORMAT_ATTR(offcore_rsp, "config1:0-63");
|
||||
|
||||
PMU_FORMAT_ATTR(ldlat, "config1:0-15");
|
||||
|
||||
static struct attribute *intel_arch3_formats_attr[] = {
|
||||
&format_attr_event.attr,
|
||||
&format_attr_umask.attr,
|
||||
@@ -1771,6 +1795,7 @@ static struct attribute *intel_arch3_formats_attr[] = {
|
||||
&format_attr_cmask.attr,
|
||||
|
||||
&format_attr_offcore_rsp.attr, /* XXX do NHM/WSM + SNB breakout */
|
||||
&format_attr_ldlat.attr, /* PEBS load latency */
|
||||
NULL,
|
||||
};
|
||||
|
||||
@@ -2031,6 +2056,8 @@ __init int intel_pmu_init(void)
|
||||
x86_pmu.enable_all = intel_pmu_nhm_enable_all;
|
||||
x86_pmu.extra_regs = intel_nehalem_extra_regs;
|
||||
|
||||
x86_pmu.cpu_events = nhm_events_attrs;
|
||||
|
||||
/* UOPS_ISSUED.STALLED_CYCLES */
|
||||
intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
|
||||
X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
|
||||
@@ -2074,6 +2101,8 @@ __init int intel_pmu_init(void)
|
||||
x86_pmu.extra_regs = intel_westmere_extra_regs;
|
||||
x86_pmu.er_flags |= ERF_HAS_RSP_1;
|
||||
|
||||
x86_pmu.cpu_events = nhm_events_attrs;
|
||||
|
||||
/* UOPS_ISSUED.STALLED_CYCLES */
|
||||
intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
|
||||
X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
|
||||
@@ -2102,6 +2131,8 @@ __init int intel_pmu_init(void)
|
||||
x86_pmu.er_flags |= ERF_HAS_RSP_1;
|
||||
x86_pmu.er_flags |= ERF_NO_HT_SHARING;
|
||||
|
||||
x86_pmu.cpu_events = snb_events_attrs;
|
||||
|
||||
/* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
|
||||
intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
|
||||
X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
|
||||
@@ -2128,6 +2159,8 @@ __init int intel_pmu_init(void)
|
||||
x86_pmu.er_flags |= ERF_HAS_RSP_1;
|
||||
x86_pmu.er_flags |= ERF_NO_HT_SHARING;
|
||||
|
||||
x86_pmu.cpu_events = snb_events_attrs;
|
||||
|
||||
/* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
|
||||
intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
|
||||
X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
|
||||
|
||||
@@ -24,6 +24,130 @@ struct pebs_record_32 {
|
||||
|
||||
*/
|
||||
|
||||
/*
 * Overlay for the PEBS data source status word. The same raw value is
 * viewed either through the ld_* fields (load latency) or through the
 * st_* fields (precise store).
 */
union intel_x86_pebs_dse {
|
||||
/* raw status value */
u64 val;
|
||||
/* load latency view */
struct {
|
||||
/* bits 0-3: index into pebs_data_source[] */
unsigned int ld_dse:4;
|
||||
/* bit 4: load missed the 2nd level TLB */
unsigned int ld_stlb_miss:1;
|
||||
/* bit 5: locked prefix */
unsigned int ld_locked:1;
|
||||
unsigned int ld_reserved:26;
|
||||
};
|
||||
/* precise store view */
struct {
|
||||
/* bit 0: store hit the L1 data cache */
unsigned int st_l1d_hit:1;
|
||||
unsigned int st_reserved1:3;
|
||||
/* bit 4: store missed the 2nd level TLB */
unsigned int st_stlb_miss:1;
|
||||
/* bit 5: locked prefix */
unsigned int st_locked:1;
|
||||
unsigned int st_reserved2:26;
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
/*
|
||||
* Map PEBS Load Latency Data Source encodings to generic
|
||||
* memory data source information
|
||||
*/
|
||||
#define P(a, b) PERF_MEM_S(a, b)
|
||||
#define OP_LH (P(OP, LOAD) | P(LVL, HIT))
|
||||
#define SNOOP_NONE_MISS (P(SNOOP, NONE) | P(SNOOP, MISS))
|
||||
|
||||
static const u64 pebs_data_source[] = {
|
||||
P(OP, LOAD) | P(LVL, MISS) | P(LVL, L3) | P(SNOOP, NA),/* 0x00:ukn L3 */
|
||||
OP_LH | P(LVL, L1) | P(SNOOP, NONE), /* 0x01: L1 local */
|
||||
OP_LH | P(LVL, LFB) | P(SNOOP, NONE), /* 0x02: LFB hit */
|
||||
OP_LH | P(LVL, L2) | P(SNOOP, NONE), /* 0x03: L2 hit */
|
||||
OP_LH | P(LVL, L3) | P(SNOOP, NONE), /* 0x04: L3 hit */
|
||||
OP_LH | P(LVL, L3) | P(SNOOP, MISS), /* 0x05: L3 hit, snoop miss */
|
||||
OP_LH | P(LVL, L3) | P(SNOOP, HIT), /* 0x06: L3 hit, snoop hit */
|
||||
OP_LH | P(LVL, L3) | P(SNOOP, HITM), /* 0x07: L3 hit, snoop hitm */
|
||||
OP_LH | P(LVL, REM_CCE1) | P(SNOOP, HIT), /* 0x08: L3 miss snoop hit */
|
||||
OP_LH | P(LVL, REM_CCE1) | P(SNOOP, HITM), /* 0x09: L3 miss snoop hitm*/
|
||||
OP_LH | P(LVL, LOC_RAM) | P(SNOOP, HIT), /* 0x0a: L3 miss, shared */
|
||||
OP_LH | P(LVL, REM_RAM1) | P(SNOOP, HIT), /* 0x0b: L3 miss, shared */
|
||||
OP_LH | P(LVL, LOC_RAM) | SNOOP_NONE_MISS,/* 0x0c: L3 miss, excl */
|
||||
OP_LH | P(LVL, REM_RAM1) | SNOOP_NONE_MISS,/* 0x0d: L3 miss, excl */
|
||||
OP_LH | P(LVL, IO) | P(SNOOP, NONE), /* 0x0e: I/O */
|
||||
OP_LH | P(LVL, UNC) | P(SNOOP, NONE), /* 0x0f: uncached */
|
||||
};
|
||||
|
||||
/*
 * Translate a PEBS precise-store status word into the generic
 * perf_mem_data_src encoding.
 *
 * @status: raw data source value taken from the PEBS record
 *
 * Returns the PERF_MEM_* bit combination describing the store.
 */
static u64 precise_store_data(u64 status)
{
	union intel_x86_pebs_dse dse;
	u64 src;

	dse.val = status;

	/* common part: a store, no snoop info, L1 cache level, L2 TLB level */
	src = P(OP, STORE) | P(SNOOP, NA) | P(LVL, L1) | P(TLB, L2);

	/*
	 * bit 4: TLB access
	 * 1 = store missed the 2nd level TLB, so it either hit the
	 *     walker or the OS
	 * 0 = store hit the 2nd level TLB
	 */
	src |= dse.st_stlb_miss ? P(TLB, MISS) : P(TLB, HIT);

	/*
	 * bit 0: hit L1 data cache
	 * when clear, all we know is that the store missed L1D
	 */
	src |= dse.st_l1d_hit ? P(LVL, HIT) : P(LVL, MISS);

	/* bit 5: locked prefix */
	if (dse.st_locked)
		src |= P(LOCK, LOCKED);

	return src;
}
|
||||
|
||||
/*
 * Translate a PEBS load-latency status word into the generic
 * perf_mem_data_src encoding.
 *
 * @status: raw data source value taken from the PEBS record
 *
 * Returns the PERF_MEM_* bit combination describing the load.
 */
static u64 load_latency_data(u64 status)
{
	union intel_x86_pebs_dse dse;
	int fam = boot_cpu_data.x86;
	int model = boot_cpu_data.x86_model;
	int is_nhm;
	u64 src;

	dse.val = status;

	/* bits 0-3 select the entry of the mapping table */
	src = pebs_data_source[dse.ld_dse];

	/* Nehalem models do not provide TLB or lock information */
	is_nhm = fam == 0x6 &&
		 (model == 26 || model == 30 || model == 31 || model == 46);
	if (is_nhm)
		return src | P(TLB, NA) | P(LOCK, NA);

	/*
	 * bit 4: TLB access
	 * 0 = did not miss 2nd level TLB
	 * 1 = missed 2nd level TLB
	 */
	src |= dse.ld_stlb_miss ? (P(TLB, MISS) | P(TLB, L2))
				: (P(TLB, HIT) | P(TLB, L1) | P(TLB, L2));

	/* bit 5: locked prefix */
	if (dse.ld_locked)
		src |= P(LOCK, LOCKED);

	return src;
}
|
||||
|
||||
struct pebs_record_core {
|
||||
u64 flags, ip;
|
||||
u64 ax, bx, cx, dx;
|
||||
@@ -364,7 +488,7 @@ struct event_constraint intel_atom_pebs_event_constraints[] = {
|
||||
};
|
||||
|
||||
struct event_constraint intel_nehalem_pebs_event_constraints[] = {
|
||||
INTEL_EVENT_CONSTRAINT(0x0b, 0xf), /* MEM_INST_RETIRED.* */
|
||||
INTEL_PLD_CONSTRAINT(0x100b, 0xf), /* MEM_INST_RETIRED.* */
|
||||
INTEL_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */
|
||||
INTEL_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
|
||||
INTEL_EVENT_CONSTRAINT(0xc0, 0xf), /* INST_RETIRED.ANY */
|
||||
@@ -379,7 +503,7 @@ struct event_constraint intel_nehalem_pebs_event_constraints[] = {
|
||||
};
|
||||
|
||||
struct event_constraint intel_westmere_pebs_event_constraints[] = {
|
||||
INTEL_EVENT_CONSTRAINT(0x0b, 0xf), /* MEM_INST_RETIRED.* */
|
||||
INTEL_PLD_CONSTRAINT(0x100b, 0xf), /* MEM_INST_RETIRED.* */
|
||||
INTEL_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */
|
||||
INTEL_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
|
||||
INTEL_EVENT_CONSTRAINT(0xc0, 0xf), /* INSTR_RETIRED.* */
|
||||
@@ -399,7 +523,8 @@ struct event_constraint intel_snb_pebs_event_constraints[] = {
|
||||
INTEL_UEVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */
|
||||
INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */
|
||||
INTEL_EVENT_CONSTRAINT(0xc5, 0xf), /* BR_MISP_RETIRED.* */
|
||||
INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.* */
|
||||
INTEL_PLD_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
|
||||
INTEL_PST_CONSTRAINT(0x02cd, 0x8), /* MEM_TRANS_RETIRED.PRECISE_STORES */
|
||||
INTEL_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_UOP_RETIRED.* */
|
||||
INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
|
||||
INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
|
||||
@@ -413,7 +538,8 @@ struct event_constraint intel_ivb_pebs_event_constraints[] = {
|
||||
INTEL_UEVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */
|
||||
INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */
|
||||
INTEL_EVENT_CONSTRAINT(0xc5, 0xf), /* BR_MISP_RETIRED.* */
|
||||
INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.* */
|
||||
INTEL_PLD_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
|
||||
INTEL_PST_CONSTRAINT(0x02cd, 0x8), /* MEM_TRANS_RETIRED.PRECISE_STORES */
|
||||
INTEL_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_UOP_RETIRED.* */
|
||||
INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
|
||||
INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
|
||||
@@ -430,8 +556,10 @@ struct event_constraint *intel_pebs_constraints(struct perf_event *event)
|
||||
|
||||
if (x86_pmu.pebs_constraints) {
|
||||
for_each_event_constraint(c, x86_pmu.pebs_constraints) {
|
||||
if ((event->hw.config & c->cmask) == c->code)
|
||||
if ((event->hw.config & c->cmask) == c->code) {
|
||||
event->hw.flags |= c->flags;
|
||||
return c;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -446,6 +574,11 @@ void intel_pmu_pebs_enable(struct perf_event *event)
|
||||
hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
|
||||
|
||||
cpuc->pebs_enabled |= 1ULL << hwc->idx;
|
||||
|
||||
if (event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT)
|
||||
cpuc->pebs_enabled |= 1ULL << (hwc->idx + 32);
|
||||
else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
|
||||
cpuc->pebs_enabled |= 1ULL << 63;
|
||||
}
|
||||
|
||||
void intel_pmu_pebs_disable(struct perf_event *event)
|
||||
@@ -558,20 +691,51 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
|
||||
struct pt_regs *iregs, void *__pebs)
|
||||
{
|
||||
/*
|
||||
* We cast to pebs_record_core since that is a subset of
|
||||
* both formats and we don't use the other fields in this
|
||||
* routine.
|
||||
* We cast to pebs_record_nhm to get the load latency data
|
||||
* if extra_reg MSR_PEBS_LD_LAT_THRESHOLD used
|
||||
*/
|
||||
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
|
||||
struct pebs_record_core *pebs = __pebs;
|
||||
struct pebs_record_nhm *pebs = __pebs;
|
||||
struct perf_sample_data data;
|
||||
struct pt_regs regs;
|
||||
u64 sample_type;
|
||||
int fll, fst;
|
||||
|
||||
if (!intel_pmu_save_and_restart(event))
|
||||
return;
|
||||
|
||||
fll = event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT;
|
||||
fst = event->hw.flags & PERF_X86_EVENT_PEBS_ST;
|
||||
|
||||
perf_sample_data_init(&data, 0, event->hw.last_period);
|
||||
|
||||
data.period = event->hw.last_period;
|
||||
sample_type = event->attr.sample_type;
|
||||
|
||||
/*
|
||||
* if PEBS-LL or PreciseStore
|
||||
*/
|
||||
if (fll || fst) {
|
||||
if (sample_type & PERF_SAMPLE_ADDR)
|
||||
data.addr = pebs->dla;
|
||||
|
||||
/*
|
||||
* Use latency for weight (only avail with PEBS-LL)
|
||||
*/
|
||||
if (fll && (sample_type & PERF_SAMPLE_WEIGHT))
|
||||
data.weight = pebs->lat;
|
||||
|
||||
/*
|
||||
* data.data_src encodes the data source
|
||||
*/
|
||||
if (sample_type & PERF_SAMPLE_DATA_SRC) {
|
||||
if (fll)
|
||||
data.data_src.val = load_latency_data(pebs->dse);
|
||||
else
|
||||
data.data_src.val = precise_store_data(pebs->dse);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* We use the interrupt regs as a base because the PEBS record
|
||||
* does not contain a full regs set, specifically it seems to
|
||||
|
||||
@@ -2438,7 +2438,7 @@ static int __init uncore_type_init(struct intel_uncore_type *type)
|
||||
|
||||
type->unconstrainted = (struct event_constraint)
|
||||
__EVENT_CONSTRAINT(0, (1ULL << type->num_counters) - 1,
|
||||
0, type->num_counters, 0);
|
||||
0, type->num_counters, 0, 0);
|
||||
|
||||
for (i = 0; i < type->num_boxes; i++) {
|
||||
pmus[i].func_id = -1;
|
||||
|
||||
@@ -127,6 +127,7 @@ struct hw_perf_event {
|
||||
int event_base_rdpmc;
|
||||
int idx;
|
||||
int last_cpu;
|
||||
int flags;
|
||||
|
||||
struct hw_perf_event_extra extra_reg;
|
||||
struct hw_perf_event_extra branch_reg;
|
||||
@@ -567,11 +568,13 @@ struct perf_sample_data {
|
||||
u32 reserved;
|
||||
} cpu_entry;
|
||||
u64 period;
|
||||
union perf_mem_data_src data_src;
|
||||
struct perf_callchain_entry *callchain;
|
||||
struct perf_raw_record *raw;
|
||||
struct perf_branch_stack *br_stack;
|
||||
struct perf_regs_user regs_user;
|
||||
u64 stack_user_size;
|
||||
u64 weight;
|
||||
};
|
||||
|
||||
static inline void perf_sample_data_init(struct perf_sample_data *data,
|
||||
@@ -585,6 +588,8 @@ static inline void perf_sample_data_init(struct perf_sample_data *data,
|
||||
data->regs_user.abi = PERF_SAMPLE_REGS_ABI_NONE;
|
||||
data->regs_user.regs = NULL;
|
||||
data->stack_user_size = 0;
|
||||
data->weight = 0;
|
||||
data->data_src.val = 0;
|
||||
}
|
||||
|
||||
extern void perf_output_sample(struct perf_output_handle *handle,
|
||||
@@ -809,6 +814,7 @@ do { \
|
||||
struct perf_pmu_events_attr {
|
||||
struct device_attribute attr;
|
||||
u64 id;
|
||||
const char *event_str;
|
||||
};
|
||||
|
||||
#define PMU_EVENT_ATTR(_name, _var, _id, _show) \
|
||||
|
||||
@@ -132,8 +132,10 @@ enum perf_event_sample_format {
|
||||
PERF_SAMPLE_BRANCH_STACK = 1U << 11,
|
||||
PERF_SAMPLE_REGS_USER = 1U << 12,
|
||||
PERF_SAMPLE_STACK_USER = 1U << 13,
|
||||
PERF_SAMPLE_WEIGHT = 1U << 14,
|
||||
PERF_SAMPLE_DATA_SRC = 1U << 15,
|
||||
|
||||
PERF_SAMPLE_MAX = 1U << 14, /* non-ABI */
|
||||
PERF_SAMPLE_MAX = 1U << 16, /* non-ABI */
|
||||
};
|
||||
|
||||
/*
|
||||
@@ -443,6 +445,7 @@ struct perf_event_mmap_page {
|
||||
#define PERF_RECORD_MISC_GUEST_KERNEL (4 << 0)
|
||||
#define PERF_RECORD_MISC_GUEST_USER (5 << 0)
|
||||
|
||||
#define PERF_RECORD_MISC_MMAP_DATA (1 << 13)
|
||||
/*
|
||||
* Indicates that the content of PERF_SAMPLE_IP points to
|
||||
* the actual instruction that triggered the event. See also
|
||||
@@ -588,6 +591,9 @@ enum perf_event_type {
|
||||
* { u64 size;
|
||||
* char data[size];
|
||||
* u64 dyn_size; } && PERF_SAMPLE_STACK_USER
|
||||
*
|
||||
* { u64 weight; } && PERF_SAMPLE_WEIGHT
|
||||
* { u64 data_src; } && PERF_SAMPLE_DATA_SRC
|
||||
* };
|
||||
*/
|
||||
PERF_RECORD_SAMPLE = 9,
|
||||
@@ -613,4 +619,67 @@ enum perf_callchain_context {
|
||||
#define PERF_FLAG_FD_OUTPUT (1U << 1)
|
||||
#define PERF_FLAG_PID_CGROUP (1U << 2) /* pid=cgroup id, per-cpu mode only */
|
||||
|
||||
union perf_mem_data_src {
|
||||
__u64 val;
|
||||
struct {
|
||||
__u64 mem_op:5, /* type of opcode */
|
||||
mem_lvl:14, /* memory hierarchy level */
|
||||
mem_snoop:5, /* snoop mode */
|
||||
mem_lock:2, /* lock instr */
|
||||
mem_dtlb:7, /* tlb access */
|
||||
mem_rsvd:31;
|
||||
};
|
||||
};
|
||||
|
||||
/* type of opcode (load/store/prefetch,code) */
|
||||
#define PERF_MEM_OP_NA 0x01 /* not available */
|
||||
#define PERF_MEM_OP_LOAD 0x02 /* load instruction */
|
||||
#define PERF_MEM_OP_STORE 0x04 /* store instruction */
|
||||
#define PERF_MEM_OP_PFETCH 0x08 /* prefetch */
|
||||
#define PERF_MEM_OP_EXEC 0x10 /* code (execution) */
|
||||
#define PERF_MEM_OP_SHIFT 0
|
||||
|
||||
/* memory hierarchy (memory level, hit or miss) */
|
||||
#define PERF_MEM_LVL_NA 0x01 /* not available */
|
||||
#define PERF_MEM_LVL_HIT 0x02 /* hit level */
|
||||
#define PERF_MEM_LVL_MISS 0x04 /* miss level */
|
||||
#define PERF_MEM_LVL_L1 0x08 /* L1 */
|
||||
#define PERF_MEM_LVL_LFB 0x10 /* Line Fill Buffer */
|
||||
#define PERF_MEM_LVL_L2 0x20 /* L2 hit */
|
||||
#define PERF_MEM_LVL_L3 0x40 /* L3 hit */
|
||||
#define PERF_MEM_LVL_LOC_RAM 0x80 /* Local DRAM */
|
||||
#define PERF_MEM_LVL_REM_RAM1 0x100 /* Remote DRAM (1 hop) */
|
||||
#define PERF_MEM_LVL_REM_RAM2 0x200 /* Remote DRAM (2 hops) */
|
||||
#define PERF_MEM_LVL_REM_CCE1 0x400 /* Remote Cache (1 hop) */
|
||||
#define PERF_MEM_LVL_REM_CCE2 0x800 /* Remote Cache (2 hops) */
|
||||
#define PERF_MEM_LVL_IO 0x1000 /* I/O memory */
|
||||
#define PERF_MEM_LVL_UNC 0x2000 /* Uncached memory */
|
||||
#define PERF_MEM_LVL_SHIFT 5
|
||||
|
||||
/* snoop mode */
|
||||
#define PERF_MEM_SNOOP_NA 0x01 /* not available */
|
||||
#define PERF_MEM_SNOOP_NONE 0x02 /* no snoop */
|
||||
#define PERF_MEM_SNOOP_HIT 0x04 /* snoop hit */
|
||||
#define PERF_MEM_SNOOP_MISS 0x08 /* snoop miss */
|
||||
#define PERF_MEM_SNOOP_HITM 0x10 /* snoop hit modified */
|
||||
#define PERF_MEM_SNOOP_SHIFT 19
|
||||
|
||||
/* locked instruction */
|
||||
#define PERF_MEM_LOCK_NA 0x01 /* not available */
|
||||
#define PERF_MEM_LOCK_LOCKED 0x02 /* locked transaction */
|
||||
#define PERF_MEM_LOCK_SHIFT 24
|
||||
|
||||
/* TLB access */
|
||||
#define PERF_MEM_TLB_NA 0x01 /* not available */
|
||||
#define PERF_MEM_TLB_HIT 0x02 /* hit level */
|
||||
#define PERF_MEM_TLB_MISS 0x04 /* miss level */
|
||||
#define PERF_MEM_TLB_L1 0x08 /* L1 */
|
||||
#define PERF_MEM_TLB_L2 0x10 /* L2 */
|
||||
#define PERF_MEM_TLB_WK 0x20 /* Hardware Walker*/
|
||||
#define PERF_MEM_TLB_OS 0x40 /* OS fault handler */
|
||||
#define PERF_MEM_TLB_SHIFT 26
|
||||
|
||||
#define PERF_MEM_S(a, s) \
|
||||
(((u64)PERF_MEM_##a##_##s) << PERF_MEM_##a##_SHIFT)
|
||||
|
||||
#endif /* _UAPI_LINUX_PERF_EVENT_H */
|
||||
|
||||
@@ -976,9 +976,15 @@ static void perf_event__header_size(struct perf_event *event)
|
||||
if (sample_type & PERF_SAMPLE_PERIOD)
|
||||
size += sizeof(data->period);
|
||||
|
||||
if (sample_type & PERF_SAMPLE_WEIGHT)
|
||||
size += sizeof(data->weight);
|
||||
|
||||
if (sample_type & PERF_SAMPLE_READ)
|
||||
size += event->read_size;
|
||||
|
||||
if (sample_type & PERF_SAMPLE_DATA_SRC)
|
||||
size += sizeof(data->data_src.val);
|
||||
|
||||
event->header_size = size;
|
||||
}
|
||||
|
||||
@@ -4193,6 +4199,12 @@ void perf_output_sample(struct perf_output_handle *handle,
|
||||
perf_output_sample_ustack(handle,
|
||||
data->stack_user_size,
|
||||
data->regs_user.regs);
|
||||
|
||||
if (sample_type & PERF_SAMPLE_WEIGHT)
|
||||
perf_output_put(handle, data->weight);
|
||||
|
||||
if (sample_type & PERF_SAMPLE_DATA_SRC)
|
||||
perf_output_put(handle, data->data_src.val);
|
||||
}
|
||||
|
||||
void perf_prepare_sample(struct perf_event_header *header,
|
||||
@@ -4779,6 +4791,9 @@ got_name:
|
||||
mmap_event->file_name = name;
|
||||
mmap_event->file_size = size;
|
||||
|
||||
if (!(vma->vm_flags & VM_EXEC))
|
||||
mmap_event->event_id.header.misc |= PERF_RECORD_MISC_MMAP_DATA;
|
||||
|
||||
mmap_event->event_id.header.size = sizeof(mmap_event->event_id) + size;
|
||||
|
||||
rcu_read_lock();
|
||||
|
||||
@@ -0,0 +1,48 @@
|
||||
perf-mem(1)
|
||||
===========
|
||||
|
||||
NAME
|
||||
----
|
||||
perf-mem - Profile memory accesses
|
||||
|
||||
SYNOPSIS
|
||||
--------
|
||||
[verse]
|
||||
'perf mem' [<options>] (record [<command>] | report)
|
||||
|
||||
DESCRIPTION
|
||||
-----------
|
||||
"perf mem -t <TYPE> record" runs a command and gathers memory operation data
|
||||
from it, into perf.data. Perf record options are accepted and are passed through.
|
||||
|
||||
"perf mem -t <TYPE> report" displays the result. It invokes perf report with the
|
||||
right set of options to display a memory access profile.
|
||||
|
||||
OPTIONS
|
||||
-------
|
||||
<command>...::
|
||||
Any command you can specify in a shell.
|
||||
|
||||
-t::
|
||||
--type=::
|
||||
Select the memory operation type: load or store (default: load)
|
||||
|
||||
-D::
|
||||
--dump-raw-samples=::
|
||||
Dump the raw decoded samples on the screen in a format that is easy to parse with
|
||||
one sample per line.
|
||||
|
||||
-x::
|
||||
--field-separator::
|
||||
Specify the field separator used when dumping raw samples (-D option). By default,
|
||||
the separator is the space character.
|
||||
|
||||
-C::
|
||||
--cpu-list::
|
||||
Restrict the dump of raw samples to the CPUs provided via this option. Note that the same
|
||||
option can be passed in record mode. It will be interpreted the same way as perf
|
||||
record.
|
||||
|
||||
SEE ALSO
|
||||
--------
|
||||
linkperf:perf-record[1], linkperf:perf-report[1]
|
||||
@@ -182,6 +182,12 @@ is enabled for all the sampling events. The sampled branch type is the same for
|
||||
The various filters must be specified as a comma separated list: --branch-filter any_ret,u,k
|
||||
Note that this feature may not be available on all processors.
|
||||
|
||||
-W::
|
||||
--weight::
|
||||
Enable weighted sampling. An additional weight is recorded per sample and can be
|
||||
displayed with the weight and local_weight sort keys. This currently works for TSX
|
||||
abort events and some memory events in precise mode on modern Intel CPUs.
|
||||
|
||||
SEE ALSO
|
||||
--------
|
||||
linkperf:perf-stat[1], linkperf:perf-list[1]
|
||||
|
||||
@@ -59,7 +59,7 @@ OPTIONS
|
||||
--sort=::
|
||||
Sort histogram entries by given key(s) - multiple keys can be specified
|
||||
in CSV format. Following sort keys are available:
|
||||
pid, comm, dso, symbol, parent, cpu, srcline.
|
||||
pid, comm, dso, symbol, parent, cpu, srcline, weight, local_weight.
|
||||
|
||||
Each key has following meaning:
|
||||
|
||||
@@ -206,6 +206,10 @@ OPTIONS
|
||||
--group::
|
||||
Show event group information together.
|
||||
|
||||
--demangle::
|
||||
Demangle symbol names to human readable form. It's enabled by default,
|
||||
disable with --no-demangle.
|
||||
|
||||
SEE ALSO
|
||||
--------
|
||||
linkperf:perf-stat[1], linkperf:perf-annotate[1]
|
||||
|
||||
@@ -119,13 +119,19 @@ perf stat --repeat 10 --null --sync --pre 'make -s O=defconfig-build/clean' -- m
|
||||
Print count deltas every N milliseconds (minimum: 100ms)
|
||||
example: perf stat -I 1000 -e cycles -a sleep 5
|
||||
|
||||
--aggr-socket::
|
||||
--per-socket::
|
||||
Aggregate counts per processor socket for system-wide mode measurements. This
|
||||
is a useful mode to detect imbalance between sockets. To enable this mode,
|
||||
use --aggr-socket in addition to -a. (system-wide). The output includes the
|
||||
use --per-socket in addition to -a. (system-wide). The output includes the
|
||||
socket number and the number of online processors on that socket. This is
|
||||
useful to gauge the amount of aggregation.
|
||||
|
||||
--per-core::
|
||||
Aggregate counts per physical processor for system-wide mode measurements. This
|
||||
is a useful mode to detect imbalance between physical cores. To enable this mode,
|
||||
use --per-core in addition to -a. (system-wide). The output includes the
|
||||
core number and the number of online logical processors on that physical processor.
|
||||
|
||||
EXAMPLES
|
||||
--------
|
||||
|
||||
|
||||
@@ -112,7 +112,7 @@ Default is to monitor all CPUS.
|
||||
|
||||
-s::
|
||||
--sort::
|
||||
Sort by key(s): pid, comm, dso, symbol, parent, srcline.
|
||||
Sort by key(s): pid, comm, dso, symbol, parent, srcline, weight, local_weight.
|
||||
|
||||
-n::
|
||||
--show-nr-samples::
|
||||
|
||||
+15
-8
@@ -35,7 +35,9 @@ include config/utilities.mak
|
||||
#
|
||||
# Define WERROR=0 to disable treating any warnings as errors.
|
||||
#
|
||||
# Define NO_NEWT if you do not want TUI support.
|
||||
# Define NO_NEWT if you do not want TUI support. (deprecated)
|
||||
#
|
||||
# Define NO_SLANG if you do not want TUI support.
|
||||
#
|
||||
# Define NO_GTK2 if you do not want GTK+ GUI support.
|
||||
#
|
||||
@@ -104,6 +106,10 @@ ifdef PARSER_DEBUG
|
||||
PARSER_DEBUG_CFLAGS := -DPARSER_DEBUG
|
||||
endif
|
||||
|
||||
ifdef NO_NEWT
|
||||
NO_SLANG=1
|
||||
endif
|
||||
|
||||
CFLAGS = -fno-omit-frame-pointer -ggdb3 -funwind-tables -Wall -Wextra -std=gnu99 $(CFLAGS_WERROR) $(CFLAGS_OPTIMIZE) $(EXTRA_WARNINGS) $(EXTRA_CFLAGS) $(PARSER_DEBUG_CFLAGS)
|
||||
EXTLIBS = -lpthread -lrt -lelf -lm
|
||||
ALL_CFLAGS = $(CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE
|
||||
@@ -272,7 +278,7 @@ export PYTHON_EXTBUILD_LIB PYTHON_EXTBUILD_TMP
|
||||
python-clean := rm -rf $(PYTHON_EXTBUILD) $(OUTPUT)python/perf.so
|
||||
|
||||
PYTHON_EXT_SRCS := $(shell grep -v ^\# util/python-ext-sources)
|
||||
PYTHON_EXT_DEPS := util/python-ext-sources util/setup.py
|
||||
PYTHON_EXT_DEPS := util/python-ext-sources util/setup.py $(LIBTRACEEVENT)
|
||||
|
||||
$(OUTPUT)python/perf.so: $(PYTHON_EXT_SRCS) $(PYTHON_EXT_DEPS)
|
||||
$(QUIET_GEN)CFLAGS='$(BASIC_CFLAGS)' $(PYTHON_WORD) util/setup.py \
|
||||
@@ -547,6 +553,7 @@ BUILTIN_OBJS += $(OUTPUT)builtin-lock.o
|
||||
BUILTIN_OBJS += $(OUTPUT)builtin-kvm.o
|
||||
BUILTIN_OBJS += $(OUTPUT)builtin-inject.o
|
||||
BUILTIN_OBJS += $(OUTPUT)tests/builtin-test.o
|
||||
BUILTIN_OBJS += $(OUTPUT)builtin-mem.o
|
||||
|
||||
PERFLIBS = $(LIB_FILE) $(LIBLK) $(LIBTRACEEVENT)
|
||||
|
||||
@@ -679,15 +686,15 @@ ifndef NO_LIBAUDIT
|
||||
endif
|
||||
endif
|
||||
|
||||
ifndef NO_NEWT
|
||||
FLAGS_NEWT=$(ALL_CFLAGS) $(ALL_LDFLAGS) $(EXTLIBS) -lnewt
|
||||
ifneq ($(call try-cc,$(SOURCE_NEWT),$(FLAGS_NEWT),libnewt),y)
|
||||
msg := $(warning newt not found, disables TUI support. Please install newt-devel or libnewt-dev);
|
||||
ifndef NO_SLANG
|
||||
FLAGS_SLANG=$(ALL_CFLAGS) $(ALL_LDFLAGS) $(EXTLIBS) -I/usr/include/slang -lslang
|
||||
ifneq ($(call try-cc,$(SOURCE_SLANG),$(FLAGS_SLANG),libslang),y)
|
||||
msg := $(warning slang not found, disables TUI support. Please install slang-devel or libslang-dev);
|
||||
else
|
||||
# Fedora has /usr/include/slang/slang.h, but ubuntu /usr/include/slang.h
|
||||
BASIC_CFLAGS += -I/usr/include/slang
|
||||
BASIC_CFLAGS += -DNEWT_SUPPORT
|
||||
EXTLIBS += -lnewt -lslang
|
||||
BASIC_CFLAGS += -DSLANG_SUPPORT
|
||||
EXTLIBS += -lslang
|
||||
LIB_OBJS += $(OUTPUT)ui/browser.o
|
||||
LIB_OBJS += $(OUTPUT)ui/browsers/annotate.o
|
||||
LIB_OBJS += $(OUTPUT)ui/browsers/hists.o
|
||||
|
||||
@@ -63,7 +63,7 @@ static int perf_evsel__add_sample(struct perf_evsel *evsel,
|
||||
return 0;
|
||||
}
|
||||
|
||||
he = __hists__add_entry(&evsel->hists, al, NULL, 1);
|
||||
he = __hists__add_entry(&evsel->hists, al, NULL, 1, 1);
|
||||
if (he == NULL)
|
||||
return -ENOMEM;
|
||||
|
||||
|
||||
@@ -231,9 +231,10 @@ int perf_diff__formula(struct hist_entry *he, struct hist_entry *pair,
|
||||
}
|
||||
|
||||
static int hists__add_entry(struct hists *self,
|
||||
struct addr_location *al, u64 period)
|
||||
struct addr_location *al, u64 period,
|
||||
u64 weight)
|
||||
{
|
||||
if (__hists__add_entry(self, al, NULL, period) != NULL)
|
||||
if (__hists__add_entry(self, al, NULL, period, weight) != NULL)
|
||||
return 0;
|
||||
return -ENOMEM;
|
||||
}
|
||||
@@ -255,7 +256,7 @@ static int diff__process_sample_event(struct perf_tool *tool __maybe_unused,
|
||||
if (al.filtered)
|
||||
return 0;
|
||||
|
||||
if (hists__add_entry(&evsel->hists, &al, sample->period)) {
|
||||
if (hists__add_entry(&evsel->hists, &al, sample->period, sample->weight)) {
|
||||
pr_warning("problem incrementing symbol period, skipping event\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
@@ -0,0 +1,242 @@
|
||||
#include "builtin.h"
|
||||
#include "perf.h"
|
||||
|
||||
#include "util/parse-options.h"
|
||||
#include "util/trace-event.h"
|
||||
#include "util/tool.h"
|
||||
#include "util/session.h"
|
||||
|
||||
/* Operation names accepted by -t/--type. */
#define MEM_OPERATION_LOAD	"load"
#define MEM_OPERATION_STORE	"store"

/* Operation selected with -t/--type; loads are profiled unless told otherwise. */
static const char *mem_operation = MEM_OPERATION_LOAD;
|
||||
|
||||
struct perf_mem {
|
||||
struct perf_tool tool;
|
||||
char const *input_name;
|
||||
symbol_filter_t annotate_init;
|
||||
bool hide_unresolved;
|
||||
bool dump_raw;
|
||||
const char *cpu_list;
|
||||
DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
|
||||
};
|
||||
|
||||
/* NULL-terminated usage lines handed to parse_options()/usage_with_options(). */
static const char * const mem_usage[] = {
	"perf mem [<options>] {record <command> |report}",
	NULL
};
|
||||
|
||||
static int __cmd_record(int argc, const char **argv)
|
||||
{
|
||||
int rec_argc, i = 0, j;
|
||||
const char **rec_argv;
|
||||
char event[64];
|
||||
int ret;
|
||||
|
||||
rec_argc = argc + 4;
|
||||
rec_argv = calloc(rec_argc + 1, sizeof(char *));
|
||||
if (!rec_argv)
|
||||
return -1;
|
||||
|
||||
rec_argv[i++] = strdup("record");
|
||||
if (!strcmp(mem_operation, MEM_OPERATION_LOAD))
|
||||
rec_argv[i++] = strdup("-W");
|
||||
rec_argv[i++] = strdup("-d");
|
||||
rec_argv[i++] = strdup("-e");
|
||||
|
||||
if (strcmp(mem_operation, MEM_OPERATION_LOAD))
|
||||
sprintf(event, "cpu/mem-stores/pp");
|
||||
else
|
||||
sprintf(event, "cpu/mem-loads/pp");
|
||||
|
||||
rec_argv[i++] = strdup(event);
|
||||
for (j = 1; j < argc; j++, i++)
|
||||
rec_argv[i] = argv[j];
|
||||
|
||||
ret = cmd_record(i, rec_argv, NULL);
|
||||
free(rec_argv);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int
|
||||
dump_raw_samples(struct perf_tool *tool,
|
||||
union perf_event *event,
|
||||
struct perf_sample *sample,
|
||||
struct perf_evsel *evsel __maybe_unused,
|
||||
struct machine *machine)
|
||||
{
|
||||
struct perf_mem *mem = container_of(tool, struct perf_mem, tool);
|
||||
struct addr_location al;
|
||||
const char *fmt;
|
||||
|
||||
if (perf_event__preprocess_sample(event, machine, &al, sample,
|
||||
mem->annotate_init) < 0) {
|
||||
fprintf(stderr, "problem processing %d event, skipping it.\n",
|
||||
event->header.type);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (al.filtered || (mem->hide_unresolved && al.sym == NULL))
|
||||
return 0;
|
||||
|
||||
if (al.map != NULL)
|
||||
al.map->dso->hit = 1;
|
||||
|
||||
if (symbol_conf.field_sep) {
|
||||
fmt = "%d%s%d%s0x%"PRIx64"%s0x%"PRIx64"%s%"PRIu64
|
||||
"%s0x%"PRIx64"%s%s:%s\n";
|
||||
} else {
|
||||
fmt = "%5d%s%5d%s0x%016"PRIx64"%s0x016%"PRIx64
|
||||
"%s%5"PRIu64"%s0x%06"PRIx64"%s%s:%s\n";
|
||||
symbol_conf.field_sep = " ";
|
||||
}
|
||||
|
||||
printf(fmt,
|
||||
sample->pid,
|
||||
symbol_conf.field_sep,
|
||||
sample->tid,
|
||||
symbol_conf.field_sep,
|
||||
event->ip.ip,
|
||||
symbol_conf.field_sep,
|
||||
sample->addr,
|
||||
symbol_conf.field_sep,
|
||||
sample->weight,
|
||||
symbol_conf.field_sep,
|
||||
sample->data_src,
|
||||
symbol_conf.field_sep,
|
||||
al.map ? (al.map->dso ? al.map->dso->long_name : "???") : "???",
|
||||
al.sym ? al.sym->name : "???");
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* perf_tool sample callback: every sample is simply dumped in raw form. */
static int process_sample_event(struct perf_tool *tool,
				union perf_event *event,
				struct perf_sample *sample,
				struct perf_evsel *evsel,
				struct machine *machine)
{
	int rc = dump_raw_samples(tool, event, sample, evsel, machine);

	return rc;
}
|
||||
|
||||
static int report_raw_events(struct perf_mem *mem)
|
||||
{
|
||||
int err = -EINVAL;
|
||||
int ret;
|
||||
struct perf_session *session = perf_session__new(input_name, O_RDONLY,
|
||||
0, false, &mem->tool);
|
||||
|
||||
if (session == NULL)
|
||||
return -ENOMEM;
|
||||
|
||||
if (mem->cpu_list) {
|
||||
ret = perf_session__cpu_bitmap(session, mem->cpu_list,
|
||||
mem->cpu_bitmap);
|
||||
if (ret)
|
||||
goto out_delete;
|
||||
}
|
||||
|
||||
if (symbol__init() < 0)
|
||||
return -1;
|
||||
|
||||
printf("# PID, TID, IP, ADDR, LOCAL WEIGHT, DSRC, SYMBOL\n");
|
||||
|
||||
err = perf_session__process_events(session, &mem->tool);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
return 0;
|
||||
|
||||
out_delete:
|
||||
perf_session__delete(session);
|
||||
return err;
|
||||
}
|
||||
|
||||
static int report_events(int argc, const char **argv, struct perf_mem *mem)
|
||||
{
|
||||
const char **rep_argv;
|
||||
int ret, i = 0, j, rep_argc;
|
||||
|
||||
if (mem->dump_raw)
|
||||
return report_raw_events(mem);
|
||||
|
||||
rep_argc = argc + 3;
|
||||
rep_argv = calloc(rep_argc + 1, sizeof(char *));
|
||||
if (!rep_argv)
|
||||
return -1;
|
||||
|
||||
rep_argv[i++] = strdup("report");
|
||||
rep_argv[i++] = strdup("--mem-mode");
|
||||
rep_argv[i++] = strdup("-n"); /* display number of samples */
|
||||
|
||||
/*
|
||||
* there is no weight (cost) associated with stores, so don't print
|
||||
* the column
|
||||
*/
|
||||
if (strcmp(mem_operation, MEM_OPERATION_LOAD))
|
||||
rep_argv[i++] = strdup("--sort=mem,sym,dso,symbol_daddr,"
|
||||
"dso_daddr,tlb,locked");
|
||||
|
||||
for (j = 1; j < argc; j++, i++)
|
||||
rep_argv[i] = argv[j];
|
||||
|
||||
ret = cmd_report(i, rep_argv, NULL);
|
||||
free(rep_argv);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Entry point of "perf mem".
 *
 * Parses the perf-mem specific options up to the first non-option word,
 * then dispatches on that word: anything starting with "rec" goes to
 * __cmd_record(), anything starting with "rep" goes to report_events(),
 * everything else prints the usage text.
 */
int cmd_mem(int argc, const char **argv, const char *prefix __maybe_unused)
{
	struct stat st;
	struct perf_mem mem = {
		.tool = {
			.sample		 = process_sample_event,
			.mmap		 = perf_event__process_mmap,
			.comm		 = perf_event__process_comm,
			.lost		 = perf_event__process_lost,
			.fork		 = perf_event__process_fork,
			.build_id	 = perf_event__process_build_id,
			.ordered_samples = true,
		},
		.input_name = "perf.data",
	};
	const struct option mem_options[] = {
	OPT_STRING('t', "type", &mem_operation,
		   "type", "memory operations(load/store)"),
	OPT_BOOLEAN('D', "dump-raw-samples", &mem.dump_raw,
		    "dump raw samples in ASCII"),
	OPT_BOOLEAN('U', "hide-unresolved", &mem.hide_unresolved,
		    "Only display entries resolved to a symbol"),
	OPT_STRING('i', "input", &input_name, "file",
		   "input file name"),
	OPT_STRING('C', "cpu", &mem.cpu_list, "cpu",
		   "list of cpus to profile"),
	OPT_STRING('x', "field-separator", &symbol_conf.field_sep,
		   "separator",
		   "separator for columns, no spaces will be added"
		   " between columns '.' is reserved."),
	OPT_END()
	};

	argc = parse_options(argc, argv, mem_options, mem_usage,
			     PARSE_OPT_STOP_AT_NON_OPTION);

	/* a sub-command word is mandatory */
	if (argc == 0 || (!strncmp(argv[0], "rec", 3) && mem_operation == NULL))
		usage_with_options(mem_usage, mem_options);

	/* no input name: read from stdin when it is a pipe, else perf.data */
	if (mem.input_name == NULL || mem.input_name[0] == '\0') {
		const bool from_pipe = !fstat(STDIN_FILENO, &st) &&
				       S_ISFIFO(st.st_mode);

		mem.input_name = from_pipe ? "-" : "perf.data";
	}

	if (strncmp(argv[0], "rec", 3) == 0)
		return __cmd_record(argc, argv);

	if (strncmp(argv[0], "rep", 3) == 0)
		return report_events(argc, argv, &mem);

	usage_with_options(mem_usage, mem_options);

	return 0;
}
|
||||
@@ -5,8 +5,6 @@
|
||||
* (or a CPU, or a PID) into the perf.data output file - for
|
||||
* later analysis via perf report.
|
||||
*/
|
||||
#define _FILE_OFFSET_BITS 64
|
||||
|
||||
#include "builtin.h"
|
||||
|
||||
#include "perf.h"
|
||||
@@ -955,6 +953,8 @@ const struct option record_options[] = {
|
||||
OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
|
||||
"branch filter mask", "branch stack filter modes",
|
||||
parse_branch_stack),
|
||||
OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
|
||||
"sample by weight (on special events only)"),
|
||||
OPT_END()
|
||||
};
|
||||
|
||||
|
||||
+139
-8
@@ -46,6 +46,7 @@ struct perf_report {
|
||||
bool show_full_info;
|
||||
bool show_threads;
|
||||
bool inverted_callchain;
|
||||
bool mem_mode;
|
||||
struct perf_read_values show_threads_values;
|
||||
const char *pretty_printing_style;
|
||||
symbol_filter_t annotate_init;
|
||||
@@ -64,6 +65,99 @@ static int perf_report_config(const char *var, const char *value, void *cb)
|
||||
return perf_default_config(var, value, cb);
|
||||
}
|
||||
|
||||
static int perf_report__add_mem_hist_entry(struct perf_tool *tool,
|
||||
struct addr_location *al,
|
||||
struct perf_sample *sample,
|
||||
struct perf_evsel *evsel,
|
||||
struct machine *machine,
|
||||
union perf_event *event)
|
||||
{
|
||||
struct perf_report *rep = container_of(tool, struct perf_report, tool);
|
||||
struct symbol *parent = NULL;
|
||||
u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
|
||||
int err = 0;
|
||||
struct hist_entry *he;
|
||||
struct mem_info *mi, *mx;
|
||||
uint64_t cost;
|
||||
|
||||
if ((sort__has_parent || symbol_conf.use_callchain) &&
|
||||
sample->callchain) {
|
||||
err = machine__resolve_callchain(machine, evsel, al->thread,
|
||||
sample, &parent);
|
||||
if (err)
|
||||
return err;
|
||||
}
|
||||
|
||||
mi = machine__resolve_mem(machine, al->thread, sample, cpumode);
|
||||
if (!mi)
|
||||
return -ENOMEM;
|
||||
|
||||
if (rep->hide_unresolved && !al->sym)
|
||||
return 0;
|
||||
|
||||
cost = sample->weight;
|
||||
if (!cost)
|
||||
cost = 1;
|
||||
|
||||
/*
|
||||
* must pass period=weight in order to get the correct
|
||||
* sorting from hists__collapse_resort() which is solely
|
||||
* based on periods. We want sorting be done on nr_events * weight
|
||||
* and this is indirectly achieved by passing period=weight here
|
||||
* and the he_stat__add_period() function.
|
||||
*/
|
||||
he = __hists__add_mem_entry(&evsel->hists, al, parent, mi, cost, cost);
|
||||
if (!he)
|
||||
return -ENOMEM;
|
||||
|
||||
/*
|
||||
* In the TUI browser, we are doing integrated annotation,
|
||||
* so we don't allocate the extra space needed because the stdio
|
||||
* code will not use it.
|
||||
*/
|
||||
if (sort__has_sym && he->ms.sym && use_browser > 0) {
|
||||
struct annotation *notes = symbol__annotation(he->ms.sym);
|
||||
|
||||
assert(evsel != NULL);
|
||||
|
||||
if (notes->src == NULL && symbol__alloc_hist(he->ms.sym) < 0)
|
||||
goto out;
|
||||
|
||||
err = hist_entry__inc_addr_samples(he, evsel->idx, al->addr);
|
||||
if (err)
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (sort__has_sym && he->mem_info->daddr.sym && use_browser > 0) {
|
||||
struct annotation *notes;
|
||||
|
||||
mx = he->mem_info;
|
||||
|
||||
notes = symbol__annotation(mx->daddr.sym);
|
||||
if (notes->src == NULL && symbol__alloc_hist(mx->daddr.sym) < 0)
|
||||
goto out;
|
||||
|
||||
err = symbol__inc_addr_samples(mx->daddr.sym,
|
||||
mx->daddr.map,
|
||||
evsel->idx,
|
||||
mx->daddr.al_addr);
|
||||
if (err)
|
||||
goto out;
|
||||
}
|
||||
|
||||
evsel->hists.stats.total_period += cost;
|
||||
hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE);
|
||||
err = 0;
|
||||
|
||||
if (symbol_conf.use_callchain) {
|
||||
err = callchain_append(he->callchain,
|
||||
&callchain_cursor,
|
||||
sample->period);
|
||||
}
|
||||
out:
|
||||
return err;
|
||||
}
|
||||
|
||||
static int perf_report__add_branch_hist_entry(struct perf_tool *tool,
|
||||
struct addr_location *al,
|
||||
struct perf_sample *sample,
|
||||
@@ -98,7 +192,7 @@ static int perf_report__add_branch_hist_entry(struct perf_tool *tool,
|
||||
* and not events sampled. Thus we use a pseudo period of 1.
|
||||
*/
|
||||
he = __hists__add_branch_entry(&evsel->hists, al, parent,
|
||||
&bi[i], 1);
|
||||
&bi[i], 1, 1);
|
||||
if (he) {
|
||||
struct annotation *notes;
|
||||
err = -ENOMEM;
|
||||
@@ -156,7 +250,8 @@ static int perf_evsel__add_hist_entry(struct perf_evsel *evsel,
|
||||
return err;
|
||||
}
|
||||
|
||||
he = __hists__add_entry(&evsel->hists, al, parent, sample->period);
|
||||
he = __hists__add_entry(&evsel->hists, al, parent, sample->period,
|
||||
sample->weight);
|
||||
if (he == NULL)
|
||||
return -ENOMEM;
|
||||
|
||||
@@ -168,7 +263,7 @@ static int perf_evsel__add_hist_entry(struct perf_evsel *evsel,
|
||||
return err;
|
||||
}
|
||||
/*
|
||||
* Only in the newt browser we are doing integrated annotation,
|
||||
* Only in the TUI browser we are doing integrated annotation,
|
||||
* so we don't allocate the extra space needed because the stdio
|
||||
* code will not use it.
|
||||
*/
|
||||
@@ -219,6 +314,12 @@ static int process_sample_event(struct perf_tool *tool,
|
||||
pr_debug("problem adding lbr entry, skipping event\n");
|
||||
return -1;
|
||||
}
|
||||
} else if (rep->mem_mode == 1) {
|
||||
if (perf_report__add_mem_hist_entry(tool, &al, sample,
|
||||
evsel, machine, event)) {
|
||||
pr_debug("problem adding mem entry, skipping event\n");
|
||||
return -1;
|
||||
}
|
||||
} else {
|
||||
if (al.map != NULL)
|
||||
al.map->dso->hit = 1;
|
||||
@@ -302,7 +403,8 @@ static void sig_handler(int sig __maybe_unused)
|
||||
session_done = 1;
|
||||
}
|
||||
|
||||
static size_t hists__fprintf_nr_sample_events(struct hists *self,
|
||||
static size_t hists__fprintf_nr_sample_events(struct perf_report *rep,
|
||||
struct hists *self,
|
||||
const char *evname, FILE *fp)
|
||||
{
|
||||
size_t ret;
|
||||
@@ -330,7 +432,11 @@ static size_t hists__fprintf_nr_sample_events(struct hists *self,
|
||||
if (evname != NULL)
|
||||
ret += fprintf(fp, " of event '%s'", evname);
|
||||
|
||||
ret += fprintf(fp, "\n# Event count (approx.): %" PRIu64, nr_events);
|
||||
if (rep->mem_mode) {
|
||||
ret += fprintf(fp, "\n# Total weight : %" PRIu64, nr_events);
|
||||
ret += fprintf(fp, "\n# Sort order : %s", sort_order);
|
||||
} else
|
||||
ret += fprintf(fp, "\n# Event count (approx.): %" PRIu64, nr_events);
|
||||
return ret + fprintf(fp, "\n#\n");
|
||||
}
|
||||
|
||||
@@ -348,7 +454,7 @@ static int perf_evlist__tty_browse_hists(struct perf_evlist *evlist,
|
||||
!perf_evsel__is_group_leader(pos))
|
||||
continue;
|
||||
|
||||
hists__fprintf_nr_sample_events(hists, evname, stdout);
|
||||
hists__fprintf_nr_sample_events(rep, hists, evname, stdout);
|
||||
hists__fprintf(hists, true, 0, 0, stdout);
|
||||
fprintf(stdout, "\n\n");
|
||||
}
|
||||
@@ -644,7 +750,9 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
|
||||
"Use the stdio interface"),
|
||||
OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
|
||||
"sort by key(s): pid, comm, dso, symbol, parent, cpu, srcline,"
|
||||
" dso_to, dso_from, symbol_to, symbol_from, mispredict"),
|
||||
" dso_to, dso_from, symbol_to, symbol_from, mispredict,"
|
||||
" weight, local_weight, mem, symbol_daddr, dso_daddr, tlb, "
|
||||
"snoop, locked"),
|
||||
OPT_BOOLEAN(0, "showcpuutilization", &symbol_conf.show_cpu_utilization,
|
||||
"Show sample percentage for different cpu modes"),
|
||||
OPT_STRING('p', "parent", &parent_pattern, "regex",
|
||||
@@ -692,6 +800,9 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
|
||||
"use branch records for histogram filling", parse_branch_mode),
|
||||
OPT_STRING(0, "objdump", &objdump_path, "path",
|
||||
"objdump binary to use for disassembly and annotations"),
|
||||
OPT_BOOLEAN(0, "demangle", &symbol_conf.demangle,
|
||||
"Disable symbol demangling"),
|
||||
OPT_BOOLEAN(0, "mem-mode", &report.mem_mode, "mem access profile"),
|
||||
OPT_END()
|
||||
};
|
||||
|
||||
@@ -749,12 +860,24 @@ repeat:
|
||||
"dso_to,symbol_to";
|
||||
|
||||
}
|
||||
if (report.mem_mode) {
|
||||
if (sort__branch_mode == 1) {
|
||||
fprintf(stderr, "branch and mem mode incompatible\n");
|
||||
goto error;
|
||||
}
|
||||
/*
|
||||
* if no sort_order is provided, then specify
|
||||
* mem-mode specific order
|
||||
*/
|
||||
if (sort_order == default_sort_order)
|
||||
sort_order = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked";
|
||||
}
|
||||
|
||||
if (setup_sorting() < 0)
|
||||
usage_with_options(report_usage, options);
|
||||
|
||||
/*
|
||||
* Only in the newt browser we are doing integrated annotation,
|
||||
* Only in the TUI browser we are doing integrated annotation,
|
||||
* so don't allocate extra space that won't be used in the stdio
|
||||
* implementation.
|
||||
*/
|
||||
@@ -814,6 +937,14 @@ repeat:
|
||||
sort_entry__setup_elide(&sort_sym_from, symbol_conf.sym_from_list, "sym_from", stdout);
|
||||
sort_entry__setup_elide(&sort_sym_to, symbol_conf.sym_to_list, "sym_to", stdout);
|
||||
} else {
|
||||
if (report.mem_mode) {
|
||||
sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "symbol_daddr", stdout);
|
||||
sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "dso_daddr", stdout);
|
||||
sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "mem", stdout);
|
||||
sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "local_weight", stdout);
|
||||
sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "tlb", stdout);
|
||||
sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "snoop", stdout);
|
||||
}
|
||||
sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "dso", stdout);
|
||||
sort_entry__setup_elide(&sort_sym, symbol_conf.sym_list, "symbol", stdout);
|
||||
}
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user