Merge branch 'perf/core-v3' of git://git.kernel.org/pub/scm/linux/kernel/git/frederic/linux-dynticks into perf/hw_breakpoints

Pull AMD range breakpoints support from Frederic Weisbecker:

" - Extend breakpoint tools and core to support address range through perf
    event with initial backend support for AMD extended breakpoints.

    Syntax is:

           perf record -e mem:addr/len:type

    For example, to set a write breakpoint covering 0x1000 to 0x1200 (0x1000 + 512):

           perf record -e mem:0x1000/512:w

 - Clean up the breakpoint validation code a bit

 It has been acked by Jiri and Oleg. "

Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
Ingo Molnar
2014-12-08 11:50:24 +01:00
104 changed files with 4416 additions and 884 deletions
+2
View File
@@ -174,6 +174,7 @@
#define X86_FEATURE_TOPOEXT ( 6*32+22) /* topology extensions CPUID leafs */
#define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* core performance counter extensions */
#define X86_FEATURE_PERFCTR_NB ( 6*32+24) /* NB performance counter extensions */
#define X86_FEATURE_BPEXT (6*32+26) /* data breakpoint extension */
#define X86_FEATURE_PERFCTR_L2 ( 6*32+28) /* L2 performance counter extensions */
/*
@@ -383,6 +384,7 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
#define cpu_has_cx16 boot_cpu_has(X86_FEATURE_CX16)
#define cpu_has_eager_fpu boot_cpu_has(X86_FEATURE_EAGER_FPU)
#define cpu_has_topoext boot_cpu_has(X86_FEATURE_TOPOEXT)
#define cpu_has_bpext boot_cpu_has(X86_FEATURE_BPEXT)
#if __GNUC__ >= 4
extern void warn_pre_alternatives(void);
+5
View File
@@ -114,5 +114,10 @@ static inline void debug_stack_usage_inc(void) { }
static inline void debug_stack_usage_dec(void) { }
#endif /* X86_64 */
#ifdef CONFIG_CPU_SUP_AMD
extern void set_dr_addr_mask(unsigned long mask, int dr);
#else
static inline void set_dr_addr_mask(unsigned long mask, int dr) { }
#endif
#endif /* _ASM_X86_DEBUGREG_H */
+1
View File
@@ -12,6 +12,7 @@
*/
/*
 * Arch-specific settings of one hardware breakpoint, as filled in by
 * arch_build_bp_info() and consumed when the breakpoint is installed.
 */
struct arch_hw_breakpoint {
unsigned long address; /* address the breakpoint is placed on */
unsigned long mask; /* address mask for range breakpoints (bp_len - 1); 0 when unused */
u8 len; /* one of the X86_BREAKPOINT_LEN_* values */
u8 type; /* encoded into DR7 together with len */
};
+3
View File
@@ -177,6 +177,9 @@ struct x86_pmu_capability {
#define IBS_CAPS_BRNTRGT (1U<<5)
#define IBS_CAPS_OPCNTEXT (1U<<6)
#define IBS_CAPS_RIPINVALIDCHK (1U<<7)
#define IBS_CAPS_OPBRNFUSE (1U<<8)
#define IBS_CAPS_FETCHCTLEXTD (1U<<9)
#define IBS_CAPS_OPDATA4 (1U<<10)
#define IBS_CAPS_DEFAULT (IBS_CAPS_AVAIL \
| IBS_CAPS_FETCHSAM \
+5
View File
@@ -206,11 +206,16 @@
#define MSR_AMD64_IBSOP_REG_MASK ((1UL<<MSR_AMD64_IBSOP_REG_COUNT)-1)
#define MSR_AMD64_IBSCTL 0xc001103a
#define MSR_AMD64_IBSBRTARGET 0xc001103b
#define MSR_AMD64_IBSOPDATA4 0xc001103d
#define MSR_AMD64_IBS_REG_COUNT_MAX 8 /* includes MSR_AMD64_IBSBRTARGET */
/* Fam 16h MSRs */
#define MSR_F16H_L2I_PERF_CTL 0xc0010230
#define MSR_F16H_L2I_PERF_CTR 0xc0010231
#define MSR_F16H_DR1_ADDR_MASK 0xc0011019
#define MSR_F16H_DR2_ADDR_MASK 0xc001101a
#define MSR_F16H_DR3_ADDR_MASK 0xc001101b
#define MSR_F16H_DR0_ADDR_MASK 0xc0011027
/* Fam 15h MSRs */
#define MSR_F15H_PERF_CTL 0xc0010200
+19
View File
@@ -870,3 +870,22 @@ static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum)
return false;
}
/*
 * Write @mask to the address-mask MSR that belongs to debug register @dr.
 *
 * Nothing is written when the CPU lacks the breakpoint extension
 * (cpu_has_bpext) or when @dr is not in the range 0..3.
 */
void set_dr_addr_mask(unsigned long mask, int dr)
{
	unsigned int msr;

	if (!cpu_has_bpext)
		return;

	if (dr == 0) {
		/* DR0's mask MSR is not contiguous with the DR1..DR3 ones. */
		msr = MSR_F16H_DR0_ADDR_MASK;
	} else if (dr >= 1 && dr <= 3) {
		/* DR1..DR3 mask MSRs are consecutive, starting at DR1's. */
		msr = MSR_F16H_DR1_ADDR_MASK - 1 + dr;
	} else {
		return;
	}

	wrmsr(msr, mask, 0);
}
+4
View File
@@ -253,6 +253,10 @@ struct cpu_hw_events {
#define INTEL_UEVENT_CONSTRAINT(c, n) \
EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
/* Like UEVENT_CONSTRAINT, but match flags too */
#define INTEL_FLAGS_UEVENT_CONSTRAINT(c, n) \
EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS)
#define INTEL_PLD_CONSTRAINT(c, n) \
__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LDLAT)
+15
View File
@@ -565,6 +565,21 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
perf_ibs->offset_max,
offset + 1);
} while (offset < offset_max);
if (event->attr.sample_type & PERF_SAMPLE_RAW) {
/*
* Read IbsBrTarget and IbsOpData4 separately
* depending on their availability.
* Can't add to offset_max as they are staggered
*/
if (ibs_caps & IBS_CAPS_BRNTRGT) {
rdmsrl(MSR_AMD64_IBSBRTARGET, *buf++);
size++;
}
if (ibs_caps & IBS_CAPS_OPDATA4) {
rdmsrl(MSR_AMD64_IBSOPDATA4, *buf++);
size++;
}
}
ibs_data.size = sizeof(u64) * size;
regs = *iregs;
+52 -29
View File
@@ -552,18 +552,18 @@ int intel_pmu_drain_bts_buffer(void)
* PEBS
*/
struct event_constraint intel_core2_pebs_event_constraints[] = {
INTEL_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */
INTEL_UEVENT_CONSTRAINT(0xfec1, 0x1), /* X87_OPS_RETIRED.ANY */
INTEL_UEVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_RETIRED.MISPRED */
INTEL_UEVENT_CONSTRAINT(0x1fc7, 0x1), /* SIMD_INST_RETURED.ANY */
INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED.* */
INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */
INTEL_FLAGS_UEVENT_CONSTRAINT(0xfec1, 0x1), /* X87_OPS_RETIRED.ANY */
INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_RETIRED.MISPRED */
INTEL_FLAGS_UEVENT_CONSTRAINT(0x1fc7, 0x1), /* SIMD_INST_RETURED.ANY */
INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED.* */
EVENT_CONSTRAINT_END
};
struct event_constraint intel_atom_pebs_event_constraints[] = {
INTEL_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */
INTEL_UEVENT_CONSTRAINT(0x00c5, 0x1), /* MISPREDICTED_BRANCH_RETIRED */
INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED.* */
INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */
INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c5, 0x1), /* MISPREDICTED_BRANCH_RETIRED */
INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED.* */
EVENT_CONSTRAINT_END
};
@@ -577,36 +577,36 @@ struct event_constraint intel_slm_pebs_event_constraints[] = {
struct event_constraint intel_nehalem_pebs_event_constraints[] = {
INTEL_PLD_CONSTRAINT(0x100b, 0xf), /* MEM_INST_RETIRED.* */
INTEL_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */
INTEL_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
INTEL_EVENT_CONSTRAINT(0xc0, 0xf), /* INST_RETIRED.ANY */
INTEL_FLAGS_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */
INTEL_FLAGS_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
INTEL_FLAGS_EVENT_CONSTRAINT(0xc0, 0xf), /* INST_RETIRED.ANY */
INTEL_EVENT_CONSTRAINT(0xc2, 0xf), /* UOPS_RETIRED.* */
INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */
INTEL_UEVENT_CONSTRAINT(0x02c5, 0xf), /* BR_MISP_RETIRED.NEAR_CALL */
INTEL_EVENT_CONSTRAINT(0xc7, 0xf), /* SSEX_UOPS_RETIRED.* */
INTEL_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */
INTEL_EVENT_CONSTRAINT(0xcb, 0xf), /* MEM_LOAD_RETIRED.* */
INTEL_EVENT_CONSTRAINT(0xf7, 0xf), /* FP_ASSIST.* */
INTEL_FLAGS_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */
INTEL_FLAGS_UEVENT_CONSTRAINT(0x02c5, 0xf), /* BR_MISP_RETIRED.NEAR_CALL */
INTEL_FLAGS_EVENT_CONSTRAINT(0xc7, 0xf), /* SSEX_UOPS_RETIRED.* */
INTEL_FLAGS_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */
INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0xf), /* MEM_LOAD_RETIRED.* */
INTEL_FLAGS_EVENT_CONSTRAINT(0xf7, 0xf), /* FP_ASSIST.* */
EVENT_CONSTRAINT_END
};
struct event_constraint intel_westmere_pebs_event_constraints[] = {
INTEL_PLD_CONSTRAINT(0x100b, 0xf), /* MEM_INST_RETIRED.* */
INTEL_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */
INTEL_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
INTEL_EVENT_CONSTRAINT(0xc0, 0xf), /* INSTR_RETIRED.* */
INTEL_FLAGS_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */
INTEL_FLAGS_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
INTEL_FLAGS_EVENT_CONSTRAINT(0xc0, 0xf), /* INSTR_RETIRED.* */
INTEL_EVENT_CONSTRAINT(0xc2, 0xf), /* UOPS_RETIRED.* */
INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */
INTEL_EVENT_CONSTRAINT(0xc5, 0xf), /* BR_MISP_RETIRED.* */
INTEL_EVENT_CONSTRAINT(0xc7, 0xf), /* SSEX_UOPS_RETIRED.* */
INTEL_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */
INTEL_EVENT_CONSTRAINT(0xcb, 0xf), /* MEM_LOAD_RETIRED.* */
INTEL_EVENT_CONSTRAINT(0xf7, 0xf), /* FP_ASSIST.* */
INTEL_FLAGS_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */
INTEL_FLAGS_EVENT_CONSTRAINT(0xc5, 0xf), /* BR_MISP_RETIRED.* */
INTEL_FLAGS_EVENT_CONSTRAINT(0xc7, 0xf), /* SSEX_UOPS_RETIRED.* */
INTEL_FLAGS_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */
INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0xf), /* MEM_LOAD_RETIRED.* */
INTEL_FLAGS_EVENT_CONSTRAINT(0xf7, 0xf), /* FP_ASSIST.* */
EVENT_CONSTRAINT_END
};
struct event_constraint intel_snb_pebs_event_constraints[] = {
INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
INTEL_PLD_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
INTEL_PST_CONSTRAINT(0x02cd, 0x8), /* MEM_TRANS_RETIRED.PRECISE_STORES */
/* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
@@ -617,7 +617,7 @@ struct event_constraint intel_snb_pebs_event_constraints[] = {
};
struct event_constraint intel_ivb_pebs_event_constraints[] = {
INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
INTEL_PLD_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
INTEL_PST_CONSTRAINT(0x02cd, 0x8), /* MEM_TRANS_RETIRED.PRECISE_STORES */
/* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
@@ -628,7 +628,7 @@ struct event_constraint intel_ivb_pebs_event_constraints[] = {
};
struct event_constraint intel_hsw_pebs_event_constraints[] = {
INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
INTEL_PLD_CONSTRAINT(0x01cd, 0xf), /* MEM_TRANS_RETIRED.* */
/* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
@@ -886,6 +886,29 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
regs.bp = pebs->bp;
regs.sp = pebs->sp;
if (sample_type & PERF_SAMPLE_REGS_INTR) {
regs.ax = pebs->ax;
regs.bx = pebs->bx;
regs.cx = pebs->cx;
regs.dx = pebs->dx;
regs.si = pebs->si;
regs.di = pebs->di;
regs.bp = pebs->bp;
regs.sp = pebs->sp;
regs.flags = pebs->flags;
#ifndef CONFIG_X86_32
regs.r8 = pebs->r8;
regs.r9 = pebs->r9;
regs.r10 = pebs->r10;
regs.r11 = pebs->r11;
regs.r12 = pebs->r12;
regs.r13 = pebs->r13;
regs.r14 = pebs->r14;
regs.r15 = pebs->r15;
#endif
}
if (event->attr.precise_ip > 1 && x86_pmu.intel_cap.pebs_format >= 2) {
regs.ip = pebs->real_ip;
regs.flags |= PERF_EFLAGS_EXACT;
@@ -449,7 +449,11 @@ static struct attribute *snbep_uncore_qpi_formats_attr[] = {
/*
 * snbep IMC uncore event descriptors; the table ends with an all-zero entry.
 *
 * Each cas_count_* event is accompanied by ".scale" and ".unit" entries.
 * The scale 6.103515625e-5 equals 64 / 2^20, so a scaled count reads as MiB
 * -- presumably because each CAS transfers 64 bytes (TODO confirm).
 */
static struct uncore_event_desc snbep_uncore_imc_events[] = {
INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff,umask=0x00"),
INTEL_UNCORE_EVENT_DESC(cas_count_read, "event=0x04,umask=0x03"),
INTEL_UNCORE_EVENT_DESC(cas_count_read.scale, "6.103515625e-5"),
INTEL_UNCORE_EVENT_DESC(cas_count_read.unit, "MiB"),
INTEL_UNCORE_EVENT_DESC(cas_count_write, "event=0x04,umask=0x0c"),
INTEL_UNCORE_EVENT_DESC(cas_count_write.scale, "6.103515625e-5"),
INTEL_UNCORE_EVENT_DESC(cas_count_write.unit, "MiB"),
{ /* end: all zeroes */ },
};
@@ -2036,7 +2040,11 @@ static struct intel_uncore_type hswep_uncore_ha = {
/*
 * hswep IMC uncore event descriptors; the table ends with an all-zero entry.
 *
 * Each cas_count_* event is accompanied by ".scale" and ".unit" entries.
 * The scale 6.103515625e-5 equals 64 / 2^20, so a scaled count reads as MiB
 * -- presumably because each CAS transfers 64 bytes (TODO confirm).
 */
static struct uncore_event_desc hswep_uncore_imc_events[] = {
INTEL_UNCORE_EVENT_DESC(clockticks, "event=0x00,umask=0x00"),
INTEL_UNCORE_EVENT_DESC(cas_count_read, "event=0x04,umask=0x03"),
INTEL_UNCORE_EVENT_DESC(cas_count_read.scale, "6.103515625e-5"),
INTEL_UNCORE_EVENT_DESC(cas_count_read.unit, "MiB"),
INTEL_UNCORE_EVENT_DESC(cas_count_write, "event=0x04,umask=0x0c"),
INTEL_UNCORE_EVENT_DESC(cas_count_write.scale, "6.103515625e-5"),
INTEL_UNCORE_EVENT_DESC(cas_count_write.unit, "MiB"),
{ /* end: all zeroes */ },
};
+17 -28
View File
@@ -126,6 +126,8 @@ int arch_install_hw_breakpoint(struct perf_event *bp)
*dr7 |= encode_dr7(i, info->len, info->type);
set_debugreg(*dr7, 7);
if (info->mask)
set_dr_addr_mask(info->mask, i);
return 0;
}
@@ -161,29 +163,8 @@ void arch_uninstall_hw_breakpoint(struct perf_event *bp)
*dr7 &= ~__encode_dr7(i, info->len, info->type);
set_debugreg(*dr7, 7);
}
static int get_hbp_len(u8 hbp_len)
{
unsigned int len_in_bytes = 0;
switch (hbp_len) {
case X86_BREAKPOINT_LEN_1:
len_in_bytes = 1;
break;
case X86_BREAKPOINT_LEN_2:
len_in_bytes = 2;
break;
case X86_BREAKPOINT_LEN_4:
len_in_bytes = 4;
break;
#ifdef CONFIG_X86_64
case X86_BREAKPOINT_LEN_8:
len_in_bytes = 8;
break;
#endif
}
return len_in_bytes;
if (info->mask)
set_dr_addr_mask(0, i);
}
/*
@@ -196,7 +177,7 @@ int arch_check_bp_in_kernelspace(struct perf_event *bp)
struct arch_hw_breakpoint *info = counter_arch_bp(bp);
va = info->address;
len = get_hbp_len(info->len);
len = bp->attr.bp_len;
return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE);
}
@@ -277,6 +258,8 @@ static int arch_build_bp_info(struct perf_event *bp)
}
/* Len */
info->mask = 0;
switch (bp->attr.bp_len) {
case HW_BREAKPOINT_LEN_1:
info->len = X86_BREAKPOINT_LEN_1;
@@ -293,11 +276,17 @@ static int arch_build_bp_info(struct perf_event *bp)
break;
#endif
default:
return -EINVAL;
if (!is_power_of_2(bp->attr.bp_len))
return -EINVAL;
if (!cpu_has_bpext)
return -EOPNOTSUPP;
info->mask = bp->attr.bp_len - 1;
info->len = X86_BREAKPOINT_LEN_1;
}
return 0;
}
/*
* Validate the arch-specific HW Breakpoint register settings
*/
@@ -312,11 +301,11 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp)
if (ret)
return ret;
ret = -EINVAL;
switch (info->len) {
case X86_BREAKPOINT_LEN_1:
align = 0;
if (info->mask)
align = info->mask;
break;
case X86_BREAKPOINT_LEN_2:
align = 1;
@@ -330,7 +319,7 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp)
break;
#endif
default:
return ret;
WARN_ON_ONCE(1);
}
/*
+20 -17
View File
@@ -79,7 +79,7 @@ struct perf_branch_stack {
struct perf_branch_entry entries[0];
};
struct perf_regs_user {
struct perf_regs {
__u64 abi;
struct pt_regs *regs;
};
@@ -580,34 +580,40 @@ extern u64 perf_event_read_value(struct perf_event *event,
struct perf_sample_data {
u64 type;
/*
* Fields set by perf_sample_data_init(), group so as to
* minimize the cachelines touched.
*/
u64 addr;
struct perf_raw_record *raw;
struct perf_branch_stack *br_stack;
u64 period;
u64 weight;
u64 txn;
union perf_mem_data_src data_src;
/*
* The other fields, optionally {set,used} by
* perf_{prepare,output}_sample().
*/
u64 type;
u64 ip;
struct {
u32 pid;
u32 tid;
} tid_entry;
u64 time;
u64 addr;
u64 id;
u64 stream_id;
struct {
u32 cpu;
u32 reserved;
} cpu_entry;
u64 period;
union perf_mem_data_src data_src;
struct perf_callchain_entry *callchain;
struct perf_raw_record *raw;
struct perf_branch_stack *br_stack;
struct perf_regs_user regs_user;
struct perf_regs regs_user;
struct perf_regs regs_intr;
u64 stack_user_size;
u64 weight;
/*
* Transaction flags for abort events:
*/
u64 txn;
};
} ____cacheline_aligned;
/* default value for data source */
#define PERF_MEM_NA (PERF_MEM_S(OP, NA) |\
@@ -624,9 +630,6 @@ static inline void perf_sample_data_init(struct perf_sample_data *data,
data->raw = NULL;
data->br_stack = NULL;
data->period = period;
data->regs_user.abi = PERF_SAMPLE_REGS_ABI_NONE;
data->regs_user.regs = NULL;
data->stack_user_size = 0;
data->weight = 0;
data->data_src.val = PERF_MEM_NA;
data->txn = 0;
+14 -1
View File
@@ -137,8 +137,9 @@ enum perf_event_sample_format {
PERF_SAMPLE_DATA_SRC = 1U << 15,
PERF_SAMPLE_IDENTIFIER = 1U << 16,
PERF_SAMPLE_TRANSACTION = 1U << 17,
PERF_SAMPLE_REGS_INTR = 1U << 18,
PERF_SAMPLE_MAX = 1U << 18, /* non-ABI */
PERF_SAMPLE_MAX = 1U << 19, /* non-ABI */
};
/*
@@ -238,6 +239,7 @@ enum perf_event_read_format {
#define PERF_ATTR_SIZE_VER2 80 /* add: branch_sample_type */
#define PERF_ATTR_SIZE_VER3 96 /* add: sample_regs_user */
/* add: sample_stack_user */
#define PERF_ATTR_SIZE_VER4 104 /* add: sample_regs_intr */
/*
* Hardware event_id to monitor via a performance monitoring event:
@@ -334,6 +336,15 @@ struct perf_event_attr {
/* Align to u64. */
__u32 __reserved_2;
/*
* Defines set of regs to dump for each sample
* state captured on:
* - precise = 0: PMU interrupt
* - precise > 0: sampled instruction
*
* See asm/perf_regs.h for details.
*/
__u64 sample_regs_intr;
};
#define perf_flags(attr) (*(&(attr)->read_format + 1))
@@ -686,6 +697,8 @@ enum perf_event_type {
* { u64 weight; } && PERF_SAMPLE_WEIGHT
* { u64 data_src; } && PERF_SAMPLE_DATA_SRC
* { u64 transaction; } && PERF_SAMPLE_TRANSACTION
* { u64 abi; # enum perf_sample_regs_abi
* u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_INTR
* };
*/
PERF_RECORD_SAMPLE = 9,
+51 -9
View File
@@ -4460,7 +4460,7 @@ perf_output_sample_regs(struct perf_output_handle *handle,
}
}
static void perf_sample_regs_user(struct perf_regs_user *regs_user,
static void perf_sample_regs_user(struct perf_regs *regs_user,
struct pt_regs *regs)
{
if (!user_mode(regs)) {
@@ -4471,11 +4471,22 @@ static void perf_sample_regs_user(struct perf_regs_user *regs_user,
}
if (regs) {
regs_user->regs = regs;
regs_user->abi = perf_reg_abi(current);
regs_user->regs = regs;
} else {
regs_user->abi = PERF_SAMPLE_REGS_ABI_NONE;
regs_user->regs = NULL;
}
}
/*
 * Fill @regs_intr for an interrupt-time register dump: record the ABI
 * that perf_reg_abi() reports for the current task and point at @regs
 * as the register set to dump.
 */
static void perf_sample_regs_intr(struct perf_regs *regs_intr,
				  struct pt_regs *regs)
{
	regs_intr->abi = perf_reg_abi(current);
	regs_intr->regs = regs;
}
/*
* Get remaining task size from user stack pointer.
*
@@ -4857,6 +4868,23 @@ void perf_output_sample(struct perf_output_handle *handle,
if (sample_type & PERF_SAMPLE_TRANSACTION)
perf_output_put(handle, data->txn);
if (sample_type & PERF_SAMPLE_REGS_INTR) {
u64 abi = data->regs_intr.abi;
/*
* If there are no regs to dump, notice it through
* first u64 being zero (PERF_SAMPLE_REGS_ABI_NONE).
*/
perf_output_put(handle, abi);
if (abi) {
u64 mask = event->attr.sample_regs_intr;
perf_output_sample_regs(handle,
data->regs_intr.regs,
mask);
}
}
if (!event->attr.watermark) {
int wakeup_events = event->attr.wakeup_events;
@@ -4922,12 +4950,13 @@ void perf_prepare_sample(struct perf_event_header *header,
header->size += size;
}
if (sample_type & (PERF_SAMPLE_REGS_USER | PERF_SAMPLE_STACK_USER))
perf_sample_regs_user(&data->regs_user, regs);
if (sample_type & PERF_SAMPLE_REGS_USER) {
/* regs dump ABI info */
int size = sizeof(u64);
perf_sample_regs_user(&data->regs_user, regs);
if (data->regs_user.regs) {
u64 mask = event->attr.sample_regs_user;
size += hweight64(mask) * sizeof(u64);
@@ -4943,15 +4972,11 @@ void perf_prepare_sample(struct perf_event_header *header,
* in case new sample type is added, because we could eat
* up the rest of the sample size.
*/
struct perf_regs_user *uregs = &data->regs_user;
u16 stack_size = event->attr.sample_stack_user;
u16 size = sizeof(u64);
if (!uregs->abi)
perf_sample_regs_user(uregs, regs);
stack_size = perf_sample_ustack_size(stack_size, header->size,
uregs->regs);
data->regs_user.regs);
/*
* If there is something to dump, add space for the dump
@@ -4964,6 +4989,21 @@ void perf_prepare_sample(struct perf_event_header *header,
data->stack_user_size = stack_size;
header->size += size;
}
if (sample_type & PERF_SAMPLE_REGS_INTR) {
/* regs dump ABI info */
int size = sizeof(u64);
perf_sample_regs_intr(&data->regs_intr, regs);
if (data->regs_intr.regs) {
u64 mask = event->attr.sample_regs_intr;
size += hweight64(mask) * sizeof(u64);
}
header->size += size;
}
}
static void perf_event_output(struct perf_event *event,
@@ -7151,6 +7191,8 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr,
ret = -EINVAL;
}
if (attr->sample_type & PERF_SAMPLE_REGS_INTR)
ret = perf_reg_validate(attr->sample_regs_intr);
out:
return ret;
+2
View File
@@ -2,6 +2,8 @@ PERF-CFLAGS
PERF-GUI-VARS
PERF-VERSION-FILE
perf
perf-read-vdso32
perf-read-vdsox32
perf-help
perf-record
perf-report
+11 -2
View File
@@ -33,12 +33,15 @@ OPTIONS
- a raw PMU event (eventsel+umask) in the form of rNNN where NNN is a
hexadecimal event descriptor.
- a hardware breakpoint event in the form of '\mem:addr[:access]'
- a hardware breakpoint event in the form of '\mem:addr[/len][:access]'
where addr is the address in memory you want to break in.
Access is the memory access type (read, write, execute) it can
be passed as follows: '\mem:addr[:[r][w][x]]'.
be passed as follows: '\mem:addr[:[r][w][x]]'. len is the length of the range,
i.e. the number of bytes starting at addr that the breakpoint will cover.
If you want to profile read-write accesses in 0x1000, just set
'mem:0x1000:rw'.
If you want to profile write accesses in [0x1000, 0x1008), just set
'mem:0x1000/8:w'.
--filter=<filter>::
Event filter.
@@ -214,6 +217,12 @@ if combined with -a or -C options.
After starting the program, wait msecs before measuring. This is useful to
filter out the startup phase of the program, which is often very different.
-I::
--intr-regs::
Capture machine state (registers) at interrupt, i.e., on counter overflows for
each sample. The list of captured registers depends on the architecture. This
option is off by default.
SEE ALSO
--------
linkperf:perf-stat[1], linkperf:perf-list[1]
+45 -6
View File
@@ -60,6 +60,15 @@ include config/utilities.mak
#
# Define NO_LIBDW_DWARF_UNWIND if you do not want libdw support
# for dwarf backtrace post unwind.
#
# Define NO_PERF_READ_VDSO32 if you do not want to build perf-read-vdso32
# for reading the 32-bit compatibility VDSO in 64-bit mode
#
# Define NO_PERF_READ_VDSOX32 if you do not want to build perf-read-vdsox32
# for reading the x32 mode 32-bit compatibility VDSO in 64-bit mode
#
# Define NO_ZLIB if you do not want to support compressed kernel modules
ifeq ($(srctree),)
srctree := $(patsubst %/,%,$(dir $(shell pwd)))
@@ -171,11 +180,16 @@ $(OUTPUT)python/perf.so: $(PYTHON_EXT_SRCS) $(PYTHON_EXT_DEPS)
SCRIPTS = $(patsubst %.sh,%,$(SCRIPT_SH))
#
# Single 'perf' binary right now:
#
PROGRAMS += $(OUTPUT)perf
ifndef NO_PERF_READ_VDSO32
PROGRAMS += $(OUTPUT)perf-read-vdso32
endif
ifndef NO_PERF_READ_VDSOX32
PROGRAMS += $(OUTPUT)perf-read-vdsox32
endif
# what 'all' will build and 'install' will install, in perfexecdir
ALL_PROGRAMS = $(PROGRAMS) $(SCRIPTS)
@@ -247,12 +261,14 @@ LIB_H += util/annotate.h
LIB_H += util/cache.h
LIB_H += util/callchain.h
LIB_H += util/build-id.h
LIB_H += util/db-export.h
LIB_H += util/debug.h
LIB_H += util/pmu.h
LIB_H += util/event.h
LIB_H += util/evsel.h
LIB_H += util/evlist.h
LIB_H += util/exec_cmd.h
LIB_H += util/find-vdso-map.c
LIB_H += util/levenshtein.h
LIB_H += util/machine.h
LIB_H += util/map.h
@@ -304,6 +320,7 @@ LIB_H += ui/util.h
LIB_H += ui/ui.h
LIB_H += util/data.h
LIB_H += util/kvm-stat.h
LIB_H += util/thread-stack.h
LIB_OBJS += $(OUTPUT)util/abspath.o
LIB_OBJS += $(OUTPUT)util/alias.o
@@ -311,6 +328,7 @@ LIB_OBJS += $(OUTPUT)util/annotate.o
LIB_OBJS += $(OUTPUT)util/build-id.o
LIB_OBJS += $(OUTPUT)util/config.o
LIB_OBJS += $(OUTPUT)util/ctype.o
LIB_OBJS += $(OUTPUT)util/db-export.o
LIB_OBJS += $(OUTPUT)util/pmu.o
LIB_OBJS += $(OUTPUT)util/environment.o
LIB_OBJS += $(OUTPUT)util/event.o
@@ -380,6 +398,7 @@ LIB_OBJS += $(OUTPUT)util/srcline.o
LIB_OBJS += $(OUTPUT)util/data.o
LIB_OBJS += $(OUTPUT)util/tsc.o
LIB_OBJS += $(OUTPUT)util/cloexec.o
LIB_OBJS += $(OUTPUT)util/thread-stack.o
LIB_OBJS += $(OUTPUT)ui/setup.o
LIB_OBJS += $(OUTPUT)ui/helpline.o
@@ -478,8 +497,6 @@ ifneq ($(OUTPUT),)
endif
ifdef NO_LIBELF
EXTLIBS := $(filter-out -lelf,$(EXTLIBS))
# Remove ELF/DWARF dependent codes
LIB_OBJS := $(filter-out $(OUTPUT)util/symbol-elf.o,$(LIB_OBJS))
LIB_OBJS := $(filter-out $(OUTPUT)util/dwarf-aux.o,$(LIB_OBJS))
@@ -568,6 +585,10 @@ ifndef NO_LIBNUMA
BUILTIN_OBJS += $(OUTPUT)bench/numa.o
endif
ifndef NO_ZLIB
LIB_OBJS += $(OUTPUT)util/zlib.o
endif
ifdef ASCIIDOC8
export ASCIIDOC8
endif
@@ -732,6 +753,16 @@ $(OUTPUT)scripts/python/Perf-Trace-Util/Context.o: scripts/python/Perf-Trace-Uti
$(OUTPUT)perf-%: %.o $(PERFLIBS)
$(QUIET_LINK)$(CC) $(CFLAGS) -o $@ $(LDFLAGS) $(filter %.o,$^) $(LIBS)
ifndef NO_PERF_READ_VDSO32
$(OUTPUT)perf-read-vdso32: perf-read-vdso.c util/find-vdso-map.c
$(QUIET_CC)$(CC) -m32 $(filter -static,$(LDFLAGS)) -Wall -Werror -o $@ perf-read-vdso.c
endif
ifndef NO_PERF_READ_VDSOX32
$(OUTPUT)perf-read-vdsox32: perf-read-vdso.c util/find-vdso-map.c
$(QUIET_CC)$(CC) -mx32 $(filter -static,$(LDFLAGS)) -Wall -Werror -o $@ perf-read-vdso.c
endif
$(LIB_OBJS) $(BUILTIN_OBJS): $(LIB_H)
$(patsubst perf-%,%.o,$(PROGRAMS)): $(LIB_H) $(wildcard */*.h)
@@ -876,6 +907,14 @@ install-bin: all install-gtk
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(bindir_SQ)'; \
$(INSTALL) $(OUTPUT)perf '$(DESTDIR_SQ)$(bindir_SQ)'; \
$(LN) '$(DESTDIR_SQ)$(bindir_SQ)/perf' '$(DESTDIR_SQ)$(bindir_SQ)/trace'
ifndef NO_PERF_READ_VDSO32
$(call QUIET_INSTALL, perf-read-vdso32) \
$(INSTALL) $(OUTPUT)perf-read-vdso32 '$(DESTDIR_SQ)$(bindir_SQ)';
endif
ifndef NO_PERF_READ_VDSOX32
$(call QUIET_INSTALL, perf-read-vdsox32) \
$(INSTALL) $(OUTPUT)perf-read-vdsox32 '$(DESTDIR_SQ)$(bindir_SQ)';
endif
$(call QUIET_INSTALL, libexec) \
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
$(call QUIET_INSTALL, perf-archive) \
@@ -928,7 +967,7 @@ config-clean:
clean: $(LIBTRACEEVENT)-clean $(LIBAPIKFS)-clean config-clean
$(call QUIET_CLEAN, core-objs) $(RM) $(LIB_OBJS) $(BUILTIN_OBJS) $(LIB_FILE) $(OUTPUT)perf-archive $(OUTPUT)perf-with-kcore $(OUTPUT)perf.o $(LANG_BINDINGS) $(GTK_OBJS)
$(call QUIET_CLEAN, core-progs) $(RM) $(ALL_PROGRAMS) perf
$(call QUIET_CLEAN, core-progs) $(RM) $(ALL_PROGRAMS) perf perf-read-vdso32 perf-read-vdsox32
$(call QUIET_CLEAN, core-gen) $(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope* $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)PERF-CFLAGS $(OUTPUT)PERF-FEATURES $(OUTPUT)util/*-bison* $(OUTPUT)util/*-flex*
$(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) clean
$(python-clean)
@@ -145,7 +145,7 @@ static Dwarf_Frame *get_dwarf_frame(Dwfl_Module *mod, Dwarf_Addr pc)
* yet used)
* -1 in case of errors
*/
static int check_return_addr(const char *exec_file, Dwarf_Addr pc)
static int check_return_addr(struct dso *dso, Dwarf_Addr pc)
{
int rc = -1;
Dwfl *dwfl;
@@ -156,15 +156,27 @@ static int check_return_addr(const char *exec_file, Dwarf_Addr pc)
Dwarf_Addr end = pc;
bool signalp;
dwfl = dwfl_begin(&offline_callbacks);
if (!dwfl) {
pr_debug("dwfl_begin() failed: %s\n", dwarf_errmsg(-1));
return -1;
}
dwfl = dso->dwfl;
if (dwfl_report_offline(dwfl, "", exec_file, -1) == NULL) {
pr_debug("dwfl_report_offline() failed %s\n", dwarf_errmsg(-1));
goto out;
if (!dwfl) {
dwfl = dwfl_begin(&offline_callbacks);
if (!dwfl) {
pr_debug("dwfl_begin() failed: %s\n", dwarf_errmsg(-1));
return -1;
}
if (dwfl_report_offline(dwfl, "", dso->long_name, -1) == NULL) {
pr_debug("dwfl_report_offline() failed %s\n",
dwarf_errmsg(-1));
/*
* We normally cache the DWARF debug info and never
* call dwfl_end(). But to prevent fd leak, free in
* case of error.
*/
dwfl_end(dwfl);
goto out;
}
dso->dwfl = dwfl;
}
mod = dwfl_addrmodule(dwfl, pc);
@@ -194,7 +206,6 @@ static int check_return_addr(const char *exec_file, Dwarf_Addr pc)
rc = check_return_reg(ra_regno, frame);
out:
dwfl_end(dwfl);
return rc;
}
@@ -221,8 +232,7 @@ out:
* index: of callchain entry that needs to be ignored (if any)
* -1 if no entry needs to be ignored or in case of errors
*/
int arch_skip_callchain_idx(struct machine *machine, struct thread *thread,
struct ip_callchain *chain)
int arch_skip_callchain_idx(struct thread *thread, struct ip_callchain *chain)
{
struct addr_location al;
struct dso *dso = NULL;
@@ -235,7 +245,7 @@ int arch_skip_callchain_idx(struct machine *machine, struct thread *thread,
ip = chain->ips[2];
thread__find_addr_location(thread, machine, PERF_RECORD_MISC_USER,
thread__find_addr_location(thread, PERF_RECORD_MISC_USER,
MAP__FUNCTION, ip, &al);
if (al.map)
@@ -246,7 +256,7 @@ int arch_skip_callchain_idx(struct machine *machine, struct thread *thread,
return skip_slot;
}
rc = check_return_addr(dso->long_name, ip);
rc = check_return_addr(dso, ip);
pr_debug("DSO %s, nr %" PRIx64 ", ip 0x%" PRIx64 "rc %d\n",
dso->long_name, chain->nr, ip, rc);
+1
View File
@@ -357,6 +357,7 @@ static int diff__process_sample_event(struct perf_tool *tool __maybe_unused,
static struct perf_tool tool = {
.sample = diff__process_sample_event,
.mmap = perf_event__process_mmap,
.mmap2 = perf_event__process_mmap2,
.comm = perf_event__process_comm,
.exit = perf_event__process_exit,
.fork = perf_event__process_fork,
+2 -2
View File
@@ -217,8 +217,7 @@ static int perf_event__inject_buildid(struct perf_tool *tool,
goto repipe;
}
thread__find_addr_map(thread, machine, cpumode, MAP__FUNCTION,
sample->ip, &al);
thread__find_addr_map(thread, cpumode, MAP__FUNCTION, sample->ip, &al);
if (al.map != NULL) {
if (!al.map->dso->hit) {
@@ -410,6 +409,7 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused)
.tracing_data = perf_event__repipe_op2_synth,
.finished_round = perf_event__repipe_op2_synth,
.build_id = perf_event__repipe_op2_synth,
.id_index = perf_event__repipe_op2_synth,
},
.input_name = "-",
.samples = LIST_HEAD_INIT(inject.samples),

Some files were not shown because too many files have changed in this diff Show More