Mirror of https://github.com/armbian/linux-cix.git (synced 2026-01-06 12:30:45 -08:00)
Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf updates from Ingo Molnar:
"The main kernel side changes in this cycle were:
- Various Intel-PT updates and optimizations (Alexander Shishkin)
- Prohibit kprobes on Xen/KVM emulate prefixes (Masami Hiramatsu)
- Add support for LSM and SELinux checks to control access to the
perf syscall (Joel Fernandes)
- Misc other changes, optimizations, fixes and cleanups - see the
shortlog for details.
There were numerous tooling changes as well - 254 non-merge commits.
Here are the main changes - too many to list in detail:
- Enhancements to core tooling infrastructure, perf.data, libperf,
libtraceevent, event parsing, vendor events, Intel PT, callchains,
BPF support and instruction decoding.
- There were updates to the following tools:
perf annotate
perf diff
perf inject
perf kvm
perf list
perf maps
perf parse
perf probe
perf record
perf report
perf script
perf stat
perf test
perf trace
- And a lot of other changes: please see the shortlog and Git log for
more details"
* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (279 commits)
perf parse: Fix potential memory leak when handling tracepoint errors
perf probe: Fix spelling mistake "addrees" -> "address"
libtraceevent: Fix memory leakage in copy_filter_type
libtraceevent: Fix header installation
perf intel-bts: Does not support AUX area sampling
perf intel-pt: Add support for decoding AUX area samples
perf intel-pt: Add support for recording AUX area samples
perf pmu: When using default config, record which bits of config were changed by the user
perf auxtrace: Add support for queuing AUX area samples
perf session: Add facility to peek at all events
perf auxtrace: Add support for dumping AUX area samples
perf inject: Cut AUX area samples
perf record: Add aux-sample-size config term
perf record: Add support for AUX area sampling
perf auxtrace: Add support for AUX area sample recording
perf auxtrace: Move perf_evsel__find_pmu()
perf record: Add a function to test for kernel support for AUX area sampling
perf tools: Add kernel AUX area sampling definitions
perf/core: Make the mlock accounting simple again
perf report: Jump to symbol source view from total cycles view
...
@@ -12846,6 +12846,13 @@ F: arch/*/events/*
 F: arch/*/events/*/*
 F: tools/perf/

+PERFORMANCE EVENTS SUBSYSTEM ARM64 PMU EVENTS
+R: John Garry <john.garry@huawei.com>
+R: Will Deacon <will@kernel.org>
+L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
+S: Supported
+F: tools/perf/pmu-events/arch/arm64/
+
 PERSONALITY HANDLING
 M: Christoph Hellwig <hch@infradead.org>
 L: linux-abi-devel@lists.sourceforge.net
@@ -96,7 +96,7 @@ static inline unsigned long perf_ip_adjust(struct pt_regs *regs)
 {
 	return 0;
 }
-static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp) { }
+static inline void perf_get_data_addr(struct perf_event *event, struct pt_regs *regs, u64 *addrp) { }
 static inline u32 perf_get_misc_flags(struct pt_regs *regs)
 {
 	return 0;
@@ -127,7 +127,7 @@ static unsigned long ebb_switch_in(bool ebb, struct cpu_hw_events *cpuhw)
 static inline void power_pmu_bhrb_enable(struct perf_event *event) {}
 static inline void power_pmu_bhrb_disable(struct perf_event *event) {}
 static void power_pmu_sched_task(struct perf_event_context *ctx, bool sched_in) {}
-static inline void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw) {}
+static inline void power_pmu_bhrb_read(struct perf_event *event, struct cpu_hw_events *cpuhw) {}
 static void pmao_restore_workaround(bool ebb) { }
 #endif /* CONFIG_PPC32 */
 
@@ -179,7 +179,7 @@ static inline unsigned long perf_ip_adjust(struct pt_regs *regs)
  * pointed to by SIAR; this is indicated by the [POWER6_]MMCRA_SDSYNC, the
  * [POWER7P_]MMCRA_SDAR_VALID bit in MMCRA, or the SDAR_VALID bit in SIER.
  */
-static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp)
+static inline void perf_get_data_addr(struct perf_event *event, struct pt_regs *regs, u64 *addrp)
 {
 	unsigned long mmcra = regs->dsisr;
 	bool sdar_valid;
@@ -204,8 +204,7 @@ static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp)
 	if (!(mmcra & MMCRA_SAMPLE_ENABLE) || sdar_valid)
 		*addrp = mfspr(SPRN_SDAR);
 
-	if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN) &&
-	    is_kernel_addr(mfspr(SPRN_SDAR)))
+	if (is_kernel_addr(mfspr(SPRN_SDAR)) && perf_allow_kernel(&event->attr) != 0)
 		*addrp = 0;
 }
 
@@ -444,7 +443,7 @@ static __u64 power_pmu_bhrb_to(u64 addr)
 }
 
 /* Processing BHRB entries */
-static void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw)
+static void power_pmu_bhrb_read(struct perf_event *event, struct cpu_hw_events *cpuhw)
 {
 	u64 val;
 	u64 addr;
@@ -472,8 +471,7 @@ static void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw)
 			 * exporting it to userspace (avoid exposure of regions
 			 * where we could have speculative execution)
 			 */
-			if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN) &&
-			    is_kernel_addr(addr))
+			if (is_kernel_addr(addr) && perf_allow_kernel(&event->attr) != 0)
 				continue;
 
 			/* Branches are read most recent first (ie. mfbhrb 0 is
@@ -2087,12 +2085,12 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
 
 	if (event->attr.sample_type &
 			(PERF_SAMPLE_ADDR | PERF_SAMPLE_PHYS_ADDR))
-		perf_get_data_addr(regs, &data.addr);
+		perf_get_data_addr(event, regs, &data.addr);
 
 	if (event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK) {
 		struct cpu_hw_events *cpuhw;
 		cpuhw = this_cpu_ptr(&cpu_hw_events);
-		power_pmu_bhrb_read(cpuhw);
+		power_pmu_bhrb_read(event, cpuhw);
 		data.br_stack = &cpuhw->bhrb_stack;
 	}
 
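Throughout this series, the open-coded perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN) tests are replaced by perf_allow_kernel()/perf_allow_cpu() helpers, which also give the new LSM hooks (see the lsm_hooks.h hunks at the end) a chance to veto the access. The helpers themselves are not part of this diff; a minimal sketch of their shape, based on what the call sites above imply (exact bodies are an assumption):

static inline int perf_allow_kernel(struct perf_event_attr *attr)
{
	/* Same sysctl_perf_event_paranoid policy as the old open-coded check ... */
	if (sysctl_perf_event_paranoid > 1 && !capable(CAP_SYS_ADMIN))
		return -EACCES;

	/* ... plus the LSM hook introduced by this series. */
	return security_perf_event_open(attr, PERF_SECURITY_KERNEL);
}

static inline int perf_allow_cpu(struct perf_event_attr *attr)
{
	if (sysctl_perf_event_paranoid > 0 && !capable(CAP_SYS_ADMIN))
		return -EACCES;

	return security_perf_event_open(attr, PERF_SECURITY_CPU);
}

Callers such as perf_get_data_addr() above only need the return value: a non-zero result means the kernel address has to be filtered out of the sample.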
@@ -652,15 +652,7 @@ static void amd_pmu_disable_event(struct perf_event *event)
  */
 static int amd_pmu_handle_irq(struct pt_regs *regs)
 {
-	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-	int active, handled;
-
-	/*
-	 * Obtain the active count before calling x86_pmu_handle_irq() since
-	 * it is possible that x86_pmu_handle_irq() may make a counter
-	 * inactive (through x86_pmu_stop).
-	 */
-	active = __bitmap_weight(cpuc->active_mask, X86_PMC_IDX_MAX);
+	int handled;
 
 	/* Process any counter overflows */
 	handled = x86_pmu_handle_irq(regs);
@@ -670,8 +662,7 @@ static int amd_pmu_handle_irq(struct pt_regs *regs)
 	 * NMIs will be claimed if arriving within that window.
 	 */
 	if (handled) {
-		this_cpu_write(perf_nmi_tstamp,
-			       jiffies + perf_nmi_window);
+		this_cpu_write(perf_nmi_tstamp, jiffies + perf_nmi_window);
 
 		return handled;
 	}
@@ -2243,6 +2243,13 @@ static void x86_pmu_sched_task(struct perf_event_context *ctx, bool sched_in)
 		x86_pmu.sched_task(ctx, sched_in);
 }
 
+static void x86_pmu_swap_task_ctx(struct perf_event_context *prev,
+				  struct perf_event_context *next)
+{
+	if (x86_pmu.swap_task_ctx)
+		x86_pmu.swap_task_ctx(prev, next);
+}
+
 void perf_check_microcode(void)
 {
 	if (x86_pmu.check_microcode)
@@ -2297,6 +2304,7 @@ static struct pmu pmu = {
 	.event_idx = x86_pmu_event_idx,
 	.sched_task = x86_pmu_sched_task,
 	.task_ctx_size = sizeof(struct x86_perf_task_context),
+	.swap_task_ctx = x86_pmu_swap_task_ctx,
 	.check_period = x86_pmu_check_period,
 
 	.aux_output_match = x86_pmu_aux_output_match,
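The new ->swap_task_ctx() callback lets perf/core ask the PMU to migrate its per-task state when two equivalent task contexts are exchanged during an optimized context switch; on x86 it forwards to the Intel LBR code further down. The kernel/events/core.c half is not shown in this truncated diff; a sketch of the intended call pattern (wrapper name and placement are illustrative assumptions):

/* Sketch only: how perf/core is expected to use the new callback. */
static void swap_task_ctx_sketch(struct pmu *pmu,
				 struct perf_event_context *ctx,
				 struct perf_event_context *next_ctx)
{
	if (pmu->swap_task_ctx)
		/* PMU-specific parts (e.g. LBR callstack users) need syncing too. */
		pmu->swap_task_ctx(ctx, next_ctx);
	else
		/* Default behaviour: just exchange the opaque pointers. */
		swap(ctx->task_ctx_data, next_ctx->task_ctx_data);
}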
@@ -549,9 +549,11 @@ static int bts_event_init(struct perf_event *event)
 	 * Note that the default paranoia setting permits unprivileged
 	 * users to profile the kernel.
 	 */
-	if (event->attr.exclude_kernel && perf_paranoid_kernel() &&
-	    !capable(CAP_SYS_ADMIN))
-		return -EACCES;
+	if (event->attr.exclude_kernel) {
+		ret = perf_allow_kernel(&event->attr);
+		if (ret)
+			return ret;
+	}
 
 	if (x86_add_exclusive(x86_lbr_exclusive_bts))
 		return -EBUSY;
@@ -3315,8 +3315,9 @@ static int intel_pmu_hw_config(struct perf_event *event)
 		if (x86_pmu.version < 3)
 			return -EINVAL;
 
-		if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
-			return -EACCES;
+		ret = perf_allow_cpu(&event->attr);
+		if (ret)
+			return ret;
 
 		event->hw.config |= ARCH_PERFMON_EVENTSEL_ANY;
 
@@ -3830,6 +3831,12 @@ static void intel_pmu_sched_task(struct perf_event_context *ctx,
 	intel_pmu_lbr_sched_task(ctx, sched_in);
 }
 
+static void intel_pmu_swap_task_ctx(struct perf_event_context *prev,
+				    struct perf_event_context *next)
+{
+	intel_pmu_lbr_swap_task_ctx(prev, next);
+}
+
 static int intel_pmu_check_period(struct perf_event *event, u64 value)
 {
 	return intel_pmu_has_bts_period(event, value) ? -EINVAL : 0;
@@ -3965,6 +3972,7 @@ static __initconst const struct x86_pmu intel_pmu = {
 
 	.guest_get_msrs = intel_guest_get_msrs,
 	.sched_task = intel_pmu_sched_task,
+	.swap_task_ctx = intel_pmu_swap_task_ctx,
 
 	.check_period = intel_pmu_check_period,
 
@@ -417,6 +417,29 @@ static void __intel_pmu_lbr_save(struct x86_perf_task_context *task_ctx)
 	cpuc->last_log_id = ++task_ctx->log_id;
 }
 
+void intel_pmu_lbr_swap_task_ctx(struct perf_event_context *prev,
+				 struct perf_event_context *next)
+{
+	struct x86_perf_task_context *prev_ctx_data, *next_ctx_data;
+
+	swap(prev->task_ctx_data, next->task_ctx_data);
+
+	/*
+	 * Architecture specific synchronization makes sense in
+	 * case both prev->task_ctx_data and next->task_ctx_data
+	 * pointers are allocated.
+	 */
+
+	prev_ctx_data = next->task_ctx_data;
+	next_ctx_data = prev->task_ctx_data;
+
+	if (!prev_ctx_data || !next_ctx_data)
+		return;
+
+	swap(prev_ctx_data->lbr_callstack_users,
+	     next_ctx_data->lbr_callstack_users);
+}
+
 void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in)
 {
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
@@ -776,8 +776,9 @@ static int p4_validate_raw_event(struct perf_event *event)
 	 * the user needs special permissions to be able to use it
 	 */
 	if (p4_ht_active() && p4_event_bind_map[v].shared) {
-		if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
-			return -EACCES;
+		v = perf_allow_cpu(&event->attr);
+		if (v)
+			return v;
 	}
 
 	/* ESCR EventMask bits may be invalid */
@@ -397,6 +397,20 @@ static bool pt_event_valid(struct perf_event *event)
  * These all are cpu affine and operate on a local PT
  */
 
+static void pt_config_start(struct perf_event *event)
+{
+	struct pt *pt = this_cpu_ptr(&pt_ctx);
+	u64 ctl = event->hw.config;
+
+	ctl |= RTIT_CTL_TRACEEN;
+	if (READ_ONCE(pt->vmx_on))
+		perf_aux_output_flag(&pt->handle, PERF_AUX_FLAG_PARTIAL);
+	else
+		wrmsrl(MSR_IA32_RTIT_CTL, ctl);
+
+	WRITE_ONCE(event->hw.config, ctl);
+}
+
 /* Address ranges and their corresponding msr configuration registers */
 static const struct pt_address_range {
 	unsigned long msr_a;
@@ -469,6 +483,7 @@ static u64 pt_config_filters(struct perf_event *event)
 static void pt_config(struct perf_event *event)
 {
 	struct pt *pt = this_cpu_ptr(&pt_ctx);
+	struct pt_buffer *buf = perf_get_aux(&pt->handle);
 	u64 reg;
 
 	/* First round: clear STATUS, in particular the PSB byte counter. */
@@ -478,7 +493,9 @@ static void pt_config(struct perf_event *event)
 	}
 
 	reg = pt_config_filters(event);
-	reg |= RTIT_CTL_TOPA | RTIT_CTL_TRACEEN;
+	reg |= RTIT_CTL_TRACEEN;
+	if (!buf->single)
+		reg |= RTIT_CTL_TOPA;
 
 	/*
 	 * Previously, we had BRANCH_EN on by default, but now that PT has
@@ -501,10 +518,7 @@ static void pt_config(struct perf_event *event)
 	reg |= (event->attr.config & PT_CONFIG_MASK);
 
 	event->hw.config = reg;
-	if (READ_ONCE(pt->vmx_on))
-		perf_aux_output_flag(&pt->handle, PERF_AUX_FLAG_PARTIAL);
-	else
-		wrmsrl(MSR_IA32_RTIT_CTL, reg);
+	pt_config_start(event);
 }
 
 static void pt_config_stop(struct perf_event *event)
@@ -533,18 +547,6 @@ static void pt_config_stop(struct perf_event *event)
 	wmb();
 }
 
-static void pt_config_buffer(void *buf, unsigned int topa_idx,
-			     unsigned int output_off)
-{
-	u64 reg;
-
-	wrmsrl(MSR_IA32_RTIT_OUTPUT_BASE, virt_to_phys(buf));
-
-	reg = 0x7f | ((u64)topa_idx << 7) | ((u64)output_off << 32);
-
-	wrmsrl(MSR_IA32_RTIT_OUTPUT_MASK, reg);
-}
-
 /**
  * struct topa - ToPA metadata
  * @list: linkage to struct pt_buffer's list of tables
@@ -602,6 +604,33 @@ static inline phys_addr_t topa_pfn(struct topa *topa)
 #define TOPA_ENTRY_SIZE(t, i) (sizes(TOPA_ENTRY((t), (i))->size))
 #define TOPA_ENTRY_PAGES(t, i) (1 << TOPA_ENTRY((t), (i))->size)
 
+static void pt_config_buffer(struct pt_buffer *buf)
+{
+	struct pt *pt = this_cpu_ptr(&pt_ctx);
+	u64 reg, mask;
+	void *base;
+
+	if (buf->single) {
+		base = buf->data_pages[0];
+		mask = (buf->nr_pages * PAGE_SIZE - 1) >> 7;
+	} else {
+		base = topa_to_page(buf->cur)->table;
+		mask = (u64)buf->cur_idx;
+	}
+
+	reg = virt_to_phys(base);
+	if (pt->output_base != reg) {
+		pt->output_base = reg;
+		wrmsrl(MSR_IA32_RTIT_OUTPUT_BASE, reg);
+	}
+
+	reg = 0x7f | (mask << 7) | ((u64)buf->output_off << 32);
+	if (pt->output_mask != reg) {
+		pt->output_mask = reg;
+		wrmsrl(MSR_IA32_RTIT_OUTPUT_MASK, reg);
+	}
+}
+
 /**
  * topa_alloc() - allocate page-sized ToPA table
  * @cpu: CPU on which to allocate.
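The RTIT_OUTPUT_MASK value packed into reg above is worth unpacking: bits 6:0 are fixed at 0x7f, bits 31:7 carry either the ToPA entry index or the single-range size mask, and bits 63:32 carry the write offset within the current output region. A small worked example (illustrative values only; PAGE_SIZE assumed to be 4 KiB):

/* Illustrative only: OUTPUT_MASK for a 16-page (64 KiB) single-range buffer
 * with 4 KiB already written.
 */
unsigned long long nr_pages = 16;
unsigned long long out_off  = 0x1000;                      /* 4 KiB into the region */
unsigned long long mask     = (nr_pages * 4096 - 1) >> 7;  /* 0x1ff: size mask >> 7 */
unsigned long long reg      = 0x7f | (mask << 7) | (out_off << 32);
/* reg == 0x000010000000ffff: low word = size mask, high word = write offset. */

Caching pt->output_base and pt->output_mask lets the fast path skip the two wrmsrl() calls whenever the values have not changed since the last reconfiguration.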
@@ -802,6 +831,11 @@ static void pt_update_head(struct pt *pt)
 	struct pt_buffer *buf = perf_get_aux(&pt->handle);
 	u64 topa_idx, base, old;
 
+	if (buf->single) {
+		local_set(&buf->data_size, buf->output_off);
+		return;
+	}
+
 	/* offset of the first region in this table from the beginning of buf */
 	base = buf->cur->offset + buf->output_off;
 
@@ -903,18 +937,21 @@ static void pt_handle_status(struct pt *pt)
  */
 static void pt_read_offset(struct pt_buffer *buf)
 {
-	u64 offset, base_topa;
+	struct pt *pt = this_cpu_ptr(&pt_ctx);
 	struct topa_page *tp;
 
-	rdmsrl(MSR_IA32_RTIT_OUTPUT_BASE, base_topa);
-	tp = phys_to_virt(base_topa);
-	buf->cur = &tp->topa;
+	if (!buf->single) {
+		rdmsrl(MSR_IA32_RTIT_OUTPUT_BASE, pt->output_base);
+		tp = phys_to_virt(pt->output_base);
+		buf->cur = &tp->topa;
+	}
 
-	rdmsrl(MSR_IA32_RTIT_OUTPUT_MASK, offset);
+	rdmsrl(MSR_IA32_RTIT_OUTPUT_MASK, pt->output_mask);
 	/* offset within current output region */
-	buf->output_off = offset >> 32;
+	buf->output_off = pt->output_mask >> 32;
 	/* index of current output region within this table */
-	buf->cur_idx = (offset & 0xffffff80) >> 7;
+	if (!buf->single)
+		buf->cur_idx = (pt->output_mask & 0xffffff80) >> 7;
 }
 
 static struct topa_entry *
@@ -1030,6 +1067,9 @@ static int pt_buffer_reset_markers(struct pt_buffer *buf,
 	unsigned long head = local64_read(&buf->head);
 	unsigned long idx, npages, wakeup;
 
+	if (buf->single)
+		return 0;
+
 	/* can't stop in the middle of an output region */
 	if (buf->output_off + handle->size + 1 < pt_buffer_region_size(buf)) {
 		perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED);
@@ -1111,13 +1151,17 @@ static void pt_buffer_reset_offsets(struct pt_buffer *buf, unsigned long head)
 	if (buf->snapshot)
 		head &= (buf->nr_pages << PAGE_SHIFT) - 1;
 
-	pg = (head >> PAGE_SHIFT) & (buf->nr_pages - 1);
-	te = pt_topa_entry_for_page(buf, pg);
+	if (!buf->single) {
+		pg = (head >> PAGE_SHIFT) & (buf->nr_pages - 1);
+		te = pt_topa_entry_for_page(buf, pg);
 
-	cur_tp = topa_entry_to_page(te);
-	buf->cur = &cur_tp->topa;
-	buf->cur_idx = te - TOPA_ENTRY(buf->cur, 0);
-	buf->output_off = head & (pt_buffer_region_size(buf) - 1);
+		cur_tp = topa_entry_to_page(te);
+		buf->cur = &cur_tp->topa;
+		buf->cur_idx = te - TOPA_ENTRY(buf->cur, 0);
+		buf->output_off = head & (pt_buffer_region_size(buf) - 1);
+	} else {
+		buf->output_off = head;
+	}
 
 	local64_set(&buf->head, head);
 	local_set(&buf->data_size, 0);
@@ -1131,6 +1175,9 @@ static void pt_buffer_fini_topa(struct pt_buffer *buf)
 {
 	struct topa *topa, *iter;
 
+	if (buf->single)
+		return;
+
 	list_for_each_entry_safe(topa, iter, &buf->tables, list) {
 		/*
 		 * right now, this is in free_aux() path only, so
@@ -1176,6 +1223,36 @@ static int pt_buffer_init_topa(struct pt_buffer *buf, int cpu,
 	return 0;
 }
 
+static int pt_buffer_try_single(struct pt_buffer *buf, int nr_pages)
+{
+	struct page *p = virt_to_page(buf->data_pages[0]);
+	int ret = -ENOTSUPP, order = 0;
+
+	/*
+	 * We can use single range output mode
+	 * + in snapshot mode, where we don't need interrupts;
+	 * + if the hardware supports it;
+	 * + if the entire buffer is one contiguous allocation.
+	 */
+	if (!buf->snapshot)
+		goto out;
+
+	if (!intel_pt_validate_hw_cap(PT_CAP_single_range_output))
+		goto out;
+
+	if (PagePrivate(p))
+		order = page_private(p);
+
+	if (1 << order != nr_pages)
+		goto out;
+
+	buf->single = true;
+	buf->nr_pages = nr_pages;
+	ret = 0;
+out:
+	return ret;
+}
+
 /**
  * pt_buffer_setup_aux() - set up topa tables for a PT buffer
  * @cpu: Cpu on which to allocate, -1 means current.
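The PagePrivate()/page_private() test above relies on how the perf AUX ring buffer allocator tags high-order allocations: when the whole buffer came from one contiguous block, the first page records the allocation order, so "1 << order == nr_pages" means the buffer is a single physical chunk. A rough sketch of the allocator side (assumed to mirror kernel/events/ring_buffer.c, which is not part of this diff):

/* Sketch: mark a high-order AUX allocation so drivers can detect it. */
static struct page *aux_alloc_pages_sketch(int node, gfp_t gfp, int order)
{
	struct page *page = alloc_pages_node(node, gfp, order);

	if (page && order) {
		split_page(page, order);	/* expose each constituent page  */
		SetPagePrivate(page);		/* flag the head page ...        */
		set_page_private(page, order);	/* ... with the original order   */
	}
	return page;
}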
@@ -1198,6 +1275,13 @@ pt_buffer_setup_aux(struct perf_event *event, void **pages,
 	if (!nr_pages)
 		return NULL;
 
+	/*
+	 * Only support AUX sampling in snapshot mode, where we don't
+	 * generate NMIs.
+	 */
+	if (event->attr.aux_sample_size && !snapshot)
+		return NULL;
+
 	if (cpu == -1)
 		cpu = raw_smp_processor_id();
 	node = cpu_to_node(cpu);
@@ -1213,6 +1297,10 @@ pt_buffer_setup_aux(struct perf_event *event, void **pages,
 
 	INIT_LIST_HEAD(&buf->tables);
 
+	ret = pt_buffer_try_single(buf, nr_pages);
+	if (!ret)
+		return buf;
+
 	ret = pt_buffer_init_topa(buf, cpu, nr_pages, GFP_KERNEL);
 	if (ret) {
 		kfree(buf);
@@ -1379,9 +1467,8 @@ void intel_pt_interrupt(void)
 			return;
 		}
 
-		pt_config_buffer(topa_to_page(buf->cur)->table, buf->cur_idx,
-				 buf->output_off);
-		pt_config(event);
+		pt_config_buffer(buf);
+		pt_config_start(event);
 	}
 }
 
@@ -1444,8 +1531,7 @@ static void pt_event_start(struct perf_event *event, int mode)
 	WRITE_ONCE(pt->handle_nmi, 1);
 	hwc->state = 0;
 
-	pt_config_buffer(topa_to_page(buf->cur)->table, buf->cur_idx,
-			 buf->output_off);
+	pt_config_buffer(buf);
 	pt_config(event);
 
 	return;
@@ -1496,6 +1582,52 @@ static void pt_event_stop(struct perf_event *event, int mode)
 	}
 }
 
+static long pt_event_snapshot_aux(struct perf_event *event,
+				  struct perf_output_handle *handle,
+				  unsigned long size)
+{
+	struct pt *pt = this_cpu_ptr(&pt_ctx);
+	struct pt_buffer *buf = perf_get_aux(&pt->handle);
+	unsigned long from = 0, to;
+	long ret;
+
+	if (WARN_ON_ONCE(!buf))
+		return 0;
+
+	/*
+	 * Sampling is only allowed on snapshot events;
+	 * see pt_buffer_setup_aux().
+	 */
+	if (WARN_ON_ONCE(!buf->snapshot))
+		return 0;
+
+	/*
+	 * Here, handle_nmi tells us if the tracing is on
+	 */
+	if (READ_ONCE(pt->handle_nmi))
+		pt_config_stop(event);
+
+	pt_read_offset(buf);
+	pt_update_head(pt);
+
+	to = local_read(&buf->data_size);
+	if (to < size)
+		from = buf->nr_pages << PAGE_SHIFT;
+	from += to - size;
+
+	ret = perf_output_copy_aux(&pt->handle, handle, from, to);
+
+	/*
+	 * If the tracing was on when we turned up, restart it.
+	 * Compiler barrier not needed as we couldn't have been
+	 * preempted by anything that touches pt->handle_nmi.
+	 */
+	if (pt->handle_nmi)
+		pt_config_start(event);
+
+	return ret;
+}
+
 static void pt_event_del(struct perf_event *event, int mode)
 {
 	pt_event_stop(event, PERF_EF_UPDATE);
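The from/to arithmetic above turns the linear data_size counter back into a window inside the circular snapshot buffer: if fewer than size bytes have been written so far, the copy has to wrap around the end of the buffer. A small worked example (illustrative values, PAGE_SHIFT assumed to be 12):

/* Illustrative only: wrap-around window handed to perf_output_copy_aux().
 * Buffer: 16 pages * 4 KiB = 64 KiB; requested sample size = 8 KiB.
 */
unsigned long nr_pages = 16, size = 8192;
unsigned long to   = 4096;		/* only 4 KiB written since tracing started */
unsigned long from = 0;

if (to < size)
	from = nr_pages << 12;		/* start past the end: 65536                */
from += to - size;			/* 65536 + 4096 - 8192 = 61440 (0xf000)     */
/* The copy runs from offset 61440, wraps at 65536, and ends at offset 4096. */

On the tooling side, this callback is what the AUX area sampling support listed in the commit subjects above ("perf record: Add support for AUX area sampling", "perf record: Add aux-sample-size config term") ultimately consumes.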
@@ -1615,6 +1747,7 @@ static __init int pt_init(void)
 	pt_pmu.pmu.del = pt_event_del;
 	pt_pmu.pmu.start = pt_event_start;
 	pt_pmu.pmu.stop = pt_event_stop;
+	pt_pmu.pmu.snapshot_aux = pt_event_snapshot_aux;
 	pt_pmu.pmu.read = pt_event_read;
 	pt_pmu.pmu.setup_aux = pt_buffer_setup_aux;
 	pt_pmu.pmu.free_aux = pt_buffer_free_aux;
@@ -64,6 +64,7 @@ struct pt_pmu {
 * @lost: if data was lost/truncated
 * @head: logical write offset inside the buffer
 * @snapshot: if this is for a snapshot/overwrite counter
+* @single: use Single Range Output instead of ToPA
 * @stop_pos: STOP topa entry index
 * @intr_pos: INT topa entry index
 * @stop_te: STOP topa entry pointer
@@ -80,6 +81,7 @@ struct pt_buffer {
 	local_t data_size;
 	local64_t head;
 	bool snapshot;
+	bool single;
 	long stop_pos, intr_pos;
 	struct topa_entry *stop_te, *intr_te;
 	void **data_pages;
@@ -111,16 +113,20 @@ struct pt_filters {
 
 /**
  * struct pt - per-cpu pt context
- * @handle:	perf output handle
+ * @handle:		perf output handle
  * @filters:		last configured filters
- * @handle_nmi:	do handle PT PMI on this cpu, there's an active event
- * @vmx_on:	1 if VMX is ON on this cpu
+ * @handle_nmi:		do handle PT PMI on this cpu, there's an active event
+ * @vmx_on:		1 if VMX is ON on this cpu
+ * @output_base:	cached RTIT_OUTPUT_BASE MSR value
+ * @output_mask:	cached RTIT_OUTPUT_MASK MSR value
 */
 struct pt {
 	struct perf_output_handle handle;
 	struct pt_filters filters;
 	int handle_nmi;
 	int vmx_on;
+	u64 output_base;
+	u64 output_mask;
 };
 
 #endif /* __INTEL_PT_H__ */
@@ -682,6 +682,14 @@ struct x86_pmu {
 	 */
 	atomic_t lbr_exclusive[x86_lbr_exclusive_max];
 
+	/*
+	 * perf task context (i.e. struct perf_event_context::task_ctx_data)
+	 * switch helper to bridge calls from perf/core to perf/x86.
+	 * See struct pmu::swap_task_ctx() usage for examples;
+	 */
+	void (*swap_task_ctx)(struct perf_event_context *prev,
+			      struct perf_event_context *next);
+
 	/*
 	 * AMD bits
 	 */
@@ -1016,6 +1024,9 @@ void intel_pmu_store_pebs_lbrs(struct pebs_lbr *lbr);
 
 void intel_ds_init(void);
 
+void intel_pmu_lbr_swap_task_ctx(struct perf_event_context *prev,
+				 struct perf_event_context *next);
+
 void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in);
 
 u64 lbr_from_signext_quirk_wr(u64 val);
@@ -7,9 +7,11 @@
 # define __ASM_FORM_RAW(x) x
 # define __ASM_FORM_COMMA(x) x,
 #else
-# define __ASM_FORM(x) " " #x " "
-# define __ASM_FORM_RAW(x) #x
-# define __ASM_FORM_COMMA(x) " " #x ","
+#include <linux/stringify.h>
+
+# define __ASM_FORM(x) " " __stringify(x) " "
+# define __ASM_FORM_RAW(x) __stringify(x)
+# define __ASM_FORM_COMMA(x) " " __stringify(x) ","
 #endif
 
 #ifndef __x86_64__
arch/x86/include/asm/emulate_prefix.h (new file, 14 lines)
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_EMULATE_PREFIX_H
+#define _ASM_X86_EMULATE_PREFIX_H
+
+/*
+ * Virt escape sequences to trigger instruction emulation;
+ * ideally these would decode to 'whole' instruction and not destroy
+ * the instruction stream; sadly this is not true for the 'kvm' one :/
+ */
+
+#define __XEN_EMULATE_PREFIX 0x0f,0x0b,0x78,0x65,0x6e	/* ud2 ; .ascii "xen" */
+#define __KVM_EMULATE_PREFIX 0x0f,0x0b,0x6b,0x76,0x6d	/* ud2 ; .ascii "kvm" */
+
+#endif
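The prefix is five bytes: a ud2 followed by an ASCII tag. A guest that wants KVM to forcibly emulate the next instruction (with the host's kvm force_emulation_prefix parameter enabled; see the handle_ud() hunk below) emits those bytes immediately before it. A hypothetical guest-side snippet, illustrative only and not part of this diff:

/* Illustrative guest code: ask KVM to emulate CPUID by prefixing it with the
 * __KVM_EMULATE_PREFIX byte sequence (ud2 ; .ascii "kvm").
 */
static inline void emulated_cpuid(unsigned int *eax, unsigned int *ebx,
				  unsigned int *ecx, unsigned int *edx)
{
	asm volatile(".byte 0x0f, 0x0b, 0x6b, 0x76, 0x6d; cpuid"
		     : "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx)
		     : "a" (*eax), "c" (*ecx));
}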
@@ -45,6 +45,7 @@ struct insn {
 		struct insn_field immediate2;	/* for 64bit imm or seg16 */
 	};
 
+	int emulate_prefix_size;
 	insn_attr_t attr;
 	unsigned char opnd_bytes;
 	unsigned char addr_bytes;
@@ -128,6 +129,11 @@ static inline int insn_is_evex(struct insn *insn)
 	return (insn->vex_prefix.nbytes == 4);
 }
 
+static inline int insn_has_emulate_prefix(struct insn *insn)
+{
+	return !!insn->emulate_prefix_size;
+}
+
 /* Ensure this instruction is decoded completely */
 static inline int insn_complete(struct insn *insn)
 {
@@ -379,12 +379,9 @@ struct xen_pmu_arch {
 * Prefix forces emulation of some non-trapping instructions.
 * Currently only CPUID.
 */
-#ifdef __ASSEMBLY__
-#define XEN_EMULATE_PREFIX .byte 0x0f,0x0b,0x78,0x65,0x6e ;
-#define XEN_CPUID XEN_EMULATE_PREFIX cpuid
-#else
-#define XEN_EMULATE_PREFIX ".byte 0x0f,0x0b,0x78,0x65,0x6e ; "
-#define XEN_CPUID XEN_EMULATE_PREFIX "cpuid"
-#endif
+#include <asm/emulate_prefix.h>
+
+#define XEN_EMULATE_PREFIX __ASM_FORM(.byte __XEN_EMULATE_PREFIX ;)
+#define XEN_CPUID XEN_EMULATE_PREFIX __ASM_FORM(cpuid)
 
 #endif /* _ASM_X86_XEN_INTERFACE_H */
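With the __stringify()-based __ASM_FORM() from the asm.h hunk above, the same byte-list macro now feeds both the assembler and inline-asm string contexts, so the #ifdef __ASSEMBLY__ duplication can go. Roughly how the expansion works, plus a usage sketch modeled on the Xen CPUID wrapper (illustrative, not from this diff):

/* Illustrative expansion, assuming !__ASSEMBLY__:
 *
 *   XEN_EMULATE_PREFIX -> __ASM_FORM(.byte __XEN_EMULATE_PREFIX ;)
 *                      -> " " __stringify(.byte 0x0f,0x0b,0x78,0x65,0x6e ;) " "
 *                      -> " .byte 0x0f,0x0b,0x78,0x65,0x6e ; "
 *
 * The two-level expansion inside __stringify() is what lets the byte-list
 * macro expand first; a plain #x would have produced the literal macro name.
 */
static inline unsigned int xen_cpuid_eax_sketch(unsigned int leaf)
{
	unsigned int eax, ebx, ecx, edx;

	asm volatile(XEN_CPUID
		     : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
		     : "0" (leaf));
	return eax;
}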
@@ -351,6 +351,10 @@ int __copy_instruction(u8 *dest, u8 *src, u8 *real, struct insn *insn)
 	kernel_insn_init(insn, dest, MAX_INSN_SIZE);
 	insn_get_length(insn);
 
+	/* We can not probe force emulate prefixed instruction */
+	if (insn_has_emulate_prefix(insn))
+		return 0;
+
 	/* Another subsystem puts a breakpoint, failed to recover */
 	if (insn->opcode.bytes[0] == BREAKPOINT_INSTRUCTION)
 		return 0;
@@ -68,6 +68,7 @@
 #include <asm/mshyperv.h>
 #include <asm/hypervisor.h>
 #include <asm/intel_pt.h>
+#include <asm/emulate_prefix.h>
 #include <clocksource/hyperv_timer.h>
 
 #define CREATE_TRACE_POINTS
@@ -5492,6 +5493,7 @@ EXPORT_SYMBOL_GPL(kvm_write_guest_virt_system);
 
 int handle_ud(struct kvm_vcpu *vcpu)
 {
+	static const char kvm_emulate_prefix[] = { __KVM_EMULATE_PREFIX };
 	int emul_type = EMULTYPE_TRAP_UD;
 	char sig[5]; /* ud2; .ascii "kvm" */
 	struct x86_exception e;
@@ -5499,7 +5501,7 @@ int handle_ud(struct kvm_vcpu *vcpu)
 	if (force_emulation_prefix &&
 	    kvm_read_guest_virt(vcpu, kvm_get_linear_rip(vcpu),
 				sig, sizeof(sig), &e) == 0 &&
-	    memcmp(sig, "\xf\xbkvm", sizeof(sig)) == 0) {
+	    memcmp(sig, kvm_emulate_prefix, sizeof(sig)) == 0) {
 		kvm_rip_write(vcpu, kvm_rip_read(vcpu) + sizeof(sig));
 		emul_type = EMULTYPE_TRAP_UD_FORCED;
 	}
@@ -13,6 +13,8 @@
 #include <asm/inat.h>
 #include <asm/insn.h>
 
+#include <asm/emulate_prefix.h>
+
 /* Verify next sizeof(t) bytes can be on the same instruction */
 #define validate_next(t, insn, n) \
 	((insn)->next_byte + sizeof(t) + n <= (insn)->end_kaddr)
@@ -58,6 +60,36 @@ void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64)
 		insn->addr_bytes = 4;
 }
 
+static const insn_byte_t xen_prefix[] = { __XEN_EMULATE_PREFIX };
+static const insn_byte_t kvm_prefix[] = { __KVM_EMULATE_PREFIX };
+
+static int __insn_get_emulate_prefix(struct insn *insn,
+				     const insn_byte_t *prefix, size_t len)
+{
+	size_t i;
+
+	for (i = 0; i < len; i++) {
+		if (peek_nbyte_next(insn_byte_t, insn, i) != prefix[i])
+			goto err_out;
+	}
+
+	insn->emulate_prefix_size = len;
+	insn->next_byte += len;
+
+	return 1;
+
+err_out:
+	return 0;
+}
+
+static void insn_get_emulate_prefix(struct insn *insn)
+{
+	if (__insn_get_emulate_prefix(insn, xen_prefix, sizeof(xen_prefix)))
+		return;
+
+	__insn_get_emulate_prefix(insn, kvm_prefix, sizeof(kvm_prefix));
+}
+
 /**
  * insn_get_prefixes - scan x86 instruction prefix bytes
  * @insn: &struct insn containing instruction
@@ -76,6 +108,8 @@ void insn_get_prefixes(struct insn *insn)
 	if (prefixes->got)
 		return;
 
+	insn_get_emulate_prefix(insn);
+
 	nb = 0;
 	lb = 0;
 	b = peek_next(insn_byte_t, insn);
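For decoder users, nothing changes except that a decoded instruction can now report whether it was preceded by one of these virt escape sequences. A minimal usage sketch of the in-kernel decoder API, mirroring what the kprobes hunk above does (buffer handling simplified):

#include <asm/insn.h>

/* Sketch: decode a kernel buffer and reject it for probing if it carries an
 * emulate prefix, as __copy_instruction() now does.
 */
static bool insn_is_probeable(void *kaddr)
{
	struct insn insn;

	kernel_insn_init(&insn, kaddr, MAX_INSN_SIZE);	/* native bitness        */
	insn_get_length(&insn);				/* full decode pass      */

	if (!insn_complete(&insn))
		return false;				/* could not decode      */

	return !insn_has_emulate_prefix(&insn);		/* skip ud2;"xen"/"kvm"  */
}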
@@ -333,7 +333,7 @@ AVXcode: 1
 06: CLTS
 07: SYSRET (o64)
 08: INVD
-09: WBINVD
+09: WBINVD | WBNOINVD (F3)
 0a:
 0b: UD2 (1B)
 0c:
@@ -364,7 +364,7 @@ AVXcode: 1
 # a ModR/M byte.
 1a: BNDCL Gv,Ev (F3) | BNDCU Gv,Ev (F2) | BNDMOV Gv,Ev (66) | BNDLDX Gv,Ev
 1b: BNDCN Gv,Ev (F2) | BNDMOV Ev,Gv (66) | BNDMK Gv,Ev (F3) | BNDSTX Ev,Gv
-1c:
+1c: Grp20 (1A),(1C)
 1d:
 1e:
 1f: NOP Ev
@@ -792,6 +792,8 @@ f3: Grp17 (1A)
 f5: BZHI Gy,Ey,By (v) | PEXT Gy,By,Ey (F3),(v) | PDEP Gy,By,Ey (F2),(v)
 f6: ADCX Gy,Ey (66) | ADOX Gy,Ey (F3) | MULX By,Gy,rDX,Ey (F2),(v)
 f7: BEXTR Gy,Ey,By (v) | SHLX Gy,Ey,By (66),(v) | SARX Gy,Ey,By (F3),(v) | SHRX Gy,Ey,By (F2),(v)
+f8: MOVDIR64B Gv,Mdqq (66) | ENQCMD Gv,Mdqq (F2) | ENQCMDS Gv,Mdqq (F3)
+f9: MOVDIRI My,Gy
 EndTable
 
 Table: 3-byte opcode 2 (0x0f 0x3a)
@@ -943,9 +945,9 @@ GrpTable: Grp6
 EndTable
 
 GrpTable: Grp7
-0: SGDT Ms | VMCALL (001),(11B) | VMLAUNCH (010),(11B) | VMRESUME (011),(11B) | VMXOFF (100),(11B)
-1: SIDT Ms | MONITOR (000),(11B) | MWAIT (001),(11B) | CLAC (010),(11B) | STAC (011),(11B)
-2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) | VMFUNC (100),(11B) | XEND (101)(11B) | XTEST (110)(11B)
+0: SGDT Ms | VMCALL (001),(11B) | VMLAUNCH (010),(11B) | VMRESUME (011),(11B) | VMXOFF (100),(11B) | PCONFIG (101),(11B) | ENCLV (000),(11B)
+1: SIDT Ms | MONITOR (000),(11B) | MWAIT (001),(11B) | CLAC (010),(11B) | STAC (011),(11B) | ENCLS (111),(11B)
+2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) | VMFUNC (100),(11B) | XEND (101)(11B) | XTEST (110)(11B) | ENCLU (111),(11B)
 3: LIDT Ms
 4: SMSW Mw/Rv
 5: rdpkru (110),(11B) | wrpkru (111),(11B)
@@ -1020,7 +1022,7 @@ GrpTable: Grp15
 3: vstmxcsr Md (v1) | WRGSBASE Ry (F3),(11B)
 4: XSAVE | ptwrite Ey (F3),(11B)
 5: XRSTOR | lfence (11B)
-6: XSAVEOPT | clwb (66) | mfence (11B)
+6: XSAVEOPT | clwb (66) | mfence (11B) | TPAUSE Rd (66),(11B) | UMONITOR Rv (F3),(11B) | UMWAIT Rd (F2),(11B)
 7: clflush | clflushopt (66) | sfence (11B)
 EndTable
 
@@ -1051,6 +1053,10 @@ GrpTable: Grp19
 6: vscatterpf1qps/d Wx (66),(ev)
 EndTable
 
+GrpTable: Grp20
+0: cldemote Mb
+EndTable
+
 # AMD's Prefetch Group
 GrpTable: GrpP
 0: PREFETCH
@@ -1818,6 +1818,14 @@ union security_list_options {
 	void (*bpf_prog_free_security)(struct bpf_prog_aux *aux);
 #endif /* CONFIG_BPF_SYSCALL */
 	int (*locked_down)(enum lockdown_reason what);
+#ifdef CONFIG_PERF_EVENTS
+	int (*perf_event_open)(struct perf_event_attr *attr, int type);
+	int (*perf_event_alloc)(struct perf_event *event);
+	void (*perf_event_free)(struct perf_event *event);
+	int (*perf_event_read)(struct perf_event *event);
+	int (*perf_event_write)(struct perf_event *event);
+
+#endif
 };
 
 struct security_hook_heads {
@@ -2060,6 +2068,13 @@ struct security_hook_heads {
 	struct hlist_head bpf_prog_free_security;
 #endif /* CONFIG_BPF_SYSCALL */
 	struct hlist_head locked_down;
+#ifdef CONFIG_PERF_EVENTS
+	struct hlist_head perf_event_open;
+	struct hlist_head perf_event_alloc;
+	struct hlist_head perf_event_free;
+	struct hlist_head perf_event_read;
+	struct hlist_head perf_event_write;
+#endif
 } __randomize_layout;
 
 /*
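These hook slots are what the SELinux side of the series (not shown in this truncated diff) plugs into, and what the perf_allow_*() helpers reach via security_perf_event_open(). A skeletal example of how an LSM registers the new hooks; the module name and function bodies are hypothetical, the registration macros are the standard LSM ones:

#include <linux/lsm_hooks.h>
#include <linux/perf_event.h>

/* Hypothetical LSM policy for the new perf hooks. */
static int example_perf_event_open(struct perf_event_attr *attr, int type)
{
	/* type is PERF_SECURITY_OPEN / _CPU / _KERNEL / _TRACEPOINT */
	return 0;	/* 0 = allow, -EACCES/-EPERM = deny */
}

static int example_perf_event_alloc(struct perf_event *event)
{
	return 0;	/* may attach per-event security state here */
}

static struct security_hook_list example_hooks[] __lsm_ro_after_init = {
	LSM_HOOK_INIT(perf_event_open,  example_perf_event_open),
	LSM_HOOK_INIT(perf_event_alloc, example_perf_event_alloc),
};
/* Registered from the LSM's init with security_add_hooks(example_hooks,
 * ARRAY_SIZE(example_hooks), "example").
 */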
Some files were not shown because too many files have changed in this diff.