Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull perf updates from Ingo Molnar:
 "The main kernel side changes in this cycle were:

   - Various Intel-PT updates and optimizations (Alexander Shishkin)

   - Prohibit kprobes on Xen/KVM emulate prefixes (Masami Hiramatsu)

   - Add support for LSM and SELinux checks to control access to the
     perf syscall (Joel Fernandes)

   - Misc other changes, optimizations, fixes and cleanups - see the
     shortlog for details.

  There were numerous tooling changes as well - 254 non-merge commits.
  Here are the main changes - too many to list in detail:

   - Enhancements to core tooling infrastructure, perf.data, libperf,
     libtraceevent, event parsing, vendor events, Intel PT, callchains,
     BPF support and instruction decoding.

   - There were updates to the following tools:

        perf annotate
        perf diff
        perf inject
        perf kvm
        perf list
        perf maps
        perf parse
        perf probe
        perf record
        perf report
        perf script
        perf stat
        perf test
        perf trace

   - And a lot of other changes: please see the shortlog and Git log for
     more details"

* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (279 commits)
  perf parse: Fix potential memory leak when handling tracepoint errors
  perf probe: Fix spelling mistake "addrees" -> "address"
  libtraceevent: Fix memory leakage in copy_filter_type
  libtraceevent: Fix header installation
  perf intel-bts: Does not support AUX area sampling
  perf intel-pt: Add support for decoding AUX area samples
  perf intel-pt: Add support for recording AUX area samples
  perf pmu: When using default config, record which bits of config were changed by the user
  perf auxtrace: Add support for queuing AUX area samples
  perf session: Add facility to peek at all events
  perf auxtrace: Add support for dumping AUX area samples
  perf inject: Cut AUX area samples
  perf record: Add aux-sample-size config term
  perf record: Add support for AUX area sampling
  perf auxtrace: Add support for AUX area sample recording
  perf auxtrace: Move perf_evsel__find_pmu()
  perf record: Add a function to test for kernel support for AUX area sampling
  perf tools: Add kernel AUX area sampling definitions
  perf/core: Make the mlock accounting simple again
  perf report: Jump to symbol source view from total cycles view
  ...
Author: Linus Torvalds
Date:   2019-11-26 15:04:47 -08:00
297 changed files with 44924 additions and 35331 deletions

View File

@@ -12846,6 +12846,13 @@ F: arch/*/events/*
F: arch/*/events/*/*
F: tools/perf/
+PERFORMANCE EVENTS SUBSYSTEM ARM64 PMU EVENTS
+R: John Garry <john.garry@huawei.com>
+R: Will Deacon <will@kernel.org>
+L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
+S: Supported
+F: tools/perf/pmu-events/arch/arm64/
+
PERSONALITY HANDLING
M: Christoph Hellwig <hch@infradead.org>
L: linux-abi-devel@lists.sourceforge.net

View File

@@ -96,7 +96,7 @@ static inline unsigned long perf_ip_adjust(struct pt_regs *regs)
{
return 0;
}
-static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp) { }
+static inline void perf_get_data_addr(struct perf_event *event, struct pt_regs *regs, u64 *addrp) { }
static inline u32 perf_get_misc_flags(struct pt_regs *regs)
{
return 0;
@@ -127,7 +127,7 @@ static unsigned long ebb_switch_in(bool ebb, struct cpu_hw_events *cpuhw)
static inline void power_pmu_bhrb_enable(struct perf_event *event) {}
static inline void power_pmu_bhrb_disable(struct perf_event *event) {}
static void power_pmu_sched_task(struct perf_event_context *ctx, bool sched_in) {}
-static inline void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw) {}
+static inline void power_pmu_bhrb_read(struct perf_event *event, struct cpu_hw_events *cpuhw) {}
static void pmao_restore_workaround(bool ebb) { }
#endif /* CONFIG_PPC32 */
@@ -179,7 +179,7 @@ static inline unsigned long perf_ip_adjust(struct pt_regs *regs)
* pointed to by SIAR; this is indicated by the [POWER6_]MMCRA_SDSYNC, the
* [POWER7P_]MMCRA_SDAR_VALID bit in MMCRA, or the SDAR_VALID bit in SIER.
*/
-static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp)
+static inline void perf_get_data_addr(struct perf_event *event, struct pt_regs *regs, u64 *addrp)
{
unsigned long mmcra = regs->dsisr;
bool sdar_valid;
@@ -204,8 +204,7 @@ static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp)
if (!(mmcra & MMCRA_SAMPLE_ENABLE) || sdar_valid)
*addrp = mfspr(SPRN_SDAR);
-if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN) &&
-is_kernel_addr(mfspr(SPRN_SDAR)))
+if (is_kernel_addr(mfspr(SPRN_SDAR)) && perf_allow_kernel(&event->attr) != 0)
*addrp = 0;
}
@@ -444,7 +443,7 @@ static __u64 power_pmu_bhrb_to(u64 addr)
}
/* Processing BHRB entries */
-static void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw)
+static void power_pmu_bhrb_read(struct perf_event *event, struct cpu_hw_events *cpuhw)
{
u64 val;
u64 addr;
@@ -472,8 +471,7 @@ static void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw)
* exporting it to userspace (avoid exposure of regions
* where we could have speculative execution)
*/
-if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN) &&
-is_kernel_addr(addr))
+if (is_kernel_addr(addr) && perf_allow_kernel(&event->attr) != 0)
continue;
/* Branches are read most recent first (ie. mfbhrb 0 is
@@ -2087,12 +2085,12 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
if (event->attr.sample_type &
(PERF_SAMPLE_ADDR | PERF_SAMPLE_PHYS_ADDR))
-perf_get_data_addr(regs, &data.addr);
+perf_get_data_addr(event, regs, &data.addr);
if (event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK) {
struct cpu_hw_events *cpuhw;
cpuhw = this_cpu_ptr(&cpu_hw_events);
-power_pmu_bhrb_read(cpuhw);
+power_pmu_bhrb_read(event, cpuhw);
data.br_stack = &cpuhw->bhrb_stack;
}
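
The perf_paranoid_kernel()/capable() checks removed above (and in several hunks below) are folded into new helpers that also consult the LSM hook added by this series. A rough sketch of their shape, as introduced in include/linux/perf_event.h (simplified and illustrative rather than authoritative):

    static inline int perf_allow_kernel(struct perf_event_attr *attr)
    {
            if (sysctl_perf_event_paranoid > 1 && !capable(CAP_SYS_ADMIN))
                    return -EACCES;

            /* new LSM hook; see the lsm_hooks.h hunk at the end of this diff */
            return security_perf_event_open(attr, PERF_SECURITY_KERNEL);
    }

    static inline int perf_allow_cpu(struct perf_event_attr *attr)
    {
            if (sysctl_perf_event_paranoid > 0 && !capable(CAP_SYS_ADMIN))
                    return -EACCES;

            return security_perf_event_open(attr, PERF_SECURITY_CPU);
    }

A return of 0 grants access; callers such as the BTS and P4 hunks below simply propagate the error.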

View File

@@ -652,15 +652,7 @@ static void amd_pmu_disable_event(struct perf_event *event)
*/
static int amd_pmu_handle_irq(struct pt_regs *regs)
{
-struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-int active, handled;
-/*
- * Obtain the active count before calling x86_pmu_handle_irq() since
- * it is possible that x86_pmu_handle_irq() may make a counter
- * inactive (through x86_pmu_stop).
- */
-active = __bitmap_weight(cpuc->active_mask, X86_PMC_IDX_MAX);
+int handled;
/* Process any counter overflows */
handled = x86_pmu_handle_irq(regs);
@@ -670,8 +662,7 @@ static int amd_pmu_handle_irq(struct pt_regs *regs)
* NMIs will be claimed if arriving within that window.
*/
if (handled) {
-this_cpu_write(perf_nmi_tstamp,
-jiffies + perf_nmi_window);
+this_cpu_write(perf_nmi_tstamp, jiffies + perf_nmi_window);
return handled;
}

View File

@@ -2243,6 +2243,13 @@ static void x86_pmu_sched_task(struct perf_event_context *ctx, bool sched_in)
x86_pmu.sched_task(ctx, sched_in);
}
+static void x86_pmu_swap_task_ctx(struct perf_event_context *prev,
+struct perf_event_context *next)
+{
+if (x86_pmu.swap_task_ctx)
+x86_pmu.swap_task_ctx(prev, next);
+}
void perf_check_microcode(void)
{
if (x86_pmu.check_microcode)
@@ -2297,6 +2304,7 @@ static struct pmu pmu = {
.event_idx = x86_pmu_event_idx,
.sched_task = x86_pmu_sched_task,
.task_ctx_size = sizeof(struct x86_perf_task_context),
+.swap_task_ctx = x86_pmu_swap_task_ctx,
.check_period = x86_pmu_check_period,
.aux_output_match = x86_pmu_aux_output_match,
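
The new swap_task_ctx() callback gives the architecture a say in an existing perf-core optimization: when two task contexts carry equivalent events across a context switch, core swaps their task_ctx_data pointers instead of rescheduling every event, and the LBR call-stack state stored there must be fixed up. A simplified sketch of the core-side call site in kernel/events/core.c, condensed from this series:

    if (context_equiv(ctx, next_ctx)) {
            /* contexts are equivalent: swap per-task PMU data instead of
             * unscheduling and rescheduling every event */
            if (pmu->swap_task_ctx)
                    pmu->swap_task_ctx(ctx, next_ctx);
            else
                    swap(ctx->task_ctx_data, next_ctx->task_ctx_data);
    }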

View File

@@ -549,9 +549,11 @@ static int bts_event_init(struct perf_event *event)
* Note that the default paranoia setting permits unprivileged
* users to profile the kernel.
*/
-if (event->attr.exclude_kernel && perf_paranoid_kernel() &&
-!capable(CAP_SYS_ADMIN))
-return -EACCES;
+if (event->attr.exclude_kernel) {
+ret = perf_allow_kernel(&event->attr);
+if (ret)
+return ret;
+}
if (x86_add_exclusive(x86_lbr_exclusive_bts))
return -EBUSY;

View File

@@ -3315,8 +3315,9 @@ static int intel_pmu_hw_config(struct perf_event *event)
if (x86_pmu.version < 3)
return -EINVAL;
-if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
-return -EACCES;
+ret = perf_allow_cpu(&event->attr);
+if (ret)
+return ret;
event->hw.config |= ARCH_PERFMON_EVENTSEL_ANY;
@@ -3830,6 +3831,12 @@ static void intel_pmu_sched_task(struct perf_event_context *ctx,
intel_pmu_lbr_sched_task(ctx, sched_in);
}
+static void intel_pmu_swap_task_ctx(struct perf_event_context *prev,
+struct perf_event_context *next)
+{
+intel_pmu_lbr_swap_task_ctx(prev, next);
+}
static int intel_pmu_check_period(struct perf_event *event, u64 value)
{
return intel_pmu_has_bts_period(event, value) ? -EINVAL : 0;
@@ -3965,6 +3972,7 @@ static __initconst const struct x86_pmu intel_pmu = {
.guest_get_msrs = intel_guest_get_msrs,
.sched_task = intel_pmu_sched_task,
+.swap_task_ctx = intel_pmu_swap_task_ctx,
.check_period = intel_pmu_check_period,

View File

@@ -417,6 +417,29 @@ static void __intel_pmu_lbr_save(struct x86_perf_task_context *task_ctx)
cpuc->last_log_id = ++task_ctx->log_id;
}
+void intel_pmu_lbr_swap_task_ctx(struct perf_event_context *prev,
+struct perf_event_context *next)
+{
+struct x86_perf_task_context *prev_ctx_data, *next_ctx_data;
+swap(prev->task_ctx_data, next->task_ctx_data);
+/*
+ * Architecture specific synchronization makes sense in
+ * case both prev->task_ctx_data and next->task_ctx_data
+ * pointers are allocated.
+ */
+prev_ctx_data = next->task_ctx_data;
+next_ctx_data = prev->task_ctx_data;
+if (!prev_ctx_data || !next_ctx_data)
+return;
+swap(prev_ctx_data->lbr_callstack_users,
+next_ctx_data->lbr_callstack_users);
+}
void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

View File

@@ -776,8 +776,9 @@ static int p4_validate_raw_event(struct perf_event *event)
* the user needs special permissions to be able to use it
*/
if (p4_ht_active() && p4_event_bind_map[v].shared) {
-if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
-return -EACCES;
+v = perf_allow_cpu(&event->attr);
+if (v)
+return v;
}
/* ESCR EventMask bits may be invalid */

View File

@@ -397,6 +397,20 @@ static bool pt_event_valid(struct perf_event *event)
* These all are cpu affine and operate on a local PT
*/
+static void pt_config_start(struct perf_event *event)
+{
+struct pt *pt = this_cpu_ptr(&pt_ctx);
+u64 ctl = event->hw.config;
+ctl |= RTIT_CTL_TRACEEN;
+if (READ_ONCE(pt->vmx_on))
+perf_aux_output_flag(&pt->handle, PERF_AUX_FLAG_PARTIAL);
+else
+wrmsrl(MSR_IA32_RTIT_CTL, ctl);
+WRITE_ONCE(event->hw.config, ctl);
+}
/* Address ranges and their corresponding msr configuration registers */
static const struct pt_address_range {
unsigned long msr_a;
@@ -469,6 +483,7 @@ static u64 pt_config_filters(struct perf_event *event)
static void pt_config(struct perf_event *event)
{
struct pt *pt = this_cpu_ptr(&pt_ctx);
+struct pt_buffer *buf = perf_get_aux(&pt->handle);
u64 reg;
/* First round: clear STATUS, in particular the PSB byte counter. */
@@ -478,7 +493,9 @@ static void pt_config(struct perf_event *event)
}
reg = pt_config_filters(event);
-reg |= RTIT_CTL_TOPA | RTIT_CTL_TRACEEN;
+reg |= RTIT_CTL_TRACEEN;
+if (!buf->single)
+reg |= RTIT_CTL_TOPA;
/*
* Previously, we had BRANCH_EN on by default, but now that PT has
@@ -501,10 +518,7 @@ static void pt_config(struct perf_event *event)
reg |= (event->attr.config & PT_CONFIG_MASK);
event->hw.config = reg;
-if (READ_ONCE(pt->vmx_on))
-perf_aux_output_flag(&pt->handle, PERF_AUX_FLAG_PARTIAL);
-else
-wrmsrl(MSR_IA32_RTIT_CTL, reg);
+pt_config_start(event);
}
static void pt_config_stop(struct perf_event *event)
@@ -533,18 +547,6 @@ static void pt_config_stop(struct perf_event *event)
wmb();
}
-static void pt_config_buffer(void *buf, unsigned int topa_idx,
-unsigned int output_off)
-{
-u64 reg;
-wrmsrl(MSR_IA32_RTIT_OUTPUT_BASE, virt_to_phys(buf));
-reg = 0x7f | ((u64)topa_idx << 7) | ((u64)output_off << 32);
-wrmsrl(MSR_IA32_RTIT_OUTPUT_MASK, reg);
-}
/**
* struct topa - ToPA metadata
* @list: linkage to struct pt_buffer's list of tables
@@ -602,6 +604,33 @@ static inline phys_addr_t topa_pfn(struct topa *topa)
#define TOPA_ENTRY_SIZE(t, i) (sizes(TOPA_ENTRY((t), (i))->size))
#define TOPA_ENTRY_PAGES(t, i) (1 << TOPA_ENTRY((t), (i))->size)
+static void pt_config_buffer(struct pt_buffer *buf)
+{
+struct pt *pt = this_cpu_ptr(&pt_ctx);
+u64 reg, mask;
+void *base;
+if (buf->single) {
+base = buf->data_pages[0];
+mask = (buf->nr_pages * PAGE_SIZE - 1) >> 7;
+} else {
+base = topa_to_page(buf->cur)->table;
+mask = (u64)buf->cur_idx;
+}
+reg = virt_to_phys(base);
+if (pt->output_base != reg) {
+pt->output_base = reg;
+wrmsrl(MSR_IA32_RTIT_OUTPUT_BASE, reg);
+}
+reg = 0x7f | (mask << 7) | ((u64)buf->output_off << 32);
+if (pt->output_mask != reg) {
+pt->output_mask = reg;
+wrmsrl(MSR_IA32_RTIT_OUTPUT_MASK, reg);
+}
+}
/**
* topa_alloc() - allocate page-sized ToPA table
* @cpu: CPU on which to allocate.
@@ -802,6 +831,11 @@ static void pt_update_head(struct pt *pt)
struct pt_buffer *buf = perf_get_aux(&pt->handle);
u64 topa_idx, base, old;
+if (buf->single) {
+local_set(&buf->data_size, buf->output_off);
+return;
+}
/* offset of the first region in this table from the beginning of buf */
base = buf->cur->offset + buf->output_off;
@@ -903,18 +937,21 @@ static void pt_handle_status(struct pt *pt)
*/
static void pt_read_offset(struct pt_buffer *buf)
{
-u64 offset, base_topa;
+struct pt *pt = this_cpu_ptr(&pt_ctx);
struct topa_page *tp;
-rdmsrl(MSR_IA32_RTIT_OUTPUT_BASE, base_topa);
-tp = phys_to_virt(base_topa);
-buf->cur = &tp->topa;
+if (!buf->single) {
+rdmsrl(MSR_IA32_RTIT_OUTPUT_BASE, pt->output_base);
+tp = phys_to_virt(pt->output_base);
+buf->cur = &tp->topa;
+}
-rdmsrl(MSR_IA32_RTIT_OUTPUT_MASK, offset);
+rdmsrl(MSR_IA32_RTIT_OUTPUT_MASK, pt->output_mask);
/* offset within current output region */
-buf->output_off = offset >> 32;
+buf->output_off = pt->output_mask >> 32;
/* index of current output region within this table */
-buf->cur_idx = (offset & 0xffffff80) >> 7;
+if (!buf->single)
+buf->cur_idx = (pt->output_mask & 0xffffff80) >> 7;
}
static struct topa_entry *
@@ -1030,6 +1067,9 @@ static int pt_buffer_reset_markers(struct pt_buffer *buf,
unsigned long head = local64_read(&buf->head);
unsigned long idx, npages, wakeup;
+if (buf->single)
+return 0;
/* can't stop in the middle of an output region */
if (buf->output_off + handle->size + 1 < pt_buffer_region_size(buf)) {
perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED);
@@ -1111,13 +1151,17 @@ static void pt_buffer_reset_offsets(struct pt_buffer *buf, unsigned long head)
if (buf->snapshot)
head &= (buf->nr_pages << PAGE_SHIFT) - 1;
-pg = (head >> PAGE_SHIFT) & (buf->nr_pages - 1);
-te = pt_topa_entry_for_page(buf, pg);
-cur_tp = topa_entry_to_page(te);
-buf->cur = &cur_tp->topa;
-buf->cur_idx = te - TOPA_ENTRY(buf->cur, 0);
-buf->output_off = head & (pt_buffer_region_size(buf) - 1);
+if (!buf->single) {
+pg = (head >> PAGE_SHIFT) & (buf->nr_pages - 1);
+te = pt_topa_entry_for_page(buf, pg);
+cur_tp = topa_entry_to_page(te);
+buf->cur = &cur_tp->topa;
+buf->cur_idx = te - TOPA_ENTRY(buf->cur, 0);
+buf->output_off = head & (pt_buffer_region_size(buf) - 1);
+} else {
+buf->output_off = head;
+}
local64_set(&buf->head, head);
local_set(&buf->data_size, 0);
@@ -1131,6 +1175,9 @@ static void pt_buffer_fini_topa(struct pt_buffer *buf)
{
struct topa *topa, *iter;
+if (buf->single)
+return;
list_for_each_entry_safe(topa, iter, &buf->tables, list) {
/*
* right now, this is in free_aux() path only, so
@@ -1176,6 +1223,36 @@ static int pt_buffer_init_topa(struct pt_buffer *buf, int cpu,
return 0;
}
+static int pt_buffer_try_single(struct pt_buffer *buf, int nr_pages)
+{
+struct page *p = virt_to_page(buf->data_pages[0]);
+int ret = -ENOTSUPP, order = 0;
+/*
+ * We can use single range output mode
+ * + in snapshot mode, where we don't need interrupts;
+ * + if the hardware supports it;
+ * + if the entire buffer is one contiguous allocation.
+ */
+if (!buf->snapshot)
+goto out;
+if (!intel_pt_validate_hw_cap(PT_CAP_single_range_output))
+goto out;
+if (PagePrivate(p))
+order = page_private(p);
+if (1 << order != nr_pages)
+goto out;
+buf->single = true;
+buf->nr_pages = nr_pages;
+ret = 0;
+out:
+return ret;
+}
/**
* pt_buffer_setup_aux() - set up topa tables for a PT buffer
* @cpu: Cpu on which to allocate, -1 means current.
@@ -1198,6 +1275,13 @@ pt_buffer_setup_aux(struct perf_event *event, void **pages,
if (!nr_pages)
return NULL;
+/*
+ * Only support AUX sampling in snapshot mode, where we don't
+ * generate NMIs.
+ */
+if (event->attr.aux_sample_size && !snapshot)
+return NULL;
if (cpu == -1)
cpu = raw_smp_processor_id();
node = cpu_to_node(cpu);
@@ -1213,6 +1297,10 @@ pt_buffer_setup_aux(struct perf_event *event, void **pages,
INIT_LIST_HEAD(&buf->tables);
+ret = pt_buffer_try_single(buf, nr_pages);
+if (!ret)
+return buf;
ret = pt_buffer_init_topa(buf, cpu, nr_pages, GFP_KERNEL);
if (ret) {
kfree(buf);
@@ -1379,9 +1467,8 @@ void intel_pt_interrupt(void)
return;
}
-pt_config_buffer(topa_to_page(buf->cur)->table, buf->cur_idx,
-buf->output_off);
-pt_config(event);
+pt_config_buffer(buf);
+pt_config_start(event);
}
}
@@ -1444,8 +1531,7 @@ static void pt_event_start(struct perf_event *event, int mode)
WRITE_ONCE(pt->handle_nmi, 1);
hwc->state = 0;
-pt_config_buffer(topa_to_page(buf->cur)->table, buf->cur_idx,
-buf->output_off);
+pt_config_buffer(buf);
pt_config(event);
return;
@@ -1496,6 +1582,52 @@ static void pt_event_stop(struct perf_event *event, int mode)
}
}
+static long pt_event_snapshot_aux(struct perf_event *event,
+struct perf_output_handle *handle,
+unsigned long size)
+{
+struct pt *pt = this_cpu_ptr(&pt_ctx);
+struct pt_buffer *buf = perf_get_aux(&pt->handle);
+unsigned long from = 0, to;
+long ret;
+if (WARN_ON_ONCE(!buf))
+return 0;
+/*
+ * Sampling is only allowed on snapshot events;
+ * see pt_buffer_setup_aux().
+ */
+if (WARN_ON_ONCE(!buf->snapshot))
+return 0;
+/*
+ * Here, handle_nmi tells us if the tracing is on
+ */
+if (READ_ONCE(pt->handle_nmi))
+pt_config_stop(event);
+pt_read_offset(buf);
+pt_update_head(pt);
+to = local_read(&buf->data_size);
+if (to < size)
+from = buf->nr_pages << PAGE_SHIFT;
+from += to - size;
+ret = perf_output_copy_aux(&pt->handle, handle, from, to);
+/*
+ * If the tracing was on when we turned up, restart it.
+ * Compiler barrier not needed as we couldn't have been
+ * preempted by anything that touches pt->handle_nmi.
+ */
+if (pt->handle_nmi)
+pt_config_start(event);
+return ret;
+}
static void pt_event_del(struct perf_event *event, int mode)
{
pt_event_stop(event, PERF_EF_UPDATE);
@@ -1615,6 +1747,7 @@ static __init int pt_init(void)
pt_pmu.pmu.del = pt_event_del;
pt_pmu.pmu.start = pt_event_start;
pt_pmu.pmu.stop = pt_event_stop;
+pt_pmu.pmu.snapshot_aux = pt_event_snapshot_aux;
pt_pmu.pmu.read = pt_event_read;
pt_pmu.pmu.setup_aux = pt_buffer_setup_aux;
pt_pmu.pmu.free_aux = pt_buffer_free_aux;
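
Together with the pmu.snapshot_aux callback above, AUX area sampling reaches userspace through two ABI additions from this series: the PERF_SAMPLE_AUX sample type and the new perf_event_attr::aux_sample_size field (perf record drives these via its new aux-sample-size config term). A minimal sketch of a raw perf_event_open() user asking for 8 KiB of Intel PT data per sample; the event would additionally need to be grouped under an intel_pt// leader, which is omitted here:

    struct perf_event_attr attr = {
            .size            = sizeof(attr),
            .type            = PERF_TYPE_HARDWARE,
            .config          = PERF_COUNT_HW_CPU_CYCLES,
            .sample_period   = 100000,
            .sample_type     = PERF_SAMPLE_IP | PERF_SAMPLE_AUX, /* new sample type */
            .aux_sample_size = 8192,             /* new field: AUX bytes per sample */
    };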

View File

@@ -64,6 +64,7 @@ struct pt_pmu {
* @lost: if data was lost/truncated
* @head: logical write offset inside the buffer
* @snapshot: if this is for a snapshot/overwrite counter
+ * @single: use Single Range Output instead of ToPA
* @stop_pos: STOP topa entry index
* @intr_pos: INT topa entry index
* @stop_te: STOP topa entry pointer
@@ -80,6 +81,7 @@ struct pt_buffer {
local_t data_size;
local64_t head;
bool snapshot;
+bool single;
long stop_pos, intr_pos;
struct topa_entry *stop_te, *intr_te;
void **data_pages;
@@ -111,16 +113,20 @@ struct pt_filters {
/**
* struct pt - per-cpu pt context
* @handle: perf output handle
* @filters: last configured filters
* @handle_nmi: do handle PT PMI on this cpu, there's an active event
* @vmx_on: 1 if VMX is ON on this cpu
+ * @output_base: cached RTIT_OUTPUT_BASE MSR value
+ * @output_mask: cached RTIT_OUTPUT_MASK MSR value
*/
struct pt {
struct perf_output_handle handle;
struct pt_filters filters;
int handle_nmi;
int vmx_on;
+u64 output_base;
+u64 output_mask;
};
#endif /* __INTEL_PT_H__ */

View File

@@ -682,6 +682,14 @@ struct x86_pmu {
*/
atomic_t lbr_exclusive[x86_lbr_exclusive_max];
+/*
+ * perf task context (i.e. struct perf_event_context::task_ctx_data)
+ * switch helper to bridge calls from perf/core to perf/x86.
+ * See struct pmu::swap_task_ctx() usage for examples;
+ */
+void (*swap_task_ctx)(struct perf_event_context *prev,
+struct perf_event_context *next);
/*
* AMD bits
*/
@@ -1016,6 +1024,9 @@ void intel_pmu_store_pebs_lbrs(struct pebs_lbr *lbr);
void intel_ds_init(void);
+void intel_pmu_lbr_swap_task_ctx(struct perf_event_context *prev,
+struct perf_event_context *next);
void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in);
u64 lbr_from_signext_quirk_wr(u64 val);

View File

@@ -7,9 +7,11 @@
# define __ASM_FORM_RAW(x) x
# define __ASM_FORM_COMMA(x) x,
#else
-# define __ASM_FORM(x) " " #x " "
-# define __ASM_FORM_RAW(x) #x
-# define __ASM_FORM_COMMA(x) " " #x ","
+#include <linux/stringify.h>
+# define __ASM_FORM(x) " " __stringify(x) " "
+# define __ASM_FORM_RAW(x) __stringify(x)
+# define __ASM_FORM_COMMA(x) " " __stringify(x) ","
#endif
#ifndef __x86_64__
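
The move to __stringify() matters because plain `#x` stringifies its argument without macro-expanding it, while the two-level form expands first; that is what lets the Xen/KVM emulate-prefix byte lists below be pasted into asm strings. A tiny stand-alone illustration (macro names here are illustrative, not from the tree):

    #define STR_RAW(x) #x
    #define STR(x)     STR_RAW(x)   /* argument is expanded, then stringified */

    #define BYTES 0x0f,0x0b
    /* STR_RAW(BYTES) yields "BYTES"; STR(BYTES) yields "0x0f,0x0b" */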

View File

@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_EMULATE_PREFIX_H
+#define _ASM_X86_EMULATE_PREFIX_H
+
+/*
+ * Virt escape sequences to trigger instruction emulation;
+ * ideally these would decode to 'whole' instruction and not destroy
+ * the instruction stream; sadly this is not true for the 'kvm' one :/
+ */
+#define __XEN_EMULATE_PREFIX 0x0f,0x0b,0x78,0x65,0x6e /* ud2 ; .ascii "xen" */
+#define __KVM_EMULATE_PREFIX 0x0f,0x0b,0x6b,0x76,0x6d /* ud2 ; .ascii "kvm" */
+
+#endif
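
A guest places these bytes immediately before an instruction it wants the hypervisor to emulate: the ud2 raises #UD, the hypervisor recognizes the five-byte signature, skips it, and emulates the instruction that follows (see the handle_ud() hunk later in this diff). A hypothetical guest-side use of the KVM variant, which only takes effect when the host sets kvm's force_emulation_prefix parameter:

    unsigned int eax = 0, ebx, ecx, edx;

    /* ud2 ; .ascii "kvm" ; cpuid -- CPUID is emulated, not executed natively */
    asm volatile(".byte 0x0f, 0x0b, 0x6b, 0x76, 0x6d; cpuid"
                 : "+a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx));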

View File

@@ -45,6 +45,7 @@ struct insn {
struct insn_field immediate2; /* for 64bit imm or seg16 */
};
+int emulate_prefix_size;
insn_attr_t attr;
unsigned char opnd_bytes;
unsigned char addr_bytes;
@@ -128,6 +129,11 @@ static inline int insn_is_evex(struct insn *insn)
return (insn->vex_prefix.nbytes == 4);
}
+static inline int insn_has_emulate_prefix(struct insn *insn)
+{
+return !!insn->emulate_prefix_size;
+}
/* Ensure this instruction is decoded completely */
static inline int insn_complete(struct insn *insn)
{
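
With the new field in place, any user of the in-kernel decoder can detect the prefix. A short sketch of typical usage, mirroring what the kprobes hunk later in this diff does (kernel context assumed):

    struct insn insn;

    kernel_insn_init(&insn, addr, MAX_INSN_SIZE);
    insn_get_length(&insn);         /* decodes prefixes, opcode, operands */

    if (insn_has_emulate_prefix(&insn))
            return 0;               /* refuse to probe, as kprobes does */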

View File

@@ -379,12 +379,9 @@ struct xen_pmu_arch {
* Prefix forces emulation of some non-trapping instructions.
* Currently only CPUID.
*/
-#ifdef __ASSEMBLY__
-#define XEN_EMULATE_PREFIX .byte 0x0f,0x0b,0x78,0x65,0x6e ;
-#define XEN_CPUID XEN_EMULATE_PREFIX cpuid
-#else
-#define XEN_EMULATE_PREFIX ".byte 0x0f,0x0b,0x78,0x65,0x6e ; "
-#define XEN_CPUID XEN_EMULATE_PREFIX "cpuid"
-#endif
+#include <asm/emulate_prefix.h>
+#define XEN_EMULATE_PREFIX __ASM_FORM(.byte __XEN_EMULATE_PREFIX ;)
+#define XEN_CPUID XEN_EMULATE_PREFIX __ASM_FORM(cpuid)
#endif /* _ASM_X86_XEN_INTERFACE_H */

View File

@@ -351,6 +351,10 @@ int __copy_instruction(u8 *dest, u8 *src, u8 *real, struct insn *insn)
kernel_insn_init(insn, dest, MAX_INSN_SIZE);
insn_get_length(insn);
+/* We can not probe force emulate prefixed instruction */
+if (insn_has_emulate_prefix(insn))
+return 0;
/* Another subsystem puts a breakpoint, failed to recover */
if (insn->opcode.bytes[0] == BREAKPOINT_INSTRUCTION)
return 0;

View File

@@ -68,6 +68,7 @@
#include <asm/mshyperv.h>
#include <asm/hypervisor.h>
#include <asm/intel_pt.h>
+#include <asm/emulate_prefix.h>
#include <clocksource/hyperv_timer.h>
#define CREATE_TRACE_POINTS
@@ -5492,6 +5493,7 @@ EXPORT_SYMBOL_GPL(kvm_write_guest_virt_system);
int handle_ud(struct kvm_vcpu *vcpu)
{
+static const char kvm_emulate_prefix[] = { __KVM_EMULATE_PREFIX };
int emul_type = EMULTYPE_TRAP_UD;
char sig[5]; /* ud2; .ascii "kvm" */
struct x86_exception e;
@@ -5499,7 +5501,7 @@ int handle_ud(struct kvm_vcpu *vcpu)
if (force_emulation_prefix &&
kvm_read_guest_virt(vcpu, kvm_get_linear_rip(vcpu),
sig, sizeof(sig), &e) == 0 &&
-memcmp(sig, "\xf\xbkvm", sizeof(sig)) == 0) {
+memcmp(sig, kvm_emulate_prefix, sizeof(sig)) == 0) {
kvm_rip_write(vcpu, kvm_rip_read(vcpu) + sizeof(sig));
emul_type = EMULTYPE_TRAP_UD_FORCED;
}

View File

@@ -13,6 +13,8 @@
#include <asm/inat.h>
#include <asm/insn.h>
+#include <asm/emulate_prefix.h>
/* Verify next sizeof(t) bytes can be on the same instruction */
#define validate_next(t, insn, n) \
((insn)->next_byte + sizeof(t) + n <= (insn)->end_kaddr)
@@ -58,6 +60,36 @@ void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64)
insn->addr_bytes = 4;
}
+static const insn_byte_t xen_prefix[] = { __XEN_EMULATE_PREFIX };
+static const insn_byte_t kvm_prefix[] = { __KVM_EMULATE_PREFIX };
+static int __insn_get_emulate_prefix(struct insn *insn,
+const insn_byte_t *prefix, size_t len)
+{
+size_t i;
+for (i = 0; i < len; i++) {
+if (peek_nbyte_next(insn_byte_t, insn, i) != prefix[i])
+goto err_out;
+}
+insn->emulate_prefix_size = len;
+insn->next_byte += len;
+return 1;
+err_out:
+return 0;
+}
+static void insn_get_emulate_prefix(struct insn *insn)
+{
+if (__insn_get_emulate_prefix(insn, xen_prefix, sizeof(xen_prefix)))
+return;
+__insn_get_emulate_prefix(insn, kvm_prefix, sizeof(kvm_prefix));
+}
/**
* insn_get_prefixes - scan x86 instruction prefix bytes
* @insn: &struct insn containing instruction
@@ -76,6 +108,8 @@ void insn_get_prefixes(struct insn *insn)
if (prefixes->got)
return;
+insn_get_emulate_prefix(insn);
nb = 0;
lb = 0;
b = peek_next(insn_byte_t, insn);

View File

@@ -333,7 +333,7 @@ AVXcode: 1
06: CLTS
07: SYSRET (o64)
08: INVD
-09: WBINVD
+09: WBINVD | WBNOINVD (F3)
0a:
0b: UD2 (1B)
0c:
@@ -364,7 +364,7 @@ AVXcode: 1
# a ModR/M byte.
1a: BNDCL Gv,Ev (F3) | BNDCU Gv,Ev (F2) | BNDMOV Gv,Ev (66) | BNDLDX Gv,Ev
1b: BNDCN Gv,Ev (F2) | BNDMOV Ev,Gv (66) | BNDMK Gv,Ev (F3) | BNDSTX Ev,Gv
-1c:
+1c: Grp20 (1A),(1C)
1d:
1e:
1f: NOP Ev
@@ -792,6 +792,8 @@ f3: Grp17 (1A)
f5: BZHI Gy,Ey,By (v) | PEXT Gy,By,Ey (F3),(v) | PDEP Gy,By,Ey (F2),(v)
f6: ADCX Gy,Ey (66) | ADOX Gy,Ey (F3) | MULX By,Gy,rDX,Ey (F2),(v)
f7: BEXTR Gy,Ey,By (v) | SHLX Gy,Ey,By (66),(v) | SARX Gy,Ey,By (F3),(v) | SHRX Gy,Ey,By (F2),(v)
+f8: MOVDIR64B Gv,Mdqq (66) | ENQCMD Gv,Mdqq (F2) | ENQCMDS Gv,Mdqq (F3)
+f9: MOVDIRI My,Gy
EndTable
Table: 3-byte opcode 2 (0x0f 0x3a)
@@ -943,9 +945,9 @@ GrpTable: Grp6
EndTable
GrpTable: Grp7
-0: SGDT Ms | VMCALL (001),(11B) | VMLAUNCH (010),(11B) | VMRESUME (011),(11B) | VMXOFF (100),(11B)
-1: SIDT Ms | MONITOR (000),(11B) | MWAIT (001),(11B) | CLAC (010),(11B) | STAC (011),(11B)
-2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) | VMFUNC (100),(11B) | XEND (101)(11B) | XTEST (110)(11B)
+0: SGDT Ms | VMCALL (001),(11B) | VMLAUNCH (010),(11B) | VMRESUME (011),(11B) | VMXOFF (100),(11B) | PCONFIG (101),(11B) | ENCLV (000),(11B)
+1: SIDT Ms | MONITOR (000),(11B) | MWAIT (001),(11B) | CLAC (010),(11B) | STAC (011),(11B) | ENCLS (111),(11B)
+2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) | VMFUNC (100),(11B) | XEND (101)(11B) | XTEST (110)(11B) | ENCLU (111),(11B)
3: LIDT Ms
4: SMSW Mw/Rv
5: rdpkru (110),(11B) | wrpkru (111),(11B)
@@ -1020,7 +1022,7 @@ GrpTable: Grp15
3: vstmxcsr Md (v1) | WRGSBASE Ry (F3),(11B)
4: XSAVE | ptwrite Ey (F3),(11B)
5: XRSTOR | lfence (11B)
-6: XSAVEOPT | clwb (66) | mfence (11B)
+6: XSAVEOPT | clwb (66) | mfence (11B) | TPAUSE Rd (66),(11B) | UMONITOR Rv (F3),(11B) | UMWAIT Rd (F2),(11B)
7: clflush | clflushopt (66) | sfence (11B)
EndTable
@@ -1051,6 +1053,10 @@ GrpTable: Grp19
6: vscatterpf1qps/d Wx (66),(ev)
EndTable
+GrpTable: Grp20
+0: cldemote Mb
+EndTable
# AMD's Prefetch Group
GrpTable: GrpP
0: PREFETCH

View File

@@ -1818,6 +1818,14 @@ union security_list_options {
void (*bpf_prog_free_security)(struct bpf_prog_aux *aux);
#endif /* CONFIG_BPF_SYSCALL */
int (*locked_down)(enum lockdown_reason what);
+#ifdef CONFIG_PERF_EVENTS
+int (*perf_event_open)(struct perf_event_attr *attr, int type);
+int (*perf_event_alloc)(struct perf_event *event);
+void (*perf_event_free)(struct perf_event *event);
+int (*perf_event_read)(struct perf_event *event);
+int (*perf_event_write)(struct perf_event *event);
+#endif
};
struct security_hook_heads {
@@ -2060,6 +2068,13 @@ struct security_hook_heads {
struct hlist_head bpf_prog_free_security;
#endif /* CONFIG_BPF_SYSCALL */
struct hlist_head locked_down;
+#ifdef CONFIG_PERF_EVENTS
+struct hlist_head perf_event_open;
+struct hlist_head perf_event_alloc;
+struct hlist_head perf_event_free;
+struct hlist_head perf_event_read;
+struct hlist_head perf_event_write;
+#endif
} __randomize_layout;
/*

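These hooks implement the LSM/SELinux perf controls called out at the top of the pull: security_perf_event_open() is what the new perf_allow_*() helpers consult, and the alloc/free/read/write hooks let a security module label and police individual events. A hypothetical minimal module using them (illustrative only; the real consumer added by this series is SELinux):

    static int example_perf_event_open(struct perf_event_attr *attr, int type)
    {
            /* type is PERF_SECURITY_OPEN, _KERNEL, _CPU or _TRACEPOINT */
            if (type != PERF_SECURITY_OPEN && !capable(CAP_SYS_ADMIN))
                    return -EACCES;
            return 0;
    }

    static struct security_hook_list example_hooks[] __lsm_ro_after_init = {
            LSM_HOOK_INIT(perf_event_open, example_perf_event_open),
    };
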
Some files were not shown because too many files have changed in this diff.