Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull kvm fixes from Paolo Bonzini:
 "ARM:

   - Fix the handling of the phycal timer offset when FEAT_ECV and
     CNTPOFF_EL2 are implemented

   - Restore the functionnality of Permission Indirection that was
     broken by the Fine Grained Trapping rework

   - Cleanup some PMU event sharing code

  MIPS:

   - Fix W=1 build

  s390:

   - One small fix for gisa to avoid stalls

  x86:

   - Truncate writes to PMU counters to the counter's width to avoid
     spurious overflows when emulating counter events in software

   - Set the LVTPC entry mask bit when handling a PMI (to match
     Intel-defined architectural behavior)

   - Treat KVM_REQ_PMI as a wake event instead of queueing host IRQ work
     to kick the guest out of emulated halt

   - Fix for loading XSAVE state from an old kernel into a new one

   - Fixes for AMD AVIC

  selftests:

   - Play nice with %llx when formatting guest printf and assert
     statements

   - Clean up stale test metadata

   - Zero-initialize structures in memslot perf test to workaround a
     suspected 'may be used uninitialized' false positives from GCC"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (21 commits)
  KVM: arm64: timers: Correctly handle TGE flip with CNTPOFF_EL2
  KVM: arm64: POR{E0}_EL1 do not need trap handlers
  KVM: arm64: Add nPIR{E0}_EL1 to HFG traps
  KVM: MIPS: fix -Wunused-but-set-variable warning
  KVM: arm64: pmu: Drop redundant check for non-NULL kvm_pmu_events
  KVM: SVM: Fix build error when using -Werror=unused-but-set-variable
  x86: KVM: SVM: refresh AVIC inhibition in svm_leave_nested()
  x86: KVM: SVM: add support for Invalid IPI Vector interception
  x86: KVM: SVM: always update the x2avic msr interception
  KVM: selftests: Force load all supported XSAVE state in state test
  KVM: selftests: Load XSAVE state into untouched vCPU during state test
  KVM: selftests: Touch relevant XSAVE state in guest for state test
  KVM: x86: Constrain guest-supported xfeatures only at KVM_GET_XSAVE{2}
  x86/fpu: Allow caller to constrain xfeatures when copying to uabi buffer
  KVM: selftests: Zero-initialize entire test_result in memslot perf test
  KVM: selftests: Remove obsolete and incorrect test case metadata
  KVM: selftests: Treat %llx like %lx when formatting guest printf
  KVM: x86/pmu: Synthesize at most one PMI per VM-exit
  KVM: x86: Mask LVTPC when handling a PMI
  KVM: x86/pmu: Truncate counter value to allowed width on write
  ...
This commit is contained in:
Linus Torvalds
2023-10-16 18:34:17 -07:00
36 changed files with 276 additions and 120 deletions

View File

@@ -344,14 +344,14 @@
*/
#define __HFGRTR_EL2_RES0 (GENMASK(63, 56) | GENMASK(53, 51))
#define __HFGRTR_EL2_MASK GENMASK(49, 0)
#define __HFGRTR_EL2_nMASK (GENMASK(55, 54) | BIT(50))
#define __HFGRTR_EL2_nMASK (GENMASK(58, 57) | GENMASK(55, 54) | BIT(50))
#define __HFGWTR_EL2_RES0 (GENMASK(63, 56) | GENMASK(53, 51) | \
BIT(46) | BIT(42) | BIT(40) | BIT(28) | \
GENMASK(26, 25) | BIT(21) | BIT(18) | \
GENMASK(15, 14) | GENMASK(10, 9) | BIT(2))
#define __HFGWTR_EL2_MASK GENMASK(49, 0)
#define __HFGWTR_EL2_nMASK (GENMASK(55, 54) | BIT(50))
#define __HFGWTR_EL2_nMASK (GENMASK(58, 57) | GENMASK(55, 54) | BIT(50))
#define __HFGITR_EL2_RES0 GENMASK(63, 57)
#define __HFGITR_EL2_MASK GENMASK(54, 0)

View File

@@ -55,11 +55,6 @@ static struct irq_ops arch_timer_irq_ops = {
.get_input_level = kvm_arch_timer_get_input_level,
};
static bool has_cntpoff(void)
{
return (has_vhe() && cpus_have_final_cap(ARM64_HAS_ECV_CNTPOFF));
}
static int nr_timers(struct kvm_vcpu *vcpu)
{
if (!vcpu_has_nv(vcpu))
@@ -180,7 +175,7 @@ u64 kvm_phys_timer_read(void)
return timecounter->cc->read(timecounter->cc);
}
static void get_timer_map(struct kvm_vcpu *vcpu, struct timer_map *map)
void get_timer_map(struct kvm_vcpu *vcpu, struct timer_map *map)
{
if (vcpu_has_nv(vcpu)) {
if (is_hyp_ctxt(vcpu)) {
@@ -548,8 +543,7 @@ static void timer_save_state(struct arch_timer_context *ctx)
timer_set_ctl(ctx, read_sysreg_el0(SYS_CNTP_CTL));
cval = read_sysreg_el0(SYS_CNTP_CVAL);
if (!has_cntpoff())
cval -= timer_get_offset(ctx);
cval -= timer_get_offset(ctx);
timer_set_cval(ctx, cval);
@@ -636,8 +630,7 @@ static void timer_restore_state(struct arch_timer_context *ctx)
cval = timer_get_cval(ctx);
offset = timer_get_offset(ctx);
set_cntpoff(offset);
if (!has_cntpoff())
cval += offset;
cval += offset;
write_sysreg_el0(cval, SYS_CNTP_CVAL);
isb();
write_sysreg_el0(timer_get_ctl(ctx), SYS_CNTP_CTL);

View File

@@ -977,6 +977,8 @@ enum fg_filter_id {
static const struct encoding_to_trap_config encoding_to_fgt[] __initconst = {
/* HFGRTR_EL2, HFGWTR_EL2 */
SR_FGT(SYS_PIR_EL1, HFGxTR, nPIR_EL1, 0),
SR_FGT(SYS_PIRE0_EL1, HFGxTR, nPIRE0_EL1, 0),
SR_FGT(SYS_TPIDR2_EL0, HFGxTR, nTPIDR2_EL0, 0),
SR_FGT(SYS_SMPRI_EL1, HFGxTR, nSMPRI_EL1, 0),
SR_FGT(SYS_ACCDATA_EL1, HFGxTR, nACCDATA_EL1, 0),

View File

@@ -39,6 +39,26 @@ static void __activate_traps(struct kvm_vcpu *vcpu)
___activate_traps(vcpu);
if (has_cntpoff()) {
struct timer_map map;
get_timer_map(vcpu, &map);
/*
* We're entrering the guest. Reload the correct
* values from memory now that TGE is clear.
*/
if (map.direct_ptimer == vcpu_ptimer(vcpu))
val = __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0);
if (map.direct_ptimer == vcpu_hptimer(vcpu))
val = __vcpu_sys_reg(vcpu, CNTHP_CVAL_EL2);
if (map.direct_ptimer) {
write_sysreg_el0(val, SYS_CNTP_CVAL);
isb();
}
}
val = read_sysreg(cpacr_el1);
val |= CPACR_ELx_TTA;
val &= ~(CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN |
@@ -77,6 +97,30 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu)
write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2);
if (has_cntpoff()) {
struct timer_map map;
u64 val, offset;
get_timer_map(vcpu, &map);
/*
* We're exiting the guest. Save the latest CVAL value
* to memory and apply the offset now that TGE is set.
*/
val = read_sysreg_el0(SYS_CNTP_CVAL);
if (map.direct_ptimer == vcpu_ptimer(vcpu))
__vcpu_sys_reg(vcpu, CNTP_CVAL_EL0) = val;
if (map.direct_ptimer == vcpu_hptimer(vcpu))
__vcpu_sys_reg(vcpu, CNTHP_CVAL_EL2) = val;
offset = read_sysreg_s(SYS_CNTPOFF_EL2);
if (map.direct_ptimer && offset) {
write_sysreg_el0(val + offset, SYS_CNTP_CVAL);
isb();
}
}
/*
* ARM errata 1165522 and 1530923 require the actual execution of the
* above before we can switch to the EL2/EL0 translation regime used by

View File

@@ -39,7 +39,7 @@ void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr)
{
struct kvm_pmu_events *pmu = kvm_get_pmu_events();
if (!kvm_arm_support_pmu_v3() || !pmu || !kvm_pmu_switch_needed(attr))
if (!kvm_arm_support_pmu_v3() || !kvm_pmu_switch_needed(attr))
return;
if (!attr->exclude_host)
@@ -55,7 +55,7 @@ void kvm_clr_pmu_events(u32 clr)
{
struct kvm_pmu_events *pmu = kvm_get_pmu_events();
if (!kvm_arm_support_pmu_v3() || !pmu)
if (!kvm_arm_support_pmu_v3())
return;
pmu->events_host &= ~clr;

View File

@@ -2122,8 +2122,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_PMMIR_EL1), trap_raz_wi },
{ SYS_DESC(SYS_MAIR_EL1), access_vm_reg, reset_unknown, MAIR_EL1 },
{ SYS_DESC(SYS_PIRE0_EL1), access_vm_reg, reset_unknown, PIRE0_EL1 },
{ SYS_DESC(SYS_PIR_EL1), access_vm_reg, reset_unknown, PIR_EL1 },
{ SYS_DESC(SYS_PIRE0_EL1), NULL, reset_unknown, PIRE0_EL1 },
{ SYS_DESC(SYS_PIR_EL1), NULL, reset_unknown, PIR_EL1 },
{ SYS_DESC(SYS_AMAIR_EL1), access_vm_reg, reset_amair_el1, AMAIR_EL1 },
{ SYS_DESC(SYS_LORSA_EL1), trap_loregion },

View File

@@ -592,7 +592,7 @@ static int kvm_mips_map_page(struct kvm_vcpu *vcpu, unsigned long gpa,
gfn_t gfn = gpa >> PAGE_SHIFT;
int srcu_idx, err;
kvm_pfn_t pfn;
pte_t *ptep, entry, old_pte;
pte_t *ptep, entry;
bool writeable;
unsigned long prot_bits;
unsigned long mmu_seq;
@@ -664,7 +664,6 @@ retry:
entry = pfn_pte(pfn, __pgprot(prot_bits));
/* Write the PTE */
old_pte = *ptep;
set_pte(ptep, entry);
err = 0;

View File

@@ -303,11 +303,6 @@ static inline u8 gisa_get_ipm_or_restore_iam(struct kvm_s390_gisa_interrupt *gi)
return 0;
}
static inline int gisa_in_alert_list(struct kvm_s390_gisa *gisa)
{
return READ_ONCE(gisa->next_alert) != (u32)virt_to_phys(gisa);
}
static inline void gisa_set_ipm_gisc(struct kvm_s390_gisa *gisa, u32 gisc)
{
set_bit_inv(IPM_BIT_OFFSET + gisc, (unsigned long *) gisa);
@@ -3216,11 +3211,12 @@ void kvm_s390_gisa_destroy(struct kvm *kvm)
if (!gi->origin)
return;
if (gi->alert.mask)
KVM_EVENT(3, "vm 0x%pK has unexpected iam 0x%02x",
kvm, gi->alert.mask);
while (gisa_in_alert_list(gi->origin))
cpu_relax();
WARN(gi->alert.mask != 0x00,
"unexpected non zero alert.mask 0x%02x",
gi->alert.mask);
gi->alert.mask = 0x00;
if (gisa_set_iam(gi->origin, gi->alert.mask))
process_gib_alert_list();
hrtimer_cancel(&gi->timer);
gi->origin = NULL;
VM_EVENT(kvm, 3, "gisa 0x%pK destroyed", gisa);

View File

@@ -157,7 +157,8 @@ static inline void fpu_update_guest_xfd(struct fpu_guest *guest_fpu, u64 xfd) {
static inline void fpu_sync_guest_vmexit_xfd_state(void) { }
#endif
extern void fpu_copy_guest_fpstate_to_uabi(struct fpu_guest *gfpu, void *buf, unsigned int size, u32 pkru);
extern void fpu_copy_guest_fpstate_to_uabi(struct fpu_guest *gfpu, void *buf,
unsigned int size, u64 xfeatures, u32 pkru);
extern int fpu_copy_uabi_to_guest_fpstate(struct fpu_guest *gfpu, const void *buf, u64 xcr0, u32 *vpkru);
static inline void fpstate_set_confidential(struct fpu_guest *gfpu)

View File

@@ -528,7 +528,6 @@ struct kvm_pmu {
u64 raw_event_mask;
struct kvm_pmc gp_counters[KVM_INTEL_PMC_MAX_GENERIC];
struct kvm_pmc fixed_counters[KVM_PMC_MAX_FIXED];
struct irq_work irq_work;
/*
* Overlay the bitmap with a 64-bit atomic so that all bits can be

View File

@@ -268,6 +268,7 @@ enum avic_ipi_failure_cause {
AVIC_IPI_FAILURE_TARGET_NOT_RUNNING,
AVIC_IPI_FAILURE_INVALID_TARGET,
AVIC_IPI_FAILURE_INVALID_BACKING_PAGE,
AVIC_IPI_FAILURE_INVALID_IPI_VECTOR,
};
#define AVIC_PHYSICAL_MAX_INDEX_MASK GENMASK_ULL(8, 0)

View File

@@ -369,14 +369,15 @@ int fpu_swap_kvm_fpstate(struct fpu_guest *guest_fpu, bool enter_guest)
EXPORT_SYMBOL_GPL(fpu_swap_kvm_fpstate);
void fpu_copy_guest_fpstate_to_uabi(struct fpu_guest *gfpu, void *buf,
unsigned int size, u32 pkru)
unsigned int size, u64 xfeatures, u32 pkru)
{
struct fpstate *kstate = gfpu->fpstate;
union fpregs_state *ustate = buf;
struct membuf mb = { .p = buf, .left = size };
if (cpu_feature_enabled(X86_FEATURE_XSAVE)) {
__copy_xstate_to_uabi_buf(mb, kstate, pkru, XSTATE_COPY_XSAVE);
__copy_xstate_to_uabi_buf(mb, kstate, xfeatures, pkru,
XSTATE_COPY_XSAVE);
} else {
memcpy(&ustate->fxsave, &kstate->regs.fxsave,
sizeof(ustate->fxsave));

View File

@@ -1049,6 +1049,7 @@ static void copy_feature(bool from_xstate, struct membuf *to, void *xstate,
* __copy_xstate_to_uabi_buf - Copy kernel saved xstate to a UABI buffer
* @to: membuf descriptor
* @fpstate: The fpstate buffer from which to copy
* @xfeatures: The mask of xfeatures to save (XSAVE mode only)
* @pkru_val: The PKRU value to store in the PKRU component
* @copy_mode: The requested copy mode
*
@@ -1059,7 +1060,8 @@ static void copy_feature(bool from_xstate, struct membuf *to, void *xstate,
* It supports partial copy but @to.pos always starts from zero.
*/
void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate,
u32 pkru_val, enum xstate_copy_mode copy_mode)
u64 xfeatures, u32 pkru_val,
enum xstate_copy_mode copy_mode)
{
const unsigned int off_mxcsr = offsetof(struct fxregs_state, mxcsr);
struct xregs_state *xinit = &init_fpstate.regs.xsave;
@@ -1083,7 +1085,7 @@ void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate,
break;
case XSTATE_COPY_XSAVE:
header.xfeatures &= fpstate->user_xfeatures;
header.xfeatures &= fpstate->user_xfeatures & xfeatures;
break;
}
@@ -1185,6 +1187,7 @@ void copy_xstate_to_uabi_buf(struct membuf to, struct task_struct *tsk,
enum xstate_copy_mode copy_mode)
{
__copy_xstate_to_uabi_buf(to, tsk->thread.fpu.fpstate,
tsk->thread.fpu.fpstate->user_xfeatures,
tsk->thread.pkru, copy_mode);
}
@@ -1536,10 +1539,7 @@ static int fpstate_realloc(u64 xfeatures, unsigned int ksize,
fpregs_restore_userregs();
newfps->xfeatures = curfps->xfeatures | xfeatures;
if (!guest_fpu)
newfps->user_xfeatures = curfps->user_xfeatures | xfeatures;
newfps->user_xfeatures = curfps->user_xfeatures | xfeatures;
newfps->xfd = curfps->xfd & ~xfeatures;
/* Do the final updates within the locked region */

View File

@@ -43,7 +43,8 @@ enum xstate_copy_mode {
struct membuf;
extern void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate,
u32 pkru_val, enum xstate_copy_mode copy_mode);
u64 xfeatures, u32 pkru_val,
enum xstate_copy_mode copy_mode);
extern void copy_xstate_to_uabi_buf(struct membuf to, struct task_struct *tsk,
enum xstate_copy_mode mode);
extern int copy_uabi_from_kernel_to_xstate(struct fpstate *fpstate, const void *kbuf, u32 *pkru);

View File

@@ -360,14 +360,6 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
vcpu->arch.guest_supported_xcr0 =
cpuid_get_supported_xcr0(vcpu->arch.cpuid_entries, vcpu->arch.cpuid_nent);
/*
* FP+SSE can always be saved/restored via KVM_{G,S}ET_XSAVE, even if
* XSAVE/XCRO are not exposed to the guest, and even if XSAVE isn't
* supported by the host.
*/
vcpu->arch.guest_fpu.fpstate->user_xfeatures = vcpu->arch.guest_supported_xcr0 |
XFEATURE_MASK_FPSSE;
kvm_update_pv_runtime(vcpu);
vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);

View File

@@ -2759,13 +2759,17 @@ int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type)
{
u32 reg = kvm_lapic_get_reg(apic, lvt_type);
int vector, mode, trig_mode;
int r;
if (kvm_apic_hw_enabled(apic) && !(reg & APIC_LVT_MASKED)) {
vector = reg & APIC_VECTOR_MASK;
mode = reg & APIC_MODE_MASK;
trig_mode = reg & APIC_LVT_LEVEL_TRIGGER;
return __apic_accept_irq(apic, mode, vector, 1, trig_mode,
NULL);
r = __apic_accept_irq(apic, mode, vector, 1, trig_mode, NULL);
if (r && lvt_type == APIC_LVTPC)
kvm_lapic_set_reg(apic, APIC_LVTPC, reg | APIC_LVT_MASKED);
return r;
}
return 0;
}

View File

@@ -93,14 +93,6 @@ void kvm_pmu_ops_update(const struct kvm_pmu_ops *pmu_ops)
#undef __KVM_X86_PMU_OP
}
static void kvm_pmi_trigger_fn(struct irq_work *irq_work)
{
struct kvm_pmu *pmu = container_of(irq_work, struct kvm_pmu, irq_work);
struct kvm_vcpu *vcpu = pmu_to_vcpu(pmu);
kvm_pmu_deliver_pmi(vcpu);
}
static inline void __kvm_perf_overflow(struct kvm_pmc *pmc, bool in_pmi)
{
struct kvm_pmu *pmu = pmc_to_pmu(pmc);
@@ -124,20 +116,7 @@ static inline void __kvm_perf_overflow(struct kvm_pmc *pmc, bool in_pmi)
__set_bit(pmc->idx, (unsigned long *)&pmu->global_status);
}
if (!pmc->intr || skip_pmi)
return;
/*
* Inject PMI. If vcpu was in a guest mode during NMI PMI
* can be ejected on a guest mode re-entry. Otherwise we can't
* be sure that vcpu wasn't executing hlt instruction at the
* time of vmexit and is not going to re-enter guest mode until
* woken up. So we should wake it, but this is impossible from
* NMI context. Do it from irq work instead.
*/
if (in_pmi && !kvm_handling_nmi_from_guest(pmc->vcpu))
irq_work_queue(&pmc_to_pmu(pmc)->irq_work);
else
if (pmc->intr && !skip_pmi)
kvm_make_request(KVM_REQ_PMI, pmc->vcpu);
}
@@ -675,9 +654,6 @@ void kvm_pmu_refresh(struct kvm_vcpu *vcpu)
void kvm_pmu_reset(struct kvm_vcpu *vcpu)
{
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
irq_work_sync(&pmu->irq_work);
static_call(kvm_x86_pmu_reset)(vcpu);
}
@@ -687,7 +663,6 @@ void kvm_pmu_init(struct kvm_vcpu *vcpu)
memset(pmu, 0, sizeof(*pmu));
static_call(kvm_x86_pmu_init)(vcpu);
init_irq_work(&pmu->irq_work, kvm_pmi_trigger_fn);
pmu->event_count = 0;
pmu->need_cleanup = false;
kvm_pmu_refresh(vcpu);

View File

@@ -74,6 +74,12 @@ static inline u64 pmc_read_counter(struct kvm_pmc *pmc)
return counter & pmc_bitmask(pmc);
}
static inline void pmc_write_counter(struct kvm_pmc *pmc, u64 val)
{
pmc->counter += val - pmc_read_counter(pmc);
pmc->counter &= pmc_bitmask(pmc);
}
static inline void pmc_release_perf_event(struct kvm_pmc *pmc)
{
if (pmc->perf_event) {

View File

@@ -529,8 +529,11 @@ int avic_incomplete_ipi_interception(struct kvm_vcpu *vcpu)
case AVIC_IPI_FAILURE_INVALID_BACKING_PAGE:
WARN_ONCE(1, "Invalid backing page\n");
break;
case AVIC_IPI_FAILURE_INVALID_IPI_VECTOR:
/* Invalid IPI with vector < 16 */
break;
default:
pr_err("Unknown IPI interception\n");
vcpu_unimpl(vcpu, "Unknown avic incomplete IPI interception\n");
}
return 1;

View File

@@ -1253,6 +1253,9 @@ void svm_leave_nested(struct kvm_vcpu *vcpu)
nested_svm_uninit_mmu_context(vcpu);
vmcb_mark_all_dirty(svm->vmcb);
if (kvm_apicv_activated(vcpu->kvm))
kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu);
}
kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);

Some files were not shown because too many files have changed in this diff Show More