mirror of
https://github.com/Dasharo/linux.git
synced 2026-03-06 15:25:10 -08:00
Merge tag 'kvm-x86-misc-6.11' of https://github.com/kvm-x86/linux into HEAD
KVM x86 misc changes for 6.11 - Add a global struct to consolidate tracking of host values, e.g. EFER, and move "shadow_phys_bits" into the structure as "maxphyaddr". - Add KVM_CAP_X86_APIC_BUS_CYCLES_NS to allow configuring the effective APIC bus frequency, because TDX. - Print the name of the APICv/AVIC inhibits in the relevant tracepoint. - Clean up KVM's handling of vendor specific emulation to consistently act on "compatible with Intel/AMD", versus checking for a specific vendor. - Misc cleanups
This commit is contained in:
@@ -6483,9 +6483,12 @@ More architecture-specific flags detailing state of the VCPU that may
|
||||
affect the device's behavior. Current defined flags::
|
||||
|
||||
/* x86, set if the VCPU is in system management mode */
|
||||
#define KVM_RUN_X86_SMM (1 << 0)
|
||||
#define KVM_RUN_X86_SMM (1 << 0)
|
||||
/* x86, set if bus lock detected in VM */
|
||||
#define KVM_RUN_BUS_LOCK (1 << 1)
|
||||
#define KVM_RUN_X86_BUS_LOCK (1 << 1)
|
||||
/* x86, set if the VCPU is executing a nested (L2) guest */
|
||||
#define KVM_RUN_X86_GUEST_MODE (1 << 2)
|
||||
|
||||
/* arm64, set for KVM_EXIT_DEBUG */
|
||||
#define KVM_DEBUG_ARCH_HSR_HIGH_VALID (1 << 0)
|
||||
|
||||
@@ -7831,29 +7834,31 @@ Valid bits in args[0] are::
|
||||
#define KVM_BUS_LOCK_DETECTION_OFF (1 << 0)
|
||||
#define KVM_BUS_LOCK_DETECTION_EXIT (1 << 1)
|
||||
|
||||
Enabling this capability on a VM provides userspace with a way to select
|
||||
a policy to handle the bus locks detected in guest. Userspace can obtain
|
||||
the supported modes from the result of KVM_CHECK_EXTENSION and define it
|
||||
through the KVM_ENABLE_CAP.
|
||||
Enabling this capability on a VM provides userspace with a way to select a
|
||||
policy to handle the bus locks detected in guest. Userspace can obtain the
|
||||
supported modes from the result of KVM_CHECK_EXTENSION and define it through
|
||||
the KVM_ENABLE_CAP. The supported modes are mutually-exclusive.
|
||||
|
||||
KVM_BUS_LOCK_DETECTION_OFF and KVM_BUS_LOCK_DETECTION_EXIT are supported
|
||||
currently and mutually exclusive with each other. More bits can be added in
|
||||
the future.
|
||||
This capability allows userspace to force VM exits on bus locks detected in the
|
||||
guest, irrespective of whether the host has enabled split-lock detection
|
||||
(which triggers an #AC exception that KVM intercepts). This capability is
|
||||
intended to mitigate attacks where a malicious/buggy guest can exploit bus
|
||||
locks to degrade the performance of the whole system.
|
||||
|
||||
With KVM_BUS_LOCK_DETECTION_OFF set, bus locks in guest will not cause vm exits
|
||||
so that no additional actions are needed. This is the default mode.
|
||||
If KVM_BUS_LOCK_DETECTION_OFF is set, KVM doesn't force guest bus locks to VM
|
||||
exit, although the host kernel's split-lock #AC detection still applies, if
|
||||
enabled.
|
||||
|
||||
With KVM_BUS_LOCK_DETECTION_EXIT set, vm exits happen when bus lock detected
|
||||
in VM. KVM just exits to userspace when handling them. Userspace can enforce
|
||||
its own throttling or other policy based mitigations.
|
||||
If KVM_BUS_LOCK_DETECTION_EXIT is set, KVM enables a CPU feature that ensures
|
||||
bus locks in the guest trigger a VM exit, and KVM exits to userspace for all
|
||||
such VM exits, e.g. to allow userspace to throttle the offending guest and/or
|
||||
apply some other policy-based mitigation. When exiting to userspace, KVM sets
|
||||
KVM_RUN_X86_BUS_LOCK in vcpu-run->flags, and conditionally sets the exit_reason
|
||||
to KVM_EXIT_X86_BUS_LOCK.
|
||||
|
||||
This capability is aimed to address the thread that VM can exploit bus locks to
|
||||
degree the performance of the whole system. Once the userspace enable this
|
||||
capability and select the KVM_BUS_LOCK_DETECTION_EXIT mode, KVM will set the
|
||||
KVM_RUN_BUS_LOCK flag in vcpu-run->flags field and exit to userspace. Concerning
|
||||
the bus lock vm exit can be preempted by a higher priority VM exit, the exit
|
||||
notifications to userspace can be KVM_EXIT_BUS_LOCK or other reasons.
|
||||
KVM_RUN_BUS_LOCK flag is used to distinguish between them.
|
||||
Note! Detected bus locks may be coincident with other exits to userspace, i.e.
|
||||
KVM_RUN_X86_BUS_LOCK should be checked regardless of the primary exit reason if
|
||||
userspace wants to take action on all detected bus locks.
|
||||
|
||||
7.23 KVM_CAP_PPC_DAWR1
|
||||
----------------------
|
||||
@@ -8137,6 +8142,37 @@ error/annotated fault.
|
||||
|
||||
See KVM_EXIT_MEMORY_FAULT for more information.
|
||||
|
||||
7.35 KVM_CAP_X86_APIC_BUS_CYCLES_NS
|
||||
-----------------------------------
|
||||
|
||||
:Architectures: x86
|
||||
:Target: VM
|
||||
:Parameters: args[0] is the desired APIC bus clock period (the length of one bus cycle), in nanoseconds
|
||||
:Returns: 0 on success, -EINVAL if args[0] contains an invalid value for the
|
||||
frequency or if any vCPUs have been created, -ENXIO if a virtual
|
||||
local APIC has not been created using KVM_CREATE_IRQCHIP.
|
||||
|
||||
This capability sets the VM's APIC bus clock frequency, used by KVM's in-kernel
|
||||
virtual APIC when emulating APIC timers. KVM's default value can be retrieved
|
||||
by KVM_CHECK_EXTENSION.
|
||||
|
||||
Note: Userspace is responsible for correctly configuring CPUID 0x15, a.k.a. the
|
||||
core crystal clock frequency, if a non-zero CPUID 0x15 is exposed to the guest.
|
||||
|
||||
7.36 KVM_CAP_X86_GUEST_MODE
|
||||
---------------------------
|
||||
|
||||
:Architectures: x86
|
||||
:Returns: Informational only, -EINVAL on direct KVM_ENABLE_CAP.
|
||||
|
||||
The presence of this capability indicates that KVM_RUN will update the
|
||||
KVM_RUN_X86_GUEST_MODE bit in kvm_run.flags to indicate whether the
|
||||
vCPU was executing nested guest code when it exited.
|
||||
|
||||
KVM exits with the register state of either the L1 or L2 guest
|
||||
depending on which executed at the time of an exit. Userspace must
|
||||
take care to differentiate between these cases.
|
||||
|
||||
8. Other capabilities.
|
||||
======================
|
||||
|
||||
|
||||
@@ -1208,7 +1208,7 @@ enum kvm_apicv_inhibit {
|
||||
* APIC acceleration is disabled by a module parameter
|
||||
* and/or not supported in hardware.
|
||||
*/
|
||||
APICV_INHIBIT_REASON_DISABLE,
|
||||
APICV_INHIBIT_REASON_DISABLED,
|
||||
|
||||
/*
|
||||
* APIC acceleration is inhibited because AutoEOI feature is
|
||||
@@ -1278,8 +1278,27 @@ enum kvm_apicv_inhibit {
|
||||
* mapping between logical ID and vCPU.
|
||||
*/
|
||||
APICV_INHIBIT_REASON_LOGICAL_ID_ALIASED,
|
||||
|
||||
NR_APICV_INHIBIT_REASONS,
|
||||
};
|
||||
|
||||
#define __APICV_INHIBIT_REASON(reason) \
|
||||
{ BIT(APICV_INHIBIT_REASON_##reason), #reason }
|
||||
|
||||
#define APICV_INHIBIT_REASONS \
|
||||
__APICV_INHIBIT_REASON(DISABLED), \
|
||||
__APICV_INHIBIT_REASON(HYPERV), \
|
||||
__APICV_INHIBIT_REASON(ABSENT), \
|
||||
__APICV_INHIBIT_REASON(BLOCKIRQ), \
|
||||
__APICV_INHIBIT_REASON(PHYSICAL_ID_ALIASED), \
|
||||
__APICV_INHIBIT_REASON(APIC_ID_MODIFIED), \
|
||||
__APICV_INHIBIT_REASON(APIC_BASE_MODIFIED), \
|
||||
__APICV_INHIBIT_REASON(NESTED), \
|
||||
__APICV_INHIBIT_REASON(IRQWIN), \
|
||||
__APICV_INHIBIT_REASON(PIT_REINJ), \
|
||||
__APICV_INHIBIT_REASON(SEV), \
|
||||
__APICV_INHIBIT_REASON(LOGICAL_ID_ALIASED)
|
||||
|
||||
struct kvm_arch {
|
||||
unsigned long n_used_mmu_pages;
|
||||
unsigned long n_requested_mmu_pages;
|
||||
@@ -1365,6 +1384,7 @@ struct kvm_arch {
|
||||
|
||||
u32 default_tsc_khz;
|
||||
bool user_set_tsc;
|
||||
u64 apic_bus_cycle_ns;
|
||||
|
||||
seqcount_raw_spinlock_t pvclock_sc;
|
||||
bool use_master_clock;
|
||||
@@ -1709,7 +1729,6 @@ struct kvm_x86_ops {
|
||||
void (*enable_nmi_window)(struct kvm_vcpu *vcpu);
|
||||
void (*enable_irq_window)(struct kvm_vcpu *vcpu);
|
||||
void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
|
||||
bool (*check_apicv_inhibit_reasons)(enum kvm_apicv_inhibit reason);
|
||||
const unsigned long required_apicv_inhibits;
|
||||
bool allow_apicv_in_x2apic_without_x2apic_virtualization;
|
||||
void (*refresh_apicv_exec_ctrl)(struct kvm_vcpu *vcpu);
|
||||
@@ -1855,7 +1874,6 @@ struct kvm_arch_async_pf {
|
||||
};
|
||||
|
||||
extern u32 __read_mostly kvm_nr_uret_msrs;
|
||||
extern u64 __read_mostly host_efer;
|
||||
extern bool __read_mostly allow_smaller_maxphyaddr;
|
||||
extern bool __read_mostly enable_apicv;
|
||||
extern struct kvm_x86_ops kvm_x86_ops;
|
||||
|
||||
@@ -106,6 +106,7 @@ struct kvm_ioapic_state {
|
||||
|
||||
#define KVM_RUN_X86_SMM (1 << 0)
|
||||
#define KVM_RUN_X86_BUS_LOCK (1 << 1)
|
||||
#define KVM_RUN_X86_GUEST_MODE (1 << 2)
|
||||
|
||||
/* for KVM_GET_REGS and KVM_SET_REGS */
|
||||
struct kvm_regs {
|
||||
|
||||
@@ -335,6 +335,18 @@ static bool kvm_cpuid_has_hyperv(struct kvm_cpuid_entry2 *entries, int nent)
|
||||
#endif
|
||||
}
|
||||
|
||||
static bool guest_cpuid_is_amd_or_hygon(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_cpuid_entry2 *entry;
|
||||
|
||||
entry = kvm_find_cpuid_entry(vcpu, 0);
|
||||
if (!entry)
|
||||
return false;
|
||||
|
||||
return is_guest_vendor_amd(entry->ebx, entry->ecx, entry->edx) ||
|
||||
is_guest_vendor_hygon(entry->ebx, entry->ecx, entry->edx);
|
||||
}
|
||||
|
||||
static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_lapic *apic = vcpu->arch.apic;
|
||||
|
||||
@@ -102,24 +102,6 @@ static __always_inline void guest_cpuid_clear(struct kvm_vcpu *vcpu,
|
||||
*reg &= ~__feature_bit(x86_feature);
|
||||
}
|
||||
|
||||
static inline bool guest_cpuid_is_amd_or_hygon(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_cpuid_entry2 *best;
|
||||
|
||||
best = kvm_find_cpuid_entry(vcpu, 0);
|
||||
return best &&
|
||||
(is_guest_vendor_amd(best->ebx, best->ecx, best->edx) ||
|
||||
is_guest_vendor_hygon(best->ebx, best->ecx, best->edx));
|
||||
}
|
||||
|
||||
static inline bool guest_cpuid_is_intel(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_cpuid_entry2 *best;
|
||||
|
||||
best = kvm_find_cpuid_entry(vcpu, 0);
|
||||
return best && is_guest_vendor_intel(best->ebx, best->ecx, best->edx);
|
||||
}
|
||||
|
||||
static inline bool guest_cpuid_is_amd_compatible(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return vcpu->arch.is_amd_compatible;
|
||||
|
||||
@@ -2354,50 +2354,6 @@ setup_syscalls_segments(struct desc_struct *cs, struct desc_struct *ss)
|
||||
ss->avl = 0;
|
||||
}
|
||||
|
||||
static bool vendor_intel(struct x86_emulate_ctxt *ctxt)
|
||||
{
|
||||
u32 eax, ebx, ecx, edx;
|
||||
|
||||
eax = ecx = 0;
|
||||
ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, true);
|
||||
return is_guest_vendor_intel(ebx, ecx, edx);
|
||||
}
|
||||
|
||||
static bool em_syscall_is_enabled(struct x86_emulate_ctxt *ctxt)
|
||||
{
|
||||
const struct x86_emulate_ops *ops = ctxt->ops;
|
||||
u32 eax, ebx, ecx, edx;
|
||||
|
||||
/*
|
||||
* syscall should always be enabled in longmode - so only become
|
||||
* vendor specific (cpuid) if other modes are active...
|
||||
*/
|
||||
if (ctxt->mode == X86EMUL_MODE_PROT64)
|
||||
return true;
|
||||
|
||||
eax = 0x00000000;
|
||||
ecx = 0x00000000;
|
||||
ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, true);
|
||||
/*
|
||||
* remark: Intel CPUs only support "syscall" in 64bit longmode. Also a
|
||||
* 64bit guest with a 32bit compat-app running will #UD !! While this
|
||||
* behaviour can be fixed (by emulating) into AMD response - CPUs of
|
||||
* AMD can't behave like Intel.
|
||||
*/
|
||||
if (is_guest_vendor_intel(ebx, ecx, edx))
|
||||
return false;
|
||||
|
||||
if (is_guest_vendor_amd(ebx, ecx, edx) ||
|
||||
is_guest_vendor_hygon(ebx, ecx, edx))
|
||||
return true;
|
||||
|
||||
/*
|
||||
* default: (not Intel, not AMD, not Hygon), apply Intel's
|
||||
* stricter rules...
|
||||
*/
|
||||
return false;
|
||||
}
|
||||
|
||||
static int em_syscall(struct x86_emulate_ctxt *ctxt)
|
||||
{
|
||||
const struct x86_emulate_ops *ops = ctxt->ops;
|
||||
@@ -2411,7 +2367,15 @@ static int em_syscall(struct x86_emulate_ctxt *ctxt)
|
||||
ctxt->mode == X86EMUL_MODE_VM86)
|
||||
return emulate_ud(ctxt);
|
||||
|
||||
if (!(em_syscall_is_enabled(ctxt)))
|
||||
/*
|
||||
* Intel compatible CPUs only support SYSCALL in 64-bit mode, whereas
|
||||
* AMD allows SYSCALL in any flavor of protected mode. Note, it's
|
||||
* infeasible to emulate Intel behavior when running on AMD hardware,
|
||||
* as SYSCALL won't fault in the "wrong" mode, i.e. there is no #UD
|
||||
* for KVM to trap-and-emulate, unlike emulating AMD on Intel.
|
||||
*/
|
||||
if (ctxt->mode != X86EMUL_MODE_PROT64 &&
|
||||
ctxt->ops->guest_cpuid_is_intel_compatible(ctxt))
|
||||
return emulate_ud(ctxt);
|
||||
|
||||
ops->get_msr(ctxt, MSR_EFER, &efer);
|
||||
@@ -2471,11 +2435,11 @@ static int em_sysenter(struct x86_emulate_ctxt *ctxt)
|
||||
return emulate_gp(ctxt, 0);
|
||||
|
||||
/*
|
||||
* Not recognized on AMD in compat mode (but is recognized in legacy
|
||||
* mode).
|
||||
* Intel's architecture allows SYSENTER in compatibility mode, but AMD
|
||||
* does not. Note, AMD does allow SYSENTER in legacy protected mode.
|
||||
*/
|
||||
if ((ctxt->mode != X86EMUL_MODE_PROT64) && (efer & EFER_LMA)
|
||||
&& !vendor_intel(ctxt))
|
||||
if ((ctxt->mode != X86EMUL_MODE_PROT64) && (efer & EFER_LMA) &&
|
||||
!ctxt->ops->guest_cpuid_is_intel_compatible(ctxt))
|
||||
return emulate_ud(ctxt);
|
||||
|
||||
/* sysenter/sysexit have not been tested in 64bit mode. */
|
||||
@@ -2647,7 +2611,14 @@ static void string_registers_quirk(struct x86_emulate_ctxt *ctxt)
|
||||
* manner when ECX is zero due to REP-string optimizations.
|
||||
*/
|
||||
#ifdef CONFIG_X86_64
|
||||
if (ctxt->ad_bytes != 4 || !vendor_intel(ctxt))
|
||||
u32 eax, ebx, ecx, edx;
|
||||
|
||||
if (ctxt->ad_bytes != 4)
|
||||
return;
|
||||
|
||||
eax = ecx = 0;
|
||||
ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, true);
|
||||
if (!is_guest_vendor_intel(ebx, ecx, edx))
|
||||
return;
|
||||
|
||||
*reg_write(ctxt, VCPU_REGS_RCX) = 0;
|
||||
|
||||
@@ -1737,7 +1737,8 @@ static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata,
|
||||
data = (u64)vcpu->arch.virtual_tsc_khz * 1000;
|
||||
break;
|
||||
case HV_X64_MSR_APIC_FREQUENCY:
|
||||
data = APIC_BUS_FREQUENCY;
|
||||
data = div64_u64(1000000000ULL,
|
||||
vcpu->kvm->arch.apic_bus_cycle_ns);
|
||||
break;
|
||||
default:
|
||||
kvm_pr_unimpl_rdmsr(vcpu, msr);
|
||||
|
||||
@@ -223,6 +223,7 @@ struct x86_emulate_ops {
|
||||
bool (*guest_has_movbe)(struct x86_emulate_ctxt *ctxt);
|
||||
bool (*guest_has_fxsr)(struct x86_emulate_ctxt *ctxt);
|
||||
bool (*guest_has_rdpid)(struct x86_emulate_ctxt *ctxt);
|
||||
bool (*guest_cpuid_is_intel_compatible)(struct x86_emulate_ctxt *ctxt);
|
||||
|
||||
void (*set_nmi_mask)(struct x86_emulate_ctxt *ctxt, bool masked);
|
||||
|
||||
|
||||
@@ -1557,7 +1557,8 @@ static u32 apic_get_tmcct(struct kvm_lapic *apic)
|
||||
remaining = 0;
|
||||
|
||||
ns = mod_64(ktime_to_ns(remaining), apic->lapic_timer.period);
|
||||
return div64_u64(ns, (APIC_BUS_CYCLE_NS * apic->divide_count));
|
||||
return div64_u64(ns, (apic->vcpu->kvm->arch.apic_bus_cycle_ns *
|
||||
apic->divide_count));
|
||||
}
|
||||
|
||||
static void __report_tpr_access(struct kvm_lapic *apic, bool write)
|
||||
@@ -1973,7 +1974,8 @@ static void start_sw_tscdeadline(struct kvm_lapic *apic)
|
||||
|
||||
static inline u64 tmict_to_ns(struct kvm_lapic *apic, u32 tmict)
|
||||
{
|
||||
return (u64)tmict * APIC_BUS_CYCLE_NS * (u64)apic->divide_count;
|
||||
return (u64)tmict * apic->vcpu->kvm->arch.apic_bus_cycle_ns *
|
||||
(u64)apic->divide_count;
|
||||
}
|
||||
|
||||
static void update_target_expiration(struct kvm_lapic *apic, uint32_t old_divisor)
|
||||
|
||||
@@ -16,8 +16,7 @@
|
||||
#define APIC_DEST_NOSHORT 0x0
|
||||
#define APIC_DEST_MASK 0x800
|
||||
|
||||
#define APIC_BUS_CYCLE_NS 1
|
||||
#define APIC_BUS_FREQUENCY (1000000000ULL / APIC_BUS_CYCLE_NS)
|
||||
#define APIC_BUS_CYCLE_NS_DEFAULT 1
|
||||
|
||||
#define APIC_BROADCAST 0xFF
|
||||
#define X2APIC_BROADCAST 0xFFFFFFFFul
|
||||
|
||||
@@ -57,12 +57,6 @@ static __always_inline u64 rsvd_bits(int s, int e)
|
||||
return ((2ULL << (e - s)) - 1) << s;
|
||||
}
|
||||
|
||||
/*
|
||||
* The number of non-reserved physical address bits irrespective of features
|
||||
* that repurpose legal bits, e.g. MKTME.
|
||||
*/
|
||||
extern u8 __read_mostly shadow_phys_bits;
|
||||
|
||||
static inline gfn_t kvm_mmu_max_gfn(void)
|
||||
{
|
||||
/*
|
||||
@@ -76,30 +70,11 @@ static inline gfn_t kvm_mmu_max_gfn(void)
|
||||
* than hardware's real MAXPHYADDR. Using the host MAXPHYADDR
|
||||
* disallows such SPTEs entirely and simplifies the TDP MMU.
|
||||
*/
|
||||
int max_gpa_bits = likely(tdp_enabled) ? shadow_phys_bits : 52;
|
||||
int max_gpa_bits = likely(tdp_enabled) ? kvm_host.maxphyaddr : 52;
|
||||
|
||||
return (1ULL << (max_gpa_bits - PAGE_SHIFT)) - 1;
|
||||
}
|
||||
|
||||
static inline u8 kvm_get_shadow_phys_bits(void)
|
||||
{
|
||||
/*
|
||||
* boot_cpu_data.x86_phys_bits is reduced when MKTME or SME are detected
|
||||
* in CPU detection code, but the processor treats those reduced bits as
|
||||
* 'keyID' thus they are not reserved bits. Therefore KVM needs to look at
|
||||
* the physical address bits reported by CPUID.
|
||||
*/
|
||||
if (likely(boot_cpu_data.extended_cpuid_level >= 0x80000008))
|
||||
return cpuid_eax(0x80000008) & 0xff;
|
||||
|
||||
/*
|
||||
* Quite weird to have VMX or SVM but not MAXPHYADDR; probably a VM with
|
||||
* custom CPUID. Proceed with whatever the kernel found since these features
|
||||
* aren't virtualizable (SME/SEV also require CPUIDs higher than 0x80000008).
|
||||
*/
|
||||
return boot_cpu_data.x86_phys_bits;
|
||||
}
|
||||
|
||||
u8 kvm_mmu_get_max_tdp_level(void);
|
||||
|
||||
void kvm_mmu_set_mmio_spte_mask(u64 mmio_value, u64 mmio_mask, u64 access_mask);
|
||||
|
||||
@@ -5109,7 +5109,7 @@ static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu,
|
||||
|
||||
static inline u64 reserved_hpa_bits(void)
|
||||
{
|
||||
return rsvd_bits(shadow_phys_bits, 63);
|
||||
return rsvd_bits(kvm_host.maxphyaddr, 63);
|
||||
}
|
||||
|
||||
/*
|
||||
|
||||
@@ -43,7 +43,25 @@ u64 __read_mostly shadow_acc_track_mask;
|
||||
u64 __read_mostly shadow_nonpresent_or_rsvd_mask;
|
||||
u64 __read_mostly shadow_nonpresent_or_rsvd_lower_gfn_mask;
|
||||
|
||||
u8 __read_mostly shadow_phys_bits;
|
||||
static u8 __init kvm_get_host_maxphyaddr(void)
|
||||
{
|
||||
/*
|
||||
* boot_cpu_data.x86_phys_bits is reduced when MKTME or SME are detected
|
||||
* in CPU detection code, but the processor treats those reduced bits as
|
||||
* 'keyID' thus they are not reserved bits. Therefore KVM needs to look at
|
||||
* the physical address bits reported by CPUID, i.e. the raw MAXPHYADDR,
|
||||
* when reasoning about CPU behavior with respect to MAXPHYADDR.
|
||||
*/
|
||||
if (likely(boot_cpu_data.extended_cpuid_level >= 0x80000008))
|
||||
return cpuid_eax(0x80000008) & 0xff;
|
||||
|
||||
/*
|
||||
* Quite weird to have VMX or SVM but not MAXPHYADDR; probably a VM with
|
||||
* custom CPUID. Proceed with whatever the kernel found since these features
|
||||
* aren't virtualizable (SME/SEV also require CPUIDs higher than 0x80000008).
|
||||
*/
|
||||
return boot_cpu_data.x86_phys_bits;
|
||||
}
|
||||
|
||||
void __init kvm_mmu_spte_module_init(void)
|
||||
{
|
||||
@@ -55,6 +73,8 @@ void __init kvm_mmu_spte_module_init(void)
|
||||
* will change when the vendor module is (re)loaded.
|
||||
*/
|
||||
allow_mmio_caching = enable_mmio_caching;
|
||||
|
||||
kvm_host.maxphyaddr = kvm_get_host_maxphyaddr();
|
||||
}
|
||||
|
||||
static u64 generation_mmio_spte_mask(u64 gen)
|
||||
@@ -441,8 +461,6 @@ void kvm_mmu_reset_all_pte_masks(void)
|
||||
u8 low_phys_bits;
|
||||
u64 mask;
|
||||
|
||||
shadow_phys_bits = kvm_get_shadow_phys_bits();
|
||||
|
||||
/*
|
||||
* If the CPU has 46 or less physical address bits, then set an
|
||||
* appropriate mask to guard against L1TF attacks. Otherwise, it is
|
||||
@@ -494,7 +512,7 @@ void kvm_mmu_reset_all_pte_masks(void)
|
||||
* 52-bit physical addresses then there are no reserved PA bits in the
|
||||
* PTEs and so the reserved PA approach must be disabled.
|
||||
*/
|
||||
if (shadow_phys_bits < 52)
|
||||
if (kvm_host.maxphyaddr < 52)
|
||||
mask = BIT_ULL(51) | PT_PRESENT_MASK;
|
||||
else
|
||||
mask = 0;
|
||||
|
||||
@@ -194,7 +194,7 @@ static int pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type, u64 config,
|
||||
attr.sample_period = get_sample_period(pmc, pmc->counter);
|
||||
|
||||
if ((attr.config & HSW_IN_TX_CHECKPOINTED) &&
|
||||
guest_cpuid_is_intel(pmc->vcpu)) {
|
||||
(boot_cpu_has(X86_FEATURE_RTM) || boot_cpu_has(X86_FEATURE_HLE))) {
|
||||
/*
|
||||
* HSW_IN_TX_CHECKPOINTED is not supported with nonzero
|
||||
* period. Just clear the sample period so at least
|
||||
|
||||
@@ -4401,9 +4401,9 @@ void sev_es_prepare_switch_to_guest(struct vcpu_svm *svm, struct sev_es_save_are
|
||||
* isn't saved by VMRUN, that isn't already saved by VMSAVE (performed
|
||||
* by common SVM code).
|
||||
*/
|
||||
hostsa->xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
|
||||
hostsa->xcr0 = kvm_host.xcr0;
|
||||
hostsa->pkru = read_pkru();
|
||||
hostsa->xss = host_xss;
|
||||
hostsa->xss = kvm_host.xss;
|
||||
|
||||
/*
|
||||
* If DebugSwap is enabled, debug registers are loaded but NOT saved by
|
||||
|
||||
@@ -53,6 +53,7 @@
|
||||
#include "svm_onhyperv.h"
|
||||
|
||||
MODULE_AUTHOR("Qumranet");
|
||||
MODULE_DESCRIPTION("KVM support for SVM (AMD-V) extensions");
|
||||
MODULE_LICENSE("GPL");
|
||||
|
||||
#ifdef MODULE
|
||||
@@ -1202,7 +1203,7 @@ static inline void init_vmcb_after_set_cpuid(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
|
||||
if (guest_cpuid_is_intel(vcpu)) {
|
||||
if (guest_cpuid_is_intel_compatible(vcpu)) {
|
||||
/*
|
||||
* We must intercept SYSENTER_EIP and SYSENTER_ESP
|
||||
* accesses because the processor only stores 32 bits.
|
||||
@@ -2890,12 +2891,12 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
|
||||
break;
|
||||
case MSR_IA32_SYSENTER_EIP:
|
||||
msr_info->data = (u32)svm->vmcb01.ptr->save.sysenter_eip;
|
||||
if (guest_cpuid_is_intel(vcpu))
|
||||
if (guest_cpuid_is_intel_compatible(vcpu))
|
||||
msr_info->data |= (u64)svm->sysenter_eip_hi << 32;
|
||||
break;
|
||||
case MSR_IA32_SYSENTER_ESP:
|
||||
msr_info->data = svm->vmcb01.ptr->save.sysenter_esp;
|
||||
if (guest_cpuid_is_intel(vcpu))
|
||||
if (guest_cpuid_is_intel_compatible(vcpu))
|
||||
msr_info->data |= (u64)svm->sysenter_esp_hi << 32;
|
||||
break;
|
||||
case MSR_TSC_AUX:
|
||||
@@ -3122,11 +3123,11 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
|
||||
* 32 bit part of these msrs to support Intel's
|
||||
* implementation of SYSENTER/SYSEXIT.
|
||||
*/
|
||||
svm->sysenter_eip_hi = guest_cpuid_is_intel(vcpu) ? (data >> 32) : 0;
|
||||
svm->sysenter_eip_hi = guest_cpuid_is_intel_compatible(vcpu) ? (data >> 32) : 0;
|
||||
break;
|
||||
case MSR_IA32_SYSENTER_ESP:
|
||||
svm->vmcb01.ptr->save.sysenter_esp = (u32)data;
|
||||
svm->sysenter_esp_hi = guest_cpuid_is_intel(vcpu) ? (data >> 32) : 0;
|
||||
svm->sysenter_esp_hi = guest_cpuid_is_intel_compatible(vcpu) ? (data >> 32) : 0;
|
||||
break;
|
||||
case MSR_TSC_AUX:
|
||||
/*
|
||||
@@ -4387,11 +4388,11 @@ static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
|
||||
kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_LBRV);
|
||||
|
||||
/*
|
||||
* Intercept VMLOAD if the vCPU mode is Intel in order to emulate that
|
||||
* Intercept VMLOAD if the vCPU model is Intel in order to emulate that
|
||||
* VMLOAD drops bits 63:32 of SYSENTER (ignoring the fact that exposing
|
||||
* SVM on Intel is bonkers and extremely unlikely to work).
|
||||
*/
|
||||
if (!guest_cpuid_is_intel(vcpu))
|
||||
if (!guest_cpuid_is_intel_compatible(vcpu))
|
||||
kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_V_VMSAVE_VMLOAD);
|
||||
|
||||
kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_PAUSEFILTER);
|
||||
|
||||
@@ -668,7 +668,7 @@ extern struct kvm_x86_nested_ops svm_nested_ops;
|
||||
/* avic.c */
|
||||
#define AVIC_REQUIRED_APICV_INHIBITS \
|
||||
( \
|
||||
BIT(APICV_INHIBIT_REASON_DISABLE) | \
|
||||
BIT(APICV_INHIBIT_REASON_DISABLED) | \
|
||||
BIT(APICV_INHIBIT_REASON_ABSENT) | \
|
||||
BIT(APICV_INHIBIT_REASON_HYPERV) | \
|
||||
BIT(APICV_INHIBIT_REASON_NESTED) | \
|
||||
|
||||
@@ -1375,6 +1375,10 @@ TRACE_EVENT(kvm_hv_stimer_cleanup,
|
||||
__entry->vcpu_id, __entry->timer_index)
|
||||
);
|
||||
|
||||
#define kvm_print_apicv_inhibit_reasons(inhibits) \
|
||||
(inhibits), (inhibits) ? " " : "", \
|
||||
(inhibits) ? __print_flags(inhibits, "|", APICV_INHIBIT_REASONS) : ""
|
||||
|
||||
TRACE_EVENT(kvm_apicv_inhibit_changed,
|
||||
TP_PROTO(int reason, bool set, unsigned long inhibits),
|
||||
TP_ARGS(reason, set, inhibits),
|
||||
@@ -1391,9 +1395,10 @@ TRACE_EVENT(kvm_apicv_inhibit_changed,
|
||||
__entry->inhibits = inhibits;
|
||||
),
|
||||
|
||||
TP_printk("%s reason=%u, inhibits=0x%lx",
|
||||
TP_printk("%s reason=%u, inhibits=0x%lx%s%s",
|
||||
__entry->set ? "set" : "cleared",
|
||||
__entry->reason, __entry->inhibits)
|
||||
__entry->reason,
|
||||
kvm_print_apicv_inhibit_reasons(__entry->inhibits))
|
||||
);
|
||||
|
||||
TRACE_EVENT(kvm_apicv_accept_irq,
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
#include "posted_intr.h"
|
||||
|
||||
#define VMX_REQUIRED_APICV_INHIBITS \
|
||||
(BIT(APICV_INHIBIT_REASON_DISABLE)| \
|
||||
(BIT(APICV_INHIBIT_REASON_DISABLED) | \
|
||||
BIT(APICV_INHIBIT_REASON_ABSENT) | \
|
||||
BIT(APICV_INHIBIT_REASON_HYPERV) | \
|
||||
BIT(APICV_INHIBIT_REASON_BLOCKIRQ) | \
|
||||
|
||||
@@ -2425,7 +2425,7 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct loaded_vmcs *vmcs0
|
||||
if (cpu_has_load_ia32_efer()) {
|
||||
if (guest_efer & EFER_LMA)
|
||||
exec_control |= VM_ENTRY_IA32E_MODE;
|
||||
if (guest_efer != host_efer)
|
||||
if (guest_efer != kvm_host.efer)
|
||||
exec_control |= VM_ENTRY_LOAD_IA32_EFER;
|
||||
}
|
||||
vm_entry_controls_set(vmx, exec_control);
|
||||
@@ -2438,7 +2438,7 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct loaded_vmcs *vmcs0
|
||||
* bits may be modified by vmx_set_efer() in prepare_vmcs02().
|
||||
*/
|
||||
exec_control = __vm_exit_controls_get(vmcs01);
|
||||
if (cpu_has_load_ia32_efer() && guest_efer != host_efer)
|
||||
if (cpu_has_load_ia32_efer() && guest_efer != kvm_host.efer)
|
||||
exec_control |= VM_EXIT_LOAD_IA32_EFER;
|
||||
else
|
||||
exec_control &= ~VM_EXIT_LOAD_IA32_EFER;
|
||||
@@ -4665,7 +4665,7 @@ static inline u64 nested_vmx_get_vmcs01_guest_efer(struct vcpu_vmx *vmx)
|
||||
return vmcs_read64(GUEST_IA32_EFER);
|
||||
|
||||
if (cpu_has_load_ia32_efer())
|
||||
return host_efer;
|
||||
return kvm_host.efer;
|
||||
|
||||
for (i = 0; i < vmx->msr_autoload.guest.nr; ++i) {
|
||||
if (vmx->msr_autoload.guest.val[i].index == MSR_EFER)
|
||||
@@ -4676,7 +4676,7 @@ static inline u64 nested_vmx_get_vmcs01_guest_efer(struct vcpu_vmx *vmx)
|
||||
if (efer_msr)
|
||||
return efer_msr->data;
|
||||
|
||||
return host_efer;
|
||||
return kvm_host.efer;
|
||||
}
|
||||
|
||||
static void nested_vmx_restore_host_state(struct kvm_vcpu *vcpu)
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user