mirror of
https://github.com/Dasharo/linux.git
synced 2026-03-06 15:25:10 -08:00
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull x86 kvm updates from Paolo Bonzini:
"x86:
- KVM currently invalidates the entirety of the page tables, not just
those for the memslot being touched, when a memslot is moved or
deleted.
This does not traditionally have particularly noticeable overhead,
but Intel's TDX will require the guest to re-accept private pages
if they are dropped from the secure EPT, which is a non-starter.
Actually, the only reason why this is not already being done is a
bug which was never fully investigated and caused VM instability
with assigned GeForce GPUs, so allow userspace to opt into the new
behavior.
- Advertise AVX10.1 to userspace (effectively prep work for the
"real" AVX10 functionality that is on the horizon)
- Rework common MSR handling code to suppress errors on userspace
accesses to unsupported-but-advertised MSRs
This will allow removing (almost?) all of KVM's exemptions for
userspace access to MSRs that shouldn't exist based on the vCPU
model (the actual cleanup is non-trivial future work)
- Rework KVM's handling of x2APIC ICR, again, because AMD (x2AVIC)
splits the 64-bit value into the legacy ICR and ICR2 storage,
whereas Intel (APICv) stores the entire 64-bit value at the ICR
offset
- Fix a bug where KVM would fail to exit to userspace if one was
triggered by a fastpath exit handler
- Add fastpath handling of HLT VM-Exit to expedite re-entering the
guest when there's already a pending wake event at the time of the
exit
- Fix a WARN caused by RSM entering a nested guest from SMM with
invalid guest state, by forcing the vCPU out of guest mode prior to
signalling SHUTDOWN (the SHUTDOWN hits the VM altogether, not the
nested guest)
- Overhaul the "unprotect and retry" logic to more precisely identify
cases where retrying is actually helpful, and to harden all retry
paths against putting the guest into an infinite retry loop
- Add support for yielding, e.g. to honor NEED_RESCHED, when zapping
rmaps in the shadow MMU
- Refactor pieces of the shadow MMU related to aging SPTEs in
preparation for adding multi generation LRU support in KVM
- Don't stuff the RSB after VM-Exit when RETPOLINE=y and AutoIBRS is
enabled, i.e. when the CPU has already flushed the RSB
- Trace the per-CPU host save area as a VMCB pointer to improve
readability and cleanup the retrieval of the SEV-ES host save area
- Remove unnecessary accounting of temporary nested VMCB related
allocations
- Set FINAL/PAGE in the page fault error code for EPT violations if
and only if the GVA is valid. If the GVA is NOT valid, there is no
guest-side page table walk and so stuffing paging related metadata
is nonsensical
- Fix a bug where KVM would incorrectly synthesize a nested VM-Exit
instead of emulating posted interrupt delivery to L2
- Add a lockdep assertion to detect unsafe accesses of vmcs12
structures
- Harden eVMCS loading against an impossible NULL pointer deref
(really truly should be impossible)
- Minor SGX fix and a cleanup
- Misc cleanups
Generic:
- Register KVM's cpuhp and syscore callbacks when enabling
virtualization in hardware, as the sole purpose of said callbacks
is to disable and re-enable virtualization as needed
- Enable virtualization when KVM is loaded, not right before the
first VM is created
Together with the previous change, this greatly simplifies the logic
of the callbacks, because their very existence implies
virtualization is enabled
- Fix a bug that results in KVM prematurely exiting to userspace for
coalesced MMIO/PIO in many cases, clean up the related code, and
add a testcase
- Fix a bug in kvm_clear_guest() where it would trigger a buffer
overflow _if_ the gpa+len crosses a page boundary, which thankfully
is guaranteed to not happen in the current code base. Add WARNs in
more helpers that read/write guest memory to detect similar bugs
Selftests:
- Fix a goof that caused some Hyper-V tests to be skipped when run on
bare metal, i.e. NOT in a VM
- Add a regression test for KVM's handling of SHUTDOWN for an SEV-ES
guest
- Explicitly include one-off assets in .gitignore. Past Sean was
completely wrong about not being able to detect missing .gitignore
entries
- Verify userspace single-stepping works when KVM happens to handle a
VM-Exit in its fastpath
- Misc cleanups"
* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (127 commits)
Documentation: KVM: fix warning in "make htmldocs"
s390: Enable KVM_S390_UCONTROL config in debug_defconfig
selftests: kvm: s390: Add VM run test case
KVM: SVM: let alternatives handle the cases when RSB filling is required
KVM: VMX: Set PFERR_GUEST_{FINAL,PAGE}_MASK if and only if the GVA is valid
KVM: x86/mmu: Use KVM_PAGES_PER_HPAGE() instead of an open coded equivalent
KVM: x86/mmu: Add KVM_RMAP_MANY to replace open coded '1' and '1ul' literals
KVM: x86/mmu: Fold mmu_spte_age() into kvm_rmap_age_gfn_range()
KVM: x86/mmu: Morph kvm_handle_gfn_range() into an aging specific helper
KVM: x86/mmu: Honor NEED_RESCHED when zapping rmaps and blocking is allowed
KVM: x86/mmu: Add a helper to walk and zap rmaps for a memslot
KVM: x86/mmu: Plumb a @can_yield parameter into __walk_slot_rmaps()
KVM: x86/mmu: Move walk_slot_rmaps() up near for_each_slot_rmap_range()
KVM: x86/mmu: WARN on MMIO cache hit when emulating write-protected gfn
KVM: x86/mmu: Detect if unprotect will do anything based on invalid_list
KVM: x86/mmu: Subsume kvm_mmu_unprotect_page() into the and_retry() version
KVM: x86: Rename reexecute_instruction()=>kvm_unprotect_and_retry_on_failure()
KVM: x86: Update retry protection fields when forcing retry on emulation failure
KVM: x86: Apply retry protection to "unprotect on failure" path
KVM: x86: Check EMULTYPE_WRITE_PF_TO_SP before unprotecting gfn
...
This commit is contained in:
@@ -2677,6 +2677,23 @@
|
||||
|
||||
Default is Y (on).
|
||||
|
||||
kvm.enable_virt_at_load=[KVM,ARM64,LOONGARCH,MIPS,RISCV,X86]
|
||||
If enabled, KVM will enable virtualization in hardware
|
||||
when KVM is loaded, and disable virtualization when KVM
|
||||
is unloaded (if KVM is built as a module).
|
||||
|
||||
If disabled, KVM will dynamically enable and disable
|
||||
virtualization on-demand when creating and destroying
|
||||
VMs, i.e. on the 0=>1 and 1=>0 transitions of the
|
||||
number of VMs.
|
||||
|
||||
Enabling virtualization at module load avoids potential
|
||||
latency for creation of the 0=>1 VM, as KVM serializes
|
||||
virtualization enabling across all online CPUs. The
|
||||
"cost" of enabling virtualization when KVM is loaded,
|
||||
is that doing so may interfere with using out-of-tree
|
||||
hypervisors that want to "own" virtualization hardware.
|
||||
|
||||
kvm.enable_vmware_backdoor=[KVM] Support VMware backdoor PV interface.
|
||||
Default is false (don't support).
|
||||
|
||||
|
||||
@@ -4214,7 +4214,9 @@ whether or not KVM_CAP_X86_USER_SPACE_MSR's KVM_MSR_EXIT_REASON_FILTER is
|
||||
enabled. If KVM_MSR_EXIT_REASON_FILTER is enabled, KVM will exit to userspace
|
||||
on denied accesses, i.e. userspace effectively intercepts the MSR access. If
|
||||
KVM_MSR_EXIT_REASON_FILTER is not enabled, KVM will inject a #GP into the guest
|
||||
on denied accesses.
|
||||
on denied accesses. Note, if an MSR access is denied during emulation of MSR
|
||||
load/stores during VMX transitions, KVM ignores KVM_MSR_EXIT_REASON_FILTER.
|
||||
See the below warning for full details.
|
||||
|
||||
If an MSR access is allowed by userspace, KVM will emulate and/or virtualize
|
||||
the access in accordance with the vCPU model. Note, KVM may still ultimately
|
||||
@@ -4229,9 +4231,22 @@ filtering. In that mode, ``KVM_MSR_FILTER_DEFAULT_DENY`` is invalid and causes
|
||||
an error.
|
||||
|
||||
.. warning::
|
||||
MSR accesses as part of nested VM-Enter/VM-Exit are not filtered.
|
||||
This includes both writes to individual VMCS fields and reads/writes
|
||||
through the MSR lists pointed to by the VMCS.
|
||||
MSR accesses that are side effects of instruction execution (emulated or
|
||||
native) are not filtered as hardware does not honor MSR bitmaps outside of
|
||||
RDMSR and WRMSR, and KVM mimics that behavior when emulating instructions
|
||||
to avoid pointless divergence from hardware. E.g. RDPID reads MSR_TSC_AUX,
|
||||
SYSENTER reads the SYSENTER MSRs, etc.
|
||||
|
||||
MSRs that are loaded/stored via dedicated VMCS fields are not filtered as
|
||||
part of VM-Enter/VM-Exit emulation.
|
||||
|
||||
MSRs that are loaded/stored via VMX's load/store lists _are_ filtered as part
|
||||
of VM-Enter/VM-Exit emulation. If an MSR access is denied on VM-Enter, KVM
|
||||
synthesizes a consistency check VM-Exit(EXIT_REASON_MSR_LOAD_FAIL). If an
|
||||
MSR access is denied on VM-Exit, KVM synthesizes a VM-Abort. In short, KVM
|
||||
extends Intel's architectural list of MSRs that cannot be loaded/saved via
|
||||
the VM-Enter/VM-Exit MSR list. It is platform owner's responsibility to
|
||||
communicate any such restrictions to their end users.
|
||||
|
||||
x2APIC MSR accesses cannot be filtered (KVM silently ignores filters that
|
||||
cover any x2APIC MSRs).
|
||||
@@ -8082,6 +8097,14 @@ KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS By default, KVM emulates MONITOR/MWAIT (if
|
||||
guest CPUID on writes to MISC_ENABLE if
|
||||
KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT is
|
||||
disabled.
|
||||
|
||||
KVM_X86_QUIRK_SLOT_ZAP_ALL By default, KVM invalidates all SPTEs in
|
||||
a fast way for memslot deletion when VM type
|
||||
is KVM_X86_DEFAULT_VM.
|
||||
When this quirk is disabled or when VM type
|
||||
is other than KVM_X86_DEFAULT_VM, KVM zaps
|
||||
only leaf SPTEs that are within the range of
|
||||
the memslot being deleted.
|
||||
=================================== ============================================
|
||||
|
||||
7.32 KVM_CAP_MAX_VCPU_ID
|
||||
|
||||
@@ -11,6 +11,8 @@ The acquisition orders for mutexes are as follows:
|
||||
|
||||
- cpus_read_lock() is taken outside kvm_lock
|
||||
|
||||
- kvm_usage_lock is taken outside cpus_read_lock()
|
||||
|
||||
- kvm->lock is taken outside vcpu->mutex
|
||||
|
||||
- kvm->lock is taken outside kvm->slots_lock and kvm->irq_lock
|
||||
@@ -24,6 +26,13 @@ The acquisition orders for mutexes are as follows:
|
||||
are taken on the waiting side when modifying memslots, so MMU notifiers
|
||||
must not take either kvm->slots_lock or kvm->slots_arch_lock.
|
||||
|
||||
cpus_read_lock() vs kvm_lock:
|
||||
|
||||
- Taking cpus_read_lock() outside of kvm_lock is problematic, despite that
|
||||
being the official ordering, as it is quite easy to unknowingly trigger
|
||||
cpus_read_lock() while holding kvm_lock. Use caution when walking vm_list,
|
||||
e.g. avoid complex operations when possible.
|
||||
|
||||
For SRCU:
|
||||
|
||||
- ``synchronize_srcu(&kvm->srcu)`` is called inside critical sections
|
||||
@@ -227,10 +236,16 @@ time it will be set using the Dirty tracking mechanism described above.
|
||||
:Type: mutex
|
||||
:Arch: any
|
||||
:Protects: - vm_list
|
||||
- kvm_usage_count
|
||||
|
||||
``kvm_usage_lock``
|
||||
^^^^^^^^^^^^^^^^^^
|
||||
|
||||
:Type: mutex
|
||||
:Arch: any
|
||||
:Protects: - kvm_usage_count
|
||||
- hardware virtualization enable/disable
|
||||
:Comment: KVM also disables CPU hotplug via cpus_read_lock() during
|
||||
enable/disable.
|
||||
:Comment: Exists to allow taking cpus_read_lock() while kvm_usage_count is
|
||||
protected, which simplifies the virtualization enabling logic.
|
||||
|
||||
``kvm->mn_invalidate_lock``
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
@@ -290,11 +305,12 @@ time it will be set using the Dirty tracking mechanism described above.
|
||||
wakeup.
|
||||
|
||||
``vendor_module_lock``
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
^^^^^^^^^^^^^^^^^^^^^^
|
||||
:Type: mutex
|
||||
:Arch: x86
|
||||
:Protects: loading a vendor module (kvm_amd or kvm_intel)
|
||||
:Comment: Exists because using kvm_lock leads to deadlock. cpu_hotplug_lock is
|
||||
taken outside of kvm_lock, e.g. in KVM's CPU online/offline callbacks, and
|
||||
many operations need to take cpu_hotplug_lock when loading a vendor module,
|
||||
e.g. updating static calls.
|
||||
:Comment: Exists because using kvm_lock leads to deadlock. kvm_lock is taken
|
||||
in notifiers, e.g. __kvmclock_cpufreq_notifier(), that may be invoked while
|
||||
cpu_hotplug_lock is held, e.g. from cpufreq_boost_trigger_state(), and many
|
||||
operations need to take cpu_hotplug_lock when loading a vendor module, e.g.
|
||||
updating static calls.
|
||||
|
||||
@@ -2164,7 +2164,7 @@ static void cpu_hyp_uninit(void *discard)
|
||||
}
|
||||
}
|
||||
|
||||
int kvm_arch_hardware_enable(void)
|
||||
int kvm_arch_enable_virtualization_cpu(void)
|
||||
{
|
||||
/*
|
||||
* Most calls to this function are made with migration
|
||||
@@ -2184,7 +2184,7 @@ int kvm_arch_hardware_enable(void)
|
||||
return 0;
|
||||
}
|
||||
|
||||
void kvm_arch_hardware_disable(void)
|
||||
void kvm_arch_disable_virtualization_cpu(void)
|
||||
{
|
||||
kvm_timer_cpu_down();
|
||||
kvm_vgic_cpu_down();
|
||||
@@ -2380,7 +2380,7 @@ static int __init do_pkvm_init(u32 hyp_va_bits)
|
||||
|
||||
/*
|
||||
* The stub hypercalls are now disabled, so set our local flag to
|
||||
* prevent a later re-init attempt in kvm_arch_hardware_enable().
|
||||
* prevent a later re-init attempt in kvm_arch_enable_virtualization_cpu().
|
||||
*/
|
||||
__this_cpu_write(kvm_hyp_initialized, 1);
|
||||
preempt_enable();
|
||||
|
||||
@@ -261,7 +261,7 @@ long kvm_arch_dev_ioctl(struct file *filp,
|
||||
return -ENOIOCTLCMD;
|
||||
}
|
||||
|
||||
int kvm_arch_hardware_enable(void)
|
||||
int kvm_arch_enable_virtualization_cpu(void)
|
||||
{
|
||||
unsigned long env, gcfg = 0;
|
||||
|
||||
@@ -300,7 +300,7 @@ int kvm_arch_hardware_enable(void)
|
||||
return 0;
|
||||
}
|
||||
|
||||
void kvm_arch_hardware_disable(void)
|
||||
void kvm_arch_disable_virtualization_cpu(void)
|
||||
{
|
||||
write_csr_gcfg(0);
|
||||
write_csr_gstat(0);
|
||||
|
||||
@@ -728,8 +728,8 @@ struct kvm_mips_callbacks {
|
||||
int (*handle_fpe)(struct kvm_vcpu *vcpu);
|
||||
int (*handle_msa_disabled)(struct kvm_vcpu *vcpu);
|
||||
int (*handle_guest_exit)(struct kvm_vcpu *vcpu);
|
||||
int (*hardware_enable)(void);
|
||||
void (*hardware_disable)(void);
|
||||
int (*enable_virtualization_cpu)(void);
|
||||
void (*disable_virtualization_cpu)(void);
|
||||
int (*check_extension)(struct kvm *kvm, long ext);
|
||||
int (*vcpu_init)(struct kvm_vcpu *vcpu);
|
||||
void (*vcpu_uninit)(struct kvm_vcpu *vcpu);
|
||||
|
||||
@@ -125,14 +125,14 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
|
||||
return 1;
|
||||
}
|
||||
|
||||
int kvm_arch_hardware_enable(void)
|
||||
int kvm_arch_enable_virtualization_cpu(void)
|
||||
{
|
||||
return kvm_mips_callbacks->hardware_enable();
|
||||
return kvm_mips_callbacks->enable_virtualization_cpu();
|
||||
}
|
||||
|
||||
void kvm_arch_hardware_disable(void)
|
||||
void kvm_arch_disable_virtualization_cpu(void)
|
||||
{
|
||||
kvm_mips_callbacks->hardware_disable();
|
||||
kvm_mips_callbacks->disable_virtualization_cpu();
|
||||
}
|
||||
|
||||
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
|
||||
|
||||
@@ -2869,7 +2869,7 @@ static unsigned int kvm_vz_resize_guest_vtlb(unsigned int size)
|
||||
return ret + 1;
|
||||
}
|
||||
|
||||
static int kvm_vz_hardware_enable(void)
|
||||
static int kvm_vz_enable_virtualization_cpu(void)
|
||||
{
|
||||
unsigned int mmu_size, guest_mmu_size, ftlb_size;
|
||||
u64 guest_cvmctl, cvmvmconfig;
|
||||
@@ -2983,7 +2983,7 @@ static int kvm_vz_hardware_enable(void)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void kvm_vz_hardware_disable(void)
|
||||
static void kvm_vz_disable_virtualization_cpu(void)
|
||||
{
|
||||
u64 cvmvmconfig;
|
||||
unsigned int mmu_size;
|
||||
@@ -3280,8 +3280,8 @@ static struct kvm_mips_callbacks kvm_vz_callbacks = {
|
||||
.handle_msa_disabled = kvm_trap_vz_handle_msa_disabled,
|
||||
.handle_guest_exit = kvm_trap_vz_handle_guest_exit,
|
||||
|
||||
.hardware_enable = kvm_vz_hardware_enable,
|
||||
.hardware_disable = kvm_vz_hardware_disable,
|
||||
.enable_virtualization_cpu = kvm_vz_enable_virtualization_cpu,
|
||||
.disable_virtualization_cpu = kvm_vz_disable_virtualization_cpu,
|
||||
.check_extension = kvm_vz_check_extension,
|
||||
.vcpu_init = kvm_vz_vcpu_init,
|
||||
.vcpu_uninit = kvm_vz_vcpu_uninit,
|
||||
|
||||
@@ -20,7 +20,7 @@ long kvm_arch_dev_ioctl(struct file *filp,
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
int kvm_arch_hardware_enable(void)
|
||||
int kvm_arch_enable_virtualization_cpu(void)
|
||||
{
|
||||
csr_write(CSR_HEDELEG, KVM_HEDELEG_DEFAULT);
|
||||
csr_write(CSR_HIDELEG, KVM_HIDELEG_DEFAULT);
|
||||
@@ -35,7 +35,7 @@ int kvm_arch_hardware_enable(void)
|
||||
return 0;
|
||||
}
|
||||
|
||||
void kvm_arch_hardware_disable(void)
|
||||
void kvm_arch_disable_virtualization_cpu(void)
|
||||
{
|
||||
kvm_riscv_aia_disable();
|
||||
|
||||
|
||||
@@ -59,6 +59,7 @@ CONFIG_CMM=m
|
||||
CONFIG_APPLDATA_BASE=y
|
||||
CONFIG_S390_HYPFS_FS=y
|
||||
CONFIG_KVM=m
|
||||
CONFIG_KVM_S390_UCONTROL=y
|
||||
CONFIG_S390_UNWIND_SELFTEST=m
|
||||
CONFIG_S390_KPROBES_SANITY_TEST=m
|
||||
CONFIG_S390_MODULES_SANITY_TEST=m
|
||||
|
||||
@@ -348,20 +348,29 @@ static inline int plo_test_bit(unsigned char nr)
|
||||
return cc == 0;
|
||||
}
|
||||
|
||||
static __always_inline void __insn32_query(unsigned int opcode, u8 *query)
|
||||
static __always_inline void __sortl_query(u8 (*query)[32])
|
||||
{
|
||||
asm volatile(
|
||||
" lghi 0,0\n"
|
||||
" lgr 1,%[query]\n"
|
||||
" la 1,%[query]\n"
|
||||
/* Parameter registers are ignored */
|
||||
" .insn rrf,%[opc] << 16,2,4,6,0\n"
|
||||
" .insn rre,0xb9380000,2,4\n"
|
||||
: [query] "=R" (*query)
|
||||
:
|
||||
: [query] "d" ((unsigned long)query), [opc] "i" (opcode)
|
||||
: "cc", "memory", "0", "1");
|
||||
: "cc", "0", "1");
|
||||
}
|
||||
|
||||
#define INSN_SORTL 0xb938
|
||||
#define INSN_DFLTCC 0xb939
|
||||
static __always_inline void __dfltcc_query(u8 (*query)[32])
|
||||
{
|
||||
asm volatile(
|
||||
" lghi 0,0\n"
|
||||
" la 1,%[query]\n"
|
||||
/* Parameter registers are ignored */
|
||||
" .insn rrf,0xb9390000,2,4,6,0\n"
|
||||
: [query] "=R" (*query)
|
||||
:
|
||||
: "cc", "0", "1");
|
||||
}
|
||||
|
||||
static void __init kvm_s390_cpu_feat_init(void)
|
||||
{
|
||||
@@ -415,10 +424,10 @@ static void __init kvm_s390_cpu_feat_init(void)
|
||||
kvm_s390_available_subfunc.kdsa);
|
||||
|
||||
if (test_facility(150)) /* SORTL */
|
||||
__insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl);
|
||||
__sortl_query(&kvm_s390_available_subfunc.sortl);
|
||||
|
||||
if (test_facility(151)) /* DFLTCC */
|
||||
__insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc);
|
||||
__dfltcc_query(&kvm_s390_available_subfunc.dfltcc);
|
||||
|
||||
if (MACHINE_HAS_ESOP)
|
||||
allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
|
||||
|
||||
@@ -179,6 +179,7 @@ static __always_inline bool cpuid_function_is_indexed(u32 function)
|
||||
case 0x1d:
|
||||
case 0x1e:
|
||||
case 0x1f:
|
||||
case 0x24:
|
||||
case 0x8000001d:
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -14,8 +14,8 @@ BUILD_BUG_ON(1)
|
||||
* be __static_call_return0.
|
||||
*/
|
||||
KVM_X86_OP(check_processor_compatibility)
|
||||
KVM_X86_OP(hardware_enable)
|
||||
KVM_X86_OP(hardware_disable)
|
||||
KVM_X86_OP(enable_virtualization_cpu)
|
||||
KVM_X86_OP(disable_virtualization_cpu)
|
||||
KVM_X86_OP(hardware_unsetup)
|
||||
KVM_X86_OP(has_emulated_msr)
|
||||
KVM_X86_OP(vcpu_after_set_cpuid)
|
||||
@@ -125,7 +125,7 @@ KVM_X86_OP_OPTIONAL(mem_enc_unregister_region)
|
||||
KVM_X86_OP_OPTIONAL(vm_copy_enc_context_from)
|
||||
KVM_X86_OP_OPTIONAL(vm_move_enc_context_from)
|
||||
KVM_X86_OP_OPTIONAL(guest_memory_reclaimed)
|
||||
KVM_X86_OP(get_msr_feature)
|
||||
KVM_X86_OP(get_feature_msr)
|
||||
KVM_X86_OP(check_emulate_instruction)
|
||||
KVM_X86_OP(apic_init_signal_blocked)
|
||||
KVM_X86_OP_OPTIONAL(enable_l2_tlb_flush)
|
||||
|
||||
@@ -36,6 +36,7 @@
|
||||
#include <asm/kvm_page_track.h>
|
||||
#include <asm/kvm_vcpu_regs.h>
|
||||
#include <asm/hyperv-tlfs.h>
|
||||
#include <asm/reboot.h>
|
||||
|
||||
#define __KVM_HAVE_ARCH_VCPU_DEBUGFS
|
||||
|
||||
@@ -211,6 +212,7 @@ enum exit_fastpath_completion {
|
||||
EXIT_FASTPATH_NONE,
|
||||
EXIT_FASTPATH_REENTER_GUEST,
|
||||
EXIT_FASTPATH_EXIT_HANDLED,
|
||||
EXIT_FASTPATH_EXIT_USERSPACE,
|
||||
};
|
||||
typedef enum exit_fastpath_completion fastpath_t;
|
||||
|
||||
@@ -280,10 +282,6 @@ enum x86_intercept_stage;
|
||||
#define PFERR_PRIVATE_ACCESS BIT_ULL(49)
|
||||
#define PFERR_SYNTHETIC_MASK (PFERR_IMPLICIT_ACCESS | PFERR_PRIVATE_ACCESS)
|
||||
|
||||
#define PFERR_NESTED_GUEST_PAGE (PFERR_GUEST_PAGE_MASK | \
|
||||
PFERR_WRITE_MASK | \
|
||||
PFERR_PRESENT_MASK)
|
||||
|
||||
/* apic attention bits */
|
||||
#define KVM_APIC_CHECK_VAPIC 0
|
||||
/*
|
||||
@@ -1629,8 +1627,10 @@ struct kvm_x86_ops {
|
||||
|
||||
int (*check_processor_compatibility)(void);
|
||||
|
||||
int (*hardware_enable)(void);
|
||||
void (*hardware_disable)(void);
|
||||
int (*enable_virtualization_cpu)(void);
|
||||
void (*disable_virtualization_cpu)(void);
|
||||
cpu_emergency_virt_cb *emergency_disable_virtualization_cpu;
|
||||
|
||||
void (*hardware_unsetup)(void);
|
||||
bool (*has_emulated_msr)(struct kvm *kvm, u32 index);
|
||||
void (*vcpu_after_set_cpuid)(struct kvm_vcpu *vcpu);
|
||||
@@ -1727,6 +1727,8 @@ struct kvm_x86_ops {
|
||||
void (*enable_nmi_window)(struct kvm_vcpu *vcpu);
|
||||
void (*enable_irq_window)(struct kvm_vcpu *vcpu);
|
||||
void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
|
||||
|
||||
const bool x2apic_icr_is_split;
|
||||
const unsigned long required_apicv_inhibits;
|
||||
bool allow_apicv_in_x2apic_without_x2apic_virtualization;
|
||||
void (*refresh_apicv_exec_ctrl)(struct kvm_vcpu *vcpu);
|
||||
@@ -1806,7 +1808,7 @@ struct kvm_x86_ops {
|
||||
int (*vm_move_enc_context_from)(struct kvm *kvm, unsigned int source_fd);
|
||||
void (*guest_memory_reclaimed)(struct kvm *kvm);
|
||||
|
||||
int (*get_msr_feature)(struct kvm_msr_entry *entry);
|
||||
int (*get_feature_msr)(u32 msr, u64 *data);
|
||||
|
||||
int (*check_emulate_instruction)(struct kvm_vcpu *vcpu, int emul_type,
|
||||
void *insn, int insn_len);
|
||||
@@ -2060,6 +2062,8 @@ void kvm_prepare_emulation_failure_exit(struct kvm_vcpu *vcpu);
|
||||
|
||||
void kvm_enable_efer_bits(u64);
|
||||
bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer);
|
||||
int kvm_get_msr_with_filter(struct kvm_vcpu *vcpu, u32 index, u64 *data);
|
||||
int kvm_set_msr_with_filter(struct kvm_vcpu *vcpu, u32 index, u64 data);
|
||||
int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data, bool host_initiated);
|
||||
int kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data);
|
||||
int kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data);
|
||||
@@ -2136,7 +2140,15 @@ int kvm_get_nr_pending_nmis(struct kvm_vcpu *vcpu);
|
||||
|
||||
void kvm_update_dr7(struct kvm_vcpu *vcpu);
|
||||
|
||||
int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn);
|
||||
bool __kvm_mmu_unprotect_gfn_and_retry(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
|
||||
bool always_retry);
|
||||
|
||||
static inline bool kvm_mmu_unprotect_gfn_and_retry(struct kvm_vcpu *vcpu,
|
||||
gpa_t cr2_or_gpa)
|
||||
{
|
||||
return __kvm_mmu_unprotect_gfn_and_retry(vcpu, cr2_or_gpa, false);
|
||||
}
|
||||
|
||||
void kvm_mmu_free_roots(struct kvm *kvm, struct kvm_mmu *mmu,
|
||||
ulong roots_to_free);
|
||||
void kvm_mmu_free_guest_mode_roots(struct kvm *kvm, struct kvm_mmu *mmu);
|
||||
@@ -2254,6 +2266,7 @@ int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v);
|
||||
int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
|
||||
int kvm_cpu_has_extint(struct kvm_vcpu *v);
|
||||
int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu);
|
||||
int kvm_cpu_get_extint(struct kvm_vcpu *v);
|
||||
int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
|
||||
void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event);
|
||||
|
||||
@@ -2345,7 +2358,8 @@ int memslot_rmap_alloc(struct kvm_memory_slot *slot, unsigned long npages);
|
||||
KVM_X86_QUIRK_OUT_7E_INC_RIP | \
|
||||
KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT | \
|
||||
KVM_X86_QUIRK_FIX_HYPERCALL_INSN | \
|
||||
KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS)
|
||||
KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS | \
|
||||
KVM_X86_QUIRK_SLOT_ZAP_ALL)
|
||||
|
||||
/*
|
||||
* KVM previously used a u32 field in kvm_run to indicate the hypercall was
|
||||
|
||||
@@ -36,6 +36,20 @@
|
||||
#define EFER_FFXSR (1<<_EFER_FFXSR)
|
||||
#define EFER_AUTOIBRS (1<<_EFER_AUTOIBRS)
|
||||
|
||||
/*
|
||||
* Architectural memory types that are common to MTRRs, PAT, VMX MSRs, etc.
|
||||
* Most MSRs support/allow only a subset of memory types, but the values
|
||||
* themselves are common across all relevant MSRs.
|
||||
*/
|
||||
#define X86_MEMTYPE_UC 0ull /* Uncacheable, a.k.a. Strong Uncacheable */
|
||||
#define X86_MEMTYPE_WC 1ull /* Write Combining */
|
||||
/* RESERVED 2 */
|
||||
/* RESERVED 3 */
|
||||
#define X86_MEMTYPE_WT 4ull /* Write Through */
|
||||
#define X86_MEMTYPE_WP 5ull /* Write Protected */
|
||||
#define X86_MEMTYPE_WB 6ull /* Write Back */
|
||||
#define X86_MEMTYPE_UC_MINUS 7ull /* Weak Uncacheable (PAT only) */
|
||||
|
||||
/* FRED MSRs */
|
||||
#define MSR_IA32_FRED_RSP0 0x1cc /* Level 0 stack pointer */
|
||||
#define MSR_IA32_FRED_RSP1 0x1cd /* Level 1 stack pointer */
|
||||
@@ -365,6 +379,12 @@
|
||||
|
||||
#define MSR_IA32_CR_PAT 0x00000277
|
||||
|
||||
#define PAT_VALUE(p0, p1, p2, p3, p4, p5, p6, p7) \
|
||||
((X86_MEMTYPE_ ## p0) | (X86_MEMTYPE_ ## p1 << 8) | \
|
||||
(X86_MEMTYPE_ ## p2 << 16) | (X86_MEMTYPE_ ## p3 << 24) | \
|
||||
(X86_MEMTYPE_ ## p4 << 32) | (X86_MEMTYPE_ ## p5 << 40) | \
|
||||
(X86_MEMTYPE_ ## p6 << 48) | (X86_MEMTYPE_ ## p7 << 56))
|
||||
|
||||
#define MSR_IA32_DEBUGCTLMSR 0x000001d9
|
||||
#define MSR_IA32_LASTBRANCHFROMIP 0x000001db
|
||||
#define MSR_IA32_LASTBRANCHTOIP 0x000001dc
|
||||
@@ -1159,15 +1179,6 @@
|
||||
#define MSR_IA32_VMX_VMFUNC 0x00000491
|
||||
#define MSR_IA32_VMX_PROCBASED_CTLS3 0x00000492
|
||||
|
||||
/* VMX_BASIC bits and bitmasks */
|
||||
#define VMX_BASIC_VMCS_SIZE_SHIFT 32
|
||||
#define VMX_BASIC_TRUE_CTLS (1ULL << 55)
|
||||
#define VMX_BASIC_64 0x0001000000000000LLU
|
||||
#define VMX_BASIC_MEM_TYPE_SHIFT 50
|
||||
#define VMX_BASIC_MEM_TYPE_MASK 0x003c000000000000LLU
|
||||
#define VMX_BASIC_MEM_TYPE_WB 6LLU
|
||||
#define VMX_BASIC_INOUT 0x0040000000000000LLU
|
||||
|
||||
/* Resctrl MSRs: */
|
||||
/* - Intel: */
|
||||
#define MSR_IA32_L3_QOS_CFG 0xc81
|
||||
@@ -1185,11 +1196,6 @@
|
||||
#define MSR_IA32_SMBA_BW_BASE 0xc0000280
|
||||
#define MSR_IA32_EVT_CFG_BASE 0xc0000400
|
||||
|
||||
/* MSR_IA32_VMX_MISC bits */
|
||||
#define MSR_IA32_VMX_MISC_INTEL_PT (1ULL << 14)
|
||||
#define MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS (1ULL << 29)
|
||||
#define MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE 0x1F
|
||||
|
||||
/* AMD-V MSRs */
|
||||
#define MSR_VM_CR 0xc0010114
|
||||
#define MSR_VM_IGNNE 0xc0010115
|
||||
|
||||
@@ -25,8 +25,8 @@ void __noreturn machine_real_restart(unsigned int type);
|
||||
#define MRR_BIOS 0
|
||||
#define MRR_APM 1
|
||||
|
||||
#if IS_ENABLED(CONFIG_KVM_INTEL) || IS_ENABLED(CONFIG_KVM_AMD)
|
||||
typedef void (cpu_emergency_virt_cb)(void);
|
||||
#if IS_ENABLED(CONFIG_KVM_INTEL) || IS_ENABLED(CONFIG_KVM_AMD)
|
||||
void cpu_emergency_register_virt_callback(cpu_emergency_virt_cb *callback);
|
||||
void cpu_emergency_unregister_virt_callback(cpu_emergency_virt_cb *callback);
|
||||
void cpu_emergency_disable_virtualization(void);
|
||||
|
||||
@@ -516,6 +516,20 @@ struct ghcb {
|
||||
u32 ghcb_usage;
|
||||
} __packed;
|
||||
|
||||
struct vmcb {
|
||||
struct vmcb_control_area control;
|
||||
union {
|
||||
struct vmcb_save_area save;
|
||||
|
||||
/*
|
||||
* For SEV-ES VMs, the save area in the VMCB is used only to
|
||||
* save/load host state. Guest state resides in a separate
|
||||
* page, the aptly named VM Save Area (VMSA), that is encrypted
|
||||
* with the guest's private key.
|
||||
*/
|
||||
struct sev_es_save_area host_sev_es_save;
|
||||
};
|
||||
} __packed;
|
||||
|
||||
#define EXPECTED_VMCB_SAVE_AREA_SIZE 744
|
||||
#define EXPECTED_GHCB_SAVE_AREA_SIZE 1032
|
||||
@@ -532,6 +546,7 @@ static inline void __unused_size_checks(void)
|
||||
BUILD_BUG_ON(sizeof(struct ghcb_save_area) != EXPECTED_GHCB_SAVE_AREA_SIZE);
|
||||
BUILD_BUG_ON(sizeof(struct sev_es_save_area) != EXPECTED_SEV_ES_SAVE_AREA_SIZE);
|
||||
BUILD_BUG_ON(sizeof(struct vmcb_control_area) != EXPECTED_VMCB_CONTROL_AREA_SIZE);
|
||||
BUILD_BUG_ON(offsetof(struct vmcb, save) != EXPECTED_VMCB_CONTROL_AREA_SIZE);
|
||||
BUILD_BUG_ON(sizeof(struct ghcb) != EXPECTED_GHCB_SIZE);
|
||||
|
||||
/* Check offsets of reserved fields */
|
||||
@@ -568,11 +583,6 @@ static inline void __unused_size_checks(void)
|
||||
BUILD_BUG_RESERVED_OFFSET(ghcb, 0xff0);
|
||||
}
|
||||
|
||||
struct vmcb {
|
||||
struct vmcb_control_area control;
|
||||
struct vmcb_save_area save;
|
||||
} __packed;
|
||||
|
||||
#define SVM_CPUID_FUNC 0x8000000a
|
||||
|
||||
#define SVM_SELECTOR_S_SHIFT 4
|
||||
|
||||
@@ -122,19 +122,17 @@
|
||||
|
||||
#define VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR 0x000011ff
|
||||
|
||||
#define VMX_MISC_PREEMPTION_TIMER_RATE_MASK 0x0000001f
|
||||
#define VMX_MISC_SAVE_EFER_LMA 0x00000020
|
||||
#define VMX_MISC_ACTIVITY_HLT 0x00000040
|
||||
#define VMX_MISC_ACTIVITY_WAIT_SIPI 0x00000100
|
||||
#define VMX_MISC_ZERO_LEN_INS 0x40000000
|
||||
#define VMX_MISC_MSR_LIST_MULTIPLIER 512
|
||||
|
||||
/* VMFUNC functions */
|
||||
#define VMFUNC_CONTROL_BIT(x) BIT((VMX_FEATURE_##x & 0x1f) - 28)
|
||||
|
||||
#define VMX_VMFUNC_EPTP_SWITCHING VMFUNC_CONTROL_BIT(EPTP_SWITCHING)
|
||||
#define VMFUNC_EPTP_ENTRIES 512
|
||||
|
||||
#define VMX_BASIC_32BIT_PHYS_ADDR_ONLY BIT_ULL(48)
|
||||
#define VMX_BASIC_DUAL_MONITOR_TREATMENT BIT_ULL(49)
|
||||
#define VMX_BASIC_INOUT BIT_ULL(54)
|
||||
#define VMX_BASIC_TRUE_CTLS BIT_ULL(55)
|
||||
|
||||
static inline u32 vmx_basic_vmcs_revision_id(u64 vmx_basic)
|
||||
{
|
||||
return vmx_basic & GENMASK_ULL(30, 0);
|
||||
@@ -145,9 +143,30 @@ static inline u32 vmx_basic_vmcs_size(u64 vmx_basic)
|
||||
return (vmx_basic & GENMASK_ULL(44, 32)) >> 32;
|
||||
}
|
||||
|
||||
static inline u32 vmx_basic_vmcs_mem_type(u64 vmx_basic)
|
||||
{
|
||||
return (vmx_basic & GENMASK_ULL(53, 50)) >> 50;
|
||||
}
|
||||
|
||||
static inline u64 vmx_basic_encode_vmcs_info(u32 revision, u16 size, u8 memtype)
|
||||
{
|
||||
return revision | ((u64)size << 32) | ((u64)memtype << 50);
|
||||
}
|
||||
|
||||
#define VMX_MISC_SAVE_EFER_LMA BIT_ULL(5)
|
||||
#define VMX_MISC_ACTIVITY_HLT BIT_ULL(6)
|
||||
#define VMX_MISC_ACTIVITY_SHUTDOWN BIT_ULL(7)
|
||||
#define VMX_MISC_ACTIVITY_WAIT_SIPI BIT_ULL(8)
|
||||
#define VMX_MISC_INTEL_PT BIT_ULL(14)
|
||||
#define VMX_MISC_RDMSR_IN_SMM BIT_ULL(15)
|
||||
#define VMX_MISC_VMXOFF_BLOCK_SMI BIT_ULL(28)
|
||||
#define VMX_MISC_VMWRITE_SHADOW_RO_FIELDS BIT_ULL(29)
|
||||
#define VMX_MISC_ZERO_LEN_INS BIT_ULL(30)
|
||||
#define VMX_MISC_MSR_LIST_MULTIPLIER 512
|
||||
|
||||
static inline int vmx_misc_preemption_timer_rate(u64 vmx_misc)
|
||||
{
|
||||
return vmx_misc & VMX_MISC_PREEMPTION_TIMER_RATE_MASK;
|
||||
return vmx_misc & GENMASK_ULL(4, 0);
|
||||
}
|
||||
|
||||
static inline int vmx_misc_cr3_count(u64 vmx_misc)
|
||||
@@ -508,9 +527,10 @@ enum vmcs_field {
|
||||
#define VMX_EPTP_PWL_4 0x18ull
|
||||
#define VMX_EPTP_PWL_5 0x20ull
|
||||
#define VMX_EPTP_AD_ENABLE_BIT (1ull << 6)
|
||||
/* The EPTP memtype is encoded in bits 2:0, i.e. doesn't need to be shifted. */
|
||||
#define VMX_EPTP_MT_MASK 0x7ull
|
||||
#define VMX_EPTP_MT_WB 0x6ull
|
||||
#define VMX_EPTP_MT_UC 0x0ull
|
||||
#define VMX_EPTP_MT_WB X86_MEMTYPE_WB
|
||||
#define VMX_EPTP_MT_UC X86_MEMTYPE_UC
|
||||
#define VMX_EPT_READABLE_MASK 0x1ull
|
||||
#define VMX_EPT_WRITABLE_MASK 0x2ull
|
||||
#define VMX_EPT_EXECUTABLE_MASK 0x4ull
|
||||
|
||||
@@ -439,6 +439,7 @@ struct kvm_sync_regs {
|
||||
#define KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT (1 << 4)
|
||||
#define KVM_X86_QUIRK_FIX_HYPERCALL_INSN (1 << 5)
|
||||
#define KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS (1 << 6)
|
||||
#define KVM_X86_QUIRK_SLOT_ZAP_ALL (1 << 7)
|
||||
|
||||
#define KVM_STATE_NESTED_FORMAT_VMX 0
|
||||
#define KVM_STATE_NESTED_FORMAT_SVM 1
|
||||
|
||||
@@ -55,6 +55,12 @@
|
||||
|
||||
#include "mtrr.h"
|
||||
|
||||
static_assert(X86_MEMTYPE_UC == MTRR_TYPE_UNCACHABLE);
|
||||
static_assert(X86_MEMTYPE_WC == MTRR_TYPE_WRCOMB);
|
||||
static_assert(X86_MEMTYPE_WT == MTRR_TYPE_WRTHROUGH);
|
||||
static_assert(X86_MEMTYPE_WP == MTRR_TYPE_WRPROT);
|
||||
static_assert(X86_MEMTYPE_WB == MTRR_TYPE_WRBACK);
|
||||
|
||||
/* arch_phys_wc_add returns an MTRR register index plus this offset. */
|
||||
#define MTRR_TO_PHYS_WC_OFFSET 1000
|
||||
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user