Merge branch 'guest-memfd-mmap' into HEAD
Add support for host userspace mapping of guest_memfd-backed memory for VM types that do NOT support KVM_MEMORY_ATTRIBUTE_PRIVATE (which isn't precisely the same thing as CoCo VMs, since x86's SEV-MEM and SEV-ES have no way to detect private vs. shared). mmap() support paves the way for several evolving KVM use cases:

* Allows VMMs like Firecracker to run guests entirely backed by guest_memfd [1]. This provides a unified memory management model for both confidential and non-confidential guests, simplifying VMM design.

* Enhanced security via direct map removal: when combined with Patrick's series for direct map removal [2], this provides additional hardening against Spectre-like transient execution attacks by eliminating the need for host kernel direct maps of guest memory.

* Lays the groundwork for *restricted* mmap() support for guest_memfd-backed memory on CoCo platforms [3] that permit in-place sharing of guest memory with the host.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
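For context (not part of the merge itself): the intended userspace flow is to create a guest_memfd with GUEST_MEMFD_FLAG_MMAP, mmap() the returned fd to obtain a host mapping, and bind the same fd to a memslot via KVM_SET_USER_MEMORY_REGION2. The following is a minimal sketch, assuming a kernel that carries this series and UAPI headers defining KVM_CAP_GUEST_MEMFD_MMAP and GUEST_MEMFD_FLAG_MMAP; error handling is trimmed and the memslot layout is illustrative only.

/* Hedged sketch of the guest_memfd mmap() flow described above. */
#include <fcntl.h>
#include <linux/kvm.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>

int main(void)
{
	int kvm = open("/dev/kvm", O_RDWR);
	int vm = ioctl(kvm, KVM_CREATE_VM, 0);

	/* Only proceed if this VM type supports mmap() on guest_memfd. */
	if (ioctl(vm, KVM_CHECK_EXTENSION, KVM_CAP_GUEST_MEMFD_MMAP) <= 0)
		return 1;

	struct kvm_create_guest_memfd gmem = {
		.size = 0x200000,		/* 2 MiB of guest memory */
		.flags = GUEST_MEMFD_FLAG_MMAP,	/* allow host userspace mapping */
	};
	int gmem_fd = ioctl(vm, KVM_CREATE_GUEST_MEMFD, &gmem);

	/* Host mapping of the guest_memfd; faulting it in is now permitted. */
	void *hva = mmap(NULL, gmem.size, PROT_READ | PROT_WRITE,
			 MAP_SHARED, gmem_fd, 0);
	memset(hva, 0, gmem.size);

	/* Bind the same fd to a memslot; guest faults are served from gmem. */
	struct kvm_userspace_memory_region2 region = {
		.slot = 0,
		.flags = KVM_MEM_GUEST_MEMFD,
		.guest_phys_addr = 0,
		.memory_size = gmem.size,
		.userspace_addr = (unsigned long)hva,
		.guest_memfd = gmem_fd,
		.guest_memfd_offset = 0,
	};
	return ioctl(vm, KVM_SET_USER_MEMORY_REGION2, &region);
}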
@@ -6414,6 +6414,15 @@ most one mapping per page, i.e. binding multiple memory regions to a single
guest_memfd range is not allowed (any number of memory regions can be bound to
a single guest_memfd file, but the bound ranges must not overlap).

When the capability KVM_CAP_GUEST_MEMFD_MMAP is supported, the 'flags' field
supports GUEST_MEMFD_FLAG_MMAP. Setting this flag on guest_memfd creation
enables mmap() and faulting of guest_memfd memory to host userspace.

When the KVM MMU performs a PFN lookup to service a guest fault and the backing
guest_memfd has the GUEST_MEMFD_FLAG_MMAP set, then the fault will always be
consumed from guest_memfd, regardless of whether it is a shared or a private
fault.

See KVM_SET_USER_MEMORY_REGION2 for additional details.

4.143 KVM_PRE_FAULT_MEMORY
@@ -37,6 +37,7 @@ menuconfig KVM
select HAVE_KVM_VCPU_RUN_PID_CHANGE
select SCHED_INFO
select GUEST_PERF_EVENTS if PERF_EVENTS
select KVM_GUEST_MEMFD
help
Support hosting virtualized guest machines.
@@ -1477,13 +1477,132 @@ static bool kvm_vma_is_cacheable(struct vm_area_struct *vma)
}
}

static int prepare_mmu_memcache(struct kvm_vcpu *vcpu, bool topup_memcache,
void **memcache)
{
int min_pages;

if (!is_protected_kvm_enabled())
*memcache = &vcpu->arch.mmu_page_cache;
else
*memcache = &vcpu->arch.pkvm_memcache;

if (!topup_memcache)
return 0;

min_pages = kvm_mmu_cache_min_pages(vcpu->arch.hw_mmu);

if (!is_protected_kvm_enabled())
return kvm_mmu_topup_memory_cache(*memcache, min_pages);

return topup_hyp_memcache(*memcache, min_pages);
}

/*
* Potentially reduce shadow S2 permissions to match the guest's own S2. For
* exec faults, we'd only reach this point if the guest actually allowed it (see
* kvm_s2_handle_perm_fault).
*
* Also encode the level of the original translation in the SW bits of the leaf
* entry as a proxy for the span of that translation. This will be retrieved on
* TLB invalidation from the guest and used to limit the invalidation scope if a
* TTL hint or a range isn't provided.
*/
static void adjust_nested_fault_perms(struct kvm_s2_trans *nested,
enum kvm_pgtable_prot *prot,
bool *writable)
{
*writable &= kvm_s2_trans_writable(nested);
if (!kvm_s2_trans_readable(nested))
*prot &= ~KVM_PGTABLE_PROT_R;

*prot |= kvm_encode_nested_level(nested);
}

#define KVM_PGTABLE_WALK_MEMABORT_FLAGS (KVM_PGTABLE_WALK_HANDLE_FAULT | KVM_PGTABLE_WALK_SHARED)

static int gmem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
struct kvm_s2_trans *nested,
struct kvm_memory_slot *memslot, bool is_perm)
{
bool write_fault, exec_fault, writable;
enum kvm_pgtable_walk_flags flags = KVM_PGTABLE_WALK_MEMABORT_FLAGS;
enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R;
struct kvm_pgtable *pgt = vcpu->arch.hw_mmu->pgt;
unsigned long mmu_seq;
struct page *page;
struct kvm *kvm = vcpu->kvm;
void *memcache;
kvm_pfn_t pfn;
gfn_t gfn;
int ret;

ret = prepare_mmu_memcache(vcpu, true, &memcache);
if (ret)
return ret;

if (nested)
gfn = kvm_s2_trans_output(nested) >> PAGE_SHIFT;
else
gfn = fault_ipa >> PAGE_SHIFT;

write_fault = kvm_is_write_fault(vcpu);
exec_fault = kvm_vcpu_trap_is_exec_fault(vcpu);

VM_WARN_ON_ONCE(write_fault && exec_fault);

mmu_seq = kvm->mmu_invalidate_seq;
/* Pairs with the smp_wmb() in kvm_mmu_invalidate_end(). */
smp_rmb();

ret = kvm_gmem_get_pfn(kvm, memslot, gfn, &pfn, &page, NULL);
if (ret) {
kvm_prepare_memory_fault_exit(vcpu, fault_ipa, PAGE_SIZE,
write_fault, exec_fault, false);
return ret;
}

writable = !(memslot->flags & KVM_MEM_READONLY);

if (nested)
adjust_nested_fault_perms(nested, &prot, &writable);

if (writable)
prot |= KVM_PGTABLE_PROT_W;

if (exec_fault ||
(cpus_have_final_cap(ARM64_HAS_CACHE_DIC) &&
(!nested || kvm_s2_trans_executable(nested))))
prot |= KVM_PGTABLE_PROT_X;

kvm_fault_lock(kvm);
if (mmu_invalidate_retry(kvm, mmu_seq)) {
ret = -EAGAIN;
goto out_unlock;
}

ret = KVM_PGT_FN(kvm_pgtable_stage2_map)(pgt, fault_ipa, PAGE_SIZE,
__pfn_to_phys(pfn), prot,
memcache, flags);

out_unlock:
kvm_release_faultin_page(kvm, page, !!ret, writable);
kvm_fault_unlock(kvm);

if (writable && !ret)
mark_page_dirty_in_slot(kvm, memslot, gfn);

return ret != -EAGAIN ? ret : 0;
}

static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
struct kvm_s2_trans *nested,
struct kvm_memory_slot *memslot, unsigned long hva,
bool fault_is_perm)
{
int ret = 0;
bool write_fault, writable, force_pte = false;
bool topup_memcache;
bool write_fault, writable;
bool exec_fault, mte_allowed, is_vma_cacheable;
bool s2_force_noncacheable = false, vfio_allow_any_uc = false;
unsigned long mmu_seq;
@@ -1495,28 +1614,19 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
gfn_t gfn;
kvm_pfn_t pfn;
bool logging_active = memslot_is_logging(memslot);
bool force_pte = logging_active;
long vma_pagesize, fault_granule;
enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R;
struct kvm_pgtable *pgt;
struct page *page;
vm_flags_t vm_flags;
enum kvm_pgtable_walk_flags flags = KVM_PGTABLE_WALK_HANDLE_FAULT | KVM_PGTABLE_WALK_SHARED;
enum kvm_pgtable_walk_flags flags = KVM_PGTABLE_WALK_MEMABORT_FLAGS;

if (fault_is_perm)
fault_granule = kvm_vcpu_trap_get_perm_fault_granule(vcpu);
write_fault = kvm_is_write_fault(vcpu);
exec_fault = kvm_vcpu_trap_is_exec_fault(vcpu);
VM_BUG_ON(write_fault && exec_fault);

if (fault_is_perm && !write_fault && !exec_fault) {
kvm_err("Unexpected L2 read permission error\n");
return -EFAULT;
}

if (!is_protected_kvm_enabled())
memcache = &vcpu->arch.mmu_page_cache;
else
memcache = &vcpu->arch.pkvm_memcache;
VM_WARN_ON_ONCE(write_fault && exec_fault);

/*
* Permission faults just need to update the existing leaf entry,
@@ -1524,17 +1634,10 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
* only exception to this is when dirty logging is enabled at runtime
* and a write fault needs to collapse a block entry into a table.
*/
if (!fault_is_perm || (logging_active && write_fault)) {
int min_pages = kvm_mmu_cache_min_pages(vcpu->arch.hw_mmu);

if (!is_protected_kvm_enabled())
ret = kvm_mmu_topup_memory_cache(memcache, min_pages);
else
ret = topup_hyp_memcache(memcache, min_pages);

if (ret)
return ret;
}
topup_memcache = !fault_is_perm || (logging_active && write_fault);
ret = prepare_mmu_memcache(vcpu, topup_memcache, &memcache);
if (ret)
return ret;

/*
* Let's check if we will get back a huge page backed by hugetlbfs, or
@@ -1548,16 +1651,10 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
return -EFAULT;
}

/*
* logging_active is guaranteed to never be true for VM_PFNMAP
* memslots.
*/
if (logging_active) {
force_pte = true;
if (force_pte)
vma_shift = PAGE_SHIFT;
} else {
else
vma_shift = get_vma_page_shift(vma, hva);
}

switch (vma_shift) {
#ifndef __PAGETABLE_PMD_FOLDED
@@ -1609,7 +1706,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
max_map_size = PAGE_SIZE;

force_pte = (max_map_size == PAGE_SIZE);
vma_pagesize = min(vma_pagesize, (long)max_map_size);
vma_pagesize = min_t(long, vma_pagesize, max_map_size);
}

/*
@@ -1642,7 +1739,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
* Rely on mmap_read_unlock() for an implicit smp_rmb(), which pairs
* with the smp_wmb() in kvm_mmu_invalidate_end().
*/
mmu_seq = vcpu->kvm->mmu_invalidate_seq;
mmu_seq = kvm->mmu_invalidate_seq;
mmap_read_unlock(current->mm);

pfn = __kvm_faultin_pfn(memslot, gfn, write_fault ? FOLL_WRITE : 0,
@@ -1698,24 +1795,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
if (exec_fault && s2_force_noncacheable)
return -ENOEXEC;

/*
* Potentially reduce shadow S2 permissions to match the guest's own
* S2. For exec faults, we'd only reach this point if the guest
* actually allowed it (see kvm_s2_handle_perm_fault).
*
* Also encode the level of the original translation in the SW bits
* of the leaf entry as a proxy for the span of that translation.
* This will be retrieved on TLB invalidation from the guest and
* used to limit the invalidation scope if a TTL hint or a range
* isn't provided.
*/
if (nested) {
writable &= kvm_s2_trans_writable(nested);
if (!kvm_s2_trans_readable(nested))
prot &= ~KVM_PGTABLE_PROT_R;

prot |= kvm_encode_nested_level(nested);
}
if (nested)
adjust_nested_fault_perms(nested, &prot, &writable);

kvm_fault_lock(kvm);
pgt = vcpu->arch.hw_mmu->pgt;
@@ -1981,8 +2062,15 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
goto out_unlock;
}

ret = user_mem_abort(vcpu, fault_ipa, nested, memslot, hva,
esr_fsc_is_permission_fault(esr));
VM_WARN_ON_ONCE(kvm_vcpu_trap_is_permission_fault(vcpu) &&
!write_fault && !kvm_vcpu_trap_is_exec_fault(vcpu));

if (kvm_slot_has_gmem(memslot))
ret = gmem_abort(vcpu, fault_ipa, nested, memslot,
esr_fsc_is_permission_fault(esr));
else
ret = user_mem_abort(vcpu, fault_ipa, nested, memslot, hva,
esr_fsc_is_permission_fault(esr));
if (ret == 0)
ret = 1;
out:
@@ -2214,6 +2302,13 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
if ((new->base_gfn + new->npages) > (kvm_phys_size(&kvm->arch.mmu) >> PAGE_SHIFT))
return -EFAULT;

/*
* Only support guest_memfd backed memslots with mappable memory, since
* there aren't any CoCo VMs that support only private memory on arm64.
*/
if (kvm_slot_has_gmem(new) && !kvm_memslot_is_gmem_only(new))
return -EINVAL;

hva = new->userspace_addr;
reg_end = hva + (new->npages << PAGE_SHIFT);
@@ -1172,8 +1172,9 @@ static u64 read_vncr_el2(struct kvm_vcpu *vcpu)
return (u64)sign_extend64(__vcpu_sys_reg(vcpu, VNCR_EL2), 48);
}

static int kvm_translate_vncr(struct kvm_vcpu *vcpu)
static int kvm_translate_vncr(struct kvm_vcpu *vcpu, bool *is_gmem)
{
struct kvm_memory_slot *memslot;
bool write_fault, writable;
unsigned long mmu_seq;
struct vncr_tlb *vt;
@@ -1216,10 +1217,25 @@ static int kvm_translate_vncr(struct kvm_vcpu *vcpu)
smp_rmb();

gfn = vt->wr.pa >> PAGE_SHIFT;
pfn = kvm_faultin_pfn(vcpu, gfn, write_fault, &writable, &page);
if (is_error_noslot_pfn(pfn) || (write_fault && !writable))
memslot = gfn_to_memslot(vcpu->kvm, gfn);
if (!memslot)
return -EFAULT;

*is_gmem = kvm_slot_has_gmem(memslot);
if (!*is_gmem) {
pfn = __kvm_faultin_pfn(memslot, gfn, write_fault ? FOLL_WRITE : 0,
&writable, &page);
if (is_error_noslot_pfn(pfn) || (write_fault && !writable))
return -EFAULT;
} else {
ret = kvm_gmem_get_pfn(vcpu->kvm, memslot, gfn, &pfn, &page, NULL);
if (ret) {
kvm_prepare_memory_fault_exit(vcpu, vt->wr.pa, PAGE_SIZE,
write_fault, false, false);
return ret;
}
}

scoped_guard(write_lock, &vcpu->kvm->mmu_lock) {
if (mmu_invalidate_retry(vcpu->kvm, mmu_seq))
return -EAGAIN;
@@ -1292,23 +1308,36 @@ int kvm_handle_vncr_abort(struct kvm_vcpu *vcpu)
if (esr_fsc_is_permission_fault(esr)) {
inject_vncr_perm(vcpu);
} else if (esr_fsc_is_translation_fault(esr)) {
bool valid;
bool valid, is_gmem = false;
int ret;

scoped_guard(read_lock, &vcpu->kvm->mmu_lock)
valid = kvm_vncr_tlb_lookup(vcpu);

if (!valid)
ret = kvm_translate_vncr(vcpu);
ret = kvm_translate_vncr(vcpu, &is_gmem);
else
ret = -EPERM;

switch (ret) {
case -EAGAIN:
case -ENOMEM:
/* Let's try again... */
break;
case -ENOMEM:
/*
* For guest_memfd, this indicates that it failed to
* create a folio to back the memory. Inform userspace.
*/
if (is_gmem)
return 0;
/* Otherwise, let's try again... */
break;
case -EFAULT:
case -EIO:
case -EHWPOISON:
if (is_gmem)
return 0;
fallthrough;
case -EINVAL:
case -ENOENT:
case -EACCES:
@@ -145,7 +145,7 @@ KVM_X86_OP_OPTIONAL_RET0(vcpu_get_apicv_inhibit_reasons);
KVM_X86_OP_OPTIONAL(get_untagged_addr)
KVM_X86_OP_OPTIONAL(alloc_apic_backing_page)
KVM_X86_OP_OPTIONAL_RET0(gmem_prepare)
KVM_X86_OP_OPTIONAL_RET0(private_max_mapping_level)
KVM_X86_OP_OPTIONAL_RET0(gmem_max_mapping_level)
KVM_X86_OP_OPTIONAL(gmem_invalidate)

#undef KVM_X86_OP

@@ -1922,7 +1922,7 @@ struct kvm_x86_ops {
void *(*alloc_apic_backing_page)(struct kvm_vcpu *vcpu);
int (*gmem_prepare)(struct kvm *kvm, kvm_pfn_t pfn, gfn_t gfn, int max_order);
void (*gmem_invalidate)(kvm_pfn_t start, kvm_pfn_t end);
int (*private_max_mapping_level)(struct kvm *kvm, kvm_pfn_t pfn);
int (*gmem_max_mapping_level)(struct kvm *kvm, kvm_pfn_t pfn, bool is_private);
};

struct kvm_x86_nested_ops {
@@ -2276,10 +2276,8 @@ void kvm_configure_mmu(bool enable_tdp, int tdp_forced_root_level,
int tdp_max_root_level, int tdp_huge_page_level);

#ifdef CONFIG_KVM_PRIVATE_MEM
#ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES
#define kvm_arch_has_private_mem(kvm) ((kvm)->arch.has_private_mem)
#else
#define kvm_arch_has_private_mem(kvm) false
#endif

#define kvm_arch_has_readonly_mem(kvm) (!(kvm)->arch.has_protected_state)
@@ -46,8 +46,8 @@ config KVM_X86
select HAVE_KVM_PM_NOTIFIER if PM
select KVM_GENERIC_HARDWARE_ENABLING
select KVM_GENERIC_PRE_FAULT_MEMORY
select KVM_GENERIC_PRIVATE_MEM if KVM_SW_PROTECTED_VM
select KVM_WERROR if WERROR
select KVM_GUEST_MEMFD if X86_64

config KVM
tristate "Kernel-based Virtual Machine (KVM) support"
@@ -74,7 +74,7 @@ config KVM_WERROR
# FRAME_WARN, i.e. KVM_WERROR=y with KASAN=y requires special tuning.
# Building KVM with -Werror and KASAN is still doable via enabling
# the kernel-wide WERROR=y.
depends on KVM && ((EXPERT && !KASAN) || WERROR)
depends on KVM_X86 && ((EXPERT && !KASAN) || WERROR)
help
Add -Werror to the build flags for KVM.

@@ -83,7 +83,8 @@ config KVM_WERROR
config KVM_SW_PROTECTED_VM
bool "Enable support for KVM software-protected VMs"
depends on EXPERT
depends on KVM && X86_64
depends on KVM_X86 && X86_64
select KVM_GENERIC_MEMORY_ATTRIBUTES
help
Enable support for KVM software-protected VMs. Currently, software-
protected VMs are purely a development and testing vehicle for
@@ -95,8 +96,6 @@ config KVM_SW_PROTECTED_VM
config KVM_INTEL
tristate "KVM for Intel (and compatible) processors support"
depends on KVM && IA32_FEAT_CTL
select KVM_GENERIC_PRIVATE_MEM if INTEL_TDX_HOST
select KVM_GENERIC_MEMORY_ATTRIBUTES if INTEL_TDX_HOST
help
Provides support for KVM on processors equipped with Intel's VT
extensions, a.k.a. Virtual Machine Extensions (VMX).
@@ -135,6 +134,8 @@ config KVM_INTEL_TDX
bool "Intel Trust Domain Extensions (TDX) support"
default y
depends on INTEL_TDX_HOST
select KVM_GENERIC_MEMORY_ATTRIBUTES
select HAVE_KVM_ARCH_GMEM_POPULATE
help
Provides support for launching Intel Trust Domain Extensions (TDX)
confidential VMs on Intel processors.
@@ -157,9 +158,10 @@ config KVM_AMD_SEV
depends on KVM_AMD && X86_64
depends on CRYPTO_DEV_SP_PSP && !(KVM_AMD=y && CRYPTO_DEV_CCP_DD=m)
select ARCH_HAS_CC_PLATFORM
select KVM_GENERIC_PRIVATE_MEM
select KVM_GENERIC_MEMORY_ATTRIBUTES
select HAVE_KVM_ARCH_GMEM_PREPARE
select HAVE_KVM_ARCH_GMEM_INVALIDATE
select HAVE_KVM_ARCH_GMEM_POPULATE
help
Provides support for launching encrypted VMs which use Secure
Encrypted Virtualization (SEV), Secure Encrypted Virtualization with
@@ -169,7 +171,7 @@ config KVM_AMD_SEV
config KVM_IOAPIC
bool "I/O APIC, PIC, and PIT emulation"
default y
depends on KVM
depends on KVM_X86
help
Provides support for KVM to emulate an I/O APIC, PIC, and PIT, i.e.
for full in-kernel APIC emulation.
@@ -179,7 +181,7 @@ config KVM_IOAPIC
config KVM_SMM
bool "System Management Mode emulation"
default y
depends on KVM
depends on KVM_X86
help
Provides support for KVM to emulate System Management Mode (SMM)
in virtual machines. This can be used by the virtual machine
@@ -189,7 +191,7 @@ config KVM_SMM

config KVM_HYPERV
bool "Support for Microsoft Hyper-V emulation"
depends on KVM
depends on KVM_X86
default y
help
Provides KVM support for emulating Microsoft Hyper-V. This allows KVM
@@ -203,7 +205,7 @@ config KVM_HYPERV

config KVM_XEN
bool "Support for Xen hypercall interface"
depends on KVM
depends on KVM_X86
help
Provides KVM support for the hosting Xen HVM guests and
passing Xen hypercalls to userspace.
@@ -213,7 +215,7 @@ config KVM_XEN
config KVM_PROVE_MMU
bool "Prove KVM MMU correctness"
depends on DEBUG_KERNEL
depends on KVM
depends on KVM_X86
depends on EXPERT
help
Enables runtime assertions in KVM's MMU that are too costly to enable
@@ -228,7 +230,7 @@ config KVM_EXTERNAL_WRITE_TRACKING

config KVM_MAX_NR_VCPUS
int "Maximum number of vCPUs per KVM guest"
depends on KVM
depends on KVM_X86
range 1024 4096
default 4096 if MAXSMP
default 1024
@@ -3285,12 +3285,72 @@ out:
return level;
}

static int __kvm_mmu_max_mapping_level(struct kvm *kvm,
const struct kvm_memory_slot *slot,
gfn_t gfn, int max_level, bool is_private)
static u8 kvm_max_level_for_order(int order)
{
BUILD_BUG_ON(KVM_MAX_HUGEPAGE_LEVEL > PG_LEVEL_1G);

KVM_MMU_WARN_ON(order != KVM_HPAGE_GFN_SHIFT(PG_LEVEL_1G) &&
order != KVM_HPAGE_GFN_SHIFT(PG_LEVEL_2M) &&
order != KVM_HPAGE_GFN_SHIFT(PG_LEVEL_4K));

if (order >= KVM_HPAGE_GFN_SHIFT(PG_LEVEL_1G))
return PG_LEVEL_1G;

if (order >= KVM_HPAGE_GFN_SHIFT(PG_LEVEL_2M))
return PG_LEVEL_2M;

return PG_LEVEL_4K;
}

static u8 kvm_gmem_max_mapping_level(struct kvm *kvm, struct kvm_page_fault *fault,
const struct kvm_memory_slot *slot, gfn_t gfn,
bool is_private)
{
u8 max_level, coco_level;
kvm_pfn_t pfn;

/* For faults, use the gmem information that was resolved earlier. */
if (fault) {
pfn = fault->pfn;
max_level = fault->max_level;
} else {
/* TODO: Call into guest_memfd once hugepages are supported. */
WARN_ONCE(1, "Get pfn+order from guest_memfd");
pfn = KVM_PFN_ERR_FAULT;
max_level = PG_LEVEL_4K;
}

if (max_level == PG_LEVEL_4K)
return max_level;

/*
* CoCo may influence the max mapping level, e.g. due to RMP or S-EPT
* restrictions. A return of '0' means "no additional restrictions", to
* allow for using an optional "ret0" static call.
*/
coco_level = kvm_x86_call(gmem_max_mapping_level)(kvm, pfn, is_private);
if (coco_level)
max_level = min(max_level, coco_level);

return max_level;
}

int kvm_mmu_max_mapping_level(struct kvm *kvm, struct kvm_page_fault *fault,
const struct kvm_memory_slot *slot, gfn_t gfn)
{
struct kvm_lpage_info *linfo;
int host_level;
int host_level, max_level;
bool is_private;

lockdep_assert_held(&kvm->mmu_lock);

if (fault) {
max_level = fault->max_level;
is_private = fault->is_private;
} else {
max_level = PG_LEVEL_NUM;
is_private = kvm_mem_is_private(kvm, gfn);
}

max_level = min(max_level, max_huge_page_level);
for ( ; max_level > PG_LEVEL_4K; max_level--) {
@@ -3299,25 +3359,17 @@ static int __kvm_mmu_max_mapping_level(struct kvm *kvm,
break;
}

if (is_private)
return max_level;

if (max_level == PG_LEVEL_4K)
return PG_LEVEL_4K;

host_level = host_pfn_mapping_level(kvm, gfn, slot);
if (is_private || kvm_memslot_is_gmem_only(slot))
host_level = kvm_gmem_max_mapping_level(kvm, fault, slot, gfn,
is_private);
else
host_level = host_pfn_mapping_level(kvm, gfn, slot);
return min(host_level, max_level);
}

int kvm_mmu_max_mapping_level(struct kvm *kvm,
const struct kvm_memory_slot *slot, gfn_t gfn)
{
bool is_private = kvm_slot_can_be_private(slot) &&
kvm_mem_is_private(kvm, gfn);

return __kvm_mmu_max_mapping_level(kvm, slot, gfn, PG_LEVEL_NUM, is_private);
}

void kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
{
struct kvm_memory_slot *slot = fault->slot;
@@ -3338,9 +3390,8 @@ void kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
* Enforce the iTLB multihit workaround after capturing the requested
* level, which will be used to do precise, accurate accounting.
*/
fault->req_level = __kvm_mmu_max_mapping_level(vcpu->kvm, slot,
fault->gfn, fault->max_level,
fault->is_private);
fault->req_level = kvm_mmu_max_mapping_level(vcpu->kvm, fault,
fault->slot, fault->gfn);
if (fault->req_level == PG_LEVEL_4K || fault->huge_page_disallowed)
return;
@@ -4503,42 +4554,6 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
vcpu->stat.pf_fixed++;
}

static inline u8 kvm_max_level_for_order(int order)
{
BUILD_BUG_ON(KVM_MAX_HUGEPAGE_LEVEL > PG_LEVEL_1G);

KVM_MMU_WARN_ON(order != KVM_HPAGE_GFN_SHIFT(PG_LEVEL_1G) &&
order != KVM_HPAGE_GFN_SHIFT(PG_LEVEL_2M) &&
order != KVM_HPAGE_GFN_SHIFT(PG_LEVEL_4K));

if (order >= KVM_HPAGE_GFN_SHIFT(PG_LEVEL_1G))
return PG_LEVEL_1G;

if (order >= KVM_HPAGE_GFN_SHIFT(PG_LEVEL_2M))
return PG_LEVEL_2M;

return PG_LEVEL_4K;
}

static u8 kvm_max_private_mapping_level(struct kvm *kvm, kvm_pfn_t pfn,
u8 max_level, int gmem_order)
{
u8 req_max_level;

if (max_level == PG_LEVEL_4K)
return PG_LEVEL_4K;

max_level = min(kvm_max_level_for_order(gmem_order), max_level);
if (max_level == PG_LEVEL_4K)
return PG_LEVEL_4K;

req_max_level = kvm_x86_call(private_max_mapping_level)(kvm, pfn);
if (req_max_level)
max_level = min(max_level, req_max_level);

return max_level;
}

static void kvm_mmu_finish_page_fault(struct kvm_vcpu *vcpu,
struct kvm_page_fault *fault, int r)
{
@@ -4546,12 +4561,12 @@ static void kvm_mmu_finish_page_fault(struct kvm_vcpu *vcpu,
r == RET_PF_RETRY, fault->map_writable);
}

static int kvm_mmu_faultin_pfn_private(struct kvm_vcpu *vcpu,
struct kvm_page_fault *fault)
static int kvm_mmu_faultin_pfn_gmem(struct kvm_vcpu *vcpu,
struct kvm_page_fault *fault)
{
int max_order, r;

if (!kvm_slot_can_be_private(fault->slot)) {
if (!kvm_slot_has_gmem(fault->slot)) {
kvm_mmu_prepare_memory_fault_exit(vcpu, fault);
return -EFAULT;
}
@@ -4564,8 +4579,7 @@ static int kvm_mmu_faultin_pfn_private(struct kvm_vcpu *vcpu,
}

fault->map_writable = !(fault->slot->flags & KVM_MEM_READONLY);
fault->max_level = kvm_max_private_mapping_level(vcpu->kvm, fault->pfn,
fault->max_level, max_order);
fault->max_level = kvm_max_level_for_order(max_order);

return RET_PF_CONTINUE;
}
@@ -4575,8 +4589,8 @@ static int __kvm_mmu_faultin_pfn(struct kvm_vcpu *vcpu,
{
unsigned int foll = fault->write ? FOLL_WRITE : 0;

if (fault->is_private)
return kvm_mmu_faultin_pfn_private(vcpu, fault);
if (fault->is_private || kvm_memslot_is_gmem_only(fault->slot))
return kvm_mmu_faultin_pfn_gmem(vcpu, fault);

foll |= FOLL_NOWAIT;
fault->pfn = __kvm_faultin_pfn(fault->slot, fault->gfn, foll,
@@ -7165,7 +7179,7 @@ restart:
* mapping if the indirect sp has level = 1.
*/
if (sp->role.direct &&
sp->role.level < kvm_mmu_max_mapping_level(kvm, slot, sp->gfn)) {
sp->role.level < kvm_mmu_max_mapping_level(kvm, NULL, slot, sp->gfn)) {
kvm_zap_one_rmap_spte(kvm, rmap_head, sptep);

if (kvm_available_flush_remote_tlbs_range())
@@ -411,7 +411,7 @@ static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
return r;
}

int kvm_mmu_max_mapping_level(struct kvm *kvm,
int kvm_mmu_max_mapping_level(struct kvm *kvm, struct kvm_page_fault *fault,
const struct kvm_memory_slot *slot, gfn_t gfn);
void kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault);
void disallowed_hugepage_adjust(struct kvm_page_fault *fault, u64 spte, int cur_level);

@@ -1813,7 +1813,7 @@ retry:
if (iter.gfn < start || iter.gfn >= end)
continue;

max_mapping_level = kvm_mmu_max_mapping_level(kvm, slot, iter.gfn);
max_mapping_level = kvm_mmu_max_mapping_level(kvm, NULL, slot, iter.gfn);
if (max_mapping_level < iter.level)
continue;
@@ -2361,7 +2361,7 @@ static int snp_launch_update(struct kvm *kvm, struct kvm_sev_cmd *argp)
mutex_lock(&kvm->slots_lock);

memslot = gfn_to_memslot(kvm, params.gfn_start);
if (!kvm_slot_can_be_private(memslot)) {
if (!kvm_slot_has_gmem(memslot)) {
ret = -EINVAL;
goto out;
}
@@ -4715,7 +4715,7 @@ void sev_handle_rmp_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u64 error_code)
}

slot = gfn_to_memslot(kvm, gfn);
if (!kvm_slot_can_be_private(slot)) {
if (!kvm_slot_has_gmem(slot)) {
pr_warn_ratelimited("SEV: Unexpected RMP fault, non-private slot for GPA 0x%llx\n",
gpa);
return;
@@ -4943,7 +4943,7 @@ next_pfn:
}
}

int sev_private_max_mapping_level(struct kvm *kvm, kvm_pfn_t pfn)
int sev_gmem_max_mapping_level(struct kvm *kvm, kvm_pfn_t pfn, bool is_private)
{
int level, rc;
bool assigned;

@@ -5180,7 +5180,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {

.gmem_prepare = sev_gmem_prepare,
.gmem_invalidate = sev_gmem_invalidate,
.private_max_mapping_level = sev_private_max_mapping_level,
.gmem_max_mapping_level = sev_gmem_max_mapping_level,
};

/*
@@ -866,7 +866,7 @@ void sev_handle_rmp_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u64 error_code);
void sev_snp_init_protected_guest_state(struct kvm_vcpu *vcpu);
int sev_gmem_prepare(struct kvm *kvm, kvm_pfn_t pfn, gfn_t gfn, int max_order);
void sev_gmem_invalidate(kvm_pfn_t start, kvm_pfn_t end);
int sev_private_max_mapping_level(struct kvm *kvm, kvm_pfn_t pfn);
int sev_gmem_max_mapping_level(struct kvm *kvm, kvm_pfn_t pfn, bool is_private);
struct vmcb_save_area *sev_decrypt_vmsa(struct kvm_vcpu *vcpu);
void sev_free_decrypted_vmsa(struct kvm_vcpu *vcpu, struct vmcb_save_area *vmsa);
#else
@@ -895,7 +895,7 @@ static inline int sev_gmem_prepare(struct kvm *kvm, kvm_pfn_t pfn, gfn_t gfn, in
return 0;
}
static inline void sev_gmem_invalidate(kvm_pfn_t start, kvm_pfn_t end) {}
static inline int sev_private_max_mapping_level(struct kvm *kvm, kvm_pfn_t pfn)
static inline int sev_gmem_max_mapping_level(struct kvm *kvm, kvm_pfn_t pfn, bool is_private)
{
return 0;
}
@@ -831,10 +831,11 @@ static int vt_vcpu_mem_enc_ioctl(struct kvm_vcpu *vcpu, void __user *argp)
return tdx_vcpu_ioctl(vcpu, argp);
}

static int vt_gmem_private_max_mapping_level(struct kvm *kvm, kvm_pfn_t pfn)
static int vt_gmem_max_mapping_level(struct kvm *kvm, kvm_pfn_t pfn,
bool is_private)
{
if (is_td(kvm))
return tdx_gmem_private_max_mapping_level(kvm, pfn);
return tdx_gmem_max_mapping_level(kvm, pfn, is_private);

return 0;
}
@@ -1005,7 +1006,7 @@ struct kvm_x86_ops vt_x86_ops __initdata = {
.mem_enc_ioctl = vt_op_tdx_only(mem_enc_ioctl),
.vcpu_mem_enc_ioctl = vt_op_tdx_only(vcpu_mem_enc_ioctl),

.private_max_mapping_level = vt_op_tdx_only(gmem_private_max_mapping_level)
.gmem_max_mapping_level = vt_op_tdx_only(gmem_max_mapping_level)
};

struct kvm_x86_init_ops vt_init_ops __initdata = {

@@ -3318,8 +3318,11 @@ int tdx_vcpu_ioctl(struct kvm_vcpu *vcpu, void __user *argp)
return ret;
}

int tdx_gmem_private_max_mapping_level(struct kvm *kvm, kvm_pfn_t pfn)
int tdx_gmem_max_mapping_level(struct kvm *kvm, kvm_pfn_t pfn, bool is_private)
{
if (!is_private)
return 0;

return PG_LEVEL_4K;
}
@@ -153,7 +153,7 @@ int tdx_vcpu_ioctl(struct kvm_vcpu *vcpu, void __user *argp);
void tdx_flush_tlb_current(struct kvm_vcpu *vcpu);
void tdx_flush_tlb_all(struct kvm_vcpu *vcpu);
void tdx_load_mmu_pgd(struct kvm_vcpu *vcpu, hpa_t root_hpa, int root_level);
int tdx_gmem_private_max_mapping_level(struct kvm *kvm, kvm_pfn_t pfn);
int tdx_gmem_max_mapping_level(struct kvm *kvm, kvm_pfn_t pfn, bool is_private);
#endif

#endif /* __KVM_X86_VMX_X86_OPS_H */
@@ -13521,6 +13521,16 @@ bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
}
EXPORT_SYMBOL_GPL(kvm_arch_no_poll);

#ifdef CONFIG_KVM_GUEST_MEMFD
/*
* KVM doesn't yet support mmap() on guest_memfd for VMs with private memory
* (the private vs. shared tracking needs to be moved into guest_memfd).
*/
bool kvm_arch_supports_gmem_mmap(struct kvm *kvm)
{
return !kvm_arch_has_private_mem(kvm);
}

#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_PREPARE
int kvm_arch_gmem_prepare(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int max_order)
{
@@ -13534,6 +13544,7 @@ void kvm_arch_gmem_invalidate(kvm_pfn_t start, kvm_pfn_t end)
kvm_x86_call(gmem_invalidate)(start, end);
}
#endif
#endif

int kvm_spec_ctrl_test_value(u64 value)
{
@@ -52,9 +52,10 @@
/*
* The bit 16 ~ bit 31 of kvm_userspace_memory_region::flags are internally
* used in kvm, other bits are visible for userspace which are defined in
* include/linux/kvm_h.
* include/uapi/linux/kvm.h.
*/
#define KVM_MEMSLOT_INVALID (1UL << 16)
#define KVM_MEMSLOT_INVALID (1UL << 16)
#define KVM_MEMSLOT_GMEM_ONLY (1UL << 17)

/*
* Bit 63 of the memslot generation number is an "update in-progress flag",
@@ -602,7 +603,7 @@ struct kvm_memory_slot {
short id;
u16 as_id;

#ifdef CONFIG_KVM_PRIVATE_MEM
#ifdef CONFIG_KVM_GUEST_MEMFD
struct {
/*
* Writes protected by kvm->slots_lock. Acquiring a
@@ -615,7 +616,7 @@ struct kvm_memory_slot {
#endif
};

static inline bool kvm_slot_can_be_private(const struct kvm_memory_slot *slot)
static inline bool kvm_slot_has_gmem(const struct kvm_memory_slot *slot)
{
return slot && (slot->flags & KVM_MEM_GUEST_MEMFD);
}
@@ -719,17 +720,17 @@ static inline int kvm_arch_vcpu_memslots_id(struct kvm_vcpu *vcpu)
}
#endif

/*
* Arch code must define kvm_arch_has_private_mem if support for private memory
* is enabled.
*/
#if !defined(kvm_arch_has_private_mem) && !IS_ENABLED(CONFIG_KVM_PRIVATE_MEM)
#ifndef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES
static inline bool kvm_arch_has_private_mem(struct kvm *kvm)
{
return false;
}
#endif

#ifdef CONFIG_KVM_GUEST_MEMFD
bool kvm_arch_supports_gmem_mmap(struct kvm *kvm);
#endif

#ifndef kvm_arch_has_readonly_mem
static inline bool kvm_arch_has_readonly_mem(struct kvm *kvm)
{
@@ -860,7 +861,7 @@ struct kvm {
struct notifier_block pm_notifier;
#endif
#ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES
/* Protected by slots_locks (for writes) and RCU (for reads) */
/* Protected by slots_lock (for writes) and RCU (for reads) */
struct xarray mem_attr_array;
#endif
char stats_id[KVM_STATS_NAME_SIZE];
@@ -2490,6 +2491,14 @@ static inline void kvm_prepare_memory_fault_exit(struct kvm_vcpu *vcpu,
vcpu->run->memory_fault.flags |= KVM_MEMORY_EXIT_FLAG_PRIVATE;
}

static inline bool kvm_memslot_is_gmem_only(const struct kvm_memory_slot *slot)
{
if (!IS_ENABLED(CONFIG_KVM_GUEST_MEMFD))
return false;

return slot->flags & KVM_MEMSLOT_GMEM_ONLY;
}

#ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES
static inline unsigned long kvm_get_memory_attributes(struct kvm *kvm, gfn_t gfn)
{
@@ -2505,8 +2514,7 @@ bool kvm_arch_post_set_memory_attributes(struct kvm *kvm,

static inline bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn)
{
return IS_ENABLED(CONFIG_KVM_PRIVATE_MEM) &&
kvm_get_memory_attributes(kvm, gfn) & KVM_MEMORY_ATTRIBUTE_PRIVATE;
return kvm_get_memory_attributes(kvm, gfn) & KVM_MEMORY_ATTRIBUTE_PRIVATE;
}
#else
static inline bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn)
@@ -2515,7 +2523,7 @@ static inline bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn)
}
#endif /* CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES */

#ifdef CONFIG_KVM_PRIVATE_MEM
#ifdef CONFIG_KVM_GUEST_MEMFD
int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot,
gfn_t gfn, kvm_pfn_t *pfn, struct page **page,
int *max_order);
@@ -2528,13 +2536,13 @@ static inline int kvm_gmem_get_pfn(struct kvm *kvm,
KVM_BUG_ON(1, kvm);
return -EIO;
}
#endif /* CONFIG_KVM_PRIVATE_MEM */
#endif /* CONFIG_KVM_GUEST_MEMFD */

#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_PREPARE
int kvm_arch_gmem_prepare(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int max_order);
#endif

#ifdef CONFIG_KVM_GENERIC_PRIVATE_MEM
#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_POPULATE
/**
* kvm_gmem_populate() - Populate/prepare a GPA range with guest data
*
@@ -962,6 +962,7 @@ struct kvm_enable_cap {
#define KVM_CAP_ARM_EL2_E2H0 241
#define KVM_CAP_RISCV_MP_STATE_RESET 242
#define KVM_CAP_ARM_CACHEABLE_PFNMAP_SUPPORTED 243
#define KVM_CAP_GUEST_MEMFD_MMAP 244

struct kvm_irq_routing_irqchip {
__u32 irqchip;
@@ -1598,6 +1599,7 @@ struct kvm_memory_attributes {
#define KVM_MEMORY_ATTRIBUTE_PRIVATE (1ULL << 3)

#define KVM_CREATE_GUEST_MEMFD _IOWR(KVMIO, 0xd4, struct kvm_create_guest_memfd)
#define GUEST_MEMFD_FLAG_MMAP (1ULL << 0)

struct kvm_create_guest_memfd {
__u64 size;
@@ -174,6 +174,7 @@ TEST_GEN_PROGS_arm64 += arch_timer
TEST_GEN_PROGS_arm64 += coalesced_io_test
TEST_GEN_PROGS_arm64 += dirty_log_perf_test
TEST_GEN_PROGS_arm64 += get-reg-list
TEST_GEN_PROGS_arm64 += guest_memfd_test
TEST_GEN_PROGS_arm64 += memslot_modification_stress_test
TEST_GEN_PROGS_arm64 += memslot_perf_test
TEST_GEN_PROGS_arm64 += mmu_stress_test
Some files were not shown because too many files have changed in this diff.