Merge tag 'hyperv-next-signed-20230424' of git://git.kernel.org/pub/scm/linux/kernel/git/hyperv/linux

Pull hyperv updates from Wei Liu:

 - PCI passthrough for Hyper-V confidential VMs (Michael Kelley)

 - Hyper-V VTL mode support (Saurabh Sengar)

 - Move panic report initialization code earlier (Long Li)

 - Various improvements and bug fixes (Dexuan Cui and Michael Kelley)

* tag 'hyperv-next-signed-20230424' of git://git.kernel.org/pub/scm/linux/kernel/git/hyperv/linux: (22 commits)
  PCI: hv: Replace retarget_msi_interrupt_params with hyperv_pcpu_input_arg
  Drivers: hv: move panic report code from vmbus to hv early init code
  x86/hyperv: VTL support for Hyper-V
  Drivers: hv: Kconfig: Add HYPERV_VTL_MODE
  x86/hyperv: Make hv_get_nmi_reason public
  x86/hyperv: Add VTL specific structs and hypercalls
  x86/init: Make get/set_rtc_noop() public
  x86/hyperv: Exclude lazy TLB mode CPUs from enlightened TLB flushes
  x86/hyperv: Add callback filter to cpumask_to_vpset()
  Drivers: hv: vmbus: Remove the per-CPU post_msg_page
  clocksource: hyper-v: make sure Invariant-TSC is used if it is available
  PCI: hv: Enable PCI pass-thru devices in Confidential VMs
  Drivers: hv: Don't remap addresses that are above shared_gpa_boundary
  hv_netvsc: Remove second mapping of send and recv buffers
  Drivers: hv: vmbus: Remove second way of mapping ring buffers
  Drivers: hv: vmbus: Remove second mapping of VMBus monitor pages
  swiotlb: Remove bounce buffer remapping for Hyper-V
  Driver: VMBus: Add Devicetree support
  dt-bindings: bus: Add Hyper-V VMBus
  Drivers: hv: vmbus: Convert acpi_device to more generic platform_device
  ...
This commit is contained in:
Linus Torvalds
2023-04-27 17:17:12 -07:00
30 changed files with 1065 additions and 687 deletions

View File

@@ -0,0 +1,54 @@
# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
%YAML 1.2
---
$id: http://devicetree.org/schemas/bus/microsoft,vmbus.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
title: Microsoft Hyper-V VMBus
maintainers:
- Saurabh Sengar <ssengar@linux.microsoft.com>
description:
VMBus is a software bus that implement the protocols for communication
between the root or host OS and guest OSs (virtual machines).
properties:
compatible:
const: microsoft,vmbus
ranges: true
'#address-cells':
const: 2
'#size-cells':
const: 1
required:
- compatible
- ranges
- '#address-cells'
- '#size-cells'
additionalProperties: false
examples:
- |
soc {
#address-cells = <2>;
#size-cells = <1>;
bus {
compatible = "simple-bus";
#address-cells = <2>;
#size-cells = <1>;
ranges;
vmbus@ff0000000 {
compatible = "microsoft,vmbus";
#address-cells = <2>;
#size-cells = <1>;
ranges = <0x0f 0xf0000000 0x0f 0xf0000000 0x10000000>;
};
};
};

View File

@@ -9588,6 +9588,7 @@ S: Supported
T: git git://git.kernel.org/pub/scm/linux/kernel/git/hyperv/linux.git
F: Documentation/ABI/stable/sysfs-bus-vmbus
F: Documentation/ABI/testing/debugfs-hyperv
F: Documentation/devicetree/bindings/bus/microsoft,vmbus.yaml
F: Documentation/virt/hyperv
F: Documentation/networking/device_drivers/ethernet/microsoft/netvsc.rst
F: arch/arm64/hyperv

View File

@@ -1,6 +1,7 @@
# SPDX-License-Identifier: GPL-2.0-only
obj-y := hv_init.o mmu.o nested.o irqdomain.o ivm.o
obj-$(CONFIG_X86_64) += hv_apic.o hv_proc.o
obj-$(CONFIG_HYPERV_VTL_MODE) += hv_vtl.o
ifdef CONFIG_X86_64
obj-$(CONFIG_PARAVIRT_SPINLOCKS) += hv_spinlock.o

View File

@@ -96,6 +96,11 @@ static void hv_apic_eoi_write(u32 reg, u32 val)
wrmsr(HV_X64_MSR_EOI, val, 0);
}
static bool cpu_is_self(int cpu)
{
return cpu == smp_processor_id();
}
/*
* IPI implementation on Hyper-V.
*/
@@ -128,10 +133,9 @@ static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector,
*/
if (!cpumask_equal(mask, cpu_present_mask) || exclude_self) {
ipi_arg->vp_set.format = HV_GENERIC_SET_SPARSE_4K;
if (exclude_self)
nr_bank = cpumask_to_vpset_noself(&(ipi_arg->vp_set), mask);
else
nr_bank = cpumask_to_vpset(&(ipi_arg->vp_set), mask);
nr_bank = cpumask_to_vpset_skip(&(ipi_arg->vp_set), mask,
exclude_self ? cpu_is_self : NULL);
/*
* 'nr_bank <= 0' means some CPUs in cpumask can't be

View File

@@ -63,7 +63,10 @@ static int hyperv_init_ghcb(void)
* memory boundary and map it here.
*/
rdmsrl(MSR_AMD64_SEV_ES_GHCB, ghcb_gpa);
ghcb_va = memremap(ghcb_gpa, HV_HYP_PAGE_SIZE, MEMREMAP_WB);
/* Mask out vTOM bit. ioremap_cache() maps decrypted */
ghcb_gpa &= ~ms_hyperv.shared_gpa_boundary;
ghcb_va = (void *)ioremap_cache(ghcb_gpa, HV_HYP_PAGE_SIZE);
if (!ghcb_va)
return -ENOMEM;
@@ -217,7 +220,7 @@ static int hv_cpu_die(unsigned int cpu)
if (hv_ghcb_pg) {
ghcb_va = (void **)this_cpu_ptr(hv_ghcb_pg);
if (*ghcb_va)
memunmap(*ghcb_va);
iounmap(*ghcb_va);
*ghcb_va = NULL;
}

227
arch/x86/hyperv/hv_vtl.c Normal file
View File

@@ -0,0 +1,227 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (c) 2023, Microsoft Corporation.
*
* Author:
* Saurabh Sengar <ssengar@microsoft.com>
*/
#include <asm/apic.h>
#include <asm/boot.h>
#include <asm/desc.h>
#include <asm/i8259.h>
#include <asm/mshyperv.h>
#include <asm/realmode.h>
extern struct boot_params boot_params;
static struct real_mode_header hv_vtl_real_mode_header;
void __init hv_vtl_init_platform(void)
{
pr_info("Linux runs in Hyper-V Virtual Trust Level\n");
x86_init.irqs.pre_vector_init = x86_init_noop;
x86_init.timers.timer_init = x86_init_noop;
x86_platform.get_wallclock = get_rtc_noop;
x86_platform.set_wallclock = set_rtc_noop;
x86_platform.get_nmi_reason = hv_get_nmi_reason;
x86_platform.legacy.i8042 = X86_LEGACY_I8042_PLATFORM_ABSENT;
x86_platform.legacy.rtc = 0;
x86_platform.legacy.warm_reset = 0;
x86_platform.legacy.reserve_bios_regions = 0;
x86_platform.legacy.devices.pnpbios = 0;
}
static inline u64 hv_vtl_system_desc_base(struct ldttss_desc *desc)
{
return ((u64)desc->base3 << 32) | ((u64)desc->base2 << 24) |
(desc->base1 << 16) | desc->base0;
}
static inline u32 hv_vtl_system_desc_limit(struct ldttss_desc *desc)
{
return ((u32)desc->limit1 << 16) | (u32)desc->limit0;
}
typedef void (*secondary_startup_64_fn)(void*, void*);
static void hv_vtl_ap_entry(void)
{
((secondary_startup_64_fn)secondary_startup_64)(&boot_params, &boot_params);
}
static int hv_vtl_bringup_vcpu(u32 target_vp_index, u64 eip_ignored)
{
u64 status;
int ret = 0;
struct hv_enable_vp_vtl *input;
unsigned long irq_flags;
struct desc_ptr gdt_ptr;
struct desc_ptr idt_ptr;
struct ldttss_desc *tss;
struct ldttss_desc *ldt;
struct desc_struct *gdt;
u64 rsp = current->thread.sp;
u64 rip = (u64)&hv_vtl_ap_entry;
native_store_gdt(&gdt_ptr);
store_idt(&idt_ptr);
gdt = (struct desc_struct *)((void *)(gdt_ptr.address));
tss = (struct ldttss_desc *)(gdt + GDT_ENTRY_TSS);
ldt = (struct ldttss_desc *)(gdt + GDT_ENTRY_LDT);
local_irq_save(irq_flags);
input = *this_cpu_ptr(hyperv_pcpu_input_arg);
memset(input, 0, sizeof(*input));
input->partition_id = HV_PARTITION_ID_SELF;
input->vp_index = target_vp_index;
input->target_vtl.target_vtl = HV_VTL_MGMT;
/*
* The x86_64 Linux kernel follows the 16-bit -> 32-bit -> 64-bit
* mode transition sequence after waking up an AP with SIPI whose
* vector points to the 16-bit AP startup trampoline code. Here in
* VTL2, we can't perform that sequence as the AP has to start in
* the 64-bit mode.
*
* To make this happen, we tell the hypervisor to load a valid 64-bit
* context (most of which is just magic numbers from the CPU manual)
* so that AP jumps right to the 64-bit entry of the kernel, and the
* control registers are loaded with values that let the AP fetch the
* code and data and carry on with work it gets assigned.
*/
input->vp_context.rip = rip;
input->vp_context.rsp = rsp;
input->vp_context.rflags = 0x0000000000000002;
input->vp_context.efer = __rdmsr(MSR_EFER);
input->vp_context.cr0 = native_read_cr0();
input->vp_context.cr3 = __native_read_cr3();
input->vp_context.cr4 = native_read_cr4();
input->vp_context.msr_cr_pat = __rdmsr(MSR_IA32_CR_PAT);
input->vp_context.idtr.limit = idt_ptr.size;
input->vp_context.idtr.base = idt_ptr.address;
input->vp_context.gdtr.limit = gdt_ptr.size;
input->vp_context.gdtr.base = gdt_ptr.address;
/* Non-system desc (64bit), long, code, present */
input->vp_context.cs.selector = __KERNEL_CS;
input->vp_context.cs.base = 0;
input->vp_context.cs.limit = 0xffffffff;
input->vp_context.cs.attributes = 0xa09b;
/* Non-system desc (64bit), data, present, granularity, default */
input->vp_context.ss.selector = __KERNEL_DS;
input->vp_context.ss.base = 0;
input->vp_context.ss.limit = 0xffffffff;
input->vp_context.ss.attributes = 0xc093;
/* System desc (128bit), present, LDT */
input->vp_context.ldtr.selector = GDT_ENTRY_LDT * 8;
input->vp_context.ldtr.base = hv_vtl_system_desc_base(ldt);
input->vp_context.ldtr.limit = hv_vtl_system_desc_limit(ldt);
input->vp_context.ldtr.attributes = 0x82;
/* System desc (128bit), present, TSS, 0x8b - busy, 0x89 -- default */
input->vp_context.tr.selector = GDT_ENTRY_TSS * 8;
input->vp_context.tr.base = hv_vtl_system_desc_base(tss);
input->vp_context.tr.limit = hv_vtl_system_desc_limit(tss);
input->vp_context.tr.attributes = 0x8b;
status = hv_do_hypercall(HVCALL_ENABLE_VP_VTL, input, NULL);
if (!hv_result_success(status) &&
hv_result(status) != HV_STATUS_VTL_ALREADY_ENABLED) {
pr_err("HVCALL_ENABLE_VP_VTL failed for VP : %d ! [Err: %#llx\n]",
target_vp_index, status);
ret = -EINVAL;
goto free_lock;
}
status = hv_do_hypercall(HVCALL_START_VP, input, NULL);
if (!hv_result_success(status)) {
pr_err("HVCALL_START_VP failed for VP : %d ! [Err: %#llx]\n",
target_vp_index, status);
ret = -EINVAL;
}
free_lock:
local_irq_restore(irq_flags);
return ret;
}
static int hv_vtl_apicid_to_vp_id(u32 apic_id)
{
u64 control;
u64 status;
unsigned long irq_flags;
struct hv_get_vp_from_apic_id_in *input;
u32 *output, ret;
local_irq_save(irq_flags);
input = *this_cpu_ptr(hyperv_pcpu_input_arg);
memset(input, 0, sizeof(*input));
input->partition_id = HV_PARTITION_ID_SELF;
input->apic_ids[0] = apic_id;
output = (u32 *)input;
control = HV_HYPERCALL_REP_COMP_1 | HVCALL_GET_VP_ID_FROM_APIC_ID;
status = hv_do_hypercall(control, input, output);
ret = output[0];
local_irq_restore(irq_flags);
if (!hv_result_success(status)) {
pr_err("failed to get vp id from apic id %d, status %#llx\n",
apic_id, status);
return -EINVAL;
}
return ret;
}
static int hv_vtl_wakeup_secondary_cpu(int apicid, unsigned long start_eip)
{
int vp_id;
pr_debug("Bringing up CPU with APIC ID %d in VTL2...\n", apicid);
vp_id = hv_vtl_apicid_to_vp_id(apicid);
if (vp_id < 0) {
pr_err("Couldn't find CPU with APIC ID %d\n", apicid);
return -EINVAL;
}
if (vp_id > ms_hyperv.max_vp_index) {
pr_err("Invalid CPU id %d for APIC ID %d\n", vp_id, apicid);
return -EINVAL;
}
return hv_vtl_bringup_vcpu(vp_id, start_eip);
}
static int __init hv_vtl_early_init(void)
{
/*
* `boot_cpu_has` returns the runtime feature support,
* and here is the earliest it can be used.
*/
if (cpu_feature_enabled(X86_FEATURE_XSAVE))
panic("XSAVE has to be disabled as it is not supported by this module.\n"
"Please add 'noxsave' to the kernel command line.\n");
real_mode_header = &hv_vtl_real_mode_header;
apic->wakeup_secondary_cpu_64 = hv_vtl_wakeup_secondary_cpu;
return 0;
}
early_initcall(hv_vtl_early_init);

View File

@@ -376,34 +376,6 @@ void __init hv_vtom_init(void)
#endif /* CONFIG_AMD_MEM_ENCRYPT */
/*
* hv_map_memory - map memory to extra space in the AMD SEV-SNP Isolation VM.
*/
void *hv_map_memory(void *addr, unsigned long size)
{
unsigned long *pfns = kcalloc(size / PAGE_SIZE,
sizeof(unsigned long), GFP_KERNEL);
void *vaddr;
int i;
if (!pfns)
return NULL;
for (i = 0; i < size / PAGE_SIZE; i++)
pfns[i] = vmalloc_to_pfn(addr + i * PAGE_SIZE) +
(ms_hyperv.shared_gpa_boundary >> PAGE_SHIFT);
vaddr = vmap_pfn(pfns, size / PAGE_SIZE, pgprot_decrypted(PAGE_KERNEL));
kfree(pfns);
return vaddr;
}
void hv_unmap_memory(void *addr)
{
vunmap(addr);
}
enum hv_isolation_type hv_get_isolation_type(void)
{
if (!(ms_hyperv.priv_high & HV_ISOLATION))

View File

@@ -52,6 +52,11 @@ static inline int fill_gva_list(u64 gva_list[], int offset,
return gva_n - offset;
}
static bool cpu_is_lazy(int cpu)
{
return per_cpu(cpu_tlbstate_shared.is_lazy, cpu);
}
static void hyperv_flush_tlb_multi(const struct cpumask *cpus,
const struct flush_tlb_info *info)
{
@@ -60,6 +65,7 @@ static void hyperv_flush_tlb_multi(const struct cpumask *cpus,
struct hv_tlb_flush *flush;
u64 status;
unsigned long flags;
bool do_lazy = !info->freed_tables;
trace_hyperv_mmu_flush_tlb_multi(cpus, info);
@@ -112,6 +118,8 @@ static void hyperv_flush_tlb_multi(const struct cpumask *cpus,
goto do_ex_hypercall;
for_each_cpu(cpu, cpus) {
if (do_lazy && cpu_is_lazy(cpu))
continue;
vcpu = hv_cpu_number_to_vp_number(cpu);
if (vcpu == VP_INVAL) {
local_irq_restore(flags);
@@ -198,7 +206,8 @@ static u64 hyperv_flush_tlb_others_ex(const struct cpumask *cpus,
flush->hv_vp_set.valid_bank_mask = 0;
flush->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
nr_bank = cpumask_to_vpset(&(flush->hv_vp_set), cpus);
nr_bank = cpumask_to_vpset_skip(&flush->hv_vp_set, cpus,
info->freed_tables ? NULL : cpu_is_lazy);
if (nr_bank < 0)
return HV_STATUS_INVALID_PARAMETER;

View File

@@ -122,6 +122,9 @@
/* Recommend using enlightened VMCS */
#define HV_X64_ENLIGHTENED_VMCS_RECOMMENDED BIT(14)
/* Use hypercalls for MMIO config space access */
#define HV_X64_USE_MMIO_HYPERCALLS BIT(21)
/*
* CPU management features identification.
* These are HYPERV_CPUID_CPU_MANAGEMENT_FEATURES.EAX bits.
@@ -713,6 +716,81 @@ union hv_msi_entry {
} __packed;
};
struct hv_x64_segment_register {
u64 base;
u32 limit;
u16 selector;
union {
struct {
u16 segment_type : 4;
u16 non_system_segment : 1;
u16 descriptor_privilege_level : 2;
u16 present : 1;
u16 reserved : 4;
u16 available : 1;
u16 _long : 1;
u16 _default : 1;
u16 granularity : 1;
} __packed;
u16 attributes;
};
} __packed;
struct hv_x64_table_register {
u16 pad[3];
u16 limit;
u64 base;
} __packed;
struct hv_init_vp_context {
u64 rip;
u64 rsp;
u64 rflags;
struct hv_x64_segment_register cs;
struct hv_x64_segment_register ds;
struct hv_x64_segment_register es;
struct hv_x64_segment_register fs;
struct hv_x64_segment_register gs;
struct hv_x64_segment_register ss;
struct hv_x64_segment_register tr;
struct hv_x64_segment_register ldtr;
struct hv_x64_table_register idtr;
struct hv_x64_table_register gdtr;
u64 efer;
u64 cr0;
u64 cr3;
u64 cr4;
u64 msr_cr_pat;
} __packed;
union hv_input_vtl {
u8 as_uint8;
struct {
u8 target_vtl: 4;
u8 use_target_vtl: 1;
u8 reserved_z: 3;
};
} __packed;
struct hv_enable_vp_vtl {
u64 partition_id;
u32 vp_index;
union hv_input_vtl target_vtl;
u8 mbz0;
u16 mbz1;
struct hv_init_vp_context vp_context;
} __packed;
struct hv_get_vp_from_apic_id_in {
u64 partition_id;
union hv_input_vtl target_vtl;
u8 res[7];
u32 apic_ids[];
} __packed;
#include <asm-generic/hyperv-tlfs.h>
#endif

View File

@@ -19,6 +19,10 @@
*/
#define HV_IOAPIC_BASE_ADDRESS 0xfec00000
#define HV_VTL_NORMAL 0x0
#define HV_VTL_SECURE 0x1
#define HV_VTL_MGMT 0x2
union hv_ghcb;
DECLARE_STATIC_KEY_FALSE(isolation_type_snp);
@@ -29,6 +33,11 @@ typedef int (*hyperv_fill_flush_list_func)(
void hyperv_vector_handler(struct pt_regs *regs);
static inline unsigned char hv_get_nmi_reason(void)
{
return 0;
}
#if IS_ENABLED(CONFIG_HYPERV)
extern int hyperv_init_cpuhp;
@@ -271,6 +280,12 @@ static inline u64 hv_get_non_nested_register(unsigned int reg) { return 0; }
#endif /* CONFIG_HYPERV */
#ifdef CONFIG_HYPERV_VTL_MODE
void __init hv_vtl_init_platform(void);
#else
static inline void __init hv_vtl_init_platform(void) {}
#endif
#include <asm-generic/mshyperv.h>
#endif

View File

@@ -330,5 +330,7 @@ extern void x86_init_uint_noop(unsigned int unused);
extern bool bool_x86_init_noop(void);
extern void x86_op_int_noop(int cpu);
extern bool x86_pnpbios_disabled(void);
extern int set_rtc_noop(const struct timespec64 *now);
extern void get_rtc_noop(struct timespec64 *now);
#endif

View File

@@ -18,7 +18,6 @@
#include <linux/kexec.h>
#include <linux/i8253.h>
#include <linux/random.h>
#include <linux/swiotlb.h>
#include <asm/processor.h>
#include <asm/hypervisor.h>
#include <asm/hyperv-tlfs.h>
@@ -249,11 +248,6 @@ static uint32_t __init ms_hyperv_platform(void)
return HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS;
}
static unsigned char hv_get_nmi_reason(void)
{
return 0;
}
#ifdef CONFIG_X86_LOCAL_APIC
/*
* Prior to WS2016 Debug-VM sends NMIs to all CPUs which makes
@@ -408,12 +402,8 @@ static void __init ms_hyperv_init_platform(void)
pr_info("Hyper-V: Isolation Config: Group A 0x%x, Group B 0x%x\n",
ms_hyperv.isolation_config_a, ms_hyperv.isolation_config_b);
if (hv_get_isolation_type() == HV_ISOLATION_TYPE_SNP) {
if (hv_get_isolation_type() == HV_ISOLATION_TYPE_SNP)
static_branch_enable(&isolation_type_snp);
#ifdef CONFIG_SWIOTLB
swiotlb_unencrypted_base = ms_hyperv.shared_gpa_boundary;
#endif
}
}
if (hv_max_functions_eax >= HYPERV_CPUID_NESTED_FEATURES) {
@@ -524,6 +514,7 @@ static void __init ms_hyperv_init_platform(void)
/* Register Hyper-V specific clocksource */
hv_init_clocksource();
hv_vtl_init_platform();
#endif
/*
* TSC should be marked as unstable only after Hyper-V

View File

@@ -33,8 +33,8 @@ static int __init iommu_init_noop(void) { return 0; }
static void iommu_shutdown_noop(void) { }
bool __init bool_x86_init_noop(void) { return false; }
void x86_op_int_noop(int cpu) { }
static int set_rtc_noop(const struct timespec64 *now) { return -EINVAL; }
static void get_rtc_noop(struct timespec64 *now) { }
int set_rtc_noop(const struct timespec64 *now) { return -EINVAL; }
void get_rtc_noop(struct timespec64 *now) { }
static __initconst const struct of_device_id of_cmos_match[] = {
{ .compatible = "motorola,mc146818" },

View File

@@ -49,7 +49,7 @@ static bool direct_mode_enabled;
static int stimer0_irq = -1;
static int stimer0_message_sint;
static DEFINE_PER_CPU(long, stimer0_evt);
static __maybe_unused DEFINE_PER_CPU(long, stimer0_evt);
/*
* Common code for stimer0 interrupts coming via Direct Mode or
@@ -68,7 +68,7 @@ EXPORT_SYMBOL_GPL(hv_stimer0_isr);
* stimer0 interrupt handler for architectures that support
* per-cpu interrupts, which also implies Direct Mode.
*/
static irqreturn_t hv_stimer0_percpu_isr(int irq, void *dev_id)
static irqreturn_t __maybe_unused hv_stimer0_percpu_isr(int irq, void *dev_id)
{
hv_stimer0_isr();
return IRQ_HANDLED;
@@ -196,6 +196,7 @@ void __weak hv_remove_stimer0_handler(void)
{
};
#ifdef CONFIG_ACPI
/* Called only on architectures with per-cpu IRQs (i.e., not x86/x64) */
static int hv_setup_stimer0_irq(void)
{
@@ -230,6 +231,16 @@ static void hv_remove_stimer0_irq(void)
stimer0_irq = -1;
}
}
#else
static int hv_setup_stimer0_irq(void)
{
return 0;
}
static void hv_remove_stimer0_irq(void)
{
}
#endif
/* hv_stimer_alloc - Global initialization of the clockevent and stimer0 */
int hv_stimer_alloc(bool have_percpu_irqs)
@@ -506,9 +517,6 @@ static bool __init hv_init_tsc_clocksource(void)
{
union hv_reference_tsc_msr tsc_msr;
if (!(ms_hyperv.features & HV_MSR_REFERENCE_TSC_AVAILABLE))
return false;
/*
* If Hyper-V offers TSC_INVARIANT, then the virtualized TSC correctly
* handles frequency and offset changes due to live migration,
@@ -525,6 +533,9 @@ static bool __init hv_init_tsc_clocksource(void)
hyperv_cs_msr.rating = 250;
}
if (!(ms_hyperv.features & HV_MSR_REFERENCE_TSC_AVAILABLE))
return false;
hv_read_reference_counter = read_hv_clock_tsc;
/*

View File

@@ -4,15 +4,39 @@ menu "Microsoft Hyper-V guest support"
config HYPERV
tristate "Microsoft Hyper-V client drivers"
depends on ACPI && ((X86 && X86_LOCAL_APIC && HYPERVISOR_GUEST) \
|| (ARM64 && !CPU_BIG_ENDIAN))
depends on (X86 && X86_LOCAL_APIC && HYPERVISOR_GUEST) \
|| (ACPI && ARM64 && !CPU_BIG_ENDIAN)
select PARAVIRT
select X86_HV_CALLBACK_VECTOR if X86
select VMAP_PFN
select OF_EARLY_FLATTREE if OF
help
Select this option to run Linux as a Hyper-V client operating
system.
config HYPERV_VTL_MODE
bool "Enable Linux to boot in VTL context"
depends on X86_64 && HYPERV
default n
help
Virtual Secure Mode (VSM) is a set of hypervisor capabilities and
enlightenments offered to host and guest partitions which enables
the creation and management of new security boundaries within
operating system software.
VSM achieves and maintains isolation through Virtual Trust Levels
(VTLs). Virtual Trust Levels are hierarchical, with higher levels
being more privileged than lower levels. VTL0 is the least privileged
level, and currently only other level supported is VTL2.
Select this option to build a Linux kernel to run at a VTL other than
the normal VTL0, which currently is only VTL2. This option
initializes the x86 platform for VTL2, and adds the ability to boot
secondary CPUs directly into 64-bit context as required for VTLs other
than 0. A kernel built with this option must run at VTL2, and will
not run as a normal guest.
If unsure, say N
config HYPERV_TIMER
def_bool HYPERV && X86

View File

@@ -67,7 +67,7 @@ const struct vmbus_device vmbus_devs[] = {
{ .dev_type = HV_PCIE,
HV_PCIE_GUID,
.perf_device = false,
.allowed_in_isolated = false,
.allowed_in_isolated = true,
},
/* Synthetic Frame Buffer */

View File

@@ -104,8 +104,14 @@ int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo, u32 version)
vmbus_connection.msg_conn_id = VMBUS_MESSAGE_CONNECTION_ID;
}
msg->monitor_page1 = vmbus_connection.monitor_pages_pa[0];
msg->monitor_page2 = vmbus_connection.monitor_pages_pa[1];
/*
* shared_gpa_boundary is zero in non-SNP VMs, so it's safe to always
* bitwise OR it
*/
msg->monitor_page1 = virt_to_phys(vmbus_connection.monitor_pages[0]) |
ms_hyperv.shared_gpa_boundary;
msg->monitor_page2 = virt_to_phys(vmbus_connection.monitor_pages[1]) |
ms_hyperv.shared_gpa_boundary;
msg->target_vcpu = hv_cpu_number_to_vp_number(VMBUS_CONNECT_CPU);
@@ -219,72 +225,27 @@ int vmbus_connect(void)
* Setup the monitor notification facility. The 1st page for
* parent->child and the 2nd page for child->parent
*/
vmbus_connection.monitor_pages[0] = (void *)hv_alloc_hyperv_zeroed_page();
vmbus_connection.monitor_pages[1] = (void *)hv_alloc_hyperv_zeroed_page();
vmbus_connection.monitor_pages[0] = (void *)hv_alloc_hyperv_page();
vmbus_connection.monitor_pages[1] = (void *)hv_alloc_hyperv_page();
if ((vmbus_connection.monitor_pages[0] == NULL) ||
(vmbus_connection.monitor_pages[1] == NULL)) {
ret = -ENOMEM;
goto cleanup;
}
vmbus_connection.monitor_pages_original[0]
= vmbus_connection.monitor_pages[0];
vmbus_connection.monitor_pages_original[1]
= vmbus_connection.monitor_pages[1];
vmbus_connection.monitor_pages_pa[0]
= virt_to_phys(vmbus_connection.monitor_pages[0]);
vmbus_connection.monitor_pages_pa[1]
= virt_to_phys(vmbus_connection.monitor_pages[1]);
ret = set_memory_decrypted((unsigned long)
vmbus_connection.monitor_pages[0], 1);
ret |= set_memory_decrypted((unsigned long)
vmbus_connection.monitor_pages[1], 1);
if (ret)
goto cleanup;
if (hv_is_isolation_supported()) {
ret = set_memory_decrypted((unsigned long)
vmbus_connection.monitor_pages[0],
1);
ret |= set_memory_decrypted((unsigned long)
vmbus_connection.monitor_pages[1],
1);
if (ret)
goto cleanup;
/*
* Isolation VM with AMD SNP needs to access monitor page via
* address space above shared gpa boundary.
*/
if (hv_isolation_type_snp()) {
vmbus_connection.monitor_pages_pa[0] +=
ms_hyperv.shared_gpa_boundary;
vmbus_connection.monitor_pages_pa[1] +=
ms_hyperv.shared_gpa_boundary;
vmbus_connection.monitor_pages[0]
= memremap(vmbus_connection.monitor_pages_pa[0],
HV_HYP_PAGE_SIZE,
MEMREMAP_WB);
if (!vmbus_connection.monitor_pages[0]) {
ret = -ENOMEM;
goto cleanup;
}
vmbus_connection.monitor_pages[1]
= memremap(vmbus_connection.monitor_pages_pa[1],
HV_HYP_PAGE_SIZE,
MEMREMAP_WB);
if (!vmbus_connection.monitor_pages[1]) {
ret = -ENOMEM;
goto cleanup;
}
}
/*
* Set memory host visibility hvcall smears memory
* and so zero monitor pages here.
*/
memset(vmbus_connection.monitor_pages[0], 0x00,
HV_HYP_PAGE_SIZE);
memset(vmbus_connection.monitor_pages[1], 0x00,
HV_HYP_PAGE_SIZE);
}
/*
* Set_memory_decrypted() will change the memory contents if
* decryption occurs, so zero monitor pages here.
*/
memset(vmbus_connection.monitor_pages[0], 0x00, HV_HYP_PAGE_SIZE);
memset(vmbus_connection.monitor_pages[1], 0x00, HV_HYP_PAGE_SIZE);
msginfo = kzalloc(sizeof(*msginfo) +
sizeof(struct vmbus_channel_initiate_contact),
@@ -376,31 +337,13 @@ void vmbus_disconnect(void)
vmbus_connection.int_page = NULL;
}
if (hv_is_isolation_supported()) {
/*
* memunmap() checks input address is ioremap address or not
* inside. It doesn't unmap any thing in the non-SNP CVM and
* so not check CVM type here.
*/
memunmap(vmbus_connection.monitor_pages[0]);
memunmap(vmbus_connection.monitor_pages[1]);
set_memory_encrypted((unsigned long)vmbus_connection.monitor_pages[0], 1);
set_memory_encrypted((unsigned long)vmbus_connection.monitor_pages[1], 1);
set_memory_encrypted((unsigned long)
vmbus_connection.monitor_pages_original[0],
1);
set_memory_encrypted((unsigned long)
vmbus_connection.monitor_pages_original[1],
1);
}
hv_free_hyperv_page((unsigned long)
vmbus_connection.monitor_pages_original[0]);
hv_free_hyperv_page((unsigned long)
vmbus_connection.monitor_pages_original[1]);
vmbus_connection.monitor_pages_original[0] =
vmbus_connection.monitor_pages[0] = NULL;
vmbus_connection.monitor_pages_original[1] =
vmbus_connection.monitor_pages[1] = NULL;
hv_free_hyperv_page((unsigned long)vmbus_connection.monitor_pages[0]);
hv_free_hyperv_page((unsigned long)vmbus_connection.monitor_pages[1]);
vmbus_connection.monitor_pages[0] = NULL;
vmbus_connection.monitor_pages[1] = NULL;
}
/*

View File

@@ -38,42 +38,6 @@ int hv_init(void)
return 0;
}
/*
* Functions for allocating and freeing memory with size and
* alignment HV_HYP_PAGE_SIZE. These functions are needed because
* the guest page size may not be the same as the Hyper-V page
* size. We depend upon kmalloc() aligning power-of-two size
* allocations to the allocation size boundary, so that the
* allocated memory appears to Hyper-V as a page of the size
* it expects.
*/
void *hv_alloc_hyperv_page(void)
{
BUILD_BUG_ON(PAGE_SIZE < HV_HYP_PAGE_SIZE);
if (PAGE_SIZE == HV_HYP_PAGE_SIZE)
return (void *)__get_free_page(GFP_KERNEL);
else
return kmalloc(HV_HYP_PAGE_SIZE, GFP_KERNEL);
}
void *hv_alloc_hyperv_zeroed_page(void)
{
if (PAGE_SIZE == HV_HYP_PAGE_SIZE)
return (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
else
return kzalloc(HV_HYP_PAGE_SIZE, GFP_KERNEL);
}
void hv_free_hyperv_page(unsigned long addr)
{
if (PAGE_SIZE == HV_HYP_PAGE_SIZE)
free_page(addr);
else
kfree((void *)addr);
}
/*
* hv_post_message - Post a message using the hypervisor message IPC.
*
@@ -84,14 +48,15 @@ int hv_post_message(union hv_connection_id connection_id,
void *payload, size_t payload_size)
{
struct hv_input_post_message *aligned_msg;
struct hv_per_cpu_context *hv_cpu;
unsigned long flags;
u64 status;
if (payload_size > HV_MESSAGE_PAYLOAD_BYTE_COUNT)
return -EMSGSIZE;
hv_cpu = get_cpu_ptr(hv_context.cpu_context);
aligned_msg = hv_cpu->post_msg_page;
local_irq_save(flags);
aligned_msg = *this_cpu_ptr(hyperv_pcpu_input_arg);
aligned_msg->connectionid = connection_id;
aligned_msg->reserved = 0;
aligned_msg->message_type = message_type;
@@ -106,11 +71,7 @@ int hv_post_message(union hv_connection_id connection_id,
status = hv_do_hypercall(HVCALL_POST_MESSAGE,
aligned_msg, NULL);
/* Preemption must remain disabled until after the hypercall
* so some other thread can't get scheduled onto this cpu and
* corrupt the per-cpu post_msg_page
*/
put_cpu_ptr(hv_cpu);
local_irq_restore(flags);
return hv_result(status);
}
@@ -162,12 +123,6 @@ int hv_synic_alloc(void)
goto err;
}
}
hv_cpu->post_msg_page = (void *)get_zeroed_page(GFP_ATOMIC);
if (hv_cpu->post_msg_page == NULL) {
pr_err("Unable to allocate post msg page\n");
goto err;
}
}
return 0;
@@ -190,7 +145,6 @@ void hv_synic_free(void)
free_page((unsigned long)hv_cpu->synic_event_page);
free_page((unsigned long)hv_cpu->synic_message_page);
free_page((unsigned long)hv_cpu->post_msg_page);
}
kfree(hv_context.hv_numa_map);
@@ -217,11 +171,13 @@ void hv_synic_enable_regs(unsigned int cpu)
simp.simp_enabled = 1;
if (hv_isolation_type_snp() || hv_root_partition) {
/* Mask out vTOM bit. ioremap_cache() maps decrypted */
u64 base = (simp.base_simp_gpa << HV_HYP_PAGE_SHIFT) &
~ms_hyperv.shared_gpa_boundary;
hv_cpu->synic_message_page
= memremap(simp.base_simp_gpa << HV_HYP_PAGE_SHIFT,
HV_HYP_PAGE_SIZE, MEMREMAP_WB);
= (void *)ioremap_cache(base, HV_HYP_PAGE_SIZE);
if (!hv_cpu->synic_message_page)
pr_err("Fail to map syinc message page.\n");
pr_err("Fail to map synic message page.\n");
} else {
simp.base_simp_gpa = virt_to_phys(hv_cpu->synic_message_page)
>> HV_HYP_PAGE_SHIFT;
@@ -234,12 +190,13 @@ void hv_synic_enable_regs(unsigned int cpu)
siefp.siefp_enabled = 1;
if (hv_isolation_type_snp() || hv_root_partition) {
hv_cpu->synic_event_page =
memremap(siefp.base_siefp_gpa << HV_HYP_PAGE_SHIFT,
HV_HYP_PAGE_SIZE, MEMREMAP_WB);
/* Mask out vTOM bit. ioremap_cache() maps decrypted */
u64 base = (siefp.base_siefp_gpa << HV_HYP_PAGE_SHIFT) &
~ms_hyperv.shared_gpa_boundary;
hv_cpu->synic_event_page
= (void *)ioremap_cache(base, HV_HYP_PAGE_SIZE);
if (!hv_cpu->synic_event_page)
pr_err("Fail to map syinc event page.\n");
pr_err("Fail to map synic event page.\n");
} else {
siefp.base_siefp_gpa = virt_to_phys(hv_cpu->synic_event_page)
>> HV_HYP_PAGE_SHIFT;
@@ -316,7 +273,7 @@ void hv_synic_disable_regs(unsigned int cpu)
*/
simp.simp_enabled = 0;
if (hv_isolation_type_snp() || hv_root_partition) {
memunmap(hv_cpu->synic_message_page);
iounmap(hv_cpu->synic_message_page);
hv_cpu->synic_message_page = NULL;
} else {
simp.base_simp_gpa = 0;
@@ -328,7 +285,7 @@ void hv_synic_disable_regs(unsigned int cpu)
siefp.siefp_enabled = 0;
if (hv_isolation_type_snp() || hv_root_partition) {
memunmap(hv_cpu->synic_event_page);
iounmap(hv_cpu->synic_event_page);
hv_cpu->synic_event_page = NULL;
} else {
siefp.base_siefp_gpa = 0;

View File

@@ -17,8 +17,11 @@
#include <linux/export.h>
#include <linux/bitfield.h>
#include <linux/cpumask.h>
#include <linux/sched/task_stack.h>
#include <linux/panic_notifier.h>
#include <linux/ptrace.h>
#include <linux/kdebug.h>
#include <linux/kmsg_dump.h>
#include <linux/slab.h>
#include <linux/dma-map-ops.h>
#include <asm/hyperv-tlfs.h>
@@ -54,6 +57,10 @@ EXPORT_SYMBOL_GPL(hyperv_pcpu_input_arg);
void * __percpu *hyperv_pcpu_output_arg;
EXPORT_SYMBOL_GPL(hyperv_pcpu_output_arg);
static void hv_kmsg_dump_unregister(void);
static struct ctl_table_header *hv_ctl_table_hdr;
/*
* Hyper-V specific initialization and shutdown code that is
* common across all architectures. Called from architecture
@@ -62,6 +69,12 @@ EXPORT_SYMBOL_GPL(hyperv_pcpu_output_arg);
void __init hv_common_free(void)
{
unregister_sysctl_table(hv_ctl_table_hdr);
hv_ctl_table_hdr = NULL;
if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE)
hv_kmsg_dump_unregister();
kfree(hv_vp_index);
hv_vp_index = NULL;
@@ -72,10 +85,203 @@ void __init hv_common_free(void)
hyperv_pcpu_input_arg = NULL;
}
/*
* Functions for allocating and freeing memory with size and
* alignment HV_HYP_PAGE_SIZE. These functions are needed because
* the guest page size may not be the same as the Hyper-V page
* size. We depend upon kmalloc() aligning power-of-two size
* allocations to the allocation size boundary, so that the
* allocated memory appears to Hyper-V as a page of the size
* it expects.
*/
void *hv_alloc_hyperv_page(void)
{
BUILD_BUG_ON(PAGE_SIZE < HV_HYP_PAGE_SIZE);
if (PAGE_SIZE == HV_HYP_PAGE_SIZE)
return (void *)__get_free_page(GFP_KERNEL);
else
return kmalloc(HV_HYP_PAGE_SIZE, GFP_KERNEL);
}
EXPORT_SYMBOL_GPL(hv_alloc_hyperv_page);
void *hv_alloc_hyperv_zeroed_page(void)
{
if (PAGE_SIZE == HV_HYP_PAGE_SIZE)
return (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
else
return kzalloc(HV_HYP_PAGE_SIZE, GFP_KERNEL);
}
EXPORT_SYMBOL_GPL(hv_alloc_hyperv_zeroed_page);
void hv_free_hyperv_page(unsigned long addr)
{
if (PAGE_SIZE == HV_HYP_PAGE_SIZE)
free_page(addr);
else
kfree((void *)addr);
}
EXPORT_SYMBOL_GPL(hv_free_hyperv_page);
static void *hv_panic_page;
/*
* Boolean to control whether to report panic messages over Hyper-V.
*
* It can be set via /proc/sys/kernel/hyperv_record_panic_msg
*/
static int sysctl_record_panic_msg = 1;
/*
* sysctl option to allow the user to control whether kmsg data should be
* reported to Hyper-V on panic.
*/
static struct ctl_table hv_ctl_table[] = {
{
.procname = "hyperv_record_panic_msg",
.data = &sysctl_record_panic_msg,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE
},
{}
};
static int hv_die_panic_notify_crash(struct notifier_block *self,
unsigned long val, void *args);
static struct notifier_block hyperv_die_report_block = {
.notifier_call = hv_die_panic_notify_crash,
};
static struct notifier_block hyperv_panic_report_block = {
.notifier_call = hv_die_panic_notify_crash,
};
/*
* The following callback works both as die and panic notifier; its
* goal is to provide panic information to the hypervisor unless the
* kmsg dumper is used [see hv_kmsg_dump()], which provides more
* information but isn't always available.
*
* Notice that both the panic/die report notifiers are registered only
* if we have the capability HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE set.
*/
static int hv_die_panic_notify_crash(struct notifier_block *self,
unsigned long val, void *args)
{
struct pt_regs *regs;
bool is_die;
/* Don't notify Hyper-V unless we have a die oops event or panic. */
if (self == &hyperv_panic_report_block) {
is_die = false;
regs = current_pt_regs();
} else { /* die event */
if (val != DIE_OOPS)
return NOTIFY_DONE;
is_die = true;
regs = ((struct die_args *)args)->regs;
}
/*
* Hyper-V should be notified only once about a panic/die. If we will
* be calling hv_kmsg_dump() later with kmsg data, don't do the
* notification here.
*/
if (!sysctl_record_panic_msg || !hv_panic_page)
hyperv_report_panic(regs, val, is_die);
return NOTIFY_DONE;
}
/*
* Callback from kmsg_dump. Grab as much as possible from the end of the kmsg
* buffer and call into Hyper-V to transfer the data.
*/
static void hv_kmsg_dump(struct kmsg_dumper *dumper,
enum kmsg_dump_reason reason)
{
struct kmsg_dump_iter iter;
size_t bytes_written;
/* We are only interested in panics. */
if (reason != KMSG_DUMP_PANIC || !sysctl_record_panic_msg)
return;
/*
* Write dump contents to the page. No need to synchronize; panic should
* be single-threaded.
*/
kmsg_dump_rewind(&iter);
kmsg_dump_get_buffer(&iter, false, hv_panic_page, HV_HYP_PAGE_SIZE,
&bytes_written);
if (!bytes_written)
return;
/*
* P3 to contain the physical address of the panic page & P4 to
* contain the size of the panic data in that page. Rest of the
* registers are no-op when the NOTIFY_MSG flag is set.
*/
hv_set_register(HV_REGISTER_CRASH_P0, 0);
hv_set_register(HV_REGISTER_CRASH_P1, 0);
hv_set_register(HV_REGISTER_CRASH_P2, 0);
hv_set_register(HV_REGISTER_CRASH_P3, virt_to_phys(hv_panic_page));
hv_set_register(HV_REGISTER_CRASH_P4, bytes_written);
/*
* Let Hyper-V know there is crash data available along with
* the panic message.
*/
hv_set_register(HV_REGISTER_CRASH_CTL,
(HV_CRASH_CTL_CRASH_NOTIFY |
HV_CRASH_CTL_CRASH_NOTIFY_MSG));
}
static struct kmsg_dumper hv_kmsg_dumper = {
.dump = hv_kmsg_dump,
};
static void hv_kmsg_dump_unregister(void)
{
kmsg_dump_unregister(&hv_kmsg_dumper);
unregister_die_notifier(&hyperv_die_report_block);
atomic_notifier_chain_unregister(&panic_notifier_list,
&hyperv_panic_report_block);
hv_free_hyperv_page((unsigned long)hv_panic_page);
hv_panic_page = NULL;
}
static void hv_kmsg_dump_register(void)
{
int ret;
hv_panic_page = hv_alloc_hyperv_zeroed_page();
if (!hv_panic_page) {
pr_err("Hyper-V: panic message page memory allocation failed\n");
return;
}
ret = kmsg_dump_register(&hv_kmsg_dumper);
if (ret) {
pr_err("Hyper-V: kmsg dump register error 0x%x\n", ret);
hv_free_hyperv_page((unsigned long)hv_panic_page);
hv_panic_page = NULL;
}
}
int __init hv_common_init(void)
{
int i;
if (hv_is_isolation_supported())
sysctl_record_panic_msg = 0;
/*
* Hyper-V expects to get crash register data or kmsg when
* crash enlightment is available and system crashes. Set
@@ -84,8 +290,33 @@ int __init hv_common_init(void)
* kernel.
*/
if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) {
u64 hyperv_crash_ctl;
crash_kexec_post_notifiers = true;
pr_info("Hyper-V: enabling crash_kexec_post_notifiers\n");
/*
* Panic message recording (sysctl_record_panic_msg)
* is enabled by default in non-isolated guests and
* disabled by default in isolated guests; the panic
* message recording won't be available in isolated
* guests should the following registration fail.
*/
hv_ctl_table_hdr = register_sysctl("kernel", hv_ctl_table);
if (!hv_ctl_table_hdr)
pr_err("Hyper-V: sysctl table register error");
/*
* Register for panic kmsg callback only if the right
* capability is supported by the hypervisor.
*/
hyperv_crash_ctl = hv_get_register(HV_REGISTER_CRASH_CTL);
if (hyperv_crash_ctl & HV_CRASH_CTL_CRASH_NOTIFY_MSG)
hv_kmsg_dump_register();
register_die_notifier(&hyperv_die_report_block);
atomic_notifier_chain_register(&panic_notifier_list,
&hyperv_panic_report_block);
}
/*
@@ -311,14 +542,3 @@ u64 __weak hv_ghcb_hypercall(u64 control, void *input, void *output, u32 input_s
return HV_STATUS_INVALID_PARAMETER;
}
EXPORT_SYMBOL_GPL(hv_ghcb_hypercall);
void __weak *hv_map_memory(void *addr, unsigned long size)
{
return NULL;
}
EXPORT_SYMBOL_GPL(hv_map_memory);
void __weak hv_unmap_memory(void *addr)
{
}
EXPORT_SYMBOL_GPL(hv_unmap_memory);

View File

@@ -122,10 +122,6 @@ enum {
struct hv_per_cpu_context {
void *synic_message_page;
void *synic_event_page;
/*
* buffer to post messages to the host.
*/
void *post_msg_page;
/*
* Starting with win8, we can take channel interrupts on any CPU;
@@ -241,8 +237,6 @@ struct vmbus_connection {
* is child->parent notification
*/
struct hv_monitor_page *monitor_pages[2];
void *monitor_pages_original[2];
phys_addr_t monitor_pages_pa[2];
struct list_head chn_msg_list;
spinlock_t channelmsg_lock;

Some files were not shown because too many files have changed in this diff Show More