Mirror of https://github.com/armbian/linux-cix.git (synced 2026-01-06 12:30:45 -08:00)
Merge tag 'kvmarm-5.5' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm into HEAD
KVM/arm updates for Linux 5.5:

- Allow non-ISV data aborts to be reported to userspace
- Allow injection of data aborts from userspace
- Expose stolen time to guests
- GICv4 performance improvements
- vgic ITS emulation fixes
- Simplify FWB handling
- Enable halt pool counters
- Make the emulated timer PREEMPT_RT compliant

Conflicts:
	include/uapi/linux/kvm.h
@@ -3083,9 +3083,9 @@
 			[X86,PV_OPS] Disable paravirtualized VMware scheduler
 			clock and use the default one.

-	no-steal-acc	[X86,KVM] Disable paravirtualized steal time accounting.
-			steal time is computed, but won't influence scheduler
-			behaviour
+	no-steal-acc	[X86,KVM,ARM64] Disable paravirtualized steal time
+			accounting. steal time is computed, but won't
+			influence scheduler behaviour

 	nolapic		[X86-32,APIC] Do not enable or use the local APIC.
@@ -1002,12 +1002,18 @@ Specifying exception.has_esr on a system that does not support it will return
 -EINVAL. Setting anything other than the lower 24bits of exception.serror_esr
 will return -EINVAL.

+It is not possible to read back a pending external abort (injected via
+KVM_SET_VCPU_EVENTS or otherwise) because such an exception is always delivered
+directly to the virtual CPU.
+
 struct kvm_vcpu_events {
 	struct {
 		__u8 serror_pending;
 		__u8 serror_has_esr;
+		__u8 ext_dabt_pending;
 		/* Align it to 8 bytes */
-		__u8 pad[6];
+		__u8 pad[5];
 		__u64 serror_esr;
 	} exception;
 	__u32 reserved[12];
@@ -1051,9 +1057,23 @@ contain a valid state and shall be written into the VCPU.

 ARM/ARM64:

+User space may need to inject several types of events to the guest.
+
 Set the pending SError exception state for this VCPU. It is not possible to
 'cancel' an SError that has been made pending.

+If the guest performed an access to I/O memory which could not be handled by
+userspace, for example because of missing instruction syndrome decode
+information or because there is no device mapped at the accessed IPA, then
+userspace can ask the kernel to inject an external abort using the address
+from the exiting fault on the VCPU. It is a programming error to set
+ext_dabt_pending after an exit which was not either KVM_EXIT_MMIO or
+KVM_EXIT_ARM_NISV. This feature is only available if the system supports
+KVM_CAP_ARM_INJECT_EXT_DABT. This is a helper which provides commonality in
+how userspace reports accesses for the above cases to guests, across different
+userspace implementations. Nevertheless, userspace can still emulate all Arm
+exceptions by manipulating individual registers using the KVM_SET_ONE_REG API.
+
 See KVM_GET_VCPU_EVENTS for the data structure.
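To make the new field concrete, here is a minimal userspace sketch of injecting an external data abort with KVM_SET_VCPU_EVENTS. This is not part of the patch; the vcpu_fd descriptor and error handling are assumed.

#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Sketch: ask KVM to inject an external data abort at the address of the
 * exiting fault. Only valid after KVM_EXIT_MMIO or KVM_EXIT_ARM_NISV, and
 * only if KVM_CAP_ARM_INJECT_EXT_DABT is supported. */
static int inject_ext_dabt(int vcpu_fd)
{
	struct kvm_vcpu_events events;

	memset(&events, 0, sizeof(events));
	events.exception.ext_dabt_pending = 1;

	return ioctl(vcpu_fd, KVM_SET_VCPU_EVENTS, &events);
}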
@@ -4471,6 +4491,39 @@ Hyper-V SynIC state change. Notification is used to remap SynIC
 event/message pages and to enable/disable SynIC messages/events processing
 in userspace.

+		/* KVM_EXIT_ARM_NISV */
+		struct {
+			__u64 esr_iss;
+			__u64 fault_ipa;
+		} arm_nisv;
+
+Used on arm and arm64 systems. If a guest accesses memory not in a memslot,
+KVM will typically return to userspace and ask it to do MMIO emulation on its
+behalf. However, for certain classes of instructions, no instruction decode
+(direction, length of memory access) is provided, and fetching and decoding
+the instruction from the VM is overly complicated to live in the kernel.
+
+Historically, when this situation occurred, KVM would print a warning and kill
+the VM. KVM assumed that if the guest accessed non-memslot memory, it was
+trying to do I/O, which just couldn't be emulated, and the warning message was
+phrased accordingly. However, what happened more often was that a guest bug
+caused access outside the guest memory areas which should lead to a more
+meaningful warning message and an external abort in the guest, if the access
+did not fall within an I/O window.
+
+Userspace implementations can query for KVM_CAP_ARM_NISV_TO_USER, and enable
+this capability at VM creation. Once this is done, these types of errors will
+instead return to userspace with KVM_EXIT_ARM_NISV, with the valid bits from
+the HSR (arm) and ESR_EL2 (arm64) in the esr_iss field, and the faulting IPA
+in the fault_ipa field. Userspace can either fix up the access if it's
+actually an I/O access by decoding the instruction from guest memory (if it's
+very brave) and continue executing the guest, or it can decide to suspend,
+dump, or restart the guest.
+
+Note that KVM does not skip the faulting instruction as it does for
+KVM_EXIT_MMIO, but userspace has to emulate any change to the processing state
+if it decides to decode and emulate the instruction.
+
 		/* Fix the size of the union. */
 		char padding[256];
 	};
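For context, the exit-handling side that the paragraphs above describe might look like the following in a VMM. This is a hedged sketch rather than the patch's own code; vm_fd, vcpu_fd and the fall-back policy (injecting an abort instead of decoding the instruction) are assumptions.

#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Sketch: opt in at VM creation time. */
static int enable_nisv_to_user(int vm_fd)
{
	struct kvm_enable_cap cap = { .cap = KVM_CAP_ARM_NISV_TO_USER };

	return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
}

/* Sketch: in the vcpu run loop, either emulate the access or, as here,
 * reflect it back to the guest as an external data abort. */
static void handle_arm_nisv(int vcpu_fd, struct kvm_run *run)
{
	struct kvm_vcpu_events events;

	fprintf(stderr, "NISV abort: sanitized ISS %#llx at IPA %#llx\n",
		(unsigned long long)run->arm_nisv.esr_iss,
		(unsigned long long)run->arm_nisv.fault_ipa);

	memset(&events, 0, sizeof(events));
	events.exception.ext_dabt_pending = 1;
	ioctl(vcpu_fd, KVM_SET_VCPU_EVENTS, &events);
}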
80	Documentation/virt/kvm/arm/pvtime.rst	Normal file
@@ -0,0 +1,80 @@
.. SPDX-License-Identifier: GPL-2.0

Paravirtualized time support for arm64
======================================

Arm specification DEN0057/A defines a standard for paravirtualised time
support for AArch64 guests:

https://developer.arm.com/docs/den0057/a

KVM/arm64 implements the stolen time part of this specification by providing
some hypervisor service calls to support a paravirtualized guest obtaining a
view of the amount of time stolen from its execution.

Two new SMCCC compatible hypercalls are defined:

* PV_TIME_FEATURES: 0xC5000020
* PV_TIME_ST:       0xC5000021

These are only available in the SMC64/HVC64 calling convention as
paravirtualized time is not available to 32 bit Arm guests. The existence of
the PV_TIME_FEATURES hypercall should be probed using the SMCCC 1.1
ARCH_FEATURES mechanism before calling it.

PV_TIME_FEATURES
    ============= ========    ==========
    Function ID:  (uint32)    0xC5000020
    PV_call_id:   (uint32)    The function to query for support.
                              Currently only PV_TIME_ST is supported.
    Return value: (int64)     NOT_SUPPORTED (-1) or SUCCESS (0) if the relevant
                              PV-time feature is supported by the hypervisor.
    ============= ========    ==========

PV_TIME_ST
    ============= ========    ==========
    Function ID:  (uint32)    0xC5000021
    Return value: (int64)     IPA of the stolen time data structure for this
                              VCPU. On failure:
                              NOT_SUPPORTED (-1)
    ============= ========    ==========

The IPA returned by PV_TIME_ST should be mapped by the guest as normal memory
with inner and outer write back caching attributes, in the inner shareable
domain. A total of 16 bytes from the IPA returned are guaranteed to be
meaningfully filled by the hypervisor (see structure below).

PV_TIME_ST returns the structure for the calling VCPU.

Stolen Time
-----------

The structure pointed to by the PV_TIME_ST hypercall is as follows:

+-------------+-------------+-------------+----------------------------+
| Field       | Byte Length | Byte Offset | Description                |
+=============+=============+=============+============================+
| Revision    | 4           | 0           | Must be 0 for version 1.0  |
+-------------+-------------+-------------+----------------------------+
| Attributes  | 4           | 4           | Must be 0                  |
+-------------+-------------+-------------+----------------------------+
| Stolen time | 8           | 8           | Stolen time in unsigned    |
|             |             |             | nanoseconds indicating how |
|             |             |             | much time this VCPU thread |
|             |             |             | was involuntarily not      |
|             |             |             | running on a physical CPU. |
+-------------+-------------+-------------+----------------------------+

All values in the structure are stored little-endian.

The structure will be updated by the hypervisor prior to scheduling a VCPU. It
will be present within a reserved region of the normal memory given to the
guest. The guest should not attempt to write into this memory. There is a
structure per VCPU of the guest.

It is advisable that one or more 64k pages are set aside for the purpose of
these structures and not used for other purposes; this enables the guest to map
the region using 64k pages and avoids conflicting attributes with other memory.

For the user space interface see Documentation/virt/kvm/devices/vcpu.txt
section "3. GROUP: KVM_ARM_VCPU_PVTIME_CTRL".
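A guest-side sketch of the probe-and-read sequence the document specifies follows; it condenses what the arm64 paravirt code later in this diff does, assuming the in-tree SMCCC helpers and constants. The real driver keeps a per-CPU mapping instead of remapping on every read.

#include <linux/arm-smccc.h>
#include <linux/io.h>
#include <asm/pvclock-abi.h>

/* Sketch: probe for PV_TIME_ST and read this VCPU's stolen time once. */
static u64 read_stolen_time_once(void)
{
	struct pvclock_vcpu_stolen_time *st;
	struct arm_smccc_res res;
	u64 stolen;

	/* Does the hypervisor implement PV_TIME_ST? */
	arm_smccc_1_1_invoke(ARM_SMCCC_HV_PV_TIME_FEATURES,
			     ARM_SMCCC_HV_PV_TIME_ST, &res);
	if (res.a0 != SMCCC_RET_SUCCESS)
		return 0;

	/* Returns the IPA of the stolen time structure for this VCPU. */
	arm_smccc_1_1_invoke(ARM_SMCCC_HV_PV_TIME_ST, &res);
	if (res.a0 == SMCCC_RET_NOT_SUPPORTED)
		return 0;

	/* Map as normal cacheable memory, as required above. */
	st = memremap(res.a0, sizeof(*st), MEMREMAP_WB);
	if (!st)
		return 0;

	/* All fields are stored little-endian. */
	stolen = le64_to_cpu(READ_ONCE(st->stolen_time));
	memunmap(st);

	return stolen;
}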
@@ -60,3 +60,17 @@ time to use the number provided for a given timer, overwriting any previously
 configured values on other VCPUs. Userspace should configure the interrupt
 numbers on at least one VCPU after creating all VCPUs and before running any
 VCPUs.
+
+3. GROUP: KVM_ARM_VCPU_PVTIME_CTRL
+Architectures: ARM64
+
+3.1 ATTRIBUTE: KVM_ARM_VCPU_PVTIME_IPA
+Parameters: 64-bit base address
+Returns: -ENXIO:  Stolen time not implemented
+         -EEXIST: Base address already set for this VCPU
+         -EINVAL: Base address not 64 byte aligned
+
+Specifies the base address of the stolen time structure for this VCPU. The
+base address must be 64 byte aligned and exist within a valid guest memory
+region. See Documentation/virt/kvm/arm/pvtime.txt for more information
+including the layout of the stolen time structure.
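As an illustration, a VMM would set this attribute through the generic vcpu device-attr ioctl. A hedged sketch; vcpu_fd and the chosen guest address are assumptions.

#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Sketch: point KVM at the stolen time structure for one VCPU.
 * ipa must be 64 byte aligned and lie inside a registered memslot. */
static int set_pvtime_ipa(int vcpu_fd, __u64 ipa)
{
	struct kvm_device_attr attr = {
		.group = KVM_ARM_VCPU_PVTIME_CTRL,
		.attr  = KVM_ARM_VCPU_PVTIME_IPA,
		.addr  = (__u64)(unsigned long)&ipa,	/* pointer to the base address */
	};

	return ioctl(vcpu_fd, KVM_SET_DEVICE_ATTR, &attr);
}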
@@ -162,6 +162,7 @@
 #define HSR_ISV		(_AC(1, UL) << HSR_ISV_SHIFT)
 #define HSR_SRT_SHIFT	(16)
 #define HSR_SRT_MASK	(0xf << HSR_SRT_SHIFT)
+#define HSR_CM		(1 << 8)
 #define HSR_FSC		(0x3f)
 #define HSR_FSC_TYPE	(0x3c)
 #define HSR_SSE		(1 << 21)
@@ -95,12 +95,12 @@ static inline unsigned long *vcpu_hcr(const struct kvm_vcpu *vcpu)
 	return (unsigned long *)&vcpu->arch.hcr;
 }

-static inline void vcpu_clear_wfe_traps(struct kvm_vcpu *vcpu)
+static inline void vcpu_clear_wfx_traps(struct kvm_vcpu *vcpu)
 {
 	vcpu->arch.hcr &= ~HCR_TWE;
 }

-static inline void vcpu_set_wfe_traps(struct kvm_vcpu *vcpu)
+static inline void vcpu_set_wfx_traps(struct kvm_vcpu *vcpu)
 {
 	vcpu->arch.hcr |= HCR_TWE;
 }
@@ -167,6 +167,11 @@ static inline bool kvm_vcpu_dabt_isvalid(struct kvm_vcpu *vcpu)
 	return kvm_vcpu_get_hsr(vcpu) & HSR_ISV;
 }

+static inline unsigned long kvm_vcpu_dabt_iss_nisv_sanitized(const struct kvm_vcpu *vcpu)
+{
+	return kvm_vcpu_get_hsr(vcpu) & (HSR_CM | HSR_WNR | HSR_FSC);
+}
+
 static inline bool kvm_vcpu_dabt_iswrite(struct kvm_vcpu *vcpu)
 {
 	return kvm_vcpu_get_hsr(vcpu) & HSR_WNR;
@@ -7,6 +7,7 @@
 #ifndef __ARM_KVM_HOST_H__
 #define __ARM_KVM_HOST_H__

+#include <linux/arm-smccc.h>
 #include <linux/errno.h>
 #include <linux/types.h>
 #include <linux/kvm_types.h>
@@ -38,6 +39,7 @@
 	KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
 #define KVM_REQ_IRQ_PENDING	KVM_ARCH_REQ(1)
 #define KVM_REQ_VCPU_RESET	KVM_ARCH_REQ(2)
+#define KVM_REQ_RECORD_STEAL	KVM_ARCH_REQ(3)

 DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
@@ -76,6 +78,14 @@ struct kvm_arch {

 	/* Mandated version of PSCI */
 	u32 psci_version;
+
+	/*
+	 * If we encounter a data abort without valid instruction syndrome
+	 * information, report this to user space. User space can (and
+	 * should) opt in to this feature if KVM_CAP_ARM_NISV_TO_USER is
+	 * supported.
+	 */
+	bool return_nisv_io_abort_to_user;
 };

 #define KVM_NR_MEM_OBJS 40
@@ -323,6 +333,29 @@ static inline int kvm_arch_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 int kvm_perf_init(void);
 int kvm_perf_teardown(void);

+static inline long kvm_hypercall_pv_features(struct kvm_vcpu *vcpu)
+{
+	return SMCCC_RET_NOT_SUPPORTED;
+}
+
+static inline gpa_t kvm_init_stolen_time(struct kvm_vcpu *vcpu)
+{
+	return GPA_INVALID;
+}
+
+static inline void kvm_update_stolen_time(struct kvm_vcpu *vcpu)
+{
+}
+
+static inline void kvm_arm_pvtime_vcpu_init(struct kvm_vcpu_arch *vcpu_arch)
+{
+}
+
+static inline bool kvm_arm_is_pvtime_enabled(struct kvm_vcpu_arch *vcpu_arch)
+{
+	return false;
+}
+
 void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot);

 struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr);
@@ -131,8 +131,9 @@ struct kvm_vcpu_events {
 	struct {
 		__u8 serror_pending;
 		__u8 serror_has_esr;
+		__u8 ext_dabt_pending;
 		/* Align it to 8 bytes */
-		__u8 pad[6];
+		__u8 pad[5];
 		__u64 serror_esr;
 	} exception;
 	__u32 reserved[12];
@@ -24,7 +24,7 @@ obj-y += kvm-arm.o init.o interrupts.o
 obj-y += handle_exit.o guest.o emulate.o reset.o
 obj-y += coproc.o coproc_a15.o coproc_a7.o vgic-v3-coproc.o
 obj-y += $(KVM)/arm/arm.o $(KVM)/arm/mmu.o $(KVM)/arm/mmio.o
-obj-y += $(KVM)/arm/psci.o $(KVM)/arm/perf.o
+obj-y += $(KVM)/arm/psci.o $(KVM)/arm/perf.o $(KVM)/arm/hypercalls.o
 obj-y += $(KVM)/arm/aarch32.o

 obj-y += $(KVM)/arm/vgic/vgic.o
@@ -21,6 +21,10 @@
 #define VCPU_STAT(x) { #x, offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU }

 struct kvm_stats_debugfs_item debugfs_entries[] = {
+	VCPU_STAT(halt_successful_poll),
+	VCPU_STAT(halt_attempted_poll),
+	VCPU_STAT(halt_poll_invalid),
+	VCPU_STAT(halt_wakeup),
 	VCPU_STAT(hvc_exit_stat),
 	VCPU_STAT(wfe_exit_stat),
 	VCPU_STAT(wfi_exit_stat),
@@ -255,6 +259,12 @@ int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu,
 {
 	events->exception.serror_pending = !!(*vcpu_hcr(vcpu) & HCR_VA);

+	/*
+	 * We never return a pending ext_dabt here because we deliver it to
+	 * the virtual CPU directly when setting the event and it's no longer
+	 * 'pending' at this point.
+	 */
+
 	return 0;
 }
@@ -263,12 +273,16 @@ int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu,
 {
 	bool serror_pending = events->exception.serror_pending;
 	bool has_esr = events->exception.serror_has_esr;
+	bool ext_dabt_pending = events->exception.ext_dabt_pending;

 	if (serror_pending && has_esr)
 		return -EINVAL;
 	else if (serror_pending)
 		kvm_inject_vabt(vcpu);

+	if (ext_dabt_pending)
+		kvm_inject_dabt(vcpu, kvm_vcpu_get_hfar(vcpu));
+
 	return 0;
 }
@@ -9,7 +9,7 @@
 #include <asm/kvm_emulate.h>
 #include <asm/kvm_coproc.h>
 #include <asm/kvm_mmu.h>
-#include <kvm/arm_psci.h>
+#include <kvm/arm_hypercalls.h>
 #include <trace/events/kvm.h>

 #include "trace.h"
@@ -1,7 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <linux/arm-smccc.h>
 #include <linux/kernel.h>
-#include <linux/psci.h>
 #include <linux/smp.h>

 #include <asm/cp15.h>
@@ -75,26 +74,20 @@ static void cpu_v7_spectre_init(void)
 	case ARM_CPU_PART_CORTEX_A72: {
 		struct arm_smccc_res res;

-		if (psci_ops.smccc_version == SMCCC_VERSION_1_0)
-			break;
+		arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
+				     ARM_SMCCC_ARCH_WORKAROUND_1, &res);
+		if ((int)res.a0 != 0)
+			return;

-		switch (psci_ops.conduit) {
-		case PSCI_CONDUIT_HVC:
-			arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
-					  ARM_SMCCC_ARCH_WORKAROUND_1, &res);
-			if ((int)res.a0 != 0)
-				break;
+		switch (arm_smccc_1_1_get_conduit()) {
+		case SMCCC_CONDUIT_HVC:
 			per_cpu(harden_branch_predictor_fn, cpu) =
 				call_hvc_arch_workaround_1;
 			cpu_do_switch_mm = cpu_v7_hvc_switch_mm;
 			spectre_v2_method = "hypervisor";
 			break;

-		case PSCI_CONDUIT_SMC:
-			arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
-					  ARM_SMCCC_ARCH_WORKAROUND_1, &res);
-			if ((int)res.a0 != 0)
-				break;
+		case SMCCC_CONDUIT_SMC:
 			per_cpu(harden_branch_predictor_fn, cpu) =
 				call_smc_arch_workaround_1;
 			cpu_do_switch_mm = cpu_v7_smc_switch_mm;
@@ -61,7 +61,6 @@
  * RW:		64bit by default, can be overridden for 32bit VMs
  * TAC:		Trap ACTLR
  * TSC:		Trap SMC
- * TVM:		Trap VM ops (until M+C set in SCTLR_EL1)
  * TSW:		Trap cache operations by set/way
  * TWE:		Trap WFE
  * TWI:		Trap WFI
@@ -74,7 +73,7 @@
  * SWIO:	Turn set/way invalidates into set/way clean+invalidate
  */
 #define HCR_GUEST_FLAGS (HCR_TSC | HCR_TSW | HCR_TWE | HCR_TWI | HCR_VM | \
-			 HCR_TVM | HCR_BSU_IS | HCR_FB | HCR_TAC | \
+			 HCR_BSU_IS | HCR_FB | HCR_TAC | \
			 HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW | HCR_TLOR | \
			 HCR_FMO | HCR_IMO)
 #define HCR_VIRT_EXCP_MASK (HCR_VSE | HCR_VI | HCR_VF)
@@ -53,8 +53,18 @@ static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
 		/* trap error record accesses */
 		vcpu->arch.hcr_el2 |= HCR_TERR;
 	}
-	if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB))
+
+	if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB)) {
 		vcpu->arch.hcr_el2 |= HCR_FWB;
+	} else {
+		/*
+		 * For non-FWB CPUs, we trap VM ops (HCR_EL2.TVM) until M+C
+		 * get set in SCTLR_EL1 such that we can detect when the guest
+		 * MMU gets turned on and do the necessary cache maintenance
+		 * then.
+		 */
+		vcpu->arch.hcr_el2 |= HCR_TVM;
+	}

 	if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features))
 		vcpu->arch.hcr_el2 &= ~HCR_RW;
@@ -77,14 +87,19 @@ static inline unsigned long *vcpu_hcr(struct kvm_vcpu *vcpu)
 	return (unsigned long *)&vcpu->arch.hcr_el2;
 }

-static inline void vcpu_clear_wfe_traps(struct kvm_vcpu *vcpu)
+static inline void vcpu_clear_wfx_traps(struct kvm_vcpu *vcpu)
 {
 	vcpu->arch.hcr_el2 &= ~HCR_TWE;
+	if (atomic_read(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe.vlpi_count))
+		vcpu->arch.hcr_el2 &= ~HCR_TWI;
+	else
+		vcpu->arch.hcr_el2 |= HCR_TWI;
 }

-static inline void vcpu_set_wfe_traps(struct kvm_vcpu *vcpu)
+static inline void vcpu_set_wfx_traps(struct kvm_vcpu *vcpu)
 {
 	vcpu->arch.hcr_el2 |= HCR_TWE;
+	vcpu->arch.hcr_el2 |= HCR_TWI;
 }

 static inline void vcpu_ptrauth_enable(struct kvm_vcpu *vcpu)
@@ -258,6 +273,11 @@ static inline bool kvm_vcpu_dabt_isvalid(const struct kvm_vcpu *vcpu)
 	return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_ISV);
 }

+static inline unsigned long kvm_vcpu_dabt_iss_nisv_sanitized(const struct kvm_vcpu *vcpu)
+{
+	return kvm_vcpu_get_hsr(vcpu) & (ESR_ELx_CM | ESR_ELx_WNR | ESR_ELx_FSC);
+}
+
 static inline bool kvm_vcpu_dabt_issext(const struct kvm_vcpu *vcpu)
 {
 	return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SSE);
@@ -44,6 +44,7 @@
 	KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
 #define KVM_REQ_IRQ_PENDING	KVM_ARCH_REQ(1)
 #define KVM_REQ_VCPU_RESET	KVM_ARCH_REQ(2)
+#define KVM_REQ_RECORD_STEAL	KVM_ARCH_REQ(3)

 DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
@@ -83,6 +84,14 @@ struct kvm_arch {

 	/* Mandated version of PSCI */
 	u32 psci_version;
+
+	/*
+	 * If we encounter a data abort without valid instruction syndrome
+	 * information, report this to user space. User space can (and
+	 * should) opt in to this feature if KVM_CAP_ARM_NISV_TO_USER is
+	 * supported.
+	 */
+	bool return_nisv_io_abort_to_user;
 };

 #define KVM_NR_MEM_OBJS 40
@@ -338,6 +347,13 @@ struct kvm_vcpu_arch {
 	/* True when deferrable sysregs are loaded on the physical CPU,
 	 * see kvm_vcpu_load_sysregs and kvm_vcpu_put_sysregs. */
 	bool sysregs_loaded_on_cpu;
+
+	/* Guest PV state */
+	struct {
+		u64 steal;
+		u64 last_steal;
+		gpa_t base;
+	} steal;
 };

 /* Pointer to the vcpu's SVE FFR for sve_{save,load}_state() */
@@ -478,6 +494,27 @@ void handle_exit_early(struct kvm_vcpu *vcpu, struct kvm_run *run,
 int kvm_perf_init(void);
 int kvm_perf_teardown(void);

+long kvm_hypercall_pv_features(struct kvm_vcpu *vcpu);
+gpa_t kvm_init_stolen_time(struct kvm_vcpu *vcpu);
+void kvm_update_stolen_time(struct kvm_vcpu *vcpu);
+
+int kvm_arm_pvtime_set_attr(struct kvm_vcpu *vcpu,
+			    struct kvm_device_attr *attr);
+int kvm_arm_pvtime_get_attr(struct kvm_vcpu *vcpu,
+			    struct kvm_device_attr *attr);
+int kvm_arm_pvtime_has_attr(struct kvm_vcpu *vcpu,
+			    struct kvm_device_attr *attr);
+
+static inline void kvm_arm_pvtime_vcpu_init(struct kvm_vcpu_arch *vcpu_arch)
+{
+	vcpu_arch->steal.base = GPA_INVALID;
+}
+
+static inline bool kvm_arm_is_pvtime_enabled(struct kvm_vcpu_arch *vcpu_arch)
+{
+	return (vcpu_arch->steal.base != GPA_INVALID);
+}
+
 void kvm_set_sei_esr(struct kvm_vcpu *vcpu, u64 syndrome);

 struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr);
@@ -21,6 +21,13 @@ static inline u64 paravirt_steal_clock(int cpu)
 {
 	return pv_ops.time.steal_clock(cpu);
 }
-#endif
+
+int __init pv_time_init(void);
+
+#else
+
+#define pv_time_init() do {} while (0)
+
+#endif // CONFIG_PARAVIRT

 #endif
17	arch/arm64/include/asm/pvclock-abi.h	Normal file
@@ -0,0 +1,17 @@
/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (C) 2019 Arm Ltd. */

#ifndef __ASM_PVCLOCK_ABI_H
#define __ASM_PVCLOCK_ABI_H

/* The below structure is defined in ARM DEN0057A */

struct pvclock_vcpu_stolen_time {
	__le32 revision;
	__le32 attributes;
	__le64 stolen_time;
	/* Structure must be 64 byte aligned, pad to that size */
	u8 padding[48];
} __packed;

#endif
@@ -164,8 +164,9 @@ struct kvm_vcpu_events {
 	struct {
 		__u8 serror_pending;
 		__u8 serror_has_esr;
+		__u8 ext_dabt_pending;
 		/* Align it to 8 bytes */
-		__u8 pad[6];
+		__u8 pad[5];
 		__u64 serror_esr;
 	} exception;
 	__u32 reserved[12];
@@ -323,6 +324,8 @@ struct kvm_vcpu_events {
 #define KVM_ARM_VCPU_TIMER_CTRL		1
 #define   KVM_ARM_VCPU_TIMER_IRQ_VTIMER	0
 #define   KVM_ARM_VCPU_TIMER_IRQ_PTIMER	1
+#define KVM_ARM_VCPU_PVTIME_CTRL	2
+#define   KVM_ARM_VCPU_PVTIME_IPA	0

 /* KVM_IRQ_LINE irq field index values */
 #define KVM_ARM_IRQ_VCPU2_SHIFT		28
@@ -6,7 +6,6 @@
  */

 #include <linux/arm-smccc.h>
-#include <linux/psci.h>
 #include <linux/types.h>
 #include <linux/cpu.h>
 #include <asm/cpu.h>
@@ -167,9 +166,7 @@ static void install_bp_hardening_cb(bp_hardening_cb_t fn,
 }
 #endif	/* CONFIG_KVM_INDIRECT_VECTORS */

 #include <uapi/linux/psci.h>
-#include <linux/arm-smccc.h>
-#include <linux/psci.h>

 static void call_smc_arch_workaround_1(void)
 {
@@ -213,43 +210,31 @@ static int detect_harden_bp_fw(void)
 	struct arm_smccc_res res;
 	u32 midr = read_cpuid_id();

-	if (psci_ops.smccc_version == SMCCC_VERSION_1_0)
-		return -1;
+	arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
+			     ARM_SMCCC_ARCH_WORKAROUND_1, &res);

-	switch (psci_ops.conduit) {
-	case PSCI_CONDUIT_HVC:
-		arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
-				  ARM_SMCCC_ARCH_WORKAROUND_1, &res);
-		switch ((int)res.a0) {
-		case 1:
-			/* Firmware says we're just fine */
-			return 0;
-		case 0:
-			cb = call_hvc_arch_workaround_1;
-			/* This is a guest, no need to patch KVM vectors */
-			smccc_start = NULL;
-			smccc_end = NULL;
-			break;
-		default:
-			return -1;
-		}
+	switch ((int)res.a0) {
+	case 1:
+		/* Firmware says we're just fine */
+		return 0;
+	case 0:
+		break;
+	default:
+		return -1;
+	}
+
+	switch (arm_smccc_1_1_get_conduit()) {
+	case SMCCC_CONDUIT_HVC:
+		cb = call_hvc_arch_workaround_1;
+		/* This is a guest, no need to patch KVM vectors */
+		smccc_start = NULL;
+		smccc_end = NULL;
 		break;

-	case PSCI_CONDUIT_SMC:
-		arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
-				  ARM_SMCCC_ARCH_WORKAROUND_1, &res);
-		switch ((int)res.a0) {
-		case 1:
-			/* Firmware says we're just fine */
-			return 0;
-		case 0:
-			cb = call_smc_arch_workaround_1;
-			smccc_start = __smccc_workaround_1_smc_start;
-			smccc_end = __smccc_workaround_1_smc_end;
-			break;
-		default:
-			return -1;
-		}
+	case SMCCC_CONDUIT_SMC:
+		cb = call_smc_arch_workaround_1;
+		smccc_start = __smccc_workaround_1_smc_start;
+		smccc_end = __smccc_workaround_1_smc_end;
 		break;

 	default:
@@ -309,11 +294,11 @@ void __init arm64_update_smccc_conduit(struct alt_instr *alt,

 	BUG_ON(nr_inst != 1);

-	switch (psci_ops.conduit) {
-	case PSCI_CONDUIT_HVC:
+	switch (arm_smccc_1_1_get_conduit()) {
+	case SMCCC_CONDUIT_HVC:
 		insn = aarch64_insn_get_hvc_value();
 		break;
-	case PSCI_CONDUIT_SMC:
+	case SMCCC_CONDUIT_SMC:
 		insn = aarch64_insn_get_smc_value();
 		break;
 	default:
@@ -339,6 +324,8 @@ void __init arm64_enable_wa2_handling(struct alt_instr *alt,

 void arm64_set_ssbd_mitigation(bool state)
 {
+	int conduit;
+
 	if (!IS_ENABLED(CONFIG_ARM64_SSBD)) {
 		pr_info_once("SSBD disabled by kernel configuration\n");
 		return;
@@ -352,19 +339,10 @@ void arm64_set_ssbd_mitigation(bool state)
 		return;
 	}

-	switch (psci_ops.conduit) {
-	case PSCI_CONDUIT_HVC:
-		arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_WORKAROUND_2, state, NULL);
-		break;
+	conduit = arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_WORKAROUND_2, state,
+				       NULL);

-	case PSCI_CONDUIT_SMC:
-		arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2, state, NULL);
-		break;
-
-	default:
-		WARN_ON_ONCE(1);
-		break;
-	}
+	WARN_ON_ONCE(conduit == SMCCC_CONDUIT_NONE);
 }

 static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry,
@@ -374,6 +352,7 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry,
 	bool required = true;
 	s32 val;
 	bool this_cpu_safe = false;
+	int conduit;

 	WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible());
@@ -391,25 +370,10 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry,
 		goto out_printmsg;
 	}

-	if (psci_ops.smccc_version == SMCCC_VERSION_1_0) {
-		ssbd_state = ARM64_SSBD_UNKNOWN;
-		if (!this_cpu_safe)
-			__ssb_safe = false;
-		return false;
-	}
+	conduit = arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
+				       ARM_SMCCC_ARCH_WORKAROUND_2, &res);

-	switch (psci_ops.conduit) {
-	case PSCI_CONDUIT_HVC:
-		arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
-				  ARM_SMCCC_ARCH_WORKAROUND_2, &res);
-		break;
-
-	case PSCI_CONDUIT_SMC:
-		arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
-				  ARM_SMCCC_ARCH_WORKAROUND_2, &res);
-		break;
-
-	default:
+	if (conduit == SMCCC_CONDUIT_NONE) {
 		ssbd_state = ARM64_SSBD_UNKNOWN;
 		if (!this_cpu_safe)
 			__ssb_safe = false;
@@ -6,13 +6,153 @@
  * Author: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
  */

+#define pr_fmt(fmt) "arm-pv: " fmt
+
+#include <linux/arm-smccc.h>
+#include <linux/cpuhotplug.h>
 #include <linux/export.h>
+#include <linux/io.h>
 #include <linux/jump_label.h>
+#include <linux/printk.h>
+#include <linux/psci.h>
+#include <linux/reboot.h>
+#include <linux/slab.h>
 #include <linux/types.h>
+
 #include <asm/paravirt.h>
+#include <asm/pvclock-abi.h>
+#include <asm/smp_plat.h>

 struct static_key paravirt_steal_enabled;
 struct static_key paravirt_steal_rq_enabled;

 struct paravirt_patch_template pv_ops;
 EXPORT_SYMBOL_GPL(pv_ops);
+
+struct pv_time_stolen_time_region {
+	struct pvclock_vcpu_stolen_time *kaddr;
+};
+
+static DEFINE_PER_CPU(struct pv_time_stolen_time_region, stolen_time_region);
+
+static bool steal_acc = true;
+static int __init parse_no_stealacc(char *arg)
+{
+	steal_acc = false;
+	return 0;
+}
+
+early_param("no-steal-acc", parse_no_stealacc);
+
+/* return stolen time in ns by asking the hypervisor */
+static u64 pv_steal_clock(int cpu)
+{
+	struct pv_time_stolen_time_region *reg;
+
+	reg = per_cpu_ptr(&stolen_time_region, cpu);
+	if (!reg->kaddr) {
+		pr_warn_once("stolen time enabled but not configured for cpu %d\n",
+			     cpu);
+		return 0;
+	}
+
+	return le64_to_cpu(READ_ONCE(reg->kaddr->stolen_time));
+}
+
+static int stolen_time_dying_cpu(unsigned int cpu)
+{
+	struct pv_time_stolen_time_region *reg;
+
+	reg = this_cpu_ptr(&stolen_time_region);
+	if (!reg->kaddr)
+		return 0;
+
+	memunmap(reg->kaddr);
+	memset(reg, 0, sizeof(*reg));
+
+	return 0;
+}
+
+static int init_stolen_time_cpu(unsigned int cpu)
+{
+	struct pv_time_stolen_time_region *reg;
+	struct arm_smccc_res res;
+
+	reg = this_cpu_ptr(&stolen_time_region);
+
+	arm_smccc_1_1_invoke(ARM_SMCCC_HV_PV_TIME_ST, &res);
+
+	if (res.a0 == SMCCC_RET_NOT_SUPPORTED)
+		return -EINVAL;
+
+	reg->kaddr = memremap(res.a0,
+			      sizeof(struct pvclock_vcpu_stolen_time),
+			      MEMREMAP_WB);
+
+	if (!reg->kaddr) {
+		pr_warn("Failed to map stolen time data structure\n");
+		return -ENOMEM;
+	}
+
+	if (le32_to_cpu(reg->kaddr->revision) != 0 ||
+	    le32_to_cpu(reg->kaddr->attributes) != 0) {
+		pr_warn_once("Unexpected revision or attributes in stolen time data\n");
+		return -ENXIO;
+	}
+
+	return 0;
+}
+
+static int pv_time_init_stolen_time(void)
+{
+	int ret;
+
+	ret = cpuhp_setup_state(CPUHP_AP_ARM_KVMPV_STARTING,
+				"hypervisor/arm/pvtime:starting",
+				init_stolen_time_cpu, stolen_time_dying_cpu);
+	if (ret < 0)
+		return ret;
+	return 0;
+}
+
+static bool has_pv_steal_clock(void)
+{
+	struct arm_smccc_res res;
+
+	/* To detect the presence of PV time support we require SMCCC 1.1+ */
+	if (psci_ops.smccc_version < SMCCC_VERSION_1_1)
+		return false;
+
+	arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
+			     ARM_SMCCC_HV_PV_TIME_FEATURES, &res);
+
+	if (res.a0 != SMCCC_RET_SUCCESS)
+		return false;
+
+	arm_smccc_1_1_invoke(ARM_SMCCC_HV_PV_TIME_FEATURES,
+			     ARM_SMCCC_HV_PV_TIME_ST, &res);
+
+	return (res.a0 == SMCCC_RET_SUCCESS);
+}
+
+int __init pv_time_init(void)
+{
+	int ret;
+
+	if (!has_pv_steal_clock())
+		return 0;
+
+	ret = pv_time_init_stolen_time();
+	if (ret)
+		return ret;
+
+	pv_ops.time.steal_clock = pv_steal_clock;
+
+	static_key_slow_inc(&paravirt_steal_enabled);
+	if (steal_acc)
+		static_key_slow_inc(&paravirt_steal_rq_enabled);
+
+	pr_info("using stolen time PV\n");
+
+	return 0;
+}
Some files were not shown because too many files have changed in this diff.