You've already forked linux-rockchip
mirror of
https://github.com/armbian/linux-rockchip.git
synced 2026-01-06 11:08:10 -08:00
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM updates from Paolo Bonzini: "ARM: - support for SVE and Pointer Authentication in guests - PMU improvements POWER: - support for direct access to the POWER9 XIVE interrupt controller - memory and performance optimizations x86: - support for accessing memory not backed by struct page - fixes and refactoring Generic: - dirty page tracking improvements" * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (155 commits) kvm: fix compilation on aarch64 Revert "KVM: nVMX: Expose RDPMC-exiting only when guest supports PMU" kvm: x86: Fix L1TF mitigation for shadow MMU KVM: nVMX: Disable intercept for FS/GS base MSRs in vmcs02 when possible KVM: PPC: Book3S: Remove useless checks in 'release' method of KVM device KVM: PPC: Book3S HV: XIVE: Fix spelling mistake "acessing" -> "accessing" KVM: PPC: Book3S HV: Make sure to load LPID for radix VCPUs kvm: nVMX: Set nested_run_pending in vmx_set_nested_state after checks complete tests: kvm: Add tests for KVM_SET_NESTED_STATE KVM: nVMX: KVM_SET_NESTED_STATE - Tear down old EVMCS state before setting new state tests: kvm: Add tests for KVM_CAP_MAX_VCPUS and KVM_CAP_MAX_CPU_ID tests: kvm: Add tests to .gitignore KVM: Introduce KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 KVM: Fix kvm_clear_dirty_log_protect off-by-(minus-)one KVM: Fix the bitmap range to copy during clear dirty KVM: arm64: Fix ptrauth ID register masking logic KVM: x86: use direct accessors for RIP and RSP KVM: VMX: Use accessors for GPRs outside of dedicated caching logic KVM: x86: Omit caching logic for always-available GPRs kvm, x86: Properly check whether a pfn is an MMIO or not ...
This commit is contained in:
85
Documentation/arm64/perf.txt
Normal file
85
Documentation/arm64/perf.txt
Normal file
@@ -0,0 +1,85 @@
|
||||
Perf Event Attributes
|
||||
=====================
|
||||
|
||||
Author: Andrew Murray <andrew.murray@arm.com>
|
||||
Date: 2019-03-06
|
||||
|
||||
exclude_user
|
||||
------------
|
||||
|
||||
This attribute excludes userspace.
|
||||
|
||||
Userspace always runs at EL0 and thus this attribute will exclude EL0.
|
||||
|
||||
|
||||
exclude_kernel
|
||||
--------------
|
||||
|
||||
This attribute excludes the kernel.
|
||||
|
||||
The kernel runs at EL2 with VHE and EL1 without. Guest kernels always run
|
||||
at EL1.
|
||||
|
||||
For the host this attribute will exclude EL1 and additionally EL2 on a VHE
|
||||
system.
|
||||
|
||||
For the guest this attribute will exclude EL1. Please note that EL2 is
|
||||
never counted within a guest.
|
||||
|
||||
|
||||
exclude_hv
|
||||
----------
|
||||
|
||||
This attribute excludes the hypervisor.
|
||||
|
||||
For a VHE host this attribute is ignored as we consider the host kernel to
|
||||
be the hypervisor.
|
||||
|
||||
For a non-VHE host this attribute will exclude EL2 as we consider the
|
||||
hypervisor to be any code that runs at EL2 which is predominantly used for
|
||||
guest/host transitions.
|
||||
|
||||
For the guest this attribute has no effect. Please note that EL2 is
|
||||
never counted within a guest.
|
||||
|
||||
|
||||
exclude_host / exclude_guest
|
||||
----------------------------
|
||||
|
||||
These attributes exclude the KVM host and guest, respectively.
|
||||
|
||||
The KVM host may run at EL0 (userspace), EL1 (non-VHE kernel) and EL2 (VHE
|
||||
kernel or non-VHE hypervisor).
|
||||
|
||||
The KVM guest may run at EL0 (userspace) and EL1 (kernel).
|
||||
|
||||
Due to the overlapping exception levels between host and guests we cannot
|
||||
exclusively rely on the PMU's hardware exception filtering - therefore we
|
||||
must enable/disable counting on the entry and exit to the guest. This is
|
||||
performed differently on VHE and non-VHE systems.
|
||||
|
||||
For non-VHE systems we exclude EL2 for exclude_host - upon entering and
|
||||
exiting the guest we disable/enable the event as appropriate based on the
|
||||
exclude_host and exclude_guest attributes.
|
||||
|
||||
For VHE systems we exclude EL1 for exclude_guest and exclude both EL0 and EL2
|
||||
for exclude_host. Upon entering and exiting the guest we modify the event
|
||||
to include/exclude EL0 as appropriate based on the exclude_host and
|
||||
exclude_guest attributes.
|
||||
|
||||
The statements above also apply when these attributes are used within a
|
||||
non-VHE guest; however, please note that EL2 is never counted within a guest.
|
||||
|
||||
|
||||
Accuracy
|
||||
--------
|
||||
|
||||
On non-VHE hosts we enable/disable counters on the entry/exit of host/guest
|
||||
transition at EL2 - however there is a period of time between
|
||||
enabling/disabling the counters and entering/exiting the guest. We are
|
||||
able to eliminate counters counting host events on the boundaries of guest
|
||||
entry/exit when counting guest events by filtering out EL2 for
|
||||
exclude_host. However when using !exclude_hv there is a small blackout
|
||||
window at the guest entry/exit where host events are not captured.
|
||||
|
||||
On VHE systems there are no blackout windows.
|
||||
@@ -87,7 +87,21 @@ used to get and set the keys for a thread.
|
||||
Virtualization
|
||||
--------------
|
||||
|
||||
Pointer authentication is not currently supported in KVM guests. KVM
|
||||
will mask the feature bits from ID_AA64ISAR1_EL1, and attempted use of
|
||||
the feature will result in an UNDEFINED exception being injected into
|
||||
the guest.
|
||||
Pointer authentication is enabled in a KVM guest when each virtual cpu is
|
||||
initialised by passing flags KVM_ARM_VCPU_PTRAUTH_[ADDRESS/GENERIC] and
|
||||
requesting these two separate cpu features to be enabled. The current KVM
|
||||
guest implementation works by enabling both features together, so both
|
||||
these userspace flags are checked before enabling pointer authentication.
|
||||
The separate userspace flags ensure that no userspace ABI changes are needed
|
||||
if support is added in the future to allow these two features to be
|
||||
enabled independently of one another.
|
||||
|
||||
The Arm Architecture specifies that the Pointer Authentication feature is
|
||||
implemented along with the VHE feature so KVM arm64 ptrauth code relies
|
||||
on VHE mode to be present.
|
||||
|
||||
Additionally, when these vcpu feature flags are not set then KVM will
|
||||
filter out the Pointer Authentication system key registers from
|
||||
KVM_GET/SET_REG_* ioctls and mask those features from cpufeature ID
|
||||
register. Any attempt to use the Pointer Authentication instructions will
|
||||
result in an UNDEFINED exception being injected into the guest.
|
||||
|
||||
@@ -69,23 +69,6 @@ by and on behalf of the VM's process may not be freed/unaccounted when
|
||||
the VM is shut down.
|
||||
|
||||
|
||||
It is important to note that although VM ioctls may only be issued from
|
||||
the process that created the VM, a VM's lifecycle is associated with its
|
||||
file descriptor, not its creator (process). In other words, the VM and
|
||||
its resources, *including the associated address space*, are not freed
|
||||
until the last reference to the VM's file descriptor has been released.
|
||||
For example, if fork() is issued after ioctl(KVM_CREATE_VM), the VM will
|
||||
not be freed until both the parent (original) process and its child have
|
||||
put their references to the VM's file descriptor.
|
||||
|
||||
Because a VM's resources are not freed until the last reference to its
|
||||
file descriptor is released, creating additional references to a VM via
|
||||
fork(), dup(), etc... without careful consideration is strongly
|
||||
discouraged and may have unwanted side effects, e.g. memory allocated
|
||||
by and on behalf of the VM's process may not be freed/unaccounted when
|
||||
the VM is shut down.
|
||||
|
||||
|
||||
3. Extensions
|
||||
-------------
|
||||
|
||||
@@ -347,7 +330,7 @@ They must be less than the value that KVM_CHECK_EXTENSION returns for
|
||||
the KVM_CAP_MULTI_ADDRESS_SPACE capability.
|
||||
|
||||
The bits in the dirty bitmap are cleared before the ioctl returns, unless
|
||||
KVM_CAP_MANUAL_DIRTY_LOG_PROTECT is enabled. For more information,
|
||||
KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 is enabled. For more information,
|
||||
see the description of the capability.
|
||||
|
||||
4.9 KVM_SET_MEMORY_ALIAS
|
||||
@@ -1117,9 +1100,8 @@ struct kvm_userspace_memory_region {
|
||||
This ioctl allows the user to create, modify or delete a guest physical
|
||||
memory slot. Bits 0-15 of "slot" specify the slot id and this value
|
||||
should be less than the maximum number of user memory slots supported per
|
||||
VM. The maximum allowed slots can be queried using KVM_CAP_NR_MEMSLOTS,
|
||||
if this capability is supported by the architecture. Slots may not
|
||||
overlap in guest physical address space.
|
||||
VM. The maximum allowed slots can be queried using KVM_CAP_NR_MEMSLOTS.
|
||||
Slots may not overlap in guest physical address space.
|
||||
|
||||
If KVM_CAP_MULTI_ADDRESS_SPACE is available, bits 16-31 of "slot"
|
||||
specify the address space which is being modified. They must be
|
||||
@@ -1901,6 +1883,12 @@ Architectures: all
|
||||
Type: vcpu ioctl
|
||||
Parameters: struct kvm_one_reg (in)
|
||||
Returns: 0 on success, negative value on failure
|
||||
Errors:
|
||||
ENOENT: no such register
|
||||
EINVAL: invalid register ID, or no such register
|
||||
EPERM: (arm64) register access not allowed before vcpu finalization
|
||||
(These error codes are indicative only: do not rely on a specific error
|
||||
code being returned in a specific situation.)
|
||||
|
||||
struct kvm_one_reg {
|
||||
__u64 id;
|
||||
@@ -1985,6 +1973,7 @@ registers, find a list below:
|
||||
PPC | KVM_REG_PPC_TLB3PS | 32
|
||||
PPC | KVM_REG_PPC_EPTCFG | 32
|
||||
PPC | KVM_REG_PPC_ICP_STATE | 64
|
||||
PPC | KVM_REG_PPC_VP_STATE | 128
|
||||
PPC | KVM_REG_PPC_TB_OFFSET | 64
|
||||
PPC | KVM_REG_PPC_SPMC1 | 32
|
||||
PPC | KVM_REG_PPC_SPMC2 | 32
|
||||
@@ -2137,6 +2126,37 @@ contains elements ranging from 32 to 128 bits. The index is a 32bit
|
||||
value in the kvm_regs structure seen as a 32bit array.
|
||||
0x60x0 0000 0010 <index into the kvm_regs struct:16>
|
||||
|
||||
Specifically:
|
||||
Encoding Register Bits kvm_regs member
|
||||
----------------------------------------------------------------
|
||||
0x6030 0000 0010 0000 X0 64 regs.regs[0]
|
||||
0x6030 0000 0010 0002 X1 64 regs.regs[1]
|
||||
...
|
||||
0x6030 0000 0010 003c X30 64 regs.regs[30]
|
||||
0x6030 0000 0010 003e SP 64 regs.sp
|
||||
0x6030 0000 0010 0040 PC 64 regs.pc
|
||||
0x6030 0000 0010 0042 PSTATE 64 regs.pstate
|
||||
0x6030 0000 0010 0044 SP_EL1 64 sp_el1
|
||||
0x6030 0000 0010 0046 ELR_EL1 64 elr_el1
|
||||
0x6030 0000 0010 0048 SPSR_EL1 64 spsr[KVM_SPSR_EL1] (alias SPSR_SVC)
|
||||
0x6030 0000 0010 004a SPSR_ABT 64 spsr[KVM_SPSR_ABT]
|
||||
0x6030 0000 0010 004c SPSR_UND 64 spsr[KVM_SPSR_UND]
|
||||
0x6030 0000 0010 004e SPSR_IRQ 64 spsr[KVM_SPSR_IRQ]
|
||||
0x6030 0000 0010 0050 SPSR_FIQ 64 spsr[KVM_SPSR_FIQ]
|
||||
0x6040 0000 0010 0054 V0 128 fp_regs.vregs[0] (*)
|
||||
0x6040 0000 0010 0058 V1 128 fp_regs.vregs[1] (*)
|
||||
...
|
||||
0x6040 0000 0010 00d0 V31 128 fp_regs.vregs[31] (*)
|
||||
0x6020 0000 0010 00d4 FPSR 32 fp_regs.fpsr
|
||||
0x6020 0000 0010 00d5 FPCR 32 fp_regs.fpcr
|
||||
|
||||
(*) These encodings are not accepted for SVE-enabled vcpus. See
|
||||
KVM_ARM_VCPU_INIT.
|
||||
|
||||
The equivalent register content can be accessed via bits [127:0] of
|
||||
the corresponding SVE Zn registers instead for vcpus that have SVE
|
||||
enabled (see below).
|
||||
|
||||
arm64 CCSIDR registers are demultiplexed by CSSELR value:
|
||||
0x6020 0000 0011 00 <csselr:8>
|
||||
|
||||
@@ -2146,6 +2166,64 @@ arm64 system registers have the following id bit patterns:
|
||||
arm64 firmware pseudo-registers have the following bit pattern:
|
||||
0x6030 0000 0014 <regno:16>
|
||||
|
||||
arm64 SVE registers have the following bit patterns:
|
||||
0x6080 0000 0015 00 <n:5> <slice:5> Zn bits[2048*slice + 2047 : 2048*slice]
|
||||
0x6050 0000 0015 04 <n:4> <slice:5> Pn bits[256*slice + 255 : 256*slice]
|
||||
0x6050 0000 0015 060 <slice:5> FFR bits[256*slice + 255 : 256*slice]
|
||||
0x6060 0000 0015 ffff KVM_REG_ARM64_SVE_VLS pseudo-register
|
||||
|
||||
Access to register IDs where 2048 * slice >= 128 * max_vq will fail with
|
||||
ENOENT. max_vq is the vcpu's maximum supported vector length in 128-bit
|
||||
quadwords: see (**) below.
|
||||
|
||||
These registers are only accessible on vcpus for which SVE is enabled.
|
||||
See KVM_ARM_VCPU_INIT for details.
|
||||
|
||||
In addition, except for KVM_REG_ARM64_SVE_VLS, these registers are not
|
||||
accessible until the vcpu's SVE configuration has been finalized
|
||||
using KVM_ARM_VCPU_FINALIZE(KVM_ARM_VCPU_SVE). See KVM_ARM_VCPU_INIT
|
||||
and KVM_ARM_VCPU_FINALIZE for more information about this procedure.
|
||||
|
||||
KVM_REG_ARM64_SVE_VLS is a pseudo-register that allows the set of vector
|
||||
lengths supported by the vcpu to be discovered and configured by
|
||||
userspace. When transferred to or from user memory via KVM_GET_ONE_REG
|
||||
or KVM_SET_ONE_REG, the value of this register is of type
|
||||
__u64[KVM_ARM64_SVE_VLS_WORDS], and encodes the set of vector lengths as
|
||||
follows:
|
||||
|
||||
__u64 vector_lengths[KVM_ARM64_SVE_VLS_WORDS];
|
||||
|
||||
if (vq >= SVE_VQ_MIN && vq <= SVE_VQ_MAX &&
|
||||
((vector_lengths[(vq - KVM_ARM64_SVE_VQ_MIN) / 64] >>
|
||||
((vq - KVM_ARM64_SVE_VQ_MIN) % 64)) & 1))
|
||||
/* Vector length vq * 16 bytes supported */
|
||||
else
|
||||
/* Vector length vq * 16 bytes not supported */
|
||||
|
||||
(**) The maximum value vq for which the above condition is true is
|
||||
max_vq. This is the maximum vector length available to the guest on
|
||||
this vcpu, and determines which register slices are visible through
|
||||
this ioctl interface.
|
||||
|
||||
(See Documentation/arm64/sve.txt for an explanation of the "vq"
|
||||
nomenclature.)
|
||||
|
||||
KVM_REG_ARM64_SVE_VLS is only accessible after KVM_ARM_VCPU_INIT.
|
||||
KVM_ARM_VCPU_INIT initialises it to the best set of vector lengths that
|
||||
the host supports.
|
||||
|
||||
Userspace may subsequently modify it if desired until the vcpu's SVE
|
||||
configuration is finalized using KVM_ARM_VCPU_FINALIZE(KVM_ARM_VCPU_SVE).
|
||||
|
||||
Apart from simply removing all vector lengths from the host set that
|
||||
exceed some value, support for arbitrarily chosen sets of vector lengths
|
||||
is hardware-dependent and may not be available. Attempting to configure
|
||||
an invalid set of vector lengths via KVM_SET_ONE_REG will fail with
|
||||
EINVAL.
|
||||
|
||||
After the vcpu's SVE configuration is finalized, further attempts to
|
||||
write this register will fail with EPERM.
|
||||
|
||||
|
||||
MIPS registers are mapped using the lower 32 bits. The upper 16 of that is
|
||||
the register group type:
|
||||
@@ -2198,6 +2276,12 @@ Architectures: all
|
||||
Type: vcpu ioctl
|
||||
Parameters: struct kvm_one_reg (in and out)
|
||||
Returns: 0 on success, negative value on failure
|
||||
Errors include:
|
||||
ENOENT: no such register
|
||||
EINVAL: invalid register ID, or no such register
|
||||
EPERM: (arm64) register access not allowed before vcpu finalization
|
||||
(These error codes are indicative only: do not rely on a specific error
|
||||
code being returned in a specific situation.)
|
||||
|
||||
This ioctl allows userspace to receive the value of a single register implemented
|
||||
in a vcpu. The register to read is indicated by the "id" field of the
|
||||
@@ -2690,6 +2774,49 @@ Possible features:
|
||||
- KVM_ARM_VCPU_PMU_V3: Emulate PMUv3 for the CPU.
|
||||
Depends on KVM_CAP_ARM_PMU_V3.
|
||||
|
||||
- KVM_ARM_VCPU_PTRAUTH_ADDRESS: Enables Address Pointer authentication
|
||||
for arm64 only.
|
||||
Depends on KVM_CAP_ARM_PTRAUTH_ADDRESS.
|
||||
If KVM_CAP_ARM_PTRAUTH_ADDRESS and KVM_CAP_ARM_PTRAUTH_GENERIC are
|
||||
both present, then both KVM_ARM_VCPU_PTRAUTH_ADDRESS and
|
||||
KVM_ARM_VCPU_PTRAUTH_GENERIC must be requested or neither must be
|
||||
requested.
|
||||
|
||||
- KVM_ARM_VCPU_PTRAUTH_GENERIC: Enables Generic Pointer authentication
|
||||
for arm64 only.
|
||||
Depends on KVM_CAP_ARM_PTRAUTH_GENERIC.
|
||||
If KVM_CAP_ARM_PTRAUTH_ADDRESS and KVM_CAP_ARM_PTRAUTH_GENERIC are
|
||||
both present, then both KVM_ARM_VCPU_PTRAUTH_ADDRESS and
|
||||
KVM_ARM_VCPU_PTRAUTH_GENERIC must be requested or neither must be
|
||||
requested.
|
||||
|
||||
- KVM_ARM_VCPU_SVE: Enables SVE for the CPU (arm64 only).
|
||||
Depends on KVM_CAP_ARM_SVE.
|
||||
Requires KVM_ARM_VCPU_FINALIZE(KVM_ARM_VCPU_SVE):
|
||||
|
||||
* After KVM_ARM_VCPU_INIT:
|
||||
|
||||
- KVM_REG_ARM64_SVE_VLS may be read using KVM_GET_ONE_REG: the
|
||||
initial value of this pseudo-register indicates the best set of
|
||||
vector lengths possible for a vcpu on this host.
|
||||
|
||||
* Before KVM_ARM_VCPU_FINALIZE(KVM_ARM_VCPU_SVE):
|
||||
|
||||
- KVM_RUN and KVM_GET_REG_LIST are not available;
|
||||
|
||||
- KVM_GET_ONE_REG and KVM_SET_ONE_REG cannot be used to access
|
||||
the scalable architectural SVE registers
|
||||
KVM_REG_ARM64_SVE_ZREG(), KVM_REG_ARM64_SVE_PREG() or
|
||||
KVM_REG_ARM64_SVE_FFR;
|
||||
|
||||
- KVM_REG_ARM64_SVE_VLS may optionally be written using
|
||||
KVM_SET_ONE_REG, to modify the set of vector lengths available
|
||||
for the vcpu.
|
||||
|
||||
* After KVM_ARM_VCPU_FINALIZE(KVM_ARM_VCPU_SVE):
|
||||
|
||||
- the KVM_REG_ARM64_SVE_VLS pseudo-register is immutable, and can
|
||||
no longer be written using KVM_SET_ONE_REG.
|
||||
|
||||
4.83 KVM_ARM_PREFERRED_TARGET
|
||||
|
||||
@@ -3809,7 +3936,7 @@ to I/O ports.
|
||||
|
||||
4.117 KVM_CLEAR_DIRTY_LOG (vm ioctl)
|
||||
|
||||
Capability: KVM_CAP_MANUAL_DIRTY_LOG_PROTECT
|
||||
Capability: KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2
|
||||
Architectures: x86, arm, arm64, mips
|
||||
Type: vm ioctl
|
||||
Parameters: struct kvm_dirty_log (in)
|
||||
@@ -3842,10 +3969,10 @@ the address space for which you want to return the dirty bitmap.
|
||||
They must be less than the value that KVM_CHECK_EXTENSION returns for
|
||||
the KVM_CAP_MULTI_ADDRESS_SPACE capability.
|
||||
|
||||
This ioctl is mostly useful when KVM_CAP_MANUAL_DIRTY_LOG_PROTECT
|
||||
This ioctl is mostly useful when KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2
|
||||
is enabled; for more information, see the description of the capability.
|
||||
However, it can always be used as long as KVM_CHECK_EXTENSION confirms
|
||||
that KVM_CAP_MANUAL_DIRTY_LOG_PROTECT is present.
|
||||
that KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 is present.
|
||||
|
||||
4.118 KVM_GET_SUPPORTED_HV_CPUID
|
||||
|
||||
@@ -3904,6 +4031,40 @@ number of valid entries in the 'entries' array, which is then filled.
|
||||
'index' and 'flags' fields in 'struct kvm_cpuid_entry2' are currently reserved,
|
||||
userspace should not expect to get any particular value there.
|
||||
|
||||
4.119 KVM_ARM_VCPU_FINALIZE
|
||||
|
||||
Architectures: arm, arm64
|
||||
Type: vcpu ioctl
|
||||
Parameters: int feature (in)
|
||||
Returns: 0 on success, -1 on error
|
||||
Errors:
|
||||
EPERM: feature not enabled, needs configuration, or already finalized
|
||||
EINVAL: feature unknown or not present
|
||||
|
||||
Recognised values for feature:
|
||||
arm64 KVM_ARM_VCPU_SVE (requires KVM_CAP_ARM_SVE)
|
||||
|
||||
Finalizes the configuration of the specified vcpu feature.
|
||||
|
||||
The vcpu must already have been initialised, enabling the affected feature, by
|
||||
means of a successful KVM_ARM_VCPU_INIT call with the appropriate flag set in
|
||||
features[].
|
||||
|
||||
For affected vcpu features, this is a mandatory step that must be performed
|
||||
before the vcpu is fully usable.
|
||||
|
||||
Between KVM_ARM_VCPU_INIT and KVM_ARM_VCPU_FINALIZE, the feature may be
|
||||
configured by use of ioctls such as KVM_SET_ONE_REG. The exact configuration
|
||||
that should be performed and how to do it are feature-dependent.
|
||||
|
||||
Other calls that depend on a particular feature being finalized, such as
|
||||
KVM_RUN, KVM_GET_REG_LIST, KVM_GET_ONE_REG and KVM_SET_ONE_REG, will fail with
|
||||
-EPERM unless the feature has already been finalized by means of a
|
||||
KVM_ARM_VCPU_FINALIZE call.
|
||||
|
||||
See KVM_ARM_VCPU_INIT for details of vcpu features that require finalization
|
||||
using this ioctl.
|
||||
|
||||
5. The kvm_run structure
|
||||
------------------------
|
||||
|
||||
@@ -4505,6 +4666,15 @@ struct kvm_sync_regs {
|
||||
struct kvm_vcpu_events events;
|
||||
};
|
||||
|
||||
6.75 KVM_CAP_PPC_IRQ_XIVE
|
||||
|
||||
Architectures: ppc
|
||||
Target: vcpu
|
||||
Parameters: args[0] is the XIVE device fd
|
||||
args[1] is the XIVE CPU number (server ID) for this vcpu
|
||||
|
||||
This capability connects the vcpu to an in-kernel XIVE device.
|
||||
|
||||
7. Capabilities that can be enabled on VMs
|
||||
------------------------------------------
|
||||
|
||||
@@ -4798,7 +4968,7 @@ and injected exceptions.
|
||||
* For the new DR6 bits, note that bit 16 is set iff the #DB exception
|
||||
will clear DR6.RTM.
|
||||
|
||||
7.18 KVM_CAP_MANUAL_DIRTY_LOG_PROTECT
|
||||
7.18 KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2
|
||||
|
||||
Architectures: x86, arm, arm64, mips
|
||||
Parameters: args[0] whether feature should be enabled or not
|
||||
@@ -4821,6 +4991,11 @@ while userspace can see false reports of dirty pages. Manual reprotection
|
||||
helps reduce this time, improving guest performance and reducing the
|
||||
number of dirty log false positives.
|
||||
|
||||
KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 was previously available under the name
|
||||
KVM_CAP_MANUAL_DIRTY_LOG_PROTECT, but the implementation had bugs that make
|
||||
it hard or impossible to use it correctly. The availability of
|
||||
KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 signals that those bugs are fixed.
|
||||
Userspace should not try to use KVM_CAP_MANUAL_DIRTY_LOG_PROTECT.
|
||||
|
||||
8. Other capabilities.
|
||||
----------------------
|
||||
|
||||
@@ -141,7 +141,8 @@ struct kvm_s390_vm_cpu_subfunc {
|
||||
u8 pcc[16]; # valid with Message-Security-Assist-Extension 4
|
||||
u8 ppno[16]; # valid with Message-Security-Assist-Extension 5
|
||||
u8 kma[16]; # valid with Message-Security-Assist-Extension 8
|
||||
u8 reserved[1808]; # reserved for future instructions
|
||||
u8 kdsa[16]; # valid with Message-Security-Assist-Extension 9
|
||||
u8 reserved[1792]; # reserved for future instructions
|
||||
};
|
||||
|
||||
Parameters: address of a buffer to load the subfunction blocks from.
|
||||
|
||||
197
Documentation/virtual/kvm/devices/xive.txt
Normal file
197
Documentation/virtual/kvm/devices/xive.txt
Normal file
@@ -0,0 +1,197 @@
|
||||
POWER9 eXternal Interrupt Virtualization Engine (XIVE Gen1)
|
||||
==========================================================
|
||||
|
||||
Device types supported:
|
||||
KVM_DEV_TYPE_XIVE POWER9 XIVE Interrupt Controller generation 1
|
||||
|
||||
This device acts as a VM interrupt controller. It provides the KVM
|
||||
interface to configure the interrupt sources of a VM in the underlying
|
||||
POWER9 XIVE interrupt controller.
|
||||
|
||||
Only one XIVE instance may be instantiated. A guest XIVE device
|
||||
requires a POWER9 host and the guest OS should have support for the
|
||||
XIVE native exploitation interrupt mode. If not, it should run using
|
||||
the legacy interrupt mode, referred to as XICS (POWER7/8).
|
||||
|
||||
* Device Mappings
|
||||
|
||||
The KVM device exposes different MMIO ranges of the XIVE HW which
|
||||
are required for interrupt management. These are exposed to the
|
||||
guest in VMAs populated with a custom VM fault handler.
|
||||
|
||||
1. Thread Interrupt Management Area (TIMA)
|
||||
|
||||
Each thread has an associated Thread Interrupt Management context
|
||||
composed of a set of registers. These registers let the thread
|
||||
handle priority management and interrupt acknowledgment. The most
|
||||
important are :
|
||||
|
||||
- Interrupt Pending Buffer (IPB)
|
||||
- Current Processor Priority (CPPR)
|
||||
- Notification Source Register (NSR)
|
||||
|
||||
They are exposed to software in four different pages each proposing
|
||||
a view with a different privilege. The first page is for the
|
||||
physical thread context and the second for the hypervisor. Only the
|
||||
third (operating system) and the fourth (user level) are exposed to the
|
||||
guest.
|
||||
|
||||
2. Event State Buffer (ESB)
|
||||
|
||||
Each source is associated with an Event State Buffer (ESB) with
|
||||
a pair of even/odd pages which provides commands to
|
||||
manage the source: to trigger, to EOI, to turn off the source for
|
||||
instance.
|
||||
|
||||
3. Device pass-through
|
||||
|
||||
When a device is passed-through into the guest, the source
|
||||
interrupts are from a different HW controller (PHB4) and the ESB
|
||||
pages exposed to the guest should accommodate this change.
|
||||
|
||||
The passthru_irq helpers, kvmppc_xive_set_mapped() and
|
||||
kvmppc_xive_clr_mapped() are called when the device HW irqs are
|
||||
mapped into or unmapped from the guest IRQ number space. The KVM
|
||||
device extends these helpers to clear the ESB pages of the guest IRQ
|
||||
number being mapped and then lets the VM fault handler repopulate.
|
||||
The handler will insert the ESB page corresponding to the HW
|
||||
interrupt of the device being passed-through or the initial IPI ESB
|
||||
page if the device has been removed.
|
||||
|
||||
The ESB remapping is fully transparent to the guest and the OS
|
||||
device driver. All handling is done within VFIO and the above
|
||||
helpers in KVM-PPC.
|
||||
|
||||
* Groups:
|
||||
|
||||
1. KVM_DEV_XIVE_GRP_CTRL
|
||||
Provides global controls on the device
|
||||
Attributes:
|
||||
1.1 KVM_DEV_XIVE_RESET (write only)
|
||||
Resets the interrupt controller configuration for sources and event
|
||||
queues. To be used by kexec and kdump.
|
||||
Errors: none
|
||||
|
||||
1.2 KVM_DEV_XIVE_EQ_SYNC (write only)
|
||||
Sync all the sources and queues and mark the EQ pages dirty. This
|
||||
is to make sure that a consistent memory state is captured when
|
||||
migrating the VM.
|
||||
Errors: none
|
||||
|
||||
2. KVM_DEV_XIVE_GRP_SOURCE (write only)
|
||||
Initializes a new source in the XIVE device and masks it.
|
||||
Attributes:
|
||||
Interrupt source number (64-bit)
|
||||
The kvm_device_attr.addr points to a __u64 value:
|
||||
bits: | 63 .... 2 | 1 | 0
|
||||
values: | unused | level | type
|
||||
- type: 0:MSI 1:LSI
|
||||
- level: assertion level in case of an LSI.
|
||||
Errors:
|
||||
-E2BIG: Interrupt source number is out of range
|
||||
-ENOMEM: Could not create a new source block
|
||||
-EFAULT: Invalid user pointer for attr->addr.
|
||||
-ENXIO: Could not allocate underlying HW interrupt
|
||||
|
||||
3. KVM_DEV_XIVE_GRP_SOURCE_CONFIG (write only)
|
||||
Configures source targeting
|
||||
Attributes:
|
||||
Interrupt source number (64-bit)
|
||||
The kvm_device_attr.addr points to a __u64 value:
|
||||
bits: | 63 .... 33 | 32 | 31 .. 3 | 2 .. 0
|
||||
values: | eisn | mask | server | priority
|
||||
- priority: 0-7 interrupt priority level
|
||||
- server: CPU number chosen to handle the interrupt
|
||||
- mask: mask flag (unused)
|
||||
- eisn: Effective Interrupt Source Number
|
||||
Errors:
|
||||
-ENOENT: Unknown source number
|
||||
-EINVAL: Not initialized source number
|
||||
-EINVAL: Invalid priority
|
||||
-EINVAL: Invalid CPU number.
|
||||
-EFAULT: Invalid user pointer for attr->addr.
|
||||
-ENXIO: CPU event queues not configured or configuration of the
|
||||
underlying HW interrupt failed
|
||||
-EBUSY: No CPU available to serve interrupt
|
||||
|
||||
4. KVM_DEV_XIVE_GRP_EQ_CONFIG (read-write)
|
||||
Configures an event queue of a CPU
|
||||
Attributes:
|
||||
EQ descriptor identifier (64-bit)
|
||||
The EQ descriptor identifier is a tuple (server, priority) :
|
||||
bits: | 63 .... 32 | 31 .. 3 | 2 .. 0
|
||||
values: | unused | server | priority
|
||||
The kvm_device_attr.addr points to :
|
||||
struct kvm_ppc_xive_eq {
|
||||
__u32 flags;
|
||||
__u32 qshift;
|
||||
__u64 qaddr;
|
||||
__u32 qtoggle;
|
||||
__u32 qindex;
|
||||
__u8 pad[40];
|
||||
};
|
||||
- flags: queue flags
|
||||
KVM_XIVE_EQ_ALWAYS_NOTIFY (required)
|
||||
forces notification without using the coalescing mechanism
|
||||
provided by the XIVE END ESBs.
|
||||
- qshift: queue size (power of 2)
|
||||
- qaddr: real address of queue
|
||||
- qtoggle: current queue toggle bit
|
||||
- qindex: current queue index
|
||||
- pad: reserved for future use
|
||||
Errors:
|
||||
-ENOENT: Invalid CPU number
|
||||
-EINVAL: Invalid priority
|
||||
-EINVAL: Invalid flags
|
||||
-EINVAL: Invalid queue size
|
||||
-EINVAL: Invalid queue address
|
||||
-EFAULT: Invalid user pointer for attr->addr.
|
||||
-EIO: Configuration of the underlying HW failed
|
||||
|
||||
5. KVM_DEV_XIVE_GRP_SOURCE_SYNC (write only)
|
||||
Synchronize the source to flush event notifications
|
||||
Attributes:
|
||||
Interrupt source number (64-bit)
|
||||
Errors:
|
||||
-ENOENT: Unknown source number
|
||||
-EINVAL: Not initialized source number
|
||||
|
||||
* VCPU state
|
||||
|
||||
The XIVE IC maintains VP interrupt state in an internal structure
|
||||
called the NVT. When a VP is not dispatched on a HW processor
|
||||
thread, this structure can be updated by HW if the VP is the target
|
||||
of an event notification.
|
||||
|
||||
It is important for migration to capture the cached IPB from the NVT
|
||||
as it synthesizes the priorities of the pending interrupts. We
|
||||
capture a bit more to report debug information.
|
||||
|
||||
KVM_REG_PPC_VP_STATE (2 * 64bits)
|
||||
bits: | 63 .... 32 | 31 .... 0 |
|
||||
values: | TIMA word0 | TIMA word1 |
|
||||
bits: | 127 .......... 64 |
|
||||
values: | unused |
|
||||
|
||||
* Migration:
|
||||
|
||||
Saving the state of a VM using the XIVE native exploitation mode
|
||||
should follow a specific sequence. When the VM is stopped :
|
||||
|
||||
1. Mask all sources (PQ=01) to stop the flow of events.
|
||||
|
||||
2. Sync the XIVE device with the KVM control KVM_DEV_XIVE_EQ_SYNC to
|
||||
flush any in-flight event notification and to stabilize the EQs. At
|
||||
this stage, the EQ pages are marked dirty to make sure they are
|
||||
transferred in the migration sequence.
|
||||
|
||||
3. Capture the state of the source targeting, the EQs configuration
|
||||
and the state of thread interrupt context registers.
|
||||
|
||||
Restore is similar :
|
||||
|
||||
1. Restore the EQ configuration, as targeting depends on it.
|
||||
2. Restore targeting
|
||||
3. Restore the thread interrupt contexts
|
||||
4. Restore the source states
|
||||
5. Let the vCPU run
|
||||
@@ -343,4 +343,6 @@ static inline unsigned long vcpu_data_host_to_guest(struct kvm_vcpu *vcpu,
|
||||
}
|
||||
}
|
||||
|
||||
static inline void vcpu_ptrauth_setup_lazy(struct kvm_vcpu *vcpu) {}
|
||||
|
||||
#endif /* __ARM_KVM_EMULATE_H__ */
|
||||
|
||||
@@ -19,6 +19,7 @@
|
||||
#ifndef __ARM_KVM_HOST_H__
|
||||
#define __ARM_KVM_HOST_H__
|
||||
|
||||
#include <linux/errno.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/kvm_types.h>
|
||||
#include <asm/cputype.h>
|
||||
@@ -53,6 +54,8 @@
|
||||
|
||||
DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
|
||||
|
||||
static inline int kvm_arm_init_sve(void) { return 0; }
|
||||
|
||||
u32 *kvm_vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode);
|
||||
int __attribute_const__ kvm_target_cpu(void);
|
||||
int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
|
||||
@@ -150,9 +153,13 @@ struct kvm_cpu_context {
|
||||
u32 cp15[NR_CP15_REGS];
|
||||
};
|
||||
|
||||
typedef struct kvm_cpu_context kvm_cpu_context_t;
|
||||
struct kvm_host_data {
|
||||
struct kvm_cpu_context host_ctxt;
|
||||
};
|
||||
|
||||
static inline void kvm_init_host_cpu_context(kvm_cpu_context_t *cpu_ctxt,
|
||||
typedef struct kvm_host_data kvm_host_data_t;
|
||||
|
||||
static inline void kvm_init_host_cpu_context(struct kvm_cpu_context *cpu_ctxt,
|
||||
int cpu)
|
||||
{
|
||||
/* The host's MPIDR is immutable, so let's set it up at boot time */
|
||||
@@ -182,7 +189,7 @@ struct kvm_vcpu_arch {
|
||||
struct kvm_vcpu_fault_info fault;
|
||||
|
||||
/* Host FP context */
|
||||
kvm_cpu_context_t *host_cpu_context;
|
||||
struct kvm_cpu_context *host_cpu_context;
|
||||
|
||||
/* VGIC state */
|
||||
struct vgic_cpu vgic_cpu;
|
||||
@@ -361,6 +368,9 @@ static inline void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu) {}
|
||||
static inline void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu) {}
|
||||
static inline void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu) {}
|
||||
|
||||
static inline void kvm_vcpu_pmu_restore_guest(struct kvm_vcpu *vcpu) {}
|
||||
static inline void kvm_vcpu_pmu_restore_host(struct kvm_vcpu *vcpu) {}
|
||||
|
||||
static inline void kvm_arm_vhe_guest_enter(void) {}
|
||||
static inline void kvm_arm_vhe_guest_exit(void) {}
|
||||
|
||||
@@ -409,4 +419,14 @@ static inline int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int kvm_arm_vcpu_finalize(struct kvm_vcpu *vcpu, int feature)
|
||||
{
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
static inline bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
#endif /* __ARM_KVM_HOST_H__ */
|
||||
|
||||
@@ -1341,6 +1341,7 @@ menu "ARMv8.3 architectural features"
|
||||
config ARM64_PTR_AUTH
|
||||
bool "Enable support for pointer authentication"
|
||||
default y
|
||||
depends on !KVM || ARM64_VHE
|
||||
help
|
||||
Pointer authentication (part of the ARMv8.3 Extensions) provides
|
||||
instructions for signing and authenticating pointers against secret
|
||||
@@ -1354,8 +1355,9 @@ config ARM64_PTR_AUTH
|
||||
context-switched along with the process.
|
||||
|
||||
The feature is detected at runtime. If the feature is not present in
|
||||
hardware it will not be advertised to userspace nor will it be
|
||||
enabled.
|
||||
hardware it will not be advertised to userspace/KVM guest nor will it
|
||||
be enabled. However, KVM guests also require VHE mode and hence the
|
||||
CONFIG_ARM64_VHE=y option to use this feature.
|
||||
|
||||
endmenu
|
||||
|
||||
|
||||
@@ -24,10 +24,13 @@
|
||||
|
||||
#ifndef __ASSEMBLY__
|
||||
|
||||
#include <linux/bitmap.h>
|
||||
#include <linux/build_bug.h>
|
||||
#include <linux/bug.h>
|
||||
#include <linux/cache.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/stddef.h>
|
||||
#include <linux/types.h>
|
||||
|
||||
#if defined(__KERNEL__) && defined(CONFIG_COMPAT)
|
||||
/* Masks for extracting the FPSR and FPCR from the FPSCR */
|
||||
@@ -56,7 +59,8 @@ extern void fpsimd_restore_current_state(void);
|
||||
extern void fpsimd_update_current_state(struct user_fpsimd_state const *state);
|
||||
|
||||
extern void fpsimd_bind_task_to_cpu(void);
|
||||
extern void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *state);
|
||||
extern void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *state,
|
||||
void *sve_state, unsigned int sve_vl);
|
||||
|
||||
extern void fpsimd_flush_task_state(struct task_struct *target);
|
||||
extern void fpsimd_flush_cpu_state(void);
|
||||
@@ -87,6 +91,29 @@ extern void sve_kernel_enable(const struct arm64_cpu_capabilities *__unused);
|
||||
extern u64 read_zcr_features(void);
|
||||
|
||||
extern int __ro_after_init sve_max_vl;
|
||||
extern int __ro_after_init sve_max_virtualisable_vl;
|
||||
extern __ro_after_init DECLARE_BITMAP(sve_vq_map, SVE_VQ_MAX);
|
||||
|
||||
/*
|
||||
* Helpers to translate bit indices in sve_vq_map to VQ values (and
|
||||
* vice versa). This allows find_next_bit() to be used to find the
|
||||
* _maximum_ VQ not exceeding a certain value.
|
||||
*/
|
||||
static inline unsigned int __vq_to_bit(unsigned int vq)
|
||||
{
|
||||
return SVE_VQ_MAX - vq;
|
||||
}
|
||||
|
||||
static inline unsigned int __bit_to_vq(unsigned int bit)
|
||||
{
|
||||
return SVE_VQ_MAX - bit;
|
||||
}
|
||||
|
||||
/* Ensure vq >= SVE_VQ_MIN && vq <= SVE_VQ_MAX before calling this function */
|
||||
static inline bool sve_vq_available(unsigned int vq)
|
||||
{
|
||||
return test_bit(__vq_to_bit(vq), sve_vq_map);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_ARM64_SVE
|
||||
|
||||
|
||||
@@ -108,7 +108,8 @@ extern u32 __kvm_get_mdcr_el2(void);
|
||||
.endm
|
||||
|
||||
.macro get_host_ctxt reg, tmp
|
||||
hyp_adr_this_cpu \reg, kvm_host_cpu_state, \tmp
|
||||
hyp_adr_this_cpu \reg, kvm_host_data, \tmp
|
||||
add \reg, \reg, #HOST_DATA_CONTEXT
|
||||
.endm
|
||||
|
||||
.macro get_vcpu_ptr vcpu, ctxt
|
||||
|
||||
@@ -98,6 +98,22 @@ static inline void vcpu_set_wfe_traps(struct kvm_vcpu *vcpu)
|
||||
vcpu->arch.hcr_el2 |= HCR_TWE;
|
||||
}
|
||||
|
||||
static inline void vcpu_ptrauth_enable(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
vcpu->arch.hcr_el2 |= (HCR_API | HCR_APK);
|
||||
}
|
||||
|
||||
static inline void vcpu_ptrauth_disable(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
vcpu->arch.hcr_el2 &= ~(HCR_API | HCR_APK);
|
||||
}
|
||||
|
||||
static inline void vcpu_ptrauth_setup_lazy(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
if (vcpu_has_ptrauth(vcpu))
|
||||
vcpu_ptrauth_disable(vcpu);
|
||||
}
|
||||
|
||||
static inline unsigned long vcpu_get_vsesr(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return vcpu->arch.vsesr_el2;
|
||||
|
||||
@@ -22,9 +22,13 @@
|
||||
#ifndef __ARM64_KVM_HOST_H__
|
||||
#define __ARM64_KVM_HOST_H__
|
||||
|
||||
#include <linux/bitmap.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/jump_label.h>
|
||||
#include <linux/kvm_types.h>
|
||||
#include <linux/percpu.h>
|
||||
#include <asm/arch_gicv3.h>
|
||||
#include <asm/barrier.h>
|
||||
#include <asm/cpufeature.h>
|
||||
#include <asm/daifflags.h>
|
||||
#include <asm/fpsimd.h>
|
||||
@@ -45,7 +49,7 @@
|
||||
|
||||
#define KVM_MAX_VCPUS VGIC_V3_MAX_CPUS
|
||||
|
||||
#define KVM_VCPU_MAX_FEATURES 4
|
||||
#define KVM_VCPU_MAX_FEATURES 7
|
||||
|
||||
#define KVM_REQ_SLEEP \
|
||||
KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
|
||||
@@ -54,8 +58,12 @@
|
||||
|
||||
DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
|
||||
|
||||
extern unsigned int kvm_sve_max_vl;
|
||||
int kvm_arm_init_sve(void);
|
||||
|
||||
int __attribute_const__ kvm_target_cpu(void);
|
||||
int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
|
||||
void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu);
|
||||
int kvm_arch_vm_ioctl_check_extension(struct kvm *kvm, long ext);
|
||||
void __extended_idmap_trampoline(phys_addr_t boot_pgd, phys_addr_t idmap_start);
|
||||
|
||||
@@ -117,6 +125,7 @@ enum vcpu_sysreg {
|
||||
SCTLR_EL1, /* System Control Register */
|
||||
ACTLR_EL1, /* Auxiliary Control Register */
|
||||
CPACR_EL1, /* Coprocessor Access Control */
|
||||
ZCR_EL1, /* SVE Control */
|
||||
TTBR0_EL1, /* Translation Table Base Register 0 */
|
||||
TTBR1_EL1, /* Translation Table Base Register 1 */
|
||||
TCR_EL1, /* Translation Control Register */
|
||||
@@ -152,6 +161,18 @@ enum vcpu_sysreg {
|
||||
PMSWINC_EL0, /* Software Increment Register */
|
||||
PMUSERENR_EL0, /* User Enable Register */
|
||||
|
||||
/* Pointer Authentication Registers in a strict increasing order. */
|
||||
APIAKEYLO_EL1,
|
||||
APIAKEYHI_EL1,
|
||||
APIBKEYLO_EL1,
|
||||
APIBKEYHI_EL1,
|
||||
APDAKEYLO_EL1,
|
||||
APDAKEYHI_EL1,
|
||||
APDBKEYLO_EL1,
|
||||
APDBKEYHI_EL1,
|
||||
APGAKEYLO_EL1,
|
||||
APGAKEYHI_EL1,
|
||||
|
||||
/* 32bit specific registers. Keep them at the end of the range */
|
||||
DACR32_EL2, /* Domain Access Control Register */
|
||||
IFSR32_EL2, /* Instruction Fault Status Register */
|
||||
@@ -212,7 +233,17 @@ struct kvm_cpu_context {
|
||||
struct kvm_vcpu *__hyp_running_vcpu;
|
||||
};
|
||||
|
||||
typedef struct kvm_cpu_context kvm_cpu_context_t;
|
||||
struct kvm_pmu_events {
|
||||
u32 events_host;
|
||||
u32 events_guest;
|
||||
};
|
||||
|
||||
struct kvm_host_data {
|
||||
struct kvm_cpu_context host_ctxt;
|
||||
struct kvm_pmu_events pmu_events;
|
||||
};
|
||||
|
||||
typedef struct kvm_host_data kvm_host_data_t;
|
||||
|
||||
struct vcpu_reset_state {
|
||||
unsigned long pc;
|
||||
@@ -223,6 +254,8 @@ struct vcpu_reset_state {
|
||||
|
||||
struct kvm_vcpu_arch {
|
||||
struct kvm_cpu_context ctxt;
|
||||
void *sve_state;
|
||||
unsigned int sve_max_vl;
|
||||
|
||||
/* HYP configuration */
|
||||
u64 hcr_el2;
|
||||
@@ -255,7 +288,7 @@ struct kvm_vcpu_arch {
|
||||
struct kvm_guest_debug_arch external_debug_state;
|
||||
|
||||
/* Pointer to host CPU context */
|
||||
kvm_cpu_context_t *host_cpu_context;
|
||||
struct kvm_cpu_context *host_cpu_context;
|
||||
|
||||
struct thread_info *host_thread_info; /* hyp VA */
|
||||
struct user_fpsimd_state *host_fpsimd_state; /* hyp VA */
|
||||
@@ -318,12 +351,40 @@ struct kvm_vcpu_arch {
|
||||
bool sysregs_loaded_on_cpu;
|
||||
};
|
||||
|
||||
/* Pointer to the vcpu's SVE FFR for sve_{save,load}_state() */
|
||||
#define vcpu_sve_pffr(vcpu) ((void *)((char *)((vcpu)->arch.sve_state) + \
|
||||
sve_ffr_offset((vcpu)->arch.sve_max_vl)))
|
||||
|
||||
#define vcpu_sve_state_size(vcpu) ({ \
|
||||
size_t __size_ret; \
|
||||
unsigned int __vcpu_vq; \
|
||||
\
|
||||
if (WARN_ON(!sve_vl_valid((vcpu)->arch.sve_max_vl))) { \
|
||||
__size_ret = 0; \
|
||||
} else { \
|
||||
__vcpu_vq = sve_vq_from_vl((vcpu)->arch.sve_max_vl); \
|
||||
__size_ret = SVE_SIG_REGS_SIZE(__vcpu_vq); \
|
||||
} \
|
||||
\
|
||||
__size_ret; \
|
||||
})
|
||||
|
||||
/* vcpu_arch flags field values: */
|
||||
#define KVM_ARM64_DEBUG_DIRTY (1 << 0)
|
||||
#define KVM_ARM64_FP_ENABLED (1 << 1) /* guest FP regs loaded */
|
||||
#define KVM_ARM64_FP_HOST (1 << 2) /* host FP regs loaded */
|
||||
#define KVM_ARM64_HOST_SVE_IN_USE (1 << 3) /* backup for host TIF_SVE */
|
||||
#define KVM_ARM64_HOST_SVE_ENABLED (1 << 4) /* SVE enabled for EL0 */
|
||||
#define KVM_ARM64_GUEST_HAS_SVE (1 << 5) /* SVE exposed to guest */
|
||||
#define KVM_ARM64_VCPU_SVE_FINALIZED (1 << 6) /* SVE config completed */
|
||||
#define KVM_ARM64_GUEST_HAS_PTRAUTH (1 << 7) /* PTRAUTH exposed to guest */
|
||||
|
||||
#define vcpu_has_sve(vcpu) (system_supports_sve() && \
|
||||
((vcpu)->arch.flags & KVM_ARM64_GUEST_HAS_SVE))
|
||||
|
||||
#define vcpu_has_ptrauth(vcpu) ((system_supports_address_auth() || \
|
||||
system_supports_generic_auth()) && \
|
||||
((vcpu)->arch.flags & KVM_ARM64_GUEST_HAS_PTRAUTH))
|
||||
|
||||
#define vcpu_gp_regs(v) (&(v)->arch.ctxt.gp_regs)
|
||||
|
||||
@@ -432,9 +493,9 @@ void kvm_set_sei_esr(struct kvm_vcpu *vcpu, u64 syndrome);
|
||||
|
||||
struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr);
|
||||
|
||||
DECLARE_PER_CPU(kvm_cpu_context_t, kvm_host_cpu_state);
|
||||
DECLARE_PER_CPU(kvm_host_data_t, kvm_host_data);
|
||||
|
||||
static inline void kvm_init_host_cpu_context(kvm_cpu_context_t *cpu_ctxt,
|
||||
static inline void kvm_init_host_cpu_context(struct kvm_cpu_context *cpu_ctxt,
|
||||
int cpu)
|
||||
{
|
||||
/* The host's MPIDR is immutable, so let's set it up at boot time */
|
||||
@@ -452,8 +513,8 @@ static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr,
|
||||
* kernel's mapping to the linear mapping, and store it in tpidr_el2
|
||||
* so that we can use adr_l to access per-cpu variables in EL2.
|
||||
*/
|
||||
u64 tpidr_el2 = ((u64)this_cpu_ptr(&kvm_host_cpu_state) -
|
||||
(u64)kvm_ksym_ref(kvm_host_cpu_state));
|
||||
u64 tpidr_el2 = ((u64)this_cpu_ptr(&kvm_host_data) -
|
||||
(u64)kvm_ksym_ref(kvm_host_data));
|
||||
|
||||
/*
|
||||
* Call initialization code, and switch to the full blown HYP code.
|
||||
@@ -491,9 +552,10 @@ static inline bool kvm_arch_requires_vhe(void)
|
||||
return false;
|
||||
}
|
||||
|
||||
void kvm_arm_vcpu_ptrauth_trap(struct kvm_vcpu *vcpu);
|
||||
|
||||
static inline void kvm_arch_hardware_unsetup(void) {}
|
||||
static inline void kvm_arch_sync_events(struct kvm *kvm) {}
|
||||
static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
|
||||
static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
|
||||
static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
|
||||
|
||||
@@ -516,11 +578,28 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu);
|
||||
void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu);
|
||||
void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu);
|
||||
|
||||
static inline bool kvm_pmu_counter_deferred(struct perf_event_attr *attr)
|
||||
{
|
||||
return (!has_vhe() && attr->exclude_host);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_KVM /* Avoid conflicts with core headers if CONFIG_KVM=n */
|
||||
static inline int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return kvm_arch_vcpu_run_map_fp(vcpu);
|
||||
}
|
||||
|
||||
void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr);
|
||||
void kvm_clr_pmu_events(u32 clr);
|
||||
|
||||
void __pmu_switch_to_host(struct kvm_cpu_context *host_ctxt);
|
||||
bool __pmu_switch_to_guest(struct kvm_cpu_context *host_ctxt);
|
||||
|
||||
void kvm_vcpu_pmu_restore_guest(struct kvm_vcpu *vcpu);
|
||||
void kvm_vcpu_pmu_restore_host(struct kvm_vcpu *vcpu);
|
||||
#else
|
||||
static inline void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr) {}
|
||||
static inline void kvm_clr_pmu_events(u32 clr) {}
|
||||
#endif
|
||||
|
||||
static inline void kvm_arm_vhe_guest_enter(void)
|
||||
@@ -594,4 +673,10 @@ void kvm_arch_free_vm(struct kvm *kvm);
|
||||
|
||||
int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type);
|
||||
|
||||
int kvm_arm_vcpu_finalize(struct kvm_vcpu *vcpu, int feature);
|
||||
bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu);
|
||||
|
||||
#define kvm_arm_vcpu_sve_finalized(vcpu) \
|
||||
((vcpu)->arch.flags & KVM_ARM64_VCPU_SVE_FINALIZED)
|
||||
|
||||
#endif /* __ARM64_KVM_HOST_H__ */
|
||||
|
||||
@@ -149,7 +149,6 @@ void __debug_switch_to_host(struct kvm_vcpu *vcpu);
|
||||
|
||||
void __fpsimd_save_state(struct user_fpsimd_state *fp_regs);
|
||||
void __fpsimd_restore_state(struct user_fpsimd_state *fp_regs);
|
||||
bool __fpsimd_enabled(void);
|
||||
|
||||
void activate_traps_vhe_load(struct kvm_vcpu *vcpu);
|
||||
void deactivate_traps_vhe_put(void);
|
||||
|
||||
111
arch/arm64/include/asm/kvm_ptrauth.h
Normal file
111
arch/arm64/include/asm/kvm_ptrauth.h
Normal file
@@ -0,0 +1,111 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
/* arch/arm64/include/asm/kvm_ptrauth.h: Guest/host ptrauth save/restore
|
||||
* Copyright 2019 Arm Limited
|
||||
* Authors: Mark Rutland <mark.rutland@arm.com>
|
||||
* Amit Daniel Kachhap <amit.kachhap@arm.com>
|
||||
*/
|
||||
|
||||
#ifndef __ASM_KVM_PTRAUTH_H
|
||||
#define __ASM_KVM_PTRAUTH_H
|
||||
|
||||
#ifdef __ASSEMBLY__
|
||||
|
||||
#include <asm/sysreg.h>
|
||||
|
||||
#ifdef CONFIG_ARM64_PTR_AUTH
|
||||
|
||||
#define PTRAUTH_REG_OFFSET(x) (x - CPU_APIAKEYLO_EL1)
|
||||
|
||||
/*
|
||||
* CPU_AP*_EL1 values exceed immediate offset range (512) for stp
|
||||
* instruction, so the macros below take CPU_APIAKEYLO_EL1 as base and
|
||||
* calculate the offset of the keys from this base to avoid an extra add
|
||||
* instruction. These macros assume the key offsets follow the order of
|
||||
* the sysreg enum in kvm_host.h.
|
||||
*/
|
||||
.macro ptrauth_save_state base, reg1, reg2
|
||||
mrs_s \reg1, SYS_APIAKEYLO_EL1
|
||||
mrs_s \reg2, SYS_APIAKEYHI_EL1
|
||||
stp \reg1, \reg2, [\base, #PTRAUTH_REG_OFFSET(CPU_APIAKEYLO_EL1)]
|
||||
mrs_s \reg1, SYS_APIBKEYLO_EL1
|
||||
mrs_s \reg2, SYS_APIBKEYHI_EL1
|
||||
stp \reg1, \reg2, [\base, #PTRAUTH_REG_OFFSET(CPU_APIBKEYLO_EL1)]
|
||||
mrs_s \reg1, SYS_APDAKEYLO_EL1
|
||||
mrs_s \reg2, SYS_APDAKEYHI_EL1
|
||||
stp \reg1, \reg2, [\base, #PTRAUTH_REG_OFFSET(CPU_APDAKEYLO_EL1)]
|
||||
mrs_s \reg1, SYS_APDBKEYLO_EL1
|
||||
mrs_s \reg2, SYS_APDBKEYHI_EL1
|
||||
stp \reg1, \reg2, [\base, #PTRAUTH_REG_OFFSET(CPU_APDBKEYLO_EL1)]
|
||||
mrs_s \reg1, SYS_APGAKEYLO_EL1
|
||||
mrs_s \reg2, SYS_APGAKEYHI_EL1
|
||||
stp \reg1, \reg2, [\base, #PTRAUTH_REG_OFFSET(CPU_APGAKEYLO_EL1)]
|
||||
.endm
|
||||
|
||||
.macro ptrauth_restore_state base, reg1, reg2
|
||||
ldp \reg1, \reg2, [\base, #PTRAUTH_REG_OFFSET(CPU_APIAKEYLO_EL1)]
|
||||
msr_s SYS_APIAKEYLO_EL1, \reg1
|
||||
msr_s SYS_APIAKEYHI_EL1, \reg2
|
||||
ldp \reg1, \reg2, [\base, #PTRAUTH_REG_OFFSET(CPU_APIBKEYLO_EL1)]
|
||||
msr_s SYS_APIBKEYLO_EL1, \reg1
|
||||
msr_s SYS_APIBKEYHI_EL1, \reg2
|
||||
ldp \reg1, \reg2, [\base, #PTRAUTH_REG_OFFSET(CPU_APDAKEYLO_EL1)]
|
||||
msr_s SYS_APDAKEYLO_EL1, \reg1
|
||||
msr_s SYS_APDAKEYHI_EL1, \reg2
|
||||
ldp \reg1, \reg2, [\base, #PTRAUTH_REG_OFFSET(CPU_APDBKEYLO_EL1)]
|
||||
msr_s SYS_APDBKEYLO_EL1, \reg1
|
||||
msr_s SYS_APDBKEYHI_EL1, \reg2
|
||||
ldp \reg1, \reg2, [\base, #PTRAUTH_REG_OFFSET(CPU_APGAKEYLO_EL1)]
|
||||
msr_s SYS_APGAKEYLO_EL1, \reg1
|
||||
msr_s SYS_APGAKEYHI_EL1, \reg2
|
||||
.endm
|
||||
|
||||
/*
|
||||
* Both ptrauth_switch_to_guest and ptrauth_switch_to_host macros will
|
||||
* check for the presence of one of the cpufeature flags
|
||||
* ARM64_HAS_ADDRESS_AUTH_ARCH or ARM64_HAS_ADDRESS_AUTH_IMP_DEF and
|
||||
* then proceed ahead with the save/restore of Pointer Authentication
|
||||
* key registers.
|
||||
*/
|
||||
.macro ptrauth_switch_to_guest g_ctxt, reg1, reg2, reg3
|
||||
alternative_if ARM64_HAS_ADDRESS_AUTH_ARCH
|
||||
b 1000f
|
||||
alternative_else_nop_endif
|
||||
alternative_if_not ARM64_HAS_ADDRESS_AUTH_IMP_DEF
|
||||
b 1001f
|
||||
alternative_else_nop_endif
|
||||
1000:
|
||||
ldr \reg1, [\g_ctxt, #(VCPU_HCR_EL2 - VCPU_CONTEXT)]
|
||||
and \reg1, \reg1, #(HCR_API | HCR_APK)
|
||||
cbz \reg1, 1001f
|
||||
add \reg1, \g_ctxt, #CPU_APIAKEYLO_EL1
|
||||
ptrauth_restore_state \reg1, \reg2, \reg3
|
||||
1001:
|
||||
.endm
|
||||
|
||||
.macro ptrauth_switch_to_host g_ctxt, h_ctxt, reg1, reg2, reg3
|
||||
alternative_if ARM64_HAS_ADDRESS_AUTH_ARCH
|
||||
b 2000f
|
||||
alternative_else_nop_endif
|
||||
alternative_if_not ARM64_HAS_ADDRESS_AUTH_IMP_DEF
|
||||
b 2001f
|
||||
alternative_else_nop_endif
|
||||
2000:
|
||||
ldr \reg1, [\g_ctxt, #(VCPU_HCR_EL2 - VCPU_CONTEXT)]
|
||||
and \reg1, \reg1, #(HCR_API | HCR_APK)
|
||||
cbz \reg1, 2001f
|
||||
add \reg1, \g_ctxt, #CPU_APIAKEYLO_EL1
|
||||
ptrauth_save_state \reg1, \reg2, \reg3
|
||||
add \reg1, \h_ctxt, #CPU_APIAKEYLO_EL1
|
||||
ptrauth_restore_state \reg1, \reg2, \reg3
|
||||
isb
|
||||
2001:
|
||||
.endm
|
||||
|
||||
#else /* !CONFIG_ARM64_PTR_AUTH */
|
||||
.macro ptrauth_switch_to_guest g_ctxt, reg1, reg2, reg3
|
||||
.endm
|
||||
.macro ptrauth_switch_to_host g_ctxt, h_ctxt, reg1, reg2, reg3
|
||||
.endm
|
||||
#endif /* CONFIG_ARM64_PTR_AUTH */
|
||||
#endif /* __ASSEMBLY__ */
|
||||
#endif /* __ASM_KVM_PTRAUTH_H */
|
||||
@@ -454,6 +454,9 @@
|
||||
#define SYS_ICH_LR14_EL2 __SYS__LR8_EL2(6)
|
||||
#define SYS_ICH_LR15_EL2 __SYS__LR8_EL2(7)
|
||||
|
||||
/* VHE encodings for architectural EL0/1 system registers */
|
||||
#define SYS_ZCR_EL12 sys_reg(3, 5, 1, 2, 0)
|
||||
|
||||
/* Common SCTLR_ELx flags. */
|
||||
#define SCTLR_ELx_DSSBS (_BITUL(44))
|
||||
#define SCTLR_ELx_ENIA (_BITUL(31))
|
||||
|
||||
@@ -35,6 +35,7 @@
|
||||
#include <linux/psci.h>
|
||||
#include <linux/types.h>
|
||||
#include <asm/ptrace.h>
|
||||
#include <asm/sve_context.h>
|
||||
|
||||
#define __KVM_HAVE_GUEST_DEBUG
|
||||
#define __KVM_HAVE_IRQ_LINE
|
||||
@@ -102,6 +103,9 @@ struct kvm_regs {
|
||||
#define KVM_ARM_VCPU_EL1_32BIT 1 /* CPU running a 32bit VM */
|
||||
#define KVM_ARM_VCPU_PSCI_0_2 2 /* CPU uses PSCI v0.2 */
|
||||
#define KVM_ARM_VCPU_PMU_V3 3 /* Support guest PMUv3 */
|
||||
#define KVM_ARM_VCPU_SVE 4 /* enable SVE for this CPU */
|
||||
#define KVM_ARM_VCPU_PTRAUTH_ADDRESS 5 /* VCPU uses address authentication */
|
||||
#define KVM_ARM_VCPU_PTRAUTH_GENERIC 6 /* VCPU uses generic authentication */
|
||||
|
||||
struct kvm_vcpu_init {
|
||||
__u32 target;
|
||||
@@ -226,6 +230,45 @@ struct kvm_vcpu_events {
|
||||
KVM_REG_ARM_FW | ((r) & 0xffff))
|
||||
#define KVM_REG_ARM_PSCI_VERSION KVM_REG_ARM_FW_REG(0)
|
||||
|
||||
/* SVE registers */
|
||||
#define KVM_REG_ARM64_SVE (0x15 << KVM_REG_ARM_COPROC_SHIFT)
|
||||
|
||||
/* Z- and P-regs occupy blocks at the following offsets within this range: */
|
||||
#define KVM_REG_ARM64_SVE_ZREG_BASE 0
|
||||
#define KVM_REG_ARM64_SVE_PREG_BASE 0x400
|
||||
#define KVM_REG_ARM64_SVE_FFR_BASE 0x600
|
||||
|
||||
#define KVM_ARM64_SVE_NUM_ZREGS __SVE_NUM_ZREGS
|
||||
#define KVM_ARM64_SVE_NUM_PREGS __SVE_NUM_PREGS
|
||||
|
||||
#define KVM_ARM64_SVE_MAX_SLICES 32
|
||||
|
||||
#define KVM_REG_ARM64_SVE_ZREG(n, i) \
|
||||
(KVM_REG_ARM64 | KVM_REG_ARM64_SVE | KVM_REG_ARM64_SVE_ZREG_BASE | \
|
||||
KVM_REG_SIZE_U2048 | \
|
||||
(((n) & (KVM_ARM64_SVE_NUM_ZREGS - 1)) << 5) | \
|
||||
((i) & (KVM_ARM64_SVE_MAX_SLICES - 1)))
|
||||
|
||||
#define KVM_REG_ARM64_SVE_PREG(n, i) \
|
||||
(KVM_REG_ARM64 | KVM_REG_ARM64_SVE | KVM_REG_ARM64_SVE_PREG_BASE | \
|
||||
KVM_REG_SIZE_U256 | \
|
||||
(((n) & (KVM_ARM64_SVE_NUM_PREGS - 1)) << 5) | \
|
||||
((i) & (KVM_ARM64_SVE_MAX_SLICES - 1)))
|
||||
|
||||
#define KVM_REG_ARM64_SVE_FFR(i) \
|
||||
(KVM_REG_ARM64 | KVM_REG_ARM64_SVE | KVM_REG_ARM64_SVE_FFR_BASE | \
|
||||
KVM_REG_SIZE_U256 | \
|
||||
((i) & (KVM_ARM64_SVE_MAX_SLICES - 1)))
|
||||
|
||||
#define KVM_ARM64_SVE_VQ_MIN __SVE_VQ_MIN
|
||||
#define KVM_ARM64_SVE_VQ_MAX __SVE_VQ_MAX
|
||||
|
||||
/* Vector lengths pseudo-register: */
|
||||
#define KVM_REG_ARM64_SVE_VLS (KVM_REG_ARM64 | KVM_REG_ARM64_SVE | \
|
||||
KVM_REG_SIZE_U512 | 0xffff)
|
||||
#define KVM_ARM64_SVE_VLS_WORDS \
|
||||
((KVM_ARM64_SVE_VQ_MAX - KVM_ARM64_SVE_VQ_MIN) / 64 + 1)
|
||||
|
||||
/* Device Control API: ARM VGIC */
|
||||
#define KVM_DEV_ARM_VGIC_GRP_ADDR 0
|
||||
#define KVM_DEV_ARM_VGIC_GRP_DIST_REGS 1
|
||||
|
||||
@@ -125,9 +125,16 @@ int main(void)
|
||||
DEFINE(VCPU_CONTEXT, offsetof(struct kvm_vcpu, arch.ctxt));
|
||||
DEFINE(VCPU_FAULT_DISR, offsetof(struct kvm_vcpu, arch.fault.disr_el1));
|
||||
DEFINE(VCPU_WORKAROUND_FLAGS, offsetof(struct kvm_vcpu, arch.workaround_flags));
|
||||
DEFINE(VCPU_HCR_EL2, offsetof(struct kvm_vcpu, arch.hcr_el2));
|
||||
DEFINE(CPU_GP_REGS, offsetof(struct kvm_cpu_context, gp_regs));
|
||||
DEFINE(CPU_APIAKEYLO_EL1, offsetof(struct kvm_cpu_context, sys_regs[APIAKEYLO_EL1]));
|
||||
DEFINE(CPU_APIBKEYLO_EL1, offsetof(struct kvm_cpu_context, sys_regs[APIBKEYLO_EL1]));
|
||||
DEFINE(CPU_APDAKEYLO_EL1, offsetof(struct kvm_cpu_context, sys_regs[APDAKEYLO_EL1]));
|
||||
DEFINE(CPU_APDBKEYLO_EL1, offsetof(struct kvm_cpu_context, sys_regs[APDBKEYLO_EL1]));
|
||||
DEFINE(CPU_APGAKEYLO_EL1, offsetof(struct kvm_cpu_context, sys_regs[APGAKEYLO_EL1]));
|
||||
DEFINE(CPU_USER_PT_REGS, offsetof(struct kvm_regs, regs));
|
||||
DEFINE(HOST_CONTEXT_VCPU, offsetof(struct kvm_cpu_context, __hyp_running_vcpu));
|
||||
DEFINE(HOST_DATA_CONTEXT, offsetof(struct kvm_host_data, host_ctxt));
|
||||
#endif
|
||||
#ifdef CONFIG_CPU_PM
|
||||
DEFINE(CPU_CTX_SP, offsetof(struct cpu_suspend_ctx, sp));
|
||||
|
||||
@@ -1913,7 +1913,7 @@ static void verify_sve_features(void)
|
||||
unsigned int len = zcr & ZCR_ELx_LEN_MASK;
|
||||
|
||||
if (len < safe_len || sve_verify_vq_map()) {
|
||||
pr_crit("CPU%d: SVE: required vector length(s) missing\n",
|
||||
pr_crit("CPU%d: SVE: vector length support mismatch\n",
|
||||
smp_processor_id());
|
||||
cpu_die_early();
|
||||
}
|
||||
|
||||
@@ -18,6 +18,7 @@
|
||||
*/
|
||||
|
||||
#include <linux/bitmap.h>
|
||||
#include <linux/bitops.h>
|
||||
#include <linux/bottom_half.h>
|
||||
#include <linux/bug.h>
|
||||
#include <linux/cache.h>
|
||||
@@ -48,6 +49,7 @@
|
||||
#include <asm/sigcontext.h>
|
||||
#include <asm/sysreg.h>
|
||||
#include <asm/traps.h>
|
||||
#include <asm/virt.h>
|
||||
|
||||
#define FPEXC_IOF (1 << 0)
|
||||
#define FPEXC_DZF (1 << 1)
|
||||
@@ -119,6 +121,8 @@
|
||||
*/
|
||||
struct fpsimd_last_state_struct {
|
||||
struct user_fpsimd_state *st;
|
||||
void *sve_state;
|
||||
unsigned int sve_vl;
|
||||
};
|
||||
|
||||
static DEFINE_PER_CPU(struct fpsimd_last_state_struct, fpsimd_last_state);
|
||||
@@ -130,14 +134,23 @@ static int sve_default_vl = -1;
|
||||
|
||||
/* Maximum supported vector length across all CPUs (initially poisoned) */
|
||||
int __ro_after_init sve_max_vl = SVE_VL_MIN;
|
||||
/* Set of available vector lengths, as vq_to_bit(vq): */
|
||||
static __ro_after_init DECLARE_BITMAP(sve_vq_map, SVE_VQ_MAX);
|
||||
int __ro_after_init sve_max_virtualisable_vl = SVE_VL_MIN;
|
||||
|
||||
/*
|
||||
* Set of available vector lengths,
|
||||
* where length vq is encoded as bit __vq_to_bit(vq):
|
||||
*/
|
||||
__ro_after_init DECLARE_BITMAP(sve_vq_map, SVE_VQ_MAX);
|
||||
/* Set of vector lengths present on at least one cpu: */
|
||||
static __ro_after_init DECLARE_BITMAP(sve_vq_partial_map, SVE_VQ_MAX);
|
||||
|
||||
static void __percpu *efi_sve_state;
|
||||
|
||||
#else /* ! CONFIG_ARM64_SVE */
|
||||
|
||||
/* Dummy declaration for code that will be optimised out: */
|
||||
extern __ro_after_init DECLARE_BITMAP(sve_vq_map, SVE_VQ_MAX);
|
||||
extern __ro_after_init DECLARE_BITMAP(sve_vq_partial_map, SVE_VQ_MAX);
|
||||
extern void __percpu *efi_sve_state;
|
||||
|
||||
#endif /* ! CONFIG_ARM64_SVE */
|
||||
@@ -235,14 +248,15 @@ static void task_fpsimd_load(void)
|
||||
*/
|
||||
void fpsimd_save(void)
|
||||
{
|
||||
struct user_fpsimd_state *st = __this_cpu_read(fpsimd_last_state.st);
|
||||
struct fpsimd_last_state_struct const *last =
|
||||
this_cpu_ptr(&fpsimd_last_state);
|
||||
/* set by fpsimd_bind_task_to_cpu() or fpsimd_bind_state_to_cpu() */
|
||||
|
||||
WARN_ON(!in_softirq() && !irqs_disabled());
|
||||
|
||||
if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) {
|
||||
if (system_supports_sve() && test_thread_flag(TIF_SVE)) {
|
||||
if (WARN_ON(sve_get_vl() != current->thread.sve_vl)) {
|
||||
if (WARN_ON(sve_get_vl() != last->sve_vl)) {
|
||||
/*
|
||||
* Can't save the user regs, so current would
|
||||
* re-enter user with corrupt state.
|
||||
@@ -252,31 +266,14 @@ void fpsimd_save(void)
|
||||
return;
|
||||
}
|
||||
|
||||
sve_save_state(sve_pffr(¤t->thread), &st->fpsr);
|
||||
sve_save_state((char *)last->sve_state +
|
||||
sve_ffr_offset(last->sve_vl),
|
||||
&last->st->fpsr);
|
||||
} else
|
||||
fpsimd_save_state(st);
|
||||
fpsimd_save_state(last->st);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Helpers to translate bit indices in sve_vq_map to VQ values (and
|
||||
* vice versa). This allows find_next_bit() to be used to find the
|
||||
* _maximum_ VQ not exceeding a certain value.
|
||||
*/
|
||||
|
||||
static unsigned int vq_to_bit(unsigned int vq)
|
||||
{
|
||||
return SVE_VQ_MAX - vq;
|
||||
}
|
||||
|
||||
static unsigned int bit_to_vq(unsigned int bit)
|
||||
{
|
||||
if (WARN_ON(bit >= SVE_VQ_MAX))
|
||||
bit = SVE_VQ_MAX - 1;
|
||||
|
||||
return SVE_VQ_MAX - bit;
|
||||
}
|
||||
|
||||
/*
|
||||
* All vector length selection from userspace comes through here.
|
||||
* We're on a slow path, so some sanity-checks are included.
|
||||
@@ -298,8 +295,8 @@ static unsigned int find_supported_vector_length(unsigned int vl)
|
||||
vl = max_vl;
|
||||
|
||||
bit = find_next_bit(sve_vq_map, SVE_VQ_MAX,
|
||||
vq_to_bit(sve_vq_from_vl(vl)));
|
||||
return sve_vl_from_vq(bit_to_vq(bit));
|
||||
__vq_to_bit(sve_vq_from_vl(vl)));
|
||||
return sve_vl_from_vq(__bit_to_vq(bit));
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SYSCTL
|
||||
@@ -550,7 +547,6 @@ int sve_set_vector_length(struct task_struct *task,
|
||||
local_bh_disable();
|
||||
|
||||
fpsimd_save();
|
||||
set_thread_flag(TIF_FOREIGN_FPSTATE);
|
||||
}
|
||||
|
||||
fpsimd_flush_task_state(task);
|
||||
@@ -624,12 +620,6 @@ int sve_get_current_vl(void)
|
||||
return sve_prctl_status(0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Bitmap for temporary storage of the per-CPU set of supported vector lengths
|
||||
* during secondary boot.
|
||||
*/
|
||||
static DECLARE_BITMAP(sve_secondary_vq_map, SVE_VQ_MAX);
|
||||
|
||||
static void sve_probe_vqs(DECLARE_BITMAP(map, SVE_VQ_MAX))
|
||||
{
|
||||
unsigned int vq, vl;
|
||||
@@ -644,40 +634,82 @@ static void sve_probe_vqs(DECLARE_BITMAP(map, SVE_VQ_MAX))
|
||||
write_sysreg_s(zcr | (vq - 1), SYS_ZCR_EL1); /* self-syncing */
|
||||
vl = sve_get_vl();
|
||||
vq = sve_vq_from_vl(vl); /* skip intervening lengths */
|
||||
set_bit(vq_to_bit(vq), map);
|
||||
set_bit(__vq_to_bit(vq), map);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Initialise the set of known supported VQs for the boot CPU.
|
||||
* This is called during kernel boot, before secondary CPUs are brought up.
|
||||
*/
|
||||
void __init sve_init_vq_map(void)
|
||||
{
|
||||
sve_probe_vqs(sve_vq_map);
|
||||
bitmap_copy(sve_vq_partial_map, sve_vq_map, SVE_VQ_MAX);
|
||||
}
|
||||
|
||||
/*
|
||||
* If we haven't committed to the set of supported VQs yet, filter out
|
||||
* those not supported by the current CPU.
|
||||
* This function is called during the bring-up of early secondary CPUs only.
|
||||
*/
|
||||
void sve_update_vq_map(void)
|
||||
{
|
||||
sve_probe_vqs(sve_secondary_vq_map);
|
||||
bitmap_and(sve_vq_map, sve_vq_map, sve_secondary_vq_map, SVE_VQ_MAX);
|
||||
DECLARE_BITMAP(tmp_map, SVE_VQ_MAX);
|
||||
|
||||
sve_probe_vqs(tmp_map);
|
||||
bitmap_and(sve_vq_map, sve_vq_map, tmp_map, SVE_VQ_MAX);
|
||||
bitmap_or(sve_vq_partial_map, sve_vq_partial_map, tmp_map, SVE_VQ_MAX);
|
||||
}
|
||||
|
||||
/* Check whether the current CPU supports all VQs in the committed set */
|
||||
/*
|
||||
* Check whether the current CPU supports all VQs in the committed set.
|
||||
* This function is called during the bring-up of late secondary CPUs only.
|
||||
*/
|
||||
int sve_verify_vq_map(void)
|
||||
{
|
||||
int ret = 0;
|
||||
DECLARE_BITMAP(tmp_map, SVE_VQ_MAX);
|
||||
unsigned long b;
|
||||
|
||||
sve_probe_vqs(sve_secondary_vq_map);
|
||||
bitmap_andnot(sve_secondary_vq_map, sve_vq_map, sve_secondary_vq_map,
|
||||
SVE_VQ_MAX);
|
||||
if (!bitmap_empty(sve_secondary_vq_map, SVE_VQ_MAX)) {
|
||||
sve_probe_vqs(tmp_map);
|
||||
|
||||
bitmap_complement(tmp_map, tmp_map, SVE_VQ_MAX);
|
||||
if (bitmap_intersects(tmp_map, sve_vq_map, SVE_VQ_MAX)) {
|
||||
pr_warn("SVE: cpu%d: Required vector length(s) missing\n",
|
||||
smp_processor_id());
|
||||
ret = -EINVAL;
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
return ret;
|
||||
if (!IS_ENABLED(CONFIG_KVM) || !is_hyp_mode_available())
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* For KVM, it is necessary to ensure that this CPU doesn't
|
||||
* support any vector length that guests may have probed as
|
||||
* unsupported.
|
||||
*/
|
||||
|
||||
/* Recover the set of supported VQs: */
|
||||
bitmap_complement(tmp_map, tmp_map, SVE_VQ_MAX);
|
||||
/* Find VQs supported that are not globally supported: */
|
||||
bitmap_andnot(tmp_map, tmp_map, sve_vq_map, SVE_VQ_MAX);
|
||||
|
||||
/* Find the lowest such VQ, if any: */
|
||||
b = find_last_bit(tmp_map, SVE_VQ_MAX);
|
||||
if (b >= SVE_VQ_MAX)
|
||||
return 0; /* no mismatches */
|
||||
|
||||
/*
|
||||
* Mismatches above sve_max_virtualisable_vl are fine, since
|
||||
* no guest is allowed to configure ZCR_EL2.LEN to exceed this:
|
||||
*/
|
||||
if (sve_vl_from_vq(__bit_to_vq(b)) <= sve_max_virtualisable_vl) {
|
||||
pr_warn("SVE: cpu%d: Unsupported vector length(s) present\n",
|
||||
smp_processor_id());
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void __init sve_efi_setup(void)
|
||||
@@ -744,6 +776,8 @@ u64 read_zcr_features(void)
|
||||
void __init sve_setup(void)
|
||||
{
|
||||
u64 zcr;
|
||||
DECLARE_BITMAP(tmp_map, SVE_VQ_MAX);
|
||||
unsigned long b;
|
||||
|
||||
if (!system_supports_sve())
|
||||
return;
|
||||
@@ -753,8 +787,8 @@ void __init sve_setup(void)
|
||||
* so sve_vq_map must have at least SVE_VQ_MIN set.
|
||||
* If something went wrong, at least try to patch it up:
|
||||
*/
|
||||
if (WARN_ON(!test_bit(vq_to_bit(SVE_VQ_MIN), sve_vq_map)))
|
||||
set_bit(vq_to_bit(SVE_VQ_MIN), sve_vq_map);
|
||||
if (WARN_ON(!test_bit(__vq_to_bit(SVE_VQ_MIN), sve_vq_map)))
|
||||
set_bit(__vq_to_bit(SVE_VQ_MIN), sve_vq_map);
|
||||
|
||||
zcr = read_sanitised_ftr_reg(SYS_ZCR_EL1);
|
||||
sve_max_vl = sve_vl_from_vq((zcr & ZCR_ELx_LEN_MASK) + 1);
|
||||
@@ -772,11 +806,31 @@ void __init sve_setup(void)
|
||||
*/
|
||||
sve_default_vl = find_supported_vector_length(64);
|
||||
|
||||
bitmap_andnot(tmp_map, sve_vq_partial_map, sve_vq_map,
|
||||
SVE_VQ_MAX);
|
||||
|
||||
b = find_last_bit(tmp_map, SVE_VQ_MAX);
|
||||
if (b >= SVE_VQ_MAX)
|
||||
/* No non-virtualisable VLs found */
|
||||
sve_max_virtualisable_vl = SVE_VQ_MAX;
|
||||
else if (WARN_ON(b == SVE_VQ_MAX - 1))
|
||||
/* No virtualisable VLs? This is architecturally forbidden. */
|
||||
sve_max_virtualisable_vl = SVE_VQ_MIN;
|
||||
else /* b + 1 < SVE_VQ_MAX */
|
||||
sve_max_virtualisable_vl = sve_vl_from_vq(__bit_to_vq(b + 1));
|
||||
|
||||
if (sve_max_virtualisable_vl > sve_max_vl)
|
||||
sve_max_virtualisable_vl = sve_max_vl;
|
||||
|
||||
pr_info("SVE: maximum available vector length %u bytes per vector\n",
|
||||
sve_max_vl);
|
||||
pr_info("SVE: default vector length %u bytes per vector\n",
|
||||
sve_default_vl);
|
||||
|
||||
/* KVM decides whether to support mismatched systems. Just warn here: */
|
||||
if (sve_max_virtualisable_vl < sve_max_vl)
|
||||
pr_warn("SVE: unvirtualisable vector lengths present\n");
|
||||
|
||||
sve_efi_setup();
|
||||
}
|
||||
|
||||
@@ -816,12 +870,11 @@ asmlinkage void do_sve_acc(unsigned int esr, struct pt_regs *regs)
|
||||
local_bh_disable();
|
||||
|
||||
fpsimd_save();
|
||||
fpsimd_to_sve(current);
|
||||
|
||||
/* Force ret_to_user to reload the registers: */
|
||||
fpsimd_flush_task_state(current);
|
||||
set_thread_flag(TIF_FOREIGN_FPSTATE);
|
||||
|
||||
fpsimd_to_sve(current);
|
||||
if (test_and_set_thread_flag(TIF_SVE))
|
||||
WARN_ON(1); /* SVE access shouldn't have trapped */
|
||||
|
||||
@@ -894,9 +947,9 @@ void fpsimd_flush_thread(void)
|
||||
|
||||
local_bh_disable();
|
||||
|
||||
fpsimd_flush_task_state(current);
|
||||
memset(&current->thread.uw.fpsimd_state, 0,
|
||||
sizeof(current->thread.uw.fpsimd_state));
|
||||
fpsimd_flush_task_state(current);
|
||||
|
||||
if (system_supports_sve()) {
|
||||
clear_thread_flag(TIF_SVE);
|
||||
@@ -933,8 +986,6 @@ void fpsimd_flush_thread(void)
|
||||
current->thread.sve_vl_onexec = 0;
|
||||
}
|
||||
|
||||
set_thread_flag(TIF_FOREIGN_FPSTATE);
|
||||
|
||||
local_bh_enable();
|
||||
}
|
||||
|
||||
@@ -974,6 +1025,8 @@ void fpsimd_bind_task_to_cpu(void)
|
||||
this_cpu_ptr(&fpsimd_last_state);
|
||||
|
||||
last->st = &current->thread.uw.fpsimd_state;
|
||||
last->sve_state = current->thread.sve_state;
|
||||
last->sve_vl = current->thread.sve_vl;
|
||||
current->thread.fpsimd_cpu = smp_processor_id();
|
||||
|
||||
if (system_supports_sve()) {
|
||||
@@ -987,7 +1040,8 @@ void fpsimd_bind_task_to_cpu(void)
|
||||
}
|
||||
}
|
||||
|
||||
void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *st)
|
||||
void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *st, void *sve_state,
|
||||
unsigned int sve_vl)
|
||||
{
|
||||
struct fpsimd_last_state_struct *last =
|
||||
this_cpu_ptr(&fpsimd_last_state);
|
||||
@@ -995,6 +1049,8 @@ void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *st)
|
||||
WARN_ON(!in_softirq() && !irqs_disabled());
|
||||
|
||||
last->st = st;
|
||||
last->sve_state = sve_state;
|
||||
last->sve_vl = sve_vl;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1043,12 +1099,29 @@ void fpsimd_update_current_state(struct user_fpsimd_state const *state)
|
||||
|
||||
/*
|
||||
* Invalidate live CPU copies of task t's FPSIMD state
|
||||
*
|
||||
* This function may be called with preemption enabled. The barrier()
|
||||
* ensures that the assignment to fpsimd_cpu is visible to any
|
||||
* preemption/softirq that could race with set_tsk_thread_flag(), so
|
||||
* that TIF_FOREIGN_FPSTATE cannot be spuriously re-cleared.
|
||||
*
|
||||
* The final barrier ensures that TIF_FOREIGN_FPSTATE is seen set by any
|
||||
* subsequent code.
|
||||
*/
|
||||
void fpsimd_flush_task_state(struct task_struct *t)
|
||||
{
|
||||
t->thread.fpsimd_cpu = NR_CPUS;
|
||||
|
||||
barrier();
|
||||
set_tsk_thread_flag(t, TIF_FOREIGN_FPSTATE);
|
||||
|
||||
barrier();
|
||||
}
|
||||
|
||||
/*
|
||||
* Invalidate any task's FPSIMD state that is present on this cpu.
|
||||
* This function must be called with softirqs disabled.
|
||||
*/
|
||||
void fpsimd_flush_cpu_state(void)
|
||||
{
|
||||
__this_cpu_write(fpsimd_last_state.st, NULL);
|
||||
|
||||
@@ -26,6 +26,7 @@
|
||||
|
||||
#include <linux/acpi.h>
|
||||
#include <linux/clocksource.h>
|
||||
#include <linux/kvm_host.h>
|
||||
#include <linux/of.h>
|
||||
#include <linux/perf/arm_pmu.h>
|
||||
#include <linux/platform_device.h>
|
||||
@@ -528,12 +529,21 @@ static inline int armv8pmu_enable_counter(int idx)
|
||||
|
||||
static inline void armv8pmu_enable_event_counter(struct perf_event *event)
|
||||
{
|
||||
struct perf_event_attr *attr = &event->attr;
|
||||
int idx = event->hw.idx;
|
||||
u32 counter_bits = BIT(ARMV8_IDX_TO_COUNTER(idx));
|
||||
|
||||
armv8pmu_enable_counter(idx);
|
||||
if (armv8pmu_event_is_chained(event))
|
||||
armv8pmu_enable_counter(idx - 1);
|
||||
isb();
|
||||
counter_bits |= BIT(ARMV8_IDX_TO_COUNTER(idx - 1));
|
||||
|
||||
kvm_set_pmu_events(counter_bits, attr);
|
||||
|
||||
/* We rely on the hypervisor switch code to enable guest counters */
|
||||
if (!kvm_pmu_counter_deferred(attr)) {
|
||||
armv8pmu_enable_counter(idx);
|
||||
if (armv8pmu_event_is_chained(event))
|
||||
armv8pmu_enable_counter(idx - 1);
|
||||
}
|
||||
}
|
||||
|
||||
static inline int armv8pmu_disable_counter(int idx)
|
||||
@@ -546,11 +556,21 @@ static inline int armv8pmu_disable_counter(int idx)
|
||||
static inline void armv8pmu_disable_event_counter(struct perf_event *event)
|
||||
{
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
struct perf_event_attr *attr = &event->attr;
|
||||
int idx = hwc->idx;
|
||||
u32 counter_bits = BIT(ARMV8_IDX_TO_COUNTER(idx));
|
||||
|
||||
if (armv8pmu_event_is_chained(event))
|
||||
armv8pmu_disable_counter(idx - 1);
|
||||
armv8pmu_disable_counter(idx);
|
||||
counter_bits |= BIT(ARMV8_IDX_TO_COUNTER(idx - 1));
|
||||
|
||||
kvm_clr_pmu_events(counter_bits);
|
||||
|
||||
/* We rely on the hypervisor switch code to disable guest counters */
|
||||
if (!kvm_pmu_counter_deferred(attr)) {
|
||||
if (armv8pmu_event_is_chained(event))
|
||||
armv8pmu_disable_counter(idx - 1);
|
||||
armv8pmu_disable_counter(idx);
|
||||
}
|
||||
}
|
||||
|
||||
static inline int armv8pmu_enable_intens(int idx)
|
||||
@@ -827,14 +847,23 @@ static int armv8pmu_set_event_filter(struct hw_perf_event *event,
|
||||
* with other architectures (x86 and Power).
|
||||
*/
|
||||
if (is_kernel_in_hyp_mode()) {
|
||||
if (!attr->exclude_kernel)
|
||||
if (!attr->exclude_kernel && !attr->exclude_host)
|
||||
config_base |= ARMV8_PMU_INCLUDE_EL2;
|
||||
} else {
|
||||
if (attr->exclude_kernel)
|
||||
if (attr->exclude_guest)
|
||||
config_base |= ARMV8_PMU_EXCLUDE_EL1;
|
||||
if (!attr->exclude_hv)
|
||||
if (attr->exclude_host)
|
||||
config_base |= ARMV8_PMU_EXCLUDE_EL0;
|
||||
} else {
|
||||
if (!attr->exclude_hv && !attr->exclude_host)
|
||||
config_base |= ARMV8_PMU_INCLUDE_EL2;
|
||||
}
|
||||
|
||||
/*
|
||||
* Filter out !VHE kernels and guest kernels
|
||||
*/
|
||||
if (attr->exclude_kernel)
|
||||
config_base |= ARMV8_PMU_EXCLUDE_EL1;
|
||||
|
||||
if (attr->exclude_user)
|
||||
config_base |= ARMV8_PMU_EXCLUDE_EL0;
|
||||
|
||||
@@ -864,6 +893,9 @@ static void armv8pmu_reset(void *info)
|
||||
armv8pmu_disable_intens(idx);
|
||||
}
|
||||
|
||||
/* Clear the counters we flip at guest entry/exit */
|
||||
kvm_clr_pmu_events(U32_MAX);
|
||||
|
||||
/*
|
||||
* Initialize & Reset PMNC. Request overflow interrupt for
|
||||
* 64 bit cycle counter but cheat in armv8pmu_write_counter().
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user