mirror of
https://github.com/Dasharo/linux.git
synced 2026-03-06 15:25:10 -08:00
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM updates from Paolo Bonzini:
"x86:
- Support for userspace to emulate Xen hypercalls
- Raise the maximum number of user memslots
- Scalability improvements for the new MMU.
Instead of the complex "fast page fault" logic that is used in
mmu.c, tdp_mmu.c uses an rwlock so that page faults are concurrent,
but the code that can run against page faults is limited. Right now
only page faults take the lock for reading; in the future this will
be extended to some cases of page table destruction. I hope to
switch the default MMU around 5.12-rc3 (some testing was delayed
due to Chinese New Year).
- Cleanups for MAXPHYADDR checks
- Use static calls for vendor-specific callbacks
- On AMD, use VMLOAD/VMSAVE to save and restore host state
- Stop using deprecated jump label APIs
- Workaround for AMD erratum that made nested virtualization
unreliable
- Support for LBR emulation in the guest
- Support for communicating bus lock vmexits to userspace
- Add support for SEV attestation command
- Miscellaneous cleanups
PPC:
- Support for second data watchpoint on POWER10
- Remove some complex workarounds for buggy early versions of POWER9
- Guest entry/exit fixes
ARM64:
- Make the nVHE EL2 object relocatable
- Cleanups for concurrent translation faults hitting the same page
- Support for the standard TRNG hypervisor call
- A bunch of small PMU/Debug fixes
- Simplification of the early init hypercall handling
Non-KVM changes (with acks):
- Detection of contended rwlocks (implemented only for qrwlocks,
because KVM only needs it for x86)
- Allow __DISABLE_EXPORTS from assembly code
- Provide a saner follow_pfn replacements for modules"
* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (192 commits)
KVM: x86/xen: Explicitly pad struct compat_vcpu_info to 64 bytes
KVM: selftests: Don't bother mapping GVA for Xen shinfo test
KVM: selftests: Fix hex vs. decimal snafu in Xen test
KVM: selftests: Fix size of memslots created by Xen tests
KVM: selftests: Ignore recently added Xen tests' build output
KVM: selftests: Add missing header file needed by xAPIC IPI tests
KVM: selftests: Add operand to vmsave/vmload/vmrun in svm.c
KVM: SVM: Make symbol 'svm_gp_erratum_intercept' static
locking/arch: Move qrwlock.h include after qspinlock.h
KVM: PPC: Book3S HV: Fix host radix SLB optimisation with hash guests
KVM: PPC: Book3S HV: Ensure radix guest has no SLB entries
KVM: PPC: Don't always report hash MMU capability for P9 < DD2.2
KVM: PPC: Book3S HV: Save and restore FSCR in the P9 path
KVM: PPC: remove unneeded semicolon
KVM: PPC: Book3S HV: Use POWER9 SLBIA IH=6 variant to clear SLB
KVM: PPC: Book3S HV: No need to clear radix host SLB before loading HPT guest
KVM: PPC: Book3S HV: Fix radix guest SLB side channel
KVM: PPC: Book3S HV: Remove support for running HPT guest on RPT host without mixed mode support
KVM: PPC: Book3S HV: Introduce new capability for 2nd DAWR
KVM: PPC: Book3S HV: Add infrastructure to support 2nd DAWR
...
This commit is contained in:
@@ -263,6 +263,27 @@ Returns: 0 on success, -negative on error
|
||||
__u32 trans_len;
|
||||
};
|
||||
|
||||
10. KVM_SEV_GET_ATTESTATION_REPORT
|
||||
----------------------------------
|
||||
|
||||
The KVM_SEV_GET_ATTESTATION_REPORT command can be used by the hypervisor to query the attestation
|
||||
report containing the SHA-256 digest of the guest memory and VMSA passed through the KVM_SEV_LAUNCH
|
||||
commands and signed with the PEK. The digest returned by the command should match the digest
|
||||
used by the guest owner with the KVM_SEV_LAUNCH_MEASURE.
|
||||
|
||||
Parameters (in): struct kvm_sev_attestation
|
||||
|
||||
Returns: 0 on success, -negative on error
|
||||
|
||||
::
|
||||
|
||||
struct kvm_sev_attestation_report {
|
||||
__u8 mnonce[16]; /* A random mnonce that will be placed in the report */
|
||||
|
||||
__u64 uaddr; /* userspace address where the report should be copied */
|
||||
__u32 len;
|
||||
};
|
||||
|
||||
References
|
||||
==========
|
||||
|
||||
|
||||
@@ -960,6 +960,14 @@ memory.
|
||||
__u8 pad2[30];
|
||||
};
|
||||
|
||||
If the KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL flag is returned from the
|
||||
KVM_CAP_XEN_HVM check, it may be set in the flags field of this ioctl.
|
||||
This requests KVM to generate the contents of the hypercall page
|
||||
automatically; hypercalls will be intercepted and passed to userspace
|
||||
through KVM_EXIT_XEN. In this case, all of the blob size and address
|
||||
fields must be zero.
|
||||
|
||||
No other flags are currently valid in the struct kvm_xen_hvm_config.
|
||||
|
||||
4.29 KVM_GET_CLOCK
|
||||
------------------
|
||||
@@ -2268,6 +2276,8 @@ registers, find a list below:
|
||||
PPC KVM_REG_PPC_PSSCR 64
|
||||
PPC KVM_REG_PPC_DEC_EXPIRY 64
|
||||
PPC KVM_REG_PPC_PTCR 64
|
||||
PPC KVM_REG_PPC_DAWR1 64
|
||||
PPC KVM_REG_PPC_DAWRX1 64
|
||||
PPC KVM_REG_PPC_TM_GPR0 64
|
||||
...
|
||||
PPC KVM_REG_PPC_TM_GPR31 64
|
||||
@@ -4831,6 +4841,101 @@ into user space.
|
||||
If a vCPU is in running state while this ioctl is invoked, the vCPU may
|
||||
experience inconsistent filtering behavior on MSR accesses.
|
||||
|
||||
4.127 KVM_XEN_HVM_SET_ATTR
|
||||
--------------------------
|
||||
|
||||
:Capability: KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO
|
||||
:Architectures: x86
|
||||
:Type: vm ioctl
|
||||
:Parameters: struct kvm_xen_hvm_attr
|
||||
:Returns: 0 on success, < 0 on error
|
||||
|
||||
::
|
||||
|
||||
struct kvm_xen_hvm_attr {
|
||||
__u16 type;
|
||||
__u16 pad[3];
|
||||
union {
|
||||
__u8 long_mode;
|
||||
__u8 vector;
|
||||
struct {
|
||||
__u64 gfn;
|
||||
} shared_info;
|
||||
__u64 pad[4];
|
||||
} u;
|
||||
};
|
||||
|
||||
type values:
|
||||
|
||||
KVM_XEN_ATTR_TYPE_LONG_MODE
|
||||
Sets the ABI mode of the VM to 32-bit or 64-bit (long mode). This
|
||||
determines the layout of the shared info pages exposed to the VM.
|
||||
|
||||
KVM_XEN_ATTR_TYPE_SHARED_INFO
|
||||
Sets the guest physical frame number at which the Xen "shared info"
|
||||
page resides. Note that although Xen places vcpu_info for the first
|
||||
32 vCPUs in the shared_info page, KVM does not automatically do so
|
||||
and instead requires that KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO be used
|
||||
explicitly even when the vcpu_info for a given vCPU resides at the
|
||||
"default" location in the shared_info page. This is because KVM is
|
||||
not aware of the Xen CPU id which is used as the index into the
|
||||
vcpu_info[] array, so cannot know the correct default location.
|
||||
|
||||
KVM_XEN_ATTR_TYPE_UPCALL_VECTOR
|
||||
Sets the exception vector used to deliver Xen event channel upcalls.
|
||||
|
||||
4.128 KVM_XEN_HVM_GET_ATTR
|
||||
--------------------------
|
||||
|
||||
:Capability: KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO
|
||||
:Architectures: x86
|
||||
:Type: vm ioctl
|
||||
:Parameters: struct kvm_xen_hvm_attr
|
||||
:Returns: 0 on success, < 0 on error
|
||||
|
||||
Allows Xen VM attributes to be read. For the structure and types,
|
||||
see KVM_XEN_HVM_SET_ATTR above.
|
||||
|
||||
4.129 KVM_XEN_VCPU_SET_ATTR
|
||||
---------------------------
|
||||
|
||||
:Capability: KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO
|
||||
:Architectures: x86
|
||||
:Type: vcpu ioctl
|
||||
:Parameters: struct kvm_xen_vcpu_attr
|
||||
:Returns: 0 on success, < 0 on error
|
||||
|
||||
::
|
||||
|
||||
struct kvm_xen_vcpu_attr {
|
||||
__u16 type;
|
||||
__u16 pad[3];
|
||||
union {
|
||||
__u64 gpa;
|
||||
__u64 pad[4];
|
||||
} u;
|
||||
};
|
||||
|
||||
type values:
|
||||
|
||||
KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO
|
||||
Sets the guest physical address of the vcpu_info for a given vCPU.
|
||||
|
||||
KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO
|
||||
Sets the guest physical address of an additional pvclock structure
|
||||
for a given vCPU. This is typically used for guest vsyscall support.
|
||||
|
||||
4.130 KVM_XEN_VCPU_GET_ATTR
|
||||
---------------------------
|
||||
|
||||
:Capability: KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO
|
||||
:Architectures: x86
|
||||
:Type: vcpu ioctl
|
||||
:Parameters: struct kvm_xen_vcpu_attr
|
||||
:Returns: 0 on success, < 0 on error
|
||||
|
||||
Allows Xen vCPU attributes to be read. For the structure and types,
|
||||
see KVM_XEN_VCPU_SET_ATTR above.
|
||||
|
||||
5. The kvm_run structure
|
||||
========================
|
||||
@@ -4893,9 +4998,11 @@ local APIC is not used.
|
||||
__u16 flags;
|
||||
|
||||
More architecture-specific flags detailing state of the VCPU that may
|
||||
affect the device's behavior. The only currently defined flag is
|
||||
KVM_RUN_X86_SMM, which is valid on x86 machines and is set if the
|
||||
VCPU is in system management mode.
|
||||
affect the device's behavior. Current defined flags:
|
||||
/* x86, set if the VCPU is in system management mode */
|
||||
#define KVM_RUN_X86_SMM (1 << 0)
|
||||
/* x86, set if bus lock detected in VM */
|
||||
#define KVM_RUN_BUS_LOCK (1 << 1)
|
||||
|
||||
::
|
||||
|
||||
@@ -4996,13 +5103,18 @@ to the byte array.
|
||||
|
||||
.. note::
|
||||
|
||||
For KVM_EXIT_IO, KVM_EXIT_MMIO, KVM_EXIT_OSI, KVM_EXIT_PAPR,
|
||||
For KVM_EXIT_IO, KVM_EXIT_MMIO, KVM_EXIT_OSI, KVM_EXIT_PAPR, KVM_EXIT_XEN,
|
||||
KVM_EXIT_EPR, KVM_EXIT_X86_RDMSR and KVM_EXIT_X86_WRMSR the corresponding
|
||||
operations are complete (and guest state is consistent) only after userspace
|
||||
has re-entered the kernel with KVM_RUN. The kernel side will first finish
|
||||
incomplete operations and then check for pending signals. Userspace
|
||||
can re-enter the guest with an unmasked signal pending to complete
|
||||
pending operations.
|
||||
incomplete operations and then check for pending signals.
|
||||
|
||||
The pending state of the operation is not preserved in state which is
|
||||
visible to userspace, thus userspace should ensure that the operation is
|
||||
completed before performing a live migration. Userspace can re-enter the
|
||||
guest with an unmasked signal pending or with the immediate_exit field set
|
||||
to complete pending operations without allowing any further instructions
|
||||
to be executed.
|
||||
|
||||
::
|
||||
|
||||
@@ -5327,6 +5439,34 @@ wants to write. Once finished processing the event, user space must continue
|
||||
vCPU execution. If the MSR write was unsuccessful, user space also sets the
|
||||
"error" field to "1".
|
||||
|
||||
::
|
||||
|
||||
|
||||
struct kvm_xen_exit {
|
||||
#define KVM_EXIT_XEN_HCALL 1
|
||||
__u32 type;
|
||||
union {
|
||||
struct {
|
||||
__u32 longmode;
|
||||
__u32 cpl;
|
||||
__u64 input;
|
||||
__u64 result;
|
||||
__u64 params[6];
|
||||
} hcall;
|
||||
} u;
|
||||
};
|
||||
/* KVM_EXIT_XEN */
|
||||
struct kvm_hyperv_exit xen;
|
||||
|
||||
Indicates that the VCPU exits into userspace to process some tasks
|
||||
related to Xen emulation.
|
||||
|
||||
Valid values for 'type' are:
|
||||
|
||||
- KVM_EXIT_XEN_HCALL -- synchronously notify user-space about Xen hypercall.
|
||||
Userspace is expected to place the hypercall result into the appropriate
|
||||
field before invoking KVM_RUN again.
|
||||
|
||||
::
|
||||
|
||||
/* Fix the size of the union. */
|
||||
@@ -6038,6 +6178,53 @@ KVM_EXIT_X86_RDMSR and KVM_EXIT_X86_WRMSR exit notifications which user space
|
||||
can then handle to implement model specific MSR handling and/or user notifications
|
||||
to inform a user that an MSR was not handled.
|
||||
|
||||
7.22 KVM_CAP_X86_BUS_LOCK_EXIT
|
||||
-------------------------------
|
||||
|
||||
:Architectures: x86
|
||||
:Target: VM
|
||||
:Parameters: args[0] defines the policy used when bus locks detected in guest
|
||||
:Returns: 0 on success, -EINVAL when args[0] contains invalid bits
|
||||
|
||||
Valid bits in args[0] are::
|
||||
|
||||
#define KVM_BUS_LOCK_DETECTION_OFF (1 << 0)
|
||||
#define KVM_BUS_LOCK_DETECTION_EXIT (1 << 1)
|
||||
|
||||
Enabling this capability on a VM provides userspace with a way to select
|
||||
a policy to handle the bus locks detected in guest. Userspace can obtain
|
||||
the supported modes from the result of KVM_CHECK_EXTENSION and define it
|
||||
through the KVM_ENABLE_CAP.
|
||||
|
||||
KVM_BUS_LOCK_DETECTION_OFF and KVM_BUS_LOCK_DETECTION_EXIT are supported
|
||||
currently and mutually exclusive with each other. More bits can be added in
|
||||
the future.
|
||||
|
||||
With KVM_BUS_LOCK_DETECTION_OFF set, bus locks in guest will not cause vm exits
|
||||
so that no additional actions are needed. This is the default mode.
|
||||
|
||||
With KVM_BUS_LOCK_DETECTION_EXIT set, vm exits happen when bus lock detected
|
||||
in VM. KVM just exits to userspace when handling them. Userspace can enforce
|
||||
its own throttling or other policy based mitigations.
|
||||
|
||||
This capability is aimed to address the thread that VM can exploit bus locks to
|
||||
degree the performance of the whole system. Once the userspace enable this
|
||||
capability and select the KVM_BUS_LOCK_DETECTION_EXIT mode, KVM will set the
|
||||
KVM_RUN_BUS_LOCK flag in vcpu-run->flags field and exit to userspace. Concerning
|
||||
the bus lock vm exit can be preempted by a higher priority VM exit, the exit
|
||||
notifications to userspace can be KVM_EXIT_BUS_LOCK or other reasons.
|
||||
KVM_RUN_BUS_LOCK flag is used to distinguish between them.
|
||||
|
||||
7.22 KVM_CAP_PPC_DAWR1
|
||||
----------------------
|
||||
|
||||
:Architectures: ppc
|
||||
:Parameters: none
|
||||
:Returns: 0 on success, -EINVAL when CPU doesn't support 2nd DAWR
|
||||
|
||||
This capability can be used to check / enable 2nd DAWR feature provided
|
||||
by POWER10 processor.
|
||||
|
||||
8. Other capabilities.
|
||||
======================
|
||||
|
||||
@@ -6415,7 +6602,6 @@ guest according to the bits in the KVM_CPUID_FEATURES CPUID leaf
|
||||
(0x40000001). Otherwise, a guest may use the paravirtual features
|
||||
regardless of what has actually been exposed through the CPUID leaf.
|
||||
|
||||
|
||||
8.29 KVM_CAP_DIRTY_LOG_RING
|
||||
---------------------------
|
||||
|
||||
@@ -6502,3 +6688,29 @@ KVM_GET_DIRTY_LOG and KVM_CLEAR_DIRTY_LOG. After enabling
|
||||
KVM_CAP_DIRTY_LOG_RING with an acceptable dirty ring size, the virtual
|
||||
machine will switch to ring-buffer dirty page tracking and further
|
||||
KVM_GET_DIRTY_LOG or KVM_CLEAR_DIRTY_LOG ioctls will fail.
|
||||
|
||||
8.30 KVM_CAP_XEN_HVM
|
||||
--------------------
|
||||
|
||||
:Architectures: x86
|
||||
|
||||
This capability indicates the features that Xen supports for hosting Xen
|
||||
PVHVM guests. Valid flags are::
|
||||
|
||||
#define KVM_XEN_HVM_CONFIG_HYPERCALL_MSR (1 << 0)
|
||||
#define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL (1 << 1)
|
||||
#define KVM_XEN_HVM_CONFIG_SHARED_INFO (1 << 2)
|
||||
|
||||
The KVM_XEN_HVM_CONFIG_HYPERCALL_MSR flag indicates that the KVM_XEN_HVM_CONFIG
|
||||
ioctl is available, for the guest to set its hypercall page.
|
||||
|
||||
If KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL is also set, the same flag may also be
|
||||
provided in the flags to KVM_XEN_HVM_CONFIG, without providing hypercall page
|
||||
contents, to request that KVM generate hypercall page content automatically
|
||||
and also enable interception of guest hypercalls with KVM_EXIT_XEN.
|
||||
|
||||
The KVM_XEN_HVM_CONFIG_SHARED_INFO flag indicates the availability of the
|
||||
KVM_XEN_HVM_SET_ATTR, KVM_XEN_HVM_GET_ATTR, KVM_XEN_VCPU_SET_ATTR and
|
||||
KVM_XEN_VCPU_GET_ATTR ioctls, as well as the delivery of exception vectors
|
||||
for event channel upcalls when the evtchn_upcall_pending field of a vcpu's
|
||||
vcpu_info is set.
|
||||
|
||||
@@ -16,7 +16,14 @@ The acquisition orders for mutexes are as follows:
|
||||
- kvm->slots_lock is taken outside kvm->irq_lock, though acquiring
|
||||
them together is quite rare.
|
||||
|
||||
On x86, vcpu->mutex is taken outside kvm->arch.hyperv.hv_lock.
|
||||
On x86:
|
||||
|
||||
- vcpu->mutex is taken outside kvm->arch.hyperv.hv_lock
|
||||
|
||||
- kvm->arch.mmu_lock is an rwlock. kvm->arch.tdp_mmu_pages_lock is
|
||||
taken inside kvm->arch.mmu_lock, and cannot be taken without already
|
||||
holding kvm->arch.mmu_lock (typically with ``read_lock``, otherwise
|
||||
there's no need to take kvm->arch.tdp_mmu_pages_lock at all).
|
||||
|
||||
Everything else is a leaf: no other lock is taken inside the critical
|
||||
sections.
|
||||
|
||||
@@ -7,6 +7,9 @@
|
||||
#ifndef __ARM64_HYP_IMAGE_H__
|
||||
#define __ARM64_HYP_IMAGE_H__
|
||||
|
||||
#define __HYP_CONCAT(a, b) a ## b
|
||||
#define HYP_CONCAT(a, b) __HYP_CONCAT(a, b)
|
||||
|
||||
/*
|
||||
* KVM nVHE code has its own symbol namespace prefixed with __kvm_nvhe_,
|
||||
* to separate it from the kernel proper.
|
||||
@@ -21,9 +24,31 @@
|
||||
*/
|
||||
#define HYP_SECTION_NAME(NAME) .hyp##NAME
|
||||
|
||||
/* Symbol defined at the beginning of each hyp section. */
|
||||
#define HYP_SECTION_SYMBOL_NAME(NAME) \
|
||||
HYP_CONCAT(__hyp_section_, HYP_SECTION_NAME(NAME))
|
||||
|
||||
/*
|
||||
* Helper to generate linker script statements starting a hyp section.
|
||||
*
|
||||
* A symbol with a well-known name is defined at the first byte. This
|
||||
* is used as a base for hyp relocations (see gen-hyprel.c). It must
|
||||
* be defined inside the section so the linker of `vmlinux` cannot
|
||||
* separate it from the section data.
|
||||
*/
|
||||
#define BEGIN_HYP_SECTION(NAME) \
|
||||
HYP_SECTION_NAME(NAME) : { \
|
||||
HYP_SECTION_SYMBOL_NAME(NAME) = .;
|
||||
|
||||
/* Helper to generate linker script statements ending a hyp section. */
|
||||
#define END_HYP_SECTION \
|
||||
}
|
||||
|
||||
/* Defines an ELF hyp section from input section @NAME and its subsections. */
|
||||
#define HYP_SECTION(NAME) \
|
||||
HYP_SECTION_NAME(NAME) : { *(NAME NAME##.*) }
|
||||
#define HYP_SECTION(NAME) \
|
||||
BEGIN_HYP_SECTION(NAME) \
|
||||
*(NAME NAME##.*) \
|
||||
END_HYP_SECTION
|
||||
|
||||
/*
|
||||
* Defines a linker script alias of a kernel-proper symbol referenced by
|
||||
|
||||
@@ -199,26 +199,6 @@ extern void __vgic_v3_init_lrs(void);
|
||||
|
||||
extern u32 __kvm_get_mdcr_el2(void);
|
||||
|
||||
/*
|
||||
* Obtain the PC-relative address of a kernel symbol
|
||||
* s: symbol
|
||||
*
|
||||
* The goal of this macro is to return a symbol's address based on a
|
||||
* PC-relative computation, as opposed to a loading the VA from a
|
||||
* constant pool or something similar. This works well for HYP, as an
|
||||
* absolute VA is guaranteed to be wrong. Only use this if trying to
|
||||
* obtain the address of a symbol (i.e. not something you obtained by
|
||||
* following a pointer).
|
||||
*/
|
||||
#define hyp_symbol_addr(s) \
|
||||
({ \
|
||||
typeof(s) *addr; \
|
||||
asm("adrp %0, %1\n" \
|
||||
"add %0, %0, :lo12:%1\n" \
|
||||
: "=r" (addr) : "S" (&s)); \
|
||||
addr; \
|
||||
})
|
||||
|
||||
#define __KVM_EXTABLE(from, to) \
|
||||
" .pushsection __kvm_ex_table, \"a\"\n" \
|
||||
" .align 3\n" \
|
||||
|
||||
@@ -30,7 +30,6 @@
|
||||
|
||||
#define __KVM_HAVE_ARCH_INTC_INITIALIZED
|
||||
|
||||
#define KVM_USER_MEM_SLOTS 512
|
||||
#define KVM_HALT_POLL_NS_DEFAULT 500000
|
||||
|
||||
#include <kvm/arm_vgic.h>
|
||||
@@ -771,4 +770,6 @@ bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu);
|
||||
#define kvm_vcpu_has_pmu(vcpu) \
|
||||
(test_bit(KVM_ARM_VCPU_PMU_V3, (vcpu)->arch.features))
|
||||
|
||||
int kvm_trng_call(struct kvm_vcpu *vcpu);
|
||||
|
||||
#endif /* __ARM64_KVM_HOST_H__ */
|
||||
|
||||
@@ -73,8 +73,18 @@ alternative_cb_end
|
||||
.endm
|
||||
|
||||
/*
|
||||
* Convert a kernel image address to a PA
|
||||
* reg: kernel address to be converted in place
|
||||
* Convert a hypervisor VA to a PA
|
||||
* reg: hypervisor address to be converted in place
|
||||
* tmp: temporary register
|
||||
*/
|
||||
.macro hyp_pa reg, tmp
|
||||
ldr_l \tmp, hyp_physvirt_offset
|
||||
add \reg, \reg, \tmp
|
||||
.endm
|
||||
|
||||
/*
|
||||
* Convert a hypervisor VA to a kernel image address
|
||||
* reg: hypervisor address to be converted in place
|
||||
* tmp: temporary register
|
||||
*
|
||||
* The actual code generation takes place in kvm_get_kimage_voffset, and
|
||||
@@ -82,7 +92,11 @@ alternative_cb_end
|
||||
* perform the register allocation (kvm_get_kimage_voffset uses the
|
||||
* specific registers encoded in the instructions).
|
||||
*/
|
||||
.macro kimg_pa reg, tmp
|
||||
.macro hyp_kimg_va reg, tmp
|
||||
/* Convert hyp VA -> PA. */
|
||||
hyp_pa \reg, \tmp
|
||||
|
||||
/* Load kimage_voffset. */
|
||||
alternative_cb kvm_get_kimage_voffset
|
||||
movz \tmp, #0
|
||||
movk \tmp, #0, lsl #16
|
||||
@@ -90,32 +104,8 @@ alternative_cb kvm_get_kimage_voffset
|
||||
movk \tmp, #0, lsl #48
|
||||
alternative_cb_end
|
||||
|
||||
/* reg = __pa(reg) */
|
||||
sub \reg, \reg, \tmp
|
||||
.endm
|
||||
|
||||
/*
|
||||
* Convert a kernel image address to a hyp VA
|
||||
* reg: kernel address to be converted in place
|
||||
* tmp: temporary register
|
||||
*
|
||||
* The actual code generation takes place in kvm_get_kimage_voffset, and
|
||||
* the instructions below are only there to reserve the space and
|
||||
* perform the register allocation (kvm_update_kimg_phys_offset uses the
|
||||
* specific registers encoded in the instructions).
|
||||
*/
|
||||
.macro kimg_hyp_va reg, tmp
|
||||
alternative_cb kvm_update_kimg_phys_offset
|
||||
movz \tmp, #0
|
||||
movk \tmp, #0, lsl #16
|
||||
movk \tmp, #0, lsl #32
|
||||
movk \tmp, #0, lsl #48
|
||||
alternative_cb_end
|
||||
|
||||
sub \reg, \reg, \tmp
|
||||
mov_q \tmp, PAGE_OFFSET
|
||||
orr \reg, \reg, \tmp
|
||||
kern_hyp_va \reg
|
||||
/* Convert PA -> kimg VA. */
|
||||
add \reg, \reg, \tmp
|
||||
.endm
|
||||
|
||||
#else
|
||||
@@ -129,6 +119,7 @@ alternative_cb_end
|
||||
void kvm_update_va_mask(struct alt_instr *alt,
|
||||
__le32 *origptr, __le32 *updptr, int nr_inst);
|
||||
void kvm_compute_layout(void);
|
||||
void kvm_apply_hyp_relocations(void);
|
||||
|
||||
static __always_inline unsigned long __kern_hyp_va(unsigned long v)
|
||||
{
|
||||
@@ -144,24 +135,6 @@ static __always_inline unsigned long __kern_hyp_va(unsigned long v)
|
||||
|
||||
#define kern_hyp_va(v) ((typeof(v))(__kern_hyp_va((unsigned long)(v))))
|
||||
|
||||
static __always_inline unsigned long __kimg_hyp_va(unsigned long v)
|
||||
{
|
||||
unsigned long offset;
|
||||
|
||||
asm volatile(ALTERNATIVE_CB("movz %0, #0\n"
|
||||
"movk %0, #0, lsl #16\n"
|
||||
"movk %0, #0, lsl #32\n"
|
||||
"movk %0, #0, lsl #48\n",
|
||||
kvm_update_kimg_phys_offset)
|
||||
: "=r" (offset));
|
||||
|
||||
return __kern_hyp_va((v - offset) | PAGE_OFFSET);
|
||||
}
|
||||
|
||||
#define kimg_fn_hyp_va(v) ((typeof(*v))(__kimg_hyp_va((unsigned long)(v))))
|
||||
|
||||
#define kimg_fn_ptr(x) (typeof(x) **)(x)
|
||||
|
||||
/*
|
||||
* We currently support using a VM-specified IPA size. For backward
|
||||
* compatibility, the default IPA size is fixed to 40bits.
|
||||
|
||||
@@ -157,6 +157,11 @@ void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt);
|
||||
* If device attributes are not explicitly requested in @prot, then the
|
||||
* mapping will be normal, cacheable.
|
||||
*
|
||||
* Note that the update of a valid leaf PTE in this function will be aborted,
|
||||
* if it's trying to recreate the exact same mapping or only change the access
|
||||
* permissions. Instead, the vCPU will exit one more time from guest if still
|
||||
* needed and then go through the path of relaxing permissions.
|
||||
*
|
||||
* Note that this function will both coalesce existing table entries and split
|
||||
* existing block mappings, relying on page-faults to fault back areas outside
|
||||
* of the new mapping lazily.
|
||||
|
||||
@@ -11,7 +11,8 @@ extern char __alt_instructions[], __alt_instructions_end[];
|
||||
extern char __hibernate_exit_text_start[], __hibernate_exit_text_end[];
|
||||
extern char __hyp_idmap_text_start[], __hyp_idmap_text_end[];
|
||||
extern char __hyp_text_start[], __hyp_text_end[];
|
||||
extern char __hyp_data_ro_after_init_start[], __hyp_data_ro_after_init_end[];
|
||||
extern char __hyp_rodata_start[], __hyp_rodata_end[];
|
||||
extern char __hyp_reloc_begin[], __hyp_reloc_end[];
|
||||
extern char __idmap_text_start[], __idmap_text_end[];
|
||||
extern char __initdata_begin[], __initdata_end[];
|
||||
extern char __inittext_begin[], __inittext_end[];
|
||||
|
||||
@@ -5,8 +5,8 @@
|
||||
#ifndef __ASM_SPINLOCK_H
|
||||
#define __ASM_SPINLOCK_H
|
||||
|
||||
#include <asm/qrwlock.h>
|
||||
#include <asm/qspinlock.h>
|
||||
#include <asm/qrwlock.h>
|
||||
|
||||
/* See include/linux/spinlock.h */
|
||||
#define smp_mb__after_spinlock() smp_mb()
|
||||
|
||||
@@ -853,7 +853,10 @@
|
||||
|
||||
#define ID_DFR0_PERFMON_SHIFT 24
|
||||
|
||||
#define ID_DFR0_PERFMON_8_0 0x3
|
||||
#define ID_DFR0_PERFMON_8_1 0x4
|
||||
#define ID_DFR0_PERFMON_8_4 0x5
|
||||
#define ID_DFR0_PERFMON_8_5 0x6
|
||||
|
||||
#define ID_ISAR4_SWP_FRAC_SHIFT 28
|
||||
#define ID_ISAR4_PSR_M_SHIFT 24
|
||||
|
||||
@@ -64,7 +64,6 @@ __efistub__ctype = _ctype;
|
||||
/* Alternative callbacks for init-time patching of nVHE hyp code. */
|
||||
KVM_NVHE_ALIAS(kvm_patch_vector_branch);
|
||||
KVM_NVHE_ALIAS(kvm_update_va_mask);
|
||||
KVM_NVHE_ALIAS(kvm_update_kimg_phys_offset);
|
||||
KVM_NVHE_ALIAS(kvm_get_kimage_voffset);
|
||||
|
||||
/* Global kernel state accessed by nVHE hyp code. */
|
||||
|
||||
@@ -434,8 +434,10 @@ static void __init hyp_mode_check(void)
|
||||
"CPU: CPUs started in inconsistent modes");
|
||||
else
|
||||
pr_info("CPU: All CPU(s) started at EL1\n");
|
||||
if (IS_ENABLED(CONFIG_KVM) && !is_kernel_in_hyp_mode())
|
||||
if (IS_ENABLED(CONFIG_KVM) && !is_kernel_in_hyp_mode()) {
|
||||
kvm_compute_layout();
|
||||
kvm_apply_hyp_relocations();
|
||||
}
|
||||
}
|
||||
|
||||
void __init smp_cpus_done(unsigned int max_cpus)
|
||||
|
||||
@@ -31,10 +31,11 @@ jiffies = jiffies_64;
|
||||
__stop___kvm_ex_table = .;
|
||||
|
||||
#define HYPERVISOR_DATA_SECTIONS \
|
||||
HYP_SECTION_NAME(.data..ro_after_init) : { \
|
||||
__hyp_data_ro_after_init_start = .; \
|
||||
HYP_SECTION_NAME(.rodata) : { \
|
||||
__hyp_rodata_start = .; \
|
||||
*(HYP_SECTION_NAME(.data..ro_after_init)) \
|
||||
__hyp_data_ro_after_init_end = .; \
|
||||
*(HYP_SECTION_NAME(.rodata)) \
|
||||
__hyp_rodata_end = .; \
|
||||
}
|
||||
|
||||
#define HYPERVISOR_PERCPU_SECTION \
|
||||
@@ -42,10 +43,19 @@ jiffies = jiffies_64;
|
||||
HYP_SECTION_NAME(.data..percpu) : { \
|
||||
*(HYP_SECTION_NAME(.data..percpu)) \
|
||||
}
|
||||
|
||||
#define HYPERVISOR_RELOC_SECTION \
|
||||
.hyp.reloc : ALIGN(4) { \
|
||||
__hyp_reloc_begin = .; \
|
||||
*(.hyp.reloc) \
|
||||
__hyp_reloc_end = .; \
|
||||
}
|
||||
|
||||
#else /* CONFIG_KVM */
|
||||
#define HYPERVISOR_EXTABLE
|
||||
#define HYPERVISOR_DATA_SECTIONS
|
||||
#define HYPERVISOR_PERCPU_SECTION
|
||||
#define HYPERVISOR_RELOC_SECTION
|
||||
#endif
|
||||
|
||||
#define HYPERVISOR_TEXT \
|
||||
@@ -216,6 +226,8 @@ SECTIONS
|
||||
PERCPU_SECTION(L1_CACHE_BYTES)
|
||||
HYPERVISOR_PERCPU_SECTION
|
||||
|
||||
HYPERVISOR_RELOC_SECTION
|
||||
|
||||
.rela.dyn : ALIGN(8) {
|
||||
*(.rela .rela*)
|
||||
}
|
||||
|
||||
@@ -16,7 +16,7 @@ kvm-y := $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o \
|
||||
inject_fault.o va_layout.o handle_exit.o \
|
||||
guest.o debug.o reset.o sys_regs.o \
|
||||
vgic-sys-reg-v3.o fpsimd.o pmu.o \
|
||||
arch_timer.o \
|
||||
arch_timer.o trng.o\
|
||||
vgic/vgic.o vgic/vgic-init.o \
|
||||
vgic/vgic-irqfd.o vgic/vgic-v2.o \
|
||||
vgic/vgic-v3.o vgic/vgic-v4.o \
|
||||
|
||||
@@ -1750,11 +1750,10 @@ static int init_hyp_mode(void)
|
||||
goto out_err;
|
||||
}
|
||||
|
||||
err = create_hyp_mappings(kvm_ksym_ref(__hyp_data_ro_after_init_start),
|
||||
kvm_ksym_ref(__hyp_data_ro_after_init_end),
|
||||
PAGE_HYP_RO);
|
||||
err = create_hyp_mappings(kvm_ksym_ref(__hyp_rodata_start),
|
||||
kvm_ksym_ref(__hyp_rodata_end), PAGE_HYP_RO);
|
||||
if (err) {
|
||||
kvm_err("Cannot map .hyp.data..ro_after_init section\n");
|
||||
kvm_err("Cannot map .hyp.rodata section\n");
|
||||
goto out_err;
|
||||
}
|
||||
|
||||
|
||||
@@ -505,8 +505,8 @@ static inline void __kvm_unexpected_el2_exception(void)
|
||||
struct exception_table_entry *entry, *end;
|
||||
unsigned long elr_el2 = read_sysreg(elr_el2);
|
||||
|
||||
entry = hyp_symbol_addr(__start___kvm_ex_table);
|
||||
end = hyp_symbol_addr(__stop___kvm_ex_table);
|
||||
entry = &__start___kvm_ex_table;
|
||||
end = &__stop___kvm_ex_table;
|
||||
|
||||
while (entry < end) {
|
||||
addr = (unsigned long)&entry->insn + entry->insn;
|
||||
|
||||
2
arch/arm64/kvm/hyp/nvhe/.gitignore
vendored
2
arch/arm64/kvm/hyp/nvhe/.gitignore
vendored
@@ -1,2 +1,4 @@
|
||||
# SPDX-License-Identifier: GPL-2.0-only
|
||||
gen-hyprel
|
||||
hyp.lds
|
||||
hyp-reloc.S
|
||||
|
||||
@@ -3,8 +3,11 @@
|
||||
# Makefile for Kernel-based Virtual Machine module, HYP/nVHE part
|
||||
#
|
||||
|
||||
asflags-y := -D__KVM_NVHE_HYPERVISOR__
|
||||
ccflags-y := -D__KVM_NVHE_HYPERVISOR__
|
||||
asflags-y := -D__KVM_NVHE_HYPERVISOR__ -D__DISABLE_EXPORTS
|
||||
ccflags-y := -D__KVM_NVHE_HYPERVISOR__ -D__DISABLE_EXPORTS
|
||||
|
||||
hostprogs := gen-hyprel
|
||||
HOST_EXTRACFLAGS += -I$(objtree)/include
|
||||
|
||||
obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o \
|
||||
hyp-main.o hyp-smp.o psci-relay.o
|
||||
@@ -19,7 +22,7 @@ obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \
|
||||
|
||||
hyp-obj := $(patsubst %.o,%.nvhe.o,$(obj-y))
|
||||
obj-y := kvm_nvhe.o
|
||||
extra-y := $(hyp-obj) kvm_nvhe.tmp.o hyp.lds
|
||||
extra-y := $(hyp-obj) kvm_nvhe.tmp.o kvm_nvhe.rel.o hyp.lds hyp-reloc.S hyp-reloc.o
|
||||
|
||||
# 1) Compile all source files to `.nvhe.o` object files. The file extension
|
||||
# avoids file name clashes for files shared with VHE.
|
||||
@@ -42,11 +45,31 @@ LDFLAGS_kvm_nvhe.tmp.o := -r -T
|
||||
$(obj)/kvm_nvhe.tmp.o: $(obj)/hyp.lds $(addprefix $(obj)/,$(hyp-obj)) FORCE
|
||||
$(call if_changed,ld)
|
||||
|
||||
# 4) Produce the final 'kvm_nvhe.o', ready to be linked into 'vmlinux'.
|
||||
# 4) Generate list of hyp code/data positions that need to be relocated at
|
||||
# runtime. Because the hypervisor is part of the kernel binary, relocations
|
||||
# produce a kernel VA. We enumerate relocations targeting hyp at build time
|
||||
# and convert the kernel VAs at those positions to hyp VAs.
|
||||
$(obj)/hyp-reloc.S: $(obj)/kvm_nvhe.tmp.o $(obj)/gen-hyprel
|
||||
$(call if_changed,hyprel)
|
||||
|
||||
# 5) Compile hyp-reloc.S and link it into the existing partially linked object.
|
||||
# The object file now contains a section with pointers to hyp positions that
|
||||
# will contain kernel VAs at runtime. These pointers have relocations on them
|
||||
# so that they get updated as the hyp object is linked into `vmlinux`.
|
||||
LDFLAGS_kvm_nvhe.rel.o := -r
|
||||
$(obj)/kvm_nvhe.rel.o: $(obj)/kvm_nvhe.tmp.o $(obj)/hyp-reloc.o FORCE
|
||||
$(call if_changed,ld)
|
||||
|
||||
# 6) Produce the final 'kvm_nvhe.o', ready to be linked into 'vmlinux'.
|
||||
# Prefixes names of ELF symbols with '__kvm_nvhe_'.
|
||||
$(obj)/kvm_nvhe.o: $(obj)/kvm_nvhe.tmp.o FORCE
|
||||
$(obj)/kvm_nvhe.o: $(obj)/kvm_nvhe.rel.o FORCE
|
||||
$(call if_changed,hypcopy)
|
||||
|
||||
# The HYPREL command calls `gen-hyprel` to generate an assembly file with
|
||||
# a list of relocations targeting hyp code/data.
|
||||
quiet_cmd_hyprel = HYPREL $@
|
||||
cmd_hyprel = $(obj)/gen-hyprel $< > $@
|
||||
|
||||
# The HYPCOPY command uses `objcopy` to prefix all ELF symbol names
|
||||
# to avoid clashes with VHE code/data.
|
||||
quiet_cmd_hypcopy = HYPCOPY $@
|
||||
|
||||
438
arch/arm64/kvm/hyp/nvhe/gen-hyprel.c
Normal file
438
arch/arm64/kvm/hyp/nvhe/gen-hyprel.c
Normal file
@@ -0,0 +1,438 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2020 - Google LLC
 * Author: David Brazdil <dbrazdil@google.com>
 *
 * Generates relocation information used by the kernel to convert
 * absolute addresses in hyp data from kernel VAs to hyp VAs.
 *
 * This is necessary because hyp code is linked into the same binary
 * as the kernel but executes under different memory mappings.
 * If the compiler used absolute addressing, those addresses need to
 * be converted before they are used by hyp code.
 *
 * The input of this program is the relocatable ELF object containing
 * all hyp code/data, not yet linked into vmlinux. Hyp section names
 * should have been prefixed with `.hyp` at this point.
 *
 * The output (printed to stdout) is an assembly file containing
 * an array of 32-bit integers and static relocations that instruct
 * the linker of `vmlinux` to populate the array entries with offsets
 * to positions in the kernel binary containing VAs used by hyp code.
 *
 * Note that dynamic relocations could be used for the same purpose.
 * However, those are only generated if CONFIG_RELOCATABLE=y.
 */
|
||||
|
||||
#include <elf.h>
|
||||
#include <endian.h>
|
||||
#include <errno.h>
|
||||
#include <fcntl.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <generated/autoconf.h>
|
||||
|
||||
/* Names used to identify and emit hyp sections/symbols. */
#define HYP_SECTION_PREFIX		".hyp"
#define HYP_RELOC_SECTION		".hyp.reloc"
#define HYP_SECTION_SYMBOL_PREFIX	"__hyp_section_"

/*
 * AArch64 relocation type constants.
 * Included in case these are not defined in the host toolchain's <elf.h>.
 */
#ifndef R_AARCH64_ABS64
#define R_AARCH64_ABS64			257
#endif
#ifndef R_AARCH64_LD_PREL_LO19
#define R_AARCH64_LD_PREL_LO19		273
#endif
#ifndef R_AARCH64_ADR_PREL_LO21
#define R_AARCH64_ADR_PREL_LO21		274
#endif
#ifndef R_AARCH64_ADR_PREL_PG_HI21
#define R_AARCH64_ADR_PREL_PG_HI21	275
#endif
#ifndef R_AARCH64_ADR_PREL_PG_HI21_NC
#define R_AARCH64_ADR_PREL_PG_HI21_NC	276
#endif
#ifndef R_AARCH64_ADD_ABS_LO12_NC
#define R_AARCH64_ADD_ABS_LO12_NC	277
#endif
#ifndef R_AARCH64_LDST8_ABS_LO12_NC
#define R_AARCH64_LDST8_ABS_LO12_NC	278
#endif
#ifndef R_AARCH64_TSTBR14
#define R_AARCH64_TSTBR14		279
#endif
#ifndef R_AARCH64_CONDBR19
#define R_AARCH64_CONDBR19		280
#endif
#ifndef R_AARCH64_JUMP26
#define R_AARCH64_JUMP26		282
#endif
#ifndef R_AARCH64_CALL26
#define R_AARCH64_CALL26		283
#endif
#ifndef R_AARCH64_LDST16_ABS_LO12_NC
#define R_AARCH64_LDST16_ABS_LO12_NC	284
#endif
#ifndef R_AARCH64_LDST32_ABS_LO12_NC
#define R_AARCH64_LDST32_ABS_LO12_NC	285
#endif
#ifndef R_AARCH64_LDST64_ABS_LO12_NC
#define R_AARCH64_LDST64_ABS_LO12_NC	286
#endif
#ifndef R_AARCH64_MOVW_PREL_G0
#define R_AARCH64_MOVW_PREL_G0		287
#endif
#ifndef R_AARCH64_MOVW_PREL_G0_NC
#define R_AARCH64_MOVW_PREL_G0_NC	288
#endif
#ifndef R_AARCH64_MOVW_PREL_G1
#define R_AARCH64_MOVW_PREL_G1		289
#endif
#ifndef R_AARCH64_MOVW_PREL_G1_NC
#define R_AARCH64_MOVW_PREL_G1_NC	290
#endif
#ifndef R_AARCH64_MOVW_PREL_G2
#define R_AARCH64_MOVW_PREL_G2		291
#endif
#ifndef R_AARCH64_MOVW_PREL_G2_NC
#define R_AARCH64_MOVW_PREL_G2_NC	292
#endif
#ifndef R_AARCH64_MOVW_PREL_G3
#define R_AARCH64_MOVW_PREL_G3		293
#endif
#ifndef R_AARCH64_LDST128_ABS_LO12_NC
#define R_AARCH64_LDST128_ABS_LO12_NC	299
#endif

/* Global state describing the currently mmap()ed ELF object. */
static struct {
	const char	*path;		/* input path, for error messages */
	char		*begin;		/* start of the mmap()ed file */
	size_t		size;		/* file size in bytes */
	Elf64_Ehdr	*ehdr;		/* ELF header */
	Elf64_Shdr	*sh_table;	/* section header table */
	const char	*sh_string;	/* section-header string table data */
} elf;
|
||||
|
||||
/*
 * ELF fields are stored in the target's byte order; pick the matching
 * host conversion macros based on the kernel configuration.
 */
#if defined(CONFIG_CPU_LITTLE_ENDIAN)

#define elf16toh(x)	le16toh(x)
#define elf32toh(x)	le32toh(x)
#define elf64toh(x)	le64toh(x)

#define ELFENDIAN	ELFDATA2LSB

#elif defined(CONFIG_CPU_BIG_ENDIAN)

#define elf16toh(x)	be16toh(x)
#define elf32toh(x)	be32toh(x)
#define elf64toh(x)	be64toh(x)

#define ELFENDIAN	ELFDATA2MSB

#else

#error PDP-endian sadly unsupported...

#endif

/* Print an error message, prefixed with the input path, and exit. */
#define fatal_error(fmt, ...)					\
({								\
	fprintf(stderr, "error: %s: " fmt "\n",			\
		elf.path, ## __VA_ARGS__);			\
	exit(EXIT_FAILURE);					\
	__builtin_unreachable();				\
})

/* Like fatal_error(), but also report strerror(errno). */
#define fatal_perror(msg)					\
({								\
	fprintf(stderr, "error: %s: " msg ": %s\n",		\
		elf.path, strerror(errno));			\
	exit(EXIT_FAILURE);					\
	__builtin_unreachable();				\
})

/*
 * Abort with a diagnostic unless `lhs op rhs` holds.
 * Arguments are evaluated exactly once.
 */
#define assert_op(lhs, rhs, fmt, op)				\
({								\
	typeof(lhs) _lhs = (lhs);				\
	typeof(rhs) _rhs = (rhs);				\
								\
	if (!(_lhs op _rhs)) {					\
		fatal_error("assertion " #lhs " " #op " " #rhs	\
			" failed (lhs=" fmt ", rhs=" fmt	\
			", line=%d)", _lhs, _rhs, __LINE__);	\
	}							\
})

#define assert_eq(lhs, rhs, fmt)	assert_op(lhs, rhs, fmt, ==)
#define assert_ne(lhs, rhs, fmt)	assert_op(lhs, rhs, fmt, !=)
#define assert_lt(lhs, rhs, fmt)	assert_op(lhs, rhs, fmt, <)
#define assert_ge(lhs, rhs, fmt)	assert_op(lhs, rhs, fmt, >=)

/*
 * Return a pointer of a given type at a given offset from
 * the beginning of the ELF file.
 */
#define elf_ptr(type, off) ((type *)(elf.begin + (off)))

/* Iterate over all section headers in the ELF. */
#define for_each_section(var) \
	for (var = elf.sh_table; var < elf.sh_table + elf16toh(elf.ehdr->e_shnum); ++var)

/* Iterate over all Elf64_Rela entries in a given section. */
#define for_each_rela(shdr, var)					\
	for (var = elf_ptr(Elf64_Rela, elf64toh(shdr->sh_offset));	\
	     var < elf_ptr(Elf64_Rela, elf64toh(shdr->sh_offset) + elf64toh(shdr->sh_size)); var++)
|
||||
|
||||
/*
 * True if `str` begins with `prefix`.
 *
 * Uses strncmp() rather than memcmp(): memcmp() is permitted to access
 * all strlen(prefix) bytes of both buffers, which overruns `str` when it
 * is shorter than `prefix` (undefined behavior). strncmp() stops at the
 * NUL terminator, so the comparison is safe for any pair of C strings.
 */
static inline bool starts_with(const char *str, const char *prefix)
{
	return strncmp(str, prefix, strlen(prefix)) == 0;
}
|
||||
|
||||
/* Returns a string containing the name of a given section. */
|
||||
static inline const char *section_name(Elf64_Shdr *shdr)
|
||||
{
|
||||
return elf.sh_string + elf32toh(shdr->sh_name);
|
||||
}
|
||||
|
||||
/* Returns a pointer to the first byte of section data. */
|
||||
static inline const char *section_begin(Elf64_Shdr *shdr)
|
||||
{
|
||||
return elf_ptr(char, elf64toh(shdr->sh_offset));
|
||||
}
|
||||
|
||||
/* Find a section by its offset from the beginning of the file. */
|
||||
static inline Elf64_Shdr *section_by_off(Elf64_Off off)
|
||||
{
|
||||
assert_ne(off, 0UL, "%lu");
|
||||
return elf_ptr(Elf64_Shdr, off);
|
||||
}
|
||||
|
||||
/* Find a section by its index. */
|
||||
static inline Elf64_Shdr *section_by_idx(uint16_t idx)
|
||||
{
|
||||
assert_ne(idx, SHN_UNDEF, "%u");
|
||||
return &elf.sh_table[idx];
|
||||
}
|
||||
|
||||
/*
|
||||
* Memory-map the given ELF file, perform sanity checks, and
|
||||
* populate global state.
|
||||
*/
|
||||
static void init_elf(const char *path)
|
||||
{
|
||||
int fd, ret;
|
||||
struct stat stat;
|
||||
|
||||
/* Store path in the global struct for error printing. */
|
||||
elf.path = path;
|
||||
|
||||
/* Open the ELF file. */
|
||||
fd = open(path, O_RDONLY);
|
||||
if (fd < 0)
|
||||
fatal_perror("Could not open ELF file");
|
||||
|
||||
/* Get status of ELF file to obtain its size. */
|
||||
ret = fstat(fd, &stat);
|
||||
if (ret < 0) {
|
||||
close(fd);
|
||||
fatal_perror("Could not get status of ELF file");
|
||||
}
|
||||
|
||||
/* mmap() the entire ELF file read-only at an arbitrary address. */
|
||||
elf.begin = mmap(0, stat.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
|
||||
if (elf.begin == MAP_FAILED) {
|
||||
close(fd);
|
||||
fatal_perror("Could not mmap ELF file");
|
||||
}
|
||||
|
||||
/* mmap() was successful, close the FD. */
|
||||
close(fd);
|
||||
|
||||
/* Get pointer to the ELF header. */
|
||||
assert_ge(stat.st_size, sizeof(*elf.ehdr), "%lu");
|
||||
elf.ehdr = elf_ptr(Elf64_Ehdr, 0);
|
||||
|
||||
/* Check the ELF magic. */
|
||||
assert_eq(elf.ehdr->e_ident[EI_MAG0], ELFMAG0, "0x%x");
|
||||
assert_eq(elf.ehdr->e_ident[EI_MAG1], ELFMAG1, "0x%x");
|
||||
assert_eq(elf.ehdr->e_ident[EI_MAG2], ELFMAG2, "0x%x");
|
||||
assert_eq(elf.ehdr->e_ident[EI_MAG3], ELFMAG3, "0x%x");
|
||||
|
||||
/* Sanity check that this is an ELF64 relocatable object for AArch64. */
|
||||
assert_eq(elf.ehdr->e_ident[EI_CLASS], ELFCLASS64, "%u");
|
||||
assert_eq(elf.ehdr->e_ident[EI_DATA], ELFENDIAN, "%u");
|
||||
assert_eq(elf16toh(elf.ehdr->e_type), ET_REL, "%u");
|
||||
assert_eq(elf16toh(elf.ehdr->e_machine), EM_AARCH64, "%u");
|
||||
|
||||
/* Populate fields of the global struct. */
|
||||
elf.sh_table = section_by_off(elf64toh(elf.ehdr->e_shoff));
|
||||
elf.sh_string = section_begin(section_by_idx(elf16toh(elf.ehdr->e_shstrndx)));
|
||||
}
|
||||
|
||||
/* Print the prologue of the output ASM file. */
|
||||
static void emit_prologue(void)
|
||||
{
|
||||
printf(".data\n"
|
||||
".pushsection " HYP_RELOC_SECTION ", \"a\"\n");
|
||||
}
|
||||
|
||||
/* Print ASM statements needed as a prologue to a processed hyp section. */
|
||||
static void emit_section_prologue(const char *sh_orig_name)
|
||||
{
|
||||
/* Declare the hyp section symbol. */
|
||||
printf(".global %s%s\n", HYP_SECTION_SYMBOL_PREFIX, sh_orig_name);
|
||||
}
|
||||
|
||||
/*
|
||||
* Print ASM statements to create a hyp relocation entry for a given
|
||||
* R_AARCH64_ABS64 relocation.
|
||||
*
|
||||
* The linker of vmlinux will populate the position given by `rela` with
|
||||
* an absolute 64-bit kernel VA. If the kernel is relocatable, it will
|
||||
* also generate a dynamic relocation entry so that the kernel can shift
|
||||
* the address at runtime for KASLR.
|
||||
*
|
||||
* Emit a 32-bit offset from the current address to the position given
|
||||
* by `rela`. This way the kernel can iterate over all kernel VAs used
|
||||
* by hyp at runtime and convert them to hyp VAs. However, that offset
|
||||
* will not be known until linking of `vmlinux`, so emit a PREL32
|
||||
* relocation referencing a symbol that the hyp linker script put at
|
||||
* the beginning of the relocated section + the offset from `rela`.
|
||||
*/
|
||||
static void emit_rela_abs64(Elf64_Rela *rela, const char *sh_orig_name)
|
||||
{
|
||||
/* Offset of this reloc from the beginning of HYP_RELOC_SECTION. */
|
||||
static size_t reloc_offset;
|
||||
|
||||
/* Create storage for the 32-bit offset. */
|
||||
printf(".word 0\n");
|
||||
|
||||
/*
|
||||
* Create a PREL32 relocation which instructs the linker of `vmlinux`
|
||||
* to insert offset to position <base> + <offset>, where <base> is
|
||||
* a symbol at the beginning of the relocated section, and <offset>
|
||||
* is `rela->r_offset`.
|
||||
*/
|
||||
printf(".reloc %lu, R_AARCH64_PREL32, %s%s + 0x%lx\n",
|
||||
reloc_offset, HYP_SECTION_SYMBOL_PREFIX, sh_orig_name,
|
||||
elf64toh(rela->r_offset));
|
||||
|
||||
reloc_offset += 4;
|
||||
}
|
||||
|
||||
/* Print the epilogue of the output ASM file. */
static void emit_epilogue(void)
{
	fputs(".popsection\n", stdout);
}
|
||||
|
||||
/*
|
||||
* Iterate over all RELA relocations in a given section and emit
|
||||
* hyp relocation data for all absolute addresses in hyp code/data.
|
||||
*
|
||||
* Static relocations that generate PC-relative-addressing are ignored.
|
||||
* Failure is reported for unexpected relocation types.
|
||||
*/
|
||||
static void emit_rela_section(Elf64_Shdr *sh_rela)
|
||||
{
|
||||
Elf64_Shdr *sh_orig = &elf.sh_table[elf32toh(sh_rela->sh_info)];
|
||||
const char *sh_orig_name = section_name(sh_orig);
|
||||
Elf64_Rela *rela;
|
||||
|
||||
/* Skip all non-hyp sections. */
|
||||
if (!starts_with(sh_orig_name, HYP_SECTION_PREFIX))
|
||||
return;
|
||||
|
||||
emit_section_prologue(sh_orig_name);
|
||||
|
||||
for_each_rela(sh_rela, rela) {
|
||||
uint32_t type = (uint32_t)elf64toh(rela->r_info);
|
||||
|
||||
/* Check that rela points inside the relocated section. */
|
||||
assert_lt(elf64toh(rela->r_offset), elf64toh(sh_orig->sh_size), "0x%lx");
|
||||
|
||||
switch (type) {
|
||||
/*
|
||||
* Data relocations to generate absolute addressing.
|
||||
* Emit a hyp relocation.
|
||||
*/
|
||||
case R_AARCH64_ABS64:
|
||||
emit_rela_abs64(rela, sh_orig_name);
|
||||
break;
|
||||
/* Allow relocations to generate PC-relative addressing. */
|
||||
case R_AARCH64_LD_PREL_LO19:
|
||||
case R_AARCH64_ADR_PREL_LO21:
|
||||
case R_AARCH64_ADR_PREL_PG_HI21:
|
||||
case R_AARCH64_ADR_PREL_PG_HI21_NC:
|
||||
case R_AARCH64_ADD_ABS_LO12_NC:
|
||||
case R_AARCH64_LDST8_ABS_LO12_NC:
|
||||
case R_AARCH64_LDST16_ABS_LO12_NC:
|
||||
case R_AARCH64_LDST32_ABS_LO12_NC:
|
||||
case R_AARCH64_LDST64_ABS_LO12_NC:
|
||||
case R_AARCH64_LDST128_ABS_LO12_NC:
|
||||
break;
|
||||
/* Allow relative relocations for control-flow instructions. */
|
||||
case R_AARCH64_TSTBR14:
|
||||
case R_AARCH64_CONDBR19:
|
||||
case R_AARCH64_JUMP26:
|
||||
case R_AARCH64_CALL26:
|
||||
break;
|
||||
/* Allow group relocations to create PC-relative offset inline. */
|
||||
case R_AARCH64_MOVW_PREL_G0:
|
||||
case R_AARCH64_MOVW_PREL_G0_NC:
|
||||
case R_AARCH64_MOVW_PREL_G1:
|
||||
case R_AARCH64_MOVW_PREL_G1_NC:
|
||||
case R_AARCH64_MOVW_PREL_G2:
|
||||
case R_AARCH64_MOVW_PREL_G2_NC:
|
||||
case R_AARCH64_MOVW_PREL_G3:
|
||||
break;
|
||||
default:
|
||||
fatal_error("Unexpected RELA type %u", type);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Iterate over all sections and emit hyp relocation data for RELA sections. */
|
||||
static void emit_all_relocs(void)
|
||||
{
|
||||
Elf64_Shdr *shdr;
|
||||
|
||||
for_each_section(shdr) {
|
||||
switch (elf32toh(shdr->sh_type)) {
|
||||
case SHT_REL:
|
||||
fatal_error("Unexpected SHT_REL section \"%s\"",
|
||||
section_name(shdr));
|
||||
case SHT_RELA:
|
||||
emit_rela_section(shdr);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Entry point: expects a single argument, the path of the relocatable
 * hyp ELF object. Writes the generated assembly to stdout.
 */
int main(int argc, const char **argv)
{
	if (argc == 2) {
		init_elf(argv[1]);

		emit_prologue();
		emit_all_relocs();
		emit_epilogue();

		return EXIT_SUCCESS;
	}

	fprintf(stderr, "Usage: %s <elf_input>\n", argv[0]);
	return EXIT_FAILURE;
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user