Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull KVM updates from Paolo Bonzini: "x86: - Support for userspace to emulate Xen hypercalls - Raise the maximum number of user memslots - Scalability improvements for the new MMU. Instead of the complex "fast page fault" logic that is used in mmu.c, tdp_mmu.c uses an rwlock so that page faults are concurrent, but the code that can run against page faults is limited. Right now only page faults take the lock for reading; in the future this will be extended to some cases of page table destruction. I hope to switch the default MMU around 5.12-rc3 (some testing was delayed due to Chinese New Year). - Cleanups for MAXPHYADDR checks - Use static calls for vendor-specific callbacks - On AMD, use VMLOAD/VMSAVE to save and restore host state - Stop using deprecated jump label APIs - Workaround for AMD erratum that made nested virtualization unreliable - Support for LBR emulation in the guest - Support for communicating bus lock vmexits to userspace - Add support for SEV attestation command - Miscellaneous cleanups PPC: - Support for second data watchpoint on POWER10 - Remove some complex workarounds for buggy early versions of POWER9 - Guest entry/exit fixes ARM64: - Make the nVHE EL2 object relocatable - Cleanups for concurrent translation faults hitting the same page - Support for the standard TRNG hypervisor call - A bunch of small PMU/Debug fixes - Simplification of the early init hypercall handling Non-KVM changes (with acks): - Detection of contended rwlocks (implemented only for qrwlocks, because KVM only needs it for x86) - Allow __DISABLE_EXPORTS from assembly code - Provide a saner follow_pfn replacements for modules" * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (192 commits) KVM: x86/xen: Explicitly pad struct compat_vcpu_info to 64 bytes KVM: selftests: Don't bother mapping GVA for Xen shinfo test KVM: selftests: Fix hex vs. decimal snafu in Xen test KVM: selftests: Fix size of memslots created by Xen tests KVM: selftests: Ignore recently added Xen tests' build output KVM: selftests: Add missing header file needed by xAPIC IPI tests KVM: selftests: Add operand to vmsave/vmload/vmrun in svm.c KVM: SVM: Make symbol 'svm_gp_erratum_intercept' static locking/arch: Move qrwlock.h include after qspinlock.h KVM: PPC: Book3S HV: Fix host radix SLB optimisation with hash guests KVM: PPC: Book3S HV: Ensure radix guest has no SLB entries KVM: PPC: Don't always report hash MMU capability for P9 < DD2.2 KVM: PPC: Book3S HV: Save and restore FSCR in the P9 path KVM: PPC: remove unneeded semicolon KVM: PPC: Book3S HV: Use POWER9 SLBIA IH=6 variant to clear SLB KVM: PPC: Book3S HV: No need to clear radix host SLB before loading HPT guest KVM: PPC: Book3S HV: Fix radix guest SLB side channel KVM: PPC: Book3S HV: Remove support for running HPT guest on RPT host without mixed mode support KVM: PPC: Book3S HV: Introduce new capability for 2nd DAWR KVM: PPC: Book3S HV: Add infrastructure to support 2nd DAWR ...
2026-03-06 15:25:10 -08:00 · 2021-02-21 13:31:43 -08:00
parent 9c5b80b795 8c6e67bec3
commit 3e10585335
150 changed files with 6659 additions and 2064 deletions
--- a/Documentation/virt/kvm/amd-memory-encryption.rst
+++ b/Documentation/virt/kvm/amd-memory-encryption.rst
@@ -263,6 +263,27 @@ Returns: 0 on success, -negative on error
                __u32 trans_len;
        };

+10. KVM_SEV_GET_ATTESTATION_REPORT
+----------------------------------
+
+The KVM_SEV_GET_ATTESTATION_REPORT command can be used by the hypervisor to query the attestation
+report containing the SHA-256 digest of the guest memory and VMSA passed through the KVM_SEV_LAUNCH
+commands and signed with the PEK. The digest returned by the command should match the digest
+used by the guest owner with the KVM_SEV_LAUNCH_MEASURE.
+
+Parameters (in): struct kvm_sev_attestation
+
+Returns: 0 on success, -negative on error
+
+::
+
+        struct kvm_sev_attestation_report {
+                __u8 mnonce[16];        /* A random mnonce that will be placed in the report */
+
+                __u64 uaddr;            /* userspace address where the report should be copied */
+                __u32 len;
+        };
+
 References
 ==========

--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -960,6 +960,14 @@ memory.
 	__u8 pad2[30];
  };

+If the KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL flag is returned from the
+KVM_CAP_XEN_HVM check, it may be set in the flags field of this ioctl.
+This requests KVM to generate the contents of the hypercall page
+automatically; hypercalls will be intercepted and passed to userspace
+through KVM_EXIT_XEN.  In this case, all of the blob size and address
+fields must be zero.
+
+No other flags are currently valid in the struct kvm_xen_hvm_config.

 4.29 KVM_GET_CLOCK
 ------------------
@@ -2268,6 +2276,8 @@ registers, find a list below:
  PPC     KVM_REG_PPC_PSSCR               64
  PPC     KVM_REG_PPC_DEC_EXPIRY          64
  PPC     KVM_REG_PPC_PTCR                64
+  PPC     KVM_REG_PPC_DAWR1               64
+  PPC     KVM_REG_PPC_DAWRX1              64
  PPC     KVM_REG_PPC_TM_GPR0             64
  ...
  PPC     KVM_REG_PPC_TM_GPR31            64
@@ -4831,6 +4841,101 @@ into user space.
 If a vCPU is in running state while this ioctl is invoked, the vCPU may
 experience inconsistent filtering behavior on MSR accesses.

+4.127 KVM_XEN_HVM_SET_ATTR
+--------------------------
+
+:Capability: KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO
+:Architectures: x86
+:Type: vm ioctl
+:Parameters: struct kvm_xen_hvm_attr
+:Returns: 0 on success, < 0 on error
+
+::
+
+  struct kvm_xen_hvm_attr {
+	__u16 type;
+	__u16 pad[3];
+	union {
+		__u8 long_mode;
+		__u8 vector;
+		struct {
+			__u64 gfn;
+		} shared_info;
+		__u64 pad[4];
+	} u;
+  };
+
+type values:
+
+KVM_XEN_ATTR_TYPE_LONG_MODE
+  Sets the ABI mode of the VM to 32-bit or 64-bit (long mode). This
+  determines the layout of the shared info pages exposed to the VM.
+
+KVM_XEN_ATTR_TYPE_SHARED_INFO
+  Sets the guest physical frame number at which the Xen "shared info"
+  page resides. Note that although Xen places vcpu_info for the first
+  32 vCPUs in the shared_info page, KVM does not automatically do so
+  and instead requires that KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO be used
+  explicitly even when the vcpu_info for a given vCPU resides at the
+  "default" location in the shared_info page. This is because KVM is
+  not aware of the Xen CPU id which is used as the index into the
+  vcpu_info[] array, so cannot know the correct default location.
+
+KVM_XEN_ATTR_TYPE_UPCALL_VECTOR
+  Sets the exception vector used to deliver Xen event channel upcalls.
+
+4.128 KVM_XEN_HVM_GET_ATTR
+--------------------------
+
+:Capability: KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO
+:Architectures: x86
+:Type: vm ioctl
+:Parameters: struct kvm_xen_hvm_attr
+:Returns: 0 on success, < 0 on error
+
+Allows Xen VM attributes to be read. For the structure and types,
+see KVM_XEN_HVM_SET_ATTR above.
+
+4.129 KVM_XEN_VCPU_SET_ATTR
+---------------------------
+
+:Capability: KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO
+:Architectures: x86
+:Type: vcpu ioctl
+:Parameters: struct kvm_xen_vcpu_attr
+:Returns: 0 on success, < 0 on error
+
+::
+
+  struct kvm_xen_vcpu_attr {
+	__u16 type;
+	__u16 pad[3];
+	union {
+		__u64 gpa;
+		__u64 pad[4];
+	} u;
+  };
+
+type values:
+
+KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO
+  Sets the guest physical address of the vcpu_info for a given vCPU.
+
+KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO
+  Sets the guest physical address of an additional pvclock structure
+  for a given vCPU. This is typically used for guest vsyscall support.
+
+4.130 KVM_XEN_VCPU_GET_ATTR
+---------------------------
+
+:Capability: KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO
+:Architectures: x86
+:Type: vcpu ioctl
+:Parameters: struct kvm_xen_vcpu_attr
+:Returns: 0 on success, < 0 on error
+
+Allows Xen vCPU attributes to be read. For the structure and types,
+see KVM_XEN_VCPU_SET_ATTR above.

 5. The kvm_run structure
 ========================
@@ -4893,9 +4998,11 @@ local APIC is not used.
 	__u16 flags;

 More architecture-specific flags detailing state of the VCPU that may
-affect the device's behavior.  The only currently defined flag is
-KVM_RUN_X86_SMM, which is valid on x86 machines and is set if the
-VCPU is in system management mode.
+affect the device's behavior. Current defined flags:
+  /* x86, set if the VCPU is in system management mode */
+  #define KVM_RUN_X86_SMM     (1 << 0)
+  /* x86, set if bus lock detected in VM */
+  #define KVM_RUN_BUS_LOCK    (1 << 1)

 ::

@@ -4996,13 +5103,18 @@ to the byte array.

 .. note::

-      For KVM_EXIT_IO, KVM_EXIT_MMIO, KVM_EXIT_OSI, KVM_EXIT_PAPR,
+      For KVM_EXIT_IO, KVM_EXIT_MMIO, KVM_EXIT_OSI, KVM_EXIT_PAPR, KVM_EXIT_XEN,
      KVM_EXIT_EPR, KVM_EXIT_X86_RDMSR and KVM_EXIT_X86_WRMSR the corresponding
      operations are complete (and guest state is consistent) only after userspace
      has re-entered the kernel with KVM_RUN.  The kernel side will first finish
-      incomplete operations and then check for pending signals.  Userspace
-      can re-enter the guest with an unmasked signal pending to complete
-      pending operations.
+      incomplete operations and then check for pending signals.
+
+      The pending state of the operation is not preserved in state which is
+      visible to userspace, thus userspace should ensure that the operation is
+      completed before performing a live migration.  Userspace can re-enter the
+      guest with an unmasked signal pending or with the immediate_exit field set
+      to complete pending operations without allowing any further instructions
+      to be executed.

 ::

@@ -5327,6 +5439,34 @@ wants to write. Once finished processing the event, user space must continue
 vCPU execution. If the MSR write was unsuccessful, user space also sets the
 "error" field to "1".

+::
+
+
+		struct kvm_xen_exit {
+  #define KVM_EXIT_XEN_HCALL          1
+			__u32 type;
+			union {
+				struct {
+					__u32 longmode;
+					__u32 cpl;
+					__u64 input;
+					__u64 result;
+					__u64 params[6];
+				} hcall;
+			} u;
+		};
+		/* KVM_EXIT_XEN */
+                struct kvm_hyperv_exit xen;
+
+Indicates that the VCPU exits into userspace to process some tasks
+related to Xen emulation.
+
+Valid values for 'type' are:
+
+  - KVM_EXIT_XEN_HCALL -- synchronously notify user-space about Xen hypercall.
+    Userspace is expected to place the hypercall result into the appropriate
+    field before invoking KVM_RUN again.
+
 ::

 		/* Fix the size of the union. */
@@ -6038,6 +6178,53 @@ KVM_EXIT_X86_RDMSR and KVM_EXIT_X86_WRMSR exit notifications which user space
 can then handle to implement model specific MSR handling and/or user notifications
 to inform a user that an MSR was not handled.

+7.22 KVM_CAP_X86_BUS_LOCK_EXIT
+-------------------------------
+
+:Architectures: x86
+:Target: VM
+:Parameters: args[0] defines the policy used when bus locks detected in guest
+:Returns: 0 on success, -EINVAL when args[0] contains invalid bits
+
+Valid bits in args[0] are::
+
+  #define KVM_BUS_LOCK_DETECTION_OFF      (1 << 0)
+  #define KVM_BUS_LOCK_DETECTION_EXIT     (1 << 1)
+
+Enabling this capability on a VM provides userspace with a way to select
+a policy to handle the bus locks detected in guest. Userspace can obtain
+the supported modes from the result of KVM_CHECK_EXTENSION and define it
+through the KVM_ENABLE_CAP.
+
+KVM_BUS_LOCK_DETECTION_OFF and KVM_BUS_LOCK_DETECTION_EXIT are supported
+currently and mutually exclusive with each other. More bits can be added in
+the future.
+
+With KVM_BUS_LOCK_DETECTION_OFF set, bus locks in guest will not cause vm exits
+so that no additional actions are needed. This is the default mode.
+
+With KVM_BUS_LOCK_DETECTION_EXIT set, vm exits happen when bus lock detected
+in VM. KVM just exits to userspace when handling them. Userspace can enforce
+its own throttling or other policy based mitigations.
+
+This capability is aimed to address the thread that VM can exploit bus locks to
+degree the performance of the whole system. Once the userspace enable this
+capability and select the KVM_BUS_LOCK_DETECTION_EXIT mode, KVM will set the
+KVM_RUN_BUS_LOCK flag in vcpu-run->flags field and exit to userspace. Concerning
+the bus lock vm exit can be preempted by a higher priority VM exit, the exit
+notifications to userspace can be KVM_EXIT_BUS_LOCK or other reasons.
+KVM_RUN_BUS_LOCK flag is used to distinguish between them.
+
+7.22 KVM_CAP_PPC_DAWR1
+----------------------
+
+:Architectures: ppc
+:Parameters: none
+:Returns: 0 on success, -EINVAL when CPU doesn't support 2nd DAWR
+
+This capability can be used to check / enable 2nd DAWR feature provided
+by POWER10 processor.
+
 8. Other capabilities.
 ======================

@@ -6415,7 +6602,6 @@ guest according to the bits in the KVM_CPUID_FEATURES CPUID leaf
 (0x40000001). Otherwise, a guest may use the paravirtual features
 regardless of what has actually been exposed through the CPUID leaf.

-
 8.29 KVM_CAP_DIRTY_LOG_RING
 ---------------------------

@@ -6502,3 +6688,29 @@ KVM_GET_DIRTY_LOG and KVM_CLEAR_DIRTY_LOG.  After enabling
 KVM_CAP_DIRTY_LOG_RING with an acceptable dirty ring size, the virtual
 machine will switch to ring-buffer dirty page tracking and further
 KVM_GET_DIRTY_LOG or KVM_CLEAR_DIRTY_LOG ioctls will fail.
+
+8.30 KVM_CAP_XEN_HVM
+--------------------
+
+:Architectures: x86
+
+This capability indicates the features that Xen supports for hosting Xen
+PVHVM guests. Valid flags are::
+
+  #define KVM_XEN_HVM_CONFIG_HYPERCALL_MSR	(1 << 0)
+  #define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL	(1 << 1)
+  #define KVM_XEN_HVM_CONFIG_SHARED_INFO	(1 << 2)
+
+The KVM_XEN_HVM_CONFIG_HYPERCALL_MSR flag indicates that the KVM_XEN_HVM_CONFIG
+ioctl is available, for the guest to set its hypercall page.
+
+If KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL is also set, the same flag may also be
+provided in the flags to KVM_XEN_HVM_CONFIG, without providing hypercall page
+contents, to request that KVM generate hypercall page content automatically
+and also enable interception of guest hypercalls with KVM_EXIT_XEN.
+
+The KVM_XEN_HVM_CONFIG_SHARED_INFO flag indicates the availability of the
+KVM_XEN_HVM_SET_ATTR, KVM_XEN_HVM_GET_ATTR, KVM_XEN_VCPU_SET_ATTR and
+KVM_XEN_VCPU_GET_ATTR ioctls, as well as the delivery of exception vectors
+for event channel upcalls when the evtchn_upcall_pending field of a vcpu's
+vcpu_info is set.
--- a/Documentation/virt/kvm/locking.rst
+++ b/Documentation/virt/kvm/locking.rst
@@ -16,7 +16,14 @@ The acquisition orders for mutexes are as follows:
 - kvm->slots_lock is taken outside kvm->irq_lock, though acquiring
  them together is quite rare.

-On x86, vcpu->mutex is taken outside kvm->arch.hyperv.hv_lock.
+On x86:
+
+- vcpu->mutex is taken outside kvm->arch.hyperv.hv_lock
+
+- kvm->arch.mmu_lock is an rwlock.  kvm->arch.tdp_mmu_pages_lock is
+  taken inside kvm->arch.mmu_lock, and cannot be taken without already
+  holding kvm->arch.mmu_lock (typically with ``read_lock``, otherwise
+  there's no need to take kvm->arch.tdp_mmu_pages_lock at all).

 Everything else is a leaf: no other lock is taken inside the critical
 sections.
--- a/arch/arm64/include/asm/hyp_image.h
+++ b/arch/arm64/include/asm/hyp_image.h
@@ -7,6 +7,9 @@
 #ifndef __ARM64_HYP_IMAGE_H__
 #define __ARM64_HYP_IMAGE_H__

+#define __HYP_CONCAT(a, b)	a ## b
+#define HYP_CONCAT(a, b)	__HYP_CONCAT(a, b)
+
 /*
 * KVM nVHE code has its own symbol namespace prefixed with __kvm_nvhe_,
 * to separate it from the kernel proper.
@@ -21,9 +24,31 @@
 */
 #define HYP_SECTION_NAME(NAME)	.hyp##NAME

+/* Symbol defined at the beginning of each hyp section. */
+#define HYP_SECTION_SYMBOL_NAME(NAME) \
+	HYP_CONCAT(__hyp_section_, HYP_SECTION_NAME(NAME))
+
+/*
+ * Helper to generate linker script statements starting a hyp section.
+ *
+ * A symbol with a well-known name is defined at the first byte. This
+ * is used as a base for hyp relocations (see gen-hyprel.c). It must
+ * be defined inside the section so the linker of `vmlinux` cannot
+ * separate it from the section data.
+ */
+#define BEGIN_HYP_SECTION(NAME)				\
+	HYP_SECTION_NAME(NAME) : {			\
+		HYP_SECTION_SYMBOL_NAME(NAME) = .;
+
+/* Helper to generate linker script statements ending a hyp section. */
+#define END_HYP_SECTION					\
+	}
+
 /* Defines an ELF hyp section from input section @NAME and its subsections. */
-#define HYP_SECTION(NAME) \
-	HYP_SECTION_NAME(NAME) : { *(NAME NAME##.*) }
+#define HYP_SECTION(NAME)			\
+	BEGIN_HYP_SECTION(NAME)			\
+		*(NAME NAME##.*)		\
+	END_HYP_SECTION

 /*
 * Defines a linker script alias of a kernel-proper symbol referenced by
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -199,26 +199,6 @@ extern void __vgic_v3_init_lrs(void);

 extern u32 __kvm_get_mdcr_el2(void);

-/*
- * Obtain the PC-relative address of a kernel symbol
- * s: symbol
- *
- * The goal of this macro is to return a symbol's address based on a
- * PC-relative computation, as opposed to a loading the VA from a
- * constant pool or something similar. This works well for HYP, as an
- * absolute VA is guaranteed to be wrong. Only use this if trying to
- * obtain the address of a symbol (i.e. not something you obtained by
- * following a pointer).
- */
-#define hyp_symbol_addr(s)						\
-	({								\
-		typeof(s) *addr;					\
-		asm("adrp	%0, %1\n"				\
-		    "add	%0, %0, :lo12:%1\n"			\
-		    : "=r" (addr) : "S" (&s));				\
-		addr;							\
-	})
-
 #define __KVM_EXTABLE(from, to)						\
 	"	.pushsection	__kvm_ex_table, \"a\"\n"		\
 	"	.align		3\n"					\
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -30,7 +30,6 @@

 #define __KVM_HAVE_ARCH_INTC_INITIALIZED

-#define KVM_USER_MEM_SLOTS 512
 #define KVM_HALT_POLL_NS_DEFAULT 500000

 #include <kvm/arm_vgic.h>
@@ -771,4 +770,6 @@ bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu);
 #define kvm_vcpu_has_pmu(vcpu)					\
 	(test_bit(KVM_ARM_VCPU_PMU_V3, (vcpu)->arch.features))

+int kvm_trng_call(struct kvm_vcpu *vcpu);
+
 #endif /* __ARM64_KVM_HOST_H__ */
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -73,8 +73,18 @@ alternative_cb_end
 .endm

 /*
- * Convert a kernel image address to a PA
- * reg: kernel address to be converted in place
+ * Convert a hypervisor VA to a PA
+ * reg: hypervisor address to be converted in place
+ * tmp: temporary register
+ */
+.macro hyp_pa reg, tmp
+	ldr_l	\tmp, hyp_physvirt_offset
+	add	\reg, \reg, \tmp
+.endm
+
+/*
+ * Convert a hypervisor VA to a kernel image address
+ * reg: hypervisor address to be converted in place
 * tmp: temporary register
 *
 * The actual code generation takes place in kvm_get_kimage_voffset, and
@@ -82,7 +92,11 @@ alternative_cb_end
 * perform the register allocation (kvm_get_kimage_voffset uses the
 * specific registers encoded in the instructions).
 */
-.macro kimg_pa reg, tmp
+.macro hyp_kimg_va reg, tmp
+	/* Convert hyp VA -> PA. */
+	hyp_pa	\reg, \tmp
+
+	/* Load kimage_voffset. */
 alternative_cb kvm_get_kimage_voffset
 	movz	\tmp, #0
 	movk	\tmp, #0, lsl #16
@@ -90,32 +104,8 @@ alternative_cb kvm_get_kimage_voffset
 	movk	\tmp, #0, lsl #48
 alternative_cb_end

-	/* reg = __pa(reg) */
-	sub	\reg, \reg, \tmp
-.endm
-
-/*
- * Convert a kernel image address to a hyp VA
- * reg: kernel address to be converted in place
- * tmp: temporary register
- *
- * The actual code generation takes place in kvm_get_kimage_voffset, and
- * the instructions below are only there to reserve the space and
- * perform the register allocation (kvm_update_kimg_phys_offset uses the
- * specific registers encoded in the instructions).
- */
-.macro kimg_hyp_va reg, tmp
-alternative_cb kvm_update_kimg_phys_offset
-	movz	\tmp, #0
-	movk	\tmp, #0, lsl #16
-	movk	\tmp, #0, lsl #32
-	movk	\tmp, #0, lsl #48
-alternative_cb_end
-
-	sub	\reg, \reg, \tmp
-	mov_q	\tmp, PAGE_OFFSET
-	orr	\reg, \reg, \tmp
-	kern_hyp_va \reg
+	/* Convert PA -> kimg VA. */
+	add	\reg, \reg, \tmp
 .endm

 #else
@@ -129,6 +119,7 @@ alternative_cb_end
 void kvm_update_va_mask(struct alt_instr *alt,
 			__le32 *origptr, __le32 *updptr, int nr_inst);
 void kvm_compute_layout(void);
+void kvm_apply_hyp_relocations(void);

 static __always_inline unsigned long __kern_hyp_va(unsigned long v)
 {
@@ -144,24 +135,6 @@ static __always_inline unsigned long __kern_hyp_va(unsigned long v)

 #define kern_hyp_va(v) 	((typeof(v))(__kern_hyp_va((unsigned long)(v))))

-static __always_inline unsigned long __kimg_hyp_va(unsigned long v)
-{
-	unsigned long offset;
-
-	asm volatile(ALTERNATIVE_CB("movz %0, #0\n"
-				    "movk %0, #0, lsl #16\n"
-				    "movk %0, #0, lsl #32\n"
-				    "movk %0, #0, lsl #48\n",
-				    kvm_update_kimg_phys_offset)
-		     : "=r" (offset));
-
-	return __kern_hyp_va((v - offset) | PAGE_OFFSET);
-}
-
-#define kimg_fn_hyp_va(v) 	((typeof(*v))(__kimg_hyp_va((unsigned long)(v))))
-
-#define kimg_fn_ptr(x)	(typeof(x) **)(x)
-
 /*
 * We currently support using a VM-specified IPA size. For backward
 * compatibility, the default IPA size is fixed to 40bits.
--- a/arch/arm64/include/asm/kvm_pgtable.h
+++ b/arch/arm64/include/asm/kvm_pgtable.h
@@ -157,6 +157,11 @@ void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt);
 * If device attributes are not explicitly requested in @prot, then the
 * mapping will be normal, cacheable.
 *
+ * Note that the update of a valid leaf PTE in this function will be aborted,
+ * if it's trying to recreate the exact same mapping or only change the access
+ * permissions. Instead, the vCPU will exit one more time from guest if still
+ * needed and then go through the path of relaxing permissions.
+ *
 * Note that this function will both coalesce existing table entries and split
 * existing block mappings, relying on page-faults to fault back areas outside
 * of the new mapping lazily.
--- a/arch/arm64/include/asm/sections.h
+++ b/arch/arm64/include/asm/sections.h
@@ -11,7 +11,8 @@ extern char __alt_instructions[], __alt_instructions_end[];
 extern char __hibernate_exit_text_start[], __hibernate_exit_text_end[];
 extern char __hyp_idmap_text_start[], __hyp_idmap_text_end[];
 extern char __hyp_text_start[], __hyp_text_end[];
-extern char __hyp_data_ro_after_init_start[], __hyp_data_ro_after_init_end[];
+extern char __hyp_rodata_start[], __hyp_rodata_end[];
+extern char __hyp_reloc_begin[], __hyp_reloc_end[];
 extern char __idmap_text_start[], __idmap_text_end[];
 extern char __initdata_begin[], __initdata_end[];
 extern char __inittext_begin[], __inittext_end[];
--- a/arch/arm64/include/asm/spinlock.h
+++ b/arch/arm64/include/asm/spinlock.h
@@ -5,8 +5,8 @@
 #ifndef __ASM_SPINLOCK_H
 #define __ASM_SPINLOCK_H

-#include <asm/qrwlock.h>
 #include <asm/qspinlock.h>
+#include <asm/qrwlock.h>

 /* See include/linux/spinlock.h */
 #define smp_mb__after_spinlock()	smp_mb()
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -853,7 +853,10 @@

 #define ID_DFR0_PERFMON_SHIFT		24

+#define ID_DFR0_PERFMON_8_0		0x3
 #define ID_DFR0_PERFMON_8_1		0x4
+#define ID_DFR0_PERFMON_8_4		0x5
+#define ID_DFR0_PERFMON_8_5		0x6

 #define ID_ISAR4_SWP_FRAC_SHIFT		28
 #define ID_ISAR4_PSR_M_SHIFT		24
--- a/arch/arm64/kernel/image-vars.h
+++ b/arch/arm64/kernel/image-vars.h
@@ -64,7 +64,6 @@ __efistub__ctype		= _ctype;
 /* Alternative callbacks for init-time patching of nVHE hyp code. */
 KVM_NVHE_ALIAS(kvm_patch_vector_branch);
 KVM_NVHE_ALIAS(kvm_update_va_mask);
-KVM_NVHE_ALIAS(kvm_update_kimg_phys_offset);
 KVM_NVHE_ALIAS(kvm_get_kimage_voffset);

 /* Global kernel state accessed by nVHE hyp code. */
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -434,8 +434,10 @@ static void __init hyp_mode_check(void)
 			   "CPU: CPUs started in inconsistent modes");
 	else
 		pr_info("CPU: All CPU(s) started at EL1\n");
-	if (IS_ENABLED(CONFIG_KVM) && !is_kernel_in_hyp_mode())
+	if (IS_ENABLED(CONFIG_KVM) && !is_kernel_in_hyp_mode()) {
 		kvm_compute_layout();
+		kvm_apply_hyp_relocations();
+	}
 }

 void __init smp_cpus_done(unsigned int max_cpus)
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -31,10 +31,11 @@ jiffies = jiffies_64;
 	__stop___kvm_ex_table = .;

 #define HYPERVISOR_DATA_SECTIONS				\
-	HYP_SECTION_NAME(.data..ro_after_init) : {		\
-		__hyp_data_ro_after_init_start = .;		\
+	HYP_SECTION_NAME(.rodata) : {				\
+		__hyp_rodata_start = .;				\
 		*(HYP_SECTION_NAME(.data..ro_after_init))	\
-		__hyp_data_ro_after_init_end = .;		\
+		*(HYP_SECTION_NAME(.rodata))			\
+		__hyp_rodata_end = .;				\
 	}

 #define HYPERVISOR_PERCPU_SECTION				\
@@ -42,10 +43,19 @@ jiffies = jiffies_64;
 	HYP_SECTION_NAME(.data..percpu) : {			\
 		*(HYP_SECTION_NAME(.data..percpu))		\
 	}
+
+#define HYPERVISOR_RELOC_SECTION				\
+	.hyp.reloc : ALIGN(4) {					\
+		__hyp_reloc_begin = .;				\
+		*(.hyp.reloc)					\
+		__hyp_reloc_end = .;				\
+	}
+
 #else /* CONFIG_KVM */
 #define HYPERVISOR_EXTABLE
 #define HYPERVISOR_DATA_SECTIONS
 #define HYPERVISOR_PERCPU_SECTION
+#define HYPERVISOR_RELOC_SECTION
 #endif

 #define HYPERVISOR_TEXT					\
@@ -216,6 +226,8 @@ SECTIONS
 	PERCPU_SECTION(L1_CACHE_BYTES)
 	HYPERVISOR_PERCPU_SECTION

+	HYPERVISOR_RELOC_SECTION
+
 	.rela.dyn : ALIGN(8) {
 		*(.rela .rela*)
 	}
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -16,7 +16,7 @@ kvm-y := $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o \
 	 inject_fault.o va_layout.o handle_exit.o \
 	 guest.o debug.o reset.o sys_regs.o \
 	 vgic-sys-reg-v3.o fpsimd.o pmu.o \
-	 arch_timer.o \
+	 arch_timer.o trng.o\
 	 vgic/vgic.o vgic/vgic-init.o \
 	 vgic/vgic-irqfd.o vgic/vgic-v2.o \
 	 vgic/vgic-v3.o vgic/vgic-v4.o \
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -1750,11 +1750,10 @@ static int init_hyp_mode(void)
 		goto out_err;
 	}

-	err = create_hyp_mappings(kvm_ksym_ref(__hyp_data_ro_after_init_start),
-				  kvm_ksym_ref(__hyp_data_ro_after_init_end),
-				  PAGE_HYP_RO);
+	err = create_hyp_mappings(kvm_ksym_ref(__hyp_rodata_start),
+				  kvm_ksym_ref(__hyp_rodata_end), PAGE_HYP_RO);
 	if (err) {
-		kvm_err("Cannot map .hyp.data..ro_after_init section\n");
+		kvm_err("Cannot map .hyp.rodata section\n");
 		goto out_err;
 	}

--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -505,8 +505,8 @@ static inline void __kvm_unexpected_el2_exception(void)
 	struct exception_table_entry *entry, *end;
 	unsigned long elr_el2 = read_sysreg(elr_el2);

-	entry = hyp_symbol_addr(__start___kvm_ex_table);
-	end = hyp_symbol_addr(__stop___kvm_ex_table);
+	entry = &__start___kvm_ex_table;
+	end = &__stop___kvm_ex_table;

 	while (entry < end) {
 		addr = (unsigned long)&entry->insn + entry->insn;
--- a/arch/arm64/kvm/hyp/nvhe/.gitignore
+++ b/arch/arm64/kvm/hyp/nvhe/.gitignore
@@ -1,2 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0-only
+gen-hyprel
 hyp.lds
+hyp-reloc.S
--- a/arch/arm64/kvm/hyp/nvhe/Makefile
+++ b/arch/arm64/kvm/hyp/nvhe/Makefile
@@ -3,8 +3,11 @@
 # Makefile for Kernel-based Virtual Machine module, HYP/nVHE part
 #

-asflags-y := -D__KVM_NVHE_HYPERVISOR__
-ccflags-y := -D__KVM_NVHE_HYPERVISOR__
+asflags-y := -D__KVM_NVHE_HYPERVISOR__ -D__DISABLE_EXPORTS
+ccflags-y := -D__KVM_NVHE_HYPERVISOR__ -D__DISABLE_EXPORTS
+
+hostprogs := gen-hyprel
+HOST_EXTRACFLAGS += -I$(objtree)/include

 obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o \
 	 hyp-main.o hyp-smp.o psci-relay.o
@@ -19,7 +22,7 @@ obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \

 hyp-obj := $(patsubst %.o,%.nvhe.o,$(obj-y))
 obj-y := kvm_nvhe.o
-extra-y := $(hyp-obj) kvm_nvhe.tmp.o hyp.lds
+extra-y := $(hyp-obj) kvm_nvhe.tmp.o kvm_nvhe.rel.o hyp.lds hyp-reloc.S hyp-reloc.o

 # 1) Compile all source files to `.nvhe.o` object files. The file extension
 #    avoids file name clashes for files shared with VHE.
@@ -42,11 +45,31 @@ LDFLAGS_kvm_nvhe.tmp.o := -r -T
 $(obj)/kvm_nvhe.tmp.o: $(obj)/hyp.lds $(addprefix $(obj)/,$(hyp-obj)) FORCE
 	$(call if_changed,ld)

-# 4) Produce the final 'kvm_nvhe.o', ready to be linked into 'vmlinux'.
+# 4) Generate list of hyp code/data positions that need to be relocated at
+#    runtime. Because the hypervisor is part of the kernel binary, relocations
+#    produce a kernel VA. We enumerate relocations targeting hyp at build time
+#    and convert the kernel VAs at those positions to hyp VAs.
+$(obj)/hyp-reloc.S: $(obj)/kvm_nvhe.tmp.o $(obj)/gen-hyprel
+	$(call if_changed,hyprel)
+
+# 5) Compile hyp-reloc.S and link it into the existing partially linked object.
+#    The object file now contains a section with pointers to hyp positions that
+#    will contain kernel VAs at runtime. These pointers have relocations on them
+#    so that they get updated as the hyp object is linked into `vmlinux`.
+LDFLAGS_kvm_nvhe.rel.o := -r
+$(obj)/kvm_nvhe.rel.o: $(obj)/kvm_nvhe.tmp.o $(obj)/hyp-reloc.o FORCE
+	$(call if_changed,ld)
+
+# 6) Produce the final 'kvm_nvhe.o', ready to be linked into 'vmlinux'.
 #    Prefixes names of ELF symbols with '__kvm_nvhe_'.
-$(obj)/kvm_nvhe.o: $(obj)/kvm_nvhe.tmp.o FORCE
+$(obj)/kvm_nvhe.o: $(obj)/kvm_nvhe.rel.o FORCE
 	$(call if_changed,hypcopy)

+# The HYPREL command calls `gen-hyprel` to generate an assembly file with
+# a list of relocations targeting hyp code/data.
+quiet_cmd_hyprel = HYPREL  $@
+      cmd_hyprel = $(obj)/gen-hyprel $< > $@
+
 # The HYPCOPY command uses `objcopy` to prefix all ELF symbol names
 # to avoid clashes with VHE code/data.
 quiet_cmd_hypcopy = HYPCOPY $@
--- a/arch/arm64/kvm/hyp/nvhe/gen-hyprel.c
+++ b/arch/arm64/kvm/hyp/nvhe/gen-hyprel.c
@@ -0,0 +1,438 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020 - Google LLC
+ * Author: David Brazdil <dbrazdil@google.com>
+ *
+ * Generates relocation information used by the kernel to convert
+ * absolute addresses in hyp data from kernel VAs to hyp VAs.
+ *
+ * This is necessary because hyp code is linked into the same binary
+ * as the kernel but executes under different memory mappings.
+ * If the compiler used absolute addressing, those addresses need to
+ * be converted before they are used by hyp code.
+ *
+ * The input of this program is the relocatable ELF object containing
+ * all hyp code/data, not yet linked into vmlinux. Hyp section names
+ * should have been prefixed with `.hyp` at this point.
+ *
+ * The output (printed to stdout) is an assembly file containing
+ * an array of 32-bit integers and static relocations that instruct
+ * the linker of `vmlinux` to populate the array entries with offsets
+ * to positions in the kernel binary containing VAs used by hyp code.
+ *
+ * Note that dynamic relocations could be used for the same purpose.
+ * However, those are only generated if CONFIG_RELOCATABLE=y.
+ */
+
+#include <elf.h>
+#include <endian.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include <generated/autoconf.h>
+
+#define HYP_SECTION_PREFIX		".hyp"
+#define HYP_RELOC_SECTION		".hyp.reloc"
+#define HYP_SECTION_SYMBOL_PREFIX	"__hyp_section_"
+
+/*
+ * AArch64 relocation type constants.
+ * Included in case these are not defined in the host toolchain.
+ */
+#ifndef R_AARCH64_ABS64
+#define R_AARCH64_ABS64			257
+#endif
+#ifndef R_AARCH64_LD_PREL_LO19
+#define R_AARCH64_LD_PREL_LO19		273
+#endif
+#ifndef R_AARCH64_ADR_PREL_LO21
+#define R_AARCH64_ADR_PREL_LO21		274
+#endif
+#ifndef R_AARCH64_ADR_PREL_PG_HI21
+#define R_AARCH64_ADR_PREL_PG_HI21	275
+#endif
+#ifndef R_AARCH64_ADR_PREL_PG_HI21_NC
+#define R_AARCH64_ADR_PREL_PG_HI21_NC	276
+#endif
+#ifndef R_AARCH64_ADD_ABS_LO12_NC
+#define R_AARCH64_ADD_ABS_LO12_NC	277
+#endif
+#ifndef R_AARCH64_LDST8_ABS_LO12_NC
+#define R_AARCH64_LDST8_ABS_LO12_NC	278
+#endif
+#ifndef R_AARCH64_TSTBR14
+#define R_AARCH64_TSTBR14		279
+#endif
+#ifndef R_AARCH64_CONDBR19
+#define R_AARCH64_CONDBR19		280
+#endif
+#ifndef R_AARCH64_JUMP26
+#define R_AARCH64_JUMP26		282
+#endif
+#ifndef R_AARCH64_CALL26
+#define R_AARCH64_CALL26		283
+#endif
+#ifndef R_AARCH64_LDST16_ABS_LO12_NC
+#define R_AARCH64_LDST16_ABS_LO12_NC	284
+#endif
+#ifndef R_AARCH64_LDST32_ABS_LO12_NC
+#define R_AARCH64_LDST32_ABS_LO12_NC	285
+#endif
+#ifndef R_AARCH64_LDST64_ABS_LO12_NC
+#define R_AARCH64_LDST64_ABS_LO12_NC	286
+#endif
+#ifndef R_AARCH64_MOVW_PREL_G0
+#define R_AARCH64_MOVW_PREL_G0		287
+#endif
+#ifndef R_AARCH64_MOVW_PREL_G0_NC
+#define R_AARCH64_MOVW_PREL_G0_NC	288
+#endif
+#ifndef R_AARCH64_MOVW_PREL_G1
+#define R_AARCH64_MOVW_PREL_G1		289
+#endif
+#ifndef R_AARCH64_MOVW_PREL_G1_NC
+#define R_AARCH64_MOVW_PREL_G1_NC	290
+#endif
+#ifndef R_AARCH64_MOVW_PREL_G2
+#define R_AARCH64_MOVW_PREL_G2		291
+#endif
+#ifndef R_AARCH64_MOVW_PREL_G2_NC
+#define R_AARCH64_MOVW_PREL_G2_NC	292
+#endif
+#ifndef R_AARCH64_MOVW_PREL_G3
+#define R_AARCH64_MOVW_PREL_G3		293
+#endif
+#ifndef R_AARCH64_LDST128_ABS_LO12_NC
+#define R_AARCH64_LDST128_ABS_LO12_NC	299
+#endif
+
+/* Global state of the processed ELF. */
+static struct {
+	const char	*path;
+	char		*begin;
+	size_t		size;
+	Elf64_Ehdr	*ehdr;
+	Elf64_Shdr	*sh_table;
+	const char	*sh_string;
+} elf;
+
+#if defined(CONFIG_CPU_LITTLE_ENDIAN)
+
+#define elf16toh(x)	le16toh(x)
+#define elf32toh(x)	le32toh(x)
+#define elf64toh(x)	le64toh(x)
+
+#define ELFENDIAN	ELFDATA2LSB
+
+#elif defined(CONFIG_CPU_BIG_ENDIAN)
+
+#define elf16toh(x)	be16toh(x)
+#define elf32toh(x)	be32toh(x)
+#define elf64toh(x)	be64toh(x)
+
+#define ELFENDIAN	ELFDATA2MSB
+
+#else
+
+#error PDP-endian sadly unsupported...
+
+#endif
+
+#define fatal_error(fmt, ...)						\
+	({								\
+		fprintf(stderr, "error: %s: " fmt "\n",			\
+			elf.path, ## __VA_ARGS__);			\
+		exit(EXIT_FAILURE);					\
+		__builtin_unreachable();				\
+	})
+
+#define fatal_perror(msg)						\
+	({								\
+		fprintf(stderr, "error: %s: " msg ": %s\n",		\
+			elf.path, strerror(errno));			\
+		exit(EXIT_FAILURE);					\
+		__builtin_unreachable();				\
+	})
+
+#define assert_op(lhs, rhs, fmt, op)					\
+	({								\
+		typeof(lhs) _lhs = (lhs);				\
+		typeof(rhs) _rhs = (rhs);				\
+									\
+		if (!(_lhs op _rhs)) {					\
+			fatal_error("assertion " #lhs " " #op " " #rhs	\
+				" failed (lhs=" fmt ", rhs=" fmt	\
+				", line=%d)", _lhs, _rhs, __LINE__);	\
+		}							\
+	})
+
+#define assert_eq(lhs, rhs, fmt)	assert_op(lhs, rhs, fmt, ==)
+#define assert_ne(lhs, rhs, fmt)	assert_op(lhs, rhs, fmt, !=)
+#define assert_lt(lhs, rhs, fmt)	assert_op(lhs, rhs, fmt, <)
+#define assert_ge(lhs, rhs, fmt)	assert_op(lhs, rhs, fmt, >=)
+
+/*
+ * Return a pointer of a given type at a given offset from
+ * the beginning of the ELF file.
+ */
+#define elf_ptr(type, off) ((type *)(elf.begin + (off)))
+
+/* Iterate over all sections in the ELF. */
+#define for_each_section(var) \
+	for (var = elf.sh_table; var < elf.sh_table + elf16toh(elf.ehdr->e_shnum); ++var)
+
+/* Iterate over all Elf64_Rela relocations in a given section. */
+#define for_each_rela(shdr, var)					\
+	for (var = elf_ptr(Elf64_Rela, elf64toh(shdr->sh_offset));	\
+	     var < elf_ptr(Elf64_Rela, elf64toh(shdr->sh_offset) + elf64toh(shdr->sh_size)); var++)
+
+/* True if a string starts with a given prefix. */
+static inline bool starts_with(const char *str, const char *prefix)
+{
+	return memcmp(str, prefix, strlen(prefix)) == 0;
+}
+
+/* Returns a string containing the name of a given section. */
+static inline const char *section_name(Elf64_Shdr *shdr)
+{
+	return elf.sh_string + elf32toh(shdr->sh_name);
+}
+
+/* Returns a pointer to the first byte of section data. */
+static inline const char *section_begin(Elf64_Shdr *shdr)
+{
+	return elf_ptr(char, elf64toh(shdr->sh_offset));
+}
+
+/* Find a section by its offset from the beginning of the file. */
+static inline Elf64_Shdr *section_by_off(Elf64_Off off)
+{
+	assert_ne(off, 0UL, "%lu");
+	return elf_ptr(Elf64_Shdr, off);
+}
+
+/* Find a section by its index. */
+static inline Elf64_Shdr *section_by_idx(uint16_t idx)
+{
+	assert_ne(idx, SHN_UNDEF, "%u");
+	return &elf.sh_table[idx];
+}
+
+/*
+ * Memory-map the given ELF file, perform sanity checks, and
+ * populate global state.
+ */
+static void init_elf(const char *path)
+{
+	int fd, ret;
+	struct stat stat;
+
+	/* Store path in the global struct for error printing. */
+	elf.path = path;
+
+	/* Open the ELF file. */
+	fd = open(path, O_RDONLY);
+	if (fd < 0)
+		fatal_perror("Could not open ELF file");
+
+	/* Get status of ELF file to obtain its size. */
+	ret = fstat(fd, &stat);
+	if (ret < 0) {
+		close(fd);
+		fatal_perror("Could not get status of ELF file");
+	}
+
+	/* mmap() the entire ELF file read-only at an arbitrary address. */
+	elf.begin = mmap(0, stat.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
+	if (elf.begin == MAP_FAILED) {
+		close(fd);
+		fatal_perror("Could not mmap ELF file");
+	}
+
+	/* mmap() was successful, close the FD. */
+	close(fd);
+
+	/* Get pointer to the ELF header. */
+	assert_ge(stat.st_size, sizeof(*elf.ehdr), "%lu");
+	elf.ehdr = elf_ptr(Elf64_Ehdr, 0);
+
+	/* Check the ELF magic. */
+	assert_eq(elf.ehdr->e_ident[EI_MAG0], ELFMAG0, "0x%x");
+	assert_eq(elf.ehdr->e_ident[EI_MAG1], ELFMAG1, "0x%x");
+	assert_eq(elf.ehdr->e_ident[EI_MAG2], ELFMAG2, "0x%x");
+	assert_eq(elf.ehdr->e_ident[EI_MAG3], ELFMAG3, "0x%x");
+
+	/* Sanity check that this is an ELF64 relocatable object for AArch64. */
+	assert_eq(elf.ehdr->e_ident[EI_CLASS], ELFCLASS64, "%u");
+	assert_eq(elf.ehdr->e_ident[EI_DATA], ELFENDIAN, "%u");
+	assert_eq(elf16toh(elf.ehdr->e_type), ET_REL, "%u");
+	assert_eq(elf16toh(elf.ehdr->e_machine), EM_AARCH64, "%u");
+
+	/* Populate fields of the global struct. */
+	elf.sh_table = section_by_off(elf64toh(elf.ehdr->e_shoff));
+	elf.sh_string = section_begin(section_by_idx(elf16toh(elf.ehdr->e_shstrndx)));
+}
+
+/* Print the prologue of the output ASM file. */
+static void emit_prologue(void)
+{
+	printf(".data\n"
+	       ".pushsection " HYP_RELOC_SECTION ", \"a\"\n");
+}
+
+/* Print ASM statements needed as a prologue to a processed hyp section. */
+static void emit_section_prologue(const char *sh_orig_name)
+{
+	/* Declare the hyp section symbol. */
+	printf(".global %s%s\n", HYP_SECTION_SYMBOL_PREFIX, sh_orig_name);
+}
+
+/*
+ * Print ASM statements to create a hyp relocation entry for a given
+ * R_AARCH64_ABS64 relocation.
+ *
+ * The linker of vmlinux will populate the position given by `rela` with
+ * an absolute 64-bit kernel VA. If the kernel is relocatable, it will
+ * also generate a dynamic relocation entry so that the kernel can shift
+ * the address at runtime for KASLR.
+ *
+ * Emit a 32-bit offset from the current address to the position given
+ * by `rela`. This way the kernel can iterate over all kernel VAs used
+ * by hyp at runtime and convert them to hyp VAs. However, that offset
+ * will not be known until linking of `vmlinux`, so emit a PREL32
+ * relocation referencing a symbol that the hyp linker script put at
+ * the beginning of the relocated section + the offset from `rela`.
+ */
+static void emit_rela_abs64(Elf64_Rela *rela, const char *sh_orig_name)
+{
+	/* Offset of this reloc from the beginning of HYP_RELOC_SECTION. */
+	static size_t reloc_offset;
+
+	/* Create storage for the 32-bit offset. */
+	printf(".word 0\n");
+
+	/*
+	 * Create a PREL32 relocation which instructs the linker of `vmlinux`
+	 * to insert offset to position <base> + <offset>, where <base> is
+	 * a symbol at the beginning of the relocated section, and <offset>
+	 * is `rela->r_offset`.
+	 */
+	printf(".reloc %lu, R_AARCH64_PREL32, %s%s + 0x%lx\n",
+	       reloc_offset, HYP_SECTION_SYMBOL_PREFIX, sh_orig_name,
+	       elf64toh(rela->r_offset));
+
+	reloc_offset += 4;
+}
+
+/* Print the epilogue of the output ASM file. */
+static void emit_epilogue(void)
+{
+	printf(".popsection\n");
+}
+
+/*
+ * Iterate over all RELA relocations in a given section and emit
+ * hyp relocation data for all absolute addresses in hyp code/data.
+ *
+ * Static relocations that generate PC-relative-addressing are ignored.
+ * Failure is reported for unexpected relocation types.
+ */
+static void emit_rela_section(Elf64_Shdr *sh_rela)
+{
+	Elf64_Shdr *sh_orig = &elf.sh_table[elf32toh(sh_rela->sh_info)];
+	const char *sh_orig_name = section_name(sh_orig);
+	Elf64_Rela *rela;
+
+	/* Skip all non-hyp sections. */
+	if (!starts_with(sh_orig_name, HYP_SECTION_PREFIX))
+		return;
+
+	emit_section_prologue(sh_orig_name);
+
+	for_each_rela(sh_rela, rela) {
+		uint32_t type = (uint32_t)elf64toh(rela->r_info);
+
+		/* Check that rela points inside the relocated section. */
+		assert_lt(elf64toh(rela->r_offset), elf64toh(sh_orig->sh_size), "0x%lx");
+
+		switch (type) {
+		/*
+		 * Data relocations to generate absolute addressing.
+		 * Emit a hyp relocation.
+		 */
+		case R_AARCH64_ABS64:
+			emit_rela_abs64(rela, sh_orig_name);
+			break;
+		/* Allow relocations to generate PC-relative addressing. */
+		case R_AARCH64_LD_PREL_LO19:
+		case R_AARCH64_ADR_PREL_LO21:
+		case R_AARCH64_ADR_PREL_PG_HI21:
+		case R_AARCH64_ADR_PREL_PG_HI21_NC:
+		case R_AARCH64_ADD_ABS_LO12_NC:
+		case R_AARCH64_LDST8_ABS_LO12_NC:
+		case R_AARCH64_LDST16_ABS_LO12_NC:
+		case R_AARCH64_LDST32_ABS_LO12_NC:
+		case R_AARCH64_LDST64_ABS_LO12_NC:
+		case R_AARCH64_LDST128_ABS_LO12_NC:
+			break;
+		/* Allow relative relocations for control-flow instructions. */
+		case R_AARCH64_TSTBR14:
+		case R_AARCH64_CONDBR19:
+		case R_AARCH64_JUMP26:
+		case R_AARCH64_CALL26:
+			break;
+		/* Allow group relocations to create PC-relative offset inline. */
+		case R_AARCH64_MOVW_PREL_G0:
+		case R_AARCH64_MOVW_PREL_G0_NC:
+		case R_AARCH64_MOVW_PREL_G1:
+		case R_AARCH64_MOVW_PREL_G1_NC:
+		case R_AARCH64_MOVW_PREL_G2:
+		case R_AARCH64_MOVW_PREL_G2_NC:
+		case R_AARCH64_MOVW_PREL_G3:
+			break;
+		default:
+			fatal_error("Unexpected RELA type %u", type);
+		}
+	}
+}
+
+/* Iterate over all sections and emit hyp relocation data for RELA sections. */
+static void emit_all_relocs(void)
+{
+	Elf64_Shdr *shdr;
+
+	for_each_section(shdr) {
+		switch (elf32toh(shdr->sh_type)) {
+		case SHT_REL:
+			fatal_error("Unexpected SHT_REL section \"%s\"",
+				section_name(shdr));
+		case SHT_RELA:
+			emit_rela_section(shdr);
+			break;
+		}
+	}
+}
+
+int main(int argc, const char **argv)
+{
+	if (argc != 2) {
+		fprintf(stderr, "Usage: %s <elf_input>\n", argv[0]);
+		return EXIT_FAILURE;
+	}
+
+	init_elf(argv[1]);
+
+	emit_prologue();
+	emit_all_relocs();
+	emit_epilogue();
+
+	return EXIT_SUCCESS;
+}
--- a/Show More
+++ b/Show More