Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull KVM updates from Paolo Bonzini:
 "First batch of KVM changes for 4.4.

  s390:
     A bunch of fixes and optimizations for interrupt and time handling.

  PPC:
     Mostly bug fixes.

  ARM:
     No big features, but many small fixes and prerequisites including:

      - a number of fixes for the arch-timer

      - introducing proper level-triggered semantics for the arch-timers

      - a series of patches to synchronously halt a guest (prerequisite
        for IRQ forwarding)

      - some tracepoint improvements

      - a tweak for the EL2 panic handlers

      - some more VGIC cleanups getting rid of redundant state

  x86:
     Quite a few changes:

      - support for VT-d posted interrupts (i.e. PCI devices can inject
        interrupts directly into vCPUs).  This introduces a new
        component (in virt/lib/) that connects VFIO and KVM together.
        The same infrastructure will be used for ARM interrupt
        forwarding as well.

      - more Hyper-V features, though the main one Hyper-V synthetic
        interrupt controller will have to wait for 4.5.  These will let
        KVM expose Hyper-V devices.

      - nested virtualization now supports VPID (same as PCID but for
        vCPUs) which makes it quite a bit faster

      - for future hardware that supports NVDIMM, there is support for
        clflushopt, clwb, pcommit

      - support for "split irqchip", i.e.  LAPIC in kernel +
        IOAPIC/PIC/PIT in userspace, which reduces the attack surface of
        the hypervisor

      - obligatory smattering of SMM fixes

      - on the guest side, stable scheduler clock support was rewritten
        to not require help from the hypervisor"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (123 commits)
  KVM: VMX: Fix commit which broke PML
  KVM: x86: obey KVM_X86_QUIRK_CD_NW_CLEARED in kvm_set_cr0()
  KVM: x86: allow RSM from 64-bit mode
  KVM: VMX: fix SMEP and SMAP without EPT
  KVM: x86: move kvm_set_irq_inatomic to legacy device assignment
  KVM: device assignment: remove pointless #ifdefs
  KVM: x86: merge kvm_arch_set_irq with kvm_set_msi_inatomic
  KVM: x86: zero apic_arb_prio on reset
  drivers/hv: share Hyper-V SynIC constants with userspace
  KVM: x86: handle SMBASE as physical address in RSM
  KVM: x86: add read_phys to x86_emulate_ops
  KVM: x86: removing unused variable
  KVM: don't pointlessly leave KVM_COMPAT=y in non-KVM configs
  KVM: arm/arm64: Merge vgic_set_lr() and vgic_sync_lr_elrsr()
  KVM: arm/arm64: Clean up vgic_retire_lr() and surroundings
  KVM: arm/arm64: Optimize away redundant LR tracking
  KVM: s390: use simple switch statement as multiplexer
  KVM: s390: drop useless newline in debugging data
  KVM: s390: SCA must not cross page boundaries
  KVM: arm: Do not indent the arguments of DECLARE_BITMAP
  ...
This commit is contained in:
Linus Torvalds
2015-11-05 16:26:26 -08:00
89 changed files with 2956 additions and 1029 deletions
+1
View File
@@ -1585,6 +1585,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
nosid disable Source ID checking nosid disable Source ID checking
no_x2apic_optout no_x2apic_optout
BIOS x2APIC opt-out request will be ignored BIOS x2APIC opt-out request will be ignored
nopost disable Interrupt Posting
iomem= Disable strict checking of access to MMIO memory iomem= Disable strict checking of access to MMIO memory
strict regions from userspace. strict regions from userspace.
+47 -5
View File
@@ -401,10 +401,9 @@ Capability: basic
Architectures: x86, ppc, mips Architectures: x86, ppc, mips
Type: vcpu ioctl Type: vcpu ioctl
Parameters: struct kvm_interrupt (in) Parameters: struct kvm_interrupt (in)
Returns: 0 on success, -1 on error Returns: 0 on success, negative on failure.
Queues a hardware interrupt vector to be injected. This is only Queues a hardware interrupt vector to be injected.
useful if in-kernel local APIC or equivalent is not used.
/* for KVM_INTERRUPT */ /* for KVM_INTERRUPT */
struct kvm_interrupt { struct kvm_interrupt {
@@ -414,7 +413,14 @@ struct kvm_interrupt {
X86: X86:
Note 'irq' is an interrupt vector, not an interrupt pin or line. Returns: 0 on success,
-EEXIST if an interrupt is already enqueued
-EINVAL the the irq number is invalid
-ENXIO if the PIC is in the kernel
-EFAULT if the pointer is invalid
Note 'irq' is an interrupt vector, not an interrupt pin or line. This
ioctl is useful if the in-kernel PIC is not used.
PPC: PPC:
@@ -1598,7 +1604,7 @@ provided event instead of triggering an exit.
struct kvm_ioeventfd { struct kvm_ioeventfd {
__u64 datamatch; __u64 datamatch;
__u64 addr; /* legal pio/mmio address */ __u64 addr; /* legal pio/mmio address */
__u32 len; /* 1, 2, 4, or 8 bytes */ __u32 len; /* 0, 1, 2, 4, or 8 bytes */
__s32 fd; __s32 fd;
__u32 flags; __u32 flags;
__u8 pad[36]; __u8 pad[36];
@@ -1621,6 +1627,10 @@ to the registered address is equal to datamatch in struct kvm_ioeventfd.
For virtio-ccw devices, addr contains the subchannel id and datamatch the For virtio-ccw devices, addr contains the subchannel id and datamatch the
virtqueue index. virtqueue index.
With KVM_CAP_IOEVENTFD_ANY_LENGTH, a zero length ioeventfd is allowed, and
the kernel will ignore the length of guest write and may get a faster vmexit.
The speedup may only apply to specific architectures, but the ioeventfd will
work anyway.
4.60 KVM_DIRTY_TLB 4.60 KVM_DIRTY_TLB
@@ -3309,6 +3319,18 @@ Valid values for 'type' are:
to ignore the request, or to gather VM memory core dump and/or to ignore the request, or to gather VM memory core dump and/or
reset/shutdown of the VM. reset/shutdown of the VM.
/* KVM_EXIT_IOAPIC_EOI */
struct {
__u8 vector;
} eoi;
Indicates that the VCPU's in-kernel local APIC received an EOI for a
level-triggered IOAPIC interrupt. This exit only triggers when the
IOAPIC is implemented in userspace (i.e. KVM_CAP_SPLIT_IRQCHIP is enabled);
the userspace IOAPIC should process the EOI and retrigger the interrupt if
it is still asserted. Vector is the LAPIC interrupt vector for which the
EOI was received.
/* Fix the size of the union. */ /* Fix the size of the union. */
char padding[256]; char padding[256];
}; };
@@ -3627,6 +3649,26 @@ struct {
KVM handlers should exit to userspace with rc = -EREMOTE. KVM handlers should exit to userspace with rc = -EREMOTE.
7.5 KVM_CAP_SPLIT_IRQCHIP
Architectures: x86
Parameters: args[0] - number of routes reserved for userspace IOAPICs
Returns: 0 on success, -1 on error
Create a local apic for each processor in the kernel. This can be used
instead of KVM_CREATE_IRQCHIP if the userspace VMM wishes to emulate the
IOAPIC and PIC (and also the PIT, even though this has to be enabled
separately).
This capability also enables in kernel routing of interrupt requests;
when KVM_CAP_SPLIT_IRQCHIP only routes of KVM_IRQ_ROUTING_MSI type are
used in the IRQ routing table. The first args[0] MSI routes are reserved
for the IOAPIC pins. Whenever the LAPIC receives an EOI for these routes,
a KVM_EXIT_IOAPIC_EOI vmexit will be reported to userspace.
Fails if VCPU has already been created, or if the irqchip is already in the
kernel (i.e. KVM_CREATE_IRQCHIP has already been called).
8. Other capabilities. 8. Other capabilities.
---------------------- ----------------------
@@ -0,0 +1,187 @@
KVM/ARM VGIC Forwarded Physical Interrupts
==========================================
The KVM/ARM code implements software support for the ARM Generic
Interrupt Controller's (GIC's) hardware support for virtualization by
allowing software to inject virtual interrupts to a VM, which the guest
OS sees as regular interrupts. The code is famously known as the VGIC.
Some of these virtual interrupts, however, correspond to physical
interrupts from real physical devices. One example could be the
architected timer, which itself supports virtualization, and therefore
lets a guest OS program the hardware device directly to raise an
interrupt at some point in time. When such an interrupt is raised, the
host OS initially handles the interrupt and must somehow signal this
event as a virtual interrupt to the guest. Another example could be a
passthrough device, where the physical interrupts are initially handled
by the host, but the device driver for the device lives in the guest OS
and KVM must therefore somehow inject a virtual interrupt on behalf of
the physical one to the guest OS.
These virtual interrupts corresponding to a physical interrupt on the
host are called forwarded physical interrupts, but are also sometimes
referred to as 'virtualized physical interrupts' and 'mapped interrupts'.
Forwarded physical interrupts are handled slightly differently compared
to virtual interrupts generated purely by a software emulated device.
The HW bit
----------
Virtual interrupts are signalled to the guest by programming the List
Registers (LRs) on the GIC before running a VCPU. The LR is programmed
with the virtual IRQ number and the state of the interrupt (Pending,
Active, or Pending+Active). When the guest ACKs and EOIs a virtual
interrupt, the LR state moves from Pending to Active, and finally to
inactive.
The LRs include an extra bit, called the HW bit. When this bit is set,
KVM must also program an additional field in the LR, the physical IRQ
number, to link the virtual with the physical IRQ.
When the HW bit is set, KVM must EITHER set the Pending OR the Active
bit, never both at the same time.
Setting the HW bit causes the hardware to deactivate the physical
interrupt on the physical distributor when the guest deactivates the
corresponding virtual interrupt.
Forwarded Physical Interrupts Life Cycle
----------------------------------------
The state of forwarded physical interrupts is managed in the following way:
- The physical interrupt is acked by the host, and becomes active on
the physical distributor (*).
- KVM sets the LR.Pending bit, because this is the only way the GICV
interface is going to present it to the guest.
- LR.Pending will stay set as long as the guest has not acked the interrupt.
- LR.Pending transitions to LR.Active on the guest read of the IAR, as
expected.
- On guest EOI, the *physical distributor* active bit gets cleared,
but the LR.Active is left untouched (set).
- KVM clears the LR on VM exits when the physical distributor
active state has been cleared.
(*): The host handling is slightly more complicated. For some forwarded
interrupts (shared), KVM directly sets the active state on the physical
distributor before entering the guest, because the interrupt is never actually
handled on the host (see details on the timer as an example below). For other
forwarded interrupts (non-shared) the host does not deactivate the interrupt
when the host ISR completes, but leaves the interrupt active until the guest
deactivates it. Leaving the interrupt active is allowed, because Linux
configures the physical GIC with EOIMode=1, which causes EOI operations to
perform a priority drop allowing the GIC to receive other interrupts of the
default priority.
Forwarded Edge and Level Triggered PPIs and SPIs
------------------------------------------------
Forwarded physical interrupts injected should always be active on the
physical distributor when injected to a guest.
Level-triggered interrupts will keep the interrupt line to the GIC
asserted, typically until the guest programs the device to deassert the
line. This means that the interrupt will remain pending on the physical
distributor until the guest has reprogrammed the device. Since we
always run the VM with interrupts enabled on the CPU, a pending
interrupt will exit the guest as soon as we switch into the guest,
preventing the guest from ever making progress as the process repeats
over and over. Therefore, the active state on the physical distributor
must be set when entering the guest, preventing the GIC from forwarding
the pending interrupt to the CPU. As soon as the guest deactivates the
interrupt, the physical line is sampled by the hardware again and the host
takes a new interrupt if and only if the physical line is still asserted.
Edge-triggered interrupts do not exhibit the same problem with
preventing guest execution that level-triggered interrupts do. One
option is to not use HW bit at all, and inject edge-triggered interrupts
from a physical device as pure virtual interrupts. But that would
potentially slow down handling of the interrupt in the guest, because a
physical interrupt occurring in the middle of the guest ISR would
preempt the guest for the host to handle the interrupt. Additionally,
if you configure the system to handle interrupts on a separate physical
core from that running your VCPU, you still have to interrupt the VCPU
to queue the pending state onto the LR, even though the guest won't use
this information until the guest ISR completes. Therefore, the HW
bit should always be set for forwarded edge-triggered interrupts. With
the HW bit set, the virtual interrupt is injected and additional
physical interrupts occurring before the guest deactivates the interrupt
simply mark the state on the physical distributor as Pending+Active. As
soon as the guest deactivates the interrupt, the host takes another
interrupt if and only if there was a physical interrupt between injecting
the forwarded interrupt to the guest and the guest deactivating the
interrupt.
Consequently, whenever we schedule a VCPU with one or more LRs with the
HW bit set, the interrupt must also be active on the physical
distributor.
Forwarded LPIs
--------------
LPIs, introduced in GICv3, are always edge-triggered and do not have an
active state. They become pending when a device signal them, and as
soon as they are acked by the CPU, they are inactive again.
It therefore doesn't make sense, and is not supported, to set the HW bit
for physical LPIs that are forwarded to a VM as virtual interrupts,
typically virtual SPIs.
For LPIs, there is no other choice than to preempt the VCPU thread if
necessary, and queue the pending state onto the LR.
Putting It Together: The Architected Timer
------------------------------------------
The architected timer is a device that signals interrupts with level
triggered semantics. The timer hardware is directly accessed by VCPUs
which program the timer to fire at some point in time. Each VCPU on a
system programs the timer to fire at different times, and therefore the
hardware is multiplexed between multiple VCPUs. This is implemented by
context-switching the timer state along with each VCPU thread.
However, this means that a scenario like the following is entirely
possible, and in fact, typical:
1. KVM runs the VCPU
2. The guest programs the time to fire in T+100
3. The guest is idle and calls WFI (wait-for-interrupts)
4. The hardware traps to the host
5. KVM stores the timer state to memory and disables the hardware timer
6. KVM schedules a soft timer to fire in T+(100 - time since step 2)
7. KVM puts the VCPU thread to sleep (on a waitqueue)
8. The soft timer fires, waking up the VCPU thread
9. KVM reprograms the timer hardware with the VCPU's values
10. KVM marks the timer interrupt as active on the physical distributor
11. KVM injects a forwarded physical interrupt to the guest
12. KVM runs the VCPU
Notice that KVM injects a forwarded physical interrupt in step 11 without
the corresponding interrupt having actually fired on the host. That is
exactly why we mark the timer interrupt as active in step 10, because
the active state on the physical distributor is part of the state
belonging to the timer hardware, which is context-switched along with
the VCPU thread.
If the guest does not idle because it is busy, the flow looks like this
instead:
1. KVM runs the VCPU
2. The guest programs the time to fire in T+100
4. At T+100 the timer fires and a physical IRQ causes the VM to exit
(note that this initially only traps to EL2 and does not run the host ISR
until KVM has returned to the host).
5. With interrupts still disabled on the CPU coming back from the guest, KVM
stores the virtual timer state to memory and disables the virtual hw timer.
6. KVM looks at the timer state (in memory) and injects a forwarded physical
interrupt because it concludes the timer has expired.
7. KVM marks the timer interrupt as active on the physical distributor
7. KVM enables the timer, enables interrupts, and runs the VCPU
Notice that again the forwarded physical interrupt is injected to the
guest without having actually been handled on the host. In this case it
is because the physical interrupt is never actually seen by the host because the
timer is disabled upon guest return, and the virtual forwarded interrupt is
injected on the KVM guest entry path.
+10 -8
View File
@@ -44,28 +44,29 @@ Groups:
Attributes: Attributes:
The attr field of kvm_device_attr encodes two values: The attr field of kvm_device_attr encodes two values:
bits: | 63 .... 40 | 39 .. 32 | 31 .... 0 | bits: | 63 .... 40 | 39 .. 32 | 31 .... 0 |
values: | reserved | cpu id | offset | values: | reserved | vcpu_index | offset |
All distributor regs are (rw, 32-bit) All distributor regs are (rw, 32-bit)
The offset is relative to the "Distributor base address" as defined in the The offset is relative to the "Distributor base address" as defined in the
GICv2 specs. Getting or setting such a register has the same effect as GICv2 specs. Getting or setting such a register has the same effect as
reading or writing the register on the actual hardware from the cpu reading or writing the register on the actual hardware from the cpu whose
specified with cpu id field. Note that most distributor fields are not index is specified with the vcpu_index field. Note that most distributor
banked, but return the same value regardless of the cpu id used to access fields are not banked, but return the same value regardless of the
the register. vcpu_index used to access the register.
Limitations: Limitations:
- Priorities are not implemented, and registers are RAZ/WI - Priorities are not implemented, and registers are RAZ/WI
- Currently only implemented for KVM_DEV_TYPE_ARM_VGIC_V2. - Currently only implemented for KVM_DEV_TYPE_ARM_VGIC_V2.
Errors: Errors:
-ENODEV: Getting or setting this register is not yet supported -ENXIO: Getting or setting this register is not yet supported
-EBUSY: One or more VCPUs are running -EBUSY: One or more VCPUs are running
-EINVAL: Invalid vcpu_index supplied
KVM_DEV_ARM_VGIC_GRP_CPU_REGS KVM_DEV_ARM_VGIC_GRP_CPU_REGS
Attributes: Attributes:
The attr field of kvm_device_attr encodes two values: The attr field of kvm_device_attr encodes two values:
bits: | 63 .... 40 | 39 .. 32 | 31 .... 0 | bits: | 63 .... 40 | 39 .. 32 | 31 .... 0 |
values: | reserved | cpu id | offset | values: | reserved | vcpu_index | offset |
All CPU interface regs are (rw, 32-bit) All CPU interface regs are (rw, 32-bit)
@@ -91,8 +92,9 @@ Groups:
- Priorities are not implemented, and registers are RAZ/WI - Priorities are not implemented, and registers are RAZ/WI
- Currently only implemented for KVM_DEV_TYPE_ARM_VGIC_V2. - Currently only implemented for KVM_DEV_TYPE_ARM_VGIC_V2.
Errors: Errors:
-ENODEV: Getting or setting this register is not yet supported -ENXIO: Getting or setting this register is not yet supported
-EBUSY: One or more VCPUs are running -EBUSY: One or more VCPUs are running
-EINVAL: Invalid vcpu_index supplied
KVM_DEV_ARM_VGIC_GRP_NR_IRQS KVM_DEV_ARM_VGIC_GRP_NR_IRQS
Attributes: Attributes:
+12
View File
@@ -166,3 +166,15 @@ Comment: The srcu read lock must be held while accessing memslots (e.g.
MMIO/PIO address->device structure mapping (kvm->buses). MMIO/PIO address->device structure mapping (kvm->buses).
The srcu index can be stored in kvm_vcpu->srcu_idx per vcpu The srcu index can be stored in kvm_vcpu->srcu_idx per vcpu
if it is needed by multiple functions. if it is needed by multiple functions.
Name: blocked_vcpu_on_cpu_lock
Type: spinlock_t
Arch: x86
Protects: blocked_vcpu_on_cpu
Comment: This is a per-CPU lock and it is used for VT-d posted-interrupts.
When VT-d posted-interrupts is supported and the VM has assigned
devices, we put the blocked vCPU on the list blocked_vcpu_on_cpu
protected by blocked_vcpu_on_cpu_lock, when VT-d hardware issues
wakeup notification event since external interrupts from the
assigned devices happens, we will find the vCPU on the list to
wakeup.
+7
View File
@@ -11348,6 +11348,13 @@ L: netdev@vger.kernel.org
S: Maintained S: Maintained
F: drivers/net/ethernet/via/via-velocity.* F: drivers/net/ethernet/via/via-velocity.*
VIRT LIB
M: Alex Williamson <alex.williamson@redhat.com>
M: Paolo Bonzini <pbonzini@redhat.com>
L: kvm@vger.kernel.org
S: Supported
F: virt/lib/
VIVID VIRTUAL VIDEO DRIVER VIVID VIRTUAL VIDEO DRIVER
M: Hans Verkuil <hverkuil@xs4all.nl> M: Hans Verkuil <hverkuil@xs4all.nl>
L: linux-media@vger.kernel.org L: linux-media@vger.kernel.org
+6 -4
View File
@@ -550,6 +550,7 @@ drivers-y := drivers/ sound/ firmware/
net-y := net/ net-y := net/
libs-y := lib/ libs-y := lib/
core-y := usr/ core-y := usr/
virt-y := virt/
endif # KBUILD_EXTMOD endif # KBUILD_EXTMOD
ifeq ($(dot-config),1) ifeq ($(dot-config),1)
@@ -882,10 +883,10 @@ core-y += kernel/ certs/ mm/ fs/ ipc/ security/ crypto/ block/
vmlinux-dirs := $(patsubst %/,%,$(filter %/, $(init-y) $(init-m) \ vmlinux-dirs := $(patsubst %/,%,$(filter %/, $(init-y) $(init-m) \
$(core-y) $(core-m) $(drivers-y) $(drivers-m) \ $(core-y) $(core-m) $(drivers-y) $(drivers-m) \
$(net-y) $(net-m) $(libs-y) $(libs-m))) $(net-y) $(net-m) $(libs-y) $(libs-m) $(virt-y)))
vmlinux-alldirs := $(sort $(vmlinux-dirs) $(patsubst %/,%,$(filter %/, \ vmlinux-alldirs := $(sort $(vmlinux-dirs) $(patsubst %/,%,$(filter %/, \
$(init-) $(core-) $(drivers-) $(net-) $(libs-)))) $(init-) $(core-) $(drivers-) $(net-) $(libs-) $(virt-))))
init-y := $(patsubst %/, %/built-in.o, $(init-y)) init-y := $(patsubst %/, %/built-in.o, $(init-y))
core-y := $(patsubst %/, %/built-in.o, $(core-y)) core-y := $(patsubst %/, %/built-in.o, $(core-y))
@@ -894,14 +895,15 @@ net-y := $(patsubst %/, %/built-in.o, $(net-y))
libs-y1 := $(patsubst %/, %/lib.a, $(libs-y)) libs-y1 := $(patsubst %/, %/lib.a, $(libs-y))
libs-y2 := $(patsubst %/, %/built-in.o, $(libs-y)) libs-y2 := $(patsubst %/, %/built-in.o, $(libs-y))
libs-y := $(libs-y1) $(libs-y2) libs-y := $(libs-y1) $(libs-y2)
virt-y := $(patsubst %/, %/built-in.o, $(virt-y))
# Externally visible symbols (used by link-vmlinux.sh) # Externally visible symbols (used by link-vmlinux.sh)
export KBUILD_VMLINUX_INIT := $(head-y) $(init-y) export KBUILD_VMLINUX_INIT := $(head-y) $(init-y)
export KBUILD_VMLINUX_MAIN := $(core-y) $(libs-y) $(drivers-y) $(net-y) export KBUILD_VMLINUX_MAIN := $(core-y) $(libs-y) $(drivers-y) $(net-y) $(virt-y)
export KBUILD_LDS := arch/$(SRCARCH)/kernel/vmlinux.lds export KBUILD_LDS := arch/$(SRCARCH)/kernel/vmlinux.lds
export LDFLAGS_vmlinux export LDFLAGS_vmlinux
# used by scripts/pacmage/Makefile # used by scripts/pacmage/Makefile
export KBUILD_ALLDIRS := $(sort $(filter-out arch/%,$(vmlinux-alldirs)) arch Documentation include samples scripts tools virt) export KBUILD_ALLDIRS := $(sort $(filter-out arch/%,$(vmlinux-alldirs)) arch Documentation include samples scripts tools)
vmlinux-deps := $(KBUILD_LDS) $(KBUILD_VMLINUX_INIT) $(KBUILD_VMLINUX_MAIN) vmlinux-deps := $(KBUILD_LDS) $(KBUILD_VMLINUX_INIT) $(KBUILD_VMLINUX_MAIN)
+20
View File
@@ -218,4 +218,24 @@
#define HSR_DABT_CM (1U << 8) #define HSR_DABT_CM (1U << 8)
#define HSR_DABT_EA (1U << 9) #define HSR_DABT_EA (1U << 9)
#define kvm_arm_exception_type \
{0, "RESET" }, \
{1, "UNDEFINED" }, \
{2, "SOFTWARE" }, \
{3, "PREF_ABORT" }, \
{4, "DATA_ABORT" }, \
{5, "IRQ" }, \
{6, "FIQ" }, \
{7, "HVC" }
#define HSRECN(x) { HSR_EC_##x, #x }
#define kvm_arm_exception_class \
HSRECN(UNKNOWN), HSRECN(WFI), HSRECN(CP15_32), HSRECN(CP15_64), \
HSRECN(CP14_MR), HSRECN(CP14_LS), HSRECN(CP_0_13), HSRECN(CP10_ID), \
HSRECN(JAZELLE), HSRECN(BXJ), HSRECN(CP14_64), HSRECN(SVC_HYP), \
HSRECN(HVC), HSRECN(SMC), HSRECN(IABT), HSRECN(IABT_HYP), \
HSRECN(DABT), HSRECN(DABT_HYP)
#endif /* __ARM_KVM_ARM_H__ */ #endif /* __ARM_KVM_ARM_H__ */
+4 -1
View File
@@ -126,7 +126,10 @@ struct kvm_vcpu_arch {
* here. * here.
*/ */
/* Don't run the guest on this vcpu */ /* vcpu power-off state */
bool power_off;
/* Don't run the guest (internal implementation need) */
bool pause; bool pause;
/* IO related fields */ /* IO related fields */
+2
View File
@@ -46,4 +46,6 @@ config KVM_ARM_HOST
---help--- ---help---
Provides host support for ARM processors. Provides host support for ARM processors.
source drivers/vhost/Kconfig
endif # VIRTUALIZATION endif # VIRTUALIZATION
+60 -16
View File
@@ -271,6 +271,16 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
return kvm_timer_should_fire(vcpu); return kvm_timer_should_fire(vcpu);
} }
void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
{
kvm_timer_schedule(vcpu);
}
void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
{
kvm_timer_unschedule(vcpu);
}
int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
{ {
/* Force users to call KVM_ARM_VCPU_INIT */ /* Force users to call KVM_ARM_VCPU_INIT */
@@ -308,7 +318,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
struct kvm_mp_state *mp_state) struct kvm_mp_state *mp_state)
{ {
if (vcpu->arch.pause) if (vcpu->arch.power_off)
mp_state->mp_state = KVM_MP_STATE_STOPPED; mp_state->mp_state = KVM_MP_STATE_STOPPED;
else else
mp_state->mp_state = KVM_MP_STATE_RUNNABLE; mp_state->mp_state = KVM_MP_STATE_RUNNABLE;
@@ -321,10 +331,10 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
{ {
switch (mp_state->mp_state) { switch (mp_state->mp_state) {
case KVM_MP_STATE_RUNNABLE: case KVM_MP_STATE_RUNNABLE:
vcpu->arch.pause = false; vcpu->arch.power_off = false;
break; break;
case KVM_MP_STATE_STOPPED: case KVM_MP_STATE_STOPPED:
vcpu->arch.pause = true; vcpu->arch.power_off = true;
break; break;
default: default:
return -EINVAL; return -EINVAL;
@@ -342,7 +352,8 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
*/ */
int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
{ {
return !!v->arch.irq_lines || kvm_vgic_vcpu_pending_irq(v); return ((!!v->arch.irq_lines || kvm_vgic_vcpu_pending_irq(v))
&& !v->arch.power_off && !v->arch.pause);
} }
/* Just ensure a guest exit from a particular CPU */ /* Just ensure a guest exit from a particular CPU */
@@ -468,11 +479,38 @@ bool kvm_arch_intc_initialized(struct kvm *kvm)
return vgic_initialized(kvm); return vgic_initialized(kvm);
} }
static void vcpu_pause(struct kvm_vcpu *vcpu) static void kvm_arm_halt_guest(struct kvm *kvm) __maybe_unused;
static void kvm_arm_resume_guest(struct kvm *kvm) __maybe_unused;
static void kvm_arm_halt_guest(struct kvm *kvm)
{
int i;
struct kvm_vcpu *vcpu;
kvm_for_each_vcpu(i, vcpu, kvm)
vcpu->arch.pause = true;
force_vm_exit(cpu_all_mask);
}
static void kvm_arm_resume_guest(struct kvm *kvm)
{
int i;
struct kvm_vcpu *vcpu;
kvm_for_each_vcpu(i, vcpu, kvm) {
wait_queue_head_t *wq = kvm_arch_vcpu_wq(vcpu);
vcpu->arch.pause = false;
wake_up_interruptible(wq);
}
}
static void vcpu_sleep(struct kvm_vcpu *vcpu)
{ {
wait_queue_head_t *wq = kvm_arch_vcpu_wq(vcpu); wait_queue_head_t *wq = kvm_arch_vcpu_wq(vcpu);
wait_event_interruptible(*wq, !vcpu->arch.pause); wait_event_interruptible(*wq, ((!vcpu->arch.power_off) &&
(!vcpu->arch.pause)));
} }
static int kvm_vcpu_initialized(struct kvm_vcpu *vcpu) static int kvm_vcpu_initialized(struct kvm_vcpu *vcpu)
@@ -522,8 +560,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
update_vttbr(vcpu->kvm); update_vttbr(vcpu->kvm);
if (vcpu->arch.pause) if (vcpu->arch.power_off || vcpu->arch.pause)
vcpu_pause(vcpu); vcpu_sleep(vcpu);
/* /*
* Disarming the background timer must be done in a * Disarming the background timer must be done in a
@@ -549,11 +587,12 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
run->exit_reason = KVM_EXIT_INTR; run->exit_reason = KVM_EXIT_INTR;
} }
if (ret <= 0 || need_new_vmid_gen(vcpu->kvm)) { if (ret <= 0 || need_new_vmid_gen(vcpu->kvm) ||
vcpu->arch.power_off || vcpu->arch.pause) {
local_irq_enable(); local_irq_enable();
kvm_timer_sync_hwstate(vcpu);
kvm_vgic_sync_hwstate(vcpu); kvm_vgic_sync_hwstate(vcpu);
preempt_enable(); preempt_enable();
kvm_timer_sync_hwstate(vcpu);
continue; continue;
} }
@@ -596,14 +635,19 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
* guest time. * guest time.
*/ */
kvm_guest_exit(); kvm_guest_exit();
trace_kvm_exit(kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu)); trace_kvm_exit(ret, kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu));
/*
* We must sync the timer state before the vgic state so that
* the vgic can properly sample the updated state of the
* interrupt line.
*/
kvm_timer_sync_hwstate(vcpu);
kvm_vgic_sync_hwstate(vcpu); kvm_vgic_sync_hwstate(vcpu);
preempt_enable(); preempt_enable();
kvm_timer_sync_hwstate(vcpu);
ret = handle_exit(vcpu, run, ret); ret = handle_exit(vcpu, run, ret);
} }
@@ -765,12 +809,12 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
vcpu_reset_hcr(vcpu); vcpu_reset_hcr(vcpu);
/* /*
* Handle the "start in power-off" case by marking the VCPU as paused. * Handle the "start in power-off" case.
*/ */
if (test_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features)) if (test_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features))
vcpu->arch.pause = true; vcpu->arch.power_off = true;
else else
vcpu->arch.pause = false; vcpu->arch.power_off = false;
return 0; return 0;
} }
+5 -5
View File
@@ -63,7 +63,7 @@ static unsigned long kvm_psci_vcpu_suspend(struct kvm_vcpu *vcpu)
static void kvm_psci_vcpu_off(struct kvm_vcpu *vcpu) static void kvm_psci_vcpu_off(struct kvm_vcpu *vcpu)
{ {
vcpu->arch.pause = true; vcpu->arch.power_off = true;
} }
static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
@@ -87,7 +87,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
*/ */
if (!vcpu) if (!vcpu)
return PSCI_RET_INVALID_PARAMS; return PSCI_RET_INVALID_PARAMS;
if (!vcpu->arch.pause) { if (!vcpu->arch.power_off) {
if (kvm_psci_version(source_vcpu) != KVM_ARM_PSCI_0_1) if (kvm_psci_version(source_vcpu) != KVM_ARM_PSCI_0_1)
return PSCI_RET_ALREADY_ON; return PSCI_RET_ALREADY_ON;
else else
@@ -115,7 +115,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
* the general puspose registers are undefined upon CPU_ON. * the general puspose registers are undefined upon CPU_ON.
*/ */
*vcpu_reg(vcpu, 0) = context_id; *vcpu_reg(vcpu, 0) = context_id;
vcpu->arch.pause = false; vcpu->arch.power_off = false;
smp_mb(); /* Make sure the above is visible */ smp_mb(); /* Make sure the above is visible */
wq = kvm_arch_vcpu_wq(vcpu); wq = kvm_arch_vcpu_wq(vcpu);
@@ -153,7 +153,7 @@ static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu)
mpidr = kvm_vcpu_get_mpidr_aff(tmp); mpidr = kvm_vcpu_get_mpidr_aff(tmp);
if ((mpidr & target_affinity_mask) == target_affinity) { if ((mpidr & target_affinity_mask) == target_affinity) {
matching_cpus++; matching_cpus++;
if (!tmp->arch.pause) if (!tmp->arch.power_off)
return PSCI_0_2_AFFINITY_LEVEL_ON; return PSCI_0_2_AFFINITY_LEVEL_ON;
} }
} }
@@ -179,7 +179,7 @@ static void kvm_prepare_system_event(struct kvm_vcpu *vcpu, u32 type)
* re-initialized. * re-initialized.
*/ */
kvm_for_each_vcpu(i, tmp, vcpu->kvm) { kvm_for_each_vcpu(i, tmp, vcpu->kvm) {
tmp->arch.pause = true; tmp->arch.power_off = true;
kvm_vcpu_kick(tmp); kvm_vcpu_kick(tmp);
} }
+7 -3
View File
@@ -25,21 +25,25 @@ TRACE_EVENT(kvm_entry,
); );
TRACE_EVENT(kvm_exit, TRACE_EVENT(kvm_exit,
TP_PROTO(unsigned int exit_reason, unsigned long vcpu_pc), TP_PROTO(int idx, unsigned int exit_reason, unsigned long vcpu_pc),
TP_ARGS(exit_reason, vcpu_pc), TP_ARGS(idx, exit_reason, vcpu_pc),
TP_STRUCT__entry( TP_STRUCT__entry(
__field( int, idx )
__field( unsigned int, exit_reason ) __field( unsigned int, exit_reason )
__field( unsigned long, vcpu_pc ) __field( unsigned long, vcpu_pc )
), ),
TP_fast_assign( TP_fast_assign(
__entry->idx = idx;
__entry->exit_reason = exit_reason; __entry->exit_reason = exit_reason;
__entry->vcpu_pc = vcpu_pc; __entry->vcpu_pc = vcpu_pc;
), ),
TP_printk("HSR_EC: 0x%04x, PC: 0x%08lx", TP_printk("%s: HSR_EC: 0x%04x (%s), PC: 0x%08lx",
__print_symbolic(__entry->idx, kvm_arm_exception_type),
__entry->exit_reason, __entry->exit_reason,
__print_symbolic(__entry->exit_reason, kvm_arm_exception_class),
__entry->vcpu_pc) __entry->vcpu_pc)
); );
+16
View File
@@ -200,4 +200,20 @@
/* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */ /* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */
#define HPFAR_MASK (~UL(0xf)) #define HPFAR_MASK (~UL(0xf))
#define kvm_arm_exception_type \
{0, "IRQ" }, \
{1, "TRAP" }
#define ECN(x) { ESR_ELx_EC_##x, #x }
#define kvm_arm_exception_class \
ECN(UNKNOWN), ECN(WFx), ECN(CP15_32), ECN(CP15_64), ECN(CP14_MR), \
ECN(CP14_LS), ECN(FP_ASIMD), ECN(CP10_ID), ECN(CP14_64), ECN(SVC64), \
ECN(HVC64), ECN(SMC64), ECN(SYS64), ECN(IMP_DEF), ECN(IABT_LOW), \
ECN(IABT_CUR), ECN(PC_ALIGN), ECN(DABT_LOW), ECN(DABT_CUR), \
ECN(SP_ALIGN), ECN(FP_EXC32), ECN(FP_EXC64), ECN(SERROR), \
ECN(BREAKPT_LOW), ECN(BREAKPT_CUR), ECN(SOFTSTP_LOW), \
ECN(SOFTSTP_CUR), ECN(WATCHPT_LOW), ECN(WATCHPT_CUR), \
ECN(BKPT32), ECN(VECTOR32), ECN(BRK64)
#endif /* __ARM64_KVM_ARM_H__ */ #endif /* __ARM64_KVM_ARM_H__ */
+4 -1
View File
@@ -149,7 +149,10 @@ struct kvm_vcpu_arch {
u32 mdscr_el1; u32 mdscr_el1;
} guest_debug_preserved; } guest_debug_preserved;
/* Don't run the guest */ /* vcpu power-off state */
bool power_off;
/* Don't run the guest (internal implementation need) */
bool pause; bool pause;
/* IO related fields */ /* IO related fields */
+2
View File
@@ -48,4 +48,6 @@ config KVM_ARM_HOST
---help--- ---help---
Provides host support for ARM processors. Provides host support for ARM processors.
source drivers/vhost/Kconfig
endif # VIRTUALIZATION endif # VIRTUALIZATION
+8
View File
@@ -880,6 +880,14 @@ __kvm_hyp_panic:
bl __restore_sysregs bl __restore_sysregs
/*
* Make sure we have a valid host stack, and don't leave junk in the
* frame pointer that will give us a misleading host stack unwinding.
*/
ldr x22, [x2, #CPU_GP_REG_OFFSET(CPU_SP_EL1)]
msr sp_el1, x22
mov x29, xzr
1: adr x0, __hyp_panic_str 1: adr x0, __hyp_panic_str
adr x1, 2f adr x1, 2f
ldp x2, x3, [x1] ldp x2, x3, [x1]
+2
View File
@@ -847,5 +847,7 @@ static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
struct kvm_memory_slot *slot) {} struct kvm_memory_slot *slot) {}
static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {} static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
#endif /* __MIPS_KVM_HOST_H__ */ #endif /* __MIPS_KVM_HOST_H__ */
+5
View File
@@ -42,6 +42,11 @@ static inline unsigned int get_dcrn(u32 inst)
return ((inst >> 16) & 0x1f) | ((inst >> 6) & 0x3e0); return ((inst >> 16) & 0x1f) | ((inst >> 6) & 0x3e0);
} }
static inline unsigned int get_tmrn(u32 inst)
{
return ((inst >> 16) & 0x1f) | ((inst >> 6) & 0x3e0);
}
static inline unsigned int get_rt(u32 inst) static inline unsigned int get_rt(u32 inst)
{ {
return (inst >> 21) & 0x1f; return (inst >> 21) & 0x1f;
+2
View File
@@ -716,5 +716,7 @@ static inline void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslot
static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {} static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {}
static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
static inline void kvm_arch_exit(void) {} static inline void kvm_arch_exit(void) {}
static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
#endif /* __POWERPC_KVM_HOST_H__ */ #endif /* __POWERPC_KVM_HOST_H__ */

Some files were not shown because too many files have changed in this diff Show More