Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull KVM updates from Paolo Bonzini:
 "PPC:
   - Better machine check handling for HV KVM
   - Ability to support guests with threads=2, 4 or 8 on POWER9
   - Fix for a race that could cause delayed recognition of signals
   - Fix for a bug where POWER9 guests could sleep with interrupts pending.

  ARM:
   - VCPU request overhaul
   - allow timer and PMU to have their interrupt number selected from userspace
   - workaround for Cavium erratum 30115
   - handling of memory poisonning
   - the usual crop of fixes and cleanups

  s390:
   - initial machine check forwarding
   - migration support for the CMMA page hinting information
   - cleanups and fixes

  x86:
   - nested VMX bugfixes and improvements
   - more reliable NMI window detection on AMD
   - APIC timer optimizations

  Generic:
   - VCPU request overhaul + documentation of common code patterns
   - kvm_stat improvements"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (124 commits)
  Update my email address
  kvm: vmx: allow host to access guest MSR_IA32_BNDCFGS
  x86: kvm: mmu: use ept a/d in vmcs02 iff used in vmcs12
  kvm: x86: mmu: allow A/D bits to be disabled in an mmu
  x86: kvm: mmu: make spte mmio mask more explicit
  x86: kvm: mmu: dead code thanks to access tracking
  KVM: PPC: Book3S: Fix typo in XICS-on-XIVE state saving code
  KVM: PPC: Book3S HV: Close race with testing for signals on guest entry
  KVM: PPC: Book3S HV: Simplify dynamic micro-threading code
  KVM: x86: remove ignored type attribute
  KVM: LAPIC: Fix lapic timer injection delay
  KVM: lapic: reorganize restart_apic_timer
  KVM: lapic: reorganize start_hv_timer
  kvm: nVMX: Check memory operand to INVVPID
  KVM: s390: Inject machine check into the nested guest
  KVM: s390: Inject machine check into the guest
  tools/kvm_stat: add new interactive command 'b'
  tools/kvm_stat: add new command line switch '-i'
  tools/kvm_stat: fix error on interactive command 'g'
  KVM: SVM: suppress unnecessary NMI singlestep on GIF=0 and nested exit
  ...
This commit is contained in:
Linus Torvalds
2017-07-06 18:38:31 -07:00
95 changed files with 4255 additions and 972 deletions
+1 -1
View File
@@ -60,7 +60,7 @@ static const unsigned short cc_map[16] = {
/*
* Check if a trapped instruction should have been executed or not.
*/
bool kvm_condition_valid32(const struct kvm_vcpu *vcpu)
bool __hyp_text kvm_condition_valid32(const struct kvm_vcpu *vcpu)
{
unsigned long cpsr;
u32 cpsr_cond;
+125 -14
View File
@@ -21,6 +21,7 @@
#include <linux/kvm_host.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/uaccess.h>
#include <clocksource/arm_arch_timer.h>
#include <asm/arch_timer.h>
@@ -35,6 +36,16 @@ static struct timecounter *timecounter;
static unsigned int host_vtimer_irq;
static u32 host_vtimer_irq_flags;
static const struct kvm_irq_level default_ptimer_irq = {
.irq = 30,
.level = 1,
};
static const struct kvm_irq_level default_vtimer_irq = {
.irq = 27,
.level = 1,
};
void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
{
vcpu_vtimer(vcpu)->active_cleared_last = false;
@@ -95,7 +106,7 @@ static void kvm_timer_inject_irq_work(struct work_struct *work)
* If the vcpu is blocked we want to wake it up so that it will see
* the timer has expired when entering the guest.
*/
kvm_vcpu_kick(vcpu);
kvm_vcpu_wake_up(vcpu);
}
static u64 kvm_timer_compute_delta(struct arch_timer_context *timer_ctx)
@@ -215,7 +226,8 @@ static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
if (likely(irqchip_in_kernel(vcpu->kvm))) {
ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
timer_ctx->irq.irq,
timer_ctx->irq.level);
timer_ctx->irq.level,
timer_ctx);
WARN_ON(ret);
}
}
@@ -445,22 +457,11 @@ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu)
kvm_timer_update_state(vcpu);
}
int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
const struct kvm_irq_level *virt_irq,
const struct kvm_irq_level *phys_irq)
int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu)
{
struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
/*
* The vcpu timer irq number cannot be determined in
* kvm_timer_vcpu_init() because it is called much before
* kvm_vcpu_set_target(). To handle this, we determine
* vcpu timer irq number when the vcpu is reset.
*/
vtimer->irq.irq = virt_irq->irq;
ptimer->irq.irq = phys_irq->irq;
/*
* The bits in CNTV_CTL are architecturally reset to UNKNOWN for ARMv8
* and to 0 for ARMv7. We provide an implementation that always
@@ -496,6 +497,8 @@ static void update_vtimer_cntvoff(struct kvm_vcpu *vcpu, u64 cntvoff)
void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu)
{
struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
/* Synchronize cntvoff across all vtimers of a VM. */
update_vtimer_cntvoff(vcpu, kvm_phys_timer_read());
@@ -504,6 +507,9 @@ void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu)
INIT_WORK(&timer->expired, kvm_timer_inject_irq_work);
hrtimer_init(&timer->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
timer->timer.function = kvm_timer_expire;
vtimer->irq.irq = default_vtimer_irq.irq;
ptimer->irq.irq = default_ptimer_irq.irq;
}
static void kvm_timer_init_interrupt(void *info)
@@ -613,6 +619,30 @@ void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu)
kvm_vgic_unmap_phys_irq(vcpu, vtimer->irq.irq);
}
static bool timer_irqs_are_valid(struct kvm_vcpu *vcpu)
{
int vtimer_irq, ptimer_irq;
int i, ret;
vtimer_irq = vcpu_vtimer(vcpu)->irq.irq;
ret = kvm_vgic_set_owner(vcpu, vtimer_irq, vcpu_vtimer(vcpu));
if (ret)
return false;
ptimer_irq = vcpu_ptimer(vcpu)->irq.irq;
ret = kvm_vgic_set_owner(vcpu, ptimer_irq, vcpu_ptimer(vcpu));
if (ret)
return false;
kvm_for_each_vcpu(i, vcpu, vcpu->kvm) {
if (vcpu_vtimer(vcpu)->irq.irq != vtimer_irq ||
vcpu_ptimer(vcpu)->irq.irq != ptimer_irq)
return false;
}
return true;
}
int kvm_timer_enable(struct kvm_vcpu *vcpu)
{
struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
@@ -632,6 +662,11 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu)
if (!vgic_initialized(vcpu->kvm))
return -ENODEV;
if (!timer_irqs_are_valid(vcpu)) {
kvm_debug("incorrectly configured timer irqs\n");
return -EINVAL;
}
/*
* Find the physical IRQ number corresponding to the host_vtimer_irq
*/
@@ -681,3 +716,79 @@ void kvm_timer_init_vhe(void)
val |= (CNTHCTL_EL1PCTEN << cnthctl_shift);
write_sysreg(val, cnthctl_el2);
}
static void set_timer_irqs(struct kvm *kvm, int vtimer_irq, int ptimer_irq)
{
struct kvm_vcpu *vcpu;
int i;
kvm_for_each_vcpu(i, vcpu, kvm) {
vcpu_vtimer(vcpu)->irq.irq = vtimer_irq;
vcpu_ptimer(vcpu)->irq.irq = ptimer_irq;
}
}
int kvm_arm_timer_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
int __user *uaddr = (int __user *)(long)attr->addr;
struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
int irq;
if (!irqchip_in_kernel(vcpu->kvm))
return -EINVAL;
if (get_user(irq, uaddr))
return -EFAULT;
if (!(irq_is_ppi(irq)))
return -EINVAL;
if (vcpu->arch.timer_cpu.enabled)
return -EBUSY;
switch (attr->attr) {
case KVM_ARM_VCPU_TIMER_IRQ_VTIMER:
set_timer_irqs(vcpu->kvm, irq, ptimer->irq.irq);
break;
case KVM_ARM_VCPU_TIMER_IRQ_PTIMER:
set_timer_irqs(vcpu->kvm, vtimer->irq.irq, irq);
break;
default:
return -ENXIO;
}
return 0;
}
int kvm_arm_timer_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
int __user *uaddr = (int __user *)(long)attr->addr;
struct arch_timer_context *timer;
int irq;
switch (attr->attr) {
case KVM_ARM_VCPU_TIMER_IRQ_VTIMER:
timer = vcpu_vtimer(vcpu);
break;
case KVM_ARM_VCPU_TIMER_IRQ_PTIMER:
timer = vcpu_ptimer(vcpu);
break;
default:
return -ENXIO;
}
irq = timer->irq.irq;
return put_user(irq, uaddr);
}
int kvm_arm_timer_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
switch (attr->attr) {
case KVM_ARM_VCPU_TIMER_IRQ_VTIMER:
case KVM_ARM_VCPU_TIMER_IRQ_PTIMER:
return 0;
}
return -ENXIO;
}
+53 -29
View File
@@ -368,6 +368,13 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
kvm_timer_vcpu_put(vcpu);
}
static void vcpu_power_off(struct kvm_vcpu *vcpu)
{
vcpu->arch.power_off = true;
kvm_make_request(KVM_REQ_SLEEP, vcpu);
kvm_vcpu_kick(vcpu);
}
int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
struct kvm_mp_state *mp_state)
{
@@ -387,7 +394,7 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
vcpu->arch.power_off = false;
break;
case KVM_MP_STATE_STOPPED:
vcpu->arch.power_off = true;
vcpu_power_off(vcpu);
break;
default:
return -EINVAL;
@@ -520,6 +527,10 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
}
ret = kvm_timer_enable(vcpu);
if (ret)
return ret;
ret = kvm_arm_pmu_v3_enable(vcpu);
return ret;
}
@@ -536,21 +547,7 @@ void kvm_arm_halt_guest(struct kvm *kvm)
kvm_for_each_vcpu(i, vcpu, kvm)
vcpu->arch.pause = true;
kvm_make_all_cpus_request(kvm, KVM_REQ_VCPU_EXIT);
}
void kvm_arm_halt_vcpu(struct kvm_vcpu *vcpu)
{
vcpu->arch.pause = true;
kvm_vcpu_kick(vcpu);
}
void kvm_arm_resume_vcpu(struct kvm_vcpu *vcpu)
{
struct swait_queue_head *wq = kvm_arch_vcpu_wq(vcpu);
vcpu->arch.pause = false;
swake_up(wq);
kvm_make_all_cpus_request(kvm, KVM_REQ_SLEEP);
}
void kvm_arm_resume_guest(struct kvm *kvm)
@@ -558,16 +555,23 @@ void kvm_arm_resume_guest(struct kvm *kvm)
int i;
struct kvm_vcpu *vcpu;
kvm_for_each_vcpu(i, vcpu, kvm)
kvm_arm_resume_vcpu(vcpu);
kvm_for_each_vcpu(i, vcpu, kvm) {
vcpu->arch.pause = false;
swake_up(kvm_arch_vcpu_wq(vcpu));
}
}
static void vcpu_sleep(struct kvm_vcpu *vcpu)
static void vcpu_req_sleep(struct kvm_vcpu *vcpu)
{
struct swait_queue_head *wq = kvm_arch_vcpu_wq(vcpu);
swait_event_interruptible(*wq, ((!vcpu->arch.power_off) &&
(!vcpu->arch.pause)));
if (vcpu->arch.power_off || vcpu->arch.pause) {
/* Awaken to handle a signal, request we sleep again later. */
kvm_make_request(KVM_REQ_SLEEP, vcpu);
}
}
static int kvm_vcpu_initialized(struct kvm_vcpu *vcpu)
@@ -575,6 +579,20 @@ static int kvm_vcpu_initialized(struct kvm_vcpu *vcpu)
return vcpu->arch.target >= 0;
}
static void check_vcpu_requests(struct kvm_vcpu *vcpu)
{
if (kvm_request_pending(vcpu)) {
if (kvm_check_request(KVM_REQ_SLEEP, vcpu))
vcpu_req_sleep(vcpu);
/*
* Clear IRQ_PENDING requests that were made to guarantee
* that a VCPU sees new virtual interrupts.
*/
kvm_check_request(KVM_REQ_IRQ_PENDING, vcpu);
}
}
/**
* kvm_arch_vcpu_ioctl_run - the main VCPU run function to execute guest code
* @vcpu: The VCPU pointer
@@ -620,8 +638,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
update_vttbr(vcpu->kvm);
if (vcpu->arch.power_off || vcpu->arch.pause)
vcpu_sleep(vcpu);
check_vcpu_requests(vcpu);
/*
* Preparing the interrupts to be injected also
@@ -650,8 +667,17 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
run->exit_reason = KVM_EXIT_INTR;
}
/*
* Ensure we set mode to IN_GUEST_MODE after we disable
* interrupts and before the final VCPU requests check.
* See the comment in kvm_vcpu_exiting_guest_mode() and
* Documentation/virtual/kvm/vcpu-requests.rst
*/
smp_store_mb(vcpu->mode, IN_GUEST_MODE);
if (ret <= 0 || need_new_vmid_gen(vcpu->kvm) ||
vcpu->arch.power_off || vcpu->arch.pause) {
kvm_request_pending(vcpu)) {
vcpu->mode = OUTSIDE_GUEST_MODE;
local_irq_enable();
kvm_pmu_sync_hwstate(vcpu);
kvm_timer_sync_hwstate(vcpu);
@@ -667,7 +693,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
*/
trace_kvm_entry(*vcpu_pc(vcpu));
guest_enter_irqoff();
vcpu->mode = IN_GUEST_MODE;
ret = kvm_call_hyp(__kvm_vcpu_run, vcpu);
@@ -756,6 +781,7 @@ static int vcpu_interrupt_line(struct kvm_vcpu *vcpu, int number, bool level)
* trigger a world-switch round on the running physical CPU to set the
* virtual IRQ/FIQ fields in the HCR appropriately.
*/
kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
kvm_vcpu_kick(vcpu);
return 0;
@@ -806,7 +832,7 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level,
if (irq_num < VGIC_NR_SGIS || irq_num >= VGIC_NR_PRIVATE_IRQS)
return -EINVAL;
return kvm_vgic_inject_irq(kvm, vcpu->vcpu_id, irq_num, level);
return kvm_vgic_inject_irq(kvm, vcpu->vcpu_id, irq_num, level, NULL);
case KVM_ARM_IRQ_TYPE_SPI:
if (!irqchip_in_kernel(kvm))
return -ENXIO;
@@ -814,7 +840,7 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level,
if (irq_num < VGIC_NR_PRIVATE_IRQS)
return -EINVAL;
return kvm_vgic_inject_irq(kvm, 0, irq_num, level);
return kvm_vgic_inject_irq(kvm, 0, irq_num, level, NULL);
}
return -EINVAL;
@@ -884,7 +910,7 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
* Handle the "start in power-off" case.
*/
if (test_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features))
vcpu->arch.power_off = true;
vcpu_power_off(vcpu);
else
vcpu->arch.power_off = false;
@@ -1115,9 +1141,6 @@ static void cpu_init_hyp_mode(void *dummy)
__cpu_init_hyp_mode(pgd_ptr, hyp_stack_ptr, vector_ptr);
__cpu_init_stage2();
if (is_kernel_in_hyp_mode())
kvm_timer_init_vhe();
kvm_arm_init_debug();
}
@@ -1137,6 +1160,7 @@ static void cpu_hyp_reinit(void)
* event was cancelled before the CPU was reset.
*/
__cpu_init_stage2();
kvm_timer_init_vhe();
} else {
cpu_init_hyp_mode(NULL);
}
File diff suppressed because it is too large Load Diff
+23
View File
@@ -20,6 +20,7 @@
#include <linux/kvm_host.h>
#include <linux/io.h>
#include <linux/hugetlb.h>
#include <linux/sched/signal.h>
#include <trace/events/kvm.h>
#include <asm/pgalloc.h>
#include <asm/cacheflush.h>
@@ -1262,6 +1263,24 @@ static void coherent_cache_guest_page(struct kvm_vcpu *vcpu, kvm_pfn_t pfn,
__coherent_cache_guest_page(vcpu, pfn, size);
}
static void kvm_send_hwpoison_signal(unsigned long address,
struct vm_area_struct *vma)
{
siginfo_t info;
info.si_signo = SIGBUS;
info.si_errno = 0;
info.si_code = BUS_MCEERR_AR;
info.si_addr = (void __user *)address;
if (is_vm_hugetlb_page(vma))
info.si_addr_lsb = huge_page_shift(hstate_vma(vma));
else
info.si_addr_lsb = PAGE_SHIFT;
send_sig_info(SIGBUS, &info, current);
}
static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
struct kvm_memory_slot *memslot, unsigned long hva,
unsigned long fault_status)
@@ -1331,6 +1350,10 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
smp_rmb();
pfn = gfn_to_pfn_prot(kvm, gfn, write_fault, &writable);
if (pfn == KVM_PFN_ERR_HWPOISON) {
kvm_send_hwpoison_signal(hva, vma);
return 0;
}
if (is_error_noslot_pfn(pfn))
return -EFAULT;
+82 -37
View File
@@ -203,6 +203,24 @@ static u64 kvm_pmu_overflow_status(struct kvm_vcpu *vcpu)
return reg;
}
static void kvm_pmu_check_overflow(struct kvm_vcpu *vcpu)
{
struct kvm_pmu *pmu = &vcpu->arch.pmu;
bool overflow = !!kvm_pmu_overflow_status(vcpu);
if (pmu->irq_level == overflow)
return;
pmu->irq_level = overflow;
if (likely(irqchip_in_kernel(vcpu->kvm))) {
int ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
pmu->irq_num, overflow,
&vcpu->arch.pmu);
WARN_ON(ret);
}
}
/**
* kvm_pmu_overflow_set - set PMU overflow interrupt
* @vcpu: The vcpu pointer
@@ -210,37 +228,18 @@ static u64 kvm_pmu_overflow_status(struct kvm_vcpu *vcpu)
*/
void kvm_pmu_overflow_set(struct kvm_vcpu *vcpu, u64 val)
{
u64 reg;
if (val == 0)
return;
vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= val;
reg = kvm_pmu_overflow_status(vcpu);
if (reg != 0)
kvm_vcpu_kick(vcpu);
kvm_pmu_check_overflow(vcpu);
}
static void kvm_pmu_update_state(struct kvm_vcpu *vcpu)
{
struct kvm_pmu *pmu = &vcpu->arch.pmu;
bool overflow;
if (!kvm_arm_pmu_v3_ready(vcpu))
return;
overflow = !!kvm_pmu_overflow_status(vcpu);
if (pmu->irq_level == overflow)
return;
pmu->irq_level = overflow;
if (likely(irqchip_in_kernel(vcpu->kvm))) {
int ret;
ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
pmu->irq_num, overflow);
WARN_ON(ret);
}
kvm_pmu_check_overflow(vcpu);
}
bool kvm_pmu_should_notify_user(struct kvm_vcpu *vcpu)
@@ -451,25 +450,32 @@ bool kvm_arm_support_pmu_v3(void)
return (perf_num_counters() > 0);
}
static int kvm_arm_pmu_v3_init(struct kvm_vcpu *vcpu)
int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu)
{
if (!kvm_arm_support_pmu_v3())
return -ENODEV;
if (!vcpu->arch.pmu.created)
return 0;
/*
* We currently require an in-kernel VGIC to use the PMU emulation,
* because we do not support forwarding PMU overflow interrupts to
* userspace yet.
* A valid interrupt configuration for the PMU is either to have a
* properly configured interrupt number and using an in-kernel
* irqchip, or to not have an in-kernel GIC and not set an IRQ.
*/
if (!irqchip_in_kernel(vcpu->kvm) || !vgic_initialized(vcpu->kvm))
return -ENODEV;
if (irqchip_in_kernel(vcpu->kvm)) {
int irq = vcpu->arch.pmu.irq_num;
if (!kvm_arm_pmu_irq_initialized(vcpu))
return -EINVAL;
if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features) ||
!kvm_arm_pmu_irq_initialized(vcpu))
return -ENXIO;
if (kvm_arm_pmu_v3_ready(vcpu))
return -EBUSY;
/*
* If we are using an in-kernel vgic, at this point we know
* the vgic will be initialized, so we can check the PMU irq
* number against the dimensions of the vgic and make sure
* it's valid.
*/
if (!irq_is_ppi(irq) && !vgic_valid_spi(vcpu->kvm, irq))
return -EINVAL;
} else if (kvm_arm_pmu_irq_initialized(vcpu)) {
return -EINVAL;
}
kvm_pmu_vcpu_reset(vcpu);
vcpu->arch.pmu.ready = true;
@@ -477,7 +483,40 @@ static int kvm_arm_pmu_v3_init(struct kvm_vcpu *vcpu)
return 0;
}
#define irq_is_ppi(irq) ((irq) >= VGIC_NR_SGIS && (irq) < VGIC_NR_PRIVATE_IRQS)
static int kvm_arm_pmu_v3_init(struct kvm_vcpu *vcpu)
{
if (!kvm_arm_support_pmu_v3())
return -ENODEV;
if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features))
return -ENXIO;
if (vcpu->arch.pmu.created)
return -EBUSY;
if (irqchip_in_kernel(vcpu->kvm)) {
int ret;
/*
* If using the PMU with an in-kernel virtual GIC
* implementation, we require the GIC to be already
* initialized when initializing the PMU.
*/
if (!vgic_initialized(vcpu->kvm))
return -ENODEV;
if (!kvm_arm_pmu_irq_initialized(vcpu))
return -ENXIO;
ret = kvm_vgic_set_owner(vcpu, vcpu->arch.pmu.irq_num,
&vcpu->arch.pmu);
if (ret)
return ret;
}
vcpu->arch.pmu.created = true;
return 0;
}
/*
* For one VM the interrupt type must be same for each vcpu.
@@ -512,6 +551,9 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
int __user *uaddr = (int __user *)(long)attr->addr;
int irq;
if (!irqchip_in_kernel(vcpu->kvm))
return -EINVAL;
if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features))
return -ENODEV;
@@ -519,7 +561,7 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
return -EFAULT;
/* The PMU overflow interrupt can be a PPI or a valid SPI. */
if (!(irq_is_ppi(irq) || vgic_valid_spi(vcpu->kvm, irq)))
if (!(irq_is_ppi(irq) || irq_is_spi(irq)))
return -EINVAL;
if (!pmu_irq_is_valid(vcpu->kvm, irq))
@@ -546,6 +588,9 @@ int kvm_arm_pmu_v3_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
int __user *uaddr = (int __user *)(long)attr->addr;
int irq;
if (!irqchip_in_kernel(vcpu->kvm))
return -EINVAL;
if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features))
return -ENODEV;
+5 -3
View File
@@ -57,6 +57,7 @@ static unsigned long kvm_psci_vcpu_suspend(struct kvm_vcpu *vcpu)
* for KVM will preserve the register state.
*/
kvm_vcpu_block(vcpu);
kvm_clear_request(KVM_REQ_UNHALT, vcpu);
return PSCI_RET_SUCCESS;
}
@@ -64,6 +65,8 @@ static unsigned long kvm_psci_vcpu_suspend(struct kvm_vcpu *vcpu)
static void kvm_psci_vcpu_off(struct kvm_vcpu *vcpu)
{
vcpu->arch.power_off = true;
kvm_make_request(KVM_REQ_SLEEP, vcpu);
kvm_vcpu_kick(vcpu);
}
static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
@@ -178,10 +181,9 @@ static void kvm_prepare_system_event(struct kvm_vcpu *vcpu, u32 type)
* after this call is handled and before the VCPUs have been
* re-initialized.
*/
kvm_for_each_vcpu(i, tmp, vcpu->kvm) {
kvm_for_each_vcpu(i, tmp, vcpu->kvm)
tmp->arch.power_off = true;
kvm_vcpu_kick(tmp);
}
kvm_make_all_cpus_request(vcpu->kvm, KVM_REQ_SLEEP);
memset(&vcpu->run->system_event, 0, sizeof(vcpu->run->system_event));
vcpu->run->system_event.type = type;
+1 -1
View File
@@ -34,7 +34,7 @@ static int vgic_irqfd_set_irq(struct kvm_kernel_irq_routing_entry *e,
if (!vgic_valid_spi(kvm, spi_id))
return -EINVAL;
return kvm_vgic_inject_irq(kvm, 0, spi_id, level);
return kvm_vgic_inject_irq(kvm, 0, spi_id, level, NULL);
}
/**
+13 -11
View File
@@ -308,34 +308,36 @@ static const struct vgic_register_region vgic_v2_dist_registers[] = {
vgic_mmio_read_v2_misc, vgic_mmio_write_v2_misc, 12,
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_IGROUP,
vgic_mmio_read_rao, vgic_mmio_write_wi, 1,
vgic_mmio_read_rao, vgic_mmio_write_wi, NULL, NULL, 1,
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ENABLE_SET,
vgic_mmio_read_enable, vgic_mmio_write_senable, 1,
vgic_mmio_read_enable, vgic_mmio_write_senable, NULL, NULL, 1,
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ENABLE_CLEAR,
vgic_mmio_read_enable, vgic_mmio_write_cenable, 1,
vgic_mmio_read_enable, vgic_mmio_write_cenable, NULL, NULL, 1,
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_PENDING_SET,
vgic_mmio_read_pending, vgic_mmio_write_spending, 1,
vgic_mmio_read_pending, vgic_mmio_write_spending, NULL, NULL, 1,
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_PENDING_CLEAR,
vgic_mmio_read_pending, vgic_mmio_write_cpending, 1,
vgic_mmio_read_pending, vgic_mmio_write_cpending, NULL, NULL, 1,
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ACTIVE_SET,
vgic_mmio_read_active, vgic_mmio_write_sactive, 1,
vgic_mmio_read_active, vgic_mmio_write_sactive,
NULL, vgic_mmio_uaccess_write_sactive, 1,
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ACTIVE_CLEAR,
vgic_mmio_read_active, vgic_mmio_write_cactive, 1,
vgic_mmio_read_active, vgic_mmio_write_cactive,
NULL, vgic_mmio_uaccess_write_cactive, 1,
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_PRI,
vgic_mmio_read_priority, vgic_mmio_write_priority, 8,
VGIC_ACCESS_32bit | VGIC_ACCESS_8bit),
vgic_mmio_read_priority, vgic_mmio_write_priority, NULL, NULL,
8, VGIC_ACCESS_32bit | VGIC_ACCESS_8bit),
REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_TARGET,
vgic_mmio_read_target, vgic_mmio_write_target, 8,
vgic_mmio_read_target, vgic_mmio_write_target, NULL, NULL, 8,
VGIC_ACCESS_32bit | VGIC_ACCESS_8bit),
REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_CONFIG,
vgic_mmio_read_config, vgic_mmio_write_config, 2,
vgic_mmio_read_config, vgic_mmio_write_config, NULL, NULL, 2,
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_LENGTH(GIC_DIST_SOFTINT,
vgic_mmio_read_raz, vgic_mmio_write_sgir, 4,
+13 -9
View File
@@ -456,11 +456,13 @@ static const struct vgic_register_region vgic_v3_dist_registers[] = {
vgic_mmio_read_raz, vgic_mmio_write_wi, 1,
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ISACTIVER,
vgic_mmio_read_active, vgic_mmio_write_sactive, NULL, NULL, 1,
vgic_mmio_read_active, vgic_mmio_write_sactive,
NULL, vgic_mmio_uaccess_write_sactive, 1,
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ICACTIVER,
vgic_mmio_read_active, vgic_mmio_write_cactive, NULL, NULL, 1,
VGIC_ACCESS_32bit),
vgic_mmio_read_active, vgic_mmio_write_cactive,
NULL, vgic_mmio_uaccess_write_cactive,
1, VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_IPRIORITYR,
vgic_mmio_read_priority, vgic_mmio_write_priority, NULL, NULL,
8, VGIC_ACCESS_32bit | VGIC_ACCESS_8bit),
@@ -526,12 +528,14 @@ static const struct vgic_register_region vgic_v3_sgibase_registers[] = {
vgic_mmio_read_pending, vgic_mmio_write_cpending,
vgic_mmio_read_raz, vgic_mmio_write_wi, 4,
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_LENGTH(GICR_ISACTIVER0,
vgic_mmio_read_active, vgic_mmio_write_sactive, 4,
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_LENGTH(GICR_ICACTIVER0,
vgic_mmio_read_active, vgic_mmio_write_cactive, 4,
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_LENGTH_UACCESS(GICR_ISACTIVER0,
vgic_mmio_read_active, vgic_mmio_write_sactive,
NULL, vgic_mmio_uaccess_write_sactive,
4, VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_LENGTH_UACCESS(GICR_ICACTIVER0,
vgic_mmio_read_active, vgic_mmio_write_cactive,
NULL, vgic_mmio_uaccess_write_cactive,
4, VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_LENGTH(GICR_IPRIORITYR0,
vgic_mmio_read_priority, vgic_mmio_write_priority, 32,
VGIC_ACCESS_32bit | VGIC_ACCESS_8bit),
+55 -17
View File
@@ -231,40 +231,72 @@ static void vgic_mmio_change_active(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
* be migrated while we don't hold the IRQ locks and we don't want to be
* chasing moving targets.
*
* For private interrupts, we only have to make sure the single and only VCPU
* that can potentially queue the IRQ is stopped.
* For private interrupts we don't have to do anything because userspace
* accesses to the VGIC state already require all VCPUs to be stopped, and
* only the VCPU itself can modify its private interrupts active state, which
* guarantees that the VCPU is not running.
*/
static void vgic_change_active_prepare(struct kvm_vcpu *vcpu, u32 intid)
{
if (intid < VGIC_NR_PRIVATE_IRQS)
kvm_arm_halt_vcpu(vcpu);
else
if (intid > VGIC_NR_PRIVATE_IRQS)
kvm_arm_halt_guest(vcpu->kvm);
}
/* See vgic_change_active_prepare */
static void vgic_change_active_finish(struct kvm_vcpu *vcpu, u32 intid)
{
if (intid < VGIC_NR_PRIVATE_IRQS)
kvm_arm_resume_vcpu(vcpu);
else
if (intid > VGIC_NR_PRIVATE_IRQS)
kvm_arm_resume_guest(vcpu->kvm);
}
static void __vgic_mmio_write_cactive(struct kvm_vcpu *vcpu,
gpa_t addr, unsigned int len,
unsigned long val)
{
u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
int i;
for_each_set_bit(i, &val, len * 8) {
struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
vgic_mmio_change_active(vcpu, irq, false);
vgic_put_irq(vcpu->kvm, irq);
}
}
void vgic_mmio_write_cactive(struct kvm_vcpu *vcpu,
gpa_t addr, unsigned int len,
unsigned long val)
{
u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
mutex_lock(&vcpu->kvm->lock);
vgic_change_active_prepare(vcpu, intid);
__vgic_mmio_write_cactive(vcpu, addr, len, val);
vgic_change_active_finish(vcpu, intid);
mutex_unlock(&vcpu->kvm->lock);
}
void vgic_mmio_uaccess_write_cactive(struct kvm_vcpu *vcpu,
gpa_t addr, unsigned int len,
unsigned long val)
{
__vgic_mmio_write_cactive(vcpu, addr, len, val);
}
static void __vgic_mmio_write_sactive(struct kvm_vcpu *vcpu,
gpa_t addr, unsigned int len,
unsigned long val)
{
u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
int i;
vgic_change_active_prepare(vcpu, intid);
for_each_set_bit(i, &val, len * 8) {
struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
vgic_mmio_change_active(vcpu, irq, false);
vgic_mmio_change_active(vcpu, irq, true);
vgic_put_irq(vcpu->kvm, irq);
}
vgic_change_active_finish(vcpu, intid);
}
void vgic_mmio_write_sactive(struct kvm_vcpu *vcpu,
@@ -272,15 +304,21 @@ void vgic_mmio_write_sactive(struct kvm_vcpu *vcpu,
unsigned long val)
{
u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
int i;
mutex_lock(&vcpu->kvm->lock);
vgic_change_active_prepare(vcpu, intid);
for_each_set_bit(i, &val, len * 8) {
struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
vgic_mmio_change_active(vcpu, irq, true);
vgic_put_irq(vcpu->kvm, irq);
}
__vgic_mmio_write_sactive(vcpu, addr, len, val);
vgic_change_active_finish(vcpu, intid);
mutex_unlock(&vcpu->kvm->lock);
}
void vgic_mmio_uaccess_write_sactive(struct kvm_vcpu *vcpu,
gpa_t addr, unsigned int len,
unsigned long val)
{
__vgic_mmio_write_sactive(vcpu, addr, len, val);
}
unsigned long vgic_mmio_read_priority(struct kvm_vcpu *vcpu,
+11 -1
View File
@@ -75,7 +75,7 @@ extern struct kvm_io_device_ops kvm_io_gic_ops;
* The _WITH_LENGTH version instantiates registers with a fixed length
* and is mutually exclusive with the _PER_IRQ version.
*/
#define REGISTER_DESC_WITH_BITS_PER_IRQ(off, rd, wr, bpi, acc) \
#define REGISTER_DESC_WITH_BITS_PER_IRQ(off, rd, wr, ur, uw, bpi, acc) \
{ \
.reg_offset = off, \
.bits_per_irq = bpi, \
@@ -83,6 +83,8 @@ extern struct kvm_io_device_ops kvm_io_gic_ops;
.access_flags = acc, \
.read = rd, \
.write = wr, \
.uaccess_read = ur, \
.uaccess_write = uw, \
}
#define REGISTER_DESC_WITH_LENGTH(off, rd, wr, length, acc) \
@@ -165,6 +167,14 @@ void vgic_mmio_write_sactive(struct kvm_vcpu *vcpu,
gpa_t addr, unsigned int len,
unsigned long val);
void vgic_mmio_uaccess_write_cactive(struct kvm_vcpu *vcpu,
gpa_t addr, unsigned int len,
unsigned long val);
void vgic_mmio_uaccess_write_sactive(struct kvm_vcpu *vcpu,
gpa_t addr, unsigned int len,
unsigned long val);
unsigned long vgic_mmio_read_priority(struct kvm_vcpu *vcpu,
gpa_t addr, unsigned int len);
+45
View File
@@ -21,6 +21,10 @@
#include "vgic.h"
static bool group0_trap;
static bool group1_trap;
static bool common_trap;
void vgic_v3_set_underflow(struct kvm_vcpu *vcpu)
{
struct vgic_v3_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v3;
@@ -258,6 +262,12 @@ void vgic_v3_enable(struct kvm_vcpu *vcpu)
/* Get the show on the road... */
vgic_v3->vgic_hcr = ICH_HCR_EN;
if (group0_trap)
vgic_v3->vgic_hcr |= ICH_HCR_TALL0;
if (group1_trap)
vgic_v3->vgic_hcr |= ICH_HCR_TALL1;
if (common_trap)
vgic_v3->vgic_hcr |= ICH_HCR_TC;
}
int vgic_v3_lpi_sync_pending_status(struct kvm *kvm, struct vgic_irq *irq)
@@ -429,6 +439,26 @@ out:
return ret;
}
DEFINE_STATIC_KEY_FALSE(vgic_v3_cpuif_trap);
static int __init early_group0_trap_cfg(char *buf)
{
return strtobool(buf, &group0_trap);
}
early_param("kvm-arm.vgic_v3_group0_trap", early_group0_trap_cfg);
static int __init early_group1_trap_cfg(char *buf)
{
return strtobool(buf, &group1_trap);
}
early_param("kvm-arm.vgic_v3_group1_trap", early_group1_trap_cfg);
static int __init early_common_trap_cfg(char *buf)
{
return strtobool(buf, &common_trap);
}
early_param("kvm-arm.vgic_v3_common_trap", early_common_trap_cfg);
/**
* vgic_v3_probe - probe for a GICv3 compatible interrupt controller in DT
* @node: pointer to the DT node
@@ -480,6 +510,21 @@ int vgic_v3_probe(const struct gic_kvm_info *info)
if (kvm_vgic_global_state.vcpu_base == 0)
kvm_info("disabling GICv2 emulation\n");
#ifdef CONFIG_ARM64
if (cpus_have_const_cap(ARM64_WORKAROUND_CAVIUM_30115)) {
group0_trap = true;
group1_trap = true;
}
#endif
if (group0_trap || group1_trap || common_trap) {
kvm_info("GICv3 sysreg trapping enabled ([%s%s%s], reduced performance)\n",
group0_trap ? "G0" : "",
group1_trap ? "G1" : "",
common_trap ? "C" : "");
static_branch_enable(&vgic_v3_cpuif_trap);
}
kvm_vgic_global_state.vctrl_base = NULL;
kvm_vgic_global_state.type = VGIC_V3;
kvm_vgic_global_state.max_gic_vcpus = VGIC_V3_MAX_CPUS;
+57 -11
View File
@@ -35,11 +35,12 @@ struct vgic_global kvm_vgic_global_state __ro_after_init = {
/*
* Locking order is always:
* its->cmd_lock (mutex)
* its->its_lock (mutex)
* vgic_cpu->ap_list_lock
* kvm->lpi_list_lock
* vgic_irq->irq_lock
* kvm->lock (mutex)
* its->cmd_lock (mutex)
* its->its_lock (mutex)
* vgic_cpu->ap_list_lock
* kvm->lpi_list_lock
* vgic_irq->irq_lock
*
* If you need to take multiple locks, always take the upper lock first,
* then the lower ones, e.g. first take the its_lock, then the irq_lock.
@@ -234,10 +235,14 @@ static void vgic_sort_ap_list(struct kvm_vcpu *vcpu)
/*
* Only valid injection if changing level for level-triggered IRQs or for a
* rising edge.
* rising edge, and in-kernel connected IRQ lines can only be controlled by
* their owner.
*/
static bool vgic_validate_injection(struct vgic_irq *irq, bool level)
static bool vgic_validate_injection(struct vgic_irq *irq, bool level, void *owner)
{
if (irq->owner != owner)
return false;
switch (irq->config) {
case VGIC_CONFIG_LEVEL:
return irq->line_level != level;
@@ -285,8 +290,10 @@ retry:
* won't see this one until it exits for some other
* reason.
*/
if (vcpu)
if (vcpu) {
kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
kvm_vcpu_kick(vcpu);
}
return false;
}
@@ -332,6 +339,7 @@ retry:
spin_unlock(&irq->irq_lock);
spin_unlock(&vcpu->arch.vgic_cpu.ap_list_lock);
kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
kvm_vcpu_kick(vcpu);
return true;
@@ -346,13 +354,16 @@ retry:
* false: to ignore the call
* Level-sensitive true: raise the input signal
* false: lower the input signal
* @owner: The opaque pointer to the owner of the IRQ being raised to verify
* that the caller is allowed to inject this IRQ. Userspace
* injections will have owner == NULL.
*
* The VGIC is not concerned with devices being active-LOW or active-HIGH for
* level-sensitive interrupts. You can think of the level parameter as 1
* being HIGH and 0 being LOW and all devices being active-HIGH.
*/
int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid,
bool level)
bool level, void *owner)
{
struct kvm_vcpu *vcpu;
struct vgic_irq *irq;
@@ -374,7 +385,7 @@ int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid,
spin_lock(&irq->irq_lock);
if (!vgic_validate_injection(irq, level)) {
if (!vgic_validate_injection(irq, level, owner)) {
/* Nothing to see here, move along... */
spin_unlock(&irq->irq_lock);
vgic_put_irq(kvm, irq);
@@ -430,6 +441,39 @@ int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int virt_irq)
return 0;
}
/**
* kvm_vgic_set_owner - Set the owner of an interrupt for a VM
*
* @vcpu: Pointer to the VCPU (used for PPIs)
* @intid: The virtual INTID identifying the interrupt (PPI or SPI)
* @owner: Opaque pointer to the owner
*
* Returns 0 if intid is not already used by another in-kernel device and the
* owner is set, otherwise returns an error code.
*/
int kvm_vgic_set_owner(struct kvm_vcpu *vcpu, unsigned int intid, void *owner)
{
struct vgic_irq *irq;
int ret = 0;
if (!vgic_initialized(vcpu->kvm))
return -EAGAIN;
/* SGIs and LPIs cannot be wired up to any device */
if (!irq_is_ppi(intid) && !vgic_valid_spi(vcpu->kvm, intid))
return -EINVAL;
irq = vgic_get_irq(vcpu->kvm, vcpu, intid);
spin_lock(&irq->irq_lock);
if (irq->owner && irq->owner != owner)
ret = -EEXIST;
else
irq->owner = owner;
spin_unlock(&irq->irq_lock);
return ret;
}
/**
* vgic_prune_ap_list - Remove non-relevant interrupts from the list
*
@@ -721,8 +765,10 @@ void vgic_kick_vcpus(struct kvm *kvm)
* a good kick...
*/
kvm_for_each_vcpu(c, vcpu, kvm) {
if (kvm_vgic_vcpu_pending_irq(vcpu))
if (kvm_vgic_vcpu_pending_irq(vcpu)) {
kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
kvm_vcpu_kick(vcpu);
}
}
}
+9 -3
View File
@@ -73,17 +73,17 @@ MODULE_LICENSE("GPL");
/* Architectures should define their poll value according to the halt latency */
unsigned int halt_poll_ns = KVM_HALT_POLL_NS_DEFAULT;
module_param(halt_poll_ns, uint, S_IRUGO | S_IWUSR);
module_param(halt_poll_ns, uint, 0644);
EXPORT_SYMBOL_GPL(halt_poll_ns);
/* Default doubles per-vcpu halt_poll_ns. */
unsigned int halt_poll_ns_grow = 2;
module_param(halt_poll_ns_grow, uint, S_IRUGO | S_IWUSR);
module_param(halt_poll_ns_grow, uint, 0644);
EXPORT_SYMBOL_GPL(halt_poll_ns_grow);
/* Default resets per-vcpu halt_poll_ns . */
unsigned int halt_poll_ns_shrink;
module_param(halt_poll_ns_shrink, uint, S_IRUGO | S_IWUSR);
module_param(halt_poll_ns_shrink, uint, 0644);
EXPORT_SYMBOL_GPL(halt_poll_ns_shrink);
/*
@@ -3191,6 +3191,12 @@ static int kvm_dev_ioctl_create_vm(unsigned long type)
return PTR_ERR(file);
}
/*
* Don't call kvm_put_kvm anymore at this point; file->f_op is
* already set, with ->release() being kvm_vm_release(). In error
* cases it will be called by the final fput(file) and will take
* care of doing kvm_put_kvm(kvm).
*/
if (kvm_create_vm_debugfs(kvm, r) < 0) {
put_unused_fd(r);
fput(file);