Merge tag 'kvm-ppc-next-5.2-2' of git://git.kernel.org/pub/scm/linux/kernel/git/paulus/powerpc into HEAD
PPC KVM update for 5.2:

* Support for guests to access the new POWER9 XIVE interrupt controller
  hardware directly, reducing interrupt latency and overhead for guests.
* In-kernel implementation of the H_PAGE_INIT hypercall.
* Reduce memory usage of sparsely-populated IOMMU tables.
* Several bug fixes.

Second PPC KVM update for 5.2:

* Fix a bug, fix a spelling mistake, remove some useless code.
@@ -56,3 +56,35 @@ POWER9. Loads and stores to the watchpoint locations will not be
trapped in GDB. The watchpoint is remembered, so if the guest is
migrated back to the POWER8 host, it will start working again.

Force enabling the DAWR
=======================
Kernels (since ~v5.2) have an option to force enable the DAWR via:

  echo Y > /sys/kernel/debug/powerpc/dawr_enable_dangerous

This enables the DAWR even on POWER9.

This is a dangerous setting, USE AT YOUR OWN RISK.

Some users may not care about a bad user crashing their box
(ie. single user/desktop systems) and really want the DAWR. This
allows them to force enable DAWR.

This flag can also be used to disable DAWR access. Once this is
cleared, all DAWR access should be cleared immediately and your
machine is once again safe from crashing.

Userspace may get confused by toggling this. If DAWR is force
enabled/disabled between getting the number of breakpoints (via
PTRACE_GETHWDBGINFO) and setting the breakpoint, userspace will get an
inconsistent view of what's available. Similarly for guests.
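
For illustration (not part of this patch), userspace can observe the
effect of the toggle by re-reading the breakpoint features before
arming a watchpoint. A minimal sketch; the ptrace request and struct
come from the powerpc uapi headers:

    #include <sys/ptrace.h>
    #include <asm/ptrace.h>     /* struct ppc_debug_info */

    /* Returns non-zero when a DAWR-based range breakpoint is usable. */
    static int dawr_available(pid_t pid)
    {
        struct ppc_debug_info dbginfo;

        if (ptrace(PPC_PTRACE_GETHWDBGINFO, pid, NULL, &dbginfo))
            return 0;
        return !!(dbginfo.features & PPC_DEBUG_FEATURE_DATA_BP_DAWR);
    }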

For the DAWR to be enabled in a KVM guest, the DAWR needs to be force
enabled in the host AND the guest. For this reason, this won't work on
POWERVM as it doesn't allow the hypercall to succeed. Writes of 'Y' to the
dawr_enable_dangerous file will fail if the hypervisor doesn't support
writing the DAWR.

To double-check the DAWR is working, run this kernel selftest:
  tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c
Any errors/failures/skips mean something is wrong.
@@ -1967,6 +1967,7 @@ registers, find a list below:
  PPC   | KVM_REG_PPC_TLB3PS            | 32
  PPC   | KVM_REG_PPC_EPTCFG            | 32
  PPC   | KVM_REG_PPC_ICP_STATE         | 64
  PPC   | KVM_REG_PPC_VP_STATE          | 128
  PPC   | KVM_REG_PPC_TB_OFFSET         | 64
  PPC   | KVM_REG_PPC_SPMC1             | 32
  PPC   | KVM_REG_PPC_SPMC2             | 32
@@ -4487,6 +4488,15 @@ struct kvm_sync_regs {
    struct kvm_vcpu_events events;
};

6.75 KVM_CAP_PPC_IRQ_XIVE

Architectures: ppc
Target: vcpu
Parameters: args[0] is the XIVE device fd
            args[1] is the XIVE CPU number (server ID) for this vcpu

This capability connects the vcpu to an in-kernel XIVE device.
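
As an illustration (not part of this patch), userspace typically obtains
the device fd from KVM_CREATE_DEVICE with KVM_DEV_TYPE_XIVE and then
connects each vcpu with KVM_ENABLE_CAP. A minimal sketch, where xive_fd,
vcpu_fd and server are assumed to already exist:

    /* Assumed headers: <sys/ioctl.h>, <err.h>, <linux/kvm.h> */
    struct kvm_enable_cap cap = {
        .cap  = KVM_CAP_PPC_IRQ_XIVE,
        .args = { xive_fd, server },    /* device fd, XIVE server ID */
    };

    if (ioctl(vcpu_fd, KVM_ENABLE_CAP, &cap) < 0)
        err(1, "KVM_ENABLE_CAP(KVM_CAP_PPC_IRQ_XIVE)");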

7. Capabilities that can be enabled on VMs
------------------------------------------

Documentation/virtual/kvm/devices/xive.txt (new file, 197 lines)
@@ -0,0 +1,197 @@
POWER9 eXternal Interrupt Virtualization Engine (XIVE Gen1)
===========================================================

Device types supported:
  KVM_DEV_TYPE_XIVE     POWER9 XIVE Interrupt Controller generation 1

This device acts as a VM interrupt controller. It provides the KVM
interface to configure the interrupt sources of a VM in the underlying
POWER9 XIVE interrupt controller.

Only one XIVE instance may be instantiated. A guest XIVE device
requires a POWER9 host and the guest OS should have support for the
XIVE native exploitation interrupt mode. If not, it should run using
the legacy interrupt mode, referred to as XICS (POWER7/8).

* Device Mappings

  The KVM device exposes different MMIO ranges of the XIVE HW which
  are required for interrupt management. These are exposed to the
  guest in VMAs populated with a custom VM fault handler.

  1. Thread Interrupt Management Area (TIMA)

  Each thread has an associated Thread Interrupt Management context
  composed of a set of registers. These registers let the thread
  handle priority management and interrupt acknowledgment. The most
  important are:

      - Interrupt Pending Buffer     (IPB)
      - Current Processor Priority   (CPPR)
      - Notification Source Register (NSR)

  They are exposed to software in four different pages, each offering
  a view with a different privilege. The first page is for the
  physical thread context and the second for the hypervisor. Only the
  third (operating system) and the fourth (user level) are exposed to
  the guest.

  2. Event State Buffer (ESB)

  Each source is associated with an Event State Buffer (ESB) with
  a pair of even/odd pages which provide commands to manage the
  source: to trigger, to EOI, or to turn off the source, for
  instance.

  3. Device pass-through

  When a device is passed-through into the guest, the source
  interrupts are from a different HW controller (PHB4) and the ESB
  pages exposed to the guest should accommodate this change.

  The passthru_irq helpers, kvmppc_xive_set_mapped() and
  kvmppc_xive_clr_mapped(), are called when the device HW irqs are
  mapped into or unmapped from the guest IRQ number space. The KVM
  device extends these helpers to clear the ESB pages of the guest IRQ
  number being mapped and then lets the VM fault handler repopulate.
  The handler will insert the ESB page corresponding to the HW
  interrupt of the device being passed-through or the initial IPI ESB
  page if the device has been removed.

  The ESB remapping is fully transparent to the guest and the OS
  device driver. All handling is done within VFIO and the above
  helpers in KVM-PPC.

* Groups:

  1. KVM_DEV_XIVE_GRP_CTRL
  Provides global controls on the device
  Attributes:
    1.1 KVM_DEV_XIVE_RESET (write only)
    Resets the interrupt controller configuration for sources and event
    queues. To be used by kexec and kdump.
    Errors: none

    1.2 KVM_DEV_XIVE_EQ_SYNC (write only)
    Sync all the sources and queues and mark the EQ pages dirty. This
    is to make sure that a consistent memory state is captured when
    migrating the VM.
    Errors: none

  2. KVM_DEV_XIVE_GRP_SOURCE (write only)
  Initializes a new source in the XIVE device and masks it.
  Attributes:
    Interrupt source number (64-bit)
    The kvm_device_attr.addr points to a __u64 value:
    bits:     | 63   ....   2 |   1   |  0
    values:   |    unused     | level | type
    - type: 0:MSI 1:LSI
    - level: assertion level in case of an LSI.
  Errors:
    -E2BIG:  Interrupt source number is out of range
    -ENOMEM: Could not create a new source block
    -EFAULT: Invalid user pointer for attr->addr.
    -ENXIO:  Could not allocate underlying HW interrupt
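
  For illustration only: a hypothetical caller would create and mask
  source number 'irq' as an MSI through the device-control API roughly
  as follows (headers as in the earlier KVM_ENABLE_CAP sketch):

      __u64 state = 0;    /* bit 0 = type = MSI; level bit unused */
      struct kvm_device_attr attr = {
          .group = KVM_DEV_XIVE_GRP_SOURCE,
          .attr  = irq,                   /* 64-bit source number */
          .addr  = (__u64)(uintptr_t)&state,
      };

      if (ioctl(xive_fd, KVM_SET_DEVICE_ATTR, &attr) < 0)
          err(1, "KVM_DEV_XIVE_GRP_SOURCE");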

  3. KVM_DEV_XIVE_GRP_SOURCE_CONFIG (write only)
  Configures source targeting
  Attributes:
    Interrupt source number (64-bit)
    The kvm_device_attr.addr points to a __u64 value:
    bits:     | 63   ....   33 |  32  | 31 .. 3 |  2 .. 0
    values:   |      eisn      | mask |  server | priority
    - priority: 0-7 interrupt priority level
    - server: CPU number chosen to handle the interrupt
    - mask: mask flag (unused)
    - eisn: Effective Interrupt Source Number
  Errors:
    -ENOENT: Unknown source number
    -EINVAL: Not initialized source number
    -EINVAL: Invalid priority
    -EINVAL: Invalid CPU number.
    -EFAULT: Invalid user pointer for attr->addr.
    -ENXIO:  CPU event queues not configured or configuration of the
             underlying HW interrupt failed
    -EBUSY:  No CPU available to serve interrupt
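
  The 64-bit configuration value is assembled from the
  KVM_XIVE_SOURCE_* macros added to the uapi header later in this
  diff. A sketch, with irq, server and eisn as placeholder variables:

      __u64 cfg = (5ULL << KVM_XIVE_SOURCE_PRIORITY_SHIFT) |  /* prio 5 */
                  ((__u64)server << KVM_XIVE_SOURCE_SERVER_SHIFT) |
                  ((__u64)eisn << KVM_XIVE_SOURCE_EISN_SHIFT);
      struct kvm_device_attr attr = {
          .group = KVM_DEV_XIVE_GRP_SOURCE_CONFIG,
          .attr  = irq,
          .addr  = (__u64)(uintptr_t)&cfg,
      };

      ioctl(xive_fd, KVM_SET_DEVICE_ATTR, &attr);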

  4. KVM_DEV_XIVE_GRP_EQ_CONFIG (read-write)
  Configures an event queue of a CPU
  Attributes:
    EQ descriptor identifier (64-bit)
    The EQ descriptor identifier is a tuple (server, priority):
    bits:     | 63   ....  32 | 31 .. 3 |  2 .. 0
    values:   |    unused     |  server | priority
    The kvm_device_attr.addr points to:
      struct kvm_ppc_xive_eq {
          __u32 flags;
          __u32 qshift;
          __u64 qaddr;
          __u32 qtoggle;
          __u32 qindex;
          __u8  pad[40];
      };
    - flags: queue flags
      KVM_XIVE_EQ_ALWAYS_NOTIFY (required)
          forces notification without using the coalescing mechanism
          provided by the XIVE END ESBs.
    - qshift: queue size (power of 2)
    - qaddr: real address of queue
    - qtoggle: current queue toggle bit
    - qindex: current queue index
    - pad: reserved for future use
  Errors:
    -ENOENT: Invalid CPU number
    -EINVAL: Invalid priority
    -EINVAL: Invalid flags
    -EINVAL: Invalid queue size
    -EINVAL: Invalid queue address
    -EFAULT: Invalid user pointer for attr->addr.
    -EIO:    Configuration of the underlying HW failed
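
  On the save side of migration, userspace reads back each
  (server, priority) queue with KVM_GET_DEVICE_ATTR. A sketch of the
  descriptor packing, using the KVM_XIVE_EQ_* macros from the uapi
  header below:

      struct kvm_ppc_xive_eq eq;      /* filled in by the kernel */
      __u64 eq_id = ((__u64)server << KVM_XIVE_EQ_SERVER_SHIFT) |
                    ((__u64)priority << KVM_XIVE_EQ_PRIORITY_SHIFT);
      struct kvm_device_attr attr = {
          .group = KVM_DEV_XIVE_GRP_EQ_CONFIG,
          .attr  = eq_id,
          .addr  = (__u64)(uintptr_t)&eq,
      };

      ioctl(xive_fd, KVM_GET_DEVICE_ATTR, &attr);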

  5. KVM_DEV_XIVE_GRP_SOURCE_SYNC (write only)
  Synchronize the source to flush event notifications
  Attributes:
    Interrupt source number (64-bit)
  Errors:
    -ENOENT: Unknown source number
    -EINVAL: Not initialized source number

* VCPU state

  The XIVE IC maintains VP interrupt state in an internal structure
  called the NVT. When a VP is not dispatched on a HW processor
  thread, this structure can be updated by HW if the VP is the target
  of an event notification.

  It is important for migration to capture the cached IPB from the NVT
  as it synthesizes the priorities of the pending interrupts. We
  capture a bit more to report debug information.

  KVM_REG_PPC_VP_STATE (2 * 64 bits)
    bits:     |  63  ....  32  |  31  ....  0  |
    values:   |   TIMA word0   |   TIMA word1  |
    bits:     | 127  ..........  64 |
    values:   |        unused       |
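
  The register is accessed through the usual one-reg interface; the
  128-bit value lands in a two-u64 buffer. A sketch of saving it:

      __u64 timaval[2];   /* [0] = TIMA word0/word1, [1] = unused */
      struct kvm_one_reg reg = {
          .id   = KVM_REG_PPC_VP_STATE,
          .addr = (__u64)(uintptr_t)timaval,
      };

      ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);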

* Migration:

  Saving the state of a VM using the XIVE native exploitation mode
  should follow a specific sequence. When the VM is stopped:

  1. Mask all sources (PQ=01) to stop the flow of events.

  2. Sync the XIVE device with the KVM control KVM_DEV_XIVE_EQ_SYNC to
  flush any in-flight event notification and to stabilize the EQs. At
  this stage, the EQ pages are marked dirty to make sure they are
  transferred in the migration sequence (see the sketch after this
  list).

  3. Capture the state of the source targeting, the EQ configuration
  and the state of the thread interrupt context registers.
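
  Step 2 maps onto the KVM_DEV_XIVE_EQ_SYNC control described in the
  groups section above; a sketch of how userspace would trigger it:

      /* Flush in-flight events and mark the EQ pages dirty. */
      struct kvm_device_attr attr = {
          .group = KVM_DEV_XIVE_GRP_CTRL,
          .attr  = KVM_DEV_XIVE_EQ_SYNC,
      };

      ioctl(xive_fd, KVM_SET_DEVICE_ATTR, &attr);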

  Restore is similar:

  1. Restore the EQ configuration, as targeting depends on it.
  2. Restore targeting
  3. Restore the thread interrupt contexts
  4. Restore the source states
  5. Let the vCPU run
@@ -90,10 +90,18 @@ static inline void hw_breakpoint_disable(void)
extern void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs);
int hw_breakpoint_handler(struct die_args *args);

extern int set_dawr(struct arch_hw_breakpoint *brk);
extern bool dawr_force_enable;
static inline bool dawr_enabled(void)
{
    return dawr_force_enable;
}

#else   /* CONFIG_HAVE_HW_BREAKPOINT */
static inline void hw_breakpoint_disable(void) { }
static inline void thread_change_pc(struct task_struct *tsk,
                                    struct pt_regs *regs) { }
static inline bool dawr_enabled(void) { return false; }
#endif  /* CONFIG_HAVE_HW_BREAKPOINT */
#endif  /* __KERNEL__ */
#endif  /* _PPC_BOOK3S_64_HW_BREAKPOINT_H */

@@ -201,6 +201,8 @@ struct kvmppc_spapr_tce_iommu_table {
    struct kref kref;
};

#define TCES_PER_PAGE (PAGE_SIZE / sizeof(u64))

struct kvmppc_spapr_tce_table {
    struct list_head list;
    struct kvm *kvm;
@@ -210,6 +212,7 @@ struct kvmppc_spapr_tce_table {
    u64 offset;     /* in pages */
    u64 size;       /* window size in pages */
    struct list_head iommu_tables;
    struct mutex alloc_lock;
    struct page *pages[0];
};

@@ -222,6 +225,7 @@ extern struct kvm_device_ops kvm_xics_ops;
struct kvmppc_xive;
struct kvmppc_xive_vcpu;
extern struct kvm_device_ops kvm_xive_ops;
extern struct kvm_device_ops kvm_xive_native_ops;

struct kvmppc_passthru_irqmap;

@@ -312,7 +316,11 @@ struct kvm_arch {
#endif
#ifdef CONFIG_KVM_XICS
    struct kvmppc_xics *xics;
    struct kvmppc_xive *xive;
    struct kvmppc_xive *xive;   /* Current XIVE device in use */
    struct {
        struct kvmppc_xive *native;
        struct kvmppc_xive *xics_on_xive;
    } xive_devices;
    struct kvmppc_passthru_irqmap *pimap;
#endif
    struct kvmppc_ops *kvm_ops;
@@ -449,6 +457,7 @@ struct kvmppc_passthru_irqmap {
#define KVMPPC_IRQ_DEFAULT  0
#define KVMPPC_IRQ_MPIC     1
#define KVMPPC_IRQ_XICS     2   /* Includes a XIVE option */
#define KVMPPC_IRQ_XIVE     3   /* XIVE native exploitation mode */

#define MMIO_HPTE_CACHE_SIZE    4


@@ -197,10 +197,6 @@ extern struct kvmppc_spapr_tce_table *kvmppc_find_table(
        (iommu_tce_check_ioba((stt)->page_shift, (stt)->offset, \
                (stt)->size, (ioba), (npages)) ? \
                H_PARAMETER : H_SUCCESS)
extern long kvmppc_tce_to_ua(struct kvm *kvm, unsigned long tce,
        unsigned long *ua, unsigned long **prmap);
extern void kvmppc_tce_put(struct kvmppc_spapr_tce_table *tt,
        unsigned long idx, unsigned long tce);
extern long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
        unsigned long ioba, unsigned long tce);
extern long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu,
@@ -273,6 +269,7 @@ union kvmppc_one_reg {
        u64 addr;
        u64 length;
    } vpaval;
    u64 xive_timaval[2];
};

struct kvmppc_ops {
@@ -480,6 +477,9 @@ extern void kvm_hv_vm_activated(void);
extern void kvm_hv_vm_deactivated(void);
extern bool kvm_hv_mode_active(void);

extern void kvmppc_check_need_tlb_flush(struct kvm *kvm, int pcpu,
                                        struct kvm_nested_guest *nested);

#else
static inline void __init kvm_cma_reserve(void)
{}
@@ -594,6 +594,22 @@ extern int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval);
extern int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq,
                               int level, bool line_status);
extern void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu);

static inline int kvmppc_xive_enabled(struct kvm_vcpu *vcpu)
{
    return vcpu->arch.irq_type == KVMPPC_IRQ_XIVE;
}

extern int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev,
                                           struct kvm_vcpu *vcpu, u32 cpu);
extern void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu);
extern void kvmppc_xive_native_init_module(void);
extern void kvmppc_xive_native_exit_module(void);
extern int kvmppc_xive_native_get_vp(struct kvm_vcpu *vcpu,
                                     union kvmppc_one_reg *val);
extern int kvmppc_xive_native_set_vp(struct kvm_vcpu *vcpu,
                                     union kvmppc_one_reg *val);

#else
static inline int kvmppc_xive_set_xive(struct kvm *kvm, u32 irq, u32 server,
                                       u32 priority) { return -1; }
@@ -617,6 +633,21 @@ static inline int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval) { retur
static inline int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq,
                                      int level, bool line_status) { return -ENODEV; }
static inline void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu) { }

static inline int kvmppc_xive_enabled(struct kvm_vcpu *vcpu)
    { return 0; }
static inline int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev,
                        struct kvm_vcpu *vcpu, u32 cpu) { return -EBUSY; }
static inline void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu) { }
static inline void kvmppc_xive_native_init_module(void) { }
static inline void kvmppc_xive_native_exit_module(void) { }
static inline int kvmppc_xive_native_get_vp(struct kvm_vcpu *vcpu,
                                            union kvmppc_one_reg *val)
    { return 0; }
static inline int kvmppc_xive_native_set_vp(struct kvm_vcpu *vcpu,
                                            union kvmppc_one_reg *val)
    { return -ENOENT; }

#endif /* CONFIG_KVM_XIVE */

#if defined(CONFIG_PPC_POWERNV) && defined(CONFIG_KVM_BOOK3S_64_HANDLER)
@@ -665,6 +696,8 @@ long kvmppc_h_clear_ref(struct kvm_vcpu *vcpu, unsigned long flags,
                        unsigned long pte_index);
long kvmppc_h_clear_mod(struct kvm_vcpu *vcpu, unsigned long flags,
                        unsigned long pte_index);
long kvmppc_rm_h_page_init(struct kvm_vcpu *vcpu, unsigned long flags,
                           unsigned long dest, unsigned long src);
long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr,
                          unsigned long slb_v, unsigned int status, bool data);
unsigned long kvmppc_rm_h_xirr(struct kvm_vcpu *vcpu);

@@ -186,8 +186,8 @@
#define OPAL_XIVE_FREE_IRQ          140
#define OPAL_XIVE_SYNC              141
#define OPAL_XIVE_DUMP              142
#define OPAL_XIVE_RESERVED3         143
#define OPAL_XIVE_RESERVED4         144
#define OPAL_XIVE_GET_QUEUE_STATE   143
#define OPAL_XIVE_SET_QUEUE_STATE   144
#define OPAL_SIGNAL_SYSTEM_RESET    145
#define OPAL_NPU_INIT_CONTEXT       146
#define OPAL_NPU_DESTROY_CONTEXT    147
@@ -210,7 +210,8 @@
#define OPAL_PCI_GET_PBCQ_TUNNEL_BAR    164
#define OPAL_PCI_SET_PBCQ_TUNNEL_BAR    165
#define OPAL_NX_COPROC_INIT             167
#define OPAL_LAST                       167
#define OPAL_XIVE_GET_VP_STATE          170
#define OPAL_LAST                       170

#define QUIESCE_HOLD        1 /* Spin all calls at entry */
#define QUIESCE_REJECT      2 /* Fail all calls with OPAL_BUSY */

@@ -279,6 +279,13 @@ int64_t opal_xive_allocate_irq(uint32_t chip_id);
int64_t opal_xive_free_irq(uint32_t girq);
int64_t opal_xive_sync(uint32_t type, uint32_t id);
int64_t opal_xive_dump(uint32_t type, uint32_t id);
int64_t opal_xive_get_queue_state(uint64_t vp, uint32_t prio,
                                  __be32 *out_qtoggle,
                                  __be32 *out_qindex);
int64_t opal_xive_set_queue_state(uint64_t vp, uint32_t prio,
                                  uint32_t qtoggle,
                                  uint32_t qindex);
int64_t opal_xive_get_vp_state(uint64_t vp, __be64 *out_w01);
int64_t opal_pci_set_p2p(uint64_t phb_init, uint64_t phb_target,
                         uint64_t desc, uint16_t pe_number);


@@ -23,6 +23,7 @@
 * same offset regardless of where the code is executing
 */
extern void __iomem *xive_tima;
extern unsigned long xive_tima_os;

/*
 * Offset in the TM area of our current execution level (provided by
@@ -73,6 +74,8 @@ struct xive_q {
    u32         esc_irq;
    atomic_t    count;
    atomic_t    pending_count;
    u64         guest_qaddr;
    u32         guest_qshift;
};

/* Global enable flags for the XIVE support */
@@ -109,12 +112,26 @@ extern int xive_native_configure_queue(u32 vp_id, struct xive_q *q, u8 prio,
extern void xive_native_disable_queue(u32 vp_id, struct xive_q *q, u8 prio);

extern void xive_native_sync_source(u32 hw_irq);
extern void xive_native_sync_queue(u32 hw_irq);
extern bool is_xive_irq(struct irq_chip *chip);
extern int xive_native_enable_vp(u32 vp_id, bool single_escalation);
extern int xive_native_disable_vp(u32 vp_id);
extern int xive_native_get_vp_info(u32 vp_id, u32 *out_cam_id, u32 *out_chip_id);
extern bool xive_native_has_single_escalation(void);

extern int xive_native_get_queue_info(u32 vp_id, uint32_t prio,
                                      u64 *out_qpage,
                                      u64 *out_qsize,
                                      u64 *out_qeoi_page,
                                      u32 *out_escalate_irq,
                                      u64 *out_qflags);

extern int xive_native_get_queue_state(u32 vp_id, uint32_t prio, u32 *qtoggle,
                                       u32 *qindex);
extern int xive_native_set_queue_state(u32 vp_id, uint32_t prio, u32 qtoggle,
                                       u32 qindex);
extern int xive_native_get_vp_state(u32 vp_id, u64 *out_state);

#else

static inline bool xive_enabled(void) { return false; }

@@ -482,6 +482,8 @@ struct kvm_ppc_cpu_char {
#define  KVM_REG_PPC_ICP_PPRI_SHIFT 16  /* pending irq priority */
#define  KVM_REG_PPC_ICP_PPRI_MASK  0xff

#define KVM_REG_PPC_VP_STATE    (KVM_REG_PPC | KVM_REG_SIZE_U128 | 0x8d)

/* Device control API: PPC-specific devices */
#define KVM_DEV_MPIC_GRP_MISC       1
#define   KVM_DEV_MPIC_BASE_ADDR    0   /* 64-bit */
@@ -677,4 +679,48 @@ struct kvm_ppc_cpu_char {
#define  KVM_XICS_PRESENTED     (1ULL << 43)
#define  KVM_XICS_QUEUED        (1ULL << 44)

/* POWER9 XIVE Native Interrupt Controller */
#define KVM_DEV_XIVE_GRP_CTRL       1
#define   KVM_DEV_XIVE_RESET        1
#define   KVM_DEV_XIVE_EQ_SYNC      2
#define KVM_DEV_XIVE_GRP_SOURCE     2   /* 64-bit source identifier */
#define KVM_DEV_XIVE_GRP_SOURCE_CONFIG  3   /* 64-bit source identifier */
#define KVM_DEV_XIVE_GRP_EQ_CONFIG  4   /* 64-bit EQ identifier */
#define KVM_DEV_XIVE_GRP_SOURCE_SYNC    5   /* 64-bit source identifier */

/* Layout of 64-bit XIVE source attribute values */
#define KVM_XIVE_LEVEL_SENSITIVE    (1ULL << 0)
#define KVM_XIVE_LEVEL_ASSERTED     (1ULL << 1)

/* Layout of 64-bit XIVE source configuration attribute values */
#define KVM_XIVE_SOURCE_PRIORITY_SHIFT  0
#define KVM_XIVE_SOURCE_PRIORITY_MASK   0x7
#define KVM_XIVE_SOURCE_SERVER_SHIFT    3
#define KVM_XIVE_SOURCE_SERVER_MASK     0xfffffff8ULL
#define KVM_XIVE_SOURCE_MASKED_SHIFT    32
#define KVM_XIVE_SOURCE_MASKED_MASK     0x100000000ULL
#define KVM_XIVE_SOURCE_EISN_SHIFT      33
#define KVM_XIVE_SOURCE_EISN_MASK       0xfffffffe00000000ULL

/* Layout of 64-bit EQ identifier */
#define KVM_XIVE_EQ_PRIORITY_SHIFT  0
#define KVM_XIVE_EQ_PRIORITY_MASK   0x7
#define KVM_XIVE_EQ_SERVER_SHIFT    3
#define KVM_XIVE_EQ_SERVER_MASK     0xfffffff8ULL

/* Layout of EQ configuration values (64 bytes) */
struct kvm_ppc_xive_eq {
    __u32 flags;
    __u32 qshift;
    __u64 qaddr;
    __u32 qtoggle;
    __u32 qindex;
    __u8  pad[40];
};

#define KVM_XIVE_EQ_ALWAYS_NOTIFY   0x00000001

#define KVM_XIVE_TIMA_PAGE_OFFSET   0
#define KVM_XIVE_ESB_PAGE_OFFSET    4

#endif /* __LINUX_KVM_POWERPC_H */

@@ -29,11 +29,15 @@
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/smp.h>
#include <linux/debugfs.h>
#include <linux/init.h>

#include <asm/hw_breakpoint.h>
#include <asm/processor.h>
#include <asm/sstep.h>
#include <asm/debug.h>
#include <asm/debugfs.h>
#include <asm/hvcall.h>
#include <linux/uaccess.h>

/*
@@ -174,7 +178,7 @@ int hw_breakpoint_arch_parse(struct perf_event *bp,
    if (!ppc_breakpoint_available())
        return -ENODEV;
    length_max = 8; /* DABR */
    if (cpu_has_feature(CPU_FTR_DAWR)) {
    if (dawr_enabled()) {
        length_max = 512 ; /* 64 doublewords */
        /* DAWR region can't cross 512 boundary */
        if ((attr->bp_addr >> 9) !=
@@ -376,3 +380,59 @@ void hw_breakpoint_pmu_read(struct perf_event *bp)
{
    /* TODO */
}

bool dawr_force_enable;
EXPORT_SYMBOL_GPL(dawr_force_enable);

static ssize_t dawr_write_file_bool(struct file *file,
                                    const char __user *user_buf,
                                    size_t count, loff_t *ppos)
{
    struct arch_hw_breakpoint null_brk = {0, 0, 0};
    size_t rc;

    /* Send error to user if the hypervisor won't allow us to write DAWR */
    if ((!dawr_force_enable) &&
        (firmware_has_feature(FW_FEATURE_LPAR)) &&
        (set_dawr(&null_brk) != H_SUCCESS))
        return -1;

    rc = debugfs_write_file_bool(file, user_buf, count, ppos);
    if (rc)
        return rc;

    /* If we are clearing, make sure all CPUs have the DAWR cleared */
    if (!dawr_force_enable)
        smp_call_function((smp_call_func_t)set_dawr, &null_brk, 0);

    return rc;
}

static const struct file_operations dawr_enable_fops = {
    .read   = debugfs_read_file_bool,
    .write  = dawr_write_file_bool,
    .open   = simple_open,
    .llseek = default_llseek,
};

static int __init dawr_force_setup(void)
{
    dawr_force_enable = false;

    if (cpu_has_feature(CPU_FTR_DAWR)) {
        /* Don't setup sysfs file for user control on P8 */
        dawr_force_enable = true;
        return 0;
    }

    if (PVR_VER(mfspr(SPRN_PVR)) == PVR_POWER9) {
        /* Turn DAWR off by default, but allow admin to turn it on */
        dawr_force_enable = false;
        debugfs_create_file_unsafe("dawr_enable_dangerous", 0600,
                                   powerpc_debugfs_root,
                                   &dawr_force_enable,
                                   &dawr_enable_fops);
    }
    return 0;
}
arch_initcall(dawr_force_setup);

@@ -67,6 +67,7 @@
#include <asm/cpu_has_feature.h>
#include <asm/asm-prototypes.h>
#include <asm/stacktrace.h>
#include <asm/hw_breakpoint.h>

#include <linux/kprobes.h>
#include <linux/kdebug.h>
@@ -784,7 +785,7 @@ static inline int set_dabr(struct arch_hw_breakpoint *brk)
    return __set_dabr(dabr, dabrx);
}

static inline int set_dawr(struct arch_hw_breakpoint *brk)
int set_dawr(struct arch_hw_breakpoint *brk)
{
    unsigned long dawr, dawrx, mrd;

@@ -816,7 +817,7 @@ void __set_breakpoint(struct arch_hw_breakpoint *brk)
{
    memcpy(this_cpu_ptr(&current_brk), brk, sizeof(*brk));

    if (cpu_has_feature(CPU_FTR_DAWR))
    if (dawr_enabled())
        // Power8 or later
        set_dawr(brk);
    else if (!cpu_has_feature(CPU_FTR_ARCH_207S))
@@ -830,8 +831,8 @@ void __set_breakpoint(struct arch_hw_breakpoint *brk)
/* Check if we have DAWR or DABR hardware */
bool ppc_breakpoint_available(void)
{
    if (cpu_has_feature(CPU_FTR_DAWR))
        return true; /* POWER8 DAWR */
    if (dawr_enabled())
        return true; /* POWER8 DAWR or POWER9 forced DAWR */
    if (cpu_has_feature(CPU_FTR_ARCH_207S))
        return false; /* POWER9 with DAWR disabled */
    /* DABR: Everything but POWER8 and POWER9 */

@@ -43,6 +43,7 @@
#include <asm/tm.h>
#include <asm/asm-prototypes.h>
#include <asm/debug.h>
#include <asm/hw_breakpoint.h>

#define CREATE_TRACE_POINTS
#include <trace/events/syscalls.h>
@@ -3088,7 +3089,7 @@ long arch_ptrace(struct task_struct *child, long request,
        dbginfo.sizeof_condition = 0;
#ifdef CONFIG_HAVE_HW_BREAKPOINT
        dbginfo.features = PPC_DEBUG_FEATURE_DATA_BP_RANGE;
        if (cpu_has_feature(CPU_FTR_DAWR))
        if (dawr_enabled())
            dbginfo.features |= PPC_DEBUG_FEATURE_DATA_BP_DAWR;
#else
        dbginfo.features = 0;

@@ -94,7 +94,7 @@ endif
kvm-book3s_64-objs-$(CONFIG_KVM_XICS) += \
    book3s_xics.o

kvm-book3s_64-objs-$(CONFIG_KVM_XIVE) += book3s_xive.o
kvm-book3s_64-objs-$(CONFIG_KVM_XIVE) += book3s_xive.o book3s_xive_native.o
kvm-book3s_64-objs-$(CONFIG_SPAPR_TCE_IOMMU) += book3s_64_vio.o

kvm-book3s_64-module-objs := \

@@ -651,6 +651,18 @@ int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id,
        *val = get_reg_val(id, kvmppc_xics_get_icp(vcpu));
        break;
#endif /* CONFIG_KVM_XICS */
#ifdef CONFIG_KVM_XIVE
    case KVM_REG_PPC_VP_STATE:
        if (!vcpu->arch.xive_vcpu) {
            r = -ENXIO;
            break;
        }
        if (xive_enabled())
            r = kvmppc_xive_native_get_vp(vcpu, val);
        else
            r = -ENXIO;
        break;
#endif /* CONFIG_KVM_XIVE */
    case KVM_REG_PPC_FSCR:
        *val = get_reg_val(id, vcpu->arch.fscr);
        break;
@@ -724,6 +736,18 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id,
        r = kvmppc_xics_set_icp(vcpu, set_reg_val(id, *val));
        break;
#endif /* CONFIG_KVM_XICS */
#ifdef CONFIG_KVM_XIVE
    case KVM_REG_PPC_VP_STATE:
        if (!vcpu->arch.xive_vcpu) {
            r = -ENXIO;
            break;
        }
        if (xive_enabled())
            r = kvmppc_xive_native_set_vp(vcpu, val);
        else
            r = -ENXIO;
        break;
#endif /* CONFIG_KVM_XIVE */
    case KVM_REG_PPC_FSCR:
        vcpu->arch.fscr = set_reg_val(id, *val);
        break;
@@ -891,6 +915,17 @@ void kvmppc_core_destroy_vm(struct kvm *kvm)
    kvmppc_rtas_tokens_free(kvm);
    WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables));
#endif

#ifdef CONFIG_KVM_XICS
    /*
     * Free the XIVE devices which are not directly freed by the
     * device 'release' method
     */
    kfree(kvm->arch.xive_devices.native);
    kvm->arch.xive_devices.native = NULL;
    kfree(kvm->arch.xive_devices.xics_on_xive);
    kvm->arch.xive_devices.xics_on_xive = NULL;
#endif /* CONFIG_KVM_XICS */
}

int kvmppc_h_logical_ci_load(struct kvm_vcpu *vcpu)
@@ -1050,6 +1085,9 @@ static int kvmppc_book3s_init(void)
    if (xics_on_xive()) {
        kvmppc_xive_init_module();
        kvm_register_device_ops(&kvm_xive_ops, KVM_DEV_TYPE_XICS);
        kvmppc_xive_native_init_module();
        kvm_register_device_ops(&kvm_xive_native_ops,
                                KVM_DEV_TYPE_XIVE);
    } else
#endif
        kvm_register_device_ops(&kvm_xics_ops, KVM_DEV_TYPE_XICS);
@@ -1060,8 +1098,10 @@ static int kvmppc_book3s_init(void)
static void kvmppc_book3s_exit(void)
{
#ifdef CONFIG_KVM_XICS
    if (xics_on_xive())
    if (xics_on_xive()) {
        kvmppc_xive_exit_module();
        kvmppc_xive_native_exit_module();
    }
#endif
#ifdef CONFIG_KVM_BOOK3S_32_HANDLER
    kvmppc_book3s_exit_pr();

@@ -228,11 +228,33 @@ static void release_spapr_tce_table(struct rcu_head *head)
    unsigned long i, npages = kvmppc_tce_pages(stt->size);

    for (i = 0; i < npages; i++)
        __free_page(stt->pages[i]);
        if (stt->pages[i])
            __free_page(stt->pages[i]);

    kfree(stt);
}

static struct page *kvm_spapr_get_tce_page(struct kvmppc_spapr_tce_table *stt,
        unsigned long sttpage)
{
    struct page *page = stt->pages[sttpage];

    if (page)
        return page;

    mutex_lock(&stt->alloc_lock);
    page = stt->pages[sttpage];
    if (!page) {
        page = alloc_page(GFP_KERNEL | __GFP_ZERO);
        WARN_ON_ONCE(!page);
        if (page)
            stt->pages[sttpage] = page;
    }
    mutex_unlock(&stt->alloc_lock);

    return page;
}

static vm_fault_t kvm_spapr_tce_fault(struct vm_fault *vmf)
{
    struct kvmppc_spapr_tce_table *stt = vmf->vma->vm_file->private_data;
@@ -241,7 +263,10 @@ static vm_fault_t kvm_spapr_tce_fault(struct vm_fault *vmf)
    if (vmf->pgoff >= kvmppc_tce_pages(stt->size))
        return VM_FAULT_SIGBUS;

    page = stt->pages[vmf->pgoff];
    page = kvm_spapr_get_tce_page(stt, vmf->pgoff);
    if (!page)
        return VM_FAULT_OOM;

    get_page(page);
    vmf->page = page;
    return 0;
@@ -296,7 +321,6 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
    struct kvmppc_spapr_tce_table *siter;
    unsigned long npages, size = args->size;
    int ret = -ENOMEM;
    int i;

    if (!args->size || args->page_shift < 12 || args->page_shift > 34 ||
        (args->offset + args->size > (ULLONG_MAX >> args->page_shift)))
@@ -318,14 +342,9 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
    stt->offset = args->offset;
    stt->size = size;
    stt->kvm = kvm;
    mutex_init(&stt->alloc_lock);
    INIT_LIST_HEAD_RCU(&stt->iommu_tables);

    for (i = 0; i < npages; i++) {
        stt->pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO);
        if (!stt->pages[i])
            goto fail;
    }

    mutex_lock(&kvm->lock);

    /* Check this LIOBN hasn't been previously allocated */
@@ -352,17 +371,28 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
    if (ret >= 0)
        return ret;

fail:
    for (i = 0; i < npages; i++)
        if (stt->pages[i])
            __free_page(stt->pages[i]);

    kfree(stt);
fail_acct:
    kvmppc_account_memlimit(kvmppc_stt_pages(npages), false);
    return ret;
}

static long kvmppc_tce_to_ua(struct kvm *kvm, unsigned long tce,
        unsigned long *ua)
{
    unsigned long gfn = tce >> PAGE_SHIFT;
    struct kvm_memory_slot *memslot;

    memslot = search_memslots(kvm_memslots(kvm), gfn);
    if (!memslot)
        return -EINVAL;

    *ua = __gfn_to_hva_memslot(memslot, gfn) |
        (tce & ~(PAGE_MASK | TCE_PCI_READ | TCE_PCI_WRITE));

    return 0;
}

static long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *stt,
        unsigned long tce)
{
@@ -378,7 +408,7 @@ static long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *stt,
    if (iommu_tce_check_gpa(stt->page_shift, gpa))
        return H_TOO_HARD;

    if (kvmppc_tce_to_ua(stt->kvm, tce, &ua, NULL))
    if (kvmppc_tce_to_ua(stt->kvm, tce, &ua))
        return H_TOO_HARD;

    list_for_each_entry_rcu(stit, &stt->iommu_tables, next) {
@@ -397,6 +427,36 @@ static long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *stt,
    return H_SUCCESS;
}

/*
 * Handles TCE requests for emulated devices.
 * Puts guest TCE values to the table and expects user space to convert them.
 * Cannot fail so kvmppc_tce_validate must be called before it.
 */
static void kvmppc_tce_put(struct kvmppc_spapr_tce_table *stt,
        unsigned long idx, unsigned long tce)
{
    struct page *page;
    u64 *tbl;
    unsigned long sttpage;

    idx -= stt->offset;
    sttpage = idx / TCES_PER_PAGE;
    page = stt->pages[sttpage];

    if (!page) {
        /* We allow any TCE, not just with read|write permissions */
        if (!tce)
            return;

        page = kvm_spapr_get_tce_page(stt, sttpage);
        if (!page)
            return;
    }
    tbl = page_to_virt(page);

    tbl[idx % TCES_PER_PAGE] = tce;
}

static void kvmppc_clear_tce(struct mm_struct *mm, struct iommu_table *tbl,
        unsigned long entry)
{
@@ -543,15 +603,15 @@ long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
    if (ret != H_SUCCESS)
        return ret;

    idx = srcu_read_lock(&vcpu->kvm->srcu);

    ret = kvmppc_tce_validate(stt, tce);
    if (ret != H_SUCCESS)
        return ret;
        goto unlock_exit;

    dir = iommu_tce_direction(tce);

    idx = srcu_read_lock(&vcpu->kvm->srcu);

    if ((dir != DMA_NONE) && kvmppc_tce_to_ua(vcpu->kvm, tce, &ua, NULL)) {
    if ((dir != DMA_NONE) && kvmppc_tce_to_ua(vcpu->kvm, tce, &ua)) {
        ret = H_PARAMETER;
        goto unlock_exit;
    }
@@ -612,7 +672,7 @@ long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu,
        return ret;

    idx = srcu_read_lock(&vcpu->kvm->srcu);
    if (kvmppc_tce_to_ua(vcpu->kvm, tce_list, &ua, NULL)) {
    if (kvmppc_tce_to_ua(vcpu->kvm, tce_list, &ua)) {
        ret = H_TOO_HARD;
        goto unlock_exit;
    }
@@ -647,7 +707,7 @@ long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu,
        }
        tce = be64_to_cpu(tce);

        if (kvmppc_tce_to_ua(vcpu->kvm, tce, &ua, NULL))
        if (kvmppc_tce_to_ua(vcpu->kvm, tce, &ua))
            return H_PARAMETER;

        list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {

@@ -66,8 +66,6 @@

#endif

#define TCES_PER_PAGE (PAGE_SIZE / sizeof(u64))

/*
 * Finds a TCE table descriptor by LIOBN.
 *
@@ -88,6 +86,25 @@ struct kvmppc_spapr_tce_table *kvmppc_find_table(struct kvm *kvm,
EXPORT_SYMBOL_GPL(kvmppc_find_table);

#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
static long kvmppc_rm_tce_to_ua(struct kvm *kvm, unsigned long tce,
        unsigned long *ua, unsigned long **prmap)
{
    unsigned long gfn = tce >> PAGE_SHIFT;
    struct kvm_memory_slot *memslot;

    memslot = search_memslots(kvm_memslots_raw(kvm), gfn);
    if (!memslot)
        return -EINVAL;

    *ua = __gfn_to_hva_memslot(memslot, gfn) |
        (tce & ~(PAGE_MASK | TCE_PCI_READ | TCE_PCI_WRITE));

    if (prmap)
        *prmap = &memslot->arch.rmap[gfn - memslot->base_gfn];

    return 0;
}

/*
 * Validates TCE address.
 * At the moment flags and page mask are validated.
@@ -111,7 +128,7 @@ static long kvmppc_rm_tce_validate(struct kvmppc_spapr_tce_table *stt,
    if (iommu_tce_check_gpa(stt->page_shift, gpa))
        return H_PARAMETER;

    if (kvmppc_tce_to_ua(stt->kvm, tce, &ua, NULL))
    if (kvmppc_rm_tce_to_ua(stt->kvm, tce, &ua, NULL))
        return H_TOO_HARD;

    list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
@@ -129,7 +146,6 @@ static long kvmppc_rm_tce_validate(struct kvmppc_spapr_tce_table *stt,

    return H_SUCCESS;
}
#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */

/* Note on the use of page_address() in real mode,
 *
@@ -161,13 +177,9 @@ static u64 *kvmppc_page_address(struct page *page)
/*
 * Handles TCE requests for emulated devices.
 * Puts guest TCE values to the table and expects user space to convert them.
 * Called in both real and virtual modes.
 * Cannot fail so kvmppc_tce_validate must be called before it.
 *
 * WARNING: This will be called in real-mode on HV KVM and virtual
 *          mode on PR KVM
 * Cannot fail so kvmppc_rm_tce_validate must be called before it.
 */
void kvmppc_tce_put(struct kvmppc_spapr_tce_table *stt,
static void kvmppc_rm_tce_put(struct kvmppc_spapr_tce_table *stt,
        unsigned long idx, unsigned long tce)
{
    struct page *page;
@@ -175,35 +187,48 @@ void kvmppc_tce_put(struct kvmppc_spapr_tce_table *stt,

    idx -= stt->offset;
    page = stt->pages[idx / TCES_PER_PAGE];
    /*
     * page must not be NULL in real mode,
     * kvmppc_rm_ioba_validate() must have taken care of this.
     */
    WARN_ON_ONCE_RM(!page);
    tbl = kvmppc_page_address(page);

    tbl[idx % TCES_PER_PAGE] = tce;
}
EXPORT_SYMBOL_GPL(kvmppc_tce_put);

long kvmppc_tce_to_ua(struct kvm *kvm, unsigned long tce,
        unsigned long *ua, unsigned long **prmap)
/*
 * TCEs pages are allocated in kvmppc_rm_tce_put() which won't be able to do so
 * in real mode.
 * Check if kvmppc_rm_tce_put() can succeed in real mode, i.e. a TCEs page is
 * allocated or not required (when clearing a tce entry).
 */
static long kvmppc_rm_ioba_validate(struct kvmppc_spapr_tce_table *stt,
        unsigned long ioba, unsigned long npages, bool clearing)
{
    unsigned long gfn = tce >> PAGE_SHIFT;
    struct kvm_memory_slot *memslot;
    unsigned long i, idx, sttpage, sttpages;
    unsigned long ret = kvmppc_ioba_validate(stt, ioba, npages);

    memslot = search_memslots(kvm_memslots(kvm), gfn);
    if (!memslot)
        return -EINVAL;
    if (ret)
        return ret;
    /*
     * clearing==true says kvmppc_rm_tce_put won't be allocating pages
     * for empty tces.
     */
    if (clearing)
        return H_SUCCESS;

    *ua = __gfn_to_hva_memslot(memslot, gfn) |
        (tce & ~(PAGE_MASK | TCE_PCI_READ | TCE_PCI_WRITE));
    idx = (ioba >> stt->page_shift) - stt->offset;
    sttpage = idx / TCES_PER_PAGE;
    sttpages = _ALIGN_UP(idx % TCES_PER_PAGE + npages, TCES_PER_PAGE) /
        TCES_PER_PAGE;
    for (i = sttpage; i < sttpage + sttpages; ++i)
        if (!stt->pages[i])
            return H_TOO_HARD;

#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
    if (prmap)
        *prmap = &memslot->arch.rmap[gfn - memslot->base_gfn];
#endif

    return 0;
    return H_SUCCESS;
}
EXPORT_SYMBOL_GPL(kvmppc_tce_to_ua);

#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
static long iommu_tce_xchg_rm(struct mm_struct *mm, struct iommu_table *tbl,
        unsigned long entry, unsigned long *hpa,
        enum dma_data_direction *direction)
@@ -381,7 +406,7 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
    if (!stt)
        return H_TOO_HARD;

    ret = kvmppc_ioba_validate(stt, ioba, 1);
    ret = kvmppc_rm_ioba_validate(stt, ioba, 1, tce == 0);
    if (ret != H_SUCCESS)
        return ret;

@@ -390,7 +415,7 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
        return ret;

    dir = iommu_tce_direction(tce);
    if ((dir != DMA_NONE) && kvmppc_tce_to_ua(vcpu->kvm, tce, &ua, NULL))
    if ((dir != DMA_NONE) && kvmppc_rm_tce_to_ua(vcpu->kvm, tce, &ua, NULL))
        return H_PARAMETER;

    entry = ioba >> stt->page_shift;
@@ -409,7 +434,7 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
        }
    }

    kvmppc_tce_put(stt, entry, tce);
    kvmppc_rm_tce_put(stt, entry, tce);

    return H_SUCCESS;
}
@@ -480,7 +505,7 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
    if (tce_list & (SZ_4K - 1))
        return H_PARAMETER;

    ret = kvmppc_ioba_validate(stt, ioba, npages);
    ret = kvmppc_rm_ioba_validate(stt, ioba, npages, false);
    if (ret != H_SUCCESS)
        return ret;

@@ -492,7 +517,7 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
         */
        struct mm_iommu_table_group_mem_t *mem;

        if (kvmppc_tce_to_ua(vcpu->kvm, tce_list, &ua, NULL))
        if (kvmppc_rm_tce_to_ua(vcpu->kvm, tce_list, &ua, NULL))
            return H_TOO_HARD;

        mem = mm_iommu_lookup_rm(vcpu->kvm->mm, ua, IOMMU_PAGE_SIZE_4K);
@@ -508,7 +533,7 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
         * We do not require memory to be preregistered in this case
         * so lock rmap and do __find_linux_pte_or_hugepte().
         */
        if (kvmppc_tce_to_ua(vcpu->kvm, tce_list, &ua, &rmap))
        if (kvmppc_rm_tce_to_ua(vcpu->kvm, tce_list, &ua, &rmap))
            return H_TOO_HARD;

        rmap = (void *) vmalloc_to_phys(rmap);
@@ -542,7 +567,7 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
        unsigned long tce = be64_to_cpu(((u64 *)tces)[i]);

        ua = 0;
        if (kvmppc_tce_to_ua(vcpu->kvm, tce, &ua, NULL))
        if (kvmppc_rm_tce_to_ua(vcpu->kvm, tce, &ua, NULL))
            return H_PARAMETER;

        list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
@@ -557,7 +582,7 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
            }
        }

        kvmppc_tce_put(stt, entry + i, tce);
        kvmppc_rm_tce_put(stt, entry + i, tce);
    }

unlock_exit:
@@ -583,7 +608,7 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu,
    if (!stt)
        return H_TOO_HARD;

    ret = kvmppc_ioba_validate(stt, ioba, npages);
    ret = kvmppc_rm_ioba_validate(stt, ioba, npages, tce_value == 0);
    if (ret != H_SUCCESS)
        return ret;

@@ -610,7 +635,7 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu,
    }

    for (i = 0; i < npages; ++i, ioba += (1ULL << stt->page_shift))
        kvmppc_tce_put(stt, ioba >> stt->page_shift, tce_value);
        kvmppc_rm_tce_put(stt, ioba >> stt->page_shift, tce_value);

    return H_SUCCESS;
}
@@ -635,6 +660,10 @@ long kvmppc_h_get_tce(struct kvm_vcpu *vcpu, unsigned long liobn,

    idx = (ioba >> stt->page_shift) - stt->offset;
    page = stt->pages[idx / TCES_PER_PAGE];
    if (!page) {
        vcpu->arch.regs.gpr[4] = 0;
        return H_SUCCESS;
    }
    tbl = (u64 *)page_address(page);

    vcpu->arch.regs.gpr[4] = tbl[idx % TCES_PER_PAGE];

@@ -74,6 +74,7 @@
#include <asm/opal.h>
#include <asm/xics.h>
#include <asm/xive.h>
#include <asm/hw_breakpoint.h>

#include "book3s.h"

@@ -749,7 +750,7 @@ static bool kvmppc_doorbell_pending(struct kvm_vcpu *vcpu)
    /*
     * Ensure that the read of vcore->dpdes comes after the read
     * of vcpu->doorbell_request. This barrier matches the
     * smb_wmb() in kvmppc_guest_entry_inject().
     * smp_wmb() in kvmppc_guest_entry_inject().
     */
    smp_rmb();
    vc = vcpu->arch.vcore;
@@ -801,6 +802,80 @@ static int kvmppc_h_set_mode(struct kvm_vcpu *vcpu, unsigned long mflags,
    }
}

/* Copy guest memory in place - must reside within a single memslot */
static int kvmppc_copy_guest(struct kvm *kvm, gpa_t to, gpa_t from,
                             unsigned long len)
{
    struct kvm_memory_slot *to_memslot = NULL;
    struct kvm_memory_slot *from_memslot = NULL;
    unsigned long to_addr, from_addr;
    int r;

    /* Get HPA for from address */
    from_memslot = gfn_to_memslot(kvm, from >> PAGE_SHIFT);
    if (!from_memslot)
        return -EFAULT;
    if ((from + len) >= ((from_memslot->base_gfn + from_memslot->npages)
                         << PAGE_SHIFT))
        return -EINVAL;
    from_addr = gfn_to_hva_memslot(from_memslot, from >> PAGE_SHIFT);
    if (kvm_is_error_hva(from_addr))
        return -EFAULT;
    from_addr |= (from & (PAGE_SIZE - 1));

    /* Get HPA for to address */
    to_memslot = gfn_to_memslot(kvm, to >> PAGE_SHIFT);
    if (!to_memslot)
        return -EFAULT;
    if ((to + len) >= ((to_memslot->base_gfn + to_memslot->npages)
                       << PAGE_SHIFT))
        return -EINVAL;
    to_addr = gfn_to_hva_memslot(to_memslot, to >> PAGE_SHIFT);
    if (kvm_is_error_hva(to_addr))
        return -EFAULT;
    to_addr |= (to & (PAGE_SIZE - 1));

    /* Perform copy */
    r = raw_copy_in_user((void __user *)to_addr, (void __user *)from_addr,
                         len);
    if (r)
        return -EFAULT;
    mark_page_dirty(kvm, to >> PAGE_SHIFT);
    return 0;
}

static long kvmppc_h_page_init(struct kvm_vcpu *vcpu, unsigned long flags,
                               unsigned long dest, unsigned long src)
{
    u64 pg_sz = SZ_4K;      /* 4K page size */
    u64 pg_mask = SZ_4K - 1;
    int ret;

    /* Check for invalid flags (H_PAGE_SET_LOANED covers all CMO flags) */
    if (flags & ~(H_ICACHE_INVALIDATE | H_ICACHE_SYNCHRONIZE |
                  H_ZERO_PAGE | H_COPY_PAGE | H_PAGE_SET_LOANED))
        return H_PARAMETER;

    /* dest (and src if copy_page flag set) must be page aligned */
    if ((dest & pg_mask) || ((flags & H_COPY_PAGE) && (src & pg_mask)))
        return H_PARAMETER;

    /* zero and/or copy the page as determined by the flags */
    if (flags & H_COPY_PAGE) {
        ret = kvmppc_copy_guest(vcpu->kvm, dest, src, pg_sz);
        if (ret < 0)
            return H_PARAMETER;
    } else if (flags & H_ZERO_PAGE) {
        ret = kvm_clear_guest(vcpu->kvm, dest, pg_sz);
        if (ret < 0)
            return H_PARAMETER;
    }

    /* We can ignore the remaining flags */

    return H_SUCCESS;
}

static int kvm_arch_vcpu_yield_to(struct kvm_vcpu *target)
{
    struct kvmppc_vcore *vcore = target->arch.vcore;
@@ -1003,6 +1078,11 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
        if (nesting_enabled(vcpu->kvm))
            ret = kvmhv_copy_tofrom_guest_nested(vcpu);
        break;
    case H_PAGE_INIT:
        ret = kvmppc_h_page_init(vcpu, kvmppc_get_gpr(vcpu, 4),
                                 kvmppc_get_gpr(vcpu, 5),
                                 kvmppc_get_gpr(vcpu, 6));
        break;
    default:
        return RESUME_HOST;
    }
@@ -1047,6 +1127,7 @@ static int kvmppc_hcall_impl_hv(unsigned long cmd)
    case H_IPOLL:
    case H_XIRR_X:
#endif
    case H_PAGE_INIT:
        return 1;
    }

@@ -2504,37 +2585,6 @@ static void kvmppc_prepare_radix_vcpu(struct kvm_vcpu *vcpu, int pcpu)
    }
}

static void kvmppc_radix_check_need_tlb_flush(struct kvm *kvm, int pcpu,
                                              struct kvm_nested_guest *nested)
{
    cpumask_t *need_tlb_flush;
    int lpid;

    if (!cpu_has_feature(CPU_FTR_HVMODE))
        return;

    if (cpu_has_feature(CPU_FTR_ARCH_300))
        pcpu &= ~0x3UL;

    if (nested) {
        lpid = nested->shadow_lpid;
        need_tlb_flush = &nested->need_tlb_flush;
    } else {
        lpid = kvm->arch.lpid;
        need_tlb_flush = &kvm->arch.need_tlb_flush;
    }

    mtspr(SPRN_LPID, lpid);
    isync();
    smp_mb();

    if (cpumask_test_cpu(pcpu, need_tlb_flush)) {
        radix__local_flush_tlb_lpid_guest(lpid);
        /* Clear the bit after the TLB flush */
        cpumask_clear_cpu(pcpu, need_tlb_flush);
    }
}

static void kvmppc_start_thread(struct kvm_vcpu *vcpu, struct kvmppc_vcore *vc)
{
    int cpu;
@@ -3228,19 +3278,11 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
    for (sub = 0; sub < core_info.n_subcores; ++sub)
        spin_unlock(&core_info.vc[sub]->lock);

    if (kvm_is_radix(vc->kvm)) {
        /*
         * Do we need to flush the process scoped TLB for the LPAR?
         *
         * On POWER9, individual threads can come in here, but the
         * TLB is shared between the 4 threads in a core, hence
         * invalidating on one thread invalidates for all.
         * Thus we make all 4 threads use the same bit here.
         *
         * Hash must be flushed in realmode in order to use tlbiel.
         */
        kvmppc_radix_check_need_tlb_flush(vc->kvm, pcpu, NULL);
    }
    guest_enter_irqoff();

    srcu_idx = srcu_read_lock(&vc->kvm->srcu);

    this_cpu_disable_ftrace();

    /*
     * Interrupts will be enabled once we get into the guest,
@@ -3248,19 +3290,14 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
     */
    trace_hardirqs_on();

    guest_enter_irqoff();

    srcu_idx = srcu_read_lock(&vc->kvm->srcu);

    this_cpu_disable_ftrace();

    trap = __kvmppc_vcore_entry();

    trace_hardirqs_off();

    this_cpu_enable_ftrace();

    srcu_read_unlock(&vc->kvm->srcu, srcu_idx);

    trace_hardirqs_off();
    set_irq_happened(trap);

    spin_lock(&vc->lock);
@@ -3374,7 +3411,7 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit,
    mtspr(SPRN_PURR, vcpu->arch.purr);
    mtspr(SPRN_SPURR, vcpu->arch.spurr);

    if (cpu_has_feature(CPU_FTR_DAWR)) {
    if (dawr_enabled()) {
        mtspr(SPRN_DAWR, vcpu->arch.dawr);
        mtspr(SPRN_DAWRX, vcpu->arch.dawrx);
    }
@@ -3423,7 +3460,9 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit,
    vcpu->arch.shregs.sprg2 = mfspr(SPRN_SPRG2);
    vcpu->arch.shregs.sprg3 = mfspr(SPRN_SPRG3);

    mtspr(SPRN_PSSCR, host_psscr);
    /* Preserve PSSCR[FAKE_SUSPEND] until we've called kvmppc_save_tm_hv */
    mtspr(SPRN_PSSCR, host_psscr |
          (local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG));
    mtspr(SPRN_HFSCR, host_hfscr);
    mtspr(SPRN_CIABR, host_ciabr);
    mtspr(SPRN_DAWR, host_dawr);
@@ -3511,6 +3550,7 @@ int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
#ifdef CONFIG_ALTIVEC
    load_vr_state(&vcpu->arch.vr);
#endif
    mtspr(SPRN_VRSAVE, vcpu->arch.vrsave);

    mtspr(SPRN_DSCR, vcpu->arch.dscr);
    mtspr(SPRN_IAMR, vcpu->arch.iamr);
@@ -3602,6 +3642,7 @@ int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
#ifdef CONFIG_ALTIVEC
    store_vr_state(&vcpu->arch.vr);
#endif
    vcpu->arch.vrsave = mfspr(SPRN_VRSAVE);

    if (cpu_has_feature(CPU_FTR_TM) ||
        cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST))
@@ -3967,7 +4008,7 @@ int kvmhv_run_single_vcpu(struct kvm_run *kvm_run,
                          unsigned long lpcr)
{
    int trap, r, pcpu;
    int srcu_idx;
    int srcu_idx, lpid;
    struct kvmppc_vcore *vc;
    struct kvm *kvm = vcpu->kvm;
    struct kvm_nested_guest *nested = vcpu->arch.nested;
@@ -4043,8 +4084,12 @@ int kvmhv_run_single_vcpu(struct kvm_run *kvm_run,
    vc->vcore_state = VCORE_RUNNING;
    trace_kvmppc_run_core(vc, 0);

    if (cpu_has_feature(CPU_FTR_HVMODE))
        kvmppc_radix_check_need_tlb_flush(kvm, pcpu, nested);
    if (cpu_has_feature(CPU_FTR_HVMODE)) {
        lpid = nested ? nested->shadow_lpid : kvm->arch.lpid;
        mtspr(SPRN_LPID, lpid);
        isync();
        kvmppc_check_need_tlb_flush(kvm, pcpu, nested);
    }

    trace_hardirqs_on();
    guest_enter_irqoff();

@@ -805,3 +805,60 @@ void kvmppc_guest_entry_inject_int(struct kvm_vcpu *vcpu)
        vcpu->arch.doorbell_request = 0;
    }
}

static void flush_guest_tlb(struct kvm *kvm)
{
    unsigned long rb, set;

    rb = PPC_BIT(52);   /* IS = 2 */
    if (kvm_is_radix(kvm)) {
        /* R=1 PRS=1 RIC=2 */
        asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
                     : : "r" (rb), "i" (1), "i" (1), "i" (2),
                       "r" (0) : "memory");
        for (set = 1; set < kvm->arch.tlb_sets; ++set) {
            rb += PPC_BIT(51);  /* increment set number */
            /* R=1 PRS=1 RIC=0 */
            asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
                         : : "r" (rb), "i" (1), "i" (1), "i" (0),
                           "r" (0) : "memory");
        }
    } else {
        for (set = 0; set < kvm->arch.tlb_sets; ++set) {
            /* R=0 PRS=0 RIC=0 */
            asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
                         : : "r" (rb), "i" (0), "i" (0), "i" (0),
                           "r" (0) : "memory");
            rb += PPC_BIT(51);  /* increment set number */
        }
    }
    asm volatile("ptesync": : :"memory");
}

void kvmppc_check_need_tlb_flush(struct kvm *kvm, int pcpu,
                                 struct kvm_nested_guest *nested)
{
    cpumask_t *need_tlb_flush;

    /*
     * On POWER9, individual threads can come in here, but the
     * TLB is shared between the 4 threads in a core, hence
     * invalidating on one thread invalidates for all.
     * Thus we make all 4 threads use the same bit.
     */
    if (cpu_has_feature(CPU_FTR_ARCH_300))
        pcpu = cpu_first_thread_sibling(pcpu);

    if (nested)
        need_tlb_flush = &nested->need_tlb_flush;
    else
        need_tlb_flush = &kvm->arch.need_tlb_flush;

    if (cpumask_test_cpu(pcpu, need_tlb_flush)) {
        flush_guest_tlb(kvm);

        /* Clear the bit after the TLB flush */
        cpumask_clear_cpu(pcpu, need_tlb_flush);
    }
}
EXPORT_SYMBOL_GPL(kvmppc_check_need_tlb_flush);

@@ -13,6 +13,7 @@
#include <linux/hugetlb.h>
#include <linux/module.h>
#include <linux/log2.h>
#include <linux/sizes.h>

#include <asm/trace.h>
#include <asm/kvm_ppc.h>
@@ -867,6 +868,149 @@ long kvmppc_h_clear_mod(struct kvm_vcpu *vcpu, unsigned long flags,
    return ret;
}

static int kvmppc_get_hpa(struct kvm_vcpu *vcpu, unsigned long gpa,
                          int writing, unsigned long *hpa,
                          struct kvm_memory_slot **memslot_p)
{
    struct kvm *kvm = vcpu->kvm;
    struct kvm_memory_slot *memslot;
    unsigned long gfn, hva, pa, psize = PAGE_SHIFT;
    unsigned int shift;
    pte_t *ptep, pte;

    /* Find the memslot for this address */
    gfn = gpa >> PAGE_SHIFT;
    memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn);
    if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
        return H_PARAMETER;

    /* Translate to host virtual address */
    hva = __gfn_to_hva_memslot(memslot, gfn);

    /* Try to find the host pte for that virtual address */
    ptep = __find_linux_pte(vcpu->arch.pgdir, hva, NULL, &shift);
    if (!ptep)
        return H_TOO_HARD;
    pte = kvmppc_read_update_linux_pte(ptep, writing);
    if (!pte_present(pte))
        return H_TOO_HARD;

    /* Convert to a physical address */
    if (shift)
        psize = 1UL << shift;
    pa = pte_pfn(pte) << PAGE_SHIFT;
    pa |= hva & (psize - 1);
    pa |= gpa & ~PAGE_MASK;

    if (hpa)
        *hpa = pa;
    if (memslot_p)
        *memslot_p = memslot;

    return H_SUCCESS;
}

static long kvmppc_do_h_page_init_zero(struct kvm_vcpu *vcpu,
                                       unsigned long dest)
{
    struct kvm_memory_slot *memslot;
    struct kvm *kvm = vcpu->kvm;
    unsigned long pa, mmu_seq;
    long ret = H_SUCCESS;
    int i;

    /* Used later to detect if we might have been invalidated */
    mmu_seq = kvm->mmu_notifier_seq;
    smp_rmb();

    ret = kvmppc_get_hpa(vcpu, dest, 1, &pa, &memslot);
    if (ret != H_SUCCESS)
        return ret;

    /* Check if we've been invalidated */
    raw_spin_lock(&kvm->mmu_lock.rlock);
    if (mmu_notifier_retry(kvm, mmu_seq)) {
        ret = H_TOO_HARD;
        goto out_unlock;
    }

    /* Zero the page */
    for (i = 0; i < SZ_4K; i += L1_CACHE_BYTES, pa += L1_CACHE_BYTES)
        dcbz((void *)pa);
    kvmppc_update_dirty_map(memslot, dest >> PAGE_SHIFT, PAGE_SIZE);

out_unlock:
    raw_spin_unlock(&kvm->mmu_lock.rlock);
    return ret;
}

static long kvmppc_do_h_page_init_copy(struct kvm_vcpu *vcpu,
                                       unsigned long dest, unsigned long src)
{
    unsigned long dest_pa, src_pa, mmu_seq;
    struct kvm_memory_slot *dest_memslot;
    struct kvm *kvm = vcpu->kvm;
    long ret = H_SUCCESS;

    /* Used later to detect if we might have been invalidated */
    mmu_seq = kvm->mmu_notifier_seq;
    smp_rmb();

    ret = kvmppc_get_hpa(vcpu, dest, 1, &dest_pa, &dest_memslot);
    if (ret != H_SUCCESS)
        return ret;
    ret = kvmppc_get_hpa(vcpu, src, 0, &src_pa, NULL);
    if (ret != H_SUCCESS)
        return ret;

    /* Check if we've been invalidated */
    raw_spin_lock(&kvm->mmu_lock.rlock);
    if (mmu_notifier_retry(kvm, mmu_seq)) {
        ret = H_TOO_HARD;
        goto out_unlock;
    }

    /* Copy the page */
    memcpy((void *)dest_pa, (void *)src_pa, SZ_4K);

    kvmppc_update_dirty_map(dest_memslot, dest >> PAGE_SHIFT, PAGE_SIZE);

out_unlock:
    raw_spin_unlock(&kvm->mmu_lock.rlock);
    return ret;
}

long kvmppc_rm_h_page_init(struct kvm_vcpu *vcpu, unsigned long flags,
                           unsigned long dest, unsigned long src)
{
    struct kvm *kvm = vcpu->kvm;
    u64 pg_mask = SZ_4K - 1;    /* 4K page size */
    long ret = H_SUCCESS;

    /* Don't handle radix mode here, go up to the virtual mode handler */
    if (kvm_is_radix(kvm))
        return H_TOO_HARD;

    /* Check for invalid flags (H_PAGE_SET_LOANED covers all CMO flags) */
    if (flags & ~(H_ICACHE_INVALIDATE | H_ICACHE_SYNCHRONIZE |
                  H_ZERO_PAGE | H_COPY_PAGE | H_PAGE_SET_LOANED))
        return H_PARAMETER;

    /* dest (and src if copy_page flag set) must be page aligned */
    if ((dest & pg_mask) || ((flags & H_COPY_PAGE) && (src & pg_mask)))
        return H_PARAMETER;

    /* zero and/or copy the page as determined by the flags */
    if (flags & H_COPY_PAGE)
        ret = kvmppc_do_h_page_init_copy(vcpu, dest, src);
    else if (flags & H_ZERO_PAGE)
        ret = kvmppc_do_h_page_init_zero(vcpu, dest);

    /* We can ignore the other flags */

    return ret;
}

void kvmppc_invalidate_hpte(struct kvm *kvm, __be64 *hptep,
                            unsigned long pte_index)
{