Merge tag 'for-linus-5.16b-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip

Pull xen updates from Juergen Gross:

 - a series to speed up the boot of Xen PV guests

 - some cleanups in Xen related code

 - replacement of license texts with the appropriate SPDX headers and
   fixing of wrong SPDX headers in Xen header files

 - a small series making paravirtualized interrupt masking much simpler
   and at the same time removing complaints of objtool

 - a fix for Xen ballooning hogging workqueues for too long

 - enablement of the Xen pciback driver for Arm

 - some further small fixes/enhancements

* tag 'for-linus-5.16b-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip: (22 commits)
  xen/balloon: fix unused-variable warning
  xen/balloon: rename alloc/free_xenballooned_pages
  xen/balloon: add late_initcall_sync() for initial ballooning done
  x86/xen: remove 32-bit awareness from startup_xen
  xen: remove highmem remnants
  xen: allow pv-only hypercalls only with CONFIG_XEN_PV
  x86/xen: remove 32-bit pv leftovers
  xen-pciback: allow compiling on other archs than x86
  x86/xen: switch initial pvops IRQ functions to dummy ones
  x86/xen: remove xen_have_vcpu_info_placement flag
  x86/pvh: add prototype for xen_pvh_init()
  xen: Fix implicit type conversion
  xen: fix wrong SPDX headers of Xen related headers
  xen/pvcalls-back: Remove redundant 'flush_workqueue()' calls
  x86/xen: Remove redundant irq_enter/exit() invocations
  xen-pciback: Fix return in pm_ctrl_init()
  xen/x86: restrict PV Dom0 identity mapping
  xen/x86: there's no highmem anymore in PV mode
  xen/x86: adjust handling of the L3 user vsyscall special page table
  xen/x86: adjust xen_set_fixmap()
  ...
This commit is contained in:
Linus Torvalds
2021-11-10 11:14:21 -08:00
72 changed files with 496 additions and 968 deletions

View File

@@ -6379,6 +6379,13 @@
improve timer resolution at the expense of processing
more timer interrupts.
xen.balloon_boot_timeout= [XEN]
The time (in seconds) to wait before giving up to boot
in case initial ballooning fails to free enough memory.
Applies only when running as HVM or PVH guest and
started with less memory configured than allowed at
max. Default is 180.
xen.event_eoi_delay= [XEN]
How long to delay EOI handling in case of event
storms (jiffies). Default is 10.

View File

@@ -442,7 +442,6 @@ EXPORT_SYMBOL_GPL(HYPERVISOR_hvm_op);
EXPORT_SYMBOL_GPL(HYPERVISOR_memory_op);
EXPORT_SYMBOL_GPL(HYPERVISOR_physdev_op);
EXPORT_SYMBOL_GPL(HYPERVISOR_vcpu_op);
EXPORT_SYMBOL_GPL(HYPERVISOR_tmem_op);
EXPORT_SYMBOL_GPL(HYPERVISOR_platform_op_raw);
EXPORT_SYMBOL_GPL(HYPERVISOR_multicall);
EXPORT_SYMBOL_GPL(HYPERVISOR_vm_assist);

View File

@@ -88,7 +88,6 @@ HYPERCALL2(hvm_op);
HYPERCALL2(memory_op);
HYPERCALL2(physdev_op);
HYPERCALL3(vcpu_op);
HYPERCALL1(tmem_op);
HYPERCALL1(platform_op_raw);
HYPERCALL2(multicall);
HYPERCALL2(vm_assist);

View File

@@ -80,7 +80,6 @@ HYPERCALL2(hvm_op);
HYPERCALL2(memory_op);
HYPERCALL2(physdev_op);
HYPERCALL3(vcpu_op);
HYPERCALL1(tmem_op);
HYPERCALL1(platform_op_raw);
HYPERCALL2(multicall);
HYPERCALL2(vm_assist);

View File

@@ -577,7 +577,9 @@ void paravirt_leave_lazy_mmu(void);
void paravirt_flush_lazy_mmu(void);
void _paravirt_nop(void);
void paravirt_BUG(void);
u64 _paravirt_ident_64(u64);
unsigned long paravirt_ret0(void);
#define paravirt_nop ((void *)_paravirt_nop)

View File

@@ -248,6 +248,7 @@ privcmd_call(unsigned int call,
return res;
}
#ifdef CONFIG_XEN_PV
static inline int
HYPERVISOR_set_trap_table(struct trap_info *table)
{
@@ -280,6 +281,107 @@ HYPERVISOR_callback_op(int cmd, void *arg)
return _hypercall2(int, callback_op, cmd, arg);
}
static inline int
HYPERVISOR_set_debugreg(int reg, unsigned long value)
{
return _hypercall2(int, set_debugreg, reg, value);
}
static inline unsigned long
HYPERVISOR_get_debugreg(int reg)
{
return _hypercall1(unsigned long, get_debugreg, reg);
}
static inline int
HYPERVISOR_update_descriptor(u64 ma, u64 desc)
{
return _hypercall2(int, update_descriptor, ma, desc);
}
static inline int
HYPERVISOR_update_va_mapping(unsigned long va, pte_t new_val,
unsigned long flags)
{
return _hypercall3(int, update_va_mapping, va, new_val.pte, flags);
}
static inline int
HYPERVISOR_set_segment_base(int reg, unsigned long value)
{
return _hypercall2(int, set_segment_base, reg, value);
}
static inline void
MULTI_fpu_taskswitch(struct multicall_entry *mcl, int set)
{
mcl->op = __HYPERVISOR_fpu_taskswitch;
mcl->args[0] = set;
trace_xen_mc_entry(mcl, 1);
}
static inline void
MULTI_update_va_mapping(struct multicall_entry *mcl, unsigned long va,
pte_t new_val, unsigned long flags)
{
mcl->op = __HYPERVISOR_update_va_mapping;
mcl->args[0] = va;
mcl->args[1] = new_val.pte;
mcl->args[2] = flags;
trace_xen_mc_entry(mcl, 3);
}
static inline void
MULTI_update_descriptor(struct multicall_entry *mcl, u64 maddr,
struct desc_struct desc)
{
mcl->op = __HYPERVISOR_update_descriptor;
mcl->args[0] = maddr;
mcl->args[1] = *(unsigned long *)&desc;
trace_xen_mc_entry(mcl, 2);
}
static inline void
MULTI_mmu_update(struct multicall_entry *mcl, struct mmu_update *req,
int count, int *success_count, domid_t domid)
{
mcl->op = __HYPERVISOR_mmu_update;
mcl->args[0] = (unsigned long)req;
mcl->args[1] = count;
mcl->args[2] = (unsigned long)success_count;
mcl->args[3] = domid;
trace_xen_mc_entry(mcl, 4);
}
static inline void
MULTI_mmuext_op(struct multicall_entry *mcl, struct mmuext_op *op, int count,
int *success_count, domid_t domid)
{
mcl->op = __HYPERVISOR_mmuext_op;
mcl->args[0] = (unsigned long)op;
mcl->args[1] = count;
mcl->args[2] = (unsigned long)success_count;
mcl->args[3] = domid;
trace_xen_mc_entry(mcl, 4);
}
static inline void
MULTI_stack_switch(struct multicall_entry *mcl,
unsigned long ss, unsigned long esp)
{
mcl->op = __HYPERVISOR_stack_switch;
mcl->args[0] = ss;
mcl->args[1] = esp;
trace_xen_mc_entry(mcl, 2);
}
#endif
static inline int
HYPERVISOR_sched_op(int cmd, void *arg)
{
@@ -308,26 +410,6 @@ HYPERVISOR_platform_op(struct xen_platform_op *op)
return _hypercall1(int, platform_op, op);
}
static __always_inline int
HYPERVISOR_set_debugreg(int reg, unsigned long value)
{
return _hypercall2(int, set_debugreg, reg, value);
}
static __always_inline unsigned long
HYPERVISOR_get_debugreg(int reg)
{
return _hypercall1(unsigned long, get_debugreg, reg);
}
static inline int
HYPERVISOR_update_descriptor(u64 ma, u64 desc)
{
if (sizeof(u64) == sizeof(long))
return _hypercall2(int, update_descriptor, ma, desc);
return _hypercall4(int, update_descriptor, ma, ma>>32, desc, desc>>32);
}
static inline long
HYPERVISOR_memory_op(unsigned int cmd, void *arg)
{
@@ -340,18 +422,6 @@ HYPERVISOR_multicall(void *call_list, uint32_t nr_calls)
return _hypercall2(int, multicall, call_list, nr_calls);
}
static inline int
HYPERVISOR_update_va_mapping(unsigned long va, pte_t new_val,
unsigned long flags)
{
if (sizeof(new_val) == sizeof(long))
return _hypercall3(int, update_va_mapping, va,
new_val.pte, flags);
else
return _hypercall4(int, update_va_mapping, va,
new_val.pte, new_val.pte >> 32, flags);
}
static inline int
HYPERVISOR_event_channel_op(int cmd, void *arg)
{
@@ -394,14 +464,6 @@ HYPERVISOR_vcpu_op(int cmd, int vcpuid, void *extra_args)
return _hypercall3(int, vcpu_op, cmd, vcpuid, extra_args);
}
#ifdef CONFIG_X86_64
static inline int
HYPERVISOR_set_segment_base(int reg, unsigned long value)
{
return _hypercall2(int, set_segment_base, reg, value);
}
#endif
static inline int
HYPERVISOR_suspend(unsigned long start_info_mfn)
{
@@ -422,13 +484,6 @@ HYPERVISOR_hvm_op(int op, void *arg)
return _hypercall2(unsigned long, hvm_op, op, arg);
}
static inline int
HYPERVISOR_tmem_op(
struct tmem_op *op)
{
return _hypercall1(int, tmem_op, op);
}
static inline int
HYPERVISOR_xenpmu_op(unsigned int op, void *arg)
{
@@ -446,88 +501,4 @@ HYPERVISOR_dm_op(
return ret;
}
static inline void
MULTI_fpu_taskswitch(struct multicall_entry *mcl, int set)
{
mcl->op = __HYPERVISOR_fpu_taskswitch;
mcl->args[0] = set;
trace_xen_mc_entry(mcl, 1);
}
static inline void
MULTI_update_va_mapping(struct multicall_entry *mcl, unsigned long va,
pte_t new_val, unsigned long flags)
{
mcl->op = __HYPERVISOR_update_va_mapping;
mcl->args[0] = va;
if (sizeof(new_val) == sizeof(long)) {
mcl->args[1] = new_val.pte;
mcl->args[2] = flags;
} else {
mcl->args[1] = new_val.pte;
mcl->args[2] = new_val.pte >> 32;
mcl->args[3] = flags;
}
trace_xen_mc_entry(mcl, sizeof(new_val) == sizeof(long) ? 3 : 4);
}
static inline void
MULTI_update_descriptor(struct multicall_entry *mcl, u64 maddr,
struct desc_struct desc)
{
mcl->op = __HYPERVISOR_update_descriptor;
if (sizeof(maddr) == sizeof(long)) {
mcl->args[0] = maddr;
mcl->args[1] = *(unsigned long *)&desc;
} else {
u32 *p = (u32 *)&desc;
mcl->args[0] = maddr;
mcl->args[1] = maddr >> 32;
mcl->args[2] = *p++;
mcl->args[3] = *p;
}
trace_xen_mc_entry(mcl, sizeof(maddr) == sizeof(long) ? 2 : 4);
}
static inline void
MULTI_mmu_update(struct multicall_entry *mcl, struct mmu_update *req,
int count, int *success_count, domid_t domid)
{
mcl->op = __HYPERVISOR_mmu_update;
mcl->args[0] = (unsigned long)req;
mcl->args[1] = count;
mcl->args[2] = (unsigned long)success_count;
mcl->args[3] = domid;
trace_xen_mc_entry(mcl, 4);
}
static inline void
MULTI_mmuext_op(struct multicall_entry *mcl, struct mmuext_op *op, int count,
int *success_count, domid_t domid)
{
mcl->op = __HYPERVISOR_mmuext_op;
mcl->args[0] = (unsigned long)op;
mcl->args[1] = count;
mcl->args[2] = (unsigned long)success_count;
mcl->args[3] = domid;
trace_xen_mc_entry(mcl, 4);
}
static inline void
MULTI_stack_switch(struct multicall_entry *mcl,
unsigned long ss, unsigned long esp)
{
mcl->op = __HYPERVISOR_stack_switch;
mcl->args[0] = ss;
mcl->args[1] = esp;
trace_xen_mc_entry(mcl, 2);
}
#endif /* _ASM_X86_XEN_HYPERCALL_H */

View File

@@ -62,4 +62,8 @@ void xen_arch_register_cpu(int num);
void xen_arch_unregister_cpu(int num);
#endif
#ifdef CONFIG_PVH
void __init xen_pvh_init(struct boot_params *boot_params);
#endif
#endif /* _ASM_X86_XEN_HYPERVISOR_H */

View File

@@ -22,25 +22,6 @@ static inline int __init pci_xen_initial_domain(void)
return -1;
}
#endif
#ifdef CONFIG_XEN_DOM0
int xen_find_device_domain_owner(struct pci_dev *dev);
int xen_register_device_domain_owner(struct pci_dev *dev, uint16_t domain);
int xen_unregister_device_domain_owner(struct pci_dev *dev);
#else
static inline int xen_find_device_domain_owner(struct pci_dev *dev)
{
return -1;
}
static inline int xen_register_device_domain_owner(struct pci_dev *dev,
uint16_t domain)
{
return -1;
}
static inline int xen_unregister_device_domain_owner(struct pci_dev *dev)
{
return -1;
}
#endif
#if defined(CONFIG_PCI_MSI)
#if defined(CONFIG_PCI_XEN)

View File

@@ -46,6 +46,17 @@ asm (".pushsection .entry.text, \"ax\"\n"
".type _paravirt_nop, @function\n\t"
".popsection");
/* stub always returning 0. */
asm (".pushsection .entry.text, \"ax\"\n"
".global paravirt_ret0\n"
"paravirt_ret0:\n\t"
"xor %" _ASM_AX ", %" _ASM_AX ";\n\t"
"ret\n\t"
".size paravirt_ret0, . - paravirt_ret0\n\t"
".type paravirt_ret0, @function\n\t"
".popsection");
void __init default_banner(void)
{
printk(KERN_INFO "Booting paravirtualized kernel on %s\n",
@@ -53,7 +64,7 @@ void __init default_banner(void)
}
/* Undefined instruction for dealing with missing ops pointers. */
static void paravirt_BUG(void)
noinstr void paravirt_BUG(void)
{
BUG();
}

View File

@@ -23,6 +23,7 @@
#include <xen/features.h>
#include <xen/events.h>
#include <xen/pci.h>
#include <asm/xen/pci.h>
#include <asm/xen/cpuid.h>
#include <asm/apic.h>
@@ -585,78 +586,3 @@ int __init pci_xen_initial_domain(void)
}
#endif
#ifdef CONFIG_XEN_DOM0
struct xen_device_domain_owner {
domid_t domain;
struct pci_dev *dev;
struct list_head list;
};
static DEFINE_SPINLOCK(dev_domain_list_spinlock);
static struct list_head dev_domain_list = LIST_HEAD_INIT(dev_domain_list);
static struct xen_device_domain_owner *find_device(struct pci_dev *dev)
{
struct xen_device_domain_owner *owner;
list_for_each_entry(owner, &dev_domain_list, list) {
if (owner->dev == dev)
return owner;
}
return NULL;
}
int xen_find_device_domain_owner(struct pci_dev *dev)
{
struct xen_device_domain_owner *owner;
int domain = -ENODEV;
spin_lock(&dev_domain_list_spinlock);
owner = find_device(dev);
if (owner)
domain = owner->domain;
spin_unlock(&dev_domain_list_spinlock);
return domain;
}
EXPORT_SYMBOL_GPL(xen_find_device_domain_owner);
int xen_register_device_domain_owner(struct pci_dev *dev, uint16_t domain)
{
struct xen_device_domain_owner *owner;
owner = kzalloc(sizeof(struct xen_device_domain_owner), GFP_KERNEL);
if (!owner)
return -ENODEV;
spin_lock(&dev_domain_list_spinlock);
if (find_device(dev)) {
spin_unlock(&dev_domain_list_spinlock);
kfree(owner);
return -EEXIST;
}
owner->domain = domain;
owner->dev = dev;
list_add_tail(&owner->list, &dev_domain_list);
spin_unlock(&dev_domain_list_spinlock);
return 0;
}
EXPORT_SYMBOL_GPL(xen_register_device_domain_owner);
int xen_unregister_device_domain_owner(struct pci_dev *dev)
{
struct xen_device_domain_owner *owner;
spin_lock(&dev_domain_list_spinlock);
owner = find_device(dev);
if (!owner) {
spin_unlock(&dev_domain_list_spinlock);
return -ENODEV;
}
list_del(&owner->list);
spin_unlock(&dev_domain_list_spinlock);
kfree(owner);
return 0;
}
EXPORT_SYMBOL_GPL(xen_unregister_device_domain_owner);
#endif /* CONFIG_XEN_DOM0 */

View File

@@ -31,25 +31,10 @@ EXPORT_SYMBOL_GPL(hypercall_page);
* Pointer to the xen_vcpu_info structure or
* &HYPERVISOR_shared_info->vcpu_info[cpu]. See xen_hvm_init_shared_info
* and xen_vcpu_setup for details. By default it points to share_info->vcpu_info
* but if the hypervisor supports VCPUOP_register_vcpu_info then it can point
* to xen_vcpu_info. The pointer is used in __xen_evtchn_do_upcall to
* acknowledge pending events.
* Also more subtly it is used by the patched version of irq enable/disable
* e.g. xen_irq_enable_direct and xen_iret in PV mode.
*
* The desire to be able to do those mask/unmask operations as a single
* instruction by using the per-cpu offset held in %gs is the real reason
* vcpu info is in a per-cpu pointer and the original reason for this
* hypercall.
*
* but during boot it is switched to point to xen_vcpu_info.
* The pointer is used in __xen_evtchn_do_upcall to acknowledge pending events.
*/
DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu);
/*
* Per CPU pages used if hypervisor supports VCPUOP_register_vcpu_info
* hypercall. This can be used both in PV and PVHVM mode. The structure
* overrides the default per_cpu(xen_vcpu, cpu) value.
*/
DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info);
/* Linux <-> Xen vCPU id mapping */
@@ -84,21 +69,6 @@ EXPORT_SYMBOL(xen_start_flags);
*/
struct shared_info *HYPERVISOR_shared_info = &xen_dummy_shared_info;
/*
* Flag to determine whether vcpu info placement is available on all
* VCPUs. We assume it is to start with, and then set it to zero on
* the first failure. This is because it can succeed on some VCPUs
* and not others, since it can involve hypervisor memory allocation,
* or because the guest failed to guarantee all the appropriate
* constraints on all VCPUs (ie buffer can't cross a page boundary).
*
* Note that any particular CPU may be using a placed vcpu structure,
* but we can only optimise if the all are.
*
* 0: not available, 1: available
*/
int xen_have_vcpu_info_placement = 1;
static int xen_cpu_up_online(unsigned int cpu)
{
xen_init_lock_cpu(cpu);
@@ -124,10 +94,8 @@ int xen_cpuhp_setup(int (*cpu_up_prepare_cb)(unsigned int),
return rc >= 0 ? 0 : rc;
}
static int xen_vcpu_setup_restore(int cpu)
static void xen_vcpu_setup_restore(int cpu)
{
int rc = 0;
/* Any per_cpu(xen_vcpu) is stale, so reset it */
xen_vcpu_info_reset(cpu);
@@ -136,11 +104,8 @@ static int xen_vcpu_setup_restore(int cpu)
* be handled by hotplug.
*/
if (xen_pv_domain() ||
(xen_hvm_domain() && cpu_online(cpu))) {
rc = xen_vcpu_setup(cpu);
}
return rc;
(xen_hvm_domain() && cpu_online(cpu)))
xen_vcpu_setup(cpu);
}
/*
@@ -150,7 +115,7 @@ static int xen_vcpu_setup_restore(int cpu)
*/
void xen_vcpu_restore(void)
{
int cpu, rc;
int cpu;
for_each_possible_cpu(cpu) {
bool other_cpu = (cpu != smp_processor_id());
@@ -170,20 +135,9 @@ void xen_vcpu_restore(void)
if (xen_pv_domain() || xen_feature(XENFEAT_hvm_safe_pvclock))
xen_setup_runstate_info(cpu);
rc = xen_vcpu_setup_restore(cpu);
if (rc)
pr_emerg_once("vcpu restore failed for cpu=%d err=%d. "
"System will hang.\n", cpu, rc);
/*
* In case xen_vcpu_setup_restore() fails, do not bring up the
* VCPU. This helps us avoid the resulting OOPS when the VCPU
* accesses pvclock_vcpu_time via xen_vcpu (which is NULL.)
* Note that this does not improve the situation much -- now the
* VM hangs instead of OOPSing -- with the VCPUs that did not
* fail, spinning in stop_machine(), waiting for the failed
* VCPUs to come up.
*/
if (other_cpu && is_up && (rc == 0) &&
xen_vcpu_setup_restore(cpu);
if (other_cpu && is_up &&
HYPERVISOR_vcpu_op(VCPUOP_up, xen_vcpu_nr(cpu), NULL))
BUG();
}
@@ -200,7 +154,7 @@ void xen_vcpu_info_reset(int cpu)
}
}
int xen_vcpu_setup(int cpu)
void xen_vcpu_setup(int cpu)
{
struct vcpu_register_vcpu_info info;
int err;
@@ -221,44 +175,26 @@ int xen_vcpu_setup(int cpu)
*/
if (xen_hvm_domain()) {
if (per_cpu(xen_vcpu, cpu) == &per_cpu(xen_vcpu_info, cpu))
return 0;
return;
}
if (xen_have_vcpu_info_placement) {
vcpup = &per_cpu(xen_vcpu_info, cpu);
info.mfn = arbitrary_virt_to_mfn(vcpup);
info.offset = offset_in_page(vcpup);
vcpup = &per_cpu(xen_vcpu_info, cpu);
info.mfn = arbitrary_virt_to_mfn(vcpup);
info.offset = offset_in_page(vcpup);
/*
* Check to see if the hypervisor will put the vcpu_info
* structure where we want it, which allows direct access via
* a percpu-variable.
* N.B. This hypercall can _only_ be called once per CPU.
* Subsequent calls will error out with -EINVAL. This is due to
* the fact that hypervisor has no unregister variant and this
* hypercall does not allow to over-write info.mfn and
* info.offset.
*/
err = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info,
xen_vcpu_nr(cpu), &info);
/*
* N.B. This hypercall can _only_ be called once per CPU.
* Subsequent calls will error out with -EINVAL. This is due to
* the fact that hypervisor has no unregister variant and this
* hypercall does not allow to over-write info.mfn and
* info.offset.
*/
err = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info, xen_vcpu_nr(cpu),
&info);
if (err)
panic("register_vcpu_info failed: cpu=%d err=%d\n", cpu, err);
if (err) {
pr_warn_once("register_vcpu_info failed: cpu=%d err=%d\n",
cpu, err);
xen_have_vcpu_info_placement = 0;
} else {
/*
* This cpu is using the registered vcpu info, even if
* later ones fail to.
*/
per_cpu(xen_vcpu, cpu) = vcpup;
}
}
if (!xen_have_vcpu_info_placement)
xen_vcpu_info_reset(cpu);
return ((per_cpu(xen_vcpu, cpu) == NULL) ? -ENODEV : 0);
per_cpu(xen_vcpu, cpu) = vcpup;
}
void __init xen_banner(void)

View File

@@ -163,9 +163,9 @@ static int xen_cpu_up_prepare_hvm(unsigned int cpu)
per_cpu(xen_vcpu_id, cpu) = cpu_acpi_id(cpu);
else
per_cpu(xen_vcpu_id, cpu) = cpu;
rc = xen_vcpu_setup(cpu);
if (rc || !xen_have_vector_callback)
return rc;
xen_vcpu_setup(cpu);
if (!xen_have_vector_callback)
return 0;
if (xen_feature(XENFEAT_hvm_safe_pvclock))
xen_setup_timer(cpu);

View File

@@ -27,7 +27,6 @@
#include <linux/export.h>
#include <linux/mm.h>
#include <linux/page-flags.h>
#include <linux/highmem.h>
#include <linux/pci.h>
#include <linux/gfp.h>
#include <linux/edd.h>
@@ -993,31 +992,13 @@ void __init xen_setup_vcpu_info_placement(void)
for_each_possible_cpu(cpu) {
/* Set up direct vCPU id mapping for PV guests. */
per_cpu(xen_vcpu_id, cpu) = cpu;
/*
* xen_vcpu_setup(cpu) can fail -- in which case it
* falls back to the shared_info version for cpus
* where xen_vcpu_nr(cpu) < MAX_VIRT_CPUS.
*
* xen_cpu_up_prepare_pv() handles the rest by failing
* them in hotplug.
*/
(void) xen_vcpu_setup(cpu);
xen_vcpu_setup(cpu);
}
/*
* xen_vcpu_setup managed to place the vcpu_info within the
* percpu area for all cpus, so make use of it.
*/
if (xen_have_vcpu_info_placement) {
pv_ops.irq.save_fl = __PV_IS_CALLEE_SAVE(xen_save_fl_direct);
pv_ops.irq.irq_disable =
__PV_IS_CALLEE_SAVE(xen_irq_disable_direct);
pv_ops.irq.irq_enable =
__PV_IS_CALLEE_SAVE(xen_irq_enable_direct);
pv_ops.mmu.read_cr2 =
__PV_IS_CALLEE_SAVE(xen_read_cr2_direct);
}
pv_ops.irq.save_fl = __PV_IS_CALLEE_SAVE(xen_save_fl_direct);
pv_ops.irq.irq_disable = __PV_IS_CALLEE_SAVE(xen_irq_disable_direct);
pv_ops.irq.irq_enable = __PV_IS_CALLEE_SAVE(xen_irq_enable_direct);
pv_ops.mmu.read_cr2 = __PV_IS_CALLEE_SAVE(xen_read_cr2_direct);
}
static const struct pv_info xen_info __initconst = {
@@ -1247,12 +1228,6 @@ asmlinkage __visible void __init xen_start_kernel(void)
__supported_pte_mask &= ~_PAGE_GLOBAL;
__default_kernel_pte_mask &= ~_PAGE_GLOBAL;
/*
* Prevent page tables from being allocated in highmem, even
* if CONFIG_HIGHPTE is enabled.
*/
__userpte_alloc_gfp &= ~__GFP_HIGHMEM;
/* Get mfn list */
xen_build_dynamic_phys_to_machine();

View File

@@ -24,60 +24,6 @@ noinstr void xen_force_evtchn_callback(void)
(void)HYPERVISOR_xen_version(0, NULL);
}
asmlinkage __visible noinstr unsigned long xen_save_fl(void)
{
struct vcpu_info *vcpu;
unsigned long flags;
vcpu = this_cpu_read(xen_vcpu);
/* flag has opposite sense of mask */
flags = !vcpu->evtchn_upcall_mask;
/* convert to IF type flag
-0 -> 0x00000000
-1 -> 0xffffffff
*/
return (-flags) & X86_EFLAGS_IF;
}
__PV_CALLEE_SAVE_REGS_THUNK(xen_save_fl, ".noinstr.text");
asmlinkage __visible noinstr void xen_irq_disable(void)
{
/* There's a one instruction preempt window here. We need to
make sure we're don't switch CPUs between getting the vcpu
pointer and updating the mask. */
preempt_disable();
this_cpu_read(xen_vcpu)->evtchn_upcall_mask = 1;
preempt_enable_no_resched();
}
__PV_CALLEE_SAVE_REGS_THUNK(xen_irq_disable, ".noinstr.text");
asmlinkage __visible noinstr void xen_irq_enable(void)
{
struct vcpu_info *vcpu;
/*
* We may be preempted as soon as vcpu->evtchn_upcall_mask is
* cleared, so disable preemption to ensure we check for
* events on the VCPU we are still running on.
*/
preempt_disable();
vcpu = this_cpu_read(xen_vcpu);
vcpu->evtchn_upcall_mask = 0;
/* Doesn't matter if we get preempted here, because any
pending event will get dealt with anyway. */
barrier(); /* unmask then check (avoid races) */
if (unlikely(vcpu->evtchn_upcall_pending))
xen_force_evtchn_callback();
preempt_enable();
}
__PV_CALLEE_SAVE_REGS_THUNK(xen_irq_enable, ".noinstr.text");
static void xen_safe_halt(void)
{
/* Blocking includes an implicit local_irq_enable(). */
@@ -96,10 +42,10 @@ static void xen_halt(void)
static const typeof(pv_ops) xen_irq_ops __initconst = {
.irq = {
.save_fl = PV_CALLEE_SAVE(xen_save_fl),
.irq_disable = PV_CALLEE_SAVE(xen_irq_disable),
.irq_enable = PV_CALLEE_SAVE(xen_irq_enable),
/* Initial interrupt flag handling only called while interrupts off. */
.save_fl = __PV_IS_CALLEE_SAVE(paravirt_ret0),
.irq_disable = __PV_IS_CALLEE_SAVE(paravirt_nop),
.irq_enable = __PV_IS_CALLEE_SAVE(paravirt_BUG),
.safe_halt = xen_safe_halt,
.halt = xen_halt,

View File

@@ -41,7 +41,6 @@
* Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
*/
#include <linux/sched/mm.h>
#include <linux/highmem.h>
#include <linux/debugfs.h>
#include <linux/bug.h>
#include <linux/vmalloc.h>
@@ -86,8 +85,10 @@
#include "mmu.h"
#include "debugfs.h"
#ifdef CONFIG_X86_VSYSCALL_EMULATION
/* l3 pud for userspace vsyscall mapping */
static pud_t level3_user_vsyscall[PTRS_PER_PUD] __page_aligned_bss;
#endif
/*
* Protects atomic reservation decrease/increase against concurrent increases.
@@ -241,9 +242,11 @@ static void xen_set_pmd(pmd_t *ptr, pmd_t val)
* Associate a virtual page frame with a given physical page frame
* and protection flags for that frame.
*/
void set_pte_mfn(unsigned long vaddr, unsigned long mfn, pgprot_t flags)
void __init set_pte_mfn(unsigned long vaddr, unsigned long mfn, pgprot_t flags)
{
set_pte_vaddr(vaddr, mfn_pte(mfn, flags));
if (HYPERVISOR_update_va_mapping(vaddr, mfn_pte(mfn, flags),
UVMF_INVLPG))
BUG();
}
static bool xen_batched_set_pte(pte_t *ptep, pte_t pteval)
@@ -789,7 +792,9 @@ static void __init xen_mark_pinned(struct mm_struct *mm, struct page *page,
static void __init xen_after_bootmem(void)
{
static_branch_enable(&xen_struct_pages_ready);
#ifdef CONFIG_X86_VSYSCALL_EMULATION
SetPagePinned(virt_to_page(level3_user_vsyscall));
#endif
xen_pgd_walk(&init_mm, xen_mark_pinned, FIXADDR_TOP);
}
@@ -1192,6 +1197,13 @@ static void __init xen_pagetable_p2m_setup(void)
static void __init xen_pagetable_init(void)
{
/*
* The majority of further PTE writes is to pagetables already
* announced as such to Xen. Hence it is more efficient to use
* hypercalls for these updates.
*/
pv_ops.mmu.set_pte = __xen_set_pte;
paging_init();
xen_post_allocator_init();
@@ -1421,10 +1433,18 @@ static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd)
*
* Many of these PTE updates are done on unpinned and writable pages
* and doing a hypercall for these is unnecessary and expensive. At
* this point it is not possible to tell if a page is pinned or not,
* so always write the PTE directly and rely on Xen trapping and
* this point it is rarely possible to tell if a page is pinned, so
* mostly write the PTE directly and rely on Xen trapping and
* emulating any updates as necessary.
*/
static void __init xen_set_pte_init(pte_t *ptep, pte_t pte)
{
if (unlikely(is_early_ioremap_ptep(ptep)))
__xen_set_pte(ptep, pte);
else
native_set_pte(ptep, pte);
}
__visible pte_t xen_make_pte_init(pteval_t pte)
{
unsigned long pfn;
@@ -1446,11 +1466,6 @@ __visible pte_t xen_make_pte_init(pteval_t pte)
}
PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte_init);
static void __init xen_set_pte_init(pte_t *ptep, pte_t pte)
{
__xen_set_pte(ptep, pte);
}
/* Early in boot, while setting up the initial pagetable, assume
everything is pinned. */
static void __init xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn)
@@ -1750,7 +1765,6 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
set_page_prot(init_top_pgt, PAGE_KERNEL_RO);
set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO);
set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO);
set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO);
set_page_prot(level2_ident_pgt, PAGE_KERNEL_RO);
set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO);
set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO);
@@ -1767,6 +1781,13 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
/* Unpin Xen-provided one */
pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
#ifdef CONFIG_X86_VSYSCALL_EMULATION
/* Pin user vsyscall L3 */
set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO);
pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE,
PFN_DOWN(__pa_symbol(level3_user_vsyscall)));
#endif
/*
* At this stage there can be no user pgd, and no page structure to
* attach it to, so make sure we just set kernel pgd.
@@ -1999,6 +2020,7 @@ static unsigned char dummy_mapping[PAGE_SIZE] __page_aligned_bss;
static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
{
pte_t pte;
unsigned long vaddr;
phys >>= PAGE_SHIFT;
@@ -2039,15 +2061,15 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
break;
}
__native_set_fixmap(idx, pte);
vaddr = __fix_to_virt(idx);
if (HYPERVISOR_update_va_mapping(vaddr, pte, UVMF_INVLPG))
BUG();
#ifdef CONFIG_X86_VSYSCALL_EMULATION
/* Replicate changes to map the vsyscall page into the user
pagetable vsyscall mapping. */
if (idx == VSYSCALL_PAGE) {
unsigned long vaddr = __fix_to_virt(idx);
if (idx == VSYSCALL_PAGE)
set_pte_vaddr_pud(level3_user_vsyscall, vaddr, pte);
}
#endif
}

View File

@@ -306,10 +306,6 @@ static void __init xen_update_mem_tables(unsigned long pfn, unsigned long mfn)
BUG();
}
/* Update kernel mapping, but not for highmem. */
if (pfn >= PFN_UP(__pa(high_memory - 1)))
return;
if (HYPERVISOR_update_va_mapping((unsigned long)__va(pfn << PAGE_SHIFT),
mfn_pte(mfn, PAGE_KERNEL), 0)) {
WARN(1, "Failed to update kernel mapping for mfn=%ld pfn=%ld\n",
@@ -429,13 +425,13 @@ static unsigned long __init xen_set_identity_and_remap_chunk(
}
/*
* If the PFNs are currently mapped, the VA mapping also needs
* to be updated to be 1:1.
* If the PFNs are currently mapped, their VA mappings need to be
* zapped.
*/
for (pfn = start_pfn; pfn <= max_pfn_mapped && pfn < end_pfn; pfn++)
(void)HYPERVISOR_update_va_mapping(
(unsigned long)__va(pfn << PAGE_SHIFT),
mfn_pte(pfn, PAGE_KERNEL_IO), 0);
native_make_pte(0), 0);
return remap_pfn;
}

View File

@@ -121,34 +121,10 @@ int xen_smp_intr_init(unsigned int cpu)
void __init xen_smp_cpus_done(unsigned int max_cpus)
{
int cpu, rc, count = 0;
if (xen_hvm_domain())
native_smp_cpus_done(max_cpus);
else
calculate_max_logical_packages();
if (xen_have_vcpu_info_placement)
return;
for_each_online_cpu(cpu) {
if (xen_vcpu_nr(cpu) < MAX_VIRT_CPUS)
continue;
rc = remove_cpu(cpu);
if (rc == 0) {
/*
* Reset vcpu_info so this cpu cannot be onlined again.
*/
xen_vcpu_info_reset(cpu);
count++;
} else {
pr_warn("%s: failed to bring CPU %d down, error %d\n",
__func__, cpu, rc);
}
}
WARN(count, "%s: brought %d CPUs offline\n", __func__, count);
}
void xen_smp_send_reschedule(int cpu)
@@ -268,20 +244,16 @@ void xen_send_IPI_allbutself(int vector)
static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id)
{
irq_enter();
generic_smp_call_function_interrupt();
inc_irq_stat(irq_call_count);
irq_exit();
return IRQ_HANDLED;
}
static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id)
{
irq_enter();
generic_smp_call_function_single_interrupt();
inc_irq_stat(irq_call_count);
irq_exit();
return IRQ_HANDLED;
}

View File

@@ -458,10 +458,8 @@ static void xen_pv_stop_other_cpus(int wait)
static irqreturn_t xen_irq_work_interrupt(int irq, void *dev_id)
{
irq_enter();
irq_work_run();
inc_irq_stat(apic_irq_work_irqs);
irq_exit();
return IRQ_HANDLED;
}

View File

@@ -45,13 +45,13 @@ SYM_CODE_START(startup_xen)
/* Clear .bss */
xor %eax,%eax
mov $__bss_start, %_ASM_DI
mov $__bss_stop, %_ASM_CX
sub %_ASM_DI, %_ASM_CX
shr $__ASM_SEL(2, 3), %_ASM_CX
rep __ASM_SIZE(stos)
mov $__bss_start, %rdi
mov $__bss_stop, %rcx
sub %rdi, %rcx
shr $3, %rcx
rep stosq
mov %_ASM_SI, xen_start_info
mov %rsi, xen_start_info
mov initial_stack(%rip), %rsp
/* Set up %gs.

View File

@@ -76,9 +76,7 @@ irqreturn_t xen_debug_interrupt(int irq, void *dev_id);
bool xen_vcpu_stolen(int vcpu);
extern int xen_have_vcpu_info_placement;
int xen_vcpu_setup(int cpu);
void xen_vcpu_setup(int cpu);
void xen_vcpu_info_reset(int cpu);
void xen_setup_vcpu_info_placement(void);

Some files were not shown because too many files have changed in this diff Show More