x86/xen: remove 32-bit Xen PV guest support

Xen requires 64-bit machines today, and since Xen 4.14 it can be built
without 32-bit PV guest support. There is no need to carry the burden of
32-bit PV guest support in the kernel any longer, as new guests can be
either HVM or PVH, or they can use a 64-bit kernel. Remove the 32-bit
Xen PV support from the kernel.

Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Signed-off-by: Juergen Gross <jgross@suse.com>
@@ -449,8 +449,6 @@
 .macro SWITCH_TO_KERNEL_STACK
 
-	ALTERNATIVE     "", "jmp .Lend_\@", X86_FEATURE_XENPV
-
 	BUG_IF_WRONG_CR3
 
 	SWITCH_TO_KERNEL_CR3 scratch_reg=%eax
@@ -599,8 +597,6 @@
  */
 .macro SWITCH_TO_ENTRY_STACK
 
-	ALTERNATIVE     "", "jmp .Lend_\@", X86_FEATURE_XENPV
-
 	/* Bytes to copy */
 	movl	$PTREGS_SIZE, %ecx
 
@@ -872,17 +868,6 @@ SYM_ENTRY(__begin_SYSENTER_singlestep_region, SYM_L_GLOBAL, SYM_A_NONE)
  * will ignore all of the single-step traps generated in this range.
  */
 
-#ifdef CONFIG_XEN_PV
-/*
- * Xen doesn't set %esp to be precisely what the normal SYSENTER
- * entry point expects, so fix it up before using the normal path.
- */
-SYM_CODE_START(xen_sysenter_target)
-	addl	$5*4, %esp		/* remove xen-provided frame */
-	jmp	.Lsysenter_past_esp
-SYM_CODE_END(xen_sysenter_target)
-#endif
-
 /*
  * 32-bit SYSENTER entry.
  *
@@ -965,9 +950,8 @@ SYM_FUNC_START(entry_SYSENTER_32)
 
 	movl	%esp, %eax
 	call	do_SYSENTER_32
-	/* XEN PV guests always use IRET path */
-	ALTERNATIVE "testl %eax, %eax; jz .Lsyscall_32_done", \
-		    "jmp .Lsyscall_32_done", X86_FEATURE_XENPV
+	testl	%eax, %eax
+	jz	.Lsyscall_32_done
 
 	STACKLEAK_ERASE
 
@@ -1165,95 +1149,6 @@ SYM_FUNC_END(entry_INT80_32)
 #endif
 .endm
 
-#ifdef CONFIG_PARAVIRT
-SYM_CODE_START(native_iret)
-	iret
-	_ASM_EXTABLE(native_iret, asm_iret_error)
-SYM_CODE_END(native_iret)
-#endif
-
-#ifdef CONFIG_XEN_PV
-/*
- * See comment in entry_64.S for further explanation
- *
- * Note: This is not an actual IDT entry point. It's a XEN specific entry
- * point and therefore named to match the 64-bit trampoline counterpart.
- */
-SYM_FUNC_START(xen_asm_exc_xen_hypervisor_callback)
-	/*
-	 * Check to see if we got the event in the critical
-	 * region in xen_iret_direct, after we've reenabled
-	 * events and checked for pending events. This simulates
-	 * iret instruction's behaviour where it delivers a
-	 * pending interrupt when enabling interrupts:
-	 */
-	cmpl	$xen_iret_start_crit, (%esp)
-	jb	1f
-	cmpl	$xen_iret_end_crit, (%esp)
-	jae	1f
-	call	xen_iret_crit_fixup
-1:
-	pushl	$-1			/* orig_ax = -1 => not a system call */
-	SAVE_ALL
-	ENCODE_FRAME_POINTER
-
-	mov	%esp, %eax
-	call	xen_pv_evtchn_do_upcall
-	jmp	handle_exception_return
-SYM_FUNC_END(xen_asm_exc_xen_hypervisor_callback)
-
-/*
- * Hypervisor uses this for application faults while it executes.
- * We get here for two reasons:
- *  1. Fault while reloading DS, ES, FS or GS
- *  2. Fault while executing IRET
- * Category 1 we fix up by reattempting the load, and zeroing the segment
- * register if the load fails.
- * Category 2 we fix up by jumping to do_iret_error. We cannot use the
- * normal Linux return path in this case because if we use the IRET hypercall
- * to pop the stack frame we end up in an infinite loop of failsafe callbacks.
- * We distinguish between categories by maintaining a status value in EAX.
- */
-SYM_FUNC_START(xen_failsafe_callback)
-	pushl	%eax
-	movl	$1, %eax
-1:	mov	4(%esp), %ds
-2:	mov	8(%esp), %es
-3:	mov	12(%esp), %fs
-4:	mov	16(%esp), %gs
-	/* EAX == 0 => Category 1 (Bad segment)
-	   EAX != 0 => Category 2 (Bad IRET) */
-	testl	%eax, %eax
-	popl	%eax
-	lea	16(%esp), %esp
-	jz	5f
-	jmp	asm_iret_error
-5:	pushl	$-1			/* orig_ax = -1 => not a system call */
-	SAVE_ALL
-	ENCODE_FRAME_POINTER
-	jmp	handle_exception_return
-
-.section .fixup, "ax"
-6:	xorl	%eax, %eax
-	movl	%eax, 4(%esp)
-	jmp	1b
-7:	xorl	%eax, %eax
-	movl	%eax, 8(%esp)
-	jmp	2b
-8:	xorl	%eax, %eax
-	movl	%eax, 12(%esp)
-	jmp	3b
-9:	xorl	%eax, %eax
-	movl	%eax, 16(%esp)
-	jmp	4b
-.previous
-	_ASM_EXTABLE(1b, 6b)
-	_ASM_EXTABLE(2b, 7b)
-	_ASM_EXTABLE(3b, 8b)
-	_ASM_EXTABLE(4b, 9b)
-SYM_FUNC_END(xen_failsafe_callback)
-#endif /* CONFIG_XEN_PV */
-
 SYM_CODE_START_LOCAL_NOALIGN(handle_exception)
 	/* the function address is in %gs's slot on the stack */
 	SAVE_ALL switch_stacks=1 skip_gs=1 unwind_espfix=1
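Aside: the two-category logic of the removed xen_failsafe_callback can be
modeled in a few lines of plain C. This is an illustrative sketch only, not
kernel code; classify() and its inputs are invented names standing in for the
faulting segment reloads at labels 1:-4: and their .fixup handlers at 6:-9:.

#include <stdbool.h>
#include <stdio.h>

enum failsafe_category { BAD_SEGMENT = 1, BAD_IRET = 2 };

/*
 * seg_fault[i] models whether the i-th "mov x(%esp),%seg" faults; the
 * paired .fixup entry zeroes the saved selector, clears the status
 * register and retries, so a faulting load always completes on the
 * second attempt with a null selector.
 */
static enum failsafe_category classify(const bool seg_fault[4])
{
	int status = 1;			/* movl $1, %eax */

	for (int i = 0; i < 4; i++)
		if (seg_fault[i])
			status = 0;	/* 6:-9: xorl %eax, %eax */

	/* testl %eax, %eax; jz 5f; jmp asm_iret_error */
	return status ? BAD_IRET : BAD_SEGMENT;
}

int main(void)
{
	const bool none[4]      = { false, false, false, false };
	const bool fs_faults[4] = { false, false, true,  false };

	printf("all reloads ok    -> category %d (bad IRET)\n", classify(none));
	printf("%%fs reload faults -> category %d (bad segment)\n",
	       classify(fs_faults));
	return 0;
}

If no segment reload ever faults, the only reason Xen can have invoked the
failsafe callback is a faulting IRET, which is why status stays nonzero.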
@@ -16,33 +16,3 @@ ELFNOTE_START(Linux, 0, "a")
 ELFNOTE_END
 
 BUILD_SALT
-
-#ifdef CONFIG_XEN
-/*
- * Add a special note telling glibc's dynamic linker a fake hardware
- * flavor that it will use to choose the search path for libraries in the
- * same way it uses real hardware capabilities like "mmx".
- * We supply "nosegneg" as the fake capability, to indicate that we
- * do not like negative offsets in instructions using segment overrides,
- * since we implement those inefficiently. This makes it possible to
- * install libraries optimized to avoid those access patterns in someplace
- * like /lib/i686/tls/nosegneg. Note that an /etc/ld.so.conf.d/file
- * corresponding to the bits here is needed to make ldconfig work right.
- * It should contain:
- *	hwcap 1 nosegneg
- * to match the mapping of bit to name that we give here.
- *
- * At runtime, the fake hardware feature will be considered to be present
- * if its bit is set in the mask word. So, we start with the mask 0, and
- * at boot time we set VDSO_NOTE_NONEGSEG_BIT if running under Xen.
- */
-
-#include "../../xen/vdso.h"	/* Defines VDSO_NOTE_NONEGSEG_BIT. */
-
-ELFNOTE_START(GNU, 2, "a")
-	.long 1			/* ncaps */
-VDSO32_NOTE_MASK:		/* Symbol used by arch/x86/xen/setup.c */
-	.long 0			/* mask */
-	.byte VDSO_NOTE_NONEGSEG_BIT; .asciz "nosegneg"	/* bit, name */
-ELFNOTE_END
-#endif
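For reference, the note that ELFNOTE_START(GNU, 2, "a") emitted follows the
standard ELF note layout that glibc's dynamic linker parses for hwcap notes.
The struct below is a hedged sketch of the resulting bytes, not anything from
the kernel tree; it assumes the fields pack exactly as the .long/.byte/.asciz
directives above lay them out, with Elf32_Nhdr taken from <elf.h>.

#include <elf.h>
#include <stdint.h>

/* Sketch of the removed vDSO hwcap note's byte layout. */
struct vdso_hwcap_note {
	Elf32_Nhdr hdr;       /* n_namesz = 4, n_descsz = body size, n_type = 2 */
	char name[4];         /* "GNU" plus NUL padding */
	uint32_t ncaps;       /* .long 1  -- one capability follows */
	uint32_t mask;        /* .long 0  -- VDSO32_NOTE_MASK, patched at boot */
	uint8_t  bit;         /* VDSO_NOTE_NONEGSEG_BIT (1) */
	char     capname[11]; /* "nosegneg" plus NUL, padded to 4 bytes */
} __attribute__((packed));

At boot, fiddle_vdso() (removed from setup.c further down) set bit 1 in the
mask word, which is how the fake capability became "present" under Xen.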
@@ -25,7 +25,7 @@ void entry_SYSENTER_compat(void);
 void __end_entry_SYSENTER_compat(void);
 void entry_SYSCALL_compat(void);
 void entry_INT80_compat(void);
-#if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV)
+#ifdef CONFIG_XEN_PV
 void xen_entry_INT80_compat(void);
 #endif
 #endif
@@ -301,7 +301,7 @@ static inline void vdso_read_cpunode(unsigned *cpu, unsigned *node)
 extern const char early_idt_handler_array[NUM_EXCEPTION_VECTORS][EARLY_IDT_HANDLER_SIZE];
 extern void early_ignore_irq(void);
 
-#if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV)
+#ifdef CONFIG_XEN_PV
 extern const char xen_early_idt_handler_array[NUM_EXCEPTION_VECTORS][XEN_EARLY_IDT_HANDLER_SIZE];
 #endif
 
@@ -134,38 +134,7 @@ SYM_CODE_START(startup_32)
 	movl %eax,pa(initial_page_table+0xffc)
 #endif
 
-#ifdef CONFIG_PARAVIRT
-	/* This is can only trip for a broken bootloader... */
-	cmpw $0x207, pa(boot_params + BP_version)
-	jb .Ldefault_entry
-
-	/* Paravirt-compatible boot parameters.  Look to see what architecture
-	   we're booting under. */
-	movl pa(boot_params + BP_hardware_subarch), %eax
-	cmpl $num_subarch_entries, %eax
-	jae .Lbad_subarch
-
-	movl pa(subarch_entries)(,%eax,4), %eax
-	subl $__PAGE_OFFSET, %eax
-	jmp *%eax
-
-.Lbad_subarch:
-SYM_INNER_LABEL_ALIGN(xen_entry, SYM_L_WEAK)
-	/* Unknown implementation; there's really
-	   nothing we can do at this point. */
-	ud2a
-
-	__INITDATA
-
-subarch_entries:
-	.long .Ldefault_entry		/* normal x86/PC */
-	.long xen_entry			/* Xen hypervisor */
-	.long .Ldefault_entry		/* Moorestown MID */
-num_subarch_entries = (. - subarch_entries) / 4
-.previous
-#else
 	jmp .Ldefault_entry
-#endif /* CONFIG_PARAVIRT */
 SYM_CODE_END(startup_32)
 
 #ifdef CONFIG_HOTPLUG_CPU
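A rough C equivalent of the subarch dispatch deleted above, purely for
illustration (not kernel code): boot_params.hardware_subarch indexes a table
of entry points, out-of-range values trap, and the weak xen_entry symbol
resolved to the trap once no 32-bit Xen entry code was linked in.

#include <stdio.h>

enum { SUBARCH_PC = 0, SUBARCH_XEN = 1, SUBARCH_MRST = 2 };

static void default_entry(void) { puts("continue normal boot"); }
static void bad_subarch(void)   { puts("ud2a: unknown subarch, halt"); }
static void xen_entry(void)     { puts("32-bit Xen PV entry (removed)"); }

/* mirrors subarch_entries: one entry point per hardware_subarch value */
static void (*const subarch_entries[])(void) = {
	default_entry,	/* normal x86/PC */
	xen_entry,	/* Xen hypervisor */
	default_entry,	/* Moorestown MID */
};

static void dispatch(unsigned int hardware_subarch)
{
	if (hardware_subarch >= sizeof(subarch_entries) / sizeof(subarch_entries[0]))
		bad_subarch();				/* jae .Lbad_subarch */
	else
		subarch_entries[hardware_subarch]();	/* jmp *%eax */
}

int main(void)
{
	dispatch(SUBARCH_PC);
	dispatch(SUBARCH_XEN);
	dispatch(7);	/* out of range */
	return 0;
}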
@@ -19,6 +19,7 @@ config XEN_PV
 	bool "Xen PV guest support"
 	default y
 	depends on XEN
+	depends on X86_64
 	select PARAVIRT_XXL
 	select XEN_HAVE_PVMMU
 	select XEN_HAVE_VPMU
@@ -50,7 +51,7 @@ config XEN_PVHVM_SMP
 
 config XEN_512GB
 	bool "Limit Xen pv-domain memory to 512GB"
-	depends on XEN_PV && X86_64
+	depends on XEN_PV
 	default y
 	help
 	  Limit paravirtualized user domains to 512GB of RAM.
@@ -58,10 +58,6 @@ static u32 xen_apic_read(u32 reg)
 
 	if (reg == APIC_LVR)
 		return 0x14;
-#ifdef CONFIG_X86_32
-	if (reg == APIC_LDR)
-		return SET_APIC_LOGICAL_ID(1UL << smp_processor_id());
-#endif
 	if (reg != APIC_ID)
 		return 0;
 
@@ -127,14 +123,6 @@ static int xen_phys_pkg_id(int initial_apic_id, int index_msb)
 	return initial_apic_id >> index_msb;
 }
 
-#ifdef CONFIG_X86_32
-static int xen_x86_32_early_logical_apicid(int cpu)
-{
-	/* Match with APIC_LDR read. Otherwise setup_local_APIC complains. */
-	return 1 << cpu;
-}
-#endif
-
 static void xen_noop(void)
 {
 }
@@ -197,11 +185,6 @@ static struct apic xen_pv_apic = {
 	.icr_write			= xen_apic_icr_write,
 	.wait_icr_idle			= xen_noop,
 	.safe_wait_icr_idle		= xen_safe_apic_wait_icr_idle,
-
-#ifdef CONFIG_X86_32
-	/* generic_processor_info and setup_local_APIC. */
-	.x86_32_early_logical_apicid	= xen_x86_32_early_logical_apicid,
-#endif
 };
 
 static void __init xen_apic_check(void)
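The removed 32-bit path gave CPU n the logical APIC ID (1 << n), i.e. one LDR
bit per CPU, and flat logical addressing has only eight such bits. A toy
illustration of that mapping (not kernel code):

#include <stdio.h>

/* one LDR bit per CPU, as in the removed xen_x86_32_early_logical_apicid() */
static unsigned int logical_apic_id(unsigned int cpu)
{
	return 1u << cpu;
}

int main(void)
{
	for (unsigned int cpu = 0; cpu < 8; cpu++)
		printf("cpu %u -> LDR bit 0x%02x\n", cpu, logical_apic_id(cpu));
	return 0;
}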
@@ -119,14 +119,6 @@ static void __init xen_banner(void)
 	printk(KERN_INFO "Xen version: %d.%d%s%s\n",
 	       version >> 16, version & 0xffff, extra.extraversion,
 	       xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : "");
-
-#ifdef CONFIG_X86_32
-	pr_warn("WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING!\n"
-		"Support for running as 32-bit PV-guest under Xen will soon be removed\n"
-		"from the Linux kernel!\n"
-		"Please use either a 64-bit kernel or switch to HVM or PVH mode!\n"
-		"WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING!\n");
-#endif
 }
 
 static void __init xen_pv_init_platform(void)
@@ -538,30 +530,12 @@ static void load_TLS_descriptor(struct thread_struct *t,
 static void xen_load_tls(struct thread_struct *t, unsigned int cpu)
 {
 	/*
-	 * XXX sleazy hack: If we're being called in a lazy-cpu zone
-	 * and lazy gs handling is enabled, it means we're in a
-	 * context switch, and %gs has just been saved. This means we
-	 * can zero it out to prevent faults on exit from the
-	 * hypervisor if the next process has no %gs. Either way, it
-	 * has been saved, and the new value will get loaded properly.
-	 * This will go away as soon as Xen has been modified to not
-	 * save/restore %gs for normal hypercalls.
-	 *
-	 * On x86_64, this hack is not used for %gs, because gs points
-	 * to KERNEL_GS_BASE (and uses it for PDA references), so we
-	 * must not zero %gs on x86_64
-	 *
-	 * For x86_64, we need to zero %fs, otherwise we may get an
+	 * In lazy mode we need to zero %fs, otherwise we may get an
 	 * exception between the new %fs descriptor being loaded and
 	 * %fs being effectively cleared at __switch_to().
 	 */
-	if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU) {
-#ifdef CONFIG_X86_32
-		lazy_load_gs(0);
-#else
+	if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU)
 		loadsegment(fs, 0);
-#endif
-	}
 
 	xen_mc_batch();
 
@@ -572,13 +546,11 @@ static void xen_load_tls(struct thread_struct *t, unsigned int cpu)
 	xen_mc_issue(PARAVIRT_LAZY_CPU);
 }
 
-#ifdef CONFIG_X86_64
 static void xen_load_gs_index(unsigned int idx)
 {
 	if (HYPERVISOR_set_segment_base(SEGBASE_GS_USER_SEL, idx))
 		BUG();
 }
-#endif
 
 static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum,
 				const void *ptr)
@@ -597,7 +569,6 @@ static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum,
 	preempt_enable();
 }
 
-#ifdef CONFIG_X86_64
 void noist_exc_debug(struct pt_regs *regs);
 
 DEFINE_IDTENTRY_RAW(xenpv_exc_nmi)
@@ -697,7 +668,6 @@ static bool __ref get_trap_addr(void **addr, unsigned int ist)
 
 	return true;
 }
-#endif
 
 static int cvt_gate_to_trap(int vector, const gate_desc *val,
 			    struct trap_info *info)
@@ -710,10 +680,8 @@ static int cvt_gate_to_trap(int vector, const gate_desc *val,
 	info->vector = vector;
 
 	addr = gate_offset(val);
-#ifdef CONFIG_X86_64
 	if (!get_trap_addr((void **)&addr, val->bits.ist))
 		return 0;
-#endif /* CONFIG_X86_64 */
 	info->address = addr;
 
 	info->cs = gate_segment(val);
@@ -958,15 +926,12 @@ static u64 xen_read_msr_safe(unsigned int msr, int *err)
 static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high)
 {
 	int ret;
-#ifdef CONFIG_X86_64
 	unsigned int which;
 	u64 base;
-#endif
 
 	ret = 0;
 
 	switch (msr) {
-#ifdef CONFIG_X86_64
 	case MSR_FS_BASE:		which = SEGBASE_FS; goto set;
 	case MSR_KERNEL_GS_BASE:	which = SEGBASE_GS_USER; goto set;
 	case MSR_GS_BASE:		which = SEGBASE_GS_KERNEL; goto set;
@@ -976,7 +941,6 @@ static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high)
 		if (HYPERVISOR_set_segment_base(which, base) != 0)
 			ret = -EIO;
 		break;
-#endif
 
 	case MSR_STAR:
 	case MSR_CSTAR:
@@ -1058,9 +1022,7 @@ void __init xen_setup_vcpu_info_placement(void)
 static const struct pv_info xen_info __initconst = {
 	.shared_kernel_pmd = 0,
 
-#ifdef CONFIG_X86_64
 	.extra_user_64bit_cs = FLAT_USER_CS64,
-#endif
 	.name = "Xen",
 };
 
@@ -1086,18 +1048,14 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = {
 	.read_pmc = xen_read_pmc,
 
 	.iret = xen_iret,
-#ifdef CONFIG_X86_64
 	.usergs_sysret64 = xen_sysret64,
-#endif
 
 	.load_tr_desc = paravirt_nop,
 	.set_ldt = xen_set_ldt,
 	.load_gdt = xen_load_gdt,
 	.load_idt = xen_load_idt,
 	.load_tls = xen_load_tls,
-#ifdef CONFIG_X86_64
 	.load_gs_index = xen_load_gs_index,
-#endif
 
 	.alloc_ldt = xen_alloc_ldt,
 	.free_ldt = xen_free_ldt,
@@ -1364,15 +1322,7 @@ asmlinkage __visible void __init xen_start_kernel(void)
 
 	/* keep using Xen gdt for now; no urgent need to change it */
 
-#ifdef CONFIG_X86_32
-	pv_info.kernel_rpl = 1;
-	if (xen_feature(XENFEAT_supervisor_mode_kernel))
-		pv_info.kernel_rpl = 0;
-#else
 	pv_info.kernel_rpl = 0;
-#endif
-	/* set the limit of our address space */
-	xen_reserve_top();
 
 	/*
 	 * We used to do this in xen_arch_setup, but that is too late
@@ -1384,12 +1334,6 @@ asmlinkage __visible void __init xen_start_kernel(void)
 	if (rc != 0)
 		xen_raw_printk("physdev_op failed %d\n", rc);
 
-#ifdef CONFIG_X86_32
-	/* set up basic CPUID stuff */
-	cpu_detect(&new_cpu_data);
-	set_cpu_cap(&new_cpu_data, X86_FEATURE_FPU);
-	new_cpu_data.x86_capability[CPUID_1_EDX] = cpuid_edx(1);
-#endif
 
 	if (xen_start_info->mod_start) {
 	    if (xen_start_info->flags & SIF_MOD_START_PFN)
@@ -1458,12 +1402,8 @@ asmlinkage __visible void __init xen_start_kernel(void)
 	xen_efi_init(&boot_params);
 
 	/* Start the world */
-#ifdef CONFIG_X86_32
-	i386_start_kernel();
-#else
 	cr4_init_shadow(); /* 32b kernel does this in i386_start_kernel() */
 	x86_64_start_reservations((char *)__pa_symbol(&boot_params));
-#endif
 }
 
 static int xen_cpu_up_prepare_pv(unsigned int cpu)
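The switch in xen_write_msr_safe() above routes the three x86-64 segment-base
MSRs to Xen's set-segment-base hypercall instead of a real wrmsr. A hedged
user-space sketch of just that mapping follows; the MSR numbers are
architectural, while the SEGBASE_* values here are local stand-ins for the
constants in Xen's public ABI headers.

#include <stdint.h>
#include <stdio.h>

/* Local stand-ins; the real constants live in Xen's public headers. */
enum segbase { SEGBASE_FS, SEGBASE_GS_USER, SEGBASE_GS_KERNEL };

/* Architectural MSR numbers for the x86-64 segment bases. */
#define MSR_FS_BASE        0xc0000100u
#define MSR_GS_BASE        0xc0000101u
#define MSR_KERNEL_GS_BASE 0xc0000102u

/* Mirrors the case labels above: which hypercall argument a write to
 * each MSR turns into, now unconditionally compiled in (64-bit only). */
static int msr_to_segbase(uint32_t msr, enum segbase *which)
{
	switch (msr) {
	case MSR_FS_BASE:        *which = SEGBASE_FS;        return 0;
	case MSR_KERNEL_GS_BASE: *which = SEGBASE_GS_USER;   return 0;
	case MSR_GS_BASE:        *which = SEGBASE_GS_KERNEL; return 0;
	default:                 return -1; /* not a segment-base MSR */
	}
}

int main(void)
{
	enum segbase which;

	if (msr_to_segbase(MSR_GS_BASE, &which) == 0)
		printf("MSR 0x%x -> SEGBASE %d\n", MSR_GS_BASE, which);
	return 0;
}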
File diff suppressed because it is too large
@@ -379,12 +379,8 @@ static void __init xen_rebuild_p2m_list(unsigned long *p2m)
 
 		if (type == P2M_TYPE_PFN || i < chunk) {
 			/* Use initial p2m page contents. */
-#ifdef CONFIG_X86_64
 			mfns = alloc_p2m_page();
 			copy_page(mfns, xen_p2m_addr + pfn);
-#else
-			mfns = xen_p2m_addr + pfn;
-#endif
 			ptep = populate_extra_pte((unsigned long)(p2m + pfn));
 			set_pte(ptep,
 				pfn_pte(PFN_DOWN(__pa(mfns)), PAGE_KERNEL));
@@ -467,7 +463,7 @@ EXPORT_SYMBOL_GPL(get_phys_to_machine);
  * Allocate new pmd(s). It is checked whether the old pmd is still in place.
  * If not, nothing is changed. This is okay as the only reason for allocating
  * a new pmd is to replace p2m_missing_pte or p2m_identity_pte by a individual
- * pmd. In case of PAE/x86-32 there are multiple pmds to allocate!
+ * pmd.
  */
 static pte_t *alloc_p2m_pmd(unsigned long addr, pte_t *pte_pg)
 {
@@ -32,7 +32,6 @@
 #include <xen/features.h>
 #include <xen/hvc-console.h>
 #include "xen-ops.h"
-#include "vdso.h"
 #include "mmu.h"
 
 #define GB(x) ((uint64_t)(x) * 1024 * 1024 * 1024)
@@ -545,13 +544,10 @@ static unsigned long __init xen_get_pages_limit(void)
 {
 	unsigned long limit;
 
-#ifdef CONFIG_X86_32
-	limit = GB(64) / PAGE_SIZE;
-#else
 	limit = MAXMEM / PAGE_SIZE;
 	if (!xen_initial_domain() && xen_512gb_limit)
 		limit = GB(512) / PAGE_SIZE;
-#endif
 
 	return limit;
 }
@@ -722,17 +718,8 @@ static void __init xen_reserve_xen_mfnlist(void)
 	if (!xen_is_e820_reserved(start, size))
 		return;
 
-#ifdef CONFIG_X86_32
-	/*
-	 * Relocating the p2m on 32 bit system to an arbitrary virtual address
-	 * is not supported, so just give up.
-	 */
-	xen_raw_console_write("Xen hypervisor allocated p2m list conflicts with E820 map\n");
-	BUG();
-#else
 	xen_relocate_p2m();
 	memblock_free(start, size);
-#endif
 }
 
 /**
@@ -921,20 +908,6 @@ char * __init xen_memory_setup(void)
 	return "Xen";
 }
 
-/*
- * Set the bit indicating "nosegneg" library variants should be used.
- * We only need to bother in pure 32-bit mode; compat 32-bit processes
- * can have un-truncated segments, so wrapping around is allowed.
- */
-static void __init fiddle_vdso(void)
-{
-#ifdef CONFIG_X86_32
-	u32 *mask = vdso_image_32.data +
-		vdso_image_32.sym_VDSO32_NOTE_MASK;
-	*mask |= 1 << VDSO_NOTE_NONEGSEG_BIT;
-#endif
-}
-
 static int register_callback(unsigned type, const void *func)
 {
 	struct callback_register callback = {
@@ -951,11 +924,7 @@ void xen_enable_sysenter(void)
 	int ret;
 	unsigned sysenter_feature;
 
-#ifdef CONFIG_X86_32
-	sysenter_feature = X86_FEATURE_SEP;
-#else
 	sysenter_feature = X86_FEATURE_SYSENTER32;
-#endif
 
 	if (!boot_cpu_has(sysenter_feature))
 		return;
@@ -967,7 +936,6 @@
 
 void xen_enable_syscall(void)
 {
-#ifdef CONFIG_X86_64
 	int ret;
 
 	ret = register_callback(CALLBACKTYPE_syscall, xen_syscall_target);
@@ -983,7 +951,6 @@ void xen_enable_syscall(void)
 		if (ret != 0)
 			setup_clear_cpu_cap(X86_FEATURE_SYSCALL32);
 	}
-#endif /* CONFIG_X86_64 */
 }
 
 static void __init xen_pvmmu_arch_setup(void)
@@ -1024,7 +991,6 @@ void __init xen_arch_setup(void)
 	disable_cpuidle();
 	disable_cpufreq();
 	WARN_ON(xen_set_default_idle());
-	fiddle_vdso();
 #ifdef CONFIG_NUMA
 	numa_off = 1;
 #endif
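The GB() macro and xen_get_pages_limit() above make the XEN_512GB cap easy to
check by hand. A small stand-alone computation, assuming the x86 4 KiB base
page size:

#include <stdint.h>
#include <stdio.h>

#define GB(x) ((uint64_t)(x) * 1024 * 1024 * 1024)	/* as in setup.c */
#define PAGE_SIZE 4096ULL	/* assumption: x86 base page size */

int main(void)
{
	uint64_t limit_pages = GB(512) / PAGE_SIZE;

	/* prints 134217728 -- the page count xen_get_pages_limit() now
	 * returns for a non-initial domain with XEN_512GB=y */
	printf("512 GiB = %llu pages\n", (unsigned long long)limit_pages);
	return 0;
}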
@@ -212,15 +212,6 @@ static void __init xen_pv_smp_prepare_boot_cpu(void)
 	 * sure the old memory can be recycled. */
 	make_lowmem_page_readwrite(xen_initial_gdt);
 
-#ifdef CONFIG_X86_32
-	/*
-	 * Xen starts us with XEN_FLAT_RING1_DS, but linux code
-	 * expects __USER_DS
-	 */
-	loadsegment(ds, __USER_DS);
-	loadsegment(es, __USER_DS);
-#endif
-
 	xen_filter_cpu_maps();
 	xen_setup_vcpu_info_placement();
 
@@ -301,10 +292,6 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
 
 	gdt = get_cpu_gdt_rw(cpu);
 
-#ifdef CONFIG_X86_32
-	ctxt->user_regs.fs = __KERNEL_PERCPU;
-	ctxt->user_regs.gs = __KERNEL_STACK_CANARY;
-#endif
 	memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));
 
 	/*
@@ -342,12 +329,7 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
 	ctxt->kernel_ss = __KERNEL_DS;
 	ctxt->kernel_sp = task_top_of_stack(idle);
 
-#ifdef CONFIG_X86_32
-	ctxt->event_callback_cs = __KERNEL_CS;
-	ctxt->failsafe_callback_cs = __KERNEL_CS;
-#else
 	ctxt->gs_base_kernel = per_cpu_offset(cpu);
-#endif
 	ctxt->event_callback_eip =
 		(unsigned long)xen_asm_exc_xen_hypervisor_callback;
 	ctxt->failsafe_callback_eip =
@@ -1,6 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-
-/* Bit used for the pseudo-hwcap for non-negative segments. We use
-   bit 1 to avoid bugs in some versions of glibc when bit 0 is
-   used; the choice is otherwise arbitrary. */
-#define VDSO_NOTE_NONEGSEG_BIT	1
@@ -76,11 +76,7 @@ SYM_FUNC_END(xen_save_fl_direct)
  */
 SYM_FUNC_START(xen_restore_fl_direct)
 	FRAME_BEGIN
-#ifdef CONFIG_X86_64
 	testw $X86_EFLAGS_IF, %di
-#else
-	testb $X86_EFLAGS_IF>>8, %ah
-#endif
 	setz PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
 	/*
 	 * Preempt here doesn't matter because that will deal with any
@@ -104,15 +100,6 @@ SYM_FUNC_END(xen_restore_fl_direct)
  */
 SYM_FUNC_START(check_events)
 	FRAME_BEGIN
-#ifdef CONFIG_X86_32
-	push %eax
-	push %ecx
-	push %edx
-	call xen_force_evtchn_callback
-	pop %edx
-	pop %ecx
-	pop %eax
-#else
 	push %rax
 	push %rcx
 	push %rdx
@@ -132,7 +119,6 @@ SYM_FUNC_START(check_events)
 	pop %rdx
 	pop %rcx
 	pop %rax
-#endif
 	FRAME_END
 	ret
SYM_FUNC_END(check_events)
@@ -1,185 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Asm versions of Xen pv-ops, suitable for direct use.
- *
- * We only bother with direct forms (ie, vcpu in pda) of the
- * operations here; the indirect forms are better handled in C.
- */
-
-#include <asm/thread_info.h>
-#include <asm/processor-flags.h>
-#include <asm/segment.h>
-#include <asm/asm.h>
-
-#include <xen/interface/xen.h>
-
-#include <linux/linkage.h>
-
-/* Pseudo-flag used for virtual NMI, which we don't implement yet */
-#define XEN_EFLAGS_NMI	0x80000000
-
-/*
- * This is run where a normal iret would be run, with the same stack setup:
- *	8: eflags
- *	4: cs
- *	esp-> 0: eip
- *
- * This attempts to make sure that any pending events are dealt with
- * on return to usermode, but there is a small window in which an
- * event can happen just before entering usermode. If the nested
- * interrupt ends up setting one of the TIF_WORK_MASK pending work
- * flags, they will not be tested again before returning to
- * usermode. This means that a process can end up with pending work,
- * which will be unprocessed until the process enters and leaves the
- * kernel again, which could be an unbounded amount of time. This
- * means that a pending signal or reschedule event could be
- * indefinitely delayed.
- *
- * The fix is to notice a nested interrupt in the critical window, and
- * if one occurs, then fold the nested interrupt into the current
- * interrupt stack frame, and re-process it iteratively rather than
- * recursively. This means that it will exit via the normal path, and
- * all pending work will be dealt with appropriately.
- *
- * Because the nested interrupt handler needs to deal with the current
- * stack state in whatever form its in, we keep things simple by only
- * using a single register which is pushed/popped on the stack.
- */
-
-.macro POP_FS
-1:
-	popw %fs
-.pushsection .fixup, "ax"
-2:	movw $0, (%esp)
-	jmp 1b
-.popsection
-	_ASM_EXTABLE(1b,2b)
-.endm
-
-SYM_CODE_START(xen_iret)
-	/* test eflags for special cases */
-	testl $(X86_EFLAGS_VM | XEN_EFLAGS_NMI), 8(%esp)
-	jnz hyper_iret
-
-	push %eax
-	ESP_OFFSET=4	# bytes pushed onto stack
-
-	/* Store vcpu_info pointer for easy access */
-#ifdef CONFIG_SMP
-	pushw %fs
-	movl $(__KERNEL_PERCPU), %eax
-	movl %eax, %fs
-	movl %fs:xen_vcpu, %eax
-	POP_FS
-#else
-	movl %ss:xen_vcpu, %eax
-#endif
-
-	/* check IF state we're restoring */
-	testb $X86_EFLAGS_IF>>8, 8+1+ESP_OFFSET(%esp)
-
-	/*
-	 * Maybe enable events. Once this happens we could get a
-	 * recursive event, so the critical region starts immediately
-	 * afterwards. However, if that happens we don't end up
-	 * resuming the code, so we don't have to be worried about
-	 * being preempted to another CPU.
-	 */
-	setz %ss:XEN_vcpu_info_mask(%eax)
-xen_iret_start_crit:
-
-	/* check for unmasked and pending */
-	cmpw $0x0001, %ss:XEN_vcpu_info_pending(%eax)
-
-	/*
-	 * If there's something pending, mask events again so we can
-	 * jump back into exc_xen_hypervisor_callback. Otherwise do not
-	 * touch XEN_vcpu_info_mask.
-	 */
-	jne 1f
-	movb $1, %ss:XEN_vcpu_info_mask(%eax)
-
-1:	popl %eax
-
-	/*
-	 * From this point on the registers are restored and the stack
-	 * updated, so we don't need to worry about it if we're
-	 * preempted
-	 */
-iret_restore_end:
-
-	/*
-	 * Jump to hypervisor_callback after fixing up the stack.
-	 * Events are masked, so jumping out of the critical region is
-	 * OK.
-	 */
-	je xen_asm_exc_xen_hypervisor_callback
-
-1:	iret
-xen_iret_end_crit:
-	_ASM_EXTABLE(1b, asm_iret_error)
-
-hyper_iret:
-	/* put this out of line since its very rarely used */
-	jmp hypercall_page + __HYPERVISOR_iret * 32
-SYM_CODE_END(xen_iret)
-
-.globl xen_iret_start_crit, xen_iret_end_crit
-
-/*
- * This is called by xen_asm_exc_xen_hypervisor_callback in entry_32.S when it sees
- * that the EIP at the time of interrupt was between
- * xen_iret_start_crit and xen_iret_end_crit.
- *
- * The stack format at this point is:
- *	----------------
- *	ss		: (ss/esp may be present if we came from usermode)
- *	esp		:
- *	eflags		}  outer exception info
- *	cs		}
- *	eip		}
- *	----------------
- *	eax		: outer eax if it hasn't been restored
- *	----------------
- *	eflags		}
- *	cs		}  nested exception info
- *	eip		}
- *	return address	: (into xen_asm_exc_xen_hypervisor_callback)
- *
- * In order to deliver the nested exception properly, we need to discard the
- * nested exception frame such that when we handle the exception, we do it
- * in the context of the outer exception rather than starting a new one.
- *
- * The only caveat is that if the outer eax hasn't been restored yet (i.e.
- * it's still on stack), we need to restore its value here.
- */
-.pushsection .noinstr.text, "ax"
-SYM_CODE_START(xen_iret_crit_fixup)
-	/*
-	 * Paranoia: Make sure we're really coming from kernel space.
-	 * One could imagine a case where userspace jumps into the
-	 * critical range address, but just before the CPU delivers a
-	 * PF, it decides to deliver an interrupt instead. Unlikely?
-	 * Definitely. Easy to avoid? Yes.
-	 */
-	testb $2, 2*4(%esp)		/* nested CS */
-	jnz 2f
-
-	/*
-	 * If eip is before iret_restore_end then stack
-	 * hasn't been restored yet.
-	 */
-	cmpl $iret_restore_end, 1*4(%esp)
-	jae 1f
-
-	movl 4*4(%esp), %eax		/* load outer EAX */
-	ret $4*4			/* discard nested EIP, CS, and EFLAGS as
-					 * well as the just restored EAX */
-
-1:
-	ret $3*4			/* discard nested EIP, CS, and EFLAGS */
-
-2:
-	ret
-SYM_CODE_END(xen_iret_crit_fixup)
-.popsection
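The mask/pending dance in the deleted xen_iret (and in xen_restore_fl_direct
earlier) follows a simple protocol on the shared vcpu_info page: unmask, then
re-check for pending events so nothing is lost in the race, re-masking before
delivery. A hedged C model of just that protocol -- the two field names follow
Xen's vcpu_info, everything else is invented for the sketch:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* The two vcpu_info fields the assembly touched through the
 * XEN_vcpu_info_mask / XEN_vcpu_info_pending offsets. */
struct vcpu_info_model {
	uint8_t evtchn_upcall_pending;
	uint8_t evtchn_upcall_mask;
};

/* Hypothetical stand-in for xen_asm_exc_xen_hypervisor_callback. */
static void deliver_upcall(struct vcpu_info_model *v)
{
	v->evtchn_upcall_pending = 0;
	puts("event delivered");
}

/* Models the tail of xen_iret: restore the guest's IF, then, like a
 * real iret, deliver any event that slipped in while events were off. */
static void restore_flags(struct vcpu_info_model *v, bool interrupts_enabled)
{
	v->evtchn_upcall_mask = interrupts_enabled ? 0 : 1;	/* setz mask */
	if (interrupts_enabled && v->evtchn_upcall_pending) {
		v->evtchn_upcall_mask = 1;	/* movb $1, mask: re-enter masked */
		deliver_upcall(v);		/* je xen_asm_exc_..._callback */
		v->evtchn_upcall_mask = 0;
	}
}

int main(void)
{
	struct vcpu_info_model v = { .evtchn_upcall_pending = 1,
				     .evtchn_upcall_mask = 1 };

	restore_flags(&v, true);	/* pending event gets delivered */
	return 0;
}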
@@ -35,13 +35,8 @@ SYM_CODE_START(startup_xen)
 	rep __ASM_SIZE(stos)
 
 	mov %_ASM_SI, xen_start_info
-#ifdef CONFIG_X86_64
 	mov initial_stack(%rip), %rsp
-#else
-	mov initial_stack, %esp
-#endif
 
-#ifdef CONFIG_X86_64
 	/* Set up %gs.
 	 *
 	 * The base of %gs always points to fixed_percpu_data. If the
@@ -53,7 +48,6 @@ SYM_CODE_START(startup_xen)
 	movq	$INIT_PER_CPU_VAR(fixed_percpu_data),%rax
 	cdq
 	wrmsr
-#endif
 
 	call xen_start_kernel
SYM_CODE_END(startup_xen)
@@ -33,7 +33,6 @@ void xen_setup_mfn_list_list(void);
 void xen_build_mfn_list_list(void);
 void xen_setup_machphys_mapping(void);
 void xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn);
-void xen_reserve_top(void);
 void __init xen_reserve_special_pages(void);
 void __init xen_pt_check_e820(void);
 
@@ -52,9 +52,7 @@ config XEN_BALLOON_MEMORY_HOTPLUG
 
 config XEN_BALLOON_MEMORY_HOTPLUG_LIMIT
 	int "Hotplugged memory limit (in GiB) for a PV guest"
-	default 512 if X86_64
-	default 4 if X86_32
-	range 0 64 if X86_32
+	default 512
 	depends on XEN_HAVE_PVMMU
 	depends on XEN_BALLOON_MEMORY_HOTPLUG
 	help