Merge branch 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 mm updates from Ingo Molnar:
 "Various x86 low level modifications:

   - preparatory work to support virtually mapped kernel stacks (Andy
     Lutomirski)

   - support for 64-bit __get_user() on 32-bit kernels (Benjamin
     LaHaise)

   - (involved) workaround for Knights Landing CPU erratum (Dave Hansen)

   - MPX enhancements (Dave Hansen)

   - mremap() extension to allow remapping of the special VDSO vma, for
     purposes of user level context save/restore (Dmitry Safonov)

   - hweight and entry code cleanups (Borislav Petkov)

   - bitops code generation optimizations and cleanups with modern GCC
     (H. Peter Anvin)

   - syscall entry code optimizations (Paolo Bonzini)"

* 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (43 commits)
  x86/mm/cpa: Add missing comment in populate_pdg()
  x86/mm/cpa: Fix populate_pgd(): Stop trying to deallocate failed PUDs
  x86/syscalls: Add compat_sys_preadv64v2/compat_sys_pwritev64v2
  x86/smp: Remove unnecessary initialization of thread_info::cpu
  x86/smp: Remove stack_smp_processor_id()
  x86/uaccess: Move thread_info::addr_limit to thread_struct
  x86/dumpstack: Rename thread_struct::sig_on_uaccess_error to sig_on_uaccess_err
  x86/uaccess: Move thread_info::uaccess_err and thread_info::sig_on_uaccess_err to thread_struct
  x86/dumpstack: When OOPSing, rewind the stack before do_exit()
  x86/mm/64: In vmalloc_fault(), use CR3 instead of current->active_mm
  x86/dumpstack/64: Handle faults when printing the "Stack: " part of an OOPS
  x86/dumpstack: Try harder to get a call trace on stack overflow
  x86/mm: Remove kernel_unmap_pages_in_pgd() and efi_cleanup_page_tables()
  x86/mm/cpa: In populate_pgd(), don't set the PGD entry until it's populated
  x86/mm/hotplug: Don't remove PGD entries in remove_pagetable()
  x86/mm: Use pte_none() to test for empty PTE
  x86/mm: Disallow running with 32-bit PTEs to work around erratum
  x86/mm: Ignore A/D bits in pte/pmd/pud_none()
  x86/mm: Move swap offset/type up in PTE to work around erratum
  x86/entry: Inline enter_from_user_mode()
  ...
This commit is contained in:
Linus Torvalds
2016-07-25 15:34:18 -07:00
88 changed files with 3069 additions and 406 deletions
-5
View File
@@ -294,11 +294,6 @@ config X86_32_LAZY_GS
def_bool y
depends on X86_32 && !CC_STACKPROTECTOR
config ARCH_HWEIGHT_CFLAGS
string
default "-fcall-saved-ecx -fcall-saved-edx" if X86_32
default "-fcall-saved-rdi -fcall-saved-rsi -fcall-saved-rdx -fcall-saved-rcx -fcall-saved-r8 -fcall-saved-r9 -fcall-saved-r10 -fcall-saved-r11" if X86_64
config ARCH_SUPPORTS_UPROBES
def_bool y
+5 -3
View File
@@ -16,14 +16,16 @@
#define BOOT_BITOPS_H
#define _LINUX_BITOPS_H /* Inhibit inclusion of <linux/bitops.h> */
static inline int constant_test_bit(int nr, const void *addr)
#include <linux/types.h>
static inline bool constant_test_bit(int nr, const void *addr)
{
const u32 *p = (const u32 *)addr;
return ((1UL << (nr & 31)) & (p[nr >> 5])) != 0;
}
static inline int variable_test_bit(int nr, const void *addr)
static inline bool variable_test_bit(int nr, const void *addr)
{
u8 v;
bool v;
const u32 *p = (const u32 *)addr;
asm("btl %2,%1; setc %0" : "=qm" (v) : "m" (*p), "Ir" (nr));
+10 -8
View File
@@ -24,6 +24,7 @@
#include <linux/types.h>
#include <linux/edd.h>
#include <asm/setup.h>
#include <asm/asm.h>
#include "bitops.h"
#include "ctype.h"
#include "cpuflags.h"
@@ -176,18 +177,18 @@ static inline void wrgs32(u32 v, addr_t addr)
}
/* Note: these only return true/false, not a signed return value! */
static inline int memcmp_fs(const void *s1, addr_t s2, size_t len)
static inline bool memcmp_fs(const void *s1, addr_t s2, size_t len)
{
u8 diff;
asm volatile("fs; repe; cmpsb; setnz %0"
: "=qm" (diff), "+D" (s1), "+S" (s2), "+c" (len));
bool diff;
asm volatile("fs; repe; cmpsb" CC_SET(nz)
: CC_OUT(nz) (diff), "+D" (s1), "+S" (s2), "+c" (len));
return diff;
}
static inline int memcmp_gs(const void *s1, addr_t s2, size_t len)
static inline bool memcmp_gs(const void *s1, addr_t s2, size_t len)
{
u8 diff;
asm volatile("gs; repe; cmpsb; setnz %0"
: "=qm" (diff), "+D" (s1), "+S" (s2), "+c" (len));
bool diff;
asm volatile("gs; repe; cmpsb" CC_SET(nz)
: CC_OUT(nz) (diff), "+D" (s1), "+S" (s2), "+c" (len));
return diff;
}
@@ -294,6 +295,7 @@ static inline int cmdline_find_option_bool(const char *option)
/* cpu.c, cpucheck.c */
int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr);
int check_knl_erratum(void);
int validate_cpu(void);
/* early_serial_console.c */
+2
View File
@@ -93,6 +93,8 @@ int validate_cpu(void)
show_cap_strs(err_flags);
putchar('\n');
return -1;
} else if (check_knl_erratum()) {
return -1;
} else {
return 0;
}
+33
View File
@@ -24,6 +24,7 @@
# include "boot.h"
#endif
#include <linux/types.h>
#include <asm/intel-family.h>
#include <asm/processor-flags.h>
#include <asm/required-features.h>
#include <asm/msr-index.h>
@@ -175,6 +176,8 @@ int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr)
puts("WARNING: PAE disabled. Use parameter 'forcepae' to enable at your own risk!\n");
}
}
if (!err)
err = check_knl_erratum();
if (err_flags_ptr)
*err_flags_ptr = err ? err_flags : NULL;
@@ -185,3 +188,33 @@ int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr)
return (cpu.level < req_level || err) ? -1 : 0;
}
int check_knl_erratum(void)
{
/*
* First check for the affected model/family:
*/
if (!is_intel() ||
cpu.family != 6 ||
cpu.model != INTEL_FAM6_XEON_PHI_KNL)
return 0;
/*
* This erratum affects the Accessed/Dirty bits, and can
* cause stray bits to be set in !Present PTEs. We have
* enough bits in our 64-bit PTEs (which we have on real
* 64-bit mode or PAE) to avoid using these troublesome
* bits. But, we do not have enough space in our 32-bit
* PTEs. So, refuse to run on 32-bit non-PAE kernels.
*/
if (IS_ENABLED(CONFIG_X86_64) || IS_ENABLED(CONFIG_X86_PAE))
return 0;
puts("This 32-bit kernel can not run on this Xeon Phi x200\n"
"processor due to a processor erratum. Use a 64-bit\n"
"kernel, or enable PAE in this 32-bit kernel.\n\n");
return -1;
}
+1
View File
@@ -102,6 +102,7 @@ void get_cpuflags(void)
cpuid(0x1, &tfms, &ignored, &cpu.flags[4],
&cpu.flags[0]);
cpu.level = (tfms >> 8) & 15;
cpu.family = cpu.level;
cpu.model = (tfms >> 4) & 15;
if (cpu.level >= 6)
cpu.model += ((tfms >> 16) & 0xf) << 4;
+1
View File
@@ -6,6 +6,7 @@
struct cpu_features {
int level; /* Family, or 64 for x86-64 */
int family; /* Family, always */
int model;
u32 flags[NCAPINTS];
};
+1 -1
View File
@@ -17,7 +17,7 @@
int memcmp(const void *s1, const void *s2, size_t len)
{
u8 diff;
bool diff;
asm("repe; cmpsb; setnz %0"
: "=qm" (diff), "+D" (s1), "+S" (s2), "+c" (len));
return diff;
+3 -3
View File
@@ -40,10 +40,10 @@ static struct thread_info *pt_regs_to_thread_info(struct pt_regs *regs)
#ifdef CONFIG_CONTEXT_TRACKING
/* Called on entry from user mode with IRQs off. */
__visible void enter_from_user_mode(void)
__visible inline void enter_from_user_mode(void)
{
CT_WARN_ON(ct_state() != CONTEXT_USER);
user_exit();
user_exit_irqoff();
}
#else
static inline void enter_from_user_mode(void) {}
@@ -274,7 +274,7 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs)
ti->status &= ~TS_COMPAT;
#endif
user_enter();
user_enter_irqoff();
}
#define SYSCALL_EXIT_WORK_FLAGS \
+11
View File
@@ -1153,3 +1153,14 @@ ENTRY(async_page_fault)
jmp error_code
END(async_page_fault)
#endif
ENTRY(rewind_stack_do_exit)
/* Prevent any naive code from trying to unwind to our caller. */
xorl %ebp, %ebp
movl PER_CPU_VAR(cpu_current_top_of_stack), %esi
leal -TOP_OF_KERNEL_STACK_PADDING-PTREGS_SIZE(%esi), %esp
call do_exit
1: jmp 1b
END(rewind_stack_do_exit)
+11
View File
@@ -1423,3 +1423,14 @@ ENTRY(ignore_sysret)
mov $-ENOSYS, %eax
sysret
END(ignore_sysret)
ENTRY(rewind_stack_do_exit)
/* Prevent any naive code from trying to unwind to our caller. */
xorl %ebp, %ebp
movq PER_CPU_VAR(cpu_current_top_of_stack), %rax
leaq -TOP_OF_KERNEL_STACK_PADDING-PTREGS_SIZE(%rax), %rsp
call do_exit
1: jmp 1b
END(rewind_stack_do_exit)
+2 -2
View File
@@ -374,5 +374,5 @@
543 x32 io_setup compat_sys_io_setup
544 x32 io_submit compat_sys_io_submit
545 x32 execveat compat_sys_execveat/ptregs
534 x32 preadv2 compat_sys_preadv2
535 x32 pwritev2 compat_sys_pwritev2
546 x32 preadv2 compat_sys_preadv64v2
547 x32 pwritev2 compat_sys_pwritev64v2
+3 -3
View File
@@ -33,7 +33,7 @@
.endif
call \func
jmp restore
jmp .L_restore
_ASM_NOKPROBE(\name)
.endm
@@ -54,7 +54,7 @@
#if defined(CONFIG_TRACE_IRQFLAGS) \
|| defined(CONFIG_DEBUG_LOCK_ALLOC) \
|| defined(CONFIG_PREEMPT)
restore:
.L_restore:
popq %r11
popq %r10
popq %r9
@@ -66,5 +66,5 @@ restore:
popq %rdi
popq %rbp
ret
_ASM_NOKPROBE(restore)
_ASM_NOKPROBE(.L_restore)
#endif
+3 -2
View File
@@ -134,7 +134,7 @@ VDSO_LDFLAGS_vdso32.lds = -m32 -Wl,-m,elf_i386 -Wl,-soname=linux-gate.so.1
override obj-dirs = $(dir $(obj)) $(obj)/vdso32/
targets += vdso32/vdso32.lds
targets += vdso32/note.o vdso32/vclock_gettime.o vdso32/system_call.o
targets += vdso32/note.o vdso32/system_call.o vdso32/sigreturn.o
targets += vdso32/vclock_gettime.o
KBUILD_AFLAGS_32 := $(filter-out -m64,$(KBUILD_AFLAGS)) -DBUILD_VDSO
@@ -156,7 +156,8 @@ $(obj)/vdso32.so.dbg: FORCE \
$(obj)/vdso32/vdso32.lds \
$(obj)/vdso32/vclock_gettime.o \
$(obj)/vdso32/note.o \
$(obj)/vdso32/system_call.o
$(obj)/vdso32/system_call.o \
$(obj)/vdso32/sigreturn.o
$(call if_changed,vdso)
#
-8
View File
@@ -1,11 +1,3 @@
/*
* Common code for the sigreturn entry points in vDSO images.
* So far this code is the same for both int80 and sysenter versions.
* This file is #include'd by int80.S et al to define them first thing.
* The kernel assumes that the addresses of these routines are constant
* for all vDSO implementations.
*/
#include <linux/linkage.h>
#include <asm/unistd_32.h>
#include <asm/asm-offsets.h>
+1 -6
View File
@@ -2,16 +2,11 @@
* AT_SYSINFO entry point
*/
#include <linux/linkage.h>
#include <asm/dwarf2.h>
#include <asm/cpufeatures.h>
#include <asm/alternative-asm.h>
/*
* First get the common code for the sigreturn entry points.
* This must come first.
*/
#include "sigreturn.S"
.text
.globl __kernel_vsyscall
.type __kernel_vsyscall,@function
+42 -5
View File
@@ -12,6 +12,7 @@
#include <linux/random.h>
#include <linux/elf.h>
#include <linux/cpu.h>
#include <linux/ptrace.h>
#include <asm/pvclock.h>
#include <asm/vgtod.h>
#include <asm/proto.h>
@@ -97,10 +98,40 @@ static int vdso_fault(const struct vm_special_mapping *sm,
return 0;
}
static const struct vm_special_mapping text_mapping = {
.name = "[vdso]",
.fault = vdso_fault,
};
static void vdso_fix_landing(const struct vdso_image *image,
struct vm_area_struct *new_vma)
{
#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
if (in_ia32_syscall() && image == &vdso_image_32) {
struct pt_regs *regs = current_pt_regs();
unsigned long vdso_land = image->sym_int80_landing_pad;
unsigned long old_land_addr = vdso_land +
(unsigned long)current->mm->context.vdso;
/* Fixing userspace landing - look at do_fast_syscall_32 */
if (regs->ip == old_land_addr)
regs->ip = new_vma->vm_start + vdso_land;
}
#endif
}
static int vdso_mremap(const struct vm_special_mapping *sm,
struct vm_area_struct *new_vma)
{
unsigned long new_size = new_vma->vm_end - new_vma->vm_start;
const struct vdso_image *image = current->mm->context.vdso_image;
if (image->size != new_size)
return -EINVAL;
if (WARN_ON_ONCE(current->mm != new_vma->vm_mm))
return -EFAULT;
vdso_fix_landing(image, new_vma);
current->mm->context.vdso = (void __user *)new_vma->vm_start;
return 0;
}
static int vvar_fault(const struct vm_special_mapping *sm,
struct vm_area_struct *vma, struct vm_fault *vmf)
@@ -151,6 +182,12 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
struct vm_area_struct *vma;
unsigned long addr, text_start;
int ret = 0;
static const struct vm_special_mapping vdso_mapping = {
.name = "[vdso]",
.fault = vdso_fault,
.mremap = vdso_mremap,
};
static const struct vm_special_mapping vvar_mapping = {
.name = "[vvar]",
.fault = vvar_fault,
@@ -185,7 +222,7 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
image->size,
VM_READ|VM_EXEC|
VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
&text_mapping);
&vdso_mapping);
if (IS_ERR(vma)) {
ret = PTR_ERR(vma);
+5 -5
View File
@@ -96,7 +96,7 @@ static bool write_ok_or_segv(unsigned long ptr, size_t size)
{
/*
* XXX: if access_ok, get_user, and put_user handled
* sig_on_uaccess_error, this could go away.
* sig_on_uaccess_err, this could go away.
*/
if (!access_ok(VERIFY_WRITE, (void __user *)ptr, size)) {
@@ -125,7 +125,7 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
struct task_struct *tsk;
unsigned long caller;
int vsyscall_nr, syscall_nr, tmp;
int prev_sig_on_uaccess_error;
int prev_sig_on_uaccess_err;
long ret;
/*
@@ -221,8 +221,8 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
* With a real vsyscall, page faults cause SIGSEGV. We want to
* preserve that behavior to make writing exploits harder.
*/
prev_sig_on_uaccess_error = current_thread_info()->sig_on_uaccess_error;
current_thread_info()->sig_on_uaccess_error = 1;
prev_sig_on_uaccess_err = current->thread.sig_on_uaccess_err;
current->thread.sig_on_uaccess_err = 1;
ret = -EFAULT;
switch (vsyscall_nr) {
@@ -243,7 +243,7 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
break;
}
current_thread_info()->sig_on_uaccess_error = prev_sig_on_uaccess_error;
current->thread.sig_on_uaccess_err = prev_sig_on_uaccess_err;
check_fault:
if (ret == -EFAULT) {
+3 -3
View File
@@ -45,11 +45,11 @@ static inline void apm_bios_call_asm(u32 func, u32 ebx_in, u32 ecx_in,
: "memory", "cc");
}
static inline u8 apm_bios_call_simple_asm(u32 func, u32 ebx_in,
u32 ecx_in, u32 *eax)
static inline bool apm_bios_call_simple_asm(u32 func, u32 ebx_in,
u32 ecx_in, u32 *eax)
{
int cx, dx, si;
u8 error;
bool error;
/*
* N.B. We do NOT need a cld after the BIOS call
+10 -14
View File
@@ -4,8 +4,8 @@
#include <asm/cpufeatures.h>
#ifdef CONFIG_64BIT
/* popcnt %edi, %eax -- redundant REX prefix for alignment */
#define POPCNT32 ".byte 0xf3,0x40,0x0f,0xb8,0xc7"
/* popcnt %edi, %eax */
#define POPCNT32 ".byte 0xf3,0x0f,0xb8,0xc7"
/* popcnt %rdi, %rax */
#define POPCNT64 ".byte 0xf3,0x48,0x0f,0xb8,0xc7"
#define REG_IN "D"
@@ -17,19 +17,15 @@
#define REG_OUT "a"
#endif
/*
* __sw_hweightXX are called from within the alternatives below
* and callee-clobbered registers need to be taken care of. See
* ARCH_HWEIGHT_CFLAGS in <arch/x86/Kconfig> for the respective
* compiler switches.
*/
#define __HAVE_ARCH_SW_HWEIGHT
static __always_inline unsigned int __arch_hweight32(unsigned int w)
{
unsigned int res = 0;
unsigned int res;
asm (ALTERNATIVE("call __sw_hweight32", POPCNT32, X86_FEATURE_POPCNT)
: "="REG_OUT (res)
: REG_IN (w));
: "="REG_OUT (res)
: REG_IN (w));
return res;
}
@@ -53,11 +49,11 @@ static inline unsigned long __arch_hweight64(__u64 w)
#else
static __always_inline unsigned long __arch_hweight64(__u64 w)
{
unsigned long res = 0;
unsigned long res;
asm (ALTERNATIVE("call __sw_hweight64", POPCNT64, X86_FEATURE_POPCNT)
: "="REG_OUT (res)
: REG_IN (w));
: "="REG_OUT (res)
: REG_IN (w));
return res;
}

Some files were not shown because too many files have changed in this diff Show More