You've already forked linux-apfs
mirror of
https://github.com/linux-apfs/linux-apfs.git
synced 2026-05-01 15:00:59 -07:00
Merge branch 'mm-pkeys-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 protection key support from Ingo Molnar:
"This tree adds support for a new memory protection hardware feature
that is available in upcoming Intel CPUs: 'protection keys' (pkeys).
There's a background article at LWN.net:
https://lwn.net/Articles/643797/
The gist is that protection keys allow the encoding of
user-controllable permission masks in the pte. So instead of having a
fixed protection mask in the pte (which needs a system call to change
and works on a per page basis), the user can map a (handful of)
protection mask variants and can change the masks at runtime relatively
cheaply, without having to change every single page in the affected
virtual memory range.
This allows the dynamic switching of the protection bits of large
amounts of virtual memory, via user-space instructions. It also
allows more precise control of MMU permission bits: for example the
executable bit is separate from the read bit (see more about that
below).
This tree adds the MM infrastructure and low level x86 glue needed for
that, plus it adds a high level API to make use of protection keys -
if a user-space application calls:
mmap(..., PROT_EXEC);
or
mprotect(ptr, sz, PROT_EXEC);
(note PROT_EXEC-only, without PROT_READ/WRITE), the kernel will notice
this special case, and will set a special protection key on this
memory range. It also sets the appropriate bits in the Protection
Keys User Rights (PKRU) register so that the memory becomes unreadable
and unwritable.
So using protection keys the kernel is able to implement 'true'
PROT_EXEC on x86 CPUs: without protection keys PROT_EXEC implies
PROT_READ as well. Unreadable executable mappings have security
advantages: they cannot be read via information leaks to figure out
ASLR details, nor can they be scanned for ROP gadgets - and they
cannot be used by exploits for data purposes either.
We know about no user-space code that relies on pure PROT_EXEC
mappings today, but binary loaders could start making use of this new
feature to map binaries and libraries in a more secure fashion.
There is other pending pkeys work that offers more high level system
call APIs to manage protection keys - but those are not part of this
pull request.
Right now there's a Kconfig that controls this feature
(CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS) that is default enabled
(like most x86 CPU feature enablement code that has no runtime
overhead), but it's not user-configurable at the moment. If there's
any serious problem with this then we can make it configurable and/or
flip the default"
* 'mm-pkeys-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (38 commits)
x86/mm/pkeys: Fix mismerge of protection keys CPUID bits
mm/pkeys: Fix siginfo ABI breakage caused by new u64 field
x86/mm/pkeys: Fix access_error() denial of writes to write-only VMA
mm/core, x86/mm/pkeys: Add execute-only protection keys support
x86/mm/pkeys: Create an x86 arch_calc_vm_prot_bits() for VMA flags
x86/mm/pkeys: Allow kernel to modify user pkey rights register
x86/fpu: Allow setting of XSAVE state
x86/mm: Factor out LDT init from context init
mm/core, x86/mm/pkeys: Add arch_validate_pkey()
mm/core, arch, powerpc: Pass a protection key in to calc_vm_flag_bits()
x86/mm/pkeys: Actually enable Memory Protection Keys in the CPU
x86/mm/pkeys: Add Kconfig prompt to existing config option
x86/mm/pkeys: Dump pkey from VMA in /proc/pid/smaps
x86/mm/pkeys: Dump PKRU with other kernel registers
mm/core, x86/mm/pkeys: Differentiate instruction fetches
x86/mm/pkeys: Optimize fault handling in access_error()
mm/core: Do not enforce PKEY permissions on remote mm access
um, pkeys: Add UML arch_*_access_permitted() methods
mm/gup, x86/mm/pkeys: Check VMAs and PTEs for protection keys
x86/mm/gup: Simplify get_user_pages() PTE bit handling
...
This commit is contained in:
@@ -987,6 +987,9 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
|
||||
See Documentation/x86/intel_mpx.txt for more
|
||||
information about the feature.
|
||||
|
||||
nopku [X86] Disable Memory Protection Keys CPU feature found
|
||||
in some Intel CPUs.
|
||||
|
||||
eagerfpu= [X86]
|
||||
on enable eager fpu restore
|
||||
off disable eager fpu restore
|
||||
|
||||
@@ -2719,9 +2719,7 @@ static int cryptocop_ioctl_process(struct inode *inode, struct file *filp, unsig
|
||||
/* Acquire the mm page semaphore. */
|
||||
down_read(¤t->mm->mmap_sem);
|
||||
|
||||
err = get_user_pages(current,
|
||||
current->mm,
|
||||
(unsigned long int)(oper.indata + prev_ix),
|
||||
err = get_user_pages((unsigned long int)(oper.indata + prev_ix),
|
||||
noinpages,
|
||||
0, /* read access only for in data */
|
||||
0, /* no force */
|
||||
@@ -2736,9 +2734,7 @@ static int cryptocop_ioctl_process(struct inode *inode, struct file *filp, unsig
|
||||
}
|
||||
noinpages = err;
|
||||
if (oper.do_cipher){
|
||||
err = get_user_pages(current,
|
||||
current->mm,
|
||||
(unsigned long int)oper.cipher_outdata,
|
||||
err = get_user_pages((unsigned long int)oper.cipher_outdata,
|
||||
nooutpages,
|
||||
1, /* write access for out data */
|
||||
0, /* no force */
|
||||
|
||||
@@ -63,10 +63,15 @@ typedef struct siginfo {
|
||||
unsigned int _flags; /* see below */
|
||||
unsigned long _isr; /* isr */
|
||||
short _addr_lsb; /* lsb of faulting address */
|
||||
union {
|
||||
/* used when si_code=SEGV_BNDERR */
|
||||
struct {
|
||||
void __user *_lower;
|
||||
void __user *_upper;
|
||||
} _addr_bnd;
|
||||
/* used when si_code=SEGV_PKUERR */
|
||||
__u32 _pkey;
|
||||
};
|
||||
} _sigfault;
|
||||
|
||||
/* SIGPOLL */
|
||||
|
||||
@@ -142,8 +142,7 @@ store_virtual_to_phys(struct device *dev, struct device_attribute *attr,
|
||||
u64 virt_addr=simple_strtoull(buf, NULL, 16);
|
||||
int ret;
|
||||
|
||||
ret = get_user_pages(current, current->mm, virt_addr,
|
||||
1, VM_READ, 0, NULL, NULL);
|
||||
ret = get_user_pages(virt_addr, 1, VM_READ, 0, NULL, NULL);
|
||||
if (ret<=0) {
|
||||
#ifdef ERR_INJ_DEBUG
|
||||
printk("Virtual address %lx is not existing.\n",virt_addr);
|
||||
|
||||
@@ -86,10 +86,15 @@ typedef struct siginfo {
|
||||
int _trapno; /* TRAP # which caused the signal */
|
||||
#endif
|
||||
short _addr_lsb;
|
||||
union {
|
||||
/* used when si_code=SEGV_BNDERR */
|
||||
struct {
|
||||
void __user *_lower;
|
||||
void __user *_upper;
|
||||
} _addr_bnd;
|
||||
/* used when si_code=SEGV_PKUERR */
|
||||
__u32 _pkey;
|
||||
};
|
||||
} _sigfault;
|
||||
|
||||
/* SIGPOLL, SIGXFSZ (To do ...) */
|
||||
|
||||
+1
-2
@@ -286,8 +286,7 @@ slow_irqon:
|
||||
start += nr << PAGE_SHIFT;
|
||||
pages += nr;
|
||||
|
||||
ret = get_user_pages_unlocked(current, mm, start,
|
||||
(end - start) >> PAGE_SHIFT,
|
||||
ret = get_user_pages_unlocked(start, (end - start) >> PAGE_SHIFT,
|
||||
write, 0, pages);
|
||||
|
||||
/* Have to be a bit careful with return values */
|
||||
|
||||
@@ -18,11 +18,12 @@
|
||||
* This file is included by linux/mman.h, so we can't use calc_vm_prot_bits()
|
||||
* here. How important is the optimization?
|
||||
*/
|
||||
static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot)
|
||||
static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot,
|
||||
unsigned long pkey)
|
||||
{
|
||||
return (prot & PROT_SAO) ? VM_SAO : 0;
|
||||
}
|
||||
#define arch_calc_vm_prot_bits(prot) arch_calc_vm_prot_bits(prot)
|
||||
#define arch_calc_vm_prot_bits(prot, pkey) arch_calc_vm_prot_bits(prot, pkey)
|
||||
|
||||
static inline pgprot_t arch_vm_get_page_prot(unsigned long vm_flags)
|
||||
{
|
||||
|
||||
@@ -148,5 +148,17 @@ static inline void arch_bprm_mm_init(struct mm_struct *mm,
|
||||
{
|
||||
}
|
||||
|
||||
static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
|
||||
bool write, bool execute, bool foreign)
|
||||
{
|
||||
/* by default, allow everything */
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline bool arch_pte_access_permitted(pte_t pte, bool write)
|
||||
{
|
||||
/* by default, allow everything */
|
||||
return true;
|
||||
}
|
||||
#endif /* __KERNEL__ */
|
||||
#endif /* __ASM_POWERPC_MMU_CONTEXT_H */
|
||||
|
||||
@@ -136,4 +136,16 @@ static inline void arch_bprm_mm_init(struct mm_struct *mm,
|
||||
{
|
||||
}
|
||||
|
||||
static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
|
||||
bool write, bool execute, bool foreign)
|
||||
{
|
||||
/* by default, allow everything */
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline bool arch_pte_access_permitted(pte_t pte, bool write)
|
||||
{
|
||||
/* by default, allow everything */
|
||||
return true;
|
||||
}
|
||||
#endif /* __S390_MMU_CONTEXT_H */
|
||||
|
||||
+1
-3
@@ -210,7 +210,6 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
|
||||
int get_user_pages_fast(unsigned long start, int nr_pages, int write,
|
||||
struct page **pages)
|
||||
{
|
||||
struct mm_struct *mm = current->mm;
|
||||
int nr, ret;
|
||||
|
||||
might_sleep();
|
||||
@@ -222,8 +221,7 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write,
|
||||
/* Try to get the remaining pages with get_user_pages */
|
||||
start += nr << PAGE_SHIFT;
|
||||
pages += nr;
|
||||
ret = get_user_pages_unlocked(current, mm, start,
|
||||
nr_pages - nr, write, 0, pages);
|
||||
ret = get_user_pages_unlocked(start, nr_pages - nr, write, 0, pages);
|
||||
/* Have to be a bit careful with return values */
|
||||
if (nr > 0)
|
||||
ret = (ret < 0) ? nr : ret + nr;
|
||||
|
||||
+1
-1
@@ -257,7 +257,7 @@ slow_irqon:
|
||||
start += nr << PAGE_SHIFT;
|
||||
pages += nr;
|
||||
|
||||
ret = get_user_pages_unlocked(current, mm, start,
|
||||
ret = get_user_pages_unlocked(start,
|
||||
(end - start) >> PAGE_SHIFT, write, 0, pages);
|
||||
|
||||
/* Have to be a bit careful with return values */
|
||||
|
||||
+1
-1
@@ -237,7 +237,7 @@ slow:
|
||||
start += nr << PAGE_SHIFT;
|
||||
pages += nr;
|
||||
|
||||
ret = get_user_pages_unlocked(current, mm, start,
|
||||
ret = get_user_pages_unlocked(start,
|
||||
(end - start) >> PAGE_SHIFT, write, 0, pages);
|
||||
|
||||
/* Have to be a bit careful with return values */
|
||||
|
||||
@@ -27,6 +27,20 @@ static inline void arch_bprm_mm_init(struct mm_struct *mm,
|
||||
struct vm_area_struct *vma)
|
||||
{
|
||||
}
|
||||
|
||||
static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
|
||||
bool write, bool execute, bool foreign)
|
||||
{
|
||||
/* by default, allow everything */
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline bool arch_pte_access_permitted(pte_t pte, bool write)
|
||||
{
|
||||
/* by default, allow everything */
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* end asm-generic/mm_hooks.h functions
|
||||
*/
|
||||
|
||||
@@ -97,4 +97,16 @@ static inline void arch_bprm_mm_init(struct mm_struct *mm,
|
||||
{
|
||||
}
|
||||
|
||||
/*
 * Architecture hook deciding whether an access to @vma is permitted.
 *
 * @vma:     the VMA being accessed (unused here)
 * @write:   true for a write access (unused here)
 * @execute: true for an instruction fetch (unused here)
 * @foreign: true when the access targets another mm (unused here)
 *
 * This architecture imposes no extra restrictions, so the stub always
 * returns true.  The parameter list must stay identical to the other
 * per-architecture stubs of this hook, which all take
 * (vma, write, execute, foreign); a three-argument variant cannot be
 * called by code written against the four-argument form.
 */
static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
		bool write, bool execute, bool foreign)
{
	/* by default, allow everything */
	return true;
}
|
||||
|
||||
static inline bool arch_pte_access_permitted(pte_t pte, bool write)
|
||||
{
|
||||
/* by default, allow everything */
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -156,6 +156,8 @@ config X86
|
||||
select X86_DEV_DMA_OPS if X86_64
|
||||
select X86_FEATURE_NAMES if PROC_FS
|
||||
select HAVE_STACK_VALIDATION if X86_64
|
||||
select ARCH_USES_HIGH_VMA_FLAGS if X86_INTEL_MEMORY_PROTECTION_KEYS
|
||||
select ARCH_HAS_PKEYS if X86_INTEL_MEMORY_PROTECTION_KEYS
|
||||
|
||||
config INSTRUCTION_DECODER
|
||||
def_bool y
|
||||
@@ -1719,6 +1721,20 @@ config X86_INTEL_MPX
|
||||
|
||||
If unsure, say N.
|
||||
|
||||
config X86_INTEL_MEMORY_PROTECTION_KEYS
|
||||
prompt "Intel Memory Protection Keys"
|
||||
def_bool y
|
||||
# Note: only available in 64-bit mode
|
||||
depends on CPU_SUP_INTEL && X86_64
|
||||
---help---
|
||||
Memory Protection Keys provides a mechanism for enforcing
|
||||
page-based protections, but without requiring modification of the
|
||||
page tables when an application changes protection domains.
|
||||
|
||||
For details, see Documentation/x86/protection-keys.txt
|
||||
|
||||
If unsure, say y.
|
||||
|
||||
config EFI
|
||||
bool "EFI runtime service support"
|
||||
depends on ACPI
|
||||
|
||||
@@ -26,6 +26,7 @@ enum cpuid_leafs
|
||||
CPUID_8000_0008_EBX,
|
||||
CPUID_6_EAX,
|
||||
CPUID_8000_000A_EDX,
|
||||
CPUID_7_ECX,
|
||||
};
|
||||
|
||||
#ifdef CONFIG_X86_FEATURE_NAMES
|
||||
@@ -57,7 +58,14 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
|
||||
(((bit)>>5)==6 && (1UL<<((bit)&31) & REQUIRED_MASK6 )) || \
|
||||
(((bit)>>5)==7 && (1UL<<((bit)&31) & REQUIRED_MASK7 )) || \
|
||||
(((bit)>>5)==8 && (1UL<<((bit)&31) & REQUIRED_MASK8 )) || \
|
||||
(((bit)>>5)==9 && (1UL<<((bit)&31) & REQUIRED_MASK9)) )
|
||||
(((bit)>>5)==9 && (1UL<<((bit)&31) & REQUIRED_MASK9 )) || \
|
||||
(((bit)>>5)==10 && (1UL<<((bit)&31) & REQUIRED_MASK10)) || \
|
||||
(((bit)>>5)==11 && (1UL<<((bit)&31) & REQUIRED_MASK11)) || \
|
||||
(((bit)>>5)==12 && (1UL<<((bit)&31) & REQUIRED_MASK12)) || \
|
||||
(((bit)>>5)==13 && (1UL<<((bit)&31) & REQUIRED_MASK13)) || \
|
||||
(((bit)>>5)==13 && (1UL<<((bit)&31) & REQUIRED_MASK14)) || \
|
||||
(((bit)>>5)==13 && (1UL<<((bit)&31) & REQUIRED_MASK15)) || \
|
||||
(((bit)>>5)==14 && (1UL<<((bit)&31) & REQUIRED_MASK16)) )
|
||||
|
||||
#define DISABLED_MASK_BIT_SET(bit) \
|
||||
( (((bit)>>5)==0 && (1UL<<((bit)&31) & DISABLED_MASK0 )) || \
|
||||
@@ -69,7 +77,14 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
|
||||
(((bit)>>5)==6 && (1UL<<((bit)&31) & DISABLED_MASK6 )) || \
|
||||
(((bit)>>5)==7 && (1UL<<((bit)&31) & DISABLED_MASK7 )) || \
|
||||
(((bit)>>5)==8 && (1UL<<((bit)&31) & DISABLED_MASK8 )) || \
|
||||
(((bit)>>5)==9 && (1UL<<((bit)&31) & DISABLED_MASK9)) )
|
||||
(((bit)>>5)==9 && (1UL<<((bit)&31) & DISABLED_MASK9 )) || \
|
||||
(((bit)>>5)==10 && (1UL<<((bit)&31) & DISABLED_MASK10)) || \
|
||||
(((bit)>>5)==11 && (1UL<<((bit)&31) & DISABLED_MASK11)) || \
|
||||
(((bit)>>5)==12 && (1UL<<((bit)&31) & DISABLED_MASK12)) || \
|
||||
(((bit)>>5)==13 && (1UL<<((bit)&31) & DISABLED_MASK13)) || \
|
||||
(((bit)>>5)==13 && (1UL<<((bit)&31) & DISABLED_MASK14)) || \
|
||||
(((bit)>>5)==13 && (1UL<<((bit)&31) & DISABLED_MASK15)) || \
|
||||
(((bit)>>5)==14 && (1UL<<((bit)&31) & DISABLED_MASK16)) )
|
||||
|
||||
#define cpu_has(c, bit) \
|
||||
(__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \
|
||||
|
||||
@@ -12,7 +12,7 @@
|
||||
/*
|
||||
* Defines x86 CPU feature bits
|
||||
*/
|
||||
#define NCAPINTS 16 /* N 32-bit words worth of info */
|
||||
#define NCAPINTS 17 /* N 32-bit words worth of info */
|
||||
#define NBUGINTS 1 /* N 32-bit bug flags */
|
||||
|
||||
/*
|
||||
@@ -274,6 +274,10 @@
|
||||
#define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */
|
||||
#define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */
|
||||
|
||||
/* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx), word 16 */
|
||||
#define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */
|
||||
#define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */
|
||||
|
||||
/*
|
||||
* BUG word(s)
|
||||
*/
|
||||
|
||||
@@ -28,6 +28,14 @@
|
||||
# define DISABLE_CENTAUR_MCR 0
|
||||
#endif /* CONFIG_X86_64 */
|
||||
|
||||
/*
 * Compile-time disabling of protection keys.
 *
 * DISABLE_PKU/DISABLE_OSPKE feed DISABLED_MASK16, which
 * DISABLED_MASK_BIT_SET() compares against (1UL << (bit & 31)).  They must
 * therefore be:
 *   - 0 when CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS is set (feature usable),
 *   - the feature's bit within its 32-bit word when the option is off.
 *
 * X86_FEATURE_* values encode word*32 + bit, so the shift count has to be
 * reduced with "& 31"; shifting by the raw feature number (e.g. 16*32+3)
 * exceeds the width of the type.
 */
#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
# define DISABLE_PKU		0
# define DISABLE_OSPKE		0
#else
# define DISABLE_PKU		(1<<(X86_FEATURE_PKU & 31))
# define DISABLE_OSPKE		(1<<(X86_FEATURE_OSPKE & 31))
#endif /* CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS */
|
||||
|
||||
/*
|
||||
* Make sure to add features to the correct mask
|
||||
*/
|
||||
@@ -41,5 +49,12 @@
|
||||
#define DISABLED_MASK7 0
|
||||
#define DISABLED_MASK8 0
|
||||
#define DISABLED_MASK9 (DISABLE_MPX)
|
||||
#define DISABLED_MASK10 0
|
||||
#define DISABLED_MASK11 0
|
||||
#define DISABLED_MASK12 0
|
||||
#define DISABLED_MASK13 0
|
||||
#define DISABLED_MASK14 0
|
||||
#define DISABLED_MASK15 0
|
||||
#define DISABLED_MASK16 (DISABLE_PKU|DISABLE_OSPKE)
|
||||
|
||||
#endif /* _ASM_X86_DISABLED_FEATURES_H */
|
||||
|
||||
@@ -25,6 +25,8 @@
|
||||
extern void fpu__activate_curr(struct fpu *fpu);
|
||||
extern void fpu__activate_fpstate_read(struct fpu *fpu);
|
||||
extern void fpu__activate_fpstate_write(struct fpu *fpu);
|
||||
extern void fpu__current_fpstate_write_begin(void);
|
||||
extern void fpu__current_fpstate_write_end(void);
|
||||
extern void fpu__save(struct fpu *fpu);
|
||||
extern void fpu__restore(struct fpu *fpu);
|
||||
extern int fpu__restore_sig(void __user *buf, int ia32_frame);
|
||||
|
||||
@@ -108,6 +108,8 @@ enum xfeature {
|
||||
XFEATURE_OPMASK,
|
||||
XFEATURE_ZMM_Hi256,
|
||||
XFEATURE_Hi16_ZMM,
|
||||
XFEATURE_PT_UNIMPLEMENTED_SO_FAR,
|
||||
XFEATURE_PKRU,
|
||||
|
||||
XFEATURE_MAX,
|
||||
};
|
||||
@@ -120,6 +122,7 @@ enum xfeature {
|
||||
#define XFEATURE_MASK_OPMASK (1 << XFEATURE_OPMASK)
|
||||
#define XFEATURE_MASK_ZMM_Hi256 (1 << XFEATURE_ZMM_Hi256)
|
||||
#define XFEATURE_MASK_Hi16_ZMM (1 << XFEATURE_Hi16_ZMM)
|
||||
#define XFEATURE_MASK_PKRU (1 << XFEATURE_PKRU)
|
||||
|
||||
#define XFEATURE_MASK_FPSSE (XFEATURE_MASK_FP | XFEATURE_MASK_SSE)
|
||||
#define XFEATURE_MASK_AVX512 (XFEATURE_MASK_OPMASK \
|
||||
@@ -212,6 +215,15 @@ struct avx_512_hi16_state {
|
||||
struct reg_512_bit hi16_zmm[16];
|
||||
} __packed;
|
||||
|
||||
/*
|
||||
* State component 9: 32-bit PKRU register. The state is
|
||||
* 8 bytes long but only 4 bytes are used currently.
|
||||
*/
|
||||
struct pkru_state {
|
||||
u32 pkru;
|
||||
u32 pad;
|
||||
} __packed;
|
||||
|
||||
struct xstate_header {
|
||||
u64 xfeatures;
|
||||
u64 xcomp_bv;
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user