Merge tag 'x86_mm_for_6.2_v2' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 mm updates from Dave Hansen:
 "New Feature:

   - Randomize the per-cpu entry areas

  Cleanups:

   - Have CR3_ADDR_MASK use PHYSICAL_PAGE_MASK instead of open coding it

   - Move to "native" set_memory_rox() helper

   - Clean up pmd_get_atomic() and i386-PAE

   - Remove some unused page table size macros"

* tag 'x86_mm_for_6.2_v2' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (35 commits)
  x86/mm: Ensure forced page table splitting
  x86/kasan: Populate shadow for shared chunk of the CPU entry area
  x86/kasan: Add helpers to align shadow addresses up and down
  x86/kasan: Rename local CPU_ENTRY_AREA variables to shorten names
  x86/mm: Populate KASAN shadow for entire per-CPU range of CPU entry area
  x86/mm: Recompute physical address for every page of per-CPU CEA mapping
  x86/mm: Rename __change_page_attr_set_clr(.checkalias)
  x86/mm: Inhibit _PAGE_NX changes from cpa_process_alias()
  x86/mm: Untangle __change_page_attr_set_clr(.checkalias)
  x86/mm: Add a few comments
  x86/mm: Fix CR3_ADDR_MASK
  x86/mm: Remove P*D_PAGE_MASK and P*D_PAGE_SIZE macros
  mm: Convert __HAVE_ARCH_P..P_GET to the new style
  mm: Remove pointless barrier() after pmdp_get_lockless()
  x86/mm/pae: Get rid of set_64bit()
  x86_64: Remove pointless set_64bit() usage
  x86/mm/pae: Be consistent with pXXp_get_and_clear()
  x86/mm/pae: Use WRITE_ONCE()
  x86/mm/pae: Don't (ab)use atomic64
  mm/gup: Fix the lockless PMD access
  ...
This commit is contained in:
Linus Torvalds
2022-12-17 14:06:53 -06:00
55 changed files with 358 additions and 397 deletions

View File

@@ -10,11 +10,11 @@
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/io.h>
#include <linux/set_memory.h>
#include <asm/fncpy.h>
#include <asm/tlb.h>
#include <asm/cacheflush.h>
#include <asm/set_memory.h>
#include <asm/mach/map.h>
@@ -74,8 +74,7 @@ void *omap_sram_push(void *funcp, unsigned long size)
dst = fncpy(sram, funcp, size);
set_memory_ro(base, pages);
set_memory_x(base, pages);
set_memory_rox(base, pages);
return dst;
}
@@ -126,8 +125,7 @@ static void __init omap_detect_and_map_sram(void)
base = (unsigned long)omap_sram_base;
pages = PAGE_ALIGN(omap_sram_size) / PAGE_SIZE;
set_memory_ro(base, pages);
set_memory_x(base, pages);
set_memory_rox(base, pages);
}
static void (*_omap_sram_reprogram_clock)(u32 dpllctl, u32 ckctl);

View File

@@ -14,11 +14,11 @@
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/io.h>
#include <linux/set_memory.h>
#include <asm/fncpy.h>
#include <asm/tlb.h>
#include <asm/cacheflush.h>
#include <asm/set_memory.h>
#include <asm/mach/map.h>
@@ -96,8 +96,7 @@ void *omap_sram_push(void *funcp, unsigned long size)
dst = fncpy(sram, funcp, size);
set_memory_ro(base, pages);
set_memory_x(base, pages);
set_memory_rox(base, pages);
return dst;
}
@@ -217,8 +216,7 @@ static void __init omap2_map_sram(void)
base = (unsigned long)omap_sram_base;
pages = PAGE_ALIGN(omap_sram_size) / PAGE_SIZE;
set_memory_ro(base, pages);
set_memory_x(base, pages);
set_memory_rox(base, pages);
}
static void (*_omap2_sram_ddr_init)(u32 *slow_dll_ctrl, u32 fast_dll_ctrl,

View File

@@ -46,7 +46,7 @@ config MIPS
select GENERIC_SCHED_CLOCK if !CAVIUM_OCTEON_SOC
select GENERIC_SMP_IDLE_THREAD
select GENERIC_TIME_VSYSCALL
select GUP_GET_PTE_LOW_HIGH if CPU_MIPS32 && PHYS_ADDR_T_64BIT
select GUP_GET_PXX_LOW_HIGH if CPU_MIPS32 && PHYS_ADDR_T_64BIT
select HAVE_ARCH_COMPILER_H
select HAVE_ARCH_JUMP_LABEL
select HAVE_ARCH_KGDB if MIPS_FP_SUPPORT

View File

@@ -263,7 +263,7 @@ static inline pte_basic_t pte_update(struct mm_struct *mm, unsigned long addr, p
}
#ifdef CONFIG_PPC_16K_PAGES
#define __HAVE_ARCH_PTEP_GET
#define ptep_get ptep_get
static inline pte_t ptep_get(pte_t *ptep)
{
pte_basic_t val = READ_ONCE(ptep->pte);

View File

@@ -20,12 +20,12 @@
#include <linux/kdebug.h>
#include <linux/slab.h>
#include <linux/moduleloader.h>
#include <linux/set_memory.h>
#include <asm/code-patching.h>
#include <asm/cacheflush.h>
#include <asm/sstep.h>
#include <asm/sections.h>
#include <asm/inst.h>
#include <asm/set_memory.h>
#include <linux/uaccess.h>
DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
@@ -134,10 +134,9 @@ void *alloc_insn_page(void)
if (!page)
return NULL;
if (strict_module_rwx_enabled()) {
set_memory_ro((unsigned long)page, 1);
set_memory_x((unsigned long)page, 1);
}
if (strict_module_rwx_enabled())
set_memory_rox((unsigned long)page, 1);
return page;
}

View File

@@ -24,7 +24,7 @@ config SUPERH
select GENERIC_PCI_IOMAP if PCI
select GENERIC_SCHED_CLOCK
select GENERIC_SMP_IDLE_THREAD
select GUP_GET_PTE_LOW_HIGH if X2TLB
select GUP_GET_PXX_LOW_HIGH if X2TLB
select HAVE_ARCH_AUDITSYSCALL
select HAVE_ARCH_KGDB
select HAVE_ARCH_SECCOMP_FILTER

View File

@@ -28,9 +28,15 @@
#define pmd_ERROR(e) \
printk("%s:%d: bad pmd %016llx.\n", __FILE__, __LINE__, pmd_val(e))
typedef struct { unsigned long long pmd; } pmd_t;
typedef struct {
struct {
unsigned long pmd_low;
unsigned long pmd_high;
};
unsigned long long pmd;
} pmd_t;
#define pmd_val(x) ((x).pmd)
#define __pmd(x) ((pmd_t) { (x) } )
#define __pmd(x) ((pmd_t) { .pmd = (x) } )
static inline pmd_t *pud_pgtable(pud_t pud)
{

View File

@@ -58,11 +58,7 @@
#define pud_populate(mm, pud, pmd) \
set_pud(pud, __pud(_PAGE_TABLE + __pa(pmd)))
#ifdef CONFIG_64BIT
#define set_pud(pudptr, pudval) set_64bit((u64 *) (pudptr), pud_val(pudval))
#else
#define set_pud(pudptr, pudval) (*(pudptr) = (pudval))
#endif
static inline int pgd_newpage(pgd_t pgd)
{
@@ -71,11 +67,7 @@ static inline int pgd_newpage(pgd_t pgd)
static inline void pgd_mkuptodate(pgd_t pgd) { pgd_val(pgd) &= ~_PAGE_NEWPAGE; }
#ifdef CONFIG_64BIT
#define set_pmd(pmdptr, pmdval) set_64bit((u64 *) (pmdptr), pmd_val(pmdval))
#else
#define set_pmd(pmdptr, pmdval) (*(pmdptr) = (pmdval))
#endif
static inline void pud_clear (pud_t *pud)
{

View File

@@ -159,7 +159,7 @@ config X86
select GENERIC_TIME_VSYSCALL
select GENERIC_GETTIMEOFDAY
select GENERIC_VDSO_TIME_NS
select GUP_GET_PTE_LOW_HIGH if X86_PAE
select GUP_GET_PXX_LOW_HIGH if X86_PAE
select HARDIRQS_SW_RESEND
select HARDLOCKUP_CHECK_TIMESTAMP if X86_64
select HAVE_ACPI_APEI if ACPI

View File

@@ -7,34 +7,6 @@
* you need to test for the feature in boot_cpu_data.
*/
/*
* CMPXCHG8B only writes to the target if we had the previous
* value in registers, otherwise it acts as a read and gives us the
* "new previous" value. That is why there is a loop. Preloading
* EDX:EAX is a performance optimization: in the common case it means
* we need only one locked operation.
*
* A SIMD/3DNOW!/MMX/FPU 64-bit store here would require at the very
* least an FPU save and/or %cr0.ts manipulation.
*
* cmpxchg8b must be used with the lock prefix here to allow the
* instruction to be executed atomically. We need to have the reader
* side to see the coherent 64bit value.
*/
static inline void set_64bit(volatile u64 *ptr, u64 value)
{
u32 low = value;
u32 high = value >> 32;
u64 prev = *ptr;
asm volatile("\n1:\t"
LOCK_PREFIX "cmpxchg8b %0\n\t"
"jnz 1b"
: "=m" (*ptr), "+A" (prev)
: "b" (low), "c" (high)
: "memory");
}
#ifdef CONFIG_X86_CMPXCHG64
#define arch_cmpxchg64(ptr, o, n) \
((__typeof__(*(ptr)))__cmpxchg64((ptr), (unsigned long long)(o), \

View File

@@ -2,11 +2,6 @@
#ifndef _ASM_X86_CMPXCHG_64_H
#define _ASM_X86_CMPXCHG_64_H
static inline void set_64bit(volatile u64 *ptr, u64 val)
{
*ptr = val;
}
#define arch_cmpxchg64(ptr, o, n) \
({ \
BUILD_BUG_ON(sizeof(*(ptr)) != 8); \

View File

@@ -130,10 +130,6 @@ struct cpu_entry_area {
};
#define CPU_ENTRY_AREA_SIZE (sizeof(struct cpu_entry_area))
#define CPU_ENTRY_AREA_ARRAY_SIZE (CPU_ENTRY_AREA_SIZE * NR_CPUS)
/* Total size includes the readonly IDT mapping page as well: */
#define CPU_ENTRY_AREA_TOTAL_SIZE (CPU_ENTRY_AREA_ARRAY_SIZE + PAGE_SIZE)
DECLARE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);
DECLARE_PER_CPU(struct cea_exception_stacks *, cea_exception_stacks);

View File

@@ -28,9 +28,12 @@
#ifdef CONFIG_KASAN
void __init kasan_early_init(void);
void __init kasan_init(void);
void __init kasan_populate_shadow_for_vaddr(void *va, size_t size, int nid);
#else
static inline void kasan_early_init(void) { }
static inline void kasan_init(void) { }
static inline void kasan_populate_shadow_for_vaddr(void *va, size_t size,
int nid) { }
#endif
#endif

View File

@@ -11,20 +11,14 @@
#define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT)
#define PAGE_MASK (~(PAGE_SIZE-1))
#define PMD_PAGE_SIZE (_AC(1, UL) << PMD_SHIFT)
#define PMD_PAGE_MASK (~(PMD_PAGE_SIZE-1))
#define PUD_PAGE_SIZE (_AC(1, UL) << PUD_SHIFT)
#define PUD_PAGE_MASK (~(PUD_PAGE_SIZE-1))
#define __VIRTUAL_MASK ((1UL << __VIRTUAL_MASK_SHIFT) - 1)
/* Cast *PAGE_MASK to a signed type so that it is sign-extended if
/* Cast P*D_MASK to a signed type so that it is sign-extended if
virtual addresses are 32-bits but physical addresses are larger
(ie, 32-bit PAE). */
#define PHYSICAL_PAGE_MASK (((signed long)PAGE_MASK) & __PHYSICAL_MASK)
#define PHYSICAL_PMD_PAGE_MASK (((signed long)PMD_PAGE_MASK) & __PHYSICAL_MASK)
#define PHYSICAL_PUD_PAGE_MASK (((signed long)PUD_PAGE_MASK) & __PHYSICAL_MASK)
#define PHYSICAL_PMD_PAGE_MASK (((signed long)PMD_MASK) & __PHYSICAL_MASK)
#define PHYSICAL_PUD_PAGE_MASK (((signed long)PUD_MASK) & __PHYSICAL_MASK)
#define HPAGE_SHIFT PMD_SHIFT
#define HPAGE_SIZE (_AC(1,UL) << HPAGE_SHIFT)

View File

@@ -2,8 +2,6 @@
#ifndef _ASM_X86_PGTABLE_3LEVEL_H
#define _ASM_X86_PGTABLE_3LEVEL_H
#include <asm/atomic64_32.h>
/*
* Intel Physical Address Extension (PAE) Mode - three-level page
* tables on PPro+ CPUs.
@@ -21,7 +19,15 @@
pr_err("%s:%d: bad pgd %p(%016Lx)\n", \
__FILE__, __LINE__, &(e), pgd_val(e))
/* Rules for using set_pte: the pte being assigned *must* be
#define pxx_xchg64(_pxx, _ptr, _val) ({ \
_pxx##val_t *_p = (_pxx##val_t *)_ptr; \
_pxx##val_t _o = *_p; \
do { } while (!try_cmpxchg64(_p, &_o, (_val))); \
native_make_##_pxx(_o); \
})
/*
* Rules for using set_pte: the pte being assigned *must* be
* either not present or in a state where the hardware will
* not attempt to update the pte. In places where this is
* not possible, use pte_get_and_clear to obtain the old pte
@@ -29,75 +35,19 @@
*/
static inline void native_set_pte(pte_t *ptep, pte_t pte)
{
ptep->pte_high = pte.pte_high;
WRITE_ONCE(ptep->pte_high, pte.pte_high);
smp_wmb();
ptep->pte_low = pte.pte_low;
}
#define pmd_read_atomic pmd_read_atomic
/*
* pte_offset_map_lock() on 32-bit PAE kernels was reading the pmd_t with
* a "*pmdp" dereference done by GCC. Problem is, in certain places
* where pte_offset_map_lock() is called, concurrent page faults are
* allowed, if the mmap_lock is hold for reading. An example is mincore
* vs page faults vs MADV_DONTNEED. On the page fault side
* pmd_populate() rightfully does a set_64bit(), but if we're reading the
* pmd_t with a "*pmdp" on the mincore side, a SMP race can happen
* because GCC will not read the 64-bit value of the pmd atomically.
*
* To fix this all places running pte_offset_map_lock() while holding the
* mmap_lock in read mode, shall read the pmdp pointer using this
* function to know if the pmd is null or not, and in turn to know if
* they can run pte_offset_map_lock() or pmd_trans_huge() or other pmd
* operations.
*
* Without THP if the mmap_lock is held for reading, the pmd can only
* transition from null to not null while pmd_read_atomic() runs. So
* we can always return atomic pmd values with this function.
*
* With THP if the mmap_lock is held for reading, the pmd can become
* trans_huge or none or point to a pte (and in turn become "stable")
* at any time under pmd_read_atomic(). We could read it truly
* atomically here with an atomic64_read() for the THP enabled case (and
* it would be a whole lot simpler), but to avoid using cmpxchg8b we
* only return an atomic pmdval if the low part of the pmdval is later
* found to be stable (i.e. pointing to a pte). We are also returning a
* 'none' (zero) pmdval if the low part of the pmd is zero.
*
* In some cases the high and low part of the pmdval returned may not be
* consistent if THP is enabled (the low part may point to previously
* mapped hugepage, while the high part may point to a more recently
* mapped hugepage), but pmd_none_or_trans_huge_or_clear_bad() only
* needs the low part of the pmd to be read atomically to decide if the
* pmd is unstable or not, with the only exception when the low part
* of the pmd is zero, in which case we return a 'none' pmd.
*/
static inline pmd_t pmd_read_atomic(pmd_t *pmdp)
{
pmdval_t ret;
u32 *tmp = (u32 *)pmdp;
ret = (pmdval_t) (*tmp);
if (ret) {
/*
* If the low part is null, we must not read the high part
* or we can end up with a partial pmd.
*/
smp_rmb();
ret |= ((pmdval_t)*(tmp + 1)) << 32;
}
return (pmd_t) { ret };
WRITE_ONCE(ptep->pte_low, pte.pte_low);
}
static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte)
{
set_64bit((unsigned long long *)(ptep), native_pte_val(pte));
pxx_xchg64(pte, ptep, native_pte_val(pte));
}
static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd)
{
set_64bit((unsigned long long *)(pmdp), native_pmd_val(pmd));
pxx_xchg64(pmd, pmdp, native_pmd_val(pmd));
}
static inline void native_set_pud(pud_t *pudp, pud_t pud)
@@ -105,7 +55,7 @@ static inline void native_set_pud(pud_t *pudp, pud_t pud)
#ifdef CONFIG_PAGE_TABLE_ISOLATION
pud.p4d.pgd = pti_set_user_pgtbl(&pudp->p4d.pgd, pud.p4d.pgd);
#endif
set_64bit((unsigned long long *)(pudp), native_pud_val(pud));
pxx_xchg64(pud, pudp, native_pud_val(pud));
}
/*
@@ -116,17 +66,16 @@ static inline void native_set_pud(pud_t *pudp, pud_t pud)
static inline void native_pte_clear(struct mm_struct *mm, unsigned long addr,
pte_t *ptep)
{
ptep->pte_low = 0;
WRITE_ONCE(ptep->pte_low, 0);
smp_wmb();
ptep->pte_high = 0;
WRITE_ONCE(ptep->pte_high, 0);
}
static inline void native_pmd_clear(pmd_t *pmd)
static inline void native_pmd_clear(pmd_t *pmdp)
{
u32 *tmp = (u32 *)pmd;
*tmp = 0;
WRITE_ONCE(pmdp->pmd_low, 0);
smp_wmb();
*(tmp + 1) = 0;
WRITE_ONCE(pmdp->pmd_high, 0);
}
static inline void native_pud_clear(pud_t *pudp)
@@ -149,41 +98,26 @@ static inline void pud_clear(pud_t *pudp)
*/
}
#ifdef CONFIG_SMP
static inline pte_t native_ptep_get_and_clear(pte_t *ptep)
{
pte_t res;
return pxx_xchg64(pte, ptep, 0ULL);
}
res.pte = (pteval_t)arch_atomic64_xchg((atomic64_t *)ptep, 0);
static inline pmd_t native_pmdp_get_and_clear(pmd_t *pmdp)
{
return pxx_xchg64(pmd, pmdp, 0ULL);
}
return res;
static inline pud_t native_pudp_get_and_clear(pud_t *pudp)
{
return pxx_xchg64(pud, pudp, 0ULL);
}
#else
#define native_ptep_get_and_clear(xp) native_local_ptep_get_and_clear(xp)
#endif
union split_pmd {
struct {
u32 pmd_low;
u32 pmd_high;
};
pmd_t pmd;
};
#ifdef CONFIG_SMP
static inline pmd_t native_pmdp_get_and_clear(pmd_t *pmdp)
{
union split_pmd res, *orig = (union split_pmd *)pmdp;
/* xchg acts as a barrier before setting of the high bits */
res.pmd_low = xchg(&orig->pmd_low, 0);
res.pmd_high = orig->pmd_high;
orig->pmd_high = 0;
return res.pmd;
}
#else
#define native_pmdp_get_and_clear(xp) native_local_pmdp_get_and_clear(xp)
#define native_pudp_get_and_clear(xp) native_local_pudp_get_and_clear(xp)
#endif
#ifndef pmdp_establish
@@ -199,55 +133,18 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
* anybody.
*/
if (!(pmd_val(pmd) & _PAGE_PRESENT)) {
union split_pmd old, new, *ptr;
ptr = (union split_pmd *)pmdp;
new.pmd = pmd;
/* xchg acts as a barrier before setting of the high bits */
old.pmd_low = xchg(&ptr->pmd_low, new.pmd_low);
old.pmd_high = ptr->pmd_high;
ptr->pmd_high = new.pmd_high;
return old.pmd;
old.pmd_low = xchg(&pmdp->pmd_low, pmd.pmd_low);
old.pmd_high = READ_ONCE(pmdp->pmd_high);
WRITE_ONCE(pmdp->pmd_high, pmd.pmd_high);
return old;
}
do {
old = *pmdp;
} while (cmpxchg64(&pmdp->pmd, old.pmd, pmd.pmd) != old.pmd);
return old;
return pxx_xchg64(pmd, pmdp, pmd.pmd);
}
#endif
#ifdef CONFIG_SMP
union split_pud {
struct {
u32 pud_low;
u32 pud_high;
};
pud_t pud;
};
static inline pud_t native_pudp_get_and_clear(pud_t *pudp)
{
union split_pud res, *orig = (union split_pud *)pudp;
#ifdef CONFIG_PAGE_TABLE_ISOLATION
pti_set_user_pgtbl(&pudp->p4d.pgd, __pgd(0));
#endif
/* xchg acts as a barrier before setting of the high bits */
res.pud_low = xchg(&orig->pud_low, 0);
res.pud_high = orig->pud_high;
orig->pud_high = 0;
return res.pud;
}
#else
#define native_pudp_get_and_clear(xp) native_local_pudp_get_and_clear(xp)
#endif
/* Encode and de-code a swap entry */
#define SWP_TYPE_BITS 5

View File

@@ -18,6 +18,13 @@ typedef union {
};
pteval_t pte;
} pte_t;
typedef union {
struct {
unsigned long pmd_low, pmd_high;
};
pmdval_t pmd;
} pmd_t;
#endif /* !__ASSEMBLY__ */
#define SHARED_KERNEL_PMD (!static_cpu_has(X86_FEATURE_PTI))

View File

@@ -19,6 +19,7 @@ typedef unsigned long pgdval_t;
typedef unsigned long pgprotval_t;
typedef struct { pteval_t pte; } pte_t;
typedef struct { pmdval_t pmd; } pmd_t;
#ifdef CONFIG_X86_5LEVEL
extern unsigned int __pgtable_l5_enabled;

View File

@@ -11,6 +11,12 @@
#define CPU_ENTRY_AREA_RO_IDT_VADDR ((void *)CPU_ENTRY_AREA_RO_IDT)
#define CPU_ENTRY_AREA_MAP_SIZE (CPU_ENTRY_AREA_PER_CPU + CPU_ENTRY_AREA_ARRAY_SIZE - CPU_ENTRY_AREA_BASE)
#ifdef CONFIG_X86_32
#define CPU_ENTRY_AREA_MAP_SIZE (CPU_ENTRY_AREA_PER_CPU + \
(CPU_ENTRY_AREA_SIZE * NR_CPUS) - \
CPU_ENTRY_AREA_BASE)
#else
#define CPU_ENTRY_AREA_MAP_SIZE P4D_SIZE
#endif
#endif /* _ASM_X86_PGTABLE_AREAS_H */

View File

@@ -361,11 +361,9 @@ static inline pudval_t native_pud_val(pud_t pud)
#endif
#if CONFIG_PGTABLE_LEVELS > 2
typedef struct { pmdval_t pmd; } pmd_t;
static inline pmd_t native_make_pmd(pmdval_t val)
{
return (pmd_t) { val };
return (pmd_t) { .pmd = val };
}
static inline pmdval_t native_pmd_val(pmd_t pmd)

View File

@@ -35,7 +35,7 @@
*/
#ifdef CONFIG_X86_64
/* Mask off the address space ID and SME encryption bits. */
#define CR3_ADDR_MASK __sme_clr(0x7FFFFFFFFFFFF000ull)
#define CR3_ADDR_MASK __sme_clr(PHYSICAL_PAGE_MASK)
#define CR3_PCID_MASK 0xFFFull
#define CR3_NOFLUSH BIT_ULL(63)

Some files were not shown because too many files have changed in this diff Show More