mm: ptep_get() conversion

Convert all instances of direct pte_t* dereferencing to instead use
ptep_get() helper.  This means that by default, the accesses change from a
C dereference to a READ_ONCE().  This is technically the correct thing to
do since where pgtables are modified by HW (for access/dirty) they are
volatile and therefore we should always ensure READ_ONCE() semantics.

But more importantly, by always using the helper, it can be overridden by
the architecture to fully encapsulate the contents of the pte.  Arch code
is deliberately not converted, as the arch code knows best.  It is
intended that arch code (arm64) will override the default with its own
implementation that can (e.g.) hide certain bits from the core code, or
determine young/dirty status by mixing in state from another source.

Conversion was done using Coccinelle:

----

// $ make coccicheck \
//          COCCI=ptepget.cocci \
//          SPFLAGS="--include-headers" \
//          MODE=patch

virtual patch

@ depends on patch @
pte_t *v;
@@

- *v
+ ptep_get(v)

----

Then reviewed and hand-edited to avoid multiple unnecessary calls to
ptep_get(), instead opting to store the result of a single call in a
variable, where it is correct to do so.  This aims to negate any cost of
READ_ONCE() and will benefit arch-overrides that may be more complex.

Included is a fix for an issue in an earlier version of this patch that
was pointed out by kernel test robot.  The issue arose because config
MMU=n elides definition of the ptep helper functions, including
ptep_get().  HUGETLB_PAGE=n configs still define a simple
huge_ptep_clear_flush() for linking purposes, which dereferences the ptep.
So when both configs are disabled, this caused a build error because
ptep_get() is not defined.  Fix by continuing to do a direct dereference
when MMU=n.  This is safe because for this config the arch code cannot be
trying to virtualize the ptes because none of the ptep helpers are
defined.

Link: https://lkml.kernel.org/r/20230612151545.3317766-4-ryan.roberts@arm.com
Reported-by: kernel test robot <lkp@intel.com>
Link: https://lore.kernel.org/oe-kbuild-all/202305120142.yXsNEo6H-lkp@intel.com/
Signed-off-by: Ryan Roberts <ryan.roberts@arm.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Potapenko <glider@google.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Alex Williamson <alex.williamson@redhat.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Andrey Konovalov <andreyknvl@gmail.com>
Cc: Andrey Ryabinin <ryabinin.a.a@gmail.com>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Daniel Vetter <daniel@ffwll.ch>
Cc: Dave Airlie <airlied@gmail.com>
Cc: Dimitri Sivanich <dimitri.sivanich@hpe.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Cc: Jérôme Glisse <jglisse@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Lorenzo Stoakes <lstoakes@gmail.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Mike Rapoport (IBM) <rppt@kernel.org>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Naoya Horiguchi <naoya.horiguchi@nec.com>
Cc: Oleksandr Tyshchenko <oleksandr_tyshchenko@epam.com>
Cc: Pavel Tatashin <pasha.tatashin@soleen.com>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: SeongJae Park <sj@kernel.org>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: Uladzislau Rezki (Sony) <urezki@gmail.com>
Cc: Vincenzo Frascino <vincenzo.frascino@arm.com>
Cc: Yu Zhao <yuzhao@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
This commit is contained in:
Ryan Roberts
2023-06-12 16:15:45 +01:00
committed by Andrew Morton
parent 6c1d2a073a
commit c33c794828
47 changed files with 301 additions and 228 deletions

View File

@@ -1681,7 +1681,9 @@ static int igt_mmap_gpu(void *arg)
static int check_present_pte(pte_t *pte, unsigned long addr, void *data)
{
if (!pte_present(*pte) || pte_none(*pte)) {
pte_t ptent = ptep_get(pte);
if (!pte_present(ptent) || pte_none(ptent)) {
pr_err("missing PTE:%lx\n",
(addr - (unsigned long)data) >> PAGE_SHIFT);
return -EINVAL;
@@ -1692,7 +1694,9 @@ static int check_present_pte(pte_t *pte, unsigned long addr, void *data)
static int check_absent_pte(pte_t *pte, unsigned long addr, void *data)
{
if (pte_present(*pte) && !pte_none(*pte)) {
pte_t ptent = ptep_get(pte);
if (pte_present(ptent) && !pte_none(ptent)) {
pr_err("present PTE:%lx; expected to be revoked\n",
(addr - (unsigned long)data) >> PAGE_SHIFT);
return -EINVAL;

View File

@@ -228,7 +228,7 @@ static int atomic_pte_lookup(struct vm_area_struct *vma, unsigned long vaddr,
goto err;
#ifdef CONFIG_X86_64
if (unlikely(pmd_large(*pmdp)))
pte = *(pte_t *) pmdp;
pte = ptep_get((pte_t *)pmdp);
else
#endif
pte = *pte_offset_kernel(pmdp, vaddr);

View File

@@ -514,6 +514,7 @@ static int follow_fault_pfn(struct vm_area_struct *vma, struct mm_struct *mm,
bool write_fault)
{
pte_t *ptep;
pte_t pte;
spinlock_t *ptl;
int ret;
@@ -536,10 +537,12 @@ static int follow_fault_pfn(struct vm_area_struct *vma, struct mm_struct *mm,
return ret;
}
if (write_fault && !pte_write(*ptep))
pte = ptep_get(ptep);
if (write_fault && !pte_write(pte))
ret = -EFAULT;
else
*pfn = pte_pfn(*ptep);
*pfn = pte_pfn(pte);
pte_unmap_unlock(ptep, ptl);
return ret;

View File

@@ -949,7 +949,7 @@ static int privcmd_mmap(struct file *file, struct vm_area_struct *vma)
*/
static int is_mapped_fn(pte_t *pte, unsigned long addr, void *data)
{
return pte_none(*pte) ? 0 : -EBUSY;
return pte_none(ptep_get(pte)) ? 0 : -EBUSY;
}
static int privcmd_vma_range_is_mapped(

View File

@@ -538,13 +538,14 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr,
bool locked = !!(vma->vm_flags & VM_LOCKED);
struct page *page = NULL;
bool migration = false, young = false, dirty = false;
pte_t ptent = ptep_get(pte);
if (pte_present(*pte)) {
page = vm_normal_page(vma, addr, *pte);
young = pte_young(*pte);
dirty = pte_dirty(*pte);
} else if (is_swap_pte(*pte)) {
swp_entry_t swpent = pte_to_swp_entry(*pte);
if (pte_present(ptent)) {
page = vm_normal_page(vma, addr, ptent);
young = pte_young(ptent);
dirty = pte_dirty(ptent);
} else if (is_swap_pte(ptent)) {
swp_entry_t swpent = pte_to_swp_entry(ptent);
if (!non_swap_entry(swpent)) {
int mapcount;
@@ -732,11 +733,12 @@ static int smaps_hugetlb_range(pte_t *pte, unsigned long hmask,
struct mem_size_stats *mss = walk->private;
struct vm_area_struct *vma = walk->vma;
struct page *page = NULL;
pte_t ptent = ptep_get(pte);
if (pte_present(*pte)) {
page = vm_normal_page(vma, addr, *pte);
} else if (is_swap_pte(*pte)) {
swp_entry_t swpent = pte_to_swp_entry(*pte);
if (pte_present(ptent)) {
page = vm_normal_page(vma, addr, ptent);
} else if (is_swap_pte(ptent)) {
swp_entry_t swpent = pte_to_swp_entry(ptent);
if (is_pfn_swap_entry(swpent))
page = pfn_swap_entry_to_page(swpent);
@@ -1105,7 +1107,7 @@ static inline void clear_soft_dirty(struct vm_area_struct *vma,
* Documentation/admin-guide/mm/soft-dirty.rst for full description
* of how soft-dirty works.
*/
pte_t ptent = *pte;
pte_t ptent = ptep_get(pte);
if (pte_present(ptent)) {
pte_t old_pte;
@@ -1194,7 +1196,7 @@ out:
return 0;
}
for (; addr != end; pte++, addr += PAGE_SIZE) {
ptent = *pte;
ptent = ptep_get(pte);
if (cp->type == CLEAR_REFS_SOFT_DIRTY) {
clear_soft_dirty(vma, addr, pte);
@@ -1550,7 +1552,7 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
for (; addr < end; pte++, addr += PAGE_SIZE) {
pagemap_entry_t pme;
pme = pte_to_pagemap_entry(pm, vma, addr, *pte);
pme = pte_to_pagemap_entry(pm, vma, addr, ptep_get(pte));
err = add_to_pagemap(addr, &pme, pm);
if (err)
break;
@@ -1893,10 +1895,11 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
return 0;
}
do {
struct page *page = can_gather_numa_stats(*pte, vma, addr);
pte_t ptent = ptep_get(pte);
struct page *page = can_gather_numa_stats(ptent, vma, addr);
if (!page)
continue;
gather_stats(page, md, pte_dirty(*pte), 1);
gather_stats(page, md, pte_dirty(ptent), 1);
} while (pte++, addr += PAGE_SIZE, addr != end);
pte_unmap_unlock(orig_pte, ptl);

View File

@@ -335,6 +335,7 @@ static inline bool userfaultfd_must_wait(struct userfaultfd_ctx *ctx,
pud_t *pud;
pmd_t *pmd, _pmd;
pte_t *pte;
pte_t ptent;
bool ret = true;
mmap_assert_locked(mm);
@@ -374,9 +375,10 @@ again:
* changes under us. PTE markers should be handled the same as none
* ptes here.
*/
if (pte_none_mostly(*pte))
ptent = ptep_get(pte);
if (pte_none_mostly(ptent))
ret = true;
if (!pte_write(*pte) && (reason & VM_UFFD_WP))
if (!pte_write(ptent) && (reason & VM_UFFD_WP))
ret = true;
pte_unmap(pte);

View File

@@ -1185,7 +1185,11 @@ static inline void hugetlb_count_sub(long l, struct mm_struct *mm)
static inline pte_t huge_ptep_clear_flush(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep)
{
#ifdef CONFIG_MMU
return ptep_get(ptep);
#else
return *ptep;
#endif
}
static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,

View File

@@ -555,7 +555,7 @@ pte_install_uffd_wp_if_needed(struct vm_area_struct *vma, unsigned long addr,
bool arm_uffd_pte = false;
/* The current status of the pte should be "cleared" before calling */
WARN_ON_ONCE(!pte_none(*pte));
WARN_ON_ONCE(!pte_none(ptep_get(pte)));
/*
* NOTE: userfaultfd_wp_unpopulated() doesn't need this whole

View File

@@ -231,7 +231,7 @@ static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
unsigned long address,
pte_t *ptep)
{
pte_t pte = *ptep;
pte_t pte = ptep_get(ptep);
int r = 1;
if (!pte_young(pte))
r = 0;
@@ -318,7 +318,7 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
unsigned long address,
pte_t *ptep)
{
pte_t pte = *ptep;
pte_t pte = ptep_get(ptep);
pte_clear(mm, address, ptep);
page_table_check_pte_clear(mm, address, pte);
return pte;
@@ -519,7 +519,7 @@ extern pud_t pudp_huge_clear_flush(struct vm_area_struct *vma,
struct mm_struct;
static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long address, pte_t *ptep)
{
pte_t old_pte = *ptep;
pte_t old_pte = ptep_get(ptep);
set_pte_at(mm, address, ptep, pte_wrprotect(old_pte));
}
#endif

View File

@@ -192,7 +192,7 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
inc_mm_counter(mm, MM_ANONPAGES);
}
flush_cache_page(vma, addr, pte_pfn(*pvmw.pte));
flush_cache_page(vma, addr, pte_pfn(ptep_get(pvmw.pte)));
ptep_clear_flush_notify(vma, addr, pvmw.pte);
if (new_page)
set_pte_at_notify(mm, addr, pvmw.pte,

View File

@@ -39,7 +39,7 @@ struct folio *damon_get_folio(unsigned long pfn)
void damon_ptep_mkold(pte_t *pte, struct vm_area_struct *vma, unsigned long addr)
{
struct folio *folio = damon_get_folio(pte_pfn(*pte));
struct folio *folio = damon_get_folio(pte_pfn(ptep_get(pte)));
if (!folio)
return;

View File

@@ -89,7 +89,7 @@ static bool __damon_pa_young(struct folio *folio, struct vm_area_struct *vma,
while (page_vma_mapped_walk(&pvmw)) {
addr = pvmw.address;
if (pvmw.pte) {
*accessed = pte_young(*pvmw.pte) ||
*accessed = pte_young(ptep_get(pvmw.pte)) ||
!folio_test_idle(folio) ||
mmu_notifier_test_young(vma->vm_mm, addr);
} else {

View File

@@ -323,7 +323,7 @@ static int damon_mkold_pmd_entry(pmd_t *pmd, unsigned long addr,
walk->action = ACTION_AGAIN;
return 0;
}
if (!pte_present(*pte))
if (!pte_present(ptep_get(pte)))
goto out;
damon_ptep_mkold(pte, walk->vma, addr);
out:
@@ -433,6 +433,7 @@ static int damon_young_pmd_entry(pmd_t *pmd, unsigned long addr,
unsigned long next, struct mm_walk *walk)
{
pte_t *pte;
pte_t ptent;
spinlock_t *ptl;
struct folio *folio;
struct damon_young_walk_private *priv = walk->private;
@@ -471,12 +472,13 @@ regular_page:
walk->action = ACTION_AGAIN;
return 0;
}
if (!pte_present(*pte))
ptent = ptep_get(pte);
if (!pte_present(ptent))
goto out;
folio = damon_get_folio(pte_pfn(*pte));
folio = damon_get_folio(pte_pfn(ptent));
if (!folio)
goto out;
if (pte_young(*pte) || !folio_test_idle(folio) ||
if (pte_young(ptent) || !folio_test_idle(folio) ||
mmu_notifier_test_young(walk->mm, addr))
priv->young = true;
*priv->folio_sz = folio_size(folio);

View File

@@ -3523,7 +3523,7 @@ again:
* handled in the specific fault path, and it'll prohibit the
* fault-around logic.
*/
if (!pte_none(*vmf->pte))
if (!pte_none(ptep_get(vmf->pte)))
goto unlock;
/* We're about to handle the fault */

View File

@@ -477,13 +477,14 @@ static int follow_pfn_pte(struct vm_area_struct *vma, unsigned long address,
pte_t *pte, unsigned int flags)
{
if (flags & FOLL_TOUCH) {
pte_t entry = *pte;
pte_t orig_entry = ptep_get(pte);
pte_t entry = orig_entry;
if (flags & FOLL_WRITE)
entry = pte_mkdirty(entry);
entry = pte_mkyoung(entry);
if (!pte_same(*pte, entry)) {
if (!pte_same(orig_entry, entry)) {
set_pte_at(vma->vm_mm, address, pte, entry);
update_mmu_cache(vma, address, pte);
}
@@ -549,7 +550,7 @@ static struct page *follow_page_pte(struct vm_area_struct *vma,
ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
if (!ptep)
return no_page_table(vma, flags);
pte = *ptep;
pte = ptep_get(ptep);
if (!pte_present(pte))
goto no_page;
if (pte_protnone(pte) && !gup_can_follow_protnone(flags))
@@ -821,6 +822,7 @@ static int get_gate_page(struct mm_struct *mm, unsigned long address,
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
pte_t entry;
int ret = -EFAULT;
/* user gate pages are read-only */
@@ -844,16 +846,17 @@ static int get_gate_page(struct mm_struct *mm, unsigned long address,
pte = pte_offset_map(pmd, address);
if (!pte)
return -EFAULT;
if (pte_none(*pte))
entry = ptep_get(pte);
if (pte_none(entry))
goto unmap;
*vma = get_gate_vma(mm);
if (!page)
goto out;
*page = vm_normal_page(*vma, address, *pte);
*page = vm_normal_page(*vma, address, entry);
if (!*page) {
if ((gup_flags & FOLL_DUMP) || !is_zero_pfn(pte_pfn(*pte)))
if ((gup_flags & FOLL_DUMP) || !is_zero_pfn(pte_pfn(entry)))
goto unmap;
*page = pte_page(*pte);
*page = pte_page(entry);
}
ret = try_grab_page(*page, gup_flags);
if (unlikely(ret))
@@ -2496,7 +2499,7 @@ static int gup_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr,
}
if (unlikely(pmd_val(pmd) != pmd_val(*pmdp)) ||
unlikely(pte_val(pte) != pte_val(*ptep))) {
unlikely(pte_val(pte) != pte_val(ptep_get(ptep)))) {
gup_put_folio(folio, 1, flags);
goto pte_unmap;
}
@@ -2693,7 +2696,7 @@ static int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
if (!folio)
return 0;
if (unlikely(pte_val(pte) != pte_val(*ptep))) {
if (unlikely(pte_val(pte) != pte_val(ptep_get(ptep)))) {
gup_put_folio(folio, refs, flags);
return 0;
}

View File

@@ -161,7 +161,7 @@ struct page *__kmap_to_page(void *vaddr)
/* kmap() mappings */
if (WARN_ON_ONCE(addr >= PKMAP_ADDR(0) &&
addr < PKMAP_ADDR(LAST_PKMAP)))
return pte_page(pkmap_page_table[PKMAP_NR(addr)]);
return pte_page(ptep_get(&pkmap_page_table[PKMAP_NR(addr)]));
/* kmap_local_page() mappings */
if (WARN_ON_ONCE(base >= __fix_to_virt(FIX_KMAP_END) &&
@@ -191,6 +191,7 @@ static void flush_all_zero_pkmaps(void)
for (i = 0; i < LAST_PKMAP; i++) {
struct page *page;
pte_t ptent;
/*
* zero means we don't have anything to do,
@@ -203,7 +204,8 @@ static void flush_all_zero_pkmaps(void)
pkmap_count[i] = 0;
/* sanity check */
BUG_ON(pte_none(pkmap_page_table[i]));
ptent = ptep_get(&pkmap_page_table[i]);
BUG_ON(pte_none(ptent));
/*
* Don't need an atomic fetch-and-clear op here;
@@ -212,7 +214,7 @@ static void flush_all_zero_pkmaps(void)
* getting the kmap_lock (which is held here).
* So no dangers, even with speculative execution.
*/
page = pte_page(pkmap_page_table[i]);
page = pte_page(ptent);
pte_clear(&init_mm, PKMAP_ADDR(i), &pkmap_page_table[i]);
set_page_address(page, NULL);
@@ -511,7 +513,7 @@ static inline bool kmap_high_unmap_local(unsigned long vaddr)
{
#ifdef ARCH_NEEDS_KMAP_HIGH_GET
if (vaddr >= PKMAP_ADDR(0) && vaddr < PKMAP_ADDR(LAST_PKMAP)) {
kunmap_high(pte_page(pkmap_page_table[PKMAP_NR(vaddr)]));
kunmap_high(pte_page(ptep_get(&pkmap_page_table[PKMAP_NR(vaddr)])));
return true;
}
#endif
@@ -548,7 +550,7 @@ void *__kmap_local_pfn_prot(unsigned long pfn, pgprot_t prot)
idx = arch_kmap_local_map_idx(kmap_local_idx_push(), pfn);
vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
kmap_pte = kmap_get_pte(vaddr, idx);
BUG_ON(!pte_none(*kmap_pte));
BUG_ON(!pte_none(ptep_get(kmap_pte)));
pteval = pfn_pte(pfn, prot);
arch_kmap_local_set_pte(&init_mm, vaddr, kmap_pte, pteval);
arch_kmap_local_post_map(vaddr, pteval);

View File

@@ -228,7 +228,7 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr,
struct hmm_range *range = hmm_vma_walk->range;
unsigned int required_fault;
unsigned long cpu_flags;
pte_t pte = *ptep;
pte_t pte = ptep_get(ptep);
uint64_t pfn_req_flags = *hmm_pfn;
if (pte_none_mostly(pte)) {

View File

@@ -2063,7 +2063,7 @@ static void __split_huge_zero_page_pmd(struct vm_area_struct *vma,
entry = pte_mkspecial(entry);
if (pmd_uffd_wp(old_pmd))
entry = pte_mkuffd_wp(entry);
VM_BUG_ON(!pte_none(*pte));
VM_BUG_ON(!pte_none(ptep_get(pte)));
set_pte_at(mm, addr, pte, entry);
pte++;
}
@@ -2257,7 +2257,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
entry = pte_mkuffd_wp(entry);
page_add_anon_rmap(page + i, vma, addr, false);
}
VM_BUG_ON(!pte_none(*pte));
VM_BUG_ON(!pte_none(ptep_get(pte)));
set_pte_at(mm, addr, pte, entry);
pte++;
}

View File

@@ -7246,7 +7246,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
pte = (pte_t *)pmd_alloc(mm, pud, addr);
}
}
BUG_ON(pte && pte_present(*pte) && !pte_huge(*pte));
BUG_ON(pte && pte_present(ptep_get(pte)) && !pte_huge(ptep_get(pte)));
return pte;
}

View File

@@ -105,7 +105,7 @@ static void vmemmap_pte_range(pmd_t *pmd, unsigned long addr,
* remapping (which is calling @walk->remap_pte).
*/
if (!walk->reuse_page) {
walk->reuse_page = pte_page(*pte);
walk->reuse_page = pte_page(ptep_get(pte));
/*
* Because the reuse address is part of the range that we are
* walking, skip the reuse address range.
@@ -239,7 +239,7 @@ static void vmemmap_remap_pte(pte_t *pte, unsigned long addr,
* to the tail pages.
*/
pgprot_t pgprot = PAGE_KERNEL_RO;
struct page *page = pte_page(*pte);
struct page *page = pte_page(ptep_get(pte));
pte_t entry;
/* Remapping the head page requires r/w */
@@ -286,7 +286,7 @@ static void vmemmap_restore_pte(pte_t *pte, unsigned long addr,
struct page *page;
void *to;
BUG_ON(pte_page(*pte) != walk->reuse_page);
BUG_ON(pte_page(ptep_get(pte)) != walk->reuse_page);
page = list_first_entry(walk->vmemmap_pages, struct page, lru);
list_del(&page->lru);

Some files were not shown because too many files have changed in this diff Show More