You've already forked linux-apfs
mirror of
https://github.com/linux-apfs/linux-apfs.git
synced 2026-05-01 15:00:59 -07:00
Merge branch 'akpm' (patches from Andrew)
Merge misc updates from Andrew Morton: - a few hotfixes - various misc updates - ocfs2 updates - most of MM * emailed patches from Andrew Morton <akpm@linux-foundation.org>: (108 commits) mm, memory_hotplug: move movable_node to the hotplug proper mm, memory_hotplug: drop CONFIG_MOVABLE_NODE mm, memory_hotplug: drop artificial restriction on online/offline mm: memcontrol: account slab stats per lruvec mm: memcontrol: per-lruvec stats infrastructure mm: memcontrol: use generic mod_memcg_page_state for kmem pages mm: memcontrol: use the node-native slab memory counters mm: vmstat: move slab statistics from zone to node counters mm/zswap.c: delete an error message for a failed memory allocation in zswap_dstmem_prepare() mm/zswap.c: improve a size determination in zswap_frontswap_init() mm/zswap.c: delete an error message for a failed memory allocation in zswap_pool_create() mm/swapfile.c: sort swap entries before free mm/oom_kill: count global and memory cgroup oom kills mm: per-cgroup memory reclaim stats mm: kmemleak: treat vm_struct as alternative reference to vmalloc'ed objects mm: kmemleak: factor object reference updating out of scan_block() mm: kmemleak: slightly reduce the size of some structures on 64-bit architectures mm, mempolicy: don't check cpuset seqlock where it doesn't matter mm, cpuset: always use seqlock when changing task's nodemask mm, mempolicy: simplify rebinding mempolicies when updating cpusets ...
This commit is contained in:
@@ -2315,8 +2315,11 @@
|
||||
that the amount of memory usable for all allocations
|
||||
is not too small.
|
||||
|
||||
movable_node [KNL] Boot-time switch to enable the effects
|
||||
of CONFIG_MOVABLE_NODE=y. See mm/Kconfig for details.
|
||||
movable_node [KNL] Boot-time switch to make hotpluggable memory
|
||||
NUMA nodes to be movable. This means that the memory
|
||||
of such nodes will be usable only for movable
|
||||
allocations which rules out almost all kernel
|
||||
allocations. Use with caution!
|
||||
|
||||
MTD_Partition= [MTD]
|
||||
Format: <name>,<region-number>,<size>,<offset>
|
||||
@@ -3772,8 +3775,14 @@
|
||||
slab_nomerge [MM]
|
||||
Disable merging of slabs with similar size. May be
|
||||
necessary if there is some reason to distinguish
|
||||
allocs to different slabs. Debug options disable
|
||||
merging on their own.
|
||||
allocs to different slabs, especially in hardened
|
||||
environments where the risk of heap overflows and
|
||||
layout control by attackers can usually be
|
||||
frustrated by disabling merging. This will reduce
|
||||
most of the exposure of a heap attack to a single
|
||||
cache (risks via metadata attacks are mostly
|
||||
unchanged). Debug options disable merging on their
|
||||
own.
|
||||
For more information see Documentation/vm/slub.txt.
|
||||
|
||||
slab_max_order= [MM, SLAB]
|
||||
|
||||
@@ -852,13 +852,25 @@ PAGE_SIZE multiple when read back.
|
||||
|
||||
The number of times the cgroup's memory usage was
|
||||
about to go over the max boundary. If direct reclaim
|
||||
fails to bring it down, the OOM killer is invoked.
|
||||
fails to bring it down, the cgroup goes to OOM state.
|
||||
|
||||
oom
|
||||
|
||||
The number of times the OOM killer has been invoked in
|
||||
the cgroup. This may not exactly match the number of
|
||||
processes killed but should generally be close.
|
||||
The number of times the cgroup's memory usage
|
||||
reached the limit and allocation was about to fail.
|
||||
|
||||
Depending on context result could be invocation of OOM
|
||||
killer and retrying allocation or failing allocation.
|
||||
|
||||
Failed allocation in its turn could be returned into
|
||||
userspace as -ENOMEM or silently ignored in cases like
|
||||
disk readahead. For now OOM in memory cgroup kills
|
||||
tasks iff shortage has happened inside page fault.
|
||||
|
||||
oom_kill
|
||||
|
||||
The number of processes belonging to this cgroup
|
||||
killed by any kind of OOM killer.
|
||||
|
||||
memory.stat
|
||||
|
||||
@@ -956,6 +968,34 @@ PAGE_SIZE multiple when read back.
|
||||
|
||||
Number of times a shadow node has been reclaimed
|
||||
|
||||
pgrefill
|
||||
|
||||
Amount of scanned pages (in an active LRU list)
|
||||
|
||||
pgscan
|
||||
|
||||
Amount of scanned pages (in an inactive LRU list)
|
||||
|
||||
pgsteal
|
||||
|
||||
Amount of reclaimed pages
|
||||
|
||||
pgactivate
|
||||
|
||||
Amount of pages moved to the active LRU list
|
||||
|
||||
pgdeactivate
|
||||
|
||||
Amount of pages moved to the inactive LRU list
|
||||
|
||||
pglazyfree
|
||||
|
||||
Amount of pages postponed to be freed under memory pressure
|
||||
|
||||
pglazyfreed
|
||||
|
||||
Amount of reclaimed lazyfree pages
|
||||
|
||||
memory.swap.current
|
||||
|
||||
A read-only single value file which exists on non-root
|
||||
|
||||
@@ -150,6 +150,7 @@ See the include/linux/kmemleak.h header for the functions prototype.
|
||||
- ``kmemleak_init`` - initialize kmemleak
|
||||
- ``kmemleak_alloc`` - notify of a memory block allocation
|
||||
- ``kmemleak_alloc_percpu`` - notify of a percpu memory block allocation
|
||||
- ``kmemleak_vmalloc`` - notify of a vmalloc() memory allocation
|
||||
- ``kmemleak_free`` - notify of a memory block freeing
|
||||
- ``kmemleak_free_part`` - notify of a partial memory block freeing
|
||||
- ``kmemleak_free_percpu`` - notify of a percpu memory block freeing
|
||||
|
||||
@@ -98,6 +98,50 @@ use_zero_pages - specifies whether empty pages (i.e. allocated pages
|
||||
it is only effective for pages merged after the change.
|
||||
Default: 0 (normal KSM behaviour as in earlier releases)
|
||||
|
||||
max_page_sharing - Maximum sharing allowed for each KSM page. This
|
||||
enforces a deduplication limit to avoid the virtual
|
||||
memory rmap lists to grow too large. The minimum
|
||||
value is 2 as a newly created KSM page will have at
|
||||
least two sharers. The rmap walk has O(N)
|
||||
complexity where N is the number of rmap_items
|
||||
(i.e. virtual mappings) that are sharing the page,
|
||||
which is in turn capped by max_page_sharing. So
|
||||
this effectively spreads the linear O(N)
|
||||
computational complexity from rmap walk context
|
||||
over different KSM pages. The ksmd walk over the
|
||||
stable_node "chains" is also O(N), but N is the
|
||||
number of stable_node "dups", not the number of
|
||||
rmap_items, so it has not a significant impact on
|
||||
ksmd performance. In practice the best stable_node
|
||||
"dup" candidate will be kept and found at the head
|
||||
of the "dups" list. The higher this value the
|
||||
faster KSM will merge the memory (because there
|
||||
will be fewer stable_node dups queued into the
|
||||
stable_node chain->hlist to check for pruning) and
|
||||
the higher the deduplication factor will be, but
|
||||
the slower the worst case rmap walk could be for
|
||||
any given KSM page. Slowing down the rmap_walk
|
||||
means there will be higher latency for certain
|
||||
virtual memory operations happening during
|
||||
swapping, compaction, NUMA balancing and page
|
||||
migration, in turn decreasing responsiveness for
|
||||
the caller of those virtual memory operations. The
|
||||
scheduler latency of other tasks not involved with
|
||||
the VM operations doing the rmap walk is not
|
||||
affected by this parameter as the rmap walks are
|
||||
always schedule friendly themselves.
|
||||
|
||||
stable_node_chains_prune_millisecs - How frequently to walk the whole
|
||||
list of stable_node "dups" linked in the
|
||||
stable_node "chains" in order to prune stale
|
||||
stable_nodes. Smaller millisecs values will free
|
||||
up the KSM metadata with lower latency, but they
|
||||
will make ksmd use more CPU during the scan. This
|
||||
only applies to the stable_node chains so it's a
|
||||
noop if not a single KSM page hit the
|
||||
max_page_sharing yet (there would be no stable_node
|
||||
chains in such case).
|
||||
|
||||
The effectiveness of KSM and MADV_MERGEABLE is shown in /sys/kernel/mm/ksm/:
|
||||
|
||||
pages_shared - how many shared pages are being used
|
||||
@@ -106,10 +150,29 @@ pages_unshared - how many pages unique but repeatedly checked for merging
|
||||
pages_volatile - how many pages changing too fast to be placed in a tree
|
||||
full_scans - how many times all mergeable areas have been scanned
|
||||
|
||||
stable_node_chains - number of stable node chains allocated, this is
|
||||
effectively the number of KSM pages that hit the
|
||||
max_page_sharing limit
|
||||
stable_node_dups - number of stable node dups queued into the
|
||||
stable_node chains
|
||||
|
||||
A high ratio of pages_sharing to pages_shared indicates good sharing, but
|
||||
a high ratio of pages_unshared to pages_sharing indicates wasted effort.
|
||||
pages_volatile embraces several different kinds of activity, but a high
|
||||
proportion there would also indicate poor use of madvise MADV_MERGEABLE.
|
||||
|
||||
The maximum possible page_sharing/page_shared ratio is limited by the
|
||||
max_page_sharing tunable. To increase the ratio max_page_sharing must
|
||||
be increased accordingly.
|
||||
|
||||
The stable_node_dups/stable_node_chains ratio is also affected by the
|
||||
max_page_sharing tunable, and a high ratio may indicate fragmentation
|
||||
in the stable_node dups, which could be solved by introducing
|
||||
fragmentation algorithms in ksmd which would refile rmap_items from
|
||||
one stable_node dup to another stable_node dup, in order to free up
|
||||
stable_node "dups" with few rmap_items in them, but that may increase
|
||||
the ksmd CPU usage and possibly slowdown the readonly computations on
|
||||
the KSM pages of the applications.
|
||||
|
||||
Izik Eidus,
|
||||
Hugh Dickins, 17 Nov 2009
|
||||
|
||||
+1
-1
@@ -13,7 +13,7 @@ config ARM64
|
||||
select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI
|
||||
select ARCH_HAS_ELF_RANDOMIZE
|
||||
select ARCH_HAS_GCOV_PROFILE_ALL
|
||||
select ARCH_HAS_GIGANTIC_PAGE
|
||||
select ARCH_HAS_GIGANTIC_PAGE if (MEMORY_ISOLATION && COMPACTION) || CMA
|
||||
select ARCH_HAS_KCOV
|
||||
select ARCH_HAS_SET_MEMORY
|
||||
select ARCH_HAS_SG_CHAIN
|
||||
|
||||
@@ -83,4 +83,8 @@ extern void huge_ptep_set_wrprotect(struct mm_struct *mm,
|
||||
extern void huge_ptep_clear_flush(struct vm_area_struct *vma,
|
||||
unsigned long addr, pte_t *ptep);
|
||||
|
||||
#ifdef CONFIG_ARCH_HAS_GIGANTIC_PAGE
|
||||
static inline bool gigantic_page_supported(void) { return true; }
|
||||
#endif
|
||||
|
||||
#endif /* __ASM_HUGETLB_H */
|
||||
|
||||
+22
-31
@@ -42,15 +42,13 @@ int pud_huge(pud_t pud)
|
||||
}
|
||||
|
||||
static int find_num_contig(struct mm_struct *mm, unsigned long addr,
|
||||
pte_t *ptep, pte_t pte, size_t *pgsize)
|
||||
pte_t *ptep, size_t *pgsize)
|
||||
{
|
||||
pgd_t *pgd = pgd_offset(mm, addr);
|
||||
pud_t *pud;
|
||||
pmd_t *pmd;
|
||||
|
||||
*pgsize = PAGE_SIZE;
|
||||
if (!pte_cont(pte))
|
||||
return 1;
|
||||
pud = pud_offset(pgd, addr);
|
||||
pmd = pmd_offset(pud, addr);
|
||||
if ((pte_t *)pmd == ptep) {
|
||||
@@ -65,15 +63,16 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
|
||||
{
|
||||
size_t pgsize;
|
||||
int i;
|
||||
int ncontig = find_num_contig(mm, addr, ptep, pte, &pgsize);
|
||||
int ncontig;
|
||||
unsigned long pfn;
|
||||
pgprot_t hugeprot;
|
||||
|
||||
if (ncontig == 1) {
|
||||
if (!pte_cont(pte)) {
|
||||
set_pte_at(mm, addr, ptep, pte);
|
||||
return;
|
||||
}
|
||||
|
||||
ncontig = find_num_contig(mm, addr, ptep, &pgsize);
|
||||
pfn = pte_pfn(pte);
|
||||
hugeprot = __pgprot(pte_val(pfn_pte(pfn, __pgprot(0))) ^ pte_val(pte));
|
||||
for (i = 0; i < ncontig; i++) {
|
||||
@@ -132,7 +131,8 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
|
||||
return pte;
|
||||
}
|
||||
|
||||
pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
|
||||
pte_t *huge_pte_offset(struct mm_struct *mm,
|
||||
unsigned long addr, unsigned long sz)
|
||||
{
|
||||
pgd_t *pgd;
|
||||
pud_t *pud;
|
||||
@@ -184,21 +184,19 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
|
||||
if (pte_cont(*ptep)) {
|
||||
int ncontig, i;
|
||||
size_t pgsize;
|
||||
pte_t *cpte;
|
||||
bool is_dirty = false;
|
||||
|
||||
cpte = huge_pte_offset(mm, addr);
|
||||
ncontig = find_num_contig(mm, addr, cpte, *cpte, &pgsize);
|
||||
ncontig = find_num_contig(mm, addr, ptep, &pgsize);
|
||||
/* save the 1st pte to return */
|
||||
pte = ptep_get_and_clear(mm, addr, cpte);
|
||||
pte = ptep_get_and_clear(mm, addr, ptep);
|
||||
for (i = 1, addr += pgsize; i < ncontig; ++i, addr += pgsize) {
|
||||
/*
|
||||
* If HW_AFDBM is enabled, then the HW could
|
||||
* turn on the dirty bit for any of the page
|
||||
* in the set, so check them all.
|
||||
*/
|
||||
++cpte;
|
||||
if (pte_dirty(ptep_get_and_clear(mm, addr, cpte)))
|
||||
++ptep;
|
||||
if (pte_dirty(ptep_get_and_clear(mm, addr, ptep)))
|
||||
is_dirty = true;
|
||||
}
|
||||
if (is_dirty)
|
||||
@@ -214,8 +212,6 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma,
|
||||
unsigned long addr, pte_t *ptep,
|
||||
pte_t pte, int dirty)
|
||||
{
|
||||
pte_t *cpte;
|
||||
|
||||
if (pte_cont(pte)) {
|
||||
int ncontig, i, changed = 0;
|
||||
size_t pgsize = 0;
|
||||
@@ -225,12 +221,11 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma,
|
||||
__pgprot(pte_val(pfn_pte(pfn, __pgprot(0))) ^
|
||||
pte_val(pte));
|
||||
|
||||
cpte = huge_pte_offset(vma->vm_mm, addr);
|
||||
pfn = pte_pfn(*cpte);
|
||||
ncontig = find_num_contig(vma->vm_mm, addr, cpte,
|
||||
*cpte, &pgsize);
|
||||
for (i = 0; i < ncontig; ++i, ++cpte, addr += pgsize) {
|
||||
changed |= ptep_set_access_flags(vma, addr, cpte,
|
||||
pfn = pte_pfn(pte);
|
||||
ncontig = find_num_contig(vma->vm_mm, addr, ptep,
|
||||
&pgsize);
|
||||
for (i = 0; i < ncontig; ++i, ++ptep, addr += pgsize) {
|
||||
changed |= ptep_set_access_flags(vma, addr, ptep,
|
||||
pfn_pte(pfn,
|
||||
hugeprot),
|
||||
dirty);
|
||||
@@ -247,13 +242,11 @@ void huge_ptep_set_wrprotect(struct mm_struct *mm,
|
||||
{
|
||||
if (pte_cont(*ptep)) {
|
||||
int ncontig, i;
|
||||
pte_t *cpte;
|
||||
size_t pgsize = 0;
|
||||
|
||||
cpte = huge_pte_offset(mm, addr);
|
||||
ncontig = find_num_contig(mm, addr, cpte, *cpte, &pgsize);
|
||||
for (i = 0; i < ncontig; ++i, ++cpte, addr += pgsize)
|
||||
ptep_set_wrprotect(mm, addr, cpte);
|
||||
ncontig = find_num_contig(mm, addr, ptep, &pgsize);
|
||||
for (i = 0; i < ncontig; ++i, ++ptep, addr += pgsize)
|
||||
ptep_set_wrprotect(mm, addr, ptep);
|
||||
} else {
|
||||
ptep_set_wrprotect(mm, addr, ptep);
|
||||
}
|
||||
@@ -264,14 +257,12 @@ void huge_ptep_clear_flush(struct vm_area_struct *vma,
|
||||
{
|
||||
if (pte_cont(*ptep)) {
|
||||
int ncontig, i;
|
||||
pte_t *cpte;
|
||||
size_t pgsize = 0;
|
||||
|
||||
cpte = huge_pte_offset(vma->vm_mm, addr);
|
||||
ncontig = find_num_contig(vma->vm_mm, addr, cpte,
|
||||
*cpte, &pgsize);
|
||||
for (i = 0; i < ncontig; ++i, ++cpte, addr += pgsize)
|
||||
ptep_clear_flush(vma, addr, cpte);
|
||||
ncontig = find_num_contig(vma->vm_mm, addr, ptep,
|
||||
&pgsize);
|
||||
for (i = 0; i < ncontig; ++i, ++ptep, addr += pgsize)
|
||||
ptep_clear_flush(vma, addr, ptep);
|
||||
} else {
|
||||
ptep_clear_flush(vma, addr, ptep);
|
||||
}
|
||||
|
||||
@@ -24,7 +24,6 @@
|
||||
/*
|
||||
* Page table definitions for Qualcomm Hexagon processor.
|
||||
*/
|
||||
#include <linux/swap.h>
|
||||
#include <asm/page.h>
|
||||
#define __ARCH_USE_5LEVEL_HACK
|
||||
#include <asm-generic/pgtable-nopmd.h>
|
||||
|
||||
@@ -25,7 +25,6 @@
|
||||
#include <linux/compat.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/kbuild.h>
|
||||
#include <asm/ptrace.h>
|
||||
|
||||
@@ -24,6 +24,7 @@
|
||||
* be instantiated for it, differently from a native build.
|
||||
*/
|
||||
#include <linux/mm.h>
|
||||
#include <linux/sched.h>
|
||||
#include <asm/page.h>
|
||||
#include <asm/hexagon_vm.h>
|
||||
|
||||
|
||||
@@ -44,7 +44,7 @@ huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz)
|
||||
}
|
||||
|
||||
pte_t *
|
||||
huge_pte_offset (struct mm_struct *mm, unsigned long addr)
|
||||
huge_pte_offset (struct mm_struct *mm, unsigned long addr, unsigned long sz)
|
||||
{
|
||||
unsigned long taddr = htlbpage_to_page(addr);
|
||||
pgd_t *pgd;
|
||||
@@ -92,7 +92,7 @@ struct page *follow_huge_addr(struct mm_struct *mm, unsigned long addr, int writ
|
||||
if (REGION_NUMBER(addr) != RGN_HPAGE)
|
||||
return ERR_PTR(-EINVAL);
|
||||
|
||||
ptep = huge_pte_offset(mm, addr);
|
||||
ptep = huge_pte_offset(mm, addr, HPAGE_SIZE);
|
||||
if (!ptep || pte_none(*ptep))
|
||||
return NULL;
|
||||
page = pte_page(*ptep);
|
||||
|
||||
+2
-9
@@ -646,20 +646,13 @@ mem_init (void)
|
||||
}
|
||||
|
||||
#ifdef CONFIG_MEMORY_HOTPLUG
|
||||
int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
|
||||
int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock)
|
||||
{
|
||||
pg_data_t *pgdat;
|
||||
struct zone *zone;
|
||||
unsigned long start_pfn = start >> PAGE_SHIFT;
|
||||
unsigned long nr_pages = size >> PAGE_SHIFT;
|
||||
int ret;
|
||||
|
||||
pgdat = NODE_DATA(nid);
|
||||
|
||||
zone = pgdat->node_zones +
|
||||
zone_for_memory(nid, start, size, ZONE_NORMAL, for_device);
|
||||
ret = __add_pages(nid, zone, start_pfn, nr_pages);
|
||||
|
||||
ret = __add_pages(nid, start_pfn, nr_pages, want_memblock);
|
||||
if (ret)
|
||||
printk("%s: Problem encountered in __add_pages() as ret=%d\n",
|
||||
__func__, ret);
|
||||
|
||||
@@ -74,7 +74,8 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
|
||||
return pte;
|
||||
}
|
||||
|
||||
pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
|
||||
pte_t *huge_pte_offset(struct mm_struct *mm,
|
||||
unsigned long addr, unsigned long sz)
|
||||
{
|
||||
pgd_t *pgd;
|
||||
pud_t *pud;
|
||||
|
||||
@@ -36,7 +36,8 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr,
|
||||
return pte;
|
||||
}
|
||||
|
||||
pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
|
||||
pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr,
|
||||
unsigned long sz)
|
||||
{
|
||||
pgd_t *pgd;
|
||||
pud_t *pud;
|
||||
|
||||
@@ -1,8 +1,10 @@
|
||||
|
||||
generic-y += barrier.h
|
||||
generic-y += clkdev.h
|
||||
generic-y += device.h
|
||||
generic-y += exec.h
|
||||
generic-y += extable.h
|
||||
generic-y += fb.h
|
||||
generic-y += irq_work.h
|
||||
generic-y += mcs_spinlock.h
|
||||
generic-y += mm-arch-hooks.h
|
||||
|
||||
@@ -1 +0,0 @@
|
||||
#include <asm-generic/device.h>
|
||||
@@ -1,23 +0,0 @@
|
||||
/* MN10300 Frame buffer stuff
|
||||
*
|
||||
* Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
|
||||
* Written by David Howells (dhowells@redhat.com)
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public Licence
|
||||
* as published by the Free Software Foundation; either version
|
||||
* 2 of the Licence, or (at your option) any later version.
|
||||
*/
|
||||
#ifndef _ASM_FB_H
|
||||
#define _ASM_FB_H
|
||||
|
||||
#include <linux/fb.h>
|
||||
|
||||
#define fb_pgprotect(...) do {} while (0)
|
||||
|
||||
static inline int fb_is_primary_device(struct fb_info *info)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif /* _ASM_FB_H */
|
||||
@@ -69,7 +69,8 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
|
||||
return pte;
|
||||
}
|
||||
|
||||
pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
|
||||
pte_t *huge_pte_offset(struct mm_struct *mm,
|
||||
unsigned long addr, unsigned long sz)
|
||||
{
|
||||
pgd_t *pgd;
|
||||
pud_t *pud;
|
||||
|
||||
@@ -50,4 +50,14 @@ static inline pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma,
|
||||
else
|
||||
return entry;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_ARCH_HAS_GIGANTIC_PAGE
|
||||
static inline bool gigantic_page_supported(void)
|
||||
{
|
||||
if (radix_enabled())
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
@@ -17,6 +17,8 @@
|
||||
#include <linux/memblock.h>
|
||||
#include <linux/bootmem.h>
|
||||
#include <linux/moduleparam.h>
|
||||
#include <linux/swap.h>
|
||||
#include <linux/swapops.h>
|
||||
#include <asm/pgtable.h>
|
||||
#include <asm/pgalloc.h>
|
||||
#include <asm/tlb.h>
|
||||
@@ -55,7 +57,7 @@ static unsigned nr_gpages;
|
||||
|
||||
#define hugepd_none(hpd) (hpd_val(hpd) == 0)
|
||||
|
||||
pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
|
||||
pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, unsigned long sz)
|
||||
{
|
||||
/* Only called for hugetlbfs pages, hence can ignore THP */
|
||||
return __find_linux_pte_or_hugepte(mm->pgd, addr, NULL, NULL);
|
||||
@@ -617,62 +619,39 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb,
|
||||
} while (addr = next, addr != end);
|
||||
}
|
||||
|
||||
/*
|
||||
* We are holding mmap_sem, so a parallel huge page collapse cannot run.
|
||||
* To prevent hugepage split, disable irq.
|
||||
*/
|
||||
struct page *
|
||||
follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
|
||||
struct page *follow_huge_pd(struct vm_area_struct *vma,
|
||||
unsigned long address, hugepd_t hpd,
|
||||
int flags, int pdshift)
|
||||
{
|
||||
bool is_thp;
|
||||
pte_t *ptep, pte;
|
||||
unsigned shift;
|
||||
unsigned long mask, flags;
|
||||
struct page *page = ERR_PTR(-EINVAL);
|
||||
pte_t *ptep;
|
||||
spinlock_t *ptl;
|
||||
struct page *page = NULL;
|
||||
unsigned long mask;
|
||||
int shift = hugepd_shift(hpd);
|
||||
struct mm_struct *mm = vma->vm_mm;
|
||||
|
||||
local_irq_save(flags);
|
||||
ptep = find_linux_pte_or_hugepte(mm->pgd, address, &is_thp, &shift);
|
||||
if (!ptep)
|
||||
goto no_page;
|
||||
pte = READ_ONCE(*ptep);
|
||||
/*
|
||||
* Verify it is a huge page else bail.
|
||||
* Transparent hugepages are handled by generic code. We can skip them
|
||||
* here.
|
||||
*/
|
||||
if (!shift || is_thp)
|
||||
goto no_page;
|
||||
retry:
|
||||
ptl = &mm->page_table_lock;
|
||||
spin_lock(ptl);
|
||||
|
||||
if (!pte_present(pte)) {
|
||||
page = NULL;
|
||||
goto no_page;
|
||||
ptep = hugepte_offset(hpd, address, pdshift);
|
||||
if (pte_present(*ptep)) {
|
||||
mask = (1UL << shift) - 1;
|
||||
page = pte_page(*ptep);
|
||||
page += ((address & mask) >> PAGE_SHIFT);
|
||||
if (flags & FOLL_GET)
|
||||
get_page(page);
|
||||
} else {
|
||||
if (is_hugetlb_entry_migration(*ptep)) {
|
||||
spin_unlock(ptl);
|
||||
__migration_entry_wait(mm, ptep, ptl);
|
||||
goto retry;
|
||||
}
|
||||
}
|
||||
mask = (1UL << shift) - 1;
|
||||
page = pte_page(pte);
|
||||
if (page)
|
||||
page += (address & mask) / PAGE_SIZE;
|
||||
|
||||
no_page:
|
||||
local_irq_restore(flags);
|
||||
spin_unlock(ptl);
|
||||
return page;
|
||||
}
|
||||
|
||||
struct page *
|
||||
follow_huge_pmd(struct mm_struct *mm, unsigned long address,
|
||||
pmd_t *pmd, int write)
|
||||
{
|
||||
BUG();
|
||||
return NULL;
|
||||
}
|
||||
|
||||
struct page *
|
||||
follow_huge_pud(struct mm_struct *mm, unsigned long address,
|
||||
pud_t *pud, int write)
|
||||
{
|
||||
BUG();
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static unsigned long hugepte_addr_end(unsigned long addr, unsigned long end,
|
||||
unsigned long sz)
|
||||
{
|
||||
@@ -763,8 +742,11 @@ static int __init add_huge_page_size(unsigned long long size)
|
||||
* Hash: 16M and 16G
|
||||
*/
|
||||
if (radix_enabled()) {
|
||||
if (mmu_psize != MMU_PAGE_2M)
|
||||
return -EINVAL;
|
||||
if (mmu_psize != MMU_PAGE_2M) {
|
||||
if (cpu_has_feature(CPU_FTR_POWER9_DD1) ||
|
||||
(mmu_psize != MMU_PAGE_1G))
|
||||
return -EINVAL;
|
||||
}
|
||||
} else {
|
||||
if (mmu_psize != MMU_PAGE_16M && mmu_psize != MMU_PAGE_16G)
|
||||
return -EINVAL;
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user