Merge branch 'akpm' (patches from Andrew Morton)
Merge more patches from Andrew Morton:
 "The rest of MM. Plus one misc cleanup"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (35 commits)
  mm/Kconfig: add MMU dependency for MIGRATION.
  kernel: replace strict_strto*() with kstrto*()
  mm, thp: count thp_fault_fallback anytime thp fault fails
  thp: consolidate code between handle_mm_fault() and do_huge_pmd_anonymous_page()
  thp: do_huge_pmd_anonymous_page() cleanup
  thp: move maybe_pmd_mkwrite() out of mk_huge_pmd()
  mm: cleanup add_to_page_cache_locked()
  thp: account anon transparent huge pages into NR_ANON_PAGES
  truncate: drop 'oldsize' truncate_pagecache() parameter
  mm: make lru_add_drain_all() selective
  memcg: document cgroup dirty/writeback memory statistics
  memcg: add per cgroup writeback pages accounting
  memcg: check for proper lock held in mem_cgroup_update_page_stat
  memcg: remove MEMCG_NR_FILE_MAPPED
  memcg: reduce function dereference
  memcg: avoid overflow caused by PAGE_ALIGN
  memcg: rename RESOURCE_MAX to RES_COUNTER_MAX
  memcg: correct RESOURCE_MAX to ULLONG_MAX
  mm: memcg: do not trap chargers with full callstack on OOM
  mm: memcg: rework and document OOM waiting and wakeup
  ...
+2 -2

@@ -245,7 +245,7 @@ config COMPACTION
 config MIGRATION
     bool "Page migration"
     def_bool y
-    depends on NUMA || ARCH_ENABLE_MEMORY_HOTREMOVE || COMPACTION || CMA
+    depends on (NUMA || ARCH_ENABLE_MEMORY_HOTREMOVE || COMPACTION || CMA) && MMU
     help
       Allows the migration of the physical location of pages of processes
       while the virtual addresses are not changed. This is useful in
@@ -480,7 +480,7 @@ config FRONTSWAP

 config CMA
     bool "Contiguous Memory Allocator"
-    depends on HAVE_MEMBLOCK
+    depends on HAVE_MEMBLOCK && MMU
     select MIGRATION
     select MEMORY_ISOLATION
     help
+35 -24

@@ -467,32 +467,34 @@ int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
     error = mem_cgroup_cache_charge(page, current->mm,
                     gfp_mask & GFP_RECLAIM_MASK);
     if (error)
-        goto out;
+        return error;

     error = radix_tree_maybe_preload(gfp_mask & ~__GFP_HIGHMEM);
-    if (error == 0) {
-        page_cache_get(page);
-        page->mapping = mapping;
-        page->index = offset;
-
-        spin_lock_irq(&mapping->tree_lock);
-        error = radix_tree_insert(&mapping->page_tree, offset, page);
-        if (likely(!error)) {
-            mapping->nrpages++;
-            __inc_zone_page_state(page, NR_FILE_PAGES);
-            spin_unlock_irq(&mapping->tree_lock);
-            trace_mm_filemap_add_to_page_cache(page);
-        } else {
-            page->mapping = NULL;
-            /* Leave page->index set: truncation relies upon it */
-            spin_unlock_irq(&mapping->tree_lock);
-            mem_cgroup_uncharge_cache_page(page);
-            page_cache_release(page);
-        }
-        radix_tree_preload_end();
-    } else
+    if (error) {
         mem_cgroup_uncharge_cache_page(page);
-out:
+        return error;
+    }
+
+    page_cache_get(page);
+    page->mapping = mapping;
+    page->index = offset;
+
+    spin_lock_irq(&mapping->tree_lock);
+    error = radix_tree_insert(&mapping->page_tree, offset, page);
+    radix_tree_preload_end();
+    if (unlikely(error))
+        goto err_insert;
+    mapping->nrpages++;
+    __inc_zone_page_state(page, NR_FILE_PAGES);
+    spin_unlock_irq(&mapping->tree_lock);
+    trace_mm_filemap_add_to_page_cache(page);
+    return 0;
+err_insert:
+    page->mapping = NULL;
+    /* Leave page->index set: truncation relies upon it */
+    spin_unlock_irq(&mapping->tree_lock);
+    mem_cgroup_uncharge_cache_page(page);
+    page_cache_release(page);
     return error;
 }
 EXPORT_SYMBOL(add_to_page_cache_locked);
@@ -1614,6 +1616,7 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
     struct inode *inode = mapping->host;
     pgoff_t offset = vmf->pgoff;
     struct page *page;
+    bool memcg_oom;
     pgoff_t size;
     int ret = 0;

@@ -1622,7 +1625,11 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
         return VM_FAULT_SIGBUS;

     /*
-     * Do we have something in the page cache already?
+     * Do we have something in the page cache already? Either
+     * way, try readahead, but disable the memcg OOM killer for it
+     * as readahead is optional and no errors are propagated up
+     * the fault stack. The OOM killer is enabled while trying to
+     * instantiate the faulting page individually below.
      */
     page = find_get_page(mapping, offset);
     if (likely(page) && !(vmf->flags & FAULT_FLAG_TRIED)) {
@@ -1630,10 +1637,14 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
          * We found the page, so try async readahead before
          * waiting for the lock.
          */
+        memcg_oom = mem_cgroup_toggle_oom(false);
         do_async_mmap_readahead(vma, ra, file, page, offset);
+        mem_cgroup_toggle_oom(memcg_oom);
     } else if (!page) {
         /* No page in the page cache at all */
+        memcg_oom = mem_cgroup_toggle_oom(false);
         do_sync_mmap_readahead(vma, ra, file, offset);
+        mem_cgroup_toggle_oom(memcg_oom);
         count_vm_event(PGMAJFAULT);
         mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
         ret = VM_FAULT_MAJOR;
+56 -73

@@ -695,11 +695,10 @@ pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma)
     return pmd;
 }

-static inline pmd_t mk_huge_pmd(struct page *page, struct vm_area_struct *vma)
+static inline pmd_t mk_huge_pmd(struct page *page, pgprot_t prot)
 {
     pmd_t entry;
-    entry = mk_pmd(page, vma->vm_page_prot);
-    entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
+    entry = mk_pmd(page, prot);
     entry = pmd_mkhuge(entry);
     return entry;
 }
@@ -732,7 +731,8 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
         pte_free(mm, pgtable);
     } else {
         pmd_t entry;
-        entry = mk_huge_pmd(page, vma);
+        entry = mk_huge_pmd(page, vma->vm_page_prot);
+        entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
         page_add_new_anon_rmap(page, vma, haddr);
         pgtable_trans_huge_deposit(mm, pmd, pgtable);
         set_pmd_at(mm, haddr, pmd, entry);
@@ -788,77 +788,57 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
 {
     struct page *page;
     unsigned long haddr = address & HPAGE_PMD_MASK;
-    pte_t *pte;

-    if (haddr >= vma->vm_start && haddr + HPAGE_PMD_SIZE <= vma->vm_end) {
-        if (unlikely(anon_vma_prepare(vma)))
-            return VM_FAULT_OOM;
-        if (unlikely(khugepaged_enter(vma)))
-            return VM_FAULT_OOM;
-        if (!(flags & FAULT_FLAG_WRITE) &&
-                transparent_hugepage_use_zero_page()) {
-            pgtable_t pgtable;
-            struct page *zero_page;
-            bool set;
-            pgtable = pte_alloc_one(mm, haddr);
-            if (unlikely(!pgtable))
-                return VM_FAULT_OOM;
-            zero_page = get_huge_zero_page();
-            if (unlikely(!zero_page)) {
-                pte_free(mm, pgtable);
-                count_vm_event(THP_FAULT_FALLBACK);
-                goto out;
-            }
-            spin_lock(&mm->page_table_lock);
-            set = set_huge_zero_page(pgtable, mm, vma, haddr, pmd,
-                    zero_page);
-            spin_unlock(&mm->page_table_lock);
-            if (!set) {
-                pte_free(mm, pgtable);
-                put_huge_zero_page();
-            }
-            return 0;
-        }
-        page = alloc_hugepage_vma(transparent_hugepage_defrag(vma),
-                vma, haddr, numa_node_id(), 0);
-        if (unlikely(!page)) {
-            count_vm_event(THP_FAULT_FALLBACK);
-            goto out;
-        }
-        count_vm_event(THP_FAULT_ALLOC);
-        if (unlikely(mem_cgroup_newpage_charge(page, mm, GFP_KERNEL))) {
-            put_page(page);
-            goto out;
-        }
-        if (unlikely(__do_huge_pmd_anonymous_page(mm, vma, haddr, pmd,
-                page))) {
-            mem_cgroup_uncharge_page(page);
-            put_page(page);
-            goto out;
-        }
-
-        return 0;
-    }
-out:
-    /*
-     * Use __pte_alloc instead of pte_alloc_map, because we can't
-     * run pte_offset_map on the pmd, if an huge pmd could
-     * materialize from under us from a different thread.
-     */
-    if (unlikely(pmd_none(*pmd)) &&
-        unlikely(__pte_alloc(mm, vma, pmd, address)))
-        return VM_FAULT_OOM;
-    /* if an huge pmd materialized from under us just retry later */
-    if (unlikely(pmd_trans_huge(*pmd)))
-        return 0;
-    /*
-     * A regular pmd is established and it can't morph into a huge pmd
-     * from under us anymore at this point because we hold the mmap_sem
-     * read mode and khugepaged takes it in write mode. So now it's
-     * safe to run pte_offset_map().
-     */
-    pte = pte_offset_map(pmd, address);
-    return handle_pte_fault(mm, vma, address, pte, pmd, flags);
+    if (haddr < vma->vm_start || haddr + HPAGE_PMD_SIZE > vma->vm_end)
+        return VM_FAULT_FALLBACK;
+    if (unlikely(anon_vma_prepare(vma)))
+        return VM_FAULT_OOM;
+    if (unlikely(khugepaged_enter(vma)))
+        return VM_FAULT_OOM;
+    if (!(flags & FAULT_FLAG_WRITE) &&
+            transparent_hugepage_use_zero_page()) {
+        pgtable_t pgtable;
+        struct page *zero_page;
+        bool set;
+        pgtable = pte_alloc_one(mm, haddr);
+        if (unlikely(!pgtable))
+            return VM_FAULT_OOM;
+        zero_page = get_huge_zero_page();
+        if (unlikely(!zero_page)) {
+            pte_free(mm, pgtable);
+            count_vm_event(THP_FAULT_FALLBACK);
+            return VM_FAULT_FALLBACK;
+        }
+        spin_lock(&mm->page_table_lock);
+        set = set_huge_zero_page(pgtable, mm, vma, haddr, pmd,
+                zero_page);
+        spin_unlock(&mm->page_table_lock);
+        if (!set) {
+            pte_free(mm, pgtable);
+            put_huge_zero_page();
+        }
+        return 0;
+    }
+    page = alloc_hugepage_vma(transparent_hugepage_defrag(vma),
+            vma, haddr, numa_node_id(), 0);
+    if (unlikely(!page)) {
+        count_vm_event(THP_FAULT_FALLBACK);
+        return VM_FAULT_FALLBACK;
+    }
+    if (unlikely(mem_cgroup_newpage_charge(page, mm, GFP_KERNEL))) {
+        put_page(page);
+        count_vm_event(THP_FAULT_FALLBACK);
+        return VM_FAULT_FALLBACK;
+    }
+    if (unlikely(__do_huge_pmd_anonymous_page(mm, vma, haddr, pmd, page))) {
+        mem_cgroup_uncharge_page(page);
+        put_page(page);
+        count_vm_event(THP_FAULT_FALLBACK);
+        return VM_FAULT_FALLBACK;
+    }
+
+    count_vm_event(THP_FAULT_ALLOC);
+    return 0;
 }

 int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
@@ -1170,7 +1150,6 @@ alloc:
         new_page = NULL;

     if (unlikely(!new_page)) {
-        count_vm_event(THP_FAULT_FALLBACK);
         if (is_huge_zero_pmd(orig_pmd)) {
             ret = do_huge_pmd_wp_zero_page_fallback(mm, vma,
                     address, pmd, orig_pmd, haddr);
@@ -1181,9 +1160,9 @@ alloc:
                 split_huge_page(page);
             put_page(page);
         }
+        count_vm_event(THP_FAULT_FALLBACK);
         goto out;
     }
-    count_vm_event(THP_FAULT_ALLOC);

     if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))) {
         put_page(new_page);
@@ -1191,10 +1170,13 @@ alloc:
             split_huge_page(page);
             put_page(page);
         }
+        count_vm_event(THP_FAULT_FALLBACK);
         ret |= VM_FAULT_OOM;
         goto out;
     }

+    count_vm_event(THP_FAULT_ALLOC);
+
     if (is_huge_zero_pmd(orig_pmd))
         clear_huge_page(new_page, haddr, HPAGE_PMD_NR);
     else
@@ -1215,7 +1197,8 @@ alloc:
         goto out_mn;
     } else {
         pmd_t entry;
-        entry = mk_huge_pmd(new_page, vma);
+        entry = mk_huge_pmd(new_page, vma->vm_page_prot);
+        entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
         pmdp_clear_flush(vma, haddr, pmd);
         page_add_new_anon_rmap(new_page, vma, haddr);
         set_pmd_at(mm, haddr, pmd, entry);
@@ -1666,7 +1649,6 @@ static void __split_huge_page_refcount(struct page *page,
     BUG_ON(atomic_read(&page->_count) <= 0);

     __mod_zone_page_state(zone, NR_ANON_TRANSPARENT_HUGEPAGES, -1);
-    __mod_zone_page_state(zone, NR_ANON_PAGES, HPAGE_PMD_NR);

     ClearPageCompound(page);
     compound_unlock(page);
@@ -2364,7 +2346,8 @@ static void collapse_huge_page(struct mm_struct *mm,
     __SetPageUptodate(new_page);
     pgtable = pmd_pgtable(_pmd);

-    _pmd = mk_huge_pmd(new_page, vma);
+    _pmd = mk_huge_pmd(new_page, vma->vm_page_prot);
+    _pmd = maybe_pmd_mkwrite(pmd_mkdirty(_pmd), vma);

     /*
      * spin_lock() below is not the equivalent of smp_wmb(), so
+336 -533 (file diff suppressed because it is too large)
+39 -13

@@ -3695,7 +3695,7 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
  * but allow concurrent faults), and pte mapped but not yet locked.
  * We return with mmap_sem still held, but pte unmapped and unlocked.
  */
-int handle_pte_fault(struct mm_struct *mm,
+static int handle_pte_fault(struct mm_struct *mm,
             struct vm_area_struct *vma, unsigned long address,
             pte_t *pte, pmd_t *pmd, unsigned int flags)
 {
@@ -3754,22 +3754,14 @@ unlock:
 /*
  * By the time we get here, we already hold the mm semaphore
  */
-int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
-            unsigned long address, unsigned int flags)
+static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
+            unsigned long address, unsigned int flags)
 {
     pgd_t *pgd;
     pud_t *pud;
     pmd_t *pmd;
     pte_t *pte;

-    __set_current_state(TASK_RUNNING);
-
-    count_vm_event(PGFAULT);
-    mem_cgroup_count_vm_event(mm, PGFAULT);
-
-    /* do counter updates before entering really critical section. */
-    check_sync_rss_stat(current);
-
     if (unlikely(is_vm_hugetlb_page(vma)))
         return hugetlb_fault(mm, vma, address, flags);

@@ -3782,9 +3774,12 @@ retry:
     if (!pmd)
         return VM_FAULT_OOM;
     if (pmd_none(*pmd) && transparent_hugepage_enabled(vma)) {
+        int ret = VM_FAULT_FALLBACK;
         if (!vma->vm_ops)
-            return do_huge_pmd_anonymous_page(mm, vma, address,
-                    pmd, flags);
+            ret = do_huge_pmd_anonymous_page(mm, vma, address,
+                    pmd, flags);
+        if (!(ret & VM_FAULT_FALLBACK))
+            return ret;
     } else {
         pmd_t orig_pmd = *pmd;
         int ret;
@@ -3850,6 +3845,37 @@ retry:
     return handle_pte_fault(mm, vma, address, pte, pmd, flags);
 }

+int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
+            unsigned long address, unsigned int flags)
+{
+    int ret;
+
+    __set_current_state(TASK_RUNNING);
+
+    count_vm_event(PGFAULT);
+    mem_cgroup_count_vm_event(mm, PGFAULT);
+
+    /* do counter updates before entering really critical section. */
+    check_sync_rss_stat(current);
+
+    /*
+     * Enable the memcg OOM handling for faults triggered in user
+     * space. Kernel faults are handled more gracefully.
+     */
+    if (flags & FAULT_FLAG_USER)
+        mem_cgroup_enable_oom();
+
+    ret = __handle_mm_fault(mm, vma, address, flags);
+
+    if (flags & FAULT_FLAG_USER)
+        mem_cgroup_disable_oom();
+
+    if (WARN_ON(task_in_memcg_oom(current) && !(ret & VM_FAULT_OOM)))
+        mem_cgroup_oom_synchronize();
+
+    return ret;
+}
+
 #ifndef __PAGETABLE_PUD_FOLDED
 /*
  * Allocate page upper directory.
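The new handle_mm_fault() wrapper above only enables the memcg OOM handling when the caller passes FAULT_FLAG_USER. As a rough illustration — a hypothetical sketch, not part of this diff — an architecture page-fault path would set that flag only for faults raised from user mode:

/*
 * Hypothetical sketch of an arch page-fault path (function name and
 * surrounding code invented for illustration).  Only faults that come
 * from user mode set FAULT_FLAG_USER, which is what makes the
 * handle_mm_fault() wrapper above enable memcg OOM handling.
 */
static int example_do_page_fault(struct pt_regs *regs, struct mm_struct *mm,
                                 struct vm_area_struct *vma,
                                 unsigned long address, int write)
{
    unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;

    if (user_mode(regs))
        flags |= FAULT_FLAG_USER;   /* fault originated in user space */
    if (write)
        flags |= FAULT_FLAG_WRITE;

    return handle_mm_fault(mm, vma, address, flags);
}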
+5 -2

@@ -678,9 +678,12 @@ out:
  */
 void pagefault_out_of_memory(void)
 {
-    struct zonelist *zonelist = node_zonelist(first_online_node,
-                GFP_KERNEL);
+    struct zonelist *zonelist;

+    if (mem_cgroup_oom_synchronize())
+        return;
+
+    zonelist = node_zonelist(first_online_node, GFP_KERNEL);
     if (try_set_zonelist_oom(zonelist, GFP_KERNEL)) {
         out_of_memory(NULL, 0, 0, NULL, false);
         clear_zonelist_oom(zonelist, GFP_KERNEL);
@@ -2143,11 +2143,17 @@ EXPORT_SYMBOL(account_page_dirtied);

 /*
  * Helper function for set_page_writeback family.
+ *
+ * The caller must hold mem_cgroup_begin/end_update_page_stat() lock
+ * while calling this function.
+ * See test_set_page_writeback for example.
+ *
  * NOTE: Unlike account_page_dirtied this does not rely on being atomic
  * wrt interrupts.
  */
 void account_page_writeback(struct page *page)
 {
+    mem_cgroup_inc_page_stat(page, MEM_CGROUP_STAT_WRITEBACK);
     inc_zone_page_state(page, NR_WRITEBACK);
 }
 EXPORT_SYMBOL(account_page_writeback);
@@ -2364,7 +2370,10 @@ int test_clear_page_writeback(struct page *page)
 {
     struct address_space *mapping = page_mapping(page);
     int ret;
+    bool locked;
+    unsigned long memcg_flags;

+    mem_cgroup_begin_update_page_stat(page, &locked, &memcg_flags);
     if (mapping) {
         struct backing_dev_info *bdi = mapping->backing_dev_info;
         unsigned long flags;
@@ -2385,9 +2394,11 @@ int test_clear_page_writeback(struct page *page)
         ret = TestClearPageWriteback(page);
     }
     if (ret) {
+        mem_cgroup_dec_page_stat(page, MEM_CGROUP_STAT_WRITEBACK);
         dec_zone_page_state(page, NR_WRITEBACK);
         inc_zone_page_state(page, NR_WRITTEN);
     }
+    mem_cgroup_end_update_page_stat(page, &locked, &memcg_flags);
     return ret;
 }

@@ -2395,7 +2406,10 @@ int test_set_page_writeback(struct page *page)
 {
     struct address_space *mapping = page_mapping(page);
     int ret;
+    bool locked;
+    unsigned long memcg_flags;

+    mem_cgroup_begin_update_page_stat(page, &locked, &memcg_flags);
     if (mapping) {
         struct backing_dev_info *bdi = mapping->backing_dev_info;
         unsigned long flags;
@@ -2422,6 +2436,7 @@ int test_set_page_writeback(struct page *page)
     }
     if (!ret)
         account_page_writeback(page);
+    mem_cgroup_end_update_page_stat(page, &locked, &memcg_flags);
     return ret;

 }
@@ -1052,11 +1052,11 @@ void do_page_add_anon_rmap(struct page *page,
 {
     int first = atomic_inc_and_test(&page->_mapcount);
     if (first) {
-        if (!PageTransHuge(page))
-            __inc_zone_page_state(page, NR_ANON_PAGES);
-        else
+        if (PageTransHuge(page))
             __inc_zone_page_state(page,
                           NR_ANON_TRANSPARENT_HUGEPAGES);
+        __mod_zone_page_state(page_zone(page), NR_ANON_PAGES,
+                hpage_nr_pages(page));
     }
     if (unlikely(PageKsm(page)))
         return;
@@ -1085,10 +1085,10 @@ void page_add_new_anon_rmap(struct page *page,
     VM_BUG_ON(address < vma->vm_start || address >= vma->vm_end);
     SetPageSwapBacked(page);
     atomic_set(&page->_mapcount, 0); /* increment count (starts at -1) */
-    if (!PageTransHuge(page))
-        __inc_zone_page_state(page, NR_ANON_PAGES);
-    else
+    if (PageTransHuge(page))
         __inc_zone_page_state(page, NR_ANON_TRANSPARENT_HUGEPAGES);
+    __mod_zone_page_state(page_zone(page), NR_ANON_PAGES,
+            hpage_nr_pages(page));
     __page_set_anon_rmap(page, vma, address, 1);
     if (!mlocked_vma_newpage(vma, page)) {
         SetPageActive(page);
@@ -1111,7 +1111,7 @@ void page_add_file_rmap(struct page *page)
     mem_cgroup_begin_update_page_stat(page, &locked, &flags);
     if (atomic_inc_and_test(&page->_mapcount)) {
         __inc_zone_page_state(page, NR_FILE_MAPPED);
-        mem_cgroup_inc_page_stat(page, MEMCG_NR_FILE_MAPPED);
+        mem_cgroup_inc_page_stat(page, MEM_CGROUP_STAT_FILE_MAPPED);
     }
     mem_cgroup_end_update_page_stat(page, &locked, &flags);
 }
@@ -1148,14 +1148,14 @@ void page_remove_rmap(struct page *page)
         goto out;
     if (anon) {
         mem_cgroup_uncharge_page(page);
-        if (!PageTransHuge(page))
-            __dec_zone_page_state(page, NR_ANON_PAGES);
-        else
+        if (PageTransHuge(page))
             __dec_zone_page_state(page,
                           NR_ANON_TRANSPARENT_HUGEPAGES);
+        __mod_zone_page_state(page_zone(page), NR_ANON_PAGES,
+                -hpage_nr_pages(page));
     } else {
         __dec_zone_page_state(page, NR_FILE_MAPPED);
-        mem_cgroup_dec_page_stat(page, MEMCG_NR_FILE_MAPPED);
+        mem_cgroup_dec_page_stat(page, MEM_CGROUP_STAT_FILE_MAPPED);
         mem_cgroup_end_update_page_stat(page, &locked, &flags);
     }
     if (unlikely(PageMlocked(page)))
@@ -432,6 +432,11 @@ static void activate_page_drain(int cpu)
         pagevec_lru_move_fn(pvec, __activate_page, NULL);
 }

+static bool need_activate_page_drain(int cpu)
+{
+    return pagevec_count(&per_cpu(activate_page_pvecs, cpu)) != 0;
+}
+
 void activate_page(struct page *page)
 {
     if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
@@ -449,6 +454,11 @@ static inline void activate_page_drain(int cpu)
 {
 }

+static bool need_activate_page_drain(int cpu)
+{
+    return false;
+}
+
 void activate_page(struct page *page)
 {
     struct zone *zone = page_zone(page);
@@ -701,12 +711,36 @@ static void lru_add_drain_per_cpu(struct work_struct *dummy)
     lru_add_drain();
 }

-/*
- * Returns 0 for success
- */
-int lru_add_drain_all(void)
+static DEFINE_PER_CPU(struct work_struct, lru_add_drain_work);
+
+void lru_add_drain_all(void)
 {
-    return schedule_on_each_cpu(lru_add_drain_per_cpu);
+    static DEFINE_MUTEX(lock);
+    static struct cpumask has_work;
+    int cpu;
+
+    mutex_lock(&lock);
+    get_online_cpus();
+    cpumask_clear(&has_work);
+
+    for_each_online_cpu(cpu) {
+        struct work_struct *work = &per_cpu(lru_add_drain_work, cpu);
+
+        if (pagevec_count(&per_cpu(lru_add_pvec, cpu)) ||
+            pagevec_count(&per_cpu(lru_rotate_pvecs, cpu)) ||
+            pagevec_count(&per_cpu(lru_deactivate_pvecs, cpu)) ||
+            need_activate_page_drain(cpu)) {
+            INIT_WORK(work, lru_add_drain_per_cpu);
+            schedule_work_on(cpu, work);
+            cpumask_set_cpu(cpu, &has_work);
+        }
+    }
+
+    for_each_cpu(cpu, &has_work)
+        flush_work(&per_cpu(lru_add_drain_work, cpu));
+
+    put_online_cpus();
+    mutex_unlock(&lock);
 }

 /*
+2 -7

@@ -567,7 +567,6 @@ EXPORT_SYMBOL_GPL(invalidate_inode_pages2);
 /**
  * truncate_pagecache - unmap and remove pagecache that has been truncated
  * @inode: inode
- * @oldsize: old file size
  * @newsize: new file size
  *
  * inode's new i_size must already be written before truncate_pagecache
@@ -580,7 +579,7 @@ EXPORT_SYMBOL_GPL(invalidate_inode_pages2);
  * situations such as writepage being called for a page that has already
  * had its underlying blocks deallocated.
  */
-void truncate_pagecache(struct inode *inode, loff_t oldsize, loff_t newsize)
+void truncate_pagecache(struct inode *inode, loff_t newsize)
 {
     struct address_space *mapping = inode->i_mapping;
     loff_t holebegin = round_up(newsize, PAGE_SIZE);
@@ -614,12 +613,8 @@ EXPORT_SYMBOL(truncate_pagecache);
  */
 void truncate_setsize(struct inode *inode, loff_t newsize)
 {
-    loff_t oldsize;
-
-    oldsize = inode->i_size;
     i_size_write(inode, newsize);
-
-    truncate_pagecache(inode, oldsize, newsize);
+    truncate_pagecache(inode, newsize);
 }
 EXPORT_SYMBOL(truncate_setsize);
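With the 'oldsize' parameter gone, every truncate_pagecache() caller drops one argument, exactly as truncate_setsize() does above. A hypothetical filesystem-side caller — an illustrative sketch, not taken from this diff — would now look like:

/*
 * Hypothetical example of adapting a caller to the new signature:
 * truncate_pagecache() no longer takes the old file size.
 */
static void example_fs_setsize(struct inode *inode, loff_t newsize)
{
    /* before this series: truncate_pagecache(inode, inode->i_size, newsize); */
    i_size_write(inode, newsize);
    truncate_pagecache(inode, newsize);
}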
+52 -31

@@ -139,11 +139,23 @@ static bool global_reclaim(struct scan_control *sc)
 {
     return !sc->target_mem_cgroup;
 }
+
+static bool mem_cgroup_should_soft_reclaim(struct scan_control *sc)
+{
+    struct mem_cgroup *root = sc->target_mem_cgroup;
+    return !mem_cgroup_disabled() &&
+        mem_cgroup_soft_reclaim_eligible(root, root) != SKIP_TREE;
+}
 #else
 static bool global_reclaim(struct scan_control *sc)
 {
     return true;
 }
+
+static bool mem_cgroup_should_soft_reclaim(struct scan_control *sc)
+{
+    return false;
+}
 #endif

 unsigned long zone_reclaimable_pages(struct zone *zone)
@@ -2164,9 +2176,11 @@ static inline bool should_continue_reclaim(struct zone *zone,
     }
 }

-static void shrink_zone(struct zone *zone, struct scan_control *sc)
+static int
+__shrink_zone(struct zone *zone, struct scan_control *sc, bool soft_reclaim)
 {
     unsigned long nr_reclaimed, nr_scanned;
+    int groups_scanned = 0;

     do {
         struct mem_cgroup *root = sc->target_mem_cgroup;
@@ -2174,15 +2188,17 @@ static void shrink_zone(struct zone *zone, struct scan_control *sc)
             .zone = zone,
             .priority = sc->priority,
         };
-        struct mem_cgroup *memcg;
+        struct mem_cgroup *memcg = NULL;
+        mem_cgroup_iter_filter filter = (soft_reclaim) ?
+            mem_cgroup_soft_reclaim_eligible : NULL;

         nr_reclaimed = sc->nr_reclaimed;
         nr_scanned = sc->nr_scanned;

-        memcg = mem_cgroup_iter(root, NULL, &reclaim);
-        do {
+        while ((memcg = mem_cgroup_iter_cond(root, memcg, &reclaim, filter))) {
             struct lruvec *lruvec;

+            groups_scanned++;
             lruvec = mem_cgroup_zone_lruvec(zone, memcg);

             shrink_lruvec(lruvec, sc);
@@ -2202,8 +2218,7 @@ static void shrink_zone(struct zone *zone, struct scan_control *sc)
                 mem_cgroup_iter_break(root, memcg);
                 break;
             }
-            memcg = mem_cgroup_iter(root, memcg, &reclaim);
-        } while (memcg);
+        }

         vmpressure(sc->gfp_mask, sc->target_mem_cgroup,
                sc->nr_scanned - nr_scanned,
@@ -2211,6 +2226,37 @@ static void shrink_zone(struct zone *zone, struct scan_control *sc)

     } while (should_continue_reclaim(zone, sc->nr_reclaimed - nr_reclaimed,
                      sc->nr_scanned - nr_scanned, sc));
+
+    return groups_scanned;
+}
+
+
+static void shrink_zone(struct zone *zone, struct scan_control *sc)
+{
+    bool do_soft_reclaim = mem_cgroup_should_soft_reclaim(sc);
+    unsigned long nr_scanned = sc->nr_scanned;
+    int scanned_groups;
+
+    scanned_groups = __shrink_zone(zone, sc, do_soft_reclaim);
+    /*
+     * memcg iterator might race with other reclaimer or start from
+     * a incomplete tree walk so the tree walk in __shrink_zone
+     * might have missed groups that are above the soft limit. Try
+     * another loop to catch up with others. Do it just once to
+     * prevent from reclaim latencies when other reclaimers always
+     * preempt this one.
+     */
+    if (do_soft_reclaim && !scanned_groups)
+        __shrink_zone(zone, sc, do_soft_reclaim);
+
+    /*
+     * No group is over the soft limit or those that are do not have
+     * pages in the zone we are reclaiming so we have to reclaim everybody
+     */
+    if (do_soft_reclaim && (sc->nr_scanned == nr_scanned)) {
+        __shrink_zone(zone, sc, false);
+        return;
+    }
 }

 /* Returns true if compaction should go ahead for a high-order request */
@@ -2274,8 +2320,6 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
 {
     struct zoneref *z;
     struct zone *zone;
-    unsigned long nr_soft_reclaimed;
-    unsigned long nr_soft_scanned;
     bool aborted_reclaim = false;

     /*
@@ -2315,18 +2359,6 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
                     continue;
                 }
             }
-            /*
-             * This steals pages from memory cgroups over softlimit
-             * and returns the number of reclaimed pages and
-             * scanned pages. This works for global memory pressure
-             * and balancing, not for a memcg's limit.
-             */
-            nr_soft_scanned = 0;
-            nr_soft_reclaimed = mem_cgroup_soft_limit_reclaim(zone,
-                        sc->order, sc->gfp_mask,
-                        &nr_soft_scanned);
-            sc->nr_reclaimed += nr_soft_reclaimed;
-            sc->nr_scanned += nr_soft_scanned;
             /* need some check for avoid more shrink_zone() */
         }

@@ -2920,8 +2952,6 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
 {
     int i;
     int end_zone = 0; /* Inclusive. 0 = ZONE_DMA */
-    unsigned long nr_soft_reclaimed;
-    unsigned long nr_soft_scanned;
     struct scan_control sc = {
         .gfp_mask = GFP_KERNEL,
         .priority = DEF_PRIORITY,
@@ -3036,15 +3066,6 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,

             sc.nr_scanned = 0;

-            nr_soft_scanned = 0;
-            /*
-             * Call soft limit reclaim before calling shrink_zone.
-             */
-            nr_soft_reclaimed = mem_cgroup_soft_limit_reclaim(zone,
-                            order, sc.gfp_mask,
-                            &nr_soft_scanned);
-            sc.nr_reclaimed += nr_soft_reclaimed;
-
             /*
              * There should be no need to raise the scanning
              * priority if enough pages are already being scanned