Merge branch 'akpm' (patches from Andrew)
Merge fixes from Andrew Morton:
 "26 fixes"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (26 commits)
  userfaultfd: remove wrong comment from userfaultfd_ctx_get()
  fat: fix using uninitialized fields of fat_inode/fsinfo_inode
  sh: cayman: IDE support fix
  kasan: fix races in quarantine_remove_cache()
  kasan: resched in quarantine_remove_cache()
  mm: do not call mem_cgroup_free() from within mem_cgroup_alloc()
  thp: fix another corner case of munlock() vs. THPs
  rmap: fix NULL-pointer dereference on THP munlocking
  mm/memblock.c: fix memblock_next_valid_pfn()
  userfaultfd: selftest: vm: allow to build in vm/ directory
  userfaultfd: non-cooperative: userfaultfd_remove revalidate vma in MADV_DONTNEED
  userfaultfd: non-cooperative: fix fork fctx->new memleak
  mm/cgroup: avoid panic when init with low memory
  drivers/md/bcache/util.h: remove duplicate inclusion of blkdev.h
  mm/vmstats: add thp_split_pud event for clarity
  include/linux/fs.h: fix unsigned enum warning with gcc-4.2
  userfaultfd: non-cooperative: release all ctx in dup_userfaultfd_complete
  userfaultfd: non-cooperative: robustness check
  userfaultfd: non-cooperative: rollback userfaultfd_exit
  x86, mm: unify exit paths in gup_pte_range()
  ...
+1 -1
@@ -1828,7 +1828,7 @@ static void __split_huge_pud_locked(struct vm_area_struct *vma, pud_t *pud,
 	VM_BUG_ON_VMA(vma->vm_end < haddr + HPAGE_PUD_SIZE, vma);
 	VM_BUG_ON(!pud_trans_huge(*pud) && !pud_devmap(*pud));
 
-	count_vm_event(THP_SPLIT_PMD);
+	count_vm_event(THP_SPLIT_PUD);
 
 	pudp_huge_clear_flush_notify(vma, haddr, pud);
 }
+44 -7
@@ -25,6 +25,7 @@
 #include <linux/printk.h>
 #include <linux/shrinker.h>
 #include <linux/slab.h>
+#include <linux/srcu.h>
 #include <linux/string.h>
 #include <linux/types.h>
 
@@ -103,6 +104,7 @@ static int quarantine_tail;
 /* Total size of all objects in global_quarantine across all batches. */
 static unsigned long quarantine_size;
 static DEFINE_SPINLOCK(quarantine_lock);
+DEFINE_STATIC_SRCU(remove_cache_srcu);
 
 /* Maximum size of the global queue. */
 static unsigned long quarantine_max_size;
@@ -173,17 +175,22 @@ void quarantine_put(struct kasan_free_meta *info, struct kmem_cache *cache)
 	struct qlist_head *q;
 	struct qlist_head temp = QLIST_INIT;
 
+	/*
+	 * Note: irq must be disabled until after we move the batch to the
+	 * global quarantine. Otherwise quarantine_remove_cache() can miss
+	 * some objects belonging to the cache if they are in our local temp
+	 * list. quarantine_remove_cache() executes on_each_cpu() at the
+	 * beginning which ensures that it either sees the objects in per-cpu
+	 * lists or in the global quarantine.
+	 */
 	local_irq_save(flags);
 
 	q = this_cpu_ptr(&cpu_quarantine);
 	qlist_put(q, &info->quarantine_link, cache->size);
-	if (unlikely(q->bytes > QUARANTINE_PERCPU_SIZE))
+	if (unlikely(q->bytes > QUARANTINE_PERCPU_SIZE)) {
 		qlist_move_all(q, &temp);
 
-	local_irq_restore(flags);
-
-	if (unlikely(!qlist_empty(&temp))) {
-		spin_lock_irqsave(&quarantine_lock, flags);
+		spin_lock(&quarantine_lock);
 		WRITE_ONCE(quarantine_size, quarantine_size + temp.bytes);
 		qlist_move_all(&temp, &global_quarantine[quarantine_tail]);
 		if (global_quarantine[quarantine_tail].bytes >=
@@ -196,20 +203,33 @@ void quarantine_put(struct kasan_free_meta *info, struct kmem_cache *cache)
 			if (new_tail != quarantine_head)
 				quarantine_tail = new_tail;
 		}
-		spin_unlock_irqrestore(&quarantine_lock, flags);
+		spin_unlock(&quarantine_lock);
 	}
+
+	local_irq_restore(flags);
 }
 
 void quarantine_reduce(void)
 {
 	size_t total_size, new_quarantine_size, percpu_quarantines;
 	unsigned long flags;
+	int srcu_idx;
 	struct qlist_head to_free = QLIST_INIT;
 
 	if (likely(READ_ONCE(quarantine_size) <=
 		   READ_ONCE(quarantine_max_size)))
 		return;
 
+	/*
+	 * srcu critical section ensures that quarantine_remove_cache()
+	 * will not miss objects belonging to the cache while they are in our
+	 * local to_free list. srcu is chosen because (1) it gives us private
+	 * grace period domain that does not interfere with anything else,
+	 * and (2) it allows synchronize_srcu() to return without waiting
+	 * if there are no pending read critical sections (which is the
+	 * expected case).
+	 */
+	srcu_idx = srcu_read_lock(&remove_cache_srcu);
 	spin_lock_irqsave(&quarantine_lock, flags);
 
 	/*
@@ -237,6 +257,7 @@ void quarantine_reduce(void)
 	spin_unlock_irqrestore(&quarantine_lock, flags);
 
 	qlist_free_all(&to_free, NULL);
+	srcu_read_unlock(&remove_cache_srcu, srcu_idx);
 }
 
 static void qlist_move_cache(struct qlist_head *from,
@@ -280,12 +301,28 @@ void quarantine_remove_cache(struct kmem_cache *cache)
 	unsigned long flags, i;
 	struct qlist_head to_free = QLIST_INIT;
 
+	/*
+	 * Must be careful to not miss any objects that are being moved from
+	 * per-cpu list to the global quarantine in quarantine_put(),
+	 * nor objects being freed in quarantine_reduce(). on_each_cpu()
+	 * achieves the first goal, while synchronize_srcu() achieves the
+	 * second.
+	 */
 	on_each_cpu(per_cpu_remove_cache, cache, 1);
 
 	spin_lock_irqsave(&quarantine_lock, flags);
-	for (i = 0; i < QUARANTINE_BATCHES; i++)
+	for (i = 0; i < QUARANTINE_BATCHES; i++) {
+		if (qlist_empty(&global_quarantine[i]))
+			continue;
 		qlist_move_cache(&global_quarantine[i], &to_free, cache);
+		/* Scanning whole quarantine can take a while. */
+		spin_unlock_irqrestore(&quarantine_lock, flags);
+		cond_resched();
+		spin_lock_irqsave(&quarantine_lock, flags);
+	}
 	spin_unlock_irqrestore(&quarantine_lock, flags);
 
 	qlist_free_all(&to_free, cache);
+
+	synchronize_srcu(&remove_cache_srcu);
 }
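Editor's note: the quarantine_remove_cache() hunk above is a textbook instance of the "drop the lock and reschedule during a long scan" pattern: the whole quarantine is too big to walk under a single spinlock hold, so the lock is released and retaken between batches. Below is a minimal userspace sketch of the same idea using pthreads; the names (scan_all_batches, struct batch) are invented for illustration and are not kernel API.

#include <pthread.h>
#include <sched.h>
#include <stddef.h>

#define NBATCHES 32

struct batch { int nitems; };

static pthread_mutex_t scan_lock = PTHREAD_MUTEX_INITIALIZER;
static struct batch batches[NBATCHES];

/* Walk every batch, but never hold scan_lock across the whole walk:
 * drop it between batches so other threads are not starved, exactly
 * like the spin_unlock_irqrestore()/cond_resched() pair above. */
static void scan_all_batches(void (*visit)(struct batch *))
{
	pthread_mutex_lock(&scan_lock);
	for (size_t i = 0; i < NBATCHES; i++) {
		if (batches[i].nitems == 0)
			continue;
		visit(&batches[i]);
		/* Scanning everything can take a while. */
		pthread_mutex_unlock(&scan_lock);
		sched_yield();
		pthread_mutex_lock(&scan_lock);
	}
	pthread_mutex_unlock(&scan_lock);
}

static void drain(struct batch *b)
{
	b->nitems = 0;
}

int main(void)
{
	batches[3].nitems = 5;
	scan_all_batches(drain);
	return 0;
}

The fixed-size array is what keeps the index meaningful after the lock is retaken; the kernel code relies on QUARANTINE_BATCHES being constant in exactly the same way.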
+41 -3
@@ -513,7 +513,43 @@ static long madvise_dontneed(struct vm_area_struct *vma,
 	if (!can_madv_dontneed_vma(vma))
 		return -EINVAL;
 
-	userfaultfd_remove(vma, prev, start, end);
+	if (!userfaultfd_remove(vma, start, end)) {
+		*prev = NULL; /* mmap_sem has been dropped, prev is stale */
+
+		down_read(&current->mm->mmap_sem);
+		vma = find_vma(current->mm, start);
+		if (!vma)
+			return -ENOMEM;
+		if (start < vma->vm_start) {
+			/*
+			 * This "vma" under revalidation is the one
+			 * with the lowest vma->vm_start where start
+			 * is also < vma->vm_end. If start <
+			 * vma->vm_start it means an hole materialized
+			 * in the user address space within the
+			 * virtual range passed to MADV_DONTNEED.
+			 */
+			return -ENOMEM;
+		}
+		if (!can_madv_dontneed_vma(vma))
+			return -EINVAL;
+		if (end > vma->vm_end) {
+			/*
+			 * Don't fail if end > vma->vm_end. If the old
+			 * vma was splitted while the mmap_sem was
+			 * released the effect of the concurrent
+			 * operation may not cause MADV_DONTNEED to
+			 * have an undefined result. There may be an
+			 * adjacent next vma that we'll walk
+			 * next. userfaultfd_remove() will generate an
+			 * UFFD_EVENT_REMOVE repetition on the
+			 * end-vma->vm_end range, but the manager can
+			 * handle a repetition fine.
+			 */
+			end = vma->vm_end;
+		}
+		VM_WARN_ON(start >= end);
+	}
 	zap_page_range(vma, start, end - start);
 	return 0;
 }
@@ -554,8 +590,10 @@ static long madvise_remove(struct vm_area_struct *vma,
 	 * mmap_sem.
 	 */
 	get_file(f);
-	userfaultfd_remove(vma, prev, start, end);
-	up_read(&current->mm->mmap_sem);
+	if (userfaultfd_remove(vma, start, end)) {
+		/* mmap_sem was not released by userfaultfd_remove() */
+		up_read(&current->mm->mmap_sem);
+	}
 	error = vfs_fallocate(f,
 				FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
 				offset, end - start);
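Editor's note: the madvise_dontneed() hunk is the heart of this fix. Once userfaultfd_remove() has dropped mmap_sem, every pointer obtained under the old critical section (vma, *prev) is stale, so the code reacquires the lock, repeats the lookup, and re-checks each precondition, trimming rather than failing when the vma was merely split. Here is a self-contained sketch of that discipline with a pthread rwlock; every name is invented for illustration.

#include <errno.h>
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct range { unsigned long start, end; };

static pthread_rwlock_t map_lock = PTHREAD_RWLOCK_INITIALIZER;
static struct range the_range = { 0x1000, 0x9000 };

/* Pretend listener: drops map_lock while notifying, the way
 * userfaultfd_remove() may drop mmap_sem. Returns false to signal
 * "I released the lock, your pointers are stale". */
static bool notify_listeners(struct range *r)
{
	(void)r;
	pthread_rwlock_unlock(&map_lock);
	return false;
}

static struct range *find_range(unsigned long addr)
{
	return addr < the_range.end ? &the_range : NULL;
}

static int discard_range(unsigned long start, unsigned long end)
{
	struct range *r;

	pthread_rwlock_rdlock(&map_lock);
	r = find_range(start);
	if (!r) {
		pthread_rwlock_unlock(&map_lock);
		return -ENOMEM;
	}
	if (!notify_listeners(r)) {
		/* Lock was dropped: r is stale, redo the lookup and
		 * re-check every precondition before continuing. */
		pthread_rwlock_rdlock(&map_lock);
		r = find_range(start);
		if (!r || start < r->start) {
			pthread_rwlock_unlock(&map_lock);
			return -ENOMEM;	/* a hole appeared meanwhile */
		}
		if (end > r->end)
			end = r->end;	/* split under us: trim, don't fail */
	}
	/* ... operate on start..end against the revalidated r ... */
	pthread_rwlock_unlock(&map_lock);
	return 0;
}

int main(void)
{
	printf("discard_range: %d\n", discard_range(0x2000, 0x8000));
	return 0;
}

The two outcomes mirror the hunk above: a hole that appeared while the lock was down is an error, while a split range is trimmed and processed.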
+4 -1
@@ -1118,7 +1118,10 @@ unsigned long __init_memblock memblock_next_valid_pfn(unsigned long pfn,
 		}
 	} while (left < right);
 
-	return min(PHYS_PFN(type->regions[right].base), max_pfn);
+	if (right == type->cnt)
+		return max_pfn;
+	else
+		return min(PHYS_PFN(type->regions[right].base), max_pfn);
 }
 
 /**
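Editor's note: the memblock change guards the case where the binary search runs past the last region. When right == type->cnt there is no regions[right], so the old return dereferenced one element beyond the array. A standalone sketch of the same lower-bound search and boundary check follows; the region data and names are made up.

#include <stdio.h>

/* Lower-bound binary search over sorted region bases, with the
 * "right == cnt" case handled explicitly instead of indexing past
 * the end of the array (the bug fixed above). */
static unsigned long next_valid(const unsigned long *base, int cnt,
				unsigned long addr, unsigned long max)
{
	int left = 0, right = cnt;

	while (left < right) {		/* find first base > addr */
		int mid = (left + right) / 2;

		if (addr < base[mid])
			right = mid;
		else
			left = mid + 1;
	}

	if (right == cnt)		/* addr is past the last region */
		return max;
	return base[right] < max ? base[right] : max;
}

int main(void)
{
	unsigned long bases[] = { 0x1000, 0x40000, 0x200000 };

	/* Inside the array: returns the next region base. */
	printf("%#lx\n", next_valid(bases, 3, 0x2000, ~0UL));
	/* Past the last region: must not read bases[3]. */
	printf("%#lx\n", next_valid(bases, 3, 0x300000, ~0UL));
	return 0;
}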
+13 -5
@@ -466,6 +466,8 @@ static void mem_cgroup_update_tree(struct mem_cgroup *memcg, struct page *page)
 	struct mem_cgroup_tree_per_node *mctz;
 
 	mctz = soft_limit_tree_from_page(page);
+	if (!mctz)
+		return;
 	/*
 	 * Necessary to update all ancestors when hierarchy is used.
 	 * because their event counter is not touched.
@@ -503,7 +505,8 @@ static void mem_cgroup_remove_from_trees(struct mem_cgroup *memcg)
 	for_each_node(nid) {
 		mz = mem_cgroup_nodeinfo(memcg, nid);
 		mctz = soft_limit_tree_node(nid);
-		mem_cgroup_remove_exceeded(mz, mctz);
+		if (mctz)
+			mem_cgroup_remove_exceeded(mz, mctz);
 	}
 }
 
@@ -2558,7 +2561,7 @@ unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
 	 * is empty. Do it lockless to prevent lock bouncing. Races
 	 * are acceptable as soft limit is best effort anyway.
 	 */
-	if (RB_EMPTY_ROOT(&mctz->rb_root))
+	if (!mctz || RB_EMPTY_ROOT(&mctz->rb_root))
 		return 0;
 
 	/*
@@ -4135,17 +4138,22 @@ static void free_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node)
 	kfree(memcg->nodeinfo[node]);
 }
 
-static void mem_cgroup_free(struct mem_cgroup *memcg)
+static void __mem_cgroup_free(struct mem_cgroup *memcg)
 {
 	int node;
 
-	memcg_wb_domain_exit(memcg);
 	for_each_node(node)
 		free_mem_cgroup_per_node_info(memcg, node);
 	free_percpu(memcg->stat);
 	kfree(memcg);
 }
 
+static void mem_cgroup_free(struct mem_cgroup *memcg)
+{
+	memcg_wb_domain_exit(memcg);
+	__mem_cgroup_free(memcg);
+}
+
 static struct mem_cgroup *mem_cgroup_alloc(void)
 {
 	struct mem_cgroup *memcg;
@@ -4196,7 +4204,7 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
 fail:
 	if (memcg->id.id > 0)
 		idr_remove(&mem_cgroup_idr, memcg->id.id);
-	mem_cgroup_free(memcg);
+	__mem_cgroup_free(memcg);
 	return NULL;
 }
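Editor's note: the memcontrol change is a partial-construction fix. The mem_cgroup_alloc() failure path used to call mem_cgroup_free(), whose memcg_wb_domain_exit() assumes a fully initialized memcg, so teardown is split into __mem_cgroup_free() (safe on a half-built object) and mem_cgroup_free() (full teardown). A small sketch of the same split, with invented names:

#include <stdlib.h>

struct wb_domain { int refcnt; };

struct group {
	int *stats;		/* allocated first */
	struct wb_domain *wb;	/* initialized last */
};

/* Frees only the early allocations; safe on a half-built object. */
static void __group_free(struct group *g)
{
	free(g->stats);
	free(g);
}

/* Stands in for memcg_wb_domain_exit(): assumes wb exists. */
static void wb_domain_exit(struct wb_domain *wb)
{
	wb->refcnt = 0;		/* would crash if wb was never set up */
	free(wb);
}

/* Full teardown: only valid once every field was initialized. */
static void group_free(struct group *g)
{
	wb_domain_exit(g->wb);
	__group_free(g);
}

static struct group *group_alloc(void)
{
	struct group *g = calloc(1, sizeof(*g));

	if (!g)
		return NULL;
	g->stats = calloc(16, sizeof(*g->stats));
	if (!g->stats)
		goto fail;
	g->wb = calloc(1, sizeof(*g->wb));
	if (!g->wb)
		goto fail;
	return g;
fail:
	__group_free(g);	/* not group_free(): g->wb may be NULL */
	return NULL;
}

int main(void)
{
	struct group *g = group_alloc();

	if (g)
		group_free(g);
	return 0;
}

The error path can run with g->wb still NULL, which is precisely why it must not call group_free().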
+4 -5
@@ -442,7 +442,7 @@ void munlock_vma_pages_range(struct vm_area_struct *vma,
 
 	while (start < end) {
 		struct page *page;
-		unsigned int page_mask;
+		unsigned int page_mask = 0;
 		unsigned long page_increm;
 		struct pagevec pvec;
 		struct zone *zone;
@@ -456,8 +456,7 @@ void munlock_vma_pages_range(struct vm_area_struct *vma,
 		 * suits munlock very well (and if somehow an abnormal page
 		 * has sneaked into the range, we won't oops here: great).
 		 */
-		page = follow_page_mask(vma, start, FOLL_GET | FOLL_DUMP,
-					&page_mask);
+		page = follow_page(vma, start, FOLL_GET | FOLL_DUMP);
 
 		if (page && !IS_ERR(page)) {
 			if (PageTransTail(page)) {
@@ -468,8 +467,8 @@ void munlock_vma_pages_range(struct vm_area_struct *vma,
 				/*
 				 * Any THP page found by follow_page_mask() may
 				 * have gotten split before reaching
-				 * munlock_vma_page(), so we need to recompute
-				 * the page_mask here.
+				 * munlock_vma_page(), so we need to compute
+				 * the page_mask here instead.
 				 */
 				page_mask = munlock_vma_page(page);
 				unlock_page(page);
@@ -1316,12 +1316,6 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 	}
 
 	while (page_vma_mapped_walk(&pvmw)) {
-		subpage = page - page_to_pfn(page) + pte_pfn(*pvmw.pte);
-		address = pvmw.address;
-
-		/* Unexpected PMD-mapped THP? */
-		VM_BUG_ON_PAGE(!pvmw.pte, page);
-
 		/*
 		 * If the page is mlock()d, we cannot swap it out.
 		 * If it's recently referenced (perhaps page_referenced
@@ -1345,6 +1339,13 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 			continue;
 		}
 
+		/* Unexpected PMD-mapped THP? */
+		VM_BUG_ON_PAGE(!pvmw.pte, page);
+
+		subpage = page - page_to_pfn(page) + pte_pfn(*pvmw.pte);
+		address = pvmw.address;
+
+
 		if (!(flags & TTU_IGNORE_ACCESS)) {
 			if (ptep_clear_flush_young_notify(vma, address,
 						pvmw.pte)) {
@@ -1065,6 +1065,9 @@ const char * const vmstat_text[] = {
 	"thp_split_page_failed",
 	"thp_deferred_split_page",
 	"thp_split_pmd",
+#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
+	"thp_split_pud",
+#endif
 	"thp_zero_page_alloc",
 	"thp_zero_page_alloc_failed",
 #endif