Merge branch 'akpm' (patches from Andrew)

Merge fixes from Andrew Morton:
 "26 fixes"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (26 commits)
  userfaultfd: remove wrong comment from userfaultfd_ctx_get()
  fat: fix using uninitialized fields of fat_inode/fsinfo_inode
  sh: cayman: IDE support fix
  kasan: fix races in quarantine_remove_cache()
  kasan: resched in quarantine_remove_cache()
  mm: do not call mem_cgroup_free() from within mem_cgroup_alloc()
  thp: fix another corner case of munlock() vs. THPs
  rmap: fix NULL-pointer dereference on THP munlocking
  mm/memblock.c: fix memblock_next_valid_pfn()
  userfaultfd: selftest: vm: allow to build in vm/ directory
  userfaultfd: non-cooperative: userfaultfd_remove revalidate vma in MADV_DONTNEED
  userfaultfd: non-cooperative: fix fork fctx->new memleak
  mm/cgroup: avoid panic when init with low memory
  drivers/md/bcache/util.h: remove duplicate inclusion of blkdev.h
  mm/vmstats: add thp_split_pud event for clarity
  include/linux/fs.h: fix unsigned enum warning with gcc-4.2
  userfaultfd: non-cooperative: release all ctx in dup_userfaultfd_complete
  userfaultfd: non-cooperative: robustness check
  userfaultfd: non-cooperative: rollback userfaultfd_exit
  x86, mm: unify exit paths in gup_pte_range()
  ...
This commit is contained in:
Linus Torvalds
2017-03-10 08:34:42 -08:00
54 changed files with 277 additions and 185 deletions
+1 -1
View File
@@ -1828,7 +1828,7 @@ static void __split_huge_pud_locked(struct vm_area_struct *vma, pud_t *pud,
VM_BUG_ON_VMA(vma->vm_end < haddr + HPAGE_PUD_SIZE, vma);
VM_BUG_ON(!pud_trans_huge(*pud) && !pud_devmap(*pud));
count_vm_event(THP_SPLIT_PMD);
count_vm_event(THP_SPLIT_PUD);
pudp_huge_clear_flush_notify(vma, haddr, pud);
}
+44 -7
View File
@@ -25,6 +25,7 @@
#include <linux/printk.h>
#include <linux/shrinker.h>
#include <linux/slab.h>
#include <linux/srcu.h>
#include <linux/string.h>
#include <linux/types.h>
@@ -103,6 +104,7 @@ static int quarantine_tail;
/* Total size of all objects in global_quarantine across all batches. */
static unsigned long quarantine_size;
static DEFINE_SPINLOCK(quarantine_lock);
DEFINE_STATIC_SRCU(remove_cache_srcu);
/* Maximum size of the global queue. */
static unsigned long quarantine_max_size;
@@ -173,17 +175,22 @@ void quarantine_put(struct kasan_free_meta *info, struct kmem_cache *cache)
struct qlist_head *q;
struct qlist_head temp = QLIST_INIT;
/*
* Note: irq must be disabled until after we move the batch to the
* global quarantine. Otherwise quarantine_remove_cache() can miss
* some objects belonging to the cache if they are in our local temp
* list. quarantine_remove_cache() executes on_each_cpu() at the
* beginning which ensures that it either sees the objects in per-cpu
* lists or in the global quarantine.
*/
local_irq_save(flags);
q = this_cpu_ptr(&cpu_quarantine);
qlist_put(q, &info->quarantine_link, cache->size);
if (unlikely(q->bytes > QUARANTINE_PERCPU_SIZE))
if (unlikely(q->bytes > QUARANTINE_PERCPU_SIZE)) {
qlist_move_all(q, &temp);
local_irq_restore(flags);
if (unlikely(!qlist_empty(&temp))) {
spin_lock_irqsave(&quarantine_lock, flags);
spin_lock(&quarantine_lock);
WRITE_ONCE(quarantine_size, quarantine_size + temp.bytes);
qlist_move_all(&temp, &global_quarantine[quarantine_tail]);
if (global_quarantine[quarantine_tail].bytes >=
@@ -196,20 +203,33 @@ void quarantine_put(struct kasan_free_meta *info, struct kmem_cache *cache)
if (new_tail != quarantine_head)
quarantine_tail = new_tail;
}
spin_unlock_irqrestore(&quarantine_lock, flags);
spin_unlock(&quarantine_lock);
}
local_irq_restore(flags);
}
void quarantine_reduce(void)
{
size_t total_size, new_quarantine_size, percpu_quarantines;
unsigned long flags;
int srcu_idx;
struct qlist_head to_free = QLIST_INIT;
if (likely(READ_ONCE(quarantine_size) <=
READ_ONCE(quarantine_max_size)))
return;
/*
* srcu critical section ensures that quarantine_remove_cache()
* will not miss objects belonging to the cache while they are in our
* local to_free list. srcu is chosen because (1) it gives us private
* grace period domain that does not interfere with anything else,
* and (2) it allows synchronize_srcu() to return without waiting
* if there are no pending read critical sections (which is the
* expected case).
*/
srcu_idx = srcu_read_lock(&remove_cache_srcu);
spin_lock_irqsave(&quarantine_lock, flags);
/*
@@ -237,6 +257,7 @@ void quarantine_reduce(void)
spin_unlock_irqrestore(&quarantine_lock, flags);
qlist_free_all(&to_free, NULL);
srcu_read_unlock(&remove_cache_srcu, srcu_idx);
}
static void qlist_move_cache(struct qlist_head *from,
@@ -280,12 +301,28 @@ void quarantine_remove_cache(struct kmem_cache *cache)
unsigned long flags, i;
struct qlist_head to_free = QLIST_INIT;
/*
* Must be careful to not miss any objects that are being moved from
* per-cpu list to the global quarantine in quarantine_put(),
* nor objects being freed in quarantine_reduce(). on_each_cpu()
* achieves the first goal, while synchronize_srcu() achieves the
* second.
*/
on_each_cpu(per_cpu_remove_cache, cache, 1);
spin_lock_irqsave(&quarantine_lock, flags);
for (i = 0; i < QUARANTINE_BATCHES; i++)
for (i = 0; i < QUARANTINE_BATCHES; i++) {
if (qlist_empty(&global_quarantine[i]))
continue;
qlist_move_cache(&global_quarantine[i], &to_free, cache);
/* Scanning whole quarantine can take a while. */
spin_unlock_irqrestore(&quarantine_lock, flags);
cond_resched();
spin_lock_irqsave(&quarantine_lock, flags);
}
spin_unlock_irqrestore(&quarantine_lock, flags);
qlist_free_all(&to_free, cache);
synchronize_srcu(&remove_cache_srcu);
}
+41 -3
View File
@@ -513,7 +513,43 @@ static long madvise_dontneed(struct vm_area_struct *vma,
if (!can_madv_dontneed_vma(vma))
return -EINVAL;
userfaultfd_remove(vma, prev, start, end);
if (!userfaultfd_remove(vma, start, end)) {
*prev = NULL; /* mmap_sem has been dropped, prev is stale */
down_read(&current->mm->mmap_sem);
vma = find_vma(current->mm, start);
if (!vma)
return -ENOMEM;
if (start < vma->vm_start) {
/*
* This "vma" under revalidation is the one
* with the lowest vma->vm_start where start
* is also < vma->vm_end. If start <
* vma->vm_start it means an hole materialized
* in the user address space within the
* virtual range passed to MADV_DONTNEED.
*/
return -ENOMEM;
}
if (!can_madv_dontneed_vma(vma))
return -EINVAL;
if (end > vma->vm_end) {
/*
* Don't fail if end > vma->vm_end. If the old
* vma was splitted while the mmap_sem was
* released the effect of the concurrent
* operation may not cause MADV_DONTNEED to
* have an undefined result. There may be an
* adjacent next vma that we'll walk
* next. userfaultfd_remove() will generate an
* UFFD_EVENT_REMOVE repetition on the
* end-vma->vm_end range, but the manager can
* handle a repetition fine.
*/
end = vma->vm_end;
}
VM_WARN_ON(start >= end);
}
zap_page_range(vma, start, end - start);
return 0;
}
@@ -554,8 +590,10 @@ static long madvise_remove(struct vm_area_struct *vma,
* mmap_sem.
*/
get_file(f);
userfaultfd_remove(vma, prev, start, end);
up_read(&current->mm->mmap_sem);
if (userfaultfd_remove(vma, start, end)) {
/* mmap_sem was not released by userfaultfd_remove() */
up_read(&current->mm->mmap_sem);
}
error = vfs_fallocate(f,
FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
offset, end - start);
+4 -1
View File
@@ -1118,7 +1118,10 @@ unsigned long __init_memblock memblock_next_valid_pfn(unsigned long pfn,
}
} while (left < right);
return min(PHYS_PFN(type->regions[right].base), max_pfn);
if (right == type->cnt)
return max_pfn;
else
return min(PHYS_PFN(type->regions[right].base), max_pfn);
}
/**
+13 -5
View File
@@ -466,6 +466,8 @@ static void mem_cgroup_update_tree(struct mem_cgroup *memcg, struct page *page)
struct mem_cgroup_tree_per_node *mctz;
mctz = soft_limit_tree_from_page(page);
if (!mctz)
return;
/*
* Necessary to update all ancestors when hierarchy is used.
* because their event counter is not touched.
@@ -503,7 +505,8 @@ static void mem_cgroup_remove_from_trees(struct mem_cgroup *memcg)
for_each_node(nid) {
mz = mem_cgroup_nodeinfo(memcg, nid);
mctz = soft_limit_tree_node(nid);
mem_cgroup_remove_exceeded(mz, mctz);
if (mctz)
mem_cgroup_remove_exceeded(mz, mctz);
}
}
@@ -2558,7 +2561,7 @@ unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
* is empty. Do it lockless to prevent lock bouncing. Races
* are acceptable as soft limit is best effort anyway.
*/
if (RB_EMPTY_ROOT(&mctz->rb_root))
if (!mctz || RB_EMPTY_ROOT(&mctz->rb_root))
return 0;
/*
@@ -4135,17 +4138,22 @@ static void free_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node)
kfree(memcg->nodeinfo[node]);
}
static void mem_cgroup_free(struct mem_cgroup *memcg)
static void __mem_cgroup_free(struct mem_cgroup *memcg)
{
int node;
memcg_wb_domain_exit(memcg);
for_each_node(node)
free_mem_cgroup_per_node_info(memcg, node);
free_percpu(memcg->stat);
kfree(memcg);
}
static void mem_cgroup_free(struct mem_cgroup *memcg)
{
memcg_wb_domain_exit(memcg);
__mem_cgroup_free(memcg);
}
static struct mem_cgroup *mem_cgroup_alloc(void)
{
struct mem_cgroup *memcg;
@@ -4196,7 +4204,7 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
fail:
if (memcg->id.id > 0)
idr_remove(&mem_cgroup_idr, memcg->id.id);
mem_cgroup_free(memcg);
__mem_cgroup_free(memcg);
return NULL;
}
+4 -5
View File
@@ -442,7 +442,7 @@ void munlock_vma_pages_range(struct vm_area_struct *vma,
while (start < end) {
struct page *page;
unsigned int page_mask;
unsigned int page_mask = 0;
unsigned long page_increm;
struct pagevec pvec;
struct zone *zone;
@@ -456,8 +456,7 @@ void munlock_vma_pages_range(struct vm_area_struct *vma,
* suits munlock very well (and if somehow an abnormal page
* has sneaked into the range, we won't oops here: great).
*/
page = follow_page_mask(vma, start, FOLL_GET | FOLL_DUMP,
&page_mask);
page = follow_page(vma, start, FOLL_GET | FOLL_DUMP);
if (page && !IS_ERR(page)) {
if (PageTransTail(page)) {
@@ -468,8 +467,8 @@ void munlock_vma_pages_range(struct vm_area_struct *vma,
/*
* Any THP page found by follow_page_mask() may
* have gotten split before reaching
* munlock_vma_page(), so we need to recompute
* the page_mask here.
* munlock_vma_page(), so we need to compute
* the page_mask here instead.
*/
page_mask = munlock_vma_page(page);
unlock_page(page);
+7 -6
View File
@@ -1316,12 +1316,6 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
}
while (page_vma_mapped_walk(&pvmw)) {
subpage = page - page_to_pfn(page) + pte_pfn(*pvmw.pte);
address = pvmw.address;
/* Unexpected PMD-mapped THP? */
VM_BUG_ON_PAGE(!pvmw.pte, page);
/*
* If the page is mlock()d, we cannot swap it out.
* If it's recently referenced (perhaps page_referenced
@@ -1345,6 +1339,13 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
continue;
}
/* Unexpected PMD-mapped THP? */
VM_BUG_ON_PAGE(!pvmw.pte, page);
subpage = page - page_to_pfn(page) + pte_pfn(*pvmw.pte);
address = pvmw.address;
if (!(flags & TTU_IGNORE_ACCESS)) {
if (ptep_clear_flush_young_notify(vma, address,
pvmw.pte)) {
+3
View File
@@ -1065,6 +1065,9 @@ const char * const vmstat_text[] = {
"thp_split_page_failed",
"thp_deferred_split_page",
"thp_split_pmd",
#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
"thp_split_pud",
#endif
"thp_zero_page_alloc",
"thp_zero_page_alloc_failed",
#endif