mm: start tracking VMAs with maple tree
Start tracking the VMAs with the new maple tree structure in parallel with the
rb_tree. Add debug and trace events for maple tree operations, and duplicate
the rb_tree that is created on forks into the maple tree.

The maple tree is added to the mm_struct, including the init_mm definition;
support is added to the required mm/mmap functions; tracking is added in
kernel/fork for process forking; and the tree is used to find the
unmapped_area, cross-checked against what the rb_tree finds.

This also moves the mmap_lock() in exit_mmap(), since the oom reaper call
does walk the VMAs; otherwise lockdep will be unhappy if oom happens.

When splitting a VMA fails due to an allocation of maple tree nodes, the
error path in __split_vma() calls new->vm_ops->close(new). The page
accounting for hugetlb is actually done in the close() operation, so it
accounts for the removal of half of the VMA, which was never adjusted. This
results in a negative exit value. To avoid the negative charge, set
vm_start = vm_end and vm_pgoff = 0.

There is also a potential accounting issue in special mappings when
insert_vm_struct() fails to allocate, so reverse the charge there in the
failure scenario.

Link: https://lkml.kernel.org/r/20220906194824.2110408-9-Liam.Howlett@oracle.com
Signed-off-by: Liam R. Howlett <Liam.Howlett@Oracle.com>
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Tested-by: Yu Zhao <yuzhao@google.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: David Howells <dhowells@redhat.com>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: SeongJae Park <sj@kernel.org>
Cc: Sven Schnelle <svens@linux.ibm.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
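The __split_vma() accounting fix described above lands in mm/mmap.c, whose hunk is not shown on this page. As a hedged sketch of the shape of that error path (paraphrased, not copied from the hunk):

	/* Avoid vm accounting in close() operation */
	new->vm_start = new->vm_end;	/* zero length: nothing to un-account */
	new->vm_pgoff = 0;
	/* Clean everything up if vma_adjust failed. */
	if (new->vm_ops && new->vm_ops->close)
		new->vm_ops->close(new);

With vm_start == vm_end, a close() callback that charges by the VMA's extent (as hugetlb's does) sees zero pages, and the negative reservation count is avoided.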
commit d4af56c5c7
parent e15e06a839
committed by Andrew Morton
arch/x86/kernel/tboot.c
@@ -96,6 +96,7 @@ void __init tboot_probe(void)
 static pgd_t *tboot_pg_dir;
 static struct mm_struct tboot_mm = {
 	.mm_rb		= RB_ROOT,
+	.mm_mt		= MTREE_INIT_EXT(mm_mt, MM_MT_FLAGS, tboot_mm.mmap_lock),
 	.pgd		= swapper_pg_dir,
 	.mm_users	= ATOMIC_INIT(2),
 	.mm_count	= ATOMIC_INIT(1),
drivers/firmware/efi/efi.c
@@ -58,6 +58,7 @@ static unsigned long __initdata rt_prop = EFI_INVALID_TABLE_ADDR;
 
 struct mm_struct efi_mm = {
 	.mm_rb			= RB_ROOT,
+	.mm_mt			= MTREE_INIT_EXT(mm_mt, MM_MT_FLAGS, efi_mm.mmap_lock),
 	.mm_users		= ATOMIC_INIT(2),
 	.mm_count		= ATOMIC_INIT(1),
 	.write_protect_seq	= SEQCNT_ZERO(efi_mm.write_protect_seq),
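For readers new to the maple tree API: MTREE_INIT_EXT() statically initializes a tree whose locking is delegated to an external lock (here the mm's mmap_lock) instead of the tree's built-in spinlock. A minimal sketch of the same pattern outside an mm_struct, with illustrative names (example_sem, example_tree) that are not part of this commit:

	#include <linux/maple_tree.h>
	#include <linux/rwsem.h>

	/* The lock the tree's users promise to hold; lockdep checks it. */
	static DECLARE_RWSEM(example_sem);

	/* An allocation-range tree with externally supplied locking. */
	static struct maple_tree example_tree =
		MTREE_INIT_EXT(example_tree,
			       MT_FLAGS_ALLOC_RANGE | MT_FLAGS_LOCK_EXTERN,
			       example_sem);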
include/linux/mm.h
@@ -2567,6 +2567,8 @@ extern bool arch_has_descending_max_zone_pfns(void);
 /* nommu.c */
 extern atomic_long_t mmap_pages_allocated;
 extern int nommu_shrink_inode_mappings(struct inode *, size_t, size_t);
+/* mmap.c */
+void vma_mas_store(struct vm_area_struct *vma, struct ma_state *mas);
 
 /* interval_tree.c */
 void vma_interval_tree_insert(struct vm_area_struct *node,
@@ -2630,6 +2632,9 @@ extern struct vm_area_struct *copy_vma(struct vm_area_struct **,
 	bool *need_rmap_locks);
 extern void exit_mmap(struct mm_struct *);
 
+void vma_mas_store(struct vm_area_struct *vma, struct ma_state *mas);
+void vma_mas_remove(struct vm_area_struct *vma, struct ma_state *mas);
+
 static inline int check_data_rlimit(unsigned long rlim,
 				    unsigned long new,
 				    unsigned long start,
include/linux/mm_types.h
@@ -9,6 +9,7 @@
 #include <linux/list.h>
 #include <linux/spinlock.h>
 #include <linux/rbtree.h>
+#include <linux/maple_tree.h>
 #include <linux/rwsem.h>
 #include <linux/completion.h>
 #include <linux/cpumask.h>
@@ -486,6 +487,7 @@ struct kioctx_table;
 struct mm_struct {
 	struct {
 		struct vm_area_struct *mmap;		/* list of VMAs */
+		struct maple_tree mm_mt;
 		struct rb_root mm_rb;
 		u64 vmacache_seqnum;			/* per-thread vmacache */
 #ifdef CONFIG_MMU
@@ -697,6 +699,7 @@ struct mm_struct {
 	unsigned long cpu_bitmap[];
 };
 
+#define MM_MT_FLAGS	(MT_FLAGS_ALLOC_RANGE | MT_FLAGS_LOCK_EXTERN)
 extern struct mm_struct init_mm;
 
 /* Pointer magic because the dynamic array size confuses some compilers. */
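On the two flags: MT_FLAGS_ALLOC_RANGE selects the allocation-range variant of the maple tree, which records the largest gap under each node so free-range searches (the unmapped_area lookups this series cross-checks) stay cheap; MT_FLAGS_LOCK_EXTERN tells the tree, and lockdep, that callers serialize access with an external lock, the mmap_lock, rather than the internal spinlock. The mm_init() hunk in kernel/fork.c below wires both up at runtime, mirroring the static MTREE_INIT_EXT() initializers above.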
include/trace/events/mmap.h
@@ -42,6 +42,79 @@ TRACE_EVENT(vm_unmapped_area,
 		__entry->low_limit, __entry->high_limit, __entry->align_mask,
 		__entry->align_offset)
 );
+
+TRACE_EVENT(vma_mas_szero,
+	TP_PROTO(struct maple_tree *mt, unsigned long start,
+		 unsigned long end),
+
+	TP_ARGS(mt, start, end),
+
+	TP_STRUCT__entry(
+			__field(struct maple_tree *, mt)
+			__field(unsigned long, start)
+			__field(unsigned long, end)
+	),
+
+	TP_fast_assign(
+			__entry->mt		= mt;
+			__entry->start		= start;
+			__entry->end		= end;
+	),
+
+	TP_printk("mt_mod %p, (NULL), SNULL, %lu, %lu,",
+		  __entry->mt,
+		  (unsigned long) __entry->start,
+		  (unsigned long) __entry->end
+	)
+);
+
+TRACE_EVENT(vma_store,
+	TP_PROTO(struct maple_tree *mt, struct vm_area_struct *vma),
+
+	TP_ARGS(mt, vma),
+
+	TP_STRUCT__entry(
+			__field(struct maple_tree *, mt)
+			__field(struct vm_area_struct *, vma)
+			__field(unsigned long, vm_start)
+			__field(unsigned long, vm_end)
+	),
+
+	TP_fast_assign(
+			__entry->mt		= mt;
+			__entry->vma		= vma;
+			__entry->vm_start	= vma->vm_start;
+			__entry->vm_end		= vma->vm_end - 1;
+	),
+
+	TP_printk("mt_mod %p, (%p), STORE, %lu, %lu,",
+		  __entry->mt, __entry->vma,
+		  (unsigned long) __entry->vm_start,
+		  (unsigned long) __entry->vm_end
+	)
+);
+
+
+TRACE_EVENT(exit_mmap,
+	TP_PROTO(struct mm_struct *mm),
+
+	TP_ARGS(mm),
+
+	TP_STRUCT__entry(
+			__field(struct mm_struct *, mm)
+			__field(struct maple_tree *, mt)
+	),
+
+	TP_fast_assign(
+			__entry->mm		= mm;
+			__entry->mt		= &mm->mm_mt;
+	),
+
+	TP_printk("mt_mod %p, DESTROY\n",
+		  __entry->mt
+	)
+);
+
 #endif
 
 /* This part must be outside protection */
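Each TRACE_EVENT() above generates a trace_<name>() helper that the mm/mmap.c side of this series calls at the matching tree modification; the "mt_mod ..., STORE/SNULL/DESTROY" output deliberately reads like a replayable log of maple tree operations. A hedged sketch of the call sites (the real callers live in mm/mmap.c hunks not shown on this page):

	/* Sketch only: helpers generated by the TRACE_EVENT()s above. */
	trace_vma_store(mas->tree, vma);	/* [vm_start, vm_end - 1] -> vma */
	trace_vma_mas_szero(mas->tree, vma->vm_start,
			    vma->vm_end - 1);	/* range overwritten with NULL */
	trace_exit_mmap(mm);			/* mm->mm_mt about to be destroyed */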
kernel/fork.c
@@ -585,6 +585,7 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
 	int retval;
 	unsigned long charge;
 	LIST_HEAD(uf);
+	MA_STATE(mas, &mm->mm_mt, 0, 0);
 
 	uprobe_start_dup_mmap();
 	if (mmap_write_lock_killable(oldmm)) {
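MA_STATE(mas, &mm->mm_mt, 0, 0) declares an on-stack maple tree cursor named mas, bound to the new mm's tree with index and last both 0. Approximately, paraphrasing the macro in <linux/maple_tree.h> (field list abridged, not the full definition):

	struct ma_state mas = {
		.tree	= &mm->mm_mt,	/* which tree this cursor walks */
		.index	= 0,		/* first index of the current range */
		.last	= 0,		/* last (inclusive) index of the range */
		.node	= MAS_START,	/* sentinel: not positioned yet */
	};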
@@ -614,6 +615,10 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
 		goto out;
 	khugepaged_fork(mm, oldmm);
 
+	retval = mas_expected_entries(&mas, oldmm->map_count);
+	if (retval)
+		goto out;
+
 	prev = NULL;
 	for (mpnt = oldmm->mmap; mpnt; mpnt = mpnt->vm_next) {
 		struct file *file;
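mas_expected_entries() is the bulk-insert setup: it preallocates enough maple nodes for map_count stores up front, so the per-VMA mas_store() calls inside the copy loop below neither fail nor allocate while mmap_lock is held. The matching mas_destroy() at the new loop_out label (further down) returns whatever was not consumed. The pattern in isolation, as a hedged sketch with illustrative names (bulk_store, index, last, entry):

	/* Caller must hold the tree's (external) lock across the stores. */
	static int bulk_store(struct maple_tree *tree, unsigned long *index,
			      unsigned long *last, void **entry, int nr)
	{
		MA_STATE(mas, tree, 0, 0);
		int i, ret;

		ret = mas_expected_entries(&mas, nr); /* reserve nodes up front */
		if (ret)
			return ret;		/* -ENOMEM, nothing reserved */

		for (i = 0; i < nr; i++) {
			mas.index = index[i];
			mas.last = last[i];	/* maple tree ranges are inclusive */
			mas_store(&mas, entry[i]); /* uses the reservation */
		}

		mas_destroy(&mas);		/* free unused reserved nodes */
		return 0;
	}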
@@ -629,7 +634,7 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
 		 */
 		if (fatal_signal_pending(current)) {
 			retval = -EINTR;
-			goto out;
+			goto loop_out;
 		}
 		if (mpnt->vm_flags & VM_ACCOUNT) {
 			unsigned long len = vma_pages(mpnt);
@@ -694,6 +699,11 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
 		rb_link = &tmp->vm_rb.rb_right;
 		rb_parent = &tmp->vm_rb;
 
+		/* Link the vma into the MT */
+		mas.index = tmp->vm_start;
+		mas.last = tmp->vm_end - 1;
+		mas_store(&mas, tmp);
+
 		mm->map_count++;
 		if (!(tmp->vm_flags & VM_WIPEONFORK))
 			retval = copy_page_range(tmp, mpnt);
@@ -702,10 +712,12 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
 			tmp->vm_ops->open(tmp);
 
 		if (retval)
-			goto out;
+			goto loop_out;
 	}
 	/* a new mm has just been created */
 	retval = arch_dup_mmap(oldmm, mm);
+loop_out:
+	mas_destroy(&mas);
 out:
 	mmap_write_unlock(mm);
 	flush_tlb_mm(oldmm);
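Every early exit from inside the copy loop is retargeted from out to loop_out above, so mas_destroy() always runs before the unlock; without it, the nodes reserved by mas_expected_entries() would leak whenever fork bails out early.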
@@ -721,7 +733,7 @@ fail_nomem_policy:
 fail_nomem:
 	retval = -ENOMEM;
 	vm_unacct_memory(charge);
-	goto out;
+	goto loop_out;
 }
 
 static inline int mm_alloc_pgd(struct mm_struct *mm)
@@ -1111,6 +1123,8 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
 {
 	mm->mmap = NULL;
 	mm->mm_rb = RB_ROOT;
+	mt_init_flags(&mm->mm_mt, MM_MT_FLAGS);
+	mt_set_external_lock(&mm->mm_mt, &mm->mmap_lock);
 	mm->vmacache_seqnum = 0;
 	atomic_set(&mm->mm_users, 1);
 	atomic_set(&mm->mm_count, 1);
mm/init-mm.c
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <linux/mm_types.h>
 #include <linux/rbtree.h>
+#include <linux/maple_tree.h>
 #include <linux/rwsem.h>
 #include <linux/spinlock.h>
 #include <linux/list.h>
@@ -29,6 +30,7 @@
  */
 struct mm_struct init_mm = {
 	.mm_rb		= RB_ROOT,
+	.mm_mt		= MTREE_INIT_EXT(mm_mt, MM_MT_FLAGS, init_mm.mmap_lock),
 	.pgd		= swapper_pg_dir,
 	.mm_users	= ATOMIC_INIT(2),
 	.mm_count	= ATOMIC_INIT(1),
mm/nommu.c
@@ -545,6 +545,19 @@ static void put_nommu_region(struct vm_region *region)
 	__put_nommu_region(region);
 }
 
+void vma_mas_store(struct vm_area_struct *vma, struct ma_state *mas)
+{
+	mas_set_range(mas, vma->vm_start, vma->vm_end - 1);
+	mas_store_prealloc(mas, vma);
+}
+
+void vma_mas_remove(struct vm_area_struct *vma, struct ma_state *mas)
+{
+	mas->index = vma->vm_start;
+	mas->last = vma->vm_end - 1;
+	mas_store_prealloc(mas, NULL);
+}
+
 /*
  * add a VMA into a process's mm_struct in the appropriate place in the list
  * and tree and add to the address space's page tree also if not an anonymous
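Both helpers end in mas_store_prealloc(), which assumes the caller has already reserved nodes and therefore cannot fail while mmap_lock is held; storing NULL over a range is how the maple tree expresses erasure. A hedged usage sketch (error handling illustrative; mas_preallocate() shown with its signature as of this series):

	MA_STATE(mas, &mm->mm_mt, vma->vm_start, vma->vm_end - 1);

	if (mas_preallocate(&mas, vma, GFP_KERNEL))	/* reserve nodes */
		return -ENOMEM;
	vma_mas_store(vma, &mas);	/* [vm_start, vm_end - 1] -> vma, cannot fail */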