mmap locking API: convert mmap_sem comments
Convert comments that reference mmap_sem to reference mmap_lock instead.

[akpm@linux-foundation.org: fix up linux-next leftovers]
[akpm@linux-foundation.org: s/lockaphore/lock/, per Vlastimil]
[akpm@linux-foundation.org: more linux-next fixups, per Michel]

Signed-off-by: Michel Lespinasse <walken@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: Daniel Jordan <daniel.m.jordan@oracle.com>
Cc: Davidlohr Bueso <dbueso@suse.de>
Cc: David Rientjes <rientjes@google.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Cc: Jerome Glisse <jglisse@redhat.com>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: Laurent Dufour <ldufour@linux.ibm.com>
Cc: Liam Howlett <Liam.Howlett@oracle.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ying Han <yinghan@google.com>
Link: http://lkml.kernel.org/r/20200520052908.204642-13-walken@google.com
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
committed by: Linus Torvalds
parent: 3e4e28c5a8
commit: c1e8d7c6a7
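For context: this patch is part of the series that replaced direct manipulation of mmap_sem with the mmap locking API, and renamed the field itself to mmap_lock. A minimal sketch of what those wrappers look like — a close paraphrase of include/linux/mmap_lock.h as introduced by the series, shown here for orientation only:

#include <linux/rwsem.h>

/* Sketch of the wrappers this series introduces. The mm_struct field is
 * renamed mmap_sem -> mmap_lock, and callers use these helpers instead
 * of calling down_read()/up_read() on the semaphore directly. */
static inline void mmap_read_lock(struct mm_struct *mm)
{
        down_read(&mm->mmap_lock);
}

static inline void mmap_read_unlock(struct mm_struct *mm)
{
        up_read(&mm->mmap_lock);
}

static inline void mmap_write_lock(struct mm_struct *mm)
{
        down_write(&mm->mmap_lock);
}

static inline void mmap_write_unlock(struct mm_struct *mm)
{
        up_write(&mm->mmap_lock);
}

With the comments converted by this patch, both the code and its documentation consistently speak of mmap_lock.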
@@ -364,19 +364,19 @@ follows:
 2) for querying the policy, we do not need to take an extra reference on the
    target task's task policy nor vma policies because we always acquire the
-   task's mm's mmap_sem for read during the query. The set_mempolicy() and
-   mbind() APIs [see below] always acquire the mmap_sem for write when
+   task's mm's mmap_lock for read during the query. The set_mempolicy() and
+   mbind() APIs [see below] always acquire the mmap_lock for write when
    installing or replacing task or vma policies. Thus, there is no possibility
    of a task or thread freeing a policy while another task or thread is
    querying it.
 
 3) Page allocation usage of task or vma policy occurs in the fault path where
-   we hold them mmap_sem for read. Again, because replacing the task or vma
-   policy requires that the mmap_sem be held for write, the policy can't be
+   we hold them mmap_lock for read. Again, because replacing the task or vma
+   policy requires that the mmap_lock be held for write, the policy can't be
    freed out from under us while we're using it for page allocation.
 
 4) Shared policies require special consideration. One task can replace a
-   shared memory policy while another task, with a distinct mmap_sem, is
+   shared memory policy while another task, with a distinct mmap_lock, is
    querying or allocating a page based on the policy. To resolve this
    potential race, the shared policy infrastructure adds an extra reference
    to the shared policy during lookup while holding a spin lock on the shared
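The reader/writer split in point 2 above, sketched as caller-side code — a hedged illustration with hypothetical helper names, not the actual mm/mempolicy.c implementation:

#include <linux/mm.h>

/* Query side: a read lock on mm->mmap_lock suffices, because
 * set_mempolicy()/mbind() take it for write before replacing any task
 * or vma policy, so no policy can be freed mid-query. */
static void demo_query_policy(struct mm_struct *mm)
{
        mmap_read_lock(mm);
        /* ... inspect task/vma mempolicy safely ... */
        mmap_read_unlock(mm);
}

/* Update side: the write lock excludes all readers while a policy is
 * installed or replaced. */
static void demo_replace_policy(struct mm_struct *mm)
{
        mmap_write_lock(mm);
        /* ... install or replace the policy ... */
        mmap_write_unlock(mm);
}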
@@ -33,7 +33,7 @@ memory ranges) provides two primary functionalities:
 The real advantage of userfaults if compared to regular virtual memory
 management of mremap/mprotect is that the userfaults in all their
 operations never involve heavyweight structures like vmas (in fact the
-``userfaultfd`` runtime load never takes the mmap_sem for writing).
+``userfaultfd`` runtime load never takes the mmap_lock for writing).
 
 Vmas are not suitable for page- (or hugepage) granular fault tracking
 when dealing with virtual address spaces that could span
@@ -615,7 +615,7 @@ prototypes::
 locking rules:
 
 ============= ========= ===========================
-ops           mmap_sem  PageLocked(page)
+ops           mmap_lock PageLocked(page)
 ============= ========= ===========================
 open:         yes
 close:        yes
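The ops in that table appear to be the vm_operations_struct callbacks listed under "prototypes::" above; a minimal sketch (driver names hypothetical) of what the "yes" entries in the mmap_lock column mean:

#include <linux/mm.h>

/* Hypothetical driver illustrating the locking-rules table: per the
 * "yes" entries, ->open and ->close are invoked with the caller
 * holding mmap_lock. */
static void demo_vma_open(struct vm_area_struct *vma)
{
        /* caller holds mm->mmap_lock */
}

static void demo_vma_close(struct vm_area_struct *vma)
{
        /* caller holds mm->mmap_lock */
}

static const struct vm_operations_struct demo_vm_ops = {
        .open  = demo_vma_open,
        .close = demo_vma_close,
};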
@@ -98,9 +98,9 @@ split_huge_page() or split_huge_pmd() has a cost.
 
 To make pagetable walks huge pmd aware, all you need to do is to call
 pmd_trans_huge() on the pmd returned by pmd_offset. You must hold the
-mmap_sem in read (or write) mode to be sure a huge pmd cannot be
+mmap_lock in read (or write) mode to be sure a huge pmd cannot be
 created from under you by khugepaged (khugepaged collapse_huge_page
-takes the mmap_sem in write mode in addition to the anon_vma lock). If
+takes the mmap_lock in write mode in addition to the anon_vma lock). If
 pmd_trans_huge returns false, you just fallback in the old code
 paths. If instead pmd_trans_huge returns true, you have to take the
 page table lock (pmd_lock()) and re-run pmd_trans_huge. Taking the
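A minimal sketch of the walk pattern that hunk describes — hold mmap_lock, check pmd_trans_huge(), then re-check under pmd_lock(). The helper name is hypothetical and the usual pgd_none()/pud_none() validity checks are omitted for brevity:

#include <linux/mm.h>

/* Hypothetical walker: returns true if addr is mapped by a huge pmd.
 * Caller must hold mm->mmap_lock (read or write) so khugepaged cannot
 * collapse a huge pmd underneath us while we walk. */
static bool demo_addr_is_huge(struct mm_struct *mm, unsigned long addr)
{
        pgd_t *pgd = pgd_offset(mm, addr);
        p4d_t *p4d = p4d_offset(pgd, addr);
        pud_t *pud = pud_offset(p4d, addr);
        pmd_t *pmd = pmd_offset(pud, addr);
        spinlock_t *ptl;
        bool huge;

        if (!pmd_trans_huge(*pmd))
                return false;   /* fall back to the pte-level code paths */

        /* Re-check under the page table lock, as the text requires. */
        ptl = pmd_lock(mm, pmd);
        huge = pmd_trans_huge(*pmd);
        spin_unlock(ptl);

        return huge;
}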
@@ -141,7 +141,7 @@ retry:
        }
 
        /*
-        * Fault retry nuances, mmap_sem already relinquished by core mm
+        * Fault retry nuances, mmap_lock already relinquished by core mm
         */
        if (unlikely((fault & VM_FAULT_RETRY) &&
                     (flags & FAULT_FLAG_ALLOW_RETRY))) {
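Several hunks in this diff touch the same retry idiom in arch fault handlers. A simplified skeleton of that idiom, under the handle_mm_fault() signature of this kernel generation (bad-area and OOM paths trimmed; a hedged sketch, not any one arch's handler):

#include <linux/mm.h>
#include <linux/sched/signal.h>

/* Skeleton of the retry dance: core mm drops mmap_lock itself before
 * returning VM_FAULT_RETRY, and __lock_page_or_retry() drops it on the
 * fatal-signal path, so the handler must not unlock again there. */
static void demo_do_page_fault(struct mm_struct *mm, unsigned long addr,
                               unsigned int flags, struct pt_regs *regs)
{
        struct vm_area_struct *vma;
        vm_fault_t fault;

retry:
        mmap_read_lock(mm);
        vma = find_vma(mm, addr);
        if (!vma) {
                mmap_read_unlock(mm);
                return;         /* bad-area handling elided */
        }

        fault = handle_mm_fault(vma, addr, flags);

        /* mmap_lock already released in __lock_page_or_retry(). */
        if (fault_signal_pending(fault, regs))
                return;

        /* mmap_lock already relinquished by core mm on this path. */
        if (unlikely((fault & VM_FAULT_RETRY) &&
                     (flags & FAULT_FLAG_ALLOW_RETRY))) {
                flags |= FAULT_FLAG_TRIED;
                goto retry;
        }

        mmap_read_unlock(mm);
}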
@@ -240,7 +240,7 @@ static int install_vvar(struct mm_struct *mm, unsigned long addr)
        return PTR_ERR_OR_ZERO(vma);
 }
 
-/* assumes mmap_sem is write-locked */
+/* assumes mmap_lock is write-locked */
 void arm_install_vdso(struct mm_struct *mm, unsigned long addr)
 {
        struct vm_area_struct *vma;
@@ -293,7 +293,7 @@ retry:
        fault = __do_page_fault(mm, addr, fsr, flags, tsk);
 
        /* If we need to retry but a fatal signal is pending, handle the
-        * signal first. We do not need to release the mmap_sem because
+        * signal first. We do not need to release the mmap_lock because
         * it would already be released in __lock_page_or_retry in
         * mm/filemap.c. */
        if (fault_signal_pending(fault, regs)) {
@@ -86,7 +86,7 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *re
 #ifdef CONFIG_VIRTUAL_MEM_MAP
        /*
         * If fault is in region 5 and we are in the kernel, we may already
-        * have the mmap_sem (pfn_valid macro is called during mmap). There
+        * have the mmap_lock (pfn_valid macro is called during mmap). There
         * is no vma for region 5 addr's anyway, so skip getting the semaphore
         * and go directly to the exception handling code.
         */
@@ -124,7 +124,7 @@ void do_page_fault(struct pt_regs *regs, unsigned long address,
        /* When running in the kernel we expect faults to occur only to
         * addresses in user space. All other faults represent errors in the
         * kernel and should generate an OOPS. Unfortunately, in the case of an
-        * erroneous fault occurring in a code path which already holds mmap_sem
+        * erroneous fault occurring in a code path which already holds mmap_lock
         * we will deadlock attempting to validate the fault against the
         * address space. Luckily the kernel only validly references user
         * space from well defined areas of code, which are listed in the
@@ -210,7 +210,7 @@ good_area:
 
        /*
         * If we need to retry but a fatal signal is pending, handle the
-        * signal first. We do not need to release the mmap_sem because it
+        * signal first. We do not need to release the mmap_lock because it
         * would already be released in __lock_page_or_retry in mm/filemap.c.
         */
        if (fault_signal_pending(fault, regs)) {
@@ -101,7 +101,7 @@ static inline bool mm_pkey_is_allocated(struct mm_struct *mm, int pkey)
 
 /*
  * Returns a positive, 5-bit key on success, or -1 on failure.
- * Relies on the mmap_sem to protect against concurrency in mm_pkey_alloc() and
+ * Relies on the mmap_lock to protect against concurrency in mm_pkey_alloc() and
  * mm_pkey_free().
  */
 static inline int mm_pkey_alloc(struct mm_struct *mm)
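The concurrency contract in that comment is provided by callers, not by mm_pkey_alloc() itself. A caller-side sketch with a hypothetical wrapper name — the real serialization happens in the pkey_alloc() syscall path, which takes the mmap lock for write:

#include <linux/mm.h>
#include <linux/pkeys.h>

/* Hypothetical wrapper showing why mm_pkey_alloc() needs no internal
 * locking: every caller holds mm->mmap_lock for write around it. */
static int demo_pkey_alloc(struct mm_struct *mm)
{
        int pkey;

        mmap_write_lock(mm);
        pkey = mm_pkey_alloc(mm);       /* serialized by mmap_lock */
        mmap_write_unlock(mm);

        return pkey;
}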
@@ -47,7 +47,7 @@
  * Locking order
  *
  * 1. kvm->srcu - Protects KVM memslots
- * 2. kvm->mm->mmap_sem - find_vma, migrate_vma_pages and helpers, ksm_madvise
+ * 2. kvm->mm->mmap_lock - find_vma, migrate_vma_pages and helpers, ksm_madvise
  * 3. kvm->arch.uvmem_lock - protects read/writes to uvmem slots thus acting
  *    as sync-points for page-in/out
  */
@@ -402,8 +402,8 @@ kvmppc_svm_page_in(struct vm_area_struct *vma, unsigned long start,
        mig.dst = &dst_pfn;
 
        /*
-        * We come here with mmap_sem write lock held just for
-        * ksm_madvise(), otherwise we only need read mmap_sem.
+        * We come here with mmap_lock write lock held just for
+        * ksm_madvise(), otherwise we only need read mmap_lock.
         * Hence downgrade to read lock once ksm_madvise() is done.
         */
        ret = ksm_madvise(vma, vma->vm_start, vma->vm_end,
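The downgrade idiom that comment describes, sketched with the mmap locking API (everything other than the lock calls is hypothetical):

#include <linux/mm.h>

/* Write-lock-then-downgrade: take the write lock only for the one step
 * that needs it, then continue read-side work with no window in which
 * the lock is dropped entirely. */
static void demo_downgrade(struct mm_struct *mm)
{
        mmap_write_lock(mm);
        /* ... the single operation requiring the write lock ... */
        mmap_write_downgrade(mm);       /* atomically becomes a read lock */
        /* ... remaining read-side work, e.g. walking vmas ... */
        mmap_read_unlock(mm);
}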
@@ -129,7 +129,7 @@ void flush_tlb_mm(struct mm_struct *mm)
 
        /*
         * It is safe to go down the mm's list of vmas when called
-        * from dup_mmap, holding mmap_sem. It would also be safe from
+        * from dup_mmap, holding mmap_lock. It would also be safe from
         * unmap_region or exit_mmap, but not from vmtruncate on SMP -
         * but it seems dup_mmap is the only SMP case which gets here.
         */
@@ -237,7 +237,7 @@ pmd_t hash__pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long addres
         * to hugepage, we first clear the pmd, then invalidate all
         * the PTE entries. The assumption here is that any low level
         * page fault will see a none pmd and take the slow path that
-        * will wait on mmap_sem. But we could very well be in a
+        * will wait on mmap_lock. But we could very well be in a
         * hash_page with local ptep pointer value. Such a hash page
         * can result in adding new HPTE entries for normal subpages.
         * That means we could be modifying the page content as we
@@ -251,7 +251,7 @@ pmd_t hash__pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long addres
         * Now invalidate the hpte entries in the range
         * covered by pmd. This make sure we take a
         * fault and will find the pmd as none, which will
-        * result in a major fault which takes mmap_sem and
+        * result in a major fault which takes mmap_lock and
         * hence wait for collapse to complete. Without this
         * the __collapse_huge_page_copy can result in copying
         * the old content.
@@ -225,7 +225,7 @@ SYSCALL_DEFINE3(subpage_prot, unsigned long, addr,
        if (!spt) {
                /*
                 * Allocate subpage prot table if not already done.
-                * Do this with mmap_sem held
+                * Do this with mmap_lock held
                 */
                spt = kzalloc(sizeof(struct subpage_prot_table), GFP_KERNEL);
                if (!spt) {
||||
@@ -138,7 +138,7 @@ static noinline int bad_access_pkey(struct pt_regs *regs, unsigned long address,
|
||||
* 2. T1 : set AMR to deny access to pkey=4, touches, page
|
||||
* 3. T1 : faults...
|
||||
* 4. T2: mprotect_key(foo, PAGE_SIZE, pkey=5);
|
||||
* 5. T1 : enters fault handler, takes mmap_sem, etc...
|
||||
* 5. T1 : enters fault handler, takes mmap_lock, etc...
|
||||
* 6. T1 : reaches here, sees vma_pkey(vma)=5, when we really
|
||||
* faulted on a pte with its pkey=4.
|
||||
*/
|
||||
@@ -525,9 +525,9 @@ static int __do_page_fault(struct pt_regs *regs, unsigned long address,
        perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
 
        /*
-        * We want to do this outside mmap_sem, because reading code around nip
+        * We want to do this outside mmap_lock, because reading code around nip
         * can result in fault, which will cause a deadlock when called with
-        * mmap_sem held
+        * mmap_lock held
         */
        if (is_user)
                flags |= FAULT_FLAG_USER;
@@ -539,7 +539,7 @@ static int __do_page_fault(struct pt_regs *regs, unsigned long address,
        /* When running in the kernel we expect faults to occur only to
         * addresses in user space. All other faults represent errors in the
         * kernel and should generate an OOPS. Unfortunately, in the case of an
-        * erroneous fault occurring in a code path which already holds mmap_sem
+        * erroneous fault occurring in a code path which already holds mmap_lock
         * we will deadlock attempting to validate the fault against the
         * address space. Luckily the kernel only validly references user
         * space from well defined areas of code, which are listed in the
@@ -615,7 +615,7 @@ good_area:
                return user_mode(regs) ? 0 : SIGBUS;
 
        /*
-        * Handle the retry right now, the mmap_sem has been released in that
+        * Handle the retry right now, the mmap_lock has been released in that
         * case.
         */
        if (unlikely(fault & VM_FAULT_RETRY)) {
@@ -306,7 +306,7 @@ void assert_pte_locked(struct mm_struct *mm, unsigned long addr)
        pmd = pmd_offset(pud, addr);
        /*
         * khugepaged to collapse normal pages to hugepage, first set
-        * pmd to none to force page fault/gup to take mmap_sem. After
+        * pmd to none to force page fault/gup to take mmap_lock. After
         * pmd is set to none, we do a pte_clear which does this assertion
         * so if we find pmd none, return.
         */
@@ -325,7 +325,7 @@ static vm_fault_t spufs_ps_fault(struct vm_fault *vmf,
                return VM_FAULT_SIGBUS;
 
        /*
-        * Because we release the mmap_sem, the context may be destroyed while
+        * Because we release the mmap_lock, the context may be destroyed while
         * we're in spu_wait. Grab an extra reference so it isn't destroyed
         * in the meantime.
         */
@@ -334,8 +334,8 @@ static vm_fault_t spufs_ps_fault(struct vm_fault *vmf,
        /*
         * We have to wait for context to be loaded before we have
         * pages to hand out to the user, but we don't want to wait
-        * with the mmap_sem held.
-        * It is possible to drop the mmap_sem here, but then we need
+        * with the mmap_lock held.
+        * It is possible to drop the mmap_lock here, but then we need
         * to return VM_FAULT_NOPAGE because the mappings may have
         * hanged.
         */
@@ -114,7 +114,7 @@ good_area:
 
        /*
         * If we need to retry but a fatal signal is pending, handle the
-        * signal first. We do not need to release the mmap_sem because it
+        * signal first. We do not need to release the mmap_lock because it
         * would already be released in __lock_page_or_retry in mm/filemap.c.
         */
        if (fault_signal_pending(fault, regs))
@@ -1122,7 +1122,7 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
 }
 
 /*
- * Must be called with relevant read locks held (kvm->mm->mmap_sem, kvm->srcu)
+ * Must be called with relevant read locks held (kvm->mm->mmap_lock, kvm->srcu)
 */
 static inline int __do_essa(struct kvm_vcpu *vcpu, const int orc)
 {
Some files were not shown because too many files have changed in this diff.