ANDROID: Make SPF aware of fast mremaps

SPF attempts page faults without taking the mmap lock, but takes the
PTL. If there is a concurrent fast mremap (at PMD/PUD level), this
can lead to a use-after-free (UAF), because fast mremap only takes the
PTLs at the PMD/PUD level. SPF cannot take the PTLs at that larger
subtree granularity, since doing so would introduce too much contention
in the page fault path.

To address the race:
  1) Fast mremaps wait until there are no users of the VMA.
  2) Speculative faults detect ongoing fast mremaps and fall back
     to conventional fault handling (taking the mmap read lock).

Since this race condition is very rare, the performance impact is
negligible.
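
A minimal, self-contained userspace sketch of the handshake described
above (illustrative only, not part of the patch): the reference count
sits at 1 when the VMA has no speculative users, speculative readers
only take a reference while the count is non-negative, and a fast
mremap claims exclusivity by swapping 1 for -1, sleeping until the last
reader's put wakes it. C11 atomics and a pthread condition variable
stand in for the kernel's atomic_t, wait_event() and wake_up(); all
names are made up.

/* spf_handshake_demo.c - userspace model of the vm_ref_count handshake. */
#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_int ref_count = 1;	/* 1 == only the base reference */
static atomic_int waiters;		/* threads waiting in "fast mremap" */
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t wake = PTHREAD_COND_INITIALIZER;

/* Speculative-fault side: take a reference unless a fast mremap owns the VMA. */
static bool get_ref(void)
{
	int old = atomic_load(&ref_count);

	do {
		if (old < 0)
			return false;	/* remap in flight: caller falls back */
	} while (!atomic_compare_exchange_weak(&ref_count, &old, old + 1));

	return true;
}

/* Speculative-fault side: drop the reference, waking a waiting remap thread. */
static void put_ref(void)
{
	int remaining = atomic_fetch_sub(&ref_count, 1) - 1;

	if (remaining == 1 && atomic_load(&waiters) > 0) {
		pthread_mutex_lock(&lock);
		pthread_cond_broadcast(&wake);
		pthread_mutex_unlock(&lock);
	}
}

/* Fast-mremap side: wait until only the base reference remains, then claim
 * the VMA exclusively by swapping the count to -1. */
static void wait_for_users(void)
{
	int expected = 1;

	if (atomic_compare_exchange_strong(&ref_count, &expected, -1))
		return;

	atomic_fetch_add(&waiters, 1);
	pthread_mutex_lock(&lock);
	for (;;) {
		expected = 1;
		if (atomic_compare_exchange_strong(&ref_count, &expected, -1))
			break;
		pthread_cond_wait(&wake, &lock);
	}
	pthread_mutex_unlock(&lock);
	atomic_fetch_sub(&waiters, 1);
}

/* Fast-mremap side: reopen the VMA to speculative users. */
static void restore_ref(void)
{
	atomic_store(&ref_count, 1);
}

int main(void)
{
	get_ref();		/* a speculative user takes a reference... */
	put_ref();		/* ...and drops it */

	wait_for_users();	/* "mremap" claims exclusive ownership */
	printf("get_ref() during remap: %s\n", get_ref() ? "ok" : "falls back");
	restore_ref();
	printf("get_ref() after remap:  %s\n", get_ref() ? "ok" : "falls back");
	return 0;
}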

Bug: 263177905
Change-Id: If9755aa4261337fe180e3093a3cefaae8ac9ff1a
Signed-off-by: Kalesh Singh <kaleshsingh@google.com>
Author: Kalesh Singh
Date:   2022-12-19 21:07:49 -08:00
Parent: 16c03440df
Commit: 134c1aae43
3 changed files with 116 additions and 17 deletions

@@ -1758,6 +1758,9 @@ int generic_access_phys(struct vm_area_struct *vma, unsigned long addr,
 			void *buf, int len, int write);
 
 #ifdef CONFIG_SPECULATIVE_PAGE_FAULT
+extern wait_queue_head_t vma_users_wait;
+extern atomic_t vma_user_waiters;
+
 static inline void vm_write_begin(struct vm_area_struct *vma)
 {
 	/*

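Aside (not part of the patch): these two symbols are defined alongside
the fast mremap code further below and pair a wait queue with an atomic
waiter count, so that the hot path in put_vma() can skip wake_up()
entirely when no fast mremap is waiting. A generic sketch of that idiom
follows; the names are made up and it is not code from this change.

/* Generic kernel-style illustration of a wait queue plus waiter count. */
#include <linux/atomic.h>
#include <linux/wait.h>

static DECLARE_WAIT_QUEUE_HEAD(example_wq);
static atomic_t example_waiters = ATOMIC_INIT(0);
static atomic_t example_busy = ATOMIC_INIT(0);

/* Slow path: register as a waiter, then sleep until the condition holds. */
static void example_wait_idle(void)
{
	atomic_inc(&example_waiters);
	smp_mb__after_atomic();	/* pairs with the smp_mb() in example_done() */
	wait_event(example_wq, atomic_read(&example_busy) == 0);
	atomic_dec(&example_waiters);
}

/* Completion path: only touch the wait queue if someone registered interest. */
static void example_done(void)
{
	atomic_set(&example_busy, 0);
	smp_mb();		/* order the store above before the read below */
	if (atomic_read(&example_waiters) > 0)
		wake_up(&example_wq);
}
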
@@ -180,7 +180,17 @@ static void __free_vma(struct vm_area_struct *vma)
 #ifdef CONFIG_SPECULATIVE_PAGE_FAULT
 void put_vma(struct vm_area_struct *vma)
 {
-	if (atomic_dec_and_test(&vma->vm_ref_count))
+	int ref_count = atomic_dec_return(&vma->vm_ref_count);
+
+	/*
+	 * Implicit smp_mb due to atomic_dec_return.
+	 *
+	 * If only the base reference remains, wake up the mremap
+	 * waiter (if any).
+	 */
+	if (ref_count == 1 && unlikely(atomic_read(&vma_user_waiters) > 0))
+		wake_up(&vma_users_wait);
+	else if (ref_count <= 0)
 		__free_vma(vma);
 }
 #else
@@ -2421,8 +2431,22 @@ struct vm_area_struct *get_vma(struct mm_struct *mm, unsigned long addr)
 	read_lock(&mm->mm_rb_lock);
 	vma = __find_vma(mm, addr);
-	if (vma)
-		atomic_inc(&vma->vm_ref_count);
+
+	/*
+	 * If there is a concurrent fast mremap, bail out since the entire
+	 * PMD/PUD subtree may have been remapped.
+	 *
+	 * This is usually safe for conventional mremap since it takes the
+	 * PTE locks as does SPF. However, fast mremap only takes the lock
+	 * at the PMD/PUD level, which is fine because it is done with the
+	 * mmap write lock held. But SPF, as the name implies, forgoes
+	 * taking the mmap read lock and also cannot take the PTL at the
+	 * larger PMD/PUD granularity, since that would introduce huge
+	 * contention in the page fault path; so fall back to regular
+	 * fault handling.
+	 */
+	if (vma && !atomic_inc_unless_negative(&vma->vm_ref_count))
+		vma = NULL;
 
 	read_unlock(&mm->mm_rb_lock);
 
 	return vma;

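Aside (not part of the patch): a hypothetical caller sketch of how a
speculative fault path might consume get_vma()/put_vma() under this
scheme. The function name and the surrounding flow are made up; the
point is that a NULL return from get_vma() now also covers "fast mremap
in flight", so the caller simply falls back to the conventional,
mmap-lock protected fault path.

static vm_fault_t example_speculative_fault(struct mm_struct *mm,
					    unsigned long addr)
{
	struct vm_area_struct *vma;
	vm_fault_t ret;

	vma = get_vma(mm, addr);
	if (!vma)
		return VM_FAULT_RETRY;	/* retry under mmap_read_lock() */

	/* ... speculative handling under the per-VMA reference ... */
	ret = 0;

	put_vma(vma);
	return ret;
}
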
@@ -210,17 +210,74 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
 		drop_rmap_locks(vma);
 }
 
+#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
+DECLARE_WAIT_QUEUE_HEAD(vma_users_wait);
+atomic_t vma_user_waiters = ATOMIC_INIT(0);
+
+static inline void wait_for_vma_users(struct vm_area_struct *vma)
+{
+	/*
+	 * If we have the only reference, swap the refcount to -1. This
+	 * will prevent other concurrent references by get_vma() for SPFs.
+	 */
+	if (likely(atomic_cmpxchg(&vma->vm_ref_count, 1, -1) == 1))
+		return;
+
+	/* Indicate we are waiting for other users of the VMA to finish. */
+	atomic_inc(&vma_user_waiters);
+
+	/* A failed atomic_cmpxchg() implies no barrier; use an explicit one. */
+	smp_mb();
+
+	/*
+	 * Callers cannot handle failure, so sleep uninterruptibly until
+	 * there are no other users of this VMA.
+	 *
+	 * We don't need to worry about references from concurrent waiters,
+	 * since this is only used in the context of fast mremaps, with the
+	 * exclusive mmap write lock held.
+	 */
+	wait_event(vma_users_wait,
+		   atomic_cmpxchg(&vma->vm_ref_count, 1, -1) == 1);
+	atomic_dec(&vma_user_waiters);
+}
+
 /*
- * Speculative page fault handlers will not detect page table changes done
- * without ptl locking.
+ * Restore the VMA reference count to 1 after a fast mremap.
  */
-#if defined(CONFIG_HAVE_MOVE_PMD) && !defined(CONFIG_SPECULATIVE_PAGE_FAULT)
+static inline void restore_vma_ref_count(struct vm_area_struct *vma)
+{
+	/*
+	 * This should only be called after a corresponding
+	 * wait_for_vma_users().
+	 */
+	VM_BUG_ON_VMA(atomic_cmpxchg(&vma->vm_ref_count, -1, 1) != -1,
+		      vma);
+}
+#else /* !CONFIG_SPECULATIVE_PAGE_FAULT */
+static inline void wait_for_vma_users(struct vm_area_struct *vma)
+{
+}
+
+static inline void restore_vma_ref_count(struct vm_area_struct *vma)
+{
+}
+#endif /* CONFIG_SPECULATIVE_PAGE_FAULT */
+
+#ifdef CONFIG_HAVE_MOVE_PMD
 static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
 		unsigned long new_addr, pmd_t *old_pmd, pmd_t *new_pmd)
 {
 	spinlock_t *old_ptl, *new_ptl;
 	struct mm_struct *mm = vma->vm_mm;
 	pmd_t pmd;
+	bool ret;
+
+	/*
+	 * Wait for concurrent users, since these can potentially be
+	 * speculative page faults.
+	 */
+	wait_for_vma_users(vma);
 
 	/*
 	 * The destination pmd shouldn't be established, free_pgtables()
@@ -245,8 +302,10 @@ static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
 	 * One alternative might be to just unmap the target pmd at
 	 * this point, and verify that it really is empty. We'll see.
 	 */
-	if (WARN_ON_ONCE(!pmd_none(*new_pmd)))
-		return false;
+	if (WARN_ON_ONCE(!pmd_none(*new_pmd))) {
+		ret = false;
+		goto out;
+	}
 
 	/*
 	 * We don't have to worry about the ordering of src and dst
@@ -270,7 +329,11 @@ static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
 	spin_unlock(new_ptl);
 	spin_unlock(old_ptl);
 
-	return true;
+	ret = true;
+
+out:
+	restore_vma_ref_count(vma);
+	return ret;
 }
 #else
 static inline bool move_normal_pmd(struct vm_area_struct *vma,
@@ -281,24 +344,29 @@ static inline bool move_normal_pmd(struct vm_area_struct *vma,
 }
 #endif
 
-/*
- * Speculative page fault handlers will not detect page table changes done
- * without ptl locking.
- */
-#if defined(CONFIG_HAVE_MOVE_PUD) && !defined(CONFIG_SPECULATIVE_PAGE_FAULT)
+#ifdef CONFIG_HAVE_MOVE_PUD
 static bool move_normal_pud(struct vm_area_struct *vma, unsigned long old_addr,
 		unsigned long new_addr, pud_t *old_pud, pud_t *new_pud)
 {
 	spinlock_t *old_ptl, *new_ptl;
 	struct mm_struct *mm = vma->vm_mm;
 	pud_t pud;
+	bool ret;
+
+	/*
+	 * Wait for concurrent users, since these can potentially be
+	 * speculative page faults.
+	 */
+	wait_for_vma_users(vma);
 
 	/*
 	 * The destination pud shouldn't be established, free_pgtables()
 	 * should have released it.
 	 */
-	if (WARN_ON_ONCE(!pud_none(*new_pud)))
-		return false;
+	if (WARN_ON_ONCE(!pud_none(*new_pud))) {
+		ret = false;
+		goto out;
+	}
 
 	/*
 	 * We don't have to worry about the ordering of src and dst
@@ -322,7 +390,11 @@ static bool move_normal_pud(struct vm_area_struct *vma, unsigned long old_addr,
 	spin_unlock(new_ptl);
 	spin_unlock(old_ptl);
 
-	return true;
+	ret = true;
+
+out:
+	restore_vma_ref_count(vma);
+	return ret;
 }
 #else
 static inline bool move_normal_pud(struct vm_area_struct *vma,