ANDROID: fix a race between speculative page walk and unmap operations

Speculative page fault walks the page tables under RCU protection and
assumes that the page tables are stable after the ptl lock is taken. The
current implementation has three issues:
1. While the pmd can't be destroyed while we are in an RCU read section, it
can be cleared, resulting in an invalid ptl lock address. Fix that by
rechecking the pmd value after obtaining the ptl lock.
2. In case of CONFIG_ALLOC_SPLIT_PTLOCKS, the ptl lock is separate from the
pmd and is destroyed by a synchronous call to pgtable_pmd_page_dtor, which
can happen while the page walker is in an RCU section. Prevent this by
making CONFIG_SPECULATIVE_PAGE_FAULT depend on !CONFIG_ALLOC_SPLIT_PTLOCKS.
3. The sequence below, in which do_mmap happens after the last mmap_seq
check, results in a use-after-free:

__pte_map_lock
      rcu_read_lock()
      mmap_seq_read_check()

      ptl = pte_lockptr(vmf->pmd)

      spin_trylock(ptl)
      mmap_seq_read_check()
                             mmap_write_lock()
                             do_mmap()
                               unmap_region()
                                 unmap_vmas()
                                 free_pgtables()
                                   ...
                                   free_pte_range
                                   pmd_clear
                                     pte_free_tlb
                                        ...
                                        call_rcu(tlb_remove_table_rcu)

      rcu_read_unlock()
                             tlb_remove_table_rcu
      spin_unlock(ptl) <-- UAF!

To prevent that, free_pte_range needs to be blocked while the ptl is locked
and in use.
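
For orientation, here is a condensed C sketch of the two sides the patch
synchronizes, paraphrasing the hunks below. The *_sketch function names are
illustrative only; error paths, tracing and the non-speculative fast path
are omitted.

/* Unmap side: free_pte_range() gains a lock/unlock pair acting as a barrier. */
static void free_pte_range_sketch(struct mmu_gather *tlb, pmd_t *pmd,
				  unsigned long addr)
{
	pgtable_t token = pmd_pgtable(*pmd);

	/*
	 * Block page table destruction if a speculative faulter managed to
	 * take the lock; otherwise tlb_remove_table_rcu can destroy the ptl
	 * while it is still held, and the later unlock is a use-after-free.
	 */
	spinlock_t *ptl = pmd_lock(tlb->mm, pmd);
	spin_unlock(ptl);

	pmd_clear(pmd);
	pte_free_tlb(tlb, token, addr);	/* table freed after an RCU grace period */
}

/* Speculative fault side: work from a validated snapshot of the pmd entry. */
static bool pte_spinlock_sketch(struct vm_fault *vmf)
{
	pmd_t pmdval;

	/* Reject a pmd that was cleared or changed behind our back. */
	pmdval = READ_ONCE(*vmf->pmd);
	if (!pmd_same(pmdval, vmf->orig_pmd))
		return false;

	/* Derive the ptl from the snapshot, not from a re-read of *vmf->pmd. */
	vmf->ptl = pte_lockptr(vmf->vma->vm_mm, &pmdval);
	if (unlikely(!spin_trylock(vmf->ptl)))
		return false;

	/* Re-validate under the lock; fails if the unmap side got in first. */
	if (vma_has_changed(vmf)) {
		spin_unlock(vmf->ptl);
		return false;
	}
	return true;
}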

[tyler wang: This is a backport of
https://android-review.googlesource.com/c/kernel/common/+/2330194.
We adapted the 5.15 changes to 5.10: the changes made to __pte_map_lock in
5.15 are applied to pte_spinlock and __pte_map_lock_speculative in 5.10.
Additionally, following surenb's suggestion, we have folded
https://android-review.googlesource.com/c/kernel/common/+/2368961 into this patch.]

Bug: 278602292
Change-Id: I7b353f0995fc59e92bb2069bcdc7d1ac29b521b9
Signed-off-by: Dezhi Huang <huangdezhi@hihonor.com>

@@ -246,6 +246,16 @@ static void free_pte_range(struct mmu_gather *tlb, pmd_t *pmd,
 			   unsigned long addr)
 {
 	pgtable_t token = pmd_pgtable(*pmd);
+#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
+	/*
+	 * Ensure page table destruction is blocked if __pte_map_lock managed
+	 * to take this lock. Without this barrier tlb_remove_table_rcu can
+	 * destroy ptl after __pte_map_lock locked it and during unlock would
+	 * cause a use-after-free.
+	 */
+	spinlock_t *ptl = pmd_lock(tlb->mm, pmd);
+	spin_unlock(ptl);
+#endif
 	pmd_clear(pmd);
 	pte_free_tlb(tlb, token, addr);
 	mm_dec_nr_ptes(tlb->mm);
@@ -2627,9 +2637,7 @@ EXPORT_SYMBOL_GPL(apply_to_page_range);
 static bool pte_spinlock(struct vm_fault *vmf)
 {
 	bool ret = false;
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
 	pmd_t pmdval;
-#endif
 
 	/* Check if vma is still valid */
 	if (!(vmf->flags & FAULT_FLAG_SPECULATIVE)) {
@@ -2644,24 +2652,28 @@ static bool pte_spinlock(struct vm_fault *vmf)
 		goto out;
 	}
 
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
 	/*
 	 * We check if the pmd value is still the same to ensure that there
 	 * is not a huge collapse operation in progress in our back.
+	 * It also ensures that pmd was not cleared by pmd_clear in
+	 * free_pte_range and ptl is still valid.
 	 */
 	pmdval = READ_ONCE(*vmf->pmd);
 	if (!pmd_same(pmdval, vmf->orig_pmd)) {
 		trace_spf_pmd_changed(_RET_IP_, vmf->vma, vmf->address);
 		goto out;
 	}
-#endif
 
-	vmf->ptl = pte_lockptr(vmf->vma->vm_mm, vmf->pmd);
+	vmf->ptl = pte_lockptr(vmf->vma->vm_mm, &pmdval);
 	if (unlikely(!spin_trylock(vmf->ptl))) {
 		trace_spf_pte_lock(_RET_IP_, vmf->vma, vmf->address);
 		goto out;
 	}
 
+	/*
+	 * The check below will fail if pte_spinlock passed its ptl
+	 * barrier before we took the ptl lock.
+	 */
 	if (vma_has_changed(vmf)) {
 		spin_unlock(vmf->ptl);
 		trace_spf_vma_changed(_RET_IP_, vmf->vma, vmf->address);
@@ -2679,9 +2691,7 @@ static bool __pte_map_lock_speculative(struct vm_fault *vmf, unsigned long addr)
 	bool ret = false;
 	pte_t *pte;
 	spinlock_t *ptl;
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
 	pmd_t pmdval;
-#endif
 
 	/*
 	 * The first vma_has_changed() guarantees the page-tables are still
@@ -2696,7 +2706,6 @@ static bool __pte_map_lock_speculative(struct vm_fault *vmf, unsigned long addr)
 		goto out;
 	}
 
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
 	/*
 	 * We check if the pmd value is still the same to ensure that there
 	 * is not a huge collapse operation in progress in our back.
@@ -2706,7 +2715,6 @@ static bool __pte_map_lock_speculative(struct vm_fault *vmf, unsigned long addr)
 		trace_spf_pmd_changed(_RET_IP_, vmf->vma, addr);
 		goto out;
 	}
-#endif
 
 	/*
 	 * Same as pte_offset_map_lock() except that we call
@@ -2715,14 +2723,18 @@ static bool __pte_map_lock_speculative(struct vm_fault *vmf, unsigned long addr)
 	 * to invalidate TLB but this CPU has irq disabled.
 	 * Since we are in a speculative patch, accept it could fail
 	 */
-	ptl = pte_lockptr(vmf->vma->vm_mm, vmf->pmd);
-	pte = pte_offset_map(vmf->pmd, addr);
+	ptl = pte_lockptr(vmf->vma->vm_mm, &pmdval);
+	pte = pte_offset_map(&pmdval, addr);
 	if (unlikely(!spin_trylock(ptl))) {
 		pte_unmap(pte);
 		trace_spf_pte_lock(_RET_IP_, vmf->vma, addr);
 		goto out;
 	}
 
+	/*
+	 * The check below will fail if __pte_map_lock_speculative passed its ptl
+	 * barrier before we took the ptl lock.
+	 */
 	if (vma_has_changed(vmf)) {
 		pte_unmap_unlock(pte, ptl);
 		trace_spf_vma_changed(_RET_IP_, vmf->vma, addr);