Merge tag 'mm-hotfixes-stable-2025-03-08-16-27' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm

Pull misc fixes from Andrew Morton:
 "33 hotfixes. 24 are cc:stable and the remainder address post-6.13
  issues or aren't considered necessary for -stable kernels.

  26 are for MM and 7 are for non-MM.

   - "mm: memory_failure: unmap poisoned folio during migrate properly"
     from Ma Wupeng fixes a couple of two year old bugs involving the
     migration of hwpoisoned folios.

   - "selftests/damon: three fixes for false results" from SeongJae Park
     fixes three one year old bugs in the SAMON selftest code.

  The remainder are singletons and doubletons. Please see the individual
  changelogs for details"

* tag 'mm-hotfixes-stable-2025-03-08-16-27' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (33 commits)
  mm/page_alloc: fix uninitialized variable
  rapidio: add check for rio_add_net() in rio_scan_alloc_net()
  rapidio: fix an API misues when rio_add_net() fails
  MAINTAINERS: .mailmap: update Sumit Garg's email address
  Revert "mm/page_alloc.c: don't show protection in zone's ->lowmem_reserve[] for empty zone"
  mm: fix finish_fault() handling for large folios
  mm: don't skip arch_sync_kernel_mappings() in error paths
  mm: shmem: remove unnecessary warning in shmem_writepage()
  userfaultfd: fix PTE unmapping stack-allocated PTE copies
  userfaultfd: do not block on locking a large folio with raised refcount
  mm: zswap: use ATOMIC_LONG_INIT to initialize zswap_stored_pages
  mm: shmem: fix potential data corruption during shmem swapin
  mm: fix kernel BUG when userfaultfd_move encounters swapcache
  selftests/damon/damon_nr_regions: sort collected regiosn before checking with min/max boundaries
  selftests/damon/damon_nr_regions: set ops update for merge results check to 100ms
  selftests/damon/damos_quota: make real expectation of quota exceeds
  include/linux/log2.h: mark is_power_of_2() with __always_inline
  NFS: fix nfs_release_folio() to not deadlock via kcompactd writeback
  mm, swap: avoid BUG_ON in relocate_cluster()
  mm: swap: use correct step in loop to wait all clusters in wait_for_allocation()
  ...
This commit is contained in:
Linus Torvalds
2025-03-08 14:34:06 -10:00
39 changed files with 349 additions and 123 deletions

View File

@@ -691,6 +691,7 @@ Subbaraman Narayanamurthy <quic_subbaram@quicinc.com> <subbaram@codeaurora.org>
Subhash Jadavani <subhashj@codeaurora.org>
Sudarshan Rajagopalan <quic_sudaraja@quicinc.com> <sudaraja@codeaurora.org>
Sudeep Holla <sudeep.holla@arm.com> Sudeep KarkadaNagesha <sudeep.karkadanagesha@arm.com>
Sumit Garg <sumit.garg@kernel.org> <sumit.garg@linaro.org>
Sumit Semwal <sumit.semwal@ti.com>
Surabhi Vishnoi <quic_svishnoi@quicinc.com> <svishnoi@codeaurora.org>
Sven Eckelmann <sven@narfation.org> <seckelmann@datto.com>

View File

@@ -12875,7 +12875,7 @@ F: include/keys/trusted_dcp.h
F: security/keys/trusted-keys/trusted_dcp.c
KEYS-TRUSTED-TEE
M: Sumit Garg <sumit.garg@linaro.org>
M: Sumit Garg <sumit.garg@kernel.org>
L: linux-integrity@vger.kernel.org
L: keyrings@vger.kernel.org
S: Supported
@@ -17675,7 +17675,7 @@ F: Documentation/ABI/testing/sysfs-bus-optee-devices
F: drivers/tee/optee/
OP-TEE RANDOM NUMBER GENERATOR (RNG) DRIVER
M: Sumit Garg <sumit.garg@linaro.org>
M: Sumit Garg <sumit.garg@kernel.org>
L: op-tee@lists.trustedfirmware.org
S: Maintained
F: drivers/char/hw_random/optee-rng.c
@@ -23288,7 +23288,7 @@ F: include/media/i2c/tw9910.h
TEE SUBSYSTEM
M: Jens Wiklander <jens.wiklander@linaro.org>
R: Sumit Garg <sumit.garg@linaro.org>
R: Sumit Garg <sumit.garg@kernel.org>
L: op-tee@lists.trustedfirmware.org
S: Maintained
F: Documentation/ABI/testing/sysfs-class-tee

View File

@@ -62,7 +62,7 @@ static int do_adjust_pte(struct vm_area_struct *vma, unsigned long address,
}
static int adjust_pte(struct vm_area_struct *vma, unsigned long address,
unsigned long pfn, struct vm_fault *vmf)
unsigned long pfn, bool need_lock)
{
spinlock_t *ptl;
pgd_t *pgd;
@@ -99,12 +99,11 @@ again:
if (!pte)
return 0;
/*
* If we are using split PTE locks, then we need to take the page
* lock here. Otherwise we are using shared mm->page_table_lock
* which is already locked, thus cannot take it.
*/
if (ptl != vmf->ptl) {
if (need_lock) {
/*
* Use nested version here to indicate that we are already
* holding one similar spinlock.
*/
spin_lock_nested(ptl, SINGLE_DEPTH_NESTING);
if (unlikely(!pmd_same(pmdval, pmdp_get_lockless(pmd)))) {
pte_unmap_unlock(pte, ptl);
@@ -114,7 +113,7 @@ again:
ret = do_adjust_pte(vma, address, pfn, pte);
if (ptl != vmf->ptl)
if (need_lock)
spin_unlock(ptl);
pte_unmap(pte);
@@ -123,9 +122,10 @@ again:
static void
make_coherent(struct address_space *mapping, struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep, unsigned long pfn,
struct vm_fault *vmf)
unsigned long addr, pte_t *ptep, unsigned long pfn)
{
const unsigned long pmd_start_addr = ALIGN_DOWN(addr, PMD_SIZE);
const unsigned long pmd_end_addr = pmd_start_addr + PMD_SIZE;
struct mm_struct *mm = vma->vm_mm;
struct vm_area_struct *mpnt;
unsigned long offset;
@@ -141,6 +141,14 @@ make_coherent(struct address_space *mapping, struct vm_area_struct *vma,
*/
flush_dcache_mmap_lock(mapping);
vma_interval_tree_foreach(mpnt, &mapping->i_mmap, pgoff, pgoff) {
/*
* If we are using split PTE locks, then we need to take the pte
* lock. Otherwise we are using shared mm->page_table_lock which
* is already locked, thus cannot take it.
*/
bool need_lock = IS_ENABLED(CONFIG_SPLIT_PTE_PTLOCKS);
unsigned long mpnt_addr;
/*
* If this VMA is not in our MM, we can ignore it.
* Note that we intentionally mask out the VMA
@@ -151,7 +159,12 @@ make_coherent(struct address_space *mapping, struct vm_area_struct *vma,
if (!(mpnt->vm_flags & VM_MAYSHARE))
continue;
offset = (pgoff - mpnt->vm_pgoff) << PAGE_SHIFT;
aliases += adjust_pte(mpnt, mpnt->vm_start + offset, pfn, vmf);
mpnt_addr = mpnt->vm_start + offset;
/* Avoid deadlocks by not grabbing the same PTE lock again. */
if (mpnt_addr >= pmd_start_addr && mpnt_addr < pmd_end_addr)
need_lock = false;
aliases += adjust_pte(mpnt, mpnt_addr, pfn, need_lock);
}
flush_dcache_mmap_unlock(mapping);
if (aliases)
@@ -194,7 +207,7 @@ void update_mmu_cache_range(struct vm_fault *vmf, struct vm_area_struct *vma,
__flush_dcache_folio(mapping, folio);
if (mapping) {
if (cache_is_vivt())
make_coherent(mapping, vma, addr, ptep, pfn, vmf);
make_coherent(mapping, vma, addr, ptep, pfn);
else if (vma->vm_flags & VM_EXEC)
__flush_icache_all();
}

View File

@@ -44,8 +44,10 @@ static inline pgd_t * pgd_alloc(struct mm_struct *mm)
pgd_t *new_pgd;
new_pgd = __pgd_alloc(mm, 0);
memcpy(new_pgd, swapper_pg_dir, PAGE_SIZE);
memset(new_pgd, 0, (PAGE_OFFSET >> PGDIR_SHIFT));
if (likely(new_pgd != NULL)) {
memcpy(new_pgd, swapper_pg_dir, PAGE_SIZE);
memset(new_pgd, 0, (PAGE_OFFSET >> PGDIR_SHIFT));
}
return new_pgd;
}

View File

@@ -1742,7 +1742,8 @@ static int rio_mport_add_riodev(struct mport_cdev_priv *priv,
err = rio_add_net(net);
if (err) {
rmcd_debug(RDEV, "failed to register net, err=%d", err);
kfree(net);
put_device(&net->dev);
mport->net = NULL;
goto cleanup;
}
}

View File

@@ -871,7 +871,10 @@ static struct rio_net *rio_scan_alloc_net(struct rio_mport *mport,
dev_set_name(&net->dev, "rnet_%d", net->id);
net->dev.parent = &mport->dev;
net->dev.release = rio_scan_release_dev;
rio_add_net(net);
if (rio_add_net(net)) {
put_device(&net->dev);
net = NULL;
}
}
return net;

View File

@@ -29,6 +29,7 @@
#include <linux/pagemap.h>
#include <linux/gfp.h>
#include <linux/swap.h>
#include <linux/compaction.h>
#include <linux/uaccess.h>
#include <linux/filelock.h>
@@ -457,7 +458,7 @@ static bool nfs_release_folio(struct folio *folio, gfp_t gfp)
/* If the private flag is set, then the folio is not freeable */
if (folio_test_private(folio)) {
if ((current_gfp_context(gfp) & GFP_KERNEL) != GFP_KERNEL ||
current_is_kswapd())
current_is_kswapd() || current_is_kcompactd())
return false;
if (nfs_wb_folio(folio->mapping->host, folio) < 0)
return false;

View File

@@ -80,6 +80,11 @@ static inline unsigned long compact_gap(unsigned int order)
return 2UL << order;
}
static inline int current_is_kcompactd(void)
{
return current->flags & PF_KCOMPACTD;
}
#ifdef CONFIG_COMPACTION
extern unsigned int extfrag_for_order(struct zone *zone, unsigned int order);

View File

@@ -682,6 +682,7 @@ struct huge_bootmem_page {
int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list);
int replace_free_hugepage_folios(unsigned long start_pfn, unsigned long end_pfn);
void wait_for_freed_hugetlb_folios(void);
struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
unsigned long addr, bool cow_from_owner);
struct folio *alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid,
@@ -1068,6 +1069,10 @@ static inline int replace_free_hugepage_folios(unsigned long start_pfn,
return 0;
}
static inline void wait_for_freed_hugetlb_folios(void)
{
}
static inline struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
unsigned long addr,
bool cow_from_owner)

View File

@@ -41,7 +41,7 @@ int __ilog2_u64(u64 n)
* *not* considered a power of two.
* Return: true if @n is a power of 2, otherwise false.
*/
static inline __attribute__((const))
static __always_inline __attribute__((const))
bool is_power_of_2(unsigned long n)
{
return (n != 0 && ((n & (n - 1)) == 0));

View File

@@ -1701,7 +1701,7 @@ extern struct pid *cad_pid;
#define PF_USED_MATH 0x00002000 /* If unset the fpu must be initialized before use */
#define PF_USER_WORKER 0x00004000 /* Kernel thread cloned from userspace thread */
#define PF_NOFREEZE 0x00008000 /* This thread should not be frozen */
#define PF__HOLE__00010000 0x00010000
#define PF_KCOMPACTD 0x00010000 /* I am kcompactd */
#define PF_KSWAPD 0x00020000 /* I am kswapd */
#define PF_MEMALLOC_NOFS 0x00040000 /* All allocations inherit GFP_NOFS. See memalloc_nfs_save() */
#define PF_MEMALLOC_NOIO 0x00080000 /* All allocations inherit GFP_NOIO. See memalloc_noio_save() */

View File

@@ -2103,7 +2103,7 @@ config FAIL_SKB_REALLOC
reallocated, catching possible invalid pointers to the skb.
For more information, check
Documentation/dev-tools/fault-injection/fault-injection.rst
Documentation/fault-injection/fault-injection.rst
config FAULT_INJECTION_CONFIGFS
bool "Configfs interface for fault-injection capabilities"

View File

@@ -3181,6 +3181,7 @@ static int kcompactd(void *p)
long default_timeout = msecs_to_jiffies(HPAGE_FRAG_CHECK_INTERVAL_MSEC);
long timeout = default_timeout;
current->flags |= PF_KCOMPACTD;
set_freezable();
pgdat->kcompactd_max_order = 0;
@@ -3237,6 +3238,8 @@ static int kcompactd(void *p)
pgdat->proactive_compact_trigger = false;
}
current->flags &= ~PF_KCOMPACTD;
return 0;
}

View File

@@ -2943,6 +2943,14 @@ int replace_free_hugepage_folios(unsigned long start_pfn, unsigned long end_pfn)
return ret;
}
void wait_for_freed_hugetlb_folios(void)
{
if (llist_empty(&hpage_freelist))
return;
flush_work(&free_hpage_work);
}
typedef enum {
/*
* For either 0/1: we checked the per-vma resv map, and one resv

View File

@@ -1115,7 +1115,7 @@ static inline int find_next_best_node(int node, nodemask_t *used_node_mask)
* mm/memory-failure.c
*/
#ifdef CONFIG_MEMORY_FAILURE
void unmap_poisoned_folio(struct folio *folio, enum ttu_flags ttu);
int unmap_poisoned_folio(struct folio *folio, unsigned long pfn, bool must_kill);
void shake_folio(struct folio *folio);
extern int hwpoison_filter(struct page *p);
@@ -1138,8 +1138,9 @@ unsigned long page_mapped_in_vma(const struct page *page,
struct vm_area_struct *vma);
#else
static inline void unmap_poisoned_folio(struct folio *folio, enum ttu_flags ttu)
static inline int unmap_poisoned_folio(struct folio *folio, unsigned long pfn, bool must_kill)
{
return -EBUSY;
}
#endif

View File

@@ -357,6 +357,7 @@ void kmsan_handle_dma(struct page *page, size_t offset, size_t size,
size -= to_go;
}
}
EXPORT_SYMBOL_GPL(kmsan_handle_dma);
void kmsan_handle_dma_sg(struct scatterlist *sg, int nents,
enum dma_data_direction dir)

View File

@@ -1556,11 +1556,35 @@ static int get_hwpoison_page(struct page *p, unsigned long flags)
return ret;
}
void unmap_poisoned_folio(struct folio *folio, enum ttu_flags ttu)
int unmap_poisoned_folio(struct folio *folio, unsigned long pfn, bool must_kill)
{
if (folio_test_hugetlb(folio) && !folio_test_anon(folio)) {
struct address_space *mapping;
enum ttu_flags ttu = TTU_IGNORE_MLOCK | TTU_SYNC | TTU_HWPOISON;
struct address_space *mapping;
if (folio_test_swapcache(folio)) {
pr_err("%#lx: keeping poisoned page in swap cache\n", pfn);
ttu &= ~TTU_HWPOISON;
}
/*
* Propagate the dirty bit from PTEs to struct page first, because we
* need this to decide if we should kill or just drop the page.
* XXX: the dirty test could be racy: set_page_dirty() may not always
* be called inside page lock (it's recommended but not enforced).
*/
mapping = folio_mapping(folio);
if (!must_kill && !folio_test_dirty(folio) && mapping &&
mapping_can_writeback(mapping)) {
if (folio_mkclean(folio)) {
folio_set_dirty(folio);
} else {
ttu &= ~TTU_HWPOISON;
pr_info("%#lx: corrupted page was clean: dropped without side effects\n",
pfn);
}
}
if (folio_test_hugetlb(folio) && !folio_test_anon(folio)) {
/*
* For hugetlb folios in shared mappings, try_to_unmap
* could potentially call huge_pmd_unshare. Because of
@@ -1572,7 +1596,7 @@ void unmap_poisoned_folio(struct folio *folio, enum ttu_flags ttu)
if (!mapping) {
pr_info("%#lx: could not lock mapping for mapped hugetlb folio\n",
folio_pfn(folio));
return;
return -EBUSY;
}
try_to_unmap(folio, ttu|TTU_RMAP_LOCKED);
@@ -1580,6 +1604,8 @@ void unmap_poisoned_folio(struct folio *folio, enum ttu_flags ttu)
} else {
try_to_unmap(folio, ttu);
}
return folio_mapped(folio) ? -EBUSY : 0;
}
/*
@@ -1589,8 +1615,6 @@ void unmap_poisoned_folio(struct folio *folio, enum ttu_flags ttu)
static bool hwpoison_user_mappings(struct folio *folio, struct page *p,
unsigned long pfn, int flags)
{
enum ttu_flags ttu = TTU_IGNORE_MLOCK | TTU_SYNC | TTU_HWPOISON;
struct address_space *mapping;
LIST_HEAD(tokill);
bool unmap_success;
int forcekill;
@@ -1613,29 +1637,6 @@ static bool hwpoison_user_mappings(struct folio *folio, struct page *p,
if (!folio_mapped(folio))
return true;
if (folio_test_swapcache(folio)) {
pr_err("%#lx: keeping poisoned page in swap cache\n", pfn);
ttu &= ~TTU_HWPOISON;
}
/*
* Propagate the dirty bit from PTEs to struct page first, because we
* need this to decide if we should kill or just drop the page.
* XXX: the dirty test could be racy: set_page_dirty() may not always
* be called inside page lock (it's recommended but not enforced).
*/
mapping = folio_mapping(folio);
if (!(flags & MF_MUST_KILL) && !folio_test_dirty(folio) && mapping &&
mapping_can_writeback(mapping)) {
if (folio_mkclean(folio)) {
folio_set_dirty(folio);
} else {
ttu &= ~TTU_HWPOISON;
pr_info("%#lx: corrupted page was clean: dropped without side effects\n",
pfn);
}
}
/*
* First collect all the processes that have the page
* mapped in dirty form. This has to be done before try_to_unmap,
@@ -1643,9 +1644,7 @@ static bool hwpoison_user_mappings(struct folio *folio, struct page *p,
*/
collect_procs(folio, p, &tokill, flags & MF_ACTION_REQUIRED);
unmap_poisoned_folio(folio, ttu);
unmap_success = !folio_mapped(folio);
unmap_success = !unmap_poisoned_folio(folio, pfn, flags & MF_MUST_KILL);
if (!unmap_success)
pr_err("%#lx: failed to unmap page (folio mapcount=%d)\n",
pfn, folio_mapcount(folio));

View File

@@ -3051,8 +3051,10 @@ static int __apply_to_page_range(struct mm_struct *mm, unsigned long addr,
next = pgd_addr_end(addr, end);
if (pgd_none(*pgd) && !create)
continue;
if (WARN_ON_ONCE(pgd_leaf(*pgd)))
return -EINVAL;
if (WARN_ON_ONCE(pgd_leaf(*pgd))) {
err = -EINVAL;
break;
}
if (!pgd_none(*pgd) && WARN_ON_ONCE(pgd_bad(*pgd))) {
if (!create)
continue;
@@ -5183,7 +5185,11 @@ vm_fault_t finish_fault(struct vm_fault *vmf)
bool is_cow = (vmf->flags & FAULT_FLAG_WRITE) &&
!(vma->vm_flags & VM_SHARED);
int type, nr_pages;
unsigned long addr = vmf->address;
unsigned long addr;
bool needs_fallback = false;
fallback:
addr = vmf->address;
/* Did we COW the page? */
if (is_cow)
@@ -5222,7 +5228,8 @@ vm_fault_t finish_fault(struct vm_fault *vmf)
* approach also applies to non-anonymous-shmem faults to avoid
* inflating the RSS of the process.
*/
if (!vma_is_anon_shmem(vma) || unlikely(userfaultfd_armed(vma))) {
if (!vma_is_anon_shmem(vma) || unlikely(userfaultfd_armed(vma)) ||
unlikely(needs_fallback)) {
nr_pages = 1;
} else if (nr_pages > 1) {
pgoff_t idx = folio_page_idx(folio, page);
@@ -5258,9 +5265,9 @@ vm_fault_t finish_fault(struct vm_fault *vmf)
ret = VM_FAULT_NOPAGE;
goto unlock;
} else if (nr_pages > 1 && !pte_range_none(vmf->pte, nr_pages)) {
update_mmu_tlb_range(vma, addr, vmf->pte, nr_pages);
ret = VM_FAULT_NOPAGE;
goto unlock;
needs_fallback = true;
pte_unmap_unlock(vmf->pte, vmf->ptl);
goto fallback;
}
folio_ref_add(folio, nr_pages - 1);

View File

@@ -1822,27 +1822,25 @@ static void do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
if (folio_test_large(folio))
pfn = folio_pfn(folio) + folio_nr_pages(folio) - 1;
/*
* HWPoison pages have elevated reference counts so the migration would
* fail on them. It also doesn't make any sense to migrate them in the
* first place. Still try to unmap such a page in case it is still mapped
* (keep the unmap as the catch all safety net).
*/
if (folio_test_hwpoison(folio) ||
(folio_test_large(folio) && folio_test_has_hwpoisoned(folio))) {
if (WARN_ON(folio_test_lru(folio)))
folio_isolate_lru(folio);
if (folio_mapped(folio))
unmap_poisoned_folio(folio, TTU_IGNORE_MLOCK);
continue;
}
if (!folio_try_get(folio))
continue;
if (unlikely(page_folio(page) != folio))
goto put_folio;
if (folio_test_hwpoison(folio) ||
(folio_test_large(folio) && folio_test_has_hwpoisoned(folio))) {
if (WARN_ON(folio_test_lru(folio)))
folio_isolate_lru(folio);
if (folio_mapped(folio)) {
folio_lock(folio);
unmap_poisoned_folio(folio, pfn, false);
folio_unlock(folio);
}
goto put_folio;
}
if (!isolate_folio_to_list(folio, &source)) {
if (__ratelimit(&migrate_rs)) {
pr_warn("failed to isolate pfn %lx\n",

View File

@@ -4243,6 +4243,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
restart:
compaction_retries = 0;
no_progress_loops = 0;
compact_result = COMPACT_SKIPPED;
compact_priority = DEF_COMPACT_PRIORITY;
cpuset_mems_cookie = read_mems_allowed_begin();
zonelist_iter_cookie = zonelist_iter_begin();
@@ -5849,11 +5850,10 @@ static void setup_per_zone_lowmem_reserve(void)
for (j = i + 1; j < MAX_NR_ZONES; j++) {
struct zone *upper_zone = &pgdat->node_zones[j];
bool empty = !zone_managed_pages(upper_zone);
managed_pages += zone_managed_pages(upper_zone);
if (clear || empty)
if (clear)
zone->lowmem_reserve[j] = 0;
else
zone->lowmem_reserve[j] = managed_pages / ratio;

Some files were not shown because too many files have changed in this diff Show More