Merge tag 'mm-hotfixes-stable-2025-01-13-00-03' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm

Pull misc fixes from Andrew Morton:
 "18 hotfixes. 11 are cc:stable. 13 are MM and 5 are non-MM.

  All patches are singletons - please see the relevant changelogs for
  details"

* tag 'mm-hotfixes-stable-2025-01-13-00-03' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm:
  fs/proc: fix softlockup in __read_vmcore (part 2)
  mm: fix assertion in folio_end_read()
  mm: vmscan : pgdemote vmstat is not getting updated when MGLRU is enabled.
  vmstat: disable vmstat_work on vmstat_cpu_down_prep()
  zram: fix potential UAF of zram table
  selftests/mm: set allocated memory to non-zero content in cow test
  mm: clear uffd-wp PTE/PMD state on mremap()
  module: fix writing of livepatch relocations in ROX text
  mm: zswap: properly synchronize freeing resources during CPU hotunplug
  Revert "mm: zswap: fix race between [de]compression and CPU hotunplug"
  hugetlb: fix NULL pointer dereference in trace_hugetlbfs_alloc_inode
  mm: fix div by zero in bdi_ratio_from_pages
  x86/execmem: fix ROX cache usage in Xen PV guests
  filemap: avoid truncating 64-bit offset to 32 bits
  tools: fix atomic_set() definition to set the value correctly
  mm/mempolicy: count MPOL_WEIGHTED_INTERLEAVE to "interleave_hit"
  scripts/decode_stacktrace.sh: fix decoding of lines with an additional info
  mm/kmemleak: fix percpu memory leak detection failure
Linus Torvalds 2025-01-13 09:03:18 -08:00
commit c45323b756
20 changed files with 163 additions and 42 deletions


@ -1080,7 +1080,8 @@ struct execmem_info __init *execmem_arch_setup(void)
start = MODULES_VADDR + offset;
- if (IS_ENABLED(CONFIG_ARCH_HAS_EXECMEM_ROX)) {
+ if (IS_ENABLED(CONFIG_ARCH_HAS_EXECMEM_ROX) &&
+ cpu_feature_enabled(X86_FEATURE_PSE)) {
pgprot = PAGE_KERNEL_ROX;
flags = EXECMEM_KASAN_SHADOW | EXECMEM_ROX_CACHE;
} else {
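
For background on the hunk above: IS_ENABLED() collapses to a compile-time constant 1 or 0, so the whole ROX-cache branch disappears when the config option is off, while cpu_feature_enabled(X86_FEATURE_PSE) adds the runtime check that excludes Xen PV guests, which do not have PSE. Below is a simplified userspace re-creation of the IS_ENABLED() preprocessor trick, modelled on the kernel's kconfig.h helpers but trimmed down for illustration; it is a sketch, not the kernel's exact definition.

#include <stdio.h>

/* A "config symbol" is either defined to 1 or not defined at all. */
#define __ARG_PLACEHOLDER_1 0,
#define __take_second_arg(__ignored, val, ...) val
#define __is_defined(x) ___is_defined(x)
#define ___is_defined(val) ____is_defined(__ARG_PLACEHOLDER_##val)
#define ____is_defined(arg1_or_junk) __take_second_arg(arg1_or_junk 1, 0)
#define IS_ENABLED(option) __is_defined(option)

#define CONFIG_ARCH_HAS_EXECMEM_ROX 1	/* comment out to flip the result */

int main(void)
{
	if (IS_ENABLED(CONFIG_ARCH_HAS_EXECMEM_ROX))
		printf("ROX cache path compiled in\n");
	else
		printf("ROX cache path compiled out\n");
	return 0;
}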


@ -1468,6 +1468,7 @@ static bool zram_meta_alloc(struct zram *zram, u64 disksize)
zram->mem_pool = zs_create_pool(zram->disk->disk_name);
if (!zram->mem_pool) {
vfree(zram->table);
+ zram->table = NULL;
return false;
}
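
The one-line addition above is the usual free-and-reset idiom: once zram->table has been vfree()d on the error path, leaving the stale pointer behind would let a later reset or teardown path free or dereference it again. A minimal userspace sketch of the pattern, with made-up names (this is not zram's API):

#include <stdio.h>
#include <stdlib.h>

struct table_owner {
	int *table;			/* stands in for zram->table */
};

/* Partial-failure path: free what was allocated and clear the pointer so
 * the caller never sees a dangling reference. */
static int owner_init(struct table_owner *o, int fail_later)
{
	o->table = calloc(16, sizeof(*o->table));
	if (!o->table)
		return -1;

	if (fail_later) {		/* e.g. a second allocation failed */
		free(o->table);
		o->table = NULL;	/* the crux of the fix above */
		return -1;
	}
	return 0;
}

/* Teardown that may run even after a failed init. */
static void owner_reset(struct table_owner *o)
{
	free(o->table);			/* free(NULL) is a safe no-op */
	o->table = NULL;
}

int main(void)
{
	struct table_owner o = { 0 };

	if (owner_init(&o, 1))
		fprintf(stderr, "init failed\n");
	owner_reset(&o);		/* no double free, thanks to the NULL reset */
	return 0;
}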


@ -404,6 +404,8 @@ static ssize_t __read_vmcore(struct iov_iter *iter, loff_t *fpos)
if (!iov_iter_count(iter))
return acc;
}
+ cond_resched();
}
return acc;


@ -773,7 +773,8 @@ void *__module_writable_address(struct module *mod, void *loc);
static inline void *module_writable_address(struct module *mod, void *loc)
{
- if (!IS_ENABLED(CONFIG_ARCH_HAS_EXECMEM_ROX) || !mod)
+ if (!IS_ENABLED(CONFIG_ARCH_HAS_EXECMEM_ROX) || !mod ||
+ mod->state != MODULE_STATE_UNFORMED)
return loc;
return __module_writable_address(mod, loc);
}


@ -247,6 +247,13 @@ static inline bool vma_can_userfault(struct vm_area_struct *vma,
vma_is_shmem(vma);
}
+ static inline bool vma_has_uffd_without_event_remap(struct vm_area_struct *vma)
+ {
+ struct userfaultfd_ctx *uffd_ctx = vma->vm_userfaultfd_ctx.ctx;
+ return uffd_ctx && (uffd_ctx->features & UFFD_FEATURE_EVENT_REMAP) == 0;
+ }
extern int dup_userfaultfd(struct vm_area_struct *, struct list_head *);
extern void dup_userfaultfd_complete(struct list_head *);
void dup_userfaultfd_fail(struct list_head *);
@ -402,6 +409,11 @@ static inline bool userfaultfd_wp_async(struct vm_area_struct *vma)
return false;
}
+ static inline bool vma_has_uffd_without_event_remap(struct vm_area_struct *vma)
+ {
+ return false;
+ }
#endif /* CONFIG_USERFAULTFD */
static inline bool userfaultfd_wp_use_markers(struct vm_area_struct *vma)


@ -23,7 +23,7 @@ TRACE_EVENT(hugetlbfs_alloc_inode,
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
- __entry->dir = dir->i_ino;
+ __entry->dir = dir ? dir->i_ino : 0;
__entry->mode = mode;
),


@ -1523,7 +1523,7 @@ void folio_end_read(struct folio *folio, bool success)
/* Must be in bottom byte for x86 to work */
BUILD_BUG_ON(PG_uptodate > 7);
VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
- VM_BUG_ON_FOLIO(folio_test_uptodate(folio), folio);
+ VM_BUG_ON_FOLIO(success && folio_test_uptodate(folio), folio);
if (likely(success))
mask |= 1 << PG_uptodate;
@ -2996,7 +2996,7 @@ static inline loff_t folio_seek_hole_data(struct xa_state *xas,
if (ops->is_partially_uptodate(folio, offset, bsz) ==
seek_data)
break;
- start = (start + bsz) & ~(bsz - 1);
+ start = (start + bsz) & ~((u64)bsz - 1);
offset += bsz;
} while (offset < folio_size(folio));
unlock:
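
In the second hunk, the (u64)bsz cast is the whole fix: bsz is a 32-bit quantity, so ~(bsz - 1) is a 32-bit mask and AND-ing it with a 64-bit offset silently clears bits 32 and up, truncating the seek position. A standalone sketch of the pitfall, using plain C types instead of loff_t/u64:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	int64_t start = 0x100000000LL + 0x1000;	/* an offset just above 4 GiB */
	unsigned int bsz = 512;			/* 32-bit block size */

	/* Old form: ~(bsz - 1) is the 32-bit value 0xFFFFFE00, so the AND
	 * wipes out every bit above bit 31 and the offset wraps below 4 GiB. */
	int64_t bad = (start + bsz) & ~(bsz - 1);

	/* Fixed form, mirroring the (u64)bsz cast: widen before negating so
	 * the mask keeps the high bits. */
	int64_t good = (start + bsz) & ~((uint64_t)bsz - 1);

	printf("bad  = 0x%" PRIx64 "\n", (uint64_t)bad);	/* 0x1200      */
	printf("good = 0x%" PRIx64 "\n", (uint64_t)good);	/* 0x100001200 */
	return 0;
}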


@ -2206,6 +2206,16 @@ static pmd_t move_soft_dirty_pmd(pmd_t pmd)
return pmd;
}
+ static pmd_t clear_uffd_wp_pmd(pmd_t pmd)
+ {
+ if (pmd_present(pmd))
+ pmd = pmd_clear_uffd_wp(pmd);
+ else if (is_swap_pmd(pmd))
+ pmd = pmd_swp_clear_uffd_wp(pmd);
+ return pmd;
+ }
bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr,
unsigned long new_addr, pmd_t *old_pmd, pmd_t *new_pmd)
{
@ -2244,6 +2254,8 @@ bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr,
pgtable_trans_huge_deposit(mm, new_pmd, pgtable);
}
pmd = move_soft_dirty_pmd(pmd);
+ if (vma_has_uffd_without_event_remap(vma))
+ pmd = clear_uffd_wp_pmd(pmd);
set_pmd_at(mm, new_addr, new_pmd, pmd);
if (force_flush)
flush_pmd_tlb_range(vma, old_addr, old_addr + PMD_SIZE);


@ -5402,6 +5402,7 @@ static void move_huge_pte(struct vm_area_struct *vma, unsigned long old_addr,
unsigned long new_addr, pte_t *src_pte, pte_t *dst_pte,
unsigned long sz)
{
+ bool need_clear_uffd_wp = vma_has_uffd_without_event_remap(vma);
struct hstate *h = hstate_vma(vma);
struct mm_struct *mm = vma->vm_mm;
spinlock_t *src_ptl, *dst_ptl;
@ -5418,7 +5419,18 @@ static void move_huge_pte(struct vm_area_struct *vma, unsigned long old_addr,
spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
pte = huge_ptep_get_and_clear(mm, old_addr, src_pte);
- set_huge_pte_at(mm, new_addr, dst_pte, pte, sz);
+ if (need_clear_uffd_wp && pte_marker_uffd_wp(pte))
+ huge_pte_clear(mm, new_addr, dst_pte, sz);
+ else {
+ if (need_clear_uffd_wp) {
+ if (pte_present(pte))
+ pte = huge_pte_clear_uffd_wp(pte);
+ else if (is_swap_pte(pte))
+ pte = pte_swp_clear_uffd_wp(pte);
+ }
+ set_huge_pte_at(mm, new_addr, dst_pte, pte, sz);
+ }
if (src_ptl != dst_ptl)
spin_unlock(src_ptl);


@ -1093,7 +1093,7 @@ void __ref kmemleak_alloc_percpu(const void __percpu *ptr, size_t size,
pr_debug("%s(0x%px, %zu)\n", __func__, ptr, size);
if (kmemleak_enabled && ptr && !IS_ERR_PCPU(ptr))
- create_object_percpu((__force unsigned long)ptr, size, 0, gfp);
+ create_object_percpu((__force unsigned long)ptr, size, 1, gfp);
}
EXPORT_SYMBOL_GPL(kmemleak_alloc_percpu);


@ -2268,7 +2268,8 @@ struct page *alloc_pages_mpol_noprof(gfp_t gfp, unsigned int order,
page = __alloc_pages_noprof(gfp, order, nid, nodemask);
- if (unlikely(pol->mode == MPOL_INTERLEAVE) && page) {
+ if (unlikely(pol->mode == MPOL_INTERLEAVE ||
+ pol->mode == MPOL_WEIGHTED_INTERLEAVE) && page) {
/* skip NUMA_INTERLEAVE_HIT update if numa stats is disabled */
if (static_branch_likely(&vm_numa_stat_key) &&
page_to_nid(page) == nid) {


@ -138,6 +138,7 @@ static int move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
struct vm_area_struct *new_vma, pmd_t *new_pmd,
unsigned long new_addr, bool need_rmap_locks)
{
+ bool need_clear_uffd_wp = vma_has_uffd_without_event_remap(vma);
struct mm_struct *mm = vma->vm_mm;
pte_t *old_pte, *new_pte, pte;
pmd_t dummy_pmdval;
@ -216,7 +217,18 @@ static int move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
force_flush = true;
pte = move_pte(pte, old_addr, new_addr);
pte = move_soft_dirty_pte(pte);
- set_pte_at(mm, new_addr, new_pte, pte);
+ if (need_clear_uffd_wp && pte_marker_uffd_wp(pte))
+ pte_clear(mm, new_addr, new_pte);
+ else {
+ if (need_clear_uffd_wp) {
+ if (pte_present(pte))
+ pte = pte_clear_uffd_wp(pte);
+ else if (is_swap_pte(pte))
+ pte = pte_swp_clear_uffd_wp(pte);
+ }
+ set_pte_at(mm, new_addr, new_pte, pte);
+ }
}
arch_leave_lazy_mmu_mode();
@ -278,6 +290,15 @@ static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
if (WARN_ON_ONCE(!pmd_none(*new_pmd)))
return false;
+ /* If this pmd belongs to a uffd vma with remap events disabled, we need
+ * to ensure that the uffd-wp state is cleared from all pgtables. This
+ * means recursing into lower page tables in move_page_tables(), and we
+ * can reuse the existing code if we simply treat the entry as "not
+ * moved".
+ */
+ if (vma_has_uffd_without_event_remap(vma))
+ return false;
/*
* We don't have to worry about the ordering of src and dst
* ptlocks because exclusive mmap_lock prevents deadlock.
@ -333,6 +354,15 @@ static bool move_normal_pud(struct vm_area_struct *vma, unsigned long old_addr,
if (WARN_ON_ONCE(!pud_none(*new_pud)))
return false;
+ /* If this pud belongs to a uffd vma with remap events disabled, we need
+ * to ensure that the uffd-wp state is cleared from all pgtables. This
+ * means recursing into lower page tables in move_page_tables(), and we
+ * can reuse the existing code if we simply treat the entry as "not
+ * moved".
+ */
+ if (vma_has_uffd_without_event_remap(vma))
+ return false;
/*
* We don't have to worry about the ordering of src and dst
* ptlocks because exclusive mmap_lock prevents deadlock.


@ -692,6 +692,8 @@ static unsigned long bdi_ratio_from_pages(unsigned long pages)
unsigned long ratio;
global_dirty_limits(&background_thresh, &dirty_thresh);
+ if (!dirty_thresh)
+ return -EINVAL;
ratio = div64_u64(pages * 100ULL * BDI_RATIO_SCALE, dirty_thresh);
return ratio;
@ -790,13 +792,15 @@ int bdi_set_min_bytes(struct backing_dev_info *bdi, u64 min_bytes)
{
int ret;
unsigned long pages = min_bytes >> PAGE_SHIFT;
- unsigned long min_ratio;
+ long min_ratio;
ret = bdi_check_pages_limit(pages);
if (ret)
return ret;
min_ratio = bdi_ratio_from_pages(pages);
+ if (min_ratio < 0)
+ return min_ratio;
return __bdi_set_min_ratio(bdi, min_ratio);
}
@ -809,13 +813,15 @@ int bdi_set_max_bytes(struct backing_dev_info *bdi, u64 max_bytes)
{
int ret;
unsigned long pages = max_bytes >> PAGE_SHIFT;
- unsigned long max_ratio;
+ long max_ratio;
ret = bdi_check_pages_limit(pages);
if (ret)
return ret;
max_ratio = bdi_ratio_from_pages(pages);
+ if (max_ratio < 0)
+ return max_ratio;
return __bdi_set_max_ratio(bdi, max_ratio);
}
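
Note that the hunks above do two things: bdi_ratio_from_pages() now fails with -EINVAL when the dirty threshold is zero, and the callers' min_ratio/max_ratio locals become signed so that the new "< 0" checks can actually fire. A standalone sketch of that signedness pitfall, with a stand-in for bdi_ratio_from_pages():

#include <errno.h>
#include <stdio.h>

/* Stand-in for bdi_ratio_from_pages() after the fix: may report an error. */
static long ratio_from_pages(unsigned long pages, unsigned long dirty_thresh)
{
	if (!dirty_thresh)
		return -EINVAL;
	return (long)(pages * 100 / dirty_thresh);
}

int main(void)
{
	unsigned long bad = ratio_from_pages(10, 0);	/* old: unsigned local */
	long good = ratio_from_pages(10, 0);		/* fixed: signed local */

	/* With an unsigned local, a "< 0" check can never be true, so the
	 * error silently turns into an enormous ratio. */
	printf("unsigned local: %lu (error invisible)\n", bad);
	printf("signed local:   %ld (error detectable)\n", good);
	if (good < 0)
		printf("propagating error %ld\n", good);
	return 0;
}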


@ -4642,6 +4642,9 @@ retry:
reset_batch_size(walk);
}
+ __mod_lruvec_state(lruvec, PGDEMOTE_KSWAPD + reclaimer_offset(),
+ stat.nr_demoted);
item = PGSTEAL_KSWAPD + reclaimer_offset();
if (!cgroup_reclaim(sc))
__count_vm_events(item, reclaimed);


@ -2122,10 +2122,20 @@ static void __init start_shepherd_timer(void)
{
int cpu;
- for_each_possible_cpu(cpu)
+ for_each_possible_cpu(cpu) {
INIT_DEFERRABLE_WORK(per_cpu_ptr(&vmstat_work, cpu),
vmstat_update);
+ /*
+ * For secondary CPUs during CPU hotplug scenarios,
+ * vmstat_cpu_online() will enable the work.
+ * mm/vmstat:online enables and disables vmstat_work
+ * symmetrically during CPU hotplug events.
+ */
+ if (!cpu_online(cpu))
+ disable_delayed_work_sync(&per_cpu(vmstat_work, cpu));
+ }
schedule_delayed_work(&shepherd,
round_jiffies_relative(sysctl_stat_interval));
}
@ -2148,13 +2158,14 @@ static int vmstat_cpu_online(unsigned int cpu)
if (!node_state(cpu_to_node(cpu), N_CPU)) {
node_set_state(cpu_to_node(cpu), N_CPU);
}
+ enable_delayed_work(&per_cpu(vmstat_work, cpu));
return 0;
}
static int vmstat_cpu_down_prep(unsigned int cpu)
{
- cancel_delayed_work_sync(&per_cpu(vmstat_work, cpu));
+ disable_delayed_work_sync(&per_cpu(vmstat_work, cpu));
return 0;
}


@ -251,7 +251,7 @@ static struct zswap_pool *zswap_pool_create(char *type, char *compressor)
struct zswap_pool *pool;
char name[38]; /* 'zswap' + 32 char (max) num + \0 */
gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;
- int ret;
+ int ret, cpu;
if (!zswap_has_pool) {
/* if either are unset, pool initialization failed, and we
@ -285,6 +285,9 @@ static struct zswap_pool *zswap_pool_create(char *type, char *compressor)
goto error;
}
+ for_each_possible_cpu(cpu)
+ mutex_init(&per_cpu_ptr(pool->acomp_ctx, cpu)->mutex);
ret = cpuhp_state_add_instance(CPUHP_MM_ZSWP_POOL_PREPARE,
&pool->node);
if (ret)
@ -821,11 +824,12 @@ static int zswap_cpu_comp_prepare(unsigned int cpu, struct hlist_node *node)
struct acomp_req *req;
int ret;
- mutex_init(&acomp_ctx->mutex);
+ mutex_lock(&acomp_ctx->mutex);
acomp_ctx->buffer = kmalloc_node(PAGE_SIZE * 2, GFP_KERNEL, cpu_to_node(cpu));
- if (!acomp_ctx->buffer)
- return -ENOMEM;
+ if (!acomp_ctx->buffer) {
+ ret = -ENOMEM;
+ goto buffer_fail;
+ }
acomp = crypto_alloc_acomp_node(pool->tfm_name, 0, 0, cpu_to_node(cpu));
if (IS_ERR(acomp)) {
@ -855,12 +859,15 @@ static int zswap_cpu_comp_prepare(unsigned int cpu, struct hlist_node *node)
acomp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
crypto_req_done, &acomp_ctx->wait);
+ mutex_unlock(&acomp_ctx->mutex);
return 0;
req_fail:
crypto_free_acomp(acomp_ctx->acomp);
acomp_fail:
kfree(acomp_ctx->buffer);
+ buffer_fail:
+ mutex_unlock(&acomp_ctx->mutex);
return ret;
}
@ -869,27 +876,43 @@ static int zswap_cpu_comp_dead(unsigned int cpu, struct hlist_node *node)
struct zswap_pool *pool = hlist_entry(node, struct zswap_pool, node);
struct crypto_acomp_ctx *acomp_ctx = per_cpu_ptr(pool->acomp_ctx, cpu);
+ mutex_lock(&acomp_ctx->mutex);
if (!IS_ERR_OR_NULL(acomp_ctx)) {
if (!IS_ERR_OR_NULL(acomp_ctx->req))
acomp_request_free(acomp_ctx->req);
+ acomp_ctx->req = NULL;
if (!IS_ERR_OR_NULL(acomp_ctx->acomp))
crypto_free_acomp(acomp_ctx->acomp);
kfree(acomp_ctx->buffer);
}
+ mutex_unlock(&acomp_ctx->mutex);
return 0;
}
- /* Prevent CPU hotplug from freeing up the per-CPU acomp_ctx resources */
- static struct crypto_acomp_ctx *acomp_ctx_get_cpu(struct crypto_acomp_ctx __percpu *acomp_ctx)
+ static struct crypto_acomp_ctx *acomp_ctx_get_cpu_lock(struct zswap_pool *pool)
{
- cpus_read_lock();
- return raw_cpu_ptr(acomp_ctx);
+ struct crypto_acomp_ctx *acomp_ctx;
+ for (;;) {
+ acomp_ctx = raw_cpu_ptr(pool->acomp_ctx);
+ mutex_lock(&acomp_ctx->mutex);
+ if (likely(acomp_ctx->req))
+ return acomp_ctx;
+ /*
+ * It is possible that we were migrated to a different CPU after
+ * getting the per-CPU ctx but before the mutex was acquired. If
+ * the old CPU got offlined, zswap_cpu_comp_dead() could have
+ * already freed ctx->req (among other things) and set it to
+ * NULL. Just try again on the new CPU that we ended up on.
+ */
+ mutex_unlock(&acomp_ctx->mutex);
+ }
}
- static void acomp_ctx_put_cpu(void)
+ static void acomp_ctx_put_unlock(struct crypto_acomp_ctx *acomp_ctx)
{
- cpus_read_unlock();
+ mutex_unlock(&acomp_ctx->mutex);
}
static bool zswap_compress(struct page *page, struct zswap_entry *entry,
@ -905,9 +928,7 @@ static bool zswap_compress(struct page *page, struct zswap_entry *entry,
gfp_t gfp;
u8 *dst;
- acomp_ctx = acomp_ctx_get_cpu(pool->acomp_ctx);
- mutex_lock(&acomp_ctx->mutex);
+ acomp_ctx = acomp_ctx_get_cpu_lock(pool);
dst = acomp_ctx->buffer;
sg_init_table(&input, 1);
sg_set_page(&input, page, PAGE_SIZE, 0);
@ -960,8 +981,7 @@ unlock:
else if (alloc_ret)
zswap_reject_alloc_fail++;
- mutex_unlock(&acomp_ctx->mutex);
- acomp_ctx_put_cpu();
+ acomp_ctx_put_unlock(acomp_ctx);
return comp_ret == 0 && alloc_ret == 0;
}
@ -972,9 +992,7 @@ static void zswap_decompress(struct zswap_entry *entry, struct folio *folio)
struct crypto_acomp_ctx *acomp_ctx;
u8 *src;
- acomp_ctx = acomp_ctx_get_cpu(entry->pool->acomp_ctx);
- mutex_lock(&acomp_ctx->mutex);
+ acomp_ctx = acomp_ctx_get_cpu_lock(entry->pool);
src = zpool_map_handle(zpool, entry->handle, ZPOOL_MM_RO);
/*
* If zpool_map_handle is atomic, we cannot reliably utilize its mapped buffer
@ -998,11 +1016,10 @@ static void zswap_decompress(struct zswap_entry *entry, struct folio *folio)
acomp_request_set_params(acomp_ctx->req, &input, &output, entry->length, PAGE_SIZE);
BUG_ON(crypto_wait_req(crypto_acomp_decompress(acomp_ctx->req), &acomp_ctx->wait));
BUG_ON(acomp_ctx->req->dlen != PAGE_SIZE);
- mutex_unlock(&acomp_ctx->mutex);
if (src != acomp_ctx->buffer)
zpool_unmap_handle(zpool, entry->handle);
- acomp_ctx_put_cpu();
+ acomp_ctx_put_unlock(acomp_ctx);
}
/*********************************
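
The retry loop in acomp_ctx_get_cpu_lock() above is an instance of a general pattern: pick a resource that may be torn down concurrently, take its lock, re-check validity under the lock, and retry elsewhere if it is gone. A rough userspace analogy using pthreads is sketched below; the two slots stand in for per-CPU contexts, the names are invented for the sketch, and it assumes at least one slot stays alive (build with cc -pthread):

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Toy analogue of the per-CPU acomp_ctx: a slot whose buffer may be torn
 * down (freed and set to NULL) under its mutex. */
struct slot {
	pthread_mutex_t lock;
	char *buffer;
};

#define NR_SLOTS 2
static struct slot slots[NR_SLOTS];

/* Pick a slot, lock it, and only return it if it is still alive. The slot
 * chosen before taking the lock may be dead by the time the lock is held,
 * so re-check under the lock and move on if needed. */
static struct slot *get_slot_lock(void)
{
	for (int i = 0; ; i = (i + 1) % NR_SLOTS) {
		struct slot *s = &slots[i];

		pthread_mutex_lock(&s->lock);
		if (s->buffer)
			return s;			/* still valid, keep the lock held */
		pthread_mutex_unlock(&s->lock);		/* torn down: try another slot */
	}
}

static void put_slot_unlock(struct slot *s)
{
	pthread_mutex_unlock(&s->lock);
}

/* Analogue of zswap_cpu_comp_dead(): free the resources under the lock. */
static void *teardown_thread(void *arg)
{
	struct slot *s = &slots[0];

	(void)arg;
	pthread_mutex_lock(&s->lock);
	free(s->buffer);
	s->buffer = NULL;
	pthread_mutex_unlock(&s->lock);
	return NULL;
}

int main(void)
{
	pthread_t t;

	for (int i = 0; i < NR_SLOTS; i++) {
		pthread_mutex_init(&slots[i].lock, NULL);
		slots[i].buffer = strdup("scratch");
	}

	pthread_create(&t, NULL, teardown_thread, NULL);
	pthread_join(&t, NULL);

	/* Slot 0 is gone; the lookup transparently falls back to slot 1. */
	struct slot *s = get_slot_lock();
	printf("got a live slot with buffer '%s'\n", s->buffer);
	put_slot_unlock(s);
	return 0;
}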


@ -286,6 +286,18 @@ handle_line() {
last=$(( $last - 1 ))
fi
+ # Extract info after the symbol if present. E.g.:
+ # func_name+0x54/0x80 (P)
+ # ^^^
+ # The regex assumes only uppercase letters will be used. To be
+ # extended if needed.
+ local info_str=""
+ if [[ ${words[$last]} =~ \([A-Z]*\) ]]; then
+ info_str=${words[$last]}
+ unset words[$last]
+ last=$(( $last - 1 ))
+ fi
if [[ ${words[$last]} =~ \[([^]]+)\] ]]; then
module=${words[$last]}
# some traces format is "(%pS)", which like "(foo+0x0/0x1 [bar])"
@ -313,9 +325,9 @@ handle_line() {
# Add up the line number to the symbol
if [[ -z ${module} ]]
then
- echo "${words[@]}" "$symbol"
+ echo "${words[@]}" "$symbol ${info_str}"
else
- echo "${words[@]}" "$symbol $module"
+ echo "${words[@]}" "$symbol $module ${info_str}"
fi
}


@ -758,7 +758,7 @@ static void do_run_with_base_page(test_fn fn, bool swapout)
}
/* Populate a base page. */
- memset(mem, 0, pagesize);
+ memset(mem, 1, pagesize);
if (swapout) {
madvise(mem, pagesize, MADV_PAGEOUT);
@ -824,12 +824,12 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize)
* Try to populate a THP. Touch the first sub-page and test if
* we get the last sub-page populated automatically.
*/
- mem[0] = 0;
+ mem[0] = 1;
if (!pagemap_is_populated(pagemap_fd, mem + thpsize - pagesize)) {
ksft_test_result_skip("Did not get a THP populated\n");
goto munmap;
}
- memset(mem, 0, thpsize);
+ memset(mem, 1, thpsize);
size = thpsize;
switch (thp_run) {
@ -1012,7 +1012,7 @@ static void run_with_hugetlb(test_fn fn, const char *desc, size_t hugetlbsize)
}
/* Populate an huge page. */
- memset(mem, 0, hugetlbsize);
+ memset(mem, 1, hugetlbsize);
/*
* We need a total of two hugetlb pages to handle COW/unsharing


@ -2,6 +2,6 @@
#define atomic_t int32_t
#define atomic_inc(x) uatomic_inc(x)
#define atomic_read(x) uatomic_read(x)
- #define atomic_set(x, y) do {} while (0)
+ #define atomic_set(x, y) uatomic_set(x, y)
#define U8_MAX UCHAR_MAX
#include "../../../../include/linux/maple_tree.h"


@ -6,7 +6,7 @@
#define atomic_t int32_t
#define atomic_inc(x) uatomic_inc(x)
#define atomic_read(x) uatomic_read(x)
- #define atomic_set(x, y) do {} while (0)
+ #define atomic_set(x, y) uatomic_set(x, y)
#define U8_MAX UCHAR_MAX
#endif /* _LINUX_ATOMIC_H */
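
For context on the two hunks above: the old stub made atomic_set() a no-op, so any test code that stored a value through it silently kept the previous value; the fix forwards to liburcu's uatomic_set(), matching the neighbouring atomic_inc()/atomic_read() wrappers. A tiny standalone demonstration of why a do-nothing setter breaks callers; plain assignment stands in for uatomic_set() here, which is enough for a single-threaded illustration:

#include <stdint.h>
#include <stdio.h>

typedef int32_t atomic_t;

/* Old (buggy) stub: discards the value entirely. */
#define atomic_set_noop(x, y)	do {} while (0)
/* Fixed behaviour for a single-threaded harness: actually store it. */
#define atomic_set_fixed(x, y)	(*(x) = (y))

int main(void)
{
	atomic_t a = 0, b = 0;

	atomic_set_noop(&a, 42);
	atomic_set_fixed(&b, 42);
	printf("no-op stub: a = %d (store silently lost)\n", a);
	printf("fixed stub: b = %d\n", b);
	return 0;
}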