Merge branch 'akpm' (patches from Andrew)

Merge misc fixes from Andrew Morton:
 "28 patches.

  Subsystems affected by this series: mm (memblock, pagealloc, hugetlb,
  highmem, kfence, oom-kill, madvise, kasan, userfaultfd, memcg, and
  zram), core-kernel, kconfig, fork, binfmt, MAINTAINERS, kbuild, and
  ia64"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (28 commits)
  zram: fix broken page writeback
  zram: fix return value on writeback_store
  mm/memcg: set memcg when splitting page
  mm/memcg: rename mem_cgroup_split_huge_fixup to split_page_memcg and add nr_pages argument
  ia64: fix ptrace(PTRACE_SYSCALL_INFO_EXIT) sign
  ia64: fix ia64_syscall_get_set_arguments() for break-based syscalls
  mm/userfaultfd: fix memory corruption due to writeprotect
  kasan: fix KASAN_STACK dependency for HW_TAGS
  kasan, mm: fix crash with HW_TAGS and DEBUG_PAGEALLOC
  mm/madvise: replace ptrace attach requirement for process_madvise
  include/linux/sched/mm.h: use rcu_dereference in in_vfork()
  kfence: fix reports if constant function prefixes exist
  kfence, slab: fix cache_alloc_debugcheck_after() for bulk allocations
  kfence: fix printk format for ptrdiff_t
  linux/compiler-clang.h: define HAVE_BUILTIN_BSWAP*
  MAINTAINERS: exclude uapi directories in API/ABI section
  binfmt_misc: fix possible deadlock in bm_register_write
  mm/highmem.c: fix zero_user_segments() with start > end
  hugetlb: do early cow when page pinned on src mm
  mm: use is_cow_mapping() across tree where proper
  ...
commit 50eb842fe5
Linus Torvalds, 2021-03-14 12:23:34 -07:00
28 changed files with 332 additions and 214 deletions

MAINTAINERS

@@ -261,8 +261,8 @@ ABI/API
 L:	linux-api@vger.kernel.org
 F:	include/linux/syscalls.h
 F:	kernel/sys_ni.c
-F:	include/uapi/
-F:	arch/*/include/uapi/
+X:	include/uapi/
+X:	arch/*/include/uapi/
 
 ABIT UGURU 1,2 HARDWARE MONITOR DRIVER
 M:	Hans de Goede <hdegoede@redhat.com>

arch/ia64/include/asm/syscall.h

@@ -32,7 +32,7 @@ static inline void syscall_rollback(struct task_struct *task,
 static inline long syscall_get_error(struct task_struct *task,
 				     struct pt_regs *regs)
 {
-	return regs->r10 == -1 ? regs->r8:0;
+	return regs->r10 == -1 ? -regs->r8:0;
 }
 
 static inline long syscall_get_return_value(struct task_struct *task,

arch/ia64/kernel/ptrace.c

@@ -2013,27 +2013,39 @@ static void syscall_get_set_args_cb(struct unw_frame_info *info, void *data)
 {
 	struct syscall_get_set_args *args = data;
 	struct pt_regs *pt = args->regs;
-	unsigned long *krbs, cfm, ndirty;
+	unsigned long *krbs, cfm, ndirty, nlocals, nouts;
 	int i, count;
 
 	if (unw_unwind_to_user(info) < 0)
 		return;
 
+	/*
+	 * We get here via a few paths:
+	 * - break instruction: cfm is shared with caller.
+	 *   syscall args are in out= regs, locals are non-empty.
+	 * - epsinstruction: cfm is set by br.call
+	 *   locals don't exist.
+	 *
+	 * For both cases argguments are reachable in cfm.sof - cfm.sol.
+	 * CFM: [ ... | sor: 17..14 | sol : 13..7 | sof : 6..0 ]
+	 */
 	cfm = pt->cr_ifs;
+	nlocals = (cfm >> 7) & 0x7f; /* aka sol */
+	nouts = (cfm & 0x7f) - nlocals; /* aka sof - sol */
 	krbs = (unsigned long *)info->task + IA64_RBS_OFFSET/8;
 	ndirty = ia64_rse_num_regs(krbs, krbs + (pt->loadrs >> 19));
 
 	count = 0;
 	if (in_syscall(pt))
-		count = min_t(int, args->n, cfm & 0x7f);
+		count = min_t(int, args->n, nouts);
 
+	/* Iterate over outs. */
 	for (i = 0; i < count; i++) {
+		int j = ndirty + nlocals + i + args->i;
 		if (args->rw)
-			*ia64_rse_skip_regs(krbs, ndirty + i + args->i) =
-				args->args[i];
+			*ia64_rse_skip_regs(krbs, j) = args->args[i];
 		else
-			args->args[i] = *ia64_rse_skip_regs(krbs,
-				ndirty + i + args->i);
+			args->args[i] = *ia64_rse_skip_regs(krbs, j);
 	}
 
 	if (!args->rw) {
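As a side note, the CFM bit layout the patch's comment describes can be illustrated with a small standalone userspace program; this is only a sketch with a made-up cfm value, not kernel code:

#include <stdio.h>

int main(void)
{
	unsigned long cfm = 0x390;		/* hypothetical CFM word */
	unsigned long sof = cfm & 0x7f;		/* size of frame */
	unsigned long sol = (cfm >> 7) & 0x7f;	/* size of locals */
	unsigned long nouts = sof - sol;	/* syscall args live in the outs */

	printf("sof=%lu sol=%lu outs=%lu\n", sof, sol, nouts);
	return 0;
}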

drivers/block/zram/zram_drv.c

@@ -627,7 +627,7 @@ static ssize_t writeback_store(struct device *dev,
 	struct bio_vec bio_vec;
 	struct page *page;
 	ssize_t ret = len;
-	int mode;
+	int mode, err;
 	unsigned long blk_idx = 0;
 
 	if (sysfs_streq(buf, "idle"))
@@ -638,8 +638,8 @@ static ssize_t writeback_store(struct device *dev,
 		if (strncmp(buf, PAGE_WB_SIG, sizeof(PAGE_WB_SIG) - 1))
 			return -EINVAL;
 
-		ret = kstrtol(buf + sizeof(PAGE_WB_SIG) - 1, 10, &index);
-		if (ret || index >= nr_pages)
+		if (kstrtol(buf + sizeof(PAGE_WB_SIG) - 1, 10, &index) ||
+				index >= nr_pages)
 			return -EINVAL;
 
 		nr_pages = 1;
@@ -663,7 +663,7 @@ static ssize_t writeback_store(struct device *dev,
 		goto release_init_lock;
 	}
 
-	while (nr_pages--) {
+	for (; nr_pages != 0; index++, nr_pages--) {
 		struct bio_vec bvec;
 
 		bvec.bv_page = page;
@@ -728,12 +728,17 @@ static ssize_t writeback_store(struct device *dev,
 		 * XXX: A single page IO would be inefficient for write
 		 * but it would be not bad as starter.
 		 */
-		ret = submit_bio_wait(&bio);
-		if (ret) {
+		err = submit_bio_wait(&bio);
+		if (err) {
 			zram_slot_lock(zram, index);
 			zram_clear_flag(zram, index, ZRAM_UNDER_WB);
 			zram_clear_flag(zram, index, ZRAM_IDLE);
 			zram_slot_unlock(zram, index);
+			/*
+			 * Return last IO error unless every IO were
+			 * not suceeded.
+			 */
+			ret = err;
 			continue;
 		}
 
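The writeback fix above separates the per-page I/O status (err) from the value the store routine ultimately returns (ret). A minimal userspace sketch of that pattern, with made-up helper names, might look like this:

#include <stdio.h>

/* Stand-in for a per-page writeback that can fail; index 2 fails here. */
static int write_one_page(int idx)
{
	return (idx == 2) ? -5 /* pretend -EIO */ : 0;
}

int main(void)
{
	int ret = 0;	/* value ultimately reported to the caller */
	int err;	/* status of the most recent I/O */

	for (int idx = 0; idx < 4; idx++) {
		err = write_one_page(idx);
		if (err) {
			ret = err;	/* remember the last I/O error */
			continue;	/* but keep writing the rest */
		}
	}

	printf("final ret=%d\n", ret);
	return 0;
}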

drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c

@@ -500,8 +500,6 @@ vm_fault_t vmw_bo_vm_huge_fault(struct vm_fault *vmf,
 	vm_fault_t ret;
 	pgoff_t fault_page_size;
 	bool write = vmf->flags & FAULT_FLAG_WRITE;
-	bool is_cow_mapping =
-		(vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
 
 	switch (pe_size) {
 	case PE_SIZE_PMD:
@@ -518,7 +516,7 @@ vm_fault_t vmw_bo_vm_huge_fault(struct vm_fault *vmf,
 	}
 
 	/* Always do write dirty-tracking and COW on PTE level. */
-	if (write && (READ_ONCE(vbo->dirty) || is_cow_mapping))
+	if (write && (READ_ONCE(vbo->dirty) || is_cow_mapping(vma->vm_flags)))
 		return VM_FAULT_FALLBACK;
 
 	ret = ttm_bo_vm_reserve(bo, vmf);

drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c

@@ -49,7 +49,7 @@ int vmw_mmap(struct file *filp, struct vm_area_struct *vma)
 	vma->vm_ops = &vmw_vm_ops;
 
 	/* Use VM_PFNMAP rather than VM_MIXEDMAP if not a COW mapping */
-	if ((vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) != VM_MAYWRITE)
+	if (!is_cow_mapping(vma->vm_flags))
 		vma->vm_flags = (vma->vm_flags & ~VM_MIXEDMAP) | VM_PFNMAP;
 
 	return 0;

fs/binfmt_misc.c

@@ -649,12 +649,24 @@ static ssize_t bm_register_write(struct file *file, const char __user *buffer,
 	struct super_block *sb = file_inode(file)->i_sb;
 	struct dentry *root = sb->s_root, *dentry;
 	int err = 0;
+	struct file *f = NULL;
 
 	e = create_entry(buffer, count);
 
 	if (IS_ERR(e))
 		return PTR_ERR(e);
 
+	if (e->flags & MISC_FMT_OPEN_FILE) {
+		f = open_exec(e->interpreter);
+		if (IS_ERR(f)) {
+			pr_notice("register: failed to install interpreter file %s\n",
+				 e->interpreter);
+			kfree(e);
+			return PTR_ERR(f);
+		}
+		e->interp_file = f;
+	}
+
 	inode_lock(d_inode(root));
 	dentry = lookup_one_len(e->name, root, strlen(e->name));
 	err = PTR_ERR(dentry);
@@ -678,21 +690,6 @@ static ssize_t bm_register_write(struct file *file, const char __user *buffer,
 		goto out2;
 	}
 
-	if (e->flags & MISC_FMT_OPEN_FILE) {
-		struct file *f;
-
-		f = open_exec(e->interpreter);
-		if (IS_ERR(f)) {
-			err = PTR_ERR(f);
-			pr_notice("register: failed to install interpreter file %s\n", e->interpreter);
-			simple_release_fs(&bm_mnt, &entry_count);
-			iput(inode);
-			inode = NULL;
-			goto out2;
-		}
-		e->interp_file = f;
-	}
-
 	e->dentry = dget(dentry);
 	inode->i_private = e;
 	inode->i_fop = &bm_entry_operations;
@@ -709,6 +706,8 @@ static ssize_t bm_register_write(struct file *file, const char __user *buffer,
 	inode_unlock(d_inode(root));
 
 	if (err) {
+		if (f)
+			filp_close(f, NULL);
 		kfree(e);
 		return err;
 	}

fs/proc/task_mmu.c

@@ -1036,8 +1036,6 @@ struct clear_refs_private {
 
 #ifdef CONFIG_MEM_SOFT_DIRTY
 
-#define is_cow_mapping(flags) (((flags) & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE)
-
 static inline bool pte_is_pinned(struct vm_area_struct *vma, unsigned long addr, pte_t pte)
 {
 	struct page *page;

include/linux/compiler-clang.h

@@ -31,6 +31,12 @@
 #define __no_sanitize_thread
 #endif
 
+#if defined(CONFIG_ARCH_USE_BUILTIN_BSWAP)
+#define __HAVE_BUILTIN_BSWAP32__
+#define __HAVE_BUILTIN_BSWAP64__
+#define __HAVE_BUILTIN_BSWAP16__
+#endif /* CONFIG_ARCH_USE_BUILTIN_BSWAP */
+
 #if __has_feature(undefined_behavior_sanitizer)
 /* GCC does not have __SANITIZE_UNDEFINED__ */
 #define __no_sanitize_undefined \

include/linux/memblock.h

@@ -460,7 +460,7 @@ static inline void memblock_free_late(phys_addr_t base, phys_addr_t size)
 /*
  * Set the allocation direction to bottom-up or top-down.
  */
-static inline void memblock_set_bottom_up(bool enable)
+static inline __init void memblock_set_bottom_up(bool enable)
 {
 	memblock.bottom_up = enable;
 }
@@ -470,7 +470,7 @@ static inline void memblock_set_bottom_up(bool enable)
  * if this is true, that said, memblock will allocate memory
  * in bottom-up direction.
  */
-static inline bool memblock_bottom_up(void)
+static inline __init bool memblock_bottom_up(void)
 {
 	return memblock.bottom_up;
 }

include/linux/memcontrol.h

@@ -1061,9 +1061,7 @@ static inline void memcg_memory_event_mm(struct mm_struct *mm,
 	rcu_read_unlock();
 }
 
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-void mem_cgroup_split_huge_fixup(struct page *head);
-#endif
+void split_page_memcg(struct page *head, unsigned int nr);
 
 #else /* CONFIG_MEMCG */
 
@@ -1400,7 +1398,7 @@ unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
 	return 0;
 }
 
-static inline void mem_cgroup_split_huge_fixup(struct page *head)
+static inline void split_page_memcg(struct page *head, unsigned int nr)
 {
 }
 

include/linux/mm.h

@@ -1300,6 +1300,27 @@ static inline bool page_maybe_dma_pinned(struct page *page)
 		GUP_PIN_COUNTING_BIAS;
 }
 
+static inline bool is_cow_mapping(vm_flags_t flags)
+{
+	return (flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
+}
+
+/*
+ * This should most likely only be called during fork() to see whether we
+ * should break the cow immediately for a page on the src mm.
+ */
+static inline bool page_needs_cow_for_dma(struct vm_area_struct *vma,
+					  struct page *page)
+{
+	if (!is_cow_mapping(vma->vm_flags))
+		return false;
+
+	if (!atomic_read(&vma->vm_mm->has_pinned))
+		return false;
+
+	return page_maybe_dma_pinned(page);
+}
+
 #if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP)
 #define SECTION_IN_PAGE_FLAGS
 #endif

include/linux/mm_types.h

@@ -23,6 +23,7 @@
 #endif
 #define AT_VECTOR_SIZE (2*(AT_VECTOR_SIZE_ARCH + AT_VECTOR_SIZE_BASE + 1))
 
+#define INIT_PASID	0
 
 struct address_space;
 struct mem_cgroup;

include/linux/sched/mm.h

@@ -140,7 +140,8 @@ static inline bool in_vfork(struct task_struct *tsk)
 	 * another oom-unkillable task does this it should blame itself.
 	 */
 	rcu_read_lock();
-	ret = tsk->vfork_done && tsk->real_parent->mm == tsk->mm;
+	ret = tsk->vfork_done &&
+			rcu_dereference(tsk->real_parent)->mm == tsk->mm;
 	rcu_read_unlock();
 
 	return ret;

include/linux/stop_machine.h

@@ -128,7 +128,7 @@ int stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data,
 				   const struct cpumask *cpus);
 #else	/* CONFIG_SMP || CONFIG_HOTPLUG_CPU */
 
-static inline int stop_machine_cpuslocked(cpu_stop_fn_t fn, void *data,
+static __always_inline int stop_machine_cpuslocked(cpu_stop_fn_t fn, void *data,
 					  const struct cpumask *cpus)
 {
 	unsigned long flags;
@@ -139,14 +139,15 @@ static inline int stop_machine_cpuslocked(cpu_stop_fn_t fn, void *data,
 	return ret;
 }
 
-static inline int stop_machine(cpu_stop_fn_t fn, void *data,
-			       const struct cpumask *cpus)
+static __always_inline int
+stop_machine(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus)
 {
 	return stop_machine_cpuslocked(fn, data, cpus);
 }
 
-static inline int stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data,
-						 const struct cpumask *cpus)
+static __always_inline int
+stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data,
+			       const struct cpumask *cpus)
 {
 	return stop_machine(fn, data, cpus);
 }

init/Kconfig

@@ -119,8 +119,7 @@ config INIT_ENV_ARG_LIMIT
 
 config COMPILE_TEST
 	bool "Compile also drivers which will not load"
-	depends on !UML && !S390
-	default n
+	depends on HAS_IOMEM
 	help
 	  Some drivers can be compiled on a different platform than they are
 	  intended to be run on. Despite they cannot be loaded there (or even

kernel/fork.c

@@ -994,6 +994,13 @@ static void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
 #endif
 }
 
+static void mm_init_pasid(struct mm_struct *mm)
+{
+#ifdef CONFIG_IOMMU_SUPPORT
+	mm->pasid = INIT_PASID;
+#endif
+}
+
 static void mm_init_uprobes_state(struct mm_struct *mm)
 {
 #ifdef CONFIG_UPROBES
@@ -1024,6 +1031,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
 	mm_init_cpumask(mm);
 	mm_init_aio(mm);
 	mm_init_owner(mm, p);
+	mm_init_pasid(mm);
 	RCU_INIT_POINTER(mm->exe_file, NULL);
 	mmu_notifier_subscriptions_init(mm);
 	init_tlb_flush_pending(mm);

lib/Kconfig.kasan

@@ -156,6 +156,7 @@ config KASAN_STACK_ENABLE
 
 config KASAN_STACK
 	int
+	depends on KASAN_GENERIC || KASAN_SW_TAGS
 	default 1 if KASAN_STACK_ENABLE || CC_IS_GCC
 	default 0
 

mm/highmem.c

@@ -368,20 +368,24 @@ void zero_user_segments(struct page *page, unsigned start1, unsigned end1,
 
 	BUG_ON(end1 > page_size(page) || end2 > page_size(page));
 
+	if (start1 >= end1)
+		start1 = end1 = 0;
+	if (start2 >= end2)
+		start2 = end2 = 0;
+
 	for (i = 0; i < compound_nr(page); i++) {
 		void *kaddr = NULL;
 
-		if (start1 < PAGE_SIZE || start2 < PAGE_SIZE)
-			kaddr = kmap_atomic(page + i);
-
 		if (start1 >= PAGE_SIZE) {
 			start1 -= PAGE_SIZE;
 			end1 -= PAGE_SIZE;
 		} else {
 			unsigned this_end = min_t(unsigned, end1, PAGE_SIZE);
 
-			if (end1 > start1)
+			if (end1 > start1) {
+				kaddr = kmap_atomic(page + i);
 				memset(kaddr + start1, 0, this_end - start1);
+			}
 			end1 -= this_end;
 			start1 = 0;
 		}
@@ -392,8 +396,11 @@ void zero_user_segments(struct page *page, unsigned start1, unsigned end1,
 		} else {
 			unsigned this_end = min_t(unsigned, end2, PAGE_SIZE);
 
-			if (end2 > start2)
+			if (end2 > start2) {
+				if (!kaddr)
+					kaddr = kmap_atomic(page + i);
 				memset(kaddr + start2, 0, this_end - start2);
+			}
 			end2 -= this_end;
 			start2 = 0;
 		}

mm/huge_memory.c

@@ -1100,9 +1100,7 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 	 * best effort that the pinned pages won't be replaced by another
 	 * random page during the coming copy-on-write.
 	 */
-	if (unlikely(is_cow_mapping(vma->vm_flags) &&
-		     atomic_read(&src_mm->has_pinned) &&
-		     page_maybe_dma_pinned(src_page))) {
+	if (unlikely(page_needs_cow_for_dma(vma, src_page))) {
 		pte_free(dst_mm, pgtable);
 		spin_unlock(src_ptl);
 		spin_unlock(dst_ptl);
@@ -1214,9 +1212,7 @@ int copy_huge_pud(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 	}
 
 	/* Please refer to comments in copy_huge_pmd() */
-	if (unlikely(is_cow_mapping(vma->vm_flags) &&
-		     atomic_read(&src_mm->has_pinned) &&
-		     page_maybe_dma_pinned(pud_page(pud)))) {
+	if (unlikely(page_needs_cow_for_dma(vma, pud_page(pud)))) {
 		spin_unlock(src_ptl);
 		spin_unlock(dst_ptl);
 		__split_huge_pud(vma, src_pud, addr);
@@ -2471,7 +2467,7 @@ static void __split_huge_page(struct page *page, struct list_head *list,
 	int i;
 
 	/* complete memcg works before add pages to LRU */
-	mem_cgroup_split_huge_fixup(head);
+	split_page_memcg(head, nr);
 
 	if (PageAnon(head) && PageSwapCache(head)) {
 		swp_entry_t entry = { .val = page_private(head) };

mm/hugetlb.c

@@ -331,6 +331,24 @@ static void coalesce_file_region(struct resv_map *resv, struct file_region *rg)
 	}
 }
 
+static inline long
+hugetlb_resv_map_add(struct resv_map *map, struct file_region *rg, long from,
+		     long to, struct hstate *h, struct hugetlb_cgroup *cg,
+		     long *regions_needed)
+{
+	struct file_region *nrg;
+
+	if (!regions_needed) {
+		nrg = get_file_region_entry_from_cache(map, from, to);
+		record_hugetlb_cgroup_uncharge_info(cg, h, map, nrg);
+		list_add(&nrg->link, rg->link.prev);
+		coalesce_file_region(map, nrg);
+	} else
+		*regions_needed += 1;
+
+	return to - from;
+}
+
 /*
  * Must be called with resv->lock held.
  *
@@ -346,7 +364,7 @@ static long add_reservation_in_range(struct resv_map *resv, long f, long t,
 	long add = 0;
 	struct list_head *head = &resv->regions;
 	long last_accounted_offset = f;
-	struct file_region *rg = NULL, *trg = NULL, *nrg = NULL;
+	struct file_region *rg = NULL, *trg = NULL;
 
 	if (regions_needed)
 		*regions_needed = 0;
@@ -369,24 +387,17 @@ static long add_reservation_in_range(struct resv_map *resv, long f, long t,
 		/* When we find a region that starts beyond our range, we've
 		 * finished.
 		 */
-		if (rg->from > t)
+		if (rg->from >= t)
 			break;
 
 		/* Add an entry for last_accounted_offset -> rg->from, and
 		 * update last_accounted_offset.
 		 */
-		if (rg->from > last_accounted_offset) {
-			add += rg->from - last_accounted_offset;
-			if (!regions_needed) {
-				nrg = get_file_region_entry_from_cache(
-					resv, last_accounted_offset, rg->from);
-				record_hugetlb_cgroup_uncharge_info(h_cg, h,
-								    resv, nrg);
-				list_add(&nrg->link, rg->link.prev);
-				coalesce_file_region(resv, nrg);
-			} else
-				*regions_needed += 1;
-		}
+		if (rg->from > last_accounted_offset)
+			add += hugetlb_resv_map_add(resv, rg,
+						    last_accounted_offset,
+						    rg->from, h, h_cg,
+						    regions_needed);
 
 		last_accounted_offset = rg->to;
 	}
@@ -394,17 +405,9 @@ static long add_reservation_in_range(struct resv_map *resv, long f, long t,
 	/* Handle the case where our range extends beyond
 	 * last_accounted_offset.
 	 */
-	if (last_accounted_offset < t) {
-		add += t - last_accounted_offset;
-		if (!regions_needed) {
-			nrg = get_file_region_entry_from_cache(
-				resv, last_accounted_offset, t);
-			record_hugetlb_cgroup_uncharge_info(h_cg, h, resv, nrg);
-			list_add(&nrg->link, rg->link.prev);
-			coalesce_file_region(resv, nrg);
-		} else
-			*regions_needed += 1;
-	}
+	if (last_accounted_offset < t)
+		add += hugetlb_resv_map_add(resv, rg, last_accounted_offset,
+					    t, h, h_cg, regions_needed);
 
 	VM_BUG_ON(add < 0);
 	return add;
@@ -3725,21 +3728,32 @@ static bool is_hugetlb_entry_hwpoisoned(pte_t pte)
 	return false;
 }
 
+static void
+hugetlb_install_page(struct vm_area_struct *vma, pte_t *ptep, unsigned long addr,
+		     struct page *new_page)
+{
+	__SetPageUptodate(new_page);
+	set_huge_pte_at(vma->vm_mm, addr, ptep, make_huge_pte(vma, new_page, 1));
+	hugepage_add_new_anon_rmap(new_page, vma, addr);
+	hugetlb_count_add(pages_per_huge_page(hstate_vma(vma)), vma->vm_mm);
+	ClearHPageRestoreReserve(new_page);
+	SetHPageMigratable(new_page);
+}
+
 int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
 			    struct vm_area_struct *vma)
 {
 	pte_t *src_pte, *dst_pte, entry, dst_entry;
 	struct page *ptepage;
 	unsigned long addr;
-	int cow;
+	bool cow = is_cow_mapping(vma->vm_flags);
 	struct hstate *h = hstate_vma(vma);
 	unsigned long sz = huge_page_size(h);
+	unsigned long npages = pages_per_huge_page(h);
 	struct address_space *mapping = vma->vm_file->f_mapping;
 	struct mmu_notifier_range range;
 	int ret = 0;
 
-	cow = (vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
-
 	if (cow) {
 		mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, src,
 					vma->vm_start,
@@ -3784,6 +3798,7 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
 		spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
 		entry = huge_ptep_get(src_pte);
 		dst_entry = huge_ptep_get(dst_pte);
+again:
 		if (huge_pte_none(entry) || !huge_pte_none(dst_entry)) {
 			/*
 			 * Skip if src entry none.  Also, skip in the
@@ -3807,6 +3822,52 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
 			}
 			set_huge_swap_pte_at(dst, addr, dst_pte, entry, sz);
 		} else {
+			entry = huge_ptep_get(src_pte);
+			ptepage = pte_page(entry);
+			get_page(ptepage);
+
+			/*
+			 * This is a rare case where we see pinned hugetlb
+			 * pages while they're prone to COW.  We need to do the
+			 * COW earlier during fork.
+			 *
+			 * When pre-allocating the page or copying data, we
+			 * need to be without the pgtable locks since we could
+			 * sleep during the process.
+			 */
+			if (unlikely(page_needs_cow_for_dma(vma, ptepage))) {
+				pte_t src_pte_old = entry;
+				struct page *new;
+
+				spin_unlock(src_ptl);
+				spin_unlock(dst_ptl);
+				/* Do not use reserve as it's private owned */
+				new = alloc_huge_page(vma, addr, 1);
+				if (IS_ERR(new)) {
+					put_page(ptepage);
+					ret = PTR_ERR(new);
+					break;
+				}
+				copy_user_huge_page(new, ptepage, addr, vma,
+						    npages);
+				put_page(ptepage);
+
+				/* Install the new huge page if src pte stable */
+				dst_ptl = huge_pte_lock(h, dst, dst_pte);
+				src_ptl = huge_pte_lockptr(h, src, src_pte);
+				spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
+				entry = huge_ptep_get(src_pte);
+				if (!pte_same(src_pte_old, entry)) {
+					put_page(new);
+					/* dst_entry won't change as in child */
+					goto again;
+				}
+				hugetlb_install_page(vma, dst_pte, addr, new);
+				spin_unlock(src_ptl);
+				spin_unlock(dst_ptl);
+				continue;
+			}
+
 			if (cow) {
 				/*
 				 * No need to notify as we are downgrading page
@@ -3817,12 +3878,10 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
 				 */
 				huge_ptep_set_wrprotect(src, addr, src_pte);
 			}
-			entry = huge_ptep_get(src_pte);
-			ptepage = pte_page(entry);
-			get_page(ptepage);
 			page_dup_rmap(ptepage, true);
 			set_huge_pte_at(dst, addr, dst_pte, entry);
-			hugetlb_count_add(pages_per_huge_page(h), dst);
+			hugetlb_count_add(npages, dst);
 		}
 		spin_unlock(src_ptl);
 		spin_unlock(dst_ptl);

mm/internal.h

@@ -296,11 +296,6 @@ static inline unsigned int buddy_order(struct page *page)
  */
 #define buddy_order_unsafe(page)	READ_ONCE(page_private(page))
 
-static inline bool is_cow_mapping(vm_flags_t flags)
-{
-	return (flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
-}
-
 /*
  * These three helpers classifies VMAs for virtual memory accounting.
  */

mm/kfence/report.c

@@ -20,6 +20,11 @@
 
 #include "kfence.h"
 
+/* May be overridden by <asm/kfence.h>. */
+#ifndef ARCH_FUNC_PREFIX
+#define ARCH_FUNC_PREFIX ""
+#endif
+
 extern bool no_hash_pointers;
 
 /* Helper function to either print to a seq_file or to console. */
@@ -67,8 +72,9 @@ static int get_stack_skipnr(const unsigned long stack_entries[], int num_entries
 	for (skipnr = 0; skipnr < num_entries; skipnr++) {
 		int len = scnprintf(buf, sizeof(buf), "%ps", (void *)stack_entries[skipnr]);
 
-		if (str_has_prefix(buf, "kfence_") || str_has_prefix(buf, "__kfence_") ||
-		    !strncmp(buf, "__slab_free", len)) {
+		if (str_has_prefix(buf, ARCH_FUNC_PREFIX "kfence_") ||
+		    str_has_prefix(buf, ARCH_FUNC_PREFIX "__kfence_") ||
+		    !strncmp(buf, ARCH_FUNC_PREFIX "__slab_free", len)) {
 			/*
 			 * In case of tail calls from any of the below
 			 * to any of the above.
@@ -77,10 +83,10 @@ static int get_stack_skipnr(const unsigned long stack_entries[], int num_entries
 		}
 
 		/* Also the *_bulk() variants by only checking prefixes. */
-		if (str_has_prefix(buf, "kfree") ||
-		    str_has_prefix(buf, "kmem_cache_free") ||
-		    str_has_prefix(buf, "__kmalloc") ||
-		    str_has_prefix(buf, "kmem_cache_alloc"))
+		if (str_has_prefix(buf, ARCH_FUNC_PREFIX "kfree") ||
+		    str_has_prefix(buf, ARCH_FUNC_PREFIX "kmem_cache_free") ||
+		    str_has_prefix(buf, ARCH_FUNC_PREFIX "__kmalloc") ||
+		    str_has_prefix(buf, ARCH_FUNC_PREFIX "kmem_cache_alloc"))
 			goto found;
 	}
 	if (fallback < num_entries)
@@ -116,12 +122,12 @@ void kfence_print_object(struct seq_file *seq, const struct kfence_metadata *met
 	lockdep_assert_held(&meta->lock);
 
 	if (meta->state == KFENCE_OBJECT_UNUSED) {
-		seq_con_printf(seq, "kfence-#%zd unused\n", meta - kfence_metadata);
+		seq_con_printf(seq, "kfence-#%td unused\n", meta - kfence_metadata);
 		return;
 	}
 
 	seq_con_printf(seq,
-		       "kfence-#%zd [0x%p-0x%p"
+		       "kfence-#%td [0x%p-0x%p"
 		       ", size=%d, cache=%s] allocated by task %d:\n",
 		       meta - kfence_metadata, (void *)start, (void *)(start + size - 1), size,
 		       (cache && cache->name) ? cache->name : "<destroyed>", meta->alloc_track.pid);
@@ -204,7 +210,7 @@ void kfence_report_error(unsigned long address, bool is_write, struct pt_regs *r
 		pr_err("BUG: KFENCE: out-of-bounds %s in %pS\n\n", get_access_type(is_write),
 		       (void *)stack_entries[skipnr]);
-		pr_err("Out-of-bounds %s at 0x%p (%luB %s of kfence-#%zd):\n",
+		pr_err("Out-of-bounds %s at 0x%p (%luB %s of kfence-#%td):\n",
 		       get_access_type(is_write), (void *)address,
 		       left_of_object ? meta->addr - address : address - meta->addr,
 		       left_of_object ? "left" : "right", object_index);
@@ -213,14 +219,14 @@ void kfence_report_error(unsigned long address, bool is_write, struct pt_regs *r
 	case KFENCE_ERROR_UAF:
 		pr_err("BUG: KFENCE: use-after-free %s in %pS\n\n", get_access_type(is_write),
 		       (void *)stack_entries[skipnr]);
-		pr_err("Use-after-free %s at 0x%p (in kfence-#%zd):\n",
+		pr_err("Use-after-free %s at 0x%p (in kfence-#%td):\n",
 		       get_access_type(is_write), (void *)address, object_index);
 		break;
 	case KFENCE_ERROR_CORRUPTION:
 		pr_err("BUG: KFENCE: memory corruption in %pS\n\n", (void *)stack_entries[skipnr]);
 		pr_err("Corrupted memory at 0x%p ", (void *)address);
 		print_diff_canary(address, 16, meta);
-		pr_cont(" (in kfence-#%zd):\n", object_index);
+		pr_cont(" (in kfence-#%td):\n", object_index);
 		break;
 	case KFENCE_ERROR_INVALID:
 		pr_err("BUG: KFENCE: invalid %s in %pS\n\n", get_access_type(is_write),
@@ -230,7 +236,7 @@ void kfence_report_error(unsigned long address, bool is_write, struct pt_regs *r
 		break;
 	case KFENCE_ERROR_INVALID_FREE:
 		pr_err("BUG: KFENCE: invalid free in %pS\n\n", (void *)stack_entries[skipnr]);
-		pr_err("Invalid free of 0x%p (in kfence-#%zd):\n", (void *)address,
+		pr_err("Invalid free of 0x%p (in kfence-#%td):\n", (void *)address,
 		       object_index);
 		break;
 	}

mm/madvise.c

@@ -1198,12 +1198,22 @@ SYSCALL_DEFINE5(process_madvise, int, pidfd, const struct iovec __user *, vec,
 		goto release_task;
 	}
 
-	mm = mm_access(task, PTRACE_MODE_ATTACH_FSCREDS);
+	/* Require PTRACE_MODE_READ to avoid leaking ASLR metadata. */
+	mm = mm_access(task, PTRACE_MODE_READ_FSCREDS);
 	if (IS_ERR_OR_NULL(mm)) {
 		ret = IS_ERR(mm) ? PTR_ERR(mm) : -ESRCH;
 		goto release_task;
 	}
 
+	/*
+	 * Require CAP_SYS_NICE for influencing process performance. Note that
+	 * only non-destructive hints are currently supported.
+	 */
+	if (!capable(CAP_SYS_NICE)) {
+		ret = -EPERM;
+		goto release_mm;
+	}
+
 	total_len = iov_iter_count(&iter);
 
 	while (iov_iter_count(&iter)) {
@@ -1218,6 +1228,7 @@ SYSCALL_DEFINE5(process_madvise, int, pidfd, const struct iovec __user *, vec,
 	if (ret == 0)
 		ret = total_len - iov_iter_count(&iter);
 
+release_mm:
 	mmput(mm);
 release_task:
 	put_task_struct(task);
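For reference, the syscall whose permission model changes above can be exercised from userspace roughly as follows. This is only a hedged sketch: the target pid and address range are placeholders, and it assumes headers new enough to define SYS_process_madvise, SYS_pidfd_open and MADV_COLD (Linux 5.10+ kernel headers, reasonably recent glibc).

#define _GNU_SOURCE
#include <stdio.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <sys/uio.h>
#include <unistd.h>

int main(void)
{
	pid_t target = 1234;			/* placeholder pid */
	int pidfd = syscall(SYS_pidfd_open, target, 0);
	if (pidfd < 0) {
		perror("pidfd_open");
		return 1;
	}

	struct iovec iov = {
		.iov_base = (void *)0x7f0000000000UL,	/* placeholder range */
		.iov_len  = 4096,
	};

	/* With the patch above: needs PTRACE_MODE_READ access plus CAP_SYS_NICE. */
	long ret = syscall(SYS_process_madvise, pidfd, &iov, 1, MADV_COLD, 0);
	if (ret < 0)
		perror("process_madvise");
	else
		printf("advised %ld bytes\n", ret);
	return 0;
}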

mm/memcontrol.c

@@ -3287,24 +3287,21 @@ void obj_cgroup_uncharge(struct obj_cgroup *objcg, size_t size)
 
 #endif /* CONFIG_MEMCG_KMEM */
 
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
 /*
- * Because page_memcg(head) is not set on compound tails, set it now.
+ * Because page_memcg(head) is not set on tails, set it now.
  */
-void mem_cgroup_split_huge_fixup(struct page *head)
+void split_page_memcg(struct page *head, unsigned int nr)
 {
 	struct mem_cgroup *memcg = page_memcg(head);
 	int i;
 
-	if (mem_cgroup_disabled())
+	if (mem_cgroup_disabled() || !memcg)
 		return;
 
-	for (i = 1; i < HPAGE_PMD_NR; i++) {
-		css_get(&memcg->css);
-		head[i].memcg_data = (unsigned long)memcg;
-	}
+	for (i = 1; i < nr; i++)
+		head[i].memcg_data = head->memcg_data;
+	css_get_many(&memcg->css, nr - 1);
 }
-#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
 #ifdef CONFIG_MEMCG_SWAP
 /**

mm/memory.c

@@ -809,12 +809,8 @@ copy_present_page(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma
 		  pte_t *dst_pte, pte_t *src_pte, unsigned long addr, int *rss,
 		  struct page **prealloc, pte_t pte, struct page *page)
 {
-	struct mm_struct *src_mm = src_vma->vm_mm;
 	struct page *new_page;
 
-	if (!is_cow_mapping(src_vma->vm_flags))
-		return 1;
-
 	/*
 	 * What we want to do is to check whether this page may
 	 * have been pinned by the parent process.  If so,
@@ -828,9 +824,7 @@ copy_present_page(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma
 	 * the page count. That might give false positives for
 	 * for pinning, but it will work correctly.
 	 */
-	if (likely(!atomic_read(&src_mm->has_pinned)))
-		return 1;
-	if (likely(!page_maybe_dma_pinned(page)))
+	if (likely(!page_needs_cow_for_dma(src_vma, page)))
 		return 1;
 
 	new_page = *prealloc;
@@ -3103,6 +3097,14 @@ static vm_fault_t do_wp_page(struct vm_fault *vmf)
 		return handle_userfault(vmf, VM_UFFD_WP);
 	}
 
+	/*
+	 * Userfaultfd write-protect can defer flushes. Ensure the TLB
+	 * is flushed in this case before copying.
+	 */
+	if (unlikely(userfaultfd_wp(vmf->vma) &&
+		     mm_tlb_flush_pending(vmf->vma->vm_mm)))
+		flush_tlb_page(vmf->vma, vmf->address);
+
 	vmf->page = vm_normal_page(vma, vmf->address, vmf->orig_pte);
 	if (!vmf->page) {
 		/*

mm/page_alloc.c

@@ -1281,6 +1281,12 @@ static __always_inline bool free_pages_prepare(struct page *page,
 
 	kernel_poison_pages(page, 1 << order);
 
+	/*
+	 * With hardware tag-based KASAN, memory tags must be set before the
+	 * page becomes unavailable via debug_pagealloc or arch_free_page.
+	 */
+	kasan_free_nondeferred_pages(page, order);
+
 	/*
 	 * arch_free_page() can make the page's contents inaccessible.  s390
 	 * does this.  So nothing which can access the page's contents should
@@ -1290,8 +1296,6 @@ static __always_inline bool free_pages_prepare(struct page *page,
 
 	debug_pagealloc_unmap_pages(page, 1 << order);
 
-	kasan_free_nondeferred_pages(page, order);
-
 	return true;
 }
 
@@ -3310,6 +3314,7 @@
 	for (i = 1; i < (1 << order); i++)
 		set_page_refcounted(page + i);
 	split_page_owner(page, 1 << order);
+	split_page_memcg(page, 1 << order);
 }
 EXPORT_SYMBOL_GPL(split_page);
 
@@ -6259,12 +6264,65 @@ static void __meminit zone_init_free_lists(struct zone *zone)
 	}
 }
 
+#if !defined(CONFIG_FLAT_NODE_MEM_MAP)
+/*
+ * Only struct pages that correspond to ranges defined by memblock.memory
+ * are zeroed and initialized by going through __init_single_page() during
+ * memmap_init_zone().
+ *
+ * But, there could be struct pages that correspond to holes in
+ * memblock.memory. This can happen because of the following reasons:
+ * - physical memory bank size is not necessarily the exact multiple of the
+ *   arbitrary section size
+ * - early reserved memory may not be listed in memblock.memory
+ * - memory layouts defined with memmap= kernel parameter may not align
+ *   nicely with memmap sections
+ *
+ * Explicitly initialize those struct pages so that:
+ * - PG_Reserved is set
+ * - zone and node links point to zone and node that span the page if the
+ *   hole is in the middle of a zone
+ * - zone and node links point to adjacent zone/node if the hole falls on
+ *   the zone boundary; the pages in such holes will be prepended to the
+ *   zone/node above the hole except for the trailing pages in the last
+ *   section that will be appended to the zone/node below.
+ */
+static u64 __meminit init_unavailable_range(unsigned long spfn,
+					    unsigned long epfn,
+					    int zone, int node)
+{
+	unsigned long pfn;
+	u64 pgcnt = 0;
+
+	for (pfn = spfn; pfn < epfn; pfn++) {
+		if (!pfn_valid(ALIGN_DOWN(pfn, pageblock_nr_pages))) {
+			pfn = ALIGN_DOWN(pfn, pageblock_nr_pages)
+				+ pageblock_nr_pages - 1;
+			continue;
+		}
+		__init_single_page(pfn_to_page(pfn), pfn, zone, node);
+		__SetPageReserved(pfn_to_page(pfn));
+		pgcnt++;
+	}
+
+	return pgcnt;
+}
+#else
+static inline u64 init_unavailable_range(unsigned long spfn, unsigned long epfn,
+					 int zone, int node)
+{
+	return 0;
+}
+#endif
+
 void __meminit __weak memmap_init_zone(struct zone *zone)
 {
 	unsigned long zone_start_pfn = zone->zone_start_pfn;
 	unsigned long zone_end_pfn = zone_start_pfn + zone->spanned_pages;
 	int i, nid = zone_to_nid(zone), zone_id = zone_idx(zone);
+	static unsigned long hole_pfn;
 	unsigned long start_pfn, end_pfn;
+	u64 pgcnt = 0;
 
 	for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
 		start_pfn = clamp(start_pfn, zone_start_pfn, zone_end_pfn);
@@ -6274,7 +6332,29 @@ void __meminit __weak memmap_init_zone(struct zone *zone)
 		memmap_init_range(end_pfn - start_pfn, nid,
 				  zone_id, start_pfn, zone_end_pfn,
 				  MEMINIT_EARLY, NULL, MIGRATE_MOVABLE);
+
+		if (hole_pfn < start_pfn)
+			pgcnt += init_unavailable_range(hole_pfn, start_pfn,
+							zone_id, nid);
+		hole_pfn = end_pfn;
 	}
+
+#ifdef CONFIG_SPARSEMEM
+	/*
+	 * Initialize the hole in the range [zone_end_pfn, section_end].
+	 * If zone boundary falls in the middle of a section, this hole
+	 * will be re-initialized during the call to this function for the
+	 * higher zone.
+	 */
+	end_pfn = round_up(zone_end_pfn, PAGES_PER_SECTION);
+	if (hole_pfn < end_pfn)
+		pgcnt += init_unavailable_range(hole_pfn, end_pfn,
+						zone_id, nid);
+#endif
+
+	if (pgcnt)
+		pr_info(" %s zone: %llu pages in unavailable ranges\n",
+			zone->name, pgcnt);
 }
 
 static int zone_batchsize(struct zone *zone)
@@ -7071,88 +7151,6 @@ void __init free_area_init_memoryless_node(int nid)
 	free_area_init_node(nid);
 }
 
-#if !defined(CONFIG_FLAT_NODE_MEM_MAP)
-/*
- * Initialize all valid struct pages in the range [spfn, epfn) and mark them
- * PageReserved(). Return the number of struct pages that were initialized.
- */
-static u64 __init init_unavailable_range(unsigned long spfn, unsigned long epfn)
-{
-	unsigned long pfn;
-	u64 pgcnt = 0;
-
-	for (pfn = spfn; pfn < epfn; pfn++) {
-		if (!pfn_valid(ALIGN_DOWN(pfn, pageblock_nr_pages))) {
-			pfn = ALIGN_DOWN(pfn, pageblock_nr_pages)
-				+ pageblock_nr_pages - 1;
-			continue;
-		}
-
-		/*
-		 * Use a fake node/zone (0) for now. Some of these pages
-		 * (in memblock.reserved but not in memblock.memory) will
-		 * get re-initialized via reserve_bootmem_region() later.
-		 */
-		__init_single_page(pfn_to_page(pfn), pfn, 0, 0);
-		__SetPageReserved(pfn_to_page(pfn));
-		pgcnt++;
-	}
-
-	return pgcnt;
-}
-
-/*
- * Only struct pages that are backed by physical memory are zeroed and
- * initialized by going through __init_single_page(). But, there are some
- * struct pages which are reserved in memblock allocator and their fields
- * may be accessed (for example page_to_pfn() on some configuration accesses
- * flags). We must explicitly initialize those struct pages.
- *
- * This function also addresses a similar issue where struct pages are left
- * uninitialized because the physical address range is not covered by
- * memblock.memory or memblock.reserved. That could happen when memblock
- * layout is manually configured via memmap=, or when the highest physical
- * address (max_pfn) does not end on a section boundary.
- */
-static void __init init_unavailable_mem(void)
-{
-	phys_addr_t start, end;
-	u64 i, pgcnt;
-	phys_addr_t next = 0;
-
-	/*
-	 * Loop through unavailable ranges not covered by memblock.memory.
-	 */
-	pgcnt = 0;
-	for_each_mem_range(i, &start, &end) {
-		if (next < start)
-			pgcnt += init_unavailable_range(PFN_DOWN(next),
-							PFN_UP(start));
-		next = end;
-	}
-
-	/*
-	 * Early sections always have a fully populated memmap for the whole
-	 * section - see pfn_valid(). If the last section has holes at the
-	 * end and that section is marked "online", the memmap will be
-	 * considered initialized. Make sure that memmap has a well defined
-	 * state.
-	 */
-	pgcnt += init_unavailable_range(PFN_DOWN(next),
-					round_up(max_pfn, PAGES_PER_SECTION));
-
-	/*
-	 * Struct pages that do not have backing memory. This could be because
-	 * firmware is using some of this memory, or for some other reasons.
-	 */
-	if (pgcnt)
-		pr_info("Zeroed struct page in unavailable ranges: %lld pages", pgcnt);
-}
-#else
-static inline void __init init_unavailable_mem(void)
-{
-}
-#endif /* !CONFIG_FLAT_NODE_MEM_MAP */
-
 #if MAX_NUMNODES > 1
 /*
  * Figure out the number of possible node ids.
@@ -7576,7 +7574,6 @@ void __init free_area_init(unsigned long *max_zone_pfn)
 	/* Initialise every node */
 	mminit_verify_pageflags_layout();
 	setup_nr_node_ids();
-	init_unavailable_mem();
 	for_each_online_node(nid) {
 		pg_data_t *pgdat = NODE_DATA(nid);
 		free_area_init_node(nid);

mm/slab.c

@@ -2992,7 +2992,7 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
 				gfp_t flags, void *objp, unsigned long caller)
 {
 	WARN_ON_ONCE(cachep->ctor && (flags & __GFP_ZERO));
-	if (!objp)
+	if (!objp || is_kfence_address(objp))
 		return objp;
 	if (cachep->flags & SLAB_POISON) {
 		check_poison_obj(cachep, objp);