mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-01-06 05:13:18 +00:00
Merge branch 'akpm' (patches from Andrew)
Merge yet more updates from Andrew Morton: "This is the material which was staged after willystuff in linux-next. Subsystems affected by this patch series: mm (debug, selftests, pagecache, thp, rmap, migration, kasan, hugetlb, pagemap, madvise), and selftests" * emailed patches from Andrew Morton <akpm@linux-foundation.org>: (113 commits) selftests: kselftest framework: provide "finished" helper mm: madvise: MADV_DONTNEED_LOCKED mm: fix race between MADV_FREE reclaim and blkdev direct IO read mm: generalize ARCH_HAS_FILTER_PGPROT mm: unmap_mapping_range_tree() with i_mmap_rwsem shared mm: warn on deleting redirtied only if accounted mm/huge_memory: remove stale locking logic from __split_huge_pmd() mm/huge_memory: remove stale page_trans_huge_mapcount() mm/swapfile: remove stale reuse_swap_page() mm/khugepaged: remove reuse_swap_page() usage mm/huge_memory: streamline COW logic in do_huge_pmd_wp_page() mm: streamline COW logic in do_swap_page() mm: slightly clarify KSM logic in do_swap_page() mm: optimize do_wp_page() for fresh pages in local LRU pagevecs mm: optimize do_wp_page() for exclusive pages in the swapcache mm/huge_memory: make is_transparent_hugepage() static userfaultfd/selftests: enable hugetlb remap and remove event testing selftests/vm: add hugetlb madvise MADV_DONTNEED MADV_REMOVE test mm: enable MADV_DONTNEED for hugetlb mappings kasan: disable LOCKDEP when printing reports ...
This commit is contained in:
commit
29c8c18363
@ -30,7 +30,7 @@ Software tag-based KASAN mode is only supported in Clang.
|
||||
|
||||
The hardware KASAN mode (#3) relies on hardware to perform the checks but
|
||||
still requires a compiler version that supports memory tagging instructions.
|
||||
This mode is supported in GCC 10+ and Clang 11+.
|
||||
This mode is supported in GCC 10+ and Clang 12+.
|
||||
|
||||
Both software KASAN modes work with SLUB and SLAB memory allocators,
|
||||
while the hardware tag-based KASAN currently only supports SLUB.
|
||||
@ -206,6 +206,9 @@ additional boot parameters that allow disabling KASAN or controlling features:
|
||||
Asymmetric mode: a bad access is detected synchronously on reads and
|
||||
asynchronously on writes.
|
||||
|
||||
- ``kasan.vmalloc=off`` or ``=on`` disables or enables tagging of vmalloc
|
||||
allocations (default: ``on``).
|
||||
|
||||
- ``kasan.stacktrace=off`` or ``=on`` disables or enables alloc and free stack
|
||||
traces collection (default: ``on``).
|
||||
|
||||
@ -279,8 +282,8 @@ Software tag-based KASAN uses 0xFF as a match-all pointer tag (accesses through
|
||||
pointers with the 0xFF pointer tag are not checked). The value 0xFE is currently
|
||||
reserved to tag freed memory regions.
|
||||
|
||||
Software tag-based KASAN currently only supports tagging of slab and page_alloc
|
||||
memory.
|
||||
Software tag-based KASAN currently only supports tagging of slab, page_alloc,
|
||||
and vmalloc memory.
|
||||
|
||||
Hardware tag-based KASAN
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
@ -303,8 +306,8 @@ Hardware tag-based KASAN uses 0xFF as a match-all pointer tag (accesses through
|
||||
pointers with the 0xFF pointer tag are not checked). The value 0xFE is currently
|
||||
reserved to tag freed memory regions.
|
||||
|
||||
Hardware tag-based KASAN currently only supports tagging of slab and page_alloc
|
||||
memory.
|
||||
Hardware tag-based KASAN currently only supports tagging of slab, page_alloc,
|
||||
and VM_ALLOC-based vmalloc memory.
|
||||
|
||||
If the hardware does not support MTE (pre ARMv8.5), hardware tag-based KASAN
|
||||
will not be enabled. In this case, all KASAN boot parameters are ignored.
|
||||
@ -319,6 +322,8 @@ checking gets disabled.
|
||||
Shadow memory
|
||||
-------------
|
||||
|
||||
The contents of this section are only applicable to software KASAN modes.
|
||||
|
||||
The kernel maps memory in several different parts of the address space.
|
||||
The range of kernel virtual addresses is large: there is not enough real
|
||||
memory to support a real shadow region for every address that could be
|
||||
@ -349,7 +354,7 @@ CONFIG_KASAN_VMALLOC
|
||||
|
||||
With ``CONFIG_KASAN_VMALLOC``, KASAN can cover vmalloc space at the
|
||||
cost of greater memory usage. Currently, this is supported on x86,
|
||||
riscv, s390, and powerpc.
|
||||
arm64, riscv, s390, and powerpc.
|
||||
|
||||
This works by hooking into vmalloc and vmap and dynamically
|
||||
allocating real shadow memory to back the mappings.
|
||||
|
@ -78,7 +78,7 @@ Usage
|
||||
|
||||
2) Enable page owner: add "page_owner=on" to boot cmdline.
|
||||
|
||||
3) Do the job what you want to debug
|
||||
3) Do the job that you want to debug.
|
||||
|
||||
4) Analyze information from page owner::
|
||||
|
||||
@ -89,22 +89,75 @@ Usage
|
||||
|
||||
Page allocated via order XXX, ...
|
||||
PFN XXX ...
|
||||
// Detailed stack
|
||||
// Detailed stack
|
||||
|
||||
Page allocated via order XXX, ...
|
||||
PFN XXX ...
|
||||
// Detailed stack
|
||||
// Detailed stack
|
||||
|
||||
The ``page_owner_sort`` tool ignores ``PFN`` rows, puts the remaining rows
|
||||
in buf, uses regexp to extract the page order value, counts the times
|
||||
and pages of buf, and finally sorts them according to the times.
|
||||
and pages of buf, and finally sorts them according to the parameter(s).
|
||||
|
||||
See the result about who allocated each page
|
||||
in the ``sorted_page_owner.txt``. General output::
|
||||
|
||||
XXX times, XXX pages:
|
||||
Page allocated via order XXX, ...
|
||||
// Detailed stack
|
||||
// Detailed stack
|
||||
|
||||
By default, ``page_owner_sort`` is sorted according to the times of buf.
|
||||
If you want to sort by the pages nums of buf, use the ``-m`` parameter.
|
||||
If you want to sort by the page nums of buf, use the ``-m`` parameter.
|
||||
The detailed parameters are:
|
||||
|
||||
fundamental function:
|
||||
|
||||
Sort:
|
||||
-a Sort by memory allocation time.
|
||||
-m Sort by total memory.
|
||||
-p Sort by pid.
|
||||
-P Sort by tgid.
|
||||
-n Sort by task command name.
|
||||
-r Sort by memory release time.
|
||||
-s Sort by stack trace.
|
||||
-t Sort by times (default).
|
||||
|
||||
additional function:
|
||||
|
||||
Cull:
|
||||
-c Cull by comparing stacktrace instead of total block.
|
||||
--cull <rules>
|
||||
Specify culling rules. Culling syntax is key[,key[,...]]. Choose a
|
||||
multi-letter key from the **STANDARD FORMAT SPECIFIERS** section.
|
||||
|
||||
|
||||
<rules> is a single argument in the form of a comma-separated list,
|
||||
which offers a way to specify individual culling rules. The recognized
|
||||
keywords are described in the **STANDARD FORMAT SPECIFIERS** section below.
|
||||
<rules> can be specified by the sequence of keys k1,k2, ..., as described in
|
||||
the **STANDARD FORMAT SPECIFIERS** section below. Mixed use of abbreviated and
|
||||
complete-form of keys is allowed.
|
||||
|
||||
|
||||
Examples:
|
||||
./page_owner_sort <input> <output> --cull=stacktrace
|
||||
./page_owner_sort <input> <output> --cull=st,pid,name
|
||||
./page_owner_sort <input> <output> --cull=n,f
|
||||
|
||||
Filter:
|
||||
-f Filter out the information of blocks whose memory has been released.
|
||||
|
||||
Select:
|
||||
--pid <PID> Select by pid.
|
||||
--tgid <TGID> Select by tgid.
|
||||
--name <command> Select by task command name.
|
||||
|
||||
STANDARD FORMAT SPECIFIERS
|
||||
==========================
|
||||
|
||||
KEY LONG DESCRIPTION
|
||||
p pid process ID
|
||||
tg tgid thread group ID
|
||||
n name task command name
|
||||
f free whether the page has been released or not
|
||||
st stacktrace stack trace of the page allocation
|
||||
|
@ -74,6 +74,8 @@
|
||||
#define MADV_POPULATE_READ 22 /* populate (prefault) page tables readable */
|
||||
#define MADV_POPULATE_WRITE 23 /* populate (prefault) page tables writable */
|
||||
|
||||
#define MADV_DONTNEED_LOCKED 24 /* like DONTNEED, but drop locked pages too */
|
||||
|
||||
/* compatibility flags */
|
||||
#define MAP_FILE 0
|
||||
|
||||
|
@ -208,7 +208,7 @@ config ARM64
|
||||
select IOMMU_DMA if IOMMU_SUPPORT
|
||||
select IRQ_DOMAIN
|
||||
select IRQ_FORCED_THREADING
|
||||
select KASAN_VMALLOC if KASAN_GENERIC
|
||||
select KASAN_VMALLOC if KASAN
|
||||
select MODULES_USE_ELF_RELA
|
||||
select NEED_DMA_MAP_STATE
|
||||
select NEED_SG_DMA_LENGTH
|
||||
|
@ -25,4 +25,10 @@ static inline bool arch_vmap_pmd_supported(pgprot_t prot)
|
||||
|
||||
#endif
|
||||
|
||||
#define arch_vmap_pgprot_tagged arch_vmap_pgprot_tagged
|
||||
static inline pgprot_t arch_vmap_pgprot_tagged(pgprot_t prot)
|
||||
{
|
||||
return pgprot_tagged(prot);
|
||||
}
|
||||
|
||||
#endif /* _ASM_ARM64_VMALLOC_H */
|
||||
|
@ -17,10 +17,13 @@
|
||||
*/
|
||||
static inline unsigned long *arch_alloc_vmap_stack(size_t stack_size, int node)
|
||||
{
|
||||
void *p;
|
||||
|
||||
BUILD_BUG_ON(!IS_ENABLED(CONFIG_VMAP_STACK));
|
||||
|
||||
return __vmalloc_node(stack_size, THREAD_ALIGN, THREADINFO_GFP, node,
|
||||
p = __vmalloc_node(stack_size, THREAD_ALIGN, THREADINFO_GFP, node,
|
||||
__builtin_return_address(0));
|
||||
return kasan_reset_tag(p);
|
||||
}
|
||||
|
||||
#endif /* __ASM_VMAP_STACK_H */
|
||||
|
@ -58,12 +58,13 @@ void *module_alloc(unsigned long size)
|
||||
PAGE_KERNEL, 0, NUMA_NO_NODE,
|
||||
__builtin_return_address(0));
|
||||
|
||||
if (p && (kasan_module_alloc(p, size, gfp_mask) < 0)) {
|
||||
if (p && (kasan_alloc_module_shadow(p, size, gfp_mask) < 0)) {
|
||||
vfree(p);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return p;
|
||||
/* Memory is intended to be executable, reset the pointer tag. */
|
||||
return kasan_reset_tag(p);
|
||||
}
|
||||
|
||||
enum aarch64_reloc_op {
|
||||
|
@ -85,7 +85,7 @@ static int change_memory_common(unsigned long addr, int numpages,
|
||||
*/
|
||||
area = find_vm_area((void *)addr);
|
||||
if (!area ||
|
||||
end > (unsigned long)area->addr + area->size ||
|
||||
end > (unsigned long)kasan_reset_tag(area->addr) + area->size ||
|
||||
!(area->flags & VM_ALLOC))
|
||||
return -EINVAL;
|
||||
|
||||
|
@ -1304,7 +1304,8 @@ u64 bpf_jit_alloc_exec_limit(void)
|
||||
|
||||
void *bpf_jit_alloc_exec(unsigned long size)
|
||||
{
|
||||
return vmalloc(size);
|
||||
/* Memory is intended to be executable, reset the pointer tag. */
|
||||
return kasan_reset_tag(vmalloc(size));
|
||||
}
|
||||
|
||||
void bpf_jit_free_exec(void *addr)
|
||||
|
@ -101,6 +101,8 @@
|
||||
#define MADV_POPULATE_READ 22 /* populate (prefault) page tables readable */
|
||||
#define MADV_POPULATE_WRITE 23 /* populate (prefault) page tables writable */
|
||||
|
||||
#define MADV_DONTNEED_LOCKED 24 /* like DONTNEED, but drop locked pages too */
|
||||
|
||||
/* compatibility flags */
|
||||
#define MAP_FILE 0
|
||||
|
||||
|
@ -55,6 +55,8 @@
|
||||
#define MADV_POPULATE_READ 22 /* populate (prefault) page tables readable */
|
||||
#define MADV_POPULATE_WRITE 23 /* populate (prefault) page tables writable */
|
||||
|
||||
#define MADV_DONTNEED_LOCKED 24 /* like DONTNEED, but drop locked pages too */
|
||||
|
||||
#define MADV_MERGEABLE 65 /* KSM may merge identical pages */
|
||||
#define MADV_UNMERGEABLE 66 /* KSM may not merge identical pages */
|
||||
|
||||
|
@ -3,6 +3,5 @@
|
||||
* This file is for defining trace points and trace related helpers.
|
||||
*/
|
||||
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
|
||||
#define CREATE_TRACE_POINTS
|
||||
#include <trace/events/thp.h>
|
||||
#endif
|
||||
|
@ -45,7 +45,7 @@ void *module_alloc(unsigned long size)
|
||||
p = __vmalloc_node_range(size, MODULE_ALIGN, MODULES_VADDR, MODULES_END,
|
||||
gfp_mask, PAGE_KERNEL_EXEC, VM_DEFER_KMEMLEAK, NUMA_NO_NODE,
|
||||
__builtin_return_address(0));
|
||||
if (p && (kasan_module_alloc(p, size, gfp_mask) < 0)) {
|
||||
if (p && (kasan_alloc_module_shadow(p, size, gfp_mask) < 0)) {
|
||||
vfree(p);
|
||||
return NULL;
|
||||
}
|
||||
|
@ -337,9 +337,6 @@ config GENERIC_CALIBRATE_DELAY
|
||||
config ARCH_HAS_CPU_RELAX
|
||||
def_bool y
|
||||
|
||||
config ARCH_HAS_FILTER_PGPROT
|
||||
def_bool y
|
||||
|
||||
config ARCH_HIBERNATION_POSSIBLE
|
||||
def_bool y
|
||||
|
||||
|
@ -78,7 +78,7 @@ void *module_alloc(unsigned long size)
|
||||
MODULES_END, gfp_mask,
|
||||
PAGE_KERNEL, VM_DEFER_KMEMLEAK, NUMA_NO_NODE,
|
||||
__builtin_return_address(0));
|
||||
if (p && (kasan_module_alloc(p, size, gfp_mask) < 0)) {
|
||||
if (p && (kasan_alloc_module_shadow(p, size, gfp_mask) < 0)) {
|
||||
vfree(p);
|
||||
return NULL;
|
||||
}
|
||||
|
@ -31,7 +31,6 @@
|
||||
* We need to define the tracepoints somewhere, and tlb.c
|
||||
* is only compiled when SMP=y.
|
||||
*/
|
||||
#define CREATE_TRACE_POINTS
|
||||
#include <trace/events/tlb.h>
|
||||
|
||||
#include "mm_internal.h"
|
||||
|
@ -109,6 +109,8 @@
|
||||
#define MADV_POPULATE_READ 22 /* populate (prefault) page tables readable */
|
||||
#define MADV_POPULATE_WRITE 23 /* populate (prefault) page tables writable */
|
||||
|
||||
#define MADV_DONTNEED_LOCKED 24 /* like DONTNEED, but drop locked pages too */
|
||||
|
||||
/* compatibility flags */
|
||||
#define MAP_FILE 0
|
||||
|
||||
|
@ -54,9 +54,17 @@ struct vm_area_struct;
|
||||
#define ___GFP_THISNODE 0x200000u
|
||||
#define ___GFP_ACCOUNT 0x400000u
|
||||
#define ___GFP_ZEROTAGS 0x800000u
|
||||
#define ___GFP_SKIP_KASAN_POISON 0x1000000u
|
||||
#ifdef CONFIG_KASAN_HW_TAGS
|
||||
#define ___GFP_SKIP_ZERO 0x1000000u
|
||||
#define ___GFP_SKIP_KASAN_UNPOISON 0x2000000u
|
||||
#define ___GFP_SKIP_KASAN_POISON 0x4000000u
|
||||
#else
|
||||
#define ___GFP_SKIP_ZERO 0
|
||||
#define ___GFP_SKIP_KASAN_UNPOISON 0
|
||||
#define ___GFP_SKIP_KASAN_POISON 0
|
||||
#endif
|
||||
#ifdef CONFIG_LOCKDEP
|
||||
#define ___GFP_NOLOCKDEP 0x2000000u
|
||||
#define ___GFP_NOLOCKDEP 0x8000000u
|
||||
#else
|
||||
#define ___GFP_NOLOCKDEP 0
|
||||
#endif
|
||||
@ -232,24 +240,33 @@ struct vm_area_struct;
|
||||
*
|
||||
* %__GFP_ZERO returns a zeroed page on success.
|
||||
*
|
||||
* %__GFP_ZEROTAGS returns a page with zeroed memory tags on success, if
|
||||
* __GFP_ZERO is set.
|
||||
* %__GFP_ZEROTAGS zeroes memory tags at allocation time if the memory itself
|
||||
* is being zeroed (either via __GFP_ZERO or via init_on_alloc, provided that
|
||||
* __GFP_SKIP_ZERO is not set). This flag is intended for optimization: setting
|
||||
* memory tags at the same time as zeroing memory has minimal additional
|
||||
* performance impact.
|
||||
*
|
||||
* %__GFP_SKIP_KASAN_POISON returns a page which does not need to be poisoned
|
||||
* on deallocation. Typically used for userspace pages. Currently only has an
|
||||
* effect in HW tags mode.
|
||||
* %__GFP_SKIP_KASAN_UNPOISON makes KASAN skip unpoisoning on page allocation.
|
||||
* Only effective in HW_TAGS mode.
|
||||
*
|
||||
* %__GFP_SKIP_KASAN_POISON makes KASAN skip poisoning on page deallocation.
|
||||
* Typically used for userspace pages. Only effective in HW_TAGS mode.
|
||||
*/
|
||||
#define __GFP_NOWARN ((__force gfp_t)___GFP_NOWARN)
|
||||
#define __GFP_COMP ((__force gfp_t)___GFP_COMP)
|
||||
#define __GFP_ZERO ((__force gfp_t)___GFP_ZERO)
|
||||
#define __GFP_ZEROTAGS ((__force gfp_t)___GFP_ZEROTAGS)
|
||||
#define __GFP_SKIP_KASAN_POISON ((__force gfp_t)___GFP_SKIP_KASAN_POISON)
|
||||
#define __GFP_SKIP_ZERO ((__force gfp_t)___GFP_SKIP_ZERO)
|
||||
#define __GFP_SKIP_KASAN_UNPOISON ((__force gfp_t)___GFP_SKIP_KASAN_UNPOISON)
|
||||
#define __GFP_SKIP_KASAN_POISON ((__force gfp_t)___GFP_SKIP_KASAN_POISON)
|
||||
|
||||
/* Disable lockdep for GFP context tracking */
|
||||
#define __GFP_NOLOCKDEP ((__force gfp_t)___GFP_NOLOCKDEP)
|
||||
|
||||
/* Room for N __GFP_FOO bits */
|
||||
#define __GFP_BITS_SHIFT (25 + IS_ENABLED(CONFIG_LOCKDEP))
|
||||
#define __GFP_BITS_SHIFT (24 + \
|
||||
3 * IS_ENABLED(CONFIG_KASAN_HW_TAGS) + \
|
||||
IS_ENABLED(CONFIG_LOCKDEP))
|
||||
#define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
|
||||
|
||||
/**
|
||||
|
@ -183,7 +183,6 @@ unsigned long thp_get_unmapped_area(struct file *filp, unsigned long addr,
|
||||
|
||||
void prep_transhuge_page(struct page *page);
|
||||
void free_transhuge_page(struct page *page);
|
||||
bool is_transparent_hugepage(struct page *page);
|
||||
|
||||
bool can_split_folio(struct folio *folio, int *pextra_pins);
|
||||
int split_huge_page_to_list(struct page *page, struct list_head *list);
|
||||
@ -341,11 +340,6 @@ static inline bool transhuge_vma_enabled(struct vm_area_struct *vma,
|
||||
|
||||
static inline void prep_transhuge_page(struct page *page) {}
|
||||
|
||||
static inline bool is_transparent_hugepage(struct page *page)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
#define transparent_hugepage_flags 0UL
|
||||
|
||||
#define thp_get_unmapped_area NULL
|
||||
|
@ -19,13 +19,15 @@ struct task_struct;
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/kasan.h>
|
||||
|
||||
/* kasan_data struct is used in KUnit tests for KASAN expected failures */
|
||||
struct kunit_kasan_expectation {
|
||||
bool report_found;
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
typedef unsigned int __bitwise kasan_vmalloc_flags_t;
|
||||
|
||||
#define KASAN_VMALLOC_NONE 0x00u
|
||||
#define KASAN_VMALLOC_INIT 0x01u
|
||||
#define KASAN_VMALLOC_VM_ALLOC 0x02u
|
||||
#define KASAN_VMALLOC_PROT_NORMAL 0x04u
|
||||
|
||||
#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
|
||||
|
||||
#include <linux/pgtable.h>
|
||||
@ -84,25 +86,8 @@ static inline void kasan_disable_current(void) {}
|
||||
|
||||
#ifdef CONFIG_KASAN_HW_TAGS
|
||||
|
||||
void kasan_alloc_pages(struct page *page, unsigned int order, gfp_t flags);
|
||||
void kasan_free_pages(struct page *page, unsigned int order);
|
||||
|
||||
#else /* CONFIG_KASAN_HW_TAGS */
|
||||
|
||||
static __always_inline void kasan_alloc_pages(struct page *page,
|
||||
unsigned int order, gfp_t flags)
|
||||
{
|
||||
/* Only available for integrated init. */
|
||||
BUILD_BUG();
|
||||
}
|
||||
|
||||
static __always_inline void kasan_free_pages(struct page *page,
|
||||
unsigned int order)
|
||||
{
|
||||
/* Only available for integrated init. */
|
||||
BUILD_BUG();
|
||||
}
|
||||
|
||||
#endif /* CONFIG_KASAN_HW_TAGS */
|
||||
|
||||
static inline bool kasan_has_integrated_init(void)
|
||||
@ -282,10 +267,6 @@ static __always_inline bool kasan_check_byte(const void *addr)
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
bool kasan_save_enable_multi_shot(void);
|
||||
void kasan_restore_multi_shot(bool enabled);
|
||||
|
||||
#else /* CONFIG_KASAN */
|
||||
|
||||
static inline slab_flags_t kasan_never_merge(void)
|
||||
@ -414,34 +395,71 @@ static inline void kasan_init_hw_tags(void) { }
|
||||
|
||||
#ifdef CONFIG_KASAN_VMALLOC
|
||||
|
||||
#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
|
||||
|
||||
void kasan_populate_early_vm_area_shadow(void *start, unsigned long size);
|
||||
int kasan_populate_vmalloc(unsigned long addr, unsigned long size);
|
||||
void kasan_poison_vmalloc(const void *start, unsigned long size);
|
||||
void kasan_unpoison_vmalloc(const void *start, unsigned long size);
|
||||
void kasan_release_vmalloc(unsigned long start, unsigned long end,
|
||||
unsigned long free_region_start,
|
||||
unsigned long free_region_end);
|
||||
|
||||
void kasan_populate_early_vm_area_shadow(void *start, unsigned long size);
|
||||
|
||||
#else /* CONFIG_KASAN_VMALLOC */
|
||||
#else /* CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS */
|
||||
|
||||
static inline void kasan_populate_early_vm_area_shadow(void *start,
|
||||
unsigned long size)
|
||||
{ }
|
||||
static inline int kasan_populate_vmalloc(unsigned long start,
|
||||
unsigned long size)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline void kasan_poison_vmalloc(const void *start, unsigned long size)
|
||||
{ }
|
||||
static inline void kasan_unpoison_vmalloc(const void *start, unsigned long size)
|
||||
{ }
|
||||
static inline void kasan_release_vmalloc(unsigned long start,
|
||||
unsigned long end,
|
||||
unsigned long free_region_start,
|
||||
unsigned long free_region_end) {}
|
||||
unsigned long free_region_end) { }
|
||||
|
||||
#endif /* CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS */
|
||||
|
||||
void *__kasan_unpoison_vmalloc(const void *start, unsigned long size,
|
||||
kasan_vmalloc_flags_t flags);
|
||||
static __always_inline void *kasan_unpoison_vmalloc(const void *start,
|
||||
unsigned long size,
|
||||
kasan_vmalloc_flags_t flags)
|
||||
{
|
||||
if (kasan_enabled())
|
||||
return __kasan_unpoison_vmalloc(start, size, flags);
|
||||
return (void *)start;
|
||||
}
|
||||
|
||||
void __kasan_poison_vmalloc(const void *start, unsigned long size);
|
||||
static __always_inline void kasan_poison_vmalloc(const void *start,
|
||||
unsigned long size)
|
||||
{
|
||||
if (kasan_enabled())
|
||||
__kasan_poison_vmalloc(start, size);
|
||||
}
|
||||
|
||||
#else /* CONFIG_KASAN_VMALLOC */
|
||||
|
||||
static inline void kasan_populate_early_vm_area_shadow(void *start,
|
||||
unsigned long size)
|
||||
unsigned long size) { }
|
||||
static inline int kasan_populate_vmalloc(unsigned long start,
|
||||
unsigned long size)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
static inline void kasan_release_vmalloc(unsigned long start,
|
||||
unsigned long end,
|
||||
unsigned long free_region_start,
|
||||
unsigned long free_region_end) { }
|
||||
|
||||
static inline void *kasan_unpoison_vmalloc(const void *start,
|
||||
unsigned long size,
|
||||
kasan_vmalloc_flags_t flags)
|
||||
{
|
||||
return (void *)start;
|
||||
}
|
||||
static inline void kasan_poison_vmalloc(const void *start, unsigned long size)
|
||||
{ }
|
||||
|
||||
#endif /* CONFIG_KASAN_VMALLOC */
|
||||
@ -450,17 +468,17 @@ static inline void kasan_populate_early_vm_area_shadow(void *start,
|
||||
!defined(CONFIG_KASAN_VMALLOC)
|
||||
|
||||
/*
|
||||
* These functions provide a special case to support backing module
|
||||
* allocations with real shadow memory. With KASAN vmalloc, the special
|
||||
* case is unnecessary, as the work is handled in the generic case.
|
||||
* These functions allocate and free shadow memory for kernel modules.
|
||||
* They are only required when KASAN_VMALLOC is not supported, as otherwise
|
||||
* shadow memory is allocated by the generic vmalloc handlers.
|
||||
*/
|
||||
int kasan_module_alloc(void *addr, size_t size, gfp_t gfp_mask);
|
||||
void kasan_free_shadow(const struct vm_struct *vm);
|
||||
int kasan_alloc_module_shadow(void *addr, size_t size, gfp_t gfp_mask);
|
||||
void kasan_free_module_shadow(const struct vm_struct *vm);
|
||||
|
||||
#else /* (CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS) && !CONFIG_KASAN_VMALLOC */
|
||||
|
||||
static inline int kasan_module_alloc(void *addr, size_t size, gfp_t gfp_mask) { return 0; }
|
||||
static inline void kasan_free_shadow(const struct vm_struct *vm) {}
|
||||
static inline int kasan_alloc_module_shadow(void *addr, size_t size, gfp_t gfp_mask) { return 0; }
|
||||
static inline void kasan_free_module_shadow(const struct vm_struct *vm) {}
|
||||
|
||||
#endif /* (CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS) && !CONFIG_KASAN_VMALLOC */
|
||||
|
||||
|
@ -834,16 +834,11 @@ static inline int total_mapcount(struct page *page)
|
||||
return folio_mapcount(page_folio(page));
|
||||
}
|
||||
|
||||
int page_trans_huge_mapcount(struct page *page);
|
||||
#else
|
||||
static inline int total_mapcount(struct page *page)
|
||||
{
|
||||
return page_mapcount(page);
|
||||
}
|
||||
static inline int page_trans_huge_mapcount(struct page *page)
|
||||
{
|
||||
return page_mapcount(page);
|
||||
}
|
||||
#endif
|
||||
|
||||
static inline struct page *virt_to_head_page(const void *x)
|
||||
|
@ -481,7 +481,7 @@ static inline int TestClearPage##uname(struct page *page) { return 0; }
|
||||
TESTSETFLAG_FALSE(uname, lname) TESTCLEARFLAG_FALSE(uname, lname)
|
||||
|
||||
__PAGEFLAG(Locked, locked, PF_NO_TAIL)
|
||||
PAGEFLAG(Waiters, waiters, PF_ONLY_HEAD) __CLEARPAGEFLAG(Waiters, waiters, PF_ONLY_HEAD)
|
||||
PAGEFLAG(Waiters, waiters, PF_ONLY_HEAD)
|
||||
PAGEFLAG(Error, error, PF_NO_TAIL) TESTCLEARFLAG(Error, error, PF_NO_TAIL)
|
||||
PAGEFLAG(Referenced, referenced, PF_HEAD)
|
||||
TESTCLEARFLAG(Referenced, referenced, PF_HEAD)
|
||||
|
@ -1009,8 +1009,7 @@ static inline void __set_page_dirty(struct page *page,
|
||||
{
|
||||
__folio_mark_dirty(page_folio(page), mapping, warn);
|
||||
}
|
||||
void folio_account_cleaned(struct folio *folio, struct address_space *mapping,
|
||||
struct bdi_writeback *wb);
|
||||
void folio_account_cleaned(struct folio *folio, struct bdi_writeback *wb);
|
||||
void __folio_cancel_dirty(struct folio *folio);
|
||||
static inline void folio_cancel_dirty(struct folio *folio)
|
||||
{
|
||||
|
@ -515,7 +515,6 @@ extern int __swp_swapcount(swp_entry_t entry);
|
||||
extern int swp_swapcount(swp_entry_t entry);
|
||||
extern struct swap_info_struct *page_swap_info(struct page *);
|
||||
extern struct swap_info_struct *swp_swap_info(swp_entry_t entry);
|
||||
extern bool reuse_swap_page(struct page *);
|
||||
extern int try_to_free_swap(struct page *);
|
||||
struct backing_dev_info;
|
||||
extern int init_swap_address_space(unsigned int type, unsigned long nr_pages);
|
||||
@ -681,9 +680,6 @@ static inline int swp_swapcount(swp_entry_t entry)
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define reuse_swap_page(page) \
|
||||
(page_trans_huge_mapcount(page) == 1)
|
||||
|
||||
static inline int try_to_free_swap(struct page *page)
|
||||
{
|
||||
return 0;
|
||||
|
@ -35,17 +35,6 @@ struct notifier_block; /* in notifier.h */
|
||||
#define VM_DEFER_KMEMLEAK 0
|
||||
#endif
|
||||
|
||||
/*
|
||||
* VM_KASAN is used slightly differently depending on CONFIG_KASAN_VMALLOC.
|
||||
*
|
||||
* If IS_ENABLED(CONFIG_KASAN_VMALLOC), VM_KASAN is set on a vm_struct after
|
||||
* shadow memory has been mapped. It's used to handle allocation errors so that
|
||||
* we don't try to poison shadow on free if it was never allocated.
|
||||
*
|
||||
* Otherwise, VM_KASAN is set for kasan_module_alloc() allocations and used to
|
||||
* determine which allocations need the module shadow freed.
|
||||
*/
|
||||
|
||||
/* bits [20..32] reserved for arch specific ioremap internals */
|
||||
|
||||
/*
|
||||
@ -126,6 +115,13 @@ static inline int arch_vmap_pte_supported_shift(unsigned long size)
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef arch_vmap_pgprot_tagged
|
||||
static inline pgprot_t arch_vmap_pgprot_tagged(pgprot_t prot)
|
||||
{
|
||||
return prot;
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Highlevel APIs for driver use
|
||||
*/
|
||||
|
@ -29,7 +29,6 @@
|
||||
EM( SCAN_VMA_NULL, "vma_null") \
|
||||
EM( SCAN_VMA_CHECK, "vma_check_failed") \
|
||||
EM( SCAN_ADDRESS_RANGE, "not_suitable_address_range") \
|
||||
EM( SCAN_SWAP_CACHE_PAGE, "page_swap_cache") \
|
||||
EM( SCAN_DEL_PAGE_LRU, "could_not_delete_page_from_lru")\
|
||||
EM( SCAN_ALLOC_HUGE_PAGE_FAIL, "alloc_huge_page_failed") \
|
||||
EM( SCAN_CGROUP_CHARGE_FAIL, "ccgroup_charge_failed") \
|
||||
|
@ -105,6 +105,37 @@ TRACE_EVENT(mm_migrate_pages_start,
|
||||
__print_symbolic(__entry->reason, MIGRATE_REASON))
|
||||
);
|
||||
|
||||
DECLARE_EVENT_CLASS(migration_pte,
|
||||
|
||||
TP_PROTO(unsigned long addr, unsigned long pte, int order),
|
||||
|
||||
TP_ARGS(addr, pte, order),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field(unsigned long, addr)
|
||||
__field(unsigned long, pte)
|
||||
__field(int, order)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->addr = addr;
|
||||
__entry->pte = pte;
|
||||
__entry->order = order;
|
||||
),
|
||||
|
||||
TP_printk("addr=%lx, pte=%lx order=%d", __entry->addr, __entry->pte, __entry->order)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(migration_pte, set_migration_pte,
|
||||
TP_PROTO(unsigned long addr, unsigned long pte, int order),
|
||||
TP_ARGS(addr, pte, order)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(migration_pte, remove_migration_pte,
|
||||
TP_PROTO(unsigned long addr, unsigned long pte, int order),
|
||||
TP_ARGS(addr, pte, order)
|
||||
);
|
||||
|
||||
#endif /* _TRACE_MIGRATE_H */
|
||||
|
||||
/* This part must be outside protection */
|
||||
|
@ -49,12 +49,20 @@
|
||||
{(unsigned long)__GFP_RECLAIM, "__GFP_RECLAIM"}, \
|
||||
{(unsigned long)__GFP_DIRECT_RECLAIM, "__GFP_DIRECT_RECLAIM"},\
|
||||
{(unsigned long)__GFP_KSWAPD_RECLAIM, "__GFP_KSWAPD_RECLAIM"},\
|
||||
{(unsigned long)__GFP_ZEROTAGS, "__GFP_ZEROTAGS"}, \
|
||||
{(unsigned long)__GFP_SKIP_KASAN_POISON,"__GFP_SKIP_KASAN_POISON"}\
|
||||
{(unsigned long)__GFP_ZEROTAGS, "__GFP_ZEROTAGS"} \
|
||||
|
||||
#ifdef CONFIG_KASAN_HW_TAGS
|
||||
#define __def_gfpflag_names_kasan , \
|
||||
{(unsigned long)__GFP_SKIP_ZERO, "__GFP_SKIP_ZERO"}, \
|
||||
{(unsigned long)__GFP_SKIP_KASAN_POISON, "__GFP_SKIP_KASAN_POISON"}, \
|
||||
{(unsigned long)__GFP_SKIP_KASAN_UNPOISON, "__GFP_SKIP_KASAN_UNPOISON"}
|
||||
#else
|
||||
#define __def_gfpflag_names_kasan
|
||||
#endif
|
||||
|
||||
#define show_gfp_flags(flags) \
|
||||
(flags) ? __print_flags(flags, "|", \
|
||||
__def_gfpflag_names \
|
||||
__def_gfpflag_names __def_gfpflag_names_kasan \
|
||||
) : "none"
|
||||
|
||||
#ifdef CONFIG_MMU
|
||||
|
@ -48,6 +48,33 @@ TRACE_EVENT(hugepage_update,
|
||||
TP_printk("hugepage update at addr 0x%lx and pte = 0x%lx clr = 0x%lx, set = 0x%lx", __entry->addr, __entry->pte, __entry->clr, __entry->set)
|
||||
);
|
||||
|
||||
DECLARE_EVENT_CLASS(migration_pmd,
|
||||
|
||||
TP_PROTO(unsigned long addr, unsigned long pmd),
|
||||
|
||||
TP_ARGS(addr, pmd),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field(unsigned long, addr)
|
||||
__field(unsigned long, pmd)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->addr = addr;
|
||||
__entry->pmd = pmd;
|
||||
),
|
||||
TP_printk("addr=%lx, pmd=%lx", __entry->addr, __entry->pmd)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(migration_pmd, set_migration_pmd,
|
||||
TP_PROTO(unsigned long addr, unsigned long pmd),
|
||||
TP_ARGS(addr, pmd)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(migration_pmd, remove_migration_pmd,
|
||||
TP_PROTO(unsigned long addr, unsigned long pmd),
|
||||
TP_ARGS(addr, pmd)
|
||||
);
|
||||
#endif /* _TRACE_THP_H */
|
||||
|
||||
/* This part must be outside protection */
|
||||
|
@ -75,6 +75,8 @@
|
||||
#define MADV_POPULATE_READ 22 /* populate (prefault) page tables readable */
|
||||
#define MADV_POPULATE_WRITE 23 /* populate (prefault) page tables writable */
|
||||
|
||||
#define MADV_DONTNEED_LOCKED 24 /* like DONTNEED, but drop locked pages too */
|
||||
|
||||
/* compatibility flags */
|
||||
#define MAP_FILE 0
|
||||
|
||||
|
@ -286,11 +286,13 @@ static int alloc_thread_stack_node(struct task_struct *tsk, int node)
|
||||
if (!s)
|
||||
continue;
|
||||
|
||||
/* Mark stack accessible for KASAN. */
|
||||
/* Reset stack metadata. */
|
||||
kasan_unpoison_range(s->addr, THREAD_SIZE);
|
||||
|
||||
stack = kasan_reset_tag(s->addr);
|
||||
|
||||
/* Clear stale pointers from reused stack. */
|
||||
memset(s->addr, 0, THREAD_SIZE);
|
||||
memset(stack, 0, THREAD_SIZE);
|
||||
|
||||
if (memcg_charge_kernel_stack(s)) {
|
||||
vfree(s->addr);
|
||||
@ -298,7 +300,7 @@ static int alloc_thread_stack_node(struct task_struct *tsk, int node)
|
||||
}
|
||||
|
||||
tsk->stack_vm_area = s;
|
||||
tsk->stack = s->addr;
|
||||
tsk->stack = stack;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -326,6 +328,7 @@ static int alloc_thread_stack_node(struct task_struct *tsk, int node)
|
||||
* so cache the vm_struct.
|
||||
*/
|
||||
tsk->stack_vm_area = vm;
|
||||
stack = kasan_reset_tag(stack);
|
||||
tsk->stack = stack;
|
||||
return 0;
|
||||
}
|
||||
|
12
kernel/scs.c
12
kernel/scs.c
@ -32,15 +32,19 @@ static void *__scs_alloc(int node)
|
||||
for (i = 0; i < NR_CACHED_SCS; i++) {
|
||||
s = this_cpu_xchg(scs_cache[i], NULL);
|
||||
if (s) {
|
||||
kasan_unpoison_vmalloc(s, SCS_SIZE);
|
||||
s = kasan_unpoison_vmalloc(s, SCS_SIZE,
|
||||
KASAN_VMALLOC_PROT_NORMAL);
|
||||
memset(s, 0, SCS_SIZE);
|
||||
return s;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
return __vmalloc_node_range(SCS_SIZE, 1, VMALLOC_START, VMALLOC_END,
|
||||
s = __vmalloc_node_range(SCS_SIZE, 1, VMALLOC_START, VMALLOC_END,
|
||||
GFP_SCS, PAGE_KERNEL, 0, node,
|
||||
__builtin_return_address(0));
|
||||
|
||||
out:
|
||||
return kasan_reset_tag(s);
|
||||
}
|
||||
|
||||
void *scs_alloc(int node)
|
||||
@ -78,7 +82,7 @@ void scs_free(void *s)
|
||||
if (this_cpu_cmpxchg(scs_cache[i], 0, s) == NULL)
|
||||
return;
|
||||
|
||||
kasan_unpoison_vmalloc(s, SCS_SIZE);
|
||||
kasan_unpoison_vmalloc(s, SCS_SIZE, KASAN_VMALLOC_PROT_NORMAL);
|
||||
vfree_atomic(s);
|
||||
}
|
||||
|
||||
|
@ -178,17 +178,17 @@ config KASAN_TAGS_IDENTIFY
|
||||
memory consumption.
|
||||
|
||||
config KASAN_VMALLOC
|
||||
bool "Back mappings in vmalloc space with real shadow memory"
|
||||
depends on KASAN_GENERIC && HAVE_ARCH_KASAN_VMALLOC
|
||||
bool "Check accesses to vmalloc allocations"
|
||||
depends on HAVE_ARCH_KASAN_VMALLOC
|
||||
help
|
||||
By default, the shadow region for vmalloc space is the read-only
|
||||
zero page. This means that KASAN cannot detect errors involving
|
||||
vmalloc space.
|
||||
This mode makes KASAN check accesses to vmalloc allocations for
|
||||
validity.
|
||||
|
||||
Enabling this option will hook in to vmap/vmalloc and back those
|
||||
mappings with real shadow memory allocated on demand. This allows
|
||||
for KASAN to detect more sorts of errors (and to support vmapped
|
||||
stacks), but at the cost of higher memory usage.
|
||||
With software KASAN modes, checking is done for all types of vmalloc
|
||||
allocations. Enabling this option leads to higher memory usage.
|
||||
|
||||
With hardware tag-based KASAN, only VM_ALLOC mappings are checked.
|
||||
There is no additional memory usage.
|
||||
|
||||
config KASAN_KUNIT_TEST
|
||||
tristate "KUnit-compatible tests of KASAN bug detection capabilities" if !KUNIT_ALL_TESTS
|
||||
|
241
lib/test_kasan.c
241
lib/test_kasan.c
@ -19,6 +19,7 @@
|
||||
#include <linux/uaccess.h>
|
||||
#include <linux/io.h>
|
||||
#include <linux/vmalloc.h>
|
||||
#include <linux/set_memory.h>
|
||||
|
||||
#include <asm/page.h>
|
||||
|
||||
@ -36,7 +37,7 @@ void *kasan_ptr_result;
|
||||
int kasan_int_result;
|
||||
|
||||
static struct kunit_resource resource;
|
||||
static struct kunit_kasan_expectation fail_data;
|
||||
static struct kunit_kasan_status test_status;
|
||||
static bool multishot;
|
||||
|
||||
/*
|
||||
@ -53,58 +54,63 @@ static int kasan_test_init(struct kunit *test)
|
||||
}
|
||||
|
||||
multishot = kasan_save_enable_multi_shot();
|
||||
fail_data.report_found = false;
|
||||
test_status.report_found = false;
|
||||
test_status.sync_fault = false;
|
||||
kunit_add_named_resource(test, NULL, NULL, &resource,
|
||||
"kasan_data", &fail_data);
|
||||
"kasan_status", &test_status);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void kasan_test_exit(struct kunit *test)
|
||||
{
|
||||
kasan_restore_multi_shot(multishot);
|
||||
KUNIT_EXPECT_FALSE(test, fail_data.report_found);
|
||||
KUNIT_EXPECT_FALSE(test, test_status.report_found);
|
||||
}
|
||||
|
||||
/**
|
||||
* KUNIT_EXPECT_KASAN_FAIL() - check that the executed expression produces a
|
||||
* KASAN report; causes a test failure otherwise. This relies on a KUnit
|
||||
* resource named "kasan_data". Do not use this name for KUnit resources
|
||||
* resource named "kasan_status". Do not use this name for KUnit resources
|
||||
* outside of KASAN tests.
|
||||
*
|
||||
* For hardware tag-based KASAN in sync mode, when a tag fault happens, tag
|
||||
* For hardware tag-based KASAN, when a synchronous tag fault happens, tag
|
||||
* checking is auto-disabled. When this happens, this test handler reenables
|
||||
* tag checking. As tag checking can be only disabled or enabled per CPU,
|
||||
* this handler disables migration (preemption).
|
||||
*
|
||||
* Since the compiler doesn't see that the expression can change the fail_data
|
||||
* Since the compiler doesn't see that the expression can change the test_status
|
||||
* fields, it can reorder or optimize away the accesses to those fields.
|
||||
* Use READ/WRITE_ONCE() for the accesses and compiler barriers around the
|
||||
* expression to prevent that.
|
||||
*
|
||||
* In between KUNIT_EXPECT_KASAN_FAIL checks, fail_data.report_found is kept as
|
||||
* false. This allows detecting KASAN reports that happen outside of the checks
|
||||
* by asserting !fail_data.report_found at the start of KUNIT_EXPECT_KASAN_FAIL
|
||||
* and in kasan_test_exit.
|
||||
* In between KUNIT_EXPECT_KASAN_FAIL checks, test_status.report_found is kept
|
||||
* as false. This allows detecting KASAN reports that happen outside of the
|
||||
* checks by asserting !test_status.report_found at the start of
|
||||
* KUNIT_EXPECT_KASAN_FAIL and in kasan_test_exit.
|
||||
*/
|
||||
#define KUNIT_EXPECT_KASAN_FAIL(test, expression) do { \
|
||||
if (IS_ENABLED(CONFIG_KASAN_HW_TAGS) && \
|
||||
kasan_sync_fault_possible()) \
|
||||
migrate_disable(); \
|
||||
KUNIT_EXPECT_FALSE(test, READ_ONCE(fail_data.report_found)); \
|
||||
KUNIT_EXPECT_FALSE(test, READ_ONCE(test_status.report_found)); \
|
||||
barrier(); \
|
||||
expression; \
|
||||
barrier(); \
|
||||
if (!READ_ONCE(fail_data.report_found)) { \
|
||||
if (kasan_async_fault_possible()) \
|
||||
kasan_force_async_fault(); \
|
||||
if (!READ_ONCE(test_status.report_found)) { \
|
||||
KUNIT_FAIL(test, KUNIT_SUBTEST_INDENT "KASAN failure " \
|
||||
"expected in \"" #expression \
|
||||
"\", but none occurred"); \
|
||||
} \
|
||||
if (IS_ENABLED(CONFIG_KASAN_HW_TAGS)) { \
|
||||
if (READ_ONCE(fail_data.report_found)) \
|
||||
kasan_enable_tagging_sync(); \
|
||||
if (IS_ENABLED(CONFIG_KASAN_HW_TAGS) && \
|
||||
kasan_sync_fault_possible()) { \
|
||||
if (READ_ONCE(test_status.report_found) && \
|
||||
READ_ONCE(test_status.sync_fault)) \
|
||||
kasan_enable_tagging(); \
|
||||
migrate_enable(); \
|
||||
} \
|
||||
WRITE_ONCE(fail_data.report_found, false); \
|
||||
WRITE_ONCE(test_status.report_found, false); \
|
||||
} while (0)
|
||||
|
||||
#define KASAN_TEST_NEEDS_CONFIG_ON(test, config) do { \
|
||||
@ -780,7 +786,7 @@ static void ksize_uaf(struct kunit *test)
|
||||
static void kasan_stack_oob(struct kunit *test)
|
||||
{
|
||||
char stack_array[10];
|
||||
/* See comment in kasan_global_oob. */
|
||||
/* See comment in kasan_global_oob_right. */
|
||||
char *volatile array = stack_array;
|
||||
char *p = &array[ARRAY_SIZE(stack_array) + OOB_TAG_OFF];
|
||||
|
||||
@ -793,7 +799,7 @@ static void kasan_alloca_oob_left(struct kunit *test)
|
||||
{
|
||||
volatile int i = 10;
|
||||
char alloca_array[i];
|
||||
/* See comment in kasan_global_oob. */
|
||||
/* See comment in kasan_global_oob_right. */
|
||||
char *volatile array = alloca_array;
|
||||
char *p = array - 1;
|
||||
|
||||
@ -808,7 +814,7 @@ static void kasan_alloca_oob_right(struct kunit *test)
|
||||
{
|
||||
volatile int i = 10;
|
||||
char alloca_array[i];
|
||||
/* See comment in kasan_global_oob. */
|
||||
/* See comment in kasan_global_oob_right. */
|
||||
char *volatile array = alloca_array;
|
||||
char *p = array + i;
|
||||
|
||||
@ -1057,21 +1063,186 @@ static void kmalloc_double_kzfree(struct kunit *test)
|
||||
KUNIT_EXPECT_KASAN_FAIL(test, kfree_sensitive(ptr));
|
||||
}
|
||||
|
||||
static void vmalloc_oob(struct kunit *test)
|
||||
static void vmalloc_helpers_tags(struct kunit *test)
|
||||
{
|
||||
void *area;
|
||||
void *ptr;
|
||||
|
||||
/* This test is intended for tag-based modes. */
|
||||
KASAN_TEST_NEEDS_CONFIG_OFF(test, CONFIG_KASAN_GENERIC);
|
||||
|
||||
KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_VMALLOC);
|
||||
|
||||
ptr = vmalloc(PAGE_SIZE);
|
||||
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
|
||||
|
||||
/* Check that the returned pointer is tagged. */
|
||||
KUNIT_EXPECT_GE(test, (u8)get_tag(ptr), (u8)KASAN_TAG_MIN);
|
||||
KUNIT_EXPECT_LT(test, (u8)get_tag(ptr), (u8)KASAN_TAG_KERNEL);
|
||||
|
||||
/* Make sure exported vmalloc helpers handle tagged pointers. */
|
||||
KUNIT_ASSERT_TRUE(test, is_vmalloc_addr(ptr));
|
||||
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, vmalloc_to_page(ptr));
|
||||
|
||||
#if !IS_MODULE(CONFIG_KASAN_KUNIT_TEST)
|
||||
{
|
||||
int rv;
|
||||
|
||||
/* Make sure vmalloc'ed memory permissions can be changed. */
|
||||
rv = set_memory_ro((unsigned long)ptr, 1);
|
||||
KUNIT_ASSERT_GE(test, rv, 0);
|
||||
rv = set_memory_rw((unsigned long)ptr, 1);
|
||||
KUNIT_ASSERT_GE(test, rv, 0);
|
||||
}
|
||||
#endif
|
||||
|
||||
vfree(ptr);
|
||||
}
|
||||
|
||||
static void vmalloc_oob(struct kunit *test)
|
||||
{
|
||||
char *v_ptr, *p_ptr;
|
||||
struct page *page;
|
||||
size_t size = PAGE_SIZE / 2 - KASAN_GRANULE_SIZE - 5;
|
||||
|
||||
KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_VMALLOC);
|
||||
|
||||
v_ptr = vmalloc(size);
|
||||
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, v_ptr);
|
||||
|
||||
OPTIMIZER_HIDE_VAR(v_ptr);
|
||||
|
||||
/*
|
||||
* We have to be careful not to hit the guard page.
|
||||
* We have to be careful not to hit the guard page in vmalloc tests.
|
||||
* The MMU will catch that and crash us.
|
||||
*/
|
||||
area = vmalloc(3000);
|
||||
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, area);
|
||||
|
||||
KUNIT_EXPECT_KASAN_FAIL(test, ((volatile char *)area)[3100]);
|
||||
vfree(area);
|
||||
/* Make sure in-bounds accesses are valid. */
|
||||
v_ptr[0] = 0;
|
||||
v_ptr[size - 1] = 0;
|
||||
|
||||
/*
|
||||
* An unaligned access past the requested vmalloc size.
|
||||
* Only generic KASAN can precisely detect these.
|
||||
*/
|
||||
if (IS_ENABLED(CONFIG_KASAN_GENERIC))
|
||||
KUNIT_EXPECT_KASAN_FAIL(test, ((volatile char *)v_ptr)[size]);
|
||||
|
||||
/* An aligned access into the first out-of-bounds granule. */
|
||||
KUNIT_EXPECT_KASAN_FAIL(test, ((volatile char *)v_ptr)[size + 5]);
|
||||
|
||||
/* Check that in-bounds accesses to the physical page are valid. */
|
||||
page = vmalloc_to_page(v_ptr);
|
||||
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, page);
|
||||
p_ptr = page_address(page);
|
||||
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, p_ptr);
|
||||
p_ptr[0] = 0;
|
||||
|
||||
vfree(v_ptr);
|
||||
|
||||
/*
|
||||
* We can't check for use-after-unmap bugs in this nor in the following
|
||||
* vmalloc tests, as the page might be fully unmapped and accessing it
|
||||
* will crash the kernel.
|
||||
*/
|
||||
}
|
||||
|
||||
static void vmap_tags(struct kunit *test)
|
||||
{
|
||||
char *p_ptr, *v_ptr;
|
||||
struct page *p_page, *v_page;
|
||||
|
||||
/*
|
||||
* This test is specifically crafted for the software tag-based mode,
|
||||
* the only tag-based mode that poisons vmap mappings.
|
||||
*/
|
||||
KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_SW_TAGS);
|
||||
|
||||
KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_VMALLOC);
|
||||
|
||||
p_page = alloc_pages(GFP_KERNEL, 1);
|
||||
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, p_page);
|
||||
p_ptr = page_address(p_page);
|
||||
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, p_ptr);
|
||||
|
||||
v_ptr = vmap(&p_page, 1, VM_MAP, PAGE_KERNEL);
|
||||
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, v_ptr);
|
||||
|
||||
/*
|
||||
* We can't check for out-of-bounds bugs in this nor in the following
|
||||
* vmalloc tests, as allocations have page granularity and accessing
|
||||
* the guard page will crash the kernel.
|
||||
*/
|
||||
|
||||
KUNIT_EXPECT_GE(test, (u8)get_tag(v_ptr), (u8)KASAN_TAG_MIN);
|
||||
KUNIT_EXPECT_LT(test, (u8)get_tag(v_ptr), (u8)KASAN_TAG_KERNEL);
|
||||
|
||||
/* Make sure that in-bounds accesses through both pointers work. */
|
||||
*p_ptr = 0;
|
||||
*v_ptr = 0;
|
||||
|
||||
/* Make sure vmalloc_to_page() correctly recovers the page pointer. */
|
||||
v_page = vmalloc_to_page(v_ptr);
|
||||
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, v_page);
|
||||
KUNIT_EXPECT_PTR_EQ(test, p_page, v_page);
|
||||
|
||||
vunmap(v_ptr);
|
||||
free_pages((unsigned long)p_ptr, 1);
|
||||
}
|
||||
|
||||
static void vm_map_ram_tags(struct kunit *test)
|
||||
{
|
||||
char *p_ptr, *v_ptr;
|
||||
struct page *page;
|
||||
|
||||
/*
|
||||
* This test is specifically crafted for the software tag-based mode,
|
||||
* the only tag-based mode that poisons vm_map_ram mappings.
|
||||
*/
|
||||
KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_SW_TAGS);
|
||||
|
||||
page = alloc_pages(GFP_KERNEL, 1);
|
||||
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, page);
|
||||
p_ptr = page_address(page);
|
||||
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, p_ptr);
|
||||
|
||||
v_ptr = vm_map_ram(&page, 1, -1);
|
||||
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, v_ptr);
|
||||
|
||||
KUNIT_EXPECT_GE(test, (u8)get_tag(v_ptr), (u8)KASAN_TAG_MIN);
|
||||
KUNIT_EXPECT_LT(test, (u8)get_tag(v_ptr), (u8)KASAN_TAG_KERNEL);
|
||||
|
||||
/* Make sure that in-bounds accesses through both pointers work. */
|
||||
*p_ptr = 0;
|
||||
*v_ptr = 0;
|
||||
|
||||
vm_unmap_ram(v_ptr, 1);
|
||||
free_pages((unsigned long)p_ptr, 1);
|
||||
}
|
||||
|
||||
static void vmalloc_percpu(struct kunit *test)
|
||||
{
|
||||
char __percpu *ptr;
|
||||
int cpu;
|
||||
|
||||
/*
|
||||
* This test is specifically crafted for the software tag-based mode,
|
||||
* the only tag-based mode that poisons percpu mappings.
|
||||
*/
|
||||
KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_SW_TAGS);
|
||||
|
||||
ptr = __alloc_percpu(PAGE_SIZE, PAGE_SIZE);
|
||||
|
||||
for_each_possible_cpu(cpu) {
|
||||
char *c_ptr = per_cpu_ptr(ptr, cpu);
|
||||
|
||||
KUNIT_EXPECT_GE(test, (u8)get_tag(c_ptr), (u8)KASAN_TAG_MIN);
|
||||
KUNIT_EXPECT_LT(test, (u8)get_tag(c_ptr), (u8)KASAN_TAG_KERNEL);
|
||||
|
||||
/* Make sure that in-bounds accesses don't crash the kernel. */
|
||||
*c_ptr = 0;
|
||||
}
|
||||
|
||||
free_percpu(ptr);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1105,6 +1276,18 @@ static void match_all_not_assigned(struct kunit *test)
|
||||
KUNIT_EXPECT_LT(test, (u8)get_tag(ptr), (u8)KASAN_TAG_KERNEL);
|
||||
free_pages((unsigned long)ptr, order);
|
||||
}
|
||||
|
||||
if (!IS_ENABLED(CONFIG_KASAN_VMALLOC))
|
||||
return;
|
||||
|
||||
for (i = 0; i < 256; i++) {
|
||||
size = (get_random_int() % 1024) + 1;
|
||||
ptr = vmalloc(size);
|
||||
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
|
||||
KUNIT_EXPECT_GE(test, (u8)get_tag(ptr), (u8)KASAN_TAG_MIN);
|
||||
KUNIT_EXPECT_LT(test, (u8)get_tag(ptr), (u8)KASAN_TAG_KERNEL);
|
||||
vfree(ptr);
|
||||
}
|
||||
}
|
||||
|
||||
/* Check that 0xff works as a match-all pointer tag for tag-based modes. */
|
||||
@ -1210,7 +1393,11 @@ static struct kunit_case kasan_kunit_test_cases[] = {
|
||||
KUNIT_CASE(kasan_bitops_generic),
|
||||
KUNIT_CASE(kasan_bitops_tags),
|
||||
KUNIT_CASE(kmalloc_double_kzfree),
|
||||
KUNIT_CASE(vmalloc_helpers_tags),
|
||||
KUNIT_CASE(vmalloc_oob),
|
||||
KUNIT_CASE(vmap_tags),
|
||||
KUNIT_CASE(vm_map_ram_tags),
|
||||
KUNIT_CASE(vmalloc_percpu),
|
||||
KUNIT_CASE(match_all_not_assigned),
|
||||
KUNIT_CASE(match_all_ptr_tag),
|
||||
KUNIT_CASE(match_all_mem_tag),
|
||||
|
@ -2906,13 +2906,15 @@ int vscnprintf(char *buf, size_t size, const char *fmt, va_list args)
|
||||
{
|
||||
int i;
|
||||
|
||||
if (unlikely(!size))
|
||||
return 0;
|
||||
|
||||
i = vsnprintf(buf, size, fmt, args);
|
||||
|
||||
if (likely(i < size))
|
||||
return i;
|
||||
if (size != 0)
|
||||
return size - 1;
|
||||
return 0;
|
||||
|
||||
return size - 1;
|
||||
}
|
||||
EXPORT_SYMBOL(vscnprintf);
|
||||
|
||||
|
@ -762,6 +762,9 @@ config ARCH_HAS_CURRENT_STACK_POINTER
|
||||
register alias named "current_stack_pointer", this config can be
|
||||
selected.
|
||||
|
||||
config ARCH_HAS_FILTER_PGPROT
|
||||
bool
|
||||
|
||||
config ARCH_HAS_PTE_DEVMAP
|
||||
bool
|
||||
|
||||
|
@ -261,5 +261,4 @@ void page_init_poison(struct page *page, size_t size)
|
||||
if (page_init_poisoning)
|
||||
memset(page, PAGE_POISON_PATTERN, size);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(page_init_poison);
|
||||
#endif /* CONFIG_DEBUG_VM */
|
||||
|
63
mm/filemap.c
63
mm/filemap.c
@ -152,25 +152,25 @@ static void filemap_unaccount_folio(struct address_space *mapping,
|
||||
|
||||
VM_BUG_ON_FOLIO(folio_mapped(folio), folio);
|
||||
if (!IS_ENABLED(CONFIG_DEBUG_VM) && unlikely(folio_mapped(folio))) {
|
||||
int mapcount;
|
||||
|
||||
pr_alert("BUG: Bad page cache in process %s pfn:%05lx\n",
|
||||
current->comm, folio_pfn(folio));
|
||||
dump_page(&folio->page, "still mapped when deleted");
|
||||
dump_stack();
|
||||
add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
|
||||
|
||||
mapcount = page_mapcount(&folio->page);
|
||||
if (mapping_exiting(mapping) &&
|
||||
folio_ref_count(folio) >= mapcount + 2) {
|
||||
/*
|
||||
* All vmas have already been torn down, so it's
|
||||
* a good bet that actually the folio is unmapped,
|
||||
* and we'd prefer not to leak it: if we're wrong,
|
||||
* some other bad page check should catch it later.
|
||||
*/
|
||||
page_mapcount_reset(&folio->page);
|
||||
folio_ref_sub(folio, mapcount);
|
||||
if (mapping_exiting(mapping) && !folio_test_large(folio)) {
|
||||
int mapcount = page_mapcount(&folio->page);
|
||||
|
||||
if (folio_ref_count(folio) >= mapcount + 2) {
|
||||
/*
|
||||
* All vmas have already been torn down, so it's
|
||||
* a good bet that actually the page is unmapped
|
||||
* and we'd rather not leak it: if we're wrong,
|
||||
* another bad page check should catch it later.
|
||||
*/
|
||||
page_mapcount_reset(&folio->page);
|
||||
folio_ref_sub(folio, mapcount);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -193,16 +193,20 @@ static void filemap_unaccount_folio(struct address_space *mapping,
|
||||
/*
|
||||
* At this point folio must be either written or cleaned by
|
||||
* truncate. Dirty folio here signals a bug and loss of
|
||||
* unwritten data.
|
||||
* unwritten data - on ordinary filesystems.
|
||||
*
|
||||
* This fixes dirty accounting after removing the folio entirely
|
||||
* But it's harmless on in-memory filesystems like tmpfs; and can
|
||||
* occur when a driver which did get_user_pages() sets page dirty
|
||||
* before putting it, while the inode is being finally evicted.
|
||||
*
|
||||
* Below fixes dirty accounting after removing the folio entirely
|
||||
* but leaves the dirty flag set: it has no effect for truncated
|
||||
* folio and anyway will be cleared before returning folio to
|
||||
* buddy allocator.
|
||||
*/
|
||||
if (WARN_ON_ONCE(folio_test_dirty(folio)))
|
||||
folio_account_cleaned(folio, mapping,
|
||||
inode_to_wb(mapping->host));
|
||||
if (WARN_ON_ONCE(folio_test_dirty(folio) &&
|
||||
mapping_can_writeback(mapping)))
|
||||
folio_account_cleaned(folio, inode_to_wb(mapping->host));
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1185,24 +1189,17 @@ static void folio_wake_bit(struct folio *folio, int bit_nr)
|
||||
}
|
||||
|
||||
/*
|
||||
* It is possible for other pages to have collided on the waitqueue
|
||||
* hash, so in that case check for a page match. That prevents a long-
|
||||
* term waiter
|
||||
* It's possible to miss clearing waiters here, when we woke our page
|
||||
* waiters, but the hashed waitqueue has waiters for other pages on it.
|
||||
* That's okay, it's a rare case. The next waker will clear it.
|
||||
*
|
||||
* It is still possible to miss a case here, when we woke page waiters
|
||||
* and removed them from the waitqueue, but there are still other
|
||||
* page waiters.
|
||||
* Note that, depending on the page pool (buddy, hugetlb, ZONE_DEVICE,
|
||||
* other), the flag may be cleared in the course of freeing the page;
|
||||
* but that is not required for correctness.
|
||||
*/
|
||||
if (!waitqueue_active(q) || !key.page_match) {
|
||||
if (!waitqueue_active(q) || !key.page_match)
|
||||
folio_clear_waiters(folio);
|
||||
/*
|
||||
* It's possible to miss clearing Waiters here, when we woke
|
||||
* our page waiters, but the hashed waitqueue has waiters for
|
||||
* other pages on it.
|
||||
*
|
||||
* That's okay, it's a rare case. The next waker will clear it.
|
||||
*/
|
||||
}
|
||||
|
||||
spin_unlock_irqrestore(&q->lock, flags);
|
||||
}
|
||||
|
||||
|
109
mm/huge_memory.c
109
mm/huge_memory.c
@ -40,6 +40,9 @@
|
||||
#include <asm/pgalloc.h>
|
||||
#include "internal.h"
|
||||
|
||||
#define CREATE_TRACE_POINTS
|
||||
#include <trace/events/thp.h>
|
||||
|
||||
/*
|
||||
* By default, transparent hugepage support is disabled in order to avoid
|
||||
* risking an increased memory footprint for applications that are not
|
||||
@ -530,7 +533,7 @@ void prep_transhuge_page(struct page *page)
|
||||
set_compound_page_dtor(page, TRANSHUGE_PAGE_DTOR);
|
||||
}
|
||||
|
||||
bool is_transparent_hugepage(struct page *page)
|
||||
static inline bool is_transparent_hugepage(struct page *page)
|
||||
{
|
||||
if (!PageCompound(page))
|
||||
return false;
|
||||
@ -539,7 +542,6 @@ bool is_transparent_hugepage(struct page *page)
|
||||
return is_huge_zero_page(page) ||
|
||||
page[1].compound_dtor == TRANSHUGE_PAGE_DTOR;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(is_transparent_hugepage);
|
||||
|
||||
static unsigned long __thp_get_unmapped_area(struct file *filp,
|
||||
unsigned long addr, unsigned long len,
|
||||
@ -1301,7 +1303,6 @@ vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf)
|
||||
page = pmd_page(orig_pmd);
|
||||
VM_BUG_ON_PAGE(!PageHead(page), page);
|
||||
|
||||
/* Lock page for reuse_swap_page() */
|
||||
if (!trylock_page(page)) {
|
||||
get_page(page);
|
||||
spin_unlock(vmf->ptl);
|
||||
@ -1317,10 +1318,15 @@ vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf)
|
||||
}
|
||||
|
||||
/*
|
||||
* We can only reuse the page if nobody else maps the huge page or its
|
||||
* part.
|
||||
* See do_wp_page(): we can only map the page writable if there are
|
||||
* no additional references. Note that we always drain the LRU
|
||||
* pagevecs immediately after adding a THP.
|
||||
*/
|
||||
if (reuse_swap_page(page)) {
|
||||
if (page_count(page) > 1 + PageSwapCache(page) * thp_nr_pages(page))
|
||||
goto unlock_fallback;
|
||||
if (PageSwapCache(page))
|
||||
try_to_free_swap(page);
|
||||
if (page_count(page) == 1) {
|
||||
pmd_t entry;
|
||||
entry = pmd_mkyoung(orig_pmd);
|
||||
entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
|
||||
@ -1331,6 +1337,7 @@ vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf)
|
||||
return VM_FAULT_WRITE;
|
||||
}
|
||||
|
||||
unlock_fallback:
|
||||
unlock_page(page);
|
||||
spin_unlock(vmf->ptl);
|
||||
fallback:
|
||||
@ -2126,8 +2133,6 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
|
||||
{
|
||||
spinlock_t *ptl;
|
||||
struct mmu_notifier_range range;
|
||||
bool do_unlock_folio = false;
|
||||
pmd_t _pmd;
|
||||
|
||||
mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm,
|
||||
address & HPAGE_PMD_MASK,
|
||||
@ -2146,42 +2151,12 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
|
||||
goto out;
|
||||
}
|
||||
|
||||
repeat:
|
||||
if (pmd_trans_huge(*pmd)) {
|
||||
if (!folio) {
|
||||
folio = page_folio(pmd_page(*pmd));
|
||||
/*
|
||||
* An anonymous page must be locked, to ensure that a
|
||||
* concurrent reuse_swap_page() sees stable mapcount;
|
||||
* but reuse_swap_page() is not used on shmem or file,
|
||||
* and page lock must not be taken when zap_pmd_range()
|
||||
* calls __split_huge_pmd() while i_mmap_lock is held.
|
||||
*/
|
||||
if (folio_test_anon(folio)) {
|
||||
if (unlikely(!folio_trylock(folio))) {
|
||||
folio_get(folio);
|
||||
_pmd = *pmd;
|
||||
spin_unlock(ptl);
|
||||
folio_lock(folio);
|
||||
spin_lock(ptl);
|
||||
if (unlikely(!pmd_same(*pmd, _pmd))) {
|
||||
folio_unlock(folio);
|
||||
folio_put(folio);
|
||||
folio = NULL;
|
||||
goto repeat;
|
||||
}
|
||||
folio_put(folio);
|
||||
}
|
||||
do_unlock_folio = true;
|
||||
}
|
||||
}
|
||||
} else if (!(pmd_devmap(*pmd) || is_pmd_migration_entry(*pmd)))
|
||||
goto out;
|
||||
__split_huge_pmd_locked(vma, pmd, range.start, freeze);
|
||||
if (pmd_trans_huge(*pmd) || pmd_devmap(*pmd) ||
|
||||
is_pmd_migration_entry(*pmd))
|
||||
__split_huge_pmd_locked(vma, pmd, range.start, freeze);
|
||||
|
||||
out:
|
||||
spin_unlock(ptl);
|
||||
if (do_unlock_folio)
|
||||
folio_unlock(folio);
|
||||
/*
|
||||
* No need to double call mmu_notifier->invalidate_range() callback.
|
||||
* There are 3 cases to consider inside __split_huge_pmd_locked():
|
||||
@ -2476,54 +2451,6 @@ static void __split_huge_page(struct page *page, struct list_head *list,
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* This calculates accurately how many mappings a transparent hugepage
|
||||
* has (unlike page_mapcount() which isn't fully accurate). This full
|
||||
* accuracy is primarily needed to know if copy-on-write faults can
|
||||
* reuse the page and change the mapping to read-write instead of
|
||||
* copying them. At the same time this returns the total_mapcount too.
|
||||
*
|
||||
* The function returns the highest mapcount any one of the subpages
|
||||
* has. If the return value is one, even if different processes are
|
||||
* mapping different subpages of the transparent hugepage, they can
|
||||
* all reuse it, because each process is reusing a different subpage.
|
||||
*
|
||||
* The total_mapcount is instead counting all virtual mappings of the
|
||||
* subpages. If the total_mapcount is equal to "one", it tells the
|
||||
* caller all mappings belong to the same "mm" and in turn the
|
||||
* anon_vma of the transparent hugepage can become the vma->anon_vma
|
||||
* local one as no other process may be mapping any of the subpages.
|
||||
*
|
||||
* It would be more accurate to replace page_mapcount() with
|
||||
* page_trans_huge_mapcount(), however we only use
|
||||
* page_trans_huge_mapcount() in the copy-on-write faults where we
|
||||
* need full accuracy to avoid breaking page pinning, because
|
||||
* page_trans_huge_mapcount() is slower than page_mapcount().
|
||||
*/
|
||||
int page_trans_huge_mapcount(struct page *page)
|
||||
{
|
||||
int i, ret;
|
||||
|
||||
/* hugetlbfs shouldn't call it */
|
||||
VM_BUG_ON_PAGE(PageHuge(page), page);
|
||||
|
||||
if (likely(!PageTransCompound(page)))
|
||||
return atomic_read(&page->_mapcount) + 1;
|
||||
|
||||
page = compound_head(page);
|
||||
|
||||
ret = 0;
|
||||
for (i = 0; i < thp_nr_pages(page); i++) {
|
||||
int mapcount = atomic_read(&page[i]._mapcount) + 1;
|
||||
ret = max(ret, mapcount);
|
||||
}
|
||||
|
||||
if (PageDoubleMap(page))
|
||||
ret -= 1;
|
||||
|
||||
return ret + compound_mapcount(page);
|
||||
}
|
||||
|
||||
/* Racy check whether the huge page can be split */
|
||||
bool can_split_folio(struct folio *folio, int *pextra_pins)
|
||||
{
|
||||
@ -3131,6 +3058,7 @@ void set_pmd_migration_entry(struct page_vma_mapped_walk *pvmw,
|
||||
set_pmd_at(mm, address, pvmw->pmd, pmdswp);
|
||||
page_remove_rmap(page, vma, true);
|
||||
put_page(page);
|
||||
trace_set_migration_pmd(address, pmd_val(pmdswp));
|
||||
}
|
||||
|
||||
void remove_migration_pmd(struct page_vma_mapped_walk *pvmw, struct page *new)
|
||||
@ -3163,5 +3091,6 @@ void remove_migration_pmd(struct page_vma_mapped_walk *pvmw, struct page *new)
|
||||
|
||||
/* No need to invalidate - it was non-present before */
|
||||
update_mmu_cache_pmd(vma, address, pvmw->pmd);
|
||||
trace_remove_migration_pmd(address, pmd_val(pmde));
|
||||
}
|
||||
#endif
|
||||
|
@ -35,7 +35,7 @@ CFLAGS_shadow.o := $(CC_FLAGS_KASAN_RUNTIME)
|
||||
CFLAGS_hw_tags.o := $(CC_FLAGS_KASAN_RUNTIME)
|
||||
CFLAGS_sw_tags.o := $(CC_FLAGS_KASAN_RUNTIME)
|
||||
|
||||
obj-$(CONFIG_KASAN) := common.o report.o
|
||||
obj-y := common.o report.o
|
||||
obj-$(CONFIG_KASAN_GENERIC) += init.o generic.o report_generic.o shadow.o quarantine.o
|
||||
obj-$(CONFIG_KASAN_HW_TAGS) += hw_tags.o report_hw_tags.o tags.o report_tags.o
|
||||
obj-$(CONFIG_KASAN_SW_TAGS) += init.o report_sw_tags.o shadow.o sw_tags.o tags.o report_tags.o
|
||||
|
@ -387,7 +387,7 @@ static inline bool ____kasan_kfree_large(void *ptr, unsigned long ip)
|
||||
}
|
||||
|
||||
/*
|
||||
* The object will be poisoned by kasan_free_pages() or
|
||||
* The object will be poisoned by kasan_poison_pages() or
|
||||
* kasan_slab_free_mempool().
|
||||
*/
|
||||
|
||||
@ -538,7 +538,7 @@ void * __must_check __kasan_kmalloc_large(const void *ptr, size_t size,
|
||||
return NULL;
|
||||
|
||||
/*
|
||||
* The object has already been unpoisoned by kasan_alloc_pages() for
|
||||
* The object has already been unpoisoned by kasan_unpoison_pages() for
|
||||
* alloc_pages() or by kasan_krealloc() for krealloc().
|
||||
*/
|
||||
|
||||
|
@ -32,6 +32,12 @@ enum kasan_arg_mode {
|
||||
KASAN_ARG_MODE_ASYMM,
|
||||
};
|
||||
|
||||
enum kasan_arg_vmalloc {
|
||||
KASAN_ARG_VMALLOC_DEFAULT,
|
||||
KASAN_ARG_VMALLOC_OFF,
|
||||
KASAN_ARG_VMALLOC_ON,
|
||||
};
|
||||
|
||||
enum kasan_arg_stacktrace {
|
||||
KASAN_ARG_STACKTRACE_DEFAULT,
|
||||
KASAN_ARG_STACKTRACE_OFF,
|
||||
@ -40,18 +46,28 @@ enum kasan_arg_stacktrace {
|
||||
|
||||
static enum kasan_arg kasan_arg __ro_after_init;
|
||||
static enum kasan_arg_mode kasan_arg_mode __ro_after_init;
|
||||
static enum kasan_arg_stacktrace kasan_arg_stacktrace __ro_after_init;
|
||||
static enum kasan_arg_vmalloc kasan_arg_vmalloc __initdata;
|
||||
static enum kasan_arg_stacktrace kasan_arg_stacktrace __initdata;
|
||||
|
||||
/* Whether KASAN is enabled at all. */
|
||||
/*
|
||||
* Whether KASAN is enabled at all.
|
||||
* The value remains false until KASAN is initialized by kasan_init_hw_tags().
|
||||
*/
|
||||
DEFINE_STATIC_KEY_FALSE(kasan_flag_enabled);
|
||||
EXPORT_SYMBOL(kasan_flag_enabled);
|
||||
|
||||
/* Whether the selected mode is synchronous/asynchronous/asymmetric.*/
|
||||
/*
|
||||
* Whether the selected mode is synchronous, asynchronous, or asymmetric.
|
||||
* Defaults to KASAN_MODE_SYNC.
|
||||
*/
|
||||
enum kasan_mode kasan_mode __ro_after_init;
|
||||
EXPORT_SYMBOL_GPL(kasan_mode);
|
||||
|
||||
/* Whether to enable vmalloc tagging. */
|
||||
DEFINE_STATIC_KEY_TRUE(kasan_flag_vmalloc);
|
||||
|
||||
/* Whether to collect alloc/free stack traces. */
|
||||
DEFINE_STATIC_KEY_FALSE(kasan_flag_stacktrace);
|
||||
DEFINE_STATIC_KEY_TRUE(kasan_flag_stacktrace);
|
||||
|
||||
/* kasan=off/on */
|
||||
static int __init early_kasan_flag(char *arg)
|
||||
@ -89,6 +105,23 @@ static int __init early_kasan_mode(char *arg)
|
||||
}
|
||||
early_param("kasan.mode", early_kasan_mode);
|
||||
|
||||
/* kasan.vmalloc=off/on */
|
||||
static int __init early_kasan_flag_vmalloc(char *arg)
|
||||
{
|
||||
if (!arg)
|
||||
return -EINVAL;
|
||||
|
||||
if (!strcmp(arg, "off"))
|
||||
kasan_arg_vmalloc = KASAN_ARG_VMALLOC_OFF;
|
||||
else if (!strcmp(arg, "on"))
|
||||
kasan_arg_vmalloc = KASAN_ARG_VMALLOC_ON;
|
||||
else
|
||||
return -EINVAL;
|
||||
|
||||
return 0;
|
||||
}
|
||||
early_param("kasan.vmalloc", early_kasan_flag_vmalloc);
|
||||
|
||||
/* kasan.stacktrace=off/on */
|
||||
static int __init early_kasan_flag_stacktrace(char *arg)
|
||||
{
|
||||
@ -116,7 +149,10 @@ static inline const char *kasan_mode_info(void)
|
||||
return "sync";
|
||||
}
|
||||
|
||||
/* kasan_init_hw_tags_cpu() is called for each CPU. */
|
||||
/*
|
||||
* kasan_init_hw_tags_cpu() is called for each CPU.
|
||||
* Not marked as __init as a CPU can be hot-plugged after boot.
|
||||
*/
|
||||
void kasan_init_hw_tags_cpu(void)
|
||||
{
|
||||
/*
|
||||
@ -124,7 +160,11 @@ void kasan_init_hw_tags_cpu(void)
|
||||
* as this function is only called for MTE-capable hardware.
|
||||
*/
|
||||
|
||||
/* If KASAN is disabled via command line, don't initialize it. */
|
||||
/*
|
||||
* If KASAN is disabled via command line, don't initialize it.
|
||||
* When this function is called, kasan_flag_enabled is not yet
|
||||
* set by kasan_init_hw_tags(). Thus, check kasan_arg instead.
|
||||
*/
|
||||
if (kasan_arg == KASAN_ARG_OFF)
|
||||
return;
|
||||
|
||||
@ -132,12 +172,7 @@ void kasan_init_hw_tags_cpu(void)
|
||||
* Enable async or asymm modes only when explicitly requested
|
||||
* through the command line.
|
||||
*/
|
||||
if (kasan_arg_mode == KASAN_ARG_MODE_ASYNC)
|
||||
hw_enable_tagging_async();
|
||||
else if (kasan_arg_mode == KASAN_ARG_MODE_ASYMM)
|
||||
hw_enable_tagging_asymm();
|
||||
else
|
||||
hw_enable_tagging_sync();
|
||||
kasan_enable_tagging();
|
||||
}
|
||||
|
||||
/* kasan_init_hw_tags() is called once on boot CPU. */
|
||||
@ -151,86 +186,168 @@ void __init kasan_init_hw_tags(void)
|
||||
if (kasan_arg == KASAN_ARG_OFF)
|
||||
return;
|
||||
|
||||
/* Enable KASAN. */
|
||||
static_branch_enable(&kasan_flag_enabled);
|
||||
|
||||
switch (kasan_arg_mode) {
|
||||
case KASAN_ARG_MODE_DEFAULT:
|
||||
/*
|
||||
* Default to sync mode.
|
||||
*/
|
||||
fallthrough;
|
||||
/* Default is specified by kasan_mode definition. */
|
||||
break;
|
||||
case KASAN_ARG_MODE_SYNC:
|
||||
/* Sync mode enabled. */
|
||||
kasan_mode = KASAN_MODE_SYNC;
|
||||
break;
|
||||
case KASAN_ARG_MODE_ASYNC:
|
||||
/* Async mode enabled. */
|
||||
kasan_mode = KASAN_MODE_ASYNC;
|
||||
break;
|
||||
case KASAN_ARG_MODE_ASYMM:
|
||||
/* Asymm mode enabled. */
|
||||
kasan_mode = KASAN_MODE_ASYMM;
|
||||
break;
|
||||
}
|
||||
|
||||
switch (kasan_arg_vmalloc) {
|
||||
case KASAN_ARG_VMALLOC_DEFAULT:
|
||||
/* Default is specified by kasan_flag_vmalloc definition. */
|
||||
break;
|
||||
case KASAN_ARG_VMALLOC_OFF:
|
||||
static_branch_disable(&kasan_flag_vmalloc);
|
||||
break;
|
||||
case KASAN_ARG_VMALLOC_ON:
|
||||
static_branch_enable(&kasan_flag_vmalloc);
|
||||
break;
|
||||
}
|
||||
|
||||
switch (kasan_arg_stacktrace) {
|
||||
case KASAN_ARG_STACKTRACE_DEFAULT:
|
||||
/* Default to enabling stack trace collection. */
|
||||
static_branch_enable(&kasan_flag_stacktrace);
|
||||
/* Default is specified by kasan_flag_stacktrace definition. */
|
||||
break;
|
||||
case KASAN_ARG_STACKTRACE_OFF:
|
||||
/* Do nothing, kasan_flag_stacktrace keeps its default value. */
|
||||
static_branch_disable(&kasan_flag_stacktrace);
|
||||
break;
|
||||
case KASAN_ARG_STACKTRACE_ON:
|
||||
static_branch_enable(&kasan_flag_stacktrace);
|
||||
break;
|
||||
}
|
||||
|
||||
pr_info("KernelAddressSanitizer initialized (hw-tags, mode=%s, stacktrace=%s)\n",
|
||||
/* KASAN is now initialized, enable it. */
|
||||
static_branch_enable(&kasan_flag_enabled);
|
||||
|
||||
pr_info("KernelAddressSanitizer initialized (hw-tags, mode=%s, vmalloc=%s, stacktrace=%s)\n",
|
||||
kasan_mode_info(),
|
||||
kasan_vmalloc_enabled() ? "on" : "off",
|
||||
kasan_stack_collection_enabled() ? "on" : "off");
|
||||
}
|
||||
|
||||
void kasan_alloc_pages(struct page *page, unsigned int order, gfp_t flags)
|
||||
#ifdef CONFIG_KASAN_VMALLOC
|
||||
|
||||
static void unpoison_vmalloc_pages(const void *addr, u8 tag)
|
||||
{
|
||||
struct vm_struct *area;
|
||||
int i;
|
||||
|
||||
/*
|
||||
* This condition should match the one in post_alloc_hook() in
|
||||
* page_alloc.c.
|
||||
* As hardware tag-based KASAN only tags VM_ALLOC vmalloc allocations
|
||||
* (see the comment in __kasan_unpoison_vmalloc), all of the pages
|
||||
* should belong to a single area.
|
||||
*/
|
||||
bool init = !want_init_on_free() && want_init_on_alloc(flags);
|
||||
area = find_vm_area((void *)addr);
|
||||
if (WARN_ON(!area))
|
||||
return;
|
||||
|
||||
if (flags & __GFP_SKIP_KASAN_POISON)
|
||||
SetPageSkipKASanPoison(page);
|
||||
for (i = 0; i < area->nr_pages; i++) {
|
||||
struct page *page = area->pages[i];
|
||||
|
||||
if (flags & __GFP_ZEROTAGS) {
|
||||
int i;
|
||||
|
||||
for (i = 0; i != 1 << order; ++i)
|
||||
tag_clear_highpage(page + i);
|
||||
} else {
|
||||
kasan_unpoison_pages(page, order, init);
|
||||
page_kasan_tag_set(page, tag);
|
||||
}
|
||||
}
|
||||
|
||||
void kasan_free_pages(struct page *page, unsigned int order)
|
||||
void *__kasan_unpoison_vmalloc(const void *start, unsigned long size,
|
||||
kasan_vmalloc_flags_t flags)
|
||||
{
|
||||
u8 tag;
|
||||
unsigned long redzone_start, redzone_size;
|
||||
|
||||
if (!kasan_vmalloc_enabled())
|
||||
return (void *)start;
|
||||
|
||||
if (!is_vmalloc_or_module_addr(start))
|
||||
return (void *)start;
|
||||
|
||||
/*
|
||||
* Skip unpoisoning and assigning a pointer tag for non-VM_ALLOC
|
||||
* mappings as:
|
||||
*
|
||||
* 1. Unlike the software KASAN modes, hardware tag-based KASAN only
|
||||
* supports tagging physical memory. Therefore, it can only tag a
|
||||
* single mapping of normal physical pages.
|
||||
* 2. Hardware tag-based KASAN can only tag memory mapped with special
|
||||
* mapping protection bits, see arch_vmalloc_pgprot_modify().
|
||||
* As non-VM_ALLOC mappings can be mapped outside of vmalloc code,
|
||||
* providing these bits would require tracking all non-VM_ALLOC
|
||||
* mappers.
|
||||
*
|
||||
* Thus, for VM_ALLOC mappings, hardware tag-based KASAN only tags
|
||||
* the first virtual mapping, which is created by vmalloc().
|
||||
* Tagging the page_alloc memory backing that vmalloc() allocation is
|
||||
* skipped, see ___GFP_SKIP_KASAN_UNPOISON.
|
||||
*
|
||||
* For non-VM_ALLOC allocations, page_alloc memory is tagged as usual.
|
||||
*/
|
||||
if (!(flags & KASAN_VMALLOC_VM_ALLOC))
|
||||
return (void *)start;
|
||||
|
||||
/*
|
||||
* Don't tag executable memory.
|
||||
* The kernel doesn't tolerate having the PC register tagged.
|
||||
*/
|
||||
if (!(flags & KASAN_VMALLOC_PROT_NORMAL))
|
||||
return (void *)start;
|
||||
|
||||
tag = kasan_random_tag();
|
||||
start = set_tag(start, tag);
|
||||
|
||||
/* Unpoison and initialize memory up to size. */
|
||||
kasan_unpoison(start, size, flags & KASAN_VMALLOC_INIT);
|
||||
|
||||
/*
|
||||
* Explicitly poison and initialize the in-page vmalloc() redzone.
|
||||
* Unlike software KASAN modes, hardware tag-based KASAN doesn't
|
||||
* unpoison memory when populating shadow for vmalloc() space.
|
||||
*/
|
||||
redzone_start = round_up((unsigned long)start + size,
|
||||
KASAN_GRANULE_SIZE);
|
||||
redzone_size = round_up(redzone_start, PAGE_SIZE) - redzone_start;
|
||||
kasan_poison((void *)redzone_start, redzone_size, KASAN_TAG_INVALID,
|
||||
flags & KASAN_VMALLOC_INIT);
|
||||
|
||||
/*
|
||||
* Set per-page tag flags to allow accessing physical memory for the
|
||||
* vmalloc() mapping through page_address(vmalloc_to_page()).
|
||||
*/
|
||||
unpoison_vmalloc_pages(start, tag);
|
||||
|
||||
return (void *)start;
|
||||
}
|
||||
|
||||
void __kasan_poison_vmalloc(const void *start, unsigned long size)
|
||||
{
|
||||
/*
|
||||
* This condition should match the one in free_pages_prepare() in
|
||||
* page_alloc.c.
|
||||
* No tagging here.
|
||||
* The physical pages backing the vmalloc() allocation are poisoned
|
||||
* through the usual page_alloc paths.
|
||||
*/
|
||||
bool init = want_init_on_free();
|
||||
|
||||
kasan_poison_pages(page, order, init);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if IS_ENABLED(CONFIG_KASAN_KUNIT_TEST)
|
||||
|
||||
void kasan_enable_tagging_sync(void)
|
||||
void kasan_enable_tagging(void)
|
||||
{
|
||||
hw_enable_tagging_sync();
|
||||
if (kasan_arg_mode == KASAN_ARG_MODE_ASYNC)
|
||||
hw_enable_tagging_async();
|
||||
else if (kasan_arg_mode == KASAN_ARG_MODE_ASYMM)
|
||||
hw_enable_tagging_asymm();
|
||||
else
|
||||
hw_enable_tagging_sync();
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kasan_enable_tagging_sync);
|
||||
EXPORT_SYMBOL_GPL(kasan_enable_tagging);
|
||||
|
||||
void kasan_force_async_fault(void)
|
||||
{
|
||||
|
@ -12,7 +12,8 @@
|
||||
#include <linux/static_key.h>
|
||||
#include "../slab.h"
|
||||
|
||||
DECLARE_STATIC_KEY_FALSE(kasan_flag_stacktrace);
|
||||
DECLARE_STATIC_KEY_TRUE(kasan_flag_vmalloc);
|
||||
DECLARE_STATIC_KEY_TRUE(kasan_flag_stacktrace);
|
||||
|
||||
enum kasan_mode {
|
||||
KASAN_MODE_SYNC,
|
||||
@ -22,6 +23,11 @@ enum kasan_mode {
|
||||
|
||||
extern enum kasan_mode kasan_mode __ro_after_init;
|
||||
|
||||
/* Return true when the kasan_flag_vmalloc static key is enabled. */
static inline bool kasan_vmalloc_enabled(void)
|
||||
{
|
||||
return static_branch_likely(&kasan_flag_vmalloc);
|
||||
}
|
||||
|
||||
static inline bool kasan_stack_collection_enabled(void)
|
||||
{
|
||||
return static_branch_unlikely(&kasan_flag_stacktrace);
|
||||
@ -71,17 +77,19 @@ static inline bool kasan_sync_fault_possible(void)
|
||||
#define KASAN_PAGE_REDZONE 0xFE /* redzone for kmalloc_large allocations */
|
||||
#define KASAN_KMALLOC_REDZONE 0xFC /* redzone inside slub object */
|
||||
#define KASAN_KMALLOC_FREE 0xFB /* object was freed (kmem_cache_free/kfree) */
|
||||
#define KASAN_KMALLOC_FREETRACK 0xFA /* object was freed and has free track set */
|
||||
#define KASAN_VMALLOC_INVALID 0xF8 /* unallocated space in vmapped page */
|
||||
#else
|
||||
#define KASAN_FREE_PAGE KASAN_TAG_INVALID
|
||||
#define KASAN_PAGE_REDZONE KASAN_TAG_INVALID
|
||||
#define KASAN_KMALLOC_REDZONE KASAN_TAG_INVALID
|
||||
#define KASAN_KMALLOC_FREE KASAN_TAG_INVALID
|
||||
#define KASAN_KMALLOC_FREETRACK KASAN_TAG_INVALID
|
||||
#define KASAN_VMALLOC_INVALID KASAN_TAG_INVALID /* only for SW_TAGS */
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_KASAN_GENERIC
|
||||
|
||||
#define KASAN_KMALLOC_FREETRACK 0xFA /* object was freed and has free track set */
|
||||
#define KASAN_GLOBAL_REDZONE 0xF9 /* redzone for global variable */
|
||||
#define KASAN_VMALLOC_INVALID 0xF8 /* unallocated space in vmapped page */
|
||||
|
||||
/*
|
||||
* Stack redzone shadow values
|
||||
@ -110,6 +118,8 @@ static inline bool kasan_sync_fault_possible(void)
|
||||
#define KASAN_ABI_VERSION 1
|
||||
#endif
|
||||
|
||||
#endif /* CONFIG_KASAN_GENERIC */
|
||||
|
||||
/* Metadata layout customization. */
|
||||
#define META_BYTES_PER_BLOCK 1
|
||||
#define META_BLOCKS_PER_ROW 16
|
||||
@ -117,9 +127,15 @@ static inline bool kasan_sync_fault_possible(void)
|
||||
#define META_MEM_BYTES_PER_ROW (META_BYTES_PER_ROW * KASAN_GRANULE_SIZE)
|
||||
#define META_ROWS_AROUND_ADDR 2
|
||||
|
||||
struct kasan_access_info {
|
||||
const void *access_addr;
|
||||
const void *first_bad_addr;
|
||||
enum kasan_report_type {
|
||||
KASAN_REPORT_ACCESS,
|
||||
KASAN_REPORT_INVALID_FREE,
|
||||
};
|
||||
|
||||
struct kasan_report_info {
|
||||
enum kasan_report_type type;
|
||||
void *access_addr;
|
||||
void *first_bad_addr;
|
||||
size_t access_size;
|
||||
bool is_write;
|
||||
unsigned long ip;
|
||||
@ -204,6 +220,14 @@ struct kasan_free_meta {
|
||||
#endif
|
||||
};
|
||||
|
||||
#if IS_ENABLED(CONFIG_KASAN_KUNIT_TEST)
|
||||
/* Used in KUnit-compatible KASAN tests. */
|
||||
struct kunit_kasan_status {
|
||||
bool report_found;
|
||||
bool sync_fault;
|
||||
};
|
||||
#endif
|
||||
|
||||
struct kasan_alloc_meta *kasan_get_alloc_meta(struct kmem_cache *cache,
|
||||
const void *object);
|
||||
#ifdef CONFIG_KASAN_GENERIC
|
||||
@ -221,7 +245,8 @@ static inline const void *kasan_shadow_to_mem(const void *shadow_addr)
|
||||
|
||||
static inline bool addr_has_metadata(const void *addr)
|
||||
{
|
||||
return (addr >= kasan_shadow_to_mem((void *)KASAN_SHADOW_START));
|
||||
return (kasan_reset_tag(addr) >=
|
||||
kasan_shadow_to_mem((void *)KASAN_SHADOW_START));
|
||||
}
|
||||
|
||||
/**
|
||||
@ -251,10 +276,10 @@ static inline void kasan_print_tags(u8 addr_tag, const void *addr) { }
|
||||
#endif
|
||||
|
||||
void *kasan_find_first_bad_addr(void *addr, size_t size);
|
||||
const char *kasan_get_bug_type(struct kasan_access_info *info);
|
||||
const char *kasan_get_bug_type(struct kasan_report_info *info);
|
||||
void kasan_metadata_fetch_row(char *buffer, void *row);
|
||||
|
||||
#if defined(CONFIG_KASAN_GENERIC) && defined(CONFIG_KASAN_STACK)
|
||||
#if defined(CONFIG_KASAN_STACK)
|
||||
void kasan_print_address_stack_frame(const void *addr);
|
||||
#else
|
||||
static inline void kasan_print_address_stack_frame(const void *addr) { }
|
||||
@ -340,12 +365,12 @@ static inline const void *arch_kasan_set_tag(const void *addr, u8 tag)
|
||||
|
||||
#if defined(CONFIG_KASAN_HW_TAGS) && IS_ENABLED(CONFIG_KASAN_KUNIT_TEST)
|
||||
|
||||
void kasan_enable_tagging_sync(void);
|
||||
void kasan_enable_tagging(void);
|
||||
void kasan_force_async_fault(void);
|
||||
|
||||
#else /* CONFIG_KASAN_HW_TAGS || CONFIG_KASAN_KUNIT_TEST */
|
||||
|
||||
static inline void kasan_enable_tagging_sync(void) { }
|
||||
static inline void kasan_enable_tagging(void) { }
|
||||
static inline void kasan_force_async_fault(void) { }
|
||||
|
||||
#endif /* CONFIG_KASAN_HW_TAGS || CONFIG_KASAN_KUNIT_TEST */
|
||||
@ -467,6 +492,13 @@ static inline bool kasan_arch_is_ready(void) { return true; }
|
||||
#error kasan_arch_is_ready only works in KASAN generic outline mode!
|
||||
#endif
|
||||
|
||||
#if IS_ENABLED(CONFIG_KASAN_KUNIT_TEST) || IS_ENABLED(CONFIG_KASAN_MODULE_TEST)
|
||||
|
||||
bool kasan_save_enable_multi_shot(void);
|
||||
void kasan_restore_multi_shot(bool enabled);
|
||||
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Exported functions for interfaces called from assembly or from generated
|
||||
* code. Declarations here to avoid warning about missing declarations.
|
||||
|
@ -13,6 +13,7 @@
|
||||
#include <linux/ftrace.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/lockdep.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/printk.h>
|
||||
#include <linux/sched.h>
|
||||
@ -64,6 +65,40 @@ static int __init early_kasan_fault(char *arg)
|
||||
}
|
||||
early_param("kasan.fault", early_kasan_fault);
|
||||
|
||||
/*
 * Handler for the "kasan_multi_shot" boot option: sets KASAN_BIT_MULTI_SHOT
 * in kasan_flags. The option value (str) is ignored. Always returns 1
 * (presumably the __setup() "handled" convention -- confirm against
 * include/linux/init.h).
 */
static int __init kasan_set_multi_shot(char *str)
|
||||
{
|
||||
set_bit(KASAN_BIT_MULTI_SHOT, &kasan_flags);
|
||||
return 1;
|
||||
}
|
||||
__setup("kasan_multi_shot", kasan_set_multi_shot);
|
||||
|
||||
/*
|
||||
* Used to suppress reports within kasan_disable/enable_current() critical
|
||||
* sections, which are used for marking accesses to slab metadata.
|
||||
*/
|
||||
static bool report_suppressed(void)
|
||||
{
|
||||
#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
|
||||
if (current->kasan_depth)
|
||||
return true;
|
||||
#endif
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* Used to avoid reporting more than one KASAN bug unless kasan_multi_shot
|
||||
* is enabled. Note that KASAN tests effectively enable kasan_multi_shot
|
||||
* for their duration.
|
||||
*/
|
||||
static bool report_enabled(void)
|
||||
{
|
||||
if (test_bit(KASAN_BIT_MULTI_SHOT, &kasan_flags))
|
||||
return true;
|
||||
return !test_and_set_bit(KASAN_BIT_REPORTED, &kasan_flags);
|
||||
}
|
||||
|
||||
#if IS_ENABLED(CONFIG_KASAN_KUNIT_TEST) || IS_ENABLED(CONFIG_KASAN_MODULE_TEST)
|
||||
|
||||
bool kasan_save_enable_multi_shot(void)
|
||||
{
|
||||
return test_and_set_bit(KASAN_BIT_MULTI_SHOT, &kasan_flags);
|
||||
@ -77,15 +112,75 @@ void kasan_restore_multi_shot(bool enabled)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kasan_restore_multi_shot);
|
||||
|
||||
static int __init kasan_set_multi_shot(char *str)
|
||||
{
|
||||
set_bit(KASAN_BIT_MULTI_SHOT, &kasan_flags);
|
||||
return 1;
|
||||
}
|
||||
__setup("kasan_multi_shot", kasan_set_multi_shot);
|
||||
#endif
|
||||
|
||||
static void print_error_description(struct kasan_access_info *info)
|
||||
#if IS_ENABLED(CONFIG_KASAN_KUNIT_TEST)
|
||||
/*
 * Record in the running KUnit test's "kasan_status" resource that a KASAN
 * report was produced, and whether the fault was synchronous (@sync).
 * Does nothing when the current task is not running a KUnit test; marks the
 * test as failed when the "kasan_status" resource cannot be found.
 */
static void update_kunit_status(bool sync)
|
||||
{
|
||||
struct kunit *test;
|
||||
struct kunit_resource *resource;
|
||||
struct kunit_kasan_status *status;
|
||||
|
||||
test = current->kunit_test;
|
||||
if (!test)
|
||||
return;
|
||||
|
||||
resource = kunit_find_named_resource(test, "kasan_status");
|
||||
if (!resource) {
|
||||
kunit_set_failure(test);
|
||||
return;
|
||||
}
|
||||
|
||||
status = (struct kunit_kasan_status *)resource->data;
|
||||
WRITE_ONCE(status->report_found, true);
|
||||
WRITE_ONCE(status->sync_fault, sync);
|
||||
|
||||
/* Release the resource obtained from kunit_find_named_resource() above. */
kunit_put_resource(resource);
|
||||
}
|
||||
#else
|
||||
static void update_kunit_status(bool sync) { }
|
||||
#endif
|
||||
|
||||
static DEFINE_SPINLOCK(report_lock);
|
||||
|
||||
/*
 * Begin a KASAN report: update the KUnit test status, switch off lockdep and
 * call kasan_disable_current(), take report_lock with the IRQ state saved in
 * *flags, and print the opening separator line.
 * @sync is forwarded to update_kunit_status().
 * Pair with end_report(), which undoes these steps.
 */
static void start_report(unsigned long *flags, bool sync)
|
||||
{
|
||||
/* Respect the /proc/sys/kernel/traceoff_on_warning interface. */
|
||||
disable_trace_on_warning();
|
||||
/* Update status of the currently running KASAN test. */
|
||||
update_kunit_status(sync);
|
||||
/* Do not allow LOCKDEP mangling KASAN reports. */
|
||||
lockdep_off();
|
||||
/* Make sure we don't end up in a loop. */
|
||||
kasan_disable_current();
|
||||
spin_lock_irqsave(&report_lock, *flags);
|
||||
pr_err("==================================================================\n");
|
||||
}
|
||||
|
||||
/*
 * Finish a KASAN report started by start_report().
 * Emits the error_report_end trace event only when @addr is non-NULL, prints
 * the closing separator and releases report_lock, restoring the IRQ state
 * from *flags. It then panics if panic_on_warn is set (and multi-shot is
 * not) or if kasan_arg_fault is KASAN_ARG_FAULT_PANIC; otherwise it taints
 * the kernel and re-enables lockdep and KASAN for the current task.
 */
static void end_report(unsigned long *flags, void *addr)
|
||||
{
|
||||
if (addr)
|
||||
trace_error_report_end(ERROR_DETECTOR_KASAN,
|
||||
(unsigned long)addr);
|
||||
pr_err("==================================================================\n");
|
||||
spin_unlock_irqrestore(&report_lock, *flags);
|
||||
if (panic_on_warn && !test_bit(KASAN_BIT_MULTI_SHOT, &kasan_flags))
|
||||
panic("panic_on_warn set ...\n");
|
||||
if (kasan_arg_fault == KASAN_ARG_FAULT_PANIC)
|
||||
panic("kasan.fault=panic set ...\n");
|
||||
add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
|
||||
lockdep_on();
|
||||
kasan_enable_current();
|
||||
}
|
||||
|
||||
static void print_error_description(struct kasan_report_info *info)
|
||||
{
|
||||
if (info->type == KASAN_REPORT_INVALID_FREE) {
|
||||
pr_err("BUG: KASAN: double-free or invalid-free in %pS\n",
|
||||
(void *)info->ip);
|
||||
return;
|
||||
}
|
||||
|
||||
pr_err("BUG: KASAN: %s in %pS\n",
|
||||
kasan_get_bug_type(info), (void *)info->ip);
|
||||
if (info->access_size)
|
||||
@ -98,32 +193,6 @@ static void print_error_description(struct kasan_access_info *info)
|
||||
info->access_addr, current->comm, task_pid_nr(current));
|
||||
}
|
||||
|
||||
static DEFINE_SPINLOCK(report_lock);
|
||||
|
||||
static void start_report(unsigned long *flags)
|
||||
{
|
||||
/*
|
||||
* Make sure we don't end up in loop.
|
||||
*/
|
||||
kasan_disable_current();
|
||||
spin_lock_irqsave(&report_lock, *flags);
|
||||
pr_err("==================================================================\n");
|
||||
}
|
||||
|
||||
static void end_report(unsigned long *flags, unsigned long addr)
|
||||
{
|
||||
if (!kasan_async_fault_possible())
|
||||
trace_error_report_end(ERROR_DETECTOR_KASAN, addr);
|
||||
pr_err("==================================================================\n");
|
||||
add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
|
||||
spin_unlock_irqrestore(&report_lock, *flags);
|
||||
if (panic_on_warn && !test_bit(KASAN_BIT_MULTI_SHOT, &kasan_flags))
|
||||
panic("panic_on_warn set ...\n");
|
||||
if (kasan_arg_fault == KASAN_ARG_FAULT_PANIC)
|
||||
panic("kasan.fault=panic set ...\n");
|
||||
kasan_enable_current();
|
||||
}
|
||||
|
||||
static void print_track(struct kasan_track *track, const char *prefix)
|
||||
{
|
||||
pr_err("%s by task %u:\n", prefix, track->pid);
|
||||
@ -162,9 +231,6 @@ static void describe_object_addr(struct kmem_cache *cache, void *object,
|
||||
" which belongs to the cache %s of size %d\n",
|
||||
object, cache->name, cache->object_size);
|
||||
|
||||
if (!addr)
|
||||
return;
|
||||
|
||||
if (access_addr < object_addr) {
|
||||
rel_type = "to the left";
|
||||
rel_bytes = object_addr - access_addr;
|
||||
@ -253,19 +319,43 @@ static void print_address_description(void *addr, u8 tag)
|
||||
void *object = nearest_obj(cache, slab, addr);
|
||||
|
||||
describe_object(cache, object, addr, tag);
|
||||
pr_err("\n");
|
||||
}
|
||||
|
||||
if (kernel_or_module_addr(addr) && !init_task_stack_addr(addr)) {
|
||||
pr_err("The buggy address belongs to the variable:\n");
|
||||
pr_err(" %pS\n", addr);
|
||||
pr_err("\n");
|
||||
}
|
||||
|
||||
if (object_is_on_stack(addr)) {
|
||||
/*
|
||||
* Currently, KASAN supports printing frame information only
|
||||
* for accesses to the task's own stack.
|
||||
*/
|
||||
kasan_print_address_stack_frame(addr);
|
||||
pr_err("\n");
|
||||
}
|
||||
|
||||
if (is_vmalloc_addr(addr)) {
|
||||
struct vm_struct *va = find_vm_area(addr);
|
||||
|
||||
if (va) {
|
||||
pr_err("The buggy address belongs to the virtual mapping at\n"
|
||||
" [%px, %px) created by:\n"
|
||||
" %pS\n",
|
||||
va->addr, va->addr + va->size, va->caller);
|
||||
pr_err("\n");
|
||||
|
||||
page = vmalloc_to_page(page);
|
||||
}
|
||||
}
|
||||
|
||||
if (page) {
|
||||
pr_err("The buggy address belongs to the page:\n");
|
||||
pr_err("The buggy address belongs to the physical page:\n");
|
||||
dump_page(page, "kasan: bad access detected");
|
||||
pr_err("\n");
|
||||
}
|
||||
|
||||
kasan_print_address_stack_frame(addr);
|
||||
}
|
||||
|
||||
static bool meta_row_is_guilty(const void *row, const void *addr)
|
||||
@ -324,56 +414,88 @@ static void print_memory_metadata(const void *addr)
|
||||
}
|
||||
}
|
||||
|
||||
static bool report_enabled(void)
|
||||
static void print_report(struct kasan_report_info *info)
|
||||
{
|
||||
#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
|
||||
if (current->kasan_depth)
|
||||
return false;
|
||||
#endif
|
||||
if (test_bit(KASAN_BIT_MULTI_SHOT, &kasan_flags))
|
||||
return true;
|
||||
return !test_and_set_bit(KASAN_BIT_REPORTED, &kasan_flags);
|
||||
}
|
||||
void *tagged_addr = info->access_addr;
|
||||
void *untagged_addr = kasan_reset_tag(tagged_addr);
|
||||
u8 tag = get_tag(tagged_addr);
|
||||
|
||||
#if IS_ENABLED(CONFIG_KUNIT)
|
||||
static void kasan_update_kunit_status(struct kunit *cur_test)
|
||||
{
|
||||
struct kunit_resource *resource;
|
||||
struct kunit_kasan_expectation *kasan_data;
|
||||
print_error_description(info);
|
||||
if (addr_has_metadata(untagged_addr))
|
||||
kasan_print_tags(tag, info->first_bad_addr);
|
||||
pr_err("\n");
|
||||
|
||||
resource = kunit_find_named_resource(cur_test, "kasan_data");
|
||||
|
||||
if (!resource) {
|
||||
kunit_set_failure(cur_test);
|
||||
return;
|
||||
if (addr_has_metadata(untagged_addr)) {
|
||||
print_address_description(untagged_addr, tag);
|
||||
print_memory_metadata(info->first_bad_addr);
|
||||
} else {
|
||||
dump_stack_lvl(KERN_ERR);
|
||||
}
|
||||
|
||||
kasan_data = (struct kunit_kasan_expectation *)resource->data;
|
||||
WRITE_ONCE(kasan_data->report_found, true);
|
||||
kunit_put_resource(resource);
|
||||
}
|
||||
#endif /* IS_ENABLED(CONFIG_KUNIT) */
|
||||
|
||||
void kasan_report_invalid_free(void *object, unsigned long ip)
|
||||
void kasan_report_invalid_free(void *ptr, unsigned long ip)
|
||||
{
|
||||
unsigned long flags;
|
||||
u8 tag = get_tag(object);
|
||||
struct kasan_report_info info;
|
||||
|
||||
object = kasan_reset_tag(object);
|
||||
/*
|
||||
* Do not check report_suppressed(), as an invalid-free cannot be
|
||||
* caused by accessing slab metadata and thus should not be
|
||||
* suppressed by kasan_disable/enable_current() critical sections.
|
||||
*/
|
||||
if (unlikely(!report_enabled()))
|
||||
return;
|
||||
|
||||
#if IS_ENABLED(CONFIG_KUNIT)
|
||||
if (current->kunit_test)
|
||||
kasan_update_kunit_status(current->kunit_test);
|
||||
#endif /* IS_ENABLED(CONFIG_KUNIT) */
|
||||
start_report(&flags, true);
|
||||
|
||||
start_report(&flags);
|
||||
pr_err("BUG: KASAN: double-free or invalid-free in %pS\n", (void *)ip);
|
||||
kasan_print_tags(tag, object);
|
||||
pr_err("\n");
|
||||
print_address_description(object, tag);
|
||||
pr_err("\n");
|
||||
print_memory_metadata(object);
|
||||
end_report(&flags, (unsigned long)object);
|
||||
info.type = KASAN_REPORT_INVALID_FREE;
|
||||
info.access_addr = ptr;
|
||||
info.first_bad_addr = kasan_reset_tag(ptr);
|
||||
info.access_size = 0;
|
||||
info.is_write = false;
|
||||
info.ip = ip;
|
||||
|
||||
print_report(&info);
|
||||
|
||||
end_report(&flags, ptr);
|
||||
}
|
||||
|
||||
/*
|
||||
* kasan_report() is the only reporting function that uses
|
||||
* user_access_save/restore(): kasan_report_invalid_free() cannot be called
|
||||
* from a UACCESS region, and kasan_report_async() is not used on x86.
|
||||
*/
|
||||
/*
 * Print a KASAN report for a bad access of @size bytes at @addr made from
 * instruction pointer @ip; @is_write tells whether it was a write.
 * Returns false without printing anything when reports are suppressed or
 * disabled, and true once a report has been printed.
 */
bool kasan_report(unsigned long addr, size_t size, bool is_write,
|
||||
unsigned long ip)
|
||||
{
|
||||
bool ret = true;
|
||||
void *ptr = (void *)addr;
|
||||
unsigned long ua_flags = user_access_save();
|
||||
unsigned long irq_flags;
|
||||
struct kasan_report_info info;
|
||||
|
||||
if (unlikely(report_suppressed()) || unlikely(!report_enabled())) {
|
||||
ret = false;
|
||||
goto out;
|
||||
}
|
||||
|
||||
start_report(&irq_flags, true);
|
||||
|
||||
info.type = KASAN_REPORT_ACCESS;
|
||||
info.access_addr = ptr;
|
||||
info.first_bad_addr = kasan_find_first_bad_addr(ptr, size);
|
||||
info.access_size = size;
|
||||
info.is_write = is_write;
|
||||
info.ip = ip;
|
||||
|
||||
print_report(&info);
|
||||
|
||||
end_report(&irq_flags, ptr);
|
||||
|
||||
/* Common exit: always undo user_access_save() from the declarations above. */
out:
|
||||
user_access_restore(ua_flags);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_KASAN_HW_TAGS
|
||||
@ -381,82 +503,22 @@ void kasan_report_async(void)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
#if IS_ENABLED(CONFIG_KUNIT)
|
||||
if (current->kunit_test)
|
||||
kasan_update_kunit_status(current->kunit_test);
|
||||
#endif /* IS_ENABLED(CONFIG_KUNIT) */
|
||||
/*
|
||||
* Do not check report_suppressed(), as kasan_disable/enable_current()
|
||||
* critical sections do not affect Hardware Tag-Based KASAN.
|
||||
*/
|
||||
if (unlikely(!report_enabled()))
|
||||
return;
|
||||
|
||||
start_report(&flags);
|
||||
start_report(&flags, false);
|
||||
pr_err("BUG: KASAN: invalid-access\n");
|
||||
pr_err("Asynchronous mode enabled: no access details available\n");
|
||||
pr_err("Asynchronous fault: no details available\n");
|
||||
pr_err("\n");
|
||||
dump_stack_lvl(KERN_ERR);
|
||||
end_report(&flags, 0);
|
||||
end_report(&flags, NULL);
|
||||
}
|
||||
#endif /* CONFIG_KASAN_HW_TAGS */
|
||||
|
||||
static void __kasan_report(unsigned long addr, size_t size, bool is_write,
|
||||
unsigned long ip)
|
||||
{
|
||||
struct kasan_access_info info;
|
||||
void *tagged_addr;
|
||||
void *untagged_addr;
|
||||
unsigned long flags;
|
||||
|
||||
#if IS_ENABLED(CONFIG_KUNIT)
|
||||
if (current->kunit_test)
|
||||
kasan_update_kunit_status(current->kunit_test);
|
||||
#endif /* IS_ENABLED(CONFIG_KUNIT) */
|
||||
|
||||
disable_trace_on_warning();
|
||||
|
||||
tagged_addr = (void *)addr;
|
||||
untagged_addr = kasan_reset_tag(tagged_addr);
|
||||
|
||||
info.access_addr = tagged_addr;
|
||||
if (addr_has_metadata(untagged_addr))
|
||||
info.first_bad_addr =
|
||||
kasan_find_first_bad_addr(tagged_addr, size);
|
||||
else
|
||||
info.first_bad_addr = untagged_addr;
|
||||
info.access_size = size;
|
||||
info.is_write = is_write;
|
||||
info.ip = ip;
|
||||
|
||||
start_report(&flags);
|
||||
|
||||
print_error_description(&info);
|
||||
if (addr_has_metadata(untagged_addr))
|
||||
kasan_print_tags(get_tag(tagged_addr), info.first_bad_addr);
|
||||
pr_err("\n");
|
||||
|
||||
if (addr_has_metadata(untagged_addr)) {
|
||||
print_address_description(untagged_addr, get_tag(tagged_addr));
|
||||
pr_err("\n");
|
||||
print_memory_metadata(info.first_bad_addr);
|
||||
} else {
|
||||
dump_stack_lvl(KERN_ERR);
|
||||
}
|
||||
|
||||
end_report(&flags, addr);
|
||||
}
|
||||
|
||||
bool kasan_report(unsigned long addr, size_t size, bool is_write,
|
||||
unsigned long ip)
|
||||
{
|
||||
unsigned long flags = user_access_save();
|
||||
bool ret = false;
|
||||
|
||||
if (likely(report_enabled())) {
|
||||
__kasan_report(addr, size, is_write, ip);
|
||||
ret = true;
|
||||
}
|
||||
|
||||
user_access_restore(flags);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_KASAN_INLINE
|
||||
/*
|
||||
* With CONFIG_KASAN_INLINE, accesses to bogus pointers (outside the high
|
||||
|
@ -34,12 +34,16 @@ void *kasan_find_first_bad_addr(void *addr, size_t size)
|
||||
{
|
||||
void *p = addr;
|
||||
|
||||
if (!addr_has_metadata(p))
|
||||
return p;
|
||||
|
||||
while (p < addr + size && !(*(u8 *)kasan_mem_to_shadow(p)))
|
||||
p += KASAN_GRANULE_SIZE;
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
static const char *get_shadow_bug_type(struct kasan_access_info *info)
|
||||
static const char *get_shadow_bug_type(struct kasan_report_info *info)
|
||||
{
|
||||
const char *bug_type = "unknown-crash";
|
||||
u8 *shadow_addr;
|
||||
@ -91,7 +95,7 @@ static const char *get_shadow_bug_type(struct kasan_access_info *info)
|
||||
return bug_type;
|
||||
}
|
||||
|
||||
static const char *get_wild_bug_type(struct kasan_access_info *info)
|
||||
static const char *get_wild_bug_type(struct kasan_report_info *info)
|
||||
{
|
||||
const char *bug_type = "unknown-crash";
|
||||
|
||||
@ -105,7 +109,7 @@ static const char *get_wild_bug_type(struct kasan_access_info *info)
|
||||
return bug_type;
|
||||
}
|
||||
|
||||
const char *kasan_get_bug_type(struct kasan_access_info *info)
|
||||
const char *kasan_get_bug_type(struct kasan_report_info *info)
|
||||
{
|
||||
/*
|
||||
* If access_size is a negative number, then it has reason to be
|
||||
@ -180,7 +184,7 @@ static void print_decoded_frame_descr(const char *frame_descr)
|
||||
return;
|
||||
|
||||
pr_err("\n");
|
||||
pr_err("this frame has %lu %s:\n", num_objects,
|
||||
pr_err("This frame has %lu %s:\n", num_objects,
|
||||
num_objects == 1 ? "object" : "objects");
|
||||
|
||||
while (num_objects--) {
|
||||
@ -211,6 +215,7 @@ static void print_decoded_frame_descr(const char *frame_descr)
|
||||
}
|
||||
}
|
||||
|
||||
/* Returns true only if the address is on the current task's stack. */
|
||||
static bool __must_check get_address_stack_frame_info(const void *addr,
|
||||
unsigned long *offset,
|
||||
const char **frame_descr,
|
||||
@ -224,13 +229,6 @@ static bool __must_check get_address_stack_frame_info(const void *addr,
|
||||
|
||||
BUILD_BUG_ON(IS_ENABLED(CONFIG_STACK_GROWSUP));
|
||||
|
||||
/*
|
||||
* NOTE: We currently only support printing frame information for
|
||||
* accesses to the task's own stack.
|
||||
*/
|
||||
if (!object_is_on_stack(addr))
|
||||
return false;
|
||||
|
||||
aligned_addr = round_down((unsigned long)addr, sizeof(long));
|
||||
mem_ptr = round_down(aligned_addr, KASAN_GRANULE_SIZE);
|
||||
shadow_ptr = kasan_mem_to_shadow((void *)aligned_addr);
|
||||
@ -269,17 +267,17 @@ void kasan_print_address_stack_frame(const void *addr)
|
||||
const char *frame_descr;
|
||||
const void *frame_pc;
|
||||
|
||||
if (WARN_ON(!object_is_on_stack(addr)))
|
||||
return;
|
||||
|
||||
pr_err("The buggy address belongs to stack of task %s/%d\n",
|
||||
current->comm, task_pid_nr(current));
|
||||
|
||||
if (!get_address_stack_frame_info(addr, &offset, &frame_descr,
|
||||
&frame_pc))
|
||||
return;
|
||||
|
||||
/*
|
||||
* get_address_stack_frame_info only returns true if the given addr is
|
||||
* on the current task's stack.
|
||||
*/
|
||||
pr_err("\n");
|
||||
pr_err("addr %px is located in stack of task %s/%d at offset %lu in frame:\n",
|
||||
addr, current->comm, task_pid_nr(current), offset);
|
||||
pr_err(" and is located at offset %lu in frame:\n", offset);
|
||||
pr_err(" %pS\n", frame_pc);
|
||||
|
||||
if (!frame_descr)
|
||||
|
@ -17,6 +17,7 @@
|
||||
|
||||
/*
 * Return @addr with its pointer tag reset. @size is not used by this
 * variant.
 */
void *kasan_find_first_bad_addr(void *addr, size_t size)
|
||||
{
|
||||
/* Return the same value regardless of whether addr_has_metadata(). */
|
||||
return kasan_reset_tag(addr);
|
||||
}
|
||||
|
||||
|
@ -16,6 +16,7 @@
|
||||
#include <linux/mm.h>
|
||||
#include <linux/printk.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/sched/task_stack.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/stackdepot.h>
|
||||
#include <linux/stacktrace.h>
|
||||
@ -35,8 +36,12 @@ void *kasan_find_first_bad_addr(void *addr, size_t size)
|
||||
void *p = kasan_reset_tag(addr);
|
||||
void *end = p + size;
|
||||
|
||||
if (!addr_has_metadata(p))
|
||||
return p;
|
||||
|
||||
while (p < end && tag == *(u8 *)kasan_mem_to_shadow(p))
|
||||
p += KASAN_GRANULE_SIZE;
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
@ -51,3 +56,14 @@ void kasan_print_tags(u8 addr_tag, const void *addr)
|
||||
|
||||
pr_err("Pointer tag: [%02x], memory tag: [%02x]\n", addr_tag, *shadow);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_KASAN_STACK
|
||||
void kasan_print_address_stack_frame(const void *addr)
|
||||
{
|
||||
if (WARN_ON(!object_is_on_stack(addr)))
|
||||
return;
|
||||
|
||||
pr_err("The buggy address belongs to stack of task %s/%d\n",
|
||||
current->comm, task_pid_nr(current));
|
||||
}
|
||||
#endif
|
||||
|
@ -7,7 +7,7 @@
|
||||
#include "kasan.h"
|
||||
#include "../slab.h"
|
||||
|
||||
const char *kasan_get_bug_type(struct kasan_access_info *info)
|
||||
const char *kasan_get_bug_type(struct kasan_report_info *info)
|
||||
{
|
||||
#ifdef CONFIG_KASAN_TAGS_IDENTIFY
|
||||
struct kasan_alloc_meta *alloc_meta;
|
||||
|
@ -345,27 +345,6 @@ int kasan_populate_vmalloc(unsigned long addr, unsigned long size)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Poison the shadow for a vmalloc region. Called as part of the
|
||||
* freeing process at the time the region is freed.
|
||||
*/
|
||||
void kasan_poison_vmalloc(const void *start, unsigned long size)
|
||||
{
|
||||
if (!is_vmalloc_or_module_addr(start))
|
||||
return;
|
||||
|
||||
size = round_up(size, KASAN_GRANULE_SIZE);
|
||||
kasan_poison(start, size, KASAN_VMALLOC_INVALID, false);
|
||||
}
|
||||
|
||||
void kasan_unpoison_vmalloc(const void *start, unsigned long size)
|
||||
{
|
||||
if (!is_vmalloc_or_module_addr(start))
|
||||
return;
|
||||
|
||||
kasan_unpoison(start, size, false);
|
||||
}
|
||||
|
||||
static int kasan_depopulate_vmalloc_pte(pte_t *ptep, unsigned long addr,
|
||||
void *unused)
|
||||
{
|
||||
@ -496,9 +475,48 @@ void kasan_release_vmalloc(unsigned long start, unsigned long end,
|
||||
}
|
||||
}
|
||||
|
||||
void *__kasan_unpoison_vmalloc(const void *start, unsigned long size,
|
||||
kasan_vmalloc_flags_t flags)
|
||||
{
|
||||
/*
|
||||
* Software KASAN modes unpoison both VM_ALLOC and non-VM_ALLOC
|
||||
* mappings, so the KASAN_VMALLOC_VM_ALLOC flag is ignored.
|
||||
* Software KASAN modes can't optimize zeroing memory by combining it
|
||||
* with setting memory tags, so the KASAN_VMALLOC_INIT flag is ignored.
|
||||
*/
|
||||
|
||||
if (!is_vmalloc_or_module_addr(start))
|
||||
return (void *)start;
|
||||
|
||||
/*
|
||||
* Don't tag executable memory with the tag-based mode.
|
||||
* The kernel doesn't tolerate having the PC register tagged.
|
||||
*/
|
||||
if (IS_ENABLED(CONFIG_KASAN_SW_TAGS) &&
|
||||
!(flags & KASAN_VMALLOC_PROT_NORMAL))
|
||||
return (void *)start;
|
||||
|
||||
start = set_tag(start, kasan_random_tag());
|
||||
kasan_unpoison(start, size, false);
|
||||
return (void *)start;
|
||||
}
|
||||
|
||||
/*
|
||||
* Poison the shadow for a vmalloc region. Called as part of the
|
||||
* freeing process at the time the region is freed.
|
||||
*/
|
||||
void __kasan_poison_vmalloc(const void *start, unsigned long size)
|
||||
{
|
||||
if (!is_vmalloc_or_module_addr(start))
|
||||
return;
|
||||
|
||||
size = round_up(size, KASAN_GRANULE_SIZE);
|
||||
kasan_poison(start, size, KASAN_VMALLOC_INVALID, false);
|
||||
}
|
||||
|
||||
#else /* CONFIG_KASAN_VMALLOC */
|
||||
|
||||
int kasan_module_alloc(void *addr, size_t size, gfp_t gfp_mask)
|
||||
int kasan_alloc_module_shadow(void *addr, size_t size, gfp_t gfp_mask)
|
||||
{
|
||||
void *ret;
|
||||
size_t scaled_size;
|
||||
@ -534,7 +552,7 @@ int kasan_module_alloc(void *addr, size_t size, gfp_t gfp_mask)
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
void kasan_free_shadow(const struct vm_struct *vm)
|
||||
void kasan_free_module_shadow(const struct vm_struct *vm)
|
||||
{
|
||||
if (vm->flags & VM_KASAN)
|
||||
vfree(kasan_mem_to_shadow(vm->addr));
|
||||
|
@ -46,7 +46,6 @@ enum scan_result {
|
||||
SCAN_VMA_NULL,
|
||||
SCAN_VMA_CHECK,
|
||||
SCAN_ADDRESS_RANGE,
|
||||
SCAN_SWAP_CACHE_PAGE,
|
||||
SCAN_DEL_PAGE_LRU,
|
||||
SCAN_ALLOC_HUGE_PAGE_FAIL,
|
||||
SCAN_CGROUP_CHARGE_FAIL,
|
||||
@ -683,16 +682,6 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
|
||||
result = SCAN_PAGE_COUNT;
|
||||
goto out;
|
||||
}
|
||||
if (!pte_write(pteval) && PageSwapCache(page) &&
|
||||
!reuse_swap_page(page)) {
|
||||
/*
|
||||
* Page is in the swap cache and cannot be re-used.
|
||||
* It cannot be collapsed into a THP.
|
||||
*/
|
||||
unlock_page(page);
|
||||
result = SCAN_SWAP_CACHE_PAGE;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/*
|
||||
* Isolate the page to avoid collapsing a hugepage
|
||||
|
39
mm/madvise.c
39
mm/madvise.c
@ -52,6 +52,7 @@ static int madvise_need_mmap_write(int behavior)
|
||||
case MADV_REMOVE:
|
||||
case MADV_WILLNEED:
|
||||
case MADV_DONTNEED:
|
||||
case MADV_DONTNEED_LOCKED:
|
||||
case MADV_COLD:
|
||||
case MADV_PAGEOUT:
|
||||
case MADV_FREE:
|
||||
@ -504,7 +505,7 @@ static void madvise_cold_page_range(struct mmu_gather *tlb,
|
||||
|
||||
static inline bool can_madv_lru_vma(struct vm_area_struct *vma)
|
||||
{
|
||||
return !(vma->vm_flags & (VM_LOCKED|VM_HUGETLB|VM_PFNMAP));
|
||||
return !(vma->vm_flags & (VM_LOCKED|VM_PFNMAP|VM_HUGETLB));
|
||||
}
|
||||
|
||||
static long madvise_cold(struct vm_area_struct *vma,
|
||||
@ -777,6 +778,29 @@ static long madvise_dontneed_single_vma(struct vm_area_struct *vma,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static bool madvise_dontneed_free_valid_vma(struct vm_area_struct *vma,
|
||||
unsigned long start,
|
||||
unsigned long *end,
|
||||
int behavior)
|
||||
{
|
||||
if (!is_vm_hugetlb_page(vma)) {
|
||||
unsigned int forbidden = VM_PFNMAP;
|
||||
|
||||
if (behavior != MADV_DONTNEED_LOCKED)
|
||||
forbidden |= VM_LOCKED;
|
||||
|
||||
return !(vma->vm_flags & forbidden);
|
||||
}
|
||||
|
||||
if (behavior != MADV_DONTNEED && behavior != MADV_DONTNEED_LOCKED)
|
||||
return false;
|
||||
if (start & ~huge_page_mask(hstate_vma(vma)))
|
||||
return false;
|
||||
|
||||
*end = ALIGN(*end, huge_page_size(hstate_vma(vma)));
|
||||
return true;
|
||||
}
|
||||
|
||||
static long madvise_dontneed_free(struct vm_area_struct *vma,
|
||||
struct vm_area_struct **prev,
|
||||
unsigned long start, unsigned long end,
|
||||
@ -785,7 +809,7 @@ static long madvise_dontneed_free(struct vm_area_struct *vma,
|
||||
struct mm_struct *mm = vma->vm_mm;
|
||||
|
||||
*prev = vma;
|
||||
if (!can_madv_lru_vma(vma))
|
||||
if (!madvise_dontneed_free_valid_vma(vma, start, &end, behavior))
|
||||
return -EINVAL;
|
||||
|
||||
if (!userfaultfd_remove(vma, start, end)) {
|
||||
@ -807,7 +831,12 @@ static long madvise_dontneed_free(struct vm_area_struct *vma,
|
||||
*/
|
||||
return -ENOMEM;
|
||||
}
|
||||
if (!can_madv_lru_vma(vma))
|
||||
/*
|
||||
* Potential end adjustment for hugetlb vma is OK as
|
||||
* the check below keeps end within vma.
|
||||
*/
|
||||
if (!madvise_dontneed_free_valid_vma(vma, start, &end,
|
||||
behavior))
|
||||
return -EINVAL;
|
||||
if (end > vma->vm_end) {
|
||||
/*
|
||||
@ -827,7 +856,7 @@ static long madvise_dontneed_free(struct vm_area_struct *vma,
|
||||
VM_WARN_ON(start >= end);
|
||||
}
|
||||
|
||||
if (behavior == MADV_DONTNEED)
|
||||
if (behavior == MADV_DONTNEED || behavior == MADV_DONTNEED_LOCKED)
|
||||
return madvise_dontneed_single_vma(vma, start, end);
|
||||
else if (behavior == MADV_FREE)
|
||||
return madvise_free_single_vma(vma, start, end);
|
||||
@ -966,6 +995,7 @@ static int madvise_vma_behavior(struct vm_area_struct *vma,
|
||||
return madvise_pageout(vma, prev, start, end);
|
||||
case MADV_FREE:
|
||||
case MADV_DONTNEED:
|
||||
case MADV_DONTNEED_LOCKED:
|
||||
return madvise_dontneed_free(vma, prev, start, end, behavior);
|
||||
case MADV_POPULATE_READ:
|
||||
case MADV_POPULATE_WRITE:
|
||||
@ -1096,6 +1126,7 @@ madvise_behavior_valid(int behavior)
|
||||
case MADV_REMOVE:
|
||||
case MADV_WILLNEED:
|
||||
case MADV_DONTNEED:
|
||||
case MADV_DONTNEED_LOCKED:
|
||||
case MADV_FREE:
|
||||
case MADV_COLD:
|
||||
case MADV_PAGEOUT:
|
||||
|
127
mm/memory.c
127
mm/memory.c
@ -3287,19 +3287,35 @@ static vm_fault_t do_wp_page(struct vm_fault *vmf)
|
||||
if (PageAnon(vmf->page)) {
|
||||
struct page *page = vmf->page;
|
||||
|
||||
/* PageKsm() doesn't necessarily raise the page refcount */
|
||||
if (PageKsm(page) || page_count(page) != 1)
|
||||
/*
|
||||
* We have to verify under page lock: these early checks are
|
||||
* just an optimization to avoid locking the page and freeing
|
||||
* the swapcache if there is little hope that we can reuse.
|
||||
*
|
||||
* PageKsm() doesn't necessarily raise the page refcount.
|
||||
*/
|
||||
if (PageKsm(page) || page_count(page) > 3)
|
||||
goto copy;
|
||||
if (!PageLRU(page))
|
||||
/*
|
||||
* Note: We cannot easily detect+handle references from
|
||||
* remote LRU pagevecs or references to PageLRU() pages.
|
||||
*/
|
||||
lru_add_drain();
|
||||
if (page_count(page) > 1 + PageSwapCache(page))
|
||||
goto copy;
|
||||
if (!trylock_page(page))
|
||||
goto copy;
|
||||
if (PageKsm(page) || page_mapcount(page) != 1 || page_count(page) != 1) {
|
||||
if (PageSwapCache(page))
|
||||
try_to_free_swap(page);
|
||||
if (PageKsm(page) || page_count(page) != 1) {
|
||||
unlock_page(page);
|
||||
goto copy;
|
||||
}
|
||||
/*
|
||||
* Ok, we've got the only map reference, and the only
|
||||
* page count reference, and the page is locked,
|
||||
* it's dark out, and we're wearing sunglasses. Hit it.
|
||||
* Ok, we've got the only page reference from our mapping
|
||||
* and the page is locked, it's dark out, and we're wearing
|
||||
* sunglasses. Hit it.
|
||||
*/
|
||||
unlock_page(page);
|
||||
wp_page_reuse(vmf);
|
||||
@ -3372,11 +3388,11 @@ void unmap_mapping_folio(struct folio *folio)
|
||||
details.even_cows = false;
|
||||
details.single_folio = folio;
|
||||
|
||||
i_mmap_lock_write(mapping);
|
||||
i_mmap_lock_read(mapping);
|
||||
if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root)))
|
||||
unmap_mapping_range_tree(&mapping->i_mmap, first_index,
|
||||
last_index, &details);
|
||||
i_mmap_unlock_write(mapping);
|
||||
i_mmap_unlock_read(mapping);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -3402,11 +3418,11 @@ void unmap_mapping_pages(struct address_space *mapping, pgoff_t start,
|
||||
if (last_index < first_index)
|
||||
last_index = ULONG_MAX;
|
||||
|
||||
i_mmap_lock_write(mapping);
|
||||
i_mmap_lock_read(mapping);
|
||||
if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root)))
|
||||
unmap_mapping_range_tree(&mapping->i_mmap, first_index,
|
||||
last_index, &details);
|
||||
i_mmap_unlock_write(mapping);
|
||||
i_mmap_unlock_read(mapping);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(unmap_mapping_pages);
|
||||
|
||||
@ -3473,6 +3489,25 @@ static vm_fault_t remove_device_exclusive_entry(struct vm_fault *vmf)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline bool should_try_to_free_swap(struct page *page,
|
||||
struct vm_area_struct *vma,
|
||||
unsigned int fault_flags)
|
||||
{
|
||||
if (!PageSwapCache(page))
|
||||
return false;
|
||||
if (mem_cgroup_swap_full(page) || (vma->vm_flags & VM_LOCKED) ||
|
||||
PageMlocked(page))
|
||||
return true;
|
||||
/*
|
||||
* If we want to map a page that's in the swapcache writable, we
|
||||
* have to detect via the refcount if we're really the exclusive
|
||||
* user. Try freeing the swapcache to get rid of the swapcache
|
||||
* reference only in case it's likely that we'll be the exclusive user.
|
||||
*/
|
||||
return (fault_flags & FAULT_FLAG_WRITE) && !PageKsm(page) &&
|
||||
page_count(page) == 2;
|
||||
}
|
||||
|
||||
/*
|
||||
* We enter with non-exclusive mmap_lock (to exclude vma changes,
|
||||
* but allow concurrent faults), and pte mapped but not yet locked.
|
||||
@ -3591,21 +3626,39 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
|
||||
goto out_release;
|
||||
}
|
||||
|
||||
/*
|
||||
* Make sure try_to_free_swap or reuse_swap_page or swapoff did not
|
||||
* release the swapcache from under us. The page pin, and pte_same
|
||||
* test below, are not enough to exclude that. Even if it is still
|
||||
* swapcache, we need to check that the page's swap has not changed.
|
||||
*/
|
||||
if (unlikely((!PageSwapCache(page) ||
|
||||
page_private(page) != entry.val)) && swapcache)
|
||||
goto out_page;
|
||||
if (swapcache) {
|
||||
/*
|
||||
* Make sure try_to_free_swap or swapoff did not release the
|
||||
* swapcache from under us. The page pin, and pte_same test
|
||||
* below, are not enough to exclude that. Even if it is still
|
||||
* swapcache, we need to check that the page's swap has not
|
||||
* changed.
|
||||
*/
|
||||
if (unlikely(!PageSwapCache(page) ||
|
||||
page_private(page) != entry.val))
|
||||
goto out_page;
|
||||
|
||||
page = ksm_might_need_to_copy(page, vma, vmf->address);
|
||||
if (unlikely(!page)) {
|
||||
ret = VM_FAULT_OOM;
|
||||
page = swapcache;
|
||||
goto out_page;
|
||||
/*
|
||||
* KSM sometimes has to copy on read faults, for example, if
|
||||
* page->index of !PageKSM() pages would be nonlinear inside the
|
||||
* anon VMA -- PageKSM() is lost on actual swapout.
|
||||
*/
|
||||
page = ksm_might_need_to_copy(page, vma, vmf->address);
|
||||
if (unlikely(!page)) {
|
||||
ret = VM_FAULT_OOM;
|
||||
page = swapcache;
|
||||
goto out_page;
|
||||
}
|
||||
|
||||
/*
|
||||
* If we want to map a page that's in the swapcache writable, we
|
||||
* have to detect via the refcount if we're really the exclusive
|
||||
* owner. Try removing the extra reference from the local LRU
|
||||
* pagevecs if required.
|
||||
*/
|
||||
if ((vmf->flags & FAULT_FLAG_WRITE) && page == swapcache &&
|
||||
!PageKsm(page) && !PageLRU(page))
|
||||
lru_add_drain();
|
||||
}
|
||||
|
||||
cgroup_throttle_swaprate(page, GFP_KERNEL);
|
||||
@ -3624,19 +3677,25 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
|
||||
}
|
||||
|
||||
/*
|
||||
* The page isn't present yet, go ahead with the fault.
|
||||
*
|
||||
* Be careful about the sequence of operations here.
|
||||
* To get its accounting right, reuse_swap_page() must be called
|
||||
* while the page is counted on swap but not yet in mapcount i.e.
|
||||
* before page_add_anon_rmap() and swap_free(); try_to_free_swap()
|
||||
* must be called after the swap_free(), or it will never succeed.
|
||||
* Remove the swap entry and conditionally try to free up the swapcache.
|
||||
* We're already holding a reference on the page but haven't mapped it
|
||||
* yet.
|
||||
*/
|
||||
swap_free(entry);
|
||||
if (should_try_to_free_swap(page, vma, vmf->flags))
|
||||
try_to_free_swap(page);
|
||||
|
||||
inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
|
||||
dec_mm_counter_fast(vma->vm_mm, MM_SWAPENTS);
|
||||
pte = mk_pte(page, vma->vm_page_prot);
|
||||
if ((vmf->flags & FAULT_FLAG_WRITE) && reuse_swap_page(page)) {
|
||||
|
||||
/*
|
||||
* Same logic as in do_wp_page(); however, optimize for fresh pages
|
||||
* that are certainly not shared because we just allocated them without
|
||||
* exposing them to the swapcache.
|
||||
*/
|
||||
if ((vmf->flags & FAULT_FLAG_WRITE) && !PageKsm(page) &&
|
||||
(page != swapcache || page_count(page) == 1)) {
|
||||
pte = maybe_mkwrite(pte_mkdirty(pte), vma);
|
||||
vmf->flags &= ~FAULT_FLAG_WRITE;
|
||||
ret |= VM_FAULT_WRITE;
|
||||
@ -3662,10 +3721,6 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
|
||||
set_pte_at(vma->vm_mm, vmf->address, vmf->pte, pte);
|
||||
arch_do_swap_page(vma->vm_mm, vma, vmf->address, pte, vmf->orig_pte);
|
||||
|
||||
swap_free(entry);
|
||||
if (mem_cgroup_swap_full(page) ||
|
||||
(vma->vm_flags & VM_LOCKED) || PageMlocked(page))
|
||||
try_to_free_swap(page);
|
||||
unlock_page(page);
|
||||
if (page != swapcache && swapcache) {
|
||||
/*
|
||||
|
@ -456,8 +456,6 @@ void free_zone_device_page(struct page *page)
|
||||
if (WARN_ON_ONCE(!page->pgmap->ops || !page->pgmap->ops->page_free))
|
||||
return;
|
||||
|
||||
__ClearPageWaiters(page);
|
||||
|
||||
mem_cgroup_uncharge(page_folio(page));
|
||||
|
||||
/*
|
||||
|
@ -53,7 +53,6 @@
|
||||
|
||||
#include <asm/tlbflush.h>
|
||||
|
||||
#define CREATE_TRACE_POINTS
|
||||
#include <trace/events/migrate.h>
|
||||
|
||||
#include "internal.h"
|
||||
@ -249,6 +248,9 @@ static bool remove_migration_pte(struct folio *folio,
|
||||
if (vma->vm_flags & VM_LOCKED)
|
||||
mlock_page_drain(smp_processor_id());
|
||||
|
||||
trace_remove_migration_pte(pvmw.address, pte_val(pte),
|
||||
compound_order(new));
|
||||
|
||||
/* No need to invalidate - it was non-present before */
|
||||
update_mmu_cache(vma, pvmw.address, pvmw.pte);
|
||||
}
|
||||
|
@ -2465,16 +2465,14 @@ static void folio_account_dirtied(struct folio *folio,
|
||||
*
|
||||
* Caller must hold lock_page_memcg().
|
||||
*/
|
||||
void folio_account_cleaned(struct folio *folio, struct address_space *mapping,
|
||||
struct bdi_writeback *wb)
|
||||
void folio_account_cleaned(struct folio *folio, struct bdi_writeback *wb)
|
||||
{
|
||||
if (mapping_can_writeback(mapping)) {
|
||||
long nr = folio_nr_pages(folio);
|
||||
lruvec_stat_mod_folio(folio, NR_FILE_DIRTY, -nr);
|
||||
zone_stat_mod_folio(folio, NR_ZONE_WRITE_PENDING, -nr);
|
||||
wb_stat_mod(wb, WB_RECLAIMABLE, -nr);
|
||||
task_io_account_cancelled_write(nr * PAGE_SIZE);
|
||||
}
|
||||
long nr = folio_nr_pages(folio);
|
||||
|
||||
lruvec_stat_mod_folio(folio, NR_FILE_DIRTY, -nr);
|
||||
zone_stat_mod_folio(folio, NR_ZONE_WRITE_PENDING, -nr);
|
||||
wb_stat_mod(wb, WB_RECLAIMABLE, -nr);
|
||||
task_io_account_cancelled_write(nr * PAGE_SIZE);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -2683,7 +2681,7 @@ void __folio_cancel_dirty(struct folio *folio)
|
||||
wb = unlocked_inode_to_wb_begin(inode, &cookie);
|
||||
|
||||
if (folio_test_clear_dirty(folio))
|
||||
folio_account_cleaned(folio, mapping, wb);
|
||||
folio_account_cleaned(folio, wb);
|
||||
|
||||
unlocked_inode_to_wb_end(inode, &cookie);
|
||||
folio_memcg_unlock(folio);
|
||||
|
154
mm/page_alloc.c
154
mm/page_alloc.c
@ -378,25 +378,9 @@ int page_group_by_mobility_disabled __read_mostly;
|
||||
*/
|
||||
static DEFINE_STATIC_KEY_TRUE(deferred_pages);
|
||||
|
||||
/*
|
||||
* Calling kasan_poison_pages() only after deferred memory initialization
|
||||
* has completed. Poisoning pages during deferred memory init will greatly
|
||||
* lengthen the process and cause problem in large memory systems as the
|
||||
* deferred pages initialization is done with interrupt disabled.
|
||||
*
|
||||
* Assuming that there will be no reference to those newly initialized
|
||||
* pages before they are ever allocated, this should have no effect on
|
||||
* KASAN memory tracking as the poison will be properly inserted at page
|
||||
* allocation time. The only corner case is when pages are allocated by
|
||||
* on-demand allocation and then freed again before the deferred pages
|
||||
* initialization is done, but this is not likely to happen.
|
||||
*/
|
||||
static inline bool should_skip_kasan_poison(struct page *page, fpi_t fpi_flags)
|
||||
static inline bool deferred_pages_enabled(void)
|
||||
{
|
||||
return static_branch_unlikely(&deferred_pages) ||
|
||||
(!IS_ENABLED(CONFIG_KASAN_GENERIC) &&
|
||||
(fpi_flags & FPI_SKIP_KASAN_POISON)) ||
|
||||
PageSkipKASanPoison(page);
|
||||
return static_branch_unlikely(&deferred_pages);
|
||||
}
|
||||
|
||||
/* Returns true if the struct page for the pfn is uninitialised */
|
||||
@ -447,11 +431,9 @@ defer_init(int nid, unsigned long pfn, unsigned long end_pfn)
|
||||
return false;
|
||||
}
|
||||
#else
|
||||
static inline bool should_skip_kasan_poison(struct page *page, fpi_t fpi_flags)
|
||||
static inline bool deferred_pages_enabled(void)
|
||||
{
|
||||
return (!IS_ENABLED(CONFIG_KASAN_GENERIC) &&
|
||||
(fpi_flags & FPI_SKIP_KASAN_POISON)) ||
|
||||
PageSkipKASanPoison(page);
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline bool early_page_uninitialised(unsigned long pfn)
|
||||
@ -1267,16 +1249,39 @@ static int free_tail_pages_check(struct page *head_page, struct page *page)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void kernel_init_free_pages(struct page *page, int numpages, bool zero_tags)
|
||||
/*
|
||||
* Skip KASAN memory poisoning when either:
|
||||
*
|
||||
* 1. Deferred memory initialization has not yet completed,
|
||||
* see the explanation below.
|
||||
* 2. Skipping poisoning is requested via FPI_SKIP_KASAN_POISON,
|
||||
* see the comment next to it.
|
||||
* 3. Skipping poisoning is requested via __GFP_SKIP_KASAN_POISON,
|
||||
* see the comment next to it.
|
||||
*
|
||||
* Poisoning pages during deferred memory init will greatly lengthen the
|
||||
* process and cause problem in large memory systems as the deferred pages
|
||||
* initialization is done with interrupt disabled.
|
||||
*
|
||||
* Assuming that there will be no reference to those newly initialized
|
||||
* pages before they are ever allocated, this should have no effect on
|
||||
* KASAN memory tracking as the poison will be properly inserted at page
|
||||
* allocation time. The only corner case is when pages are allocated by
|
||||
* on-demand allocation and then freed again before the deferred pages
|
||||
* initialization is done, but this is not likely to happen.
|
||||
*/
|
||||
static inline bool should_skip_kasan_poison(struct page *page, fpi_t fpi_flags)
|
||||
{
|
||||
return deferred_pages_enabled() ||
|
||||
(!IS_ENABLED(CONFIG_KASAN_GENERIC) &&
|
||||
(fpi_flags & FPI_SKIP_KASAN_POISON)) ||
|
||||
PageSkipKASanPoison(page);
|
||||
}
|
||||
|
||||
static void kernel_init_free_pages(struct page *page, int numpages)
|
||||
{
|
||||
int i;
|
||||
|
||||
if (zero_tags) {
|
||||
for (i = 0; i < numpages; i++)
|
||||
tag_clear_highpage(page + i);
|
||||
return;
|
||||
}
|
||||
|
||||
/* s390's use of memset() could override KASAN redzones. */
|
||||
kasan_disable_current();
|
||||
for (i = 0; i < numpages; i++) {
|
||||
@ -1292,7 +1297,7 @@ static __always_inline bool free_pages_prepare(struct page *page,
|
||||
unsigned int order, bool check_free, fpi_t fpi_flags)
|
||||
{
|
||||
int bad = 0;
|
||||
bool skip_kasan_poison = should_skip_kasan_poison(page, fpi_flags);
|
||||
bool init = want_init_on_free();
|
||||
|
||||
VM_BUG_ON_PAGE(PageTail(page), page);
|
||||
|
||||
@ -1359,23 +1364,21 @@ static __always_inline bool free_pages_prepare(struct page *page,
|
||||
|
||||
/*
|
||||
* As memory initialization might be integrated into KASAN,
|
||||
* kasan_free_pages and kernel_init_free_pages must be
|
||||
* KASAN poisoning and memory initialization code must be
|
||||
* kept together to avoid discrepancies in behavior.
|
||||
*
|
||||
* With hardware tag-based KASAN, memory tags must be set before the
|
||||
* page becomes unavailable via debug_pagealloc or arch_free_page.
|
||||
*/
|
||||
if (kasan_has_integrated_init()) {
|
||||
if (!skip_kasan_poison)
|
||||
kasan_free_pages(page, order);
|
||||
} else {
|
||||
bool init = want_init_on_free();
|
||||
if (!should_skip_kasan_poison(page, fpi_flags)) {
|
||||
kasan_poison_pages(page, order, init);
|
||||
|
||||
if (init)
|
||||
kernel_init_free_pages(page, 1 << order, false);
|
||||
if (!skip_kasan_poison)
|
||||
kasan_poison_pages(page, order, init);
|
||||
/* Memory is already initialized if KASAN did it internally. */
|
||||
if (kasan_has_integrated_init())
|
||||
init = false;
|
||||
}
|
||||
if (init)
|
||||
kernel_init_free_pages(page, 1 << order);
|
||||
|
||||
/*
|
||||
* arch_free_page() can make the page's contents inaccessible. s390
|
||||
@ -2340,9 +2343,43 @@ static inline bool check_new_pcp(struct page *page, unsigned int order)
|
||||
}
|
||||
#endif /* CONFIG_DEBUG_VM */
|
||||
|
||||
static inline bool should_skip_kasan_unpoison(gfp_t flags, bool init_tags)
|
||||
{
|
||||
/* Don't skip if a software KASAN mode is enabled. */
|
||||
if (IS_ENABLED(CONFIG_KASAN_GENERIC) ||
|
||||
IS_ENABLED(CONFIG_KASAN_SW_TAGS))
|
||||
return false;
|
||||
|
||||
/* Skip, if hardware tag-based KASAN is not enabled. */
|
||||
if (!kasan_hw_tags_enabled())
|
||||
return true;
|
||||
|
||||
/*
|
||||
* With hardware tag-based KASAN enabled, skip if either:
|
||||
*
|
||||
* 1. Memory tags have already been cleared via tag_clear_highpage().
|
||||
* 2. Skipping has been requested via __GFP_SKIP_KASAN_UNPOISON.
|
||||
*/
|
||||
return init_tags || (flags & __GFP_SKIP_KASAN_UNPOISON);
|
||||
}
|
||||
|
||||
static inline bool should_skip_init(gfp_t flags)
|
||||
{
|
||||
/* Don't skip, if hardware tag-based KASAN is not enabled. */
|
||||
if (!kasan_hw_tags_enabled())
|
||||
return false;
|
||||
|
||||
/* For hardware tag-based KASAN, skip if requested. */
|
||||
return (flags & __GFP_SKIP_ZERO);
|
||||
}
|
||||
|
||||
inline void post_alloc_hook(struct page *page, unsigned int order,
|
||||
gfp_t gfp_flags)
|
||||
{
|
||||
bool init = !want_init_on_free() && want_init_on_alloc(gfp_flags) &&
|
||||
!should_skip_init(gfp_flags);
|
||||
bool init_tags = init && (gfp_flags & __GFP_ZEROTAGS);
|
||||
|
||||
set_page_private(page, 0);
|
||||
set_page_refcounted(page);
|
||||
|
||||
@ -2358,19 +2395,38 @@ inline void post_alloc_hook(struct page *page, unsigned int order,
|
||||
|
||||
/*
|
||||
* As memory initialization might be integrated into KASAN,
|
||||
* kasan_alloc_pages and kernel_init_free_pages must be
|
||||
* KASAN unpoisoning and memory initialization code must be
|
||||
* kept together to avoid discrepancies in behavior.
|
||||
*/
|
||||
if (kasan_has_integrated_init()) {
|
||||
kasan_alloc_pages(page, order, gfp_flags);
|
||||
} else {
|
||||
bool init = !want_init_on_free() && want_init_on_alloc(gfp_flags);
|
||||
|
||||
kasan_unpoison_pages(page, order, init);
|
||||
if (init)
|
||||
kernel_init_free_pages(page, 1 << order,
|
||||
gfp_flags & __GFP_ZEROTAGS);
|
||||
/*
|
||||
* If memory tags should be zeroed (which happens only when memory
|
||||
* should be initialized as well).
|
||||
*/
|
||||
if (init_tags) {
|
||||
int i;
|
||||
|
||||
/* Initialize both memory and tags. */
|
||||
for (i = 0; i != 1 << order; ++i)
|
||||
tag_clear_highpage(page + i);
|
||||
|
||||
/* Note that memory is already initialized by the loop above. */
|
||||
init = false;
|
||||
}
|
||||
if (!should_skip_kasan_unpoison(gfp_flags, init_tags)) {
|
||||
/* Unpoison shadow memory or set memory tags. */
|
||||
kasan_unpoison_pages(page, order, init);
|
||||
|
||||
/* Note that memory is already initialized by KASAN. */
|
||||
if (kasan_has_integrated_init())
|
||||
init = false;
|
||||
}
|
||||
/* If memory is still not initialized, do it now. */
|
||||
if (init)
|
||||
kernel_init_free_pages(page, 1 << order);
|
||||
/* Propagate __GFP_SKIP_KASAN_POISON to page flags. */
|
||||
if (kasan_hw_tags_enabled() && (gfp_flags & __GFP_SKIP_KASAN_POISON))
|
||||
SetPageSkipKASanPoison(page);
|
||||
|
||||
set_page_owner(page, order, gfp_flags);
|
||||
page_table_check_alloc(page, order);
|
||||
|
@ -10,6 +10,7 @@
|
||||
#include <linux/migrate.h>
|
||||
#include <linux/stackdepot.h>
|
||||
#include <linux/seq_file.h>
|
||||
#include <linux/memcontrol.h>
|
||||
#include <linux/sched/clock.h>
|
||||
|
||||
#include "internal.h"
|
||||
@ -28,7 +29,9 @@ struct page_owner {
|
||||
depot_stack_handle_t free_handle;
|
||||
u64 ts_nsec;
|
||||
u64 free_ts_nsec;
|
||||
char comm[TASK_COMM_LEN];
|
||||
pid_t pid;
|
||||
pid_t tgid;
|
||||
};
|
||||
|
||||
static bool page_owner_enabled = false;
|
||||
@ -163,7 +166,10 @@ static inline void __set_page_owner_handle(struct page_ext *page_ext,
|
||||
page_owner->gfp_mask = gfp_mask;
|
||||
page_owner->last_migrate_reason = -1;
|
||||
page_owner->pid = current->pid;
|
||||
page_owner->tgid = current->tgid;
|
||||
page_owner->ts_nsec = local_clock();
|
||||
strlcpy(page_owner->comm, current->comm,
|
||||
sizeof(page_owner->comm));
|
||||
__set_bit(PAGE_EXT_OWNER, &page_ext->flags);
|
||||
__set_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags);
|
||||
|
||||
@ -229,8 +235,10 @@ void __folio_copy_owner(struct folio *newfolio, struct folio *old)
|
||||
old_page_owner->last_migrate_reason;
|
||||
new_page_owner->handle = old_page_owner->handle;
|
||||
new_page_owner->pid = old_page_owner->pid;
|
||||
new_page_owner->tgid = old_page_owner->tgid;
|
||||
new_page_owner->ts_nsec = old_page_owner->ts_nsec;
|
||||
new_page_owner->free_ts_nsec = old_page_owner->ts_nsec;
|
||||
strcpy(new_page_owner->comm, old_page_owner->comm);
|
||||
|
||||
/*
|
||||
* We don't clear the bit on the old folio as it's going to be freed
|
||||
@ -325,6 +333,45 @@ void pagetypeinfo_showmixedcount_print(struct seq_file *m,
|
||||
seq_putc(m, '\n');
|
||||
}
|
||||
|
||||
/*
|
||||
* Look up memcg information and print it out
|
||||
*/
|
||||
static inline int print_page_owner_memcg(char *kbuf, size_t count, int ret,
|
||||
struct page *page)
|
||||
{
|
||||
#ifdef CONFIG_MEMCG
|
||||
unsigned long memcg_data;
|
||||
struct mem_cgroup *memcg;
|
||||
bool online;
|
||||
char name[80];
|
||||
|
||||
rcu_read_lock();
|
||||
memcg_data = READ_ONCE(page->memcg_data);
|
||||
if (!memcg_data)
|
||||
goto out_unlock;
|
||||
|
||||
if (memcg_data & MEMCG_DATA_OBJCGS)
|
||||
ret += scnprintf(kbuf + ret, count - ret,
|
||||
"Slab cache page\n");
|
||||
|
||||
memcg = page_memcg_check(page);
|
||||
if (!memcg)
|
||||
goto out_unlock;
|
||||
|
||||
online = (memcg->css.flags & CSS_ONLINE);
|
||||
cgroup_name(memcg->css.cgroup, name, sizeof(name));
|
||||
ret += scnprintf(kbuf + ret, count - ret,
|
||||
"Charged %sto %smemcg %s\n",
|
||||
PageMemcgKmem(page) ? "(via objcg) " : "",
|
||||
online ? "" : "offline ",
|
||||
name);
|
||||
out_unlock:
|
||||
rcu_read_unlock();
|
||||
#endif /* CONFIG_MEMCG */
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static ssize_t
|
||||
print_page_owner(char __user *buf, size_t count, unsigned long pfn,
|
||||
struct page *page, struct page_owner *page_owner,
|
||||
@ -338,19 +385,17 @@ print_page_owner(char __user *buf, size_t count, unsigned long pfn,
|
||||
if (!kbuf)
|
||||
return -ENOMEM;
|
||||
|
||||
ret = snprintf(kbuf, count,
|
||||
"Page allocated via order %u, mask %#x(%pGg), pid %d, ts %llu ns, free_ts %llu ns\n",
|
||||
ret = scnprintf(kbuf, count,
|
||||
"Page allocated via order %u, mask %#x(%pGg), pid %d, tgid %d (%s), ts %llu ns, free_ts %llu ns\n",
|
||||
page_owner->order, page_owner->gfp_mask,
|
||||
&page_owner->gfp_mask, page_owner->pid,
|
||||
page_owner->tgid, page_owner->comm,
|
||||
page_owner->ts_nsec, page_owner->free_ts_nsec);
|
||||
|
||||
if (ret >= count)
|
||||
goto err;
|
||||
|
||||
/* Print information relevant to grouping pages by mobility */
|
||||
pageblock_mt = get_pageblock_migratetype(page);
|
||||
page_mt = gfp_migratetype(page_owner->gfp_mask);
|
||||
ret += snprintf(kbuf + ret, count - ret,
|
||||
ret += scnprintf(kbuf + ret, count - ret,
|
||||
"PFN %lu type %s Block %lu type %s Flags %pGp\n",
|
||||
pfn,
|
||||
migratetype_names[page_mt],
|
||||
@ -358,21 +403,18 @@ print_page_owner(char __user *buf, size_t count, unsigned long pfn,
|
||||
migratetype_names[pageblock_mt],
|
||||
&page->flags);
|
||||
|
||||
if (ret >= count)
|
||||
goto err;
|
||||
|
||||
ret += stack_depot_snprint(handle, kbuf + ret, count - ret, 0);
|
||||
if (ret >= count)
|
||||
goto err;
|
||||
|
||||
if (page_owner->last_migrate_reason != -1) {
|
||||
ret += snprintf(kbuf + ret, count - ret,
|
||||
ret += scnprintf(kbuf + ret, count - ret,
|
||||
"Page has been migrated, last migrate reason: %s\n",
|
||||
migrate_reason_names[page_owner->last_migrate_reason]);
|
||||
if (ret >= count)
|
||||
goto err;
|
||||
}
|
||||
|
||||
ret = print_page_owner_memcg(kbuf, count, ret, page);
|
||||
|
||||
ret += snprintf(kbuf + ret, count - ret, "\n");
|
||||
if (ret >= count)
|
||||
goto err;
|
||||
@ -415,9 +457,10 @@ void __dump_page_owner(const struct page *page)
|
||||
else
|
||||
pr_alert("page_owner tracks the page as freed\n");
|
||||
|
||||
pr_alert("page last allocated via order %u, migratetype %s, gfp_mask %#x(%pGg), pid %d, ts %llu, free_ts %llu\n",
|
||||
pr_alert("page last allocated via order %u, migratetype %s, gfp_mask %#x(%pGg), pid %d, tgid %d (%s), ts %llu, free_ts %llu\n",
|
||||
page_owner->order, migratetype_names[mt], gfp_mask, &gfp_mask,
|
||||
page_owner->pid, page_owner->ts_nsec, page_owner->free_ts_nsec);
|
||||
page_owner->pid, page_owner->tgid, page_owner->comm,
|
||||
page_owner->ts_nsec, page_owner->free_ts_nsec);
|
||||
|
||||
handle = READ_ONCE(page_owner->handle);
|
||||
if (!handle)
|
||||
|
62
mm/rmap.c
62
mm/rmap.c
@ -76,7 +76,9 @@
|
||||
|
||||
#include <asm/tlbflush.h>
|
||||
|
||||
#define CREATE_TRACE_POINTS
|
||||
#include <trace/events/tlb.h>
|
||||
#include <trace/events/migrate.h>
|
||||
|
||||
#include "internal.h"
|
||||
|
||||
@ -1236,14 +1238,14 @@ void page_add_new_anon_rmap(struct page *page,
|
||||
void page_add_file_rmap(struct page *page,
|
||||
struct vm_area_struct *vma, bool compound)
|
||||
{
|
||||
int i, nr = 1;
|
||||
int i, nr = 0;
|
||||
|
||||
VM_BUG_ON_PAGE(compound && !PageTransHuge(page), page);
|
||||
lock_page_memcg(page);
|
||||
if (compound && PageTransHuge(page)) {
|
||||
int nr_pages = thp_nr_pages(page);
|
||||
|
||||
for (i = 0, nr = 0; i < nr_pages; i++) {
|
||||
for (i = 0; i < nr_pages; i++) {
|
||||
if (atomic_inc_and_test(&page[i]._mapcount))
|
||||
nr++;
|
||||
}
|
||||
@ -1271,11 +1273,12 @@ void page_add_file_rmap(struct page *page,
|
||||
VM_WARN_ON_ONCE(!PageLocked(page));
|
||||
SetPageDoubleMap(compound_head(page));
|
||||
}
|
||||
if (!atomic_inc_and_test(&page->_mapcount))
|
||||
goto out;
|
||||
if (atomic_inc_and_test(&page->_mapcount))
|
||||
nr++;
|
||||
}
|
||||
__mod_lruvec_page_state(page, NR_FILE_MAPPED, nr);
|
||||
out:
|
||||
if (nr)
|
||||
__mod_lruvec_page_state(page, NR_FILE_MAPPED, nr);
|
||||
unlock_page_memcg(page);
|
||||
|
||||
mlock_vma_page(page, vma, compound);
|
||||
@ -1283,7 +1286,7 @@ void page_add_file_rmap(struct page *page,
|
||||
|
||||
static void page_remove_file_rmap(struct page *page, bool compound)
|
||||
{
|
||||
int i, nr = 1;
|
||||
int i, nr = 0;
|
||||
|
||||
VM_BUG_ON_PAGE(compound && !PageHead(page), page);
|
||||
|
||||
@ -1298,12 +1301,12 @@ static void page_remove_file_rmap(struct page *page, bool compound)
|
||||
if (compound && PageTransHuge(page)) {
|
||||
int nr_pages = thp_nr_pages(page);
|
||||
|
||||
for (i = 0, nr = 0; i < nr_pages; i++) {
|
||||
for (i = 0; i < nr_pages; i++) {
|
||||
if (atomic_add_negative(-1, &page[i]._mapcount))
|
||||
nr++;
|
||||
}
|
||||
if (!atomic_add_negative(-1, compound_mapcount_ptr(page)))
|
||||
return;
|
||||
goto out;
|
||||
if (PageSwapBacked(page))
|
||||
__mod_lruvec_page_state(page, NR_SHMEM_PMDMAPPED,
|
||||
-nr_pages);
|
||||
@ -1311,16 +1314,12 @@ static void page_remove_file_rmap(struct page *page, bool compound)
|
||||
__mod_lruvec_page_state(page, NR_FILE_PMDMAPPED,
|
||||
-nr_pages);
|
||||
} else {
|
||||
if (!atomic_add_negative(-1, &page->_mapcount))
|
||||
return;
|
||||
if (atomic_add_negative(-1, &page->_mapcount))
|
||||
nr++;
|
||||
}
|
||||
|
||||
/*
|
||||
* We use the irq-unsafe __{inc|mod}_lruvec_page_state because
|
||||
* these counters are not modified in interrupt context, and
|
||||
* pte lock(a spinlock) is held, which implies preemption disabled.
|
||||
*/
|
||||
__mod_lruvec_page_state(page, NR_FILE_MAPPED, -nr);
|
||||
out:
|
||||
if (nr)
|
||||
__mod_lruvec_page_state(page, NR_FILE_MAPPED, -nr);
|
||||
}
|
||||
|
||||
static void page_remove_anon_compound_rmap(struct page *page)
|
||||
@ -1589,7 +1588,30 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
|
||||
|
||||
/* MADV_FREE page check */
|
||||
if (!folio_test_swapbacked(folio)) {
|
||||
if (!folio_test_dirty(folio)) {
|
||||
int ref_count, map_count;
|
||||
|
||||
/*
|
||||
* Synchronize with gup_pte_range():
|
||||
* - clear PTE; barrier; read refcount
|
||||
* - inc refcount; barrier; read PTE
|
||||
*/
|
||||
smp_mb();
|
||||
|
||||
ref_count = folio_ref_count(folio);
|
||||
map_count = folio_mapcount(folio);
|
||||
|
||||
/*
|
||||
* Order reads for page refcount and dirty flag
|
||||
* (see comments in __remove_mapping()).
|
||||
*/
|
||||
smp_rmb();
|
||||
|
||||
/*
|
||||
* The only page refs must be one from isolation
|
||||
* plus the rmap(s) (dropped by discard:).
|
||||
*/
|
||||
if (ref_count == 1 + map_count &&
|
||||
!folio_test_dirty(folio)) {
|
||||
/* Invalidate as we cleared the pte */
|
||||
mmu_notifier_invalidate_range(mm,
|
||||
address, address + PAGE_SIZE);
|
||||
@ -1852,6 +1874,8 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma,
|
||||
if (pte_swp_uffd_wp(pteval))
|
||||
swp_pte = pte_swp_mkuffd_wp(swp_pte);
|
||||
set_pte_at(mm, pvmw.address, pvmw.pte, swp_pte);
|
||||
trace_set_migration_pte(pvmw.address, pte_val(swp_pte),
|
||||
compound_order(&folio->page));
|
||||
/*
|
||||
* No need to invalidate here it will synchronize on
|
||||
* against the special swap migration pte.
|
||||
@ -1920,6 +1944,8 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma,
|
||||
if (pte_uffd_wp(pteval))
|
||||
swp_pte = pte_swp_mkuffd_wp(swp_pte);
|
||||
set_pte_at(mm, address, pvmw.pte, swp_pte);
|
||||
trace_set_migration_pte(address, pte_val(swp_pte),
|
||||
compound_order(&folio->page));
|
||||
/*
|
||||
* No need to invalidate here it will synchronize on
|
||||
* against the special swap migration pte.
|
||||
|
@ -97,7 +97,6 @@ static void __page_cache_release(struct page *page)
|
||||
mod_zone_page_state(page_zone(page), NR_MLOCK, -nr_pages);
|
||||
count_vm_events(UNEVICTABLE_PGCLEARED, nr_pages);
|
||||
}
|
||||
__ClearPageWaiters(page);
|
||||
}
|
||||
|
||||
static void __put_single_page(struct page *page)
|
||||
@ -152,7 +151,6 @@ void put_pages_list(struct list_head *pages)
|
||||
continue;
|
||||
}
|
||||
/* Cannot be PageLRU because it's passed to us using the lru */
|
||||
__ClearPageWaiters(page);
|
||||
}
|
||||
|
||||
free_unref_page_list(pages);
|
||||
@ -971,8 +969,6 @@ void release_pages(struct page **pages, int nr)
|
||||
count_vm_event(UNEVICTABLE_PGCLEARED);
|
||||
}
|
||||
|
||||
__ClearPageWaiters(page);
|
||||
|
||||
list_add(&page->lru, &pages_to_free);
|
||||
}
|
||||
if (lruvec)
|
||||
|
104
mm/swapfile.c
104
mm/swapfile.c
@ -1167,16 +1167,6 @@ static struct swap_info_struct *_swap_info_get(swp_entry_t entry)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
 * Look up the swap_info_struct for @entry via _swap_info_get() and, when
 * one is found, return it with its ->lock spinlock held.  Returns NULL
 * (with no lock taken) when the lookup fails.
 */
static struct swap_info_struct *swap_info_get(swp_entry_t entry)
{
	struct swap_info_struct *si = _swap_info_get(entry);

	if (!si)
		return NULL;

	spin_lock(&si->lock);
	return si;
}
|
||||
|
||||
static struct swap_info_struct *swap_info_get_cont(swp_entry_t entry,
|
||||
struct swap_info_struct *q)
|
||||
{
|
||||
@ -1601,100 +1591,6 @@ static bool page_swapped(struct page *page)
|
||||
return false;
|
||||
}
|
||||
|
||||
static int page_trans_huge_map_swapcount(struct page *page,
|
||||
int *total_swapcount)
|
||||
{
|
||||
int i, map_swapcount, _total_swapcount;
|
||||
unsigned long offset = 0;
|
||||
struct swap_info_struct *si;
|
||||
struct swap_cluster_info *ci = NULL;
|
||||
unsigned char *map = NULL;
|
||||
int swapcount = 0;
|
||||
|
||||
/* hugetlbfs shouldn't call it */
|
||||
VM_BUG_ON_PAGE(PageHuge(page), page);
|
||||
|
||||
if (!IS_ENABLED(CONFIG_THP_SWAP) || likely(!PageTransCompound(page))) {
|
||||
if (PageSwapCache(page))
|
||||
swapcount = page_swapcount(page);
|
||||
if (total_swapcount)
|
||||
*total_swapcount = swapcount;
|
||||
return swapcount + page_trans_huge_mapcount(page);
|
||||
}
|
||||
|
||||
page = compound_head(page);
|
||||
|
||||
_total_swapcount = map_swapcount = 0;
|
||||
if (PageSwapCache(page)) {
|
||||
swp_entry_t entry;
|
||||
|
||||
entry.val = page_private(page);
|
||||
si = _swap_info_get(entry);
|
||||
if (si) {
|
||||
map = si->swap_map;
|
||||
offset = swp_offset(entry);
|
||||
}
|
||||
}
|
||||
if (map)
|
||||
ci = lock_cluster(si, offset);
|
||||
for (i = 0; i < HPAGE_PMD_NR; i++) {
|
||||
int mapcount = atomic_read(&page[i]._mapcount) + 1;
|
||||
if (map) {
|
||||
swapcount = swap_count(map[offset + i]);
|
||||
_total_swapcount += swapcount;
|
||||
}
|
||||
map_swapcount = max(map_swapcount, mapcount + swapcount);
|
||||
}
|
||||
unlock_cluster(ci);
|
||||
|
||||
if (PageDoubleMap(page))
|
||||
map_swapcount -= 1;
|
||||
|
||||
if (total_swapcount)
|
||||
*total_swapcount = _total_swapcount;
|
||||
|
||||
return map_swapcount + compound_mapcount(page);
|
||||
}
|
||||
|
||||
/*
|
||||
* We can write to an anon page without COW if there are no other references
|
||||
* to it. And as a side-effect, free up its swap: because the old content
|
||||
* on disk will never be read, and seeking back there to write new content
|
||||
* later would only waste time away from clustering.
|
||||
*/
|
||||
bool reuse_swap_page(struct page *page)
|
||||
{
|
||||
int count, total_swapcount;
|
||||
|
||||
VM_BUG_ON_PAGE(!PageLocked(page), page);
|
||||
if (unlikely(PageKsm(page)))
|
||||
return false;
|
||||
count = page_trans_huge_map_swapcount(page, &total_swapcount);
|
||||
if (count == 1 && PageSwapCache(page) &&
|
||||
(likely(!PageTransCompound(page)) ||
|
||||
/* The remaining swap count will be freed soon */
|
||||
total_swapcount == page_swapcount(page))) {
|
||||
if (!PageWriteback(page)) {
|
||||
page = compound_head(page);
|
||||
delete_from_swap_cache(page);
|
||||
SetPageDirty(page);
|
||||
} else {
|
||||
swp_entry_t entry;
|
||||
struct swap_info_struct *p;
|
||||
|
||||
entry.val = page_private(page);
|
||||
p = swap_info_get(entry);
|
||||
if (p->flags & SWP_STABLE_WRITES) {
|
||||
spin_unlock(&p->lock);
|
||||
return false;
|
||||
}
|
||||
spin_unlock(&p->lock);
|
||||
}
|
||||
}
|
||||
|
||||
return count <= 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* If swap is getting full, or if there are no more mappings of this page,
|
||||
* then try_to_free_swap is called to free its swap space.
|
||||
|
99
mm/vmalloc.c
99
mm/vmalloc.c
@ -74,7 +74,7 @@ static const bool vmap_allow_huge = false;
|
||||
|
||||
bool is_vmalloc_addr(const void *x)
|
||||
{
|
||||
unsigned long addr = (unsigned long)x;
|
||||
unsigned long addr = (unsigned long)kasan_reset_tag(x);
|
||||
|
||||
return addr >= VMALLOC_START && addr < VMALLOC_END;
|
||||
}
|
||||
@ -631,7 +631,7 @@ int is_vmalloc_or_module_addr(const void *x)
|
||||
* just put it in the vmalloc space.
|
||||
*/
|
||||
#if defined(CONFIG_MODULES) && defined(MODULES_VADDR)
|
||||
unsigned long addr = (unsigned long)x;
|
||||
unsigned long addr = (unsigned long)kasan_reset_tag(x);
|
||||
if (addr >= MODULES_VADDR && addr < MODULES_END)
|
||||
return 1;
|
||||
#endif
|
||||
@ -795,6 +795,8 @@ static struct vmap_area *find_vmap_area_exceed_addr(unsigned long addr)
|
||||
struct vmap_area *va = NULL;
|
||||
struct rb_node *n = vmap_area_root.rb_node;
|
||||
|
||||
addr = (unsigned long)kasan_reset_tag((void *)addr);
|
||||
|
||||
while (n) {
|
||||
struct vmap_area *tmp;
|
||||
|
||||
@ -816,6 +818,8 @@ static struct vmap_area *__find_vmap_area(unsigned long addr)
|
||||
{
|
||||
struct rb_node *n = vmap_area_root.rb_node;
|
||||
|
||||
addr = (unsigned long)kasan_reset_tag((void *)addr);
|
||||
|
||||
while (n) {
|
||||
struct vmap_area *va;
|
||||
|
||||
@ -2166,7 +2170,7 @@ EXPORT_SYMBOL_GPL(vm_unmap_aliases);
|
||||
void vm_unmap_ram(const void *mem, unsigned int count)
|
||||
{
|
||||
unsigned long size = (unsigned long)count << PAGE_SHIFT;
|
||||
unsigned long addr = (unsigned long)mem;
|
||||
unsigned long addr = (unsigned long)kasan_reset_tag(mem);
|
||||
struct vmap_area *va;
|
||||
|
||||
might_sleep();
|
||||
@ -2227,14 +2231,19 @@ void *vm_map_ram(struct page **pages, unsigned int count, int node)
|
||||
mem = (void *)addr;
|
||||
}
|
||||
|
||||
kasan_unpoison_vmalloc(mem, size);
|
||||
|
||||
if (vmap_pages_range(addr, addr + size, PAGE_KERNEL,
|
||||
pages, PAGE_SHIFT) < 0) {
|
||||
vm_unmap_ram(mem, count);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Mark the pages as accessible, now that they are mapped.
|
||||
* With hardware tag-based KASAN, marking is skipped for
|
||||
* non-VM_ALLOC mappings, see __kasan_unpoison_vmalloc().
|
||||
*/
|
||||
mem = kasan_unpoison_vmalloc(mem, size, KASAN_VMALLOC_PROT_NORMAL);
|
||||
|
||||
return mem;
|
||||
}
|
||||
EXPORT_SYMBOL(vm_map_ram);
|
||||
@ -2460,10 +2469,20 @@ static struct vm_struct *__get_vm_area_node(unsigned long size,
|
||||
return NULL;
|
||||
}
|
||||
|
||||
kasan_unpoison_vmalloc((void *)va->va_start, requested_size);
|
||||
|
||||
setup_vmalloc_vm(area, va, flags, caller);
|
||||
|
||||
/*
|
||||
* Mark pages for non-VM_ALLOC mappings as accessible. Do it now as a
|
||||
* best-effort approach, as they can be mapped outside of vmalloc code.
|
||||
* For VM_ALLOC mappings, the pages are marked as accessible after
|
||||
* getting mapped in __vmalloc_node_range().
|
||||
* With hardware tag-based KASAN, marking is skipped for
|
||||
* non-VM_ALLOC mappings, see __kasan_unpoison_vmalloc().
|
||||
*/
|
||||
if (!(flags & VM_ALLOC))
|
||||
area->addr = kasan_unpoison_vmalloc(area->addr, requested_size,
|
||||
KASAN_VMALLOC_PROT_NORMAL);
|
||||
|
||||
return area;
|
||||
}
|
||||
|
||||
@ -2547,7 +2566,7 @@ struct vm_struct *remove_vm_area(const void *addr)
|
||||
va->vm = NULL;
|
||||
spin_unlock(&vmap_area_lock);
|
||||
|
||||
kasan_free_shadow(vm);
|
||||
kasan_free_module_shadow(vm);
|
||||
free_unmap_vmap_area(va);
|
||||
|
||||
return vm;
|
||||
@ -3071,7 +3090,8 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align,
|
||||
const void *caller)
|
||||
{
|
||||
struct vm_struct *area;
|
||||
void *addr;
|
||||
void *ret;
|
||||
kasan_vmalloc_flags_t kasan_flags = KASAN_VMALLOC_NONE;
|
||||
unsigned long real_size = size;
|
||||
unsigned long real_align = align;
|
||||
unsigned int shift = PAGE_SHIFT;
|
||||
@ -3124,10 +3144,50 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align,
|
||||
goto fail;
|
||||
}
|
||||
|
||||
addr = __vmalloc_area_node(area, gfp_mask, prot, shift, node);
|
||||
if (!addr)
|
||||
/*
|
||||
* Prepare arguments for __vmalloc_area_node() and
|
||||
* kasan_unpoison_vmalloc().
|
||||
*/
|
||||
if (pgprot_val(prot) == pgprot_val(PAGE_KERNEL)) {
|
||||
if (kasan_hw_tags_enabled()) {
|
||||
/*
|
||||
* Modify protection bits to allow tagging.
|
||||
* This must be done before mapping.
|
||||
*/
|
||||
prot = arch_vmap_pgprot_tagged(prot);
|
||||
|
||||
/*
|
||||
* Skip page_alloc poisoning and zeroing for physical
|
||||
* pages backing VM_ALLOC mapping. Memory is instead
|
||||
* poisoned and zeroed by kasan_unpoison_vmalloc().
|
||||
*/
|
||||
gfp_mask |= __GFP_SKIP_KASAN_UNPOISON | __GFP_SKIP_ZERO;
|
||||
}
|
||||
|
||||
/* Take note that the mapping is PAGE_KERNEL. */
|
||||
kasan_flags |= KASAN_VMALLOC_PROT_NORMAL;
|
||||
}
|
||||
|
||||
/* Allocate physical pages and map them into vmalloc space. */
|
||||
ret = __vmalloc_area_node(area, gfp_mask, prot, shift, node);
|
||||
if (!ret)
|
||||
goto fail;
|
||||
|
||||
/*
|
||||
* Mark the pages as accessible, now that they are mapped.
|
||||
* The init condition should match the one in post_alloc_hook()
|
||||
* (except for the should_skip_init() check) to make sure that memory
|
||||
* is initialized under the same conditions regardless of the enabled
|
||||
* KASAN mode.
|
||||
* Tag-based KASAN modes only assign tags to normal non-executable
|
||||
* allocations, see __kasan_unpoison_vmalloc().
|
||||
*/
|
||||
kasan_flags |= KASAN_VMALLOC_VM_ALLOC;
|
||||
if (!want_init_on_free() && want_init_on_alloc(gfp_mask))
|
||||
kasan_flags |= KASAN_VMALLOC_INIT;
|
||||
/* KASAN_VMALLOC_PROT_NORMAL already set if required. */
|
||||
area->addr = kasan_unpoison_vmalloc(area->addr, real_size, kasan_flags);
|
||||
|
||||
/*
|
||||
* In this function, newly allocated vm_struct has VM_UNINITIALIZED
|
||||
* flag. It means that vm_struct is not fully initialized.
|
||||
@ -3139,7 +3199,7 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align,
|
||||
if (!(vm_flags & VM_DEFER_KMEMLEAK))
|
||||
kmemleak_vmalloc(area, size, gfp_mask);
|
||||
|
||||
return addr;
|
||||
return area->addr;
|
||||
|
||||
fail:
|
||||
if (shift > PAGE_SHIFT) {
|
||||
@ -3424,6 +3484,8 @@ long vread(char *buf, char *addr, unsigned long count)
|
||||
unsigned long buflen = count;
|
||||
unsigned long n;
|
||||
|
||||
addr = kasan_reset_tag(addr);
|
||||
|
||||
/* Don't allow overflow */
|
||||
if ((unsigned long) addr + count < count)
|
||||
count = -(unsigned long) addr;
|
||||
@ -3809,9 +3871,6 @@ struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
|
||||
for (area = 0; area < nr_vms; area++) {
|
||||
if (kasan_populate_vmalloc(vas[area]->va_start, sizes[area]))
|
||||
goto err_free_shadow;
|
||||
|
||||
kasan_unpoison_vmalloc((void *)vas[area]->va_start,
|
||||
sizes[area]);
|
||||
}
|
||||
|
||||
/* insert all vm's */
|
||||
@ -3824,6 +3883,16 @@ struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
|
||||
}
|
||||
spin_unlock(&vmap_area_lock);
|
||||
|
||||
/*
|
||||
* Mark allocated areas as accessible. Do it now as a best-effort
|
||||
* approach, as they can be mapped outside of vmalloc code.
|
||||
* With hardware tag-based KASAN, marking is skipped for
|
||||
* non-VM_ALLOC mappings, see __kasan_unpoison_vmalloc().
|
||||
*/
|
||||
for (area = 0; area < nr_vms; area++)
|
||||
vms[area]->addr = kasan_unpoison_vmalloc(vms[area]->addr,
|
||||
vms[area]->size, KASAN_VMALLOC_PROT_NORMAL);
|
||||
|
||||
kfree(vas);
|
||||
return vms;
|
||||
|
||||
|
@ -28,6 +28,7 @@
|
||||
*
|
||||
* When all tests are finished, clean up and exit the program with one of:
|
||||
*
|
||||
* ksft_finished();
|
||||
* ksft_exit(condition);
|
||||
* ksft_exit_pass();
|
||||
* ksft_exit_fail();
|
||||
@ -235,6 +236,15 @@ static inline int ksft_exit_fail(void)
|
||||
ksft_exit_fail(); \
|
||||
} while (0)
|
||||
|
||||
/**
 * ksft_finished() - Exit selftest with success if all tests passed
 *
 * Hands ksft_exit() a condition that is true when the passed, expected-fail
 * and skipped counters together account for every planned test.
 */
#define ksft_finished()						\
	ksft_exit(ksft_cnt.ksft_pass + ksft_cnt.ksft_xfail +	\
		  ksft_cnt.ksft_xskip == ksft_plan)
|
||||
|
||||
static inline int ksft_exit_fail_msg(const char *msg, ...)
|
||||
{
|
||||
int saved_errno = errno;
|
||||
|
1
tools/testing/selftests/vm/.gitignore
vendored
1
tools/testing/selftests/vm/.gitignore
vendored
@ -3,6 +3,7 @@ hugepage-mmap
|
||||
hugepage-mremap
|
||||
hugepage-shm
|
||||
hugepage-vmemmap
|
||||
hugetlb-madvise
|
||||
khugepaged
|
||||
map_hugetlb
|
||||
map_populate
|
||||
|
@ -30,6 +30,7 @@ LDLIBS = -lrt -lpthread
|
||||
TEST_GEN_FILES = compaction_test
|
||||
TEST_GEN_FILES += gup_test
|
||||
TEST_GEN_FILES += hmm-tests
|
||||
TEST_GEN_FILES += hugetlb-madvise
|
||||
TEST_GEN_FILES += hugepage-mmap
|
||||
TEST_GEN_FILES += hugepage-mremap
|
||||
TEST_GEN_FILES += hugepage-shm
|
||||
|
@ -10,8 +10,9 @@
|
||||
#include <assert.h>
|
||||
#include "../../../../mm/gup_test.h"
|
||||
|
||||
#include "util.h"
|
||||
|
||||
#define MB (1UL << 20)
|
||||
#define PAGE_SIZE sysconf(_SC_PAGESIZE)
|
||||
|
||||
/* Just the flags we need, copied from mm.h: */
|
||||
#define FOLL_WRITE 0x01 /* check pte is writable */
|
||||
|
410
tools/testing/selftests/vm/hugetlb-madvise.c
Normal file
410
tools/testing/selftests/vm/hugetlb-madvise.c
Normal file
@ -0,0 +1,410 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* hugetlb-madvise:
|
||||
*
|
||||
* Basic functional testing of madvise MADV_DONTNEED and MADV_REMOVE
|
||||
* on hugetlb mappings.
|
||||
*
|
||||
* Before running this test, make sure the administrator has pre-allocated
|
||||
* at least MIN_FREE_PAGES hugetlb pages and they are free. In addition,
|
||||
* the test takes an argument that is the path to a file in a hugetlbfs
|
||||
* filesystem. Therefore, a hugetlbfs filesystem must be mounted on some
|
||||
* directory.
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/mman.h>
|
||||
#define __USE_GNU
|
||||
#include <fcntl.h>
|
||||
|
||||
#define USAGE "USAGE: %s <hugepagefile_name>\n"
|
||||
#define MIN_FREE_PAGES 20
|
||||
#define NR_HUGE_PAGES 10 /* common number of pages to map/allocate */
|
||||
|
||||
/*
 * Abort the test (exit status 1) unless the current number of free huge
 * pages, as reported by get_free_hugepages(), equals @exp_free.  The
 * message includes the source line of the failing check.
 *
 * The count is kept in an unsigned long, the type get_free_hugepages()
 * returns, so that it is not narrowed before the comparison.
 */
#define validate_free_pages(exp_free)					\
	do {								\
		unsigned long fhp = get_free_hugepages();		\
		if (fhp != (exp_free)) {				\
			printf("Unexpected number of free huge "	\
				"pages line %d\n", __LINE__);		\
			exit(1);					\
		}							\
	} while (0)
|
||||
|
||||
unsigned long huge_page_size;
|
||||
unsigned long base_page_size;
|
||||
|
||||
/*
 * default_huge_page_size copied from mlock2-tests.c
 *
 * Scan /proc/meminfo for the "Hugepagesize:" line and return that value
 * converted from kB to bytes.  Returns 0 when /proc/meminfo cannot be
 * opened or no such line is found.
 */
unsigned long default_huge_page_size(void)
{
	FILE *meminfo = fopen("/proc/meminfo", "r");
	unsigned long size_bytes = 0;
	unsigned long size_kb = 0;
	char *buf = NULL;
	size_t cap = 0;

	if (meminfo == NULL)
		return 0;

	while (getline(&buf, &cap, meminfo) > 0) {
		if (sscanf(buf, "Hugepagesize: %lu kB", &size_kb) != 1)
			continue;

		/* kB -> bytes */
		size_bytes = size_kb << 10;
		break;
	}

	free(buf);
	fclose(meminfo);
	return size_bytes;
}
|
||||
|
||||
/*
 * Return the value of the "HugePages_Free:" line in /proc/meminfo.
 * Returns 0 when /proc/meminfo cannot be opened or the line is missing.
 */
unsigned long get_free_hugepages(void)
{
	unsigned long nr_free = 0;
	char *buf = NULL;
	size_t cap = 0;
	FILE *meminfo;

	meminfo = fopen("/proc/meminfo", "r");
	if (meminfo == NULL)
		return nr_free;

	for (;;) {
		if (getline(&buf, &cap, meminfo) <= 0)
			break;
		if (sscanf(buf, "HugePages_Free: %lu", &nr_free) == 1)
			break;
	}

	free(buf);
	fclose(meminfo);
	return nr_free;
}
|
||||
|
||||
void write_fault_pages(void *addr, unsigned long nr_pages)
|
||||
{
|
||||
unsigned long i;
|
||||
|
||||
for (i = 0; i < nr_pages; i++)
|
||||
*((unsigned long *)(addr + (i * huge_page_size))) = i;
|
||||
}
|
||||
|
||||
void read_fault_pages(void *addr, unsigned long nr_pages)
|
||||
{
|
||||
unsigned long i, tmp;
|
||||
|
||||
for (i = 0; i < nr_pages; i++)
|
||||
tmp += *((unsigned long *)(addr + (i * huge_page_size)));
|
||||
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
unsigned long free_hugepages;
|
||||
void *addr, *addr2;
|
||||
int fd;
|
||||
int ret;
|
||||
|
||||
if (argc != 2) {
|
||||
printf(USAGE, argv[0]);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
huge_page_size = default_huge_page_size();
|
||||
if (!huge_page_size) {
|
||||
printf("Unable to determine huge page size, exiting!\n");
|
||||
exit(1);
|
||||
}
|
||||
base_page_size = sysconf(_SC_PAGE_SIZE);
|
||||
if (!huge_page_size) {
|
||||
printf("Unable to determine base page size, exiting!\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
free_hugepages = get_free_hugepages();
|
||||
if (free_hugepages < MIN_FREE_PAGES) {
|
||||
printf("Not enough free huge pages to test, exiting!\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
fd = open(argv[1], O_CREAT | O_RDWR, 0755);
|
||||
if (fd < 0) {
|
||||
perror("Open failed");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
/*
|
||||
* Test validity of MADV_DONTNEED addr and length arguments. mmap
|
||||
* size is NR_HUGE_PAGES + 2. One page at the beginning and end of
|
||||
* the mapping will be unmapped so we KNOW there is nothing mapped
|
||||
* there.
|
||||
*/
|
||||
addr = mmap(NULL, (NR_HUGE_PAGES + 2) * huge_page_size,
|
||||
PROT_READ | PROT_WRITE,
|
||||
MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB,
|
||||
-1, 0);
|
||||
if (addr == MAP_FAILED) {
|
||||
perror("mmap");
|
||||
exit(1);
|
||||
}
|
||||
if (munmap(addr, huge_page_size) ||
|
||||
munmap(addr + (NR_HUGE_PAGES + 1) * huge_page_size,
|
||||
huge_page_size)) {
|
||||
perror("munmap");
|
||||
exit(1);
|
||||
}
|
||||
addr = addr + huge_page_size;
|
||||
|
||||
write_fault_pages(addr, NR_HUGE_PAGES);
|
||||
validate_free_pages(free_hugepages - NR_HUGE_PAGES);
|
||||
|
||||
/* addr before mapping should fail */
|
||||
ret = madvise(addr - base_page_size, NR_HUGE_PAGES * huge_page_size,
|
||||
MADV_DONTNEED);
|
||||
if (!ret) {
|
||||
printf("Unexpected success of madvise call with invalid addr line %d\n",
|
||||
__LINE__);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
/* addr + length after mapping should fail */
|
||||
ret = madvise(addr, (NR_HUGE_PAGES * huge_page_size) + base_page_size,
|
||||
MADV_DONTNEED);
|
||||
if (!ret) {
|
||||
printf("Unexpected success of madvise call with invalid length line %d\n",
|
||||
__LINE__);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
(void)munmap(addr, NR_HUGE_PAGES * huge_page_size);
|
||||
|
||||
/*
|
||||
* Test alignment of MADV_DONTNEED addr and length arguments
|
||||
*/
|
||||
addr = mmap(NULL, NR_HUGE_PAGES * huge_page_size,
|
||||
PROT_READ | PROT_WRITE,
|
||||
MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB,
|
||||
-1, 0);
|
||||
if (addr == MAP_FAILED) {
|
||||
perror("mmap");
|
||||
exit(1);
|
||||
}
|
||||
write_fault_pages(addr, NR_HUGE_PAGES);
|
||||
validate_free_pages(free_hugepages - NR_HUGE_PAGES);
|
||||
|
||||
/* addr is not huge page size aligned and should fail */
|
||||
ret = madvise(addr + base_page_size,
|
||||
NR_HUGE_PAGES * huge_page_size - base_page_size,
|
||||
MADV_DONTNEED);
|
||||
if (!ret) {
|
||||
printf("Unexpected success of madvise call with unaligned start address %d\n",
|
||||
__LINE__);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
/* addr + length should be aligned up to huge page size */
|
||||
if (madvise(addr,
|
||||
((NR_HUGE_PAGES - 1) * huge_page_size) + base_page_size,
|
||||
MADV_DONTNEED)) {
|
||||
perror("madvise");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
/* should free all pages in mapping */
|
||||
validate_free_pages(free_hugepages);
|
||||
|
||||
(void)munmap(addr, NR_HUGE_PAGES * huge_page_size);
|
||||
|
||||
/*
|
||||
* Test MADV_DONTNEED on anonymous private mapping
|
||||
*/
|
||||
addr = mmap(NULL, NR_HUGE_PAGES * huge_page_size,
|
||||
PROT_READ | PROT_WRITE,
|
||||
MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB,
|
||||
-1, 0);
|
||||
if (addr == MAP_FAILED) {
|
||||
perror("mmap");
|
||||
exit(1);
|
||||
}
|
||||
write_fault_pages(addr, NR_HUGE_PAGES);
|
||||
validate_free_pages(free_hugepages - NR_HUGE_PAGES);
|
||||
|
||||
if (madvise(addr, NR_HUGE_PAGES * huge_page_size, MADV_DONTNEED)) {
|
||||
perror("madvise");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
/* should free all pages in mapping */
|
||||
validate_free_pages(free_hugepages);
|
||||
|
||||
(void)munmap(addr, NR_HUGE_PAGES * huge_page_size);
|
||||
|
||||
/*
|
||||
* Test MADV_DONTNEED on private mapping of hugetlb file
|
||||
*/
|
||||
if (fallocate(fd, 0, 0, NR_HUGE_PAGES * huge_page_size)) {
|
||||
perror("fallocate");
|
||||
exit(1);
|
||||
}
|
||||
validate_free_pages(free_hugepages - NR_HUGE_PAGES);
|
||||
|
||||
addr = mmap(NULL, NR_HUGE_PAGES * huge_page_size,
|
||||
PROT_READ | PROT_WRITE,
|
||||
MAP_PRIVATE, fd, 0);
|
||||
if (addr == MAP_FAILED) {
|
||||
perror("mmap");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
/* read should not consume any pages */
|
||||
read_fault_pages(addr, NR_HUGE_PAGES);
|
||||
validate_free_pages(free_hugepages - NR_HUGE_PAGES);
|
||||
|
||||
/* madvise should not free any pages */
|
||||
if (madvise(addr, NR_HUGE_PAGES * huge_page_size, MADV_DONTNEED)) {
|
||||
perror("madvise");
|
||||
exit(1);
|
||||
}
|
||||
validate_free_pages(free_hugepages - NR_HUGE_PAGES);
|
||||
|
||||
/* writes should allocate private pages */
|
||||
write_fault_pages(addr, NR_HUGE_PAGES);
|
||||
validate_free_pages(free_hugepages - (2 * NR_HUGE_PAGES));
|
||||
|
||||
/* madvise should free private pages */
|
||||
if (madvise(addr, NR_HUGE_PAGES * huge_page_size, MADV_DONTNEED)) {
|
||||
perror("madvise");
|
||||
exit(1);
|
||||
}
|
||||
validate_free_pages(free_hugepages - NR_HUGE_PAGES);
|
||||
|
||||
/* writes should allocate private pages */
|
||||
write_fault_pages(addr, NR_HUGE_PAGES);
|
||||
validate_free_pages(free_hugepages - (2 * NR_HUGE_PAGES));
|
||||
|
||||
/*
|
||||
* The fallocate below certainly should free the pages associated
|
||||
* with the file. However, pages in the private mapping are also
|
||||
* freed. This is not the 'correct' behavior, but is expected
|
||||
* because this is how it has worked since the initial hugetlb
|
||||
* implementation.
|
||||
*/
|
||||
if (fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
|
||||
0, NR_HUGE_PAGES * huge_page_size)) {
|
||||
perror("fallocate");
|
||||
exit(1);
|
||||
}
|
||||
validate_free_pages(free_hugepages);
|
||||
|
||||
(void)munmap(addr, NR_HUGE_PAGES * huge_page_size);
|
||||
|
||||
/*
|
||||
* Test MADV_DONTNEED on shared mapping of hugetlb file
|
||||
*/
|
||||
if (fallocate(fd, 0, 0, NR_HUGE_PAGES * huge_page_size)) {
|
||||
perror("fallocate");
|
||||
exit(1);
|
||||
}
|
||||
validate_free_pages(free_hugepages - NR_HUGE_PAGES);
|
||||
|
||||
addr = mmap(NULL, NR_HUGE_PAGES * huge_page_size,
|
||||
PROT_READ | PROT_WRITE,
|
||||
MAP_SHARED, fd, 0);
|
||||
if (addr == MAP_FAILED) {
|
||||
perror("mmap");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
/* write should not consume any pages */
|
||||
write_fault_pages(addr, NR_HUGE_PAGES);
|
||||
validate_free_pages(free_hugepages - NR_HUGE_PAGES);
|
||||
|
||||
/* madvise should not free any pages */
|
||||
if (madvise(addr, NR_HUGE_PAGES * huge_page_size, MADV_DONTNEED)) {
|
||||
perror("madvise");
|
||||
exit(1);
|
||||
}
|
||||
validate_free_pages(free_hugepages - NR_HUGE_PAGES);
|
||||
|
||||
/*
|
||||
* Test MADV_REMOVE on shared mapping of hugetlb file
|
||||
*
|
||||
* madvise is same as hole punch and should free all pages.
|
||||
*/
|
||||
if (madvise(addr, NR_HUGE_PAGES * huge_page_size, MADV_REMOVE)) {
|
||||
perror("madvise");
|
||||
exit(1);
|
||||
}
|
||||
validate_free_pages(free_hugepages);
|
||||
(void)munmap(addr, NR_HUGE_PAGES * huge_page_size);
|
||||
|
||||
/*
|
||||
* Test MADV_REMOVE on shared and private mapping of hugetlb file
|
||||
*/
|
||||
if (fallocate(fd, 0, 0, NR_HUGE_PAGES * huge_page_size)) {
|
||||
perror("fallocate");
|
||||
exit(1);
|
||||
}
|
||||
validate_free_pages(free_hugepages - NR_HUGE_PAGES);
|
||||
|
||||
addr = mmap(NULL, NR_HUGE_PAGES * huge_page_size,
|
||||
PROT_READ | PROT_WRITE,
|
||||
MAP_SHARED, fd, 0);
|
||||
if (addr == MAP_FAILED) {
|
||||
perror("mmap");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
/* shared write should not consume any additional pages */
|
||||
write_fault_pages(addr, NR_HUGE_PAGES);
|
||||
validate_free_pages(free_hugepages - NR_HUGE_PAGES);
|
||||
|
||||
addr2 = mmap(NULL, NR_HUGE_PAGES * huge_page_size,
|
||||
PROT_READ | PROT_WRITE,
|
||||
MAP_PRIVATE, fd, 0);
|
||||
if (addr2 == MAP_FAILED) {
|
||||
perror("mmap");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
/* private read should not consume any pages */
|
||||
read_fault_pages(addr2, NR_HUGE_PAGES);
|
||||
validate_free_pages(free_hugepages - NR_HUGE_PAGES);
|
||||
|
||||
/* private write should consume additional pages */
|
||||
write_fault_pages(addr2, NR_HUGE_PAGES);
|
||||
validate_free_pages(free_hugepages - (2 * NR_HUGE_PAGES));
|
||||
|
||||
/* madvise of shared mapping should not free any pages */
|
||||
if (madvise(addr, NR_HUGE_PAGES * huge_page_size, MADV_DONTNEED)) {
|
||||
perror("madvise");
|
||||
exit(1);
|
||||
}
|
||||
validate_free_pages(free_hugepages - (2 * NR_HUGE_PAGES));
|
||||
|
||||
/* madvise of private mapping should free private pages */
|
||||
if (madvise(addr2, NR_HUGE_PAGES * huge_page_size, MADV_DONTNEED)) {
|
||||
perror("madvise");
|
||||
exit(1);
|
||||
}
|
||||
validate_free_pages(free_hugepages - NR_HUGE_PAGES);
|
||||
|
||||
/* private write should consume additional pages again */
|
||||
write_fault_pages(addr2, NR_HUGE_PAGES);
|
||||
validate_free_pages(free_hugepages - (2 * NR_HUGE_PAGES));
|
||||
|
||||
/*
|
||||
* madvise should free both file and private pages although this is
|
||||
* not correct. private pages should not be freed, but this is
|
||||
* expected. See comment associated with FALLOC_FL_PUNCH_HOLE call.
|
||||
*/
|
||||
if (madvise(addr, NR_HUGE_PAGES * huge_page_size, MADV_REMOVE)) {
|
||||
perror("madvise");
|
||||
exit(1);
|
||||
}
|
||||
validate_free_pages(free_hugepages);
|
||||
|
||||
(void)munmap(addr, NR_HUGE_PAGES * huge_page_size);
|
||||
(void)munmap(addr2, NR_HUGE_PAGES * huge_page_size);
|
||||
|
||||
close(fd);
|
||||
unlink(argv[1]);
|
||||
return 0;
|
||||
}
|
@ -12,6 +12,7 @@
|
||||
|
||||
#include "../kselftest.h"
|
||||
#include "../../../../include/vdso/time64.h"
|
||||
#include "util.h"
|
||||
|
||||
#define KSM_SYSFS_PATH "/sys/kernel/mm/ksm/"
|
||||
#define KSM_FP(s) (KSM_SYSFS_PATH s)
|
||||
@ -22,15 +23,6 @@
|
||||
#define KSM_MERGE_ACROSS_NODES_DEFAULT true
|
||||
#define MB (1ul << 20)
|
||||
|
||||
#define PAGE_SHIFT 12
|
||||
#define HPAGE_SHIFT 21
|
||||
|
||||
#define PAGE_SIZE (1 << PAGE_SHIFT)
|
||||
#define HPAGE_SIZE (1 << HPAGE_SHIFT)
|
||||
|
||||
#define PAGEMAP_PRESENT(ent) (((ent) & (1ull << 63)) != 0)
|
||||
#define PAGEMAP_PFN(ent) ((ent) & ((1ull << 55) - 1))
|
||||
|
||||
struct ksm_sysfs {
|
||||
unsigned long max_page_sharing;
|
||||
unsigned long merge_across_nodes;
|
||||
@ -456,34 +448,6 @@ static int check_ksm_numa_merge(int mapping, int prot, int timeout, bool merge_a
|
||||
return KSFT_FAIL;
|
||||
}
|
||||
|
||||
int64_t allocate_transhuge(void *ptr, int pagemap_fd)
|
||||
{
|
||||
uint64_t ent[2];
|
||||
|
||||
/* drop pmd */
|
||||
if (mmap(ptr, HPAGE_SIZE, PROT_READ | PROT_WRITE,
|
||||
MAP_FIXED | MAP_ANONYMOUS |
|
||||
MAP_NORESERVE | MAP_PRIVATE, -1, 0) != ptr)
|
||||
errx(2, "mmap transhuge");
|
||||
|
||||
if (madvise(ptr, HPAGE_SIZE, MADV_HUGEPAGE))
|
||||
err(2, "MADV_HUGEPAGE");
|
||||
|
||||
/* allocate transparent huge page */
|
||||
*(volatile void **)ptr = ptr;
|
||||
|
||||
if (pread(pagemap_fd, ent, sizeof(ent),
|
||||
(uintptr_t)ptr >> (PAGE_SHIFT - 3)) != sizeof(ent))
|
||||
err(2, "read pagemap");
|
||||
|
||||
if (PAGEMAP_PRESENT(ent[0]) && PAGEMAP_PRESENT(ent[1]) &&
|
||||
PAGEMAP_PFN(ent[0]) + 1 == PAGEMAP_PFN(ent[1]) &&
|
||||
!(PAGEMAP_PFN(ent[0]) & ((1 << (HPAGE_SHIFT - PAGE_SHIFT)) - 1)))
|
||||
return PAGEMAP_PFN(ent[0]);
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
static int ksm_merge_hugepages_time(int mapping, int prot, int timeout, size_t map_size)
|
||||
{
|
||||
void *map_ptr, *map_ptr_orig;
|
||||
|
@ -282,7 +282,7 @@ int main(int argc, char *argv[])
|
||||
|
||||
close(fd);
|
||||
|
||||
ksft_exit(!ksft_get_fail_cnt());
|
||||
ksft_finished();
|
||||
}
|
||||
|
||||
#else /* __NR_memfd_secret */
|
||||
|
@ -131,6 +131,18 @@ else
|
||||
echo "[PASS]"
|
||||
fi
|
||||
|
||||
echo "-----------------------"
|
||||
echo "running hugetlb-madvise"
|
||||
echo "-----------------------"
|
||||
./hugetlb-madvise $mnt/madvise-test
|
||||
if [ $? -ne 0 ]; then
|
||||
echo "[FAIL]"
|
||||
exitcode=1
|
||||
else
|
||||
echo "[PASS]"
|
||||
fi
|
||||
rm -f $mnt/madvise-test
|
||||
|
||||
echo "NOTE: The above hugetlb tests provide minimal coverage. Use"
|
||||
echo " https://github.com/libhugetlbfs/libhugetlbfs.git for"
|
||||
echo " hugetlb regression testing."
|
||||
@ -196,14 +208,13 @@ echo "running userfaultfd_hugetlb"
|
||||
echo "---------------------------"
|
||||
# Test requires source and destination huge pages. Size of source
|
||||
# (half_ufd_size_MB) is passed as argument to test.
|
||||
./userfaultfd hugetlb $half_ufd_size_MB 32 $mnt/ufd_test_file
|
||||
./userfaultfd hugetlb $half_ufd_size_MB 32
|
||||
if [ $? -ne 0 ]; then
|
||||
echo "[FAIL]"
|
||||
exitcode=1
|
||||
else
|
||||
echo "[PASS]"
|
||||
fi
|
||||
rm -f $mnt/ufd_test_file
|
||||
|
||||
echo "-------------------------"
|
||||
echo "running userfaultfd_shmem"
|
||||
|
@ -15,48 +15,12 @@
|
||||
#include <fcntl.h>
|
||||
#include <string.h>
|
||||
#include <sys/mman.h>
|
||||
#include "util.h"
|
||||
|
||||
#define PAGE_SHIFT 12
|
||||
#define HPAGE_SHIFT 21
|
||||
|
||||
#define PAGE_SIZE (1 << PAGE_SHIFT)
|
||||
#define HPAGE_SIZE (1 << HPAGE_SHIFT)
|
||||
|
||||
#define PAGEMAP_PRESENT(ent) (((ent) & (1ull << 63)) != 0)
|
||||
#define PAGEMAP_PFN(ent) ((ent) & ((1ull << 55) - 1))
|
||||
|
||||
int pagemap_fd;
|
||||
int backing_fd = -1;
|
||||
int mmap_flags = MAP_ANONYMOUS | MAP_NORESERVE | MAP_PRIVATE;
|
||||
#define PROT_RW (PROT_READ | PROT_WRITE)
|
||||
|
||||
int64_t allocate_transhuge(void *ptr)
|
||||
{
|
||||
uint64_t ent[2];
|
||||
|
||||
/* drop pmd */
|
||||
if (mmap(ptr, HPAGE_SIZE, PROT_RW, MAP_FIXED | mmap_flags,
|
||||
backing_fd, 0) != ptr)
|
||||
errx(2, "mmap transhuge");
|
||||
|
||||
if (madvise(ptr, HPAGE_SIZE, MADV_HUGEPAGE))
|
||||
err(2, "MADV_HUGEPAGE");
|
||||
|
||||
/* allocate transparent huge page */
|
||||
*(volatile void **)ptr = ptr;
|
||||
|
||||
if (pread(pagemap_fd, ent, sizeof(ent),
|
||||
(uintptr_t)ptr >> (PAGE_SHIFT - 3)) != sizeof(ent))
|
||||
err(2, "read pagemap");
|
||||
|
||||
if (PAGEMAP_PRESENT(ent[0]) && PAGEMAP_PRESENT(ent[1]) &&
|
||||
PAGEMAP_PFN(ent[0]) + 1 == PAGEMAP_PFN(ent[1]) &&
|
||||
!(PAGEMAP_PFN(ent[0]) & ((1 << (HPAGE_SHIFT - PAGE_SHIFT)) - 1)))
|
||||
return PAGEMAP_PFN(ent[0]);
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
size_t ram, len;
|
||||
@ -67,6 +31,7 @@ int main(int argc, char **argv)
|
||||
double s;
|
||||
uint8_t *map;
|
||||
size_t map_len;
|
||||
int pagemap_fd;
|
||||
|
||||
ram = sysconf(_SC_PHYS_PAGES);
|
||||
if (ram > SIZE_MAX / sysconf(_SC_PAGESIZE) / 4)
|
||||
@ -122,7 +87,7 @@ int main(int argc, char **argv)
|
||||
for (p = ptr; p < ptr + len; p += HPAGE_SIZE) {
|
||||
int64_t pfn;
|
||||
|
||||
pfn = allocate_transhuge(p);
|
||||
pfn = allocate_transhuge(p, pagemap_fd);
|
||||
|
||||
if (pfn < 0) {
|
||||
nr_failed++;
|
||||
|
@ -89,7 +89,6 @@ static bool test_uffdio_minor = false;
|
||||
static bool map_shared;
|
||||
static int shm_fd;
|
||||
static int huge_fd;
|
||||
static char *huge_fd_off0;
|
||||
static unsigned long long *count_verify;
|
||||
static int uffd = -1;
|
||||
static int uffd_flags, finished, *pipefd;
|
||||
@ -128,9 +127,9 @@ const char *examples =
|
||||
"./userfaultfd anon 100 99999\n\n"
|
||||
"# Run share memory test on 1GiB region with 99 bounces:\n"
|
||||
"./userfaultfd shmem 1000 99\n\n"
|
||||
"# Run hugetlb memory test on 256MiB region with 50 bounces (using /dev/hugepages/hugefile):\n"
|
||||
"./userfaultfd hugetlb 256 50 /dev/hugepages/hugefile\n\n"
|
||||
"# Run the same hugetlb test but using shmem:\n"
|
||||
"# Run hugetlb memory test on 256MiB region with 50 bounces:\n"
|
||||
"./userfaultfd hugetlb 256 50\n\n"
|
||||
"# Run the same hugetlb test but using shared file:\n"
|
||||
"./userfaultfd hugetlb_shared 256 50 /dev/hugepages/hugefile\n\n"
|
||||
"# 10MiB-~6GiB 999 bounces anonymous test, "
|
||||
"continue forever unless an error triggers\n"
|
||||
@ -227,10 +226,13 @@ static void noop_alias_mapping(__u64 *start, size_t len, unsigned long offset)
|
||||
|
||||
static void hugetlb_release_pages(char *rel_area)
|
||||
{
|
||||
if (fallocate(huge_fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
|
||||
rel_area == huge_fd_off0 ? 0 : nr_pages * page_size,
|
||||
nr_pages * page_size))
|
||||
err("fallocate() failed");
|
||||
if (!map_shared) {
|
||||
if (madvise(rel_area, nr_pages * page_size, MADV_DONTNEED))
|
||||
err("madvise(MADV_DONTNEED) failed");
|
||||
} else {
|
||||
if (madvise(rel_area, nr_pages * page_size, MADV_REMOVE))
|
||||
err("madvise(MADV_REMOVE) failed");
|
||||
}
|
||||
}
|
||||
|
||||
static void hugetlb_allocate_area(void **alloc_area)
|
||||
@ -238,26 +240,37 @@ static void hugetlb_allocate_area(void **alloc_area)
|
||||
void *area_alias = NULL;
|
||||
char **alloc_area_alias;
|
||||
|
||||
*alloc_area = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE,
|
||||
(map_shared ? MAP_SHARED : MAP_PRIVATE) |
|
||||
MAP_HUGETLB |
|
||||
(*alloc_area == area_src ? 0 : MAP_NORESERVE),
|
||||
huge_fd, *alloc_area == area_src ? 0 :
|
||||
nr_pages * page_size);
|
||||
if (!map_shared)
|
||||
*alloc_area = mmap(NULL,
|
||||
nr_pages * page_size,
|
||||
PROT_READ | PROT_WRITE,
|
||||
MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB |
|
||||
(*alloc_area == area_src ? 0 : MAP_NORESERVE),
|
||||
-1,
|
||||
0);
|
||||
else
|
||||
*alloc_area = mmap(NULL,
|
||||
nr_pages * page_size,
|
||||
PROT_READ | PROT_WRITE,
|
||||
MAP_SHARED |
|
||||
(*alloc_area == area_src ? 0 : MAP_NORESERVE),
|
||||
huge_fd,
|
||||
*alloc_area == area_src ? 0 : nr_pages * page_size);
|
||||
if (*alloc_area == MAP_FAILED)
|
||||
err("mmap of hugetlbfs file failed");
|
||||
|
||||
if (map_shared) {
|
||||
area_alias = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE,
|
||||
MAP_SHARED | MAP_HUGETLB,
|
||||
huge_fd, *alloc_area == area_src ? 0 :
|
||||
nr_pages * page_size);
|
||||
area_alias = mmap(NULL,
|
||||
nr_pages * page_size,
|
||||
PROT_READ | PROT_WRITE,
|
||||
MAP_SHARED,
|
||||
huge_fd,
|
||||
*alloc_area == area_src ? 0 : nr_pages * page_size);
|
||||
if (area_alias == MAP_FAILED)
|
||||
err("mmap of hugetlb file alias failed");
|
||||
}
|
||||
|
||||
if (*alloc_area == area_src) {
|
||||
huge_fd_off0 = *alloc_area;
|
||||
alloc_area_alias = &area_src_alias;
|
||||
} else {
|
||||
alloc_area_alias = &area_dst_alias;
|
||||
@ -270,12 +283,7 @@ static void hugetlb_alias_mapping(__u64 *start, size_t len, unsigned long offset
|
||||
{
|
||||
if (!map_shared)
|
||||
return;
|
||||
/*
|
||||
* We can't zap just the pagetable with hugetlbfs because
|
||||
* MADV_DONTEED won't work. So exercise -EEXIST on a alias
|
||||
* mapping where the pagetables are not established initially,
|
||||
* this way we'll exercise the -EEXEC at the fs level.
|
||||
*/
|
||||
|
||||
*start = (unsigned long) area_dst_alias + offset;
|
||||
}
|
||||
|
||||
@ -428,7 +436,6 @@ static void uffd_test_ctx_clear(void)
|
||||
uffd = -1;
|
||||
}
|
||||
|
||||
huge_fd_off0 = NULL;
|
||||
munmap_area((void **)&area_src);
|
||||
munmap_area((void **)&area_src_alias);
|
||||
munmap_area((void **)&area_dst);
|
||||
@ -926,10 +933,7 @@ static int faulting_process(int signal_test)
|
||||
struct sigaction act;
|
||||
unsigned long signalled = 0;
|
||||
|
||||
if (test_type != TEST_HUGETLB)
|
||||
split_nr_pages = (nr_pages + 1) / 2;
|
||||
else
|
||||
split_nr_pages = nr_pages;
|
||||
split_nr_pages = (nr_pages + 1) / 2;
|
||||
|
||||
if (signal_test) {
|
||||
sigbuf = &jbuf;
|
||||
@ -986,9 +990,6 @@ static int faulting_process(int signal_test)
|
||||
if (signal_test)
|
||||
return signalled != split_nr_pages;
|
||||
|
||||
if (test_type == TEST_HUGETLB)
|
||||
return 0;
|
||||
|
||||
area_dst = mremap(area_dst, nr_pages * page_size, nr_pages * page_size,
|
||||
MREMAP_MAYMOVE | MREMAP_FIXED, area_src);
|
||||
if (area_dst == MAP_FAILED)
|
||||
@ -1676,7 +1677,7 @@ int main(int argc, char **argv)
|
||||
}
|
||||
nr_pages = nr_pages_per_cpu * nr_cpus;
|
||||
|
||||
if (test_type == TEST_HUGETLB) {
|
||||
if (test_type == TEST_HUGETLB && map_shared) {
|
||||
if (argc < 5)
|
||||
usage();
|
||||
huge_fd = open(argv[4], O_CREAT | O_RDWR, 0755);
|
||||
|
69
tools/testing/selftests/vm/util.h
Normal file
69
tools/testing/selftests/vm/util.h
Normal file
@ -0,0 +1,69 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
|
||||
#ifndef __KSELFTEST_VM_UTIL_H
|
||||
#define __KSELFTEST_VM_UTIL_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include <sys/mman.h>
|
||||
#include <err.h>
|
||||
#include <string.h> /* ffsl() */
|
||||
#include <unistd.h> /* _SC_PAGESIZE */
|
||||
|
||||
static unsigned int __page_size;
|
||||
static unsigned int __page_shift;
|
||||
|
||||
static inline unsigned int page_size(void)
|
||||
{
|
||||
if (!__page_size)
|
||||
__page_size = sysconf(_SC_PAGESIZE);
|
||||
return __page_size;
|
||||
}
|
||||
|
||||
static inline unsigned int page_shift(void)
|
||||
{
|
||||
if (!__page_shift)
|
||||
__page_shift = (ffsl(page_size()) - 1);
|
||||
return __page_shift;
|
||||
}
|
||||
|
||||
#define PAGE_SHIFT (page_shift())
|
||||
#define PAGE_SIZE (page_size())
|
||||
/*
|
||||
* On ppc64 this will only work with radix 2M hugepage size
|
||||
*/
|
||||
#define HPAGE_SHIFT 21
|
||||
#define HPAGE_SIZE (1 << HPAGE_SHIFT)
|
||||
|
||||
#define PAGEMAP_PRESENT(ent) (((ent) & (1ull << 63)) != 0)
|
||||
#define PAGEMAP_PFN(ent) ((ent) & ((1ull << 55) - 1))
|
||||
|
||||
|
||||
static inline int64_t allocate_transhuge(void *ptr, int pagemap_fd)
|
||||
{
|
||||
uint64_t ent[2];
|
||||
|
||||
/* drop pmd */
|
||||
if (mmap(ptr, HPAGE_SIZE, PROT_READ | PROT_WRITE,
|
||||
MAP_FIXED | MAP_ANONYMOUS |
|
||||
MAP_NORESERVE | MAP_PRIVATE, -1, 0) != ptr)
|
||||
errx(2, "mmap transhuge");
|
||||
|
||||
if (madvise(ptr, HPAGE_SIZE, MADV_HUGEPAGE))
|
||||
err(2, "MADV_HUGEPAGE");
|
||||
|
||||
/* allocate transparent huge page */
|
||||
*(volatile void **)ptr = ptr;
|
||||
|
||||
if (pread(pagemap_fd, ent, sizeof(ent),
|
||||
(uintptr_t)ptr >> (PAGE_SHIFT - 3)) != sizeof(ent))
|
||||
err(2, "read pagemap");
|
||||
|
||||
if (PAGEMAP_PRESENT(ent[0]) && PAGEMAP_PRESENT(ent[1]) &&
|
||||
PAGEMAP_PFN(ent[0]) + 1 == PAGEMAP_PFN(ent[1]) &&
|
||||
!(PAGEMAP_PFN(ent[0]) & ((1 << (HPAGE_SHIFT - PAGE_SHIFT)) - 1)))
|
||||
return PAGEMAP_PFN(ent[0]);
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
#endif
|
@ -20,21 +20,56 @@
|
||||
#include <string.h>
|
||||
#include <regex.h>
|
||||
#include <errno.h>
|
||||
#include <linux/types.h>
|
||||
#include <getopt.h>
|
||||
|
||||
#define bool int
|
||||
#define true 1
|
||||
#define false 0
|
||||
#define TASK_COMM_LEN 16
|
||||
|
||||
struct block_list {
|
||||
char *txt;
|
||||
char *comm; // task command name
|
||||
char *stacktrace;
|
||||
__u64 ts_nsec;
|
||||
__u64 free_ts_nsec;
|
||||
int len;
|
||||
int num;
|
||||
int page_num;
|
||||
pid_t pid;
|
||||
pid_t tgid;
|
||||
};
|
||||
|
||||
static int sort_by_memory;
|
||||
enum FILTER_BIT {
|
||||
FILTER_UNRELEASE = 1<<1,
|
||||
FILTER_PID = 1<<2,
|
||||
FILTER_TGID = 1<<3,
|
||||
FILTER_COMM = 1<<4
|
||||
};
|
||||
enum CULL_BIT {
|
||||
CULL_UNRELEASE = 1<<1,
|
||||
CULL_PID = 1<<2,
|
||||
CULL_TGID = 1<<3,
|
||||
CULL_COMM = 1<<4,
|
||||
CULL_STACKTRACE = 1<<5
|
||||
};
|
||||
struct filter_condition {
|
||||
pid_t tgid;
|
||||
pid_t pid;
|
||||
char comm[TASK_COMM_LEN];
|
||||
};
|
||||
static struct filter_condition fc;
|
||||
static regex_t order_pattern;
|
||||
static regex_t pid_pattern;
|
||||
static regex_t tgid_pattern;
|
||||
static regex_t comm_pattern;
|
||||
static regex_t ts_nsec_pattern;
|
||||
static regex_t free_ts_nsec_pattern;
|
||||
static struct block_list *list;
|
||||
static int list_size;
|
||||
static int max_size;
|
||||
|
||||
struct block_list *block_head;
|
||||
static int cull;
|
||||
static int filter;
|
||||
|
||||
int read_block(char *buf, int buf_size, FILE *fin)
|
||||
{
|
||||
@ -58,6 +93,13 @@ static int compare_txt(const void *p1, const void *p2)
|
||||
return strcmp(l1->txt, l2->txt);
|
||||
}
|
||||
|
||||
static int compare_stacktrace(const void *p1, const void *p2)
|
||||
{
|
||||
const struct block_list *l1 = p1, *l2 = p2;
|
||||
|
||||
return strcmp(l1->stacktrace, l2->stacktrace);
|
||||
}
|
||||
|
||||
static int compare_num(const void *p1, const void *p2)
|
||||
{
|
||||
const struct block_list *l1 = p1, *l2 = p2;
|
||||
@ -72,41 +114,260 @@ static int compare_page_num(const void *p1, const void *p2)
|
||||
return l2->page_num - l1->page_num;
|
||||
}
|
||||
|
||||
static int get_page_num(char *buf)
|
||||
static int compare_pid(const void *p1, const void *p2)
|
||||
{
|
||||
int err, val_len, order_val;
|
||||
char order_str[4] = {0};
|
||||
char *endptr;
|
||||
const struct block_list *l1 = p1, *l2 = p2;
|
||||
|
||||
return l1->pid - l2->pid;
|
||||
}
|
||||
|
||||
static int compare_tgid(const void *p1, const void *p2)
|
||||
{
|
||||
const struct block_list *l1 = p1, *l2 = p2;
|
||||
|
||||
return l1->tgid - l2->tgid;
|
||||
}
|
||||
|
||||
static int compare_comm(const void *p1, const void *p2)
|
||||
{
|
||||
const struct block_list *l1 = p1, *l2 = p2;
|
||||
|
||||
return strcmp(l1->comm, l2->comm);
|
||||
}
|
||||
|
||||
static int compare_ts(const void *p1, const void *p2)
|
||||
{
|
||||
const struct block_list *l1 = p1, *l2 = p2;
|
||||
|
||||
return l1->ts_nsec < l2->ts_nsec ? -1 : 1;
|
||||
}
|
||||
|
||||
static int compare_free_ts(const void *p1, const void *p2)
|
||||
{
|
||||
const struct block_list *l1 = p1, *l2 = p2;
|
||||
|
||||
return l1->free_ts_nsec < l2->free_ts_nsec ? -1 : 1;
|
||||
}
|
||||
|
||||
|
||||
static int compare_release(const void *p1, const void *p2)
|
||||
{
|
||||
const struct block_list *l1 = p1, *l2 = p2;
|
||||
|
||||
if (!l1->free_ts_nsec && !l2->free_ts_nsec)
|
||||
return 0;
|
||||
if (l1->free_ts_nsec && l2->free_ts_nsec)
|
||||
return 0;
|
||||
return l1->free_ts_nsec ? 1 : -1;
|
||||
}
|
||||
|
||||
|
||||
static int compare_cull_condition(const void *p1, const void *p2)
|
||||
{
|
||||
if (cull == 0)
|
||||
return compare_txt(p1, p2);
|
||||
if ((cull & CULL_STACKTRACE) && compare_stacktrace(p1, p2))
|
||||
return compare_stacktrace(p1, p2);
|
||||
if ((cull & CULL_PID) && compare_pid(p1, p2))
|
||||
return compare_pid(p1, p2);
|
||||
if ((cull & CULL_TGID) && compare_tgid(p1, p2))
|
||||
return compare_tgid(p1, p2);
|
||||
if ((cull & CULL_COMM) && compare_comm(p1, p2))
|
||||
return compare_comm(p1, p2);
|
||||
if ((cull & CULL_UNRELEASE) && compare_release(p1, p2))
|
||||
return compare_release(p1, p2);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int search_pattern(regex_t *pattern, char *pattern_str, char *buf)
|
||||
{
|
||||
int err, val_len;
|
||||
regmatch_t pmatch[2];
|
||||
|
||||
err = regexec(&order_pattern, buf, 2, pmatch, REG_NOTBOL);
|
||||
err = regexec(pattern, buf, 2, pmatch, REG_NOTBOL);
|
||||
if (err != 0 || pmatch[1].rm_so == -1) {
|
||||
printf("no order pattern in %s\n", buf);
|
||||
return 0;
|
||||
printf("no matching pattern in %s\n", buf);
|
||||
return -1;
|
||||
}
|
||||
val_len = pmatch[1].rm_eo - pmatch[1].rm_so;
|
||||
if (val_len > 2) /* max_order should not exceed 2 digits */
|
||||
goto wrong_order;
|
||||
|
||||
memcpy(order_str, buf + pmatch[1].rm_so, val_len);
|
||||
memcpy(pattern_str, buf + pmatch[1].rm_so, val_len);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Compile @regex into @pattern as an extended, newline-sensitive regular
 * expression. The expression must contain exactly one capture group;
 * otherwise (or if compilation fails) a message is printed and the program
 * exits with status 1.
 */
static void check_regcomp(regex_t *pattern, const char *regex)
{
	int rc = regcomp(pattern, regex, REG_EXTENDED | REG_NEWLINE);

	if (rc == 0 && pattern->re_nsub == 1)
		return;

	printf("Invalid pattern %s code %d\n", regex, rc);
	exit(1);
}
|
||||
|
||||
/*
 * Split @str on every occurrence of @sep.
 *
 * Returns a heap-allocated array of heap-allocated, NUL-terminated copies of
 * the fields and stores the number of fields in @size. N separators always
 * yield N + 1 fields, so empty fields are kept (including a trailing one, and
 * the single empty field produced by an empty @str). Release the result with
 * free_explode(). Prints a message and exits with status 1 if memory cannot
 * be allocated.
 */
static char **explode(char sep, const char *str, int *size)
{
	int len = strlen(str);
	int count = 1;		/* one more field than separators */
	int start = 0, j = 0;
	char **ret;

	for (int i = 0; i < len; i++)
		if (str[i] == sep)
			count++;

	ret = calloc(count, sizeof(char *));
	if (!ret)
		goto oom;

	/* i == len acts as a virtual separator that closes the last field */
	for (int i = 0; i <= len; i++) {
		if (i < len && str[i] != sep)
			continue;

		/* +1 for the terminating NUL, already zeroed by calloc() */
		ret[j] = calloc(i - start + 1, sizeof(char));
		if (!ret[j])
			goto oom;
		memcpy(ret[j++], str + start, i - start);
		start = i + 1;
	}

	*size = j;
	return ret;

oom:
	printf("Out of memory\n");
	exit(1);
}
|
||||
|
||||
/* Free the @size strings held in @arr and then the array itself. */
static void free_explode(char **arr, int size)
{
	while (size-- > 0)
		free(arr[size]);

	free(arr);
}
|
||||
|
||||
# define FIELD_BUFF 25
|
||||
|
||||
static int get_page_num(char *buf)
|
||||
{
|
||||
int order_val;
|
||||
char order_str[FIELD_BUFF] = {0};
|
||||
char *endptr;
|
||||
|
||||
search_pattern(&order_pattern, order_str, buf);
|
||||
errno = 0;
|
||||
order_val = strtol(order_str, &endptr, 10);
|
||||
if (errno != 0 || endptr == order_str || *endptr != '\0')
|
||||
goto wrong_order;
|
||||
if (order_val > 64 || errno != 0 || endptr == order_str || *endptr != '\0') {
|
||||
printf("wrong order in follow buf:\n%s\n", buf);
|
||||
return 0;
|
||||
}
|
||||
|
||||
return 1 << order_val;
|
||||
}
|
||||
|
||||
wrong_order:
|
||||
printf("wrong order in follow buf:\n%s\n", buf);
|
||||
return 0;
|
||||
static pid_t get_pid(char *buf)
|
||||
{
|
||||
pid_t pid;
|
||||
char pid_str[FIELD_BUFF] = {0};
|
||||
char *endptr;
|
||||
|
||||
search_pattern(&pid_pattern, pid_str, buf);
|
||||
errno = 0;
|
||||
pid = strtol(pid_str, &endptr, 10);
|
||||
if (errno != 0 || endptr == pid_str || *endptr != '\0') {
|
||||
printf("wrong/invalid pid in follow buf:\n%s\n", buf);
|
||||
return -1;
|
||||
}
|
||||
|
||||
return pid;
|
||||
|
||||
}
|
||||
|
||||
static pid_t get_tgid(char *buf)
|
||||
{
|
||||
pid_t tgid;
|
||||
char tgid_str[FIELD_BUFF] = {0};
|
||||
char *endptr;
|
||||
|
||||
search_pattern(&tgid_pattern, tgid_str, buf);
|
||||
errno = 0;
|
||||
tgid = strtol(tgid_str, &endptr, 10);
|
||||
if (errno != 0 || endptr == tgid_str || *endptr != '\0') {
|
||||
printf("wrong/invalid tgid in follow buf:\n%s\n", buf);
|
||||
return -1;
|
||||
}
|
||||
|
||||
return tgid;
|
||||
|
||||
}
|
||||
|
||||
static __u64 get_ts_nsec(char *buf)
|
||||
{
|
||||
__u64 ts_nsec;
|
||||
char ts_nsec_str[FIELD_BUFF] = {0};
|
||||
char *endptr;
|
||||
|
||||
search_pattern(&ts_nsec_pattern, ts_nsec_str, buf);
|
||||
errno = 0;
|
||||
ts_nsec = strtoull(ts_nsec_str, &endptr, 10);
|
||||
if (errno != 0 || endptr == ts_nsec_str || *endptr != '\0') {
|
||||
printf("wrong ts_nsec in follow buf:\n%s\n", buf);
|
||||
return -1;
|
||||
}
|
||||
|
||||
return ts_nsec;
|
||||
}
|
||||
|
||||
static __u64 get_free_ts_nsec(char *buf)
|
||||
{
|
||||
__u64 free_ts_nsec;
|
||||
char free_ts_nsec_str[FIELD_BUFF] = {0};
|
||||
char *endptr;
|
||||
|
||||
search_pattern(&free_ts_nsec_pattern, free_ts_nsec_str, buf);
|
||||
errno = 0;
|
||||
free_ts_nsec = strtoull(free_ts_nsec_str, &endptr, 10);
|
||||
if (errno != 0 || endptr == free_ts_nsec_str || *endptr != '\0') {
|
||||
printf("wrong free_ts_nsec in follow buf:\n%s\n", buf);
|
||||
return -1;
|
||||
}
|
||||
|
||||
return free_ts_nsec;
|
||||
}
|
||||
|
||||
static char *get_comm(char *buf)
|
||||
{
|
||||
char *comm_str = malloc(TASK_COMM_LEN);
|
||||
|
||||
memset(comm_str, 0, TASK_COMM_LEN);
|
||||
|
||||
search_pattern(&comm_pattern, comm_str, buf);
|
||||
errno = 0;
|
||||
if (errno != 0) {
|
||||
printf("wrong comm in follow buf:\n%s\n", buf);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return comm_str;
|
||||
}
|
||||
|
||||
static bool is_need(char *buf)
|
||||
{
|
||||
if ((filter & FILTER_UNRELEASE) && get_free_ts_nsec(buf) != 0)
|
||||
return false;
|
||||
if ((filter & FILTER_PID) && get_pid(buf) != fc.pid)
|
||||
return false;
|
||||
if ((filter & FILTER_TGID) && get_tgid(buf) != fc.tgid)
|
||||
return false;
|
||||
|
||||
char *comm = get_comm(buf);
|
||||
|
||||
if ((filter & FILTER_COMM) &&
|
||||
strncmp(comm, fc.comm, TASK_COMM_LEN) != 0) {
|
||||
free(comm);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
static void add_list(char *buf, int len)
|
||||
{
|
||||
if (list_size != 0 &&
|
||||
len == list[list_size-1].len &&
|
||||
memcmp(buf, list[list_size-1].txt, len) == 0) {
|
||||
len == list[list_size-1].len &&
|
||||
memcmp(buf, list[list_size-1].txt, len) == 0) {
|
||||
list[list_size-1].num++;
|
||||
list[list_size-1].page_num += get_page_num(buf);
|
||||
return;
|
||||
@ -115,12 +376,27 @@ static void add_list(char *buf, int len)
|
||||
printf("max_size too small??\n");
|
||||
exit(1);
|
||||
}
|
||||
if (!is_need(buf))
|
||||
return;
|
||||
list[list_size].pid = get_pid(buf);
|
||||
list[list_size].tgid = get_tgid(buf);
|
||||
list[list_size].comm = get_comm(buf);
|
||||
list[list_size].txt = malloc(len+1);
|
||||
if (!list[list_size].txt) {
|
||||
printf("Out of memory\n");
|
||||
exit(1);
|
||||
}
|
||||
memcpy(list[list_size].txt, buf, len);
|
||||
list[list_size].txt[len] = 0;
|
||||
list[list_size].len = len;
|
||||
list[list_size].num = 1;
|
||||
list[list_size].page_num = get_page_num(buf);
|
||||
memcpy(list[list_size].txt, buf, len);
|
||||
list[list_size].txt[len] = 0;
|
||||
|
||||
list[list_size].stacktrace = strchr(list[list_size].txt, '\n') ?: "";
|
||||
if (*list[list_size].stacktrace == '\n')
|
||||
list[list_size].stacktrace++;
|
||||
list[list_size].ts_nsec = get_ts_nsec(buf);
|
||||
list[list_size].free_ts_nsec = get_free_ts_nsec(buf);
|
||||
list_size++;
|
||||
if (list_size % 1000 == 0) {
|
||||
printf("loaded %d\r", list_size);
|
||||
@ -128,29 +404,129 @@ static void add_list(char *buf, int len)
|
||||
}
|
||||
}
|
||||
|
||||
static bool parse_cull_args(const char *arg_str)
|
||||
{
|
||||
int size = 0;
|
||||
char **args = explode(',', arg_str, &size);
|
||||
|
||||
for (int i = 0; i < size; ++i)
|
||||
if (!strcmp(args[i], "pid") || !strcmp(args[i], "p"))
|
||||
cull |= CULL_PID;
|
||||
else if (!strcmp(args[i], "tgid") || !strcmp(args[i], "tg"))
|
||||
cull |= CULL_TGID;
|
||||
else if (!strcmp(args[i], "name") || !strcmp(args[i], "n"))
|
||||
cull |= CULL_COMM;
|
||||
else if (!strcmp(args[i], "stacktrace") || !strcmp(args[i], "st"))
|
||||
cull |= CULL_STACKTRACE;
|
||||
else if (!strcmp(args[i], "free") || !strcmp(args[i], "f"))
|
||||
cull |= CULL_UNRELEASE;
|
||||
else {
|
||||
free_explode(args, size);
|
||||
return false;
|
||||
}
|
||||
free_explode(args, size);
|
||||
return true;
|
||||
}
|
||||
|
||||
#define BUF_SIZE (128 * 1024)
|
||||
|
||||
static void usage(void)
|
||||
{
|
||||
printf("Usage: ./page_owner_sort [-m] <input> <output>\n"
|
||||
"-m Sort by total memory. If this option is unset, sort by times\n"
|
||||
printf("Usage: ./page_owner_sort [OPTIONS] <input> <output>\n"
|
||||
"-m\t\tSort by total memory.\n"
|
||||
"-s\t\tSort by the stack trace.\n"
|
||||
"-t\t\tSort by times (default).\n"
|
||||
"-p\t\tSort by pid.\n"
|
||||
"-P\t\tSort by tgid.\n"
|
||||
"-n\t\tSort by task command name.\n"
|
||||
"-a\t\tSort by memory allocate time.\n"
|
||||
"-r\t\tSort by memory release time.\n"
|
||||
"-c\t\tCull by comparing stacktrace instead of total block.\n"
|
||||
"-f\t\tFilter out the information of blocks whose memory has been released.\n"
|
||||
"--pid <PID>\tSelect by pid. This selects the information of blocks whose process ID number equals to <PID>.\n"
|
||||
"--tgid <TGID>\tSelect by tgid. This selects the information of blocks whose Thread Group ID number equals to <TGID>.\n"
|
||||
"--name <command>\n\t\tSelect by command name. This selects the information of blocks whose command name identical to <command>.\n"
|
||||
"--cull <rules>\tCull by user-defined rules. <rules> is a single argument in the form of a comma-separated list with some common fields predefined\n"
|
||||
);
|
||||
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
int (*cmp)(const void *, const void *) = compare_num;
|
||||
FILE *fin, *fout;
|
||||
char *buf;
|
||||
char *buf, *endptr;
|
||||
int ret, i, count;
|
||||
struct block_list *list2;
|
||||
struct stat st;
|
||||
int err;
|
||||
int opt;
|
||||
struct option longopts[] = {
|
||||
{ "pid", required_argument, NULL, 1 },
|
||||
{ "tgid", required_argument, NULL, 2 },
|
||||
{ "name", required_argument, NULL, 3 },
|
||||
{ "cull", required_argument, NULL, 4 },
|
||||
{ 0, 0, 0, 0},
|
||||
};
|
||||
|
||||
while ((opt = getopt(argc, argv, "m")) != -1)
|
||||
while ((opt = getopt_long(argc, argv, "acfmnprstP", longopts, NULL)) != -1)
|
||||
switch (opt) {
|
||||
case 'a':
|
||||
cmp = compare_ts;
|
||||
break;
|
||||
case 'c':
|
||||
cull = cull | CULL_STACKTRACE;
|
||||
break;
|
||||
case 'f':
|
||||
filter = filter | FILTER_UNRELEASE;
|
||||
break;
|
||||
case 'm':
|
||||
sort_by_memory = 1;
|
||||
cmp = compare_page_num;
|
||||
break;
|
||||
case 'p':
|
||||
cmp = compare_pid;
|
||||
break;
|
||||
case 'r':
|
||||
cmp = compare_free_ts;
|
||||
break;
|
||||
case 's':
|
||||
cmp = compare_stacktrace;
|
||||
break;
|
||||
case 't':
|
||||
cmp = compare_num;
|
||||
break;
|
||||
case 'P':
|
||||
cmp = compare_tgid;
|
||||
break;
|
||||
case 'n':
|
||||
cmp = compare_comm;
|
||||
break;
|
||||
case 1:
|
||||
filter = filter | FILTER_PID;
|
||||
errno = 0;
|
||||
fc.pid = strtol(optarg, &endptr, 10);
|
||||
if (errno != 0 || endptr == optarg || *endptr != '\0') {
|
||||
printf("wrong/invalid pid in from the command line:%s\n", optarg);
|
||||
exit(1);
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
filter = filter | FILTER_TGID;
|
||||
errno = 0;
|
||||
fc.tgid = strtol(optarg, &endptr, 10);
|
||||
if (errno != 0 || endptr == optarg || *endptr != '\0') {
|
||||
printf("wrong/invalid tgid in from the command line:%s\n", optarg);
|
||||
exit(1);
|
||||
}
|
||||
break;
|
||||
case 3:
|
||||
filter = filter | FILTER_COMM;
|
||||
strncpy(fc.comm, optarg, TASK_COMM_LEN);
|
||||
fc.comm[TASK_COMM_LEN-1] = '\0';
|
||||
break;
|
||||
case 4:
|
||||
if (!parse_cull_args(optarg)) {
|
||||
printf("wrong argument after --cull in from the command line:%s\n",
|
||||
optarg);
|
||||
exit(1);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
usage();
|
||||
@ -170,13 +546,12 @@ int main(int argc, char **argv)
|
||||
exit(1);
|
||||
}
|
||||
|
||||
err = regcomp(&order_pattern, "order\\s*([0-9]*),", REG_EXTENDED|REG_NEWLINE);
|
||||
if (err != 0 || order_pattern.re_nsub != 1) {
|
||||
printf("%s: Invalid pattern 'order\\s*([0-9]*),' code %d\n",
|
||||
argv[0], err);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
check_regcomp(&order_pattern, "order\\s*([0-9]*),");
|
||||
check_regcomp(&pid_pattern, "pid\\s*([0-9]*),");
|
||||
check_regcomp(&tgid_pattern, "tgid\\s*([0-9]*) ");
|
||||
check_regcomp(&comm_pattern, "tgid\\s*[0-9]*\\s*\\((.*)\\),\\s*ts");
|
||||
check_regcomp(&ts_nsec_pattern, "ts\\s*([0-9]*)\\s*ns,");
|
||||
check_regcomp(&free_ts_nsec_pattern, "free_ts\\s*([0-9]*)\\s*ns");
|
||||
fstat(fileno(fin), &st);
|
||||
max_size = st.st_size / 100; /* hack ... */
|
||||
|
||||
@ -199,35 +574,48 @@ int main(int argc, char **argv)
|
||||
|
||||
printf("sorting ....\n");
|
||||
|
||||
qsort(list, list_size, sizeof(list[0]), compare_txt);
|
||||
|
||||
list2 = malloc(sizeof(*list) * list_size);
|
||||
if (!list2) {
|
||||
printf("Out of memory\n");
|
||||
exit(1);
|
||||
}
|
||||
qsort(list, list_size, sizeof(list[0]), compare_cull_condition);
|
||||
|
||||
printf("culling\n");
|
||||
|
||||
for (i = count = 0; i < list_size; i++) {
|
||||
if (count == 0 ||
|
||||
strcmp(list2[count-1].txt, list[i].txt) != 0) {
|
||||
list2[count++] = list[i];
|
||||
compare_cull_condition((void *)(&list[count-1]), (void *)(&list[i])) != 0) {
|
||||
list[count++] = list[i];
|
||||
} else {
|
||||
list2[count-1].num += list[i].num;
|
||||
list2[count-1].page_num += list[i].page_num;
|
||||
list[count-1].num += list[i].num;
|
||||
list[count-1].page_num += list[i].page_num;
|
||||
}
|
||||
}
|
||||
|
||||
if (sort_by_memory)
|
||||
qsort(list2, count, sizeof(list[0]), compare_page_num);
|
||||
else
|
||||
qsort(list2, count, sizeof(list[0]), compare_num);
|
||||
|
||||
for (i = 0; i < count; i++)
|
||||
fprintf(fout, "%d times, %d pages:\n%s\n",
|
||||
list2[i].num, list2[i].page_num, list2[i].txt);
|
||||
qsort(list, count, sizeof(list[0]), cmp);
|
||||
|
||||
/*
 * Write one entry per culled block. Without cull rules the full record text
 * is printed; with cull rules only the fields that were culled or filtered
 * on are printed, followed by the stack trace when culling by stack trace.
 */
for (i = 0; i < count; i++) {
	if (cull == 0) {
		fprintf(fout, "%d times, %d pages:\n%s\n",
			list[i].num, list[i].page_num, list[i].txt);
		continue;
	}

	fprintf(fout, "%d times, %d pages",
		list[i].num, list[i].page_num);
	if (cull & CULL_PID || filter & FILTER_PID)
		fprintf(fout, ", PID %d", list[i].pid);
	/* the TGID field must report tgid, not pid */
	if (cull & CULL_TGID || filter & FILTER_TGID)
		fprintf(fout, ", TGID %d", list[i].tgid);
	if (cull & CULL_COMM || filter & FILTER_COMM)
		fprintf(fout, ", task_comm_name: %s", list[i].comm);
	/* a non-zero free timestamp means the block has been released */
	if (cull & CULL_UNRELEASE)
		fprintf(fout, " (%s)",
			list[i].free_ts_nsec ? "RELEASED" : "UNRELEASED");
	if (cull & CULL_STACKTRACE)
		fprintf(fout, ":\n%s", list[i].stacktrace);
	fprintf(fout, "\n");
}
|
||||
regfree(&order_pattern);
|
||||
regfree(&pid_pattern);
|
||||
regfree(&tgid_pattern);
|
||||
regfree(&comm_pattern);
|
||||
regfree(&ts_nsec_pattern);
|
||||
regfree(&free_ts_nsec_pattern);
|
||||
return 0;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user