mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2025-01-16 02:14:58 +00:00
e7ac4daeed
When the proportion of folios from the zeromap is small, missing their accounting may not significantly impact profiling. However, it's easy to construct a scenario where this becomes an issue—for example, allocating 1 GB of memory, writing zeros from userspace, followed by MADV_PAGEOUT, and then swapping it back in. In this case, the swap-out and swap-in counts seem to vanish into a black hole, potentially causing semantic ambiguity. On the other hand, Usama reported that zero-filled pages can exceed 10% in workloads utilizing zswap, while Hailong noted that some app in Android have more than 6% zero-filled pages. Before commit 0ca0c24e3211 ("mm: store zero pages to be swapped out in a bitmap"), both zswap and zRAM implemented similar optimizations, leading to these optimized-out pages being counted in either zswap or zRAM counters (with pswpin/pswpout also increasing for zRAM). With zeromap functioning prior to both zswap and zRAM, userspace will no longer detect these swap-out and swap-in actions. We have three ways to address this: 1. Introduce a dedicated counter specifically for the zeromap. 2. Use pswpin/pswpout accounting, treating the zero map as a standard backend. This approach aligns with zRAM's current handling of same-page fills at the device level. However, it would mean losing the optimized-out page counters previously available in zRAM and would not align with systems using zswap. Additionally, as noted by Nhat Pham, pswpin/pswpout counters apply only to I/O done directly to the backend device. 3. Count zeromap pages under zswap, aligning with system behavior when zswap is enabled. However, this would not be consistent with zRAM, nor would it align with systems lacking both zswap and zRAM. Given the complications with options 2 and 3, this patch selects option 1. We can find these counters from /proc/vmstat (counters for the whole system) and memcg's memory.stat (counters for the interested memcg). For example: $ grep -E 'swpin_zero|swpout_zero' /proc/vmstat swpin_zero 1648 swpout_zero 33536 $ grep -E 'swpin_zero|swpout_zero' /sys/fs/cgroup/system.slice/memory.stat swpin_zero 3905 swpout_zero 3985 This patch does not address any specific zeromap bug, but the missing swpout and swpin counts for zero-filled pages can be highly confusing and may mislead user-space agents that rely on changes in these counters as indicators. Therefore, we add a Fixes tag to encourage the inclusion of this counter in any kernel versions with zeromap. Many thanks to Kanchana for the contribution of changing count_objcg_event() to count_objcg_events() to support large folios[1], which has now been incorporated into this patch. [1] https://lkml.kernel.org/r/20241001053222.6944-5-kanchana.p.sridhar@intel.com Link: https://lkml.kernel.org/r/20241107011246.59137-1-21cnbao@gmail.com Fixes: 0ca0c24e3211 ("mm: store zero pages to be swapped out in a bitmap") Co-developed-by: Kanchana P Sridhar <kanchana.p.sridhar@intel.com> Signed-off-by: Barry Song <v-songbaohua@oppo.com> Reviewed-by: Nhat Pham <nphamcs@gmail.com> Reviewed-by: Chengming Zhou <chengming.zhou@linux.dev> Acked-by: Johannes Weiner <hannes@cmpxchg.org> Cc: Usama Arif <usamaarif642@gmail.com> Cc: Yosry Ahmed <yosryahmed@google.com> Cc: Hailong Liu <hailong.liu@oppo.com> Cc: David Hildenbrand <david@redhat.com> Cc: Hugh Dickins <hughd@google.com> Cc: Matthew Wilcox (Oracle) <willy@infradead.org> Cc: Shakeel Butt <shakeel.butt@linux.dev> Cc: Andi Kleen <ak@linux.intel.com> Cc: Baolin Wang <baolin.wang@linux.alibaba.com> Cc: Chris Li <chrisl@kernel.org> Cc: "Huang, Ying" <ying.huang@intel.com> Cc: Kairui Song <kasong@tencent.com> Cc: Ryan Roberts <ryan.roberts@arm.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
196 lines
4.3 KiB
C
196 lines
4.3 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
#ifndef VM_EVENT_ITEM_H_INCLUDED
|
|
#define VM_EVENT_ITEM_H_INCLUDED
|
|
|
|
#ifdef CONFIG_ZONE_DMA
|
|
#define DMA_ZONE(xx) xx##_DMA,
|
|
#else
|
|
#define DMA_ZONE(xx)
|
|
#endif
|
|
|
|
#ifdef CONFIG_ZONE_DMA32
|
|
#define DMA32_ZONE(xx) xx##_DMA32,
|
|
#else
|
|
#define DMA32_ZONE(xx)
|
|
#endif
|
|
|
|
#ifdef CONFIG_HIGHMEM
|
|
#define HIGHMEM_ZONE(xx) xx##_HIGH,
|
|
#else
|
|
#define HIGHMEM_ZONE(xx)
|
|
#endif
|
|
|
|
#ifdef CONFIG_ZONE_DEVICE
|
|
#define DEVICE_ZONE(xx) xx##_DEVICE,
|
|
#else
|
|
#define DEVICE_ZONE(xx)
|
|
#endif
|
|
|
|
#define FOR_ALL_ZONES(xx) DMA_ZONE(xx) DMA32_ZONE(xx) xx##_NORMAL, \
|
|
HIGHMEM_ZONE(xx) xx##_MOVABLE, DEVICE_ZONE(xx)
|
|
|
|
enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
|
|
FOR_ALL_ZONES(PGALLOC)
|
|
FOR_ALL_ZONES(ALLOCSTALL)
|
|
FOR_ALL_ZONES(PGSCAN_SKIP)
|
|
PGFREE, PGACTIVATE, PGDEACTIVATE, PGLAZYFREE,
|
|
PGFAULT, PGMAJFAULT,
|
|
PGLAZYFREED,
|
|
PGREFILL,
|
|
PGREUSE,
|
|
PGSTEAL_KSWAPD,
|
|
PGSTEAL_DIRECT,
|
|
PGSTEAL_KHUGEPAGED,
|
|
PGSCAN_KSWAPD,
|
|
PGSCAN_DIRECT,
|
|
PGSCAN_KHUGEPAGED,
|
|
PGSCAN_DIRECT_THROTTLE,
|
|
PGSCAN_ANON,
|
|
PGSCAN_FILE,
|
|
PGSTEAL_ANON,
|
|
PGSTEAL_FILE,
|
|
#ifdef CONFIG_NUMA
|
|
PGSCAN_ZONE_RECLAIM_SUCCESS,
|
|
PGSCAN_ZONE_RECLAIM_FAILED,
|
|
#endif
|
|
PGINODESTEAL, SLABS_SCANNED, KSWAPD_INODESTEAL,
|
|
KSWAPD_LOW_WMARK_HIT_QUICKLY, KSWAPD_HIGH_WMARK_HIT_QUICKLY,
|
|
PAGEOUTRUN, PGROTATED,
|
|
DROP_PAGECACHE, DROP_SLAB,
|
|
OOM_KILL,
|
|
#ifdef CONFIG_NUMA_BALANCING
|
|
NUMA_PTE_UPDATES,
|
|
NUMA_HUGE_PTE_UPDATES,
|
|
NUMA_HINT_FAULTS,
|
|
NUMA_HINT_FAULTS_LOCAL,
|
|
NUMA_PAGE_MIGRATE,
|
|
#endif
|
|
#ifdef CONFIG_MIGRATION
|
|
PGMIGRATE_SUCCESS, PGMIGRATE_FAIL,
|
|
THP_MIGRATION_SUCCESS,
|
|
THP_MIGRATION_FAIL,
|
|
THP_MIGRATION_SPLIT,
|
|
#endif
|
|
#ifdef CONFIG_COMPACTION
|
|
COMPACTMIGRATE_SCANNED, COMPACTFREE_SCANNED,
|
|
COMPACTISOLATED,
|
|
COMPACTSTALL, COMPACTFAIL, COMPACTSUCCESS,
|
|
KCOMPACTD_WAKE,
|
|
KCOMPACTD_MIGRATE_SCANNED, KCOMPACTD_FREE_SCANNED,
|
|
#endif
|
|
#ifdef CONFIG_HUGETLB_PAGE
|
|
HTLB_BUDDY_PGALLOC, HTLB_BUDDY_PGALLOC_FAIL,
|
|
#endif
|
|
#ifdef CONFIG_CMA
|
|
CMA_ALLOC_SUCCESS,
|
|
CMA_ALLOC_FAIL,
|
|
#endif
|
|
UNEVICTABLE_PGCULLED, /* culled to noreclaim list */
|
|
UNEVICTABLE_PGSCANNED, /* scanned for reclaimability */
|
|
UNEVICTABLE_PGRESCUED, /* rescued from noreclaim list */
|
|
UNEVICTABLE_PGMLOCKED,
|
|
UNEVICTABLE_PGMUNLOCKED,
|
|
UNEVICTABLE_PGCLEARED, /* on COW, page truncate */
|
|
UNEVICTABLE_PGSTRANDED, /* unable to isolate on unlock */
|
|
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
|
|
THP_FAULT_ALLOC,
|
|
THP_FAULT_FALLBACK,
|
|
THP_FAULT_FALLBACK_CHARGE,
|
|
THP_COLLAPSE_ALLOC,
|
|
THP_COLLAPSE_ALLOC_FAILED,
|
|
THP_FILE_ALLOC,
|
|
THP_FILE_FALLBACK,
|
|
THP_FILE_FALLBACK_CHARGE,
|
|
THP_FILE_MAPPED,
|
|
THP_SPLIT_PAGE,
|
|
THP_SPLIT_PAGE_FAILED,
|
|
THP_DEFERRED_SPLIT_PAGE,
|
|
THP_UNDERUSED_SPLIT_PAGE,
|
|
THP_SPLIT_PMD,
|
|
THP_SCAN_EXCEED_NONE_PTE,
|
|
THP_SCAN_EXCEED_SWAP_PTE,
|
|
THP_SCAN_EXCEED_SHARED_PTE,
|
|
#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
|
|
THP_SPLIT_PUD,
|
|
#endif
|
|
THP_ZERO_PAGE_ALLOC,
|
|
THP_ZERO_PAGE_ALLOC_FAILED,
|
|
THP_SWPOUT,
|
|
THP_SWPOUT_FALLBACK,
|
|
#endif
|
|
#ifdef CONFIG_MEMORY_BALLOON
|
|
BALLOON_INFLATE,
|
|
BALLOON_DEFLATE,
|
|
#ifdef CONFIG_BALLOON_COMPACTION
|
|
BALLOON_MIGRATE,
|
|
#endif
|
|
#endif
|
|
#ifdef CONFIG_DEBUG_TLBFLUSH
|
|
NR_TLB_REMOTE_FLUSH, /* cpu tried to flush others' tlbs */
|
|
NR_TLB_REMOTE_FLUSH_RECEIVED,/* cpu received ipi for flush */
|
|
NR_TLB_LOCAL_FLUSH_ALL,
|
|
NR_TLB_LOCAL_FLUSH_ONE,
|
|
#endif /* CONFIG_DEBUG_TLBFLUSH */
|
|
#ifdef CONFIG_SWAP
|
|
SWAP_RA,
|
|
SWAP_RA_HIT,
|
|
SWPIN_ZERO,
|
|
SWPOUT_ZERO,
|
|
#ifdef CONFIG_KSM
|
|
KSM_SWPIN_COPY,
|
|
#endif
|
|
#endif
|
|
#ifdef CONFIG_KSM
|
|
COW_KSM,
|
|
#endif
|
|
#ifdef CONFIG_ZSWAP
|
|
ZSWPIN,
|
|
ZSWPOUT,
|
|
ZSWPWB,
|
|
#endif
|
|
#ifdef CONFIG_X86
|
|
DIRECT_MAP_LEVEL2_SPLIT,
|
|
DIRECT_MAP_LEVEL3_SPLIT,
|
|
#endif
|
|
#ifdef CONFIG_PER_VMA_LOCK_STATS
|
|
VMA_LOCK_SUCCESS,
|
|
VMA_LOCK_ABORT,
|
|
VMA_LOCK_RETRY,
|
|
VMA_LOCK_MISS,
|
|
#endif
|
|
#ifdef CONFIG_DEBUG_STACK_USAGE
|
|
KSTACK_1K,
|
|
#if THREAD_SIZE > 1024
|
|
KSTACK_2K,
|
|
#endif
|
|
#if THREAD_SIZE > 2048
|
|
KSTACK_4K,
|
|
#endif
|
|
#if THREAD_SIZE > 4096
|
|
KSTACK_8K,
|
|
#endif
|
|
#if THREAD_SIZE > 8192
|
|
KSTACK_16K,
|
|
#endif
|
|
#if THREAD_SIZE > 16384
|
|
KSTACK_32K,
|
|
#endif
|
|
#if THREAD_SIZE > 32768
|
|
KSTACK_64K,
|
|
#endif
|
|
#if THREAD_SIZE > 65536
|
|
KSTACK_REST,
|
|
#endif
|
|
#endif /* CONFIG_DEBUG_STACK_USAGE */
|
|
NR_VM_EVENT_ITEMS
|
|
};
|
|
|
|
#ifndef CONFIG_TRANSPARENT_HUGEPAGE
|
|
#define THP_FILE_ALLOC ({ BUILD_BUG(); 0; })
|
|
#define THP_FILE_FALLBACK ({ BUILD_BUG(); 0; })
|
|
#define THP_FILE_FALLBACK_CHARGE ({ BUILD_BUG(); 0; })
|
|
#define THP_FILE_MAPPED ({ BUILD_BUG(); 0; })
|
|
#endif
|
|
|
|
#endif /* VM_EVENT_ITEM_H_INCLUDED */
|