mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2025-01-19 14:56:21 +00:00
0ce20dd840
Patch series "KFENCE: A low-overhead sampling-based memory safety error detector", v7. This adds the Kernel Electric-Fence (KFENCE) infrastructure. KFENCE is a low-overhead sampling-based memory safety error detector of heap use-after-free, invalid-free, and out-of-bounds access errors. This series enables KFENCE for the x86 and arm64 architectures, and adds KFENCE hooks to the SLAB and SLUB allocators. KFENCE is designed to be enabled in production kernels, and has near zero performance overhead. Compared to KASAN, KFENCE trades performance for precision. The main motivation behind KFENCE's design, is that with enough total uptime KFENCE will detect bugs in code paths not typically exercised by non-production test workloads. One way to quickly achieve a large enough total uptime is when the tool is deployed across a large fleet of machines. KFENCE objects each reside on a dedicated page, at either the left or right page boundaries. The pages to the left and right of the object page are "guard pages", whose attributes are changed to a protected state, and cause page faults on any attempted access to them. Such page faults are then intercepted by KFENCE, which handles the fault gracefully by reporting a memory access error. Guarded allocations are set up based on a sample interval (can be set via kfence.sample_interval). After expiration of the sample interval, the next allocation through the main allocator (SLAB or SLUB) returns a guarded allocation from the KFENCE object pool. At this point, the timer is reset, and the next allocation is set up after the expiration of the interval. To enable/disable a KFENCE allocation through the main allocator's fast-path without overhead, KFENCE relies on static branches via the static keys infrastructure. The static branch is toggled to redirect the allocation to KFENCE. The KFENCE memory pool is of fixed size, and if the pool is exhausted no further KFENCE allocations occur. The default config is conservative with only 255 objects, resulting in a pool size of 2 MiB (with 4 KiB pages). We have verified by running synthetic benchmarks (sysbench I/O, hackbench) and production server-workload benchmarks that a kernel with KFENCE (using sample intervals 100-500ms) is performance-neutral compared to a non-KFENCE baseline kernel. KFENCE is inspired by GWP-ASan [1], a userspace tool with similar properties. The name "KFENCE" is a homage to the Electric Fence Malloc Debugger [2]. For more details, see Documentation/dev-tools/kfence.rst added in the series -- also viewable here: https://raw.githubusercontent.com/google/kasan/kfence/Documentation/dev-tools/kfence.rst [1] http://llvm.org/docs/GwpAsan.html [2] https://linux.die.net/man/3/efence This patch (of 9): This adds the Kernel Electric-Fence (KFENCE) infrastructure. KFENCE is a low-overhead sampling-based memory safety error detector of heap use-after-free, invalid-free, and out-of-bounds access errors. KFENCE is designed to be enabled in production kernels, and has near zero performance overhead. Compared to KASAN, KFENCE trades performance for precision. The main motivation behind KFENCE's design, is that with enough total uptime KFENCE will detect bugs in code paths not typically exercised by non-production test workloads. One way to quickly achieve a large enough total uptime is when the tool is deployed across a large fleet of machines. KFENCE objects each reside on a dedicated page, at either the left or right page boundaries. The pages to the left and right of the object page are "guard pages", whose attributes are changed to a protected state, and cause page faults on any attempted access to them. Such page faults are then intercepted by KFENCE, which handles the fault gracefully by reporting a memory access error. To detect out-of-bounds writes to memory within the object's page itself, KFENCE also uses pattern-based redzones. The following figure illustrates the page layout: ---+-----------+-----------+-----------+-----------+-----------+--- | xxxxxxxxx | O : | xxxxxxxxx | : O | xxxxxxxxx | | xxxxxxxxx | B : | xxxxxxxxx | : B | xxxxxxxxx | | x GUARD x | J : RED- | x GUARD x | RED- : J | x GUARD x | | xxxxxxxxx | E : ZONE | xxxxxxxxx | ZONE : E | xxxxxxxxx | | xxxxxxxxx | C : | xxxxxxxxx | : C | xxxxxxxxx | | xxxxxxxxx | T : | xxxxxxxxx | : T | xxxxxxxxx | ---+-----------+-----------+-----------+-----------+-----------+--- Guarded allocations are set up based on a sample interval (can be set via kfence.sample_interval). After expiration of the sample interval, a guarded allocation from the KFENCE object pool is returned to the main allocator (SLAB or SLUB). At this point, the timer is reset, and the next allocation is set up after the expiration of the interval. To enable/disable a KFENCE allocation through the main allocator's fast-path without overhead, KFENCE relies on static branches via the static keys infrastructure. The static branch is toggled to redirect the allocation to KFENCE. To date, we have verified by running synthetic benchmarks (sysbench I/O, hackbench) that a kernel compiled with KFENCE is performance-neutral compared to the non-KFENCE baseline. For more details, see Documentation/dev-tools/kfence.rst (added later in the series). [elver@google.com: fix parameter description for kfence_object_start()] Link: https://lkml.kernel.org/r/20201106092149.GA2851373@elver.google.com [elver@google.com: avoid stalling work queue task without allocations] Link: https://lkml.kernel.org/r/CADYN=9J0DQhizAGB0-jz4HOBBh+05kMBXb4c0cXMS7Qi5NAJiw@mail.gmail.com Link: https://lkml.kernel.org/r/20201110135320.3309507-1-elver@google.com [elver@google.com: fix potential deadlock due to wake_up()] Link: https://lkml.kernel.org/r/000000000000c0645805b7f982e4@google.com Link: https://lkml.kernel.org/r/20210104130749.1768991-1-elver@google.com [elver@google.com: add option to use KFENCE without static keys] Link: https://lkml.kernel.org/r/20210111091544.3287013-1-elver@google.com [elver@google.com: add missing copyright and description headers] Link: https://lkml.kernel.org/r/20210118092159.145934-1-elver@google.com Link: https://lkml.kernel.org/r/20201103175841.3495947-2-elver@google.com Signed-off-by: Marco Elver <elver@google.com> Signed-off-by: Alexander Potapenko <glider@google.com> Reviewed-by: Dmitry Vyukov <dvyukov@google.com> Reviewed-by: SeongJae Park <sjpark@amazon.de> Co-developed-by: Marco Elver <elver@google.com> Reviewed-by: Jann Horn <jannh@google.com> Cc: "H. Peter Anvin" <hpa@zytor.com> Cc: Paul E. McKenney <paulmck@kernel.org> Cc: Andrey Konovalov <andreyknvl@google.com> Cc: Andrey Ryabinin <aryabinin@virtuozzo.com> Cc: Andy Lutomirski <luto@kernel.org> Cc: Borislav Petkov <bp@alien8.de> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Christopher Lameter <cl@linux.com> Cc: Dave Hansen <dave.hansen@linux.intel.com> Cc: David Rientjes <rientjes@google.com> Cc: Eric Dumazet <edumazet@google.com> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Hillf Danton <hdanton@sina.com> Cc: Ingo Molnar <mingo@redhat.com> Cc: Jonathan Corbet <corbet@lwn.net> Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com> Cc: Joern Engel <joern@purestorage.com> Cc: Kees Cook <keescook@chromium.org> Cc: Mark Rutland <mark.rutland@arm.com> Cc: Pekka Enberg <penberg@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Vlastimil Babka <vbabka@suse.cz> Cc: Will Deacon <will@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
123 lines
4.3 KiB
Makefile
123 lines
4.3 KiB
Makefile
# SPDX-License-Identifier: GPL-2.0
|
|
#
|
|
# Makefile for the linux memory manager.
|
|
#
|
|
|
|
KASAN_SANITIZE_slab_common.o := n
|
|
KASAN_SANITIZE_slab.o := n
|
|
KASAN_SANITIZE_slub.o := n
|
|
KCSAN_SANITIZE_kmemleak.o := n
|
|
|
|
# These produce frequent data race reports: most of them are due to races on
|
|
# the same word but accesses to different bits of that word. Re-enable KCSAN
|
|
# for these when we have more consensus on what to do about them.
|
|
KCSAN_SANITIZE_slab_common.o := n
|
|
KCSAN_SANITIZE_slab.o := n
|
|
KCSAN_SANITIZE_slub.o := n
|
|
KCSAN_SANITIZE_page_alloc.o := n
|
|
|
|
# These files are disabled because they produce non-interesting and/or
|
|
# flaky coverage that is not a function of syscall inputs. E.g. slab is out of
|
|
# free pages, or a task is migrated between nodes.
|
|
KCOV_INSTRUMENT_slab_common.o := n
|
|
KCOV_INSTRUMENT_slob.o := n
|
|
KCOV_INSTRUMENT_slab.o := n
|
|
KCOV_INSTRUMENT_slub.o := n
|
|
KCOV_INSTRUMENT_page_alloc.o := n
|
|
KCOV_INSTRUMENT_debug-pagealloc.o := n
|
|
KCOV_INSTRUMENT_kmemleak.o := n
|
|
KCOV_INSTRUMENT_memcontrol.o := n
|
|
KCOV_INSTRUMENT_mmzone.o := n
|
|
KCOV_INSTRUMENT_vmstat.o := n
|
|
KCOV_INSTRUMENT_failslab.o := n
|
|
|
|
CFLAGS_init-mm.o += $(call cc-disable-warning, override-init)
|
|
CFLAGS_init-mm.o += $(call cc-disable-warning, initializer-overrides)
|
|
|
|
mmu-y := nommu.o
|
|
mmu-$(CONFIG_MMU) := highmem.o memory.o mincore.o \
|
|
mlock.o mmap.o mmu_gather.o mprotect.o mremap.o \
|
|
msync.o page_vma_mapped.o pagewalk.o \
|
|
pgtable-generic.o rmap.o vmalloc.o ioremap.o
|
|
|
|
|
|
ifdef CONFIG_CROSS_MEMORY_ATTACH
|
|
mmu-$(CONFIG_MMU) += process_vm_access.o
|
|
endif
|
|
|
|
obj-y := filemap.o mempool.o oom_kill.o fadvise.o \
|
|
maccess.o page-writeback.o \
|
|
readahead.o swap.o truncate.o vmscan.o shmem.o \
|
|
util.o mmzone.o vmstat.o backing-dev.o \
|
|
mm_init.o percpu.o slab_common.o \
|
|
compaction.o vmacache.o \
|
|
interval_tree.o list_lru.o workingset.o \
|
|
debug.o gup.o mmap_lock.o $(mmu-y)
|
|
|
|
# Give 'page_alloc' its own module-parameter namespace
|
|
page-alloc-y := page_alloc.o
|
|
page-alloc-$(CONFIG_SHUFFLE_PAGE_ALLOCATOR) += shuffle.o
|
|
|
|
obj-y += page-alloc.o
|
|
obj-y += init-mm.o
|
|
obj-y += memblock.o
|
|
|
|
ifdef CONFIG_MMU
|
|
obj-$(CONFIG_ADVISE_SYSCALLS) += madvise.o
|
|
endif
|
|
|
|
obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o swap_slots.o
|
|
obj-$(CONFIG_FRONTSWAP) += frontswap.o
|
|
obj-$(CONFIG_ZSWAP) += zswap.o
|
|
obj-$(CONFIG_HAS_DMA) += dmapool.o
|
|
obj-$(CONFIG_HUGETLBFS) += hugetlb.o
|
|
obj-$(CONFIG_NUMA) += mempolicy.o
|
|
obj-$(CONFIG_SPARSEMEM) += sparse.o
|
|
obj-$(CONFIG_SPARSEMEM_VMEMMAP) += sparse-vmemmap.o
|
|
obj-$(CONFIG_SLOB) += slob.o
|
|
obj-$(CONFIG_MMU_NOTIFIER) += mmu_notifier.o
|
|
obj-$(CONFIG_KSM) += ksm.o
|
|
obj-$(CONFIG_PAGE_POISONING) += page_poison.o
|
|
obj-$(CONFIG_SLAB) += slab.o
|
|
obj-$(CONFIG_SLUB) += slub.o
|
|
obj-$(CONFIG_KASAN) += kasan/
|
|
obj-$(CONFIG_KFENCE) += kfence/
|
|
obj-$(CONFIG_FAILSLAB) += failslab.o
|
|
obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o
|
|
obj-$(CONFIG_MEMTEST) += memtest.o
|
|
obj-$(CONFIG_MIGRATION) += migrate.o
|
|
obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += huge_memory.o khugepaged.o
|
|
obj-$(CONFIG_PAGE_COUNTER) += page_counter.o
|
|
obj-$(CONFIG_MEMCG) += memcontrol.o vmpressure.o
|
|
obj-$(CONFIG_MEMCG_SWAP) += swap_cgroup.o
|
|
obj-$(CONFIG_CGROUP_HUGETLB) += hugetlb_cgroup.o
|
|
obj-$(CONFIG_GUP_TEST) += gup_test.o
|
|
obj-$(CONFIG_MEMORY_FAILURE) += memory-failure.o
|
|
obj-$(CONFIG_HWPOISON_INJECT) += hwpoison-inject.o
|
|
obj-$(CONFIG_DEBUG_KMEMLEAK) += kmemleak.o
|
|
obj-$(CONFIG_DEBUG_RODATA_TEST) += rodata_test.o
|
|
obj-$(CONFIG_DEBUG_VM_PGTABLE) += debug_vm_pgtable.o
|
|
obj-$(CONFIG_PAGE_OWNER) += page_owner.o
|
|
obj-$(CONFIG_CLEANCACHE) += cleancache.o
|
|
obj-$(CONFIG_MEMORY_ISOLATION) += page_isolation.o
|
|
obj-$(CONFIG_ZPOOL) += zpool.o
|
|
obj-$(CONFIG_ZBUD) += zbud.o
|
|
obj-$(CONFIG_ZSMALLOC) += zsmalloc.o
|
|
obj-$(CONFIG_Z3FOLD) += z3fold.o
|
|
obj-$(CONFIG_GENERIC_EARLY_IOREMAP) += early_ioremap.o
|
|
obj-$(CONFIG_CMA) += cma.o
|
|
obj-$(CONFIG_MEMORY_BALLOON) += balloon_compaction.o
|
|
obj-$(CONFIG_PAGE_EXTENSION) += page_ext.o
|
|
obj-$(CONFIG_CMA_DEBUGFS) += cma_debug.o
|
|
obj-$(CONFIG_USERFAULTFD) += userfaultfd.o
|
|
obj-$(CONFIG_IDLE_PAGE_TRACKING) += page_idle.o
|
|
obj-$(CONFIG_DEBUG_PAGE_REF) += debug_page_ref.o
|
|
obj-$(CONFIG_HARDENED_USERCOPY) += usercopy.o
|
|
obj-$(CONFIG_PERCPU_STATS) += percpu-stats.o
|
|
obj-$(CONFIG_ZONE_DEVICE) += memremap.o
|
|
obj-$(CONFIG_HMM_MIRROR) += hmm.o
|
|
obj-$(CONFIG_MEMFD_CREATE) += memfd.o
|
|
obj-$(CONFIG_MAPPING_DIRTY_HELPERS) += mapping_dirty_helpers.o
|
|
obj-$(CONFIG_PTDUMP_CORE) += ptdump.o
|
|
obj-$(CONFIG_PAGE_REPORTING) += page_reporting.o
|