mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2025-01-16 18:26:42 +00:00
slab updates for 6.8
-----BEGIN PGP SIGNATURE----- iQEzBAABCAAdFiEEe7vIQRWZI0iWSE3xu+CwddJFiJoFAmWWu9EACgkQu+CwddJF iJpXvQf/aGL7uEY57VpTm0t4gPwoZ9r2P89HxI/nQs9XgVzDcBmVp/cC0LDvSdcm t91kJO538KeGjMgvlhLMTEuoShH5FlPs6cOwrGAYUoAGa4NwiOpGvliGky+nNHqY w887ZgSzVLq0UOuSvn86N6enumMvewt4V+872+OWo6O1HWOJhC0SgHTIa8QPQtwb yZ9BghO5IqMRXiZEsSIwyO+tQHcaU6l2G5huFXzgMFUhkQqAB9KTFc3h6rYI+i80 L4ppNXo2KNPGTDRb9dA8LNMWgvmfjhCb7chs8o1zSY2PwZlkzOix7EUBLCAIbc/2 EIaFC8AsZjfT47D1t72r8QpHB+C14Q== =J+E7 -----END PGP SIGNATURE----- Merge tag 'slab-for-6.8' of git://git.kernel.org/pub/scm/linux/kernel/git/vbabka/slab Pull slab updates from Vlastimil Babka: - SLUB: delayed freezing of CPU partial slabs (Chengming Zhou) Freezing is an operation involving double_cmpxchg() that makes a slab exclusive for a particular CPU. Chengming noticed that we use it also in situations where we are not yet installing the slab as the CPU slab, because freezing also indicates that the slab is not on the shared list. This results in redundant freeze/unfreeze operation and can be avoided by marking separately the shared list presence by reusing the PG_workingset flag. This approach neatly avoids the issues described in 9b1ea29bc0d7 ("Revert "mm, slub: consider rest of partial list if acquire_slab() fails"") as we can now grab a slab from the shared list in a quick and guaranteed way without the cmpxchg_double() operation that amplifies the lock contention and can fail. As a result, lkp has reported 34.2% improvement of stress-ng.rawudp.ops_per_sec - SLAB removal and SLUB cleanups (Vlastimil Babka) The SLAB allocator has been deprecated since 6.5 and nobody has objected so far. We agreed at LSF/MM to wait until the next LTS, which is 6.6, so we should be good to go now. This doesn't yet erase all traces of SLAB outside of mm/ so some dead code, comments or documentation remain, and will be cleaned up gradually (some series are already in the works). 
Removing the choice of allocators has already allowed to simplify and optimize the code wiring up the kmalloc APIs to the SLUB implementation. * tag 'slab-for-6.8' of git://git.kernel.org/pub/scm/linux/kernel/git/vbabka/slab: (34 commits) mm/slub: free KFENCE objects in slab_free_hook() mm/slub: handle bulk and single object freeing separately mm/slub: introduce __kmem_cache_free_bulk() without free hooks mm/slub: fix bulk alloc and free stats mm/slub: optimize free fast path code layout mm/slub: optimize alloc fastpath code layout mm/slub: remove slab_alloc() and __kmem_cache_alloc_lru() wrappers mm/slab: move kmalloc() functions from slab_common.c to slub.c mm/slab: move kmalloc_slab() to mm/slab.h mm/slab: move kfree() from slab_common.c to slub.c mm/slab: move struct kmem_cache_node from slab.h to slub.c mm/slab: move memcg related functions from slab.h to slub.c mm/slab: move pre/post-alloc hooks from slab.h to slub.c mm/slab: consolidate includes in the internal mm/slab.h mm/slab: move the rest of slub_def.h to mm/slab.h mm/slab: move struct kmem_cache_cpu declaration to slub.c mm/slab: remove mm/slab.c and slab_def.h mm/mempool/dmapool: remove CONFIG_DEBUG_SLAB ifdefs mm/slab: remove CONFIG_SLAB code from slab common code cpu/hotplug: remove CPUHP_SLAB_PREPARE hooks ...
This commit is contained in:
commit
d30e51aa7b
12
CREDITS
12
CREDITS
@ -9,10 +9,6 @@
|
||||
Linus
|
||||
----------
|
||||
|
||||
N: Matt Mackal
|
||||
E: mpm@selenic.com
|
||||
D: SLOB slab allocator
|
||||
|
||||
N: Matti Aarnio
|
||||
E: mea@nic.funet.fi
|
||||
D: Alpha systems hacking, IPv6 and other network related stuff
|
||||
@ -1572,6 +1568,10 @@ S: Ampferstr. 50 / 4
|
||||
S: 6020 Innsbruck
|
||||
S: Austria
|
||||
|
||||
N: Mark Hemment
|
||||
E: markhe@nextd.demon.co.uk
|
||||
D: SLAB allocator implementation
|
||||
|
||||
N: Richard Henderson
|
||||
E: rth@twiddle.net
|
||||
E: rth@cygnus.com
|
||||
@ -2445,6 +2445,10 @@ D: work on suspend-to-ram/disk, killing duplicates from ioctl32,
|
||||
D: Altera SoCFPGA and Nokia N900 support.
|
||||
S: Czech Republic
|
||||
|
||||
N: Olivia Mackall
|
||||
E: olivia@selenic.com
|
||||
D: SLOB slab allocator
|
||||
|
||||
N: Paul Mackerras
|
||||
E: paulus@samba.org
|
||||
D: PPP driver
|
||||
|
@ -37,7 +37,7 @@ The Slab Cache
|
||||
.. kernel-doc:: include/linux/slab.h
|
||||
:internal:
|
||||
|
||||
.. kernel-doc:: mm/slab.c
|
||||
.. kernel-doc:: mm/slub.c
|
||||
:export:
|
||||
|
||||
.. kernel-doc:: mm/slab_common.c
|
||||
|
@ -154,7 +154,7 @@ config ARM64
|
||||
select HAVE_MOVE_PUD
|
||||
select HAVE_PCI
|
||||
select HAVE_ACPI_APEI if (ACPI && EFI)
|
||||
select HAVE_ALIGNED_STRUCT_PAGE if SLUB
|
||||
select HAVE_ALIGNED_STRUCT_PAGE
|
||||
select HAVE_ARCH_AUDITSYSCALL
|
||||
select HAVE_ARCH_BITREVERSE
|
||||
select HAVE_ARCH_COMPILER_H
|
||||
|
@ -146,7 +146,7 @@ config S390
|
||||
select GENERIC_TIME_VSYSCALL
|
||||
select GENERIC_VDSO_TIME_NS
|
||||
select GENERIC_IOREMAP if PCI
|
||||
select HAVE_ALIGNED_STRUCT_PAGE if SLUB
|
||||
select HAVE_ALIGNED_STRUCT_PAGE
|
||||
select HAVE_ARCH_AUDITSYSCALL
|
||||
select HAVE_ARCH_JUMP_LABEL
|
||||
select HAVE_ARCH_JUMP_LABEL_RELATIVE
|
||||
|
@ -169,7 +169,7 @@ config X86
|
||||
select HAS_IOPORT
|
||||
select HAVE_ACPI_APEI if ACPI
|
||||
select HAVE_ACPI_APEI_NMI if ACPI
|
||||
select HAVE_ALIGNED_STRUCT_PAGE if SLUB
|
||||
select HAVE_ALIGNED_STRUCT_PAGE
|
||||
select HAVE_ARCH_AUDITSYSCALL
|
||||
select HAVE_ARCH_HUGE_VMAP if X86_64 || X86_PAE
|
||||
select HAVE_ARCH_HUGE_VMALLOC if X86_64
|
||||
|
@ -104,7 +104,6 @@ enum cpuhp_state {
|
||||
CPUHP_X2APIC_PREPARE,
|
||||
CPUHP_SMPCFD_PREPARE,
|
||||
CPUHP_RELAY_PREPARE,
|
||||
CPUHP_SLAB_PREPARE,
|
||||
CPUHP_MD_RAID5_PREPARE,
|
||||
CPUHP_RCUTREE_PREP,
|
||||
CPUHP_CPUIDLE_COUPLED_PREPARE,
|
||||
|
@ -24,7 +24,7 @@
|
||||
|
||||
/*
|
||||
* Flags to pass to kmem_cache_create().
|
||||
* The ones marked DEBUG are only valid if CONFIG_DEBUG_SLAB is set.
|
||||
* The ones marked DEBUG need CONFIG_SLUB_DEBUG enabled; otherwise they are no-ops.
|
||||
*/
|
||||
/* DEBUG: Perform (expensive) checks on alloc/free */
|
||||
#define SLAB_CONSISTENCY_CHECKS ((slab_flags_t __force)0x00000100U)
|
||||
@ -302,25 +302,15 @@ static inline unsigned int arch_slab_minalign(void)
|
||||
* Kmalloc array related definitions
|
||||
*/
|
||||
|
||||
#ifdef CONFIG_SLAB
|
||||
/*
|
||||
* SLAB and SLUB directly allocates requests fitting in to an order-1 page
|
||||
* SLUB directly allocates requests fitting into an order-1 page
|
||||
* (PAGE_SIZE*2). Larger requests are passed to the page allocator.
|
||||
*/
|
||||
#define KMALLOC_SHIFT_HIGH (PAGE_SHIFT + 1)
|
||||
#define KMALLOC_SHIFT_MAX (MAX_ORDER + PAGE_SHIFT)
|
||||
#ifndef KMALLOC_SHIFT_LOW
|
||||
#define KMALLOC_SHIFT_LOW 5
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_SLUB
|
||||
#define KMALLOC_SHIFT_HIGH (PAGE_SHIFT + 1)
|
||||
#define KMALLOC_SHIFT_MAX (MAX_ORDER + PAGE_SHIFT)
|
||||
#ifndef KMALLOC_SHIFT_LOW
|
||||
#define KMALLOC_SHIFT_LOW 3
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* Maximum allocatable size */
|
||||
#define KMALLOC_MAX_SIZE (1UL << KMALLOC_SHIFT_MAX)
|
||||
@ -788,12 +778,4 @@ size_t kmalloc_size_roundup(size_t size);
|
||||
|
||||
void __init kmem_cache_init_late(void);
|
||||
|
||||
#if defined(CONFIG_SMP) && defined(CONFIG_SLAB)
|
||||
int slab_prepare_cpu(unsigned int cpu);
|
||||
int slab_dead_cpu(unsigned int cpu);
|
||||
#else
|
||||
#define slab_prepare_cpu NULL
|
||||
#define slab_dead_cpu NULL
|
||||
#endif
|
||||
|
||||
#endif /* _LINUX_SLAB_H */
|
||||
|
@ -1,124 +0,0 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef _LINUX_SLAB_DEF_H
|
||||
#define _LINUX_SLAB_DEF_H
|
||||
|
||||
#include <linux/kfence.h>
|
||||
#include <linux/reciprocal_div.h>
|
||||
|
||||
/*
|
||||
* Definitions unique to the original Linux SLAB allocator.
|
||||
*/
|
||||
|
||||
struct kmem_cache {
|
||||
struct array_cache __percpu *cpu_cache;
|
||||
|
||||
/* 1) Cache tunables. Protected by slab_mutex */
|
||||
unsigned int batchcount;
|
||||
unsigned int limit;
|
||||
unsigned int shared;
|
||||
|
||||
unsigned int size;
|
||||
struct reciprocal_value reciprocal_buffer_size;
|
||||
/* 2) touched by every alloc & free from the backend */
|
||||
|
||||
slab_flags_t flags; /* constant flags */
|
||||
unsigned int num; /* # of objs per slab */
|
||||
|
||||
/* 3) cache_grow/shrink */
|
||||
/* order of pgs per slab (2^n) */
|
||||
unsigned int gfporder;
|
||||
|
||||
/* force GFP flags, e.g. GFP_DMA */
|
||||
gfp_t allocflags;
|
||||
|
||||
size_t colour; /* cache colouring range */
|
||||
unsigned int colour_off; /* colour offset */
|
||||
unsigned int freelist_size;
|
||||
|
||||
/* constructor func */
|
||||
void (*ctor)(void *obj);
|
||||
|
||||
/* 4) cache creation/removal */
|
||||
const char *name;
|
||||
struct list_head list;
|
||||
int refcount;
|
||||
int object_size;
|
||||
int align;
|
||||
|
||||
/* 5) statistics */
|
||||
#ifdef CONFIG_DEBUG_SLAB
|
||||
unsigned long num_active;
|
||||
unsigned long num_allocations;
|
||||
unsigned long high_mark;
|
||||
unsigned long grown;
|
||||
unsigned long reaped;
|
||||
unsigned long errors;
|
||||
unsigned long max_freeable;
|
||||
unsigned long node_allocs;
|
||||
unsigned long node_frees;
|
||||
unsigned long node_overflow;
|
||||
atomic_t allochit;
|
||||
atomic_t allocmiss;
|
||||
atomic_t freehit;
|
||||
atomic_t freemiss;
|
||||
|
||||
/*
|
||||
* If debugging is enabled, then the allocator can add additional
|
||||
* fields and/or padding to every object. 'size' contains the total
|
||||
* object size including these internal fields, while 'obj_offset'
|
||||
* and 'object_size' contain the offset to the user object and its
|
||||
* size.
|
||||
*/
|
||||
int obj_offset;
|
||||
#endif /* CONFIG_DEBUG_SLAB */
|
||||
|
||||
#ifdef CONFIG_KASAN_GENERIC
|
||||
struct kasan_cache kasan_info;
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_SLAB_FREELIST_RANDOM
|
||||
unsigned int *random_seq;
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_HARDENED_USERCOPY
|
||||
unsigned int useroffset; /* Usercopy region offset */
|
||||
unsigned int usersize; /* Usercopy region size */
|
||||
#endif
|
||||
|
||||
struct kmem_cache_node *node[MAX_NUMNODES];
|
||||
};
|
||||
|
||||
static inline void *nearest_obj(struct kmem_cache *cache, const struct slab *slab,
|
||||
void *x)
|
||||
{
|
||||
void *object = x - (x - slab->s_mem) % cache->size;
|
||||
void *last_object = slab->s_mem + (cache->num - 1) * cache->size;
|
||||
|
||||
if (unlikely(object > last_object))
|
||||
return last_object;
|
||||
else
|
||||
return object;
|
||||
}
|
||||
|
||||
/*
|
||||
* We want to avoid an expensive divide : (offset / cache->size)
|
||||
* Using the fact that size is a constant for a particular cache,
|
||||
* we can replace (offset / cache->size) by
|
||||
* reciprocal_divide(offset, cache->reciprocal_buffer_size)
|
||||
*/
|
||||
static inline unsigned int obj_to_index(const struct kmem_cache *cache,
|
||||
const struct slab *slab, void *obj)
|
||||
{
|
||||
u32 offset = (obj - slab->s_mem);
|
||||
return reciprocal_divide(offset, cache->reciprocal_buffer_size);
|
||||
}
|
||||
|
||||
static inline int objs_per_slab(const struct kmem_cache *cache,
|
||||
const struct slab *slab)
|
||||
{
|
||||
if (is_kfence_address(slab_address(slab)))
|
||||
return 1;
|
||||
return cache->num;
|
||||
}
|
||||
|
||||
#endif /* _LINUX_SLAB_DEF_H */
|
@ -1,204 +0,0 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef _LINUX_SLUB_DEF_H
|
||||
#define _LINUX_SLUB_DEF_H
|
||||
|
||||
/*
|
||||
* SLUB : A Slab allocator without object queues.
|
||||
*
|
||||
* (C) 2007 SGI, Christoph Lameter
|
||||
*/
|
||||
#include <linux/kfence.h>
|
||||
#include <linux/kobject.h>
|
||||
#include <linux/reciprocal_div.h>
|
||||
#include <linux/local_lock.h>
|
||||
|
||||
enum stat_item {
|
||||
ALLOC_FASTPATH, /* Allocation from cpu slab */
|
||||
ALLOC_SLOWPATH, /* Allocation by getting a new cpu slab */
|
||||
FREE_FASTPATH, /* Free to cpu slab */
|
||||
FREE_SLOWPATH, /* Freeing not to cpu slab */
|
||||
FREE_FROZEN, /* Freeing to frozen slab */
|
||||
FREE_ADD_PARTIAL, /* Freeing moves slab to partial list */
|
||||
FREE_REMOVE_PARTIAL, /* Freeing removes last object */
|
||||
ALLOC_FROM_PARTIAL, /* Cpu slab acquired from node partial list */
|
||||
ALLOC_SLAB, /* Cpu slab acquired from page allocator */
|
||||
ALLOC_REFILL, /* Refill cpu slab from slab freelist */
|
||||
ALLOC_NODE_MISMATCH, /* Switching cpu slab */
|
||||
FREE_SLAB, /* Slab freed to the page allocator */
|
||||
CPUSLAB_FLUSH, /* Abandoning of the cpu slab */
|
||||
DEACTIVATE_FULL, /* Cpu slab was full when deactivated */
|
||||
DEACTIVATE_EMPTY, /* Cpu slab was empty when deactivated */
|
||||
DEACTIVATE_TO_HEAD, /* Cpu slab was moved to the head of partials */
|
||||
DEACTIVATE_TO_TAIL, /* Cpu slab was moved to the tail of partials */
|
||||
DEACTIVATE_REMOTE_FREES,/* Slab contained remotely freed objects */
|
||||
DEACTIVATE_BYPASS, /* Implicit deactivation */
|
||||
ORDER_FALLBACK, /* Number of times fallback was necessary */
|
||||
CMPXCHG_DOUBLE_CPU_FAIL,/* Failure of this_cpu_cmpxchg_double */
|
||||
CMPXCHG_DOUBLE_FAIL, /* Number of times that cmpxchg double did not match */
|
||||
CPU_PARTIAL_ALLOC, /* Used cpu partial on alloc */
|
||||
CPU_PARTIAL_FREE, /* Refill cpu partial on free */
|
||||
CPU_PARTIAL_NODE, /* Refill cpu partial from node partial */
|
||||
CPU_PARTIAL_DRAIN, /* Drain cpu partial to node partial */
|
||||
NR_SLUB_STAT_ITEMS
|
||||
};
|
||||
|
||||
#ifndef CONFIG_SLUB_TINY
|
||||
/*
|
||||
* When changing the layout, make sure freelist and tid are still compatible
|
||||
* with this_cpu_cmpxchg_double() alignment requirements.
|
||||
*/
|
||||
struct kmem_cache_cpu {
|
||||
union {
|
||||
struct {
|
||||
void **freelist; /* Pointer to next available object */
|
||||
unsigned long tid; /* Globally unique transaction id */
|
||||
};
|
||||
freelist_aba_t freelist_tid;
|
||||
};
|
||||
struct slab *slab; /* The slab from which we are allocating */
|
||||
#ifdef CONFIG_SLUB_CPU_PARTIAL
|
||||
struct slab *partial; /* Partially allocated frozen slabs */
|
||||
#endif
|
||||
local_lock_t lock; /* Protects the fields above */
|
||||
#ifdef CONFIG_SLUB_STATS
|
||||
unsigned stat[NR_SLUB_STAT_ITEMS];
|
||||
#endif
|
||||
};
|
||||
#endif /* CONFIG_SLUB_TINY */
|
||||
|
||||
#ifdef CONFIG_SLUB_CPU_PARTIAL
|
||||
#define slub_percpu_partial(c) ((c)->partial)
|
||||
|
||||
#define slub_set_percpu_partial(c, p) \
|
||||
({ \
|
||||
slub_percpu_partial(c) = (p)->next; \
|
||||
})
|
||||
|
||||
#define slub_percpu_partial_read_once(c) READ_ONCE(slub_percpu_partial(c))
|
||||
#else
|
||||
#define slub_percpu_partial(c) NULL
|
||||
|
||||
#define slub_set_percpu_partial(c, p)
|
||||
|
||||
#define slub_percpu_partial_read_once(c) NULL
|
||||
#endif // CONFIG_SLUB_CPU_PARTIAL
|
||||
|
||||
/*
|
||||
* Word size structure that can be atomically updated or read and that
|
||||
* contains both the order and the number of objects that a slab of the
|
||||
* given order would contain.
|
||||
*/
|
||||
struct kmem_cache_order_objects {
|
||||
unsigned int x;
|
||||
};
|
||||
|
||||
/*
|
||||
* Slab cache management.
|
||||
*/
|
||||
struct kmem_cache {
|
||||
#ifndef CONFIG_SLUB_TINY
|
||||
struct kmem_cache_cpu __percpu *cpu_slab;
|
||||
#endif
|
||||
/* Used for retrieving partial slabs, etc. */
|
||||
slab_flags_t flags;
|
||||
unsigned long min_partial;
|
||||
unsigned int size; /* The size of an object including metadata */
|
||||
unsigned int object_size;/* The size of an object without metadata */
|
||||
struct reciprocal_value reciprocal_size;
|
||||
unsigned int offset; /* Free pointer offset */
|
||||
#ifdef CONFIG_SLUB_CPU_PARTIAL
|
||||
/* Number of per cpu partial objects to keep around */
|
||||
unsigned int cpu_partial;
|
||||
/* Number of per cpu partial slabs to keep around */
|
||||
unsigned int cpu_partial_slabs;
|
||||
#endif
|
||||
struct kmem_cache_order_objects oo;
|
||||
|
||||
/* Allocation and freeing of slabs */
|
||||
struct kmem_cache_order_objects min;
|
||||
gfp_t allocflags; /* gfp flags to use on each alloc */
|
||||
int refcount; /* Refcount for slab cache destroy */
|
||||
void (*ctor)(void *);
|
||||
unsigned int inuse; /* Offset to metadata */
|
||||
unsigned int align; /* Alignment */
|
||||
unsigned int red_left_pad; /* Left redzone padding size */
|
||||
const char *name; /* Name (only for display!) */
|
||||
struct list_head list; /* List of slab caches */
|
||||
#ifdef CONFIG_SYSFS
|
||||
struct kobject kobj; /* For sysfs */
|
||||
#endif
|
||||
#ifdef CONFIG_SLAB_FREELIST_HARDENED
|
||||
unsigned long random;
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_NUMA
|
||||
/*
|
||||
* Defragmentation by allocating from a remote node.
|
||||
*/
|
||||
unsigned int remote_node_defrag_ratio;
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_SLAB_FREELIST_RANDOM
|
||||
unsigned int *random_seq;
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_KASAN_GENERIC
|
||||
struct kasan_cache kasan_info;
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_HARDENED_USERCOPY
|
||||
unsigned int useroffset; /* Usercopy region offset */
|
||||
unsigned int usersize; /* Usercopy region size */
|
||||
#endif
|
||||
|
||||
struct kmem_cache_node *node[MAX_NUMNODES];
|
||||
};
|
||||
|
||||
#if defined(CONFIG_SYSFS) && !defined(CONFIG_SLUB_TINY)
|
||||
#define SLAB_SUPPORTS_SYSFS
|
||||
void sysfs_slab_unlink(struct kmem_cache *);
|
||||
void sysfs_slab_release(struct kmem_cache *);
|
||||
#else
|
||||
static inline void sysfs_slab_unlink(struct kmem_cache *s)
|
||||
{
|
||||
}
|
||||
static inline void sysfs_slab_release(struct kmem_cache *s)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
|
||||
void *fixup_red_left(struct kmem_cache *s, void *p);
|
||||
|
||||
static inline void *nearest_obj(struct kmem_cache *cache, const struct slab *slab,
|
||||
void *x) {
|
||||
void *object = x - (x - slab_address(slab)) % cache->size;
|
||||
void *last_object = slab_address(slab) +
|
||||
(slab->objects - 1) * cache->size;
|
||||
void *result = (unlikely(object > last_object)) ? last_object : object;
|
||||
|
||||
result = fixup_red_left(cache, result);
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Determine object index from a given position */
|
||||
static inline unsigned int __obj_to_index(const struct kmem_cache *cache,
|
||||
void *addr, void *obj)
|
||||
{
|
||||
return reciprocal_divide(kasan_reset_tag(obj) - addr,
|
||||
cache->reciprocal_size);
|
||||
}
|
||||
|
||||
static inline unsigned int obj_to_index(const struct kmem_cache *cache,
|
||||
const struct slab *slab, void *obj)
|
||||
{
|
||||
if (is_kfence_address(obj))
|
||||
return 0;
|
||||
return __obj_to_index(cache, slab_address(slab), obj);
|
||||
}
|
||||
|
||||
static inline int objs_per_slab(const struct kmem_cache *cache,
|
||||
const struct slab *slab)
|
||||
{
|
||||
return slab->objects;
|
||||
}
|
||||
#endif /* _LINUX_SLUB_DEF_H */
|
@ -2125,11 +2125,6 @@ static struct cpuhp_step cpuhp_hp_states[] = {
|
||||
.startup.single = relay_prepare_cpu,
|
||||
.teardown.single = NULL,
|
||||
},
|
||||
[CPUHP_SLAB_PREPARE] = {
|
||||
.name = "slab:prepare",
|
||||
.startup.single = slab_prepare_cpu,
|
||||
.teardown.single = slab_dead_cpu,
|
||||
},
|
||||
[CPUHP_RCUTREE_PREP] = {
|
||||
.name = "RCU/tree:prepare",
|
||||
.startup.single = rcutree_prepare_cpu,
|
||||
|
@ -1970,7 +1970,6 @@ config FAULT_INJECTION
|
||||
config FAILSLAB
|
||||
bool "Fault-injection capability for kmalloc"
|
||||
depends on FAULT_INJECTION
|
||||
depends on SLAB || SLUB
|
||||
help
|
||||
Provide fault-injection capability for kmalloc.
|
||||
|
||||
|
@ -37,7 +37,7 @@ menuconfig KASAN
|
||||
(HAVE_ARCH_KASAN_SW_TAGS && CC_HAS_KASAN_SW_TAGS)) && \
|
||||
CC_HAS_WORKING_NOSANITIZE_ADDRESS) || \
|
||||
HAVE_ARCH_KASAN_HW_TAGS
|
||||
depends on (SLUB && SYSFS && !SLUB_TINY) || (SLAB && !DEBUG_SLAB)
|
||||
depends on SYSFS && !SLUB_TINY
|
||||
select STACKDEPOT_ALWAYS_INIT
|
||||
help
|
||||
Enables KASAN (Kernel Address Sanitizer) - a dynamic memory safety
|
||||
@ -78,7 +78,7 @@ config KASAN_GENERIC
|
||||
bool "Generic KASAN"
|
||||
depends on HAVE_ARCH_KASAN && CC_HAS_KASAN_GENERIC
|
||||
depends on CC_HAS_WORKING_NOSANITIZE_ADDRESS
|
||||
select SLUB_DEBUG if SLUB
|
||||
select SLUB_DEBUG
|
||||
select CONSTRUCTORS
|
||||
help
|
||||
Enables Generic KASAN.
|
||||
@ -89,13 +89,11 @@ config KASAN_GENERIC
|
||||
overhead of ~50% for dynamic allocations.
|
||||
The performance slowdown is ~x3.
|
||||
|
||||
(Incompatible with CONFIG_DEBUG_SLAB: the kernel does not boot.)
|
||||
|
||||
config KASAN_SW_TAGS
|
||||
bool "Software Tag-Based KASAN"
|
||||
depends on HAVE_ARCH_KASAN_SW_TAGS && CC_HAS_KASAN_SW_TAGS
|
||||
depends on CC_HAS_WORKING_NOSANITIZE_ADDRESS
|
||||
select SLUB_DEBUG if SLUB
|
||||
select SLUB_DEBUG
|
||||
select CONSTRUCTORS
|
||||
help
|
||||
Enables Software Tag-Based KASAN.
|
||||
@ -110,12 +108,9 @@ config KASAN_SW_TAGS
|
||||
May potentially introduce problems related to pointer casting and
|
||||
comparison, as it embeds a tag into the top byte of each pointer.
|
||||
|
||||
(Incompatible with CONFIG_DEBUG_SLAB: the kernel does not boot.)
|
||||
|
||||
config KASAN_HW_TAGS
|
||||
bool "Hardware Tag-Based KASAN"
|
||||
depends on HAVE_ARCH_KASAN_HW_TAGS
|
||||
depends on SLUB
|
||||
help
|
||||
Enables Hardware Tag-Based KASAN.
|
||||
|
||||
|
@ -5,7 +5,7 @@ config HAVE_ARCH_KFENCE
|
||||
|
||||
menuconfig KFENCE
|
||||
bool "KFENCE: low-overhead sampling-based memory safety error detector"
|
||||
depends on HAVE_ARCH_KFENCE && (SLAB || SLUB)
|
||||
depends on HAVE_ARCH_KFENCE
|
||||
select STACKTRACE
|
||||
select IRQ_WORK
|
||||
help
|
||||
|
@ -11,7 +11,7 @@ config HAVE_KMSAN_COMPILER
|
||||
config KMSAN
|
||||
bool "KMSAN: detector of uninitialized values use"
|
||||
depends on HAVE_ARCH_KMSAN && HAVE_KMSAN_COMPILER
|
||||
depends on SLUB && DEBUG_KERNEL && !KASAN && !KCSAN
|
||||
depends on DEBUG_KERNEL && !KASAN && !KCSAN
|
||||
depends on !PREEMPT_RT
|
||||
select STACKDEPOT
|
||||
select STACKDEPOT_ALWAYS_INIT
|
||||
|
68
mm/Kconfig
68
mm/Kconfig
@ -226,52 +226,17 @@ config ZSMALLOC_CHAIN_SIZE
|
||||
|
||||
For more information, see zsmalloc documentation.
|
||||
|
||||
menu "SLAB allocator options"
|
||||
|
||||
choice
|
||||
prompt "Choose SLAB allocator"
|
||||
default SLUB
|
||||
help
|
||||
This option allows to select a slab allocator.
|
||||
|
||||
config SLAB_DEPRECATED
|
||||
bool "SLAB (DEPRECATED)"
|
||||
depends on !PREEMPT_RT
|
||||
help
|
||||
Deprecated and scheduled for removal in a few cycles. Replaced by
|
||||
SLUB.
|
||||
|
||||
If you cannot migrate to SLUB, please contact linux-mm@kvack.org
|
||||
and the people listed in the SLAB ALLOCATOR section of MAINTAINERS
|
||||
file, explaining why.
|
||||
|
||||
The regular slab allocator that is established and known to work
|
||||
well in all environments. It organizes cache hot objects in
|
||||
per cpu and per node queues.
|
||||
menu "Slab allocator options"
|
||||
|
||||
config SLUB
|
||||
bool "SLUB (Unqueued Allocator)"
|
||||
help
|
||||
SLUB is a slab allocator that minimizes cache line usage
|
||||
instead of managing queues of cached objects (SLAB approach).
|
||||
Per cpu caching is realized using slabs of objects instead
|
||||
of queues of objects. SLUB can use memory efficiently
|
||||
and has enhanced diagnostics. SLUB is the default choice for
|
||||
a slab allocator.
|
||||
|
||||
endchoice
|
||||
|
||||
config SLAB
|
||||
bool
|
||||
default y
|
||||
depends on SLAB_DEPRECATED
|
||||
def_bool y
|
||||
|
||||
config SLUB_TINY
|
||||
bool "Configure SLUB for minimal memory footprint"
|
||||
depends on SLUB && EXPERT
|
||||
bool "Configure for minimal memory footprint"
|
||||
depends on EXPERT
|
||||
select SLAB_MERGE_DEFAULT
|
||||
help
|
||||
Configures the SLUB allocator in a way to achieve minimal memory
|
||||
Configures the slab allocator in a way to achieve minimal memory
|
||||
footprint, sacrificing scalability, debugging and other features.
|
||||
This is intended only for the smallest system that had used the
|
||||
SLOB allocator and is not recommended for systems with more than
|
||||
@ -282,7 +247,6 @@ config SLUB_TINY
|
||||
config SLAB_MERGE_DEFAULT
|
||||
bool "Allow slab caches to be merged"
|
||||
default y
|
||||
depends on SLAB || SLUB
|
||||
help
|
||||
For reduced kernel memory fragmentation, slab caches can be
|
||||
merged when they share the same size and other characteristics.
|
||||
@ -296,7 +260,7 @@ config SLAB_MERGE_DEFAULT
|
||||
|
||||
config SLAB_FREELIST_RANDOM
|
||||
bool "Randomize slab freelist"
|
||||
depends on SLAB || (SLUB && !SLUB_TINY)
|
||||
depends on !SLUB_TINY
|
||||
help
|
||||
Randomizes the freelist order used on creating new pages. This
|
||||
security feature reduces the predictability of the kernel slab
|
||||
@ -304,21 +268,19 @@ config SLAB_FREELIST_RANDOM
|
||||
|
||||
config SLAB_FREELIST_HARDENED
|
||||
bool "Harden slab freelist metadata"
|
||||
depends on SLAB || (SLUB && !SLUB_TINY)
|
||||
depends on !SLUB_TINY
|
||||
help
|
||||
Many kernel heap attacks try to target slab cache metadata and
|
||||
other infrastructure. This option makes minor performance
|
||||
sacrifices to harden the kernel slab allocator against common
|
||||
freelist exploit methods. Some slab implementations have more
|
||||
sanity-checking than others. This option is most effective with
|
||||
CONFIG_SLUB.
|
||||
freelist exploit methods.
|
||||
|
||||
config SLUB_STATS
|
||||
default n
|
||||
bool "Enable SLUB performance statistics"
|
||||
depends on SLUB && SYSFS && !SLUB_TINY
|
||||
bool "Enable performance statistics"
|
||||
depends on SYSFS && !SLUB_TINY
|
||||
help
|
||||
SLUB statistics are useful to debug SLUBs allocation behavior in
|
||||
The statistics are useful to debug slab allocation behavior in
|
||||
order to find ways to optimize the allocator. This should never be
|
||||
enabled for production use since keeping statistics slows down
|
||||
the allocator by a few percentage points. The slabinfo command
|
||||
@ -328,8 +290,8 @@ config SLUB_STATS
|
||||
|
||||
config SLUB_CPU_PARTIAL
|
||||
default y
|
||||
depends on SLUB && SMP && !SLUB_TINY
|
||||
bool "SLUB per cpu partial cache"
|
||||
depends on SMP && !SLUB_TINY
|
||||
bool "Enable per cpu partial caches"
|
||||
help
|
||||
Per cpu partial caches accelerate object allocation and freeing
|
||||
that is local to a processor at the price of more indeterminism
|
||||
@ -339,7 +301,7 @@ config SLUB_CPU_PARTIAL
|
||||
|
||||
config RANDOM_KMALLOC_CACHES
|
||||
default n
|
||||
depends on SLUB && !SLUB_TINY
|
||||
depends on !SLUB_TINY
|
||||
bool "Randomize slab caches for normal kmalloc"
|
||||
help
|
||||
A hardening feature that creates multiple copies of slab caches for
|
||||
@ -354,7 +316,7 @@ config RANDOM_KMALLOC_CACHES
|
||||
limited degree of memory and CPU overhead that relates to hardware and
|
||||
system workload.
|
||||
|
||||
endmenu # SLAB allocator options
|
||||
endmenu # Slab allocator options
|
||||
|
||||
config SHUFFLE_PAGE_ALLOCATOR
|
||||
bool "Page allocator randomization"
|
||||
|
@ -45,18 +45,10 @@ config DEBUG_PAGEALLOC_ENABLE_DEFAULT
|
||||
Enable debug page memory allocations by default? This value
|
||||
can be overridden by debug_pagealloc=off|on.
|
||||
|
||||
config DEBUG_SLAB
|
||||
bool "Debug slab memory allocations"
|
||||
depends on DEBUG_KERNEL && SLAB
|
||||
help
|
||||
Say Y here to have the kernel do limited verification on memory
|
||||
allocation as well as poisoning memory on free to catch use of freed
|
||||
memory. This can make kmalloc/kfree-intensive workloads much slower.
|
||||
|
||||
config SLUB_DEBUG
|
||||
default y
|
||||
bool "Enable SLUB debugging support" if EXPERT
|
||||
depends on SLUB && SYSFS && !SLUB_TINY
|
||||
depends on SYSFS && !SLUB_TINY
|
||||
select STACKDEPOT if STACKTRACE_SUPPORT
|
||||
help
|
||||
SLUB has extensive debug support features. Disabling these can
|
||||
@ -66,7 +58,7 @@ config SLUB_DEBUG
|
||||
|
||||
config SLUB_DEBUG_ON
|
||||
bool "SLUB debugging on by default"
|
||||
depends on SLUB && SLUB_DEBUG
|
||||
depends on SLUB_DEBUG
|
||||
select STACKDEPOT_ALWAYS_INIT if STACKTRACE_SUPPORT
|
||||
default n
|
||||
help
|
||||
@ -231,8 +223,8 @@ config DEBUG_KMEMLEAK
|
||||
allocations. See Documentation/dev-tools/kmemleak.rst for more
|
||||
details.
|
||||
|
||||
Enabling DEBUG_SLAB or SLUB_DEBUG may increase the chances
|
||||
of finding leaks due to the slab objects poisoning.
|
||||
Enabling SLUB_DEBUG may increase the chances of finding leaks
|
||||
due to the slab objects poisoning.
|
||||
|
||||
In order to access the kmemleak file, debugfs needs to be
|
||||
mounted (usually at /sys/kernel/debug).
|
||||
|
@ -4,7 +4,6 @@
|
||||
#
|
||||
|
||||
KASAN_SANITIZE_slab_common.o := n
|
||||
KASAN_SANITIZE_slab.o := n
|
||||
KASAN_SANITIZE_slub.o := n
|
||||
KCSAN_SANITIZE_kmemleak.o := n
|
||||
|
||||
@ -12,7 +11,6 @@ KCSAN_SANITIZE_kmemleak.o := n
|
||||
# the same word but accesses to different bits of that word. Re-enable KCSAN
|
||||
# for these when we have more consensus on what to do about them.
|
||||
KCSAN_SANITIZE_slab_common.o := n
|
||||
KCSAN_SANITIZE_slab.o := n
|
||||
KCSAN_SANITIZE_slub.o := n
|
||||
KCSAN_SANITIZE_page_alloc.o := n
|
||||
# But enable explicit instrumentation for memory barriers.
|
||||
@ -22,7 +20,6 @@ KCSAN_INSTRUMENT_BARRIERS := y
|
||||
# flaky coverage that is not a function of syscall inputs. E.g. slab is out of
|
||||
# free pages, or a task is migrated between nodes.
|
||||
KCOV_INSTRUMENT_slab_common.o := n
|
||||
KCOV_INSTRUMENT_slab.o := n
|
||||
KCOV_INSTRUMENT_slub.o := n
|
||||
KCOV_INSTRUMENT_page_alloc.o := n
|
||||
KCOV_INSTRUMENT_debug-pagealloc.o := n
|
||||
@ -66,6 +63,7 @@ obj-y += page-alloc.o
|
||||
obj-y += init-mm.o
|
||||
obj-y += memblock.o
|
||||
obj-y += $(memory-hotplug-y)
|
||||
obj-y += slub.o
|
||||
|
||||
ifdef CONFIG_MMU
|
||||
obj-$(CONFIG_ADVISE_SYSCALLS) += madvise.o
|
||||
@ -82,8 +80,6 @@ obj-$(CONFIG_SPARSEMEM_VMEMMAP) += sparse-vmemmap.o
|
||||
obj-$(CONFIG_MMU_NOTIFIER) += mmu_notifier.o
|
||||
obj-$(CONFIG_KSM) += ksm.o
|
||||
obj-$(CONFIG_PAGE_POISONING) += page_poison.o
|
||||
obj-$(CONFIG_SLAB) += slab.o
|
||||
obj-$(CONFIG_SLUB) += slub.o
|
||||
obj-$(CONFIG_KASAN) += kasan/
|
||||
obj-$(CONFIG_KFENCE) += kfence/
|
||||
obj-$(CONFIG_KMSAN) += kmsan/
|
||||
|
@ -36,7 +36,7 @@
|
||||
#include <linux/types.h>
|
||||
#include <linux/wait.h>
|
||||
|
||||
#if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_SLUB_DEBUG_ON)
|
||||
#ifdef CONFIG_SLUB_DEBUG_ON
|
||||
#define DMAPOOL_DEBUG 1
|
||||
#endif
|
||||
|
||||
|
@ -153,10 +153,6 @@ void __kasan_poison_object_data(struct kmem_cache *cache, void *object)
|
||||
* 2. A cache might be SLAB_TYPESAFE_BY_RCU, which means objects can be
|
||||
* accessed after being freed. We preassign tags for objects in these
|
||||
* caches as well.
|
||||
* 3. For SLAB allocator we can't preassign tags randomly since the freelist
|
||||
* is stored as an array of indexes instead of a linked list. Assign tags
|
||||
* based on objects indexes, so that objects that are next to each other
|
||||
* get different tags.
|
||||
*/
|
||||
static inline u8 assign_tag(struct kmem_cache *cache,
|
||||
const void *object, bool init)
|
||||
@ -171,17 +167,12 @@ static inline u8 assign_tag(struct kmem_cache *cache,
|
||||
if (!cache->ctor && !(cache->flags & SLAB_TYPESAFE_BY_RCU))
|
||||
return init ? KASAN_TAG_KERNEL : kasan_random_tag();
|
||||
|
||||
/* For caches that either have a constructor or SLAB_TYPESAFE_BY_RCU: */
|
||||
#ifdef CONFIG_SLAB
|
||||
/* For SLAB assign tags based on the object index in the freelist. */
|
||||
return (u8)obj_to_index(cache, virt_to_slab(object), (void *)object);
|
||||
#else
|
||||
/*
|
||||
* For SLUB assign a random tag during slab creation, otherwise reuse
|
||||
* For caches that either have a constructor or SLAB_TYPESAFE_BY_RCU,
|
||||
* assign a random tag during slab creation, otherwise reuse
|
||||
* the already assigned tag.
|
||||
*/
|
||||
return init ? kasan_random_tag() : get_tag(object);
|
||||
#endif
|
||||
}
|
||||
|
||||
void * __must_check __kasan_init_slab_obj(struct kmem_cache *cache,
|
||||
|
@ -373,8 +373,7 @@ void kasan_set_track(struct kasan_track *track, gfp_t flags);
|
||||
void kasan_save_alloc_info(struct kmem_cache *cache, void *object, gfp_t flags);
|
||||
void kasan_save_free_info(struct kmem_cache *cache, void *object);
|
||||
|
||||
#if defined(CONFIG_KASAN_GENERIC) && \
|
||||
(defined(CONFIG_SLAB) || defined(CONFIG_SLUB))
|
||||
#ifdef CONFIG_KASAN_GENERIC
|
||||
bool kasan_quarantine_put(struct kmem_cache *cache, void *object);
|
||||
void kasan_quarantine_reduce(void);
|
||||
void kasan_quarantine_remove_cache(struct kmem_cache *cache);
|
||||
|
@ -144,10 +144,6 @@ static void qlink_free(struct qlist_node *qlink, struct kmem_cache *cache)
|
||||
{
|
||||
void *object = qlink_to_object(qlink, cache);
|
||||
struct kasan_free_meta *meta = kasan_get_free_meta(cache, object);
|
||||
unsigned long flags;
|
||||
|
||||
if (IS_ENABLED(CONFIG_SLAB))
|
||||
local_irq_save(flags);
|
||||
|
||||
/*
|
||||
* If init_on_free is enabled and KASAN's free metadata is stored in
|
||||
@ -166,9 +162,6 @@ static void qlink_free(struct qlist_node *qlink, struct kmem_cache *cache)
|
||||
*(u8 *)kasan_mem_to_shadow(object) = KASAN_SLAB_FREE;
|
||||
|
||||
___cache_free(cache, object, _THIS_IP_);
|
||||
|
||||
if (IS_ENABLED(CONFIG_SLAB))
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
|
||||
static void qlist_free_all(struct qlist_head *q, struct kmem_cache *cache)
|
||||
|
@ -23,6 +23,7 @@
|
||||
#include <linux/stacktrace.h>
|
||||
#include <linux/string.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/vmalloc.h>
|
||||
#include <linux/kasan.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/sched/task_stack.h>
|
||||
|
@ -463,11 +463,7 @@ static void *kfence_guarded_alloc(struct kmem_cache *cache, size_t size, gfp_t g
|
||||
/* Set required slab fields. */
|
||||
slab = virt_to_slab((void *)meta->addr);
|
||||
slab->slab_cache = cache;
|
||||
#if defined(CONFIG_SLUB)
|
||||
slab->objects = 1;
|
||||
#elif defined(CONFIG_SLAB)
|
||||
slab->s_mem = addr;
|
||||
#endif
|
||||
|
||||
/* Memory initialization. */
|
||||
set_canary(meta);
|
||||
|
@ -64,6 +64,7 @@
|
||||
#include <linux/psi.h>
|
||||
#include <linux/seq_buf.h>
|
||||
#include <linux/sched/isolation.h>
|
||||
#include <linux/kmemleak.h>
|
||||
#include "internal.h"
|
||||
#include <net/sock.h>
|
||||
#include <net/ip.h>
|
||||
@ -5150,7 +5151,7 @@ out_kfree:
|
||||
return ret;
|
||||
}
|
||||
|
||||
#if defined(CONFIG_MEMCG_KMEM) && (defined(CONFIG_SLAB) || defined(CONFIG_SLUB_DEBUG))
|
||||
#if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_SLUB_DEBUG)
|
||||
static int mem_cgroup_slab_show(struct seq_file *m, void *p)
|
||||
{
|
||||
/*
|
||||
@ -5259,8 +5260,7 @@ static struct cftype mem_cgroup_legacy_files[] = {
|
||||
.write = mem_cgroup_reset,
|
||||
.read_u64 = mem_cgroup_read_u64,
|
||||
},
|
||||
#if defined(CONFIG_MEMCG_KMEM) && \
|
||||
(defined(CONFIG_SLAB) || defined(CONFIG_SLUB_DEBUG))
|
||||
#if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_SLUB_DEBUG)
|
||||
{
|
||||
.name = "kmem.slabinfo",
|
||||
.seq_show = mem_cgroup_slab_show,
|
||||
|
@ -20,7 +20,7 @@
|
||||
#include <linux/writeback.h>
|
||||
#include "slab.h"
|
||||
|
||||
#if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_SLUB_DEBUG_ON)
|
||||
#ifdef CONFIG_SLUB_DEBUG_ON
|
||||
static void poison_error(mempool_t *pool, void *element, size_t size,
|
||||
size_t byte)
|
||||
{
|
||||
@ -95,14 +95,14 @@ static void poison_element(mempool_t *pool, void *element)
|
||||
kunmap_atomic(addr);
|
||||
}
|
||||
}
|
||||
#else /* CONFIG_DEBUG_SLAB || CONFIG_SLUB_DEBUG_ON */
|
||||
#else /* CONFIG_SLUB_DEBUG_ON */
|
||||
static inline void check_element(mempool_t *pool, void *element)
|
||||
{
|
||||
}
|
||||
static inline void poison_element(mempool_t *pool, void *element)
|
||||
{
|
||||
}
|
||||
#endif /* CONFIG_DEBUG_SLAB || CONFIG_SLUB_DEBUG_ON */
|
||||
#endif /* CONFIG_SLUB_DEBUG_ON */
|
||||
|
||||
static __always_inline void kasan_poison_element(mempool_t *pool, void *element)
|
||||
{
|
||||
|
551
mm/slab.h
551
mm/slab.h
@ -1,10 +1,20 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef MM_SLAB_H
|
||||
#define MM_SLAB_H
|
||||
|
||||
#include <linux/reciprocal_div.h>
|
||||
#include <linux/list_lru.h>
|
||||
#include <linux/local_lock.h>
|
||||
#include <linux/random.h>
|
||||
#include <linux/kobject.h>
|
||||
#include <linux/sched/mm.h>
|
||||
#include <linux/memcontrol.h>
|
||||
#include <linux/kfence.h>
|
||||
#include <linux/kasan.h>
|
||||
|
||||
/*
|
||||
* Internal slab definitions
|
||||
*/
|
||||
void __init kmem_cache_init(void);
|
||||
|
||||
#ifdef CONFIG_64BIT
|
||||
# ifdef system_has_cmpxchg128
|
||||
@ -42,21 +52,6 @@ typedef union {
|
||||
struct slab {
|
||||
unsigned long __page_flags;
|
||||
|
||||
#if defined(CONFIG_SLAB)
|
||||
|
||||
struct kmem_cache *slab_cache;
|
||||
union {
|
||||
struct {
|
||||
struct list_head slab_list;
|
||||
void *freelist; /* array of free object indexes */
|
||||
void *s_mem; /* first object */
|
||||
};
|
||||
struct rcu_head rcu_head;
|
||||
};
|
||||
unsigned int active;
|
||||
|
||||
#elif defined(CONFIG_SLUB)
|
||||
|
||||
struct kmem_cache *slab_cache;
|
||||
union {
|
||||
struct {
|
||||
@ -91,10 +86,6 @@ struct slab {
|
||||
};
|
||||
unsigned int __unused;
|
||||
|
||||
#else
|
||||
#error "Unexpected slab allocator configured"
|
||||
#endif
|
||||
|
||||
atomic_t __page_refcount;
|
||||
#ifdef CONFIG_MEMCG
|
||||
unsigned long memcg_data;
|
||||
@ -111,7 +102,7 @@ SLAB_MATCH(memcg_data, memcg_data);
|
||||
#endif
|
||||
#undef SLAB_MATCH
|
||||
static_assert(sizeof(struct slab) <= sizeof(struct page));
|
||||
#if defined(system_has_freelist_aba) && defined(CONFIG_SLUB)
|
||||
#if defined(system_has_freelist_aba)
|
||||
static_assert(IS_ALIGNED(offsetof(struct slab, freelist), sizeof(freelist_aba_t)));
|
||||
#endif
|
||||
|
||||
@ -228,21 +219,138 @@ static inline size_t slab_size(const struct slab *slab)
|
||||
return PAGE_SIZE << slab_order(slab);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SLAB
|
||||
#include <linux/slab_def.h>
|
||||
#ifdef CONFIG_SLUB_CPU_PARTIAL
|
||||
#define slub_percpu_partial(c) ((c)->partial)
|
||||
|
||||
#define slub_set_percpu_partial(c, p) \
|
||||
({ \
|
||||
slub_percpu_partial(c) = (p)->next; \
|
||||
})
|
||||
|
||||
#define slub_percpu_partial_read_once(c) READ_ONCE(slub_percpu_partial(c))
|
||||
#else
|
||||
#define slub_percpu_partial(c) NULL
|
||||
|
||||
#define slub_set_percpu_partial(c, p)
|
||||
|
||||
#define slub_percpu_partial_read_once(c) NULL
|
||||
#endif // CONFIG_SLUB_CPU_PARTIAL
|
||||
|
||||
/*
|
||||
* Word size structure that can be atomically updated or read and that
|
||||
* contains both the order and the number of objects that a slab of the
|
||||
* given order would contain.
|
||||
*/
|
||||
struct kmem_cache_order_objects {
|
||||
unsigned int x;
|
||||
};
|
||||
|
||||
/*
|
||||
* Slab cache management.
|
||||
*/
|
||||
struct kmem_cache {
|
||||
#ifndef CONFIG_SLUB_TINY
|
||||
struct kmem_cache_cpu __percpu *cpu_slab;
|
||||
#endif
|
||||
/* Used for retrieving partial slabs, etc. */
|
||||
slab_flags_t flags;
|
||||
unsigned long min_partial;
|
||||
unsigned int size; /* Object size including metadata */
|
||||
unsigned int object_size; /* Object size without metadata */
|
||||
struct reciprocal_value reciprocal_size;
|
||||
unsigned int offset; /* Free pointer offset */
|
||||
#ifdef CONFIG_SLUB_CPU_PARTIAL
|
||||
/* Number of per cpu partial objects to keep around */
|
||||
unsigned int cpu_partial;
|
||||
/* Number of per cpu partial slabs to keep around */
|
||||
unsigned int cpu_partial_slabs;
|
||||
#endif
|
||||
struct kmem_cache_order_objects oo;
|
||||
|
||||
/* Allocation and freeing of slabs */
|
||||
struct kmem_cache_order_objects min;
|
||||
gfp_t allocflags; /* gfp flags to use on each alloc */
|
||||
int refcount; /* Refcount for slab cache destroy */
|
||||
void (*ctor)(void *object); /* Object constructor */
|
||||
unsigned int inuse; /* Offset to metadata */
|
||||
unsigned int align; /* Alignment */
|
||||
unsigned int red_left_pad; /* Left redzone padding size */
|
||||
const char *name; /* Name (only for display!) */
|
||||
struct list_head list; /* List of slab caches */
|
||||
#ifdef CONFIG_SYSFS
|
||||
struct kobject kobj; /* For sysfs */
|
||||
#endif
|
||||
#ifdef CONFIG_SLAB_FREELIST_HARDENED
|
||||
unsigned long random;
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_SLUB
|
||||
#include <linux/slub_def.h>
|
||||
#ifdef CONFIG_NUMA
|
||||
/*
|
||||
* Defragmentation by allocating from a remote node.
|
||||
*/
|
||||
unsigned int remote_node_defrag_ratio;
|
||||
#endif
|
||||
|
||||
#include <linux/memcontrol.h>
|
||||
#include <linux/fault-inject.h>
|
||||
#include <linux/kasan.h>
|
||||
#include <linux/kmemleak.h>
|
||||
#include <linux/random.h>
|
||||
#include <linux/sched/mm.h>
|
||||
#include <linux/list_lru.h>
|
||||
#ifdef CONFIG_SLAB_FREELIST_RANDOM
|
||||
unsigned int *random_seq;
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_KASAN_GENERIC
|
||||
struct kasan_cache kasan_info;
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_HARDENED_USERCOPY
|
||||
unsigned int useroffset; /* Usercopy region offset */
|
||||
unsigned int usersize; /* Usercopy region size */
|
||||
#endif
|
||||
|
||||
struct kmem_cache_node *node[MAX_NUMNODES];
|
||||
};
|
||||
|
||||
#if defined(CONFIG_SYSFS) && !defined(CONFIG_SLUB_TINY)
|
||||
#define SLAB_SUPPORTS_SYSFS
|
||||
void sysfs_slab_unlink(struct kmem_cache *s);
|
||||
void sysfs_slab_release(struct kmem_cache *s);
|
||||
#else
|
||||
static inline void sysfs_slab_unlink(struct kmem_cache *s) { }
|
||||
static inline void sysfs_slab_release(struct kmem_cache *s) { }
|
||||
#endif
|
||||
|
||||
void *fixup_red_left(struct kmem_cache *s, void *p);
|
||||
|
||||
static inline void *nearest_obj(struct kmem_cache *cache,
|
||||
const struct slab *slab, void *x)
|
||||
{
|
||||
void *object = x - (x - slab_address(slab)) % cache->size;
|
||||
void *last_object = slab_address(slab) +
|
||||
(slab->objects - 1) * cache->size;
|
||||
void *result = (unlikely(object > last_object)) ? last_object : object;
|
||||
|
||||
result = fixup_red_left(cache, result);
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Determine object index from a given position */
|
||||
static inline unsigned int __obj_to_index(const struct kmem_cache *cache,
|
||||
void *addr, void *obj)
|
||||
{
|
||||
return reciprocal_divide(kasan_reset_tag(obj) - addr,
|
||||
cache->reciprocal_size);
|
||||
}
|
||||
|
||||
static inline unsigned int obj_to_index(const struct kmem_cache *cache,
|
||||
const struct slab *slab, void *obj)
|
||||
{
|
||||
if (is_kfence_address(obj))
|
||||
return 0;
|
||||
return __obj_to_index(cache, slab_address(slab), obj);
|
||||
}
|
||||
|
||||
static inline int objs_per_slab(const struct kmem_cache *cache,
|
||||
const struct slab *slab)
|
||||
{
|
||||
return slab->objects;
|
||||
}
|
||||
|
||||
/*
|
||||
* State of the slab allocator.
|
||||
@ -281,19 +389,39 @@ extern const struct kmalloc_info_struct {
|
||||
void setup_kmalloc_cache_index_table(void);
|
||||
void create_kmalloc_caches(slab_flags_t);
|
||||
|
||||
/* Find the kmalloc slab corresponding for a certain size */
|
||||
struct kmem_cache *kmalloc_slab(size_t size, gfp_t flags, unsigned long caller);
|
||||
extern u8 kmalloc_size_index[24];
|
||||
|
||||
void *__kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags,
|
||||
int node, size_t orig_size,
|
||||
unsigned long caller);
|
||||
void __kmem_cache_free(struct kmem_cache *s, void *x, unsigned long caller);
|
||||
static inline unsigned int size_index_elem(unsigned int bytes)
|
||||
{
|
||||
return (bytes - 1) / 8;
|
||||
}
|
||||
|
||||
/*
|
||||
* Find the kmem_cache structure that serves a given size of
|
||||
* allocation
|
||||
*
|
||||
* This assumes size is larger than zero and not larger than
|
||||
* KMALLOC_MAX_CACHE_SIZE and the caller must check that.
|
||||
*/
|
||||
static inline struct kmem_cache *
|
||||
kmalloc_slab(size_t size, gfp_t flags, unsigned long caller)
|
||||
{
|
||||
unsigned int index;
|
||||
|
||||
if (size <= 192)
|
||||
index = kmalloc_size_index[size_index_elem(size)];
|
||||
else
|
||||
index = fls(size - 1);
|
||||
|
||||
return kmalloc_caches[kmalloc_type(flags, caller)][index];
|
||||
}
|
||||
|
||||
gfp_t kmalloc_fix_flags(gfp_t flags);
|
||||
|
||||
/* Functions provided by the slab allocators */
|
||||
int __kmem_cache_create(struct kmem_cache *, slab_flags_t flags);
|
||||
|
||||
void __init kmem_cache_init(void);
|
||||
void __init new_kmalloc_cache(int idx, enum kmalloc_cache_type type,
|
||||
slab_flags_t flags);
|
||||
extern void create_boot_cache(struct kmem_cache *, const char *name,
|
||||
@ -320,26 +448,16 @@ static inline bool is_kmalloc_cache(struct kmem_cache *s)
|
||||
SLAB_CACHE_DMA32 | SLAB_PANIC | \
|
||||
SLAB_TYPESAFE_BY_RCU | SLAB_DEBUG_OBJECTS )
|
||||
|
||||
#if defined(CONFIG_DEBUG_SLAB)
|
||||
#define SLAB_DEBUG_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER)
|
||||
#elif defined(CONFIG_SLUB_DEBUG)
|
||||
#ifdef CONFIG_SLUB_DEBUG
|
||||
#define SLAB_DEBUG_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
|
||||
SLAB_TRACE | SLAB_CONSISTENCY_CHECKS)
|
||||
#else
|
||||
#define SLAB_DEBUG_FLAGS (0)
|
||||
#endif
|
||||
|
||||
#if defined(CONFIG_SLAB)
|
||||
#define SLAB_CACHE_FLAGS (SLAB_MEM_SPREAD | SLAB_NOLEAKTRACE | \
|
||||
SLAB_RECLAIM_ACCOUNT | SLAB_TEMPORARY | \
|
||||
SLAB_ACCOUNT | SLAB_NO_MERGE)
|
||||
#elif defined(CONFIG_SLUB)
|
||||
#define SLAB_CACHE_FLAGS (SLAB_NOLEAKTRACE | SLAB_RECLAIM_ACCOUNT | \
|
||||
SLAB_TEMPORARY | SLAB_ACCOUNT | \
|
||||
SLAB_NO_USER_FLAGS | SLAB_KMALLOC | SLAB_NO_MERGE)
|
||||
#else
|
||||
#define SLAB_CACHE_FLAGS (SLAB_NOLEAKTRACE)
|
||||
#endif
|
||||
|
||||
/* Common flags available with current configuration */
|
||||
#define CACHE_CREATE_MASK (SLAB_CORE_FLAGS | SLAB_DEBUG_FLAGS | SLAB_CACHE_FLAGS)
|
||||
@ -387,12 +505,6 @@ void slabinfo_show_stats(struct seq_file *m, struct kmem_cache *s);
|
||||
ssize_t slabinfo_write(struct file *file, const char __user *buffer,
|
||||
size_t count, loff_t *ppos);
|
||||
|
||||
static inline enum node_stat_item cache_vmstat_idx(struct kmem_cache *s)
|
||||
{
|
||||
return (s->flags & SLAB_RECLAIM_ACCOUNT) ?
|
||||
NR_SLAB_RECLAIMABLE_B : NR_SLAB_UNRECLAIMABLE_B;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SLUB_DEBUG
|
||||
#ifdef CONFIG_SLUB_DEBUG_ON
|
||||
DECLARE_STATIC_KEY_TRUE(slub_debug_enabled);
|
||||
@ -452,238 +564,32 @@ int memcg_alloc_slab_cgroups(struct slab *slab, struct kmem_cache *s,
|
||||
gfp_t gfp, bool new_slab);
|
||||
void mod_objcg_state(struct obj_cgroup *objcg, struct pglist_data *pgdat,
|
||||
enum node_stat_item idx, int nr);
|
||||
|
||||
static inline void memcg_free_slab_cgroups(struct slab *slab)
|
||||
{
|
||||
kfree(slab_objcgs(slab));
|
||||
slab->memcg_data = 0;
|
||||
}
|
||||
|
||||
static inline size_t obj_full_size(struct kmem_cache *s)
|
||||
{
|
||||
/*
|
||||
* For each accounted object there is an extra space which is used
|
||||
* to store obj_cgroup membership. Charge it too.
|
||||
*/
|
||||
return s->size + sizeof(struct obj_cgroup *);
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns false if the allocation should fail.
|
||||
*/
|
||||
static inline bool memcg_slab_pre_alloc_hook(struct kmem_cache *s,
|
||||
struct list_lru *lru,
|
||||
struct obj_cgroup **objcgp,
|
||||
size_t objects, gfp_t flags)
|
||||
{
|
||||
struct obj_cgroup *objcg;
|
||||
|
||||
if (!memcg_kmem_online())
|
||||
return true;
|
||||
|
||||
if (!(flags & __GFP_ACCOUNT) && !(s->flags & SLAB_ACCOUNT))
|
||||
return true;
|
||||
|
||||
/*
|
||||
* The obtained objcg pointer is safe to use within the current scope,
|
||||
* defined by current task or set_active_memcg() pair.
|
||||
* obj_cgroup_get() is used to get a permanent reference.
|
||||
*/
|
||||
objcg = current_obj_cgroup();
|
||||
if (!objcg)
|
||||
return true;
|
||||
|
||||
if (lru) {
|
||||
int ret;
|
||||
struct mem_cgroup *memcg;
|
||||
|
||||
memcg = get_mem_cgroup_from_objcg(objcg);
|
||||
ret = memcg_list_lru_alloc(memcg, lru, flags);
|
||||
css_put(&memcg->css);
|
||||
|
||||
if (ret)
|
||||
return false;
|
||||
}
|
||||
|
||||
if (obj_cgroup_charge(objcg, flags, objects * obj_full_size(s)))
|
||||
return false;
|
||||
|
||||
*objcgp = objcg;
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline void memcg_slab_post_alloc_hook(struct kmem_cache *s,
|
||||
struct obj_cgroup *objcg,
|
||||
gfp_t flags, size_t size,
|
||||
void **p)
|
||||
{
|
||||
struct slab *slab;
|
||||
unsigned long off;
|
||||
size_t i;
|
||||
|
||||
if (!memcg_kmem_online() || !objcg)
|
||||
return;
|
||||
|
||||
for (i = 0; i < size; i++) {
|
||||
if (likely(p[i])) {
|
||||
slab = virt_to_slab(p[i]);
|
||||
|
||||
if (!slab_objcgs(slab) &&
|
||||
memcg_alloc_slab_cgroups(slab, s, flags,
|
||||
false)) {
|
||||
obj_cgroup_uncharge(objcg, obj_full_size(s));
|
||||
continue;
|
||||
}
|
||||
|
||||
off = obj_to_index(s, slab, p[i]);
|
||||
obj_cgroup_get(objcg);
|
||||
slab_objcgs(slab)[off] = objcg;
|
||||
mod_objcg_state(objcg, slab_pgdat(slab),
|
||||
cache_vmstat_idx(s), obj_full_size(s));
|
||||
} else {
|
||||
obj_cgroup_uncharge(objcg, obj_full_size(s));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static inline void memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab,
|
||||
void **p, int objects)
|
||||
{
|
||||
struct obj_cgroup **objcgs;
|
||||
int i;
|
||||
|
||||
if (!memcg_kmem_online())
|
||||
return;
|
||||
|
||||
objcgs = slab_objcgs(slab);
|
||||
if (!objcgs)
|
||||
return;
|
||||
|
||||
for (i = 0; i < objects; i++) {
|
||||
struct obj_cgroup *objcg;
|
||||
unsigned int off;
|
||||
|
||||
off = obj_to_index(s, slab, p[i]);
|
||||
objcg = objcgs[off];
|
||||
if (!objcg)
|
||||
continue;
|
||||
|
||||
objcgs[off] = NULL;
|
||||
obj_cgroup_uncharge(objcg, obj_full_size(s));
|
||||
mod_objcg_state(objcg, slab_pgdat(slab), cache_vmstat_idx(s),
|
||||
-obj_full_size(s));
|
||||
obj_cgroup_put(objcg);
|
||||
}
|
||||
}
|
||||
|
||||
#else /* CONFIG_MEMCG_KMEM */
|
||||
static inline struct obj_cgroup **slab_objcgs(struct slab *slab)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static inline struct mem_cgroup *memcg_from_slab_obj(void *ptr)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static inline int memcg_alloc_slab_cgroups(struct slab *slab,
|
||||
struct kmem_cache *s, gfp_t gfp,
|
||||
bool new_slab)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline void memcg_free_slab_cgroups(struct slab *slab)
|
||||
{
|
||||
}
|
||||
|
||||
static inline bool memcg_slab_pre_alloc_hook(struct kmem_cache *s,
|
||||
struct list_lru *lru,
|
||||
struct obj_cgroup **objcgp,
|
||||
size_t objects, gfp_t flags)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline void memcg_slab_post_alloc_hook(struct kmem_cache *s,
|
||||
struct obj_cgroup *objcg,
|
||||
gfp_t flags, size_t size,
|
||||
void **p)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab,
|
||||
void **p, int objects)
|
||||
{
|
||||
}
|
||||
#endif /* CONFIG_MEMCG_KMEM */
|
||||
|
||||
static inline struct kmem_cache *virt_to_cache(const void *obj)
|
||||
{
|
||||
struct slab *slab;
|
||||
|
||||
slab = virt_to_slab(obj);
|
||||
if (WARN_ONCE(!slab, "%s: Object is not a Slab page!\n",
|
||||
__func__))
|
||||
return NULL;
|
||||
return slab->slab_cache;
|
||||
}
|
||||
|
||||
static __always_inline void account_slab(struct slab *slab, int order,
|
||||
struct kmem_cache *s, gfp_t gfp)
|
||||
{
|
||||
if (memcg_kmem_online() && (s->flags & SLAB_ACCOUNT))
|
||||
memcg_alloc_slab_cgroups(slab, s, gfp, true);
|
||||
|
||||
mod_node_page_state(slab_pgdat(slab), cache_vmstat_idx(s),
|
||||
PAGE_SIZE << order);
|
||||
}
|
||||
|
||||
static __always_inline void unaccount_slab(struct slab *slab, int order,
|
||||
struct kmem_cache *s)
|
||||
{
|
||||
if (memcg_kmem_online())
|
||||
memcg_free_slab_cgroups(slab);
|
||||
|
||||
mod_node_page_state(slab_pgdat(slab), cache_vmstat_idx(s),
|
||||
-(PAGE_SIZE << order));
|
||||
}
|
||||
|
||||
static inline struct kmem_cache *cache_from_obj(struct kmem_cache *s, void *x)
|
||||
{
|
||||
struct kmem_cache *cachep;
|
||||
|
||||
if (!IS_ENABLED(CONFIG_SLAB_FREELIST_HARDENED) &&
|
||||
!kmem_cache_debug_flags(s, SLAB_CONSISTENCY_CHECKS))
|
||||
return s;
|
||||
|
||||
cachep = virt_to_cache(x);
|
||||
if (WARN(cachep && cachep != s,
|
||||
"%s: Wrong slab cache. %s but object is from %s\n",
|
||||
__func__, s->name, cachep->name))
|
||||
print_tracking(cachep, x);
|
||||
return cachep;
|
||||
}
|
||||
|
||||
void free_large_kmalloc(struct folio *folio, void *object);
|
||||
|
||||
size_t __ksize(const void *objp);
|
||||
|
||||
static inline size_t slab_ksize(const struct kmem_cache *s)
|
||||
{
|
||||
#ifndef CONFIG_SLUB
|
||||
return s->object_size;
|
||||
|
||||
#else /* CONFIG_SLUB */
|
||||
# ifdef CONFIG_SLUB_DEBUG
|
||||
#ifdef CONFIG_SLUB_DEBUG
|
||||
/*
|
||||
* Debugging requires use of the padding between object
|
||||
* and whatever may come after it.
|
||||
*/
|
||||
if (s->flags & (SLAB_RED_ZONE | SLAB_POISON))
|
||||
return s->object_size;
|
||||
# endif
|
||||
#endif
|
||||
if (s->flags & SLAB_KASAN)
|
||||
return s->object_size;
|
||||
/*
|
||||
@ -697,128 +603,9 @@ static inline size_t slab_ksize(const struct kmem_cache *s)
|
||||
* Else we can use all the padding etc for the allocation
|
||||
*/
|
||||
return s->size;
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s,
|
||||
struct list_lru *lru,
|
||||
struct obj_cgroup **objcgp,
|
||||
size_t size, gfp_t flags)
|
||||
{
|
||||
flags &= gfp_allowed_mask;
|
||||
|
||||
might_alloc(flags);
|
||||
|
||||
if (should_failslab(s, flags))
|
||||
return NULL;
|
||||
|
||||
if (!memcg_slab_pre_alloc_hook(s, lru, objcgp, size, flags))
|
||||
return NULL;
|
||||
|
||||
return s;
|
||||
}
|
||||
|
||||
static inline void slab_post_alloc_hook(struct kmem_cache *s,
|
||||
struct obj_cgroup *objcg, gfp_t flags,
|
||||
size_t size, void **p, bool init,
|
||||
unsigned int orig_size)
|
||||
{
|
||||
unsigned int zero_size = s->object_size;
|
||||
bool kasan_init = init;
|
||||
size_t i;
|
||||
|
||||
flags &= gfp_allowed_mask;
|
||||
|
||||
/*
|
||||
* For kmalloc object, the allocated memory size(object_size) is likely
|
||||
* larger than the requested size(orig_size). If redzone check is
|
||||
* enabled for the extra space, don't zero it, as it will be redzoned
|
||||
* soon. The redzone operation for this extra space could be seen as a
|
||||
* replacement of current poisoning under certain debug option, and
|
||||
* won't break other sanity checks.
|
||||
*/
|
||||
if (kmem_cache_debug_flags(s, SLAB_STORE_USER | SLAB_RED_ZONE) &&
|
||||
(s->flags & SLAB_KMALLOC))
|
||||
zero_size = orig_size;
|
||||
|
||||
/*
|
||||
* When slub_debug is enabled, avoid memory initialization integrated
|
||||
* into KASAN and instead zero out the memory via the memset below with
|
||||
* the proper size. Otherwise, KASAN might overwrite SLUB redzones and
|
||||
* cause false-positive reports. This does not lead to a performance
|
||||
* penalty on production builds, as slub_debug is not intended to be
|
||||
* enabled there.
|
||||
*/
|
||||
if (__slub_debug_enabled())
|
||||
kasan_init = false;
|
||||
|
||||
/*
|
||||
* As memory initialization might be integrated into KASAN,
|
||||
* kasan_slab_alloc and initialization memset must be
|
||||
* kept together to avoid discrepancies in behavior.
|
||||
*
|
||||
* As p[i] might get tagged, memset and kmemleak hook come after KASAN.
|
||||
*/
|
||||
for (i = 0; i < size; i++) {
|
||||
p[i] = kasan_slab_alloc(s, p[i], flags, kasan_init);
|
||||
if (p[i] && init && (!kasan_init || !kasan_has_integrated_init()))
|
||||
memset(p[i], 0, zero_size);
|
||||
kmemleak_alloc_recursive(p[i], s->object_size, 1,
|
||||
s->flags, flags);
|
||||
kmsan_slab_alloc(s, p[i], flags);
|
||||
}
|
||||
|
||||
memcg_slab_post_alloc_hook(s, objcg, flags, size, p);
|
||||
}
|
||||
|
||||
/*
|
||||
* The slab lists for all objects.
|
||||
*/
|
||||
struct kmem_cache_node {
|
||||
#ifdef CONFIG_SLAB
|
||||
raw_spinlock_t list_lock;
|
||||
struct list_head slabs_partial; /* partial list first, better asm code */
|
||||
struct list_head slabs_full;
|
||||
struct list_head slabs_free;
|
||||
unsigned long total_slabs; /* length of all slab lists */
|
||||
unsigned long free_slabs; /* length of free slab list only */
|
||||
unsigned long free_objects;
|
||||
unsigned int free_limit;
|
||||
unsigned int colour_next; /* Per-node cache coloring */
|
||||
struct array_cache *shared; /* shared per node */
|
||||
struct alien_cache **alien; /* on other nodes */
|
||||
unsigned long next_reap; /* updated without locking */
|
||||
int free_touched; /* updated without locking */
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_SLUB
|
||||
spinlock_t list_lock;
|
||||
unsigned long nr_partial;
|
||||
struct list_head partial;
|
||||
#ifdef CONFIG_SLUB_DEBUG
|
||||
atomic_long_t nr_slabs;
|
||||
atomic_long_t total_objects;
|
||||
struct list_head full;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
};
|
||||
|
||||
static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node)
|
||||
{
|
||||
return s->node[node];
|
||||
}
|
||||
|
||||
/*
|
||||
* Iterator over all nodes. The body will be executed for each node that has
|
||||
* a kmem_cache_node structure allocated (which is true for all online nodes)
|
||||
*/
|
||||
#define for_each_kmem_cache_node(__s, __node, __n) \
|
||||
for (__node = 0; __node < nr_node_ids; __node++) \
|
||||
if ((__n = get_node(__s, __node)))
|
||||
|
||||
|
||||
#if defined(CONFIG_SLAB) || defined(CONFIG_SLUB_DEBUG)
|
||||
void dump_unreclaimable_slab(void);
|
||||
#else
|
||||
static inline void dump_unreclaimable_slab(void)
|
||||
|
231
mm/slab_common.c
231
mm/slab_common.c
@ -21,6 +21,7 @@
|
||||
#include <linux/swiotlb.h>
|
||||
#include <linux/proc_fs.h>
|
||||
#include <linux/debugfs.h>
|
||||
#include <linux/kmemleak.h>
|
||||
#include <linux/kasan.h>
|
||||
#include <asm/cacheflush.h>
|
||||
#include <asm/tlbflush.h>
|
||||
@ -71,10 +72,8 @@ static int __init setup_slab_merge(char *str)
|
||||
return 1;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SLUB
|
||||
__setup_param("slub_nomerge", slub_nomerge, setup_slab_nomerge, 0);
|
||||
__setup_param("slub_merge", slub_merge, setup_slab_merge, 0);
|
||||
#endif
|
||||
|
||||
__setup("slab_nomerge", setup_slab_nomerge);
|
||||
__setup("slab_merge", setup_slab_merge);
|
||||
@ -197,10 +196,6 @@ struct kmem_cache *find_mergeable(unsigned int size, unsigned int align,
|
||||
if (s->size - size >= sizeof(void *))
|
||||
continue;
|
||||
|
||||
if (IS_ENABLED(CONFIG_SLAB) && align &&
|
||||
(align > s->align || s->align % align))
|
||||
continue;
|
||||
|
||||
return s;
|
||||
}
|
||||
return NULL;
|
||||
@ -670,7 +665,7 @@ EXPORT_SYMBOL(random_kmalloc_seed);
|
||||
* of two cache sizes there. The size of larger slabs can be determined using
|
||||
* fls.
|
||||
*/
|
||||
static u8 size_index[24] __ro_after_init = {
|
||||
u8 kmalloc_size_index[24] __ro_after_init = {
|
||||
3, /* 8 */
|
||||
4, /* 16 */
|
||||
5, /* 24 */
|
||||
@ -697,33 +692,6 @@ static u8 size_index[24] __ro_after_init = {
|
||||
2 /* 192 */
|
||||
};
|
||||
|
||||
static inline unsigned int size_index_elem(unsigned int bytes)
|
||||
{
|
||||
return (bytes - 1) / 8;
|
||||
}
|
||||
|
||||
/*
|
||||
* Find the kmem_cache structure that serves a given size of
|
||||
* allocation
|
||||
*/
|
||||
struct kmem_cache *kmalloc_slab(size_t size, gfp_t flags, unsigned long caller)
|
||||
{
|
||||
unsigned int index;
|
||||
|
||||
if (size <= 192) {
|
||||
if (!size)
|
||||
return ZERO_SIZE_PTR;
|
||||
|
||||
index = size_index[size_index_elem(size)];
|
||||
} else {
|
||||
if (WARN_ON_ONCE(size > KMALLOC_MAX_CACHE_SIZE))
|
||||
return NULL;
|
||||
index = fls(size - 1);
|
||||
}
|
||||
|
||||
return kmalloc_caches[kmalloc_type(flags, caller)][index];
|
||||
}
|
||||
|
||||
size_t kmalloc_size_roundup(size_t size)
|
||||
{
|
||||
if (size && size <= KMALLOC_MAX_CACHE_SIZE) {
|
||||
@ -848,9 +816,9 @@ void __init setup_kmalloc_cache_index_table(void)
|
||||
for (i = 8; i < KMALLOC_MIN_SIZE; i += 8) {
|
||||
unsigned int elem = size_index_elem(i);
|
||||
|
||||
if (elem >= ARRAY_SIZE(size_index))
|
||||
if (elem >= ARRAY_SIZE(kmalloc_size_index))
|
||||
break;
|
||||
size_index[elem] = KMALLOC_SHIFT_LOW;
|
||||
kmalloc_size_index[elem] = KMALLOC_SHIFT_LOW;
|
||||
}
|
||||
|
||||
if (KMALLOC_MIN_SIZE >= 64) {
|
||||
@ -859,7 +827,7 @@ void __init setup_kmalloc_cache_index_table(void)
|
||||
* is 64 byte.
|
||||
*/
|
||||
for (i = 64 + 8; i <= 96; i += 8)
|
||||
size_index[size_index_elem(i)] = 7;
|
||||
kmalloc_size_index[size_index_elem(i)] = 7;
|
||||
|
||||
}
|
||||
|
||||
@ -870,7 +838,7 @@ void __init setup_kmalloc_cache_index_table(void)
|
||||
* instead.
|
||||
*/
|
||||
for (i = 128 + 8; i <= 192; i += 8)
|
||||
size_index[size_index_elem(i)] = 8;
|
||||
kmalloc_size_index[size_index_elem(i)] = 8;
|
||||
}
|
||||
}
|
||||
|
||||
@ -968,95 +936,6 @@ void __init create_kmalloc_caches(slab_flags_t flags)
|
||||
slab_state = UP;
|
||||
}
|
||||
|
||||
void free_large_kmalloc(struct folio *folio, void *object)
|
||||
{
|
||||
unsigned int order = folio_order(folio);
|
||||
|
||||
if (WARN_ON_ONCE(order == 0))
|
||||
pr_warn_once("object pointer: 0x%p\n", object);
|
||||
|
||||
kmemleak_free(object);
|
||||
kasan_kfree_large(object);
|
||||
kmsan_kfree_large(object);
|
||||
|
||||
mod_lruvec_page_state(folio_page(folio, 0), NR_SLAB_UNRECLAIMABLE_B,
|
||||
-(PAGE_SIZE << order));
|
||||
__free_pages(folio_page(folio, 0), order);
|
||||
}
|
||||
|
||||
static void *__kmalloc_large_node(size_t size, gfp_t flags, int node);
|
||||
static __always_inline
|
||||
void *__do_kmalloc_node(size_t size, gfp_t flags, int node, unsigned long caller)
|
||||
{
|
||||
struct kmem_cache *s;
|
||||
void *ret;
|
||||
|
||||
if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
|
||||
ret = __kmalloc_large_node(size, flags, node);
|
||||
trace_kmalloc(caller, ret, size,
|
||||
PAGE_SIZE << get_order(size), flags, node);
|
||||
return ret;
|
||||
}
|
||||
|
||||
s = kmalloc_slab(size, flags, caller);
|
||||
|
||||
if (unlikely(ZERO_OR_NULL_PTR(s)))
|
||||
return s;
|
||||
|
||||
ret = __kmem_cache_alloc_node(s, flags, node, size, caller);
|
||||
ret = kasan_kmalloc(s, ret, size, flags);
|
||||
trace_kmalloc(caller, ret, size, s->size, flags, node);
|
||||
return ret;
|
||||
}
|
||||
|
||||
void *__kmalloc_node(size_t size, gfp_t flags, int node)
|
||||
{
|
||||
return __do_kmalloc_node(size, flags, node, _RET_IP_);
|
||||
}
|
||||
EXPORT_SYMBOL(__kmalloc_node);
|
||||
|
||||
void *__kmalloc(size_t size, gfp_t flags)
|
||||
{
|
||||
return __do_kmalloc_node(size, flags, NUMA_NO_NODE, _RET_IP_);
|
||||
}
|
||||
EXPORT_SYMBOL(__kmalloc);
|
||||
|
||||
void *__kmalloc_node_track_caller(size_t size, gfp_t flags,
|
||||
int node, unsigned long caller)
|
||||
{
|
||||
return __do_kmalloc_node(size, flags, node, caller);
|
||||
}
|
||||
EXPORT_SYMBOL(__kmalloc_node_track_caller);
|
||||
|
||||
/**
|
||||
* kfree - free previously allocated memory
|
||||
* @object: pointer returned by kmalloc() or kmem_cache_alloc()
|
||||
*
|
||||
* If @object is NULL, no operation is performed.
|
||||
*/
|
||||
void kfree(const void *object)
|
||||
{
|
||||
struct folio *folio;
|
||||
struct slab *slab;
|
||||
struct kmem_cache *s;
|
||||
|
||||
trace_kfree(_RET_IP_, object);
|
||||
|
||||
if (unlikely(ZERO_OR_NULL_PTR(object)))
|
||||
return;
|
||||
|
||||
folio = virt_to_folio(object);
|
||||
if (unlikely(!folio_test_slab(folio))) {
|
||||
free_large_kmalloc(folio, (void *)object);
|
||||
return;
|
||||
}
|
||||
|
||||
slab = folio_slab(folio);
|
||||
s = slab->slab_cache;
|
||||
__kmem_cache_free(s, (void *)object, _RET_IP_);
|
||||
}
|
||||
EXPORT_SYMBOL(kfree);
|
||||
|
||||
/**
|
||||
* __ksize -- Report full size of underlying allocation
|
||||
* @object: pointer to the object
|
||||
@ -1093,30 +972,6 @@ size_t __ksize(const void *object)
|
||||
return slab_ksize(folio_slab(folio)->slab_cache);
|
||||
}
|
||||
|
||||
/*
 * Allocate @size bytes from cache @s with no node preference, emit a kmalloc
 * trace event for the raw allocation, then run the KASAN hook and return the
 * pointer it produces.
 */
void *kmalloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size)
{
	void *obj;

	obj = __kmem_cache_alloc_node(s, gfpflags, NUMA_NO_NODE, size, _RET_IP_);

	/* The trace event is emitted before the KASAN hook runs. */
	trace_kmalloc(_RET_IP_, obj, size, s->size, gfpflags, NUMA_NO_NODE);

	return kasan_kmalloc(s, obj, size, gfpflags);
}
EXPORT_SYMBOL(kmalloc_trace);
|
||||
|
||||
/*
 * Node-aware counterpart of kmalloc_trace(): allocate @size bytes from cache
 * @s on @node, emit a kmalloc trace event, then run the KASAN hook and return
 * the pointer it produces.
 */
void *kmalloc_node_trace(struct kmem_cache *s, gfp_t gfpflags,
			 int node, size_t size)
{
	void *obj;

	obj = __kmem_cache_alloc_node(s, gfpflags, node, size, _RET_IP_);

	/* The trace event is emitted before the KASAN hook runs. */
	trace_kmalloc(_RET_IP_, obj, size, s->size, gfpflags, node);

	return kasan_kmalloc(s, obj, size, gfpflags);
}
EXPORT_SYMBOL(kmalloc_node_trace);
|
||||
|
||||
gfp_t kmalloc_fix_flags(gfp_t flags)
|
||||
{
|
||||
gfp_t invalid_mask = flags & GFP_SLAB_BUG_MASK;
|
||||
@ -1129,57 +984,6 @@ gfp_t kmalloc_fix_flags(gfp_t flags)
|
||||
return flags;
|
||||
}
|
||||
|
||||
/*
|
||||
* To avoid unnecessary overhead, we pass through large allocation requests
|
||||
* directly to the page allocator. We use __GFP_COMP, because we will need to
|
||||
* know the allocation order to free the pages properly in kfree.
|
||||
*/
|
||||
|
||||
static void *__kmalloc_large_node(size_t size, gfp_t flags, int node)
|
||||
{
|
||||
struct page *page;
|
||||
void *ptr = NULL;
|
||||
unsigned int order = get_order(size);
|
||||
|
||||
if (unlikely(flags & GFP_SLAB_BUG_MASK))
|
||||
flags = kmalloc_fix_flags(flags);
|
||||
|
||||
flags |= __GFP_COMP;
|
||||
page = alloc_pages_node(node, flags, order);
|
||||
if (page) {
|
||||
ptr = page_address(page);
|
||||
mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B,
|
||||
PAGE_SIZE << order);
|
||||
}
|
||||
|
||||
ptr = kasan_kmalloc_large(ptr, size, flags);
|
||||
/* As ptr might get tagged, call kmemleak hook after KASAN. */
|
||||
kmemleak_alloc(ptr, size, 1, flags);
|
||||
kmsan_kmalloc_large(ptr, size, flags);
|
||||
|
||||
return ptr;
|
||||
}
|
||||
|
||||
/*
 * Page-allocator backed allocation with no node preference. Emits a kmalloc
 * trace event whose allocated size is the full PAGE_SIZE << order span.
 */
void *kmalloc_large(size_t size, gfp_t flags)
{
	void *ptr;

	ptr = __kmalloc_large_node(size, flags, NUMA_NO_NODE);
	trace_kmalloc(_RET_IP_, ptr, size, PAGE_SIZE << get_order(size),
		      flags, NUMA_NO_NODE);

	return ptr;
}
EXPORT_SYMBOL(kmalloc_large);
|
||||
|
||||
/*
 * Page-allocator backed allocation on @node. Emits a kmalloc trace event
 * whose allocated size is the full PAGE_SIZE << order span.
 */
void *kmalloc_large_node(size_t size, gfp_t flags, int node)
{
	void *ptr;

	ptr = __kmalloc_large_node(size, flags, node);
	trace_kmalloc(_RET_IP_, ptr, size, PAGE_SIZE << get_order(size),
		      flags, node);

	return ptr;
}
EXPORT_SYMBOL(kmalloc_large_node);
|
||||
|
||||
#ifdef CONFIG_SLAB_FREELIST_RANDOM
|
||||
/* Randomize a generic freelist */
|
||||
static void freelist_randomize(unsigned int *list,
|
||||
@ -1222,12 +1026,8 @@ void cache_random_seq_destroy(struct kmem_cache *cachep)
|
||||
}
|
||||
#endif /* CONFIG_SLAB_FREELIST_RANDOM */
|
||||
|
||||
#if defined(CONFIG_SLAB) || defined(CONFIG_SLUB_DEBUG)
|
||||
#ifdef CONFIG_SLAB
|
||||
#define SLABINFO_RIGHTS (0600)
|
||||
#else
|
||||
#ifdef CONFIG_SLUB_DEBUG
|
||||
#define SLABINFO_RIGHTS (0400)
|
||||
#endif
|
||||
|
||||
static void print_slabinfo_header(struct seq_file *m)
|
||||
{
|
||||
@ -1235,18 +1035,10 @@ static void print_slabinfo_header(struct seq_file *m)
|
||||
* Output format version, so at least we can change it
|
||||
* without _too_ many complaints.
|
||||
*/
|
||||
#ifdef CONFIG_DEBUG_SLAB
|
||||
seq_puts(m, "slabinfo - version: 2.1 (statistics)\n");
|
||||
#else
|
||||
seq_puts(m, "slabinfo - version: 2.1\n");
|
||||
#endif
|
||||
seq_puts(m, "# name <active_objs> <num_objs> <objsize> <objperslab> <pagesperslab>");
|
||||
seq_puts(m, " : tunables <limit> <batchcount> <sharedfactor>");
|
||||
seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>");
|
||||
#ifdef CONFIG_DEBUG_SLAB
|
||||
seq_puts(m, " : globalstat <listallocs> <maxobjs> <grown> <reaped> <error> <maxfreeable> <nodeallocs> <remotefrees> <alienoverflow>");
|
||||
seq_puts(m, " : cpustat <allochit> <allocmiss> <freehit> <freemiss>");
|
||||
#endif
|
||||
seq_putc(m, '\n');
|
||||
}
|
||||
|
||||
@ -1370,7 +1162,7 @@ static int __init slab_proc_init(void)
|
||||
}
|
||||
module_init(slab_proc_init);
|
||||
|
||||
#endif /* CONFIG_SLAB || CONFIG_SLUB_DEBUG */
|
||||
#endif /* CONFIG_SLUB_DEBUG */
|
||||
|
||||
static __always_inline __realloc_size(2) void *
|
||||
__do_krealloc(const void *p, size_t new_size, gfp_t flags)
|
||||
@ -1488,10 +1280,3 @@ EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc);
|
||||
EXPORT_TRACEPOINT_SYMBOL(kfree);
|
||||
EXPORT_TRACEPOINT_SYMBOL(kmem_cache_free);
|
||||
|
||||
/*
 * Map the verdict of __should_failslab() onto an errno-style result:
 * -ENOMEM when it returns non-zero, 0 otherwise. The function is registered
 * with ALLOW_ERROR_INJECTION() using the ERRNO type.
 */
int should_failslab(struct kmem_cache *s, gfp_t gfpflags)
{
	return __should_failslab(s, gfpflags) ? -ENOMEM : 0;
}
ALLOW_ERROR_INJECTION(should_failslab, ERRNO);
|
||||
|
Loading…
x
Reference in New Issue
Block a user