Mirror of https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git (synced 2024-12-29 09:12:07 +00:00)
slab updates for 6.13
-----BEGIN PGP SIGNATURE-----

iQEzBAABCAAdFiEEe7vIQRWZI0iWSE3xu+CwddJFiJoFAmdERvEACgkQu+CwddJF
iJre6Af9EBMVQiWJrmoMOjbGLqLgmZzSXRNxR862WGn4D/wesA1HmSlWgEn54hgc
GIYIeD++v4JaIRNH0yZqb2UBSKjF/rYPDkKstnqgFaVakLoDrwkkwV2n3Gk5BEgR
m/SzLGgoDWKR65I/oMpL6e2KrMOfMfjpB31qiVvdlaQd2Nv/5rw+gUVylxhNIZEH
W11N3IC+e9hmgT3ZBpTmHeqNrlXE1+USWPrp/HV05Ndz6yf97JnP4Wr9f9pcyN3R
aflLHR38+Q9cCfO7y8wNqtYvIV/kbqgdaqD76frSgalC4Lmz9+L+TZ2NuENCPoGj
Xdbip2z+iffWhvqM+qooOLVxR0XqTA==
=Sepb
-----END PGP SIGNATURE-----

Merge tag 'slab-for-6.13-v2' of git://git.kernel.org/pub/scm/linux/kernel/git/vbabka/slab

Pull slab updates from Vlastimil Babka:

 - Add new slab_strict_numa boot parameter to enforce per-object memory
   policies on top of slab folio policies, for systems where saving cost
   of remote accesses is more important than minimizing slab allocation
   overhead (Christoph Lameter)

 - Fix for freeptr_offset alignment check being too strict for m68k
   (Geert Uytterhoeven)

 - krealloc() fixes for not violating __GFP_ZERO guarantees on
   krealloc() when slub_debug (redzone and object tracking) is enabled
   (Feng Tang)

 - Fix a memory leak in case sysfs registration fails for a slab cache,
   and also no longer fail to create the cache in that case (Hyeonggon Yoo)

 - Fix handling of detected consistency problems (due to buggy slab
   user) with slub_debug enabled, so that it does not cause further list
   corruption bugs (yuan.gao)

 - Code cleanup and kerneldocs polishing (Zhen Lei, Vlastimil Babka)

* tag 'slab-for-6.13-v2' of git://git.kernel.org/pub/scm/linux/kernel/git/vbabka/slab:
  slab: Fix too strict alignment check in create_cache()
  mm/slab: Allow cache creation to proceed even if sysfs registration fails
  mm/slub: Avoid list corruption when removing a slab from the full list
  mm/slub, kunit: Add testcase for krealloc redzone and zeroing
  mm/slub: Improve redzone check and zeroing for krealloc()
  mm/slub: Consider kfence case for get_orig_size()
  SLUB: Add support for per object memory policies
  mm, slab: add kerneldocs for common SLAB_ flags
  mm/slab: remove duplicate check in create_cache()
  mm/slub: Move krealloc() and related code to slub.c
  mm/kasan: Don't store metadata inside kmalloc object when slub_debug_orig_size is on
This commit is contained in: commit e06635e26c
Documentation/admin-guide/kernel-parameters.txt

@@ -6158,6 +6158,16 @@
 			For more information see Documentation/mm/slub.rst.
 			(slub_nomerge legacy name also accepted for now)
 
+	slab_strict_numa	[MM]
+			Support memory policies on a per object level
+			in the slab allocator. The default is for memory
+			policies to be applied at the folio level when
+			a new folio is needed or a partial folio is
+			retrieved from the lists. Increases overhead
+			in the slab fastpaths but gains more accurate
+			NUMA kernel object placement which helps with slow
+			interconnects in NUMA systems.
+
 	slram=		[HW,MTD]
 
 	smart2=		[HW]
Documentation/mm/slub.rst

@@ -175,6 +175,15 @@ can be influenced by kernel parameters:
 	``slab_max_order`` to 0, what cause minimum possible order of
 	slabs allocation.
 
+``slab_strict_numa``
+	Enables the application of memory policies on each
+	allocation. This results in more accurate placement of
+	objects which may result in the reduction of accesses
+	to remote nodes. The default is to only apply memory
+	policies at the folio level when a new folio is acquired
+	or a folio is retrieved from the lists. Enabling this
+	option reduces the fastpath performance of the slab allocator.
+
 SLUB Debug output
 =================
 
include/linux/slab.h

@@ -77,7 +77,17 @@ enum _slab_flag_bits {
 #define SLAB_POISON		__SLAB_FLAG_BIT(_SLAB_POISON)
 /* Indicate a kmalloc slab */
 #define SLAB_KMALLOC		__SLAB_FLAG_BIT(_SLAB_KMALLOC)
-/* Align objs on cache lines */
+/**
+ * define SLAB_HWCACHE_ALIGN - Align objects on cache line boundaries.
+ *
+ * Sufficiently large objects are aligned on cache line boundary. For object
+ * size smaller than a half of cache line size, the alignment is on the half of
+ * cache line size. In general, if object size is smaller than 1/2^n of cache
+ * line size, the alignment is adjusted to 1/2^n.
+ *
+ * If explicit alignment is also requested by the respective
+ * &struct kmem_cache_args field, the greater of both alignments is applied.
+ */
 #define SLAB_HWCACHE_ALIGN	__SLAB_FLAG_BIT(_SLAB_HWCACHE_ALIGN)
 /* Use GFP_DMA memory */
 #define SLAB_CACHE_DMA		__SLAB_FLAG_BIT(_SLAB_CACHE_DMA)
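
As a rough illustration of the alignment rule documented above (not part of this commit; the struct, cache, and function names below are invented), an object of about 32 bytes created with SLAB_HWCACHE_ALIGN on a machine with 64-byte cache lines ends up 32-byte aligned rather than aligned to a full cache line:

    #include <linux/slab.h>
    #include <linux/list.h>
    #include <linux/init.h>

    /* Hypothetical 32-byte object: half of a typical 64-byte cache line,
     * so SLAB_HWCACHE_ALIGN yields 32-byte alignment per the 1/2^n rule.
     */
    struct foo_entry {
    	u64 key;
    	unsigned long flags;
    	struct list_head lru;
    };

    static struct kmem_cache *foo_cachep;

    static int __init foo_cache_init(void)
    {
    	foo_cachep = kmem_cache_create("foo_entry", sizeof(struct foo_entry),
    				       0, SLAB_HWCACHE_ALIGN, NULL);
    	return foo_cachep ? 0 : -ENOMEM;
    }

If an explicit align argument larger than that is also requested, the larger value wins, per the "greater of both" rule above.
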
@@ -87,8 +97,8 @@ enum _slab_flag_bits {
 #define SLAB_STORE_USER		__SLAB_FLAG_BIT(_SLAB_STORE_USER)
 /* Panic if kmem_cache_create() fails */
 #define SLAB_PANIC		__SLAB_FLAG_BIT(_SLAB_PANIC)
-/*
- * SLAB_TYPESAFE_BY_RCU - **WARNING** READ THIS!
+/**
+ * define SLAB_TYPESAFE_BY_RCU - **WARNING** READ THIS!
  *
  * This delays freeing the SLAB page by a grace period, it does _NOT_
  * delay object freeing. This means that if you do kmem_cache_free()
@@ -99,20 +109,22 @@ enum _slab_flag_bits {
  * stays valid, the trick to using this is relying on an independent
  * object validation pass. Something like:
  *
- * begin:
- *  rcu_read_lock();
- *  obj = lockless_lookup(key);
- *  if (obj) {
- *    if (!try_get_ref(obj)) // might fail for free objects
- *      rcu_read_unlock();
- *      goto begin;
+ * ::
  *
- *    if (obj->key != key) { // not the object we expected
- *      put_ref(obj);
- *      rcu_read_unlock();
- *      goto begin;
- *    }
- *  }
+ *  begin:
+ *   rcu_read_lock();
+ *   obj = lockless_lookup(key);
+ *   if (obj) {
+ *     if (!try_get_ref(obj)) // might fail for free objects
+ *       rcu_read_unlock();
+ *       goto begin;
+ *
+ *     if (obj->key != key) { // not the object we expected
+ *       put_ref(obj);
+ *       rcu_read_unlock();
+ *       goto begin;
+ *     }
+ *   }
  *  rcu_read_unlock();
  *
  * This is useful if we need to approach a kernel structure obliquely,
@@ -137,7 +149,6 @@ enum _slab_flag_bits {
  *
  * Note that SLAB_TYPESAFE_BY_RCU was originally named SLAB_DESTROY_BY_RCU.
  */
-/* Defer freeing slabs to RCU */
 #define SLAB_TYPESAFE_BY_RCU	__SLAB_FLAG_BIT(_SLAB_TYPESAFE_BY_RCU)
 /* Trace allocations and frees */
 #define SLAB_TRACE		__SLAB_FLAG_BIT(_SLAB_TRACE)
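
To make the lockless pattern quoted in that kerneldoc concrete, here is a rough, hypothetical sketch (not taken from this commit): objects carry a refcount, the reader revalidates the key under rcu_read_lock(), and the try_get_ref()/put_ref() of the comment map to refcount_inc_not_zero() and a put that eventually frees back into the typesafe cache.

    #include <linux/slab.h>
    #include <linux/rcupdate.h>
    #include <linux/refcount.h>
    #include <linux/hashtable.h>
    #include <linux/init.h>

    struct conn {				/* hypothetical object */
    	u32 key;
    	refcount_t ref;
    	struct hlist_node node;
    };

    static struct kmem_cache *conn_cachep;
    static DEFINE_HASHTABLE(conn_table, 8);

    static int __init conn_cache_init(void)
    {
    	/* Slab pages are RCU-delayed; individual objects may be recycled. */
    	conn_cachep = kmem_cache_create("conn", sizeof(struct conn), 0,
    					SLAB_TYPESAFE_BY_RCU, NULL);
    	return conn_cachep ? 0 : -ENOMEM;
    }

    static void conn_put(struct conn *c)
    {
    	if (refcount_dec_and_test(&c->ref))
    		kmem_cache_free(conn_cachep, c);	/* hash removal omitted */
    }

    static struct conn *conn_lookup(u32 key)
    {
    	struct conn *c;

    again:
    	rcu_read_lock();
    	hash_for_each_possible_rcu(conn_table, c, node, key) {
    		if (!refcount_inc_not_zero(&c->ref))	/* might be a freed object */
    			continue;
    		if (c->key != key) {			/* object was recycled */
    			conn_put(c);
    			rcu_read_unlock();
    			goto again;
    		}
    		rcu_read_unlock();
    		return c;				/* validated, referenced */
    	}
    	rcu_read_unlock();
    	return NULL;
    }

The point of the warning is that the refcount (or a lock inside the object) is what validates the object: the memory itself may already have been reused for another object of the same type before the RCU read section ends.
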
@@ -170,7 +181,12 @@ enum _slab_flag_bits {
 #else
 # define SLAB_FAILSLAB		__SLAB_FLAG_UNUSED
 #endif
-/* Account to memcg */
+/**
+ * define SLAB_ACCOUNT - Account allocations to memcg.
+ *
+ * All object allocations from this cache will be memcg accounted, regardless of
+ * __GFP_ACCOUNT being or not being passed to individual allocations.
+ */
 #ifdef CONFIG_MEMCG
 # define SLAB_ACCOUNT		__SLAB_FLAG_BIT(_SLAB_ACCOUNT)
 #else
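
A small, hypothetical contrast between the two accounting styles mentioned in that kerneldoc (illustrative only, names invented):

    #include <linux/slab.h>
    #include <linux/init.h>

    static struct kmem_cache *req_cachep;

    static int __init req_cache_init(void)
    {
    	/* Every object from this cache is charged to the allocating memcg. */
    	req_cachep = kmem_cache_create("req", 256, 0, SLAB_ACCOUNT, NULL);
    	if (!req_cachep)
    		return -ENOMEM;

    	/* Without SLAB_ACCOUNT, accounting is opt-in per allocation. */
    	kfree(kmalloc(256, GFP_KERNEL_ACCOUNT));
    	return 0;
    }
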
@@ -197,7 +213,13 @@ enum _slab_flag_bits {
 #endif
 
 /* The following flags affect the page allocator grouping pages by mobility */
-/* Objects are reclaimable */
+/**
+ * define SLAB_RECLAIM_ACCOUNT - Objects are reclaimable.
+ *
+ * Use this flag for caches that have an associated shrinker. As a result, slab
+ * pages are allocated with __GFP_RECLAIMABLE, which affects grouping pages by
+ * mobility, and are accounted in SReclaimable counter in /proc/meminfo
+ */
 #ifndef CONFIG_SLUB_TINY
 #define SLAB_RECLAIM_ACCOUNT	__SLAB_FLAG_BIT(_SLAB_RECLAIM_ACCOUNT)
 #else
lib/slub_kunit.c

@@ -192,6 +192,47 @@ static void test_leak_destroy(struct kunit *test)
 	KUNIT_EXPECT_EQ(test, 2, slab_errors);
 }
 
+static void test_krealloc_redzone_zeroing(struct kunit *test)
+{
+	u8 *p;
+	int i;
+	struct kmem_cache *s = test_kmem_cache_create("TestSlub_krealloc", 64,
+				SLAB_KMALLOC|SLAB_STORE_USER|SLAB_RED_ZONE);
+
+	p = alloc_hooks(__kmalloc_cache_noprof(s, GFP_KERNEL, 48));
+	memset(p, 0xff, 48);
+
+	kasan_disable_current();
+	OPTIMIZER_HIDE_VAR(p);
+
+	/* Test shrink */
+	p = krealloc(p, 40, GFP_KERNEL | __GFP_ZERO);
+	for (i = 40; i < 64; i++)
+		KUNIT_EXPECT_EQ(test, p[i], SLUB_RED_ACTIVE);
+
+	/* Test grow within the same 64B kmalloc object */
+	p = krealloc(p, 56, GFP_KERNEL | __GFP_ZERO);
+	for (i = 40; i < 56; i++)
+		KUNIT_EXPECT_EQ(test, p[i], 0);
+	for (i = 56; i < 64; i++)
+		KUNIT_EXPECT_EQ(test, p[i], SLUB_RED_ACTIVE);
+
+	validate_slab_cache(s);
+	KUNIT_EXPECT_EQ(test, 0, slab_errors);
+
+	memset(p, 0xff, 56);
+	/* Test grow with allocating a bigger 128B object */
+	p = krealloc(p, 112, GFP_KERNEL | __GFP_ZERO);
+	for (i = 0; i < 56; i++)
+		KUNIT_EXPECT_EQ(test, p[i], 0xff);
+	for (i = 56; i < 112; i++)
+		KUNIT_EXPECT_EQ(test, p[i], 0);
+
+	kfree(p);
+	kasan_enable_current();
+	kmem_cache_destroy(s);
+}
+
 static int test_init(struct kunit *test)
 {
 	slab_errors = 0;
@@ -214,6 +255,7 @@ static struct kunit_case test_cases[] = {
 	KUNIT_CASE(test_kmalloc_redzone_access),
 	KUNIT_CASE(test_kfree_rcu),
 	KUNIT_CASE(test_leak_destroy),
+	KUNIT_CASE(test_krealloc_redzone_zeroing),
 	{}
 };
 
mm/kasan/common.c

@@ -392,9 +392,12 @@ void kasan_cache_create(struct kmem_cache *cache, unsigned int *size,
 	 * 1. Object is SLAB_TYPESAFE_BY_RCU, which means that it can
 	 *    be touched after it was freed, or
 	 * 2. Object has a constructor, which means it's expected to
-	 *    retain its content until the next allocation.
+	 *    retain its content until the next allocation, or
+	 * 3. It is from a kmalloc cache which enables the debug option
+	 *    to store original size.
 	 */
-	if ((cache->flags & SLAB_TYPESAFE_BY_RCU) || cache->ctor) {
+	if ((cache->flags & SLAB_TYPESAFE_BY_RCU) || cache->ctor ||
+	    slub_debug_orig_size(cache)) {
 		cache->kasan_info.free_meta_offset = *size;
 		*size += sizeof(struct kasan_free_meta);
 		goto free_meta_added;
mm/slab.h (11 changed lines)

@@ -73,6 +73,11 @@ struct slab {
 			struct {
 				unsigned inuse:16;
 				unsigned objects:15;
+				/*
+				 * If slab debugging is enabled then the
+				 * frozen bit can be reused to indicate
+				 * that the slab was corrupted
+				 */
 				unsigned frozen:1;
 			};
 		};
@@ -695,6 +700,12 @@ void __kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *slab)
 void __check_heap_object(const void *ptr, unsigned long n,
 			 const struct slab *slab, bool to_user);
 
+static inline bool slub_debug_orig_size(struct kmem_cache *s)
+{
+	return (kmem_cache_debug_flags(s, SLAB_STORE_USER) &&
+			(s->flags & SLAB_KMALLOC));
+}
+
 #ifdef CONFIG_SLUB_DEBUG
 void skip_orig_size_check(struct kmem_cache *s, const void *object);
 #endif
mm/slab_common.c (103 changed lines)

@@ -222,15 +222,12 @@ static struct kmem_cache *create_cache(const char *name,
 	struct kmem_cache *s;
 	int err;
 
-	if (WARN_ON(args->useroffset + args->usersize > object_size))
-		args->useroffset = args->usersize = 0;
-
 	/* If a custom freelist pointer is requested make sure it's sane. */
 	err = -EINVAL;
 	if (args->use_freeptr_offset &&
 	    (args->freeptr_offset >= object_size ||
 	     !(flags & SLAB_TYPESAFE_BY_RCU) ||
-	     !IS_ALIGNED(args->freeptr_offset, sizeof(freeptr_t))))
+	     !IS_ALIGNED(args->freeptr_offset, __alignof__(freeptr_t))))
 		goto out;
 
 	err = -ENOMEM;
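
For context on the relaxed check: a custom freelist pointer offset is only accepted for SLAB_TYPESAFE_BY_RCU caches, must lie inside the object, and now only needs to be naturally aligned for freeptr_t (aligning to its size was too strict on m68k). A rough usage sketch with an invented struct, not taken from this commit:

    #include <linux/slab.h>
    #include <linux/init.h>

    struct msg {				/* hypothetical RCU-typesafe object */
    	unsigned long state;
    	freeptr_t free;			/* free pointer kept inside the object */
    	char payload[40];
    };

    static struct kmem_cache *msg_cachep;

    static int __init msg_cache_init(void)
    {
    	struct kmem_cache_args args = {
    		.use_freeptr_offset	= true,
    		/* < object_size and aligned to __alignof__(freeptr_t) */
    		.freeptr_offset		= offsetof(struct msg, free),
    	};

    	msg_cachep = kmem_cache_create("msg", sizeof(struct msg), &args,
    				       SLAB_TYPESAFE_BY_RCU);
    	return msg_cachep ? 0 : -ENOMEM;
    }
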
@@ -257,11 +254,23 @@ static struct kmem_cache *create_cache(const char *name,
  * @object_size: The size of objects to be created in this cache.
  * @args: Additional arguments for the cache creation (see
  *        &struct kmem_cache_args).
- * @flags: See %SLAB_* flags for an explanation of individual @flags.
+ * @flags: See the descriptions of individual flags. The common ones are listed
+ *         in the description below.
  *
  * Not to be called directly, use the kmem_cache_create() wrapper with the same
  * parameters.
  *
+ * Commonly used @flags:
+ *
+ * &SLAB_ACCOUNT - Account allocations to memcg.
+ *
+ * &SLAB_HWCACHE_ALIGN - Align objects on cache line boundaries.
+ *
+ * &SLAB_RECLAIM_ACCOUNT - Objects are reclaimable.
+ *
+ * &SLAB_TYPESAFE_BY_RCU - Slab page (not individual objects) freeing delayed
+ * by a grace period - see the full description before using.
+ *
  * Context: Cannot be called within an interrupt, but can be interrupted.
  *
  * Return: a pointer to the cache on success, NULL on failure.
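
As a hedged illustration of the wrapper and the commonly used flags listed above (the cache, struct, and ctor names are invented; the exact field set of struct kmem_cache_args should be checked in include/linux/slab.h):

    #include <linux/slab.h>
    #include <linux/list.h>
    #include <linux/init.h>

    struct inode_ext {				/* hypothetical object */
    	u64 ino;
    	struct list_head lru;
    	char name[32];
    };

    static void inode_ext_ctor(void *obj)
    {
    	struct inode_ext *e = obj;

    	INIT_LIST_HEAD(&e->lru);
    }

    static struct kmem_cache *inode_ext_cachep;

    static int __init inode_ext_cache_init(void)
    {
    	struct kmem_cache_args args = {
    		.align	= __alignof__(struct inode_ext),
    		.ctor	= inode_ext_ctor,
    	};

    	inode_ext_cachep = kmem_cache_create("inode_ext",
    					     sizeof(struct inode_ext), &args,
    					     SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT);
    	return inode_ext_cachep ? 0 : -ENOMEM;
    }
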
@@ -1199,90 +1208,6 @@ module_init(slab_proc_init);
 
 #endif /* CONFIG_SLUB_DEBUG */
 
-static __always_inline __realloc_size(2) void *
-__do_krealloc(const void *p, size_t new_size, gfp_t flags)
-{
-	void *ret;
-	size_t ks;
-
-	/* Check for double-free before calling ksize. */
-	if (likely(!ZERO_OR_NULL_PTR(p))) {
-		if (!kasan_check_byte(p))
-			return NULL;
-		ks = ksize(p);
-	} else
-		ks = 0;
-
-	/* If the object still fits, repoison it precisely. */
-	if (ks >= new_size) {
-		/* Zero out spare memory. */
-		if (want_init_on_alloc(flags)) {
-			kasan_disable_current();
-			memset(kasan_reset_tag(p) + new_size, 0, ks - new_size);
-			kasan_enable_current();
-		}
-
-		p = kasan_krealloc((void *)p, new_size, flags);
-		return (void *)p;
-	}
-
-	ret = kmalloc_node_track_caller_noprof(new_size, flags, NUMA_NO_NODE, _RET_IP_);
-	if (ret && p) {
-		/* Disable KASAN checks as the object's redzone is accessed. */
-		kasan_disable_current();
-		memcpy(ret, kasan_reset_tag(p), ks);
-		kasan_enable_current();
-	}
-
-	return ret;
-}
-
-/**
- * krealloc - reallocate memory. The contents will remain unchanged.
- * @p: object to reallocate memory for.
- * @new_size: how many bytes of memory are required.
- * @flags: the type of memory to allocate.
- *
- * If @p is %NULL, krealloc() behaves exactly like kmalloc(). If @new_size
- * is 0 and @p is not a %NULL pointer, the object pointed to is freed.
- *
- * If __GFP_ZERO logic is requested, callers must ensure that, starting with the
- * initial memory allocation, every subsequent call to this API for the same
- * memory allocation is flagged with __GFP_ZERO. Otherwise, it is possible that
- * __GFP_ZERO is not fully honored by this API.
- *
- * This is the case, since krealloc() only knows about the bucket size of an
- * allocation (but not the exact size it was allocated with) and hence
- * implements the following semantics for shrinking and growing buffers with
- * __GFP_ZERO.
- *
- *         new             bucket
- * 0       size            size
- * |--------|----------------|
- * |  keep  |      zero      |
- *
- * In any case, the contents of the object pointed to are preserved up to the
- * lesser of the new and old sizes.
- *
- * Return: pointer to the allocated memory or %NULL in case of error
- */
-void *krealloc_noprof(const void *p, size_t new_size, gfp_t flags)
-{
-	void *ret;
-
-	if (unlikely(!new_size)) {
-		kfree(p);
-		return ZERO_SIZE_PTR;
-	}
-
-	ret = __do_krealloc(p, new_size, flags);
-	if (ret && kasan_reset_tag(p) != kasan_reset_tag(ret))
-		kfree(p);
-
-	return ret;
-}
-EXPORT_SYMBOL(krealloc_noprof);
-
 /**
  * kfree_sensitive - Clear sensitive information in memory before freeing
  * @p: object to free memory of
mm/slub.c (220 changed lines)

@@ -218,6 +218,10 @@ DEFINE_STATIC_KEY_FALSE(slub_debug_enabled);
 #endif
 #endif		/* CONFIG_SLUB_DEBUG */
 
+#ifdef CONFIG_NUMA
+static DEFINE_STATIC_KEY_FALSE(strict_numa);
+#endif
+
 /* Structure holding parameters for get_partial() call chain */
 struct partial_context {
 	gfp_t flags;
@@ -230,12 +234,6 @@ static inline bool kmem_cache_debug(struct kmem_cache *s)
 	return kmem_cache_debug_flags(s, SLAB_DEBUG_FLAGS);
 }
 
-static inline bool slub_debug_orig_size(struct kmem_cache *s)
-{
-	return (kmem_cache_debug_flags(s, SLAB_STORE_USER) &&
-			(s->flags & SLAB_KMALLOC));
-}
-
 void *fixup_red_left(struct kmem_cache *s, void *p)
 {
 	if (kmem_cache_debug_flags(s, SLAB_RED_ZONE))
@@ -760,21 +758,10 @@ static inline void set_orig_size(struct kmem_cache *s,
 				void *object, unsigned int orig_size)
 {
 	void *p = kasan_reset_tag(object);
-	unsigned int kasan_meta_size;
 
 	if (!slub_debug_orig_size(s))
 		return;
 
-	/*
-	 * KASAN can save its free meta data inside of the object at offset 0.
-	 * If this meta data size is larger than 'orig_size', it will overlap
-	 * the data redzone in [orig_size+1, object_size]. Thus, we adjust
-	 * 'orig_size' to be as at least as big as KASAN's meta data.
-	 */
-	kasan_meta_size = kasan_metadata_size(s, true);
-	if (kasan_meta_size > orig_size)
-		orig_size = kasan_meta_size;
-
 	p += get_info_end(s);
 	p += sizeof(struct track) * 2;
 
@@ -785,6 +772,9 @@ static inline unsigned int get_orig_size(struct kmem_cache *s, void *object)
 {
 	void *p = kasan_reset_tag(object);
 
+	if (is_kfence_address(object))
+		return kfence_ksize(object);
+
 	if (!slub_debug_orig_size(s))
 		return s->object_size;
 
@@ -1423,6 +1413,11 @@ static int check_slab(struct kmem_cache *s, struct slab *slab)
 			slab->inuse, slab->objects);
 		return 0;
 	}
+	if (slab->frozen) {
+		slab_err(s, slab, "Slab disabled since SLUB metadata consistency check failed");
+		return 0;
+	}
+
 	/* Slab_pad_check fixes things up after itself */
 	slab_pad_check(s, slab);
 	return 1;
@@ -1603,6 +1598,7 @@ static noinline bool alloc_debug_processing(struct kmem_cache *s,
 		slab_fix(s, "Marking all objects used");
 		slab->inuse = slab->objects;
 		slab->freelist = NULL;
+		slab->frozen = 1; /* mark consistency-failed slab as frozen */
 	}
 	return false;
 }
@@ -2744,7 +2740,8 @@ static void *alloc_single_from_partial(struct kmem_cache *s,
 	slab->inuse++;
 
 	if (!alloc_debug_processing(s, slab, object, orig_size)) {
-		remove_partial(n, slab);
+		if (folio_test_slab(slab_folio(slab)))
+			remove_partial(n, slab);
 		return NULL;
 	}
 
@@ -3956,6 +3953,28 @@ static __always_inline void *__slab_alloc_node(struct kmem_cache *s,
 	object = c->freelist;
 	slab = c->slab;
 
+#ifdef CONFIG_NUMA
+	if (static_branch_unlikely(&strict_numa) &&
+			node == NUMA_NO_NODE) {
+
+		struct mempolicy *mpol = current->mempolicy;
+
+		if (mpol) {
+			/*
+			 * Special BIND rule support. If existing slab
+			 * is in permitted set then do not redirect
+			 * to a particular node.
+			 * Otherwise we apply the memory policy to get
+			 * the node we need to allocate on.
+			 */
+			if (mpol->mode != MPOL_BIND || !slab ||
+				!node_isset(slab_nid(slab), mpol->nodes))
+
+				node = mempolicy_slab_node();
+		}
+	}
+#endif
+
 	if (!USE_LOCKLESS_FAST_PATH() ||
 	    unlikely(!object || !slab || !node_match(slab, node))) {
 		object = __slab_alloc(s, gfpflags, node, addr, c, orig_size);
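
The new fast-path branch above only does extra work for tasks that actually have a memory policy, and only when the kernel was booted with slab_strict_numa on a multi-node machine. As a rough user-space illustration (not part of this commit; uses the numaif.h interface, link with -lnuma) of setting such a policy, after which kernel slab objects allocated in this task's context become candidates for per-object rather than per-folio placement:

    /* Illustrative only; assumes a NUMA machine with a node 1. */
    #define _GNU_SOURCE
    #include <numaif.h>
    #include <stdio.h>
    #include <stdlib.h>

    int main(void)
    {
    	/* Bind this task's allocations to node 1. With slab_strict_numa,
    	 * kernel slab allocations made on the task's behalf are also
    	 * placed according to this policy, object by object. */
    	unsigned long nodemask = 1UL << 1;

    	if (set_mempolicy(MPOL_BIND, &nodemask, sizeof(nodemask) * 8)) {
    		perror("set_mempolicy");
    		return EXIT_FAILURE;
    	}

    	/* Kernel allocations triggered by this task (e.g. via syscalls)
    	 * now follow the BIND policy in the slab fast path as well. */
    	return EXIT_SUCCESS;
    }
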
@@ -4728,6 +4747,126 @@ void kfree(const void *object)
 }
 EXPORT_SYMBOL(kfree);
 
+static __always_inline __realloc_size(2) void *
+__do_krealloc(const void *p, size_t new_size, gfp_t flags)
+{
+	void *ret;
+	size_t ks = 0;
+	int orig_size = 0;
+	struct kmem_cache *s = NULL;
+
+	if (unlikely(ZERO_OR_NULL_PTR(p)))
+		goto alloc_new;
+
+	/* Check for double-free. */
+	if (!kasan_check_byte(p))
+		return NULL;
+
+	if (is_kfence_address(p)) {
+		ks = orig_size = kfence_ksize(p);
+	} else {
+		struct folio *folio;
+
+		folio = virt_to_folio(p);
+		if (unlikely(!folio_test_slab(folio))) {
+			/* Big kmalloc object */
+			WARN_ON(folio_size(folio) <= KMALLOC_MAX_CACHE_SIZE);
+			WARN_ON(p != folio_address(folio));
+			ks = folio_size(folio);
+		} else {
+			s = folio_slab(folio)->slab_cache;
+			orig_size = get_orig_size(s, (void *)p);
+			ks = s->object_size;
+		}
+	}
+
+	/* If the old object doesn't fit, allocate a bigger one */
+	if (new_size > ks)
+		goto alloc_new;
+
+	/* Zero out spare memory. */
+	if (want_init_on_alloc(flags)) {
+		kasan_disable_current();
+		if (orig_size && orig_size < new_size)
+			memset(kasan_reset_tag(p) + orig_size, 0, new_size - orig_size);
+		else
+			memset(kasan_reset_tag(p) + new_size, 0, ks - new_size);
+		kasan_enable_current();
+	}
+
+	/* Setup kmalloc redzone when needed */
+	if (s && slub_debug_orig_size(s)) {
+		set_orig_size(s, (void *)p, new_size);
+		if (s->flags & SLAB_RED_ZONE && new_size < ks)
+			memset_no_sanitize_memory(kasan_reset_tag(p) + new_size,
+						SLUB_RED_ACTIVE, ks - new_size);
+	}
+
+	p = kasan_krealloc(p, new_size, flags);
+	return (void *)p;
+
+alloc_new:
+	ret = kmalloc_node_track_caller_noprof(new_size, flags, NUMA_NO_NODE, _RET_IP_);
+	if (ret && p) {
+		/* Disable KASAN checks as the object's redzone is accessed. */
+		kasan_disable_current();
+		memcpy(ret, kasan_reset_tag(p), orig_size ?: ks);
+		kasan_enable_current();
+	}
+
+	return ret;
+}
+
+/**
+ * krealloc - reallocate memory. The contents will remain unchanged.
+ * @p: object to reallocate memory for.
+ * @new_size: how many bytes of memory are required.
+ * @flags: the type of memory to allocate.
+ *
+ * If @p is %NULL, krealloc() behaves exactly like kmalloc(). If @new_size
+ * is 0 and @p is not a %NULL pointer, the object pointed to is freed.
+ *
+ * If __GFP_ZERO logic is requested, callers must ensure that, starting with the
+ * initial memory allocation, every subsequent call to this API for the same
+ * memory allocation is flagged with __GFP_ZERO. Otherwise, it is possible that
+ * __GFP_ZERO is not fully honored by this API.
+ *
+ * When slub_debug_orig_size() is off, krealloc() only knows about the bucket
+ * size of an allocation (but not the exact size it was allocated with) and
+ * hence implements the following semantics for shrinking and growing buffers
+ * with __GFP_ZERO.
+ *
+ *         new             bucket
+ * 0       size            size
+ * |--------|----------------|
+ * |  keep  |      zero      |
+ *
+ * Otherwise, the original allocation size 'orig_size' could be used to
+ * precisely clear the requested size, and the new size will also be stored
+ * as the new 'orig_size'.
+ *
+ * In any case, the contents of the object pointed to are preserved up to the
+ * lesser of the new and old sizes.
+ *
+ * Return: pointer to the allocated memory or %NULL in case of error
+ */
+void *krealloc_noprof(const void *p, size_t new_size, gfp_t flags)
+{
+	void *ret;
+
+	if (unlikely(!new_size)) {
+		kfree(p);
+		return ZERO_SIZE_PTR;
+	}
+
+	ret = __do_krealloc(p, new_size, flags);
+	if (ret && kasan_reset_tag(p) != kasan_reset_tag(ret))
+		kfree(p);
+
+	return ret;
+}
+EXPORT_SYMBOL(krealloc_noprof);
+
 struct detached_freelist {
 	struct slab *slab;
 	void *tail;
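
A small, hypothetical caller illustrating the __GFP_ZERO contract spelled out in the kerneldoc above: the flag is passed both for the initial allocation (krealloc() on a NULL pointer behaves like kmalloc()) and for every later resize of the same buffer, so bytes beyond the previously used size always come back zeroed:

    #include <linux/slab.h>

    /* Hypothetical growable buffer; kernel-side sketch, not from this commit. */
    struct grow_buf {
    	size_t len;
    	u8 *data;	/* starts out NULL */
    };

    static int grow_buf_reserve(struct grow_buf *b, size_t new_len)
    {
    	u8 *p;

    	/* __GFP_ZERO on the first and on every subsequent krealloc() for
    	 * this buffer, so the grown tail is guaranteed to be zeroed. */
    	p = krealloc(b->data, new_len, GFP_KERNEL | __GFP_ZERO);
    	if (!p)
    		return -ENOMEM;

    	b->data = p;
    	b->len = new_len;
    	return 0;
    }
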
@@ -5602,6 +5741,23 @@ static int __init setup_slub_min_objects(char *str)
 __setup("slab_min_objects=", setup_slub_min_objects);
 __setup_param("slub_min_objects=", slub_min_objects, setup_slub_min_objects, 0);
 
+#ifdef CONFIG_NUMA
+static int __init setup_slab_strict_numa(char *str)
+{
+	if (nr_node_ids > 1) {
+		static_branch_enable(&strict_numa);
+		pr_info("SLUB: Strict NUMA enabled.\n");
+	} else {
+		pr_warn("slab_strict_numa parameter set on non NUMA system.\n");
+	}
+
+	return 1;
+}
+
+__setup("slab_strict_numa", setup_slab_strict_numa);
+#endif
+
+
 #ifdef CONFIG_HARDENED_USERCOPY
 /*
  * Rejects incorrectly sized objects and objects that are to be copied
@@ -5960,7 +6116,8 @@ __kmem_cache_alias(const char *name, unsigned int size, unsigned int align,
 	s = find_mergeable(size, align, flags, name, ctor);
 	if (s) {
 		if (sysfs_slab_alias(s, name))
-			return NULL;
+			pr_err("SLUB: Unable to add cache alias %s to sysfs\n",
+			       name);
 
 		s->refcount++;
 
@@ -6042,16 +6199,19 @@ int do_kmem_cache_create(struct kmem_cache *s, const char *name,
 	if (!alloc_kmem_cache_cpus(s))
 		goto out;
 
-	/* Mutex is not taken during early boot */
-	if (slab_state <= UP) {
-		err = 0;
-		goto out;
-	}
+	err = 0;
 
-	err = sysfs_slab_add(s);
-	if (err)
+	/* Mutex is not taken during early boot */
+	if (slab_state <= UP)
 		goto out;
 
+	/*
+	 * Failing to create sysfs files is not critical to SLUB functionality.
+	 * If it fails, proceed with cache creation without these files.
+	 */
+	if (sysfs_slab_add(s))
+		pr_err("SLUB: Unable to add cache %s to sysfs\n", s->name);
+
 	if (s->flags & SLAB_STORE_USER)
 		debugfs_slab_add(s);
 
@@ -7120,7 +7280,8 @@ static int sysfs_slab_add(struct kmem_cache *s)
 
 void sysfs_slab_unlink(struct kmem_cache *s)
 {
-	kobject_del(&s->kobj);
+	if (s->kobj.state_in_sysfs)
+		kobject_del(&s->kobj);
 }
 
 void sysfs_slab_release(struct kmem_cache *s)
@@ -7149,6 +7310,11 @@ static int sysfs_slab_alias(struct kmem_cache *s, const char *name)
 	 * If we have a leftover link then remove it.
 	 */
 	sysfs_remove_link(&slab_kset->kobj, name);
+	/*
+	 * The original cache may have failed to generate sysfs file.
+	 * In that case, sysfs_create_link() returns -ENOENT and
+	 * symbolic link creation is skipped.
+	 */
 	return sysfs_create_link(&slab_kset->kobj, &s->kobj, name);
 }
 