Merge branch 'slab/for-6.11/buckets' into slab/for-next

Merge all the slab patches previously collected on top of v6.10-rc1,
over cleanups/fixes that had to be based on rc6.
This commit is contained in:
Vlastimil Babka 2024-07-15 10:44:16 +02:00
commit 436381eaf2
16 changed files with 316 additions and 137 deletions

View File

@ -144,8 +144,10 @@ configuration, but it is a good practice to use `kmalloc` for objects
smaller than page size.
The address of a chunk allocated with `kmalloc` is aligned to at least
ARCH_KMALLOC_MINALIGN bytes. For sizes which are a power of two, the
alignment is also guaranteed to be at least the respective size.
ARCH_KMALLOC_MINALIGN bytes. For sizes which are a power of two, the
alignment is also guaranteed to be at least the respective size. For other
sizes, the alignment is guaranteed to be at least the largest power-of-two
divisor of the size.
Chunks allocated with kmalloc() can be resized with krealloc(). Similarly
to kmalloc_array(): a helper for resizing arrays is provided in the form of

View File

@ -1110,7 +1110,7 @@ static inline unsigned int compound_order(struct page *page)
*
* Return: The order of the folio.
*/
static inline unsigned int folio_order(struct folio *folio)
static inline unsigned int folio_order(const struct folio *folio)
{
if (!folio_test_large(folio))
return 0;
@ -2150,7 +2150,7 @@ static inline struct folio *folio_next(struct folio *folio)
* it from being split. It is not necessary for the folio to be locked.
* Return: The base-2 logarithm of the size of this folio.
*/
static inline unsigned int folio_shift(struct folio *folio)
static inline unsigned int folio_shift(const struct folio *folio)
{
return PAGE_SHIFT + folio_order(folio);
}
@ -2163,7 +2163,7 @@ static inline unsigned int folio_shift(struct folio *folio)
* it from being split. It is not necessary for the folio to be locked.
* Return: The number of bytes in this folio.
*/
static inline size_t folio_size(struct folio *folio)
static inline size_t folio_size(const struct folio *folio)
{
return PAGE_SIZE << folio_order(folio);
}

View File

@ -38,11 +38,8 @@
* Magic nums for obj red zoning.
* Placed in the first word before and the first word after an obj.
*/
#define RED_INACTIVE 0x09F911029D74E35BULL /* when obj is inactive */
#define RED_ACTIVE 0xD84156C5635688C0ULL /* when obj is active */
#define SLUB_RED_INACTIVE 0xbb
#define SLUB_RED_ACTIVE 0xcc
#define SLUB_RED_INACTIVE 0xbb /* when obj is inactive */
#define SLUB_RED_ACTIVE 0xcc /* when obj is active */
/* ...and for poisoning */
#define POISON_INUSE 0x5a /* for use-uninitialised poisoning */

View File

@ -426,8 +426,9 @@ enum kmalloc_cache_type {
NR_KMALLOC_TYPES
};
extern struct kmem_cache *
kmalloc_caches[NR_KMALLOC_TYPES][KMALLOC_SHIFT_HIGH + 1];
typedef struct kmem_cache * kmem_buckets[KMALLOC_SHIFT_HIGH + 1];
extern kmem_buckets kmalloc_caches[NR_KMALLOC_TYPES];
/*
* Define gfp bits that should not be set for KMALLOC_NORMAL.
@ -528,9 +529,6 @@ static_assert(PAGE_SHIFT <= 20);
#include <linux/alloc_tag.h>
void *__kmalloc_noprof(size_t size, gfp_t flags) __assume_kmalloc_alignment __alloc_size(1);
#define __kmalloc(...) alloc_hooks(__kmalloc_noprof(__VA_ARGS__))
/**
* kmem_cache_alloc - Allocate an object
* @cachep: The cache to allocate from.
@ -551,6 +549,10 @@ void *kmem_cache_alloc_lru_noprof(struct kmem_cache *s, struct list_lru *lru,
void kmem_cache_free(struct kmem_cache *s, void *objp);
kmem_buckets *kmem_buckets_create(const char *name, slab_flags_t flags,
unsigned int useroffset, unsigned int usersize,
void (*ctor)(void *));
/*
* Bulk allocation and freeing operations. These are accelerated in an
* allocator specific way to avoid taking locks repeatedly or building
@ -568,31 +570,49 @@ static __always_inline void kfree_bulk(size_t size, void **p)
kmem_cache_free_bulk(NULL, size, p);
}
void *__kmalloc_node_noprof(size_t size, gfp_t flags, int node) __assume_kmalloc_alignment
__alloc_size(1);
#define __kmalloc_node(...) alloc_hooks(__kmalloc_node_noprof(__VA_ARGS__))
void *kmem_cache_alloc_node_noprof(struct kmem_cache *s, gfp_t flags,
int node) __assume_slab_alignment __malloc;
#define kmem_cache_alloc_node(...) alloc_hooks(kmem_cache_alloc_node_noprof(__VA_ARGS__))
void *kmalloc_trace_noprof(struct kmem_cache *s, gfp_t flags, size_t size)
__assume_kmalloc_alignment __alloc_size(3);
/*
 * These macros allow declaring a kmem_buckets * parameter alongside size, which
 * can be compiled out with CONFIG_SLAB_BUCKETS=n so that a large number of call
 * sites don't have to pass NULL.
 *
 * DECL_BUCKET_PARAMS(size, b) is used in parameter lists: it expands to
 * "size_t size, kmem_buckets *b" with CONFIG_SLAB_BUCKETS, and to just
 * "size_t size" without it.
 *
 * PASS_BUCKET_PARAMS(size, b) is the matching call-site form: it expands to
 * "size, b" with CONFIG_SLAB_BUCKETS, and to just "size" without it.
 *
 * PASS_BUCKET_PARAM(b) is used inside a function declared with
 * DECL_BUCKET_PARAMS(): it yields b with CONFIG_SLAB_BUCKETS and NULL
 * otherwise, so the body never names a parameter that was compiled out.
 */
#ifdef CONFIG_SLAB_BUCKETS
#define DECL_BUCKET_PARAMS(_size, _b) size_t (_size), kmem_buckets *(_b)
#define PASS_BUCKET_PARAMS(_size, _b) (_size), (_b)
#define PASS_BUCKET_PARAM(_b) (_b)
#else
#define DECL_BUCKET_PARAMS(_size, _b) size_t (_size)
#define PASS_BUCKET_PARAMS(_size, _b) (_size)
#define PASS_BUCKET_PARAM(_b) NULL
#endif
void *kmalloc_node_trace_noprof(struct kmem_cache *s, gfp_t gfpflags,
int node, size_t size) __assume_kmalloc_alignment
__alloc_size(4);
#define kmalloc_trace(...) alloc_hooks(kmalloc_trace_noprof(__VA_ARGS__))
/*
* The following functions are not to be used directly and are intended only
* for internal use from kmalloc() and kmalloc_node()
* with the exception of kunit tests
*/
#define kmalloc_node_trace(...) alloc_hooks(kmalloc_node_trace_noprof(__VA_ARGS__))
void *__kmalloc_noprof(size_t size, gfp_t flags)
__assume_kmalloc_alignment __alloc_size(1);
void *kmalloc_large_noprof(size_t size, gfp_t flags) __assume_page_alignment
__alloc_size(1);
#define kmalloc_large(...) alloc_hooks(kmalloc_large_noprof(__VA_ARGS__))
void *__kmalloc_node_noprof(DECL_BUCKET_PARAMS(size, b), gfp_t flags, int node)
__assume_kmalloc_alignment __alloc_size(1);
void *kmalloc_large_node_noprof(size_t size, gfp_t flags, int node) __assume_page_alignment
__alloc_size(1);
#define kmalloc_large_node(...) alloc_hooks(kmalloc_large_node_noprof(__VA_ARGS__))
void *__kmalloc_cache_noprof(struct kmem_cache *s, gfp_t flags, size_t size)
__assume_kmalloc_alignment __alloc_size(3);
void *__kmalloc_cache_node_noprof(struct kmem_cache *s, gfp_t gfpflags,
int node, size_t size)
__assume_kmalloc_alignment __alloc_size(4);
void *__kmalloc_large_noprof(size_t size, gfp_t flags)
__assume_page_alignment __alloc_size(1);
void *__kmalloc_large_node_noprof(size_t size, gfp_t flags, int node)
__assume_page_alignment __alloc_size(1);
/**
* kmalloc - allocate kernel memory
@ -604,7 +624,8 @@ void *kmalloc_large_node_noprof(size_t size, gfp_t flags, int node) __assume_pag
*
* The allocated object address is aligned to at least ARCH_KMALLOC_MINALIGN
* bytes. For @size of power of two bytes, the alignment is also guaranteed
* to be at least to the size.
* to be at least to the size. For other sizes, the alignment is guaranteed to
* be at least the largest power-of-two divisor of @size.
*
* The @flags argument may be one of the GFP flags defined at
* include/linux/gfp_types.h and described at
@ -654,10 +675,10 @@ static __always_inline __alloc_size(1) void *kmalloc_noprof(size_t size, gfp_t f
unsigned int index;
if (size > KMALLOC_MAX_CACHE_SIZE)
return kmalloc_large_noprof(size, flags);
return __kmalloc_large_noprof(size, flags);
index = kmalloc_index(size);
return kmalloc_trace_noprof(
return __kmalloc_cache_noprof(
kmalloc_caches[kmalloc_type(flags, _RET_IP_)][index],
flags, size);
}
@ -665,20 +686,26 @@ static __always_inline __alloc_size(1) void *kmalloc_noprof(size_t size, gfp_t f
}
#define kmalloc(...) alloc_hooks(kmalloc_noprof(__VA_ARGS__))
#define kmem_buckets_alloc(_b, _size, _flags) \
alloc_hooks(__kmalloc_node_noprof(PASS_BUCKET_PARAMS(_size, _b), _flags, NUMA_NO_NODE))
#define kmem_buckets_alloc_track_caller(_b, _size, _flags) \
alloc_hooks(__kmalloc_node_track_caller_noprof(PASS_BUCKET_PARAMS(_size, _b), _flags, NUMA_NO_NODE, _RET_IP_))
static __always_inline __alloc_size(1) void *kmalloc_node_noprof(size_t size, gfp_t flags, int node)
{
if (__builtin_constant_p(size) && size) {
unsigned int index;
if (size > KMALLOC_MAX_CACHE_SIZE)
return kmalloc_large_node_noprof(size, flags, node);
return __kmalloc_large_node_noprof(size, flags, node);
index = kmalloc_index(size);
return kmalloc_node_trace_noprof(
return __kmalloc_cache_node_noprof(
kmalloc_caches[kmalloc_type(flags, _RET_IP_)][index],
flags, node, size);
}
return __kmalloc_node_noprof(size, flags, node);
return __kmalloc_node_noprof(PASS_BUCKET_PARAMS(size, NULL), flags, node);
}
#define kmalloc_node(...) alloc_hooks(kmalloc_node_noprof(__VA_ARGS__))
@ -729,8 +756,10 @@ static inline __realloc_size(2, 3) void * __must_check krealloc_array_noprof(voi
*/
#define kcalloc(n, size, flags) kmalloc_array(n, size, (flags) | __GFP_ZERO)
void *kmalloc_node_track_caller_noprof(size_t size, gfp_t flags, int node,
unsigned long caller) __alloc_size(1);
void *__kmalloc_node_track_caller_noprof(DECL_BUCKET_PARAMS(size, b), gfp_t flags, int node,
unsigned long caller) __alloc_size(1);
#define kmalloc_node_track_caller_noprof(size, flags, node, caller) \
__kmalloc_node_track_caller_noprof(PASS_BUCKET_PARAMS(size, NULL), flags, node, caller)
#define kmalloc_node_track_caller(...) \
alloc_hooks(kmalloc_node_track_caller_noprof(__VA_ARGS__, _RET_IP_))
@ -756,7 +785,7 @@ static inline __alloc_size(1, 2) void *kmalloc_array_node_noprof(size_t n, size_
return NULL;
if (__builtin_constant_p(n) && __builtin_constant_p(size))
return kmalloc_node_noprof(bytes, flags, node);
return __kmalloc_node_noprof(bytes, flags, node);
return __kmalloc_node_noprof(PASS_BUCKET_PARAMS(bytes, NULL), flags, node);
}
#define kmalloc_array_node(...) alloc_hooks(kmalloc_array_node_noprof(__VA_ARGS__))
@ -780,7 +809,9 @@ static inline __alloc_size(1) void *kzalloc_noprof(size_t size, gfp_t flags)
#define kzalloc(...) alloc_hooks(kzalloc_noprof(__VA_ARGS__))
#define kzalloc_node(_size, _flags, _node) kmalloc_node(_size, (_flags)|__GFP_ZERO, _node)
extern void *kvmalloc_node_noprof(size_t size, gfp_t flags, int node) __alloc_size(1);
void *__kvmalloc_node_noprof(DECL_BUCKET_PARAMS(size, b), gfp_t flags, int node) __alloc_size(1);
#define kvmalloc_node_noprof(size, flags, node) \
__kvmalloc_node_noprof(PASS_BUCKET_PARAMS(size, NULL), flags, node)
#define kvmalloc_node(...) alloc_hooks(kvmalloc_node_noprof(__VA_ARGS__))
#define kvmalloc(_size, _flags) kvmalloc_node(_size, _flags, NUMA_NO_NODE)
@ -788,6 +819,8 @@ extern void *kvmalloc_node_noprof(size_t size, gfp_t flags, int node) __alloc_si
#define kvzalloc(_size, _flags) kvmalloc(_size, (_flags)|__GFP_ZERO)
#define kvzalloc_node(_size, _flags, _node) kvmalloc_node(_size, (_flags)|__GFP_ZERO, _node)
#define kmem_buckets_valloc(_b, _size, _flags) \
alloc_hooks(__kvmalloc_node_noprof(PASS_BUCKET_PARAMS(_size, _b), _flags, NUMA_NO_NODE))
static inline __alloc_size(1, 2) void *
kvmalloc_array_node_noprof(size_t n, size_t size, gfp_t flags, int node)

View File

@ -42,6 +42,17 @@ struct msg_msgseg {
#define DATALEN_MSG ((size_t)PAGE_SIZE-sizeof(struct msg_msg))
#define DATALEN_SEG ((size_t)PAGE_SIZE-sizeof(struct msg_msgseg))
static kmem_buckets *msg_buckets __ro_after_init;
/*
 * Create the "msg_msg" bucket set and publish it in msg_buckets.
 *
 * The usercopy window passed to kmem_buckets_create() starts at
 * sizeof(struct msg_msg) and is DATALEN_MSG bytes long. The result is
 * stored without being checked and the initcall always reports success.
 */
static int __init init_msg_buckets(void)
{
	kmem_buckets *buckets;

	buckets = kmem_buckets_create("msg_msg", SLAB_ACCOUNT,
				      sizeof(struct msg_msg), DATALEN_MSG,
				      NULL);
	msg_buckets = buckets;

	return 0;
}
subsys_initcall(init_msg_buckets);
static struct msg_msg *alloc_msg(size_t len)
{
@ -50,7 +61,7 @@ static struct msg_msg *alloc_msg(size_t len)
size_t alen;
alen = min(len, DATALEN_MSG);
msg = kmalloc(sizeof(*msg) + alen, GFP_KERNEL_ACCOUNT);
msg = kmem_buckets_alloc(msg_buckets, sizeof(*msg) + alen, GFP_KERNEL);
if (msg == NULL)
return NULL;

View File

@ -20,6 +20,7 @@ CONFIG_RANDOMIZE_MEMORY=y
# Randomize allocator freelists, harden metadata.
CONFIG_SLAB_FREELIST_RANDOM=y
CONFIG_SLAB_FREELIST_HARDENED=y
CONFIG_SLAB_BUCKETS=y
CONFIG_SHUFFLE_PAGE_ALLOCATOR=y
CONFIG_RANDOM_KMALLOC_CACHES=y

View File

@ -233,8 +233,6 @@ static void fortify_test_alloc_size_##allocator##_dynamic(struct kunit *test) \
kfree(p)); \
checker(expected_size, \
kmalloc_array_node(alloc_size, 1, gfp, NUMA_NO_NODE), \
kfree(p)); \
checker(expected_size, __kmalloc(alloc_size, gfp), \
kfree(p)); \
\
orig = kmalloc(alloc_size, gfp); \

View File

@ -140,7 +140,7 @@ static void test_kmalloc_redzone_access(struct kunit *test)
{
struct kmem_cache *s = test_kmem_cache_create("TestSlub_RZ_kmalloc", 32,
SLAB_KMALLOC|SLAB_STORE_USER|SLAB_RED_ZONE);
u8 *p = kmalloc_trace(s, GFP_KERNEL, 18);
u8 *p = __kmalloc_cache_noprof(s, GFP_KERNEL, 18);
kasan_disable_current();

View File

@ -273,6 +273,23 @@ config SLAB_FREELIST_HARDENED
sacrifices to harden the kernel slab allocator against common
freelist exploit methods.
config SLAB_BUCKETS
bool "Support allocation from separate kmalloc buckets"
depends on !SLUB_TINY
default SLAB_FREELIST_HARDENED
help
Kernel heap attacks frequently depend on being able to create
specifically-sized allocations with user-controlled contents
that will be allocated into the same kmalloc bucket as a
target object. To avoid sharing these allocation buckets,
provide an explicitly separated set of buckets to be used for
user-controlled allocations. This may very slightly increase
memory fragmentation, though in practice it's only a handful
of extra pages since the bulk of user-controlled allocations
are relatively long-lived.
If unsure, say Y.
config SLUB_STATS
default n
bool "Enable performance statistics"

View File

@ -168,7 +168,7 @@ static_assert(IS_ALIGNED(offsetof(struct slab, freelist), sizeof(freelist_aba_t)
*/
static inline bool slab_test_pfmemalloc(const struct slab *slab)
{
return folio_test_active((struct folio *)slab_folio(slab));
return folio_test_active(slab_folio(slab));
}
static inline void slab_set_pfmemalloc(struct slab *slab)
@ -213,7 +213,7 @@ static inline struct slab *virt_to_slab(const void *addr)
static inline int slab_order(const struct slab *slab)
{
return folio_order((struct folio *)slab_folio(slab));
return folio_order(slab_folio(slab));
}
static inline size_t slab_size(const struct slab *slab)
@ -405,16 +405,18 @@ static inline unsigned int size_index_elem(unsigned int bytes)
* KMALLOC_MAX_CACHE_SIZE and the caller must check that.
*/
static inline struct kmem_cache *
kmalloc_slab(size_t size, gfp_t flags, unsigned long caller)
kmalloc_slab(size_t size, kmem_buckets *b, gfp_t flags, unsigned long caller)
{
unsigned int index;
if (!b)
b = &kmalloc_caches[kmalloc_type(flags, caller)];
if (size <= 192)
index = kmalloc_size_index[size_index_elem(size)];
else
index = fls(size - 1);
return kmalloc_caches[kmalloc_type(flags, caller)][index];
return (*b)[index];
}
gfp_t kmalloc_fix_flags(gfp_t flags);

View File

@ -392,6 +392,98 @@ kmem_cache_create(const char *name, unsigned int size, unsigned int align,
}
EXPORT_SYMBOL(kmem_cache_create);
static struct kmem_cache *kmem_buckets_cache __ro_after_init;
/**
 * kmem_buckets_create - Create a set of caches that handle dynamic sized
 *			 allocations via kmem_buckets_alloc()
 * @name: A prefix string which is used in /proc/slabinfo to identify this
 *	  cache. The individual caches will have their sizes as the suffix.
 * @flags: SLAB flags (see kmem_cache_create() for details).
 * @useroffset: Starting offset within an allocation that may be copied
 *		to/from userspace.
 * @usersize: How many bytes, starting at @useroffset, may be copied
 *		to/from userspace.
 * @ctor: A constructor for the objects, run when new allocations are made.
 *
 * Cannot be called within an interrupt, but can be interrupted.
 *
 * Return: a pointer to the set of caches on success, NULL on failure. When
 * CONFIG_SLAB_BUCKETS is not enabled, ZERO_SIZE_PTR is returned, and
 * subsequent calls to kmem_buckets_alloc() will fall back to kmalloc().
 * (i.e. callers only need to check for NULL on failure.)
 */
kmem_buckets *kmem_buckets_create(const char *name, slab_flags_t flags,
				  unsigned int useroffset,
				  unsigned int usersize,
				  void (*ctor)(void *))
{
	kmem_buckets *b;
	int idx;

	/*
	 * When the separate buckets API is not built in, just return
	 * a non-NULL value for the kmem_buckets pointer, which will be
	 * unused when performing allocations.
	 */
	if (!IS_ENABLED(CONFIG_SLAB_BUCKETS))
		return ZERO_SIZE_PTR;

	if (WARN_ON(!kmem_buckets_cache))
		return NULL;

	/* Zeroed, so every slot that is never populated below stays NULL. */
	b = kmem_cache_alloc(kmem_buckets_cache, GFP_KERNEL|__GFP_ZERO);
	if (WARN_ON(!b))
		return NULL;

	/* Every cache in the set is created with SLAB_NO_MERGE. */
	flags |= SLAB_NO_MERGE;

	for (idx = 0; idx < ARRAY_SIZE(kmalloc_caches[KMALLOC_NORMAL]); idx++) {
		char *short_size, *cache_name;
		unsigned int cache_useroffset, cache_usersize;
		unsigned int size;

		/* Only mirror size classes that exist in the KMALLOC_NORMAL set. */
		if (!kmalloc_caches[KMALLOC_NORMAL][idx])
			continue;

		size = kmalloc_caches[KMALLOC_NORMAL][idx]->object_size;
		if (!size)
			continue;

		/*
		 * Reuse the size suffix of the matching kmalloc cache: the
		 * text following the first '-' in its name.
		 */
		short_size = strchr(kmalloc_caches[KMALLOC_NORMAL][idx]->name, '-');
		if (WARN_ON(!short_size))
			goto fail;

		cache_name = kasprintf(GFP_KERNEL, "%s-%s", name, short_size + 1);
		if (WARN_ON(!cache_name))
			goto fail;

		/*
		 * Clamp the usercopy window to this bucket's object size. A
		 * window that would start at or beyond the end of the object
		 * is disabled entirely.
		 */
		if (useroffset >= size) {
			cache_useroffset = 0;
			cache_usersize = 0;
		} else {
			cache_useroffset = useroffset;
			cache_usersize = min(size - cache_useroffset, usersize);
		}

		(*b)[idx] = kmem_cache_create_usercopy(cache_name, size,
					0, flags, cache_useroffset,
					cache_usersize, ctor);
		/*
		 * NOTE(review): the name is freed right after creation, so
		 * kmem_cache_create_usercopy() presumably keeps its own copy
		 * - confirm.
		 */
		kfree(cache_name);
		if (WARN_ON(!(*b)[idx]))
			goto fail;
	}

	return b;

fail:
	/*
	 * Walk the whole array: slots that were skipped or not yet reached
	 * are still NULL from the zeroed allocation.
	 * NOTE(review): relies on kmem_cache_destroy() accepting NULL - confirm.
	 */
	for (idx = 0; idx < ARRAY_SIZE(kmalloc_caches[KMALLOC_NORMAL]); idx++)
		kmem_cache_destroy((*b)[idx]);
	/* b came from kmem_buckets_cache, so hand it back to that cache. */
	kmem_cache_free(kmem_buckets_cache, b);

	return NULL;
}
EXPORT_SYMBOL(kmem_buckets_create);
#ifdef SLAB_SUPPORTS_SYSFS
/*
* For a given kmem_cache, kmem_cache_destroy() should only be called
@ -617,11 +709,12 @@ void __init create_boot_cache(struct kmem_cache *s, const char *name,
s->size = s->object_size = size;
/*
* For power of two sizes, guarantee natural alignment for kmalloc
* caches, regardless of SL*B debugging options.
* kmalloc caches guarantee alignment of at least the largest
* power-of-two divisor of the size. For power-of-two sizes,
* it is the size itself.
*/
if (is_power_of_2(size))
align = max(align, size);
if (flags & SLAB_KMALLOC)
align = max(align, 1U << (ffs(size) - 1));
s->align = calculate_alignment(flags, align, size);
#ifdef CONFIG_HARDENED_USERCOPY
@ -653,8 +746,7 @@ static struct kmem_cache *__init create_kmalloc_cache(const char *name,
return s;
}
struct kmem_cache *
kmalloc_caches[NR_KMALLOC_TYPES][KMALLOC_SHIFT_HIGH + 1] __ro_after_init =
kmem_buckets kmalloc_caches[NR_KMALLOC_TYPES] __ro_after_init =
{ /* initialization for https://llvm.org/pr42570 */ };
EXPORT_SYMBOL(kmalloc_caches);
@ -703,7 +795,7 @@ size_t kmalloc_size_roundup(size_t size)
* The flags don't matter since size_index is common to all.
* Neither does the caller for just getting ->object_size.
*/
return kmalloc_slab(size, GFP_KERNEL, 0)->object_size;
return kmalloc_slab(size, NULL, GFP_KERNEL, 0)->object_size;
}
/* Above the smaller buckets, size is a multiple of page size. */
@ -932,6 +1024,11 @@ void __init create_kmalloc_caches(void)
/* Kmalloc array is now usable */
slab_state = UP;
if (IS_ENABLED(CONFIG_SLAB_BUCKETS))
kmem_buckets_cache = kmem_cache_create("kmalloc_buckets",
sizeof(kmem_buckets),
0, SLAB_NO_MERGE, NULL);
}
/**

131
mm/slub.c
View File

@ -788,8 +788,24 @@ static bool slab_add_kunit_errors(void)
kunit_put_resource(resource);
return true;
}
static bool slab_in_kunit_test(void)
{
struct kunit_resource *resource;
if (!kunit_get_current_test())
return false;
resource = kunit_find_named_resource(current->kunit_test, "slab_errors");
if (!resource)
return false;
kunit_put_resource(resource);
return true;
}
#else
static inline bool slab_add_kunit_errors(void) { return false; }
static inline bool slab_in_kunit_test(void) { return false; }
#endif
static inline unsigned int size_from_object(struct kmem_cache *s)
@ -962,11 +978,9 @@ void print_tracking(struct kmem_cache *s, void *object)
static void print_slab_info(const struct slab *slab)
{
struct folio *folio = (struct folio *)slab_folio(slab);
pr_err("Slab 0x%p objects=%u used=%u fp=0x%p flags=%pGp\n",
slab, slab->objects, slab->inuse, slab->freelist,
folio_flags(folio, 0));
&slab->__page_flags);
}
/*
@ -1192,8 +1206,6 @@ static int check_bytes_and_report(struct kmem_cache *s, struct slab *slab,
pr_err("0x%p-0x%p @offset=%tu. First byte 0x%x instead of 0x%x\n",
fault, end - 1, fault - addr,
fault[0], value);
print_trailer(s, slab, object);
add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
skip_bug_print:
restore_bytes(s, what, value, fault, end);
@ -1216,8 +1228,8 @@ static int check_bytes_and_report(struct kmem_cache *s, struct slab *slab,
* Padding is extended by another word if Redzoning is enabled and
* object_size == inuse.
*
* We fill with 0xbb (RED_INACTIVE) for inactive objects and with
* 0xcc (RED_ACTIVE) for objects in use.
* We fill with 0xbb (SLUB_RED_INACTIVE) for inactive objects and with
* 0xcc (SLUB_RED_ACTIVE) for objects in use.
*
* object + s->inuse
* Meta data starts here.
@ -1302,15 +1314,16 @@ static int check_object(struct kmem_cache *s, struct slab *slab,
u8 *p = object;
u8 *endobject = object + s->object_size;
unsigned int orig_size, kasan_meta_size;
int ret = 1;
if (s->flags & SLAB_RED_ZONE) {
if (!check_bytes_and_report(s, slab, object, "Left Redzone",
object - s->red_left_pad, val, s->red_left_pad))
return 0;
ret = 0;
if (!check_bytes_and_report(s, slab, object, "Right Redzone",
endobject, val, s->inuse - s->object_size))
return 0;
ret = 0;
if (slub_debug_orig_size(s) && val == SLUB_RED_ACTIVE) {
orig_size = get_orig_size(s, object);
@ -1319,14 +1332,15 @@ static int check_object(struct kmem_cache *s, struct slab *slab,
!check_bytes_and_report(s, slab, object,
"kmalloc Redzone", p + orig_size,
val, s->object_size - orig_size)) {
return 0;
ret = 0;
}
}
} else {
if ((s->flags & SLAB_POISON) && s->object_size < s->inuse) {
check_bytes_and_report(s, slab, p, "Alignment padding",
if (!check_bytes_and_report(s, slab, p, "Alignment padding",
endobject, POISON_INUSE,
s->inuse - s->object_size);
s->inuse - s->object_size))
ret = 0;
}
}
@ -1342,27 +1356,25 @@ static int check_object(struct kmem_cache *s, struct slab *slab,
!check_bytes_and_report(s, slab, p, "Poison",
p + kasan_meta_size, POISON_FREE,
s->object_size - kasan_meta_size - 1))
return 0;
ret = 0;
if (kasan_meta_size < s->object_size &&
!check_bytes_and_report(s, slab, p, "End Poison",
p + s->object_size - 1, POISON_END, 1))
return 0;
ret = 0;
}
/*
* check_pad_bytes cleans up on its own.
*/
check_pad_bytes(s, slab, p);
if (!check_pad_bytes(s, slab, p))
ret = 0;
}
if (!freeptr_outside_object(s) && val == SLUB_RED_ACTIVE)
/*
* Object and freepointer overlap. Cannot check
* freepointer while object is allocated.
*/
return 1;
/* Check free pointer validity */
if (!check_valid_pointer(s, slab, get_freepointer(s, p))) {
/*
* Cannot check freepointer while object is allocated if
* object and freepointer overlap.
*/
if ((freeptr_outside_object(s) || val != SLUB_RED_ACTIVE) &&
!check_valid_pointer(s, slab, get_freepointer(s, p))) {
object_err(s, slab, p, "Freepointer corrupt");
/*
* No choice but to zap it and thus lose the remainder
@ -1370,9 +1382,15 @@ static int check_object(struct kmem_cache *s, struct slab *slab,
* another error because the object count is now wrong.
*/
set_freepointer(s, p, NULL);
return 0;
ret = 0;
}
return 1;
if (!ret && !slab_in_kunit_test()) {
print_trailer(s, slab, object);
add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
}
return ret;
}
static int check_slab(struct kmem_cache *s, struct slab *slab)
@ -2554,7 +2572,7 @@ static void discard_slab(struct kmem_cache *s, struct slab *slab)
*/
static inline bool slab_test_node_partial(const struct slab *slab)
{
return folio_test_workingset((struct folio *)slab_folio(slab));
return folio_test_workingset(slab_folio(slab));
}
static inline void slab_set_node_partial(struct slab *slab)
@ -4063,7 +4081,7 @@ EXPORT_SYMBOL(kmem_cache_alloc_node_noprof);
* directly to the page allocator. We use __GFP_COMP, because we will need to
* know the allocation order to free the pages properly in kfree.
*/
static void *__kmalloc_large_node(size_t size, gfp_t flags, int node)
static void *___kmalloc_large_node(size_t size, gfp_t flags, int node)
{
struct folio *folio;
void *ptr = NULL;
@ -4088,35 +4106,35 @@ static void *__kmalloc_large_node(size_t size, gfp_t flags, int node)
return ptr;
}
void *kmalloc_large_noprof(size_t size, gfp_t flags)
void *__kmalloc_large_noprof(size_t size, gfp_t flags)
{
void *ret = __kmalloc_large_node(size, flags, NUMA_NO_NODE);
void *ret = ___kmalloc_large_node(size, flags, NUMA_NO_NODE);
trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << get_order(size),
flags, NUMA_NO_NODE);
return ret;
}
EXPORT_SYMBOL(kmalloc_large_noprof);
EXPORT_SYMBOL(__kmalloc_large_noprof);
void *kmalloc_large_node_noprof(size_t size, gfp_t flags, int node)
void *__kmalloc_large_node_noprof(size_t size, gfp_t flags, int node)
{
void *ret = __kmalloc_large_node(size, flags, node);
void *ret = ___kmalloc_large_node(size, flags, node);
trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << get_order(size),
flags, node);
return ret;
}
EXPORT_SYMBOL(kmalloc_large_node_noprof);
EXPORT_SYMBOL(__kmalloc_large_node_noprof);
static __always_inline
void *__do_kmalloc_node(size_t size, gfp_t flags, int node,
void *__do_kmalloc_node(size_t size, kmem_buckets *b, gfp_t flags, int node,
unsigned long caller)
{
struct kmem_cache *s;
void *ret;
if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
ret = __kmalloc_large_node(size, flags, node);
ret = __kmalloc_large_node_noprof(size, flags, node);
trace_kmalloc(caller, ret, size,
PAGE_SIZE << get_order(size), flags, node);
return ret;
@ -4125,34 +4143,34 @@ void *__do_kmalloc_node(size_t size, gfp_t flags, int node,
if (unlikely(!size))
return ZERO_SIZE_PTR;
s = kmalloc_slab(size, flags, caller);
s = kmalloc_slab(size, b, flags, caller);
ret = slab_alloc_node(s, NULL, flags, node, caller, size);
ret = kasan_kmalloc(s, ret, size, flags);
trace_kmalloc(caller, ret, size, s->size, flags, node);
return ret;
}
void *__kmalloc_node_noprof(size_t size, gfp_t flags, int node)
void *__kmalloc_node_noprof(DECL_BUCKET_PARAMS(size, b), gfp_t flags, int node)
{
return __do_kmalloc_node(size, flags, node, _RET_IP_);
return __do_kmalloc_node(size, PASS_BUCKET_PARAM(b), flags, node, _RET_IP_);
}
EXPORT_SYMBOL(__kmalloc_node_noprof);
void *__kmalloc_noprof(size_t size, gfp_t flags)
{
return __do_kmalloc_node(size, flags, NUMA_NO_NODE, _RET_IP_);
return __do_kmalloc_node(size, NULL, flags, NUMA_NO_NODE, _RET_IP_);
}
EXPORT_SYMBOL(__kmalloc_noprof);
void *kmalloc_node_track_caller_noprof(size_t size, gfp_t flags,
int node, unsigned long caller)
void *__kmalloc_node_track_caller_noprof(DECL_BUCKET_PARAMS(size, b), gfp_t flags,
int node, unsigned long caller)
{
return __do_kmalloc_node(size, flags, node, caller);
}
EXPORT_SYMBOL(kmalloc_node_track_caller_noprof);
return __do_kmalloc_node(size, PASS_BUCKET_PARAM(b), flags, node, caller);
void *kmalloc_trace_noprof(struct kmem_cache *s, gfp_t gfpflags, size_t size)
}
EXPORT_SYMBOL(__kmalloc_node_track_caller_noprof);
void *__kmalloc_cache_noprof(struct kmem_cache *s, gfp_t gfpflags, size_t size)
{
void *ret = slab_alloc_node(s, NULL, gfpflags, NUMA_NO_NODE,
_RET_IP_, size);
@ -4162,10 +4180,10 @@ void *kmalloc_trace_noprof(struct kmem_cache *s, gfp_t gfpflags, size_t size)
ret = kasan_kmalloc(s, ret, size, gfpflags);
return ret;
}
EXPORT_SYMBOL(kmalloc_trace_noprof);
EXPORT_SYMBOL(__kmalloc_cache_noprof);
void *kmalloc_node_trace_noprof(struct kmem_cache *s, gfp_t gfpflags,
int node, size_t size)
void *__kmalloc_cache_node_noprof(struct kmem_cache *s, gfp_t gfpflags,
int node, size_t size)
{
void *ret = slab_alloc_node(s, NULL, gfpflags, node, _RET_IP_, size);
@ -4174,7 +4192,7 @@ void *kmalloc_node_trace_noprof(struct kmem_cache *s, gfp_t gfpflags,
ret = kasan_kmalloc(s, ret, size, gfpflags);
return ret;
}
EXPORT_SYMBOL(kmalloc_node_trace_noprof);
EXPORT_SYMBOL(__kmalloc_cache_node_noprof);
static noinline void free_to_partial_list(
struct kmem_cache *s, struct slab *slab,
@ -5159,10 +5177,9 @@ static int calculate_sizes(struct kmem_cache *s)
*/
s->inuse = size;
if (slub_debug_orig_size(s) ||
(flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)) ||
((flags & SLAB_RED_ZONE) && s->object_size < sizeof(void *)) ||
s->ctor) {
if ((flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)) || s->ctor ||
((flags & SLAB_RED_ZONE) &&
(s->object_size < sizeof(void *) || slub_debug_orig_size(s)))) {
/*
* Relocate free pointer after the object if it is not
* permitted to overwrite the first word of the object on
@ -5170,7 +5187,9 @@ static int calculate_sizes(struct kmem_cache *s)
*
* This is the case if we do RCU, have a constructor or
* destructor, are poisoning the objects, or are
* redzoning an object smaller than sizeof(void *).
* redzoning an object smaller than sizeof(void *) or are
* redzoning an object with slub_debug_orig_size() enabled,
* in which case the right redzone may be extended.
*
* The assumption that s->offset >= s->inuse means free
* pointer is outside of the object is used in the

View File

@ -198,6 +198,16 @@ char *kmemdup_nul(const char *s, size_t len, gfp_t gfp)
}
EXPORT_SYMBOL(kmemdup_nul);
static kmem_buckets *user_buckets __ro_after_init;
/*
 * Create the "memdup_user" bucket set and publish it in user_buckets.
 *
 * The usercopy window passed to kmem_buckets_create() starts at offset 0
 * with a length of INT_MAX. The result is stored without being checked and
 * the initcall always reports success.
 */
static int __init init_user_buckets(void)
{
	kmem_buckets *buckets;

	buckets = kmem_buckets_create("memdup_user", 0, 0, INT_MAX, NULL);
	user_buckets = buckets;

	return 0;
}
subsys_initcall(init_user_buckets);
/**
* memdup_user - duplicate memory region from user space
*
@ -211,7 +221,7 @@ void *memdup_user(const void __user *src, size_t len)
{
void *p;
p = kmalloc_track_caller(len, GFP_USER | __GFP_NOWARN);
p = kmem_buckets_alloc_track_caller(user_buckets, len, GFP_USER | __GFP_NOWARN);
if (!p)
return ERR_PTR(-ENOMEM);
@ -237,7 +247,7 @@ void *vmemdup_user(const void __user *src, size_t len)
{
void *p;
p = kvmalloc(len, GFP_USER);
p = kmem_buckets_valloc(user_buckets, len, GFP_USER);
if (!p)
return ERR_PTR(-ENOMEM);
@ -594,9 +604,10 @@ unsigned long vm_mmap(struct file *file, unsigned long addr,
EXPORT_SYMBOL(vm_mmap);
/**
* kvmalloc_node - attempt to allocate physically contiguous memory, but upon
* __kvmalloc_node - attempt to allocate physically contiguous memory, but upon
* failure, fall back to non-contiguous (vmalloc) allocation.
* @size: size of the request.
* @b: which set of kmalloc buckets to allocate from.
* @flags: gfp mask for the allocation - must be compatible (superset) with GFP_KERNEL.
* @node: numa node to allocate from
*
@ -609,7 +620,7 @@ EXPORT_SYMBOL(vm_mmap);
*
* Return: pointer to the allocated memory or %NULL in case of failure
*/
void *kvmalloc_node_noprof(size_t size, gfp_t flags, int node)
void *__kvmalloc_node_noprof(DECL_BUCKET_PARAMS(size, b), gfp_t flags, int node)
{
gfp_t kmalloc_flags = flags;
void *ret;
@ -631,7 +642,7 @@ void *kvmalloc_node_noprof(size_t size, gfp_t flags, int node)
kmalloc_flags &= ~__GFP_NOFAIL;
}
ret = kmalloc_node_noprof(size, kmalloc_flags, node);
ret = __kmalloc_node_noprof(PASS_BUCKET_PARAMS(size, b), kmalloc_flags, node);
/*
* It doesn't really make sense to fallback to vmalloc for sub page
@ -660,7 +671,7 @@ void *kvmalloc_node_noprof(size_t size, gfp_t flags, int node)
flags, PAGE_KERNEL, VM_ALLOW_HUGE_VMAP,
node, __builtin_return_address(0));
}
EXPORT_SYMBOL(kvmalloc_node_noprof);
EXPORT_SYMBOL(__kvmalloc_node_noprof);
/**
* kvfree() - Free memory.

View File

@ -18,23 +18,16 @@ pub(crate) unsafe fn krealloc_aligned(ptr: *mut u8, new_layout: Layout, flags: F
// Customized layouts from `Layout::from_size_align()` can have size < align, so pad first.
let layout = new_layout.pad_to_align();
let mut size = layout.size();
if layout.align() > bindings::ARCH_SLAB_MINALIGN {
// The alignment requirement exceeds the slab guarantee, thus try to enlarge the size
// to use the "power-of-two" size/alignment guarantee (see comments in `kmalloc()` for
// more information).
//
// Note that `layout.size()` (after padding) is guaranteed to be a multiple of
// `layout.align()`, so `next_power_of_two` gives enough alignment guarantee.
size = size.next_power_of_two();
}
// Note that `layout.size()` (after padding) is guaranteed to be a multiple of `layout.align()`
// which together with the slab guarantees means the `krealloc` will return a properly aligned
// object (see comments in `kmalloc()` for more information).
let size = layout.size();
// SAFETY:
// - `ptr` is either null or a pointer returned from a previous `k{re}alloc()` by the
// function safety requirement.
// - `size` is greater than 0 since it's either a `layout.size()` (which cannot be zero
// according to the function safety requirement) or a result from `next_power_of_two()`.
// - `size` is greater than 0 since it's from `layout.size()` (which cannot be zero according
// to the function safety requirement)
unsafe { bindings::krealloc(ptr as *const core::ffi::c_void, size, flags.0) as *mut u8 }
}

View File

@ -1729,6 +1729,7 @@ sub dump_function($$) {
$prototype =~ s/__printf\s*\(\s*\d*\s*,\s*\d*\s*\) +//;
$prototype =~ s/__(?:re)?alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) +//;
$prototype =~ s/__diagnose_as\s*\(\s*\S+\s*(?:,\s*\d+\s*)*\) +//;
$prototype =~ s/DECL_BUCKET_PARAMS\s*\(\s*(\S+)\s*,\s*(\S+)\s*\)/$1, $2/;
my $define = $prototype =~ s/^#\s*define\s+//; #ak added
$prototype =~ s/__attribute_const__ +//;
$prototype =~ s/__attribute__\s*\(\(

View File

@ -47,11 +47,8 @@
* Magic nums for obj red zoning.
* Placed in the first word before and the first word after an obj.
*/
#define RED_INACTIVE 0x09F911029D74E35BULL /* when obj is inactive */
#define RED_ACTIVE 0xD84156C5635688C0ULL /* when obj is active */
#define SLUB_RED_INACTIVE 0xbb
#define SLUB_RED_ACTIVE 0xcc
#define SLUB_RED_INACTIVE 0xbb /* when obj is inactive */
#define SLUB_RED_ACTIVE 0xcc /* when obj is active */
/* ...and for poisoning */
#define POISON_INUSE 0x5a /* for use-uninitialised poisoning */