Merge branch 'replace-page_frag-with-page_frag_cache-part-1'
Yunsheng Lin says:
====================
Replace page_frag with page_frag_cache (Part-1)
This is part 1 of "Replace page_frag with page_frag_cache",
which mainly contains refactoring and optimization of the
page_frag API implementation ahead of the replacement.
As discussed in [1], it is better to target the net-next
tree to get more testing, since all callers of the page_frag
API are in networking, and the chance of conflicting with the
MM tree seems low as the page_frag implementation is quite
self-contained.
After [2], there are still two implementations for page frag:
1. mm/page_alloc.c: the net stack uses it on the rx side,
   with 'struct page_frag_cache' and page_frag_alloc_align()
   as the main API.
2. net/core/sock.c: the net stack uses it on the tx side,
   with 'struct page_frag' and skb_page_frag_refill() as
   the main API.
This patchset tries to unify the page frag implementation
by replacing page_frag with page_frag_cache for sk_page_frag()
first. net_high_order_alloc_disable_key for the implementation
in net/core/sock.c doesn't seem to matter that much now, as pcp
also supports high-order pages:
commit 44042b4498
("mm/page_alloc: allow high-order pages to
be stored on the per-cpu lists")
As the related changes are mostly networking-related, this
targets net-next. The rest of the page_frag users will be
converted in a follow-up patchset.
After this patchset:
1. The page frag implementation is unified by taking the best
   of the two existing implementations: we save some space for
   'page_frag_cache' API users and avoid 'get_page()' for the
   old 'page_frag' API users.
2. Future bug fixes and performance work can be done in one
   place, improving the maintainability of page_frag's
   implementation.
Kernel image size change:
Linux Kernel      total |     text     data    bss
------------------------------------------------------
after          45250307 | 27274279 17209996 766032
before         45254134 | 27278118 17209984 766032
delta             -3827 |    -3839      +12     +0
1. https://lore.kernel.org/all/add10dd4-7f5d-4aa1-aa04-767590f944e0@redhat.com/
2. https://lore.kernel.org/all/20240228093013.8263-1-linyunsheng@huawei.com/
====================
Link: https://patch.msgid.link/20241028115343.3405838-1-linyunsheng@huawei.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
commit 4d1d3b52dc
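The end-state API lives in the new include/linux/page_frag_cache.h shown
below. As a reading aid, here is a minimal sketch of a hypothetical
consumer of the unified API: the struct and the page_frag_* functions are
from this patchset, while the demo_* names and the surrounding glue are
invented for illustration. The converted call sites below (vhost-net,
rxrpc, sunrpc) follow this same init/alloc/free/drain pattern.

        /* Hypothetical consumer sketch; not part of the patchset. */
        #include <linux/cache.h>
        #include <linux/gfp.h>
        #include <linux/page_frag_cache.h>

        static struct page_frag_cache demo_cache;

        static void demo_setup(void)
        {
                /* replaces the old open-coded "nc->va = NULL" */
                page_frag_cache_init(&demo_cache);
        }

        static void *demo_get_buf(unsigned int len)
        {
                /* SMP_CACHE_BYTES-aligned fragment; plain
                 * page_frag_alloc() would skip the alignment guarantee
                 */
                return page_frag_alloc_align(&demo_cache, len, GFP_KERNEL,
                                             SMP_CACHE_BYTES);
        }

        static void demo_put_buf(void *buf)
        {
                page_frag_free(buf);    /* drops the fragment's page ref */
        }

        static void demo_teardown(void)
        {
                /* replaces open-coded __page_frag_cache_drain() calls */
                page_frag_cache_drain(&demo_cache);
        }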
arch/xtensa/include/asm/page.h
@@ -109,26 +109,8 @@ typedef struct page *pgtable_t;
 #define __pgd(x)       ((pgd_t) { (x) } )
 #define __pgprot(x)    ((pgprot_t) { (x) } )

-/*
- * Pure 2^n version of get_order
- * Use 'nsau' instructions if supported by the processor or the generic version.
- */
-
-#if XCHAL_HAVE_NSA
-
-static inline __attribute_const__ int get_order(unsigned long size)
-{
-       int lz;
-       asm ("nsau %0, %1" : "=r" (lz) : "r" ((size - 1) >> PAGE_SHIFT));
-       return 32 - lz;
-}
-
-#else
-
-# include <asm-generic/getorder.h>
-
-#endif
-
 struct page;
 struct vm_area_struct;
 extern void clear_page(void *page);
drivers/vhost/net.c
@@ -1325,7 +1325,7 @@ static int vhost_net_open(struct inode *inode, struct file *f)
                       vqs[VHOST_NET_VQ_RX]);

        f->private_data = n;
-       n->pf_cache.va = NULL;
+       page_frag_cache_init(&n->pf_cache);

        return 0;
 }
include/linux/gfp.h
@@ -371,28 +371,6 @@ __meminit void *alloc_pages_exact_nid_noprof(int nid, size_t size, gfp_t gfp_mas
 extern void __free_pages(struct page *page, unsigned int order);
 extern void free_pages(unsigned long addr, unsigned int order);

-struct page_frag_cache;
-void page_frag_cache_drain(struct page_frag_cache *nc);
-extern void __page_frag_cache_drain(struct page *page, unsigned int count);
-void *__page_frag_alloc_align(struct page_frag_cache *nc, unsigned int fragsz,
-                             gfp_t gfp_mask, unsigned int align_mask);
-
-static inline void *page_frag_alloc_align(struct page_frag_cache *nc,
-                                         unsigned int fragsz, gfp_t gfp_mask,
-                                         unsigned int align)
-{
-       WARN_ON_ONCE(!is_power_of_2(align));
-       return __page_frag_alloc_align(nc, fragsz, gfp_mask, -align);
-}
-
-static inline void *page_frag_alloc(struct page_frag_cache *nc,
-                                   unsigned int fragsz, gfp_t gfp_mask)
-{
-       return __page_frag_alloc_align(nc, fragsz, gfp_mask, ~0u);
-}
-
-extern void page_frag_free(void *addr);
-
 #define __free_page(page) __free_pages((page), 0)
 #define free_page(addr) free_pages((addr), 0)
include/linux/mm_types.h
@@ -521,9 +521,6 @@ static_assert(sizeof(struct ptdesc) <= sizeof(struct page));
  */
 #define STRUCT_PAGE_MAX_SHIFT  (order_base_2(sizeof(struct page)))

-#define PAGE_FRAG_CACHE_MAX_SIZE       __ALIGN_MASK(32768, ~PAGE_MASK)
-#define PAGE_FRAG_CACHE_MAX_ORDER      get_order(PAGE_FRAG_CACHE_MAX_SIZE)
-
 /*
  * page_private can be used on tail pages. However, PagePrivate is only
  * checked by the VM on the head page. So page_private on the tail pages
@@ -542,21 +539,6 @@ static inline void *folio_get_private(struct folio *folio)
        return folio->private;
 }

-struct page_frag_cache {
-       void * va;
-#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
-       __u16 offset;
-       __u16 size;
-#else
-       __u32 offset;
-#endif
-       /* we maintain a pagecount bias, so that we dont dirty cache line
-        * containing page->_refcount every time we allocate a fragment.
-        */
-       unsigned int pagecnt_bias;
-       bool pfmemalloc;
-};
-
 typedef unsigned long vm_flags_t;

 /*
include/linux/mm_types_task.h
@@ -8,6 +8,7 @@
  * (These are defined separately to decouple sched.h from mm_types.h as much as possible.)
  */

+#include <linux/align.h>
 #include <linux/types.h>

 #include <asm/page.h>
@@ -43,6 +44,26 @@ struct page_frag {
 #endif
 };

+#define PAGE_FRAG_CACHE_MAX_SIZE       __ALIGN_MASK(32768, ~PAGE_MASK)
+#define PAGE_FRAG_CACHE_MAX_ORDER      get_order(PAGE_FRAG_CACHE_MAX_SIZE)
+struct page_frag_cache {
+       /* encoded_page consists of the virtual address, pfmemalloc bit and
+        * order of a page.
+        */
+       unsigned long encoded_page;
+
+       /* we maintain a pagecount bias, so that we dont dirty cache line
+        * containing page->_refcount every time we allocate a fragment.
+        */
+#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE) && (BITS_PER_LONG <= 32)
+       __u16 offset;
+       __u16 pagecnt_bias;
+#else
+       __u32 offset;
+       __u32 pagecnt_bias;
+#endif
+};
+
 /* Track pages that require TLB flushes */
 struct tlbflush_unmap_batch {
 #ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
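The commit message's claim that the rework "saves some space" for
'page_frag_cache' users follows from the two layouts above: the old
struct (removed from mm_types.h) carried va, offset, size, pagecnt_bias
and a pfmemalloc bool, while the new one packs everything else into
encoded_page. A userspace sketch, assuming a typical LP64 ABI and 4K
pages, to show the difference:

        #include <stdbool.h>
        #include <stdint.h>
        #include <stdio.h>

        struct old_page_frag_cache {    /* pre-series layout */
                void *va;
                uint16_t offset;
                uint16_t size;
                unsigned int pagecnt_bias;
                bool pfmemalloc;
        };

        struct new_page_frag_cache {    /* post-series, BITS_PER_LONG=64 */
                unsigned long encoded_page;
                uint32_t offset;
                uint32_t pagecnt_bias;
        };

        int main(void)
        {
                /* prints "old: 24 bytes, new: 16 bytes" on LP64 */
                printf("old: %zu bytes, new: %zu bytes\n",
                       sizeof(struct old_page_frag_cache),
                       sizeof(struct new_page_frag_cache));
                return 0;
        }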
include/linux/page_frag_cache.h (new file, 61 lines)
@@ -0,0 +1,61 @@
/* SPDX-License-Identifier: GPL-2.0 */

#ifndef _LINUX_PAGE_FRAG_CACHE_H
#define _LINUX_PAGE_FRAG_CACHE_H

#include <linux/bits.h>
#include <linux/log2.h>
#include <linux/mm_types_task.h>
#include <linux/types.h>

#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
/* Use a full byte here to enable assembler optimization as the shift
 * operation is usually expecting a byte.
 */
#define PAGE_FRAG_CACHE_ORDER_MASK             GENMASK(7, 0)
#else
/* Compiler should be able to figure out we don't read things as any value
 * ANDed with 0 is 0.
 */
#define PAGE_FRAG_CACHE_ORDER_MASK             0
#endif

#define PAGE_FRAG_CACHE_PFMEMALLOC_BIT         (PAGE_FRAG_CACHE_ORDER_MASK + 1)

static inline bool encoded_page_decode_pfmemalloc(unsigned long encoded_page)
{
        return !!(encoded_page & PAGE_FRAG_CACHE_PFMEMALLOC_BIT);
}

static inline void page_frag_cache_init(struct page_frag_cache *nc)
{
        nc->encoded_page = 0;
}

static inline bool page_frag_cache_is_pfmemalloc(struct page_frag_cache *nc)
{
        return encoded_page_decode_pfmemalloc(nc->encoded_page);
}

void page_frag_cache_drain(struct page_frag_cache *nc);
void __page_frag_cache_drain(struct page *page, unsigned int count);
void *__page_frag_alloc_align(struct page_frag_cache *nc, unsigned int fragsz,
                              gfp_t gfp_mask, unsigned int align_mask);

static inline void *page_frag_alloc_align(struct page_frag_cache *nc,
                                          unsigned int fragsz, gfp_t gfp_mask,
                                          unsigned int align)
{
        WARN_ON_ONCE(!is_power_of_2(align));
        return __page_frag_alloc_align(nc, fragsz, gfp_mask, -align);
}

static inline void *page_frag_alloc(struct page_frag_cache *nc,
                                    unsigned int fragsz, gfp_t gfp_mask)
{
        return __page_frag_alloc_align(nc, fragsz, gfp_mask, ~0u);
}

void page_frag_free(void *addr);

#endif
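A note on the align_mask convention used by page_frag_alloc_align()
above: for a power-of-two align, the two's-complement negation -align is
a mask whose low log2(align) bits are clear. The old allocator rounded
an offset down with 'offset &= align_mask' while the reworked one rounds
up via __ALIGN_KERNEL_MASK(offset, ~align_mask), since the offset now
grows from 0 instead of counting down from the page size. A standalone
userspace sketch of the arithmetic (not from the patch):

        #include <assert.h>

        int main(void)
        {
                unsigned int align = 64;          /* must be a power of 2 */
                unsigned int align_mask = -align; /* 0xffffffc0: low 6 bits clear */
                unsigned int offset = 100;

                /* round down, as the old downward-growing allocator did */
                assert((offset & align_mask) == 64);

                /* round up, as the reworked allocator does via
                 * __ALIGN_KERNEL_MASK(offset, ~align_mask)
                 */
                assert(((offset + ~align_mask) & align_mask) == 128);

                /* page_frag_alloc() passes ~0u, making both a no-op */
                return 0;
        }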
include/linux/skbuff.h
@@ -31,6 +31,7 @@
 #include <linux/in6.h>
 #include <linux/if_packet.h>
 #include <linux/llist.h>
+#include <linux/page_frag_cache.h>
 #include <net/flow.h>
 #if IS_ENABLED(CONFIG_NF_CONNTRACK)
 #include <linux/netfilter/nf_conntrack_common.h>
mm/Makefile
@@ -65,6 +65,7 @@ page-alloc-$(CONFIG_SHUFFLE_PAGE_ALLOCATOR) += shuffle.o
 memory-hotplug-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o

 obj-y += page-alloc.o
+obj-y += page_frag_cache.o
 obj-y += init-mm.o
 obj-y += memblock.o
 obj-y += $(memory-hotplug-y)
mm/page_alloc.c (136 lines changed)
@@ -4836,142 +4836,6 @@ void free_pages(unsigned long addr, unsigned int order)

 EXPORT_SYMBOL(free_pages);

-/*
- * Page Fragment:
- *  An arbitrary-length arbitrary-offset area of memory which resides
- *  within a 0 or higher order page.  Multiple fragments within that page
- *  are individually refcounted, in the page's reference counter.
- *
- * The page_frag functions below provide a simple allocation framework for
- * page fragments.  This is used by the network stack and network device
- * drivers to provide a backing region of memory for use as either an
- * sk_buff->head, or to be used in the "frags" portion of skb_shared_info.
- */
-static struct page *__page_frag_cache_refill(struct page_frag_cache *nc,
-                                            gfp_t gfp_mask)
-{
-       struct page *page = NULL;
-       gfp_t gfp = gfp_mask;
-
-#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
-       gfp_mask = (gfp_mask & ~__GFP_DIRECT_RECLAIM) | __GFP_COMP |
-                  __GFP_NOWARN | __GFP_NORETRY | __GFP_NOMEMALLOC;
-       page = alloc_pages_node(NUMA_NO_NODE, gfp_mask,
-                               PAGE_FRAG_CACHE_MAX_ORDER);
-       nc->size = page ? PAGE_FRAG_CACHE_MAX_SIZE : PAGE_SIZE;
-#endif
-       if (unlikely(!page))
-               page = alloc_pages_node(NUMA_NO_NODE, gfp, 0);
-
-       nc->va = page ? page_address(page) : NULL;
-
-       return page;
-}
-
-void page_frag_cache_drain(struct page_frag_cache *nc)
-{
-       if (!nc->va)
-               return;
-
-       __page_frag_cache_drain(virt_to_head_page(nc->va), nc->pagecnt_bias);
-       nc->va = NULL;
-}
-EXPORT_SYMBOL(page_frag_cache_drain);
-
-void __page_frag_cache_drain(struct page *page, unsigned int count)
-{
-       VM_BUG_ON_PAGE(page_ref_count(page) == 0, page);
-
-       if (page_ref_sub_and_test(page, count))
-               free_unref_page(page, compound_order(page));
-}
-EXPORT_SYMBOL(__page_frag_cache_drain);
-
-void *__page_frag_alloc_align(struct page_frag_cache *nc,
-                             unsigned int fragsz, gfp_t gfp_mask,
-                             unsigned int align_mask)
-{
-       unsigned int size = PAGE_SIZE;
-       struct page *page;
-       int offset;
-
-       if (unlikely(!nc->va)) {
-refill:
-               page = __page_frag_cache_refill(nc, gfp_mask);
-               if (!page)
-                       return NULL;
-
-#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
-               /* if size can vary use size else just use PAGE_SIZE */
-               size = nc->size;
-#endif
-               /* Even if we own the page, we do not use atomic_set().
-                * This would break get_page_unless_zero() users.
-                */
-               page_ref_add(page, PAGE_FRAG_CACHE_MAX_SIZE);
-
-               /* reset page count bias and offset to start of new frag */
-               nc->pfmemalloc = page_is_pfmemalloc(page);
-               nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1;
-               nc->offset = size;
-       }
-
-       offset = nc->offset - fragsz;
-       if (unlikely(offset < 0)) {
-               page = virt_to_page(nc->va);
-
-               if (!page_ref_sub_and_test(page, nc->pagecnt_bias))
-                       goto refill;
-
-               if (unlikely(nc->pfmemalloc)) {
-                       free_unref_page(page, compound_order(page));
-                       goto refill;
-               }
-
-#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
-               /* if size can vary use size else just use PAGE_SIZE */
-               size = nc->size;
-#endif
-               /* OK, page count is 0, we can safely set it */
-               set_page_count(page, PAGE_FRAG_CACHE_MAX_SIZE + 1);
-
-               /* reset page count bias and offset to start of new frag */
-               nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1;
-               offset = size - fragsz;
-               if (unlikely(offset < 0)) {
-                       /*
-                        * The caller is trying to allocate a fragment
-                        * with fragsz > PAGE_SIZE but the cache isn't big
-                        * enough to satisfy the request, this may
-                        * happen in low memory conditions.
-                        * We don't release the cache page because
-                        * it could make memory pressure worse
-                        * so we simply return NULL here.
-                        */
-                       return NULL;
-               }
-       }
-
-       nc->pagecnt_bias--;
-       offset &= align_mask;
-       nc->offset = offset;
-
-       return nc->va + offset;
-}
-EXPORT_SYMBOL(__page_frag_alloc_align);
-
-/*
- * Frees a page fragment allocated out of either a compound or order 0 page.
- */
-void page_frag_free(void *addr)
-{
-       struct page *page = virt_to_head_page(addr);
-
-       if (unlikely(put_page_testzero(page)))
-               free_unref_page(page, compound_order(page));
-}
-EXPORT_SYMBOL(page_frag_free);
-
 static void *make_alloc_exact(unsigned long addr, unsigned int order,
                               size_t size)
 {
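The "pagecount bias" in the code being moved (and kept in the new file
below) is the core trick of this allocator: the cache takes
PAGE_FRAG_CACHE_MAX_SIZE extra references up front with page_ref_add(),
pays for each fragment by decrementing the cache-local pagecnt_bias
instead of the shared page->_refcount, and settles the account once via
page_ref_sub_and_test() when the page is exhausted. A toy userspace
model of the bookkeeping (single-threaded, hypothetical numbers):

        #include <assert.h>
        #include <stdio.h>

        #define BIAS_MAX (32768 + 1)    /* PAGE_FRAG_CACHE_MAX_SIZE + 1 */

        int main(void)
        {
                /* refcount after the initial ref plus page_ref_add() */
                unsigned int page_refcount = BIAS_MAX;
                /* cache-local counter; decrementing it needs no atomics */
                unsigned int pagecnt_bias = BIAS_MAX;
                unsigned int i;

                /* each fragment "takes" a reference locally */
                for (i = 0; i < 100; i++)
                        pagecnt_bias--;

                /* page_frag_free() drops real references; all 100 freed */
                page_refcount -= 100;

                /* page_ref_sub_and_test(page, pagecnt_bias): zero means
                 * no fragment is still live, so the page can be reused
                 */
                assert(page_refcount - pagecnt_bias == 0);
                printf("page can be reused or freed\n");
                return 0;
        }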
mm/page_frag_cache.c (new file, 171 lines)
@@ -0,0 +1,171 @@
// SPDX-License-Identifier: GPL-2.0-only
/* Page fragment allocator
 *
 * Page Fragment:
 *  An arbitrary-length arbitrary-offset area of memory which resides within a
 *  0 or higher order page.  Multiple fragments within that page are
 *  individually refcounted, in the page's reference counter.
 *
 * The page_frag functions provide a simple allocation framework for page
 * fragments.  This is used by the network stack and network device drivers to
 * provide a backing region of memory for use as either an sk_buff->head, or to
 * be used in the "frags" portion of skb_shared_info.
 */

#include <linux/build_bug.h>
#include <linux/export.h>
#include <linux/gfp_types.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/page_frag_cache.h>
#include "internal.h"

static unsigned long encoded_page_create(struct page *page, unsigned int order,
                                         bool pfmemalloc)
{
        BUILD_BUG_ON(PAGE_FRAG_CACHE_MAX_ORDER > PAGE_FRAG_CACHE_ORDER_MASK);
        BUILD_BUG_ON(PAGE_FRAG_CACHE_PFMEMALLOC_BIT >= PAGE_SIZE);

        return (unsigned long)page_address(page) |
                (order & PAGE_FRAG_CACHE_ORDER_MASK) |
                ((unsigned long)pfmemalloc * PAGE_FRAG_CACHE_PFMEMALLOC_BIT);
}

static unsigned long encoded_page_decode_order(unsigned long encoded_page)
{
        return encoded_page & PAGE_FRAG_CACHE_ORDER_MASK;
}

static void *encoded_page_decode_virt(unsigned long encoded_page)
{
        return (void *)(encoded_page & PAGE_MASK);
}

static struct page *encoded_page_decode_page(unsigned long encoded_page)
{
        return virt_to_page((void *)encoded_page);
}

static struct page *__page_frag_cache_refill(struct page_frag_cache *nc,
                                             gfp_t gfp_mask)
{
        unsigned long order = PAGE_FRAG_CACHE_MAX_ORDER;
        struct page *page = NULL;
        gfp_t gfp = gfp_mask;

#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
        gfp_mask = (gfp_mask & ~__GFP_DIRECT_RECLAIM) | __GFP_COMP |
                   __GFP_NOWARN | __GFP_NORETRY | __GFP_NOMEMALLOC;
        page = __alloc_pages(gfp_mask, PAGE_FRAG_CACHE_MAX_ORDER,
                             numa_mem_id(), NULL);
#endif
        if (unlikely(!page)) {
                page = __alloc_pages(gfp, 0, numa_mem_id(), NULL);
                order = 0;
        }

        nc->encoded_page = page ?
                encoded_page_create(page, order, page_is_pfmemalloc(page)) : 0;

        return page;
}

void page_frag_cache_drain(struct page_frag_cache *nc)
{
        if (!nc->encoded_page)
                return;

        __page_frag_cache_drain(encoded_page_decode_page(nc->encoded_page),
                                nc->pagecnt_bias);
        nc->encoded_page = 0;
}
EXPORT_SYMBOL(page_frag_cache_drain);

void __page_frag_cache_drain(struct page *page, unsigned int count)
{
        VM_BUG_ON_PAGE(page_ref_count(page) == 0, page);

        if (page_ref_sub_and_test(page, count))
                free_unref_page(page, compound_order(page));
}
EXPORT_SYMBOL(__page_frag_cache_drain);

void *__page_frag_alloc_align(struct page_frag_cache *nc,
                              unsigned int fragsz, gfp_t gfp_mask,
                              unsigned int align_mask)
{
        unsigned long encoded_page = nc->encoded_page;
        unsigned int size, offset;
        struct page *page;

        if (unlikely(!encoded_page)) {
refill:
                page = __page_frag_cache_refill(nc, gfp_mask);
                if (!page)
                        return NULL;

                encoded_page = nc->encoded_page;

                /* Even if we own the page, we do not use atomic_set().
                 * This would break get_page_unless_zero() users.
                 */
                page_ref_add(page, PAGE_FRAG_CACHE_MAX_SIZE);

                /* reset page count bias and offset to start of new frag */
                nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1;
                nc->offset = 0;
        }

        size = PAGE_SIZE << encoded_page_decode_order(encoded_page);
        offset = __ALIGN_KERNEL_MASK(nc->offset, ~align_mask);
        if (unlikely(offset + fragsz > size)) {
                if (unlikely(fragsz > PAGE_SIZE)) {
                        /*
                         * The caller is trying to allocate a fragment
                         * with fragsz > PAGE_SIZE but the cache isn't big
                         * enough to satisfy the request, this may
                         * happen in low memory conditions.
                         * We don't release the cache page because
                         * it could make memory pressure worse
                         * so we simply return NULL here.
                         */
                        return NULL;
                }

                page = encoded_page_decode_page(encoded_page);

                if (!page_ref_sub_and_test(page, nc->pagecnt_bias))
                        goto refill;

                if (unlikely(encoded_page_decode_pfmemalloc(encoded_page))) {
                        free_unref_page(page,
                                        encoded_page_decode_order(encoded_page));
                        goto refill;
                }

                /* OK, page count is 0, we can safely set it */
                set_page_count(page, PAGE_FRAG_CACHE_MAX_SIZE + 1);

                /* reset page count bias and offset to start of new frag */
                nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1;
                offset = 0;
        }

        nc->pagecnt_bias--;
        nc->offset = offset + fragsz;

        return encoded_page_decode_virt(encoded_page) + offset;
}
EXPORT_SYMBOL(__page_frag_alloc_align);

/*
 * Frees a page fragment allocated out of either a compound or order 0 page.
 */
void page_frag_free(void *addr)
{
        struct page *page = virt_to_head_page(addr);

        if (unlikely(put_page_testzero(page)))
                free_unref_page(page, compound_order(page));
}
EXPORT_SYMBOL(page_frag_free);
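The encoded_page trick above works because page_address() returns a
PAGE_SIZE-aligned pointer, so the low PAGE_SHIFT bits are guaranteed
zero and can carry the page order (PAGE_FRAG_CACHE_ORDER_MASK, the low
byte) and the pfmemalloc flag (PAGE_FRAG_CACHE_PFMEMALLOC_BIT, the next
bit); the BUILD_BUG_ON()s in encoded_page_create() enforce that both fit
below PAGE_SIZE. A userspace sketch with hypothetical constants standing
in for the kernel macros (4K pages assumed):

        #include <assert.h>

        #define ORDER_MASK      0xffUL            /* PAGE_FRAG_CACHE_ORDER_MASK */
        #define PFMEMALLOC_BIT  (ORDER_MASK + 1)  /* bit 8 */
        #define PAGE_MASK_      (~0xfffUL)        /* 4K pages assumed */

        int main(void)
        {
                unsigned long va = 0xffff888012340000UL; /* page-aligned */
                unsigned long encoded = va | (3UL & ORDER_MASK) | PFMEMALLOC_BIT;

                assert((encoded & PAGE_MASK_) == va);   /* decode_virt */
                assert((encoded & ORDER_MASK) == 3);    /* decode_order */
                assert(encoded & PFMEMALLOC_BIT);       /* decode_pfmemalloc */
                return 0;
        }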
net/core/skbuff.c
@@ -753,14 +753,14 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len,
        if (in_hardirq() || irqs_disabled()) {
                nc = this_cpu_ptr(&netdev_alloc_cache);
                data = page_frag_alloc(nc, len, gfp_mask);
-               pfmemalloc = nc->pfmemalloc;
+               pfmemalloc = page_frag_cache_is_pfmemalloc(nc);
        } else {
                local_bh_disable();
                local_lock_nested_bh(&napi_alloc_cache.bh_lock);

                nc = this_cpu_ptr(&napi_alloc_cache.page);
                data = page_frag_alloc(nc, len, gfp_mask);
-               pfmemalloc = nc->pfmemalloc;
+               pfmemalloc = page_frag_cache_is_pfmemalloc(nc);

                local_unlock_nested_bh(&napi_alloc_cache.bh_lock);
                local_bh_enable();
@@ -850,7 +850,7 @@ struct sk_buff *napi_alloc_skb(struct napi_struct *napi, unsigned int len)
                len = SKB_HEAD_ALIGN(len);

                data = page_frag_alloc(&nc->page, len, gfp_mask);
-               pfmemalloc = nc->page.pfmemalloc;
+               pfmemalloc = page_frag_cache_is_pfmemalloc(&nc->page);
        }
        local_unlock_nested_bh(&napi_alloc_cache.bh_lock);
net/rxrpc/conn_object.c
@@ -337,9 +337,7 @@ static void rxrpc_clean_up_connection(struct work_struct *work)
         */
        rxrpc_purge_queue(&conn->rx_queue);

-       if (conn->tx_data_alloc.va)
-               __page_frag_cache_drain(virt_to_page(conn->tx_data_alloc.va),
-                                       conn->tx_data_alloc.pagecnt_bias);
+       page_frag_cache_drain(&conn->tx_data_alloc);
        call_rcu(&conn->rcu, rxrpc_rcu_free_connection);
 }
net/rxrpc/local_object.c
@@ -452,9 +452,7 @@ void rxrpc_destroy_local(struct rxrpc_local *local)
 #endif
        rxrpc_purge_queue(&local->rx_queue);
        rxrpc_purge_client_connections(local);
-       if (local->tx_alloc.va)
-               __page_frag_cache_drain(virt_to_page(local->tx_alloc.va),
-                                       local->tx_alloc.pagecnt_bias);
+       page_frag_cache_drain(&local->tx_alloc);
 }

 /*
net/sunrpc/svcsock.c
@@ -1608,7 +1608,6 @@ static void svc_tcp_sock_detach(struct svc_xprt *xprt)
 static void svc_sock_free(struct svc_xprt *xprt)
 {
        struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
-       struct page_frag_cache *pfc = &svsk->sk_frag_cache;
        struct socket *sock = svsk->sk_sock;

        trace_svcsock_free(svsk, sock);
@@ -1618,8 +1617,7 @@ static void svc_sock_free(struct svc_xprt *xprt)
                sockfd_put(sock);
        else
                sock_release(sock);
-       if (pfc->va)
-               __page_frag_cache_drain(virt_to_head_page(pfc->va),
-                                       pfc->pagecnt_bias);
+
+       page_frag_cache_drain(&svsk->sk_frag_cache);
        kfree(svsk);
 }
tools/testing/selftests/mm/Makefile
@@ -36,6 +36,8 @@ MAKEFLAGS += --no-builtin-rules
 CFLAGS = -Wall -I $(top_srcdir) $(EXTRA_CFLAGS) $(KHDR_INCLUDES) $(TOOLS_INCLUDES)
 LDLIBS = -lrt -lpthread -lm

+TEST_GEN_MODS_DIR := page_frag
+
 TEST_GEN_FILES = cow
 TEST_GEN_FILES += compaction_test
 TEST_GEN_FILES += gup_longterm
@@ -126,6 +128,7 @@ TEST_FILES += test_hmm.sh
 TEST_FILES += va_high_addr_switch.sh
 TEST_FILES += charge_reserved_hugetlb.sh
 TEST_FILES += hugetlb_reparenting_test.sh
+TEST_FILES += test_page_frag.sh

 # required by charge_reserved_hugetlb.sh
 TEST_FILES += write_hugetlb_memory.sh
tools/testing/selftests/mm/page_frag/Makefile (new file, 18 lines)
@@ -0,0 +1,18 @@
PAGE_FRAG_TEST_DIR := $(realpath $(dir $(abspath $(lastword $(MAKEFILE_LIST)))))
KDIR ?= $(abspath $(PAGE_FRAG_TEST_DIR)/../../../../..)

ifeq ($(V),1)
Q =
else
Q = @
endif

MODULES = page_frag_test.ko

obj-m += page_frag_test.o

all:
	+$(Q)make -C $(KDIR) M=$(PAGE_FRAG_TEST_DIR) modules

clean:
	+$(Q)make -C $(KDIR) M=$(PAGE_FRAG_TEST_DIR) clean
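The KDIR default above resolves five directories up, i.e. to the
enclosing kernel source tree, so with a configured and built tree the
module should build either through this Makefile or with a plain kbuild
M= invocation (paths here are illustrative):

        make -C tools/testing/selftests/mm/page_frag
        # roughly equivalent to:
        make -C /path/to/linux M=tools/testing/selftests/mm/page_frag modules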
tools/testing/selftests/mm/page_frag/page_frag_test.c (new file, 198 lines)
@@ -0,0 +1,198 @@
// SPDX-License-Identifier: GPL-2.0

/*
 * Test module for page_frag cache
 *
 * Copyright (C) 2024 Yunsheng Lin <linyunsheng@huawei.com>
 */

#include <linux/module.h>
#include <linux/cpumask.h>
#include <linux/completion.h>
#include <linux/ptr_ring.h>
#include <linux/kthread.h>
#include <linux/page_frag_cache.h>

#define TEST_FAILED_PREFIX     "page_frag_test failed: "

static struct ptr_ring ptr_ring;
static int nr_objs = 512;
static atomic_t nthreads;
static struct completion wait;
static struct page_frag_cache test_nc;
static int test_popped;
static int test_pushed;
static bool force_exit;

static int nr_test = 2000000;
module_param(nr_test, int, 0);
MODULE_PARM_DESC(nr_test, "number of iterations to test");

static bool test_align;
module_param(test_align, bool, 0);
MODULE_PARM_DESC(test_align, "use align API for testing");

static int test_alloc_len = 2048;
module_param(test_alloc_len, int, 0);
MODULE_PARM_DESC(test_alloc_len, "alloc len for testing");

static int test_push_cpu;
module_param(test_push_cpu, int, 0);
MODULE_PARM_DESC(test_push_cpu, "test cpu for pushing fragment");

static int test_pop_cpu;
module_param(test_pop_cpu, int, 0);
MODULE_PARM_DESC(test_pop_cpu, "test cpu for popping fragment");

static int page_frag_pop_thread(void *arg)
{
        struct ptr_ring *ring = arg;

        pr_info("page_frag pop test thread begins on cpu %d\n",
                smp_processor_id());

        while (test_popped < nr_test) {
                void *obj = __ptr_ring_consume(ring);

                if (obj) {
                        test_popped++;
                        page_frag_free(obj);
                } else {
                        if (force_exit)
                                break;

                        cond_resched();
                }
        }

        if (atomic_dec_and_test(&nthreads))
                complete(&wait);

        pr_info("page_frag pop test thread exits on cpu %d\n",
                smp_processor_id());

        return 0;
}

static int page_frag_push_thread(void *arg)
{
        struct ptr_ring *ring = arg;

        pr_info("page_frag push test thread begins on cpu %d\n",
                smp_processor_id());

        while (test_pushed < nr_test && !force_exit) {
                void *va;
                int ret;

                if (test_align) {
                        va = page_frag_alloc_align(&test_nc, test_alloc_len,
                                                   GFP_KERNEL, SMP_CACHE_BYTES);

                        if ((unsigned long)va & (SMP_CACHE_BYTES - 1)) {
                                force_exit = true;
                                WARN_ONCE(true, TEST_FAILED_PREFIX "unaligned va returned\n");
                        }
                } else {
                        va = page_frag_alloc(&test_nc, test_alloc_len, GFP_KERNEL);
                }

                if (!va)
                        continue;

                ret = __ptr_ring_produce(ring, va);
                if (ret) {
                        page_frag_free(va);
                        cond_resched();
                } else {
                        test_pushed++;
                }
        }

        pr_info("page_frag push test thread exits on cpu %d\n",
                smp_processor_id());

        if (atomic_dec_and_test(&nthreads))
                complete(&wait);

        return 0;
}

static int __init page_frag_test_init(void)
{
        struct task_struct *tsk_push, *tsk_pop;
        int last_pushed = 0, last_popped = 0;
        ktime_t start;
        u64 duration;
        int ret;

        page_frag_cache_init(&test_nc);
        atomic_set(&nthreads, 2);
        init_completion(&wait);

        if (test_alloc_len > PAGE_SIZE || test_alloc_len <= 0 ||
            !cpu_active(test_push_cpu) || !cpu_active(test_pop_cpu))
                return -EINVAL;

        ret = ptr_ring_init(&ptr_ring, nr_objs, GFP_KERNEL);
        if (ret)
                return ret;

        tsk_push = kthread_create_on_cpu(page_frag_push_thread, &ptr_ring,
                                         test_push_cpu, "page_frag_push");
        if (IS_ERR(tsk_push))
                return PTR_ERR(tsk_push);

        tsk_pop = kthread_create_on_cpu(page_frag_pop_thread, &ptr_ring,
                                        test_pop_cpu, "page_frag_pop");
        if (IS_ERR(tsk_pop)) {
                kthread_stop(tsk_push);
                return PTR_ERR(tsk_pop);
        }

        start = ktime_get();
        wake_up_process(tsk_push);
        wake_up_process(tsk_pop);

        pr_info("waiting for test to complete\n");

        while (!wait_for_completion_timeout(&wait, msecs_to_jiffies(10000))) {
                /* exit if there is no progress for push or pop size */
                if (last_pushed == test_pushed || last_popped == test_popped) {
                        WARN_ONCE(true, TEST_FAILED_PREFIX "no progress\n");
                        force_exit = true;
                        continue;
                }

                last_pushed = test_pushed;
                last_popped = test_popped;
                pr_info("page_frag_test progress: pushed = %d, popped = %d\n",
                        test_pushed, test_popped);
        }

        if (force_exit) {
                pr_err(TEST_FAILED_PREFIX "exit with error\n");
                goto out;
        }

        duration = (u64)ktime_us_delta(ktime_get(), start);
        pr_info("%d of iterations for %s testing took: %lluus\n", nr_test,
                test_align ? "aligned" : "non-aligned", duration);

out:
        ptr_ring_cleanup(&ptr_ring, NULL);
        page_frag_cache_drain(&test_nc);

        return -EAGAIN;
}

static void __exit page_frag_test_exit(void)
{
}

module_init(page_frag_test_init);
module_exit(page_frag_test_exit);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Yunsheng Lin <linyunsheng@huawei.com>");
MODULE_DESCRIPTION("Test module for page_frag");
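Besides being driven by test_page_frag.sh (below), the module can be
loaded by hand with any of the parameters declared above, for example:

        insmod ./page_frag_test.ko test_push_cpu=0 test_pop_cpu=1 \
                test_alloc_len=75 test_align=1 nr_test=1000000
        dmesg | tail   # timing summary; "page_frag_test failed:" marks a failure

Note that page_frag_test_init() returns -EAGAIN even on success, so the
module never stays loaded and the test can simply be re-run.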
tools/testing/selftests/mm/run_vmtests.sh
@@ -75,6 +75,8 @@ separated by spaces:
 	read-only VMAs
 - mdwe
 	test prctl(PR_SET_MDWE, ...)
+- page_frag
+	test handling of page fragment allocation and freeing

 example: ./run_vmtests.sh -t "hmm mmap ksm"
 EOF
@@ -456,6 +458,12 @@ CATEGORY="mkdirty" run_test ./mkdirty

 CATEGORY="mdwe" run_test ./mdwe_test

+CATEGORY="page_frag" run_test ./test_page_frag.sh smoke
+
+CATEGORY="page_frag" run_test ./test_page_frag.sh aligned
+
+CATEGORY="page_frag" run_test ./test_page_frag.sh nonaligned
+
 echo "SUMMARY: PASS=${count_pass} SKIP=${count_skip} FAIL=${count_fail}" | tap_prefix
 echo "1..${count_total}" | tap_output
tools/testing/selftests/mm/test_page_frag.sh (new file, 175 lines)
@@ -0,0 +1,175 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
#
# Copyright (C) 2024 Yunsheng Lin <linyunsheng@huawei.com>
# Copyright (C) 2018 Uladzislau Rezki (Sony) <urezki@gmail.com>
#
# This is a test script for the kernel test driver to test the
# correctness and performance of page_frag's implementation.
# Therefore it is just a kernel module loader. You can specify
# and pass different parameters in order to:
#     a) analyse performance of page fragment allocations;
#     b) stressing and stability check of page_frag subsystem.

DRIVER="./page_frag/page_frag_test.ko"
CPU_LIST=$(grep -m 2 processor /proc/cpuinfo | cut -d ' ' -f 2)
TEST_CPU_0=$(echo $CPU_LIST | awk '{print $1}')

if [ $(echo $CPU_LIST | wc -w) -gt 1 ]; then
	TEST_CPU_1=$(echo $CPU_LIST | awk '{print $2}')
	NR_TEST=100000000
else
	TEST_CPU_1=$TEST_CPU_0
	NR_TEST=1000000
fi

# 1 if fails
exitcode=1

# Kselftest framework requirement - SKIP code is 4.
ksft_skip=4

check_test_failed_prefix() {
	if dmesg | grep -q 'page_frag_test failed:'; then
		echo "page_frag_test failed, please check dmesg"
		exit $exitcode
	fi
}

#
# Static templates for testing of page_frag APIs.
# Also it is possible to pass any supported parameters manually.
#
SMOKE_PARAM="test_push_cpu=$TEST_CPU_0 test_pop_cpu=$TEST_CPU_1"
NONALIGNED_PARAM="$SMOKE_PARAM test_alloc_len=75 nr_test=$NR_TEST"
ALIGNED_PARAM="$NONALIGNED_PARAM test_align=1"

check_test_requirements()
{
	uid=$(id -u)
	if [ $uid -ne 0 ]; then
		echo "$0: Must be run as root"
		exit $ksft_skip
	fi

	if ! which insmod > /dev/null 2>&1; then
		echo "$0: You need insmod installed"
		exit $ksft_skip
	fi

	if [ ! -f $DRIVER ]; then
		echo "$0: You need to compile page_frag_test module"
		exit $ksft_skip
	fi
}

run_nonaligned_check()
{
	echo "Run performance tests to evaluate how fast nonaligned alloc API is."

	insmod $DRIVER $NONALIGNED_PARAM > /dev/null 2>&1
}

run_aligned_check()
{
	echo "Run performance tests to evaluate how fast aligned alloc API is."

	insmod $DRIVER $ALIGNED_PARAM > /dev/null 2>&1
}

run_smoke_check()
{
	echo "Run smoke test."

	insmod $DRIVER $SMOKE_PARAM > /dev/null 2>&1
}

usage()
{
	echo -n "Usage: $0 [ aligned ] | [ nonaligned ] | [ smoke ] | "
	echo "manual parameters"
	echo
	echo "Valid tests and parameters:"
	echo
	modinfo $DRIVER
	echo
	echo "Example usage:"
	echo
	echo "# Shows help message"
	echo "$0"
	echo
	echo "# Smoke testing"
	echo "$0 smoke"
	echo
	echo "# Performance testing for nonaligned alloc API"
	echo "$0 nonaligned"
	echo
	echo "# Performance testing for aligned alloc API"
	echo "$0 aligned"
	echo
	exit 0
}

function validate_passed_args()
{
	VALID_ARGS=`modinfo $DRIVER | awk '/parm:/ {print $2}' | sed 's/:.*//'`

	#
	# Something has been passed, check it.
	#
	for passed_arg in $@; do
		key=${passed_arg//=*/}
		valid=0

		for valid_arg in $VALID_ARGS; do
			if [[ $key = $valid_arg ]]; then
				valid=1
				break
			fi
		done

		if [[ $valid -ne 1 ]]; then
			echo "Error: key is not correct: ${key}"
			exit $exitcode
		fi
	done
}

function run_manual_check()
{
	#
	# Validate passed parameters. If there is a wrong one,
	# the script exits and does not execute further.
	#
	validate_passed_args $@

	echo "Run the test with following parameters: $@"
	insmod $DRIVER $@ > /dev/null 2>&1
}

function run_test()
{
	if [ $# -eq 0 ]; then
		usage
	else
		if [[ "$1" = "smoke" ]]; then
			run_smoke_check
		elif [[ "$1" = "nonaligned" ]]; then
			run_nonaligned_check
		elif [[ "$1" = "aligned" ]]; then
			run_aligned_check
		else
			run_manual_check $@
		fi
	fi

	check_test_failed_prefix

	echo "Done."
	echo "Check the kernel ring buffer to see the summary."
}

check_test_requirements
run_test $@

exit 0