Merge branch 'replace-page_frag-with-page_frag_cache-part-1'

Yunsheng Lin says:

====================
Replace page_frag with page_frag_cache (Part-1)

This is part 1 of "Replace page_frag with page_frag_cache",
which mainly contains refactoring and optimization of the
page_frag API implementation ahead of the replacement.

As discussed in [1], it is better to target the net-next tree
to get more testing, since all callers of the page_frag API are
in networking, and the chance of conflicting with the MM tree
seems low as the page_frag API implementation is quite
self-contained.

After [2], there are still two implementations for page frag,
each shown in the usage sketch below:

1. mm/page_alloc.c: net stack seems to be using it in the
   rx part with 'struct page_frag_cache' and the main API
   being page_frag_alloc_align().
2. net/core/sock.c: net stack seems to be using it in the
   tx part with 'struct page_frag' and the main API being
   skb_page_frag_refill().
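
A minimal usage sketch of the two APIs as they stand before this
series (an illustration, not part of the patch; signatures as in
current net-next, with 'sk', 'len' and 'copy' as placeholders):

	/* 1. rx side: per-CPU 'struct page_frag_cache' */
	struct page_frag_cache *nc = this_cpu_ptr(&napi_alloc_cache.page);
	void *data = page_frag_alloc_align(nc, len, GFP_ATOMIC,
					   SMP_CACHE_BYTES);

	/* 2. tx side: per-socket 'struct page_frag', refilled on demand */
	struct page_frag *pfrag = sk_page_frag(sk);
	if (skb_page_frag_refill(copy, pfrag, sk->sk_allocation))
		/* copy 'copy' bytes to pfrag->page at pfrag->offset */;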

This patchset tries to unify the page frag implementation
by replacing page_frag with page_frag_cache for sk_page_frag()
first. net_high_order_alloc_disable_key for the implementation
in net/core/sock.c doesn't seem to matter that much now, as pcp
also supports high-order pages:
commit 44042b4498 ("mm/page_alloc: allow high-order pages to
be stored on the per-cpu lists")

As the changes are mostly related to networking, this targets
the net-next tree. The remaining page_frag users will be
converted in a follow-up patchset.

After this patchset:
1. The page frag implementation is unified, taking the best of
   the two existing implementations: we are able to save some
   space for 'page_frag_cache' API users and avoid a 'get_page()'
   call for old 'page_frag' API users.
2. Future bug fixes and performance work can be done in one
   place, improving the maintainability of page_frag's
   implementation.

Kernel image size change:
    Linux Kernel   total |      text      data        bss
    ------------------------------------------------------
    after     45250307 |   27274279   17209996     766032
    before    45254134 |   27278118   17209984     766032
    delta        -3827 |      -3839        +12         +0

1. https://lore.kernel.org/all/add10dd4-7f5d-4aa1-aa04-767590f944e0@redhat.com/
2. https://lore.kernel.org/all/20240228093013.8263-1-linyunsheng@huawei.com/
====================

Link: https://patch.msgid.link/20241028115343.3405838-1-linyunsheng@huawei.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
commit 4d1d3b52dc (Jakub Kicinski, 2024-11-11 10:56:30 -08:00)
19 changed files with 665 additions and 208 deletions


@@ -109,26 +109,8 @@ typedef struct page *pgtable_t;
#define __pgd(x) ((pgd_t) { (x) } )
#define __pgprot(x) ((pgprot_t) { (x) } )
/*
* Pure 2^n version of get_order
* Use 'nsau' instructions if supported by the processor or the generic version.
*/
#if XCHAL_HAVE_NSA
static inline __attribute_const__ int get_order(unsigned long size)
{
int lz;
asm ("nsau %0, %1" : "=r" (lz) : "r" ((size - 1) >> PAGE_SHIFT));
return 32 - lz;
}
#else
# include <asm-generic/getorder.h>
#endif
struct page;
struct vm_area_struct;
extern void clear_page(void *page);


@@ -1325,7 +1325,7 @@ static int vhost_net_open(struct inode *inode, struct file *f)
vqs[VHOST_NET_VQ_RX]);
f->private_data = n;
n->pf_cache.va = NULL;
page_frag_cache_init(&n->pf_cache);
return 0;
}


@@ -371,28 +371,6 @@ __meminit void *alloc_pages_exact_nid_noprof(int nid, size_t size, gfp_t gfp_mas
extern void __free_pages(struct page *page, unsigned int order);
extern void free_pages(unsigned long addr, unsigned int order);
struct page_frag_cache;
void page_frag_cache_drain(struct page_frag_cache *nc);
extern void __page_frag_cache_drain(struct page *page, unsigned int count);
void *__page_frag_alloc_align(struct page_frag_cache *nc, unsigned int fragsz,
gfp_t gfp_mask, unsigned int align_mask);
static inline void *page_frag_alloc_align(struct page_frag_cache *nc,
unsigned int fragsz, gfp_t gfp_mask,
unsigned int align)
{
WARN_ON_ONCE(!is_power_of_2(align));
return __page_frag_alloc_align(nc, fragsz, gfp_mask, -align);
}
static inline void *page_frag_alloc(struct page_frag_cache *nc,
unsigned int fragsz, gfp_t gfp_mask)
{
return __page_frag_alloc_align(nc, fragsz, gfp_mask, ~0u);
}
extern void page_frag_free(void *addr);
#define __free_page(page) __free_pages((page), 0)
#define free_page(addr) free_pages((addr), 0)


@@ -521,9 +521,6 @@ static_assert(sizeof(struct ptdesc) <= sizeof(struct page));
*/
#define STRUCT_PAGE_MAX_SHIFT (order_base_2(sizeof(struct page)))
#define PAGE_FRAG_CACHE_MAX_SIZE __ALIGN_MASK(32768, ~PAGE_MASK)
#define PAGE_FRAG_CACHE_MAX_ORDER get_order(PAGE_FRAG_CACHE_MAX_SIZE)
/*
* page_private can be used on tail pages. However, PagePrivate is only
* checked by the VM on the head page. So page_private on the tail pages
@@ -542,21 +539,6 @@ static inline void *folio_get_private(struct folio *folio)
return folio->private;
}
struct page_frag_cache {
void * va;
#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
__u16 offset;
__u16 size;
#else
__u32 offset;
#endif
/* we maintain a pagecount bias, so that we dont dirty cache line
* containing page->_refcount every time we allocate a fragment.
*/
unsigned int pagecnt_bias;
bool pfmemalloc;
};
typedef unsigned long vm_flags_t;
/*


@@ -8,6 +8,7 @@
* (These are defined separately to decouple sched.h from mm_types.h as much as possible.)
*/
#include <linux/align.h>
#include <linux/types.h>
#include <asm/page.h>
@@ -43,6 +44,26 @@ struct page_frag {
#endif
};
#define PAGE_FRAG_CACHE_MAX_SIZE __ALIGN_MASK(32768, ~PAGE_MASK)
#define PAGE_FRAG_CACHE_MAX_ORDER get_order(PAGE_FRAG_CACHE_MAX_SIZE)
struct page_frag_cache {
/* encoded_page consists of the virtual address, pfmemalloc bit and
* order of a page.
*/
unsigned long encoded_page;
/* we maintain a pagecount bias, so that we don't dirty the cache
* line containing page->_refcount every time we allocate a fragment.
*/
#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE) && (BITS_PER_LONG <= 32)
__u16 offset;
__u16 pagecnt_bias;
#else
__u32 offset;
__u32 pagecnt_bias;
#endif
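/* With the metadata folded into encoded_page, this struct is 16
* bytes on 64-bit, versus 24 bytes for the old layout of
* va/offset/size/pagecnt_bias/pfmemalloc fields.
*/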
};
/* Track pages that require TLB flushes */
struct tlbflush_unmap_batch {
#ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH


@@ -0,0 +1,61 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_PAGE_FRAG_CACHE_H
#define _LINUX_PAGE_FRAG_CACHE_H
#include <linux/bits.h>
#include <linux/log2.h>
#include <linux/mm_types_task.h>
#include <linux/types.h>
#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
/* Use a full byte here to enable assembler optimization as the shift
* operation is usually expecting a byte.
*/
#define PAGE_FRAG_CACHE_ORDER_MASK GENMASK(7, 0)
#else
/* Compiler should be able to figure out we don't read things as any value
* ANDed with 0 is 0.
*/
#define PAGE_FRAG_CACHE_ORDER_MASK 0
#endif
#define PAGE_FRAG_CACHE_PFMEMALLOC_BIT (PAGE_FRAG_CACHE_ORDER_MASK + 1)
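/*
 * With PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE, encoded_page packs its three
 * fields into one word by reusing the PAGE_SHIFT low bits of the
 * page-aligned virtual address:
 *
 *	bits [BITS_PER_LONG-1..PAGE_SHIFT]	virtual address of the page
 *	bit  8					pfmemalloc flag
 *	bits [7..0]				page order
 *
 * Otherwise the order is always 0 and only bit 0 carries the pfmemalloc
 * flag. The BUILD_BUG_ON()s next to encoded_page_create() verify that the
 * metadata fits below PAGE_SHIFT.
 */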
static inline bool encoded_page_decode_pfmemalloc(unsigned long encoded_page)
{
return !!(encoded_page & PAGE_FRAG_CACHE_PFMEMALLOC_BIT);
}
static inline void page_frag_cache_init(struct page_frag_cache *nc)
{
nc->encoded_page = 0;
}
static inline bool page_frag_cache_is_pfmemalloc(struct page_frag_cache *nc)
{
return encoded_page_decode_pfmemalloc(nc->encoded_page);
}
void page_frag_cache_drain(struct page_frag_cache *nc);
void __page_frag_cache_drain(struct page *page, unsigned int count);
void *__page_frag_alloc_align(struct page_frag_cache *nc, unsigned int fragsz,
gfp_t gfp_mask, unsigned int align_mask);
static inline void *page_frag_alloc_align(struct page_frag_cache *nc,
unsigned int fragsz, gfp_t gfp_mask,
unsigned int align)
{
WARN_ON_ONCE(!is_power_of_2(align));
return __page_frag_alloc_align(nc, fragsz, gfp_mask, -align);
}
static inline void *page_frag_alloc(struct page_frag_cache *nc,
unsigned int fragsz, gfp_t gfp_mask)
{
return __page_frag_alloc_align(nc, fragsz, gfp_mask, ~0u);
}
void page_frag_free(void *addr);
#endif
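
Taken together, the header above is the whole consumer-facing API.
A minimal usage sketch (an illustration, not part of the patch;
kernel context and GFP_KERNEL assumed):

	struct page_frag_cache nc;
	void *va;

	page_frag_cache_init(&nc);
	va = page_frag_alloc_align(&nc, 1024, GFP_KERNEL, SMP_CACHE_BYTES);
	if (va) {
		/* use the 1024-byte fragment, then drop its reference */
		page_frag_free(va);
	}
	/* drop the reference bias still held by the cache itself */
	page_frag_cache_drain(&nc);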


@@ -31,6 +31,7 @@
#include <linux/in6.h>
#include <linux/if_packet.h>
#include <linux/llist.h>
#include <linux/page_frag_cache.h>
#include <net/flow.h>
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
#include <linux/netfilter/nf_conntrack_common.h>


@@ -65,6 +65,7 @@ page-alloc-$(CONFIG_SHUFFLE_PAGE_ALLOCATOR) += shuffle.o
memory-hotplug-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o
obj-y += page-alloc.o
obj-y += page_frag_cache.o
obj-y += init-mm.o
obj-y += memblock.o
obj-y += $(memory-hotplug-y)


@@ -4836,142 +4836,6 @@ void free_pages(unsigned long addr, unsigned int order)
EXPORT_SYMBOL(free_pages);
/*
* Page Fragment:
* An arbitrary-length arbitrary-offset area of memory which resides
* within a 0 or higher order page. Multiple fragments within that page
* are individually refcounted, in the page's reference counter.
*
* The page_frag functions below provide a simple allocation framework for
* page fragments. This is used by the network stack and network device
* drivers to provide a backing region of memory for use as either an
* sk_buff->head, or to be used in the "frags" portion of skb_shared_info.
*/
static struct page *__page_frag_cache_refill(struct page_frag_cache *nc,
gfp_t gfp_mask)
{
struct page *page = NULL;
gfp_t gfp = gfp_mask;
#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
gfp_mask = (gfp_mask & ~__GFP_DIRECT_RECLAIM) | __GFP_COMP |
__GFP_NOWARN | __GFP_NORETRY | __GFP_NOMEMALLOC;
page = alloc_pages_node(NUMA_NO_NODE, gfp_mask,
PAGE_FRAG_CACHE_MAX_ORDER);
nc->size = page ? PAGE_FRAG_CACHE_MAX_SIZE : PAGE_SIZE;
#endif
if (unlikely(!page))
page = alloc_pages_node(NUMA_NO_NODE, gfp, 0);
nc->va = page ? page_address(page) : NULL;
return page;
}
void page_frag_cache_drain(struct page_frag_cache *nc)
{
if (!nc->va)
return;
__page_frag_cache_drain(virt_to_head_page(nc->va), nc->pagecnt_bias);
nc->va = NULL;
}
EXPORT_SYMBOL(page_frag_cache_drain);
void __page_frag_cache_drain(struct page *page, unsigned int count)
{
VM_BUG_ON_PAGE(page_ref_count(page) == 0, page);
if (page_ref_sub_and_test(page, count))
free_unref_page(page, compound_order(page));
}
EXPORT_SYMBOL(__page_frag_cache_drain);
void *__page_frag_alloc_align(struct page_frag_cache *nc,
unsigned int fragsz, gfp_t gfp_mask,
unsigned int align_mask)
{
unsigned int size = PAGE_SIZE;
struct page *page;
int offset;
if (unlikely(!nc->va)) {
refill:
page = __page_frag_cache_refill(nc, gfp_mask);
if (!page)
return NULL;
#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
/* if size can vary use size else just use PAGE_SIZE */
size = nc->size;
#endif
/* Even if we own the page, we do not use atomic_set().
* This would break get_page_unless_zero() users.
*/
page_ref_add(page, PAGE_FRAG_CACHE_MAX_SIZE);
/* reset page count bias and offset to start of new frag */
nc->pfmemalloc = page_is_pfmemalloc(page);
nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1;
nc->offset = size;
}
offset = nc->offset - fragsz;
if (unlikely(offset < 0)) {
page = virt_to_page(nc->va);
if (!page_ref_sub_and_test(page, nc->pagecnt_bias))
goto refill;
if (unlikely(nc->pfmemalloc)) {
free_unref_page(page, compound_order(page));
goto refill;
}
#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
/* if size can vary use size else just use PAGE_SIZE */
size = nc->size;
#endif
/* OK, page count is 0, we can safely set it */
set_page_count(page, PAGE_FRAG_CACHE_MAX_SIZE + 1);
/* reset page count bias and offset to start of new frag */
nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1;
offset = size - fragsz;
if (unlikely(offset < 0)) {
/*
* The caller is trying to allocate a fragment
* with fragsz > PAGE_SIZE but the cache isn't big
* enough to satisfy the request, this may
* happen in low memory conditions.
* We don't release the cache page because
* it could make memory pressure worse
* so we simply return NULL here.
*/
return NULL;
}
}
nc->pagecnt_bias--;
offset &= align_mask;
nc->offset = offset;
return nc->va + offset;
}
EXPORT_SYMBOL(__page_frag_alloc_align);
/*
* Frees a page fragment allocated out of either a compound or order 0 page.
*/
void page_frag_free(void *addr)
{
struct page *page = virt_to_head_page(addr);
if (unlikely(put_page_testzero(page)))
free_unref_page(page, compound_order(page));
}
EXPORT_SYMBOL(page_frag_free);
static void *make_alloc_exact(unsigned long addr, unsigned int order,
size_t size)
{

mm/page_frag_cache.c (new file, 171 lines):

@@ -0,0 +1,171 @@
// SPDX-License-Identifier: GPL-2.0-only
/* Page fragment allocator
*
* Page Fragment:
* An arbitrary-length arbitrary-offset area of memory which resides within a
* 0 or higher order page. Multiple fragments within that page are
* individually refcounted, in the page's reference counter.
*
* The page_frag functions provide a simple allocation framework for page
* fragments. This is used by the network stack and network device drivers to
* provide a backing region of memory for use as either an sk_buff->head, or to
* be used in the "frags" portion of skb_shared_info.
*/
#include <linux/build_bug.h>
#include <linux/export.h>
#include <linux/gfp_types.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/page_frag_cache.h>
#include "internal.h"
static unsigned long encoded_page_create(struct page *page, unsigned int order,
bool pfmemalloc)
{
BUILD_BUG_ON(PAGE_FRAG_CACHE_MAX_ORDER > PAGE_FRAG_CACHE_ORDER_MASK);
BUILD_BUG_ON(PAGE_FRAG_CACHE_PFMEMALLOC_BIT >= PAGE_SIZE);
return (unsigned long)page_address(page) |
(order & PAGE_FRAG_CACHE_ORDER_MASK) |
((unsigned long)pfmemalloc * PAGE_FRAG_CACHE_PFMEMALLOC_BIT);
}
static unsigned long encoded_page_decode_order(unsigned long encoded_page)
{
return encoded_page & PAGE_FRAG_CACHE_ORDER_MASK;
}
static void *encoded_page_decode_virt(unsigned long encoded_page)
{
return (void *)(encoded_page & PAGE_MASK);
}
static struct page *encoded_page_decode_page(unsigned long encoded_page)
{
return virt_to_page((void *)encoded_page);
}
static struct page *__page_frag_cache_refill(struct page_frag_cache *nc,
gfp_t gfp_mask)
{
unsigned long order = PAGE_FRAG_CACHE_MAX_ORDER;
struct page *page = NULL;
gfp_t gfp = gfp_mask;
#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
gfp_mask = (gfp_mask & ~__GFP_DIRECT_RECLAIM) | __GFP_COMP |
__GFP_NOWARN | __GFP_NORETRY | __GFP_NOMEMALLOC;
page = __alloc_pages(gfp_mask, PAGE_FRAG_CACHE_MAX_ORDER,
numa_mem_id(), NULL);
#endif
if (unlikely(!page)) {
page = __alloc_pages(gfp, 0, numa_mem_id(), NULL);
order = 0;
}
nc->encoded_page = page ?
encoded_page_create(page, order, page_is_pfmemalloc(page)) : 0;
return page;
}
void page_frag_cache_drain(struct page_frag_cache *nc)
{
if (!nc->encoded_page)
return;
__page_frag_cache_drain(encoded_page_decode_page(nc->encoded_page),
nc->pagecnt_bias);
nc->encoded_page = 0;
}
EXPORT_SYMBOL(page_frag_cache_drain);
void __page_frag_cache_drain(struct page *page, unsigned int count)
{
VM_BUG_ON_PAGE(page_ref_count(page) == 0, page);
if (page_ref_sub_and_test(page, count))
free_unref_page(page, compound_order(page));
}
EXPORT_SYMBOL(__page_frag_cache_drain);
void *__page_frag_alloc_align(struct page_frag_cache *nc,
unsigned int fragsz, gfp_t gfp_mask,
unsigned int align_mask)
{
unsigned long encoded_page = nc->encoded_page;
unsigned int size, offset;
struct page *page;
if (unlikely(!encoded_page)) {
refill:
page = __page_frag_cache_refill(nc, gfp_mask);
if (!page)
return NULL;
encoded_page = nc->encoded_page;
/* Even if we own the page, we do not use atomic_set().
* This would break get_page_unless_zero() users.
*/
page_ref_add(page, PAGE_FRAG_CACHE_MAX_SIZE);
/* reset page count bias and offset to start of new frag */
nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1;
nc->offset = 0;
}
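	/* Unlike the old mm/page_alloc.c allocator, which carved
	 * fragments downward from the end of the page, fragments here
	 * grow upward from offset 0, so alignment rounds nc->offset up
	 * rather than masking the newly computed offset down.
	 */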
size = PAGE_SIZE << encoded_page_decode_order(encoded_page);
offset = __ALIGN_KERNEL_MASK(nc->offset, ~align_mask);
if (unlikely(offset + fragsz > size)) {
if (unlikely(fragsz > PAGE_SIZE)) {
/*
* The caller is trying to allocate a fragment
* with fragsz > PAGE_SIZE but the cache isn't big
* enough to satisfy the request, this may
* happen in low memory conditions.
* We don't release the cache page because
* it could make memory pressure worse
* so we simply return NULL here.
*/
return NULL;
}
page = encoded_page_decode_page(encoded_page);
if (!page_ref_sub_and_test(page, nc->pagecnt_bias))
goto refill;
if (unlikely(encoded_page_decode_pfmemalloc(encoded_page))) {
free_unref_page(page,
encoded_page_decode_order(encoded_page));
goto refill;
}
/* OK, page count is 0, we can safely set it */
set_page_count(page, PAGE_FRAG_CACHE_MAX_SIZE + 1);
/* reset page count bias and offset to start of new frag */
nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1;
offset = 0;
}
nc->pagecnt_bias--;
nc->offset = offset + fragsz;
return encoded_page_decode_virt(encoded_page) + offset;
}
EXPORT_SYMBOL(__page_frag_alloc_align);
/*
* Frees a page fragment allocated out of either a compound or order 0 page.
*/
void page_frag_free(void *addr)
{
struct page *page = virt_to_head_page(addr);
if (unlikely(put_page_testzero(page)))
free_unref_page(page, compound_order(page));
}
EXPORT_SYMBOL(page_frag_free);
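
To see the pagecnt_bias trick in the allocator above in concrete
numbers (assuming 4KB pages, so the cache page is order-3 and
PAGE_FRAG_CACHE_MAX_SIZE is 32768): on refill, page_ref_add() raises
the page's refcount from 1 to 32769 and pagecnt_bias is set to 32769.
Each allocation then decrements only the cache-local pagecnt_bias,
while each page_frag_free() decrements page->_refcount. When the page
is exhausted, page_ref_sub_and_test(page, nc->pagecnt_bias) subtracts
the unused bias in one step; the count reaches zero exactly when every
fragment handed out has already been freed, and only then is the page
recycled in place via set_page_count() (or freed, if it is a
pfmemalloc page).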


@@ -753,14 +753,14 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len,
if (in_hardirq() || irqs_disabled()) {
nc = this_cpu_ptr(&netdev_alloc_cache);
data = page_frag_alloc(nc, len, gfp_mask);
pfmemalloc = nc->pfmemalloc;
pfmemalloc = page_frag_cache_is_pfmemalloc(nc);
} else {
local_bh_disable();
local_lock_nested_bh(&napi_alloc_cache.bh_lock);
nc = this_cpu_ptr(&napi_alloc_cache.page);
data = page_frag_alloc(nc, len, gfp_mask);
pfmemalloc = nc->pfmemalloc;
pfmemalloc = page_frag_cache_is_pfmemalloc(nc);
local_unlock_nested_bh(&napi_alloc_cache.bh_lock);
local_bh_enable();
@@ -850,7 +850,7 @@ struct sk_buff *napi_alloc_skb(struct napi_struct *napi, unsigned int len)
len = SKB_HEAD_ALIGN(len);
data = page_frag_alloc(&nc->page, len, gfp_mask);
pfmemalloc = nc->page.pfmemalloc;
pfmemalloc = page_frag_cache_is_pfmemalloc(&nc->page);
}
local_unlock_nested_bh(&napi_alloc_cache.bh_lock);


@@ -337,9 +337,7 @@ static void rxrpc_clean_up_connection(struct work_struct *work)
*/
rxrpc_purge_queue(&conn->rx_queue);
if (conn->tx_data_alloc.va)
__page_frag_cache_drain(virt_to_page(conn->tx_data_alloc.va),
conn->tx_data_alloc.pagecnt_bias);
page_frag_cache_drain(&conn->tx_data_alloc);
call_rcu(&conn->rcu, rxrpc_rcu_free_connection);
}


@@ -452,9 +452,7 @@ void rxrpc_destroy_local(struct rxrpc_local *local)
#endif
rxrpc_purge_queue(&local->rx_queue);
rxrpc_purge_client_connections(local);
if (local->tx_alloc.va)
__page_frag_cache_drain(virt_to_page(local->tx_alloc.va),
local->tx_alloc.pagecnt_bias);
page_frag_cache_drain(&local->tx_alloc);
}
/*


@@ -1608,7 +1608,6 @@ static void svc_tcp_sock_detach(struct svc_xprt *xprt)
static void svc_sock_free(struct svc_xprt *xprt)
{
struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
struct page_frag_cache *pfc = &svsk->sk_frag_cache;
struct socket *sock = svsk->sk_sock;
trace_svcsock_free(svsk, sock);
@@ -1618,8 +1617,7 @@ static void svc_sock_free(struct svc_xprt *xprt)
sockfd_put(sock);
else
sock_release(sock);
if (pfc->va)
__page_frag_cache_drain(virt_to_head_page(pfc->va),
pfc->pagecnt_bias);
page_frag_cache_drain(&svsk->sk_frag_cache);
kfree(svsk);
}


@@ -36,6 +36,8 @@ MAKEFLAGS += --no-builtin-rules
CFLAGS = -Wall -I $(top_srcdir) $(EXTRA_CFLAGS) $(KHDR_INCLUDES) $(TOOLS_INCLUDES)
LDLIBS = -lrt -lpthread -lm
TEST_GEN_MODS_DIR := page_frag
TEST_GEN_FILES = cow
TEST_GEN_FILES += compaction_test
TEST_GEN_FILES += gup_longterm
@@ -126,6 +128,7 @@ TEST_FILES += test_hmm.sh
TEST_FILES += va_high_addr_switch.sh
TEST_FILES += charge_reserved_hugetlb.sh
TEST_FILES += hugetlb_reparenting_test.sh
TEST_FILES += test_page_frag.sh
# required by charge_reserved_hugetlb.sh
TEST_FILES += write_hugetlb_memory.sh


@@ -0,0 +1,18 @@
PAGE_FRAG_TEST_DIR := $(realpath $(dir $(abspath $(lastword $(MAKEFILE_LIST)))))
KDIR ?= $(abspath $(PAGE_FRAG_TEST_DIR)/../../../../..)
ifeq ($(V),1)
Q =
else
Q = @
endif
MODULES = page_frag_test.ko
obj-m += page_frag_test.o
all:
+$(Q)make -C $(KDIR) M=$(PAGE_FRAG_TEST_DIR) modules
clean:
+$(Q)make -C $(KDIR) M=$(PAGE_FRAG_TEST_DIR) clean


@@ -0,0 +1,198 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Test module for page_frag cache
*
* Copyright (C) 2024 Yunsheng Lin <linyunsheng@huawei.com>
*/
#include <linux/module.h>
#include <linux/cpumask.h>
#include <linux/completion.h>
#include <linux/ptr_ring.h>
#include <linux/kthread.h>
#include <linux/page_frag_cache.h>
#define TEST_FAILED_PREFIX "page_frag_test failed: "
static struct ptr_ring ptr_ring;
static int nr_objs = 512;
static atomic_t nthreads;
static struct completion wait;
static struct page_frag_cache test_nc;
static int test_popped;
static int test_pushed;
static bool force_exit;
static int nr_test = 2000000;
module_param(nr_test, int, 0);
MODULE_PARM_DESC(nr_test, "number of iterations to test");
static bool test_align;
module_param(test_align, bool, 0);
MODULE_PARM_DESC(test_align, "use align API for testing");
static int test_alloc_len = 2048;
module_param(test_alloc_len, int, 0);
MODULE_PARM_DESC(test_alloc_len, "alloc len for testing");
static int test_push_cpu;
module_param(test_push_cpu, int, 0);
MODULE_PARM_DESC(test_push_cpu, "test cpu for pushing fragment");
static int test_pop_cpu;
module_param(test_pop_cpu, int, 0);
MODULE_PARM_DESC(test_pop_cpu, "test cpu for popping fragment");
static int page_frag_pop_thread(void *arg)
{
struct ptr_ring *ring = arg;
pr_info("page_frag pop test thread begins on cpu %d\n",
smp_processor_id());
while (test_popped < nr_test) {
void *obj = __ptr_ring_consume(ring);
if (obj) {
test_popped++;
page_frag_free(obj);
} else {
if (force_exit)
break;
cond_resched();
}
}
if (atomic_dec_and_test(&nthreads))
complete(&wait);
pr_info("page_frag pop test thread exits on cpu %d\n",
smp_processor_id());
return 0;
}
static int page_frag_push_thread(void *arg)
{
struct ptr_ring *ring = arg;
pr_info("page_frag push test thread begins on cpu %d\n",
smp_processor_id());
while (test_pushed < nr_test && !force_exit) {
void *va;
int ret;
if (test_align) {
va = page_frag_alloc_align(&test_nc, test_alloc_len,
GFP_KERNEL, SMP_CACHE_BYTES);
if ((unsigned long)va & (SMP_CACHE_BYTES - 1)) {
force_exit = true;
WARN_ONCE(true, TEST_FAILED_PREFIX "unaligned va returned\n");
}
} else {
va = page_frag_alloc(&test_nc, test_alloc_len, GFP_KERNEL);
}
if (!va)
continue;
ret = __ptr_ring_produce(ring, va);
if (ret) {
page_frag_free(va);
cond_resched();
} else {
test_pushed++;
}
}
pr_info("page_frag push test thread exits on cpu %d\n",
smp_processor_id());
if (atomic_dec_and_test(&nthreads))
complete(&wait);
return 0;
}
static int __init page_frag_test_init(void)
{
struct task_struct *tsk_push, *tsk_pop;
int last_pushed = 0, last_popped = 0;
ktime_t start;
u64 duration;
int ret;
page_frag_cache_init(&test_nc);
atomic_set(&nthreads, 2);
init_completion(&wait);
if (test_alloc_len > PAGE_SIZE || test_alloc_len <= 0 ||
!cpu_active(test_push_cpu) || !cpu_active(test_pop_cpu))
return -EINVAL;
ret = ptr_ring_init(&ptr_ring, nr_objs, GFP_KERNEL);
if (ret)
return ret;
tsk_push = kthread_create_on_cpu(page_frag_push_thread, &ptr_ring,
test_push_cpu, "page_frag_push");
if (IS_ERR(tsk_push))
return PTR_ERR(tsk_push);
tsk_pop = kthread_create_on_cpu(page_frag_pop_thread, &ptr_ring,
test_pop_cpu, "page_frag_pop");
if (IS_ERR(tsk_pop)) {
kthread_stop(tsk_push);
return PTR_ERR(tsk_pop);
}
start = ktime_get();
wake_up_process(tsk_push);
wake_up_process(tsk_pop);
pr_info("waiting for test to complete\n");
while (!wait_for_completion_timeout(&wait, msecs_to_jiffies(10000))) {
/* exit if there is no progress on the push or pop side */
if (last_pushed == test_pushed || last_popped == test_popped) {
WARN_ONCE(true, TEST_FAILED_PREFIX "no progress\n");
force_exit = true;
continue;
}
last_pushed = test_pushed;
last_popped = test_popped;
pr_info("page_frag_test progress: pushed = %d, popped = %d\n",
test_pushed, test_popped);
}
if (force_exit) {
pr_err(TEST_FAILED_PREFIX "exit with error\n");
goto out;
}
duration = (u64)ktime_us_delta(ktime_get(), start);
pr_info("%d of iterations for %s testing took: %lluus\n", nr_test,
test_align ? "aligned" : "non-aligned", duration);
out:
ptr_ring_cleanup(&ptr_ring, NULL);
page_frag_cache_drain(&test_nc);
return -EAGAIN;
}
static void __exit page_frag_test_exit(void)
{
}
module_init(page_frag_test_init);
module_exit(page_frag_test_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Yunsheng Lin <linyunsheng@huawei.com>");
MODULE_DESCRIPTION("Test module for page_frag");


@@ -75,6 +75,8 @@ separated by spaces:
read-only VMAs
- mdwe
test prctl(PR_SET_MDWE, ...)
- page_frag
test handling of page fragment allocation and freeing
example: ./run_vmtests.sh -t "hmm mmap ksm"
EOF
@@ -456,6 +458,12 @@ CATEGORY="mkdirty" run_test ./mkdirty
CATEGORY="mdwe" run_test ./mdwe_test
CATEGORY="page_frag" run_test ./test_page_frag.sh smoke
CATEGORY="page_frag" run_test ./test_page_frag.sh aligned
CATEGORY="page_frag" run_test ./test_page_frag.sh nonaligned
echo "SUMMARY: PASS=${count_pass} SKIP=${count_skip} FAIL=${count_fail}" | tap_prefix
echo "1..${count_total}" | tap_output


@@ -0,0 +1,175 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
#
# Copyright (C) 2024 Yunsheng Lin <linyunsheng@huawei.com>
# Copyright (C) 2018 Uladzislau Rezki (Sony) <urezki@gmail.com>
#
# This is a test script for the kernel test driver to test the
# correctness and performance of page_frag's implementation.
# Therefore it is just a kernel module loader. You can specify
# and pass different parameters in order to:
# a) analyse performance of page fragment allocations;
# b) stress-test and check the stability of the page_frag subsystem.
DRIVER="./page_frag/page_frag_test.ko"
CPU_LIST=$(grep -m 2 processor /proc/cpuinfo | cut -d ' ' -f 2)
TEST_CPU_0=$(echo $CPU_LIST | awk '{print $1}')
if [ $(echo $CPU_LIST | wc -w) -gt 1 ]; then
TEST_CPU_1=$(echo $CPU_LIST | awk '{print $2}')
NR_TEST=100000000
else
TEST_CPU_1=$TEST_CPU_0
NR_TEST=1000000
fi
# 1 if fails
exitcode=1
# Kselftest framework requirement - SKIP code is 4.
ksft_skip=4
check_test_failed_prefix() {
if dmesg | grep -q 'page_frag_test failed:';then
echo "page_frag_test failed, please check dmesg"
exit $exitcode
fi
}
#
# Static templates for testing of page_frag APIs.
# Also it is possible to pass any supported parameters manually.
#
SMOKE_PARAM="test_push_cpu=$TEST_CPU_0 test_pop_cpu=$TEST_CPU_1"
NONALIGNED_PARAM="$SMOKE_PARAM test_alloc_len=75 nr_test=$NR_TEST"
ALIGNED_PARAM="$NONALIGNED_PARAM test_align=1"
check_test_requirements()
{
uid=$(id -u)
if [ $uid -ne 0 ]; then
echo "$0: Must be run as root"
exit $ksft_skip
fi
if ! which insmod > /dev/null 2>&1; then
echo "$0: You need insmod installed"
exit $ksft_skip
fi
if [ ! -f $DRIVER ]; then
echo "$0: You need to compile page_frag_test module"
exit $ksft_skip
fi
}
run_nonaligned_check()
{
echo "Run performance tests to evaluate how fast nonaligned alloc API is."
insmod $DRIVER $NONALIGNED_PARAM > /dev/null 2>&1
}
run_aligned_check()
{
echo "Run performance tests to evaluate how fast aligned alloc API is."
insmod $DRIVER $ALIGNED_PARAM > /dev/null 2>&1
}
run_smoke_check()
{
echo "Run smoke test."
insmod $DRIVER $SMOKE_PARAM > /dev/null 2>&1
}
usage()
{
echo -n "Usage: $0 [ aligned ] | [ nonaligned ] | | [ smoke ] | "
echo "manual parameters"
echo
echo "Valid tests and parameters:"
echo
modinfo $DRIVER
echo
echo "Example usage:"
echo
echo "# Shows help message"
echo "$0"
echo
echo "# Smoke testing"
echo "$0 smoke"
echo
echo "# Performance testing for nonaligned alloc API"
echo "$0 nonaligned"
echo
echo "# Performance testing for aligned alloc API"
echo "$0 aligned"
echo
exit 0
}
function validate_passed_args()
{
VALID_ARGS=`modinfo $DRIVER | awk '/parm:/ {print $2}' | sed 's/:.*//'`
#
# Something has been passed, check it.
#
for passed_arg in $@; do
key=${passed_arg//=*/}
valid=0
for valid_arg in $VALID_ARGS; do
if [[ $key = $valid_arg ]]; then
valid=1
break
fi
done
if [[ $valid -ne 1 ]]; then
echo "Error: key is not correct: ${key}"
exit $exitcode
fi
done
}
function run_manual_check()
{
#
# Validate passed parameters. If there is a wrong one,
# the script exits and does not execute further.
#
validate_passed_args $@
echo "Run the test with following parameters: $@"
insmod $DRIVER $@ > /dev/null 2>&1
}
function run_test()
{
if [ $# -eq 0 ]; then
usage
else
if [[ "$1" = "smoke" ]]; then
run_smoke_check
elif [[ "$1" = "nonaligned" ]]; then
run_nonaligned_check
elif [[ "$1" = "aligned" ]]; then
run_aligned_check
else
run_manual_check $@
fi
fi
check_test_failed_prefix
echo "Done."
echo "Check the kernel ring buffer to see the summary."
}
check_test_requirements
run_test $@
exit 0