mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2024-12-28 00:35:01 +00:00
mm: move the page fragment allocator from page_alloc into its own file
Inspired by [1], move the page fragment allocator from page_alloc into its own c file and header file, as we are about to make more change for it to replace another page_frag implementation in sock.c As this patchset is going to replace 'struct page_frag' with 'struct page_frag_cache' in sched.h, including page_frag_cache.h in sched.h has a compiler error caused by interdependence between mm_types.h and mm.h for asm-offsets.c, see [2]. So avoid the compiler error by moving 'struct page_frag_cache' to mm_types_task.h as suggested by Alexander, see [3]. 1. https://lore.kernel.org/all/20230411160902.4134381-3-dhowells@redhat.com/ 2. https://lore.kernel.org/all/15623dac-9358-4597-b3ee-3694a5956920@gmail.com/ 3. https://lore.kernel.org/all/CAKgT0UdH1yD=LSCXFJ=YM_aiA4OomD-2wXykO42bizaWMt_HOA@mail.gmail.com/ CC: David Howells <dhowells@redhat.com> CC: Linux-MM <linux-mm@kvack.org> Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com> Acked-by: Andrew Morton <akpm@linux-foundation.org> Reviewed-by: Alexander Duyck <alexanderduyck@fb.com> Link: https://patch.msgid.link/20241028115343.3405838-3-linyunsheng@huawei.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
parent
7fef0dec41
commit
65941f10ca
@ -371,28 +371,6 @@ __meminit void *alloc_pages_exact_nid_noprof(int nid, size_t size, gfp_t gfp_mas
|
||||
extern void __free_pages(struct page *page, unsigned int order);
|
||||
extern void free_pages(unsigned long addr, unsigned int order);
|
||||
|
||||
struct page_frag_cache;
|
||||
void page_frag_cache_drain(struct page_frag_cache *nc);
|
||||
extern void __page_frag_cache_drain(struct page *page, unsigned int count);
|
||||
void *__page_frag_alloc_align(struct page_frag_cache *nc, unsigned int fragsz,
|
||||
gfp_t gfp_mask, unsigned int align_mask);
|
||||
|
||||
static inline void *page_frag_alloc_align(struct page_frag_cache *nc,
|
||||
unsigned int fragsz, gfp_t gfp_mask,
|
||||
unsigned int align)
|
||||
{
|
||||
WARN_ON_ONCE(!is_power_of_2(align));
|
||||
return __page_frag_alloc_align(nc, fragsz, gfp_mask, -align);
|
||||
}
|
||||
|
||||
static inline void *page_frag_alloc(struct page_frag_cache *nc,
|
||||
unsigned int fragsz, gfp_t gfp_mask)
|
||||
{
|
||||
return __page_frag_alloc_align(nc, fragsz, gfp_mask, ~0u);
|
||||
}
|
||||
|
||||
extern void page_frag_free(void *addr);
|
||||
|
||||
#define __free_page(page) __free_pages((page), 0)
|
||||
#define free_page(addr) free_pages((addr), 0)
|
||||
|
||||
|
@ -521,9 +521,6 @@ static_assert(sizeof(struct ptdesc) <= sizeof(struct page));
|
||||
*/
|
||||
#define STRUCT_PAGE_MAX_SHIFT (order_base_2(sizeof(struct page)))
|
||||
|
||||
#define PAGE_FRAG_CACHE_MAX_SIZE __ALIGN_MASK(32768, ~PAGE_MASK)
|
||||
#define PAGE_FRAG_CACHE_MAX_ORDER get_order(PAGE_FRAG_CACHE_MAX_SIZE)
|
||||
|
||||
/*
|
||||
* page_private can be used on tail pages. However, PagePrivate is only
|
||||
* checked by the VM on the head page. So page_private on the tail pages
|
||||
@ -542,21 +539,6 @@ static inline void *folio_get_private(struct folio *folio)
|
||||
return folio->private;
|
||||
}
|
||||
|
||||
struct page_frag_cache {
|
||||
void * va;
|
||||
#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
|
||||
__u16 offset;
|
||||
__u16 size;
|
||||
#else
|
||||
__u32 offset;
|
||||
#endif
|
||||
/* we maintain a pagecount bias, so that we dont dirty cache line
|
||||
* containing page->_refcount every time we allocate a fragment.
|
||||
*/
|
||||
unsigned int pagecnt_bias;
|
||||
bool pfmemalloc;
|
||||
};
|
||||
|
||||
typedef unsigned long vm_flags_t;
|
||||
|
||||
/*
|
||||
|
@ -8,6 +8,7 @@
|
||||
* (These are defined separately to decouple sched.h from mm_types.h as much as possible.)
|
||||
*/
|
||||
|
||||
#include <linux/align.h>
|
||||
#include <linux/types.h>
|
||||
|
||||
#include <asm/page.h>
|
||||
@ -43,6 +44,23 @@ struct page_frag {
|
||||
#endif
|
||||
};
|
||||
|
||||
#define PAGE_FRAG_CACHE_MAX_SIZE __ALIGN_MASK(32768, ~PAGE_MASK)
|
||||
#define PAGE_FRAG_CACHE_MAX_ORDER get_order(PAGE_FRAG_CACHE_MAX_SIZE)
|
||||
struct page_frag_cache {
|
||||
void *va;
|
||||
#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
|
||||
__u16 offset;
|
||||
__u16 size;
|
||||
#else
|
||||
__u32 offset;
|
||||
#endif
|
||||
/* we maintain a pagecount bias, so that we dont dirty cache line
|
||||
* containing page->_refcount every time we allocate a fragment.
|
||||
*/
|
||||
unsigned int pagecnt_bias;
|
||||
bool pfmemalloc;
|
||||
};
|
||||
|
||||
/* Track pages that require TLB flushes */
|
||||
struct tlbflush_unmap_batch {
|
||||
#ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
|
||||
|
31
include/linux/page_frag_cache.h
Normal file
31
include/linux/page_frag_cache.h
Normal file
@ -0,0 +1,31 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
|
||||
#ifndef _LINUX_PAGE_FRAG_CACHE_H
|
||||
#define _LINUX_PAGE_FRAG_CACHE_H
|
||||
|
||||
#include <linux/log2.h>
|
||||
#include <linux/mm_types_task.h>
|
||||
#include <linux/types.h>
|
||||
|
||||
void page_frag_cache_drain(struct page_frag_cache *nc);
|
||||
void __page_frag_cache_drain(struct page *page, unsigned int count);
|
||||
void *__page_frag_alloc_align(struct page_frag_cache *nc, unsigned int fragsz,
|
||||
gfp_t gfp_mask, unsigned int align_mask);
|
||||
|
||||
static inline void *page_frag_alloc_align(struct page_frag_cache *nc,
|
||||
unsigned int fragsz, gfp_t gfp_mask,
|
||||
unsigned int align)
|
||||
{
|
||||
WARN_ON_ONCE(!is_power_of_2(align));
|
||||
return __page_frag_alloc_align(nc, fragsz, gfp_mask, -align);
|
||||
}
|
||||
|
||||
static inline void *page_frag_alloc(struct page_frag_cache *nc,
|
||||
unsigned int fragsz, gfp_t gfp_mask)
|
||||
{
|
||||
return __page_frag_alloc_align(nc, fragsz, gfp_mask, ~0u);
|
||||
}
|
||||
|
||||
void page_frag_free(void *addr);
|
||||
|
||||
#endif
|
@ -31,6 +31,7 @@
|
||||
#include <linux/in6.h>
|
||||
#include <linux/if_packet.h>
|
||||
#include <linux/llist.h>
|
||||
#include <linux/page_frag_cache.h>
|
||||
#include <net/flow.h>
|
||||
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
|
||||
#include <linux/netfilter/nf_conntrack_common.h>
|
||||
|
@ -65,6 +65,7 @@ page-alloc-$(CONFIG_SHUFFLE_PAGE_ALLOCATOR) += shuffle.o
|
||||
memory-hotplug-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o
|
||||
|
||||
obj-y += page-alloc.o
|
||||
obj-y += page_frag_cache.o
|
||||
obj-y += init-mm.o
|
||||
obj-y += memblock.o
|
||||
obj-y += $(memory-hotplug-y)
|
||||
|
136
mm/page_alloc.c
136
mm/page_alloc.c
@ -4836,142 +4836,6 @@ void free_pages(unsigned long addr, unsigned int order)
|
||||
|
||||
EXPORT_SYMBOL(free_pages);
|
||||
|
||||
/*
|
||||
* Page Fragment:
|
||||
* An arbitrary-length arbitrary-offset area of memory which resides
|
||||
* within a 0 or higher order page. Multiple fragments within that page
|
||||
* are individually refcounted, in the page's reference counter.
|
||||
*
|
||||
* The page_frag functions below provide a simple allocation framework for
|
||||
* page fragments. This is used by the network stack and network device
|
||||
* drivers to provide a backing region of memory for use as either an
|
||||
* sk_buff->head, or to be used in the "frags" portion of skb_shared_info.
|
||||
*/
|
||||
static struct page *__page_frag_cache_refill(struct page_frag_cache *nc,
|
||||
gfp_t gfp_mask)
|
||||
{
|
||||
struct page *page = NULL;
|
||||
gfp_t gfp = gfp_mask;
|
||||
|
||||
#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
|
||||
gfp_mask = (gfp_mask & ~__GFP_DIRECT_RECLAIM) | __GFP_COMP |
|
||||
__GFP_NOWARN | __GFP_NORETRY | __GFP_NOMEMALLOC;
|
||||
page = alloc_pages_node(NUMA_NO_NODE, gfp_mask,
|
||||
PAGE_FRAG_CACHE_MAX_ORDER);
|
||||
nc->size = page ? PAGE_FRAG_CACHE_MAX_SIZE : PAGE_SIZE;
|
||||
#endif
|
||||
if (unlikely(!page))
|
||||
page = alloc_pages_node(NUMA_NO_NODE, gfp, 0);
|
||||
|
||||
nc->va = page ? page_address(page) : NULL;
|
||||
|
||||
return page;
|
||||
}
|
||||
|
||||
void page_frag_cache_drain(struct page_frag_cache *nc)
|
||||
{
|
||||
if (!nc->va)
|
||||
return;
|
||||
|
||||
__page_frag_cache_drain(virt_to_head_page(nc->va), nc->pagecnt_bias);
|
||||
nc->va = NULL;
|
||||
}
|
||||
EXPORT_SYMBOL(page_frag_cache_drain);
|
||||
|
||||
void __page_frag_cache_drain(struct page *page, unsigned int count)
|
||||
{
|
||||
VM_BUG_ON_PAGE(page_ref_count(page) == 0, page);
|
||||
|
||||
if (page_ref_sub_and_test(page, count))
|
||||
free_unref_page(page, compound_order(page));
|
||||
}
|
||||
EXPORT_SYMBOL(__page_frag_cache_drain);
|
||||
|
||||
void *__page_frag_alloc_align(struct page_frag_cache *nc,
|
||||
unsigned int fragsz, gfp_t gfp_mask,
|
||||
unsigned int align_mask)
|
||||
{
|
||||
unsigned int size = PAGE_SIZE;
|
||||
struct page *page;
|
||||
int offset;
|
||||
|
||||
if (unlikely(!nc->va)) {
|
||||
refill:
|
||||
page = __page_frag_cache_refill(nc, gfp_mask);
|
||||
if (!page)
|
||||
return NULL;
|
||||
|
||||
#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
|
||||
/* if size can vary use size else just use PAGE_SIZE */
|
||||
size = nc->size;
|
||||
#endif
|
||||
/* Even if we own the page, we do not use atomic_set().
|
||||
* This would break get_page_unless_zero() users.
|
||||
*/
|
||||
page_ref_add(page, PAGE_FRAG_CACHE_MAX_SIZE);
|
||||
|
||||
/* reset page count bias and offset to start of new frag */
|
||||
nc->pfmemalloc = page_is_pfmemalloc(page);
|
||||
nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1;
|
||||
nc->offset = size;
|
||||
}
|
||||
|
||||
offset = nc->offset - fragsz;
|
||||
if (unlikely(offset < 0)) {
|
||||
page = virt_to_page(nc->va);
|
||||
|
||||
if (!page_ref_sub_and_test(page, nc->pagecnt_bias))
|
||||
goto refill;
|
||||
|
||||
if (unlikely(nc->pfmemalloc)) {
|
||||
free_unref_page(page, compound_order(page));
|
||||
goto refill;
|
||||
}
|
||||
|
||||
#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
|
||||
/* if size can vary use size else just use PAGE_SIZE */
|
||||
size = nc->size;
|
||||
#endif
|
||||
/* OK, page count is 0, we can safely set it */
|
||||
set_page_count(page, PAGE_FRAG_CACHE_MAX_SIZE + 1);
|
||||
|
||||
/* reset page count bias and offset to start of new frag */
|
||||
nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1;
|
||||
offset = size - fragsz;
|
||||
if (unlikely(offset < 0)) {
|
||||
/*
|
||||
* The caller is trying to allocate a fragment
|
||||
* with fragsz > PAGE_SIZE but the cache isn't big
|
||||
* enough to satisfy the request, this may
|
||||
* happen in low memory conditions.
|
||||
* We don't release the cache page because
|
||||
* it could make memory pressure worse
|
||||
* so we simply return NULL here.
|
||||
*/
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
nc->pagecnt_bias--;
|
||||
offset &= align_mask;
|
||||
nc->offset = offset;
|
||||
|
||||
return nc->va + offset;
|
||||
}
|
||||
EXPORT_SYMBOL(__page_frag_alloc_align);
|
||||
|
||||
/*
|
||||
* Frees a page fragment allocated out of either a compound or order 0 page.
|
||||
*/
|
||||
void page_frag_free(void *addr)
|
||||
{
|
||||
struct page *page = virt_to_head_page(addr);
|
||||
|
||||
if (unlikely(put_page_testzero(page)))
|
||||
free_unref_page(page, compound_order(page));
|
||||
}
|
||||
EXPORT_SYMBOL(page_frag_free);
|
||||
|
||||
static void *make_alloc_exact(unsigned long addr, unsigned int order,
|
||||
size_t size)
|
||||
{
|
||||
|
145
mm/page_frag_cache.c
Normal file
145
mm/page_frag_cache.c
Normal file
@ -0,0 +1,145 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/* Page fragment allocator
|
||||
*
|
||||
* Page Fragment:
|
||||
* An arbitrary-length arbitrary-offset area of memory which resides within a
|
||||
* 0 or higher order page. Multiple fragments within that page are
|
||||
* individually refcounted, in the page's reference counter.
|
||||
*
|
||||
* The page_frag functions provide a simple allocation framework for page
|
||||
* fragments. This is used by the network stack and network device drivers to
|
||||
* provide a backing region of memory for use as either an sk_buff->head, or to
|
||||
* be used in the "frags" portion of skb_shared_info.
|
||||
*/
|
||||
|
||||
#include <linux/export.h>
|
||||
#include <linux/gfp_types.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/page_frag_cache.h>
|
||||
#include "internal.h"
|
||||
|
||||
static struct page *__page_frag_cache_refill(struct page_frag_cache *nc,
|
||||
gfp_t gfp_mask)
|
||||
{
|
||||
struct page *page = NULL;
|
||||
gfp_t gfp = gfp_mask;
|
||||
|
||||
#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
|
||||
gfp_mask = (gfp_mask & ~__GFP_DIRECT_RECLAIM) | __GFP_COMP |
|
||||
__GFP_NOWARN | __GFP_NORETRY | __GFP_NOMEMALLOC;
|
||||
page = alloc_pages_node(NUMA_NO_NODE, gfp_mask,
|
||||
PAGE_FRAG_CACHE_MAX_ORDER);
|
||||
nc->size = page ? PAGE_FRAG_CACHE_MAX_SIZE : PAGE_SIZE;
|
||||
#endif
|
||||
if (unlikely(!page))
|
||||
page = alloc_pages_node(NUMA_NO_NODE, gfp, 0);
|
||||
|
||||
nc->va = page ? page_address(page) : NULL;
|
||||
|
||||
return page;
|
||||
}
|
||||
|
||||
void page_frag_cache_drain(struct page_frag_cache *nc)
|
||||
{
|
||||
if (!nc->va)
|
||||
return;
|
||||
|
||||
__page_frag_cache_drain(virt_to_head_page(nc->va), nc->pagecnt_bias);
|
||||
nc->va = NULL;
|
||||
}
|
||||
EXPORT_SYMBOL(page_frag_cache_drain);
|
||||
|
||||
void __page_frag_cache_drain(struct page *page, unsigned int count)
|
||||
{
|
||||
VM_BUG_ON_PAGE(page_ref_count(page) == 0, page);
|
||||
|
||||
if (page_ref_sub_and_test(page, count))
|
||||
free_unref_page(page, compound_order(page));
|
||||
}
|
||||
EXPORT_SYMBOL(__page_frag_cache_drain);
|
||||
|
||||
void *__page_frag_alloc_align(struct page_frag_cache *nc,
|
||||
unsigned int fragsz, gfp_t gfp_mask,
|
||||
unsigned int align_mask)
|
||||
{
|
||||
unsigned int size = PAGE_SIZE;
|
||||
struct page *page;
|
||||
int offset;
|
||||
|
||||
if (unlikely(!nc->va)) {
|
||||
refill:
|
||||
page = __page_frag_cache_refill(nc, gfp_mask);
|
||||
if (!page)
|
||||
return NULL;
|
||||
|
||||
#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
|
||||
/* if size can vary use size else just use PAGE_SIZE */
|
||||
size = nc->size;
|
||||
#endif
|
||||
/* Even if we own the page, we do not use atomic_set().
|
||||
* This would break get_page_unless_zero() users.
|
||||
*/
|
||||
page_ref_add(page, PAGE_FRAG_CACHE_MAX_SIZE);
|
||||
|
||||
/* reset page count bias and offset to start of new frag */
|
||||
nc->pfmemalloc = page_is_pfmemalloc(page);
|
||||
nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1;
|
||||
nc->offset = size;
|
||||
}
|
||||
|
||||
offset = nc->offset - fragsz;
|
||||
if (unlikely(offset < 0)) {
|
||||
page = virt_to_page(nc->va);
|
||||
|
||||
if (!page_ref_sub_and_test(page, nc->pagecnt_bias))
|
||||
goto refill;
|
||||
|
||||
if (unlikely(nc->pfmemalloc)) {
|
||||
free_unref_page(page, compound_order(page));
|
||||
goto refill;
|
||||
}
|
||||
|
||||
#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
|
||||
/* if size can vary use size else just use PAGE_SIZE */
|
||||
size = nc->size;
|
||||
#endif
|
||||
/* OK, page count is 0, we can safely set it */
|
||||
set_page_count(page, PAGE_FRAG_CACHE_MAX_SIZE + 1);
|
||||
|
||||
/* reset page count bias and offset to start of new frag */
|
||||
nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1;
|
||||
offset = size - fragsz;
|
||||
if (unlikely(offset < 0)) {
|
||||
/*
|
||||
* The caller is trying to allocate a fragment
|
||||
* with fragsz > PAGE_SIZE but the cache isn't big
|
||||
* enough to satisfy the request, this may
|
||||
* happen in low memory conditions.
|
||||
* We don't release the cache page because
|
||||
* it could make memory pressure worse
|
||||
* so we simply return NULL here.
|
||||
*/
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
nc->pagecnt_bias--;
|
||||
offset &= align_mask;
|
||||
nc->offset = offset;
|
||||
|
||||
return nc->va + offset;
|
||||
}
|
||||
EXPORT_SYMBOL(__page_frag_alloc_align);
|
||||
|
||||
/*
|
||||
* Frees a page fragment allocated out of either a compound or order 0 page.
|
||||
*/
|
||||
void page_frag_free(void *addr)
|
||||
{
|
||||
struct page *page = virt_to_head_page(addr);
|
||||
|
||||
if (unlikely(put_page_testzero(page)))
|
||||
free_unref_page(page, compound_order(page));
|
||||
}
|
||||
EXPORT_SYMBOL(page_frag_free);
|
@ -6,12 +6,12 @@
|
||||
* Copyright (C) 2024 Yunsheng Lin <linyunsheng@huawei.com>
|
||||
*/
|
||||
|
||||
#include <linux/mm.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/cpumask.h>
|
||||
#include <linux/completion.h>
|
||||
#include <linux/ptr_ring.h>
|
||||
#include <linux/kthread.h>
|
||||
#include <linux/page_frag_cache.h>
|
||||
|
||||
#define TEST_FAILED_PREFIX "page_frag_test failed: "
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user