linux-next/mm/swap_slots.c
Kairui Song bb579b48dd mm, swap_slots: remove slot cache for freeing path
The slot cache for the freeing path exists mostly to reduce the overhead of
si->lock.  As si->lock usage on the freeing path has basically been
eliminated, the cache can be removed.

This helps simplify the code and avoids swap entries being held in a cache
upon freeing.  The delayed freeing of entries has been causing trouble for
further zswap optimizations [1] and in theory also causes more fragmentation
and extra overhead.

Tests with a Linux kernel build workload showed that both performance and
fragmentation are better without the cache:

time make -j96 / 768M memcg, 4K pages, 10G ZRAM, avg of 4 test runs:
Before:
Sys time: 36047.78, Real time: 472.43
After: (-7.6% sys time, -7.3% real time)
Sys time: 33314.76, Real time: 437.67

time make -j96 / 1152M memcg, 64K mTHP, 10G ZRAM, avg of 4 test runs:
Before:
Sys time: 46859.04, Real time: 562.63
hugepages-64kB/stats/swpout: 1783392
hugepages-64kB/stats/swpout_fallback: 240875
After: (-23.3% sys time, -21.3% real time)
Sys time: 35958.87, Real time: 442.69
hugepages-64kB/stats/swpout: 1866267
hugepages-64kB/stats/swpout_fallback: 158330

Sequential swap should also be slightly faster; tests didn't show a
measurable difference, but there is at least no regression:

Swapin 4G zero page on ZRAM (time in us):
Before (avg. 1923756):
1912391 1927023 1927957 1916527 1918263 1914284 1934753 1940813 1921791
After (avg. 1922290):
1919101 1925743 1916810 1917007 1923930 1935152 1917403 1923549 1921913

Link: https://lore.kernel.org/all/CAMgjq7ACohT_uerSz8E_994ZZCv709Zor+43hdmesW_59W1BWw@mail.gmail.com/ [1]
Link: https://lkml.kernel.org/r/20241230174621.61185-14-ryncsn@gmail.com
Signed-off-by: Kairui Song <kasong@tencent.com>
Suggested-by: Chris Li <chrisl@kernel.org>
Cc: Barry Song <v-songbaohua@oppo.com>
Cc: "Huang, Ying" <ying.huang@linux.alibaba.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Kalesh Singh <kaleshsingh@google.com>
Cc: Nhat Pham <nphamcs@gmail.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Yosry Ahmed <yosryahmed@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2025-01-11 23:19:22 -08:00

// SPDX-License-Identifier: GPL-2.0
/*
* Manage cache of swap slots to be used for and returned from
* swap.
*
* Copyright(c) 2016 Intel Corporation.
*
* Author: Tim Chen <tim.c.chen@linux.intel.com>
*
* We allocate the swap slots from the global pool and put
* them into local per cpu caches. This has the advantage
* of not needing to acquire the swap_info lock every time
* we need a new slot.
*
* There is also an opportunity to simply return the slot
* to local caches without needing to acquire swap_info
* lock. We do not reuse the returned slots directly but
* move them back to the global pool in a batch. This
* allows the slots to coalesce and reduce fragmentation.
*
* The swap entry allocated is marked with SWAP_HAS_CACHE
* flag in map_count that prevents it from being allocated
* again from the global pool.
*
* The swap slots cache is protected by a mutex instead of
* a spin lock as when we search for slots with scan_swap_map,
* we can possibly sleep.
*/
#include <linux/swap_slots.h>
#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/mutex.h>
#include <linux/mm.h>

static DEFINE_PER_CPU(struct swap_slots_cache, swp_slots);
static bool swap_slot_cache_active;
bool swap_slot_cache_enabled;
static bool swap_slot_cache_initialized;
static DEFINE_MUTEX(swap_slots_cache_mutex);
/* Serialize swap slots cache enable/disable operations */
static DEFINE_MUTEX(swap_slots_cache_enable_mutex);

static void __drain_swap_slots_cache(void);

#define use_swap_slot_cache (swap_slot_cache_active && swap_slot_cache_enabled)
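
/* Mark the cache inactive and drain all per-CPU slots back to the global pool. */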
static void deactivate_swap_slots_cache(void)
{
mutex_lock(&swap_slots_cache_mutex);
swap_slot_cache_active = false;
__drain_swap_slots_cache();
mutex_unlock(&swap_slots_cache_mutex);
}
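
/* Mark the cache active again so allocations resume filling and using it. */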
static void reactivate_swap_slots_cache(void)
{
mutex_lock(&swap_slots_cache_mutex);
swap_slot_cache_active = true;
mutex_unlock(&swap_slots_cache_mutex);
}
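
/*
 * Disable the slots cache and drain every per-CPU cache.  Leaves
 * swap_slots_cache_enable_mutex held; pair with
 * reenable_swap_slots_cache_unlock().
 */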
/* Must not be called with cpu hot plug lock */
void disable_swap_slots_cache_lock(void)
{
mutex_lock(&swap_slots_cache_enable_mutex);
swap_slot_cache_enabled = false;
if (swap_slot_cache_initialized) {
/* serialize with cpu hotplug operations */
cpus_read_lock();
__drain_swap_slots_cache();
cpus_read_unlock();
}
}

static void __reenable_swap_slots_cache(void)
{
swap_slot_cache_enabled = has_usable_swap();
}
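
/* Re-enable the cache if usable swap remains, then release the enable mutex. */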
void reenable_swap_slots_cache_unlock(void)
{
__reenable_swap_slots_cache();
mutex_unlock(&swap_slots_cache_enable_mutex);
}
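
/*
 * Activate or deactivate the cache based on the amount of free swap:
 * activate above THRESHOLD_ACTIVATE_SWAP_SLOTS_CACHE pages per online
 * CPU, deactivate below THRESHOLD_DEACTIVATE_SWAP_SLOTS_CACHE.
 * Returns true if the cache is currently active.
 */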
static bool check_cache_active(void)
{
long pages;
if (!swap_slot_cache_enabled)
return false;
pages = get_nr_swap_pages();
if (!swap_slot_cache_active) {
if (pages > num_online_cpus() *
THRESHOLD_ACTIVATE_SWAP_SLOTS_CACHE)
reactivate_swap_slots_cache();
goto out;
}
/* if global pool of slot caches too low, deactivate cache */
if (pages < num_online_cpus() * THRESHOLD_DEACTIVATE_SWAP_SLOTS_CACHE)
deactivate_swap_slots_cache();
out:
return swap_slot_cache_active;
}
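
/* CPU hotplug online callback: allocate the per-CPU slots array for @cpu. */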
static int alloc_swap_slot_cache(unsigned int cpu)
{
struct swap_slots_cache *cache;
swp_entry_t *slots;
/*
* Do allocation outside swap_slots_cache_mutex
* as kvzalloc could trigger reclaim and folio_alloc_swap,
* which can lock swap_slots_cache_mutex.
*/
slots = kvcalloc(SWAP_SLOTS_CACHE_SIZE, sizeof(swp_entry_t),
GFP_KERNEL);
if (!slots)
return -ENOMEM;
mutex_lock(&swap_slots_cache_mutex);
cache = &per_cpu(swp_slots, cpu);
if (cache->slots) {
/* cache already allocated */
mutex_unlock(&swap_slots_cache_mutex);
kvfree(slots);
return 0;
}
if (!cache->lock_initialized) {
mutex_init(&cache->alloc_lock);
cache->lock_initialized = true;
}
cache->nr = 0;
cache->cur = 0;
cache->n_ret = 0;
/*
* We initialized alloc_lock and free_lock earlier. We use
* !cache->slots or !cache->slots_ret to know if it is safe to acquire
* the corresponding lock and use the cache. Memory barrier below
* ensures the assumption.
*/
mb();
cache->slots = slots;
mutex_unlock(&swap_slots_cache_mutex);
return 0;
}
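
/*
 * Return all slots cached on @cpu to the global pool; also free the
 * slots array itself if @free_slots is true.
 */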
static void drain_slots_cache_cpu(unsigned int cpu, bool free_slots)
{
struct swap_slots_cache *cache;
cache = &per_cpu(swp_slots, cpu);
if (cache->slots) {
mutex_lock(&cache->alloc_lock);
swapcache_free_entries(cache->slots + cache->cur, cache->nr);
cache->cur = 0;
cache->nr = 0;
if (free_slots && cache->slots) {
kvfree(cache->slots);
cache->slots = NULL;
}
mutex_unlock(&cache->alloc_lock);
}
}

static void __drain_swap_slots_cache(void)
{
unsigned int cpu;
/*
* This function is called during
* 1) swapoff, when we have to make sure no
* left over slots are in cache when we remove
* a swap device;
* 2) disabling of swap slot cache, when we run low
* on swap slots when allocating memory and need
* to return swap slots to global pool.
*
* We cannot acquire cpu hot plug lock here as
* this function can be invoked in the cpu
* hot plug path:
* cpu_up -> lock cpu_hotplug -> cpu hotplug state callback
* -> memory allocation -> direct reclaim -> folio_alloc_swap
* -> drain_swap_slots_cache
*
* Hence the loop over current online cpu below could miss cpu that
* is being brought online but not yet marked as online.
* That is okay as we do not schedule and run anything on a
* cpu before it has been marked online. Hence, we will not
* fill any swap slots in slots cache of such cpu.
* There are no slots on such cpu that need to be drained.
*/
for_each_online_cpu(cpu)
drain_slots_cache_cpu(cpu, false);
}
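
/* CPU hotplug teardown callback: drain and free the per-CPU cache of @cpu. */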
static int free_slot_cache(unsigned int cpu)
{
mutex_lock(&swap_slots_cache_mutex);
drain_slots_cache_cpu(cpu, true);
mutex_unlock(&swap_slots_cache_mutex);
return 0;
}
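
/*
 * Register the CPU hotplug callbacks on first use, then enable the
 * cache when usable swap is present.
 */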
void enable_swap_slots_cache(void)
{
mutex_lock(&swap_slots_cache_enable_mutex);
if (!swap_slot_cache_initialized) {
int ret;
ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "swap_slots_cache",
alloc_swap_slot_cache, free_slot_cache);
if (WARN_ONCE(ret < 0, "Cache allocation failed (%s), operating "
"without swap slots cache.\n", __func__))
goto out_unlock;
swap_slot_cache_initialized = true;
}
__reenable_swap_slots_cache();
out_unlock:
mutex_unlock(&swap_slots_cache_enable_mutex);
}

/* called with swap slot cache's alloc lock held */
static int refill_swap_slots_cache(struct swap_slots_cache *cache)
{
if (!use_swap_slot_cache)
return 0;
cache->cur = 0;
if (swap_slot_cache_active)
cache->nr = get_swap_pages(SWAP_SLOTS_CACHE_SIZE,
cache->slots, 0);
return cache->nr;
}
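
/*
 * Allocate a swap entry for @folio.  Order-0 folios are served from the
 * per-CPU slots cache when it is active; large folios and cache misses
 * go to get_swap_pages() directly.  Returns an entry with val == 0 on
 * failure or when charging the memcg swap counter fails.
 */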
swp_entry_t folio_alloc_swap(struct folio *folio)
{
swp_entry_t entry;
struct swap_slots_cache *cache;
entry.val = 0;
if (folio_test_large(folio)) {
if (IS_ENABLED(CONFIG_THP_SWAP))
get_swap_pages(1, &entry, folio_order(folio));
goto out;
}
/*
* Preemption is allowed here, because we may sleep
* in refill_swap_slots_cache(). But it is safe, because
* accesses to the per-CPU data structure are protected by the
* mutex cache->alloc_lock.
*
* The alloc path here does not touch cache->slots_ret
* so cache->free_lock is not taken.
*/
cache = raw_cpu_ptr(&swp_slots);
if (likely(check_cache_active() && cache->slots)) {
mutex_lock(&cache->alloc_lock);
if (cache->slots) {
repeat:
if (cache->nr) {
entry = cache->slots[cache->cur];
cache->slots[cache->cur++].val = 0;
cache->nr--;
} else if (refill_swap_slots_cache(cache)) {
goto repeat;
}
}
mutex_unlock(&cache->alloc_lock);
if (entry.val)
goto out;
}
get_swap_pages(1, &entry, 0);
out:
if (mem_cgroup_try_charge_swap(folio, entry)) {
put_swap_folio(folio, entry);
entry.val = 0;
}
return entry;
}