mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-01-01 10:43:43 +00:00
25 hotfixes, mainly for MM. 13 are cc:stable.
-----BEGIN PGP SIGNATURE-----

iHUEABYIAB0WIQTTMBEPP41GrTpTJgfdBJ7gKXxAjgUCY9x+swAKCRDdBJ7gKXxA
joPwAP95XqB7gzy2l1Mc++Ta7Ih0fS34Pj1vTAxwsRQnqzr6rwD/QOt3YU9KgXpy
D7Fp8NnaQZq6m5o8cvV5+fBqA3uarAM=
=IIB8
-----END PGP SIGNATURE-----

Merge tag 'mm-hotfixes-stable-2023-02-02-19-24-2' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm

Pull misc fixes from Andrew Morton:
 "25 hotfixes, mainly for MM. 13 are cc:stable"

* tag 'mm-hotfixes-stable-2023-02-02-19-24-2' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (26 commits)
  mm: memcg: fix NULL pointer in mem_cgroup_track_foreign_dirty_slowpath()
  Kconfig.debug: fix the help description in SCHED_DEBUG
  mm/swapfile: add cond_resched() in get_swap_pages()
  mm: use stack_depot_early_init for kmemleak
  Squashfs: fix handling and sanity checking of xattr_ids count
  sh: define RUNTIME_DISCARD_EXIT
  highmem: round down the address passed to kunmap_flush_on_unmap()
  migrate: hugetlb: check for hugetlb shared PMD in node migration
  mm: hugetlb: proc: check for hugetlb shared PMD in /proc/PID/smaps
  mm/MADV_COLLAPSE: catch !none !huge !bad pmd lookups
  Revert "mm: kmemleak: alloc gray object for reserved region with direct map"
  freevxfs: Kconfig: fix spelling
  maple_tree: should get pivots boundary by type
  .mailmap: update e-mail address for Eugen Hristev
  mm, mremap: fix mremap() expanding for vma's with vm_ops->close()
  squashfs: harden sanity check in squashfs_read_xattr_id_table
  ia64: fix build error due to switch case label appearing next to declaration
  mm: multi-gen LRU: fix crash during cgroup migration
  Revert "mm: add nodes= arg to memory.reclaim"
  zsmalloc: fix a race with deferred_handles storing
  ...
This commit is contained in:
commit
0c272a1d33
1
.mailmap
1
.mailmap
@ -130,6 +130,7 @@ Domen Puncer <domen@coderock.org>
|
||||
Douglas Gilbert <dougg@torque.net>
|
||||
Ed L. Cashin <ecashin@coraid.com>
|
||||
Erik Kaneda <erik.kaneda@intel.com> <erik.schmauss@intel.com>
|
||||
Eugen Hristev <eugen.hristev@collabora.com> <eugen.hristev@microchip.com>
|
||||
Evgeniy Polyakov <johnpol@2ka.mipt.ru>
|
||||
Ezequiel Garcia <ezequiel@vanguardiasur.com.ar> <ezequiel@collabora.com>
|
||||
Felipe W Damasio <felipewd@terra.com.br>
|
||||
|
@ -1245,13 +1245,17 @@ PAGE_SIZE multiple when read back.
|
||||
This is a simple interface to trigger memory reclaim in the
|
||||
target cgroup.
|
||||
|
||||
This file accepts a string which contains the number of bytes to
|
||||
reclaim.
|
||||
This file accepts a single key, the number of bytes to reclaim.
|
||||
No nested keys are currently supported.
|
||||
|
||||
Example::
|
||||
|
||||
echo "1G" > memory.reclaim
|
||||
|
||||
The interface can be later extended with nested keys to
|
||||
configure the reclaim behavior. For example, specify the
|
||||
type of memory to reclaim from (anon, file, ..).
|
||||
|
||||
Please note that the kernel can over or under reclaim from
|
||||
the target cgroup. If less bytes are reclaimed than the
|
||||
specified amount, -EAGAIN is returned.
|
||||
@ -1263,13 +1267,6 @@ PAGE_SIZE multiple when read back.
|
||||
This means that the networking layer will not adapt based on
|
||||
reclaim induced by memory.reclaim.
|
||||
|
||||
This file also allows the user to specify the nodes to reclaim from,
|
||||
via the 'nodes=' key, for example::
|
||||
|
||||
echo "1G nodes=0,1" > memory.reclaim
|
||||
|
||||
The above instructs the kernel to reclaim memory from nodes 0,1.
|
||||
|
||||
memory.peak
|
||||
A read-only single value file which exists on non-root
|
||||
cgroups.
|
||||
|
@ -170,6 +170,9 @@ ia64_mremap (unsigned long addr, unsigned long old_len, unsigned long new_len, u
|
||||
asmlinkage long
|
||||
ia64_clock_getres(const clockid_t which_clock, struct __kernel_timespec __user *tp)
|
||||
{
|
||||
struct timespec64 rtn_tp;
|
||||
s64 tick_ns;
|
||||
|
||||
/*
|
||||
* ia64's clock_gettime() syscall is implemented as a vdso call
|
||||
* fsys_clock_gettime(). Currently it handles only
|
||||
@ -185,8 +188,8 @@ ia64_clock_getres(const clockid_t which_clock, struct __kernel_timespec __user *
|
||||
switch (which_clock) {
|
||||
case CLOCK_REALTIME:
|
||||
case CLOCK_MONOTONIC:
|
||||
s64 tick_ns = DIV_ROUND_UP(NSEC_PER_SEC, local_cpu_data->itc_freq);
|
||||
struct timespec64 rtn_tp = ns_to_timespec64(tick_ns);
|
||||
tick_ns = DIV_ROUND_UP(NSEC_PER_SEC, local_cpu_data->itc_freq);
|
||||
rtn_tp = ns_to_timespec64(tick_ns);
|
||||
return put_timespec64(&rtn_tp, tp);
|
||||
}
|
||||
|
||||
|
@ -4,6 +4,7 @@
|
||||
* Written by Niibe Yutaka and Paul Mundt
|
||||
*/
|
||||
OUTPUT_ARCH(sh)
|
||||
#define RUNTIME_DISCARD_EXIT
|
||||
#include <asm/thread_info.h>
|
||||
#include <asm/cache.h>
|
||||
#include <asm/vmlinux.lds.h>
|
||||
|
@ -26,7 +26,6 @@
|
||||
#include <linux/serial_core.h>
|
||||
#include <linux/sysfs.h>
|
||||
#include <linux/random.h>
|
||||
#include <linux/kmemleak.h>
|
||||
|
||||
#include <asm/setup.h> /* for COMMAND_LINE_SIZE */
|
||||
#include <asm/page.h>
|
||||
@ -525,12 +524,9 @@ static int __init __reserved_mem_reserve_reg(unsigned long node,
|
||||
size = dt_mem_next_cell(dt_root_size_cells, &prop);
|
||||
|
||||
if (size &&
|
||||
early_init_dt_reserve_memory(base, size, nomap) == 0) {
|
||||
early_init_dt_reserve_memory(base, size, nomap) == 0)
|
||||
pr_debug("Reserved memory: reserved region for node '%s': base %pa, size %lu MiB\n",
|
||||
uname, &base, (unsigned long)(size / SZ_1M));
|
||||
if (!nomap)
|
||||
kmemleak_alloc_phys(base, size, 0);
|
||||
}
|
||||
else
|
||||
pr_err("Reserved memory: failed to reserve memory for node '%s': base %pa, size %lu MiB\n",
|
||||
uname, &base, (unsigned long)(size / SZ_1M));
|
||||
|
@ -8,7 +8,7 @@ config VXFS_FS
|
||||
of SCO UnixWare (and possibly others) and optionally available
|
||||
for Sunsoft Solaris, HP-UX and many other operating systems. However
|
||||
these particular OS implementations of vxfs may differ in on-disk
|
||||
data endianess and/or superblock offset. The vxfs module has been
|
||||
data endianness and/or superblock offset. The vxfs module has been
|
||||
tested with SCO UnixWare and HP-UX B.10.20 (pa-risc 1.1 arch.)
|
||||
Currently only readonly access is supported and VxFX versions
|
||||
2, 3 and 4. Tests were performed with HP-UX VxFS version 3.
|
||||
|
@ -745,9 +745,7 @@ static int smaps_hugetlb_range(pte_t *pte, unsigned long hmask,
|
||||
page = pfn_swap_entry_to_page(swpent);
|
||||
}
|
||||
if (page) {
|
||||
int mapcount = page_mapcount(page);
|
||||
|
||||
if (mapcount >= 2)
|
||||
if (page_mapcount(page) >= 2 || hugetlb_pmd_shared(pte))
|
||||
mss->shared_hugetlb += huge_page_size(hstate_vma(vma));
|
||||
else
|
||||
mss->private_hugetlb += huge_page_size(hstate_vma(vma));
|
||||
|
@ -183,7 +183,7 @@ static inline int squashfs_block_size(__le32 raw)
|
||||
#define SQUASHFS_ID_BLOCK_BYTES(A) (SQUASHFS_ID_BLOCKS(A) *\
|
||||
sizeof(u64))
|
||||
/* xattr id lookup table defines */
|
||||
#define SQUASHFS_XATTR_BYTES(A) ((A) * sizeof(struct squashfs_xattr_id))
|
||||
#define SQUASHFS_XATTR_BYTES(A) (((u64) (A)) * sizeof(struct squashfs_xattr_id))
|
||||
|
||||
#define SQUASHFS_XATTR_BLOCK(A) (SQUASHFS_XATTR_BYTES(A) / \
|
||||
SQUASHFS_METADATA_SIZE)
|
||||
|
@ -63,7 +63,7 @@ struct squashfs_sb_info {
|
||||
long long bytes_used;
|
||||
unsigned int inodes;
|
||||
unsigned int fragments;
|
||||
int xattr_ids;
|
||||
unsigned int xattr_ids;
|
||||
unsigned int ids;
|
||||
bool panic_on_errors;
|
||||
const struct squashfs_decompressor_thread_ops *thread_ops;
|
||||
|
@ -10,12 +10,12 @@
|
||||
|
||||
#ifdef CONFIG_SQUASHFS_XATTR
|
||||
extern __le64 *squashfs_read_xattr_id_table(struct super_block *, u64,
|
||||
u64 *, int *);
|
||||
u64 *, unsigned int *);
|
||||
extern int squashfs_xattr_lookup(struct super_block *, unsigned int, int *,
|
||||
unsigned int *, unsigned long long *);
|
||||
#else
|
||||
static inline __le64 *squashfs_read_xattr_id_table(struct super_block *sb,
|
||||
u64 start, u64 *xattr_table_start, int *xattr_ids)
|
||||
u64 start, u64 *xattr_table_start, unsigned int *xattr_ids)
|
||||
{
|
||||
struct squashfs_xattr_id_table *id_table;
|
||||
|
||||
|
@ -56,7 +56,7 @@ int squashfs_xattr_lookup(struct super_block *sb, unsigned int index,
|
||||
* Read uncompressed xattr id lookup table indexes from disk into memory
|
||||
*/
|
||||
__le64 *squashfs_read_xattr_id_table(struct super_block *sb, u64 table_start,
|
||||
u64 *xattr_table_start, int *xattr_ids)
|
||||
u64 *xattr_table_start, unsigned int *xattr_ids)
|
||||
{
|
||||
struct squashfs_sb_info *msblk = sb->s_fs_info;
|
||||
unsigned int len, indexes;
|
||||
@ -76,7 +76,7 @@ __le64 *squashfs_read_xattr_id_table(struct super_block *sb, u64 table_start,
|
||||
/* Sanity check values */
|
||||
|
||||
/* there is always at least one xattr id */
|
||||
if (*xattr_ids == 0)
|
||||
if (*xattr_ids <= 0)
|
||||
return ERR_PTR(-EINVAL);
|
||||
|
||||
len = SQUASHFS_XATTR_BLOCK_BYTES(*xattr_ids);
|
||||
|
@ -200,7 +200,7 @@ static inline void *kmap_local_pfn(unsigned long pfn)
|
||||
static inline void __kunmap_local(const void *addr)
|
||||
{
|
||||
#ifdef ARCH_HAS_FLUSH_ON_KUNMAP
|
||||
kunmap_flush_on_unmap(addr);
|
||||
kunmap_flush_on_unmap(PTR_ALIGN_DOWN(addr, PAGE_SIZE));
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -227,7 +227,7 @@ static inline void *kmap_atomic_pfn(unsigned long pfn)
|
||||
static inline void __kunmap_atomic(const void *addr)
|
||||
{
|
||||
#ifdef ARCH_HAS_FLUSH_ON_KUNMAP
|
||||
kunmap_flush_on_unmap(addr);
|
||||
kunmap_flush_on_unmap(PTR_ALIGN_DOWN(addr, PAGE_SIZE));
|
||||
#endif
|
||||
pagefault_enable();
|
||||
if (IS_ENABLED(CONFIG_PREEMPT_RT))
|
||||
|
@ -7,6 +7,7 @@
|
||||
#include <linux/fs.h>
|
||||
#include <linux/hugetlb_inline.h>
|
||||
#include <linux/cgroup.h>
|
||||
#include <linux/page_ref.h>
|
||||
#include <linux/list.h>
|
||||
#include <linux/kref.h>
|
||||
#include <linux/pgtable.h>
|
||||
@ -1187,6 +1188,18 @@ static inline __init void hugetlb_cma_reserve(int order)
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE
|
||||
static inline bool hugetlb_pmd_shared(pte_t *pte)
|
||||
{
|
||||
return page_count(virt_to_page(pte)) > 1;
|
||||
}
|
||||
#else
|
||||
static inline bool hugetlb_pmd_shared(pte_t *pte)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
|
||||
bool want_pmd_share(struct vm_area_struct *vma, unsigned long addr);
|
||||
|
||||
#ifndef __HAVE_ARCH_FLUSH_HUGETLB_TLB_RANGE
|
||||
|
@ -1666,10 +1666,13 @@ void mem_cgroup_track_foreign_dirty_slowpath(struct folio *folio,
|
||||
static inline void mem_cgroup_track_foreign_dirty(struct folio *folio,
|
||||
struct bdi_writeback *wb)
|
||||
{
|
||||
struct mem_cgroup *memcg;
|
||||
|
||||
if (mem_cgroup_disabled())
|
||||
return;
|
||||
|
||||
if (unlikely(&folio_memcg(folio)->css != wb->memcg_css))
|
||||
memcg = folio_memcg(folio);
|
||||
if (unlikely(memcg && &memcg->css != wb->memcg_css))
|
||||
mem_cgroup_track_foreign_dirty_slowpath(folio, wb);
|
||||
}
|
||||
|
||||
|
@ -418,8 +418,7 @@ extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
|
||||
extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
|
||||
unsigned long nr_pages,
|
||||
gfp_t gfp_mask,
|
||||
unsigned int reclaim_options,
|
||||
nodemask_t *nodemask);
|
||||
unsigned int reclaim_options);
|
||||
extern unsigned long mem_cgroup_shrink_node(struct mem_cgroup *mem,
|
||||
gfp_t gfp_mask, bool noswap,
|
||||
pg_data_t *pgdat,
|
||||
|
@ -754,6 +754,7 @@ config DEBUG_KMEMLEAK
|
||||
select KALLSYMS
|
||||
select CRC32
|
||||
select STACKDEPOT
|
||||
select STACKDEPOT_ALWAYS_INIT if !DEBUG_KMEMLEAK_DEFAULT_OFF
|
||||
help
|
||||
Say Y here if you want to enable the memory leak
|
||||
detector. The memory allocation/freeing is traced in a way
|
||||
@ -1207,7 +1208,7 @@ config SCHED_DEBUG
|
||||
depends on DEBUG_KERNEL && PROC_FS
|
||||
default y
|
||||
help
|
||||
If you say Y here, the /proc/sched_debug file will be provided
|
||||
If you say Y here, the /sys/kernel/debug/sched file will be provided
|
||||
that can help debug the scheduler. The runtime overhead of this
|
||||
option is minimal.
|
||||
|
||||
|
@ -670,12 +670,13 @@ static inline unsigned long mte_pivot(const struct maple_enode *mn,
|
||||
unsigned char piv)
|
||||
{
|
||||
struct maple_node *node = mte_to_node(mn);
|
||||
enum maple_type type = mte_node_type(mn);
|
||||
|
||||
if (piv >= mt_pivots[piv]) {
|
||||
if (piv >= mt_pivots[type]) {
|
||||
WARN_ON(1);
|
||||
return 0;
|
||||
}
|
||||
switch (mte_node_type(mn)) {
|
||||
switch (type) {
|
||||
case maple_arange_64:
|
||||
return node->ma64.pivot[piv];
|
||||
case maple_range_64:
|
||||
@ -4887,7 +4888,7 @@ static bool mas_rev_awalk(struct ma_state *mas, unsigned long size)
|
||||
unsigned long *pivots, *gaps;
|
||||
void __rcu **slots;
|
||||
unsigned long gap = 0;
|
||||
unsigned long max, min, index;
|
||||
unsigned long max, min;
|
||||
unsigned char offset;
|
||||
|
||||
if (unlikely(mas_is_err(mas)))
|
||||
@ -4909,8 +4910,7 @@ static bool mas_rev_awalk(struct ma_state *mas, unsigned long size)
|
||||
min = mas_safe_min(mas, pivots, --offset);
|
||||
|
||||
max = mas_safe_pivot(mas, pivots, offset, type);
|
||||
index = mas->index;
|
||||
while (index <= max) {
|
||||
while (mas->index <= max) {
|
||||
gap = 0;
|
||||
if (gaps)
|
||||
gap = gaps[offset];
|
||||
@ -4941,10 +4941,8 @@ static bool mas_rev_awalk(struct ma_state *mas, unsigned long size)
|
||||
min = mas_safe_min(mas, pivots, offset);
|
||||
}
|
||||
|
||||
if (unlikely(index > max)) {
|
||||
mas_set_err(mas, -EBUSY);
|
||||
return false;
|
||||
}
|
||||
if (unlikely((mas->index > max) || (size - 1 > max - mas->index)))
|
||||
goto no_space;
|
||||
|
||||
if (unlikely(ma_is_leaf(type))) {
|
||||
mas->offset = offset;
|
||||
@ -4961,9 +4959,11 @@ static bool mas_rev_awalk(struct ma_state *mas, unsigned long size)
|
||||
return false;
|
||||
|
||||
ascend:
|
||||
if (mte_is_root(mas->node))
|
||||
mas_set_err(mas, -EBUSY);
|
||||
if (!mte_is_root(mas->node))
|
||||
return false;
|
||||
|
||||
no_space:
|
||||
mas_set_err(mas, -EBUSY);
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -2517,6 +2517,91 @@ static noinline void check_bnode_min_spanning(struct maple_tree *mt)
|
||||
mt_set_non_kernel(0);
|
||||
}
|
||||
|
||||
static noinline void check_empty_area_window(struct maple_tree *mt)
|
||||
{
|
||||
unsigned long i, nr_entries = 20;
|
||||
MA_STATE(mas, mt, 0, 0);
|
||||
|
||||
for (i = 1; i <= nr_entries; i++)
|
||||
mtree_store_range(mt, i*10, i*10 + 9,
|
||||
xa_mk_value(i), GFP_KERNEL);
|
||||
|
||||
/* Create another hole besides the one at 0 */
|
||||
mtree_store_range(mt, 160, 169, NULL, GFP_KERNEL);
|
||||
|
||||
/* Check lower bounds that don't fit */
|
||||
rcu_read_lock();
|
||||
MT_BUG_ON(mt, mas_empty_area_rev(&mas, 5, 90, 10) != -EBUSY);
|
||||
|
||||
mas_reset(&mas);
|
||||
MT_BUG_ON(mt, mas_empty_area_rev(&mas, 6, 90, 5) != -EBUSY);
|
||||
|
||||
/* Check lower bound that does fit */
|
||||
mas_reset(&mas);
|
||||
MT_BUG_ON(mt, mas_empty_area_rev(&mas, 5, 90, 5) != 0);
|
||||
MT_BUG_ON(mt, mas.index != 5);
|
||||
MT_BUG_ON(mt, mas.last != 9);
|
||||
rcu_read_unlock();
|
||||
|
||||
/* Check one gap that doesn't fit and one that does */
|
||||
rcu_read_lock();
|
||||
mas_reset(&mas);
|
||||
MT_BUG_ON(mt, mas_empty_area_rev(&mas, 5, 217, 9) != 0);
|
||||
MT_BUG_ON(mt, mas.index != 161);
|
||||
MT_BUG_ON(mt, mas.last != 169);
|
||||
|
||||
/* Check one gap that does fit above the min */
|
||||
mas_reset(&mas);
|
||||
MT_BUG_ON(mt, mas_empty_area_rev(&mas, 100, 218, 3) != 0);
|
||||
MT_BUG_ON(mt, mas.index != 216);
|
||||
MT_BUG_ON(mt, mas.last != 218);
|
||||
|
||||
/* Check size that doesn't fit any gap */
|
||||
mas_reset(&mas);
|
||||
MT_BUG_ON(mt, mas_empty_area_rev(&mas, 100, 218, 16) != -EBUSY);
|
||||
|
||||
/*
|
||||
* Check size that doesn't fit the lower end of the window but
|
||||
* does fit the gap
|
||||
*/
|
||||
mas_reset(&mas);
|
||||
MT_BUG_ON(mt, mas_empty_area_rev(&mas, 167, 200, 4) != -EBUSY);
|
||||
|
||||
/*
|
||||
* Check size that doesn't fit the upper end of the window but
|
||||
* does fit the gap
|
||||
*/
|
||||
mas_reset(&mas);
|
||||
MT_BUG_ON(mt, mas_empty_area_rev(&mas, 100, 162, 4) != -EBUSY);
|
||||
|
||||
/* Check mas_empty_area forward */
|
||||
mas_reset(&mas);
|
||||
MT_BUG_ON(mt, mas_empty_area(&mas, 0, 100, 9) != 0);
|
||||
MT_BUG_ON(mt, mas.index != 0);
|
||||
MT_BUG_ON(mt, mas.last != 8);
|
||||
|
||||
mas_reset(&mas);
|
||||
MT_BUG_ON(mt, mas_empty_area(&mas, 0, 100, 4) != 0);
|
||||
MT_BUG_ON(mt, mas.index != 0);
|
||||
MT_BUG_ON(mt, mas.last != 3);
|
||||
|
||||
mas_reset(&mas);
|
||||
MT_BUG_ON(mt, mas_empty_area(&mas, 0, 100, 11) != -EBUSY);
|
||||
|
||||
mas_reset(&mas);
|
||||
MT_BUG_ON(mt, mas_empty_area(&mas, 5, 100, 6) != -EBUSY);
|
||||
|
||||
mas_reset(&mas);
|
||||
MT_BUG_ON(mt, mas_empty_area(&mas, 0, 8, 10) != -EBUSY);
|
||||
|
||||
mas_reset(&mas);
|
||||
mas_empty_area(&mas, 100, 165, 3);
|
||||
|
||||
mas_reset(&mas);
|
||||
MT_BUG_ON(mt, mas_empty_area(&mas, 100, 163, 6) != -EBUSY);
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
static DEFINE_MTREE(tree);
|
||||
static int maple_tree_seed(void)
|
||||
{
|
||||
@ -2765,6 +2850,10 @@ static int maple_tree_seed(void)
|
||||
check_bnode_min_spanning(&tree);
|
||||
mtree_destroy(&tree);
|
||||
|
||||
mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE);
|
||||
check_empty_area_window(&tree);
|
||||
mtree_destroy(&tree);
|
||||
|
||||
#if defined(BENCH)
|
||||
skip:
|
||||
#endif
|
||||
|
@ -5051,6 +5051,9 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
|
||||
entry = huge_pte_clear_uffd_wp(entry);
|
||||
set_huge_pte_at(dst, addr, dst_pte, entry);
|
||||
} else if (unlikely(is_pte_marker(entry))) {
|
||||
/* No swap on hugetlb */
|
||||
WARN_ON_ONCE(
|
||||
is_swapin_error_entry(pte_to_swp_entry(entry)));
|
||||
/*
|
||||
* We copy the pte marker only if the dst vma has
|
||||
* uffd-wp enabled.
|
||||
|
@ -847,6 +847,10 @@ static int hugepage_vma_revalidate(struct mm_struct *mm, unsigned long address,
|
||||
return SCAN_SUCCEED;
|
||||
}
|
||||
|
||||
/*
|
||||
* See pmd_trans_unstable() for how the result may change out from
|
||||
* underneath us, even if we hold mmap_lock in read.
|
||||
*/
|
||||
static int find_pmd_or_thp_or_none(struct mm_struct *mm,
|
||||
unsigned long address,
|
||||
pmd_t **pmd)
|
||||
@ -865,8 +869,12 @@ static int find_pmd_or_thp_or_none(struct mm_struct *mm,
|
||||
#endif
|
||||
if (pmd_none(pmde))
|
||||
return SCAN_PMD_NONE;
|
||||
if (!pmd_present(pmde))
|
||||
return SCAN_PMD_NULL;
|
||||
if (pmd_trans_huge(pmde))
|
||||
return SCAN_PMD_MAPPED;
|
||||
if (pmd_devmap(pmde))
|
||||
return SCAN_PMD_NULL;
|
||||
if (pmd_bad(pmde))
|
||||
return SCAN_PMD_NULL;
|
||||
return SCAN_SUCCEED;
|
||||
@ -1642,7 +1650,7 @@ static int retract_page_tables(struct address_space *mapping, pgoff_t pgoff,
|
||||
* has higher cost too. It would also probably require locking
|
||||
* the anon_vma.
|
||||
*/
|
||||
if (vma->anon_vma) {
|
||||
if (READ_ONCE(vma->anon_vma)) {
|
||||
result = SCAN_PAGE_ANON;
|
||||
goto next;
|
||||
}
|
||||
@ -1670,6 +1678,18 @@ static int retract_page_tables(struct address_space *mapping, pgoff_t pgoff,
|
||||
result = SCAN_PTE_MAPPED_HUGEPAGE;
|
||||
if ((cc->is_khugepaged || is_target) &&
|
||||
mmap_write_trylock(mm)) {
|
||||
/*
|
||||
* Re-check whether we have an ->anon_vma, because
|
||||
* collapse_and_free_pmd() requires that either no
|
||||
* ->anon_vma exists or the anon_vma is locked.
|
||||
* We already checked ->anon_vma above, but that check
|
||||
* is racy because ->anon_vma can be populated under the
|
||||
* mmap lock in read mode.
|
||||
*/
|
||||
if (vma->anon_vma) {
|
||||
result = SCAN_PAGE_ANON;
|
||||
goto unlock_next;
|
||||
}
|
||||
/*
|
||||
* When a vma is registered with uffd-wp, we can't
|
||||
* recycle the pmd pgtable because there can be pte
|
||||
|
@ -2070,8 +2070,10 @@ static int __init kmemleak_boot_config(char *str)
|
||||
return -EINVAL;
|
||||
if (strcmp(str, "off") == 0)
|
||||
kmemleak_disable();
|
||||
else if (strcmp(str, "on") == 0)
|
||||
else if (strcmp(str, "on") == 0) {
|
||||
kmemleak_skip_disable = 1;
|
||||
stack_depot_want_early_init();
|
||||
}
|
||||
else
|
||||
return -EINVAL;
|
||||
return 0;
|
||||
@ -2093,7 +2095,6 @@ void __init kmemleak_init(void)
|
||||
if (kmemleak_error)
|
||||
return;
|
||||
|
||||
stack_depot_init();
|
||||
jiffies_min_age = msecs_to_jiffies(MSECS_MIN_AGE);
|
||||
jiffies_scan_wait = msecs_to_jiffies(SECS_SCAN_WAIT * 1000);
|
||||
|
||||
|
@ -63,7 +63,6 @@
|
||||
#include <linux/resume_user_mode.h>
|
||||
#include <linux/psi.h>
|
||||
#include <linux/seq_buf.h>
|
||||
#include <linux/parser.h>
|
||||
#include "internal.h"
|
||||
#include <net/sock.h>
|
||||
#include <net/ip.h>
|
||||
@ -2393,8 +2392,7 @@ static unsigned long reclaim_high(struct mem_cgroup *memcg,
|
||||
psi_memstall_enter(&pflags);
|
||||
nr_reclaimed += try_to_free_mem_cgroup_pages(memcg, nr_pages,
|
||||
gfp_mask,
|
||||
MEMCG_RECLAIM_MAY_SWAP,
|
||||
NULL);
|
||||
MEMCG_RECLAIM_MAY_SWAP);
|
||||
psi_memstall_leave(&pflags);
|
||||
} while ((memcg = parent_mem_cgroup(memcg)) &&
|
||||
!mem_cgroup_is_root(memcg));
|
||||
@ -2685,8 +2683,7 @@ static int try_charge_memcg(struct mem_cgroup *memcg, gfp_t gfp_mask,
|
||||
|
||||
psi_memstall_enter(&pflags);
|
||||
nr_reclaimed = try_to_free_mem_cgroup_pages(mem_over_limit, nr_pages,
|
||||
gfp_mask, reclaim_options,
|
||||
NULL);
|
||||
gfp_mask, reclaim_options);
|
||||
psi_memstall_leave(&pflags);
|
||||
|
||||
if (mem_cgroup_margin(mem_over_limit) >= nr_pages)
|
||||
@ -3506,8 +3503,7 @@ static int mem_cgroup_resize_max(struct mem_cgroup *memcg,
|
||||
}
|
||||
|
||||
if (!try_to_free_mem_cgroup_pages(memcg, 1, GFP_KERNEL,
|
||||
memsw ? 0 : MEMCG_RECLAIM_MAY_SWAP,
|
||||
NULL)) {
|
||||
memsw ? 0 : MEMCG_RECLAIM_MAY_SWAP)) {
|
||||
ret = -EBUSY;
|
||||
break;
|
||||
}
|
||||
@ -3618,8 +3614,7 @@ static int mem_cgroup_force_empty(struct mem_cgroup *memcg)
|
||||
return -EINTR;
|
||||
|
||||
if (!try_to_free_mem_cgroup_pages(memcg, 1, GFP_KERNEL,
|
||||
MEMCG_RECLAIM_MAY_SWAP,
|
||||
NULL))
|
||||
MEMCG_RECLAIM_MAY_SWAP))
|
||||
nr_retries--;
|
||||
}
|
||||
|
||||
@ -6429,8 +6424,7 @@ static ssize_t memory_high_write(struct kernfs_open_file *of,
|
||||
}
|
||||
|
||||
reclaimed = try_to_free_mem_cgroup_pages(memcg, nr_pages - high,
|
||||
GFP_KERNEL, MEMCG_RECLAIM_MAY_SWAP,
|
||||
NULL);
|
||||
GFP_KERNEL, MEMCG_RECLAIM_MAY_SWAP);
|
||||
|
||||
if (!reclaimed && !nr_retries--)
|
||||
break;
|
||||
@ -6479,8 +6473,7 @@ static ssize_t memory_max_write(struct kernfs_open_file *of,
|
||||
|
||||
if (nr_reclaims) {
|
||||
if (!try_to_free_mem_cgroup_pages(memcg, nr_pages - max,
|
||||
GFP_KERNEL, MEMCG_RECLAIM_MAY_SWAP,
|
||||
NULL))
|
||||
GFP_KERNEL, MEMCG_RECLAIM_MAY_SWAP))
|
||||
nr_reclaims--;
|
||||
continue;
|
||||
}
|
||||
@ -6603,54 +6596,21 @@ static ssize_t memory_oom_group_write(struct kernfs_open_file *of,
|
||||
return nbytes;
|
||||
}
|
||||
|
||||
enum {
|
||||
MEMORY_RECLAIM_NODES = 0,
|
||||
MEMORY_RECLAIM_NULL,
|
||||
};
|
||||
|
||||
static const match_table_t if_tokens = {
|
||||
{ MEMORY_RECLAIM_NODES, "nodes=%s" },
|
||||
{ MEMORY_RECLAIM_NULL, NULL },
|
||||
};
|
||||
|
||||
static ssize_t memory_reclaim(struct kernfs_open_file *of, char *buf,
|
||||
size_t nbytes, loff_t off)
|
||||
{
|
||||
struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
|
||||
unsigned int nr_retries = MAX_RECLAIM_RETRIES;
|
||||
unsigned long nr_to_reclaim, nr_reclaimed = 0;
|
||||
unsigned int reclaim_options = MEMCG_RECLAIM_MAY_SWAP |
|
||||
MEMCG_RECLAIM_PROACTIVE;
|
||||
char *old_buf, *start;
|
||||
substring_t args[MAX_OPT_ARGS];
|
||||
int token;
|
||||
char value[256];
|
||||
nodemask_t nodemask = NODE_MASK_ALL;
|
||||
unsigned int reclaim_options;
|
||||
int err;
|
||||
|
||||
buf = strstrip(buf);
|
||||
err = page_counter_memparse(buf, "", &nr_to_reclaim);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
old_buf = buf;
|
||||
nr_to_reclaim = memparse(buf, &buf) / PAGE_SIZE;
|
||||
if (buf == old_buf)
|
||||
return -EINVAL;
|
||||
|
||||
buf = strstrip(buf);
|
||||
|
||||
while ((start = strsep(&buf, " ")) != NULL) {
|
||||
if (!strlen(start))
|
||||
continue;
|
||||
token = match_token(start, if_tokens, args);
|
||||
match_strlcpy(value, args, sizeof(value));
|
||||
switch (token) {
|
||||
case MEMORY_RECLAIM_NODES:
|
||||
if (nodelist_parse(value, nodemask) < 0)
|
||||
return -EINVAL;
|
||||
break;
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
|
||||
reclaim_options = MEMCG_RECLAIM_MAY_SWAP | MEMCG_RECLAIM_PROACTIVE;
|
||||
while (nr_reclaimed < nr_to_reclaim) {
|
||||
unsigned long reclaimed;
|
||||
|
||||
@ -6667,8 +6627,7 @@ static ssize_t memory_reclaim(struct kernfs_open_file *of, char *buf,
|
||||
|
||||
reclaimed = try_to_free_mem_cgroup_pages(memcg,
|
||||
nr_to_reclaim - nr_reclaimed,
|
||||
GFP_KERNEL, reclaim_options,
|
||||
&nodemask);
|
||||
GFP_KERNEL, reclaim_options);
|
||||
|
||||
if (!reclaimed && !nr_retries--)
|
||||
return -EAGAIN;
|
||||
|
14
mm/memory.c
14
mm/memory.c
@ -828,12 +828,8 @@ copy_nonpresent_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
|
||||
return -EBUSY;
|
||||
return -ENOENT;
|
||||
} else if (is_pte_marker_entry(entry)) {
|
||||
/*
|
||||
* We're copying the pgtable should only because dst_vma has
|
||||
* uffd-wp enabled, do sanity check.
|
||||
*/
|
||||
WARN_ON_ONCE(!userfaultfd_wp(dst_vma));
|
||||
set_pte_at(dst_mm, addr, dst_pte, pte);
|
||||
if (is_swapin_error_entry(entry) || userfaultfd_wp(dst_vma))
|
||||
set_pte_at(dst_mm, addr, dst_pte, pte);
|
||||
return 0;
|
||||
}
|
||||
if (!userfaultfd_wp(dst_vma))
|
||||
@ -3629,8 +3625,12 @@ static vm_fault_t pte_marker_clear(struct vm_fault *vmf)
|
||||
/*
|
||||
* Be careful so that we will only recover a special uffd-wp pte into a
|
||||
* none pte. Otherwise it means the pte could have changed, so retry.
|
||||
*
|
||||
* This should also cover the case where e.g. the pte changed
|
||||
* quickly from a PTE_MARKER_UFFD_WP into PTE_MARKER_SWAPIN_ERROR.
|
||||
* So is_pte_marker() check is not enough to safely drop the pte.
|
||||
*/
|
||||
if (is_pte_marker(*vmf->pte))
|
||||
if (pte_same(vmf->orig_pte, *vmf->pte))
|
||||
pte_clear(vmf->vma->vm_mm, vmf->address, vmf->pte);
|
||||
pte_unmap_unlock(vmf->pte, vmf->ptl);
|
||||
return 0;
|
||||
|
@ -600,7 +600,8 @@ static int queue_pages_hugetlb(pte_t *pte, unsigned long hmask,
|
||||
|
||||
/* With MPOL_MF_MOVE, we migrate only unshared hugepage. */
|
||||
if (flags & (MPOL_MF_MOVE_ALL) ||
|
||||
(flags & MPOL_MF_MOVE && page_mapcount(page) == 1)) {
|
||||
(flags & MPOL_MF_MOVE && page_mapcount(page) == 1 &&
|
||||
!hugetlb_pmd_shared(pte))) {
|
||||
if (isolate_hugetlb(page, qp->pagelist) &&
|
||||
(flags & MPOL_MF_STRICT))
|
||||
/*
|
||||
|
@ -245,7 +245,13 @@ static unsigned long change_pte_range(struct mmu_gather *tlb,
|
||||
newpte = pte_swp_mksoft_dirty(newpte);
|
||||
if (pte_swp_uffd_wp(oldpte))
|
||||
newpte = pte_swp_mkuffd_wp(newpte);
|
||||
} else if (pte_marker_entry_uffd_wp(entry)) {
|
||||
} else if (is_pte_marker_entry(entry)) {
|
||||
/*
|
||||
* Ignore swapin errors unconditionally,
|
||||
* because any access should sigbus anyway.
|
||||
*/
|
||||
if (is_swapin_error_entry(entry))
|
||||
continue;
|
||||
/*
|
||||
* If this is uffd-wp pte marker and we'd like
|
||||
* to unprotect it, drop it; the next page
|
||||
|
25
mm/mremap.c
25
mm/mremap.c
@ -1027,16 +1027,29 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
|
||||
}
|
||||
|
||||
/*
|
||||
* Function vma_merge() is called on the extension we are adding to
|
||||
* the already existing vma, vma_merge() will merge this extension with
|
||||
* the already existing vma (expand operation itself) and possibly also
|
||||
* with the next vma if it becomes adjacent to the expanded vma and
|
||||
* otherwise compatible.
|
||||
* Function vma_merge() is called on the extension we
|
||||
* are adding to the already existing vma, vma_merge()
|
||||
* will merge this extension with the already existing
|
||||
* vma (expand operation itself) and possibly also with
|
||||
* the next vma if it becomes adjacent to the expanded
|
||||
* vma and otherwise compatible.
|
||||
*
|
||||
* However, vma_merge() can currently fail due to
|
||||
* is_mergeable_vma() check for vm_ops->close (see the
|
||||
* comment there). Yet this should not prevent vma
|
||||
* expanding, so perform a simple expand for such vma.
|
||||
* Ideally the check for close op should be only done
|
||||
* when a vma would be actually removed due to a merge.
|
||||
*/
|
||||
vma = vma_merge(mm, vma, extension_start, extension_end,
|
||||
if (!vma->vm_ops || !vma->vm_ops->close) {
|
||||
vma = vma_merge(mm, vma, extension_start, extension_end,
|
||||
vma->vm_flags, vma->anon_vma, vma->vm_file,
|
||||
extension_pgoff, vma_policy(vma),
|
||||
vma->vm_userfaultfd_ctx, anon_vma_name(vma));
|
||||
} else if (vma_adjust(vma, vma->vm_start, addr + new_len,
|
||||
vma->vm_pgoff, NULL)) {
|
||||
vma = NULL;
|
||||
}
|
||||
if (!vma) {
|
||||
vm_unacct_memory(pages);
|
||||
ret = -ENOMEM;
|
||||
|
@ -1100,6 +1100,7 @@ int get_swap_pages(int n_goal, swp_entry_t swp_entries[], int entry_size)
|
||||
goto check_out;
|
||||
pr_debug("scan_swap_map of si %d failed to find offset\n",
|
||||
si->type);
|
||||
cond_resched();
|
||||
|
||||
spin_lock(&swap_avail_lock);
|
||||
nextsi:
|
||||
|
@ -3323,13 +3323,16 @@ void lru_gen_migrate_mm(struct mm_struct *mm)
|
||||
if (mem_cgroup_disabled())
|
||||
return;
|
||||
|
||||
/* migration can happen before addition */
|
||||
if (!mm->lru_gen.memcg)
|
||||
return;
|
||||
|
||||
rcu_read_lock();
|
||||
memcg = mem_cgroup_from_task(task);
|
||||
rcu_read_unlock();
|
||||
if (memcg == mm->lru_gen.memcg)
|
||||
return;
|
||||
|
||||
VM_WARN_ON_ONCE(!mm->lru_gen.memcg);
|
||||
VM_WARN_ON_ONCE(list_empty(&mm->lru_gen.list));
|
||||
|
||||
lru_gen_del_mm(mm);
|
||||
@ -6754,8 +6757,7 @@ unsigned long mem_cgroup_shrink_node(struct mem_cgroup *memcg,
|
||||
unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
|
||||
unsigned long nr_pages,
|
||||
gfp_t gfp_mask,
|
||||
unsigned int reclaim_options,
|
||||
nodemask_t *nodemask)
|
||||
unsigned int reclaim_options)
|
||||
{
|
||||
unsigned long nr_reclaimed;
|
||||
unsigned int noreclaim_flag;
|
||||
@ -6770,7 +6772,6 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
|
||||
.may_unmap = 1,
|
||||
.may_swap = !!(reclaim_options & MEMCG_RECLAIM_MAY_SWAP),
|
||||
.proactive = !!(reclaim_options & MEMCG_RECLAIM_PROACTIVE),
|
||||
.nodemask = nodemask,
|
||||
};
|
||||
/*
|
||||
* Traverse the ZONELIST_FALLBACK zonelist of the current node to put
|
||||
|
237
mm/zsmalloc.c
237
mm/zsmalloc.c
@ -113,7 +113,23 @@
|
||||
* have room for two bit at least.
|
||||
*/
|
||||
#define OBJ_ALLOCATED_TAG 1
|
||||
#define OBJ_TAG_BITS 1
|
||||
|
||||
#ifdef CONFIG_ZPOOL
|
||||
/*
|
||||
* The second least-significant bit in the object's header identifies if the
|
||||
* value stored at the header is a deferred handle from the last reclaim
|
||||
* attempt.
|
||||
*
|
||||
* As noted above, this is valid because we have room for two bits.
|
||||
*/
|
||||
#define OBJ_DEFERRED_HANDLE_TAG 2
|
||||
#define OBJ_TAG_BITS 2
|
||||
#define OBJ_TAG_MASK (OBJ_ALLOCATED_TAG | OBJ_DEFERRED_HANDLE_TAG)
|
||||
#else
|
||||
#define OBJ_TAG_BITS 1
|
||||
#define OBJ_TAG_MASK OBJ_ALLOCATED_TAG
|
||||
#endif /* CONFIG_ZPOOL */
|
||||
|
||||
#define OBJ_INDEX_BITS (BITS_PER_LONG - _PFN_BITS - OBJ_TAG_BITS)
|
||||
#define OBJ_INDEX_MASK ((_AC(1, UL) << OBJ_INDEX_BITS) - 1)
|
||||
|
||||
@ -222,6 +238,12 @@ struct link_free {
|
||||
* Handle of allocated object.
|
||||
*/
|
||||
unsigned long handle;
|
||||
#ifdef CONFIG_ZPOOL
|
||||
/*
|
||||
* Deferred handle of a reclaimed object.
|
||||
*/
|
||||
unsigned long deferred_handle;
|
||||
#endif
|
||||
};
|
||||
};
|
||||
|
||||
@ -272,8 +294,6 @@ struct zspage {
|
||||
/* links the zspage to the lru list in the pool */
|
||||
struct list_head lru;
|
||||
bool under_reclaim;
|
||||
/* list of unfreed handles whose objects have been reclaimed */
|
||||
unsigned long *deferred_handles;
|
||||
#endif
|
||||
|
||||
struct zs_pool *pool;
|
||||
@ -897,7 +917,8 @@ static unsigned long handle_to_obj(unsigned long handle)
|
||||
return *(unsigned long *)handle;
|
||||
}
|
||||
|
||||
static bool obj_allocated(struct page *page, void *obj, unsigned long *phandle)
|
||||
static bool obj_tagged(struct page *page, void *obj, unsigned long *phandle,
|
||||
int tag)
|
||||
{
|
||||
unsigned long handle;
|
||||
struct zspage *zspage = get_zspage(page);
|
||||
@ -908,13 +929,27 @@ static bool obj_allocated(struct page *page, void *obj, unsigned long *phandle)
|
||||
} else
|
||||
handle = *(unsigned long *)obj;
|
||||
|
||||
if (!(handle & OBJ_ALLOCATED_TAG))
|
||||
if (!(handle & tag))
|
||||
return false;
|
||||
|
||||
*phandle = handle & ~OBJ_ALLOCATED_TAG;
|
||||
/* Clear all tags before returning the handle */
|
||||
*phandle = handle & ~OBJ_TAG_MASK;
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
 * Report whether the object at @obj is tagged with OBJ_ALLOCATED_TAG.
 *
 * Thin wrapper: forwards to obj_tagged() with OBJ_ALLOCATED_TAG and returns
 * its result unchanged. When that result is true, obj_tagged() has stored
 * the handle, with the tag bits masked off, in *@phandle.
 */
static inline bool obj_allocated(struct page *page, void *obj, unsigned long *phandle)
|
||||
{
|
||||
return obj_tagged(page, obj, phandle, OBJ_ALLOCATED_TAG);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_ZPOOL
|
||||
/*
 * Report whether the object at @obj is tagged with OBJ_DEFERRED_HANDLE_TAG,
 * i.e. whether the value stored for it is a deferred handle.
 *
 * Thin wrapper: forwards to obj_tagged() with OBJ_DEFERRED_HANDLE_TAG and
 * returns its result unchanged. When that result is true, obj_tagged() has
 * stored the handle, with the tag bits masked off, in *@phandle.
 */
static bool obj_stores_deferred_handle(struct page *page, void *obj,
|
||||
unsigned long *phandle)
|
||||
{
|
||||
return obj_tagged(page, obj, phandle, OBJ_DEFERRED_HANDLE_TAG);
|
||||
}
|
||||
#endif
|
||||
|
||||
static void reset_page(struct page *page)
|
||||
{
|
||||
__ClearPageMovable(page);
|
||||
@ -946,22 +981,36 @@ static int trylock_zspage(struct zspage *zspage)
|
||||
}
|
||||
|
||||
#ifdef CONFIG_ZPOOL
|
||||
static unsigned long find_deferred_handle_obj(struct size_class *class,
|
||||
struct page *page, int *obj_idx);
|
||||
|
||||
/*
|
||||
* Free all the deferred handles whose objects are freed in zs_free.
|
||||
*/
|
||||
static void free_handles(struct zs_pool *pool, struct zspage *zspage)
|
||||
static void free_handles(struct zs_pool *pool, struct size_class *class,
|
||||
struct zspage *zspage)
|
||||
{
|
||||
unsigned long handle = (unsigned long)zspage->deferred_handles;
|
||||
int obj_idx = 0;
|
||||
struct page *page = get_first_page(zspage);
|
||||
unsigned long handle;
|
||||
|
||||
while (handle) {
|
||||
unsigned long nxt_handle = handle_to_obj(handle);
|
||||
while (1) {
|
||||
handle = find_deferred_handle_obj(class, page, &obj_idx);
|
||||
if (!handle) {
|
||||
page = get_next_page(page);
|
||||
if (!page)
|
||||
break;
|
||||
obj_idx = 0;
|
||||
continue;
|
||||
}
|
||||
|
||||
cache_free_handle(pool, handle);
|
||||
handle = nxt_handle;
|
||||
obj_idx++;
|
||||
}
|
||||
}
|
||||
#else
|
||||
static inline void free_handles(struct zs_pool *pool, struct zspage *zspage) {}
|
||||
static inline void free_handles(struct zs_pool *pool, struct size_class *class,
|
||||
struct zspage *zspage) {}
|
||||
#endif
|
||||
|
||||
static void __free_zspage(struct zs_pool *pool, struct size_class *class,
|
||||
@ -979,7 +1028,7 @@ static void __free_zspage(struct zs_pool *pool, struct size_class *class,
|
||||
VM_BUG_ON(fg != ZS_EMPTY);
|
||||
|
||||
/* Free all deferred handles from zs_free */
|
||||
free_handles(pool, zspage);
|
||||
free_handles(pool, class, zspage);
|
||||
|
||||
next = page = get_first_page(zspage);
|
||||
do {
|
||||
@ -1067,7 +1116,6 @@ static void init_zspage(struct size_class *class, struct zspage *zspage)
|
||||
#ifdef CONFIG_ZPOOL
|
||||
INIT_LIST_HEAD(&zspage->lru);
|
||||
zspage->under_reclaim = false;
|
||||
zspage->deferred_handles = NULL;
|
||||
#endif
|
||||
|
||||
set_freeobj(zspage, 0);
|
||||
@ -1568,7 +1616,7 @@ unsigned long zs_malloc(struct zs_pool *pool, size_t size, gfp_t gfp)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(zs_malloc);
|
||||
|
||||
static void obj_free(int class_size, unsigned long obj)
|
||||
static void obj_free(int class_size, unsigned long obj, unsigned long *handle)
|
||||
{
|
||||
struct link_free *link;
|
||||
struct zspage *zspage;
|
||||
@ -1582,15 +1630,29 @@ static void obj_free(int class_size, unsigned long obj)
|
||||
zspage = get_zspage(f_page);
|
||||
|
||||
vaddr = kmap_atomic(f_page);
|
||||
|
||||
/* Insert this object in containing zspage's freelist */
|
||||
link = (struct link_free *)(vaddr + f_offset);
|
||||
if (likely(!ZsHugePage(zspage)))
|
||||
link->next = get_freeobj(zspage) << OBJ_TAG_BITS;
|
||||
else
|
||||
f_page->index = 0;
|
||||
|
||||
if (handle) {
|
||||
#ifdef CONFIG_ZPOOL
|
||||
/* Stores the (deferred) handle in the object's header */
|
||||
*handle |= OBJ_DEFERRED_HANDLE_TAG;
|
||||
*handle &= ~OBJ_ALLOCATED_TAG;
|
||||
|
||||
if (likely(!ZsHugePage(zspage)))
|
||||
link->deferred_handle = *handle;
|
||||
else
|
||||
f_page->index = *handle;
|
||||
#endif
|
||||
} else {
|
||||
/* Insert this object in containing zspage's freelist */
|
||||
if (likely(!ZsHugePage(zspage)))
|
||||
link->next = get_freeobj(zspage) << OBJ_TAG_BITS;
|
||||
else
|
||||
f_page->index = 0;
|
||||
set_freeobj(zspage, f_objidx);
|
||||
}
|
||||
|
||||
kunmap_atomic(vaddr);
|
||||
set_freeobj(zspage, f_objidx);
|
||||
mod_zspage_inuse(zspage, -1);
|
||||
}
|
||||
|
||||
@ -1615,7 +1677,6 @@ void zs_free(struct zs_pool *pool, unsigned long handle)
|
||||
zspage = get_zspage(f_page);
|
||||
class = zspage_class(pool, zspage);
|
||||
|
||||
obj_free(class->size, obj);
|
||||
class_stat_dec(class, OBJ_USED, 1);
|
||||
|
||||
#ifdef CONFIG_ZPOOL
|
||||
@ -1624,15 +1685,15 @@ void zs_free(struct zs_pool *pool, unsigned long handle)
|
||||
* Reclaim needs the handles during writeback. It'll free
|
||||
* them along with the zspage when it's done with them.
|
||||
*
|
||||
* Record current deferred handle at the memory location
|
||||
* whose address is given by handle.
|
||||
* Record current deferred handle in the object's header.
|
||||
*/
|
||||
record_obj(handle, (unsigned long)zspage->deferred_handles);
|
||||
zspage->deferred_handles = (unsigned long *)handle;
|
||||
obj_free(class->size, obj, &handle);
|
||||
spin_unlock(&pool->lock);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
obj_free(class->size, obj, NULL);
|
||||
|
||||
fullness = fix_fullness_group(class, zspage);
|
||||
if (fullness == ZS_EMPTY)
|
||||
free_zspage(pool, class, zspage);
|
||||
@ -1713,11 +1774,11 @@ static void zs_object_copy(struct size_class *class, unsigned long dst,
|
||||
}
|
||||
|
||||
/*
|
||||
* Find alloced object in zspage from index object and
|
||||
* Find object with a certain tag in zspage from index object and
|
||||
* return handle.
|
||||
*/
|
||||
static unsigned long find_alloced_obj(struct size_class *class,
|
||||
struct page *page, int *obj_idx)
|
||||
static unsigned long find_tagged_obj(struct size_class *class,
|
||||
struct page *page, int *obj_idx, int tag)
|
||||
{
|
||||
unsigned int offset;
|
||||
int index = *obj_idx;
|
||||
@ -1728,7 +1789,7 @@ static unsigned long find_alloced_obj(struct size_class *class,
|
||||
offset += class->size * index;
|
||||
|
||||
while (offset < PAGE_SIZE) {
|
||||
if (obj_allocated(page, addr + offset, &handle))
|
||||
if (obj_tagged(page, addr + offset, &handle, tag))
|
||||
break;
|
||||
|
||||
offset += class->size;
|
||||
@ -1742,6 +1803,28 @@ static unsigned long find_alloced_obj(struct size_class *class,
|
||||
return handle;
|
||||
}
|
||||
|
||||
/*
|
||||
* Find the next allocated object in @page, starting at index *@obj_idx, and
|
||||
* return its handle (thin wrapper: find_tagged_obj() with OBJ_ALLOCATED_TAG).
|
||||
*/
|
||||
static unsigned long find_alloced_obj(struct size_class *class,
|
||||
struct page *page, int *obj_idx)
|
||||
{
|
||||
return find_tagged_obj(class, page, obj_idx, OBJ_ALLOCATED_TAG);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_ZPOOL
|
||||
/*
|
||||
* Find the next object in @page, starting at index *@obj_idx, that stores a
|
||||
* deferred handle and return that handle (find_tagged_obj() with OBJ_DEFERRED_HANDLE_TAG).
|
||||
*/
|
||||
static unsigned long find_deferred_handle_obj(struct size_class *class,
|
||||
struct page *page, int *obj_idx)
|
||||
{
|
||||
return find_tagged_obj(class, page, obj_idx, OBJ_DEFERRED_HANDLE_TAG);
|
||||
}
|
||||
#endif
|
||||
|
||||
struct zs_compact_control {
|
||||
/* Source spage for migration which could be a subpage of zspage */
|
||||
struct page *s_page;
|
||||
@ -1784,7 +1867,7 @@ static int migrate_zspage(struct zs_pool *pool, struct size_class *class,
|
||||
zs_object_copy(class, free_obj, used_obj);
|
||||
obj_idx++;
|
||||
record_obj(handle, free_obj);
|
||||
obj_free(class->size, used_obj);
|
||||
obj_free(class->size, used_obj, NULL);
|
||||
}
|
||||
|
||||
/* Remember last position in this iteration */
|
||||
@ -2478,6 +2561,90 @@ void zs_destroy_pool(struct zs_pool *pool)
|
||||
EXPORT_SYMBOL_GPL(zs_destroy_pool);
|
||||
|
||||
#ifdef CONFIG_ZPOOL
|
||||
/*
 * Rebuild the free-object list of @zspage from scratch.
 *
 * Walks every page of the zspage and every object slot in it. Slots are
 * class->size bytes apart, and the within-page offset is carried over from
 * one page to the next. Slots for which obj_allocated() is true are skipped.
 * Every other slot is chained onto the free list in ascending index order;
 * if it stores a deferred handle, that handle is first released with
 * cache_free_handle().
 *
 * On return the zspage's freeobj is the index of the first free slot, or
 * (unsigned int)-1 if no free slot was found, and the last free slot's
 * ->next is -1UL << OBJ_TAG_BITS.
 *
 * The page holding the most recently linked free slot is kept mapped until
 * the next free slot is found (or the walk ends), because that slot's
 * ->next still has to be written.
 *
 * @pool:   pool handed to cache_free_handle() for deferred handles
 * @class:  size class supplying the object size
 * @zspage: zspage whose free list is rebuilt
 */
static void restore_freelist(struct zs_pool *pool, struct size_class *class,
|
||||
struct zspage *zspage)
|
||||
{
|
||||
unsigned int obj_idx = 0;
|
||||
unsigned long handle, off = 0; /* off is within-page offset */
|
||||
struct page *page = get_first_page(zspage);
|
||||
struct link_free *prev_free = NULL;
|
||||
void *prev_page_vaddr = NULL;
|
||||
|
||||
/* in case no free object found */
|
||||
set_freeobj(zspage, (unsigned int)(-1UL));
|
||||
|
||||
while (page) {
|
||||
void *vaddr = kmap_atomic(page);
|
||||
struct page *next_page;
|
||||
|
||||
while (off < PAGE_SIZE) {
|
||||
void *obj_addr = vaddr + off;
|
||||
|
||||
/* skip allocated object */
|
||||
if (obj_allocated(page, obj_addr, &handle)) {
|
||||
obj_idx++;
|
||||
off += class->size;
|
||||
continue;
|
||||
}
|
||||
|
||||
/* free deferred handle from reclaim attempt */
|
||||
if (obj_stores_deferred_handle(page, obj_addr, &handle))
|
||||
cache_free_handle(pool, handle);
|
||||
|
||||
if (prev_free)
|
||||
prev_free->next = obj_idx << OBJ_TAG_BITS;
|
||||
else /* first free object found */
|
||||
set_freeobj(zspage, obj_idx);
|
||||
|
||||
/* remember this slot so the next free object found can be linked after it */
prev_free = (struct link_free *)vaddr + off / sizeof(*prev_free);
|
||||
/* if last free object in a previous page, need to unmap */
|
||||
if (prev_page_vaddr) {
|
||||
/* NOTE(review): the earlier-mapped page is unmapped while the later-mapped current page stays mapped; confirm kmap_atomic() tolerates this non-LIFO order */
kunmap_atomic(prev_page_vaddr);
|
||||
prev_page_vaddr = NULL;
|
||||
}
|
||||
|
||||
obj_idx++;
|
||||
off += class->size;
|
||||
}
|
||||
|
||||
/*
|
||||
* Handle the last (full or partial) object on this page.
|
||||
*/
|
||||
next_page = get_next_page(page);
|
||||
if (next_page) {
|
||||
if (!prev_free || prev_page_vaddr) {
|
||||
/*
|
||||
* There is no free object in this page, so we can safely
|
||||
* unmap it.
|
||||
*/
|
||||
kunmap_atomic(vaddr);
|
||||
} else {
|
||||
/* update prev_page_vaddr since prev_free is on this page */
|
||||
prev_page_vaddr = vaddr;
|
||||
}
|
||||
} else { /* this is the last page */
|
||||
if (prev_free) {
|
||||
/*
|
||||
* Reset OBJ_TAG_BITS bit to last link to tell
|
||||
* whether it's allocated object or not.
|
||||
*/
|
||||
prev_free->next = -1UL << OBJ_TAG_BITS;
|
||||
}
|
||||
|
||||
/* unmap previous page (if not done yet) */
|
||||
if (prev_page_vaddr) {
|
||||
kunmap_atomic(prev_page_vaddr);
|
||||
prev_page_vaddr = NULL;
|
||||
}
|
||||
|
||||
kunmap_atomic(vaddr);
|
||||
}
|
||||
|
||||
page = next_page;
|
||||
/* keep only the part of off beyond this page: the offset at which the next slot starts in the following page */
off %= PAGE_SIZE;
|
||||
}
|
||||
}
|
||||
|
||||
static int zs_reclaim_page(struct zs_pool *pool, unsigned int retries)
|
||||
{
|
||||
int i, obj_idx, ret = 0;
|
||||
@ -2561,6 +2728,12 @@ static int zs_reclaim_page(struct zs_pool *pool, unsigned int retries)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Eviction fails on one of the handles, so we need to restore zspage.
|
||||
* We need to rebuild its freelist (and free stored deferred handles),
|
||||
* put it back to the correct size class, and add it to the LRU list.
|
||||
*/
|
||||
restore_freelist(pool, class, zspage);
|
||||
putback_zspage(class, zspage);
|
||||
list_add(&zspage->lru, &pool->lru);
|
||||
unlock_zspage(zspage);
|
||||
|
0
tools/testing/selftests/filesystems/fat/run_fat_tests.sh
Normal file → Executable file
0
tools/testing/selftests/filesystems/fat/run_fat_tests.sh
Normal file → Executable file
@ -17,7 +17,6 @@
|
||||
#include <stdio.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/mman.h>
|
||||
#define __USE_GNU
|
||||
#include <fcntl.h>
|
||||
|
||||
#define MIN_FREE_PAGES 20
|
||||
|
Loading…
Reference in New Issue
Block a user