mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2025-01-17 13:58:46 +00:00
5c00ff742b
Sergey Senozhatsky improves zram's post-processing selection algorithm. This leads to improved memory savings. - Wei Yang has gone to town on the mapletree code, contributing several series which clean up the implementation: - "refine mas_mab_cp()" - "Reduce the space to be cleared for maple_big_node" - "maple_tree: simplify mas_push_node()" - "Following cleanup after introduce mas_wr_store_type()" - "refine storing null" - The series "selftests/mm: hugetlb_fault_after_madv improvements" from David Hildenbrand fixes this selftest for s390. - The series "introduce pte_offset_map_{ro|rw}_nolock()" from Qi Zheng implements some rationalizations and cleanups in the page mapping code. - The series "mm: optimize shadow entries removal" from Shakeel Butt optimizes the file truncation code by speeding up the handling of shadow entries. - The series "Remove PageKsm()" from Matthew Wilcox completes the migration of this flag over to being a folio-based flag. - The series "Unify hugetlb into arch_get_unmapped_area functions" from Oscar Salvador implements a bunch of consolidations and cleanups in the hugetlb code. - The series "Do not shatter hugezeropage on wp-fault" from Dev Jain takes away the wp-fault time practice of turning a huge zero page into small pages. Instead we replace the whole thing with a THP. More consistent, cleaner, and potentially saves a large number of pagefaults. - The series "percpu: Add a test case and fix for clang" from Andy Shevchenko enhances and fixes the kernel's built in percpu test code. - The series "mm/mremap: Remove extra vma tree walk" from Liam Howlett optimizes mremap() by avoiding doing things which we didn't need to do. - The series "Improve the tmpfs large folio read performance" from Baolin Wang teaches tmpfs to copy data into userspace at the folio size rather than as individual pages. A 20% speedup was observed. - The series "mm/damon/vaddr: Fix issue in damon_va_evenly_split_region()" from Zheng Yejian fixes DAMON splitting. 
- The series "memcg-v1: fully deprecate charge moving" from Shakeel Butt removes the long-deprecated memcg-v1 charge moving feature. - The series "fix error handling in mmap_region() and refactor" from Lorenzo Stoakes cleans up some of the mmap() error handling and addresses some potential performance issues. - The series "x86/module: use large ROX pages for text allocations" from Mike Rapoport teaches x86 to use large pages for read-only-execute module text. - The series "page allocation tag compression" from Suren Baghdasaryan is follow-on maintenance work for the new page allocation profiling feature. - The series "page->index removals in mm" from Matthew Wilcox removes most references to page->index in mm/. A slow march towards shrinking struct page. - The series "damon/{self,kunit}tests: minor fixups for DAMON debugfs interface tests" from Andrew Paniakin performs maintenance work for DAMON's self testing code. - The series "mm: zswap swap-out of large folios" from Kanchana Sridhar improves zswap's batching of compression and decompression. It is a step along the way towards using Intel IAA hardware acceleration for this zswap operation. - The series "kasan: migrate the last module test to kunit" from Sabyrzhan Tasbolatov completes the migration of the KASAN built-in tests over to the KUnit framework. - The series "implement lightweight guard pages" from Lorenzo Stoakes permits userspace to place fault-generating guard pages within a single VMA, rather than requiring that multiple VMAs be created for this. Improved efficiencies for userspace memory allocators are expected. - The series "memcg: tracepoint for flushing stats" from JP Kobryn uses tracepoints to provide increased visibility into memcg stats flushing activity. - The series "zram: IDLE flag handling fixes" from Sergey Senozhatsky fixes a zram buglet which potentially affected performance. 
- The series "mm: add more kernel parameters to control mTHP" from Maíra Canal enhances our ability to control/configure multisize THP from the kernel boot command line. - The series "kasan: few improvements on kunit tests" from Sabyrzhan Tasbolatov has a couple of fixups for the KASAN KUnit tests. - The series "mm/list_lru: Split list_lru lock into per-cgroup scope" from Kairui Song optimizes list_lru memory utilization when lockdep is enabled. -----BEGIN PGP SIGNATURE----- iHUEABYIAB0WIQTTMBEPP41GrTpTJgfdBJ7gKXxAjgUCZzwFqgAKCRDdBJ7gKXxA jkeuAQCkl+BmeYHE6uG0hi3pRxkupseR6DEOAYIiTv0/l8/GggD/Z3jmEeqnZaNq xyyenpibWgUoShU2wZ/Ha8FE5WDINwg= =JfWR -----END PGP SIGNATURE----- Merge tag 'mm-stable-2024-11-18-19-27' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm Pull MM updates from Andrew Morton: - The series "zram: optimal post-processing target selection" from Sergey Senozhatsky improves zram's post-processing selection algorithm. This leads to improved memory savings. - Wei Yang has gone to town on the mapletree code, contributing several series which clean up the implementation: - "refine mas_mab_cp()" - "Reduce the space to be cleared for maple_big_node" - "maple_tree: simplify mas_push_node()" - "Following cleanup after introduce mas_wr_store_type()" - "refine storing null" - The series "selftests/mm: hugetlb_fault_after_madv improvements" from David Hildenbrand fixes this selftest for s390. - The series "introduce pte_offset_map_{ro|rw}_nolock()" from Qi Zheng implements some rationalizations and cleanups in the page mapping code. - The series "mm: optimize shadow entries removal" from Shakeel Butt optimizes the file truncation code by speeding up the handling of shadow entries. - The series "Remove PageKsm()" from Matthew Wilcox completes the migration of this flag over to being a folio-based flag. - The series "Unify hugetlb into arch_get_unmapped_area functions" from Oscar Salvador implements a bunch of consolidations and cleanups in the hugetlb code. 
- The series "Do not shatter hugezeropage on wp-fault" from Dev Jain takes away the wp-fault time practice of turning a huge zero page into small pages. Instead we replace the whole thing with a THP. More consistent, cleaner, and potentially saves a large number of pagefaults. - The series "percpu: Add a test case and fix for clang" from Andy Shevchenko enhances and fixes the kernel's built in percpu test code. - The series "mm/mremap: Remove extra vma tree walk" from Liam Howlett optimizes mremap() by avoiding doing things which we didn't need to do. - The series "Improve the tmpfs large folio read performance" from Baolin Wang teaches tmpfs to copy data into userspace at the folio size rather than as individual pages. A 20% speedup was observed. - The series "mm/damon/vaddr: Fix issue in damon_va_evenly_split_region()" from Zheng Yejian fixes DAMON splitting. - The series "memcg-v1: fully deprecate charge moving" from Shakeel Butt removes the long-deprecated memcg-v1 charge moving feature. - The series "fix error handling in mmap_region() and refactor" from Lorenzo Stoakes cleans up some of the mmap() error handling and addresses some potential performance issues. - The series "x86/module: use large ROX pages for text allocations" from Mike Rapoport teaches x86 to use large pages for read-only-execute module text. - The series "page allocation tag compression" from Suren Baghdasaryan is follow-on maintenance work for the new page allocation profiling feature. - The series "page->index removals in mm" from Matthew Wilcox removes most references to page->index in mm/. A slow march towards shrinking struct page. - The series "damon/{self,kunit}tests: minor fixups for DAMON debugfs interface tests" from Andrew Paniakin performs maintenance work for DAMON's self testing code. - The series "mm: zswap swap-out of large folios" from Kanchana Sridhar improves zswap's batching of compression and decompression. 
It is a step along the way towards using Intel IAA hardware acceleration for this zswap operation. - The series "kasan: migrate the last module test to kunit" from Sabyrzhan Tasbolatov completes the migration of the KASAN built-in tests over to the KUnit framework. - The series "implement lightweight guard pages" from Lorenzo Stoakes permits userspace to place fault-generating guard pages within a single VMA, rather than requiring that multiple VMAs be created for this. Improved efficiencies for userspace memory allocators are expected. - The series "memcg: tracepoint for flushing stats" from JP Kobryn uses tracepoints to provide increased visibility into memcg stats flushing activity. - The series "zram: IDLE flag handling fixes" from Sergey Senozhatsky fixes a zram buglet which potentially affected performance. - The series "mm: add more kernel parameters to control mTHP" from Maíra Canal enhances our ability to control/configure multisize THP from the kernel boot command line. - The series "kasan: few improvements on kunit tests" from Sabyrzhan Tasbolatov has a couple of fixups for the KASAN KUnit tests. - The series "mm/list_lru: Split list_lru lock into per-cgroup scope" from Kairui Song optimizes list_lru memory utilization when lockdep is enabled. 
* tag 'mm-stable-2024-11-18-19-27' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (215 commits) cma: enforce non-zero pageblock_order during cma_init_reserved_mem() mm/kfence: add a new kunit test test_use_after_free_read_nofault() zram: fix NULL pointer in comp_algorithm_show() memcg/hugetlb: add hugeTLB counters to memcg vmstat: call fold_vm_zone_numa_events() before show per zone NUMA event mm: mmap_lock: check trace_mmap_lock_$type_enabled() instead of regcount zram: ZRAM_DEF_COMP should depend on ZRAM MAINTAINERS/MEMORY MANAGEMENT: add document files for mm Docs/mm/damon: recommend academic papers to read and/or cite mm: define general function pXd_init() kmemleak: iommu/iova: fix transient kmemleak false positive mm/list_lru: simplify the list_lru walk callback function mm/list_lru: split the lock to per-cgroup scope mm/list_lru: simplify reparenting and initial allocation mm/list_lru: code clean up for reparenting mm/list_lru: don't export list_lru_add mm/list_lru: don't pass unnecessary key parameters kasan: add kunit tests for kmalloc_track_caller, kmalloc_node_track_caller kasan: change kasan_atomics kunit test as KUNIT_CASE_SLOW kasan: use EXPORT_SYMBOL_IF_KUNIT to export symbols ...
222 lines
7.7 KiB
C
222 lines
7.7 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
#ifndef __SHMEM_FS_H
|
|
#define __SHMEM_FS_H
|
|
|
|
#include <linux/file.h>
|
|
#include <linux/swap.h>
|
|
#include <linux/mempolicy.h>
|
|
#include <linux/pagemap.h>
|
|
#include <linux/percpu_counter.h>
|
|
#include <linux/xattr.h>
|
|
#include <linux/fs_parser.h>
|
|
#include <linux/userfaultfd_k.h>
|
|
|
|
/* inode in-kernel data */
|
|
|
|
/*
 * SHMEM_MAXQUOTAS is only defined when tmpfs quota support is configured.
 * NOTE(review): presumably the number of quota types tmpfs handles -
 * confirm against its users, which are outside this header.
 */
#if defined(CONFIG_TMPFS_QUOTA)
#define SHMEM_MAXQUOTAS		2
#endif /* CONFIG_TMPFS_QUOTA */
|
|
|
|
struct shmem_inode_info {
|
|
spinlock_t lock;
|
|
unsigned int seals; /* shmem seals */
|
|
unsigned long flags;
|
|
unsigned long alloced; /* data pages alloced to file */
|
|
unsigned long swapped; /* subtotal assigned to swap */
|
|
union {
|
|
struct offset_ctx dir_offsets; /* stable directory offsets */
|
|
struct {
|
|
struct list_head shrinklist; /* shrinkable hpage inodes */
|
|
struct list_head swaplist; /* chain of maybes on swap */
|
|
};
|
|
};
|
|
struct timespec64 i_crtime; /* file creation time */
|
|
struct shared_policy policy; /* NUMA memory alloc policy */
|
|
struct simple_xattrs xattrs; /* list of xattrs */
|
|
pgoff_t fallocend; /* highest fallocate endindex */
|
|
unsigned int fsflags; /* for FS_IOC_[SG]ETFLAGS */
|
|
atomic_t stop_eviction; /* hold when working on inode */
|
|
#ifdef CONFIG_TMPFS_QUOTA
|
|
struct dquot __rcu *i_dquot[MAXQUOTAS];
|
|
#endif
|
|
struct inode vfs_inode;
|
|
};
|
|
|
|
/*
 * Inode flag masks built from the generic FS_*_FL bits, plus
 * FS_CASEFOLD_FL in every mask.  Names indicate the intent: flags shown
 * to user space, flags user space may change, and flags inherited.
 */
#define SHMEM_FL_USER_VISIBLE	(FS_FL_USER_VISIBLE | FS_CASEFOLD_FL)
#define SHMEM_FL_USER_MODIFIABLE	\
	(FS_IMMUTABLE_FL |		\
	 FS_APPEND_FL |			\
	 FS_NODUMP_FL |			\
	 FS_NOATIME_FL |		\
	 FS_CASEFOLD_FL)
#define SHMEM_FL_INHERITED	\
	(FS_NODUMP_FL |		\
	 FS_NOATIME_FL |	\
	 FS_CASEFOLD_FL)
|
|
|
|
struct shmem_quota_limits {
|
|
qsize_t usrquota_bhardlimit; /* Default user quota block hard limit */
|
|
qsize_t usrquota_ihardlimit; /* Default user quota inode hard limit */
|
|
qsize_t grpquota_bhardlimit; /* Default group quota block hard limit */
|
|
qsize_t grpquota_ihardlimit; /* Default group quota inode hard limit */
|
|
};
|
|
|
|
struct shmem_sb_info {
|
|
unsigned long max_blocks; /* How many blocks are allowed */
|
|
struct percpu_counter used_blocks; /* How many are allocated */
|
|
unsigned long max_inodes; /* How many inodes are allowed */
|
|
unsigned long free_ispace; /* How much ispace left for allocation */
|
|
raw_spinlock_t stat_lock; /* Serialize shmem_sb_info changes */
|
|
umode_t mode; /* Mount mode for root directory */
|
|
unsigned char huge; /* Whether to try for hugepages */
|
|
kuid_t uid; /* Mount uid for root directory */
|
|
kgid_t gid; /* Mount gid for root directory */
|
|
bool full_inums; /* If i_ino should be uint or ino_t */
|
|
bool noswap; /* ignores VM reclaim / swap requests */
|
|
ino_t next_ino; /* The next per-sb inode number to use */
|
|
ino_t __percpu *ino_batch; /* The next per-cpu inode number to use */
|
|
struct mempolicy *mpol; /* default memory policy for mappings */
|
|
spinlock_t shrinklist_lock; /* Protects shrinklist */
|
|
struct list_head shrinklist; /* List of shinkable inodes */
|
|
unsigned long shrinklist_len; /* Length of shrinklist */
|
|
struct shmem_quota_limits qlimits; /* Default quota limits */
|
|
};
|
|
|
|
static inline struct shmem_inode_info *SHMEM_I(struct inode *inode)
|
|
{
|
|
return container_of(inode, struct shmem_inode_info, vfs_inode);
|
|
}
|
|
|
|
/*
|
|
* Functions in mm/shmem.c called directly from elsewhere:
|
|
*/
|
|
extern const struct fs_parameter_spec shmem_fs_parameters[];
|
|
extern void shmem_init(void);
|
|
extern int shmem_init_fs_context(struct fs_context *fc);
|
|
extern struct file *shmem_file_setup(const char *name,
|
|
loff_t size, unsigned long flags);
|
|
extern struct file *shmem_kernel_file_setup(const char *name, loff_t size,
|
|
unsigned long flags);
|
|
extern struct file *shmem_file_setup_with_mnt(struct vfsmount *mnt,
|
|
const char *name, loff_t size, unsigned long flags);
|
|
extern int shmem_zero_setup(struct vm_area_struct *);
|
|
extern unsigned long shmem_get_unmapped_area(struct file *, unsigned long addr,
|
|
unsigned long len, unsigned long pgoff, unsigned long flags);
|
|
extern int shmem_lock(struct file *file, int lock, struct ucounts *ucounts);
|
|
#if defined(CONFIG_SHMEM)
/* Real implementation is provided outside this header. */
bool shmem_mapping(struct address_space *mapping);
#else /* !CONFIG_SHMEM */
/* Without CONFIG_SHMEM this stub answers false for every mapping. */
static inline bool shmem_mapping(struct address_space *mapping)
{
	return false;
}
#endif /* CONFIG_SHMEM */
|
|
extern void shmem_unlock_mapping(struct address_space *mapping);
|
|
extern struct page *shmem_read_mapping_page_gfp(struct address_space *mapping,
|
|
pgoff_t index, gfp_t gfp_mask);
|
|
extern void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end);
|
|
int shmem_unuse(unsigned int type);
|
|
|
|
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
|
|
unsigned long shmem_allowable_huge_orders(struct inode *inode,
|
|
struct vm_area_struct *vma, pgoff_t index,
|
|
loff_t write_end, bool shmem_huge_force);
|
|
bool shmem_hpage_pmd_enabled(void);
|
|
#else
|
|
static inline unsigned long shmem_allowable_huge_orders(struct inode *inode,
|
|
struct vm_area_struct *vma, pgoff_t index,
|
|
loff_t write_end, bool shmem_huge_force)
|
|
{
|
|
return 0;
|
|
}
|
|
|
|
static inline bool shmem_hpage_pmd_enabled(void)
|
|
{
|
|
return false;
|
|
}
|
|
#endif
|
|
|
|
#if defined(CONFIG_SHMEM)
/* Real implementation is provided outside this header. */
unsigned long shmem_swap_usage(struct vm_area_struct *vma);
#else /* !CONFIG_SHMEM */
/* Without CONFIG_SHMEM this stub reports 0 for every VMA. */
static inline unsigned long shmem_swap_usage(struct vm_area_struct *vma)
{
	return 0;
}
#endif /* CONFIG_SHMEM */
|
|
/* Declared unconditionally; takes a mapping plus a start/end page offset. */
unsigned long shmem_partial_swap_usage(struct address_space *mapping,
				       pgoff_t start, pgoff_t end);
|
|
|
|
/*
 * Allocation requirements a caller passes to shmem_get_folio().
 * Enumerator values are consecutive, starting from SGP_READ == 0.
 */
enum sgp_type {
	/* stay within i_size; never allocate a page */
	SGP_READ = 0,
	/* similar, but fail on a hole or use a fallocated page */
	SGP_NOALLOC,
	/* stay within i_size; may allocate a page */
	SGP_CACHE,
	/* may go beyond i_size; may allocate a !Uptodate page */
	SGP_WRITE,
	/* like SGP_WRITE, but make an existing page Uptodate */
	SGP_FALLOC,
};
|
|
|
|
int shmem_get_folio(struct inode *inode, pgoff_t index, loff_t write_end,
|
|
struct folio **foliop, enum sgp_type sgp);
|
|
struct folio *shmem_read_folio_gfp(struct address_space *mapping,
|
|
pgoff_t index, gfp_t gfp);
|
|
|
|
static inline struct folio *shmem_read_folio(struct address_space *mapping,
|
|
pgoff_t index)
|
|
{
|
|
return shmem_read_folio_gfp(mapping, index, mapping_gfp_mask(mapping));
|
|
}
|
|
|
|
/*
 * Convenience wrapper around shmem_read_mapping_page_gfp() that supplies
 * the mapping's own gfp mask, as reported by mapping_gfp_mask().
 */
static inline struct page *shmem_read_mapping_page(struct address_space *mapping,
						   pgoff_t index)
{
	struct page *page;

	page = shmem_read_mapping_page_gfp(mapping, index,
					   mapping_gfp_mask(mapping));
	return page;
}
|
|
|
|
static inline bool shmem_file(struct file *file)
|
|
{
|
|
if (!IS_ENABLED(CONFIG_SHMEM))
|
|
return false;
|
|
if (!file || !file->f_mapping)
|
|
return false;
|
|
return shmem_mapping(file->f_mapping);
|
|
}
|
|
|
|
/*
|
|
* If fallocate(FALLOC_FL_KEEP_SIZE) has been used, there may be pages
|
|
* beyond i_size's notion of EOF, which fallocate has committed to reserving:
|
|
* which split_huge_page() must therefore not delete. This use of a single
|
|
* "fallocend" per inode errs on the side of not deleting a reservation when
|
|
* in doubt: there are plenty of cases when it preserves unreserved pages.
|
|
*/
|
|
static inline pgoff_t shmem_fallocend(struct inode *inode, pgoff_t eof)
|
|
{
|
|
return max(eof, SHMEM_I(inode)->fallocend);
|
|
}
|
|
|
|
/*
 * Page accounting pair, defined outside this header.  shmem_charge()
 * returns a bool; shmem_uncharge() returns nothing.
 */
bool shmem_charge(struct inode *inode, long pages);
void shmem_uncharge(struct inode *inode, long pages);
|
|
|
|
/*
 * shmem_mfill_atomic_pte() exists only when userfaultfd is configured.
 * With CONFIG_SHMEM it is a real function defined outside this header;
 * without CONFIG_SHMEM the name expands to an expression that calls BUG()
 * and evaluates to 0.
 */
#if defined(CONFIG_USERFAULTFD) && defined(CONFIG_SHMEM)
int shmem_mfill_atomic_pte(pmd_t *dst_pmd, struct vm_area_struct *dst_vma,
			   unsigned long dst_addr, unsigned long src_addr,
			   uffd_flags_t flags, struct folio **foliop);
#elif defined(CONFIG_USERFAULTFD) /* && !CONFIG_SHMEM */
#define shmem_mfill_atomic_pte(dst_pmd, dst_vma, dst_addr, \
			       src_addr, flags, foliop) ({ BUG(); 0; })
#endif /* CONFIG_USERFAULTFD */
|
|
|
|
/*
 * Used space is kept as an unsigned 64-bit byte count, but the quota core
 * only supports signed 64-bit values, so the largest signed 64-bit value
 * (2^63 - 1) is used as the limit.  The inode limit uses the same value.
 */
#define SHMEM_QUOTA_MAX_SPC_LIMIT	0x7fffffffffffffffLL
#define SHMEM_QUOTA_MAX_INO_LIMIT	0x7fffffffffffffffLL
|
|
|
|
#if defined(CONFIG_TMPFS_QUOTA)
/* tmpfs quota objects; both are defined outside this header. */
extern const struct dquot_operations shmem_quota_operations;
extern struct quota_format_type shmem_quota_format;
#endif /* CONFIG_TMPFS_QUOTA */
|
|
|
|
#endif
|