mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2025-01-04 04:02:26 +00:00
9651fcedf7
The vDSO getrandom() implementation works with a buffer allocated with a new system call that has certain requirements:

- It shouldn't be written to core dumps.
  * Easy: VM_DONTDUMP.
- It should be zeroed on fork.
  * Easy: VM_WIPEONFORK.
- It shouldn't be written to swap.
  * Uh-oh: mlock is rlimited.
  * Uh-oh: mlock isn't inherited by forks.
- It shouldn't reserve actual memory, but it also shouldn't crash when page faulting in memory if none is available.
  * Uh-oh: VM_NORESERVE means segfaults.

It turns out that the vDSO getrandom() function has three really nice characteristics that we can exploit to solve this problem:

1) Due to being wiped during fork(), the vDSO code is already robust to having the contents of the pages it reads zeroed out midway through the function's execution.
2) In the absolute worst case of whatever contingency we're coding for, we have the option to fall back to the getrandom() syscall, and everything is fine.
3) The buffers the function uses are only ever useful for a maximum of 60 seconds -- a sort of cache, rather than a long term allocation.

These characteristics mean that we can introduce VM_DROPPABLE, which has the following semantics:

a) It is never written out to swap.
b) Under memory pressure, mm can just drop the pages (so that they're zero when read back again).
c) It is inherited by fork.
d) It doesn't count against the mlock budget, since nothing is locked.
e) If there's not enough memory to service a page fault, it's not fatal, and no signal is sent.

This way, allocations used by vDSO getrandom() can use:

    VM_DROPPABLE | VM_DONTDUMP | VM_WIPEONFORK | VM_NORESERVE

And there will be no problem with OOMing, crashing on overcommitment, using memory when not in use, not wiping on fork(), coredumps, or writing out to swap.

In order to let vDSO getrandom() use this, expose these via mmap(2) as MAP_DROPPABLE.
Note that this involves removing the MADV_FREE special case from sort_folio(), which according to Yu Zhao is unnecessary and will simply result in an extra call to shrink_folio_list() in the worst case. The chunk removed reenables the swapbacked flag, which we don't want for VM_DROPPABLE, and we can't conditionalize it here because there isn't a vma reference available.

Finally, the provided self-test ensures that this is working as desired.

Cc: linux-mm@kvack.org
Acked-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
60 lines
1.8 KiB
C
60 lines
1.8 KiB
C
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
|
|
#ifndef _UAPI_LINUX_MMAN_H
|
|
#define _UAPI_LINUX_MMAN_H
|
|
|
|
#include <asm/mman.h>
|
|
#include <asm-generic/hugetlb_encode.h>
|
|
#include <linux/types.h>
|
|
|
|
/*
 * Flag bits for mremap(2).  Each value occupies a distinct bit, so the
 * flags can be OR-ed together.  See the mremap(2) man page for the exact
 * semantics and the permitted combinations.
 */
#define MREMAP_MAYMOVE		1
#define MREMAP_FIXED		2
#define MREMAP_DONTUNMAP	4
|
|
|
|
/*
 * Overcommit policy selectors (an enumeration, not a bit mask).
 * NOTE(review): presumably the values accepted by the
 * vm.overcommit_memory sysctl -- confirm against the kernel's
 * overcommit-accounting documentation.
 */
#define OVERCOMMIT_GUESS		0
#define OVERCOMMIT_ALWAYS		1
#define OVERCOMMIT_NEVER		2
|
|
|
|
/*
 * Mapping-type values for mmap(2).
 *
 * MAP_SHARED_VALIDATE has the same numeric value as
 * (MAP_SHARED | MAP_PRIVATE); it is a separate mapping type, not a
 * request for both behaviours at once.
 */
#define MAP_SHARED		0x01		/* Share changes */
#define MAP_PRIVATE		0x02		/* Changes are private */
#define MAP_SHARED_VALIDATE	0x03		/* share + validate extension flags */
#define MAP_DROPPABLE		0x08		/* Zero memory under memory pressure. */
|
|
|
|
/*
 * Huge page size encoding when MAP_HUGETLB is specified, and a huge page
 * size other than the default is desired.  See hugetlb_encode.h.
 * All known huge page size encodings are provided here.  It is the
 * responsibility of the application to know which sizes are supported on
 * the running system.  See mmap(2) man page for details.
 *
 * Every MAP_HUGE_* name below is a direct alias of the corresponding
 * HUGETLB_FLAG_ENCODE_* macro; no value is defined locally.
 */
#define MAP_HUGE_SHIFT	HUGETLB_FLAG_ENCODE_SHIFT
#define MAP_HUGE_MASK	HUGETLB_FLAG_ENCODE_MASK

#define MAP_HUGE_16KB	HUGETLB_FLAG_ENCODE_16KB
#define MAP_HUGE_64KB	HUGETLB_FLAG_ENCODE_64KB
#define MAP_HUGE_512KB	HUGETLB_FLAG_ENCODE_512KB
#define MAP_HUGE_1MB	HUGETLB_FLAG_ENCODE_1MB
#define MAP_HUGE_2MB	HUGETLB_FLAG_ENCODE_2MB
#define MAP_HUGE_8MB	HUGETLB_FLAG_ENCODE_8MB
#define MAP_HUGE_16MB	HUGETLB_FLAG_ENCODE_16MB
#define MAP_HUGE_32MB	HUGETLB_FLAG_ENCODE_32MB
#define MAP_HUGE_256MB	HUGETLB_FLAG_ENCODE_256MB
#define MAP_HUGE_512MB	HUGETLB_FLAG_ENCODE_512MB
#define MAP_HUGE_1GB	HUGETLB_FLAG_ENCODE_1GB
#define MAP_HUGE_2GB	HUGETLB_FLAG_ENCODE_2GB
#define MAP_HUGE_16GB	HUGETLB_FLAG_ENCODE_16GB
|
|
|
|
struct cachestat_range {
|
|
__u64 off;
|
|
__u64 len;
|
|
};
|
|
|
|
struct cachestat {
|
|
__u64 nr_cache;
|
|
__u64 nr_dirty;
|
|
__u64 nr_writeback;
|
|
__u64 nr_evicted;
|
|
__u64 nr_recently_evicted;
|
|
};
|
|
|
|
#endif /* _UAPI_LINUX_MMAN_H */
|