mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2025-01-06 13:16:22 +00:00
4ad10a5f5f
Provide a generic C vDSO getrandom() implementation, which operates on an opaque state returned by vgetrandom_alloc() and produces random bytes the same way as getrandom(). This has the following API signature: ssize_t vgetrandom(void *buffer, size_t len, unsigned int flags, void *opaque_state, size_t opaque_len); The return value and the first three arguments are the same as ordinary getrandom(), while the last two arguments are a pointer to the opaque allocated state and its size. Were all five arguments passed to the getrandom() syscall, nothing different would happen, and the functions would have the exact same behavior. The actual vDSO RNG algorithm implemented is the same one implemented by drivers/char/random.c, using the same fast-erasure techniques as that. Should the in-kernel implementation change, so too will the vDSO one. It requires an implementation of ChaCha20 that does not use any stack, in order to maintain forward secrecy if a multi-threaded program forks (though this does not account for a similar issue with SA_SIGINFO copying registers to the stack), so this is left as an architecture-specific fill-in. Stack-less ChaCha20 is an easy algorithm to implement on a variety of architectures, so this shouldn't be too onerous. Initially, the state is keyless, and so the first call makes a getrandom() syscall to generate that key, and then uses it for subsequent calls. By keeping track of a generation counter, it knows when its key is invalidated and it should fetch a new one using the syscall. Later, more than just a generation counter might be used. Since MADV_WIPEONFORK is set on the opaque state, the key and related state is wiped during a fork(), so secrets don't roll over into new processes, and the same state doesn't accidentally generate the same random stream. The generation counter, as well, is always >0, so that the 0 counter is a useful indication of a fork() or otherwise uninitialized state. 
If the kernel RNG is not yet initialized, then the vDSO always calls the syscall, because that behavior cannot be emulated in userspace, but fortunately that state is short lived and only during early boot. If it has been initialized, then there is no need to inspect the `flags` argument, because the behavior does not change post-initialization regardless of the `flags` value. Since the opaque state passed to it is mutated, vDSO getrandom() is not reentrant, when used with the same opaque state, which libc should be mindful of. The function works over an opaque per-thread state of a particular size, which must be marked VM_WIPEONFORK, VM_DONTDUMP, VM_NORESERVE, and VM_DROPPABLE for proper operation. Over time, the nuances of these allocations may change or grow or even differ based on architectural features. The opaque state passed to vDSO getrandom() must be allocated using the mmap_flags and mmap_prot parameters provided by the vgetrandom_opaque_params struct, which also contains the size of each state. That struct can be obtained with a call to vgetrandom(NULL, 0, 0, &params, ~0UL). Then, libc can call mmap(2) and slice up the returned array into a state per each thread, while ensuring that no single state straddles a page boundary. Libc is expected to allocate a chunk of these on first use, and then dole them out to threads as they're created, allocating more when needed. vDSO getrandom() provides the ability for userspace to generate random bytes quickly and safely, and is intended to be integrated into libc's thread management. As an illustrative example, the introduced code in the vdso_test_getrandom self test later in this series might be used to do the same outside of libc. In a libc the various pthread-isms are expected to be elided into libc internals. Reviewed-by: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
166 lines
4.7 KiB
C
166 lines
4.7 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
#ifndef __VDSO_DATAPAGE_H
|
|
#define __VDSO_DATAPAGE_H
|
|
|
|
#ifndef __ASSEMBLY__
|
|
|
|
#include <linux/compiler.h>
|
|
#include <uapi/linux/time.h>
|
|
#include <uapi/linux/types.h>
|
|
#include <uapi/asm-generic/errno-base.h>
|
|
|
|
#include <vdso/bits.h>
|
|
#include <vdso/clocksource.h>
|
|
#include <vdso/ktime.h>
|
|
#include <vdso/limits.h>
|
|
#include <vdso/math64.h>
|
|
#include <vdso/processor.h>
|
|
#include <vdso/time.h>
|
|
#include <vdso/time32.h>
|
|
#include <vdso/time64.h>
|
|
|
|
/*
 * With CONFIG_ARCH_HAS_VDSO_DATA the architecture header is pulled in (it is
 * expected to provide struct arch_vdso_data -- confirm per architecture).
 * Otherwise an empty placeholder is defined so that struct vdso_data can
 * embed an arch_data member unconditionally.
 */
#ifdef CONFIG_ARCH_HAS_VDSO_DATA
#include <asm/vdso/data.h>
#else
struct arch_vdso_data {};
#endif
|
|
|
|
/* Element count of the per-clock arrays (basetime[], offset[]) in vdso_data. */
#define VDSO_BASES	(CLOCK_TAI + 1)

/*
 * Clock-ID bitmasks: each macro ORs together BIT(clock_id) for the clock IDs
 * that belong to the group it names.
 */
#define VDSO_HRES	(BIT(CLOCK_REALTIME)		| \
			 BIT(CLOCK_MONOTONIC)		| \
			 BIT(CLOCK_BOOTTIME)		| \
			 BIT(CLOCK_TAI))
#define VDSO_COARSE	(BIT(CLOCK_REALTIME_COARSE)	| \
			 BIT(CLOCK_MONOTONIC_COARSE))
#define VDSO_RAW	(BIT(CLOCK_MONOTONIC_RAW))
|
|
|
|
/*
 * CS_BASES is the element count of the _vdso_data[] and _timens_data[] arrays
 * and of vdso_data_store.data[]. CS_HRES_COARSE and CS_RAW appear to be the
 * indices into those arrays (NOTE(review): confirm against the users).
 */
#define CS_HRES_COARSE	0
#define CS_RAW		1
#define CS_BASES	(CS_RAW + 1)
|
|
|
|
/**
|
|
* struct vdso_timestamp - basetime per clock_id
|
|
* @sec: seconds
|
|
* @nsec: nanoseconds
|
|
*
|
|
* There is one vdso_timestamp object in vvar for each vDSO-accelerated
|
|
* clock_id. For high-resolution clocks, this encodes the time
|
|
* corresponding to vdso_data.cycle_last. For coarse clocks this encodes
|
|
* the actual time.
|
|
*
|
|
* To be noticed that for highres clocks nsec is left-shifted by
|
|
* vdso_data.cs[x].shift.
|
|
*/
|
|
struct vdso_timestamp {
|
|
u64 sec;
|
|
u64 nsec;
|
|
};
|
|
|
|
/**
|
|
* struct vdso_data - vdso datapage representation
|
|
* @seq: timebase sequence counter
|
|
* @clock_mode: clock mode
|
|
* @cycle_last: timebase at clocksource init
|
|
* @max_cycles: maximum cycles which won't overflow 64bit multiplication
|
|
* @mask: clocksource mask
|
|
* @mult: clocksource multiplier
|
|
* @shift: clocksource shift
|
|
* @basetime[clock_id]: basetime per clock_id
|
|
* @offset[clock_id]: time namespace offset per clock_id
|
|
* @tz_minuteswest: minutes west of Greenwich
|
|
* @tz_dsttime: type of DST correction
|
|
* @hrtimer_res: hrtimer resolution
|
|
* @__unused: unused
|
|
* @arch_data: architecture specific data (optional, defaults
|
|
* to an empty struct)
|
|
*
|
|
* vdso_data will be accessed by 64 bit and compat code at the same time
|
|
* so we should be careful before modifying this structure.
|
|
*
|
|
* @basetime is used to store the base time for the system wide time getter
|
|
* VVAR page.
|
|
*
|
|
* @offset is used by the special time namespace VVAR pages which are
|
|
* installed instead of the real VVAR page. These namespace pages must set
|
|
* @seq to 1 and @clock_mode to VDSO_CLOCKMODE_TIMENS to force the code into
|
|
* the time namespace slow path. The namespace aware functions retrieve the
|
|
* real system wide VVAR page, read host time and add the per clock offset.
|
|
* For clocks which are not affected by time namespace adjustment the
|
|
* offset must be zero.
|
|
*/
|
|
struct vdso_data {
|
|
u32 seq;
|
|
|
|
s32 clock_mode;
|
|
u64 cycle_last;
|
|
#ifdef CONFIG_GENERIC_VDSO_OVERFLOW_PROTECT
|
|
u64 max_cycles;
|
|
#endif
|
|
u64 mask;
|
|
u32 mult;
|
|
u32 shift;
|
|
|
|
union {
|
|
struct vdso_timestamp basetime[VDSO_BASES];
|
|
struct timens_offset offset[VDSO_BASES];
|
|
};
|
|
|
|
s32 tz_minuteswest;
|
|
s32 tz_dsttime;
|
|
u32 hrtimer_res;
|
|
u32 __unused;
|
|
|
|
struct arch_vdso_data arch_data;
|
|
};
|
|
|
|
/**
|
|
* struct vdso_rng_data - vdso RNG state information
|
|
* @generation: counter representing the number of RNG reseeds
|
|
* @is_ready: boolean signaling whether the RNG is initialized
|
|
*/
|
|
struct vdso_rng_data {
|
|
u64 generation;
|
|
u8 is_ready;
|
|
};
|
|
|
|
/*
|
|
* We use the hidden visibility to prevent the compiler from generating a GOT
|
|
* relocation. Not only is going through a GOT useless (the entry couldn't and
|
|
* must not be overridden by another library), it does not even work: the linker
|
|
* cannot generate an absolute address to the data page.
|
|
*
|
|
* With the hidden visibility, the compiler simply generates a PC-relative
|
|
* relocation, and this is what we need.
|
|
*/
|
|
extern struct vdso_data _vdso_data[CS_BASES] __attribute__((visibility("hidden")));
|
|
extern struct vdso_data _timens_data[CS_BASES] __attribute__((visibility("hidden")));
|
|
extern struct vdso_rng_data _vdso_rng_data __attribute__((visibility("hidden")));
|
|
|
|
/**
|
|
* union vdso_data_store - Generic vDSO data page
|
|
*/
|
|
union vdso_data_store {
|
|
struct vdso_data data[CS_BASES];
|
|
u8 page[1U << CONFIG_PAGE_SHIFT];
|
|
};
|
|
|
|
/*
|
|
* The generic vDSO implementation requires that gettimeofday.h
|
|
* provides:
|
|
* - __arch_get_vdso_data(): to get the vdso datapage.
|
|
* - __arch_get_hw_counter(): to get the hw counter based on the
|
|
* clock_mode.
|
|
* - gettimeofday_fallback(): fallback for gettimeofday.
|
|
* - clock_gettime_fallback(): fallback for clock_gettime.
|
|
* - clock_getres_fallback(): fallback for clock_getres.
|
|
*/
|
|
#ifdef ENABLE_COMPAT_VDSO
|
|
#include <asm/vdso/compat_gettimeofday.h>
|
|
#else
|
|
#include <asm/vdso/gettimeofday.h>
|
|
#endif /* ENABLE_COMPAT_VDSO */
|
|
|
|
#endif /* !__ASSEMBLY__ */
|
|
|
|
#endif /* __VDSO_DATAPAGE_H */
|