mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2025-01-19 20:12:32 +00:00
Merge patch series "riscv: Create and document PR_RISCV_SET_ICACHE_FLUSH_CTX prctl"
Charlie Jenkins <charlie@rivosinc.com> says: Improve the performance of icache flushing by creating a new prctl flag PR_RISCV_SET_ICACHE_FLUSH_CTX. The interface is left generic to allow for future expansions such as with the proposed J extension [1]. Documentation is also provided to explain the use case. Patch sent to add PR_RISCV_SET_ICACHE_FLUSH_CTX to man-pages [2]. [1] https://github.com/riscv/riscv-j-extension [2] https://lore.kernel.org/linux-man/20240124-fencei_prctl-v1-1-0bddafcef331@rivosinc.com * b4-shazam-merge: cpumask: Add assign cpu documentation: Document PR_RISCV_SET_ICACHE_FLUSH_CTX prctl riscv: Include riscv_set_icache_flush_ctx prctl riscv: Remove unnecessary irqflags processor.h include Link: https://lore.kernel.org/r/20240312-fencei-v13-0-4b6bdc2bbf32@rivosinc.com Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
This commit is contained in:
commit
4202f62cb6
98
Documentation/arch/riscv/cmodx.rst
Normal file
98
Documentation/arch/riscv/cmodx.rst
Normal file
@ -0,0 +1,98 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
==============================================================================
|
||||
Concurrent Modification and Execution of Instructions (CMODX) for RISC-V Linux
|
||||
==============================================================================
|
||||
|
||||
CMODX is a programming technique where a program executes instructions that were
|
||||
modified by the program itself. Instruction storage and the instruction cache
|
||||
(icache) are not guaranteed to be synchronized on RISC-V hardware. Therefore, the
|
||||
program must enforce its own synchronization with the unprivileged fence.i
|
||||
instruction.
|
||||
|
||||
However, the default Linux ABI prohibits the use of fence.i in userspace
|
||||
applications. At any point the scheduler may migrate a task onto a new hart. If
|
||||
migration occurs after the userspace synchronized the icache and instruction
|
||||
storage with fence.i, the icache on the new hart will no longer be clean. This
|
||||
is due to the behavior of fence.i only affecting the hart that it is called on.
|
||||
Thus, the hart that the task has been migrated to may not have synchronized
|
||||
instruction storage and icache.
|
||||
|
||||
There are two ways to solve this problem: use the riscv_flush_icache() syscall,
|
||||
or use the ``PR_RISCV_SET_ICACHE_FLUSH_CTX`` prctl() and emit fence.i in
|
||||
userspace. The syscall performs a one-off icache flushing operation. The prctl
|
||||
changes the Linux ABI to allow userspace to emit icache flushing operations.
|
||||
|
||||
As an aside, "deferred" icache flushes can sometimes be triggered in the kernel.
|
||||
At the time of writing, this only occurs during the riscv_flush_icache() syscall
|
||||
and when the kernel uses copy_to_user_page(). These deferred flushes happen only
|
||||
when the memory map being used by a hart changes. If the prctl() context caused
|
||||
an icache flush, this deferred icache flush will be skipped as it is redundant.
|
||||
Therefore, there will be no additional flush when using the riscv_flush_icache()
|
||||
syscall inside of the prctl() context.
|
||||
|
||||
prctl() Interface
|
||||
---------------------
|
||||
|
||||
Call prctl() with ``PR_RISCV_SET_ICACHE_FLUSH_CTX`` as the first argument. The
|
||||
remaining arguments will be delegated to the riscv_set_icache_flush_ctx
|
||||
function detailed below.
|
||||
|
||||
.. kernel-doc:: arch/riscv/mm/cacheflush.c
|
||||
:identifiers: riscv_set_icache_flush_ctx
|
||||
|
||||
Example usage:
|
||||
|
||||
The following files are meant to be compiled and linked with each other. The
|
||||
modify_instruction() function replaces an add with 0 with an add with one,
|
||||
causing the instruction sequence in get_value() to change from returning a zero
|
||||
to returning a one.
|
||||
|
||||
cmodx.c::
|
||||
|
||||
#include <stdio.h>
|
||||
#include <sys/prctl.h>
|
||||
|
||||
extern int get_value();
|
||||
extern void modify_instruction();
|
||||
|
||||
int main()
|
||||
{
|
||||
int value = get_value();
|
||||
printf("Value before cmodx: %d\n", value);
|
||||
|
||||
// Call prctl before first fence.i is called inside modify_instruction
|
||||
prctl(PR_RISCV_SET_ICACHE_FLUSH_CTX_ON, PR_RISCV_CTX_SW_FENCEI, PR_RISCV_SCOPE_PER_PROCESS);
|
||||
modify_instruction();
|
||||
// Call prctl after final fence.i is called in process
|
||||
prctl(PR_RISCV_SET_ICACHE_FLUSH_CTX_OFF, PR_RISCV_CTX_SW_FENCEI, PR_RISCV_SCOPE_PER_PROCESS);
|
||||
|
||||
value = get_value();
|
||||
printf("Value after cmodx: %d\n", value);
|
||||
return 0;
|
||||
}
|
||||
|
||||
cmodx.S::
|
||||
|
||||
.option norvc
|
||||
|
||||
.text
|
||||
.global modify_instruction
|
||||
modify_instruction:
|
||||
lw a0, new_insn
|
||||
lui a5,%hi(old_insn)
|
||||
sw a0,%lo(old_insn)(a5)
|
||||
fence.i
|
||||
ret
|
||||
|
||||
.section modifiable, "awx"
|
||||
.global get_value
|
||||
get_value:
|
||||
li a0, 0
|
||||
old_insn:
|
||||
addi a0, a0, 0
|
||||
ret
|
||||
|
||||
.data
|
||||
new_insn:
|
||||
addi a0, a0, 1
|
@ -13,6 +13,7 @@ RISC-V architecture
|
||||
patch-acceptance
|
||||
uabi
|
||||
vector
|
||||
cmodx
|
||||
|
||||
features
|
||||
|
||||
|
@ -7,7 +7,6 @@
|
||||
#ifndef _ASM_RISCV_IRQFLAGS_H
|
||||
#define _ASM_RISCV_IRQFLAGS_H
|
||||
|
||||
#include <asm/processor.h>
|
||||
#include <asm/csr.h>
|
||||
|
||||
/* read interrupt enabled status */
|
||||
|
@ -19,6 +19,8 @@ typedef struct {
|
||||
#ifdef CONFIG_SMP
|
||||
/* A local icache flush is needed before user execution can resume. */
|
||||
cpumask_t icache_stale_mask;
|
||||
/* Force local icache flush on all migrations. */
|
||||
bool force_icache_flush;
|
||||
#endif
|
||||
#ifdef CONFIG_BINFMT_ELF_FDPIC
|
||||
unsigned long exec_fdpic_loadmap;
|
||||
|
@ -68,6 +68,7 @@
|
||||
#endif
|
||||
|
||||
#ifndef __ASSEMBLY__
|
||||
#include <linux/cpumask.h>
|
||||
|
||||
struct task_struct;
|
||||
struct pt_regs;
|
||||
@ -122,6 +123,12 @@ struct thread_struct {
|
||||
struct __riscv_v_ext_state vstate;
|
||||
unsigned long align_ctl;
|
||||
struct __riscv_v_ext_state kernel_vstate;
|
||||
#ifdef CONFIG_SMP
|
||||
/* Flush the icache on migration */
|
||||
bool force_icache_flush;
|
||||
/* A forced icache flush is not needed if migrating to the previous cpu. */
|
||||
unsigned int prev_cpu;
|
||||
#endif
|
||||
};
|
||||
|
||||
/* Whitelist the fstate from the task_struct for hardened usercopy */
|
||||
@ -183,6 +190,9 @@ extern int set_unalign_ctl(struct task_struct *tsk, unsigned int val);
|
||||
#define GET_UNALIGN_CTL(tsk, addr) get_unalign_ctl((tsk), (addr))
|
||||
#define SET_UNALIGN_CTL(tsk, val) set_unalign_ctl((tsk), (val))
|
||||
|
||||
#define RISCV_SET_ICACHE_FLUSH_CTX(arg1, arg2) riscv_set_icache_flush_ctx(arg1, arg2)
|
||||
extern int riscv_set_icache_flush_ctx(unsigned long ctx, unsigned long per_thread);
|
||||
|
||||
#endif /* __ASSEMBLY__ */
|
||||
|
||||
#endif /* _ASM_RISCV_PROCESSOR_H */
|
||||
|
@ -8,6 +8,7 @@
|
||||
|
||||
#include <linux/jump_label.h>
|
||||
#include <linux/sched/task_stack.h>
|
||||
#include <linux/mm_types.h>
|
||||
#include <asm/vector.h>
|
||||
#include <asm/cpufeature.h>
|
||||
#include <asm/processor.h>
|
||||
@ -72,14 +73,36 @@ static __always_inline bool has_fpu(void) { return false; }
|
||||
extern struct task_struct *__switch_to(struct task_struct *,
|
||||
struct task_struct *);
|
||||
|
||||
static inline bool switch_to_should_flush_icache(struct task_struct *task)
|
||||
{
|
||||
#ifdef CONFIG_SMP
|
||||
bool stale_mm = task->mm && task->mm->context.force_icache_flush;
|
||||
bool stale_thread = task->thread.force_icache_flush;
|
||||
bool thread_migrated = smp_processor_id() != task->thread.prev_cpu;
|
||||
|
||||
return thread_migrated && (stale_mm || stale_thread);
|
||||
#else
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
#define __set_prev_cpu(thread) ((thread).prev_cpu = smp_processor_id())
|
||||
#else
|
||||
#define __set_prev_cpu(thread)
|
||||
#endif
|
||||
|
||||
#define switch_to(prev, next, last) \
|
||||
do { \
|
||||
struct task_struct *__prev = (prev); \
|
||||
struct task_struct *__next = (next); \
|
||||
__set_prev_cpu(__prev->thread); \
|
||||
if (has_fpu()) \
|
||||
__switch_to_fpu(__prev, __next); \
|
||||
if (has_vector()) \
|
||||
__switch_to_vector(__prev, __next); \
|
||||
if (switch_to_should_flush_icache(__next)) \
|
||||
local_flush_icache_all(); \
|
||||
((last) = __switch_to(__prev, __next)); \
|
||||
} while (0)
|
||||
|
||||
|
@ -5,6 +5,7 @@
|
||||
|
||||
#include <linux/acpi.h>
|
||||
#include <linux/of.h>
|
||||
#include <linux/prctl.h>
|
||||
#include <asm/acpi.h>
|
||||
#include <asm/cacheflush.h>
|
||||
|
||||
@ -152,3 +153,115 @@ void __init riscv_init_cbo_blocksizes(void)
|
||||
if (cboz_block_size)
|
||||
riscv_cboz_block_size = cboz_block_size;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
static void set_icache_stale_mask(void)
|
||||
{
|
||||
cpumask_t *mask;
|
||||
bool stale_cpu;
|
||||
|
||||
/*
|
||||
* Mark every other hart's icache as needing a flush for
|
||||
* this MM. Maintain the previous value of the current
|
||||
* cpu to handle the case when this function is called
|
||||
* concurrently on different harts.
|
||||
*/
|
||||
mask = ¤t->mm->context.icache_stale_mask;
|
||||
stale_cpu = cpumask_test_cpu(smp_processor_id(), mask);
|
||||
|
||||
cpumask_setall(mask);
|
||||
cpumask_assign_cpu(smp_processor_id(), mask, stale_cpu);
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* riscv_set_icache_flush_ctx() - Enable/disable icache flushing instructions in
|
||||
* userspace.
|
||||
* @ctx: Set the type of icache flushing instructions permitted/prohibited in
|
||||
* userspace. Supported values described below.
|
||||
*
|
||||
* Supported values for ctx:
|
||||
*
|
||||
* * %PR_RISCV_CTX_SW_FENCEI_ON: Allow fence.i in user space.
|
||||
*
|
||||
* * %PR_RISCV_CTX_SW_FENCEI_OFF: Disallow fence.i in user space. All threads in
|
||||
* a process will be affected when ``scope == PR_RISCV_SCOPE_PER_PROCESS``.
|
||||
* Therefore, caution must be taken; use this flag only when you can guarantee
|
||||
* that no thread in the process will emit fence.i from this point onward.
|
||||
*
|
||||
* @scope: Set scope of where icache flushing instructions are allowed to be
|
||||
* emitted. Supported values described below.
|
||||
*
|
||||
* Supported values for scope:
|
||||
*
|
||||
* * %PR_RISCV_SCOPE_PER_PROCESS: Ensure the icache of any thread in this process
|
||||
* is coherent with instruction storage upon
|
||||
* migration.
|
||||
*
|
||||
* * %PR_RISCV_SCOPE_PER_THREAD: Ensure the icache of the current thread is
|
||||
* coherent with instruction storage upon
|
||||
* migration.
|
||||
*
|
||||
* When ``scope == PR_RISCV_SCOPE_PER_PROCESS``, all threads in the process are
|
||||
* permitted to emit icache flushing instructions. Whenever any thread in the
|
||||
* process is migrated, the corresponding hart's icache will be guaranteed to be
|
||||
* consistent with instruction storage. This does not enforce any guarantees
|
||||
* outside of migration. If a thread modifies an instruction that another thread
|
||||
* may attempt to execute, the other thread must still emit an icache flushing
|
||||
* instruction before attempting to execute the potentially modified
|
||||
* instruction. This must be performed by the user-space program.
|
||||
*
|
||||
* In per-thread context (eg. ``scope == PR_RISCV_SCOPE_PER_THREAD``) only the
|
||||
* thread calling this function is permitted to emit icache flushing
|
||||
* instructions. When the thread is migrated, the corresponding hart's icache
|
||||
* will be guaranteed to be consistent with instruction storage.
|
||||
*
|
||||
* On kernels configured without SMP, this function is a nop as migrations
|
||||
* across harts will not occur.
|
||||
*/
|
||||
int riscv_set_icache_flush_ctx(unsigned long ctx, unsigned long scope)
|
||||
{
|
||||
#ifdef CONFIG_SMP
|
||||
switch (ctx) {
|
||||
case PR_RISCV_CTX_SW_FENCEI_ON:
|
||||
switch (scope) {
|
||||
case PR_RISCV_SCOPE_PER_PROCESS:
|
||||
current->mm->context.force_icache_flush = true;
|
||||
break;
|
||||
case PR_RISCV_SCOPE_PER_THREAD:
|
||||
current->thread.force_icache_flush = true;
|
||||
break;
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
break;
|
||||
case PR_RISCV_CTX_SW_FENCEI_OFF:
|
||||
switch (scope) {
|
||||
case PR_RISCV_SCOPE_PER_PROCESS:
|
||||
current->mm->context.force_icache_flush = false;
|
||||
|
||||
set_icache_stale_mask();
|
||||
break;
|
||||
case PR_RISCV_SCOPE_PER_THREAD:
|
||||
current->thread.force_icache_flush = false;
|
||||
|
||||
set_icache_stale_mask();
|
||||
break;
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
return 0;
|
||||
#else
|
||||
switch (ctx) {
|
||||
case PR_RISCV_CTX_SW_FENCEI_ON:
|
||||
case PR_RISCV_CTX_SW_FENCEI_OFF:
|
||||
return 0;
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
@ -15,6 +15,7 @@
|
||||
#include <asm/tlbflush.h>
|
||||
#include <asm/cacheflush.h>
|
||||
#include <asm/mmu_context.h>
|
||||
#include <asm/switch_to.h>
|
||||
|
||||
#ifdef CONFIG_MMU
|
||||
|
||||
@ -297,21 +298,23 @@ static inline void set_mm(struct mm_struct *prev,
|
||||
*
|
||||
* The "cpu" argument must be the current local CPU number.
|
||||
*/
|
||||
static inline void flush_icache_deferred(struct mm_struct *mm, unsigned int cpu)
|
||||
static inline void flush_icache_deferred(struct mm_struct *mm, unsigned int cpu,
|
||||
struct task_struct *task)
|
||||
{
|
||||
#ifdef CONFIG_SMP
|
||||
cpumask_t *mask = &mm->context.icache_stale_mask;
|
||||
|
||||
if (cpumask_test_cpu(cpu, mask)) {
|
||||
cpumask_clear_cpu(cpu, mask);
|
||||
if (cpumask_test_and_clear_cpu(cpu, &mm->context.icache_stale_mask)) {
|
||||
/*
|
||||
* Ensure the remote hart's writes are visible to this hart.
|
||||
* This pairs with a barrier in flush_icache_mm.
|
||||
*/
|
||||
smp_mb();
|
||||
local_flush_icache_all();
|
||||
}
|
||||
|
||||
/*
|
||||
* If cache will be flushed in switch_to, no need to flush here.
|
||||
*/
|
||||
if (!(task && switch_to_should_flush_icache(task)))
|
||||
local_flush_icache_all();
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -334,5 +337,5 @@ void switch_mm(struct mm_struct *prev, struct mm_struct *next,
|
||||
|
||||
set_mm(prev, next, cpu);
|
||||
|
||||
flush_icache_deferred(next, cpu);
|
||||
flush_icache_deferred(next, cpu, task);
|
||||
}
|
||||
|
@ -493,6 +493,22 @@ static __always_inline void __cpumask_clear_cpu(int cpu, struct cpumask *dstp)
|
||||
__clear_bit(cpumask_check(cpu), cpumask_bits(dstp));
|
||||
}
|
||||
|
||||
/**
|
||||
* cpumask_assign_cpu - assign a cpu in a cpumask
|
||||
* @cpu: cpu number (< nr_cpu_ids)
|
||||
* @dstp: the cpumask pointer
|
||||
* @bool: the value to assign
|
||||
*/
|
||||
static __always_inline void cpumask_assign_cpu(int cpu, struct cpumask *dstp, bool value)
|
||||
{
|
||||
assign_bit(cpumask_check(cpu), cpumask_bits(dstp), value);
|
||||
}
|
||||
|
||||
static __always_inline void __cpumask_assign_cpu(int cpu, struct cpumask *dstp, bool value)
|
||||
{
|
||||
__assign_bit(cpumask_check(cpu), cpumask_bits(dstp), value);
|
||||
}
|
||||
|
||||
/**
|
||||
* cpumask_test_cpu - test for a cpu in a cpumask
|
||||
* @cpu: cpu number (< nr_cpu_ids)
|
||||
|
@ -306,4 +306,10 @@ struct prctl_mm_map {
|
||||
# define PR_RISCV_V_VSTATE_CTRL_NEXT_MASK 0xc
|
||||
# define PR_RISCV_V_VSTATE_CTRL_MASK 0x1f
|
||||
|
||||
#define PR_RISCV_SET_ICACHE_FLUSH_CTX 71
|
||||
# define PR_RISCV_CTX_SW_FENCEI_ON 0
|
||||
# define PR_RISCV_CTX_SW_FENCEI_OFF 1
|
||||
# define PR_RISCV_SCOPE_PER_PROCESS 0
|
||||
# define PR_RISCV_SCOPE_PER_THREAD 1
|
||||
|
||||
#endif /* _LINUX_PRCTL_H */
|
||||
|
@ -146,6 +146,9 @@
|
||||
#ifndef RISCV_V_GET_CONTROL
|
||||
# define RISCV_V_GET_CONTROL() (-EINVAL)
|
||||
#endif
|
||||
#ifndef RISCV_SET_ICACHE_FLUSH_CTX
|
||||
# define RISCV_SET_ICACHE_FLUSH_CTX(a, b) (-EINVAL)
|
||||
#endif
|
||||
|
||||
/*
|
||||
* this is where the system-wide overflow UID and GID are defined, for
|
||||
@ -2757,6 +2760,9 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
|
||||
case PR_RISCV_V_GET_CONTROL:
|
||||
error = RISCV_V_GET_CONTROL();
|
||||
break;
|
||||
case PR_RISCV_SET_ICACHE_FLUSH_CTX:
|
||||
error = RISCV_SET_ICACHE_FLUSH_CTX(arg2, arg3);
|
||||
break;
|
||||
default:
|
||||
error = -EINVAL;
|
||||
break;
|
||||
|
Loading…
x
Reference in New Issue
Block a user