commit 503638e0ba

In 6.5, we removed the vmalloc fault path because that can't work (see [1] [2]). Then, in order to make sure that new page table entries were seen by the page table walker, we had to preventively emit an sfence.vma on all harts [3], but this solution is very costly since it relies on IPIs. And even then, we could end up in a loop of vmalloc faults if a vmalloc allocation is done in the IPI path (for example if it is traced, see [4]), which could result in a kernel stack overflow.

Those preventive sfence.vma needed to be emitted because:

- if the uarch caches invalid entries, the new mapping may not be observed by the page table walker and an invalidation may be needed.
- if the uarch does not cache invalid entries, a reordered access could "miss" the new mapping and trap: in that case, we would actually only need to retry the access, no sfence.vma is required.

So this patch removes those preventive sfence.vma and actually handles the possible (and unlikely) exceptions. And since the kernel stack mappings lie in the vmalloc area, this handling must be done very early when the trap is taken, at the very beginning of handle_exception: this also rules out vmalloc allocations in the fault path.

Link: https://lore.kernel.org/linux-riscv/20230531093817.665799-1-bjorn@kernel.org/ [1]
Link: https://lore.kernel.org/linux-riscv/20230801090927.2018653-1-dylan@andestech.com [2]
Link: https://lore.kernel.org/linux-riscv/20230725132246.817726-1-alexghiti@rivosinc.com/ [3]
Link: https://lore.kernel.org/lkml/20200508144043.13893-1-joro@8bytes.org/ [4]
Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
Reviewed-by: Yunhui Cui <cuiyunhui@bytedance.com>
Link: https://lore.kernel.org/r/20240717060125.139416-4-alexghiti@rivosinc.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
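The idea, in rough C (a sketch, not the kernel's code): at trap entry, the handler tests and clears the current hart's bit in the new_vmalloc bitmap and, if the bit was set, emits a local sfence.vma and simply retries the faulting access. The helper name new_vmalloc_check(), the cpu argument, and the use of a compiler atomic builtin are assumptions made for this illustration; the actual check is written in assembly at the very beginning of handle_exception, and only the new_vmalloc array (declared in the header below) comes from the patch itself.

/*
 * Illustrative sketch only: the real check lives in assembly at the very
 * top of handle_exception.  The helper name and the cpu argument are
 * invented for this example.
 */
static bool new_vmalloc_check(unsigned long cpu)
{
        u64 *word = &new_vmalloc[cpu / 64];
        u64 mask = 1ULL << (cpu % 64);

        /* No new vmalloc mapping was published: handle the trap as usual. */
        if (!(*word & mask))
                return false;

        /* Clear only this hart's bit; other harts still need to see theirs. */
        __atomic_fetch_and(word, ~mask, __ATOMIC_RELAXED);

        /* Make the new kernel page table entries visible on this hart. */
        asm volatile ("sfence.vma" ::: "memory");

        /* Tell the caller to return from the trap and retry the access. */
        return true;
}

The producer side is flush_cache_vmap() in the header below: creating a new vmalloc or module mapping sets every hart's bit, so each hart pays at most one spurious trap plus one local sfence.vma instead of an up-front IPI broadcast to all harts.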
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2015 Regents of the University of California
 */

#ifndef _ASM_RISCV_CACHEFLUSH_H
#define _ASM_RISCV_CACHEFLUSH_H

#include <linux/mm.h>

static inline void local_flush_icache_all(void)
{
        asm volatile ("fence.i" ::: "memory");
}

static inline void local_flush_icache_range(unsigned long start,
                                            unsigned long end)
{
        local_flush_icache_all();
}

#define PG_dcache_clean PG_arch_1

static inline void flush_dcache_folio(struct folio *folio)
{
        if (test_bit(PG_dcache_clean, &folio->flags))
                clear_bit(PG_dcache_clean, &folio->flags);
}
#define flush_dcache_folio flush_dcache_folio
#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1

static inline void flush_dcache_page(struct page *page)
{
        flush_dcache_folio(page_folio(page));
}

/*
 * RISC-V doesn't have an instruction to flush parts of the instruction cache,
 * so instead we just flush the whole thing.
 */
#define flush_icache_range(start, end) flush_icache_all()
#define flush_icache_user_page(vma, pg, addr, len)      \
do {                                                    \
        if (vma->vm_flags & VM_EXEC)                    \
                flush_icache_mm(vma->vm_mm, 0);         \
} while (0)

#ifdef CONFIG_64BIT
extern u64 new_vmalloc[NR_CPUS / sizeof(u64) + 1];
extern char _end[];
#define flush_cache_vmap flush_cache_vmap
static inline void flush_cache_vmap(unsigned long start, unsigned long end)
{
        if (is_vmalloc_or_module_addr((void *)start)) {
                int i;

                /*
                 * We don't care if concurrently a cpu resets this value since
                 * the only place this can happen is in handle_exception() where
                 * an sfence.vma is emitted.
                 */
                for (i = 0; i < ARRAY_SIZE(new_vmalloc); ++i)
                        new_vmalloc[i] = -1ULL;
        }
}
#define flush_cache_vmap_early(start, end)      local_flush_tlb_kernel_range(start, end)
#endif

#ifndef CONFIG_SMP

#define flush_icache_all() local_flush_icache_all()
#define flush_icache_mm(mm, local) flush_icache_all()

#else /* CONFIG_SMP */

void flush_icache_all(void);
void flush_icache_mm(struct mm_struct *mm, bool local);

#endif /* CONFIG_SMP */

extern unsigned int riscv_cbom_block_size;
extern unsigned int riscv_cboz_block_size;
void riscv_init_cbo_blocksizes(void);

#ifdef CONFIG_RISCV_DMA_NONCOHERENT
void riscv_noncoherent_supported(void);
void __init riscv_set_dma_cache_alignment(void);
#else
static inline void riscv_noncoherent_supported(void) {}
static inline void riscv_set_dma_cache_alignment(void) {}
#endif

/*
 * Bits in sys_riscv_flush_icache()'s flags argument.
 */
#define SYS_RISCV_FLUSH_ICACHE_LOCAL    1UL
#define SYS_RISCV_FLUSH_ICACHE_ALL      (SYS_RISCV_FLUSH_ICACHE_LOCAL)

#include <asm-generic/cacheflush.h>

#endif /* _ASM_RISCV_CACHEFLUSH_H */