mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2025-01-17 02:36:21 +00:00
7fa0da5373
The hypercall page is no longer needed. It can be removed, as from the Xen perspective it is optional. But, from Linux's perspective, it removes naked RET instructions that escape the speculative protections that Call Depth Tracking and/or Untrain Ret are trying to achieve. This is part of XSA-466 / CVE-2024-53241. Reported-by: Andrew Cooper <andrew.cooper3@citrix.com> Signed-off-by: Juergen Gross <jgross@suse.com> Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com> Reviewed-by: Jan Beulich <jbeulich@suse.com>
392 lines
8.9 KiB
C
392 lines
8.9 KiB
C
// SPDX-License-Identifier: GPL-2.0-only
|
|
|
|
#define pr_fmt(fmt) "callthunks: " fmt
|
|
|
|
#include <linux/debugfs.h>
|
|
#include <linux/kallsyms.h>
|
|
#include <linux/memory.h>
|
|
#include <linux/moduleloader.h>
|
|
#include <linux/static_call.h>
|
|
|
|
#include <asm/alternative.h>
|
|
#include <asm/asm-offsets.h>
|
|
#include <asm/cpu.h>
|
|
#include <asm/ftrace.h>
|
|
#include <asm/insn.h>
|
|
#include <asm/kexec.h>
|
|
#include <asm/nospec-branch.h>
|
|
#include <asm/paravirt.h>
|
|
#include <asm/sections.h>
|
|
#include <asm/switch_to.h>
|
|
#include <asm/sync_core.h>
|
|
#include <asm/text-patching.h>
|
|
#include <asm/xen/hypercall.h>
|
|
|
|
static int __initdata_or_module debug_callthunks;
|
|
|
|
#define MAX_PATCH_LEN (255-1)
|
|
|
|
#define prdbg(fmt, args...) \
|
|
do { \
|
|
if (debug_callthunks) \
|
|
printk(KERN_DEBUG pr_fmt(fmt), ##args); \
|
|
} while(0)
|
|
|
|
static int __init debug_thunks(char *str)
|
|
{
|
|
debug_callthunks = 1;
|
|
return 1;
|
|
}
|
|
__setup("debug-callthunks", debug_thunks);
|
|
|
|
#ifdef CONFIG_CALL_THUNKS_DEBUG
|
|
DEFINE_PER_CPU(u64, __x86_call_count);
|
|
DEFINE_PER_CPU(u64, __x86_ret_count);
|
|
DEFINE_PER_CPU(u64, __x86_stuffs_count);
|
|
DEFINE_PER_CPU(u64, __x86_ctxsw_count);
|
|
EXPORT_PER_CPU_SYMBOL_GPL(__x86_ctxsw_count);
|
|
EXPORT_PER_CPU_SYMBOL_GPL(__x86_call_count);
|
|
#endif
|
|
|
|
extern s32 __call_sites[], __call_sites_end[];
|
|
|
|
struct core_text {
|
|
unsigned long base;
|
|
unsigned long end;
|
|
const char *name;
|
|
};
|
|
|
|
static bool thunks_initialized __ro_after_init;
|
|
|
|
static const struct core_text builtin_coretext = {
|
|
.base = (unsigned long)_text,
|
|
.end = (unsigned long)_etext,
|
|
.name = "builtin",
|
|
};
|
|
|
|
asm (
|
|
".pushsection .rodata \n"
|
|
".global skl_call_thunk_template \n"
|
|
"skl_call_thunk_template: \n"
|
|
__stringify(INCREMENT_CALL_DEPTH)" \n"
|
|
".global skl_call_thunk_tail \n"
|
|
"skl_call_thunk_tail: \n"
|
|
".popsection \n"
|
|
);
|
|
|
|
extern u8 skl_call_thunk_template[];
|
|
extern u8 skl_call_thunk_tail[];
|
|
|
|
#define SKL_TMPL_SIZE \
|
|
((unsigned int)(skl_call_thunk_tail - skl_call_thunk_template))
|
|
|
|
extern void error_entry(void);
|
|
extern void xen_error_entry(void);
|
|
extern void paranoid_entry(void);
|
|
|
|
static inline bool within_coretext(const struct core_text *ct, void *addr)
|
|
{
|
|
unsigned long p = (unsigned long)addr;
|
|
|
|
return ct->base <= p && p < ct->end;
|
|
}
|
|
|
|
static inline bool within_module_coretext(void *addr)
|
|
{
|
|
bool ret = false;
|
|
|
|
#ifdef CONFIG_MODULES
|
|
struct module *mod;
|
|
|
|
preempt_disable();
|
|
mod = __module_address((unsigned long)addr);
|
|
if (mod && within_module_core((unsigned long)addr, mod))
|
|
ret = true;
|
|
preempt_enable();
|
|
#endif
|
|
return ret;
|
|
}
|
|
|
|
static bool is_coretext(const struct core_text *ct, void *addr)
|
|
{
|
|
if (ct && within_coretext(ct, addr))
|
|
return true;
|
|
if (within_coretext(&builtin_coretext, addr))
|
|
return true;
|
|
return within_module_coretext(addr);
|
|
}
|
|
|
|
static bool skip_addr(void *dest)
|
|
{
|
|
if (dest == error_entry)
|
|
return true;
|
|
if (dest == paranoid_entry)
|
|
return true;
|
|
if (dest == xen_error_entry)
|
|
return true;
|
|
/* Does FILL_RSB... */
|
|
if (dest == __switch_to_asm)
|
|
return true;
|
|
/* Accounts directly */
|
|
if (dest == ret_from_fork)
|
|
return true;
|
|
#if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_AMD_MEM_ENCRYPT)
|
|
if (dest == soft_restart_cpu)
|
|
return true;
|
|
#endif
|
|
#ifdef CONFIG_FUNCTION_TRACER
|
|
if (dest == __fentry__)
|
|
return true;
|
|
#endif
|
|
#ifdef CONFIG_KEXEC_CORE
|
|
if (dest >= (void *)relocate_kernel &&
|
|
dest < (void*)relocate_kernel + KEXEC_CONTROL_CODE_MAX_SIZE)
|
|
return true;
|
|
#endif
|
|
return false;
|
|
}
|
|
|
|
static __init_or_module void *call_get_dest(void *addr)
|
|
{
|
|
struct insn insn;
|
|
void *dest;
|
|
int ret;
|
|
|
|
ret = insn_decode_kernel(&insn, addr);
|
|
if (ret)
|
|
return ERR_PTR(ret);
|
|
|
|
/* Patched out call? */
|
|
if (insn.opcode.bytes[0] != CALL_INSN_OPCODE)
|
|
return NULL;
|
|
|
|
dest = addr + insn.length + insn.immediate.value;
|
|
if (skip_addr(dest))
|
|
return NULL;
|
|
return dest;
|
|
}
|
|
|
|
static const u8 nops[] = {
|
|
0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
|
|
0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
|
|
0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
|
|
0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
|
|
};
|
|
|
|
static void *patch_dest(void *dest, bool direct)
|
|
{
|
|
unsigned int tsize = SKL_TMPL_SIZE;
|
|
u8 insn_buff[MAX_PATCH_LEN];
|
|
u8 *pad = dest - tsize;
|
|
|
|
memcpy(insn_buff, skl_call_thunk_template, tsize);
|
|
apply_relocation(insn_buff, pad, tsize, skl_call_thunk_template, tsize);
|
|
|
|
/* Already patched? */
|
|
if (!bcmp(pad, insn_buff, tsize))
|
|
return pad;
|
|
|
|
/* Ensure there are nops */
|
|
if (bcmp(pad, nops, tsize)) {
|
|
pr_warn_once("Invalid padding area for %pS\n", dest);
|
|
return NULL;
|
|
}
|
|
|
|
if (direct)
|
|
memcpy(pad, insn_buff, tsize);
|
|
else
|
|
text_poke_copy_locked(pad, insn_buff, tsize, true);
|
|
return pad;
|
|
}
|
|
|
|
static __init_or_module void patch_call(void *addr, const struct core_text *ct)
|
|
{
|
|
void *pad, *dest;
|
|
u8 bytes[8];
|
|
|
|
if (!within_coretext(ct, addr))
|
|
return;
|
|
|
|
dest = call_get_dest(addr);
|
|
if (!dest || WARN_ON_ONCE(IS_ERR(dest)))
|
|
return;
|
|
|
|
if (!is_coretext(ct, dest))
|
|
return;
|
|
|
|
pad = patch_dest(dest, within_coretext(ct, dest));
|
|
if (!pad)
|
|
return;
|
|
|
|
prdbg("Patch call at: %pS %px to %pS %px -> %px \n", addr, addr,
|
|
dest, dest, pad);
|
|
__text_gen_insn(bytes, CALL_INSN_OPCODE, addr, pad, CALL_INSN_SIZE);
|
|
text_poke_early(addr, bytes, CALL_INSN_SIZE);
|
|
}
|
|
|
|
static __init_or_module void
|
|
patch_call_sites(s32 *start, s32 *end, const struct core_text *ct)
|
|
{
|
|
s32 *s;
|
|
|
|
for (s = start; s < end; s++)
|
|
patch_call((void *)s + *s, ct);
|
|
}
|
|
|
|
static __init_or_module void
|
|
patch_alt_call_sites(struct alt_instr *start, struct alt_instr *end,
|
|
const struct core_text *ct)
|
|
{
|
|
struct alt_instr *a;
|
|
|
|
for (a = start; a < end; a++)
|
|
patch_call((void *)&a->instr_offset + a->instr_offset, ct);
|
|
}
|
|
|
|
static __init_or_module void
|
|
callthunks_setup(struct callthunk_sites *cs, const struct core_text *ct)
|
|
{
|
|
prdbg("Patching call sites %s\n", ct->name);
|
|
patch_call_sites(cs->call_start, cs->call_end, ct);
|
|
patch_alt_call_sites(cs->alt_start, cs->alt_end, ct);
|
|
prdbg("Patching call sites done%s\n", ct->name);
|
|
}
|
|
|
|
void __init callthunks_patch_builtin_calls(void)
|
|
{
|
|
struct callthunk_sites cs = {
|
|
.call_start = __call_sites,
|
|
.call_end = __call_sites_end,
|
|
.alt_start = __alt_instructions,
|
|
.alt_end = __alt_instructions_end
|
|
};
|
|
|
|
if (!cpu_feature_enabled(X86_FEATURE_CALL_DEPTH))
|
|
return;
|
|
|
|
pr_info("Setting up call depth tracking\n");
|
|
mutex_lock(&text_mutex);
|
|
callthunks_setup(&cs, &builtin_coretext);
|
|
thunks_initialized = true;
|
|
mutex_unlock(&text_mutex);
|
|
}
|
|
|
|
void *callthunks_translate_call_dest(void *dest)
|
|
{
|
|
void *target;
|
|
|
|
lockdep_assert_held(&text_mutex);
|
|
|
|
if (!thunks_initialized || skip_addr(dest))
|
|
return dest;
|
|
|
|
if (!is_coretext(NULL, dest))
|
|
return dest;
|
|
|
|
target = patch_dest(dest, false);
|
|
return target ? : dest;
|
|
}
|
|
|
|
#ifdef CONFIG_BPF_JIT
|
|
static bool is_callthunk(void *addr)
|
|
{
|
|
unsigned int tmpl_size = SKL_TMPL_SIZE;
|
|
u8 insn_buff[MAX_PATCH_LEN];
|
|
unsigned long dest;
|
|
u8 *pad;
|
|
|
|
dest = roundup((unsigned long)addr, CONFIG_FUNCTION_ALIGNMENT);
|
|
if (!thunks_initialized || skip_addr((void *)dest))
|
|
return false;
|
|
|
|
pad = (void *)(dest - tmpl_size);
|
|
|
|
memcpy(insn_buff, skl_call_thunk_template, tmpl_size);
|
|
apply_relocation(insn_buff, pad, tmpl_size, skl_call_thunk_template, tmpl_size);
|
|
|
|
return !bcmp(pad, insn_buff, tmpl_size);
|
|
}
|
|
|
|
int x86_call_depth_emit_accounting(u8 **pprog, void *func, void *ip)
|
|
{
|
|
unsigned int tmpl_size = SKL_TMPL_SIZE;
|
|
u8 insn_buff[MAX_PATCH_LEN];
|
|
|
|
if (!thunks_initialized)
|
|
return 0;
|
|
|
|
/* Is function call target a thunk? */
|
|
if (func && is_callthunk(func))
|
|
return 0;
|
|
|
|
memcpy(insn_buff, skl_call_thunk_template, tmpl_size);
|
|
apply_relocation(insn_buff, ip, tmpl_size, skl_call_thunk_template, tmpl_size);
|
|
|
|
memcpy(*pprog, insn_buff, tmpl_size);
|
|
*pprog += tmpl_size;
|
|
return tmpl_size;
|
|
}
|
|
#endif
|
|
|
|
#ifdef CONFIG_MODULES
|
|
void noinline callthunks_patch_module_calls(struct callthunk_sites *cs,
|
|
struct module *mod)
|
|
{
|
|
struct core_text ct = {
|
|
.base = (unsigned long)mod->mem[MOD_TEXT].base,
|
|
.end = (unsigned long)mod->mem[MOD_TEXT].base + mod->mem[MOD_TEXT].size,
|
|
.name = mod->name,
|
|
};
|
|
|
|
if (!thunks_initialized)
|
|
return;
|
|
|
|
mutex_lock(&text_mutex);
|
|
callthunks_setup(cs, &ct);
|
|
mutex_unlock(&text_mutex);
|
|
}
|
|
#endif /* CONFIG_MODULES */
|
|
|
|
#if defined(CONFIG_CALL_THUNKS_DEBUG) && defined(CONFIG_DEBUG_FS)
|
|
static int callthunks_debug_show(struct seq_file *m, void *p)
|
|
{
|
|
unsigned long cpu = (unsigned long)m->private;
|
|
|
|
seq_printf(m, "C: %16llu R: %16llu S: %16llu X: %16llu\n,",
|
|
per_cpu(__x86_call_count, cpu),
|
|
per_cpu(__x86_ret_count, cpu),
|
|
per_cpu(__x86_stuffs_count, cpu),
|
|
per_cpu(__x86_ctxsw_count, cpu));
|
|
return 0;
|
|
}
|
|
|
|
static int callthunks_debug_open(struct inode *inode, struct file *file)
|
|
{
|
|
return single_open(file, callthunks_debug_show, inode->i_private);
|
|
}
|
|
|
|
static const struct file_operations dfs_ops = {
|
|
.open = callthunks_debug_open,
|
|
.read = seq_read,
|
|
.llseek = seq_lseek,
|
|
.release = single_release,
|
|
};
|
|
|
|
static int __init callthunks_debugfs_init(void)
|
|
{
|
|
struct dentry *dir;
|
|
unsigned long cpu;
|
|
|
|
dir = debugfs_create_dir("callthunks", NULL);
|
|
for_each_possible_cpu(cpu) {
|
|
void *arg = (void *)cpu;
|
|
char name [10];
|
|
|
|
sprintf(name, "cpu%lu", cpu);
|
|
debugfs_create_file(name, 0644, dir, arg, &dfs_ops);
|
|
}
|
|
return 0;
|
|
}
|
|
__initcall(callthunks_debugfs_init);
|
|
#endif
|