mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
- Remove superfluous EFI PGD range checks which lead to those assertions
  failing with certain kernel configs and LLVM.

- Disable setting breakpoints on facilities involved in #DB exception
  handling to avoid infinite loops.

- Add extra serialization to non-serializing MSRs (IA32_TSC_DEADLINE and
  x2 APIC MSRs) to adhere to SDM's recommendation and avoid any
  theoretical issues.

- Re-add the EPB MSR reading on turbostat so that it works on older
  kernels which don't have the corresponding EPB sysfs file.

- Add Alder Lake to the list of CPUs which support split lock.

- Fix %dr6 register handling in order to be able to set watchpoints with
  gdb again.

- Disable CET instrumentation in the kernel so that gcc doesn't add
  ENDBR64 to kernel code and thus confuse tracing.

-----BEGIN PGP SIGNATURE-----

iQIzBAABCgAdFiEEzv7L6UO9uDPlPSfHEsHwGGHeVUoFAmAfwqsACgkQEsHwGGHe
VUroWA//fVOzuJxG51vAh4QEFmV0QX5V3T5If1acDVhtg9hf+iHBiD0jwhl9l5lu
CN3AmSBUzb1WFujRED/YD7ahW1IFuRe3nIXAEQ8DkMP4y8b9ry48LKPAVQkBX5Tq
gCEUotRXBdUafLt1rnLUGVLKcL8pn65zRJc6nYTJfPYTd79wBPUlm89X6c0GJk7+
Zjv/Zt3r+SUe5f3e/M0hhphqKntpWwwvqcj2NczJxods/9lbhvw9jnDrC1FeN+Q9
d1gK56e1DY/iqezxU9B5V4jOmLtp3B7WpyrnyKEkQTUjuYryaiXaegxPrQ9Qv1Ej
ZcsusN8LG/TeWrIF7mWhBDraO05Sgw0n+d9i4h89XUtRFB/DwQdNRN/l8YPknQW8
3b0AYxpAcvlZhA20N1NQc/uwqsOtb06LQ29BeZCTDA4JFG3qUAzKNaWBptoUFIA/
t/tq7DogJbcvKWKxyWeQq280w6uxDjki+ntY0Om95ZK2NgltpQuoiBHG0YjpbI4I
DkuL/3Yck/aaM1TBVSab6145ki8vg+zIydvEmAH7JXkDiOZbIZAV2mtqN8NE7cuS
PVZU3dt7GHhSc/xQW4EoRtqtgiRzADPGrrlDWPwwRVgvaMkjxpk+N3ycsFuPk7hL
qQb26YJ5u14ntjvtfq0u53HQhriYGsa6JqwBHiNAZaN5Azo+1ws=
=XwH4
-----END PGP SIGNATURE-----

Merge tag 'x86_urgent_for_v5.11_rc7' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 fixes from Borislav Petkov:
 "I hope this is the last batch of x86/urgent updates for this round:

  - Remove superfluous EFI PGD range checks which lead to those
    assertions failing with certain kernel configs and LLVM.

  - Disable setting breakpoints on facilities involved in #DB exception
    handling to avoid infinite loops.

  - Add extra serialization to non-serializing MSRs (IA32_TSC_DEADLINE
    and x2 APIC MSRs) to adhere to SDM's recommendation and avoid any
    theoretical issues.

  - Re-add the EPB MSR reading on turbostat so that it works on older
    kernels which don't have the corresponding EPB sysfs file.

  - Add Alder Lake to the list of CPUs which support split lock.

  - Fix %dr6 register handling in order to be able to set watchpoints
    with gdb again.

  - Disable CET instrumentation in the kernel so that gcc doesn't add
    ENDBR64 to kernel code and thus confuse tracing"

* tag 'x86_urgent_for_v5.11_rc7' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/efi: Remove EFI PGD build time checks
  x86/debug: Prevent data breakpoints on cpu_dr7
  x86/debug: Prevent data breakpoints on __per_cpu_offset
  x86/apic: Add extra serialization for non-serializing MSRs
  tools/power/turbostat: Fallback to an MSR read for EPB
  x86/split_lock: Enable the split lock feature on another Alder Lake CPU
  x86/debug: Fix DR6 handling
  x86/build: Disable CET instrumentation in the kernel
commit e24f9c5f6e
Makefile
@@ -949,12 +949,6 @@ KBUILD_CFLAGS += $(call cc-option,-Werror=designated-init)

 # change __FILE__ to the relative path from the srctree
 KBUILD_CPPFLAGS += $(call cc-option,-fmacro-prefix-map=$(srctree)/=)

-# ensure -fcf-protection is disabled when using retpoline as it is
-# incompatible with -mindirect-branch=thunk-extern
-ifdef CONFIG_RETPOLINE
-KBUILD_CFLAGS += $(call cc-option,-fcf-protection=none)
-endif
-
 # include additional Makefiles when needed
 include-y := scripts/Makefile.extrawarn
 include-$(CONFIG_KASAN) += scripts/Makefile.kasan
arch/x86/Makefile
@@ -120,6 +120,9 @@ else
        KBUILD_CFLAGS += -mno-red-zone
        KBUILD_CFLAGS += -mcmodel=kernel
+
+        # Intel CET isn't enabled in the kernel
+        KBUILD_CFLAGS += $(call cc-option,-fcf-protection=none)
 endif

 ifdef CONFIG_X86_X32
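Background for the two CET hunks above (an illustrative sketch, not part of the commit): with -fcf-protection=branch, gcc plants an ENDBR64 instruction at every indirect-branch target, including function entries. Kernel tracing patches the "call __fentry__" that -pg -mfentry places at offset 0 of each function, and an ENDBR64 in front of it breaks that assumption. A minimal way to observe this outside the kernel:

        /* endbr.c -- compile twice and compare the generated assembly:
         *   gcc -O2 -pg -mfentry -fcf-protection=none   -S endbr.c
         *   gcc -O2 -pg -mfentry -fcf-protection=branch -S endbr.c
         * The second variant emits "endbr64" ahead of "call __fentry__".
         */
        void traced_function(void)
        {
        }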
arch/x86/include/asm/apic.h
@@ -197,16 +197,6 @@ static inline bool apic_needs_pit(void) { return true; }
 #endif /* !CONFIG_X86_LOCAL_APIC */

 #ifdef CONFIG_X86_X2APIC
-/*
- * Make previous memory operations globally visible before
- * sending the IPI through x2apic wrmsr. We need a serializing instruction or
- * mfence for this.
- */
-static inline void x2apic_wrmsr_fence(void)
-{
-        asm volatile("mfence" : : : "memory");
-}
-
 static inline void native_apic_msr_write(u32 reg, u32 v)
 {
         if (reg == APIC_DFR || reg == APIC_ID || reg == APIC_LDR ||
arch/x86/include/asm/barrier.h
@@ -84,4 +84,22 @@ do { \

 #include <asm-generic/barrier.h>

+/*
+ * Make previous memory operations globally visible before
+ * a WRMSR.
+ *
+ * MFENCE makes writes visible, but only affects load/store
+ * instructions. WRMSR is unfortunately not a load/store
+ * instruction and is unaffected by MFENCE. The LFENCE ensures
+ * that the WRMSR is not reordered.
+ *
+ * Most WRMSRs are full serializing instructions themselves and
+ * do not require this barrier. This is only required for the
+ * IA32_TSC_DEADLINE and X2APIC MSRs.
+ */
+static inline void weak_wrmsr_fence(void)
+{
+        asm volatile("mfence; lfence" : : : "memory");
+}
+
 #endif /* _ASM_X86_BARRIER_H */
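Why a plain MFENCE isn't enough here is spelled out in the comment above; what follows is a hypothetical sender (a sketch, not part of the commit) showing the pattern the x2apic callers below adopt. WRMSR to a non-serializing MSR can execute before earlier stores become globally visible, so an IPI sent through the x2APIC ICR could arrive before the data it advertises:

        static int work_ready;

        static void send_ipi_example(void)
        {
                WRITE_ONCE(work_ready, 1);     /* payload store */
                weak_wrmsr_fence();            /* make it globally visible */
                /* wrmsr to the x2APIC ICR would go here; without the fence
                 * the target CPU could take the IPI and still read
                 * work_ready == 0. */
        }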
arch/x86/kernel/apic/apic.c
@@ -41,6 +41,7 @@
 #include <asm/perf_event.h>
 #include <asm/x86_init.h>
 #include <linux/atomic.h>
+#include <asm/barrier.h>
 #include <asm/mpspec.h>
 #include <asm/i8259.h>
 #include <asm/proto.h>
@@ -477,6 +478,9 @@ static int lapic_next_deadline(unsigned long delta,
 {
         u64 tsc;

+        /* This MSR is special and need a special fence: */
+        weak_wrmsr_fence();
+
         tsc = rdtsc();
         wrmsrl(MSR_IA32_TSC_DEADLINE, tsc + (((u64) delta) * TSC_DIVISOR));
         return 0;
arch/x86/kernel/apic/x2apic_cluster.c
@@ -29,7 +29,8 @@ static void x2apic_send_IPI(int cpu, int vector)
 {
         u32 dest = per_cpu(x86_cpu_to_logical_apicid, cpu);

-        x2apic_wrmsr_fence();
+        /* x2apic MSRs are special and need a special fence: */
+        weak_wrmsr_fence();
         __x2apic_send_IPI_dest(dest, vector, APIC_DEST_LOGICAL);
 }

@@ -41,7 +42,8 @@ __x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest)
         unsigned long flags;
         u32 dest;

-        x2apic_wrmsr_fence();
+        /* x2apic MSRs are special and need a special fence: */
+        weak_wrmsr_fence();
         local_irq_save(flags);

         tmpmsk = this_cpu_cpumask_var_ptr(ipi_mask);
arch/x86/kernel/apic/x2apic_phys.c
@@ -43,7 +43,8 @@ static void x2apic_send_IPI(int cpu, int vector)
 {
         u32 dest = per_cpu(x86_cpu_to_apicid, cpu);

-        x2apic_wrmsr_fence();
+        /* x2apic MSRs are special and need a special fence: */
+        weak_wrmsr_fence();
         __x2apic_send_IPI_dest(dest, vector, APIC_DEST_PHYSICAL);
 }

@@ -54,7 +55,8 @@ __x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest)
         unsigned long this_cpu;
         unsigned long flags;

-        x2apic_wrmsr_fence();
+        /* x2apic MSRs are special and need a special fence: */
+        weak_wrmsr_fence();

         local_irq_save(flags);

@@ -125,7 +127,8 @@ void __x2apic_send_IPI_shorthand(int vector, u32 which)
 {
         unsigned long cfg = __prepare_ICR(which, vector, 0);

-        x2apic_wrmsr_fence();
+        /* x2apic MSRs are special and need a special fence: */
+        weak_wrmsr_fence();
         native_x2apic_icr_write(cfg, 0);
 }
arch/x86/kernel/cpu/intel.c
@@ -1159,6 +1159,7 @@ static const struct x86_cpu_id split_lock_cpu_ids[] __initconst = {
         X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE,          1),
         X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X,   1),
         X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE,          1),
+        X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L,        1),
         {}
 };
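For context, a sketch of how such a match table is consumed (the actual consumer in intel.c is cpu_set_core_cap_bits(); names and control flow are abbreviated here, so treat this as an assumption rather than the exact code):

        const struct x86_cpu_id *m = x86_match_cpu(split_lock_cpu_ids);

        if (m && m->driver_data)        /* the '1' in each entry above */
                split_lock_setup();     /* enable split-lock detection */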
arch/x86/kernel/hw_breakpoint.c
@@ -269,6 +269,20 @@ static inline bool within_cpu_entry(unsigned long addr, unsigned long end)
                         CPU_ENTRY_AREA_TOTAL_SIZE))
                 return true;

+        /*
+         * When FSGSBASE is enabled, paranoid_entry() fetches the per-CPU
+         * GSBASE value via __per_cpu_offset or pcpu_unit_offsets.
+         */
+#ifdef CONFIG_SMP
+        if (within_area(addr, end, (unsigned long)__per_cpu_offset,
+                        sizeof(unsigned long) * nr_cpu_ids))
+                return true;
+#else
+        if (within_area(addr, end, (unsigned long)&pcpu_unit_offsets,
+                        sizeof(pcpu_unit_offsets)))
+                return true;
+#endif
+
         for_each_possible_cpu(cpu) {
                 /* The original rw GDT is being used after load_direct_gdt() */
                 if (within_area(addr, end, (unsigned long)get_cpu_gdt_rw(cpu),
@@ -293,6 +307,14 @@ static inline bool within_cpu_entry(unsigned long addr, unsigned long end)
                                 (unsigned long)&per_cpu(cpu_tlbstate, cpu),
                                 sizeof(struct tlb_state)))
                         return true;
+
+                /*
+                 * When in guest (X86_FEATURE_HYPERVISOR), local_db_save()
+                 * will read per-cpu cpu_dr7 before clear dr7 register.
+                 */
+                if (within_area(addr, end, (unsigned long)&per_cpu(cpu_dr7, cpu),
+                                sizeof(cpu_dr7)))
+                        return true;
         }

         return false;
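within_area(), used throughout within_cpu_entry() above, is a plain range-overlap test defined earlier in hw_breakpoint.c and not shown in this diff; a sketch of its shape (from memory, verify against the tree):

        /* Return whether [addr, end] overlaps [base, base + size). */
        static inline bool within_area(unsigned long addr, unsigned long end,
                                       unsigned long base, unsigned long size)
        {
                return end >= base && addr < (base + size);
        }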
arch/x86/kernel/hw_breakpoint.c
@@ -491,15 +513,12 @@ static int hw_breakpoint_handler(struct die_args *args)
         struct perf_event *bp;
         unsigned long *dr6_p;
         unsigned long dr6;
+        bool bpx;

         /* The DR6 value is pointed by args->err */
         dr6_p = (unsigned long *)ERR_PTR(args->err);
         dr6 = *dr6_p;

-        /* If it's a single step, TRAP bits are random */
-        if (dr6 & DR_STEP)
-                return NOTIFY_DONE;
-
         /* Do an early return if no trap bits are set in DR6 */
         if ((dr6 & DR_TRAP_BITS) == 0)
                 return NOTIFY_DONE;
@@ -509,28 +528,29 @@ static int hw_breakpoint_handler(struct die_args *args)
                 if (likely(!(dr6 & (DR_TRAP0 << i))))
                         continue;

+                bp = this_cpu_read(bp_per_reg[i]);
+                if (!bp)
+                        continue;
+
+                bpx = bp->hw.info.type == X86_BREAKPOINT_EXECUTE;
+
+                /*
+                 * TF and data breakpoints are traps and can be merged, however
+                 * instruction breakpoints are faults and will be raised
+                 * separately.
+                 *
+                 * However DR6 can indicate both TF and instruction
+                 * breakpoints. In that case take TF as that has precedence and
+                 * delay the instruction breakpoint for the next exception.
+                 */
+                if (bpx && (dr6 & DR_STEP))
+                        continue;
+
                 /*
                  * The counter may be concurrently released but that can only
                  * occur from a call_rcu() path. We can then safely fetch
                  * the breakpoint, use its callback, touch its counter
                  * while we are in an rcu_read_lock() path.
                  */
                 rcu_read_lock();

-                bp = this_cpu_read(bp_per_reg[i]);
                 /*
                  * Reset the 'i'th TRAP bit in dr6 to denote completion of
                  * exception handling
                  */
                 (*dr6_p) &= ~(DR_TRAP0 << i);
-                /*
-                 * bp can be NULL due to lazy debug register switching
-                 * or due to concurrent perf counter removing.
-                 */
-                if (!bp) {
-                        rcu_read_unlock();
-                        break;
-                }

                 perf_bp_event(bp, args->regs);

@@ -538,11 +558,10 @@ static int hw_breakpoint_handler(struct die_args *args)
                  * Set up resume flag to avoid breakpoint recursion when
                  * returning back to origin.
                  */
-                if (bp->hw.info.type == X86_BREAKPOINT_EXECUTE)
+                if (bpx)
                         args->regs->flags |= X86_EFLAGS_RF;

                 rcu_read_unlock();
         }

         /*
          * Further processing in do_debug() is needed for a) user-space
          * breakpoints (to generate signals) and b) when the system has
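For reference, the DR6 bits tested above are defined in arch/x86/include/uapi/asm/debugreg.h:

        #define DR_TRAP0        (0x1)           /* db0 */
        #define DR_TRAP1        (0x2)           /* db1 */
        #define DR_TRAP2        (0x4)           /* db2 */
        #define DR_TRAP3        (0x8)           /* db3 */
        #define DR_TRAP_BITS    (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)
        #define DR_STEP         (0x4000)        /* single-step */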
arch/x86/platform/efi/efi_64.c
@@ -115,31 +115,12 @@ void efi_sync_low_kernel_mappings(void)
         pud_t *pud_k, *pud_efi;
         pgd_t *efi_pgd = efi_mm.pgd;

-        /*
-         * We can share all PGD entries apart from the one entry that
-         * covers the EFI runtime mapping space.
-         *
-         * Make sure the EFI runtime region mappings are guaranteed to
-         * only span a single PGD entry and that the entry also maps
-         * other important kernel regions.
-         */
-        MAYBE_BUILD_BUG_ON(pgd_index(EFI_VA_END) != pgd_index(MODULES_END));
-        MAYBE_BUILD_BUG_ON((EFI_VA_START & PGDIR_MASK) !=
-                        (EFI_VA_END & PGDIR_MASK));
-
         pgd_efi = efi_pgd + pgd_index(PAGE_OFFSET);
         pgd_k = pgd_offset_k(PAGE_OFFSET);

         num_entries = pgd_index(EFI_VA_END) - pgd_index(PAGE_OFFSET);
         memcpy(pgd_efi, pgd_k, sizeof(pgd_t) * num_entries);

-        /*
-         * As with PGDs, we share all P4D entries apart from the one entry
-         * that covers the EFI runtime mapping space.
-         */
-        BUILD_BUG_ON(p4d_index(EFI_VA_END) != p4d_index(MODULES_END));
-        BUILD_BUG_ON((EFI_VA_START & P4D_MASK) != (EFI_VA_END & P4D_MASK));
-
         pgd_efi = efi_pgd + pgd_index(EFI_VA_END);
         pgd_k = pgd_offset_k(EFI_VA_END);
         p4d_efi = p4d_offset(pgd_efi, 0);
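For context on what the removed assertions checked (a sketch; the real macros live under arch/x86/include/asm/ and the values below assume 4-level paging): pgd_index() selects the top-level page-table slot for a virtual address, and the BUILD_BUG_ONs asserted at compile time that EFI_VA_START..EFI_VA_END fits in the single slot it shares with MODULES_END, an invariant that certain configs plus LLVM could not evaluate as a build-time constant, hence the removal.

        /* 4-level paging values, for illustration only: */
        #define PGDIR_SHIFT     39
        #define PTRS_PER_PGD    512
        #define pgd_index(addr) (((addr) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))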
tools/power/x86/turbostat/turbostat.c
@@ -1834,12 +1834,15 @@ int get_mp(int cpu, struct msr_counter *mp, unsigned long long *counterp)
 int get_epb(int cpu)
 {
         char path[128 + PATH_BYTES];
+        unsigned long long msr;
         int ret, epb = -1;
         FILE *fp;

         sprintf(path, "/sys/devices/system/cpu/cpu%d/power/energy_perf_bias", cpu);

-        fp = fopen_or_die(path, "r");
+        fp = fopen(path, "r");
+        if (!fp)
+                goto msr_fallback;

         ret = fscanf(fp, "%d", &epb);
         if (ret != 1)
@@ -1848,6 +1851,11 @@ int get_epb(int cpu)
         fclose(fp);

         return epb;
+
+msr_fallback:
+        get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr);
+
+        return msr & 0xf;
 }

 void get_apic_id(struct thread_data *t)
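Put together, the resulting get_epb() reads the sysfs file when present and falls back to an MSR read on older kernels. A sketch assembled from the two hunks above (the fscanf error path is truncated in the diff and left elided here):

        int get_epb(int cpu)
        {
                char path[128 + PATH_BYTES];
                unsigned long long msr;
                int ret, epb = -1;
                FILE *fp;

                sprintf(path, "/sys/devices/system/cpu/cpu%d/power/energy_perf_bias", cpu);

                fp = fopen(path, "r");
                if (!fp)
                        goto msr_fallback;

                ret = fscanf(fp, "%d", &epb);
                if (ret != 1) {
                        /* error path elided in the hunk above */
                }

                fclose(fp);

                return epb;

        msr_fallback:
                get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr);

                return msr & 0xf;
        }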