mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2025-01-04 04:06:26 +00:00
eb55307e67
- Limit the hardcoded topology quirk for Hygon CPUs to those which have a model ID less than 4. The newer models have the topology CPUID leaf 0xB correctly implemented and are not affected. - Make SMT control more robust against enumeration failures SMT control was added to allow controlling SMT at boottime or runtime. The primary purpose was to provide a simple mechanism to disable SMT in the light of speculation attack vectors. It turned out that the code is sensitive to enumeration failures and worked only by chance for XEN/PV. XEN/PV has no real APIC enumeration which means the primary thread mask is not set up correctly. By chance a XEN/PV boot ends up with smp_num_siblings == 2, which makes the hotplug control stay at its default value "enabled". So the mask is never evaluated. The ongoing rework of the topology evaluation caused XEN/PV to end up with smp_num_siblings == 1, which sets the SMT control to "not supported" and the empty primary thread mask causes the hotplug core to deny the bringup of the APs. Make the decision logic more robust and take 'not supported' and 'not implemented' into account for the decision whether a CPU should be booted or not. - Fake primary thread mask for XEN/PV Pretend that all XEN/PV vCPUs are primary threads, which makes the usage of the primary thread mask valid on XEN/PV. That is consistent because all of the topology information on XEN/PV is fake or even non-existent. - Encapsulate topology information in cpuinfo_x86 Move the randomly scattered topology data into a separate data structure for readability and as a preparatory step for the topology evaluation overhaul. - Consolidate APIC ID data type to u32 It's fixed width hardware data and not randomly u16, int, unsigned long or whatever developers decided to use. - Cure the abuse of cpuinfo for persisting logical IDs. Per CPU cpuinfo is used to persist the logical package and die IDs. 
That's really not the right place simply because cpuinfo is subject to be reinitialized when a CPU goes through an offline/online cycle. Use separate per CPU data for the persisting to enable the further topology management rework. It will be removed once the new topology management is in place. - Provide a debug interface for inspecting topology information Useful in general and extremly helpful for validating the topology management rework in terms of correctness or "bug" compatibility. -----BEGIN PGP SIGNATURE----- iQJHBAABCgAxFiEEQp8+kY+LLUocC4bMphj1TA10mKEFAmU+yX0THHRnbHhAbGlu dXRyb25peC5kZQAKCRCmGPVMDXSYoROUD/4vlvKEcpm9rbI5DzLcaq4DFHKbyEZF cQtzuOSM/9vTc9DHnuoNNLl9TWSYxiVYnejf3E21evfsqspYlzbTH8bId9XBCUid 6B68AJW842M2erNuwj0b0HwF1z++zpDmBDyhGOty/KQhoM8pYOHMvntAmbzJbuso Dgx6BLVFcboTy6RwlfRa0EE8f9W5V+JbmG/VBDpdyCInal7VrudoVFZmWQnPIft7 zwOJpAoehkp8OKq7geKDf79yWxu9a1sNPd62HtaVEvfHwehHqE6OaMLss1us+0vT SJ/D6gmRQBOwcXaZL0wL1dG7Km9Et4AisOvzhXGvTa5b2D5oljVoqJ7V7FTf5g3u y3aqWbeUJzERUbeJt1HoGVAKyA4GtZOvg+TNIysf6F1Z4khl9alfa9jiqjj4g1au zgItq/ZMBEBmJ7X4FxQUEUVBG2CDsEidyNBDRcimWQUDfBakV/iCs0suD8uu8ZOD K5jMx8Hi2+xFx7r1YqsfsyMBYOf/zUZw65RbNe+kI992JbJ9nhcODbnbo5MlAsyv vcqlK5FwXgZ4YAC8dZHU/tyTiqAW7oaOSkqKwTP5gcyNEqsjQHV//q6v+uqtjfYn 1C4oUsRHT2vJiV9ktNJTA4GQHIYF4geGgpG8Ih2SjXsSzdGtUd3DtX1iq0YiLEOk eHhYsnniqsYB5g== =xrz8 -----END PGP SIGNATURE----- Merge tag 'x86-core-2023-10-29-v2' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip Pull x86 core updates from Thomas Gleixner: - Limit the hardcoded topology quirk for Hygon CPUs to those which have a model ID less than 4. The newer models have the topology CPUID leaf 0xB correctly implemented and are not affected. - Make SMT control more robust against enumeration failures SMT control was added to allow controlling SMT at boottime or runtime. The primary purpose was to provide a simple mechanism to disable SMT in the light of speculation attack vectors. 
It turned out that the code is sensitive to enumeration failures and worked only by chance for XEN/PV. XEN/PV has no real APIC enumeration which means the primary thread mask is not set up correctly. By chance a XEN/PV boot ends up with smp_num_siblings == 2, which makes the hotplug control stay at its default value "enabled". So the mask is never evaluated. The ongoing rework of the topology evaluation caused XEN/PV to end up with smp_num_siblings == 1, which sets the SMT control to "not supported" and the empty primary thread mask causes the hotplug core to deny the bringup of the APs. Make the decision logic more robust and take 'not supported' and 'not implemented' into account for the decision whether a CPU should be booted or not. - Fake primary thread mask for XEN/PV Pretend that all XEN/PV vCPUs are primary threads, which makes the usage of the primary thread mask valid on XEN/PV. That is consistent because all of the topology information on XEN/PV is fake or even non-existent. - Encapsulate topology information in cpuinfo_x86 Move the randomly scattered topology data into a separate data structure for readability and as a preparatory step for the topology evaluation overhaul. - Consolidate APIC ID data type to u32 It's fixed width hardware data and not randomly u16, int, unsigned long or whatever developers decided to use. - Cure the abuse of cpuinfo for persisting logical IDs. Per CPU cpuinfo is used to persist the logical package and die IDs. That's really not the right place simply because cpuinfo is subject to be reinitialized when a CPU goes through an offline/online cycle. Use separate per CPU data for the persisting to enable the further topology management rework. It will be removed once the new topology management is in place. - Provide a debug interface for inspecting topology information Useful in general and extremely helpful for validating the topology management rework in terms of correctness or "bug" compatibility. 
* tag 'x86-core-2023-10-29-v2' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (23 commits) x86/apic, x86/hyperv: Use u32 in hv_snp_boot_ap() too x86/cpu: Provide debug interface x86/cpu/topology: Cure the abuse of cpuinfo for persisting logical ids x86/apic: Use u32 for wakeup_secondary_cpu[_64]() x86/apic: Use u32 for [gs]et_apic_id() x86/apic: Use u32 for phys_pkg_id() x86/apic: Use u32 for cpu_present_to_apicid() x86/apic: Use u32 for check_apicid_used() x86/apic: Use u32 for APIC IDs in global data x86/apic: Use BAD_APICID consistently x86/cpu: Move cpu_l[l2]c_id into topology info x86/cpu: Move logical package and die IDs into topology info x86/cpu: Remove pointless evaluation of x86_coreid_bits x86/cpu: Move cu_id into topology info x86/cpu: Move cpu_core_id into topology info hwmon: (fam15h_power) Use topology_core_id() scsi: lpfc: Use topology_core_id() x86/cpu: Move cpu_die_id into topology info x86/cpu: Move phys_proc_id into topology info x86/cpu: Encapsulate topology information in cpuinfo_x86 ...
233 lines
6.7 KiB
C
233 lines
6.7 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
/*
|
|
* Copyright (c) 2023, Microsoft Corporation.
|
|
*
|
|
* Author:
|
|
* Saurabh Sengar <ssengar@microsoft.com>
|
|
*/
|
|
|
|
#include <asm/apic.h>
|
|
#include <asm/boot.h>
|
|
#include <asm/desc.h>
|
|
#include <asm/i8259.h>
|
|
#include <asm/mshyperv.h>
|
|
#include <asm/realmode.h>
|
|
|
|
extern struct boot_params boot_params;
|
|
static struct real_mode_header hv_vtl_real_mode_header;
|
|
|
|
/*
 * Platform setup for running in a Hyper-V Virtual Trust Level.
 *
 * Announces the VTL environment, marks the legacy PC facilities as absent
 * or disabled, and redirects the x86_platform / x86_init hooks to the
 * *_noop helpers (the NMI reason hook goes to hv_get_nmi_reason).
 */
void __init hv_vtl_init_platform(void)
{
	pr_info("Linux runs in Hyper-V Virtual Trust Level\n");

	/* Legacy PC hardware and firmware facilities. */
	x86_platform.legacy.devices.pnpbios = 0;
	x86_platform.legacy.reserve_bios_regions = 0;
	x86_platform.legacy.warm_reset = 0;
	x86_platform.legacy.rtc = 0;
	x86_platform.legacy.i8042 = X86_LEGACY_I8042_PLATFORM_ABSENT;

	/* Runtime platform hooks. */
	x86_platform.get_nmi_reason = hv_get_nmi_reason;
	x86_platform.set_wallclock = set_rtc_noop;
	x86_platform.get_wallclock = get_rtc_noop;
	x86_platform.realmode_init = x86_init_noop;
	x86_platform.realmode_reserve = x86_init_noop;

	/* Boot-time init hooks. */
	x86_init.timers.timer_init = x86_init_noop;
	x86_init.irqs.pre_vector_init = x86_init_noop;

	/* Avoid searching for BIOS MP tables */
	x86_init.mpparse.get_smp_config = x86_init_uint_noop;
	x86_init.mpparse.find_smp_config = x86_init_noop;
}
|
|
|
|
static inline u64 hv_vtl_system_desc_base(struct ldttss_desc *desc)
|
|
{
|
|
return ((u64)desc->base3 << 32) | ((u64)desc->base2 << 24) |
|
|
(desc->base1 << 16) | desc->base0;
|
|
}
|
|
|
|
static inline u32 hv_vtl_system_desc_limit(struct ldttss_desc *desc)
|
|
{
|
|
return ((u32)desc->limit1 << 16) | (u32)desc->limit0;
|
|
}
|
|
|
|
typedef void (*secondary_startup_64_fn)(void*, void*);
|
|
static void hv_vtl_ap_entry(void)
|
|
{
|
|
((secondary_startup_64_fn)secondary_startup_64)(&boot_params, &boot_params);
|
|
}
|
|
|
|
/*
 * Enable the management VTL on a virtual processor and start it.
 *
 * Fills the per-CPU hypercall input page with an initial 64-bit register
 * context (copied mostly from the calling CPU), issues HVCALL_ENABLE_VP_VTL
 * and then HVCALL_START_VP with that same input.
 *
 * @target_vp_index: hypervisor VP index of the processor to start.
 * @eip_ignored:     unused; the AP always starts at hv_vtl_ap_entry.
 *
 * Return: 0 on success, -EINVAL if either hypercall fails. A
 * HV_STATUS_VTL_ALREADY_ENABLED result from HVCALL_ENABLE_VP_VTL is
 * tolerated and not treated as a failure.
 */
static int hv_vtl_bringup_vcpu(u32 target_vp_index, u64 eip_ignored)
{
	u64 status;
	int ret = 0;
	struct hv_enable_vp_vtl *input;
	unsigned long irq_flags;

	struct desc_ptr gdt_ptr;
	struct desc_ptr idt_ptr;

	struct ldttss_desc *tss;
	struct ldttss_desc *ldt;
	struct desc_struct *gdt;

	u64 rsp = current->thread.sp;
	u64 rip = (u64)&hv_vtl_ap_entry;

	native_store_gdt(&gdt_ptr);
	store_idt(&idt_ptr);

	/* Locate the TSS and LDT system descriptors inside the current GDT. */
	gdt = (struct desc_struct *)((void *)(gdt_ptr.address));
	tss = (struct ldttss_desc *)(gdt + GDT_ENTRY_TSS);
	ldt = (struct ldttss_desc *)(gdt + GDT_ENTRY_LDT);

	/* Interrupts stay off while the per-CPU hypercall input page is in use. */
	local_irq_save(irq_flags);

	input = *this_cpu_ptr(hyperv_pcpu_input_arg);
	memset(input, 0, sizeof(*input));

	input->partition_id = HV_PARTITION_ID_SELF;
	input->vp_index = target_vp_index;
	input->target_vtl.target_vtl = HV_VTL_MGMT;

	/*
	 * The x86_64 Linux kernel follows the 16-bit -> 32-bit -> 64-bit
	 * mode transition sequence after waking up an AP with SIPI whose
	 * vector points to the 16-bit AP startup trampoline code. Here in
	 * VTL2, we can't perform that sequence as the AP has to start in
	 * the 64-bit mode.
	 *
	 * To make this happen, we tell the hypervisor to load a valid 64-bit
	 * context (most of which is just magic numbers from the CPU manual)
	 * so that AP jumps right to the 64-bit entry of the kernel, and the
	 * control registers are loaded with values that let the AP fetch the
	 * code and data and carry on with work it gets assigned.
	 */

	input->vp_context.rip = rip;
	input->vp_context.rsp = rsp;
	input->vp_context.rflags = 0x0000000000000002;
	input->vp_context.efer = __rdmsr(MSR_EFER);
	input->vp_context.cr0 = native_read_cr0();
	input->vp_context.cr3 = __native_read_cr3();
	input->vp_context.cr4 = native_read_cr4();
	input->vp_context.msr_cr_pat = __rdmsr(MSR_IA32_CR_PAT);
	input->vp_context.idtr.limit = idt_ptr.size;
	input->vp_context.idtr.base = idt_ptr.address;
	input->vp_context.gdtr.limit = gdt_ptr.size;
	input->vp_context.gdtr.base = gdt_ptr.address;

	/* Non-system desc (64bit), long, code, present */
	input->vp_context.cs.selector = __KERNEL_CS;
	input->vp_context.cs.base = 0;
	input->vp_context.cs.limit = 0xffffffff;
	input->vp_context.cs.attributes = 0xa09b;
	/* Non-system desc (64bit), data, present, granularity, default */
	input->vp_context.ss.selector = __KERNEL_DS;
	input->vp_context.ss.base = 0;
	input->vp_context.ss.limit = 0xffffffff;
	input->vp_context.ss.attributes = 0xc093;

	/* System desc (128bit), present, LDT */
	input->vp_context.ldtr.selector = GDT_ENTRY_LDT * 8;
	input->vp_context.ldtr.base = hv_vtl_system_desc_base(ldt);
	input->vp_context.ldtr.limit = hv_vtl_system_desc_limit(ldt);
	input->vp_context.ldtr.attributes = 0x82;

	/* System desc (128bit), present, TSS, 0x8b - busy, 0x89 -- default */
	input->vp_context.tr.selector = GDT_ENTRY_TSS * 8;
	input->vp_context.tr.base = hv_vtl_system_desc_base(tss);
	input->vp_context.tr.limit = hv_vtl_system_desc_limit(tss);
	input->vp_context.tr.attributes = 0x8b;

	status = hv_do_hypercall(HVCALL_ENABLE_VP_VTL, input, NULL);

	/* An already-enabled VTL is acceptable; any other failure aborts. */
	if (!hv_result_success(status) &&
	    hv_result(status) != HV_STATUS_VTL_ALREADY_ENABLED) {
		pr_err("HVCALL_ENABLE_VP_VTL failed for VP : %u ! [Err: %#llx]\n",
		       target_vp_index, status);
		ret = -EINVAL;
		goto out;
	}

	/*
	 * The same input buffer is passed to the start-VP hypercall.
	 * NOTE(review): presumably the HVCALL_START_VP input layout matches
	 * struct hv_enable_vp_vtl - confirm against the hypercall definitions.
	 */
	status = hv_do_hypercall(HVCALL_START_VP, input, NULL);

	if (!hv_result_success(status)) {
		pr_err("HVCALL_START_VP failed for VP : %u ! [Err: %#llx]\n",
		       target_vp_index, status);
		ret = -EINVAL;
	}

out:
	local_irq_restore(irq_flags);

	return ret;
}
|
|
|
|
/*
 * Ask the hypervisor for the VP index that corresponds to an APIC ID.
 *
 * Uses the per-CPU hypercall input page for both the request and the
 * reply (the output pointer aliases the input buffer), with interrupts
 * disabled while the page is in use.
 *
 * @apic_id: APIC ID to translate.
 *
 * Return: the first output word of the hypercall on success, -EINVAL if
 * the hypercall fails.
 */
static int hv_vtl_apicid_to_vp_id(u32 apic_id)
{
	struct hv_get_vp_from_apic_id_in *req;
	unsigned long flags;
	u64 hv_status;
	u64 hv_control;
	u32 *reply;
	u32 vp_id;

	local_irq_save(flags);

	req = *this_cpu_ptr(hyperv_pcpu_input_arg);
	memset(req, 0, sizeof(*req));
	req->partition_id = HV_PARTITION_ID_SELF;
	req->apic_ids[0] = apic_id;

	/* The reply is written over the request buffer. */
	reply = (u32 *)req;

	hv_control = HV_HYPERCALL_REP_COMP_1 | HVCALL_GET_VP_ID_FROM_APIC_ID;
	hv_status = hv_do_hypercall(hv_control, req, reply);

	/* Copy the result out before the per-CPU page can be reused. */
	vp_id = reply[0];

	local_irq_restore(flags);

	if (hv_result_success(hv_status))
		return vp_id;

	pr_err("failed to get vp id from apic id %d, status %#llx\n",
	       apic_id, hv_status);
	return -EINVAL;
}
|
|
|
|
/*
 * Wake up a secondary CPU identified by its APIC ID.
 *
 * Translates the APIC ID to a VP index, rejects a failed lookup or an
 * index above ms_hyperv.max_vp_index, then hands off to
 * hv_vtl_bringup_vcpu().
 *
 * @apicid:    APIC ID of the CPU to start.
 * @start_eip: passed through to hv_vtl_bringup_vcpu().
 *
 * Return: result of hv_vtl_bringup_vcpu(), or -EINVAL if the VP index
 * lookup fails or is out of range.
 */
static int hv_vtl_wakeup_secondary_cpu(u32 apicid, unsigned long start_eip)
{
	int vp_index;

	pr_debug("Bringing up CPU with APIC ID %d in VTL2...\n", apicid);

	vp_index = hv_vtl_apicid_to_vp_id(apicid);
	if (vp_index < 0) {
		pr_err("Couldn't find CPU with APIC ID %d\n", apicid);
		return -EINVAL;
	}

	if (vp_index > ms_hyperv.max_vp_index) {
		pr_err("Invalid CPU id %d for APIC ID %d\n", vp_index, apicid);
		return -EINVAL;
	}

	return hv_vtl_bringup_vcpu(vp_index, start_eip);
}
|
|
|
|
/*
 * Early VTL initialisation.
 *
 * Panics if XSAVE is enabled, then installs the file-local real mode
 * header and registers hv_vtl_wakeup_secondary_cpu as the
 * wakeup_secondary_cpu_64 APIC callback.
 *
 * Return: always 0.
 */
int __init hv_vtl_early_init(void)
{
	/*
	 * The CPU feature check reflects the runtime feature support,
	 * and here is the earliest it can be used.
	 */
	if (cpu_feature_enabled(X86_FEATURE_XSAVE)) {
		panic("XSAVE has to be disabled as it is not supported by this module.\n"
		      "Please add 'noxsave' to the kernel command line.\n");
	}

	real_mode_header = &hv_vtl_real_mode_header;
	apic_update_callback(wakeup_secondary_cpu_64, hv_vtl_wakeup_secondary_cpu);

	return 0;
}
|