- Add new infrastructure for reading TDX metadata

- Use the newly-available metadata to:
   - Disable potentially nasty #VE exceptions
   - Get more complete CPU topology information from the VMM
 -----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCgAdFiEEV76QKkVc4xCGURexaDWVMHDJkrAFAmc9PCoACgkQaDWVMHDJ
 krDIjA//Wo2GVkPkviCdRxAsd0lWyKN52mVHPy1JAlT8H/idOpreMHod1Hn/Jga9
 UqSH2lKZe1JM9KcyOW/1xTVxlJtYxj+aIbDkIVj4qk84e34yOXlJXvBiXbNgscSU
 ZWDN6C20LJuv5ZNb7rA1hGRrPlPS8C5g74oBWQX6qAhBCPS6n9ir0wgixZUwV4kw
 eH3QWyVzhNOKzKqGcHY4+DCe5odn8VDxGXtC97NNRzg1kfu4s0OaCF4EpCAOhZgA
 V/K7eFJQdRsnXtQ0eOq9O6l04cd6aqEyYp76Y6FL9Ztfno6P/YopmfVwXL1wYoHr
 aXO/Qi0km+Vz8xXr6blHC/s/amiLHxn6Ou1ZqUmniOcZiYva9GfyUoHttegH4IcV
 /kDfG52A5T1rREPwakYPTMUhan04nTk7rbjwyXbATyRtbKatl5bbsli4kw0T2R5P
 laCPabvXv3eeBCk+PYPnuVCstfpN5VsLr4VUz3wSMoIEbq3l4lvO/yKaok+KCtyD
 qQjk3H09GDvlR7gWB20Tg5ajyOeoK99gD3qwtXZ7APvvAQ9MewN+fgj2z5QPYnrd
 Bea88gjsSYbJ1RYG68c378rRI0uM6RKXjXu3wSro+QN2oTuE7jfNUh8/SiDi/Ssi
 fdYTFcrVLA3/b9dEqhCOSCVyEifgLMDorAODrdf0oJDuCpeoa4E=
 =NIl1
 -----END PGP SIGNATURE-----

Merge tag 'x86_tdx_for_6.13-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull tdx updates from Dave Hansen:
 "These essentially refine some interactions between TDX guests and
  VMMs.

  The first leverages a new TDX module feature to runtime disable the
  ability for a VM to inject #VE exceptions. Before this feature, there
  was only a static on/off switch and the guest had to panic if it was
  configured in a bad state.

  The second lets the guest opt in to be able to access the topology
  CPUID leaves. Before this, accesses to those leaves would #VE.

  For both of these, it would have been nicest to just change the
  default behavior, but some pesky "other" OSes evidently need to retain
  the legacy behavior.

  Summary:

   - Add new infrastructure for reading TDX metadata

   - Use the newly-available metadata to:
      - Disable potentially nasty #VE exceptions
      - Get more complete CPU topology information from the VMM"

* tag 'x86_tdx_for_6.13-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/tdx: Enable CPU topology enumeration
  x86/tdx: Dynamically disable SEPT violations from causing #VEs
  x86/tdx: Rename tdx_parse_tdinfo() to tdx_setup()
  x86/tdx: Introduce wrappers to read and write TD metadata
This commit is contained in:
Linus Torvalds 2024-11-22 13:07:19 -08:00
commit be4202228e
2 changed files with 128 additions and 23 deletions

View File

@ -78,6 +78,32 @@ static inline void tdcall(u64 fn, struct tdx_module_args *args)
panic("TDCALL %lld failed (Buggy TDX module!)\n", fn);
}
/* Read TD-scoped metadata */
static inline u64 tdg_vm_rd(u64 field, u64 *value)
{
struct tdx_module_args args = {
.rdx = field,
};
u64 ret;
ret = __tdcall_ret(TDG_VM_RD, &args);
*value = args.r8;
return ret;
}
/* Write TD-scoped metadata */
static inline u64 tdg_vm_wr(u64 field, u64 value, u64 mask)
{
struct tdx_module_args args = {
.rdx = field,
.r8 = value,
.r9 = mask,
};
return __tdcall(TDG_VM_WR, &args);
}
/**
* tdx_mcall_get_report0() - Wrapper to get TDREPORT0 (a.k.a. TDREPORT
* subtype 0) using TDG.MR.REPORT TDCALL.
@ -168,7 +194,87 @@ static void __noreturn tdx_panic(const char *msg)
__tdx_hypercall(&args);
}
static void tdx_parse_tdinfo(u64 *cc_mask)
/*
* The kernel cannot handle #VEs when accessing normal kernel memory. Ensure
* that no #VE will be delivered for accesses to TD-private memory.
*
* TDX 1.0 does not allow the guest to disable SEPT #VE on its own. The VMM
* controls if the guest will receive such #VE with TD attribute
* ATTR_SEPT_VE_DISABLE.
*
* Newer TDX modules allow the guest to control if it wants to receive SEPT
* violation #VEs.
*
* Check if the feature is available and disable SEPT #VE if possible.
*
* If the TD is allowed to disable/enable SEPT #VEs, the ATTR_SEPT_VE_DISABLE
* attribute is no longer reliable. It reflects the initial state of the
* control for the TD, but it will not be updated if someone (e.g. bootloader)
* changes it before the kernel starts. Kernel must check TDCS_TD_CTLS bit to
* determine if SEPT #VEs are enabled or disabled.
*/
static void disable_sept_ve(u64 td_attr)
{
const char *msg = "TD misconfiguration: SEPT #VE has to be disabled";
bool debug = td_attr & ATTR_DEBUG;
u64 config, controls;
/* Is this TD allowed to disable SEPT #VE */
tdg_vm_rd(TDCS_CONFIG_FLAGS, &config);
if (!(config & TDCS_CONFIG_FLEXIBLE_PENDING_VE)) {
/* No SEPT #VE controls for the guest: check the attribute */
if (td_attr & ATTR_SEPT_VE_DISABLE)
return;
/* Relax SEPT_VE_DISABLE check for debug TD for backtraces */
if (debug)
pr_warn("%s\n", msg);
else
tdx_panic(msg);
return;
}
/* Check if SEPT #VE has been disabled before us */
tdg_vm_rd(TDCS_TD_CTLS, &controls);
if (controls & TD_CTLS_PENDING_VE_DISABLE)
return;
/* Keep #VEs enabled for splats in debugging environments */
if (debug)
return;
/* Disable SEPT #VEs */
tdg_vm_wr(TDCS_TD_CTLS, TD_CTLS_PENDING_VE_DISABLE,
TD_CTLS_PENDING_VE_DISABLE);
}
/*
* TDX 1.0 generates a #VE when accessing topology-related CPUID leafs (0xB and
* 0x1F) and the X2APIC_APICID MSR. The kernel returns all zeros on CPUID #VEs.
* In practice, this means that the kernel can only boot with a plain topology.
* Any complications will cause problems.
*
* The ENUM_TOPOLOGY feature allows the VMM to provide topology information.
* Enabling the feature eliminates topology-related #VEs: the TDX module
* virtualizes accesses to the CPUID leafs and the MSR.
*
* Enable ENUM_TOPOLOGY if it is available.
*/
static void enable_cpu_topology_enumeration(void)
{
u64 configured;
/* Has the VMM provided a valid topology configuration? */
tdg_vm_rd(TDCS_TOPOLOGY_ENUM_CONFIGURED, &configured);
if (!configured) {
pr_err("VMM did not configure X2APIC_IDs properly\n");
return;
}
tdg_vm_wr(TDCS_TD_CTLS, TD_CTLS_ENUM_TOPOLOGY, TD_CTLS_ENUM_TOPOLOGY);
}
static void tdx_setup(u64 *cc_mask)
{
struct tdx_module_args args = {};
unsigned int gpa_width;
@ -193,21 +299,13 @@ static void tdx_parse_tdinfo(u64 *cc_mask)
gpa_width = args.rcx & GENMASK(5, 0);
*cc_mask = BIT_ULL(gpa_width - 1);
/*
* The kernel can not handle #VE's when accessing normal kernel
* memory. Ensure that no #VE will be delivered for accesses to
* TD-private memory. Only VMM-shared memory (MMIO) will #VE.
*/
td_attr = args.rdx;
if (!(td_attr & ATTR_SEPT_VE_DISABLE)) {
const char *msg = "TD misconfiguration: SEPT_VE_DISABLE attribute must be set.";
/* Relax SEPT_VE_DISABLE check for debug TD. */
if (td_attr & ATTR_DEBUG)
pr_warn("%s\n", msg);
else
tdx_panic(msg);
}
/* Kernel does not use NOTIFY_ENABLES and does not need random #VEs */
tdg_vm_wr(TDCS_NOTIFY_ENABLES, 0, -1ULL);
disable_sept_ve(td_attr);
enable_cpu_topology_enumeration();
}
/*
@ -929,10 +1027,6 @@ static void tdx_kexec_finish(void)
void __init tdx_early_init(void)
{
struct tdx_module_args args = {
.rdx = TDCS_NOTIFY_ENABLES,
.r9 = -1ULL,
};
u64 cc_mask;
u32 eax, sig[3];
@ -947,11 +1041,11 @@ void __init tdx_early_init(void)
setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE);
cc_vendor = CC_VENDOR_INTEL;
tdx_parse_tdinfo(&cc_mask);
cc_set_mask(cc_mask);
/* Kernel does not use NOTIFY_ENABLES and does not need random #VEs */
tdcall(TDG_VM_WR, &args);
/* Configure the TD */
tdx_setup(&cc_mask);
cc_set_mask(cc_mask);
/*
* All bits above GPA width are reserved and kernel treats shared bit

View File

@ -16,10 +16,21 @@
#define TDG_VP_VEINFO_GET 3
#define TDG_MR_REPORT 4
#define TDG_MEM_PAGE_ACCEPT 6
#define TDG_VM_RD 7
#define TDG_VM_WR 8
/* TDCS fields. To be used by TDG.VM.WR and TDG.VM.RD module calls */
/* TDX TD-Scope Metadata. To be used by TDG.VM.WR and TDG.VM.RD */
#define TDCS_CONFIG_FLAGS 0x1110000300000016
#define TDCS_TD_CTLS 0x1110000300000017
#define TDCS_NOTIFY_ENABLES 0x9100000000000010
#define TDCS_TOPOLOGY_ENUM_CONFIGURED 0x9100000000000019
/* TDCS_CONFIG_FLAGS bits */
#define TDCS_CONFIG_FLEXIBLE_PENDING_VE BIT_ULL(1)
/* TDCS_TD_CTLS bits */
#define TD_CTLS_PENDING_VE_DISABLE BIT_ULL(0)
#define TD_CTLS_ENUM_TOPOLOGY BIT_ULL(1)
/* TDX hypercall Leaf IDs */
#define TDVMCALL_MAP_GPA 0x10001