diff options
24 files changed, 991 insertions, 167 deletions
diff --git a/Documentation/ABI/testing/sysfs-devices-faux-tdx-host b/Documentation/ABI/testing/sysfs-devices-faux-tdx-host new file mode 100644 index 0000000000000..c9cb273abf320 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-devices-faux-tdx-host @@ -0,0 +1,26 @@ +What: /sys/devices/faux/tdx_host/version +Contact: linux-coco@lists.linux.dev +Description: (RO) Report the version of the loaded TDX module. + Formatted as "major.minor.update". Used by TDX module + update tooling. Example: "1.2.03". + +What: /sys/devices/faux/tdx_host/seamldr_version +Contact: linux-coco@lists.linux.dev +Description: (RO) Report the version of the loaded P-SEAMLDR. + Formatted as a TDX module version. Used by TDX module + update tooling. + +What: /sys/devices/faux/tdx_host/num_remaining_updates +Contact: linux-coco@lists.linux.dev +Description: (RO) Report the number of remaining updates. TDX maintains a + log about each TDX module that has been loaded. This log has + a finite size, which limits the number of TDX module updates + that can be performed. + + After each successful update, the number reduces by one. Once it + reaches zero, further updates will fail until next reboot. The + number is always zero if the P-SEAMLDR doesn't support updates. + + See Intel Trust Domain Extensions - SEAM Loader (SEAMLDR) + Interface Specification, Chapter "SEAMLDR_INFO" and Chapter + "SEAMLDR.INSTALL" for more information. diff --git a/Documentation/arch/x86/tdx.rst b/Documentation/arch/x86/tdx.rst index ff6b110291bc6..1a3b5bac10216 100644 --- a/Documentation/arch/x86/tdx.rst +++ b/Documentation/arch/x86/tdx.rst @@ -138,13 +138,6 @@ If the platform has such erratum, the kernel prints additional message in machine check handler to tell user the machine check may be caused by kernel bug on TDX private memory. -Kexec -~~~~~~~ - -Currently kexec doesn't work on the TDX platforms with the aforementioned -erratum. It fails when loading the kexec kernel image. Otherwise it -works normally. - Interaction vs S3 and deeper states ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 1d506e5d6f46a..7b572bc24265c 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -573,4 +573,5 @@ #define X86_BUG_ITS_NATIVE_ONLY X86_BUG( 1*32+ 8) /* "its_native_only" CPU is affected by ITS, VMX is not affected */ #define X86_BUG_TSA X86_BUG( 1*32+ 9) /* "tsa" CPU is affected by Transient Scheduler Attacks */ #define X86_BUG_VMSCAPE X86_BUG( 1*32+10) /* "vmscape" CPU is affected by VMSCAPE attacks from guests */ +#define X86_BUG_SEAMRET_INVD_VMCS X86_BUG( 1*32+11) /* "seamret_invd_vmcs" SEAMRET from P-SEAMLDR clears the current VMCS */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/include/asm/seamldr.h b/arch/x86/include/asm/seamldr.h new file mode 100644 index 0000000000000..cfc6a1b1a440b --- /dev/null +++ b/arch/x86/include/asm/seamldr.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_SEAMLDR_H +#define _ASM_X86_SEAMLDR_H + +#include <linux/types.h> + +/* + * This is the "SEAMLDR_INFO" data structure defined in the + * "SEAM Loader (SEAMLDR) Interface Specification". + * + * Must be aligned to a 256-byte boundary. + */ +struct seamldr_info { + u32 version; + u32 attributes; + u32 vendor_id; + u32 build_date; + u16 build_num; + u16 minor_version; + u16 major_version; + u16 update_version; + u32 acm_x2apicid; + u32 num_remaining_updates; + u8 seam_info[128]; + u8 seam_ready; + u8 seam_debug; + u8 p_seam_ready; + u8 reserved[93]; +} __packed __aligned(256); + +static_assert(sizeof(struct seamldr_info) == 256); + +int seamldr_get_info(struct seamldr_info *seamldr_info); +int seamldr_install_module(const u8 *data, u32 data_len); +void seamldr_lock_module_update(void); +void seamldr_unlock_module_update(void); + +#endif /* _ASM_X86_SEAMLDR_H */ diff --git a/arch/x86/include/asm/shared/tdx.h b/arch/x86/include/asm/shared/tdx.h index 049638e3da743..f20e91d7ac35b 100644 --- a/arch/x86/include/asm/shared/tdx.h +++ b/arch/x86/include/asm/shared/tdx.h @@ -4,6 +4,7 @@ #include <linux/bits.h> #include <linux/types.h> +#include <asm/shared/tdx_errno.h> #define TDX_HYPERCALL_STANDARD 0 diff --git a/arch/x86/kvm/vmx/tdx_errno.h b/arch/x86/include/asm/shared/tdx_errno.h index 6ff4672c41810..ee411b360e20d 100644 --- a/arch/x86/kvm/vmx/tdx_errno.h +++ b/arch/x86/include/asm/shared/tdx_errno.h @@ -1,8 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* architectural status code for SEAMCALL */ - -#ifndef __KVM_X86_TDX_ERRNO_H -#define __KVM_X86_TDX_ERRNO_H +#ifndef _ASM_X86_SHARED_TDX_ERRNO_H +#define _ASM_X86_SHARED_TDX_ERRNO_H #define TDX_SEAMCALL_STATUS_MASK 0xFFFFFFFF00000000ULL @@ -14,6 +13,7 @@ #define TDX_NON_RECOVERABLE_TD_NON_ACCESSIBLE 0x6000000500000000ULL #define TDX_NON_RECOVERABLE_TD_WRONG_APIC_MODE 0x6000000700000000ULL #define TDX_INTERRUPTED_RESUMABLE 0x8000000300000000ULL +#define TDX_SYS_BUSY 0x8000020200000000ULL #define TDX_OPERAND_INVALID 0xC000010000000000ULL #define TDX_OPERAND_BUSY 0x8000020000000000ULL #define TDX_PREVIOUS_TLB_EPOCH_BUSY 0x8000020100000000ULL @@ -37,4 +37,4 @@ #define TDX_OPERAND_ID_SEPT 0x92 #define TDX_OPERAND_ID_TD_EPOCH 0xa9 -#endif /* __KVM_X86_TDX_ERRNO_H */ +#endif /* _ASM_X86_SHARED_TDX_ERRNO_H */ diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h index a149740b24e8b..e5a9cf656c072 100644 --- a/arch/x86/include/asm/tdx.h +++ b/arch/x86/include/asm/tdx.h @@ -32,6 +32,10 @@ #define TDX_SUCCESS 0ULL #define TDX_RND_NO_ENTROPY 0x8000020300000000ULL +/* Bit definitions of TDX_FEATURES0 metadata field */ +#define TDX_FEATURES0_TD_PRESERVING BIT_ULL(1) +#define TDX_FEATURES0_NO_RBP_MOD BIT_ULL(18) + #ifndef __ASSEMBLER__ #include <uapi/asm/mce.h> @@ -39,6 +43,12 @@ #include <linux/pgtable.h> /* + * TDX module and P-SEAMLDR version convention: "major.minor.update" + * (e.g., "1.5.08") with zero-padded two-digit update field. + */ +#define TDX_VERSION_FMT "%u.%u.%02u" + +/* * Used by the #VE exception handler to gather the #VE exception * info from the TDX module. This is a software only structure * and not part of the TDX module/VMM ABI. @@ -97,57 +107,16 @@ static inline long tdx_kvm_hypercall(unsigned int nr, unsigned long p1, #endif /* CONFIG_INTEL_TDX_GUEST && CONFIG_KVM_GUEST */ #ifdef CONFIG_INTEL_TDX_HOST -u64 __seamcall(u64 fn, struct tdx_module_args *args); -u64 __seamcall_ret(u64 fn, struct tdx_module_args *args); -u64 __seamcall_saved_ret(u64 fn, struct tdx_module_args *args); void tdx_init(void); +int tdx_cpu_enable(void); +const char *tdx_dump_mce_info(struct mce *m); +const struct tdx_sys_info *tdx_get_sysinfo(void); -#include <linux/preempt.h> -#include <asm/archrandom.h> -#include <asm/processor.h> - -typedef u64 (*sc_func_t)(u64 fn, struct tdx_module_args *args); - -static __always_inline u64 __seamcall_dirty_cache(sc_func_t func, u64 fn, - struct tdx_module_args *args) -{ - lockdep_assert_preemption_disabled(); - - /* - * SEAMCALLs are made to the TDX module and can generate dirty - * cachelines of TDX private memory. Mark cache state incoherent - * so that the cache can be flushed during kexec. - * - * This needs to be done before actually making the SEAMCALL, - * because kexec-ing CPU could send NMI to stop remote CPUs, - * in which case even disabling IRQ won't help here. - */ - this_cpu_write(cache_state_incoherent, true); - - return func(fn, args); -} - -static __always_inline u64 sc_retry(sc_func_t func, u64 fn, - struct tdx_module_args *args) +static inline bool tdx_supports_runtime_update(const struct tdx_sys_info *sysinfo) { - int retry = RDRAND_RETRY_LOOPS; - u64 ret; - - do { - preempt_disable(); - ret = __seamcall_dirty_cache(func, fn, args); - preempt_enable(); - } while (ret == TDX_RND_NO_ENTROPY && --retry); - - return ret; + return sysinfo->features.tdx_features0 & TDX_FEATURES0_TD_PRESERVING; } -#define seamcall(_fn, _args) sc_retry(__seamcall, (_fn), (_args)) -#define seamcall_ret(_fn, _args) sc_retry(__seamcall_ret, (_fn), (_args)) -#define seamcall_saved_ret(_fn, _args) sc_retry(__seamcall_saved_ret, (_fn), (_args)) -const char *tdx_dump_mce_info(struct mce *m); -const struct tdx_sys_info *tdx_get_sysinfo(void); - int tdx_guest_keyid_alloc(void); u32 tdx_get_nr_guest_keyids(void); void tdx_guest_keyid_free(unsigned int keyid); @@ -193,6 +162,8 @@ static inline int pg_level_to_tdx_sept_level(enum pg_level level) return level - 1; } +void tdx_sys_disable(void); + u64 tdh_vp_enter(struct tdx_vp *vp, struct tdx_module_args *args); u64 tdh_mng_addcx(struct tdx_td *td, struct page *tdcs_page); u64 tdh_mem_page_add(struct tdx_td *td, u64 gpa, struct page *page, struct page *source, u64 *ext_err1, u64 *ext_err2); @@ -224,13 +195,8 @@ static inline void tdx_init(void) { } static inline u32 tdx_get_nr_guest_keyids(void) { return 0; } static inline const char *tdx_dump_mce_info(struct mce *m) { return NULL; } static inline const struct tdx_sys_info *tdx_get_sysinfo(void) { return NULL; } +static inline void tdx_sys_disable(void) { } #endif /* CONFIG_INTEL_TDX_HOST */ -#ifdef CONFIG_KEXEC_CORE -void tdx_cpu_flush_cache_for_kexec(void); -#else -static inline void tdx_cpu_flush_cache_for_kexec(void) { } -#endif - #endif /* !__ASSEMBLER__ */ #endif /* _ASM_X86_TDX_H */ diff --git a/arch/x86/include/asm/tdx_global_metadata.h b/arch/x86/include/asm/tdx_global_metadata.h index 40689c8dc67eb..41150d546589c 100644 --- a/arch/x86/include/asm/tdx_global_metadata.h +++ b/arch/x86/include/asm/tdx_global_metadata.h @@ -40,6 +40,10 @@ struct tdx_sys_info_td_conf { u64 cpuid_config_values[128][2]; }; +struct tdx_sys_info_handoff { + u16 module_hv; +}; + struct tdx_sys_info { struct tdx_sys_info_version version; struct tdx_sys_info_features features; diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 37080382df548..49d8551d285d9 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -147,6 +147,7 @@ struct vmcs { #define VMX_BASIC_INOUT BIT_ULL(54) #define VMX_BASIC_TRUE_CTLS BIT_ULL(55) #define VMX_BASIC_NO_HW_ERROR_CODE_CC BIT_ULL(56) +#define VMX_BASIC_NO_SEAMRET_INVD_VMCS BIT_ULL(60) static inline u32 vmx_basic_vmcs_revision_id(u64 vmx_basic) { diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index cd796818d94d9..623d4474631a6 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -38,6 +38,7 @@ #include <linux/kdebug.h> #include <asm/cpu.h> #include <asm/reboot.h> +#include <asm/tdx.h> #include <asm/intel_pt.h> #include <asm/crash.h> #include <asm/cmdline.h> @@ -112,6 +113,7 @@ void native_machine_crash_shutdown(struct pt_regs *regs) crash_smp_send_stop(); + tdx_sys_disable(); x86_virt_emergency_disable_virtualization_cpu(); /* diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 0590d399d4f1f..c3f4a389992da 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -347,22 +347,6 @@ int machine_kexec_prepare(struct kimage *image) unsigned long reloc_end = (unsigned long)__relocate_kernel_end; int result; - /* - * Some early TDX-capable platforms have an erratum. A kernel - * partial write (a write transaction of less than cacheline - * lands at memory controller) to TDX private memory poisons that - * memory, and a subsequent read triggers a machine check. - * - * On those platforms the old kernel must reset TDX private - * memory before jumping to the new kernel otherwise the new - * kernel may see unexpected machine check. For simplicity - * just fail kexec/kdump on those platforms. - */ - if (boot_cpu_has_bug(X86_BUG_TDX_PW_MCE)) { - pr_info_once("Not allowed on platform with tdx_pw_mce bug\n"); - return -EOPNOTSUPP; - } - /* Setup the identity mapped 64bit page table */ result = init_pgtable(image, __pa(control_page)); if (result) diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c index 04ce321ebdf39..ed12805bbb444 100644 --- a/arch/x86/kvm/vmx/tdx.c +++ b/arch/x86/kvm/vmx/tdx.c @@ -440,16 +440,6 @@ void tdx_disable_virtualization_cpu(void) tdx_flush_vp(&arg); } local_irq_restore(flags); - - /* - * Flush cache now if kexec is possible: this is necessary to avoid - * having dirty private memory cachelines when the new kernel boots, - * but WBINVD is a relatively expensive operation and doing it during - * kexec can exacerbate races in native_stop_other_cpus(). Do it - * now, since this is a safe moment and there is going to be no more - * TDX activity on this CPU from this point on. - */ - tdx_cpu_flush_cache_for_kexec(); } #define TDX_SEAMCALL_RETRIES 10000 diff --git a/arch/x86/kvm/vmx/tdx.h b/arch/x86/kvm/vmx/tdx.h index b5cd2ffb303e5..ac8323a68b163 100644 --- a/arch/x86/kvm/vmx/tdx.h +++ b/arch/x86/kvm/vmx/tdx.h @@ -3,7 +3,6 @@ #define __KVM_X86_VMX_TDX_H #include "tdx_arch.h" -#include "tdx_errno.h" #ifdef CONFIG_KVM_INTEL_TDX #include "common.h" diff --git a/arch/x86/virt/vmx/tdx/Makefile b/arch/x86/virt/vmx/tdx/Makefile index 90da47eb85eec..d1dbc5cc56978 100644 --- a/arch/x86/virt/vmx/tdx/Makefile +++ b/arch/x86/virt/vmx/tdx/Makefile @@ -1,2 +1,2 @@ # SPDX-License-Identifier: GPL-2.0-only -obj-y += seamcall.o tdx.o +obj-y += seamcall.o seamldr.o tdx.o diff --git a/arch/x86/virt/vmx/tdx/seamcall_internal.h b/arch/x86/virt/vmx/tdx/seamcall_internal.h new file mode 100644 index 0000000000000..be5f446467dfa --- /dev/null +++ b/arch/x86/virt/vmx/tdx/seamcall_internal.h @@ -0,0 +1,109 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * SEAMCALL utilities for TDX host-side operations. + * + * Provides convenient wrappers around SEAMCALL assembly with retry logic, + * error reporting and cache coherency tracking. + * + * Copyright (C) 2021-2023 Intel Corporation + */ + +#ifndef _X86_VIRT_SEAMCALL_INTERNAL_H +#define _X86_VIRT_SEAMCALL_INTERNAL_H + +#include <linux/printk.h> +#include <linux/types.h> +#include <asm/archrandom.h> +#include <asm/processor.h> +#include <asm/tdx.h> + +u64 __seamcall(u64 fn, struct tdx_module_args *args); +u64 __seamcall_ret(u64 fn, struct tdx_module_args *args); +u64 __seamcall_saved_ret(u64 fn, struct tdx_module_args *args); + +typedef u64 (*sc_func_t)(u64 fn, struct tdx_module_args *args); + +static __always_inline u64 __seamcall_dirty_cache(sc_func_t func, u64 fn, + struct tdx_module_args *args) +{ + lockdep_assert_preemption_disabled(); + + /* + * SEAMCALLs are made to the TDX module and can generate dirty + * cachelines of TDX private memory. Mark cache state incoherent + * so that the cache can be flushed during kexec. + * + * This needs to be done before actually making the SEAMCALL, + * because kexec-ing CPU could send NMI to stop remote CPUs, + * in which case even disabling IRQ won't help here. + */ + this_cpu_write(cache_state_incoherent, true); + + return func(fn, args); +} + +static __always_inline u64 sc_retry(sc_func_t func, u64 fn, + struct tdx_module_args *args) +{ + int retry = RDRAND_RETRY_LOOPS; + u64 ret; + + do { + preempt_disable(); + ret = __seamcall_dirty_cache(func, fn, args); + preempt_enable(); + } while (ret == TDX_RND_NO_ENTROPY && --retry); + + return ret; +} + +#define seamcall(_fn, _args) sc_retry(__seamcall, (_fn), (_args)) +#define seamcall_ret(_fn, _args) sc_retry(__seamcall_ret, (_fn), (_args)) +#define seamcall_saved_ret(_fn, _args) sc_retry(__seamcall_saved_ret, (_fn), (_args)) + +typedef void (*sc_err_func_t)(u64 fn, u64 err, struct tdx_module_args *args); + +static inline void seamcall_err(u64 fn, u64 err, struct tdx_module_args *args) +{ + pr_err("SEAMCALL (0x%016llx) failed: 0x%016llx\n", fn, err); +} + +static inline void seamcall_err_ret(u64 fn, u64 err, + struct tdx_module_args *args) +{ + seamcall_err(fn, err, args); + pr_err("RCX 0x%016llx RDX 0x%016llx R08 0x%016llx\n", + args->rcx, args->rdx, args->r8); + pr_err("R09 0x%016llx R10 0x%016llx R11 0x%016llx\n", + args->r9, args->r10, args->r11); +} + +static __always_inline int sc_retry_prerr(sc_func_t func, + sc_err_func_t err_func, + u64 fn, struct tdx_module_args *args) +{ + u64 sret = sc_retry(func, fn, args); + + if (sret == TDX_SUCCESS) + return 0; + + if (sret == TDX_SEAMCALL_VMFAILINVALID) + return -ENODEV; + + if (sret == TDX_SEAMCALL_GP) + return -EOPNOTSUPP; + + if (sret == TDX_SEAMCALL_UD) + return -EACCES; + + err_func(fn, sret, args); + return -EIO; +} + +#define seamcall_prerr(__fn, __args) \ + sc_retry_prerr(__seamcall, seamcall_err, (__fn), (__args)) + +#define seamcall_prerr_ret(__fn, __args) \ + sc_retry_prerr(__seamcall_ret, seamcall_err_ret, (__fn), (__args)) + +#endif /* _X86_VIRT_SEAMCALL_INTERNAL_H */ diff --git a/arch/x86/virt/vmx/tdx/seamldr.c b/arch/x86/virt/vmx/tdx/seamldr.c new file mode 100644 index 0000000000000..b1137ca6150d4 --- /dev/null +++ b/arch/x86/virt/vmx/tdx/seamldr.c @@ -0,0 +1,368 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * P-SEAMLDR support for TDX module management features like runtime updates + * + * Copyright (C) 2025 Intel Corporation + */ +#define pr_fmt(fmt) "seamldr: " fmt + +#include <linux/bug.h> +#include <linux/mm.h> +#include <linux/nmi.h> +#include <linux/slab.h> +#include <linux/spinlock.h> +#include <linux/stop_machine.h> + +#include <asm/cpufeature.h> +#include <asm/cpufeatures.h> +#include <asm/seamldr.h> + +#include "seamcall_internal.h" +#include "tdx.h" + +/* P-SEAMLDR SEAMCALL leaf function */ +#define P_SEAMLDR_INFO 0x8000000000000000 +#define P_SEAMLDR_INSTALL 0x8000000000000001 + +#define SEAMLDR_MAX_NR_MODULE_PAGES 496 +#define SEAMLDR_MAX_NR_SIG_PAGES 1 + +/* + * The seamldr_params "scenario" field specifies the operation mode: + * 0: Install TDX module from scratch (not used by kernel) + * 1: Update existing TDX module to a compatible version + */ +#define SEAMLDR_SCENARIO_UPDATE 1 + +/* + * This is the "SEAMLDR_PARAMS" data structure defined in the + * "SEAM Loader (SEAMLDR) Interface Specification". + * + * It is the in-memory ABI that the kernel passes to the P-SEAMLDR + * to update the TDX module. It breaks the TDX module image up in + * page-size pieces. + */ +struct seamldr_params { + u32 version; + u32 scenario; + u64 sigstruct_pages_pa_list[SEAMLDR_MAX_NR_SIG_PAGES]; + u8 reserved[104]; + u64 module_nr_pages; + u64 module_pages_pa_list[SEAMLDR_MAX_NR_MODULE_PAGES]; +} __packed; + +static_assert(sizeof(struct seamldr_params) == 4096); + +/* + * Serialize P-SEAMLDR calls since the hardware only allows a single CPU to + * interact with P-SEAMLDR simultaneously. Use raw version as the calls can + * be made with interrupts disabled, where plain spinlocks are prohibited in + * PREEMPT_RT kernels as they become sleeping locks. + */ +static DEFINE_RAW_SPINLOCK(seamldr_lock); + +static int seamldr_call(u64 fn, struct tdx_module_args *args) +{ + /* + * With this bug, P-SEAMLDR calls corrupt the VMCS + * pointer and must be avoided. This path should be + * unreachable since sysfs hides the ABIs. + */ + if (boot_cpu_has_bug(X86_BUG_SEAMRET_INVD_VMCS)) { + WARN_ON(1); + return -EINVAL; + } + + guard(raw_spinlock)(&seamldr_lock); + return seamcall_prerr(fn, args); +} + +int seamldr_get_info(struct seamldr_info *seamldr_info) +{ + struct tdx_module_args args = {}; + + /* + * Use slow_virt_to_phys() since @seamldr_info may be allocated on + * the stack. + */ + args.rcx = slow_virt_to_phys(seamldr_info); + return seamldr_call(P_SEAMLDR_INFO, &args); +} +EXPORT_SYMBOL_FOR_MODULES(seamldr_get_info, "tdx-host"); + +/* Call into P-SEAMLDR to install a TDX module update */ +static int seamldr_install(const struct seamldr_params *params) +{ + struct tdx_module_args args = {}; + + args.rcx = __pa(params); + return seamldr_call(P_SEAMLDR_INSTALL, &args); +} + +#define TDX_IMAGE_VERSION_2 0x200 + +/* First page of the on-disk module update image: */ +struct tdx_image_header { + u16 version; + u16 checksum; + u8 signature[8]; + u32 sigstruct_nr_pages; + u32 module_nr_pages; + u8 reserved[4076]; +} __packed; + +#define TDX_IMAGE_HEADER_SIZE sizeof(struct tdx_image_header) +static_assert(TDX_IMAGE_HEADER_SIZE == 4096); + +/* + * Intel TDX module update ABI structure. aka. "TDX module blob". + * This is the on-disk format that fw_upload lands in a kernel + * buffer. + * + * @payload contains sigstruct pages followed by module pages. + */ +struct tdx_image { + struct tdx_image_header header; + u8 payload[]; +}; + +/* + * Given a vmalloc() allocation, write all of the backing physical + * addresses to pa_list[]. Caller guarantees that the array is big + * enough. + */ +static void populate_pa_list(u64 *pa_list, const u8 *vmalloc_addr, u32 vmalloc_len_pages) +{ + int i; + + for (i = 0; i < vmalloc_len_pages; i++) { + unsigned long offset = i * PAGE_SIZE; + unsigned long pfn = vmalloc_to_pfn(&vmalloc_addr[offset]); + + pa_list[i] = pfn << PAGE_SHIFT; + } +} + +static void populate_seamldr_params(struct seamldr_params *params, + const u8 *sig, u32 sig_nr_pages, + const u8 *mod, u32 mod_nr_pages) +{ + params->version = 0; + params->scenario = SEAMLDR_SCENARIO_UPDATE; + params->module_nr_pages = mod_nr_pages; + + populate_pa_list(params->sigstruct_pages_pa_list, sig, sig_nr_pages); + populate_pa_list(params->module_pages_pa_list, mod, mod_nr_pages); +} + +/* + * @image points to a vmalloc()'d 'struct tdx_image'. Transform + * it into @params which is the P-SEAMLDR ABI format. + */ +static int init_seamldr_params(struct seamldr_params *params, + const struct tdx_image *image, + u32 image_len) +{ + const struct tdx_image_header *header = &image->header; + + u32 sigstruct_len = header->sigstruct_nr_pages * PAGE_SIZE; + u32 module_len = header->module_nr_pages * PAGE_SIZE; + + u8 *header_start = (u8 *)header; + u8 *header_end = header_start + TDX_IMAGE_HEADER_SIZE; + + u8 *sigstruct_start = header_end; + u8 *sigstruct_end = sigstruct_start + sigstruct_len; + + u8 *module_start = sigstruct_end; + + /* Check the calculated payload size against the image size. */ + if (TDX_IMAGE_HEADER_SIZE + sigstruct_len + module_len != image_len) + return -EINVAL; + + /* Reject unsupported tdx_image ABI versions. */ + if (header->version != TDX_IMAGE_VERSION_2) + return -EINVAL; + + if (header->sigstruct_nr_pages > SEAMLDR_MAX_NR_SIG_PAGES || + header->module_nr_pages > SEAMLDR_MAX_NR_MODULE_PAGES) + return -EINVAL; + + if (memcmp(header->signature, "TDX-BLOB", sizeof(header->signature))) + return -EINVAL; + + if (memchr_inv(header->reserved, 0, sizeof(header->reserved))) + return -EINVAL; + + populate_seamldr_params(params, sigstruct_start, header->sigstruct_nr_pages, + module_start, header->module_nr_pages); + return 0; +} + +/* + * During a TDX module update, all CPUs start from MODULE_UPDATE_START and + * progress to MODULE_UPDATE_DONE. Each state is associated with certain + * work. For some states, just one CPU needs to perform the work, while + * other CPUs just wait during those states. + */ +enum module_update_state { + MODULE_UPDATE_START, + MODULE_UPDATE_SHUTDOWN, + MODULE_UPDATE_CPU_INSTALL, + MODULE_UPDATE_CPU_INIT, + MODULE_UPDATE_RUN_UPDATE, + MODULE_UPDATE_DONE, +}; + +static struct update_ctrl { + enum module_update_state state; + int num_ack; + int num_failed; + /* + * Protect update_ctrl. Raw spinlock as it will be acquired from + * interrupt-disabled contexts. + */ + raw_spinlock_t lock; +} update_ctrl; + +/* Called with ctrl->lock held or during initialization. */ +static void __set_target_state(struct update_ctrl *ctrl, + enum module_update_state newstate) +{ + /* Reset ack counter. */ + ctrl->num_ack = 0; + ctrl->state = newstate; +} + +/* Last one to ack a state moves to the next state. */ +static void ack_state(struct update_ctrl *ctrl, int result) +{ + raw_spin_lock(&ctrl->lock); + + ctrl->num_failed += !!result; + ctrl->num_ack++; + if (ctrl->num_ack == num_online_cpus() && !ctrl->num_failed) + __set_target_state(ctrl, ctrl->state + 1); + + raw_spin_unlock(&ctrl->lock); +} + +static void init_state(struct update_ctrl *ctrl) +{ + raw_spin_lock_init(&ctrl->lock); + __set_target_state(ctrl, MODULE_UPDATE_START + 1); + ctrl->num_failed = 0; +} + +/* + * See multi_cpu_stop() from where this multi-cpu state-machine was + * adopted. + */ +static int do_seamldr_install_module(void *seamldr_params) +{ + enum module_update_state curstate = MODULE_UPDATE_START; + enum module_update_state newstate; + bool is_lead_cpu = false; + int ret = 0; + + /* + * Some steps must be run on exactly one CPU. Pick a "lead" CPU to + * execute those steps. Use CPU 0 because it is always online. + */ + if (smp_processor_id() == 0) + is_lead_cpu = true; + + do { + newstate = READ_ONCE(update_ctrl.state); + + if (curstate == newstate) { + cpu_relax(); + continue; + } + + curstate = newstate; + switch (curstate) { + case MODULE_UPDATE_SHUTDOWN: + if (is_lead_cpu) + ret = tdx_module_shutdown(); + break; + case MODULE_UPDATE_CPU_INSTALL: + ret = seamldr_install(seamldr_params); + break; + case MODULE_UPDATE_CPU_INIT: + ret = tdx_cpu_enable(); + break; + case MODULE_UPDATE_RUN_UPDATE: + if (is_lead_cpu) + ret = tdx_module_run_update(); + break; + default: + break; + } + + ack_state(&update_ctrl, ret); + } while (curstate != MODULE_UPDATE_DONE && + !READ_ONCE(update_ctrl.num_failed)); + + return ret; +} + +/** + * seamldr_install_module - Install a new TDX module. + * @data: Pointer to the TDX module image. + * @data_len: Size of the TDX module image. + * + * Returns 0 on success, negative error code on failure. + */ +int seamldr_install_module(const u8 *data, u32 data_len) +{ + struct seamldr_params *params; + const struct tdx_image *image; + int ret; + + /* + * init_seamldr_params() reads the header early. + * Ensure there is enough data to do at least that. + */ + if (data_len < TDX_IMAGE_HEADER_SIZE) + return -EINVAL; + + image = (const struct tdx_image *)data; + + params = kzalloc_obj(*params); + if (!params) + return -ENOMEM; + + /* Populate 'params' from 'image'. */ + ret = init_seamldr_params(params, image, data_len); + if (ret) + goto out; + + /* Ensure a stable set of online CPUs for the update process. */ + cpus_read_lock(); + init_state(&update_ctrl); + ret = stop_machine_cpuslocked(do_seamldr_install_module, params, + cpu_online_mask); + cpus_read_unlock(); + +out: + kfree(params); + return ret; +} +EXPORT_SYMBOL_FOR_MODULES(seamldr_install_module, "tdx-host"); + +/* + * stop_machine() does not interrupt preemption-disabled regions. + * Simply disabling preempt prevents updates. + */ +void seamldr_lock_module_update(void) +{ + preempt_disable(); +} +EXPORT_SYMBOL_FOR_MODULES(seamldr_lock_module_update, "tdx-host"); + +void seamldr_unlock_module_update(void) +{ + preempt_enable(); +} +EXPORT_SYMBOL_FOR_MODULES(seamldr_unlock_module_update, "tdx-host"); diff --git a/arch/x86/virt/vmx/tdx/tdx.c b/arch/x86/virt/vmx/tdx/tdx.c index cb9b3210ab710..b15269b5941dc 100644 --- a/arch/x86/virt/vmx/tdx/tdx.c +++ b/arch/x86/virt/vmx/tdx/tdx.c @@ -37,12 +37,23 @@ #include <asm/msr.h> #include <asm/cpufeature.h> #include <asm/tdx.h> +#include <asm/shared/tdx_errno.h> #include <asm/cpu_device_id.h> #include <asm/processor.h> #include <asm/mce.h> #include <asm/virt.h> +#include <asm/vmx.h> + +#include "seamcall_internal.h" #include "tdx.h" +struct tdx_module_state { + bool initialized; + bool sysinit_done; + int sysinit_ret; +}; + +static struct tdx_module_state tdx_module_state; static u32 tdx_global_keyid __ro_after_init; static u32 tdx_guest_keyid_start __ro_after_init; static u32 tdx_nr_guest_keyids __ro_after_init; @@ -56,53 +67,9 @@ static struct tdmr_info_list tdx_tdmr_list; /* All TDX-usable memory regions. Protected by mem_hotplug_lock. */ static LIST_HEAD(tdx_memlist); -static struct tdx_sys_info tdx_sysinfo __ro_after_init; -static bool tdx_module_initialized __ro_after_init; - -typedef void (*sc_err_func_t)(u64 fn, u64 err, struct tdx_module_args *args); - -static inline void seamcall_err(u64 fn, u64 err, struct tdx_module_args *args) -{ - pr_err("SEAMCALL (0x%016llx) failed: 0x%016llx\n", fn, err); -} - -static inline void seamcall_err_ret(u64 fn, u64 err, - struct tdx_module_args *args) -{ - seamcall_err(fn, err, args); - pr_err("RCX 0x%016llx RDX 0x%016llx R08 0x%016llx\n", - args->rcx, args->rdx, args->r8); - pr_err("R09 0x%016llx R10 0x%016llx R11 0x%016llx\n", - args->r9, args->r10, args->r11); -} +static struct tdx_sys_info tdx_sysinfo; -static __always_inline int sc_retry_prerr(sc_func_t func, - sc_err_func_t err_func, - u64 fn, struct tdx_module_args *args) -{ - u64 sret = sc_retry(func, fn, args); - - if (sret == TDX_SUCCESS) - return 0; - - if (sret == TDX_SEAMCALL_VMFAILINVALID) - return -ENODEV; - - if (sret == TDX_SEAMCALL_GP) - return -EOPNOTSUPP; - - if (sret == TDX_SEAMCALL_UD) - return -EACCES; - - err_func(fn, sret, args); - return -EIO; -} - -#define seamcall_prerr(__fn, __args) \ - sc_retry_prerr(__seamcall, seamcall_err, (__fn), (__args)) - -#define seamcall_prerr_ret(__fn, __args) \ - sc_retry_prerr(__seamcall_ret, seamcall_err_ret, (__fn), (__args)) +static DEFINE_RAW_SPINLOCK(sysinit_lock); /* * Do the module global initialization once and return its result. @@ -111,31 +78,34 @@ static __always_inline int sc_retry_prerr(sc_func_t func, static int try_init_module_global(void) { struct tdx_module_args args = {}; - static DEFINE_RAW_SPINLOCK(sysinit_lock); - static bool sysinit_done; - static int sysinit_ret; + int ret; raw_spin_lock(&sysinit_lock); - if (sysinit_done) + /* Return the "cached" return code. */ + if (tdx_module_state.sysinit_done) { + ret = tdx_module_state.sysinit_ret; goto out; + } /* RCX is module attributes and all bits are reserved */ args.rcx = 0; - sysinit_ret = seamcall_prerr(TDH_SYS_INIT, &args); + ret = seamcall_prerr(TDH_SYS_INIT, &args); /* * The first SEAMCALL also detects the TDX module, thus * it can fail due to the TDX module is not loaded. * Dump message to let the user know. */ - if (sysinit_ret == -ENODEV) + if (ret == -ENODEV) pr_err("module not loaded\n"); - sysinit_done = true; + /* Save the return code for later callers. */ + tdx_module_state.sysinit_done = true; + tdx_module_state.sysinit_ret = ret; out: raw_spin_unlock(&sysinit_lock); - return sysinit_ret; + return ret; } /** @@ -143,7 +113,7 @@ out: * (and TDX module global initialization SEAMCALL if not done) on local cpu to * make this cpu be ready to run any other SEAMCALLs. */ -static int tdx_cpu_enable(void) +int tdx_cpu_enable(void) { struct tdx_module_args args = {}; int ret; @@ -184,6 +154,17 @@ static int tdx_online_cpu(unsigned int cpu) return ret; } +static void tdx_cpu_flush_cache(void) +{ + lockdep_assert_preemption_disabled(); + + if (!this_cpu_read(cache_state_incoherent)) + return; + + wbinvd(); + this_cpu_write(cache_state_incoherent, false); +} + static int tdx_offline_cpu(unsigned int cpu) { int i; @@ -220,17 +201,34 @@ static int tdx_offline_cpu(unsigned int cpu) return -EBUSY; done: + /* + * Flush cache on the CPU going offline to ensure no dirty + * cachelines of TDX private memory remain. This may be + * redundant with WBINVD done elsewhere during CPU offline + * (e.g. hlt_play_dead()), but do it explicitly for safety. + */ + tdx_cpu_flush_cache(); x86_virt_put_ref(X86_FEATURE_VMX); return 0; } static void tdx_shutdown_cpu(void *ign) { + /* + * Flush cache in preparation for kexec - this is necessary to avoid + * having dirty private memory cachelines when the new kernel boots, + * but WBINVD is a relatively expensive operation and doing it during + * kexec can exacerbate races in native_stop_other_cpus(). Do it + * now, since this is a safe moment and there is going to be no more + * TDX activity on this CPU from this point on. + */ + tdx_cpu_flush_cache(); x86_virt_put_ref(X86_FEATURE_VMX); } static void tdx_shutdown(void *ign) { + tdx_sys_disable(); on_each_cpu(tdx_shutdown_cpu, NULL, 1); } @@ -330,7 +328,7 @@ err: return ret; } -static __init int read_sys_metadata_field(u64 field_id, u64 *data) +static int read_sys_metadata_field(u64 field_id, u64 *data) { struct tdx_module_args args = {}; int ret; @@ -1270,12 +1268,70 @@ static __init int tdx_enable(void) register_syscore(&tdx_syscore); - tdx_module_initialized = true; + tdx_module_state.initialized = true; pr_info("TDX-Module initialized\n"); return 0; } subsys_initcall(tdx_enable); +int tdx_module_shutdown(void) +{ + struct tdx_sys_info_handoff handoff = {}; + struct tdx_module_args args = {}; + int ret; + int cpu; + + ret = get_tdx_sys_info_handoff(&handoff); + /* + * Handoff information is required for proper + * shutdown. Refuse to shut down without it. + */ + if (ret) + return ret; + + /* + * Use the module's handoff version as it is the highest the + * module can produce and most likely supported by newer modules. + */ + args.rcx = handoff.module_hv; + + ret = seamcall_prerr(TDH_SYS_SHUTDOWN, &args); + if (ret) + return ret; + + /* + * Clear global and per-CPU initialization flags so the new module + * can be fully re-initialized after a successful update. + * + * No locks needed as no concurrent accesses can occur here. + */ + memset(&tdx_module_state, 0, sizeof(tdx_module_state)); + for_each_possible_cpu(cpu) + per_cpu(tdx_lp_initialized, cpu) = false; + + return 0; +} + +int tdx_module_run_update(void) +{ + struct tdx_module_args args = {}; + int ret; + + ret = seamcall_prerr(TDH_SYS_UPDATE, &args); + if (ret) + return ret; + + ret = get_tdx_sys_info_version(&tdx_sysinfo.version); + /* + * Only fails if there is something unexpected + * and severely wrong with the module. + */ + WARN_ON_ONCE(ret); + + tdx_module_state.initialized = true; + return 0; +} + static bool is_pamt_page(unsigned long phys) { struct tdmr_info_list *tdmr_list = &tdx_tdmr_list; @@ -1453,6 +1509,8 @@ static struct notifier_block tdx_memory_nb = { static void __init check_tdx_erratum(void) { + u64 basic_msr; + /* * These CPUs have an erratum. A partial write from non-TD * software (e.g. via MOVNTI variants or UC/WC mapping) to TDX @@ -1464,6 +1522,14 @@ static void __init check_tdx_erratum(void) case INTEL_EMERALDRAPIDS_X: setup_force_cpu_bug(X86_BUG_TDX_PW_MCE); } + + /* + * Some TDX-capable CPUs have an erratum where the current VMCS is + * cleared after calling into P-SEAMLDR. + */ + rdmsrq(MSR_IA32_VMX_BASIC, basic_msr); + if (!(basic_msr & VMX_BASIC_NO_SEAMRET_INVD_VMCS)) + setup_force_cpu_bug(X86_BUG_SEAMRET_INVD_VMCS); } void __init tdx_init(void) @@ -1525,12 +1591,12 @@ void __init tdx_init(void) const struct tdx_sys_info *tdx_get_sysinfo(void) { - if (!tdx_module_initialized) + if (!tdx_module_state.initialized) return NULL; return (const struct tdx_sys_info *)&tdx_sysinfo; } -EXPORT_SYMBOL_FOR_KVM(tdx_get_sysinfo); +EXPORT_SYMBOL_FOR_MODULES(tdx_get_sysinfo, "kvm-intel,tdx-host"); u32 tdx_get_nr_guest_keyids(void) { @@ -1921,21 +1987,32 @@ u64 tdh_phymem_page_wbinvd_hkid(u64 hkid, struct page *page) } EXPORT_SYMBOL_FOR_KVM(tdh_phymem_page_wbinvd_hkid); -#ifdef CONFIG_KEXEC_CORE -void tdx_cpu_flush_cache_for_kexec(void) +void tdx_sys_disable(void) { - lockdep_assert_preemption_disabled(); + struct tdx_module_args args = {}; + u64 ret; - if (!this_cpu_read(cache_state_incoherent)) - return; + /* + * Don't loop forever. + * + * - TDX_INTERRUPTED_RESUMABLE guarantees forward progress between + * calls. + * + * - TDX_SYS_BUSY could be returned due to contention with other + * TDH.SYS.* SEAMCALLs, but will lock out *new* TDH.SYS.* SEAMCALLs, + * so that SYS.DISABLE can eventually make progress. + * + * This is a 'destructive' SEAMCALL, in that no other SEAMCALL can be + * run after this until a full reinitialization is done. + */ + do { + ret = seamcall(TDH_SYS_DISABLE, &args); + } while (ret == TDX_INTERRUPTED_RESUMABLE || ret == TDX_SYS_BUSY); /* - * Private memory cachelines need to be clean at the time of - * kexec. Write them back now, as the caller promises that - * there should be no more SEAMCALLs on this CPU. + * Print SEAMCALL failures, but not SW-defined error codes + * (SEAMCALL faulted with #GP/#UD, TDX not supported). */ - wbinvd(); - this_cpu_write(cache_state_incoherent, false); + if (ret && (ret & TDX_SW_ERROR) != TDX_SW_ERROR) + pr_err("TDH.SYS.DISABLE failed: 0x%016llx\n", ret); } -EXPORT_SYMBOL_FOR_KVM(tdx_cpu_flush_cache_for_kexec); -#endif diff --git a/arch/x86/virt/vmx/tdx/tdx.h b/arch/x86/virt/vmx/tdx/tdx.h index dde219c823b41..bdfd0e1e337ac 100644 --- a/arch/x86/virt/vmx/tdx/tdx.h +++ b/arch/x86/virt/vmx/tdx/tdx.h @@ -46,6 +46,9 @@ #define TDH_PHYMEM_PAGE_WBINVD 41 #define TDH_VP_WR 43 #define TDH_SYS_CONFIG 45 +#define TDH_SYS_SHUTDOWN 52 +#define TDH_SYS_UPDATE 53 +#define TDH_SYS_DISABLE 69 /* * SEAMCALL leaf: @@ -84,9 +87,6 @@ struct tdmr_info { DECLARE_FLEX_ARRAY(struct tdmr_reserved_area, reserved_areas); } __packed __aligned(TDMR_INFO_ALIGNMENT); -/* Bit definitions of TDX_FEATURES0 metadata field */ -#define TDX_FEATURES0_NO_RBP_MOD BIT(18) - /* * Do not put any hardware-defined TDX structure representations below * this comment! @@ -110,4 +110,7 @@ struct tdmr_info_list { int max_tdmrs; /* How many 'tdmr_info's are allocated */ }; +int tdx_module_shutdown(void); +int tdx_module_run_update(void); + #endif diff --git a/arch/x86/virt/vmx/tdx/tdx_global_metadata.c b/arch/x86/virt/vmx/tdx/tdx_global_metadata.c index c7db393a9cfb1..e49c300f23d43 100644 --- a/arch/x86/virt/vmx/tdx/tdx_global_metadata.c +++ b/arch/x86/virt/vmx/tdx/tdx_global_metadata.c @@ -7,7 +7,7 @@ * Include this file to other C file instead. */ -static __init int get_tdx_sys_info_version(struct tdx_sys_info_version *sysinfo_version) +static int get_tdx_sys_info_version(struct tdx_sys_info_version *sysinfo_version) { int ret = 0; u64 val; @@ -100,13 +100,26 @@ static __init int get_tdx_sys_info_td_conf(struct tdx_sys_info_td_conf *sysinfo_ return ret; } +static int get_tdx_sys_info_handoff(struct tdx_sys_info_handoff *sysinfo_handoff) +{ + int ret; + u64 val; + + ret = read_sys_metadata_field(0x8900000100000000, &val); + if (ret) + return ret; + + sysinfo_handoff->module_hv = val; + return 0; +} + static __init int get_tdx_sys_info(struct tdx_sys_info *sysinfo) { int ret = 0; ret = ret ?: get_tdx_sys_info_version(&sysinfo->version); - pr_info("Module version: %u.%u.%02u\n", + pr_info("Module version: " TDX_VERSION_FMT "\n", sysinfo->version.major_version, sysinfo->version.minor_version, sysinfo->version.update_version); diff --git a/drivers/virt/coco/Kconfig b/drivers/virt/coco/Kconfig index df1cfaf26c658..f7691f64fbe32 100644 --- a/drivers/virt/coco/Kconfig +++ b/drivers/virt/coco/Kconfig @@ -17,5 +17,7 @@ source "drivers/virt/coco/arm-cca-guest/Kconfig" source "drivers/virt/coco/guest/Kconfig" endif +source "drivers/virt/coco/tdx-host/Kconfig" + config TSM bool diff --git a/drivers/virt/coco/Makefile b/drivers/virt/coco/Makefile index cb52021912b34..b323b0ae4f82d 100644 --- a/drivers/virt/coco/Makefile +++ b/drivers/virt/coco/Makefile @@ -6,6 +6,7 @@ obj-$(CONFIG_EFI_SECRET) += efi_secret/ obj-$(CONFIG_ARM_PKVM_GUEST) += pkvm-guest/ obj-$(CONFIG_SEV_GUEST) += sev-guest/ obj-$(CONFIG_INTEL_TDX_GUEST) += tdx-guest/ +obj-$(CONFIG_INTEL_TDX_HOST) += tdx-host/ obj-$(CONFIG_ARM_CCA_GUEST) += arm-cca-guest/ obj-$(CONFIG_TSM) += tsm-core.o obj-$(CONFIG_TSM_GUEST) += guest/ diff --git a/drivers/virt/coco/tdx-host/Kconfig b/drivers/virt/coco/tdx-host/Kconfig new file mode 100644 index 0000000000000..57d0c01a43577 --- /dev/null +++ b/drivers/virt/coco/tdx-host/Kconfig @@ -0,0 +1,6 @@ +config TDX_HOST_SERVICES + tristate + depends on INTEL_TDX_HOST + select FW_LOADER + select FW_UPLOAD + default m diff --git a/drivers/virt/coco/tdx-host/Makefile b/drivers/virt/coco/tdx-host/Makefile new file mode 100644 index 0000000000000..e61e749a8dffb --- /dev/null +++ b/drivers/virt/coco/tdx-host/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_TDX_HOST_SERVICES) += tdx-host.o diff --git a/drivers/virt/coco/tdx-host/tdx-host.c b/drivers/virt/coco/tdx-host/tdx-host.c new file mode 100644 index 0000000000000..d48952968e86c --- /dev/null +++ b/drivers/virt/coco/tdx-host/tdx-host.c @@ -0,0 +1,239 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * TDX host user interface driver + * + * Copyright (C) 2025 Intel Corporation + */ + +#include <linux/device/faux.h> +#include <linux/firmware.h> +#include <linux/module.h> +#include <linux/mod_devicetable.h> +#include <linux/sysfs.h> + +#include <asm/cpu_device_id.h> +#include <asm/seamldr.h> +#include <asm/tdx.h> + +static const struct x86_cpu_id tdx_host_ids[] = { + X86_MATCH_FEATURE(X86_FEATURE_TDX_HOST_PLATFORM, NULL), + {} +}; +MODULE_DEVICE_TABLE(x86cpu, tdx_host_ids); + +static ssize_t version_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + const struct tdx_sys_info *tdx_sysinfo = tdx_get_sysinfo(); + const struct tdx_sys_info_version *ver; + int ret; + + if (!tdx_sysinfo) + return -ENXIO; + + /* + * The version number can change during an update. + * Lock out updates while printing the version. + */ + seamldr_lock_module_update(); + + ver = &tdx_sysinfo->version; + ret = sysfs_emit(buf, TDX_VERSION_FMT "\n", ver->major_version, + ver->minor_version, + ver->update_version); + seamldr_unlock_module_update(); + + return ret; +} +static DEVICE_ATTR_RO(version); + +static struct attribute *tdx_host_attrs[] = { + &dev_attr_version.attr, + NULL, +}; + +static const struct attribute_group tdx_host_group = { + .attrs = tdx_host_attrs, +}; + +static ssize_t seamldr_version_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct seamldr_info info; + int ret; + + ret = seamldr_get_info(&info); + if (ret) + return ret; + + return sysfs_emit(buf, TDX_VERSION_FMT "\n", info.major_version, + info.minor_version, + info.update_version); +} + +static ssize_t num_remaining_updates_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct seamldr_info info; + int ret; + + ret = seamldr_get_info(&info); + if (ret) + return ret; + + return sysfs_emit(buf, "%u\n", info.num_remaining_updates); +} + +/* + * These attributes are intended for managing TDX module updates. Reading + * them issues a slow, serialized P-SEAMLDR query, so keep them admin-only. + */ +static DEVICE_ATTR_ADMIN_RO(seamldr_version); +static DEVICE_ATTR_ADMIN_RO(num_remaining_updates); + +static struct attribute *seamldr_attrs[] = { + &dev_attr_seamldr_version.attr, + &dev_attr_num_remaining_updates.attr, + NULL, +}; + +static bool supports_runtime_update(void) +{ + const struct tdx_sys_info *sysinfo = tdx_get_sysinfo(); + + if (!sysinfo) + return false; + + if (!tdx_supports_runtime_update(sysinfo)) + return false; + + /* + * This bug makes P-SEAMLDR calls clobber the current VMCS + * which breaks KVM. Avoid P-SEAMLDR calls by hiding all + * attributes if the CPU has this bug. + */ + if (boot_cpu_has_bug(X86_BUG_SEAMRET_INVD_VMCS)) + return false; + + return true; +} + +static umode_t seamldr_group_visible(struct kobject *kobj, struct attribute *attr, int idx) +{ + if (!supports_runtime_update()) + return 0; + + return attr->mode; +} + +static const struct attribute_group seamldr_group = { + .attrs = seamldr_attrs, + .is_visible = seamldr_group_visible, +}; + +static const struct attribute_group *tdx_host_groups[] = { + &tdx_host_group, + &seamldr_group, + NULL, +}; + +static enum fw_upload_err tdx_fw_prepare(struct fw_upload *fwl, + const u8 *data, u32 data_len) +{ + return FW_UPLOAD_ERR_NONE; +} + +static enum fw_upload_err tdx_fw_write(struct fw_upload *fwl, const u8 *data, + u32 offset, u32 data_len, u32 *written) +{ + int ret; + + ret = seamldr_install_module(data, data_len); + switch (ret) { + case 0: + *written = data_len; + return FW_UPLOAD_ERR_NONE; + default: + return FW_UPLOAD_ERR_FW_INVALID; + } +} + +static enum fw_upload_err tdx_fw_poll_complete(struct fw_upload *fwl) +{ + /* + * The upload completed during tdx_fw_write(). + * Never poll for completion. + */ + return FW_UPLOAD_ERR_NONE; +} + +static void tdx_fw_cancel(struct fw_upload *fwl) +{ + /* + * TDX module updates are not cancellable. + * Provide a no-op callback to satisfy fw_upload_ops. + */ +} + +static const struct fw_upload_ops tdx_fw_ops = { + .prepare = tdx_fw_prepare, + .write = tdx_fw_write, + .poll_complete = tdx_fw_poll_complete, + .cancel = tdx_fw_cancel, +}; + +static void seamldr_deinit(void *tdx_fwl) +{ + firmware_upload_unregister(tdx_fwl); +} + +static int seamldr_init(struct device *dev) +{ + struct fw_upload *tdx_fwl; + + if (!supports_runtime_update()) + return 0; + + tdx_fwl = firmware_upload_register(THIS_MODULE, dev, "tdx_module", + &tdx_fw_ops, NULL); + if (IS_ERR(tdx_fwl)) + return PTR_ERR(tdx_fwl); + + return devm_add_action_or_reset(dev, seamldr_deinit, tdx_fwl); +} + +static int tdx_host_probe(struct faux_device *fdev) +{ + return seamldr_init(&fdev->dev); +} + +static const struct faux_device_ops tdx_host_ops = { + .probe = tdx_host_probe, +}; + +static struct faux_device *fdev; + +static int __init tdx_host_init(void) +{ + if (!x86_match_cpu(tdx_host_ids) || !tdx_get_sysinfo()) + return -ENODEV; + + fdev = faux_device_create_with_groups(KBUILD_MODNAME, NULL, + &tdx_host_ops, + tdx_host_groups); + if (!fdev) + return -ENODEV; + + return 0; +} +module_init(tdx_host_init); + +static void __exit tdx_host_exit(void) +{ + faux_device_destroy(fdev); +} +module_exit(tdx_host_exit); + +MODULE_DESCRIPTION("TDX Host Services"); +MODULE_LICENSE("GPL"); |
