aboutsummaryrefslogtreecommitdiffstats
diff options
-rw-r--r--Documentation/ABI/testing/sysfs-devices-faux-tdx-host26
-rw-r--r--Documentation/arch/x86/tdx.rst7
-rw-r--r--arch/x86/include/asm/cpufeatures.h1
-rw-r--r--arch/x86/include/asm/seamldr.h38
-rw-r--r--arch/x86/include/asm/shared/tdx.h1
-rw-r--r--arch/x86/include/asm/shared/tdx_errno.h (renamed from arch/x86/kvm/vmx/tdx_errno.h)8
-rw-r--r--arch/x86/include/asm/tdx.h70
-rw-r--r--arch/x86/include/asm/tdx_global_metadata.h4
-rw-r--r--arch/x86/include/asm/vmx.h1
-rw-r--r--arch/x86/kernel/crash.c2
-rw-r--r--arch/x86/kernel/machine_kexec_64.c16
-rw-r--r--arch/x86/kvm/vmx/tdx.c10
-rw-r--r--arch/x86/kvm/vmx/tdx.h1
-rw-r--r--arch/x86/virt/vmx/tdx/Makefile2
-rw-r--r--arch/x86/virt/vmx/tdx/seamcall_internal.h109
-rw-r--r--arch/x86/virt/vmx/tdx/seamldr.c368
-rw-r--r--arch/x86/virt/vmx/tdx/tdx.c219
-rw-r--r--arch/x86/virt/vmx/tdx/tdx.h9
-rw-r--r--arch/x86/virt/vmx/tdx/tdx_global_metadata.c17
-rw-r--r--drivers/virt/coco/Kconfig2
-rw-r--r--drivers/virt/coco/Makefile1
-rw-r--r--drivers/virt/coco/tdx-host/Kconfig6
-rw-r--r--drivers/virt/coco/tdx-host/Makefile1
-rw-r--r--drivers/virt/coco/tdx-host/tdx-host.c239
24 files changed, 991 insertions, 167 deletions
diff --git a/Documentation/ABI/testing/sysfs-devices-faux-tdx-host b/Documentation/ABI/testing/sysfs-devices-faux-tdx-host
new file mode 100644
index 0000000000000..c9cb273abf320
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-devices-faux-tdx-host
@@ -0,0 +1,26 @@
+What: /sys/devices/faux/tdx_host/version
+Contact: linux-coco@lists.linux.dev
+Description: (RO) Report the version of the loaded TDX module.
+ Formatted as "major.minor.update". Used by TDX module
+ update tooling. Example: "1.2.03".
+
+What: /sys/devices/faux/tdx_host/seamldr_version
+Contact: linux-coco@lists.linux.dev
+Description: (RO) Report the version of the loaded P-SEAMLDR.
+ Formatted as a TDX module version. Used by TDX module
+ update tooling.
+
+What: /sys/devices/faux/tdx_host/num_remaining_updates
+Contact: linux-coco@lists.linux.dev
+Description: (RO) Report the number of remaining updates. TDX maintains a
+ log about each TDX module that has been loaded. This log has
+ a finite size, which limits the number of TDX module updates
+ that can be performed.
+
+ After each successful update, the number reduces by one. Once it
+ reaches zero, further updates will fail until next reboot. The
+ number is always zero if the P-SEAMLDR doesn't support updates.
+
+ See Intel Trust Domain Extensions - SEAM Loader (SEAMLDR)
+ Interface Specification, Chapter "SEAMLDR_INFO" and Chapter
+ "SEAMLDR.INSTALL" for more information.
diff --git a/Documentation/arch/x86/tdx.rst b/Documentation/arch/x86/tdx.rst
index ff6b110291bc6..1a3b5bac10216 100644
--- a/Documentation/arch/x86/tdx.rst
+++ b/Documentation/arch/x86/tdx.rst
@@ -138,13 +138,6 @@ If the platform has such erratum, the kernel prints additional message in
machine check handler to tell user the machine check may be caused by
kernel bug on TDX private memory.
-Kexec
-~~~~~~~
-
-Currently kexec doesn't work on the TDX platforms with the aforementioned
-erratum. It fails when loading the kexec kernel image. Otherwise it
-works normally.
-
Interaction vs S3 and deeper states
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 1d506e5d6f46a..7b572bc24265c 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -573,4 +573,5 @@
#define X86_BUG_ITS_NATIVE_ONLY X86_BUG( 1*32+ 8) /* "its_native_only" CPU is affected by ITS, VMX is not affected */
#define X86_BUG_TSA X86_BUG( 1*32+ 9) /* "tsa" CPU is affected by Transient Scheduler Attacks */
#define X86_BUG_VMSCAPE X86_BUG( 1*32+10) /* "vmscape" CPU is affected by VMSCAPE attacks from guests */
+#define X86_BUG_SEAMRET_INVD_VMCS X86_BUG( 1*32+11) /* "seamret_invd_vmcs" SEAMRET from P-SEAMLDR clears the current VMCS */
#endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/arch/x86/include/asm/seamldr.h b/arch/x86/include/asm/seamldr.h
new file mode 100644
index 0000000000000..cfc6a1b1a440b
--- /dev/null
+++ b/arch/x86/include/asm/seamldr.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_SEAMLDR_H
+#define _ASM_X86_SEAMLDR_H
+
+#include <linux/types.h>
+
+/*
+ * This is the "SEAMLDR_INFO" data structure defined in the
+ * "SEAM Loader (SEAMLDR) Interface Specification".
+ *
+ * Must be aligned to a 256-byte boundary.
+ */
+struct seamldr_info {
+ u32 version;
+ u32 attributes;
+ u32 vendor_id;
+ u32 build_date;
+ u16 build_num;
+ u16 minor_version;
+ u16 major_version;
+ u16 update_version;
+ u32 acm_x2apicid;
+ u32 num_remaining_updates;
+ u8 seam_info[128];
+ u8 seam_ready;
+ u8 seam_debug;
+ u8 p_seam_ready;
+ u8 reserved[93];
+} __packed __aligned(256);
+
+static_assert(sizeof(struct seamldr_info) == 256);
+
+int seamldr_get_info(struct seamldr_info *seamldr_info);
+int seamldr_install_module(const u8 *data, u32 data_len);
+void seamldr_lock_module_update(void);
+void seamldr_unlock_module_update(void);
+
+#endif /* _ASM_X86_SEAMLDR_H */
diff --git a/arch/x86/include/asm/shared/tdx.h b/arch/x86/include/asm/shared/tdx.h
index 049638e3da743..f20e91d7ac35b 100644
--- a/arch/x86/include/asm/shared/tdx.h
+++ b/arch/x86/include/asm/shared/tdx.h
@@ -4,6 +4,7 @@
#include <linux/bits.h>
#include <linux/types.h>
+#include <asm/shared/tdx_errno.h>
#define TDX_HYPERCALL_STANDARD 0
diff --git a/arch/x86/kvm/vmx/tdx_errno.h b/arch/x86/include/asm/shared/tdx_errno.h
index 6ff4672c41810..ee411b360e20d 100644
--- a/arch/x86/kvm/vmx/tdx_errno.h
+++ b/arch/x86/include/asm/shared/tdx_errno.h
@@ -1,8 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 */
/* architectural status code for SEAMCALL */
-
-#ifndef __KVM_X86_TDX_ERRNO_H
-#define __KVM_X86_TDX_ERRNO_H
+#ifndef _ASM_X86_SHARED_TDX_ERRNO_H
+#define _ASM_X86_SHARED_TDX_ERRNO_H
#define TDX_SEAMCALL_STATUS_MASK 0xFFFFFFFF00000000ULL
@@ -14,6 +13,7 @@
#define TDX_NON_RECOVERABLE_TD_NON_ACCESSIBLE 0x6000000500000000ULL
#define TDX_NON_RECOVERABLE_TD_WRONG_APIC_MODE 0x6000000700000000ULL
#define TDX_INTERRUPTED_RESUMABLE 0x8000000300000000ULL
+#define TDX_SYS_BUSY 0x8000020200000000ULL
#define TDX_OPERAND_INVALID 0xC000010000000000ULL
#define TDX_OPERAND_BUSY 0x8000020000000000ULL
#define TDX_PREVIOUS_TLB_EPOCH_BUSY 0x8000020100000000ULL
@@ -37,4 +37,4 @@
#define TDX_OPERAND_ID_SEPT 0x92
#define TDX_OPERAND_ID_TD_EPOCH 0xa9
-#endif /* __KVM_X86_TDX_ERRNO_H */
+#endif /* _ASM_X86_SHARED_TDX_ERRNO_H */
diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h
index a149740b24e8b..e5a9cf656c072 100644
--- a/arch/x86/include/asm/tdx.h
+++ b/arch/x86/include/asm/tdx.h
@@ -32,6 +32,10 @@
#define TDX_SUCCESS 0ULL
#define TDX_RND_NO_ENTROPY 0x8000020300000000ULL
+/* Bit definitions of TDX_FEATURES0 metadata field */
+#define TDX_FEATURES0_TD_PRESERVING BIT_ULL(1)
+#define TDX_FEATURES0_NO_RBP_MOD BIT_ULL(18)
+
#ifndef __ASSEMBLER__
#include <uapi/asm/mce.h>
@@ -39,6 +43,12 @@
#include <linux/pgtable.h>
/*
+ * TDX module and P-SEAMLDR version convention: "major.minor.update"
+ * (e.g., "1.5.08") with zero-padded two-digit update field.
+ */
+#define TDX_VERSION_FMT "%u.%u.%02u"
+
+/*
* Used by the #VE exception handler to gather the #VE exception
* info from the TDX module. This is a software only structure
* and not part of the TDX module/VMM ABI.
@@ -97,57 +107,16 @@ static inline long tdx_kvm_hypercall(unsigned int nr, unsigned long p1,
#endif /* CONFIG_INTEL_TDX_GUEST && CONFIG_KVM_GUEST */
#ifdef CONFIG_INTEL_TDX_HOST
-u64 __seamcall(u64 fn, struct tdx_module_args *args);
-u64 __seamcall_ret(u64 fn, struct tdx_module_args *args);
-u64 __seamcall_saved_ret(u64 fn, struct tdx_module_args *args);
void tdx_init(void);
+int tdx_cpu_enable(void);
+const char *tdx_dump_mce_info(struct mce *m);
+const struct tdx_sys_info *tdx_get_sysinfo(void);
-#include <linux/preempt.h>
-#include <asm/archrandom.h>
-#include <asm/processor.h>
-
-typedef u64 (*sc_func_t)(u64 fn, struct tdx_module_args *args);
-
-static __always_inline u64 __seamcall_dirty_cache(sc_func_t func, u64 fn,
- struct tdx_module_args *args)
-{
- lockdep_assert_preemption_disabled();
-
- /*
- * SEAMCALLs are made to the TDX module and can generate dirty
- * cachelines of TDX private memory. Mark cache state incoherent
- * so that the cache can be flushed during kexec.
- *
- * This needs to be done before actually making the SEAMCALL,
- * because kexec-ing CPU could send NMI to stop remote CPUs,
- * in which case even disabling IRQ won't help here.
- */
- this_cpu_write(cache_state_incoherent, true);
-
- return func(fn, args);
-}
-
-static __always_inline u64 sc_retry(sc_func_t func, u64 fn,
- struct tdx_module_args *args)
+static inline bool tdx_supports_runtime_update(const struct tdx_sys_info *sysinfo)
{
- int retry = RDRAND_RETRY_LOOPS;
- u64 ret;
-
- do {
- preempt_disable();
- ret = __seamcall_dirty_cache(func, fn, args);
- preempt_enable();
- } while (ret == TDX_RND_NO_ENTROPY && --retry);
-
- return ret;
+ return sysinfo->features.tdx_features0 & TDX_FEATURES0_TD_PRESERVING;
}
-#define seamcall(_fn, _args) sc_retry(__seamcall, (_fn), (_args))
-#define seamcall_ret(_fn, _args) sc_retry(__seamcall_ret, (_fn), (_args))
-#define seamcall_saved_ret(_fn, _args) sc_retry(__seamcall_saved_ret, (_fn), (_args))
-const char *tdx_dump_mce_info(struct mce *m);
-const struct tdx_sys_info *tdx_get_sysinfo(void);
-
int tdx_guest_keyid_alloc(void);
u32 tdx_get_nr_guest_keyids(void);
void tdx_guest_keyid_free(unsigned int keyid);
@@ -193,6 +162,8 @@ static inline int pg_level_to_tdx_sept_level(enum pg_level level)
return level - 1;
}
+void tdx_sys_disable(void);
+
u64 tdh_vp_enter(struct tdx_vp *vp, struct tdx_module_args *args);
u64 tdh_mng_addcx(struct tdx_td *td, struct page *tdcs_page);
u64 tdh_mem_page_add(struct tdx_td *td, u64 gpa, struct page *page, struct page *source, u64 *ext_err1, u64 *ext_err2);
@@ -224,13 +195,8 @@ static inline void tdx_init(void) { }
static inline u32 tdx_get_nr_guest_keyids(void) { return 0; }
static inline const char *tdx_dump_mce_info(struct mce *m) { return NULL; }
static inline const struct tdx_sys_info *tdx_get_sysinfo(void) { return NULL; }
+static inline void tdx_sys_disable(void) { }
#endif /* CONFIG_INTEL_TDX_HOST */
-#ifdef CONFIG_KEXEC_CORE
-void tdx_cpu_flush_cache_for_kexec(void);
-#else
-static inline void tdx_cpu_flush_cache_for_kexec(void) { }
-#endif
-
#endif /* !__ASSEMBLER__ */
#endif /* _ASM_X86_TDX_H */
diff --git a/arch/x86/include/asm/tdx_global_metadata.h b/arch/x86/include/asm/tdx_global_metadata.h
index 40689c8dc67eb..41150d546589c 100644
--- a/arch/x86/include/asm/tdx_global_metadata.h
+++ b/arch/x86/include/asm/tdx_global_metadata.h
@@ -40,6 +40,10 @@ struct tdx_sys_info_td_conf {
u64 cpuid_config_values[128][2];
};
+struct tdx_sys_info_handoff {
+ u16 module_hv;
+};
+
struct tdx_sys_info {
struct tdx_sys_info_version version;
struct tdx_sys_info_features features;
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 37080382df548..49d8551d285d9 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -147,6 +147,7 @@ struct vmcs {
#define VMX_BASIC_INOUT BIT_ULL(54)
#define VMX_BASIC_TRUE_CTLS BIT_ULL(55)
#define VMX_BASIC_NO_HW_ERROR_CODE_CC BIT_ULL(56)
+#define VMX_BASIC_NO_SEAMRET_INVD_VMCS BIT_ULL(60)
static inline u32 vmx_basic_vmcs_revision_id(u64 vmx_basic)
{
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index cd796818d94d9..623d4474631a6 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -38,6 +38,7 @@
#include <linux/kdebug.h>
#include <asm/cpu.h>
#include <asm/reboot.h>
+#include <asm/tdx.h>
#include <asm/intel_pt.h>
#include <asm/crash.h>
#include <asm/cmdline.h>
@@ -112,6 +113,7 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
crash_smp_send_stop();
+ tdx_sys_disable();
x86_virt_emergency_disable_virtualization_cpu();
/*
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 0590d399d4f1f..c3f4a389992da 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -347,22 +347,6 @@ int machine_kexec_prepare(struct kimage *image)
unsigned long reloc_end = (unsigned long)__relocate_kernel_end;
int result;
- /*
- * Some early TDX-capable platforms have an erratum. A kernel
- * partial write (a write transaction of less than cacheline
- * lands at memory controller) to TDX private memory poisons that
- * memory, and a subsequent read triggers a machine check.
- *
- * On those platforms the old kernel must reset TDX private
- * memory before jumping to the new kernel otherwise the new
- * kernel may see unexpected machine check. For simplicity
- * just fail kexec/kdump on those platforms.
- */
- if (boot_cpu_has_bug(X86_BUG_TDX_PW_MCE)) {
- pr_info_once("Not allowed on platform with tdx_pw_mce bug\n");
- return -EOPNOTSUPP;
- }
-
/* Setup the identity mapped 64bit page table */
result = init_pgtable(image, __pa(control_page));
if (result)
diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
index 04ce321ebdf39..ed12805bbb444 100644
--- a/arch/x86/kvm/vmx/tdx.c
+++ b/arch/x86/kvm/vmx/tdx.c
@@ -440,16 +440,6 @@ void tdx_disable_virtualization_cpu(void)
tdx_flush_vp(&arg);
}
local_irq_restore(flags);
-
- /*
- * Flush cache now if kexec is possible: this is necessary to avoid
- * having dirty private memory cachelines when the new kernel boots,
- * but WBINVD is a relatively expensive operation and doing it during
- * kexec can exacerbate races in native_stop_other_cpus(). Do it
- * now, since this is a safe moment and there is going to be no more
- * TDX activity on this CPU from this point on.
- */
- tdx_cpu_flush_cache_for_kexec();
}
#define TDX_SEAMCALL_RETRIES 10000
diff --git a/arch/x86/kvm/vmx/tdx.h b/arch/x86/kvm/vmx/tdx.h
index b5cd2ffb303e5..ac8323a68b163 100644
--- a/arch/x86/kvm/vmx/tdx.h
+++ b/arch/x86/kvm/vmx/tdx.h
@@ -3,7 +3,6 @@
#define __KVM_X86_VMX_TDX_H
#include "tdx_arch.h"
-#include "tdx_errno.h"
#ifdef CONFIG_KVM_INTEL_TDX
#include "common.h"
diff --git a/arch/x86/virt/vmx/tdx/Makefile b/arch/x86/virt/vmx/tdx/Makefile
index 90da47eb85eec..d1dbc5cc56978 100644
--- a/arch/x86/virt/vmx/tdx/Makefile
+++ b/arch/x86/virt/vmx/tdx/Makefile
@@ -1,2 +1,2 @@
# SPDX-License-Identifier: GPL-2.0-only
-obj-y += seamcall.o tdx.o
+obj-y += seamcall.o seamldr.o tdx.o
diff --git a/arch/x86/virt/vmx/tdx/seamcall_internal.h b/arch/x86/virt/vmx/tdx/seamcall_internal.h
new file mode 100644
index 0000000000000..be5f446467dfa
--- /dev/null
+++ b/arch/x86/virt/vmx/tdx/seamcall_internal.h
@@ -0,0 +1,109 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * SEAMCALL utilities for TDX host-side operations.
+ *
+ * Provides convenient wrappers around SEAMCALL assembly with retry logic,
+ * error reporting and cache coherency tracking.
+ *
+ * Copyright (C) 2021-2023 Intel Corporation
+ */
+
+#ifndef _X86_VIRT_SEAMCALL_INTERNAL_H
+#define _X86_VIRT_SEAMCALL_INTERNAL_H
+
+#include <linux/printk.h>
+#include <linux/types.h>
+#include <asm/archrandom.h>
+#include <asm/processor.h>
+#include <asm/tdx.h>
+
+u64 __seamcall(u64 fn, struct tdx_module_args *args);
+u64 __seamcall_ret(u64 fn, struct tdx_module_args *args);
+u64 __seamcall_saved_ret(u64 fn, struct tdx_module_args *args);
+
+typedef u64 (*sc_func_t)(u64 fn, struct tdx_module_args *args);
+
+static __always_inline u64 __seamcall_dirty_cache(sc_func_t func, u64 fn,
+ struct tdx_module_args *args)
+{
+ lockdep_assert_preemption_disabled();
+
+ /*
+ * SEAMCALLs are made to the TDX module and can generate dirty
+ * cachelines of TDX private memory. Mark cache state incoherent
+ * so that the cache can be flushed during kexec.
+ *
+ * This needs to be done before actually making the SEAMCALL,
+ * because kexec-ing CPU could send NMI to stop remote CPUs,
+ * in which case even disabling IRQ won't help here.
+ */
+ this_cpu_write(cache_state_incoherent, true);
+
+ return func(fn, args);
+}
+
+static __always_inline u64 sc_retry(sc_func_t func, u64 fn,
+ struct tdx_module_args *args)
+{
+ int retry = RDRAND_RETRY_LOOPS;
+ u64 ret;
+
+ do {
+ preempt_disable();
+ ret = __seamcall_dirty_cache(func, fn, args);
+ preempt_enable();
+ } while (ret == TDX_RND_NO_ENTROPY && --retry);
+
+ return ret;
+}
+
+#define seamcall(_fn, _args) sc_retry(__seamcall, (_fn), (_args))
+#define seamcall_ret(_fn, _args) sc_retry(__seamcall_ret, (_fn), (_args))
+#define seamcall_saved_ret(_fn, _args) sc_retry(__seamcall_saved_ret, (_fn), (_args))
+
+typedef void (*sc_err_func_t)(u64 fn, u64 err, struct tdx_module_args *args);
+
+static inline void seamcall_err(u64 fn, u64 err, struct tdx_module_args *args)
+{
+ pr_err("SEAMCALL (0x%016llx) failed: 0x%016llx\n", fn, err);
+}
+
+static inline void seamcall_err_ret(u64 fn, u64 err,
+ struct tdx_module_args *args)
+{
+ seamcall_err(fn, err, args);
+ pr_err("RCX 0x%016llx RDX 0x%016llx R08 0x%016llx\n",
+ args->rcx, args->rdx, args->r8);
+ pr_err("R09 0x%016llx R10 0x%016llx R11 0x%016llx\n",
+ args->r9, args->r10, args->r11);
+}
+
+static __always_inline int sc_retry_prerr(sc_func_t func,
+ sc_err_func_t err_func,
+ u64 fn, struct tdx_module_args *args)
+{
+ u64 sret = sc_retry(func, fn, args);
+
+ if (sret == TDX_SUCCESS)
+ return 0;
+
+ if (sret == TDX_SEAMCALL_VMFAILINVALID)
+ return -ENODEV;
+
+ if (sret == TDX_SEAMCALL_GP)
+ return -EOPNOTSUPP;
+
+ if (sret == TDX_SEAMCALL_UD)
+ return -EACCES;
+
+ err_func(fn, sret, args);
+ return -EIO;
+}
+
+#define seamcall_prerr(__fn, __args) \
+ sc_retry_prerr(__seamcall, seamcall_err, (__fn), (__args))
+
+#define seamcall_prerr_ret(__fn, __args) \
+ sc_retry_prerr(__seamcall_ret, seamcall_err_ret, (__fn), (__args))
+
+#endif /* _X86_VIRT_SEAMCALL_INTERNAL_H */
diff --git a/arch/x86/virt/vmx/tdx/seamldr.c b/arch/x86/virt/vmx/tdx/seamldr.c
new file mode 100644
index 0000000000000..b1137ca6150d4
--- /dev/null
+++ b/arch/x86/virt/vmx/tdx/seamldr.c
@@ -0,0 +1,368 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * P-SEAMLDR support for TDX module management features like runtime updates
+ *
+ * Copyright (C) 2025 Intel Corporation
+ */
+#define pr_fmt(fmt) "seamldr: " fmt
+
+#include <linux/bug.h>
+#include <linux/mm.h>
+#include <linux/nmi.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/stop_machine.h>
+
+#include <asm/cpufeature.h>
+#include <asm/cpufeatures.h>
+#include <asm/seamldr.h>
+
+#include "seamcall_internal.h"
+#include "tdx.h"
+
+/* P-SEAMLDR SEAMCALL leaf function */
+#define P_SEAMLDR_INFO 0x8000000000000000
+#define P_SEAMLDR_INSTALL 0x8000000000000001
+
+#define SEAMLDR_MAX_NR_MODULE_PAGES 496
+#define SEAMLDR_MAX_NR_SIG_PAGES 1
+
+/*
+ * The seamldr_params "scenario" field specifies the operation mode:
+ * 0: Install TDX module from scratch (not used by kernel)
+ * 1: Update existing TDX module to a compatible version
+ */
+#define SEAMLDR_SCENARIO_UPDATE 1
+
+/*
+ * This is the "SEAMLDR_PARAMS" data structure defined in the
+ * "SEAM Loader (SEAMLDR) Interface Specification".
+ *
+ * It is the in-memory ABI that the kernel passes to the P-SEAMLDR
+ * to update the TDX module. It breaks the TDX module image up in
+ * page-size pieces.
+ */
+struct seamldr_params {
+ u32 version;
+ u32 scenario;
+ u64 sigstruct_pages_pa_list[SEAMLDR_MAX_NR_SIG_PAGES];
+ u8 reserved[104];
+ u64 module_nr_pages;
+ u64 module_pages_pa_list[SEAMLDR_MAX_NR_MODULE_PAGES];
+} __packed;
+
+static_assert(sizeof(struct seamldr_params) == 4096);
+
+/*
+ * Serialize P-SEAMLDR calls since the hardware only allows a single CPU to
+ * interact with P-SEAMLDR simultaneously. Use raw version as the calls can
+ * be made with interrupts disabled, where plain spinlocks are prohibited in
+ * PREEMPT_RT kernels as they become sleeping locks.
+ */
+static DEFINE_RAW_SPINLOCK(seamldr_lock);
+
+static int seamldr_call(u64 fn, struct tdx_module_args *args)
+{
+ /*
+ * With this bug, P-SEAMLDR calls corrupt the VMCS
+ * pointer and must be avoided. This path should be
+ * unreachable since sysfs hides the ABIs.
+ */
+ if (boot_cpu_has_bug(X86_BUG_SEAMRET_INVD_VMCS)) {
+ WARN_ON(1);
+ return -EINVAL;
+ }
+
+ guard(raw_spinlock)(&seamldr_lock);
+ return seamcall_prerr(fn, args);
+}
+
+int seamldr_get_info(struct seamldr_info *seamldr_info)
+{
+ struct tdx_module_args args = {};
+
+ /*
+ * Use slow_virt_to_phys() since @seamldr_info may be allocated on
+ * the stack.
+ */
+ args.rcx = slow_virt_to_phys(seamldr_info);
+ return seamldr_call(P_SEAMLDR_INFO, &args);
+}
+EXPORT_SYMBOL_FOR_MODULES(seamldr_get_info, "tdx-host");
+
+/* Call into P-SEAMLDR to install a TDX module update */
+static int seamldr_install(const struct seamldr_params *params)
+{
+ struct tdx_module_args args = {};
+
+ args.rcx = __pa(params);
+ return seamldr_call(P_SEAMLDR_INSTALL, &args);
+}
+
+#define TDX_IMAGE_VERSION_2 0x200
+
+/* First page of the on-disk module update image: */
+struct tdx_image_header {
+ u16 version;
+ u16 checksum;
+ u8 signature[8];
+ u32 sigstruct_nr_pages;
+ u32 module_nr_pages;
+ u8 reserved[4076];
+} __packed;
+
+#define TDX_IMAGE_HEADER_SIZE sizeof(struct tdx_image_header)
+static_assert(TDX_IMAGE_HEADER_SIZE == 4096);
+
+/*
+ * Intel TDX module update ABI structure. aka. "TDX module blob".
+ * This is the on-disk format that fw_upload lands in a kernel
+ * buffer.
+ *
+ * @payload contains sigstruct pages followed by module pages.
+ */
+struct tdx_image {
+ struct tdx_image_header header;
+ u8 payload[];
+};
+
+/*
+ * Given a vmalloc() allocation, write all of the backing physical
+ * addresses to pa_list[]. Caller guarantees that the array is big
+ * enough.
+ */
+static void populate_pa_list(u64 *pa_list, const u8 *vmalloc_addr, u32 vmalloc_len_pages)
+{
+ int i;
+
+ for (i = 0; i < vmalloc_len_pages; i++) {
+ unsigned long offset = i * PAGE_SIZE;
+ unsigned long pfn = vmalloc_to_pfn(&vmalloc_addr[offset]);
+
+ pa_list[i] = pfn << PAGE_SHIFT;
+ }
+}
+
+static void populate_seamldr_params(struct seamldr_params *params,
+ const u8 *sig, u32 sig_nr_pages,
+ const u8 *mod, u32 mod_nr_pages)
+{
+ params->version = 0;
+ params->scenario = SEAMLDR_SCENARIO_UPDATE;
+ params->module_nr_pages = mod_nr_pages;
+
+ populate_pa_list(params->sigstruct_pages_pa_list, sig, sig_nr_pages);
+ populate_pa_list(params->module_pages_pa_list, mod, mod_nr_pages);
+}
+
+/*
+ * @image points to a vmalloc()'d 'struct tdx_image'. Transform
+ * it into @params which is the P-SEAMLDR ABI format.
+ */
+static int init_seamldr_params(struct seamldr_params *params,
+ const struct tdx_image *image,
+ u32 image_len)
+{
+ const struct tdx_image_header *header = &image->header;
+
+ u32 sigstruct_len = header->sigstruct_nr_pages * PAGE_SIZE;
+ u32 module_len = header->module_nr_pages * PAGE_SIZE;
+
+ u8 *header_start = (u8 *)header;
+ u8 *header_end = header_start + TDX_IMAGE_HEADER_SIZE;
+
+ u8 *sigstruct_start = header_end;
+ u8 *sigstruct_end = sigstruct_start + sigstruct_len;
+
+ u8 *module_start = sigstruct_end;
+
+ /* Check the calculated payload size against the image size. */
+ if (TDX_IMAGE_HEADER_SIZE + sigstruct_len + module_len != image_len)
+ return -EINVAL;
+
+ /* Reject unsupported tdx_image ABI versions. */
+ if (header->version != TDX_IMAGE_VERSION_2)
+ return -EINVAL;
+
+ if (header->sigstruct_nr_pages > SEAMLDR_MAX_NR_SIG_PAGES ||
+ header->module_nr_pages > SEAMLDR_MAX_NR_MODULE_PAGES)
+ return -EINVAL;
+
+ if (memcmp(header->signature, "TDX-BLOB", sizeof(header->signature)))
+ return -EINVAL;
+
+ if (memchr_inv(header->reserved, 0, sizeof(header->reserved)))
+ return -EINVAL;
+
+ populate_seamldr_params(params, sigstruct_start, header->sigstruct_nr_pages,
+ module_start, header->module_nr_pages);
+ return 0;
+}
+
+/*
+ * During a TDX module update, all CPUs start from MODULE_UPDATE_START and
+ * progress to MODULE_UPDATE_DONE. Each state is associated with certain
+ * work. For some states, just one CPU needs to perform the work, while
+ * other CPUs just wait during those states.
+ */
+enum module_update_state {
+ MODULE_UPDATE_START,
+ MODULE_UPDATE_SHUTDOWN,
+ MODULE_UPDATE_CPU_INSTALL,
+ MODULE_UPDATE_CPU_INIT,
+ MODULE_UPDATE_RUN_UPDATE,
+ MODULE_UPDATE_DONE,
+};
+
+static struct update_ctrl {
+ enum module_update_state state;
+ int num_ack;
+ int num_failed;
+ /*
+ * Protect update_ctrl. Raw spinlock as it will be acquired from
+ * interrupt-disabled contexts.
+ */
+ raw_spinlock_t lock;
+} update_ctrl;
+
+/* Called with ctrl->lock held or during initialization. */
+static void __set_target_state(struct update_ctrl *ctrl,
+ enum module_update_state newstate)
+{
+ /* Reset ack counter. */
+ ctrl->num_ack = 0;
+ ctrl->state = newstate;
+}
+
+/* Last one to ack a state moves to the next state. */
+static void ack_state(struct update_ctrl *ctrl, int result)
+{
+ raw_spin_lock(&ctrl->lock);
+
+ ctrl->num_failed += !!result;
+ ctrl->num_ack++;
+ if (ctrl->num_ack == num_online_cpus() && !ctrl->num_failed)
+ __set_target_state(ctrl, ctrl->state + 1);
+
+ raw_spin_unlock(&ctrl->lock);
+}
+
+static void init_state(struct update_ctrl *ctrl)
+{
+ raw_spin_lock_init(&ctrl->lock);
+ __set_target_state(ctrl, MODULE_UPDATE_START + 1);
+ ctrl->num_failed = 0;
+}
+
+/*
+ * See multi_cpu_stop() from where this multi-cpu state-machine was
+ * adopted.
+ */
+static int do_seamldr_install_module(void *seamldr_params)
+{
+ enum module_update_state curstate = MODULE_UPDATE_START;
+ enum module_update_state newstate;
+ bool is_lead_cpu = false;
+ int ret = 0;
+
+ /*
+ * Some steps must be run on exactly one CPU. Pick a "lead" CPU to
+ * execute those steps. Use CPU 0 because it is always online.
+ */
+ if (smp_processor_id() == 0)
+ is_lead_cpu = true;
+
+ do {
+ newstate = READ_ONCE(update_ctrl.state);
+
+ if (curstate == newstate) {
+ cpu_relax();
+ continue;
+ }
+
+ curstate = newstate;
+ switch (curstate) {
+ case MODULE_UPDATE_SHUTDOWN:
+ if (is_lead_cpu)
+ ret = tdx_module_shutdown();
+ break;
+ case MODULE_UPDATE_CPU_INSTALL:
+ ret = seamldr_install(seamldr_params);
+ break;
+ case MODULE_UPDATE_CPU_INIT:
+ ret = tdx_cpu_enable();
+ break;
+ case MODULE_UPDATE_RUN_UPDATE:
+ if (is_lead_cpu)
+ ret = tdx_module_run_update();
+ break;
+ default:
+ break;
+ }
+
+ ack_state(&update_ctrl, ret);
+ } while (curstate != MODULE_UPDATE_DONE &&
+ !READ_ONCE(update_ctrl.num_failed));
+
+ return ret;
+}
+
+/**
+ * seamldr_install_module - Install a new TDX module.
+ * @data: Pointer to the TDX module image.
+ * @data_len: Size of the TDX module image.
+ *
+ * Returns 0 on success, negative error code on failure.
+ */
+int seamldr_install_module(const u8 *data, u32 data_len)
+{
+ struct seamldr_params *params;
+ const struct tdx_image *image;
+ int ret;
+
+ /*
+ * init_seamldr_params() reads the header early.
+ * Ensure there is enough data to do at least that.
+ */
+ if (data_len < TDX_IMAGE_HEADER_SIZE)
+ return -EINVAL;
+
+ image = (const struct tdx_image *)data;
+
+ params = kzalloc_obj(*params);
+ if (!params)
+ return -ENOMEM;
+
+ /* Populate 'params' from 'image'. */
+ ret = init_seamldr_params(params, image, data_len);
+ if (ret)
+ goto out;
+
+ /* Ensure a stable set of online CPUs for the update process. */
+ cpus_read_lock();
+ init_state(&update_ctrl);
+ ret = stop_machine_cpuslocked(do_seamldr_install_module, params,
+ cpu_online_mask);
+ cpus_read_unlock();
+
+out:
+ kfree(params);
+ return ret;
+}
+EXPORT_SYMBOL_FOR_MODULES(seamldr_install_module, "tdx-host");
+
+/*
+ * stop_machine() does not interrupt preemption-disabled regions.
+ * Simply disabling preempt prevents updates.
+ */
+void seamldr_lock_module_update(void)
+{
+ preempt_disable();
+}
+EXPORT_SYMBOL_FOR_MODULES(seamldr_lock_module_update, "tdx-host");
+
+void seamldr_unlock_module_update(void)
+{
+ preempt_enable();
+}
+EXPORT_SYMBOL_FOR_MODULES(seamldr_unlock_module_update, "tdx-host");
diff --git a/arch/x86/virt/vmx/tdx/tdx.c b/arch/x86/virt/vmx/tdx/tdx.c
index cb9b3210ab710..b15269b5941dc 100644
--- a/arch/x86/virt/vmx/tdx/tdx.c
+++ b/arch/x86/virt/vmx/tdx/tdx.c
@@ -37,12 +37,23 @@
#include <asm/msr.h>
#include <asm/cpufeature.h>
#include <asm/tdx.h>
+#include <asm/shared/tdx_errno.h>
#include <asm/cpu_device_id.h>
#include <asm/processor.h>
#include <asm/mce.h>
#include <asm/virt.h>
+#include <asm/vmx.h>
+
+#include "seamcall_internal.h"
#include "tdx.h"
+struct tdx_module_state {
+ bool initialized;
+ bool sysinit_done;
+ int sysinit_ret;
+};
+
+static struct tdx_module_state tdx_module_state;
static u32 tdx_global_keyid __ro_after_init;
static u32 tdx_guest_keyid_start __ro_after_init;
static u32 tdx_nr_guest_keyids __ro_after_init;
@@ -56,53 +67,9 @@ static struct tdmr_info_list tdx_tdmr_list;
/* All TDX-usable memory regions. Protected by mem_hotplug_lock. */
static LIST_HEAD(tdx_memlist);
-static struct tdx_sys_info tdx_sysinfo __ro_after_init;
-static bool tdx_module_initialized __ro_after_init;
-
-typedef void (*sc_err_func_t)(u64 fn, u64 err, struct tdx_module_args *args);
-
-static inline void seamcall_err(u64 fn, u64 err, struct tdx_module_args *args)
-{
- pr_err("SEAMCALL (0x%016llx) failed: 0x%016llx\n", fn, err);
-}
-
-static inline void seamcall_err_ret(u64 fn, u64 err,
- struct tdx_module_args *args)
-{
- seamcall_err(fn, err, args);
- pr_err("RCX 0x%016llx RDX 0x%016llx R08 0x%016llx\n",
- args->rcx, args->rdx, args->r8);
- pr_err("R09 0x%016llx R10 0x%016llx R11 0x%016llx\n",
- args->r9, args->r10, args->r11);
-}
+static struct tdx_sys_info tdx_sysinfo;
-static __always_inline int sc_retry_prerr(sc_func_t func,
- sc_err_func_t err_func,
- u64 fn, struct tdx_module_args *args)
-{
- u64 sret = sc_retry(func, fn, args);
-
- if (sret == TDX_SUCCESS)
- return 0;
-
- if (sret == TDX_SEAMCALL_VMFAILINVALID)
- return -ENODEV;
-
- if (sret == TDX_SEAMCALL_GP)
- return -EOPNOTSUPP;
-
- if (sret == TDX_SEAMCALL_UD)
- return -EACCES;
-
- err_func(fn, sret, args);
- return -EIO;
-}
-
-#define seamcall_prerr(__fn, __args) \
- sc_retry_prerr(__seamcall, seamcall_err, (__fn), (__args))
-
-#define seamcall_prerr_ret(__fn, __args) \
- sc_retry_prerr(__seamcall_ret, seamcall_err_ret, (__fn), (__args))
+static DEFINE_RAW_SPINLOCK(sysinit_lock);
/*
* Do the module global initialization once and return its result.
@@ -111,31 +78,34 @@ static __always_inline int sc_retry_prerr(sc_func_t func,
static int try_init_module_global(void)
{
struct tdx_module_args args = {};
- static DEFINE_RAW_SPINLOCK(sysinit_lock);
- static bool sysinit_done;
- static int sysinit_ret;
+ int ret;
raw_spin_lock(&sysinit_lock);
- if (sysinit_done)
+ /* Return the "cached" return code. */
+ if (tdx_module_state.sysinit_done) {
+ ret = tdx_module_state.sysinit_ret;
goto out;
+ }
/* RCX is module attributes and all bits are reserved */
args.rcx = 0;
- sysinit_ret = seamcall_prerr(TDH_SYS_INIT, &args);
+ ret = seamcall_prerr(TDH_SYS_INIT, &args);
/*
* The first SEAMCALL also detects the TDX module, thus
* it can fail due to the TDX module is not loaded.
* Dump message to let the user know.
*/
- if (sysinit_ret == -ENODEV)
+ if (ret == -ENODEV)
pr_err("module not loaded\n");
- sysinit_done = true;
+ /* Save the return code for later callers. */
+ tdx_module_state.sysinit_done = true;
+ tdx_module_state.sysinit_ret = ret;
out:
raw_spin_unlock(&sysinit_lock);
- return sysinit_ret;
+ return ret;
}
/**
@@ -143,7 +113,7 @@ out:
* (and TDX module global initialization SEAMCALL if not done) on local cpu to
* make this cpu be ready to run any other SEAMCALLs.
*/
-static int tdx_cpu_enable(void)
+int tdx_cpu_enable(void)
{
struct tdx_module_args args = {};
int ret;
@@ -184,6 +154,17 @@ static int tdx_online_cpu(unsigned int cpu)
return ret;
}
+static void tdx_cpu_flush_cache(void)
+{
+ lockdep_assert_preemption_disabled();
+
+ if (!this_cpu_read(cache_state_incoherent))
+ return;
+
+ wbinvd();
+ this_cpu_write(cache_state_incoherent, false);
+}
+
static int tdx_offline_cpu(unsigned int cpu)
{
int i;
@@ -220,17 +201,34 @@ static int tdx_offline_cpu(unsigned int cpu)
return -EBUSY;
done:
+ /*
+ * Flush cache on the CPU going offline to ensure no dirty
+ * cachelines of TDX private memory remain. This may be
+ * redundant with WBINVD done elsewhere during CPU offline
+ * (e.g. hlt_play_dead()), but do it explicitly for safety.
+ */
+ tdx_cpu_flush_cache();
x86_virt_put_ref(X86_FEATURE_VMX);
return 0;
}
static void tdx_shutdown_cpu(void *ign)
{
+ /*
+ * Flush cache in preparation for kexec - this is necessary to avoid
+ * having dirty private memory cachelines when the new kernel boots,
+ * but WBINVD is a relatively expensive operation and doing it during
+ * kexec can exacerbate races in native_stop_other_cpus(). Do it
+ * now, since this is a safe moment and there is going to be no more
+ * TDX activity on this CPU from this point on.
+ */
+ tdx_cpu_flush_cache();
x86_virt_put_ref(X86_FEATURE_VMX);
}
static void tdx_shutdown(void *ign)
{
+ tdx_sys_disable();
on_each_cpu(tdx_shutdown_cpu, NULL, 1);
}
@@ -330,7 +328,7 @@ err:
return ret;
}
-static __init int read_sys_metadata_field(u64 field_id, u64 *data)
+static int read_sys_metadata_field(u64 field_id, u64 *data)
{
struct tdx_module_args args = {};
int ret;
@@ -1270,12 +1268,70 @@ static __init int tdx_enable(void)
register_syscore(&tdx_syscore);
- tdx_module_initialized = true;
+ tdx_module_state.initialized = true;
pr_info("TDX-Module initialized\n");
return 0;
}
subsys_initcall(tdx_enable);
+int tdx_module_shutdown(void)
+{
+ struct tdx_sys_info_handoff handoff = {};
+ struct tdx_module_args args = {};
+ int ret;
+ int cpu;
+
+ ret = get_tdx_sys_info_handoff(&handoff);
+ /*
+ * Handoff information is required for proper
+ * shutdown. Refuse to shut down without it.
+ */
+ if (ret)
+ return ret;
+
+ /*
+ * Use the module's handoff version as it is the highest the
+ * module can produce and most likely supported by newer modules.
+ */
+ args.rcx = handoff.module_hv;
+
+ ret = seamcall_prerr(TDH_SYS_SHUTDOWN, &args);
+ if (ret)
+ return ret;
+
+ /*
+ * Clear global and per-CPU initialization flags so the new module
+ * can be fully re-initialized after a successful update.
+ *
+ * No locks needed as no concurrent accesses can occur here.
+ */
+ memset(&tdx_module_state, 0, sizeof(tdx_module_state));
+ for_each_possible_cpu(cpu)
+ per_cpu(tdx_lp_initialized, cpu) = false;
+
+ return 0;
+}
+
+int tdx_module_run_update(void)
+{
+ struct tdx_module_args args = {};
+ int ret;
+
+ ret = seamcall_prerr(TDH_SYS_UPDATE, &args);
+ if (ret)
+ return ret;
+
+ ret = get_tdx_sys_info_version(&tdx_sysinfo.version);
+ /*
+ * Only fails if there is something unexpected
+ * and severely wrong with the module.
+ */
+ WARN_ON_ONCE(ret);
+
+ tdx_module_state.initialized = true;
+ return 0;
+}
+
static bool is_pamt_page(unsigned long phys)
{
struct tdmr_info_list *tdmr_list = &tdx_tdmr_list;
@@ -1453,6 +1509,8 @@ static struct notifier_block tdx_memory_nb = {
static void __init check_tdx_erratum(void)
{
+ u64 basic_msr;
+
/*
* These CPUs have an erratum. A partial write from non-TD
* software (e.g. via MOVNTI variants or UC/WC mapping) to TDX
@@ -1464,6 +1522,14 @@ static void __init check_tdx_erratum(void)
case INTEL_EMERALDRAPIDS_X:
setup_force_cpu_bug(X86_BUG_TDX_PW_MCE);
}
+
+ /*
+ * Some TDX-capable CPUs have an erratum where the current VMCS is
+ * cleared after calling into P-SEAMLDR.
+ */
+ rdmsrq(MSR_IA32_VMX_BASIC, basic_msr);
+ if (!(basic_msr & VMX_BASIC_NO_SEAMRET_INVD_VMCS))
+ setup_force_cpu_bug(X86_BUG_SEAMRET_INVD_VMCS);
}
void __init tdx_init(void)
@@ -1525,12 +1591,12 @@ void __init tdx_init(void)
const struct tdx_sys_info *tdx_get_sysinfo(void)
{
- if (!tdx_module_initialized)
+ if (!tdx_module_state.initialized)
return NULL;
return (const struct tdx_sys_info *)&tdx_sysinfo;
}
-EXPORT_SYMBOL_FOR_KVM(tdx_get_sysinfo);
+EXPORT_SYMBOL_FOR_MODULES(tdx_get_sysinfo, "kvm-intel,tdx-host");
u32 tdx_get_nr_guest_keyids(void)
{
@@ -1921,21 +1987,32 @@ u64 tdh_phymem_page_wbinvd_hkid(u64 hkid, struct page *page)
}
EXPORT_SYMBOL_FOR_KVM(tdh_phymem_page_wbinvd_hkid);
-#ifdef CONFIG_KEXEC_CORE
-void tdx_cpu_flush_cache_for_kexec(void)
+void tdx_sys_disable(void)
{
- lockdep_assert_preemption_disabled();
+ struct tdx_module_args args = {};
+ u64 ret;
- if (!this_cpu_read(cache_state_incoherent))
- return;
+ /*
+ * Don't loop forever.
+ *
+ * - TDX_INTERRUPTED_RESUMABLE guarantees forward progress between
+ * calls.
+ *
+ * - TDX_SYS_BUSY could be returned due to contention with other
+ * TDH.SYS.* SEAMCALLs, but will lock out *new* TDH.SYS.* SEAMCALLs,
+ * so that SYS.DISABLE can eventually make progress.
+ *
+ * This is a 'destructive' SEAMCALL, in that no other SEAMCALL can be
+ * run after this until a full reinitialization is done.
+ */
+ do {
+ ret = seamcall(TDH_SYS_DISABLE, &args);
+ } while (ret == TDX_INTERRUPTED_RESUMABLE || ret == TDX_SYS_BUSY);
/*
- * Private memory cachelines need to be clean at the time of
- * kexec. Write them back now, as the caller promises that
- * there should be no more SEAMCALLs on this CPU.
+ * Print SEAMCALL failures, but not SW-defined error codes
+ * (SEAMCALL faulted with #GP/#UD, TDX not supported).
*/
- wbinvd();
- this_cpu_write(cache_state_incoherent, false);
+ if (ret && (ret & TDX_SW_ERROR) != TDX_SW_ERROR)
+ pr_err("TDH.SYS.DISABLE failed: 0x%016llx\n", ret);
}
-EXPORT_SYMBOL_FOR_KVM(tdx_cpu_flush_cache_for_kexec);
-#endif
diff --git a/arch/x86/virt/vmx/tdx/tdx.h b/arch/x86/virt/vmx/tdx/tdx.h
index dde219c823b41..bdfd0e1e337ac 100644
--- a/arch/x86/virt/vmx/tdx/tdx.h
+++ b/arch/x86/virt/vmx/tdx/tdx.h
@@ -46,6 +46,9 @@
#define TDH_PHYMEM_PAGE_WBINVD 41
#define TDH_VP_WR 43
#define TDH_SYS_CONFIG 45
+#define TDH_SYS_SHUTDOWN 52
+#define TDH_SYS_UPDATE 53
+#define TDH_SYS_DISABLE 69
/*
* SEAMCALL leaf:
@@ -84,9 +87,6 @@ struct tdmr_info {
DECLARE_FLEX_ARRAY(struct tdmr_reserved_area, reserved_areas);
} __packed __aligned(TDMR_INFO_ALIGNMENT);
-/* Bit definitions of TDX_FEATURES0 metadata field */
-#define TDX_FEATURES0_NO_RBP_MOD BIT(18)
-
/*
* Do not put any hardware-defined TDX structure representations below
* this comment!
@@ -110,4 +110,7 @@ struct tdmr_info_list {
int max_tdmrs; /* How many 'tdmr_info's are allocated */
};
+int tdx_module_shutdown(void);
+int tdx_module_run_update(void);
+
#endif
diff --git a/arch/x86/virt/vmx/tdx/tdx_global_metadata.c b/arch/x86/virt/vmx/tdx/tdx_global_metadata.c
index c7db393a9cfb1..e49c300f23d43 100644
--- a/arch/x86/virt/vmx/tdx/tdx_global_metadata.c
+++ b/arch/x86/virt/vmx/tdx/tdx_global_metadata.c
@@ -7,7 +7,7 @@
* Include this file to other C file instead.
*/
-static __init int get_tdx_sys_info_version(struct tdx_sys_info_version *sysinfo_version)
+static int get_tdx_sys_info_version(struct tdx_sys_info_version *sysinfo_version)
{
int ret = 0;
u64 val;
@@ -100,13 +100,26 @@ static __init int get_tdx_sys_info_td_conf(struct tdx_sys_info_td_conf *sysinfo_
return ret;
}
+static int get_tdx_sys_info_handoff(struct tdx_sys_info_handoff *sysinfo_handoff)
+{
+ int ret;
+ u64 val;
+
+ ret = read_sys_metadata_field(0x8900000100000000, &val);
+ if (ret)
+ return ret;
+
+ sysinfo_handoff->module_hv = val;
+ return 0;
+}
+
static __init int get_tdx_sys_info(struct tdx_sys_info *sysinfo)
{
int ret = 0;
ret = ret ?: get_tdx_sys_info_version(&sysinfo->version);
- pr_info("Module version: %u.%u.%02u\n",
+ pr_info("Module version: " TDX_VERSION_FMT "\n",
sysinfo->version.major_version,
sysinfo->version.minor_version,
sysinfo->version.update_version);
diff --git a/drivers/virt/coco/Kconfig b/drivers/virt/coco/Kconfig
index df1cfaf26c658..f7691f64fbe32 100644
--- a/drivers/virt/coco/Kconfig
+++ b/drivers/virt/coco/Kconfig
@@ -17,5 +17,7 @@ source "drivers/virt/coco/arm-cca-guest/Kconfig"
source "drivers/virt/coco/guest/Kconfig"
endif
+source "drivers/virt/coco/tdx-host/Kconfig"
+
config TSM
bool
diff --git a/drivers/virt/coco/Makefile b/drivers/virt/coco/Makefile
index cb52021912b34..b323b0ae4f82d 100644
--- a/drivers/virt/coco/Makefile
+++ b/drivers/virt/coco/Makefile
@@ -6,6 +6,7 @@ obj-$(CONFIG_EFI_SECRET) += efi_secret/
obj-$(CONFIG_ARM_PKVM_GUEST) += pkvm-guest/
obj-$(CONFIG_SEV_GUEST) += sev-guest/
obj-$(CONFIG_INTEL_TDX_GUEST) += tdx-guest/
+obj-$(CONFIG_INTEL_TDX_HOST) += tdx-host/
obj-$(CONFIG_ARM_CCA_GUEST) += arm-cca-guest/
obj-$(CONFIG_TSM) += tsm-core.o
obj-$(CONFIG_TSM_GUEST) += guest/
diff --git a/drivers/virt/coco/tdx-host/Kconfig b/drivers/virt/coco/tdx-host/Kconfig
new file mode 100644
index 0000000000000..57d0c01a43577
--- /dev/null
+++ b/drivers/virt/coco/tdx-host/Kconfig
@@ -0,0 +1,6 @@
+config TDX_HOST_SERVICES
+ tristate
+ depends on INTEL_TDX_HOST
+ select FW_LOADER
+ select FW_UPLOAD
+ default m
diff --git a/drivers/virt/coco/tdx-host/Makefile b/drivers/virt/coco/tdx-host/Makefile
new file mode 100644
index 0000000000000..e61e749a8dffb
--- /dev/null
+++ b/drivers/virt/coco/tdx-host/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_TDX_HOST_SERVICES) += tdx-host.o
diff --git a/drivers/virt/coco/tdx-host/tdx-host.c b/drivers/virt/coco/tdx-host/tdx-host.c
new file mode 100644
index 0000000000000..d48952968e86c
--- /dev/null
+++ b/drivers/virt/coco/tdx-host/tdx-host.c
@@ -0,0 +1,239 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * TDX host user interface driver
+ *
+ * Copyright (C) 2025 Intel Corporation
+ */
+
+#include <linux/device/faux.h>
+#include <linux/firmware.h>
+#include <linux/module.h>
+#include <linux/mod_devicetable.h>
+#include <linux/sysfs.h>
+
+#include <asm/cpu_device_id.h>
+#include <asm/seamldr.h>
+#include <asm/tdx.h>
+
+static const struct x86_cpu_id tdx_host_ids[] = {
+ X86_MATCH_FEATURE(X86_FEATURE_TDX_HOST_PLATFORM, NULL),
+ {}
+};
+MODULE_DEVICE_TABLE(x86cpu, tdx_host_ids);
+
+static ssize_t version_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ const struct tdx_sys_info *tdx_sysinfo = tdx_get_sysinfo();
+ const struct tdx_sys_info_version *ver;
+ int ret;
+
+ if (!tdx_sysinfo)
+ return -ENXIO;
+
+ /*
+ * The version number can change during an update.
+ * Lock out updates while printing the version.
+ */
+ seamldr_lock_module_update();
+
+ ver = &tdx_sysinfo->version;
+ ret = sysfs_emit(buf, TDX_VERSION_FMT "\n", ver->major_version,
+ ver->minor_version,
+ ver->update_version);
+ seamldr_unlock_module_update();
+
+ return ret;
+}
+static DEVICE_ATTR_RO(version);
+
+static struct attribute *tdx_host_attrs[] = {
+ &dev_attr_version.attr,
+ NULL,
+};
+
+static const struct attribute_group tdx_host_group = {
+ .attrs = tdx_host_attrs,
+};
+
+static ssize_t seamldr_version_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct seamldr_info info;
+ int ret;
+
+ ret = seamldr_get_info(&info);
+ if (ret)
+ return ret;
+
+ return sysfs_emit(buf, TDX_VERSION_FMT "\n", info.major_version,
+ info.minor_version,
+ info.update_version);
+}
+
+static ssize_t num_remaining_updates_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct seamldr_info info;
+ int ret;
+
+ ret = seamldr_get_info(&info);
+ if (ret)
+ return ret;
+
+ return sysfs_emit(buf, "%u\n", info.num_remaining_updates);
+}
+
+/*
+ * These attributes are intended for managing TDX module updates. Reading
+ * them issues a slow, serialized P-SEAMLDR query, so keep them admin-only.
+ */
+static DEVICE_ATTR_ADMIN_RO(seamldr_version);
+static DEVICE_ATTR_ADMIN_RO(num_remaining_updates);
+
+static struct attribute *seamldr_attrs[] = {
+ &dev_attr_seamldr_version.attr,
+ &dev_attr_num_remaining_updates.attr,
+ NULL,
+};
+
+static bool supports_runtime_update(void)
+{
+ const struct tdx_sys_info *sysinfo = tdx_get_sysinfo();
+
+ if (!sysinfo)
+ return false;
+
+ if (!tdx_supports_runtime_update(sysinfo))
+ return false;
+
+ /*
+ * This bug makes P-SEAMLDR calls clobber the current VMCS
+ * which breaks KVM. Avoid P-SEAMLDR calls by hiding all
+ * attributes if the CPU has this bug.
+ */
+ if (boot_cpu_has_bug(X86_BUG_SEAMRET_INVD_VMCS))
+ return false;
+
+ return true;
+}
+
+static umode_t seamldr_group_visible(struct kobject *kobj, struct attribute *attr, int idx)
+{
+ if (!supports_runtime_update())
+ return 0;
+
+ return attr->mode;
+}
+
+static const struct attribute_group seamldr_group = {
+ .attrs = seamldr_attrs,
+ .is_visible = seamldr_group_visible,
+};
+
+static const struct attribute_group *tdx_host_groups[] = {
+ &tdx_host_group,
+ &seamldr_group,
+ NULL,
+};
+
+static enum fw_upload_err tdx_fw_prepare(struct fw_upload *fwl,
+ const u8 *data, u32 data_len)
+{
+ return FW_UPLOAD_ERR_NONE;
+}
+
+static enum fw_upload_err tdx_fw_write(struct fw_upload *fwl, const u8 *data,
+ u32 offset, u32 data_len, u32 *written)
+{
+ int ret;
+
+ ret = seamldr_install_module(data, data_len);
+ switch (ret) {
+ case 0:
+ *written = data_len;
+ return FW_UPLOAD_ERR_NONE;
+ default:
+ return FW_UPLOAD_ERR_FW_INVALID;
+ }
+}
+
+static enum fw_upload_err tdx_fw_poll_complete(struct fw_upload *fwl)
+{
+ /*
+ * The upload completed during tdx_fw_write().
+ * Never poll for completion.
+ */
+ return FW_UPLOAD_ERR_NONE;
+}
+
+static void tdx_fw_cancel(struct fw_upload *fwl)
+{
+ /*
+ * TDX module updates are not cancellable.
+ * Provide a no-op callback to satisfy fw_upload_ops.
+ */
+}
+
+static const struct fw_upload_ops tdx_fw_ops = {
+ .prepare = tdx_fw_prepare,
+ .write = tdx_fw_write,
+ .poll_complete = tdx_fw_poll_complete,
+ .cancel = tdx_fw_cancel,
+};
+
+static void seamldr_deinit(void *tdx_fwl)
+{
+ firmware_upload_unregister(tdx_fwl);
+}
+
+static int seamldr_init(struct device *dev)
+{
+ struct fw_upload *tdx_fwl;
+
+ if (!supports_runtime_update())
+ return 0;
+
+ tdx_fwl = firmware_upload_register(THIS_MODULE, dev, "tdx_module",
+ &tdx_fw_ops, NULL);
+ if (IS_ERR(tdx_fwl))
+ return PTR_ERR(tdx_fwl);
+
+ return devm_add_action_or_reset(dev, seamldr_deinit, tdx_fwl);
+}
+
+static int tdx_host_probe(struct faux_device *fdev)
+{
+ return seamldr_init(&fdev->dev);
+}
+
+static const struct faux_device_ops tdx_host_ops = {
+ .probe = tdx_host_probe,
+};
+
+static struct faux_device *fdev;
+
+static int __init tdx_host_init(void)
+{
+ if (!x86_match_cpu(tdx_host_ids) || !tdx_get_sysinfo())
+ return -ENODEV;
+
+ fdev = faux_device_create_with_groups(KBUILD_MODNAME, NULL,
+ &tdx_host_ops,
+ tdx_host_groups);
+ if (!fdev)
+ return -ENODEV;
+
+ return 0;
+}
+module_init(tdx_host_init);
+
+static void __exit tdx_host_exit(void)
+{
+ faux_device_destroy(fdev);
+}
+module_exit(tdx_host_exit);
+
+MODULE_DESCRIPTION("TDX Host Services");
+MODULE_LICENSE("GPL");