aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
authorSean Christopherson <seanjc@google.com>2026-05-27 19:12:30 -0700
committerSean Christopherson <seanjc@google.com>2026-05-27 19:12:30 -0700
commitf9b73424b0fcfa1226583196a3ae438c5aa55aa8 (patch)
tree8a0f5879072364643a768c338d79c0b08f3b4671 /arch
parentbc5f0a95ae3c51a48f18a6d0eaa9483b37c1dcc6 (diff)
parent110d4d263450e4172db2f71053d9382320de7e82 (diff)
downloadlinux-next-history-f9b73424b0fcfa1226583196a3ae438c5aa55aa8.tar.gz
Merge branch 'mmu'
* mmu: (23 commits) KVM: TDX: Move external page table freeing to TDX code KVM: x86: Move error handling inside free_external_spt() KVM: TDX: Rename tdx_sept_remove_private_spte() to show it's for leaf SPTEs KVM: TDX: Drop kvm_x86_ops.remove_external_spte() KVM: TDX: Hoist tdx_sept_remove_private_spte() above set_private_spte() KVM: x86/mmu: Drop KVM_BUG_ON() on shared lock to zap child external PTEs KVM: x86/tdp_mmu: Centrally propagate to-present/atomic zap updates to external PTEs KVM: x86/mmu: Plumb "sp" _pointer_ into the TDP MMU's handle_changed_spte() KVM: x86/tdp_mmu: Morph !is_frozen_spte() check into a KVM_MMU_WARN_ON() KVM: TDX: Move lockdep assert in __tdp_mmu_set_spte_atomic() to TDX code KVM: TDX: Move KVM_BUG_ON()s in __tdp_mmu_set_spte_atomic() to TDX code KVM: x86/mmu: Plumb param "old_spte" into kvm_x86_ops.set_external_spte() KVM: x86/mmu: Fold set_external_spte_present() into its sole caller KVM: TDX: Wrap mapping of leaf and non-leaf S-EPT entries into helpers KVM: TDX: Drop kvm_x86_ops.link_external_spt() x86/virt/tdx: Move mk_keyed_paddr() to tdx.c due to no external users x86/tdx: Drop exported function tdx_quirk_reset_page() x86/tdx: Use PFN directly for unmapping guest private memory x86/tdx: Use PFN directly for mapping guest private memory KVM: x86: Make "external SPTE" ops that can fail RET0 static calls ...
Diffstat (limited to 'arch')
-rw-r--r--arch/x86/include/asm/kvm-x86-ops.h4
-rw-r--r--arch/x86/include/asm/kvm_host.h13
-rw-r--r--arch/x86/include/asm/tdx.h34
-rw-r--r--arch/x86/kvm/mmu/tdp_mmu.c275
-rw-r--r--arch/x86/kvm/vmx/tdx.c208
-rw-r--r--arch/x86/virt/vmx/tdx/tdx.c64
6 files changed, 301 insertions, 297 deletions
diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
index e4fca997ec797..83dc5086138b3 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -96,10 +96,8 @@ KVM_X86_OP_OPTIONAL_RET0(set_identity_map_addr)
KVM_X86_OP_OPTIONAL_RET0(get_mt_mask)
KVM_X86_OP_OPTIONAL_RET0(tdp_has_smep)
KVM_X86_OP(load_mmu_pgd)
-KVM_X86_OP_OPTIONAL(link_external_spt)
-KVM_X86_OP_OPTIONAL(set_external_spte)
+KVM_X86_OP_OPTIONAL_RET0(set_external_spte)
KVM_X86_OP_OPTIONAL(free_external_spt)
-KVM_X86_OP_OPTIONAL(remove_external_spte)
KVM_X86_OP(has_wbinvd_exit)
KVM_X86_OP(get_l2_tsc_offset)
KVM_X86_OP(get_l2_tsc_multiplier)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index e3c48bf988ab4..31666b81e60b1 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1914,20 +1914,13 @@ struct kvm_x86_ops {
void (*load_mmu_pgd)(struct kvm_vcpu *vcpu, hpa_t root_hpa,
int root_level);
- /* Update external mapping with page table link. */
- int (*link_external_spt)(struct kvm *kvm, gfn_t gfn, enum pg_level level,
- void *external_spt);
/* Update the external page table from spte getting set. */
- int (*set_external_spte)(struct kvm *kvm, gfn_t gfn, enum pg_level level,
- u64 mirror_spte);
+ int (*set_external_spte)(struct kvm *kvm, gfn_t gfn, u64 old_spte,
+ u64 new_spte, enum pg_level level);
/* Update external page tables for page table about to be freed. */
- int (*free_external_spt)(struct kvm *kvm, gfn_t gfn, enum pg_level level,
- void *external_spt);
+ void (*free_external_spt)(struct kvm *kvm, struct kvm_mmu_page *sp);
- /* Update external page table from spte getting removed, and flush TLB. */
- void (*remove_external_spte)(struct kvm *kvm, gfn_t gfn, enum pg_level level,
- u64 mirror_spte);
bool (*has_wbinvd_exit)(void);
diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h
index a149740b24e8b..32fbdf8f55aef 100644
--- a/arch/x86/include/asm/tdx.h
+++ b/arch/x86/include/asm/tdx.h
@@ -6,6 +6,7 @@
#include <linux/init.h>
#include <linux/bits.h>
#include <linux/mmzone.h>
+#include <linux/kvm_types.h>
#include <asm/errno.h>
#include <asm/ptrace.h>
@@ -152,7 +153,7 @@ int tdx_guest_keyid_alloc(void);
u32 tdx_get_nr_guest_keyids(void);
void tdx_guest_keyid_free(unsigned int keyid);
-void tdx_quirk_reset_page(struct page *page);
+void tdx_quirk_reset_paddr(unsigned long base, unsigned long size);
struct tdx_td {
/* TD root structure: */
@@ -176,30 +177,15 @@ struct tdx_vp {
struct page **tdcx_pages;
};
-static inline u64 mk_keyed_paddr(u16 hkid, struct page *page)
-{
- u64 ret;
-
- ret = page_to_phys(page);
- /* KeyID bits are just above the physical address bits: */
- ret |= (u64)hkid << boot_cpu_data.x86_phys_bits;
-
- return ret;
-}
-
-static inline int pg_level_to_tdx_sept_level(enum pg_level level)
-{
- WARN_ON_ONCE(level == PG_LEVEL_NONE);
- return level - 1;
-}
-
u64 tdh_vp_enter(struct tdx_vp *vp, struct tdx_module_args *args);
u64 tdh_mng_addcx(struct tdx_td *td, struct page *tdcs_page);
-u64 tdh_mem_page_add(struct tdx_td *td, u64 gpa, struct page *page, struct page *source, u64 *ext_err1, u64 *ext_err2);
-u64 tdh_mem_sept_add(struct tdx_td *td, u64 gpa, int level, struct page *page, u64 *ext_err1, u64 *ext_err2);
+u64 tdh_mem_page_add(struct tdx_td *td, u64 gpa, kvm_pfn_t pfn, struct page *source,
+ u64 *ext_err1, u64 *ext_err2);
+u64 tdh_mem_sept_add(struct tdx_td *td, u64 gpa, enum pg_level level, struct page *page, u64 *ext_err1, u64 *ext_err2);
u64 tdh_vp_addcx(struct tdx_vp *vp, struct page *tdcx_page);
-u64 tdh_mem_page_aug(struct tdx_td *td, u64 gpa, int level, struct page *page, u64 *ext_err1, u64 *ext_err2);
-u64 tdh_mem_range_block(struct tdx_td *td, u64 gpa, int level, u64 *ext_err1, u64 *ext_err2);
+u64 tdh_mem_page_aug(struct tdx_td *td, u64 gpa, enum pg_level level, kvm_pfn_t pfn,
+ u64 *ext_err1, u64 *ext_err2);
+u64 tdh_mem_range_block(struct tdx_td *td, u64 gpa, enum pg_level level, u64 *ext_err1, u64 *ext_err2);
u64 tdh_mng_key_config(struct tdx_td *td);
u64 tdh_mng_create(struct tdx_td *td, u16 hkid);
u64 tdh_vp_create(struct tdx_td *td, struct tdx_vp *vp);
@@ -215,10 +201,10 @@ u64 tdh_vp_rd(struct tdx_vp *vp, u64 field, u64 *data);
u64 tdh_vp_wr(struct tdx_vp *vp, u64 field, u64 data, u64 mask);
u64 tdh_phymem_page_reclaim(struct page *page, u64 *tdx_pt, u64 *tdx_owner, u64 *tdx_size);
u64 tdh_mem_track(struct tdx_td *tdr);
-u64 tdh_mem_page_remove(struct tdx_td *td, u64 gpa, u64 level, u64 *ext_err1, u64 *ext_err2);
+u64 tdh_mem_page_remove(struct tdx_td *td, u64 gpa, enum pg_level level, u64 *ext_err1, u64 *ext_err2);
u64 tdh_phymem_cache_wb(bool resume);
u64 tdh_phymem_page_wbinvd_tdr(struct tdx_td *td);
-u64 tdh_phymem_page_wbinvd_hkid(u64 hkid, struct page *page);
+u64 tdh_phymem_page_wbinvd_hkid(u64 hkid, kvm_pfn_t pfn);
#else
static inline void tdx_init(void) { }
static inline u32 tdx_get_nr_guest_keyids(void) { return 0; }
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 5a2f8ce9a32b8..5b3041138301b 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -53,13 +53,18 @@ void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm)
rcu_barrier();
}
-static void tdp_mmu_free_sp(struct kvm_mmu_page *sp)
+static void __tdp_mmu_free_sp(struct kvm_mmu_page *sp)
{
- free_page((unsigned long)sp->external_spt);
free_page((unsigned long)sp->spt);
kmem_cache_free(mmu_page_header_cache, sp);
}
+static void tdp_mmu_free_unused_sp(struct kvm_mmu_page *sp)
+{
+ free_page((unsigned long)sp->external_spt);
+ __tdp_mmu_free_sp(sp);
+}
+
/*
* This is called through call_rcu in order to free TDP page table memory
* safely with respect to other kernel threads that may be operating on
@@ -73,7 +78,8 @@ static void tdp_mmu_free_sp_rcu_callback(struct rcu_head *head)
struct kvm_mmu_page *sp = container_of(head, struct kvm_mmu_page,
rcu_head);
- tdp_mmu_free_sp(sp);
+ WARN_ON_ONCE(sp->external_spt);
+ __tdp_mmu_free_sp(sp);
}
void kvm_tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root)
@@ -320,9 +326,9 @@ out_read_unlock:
}
}
-static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
- u64 old_spte, u64 new_spte, int level,
- bool shared);
+static void handle_changed_spte(struct kvm *kvm, struct kvm_mmu_page *sp,
+ gfn_t gfn, u64 old_spte, u64 new_spte,
+ int level, bool shared);
static void tdp_account_mmu_page(struct kvm *kvm, struct kvm_mmu_page *sp)
{
@@ -359,25 +365,6 @@ static void tdp_mmu_unlink_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
spin_unlock(&kvm->arch.tdp_mmu_pages_lock);
}
-static void remove_external_spte(struct kvm *kvm, gfn_t gfn, u64 old_spte,
- int level)
-{
- /*
- * External (TDX) SPTEs are limited to PG_LEVEL_4K, and external
- * PTs are removed in a special order, involving free_external_spt().
- * But remove_external_spte() will be called on non-leaf PTEs via
- * __tdp_mmu_zap_root(), so avoid the error the former would return
- * in this case.
- */
- if (!is_last_spte(old_spte, level))
- return;
-
- /* Zapping leaf spte is allowed only when write lock is held. */
- lockdep_assert_held_write(&kvm->mmu_lock);
-
- kvm_x86_call(remove_external_spte)(kvm, gfn, level, old_spte);
-}
-
/**
* handle_removed_pt() - handle a page table removed from the TDP structure
*
@@ -471,86 +458,19 @@ static void handle_removed_pt(struct kvm *kvm, tdp_ptep_t pt, bool shared)
old_spte = kvm_tdp_mmu_write_spte(sptep, old_spte,
FROZEN_SPTE, level);
}
- handle_changed_spte(kvm, kvm_mmu_page_as_id(sp), gfn,
- old_spte, FROZEN_SPTE, level, shared);
-
- if (is_mirror_sp(sp)) {
- KVM_BUG_ON(shared, kvm);
- remove_external_spte(kvm, gfn, old_spte, level);
- }
+ handle_changed_spte(kvm, sp, gfn, old_spte, FROZEN_SPTE, level, shared);
}
- if (is_mirror_sp(sp) &&
- WARN_ON(kvm_x86_call(free_external_spt)(kvm, base_gfn, sp->role.level,
- sp->external_spt))) {
- /*
- * Failed to free page table page in mirror page table and
- * there is nothing to do further.
- * Intentionally leak the page to prevent the kernel from
- * accessing the encrypted page.
- */
- sp->external_spt = NULL;
- }
+ if (is_mirror_sp(sp))
+ kvm_x86_call(free_external_spt)(kvm, sp);
call_rcu(&sp->rcu_head, tdp_mmu_free_sp_rcu_callback);
}
-static void *get_external_spt(gfn_t gfn, u64 new_spte, int level)
-{
- if (is_shadow_present_pte(new_spte) && !is_last_spte(new_spte, level)) {
- struct kvm_mmu_page *sp = spte_to_child_sp(new_spte);
-
- WARN_ON_ONCE(sp->role.level + 1 != level);
- WARN_ON_ONCE(sp->gfn != gfn);
- return sp->external_spt;
- }
-
- return NULL;
-}
-
-static int __must_check set_external_spte_present(struct kvm *kvm, tdp_ptep_t sptep,
- gfn_t gfn, u64 old_spte,
- u64 new_spte, int level)
-{
- bool was_present = is_shadow_present_pte(old_spte);
- bool is_present = is_shadow_present_pte(new_spte);
- bool is_leaf = is_present && is_last_spte(new_spte, level);
- int ret = 0;
-
- KVM_BUG_ON(was_present, kvm);
-
- lockdep_assert_held(&kvm->mmu_lock);
- /*
- * We need to lock out other updates to the SPTE until the external
- * page table has been modified. Use FROZEN_SPTE similar to
- * the zapping case.
- */
- if (!try_cmpxchg64(rcu_dereference(sptep), &old_spte, FROZEN_SPTE))
- return -EBUSY;
-
- /*
- * Use different call to either set up middle level
- * external page table, or leaf.
- */
- if (is_leaf) {
- ret = kvm_x86_call(set_external_spte)(kvm, gfn, level, new_spte);
- } else {
- void *external_spt = get_external_spt(gfn, new_spte, level);
-
- KVM_BUG_ON(!external_spt, kvm);
- ret = kvm_x86_call(link_external_spt)(kvm, gfn, level, external_spt);
- }
- if (ret)
- __kvm_tdp_mmu_write_spte(sptep, old_spte);
- else
- __kvm_tdp_mmu_write_spte(sptep, new_spte);
- return ret;
-}
-
/**
- * handle_changed_spte - handle bookkeeping associated with an SPTE change
+ * __handle_changed_spte - handle bookkeeping associated with an SPTE change
* @kvm: kvm instance
- * @as_id: the address space of the paging structure the SPTE was a part of
+ * @sp: the page table in which the SPTE resides
* @gfn: the base GFN that was mapped by the SPTE
* @old_spte: The value of the SPTE before the change
* @new_spte: The value of the SPTE after the change
@@ -563,15 +483,16 @@ static int __must_check set_external_spte_present(struct kvm *kvm, tdp_ptep_t sp
* dirty logging updates are handled in common code, not here (see make_spte()
* and fast_pf_fix_direct_spte()).
*/
-static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
- u64 old_spte, u64 new_spte, int level,
- bool shared)
+static int __handle_changed_spte(struct kvm *kvm, struct kvm_mmu_page *sp,
+ gfn_t gfn, u64 old_spte, u64 new_spte,
+ int level, bool shared)
{
bool was_present = is_shadow_present_pte(old_spte);
bool is_present = is_shadow_present_pte(new_spte);
bool was_leaf = was_present && is_last_spte(old_spte, level);
bool is_leaf = is_present && is_last_spte(new_spte, level);
bool pfn_changed = spte_to_pfn(old_spte) != spte_to_pfn(new_spte);
+ int as_id = kvm_mmu_page_as_id(sp);
WARN_ON_ONCE(level > PT64_ROOT_MAX_LEVEL);
WARN_ON_ONCE(level < PG_LEVEL_4K);
@@ -601,9 +522,7 @@ static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
}
if (old_spte == new_spte)
- return;
-
- trace_kvm_tdp_mmu_spte_changed(as_id, gfn, level, old_spte, new_spte);
+ return 0;
if (is_leaf)
check_spte_writable_invariants(new_spte);
@@ -630,21 +549,45 @@ static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
"a temporary frozen SPTE.\n"
"as_id: %d gfn: %llx old_spte: %llx new_spte: %llx level: %d",
as_id, gfn, old_spte, new_spte, level);
- return;
- }
- if (is_leaf != was_leaf)
- kvm_update_page_stats(kvm, level, is_leaf ? 1 : -1);
+ trace_kvm_tdp_mmu_spte_changed(as_id, gfn, level, old_spte, new_spte);
+ return 0;
+ }
/*
* Recursively handle child PTs if the change removed a subtree from
* the paging structure. Note the WARN on the PFN changing without the
* SPTE being converted to a hugepage (leaf) or being zapped. Shadow
* pages are kernel allocations and should never be migrated.
+ *
+ * For the mirror page table, propagate all changes to the external SPTE
+ * (except zapping/promotion of non-leaf SPTEs) via the
+ * set_external_spte() op.
*/
if (was_present && !was_leaf &&
- (is_leaf || !is_present || WARN_ON_ONCE(pfn_changed)))
+ (is_leaf || !is_present || WARN_ON_ONCE(pfn_changed))) {
handle_removed_pt(kvm, spte_to_child_pt(old_spte, level), shared);
+ } else if (is_mirror_sp(sp)) {
+ int r;
+
+ r = kvm_x86_call(set_external_spte)(kvm, gfn, old_spte, new_spte, level);
+ if (r)
+ return r;
+ }
+ trace_kvm_tdp_mmu_spte_changed(as_id, gfn, level, old_spte, new_spte);
+
+ if (is_leaf != was_leaf)
+ kvm_update_page_stats(kvm, level, is_leaf ? 1 : -1);
+
+ return 0;
+}
+
+static void handle_changed_spte(struct kvm *kvm, struct kvm_mmu_page *sp,
+ gfn_t gfn, u64 old_spte, u64 new_spte,
+ int level, bool shared)
+{
+ KVM_BUG_ON(__handle_changed_spte(kvm, sp, gfn, old_spte, new_spte,
+ level, shared), kvm);
}
static inline int __must_check __tdp_mmu_set_spte_atomic(struct kvm *kvm,
@@ -659,34 +602,15 @@ static inline int __must_check __tdp_mmu_set_spte_atomic(struct kvm *kvm,
*/
WARN_ON_ONCE(iter->yielded || is_frozen_spte(iter->old_spte));
- if (is_mirror_sptep(iter->sptep) && !is_frozen_spte(new_spte)) {
- int ret;
-
- /*
- * Users of atomic zapping don't operate on mirror roots,
- * so don't handle it and bug the VM if it's seen.
- */
- if (KVM_BUG_ON(!is_shadow_present_pte(new_spte), kvm))
- return -EBUSY;
-
- ret = set_external_spte_present(kvm, iter->sptep, iter->gfn,
- iter->old_spte, new_spte, iter->level);
- if (ret)
- return ret;
- } else {
- u64 *sptep = rcu_dereference(iter->sptep);
-
- /*
- * Note, fast_pf_fix_direct_spte() can also modify TDP MMU SPTEs
- * and does not hold the mmu_lock. On failure, i.e. if a
- * different logical CPU modified the SPTE, try_cmpxchg64()
- * updates iter->old_spte with the current value, so the caller
- * operates on fresh data, e.g. if it retries
- * tdp_mmu_set_spte_atomic()
- */
- if (!try_cmpxchg64(sptep, &iter->old_spte, new_spte))
- return -EBUSY;
- }
+ /*
+ * Note, fast_pf_fix_direct_spte() can also modify TDP MMU SPTEs and
+ * does not hold the mmu_lock. On failure, i.e. if a different logical
+ * CPU modified the SPTE, try_cmpxchg64() updates iter->old_spte with
+ * the current value, so the caller operates on fresh data, e.g. if it
+ * retries tdp_mmu_set_spte_atomic().
+ */
+ if (!try_cmpxchg64(rcu_dereference(iter->sptep), &iter->old_spte, new_spte))
+ return -EBUSY;
return 0;
}
@@ -712,24 +636,61 @@ static inline int __must_check tdp_mmu_set_spte_atomic(struct kvm *kvm,
struct tdp_iter *iter,
u64 new_spte)
{
+ struct kvm_mmu_page *sp = sptep_to_sp(rcu_dereference(iter->sptep));
int ret;
lockdep_assert_held_read(&kvm->mmu_lock);
- ret = __tdp_mmu_set_spte_atomic(kvm, iter, new_spte);
+ /* Should not set FROZEN_SPTE as a long-term value. */
+ KVM_MMU_WARN_ON(is_frozen_spte(new_spte));
+
+ /*
+ * Temporarily freeze the SPTE until the external PTE operation has
+ * completed, e.g. so that concurrent faults don't attempt to install a
+ * child PTE in the external page table before the parent PTE has been
+ * written.
+ */
+ if (is_mirror_sptep(iter->sptep))
+ ret = __tdp_mmu_set_spte_atomic(kvm, iter, FROZEN_SPTE);
+ else
+ ret = __tdp_mmu_set_spte_atomic(kvm, iter, new_spte);
+
if (ret)
return ret;
- handle_changed_spte(kvm, iter->as_id, iter->gfn, iter->old_spte,
- new_spte, iter->level, true);
-
- return 0;
+ /*
+ * Handle the change from iter->old_spte to new_spte.
+ *
+ * Note: for mirror page table, this means the updates of the external
+ * PTE, statistics, or updates of child SPTEs, child external PTEs and
+ * corresponding statistics are performed while the mirror SPTE is in
+ * frozen state (i.e., before the mirror SPTE is set to new_spte).
+ */
+ ret = __handle_changed_spte(kvm, sp, iter->gfn, iter->old_spte,
+ new_spte, iter->level, true);
+ /*
+ * Unfreeze the mirror SPTE. If updating the external SPTE failed,
+ * restore the old value so that the mirror SPTE isn't frozen in
+ * perpetuity, otherwise set the mirror SPTE to the new desired value.
+ */
+ if (is_mirror_sptep(iter->sptep)) {
+ if (ret)
+ __kvm_tdp_mmu_write_spte(iter->sptep, iter->old_spte);
+ else
+ __kvm_tdp_mmu_write_spte(iter->sptep, new_spte);
+ } else {
+ /*
+ * Bug the VM if handling the change failed, as failure is only
+ * allowed if KVM couldn't update the external SPTE.
+ */
+ KVM_BUG_ON(ret, kvm);
+ }
+ return ret;
}
/*
* tdp_mmu_set_spte - Set a TDP MMU SPTE and handle the associated bookkeeping
* @kvm: KVM instance
- * @as_id: Address space ID, i.e. regular vs. SMM
* @sptep: Pointer to the SPTE
* @old_spte: The current value of the SPTE
* @new_spte: The new value that will be set for the SPTE
@@ -739,9 +700,11 @@ static inline int __must_check tdp_mmu_set_spte_atomic(struct kvm *kvm,
* Returns the old SPTE value, which _may_ be different than @old_spte if the
* SPTE had volatile bits.
*/
-static u64 tdp_mmu_set_spte(struct kvm *kvm, int as_id, tdp_ptep_t sptep,
- u64 old_spte, u64 new_spte, gfn_t gfn, int level)
+static u64 tdp_mmu_set_spte(struct kvm *kvm, tdp_ptep_t sptep, u64 old_spte,
+ u64 new_spte, gfn_t gfn, int level)
{
+ struct kvm_mmu_page *sp = sptep_to_sp(rcu_dereference(sptep));
+
lockdep_assert_held_write(&kvm->mmu_lock);
/*
@@ -755,16 +718,7 @@ static u64 tdp_mmu_set_spte(struct kvm *kvm, int as_id, tdp_ptep_t sptep,
old_spte = kvm_tdp_mmu_write_spte(sptep, old_spte, new_spte, level);
- handle_changed_spte(kvm, as_id, gfn, old_spte, new_spte, level, false);
-
- /*
- * Users that do non-atomic setting of PTEs don't operate on mirror
- * roots, so don't handle it and bug the VM if it's seen.
- */
- if (is_mirror_sptep(sptep)) {
- KVM_BUG_ON(is_shadow_present_pte(new_spte), kvm);
- remove_external_spte(kvm, gfn, old_spte, level);
- }
+ handle_changed_spte(kvm, sp, gfn, old_spte, new_spte, level, false);
return old_spte;
}
@@ -773,9 +727,8 @@ static inline void tdp_mmu_iter_set_spte(struct kvm *kvm, struct tdp_iter *iter,
u64 new_spte)
{
WARN_ON_ONCE(iter->yielded);
- iter->old_spte = tdp_mmu_set_spte(kvm, iter->as_id, iter->sptep,
- iter->old_spte, new_spte,
- iter->gfn, iter->level);
+ iter->old_spte = tdp_mmu_set_spte(kvm, iter->sptep, iter->old_spte,
+ new_spte, iter->gfn, iter->level);
}
#define tdp_root_for_each_pte(_iter, _kvm, _root, _start, _end) \
@@ -1321,7 +1274,7 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
* failed, e.g. because a different task modified the SPTE.
*/
if (r) {
- tdp_mmu_free_sp(sp);
+ tdp_mmu_free_unused_sp(sp);
goto retry;
}
@@ -1377,6 +1330,10 @@ static void kvm_tdp_mmu_age_spte(struct kvm *kvm, struct tdp_iter *iter)
{
u64 new_spte;
+ /* TODO: Add support for aging external SPTEs, if necessary. */
+ if (WARN_ON_ONCE(is_mirror_sptep(iter->sptep)))
+ return;
+
if (spte_ad_enabled(iter->old_spte)) {
iter->old_spte = tdp_mmu_clear_spte_bits_atomic(iter->sptep,
shadow_accessed_mask);
@@ -1628,7 +1585,7 @@ retry:
* installs its own sp in place of the last sp we tried to split.
*/
if (sp)
- tdp_mmu_free_sp(sp);
+ tdp_mmu_free_unused_sp(sp);
return 0;
}
diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
index 738fd5ea92575..d665259d619a3 100644
--- a/arch/x86/kvm/vmx/tdx.c
+++ b/arch/x86/kvm/vmx/tdx.c
@@ -343,7 +343,7 @@ static int tdx_reclaim_page(struct page *page)
r = __tdx_reclaim_page(page);
if (!r)
- tdx_quirk_reset_page(page);
+ tdx_quirk_reset_paddr(page_to_phys(page), PAGE_SIZE);
return r;
}
@@ -597,7 +597,7 @@ static void tdx_reclaim_td_control_pages(struct kvm *kvm)
if (TDX_BUG_ON(err, TDH_PHYMEM_PAGE_WBINVD, kvm))
return;
- tdx_quirk_reset_page(kvm_tdx->td.tdr_page);
+ tdx_quirk_reset_paddr(page_to_phys(kvm_tdx->td.tdr_page), PAGE_SIZE);
__free_page(kvm_tdx->td.tdr_page);
kvm_tdx->td.tdr_page = NULL;
@@ -1644,8 +1644,8 @@ static int tdx_mem_page_add(struct kvm *kvm, gfn_t gfn, enum pg_level level,
KVM_BUG_ON(!kvm_tdx->page_add_src, kvm))
return -EIO;
- err = tdh_mem_page_add(&kvm_tdx->td, gpa, pfn_to_page(pfn),
- kvm_tdx->page_add_src, &entry, &level_state);
+ err = tdh_mem_page_add(&kvm_tdx->td, gpa, pfn, kvm_tdx->page_add_src,
+ &entry, &level_state);
if (unlikely(tdx_operand_busy(err)))
return -EBUSY;
@@ -1658,14 +1658,12 @@ static int tdx_mem_page_add(struct kvm *kvm, gfn_t gfn, enum pg_level level,
static int tdx_mem_page_aug(struct kvm *kvm, gfn_t gfn,
enum pg_level level, kvm_pfn_t pfn)
{
- int tdx_level = pg_level_to_tdx_sept_level(level);
struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm);
- struct page *page = pfn_to_page(pfn);
gpa_t gpa = gfn_to_gpa(gfn);
u64 entry, level_state;
u64 err;
- err = tdh_mem_page_aug(&kvm_tdx->td, gpa, tdx_level, page, &entry, &level_state);
+ err = tdh_mem_page_aug(&kvm_tdx->td, gpa, level, pfn, &entry, &level_state);
if (unlikely(tdx_operand_busy(err)))
return -EBUSY;
@@ -1675,18 +1673,52 @@ static int tdx_mem_page_aug(struct kvm *kvm, gfn_t gfn,
return 0;
}
-static int tdx_sept_set_private_spte(struct kvm *kvm, gfn_t gfn,
- enum pg_level level, u64 mirror_spte)
+static struct page *tdx_spte_to_sept_pt(struct kvm *kvm, gfn_t gfn,
+ u64 new_spte, enum pg_level level)
+{
+ struct kvm_mmu_page *sp = spte_to_child_sp(new_spte);
+
+ if (KVM_BUG_ON(!sp->external_spt, kvm) ||
+ KVM_BUG_ON(sp->role.level + 1 != level, kvm) ||
+ KVM_BUG_ON(sp->gfn != gfn, kvm))
+ return NULL;
+
+ return virt_to_page(sp->external_spt);
+}
+
+static int tdx_sept_map_nonleaf_spte(struct kvm *kvm, gfn_t gfn,
+ enum pg_level level, u64 new_spte)
+{
+ gpa_t gpa = gfn_to_gpa(gfn);
+ u64 err, entry, level_state;
+ struct page *sept_pt;
+
+ sept_pt = tdx_spte_to_sept_pt(kvm, gfn, new_spte, level);
+ if (!sept_pt)
+ return -EIO;
+
+ err = tdh_mem_sept_add(&to_kvm_tdx(kvm)->td, gpa, level, sept_pt,
+ &entry, &level_state);
+ if (unlikely(tdx_operand_busy(err)))
+ return -EBUSY;
+
+ if (TDX_BUG_ON_2(err, TDH_MEM_SEPT_ADD, entry, level_state, kvm))
+ return -EIO;
+
+ return 0;
+}
+
+static int tdx_sept_map_leaf_spte(struct kvm *kvm, gfn_t gfn, enum pg_level level,
+ u64 new_spte)
{
struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm);
- kvm_pfn_t pfn = spte_to_pfn(mirror_spte);
+ kvm_pfn_t pfn = spte_to_pfn(new_spte);
/* TODO: handle large pages. */
if (KVM_BUG_ON(level != PG_LEVEL_4K, kvm))
return -EIO;
- WARN_ON_ONCE(!is_shadow_present_pte(mirror_spte) ||
- (mirror_spte & VMX_EPT_RWX_MASK) != VMX_EPT_RWX_MASK);
+ WARN_ON_ONCE((new_spte & VMX_EPT_RWX_MASK) != VMX_EPT_RWX_MASK);
/*
* Ensure pre_fault_allowed is read by kvm_arch_vcpu_pre_fault_memory()
@@ -1706,25 +1738,6 @@ static int tdx_sept_set_private_spte(struct kvm *kvm, gfn_t gfn,
return tdx_mem_page_aug(kvm, gfn, level, pfn);
}
-static int tdx_sept_link_private_spt(struct kvm *kvm, gfn_t gfn,
- enum pg_level level, void *private_spt)
-{
- int tdx_level = pg_level_to_tdx_sept_level(level);
- gpa_t gpa = gfn_to_gpa(gfn);
- struct page *page = virt_to_page(private_spt);
- u64 err, entry, level_state;
-
- err = tdh_mem_sept_add(&to_kvm_tdx(kvm)->td, gpa, tdx_level, page, &entry,
- &level_state);
- if (unlikely(tdx_operand_busy(err)))
- return -EBUSY;
-
- if (TDX_BUG_ON_2(err, TDH_MEM_SEPT_ADD, entry, level_state, kvm))
- return -EIO;
-
- return 0;
-}
-
/*
* Ensure shared and private EPTs to be flushed on all vCPUs.
* tdh_mem_track() is the only caller that increases TD epoch. An increase in
@@ -1771,35 +1784,11 @@ static void tdx_track(struct kvm *kvm)
kvm_make_all_cpus_request(kvm, KVM_REQ_OUTSIDE_GUEST_MODE);
}
-static int tdx_sept_free_private_spt(struct kvm *kvm, gfn_t gfn,
- enum pg_level level, void *private_spt)
-{
- struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm);
-
- /*
- * free_external_spt() is only called after hkid is freed when TD is
- * tearing down.
- * KVM doesn't (yet) zap page table pages in mirror page table while
- * TD is active, though guest pages mapped in mirror page table could be
- * zapped during TD is active, e.g. for shared <-> private conversion
- * and slot move/deletion.
- */
- if (KVM_BUG_ON(is_hkid_assigned(kvm_tdx), kvm))
- return -EIO;
-
- /*
- * The HKID assigned to this TD was already freed and cache was
- * already flushed. We don't have to flush again.
- */
- return tdx_reclaim_page(virt_to_page(private_spt));
-}
-
-static void tdx_sept_remove_private_spte(struct kvm *kvm, gfn_t gfn,
- enum pg_level level, u64 mirror_spte)
+static int tdx_sept_remove_leaf_spte(struct kvm *kvm, gfn_t gfn,
+ enum pg_level level, u64 old_spte)
{
- struct page *page = pfn_to_page(spte_to_pfn(mirror_spte));
- int tdx_level = pg_level_to_tdx_sept_level(level);
struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm);
+ kvm_pfn_t pfn = spte_to_pfn(old_spte);
gpa_t gpa = gfn_to_gpa(gfn);
u64 err, entry, level_state;
@@ -1811,16 +1800,16 @@ static void tdx_sept_remove_private_spte(struct kvm *kvm, gfn_t gfn,
* there can't be anything populated in the private EPT.
*/
if (KVM_BUG_ON(!is_hkid_assigned(to_kvm_tdx(kvm)), kvm))
- return;
+ return -EIO;
/* TODO: handle large pages. */
if (KVM_BUG_ON(level != PG_LEVEL_4K, kvm))
- return;
+ return -EIO;
err = tdh_do_no_vcpus(tdh_mem_range_block, kvm, &kvm_tdx->td, gpa,
- tdx_level, &entry, &level_state);
+ level, &entry, &level_state);
if (TDX_BUG_ON_2(err, TDH_MEM_RANGE_BLOCK, entry, level_state, kvm))
- return;
+ return -EIO;
/*
* TDX requires TLB tracking before dropping private page. Do
@@ -1834,15 +1823,82 @@ static void tdx_sept_remove_private_spte(struct kvm *kvm, gfn_t gfn,
* Race with TDH.VP.ENTER due to (0-step mitigation) and Guest TDCALLs.
*/
err = tdh_do_no_vcpus(tdh_mem_page_remove, kvm, &kvm_tdx->td, gpa,
- tdx_level, &entry, &level_state);
+ level, &entry, &level_state);
if (TDX_BUG_ON_2(err, TDH_MEM_PAGE_REMOVE, entry, level_state, kvm))
- return;
+ return -EIO;
- err = tdh_phymem_page_wbinvd_hkid((u16)kvm_tdx->hkid, page);
+ err = tdh_phymem_page_wbinvd_hkid((u16)kvm_tdx->hkid, pfn);
if (TDX_BUG_ON(err, TDH_PHYMEM_PAGE_WBINVD, kvm))
- return;
+ return -EIO;
+
+ tdx_quirk_reset_paddr(PFN_PHYS(pfn), PAGE_SIZE);
+ return 0;
+}
+
+/*
+ * Handle changes for
+ * (1) leaf SPTEs from non-present to present
+ * (2) non-leaf SPTEs from non-present to present
+ * (3) leaf SPTEs from present to non-present
+ *
+ * - (1) and (2) must be under shared mmu_lock. If (1) and (2) are under
+ * exclusive mmu_lock (currently impossible), contention errors may lead to
+ * KVM_BUG_ON() in handle_changed_spte(), e.g., due to tdx_mem_page_aug(),
+ * tdx_mem_page_add(), or tdh_mem_sept_add() contending with tdh_vp_enter()
+ * due to zero-step mitigation or contending with TDCALLs.
+ * - (3) must be under write mmu_lock. If (3) is under shared mmu_lock
+ * (currently impossible), warnings will be generated due to
+ * lockdep_assert_held_write() or TDX_BUG_ON() caused by concurrent BLOCK,
+ * TRACK, REMOVE.
+ * - Promotion/demotion is not yet supported.
+ */
+static int tdx_sept_set_private_spte(struct kvm *kvm, gfn_t gfn, u64 old_spte,
+ u64 new_spte, enum pg_level level)
+{
+ lockdep_assert_held(&kvm->mmu_lock);
- tdx_quirk_reset_page(page);
+ if (is_shadow_present_pte(old_spte))
+ return tdx_sept_remove_leaf_spte(kvm, gfn, level, old_spte);
+
+ if (KVM_BUG_ON(!is_shadow_present_pte(new_spte), kvm))
+ return -EIO;
+
+ if (!is_last_spte(new_spte, level))
+ return tdx_sept_map_nonleaf_spte(kvm, gfn, level, new_spte);
+
+ return tdx_sept_map_leaf_spte(kvm, gfn, level, new_spte);
+}
+
+/*
+ * Handle changes for non-leaf SPTEs from present to non-present.
+ * Must be under exclusive mmu_lock and cannot fail.
+ */
+static void tdx_sept_free_private_spt(struct kvm *kvm, struct kvm_mmu_page *sp)
+{
+ /*
+ * KVM doesn't (yet) zap page table pages in the mirror page table while
+ * the TD is active, though guest pages mapped in the mirror page table
+ * can be zapped while the TD is active, e.g. for shared <-> private
+ * conversion and slot move/deletion.
+ *
+ * In other words, KVM should only free mirror page tables after the
+ * TD's hkid is freed, when the TD is being torn down.
+ *
+ * If the S-EPT PTE can't be removed for any reason, intentionally leak
+ * the page to prevent the kernel from accessing the encrypted page.
+ */
+ if (KVM_BUG_ON(is_hkid_assigned(to_kvm_tdx(kvm)), kvm) ||
+ tdx_reclaim_page(virt_to_page(sp->external_spt)))
+ goto out;
+
+ /*
+ * Immediately free the S-EPT page because RCU-time free is unnecessary
+ * after TDH.PHYMEM.PAGE.RECLAIM ensures there are no outstanding
+ * readers.
+ */
+ free_page((unsigned long)sp->external_spt);
+out:
+ sp->external_spt = NULL;
}
void tdx_deliver_interrupt(struct kvm_lapic *apic, int delivery_mode,
@@ -2407,20 +2463,20 @@ static int __tdx_td_init(struct kvm *kvm, struct td_params *td_params,
ret = -ENOMEM;
- tdr_page = alloc_page(GFP_KERNEL);
+ tdr_page = alloc_page(GFP_KERNEL_ACCOUNT);
if (!tdr_page)
goto free_hkid;
kvm_tdx->td.tdcs_nr_pages = tdx_sysinfo->td_ctrl.tdcs_base_size / PAGE_SIZE;
/* TDVPS = TDVPR(4K page) + TDCX(multiple 4K pages), -1 for TDVPR. */
kvm_tdx->td.tdcx_nr_pages = tdx_sysinfo->td_ctrl.tdvps_base_size / PAGE_SIZE - 1;
- tdcs_pages = kzalloc_objs(*kvm_tdx->td.tdcs_pages,
- kvm_tdx->td.tdcs_nr_pages);
+ tdcs_pages = kzalloc_objs(*kvm_tdx->td.tdcs_pages, kvm_tdx->td.tdcs_nr_pages,
+ GFP_KERNEL_ACCOUNT);
if (!tdcs_pages)
goto free_tdr;
for (i = 0; i < kvm_tdx->td.tdcs_nr_pages; i++) {
- tdcs_pages[i] = alloc_page(GFP_KERNEL);
+ tdcs_pages[i] = alloc_page(GFP_KERNEL_ACCOUNT);
if (!tdcs_pages[i])
goto free_tdcs;
}
@@ -2805,7 +2861,7 @@ void tdx_flush_tlb_current(struct kvm_vcpu *vcpu)
void tdx_flush_tlb_all(struct kvm_vcpu *vcpu)
{
/*
- * TDX has called tdx_track() in tdx_sept_remove_private_spte() to
+ * TDX has called tdx_track() in tdx_sept_remove_leaf_spte() to
* ensure that private EPT will be flushed on the next TD enter. No need
* to call tdx_track() here again even when this callback is a result of
* zapping private EPT.
@@ -2895,7 +2951,7 @@ static int tdx_td_vcpu_init(struct kvm_vcpu *vcpu, u64 vcpu_rcx)
int ret, i;
u64 err;
- page = alloc_page(GFP_KERNEL);
+ page = alloc_page(GFP_KERNEL_ACCOUNT);
if (!page)
return -ENOMEM;
tdx->vp.tdvpr_page = page;
@@ -2908,14 +2964,14 @@ static int tdx_td_vcpu_init(struct kvm_vcpu *vcpu, u64 vcpu_rcx)
tdx->vp.tdvpr_pa = page_to_phys(tdx->vp.tdvpr_page);
tdx->vp.tdcx_pages = kcalloc(kvm_tdx->td.tdcx_nr_pages, sizeof(*tdx->vp.tdcx_pages),
- GFP_KERNEL);
+ GFP_KERNEL_ACCOUNT);
if (!tdx->vp.tdcx_pages) {
ret = -ENOMEM;
goto free_tdvpr;
}
for (i = 0; i < kvm_tdx->td.tdcx_nr_pages; i++) {
- page = alloc_page(GFP_KERNEL);
+ page = alloc_page(GFP_KERNEL_ACCOUNT);
if (!page) {
ret = -ENOMEM;
goto free_tdcx;
@@ -3435,10 +3491,8 @@ int __init tdx_hardware_setup(void)
vt_x86_ops.vm_size = max_t(unsigned int, vt_x86_ops.vm_size, sizeof(struct kvm_tdx));
- vt_x86_ops.link_external_spt = tdx_sept_link_private_spt;
vt_x86_ops.set_external_spte = tdx_sept_set_private_spte;
vt_x86_ops.free_external_spt = tdx_sept_free_private_spt;
- vt_x86_ops.remove_external_spte = tdx_sept_remove_private_spte;
vt_x86_ops.protected_apic_has_interrupt = tdx_protected_apic_has_interrupt;
return 0;
diff --git a/arch/x86/virt/vmx/tdx/tdx.c b/arch/x86/virt/vmx/tdx/tdx.c
index cb9b3210ab710..967482ae3c801 100644
--- a/arch/x86/virt/vmx/tdx/tdx.c
+++ b/arch/x86/virt/vmx/tdx/tdx.c
@@ -30,7 +30,6 @@
#include <linux/suspend.h>
#include <linux/syscore_ops.h>
#include <linux/idr.h>
-#include <linux/kvm_types.h>
#include <asm/page.h>
#include <asm/special_insns.h>
#include <asm/msr-index.h>
@@ -711,7 +710,7 @@ err:
* to normal kernel memory. Systems with the X86_BUG_TDX_PW_MCE erratum need to
* do the conversion explicitly via MOVDIR64B.
*/
-static void tdx_quirk_reset_paddr(unsigned long base, unsigned long size)
+void tdx_quirk_reset_paddr(unsigned long base, unsigned long size)
{
const void *zero_page = (const void *)page_address(ZERO_PAGE(0));
unsigned long phys, end;
@@ -730,12 +729,7 @@ static void tdx_quirk_reset_paddr(unsigned long base, unsigned long size)
*/
mb();
}
-
-void tdx_quirk_reset_page(struct page *page)
-{
- tdx_quirk_reset_paddr(page_to_phys(page), PAGE_SIZE);
-}
-EXPORT_SYMBOL_FOR_KVM(tdx_quirk_reset_page);
+EXPORT_SYMBOL_FOR_KVM(tdx_quirk_reset_paddr);
static __init void tdmr_quirk_reset_pamt(struct tdmr_info *tdmr)
@@ -1568,6 +1562,17 @@ static void tdx_clflush_page(struct page *page)
clflush_cache_range(page_to_virt(page), PAGE_SIZE);
}
+static void tdx_clflush_pfn(kvm_pfn_t pfn)
+{
+ clflush_cache_range(__va(PFN_PHYS(pfn)), PAGE_SIZE);
+}
+
+static int pg_level_to_tdx_sept_level(enum pg_level level)
+{
+ WARN_ON_ONCE(level == PG_LEVEL_NONE);
+ return level - 1;
+}
+
noinstr u64 tdh_vp_enter(struct tdx_vp *td, struct tdx_module_args *args)
{
args->rcx = td->tdvpr_pa;
@@ -1588,17 +1593,18 @@ u64 tdh_mng_addcx(struct tdx_td *td, struct page *tdcs_page)
}
EXPORT_SYMBOL_FOR_KVM(tdh_mng_addcx);
-u64 tdh_mem_page_add(struct tdx_td *td, u64 gpa, struct page *page, struct page *source, u64 *ext_err1, u64 *ext_err2)
+u64 tdh_mem_page_add(struct tdx_td *td, u64 gpa, kvm_pfn_t pfn, struct page *source,
+ u64 *ext_err1, u64 *ext_err2)
{
struct tdx_module_args args = {
.rcx = gpa,
.rdx = tdx_tdr_pa(td),
- .r8 = page_to_phys(page),
+ .r8 = PFN_PHYS(pfn),
.r9 = page_to_phys(source),
};
u64 ret;
- tdx_clflush_page(page);
+ tdx_clflush_pfn(pfn);
ret = seamcall_ret(TDH_MEM_PAGE_ADD, &args);
*ext_err1 = args.rcx;
@@ -1608,10 +1614,11 @@ u64 tdh_mem_page_add(struct tdx_td *td, u64 gpa, struct page *page, struct page
}
EXPORT_SYMBOL_FOR_KVM(tdh_mem_page_add);
-u64 tdh_mem_sept_add(struct tdx_td *td, u64 gpa, int level, struct page *page, u64 *ext_err1, u64 *ext_err2)
+u64 tdh_mem_sept_add(struct tdx_td *td, u64 gpa, enum pg_level level,
+ struct page *page, u64 *ext_err1, u64 *ext_err2)
{
struct tdx_module_args args = {
- .rcx = gpa | level,
+ .rcx = gpa | pg_level_to_tdx_sept_level(level),
.rdx = tdx_tdr_pa(td),
.r8 = page_to_phys(page),
};
@@ -1639,16 +1646,17 @@ u64 tdh_vp_addcx(struct tdx_vp *vp, struct page *tdcx_page)
}
EXPORT_SYMBOL_FOR_KVM(tdh_vp_addcx);
-u64 tdh_mem_page_aug(struct tdx_td *td, u64 gpa, int level, struct page *page, u64 *ext_err1, u64 *ext_err2)
+u64 tdh_mem_page_aug(struct tdx_td *td, u64 gpa, enum pg_level level,
+ kvm_pfn_t pfn, u64 *ext_err1, u64 *ext_err2)
{
struct tdx_module_args args = {
- .rcx = gpa | level,
+ .rcx = gpa | pg_level_to_tdx_sept_level(level),
.rdx = tdx_tdr_pa(td),
- .r8 = page_to_phys(page),
+ .r8 = PFN_PHYS(pfn),
};
u64 ret;
- tdx_clflush_page(page);
+ tdx_clflush_pfn(pfn);
ret = seamcall_ret(TDH_MEM_PAGE_AUG, &args);
*ext_err1 = args.rcx;
@@ -1658,10 +1666,11 @@ u64 tdh_mem_page_aug(struct tdx_td *td, u64 gpa, int level, struct page *page, u
}
EXPORT_SYMBOL_FOR_KVM(tdh_mem_page_aug);
-u64 tdh_mem_range_block(struct tdx_td *td, u64 gpa, int level, u64 *ext_err1, u64 *ext_err2)
+u64 tdh_mem_range_block(struct tdx_td *td, u64 gpa, enum pg_level level,
+ u64 *ext_err1, u64 *ext_err2)
{
struct tdx_module_args args = {
- .rcx = gpa | level,
+ .rcx = gpa | pg_level_to_tdx_sept_level(level),
.rdx = tdx_tdr_pa(td),
};
u64 ret;
@@ -1874,10 +1883,11 @@ u64 tdh_mem_track(struct tdx_td *td)
}
EXPORT_SYMBOL_FOR_KVM(tdh_mem_track);
-u64 tdh_mem_page_remove(struct tdx_td *td, u64 gpa, u64 level, u64 *ext_err1, u64 *ext_err2)
+u64 tdh_mem_page_remove(struct tdx_td *td, u64 gpa, enum pg_level level,
+ u64 *ext_err1, u64 *ext_err2)
{
struct tdx_module_args args = {
- .rcx = gpa | level,
+ .rcx = gpa | pg_level_to_tdx_sept_level(level),
.rdx = tdx_tdr_pa(td),
};
u64 ret;
@@ -1901,21 +1911,27 @@ u64 tdh_phymem_cache_wb(bool resume)
}
EXPORT_SYMBOL_FOR_KVM(tdh_phymem_cache_wb);
+static inline u64 mk_keyed_paddr(u16 hkid, kvm_pfn_t pfn)
+{
+ /* KeyID bits are just above the physical address bits. */
+ return PFN_PHYS(pfn) | ((u64)hkid << boot_cpu_data.x86_phys_bits);
+}
+
u64 tdh_phymem_page_wbinvd_tdr(struct tdx_td *td)
{
struct tdx_module_args args = {};
- args.rcx = mk_keyed_paddr(tdx_global_keyid, td->tdr_page);
+ args.rcx = mk_keyed_paddr(tdx_global_keyid, page_to_pfn(td->tdr_page));
return seamcall(TDH_PHYMEM_PAGE_WBINVD, &args);
}
EXPORT_SYMBOL_FOR_KVM(tdh_phymem_page_wbinvd_tdr);
-u64 tdh_phymem_page_wbinvd_hkid(u64 hkid, struct page *page)
+u64 tdh_phymem_page_wbinvd_hkid(u64 hkid, kvm_pfn_t pfn)
{
struct tdx_module_args args = {};
- args.rcx = mk_keyed_paddr(hkid, page);
+ args.rcx = mk_keyed_paddr(hkid, pfn);
return seamcall(TDH_PHYMEM_PAGE_WBINVD, &args);
}