diff options
| author | Sean Christopherson <seanjc@google.com> | 2026-05-09 15:57:30 +0800 |
|---|---|---|
| committer | Sean Christopherson <seanjc@google.com> | 2026-05-27 17:19:21 -0700 |
| commit | b35bda696e4416a01a064ccb5e67bca03132d8ec (patch) | |
| tree | f382d6d0ad444b03ea0486b8a7b5857096fc21d4 /arch | |
| parent | 0cef26b537ffa963d719c529f8ff604c1db505fd (diff) | |
| download | linux-next-history-b35bda696e4416a01a064ccb5e67bca03132d8ec.tar.gz | |
KVM: x86: Move error handling inside free_external_spt()
Move the TDX-specific logic for leaking pages when reclaim fails into the
free_external_spt() op, so that it lives in TDX-specific code and not in
the generic MMU.
Do this by passing in "sp" instead of the external page table pointer.
This way, TDX code can set sp->external_spt to NULL. Since the error is now
handled internally in TDX code (by triggering KVM_BUG_ON() or
TDX_BUG_ON_3(), which warn and stop the VM on any error), change the op to
return void. This way it also operates like a normal free in that success
is guaranteed from the caller's perspective.
Opportunistically, drop the unused level and gfn args while adjusting the
sp arg.
[ Rick: Re-wrote log and massaged op name ]
Co-developed-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
Signed-off-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
[ Yan: Updated patch log/function comment, dropped unused param in op ]
Co-developed-by: Yan Zhao <yan.y.zhao@intel.com>
Signed-off-by: Yan Zhao <yan.y.zhao@intel.com>
Link: https://patch.msgid.link/20260509075730.4354-1-yan.y.zhao@intel.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
Diffstat (limited to 'arch')
| -rw-r--r-- | arch/x86/include/asm/kvm-x86-ops.h | 2 | ||||
| -rw-r--r-- | arch/x86/include/asm/kvm_host.h | 3 | ||||
| -rw-r--r-- | arch/x86/kvm/mmu/tdp_mmu.c | 13 | ||||
| -rw-r--r-- | arch/x86/kvm/vmx/tdx.c | 28 |
4 files changed, 18 insertions, 28 deletions
diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h index 771d991562caf..83dc5086138b3 100644 --- a/arch/x86/include/asm/kvm-x86-ops.h +++ b/arch/x86/include/asm/kvm-x86-ops.h @@ -97,7 +97,7 @@ KVM_X86_OP_OPTIONAL_RET0(get_mt_mask) KVM_X86_OP_OPTIONAL_RET0(tdp_has_smep) KVM_X86_OP(load_mmu_pgd) KVM_X86_OP_OPTIONAL_RET0(set_external_spte) -KVM_X86_OP_OPTIONAL_RET0(free_external_spt) +KVM_X86_OP_OPTIONAL(free_external_spt) KVM_X86_OP(has_wbinvd_exit) KVM_X86_OP(get_l2_tsc_offset) KVM_X86_OP(get_l2_tsc_multiplier) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index dc5625950e5bf..a2ae2d7ca00fb 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1916,8 +1916,7 @@ struct kvm_x86_ops { u64 new_spte, enum pg_level level); /* Update external page tables for page table about to be freed. */ - int (*free_external_spt)(struct kvm *kvm, gfn_t gfn, enum pg_level level, - void *external_spt); + void (*free_external_spt)(struct kvm *kvm, struct kvm_mmu_page *sp); bool (*has_wbinvd_exit)(void); diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index 72d323f2d0dcc..74531e4bbee62 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -455,17 +455,8 @@ static void handle_removed_pt(struct kvm *kvm, tdp_ptep_t pt, bool shared) handle_changed_spte(kvm, sp, gfn, old_spte, FROZEN_SPTE, level, shared); } - if (is_mirror_sp(sp) && - WARN_ON(kvm_x86_call(free_external_spt)(kvm, base_gfn, sp->role.level, - sp->external_spt))) { - /* - * Failed to free page table page in mirror page table and - * there is nothing to do further. - * Intentionally leak the page to prevent the kernel from - * accessing the encrypted page. 
- */ - sp->external_spt = NULL; - } + if (is_mirror_sp(sp)) + kvm_x86_call(free_external_spt)(kvm, sp); call_rcu(&sp->rcu_head, tdp_mmu_free_sp_rcu_callback); } diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c index b1c273dfbe4c2..b3185cd9c6d65 100644 --- a/arch/x86/kvm/vmx/tdx.c +++ b/arch/x86/kvm/vmx/tdx.c @@ -1849,27 +1849,27 @@ static int tdx_sept_set_private_spte(struct kvm *kvm, gfn_t gfn, u64 old_spte, return tdx_sept_map_leaf_spte(kvm, gfn, level, new_spte); } -static int tdx_sept_free_private_spt(struct kvm *kvm, gfn_t gfn, - enum pg_level level, void *private_spt) +/* + * Handle changes for non-leaf SPTEs from present to non-present. + * Must be under exclusive mmu_lock and cannot fail. + */ +static void tdx_sept_free_private_spt(struct kvm *kvm, struct kvm_mmu_page *sp) { - struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm); - /* - * free_external_spt() is only called after hkid is freed when TD is - * tearing down. * KVM doesn't (yet) zap page table pages in mirror page table while * TD is active, though guest pages mapped in mirror page table could be * zapped during TD is active, e.g. for shared <-> private conversion * and slot move/deletion. + * + * In other words, KVM should only free mirror page tables after the + * TD's hkid is freed, when the TD is being torn down. + * + * If the S-EPT PTE can't be removed for any reason, intentionally leak + * the page to prevent the kernel from accessing the encrypted page. */ - if (KVM_BUG_ON(is_hkid_assigned(kvm_tdx), kvm)) - return -EIO; - - /* - * The HKID assigned to this TD was already freed and cache was - * already flushed. We don't have to flush again. - */ - return tdx_reclaim_page(virt_to_page(private_spt)); + if (KVM_BUG_ON(is_hkid_assigned(to_kvm_tdx(kvm)), kvm) || + tdx_reclaim_page(virt_to_page(sp->external_spt))) + sp->external_spt = NULL; } void tdx_deliver_interrupt(struct kvm_lapic *apic, int delivery_mode, |
