aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
authorSean Christopherson <seanjc@google.com>2026-05-27 19:12:29 -0700
committerSean Christopherson <seanjc@google.com>2026-05-27 19:12:29 -0700
commitbc5f0a95ae3c51a48f18a6d0eaa9483b37c1dcc6 (patch)
treeab9093d8cb806bba964c735f465976108cf98b14 /arch
parent5ada7ec7b3d14ec7efb09d174115697ed5319d7b (diff)
parent5d40e5b49442437fe9dfd2577f7b17c07dbefb92 (diff)
downloadlinux-next-history-bc5f0a95ae3c51a48f18a6d0eaa9483b37c1dcc6.tar.gz
Merge branch 'misc'
* misc: (30 commits) KVM: SEV: Restrict userspace return codes for KVM_HC_MAP_GPA_RANGE KVM: TDX: Allow userspace to return errors to guest for MAPGPA KVM: selftests: Update hwcr_msr_test for CPUID faulting bit KVM: x86: Virtualize AMD CPUID faulting KVM: x86: Remove supports_cpuid_fault() helper KVM: x86: Prioritize CPUID faulting over CPUID VM-exits in nested VMX KVM: x86: Consolidate CPUID fault handling for emulator and interception logic KVM: x86: Treat KVM's virtual PMU as disabled for TDX VMs KVM: selftests: Add nested page fault injection test KVM: VMX: Synthesize nested EPT violation GVA_IS_VALID/GVA_TRANSLATED bits KVM: SVM: Fix nested NPF injection of PFERR_GUEST_{PAGE,FINAL}_MASK bits KVM: x86: Tell ->inject_page_fault() whether or a fault came from hardware KVM: x86: Widen x86_exception's error_code to 64 bits MAINTAINERS: KVM: Include maintainer profile KVM: x86: Remove unused X86EMUL_MODE_HOST define KVM: selftests: Verify VMX's GUEST_PENDING_DBG_EXCEPTIONS.BS Consistency Check KVM: selftests: Verify guest debug DR7.GD checking during instruction emulation KVM: selftests: Add all (known) EFLAGS bit definitions KVM: x86: Drop kvm_vcpu_do_singlestep() now that it's been gutted KVM: x86: Move KVM_GUESTDBG_SINGLESTEP handling into kvm_inject_emulated_db() ...
Diffstat (limited to 'arch')
-rw-r--r--arch/x86/include/asm/kvm_host.h24
-rw-r--r--arch/x86/include/asm/msr-index.h1
-rw-r--r--arch/x86/kvm/cpuid.c7
-rw-r--r--arch/x86/kvm/cpuid.h14
-rw-r--r--arch/x86/kvm/emulate.c20
-rw-r--r--arch/x86/kvm/kvm_emulate.h17
-rw-r--r--arch/x86/kvm/mmu/paging_tmpl.h36
-rw-r--r--arch/x86/kvm/svm/nested.c38
-rw-r--r--arch/x86/kvm/svm/sev.c12
-rw-r--r--arch/x86/kvm/svm/svm.c15
-rw-r--r--arch/x86/kvm/vmx/nested.c31
-rw-r--r--arch/x86/kvm/vmx/tdx.c34
-rw-r--r--arch/x86/kvm/vmx/vmx.c38
-rw-r--r--arch/x86/kvm/x86.c208
-rw-r--r--arch/x86/kvm/x86.h6
15 files changed, 298 insertions, 203 deletions
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index f4c59a5dafe28..e3c48bf988ab4 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -284,6 +284,8 @@ enum x86_intercept_stage;
#define PFERR_GUEST_RMP_MASK BIT_ULL(31)
#define PFERR_GUEST_FINAL_MASK BIT_ULL(32)
#define PFERR_GUEST_PAGE_MASK BIT_ULL(33)
+#define PFERR_GUEST_FAULT_STAGE_MASK \
+ (PFERR_GUEST_FINAL_MASK | PFERR_GUEST_PAGE_MASK)
#define PFERR_GUEST_ENC_MASK BIT_ULL(34)
#define PFERR_GUEST_SIZEM_MASK BIT_ULL(35)
#define PFERR_GUEST_VMPL_MASK BIT_ULL(36)
@@ -484,7 +486,8 @@ struct kvm_mmu {
u64 (*get_pdptr)(struct kvm_vcpu *vcpu, int index);
int (*page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault);
void (*inject_page_fault)(struct kvm_vcpu *vcpu,
- struct x86_exception *fault);
+ struct x86_exception *fault,
+ bool from_hardware);
gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
gpa_t gva_or_gpa, u64 access,
struct x86_exception *exception);
@@ -1057,8 +1060,6 @@ struct kvm_vcpu_arch {
u16 vec;
u32 id;
u32 host_apf_flags;
- bool send_always;
- bool delivery_as_pf_vmexit;
bool pageready_pending;
} apf;
@@ -1441,6 +1442,7 @@ struct kvm_arch {
bool has_private_mem;
bool has_protected_state;
bool has_protected_eoi;
+ bool has_protected_pmu;
bool pre_fault_allowed;
struct hlist_head *mmu_page_hash;
struct list_head active_mmu_pages;
@@ -2307,10 +2309,18 @@ void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code);
void kvm_queue_exception_p(struct kvm_vcpu *vcpu, unsigned nr, unsigned long payload);
void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned int nr,
bool has_error_code, u32 error_code);
-void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault);
-void kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu,
- struct x86_exception *fault);
-bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl);
+void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault,
+ bool from_hardware);
+void __kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu,
+ struct x86_exception *fault,
+ bool from_hardware);
+
+static inline void kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu,
+ struct x86_exception *fault)
+{
+ __kvm_inject_emulated_page_fault(vcpu, fault, false);
+}
+
bool kvm_require_dr(struct kvm_vcpu *vcpu, int dr);
static inline int __kvm_irq_line_state(unsigned long *irq_state,
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index a14a0f43e04ae..f534f150d1c59 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -898,6 +898,7 @@
#define MSR_K7_HWCR_IRPERF_EN_BIT 30
#define MSR_K7_HWCR_IRPERF_EN BIT_ULL(MSR_K7_HWCR_IRPERF_EN_BIT)
#define MSR_K7_HWCR_CPUID_USER_DIS_BIT 35
+#define MSR_K7_HWCR_CPUID_USER_DIS BIT_ULL(MSR_K7_HWCR_CPUID_USER_DIS_BIT)
#define MSR_K7_FID_VID_CTL 0xc0010041
#define MSR_K7_FID_VID_STATUS 0xc0010042
#define MSR_K7_HWCR_CPB_DIS_BIT 25
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index e69156b54cfff..8e5340dd26211 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -1248,7 +1248,7 @@ void kvm_initialize_cpu_caps(void)
F(AUTOIBRS),
EMULATED_F(NO_SMM_CTL_MSR),
/* PrefetchCtlMsr */
- /* GpOnUserCpuid */
+ EMULATED_F(GP_ON_USER_CPUID),
/* EPSF */
F(PREFETCHI),
F(AVX512_BMM),
@@ -2161,9 +2161,10 @@ int kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
{
u32 eax, ebx, ecx, edx;
- if (!is_smm(vcpu) && cpuid_fault_enabled(vcpu) &&
- !kvm_require_cpl(vcpu, 0))
+ if (!kvm_is_cpuid_allowed(vcpu)) {
+ kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
return 1;
+ }
eax = kvm_rax_read(vcpu);
ecx = kvm_rcx_read(vcpu);
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index 039b8e6f40baf..fc96ba86c644d 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -7,6 +7,8 @@
#include <asm/processor.h>
#include <uapi/asm/kvm_para.h>
+#include "smm.h"
+
extern u32 kvm_cpu_caps[NR_KVM_CPU_CAPS] __read_mostly;
extern bool kvm_is_configuring_cpu_caps __read_mostly;
@@ -181,15 +183,17 @@ static inline int guest_cpuid_stepping(struct kvm_vcpu *vcpu)
return x86_stepping(best->eax);
}
-static inline bool supports_cpuid_fault(struct kvm_vcpu *vcpu)
+static inline bool cpuid_fault_enabled(struct kvm_vcpu *vcpu)
{
- return vcpu->arch.msr_platform_info & MSR_PLATFORM_INFO_CPUID_FAULT;
+ return (vcpu->arch.msr_misc_features_enables &
+ MSR_MISC_FEATURES_ENABLES_CPUID_FAULT) ||
+ (vcpu->arch.msr_hwcr & MSR_K7_HWCR_CPUID_USER_DIS);
}
-static inline bool cpuid_fault_enabled(struct kvm_vcpu *vcpu)
+static inline bool kvm_is_cpuid_allowed(struct kvm_vcpu *vcpu)
{
- return vcpu->arch.msr_misc_features_enables &
- MSR_MISC_FEATURES_ENABLES_CPUID_FAULT;
+ return !cpuid_fault_enabled(vcpu) || is_smm(vcpu) ||
+ !kvm_x86_call(get_cpl)(vcpu);
}
static __always_inline void kvm_cpu_cap_clear(unsigned int x86_feature)
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 8013dccb31102..585a8ceab220d 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -540,8 +540,9 @@ static int emulate_exception(struct x86_emulate_ctxt *ctxt, int vec,
return X86EMUL_PROPAGATE_FAULT;
}
-static int emulate_db(struct x86_emulate_ctxt *ctxt)
+static int emulate_db(struct x86_emulate_ctxt *ctxt, unsigned long dr6)
{
+ ctxt->exception.dr6 = dr6;
return emulate_exception(ctxt, DB_VECTOR, 0, false);
}
@@ -3593,12 +3594,8 @@ static int em_sti(struct x86_emulate_ctxt *ctxt)
static int em_cpuid(struct x86_emulate_ctxt *ctxt)
{
u32 eax, ebx, ecx, edx;
- u64 msr = 0;
- ctxt->ops->get_msr(ctxt, MSR_MISC_FEATURES_ENABLES, &msr);
- if (!ctxt->ops->is_smm(ctxt) &&
- (msr & MSR_MISC_FEATURES_ENABLES_CPUID_FAULT) &&
- ctxt->ops->cpl(ctxt))
+ if (!ctxt->ops->is_cpuid_allowed(ctxt))
return emulate_gp(ctxt, 0);
eax = reg_read(ctxt, VCPU_REGS_RAX);
@@ -3847,15 +3844,8 @@ static int check_dr_read(struct x86_emulate_ctxt *ctxt)
if ((cr4 & X86_CR4_DE) && (dr == 4 || dr == 5))
return emulate_ud(ctxt);
- if (ctxt->ops->get_dr(ctxt, 7) & DR7_GD) {
- ulong dr6;
-
- dr6 = ctxt->ops->get_dr(ctxt, 6);
- dr6 &= ~DR_TRAP_BITS;
- dr6 |= DR6_BD | DR6_ACTIVE_LOW;
- ctxt->ops->set_dr(ctxt, 6, dr6);
- return emulate_db(ctxt);
- }
+ if (ctxt->ops->get_effective_dr7(ctxt) & DR7_GD)
+ return emulate_db(ctxt, DR6_BD);
return X86EMUL_CONTINUE;
}
diff --git a/arch/x86/kvm/kvm_emulate.h b/arch/x86/kvm/kvm_emulate.h
index 0abff36d09942..3e375af15c035 100644
--- a/arch/x86/kvm/kvm_emulate.h
+++ b/arch/x86/kvm/kvm_emulate.h
@@ -22,9 +22,13 @@ enum x86_intercept_stage;
struct x86_exception {
u8 vector;
bool error_code_valid;
- u16 error_code;
+ u64 error_code;
bool nested_page_fault;
- u64 address; /* cr2 or nested page fault gpa */
+ union {
+ u64 address; /* cr2 or nested page fault gpa */
+ unsigned long dr6;
+ u64 payload;
+ };
u8 async_page_fault;
unsigned long exit_qualification;
};
@@ -211,6 +215,7 @@ struct x86_emulate_ops {
ulong (*get_cr)(struct x86_emulate_ctxt *ctxt, int cr);
int (*set_cr)(struct x86_emulate_ctxt *ctxt, int cr, ulong val);
int (*cpl)(struct x86_emulate_ctxt *ctxt);
+ ulong (*get_effective_dr7)(struct x86_emulate_ctxt *ctxt);
ulong (*get_dr)(struct x86_emulate_ctxt *ctxt, int dr);
int (*set_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong value);
int (*set_msr_with_filter)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 data);
@@ -225,6 +230,7 @@ struct x86_emulate_ops {
struct x86_instruction_info *info,
enum x86_intercept_stage stage);
+ bool (*is_cpuid_allowed)(struct x86_emulate_ctxt *ctxt);
bool (*get_cpuid)(struct x86_emulate_ctxt *ctxt, u32 *eax, u32 *ebx,
u32 *ecx, u32 *edx, bool exact_only);
bool (*guest_has_movbe)(struct x86_emulate_ctxt *ctxt);
@@ -520,13 +526,6 @@ enum x86_intercept {
nr_x86_intercepts
};
-/* Host execution mode. */
-#if defined(CONFIG_X86_32)
-#define X86EMUL_MODE_HOST X86EMUL_MODE_PROT32
-#elif defined(CONFIG_X86_64)
-#define X86EMUL_MODE_HOST X86EMUL_MODE_PROT64
-#endif
-
int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len, int emulation_type);
bool x86_page_table_writing_insn(struct x86_emulate_ctxt *ctxt);
#define EMULATION_FAILED -1
diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
index 07100bbfc2701..df3ae0c7ec2c3 100644
--- a/arch/x86/kvm/mmu/paging_tmpl.h
+++ b/arch/x86/kvm/mmu/paging_tmpl.h
@@ -328,6 +328,12 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
const int write_fault = access & PFERR_WRITE_MASK;
const int user_fault = access & PFERR_USER_MASK;
const int fetch_fault = access & PFERR_FETCH_MASK;
+ /*
+ * Note! Track the error_code that's common to legacy shadow paging
+ * and NPT shadow paging as a u16 to guard against unintentionally
+ * setting any of bits 63:16. Architecturally, the #PF error code is
+ * 32 bits, and Intel CPUs don't support settings bits 31:16.
+ */
u16 errcode = 0;
gpa_t real_gpa;
gfn_t gfn;
@@ -391,16 +397,6 @@ retry_walk:
nested_access | PFERR_GUEST_PAGE_MASK,
&walker->fault, 0);
- /*
- * FIXME: This can happen if emulation (for of an INS/OUTS
- * instruction) triggers a nested page fault. The exit
- * qualification / exit info field will incorrectly have
- * "guest page access" as the nested page fault's cause,
- * instead of "guest page structure access". To fix this,
- * the x86_exception struct should be augmented with enough
- * information to fix the exit_qualification or exit_info_1
- * fields.
- */
if (unlikely(real_gpa == INVALID_GPA))
return 0;
@@ -506,7 +502,8 @@ error:
* [2:0] - Derive from the access bits. The exit_qualification might be
* out of date if it is serving an EPT misconfiguration.
* [5:3] - Calculated by the page walk of the guest EPT page tables
- * [7:11] - Derived from [7:11] of real exit_qualification
+ * [7:8] - Derived from "fault stage" access bits
+ * [9:11] - Derived from [9:11] of real exit_qualification
*
* The other bits are set to 0.
*/
@@ -521,12 +518,22 @@ error:
walker->fault.exit_qualification |= EPT_VIOLATION_ACC_READ;
/*
+ * KVM doesn't emulate features that access GPAs directly, e.g.
+ * Intel Processor Trace. Assume the GVA is always valid; when
+ * propagating faults from hardware, KVM will discard this info
+ * and use the EXIT_QUALIFICATION bits from the VMCS.
+ */
+ walker->fault.exit_qualification |= EPT_VIOLATION_GVA_IS_VALID;
+
+ /*
* Accesses to guest paging structures are either "reads" or
* "read+write" accesses, so consider them the latter if write_fault
* is true.
*/
if (access & PFERR_GUEST_PAGE_MASK)
walker->fault.exit_qualification |= EPT_VIOLATION_ACC_READ;
+ else
+ walker->fault.exit_qualification |= EPT_VIOLATION_GVA_TRANSLATED;
/*
* Note, pte_access holds the raw RWX bits from the EPTE, not
@@ -542,6 +549,11 @@ error:
walker->fault.nested_page_fault = mmu != vcpu->arch.walk_mmu;
walker->fault.async_page_fault = false;
+#if PTTYPE != PTTYPE_EPT
+ if (walker->fault.nested_page_fault)
+ walker->fault.error_code |= access & PFERR_GUEST_FAULT_STAGE_MASK;
+#endif
+
trace_kvm_mmu_walker_error(walker->fault.error_code);
return 0;
}
@@ -807,7 +819,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
*/
if (!r) {
if (!fault->prefetch)
- kvm_inject_emulated_page_fault(vcpu, &walker.fault);
+ __kvm_inject_emulated_page_fault(vcpu, &walker.fault, true);
return RET_PF_RETRY;
}
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 1bf3e4804ad0a..80df0d040bb8b 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -34,23 +34,37 @@
#define CC KVM_NESTED_VMENTER_CONSISTENCY_CHECK
static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu,
- struct x86_exception *fault)
+ struct x86_exception *fault,
+ bool from_hardware)
{
struct vcpu_svm *svm = to_svm(vcpu);
struct vmcb *vmcb = svm->vmcb;
+ u64 fault_stage;
- if (vmcb->control.exit_code != SVM_EXIT_NPF) {
- /*
- * TODO: track the cause of the nested page fault, and
- * correctly fill in the high bits of exit_info_1.
- */
- vmcb->control.exit_code = SVM_EXIT_NPF;
- vmcb->control.exit_info_1 = (1ULL << 32);
- vmcb->control.exit_info_2 = fault->address;
- }
+ /*
+ * For hardware NPF exits, the GUEST_FAULT_STAGE bits are only
+ * available in the hardware exit_info_1, since the guest_mmu
+ * walker doesn't know whether the faulting GPA was a page table
+ * page or final page from L2's perspective.
+ */
+ if (from_hardware)
+ fault_stage = vmcb->control.exit_info_1 &
+ PFERR_GUEST_FAULT_STAGE_MASK;
+ else
+ fault_stage = fault->error_code & PFERR_GUEST_FAULT_STAGE_MASK;
+
+ /*
+ * All nested page faults should be annotated as occurring on the
+ * final translation *or* the page walk. Arbitrarily choose "final"
+ * if KVM is buggy and enumerated both or neither.
+ */
+ if (WARN_ON_ONCE(hweight64(fault_stage) != 1))
+ fault_stage = PFERR_GUEST_FINAL_MASK;
- vmcb->control.exit_info_1 &= ~0xffffffffULL;
- vmcb->control.exit_info_1 |= fault->error_code;
+ vmcb->control.exit_code = SVM_EXIT_NPF;
+ vmcb->control.exit_info_1 = fault_stage |
+ (fault->error_code & ~PFERR_GUEST_FAULT_STAGE_MASK);
+ vmcb->control.exit_info_2 = fault->address;
nested_svm_vmexit(svm);
}
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 940b97d4a8523..2cb20fec99743 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -3781,9 +3781,13 @@ static int snp_rmptable_psmash(kvm_pfn_t pfn)
static int snp_complete_psc_msr(struct kvm_vcpu *vcpu)
{
+ u64 hypercall_ret = READ_ONCE(vcpu->run->hypercall.ret);
struct vcpu_svm *svm = to_svm(vcpu);
- if (vcpu->run->hypercall.ret)
+ if (!kvm_is_valid_map_gpa_range_ret(hypercall_ret))
+ return -EINVAL;
+
+ if (hypercall_ret)
set_ghcb_msr(svm, GHCB_MSR_PSC_RESP_ERROR);
else
set_ghcb_msr(svm, GHCB_MSR_PSC_RESP);
@@ -3874,10 +3878,14 @@ static void __snp_complete_one_psc(struct vcpu_svm *svm)
static int snp_complete_one_psc(struct kvm_vcpu *vcpu)
{
+ u64 hypercall_ret = READ_ONCE(vcpu->run->hypercall.ret);
struct vcpu_svm *svm = to_svm(vcpu);
struct psc_buffer *psc = svm->sev_es.ghcb_sa;
- if (vcpu->run->hypercall.ret) {
+ if (!kvm_is_valid_map_gpa_range_ret(hypercall_ret))
+ return -EINVAL;
+
+ if (hypercall_ret) {
snp_complete_psc(svm, VMGEXIT_PSC_ERROR_GENERIC);
return 1; /* resume guest */
}
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 81e5a889a7942..d5b9426d6c06e 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -3674,13 +3674,8 @@ static int svm_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
struct vcpu_svm *svm = to_svm(vcpu);
struct kvm_run *kvm_run = vcpu->run;
- /* SEV-ES guests must use the CR write traps to track CR registers. */
- if (!is_sev_es_guest(vcpu)) {
- if (!svm_is_intercept(svm, INTERCEPT_CR0_WRITE))
- vcpu->arch.cr0 = svm->vmcb->save.cr0;
- if (npt_enabled)
- vcpu->arch.cr3 = svm->vmcb->save.cr3;
- }
+ if (unlikely(exit_fastpath == EXIT_FASTPATH_EXIT_USERSPACE))
+ return 0;
if (is_guest_mode(vcpu)) {
int vmexit;
@@ -4535,11 +4530,17 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu, u64 run_flags)
if (!static_cpu_has(X86_FEATURE_V_SPEC_CTRL))
x86_spec_ctrl_restore_host(svm->virt_spec_ctrl);
+ /* SEV-ES guests must use the CR write traps to track CR registers. */
if (!is_sev_es_guest(vcpu)) {
vcpu->arch.cr2 = svm->vmcb->save.cr2;
vcpu->arch.regs[VCPU_REGS_RAX] = svm->vmcb->save.rax;
vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp;
vcpu->arch.rip = svm->vmcb->save.rip;
+
+ if (!svm_is_intercept(svm, INTERCEPT_CR0_WRITE))
+ vcpu->arch.cr0 = svm->vmcb->save.cr0;
+ if (npt_enabled)
+ vcpu->arch.cr3 = svm->vmcb->save.cr3;
}
kvm_reset_dirty_registers(vcpu);
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 4690a4d23709d..30dcabc899a29 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -411,7 +411,8 @@ static void nested_ept_invalidate_addr(struct kvm_vcpu *vcpu, gpa_t eptp,
}
static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu,
- struct x86_exception *fault)
+ struct x86_exception *fault,
+ bool from_hardware)
{
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -444,13 +445,29 @@ static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu,
exit_qualification = 0;
} else {
u64 mask = EPT_VIOLATION_GVA_IS_VALID |
- EPT_VIOLATION_GVA_TRANSLATED;
+ EPT_VIOLATION_GVA_TRANSLATED;
+
if (vmx->nested.msrs.ept_caps & VMX_EPT_ADVANCED_VMEXIT_INFO_BIT)
mask |= EPT_VIOLATION_GVA_USER |
- EPT_VIOLATION_GVA_WRITABLE |
- EPT_VIOLATION_GVA_NX;
- exit_qualification = fault->exit_qualification;
- exit_qualification |= vmx_get_exit_qual(vcpu) & mask;
+ EPT_VIOLATION_GVA_WRITABLE |
+ EPT_VIOLATION_GVA_NX;
+
+ exit_qualification = fault->exit_qualification & ~mask;
+
+ /*
+ * Use the EXIT_QUALIFICATION from the VMCS if and only
+ * if the hardware VM-Exit from L2 was an EPT Violation.
+ * If the fault is synthesized, then EXIT_QUALIFICATION
+ * is stale and/or holds entirely different data. And
+ * conversely, KVM _must_ rely on EXIT_QUALIFICATION if
+ * the fault came from hardware, because KVM only sees
+ * and walks the faulting GPA.
+ */
+ if (from_hardware)
+ exit_qualification |= vmx_get_exit_qual(vcpu) & mask;
+ else
+ exit_qualification |= fault->exit_qualification & mask;
+
vm_exit_reason = EXIT_REASON_EPT_VIOLATION;
}
@@ -6535,6 +6552,8 @@ static bool nested_vmx_l0_wants_exit(struct kvm_vcpu *vcpu,
nested_evmcs_l2_tlb_flush_enabled(vcpu) &&
kvm_hv_is_tlb_flush_hcall(vcpu);
#endif
+ case EXIT_REASON_CPUID:
+ return !kvm_is_cpuid_allowed(vcpu);
default:
break;
}
diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
index b8c3d3d8bbfe5..738fd5ea92575 100644
--- a/arch/x86/kvm/vmx/tdx.c
+++ b/arch/x86/kvm/vmx/tdx.c
@@ -639,6 +639,12 @@ int tdx_vm_init(struct kvm *kvm)
kvm->arch.disabled_quirks |= KVM_X86_QUIRK_IGNORE_GUEST_PAT;
/*
+ * PMU support is provided by the TDX-Module (if enabled for the VM).
+ * From KVM's perspective, the VM doesn't have a virtual PMU.
+ */
+ kvm->arch.has_protected_pmu = true;
+
+ /*
* Because guest TD is protected, VMM can't parse the instruction in TD.
* Instead, guest uses MMIO hypercall. For unmodified device driver,
* #VE needs to be injected for MMIO and #VE handler in TD converts MMIO
@@ -1182,12 +1188,22 @@ static void __tdx_map_gpa(struct vcpu_tdx *tdx);
static int tdx_complete_vmcall_map_gpa(struct kvm_vcpu *vcpu)
{
+ u64 hypercall_ret = READ_ONCE(vcpu->run->hypercall.ret);
struct vcpu_tdx *tdx = to_tdx(vcpu);
+ long rc;
- if (vcpu->run->hypercall.ret) {
- tdvmcall_set_return_code(vcpu, TDVMCALL_STATUS_INVALID_OPERAND);
- tdx->vp_enter_args.r11 = tdx->map_gpa_next;
- return 1;
+ switch (hypercall_ret) {
+ case 0:
+ break;
+ case EAGAIN:
+ rc = TDVMCALL_STATUS_RETRY;
+ goto propagate_error;
+ case EINVAL:
+ rc = TDVMCALL_STATUS_INVALID_OPERAND;
+ goto propagate_error;
+ default:
+ WARN_ON_ONCE(kvm_is_valid_map_gpa_range_ret(hypercall_ret));
+ return -EINVAL;
}
tdx->map_gpa_next += TDX_MAP_GPA_MAX_LEN;
@@ -1200,13 +1216,17 @@ static int tdx_complete_vmcall_map_gpa(struct kvm_vcpu *vcpu)
* TDVMCALL_MAP_GPA, see comments in tdx_protected_apic_has_interrupt().
*/
if (kvm_vcpu_has_events(vcpu)) {
- tdvmcall_set_return_code(vcpu, TDVMCALL_STATUS_RETRY);
- tdx->vp_enter_args.r11 = tdx->map_gpa_next;
- return 1;
+ rc = TDVMCALL_STATUS_RETRY;
+ goto propagate_error;
}
__tdx_map_gpa(tdx);
return 0;
+
+propagate_error:
+ tdvmcall_set_return_code(vcpu, rc);
+ tdx->vp_enter_args.r11 = tdx->map_gpa_next;
+ return 1;
}
static void __tdx_map_gpa(struct vcpu_tdx *tdx)
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 20374d8790bba..cd528c8ea1409 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -1912,6 +1912,24 @@ void vmx_inject_exception(struct kvm_vcpu *vcpu)
u32 intr_info = ex->vector | INTR_INFO_VALID_MASK;
struct vcpu_vmx *vmx = to_vmx(vcpu);
+ /*
+ * When injecting a #DB, single-stepping is enabled in RFLAGS, and STI
+ * or MOV-SS blocking is active, set vmcs.PENDING_DBG_EXCEPTIONS.BS to
+ * prevent a false positive from VM-Entry consistency check. VM-Entry
+ * asserts that a single-step #DB _must_ be pending in this scenario,
+ * as the previous instruction cannot have toggled RFLAGS.TF 0=>1
+ * (because STI and POP/MOV don't modify RFLAGS), therefore the one
+ * instruction delay when activating single-step breakpoints must have
+ * already expired. However, the CPU isn't smart enough to peek at
+ * vmcs.VM_ENTRY_INTR_INFO_FIELD and so doesn't realize that yes, there
+ * is indeed a #DB pending/imminent.
+ */
+ if (ex->vector == DB_VECTOR &&
+ (vmx_get_rflags(vcpu) & X86_EFLAGS_TF) &&
+ vmx_get_interrupt_shadow(vcpu))
+ vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS,
+ vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS) | DR6_BS);
+
kvm_deliver_exception_payload(vcpu, ex);
if (ex->has_error_code) {
@@ -5495,26 +5513,9 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu)
* avoid single-step #DB and MTF updates, as ICEBP is
* higher priority. Note, skipping ICEBP still clears
* STI and MOVSS blocking.
- *
- * For all other #DBs, set vmcs.PENDING_DBG_EXCEPTIONS.BS
- * if single-step is enabled in RFLAGS and STI or MOVSS
- * blocking is active, as the CPU doesn't set the bit
- * on VM-Exit due to #DB interception. VM-Entry has a
- * consistency check that a single-step #DB is pending
- * in this scenario as the previous instruction cannot
- * have toggled RFLAGS.TF 0=>1 (because STI and POP/MOV
- * don't modify RFLAGS), therefore the one instruction
- * delay when activating single-step breakpoints must
- * have already expired. Note, the CPU sets/clears BS
- * as appropriate for all other VM-Exits types.
*/
if (is_icebp(intr_info))
WARN_ON(!skip_emulated_instruction(vcpu));
- else if ((vmx_get_rflags(vcpu) & X86_EFLAGS_TF) &&
- (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
- (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS)))
- vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS,
- vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS) | DR6_BS);
kvm_queue_exception_p(vcpu, DB_VECTOR, dr6);
return 1;
@@ -6715,6 +6716,9 @@ static int __vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
if (enable_pml && !is_guest_mode(vcpu))
vmx_flush_pml_buffer(vcpu);
+ if (unlikely(exit_fastpath == EXIT_FASTPATH_EXIT_USERSPACE))
+ return 0;
+
/*
* KVM should never reach this point with a pending nested VM-Enter.
* More specifically, short-circuiting VM-Entry to emulate L2 due to
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 4af832ecc56ba..54c552efb59e5 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -133,7 +133,6 @@ static void process_nmi(struct kvm_vcpu *vcpu);
static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags);
static void store_regs(struct kvm_vcpu *vcpu);
static int sync_regs(struct kvm_vcpu *vcpu);
-static int kvm_vcpu_do_singlestep(struct kvm_vcpu *vcpu);
static int __set_sregs2(struct kvm_vcpu *vcpu, struct kvm_sregs2 *sregs2);
static void __get_sregs2(struct kvm_vcpu *vcpu, struct kvm_sregs2 *sregs2);
@@ -152,6 +151,7 @@ struct kvm_x86_ops kvm_x86_ops __read_mostly;
#include <asm/kvm-x86-ops.h>
EXPORT_STATIC_CALL_GPL(kvm_x86_get_cs_db_l_bits);
EXPORT_STATIC_CALL_GPL(kvm_x86_cache_reg);
+EXPORT_STATIC_CALL_GPL(kvm_x86_get_cpl);
static bool __read_mostly ignore_msrs = 0;
module_param(ignore_msrs, bool, 0644);
@@ -970,7 +970,8 @@ static int complete_emulated_insn_gp(struct kvm_vcpu *vcpu, int err)
EMULTYPE_COMPLETE_USER_EXIT);
}
-void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
+void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault,
+ bool from_hardware)
{
++vcpu->stat.pf_guest;
@@ -987,8 +988,9 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
fault->address);
}
-void kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu,
- struct x86_exception *fault)
+void __kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu,
+ struct x86_exception *fault,
+ bool from_hardware)
{
struct kvm_mmu *fault_mmu;
WARN_ON_ONCE(fault->vector != PF_VECTOR);
@@ -1005,9 +1007,9 @@ void kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu,
kvm_mmu_invalidate_addr(vcpu, fault_mmu, fault->address,
KVM_MMU_ROOT_CURRENT);
- fault_mmu->inject_page_fault(vcpu, fault);
+ fault_mmu->inject_page_fault(vcpu, fault, from_hardware);
}
-EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_inject_emulated_page_fault);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(__kvm_inject_emulated_page_fault);
void kvm_inject_nmi(struct kvm_vcpu *vcpu)
{
@@ -1021,18 +1023,6 @@ void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code)
}
EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_queue_exception_e);
-/*
- * Checks if cpl <= required_cpl; if true, return true. Otherwise queue
- * a #GP and return false.
- */
-bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl)
-{
- if (kvm_x86_call(get_cpl)(vcpu) <= required_cpl)
- return true;
- kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
- return false;
-}
-
bool kvm_require_dr(struct kvm_vcpu *vcpu, int dr)
{
if ((dr != 4 && dr != 5) || !kvm_is_cr4_bit_set(vcpu, X86_CR4_DE))
@@ -1043,11 +1033,16 @@ bool kvm_require_dr(struct kvm_vcpu *vcpu, int dr)
}
EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_require_dr);
-static bool kvm_pv_async_pf_enabled(struct kvm_vcpu *vcpu)
+static bool __kvm_pv_async_pf_enabled(u64 data)
{
u64 mask = KVM_ASYNC_PF_ENABLED | KVM_ASYNC_PF_DELIVERY_AS_INT;
- return (vcpu->arch.apf.msr_en_val & mask) == mask;
+ return (data & mask) == mask;
+}
+
+static bool kvm_pv_async_pf_enabled(struct kvm_vcpu *vcpu)
+{
+ return __kvm_pv_async_pf_enabled(vcpu->arch.apf.msr_en_val);
}
static inline u64 pdptr_rsvd_bits(struct kvm_vcpu *vcpu)
@@ -1601,6 +1596,14 @@ unsigned long kvm_get_dr(struct kvm_vcpu *vcpu, int dr)
}
EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_get_dr);
+static unsigned long kvm_get_effective_dr7(struct kvm_vcpu *vcpu)
+{
+ if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
+ return vcpu->arch.guest_debug_dr7;
+
+ return vcpu->arch.dr7;
+}
+
int kvm_emulate_rdpmc(struct kvm_vcpu *vcpu)
{
u32 pmc = kvm_rcx_read(vcpu);
@@ -3648,23 +3651,19 @@ static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data)
if (!lapic_in_kernel(vcpu))
return data ? 1 : 0;
+ if (__kvm_pv_async_pf_enabled(data) &&
+ kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.apf.data, gpa,
+ sizeof(u64)))
+ return 1;
+
vcpu->arch.apf.msr_en_val = data;
- if (!kvm_pv_async_pf_enabled(vcpu)) {
+ if (__kvm_pv_async_pf_enabled(data)) {
+ kvm_async_pf_wakeup_all(vcpu);
+ } else {
kvm_clear_async_pf_completion_queue(vcpu);
kvm_async_pf_hash_reset(vcpu);
- return 0;
}
-
- if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.apf.data, gpa,
- sizeof(u64)))
- return 1;
-
- vcpu->arch.apf.send_always = (data & KVM_ASYNC_PF_SEND_ALWAYS);
- vcpu->arch.apf.delivery_as_pf_vmexit = data & KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT;
-
- kvm_async_pf_wakeup_all(vcpu);
-
return 0;
}
@@ -4003,22 +4002,28 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
break;
case MSR_EFER:
return set_efer(vcpu, msr_info);
- case MSR_K7_HWCR:
- data &= ~(u64)0x40; /* ignore flush filter disable */
- data &= ~(u64)0x100; /* ignore ignne emulation enable */
- data &= ~(u64)0x8; /* ignore TLB cache disable */
-
+ case MSR_K7_HWCR: {
/*
* Allow McStatusWrEn and TscFreqSel. (Linux guests from v3.2
* through at least v6.6 whine if TscFreqSel is clear,
* depending on F/M/S.
*/
- if (data & ~(BIT_ULL(18) | BIT_ULL(24))) {
+ u64 valid = BIT_ULL(18) | BIT_ULL(24);
+
+ data &= ~(u64)0x40; /* ignore flush filter disable */
+ data &= ~(u64)0x100; /* ignore ignne emulation enable */
+ data &= ~(u64)0x8; /* ignore TLB cache disable */
+
+ if (guest_cpu_cap_has(vcpu, X86_FEATURE_GP_ON_USER_CPUID))
+ valid |= MSR_K7_HWCR_CPUID_USER_DIS;
+
+ if (data & ~valid) {
kvm_pr_unimpl_wrmsr(vcpu, msr, data);
return 1;
}
vcpu->arch.msr_hwcr = data;
break;
+ }
case MSR_FAM10H_MMIO_CONF_BASE:
if (data != 0) {
kvm_pr_unimpl_wrmsr(vcpu, msr, data);
@@ -4265,7 +4270,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_MISC_FEATURES_ENABLES:
if (data & ~MSR_MISC_FEATURES_ENABLES_CPUID_FAULT ||
(data & MSR_MISC_FEATURES_ENABLES_CPUID_FAULT &&
- !supports_cpuid_fault(vcpu)))
+ !(vcpu->arch.msr_platform_info & MSR_PLATFORM_INFO_CPUID_FAULT)))
return 1;
vcpu->arch.msr_misc_features_enables = data;
break;
@@ -6915,6 +6920,10 @@ disable_exits_unlock:
if (!enable_pmu || (cap->args[0] & ~KVM_CAP_PMU_VALID_MASK))
break;
+ if (kvm->arch.has_protected_pmu &&
+ cap->args[0] != KVM_PMU_CAP_DISABLE)
+ break;
+
mutex_lock(&kvm->lock);
if (!kvm->created_vcpus && !kvm->arch.created_mediated_pmu) {
kvm->arch.enable_pmu = !(cap->args[0] & KVM_PMU_CAP_DISABLE);
@@ -8553,6 +8562,11 @@ static void emulator_wbinvd(struct x86_emulate_ctxt *ctxt)
kvm_emulate_wbinvd_noskip(emul_to_vcpu(ctxt));
}
+static unsigned long emulator_get_effective_dr7(struct x86_emulate_ctxt *ctxt)
+{
+ return kvm_get_effective_dr7(emul_to_vcpu(ctxt));
+}
+
static unsigned long emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr)
{
return kvm_get_dr(emul_to_vcpu(ctxt), dr);
@@ -8810,6 +8824,11 @@ static int emulator_intercept(struct x86_emulate_ctxt *ctxt,
&ctxt->exception);
}
+static bool emulator_is_cpuid_allowed(struct x86_emulate_ctxt *ctxt)
+{
+ return kvm_is_cpuid_allowed(emul_to_vcpu(ctxt));
+}
+
static bool emulator_get_cpuid(struct x86_emulate_ctxt *ctxt,
u32 *eax, u32 *ebx, u32 *ecx, u32 *edx,
bool exact_only)
@@ -8935,6 +8954,7 @@ static const struct x86_emulate_ops emulate_ops = {
.get_cr = emulator_get_cr,
.set_cr = emulator_set_cr,
.cpl = emulator_get_cpl,
+ .get_effective_dr7 = emulator_get_effective_dr7,
.get_dr = emulator_get_dr,
.set_dr = emulator_set_dr,
.set_msr_with_filter = emulator_set_msr_with_filter,
@@ -8946,6 +8966,7 @@ static const struct x86_emulate_ops emulate_ops = {
.wbinvd = emulator_wbinvd,
.fix_hypercall = emulator_fix_hypercall,
.intercept = emulator_intercept,
+ .is_cpuid_allowed = emulator_is_cpuid_allowed,
.get_cpuid = emulator_get_cpuid,
.guest_has_movbe = emulator_guest_has_movbe,
.guest_has_fxsr = emulator_guest_has_fxsr,
@@ -8981,17 +9002,36 @@ static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask)
}
}
-static void inject_emulated_exception(struct kvm_vcpu *vcpu)
+static int kvm_inject_emulated_db(struct kvm_vcpu *vcpu, unsigned long dr6)
{
- struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt;
+ struct kvm_run *kvm_run = vcpu->run;
+
+ if (vcpu->guest_debug & (KVM_GUESTDBG_USE_HW_BP | KVM_GUESTDBG_SINGLESTEP)) {
+ kvm_run->debug.arch.dr6 = dr6 | DR6_ACTIVE_LOW;
+ kvm_run->debug.arch.pc = kvm_get_linear_rip(vcpu);
+ kvm_run->debug.arch.exception = DB_VECTOR;
+ kvm_run->exit_reason = KVM_EXIT_DEBUG;
+ return 0;
+ }
- if (ctxt->exception.vector == PF_VECTOR)
- kvm_inject_emulated_page_fault(vcpu, &ctxt->exception);
- else if (ctxt->exception.error_code_valid)
- kvm_queue_exception_e(vcpu, ctxt->exception.vector,
- ctxt->exception.error_code);
+ kvm_queue_exception_p(vcpu, DB_VECTOR, dr6);
+ return 1;
+}
+
+static int inject_emulated_exception(struct kvm_vcpu *vcpu)
+{
+ struct x86_exception *ex = &vcpu->arch.emulate_ctxt->exception;
+
+ if (ex->vector == DB_VECTOR)
+ return kvm_inject_emulated_db(vcpu, ex->dr6);
+
+ if (ex->vector == PF_VECTOR)
+ kvm_inject_emulated_page_fault(vcpu, ex);
+ else if (ex->error_code_valid)
+ kvm_queue_exception_e(vcpu, ex->vector, ex->error_code);
else
- kvm_queue_exception(vcpu, ctxt->exception.vector);
+ kvm_queue_exception(vcpu, ex->vector);
+ return 1;
}
static struct x86_emulate_ctxt *alloc_emulate_ctxt(struct kvm_vcpu *vcpu)
@@ -9031,6 +9071,7 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
ctxt->interruptibility = 0;
ctxt->have_exception = false;
ctxt->exception.vector = -1;
+ ctxt->exception.payload = 0;
ctxt->perm_ok = false;
init_decode_cache(ctxt);
@@ -9248,21 +9289,6 @@ static int kvm_vcpu_check_hw_bp(unsigned long addr, u32 type, u32 dr7,
return dr6;
}
-static int kvm_vcpu_do_singlestep(struct kvm_vcpu *vcpu)
-{
- struct kvm_run *kvm_run = vcpu->run;
-
- if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
- kvm_run->debug.arch.dr6 = DR6_BS | DR6_ACTIVE_LOW;
- kvm_run->debug.arch.pc = kvm_get_linear_rip(vcpu);
- kvm_run->debug.arch.exception = DB_VECTOR;
- kvm_run->exit_reason = KVM_EXIT_DEBUG;
- return 0;
- }
- kvm_queue_exception_p(vcpu, DB_VECTOR, DR6_BS);
- return 1;
-}
-
int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu)
{
unsigned long rflags = kvm_x86_call(get_rflags)(vcpu);
@@ -9283,13 +9309,16 @@ int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu)
* that sets the TF flag".
*/
if (unlikely(rflags & X86_EFLAGS_TF))
- r = kvm_vcpu_do_singlestep(vcpu);
+ r = kvm_inject_emulated_db(vcpu, DR6_BS);
return r;
}
EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_skip_emulated_instruction);
static bool kvm_is_code_breakpoint_inhibited(struct kvm_vcpu *vcpu)
{
+ if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
+ return false;
+
if (kvm_get_rflags(vcpu) & X86_EFLAGS_RF)
return true;
@@ -9306,6 +9335,8 @@ static bool kvm_is_code_breakpoint_inhibited(struct kvm_vcpu *vcpu)
static bool kvm_vcpu_check_code_breakpoint(struct kvm_vcpu *vcpu,
int emulation_type, int *r)
{
+ unsigned long dr7 = kvm_get_effective_dr7(vcpu);
+
WARN_ON_ONCE(emulation_type & EMULTYPE_NO_DECODE);
/*
@@ -9326,34 +9357,14 @@ static bool kvm_vcpu_check_code_breakpoint(struct kvm_vcpu *vcpu,
EMULTYPE_TRAP_UD | EMULTYPE_VMWARE_GP | EMULTYPE_PF))
return false;
- if (unlikely(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) &&
- (vcpu->arch.guest_debug_dr7 & DR7_BP_EN_MASK)) {
- struct kvm_run *kvm_run = vcpu->run;
- unsigned long eip = kvm_get_linear_rip(vcpu);
- u32 dr6 = kvm_vcpu_check_hw_bp(eip, 0,
- vcpu->arch.guest_debug_dr7,
- vcpu->arch.eff_db);
-
- if (dr6 != 0) {
- kvm_run->debug.arch.dr6 = dr6 | DR6_ACTIVE_LOW;
- kvm_run->debug.arch.pc = eip;
- kvm_run->debug.arch.exception = DB_VECTOR;
- kvm_run->exit_reason = KVM_EXIT_DEBUG;
- *r = 0;
- return true;
- }
- }
-
- if (unlikely(vcpu->arch.dr7 & DR7_BP_EN_MASK) &&
+ if (unlikely(dr7 & DR7_BP_EN_MASK) &&
!kvm_is_code_breakpoint_inhibited(vcpu)) {
unsigned long eip = kvm_get_linear_rip(vcpu);
- u32 dr6 = kvm_vcpu_check_hw_bp(eip, 0,
- vcpu->arch.dr7,
- vcpu->arch.db);
+ u32 dr6 = kvm_vcpu_check_hw_bp(eip, 0, dr7,
+ vcpu->arch.eff_db);
- if (dr6 != 0) {
- kvm_queue_exception_p(vcpu, DB_VECTOR, dr6);
- *r = 1;
+ if (dr6) {
+ *r = kvm_inject_emulated_db(vcpu, dr6);
return true;
}
}
@@ -9499,8 +9510,7 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
*/
WARN_ON_ONCE(ctxt->exception.vector == UD_VECTOR ||
exception_type(ctxt->exception.vector) == EXCPT_TRAP);
- inject_emulated_exception(vcpu);
- return 1;
+ return inject_emulated_exception(vcpu);
}
return handle_emulation_failure(vcpu, emulation_type);
}
@@ -9595,8 +9605,7 @@ restart:
if (ctxt->have_exception) {
WARN_ON_ONCE(vcpu->mmio_needed && !vcpu->mmio_is_write);
vcpu->mmio_needed = false;
- r = 1;
- inject_emulated_exception(vcpu);
+ r = inject_emulated_exception(vcpu);
} else if (vcpu->arch.pio.count) {
if (!vcpu->arch.pio.in) {
/* FIXME: return into emulator if single-stepping. */
@@ -9639,7 +9648,7 @@ writeback:
kvm_pmu_branch_retired(vcpu);
kvm_rip_write(vcpu, ctxt->eip);
if (r && (ctxt->tf || (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)))
- r = kvm_vcpu_do_singlestep(vcpu);
+ r = kvm_inject_emulated_db(vcpu, DR6_BS);
kvm_x86_call(update_emulated_instruction)(vcpu);
__kvm_set_rflags(vcpu, ctxt->eflags);
}
@@ -11593,9 +11602,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
if (vcpu->arch.apic_attention)
kvm_lapic_sync_from_vapic(vcpu);
- if (unlikely(exit_fastpath == EXIT_FASTPATH_EXIT_USERSPACE))
- return 0;
-
r = kvm_x86_call(handle_exit)(vcpu, exit_fastpath);
return r;
@@ -13368,7 +13374,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
kvm->arch.default_tsc_khz = max_tsc_khz ? : tsc_khz;
kvm->arch.apic_bus_cycle_ns = APIC_BUS_CYCLE_NS_DEFAULT;
kvm->arch.guest_can_read_msr_platform_info = true;
- kvm->arch.enable_pmu = enable_pmu;
+ kvm->arch.enable_pmu = enable_pmu && !kvm->arch.has_protected_pmu;
#if IS_ENABLED(CONFIG_HYPERV)
spin_lock_init(&kvm->arch.hv_root_tdp_lock);
@@ -14013,7 +14019,7 @@ static bool kvm_can_deliver_async_pf(struct kvm_vcpu *vcpu)
if (!kvm_pv_async_pf_enabled(vcpu))
return false;
- if (!vcpu->arch.apf.send_always &&
+ if (!(vcpu->arch.apf.msr_en_val & KVM_ASYNC_PF_SEND_ALWAYS) &&
(vcpu->arch.guest_state_protected || !kvm_x86_call(get_cpl)(vcpu)))
return false;
@@ -14022,7 +14028,7 @@ static bool kvm_can_deliver_async_pf(struct kvm_vcpu *vcpu)
* L1 needs to opt into the special #PF vmexits that are
* used to deliver async page faults.
*/
- return vcpu->arch.apf.delivery_as_pf_vmexit;
+ return vcpu->arch.apf.msr_en_val & KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT;
} else {
/*
* Play it safe in case the guest temporarily disables paging.
@@ -14066,7 +14072,7 @@ bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
fault.nested_page_fault = false;
fault.address = work->arch.token;
fault.async_page_fault = true;
- kvm_inject_page_fault(vcpu, &fault);
+ kvm_inject_page_fault(vcpu, &fault, false);
return true;
} else {
/*
@@ -14237,7 +14243,7 @@ void kvm_fixup_and_inject_pf_error(struct kvm_vcpu *vcpu, gva_t gva, u16 error_c
fault.address = gva;
fault.async_page_fault = false;
}
- vcpu->arch.walk_mmu->inject_page_fault(vcpu, &fault);
+ vcpu->arch.walk_mmu->inject_page_fault(vcpu, &fault, true);
}
EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_fixup_and_inject_pf_error);
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 38a905fa86de2..aa7d5b757fb54 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -754,6 +754,12 @@ static inline void kvm_prepare_emulated_mmio_exit(struct kvm_vcpu *vcpu,
frag->data, vcpu->mmio_is_write);
}
+static inline bool kvm_is_valid_map_gpa_range_ret(u64 hypercall_ret)
+{
+ return !hypercall_ret || hypercall_ret == EINVAL ||
+ hypercall_ret == EAGAIN;
+}
+
static inline bool user_exit_on_hypercall(struct kvm *kvm, unsigned long hc_nr)
{
return kvm->arch.hypercall_exit_enabled & BIT(hc_nr);