diff options
| author | Sean Christopherson <seanjc@google.com> | 2026-05-27 19:12:29 -0700 |
|---|---|---|
| committer | Sean Christopherson <seanjc@google.com> | 2026-05-27 19:12:29 -0700 |
| commit | bc5f0a95ae3c51a48f18a6d0eaa9483b37c1dcc6 (patch) | |
| tree | ab9093d8cb806bba964c735f465976108cf98b14 /arch | |
| parent | 5ada7ec7b3d14ec7efb09d174115697ed5319d7b (diff) | |
| parent | 5d40e5b49442437fe9dfd2577f7b17c07dbefb92 (diff) | |
| download | linux-next-history-bc5f0a95ae3c51a48f18a6d0eaa9483b37c1dcc6.tar.gz | |
Merge branch 'misc'
* misc: (30 commits)
KVM: SEV: Restrict userspace return codes for KVM_HC_MAP_GPA_RANGE
KVM: TDX: Allow userspace to return errors to guest for MAPGPA
KVM: selftests: Update hwcr_msr_test for CPUID faulting bit
KVM: x86: Virtualize AMD CPUID faulting
KVM: x86: Remove supports_cpuid_fault() helper
KVM: x86: Prioritize CPUID faulting over CPUID VM-exits in nested VMX
KVM: x86: Consolidate CPUID fault handling for emulator and interception logic
KVM: x86: Treat KVM's virtual PMU as disabled for TDX VMs
KVM: selftests: Add nested page fault injection test
KVM: VMX: Synthesize nested EPT violation GVA_IS_VALID/GVA_TRANSLATED bits
KVM: SVM: Fix nested NPF injection of PFERR_GUEST_{PAGE,FINAL}_MASK bits
KVM: x86: Tell ->inject_page_fault() whether or a fault came from hardware
KVM: x86: Widen x86_exception's error_code to 64 bits
MAINTAINERS: KVM: Include maintainer profile
KVM: x86: Remove unused X86EMUL_MODE_HOST define
KVM: selftests: Verify VMX's GUEST_PENDING_DBG_EXCEPTIONS.BS Consistency Check
KVM: selftests: Verify guest debug DR7.GD checking during instruction emulation
KVM: selftests: Add all (known) EFLAGS bit definitions
KVM: x86: Drop kvm_vcpu_do_singlestep() now that it's been gutted
KVM: x86: Move KVM_GUESTDBG_SINGLESTEP handling into kvm_inject_emulated_db()
...
Diffstat (limited to 'arch')
| -rw-r--r-- | arch/x86/include/asm/kvm_host.h | 24 | ||||
| -rw-r--r-- | arch/x86/include/asm/msr-index.h | 1 | ||||
| -rw-r--r-- | arch/x86/kvm/cpuid.c | 7 | ||||
| -rw-r--r-- | arch/x86/kvm/cpuid.h | 14 | ||||
| -rw-r--r-- | arch/x86/kvm/emulate.c | 20 | ||||
| -rw-r--r-- | arch/x86/kvm/kvm_emulate.h | 17 | ||||
| -rw-r--r-- | arch/x86/kvm/mmu/paging_tmpl.h | 36 | ||||
| -rw-r--r-- | arch/x86/kvm/svm/nested.c | 38 | ||||
| -rw-r--r-- | arch/x86/kvm/svm/sev.c | 12 | ||||
| -rw-r--r-- | arch/x86/kvm/svm/svm.c | 15 | ||||
| -rw-r--r-- | arch/x86/kvm/vmx/nested.c | 31 | ||||
| -rw-r--r-- | arch/x86/kvm/vmx/tdx.c | 34 | ||||
| -rw-r--r-- | arch/x86/kvm/vmx/vmx.c | 38 | ||||
| -rw-r--r-- | arch/x86/kvm/x86.c | 208 | ||||
| -rw-r--r-- | arch/x86/kvm/x86.h | 6 |
15 files changed, 298 insertions, 203 deletions
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index f4c59a5dafe28..e3c48bf988ab4 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -284,6 +284,8 @@ enum x86_intercept_stage; #define PFERR_GUEST_RMP_MASK BIT_ULL(31) #define PFERR_GUEST_FINAL_MASK BIT_ULL(32) #define PFERR_GUEST_PAGE_MASK BIT_ULL(33) +#define PFERR_GUEST_FAULT_STAGE_MASK \ + (PFERR_GUEST_FINAL_MASK | PFERR_GUEST_PAGE_MASK) #define PFERR_GUEST_ENC_MASK BIT_ULL(34) #define PFERR_GUEST_SIZEM_MASK BIT_ULL(35) #define PFERR_GUEST_VMPL_MASK BIT_ULL(36) @@ -484,7 +486,8 @@ struct kvm_mmu { u64 (*get_pdptr)(struct kvm_vcpu *vcpu, int index); int (*page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault); void (*inject_page_fault)(struct kvm_vcpu *vcpu, - struct x86_exception *fault); + struct x86_exception *fault, + bool from_hardware); gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, gpa_t gva_or_gpa, u64 access, struct x86_exception *exception); @@ -1057,8 +1060,6 @@ struct kvm_vcpu_arch { u16 vec; u32 id; u32 host_apf_flags; - bool send_always; - bool delivery_as_pf_vmexit; bool pageready_pending; } apf; @@ -1441,6 +1442,7 @@ struct kvm_arch { bool has_private_mem; bool has_protected_state; bool has_protected_eoi; + bool has_protected_pmu; bool pre_fault_allowed; struct hlist_head *mmu_page_hash; struct list_head active_mmu_pages; @@ -2307,10 +2309,18 @@ void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code); void kvm_queue_exception_p(struct kvm_vcpu *vcpu, unsigned nr, unsigned long payload); void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned int nr, bool has_error_code, u32 error_code); -void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault); -void kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu, - struct x86_exception *fault); -bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl); +void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault, + bool from_hardware); +void __kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu, + struct x86_exception *fault, + bool from_hardware); + +static inline void kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu, + struct x86_exception *fault) +{ + __kvm_inject_emulated_page_fault(vcpu, fault, false); +} + bool kvm_require_dr(struct kvm_vcpu *vcpu, int dr); static inline int __kvm_irq_line_state(unsigned long *irq_state, diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index a14a0f43e04ae..f534f150d1c59 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -898,6 +898,7 @@ #define MSR_K7_HWCR_IRPERF_EN_BIT 30 #define MSR_K7_HWCR_IRPERF_EN BIT_ULL(MSR_K7_HWCR_IRPERF_EN_BIT) #define MSR_K7_HWCR_CPUID_USER_DIS_BIT 35 +#define MSR_K7_HWCR_CPUID_USER_DIS BIT_ULL(MSR_K7_HWCR_CPUID_USER_DIS_BIT) #define MSR_K7_FID_VID_CTL 0xc0010041 #define MSR_K7_FID_VID_STATUS 0xc0010042 #define MSR_K7_HWCR_CPB_DIS_BIT 25 diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index e69156b54cfff..8e5340dd26211 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -1248,7 +1248,7 @@ void kvm_initialize_cpu_caps(void) F(AUTOIBRS), EMULATED_F(NO_SMM_CTL_MSR), /* PrefetchCtlMsr */ - /* GpOnUserCpuid */ + EMULATED_F(GP_ON_USER_CPUID), /* EPSF */ F(PREFETCHI), F(AVX512_BMM), @@ -2161,9 +2161,10 @@ int kvm_emulate_cpuid(struct kvm_vcpu *vcpu) { u32 eax, ebx, ecx, edx; - if (!is_smm(vcpu) && cpuid_fault_enabled(vcpu) && - !kvm_require_cpl(vcpu, 0)) + if (!kvm_is_cpuid_allowed(vcpu)) { + kvm_queue_exception_e(vcpu, GP_VECTOR, 0); return 1; + } eax = kvm_rax_read(vcpu); ecx = kvm_rcx_read(vcpu); diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index 039b8e6f40baf..fc96ba86c644d 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -7,6 +7,8 @@ #include <asm/processor.h> #include <uapi/asm/kvm_para.h> +#include "smm.h" + extern u32 kvm_cpu_caps[NR_KVM_CPU_CAPS] __read_mostly; extern bool kvm_is_configuring_cpu_caps __read_mostly; @@ -181,15 +183,17 @@ static inline int guest_cpuid_stepping(struct kvm_vcpu *vcpu) return x86_stepping(best->eax); } -static inline bool supports_cpuid_fault(struct kvm_vcpu *vcpu) +static inline bool cpuid_fault_enabled(struct kvm_vcpu *vcpu) { - return vcpu->arch.msr_platform_info & MSR_PLATFORM_INFO_CPUID_FAULT; + return (vcpu->arch.msr_misc_features_enables & + MSR_MISC_FEATURES_ENABLES_CPUID_FAULT) || + (vcpu->arch.msr_hwcr & MSR_K7_HWCR_CPUID_USER_DIS); } -static inline bool cpuid_fault_enabled(struct kvm_vcpu *vcpu) +static inline bool kvm_is_cpuid_allowed(struct kvm_vcpu *vcpu) { - return vcpu->arch.msr_misc_features_enables & - MSR_MISC_FEATURES_ENABLES_CPUID_FAULT; + return !cpuid_fault_enabled(vcpu) || is_smm(vcpu) || + !kvm_x86_call(get_cpl)(vcpu); } static __always_inline void kvm_cpu_cap_clear(unsigned int x86_feature) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 8013dccb31102..585a8ceab220d 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -540,8 +540,9 @@ static int emulate_exception(struct x86_emulate_ctxt *ctxt, int vec, return X86EMUL_PROPAGATE_FAULT; } -static int emulate_db(struct x86_emulate_ctxt *ctxt) +static int emulate_db(struct x86_emulate_ctxt *ctxt, unsigned long dr6) { + ctxt->exception.dr6 = dr6; return emulate_exception(ctxt, DB_VECTOR, 0, false); } @@ -3593,12 +3594,8 @@ static int em_sti(struct x86_emulate_ctxt *ctxt) static int em_cpuid(struct x86_emulate_ctxt *ctxt) { u32 eax, ebx, ecx, edx; - u64 msr = 0; - ctxt->ops->get_msr(ctxt, MSR_MISC_FEATURES_ENABLES, &msr); - if (!ctxt->ops->is_smm(ctxt) && - (msr & MSR_MISC_FEATURES_ENABLES_CPUID_FAULT) && - ctxt->ops->cpl(ctxt)) + if (!ctxt->ops->is_cpuid_allowed(ctxt)) return emulate_gp(ctxt, 0); eax = reg_read(ctxt, VCPU_REGS_RAX); @@ -3847,15 +3844,8 @@ static int check_dr_read(struct x86_emulate_ctxt *ctxt) if ((cr4 & X86_CR4_DE) && (dr == 4 || dr == 5)) return emulate_ud(ctxt); - if (ctxt->ops->get_dr(ctxt, 7) & DR7_GD) { - ulong dr6; - - dr6 = ctxt->ops->get_dr(ctxt, 6); - dr6 &= ~DR_TRAP_BITS; - dr6 |= DR6_BD | DR6_ACTIVE_LOW; - ctxt->ops->set_dr(ctxt, 6, dr6); - return emulate_db(ctxt); - } + if (ctxt->ops->get_effective_dr7(ctxt) & DR7_GD) + return emulate_db(ctxt, DR6_BD); return X86EMUL_CONTINUE; } diff --git a/arch/x86/kvm/kvm_emulate.h b/arch/x86/kvm/kvm_emulate.h index 0abff36d09942..3e375af15c035 100644 --- a/arch/x86/kvm/kvm_emulate.h +++ b/arch/x86/kvm/kvm_emulate.h @@ -22,9 +22,13 @@ enum x86_intercept_stage; struct x86_exception { u8 vector; bool error_code_valid; - u16 error_code; + u64 error_code; bool nested_page_fault; - u64 address; /* cr2 or nested page fault gpa */ + union { + u64 address; /* cr2 or nested page fault gpa */ + unsigned long dr6; + u64 payload; + }; u8 async_page_fault; unsigned long exit_qualification; }; @@ -211,6 +215,7 @@ struct x86_emulate_ops { ulong (*get_cr)(struct x86_emulate_ctxt *ctxt, int cr); int (*set_cr)(struct x86_emulate_ctxt *ctxt, int cr, ulong val); int (*cpl)(struct x86_emulate_ctxt *ctxt); + ulong (*get_effective_dr7)(struct x86_emulate_ctxt *ctxt); ulong (*get_dr)(struct x86_emulate_ctxt *ctxt, int dr); int (*set_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong value); int (*set_msr_with_filter)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 data); @@ -225,6 +230,7 @@ struct x86_emulate_ops { struct x86_instruction_info *info, enum x86_intercept_stage stage); + bool (*is_cpuid_allowed)(struct x86_emulate_ctxt *ctxt); bool (*get_cpuid)(struct x86_emulate_ctxt *ctxt, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx, bool exact_only); bool (*guest_has_movbe)(struct x86_emulate_ctxt *ctxt); @@ -520,13 +526,6 @@ enum x86_intercept { nr_x86_intercepts }; -/* Host execution mode. */ -#if defined(CONFIG_X86_32) -#define X86EMUL_MODE_HOST X86EMUL_MODE_PROT32 -#elif defined(CONFIG_X86_64) -#define X86EMUL_MODE_HOST X86EMUL_MODE_PROT64 -#endif - int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len, int emulation_type); bool x86_page_table_writing_insn(struct x86_emulate_ctxt *ctxt); #define EMULATION_FAILED -1 diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h index 07100bbfc2701..df3ae0c7ec2c3 100644 --- a/arch/x86/kvm/mmu/paging_tmpl.h +++ b/arch/x86/kvm/mmu/paging_tmpl.h @@ -328,6 +328,12 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker, const int write_fault = access & PFERR_WRITE_MASK; const int user_fault = access & PFERR_USER_MASK; const int fetch_fault = access & PFERR_FETCH_MASK; + /* + * Note! Track the error_code that's common to legacy shadow paging + * and NPT shadow paging as a u16 to guard against unintentionally + * setting any of bits 63:16. Architecturally, the #PF error code is + * 32 bits, and Intel CPUs don't support settings bits 31:16. + */ u16 errcode = 0; gpa_t real_gpa; gfn_t gfn; @@ -391,16 +397,6 @@ retry_walk: nested_access | PFERR_GUEST_PAGE_MASK, &walker->fault, 0); - /* - * FIXME: This can happen if emulation (for of an INS/OUTS - * instruction) triggers a nested page fault. The exit - * qualification / exit info field will incorrectly have - * "guest page access" as the nested page fault's cause, - * instead of "guest page structure access". To fix this, - * the x86_exception struct should be augmented with enough - * information to fix the exit_qualification or exit_info_1 - * fields. - */ if (unlikely(real_gpa == INVALID_GPA)) return 0; @@ -506,7 +502,8 @@ error: * [2:0] - Derive from the access bits. The exit_qualification might be * out of date if it is serving an EPT misconfiguration. * [5:3] - Calculated by the page walk of the guest EPT page tables - * [7:11] - Derived from [7:11] of real exit_qualification + * [7:8] - Derived from "fault stage" access bits + * [9:11] - Derived from [9:11] of real exit_qualification * * The other bits are set to 0. */ @@ -521,12 +518,22 @@ error: walker->fault.exit_qualification |= EPT_VIOLATION_ACC_READ; /* + * KVM doesn't emulate features that access GPAs directly, e.g. + * Intel Processor Trace. Assume the GVA is always valid; when + * propagating faults from hardware, KVM will discard this info + * and use the EXIT_QUALIFICATION bits from the VMCS. + */ + walker->fault.exit_qualification |= EPT_VIOLATION_GVA_IS_VALID; + + /* * Accesses to guest paging structures are either "reads" or * "read+write" accesses, so consider them the latter if write_fault * is true. */ if (access & PFERR_GUEST_PAGE_MASK) walker->fault.exit_qualification |= EPT_VIOLATION_ACC_READ; + else + walker->fault.exit_qualification |= EPT_VIOLATION_GVA_TRANSLATED; /* * Note, pte_access holds the raw RWX bits from the EPTE, not @@ -542,6 +549,11 @@ error: walker->fault.nested_page_fault = mmu != vcpu->arch.walk_mmu; walker->fault.async_page_fault = false; +#if PTTYPE != PTTYPE_EPT + if (walker->fault.nested_page_fault) + walker->fault.error_code |= access & PFERR_GUEST_FAULT_STAGE_MASK; +#endif + trace_kvm_mmu_walker_error(walker->fault.error_code); return 0; } @@ -807,7 +819,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault */ if (!r) { if (!fault->prefetch) - kvm_inject_emulated_page_fault(vcpu, &walker.fault); + __kvm_inject_emulated_page_fault(vcpu, &walker.fault, true); return RET_PF_RETRY; } diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 1bf3e4804ad0a..80df0d040bb8b 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -34,23 +34,37 @@ #define CC KVM_NESTED_VMENTER_CONSISTENCY_CHECK static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu, - struct x86_exception *fault) + struct x86_exception *fault, + bool from_hardware) { struct vcpu_svm *svm = to_svm(vcpu); struct vmcb *vmcb = svm->vmcb; + u64 fault_stage; - if (vmcb->control.exit_code != SVM_EXIT_NPF) { - /* - * TODO: track the cause of the nested page fault, and - * correctly fill in the high bits of exit_info_1. - */ - vmcb->control.exit_code = SVM_EXIT_NPF; - vmcb->control.exit_info_1 = (1ULL << 32); - vmcb->control.exit_info_2 = fault->address; - } + /* + * For hardware NPF exits, the GUEST_FAULT_STAGE bits are only + * available in the hardware exit_info_1, since the guest_mmu + * walker doesn't know whether the faulting GPA was a page table + * page or final page from L2's perspective. + */ + if (from_hardware) + fault_stage = vmcb->control.exit_info_1 & + PFERR_GUEST_FAULT_STAGE_MASK; + else + fault_stage = fault->error_code & PFERR_GUEST_FAULT_STAGE_MASK; + + /* + * All nested page faults should be annotated as occurring on the + * final translation *or* the page walk. Arbitrarily choose "final" + * if KVM is buggy and enumerated both or neither. + */ + if (WARN_ON_ONCE(hweight64(fault_stage) != 1)) + fault_stage = PFERR_GUEST_FINAL_MASK; - vmcb->control.exit_info_1 &= ~0xffffffffULL; - vmcb->control.exit_info_1 |= fault->error_code; + vmcb->control.exit_code = SVM_EXIT_NPF; + vmcb->control.exit_info_1 = fault_stage | + (fault->error_code & ~PFERR_GUEST_FAULT_STAGE_MASK); + vmcb->control.exit_info_2 = fault->address; nested_svm_vmexit(svm); } diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 940b97d4a8523..2cb20fec99743 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -3781,9 +3781,13 @@ static int snp_rmptable_psmash(kvm_pfn_t pfn) static int snp_complete_psc_msr(struct kvm_vcpu *vcpu) { + u64 hypercall_ret = READ_ONCE(vcpu->run->hypercall.ret); struct vcpu_svm *svm = to_svm(vcpu); - if (vcpu->run->hypercall.ret) + if (!kvm_is_valid_map_gpa_range_ret(hypercall_ret)) + return -EINVAL; + + if (hypercall_ret) set_ghcb_msr(svm, GHCB_MSR_PSC_RESP_ERROR); else set_ghcb_msr(svm, GHCB_MSR_PSC_RESP); @@ -3874,10 +3878,14 @@ static void __snp_complete_one_psc(struct vcpu_svm *svm) static int snp_complete_one_psc(struct kvm_vcpu *vcpu) { + u64 hypercall_ret = READ_ONCE(vcpu->run->hypercall.ret); struct vcpu_svm *svm = to_svm(vcpu); struct psc_buffer *psc = svm->sev_es.ghcb_sa; - if (vcpu->run->hypercall.ret) { + if (!kvm_is_valid_map_gpa_range_ret(hypercall_ret)) + return -EINVAL; + + if (hypercall_ret) { snp_complete_psc(svm, VMGEXIT_PSC_ERROR_GENERIC); return 1; /* resume guest */ } diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 81e5a889a7942..d5b9426d6c06e 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -3674,13 +3674,8 @@ static int svm_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath) struct vcpu_svm *svm = to_svm(vcpu); struct kvm_run *kvm_run = vcpu->run; - /* SEV-ES guests must use the CR write traps to track CR registers. */ - if (!is_sev_es_guest(vcpu)) { - if (!svm_is_intercept(svm, INTERCEPT_CR0_WRITE)) - vcpu->arch.cr0 = svm->vmcb->save.cr0; - if (npt_enabled) - vcpu->arch.cr3 = svm->vmcb->save.cr3; - } + if (unlikely(exit_fastpath == EXIT_FASTPATH_EXIT_USERSPACE)) + return 0; if (is_guest_mode(vcpu)) { int vmexit; @@ -4535,11 +4530,17 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu, u64 run_flags) if (!static_cpu_has(X86_FEATURE_V_SPEC_CTRL)) x86_spec_ctrl_restore_host(svm->virt_spec_ctrl); + /* SEV-ES guests must use the CR write traps to track CR registers. */ if (!is_sev_es_guest(vcpu)) { vcpu->arch.cr2 = svm->vmcb->save.cr2; vcpu->arch.regs[VCPU_REGS_RAX] = svm->vmcb->save.rax; vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp; vcpu->arch.rip = svm->vmcb->save.rip; + + if (!svm_is_intercept(svm, INTERCEPT_CR0_WRITE)) + vcpu->arch.cr0 = svm->vmcb->save.cr0; + if (npt_enabled) + vcpu->arch.cr3 = svm->vmcb->save.cr3; } kvm_reset_dirty_registers(vcpu); diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 4690a4d23709d..30dcabc899a29 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -411,7 +411,8 @@ static void nested_ept_invalidate_addr(struct kvm_vcpu *vcpu, gpa_t eptp, } static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu, - struct x86_exception *fault) + struct x86_exception *fault, + bool from_hardware) { struct vmcs12 *vmcs12 = get_vmcs12(vcpu); struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -444,13 +445,29 @@ static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu, exit_qualification = 0; } else { u64 mask = EPT_VIOLATION_GVA_IS_VALID | - EPT_VIOLATION_GVA_TRANSLATED; + EPT_VIOLATION_GVA_TRANSLATED; + if (vmx->nested.msrs.ept_caps & VMX_EPT_ADVANCED_VMEXIT_INFO_BIT) mask |= EPT_VIOLATION_GVA_USER | - EPT_VIOLATION_GVA_WRITABLE | - EPT_VIOLATION_GVA_NX; - exit_qualification = fault->exit_qualification; - exit_qualification |= vmx_get_exit_qual(vcpu) & mask; + EPT_VIOLATION_GVA_WRITABLE | + EPT_VIOLATION_GVA_NX; + + exit_qualification = fault->exit_qualification & ~mask; + + /* + * Use the EXIT_QUALIFICATION from the VMCS if and only + * if the hardware VM-Exit from L2 was an EPT Violation. + * If the fault is synthesized, then EXIT_QUALIFICATION + * is stale and/or holds entirely different data. And + * conversely, KVM _must_ rely on EXIT_QUALIFICATION if + * the fault came from hardware, because KVM only sees + * and walks the faulting GPA. + */ + if (from_hardware) + exit_qualification |= vmx_get_exit_qual(vcpu) & mask; + else + exit_qualification |= fault->exit_qualification & mask; + vm_exit_reason = EXIT_REASON_EPT_VIOLATION; } @@ -6535,6 +6552,8 @@ static bool nested_vmx_l0_wants_exit(struct kvm_vcpu *vcpu, nested_evmcs_l2_tlb_flush_enabled(vcpu) && kvm_hv_is_tlb_flush_hcall(vcpu); #endif + case EXIT_REASON_CPUID: + return !kvm_is_cpuid_allowed(vcpu); default: break; } diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c index b8c3d3d8bbfe5..738fd5ea92575 100644 --- a/arch/x86/kvm/vmx/tdx.c +++ b/arch/x86/kvm/vmx/tdx.c @@ -639,6 +639,12 @@ int tdx_vm_init(struct kvm *kvm) kvm->arch.disabled_quirks |= KVM_X86_QUIRK_IGNORE_GUEST_PAT; /* + * PMU support is provided by the TDX-Module (if enabled for the VM). + * From KVM's perspective, the VM doesn't have a virtual PMU. + */ + kvm->arch.has_protected_pmu = true; + + /* * Because guest TD is protected, VMM can't parse the instruction in TD. * Instead, guest uses MMIO hypercall. For unmodified device driver, * #VE needs to be injected for MMIO and #VE handler in TD converts MMIO @@ -1182,12 +1188,22 @@ static void __tdx_map_gpa(struct vcpu_tdx *tdx); static int tdx_complete_vmcall_map_gpa(struct kvm_vcpu *vcpu) { + u64 hypercall_ret = READ_ONCE(vcpu->run->hypercall.ret); struct vcpu_tdx *tdx = to_tdx(vcpu); + long rc; - if (vcpu->run->hypercall.ret) { - tdvmcall_set_return_code(vcpu, TDVMCALL_STATUS_INVALID_OPERAND); - tdx->vp_enter_args.r11 = tdx->map_gpa_next; - return 1; + switch (hypercall_ret) { + case 0: + break; + case EAGAIN: + rc = TDVMCALL_STATUS_RETRY; + goto propagate_error; + case EINVAL: + rc = TDVMCALL_STATUS_INVALID_OPERAND; + goto propagate_error; + default: + WARN_ON_ONCE(kvm_is_valid_map_gpa_range_ret(hypercall_ret)); + return -EINVAL; } tdx->map_gpa_next += TDX_MAP_GPA_MAX_LEN; @@ -1200,13 +1216,17 @@ static int tdx_complete_vmcall_map_gpa(struct kvm_vcpu *vcpu) * TDVMCALL_MAP_GPA, see comments in tdx_protected_apic_has_interrupt(). */ if (kvm_vcpu_has_events(vcpu)) { - tdvmcall_set_return_code(vcpu, TDVMCALL_STATUS_RETRY); - tdx->vp_enter_args.r11 = tdx->map_gpa_next; - return 1; + rc = TDVMCALL_STATUS_RETRY; + goto propagate_error; } __tdx_map_gpa(tdx); return 0; + +propagate_error: + tdvmcall_set_return_code(vcpu, rc); + tdx->vp_enter_args.r11 = tdx->map_gpa_next; + return 1; } static void __tdx_map_gpa(struct vcpu_tdx *tdx) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 20374d8790bba..cd528c8ea1409 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -1912,6 +1912,24 @@ void vmx_inject_exception(struct kvm_vcpu *vcpu) u32 intr_info = ex->vector | INTR_INFO_VALID_MASK; struct vcpu_vmx *vmx = to_vmx(vcpu); + /* + * When injecting a #DB, single-stepping is enabled in RFLAGS, and STI + * or MOV-SS blocking is active, set vmcs.PENDING_DBG_EXCEPTIONS.BS to + * prevent a false positive from VM-Entry consistency check. VM-Entry + * asserts that a single-step #DB _must_ be pending in this scenario, + * as the previous instruction cannot have toggled RFLAGS.TF 0=>1 + * (because STI and POP/MOV don't modify RFLAGS), therefore the one + * instruction delay when activating single-step breakpoints must have + * already expired. However, the CPU isn't smart enough to peek at + * vmcs.VM_ENTRY_INTR_INFO_FIELD and so doesn't realize that yes, there + * is indeed a #DB pending/imminent. + */ + if (ex->vector == DB_VECTOR && + (vmx_get_rflags(vcpu) & X86_EFLAGS_TF) && + vmx_get_interrupt_shadow(vcpu)) + vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, + vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS) | DR6_BS); + kvm_deliver_exception_payload(vcpu, ex); if (ex->has_error_code) { @@ -5495,26 +5513,9 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu) * avoid single-step #DB and MTF updates, as ICEBP is * higher priority. Note, skipping ICEBP still clears * STI and MOVSS blocking. - * - * For all other #DBs, set vmcs.PENDING_DBG_EXCEPTIONS.BS - * if single-step is enabled in RFLAGS and STI or MOVSS - * blocking is active, as the CPU doesn't set the bit - * on VM-Exit due to #DB interception. VM-Entry has a - * consistency check that a single-step #DB is pending - * in this scenario as the previous instruction cannot - * have toggled RFLAGS.TF 0=>1 (because STI and POP/MOV - * don't modify RFLAGS), therefore the one instruction - * delay when activating single-step breakpoints must - * have already expired. Note, the CPU sets/clears BS - * as appropriate for all other VM-Exits types. */ if (is_icebp(intr_info)) WARN_ON(!skip_emulated_instruction(vcpu)); - else if ((vmx_get_rflags(vcpu) & X86_EFLAGS_TF) && - (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & - (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS))) - vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, - vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS) | DR6_BS); kvm_queue_exception_p(vcpu, DB_VECTOR, dr6); return 1; @@ -6715,6 +6716,9 @@ static int __vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath) if (enable_pml && !is_guest_mode(vcpu)) vmx_flush_pml_buffer(vcpu); + if (unlikely(exit_fastpath == EXIT_FASTPATH_EXIT_USERSPACE)) + return 0; + /* * KVM should never reach this point with a pending nested VM-Enter. * More specifically, short-circuiting VM-Entry to emulate L2 due to diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 4af832ecc56ba..54c552efb59e5 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -133,7 +133,6 @@ static void process_nmi(struct kvm_vcpu *vcpu); static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags); static void store_regs(struct kvm_vcpu *vcpu); static int sync_regs(struct kvm_vcpu *vcpu); -static int kvm_vcpu_do_singlestep(struct kvm_vcpu *vcpu); static int __set_sregs2(struct kvm_vcpu *vcpu, struct kvm_sregs2 *sregs2); static void __get_sregs2(struct kvm_vcpu *vcpu, struct kvm_sregs2 *sregs2); @@ -152,6 +151,7 @@ struct kvm_x86_ops kvm_x86_ops __read_mostly; #include <asm/kvm-x86-ops.h> EXPORT_STATIC_CALL_GPL(kvm_x86_get_cs_db_l_bits); EXPORT_STATIC_CALL_GPL(kvm_x86_cache_reg); +EXPORT_STATIC_CALL_GPL(kvm_x86_get_cpl); static bool __read_mostly ignore_msrs = 0; module_param(ignore_msrs, bool, 0644); @@ -970,7 +970,8 @@ static int complete_emulated_insn_gp(struct kvm_vcpu *vcpu, int err) EMULTYPE_COMPLETE_USER_EXIT); } -void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault) +void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault, + bool from_hardware) { ++vcpu->stat.pf_guest; @@ -987,8 +988,9 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault) fault->address); } -void kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu, - struct x86_exception *fault) +void __kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu, + struct x86_exception *fault, + bool from_hardware) { struct kvm_mmu *fault_mmu; WARN_ON_ONCE(fault->vector != PF_VECTOR); @@ -1005,9 +1007,9 @@ void kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu, kvm_mmu_invalidate_addr(vcpu, fault_mmu, fault->address, KVM_MMU_ROOT_CURRENT); - fault_mmu->inject_page_fault(vcpu, fault); + fault_mmu->inject_page_fault(vcpu, fault, from_hardware); } -EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_inject_emulated_page_fault); +EXPORT_SYMBOL_FOR_KVM_INTERNAL(__kvm_inject_emulated_page_fault); void kvm_inject_nmi(struct kvm_vcpu *vcpu) { @@ -1021,18 +1023,6 @@ void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code) } EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_queue_exception_e); -/* - * Checks if cpl <= required_cpl; if true, return true. Otherwise queue - * a #GP and return false. - */ -bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl) -{ - if (kvm_x86_call(get_cpl)(vcpu) <= required_cpl) - return true; - kvm_queue_exception_e(vcpu, GP_VECTOR, 0); - return false; -} - bool kvm_require_dr(struct kvm_vcpu *vcpu, int dr) { if ((dr != 4 && dr != 5) || !kvm_is_cr4_bit_set(vcpu, X86_CR4_DE)) @@ -1043,11 +1033,16 @@ bool kvm_require_dr(struct kvm_vcpu *vcpu, int dr) } EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_require_dr); -static bool kvm_pv_async_pf_enabled(struct kvm_vcpu *vcpu) +static bool __kvm_pv_async_pf_enabled(u64 data) { u64 mask = KVM_ASYNC_PF_ENABLED | KVM_ASYNC_PF_DELIVERY_AS_INT; - return (vcpu->arch.apf.msr_en_val & mask) == mask; + return (data & mask) == mask; +} + +static bool kvm_pv_async_pf_enabled(struct kvm_vcpu *vcpu) +{ + return __kvm_pv_async_pf_enabled(vcpu->arch.apf.msr_en_val); } static inline u64 pdptr_rsvd_bits(struct kvm_vcpu *vcpu) @@ -1601,6 +1596,14 @@ unsigned long kvm_get_dr(struct kvm_vcpu *vcpu, int dr) } EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_get_dr); +static unsigned long kvm_get_effective_dr7(struct kvm_vcpu *vcpu) +{ + if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) + return vcpu->arch.guest_debug_dr7; + + return vcpu->arch.dr7; +} + int kvm_emulate_rdpmc(struct kvm_vcpu *vcpu) { u32 pmc = kvm_rcx_read(vcpu); @@ -3648,23 +3651,19 @@ static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data) if (!lapic_in_kernel(vcpu)) return data ? 1 : 0; + if (__kvm_pv_async_pf_enabled(data) && + kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.apf.data, gpa, + sizeof(u64))) + return 1; + vcpu->arch.apf.msr_en_val = data; - if (!kvm_pv_async_pf_enabled(vcpu)) { + if (__kvm_pv_async_pf_enabled(data)) { + kvm_async_pf_wakeup_all(vcpu); + } else { kvm_clear_async_pf_completion_queue(vcpu); kvm_async_pf_hash_reset(vcpu); - return 0; } - - if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.apf.data, gpa, - sizeof(u64))) - return 1; - - vcpu->arch.apf.send_always = (data & KVM_ASYNC_PF_SEND_ALWAYS); - vcpu->arch.apf.delivery_as_pf_vmexit = data & KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT; - - kvm_async_pf_wakeup_all(vcpu); - return 0; } @@ -4003,22 +4002,28 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) break; case MSR_EFER: return set_efer(vcpu, msr_info); - case MSR_K7_HWCR: - data &= ~(u64)0x40; /* ignore flush filter disable */ - data &= ~(u64)0x100; /* ignore ignne emulation enable */ - data &= ~(u64)0x8; /* ignore TLB cache disable */ - + case MSR_K7_HWCR: { /* * Allow McStatusWrEn and TscFreqSel. (Linux guests from v3.2 * through at least v6.6 whine if TscFreqSel is clear, * depending on F/M/S. */ - if (data & ~(BIT_ULL(18) | BIT_ULL(24))) { + u64 valid = BIT_ULL(18) | BIT_ULL(24); + + data &= ~(u64)0x40; /* ignore flush filter disable */ + data &= ~(u64)0x100; /* ignore ignne emulation enable */ + data &= ~(u64)0x8; /* ignore TLB cache disable */ + + if (guest_cpu_cap_has(vcpu, X86_FEATURE_GP_ON_USER_CPUID)) + valid |= MSR_K7_HWCR_CPUID_USER_DIS; + + if (data & ~valid) { kvm_pr_unimpl_wrmsr(vcpu, msr, data); return 1; } vcpu->arch.msr_hwcr = data; break; + } case MSR_FAM10H_MMIO_CONF_BASE: if (data != 0) { kvm_pr_unimpl_wrmsr(vcpu, msr, data); @@ -4265,7 +4270,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_MISC_FEATURES_ENABLES: if (data & ~MSR_MISC_FEATURES_ENABLES_CPUID_FAULT || (data & MSR_MISC_FEATURES_ENABLES_CPUID_FAULT && - !supports_cpuid_fault(vcpu))) + !(vcpu->arch.msr_platform_info & MSR_PLATFORM_INFO_CPUID_FAULT))) return 1; vcpu->arch.msr_misc_features_enables = data; break; @@ -6915,6 +6920,10 @@ disable_exits_unlock: if (!enable_pmu || (cap->args[0] & ~KVM_CAP_PMU_VALID_MASK)) break; + if (kvm->arch.has_protected_pmu && + cap->args[0] != KVM_PMU_CAP_DISABLE) + break; + mutex_lock(&kvm->lock); if (!kvm->created_vcpus && !kvm->arch.created_mediated_pmu) { kvm->arch.enable_pmu = !(cap->args[0] & KVM_PMU_CAP_DISABLE); @@ -8553,6 +8562,11 @@ static void emulator_wbinvd(struct x86_emulate_ctxt *ctxt) kvm_emulate_wbinvd_noskip(emul_to_vcpu(ctxt)); } +static unsigned long emulator_get_effective_dr7(struct x86_emulate_ctxt *ctxt) +{ + return kvm_get_effective_dr7(emul_to_vcpu(ctxt)); +} + static unsigned long emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr) { return kvm_get_dr(emul_to_vcpu(ctxt), dr); @@ -8810,6 +8824,11 @@ static int emulator_intercept(struct x86_emulate_ctxt *ctxt, &ctxt->exception); } +static bool emulator_is_cpuid_allowed(struct x86_emulate_ctxt *ctxt) +{ + return kvm_is_cpuid_allowed(emul_to_vcpu(ctxt)); +} + static bool emulator_get_cpuid(struct x86_emulate_ctxt *ctxt, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx, bool exact_only) @@ -8935,6 +8954,7 @@ static const struct x86_emulate_ops emulate_ops = { .get_cr = emulator_get_cr, .set_cr = emulator_set_cr, .cpl = emulator_get_cpl, + .get_effective_dr7 = emulator_get_effective_dr7, .get_dr = emulator_get_dr, .set_dr = emulator_set_dr, .set_msr_with_filter = emulator_set_msr_with_filter, @@ -8946,6 +8966,7 @@ static const struct x86_emulate_ops emulate_ops = { .wbinvd = emulator_wbinvd, .fix_hypercall = emulator_fix_hypercall, .intercept = emulator_intercept, + .is_cpuid_allowed = emulator_is_cpuid_allowed, .get_cpuid = emulator_get_cpuid, .guest_has_movbe = emulator_guest_has_movbe, .guest_has_fxsr = emulator_guest_has_fxsr, @@ -8981,17 +9002,36 @@ static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask) } } -static void inject_emulated_exception(struct kvm_vcpu *vcpu) +static int kvm_inject_emulated_db(struct kvm_vcpu *vcpu, unsigned long dr6) { - struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt; + struct kvm_run *kvm_run = vcpu->run; + + if (vcpu->guest_debug & (KVM_GUESTDBG_USE_HW_BP | KVM_GUESTDBG_SINGLESTEP)) { + kvm_run->debug.arch.dr6 = dr6 | DR6_ACTIVE_LOW; + kvm_run->debug.arch.pc = kvm_get_linear_rip(vcpu); + kvm_run->debug.arch.exception = DB_VECTOR; + kvm_run->exit_reason = KVM_EXIT_DEBUG; + return 0; + } - if (ctxt->exception.vector == PF_VECTOR) - kvm_inject_emulated_page_fault(vcpu, &ctxt->exception); - else if (ctxt->exception.error_code_valid) - kvm_queue_exception_e(vcpu, ctxt->exception.vector, - ctxt->exception.error_code); + kvm_queue_exception_p(vcpu, DB_VECTOR, dr6); + return 1; +} + +static int inject_emulated_exception(struct kvm_vcpu *vcpu) +{ + struct x86_exception *ex = &vcpu->arch.emulate_ctxt->exception; + + if (ex->vector == DB_VECTOR) + return kvm_inject_emulated_db(vcpu, ex->dr6); + + if (ex->vector == PF_VECTOR) + kvm_inject_emulated_page_fault(vcpu, ex); + else if (ex->error_code_valid) + kvm_queue_exception_e(vcpu, ex->vector, ex->error_code); else - kvm_queue_exception(vcpu, ctxt->exception.vector); + kvm_queue_exception(vcpu, ex->vector); + return 1; } static struct x86_emulate_ctxt *alloc_emulate_ctxt(struct kvm_vcpu *vcpu) @@ -9031,6 +9071,7 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu) ctxt->interruptibility = 0; ctxt->have_exception = false; ctxt->exception.vector = -1; + ctxt->exception.payload = 0; ctxt->perm_ok = false; init_decode_cache(ctxt); @@ -9248,21 +9289,6 @@ static int kvm_vcpu_check_hw_bp(unsigned long addr, u32 type, u32 dr7, return dr6; } -static int kvm_vcpu_do_singlestep(struct kvm_vcpu *vcpu) -{ - struct kvm_run *kvm_run = vcpu->run; - - if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) { - kvm_run->debug.arch.dr6 = DR6_BS | DR6_ACTIVE_LOW; - kvm_run->debug.arch.pc = kvm_get_linear_rip(vcpu); - kvm_run->debug.arch.exception = DB_VECTOR; - kvm_run->exit_reason = KVM_EXIT_DEBUG; - return 0; - } - kvm_queue_exception_p(vcpu, DB_VECTOR, DR6_BS); - return 1; -} - int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu) { unsigned long rflags = kvm_x86_call(get_rflags)(vcpu); @@ -9283,13 +9309,16 @@ int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu) * that sets the TF flag". */ if (unlikely(rflags & X86_EFLAGS_TF)) - r = kvm_vcpu_do_singlestep(vcpu); + r = kvm_inject_emulated_db(vcpu, DR6_BS); return r; } EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_skip_emulated_instruction); static bool kvm_is_code_breakpoint_inhibited(struct kvm_vcpu *vcpu) { + if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) + return false; + if (kvm_get_rflags(vcpu) & X86_EFLAGS_RF) return true; @@ -9306,6 +9335,8 @@ static bool kvm_is_code_breakpoint_inhibited(struct kvm_vcpu *vcpu) static bool kvm_vcpu_check_code_breakpoint(struct kvm_vcpu *vcpu, int emulation_type, int *r) { + unsigned long dr7 = kvm_get_effective_dr7(vcpu); + WARN_ON_ONCE(emulation_type & EMULTYPE_NO_DECODE); /* @@ -9326,34 +9357,14 @@ static bool kvm_vcpu_check_code_breakpoint(struct kvm_vcpu *vcpu, EMULTYPE_TRAP_UD | EMULTYPE_VMWARE_GP | EMULTYPE_PF)) return false; - if (unlikely(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) && - (vcpu->arch.guest_debug_dr7 & DR7_BP_EN_MASK)) { - struct kvm_run *kvm_run = vcpu->run; - unsigned long eip = kvm_get_linear_rip(vcpu); - u32 dr6 = kvm_vcpu_check_hw_bp(eip, 0, - vcpu->arch.guest_debug_dr7, - vcpu->arch.eff_db); - - if (dr6 != 0) { - kvm_run->debug.arch.dr6 = dr6 | DR6_ACTIVE_LOW; - kvm_run->debug.arch.pc = eip; - kvm_run->debug.arch.exception = DB_VECTOR; - kvm_run->exit_reason = KVM_EXIT_DEBUG; - *r = 0; - return true; - } - } - - if (unlikely(vcpu->arch.dr7 & DR7_BP_EN_MASK) && + if (unlikely(dr7 & DR7_BP_EN_MASK) && !kvm_is_code_breakpoint_inhibited(vcpu)) { unsigned long eip = kvm_get_linear_rip(vcpu); - u32 dr6 = kvm_vcpu_check_hw_bp(eip, 0, - vcpu->arch.dr7, - vcpu->arch.db); + u32 dr6 = kvm_vcpu_check_hw_bp(eip, 0, dr7, + vcpu->arch.eff_db); - if (dr6 != 0) { - kvm_queue_exception_p(vcpu, DB_VECTOR, dr6); - *r = 1; + if (dr6) { + *r = kvm_inject_emulated_db(vcpu, dr6); return true; } } @@ -9499,8 +9510,7 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, */ WARN_ON_ONCE(ctxt->exception.vector == UD_VECTOR || exception_type(ctxt->exception.vector) == EXCPT_TRAP); - inject_emulated_exception(vcpu); - return 1; + return inject_emulated_exception(vcpu); } return handle_emulation_failure(vcpu, emulation_type); } @@ -9595,8 +9605,7 @@ restart: if (ctxt->have_exception) { WARN_ON_ONCE(vcpu->mmio_needed && !vcpu->mmio_is_write); vcpu->mmio_needed = false; - r = 1; - inject_emulated_exception(vcpu); + r = inject_emulated_exception(vcpu); } else if (vcpu->arch.pio.count) { if (!vcpu->arch.pio.in) { /* FIXME: return into emulator if single-stepping. */ @@ -9639,7 +9648,7 @@ writeback: kvm_pmu_branch_retired(vcpu); kvm_rip_write(vcpu, ctxt->eip); if (r && (ctxt->tf || (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP))) - r = kvm_vcpu_do_singlestep(vcpu); + r = kvm_inject_emulated_db(vcpu, DR6_BS); kvm_x86_call(update_emulated_instruction)(vcpu); __kvm_set_rflags(vcpu, ctxt->eflags); } @@ -11593,9 +11602,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) if (vcpu->arch.apic_attention) kvm_lapic_sync_from_vapic(vcpu); - if (unlikely(exit_fastpath == EXIT_FASTPATH_EXIT_USERSPACE)) - return 0; - r = kvm_x86_call(handle_exit)(vcpu, exit_fastpath); return r; @@ -13368,7 +13374,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) kvm->arch.default_tsc_khz = max_tsc_khz ? : tsc_khz; kvm->arch.apic_bus_cycle_ns = APIC_BUS_CYCLE_NS_DEFAULT; kvm->arch.guest_can_read_msr_platform_info = true; - kvm->arch.enable_pmu = enable_pmu; + kvm->arch.enable_pmu = enable_pmu && !kvm->arch.has_protected_pmu; #if IS_ENABLED(CONFIG_HYPERV) spin_lock_init(&kvm->arch.hv_root_tdp_lock); @@ -14013,7 +14019,7 @@ static bool kvm_can_deliver_async_pf(struct kvm_vcpu *vcpu) if (!kvm_pv_async_pf_enabled(vcpu)) return false; - if (!vcpu->arch.apf.send_always && + if (!(vcpu->arch.apf.msr_en_val & KVM_ASYNC_PF_SEND_ALWAYS) && (vcpu->arch.guest_state_protected || !kvm_x86_call(get_cpl)(vcpu))) return false; @@ -14022,7 +14028,7 @@ static bool kvm_can_deliver_async_pf(struct kvm_vcpu *vcpu) * L1 needs to opt into the special #PF vmexits that are * used to deliver async page faults. */ - return vcpu->arch.apf.delivery_as_pf_vmexit; + return vcpu->arch.apf.msr_en_val & KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT; } else { /* * Play it safe in case the guest temporarily disables paging. @@ -14066,7 +14072,7 @@ bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, fault.nested_page_fault = false; fault.address = work->arch.token; fault.async_page_fault = true; - kvm_inject_page_fault(vcpu, &fault); + kvm_inject_page_fault(vcpu, &fault, false); return true; } else { /* @@ -14237,7 +14243,7 @@ void kvm_fixup_and_inject_pf_error(struct kvm_vcpu *vcpu, gva_t gva, u16 error_c fault.address = gva; fault.async_page_fault = false; } - vcpu->arch.walk_mmu->inject_page_fault(vcpu, &fault); + vcpu->arch.walk_mmu->inject_page_fault(vcpu, &fault, true); } EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_fixup_and_inject_pf_error); diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 38a905fa86de2..aa7d5b757fb54 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -754,6 +754,12 @@ static inline void kvm_prepare_emulated_mmio_exit(struct kvm_vcpu *vcpu, frag->data, vcpu->mmio_is_write); } +static inline bool kvm_is_valid_map_gpa_range_ret(u64 hypercall_ret) +{ + return !hypercall_ret || hypercall_ret == EINVAL || + hypercall_ret == EAGAIN; +} + static inline bool user_exit_on_hypercall(struct kvm *kvm, unsigned long hc_nr) { return kvm->arch.hypercall_exit_enabled & BIT(hc_nr); |
