diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2026-06-25 10:21:13 -0700 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2026-06-25 10:21:13 -0700 |
| commit | c75597caada080effbfbc0a7fb10dc2a3bb543ad (patch) | |
| tree | b8c2f933fbb2175cdebeaf24786fc338f42f7533 /arch | |
| parent | a142da0b2d32b68a6d1b183343bbe43de8c222f9 (diff) | |
| parent | 098e32cba334da0f3fa8cfd4e022ae7c72341400 (diff) | |
| download | ath-c75597caada080effbfbc0a7fb10dc2a3bb543ad.tar.gz | |
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm fixes from Paolo Bonzini:
"s390:
- Fix S390_USER_OPEREXEC so it can now be enabled regardless of other
unrelated capabilities
- Fix handling of the _PAGE_UNUSED pte bit that could lead to guest
memory corruption in some scenarios
- A bunch of misc gmap fixes (locking, behaviour under memory
pressure)
- Fix CMMA dirty tracking
x86:
- Tidy up some WARN_ON() and BUG_ON(), replacing them with
WARN_ON_ONCE() or KVM_BUG_ON(). All of these have obviously never
triggered, or somebody would have been annoyed earlier, but still...
- Fix missing interrupt due to stale CR8 intercept
- Add a statistic that can come in handy when debugging leaks, as well as
when investigating vulnerability to a class of recently-discovered issues
- Do not ask arch/x86/kernel to export
default_cpu_present_to_apicid() just for KVM"
* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (22 commits)
x86/apic: KVM: Use cpu_physical_id() to get APIC ID of running vCPU for AVIC
KVM: x86/mmu: Expose number of shadow MMU shadow pages as a stat
KVM: x86: Unconditionally recompute CR8 intercept on PPR update
KVM: VMX: Grab vmcs12 on CR8 interception update iff vCPU is in guest mode
KVM: x86: WARN (once) if RTC pending EOI tracking goes off the rails
KVM: x86: WARN and fail kvm_set_irq() if a PIC or I/O APIC vector is invalid
KVM: x86: Bug the VM, not the kernel, if the ISR count {under,over}flows
KVM: x86/mmu: Bug the VM, not the host kernel, if KVM write-protects upper SPTEs
KVM: x86: Replace BUG_ON() with WARN_ON_ONCE() on "bad" nested GPA translation
KVM: Replace guest-triggerable BUG_ON() in ioeventfd datamatch with get_unaligned()
KVM: s390: Return failure in case of failure in kvm_s390_set_cmma_bits()
KVM: s390: selftests: Fix cmma selftest
KVM: s390: Fix cmma dirty tracking
KVM: s390: Fix locking in kvm_s390_set_mem_control()
KVM: s390: Fix handle_{sske,pfmf} under memory pressure
KVM: s390: Fix code typo in gmap_protect_asce_top_level()
KVM: s390: Do not set special large pages dirty
KVM: s390: Fix dat_peek_cmma() overflow
s390/mm: Fix handling of _PAGE_UNUSED pte bit
KVM: s390: Fix typo in UCONTROL documentation
...
Diffstat (limited to 'arch')
| -rw-r--r-- | arch/s390/include/asm/pgtable.h | 4 | ||||
| -rw-r--r-- | arch/s390/kvm/dat.c | 5 | ||||
| -rw-r--r-- | arch/s390/kvm/gmap.c | 35 | ||||
| -rw-r--r-- | arch/s390/kvm/gmap.h | 12 | ||||
| -rw-r--r-- | arch/s390/kvm/kvm-s390.c | 71 | ||||
| -rw-r--r-- | arch/s390/kvm/priv.c | 10 | ||||
| -rw-r--r-- | arch/s390/mm/gmap_helpers.c | 3 | ||||
| -rw-r--r-- | arch/x86/include/asm/kvm_host.h | 12 | ||||
| -rw-r--r-- | arch/x86/kernel/apic/apic_common.c | 1 | ||||
| -rw-r--r-- | arch/x86/kvm/i8259.c | 3 | ||||
| -rw-r--r-- | arch/x86/kvm/ioapic.c | 7 | ||||
| -rw-r--r-- | arch/x86/kvm/lapic.c | 6 | ||||
| -rw-r--r-- | arch/x86/kvm/mmu/mmu.c | 14 | ||||
| -rw-r--r-- | arch/x86/kvm/mmu/mmutrace.h | 2 | ||||
| -rw-r--r-- | arch/x86/kvm/mmu/tdp_mmu.c | 8 | ||||
| -rw-r--r-- | arch/x86/kvm/svm/avic.c | 6 | ||||
| -rw-r--r-- | arch/x86/kvm/svm/nested.c | 3 | ||||
| -rw-r--r-- | arch/x86/kvm/vmx/nested.c | 3 | ||||
| -rw-r--r-- | arch/x86/kvm/vmx/vmx.c | 3 | ||||
| -rw-r--r-- | arch/x86/kvm/x86.c | 6 |
20 files changed, 137 insertions, 77 deletions
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index f9a8a92fa160d..859ce7c7d4544 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -980,6 +980,8 @@ static inline void set_pmd(pmd_t *pmdp, pmd_t pmd) static inline void set_pte(pte_t *ptep, pte_t pte) { + if (pte_present(pte)) + pte = clear_pte_bit(pte, __pgprot(_PAGE_UNUSED)); WRITE_ONCE(*ptep, pte); } @@ -1359,8 +1361,6 @@ pgprot_t pgprot_writecombine(pgprot_t prot); static inline void set_ptes(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t entry, unsigned int nr) { - if (pte_present(entry)) - entry = clear_pte_bit(entry, __pgprot(_PAGE_UNUSED)); page_table_check_ptes_set(mm, addr, ptep, entry, nr); for (;;) { set_pte(ptep, entry); diff --git a/arch/s390/kvm/dat.c b/arch/s390/kvm/dat.c index a4fe664f65ee1..5f1960ec982d0 100644 --- a/arch/s390/kvm/dat.c +++ b/arch/s390/kvm/dat.c @@ -1209,7 +1209,7 @@ int dat_peek_cmma(gfn_t start, union asce asce, unsigned int *count, u8 *values) int rc; rc = _dat_walk_gfn_range(start, start + *count, asce, &ops, DAT_WALK_DEFAULT, &state); - *count = state.end - start; + *count = state.end >= start ? state.end - start : 0; /* Return success if at least one value was saved, otherwise an error. */ return (rc == -EFAULT && *count > 0) ? 
0 : rc; } @@ -1253,6 +1253,9 @@ int dat_get_cmma(union asce asce, gfn_t *start, unsigned int *count, u8 *values, }; _dat_walk_gfn_range(*start, asce_end(asce), asce, &ops, DAT_WALK_IGN_HOLES, &state); + /* If no dirty pages were found, wrap around and continue searching */ + if (*start && state.start == -1) + _dat_walk_gfn_range(0, *start, asce, &ops, DAT_WALK_IGN_HOLES, &state); if (state.start == -1) { *count = 0; diff --git a/arch/s390/kvm/gmap.c b/arch/s390/kvm/gmap.c index 77829f787dc78..298fbaecec28d 100644 --- a/arch/s390/kvm/gmap.c +++ b/arch/s390/kvm/gmap.c @@ -332,7 +332,7 @@ static long gmap_clear_young_crste(union crste *crstep, gfn_t gfn, gfn_t end, st new.h.i = 1; new.s.fc1.y = 0; new.s.fc1.prefix_notif = 0; - if (new.s.fc1.d || !new.h.p) + if ((new.s.fc1.d || !new.h.p) && !new.s.fc1.s) folio_set_dirty(phys_to_folio(crste_origin_large(crste))); new.s.fc1.d = 0; new.h.p = 1; @@ -1098,23 +1098,46 @@ int gmap_protect_rmap(struct kvm_s390_mmu_cache *mc, struct gmap *sg, gfn_t p_gf return 0; } +static long __set_cmma_clean_pte(union pte *ptep, gfn_t gfn, gfn_t next, struct dat_walk *walk) +{ + union pgste pgste; + + pgste = pgste_get_lock(ptep); + pgste.cmma_d = 0; + pgste_set_unlock(ptep, pgste); + + if (need_resched()) + return next; + return 0; +} + static long __set_cmma_dirty_pte(union pte *ptep, gfn_t gfn, gfn_t next, struct dat_walk *walk) { - __atomic64_or(PGSTE_CMMA_D_BIT, &pgste_of(ptep)->val); + union pgste pgste; + + pgste = pgste_get_lock(ptep); + if (!pgste.cmma_d) + atomic64_inc(walk->priv); + pgste.cmma_d = 1; + pgste_set_unlock(ptep, pgste); + if (need_resched()) return next; return 0; } -void gmap_set_cmma_all_dirty(struct gmap *gmap) +void _gmap_set_cmma_all(struct gmap *gmap, bool dirty) { - const struct dat_walk_ops ops = { .pte_entry = __set_cmma_dirty_pte, }; + const struct dat_walk_ops ops = { + .pte_entry = dirty ? 
__set_cmma_dirty_pte : __set_cmma_clean_pte, + }; gfn_t gfn = 0; do { scoped_guard(read_lock, &gmap->kvm->mmu_lock) gfn = _dat_walk_gfn_range(gfn, asce_end(gmap->asce), gmap->asce, &ops, - DAT_WALK_IGN_HOLES, NULL); + DAT_WALK_IGN_HOLES, + &gmap->kvm->arch.cmma_dirty_pages); cond_resched(); } while (gfn); } @@ -1287,7 +1310,7 @@ static int gmap_protect_asce_top_level(struct kvm_s390_mmu_cache *mc, struct gma /* Pairs with the smp_wmb() in kvm_mmu_invalidate_end(). */ smp_rmb(); - rc = kvm_s390_get_guest_pages(sg->kvm, context.f, asce.rsto, asce.dt + 1, false); + rc = kvm_s390_get_guest_pages(sg->kvm, context.f, asce.rsto, asce.tl + 1, false); if (rc > 0) rc = -EFAULT; if (!rc) diff --git a/arch/s390/kvm/gmap.h b/arch/s390/kvm/gmap.h index 1c040472f56d7..39938d363ec9b 100644 --- a/arch/s390/kvm/gmap.h +++ b/arch/s390/kvm/gmap.h @@ -104,7 +104,7 @@ int gmap_insert_rmap(struct kvm_s390_mmu_cache *mc, struct gmap *sg, gfn_t p_gfn gfn_t r_gfn, int level); int gmap_protect_rmap(struct kvm_s390_mmu_cache *mc, struct gmap *sg, gfn_t p_gfn, gfn_t r_gfn, kvm_pfn_t pfn, int level, bool wr); -void gmap_set_cmma_all_dirty(struct gmap *gmap); +void _gmap_set_cmma_all(struct gmap *gmap, bool dirty); void _gmap_handle_vsie_unshadow_event(struct gmap *parent, gfn_t gfn); struct gmap *gmap_create_shadow(struct kvm_s390_mmu_cache *mc, struct gmap *gmap, union asce asce, int edat_level); @@ -198,6 +198,16 @@ static inline bool pte_needs_unshadow(union pte oldpte, union pte newpte, union return !newpte.h.p || !newpte.s.pr; } +static inline void gmap_set_cmma_all_dirty(struct gmap *gmap) +{ + _gmap_set_cmma_all(gmap, true); +} + +static inline void gmap_set_cmma_all_clean(struct gmap *gmap) +{ + _gmap_set_cmma_all(gmap, false); +} + static inline union pgste _gmap_ptep_xchg(struct gmap *gmap, union pte *ptep, union pte newpte, union pgste pgste, gfn_t gfn, bool needs_lock) { diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index dc9c7451fe979..23c817595e28d 100644 --- 
a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1022,9 +1022,11 @@ static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *att if (!kvm->arch.use_cmma) break; + guard(mutex)(&kvm->lock); VM_EVENT(kvm, 3, "%s", "RESET: CMMA states"); do { - start_gfn = dat_reset_cmma(kvm->arch.gmap->asce, start_gfn); + scoped_guard(read_lock, &kvm->mmu_lock) + start_gfn = dat_reset_cmma(kvm->arch.gmap->asce, start_gfn); cond_resched(); } while (start_gfn); ret = 0; @@ -1217,13 +1219,13 @@ static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req) /* * Must be called with kvm->srcu held to avoid races on memslots, and with - * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration. + * kvm->slots_lock to avoid races with ourselves, kvm_s390_vm_stop_migration(), + * and kvm_s390_get_cmma_bits(). */ static int kvm_s390_vm_start_migration(struct kvm *kvm) { struct kvm_memory_slot *ms; struct kvm_memslots *slots; - unsigned long ram_pages = 0; int bkt; /* migration mode already enabled */ @@ -1240,28 +1242,54 @@ static int kvm_s390_vm_start_migration(struct kvm *kvm) kvm_for_each_memslot(ms, bkt, slots) { if (!ms->dirty_bitmap) return -EINVAL; - ram_pages += ms->npages; } - /* mark all the pages as dirty */ - gmap_set_cmma_all_dirty(kvm->arch.gmap); - atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages); - kvm->arch.migration_mode = 1; + /* + * Set the flag and let KVM handle ESSA manually, potentially setting + * the cmma_d bit in some PGSTEs and increasing cmma_dirty_pages. + * At this point cmma_dirty_pages is still 0, and all existing PGSTEs + * have their cmma_d bit set to 0. + * Any newly allocated page table has its entries marked as cmma-clean, + * which is fine because the CMMA values are not dirty. 
+ */ + WRITE_ONCE(kvm->arch.migration_mode, 1); kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION); + /* + * Mark all PGSTEs as cmma-dirty, increasing cmma_dirty_pages as needed, + * but without double-counting pages that have become dirty on their own + * in the meantime. + * At this point some pages might have become dirty on their own already + * and cmma_dirty_pages might therefore be non-zero. + */ + gmap_set_cmma_all_dirty(kvm->arch.gmap); return 0; } /* - * Must be called with kvm->slots_lock to avoid races with ourselves and - * kvm_s390_vm_start_migration. + * Must be called with kvm->slots_lock to avoid races with ourselves, + * kvm_s390_vm_start_migration() and kvm_s390_get_cmma_bits(). */ static int kvm_s390_vm_stop_migration(struct kvm *kvm) { /* migration mode already disabled */ if (!kvm->arch.migration_mode) return 0; - kvm->arch.migration_mode = 0; + /* + * Unset the flag and propagate to all vCPUs. From now on the cmma_d + * bit will not be touched on any PGSTE. + * At this point cmma_dirty_pages is possibly non-zero, and thus some + * PGSTEs might have cmma_d set. + */ + WRITE_ONCE(kvm->arch.migration_mode, 0); if (kvm->arch.use_cmma) kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION); + /* Clear cmma_d on all existing PGSTEs and set cmma_dirty_pages to 0. */ + gmap_set_cmma_all_clean(kvm->arch.gmap); + atomic64_set(&kvm->arch.cmma_dirty_pages, 0); + /* + * At this point the system has the expected state: migration_mode is 0, + * cmma_dirty_pages is 0, and all existing PGSTEs have their cmma_d bit + * set to 0. 
+ */ return 0; } @@ -2317,8 +2345,8 @@ static int kvm_s390_get_cmma_bits(struct kvm *kvm, static int kvm_s390_set_cmma_bits(struct kvm *kvm, const struct kvm_s390_cmma_log *args) { - struct kvm_s390_mmu_cache *mc; - u8 *bits = NULL; + struct kvm_s390_mmu_cache *mc __free(kvm_s390_mmu_cache) = NULL; + u8 *bits __free(kvfree) = NULL; int r = 0; if (!kvm->arch.use_cmma) @@ -2338,18 +2366,16 @@ static int kvm_s390_set_cmma_bits(struct kvm *kvm, return -ENOMEM; bits = vmalloc(array_size(sizeof(*bits), args->count)); if (!bits) - goto out; + return -ENOMEM; r = copy_from_user(bits, (void __user *)args->values, args->count); - if (r) { - r = -EFAULT; - goto out; - } + if (r) + return -EFAULT; do { r = kvm_s390_mmu_cache_topup(mc); if (r) - break; + return r; scoped_guard(read_lock, &kvm->mmu_lock) { r = dat_set_cmma_bits(mc, kvm->arch.gmap->asce, args->start_gfn, args->count, args->mask, bits); @@ -2357,9 +2383,7 @@ static int kvm_s390_set_cmma_bits(struct kvm *kvm, } while (r == -ENOMEM); set_bit(GMAP_FLAG_USES_CMM, &kvm->arch.gmap->flags); -out: - kvm_s390_free_mmu_cache(mc); - vfree(bits); + return r; } @@ -3584,7 +3608,8 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) vcpu->arch.gmap = vcpu->kvm->arch.gmap; sca_add_vcpu(vcpu); } - if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0) + if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0 || + vcpu->kvm->arch.user_operexec) vcpu->arch.sie_block->ictl |= ICTL_OPEREXC; } diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 447ec7ed423dc..ad0ddc433a73c 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -366,7 +366,9 @@ static int handle_sske(struct kvm_vcpu *vcpu) if (rc > 1) return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); if (rc == -ENOMEM) { - kvm_s390_mmu_cache_topup(vcpu->arch.mc); + rc = kvm_s390_mmu_cache_topup(vcpu->arch.mc); + if (rc) + return rc; continue; } if (rc < 0) @@ -1122,7 +1124,9 @@ static int handle_pfmf(struct kvm_vcpu *vcpu) if (rc 
> 1) return kvm_s390_inject_program_int(vcpu, rc); if (rc == -ENOMEM) { - kvm_s390_mmu_cache_topup(vcpu->arch.mc); + rc = kvm_s390_mmu_cache_topup(vcpu->arch.mc); + if (rc) + return rc; continue; } if (rc < 0) @@ -1232,7 +1236,7 @@ static int handle_essa(struct kvm_vcpu *vcpu) : ESSA_SET_STABLE_IF_RESIDENT)) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - if (!vcpu->kvm->arch.migration_mode) { + if (!READ_ONCE(vcpu->kvm->arch.migration_mode)) { /* * CMMA is enabled in the KVM settings, but is disabled in * the SIE block and in the mm_context, and we are not doing diff --git a/arch/s390/mm/gmap_helpers.c b/arch/s390/mm/gmap_helpers.c index ee3f37af8aee4..4bf7c9012feb6 100644 --- a/arch/s390/mm/gmap_helpers.c +++ b/arch/s390/mm/gmap_helpers.c @@ -181,7 +181,8 @@ void gmap_helper_try_set_pte_unused(struct mm_struct *mm, unsigned long vmaddr) if (IS_ERR_OR_NULL(ptep)) return; - __atomic64_or(_PAGE_UNUSED, (long *)ptep); + if (pte_present(*ptep)) + __atomic64_or(_PAGE_UNUSED, (long *)ptep); pte_unmap_unlock(ptep, ptl); } EXPORT_SYMBOL_GPL(gmap_helper_try_set_pte_unused); diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index eee473717c0e5..5f6c1ce9673b7 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1434,7 +1434,6 @@ enum kvm_mmu_type { }; struct kvm_arch { - unsigned long n_used_mmu_pages; unsigned long n_requested_mmu_pages; unsigned long n_max_mmu_pages; unsigned int indirect_shadow_pages; @@ -1700,6 +1699,7 @@ struct kvm_vm_stat { u64 mmu_recycled; u64 mmu_cache_miss; u64 mmu_unsync; + u64 mmu_shadow_pages; union { struct { atomic64_t pages_4k; @@ -2525,16 +2525,6 @@ static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) kvm_x86_call(vcpu_unblocking)(vcpu); } -static inline int kvm_cpu_get_apicid(int mps_cpu) -{ -#ifdef CONFIG_X86_LOCAL_APIC - return default_cpu_present_to_apicid(mps_cpu); -#else - WARN_ON_ONCE(1); - return BAD_APICID; -#endif -} - int 
memslot_rmap_alloc(struct kvm_memory_slot *slot, unsigned long npages); #define KVM_CLOCK_VALID_FLAGS \ diff --git a/arch/x86/kernel/apic/apic_common.c b/arch/x86/kernel/apic/apic_common.c index 2ed3b5c88c7f0..45e6b816353ed 100644 --- a/arch/x86/kernel/apic/apic_common.c +++ b/arch/x86/kernel/apic/apic_common.c @@ -26,7 +26,6 @@ u32 default_cpu_present_to_apicid(int mps_cpu) else return BAD_APICID; } -EXPORT_SYMBOL_FOR_KVM(default_cpu_present_to_apicid); /* * Set up the logical destination ID when the APIC operates in logical diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c index 59e28c45d7dc1..6a942ac622d5f 100644 --- a/arch/x86/kvm/i8259.c +++ b/arch/x86/kvm/i8259.c @@ -194,7 +194,8 @@ int kvm_pic_set_irq(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm, int irq = e->irqchip.pin; int ret, irq_level; - BUG_ON(irq < 0 || irq >= PIC_NUM_PINS); + if (WARN_ON_ONCE(irq < 0 || irq >= PIC_NUM_PINS)) + return -1; pic_lock(s); irq_level = __kvm_irq_line_state(&s->irq_states[irq], diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index f3f4a483ca150..757667fb2bfa0 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c @@ -84,7 +84,7 @@ static void kvm_rtc_eoi_tracking_restore_all(struct kvm_ioapic *ioapic); static void rtc_status_pending_eoi_check_valid(struct kvm_ioapic *ioapic) { - if (WARN_ON(ioapic->rtc_status.pending_eoi < 0)) + if (WARN_ON_ONCE(ioapic->rtc_status.pending_eoi < 0)) kvm_rtc_eoi_tracking_restore_all(ioapic); } @@ -484,7 +484,7 @@ static int ioapic_service(struct kvm_ioapic *ioapic, int irq, bool line_status) * ensures that it is only called if it is >= zero, namely * if rtc_irq_check_coalesced returns false). */ - BUG_ON(ioapic->rtc_status.pending_eoi != 0); + WARN_ON_ONCE(ioapic->rtc_status.pending_eoi); ret = __kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe, &ioapic->rtc_status); ioapic->rtc_status.pending_eoi = (ret < 0 ? 
0 : ret); @@ -504,7 +504,8 @@ int kvm_ioapic_set_irq(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm, int irq = e->irqchip.pin; int ret, irq_level; - BUG_ON(irq < 0 || irq >= IOAPIC_NUM_PINS); + if (WARN_ON_ONCE(irq < 0 || irq >= IOAPIC_NUM_PINS)) + return -1; spin_lock(&ioapic->lock); irq_level = __kvm_irq_line_state(&ioapic->irq_states[irq], diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 9d2df8623f6d1..6f30bbdddb5aa 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -767,7 +767,7 @@ static inline void apic_set_isr(int vec, struct kvm_lapic *apic) kvm_x86_call(hwapic_isr_update)(apic->vcpu, vec); else { ++apic->isr_count; - BUG_ON(apic->isr_count > MAX_APIC_VECTOR); + KVM_BUG_ON(apic->isr_count > MAX_APIC_VECTOR, apic->vcpu->kvm); /* * ISR (in service register) bit is set when injecting an interrupt. * The highest vector is injected. Thus the latest bit set matches @@ -808,7 +808,7 @@ static inline void apic_clear_isr(int vec, struct kvm_lapic *apic) kvm_x86_call(hwapic_isr_update)(apic->vcpu, apic_find_highest_isr(apic)); else { --apic->isr_count; - BUG_ON(apic->isr_count < 0); + KVM_BUG_ON(apic->isr_count < 0, apic->vcpu->kvm); apic->highest_isr_cache = -1; } } @@ -980,6 +980,8 @@ static void apic_update_ppr(struct kvm_lapic *apic) if (__apic_update_ppr(apic, &ppr) && apic_has_interrupt_for_ppr(apic, ppr) != -1) kvm_make_request(KVM_REQ_EVENT, apic->vcpu); + else + kvm_lapic_update_cr8_intercept(apic->vcpu); } void kvm_apic_update_ppr(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index fbb491e35b542..234d0a95abf53 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -1802,13 +1802,13 @@ static void kvm_mmu_check_sptes_at_free(struct kvm_mmu_page *sp) static void kvm_account_mmu_page(struct kvm *kvm, struct kvm_mmu_page *sp) { - kvm->arch.n_used_mmu_pages++; + kvm->stat.mmu_shadow_pages++; kvm_account_pgtable_pages((void *)sp->spt, +1); } static void 
kvm_unaccount_mmu_page(struct kvm *kvm, struct kvm_mmu_page *sp) { - kvm->arch.n_used_mmu_pages--; + kvm->stat.mmu_shadow_pages--; kvm_account_pgtable_pages((void *)sp->spt, -1); } @@ -2836,9 +2836,9 @@ restart: static inline unsigned long kvm_mmu_available_pages(struct kvm *kvm) { - if (kvm->arch.n_max_mmu_pages > kvm->arch.n_used_mmu_pages) + if (kvm->arch.n_max_mmu_pages > kvm->stat.mmu_shadow_pages) return kvm->arch.n_max_mmu_pages - - kvm->arch.n_used_mmu_pages; + kvm->stat.mmu_shadow_pages; return 0; } @@ -2874,11 +2874,11 @@ void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned long goal_nr_mmu_pages) { write_lock(&kvm->mmu_lock); - if (kvm->arch.n_used_mmu_pages > goal_nr_mmu_pages) { - kvm_mmu_zap_oldest_mmu_pages(kvm, kvm->arch.n_used_mmu_pages - + if (kvm->stat.mmu_shadow_pages > goal_nr_mmu_pages) { + kvm_mmu_zap_oldest_mmu_pages(kvm, kvm->stat.mmu_shadow_pages - goal_nr_mmu_pages); - goal_nr_mmu_pages = kvm->arch.n_used_mmu_pages; + goal_nr_mmu_pages = kvm->stat.mmu_shadow_pages; } kvm->arch.n_max_mmu_pages = goal_nr_mmu_pages; diff --git a/arch/x86/kvm/mmu/mmutrace.h b/arch/x86/kvm/mmu/mmutrace.h index fa01719baf8d4..8354d9f397778 100644 --- a/arch/x86/kvm/mmu/mmutrace.h +++ b/arch/x86/kvm/mmu/mmutrace.h @@ -303,7 +303,7 @@ TRACE_EVENT( TP_fast_assign( __entry->mmu_valid_gen = kvm->arch.mmu_valid_gen; - __entry->mmu_used_pages = kvm->arch.n_used_mmu_pages; + __entry->mmu_used_pages = kvm->stat.mmu_shadow_pages; ), TP_printk("kvm-mmu-valid-gen %u used_pages %x", diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index 5b3041138301b..c1cbae65d239f 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -1410,9 +1410,10 @@ static bool wrprot_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root, u64 new_spte; bool spte_set = false; - rcu_read_lock(); + if (KVM_BUG_ON(min_level > KVM_MAX_HUGEPAGE_LEVEL, kvm)) + return false; - BUG_ON(min_level > KVM_MAX_HUGEPAGE_LEVEL); + rcu_read_lock(); 
for_each_tdp_pte_min_level(iter, kvm, root, min_level, start, end) { retry: @@ -1844,7 +1845,8 @@ static bool write_protect_gfn(struct kvm *kvm, struct kvm_mmu_page *root, u64 new_spte; bool spte_set = false; - BUG_ON(min_level > KVM_MAX_HUGEPAGE_LEVEL); + if (KVM_BUG_ON(min_level > KVM_MAX_HUGEPAGE_LEVEL, kvm)) + return false; rcu_read_lock(); diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c index 0726f88e679aa..58e493a80cb0e 100644 --- a/arch/x86/kvm/svm/avic.c +++ b/arch/x86/kvm/svm/avic.c @@ -460,8 +460,8 @@ void avic_ring_doorbell(struct kvm_vcpu *vcpu) int cpu = READ_ONCE(vcpu->cpu); if (cpu != get_cpu()) { - wrmsrq(MSR_AMD64_SVM_AVIC_DOORBELL, kvm_cpu_get_apicid(cpu)); - trace_kvm_avic_doorbell(vcpu->vcpu_id, kvm_cpu_get_apicid(cpu)); + wrmsrq(MSR_AMD64_SVM_AVIC_DOORBELL, cpu_physical_id(cpu)); + trace_kvm_avic_doorbell(vcpu->vcpu_id, cpu_physical_id(cpu)); } put_cpu(); } @@ -1013,7 +1013,7 @@ static void __avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu, enum avic_vcpu_action action) { struct kvm_svm *kvm_svm = to_kvm_svm(vcpu->kvm); - int h_physical_id = kvm_cpu_get_apicid(cpu); + int h_physical_id = cpu_physical_id(cpu); struct vcpu_svm *svm = to_svm(vcpu); unsigned long flags; u64 entry; diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 9aedb88c832d7..3e6c671a8dc26 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -2152,7 +2152,8 @@ static gpa_t svm_translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, struct vcpu_svm *svm = to_svm(vcpu); struct kvm_mmu *mmu = vcpu->arch.mmu; - BUG_ON(!mmu_is_nested(vcpu)); + if (WARN_ON_ONCE(!mmu_is_nested(vcpu))) + return gpa; /* Non-GMET walks are always user-walks */ if (!(svm->nested.ctl.misc_ctl & SVM_MISC_ENABLE_GMET)) diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 3a293640d58c2..6957bb6f5cf7e 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -7470,7 +7470,8 @@ static gpa_t vmx_translate_nested_gpa(struct 
kvm_vcpu *vcpu, gpa_t gpa, { struct kvm_mmu *mmu = vcpu->arch.mmu; - BUG_ON(!mmu_is_nested(vcpu)); + if (WARN_ON_ONCE(!mmu_is_nested(vcpu))) + return gpa; /* * MBEC differentiates based on the effective U/S bit of diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index f7c2983c533fd..2325be57d3d75 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -6873,11 +6873,10 @@ int vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath) void vmx_update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) { - struct vmcs12 *vmcs12 = get_vmcs12(vcpu); int tpr_threshold; if (is_guest_mode(vcpu) && - nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) + nested_cpu_has(get_vmcs12(vcpu), CPU_BASED_TPR_SHADOW)) return; guard(vmx_vmcs01)(vcpu); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index d9d51803b7b20..afcac1042947a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -244,6 +244,7 @@ const struct kvm_stats_desc kvm_vm_stats_desc[] = { STATS_DESC_COUNTER(VM, mmu_recycled), STATS_DESC_COUNTER(VM, mmu_cache_miss), STATS_DESC_ICOUNTER(VM, mmu_unsync), + STATS_DESC_ICOUNTER(VM, mmu_shadow_pages), STATS_DESC_ICOUNTER(VM, pages_4k), STATS_DESC_ICOUNTER(VM, pages_2m), STATS_DESC_ICOUNTER(VM, pages_1g), @@ -5317,7 +5318,6 @@ static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu, r = kvm_apic_set_state(vcpu, s); if (r) return r; - kvm_lapic_update_cr8_intercept(vcpu); return 0; } @@ -12418,8 +12418,6 @@ static int __set_sregs_common(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs, kvm_register_mark_dirty(vcpu, VCPU_REG_CR3); kvm_x86_call(post_set_cr3)(vcpu, sregs->cr3); - kvm_set_cr8(vcpu, sregs->cr8); - *mmu_reset_needed |= vcpu->arch.efer != sregs->efer; kvm_x86_call(set_efer)(vcpu, sregs->efer); @@ -12448,7 +12446,7 @@ static int __set_sregs_common(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs, kvm_set_segment(vcpu, &sregs->tr, VCPU_SREG_TR); kvm_set_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR); - 
kvm_lapic_update_cr8_intercept(vcpu); + kvm_set_cr8(vcpu, sregs->cr8); /* Older userspace won't unhalt the vcpu on reset. */ if (kvm_vcpu_is_bsp(vcpu) && kvm_rip_read(vcpu) == 0xfff0 && |
