about | summary | refs | log | tree | commit | diff | stats
path: root/arch
diff options
author Linus Torvalds <torvalds@linux-foundation.org> 2026-06-25 10:21:13 -0700
committer Linus Torvalds <torvalds@linux-foundation.org> 2026-06-25 10:21:13 -0700
commit c75597caada080effbfbc0a7fb10dc2a3bb543ad (patch)
tree b8c2f933fbb2175cdebeaf24786fc338f42f7533 /arch
parent a142da0b2d32b68a6d1b183343bbe43de8c222f9 (diff)
parent 098e32cba334da0f3fa8cfd4e022ae7c72341400 (diff)
download ath-c75597caada080effbfbc0a7fb10dc2a3bb543ad.tar.gz
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm fixes from Paolo Bonzini:
 "s390:

   - Fix S390_USER_OPEREXEC so it can now be enabled regardless of other
     unrelated capabilities

   - Fix handling of the _PAGE_UNUSED pte bit that could lead to guest
     memory corruption in some scenarios

   - A bunch of misc gmap fixes (locking, behaviour under memory
     pressure)

   - Fix CMMA dirty tracking

  x86:

   - Tidy up some WARN_ON() and BUG_ON(), replacing them with
     WARN_ON_ONCE() or KVM_BUG_ON(). All of these have obviously never
     triggered, or somebody would have been annoyed earlier, but
     still...

   - Fix missing interrupt due to stale CR8 intercept

   - Add a statistic that can come in handy to debug leaks as well as
     the vulnerability to a class of recently-discovered issues

   - Do not ask arch/x86/kernel to export
     default_cpu_present_to_apicid() just for KVM"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (22 commits)
  x86/apic: KVM: Use cpu_physical_id() to get APIC ID of running vCPU for AVIC
  KVM: x86/mmu: Expose number of shadow MMU shadow pages as a stat
  KVM: x86: Unconditionally recompute CR8 intercept on PPR update
  KVM: VMX: Grab vmcs12 on CR8 interception update iff vCPU is in guest mode
  KVM: x86: WARN (once) if RTC pending EOI tracking goes off the rails
  KVM: x86: WARN and fail kvm_set_irq() if a PIC or I/O APIC vector is invalid
  KVM: x86: Bug the VM, not the kernel, if the ISR count {under,over}flows
  KVM: x86/mmu: Bug the VM, not the host kernel, if KVM write-protects upper SPTEs
  KVM: x86: Replace BUG_ON() with WARN_ON_ONCE() on "bad" nested GPA translation
  KVM: Replace guest-triggerable BUG_ON() in ioeventfd datamatch with get_unaligned()
  KVM: s390: Return failure in case of failure in kvm_s390_set_cmma_bits()
  KVM: s390: selftests: Fix cmma selftest
  KVM: s390: Fix cmma dirty tracking
  KVM: s390: Fix locking in kvm_s390_set_mem_control()
  KVM: s390: Fix handle_{sske,pfmf} under memory pressure
  KVM: s390: Fix code typo in gmap_protect_asce_top_level()
  KVM: s390: Do not set special large pages dirty
  KVM: s390: Fix dat_peek_cmma() overflow
  s390/mm: Fix handling of _PAGE_UNUSED pte bit
  KVM: s390: Fix typo in UCONTROL documentation
  ...
Diffstat (limited to 'arch')
-rw-r--r-- arch/s390/include/asm/pgtable.h 4
-rw-r--r-- arch/s390/kvm/dat.c 5
-rw-r--r-- arch/s390/kvm/gmap.c 35
-rw-r--r-- arch/s390/kvm/gmap.h 12
-rw-r--r-- arch/s390/kvm/kvm-s390.c 71
-rw-r--r-- arch/s390/kvm/priv.c 10
-rw-r--r-- arch/s390/mm/gmap_helpers.c 3
-rw-r--r-- arch/x86/include/asm/kvm_host.h 12
-rw-r--r-- arch/x86/kernel/apic/apic_common.c 1
-rw-r--r-- arch/x86/kvm/i8259.c 3
-rw-r--r-- arch/x86/kvm/ioapic.c 7
-rw-r--r-- arch/x86/kvm/lapic.c 6
-rw-r--r-- arch/x86/kvm/mmu/mmu.c 14
-rw-r--r-- arch/x86/kvm/mmu/mmutrace.h 2
-rw-r--r-- arch/x86/kvm/mmu/tdp_mmu.c 8
-rw-r--r-- arch/x86/kvm/svm/avic.c 6
-rw-r--r-- arch/x86/kvm/svm/nested.c 3
-rw-r--r-- arch/x86/kvm/vmx/nested.c 3
-rw-r--r-- arch/x86/kvm/vmx/vmx.c 3
-rw-r--r-- arch/x86/kvm/x86.c 6
20 files changed, 137 insertions, 77 deletions
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index f9a8a92fa160d..859ce7c7d4544 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -980,6 +980,8 @@ static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
static inline void set_pte(pte_t *ptep, pte_t pte)
{
+ if (pte_present(pte))
+ pte = clear_pte_bit(pte, __pgprot(_PAGE_UNUSED));
WRITE_ONCE(*ptep, pte);
}
@@ -1359,8 +1361,6 @@ pgprot_t pgprot_writecombine(pgprot_t prot);
static inline void set_ptes(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t entry, unsigned int nr)
{
- if (pte_present(entry))
- entry = clear_pte_bit(entry, __pgprot(_PAGE_UNUSED));
page_table_check_ptes_set(mm, addr, ptep, entry, nr);
for (;;) {
set_pte(ptep, entry);
diff --git a/arch/s390/kvm/dat.c b/arch/s390/kvm/dat.c
index a4fe664f65ee1..5f1960ec982d0 100644
--- a/arch/s390/kvm/dat.c
+++ b/arch/s390/kvm/dat.c
@@ -1209,7 +1209,7 @@ int dat_peek_cmma(gfn_t start, union asce asce, unsigned int *count, u8 *values)
int rc;
rc = _dat_walk_gfn_range(start, start + *count, asce, &ops, DAT_WALK_DEFAULT, &state);
- *count = state.end - start;
+ *count = state.end >= start ? state.end - start : 0;
/* Return success if at least one value was saved, otherwise an error. */
return (rc == -EFAULT && *count > 0) ? 0 : rc;
}
@@ -1253,6 +1253,9 @@ int dat_get_cmma(union asce asce, gfn_t *start, unsigned int *count, u8 *values,
};
_dat_walk_gfn_range(*start, asce_end(asce), asce, &ops, DAT_WALK_IGN_HOLES, &state);
+ /* If no dirty pages were found, wrap around and continue searching */
+ if (*start && state.start == -1)
+ _dat_walk_gfn_range(0, *start, asce, &ops, DAT_WALK_IGN_HOLES, &state);
if (state.start == -1) {
*count = 0;
diff --git a/arch/s390/kvm/gmap.c b/arch/s390/kvm/gmap.c
index 77829f787dc78..298fbaecec28d 100644
--- a/arch/s390/kvm/gmap.c
+++ b/arch/s390/kvm/gmap.c
@@ -332,7 +332,7 @@ static long gmap_clear_young_crste(union crste *crstep, gfn_t gfn, gfn_t end, st
new.h.i = 1;
new.s.fc1.y = 0;
new.s.fc1.prefix_notif = 0;
- if (new.s.fc1.d || !new.h.p)
+ if ((new.s.fc1.d || !new.h.p) && !new.s.fc1.s)
folio_set_dirty(phys_to_folio(crste_origin_large(crste)));
new.s.fc1.d = 0;
new.h.p = 1;
@@ -1098,23 +1098,46 @@ int gmap_protect_rmap(struct kvm_s390_mmu_cache *mc, struct gmap *sg, gfn_t p_gf
return 0;
}
+static long __set_cmma_clean_pte(union pte *ptep, gfn_t gfn, gfn_t next, struct dat_walk *walk)
+{
+ union pgste pgste;
+
+ pgste = pgste_get_lock(ptep);
+ pgste.cmma_d = 0;
+ pgste_set_unlock(ptep, pgste);
+
+ if (need_resched())
+ return next;
+ return 0;
+}
+
static long __set_cmma_dirty_pte(union pte *ptep, gfn_t gfn, gfn_t next, struct dat_walk *walk)
{
- __atomic64_or(PGSTE_CMMA_D_BIT, &pgste_of(ptep)->val);
+ union pgste pgste;
+
+ pgste = pgste_get_lock(ptep);
+ if (!pgste.cmma_d)
+ atomic64_inc(walk->priv);
+ pgste.cmma_d = 1;
+ pgste_set_unlock(ptep, pgste);
+
if (need_resched())
return next;
return 0;
}
-void gmap_set_cmma_all_dirty(struct gmap *gmap)
+void _gmap_set_cmma_all(struct gmap *gmap, bool dirty)
{
- const struct dat_walk_ops ops = { .pte_entry = __set_cmma_dirty_pte, };
+ const struct dat_walk_ops ops = {
+ .pte_entry = dirty ? __set_cmma_dirty_pte : __set_cmma_clean_pte,
+ };
gfn_t gfn = 0;
do {
scoped_guard(read_lock, &gmap->kvm->mmu_lock)
gfn = _dat_walk_gfn_range(gfn, asce_end(gmap->asce), gmap->asce, &ops,
- DAT_WALK_IGN_HOLES, NULL);
+ DAT_WALK_IGN_HOLES,
+ &gmap->kvm->arch.cmma_dirty_pages);
cond_resched();
} while (gfn);
}
@@ -1287,7 +1310,7 @@ static int gmap_protect_asce_top_level(struct kvm_s390_mmu_cache *mc, struct gma
/* Pairs with the smp_wmb() in kvm_mmu_invalidate_end(). */
smp_rmb();
- rc = kvm_s390_get_guest_pages(sg->kvm, context.f, asce.rsto, asce.dt + 1, false);
+ rc = kvm_s390_get_guest_pages(sg->kvm, context.f, asce.rsto, asce.tl + 1, false);
if (rc > 0)
rc = -EFAULT;
if (!rc)
diff --git a/arch/s390/kvm/gmap.h b/arch/s390/kvm/gmap.h
index 1c040472f56d7..39938d363ec9b 100644
--- a/arch/s390/kvm/gmap.h
+++ b/arch/s390/kvm/gmap.h
@@ -104,7 +104,7 @@ int gmap_insert_rmap(struct kvm_s390_mmu_cache *mc, struct gmap *sg, gfn_t p_gfn
gfn_t r_gfn, int level);
int gmap_protect_rmap(struct kvm_s390_mmu_cache *mc, struct gmap *sg, gfn_t p_gfn, gfn_t r_gfn,
kvm_pfn_t pfn, int level, bool wr);
-void gmap_set_cmma_all_dirty(struct gmap *gmap);
+void _gmap_set_cmma_all(struct gmap *gmap, bool dirty);
void _gmap_handle_vsie_unshadow_event(struct gmap *parent, gfn_t gfn);
struct gmap *gmap_create_shadow(struct kvm_s390_mmu_cache *mc, struct gmap *gmap,
union asce asce, int edat_level);
@@ -198,6 +198,16 @@ static inline bool pte_needs_unshadow(union pte oldpte, union pte newpte, union
return !newpte.h.p || !newpte.s.pr;
}
+static inline void gmap_set_cmma_all_dirty(struct gmap *gmap)
+{
+ _gmap_set_cmma_all(gmap, true);
+}
+
+static inline void gmap_set_cmma_all_clean(struct gmap *gmap)
+{
+ _gmap_set_cmma_all(gmap, false);
+}
+
static inline union pgste _gmap_ptep_xchg(struct gmap *gmap, union pte *ptep, union pte newpte,
union pgste pgste, gfn_t gfn, bool needs_lock)
{
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index dc9c7451fe979..23c817595e28d 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -1022,9 +1022,11 @@ static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *att
if (!kvm->arch.use_cmma)
break;
+ guard(mutex)(&kvm->lock);
VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
do {
- start_gfn = dat_reset_cmma(kvm->arch.gmap->asce, start_gfn);
+ scoped_guard(read_lock, &kvm->mmu_lock)
+ start_gfn = dat_reset_cmma(kvm->arch.gmap->asce, start_gfn);
cond_resched();
} while (start_gfn);
ret = 0;
@@ -1217,13 +1219,13 @@ static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
/*
* Must be called with kvm->srcu held to avoid races on memslots, and with
- * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
+ * kvm->slots_lock to avoid races with ourselves, kvm_s390_vm_stop_migration(),
+ * and kvm_s390_get_cmma_bits().
*/
static int kvm_s390_vm_start_migration(struct kvm *kvm)
{
struct kvm_memory_slot *ms;
struct kvm_memslots *slots;
- unsigned long ram_pages = 0;
int bkt;
/* migration mode already enabled */
@@ -1240,28 +1242,54 @@ static int kvm_s390_vm_start_migration(struct kvm *kvm)
kvm_for_each_memslot(ms, bkt, slots) {
if (!ms->dirty_bitmap)
return -EINVAL;
- ram_pages += ms->npages;
}
- /* mark all the pages as dirty */
- gmap_set_cmma_all_dirty(kvm->arch.gmap);
- atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
- kvm->arch.migration_mode = 1;
+ /*
+ * Set the flag and let KVM handle ESSA manually, potentially setting
+ * the cmma_d bit in some PGSTEs and increasing cmma_dirty_pages.
+ * At this point cmma_dirty_pages is still 0, and all existing PGSTEs
+ * have their cmma_d bit set to 0.
+ * Any newly allocated page table has its entries marked as cmma-clean,
+ * which is fine because the CMMA values are not dirty.
+ */
+ WRITE_ONCE(kvm->arch.migration_mode, 1);
kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
+ /*
+ * Mark all PGSTEs as cmma-dirty, increasing cmma_dirty_pages as needed,
+ * but without double-counting pages that have become dirty on their own
+ * in the meantime.
+ * At this point some pages might have become dirty on their own already
+ * and cmma_dirty_pages might therefore be non-zero.
+ */
+ gmap_set_cmma_all_dirty(kvm->arch.gmap);
return 0;
}
/*
- * Must be called with kvm->slots_lock to avoid races with ourselves and
- * kvm_s390_vm_start_migration.
+ * Must be called with kvm->slots_lock to avoid races with ourselves,
+ * kvm_s390_vm_start_migration() and kvm_s390_get_cmma_bits().
*/
static int kvm_s390_vm_stop_migration(struct kvm *kvm)
{
/* migration mode already disabled */
if (!kvm->arch.migration_mode)
return 0;
- kvm->arch.migration_mode = 0;
+ /*
+ * Unset the flag and propagate to all vCPUs. From now on the cmma_d
+ * bit will not be touched on any PGSTE.
+ * At this point cmma_dirty_pages is possibly non-zero, and thus some
+ * PGSTEs might have cmma_d set.
+ */
+ WRITE_ONCE(kvm->arch.migration_mode, 0);
if (kvm->arch.use_cmma)
kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
+ /* Clear cmma_d on all existing PGSTEs and set cmma_dirty_pages to 0. */
+ gmap_set_cmma_all_clean(kvm->arch.gmap);
+ atomic64_set(&kvm->arch.cmma_dirty_pages, 0);
+ /*
+ * At this point the system has the expected state: migration_mode is 0,
+ * cmma_dirty_pages is 0, and all existing PGSTEs have their cmma_d bit
+ * set to 0.
+ */
return 0;
}
@@ -2317,8 +2345,8 @@ static int kvm_s390_get_cmma_bits(struct kvm *kvm,
static int kvm_s390_set_cmma_bits(struct kvm *kvm,
const struct kvm_s390_cmma_log *args)
{
- struct kvm_s390_mmu_cache *mc;
- u8 *bits = NULL;
+ struct kvm_s390_mmu_cache *mc __free(kvm_s390_mmu_cache) = NULL;
+ u8 *bits __free(kvfree) = NULL;
int r = 0;
if (!kvm->arch.use_cmma)
@@ -2338,18 +2366,16 @@ static int kvm_s390_set_cmma_bits(struct kvm *kvm,
return -ENOMEM;
bits = vmalloc(array_size(sizeof(*bits), args->count));
if (!bits)
- goto out;
+ return -ENOMEM;
r = copy_from_user(bits, (void __user *)args->values, args->count);
- if (r) {
- r = -EFAULT;
- goto out;
- }
+ if (r)
+ return -EFAULT;
do {
r = kvm_s390_mmu_cache_topup(mc);
if (r)
- break;
+ return r;
scoped_guard(read_lock, &kvm->mmu_lock) {
r = dat_set_cmma_bits(mc, kvm->arch.gmap->asce, args->start_gfn,
args->count, args->mask, bits);
@@ -2357,9 +2383,7 @@ static int kvm_s390_set_cmma_bits(struct kvm *kvm,
} while (r == -ENOMEM);
set_bit(GMAP_FLAG_USES_CMM, &kvm->arch.gmap->flags);
-out:
- kvm_s390_free_mmu_cache(mc);
- vfree(bits);
+
return r;
}
@@ -3584,7 +3608,8 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
vcpu->arch.gmap = vcpu->kvm->arch.gmap;
sca_add_vcpu(vcpu);
}
- if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
+ if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0 ||
+ vcpu->kvm->arch.user_operexec)
vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
}
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 447ec7ed423dc..ad0ddc433a73c 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -366,7 +366,9 @@ static int handle_sske(struct kvm_vcpu *vcpu)
if (rc > 1)
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
if (rc == -ENOMEM) {
- kvm_s390_mmu_cache_topup(vcpu->arch.mc);
+ rc = kvm_s390_mmu_cache_topup(vcpu->arch.mc);
+ if (rc)
+ return rc;
continue;
}
if (rc < 0)
@@ -1122,7 +1124,9 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
if (rc > 1)
return kvm_s390_inject_program_int(vcpu, rc);
if (rc == -ENOMEM) {
- kvm_s390_mmu_cache_topup(vcpu->arch.mc);
+ rc = kvm_s390_mmu_cache_topup(vcpu->arch.mc);
+ if (rc)
+ return rc;
continue;
}
if (rc < 0)
@@ -1232,7 +1236,7 @@ static int handle_essa(struct kvm_vcpu *vcpu)
: ESSA_SET_STABLE_IF_RESIDENT))
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
- if (!vcpu->kvm->arch.migration_mode) {
+ if (!READ_ONCE(vcpu->kvm->arch.migration_mode)) {
/*
* CMMA is enabled in the KVM settings, but is disabled in
* the SIE block and in the mm_context, and we are not doing
diff --git a/arch/s390/mm/gmap_helpers.c b/arch/s390/mm/gmap_helpers.c
index ee3f37af8aee4..4bf7c9012feb6 100644
--- a/arch/s390/mm/gmap_helpers.c
+++ b/arch/s390/mm/gmap_helpers.c
@@ -181,7 +181,8 @@ void gmap_helper_try_set_pte_unused(struct mm_struct *mm, unsigned long vmaddr)
if (IS_ERR_OR_NULL(ptep))
return;
- __atomic64_or(_PAGE_UNUSED, (long *)ptep);
+ if (pte_present(*ptep))
+ __atomic64_or(_PAGE_UNUSED, (long *)ptep);
pte_unmap_unlock(ptep, ptl);
}
EXPORT_SYMBOL_GPL(gmap_helper_try_set_pte_unused);
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index eee473717c0e5..5f6c1ce9673b7 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1434,7 +1434,6 @@ enum kvm_mmu_type {
};
struct kvm_arch {
- unsigned long n_used_mmu_pages;
unsigned long n_requested_mmu_pages;
unsigned long n_max_mmu_pages;
unsigned int indirect_shadow_pages;
@@ -1700,6 +1699,7 @@ struct kvm_vm_stat {
u64 mmu_recycled;
u64 mmu_cache_miss;
u64 mmu_unsync;
+ u64 mmu_shadow_pages;
union {
struct {
atomic64_t pages_4k;
@@ -2525,16 +2525,6 @@ static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
kvm_x86_call(vcpu_unblocking)(vcpu);
}
-static inline int kvm_cpu_get_apicid(int mps_cpu)
-{
-#ifdef CONFIG_X86_LOCAL_APIC
- return default_cpu_present_to_apicid(mps_cpu);
-#else
- WARN_ON_ONCE(1);
- return BAD_APICID;
-#endif
-}
-
int memslot_rmap_alloc(struct kvm_memory_slot *slot, unsigned long npages);
#define KVM_CLOCK_VALID_FLAGS \
diff --git a/arch/x86/kernel/apic/apic_common.c b/arch/x86/kernel/apic/apic_common.c
index 2ed3b5c88c7f0..45e6b816353ed 100644
--- a/arch/x86/kernel/apic/apic_common.c
+++ b/arch/x86/kernel/apic/apic_common.c
@@ -26,7 +26,6 @@ u32 default_cpu_present_to_apicid(int mps_cpu)
else
return BAD_APICID;
}
-EXPORT_SYMBOL_FOR_KVM(default_cpu_present_to_apicid);
/*
* Set up the logical destination ID when the APIC operates in logical
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index 59e28c45d7dc1..6a942ac622d5f 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -194,7 +194,8 @@ int kvm_pic_set_irq(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm,
int irq = e->irqchip.pin;
int ret, irq_level;
- BUG_ON(irq < 0 || irq >= PIC_NUM_PINS);
+ if (WARN_ON_ONCE(irq < 0 || irq >= PIC_NUM_PINS))
+ return -1;
pic_lock(s);
irq_level = __kvm_irq_line_state(&s->irq_states[irq],
diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
index f3f4a483ca150..757667fb2bfa0 100644
--- a/arch/x86/kvm/ioapic.c
+++ b/arch/x86/kvm/ioapic.c
@@ -84,7 +84,7 @@ static void kvm_rtc_eoi_tracking_restore_all(struct kvm_ioapic *ioapic);
static void rtc_status_pending_eoi_check_valid(struct kvm_ioapic *ioapic)
{
- if (WARN_ON(ioapic->rtc_status.pending_eoi < 0))
+ if (WARN_ON_ONCE(ioapic->rtc_status.pending_eoi < 0))
kvm_rtc_eoi_tracking_restore_all(ioapic);
}
@@ -484,7 +484,7 @@ static int ioapic_service(struct kvm_ioapic *ioapic, int irq, bool line_status)
* ensures that it is only called if it is >= zero, namely
* if rtc_irq_check_coalesced returns false).
*/
- BUG_ON(ioapic->rtc_status.pending_eoi != 0);
+ WARN_ON_ONCE(ioapic->rtc_status.pending_eoi);
ret = __kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe,
&ioapic->rtc_status);
ioapic->rtc_status.pending_eoi = (ret < 0 ? 0 : ret);
@@ -504,7 +504,8 @@ int kvm_ioapic_set_irq(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm,
int irq = e->irqchip.pin;
int ret, irq_level;
- BUG_ON(irq < 0 || irq >= IOAPIC_NUM_PINS);
+ if (WARN_ON_ONCE(irq < 0 || irq >= IOAPIC_NUM_PINS))
+ return -1;
spin_lock(&ioapic->lock);
irq_level = __kvm_irq_line_state(&ioapic->irq_states[irq],
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 9d2df8623f6d1..6f30bbdddb5aa 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -767,7 +767,7 @@ static inline void apic_set_isr(int vec, struct kvm_lapic *apic)
kvm_x86_call(hwapic_isr_update)(apic->vcpu, vec);
else {
++apic->isr_count;
- BUG_ON(apic->isr_count > MAX_APIC_VECTOR);
+ KVM_BUG_ON(apic->isr_count > MAX_APIC_VECTOR, apic->vcpu->kvm);
/*
* ISR (in service register) bit is set when injecting an interrupt.
* The highest vector is injected. Thus the latest bit set matches
@@ -808,7 +808,7 @@ static inline void apic_clear_isr(int vec, struct kvm_lapic *apic)
kvm_x86_call(hwapic_isr_update)(apic->vcpu, apic_find_highest_isr(apic));
else {
--apic->isr_count;
- BUG_ON(apic->isr_count < 0);
+ KVM_BUG_ON(apic->isr_count < 0, apic->vcpu->kvm);
apic->highest_isr_cache = -1;
}
}
@@ -980,6 +980,8 @@ static void apic_update_ppr(struct kvm_lapic *apic)
if (__apic_update_ppr(apic, &ppr) &&
apic_has_interrupt_for_ppr(apic, ppr) != -1)
kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
+ else
+ kvm_lapic_update_cr8_intercept(apic->vcpu);
}
void kvm_apic_update_ppr(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index fbb491e35b542..234d0a95abf53 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -1802,13 +1802,13 @@ static void kvm_mmu_check_sptes_at_free(struct kvm_mmu_page *sp)
static void kvm_account_mmu_page(struct kvm *kvm, struct kvm_mmu_page *sp)
{
- kvm->arch.n_used_mmu_pages++;
+ kvm->stat.mmu_shadow_pages++;
kvm_account_pgtable_pages((void *)sp->spt, +1);
}
static void kvm_unaccount_mmu_page(struct kvm *kvm, struct kvm_mmu_page *sp)
{
- kvm->arch.n_used_mmu_pages--;
+ kvm->stat.mmu_shadow_pages--;
kvm_account_pgtable_pages((void *)sp->spt, -1);
}
@@ -2836,9 +2836,9 @@ restart:
static inline unsigned long kvm_mmu_available_pages(struct kvm *kvm)
{
- if (kvm->arch.n_max_mmu_pages > kvm->arch.n_used_mmu_pages)
+ if (kvm->arch.n_max_mmu_pages > kvm->stat.mmu_shadow_pages)
return kvm->arch.n_max_mmu_pages -
- kvm->arch.n_used_mmu_pages;
+ kvm->stat.mmu_shadow_pages;
return 0;
}
@@ -2874,11 +2874,11 @@ void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned long goal_nr_mmu_pages)
{
write_lock(&kvm->mmu_lock);
- if (kvm->arch.n_used_mmu_pages > goal_nr_mmu_pages) {
- kvm_mmu_zap_oldest_mmu_pages(kvm, kvm->arch.n_used_mmu_pages -
+ if (kvm->stat.mmu_shadow_pages > goal_nr_mmu_pages) {
+ kvm_mmu_zap_oldest_mmu_pages(kvm, kvm->stat.mmu_shadow_pages -
goal_nr_mmu_pages);
- goal_nr_mmu_pages = kvm->arch.n_used_mmu_pages;
+ goal_nr_mmu_pages = kvm->stat.mmu_shadow_pages;
}
kvm->arch.n_max_mmu_pages = goal_nr_mmu_pages;
diff --git a/arch/x86/kvm/mmu/mmutrace.h b/arch/x86/kvm/mmu/mmutrace.h
index fa01719baf8d4..8354d9f397778 100644
--- a/arch/x86/kvm/mmu/mmutrace.h
+++ b/arch/x86/kvm/mmu/mmutrace.h
@@ -303,7 +303,7 @@ TRACE_EVENT(
TP_fast_assign(
__entry->mmu_valid_gen = kvm->arch.mmu_valid_gen;
- __entry->mmu_used_pages = kvm->arch.n_used_mmu_pages;
+ __entry->mmu_used_pages = kvm->stat.mmu_shadow_pages;
),
TP_printk("kvm-mmu-valid-gen %u used_pages %x",
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 5b3041138301b..c1cbae65d239f 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -1410,9 +1410,10 @@ static bool wrprot_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
u64 new_spte;
bool spte_set = false;
- rcu_read_lock();
+ if (KVM_BUG_ON(min_level > KVM_MAX_HUGEPAGE_LEVEL, kvm))
+ return false;
- BUG_ON(min_level > KVM_MAX_HUGEPAGE_LEVEL);
+ rcu_read_lock();
for_each_tdp_pte_min_level(iter, kvm, root, min_level, start, end) {
retry:
@@ -1844,7 +1845,8 @@ static bool write_protect_gfn(struct kvm *kvm, struct kvm_mmu_page *root,
u64 new_spte;
bool spte_set = false;
- BUG_ON(min_level > KVM_MAX_HUGEPAGE_LEVEL);
+ if (KVM_BUG_ON(min_level > KVM_MAX_HUGEPAGE_LEVEL, kvm))
+ return false;
rcu_read_lock();
diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index 0726f88e679aa..58e493a80cb0e 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -460,8 +460,8 @@ void avic_ring_doorbell(struct kvm_vcpu *vcpu)
int cpu = READ_ONCE(vcpu->cpu);
if (cpu != get_cpu()) {
- wrmsrq(MSR_AMD64_SVM_AVIC_DOORBELL, kvm_cpu_get_apicid(cpu));
- trace_kvm_avic_doorbell(vcpu->vcpu_id, kvm_cpu_get_apicid(cpu));
+ wrmsrq(MSR_AMD64_SVM_AVIC_DOORBELL, cpu_physical_id(cpu));
+ trace_kvm_avic_doorbell(vcpu->vcpu_id, cpu_physical_id(cpu));
}
put_cpu();
}
@@ -1013,7 +1013,7 @@ static void __avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu,
enum avic_vcpu_action action)
{
struct kvm_svm *kvm_svm = to_kvm_svm(vcpu->kvm);
- int h_physical_id = kvm_cpu_get_apicid(cpu);
+ int h_physical_id = cpu_physical_id(cpu);
struct vcpu_svm *svm = to_svm(vcpu);
unsigned long flags;
u64 entry;
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 9aedb88c832d7..3e6c671a8dc26 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -2152,7 +2152,8 @@ static gpa_t svm_translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa,
struct vcpu_svm *svm = to_svm(vcpu);
struct kvm_mmu *mmu = vcpu->arch.mmu;
- BUG_ON(!mmu_is_nested(vcpu));
+ if (WARN_ON_ONCE(!mmu_is_nested(vcpu)))
+ return gpa;
/* Non-GMET walks are always user-walks */
if (!(svm->nested.ctl.misc_ctl & SVM_MISC_ENABLE_GMET))
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 3a293640d58c2..6957bb6f5cf7e 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -7470,7 +7470,8 @@ static gpa_t vmx_translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa,
{
struct kvm_mmu *mmu = vcpu->arch.mmu;
- BUG_ON(!mmu_is_nested(vcpu));
+ if (WARN_ON_ONCE(!mmu_is_nested(vcpu)))
+ return gpa;
/*
* MBEC differentiates based on the effective U/S bit of
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index f7c2983c533fd..2325be57d3d75 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -6873,11 +6873,10 @@ int vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
void vmx_update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
{
- struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
int tpr_threshold;
if (is_guest_mode(vcpu) &&
- nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW))
+ nested_cpu_has(get_vmcs12(vcpu), CPU_BASED_TPR_SHADOW))
return;
guard(vmx_vmcs01)(vcpu);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index d9d51803b7b20..afcac1042947a 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -244,6 +244,7 @@ const struct kvm_stats_desc kvm_vm_stats_desc[] = {
STATS_DESC_COUNTER(VM, mmu_recycled),
STATS_DESC_COUNTER(VM, mmu_cache_miss),
STATS_DESC_ICOUNTER(VM, mmu_unsync),
+ STATS_DESC_ICOUNTER(VM, mmu_shadow_pages),
STATS_DESC_ICOUNTER(VM, pages_4k),
STATS_DESC_ICOUNTER(VM, pages_2m),
STATS_DESC_ICOUNTER(VM, pages_1g),
@@ -5317,7 +5318,6 @@ static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu,
r = kvm_apic_set_state(vcpu, s);
if (r)
return r;
- kvm_lapic_update_cr8_intercept(vcpu);
return 0;
}
@@ -12418,8 +12418,6 @@ static int __set_sregs_common(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs,
kvm_register_mark_dirty(vcpu, VCPU_REG_CR3);
kvm_x86_call(post_set_cr3)(vcpu, sregs->cr3);
- kvm_set_cr8(vcpu, sregs->cr8);
-
*mmu_reset_needed |= vcpu->arch.efer != sregs->efer;
kvm_x86_call(set_efer)(vcpu, sregs->efer);
@@ -12448,7 +12446,7 @@ static int __set_sregs_common(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs,
kvm_set_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
kvm_set_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
- kvm_lapic_update_cr8_intercept(vcpu);
+ kvm_set_cr8(vcpu, sregs->cr8);
/* Older userspace won't unhalt the vcpu on reset. */
if (kvm_vcpu_is_bsp(vcpu) && kvm_rip_read(vcpu) == 0xfff0 &&