diff options
| author | Marc Zyngier <maz@kernel.org> | 2026-06-12 09:29:34 +0100 |
|---|---|---|
| committer | Marc Zyngier <maz@kernel.org> | 2026-06-12 09:29:34 +0100 |
| commit | 1ee27dacbe5dc4def481794d899d67b0d4570094 (patch) | |
| tree | 95c261feba92bb951d8fc2b191ca250c36d080bf /arch | |
| parent | bd2ed0733bc3a3672bf074af844173bdb468c66a (diff) | |
| parent | 4b54e2374d1bd82031cef9784e125a7100a32499 (diff) | |
| download | ath-1ee27dacbe5dc4def481794d899d67b0d4570094.tar.gz | |
Merge branch kvm-arm64/nv-mmu-7.2 into kvmarm-master/next
* kvm-arm64/nv-mmu-7.2:
: .
: Assorted collection of fixes for NV MMU bugs
:
: - Correctly plug AT S1E1A handling into the emulation backend
:
: - Make CPTR_EL2.E0POE depend on FEAT_S1POE
:
: - Drop the reference on the page if the VNCR translation
: races with an MMU notifier
:
: - Correctly synthesise an SEA if a page table walk fails due
: to a guest error
:
: - Fully invalidate the VNCR TLB and fixmap when translating
: for a new VNCR
:
: - Restart S1 walk when the S2 walk fails due to a race condition
:
: - Correctly return -EAGAIN when a S1 walk fails
:
: - Fix block mapping validity check in stage-1 walker for 64kB pages
:
: - Fix a potential NULL dereference when performing an EL2 TLBI targeting
: the VNCR page
:
: - Hold kvm->mmu_lock while initialising the vncr_tlb pointer
: .
KVM: arm64: nv: Hold kvm->mmu_lock while initialising vcpu->arch.vncr_tlb
KVM: arm64: nv: Avoid dereferencing NULL VNCR pseudo-TLB
KVM: arm64: Fix block mapping validity check in stage-1 walker
KVM: arm64: nv: Restart stage-1 walk if stage-2 desc update fails
KVM: arm64: Restart instruction upon race in __kvm_at_s12()
KVM: arm64: nv: Inject SEA TTW when desc update can't write to GPA
KVM: arm64: nv: Fully update VNCR fixmap state in kvm_translate_vncr()
KVM: arm64: Don't leak PFN when kvm_translate_vncr() races MMU notifier
arm64: cpufeature: Expose ID_AA64ISAR2_EL1.ATS1A to KVM
KVM: arm64: Wire AT S1E1A in the system instruction handling table
KVM: arm64: Key CPTR_EL2.E0POE propagation on FEAT_S1POE
Signed-off-by: Marc Zyngier <maz@kernel.org>
Diffstat (limited to 'arch')
| -rw-r--r-- | arch/arm64/kernel/cpufeature.c | 1 | ||||
| -rw-r--r-- | arch/arm64/kvm/at.c | 19 | ||||
| -rw-r--r-- | arch/arm64/kvm/hyp/include/hyp/switch.h | 2 | ||||
| -rw-r--r-- | arch/arm64/kvm/nested.c | 90 | ||||
| -rw-r--r-- | arch/arm64/kvm/sys_regs.c | 1 |
5 files changed, 74 insertions, 39 deletions
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 6d53bb15cf7bb..62b0d77217eeb 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -266,6 +266,7 @@ static const struct arm64_ftr_bits ftr_id_aa64isar1[] = { }; static const struct arm64_ftr_bits ftr_id_aa64isar2[] = { + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_ATS1A_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_LUT_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_CSSC_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_RPRFM_SHIFT, 4, 0), diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c index 831e88f0dba0b..30e6fa8ac07cf 100644 --- a/arch/arm64/kvm/at.c +++ b/arch/arm64/kvm/at.c @@ -492,6 +492,9 @@ static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, if (wi->s2) { ret = kvm_walk_nested_s2(vcpu, ipa, &s2_trans); + if (ret == -EAGAIN) + return ret; + if (ret) { fail_s1_walk(wr, (s2_trans.esr & ~ESR_ELx_FSC_LEVEL) | level, @@ -561,15 +564,18 @@ static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, /* Block mapping, check the validity of the level */ if (!(desc & BIT(1))) { bool valid_block = false; + bool lpa = kvm_has_feat_enum(vcpu->kvm, ID_AA64MMFR0_EL1, PARANGE, 52); switch (BIT(wi->pgshift)) { case SZ_4K: valid_block = level == 1 || level == 2 || (wi->pa52bit && level == 0); break; case SZ_16K: - case SZ_64K: valid_block = level == 2 || (wi->pa52bit && level == 1); break; + case SZ_64K: + valid_block = level == 2 || (lpa && level == 1); + break; } if (!valid_block) @@ -590,8 +596,12 @@ static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, } ret = kvm_swap_s1_desc(vcpu, ipa, desc, new_desc, wi); - if (ret) + if (ret == -EAGAIN) return ret; + if (ret) { + fail_s1_walk(wr, ESR_ELx_FSC_SEA_TTW(level), false); + return ret; + } desc = new_desc; } @@ -1622,7 
+1632,10 @@ int __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr) return 0; } - __kvm_at_s1e01(vcpu, op, vaddr); + ret = __kvm_at_s1e01(vcpu, op, vaddr); + if (ret) + return ret; + par = vcpu_read_sys_reg(vcpu, PAR_EL1); if (par & SYS_PAR_EL1_F) return 0; diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index bf0eb5e434274..161bb2a3e1d90 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -141,7 +141,7 @@ static inline void __activate_cptr_traps_vhe(struct kvm_vcpu *vcpu) if (!(SYS_FIELD_GET(CPACR_EL1, ZEN, cptr) & BIT(0))) val &= ~CPACR_EL1_ZEN; - if (kvm_has_feat(vcpu->kvm, ID_AA64MMFR3_EL1, S2POE, IMP)) + if (kvm_has_feat(vcpu->kvm, ID_AA64MMFR3_EL1, S1POE, IMP)) val |= cptr & CPACR_EL1_E0POE; val |= cptr & CPTR_EL2_TCPAC; diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c index 3204b3ef60ddd..3a5571c3c1142 100644 --- a/arch/arm64/kvm/nested.c +++ b/arch/arm64/kvm/nested.c @@ -352,8 +352,13 @@ static int walk_nested_s2_pgd(struct kvm_vcpu *vcpu, phys_addr_t ipa, if (new_desc != desc) { ret = swap_guest_s2_desc(vcpu, paddr, desc, new_desc, wi); - if (ret) + if (ret == -EAGAIN) return ret; + if (ret) { + out->esr = ESR_ELx_FSC_SEA_TTW(level); + out->desc = desc; + return 1; + } desc = new_desc; } @@ -866,18 +871,24 @@ void kvm_vcpu_load_hw_mmu(struct kvm_vcpu *vcpu) } } +static void this_cpu_reset_vncr_fixmap(struct kvm_vcpu *vcpu) +{ + if (!host_data_test_flag(L1_VNCR_MAPPED)) + return; + + BUG_ON(vcpu->arch.vncr_tlb->cpu != smp_processor_id()); + BUG_ON(is_hyp_ctxt(vcpu)); + + clear_fixmap(vncr_fixmap(vcpu->arch.vncr_tlb->cpu)); + vcpu->arch.vncr_tlb->cpu = -1; + host_data_clear_flag(L1_VNCR_MAPPED); + atomic_dec(&vcpu->kvm->arch.vncr_map_count); +} + void kvm_vcpu_put_hw_mmu(struct kvm_vcpu *vcpu) { /* Unconditionally drop the VNCR mapping if we have one */ - if (host_data_test_flag(L1_VNCR_MAPPED)) { - BUG_ON(vcpu->arch.vncr_tlb->cpu != 
smp_processor_id()); - BUG_ON(is_hyp_ctxt(vcpu)); - - clear_fixmap(vncr_fixmap(vcpu->arch.vncr_tlb->cpu)); - vcpu->arch.vncr_tlb->cpu = -1; - host_data_clear_flag(L1_VNCR_MAPPED); - atomic_dec(&vcpu->kvm->arch.vncr_map_count); - } + this_cpu_reset_vncr_fixmap(vcpu); /* * Keep a reference on the associated stage-2 MMU if the vCPU is @@ -966,9 +977,21 @@ static void invalidate_vncr(struct vncr_tlb *vt) clear_fixmap(vncr_fixmap(vt->cpu)); } +/* + * VNCR TLB invalidation occurs from MMU notifiers or TLBI instructions, and + * either can race against a vcpu not being onlined yet (no pseudo-TLB + * allocated). Similarly, the TLB might be invalid. Skip those, as they + * obviously don't participate in the invalidation at this stage. + */ +#define kvm_for_each_vncr_tlb(idx, vcpup, tlbp, kvm) \ + kvm_for_each_vcpu(idx, vcpup, kvm) \ + if (((tlbp) = vcpup->arch.vncr_tlb) && \ + (tlbp)->valid) + static void kvm_invalidate_vncr_ipa(struct kvm *kvm, u64 start, u64 end) { struct kvm_vcpu *vcpu; + struct vncr_tlb *vt; unsigned long i; lockdep_assert_held_write(&kvm->mmu_lock); @@ -976,24 +999,9 @@ static void kvm_invalidate_vncr_ipa(struct kvm *kvm, u64 start, u64 end) if (!kvm_has_feat(kvm, ID_AA64MMFR4_EL1, NV_frac, NV2_ONLY)) return; - kvm_for_each_vcpu(i, vcpu, kvm) { - struct vncr_tlb *vt = vcpu->arch.vncr_tlb; + kvm_for_each_vncr_tlb(i, vcpu, vt, kvm) { u64 ipa_start, ipa_end, ipa_size; - /* - * Careful here: We end-up here from an MMU notifier, - * and this can race against a vcpu not being onlined - * yet, without the pseudo-TLB being allocated. - * - * Skip those, as they obviously don't participate in - * the invalidation at this stage. 
- */ - if (!vt) - continue; - - if (!vt->valid) - continue; - ipa_size = ttl_to_size(pgshift_level_to_ttl(vt->wi.pgshift, vt->wr.level)); ipa_start = vt->wr.pa & ~(ipa_size - 1); @@ -1023,17 +1031,14 @@ static void invalidate_vncr_va(struct kvm *kvm, struct s1e2_tlbi_scope *scope) { struct kvm_vcpu *vcpu; + struct vncr_tlb *vt; unsigned long i; lockdep_assert_held_write(&kvm->mmu_lock); - kvm_for_each_vcpu(i, vcpu, kvm) { - struct vncr_tlb *vt = vcpu->arch.vncr_tlb; + kvm_for_each_vncr_tlb(i, vcpu, vt, kvm) { u64 va_start, va_end, va_size; - if (!vt->valid) - continue; - va_size = ttl_to_size(pgshift_level_to_ttl(vt->wi.pgshift, vt->wr.level)); va_start = vt->gva & ~(va_size - 1); @@ -1317,8 +1322,20 @@ int kvm_vcpu_allocate_vncr_tlb(struct kvm_vcpu *vcpu) if (!kvm_has_feat(vcpu->kvm, ID_AA64MMFR4_EL1, NV_frac, NV2_ONLY)) return 0; - vcpu->arch.vncr_tlb = kzalloc_obj(*vcpu->arch.vncr_tlb, - GFP_KERNEL_ACCOUNT); + if (!vcpu->arch.vncr_tlb) { + struct vncr_tlb *vt = kzalloc_obj(*vcpu->arch.vncr_tlb, + GFP_KERNEL_ACCOUNT); + + /* + * Taking the lock on assignment ensures that the TLB is + * seen as initialised when following the pointer (release + * semantics of the unlock), and avoids having acquires on + * each user which already take the lock. + */ + scoped_guard(write_lock, &vcpu->kvm->mmu_lock) + vcpu->arch.vncr_tlb = vt; + } + if (!vcpu->arch.vncr_tlb) return -ENOMEM; @@ -1351,7 +1368,8 @@ static int kvm_translate_vncr(struct kvm_vcpu *vcpu, bool *is_gmem) * We also prepare the next walk wilst we're at it. 
*/ scoped_guard(write_lock, &vcpu->kvm->mmu_lock) { - invalidate_vncr(vt); + this_cpu_reset_vncr_fixmap(vcpu); + vt->valid = false; vt->wi = (struct s1_walk_info) { .regime = TR_EL20, @@ -1395,8 +1413,10 @@ static int kvm_translate_vncr(struct kvm_vcpu *vcpu, bool *is_gmem) } scoped_guard(write_lock, &vcpu->kvm->mmu_lock) { - if (mmu_invalidate_retry(vcpu->kvm, mmu_seq)) + if (mmu_invalidate_retry(vcpu->kvm, mmu_seq)) { + kvm_release_faultin_page(vcpu->kvm, page, true, false); return -EAGAIN; + } vt->gva = va; vt->hpa = pfn << PAGE_SHIFT; diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 6083a1b23dbf9..33c921df19b54 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -4214,6 +4214,7 @@ static struct sys_reg_desc sys_insn_descs[] = { SYS_INSN(AT_S1E0W, handle_at_s1e01), SYS_INSN(AT_S1E1RP, handle_at_s1e01), SYS_INSN(AT_S1E1WP, handle_at_s1e01), + SYS_INSN(AT_S1E1A, handle_at_s1e01), { SYS_DESC(SYS_DC_CSW), access_dcsw }, { SYS_DESC(SYS_DC_CGSW), access_dcgsw }, |
