about | summary | refs | log | tree | commit | diff | stats
path: root/arch
diff options
author Marc Zyngier <maz@kernel.org> 2026-06-12 09:29:34 +0100
committer Marc Zyngier <maz@kernel.org> 2026-06-12 09:29:34 +0100
commit 1ee27dacbe5dc4def481794d899d67b0d4570094 (patch)
tree 95c261feba92bb951d8fc2b191ca250c36d080bf /arch
parent bd2ed0733bc3a3672bf074af844173bdb468c66a (diff)
parent 4b54e2374d1bd82031cef9784e125a7100a32499 (diff)
download ath-1ee27dacbe5dc4def481794d899d67b0d4570094.tar.gz
Merge branch kvm-arm64/nv-mmu-7.2 into kvmarm-master/next
* kvm-arm64/nv-mmu-7.2:
  : .
  : Assorted collection of fixes for NV MMU bugs
  :
  : - Correctly plug AT S1E1A handling in the emulation backend
  :
  : - Make CPTR_EL2.E0POE depend on FEAT_S1POE
  :
  : - Drop the reference on the page if the VNCR translation
  :   races with an MMU notifier
  :
  : - Correctly synthesise an SEA if a page table walk fails due
  :   to a guest error
  :
  : - Fully invalidate the VNCR TLB and fixmap when translating
  :   for a new VNCR
  :
  : - Restart S1 walk when the S2 walk fails due to a race condition
  :
  : - Correctly return -EAGAIN when a S1 walk fails
  :
  : - Fix block mapping validity check in stage-1 walker for 64kB pages
  :
  : - Fix potential NULL dereference when performing an EL2 TLBI targeting
  :   the VNCR page
  :
  : - Hold kvm->mmu_lock while initialising the vncr_tlb pointer
  : .
  KVM: arm64: nv: Hold kvm->mmu_lock while initialising vcpu->arch.vncr_tlb
  KVM: arm64: nv: Avoid dereferencing NULL VNCR pseudo-TLB
  KVM: arm64: Fix block mapping validity check in stage-1 walker
  KVM: arm64: nv: Restart stage-1 walk if stage-2 desc update fails
  KVM: arm64: Restart instruction upon race in __kvm_at_s12()
  KVM: arm64: nv: Inject SEA TTW when desc update can't write to GPA
  KVM: arm64: nv: Fully update VNCR fixmap state in kvm_translate_vncr()
  KVM: arm64: Don't leak PFN when kvm_translate_vncr() races MMU notifier
  arm64: cpufeature: Expose ID_AA64ISAR2_EL1.ATS1A to KVM
  KVM: arm64: Wire AT S1E1A in the system instruction handling table
  KVM: arm64: Key CPTR_EL2.E0POE propagation on FEAT_S1POE

Signed-off-by: Marc Zyngier <maz@kernel.org>
Diffstat (limited to 'arch')
-rw-r--r-- arch/arm64/kernel/cpufeature.c 1
-rw-r--r-- arch/arm64/kvm/at.c 19
-rw-r--r-- arch/arm64/kvm/hyp/include/hyp/switch.h 2
-rw-r--r-- arch/arm64/kvm/nested.c 90
-rw-r--r-- arch/arm64/kvm/sys_regs.c 1
5 files changed, 74 insertions, 39 deletions
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 6d53bb15cf7bb..62b0d77217eeb 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -266,6 +266,7 @@ static const struct arm64_ftr_bits ftr_id_aa64isar1[] = {
};
static const struct arm64_ftr_bits ftr_id_aa64isar2[] = {
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_ATS1A_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_LUT_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_CSSC_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_RPRFM_SHIFT, 4, 0),
diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c
index 831e88f0dba0b..30e6fa8ac07cf 100644
--- a/arch/arm64/kvm/at.c
+++ b/arch/arm64/kvm/at.c
@@ -492,6 +492,9 @@ static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
if (wi->s2) {
ret = kvm_walk_nested_s2(vcpu, ipa, &s2_trans);
+ if (ret == -EAGAIN)
+ return ret;
+
if (ret) {
fail_s1_walk(wr,
(s2_trans.esr & ~ESR_ELx_FSC_LEVEL) | level,
@@ -561,15 +564,18 @@ static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
/* Block mapping, check the validity of the level */
if (!(desc & BIT(1))) {
bool valid_block = false;
+ bool lpa = kvm_has_feat_enum(vcpu->kvm, ID_AA64MMFR0_EL1, PARANGE, 52);
switch (BIT(wi->pgshift)) {
case SZ_4K:
valid_block = level == 1 || level == 2 || (wi->pa52bit && level == 0);
break;
case SZ_16K:
- case SZ_64K:
valid_block = level == 2 || (wi->pa52bit && level == 1);
break;
+ case SZ_64K:
+ valid_block = level == 2 || (lpa && level == 1);
+ break;
}
if (!valid_block)
@@ -590,8 +596,12 @@ static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
}
ret = kvm_swap_s1_desc(vcpu, ipa, desc, new_desc, wi);
- if (ret)
+ if (ret == -EAGAIN)
return ret;
+ if (ret) {
+ fail_s1_walk(wr, ESR_ELx_FSC_SEA_TTW(level), false);
+ return ret;
+ }
desc = new_desc;
}
@@ -1622,7 +1632,10 @@ int __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
return 0;
}
- __kvm_at_s1e01(vcpu, op, vaddr);
+ ret = __kvm_at_s1e01(vcpu, op, vaddr);
+ if (ret)
+ return ret;
+
par = vcpu_read_sys_reg(vcpu, PAR_EL1);
if (par & SYS_PAR_EL1_F)
return 0;
diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index bf0eb5e434274..161bb2a3e1d90 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -141,7 +141,7 @@ static inline void __activate_cptr_traps_vhe(struct kvm_vcpu *vcpu)
if (!(SYS_FIELD_GET(CPACR_EL1, ZEN, cptr) & BIT(0)))
val &= ~CPACR_EL1_ZEN;
- if (kvm_has_feat(vcpu->kvm, ID_AA64MMFR3_EL1, S2POE, IMP))
+ if (kvm_has_feat(vcpu->kvm, ID_AA64MMFR3_EL1, S1POE, IMP))
val |= cptr & CPACR_EL1_E0POE;
val |= cptr & CPTR_EL2_TCPAC;
diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c
index 3204b3ef60ddd..3a5571c3c1142 100644
--- a/arch/arm64/kvm/nested.c
+++ b/arch/arm64/kvm/nested.c
@@ -352,8 +352,13 @@ static int walk_nested_s2_pgd(struct kvm_vcpu *vcpu, phys_addr_t ipa,
if (new_desc != desc) {
ret = swap_guest_s2_desc(vcpu, paddr, desc, new_desc, wi);
- if (ret)
+ if (ret == -EAGAIN)
return ret;
+ if (ret) {
+ out->esr = ESR_ELx_FSC_SEA_TTW(level);
+ out->desc = desc;
+ return 1;
+ }
desc = new_desc;
}
@@ -866,18 +871,24 @@ void kvm_vcpu_load_hw_mmu(struct kvm_vcpu *vcpu)
}
}
+static void this_cpu_reset_vncr_fixmap(struct kvm_vcpu *vcpu)
+{
+ if (!host_data_test_flag(L1_VNCR_MAPPED))
+ return;
+
+ BUG_ON(vcpu->arch.vncr_tlb->cpu != smp_processor_id());
+ BUG_ON(is_hyp_ctxt(vcpu));
+
+ clear_fixmap(vncr_fixmap(vcpu->arch.vncr_tlb->cpu));
+ vcpu->arch.vncr_tlb->cpu = -1;
+ host_data_clear_flag(L1_VNCR_MAPPED);
+ atomic_dec(&vcpu->kvm->arch.vncr_map_count);
+}
+
void kvm_vcpu_put_hw_mmu(struct kvm_vcpu *vcpu)
{
/* Unconditionally drop the VNCR mapping if we have one */
- if (host_data_test_flag(L1_VNCR_MAPPED)) {
- BUG_ON(vcpu->arch.vncr_tlb->cpu != smp_processor_id());
- BUG_ON(is_hyp_ctxt(vcpu));
-
- clear_fixmap(vncr_fixmap(vcpu->arch.vncr_tlb->cpu));
- vcpu->arch.vncr_tlb->cpu = -1;
- host_data_clear_flag(L1_VNCR_MAPPED);
- atomic_dec(&vcpu->kvm->arch.vncr_map_count);
- }
+ this_cpu_reset_vncr_fixmap(vcpu);
/*
* Keep a reference on the associated stage-2 MMU if the vCPU is
@@ -966,9 +977,21 @@ static void invalidate_vncr(struct vncr_tlb *vt)
clear_fixmap(vncr_fixmap(vt->cpu));
}
+/*
+ * VNCR TLB invalidation occurs from MMU notifiers or TLBI instructions, and
+ * either can race against a vcpu not being onlined yet (no pseudo-TLB
+ * allocated). Similarly, the TLB might be invalid. Skip those, as they
+ * obviously don't participate in the invalidation at this stage.
+ */
+#define kvm_for_each_vncr_tlb(idx, vcpup, tlbp, kvm) \
+ kvm_for_each_vcpu(idx, vcpup, kvm) \
+ if (((tlbp) = vcpup->arch.vncr_tlb) && \
+ (tlbp)->valid)
+
static void kvm_invalidate_vncr_ipa(struct kvm *kvm, u64 start, u64 end)
{
struct kvm_vcpu *vcpu;
+ struct vncr_tlb *vt;
unsigned long i;
lockdep_assert_held_write(&kvm->mmu_lock);
@@ -976,24 +999,9 @@ static void kvm_invalidate_vncr_ipa(struct kvm *kvm, u64 start, u64 end)
if (!kvm_has_feat(kvm, ID_AA64MMFR4_EL1, NV_frac, NV2_ONLY))
return;
- kvm_for_each_vcpu(i, vcpu, kvm) {
- struct vncr_tlb *vt = vcpu->arch.vncr_tlb;
+ kvm_for_each_vncr_tlb(i, vcpu, vt, kvm) {
u64 ipa_start, ipa_end, ipa_size;
- /*
- * Careful here: We end-up here from an MMU notifier,
- * and this can race against a vcpu not being onlined
- * yet, without the pseudo-TLB being allocated.
- *
- * Skip those, as they obviously don't participate in
- * the invalidation at this stage.
- */
- if (!vt)
- continue;
-
- if (!vt->valid)
- continue;
-
ipa_size = ttl_to_size(pgshift_level_to_ttl(vt->wi.pgshift,
vt->wr.level));
ipa_start = vt->wr.pa & ~(ipa_size - 1);
@@ -1023,17 +1031,14 @@ static void invalidate_vncr_va(struct kvm *kvm,
struct s1e2_tlbi_scope *scope)
{
struct kvm_vcpu *vcpu;
+ struct vncr_tlb *vt;
unsigned long i;
lockdep_assert_held_write(&kvm->mmu_lock);
- kvm_for_each_vcpu(i, vcpu, kvm) {
- struct vncr_tlb *vt = vcpu->arch.vncr_tlb;
+ kvm_for_each_vncr_tlb(i, vcpu, vt, kvm) {
u64 va_start, va_end, va_size;
- if (!vt->valid)
- continue;
-
va_size = ttl_to_size(pgshift_level_to_ttl(vt->wi.pgshift,
vt->wr.level));
va_start = vt->gva & ~(va_size - 1);
@@ -1317,8 +1322,20 @@ int kvm_vcpu_allocate_vncr_tlb(struct kvm_vcpu *vcpu)
if (!kvm_has_feat(vcpu->kvm, ID_AA64MMFR4_EL1, NV_frac, NV2_ONLY))
return 0;
- vcpu->arch.vncr_tlb = kzalloc_obj(*vcpu->arch.vncr_tlb,
- GFP_KERNEL_ACCOUNT);
+ if (!vcpu->arch.vncr_tlb) {
+ struct vncr_tlb *vt = kzalloc_obj(*vcpu->arch.vncr_tlb,
+ GFP_KERNEL_ACCOUNT);
+
+ /*
+ * Taking the lock on assignment ensures that the TLB is
+ * seen as initialised when following the pointer (release
+ * semantics of the unlock), and avoids having acquires on
+ * each user which already take the lock.
+ */
+ scoped_guard(write_lock, &vcpu->kvm->mmu_lock)
+ vcpu->arch.vncr_tlb = vt;
+ }
+
if (!vcpu->arch.vncr_tlb)
return -ENOMEM;
@@ -1351,7 +1368,8 @@ static int kvm_translate_vncr(struct kvm_vcpu *vcpu, bool *is_gmem)
* We also prepare the next walk wilst we're at it.
*/
scoped_guard(write_lock, &vcpu->kvm->mmu_lock) {
- invalidate_vncr(vt);
+ this_cpu_reset_vncr_fixmap(vcpu);
+ vt->valid = false;
vt->wi = (struct s1_walk_info) {
.regime = TR_EL20,
@@ -1395,8 +1413,10 @@ static int kvm_translate_vncr(struct kvm_vcpu *vcpu, bool *is_gmem)
}
scoped_guard(write_lock, &vcpu->kvm->mmu_lock) {
- if (mmu_invalidate_retry(vcpu->kvm, mmu_seq))
+ if (mmu_invalidate_retry(vcpu->kvm, mmu_seq)) {
+ kvm_release_faultin_page(vcpu->kvm, page, true, false);
return -EAGAIN;
+ }
vt->gva = va;
vt->hpa = pfn << PAGE_SHIFT;
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 6083a1b23dbf9..33c921df19b54 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -4214,6 +4214,7 @@ static struct sys_reg_desc sys_insn_descs[] = {
SYS_INSN(AT_S1E0W, handle_at_s1e01),
SYS_INSN(AT_S1E1RP, handle_at_s1e01),
SYS_INSN(AT_S1E1WP, handle_at_s1e01),
+ SYS_INSN(AT_S1E1A, handle_at_s1e01),
{ SYS_DESC(SYS_DC_CSW), access_dcsw },
{ SYS_DESC(SYS_DC_CGSW), access_dcgsw },