diff options
| author | Mark Brown <broonie@kernel.org> | 2026-05-29 22:46:48 +0100 |
|---|---|---|
| committer | Mark Brown <broonie@kernel.org> | 2026-05-29 22:46:48 +0100 |
| commit | 0ce0f7ee41c6e99f682a62e1d1363881fa18ff58 (patch) | |
| tree | 09f6c7e2ff320f8144cd9a91252904fbfacf2d02 /arch | |
| parent | f08a9de96912e608e3f2d2c7c8c12fbdc6b98208 (diff) | |
| parent | 6b8c356fdeed88ef8019304c3f18fbb1d8a8be1b (diff) | |
| download | linux-next-history-0ce0f7ee41c6e99f682a62e1d1363881fa18ff58.tar.gz | |
Merge branch 'next' of https://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm.git
Diffstat (limited to 'arch')
26 files changed, 475 insertions, 372 deletions
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index a49042bfa801f..ac5ebfb06e37c 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -1112,7 +1112,8 @@ struct kvm_vcpu_arch { #define IN_NESTED_ERET __vcpu_single_flag(sflags, BIT(7)) /* SError pending for nested guest */ #define NESTED_SERROR_PENDING __vcpu_single_flag(sflags, BIT(8)) - +/* KVM is currently emulating an L2 to L1 exception */ +#define IN_NESTED_EXCEPTION __vcpu_single_flag(sflags, BIT(9)) /* Pointer to the vcpu's SVE FFR for sve_{save,load}_state() */ #define vcpu_sve_pffr(vcpu) (kern_hyp_va((vcpu)->arch.sve_state) + \ diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 01e9c72d6aa7a..6eae7e7e2a684 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -318,8 +318,7 @@ static __always_inline u64 kvm_get_vttbr(struct kvm_s2_mmu *mmu) * Must be called from hyp code running at EL2 with an updated VTTBR * and interrupts disabled. */ -static __always_inline void __load_stage2(struct kvm_s2_mmu *mmu, - struct kvm_arch *arch) +static __always_inline void __load_stage2(struct kvm_s2_mmu *mmu) { write_sysreg(mmu->vtcr, vtcr_el2); write_sysreg(kvm_get_vttbr(mmu), vttbr_el2); diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S index 634ddc9042444..37c6976e44a4c 100644 --- a/arch/arm64/kernel/hyp-stub.S +++ b/arch/arm64/kernel/hyp-stub.S @@ -104,11 +104,9 @@ SYM_CODE_START_LOCAL(__finalise_el2) mov_q x0, HCR_HOST_VHE_FLAGS msr_hcr_el2 x0 - // Use the EL1 allocated stack, per-cpu offset + // Use the EL1 allocated stack mrs x0, sp_el1 mov sp, x0 - mrs x0, tpidr_el1 - msr tpidr_el2, x0 // FP configuration, vectors mrs_s x0, SYS_CPACR_EL12 diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index cbea4d9ee9552..4155fe89b58a1 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -39,10 +39,9 @@ static const u8 default_ppi[] = { [TIMER_HVTIMER] = 28, }; -static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx); static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level, struct arch_timer_context *timer_ctx); -static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx); +static bool kvm_timer_pending(struct arch_timer_context *timer_ctx); static void kvm_arm_timer_write(struct kvm_vcpu *vcpu, struct arch_timer_context *timer, enum kvm_arch_timer_regs treg, @@ -52,11 +51,17 @@ static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu, enum kvm_arch_timer_regs treg); static bool kvm_arch_timer_get_input_level(int vintid); -static struct irq_ops arch_timer_irq_ops = { +static unsigned long kvm_arch_timer_get_irq_flags(void) +{ + return kvm_vgic_global_state.no_hw_deactivation ? VGIC_IRQ_SW_RESAMPLE : 0; +} + +static const struct irq_ops arch_timer_irq_ops = { + .get_flags = kvm_arch_timer_get_irq_flags, .get_input_level = kvm_arch_timer_get_input_level, }; -static struct irq_ops arch_timer_irq_ops_vgic_v5 = { +static const struct irq_ops arch_timer_irq_ops_vgic_v5 = { .get_input_level = kvm_arch_timer_get_input_level, .queue_irq_unlock = vgic_v5_ppi_queue_irq_unlock, .set_direct_injection = vgic_v5_set_ppi_dvi, @@ -224,7 +229,7 @@ static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id) else ctx = map.direct_ptimer; - if (kvm_timer_should_fire(ctx)) + if (kvm_timer_pending(ctx)) kvm_timer_update_irq(vcpu, true, ctx); if (userspace_irqchip(vcpu->kvm) && @@ -257,7 +262,7 @@ static u64 kvm_timer_compute_delta(struct arch_timer_context *timer_ctx) return kvm_counter_compute_delta(timer_ctx, timer_get_cval(timer_ctx)); } -static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx) +static bool kvm_timer_enabled(struct arch_timer_context *timer_ctx) { WARN_ON(timer_ctx && timer_ctx->loaded); return timer_ctx && @@ -294,7 +299,7 @@ static u64 kvm_timer_earliest_exp(struct kvm_vcpu *vcpu) struct arch_timer_context *ctx = &vcpu->arch.timer_cpu.timers[i]; WARN(ctx->loaded, "timer %d loaded\n", i); - if (kvm_timer_irq_can_fire(ctx)) + if (kvm_timer_enabled(ctx)) min_delta = min(min_delta, kvm_timer_compute_delta(ctx)); } @@ -358,7 +363,7 @@ static enum hrtimer_restart kvm_hrtimer_expire(struct hrtimer *hrt) return HRTIMER_NORESTART; } -static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx) +static bool kvm_timer_pending(struct arch_timer_context *timer_ctx) { enum kvm_arch_timers index; u64 cval, now; @@ -391,7 +396,7 @@ static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx) !(cnt_ctl & ARCH_TIMER_CTRL_IT_MASK); } - if (!kvm_timer_irq_can_fire(timer_ctx)) + if (!kvm_timer_enabled(timer_ctx)) return false; cval = timer_get_cval(timer_ctx); @@ -405,22 +410,30 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) return vcpu_has_wfit_active(vcpu) && wfit_delay_ns(vcpu) == 0; } +static u64 kvm_timer_needs_notify(struct kvm_vcpu *vcpu) +{ + u64 v = vcpu->run->s.regs.device_irq_level; + + v ^= kvm_timer_pending(vcpu_vtimer(vcpu)) ? KVM_ARM_DEV_EL1_VTIMER : 0; + v ^= kvm_timer_pending(vcpu_ptimer(vcpu)) ? KVM_ARM_DEV_EL1_PTIMER : 0; + + return v & (KVM_ARM_DEV_EL1_VTIMER | KVM_ARM_DEV_EL1_PTIMER); +} + +bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu) +{ + return !!kvm_timer_needs_notify(vcpu); +} + /* * Reflect the timer output level into the kvm_run structure */ -void kvm_timer_update_run(struct kvm_vcpu *vcpu) +bool kvm_timer_update_run(struct kvm_vcpu *vcpu) { - struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); - struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); - struct kvm_sync_regs *regs = &vcpu->run->s.regs; - - /* Populate the device bitmap with the timer states */ - regs->device_irq_level &= ~(KVM_ARM_DEV_EL1_VTIMER | - KVM_ARM_DEV_EL1_PTIMER); - if (kvm_timer_should_fire(vtimer)) - regs->device_irq_level |= KVM_ARM_DEV_EL1_VTIMER; - if (kvm_timer_should_fire(ptimer)) - regs->device_irq_level |= KVM_ARM_DEV_EL1_PTIMER; + u64 mask = kvm_timer_needs_notify(vcpu); + if (mask) + vcpu->run->s.regs.device_irq_level ^= mask; + return !!mask; } static void kvm_timer_update_status(struct arch_timer_context *ctx, bool level) @@ -446,9 +459,8 @@ static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level, { kvm_timer_update_status(timer_ctx, new_level); - timer_ctx->irq.level = new_level; trace_kvm_timer_update_irq(vcpu->vcpu_id, timer_irq(timer_ctx), - timer_ctx->irq.level); + new_level); if (userspace_irqchip(vcpu->kvm)) return; @@ -466,28 +478,25 @@ static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level, kvm_vgic_inject_irq(vcpu->kvm, vcpu, timer_irq(timer_ctx), - timer_ctx->irq.level, + new_level, timer_ctx); } /* Only called for a fully emulated timer */ static void timer_emulate(struct arch_timer_context *ctx) { - bool should_fire = kvm_timer_should_fire(ctx); + bool pending = kvm_timer_pending(ctx); - trace_kvm_timer_emulate(ctx, should_fire); + trace_kvm_timer_emulate(ctx, pending); - if (should_fire != ctx->irq.level) - kvm_timer_update_irq(timer_context_to_vcpu(ctx), should_fire, ctx); - - kvm_timer_update_status(ctx, should_fire); + kvm_timer_update_irq(timer_context_to_vcpu(ctx), pending, ctx); /* - * If the timer can fire now, we don't need to have a soft timer - * scheduled for the future. If the timer cannot fire at all, - * then we also don't need a soft timer. + * If the timer is pending, we don't need to have a soft timer + * scheduled for the future. If the timer is disabled, then + * we don't need a soft timer either. */ - if (should_fire || !kvm_timer_irq_can_fire(ctx)) + if (pending || !kvm_timer_enabled(ctx)) return; soft_timer_start(&ctx->hrtimer, kvm_timer_compute_delta(ctx)); @@ -594,10 +603,10 @@ static void kvm_timer_blocking(struct kvm_vcpu *vcpu) * If no timers are capable of raising interrupts (disabled or * masked), then there's no more work for us to do. */ - if (!kvm_timer_irq_can_fire(map.direct_vtimer) && - !kvm_timer_irq_can_fire(map.direct_ptimer) && - !kvm_timer_irq_can_fire(map.emul_vtimer) && - !kvm_timer_irq_can_fire(map.emul_ptimer) && + if (!kvm_timer_enabled(map.direct_vtimer) && + !kvm_timer_enabled(map.direct_ptimer) && + !kvm_timer_enabled(map.emul_vtimer) && + !kvm_timer_enabled(map.emul_ptimer) && !vcpu_has_wfit_active(vcpu)) return; @@ -677,6 +686,7 @@ static inline void set_timer_irq_phys_active(struct arch_timer_context *ctx, boo static void kvm_timer_vcpu_load_gic(struct arch_timer_context *ctx) { struct kvm_vcpu *vcpu = timer_context_to_vcpu(ctx); + bool pending = kvm_timer_pending(ctx); bool phys_active = false; /* @@ -685,12 +695,12 @@ static void kvm_timer_vcpu_load_gic(struct arch_timer_context *ctx) * this point and the register restoration, we'll take the * interrupt anyway. */ - kvm_timer_update_irq(vcpu, kvm_timer_should_fire(ctx), ctx); + kvm_timer_update_irq(vcpu, pending, ctx); if (irqchip_in_kernel(vcpu->kvm)) phys_active = kvm_vgic_map_is_active(vcpu, timer_irq(ctx)); - phys_active |= ctx->irq.level; + phys_active |= pending; phys_active |= vgic_is_v5(vcpu->kvm); set_timer_irq_phys_active(ctx, phys_active); @@ -699,6 +709,7 @@ static void kvm_timer_vcpu_load_gic(struct arch_timer_context *ctx) static void kvm_timer_vcpu_load_nogic(struct kvm_vcpu *vcpu) { struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); + bool pending = kvm_timer_pending(vtimer); /* * Update the timer output so that it is likely to match the @@ -706,7 +717,7 @@ static void kvm_timer_vcpu_load_nogic(struct kvm_vcpu *vcpu) * this point and the register restoration, we'll take the * interrupt anyway. */ - kvm_timer_update_irq(vcpu, kvm_timer_should_fire(vtimer), vtimer); + kvm_timer_update_irq(vcpu, pending, vtimer); /* * When using a userspace irqchip with the architected timers and a @@ -718,7 +729,7 @@ static void kvm_timer_vcpu_load_nogic(struct kvm_vcpu *vcpu) * being de-asserted, we unmask the interrupt again so that we exit * from the guest when the timer fires. */ - if (vtimer->irq.level) + if (pending) disable_percpu_irq(host_vtimer_irq); else enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags); @@ -904,23 +915,6 @@ void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu) timer_set_traps(vcpu, &map); } -bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu) -{ - struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); - struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); - struct kvm_sync_regs *sregs = &vcpu->run->s.regs; - bool vlevel, plevel; - - if (likely(irqchip_in_kernel(vcpu->kvm))) - return false; - - vlevel = sregs->device_irq_level & KVM_ARM_DEV_EL1_VTIMER; - plevel = sregs->device_irq_level & KVM_ARM_DEV_EL1_PTIMER; - - return kvm_timer_should_fire(vtimer) != vlevel || - kvm_timer_should_fire(ptimer) != plevel; -} - void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu) { struct arch_timer_cpu *timer = vcpu_timer(vcpu); @@ -1006,7 +1000,7 @@ static void unmask_vtimer_irq_user(struct kvm_vcpu *vcpu) { struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); - if (!kvm_timer_should_fire(vtimer)) { + if (!kvm_timer_pending(vtimer)) { kvm_timer_update_irq(vcpu, false, vtimer); if (static_branch_likely(&has_gic_active_state)) set_timer_irq_phys_active(vtimer, false); @@ -1288,7 +1282,12 @@ static int timer_irq_set_vcpu_affinity(struct irq_data *d, void *vcpu) static int timer_irq_set_irqchip_state(struct irq_data *d, enum irqchip_irq_state which, bool val) { - if (which != IRQCHIP_STATE_ACTIVE || !irqd_is_forwarded_to_vcpu(d)) + bool passthrough = which != IRQCHIP_STATE_ACTIVE || + !irqd_is_forwarded_to_vcpu(d) || + (kvm_vgic_global_state.type == VGIC_V5 && + vgic_is_v3(kvm_get_running_vcpu()->kvm)); + + if (passthrough) return irq_chip_set_parent_state(d, which, val); if (val) @@ -1301,15 +1300,7 @@ static int timer_irq_set_irqchip_state(struct irq_data *d, static void timer_irq_eoi(struct irq_data *d) { - /* - * On a GICv5 host, we still need to call EOI on the parent for - * PPIs. The host driver already handles irqs which are forwarded to - * vcpus, and skips the GIC CDDI while still doing the GIC CDEOI. This - * is required to emulate the EOIMode=1 on GICv5 hardware. Failure to - * call EOI unsurprisingly results in *BAD* lock-ups. - */ - if (!irqd_is_forwarded_to_vcpu(d) || - kvm_vgic_global_state.type == VGIC_V5) + if (!irqd_is_forwarded_to_vcpu(d)) irq_chip_eoi_parent(d); } @@ -1392,8 +1383,6 @@ static int kvm_irq_init(struct arch_timer_kvm_info *info) return -ENOMEM; } - if (kvm_vgic_global_state.no_hw_deactivation) - arch_timer_irq_ops.flags |= VGIC_IRQ_SW_RESAMPLE; WARN_ON(irq_domain_push_irq(domain, host_vtimer_irq, (void *)TIMER_VTIMER)); } @@ -1579,7 +1568,7 @@ static bool kvm_arch_timer_get_input_level(int vintid) ctx = vcpu_get_timer(vcpu, i); if (timer_irq(ctx) == vintid) - return kvm_timer_should_fire(ctx); + return kvm_timer_pending(ctx); } /* A timer IRQ has fired, but no matching timer was found? */ @@ -1591,8 +1580,8 @@ static bool kvm_arch_timer_get_input_level(int vintid) int kvm_timer_enable(struct kvm_vcpu *vcpu) { struct arch_timer_cpu *timer = vcpu_timer(vcpu); + const struct irq_ops *ops; struct timer_map map; - struct irq_ops *ops; int ret; if (timer->enabled) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 9453321ef8c67..a988fee6b6d58 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -52,6 +52,7 @@ #include <linux/irqchip/arm-gic-v5.h> +#include "vgic/vgic.h" #include "sys_regs.h" static enum kvm_mode kvm_mode = KVM_MODE_DEFAULT; @@ -1166,6 +1167,15 @@ static bool vcpu_mode_is_bad_32bit(struct kvm_vcpu *vcpu) return !kvm_supports_32bit_el0(); } +static bool kvm_irq_update_run(struct kvm_vcpu *vcpu) +{ + bool r; + + r = kvm_timer_update_run(vcpu); + r |= kvm_pmu_update_run(vcpu); + return r; +} + /** * kvm_vcpu_exit_request - returns true if the VCPU should *not* enter the guest * @vcpu: The VCPU pointer @@ -1187,13 +1197,11 @@ static bool kvm_vcpu_exit_request(struct kvm_vcpu *vcpu, int *ret) /* * If we're using a userspace irqchip, then check if we need * to tell a userspace irqchip about timer or PMU level - * changes and if so, exit to userspace (the actual level - * state gets updated in kvm_timer_update_run and - * kvm_pmu_update_run below). + * changes and if so, exit to userspace while updating the run + * state. */ if (unlikely(!irqchip_in_kernel(vcpu->kvm))) { - if (kvm_timer_should_notify_user(vcpu) || - kvm_pmu_should_notify_user(vcpu)) { + if (unlikely(kvm_irq_update_run(vcpu))) { *ret = -EINTR; run->exit_reason = KVM_EXIT_INTR; return true; @@ -1408,11 +1416,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) ret = handle_exit(vcpu, ret); } - /* Tell userspace about in-kernel device output levels */ - if (unlikely(!irqchip_in_kernel(vcpu->kvm))) { - kvm_timer_update_run(vcpu); - kvm_pmu_update_run(vcpu); - } + if (unlikely(!irqchip_in_kernel(vcpu->kvm))) + kvm_irq_update_run(vcpu); kvm_sigset_deactivate(vcpu); @@ -1496,8 +1501,13 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level, return vcpu_interrupt_line(vcpu, irq_num, level); case KVM_ARM_IRQ_TYPE_PPI: - if (!irqchip_in_kernel(kvm)) + if (irqchip_in_kernel(kvm)) { + int ret = vgic_lazy_init(kvm); + if (ret) + return ret; + } else { return -ENXIO; + } vcpu = kvm_get_vcpu_by_id(kvm, vcpu_id); if (!vcpu) @@ -1524,8 +1534,13 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level, return kvm_vgic_inject_irq(kvm, vcpu, irq_num, level, NULL); case KVM_ARM_IRQ_TYPE_SPI: - if (!irqchip_in_kernel(kvm)) + if (irqchip_in_kernel(kvm)) { + int ret = vgic_lazy_init(kvm); + if (ret) + return ret; + } else { return -ENXIO; + } if (vgic_is_v5(kvm)) { /* Build a GICv5-style IntID here */ diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c index 9f8f0ae8e86e8..831e88f0dba0b 100644 --- a/arch/arm64/kvm/at.c +++ b/arch/arm64/kvm/at.c @@ -136,14 +136,106 @@ static void compute_s1poe(struct kvm_vcpu *vcpu, struct s1_walk_info *wi) wi->e0poe = (wi->regime != TR_EL2) && (val & TCR2_EL1_E0POE); } +#define _has_tgran(__r, __sz) \ + ({ \ + u64 _s1, _mmfr0 = __r; \ + \ + _s1 = SYS_FIELD_GET(ID_AA64MMFR0_EL1, \ + TGRAN##__sz, _mmfr0); \ + \ + _s1 != ID_AA64MMFR0_EL1_TGRAN##__sz##_NI; \ + }) + +static bool has_tgran(u64 mmfr0, unsigned int shift) +{ + switch (shift) { + case 12: + return _has_tgran(mmfr0, 4); + case 14: + return _has_tgran(mmfr0, 16); + case 16: + return _has_tgran(mmfr0, 64); + default: + BUG(); + } +} + +static unsigned int tcr_to_tg0_pgshift(u64 tcr) +{ + u64 tg0 = tcr & TCR_TG0_MASK; + + switch (tg0) { + case TCR_TG0_4K: + return 12; + case TCR_TG0_16K: + return 14; + case TCR_TG0_64K: + default: /* IMPDEF: treat any other value as 64k */ + return 16; + } +} + +static unsigned int tcr_to_tg1_pgshift(u64 tcr) +{ + u64 tg1 = tcr & TCR_TG1_MASK; + + switch (tg1) { + case TCR_TG1_4K: + return 12; + case TCR_TG1_16K: + return 14; + case TCR_TG1_64K: + default: /* IMPDEF: treat any other value as 64k */ + return 16; + } +} + +static unsigned int fallback_tgran_shift(u64 mmfr0) +{ + if (has_tgran(mmfr0, PAGE_SHIFT)) + return PAGE_SHIFT; + else if (has_tgran(mmfr0, 12)) + return 12; + else if (has_tgran(mmfr0, 14)) + return 14; + else if (has_tgran(mmfr0, 16)) + return 16; + else /* Should be unreacheable */ + return PAGE_SHIFT; +} + +static unsigned int tcr_tg_pgshift(struct kvm *kvm, u64 tcr, bool upper_range) +{ + u64 mmfr0 = kvm_read_vm_id_reg(kvm, SYS_ID_AA64MMFR0_EL1); + unsigned int shift; + + /* Someone was silly enough to encode TG0/TG1 differently */ + if (upper_range) + shift = tcr_to_tg1_pgshift(tcr); + else + shift = tcr_to_tg0_pgshift(tcr); + + /* + * If TGx is programmed to an unimplemented value (not advertised in + * ID_AA64MMFR0_EL1), we should treat it as if an implemented value is + * written, as per the architecture. Choose an available one while + * prioritizing PAGE_SIZE. + */ + if (!has_tgran(mmfr0, shift)) + return fallback_tgran_shift(mmfr0); + + return shift; +} + static int setup_s1_walk(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, struct s1_walk_result *wr, u64 va) { - u64 hcr, sctlr, tcr, tg, ps, ia_bits, ttbr; + u64 hcr, sctlr, tcr, ps, ia_bits, ttbr; unsigned int stride, x; - bool va55, tbi, lva; + bool va55, tbi, lva, upper_range; va55 = va & BIT(55); + upper_range = va55 && wi->regime != TR_EL2; if (vcpu_has_nv(vcpu)) { hcr = __vcpu_sys_reg(vcpu, HCR_EL2); @@ -174,35 +266,12 @@ static int setup_s1_walk(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, BUG(); } - /* Someone was silly enough to encode TG0/TG1 differently */ - if (va55 && wi->regime != TR_EL2) { + if (upper_range) wi->txsz = FIELD_GET(TCR_T1SZ_MASK, tcr); - tg = FIELD_GET(TCR_TG1_MASK, tcr); - - switch (tg << TCR_TG1_SHIFT) { - case TCR_TG1_4K: - wi->pgshift = 12; break; - case TCR_TG1_16K: - wi->pgshift = 14; break; - case TCR_TG1_64K: - default: /* IMPDEF: treat any other value as 64k */ - wi->pgshift = 16; break; - } - } else { + else wi->txsz = FIELD_GET(TCR_T0SZ_MASK, tcr); - tg = FIELD_GET(TCR_TG0_MASK, tcr); - - switch (tg << TCR_TG0_SHIFT) { - case TCR_TG0_4K: - wi->pgshift = 12; break; - case TCR_TG0_16K: - wi->pgshift = 14; break; - case TCR_TG0_64K: - default: /* IMPDEF: treat any other value as 64k */ - wi->pgshift = 16; break; - } - } + wi->pgshift = tcr_tg_pgshift(vcpu->kvm, tcr, upper_range); wi->pa52bit = has_52bit_pa(vcpu, wi, tcr); ia_bits = get_ia_size(wi); @@ -1380,7 +1449,7 @@ static u64 __kvm_at_s1e01_fast(struct kvm_vcpu *vcpu, u32 op, u64 vaddr) } } write_sysreg_el1(vcpu_read_sys_reg(vcpu, SCTLR_EL1), SYS_SCTLR); - __load_stage2(mmu, mmu->arch); + __load_stage2(mmu); skip_mmu_switch: /* Temporarily switch back to guest context */ diff --git a/arch/arm64/kvm/emulate-nested.c b/arch/arm64/kvm/emulate-nested.c index dba7ced74ca5e..e688bc5139c1f 100644 --- a/arch/arm64/kvm/emulate-nested.c +++ b/arch/arm64/kvm/emulate-nested.c @@ -2631,6 +2631,14 @@ bool triage_sysreg_trap(struct kvm_vcpu *vcpu, int *sr_index) fgtreg = HFGITR2_EL2; break; + case ICH_HFGRTR_GROUP: + fgtreg = is_read ? ICH_HFGRTR_EL2 : ICH_HFGWTR_EL2; + break; + + case ICH_HFGITR_GROUP: + fgtreg = ICH_HFGITR_EL2; + break; + default: /* Something is really wrong, bail out */ WARN_ONCE(1, "Bad FGT group (encoding %08x, config %016llx)\n", @@ -2862,6 +2870,8 @@ static int kvm_inject_nested(struct kvm_vcpu *vcpu, u64 esr_el2, preempt_disable(); + vcpu_set_flag(vcpu, IN_NESTED_EXCEPTION); + /* * We may have an exception or PC update in the EL0/EL1 context. * Commit it before entering EL2. @@ -2884,6 +2894,8 @@ static int kvm_inject_nested(struct kvm_vcpu *vcpu, u64 esr_el2, __kvm_adjust_pc(vcpu); kvm_arch_vcpu_load(vcpu, smp_processor_id()); + vcpu_clear_flag(vcpu, IN_NESTED_EXCEPTION); + preempt_enable(); if (kvm_vcpu_has_pmu(vcpu)) diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c index 15e17aca1dec0..3f6b1e29cd6b9 100644 --- a/arch/arm64/kvm/fpsimd.c +++ b/arch/arm64/kvm/fpsimd.c @@ -29,6 +29,20 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu) return; /* + * Avoid needless save/restore of the guest's common + * FPSIMD/SVE/SME regs during transitions between L1/L2. + * + * These transitions only happens in a non-preemptible context + * where the host regs have already been saved and unbound. The + * live registers are either free or owned by the guest. + */ + if (vcpu_get_flag(vcpu, IN_NESTED_ERET) || + vcpu_get_flag(vcpu, IN_NESTED_EXCEPTION)) { + WARN_ON_ONCE(host_owns_fp_regs()); + return; + } + + /* * Ensure that any host FPSIMD/SVE/SME state is saved and unbound such * that the host kernel is responsible for restoring this state upon * return to userspace, and the hyp code doesn't need to save anything. @@ -102,6 +116,18 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu) { unsigned long flags; + /* + * See comment in kvm_arch_vcpu_load_fp(). Note that we also rely on + * the guest's max VL to have been set by fpsimd_lazy_switch_to_host() + * so that any intervening kernel-mode SIMD (NEON or otherwise) + * operation sees the full guest state that needs saving. + */ + if (vcpu_get_flag(vcpu, IN_NESTED_ERET) || + vcpu_get_flag(vcpu, IN_NESTED_EXCEPTION)) { + WARN_ON_ONCE(host_owns_fp_regs()); + return; + } + local_irq_save(flags); if (guest_owns_fp_regs()) { diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h index 3cbfae0e3dda1..29935c7da1dec 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h +++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h @@ -56,6 +56,7 @@ int host_stage2_idmap_locked(phys_addr_t addr, u64 size, enum kvm_pgtable_prot p int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, u8 owner_id); int kvm_host_prepare_stage2(void *pgt_pool_base); int kvm_guest_prepare_stage2(struct pkvm_hyp_vm *vm, void *pgd); +void kvm_guest_destroy_stage2(struct pkvm_hyp_vm *vm); void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt); int hyp_pin_shared_mem(void *from, void *to); @@ -67,7 +68,7 @@ int refill_memcache(struct kvm_hyp_memcache *mc, unsigned long min_pages, static __always_inline void __load_host_stage2(void) { if (static_branch_likely(&kvm_protected_mode_initialized)) - __load_stage2(&host_mmu.arch.mmu, &host_mmu.arch); + __load_stage2(&host_mmu.arch.mmu); else write_sysreg(0, vttbr_el2); } diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 25f04629014e6..4e329e39a695a 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -217,7 +217,6 @@ static void *guest_s2_zalloc_page(void *mc) memset(addr, 0, PAGE_SIZE); p = hyp_virt_to_page(addr); p->refcount = 1; - p->order = 0; return addr; } @@ -306,23 +305,27 @@ int kvm_guest_prepare_stage2(struct pkvm_hyp_vm *vm, void *pgd) return 0; } +void kvm_guest_destroy_stage2(struct pkvm_hyp_vm *vm) +{ + guest_lock_component(vm); + kvm_pgtable_stage2_destroy(&vm->pgt); + vm->kvm.arch.mmu.pgd_phys = 0ULL; + guest_unlock_component(vm); +} + void reclaim_pgtable_pages(struct pkvm_hyp_vm *vm, struct kvm_hyp_memcache *mc) { struct hyp_page *page; void *addr; /* Dump all pgtable pages in the hyp_pool */ - guest_lock_component(vm); - kvm_pgtable_stage2_destroy(&vm->pgt); - vm->kvm.arch.mmu.pgd_phys = 0ULL; - guest_unlock_component(vm); + kvm_guest_destroy_stage2(vm); /* Drain the hyp_pool into the memcache */ addr = hyp_alloc_pages(&vm->pool, 0); while (addr) { page = hyp_virt_to_page(addr); page->refcount = 0; - page->order = 0; push_hyp_memcache(mc, addr, hyp_virt_to_phys); WARN_ON(__pkvm_hyp_donate_host(hyp_virt_to_pfn(addr), 1)); addr = hyp_alloc_pages(&vm->pool, 0); @@ -352,7 +355,7 @@ int __pkvm_prot_finalize(void) kvm_flush_dcache_to_poc(params, sizeof(*params)); write_sysreg_hcr(params->hcr_el2); - __load_stage2(&host_mmu.arch.mmu, &host_mmu.arch); + __load_stage2(&host_mmu.arch.mmu); /* * Make sure to have an ISB before the TLB maintenance below but only @@ -851,6 +854,16 @@ static int __hyp_check_page_state_range(phys_addr_t phys, u64 size, enum pkvm_pa return 0; } +static int __hyp_check_page_count_range(phys_addr_t phys, u64 size) +{ + for_each_hyp_page(page, phys, size) { + if (page->refcount) + return -EBUSY; + } + + return 0; +} + static bool guest_pte_is_poisoned(kvm_pte_t pte) { if (kvm_pte_valid(pte)) @@ -1049,7 +1062,6 @@ unlock: int __pkvm_host_unshare_hyp(u64 pfn) { u64 phys = hyp_pfn_to_phys(pfn); - u64 virt = (u64)__hyp_va(phys); u64 size = PAGE_SIZE; int ret; @@ -1062,10 +1074,9 @@ int __pkvm_host_unshare_hyp(u64 pfn) ret = __hyp_check_page_state_range(phys, size, PKVM_PAGE_SHARED_BORROWED); if (ret) goto unlock; - if (hyp_page_count((void *)virt)) { - ret = -EBUSY; + ret = __hyp_check_page_count_range(phys, size); + if (ret) goto unlock; - } __hyp_set_page_state_range(phys, size, PKVM_NOPAGE); WARN_ON(__host_set_page_state_range(phys, size, PKVM_PAGE_OWNED)); @@ -1128,6 +1139,10 @@ int __pkvm_hyp_donate_host(u64 pfn, u64 nr_pages) if (ret) goto unlock; + ret = __hyp_check_page_count_range(phys, size); + if (ret) + goto unlock; + __hyp_set_page_state_range(phys, size, PKVM_NOPAGE); WARN_ON(kvm_pgtable_hyp_unmap(&pkvm_pgtable, virt, size) != size); WARN_ON(host_stage2_set_owner_locked(phys, size, PKVM_ID_HOST)); diff --git a/arch/arm64/kvm/hyp/nvhe/page_alloc.c b/arch/arm64/kvm/hyp/nvhe/page_alloc.c index a1eb27a1a7477..57f86aa0f82fc 100644 --- a/arch/arm64/kvm/hyp/nvhe/page_alloc.c +++ b/arch/arm64/kvm/hyp/nvhe/page_alloc.c @@ -94,13 +94,22 @@ static void __hyp_attach_page(struct hyp_pool *pool, struct hyp_page *p) { phys_addr_t phys = hyp_page_to_phys(p); - u8 order = p->order; struct hyp_page *buddy; + bool coalesce = true; + u8 order = p->order; - memset(hyp_page_to_virt(p), 0, PAGE_SIZE << p->order); + /* + * 'external' pages are never coalesced and their ->order field + * untrusted as they bypass hyp_pool_init(). Enforce order-0. + */ + if (phys < pool->range_start || phys >= pool->range_end) { + order = 0; + coalesce = false; + } + + memset(hyp_page_to_virt(p), 0, PAGE_SIZE << order); - /* Skip coalescing for 'external' pages being freed into the pool. */ - if (phys < pool->range_start || phys >= pool->range_end) + if (!coalesce) goto insert; /* @@ -237,8 +246,10 @@ int hyp_pool_init(struct hyp_pool *pool, u64 pfn, unsigned int nr_pages, /* Init the vmemmap portion */ p = hyp_phys_to_page(phys); - for (i = 0; i < nr_pages; i++) + for (i = 0; i < nr_pages; i++) { hyp_set_page_refcounted(&p[i]); + p[i].order = 0; + } /* Attach the unused pages to the buddy tree */ for (i = reserved_pages; i < nr_pages; i++) diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c index eb1c10120f9f5..3b2c4fbc34d8e 100644 --- a/arch/arm64/kvm/hyp/nvhe/pkvm.c +++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c @@ -853,10 +853,12 @@ int __pkvm_init_vm(struct kvm *host_kvm, unsigned long vm_hva, /* Must be called last since this publishes the VM. */ ret = insert_vm_table_entry(handle, hyp_vm); if (ret) - goto err_remove_mappings; + goto err_destroy_stage2; return 0; +err_destroy_stage2: + kvm_guest_destroy_stage2(hyp_vm); err_remove_mappings: unmap_donated_memory(hyp_vm, vm_size); unmap_donated_memory(pgd, pgd_size); diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index 8d1df3d33595b..7318e3e6a5f36 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -315,7 +315,7 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu) __sysreg_restore_state_nvhe(guest_ctxt); mmu = kern_hyp_va(vcpu->arch.hw_mmu); - __load_stage2(mmu, kern_hyp_va(mmu->arch)); + __load_stage2(mmu); __activate_traps(vcpu); __hyp_vgic_restore_state(vcpu); diff --git a/arch/arm64/kvm/hyp/nvhe/tlb.c b/arch/arm64/kvm/hyp/nvhe/tlb.c index b29140995d484..fdb90483340ce 100644 --- a/arch/arm64/kvm/hyp/nvhe/tlb.c +++ b/arch/arm64/kvm/hyp/nvhe/tlb.c @@ -110,7 +110,7 @@ static void enter_vmid_context(struct kvm_s2_mmu *mmu, if (vcpu) __load_host_stage2(); else - __load_stage2(mmu, kern_hyp_va(mmu->arch)); + __load_stage2(mmu); asm(ALTERNATIVE("isb", "nop", ARM64_WORKAROUND_SPECULATIVE_AT)); } @@ -128,7 +128,7 @@ static void exit_vmid_context(struct tlb_inv_context *cxt) return; if (vcpu) - __load_stage2(mmu, kern_hyp_va(mmu->arch)); + __load_stage2(mmu); else __load_host_stage2(); diff --git a/arch/arm64/kvm/hyp/vgic-v5-sr.c b/arch/arm64/kvm/hyp/vgic-v5-sr.c index 47e6bcd437029..6d69dfe89a96c 100644 --- a/arch/arm64/kvm/hyp/vgic-v5-sr.c +++ b/arch/arm64/kvm/hyp/vgic-v5-sr.c @@ -30,10 +30,9 @@ void __vgic_v5_save_ppi_state(struct vgic_v5_cpu_if *cpu_if) { /* * The following code assumes that the bitmap storage that we have for - * PPIs is either 64 (architected PPIs, only) or 128 bits (architected & - * impdef PPIs). + * PPIs is either 64 (architected PPIs, only). */ - BUILD_BUG_ON(VGIC_V5_NR_PRIVATE_IRQS % 64); + BUILD_BUG_ON(VGIC_V5_NR_PRIVATE_IRQS != 64); bitmap_write(host_data_ptr(vgic_v5_ppi_state)->activer_exit, read_sysreg_s(SYS_ICH_PPI_ACTIVER0_EL2), 0, 64); @@ -49,22 +48,6 @@ void __vgic_v5_save_ppi_state(struct vgic_v5_cpu_if *cpu_if) cpu_if->vgic_ppi_priorityr[6] = read_sysreg_s(SYS_ICH_PPI_PRIORITYR6_EL2); cpu_if->vgic_ppi_priorityr[7] = read_sysreg_s(SYS_ICH_PPI_PRIORITYR7_EL2); - if (VGIC_V5_NR_PRIVATE_IRQS == 128) { - bitmap_write(host_data_ptr(vgic_v5_ppi_state)->activer_exit, - read_sysreg_s(SYS_ICH_PPI_ACTIVER1_EL2), 64, 64); - bitmap_write(host_data_ptr(vgic_v5_ppi_state)->pendr, - read_sysreg_s(SYS_ICH_PPI_PENDR1_EL2), 64, 64); - - cpu_if->vgic_ppi_priorityr[8] = read_sysreg_s(SYS_ICH_PPI_PRIORITYR8_EL2); - cpu_if->vgic_ppi_priorityr[9] = read_sysreg_s(SYS_ICH_PPI_PRIORITYR9_EL2); - cpu_if->vgic_ppi_priorityr[10] = read_sysreg_s(SYS_ICH_PPI_PRIORITYR10_EL2); - cpu_if->vgic_ppi_priorityr[11] = read_sysreg_s(SYS_ICH_PPI_PRIORITYR11_EL2); - cpu_if->vgic_ppi_priorityr[12] = read_sysreg_s(SYS_ICH_PPI_PRIORITYR12_EL2); - cpu_if->vgic_ppi_priorityr[13] = read_sysreg_s(SYS_ICH_PPI_PRIORITYR13_EL2); - cpu_if->vgic_ppi_priorityr[14] = read_sysreg_s(SYS_ICH_PPI_PRIORITYR14_EL2); - cpu_if->vgic_ppi_priorityr[15] = read_sysreg_s(SYS_ICH_PPI_PRIORITYR15_EL2); - } - /* Now that we are done, disable DVI */ write_sysreg_s(0, SYS_ICH_PPI_DVIR0_EL2); write_sysreg_s(0, SYS_ICH_PPI_DVIR1_EL2); @@ -74,9 +57,6 @@ void __vgic_v5_restore_ppi_state(struct vgic_v5_cpu_if *cpu_if) { DECLARE_BITMAP(pendr, VGIC_V5_NR_PRIVATE_IRQS); - /* We assume 64 or 128 PPIs - see above comment */ - BUILD_BUG_ON(VGIC_V5_NR_PRIVATE_IRQS % 64); - /* Enable DVI so that the guest's interrupt config takes over */ write_sysreg_s(bitmap_read(cpu_if->vgic_ppi_dvir, 0, 64), SYS_ICH_PPI_DVIR0_EL2); @@ -108,50 +88,20 @@ void __vgic_v5_restore_ppi_state(struct vgic_v5_cpu_if *cpu_if) write_sysreg_s(cpu_if->vgic_ppi_priorityr[7], SYS_ICH_PPI_PRIORITYR7_EL2); - if (VGIC_V5_NR_PRIVATE_IRQS == 128) { - /* Enable DVI so that the guest's interrupt config takes over */ - write_sysreg_s(bitmap_read(cpu_if->vgic_ppi_dvir, 64, 64), - SYS_ICH_PPI_DVIR1_EL2); - - write_sysreg_s(bitmap_read(cpu_if->vgic_ppi_activer, 64, 64), - SYS_ICH_PPI_ACTIVER1_EL2); - write_sysreg_s(bitmap_read(cpu_if->vgic_ppi_enabler, 64, 64), - SYS_ICH_PPI_ENABLER1_EL2); - write_sysreg_s(bitmap_read(pendr, 64, 64), - SYS_ICH_PPI_PENDR1_EL2); - - write_sysreg_s(cpu_if->vgic_ppi_priorityr[8], - SYS_ICH_PPI_PRIORITYR8_EL2); - write_sysreg_s(cpu_if->vgic_ppi_priorityr[9], - SYS_ICH_PPI_PRIORITYR9_EL2); - write_sysreg_s(cpu_if->vgic_ppi_priorityr[10], - SYS_ICH_PPI_PRIORITYR10_EL2); - write_sysreg_s(cpu_if->vgic_ppi_priorityr[11], - SYS_ICH_PPI_PRIORITYR11_EL2); - write_sysreg_s(cpu_if->vgic_ppi_priorityr[12], - SYS_ICH_PPI_PRIORITYR12_EL2); - write_sysreg_s(cpu_if->vgic_ppi_priorityr[13], - SYS_ICH_PPI_PRIORITYR13_EL2); - write_sysreg_s(cpu_if->vgic_ppi_priorityr[14], - SYS_ICH_PPI_PRIORITYR14_EL2); - write_sysreg_s(cpu_if->vgic_ppi_priorityr[15], - SYS_ICH_PPI_PRIORITYR15_EL2); - } else { - write_sysreg_s(0, SYS_ICH_PPI_DVIR1_EL2); + write_sysreg_s(0, SYS_ICH_PPI_DVIR1_EL2); - write_sysreg_s(0, SYS_ICH_PPI_ACTIVER1_EL2); - write_sysreg_s(0, SYS_ICH_PPI_ENABLER1_EL2); - write_sysreg_s(0, SYS_ICH_PPI_PENDR1_EL2); + write_sysreg_s(0, SYS_ICH_PPI_ACTIVER1_EL2); + write_sysreg_s(0, SYS_ICH_PPI_ENABLER1_EL2); + write_sysreg_s(0, SYS_ICH_PPI_PENDR1_EL2); - write_sysreg_s(0, SYS_ICH_PPI_PRIORITYR8_EL2); - write_sysreg_s(0, SYS_ICH_PPI_PRIORITYR9_EL2); - write_sysreg_s(0, SYS_ICH_PPI_PRIORITYR10_EL2); - write_sysreg_s(0, SYS_ICH_PPI_PRIORITYR11_EL2); - write_sysreg_s(0, SYS_ICH_PPI_PRIORITYR12_EL2); - write_sysreg_s(0, SYS_ICH_PPI_PRIORITYR13_EL2); - write_sysreg_s(0, SYS_ICH_PPI_PRIORITYR14_EL2); - write_sysreg_s(0, SYS_ICH_PPI_PRIORITYR15_EL2); - } + write_sysreg_s(0, SYS_ICH_PPI_PRIORITYR8_EL2); + write_sysreg_s(0, SYS_ICH_PPI_PRIORITYR9_EL2); + write_sysreg_s(0, SYS_ICH_PPI_PRIORITYR10_EL2); + write_sysreg_s(0, SYS_ICH_PPI_PRIORITYR11_EL2); + write_sysreg_s(0, SYS_ICH_PPI_PRIORITYR12_EL2); + write_sysreg_s(0, SYS_ICH_PPI_PRIORITYR13_EL2); + write_sysreg_s(0, SYS_ICH_PPI_PRIORITYR14_EL2); + write_sysreg_s(0, SYS_ICH_PPI_PRIORITYR15_EL2); } void __vgic_v5_save_state(struct vgic_v5_cpu_if *cpu_if) diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index 1e8995add14fa..bbe9cebd3d9d5 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -219,7 +219,7 @@ void kvm_vcpu_load_vhe(struct kvm_vcpu *vcpu) __vcpu_load_switch_sysregs(vcpu); __vcpu_load_activate_traps(vcpu); - __load_stage2(vcpu->arch.hw_mmu, vcpu->arch.hw_mmu->arch); + __load_stage2(vcpu->arch.hw_mmu); } void kvm_vcpu_put_vhe(struct kvm_vcpu *vcpu) diff --git a/arch/arm64/kvm/hyp/vhe/tlb.c b/arch/arm64/kvm/hyp/vhe/tlb.c index f7b9dfe3f3a5a..c386d9f1c1016 100644 --- a/arch/arm64/kvm/hyp/vhe/tlb.c +++ b/arch/arm64/kvm/hyp/vhe/tlb.c @@ -60,7 +60,7 @@ static void enter_vmid_context(struct kvm_s2_mmu *mmu, * place before clearing TGE. __load_stage2() already * has an ISB in order to deal with this. */ - __load_stage2(mmu, mmu->arch); + __load_stage2(mmu); val = read_sysreg(hcr_el2); val &= ~HCR_TGE; write_sysreg_hcr(val); @@ -78,7 +78,7 @@ static void exit_vmid_context(struct tlb_inv_context *cxt) /* ... and the stage-2 MMU context that we switched away from */ if (cxt->mmu) - __load_stage2(cxt->mmu, cxt->mmu->arch); + __load_stage2(cxt->mmu); if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { /* Restore the registers to what they were */ diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c index 38f672e940878..f91046b5c476d 100644 --- a/arch/arm64/kvm/nested.c +++ b/arch/arm64/kvm/nested.c @@ -378,32 +378,104 @@ static int walk_nested_s2_pgd(struct kvm_vcpu *vcpu, phys_addr_t ipa, return 0; } -static void vtcr_to_walk_info(u64 vtcr, struct s2_walk_info *wi) +#define _has_tgran_2(__r, __sz) \ + ({ \ + u64 _s1, _s2, _mmfr0 = __r; \ + \ + _s2 = SYS_FIELD_GET(ID_AA64MMFR0_EL1, \ + TGRAN##__sz##_2, _mmfr0); \ + \ + _s1 = SYS_FIELD_GET(ID_AA64MMFR0_EL1, \ + TGRAN##__sz, _mmfr0); \ + \ + ((_s2 != ID_AA64MMFR0_EL1_TGRAN##__sz##_2_NI && \ + _s2 != ID_AA64MMFR0_EL1_TGRAN##__sz##_2_TGRAN##__sz) || \ + (_s2 == ID_AA64MMFR0_EL1_TGRAN##__sz##_2_TGRAN##__sz && \ + _s1 != ID_AA64MMFR0_EL1_TGRAN##__sz##_NI)); \ + }) + +static bool has_tgran_2(u64 mmfr0, unsigned int shift) +{ + switch (shift) { + case 12: + return _has_tgran_2(mmfr0, 4); + case 14: + return _has_tgran_2(mmfr0, 16); + case 16: + return _has_tgran_2(mmfr0, 64); + default: + BUG(); + } +} + +static unsigned int fallback_tgran2_shift(u64 mmfr0) { - wi->t0sz = vtcr & TCR_EL2_T0SZ_MASK; + if (has_tgran_2(mmfr0, PAGE_SHIFT)) + return PAGE_SHIFT; + else if (has_tgran_2(mmfr0, 12)) + return 12; + else if (has_tgran_2(mmfr0, 14)) + return 14; + else if (has_tgran_2(mmfr0, 16)) + return 16; + else + return PAGE_SHIFT; +} - switch (FIELD_GET(VTCR_EL2_TG0_MASK, vtcr)) { +static unsigned int vtcr_to_tg0_pgshift(struct kvm *kvm, u64 vtcr) +{ + u64 tg0 = FIELD_GET(VTCR_EL2_TG0_MASK, vtcr); + u64 mmfr0 = kvm_read_vm_id_reg(kvm, SYS_ID_AA64MMFR0_EL1); + unsigned int shift; + + switch (tg0) { case VTCR_EL2_TG0_4K: - wi->pgshift = 12; break; + shift = 12; + break; case VTCR_EL2_TG0_16K: - wi->pgshift = 14; break; + shift = 14; + break; case VTCR_EL2_TG0_64K: - default: /* IMPDEF: treat any other value as 64k */ - wi->pgshift = 16; break; + /* IMPDEF: treat any other value as 64k, subject to fallback */ + default: + shift = 16; } + /* + * If TGx is programmed to an unimplemented value (not advertised in + * ID_AA64MMFR0_EL1), we should treat it as if an implemented value is + * written, as per the architecture. Choose an available one while + * prioritizing PAGE_SIZE. + */ + if (!has_tgran_2(mmfr0, shift)) + return fallback_tgran2_shift(mmfr0); + + return shift; +} + +static size_t vtcr_to_tg0_pgsize(struct kvm *kvm, u64 vtcr) +{ + return BIT(vtcr_to_tg0_pgshift(kvm, vtcr)); +} + +static void setup_s2_walk(struct kvm_vcpu *vcpu, struct s2_walk_info *wi) +{ + u64 vtcr = vcpu_read_sys_reg(vcpu, VTCR_EL2); + + wi->baddr = vcpu_read_sys_reg(vcpu, VTTBR_EL2); + wi->t0sz = vtcr & VTCR_EL2_T0SZ_MASK; + wi->pgshift = vtcr_to_tg0_pgshift(vcpu->kvm, vtcr); wi->sl = FIELD_GET(VTCR_EL2_SL0_MASK, vtcr); /* Global limit for now, should eventually be per-VM */ wi->max_oa_bits = min(get_kvm_ipa_limit(), ps_to_output_size(FIELD_GET(VTCR_EL2_PS_MASK, vtcr), false)); - wi->ha = vtcr & VTCR_EL2_HA; + wi->be = vcpu_read_sys_reg(vcpu, SCTLR_EL2) & SCTLR_ELx_EE; } int kvm_walk_nested_s2(struct kvm_vcpu *vcpu, phys_addr_t gipa, struct kvm_s2_trans *result) { - u64 vtcr = vcpu_read_sys_reg(vcpu, VTCR_EL2); struct s2_walk_info wi; int ret; @@ -412,11 +484,7 @@ int kvm_walk_nested_s2(struct kvm_vcpu *vcpu, phys_addr_t gipa, if (!vcpu_has_nv(vcpu)) return 0; - wi.baddr = vcpu_read_sys_reg(vcpu, VTTBR_EL2); - - vtcr_to_walk_info(vtcr, &wi); - - wi.be = vcpu_read_sys_reg(vcpu, SCTLR_EL2) & SCTLR_ELx_EE; + setup_s2_walk(vcpu, &wi); ret = walk_nested_s2_pgd(vcpu, gipa, &wi, result); if (ret) @@ -512,20 +580,21 @@ static u8 pgshift_level_to_ttl(u16 shift, u8 level) */ static u8 get_guest_mapping_ttl(struct kvm_s2_mmu *mmu, u64 addr) { - u64 tmp, sz = 0, vtcr = mmu->tlb_vtcr; + size_t tg0_size = vtcr_to_tg0_pgsize(kvm_s2_mmu_to_kvm(mmu), mmu->tlb_vtcr); + u64 tmp, sz = 0; kvm_pte_t pte; u8 ttl, level; lockdep_assert_held_write(&kvm_s2_mmu_to_kvm(mmu)->mmu_lock); - switch (FIELD_GET(VTCR_EL2_TG0_MASK, vtcr)) { - case VTCR_EL2_TG0_4K: + switch (tg0_size) { + case SZ_4K: ttl = (TLBI_TTL_TG_4K << 2); break; - case VTCR_EL2_TG0_16K: + case SZ_16K: ttl = (TLBI_TTL_TG_16K << 2); break; - case VTCR_EL2_TG0_64K: + case SZ_64K: default: /* IMPDEF: treat any other value as 64k */ ttl = (TLBI_TTL_TG_64K << 2); break; @@ -535,19 +604,19 @@ static u8 get_guest_mapping_ttl(struct kvm_s2_mmu *mmu, u64 addr) again: /* Iteratively compute the block sizes for a particular granule size */ - switch (FIELD_GET(VTCR_EL2_TG0_MASK, vtcr)) { - case VTCR_EL2_TG0_4K: + switch (tg0_size) { + case SZ_4K: if (sz < SZ_4K) sz = SZ_4K; else if (sz < SZ_2M) sz = SZ_2M; else if (sz < SZ_1G) sz = SZ_1G; else sz = 0; break; - case VTCR_EL2_TG0_16K: + case SZ_16K: if (sz < SZ_16K) sz = SZ_16K; else if (sz < SZ_32M) sz = SZ_32M; else sz = 0; break; - case VTCR_EL2_TG0_64K: + case SZ_64K: default: /* IMPDEF: treat any other value as 64k */ if (sz < SZ_64K) sz = SZ_64K; else if (sz < SZ_512M) sz = SZ_512M; @@ -598,14 +667,14 @@ unsigned long compute_tlb_inval_range(struct kvm_s2_mmu *mmu, u64 val) if (!max_size) { /* Compute the maximum extent of the invalidation */ - switch (FIELD_GET(VTCR_EL2_TG0_MASK, mmu->tlb_vtcr)) { - case VTCR_EL2_TG0_4K: + switch (vtcr_to_tg0_pgsize(kvm, mmu->tlb_vtcr)) { + case SZ_4K: max_size = SZ_1G; break; - case VTCR_EL2_TG0_16K: + case SZ_16K: max_size = SZ_32M; break; - case VTCR_EL2_TG0_64K: + case SZ_64K: default: /* IMPDEF: treat any other value as 64k */ /* * No, we do not support 52bit IPA in nested yet. Once @@ -1498,21 +1567,6 @@ static void kvm_map_l1_vncr(struct kvm_vcpu *vcpu) } } -#define has_tgran_2(__r, __sz) \ - ({ \ - u64 _s1, _s2, _mmfr0 = __r; \ - \ - _s2 = SYS_FIELD_GET(ID_AA64MMFR0_EL1, \ - TGRAN##__sz##_2, _mmfr0); \ - \ - _s1 = SYS_FIELD_GET(ID_AA64MMFR0_EL1, \ - TGRAN##__sz, _mmfr0); \ - \ - ((_s2 != ID_AA64MMFR0_EL1_TGRAN##__sz##_2_NI && \ - _s2 != ID_AA64MMFR0_EL1_TGRAN##__sz##_2_TGRAN##__sz) || \ - (_s2 == ID_AA64MMFR0_EL1_TGRAN##__sz##_2_TGRAN##__sz && \ - _s1 != ID_AA64MMFR0_EL1_TGRAN##__sz##_NI)); \ - }) /* * Our emulated CPU doesn't support all the possible features. For the * sake of simplicity (and probably mental sanity), wipe out a number @@ -1599,15 +1653,15 @@ u64 limit_nv_id_reg(struct kvm *kvm, u32 reg, u64 val) */ switch (PAGE_SIZE) { case SZ_4K: - if (has_tgran_2(orig_val, 4)) + if (_has_tgran_2(orig_val, 4)) val |= SYS_FIELD_PREP_ENUM(ID_AA64MMFR0_EL1, TGRAN4_2, IMP); fallthrough; case SZ_16K: - if (has_tgran_2(orig_val, 16)) + if (_has_tgran_2(orig_val, 16)) val |= SYS_FIELD_PREP_ENUM(ID_AA64MMFR0_EL1, TGRAN16_2, IMP); fallthrough; case SZ_64K: - if (has_tgran_2(orig_val, 64)) + if (_has_tgran_2(orig_val, 64)) val |= SYS_FIELD_PREP_ENUM(ID_AA64MMFR0_EL1, TGRAN64_2, IMP); break; } diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index c816db5d67611..98305bbfc095a 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -396,44 +396,31 @@ static bool kvm_pmu_overflow_status(struct kvm_vcpu *vcpu) static void kvm_pmu_update_state(struct kvm_vcpu *vcpu) { struct kvm_pmu *pmu = &vcpu->arch.pmu; - bool overflow; - overflow = kvm_pmu_overflow_status(vcpu); - if (pmu->irq_level == overflow) + if (unlikely(!irqchip_in_kernel(vcpu->kvm))) return; - pmu->irq_level = overflow; - - if (likely(irqchip_in_kernel(vcpu->kvm))) { - int ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu, - pmu->irq_num, overflow, pmu); - WARN_ON(ret); - } + WARN_ON(kvm_vgic_inject_irq(vcpu->kvm, vcpu, pmu->irq_num, + kvm_pmu_overflow_status(vcpu), pmu)); } bool kvm_pmu_should_notify_user(struct kvm_vcpu *vcpu) { - struct kvm_pmu *pmu = &vcpu->arch.pmu; struct kvm_sync_regs *sregs = &vcpu->run->s.regs; bool run_level = sregs->device_irq_level & KVM_ARM_DEV_PMU; - if (likely(irqchip_in_kernel(vcpu->kvm))) - return false; - - return pmu->irq_level != run_level; + return kvm_pmu_overflow_status(vcpu) != run_level; } /* * Reflect the PMU overflow interrupt output level into the kvm_run structure */ -void kvm_pmu_update_run(struct kvm_vcpu *vcpu) +bool kvm_pmu_update_run(struct kvm_vcpu *vcpu) { - struct kvm_sync_regs *regs = &vcpu->run->s.regs; - - /* Populate the timer bitmap for user space */ - regs->device_irq_level &= ~KVM_ARM_DEV_PMU; - if (vcpu->arch.pmu.irq_level) - regs->device_irq_level |= KVM_ARM_DEV_PMU; + bool update = kvm_pmu_should_notify_user(vcpu); + if (update) + vcpu->run->s.regs.device_irq_level ^= KVM_ARM_DEV_PMU; + return update; } /** diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index fa5c93c7a1352..febb3a2b9ce78 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -724,6 +724,7 @@ static bool access_gicv5_ppi_enabler(struct kvm_vcpu *vcpu, { unsigned long *mask = vcpu->kvm->arch.vgic.gicv5_vm.vgic_ppi_mask; struct vgic_v5_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v5; + unsigned long reg = p->regval; int i; /* We never expect to get here with a read! */ @@ -731,27 +732,23 @@ static bool access_gicv5_ppi_enabler(struct kvm_vcpu *vcpu, return undef_access(vcpu, p, r); /* - * If we're only handling architected PPIs and the guest writes to the - * enable for the non-architected PPIs, we just return as there's - * nothing to do at all. We don't even allocate the storage for them in - * this case. + * As we're only handling architected PPIs, the guest writes to the + * enable for the non-architected PPIs just return as there's + * nothing to do at all. We don't even allocate the storage for them. */ - if (VGIC_V5_NR_PRIVATE_IRQS == 64 && p->Op2 % 2) + if (p->Op2 % 2) return true; /* - * Merge the raw guest write into out bitmap at an offset of either 0 or - * 64, then and it with our PPI mask. + * Merge the raw guest write into out bitmap, anded with our PPI mask. */ - bitmap_write(cpu_if->vgic_ppi_enabler, p->regval, 64 * (p->Op2 % 2), 64); - bitmap_and(cpu_if->vgic_ppi_enabler, cpu_if->vgic_ppi_enabler, mask, - VGIC_V5_NR_PRIVATE_IRQS); + bitmap_and(cpu_if->vgic_ppi_enabler, ®, mask, VGIC_V5_NR_PRIVATE_IRQS); /* * Sync the change in enable states to the vgic_irqs. We consider all * PPIs as we don't expose many to the guest. */ - for_each_set_bit(i, mask, VGIC_V5_NR_PRIVATE_IRQS) { + for_each_visible_v5_ppi(i, vcpu->kvm) { u32 intid = vgic_v5_make_ppi(i); struct vgic_irq *irq; diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c index 933983bb20052..907057881b26a 100644 --- a/arch/arm64/kvm/vgic/vgic-init.c +++ b/arch/arm64/kvm/vgic/vgic-init.c @@ -271,18 +271,12 @@ int kvm_vgic_vcpu_nv_init(struct kvm_vcpu *vcpu) return ret; } -static void vgic_allocate_private_irq(struct kvm_vcpu *vcpu, int i, u32 type) +static void vgic_setup_private_irq(struct kvm_vcpu *vcpu, struct vgic_irq *irq, + u32 type) { - struct vgic_irq *irq = &vcpu->arch.vgic_cpu.private_irqs[i]; + irq->intid = irq - &vcpu->arch.vgic_cpu.private_irqs[0]; - INIT_LIST_HEAD(&irq->ap_list); - raw_spin_lock_init(&irq->irq_lock); - irq->vcpu = NULL; - irq->target_vcpu = vcpu; - refcount_set(&irq->refcount, 0); - - irq->intid = i; - if (vgic_irq_is_sgi(i)) { + if (vgic_irq_is_sgi(irq->intid)) { /* SGIs */ irq->enabled = 1; irq->config = VGIC_CONFIG_EDGE; @@ -303,18 +297,11 @@ static void vgic_allocate_private_irq(struct kvm_vcpu *vcpu, int i, u32 type) } } -static void vgic_v5_allocate_private_irq(struct kvm_vcpu *vcpu, int i, u32 type) +static void vgic_v5_setup_private_irq(struct kvm_vcpu *vcpu, struct vgic_irq *irq) { - struct vgic_irq *irq = &vcpu->arch.vgic_cpu.private_irqs[i]; - u32 intid = vgic_v5_make_ppi(i); - - INIT_LIST_HEAD(&irq->ap_list); - raw_spin_lock_init(&irq->irq_lock); - irq->vcpu = NULL; - irq->target_vcpu = vcpu; - refcount_set(&irq->refcount, 0); + int i = irq - &vcpu->arch.vgic_cpu.private_irqs[0]; - irq->intid = intid; + irq->intid = vgic_v5_make_ppi(i); /* The only Edge architected PPI is the SW_PPI */ if (i == GICV5_ARCH_PPI_SW_PPI) @@ -323,7 +310,7 @@ static void vgic_v5_allocate_private_irq(struct kvm_vcpu *vcpu, int i, u32 type) irq->config = VGIC_CONFIG_LEVEL; /* Register the GICv5-specific PPI ops */ - vgic_v5_set_ppi_ops(vcpu, intid); + vgic_v5_set_ppi_ops(vcpu, irq->intid); } static int vgic_allocate_private_irqs_locked(struct kvm_vcpu *vcpu, u32 type) @@ -349,15 +336,19 @@ static int vgic_allocate_private_irqs_locked(struct kvm_vcpu *vcpu, u32 type) if (!vgic_cpu->private_irqs) return -ENOMEM; - /* - * Enable and configure all SGIs to be edge-triggered and - * configure all PPIs as level-triggered. - */ for (i = 0; i < num_private_irqs; i++) { + struct vgic_irq *irq = &vcpu->arch.vgic_cpu.private_irqs[i]; + + INIT_LIST_HEAD(&irq->ap_list); + raw_spin_lock_init(&irq->irq_lock); + irq->vcpu = NULL; + irq->target_vcpu = vcpu; + refcount_set(&irq->refcount, 0); + if (vgic_is_v5(vcpu->kvm)) - vgic_v5_allocate_private_irq(vcpu, i, type); + vgic_v5_setup_private_irq(vcpu, irq); else - vgic_allocate_private_irq(vcpu, i, type); + vgic_setup_private_irq(vcpu, irq, type); } return 0; diff --git a/arch/arm64/kvm/vgic/vgic-irqfd.c b/arch/arm64/kvm/vgic/vgic-irqfd.c index b9b86e3a6c862..19a1094536e6a 100644 --- a/arch/arm64/kvm/vgic/vgic-irqfd.c +++ b/arch/arm64/kvm/vgic/vgic-irqfd.c @@ -20,9 +20,15 @@ static int vgic_irqfd_set_irq(struct kvm_kernel_irq_routing_entry *e, int level, bool line_status) { unsigned int spi_id = e->irqchip.pin + VGIC_NR_PRIVATE_IRQS; + int ret; if (!vgic_valid_spi(kvm, spi_id)) return -EINVAL; + + ret = vgic_lazy_init(kvm); + if (ret) + return ret; + return kvm_vgic_inject_irq(kvm, NULL, spi_id, level, NULL); } diff --git a/arch/arm64/kvm/vgic/vgic-kvm-device.c b/arch/arm64/kvm/vgic/vgic-kvm-device.c index a96c77dccf353..90be99443df3b 100644 --- a/arch/arm64/kvm/vgic/vgic-kvm-device.c +++ b/arch/arm64/kvm/vgic/vgic-kvm-device.c @@ -730,18 +730,15 @@ static int vgic_v5_get_userspace_ppis(struct kvm_device *dev, guard(mutex)(&dev->kvm->arch.config_lock); /* - * We either support 64 or 128 PPIs. In the former case, we need to - * return 0s for the second 64 bits as we have no storage backing those. + * We only support 64 PPIs, so, we need to return 0s for the + * second 64 bits as we have no storage backing those. */ ret = put_user(bitmap_read(gicv5_vm->userspace_ppis, 0, 64), uaddr); if (ret) return ret; uaddr++; - if (VGIC_V5_NR_PRIVATE_IRQS == 128) - ret = put_user(bitmap_read(gicv5_vm->userspace_ppis, 64, 128), uaddr); - else - ret = put_user(0, uaddr); + ret = put_user(0, uaddr); return ret; } diff --git a/arch/arm64/kvm/vgic/vgic-v5.c b/arch/arm64/kvm/vgic/vgic-v5.c index fdd39ea7f83ec..d4789ff3e7402 100644 --- a/arch/arm64/kvm/vgic/vgic-v5.c +++ b/arch/arm64/kvm/vgic/vgic-v5.c @@ -10,7 +10,7 @@ #include "vgic.h" -static struct vgic_v5_ppi_caps ppi_caps; +#define ppi_caps kvm_vgic_global_state.vgic_v5_ppi_caps /* * Not all PPIs are guaranteed to be implemented for GICv5. Deterermine which @@ -18,20 +18,17 @@ static struct vgic_v5_ppi_caps ppi_caps; */ static void vgic_v5_get_implemented_ppis(void) { - if (!cpus_have_final_cap(ARM64_HAS_GICV5_CPUIF)) - return; - /* * If we have KVM, we have EL2, which means that we have support for the * EL1 and EL2 Physical & Virtual timers. */ - __assign_bit(GICV5_ARCH_PPI_CNTHP, ppi_caps.impl_ppi_mask, 1); - __assign_bit(GICV5_ARCH_PPI_CNTV, ppi_caps.impl_ppi_mask, 1); - __assign_bit(GICV5_ARCH_PPI_CNTHV, ppi_caps.impl_ppi_mask, 1); - __assign_bit(GICV5_ARCH_PPI_CNTP, ppi_caps.impl_ppi_mask, 1); + __set_bit(GICV5_ARCH_PPI_CNTHP, ppi_caps.impl_ppi_mask); + __set_bit(GICV5_ARCH_PPI_CNTV, ppi_caps.impl_ppi_mask); + __set_bit(GICV5_ARCH_PPI_CNTHV, ppi_caps.impl_ppi_mask); + __set_bit(GICV5_ARCH_PPI_CNTP, ppi_caps.impl_ppi_mask); /* The SW_PPI should be available */ - __assign_bit(GICV5_ARCH_PPI_SW_PPI, ppi_caps.impl_ppi_mask, 1); + __set_bit(GICV5_ARCH_PPI_SW_PPI, ppi_caps.impl_ppi_mask); /* The PMUIRQ is available if we have the PMU */ __assign_bit(GICV5_ARCH_PPI_PMUIRQ, ppi_caps.impl_ppi_mask, system_supports_pmuv3()); @@ -146,9 +143,7 @@ int vgic_v5_init(struct kvm *kvm) /* We only allow userspace to drive the SW_PPI, if it is implemented. */ bitmap_zero(kvm->arch.vgic.gicv5_vm.userspace_ppis, VGIC_V5_NR_PRIVATE_IRQS); - __assign_bit(GICV5_ARCH_PPI_SW_PPI, - kvm->arch.vgic.gicv5_vm.userspace_ppis, - VGIC_V5_NR_PRIVATE_IRQS); + __set_bit(GICV5_ARCH_PPI_SW_PPI, kvm->arch.vgic.gicv5_vm.userspace_ppis); bitmap_and(kvm->arch.vgic.gicv5_vm.userspace_ppis, kvm->arch.vgic.gicv5_vm.userspace_ppis, ppi_caps.impl_ppi_mask, VGIC_V5_NR_PRIVATE_IRQS); @@ -197,7 +192,7 @@ int vgic_v5_finalize_ppi_state(struct kvm *kvm) /* Expose PPIs with an owner or the SW_PPI, only */ scoped_guard(raw_spinlock_irqsave, &irq->irq_lock) { if (irq->owner || i == GICV5_ARCH_PPI_SW_PPI) { - __assign_bit(i, kvm->arch.vgic.gicv5_vm.vgic_ppi_mask, 1); + __set_bit(i, kvm->arch.vgic.gicv5_vm.vgic_ppi_mask); __assign_bit(i, kvm->arch.vgic.gicv5_vm.vgic_ppi_hmr, irq->config == VGIC_CONFIG_LEVEL); } @@ -243,9 +238,9 @@ static u32 vgic_v5_get_effective_priority_mask(struct kvm_vcpu *vcpu) /* * For GICv5, the PPIs are mostly directly managed by the hardware. We (the - * hypervisor) handle the pending, active, enable state save/restore, but don't - * need the PPIs to be queued on a per-VCPU AP list. Therefore, sanity check the - * state, unlock, and return. + * hypervisor) handle the pending, active, enable state save/restore, but + * don't need the PPIs to be queued on a per-VCPU AP list. Therefore, + * unlock, kick the vcpu and return. */ bool vgic_v5_ppi_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq, unsigned long flags) @@ -255,12 +250,7 @@ bool vgic_v5_ppi_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq, lockdep_assert_held(&irq->irq_lock); - if (WARN_ON_ONCE(!__irq_is_ppi(KVM_DEV_TYPE_ARM_VGIC_V5, irq->intid))) - goto out_unlock_fail; - vcpu = irq->target_vcpu; - if (WARN_ON_ONCE(!vcpu)) - goto out_unlock_fail; raw_spin_unlock_irqrestore(&irq->irq_lock, flags); @@ -269,11 +259,6 @@ bool vgic_v5_ppi_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq, kvm_vcpu_kick(vcpu); return true; - -out_unlock_fail: - raw_spin_unlock_irqrestore(&irq->irq_lock, flags); - - return false; } /* @@ -287,10 +272,10 @@ void vgic_v5_set_ppi_dvi(struct kvm_vcpu *vcpu, struct vgic_irq *irq, bool dvi) lockdep_assert_held(&irq->irq_lock); ppi = vgic_v5_get_hwirq_id(irq->intid); - __assign_bit(ppi, cpu_if->vgic_ppi_dvir, dvi); + assign_bit(ppi, cpu_if->vgic_ppi_dvir, dvi); } -static struct irq_ops vgic_v5_ppi_irq_ops = { +static const struct irq_ops vgic_v5_ppi_irq_ops = { .queue_irq_unlock = vgic_v5_ppi_queue_irq_unlock, .set_direct_injection = vgic_v5_set_ppi_dvi, }; @@ -316,7 +301,7 @@ static void vgic_v5_sync_ppi_priorities(struct kvm_vcpu *vcpu) * those actually exposed to the guest by first iterating over the mask * of exposed PPIs. */ - for_each_set_bit(i, vcpu->kvm->arch.vgic.gicv5_vm.vgic_ppi_mask, VGIC_V5_NR_PRIVATE_IRQS) { + for_each_visible_v5_ppi(i, vcpu->kvm) { u32 intid = vgic_v5_make_ppi(i); struct vgic_irq *irq; int pri_idx, pri_reg, pri_bit; @@ -358,7 +343,7 @@ bool vgic_v5_has_pending_ppi(struct kvm_vcpu *vcpu) if (!priority_mask) return false; - for_each_set_bit(i, vcpu->kvm->arch.vgic.gicv5_vm.vgic_ppi_mask, VGIC_V5_NR_PRIVATE_IRQS) { + for_each_visible_v5_ppi(i, vcpu->kvm) { u32 intid = vgic_v5_make_ppi(i); bool has_pending = false; struct vgic_irq *irq; @@ -391,8 +376,7 @@ void vgic_v5_fold_ppi_state(struct kvm_vcpu *vcpu) activer = host_data_ptr(vgic_v5_ppi_state)->activer_exit; pendr = host_data_ptr(vgic_v5_ppi_state)->pendr; - for_each_set_bit(i, vcpu->kvm->arch.vgic.gicv5_vm.vgic_ppi_mask, - VGIC_V5_NR_PRIVATE_IRQS) { + for_each_visible_v5_ppi(i, vcpu->kvm) { u32 intid = vgic_v5_make_ppi(i); struct vgic_irq *irq; @@ -429,8 +413,7 @@ void vgic_v5_flush_ppi_state(struct kvm_vcpu *vcpu) * ICC_PPI_PENDRx_EL1, however. */ bitmap_zero(pendr, VGIC_V5_NR_PRIVATE_IRQS); - for_each_set_bit(i, vcpu->kvm->arch.vgic.gicv5_vm.vgic_ppi_mask, - VGIC_V5_NR_PRIVATE_IRQS) { + for_each_visible_v5_ppi(i, vcpu->kvm) { u32 intid = vgic_v5_make_ppi(i); struct vgic_irq *irq; diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c index 1e9fe8764584d..5a4768d8cd4f3 100644 --- a/arch/arm64/kvm/vgic/vgic.c +++ b/arch/arm64/kvm/vgic/vgic.c @@ -106,24 +106,23 @@ struct vgic_irq *vgic_get_irq(struct kvm *kvm, u32 intid) struct vgic_irq *vgic_get_vcpu_irq(struct kvm_vcpu *vcpu, u32 intid) { + enum kvm_device_type type; + if (WARN_ON(!vcpu)) return NULL; - if (vgic_is_v5(vcpu->kvm)) { - u32 int_num, hwirq_id; - - if (!__irq_is_ppi(KVM_DEV_TYPE_ARM_VGIC_V5, intid)) - return NULL; - - hwirq_id = FIELD_GET(GICV5_HWIRQ_ID, intid); - int_num = array_index_nospec(hwirq_id, VGIC_V5_NR_PRIVATE_IRQS); + type = vcpu->kvm->arch.vgic.vgic_model; - return &vcpu->arch.vgic_cpu.private_irqs[int_num]; - } + if (__irq_is_sgi(type, intid) || __irq_is_ppi(type, intid)) { + switch (type) { + case KVM_DEV_TYPE_ARM_VGIC_V5: + intid = vgic_v5_get_hwirq_id(intid); + intid = array_index_nospec(intid, VGIC_V5_NR_PRIVATE_IRQS); + break; + default: + intid = array_index_nospec(intid, VGIC_NR_PRIVATE_IRQS); + } - /* SGIs and PPIs */ - if (intid < VGIC_NR_PRIVATE_IRQS) { - intid = array_index_nospec(intid, VGIC_NR_PRIVATE_IRQS); return &vcpu->arch.vgic_cpu.private_irqs[intid]; } @@ -534,11 +533,9 @@ int kvm_vgic_inject_irq(struct kvm *kvm, struct kvm_vcpu *vcpu, { struct vgic_irq *irq; unsigned long flags; - int ret; - ret = vgic_lazy_init(kvm); - if (ret) - return ret; + if (unlikely(!vgic_initialized(kvm))) + return 0; if (!vcpu && irq_is_private(kvm, intid)) return -EINVAL; @@ -573,7 +570,7 @@ int kvm_vgic_inject_irq(struct kvm *kvm, struct kvm_vcpu *vcpu, } void kvm_vgic_set_irq_ops(struct kvm_vcpu *vcpu, u32 vintid, - struct irq_ops *ops) + const struct irq_ops *ops) { struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, vintid); diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h index 9d941241c8a2b..f45f7e3ec4d6e 100644 --- a/arch/arm64/kvm/vgic/vgic.h +++ b/arch/arm64/kvm/vgic/vgic.h @@ -378,6 +378,9 @@ void vgic_v5_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); void vgic_v5_restore_state(struct kvm_vcpu *vcpu); void vgic_v5_save_state(struct kvm_vcpu *vcpu); +#define for_each_visible_v5_ppi(__i, __k) \ + for_each_set_bit(__i, (__k)->arch.vgic.gicv5_vm.vgic_ppi_mask, VGIC_V5_NR_PRIVATE_IRQS) + static inline int vgic_v3_max_apr_idx(struct kvm_vcpu *vcpu) { struct vgic_cpu *cpu_if = &vcpu->arch.vgic_cpu; |
