aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
authorMark Brown <broonie@kernel.org>2026-05-29 22:46:48 +0100
committerMark Brown <broonie@kernel.org>2026-05-29 22:46:48 +0100
commit0ce0f7ee41c6e99f682a62e1d1363881fa18ff58 (patch)
tree09f6c7e2ff320f8144cd9a91252904fbfacf2d02 /arch
parentf08a9de96912e608e3f2d2c7c8c12fbdc6b98208 (diff)
parent6b8c356fdeed88ef8019304c3f18fbb1d8a8be1b (diff)
downloadlinux-next-history-0ce0f7ee41c6e99f682a62e1d1363881fa18ff58.tar.gz
Merge branch 'next' of https://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm.git
Diffstat (limited to 'arch')
-rw-r--r--arch/arm64/include/asm/kvm_host.h3
-rw-r--r--arch/arm64/include/asm/kvm_mmu.h3
-rw-r--r--arch/arm64/kernel/hyp-stub.S4
-rw-r--r--arch/arm64/kvm/arch_timer.c137
-rw-r--r--arch/arm64/kvm/arm.c39
-rw-r--r--arch/arm64/kvm/at.c127
-rw-r--r--arch/arm64/kvm/emulate-nested.c12
-rw-r--r--arch/arm64/kvm/fpsimd.c26
-rw-r--r--arch/arm64/kvm/hyp/include/nvhe/mem_protect.h3
-rw-r--r--arch/arm64/kvm/hyp/nvhe/mem_protect.c37
-rw-r--r--arch/arm64/kvm/hyp/nvhe/page_alloc.c21
-rw-r--r--arch/arm64/kvm/hyp/nvhe/pkvm.c4
-rw-r--r--arch/arm64/kvm/hyp/nvhe/switch.c2
-rw-r--r--arch/arm64/kvm/hyp/nvhe/tlb.c4
-rw-r--r--arch/arm64/kvm/hyp/vgic-v5-sr.c78
-rw-r--r--arch/arm64/kvm/hyp/vhe/switch.c2
-rw-r--r--arch/arm64/kvm/hyp/vhe/tlb.c4
-rw-r--r--arch/arm64/kvm/nested.c144
-rw-r--r--arch/arm64/kvm/pmu-emul.c31
-rw-r--r--arch/arm64/kvm/sys_regs.c19
-rw-r--r--arch/arm64/kvm/vgic/vgic-init.c45
-rw-r--r--arch/arm64/kvm/vgic/vgic-irqfd.c6
-rw-r--r--arch/arm64/kvm/vgic/vgic-kvm-device.c9
-rw-r--r--arch/arm64/kvm/vgic/vgic-v5.c51
-rw-r--r--arch/arm64/kvm/vgic/vgic.c33
-rw-r--r--arch/arm64/kvm/vgic/vgic.h3
26 files changed, 475 insertions, 372 deletions
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index a49042bfa801f..ac5ebfb06e37c 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -1112,7 +1112,8 @@ struct kvm_vcpu_arch {
#define IN_NESTED_ERET __vcpu_single_flag(sflags, BIT(7))
/* SError pending for nested guest */
#define NESTED_SERROR_PENDING __vcpu_single_flag(sflags, BIT(8))
-
+/* KVM is currently emulating an L2 to L1 exception */
+#define IN_NESTED_EXCEPTION __vcpu_single_flag(sflags, BIT(9))
/* Pointer to the vcpu's SVE FFR for sve_{save,load}_state() */
#define vcpu_sve_pffr(vcpu) (kern_hyp_va((vcpu)->arch.sve_state) + \
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 01e9c72d6aa7a..6eae7e7e2a684 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -318,8 +318,7 @@ static __always_inline u64 kvm_get_vttbr(struct kvm_s2_mmu *mmu)
* Must be called from hyp code running at EL2 with an updated VTTBR
* and interrupts disabled.
*/
-static __always_inline void __load_stage2(struct kvm_s2_mmu *mmu,
- struct kvm_arch *arch)
+static __always_inline void __load_stage2(struct kvm_s2_mmu *mmu)
{
write_sysreg(mmu->vtcr, vtcr_el2);
write_sysreg(kvm_get_vttbr(mmu), vttbr_el2);
diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S
index 634ddc9042444..37c6976e44a4c 100644
--- a/arch/arm64/kernel/hyp-stub.S
+++ b/arch/arm64/kernel/hyp-stub.S
@@ -104,11 +104,9 @@ SYM_CODE_START_LOCAL(__finalise_el2)
mov_q x0, HCR_HOST_VHE_FLAGS
msr_hcr_el2 x0
- // Use the EL1 allocated stack, per-cpu offset
+ // Use the EL1 allocated stack
mrs x0, sp_el1
mov sp, x0
- mrs x0, tpidr_el1
- msr tpidr_el2, x0
// FP configuration, vectors
mrs_s x0, SYS_CPACR_EL12
diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c
index cbea4d9ee9552..4155fe89b58a1 100644
--- a/arch/arm64/kvm/arch_timer.c
+++ b/arch/arm64/kvm/arch_timer.c
@@ -39,10 +39,9 @@ static const u8 default_ppi[] = {
[TIMER_HVTIMER] = 28,
};
-static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx);
static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
struct arch_timer_context *timer_ctx);
-static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx);
+static bool kvm_timer_pending(struct arch_timer_context *timer_ctx);
static void kvm_arm_timer_write(struct kvm_vcpu *vcpu,
struct arch_timer_context *timer,
enum kvm_arch_timer_regs treg,
@@ -52,11 +51,17 @@ static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu,
enum kvm_arch_timer_regs treg);
static bool kvm_arch_timer_get_input_level(int vintid);
-static struct irq_ops arch_timer_irq_ops = {
+static unsigned long kvm_arch_timer_get_irq_flags(void)
+{
+ return kvm_vgic_global_state.no_hw_deactivation ? VGIC_IRQ_SW_RESAMPLE : 0;
+}
+
+static const struct irq_ops arch_timer_irq_ops = {
+ .get_flags = kvm_arch_timer_get_irq_flags,
.get_input_level = kvm_arch_timer_get_input_level,
};
-static struct irq_ops arch_timer_irq_ops_vgic_v5 = {
+static const struct irq_ops arch_timer_irq_ops_vgic_v5 = {
.get_input_level = kvm_arch_timer_get_input_level,
.queue_irq_unlock = vgic_v5_ppi_queue_irq_unlock,
.set_direct_injection = vgic_v5_set_ppi_dvi,
@@ -224,7 +229,7 @@ static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
else
ctx = map.direct_ptimer;
- if (kvm_timer_should_fire(ctx))
+ if (kvm_timer_pending(ctx))
kvm_timer_update_irq(vcpu, true, ctx);
if (userspace_irqchip(vcpu->kvm) &&
@@ -257,7 +262,7 @@ static u64 kvm_timer_compute_delta(struct arch_timer_context *timer_ctx)
return kvm_counter_compute_delta(timer_ctx, timer_get_cval(timer_ctx));
}
-static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx)
+static bool kvm_timer_enabled(struct arch_timer_context *timer_ctx)
{
WARN_ON(timer_ctx && timer_ctx->loaded);
return timer_ctx &&
@@ -294,7 +299,7 @@ static u64 kvm_timer_earliest_exp(struct kvm_vcpu *vcpu)
struct arch_timer_context *ctx = &vcpu->arch.timer_cpu.timers[i];
WARN(ctx->loaded, "timer %d loaded\n", i);
- if (kvm_timer_irq_can_fire(ctx))
+ if (kvm_timer_enabled(ctx))
min_delta = min(min_delta, kvm_timer_compute_delta(ctx));
}
@@ -358,7 +363,7 @@ static enum hrtimer_restart kvm_hrtimer_expire(struct hrtimer *hrt)
return HRTIMER_NORESTART;
}
-static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx)
+static bool kvm_timer_pending(struct arch_timer_context *timer_ctx)
{
enum kvm_arch_timers index;
u64 cval, now;
@@ -391,7 +396,7 @@ static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx)
!(cnt_ctl & ARCH_TIMER_CTRL_IT_MASK);
}
- if (!kvm_timer_irq_can_fire(timer_ctx))
+ if (!kvm_timer_enabled(timer_ctx))
return false;
cval = timer_get_cval(timer_ctx);
@@ -405,22 +410,30 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
return vcpu_has_wfit_active(vcpu) && wfit_delay_ns(vcpu) == 0;
}
+static u64 kvm_timer_needs_notify(struct kvm_vcpu *vcpu)
+{
+ u64 v = vcpu->run->s.regs.device_irq_level;
+
+ v ^= kvm_timer_pending(vcpu_vtimer(vcpu)) ? KVM_ARM_DEV_EL1_VTIMER : 0;
+ v ^= kvm_timer_pending(vcpu_ptimer(vcpu)) ? KVM_ARM_DEV_EL1_PTIMER : 0;
+
+ return v & (KVM_ARM_DEV_EL1_VTIMER | KVM_ARM_DEV_EL1_PTIMER);
+}
+
+bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu)
+{
+ return !!kvm_timer_needs_notify(vcpu);
+}
+
/*
* Reflect the timer output level into the kvm_run structure
*/
-void kvm_timer_update_run(struct kvm_vcpu *vcpu)
+bool kvm_timer_update_run(struct kvm_vcpu *vcpu)
{
- struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
- struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
- struct kvm_sync_regs *regs = &vcpu->run->s.regs;
-
- /* Populate the device bitmap with the timer states */
- regs->device_irq_level &= ~(KVM_ARM_DEV_EL1_VTIMER |
- KVM_ARM_DEV_EL1_PTIMER);
- if (kvm_timer_should_fire(vtimer))
- regs->device_irq_level |= KVM_ARM_DEV_EL1_VTIMER;
- if (kvm_timer_should_fire(ptimer))
- regs->device_irq_level |= KVM_ARM_DEV_EL1_PTIMER;
+ u64 mask = kvm_timer_needs_notify(vcpu);
+ if (mask)
+ vcpu->run->s.regs.device_irq_level ^= mask;
+ return !!mask;
}
static void kvm_timer_update_status(struct arch_timer_context *ctx, bool level)
@@ -446,9 +459,8 @@ static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
{
kvm_timer_update_status(timer_ctx, new_level);
- timer_ctx->irq.level = new_level;
trace_kvm_timer_update_irq(vcpu->vcpu_id, timer_irq(timer_ctx),
- timer_ctx->irq.level);
+ new_level);
if (userspace_irqchip(vcpu->kvm))
return;
@@ -466,28 +478,25 @@ static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
kvm_vgic_inject_irq(vcpu->kvm, vcpu,
timer_irq(timer_ctx),
- timer_ctx->irq.level,
+ new_level,
timer_ctx);
}
/* Only called for a fully emulated timer */
static void timer_emulate(struct arch_timer_context *ctx)
{
- bool should_fire = kvm_timer_should_fire(ctx);
+ bool pending = kvm_timer_pending(ctx);
- trace_kvm_timer_emulate(ctx, should_fire);
+ trace_kvm_timer_emulate(ctx, pending);
- if (should_fire != ctx->irq.level)
- kvm_timer_update_irq(timer_context_to_vcpu(ctx), should_fire, ctx);
-
- kvm_timer_update_status(ctx, should_fire);
+ kvm_timer_update_irq(timer_context_to_vcpu(ctx), pending, ctx);
/*
- * If the timer can fire now, we don't need to have a soft timer
- * scheduled for the future. If the timer cannot fire at all,
- * then we also don't need a soft timer.
+ * If the timer is pending, we don't need to have a soft timer
+ * scheduled for the future. If the timer is disabled, then
+ * we don't need a soft timer either.
*/
- if (should_fire || !kvm_timer_irq_can_fire(ctx))
+ if (pending || !kvm_timer_enabled(ctx))
return;
soft_timer_start(&ctx->hrtimer, kvm_timer_compute_delta(ctx));
@@ -594,10 +603,10 @@ static void kvm_timer_blocking(struct kvm_vcpu *vcpu)
* If no timers are capable of raising interrupts (disabled or
* masked), then there's no more work for us to do.
*/
- if (!kvm_timer_irq_can_fire(map.direct_vtimer) &&
- !kvm_timer_irq_can_fire(map.direct_ptimer) &&
- !kvm_timer_irq_can_fire(map.emul_vtimer) &&
- !kvm_timer_irq_can_fire(map.emul_ptimer) &&
+ if (!kvm_timer_enabled(map.direct_vtimer) &&
+ !kvm_timer_enabled(map.direct_ptimer) &&
+ !kvm_timer_enabled(map.emul_vtimer) &&
+ !kvm_timer_enabled(map.emul_ptimer) &&
!vcpu_has_wfit_active(vcpu))
return;
@@ -677,6 +686,7 @@ static inline void set_timer_irq_phys_active(struct arch_timer_context *ctx, boo
static void kvm_timer_vcpu_load_gic(struct arch_timer_context *ctx)
{
struct kvm_vcpu *vcpu = timer_context_to_vcpu(ctx);
+ bool pending = kvm_timer_pending(ctx);
bool phys_active = false;
/*
@@ -685,12 +695,12 @@ static void kvm_timer_vcpu_load_gic(struct arch_timer_context *ctx)
* this point and the register restoration, we'll take the
* interrupt anyway.
*/
- kvm_timer_update_irq(vcpu, kvm_timer_should_fire(ctx), ctx);
+ kvm_timer_update_irq(vcpu, pending, ctx);
if (irqchip_in_kernel(vcpu->kvm))
phys_active = kvm_vgic_map_is_active(vcpu, timer_irq(ctx));
- phys_active |= ctx->irq.level;
+ phys_active |= pending;
phys_active |= vgic_is_v5(vcpu->kvm);
set_timer_irq_phys_active(ctx, phys_active);
@@ -699,6 +709,7 @@ static void kvm_timer_vcpu_load_gic(struct arch_timer_context *ctx)
static void kvm_timer_vcpu_load_nogic(struct kvm_vcpu *vcpu)
{
struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
+ bool pending = kvm_timer_pending(vtimer);
/*
* Update the timer output so that it is likely to match the
@@ -706,7 +717,7 @@ static void kvm_timer_vcpu_load_nogic(struct kvm_vcpu *vcpu)
* this point and the register restoration, we'll take the
* interrupt anyway.
*/
- kvm_timer_update_irq(vcpu, kvm_timer_should_fire(vtimer), vtimer);
+ kvm_timer_update_irq(vcpu, pending, vtimer);
/*
* When using a userspace irqchip with the architected timers and a
@@ -718,7 +729,7 @@ static void kvm_timer_vcpu_load_nogic(struct kvm_vcpu *vcpu)
* being de-asserted, we unmask the interrupt again so that we exit
* from the guest when the timer fires.
*/
- if (vtimer->irq.level)
+ if (pending)
disable_percpu_irq(host_vtimer_irq);
else
enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags);
@@ -904,23 +915,6 @@ void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu)
timer_set_traps(vcpu, &map);
}
-bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu)
-{
- struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
- struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
- struct kvm_sync_regs *sregs = &vcpu->run->s.regs;
- bool vlevel, plevel;
-
- if (likely(irqchip_in_kernel(vcpu->kvm)))
- return false;
-
- vlevel = sregs->device_irq_level & KVM_ARM_DEV_EL1_VTIMER;
- plevel = sregs->device_irq_level & KVM_ARM_DEV_EL1_PTIMER;
-
- return kvm_timer_should_fire(vtimer) != vlevel ||
- kvm_timer_should_fire(ptimer) != plevel;
-}
-
void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
{
struct arch_timer_cpu *timer = vcpu_timer(vcpu);
@@ -1006,7 +1000,7 @@ static void unmask_vtimer_irq_user(struct kvm_vcpu *vcpu)
{
struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
- if (!kvm_timer_should_fire(vtimer)) {
+ if (!kvm_timer_pending(vtimer)) {
kvm_timer_update_irq(vcpu, false, vtimer);
if (static_branch_likely(&has_gic_active_state))
set_timer_irq_phys_active(vtimer, false);
@@ -1288,7 +1282,12 @@ static int timer_irq_set_vcpu_affinity(struct irq_data *d, void *vcpu)
static int timer_irq_set_irqchip_state(struct irq_data *d,
enum irqchip_irq_state which, bool val)
{
- if (which != IRQCHIP_STATE_ACTIVE || !irqd_is_forwarded_to_vcpu(d))
+ bool passthrough = which != IRQCHIP_STATE_ACTIVE ||
+ !irqd_is_forwarded_to_vcpu(d) ||
+ (kvm_vgic_global_state.type == VGIC_V5 &&
+ vgic_is_v3(kvm_get_running_vcpu()->kvm));
+
+ if (passthrough)
return irq_chip_set_parent_state(d, which, val);
if (val)
@@ -1301,15 +1300,7 @@ static int timer_irq_set_irqchip_state(struct irq_data *d,
static void timer_irq_eoi(struct irq_data *d)
{
- /*
- * On a GICv5 host, we still need to call EOI on the parent for
- * PPIs. The host driver already handles irqs which are forwarded to
- * vcpus, and skips the GIC CDDI while still doing the GIC CDEOI. This
- * is required to emulate the EOIMode=1 on GICv5 hardware. Failure to
- * call EOI unsurprisingly results in *BAD* lock-ups.
- */
- if (!irqd_is_forwarded_to_vcpu(d) ||
- kvm_vgic_global_state.type == VGIC_V5)
+ if (!irqd_is_forwarded_to_vcpu(d))
irq_chip_eoi_parent(d);
}
@@ -1392,8 +1383,6 @@ static int kvm_irq_init(struct arch_timer_kvm_info *info)
return -ENOMEM;
}
- if (kvm_vgic_global_state.no_hw_deactivation)
- arch_timer_irq_ops.flags |= VGIC_IRQ_SW_RESAMPLE;
WARN_ON(irq_domain_push_irq(domain, host_vtimer_irq,
(void *)TIMER_VTIMER));
}
@@ -1579,7 +1568,7 @@ static bool kvm_arch_timer_get_input_level(int vintid)
ctx = vcpu_get_timer(vcpu, i);
if (timer_irq(ctx) == vintid)
- return kvm_timer_should_fire(ctx);
+ return kvm_timer_pending(ctx);
}
/* A timer IRQ has fired, but no matching timer was found? */
@@ -1591,8 +1580,8 @@ static bool kvm_arch_timer_get_input_level(int vintid)
int kvm_timer_enable(struct kvm_vcpu *vcpu)
{
struct arch_timer_cpu *timer = vcpu_timer(vcpu);
+ const struct irq_ops *ops;
struct timer_map map;
- struct irq_ops *ops;
int ret;
if (timer->enabled)
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 9453321ef8c67..a988fee6b6d58 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -52,6 +52,7 @@
#include <linux/irqchip/arm-gic-v5.h>
+#include "vgic/vgic.h"
#include "sys_regs.h"
static enum kvm_mode kvm_mode = KVM_MODE_DEFAULT;
@@ -1166,6 +1167,15 @@ static bool vcpu_mode_is_bad_32bit(struct kvm_vcpu *vcpu)
return !kvm_supports_32bit_el0();
}
+static bool kvm_irq_update_run(struct kvm_vcpu *vcpu)
+{
+ bool r;
+
+ r = kvm_timer_update_run(vcpu);
+ r |= kvm_pmu_update_run(vcpu);
+ return r;
+}
+
/**
* kvm_vcpu_exit_request - returns true if the VCPU should *not* enter the guest
* @vcpu: The VCPU pointer
@@ -1187,13 +1197,11 @@ static bool kvm_vcpu_exit_request(struct kvm_vcpu *vcpu, int *ret)
/*
* If we're using a userspace irqchip, then check if we need
* to tell a userspace irqchip about timer or PMU level
- * changes and if so, exit to userspace (the actual level
- * state gets updated in kvm_timer_update_run and
- * kvm_pmu_update_run below).
+ * changes and if so, exit to userspace while updating the run
+ * state.
*/
if (unlikely(!irqchip_in_kernel(vcpu->kvm))) {
- if (kvm_timer_should_notify_user(vcpu) ||
- kvm_pmu_should_notify_user(vcpu)) {
+ if (unlikely(kvm_irq_update_run(vcpu))) {
*ret = -EINTR;
run->exit_reason = KVM_EXIT_INTR;
return true;
@@ -1408,11 +1416,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
ret = handle_exit(vcpu, ret);
}
- /* Tell userspace about in-kernel device output levels */
- if (unlikely(!irqchip_in_kernel(vcpu->kvm))) {
- kvm_timer_update_run(vcpu);
- kvm_pmu_update_run(vcpu);
- }
+ if (unlikely(!irqchip_in_kernel(vcpu->kvm)))
+ kvm_irq_update_run(vcpu);
kvm_sigset_deactivate(vcpu);
@@ -1496,8 +1501,13 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level,
return vcpu_interrupt_line(vcpu, irq_num, level);
case KVM_ARM_IRQ_TYPE_PPI:
- if (!irqchip_in_kernel(kvm))
+ if (irqchip_in_kernel(kvm)) {
+ int ret = vgic_lazy_init(kvm);
+ if (ret)
+ return ret;
+ } else {
return -ENXIO;
+ }
vcpu = kvm_get_vcpu_by_id(kvm, vcpu_id);
if (!vcpu)
@@ -1524,8 +1534,13 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level,
return kvm_vgic_inject_irq(kvm, vcpu, irq_num, level, NULL);
case KVM_ARM_IRQ_TYPE_SPI:
- if (!irqchip_in_kernel(kvm))
+ if (irqchip_in_kernel(kvm)) {
+ int ret = vgic_lazy_init(kvm);
+ if (ret)
+ return ret;
+ } else {
return -ENXIO;
+ }
if (vgic_is_v5(kvm)) {
/* Build a GICv5-style IntID here */
diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c
index 9f8f0ae8e86e8..831e88f0dba0b 100644
--- a/arch/arm64/kvm/at.c
+++ b/arch/arm64/kvm/at.c
@@ -136,14 +136,106 @@ static void compute_s1poe(struct kvm_vcpu *vcpu, struct s1_walk_info *wi)
wi->e0poe = (wi->regime != TR_EL2) && (val & TCR2_EL1_E0POE);
}
+#define _has_tgran(__r, __sz) \
+ ({ \
+ u64 _s1, _mmfr0 = __r; \
+ \
+ _s1 = SYS_FIELD_GET(ID_AA64MMFR0_EL1, \
+ TGRAN##__sz, _mmfr0); \
+ \
+ _s1 != ID_AA64MMFR0_EL1_TGRAN##__sz##_NI; \
+ })
+
+static bool has_tgran(u64 mmfr0, unsigned int shift)
+{
+ switch (shift) {
+ case 12:
+ return _has_tgran(mmfr0, 4);
+ case 14:
+ return _has_tgran(mmfr0, 16);
+ case 16:
+ return _has_tgran(mmfr0, 64);
+ default:
+ BUG();
+ }
+}
+
+static unsigned int tcr_to_tg0_pgshift(u64 tcr)
+{
+ u64 tg0 = tcr & TCR_TG0_MASK;
+
+ switch (tg0) {
+ case TCR_TG0_4K:
+ return 12;
+ case TCR_TG0_16K:
+ return 14;
+ case TCR_TG0_64K:
+ default: /* IMPDEF: treat any other value as 64k */
+ return 16;
+ }
+}
+
+static unsigned int tcr_to_tg1_pgshift(u64 tcr)
+{
+ u64 tg1 = tcr & TCR_TG1_MASK;
+
+ switch (tg1) {
+ case TCR_TG1_4K:
+ return 12;
+ case TCR_TG1_16K:
+ return 14;
+ case TCR_TG1_64K:
+ default: /* IMPDEF: treat any other value as 64k */
+ return 16;
+ }
+}
+
+static unsigned int fallback_tgran_shift(u64 mmfr0)
+{
+ if (has_tgran(mmfr0, PAGE_SHIFT))
+ return PAGE_SHIFT;
+ else if (has_tgran(mmfr0, 12))
+ return 12;
+ else if (has_tgran(mmfr0, 14))
+ return 14;
+ else if (has_tgran(mmfr0, 16))
+ return 16;
+ else /* Should be unreacheable */
+ return PAGE_SHIFT;
+}
+
+static unsigned int tcr_tg_pgshift(struct kvm *kvm, u64 tcr, bool upper_range)
+{
+ u64 mmfr0 = kvm_read_vm_id_reg(kvm, SYS_ID_AA64MMFR0_EL1);
+ unsigned int shift;
+
+ /* Someone was silly enough to encode TG0/TG1 differently */
+ if (upper_range)
+ shift = tcr_to_tg1_pgshift(tcr);
+ else
+ shift = tcr_to_tg0_pgshift(tcr);
+
+ /*
+ * If TGx is programmed to an unimplemented value (not advertised in
+ * ID_AA64MMFR0_EL1), we should treat it as if an implemented value is
+ * written, as per the architecture. Choose an available one while
+ * prioritizing PAGE_SIZE.
+ */
+ if (!has_tgran(mmfr0, shift))
+ return fallback_tgran_shift(mmfr0);
+
+ return shift;
+}
+
static int setup_s1_walk(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
struct s1_walk_result *wr, u64 va)
{
- u64 hcr, sctlr, tcr, tg, ps, ia_bits, ttbr;
+ u64 hcr, sctlr, tcr, ps, ia_bits, ttbr;
unsigned int stride, x;
- bool va55, tbi, lva;
+ bool va55, tbi, lva, upper_range;
va55 = va & BIT(55);
+ upper_range = va55 && wi->regime != TR_EL2;
if (vcpu_has_nv(vcpu)) {
hcr = __vcpu_sys_reg(vcpu, HCR_EL2);
@@ -174,35 +266,12 @@ static int setup_s1_walk(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
BUG();
}
- /* Someone was silly enough to encode TG0/TG1 differently */
- if (va55 && wi->regime != TR_EL2) {
+ if (upper_range)
wi->txsz = FIELD_GET(TCR_T1SZ_MASK, tcr);
- tg = FIELD_GET(TCR_TG1_MASK, tcr);
-
- switch (tg << TCR_TG1_SHIFT) {
- case TCR_TG1_4K:
- wi->pgshift = 12; break;
- case TCR_TG1_16K:
- wi->pgshift = 14; break;
- case TCR_TG1_64K:
- default: /* IMPDEF: treat any other value as 64k */
- wi->pgshift = 16; break;
- }
- } else {
+ else
wi->txsz = FIELD_GET(TCR_T0SZ_MASK, tcr);
- tg = FIELD_GET(TCR_TG0_MASK, tcr);
-
- switch (tg << TCR_TG0_SHIFT) {
- case TCR_TG0_4K:
- wi->pgshift = 12; break;
- case TCR_TG0_16K:
- wi->pgshift = 14; break;
- case TCR_TG0_64K:
- default: /* IMPDEF: treat any other value as 64k */
- wi->pgshift = 16; break;
- }
- }
+ wi->pgshift = tcr_tg_pgshift(vcpu->kvm, tcr, upper_range);
wi->pa52bit = has_52bit_pa(vcpu, wi, tcr);
ia_bits = get_ia_size(wi);
@@ -1380,7 +1449,7 @@ static u64 __kvm_at_s1e01_fast(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
}
}
write_sysreg_el1(vcpu_read_sys_reg(vcpu, SCTLR_EL1), SYS_SCTLR);
- __load_stage2(mmu, mmu->arch);
+ __load_stage2(mmu);
skip_mmu_switch:
/* Temporarily switch back to guest context */
diff --git a/arch/arm64/kvm/emulate-nested.c b/arch/arm64/kvm/emulate-nested.c
index dba7ced74ca5e..e688bc5139c1f 100644
--- a/arch/arm64/kvm/emulate-nested.c
+++ b/arch/arm64/kvm/emulate-nested.c
@@ -2631,6 +2631,14 @@ bool triage_sysreg_trap(struct kvm_vcpu *vcpu, int *sr_index)
fgtreg = HFGITR2_EL2;
break;
+ case ICH_HFGRTR_GROUP:
+ fgtreg = is_read ? ICH_HFGRTR_EL2 : ICH_HFGWTR_EL2;
+ break;
+
+ case ICH_HFGITR_GROUP:
+ fgtreg = ICH_HFGITR_EL2;
+ break;
+
default:
/* Something is really wrong, bail out */
WARN_ONCE(1, "Bad FGT group (encoding %08x, config %016llx)\n",
@@ -2862,6 +2870,8 @@ static int kvm_inject_nested(struct kvm_vcpu *vcpu, u64 esr_el2,
preempt_disable();
+ vcpu_set_flag(vcpu, IN_NESTED_EXCEPTION);
+
/*
* We may have an exception or PC update in the EL0/EL1 context.
* Commit it before entering EL2.
@@ -2884,6 +2894,8 @@ static int kvm_inject_nested(struct kvm_vcpu *vcpu, u64 esr_el2,
__kvm_adjust_pc(vcpu);
kvm_arch_vcpu_load(vcpu, smp_processor_id());
+ vcpu_clear_flag(vcpu, IN_NESTED_EXCEPTION);
+
preempt_enable();
if (kvm_vcpu_has_pmu(vcpu))
diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c
index 15e17aca1dec0..3f6b1e29cd6b9 100644
--- a/arch/arm64/kvm/fpsimd.c
+++ b/arch/arm64/kvm/fpsimd.c
@@ -29,6 +29,20 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu)
return;
/*
+ * Avoid needless save/restore of the guest's common
+ * FPSIMD/SVE/SME regs during transitions between L1/L2.
+ *
+ * These transitions only happens in a non-preemptible context
+ * where the host regs have already been saved and unbound. The
+ * live registers are either free or owned by the guest.
+ */
+ if (vcpu_get_flag(vcpu, IN_NESTED_ERET) ||
+ vcpu_get_flag(vcpu, IN_NESTED_EXCEPTION)) {
+ WARN_ON_ONCE(host_owns_fp_regs());
+ return;
+ }
+
+ /*
* Ensure that any host FPSIMD/SVE/SME state is saved and unbound such
* that the host kernel is responsible for restoring this state upon
* return to userspace, and the hyp code doesn't need to save anything.
@@ -102,6 +116,18 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu)
{
unsigned long flags;
+ /*
+ * See comment in kvm_arch_vcpu_load_fp(). Note that we also rely on
+ * the guest's max VL to have been set by fpsimd_lazy_switch_to_host()
+ * so that any intervening kernel-mode SIMD (NEON or otherwise)
+ * operation sees the full guest state that needs saving.
+ */
+ if (vcpu_get_flag(vcpu, IN_NESTED_ERET) ||
+ vcpu_get_flag(vcpu, IN_NESTED_EXCEPTION)) {
+ WARN_ON_ONCE(host_owns_fp_regs());
+ return;
+ }
+
local_irq_save(flags);
if (guest_owns_fp_regs()) {
diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
index 3cbfae0e3dda1..29935c7da1dec 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
@@ -56,6 +56,7 @@ int host_stage2_idmap_locked(phys_addr_t addr, u64 size, enum kvm_pgtable_prot p
int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, u8 owner_id);
int kvm_host_prepare_stage2(void *pgt_pool_base);
int kvm_guest_prepare_stage2(struct pkvm_hyp_vm *vm, void *pgd);
+void kvm_guest_destroy_stage2(struct pkvm_hyp_vm *vm);
void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt);
int hyp_pin_shared_mem(void *from, void *to);
@@ -67,7 +68,7 @@ int refill_memcache(struct kvm_hyp_memcache *mc, unsigned long min_pages,
static __always_inline void __load_host_stage2(void)
{
if (static_branch_likely(&kvm_protected_mode_initialized))
- __load_stage2(&host_mmu.arch.mmu, &host_mmu.arch);
+ __load_stage2(&host_mmu.arch.mmu);
else
write_sysreg(0, vttbr_el2);
}
diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
index 25f04629014e6..4e329e39a695a 100644
--- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
+++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -217,7 +217,6 @@ static void *guest_s2_zalloc_page(void *mc)
memset(addr, 0, PAGE_SIZE);
p = hyp_virt_to_page(addr);
p->refcount = 1;
- p->order = 0;
return addr;
}
@@ -306,23 +305,27 @@ int kvm_guest_prepare_stage2(struct pkvm_hyp_vm *vm, void *pgd)
return 0;
}
+void kvm_guest_destroy_stage2(struct pkvm_hyp_vm *vm)
+{
+ guest_lock_component(vm);
+ kvm_pgtable_stage2_destroy(&vm->pgt);
+ vm->kvm.arch.mmu.pgd_phys = 0ULL;
+ guest_unlock_component(vm);
+}
+
void reclaim_pgtable_pages(struct pkvm_hyp_vm *vm, struct kvm_hyp_memcache *mc)
{
struct hyp_page *page;
void *addr;
/* Dump all pgtable pages in the hyp_pool */
- guest_lock_component(vm);
- kvm_pgtable_stage2_destroy(&vm->pgt);
- vm->kvm.arch.mmu.pgd_phys = 0ULL;
- guest_unlock_component(vm);
+ kvm_guest_destroy_stage2(vm);
/* Drain the hyp_pool into the memcache */
addr = hyp_alloc_pages(&vm->pool, 0);
while (addr) {
page = hyp_virt_to_page(addr);
page->refcount = 0;
- page->order = 0;
push_hyp_memcache(mc, addr, hyp_virt_to_phys);
WARN_ON(__pkvm_hyp_donate_host(hyp_virt_to_pfn(addr), 1));
addr = hyp_alloc_pages(&vm->pool, 0);
@@ -352,7 +355,7 @@ int __pkvm_prot_finalize(void)
kvm_flush_dcache_to_poc(params, sizeof(*params));
write_sysreg_hcr(params->hcr_el2);
- __load_stage2(&host_mmu.arch.mmu, &host_mmu.arch);
+ __load_stage2(&host_mmu.arch.mmu);
/*
* Make sure to have an ISB before the TLB maintenance below but only
@@ -851,6 +854,16 @@ static int __hyp_check_page_state_range(phys_addr_t phys, u64 size, enum pkvm_pa
return 0;
}
+static int __hyp_check_page_count_range(phys_addr_t phys, u64 size)
+{
+ for_each_hyp_page(page, phys, size) {
+ if (page->refcount)
+ return -EBUSY;
+ }
+
+ return 0;
+}
+
static bool guest_pte_is_poisoned(kvm_pte_t pte)
{
if (kvm_pte_valid(pte))
@@ -1049,7 +1062,6 @@ unlock:
int __pkvm_host_unshare_hyp(u64 pfn)
{
u64 phys = hyp_pfn_to_phys(pfn);
- u64 virt = (u64)__hyp_va(phys);
u64 size = PAGE_SIZE;
int ret;
@@ -1062,10 +1074,9 @@ int __pkvm_host_unshare_hyp(u64 pfn)
ret = __hyp_check_page_state_range(phys, size, PKVM_PAGE_SHARED_BORROWED);
if (ret)
goto unlock;
- if (hyp_page_count((void *)virt)) {
- ret = -EBUSY;
+ ret = __hyp_check_page_count_range(phys, size);
+ if (ret)
goto unlock;
- }
__hyp_set_page_state_range(phys, size, PKVM_NOPAGE);
WARN_ON(__host_set_page_state_range(phys, size, PKVM_PAGE_OWNED));
@@ -1128,6 +1139,10 @@ int __pkvm_hyp_donate_host(u64 pfn, u64 nr_pages)
if (ret)
goto unlock;
+ ret = __hyp_check_page_count_range(phys, size);
+ if (ret)
+ goto unlock;
+
__hyp_set_page_state_range(phys, size, PKVM_NOPAGE);
WARN_ON(kvm_pgtable_hyp_unmap(&pkvm_pgtable, virt, size) != size);
WARN_ON(host_stage2_set_owner_locked(phys, size, PKVM_ID_HOST));
diff --git a/arch/arm64/kvm/hyp/nvhe/page_alloc.c b/arch/arm64/kvm/hyp/nvhe/page_alloc.c
index a1eb27a1a7477..57f86aa0f82fc 100644
--- a/arch/arm64/kvm/hyp/nvhe/page_alloc.c
+++ b/arch/arm64/kvm/hyp/nvhe/page_alloc.c
@@ -94,13 +94,22 @@ static void __hyp_attach_page(struct hyp_pool *pool,
struct hyp_page *p)
{
phys_addr_t phys = hyp_page_to_phys(p);
- u8 order = p->order;
struct hyp_page *buddy;
+ bool coalesce = true;
+ u8 order = p->order;
- memset(hyp_page_to_virt(p), 0, PAGE_SIZE << p->order);
+ /*
+ * 'external' pages are never coalesced and their ->order field
+ * untrusted as they bypass hyp_pool_init(). Enforce order-0.
+ */
+ if (phys < pool->range_start || phys >= pool->range_end) {
+ order = 0;
+ coalesce = false;
+ }
+
+ memset(hyp_page_to_virt(p), 0, PAGE_SIZE << order);
- /* Skip coalescing for 'external' pages being freed into the pool. */
- if (phys < pool->range_start || phys >= pool->range_end)
+ if (!coalesce)
goto insert;
/*
@@ -237,8 +246,10 @@ int hyp_pool_init(struct hyp_pool *pool, u64 pfn, unsigned int nr_pages,
/* Init the vmemmap portion */
p = hyp_phys_to_page(phys);
- for (i = 0; i < nr_pages; i++)
+ for (i = 0; i < nr_pages; i++) {
hyp_set_page_refcounted(&p[i]);
+ p[i].order = 0;
+ }
/* Attach the unused pages to the buddy tree */
for (i = reserved_pages; i < nr_pages; i++)
diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c
index eb1c10120f9f5..3b2c4fbc34d8e 100644
--- a/arch/arm64/kvm/hyp/nvhe/pkvm.c
+++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c
@@ -853,10 +853,12 @@ int __pkvm_init_vm(struct kvm *host_kvm, unsigned long vm_hva,
/* Must be called last since this publishes the VM. */
ret = insert_vm_table_entry(handle, hyp_vm);
if (ret)
- goto err_remove_mappings;
+ goto err_destroy_stage2;
return 0;
+err_destroy_stage2:
+ kvm_guest_destroy_stage2(hyp_vm);
err_remove_mappings:
unmap_donated_memory(hyp_vm, vm_size);
unmap_donated_memory(pgd, pgd_size);
diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
index 8d1df3d33595b..7318e3e6a5f36 100644
--- a/arch/arm64/kvm/hyp/nvhe/switch.c
+++ b/arch/arm64/kvm/hyp/nvhe/switch.c
@@ -315,7 +315,7 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
__sysreg_restore_state_nvhe(guest_ctxt);
mmu = kern_hyp_va(vcpu->arch.hw_mmu);
- __load_stage2(mmu, kern_hyp_va(mmu->arch));
+ __load_stage2(mmu);
__activate_traps(vcpu);
__hyp_vgic_restore_state(vcpu);
diff --git a/arch/arm64/kvm/hyp/nvhe/tlb.c b/arch/arm64/kvm/hyp/nvhe/tlb.c
index b29140995d484..fdb90483340ce 100644
--- a/arch/arm64/kvm/hyp/nvhe/tlb.c
+++ b/arch/arm64/kvm/hyp/nvhe/tlb.c
@@ -110,7 +110,7 @@ static void enter_vmid_context(struct kvm_s2_mmu *mmu,
if (vcpu)
__load_host_stage2();
else
- __load_stage2(mmu, kern_hyp_va(mmu->arch));
+ __load_stage2(mmu);
asm(ALTERNATIVE("isb", "nop", ARM64_WORKAROUND_SPECULATIVE_AT));
}
@@ -128,7 +128,7 @@ static void exit_vmid_context(struct tlb_inv_context *cxt)
return;
if (vcpu)
- __load_stage2(mmu, kern_hyp_va(mmu->arch));
+ __load_stage2(mmu);
else
__load_host_stage2();
diff --git a/arch/arm64/kvm/hyp/vgic-v5-sr.c b/arch/arm64/kvm/hyp/vgic-v5-sr.c
index 47e6bcd437029..6d69dfe89a96c 100644
--- a/arch/arm64/kvm/hyp/vgic-v5-sr.c
+++ b/arch/arm64/kvm/hyp/vgic-v5-sr.c
@@ -30,10 +30,9 @@ void __vgic_v5_save_ppi_state(struct vgic_v5_cpu_if *cpu_if)
{
/*
* The following code assumes that the bitmap storage that we have for
- * PPIs is either 64 (architected PPIs, only) or 128 bits (architected &
- * impdef PPIs).
+ * PPIs is either 64 (architected PPIs, only).
*/
- BUILD_BUG_ON(VGIC_V5_NR_PRIVATE_IRQS % 64);
+ BUILD_BUG_ON(VGIC_V5_NR_PRIVATE_IRQS != 64);
bitmap_write(host_data_ptr(vgic_v5_ppi_state)->activer_exit,
read_sysreg_s(SYS_ICH_PPI_ACTIVER0_EL2), 0, 64);
@@ -49,22 +48,6 @@ void __vgic_v5_save_ppi_state(struct vgic_v5_cpu_if *cpu_if)
cpu_if->vgic_ppi_priorityr[6] = read_sysreg_s(SYS_ICH_PPI_PRIORITYR6_EL2);
cpu_if->vgic_ppi_priorityr[7] = read_sysreg_s(SYS_ICH_PPI_PRIORITYR7_EL2);
- if (VGIC_V5_NR_PRIVATE_IRQS == 128) {
- bitmap_write(host_data_ptr(vgic_v5_ppi_state)->activer_exit,
- read_sysreg_s(SYS_ICH_PPI_ACTIVER1_EL2), 64, 64);
- bitmap_write(host_data_ptr(vgic_v5_ppi_state)->pendr,
- read_sysreg_s(SYS_ICH_PPI_PENDR1_EL2), 64, 64);
-
- cpu_if->vgic_ppi_priorityr[8] = read_sysreg_s(SYS_ICH_PPI_PRIORITYR8_EL2);
- cpu_if->vgic_ppi_priorityr[9] = read_sysreg_s(SYS_ICH_PPI_PRIORITYR9_EL2);
- cpu_if->vgic_ppi_priorityr[10] = read_sysreg_s(SYS_ICH_PPI_PRIORITYR10_EL2);
- cpu_if->vgic_ppi_priorityr[11] = read_sysreg_s(SYS_ICH_PPI_PRIORITYR11_EL2);
- cpu_if->vgic_ppi_priorityr[12] = read_sysreg_s(SYS_ICH_PPI_PRIORITYR12_EL2);
- cpu_if->vgic_ppi_priorityr[13] = read_sysreg_s(SYS_ICH_PPI_PRIORITYR13_EL2);
- cpu_if->vgic_ppi_priorityr[14] = read_sysreg_s(SYS_ICH_PPI_PRIORITYR14_EL2);
- cpu_if->vgic_ppi_priorityr[15] = read_sysreg_s(SYS_ICH_PPI_PRIORITYR15_EL2);
- }
-
/* Now that we are done, disable DVI */
write_sysreg_s(0, SYS_ICH_PPI_DVIR0_EL2);
write_sysreg_s(0, SYS_ICH_PPI_DVIR1_EL2);
@@ -74,9 +57,6 @@ void __vgic_v5_restore_ppi_state(struct vgic_v5_cpu_if *cpu_if)
{
DECLARE_BITMAP(pendr, VGIC_V5_NR_PRIVATE_IRQS);
- /* We assume 64 or 128 PPIs - see above comment */
- BUILD_BUG_ON(VGIC_V5_NR_PRIVATE_IRQS % 64);
-
/* Enable DVI so that the guest's interrupt config takes over */
write_sysreg_s(bitmap_read(cpu_if->vgic_ppi_dvir, 0, 64),
SYS_ICH_PPI_DVIR0_EL2);
@@ -108,50 +88,20 @@ void __vgic_v5_restore_ppi_state(struct vgic_v5_cpu_if *cpu_if)
write_sysreg_s(cpu_if->vgic_ppi_priorityr[7],
SYS_ICH_PPI_PRIORITYR7_EL2);
- if (VGIC_V5_NR_PRIVATE_IRQS == 128) {
- /* Enable DVI so that the guest's interrupt config takes over */
- write_sysreg_s(bitmap_read(cpu_if->vgic_ppi_dvir, 64, 64),
- SYS_ICH_PPI_DVIR1_EL2);
-
- write_sysreg_s(bitmap_read(cpu_if->vgic_ppi_activer, 64, 64),
- SYS_ICH_PPI_ACTIVER1_EL2);
- write_sysreg_s(bitmap_read(cpu_if->vgic_ppi_enabler, 64, 64),
- SYS_ICH_PPI_ENABLER1_EL2);
- write_sysreg_s(bitmap_read(pendr, 64, 64),
- SYS_ICH_PPI_PENDR1_EL2);
-
- write_sysreg_s(cpu_if->vgic_ppi_priorityr[8],
- SYS_ICH_PPI_PRIORITYR8_EL2);
- write_sysreg_s(cpu_if->vgic_ppi_priorityr[9],
- SYS_ICH_PPI_PRIORITYR9_EL2);
- write_sysreg_s(cpu_if->vgic_ppi_priorityr[10],
- SYS_ICH_PPI_PRIORITYR10_EL2);
- write_sysreg_s(cpu_if->vgic_ppi_priorityr[11],
- SYS_ICH_PPI_PRIORITYR11_EL2);
- write_sysreg_s(cpu_if->vgic_ppi_priorityr[12],
- SYS_ICH_PPI_PRIORITYR12_EL2);
- write_sysreg_s(cpu_if->vgic_ppi_priorityr[13],
- SYS_ICH_PPI_PRIORITYR13_EL2);
- write_sysreg_s(cpu_if->vgic_ppi_priorityr[14],
- SYS_ICH_PPI_PRIORITYR14_EL2);
- write_sysreg_s(cpu_if->vgic_ppi_priorityr[15],
- SYS_ICH_PPI_PRIORITYR15_EL2);
- } else {
- write_sysreg_s(0, SYS_ICH_PPI_DVIR1_EL2);
+ write_sysreg_s(0, SYS_ICH_PPI_DVIR1_EL2);
- write_sysreg_s(0, SYS_ICH_PPI_ACTIVER1_EL2);
- write_sysreg_s(0, SYS_ICH_PPI_ENABLER1_EL2);
- write_sysreg_s(0, SYS_ICH_PPI_PENDR1_EL2);
+ write_sysreg_s(0, SYS_ICH_PPI_ACTIVER1_EL2);
+ write_sysreg_s(0, SYS_ICH_PPI_ENABLER1_EL2);
+ write_sysreg_s(0, SYS_ICH_PPI_PENDR1_EL2);
- write_sysreg_s(0, SYS_ICH_PPI_PRIORITYR8_EL2);
- write_sysreg_s(0, SYS_ICH_PPI_PRIORITYR9_EL2);
- write_sysreg_s(0, SYS_ICH_PPI_PRIORITYR10_EL2);
- write_sysreg_s(0, SYS_ICH_PPI_PRIORITYR11_EL2);
- write_sysreg_s(0, SYS_ICH_PPI_PRIORITYR12_EL2);
- write_sysreg_s(0, SYS_ICH_PPI_PRIORITYR13_EL2);
- write_sysreg_s(0, SYS_ICH_PPI_PRIORITYR14_EL2);
- write_sysreg_s(0, SYS_ICH_PPI_PRIORITYR15_EL2);
- }
+ write_sysreg_s(0, SYS_ICH_PPI_PRIORITYR8_EL2);
+ write_sysreg_s(0, SYS_ICH_PPI_PRIORITYR9_EL2);
+ write_sysreg_s(0, SYS_ICH_PPI_PRIORITYR10_EL2);
+ write_sysreg_s(0, SYS_ICH_PPI_PRIORITYR11_EL2);
+ write_sysreg_s(0, SYS_ICH_PPI_PRIORITYR12_EL2);
+ write_sysreg_s(0, SYS_ICH_PPI_PRIORITYR13_EL2);
+ write_sysreg_s(0, SYS_ICH_PPI_PRIORITYR14_EL2);
+ write_sysreg_s(0, SYS_ICH_PPI_PRIORITYR15_EL2);
}
void __vgic_v5_save_state(struct vgic_v5_cpu_if *cpu_if)
diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c
index 1e8995add14fa..bbe9cebd3d9d5 100644
--- a/arch/arm64/kvm/hyp/vhe/switch.c
+++ b/arch/arm64/kvm/hyp/vhe/switch.c
@@ -219,7 +219,7 @@ void kvm_vcpu_load_vhe(struct kvm_vcpu *vcpu)
__vcpu_load_switch_sysregs(vcpu);
__vcpu_load_activate_traps(vcpu);
- __load_stage2(vcpu->arch.hw_mmu, vcpu->arch.hw_mmu->arch);
+ __load_stage2(vcpu->arch.hw_mmu);
}
void kvm_vcpu_put_vhe(struct kvm_vcpu *vcpu)
diff --git a/arch/arm64/kvm/hyp/vhe/tlb.c b/arch/arm64/kvm/hyp/vhe/tlb.c
index f7b9dfe3f3a5a..c386d9f1c1016 100644
--- a/arch/arm64/kvm/hyp/vhe/tlb.c
+++ b/arch/arm64/kvm/hyp/vhe/tlb.c
@@ -60,7 +60,7 @@ static void enter_vmid_context(struct kvm_s2_mmu *mmu,
* place before clearing TGE. __load_stage2() already
* has an ISB in order to deal with this.
*/
- __load_stage2(mmu, mmu->arch);
+ __load_stage2(mmu);
val = read_sysreg(hcr_el2);
val &= ~HCR_TGE;
write_sysreg_hcr(val);
@@ -78,7 +78,7 @@ static void exit_vmid_context(struct tlb_inv_context *cxt)
/* ... and the stage-2 MMU context that we switched away from */
if (cxt->mmu)
- __load_stage2(cxt->mmu, cxt->mmu->arch);
+ __load_stage2(cxt->mmu);
if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
/* Restore the registers to what they were */
diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c
index 38f672e940878..f91046b5c476d 100644
--- a/arch/arm64/kvm/nested.c
+++ b/arch/arm64/kvm/nested.c
@@ -378,32 +378,104 @@ static int walk_nested_s2_pgd(struct kvm_vcpu *vcpu, phys_addr_t ipa,
return 0;
}
-static void vtcr_to_walk_info(u64 vtcr, struct s2_walk_info *wi)
+#define _has_tgran_2(__r, __sz) \
+ ({ \
+ u64 _s1, _s2, _mmfr0 = __r; \
+ \
+ _s2 = SYS_FIELD_GET(ID_AA64MMFR0_EL1, \
+ TGRAN##__sz##_2, _mmfr0); \
+ \
+ _s1 = SYS_FIELD_GET(ID_AA64MMFR0_EL1, \
+ TGRAN##__sz, _mmfr0); \
+ \
+ ((_s2 != ID_AA64MMFR0_EL1_TGRAN##__sz##_2_NI && \
+ _s2 != ID_AA64MMFR0_EL1_TGRAN##__sz##_2_TGRAN##__sz) || \
+ (_s2 == ID_AA64MMFR0_EL1_TGRAN##__sz##_2_TGRAN##__sz && \
+ _s1 != ID_AA64MMFR0_EL1_TGRAN##__sz##_NI)); \
+ })
+
+static bool has_tgran_2(u64 mmfr0, unsigned int shift)
+{
+ switch (shift) {
+ case 12:
+ return _has_tgran_2(mmfr0, 4);
+ case 14:
+ return _has_tgran_2(mmfr0, 16);
+ case 16:
+ return _has_tgran_2(mmfr0, 64);
+ default:
+ BUG();
+ }
+}
+
+static unsigned int fallback_tgran2_shift(u64 mmfr0)
{
- wi->t0sz = vtcr & TCR_EL2_T0SZ_MASK;
+ if (has_tgran_2(mmfr0, PAGE_SHIFT))
+ return PAGE_SHIFT;
+ else if (has_tgran_2(mmfr0, 12))
+ return 12;
+ else if (has_tgran_2(mmfr0, 14))
+ return 14;
+ else if (has_tgran_2(mmfr0, 16))
+ return 16;
+ else
+ return PAGE_SHIFT;
+}
- switch (FIELD_GET(VTCR_EL2_TG0_MASK, vtcr)) {
+static unsigned int vtcr_to_tg0_pgshift(struct kvm *kvm, u64 vtcr)
+{
+ u64 tg0 = FIELD_GET(VTCR_EL2_TG0_MASK, vtcr);
+ u64 mmfr0 = kvm_read_vm_id_reg(kvm, SYS_ID_AA64MMFR0_EL1);
+ unsigned int shift;
+
+ switch (tg0) {
case VTCR_EL2_TG0_4K:
- wi->pgshift = 12; break;
+ shift = 12;
+ break;
case VTCR_EL2_TG0_16K:
- wi->pgshift = 14; break;
+ shift = 14;
+ break;
case VTCR_EL2_TG0_64K:
- default: /* IMPDEF: treat any other value as 64k */
- wi->pgshift = 16; break;
+ /* IMPDEF: treat any other value as 64k, subject to fallback */
+ default:
+ shift = 16;
}
+ /*
+ * If TGx is programmed to an unimplemented value (not advertised in
+ * ID_AA64MMFR0_EL1), we should treat it as if an implemented value is
+ * written, as per the architecture. Choose an available one while
+ * prioritizing PAGE_SIZE.
+ */
+ if (!has_tgran_2(mmfr0, shift))
+ return fallback_tgran2_shift(mmfr0);
+
+ return shift;
+}
+
+static size_t vtcr_to_tg0_pgsize(struct kvm *kvm, u64 vtcr)
+{
+ return BIT(vtcr_to_tg0_pgshift(kvm, vtcr));
+}
+
+static void setup_s2_walk(struct kvm_vcpu *vcpu, struct s2_walk_info *wi)
+{
+ u64 vtcr = vcpu_read_sys_reg(vcpu, VTCR_EL2);
+
+ wi->baddr = vcpu_read_sys_reg(vcpu, VTTBR_EL2);
+ wi->t0sz = vtcr & VTCR_EL2_T0SZ_MASK;
+ wi->pgshift = vtcr_to_tg0_pgshift(vcpu->kvm, vtcr);
wi->sl = FIELD_GET(VTCR_EL2_SL0_MASK, vtcr);
/* Global limit for now, should eventually be per-VM */
wi->max_oa_bits = min(get_kvm_ipa_limit(),
ps_to_output_size(FIELD_GET(VTCR_EL2_PS_MASK, vtcr), false));
-
wi->ha = vtcr & VTCR_EL2_HA;
+ wi->be = vcpu_read_sys_reg(vcpu, SCTLR_EL2) & SCTLR_ELx_EE;
}
int kvm_walk_nested_s2(struct kvm_vcpu *vcpu, phys_addr_t gipa,
struct kvm_s2_trans *result)
{
- u64 vtcr = vcpu_read_sys_reg(vcpu, VTCR_EL2);
struct s2_walk_info wi;
int ret;
@@ -412,11 +484,7 @@ int kvm_walk_nested_s2(struct kvm_vcpu *vcpu, phys_addr_t gipa,
if (!vcpu_has_nv(vcpu))
return 0;
- wi.baddr = vcpu_read_sys_reg(vcpu, VTTBR_EL2);
-
- vtcr_to_walk_info(vtcr, &wi);
-
- wi.be = vcpu_read_sys_reg(vcpu, SCTLR_EL2) & SCTLR_ELx_EE;
+ setup_s2_walk(vcpu, &wi);
ret = walk_nested_s2_pgd(vcpu, gipa, &wi, result);
if (ret)
@@ -512,20 +580,21 @@ static u8 pgshift_level_to_ttl(u16 shift, u8 level)
*/
static u8 get_guest_mapping_ttl(struct kvm_s2_mmu *mmu, u64 addr)
{
- u64 tmp, sz = 0, vtcr = mmu->tlb_vtcr;
+ size_t tg0_size = vtcr_to_tg0_pgsize(kvm_s2_mmu_to_kvm(mmu), mmu->tlb_vtcr);
+ u64 tmp, sz = 0;
kvm_pte_t pte;
u8 ttl, level;
lockdep_assert_held_write(&kvm_s2_mmu_to_kvm(mmu)->mmu_lock);
- switch (FIELD_GET(VTCR_EL2_TG0_MASK, vtcr)) {
- case VTCR_EL2_TG0_4K:
+ switch (tg0_size) {
+ case SZ_4K:
ttl = (TLBI_TTL_TG_4K << 2);
break;
- case VTCR_EL2_TG0_16K:
+ case SZ_16K:
ttl = (TLBI_TTL_TG_16K << 2);
break;
- case VTCR_EL2_TG0_64K:
+ case SZ_64K:
default: /* IMPDEF: treat any other value as 64k */
ttl = (TLBI_TTL_TG_64K << 2);
break;
@@ -535,19 +604,19 @@ static u8 get_guest_mapping_ttl(struct kvm_s2_mmu *mmu, u64 addr)
again:
/* Iteratively compute the block sizes for a particular granule size */
- switch (FIELD_GET(VTCR_EL2_TG0_MASK, vtcr)) {
- case VTCR_EL2_TG0_4K:
+ switch (tg0_size) {
+ case SZ_4K:
if (sz < SZ_4K) sz = SZ_4K;
else if (sz < SZ_2M) sz = SZ_2M;
else if (sz < SZ_1G) sz = SZ_1G;
else sz = 0;
break;
- case VTCR_EL2_TG0_16K:
+ case SZ_16K:
if (sz < SZ_16K) sz = SZ_16K;
else if (sz < SZ_32M) sz = SZ_32M;
else sz = 0;
break;
- case VTCR_EL2_TG0_64K:
+ case SZ_64K:
default: /* IMPDEF: treat any other value as 64k */
if (sz < SZ_64K) sz = SZ_64K;
else if (sz < SZ_512M) sz = SZ_512M;
@@ -598,14 +667,14 @@ unsigned long compute_tlb_inval_range(struct kvm_s2_mmu *mmu, u64 val)
if (!max_size) {
/* Compute the maximum extent of the invalidation */
- switch (FIELD_GET(VTCR_EL2_TG0_MASK, mmu->tlb_vtcr)) {
- case VTCR_EL2_TG0_4K:
+ switch (vtcr_to_tg0_pgsize(kvm, mmu->tlb_vtcr)) {
+ case SZ_4K:
max_size = SZ_1G;
break;
- case VTCR_EL2_TG0_16K:
+ case SZ_16K:
max_size = SZ_32M;
break;
- case VTCR_EL2_TG0_64K:
+ case SZ_64K:
default: /* IMPDEF: treat any other value as 64k */
/*
* No, we do not support 52bit IPA in nested yet. Once
@@ -1498,21 +1567,6 @@ static void kvm_map_l1_vncr(struct kvm_vcpu *vcpu)
}
}
-#define has_tgran_2(__r, __sz) \
- ({ \
- u64 _s1, _s2, _mmfr0 = __r; \
- \
- _s2 = SYS_FIELD_GET(ID_AA64MMFR0_EL1, \
- TGRAN##__sz##_2, _mmfr0); \
- \
- _s1 = SYS_FIELD_GET(ID_AA64MMFR0_EL1, \
- TGRAN##__sz, _mmfr0); \
- \
- ((_s2 != ID_AA64MMFR0_EL1_TGRAN##__sz##_2_NI && \
- _s2 != ID_AA64MMFR0_EL1_TGRAN##__sz##_2_TGRAN##__sz) || \
- (_s2 == ID_AA64MMFR0_EL1_TGRAN##__sz##_2_TGRAN##__sz && \
- _s1 != ID_AA64MMFR0_EL1_TGRAN##__sz##_NI)); \
- })
/*
* Our emulated CPU doesn't support all the possible features. For the
* sake of simplicity (and probably mental sanity), wipe out a number
@@ -1599,15 +1653,15 @@ u64 limit_nv_id_reg(struct kvm *kvm, u32 reg, u64 val)
*/
switch (PAGE_SIZE) {
case SZ_4K:
- if (has_tgran_2(orig_val, 4))
+ if (_has_tgran_2(orig_val, 4))
val |= SYS_FIELD_PREP_ENUM(ID_AA64MMFR0_EL1, TGRAN4_2, IMP);
fallthrough;
case SZ_16K:
- if (has_tgran_2(orig_val, 16))
+ if (_has_tgran_2(orig_val, 16))
val |= SYS_FIELD_PREP_ENUM(ID_AA64MMFR0_EL1, TGRAN16_2, IMP);
fallthrough;
case SZ_64K:
- if (has_tgran_2(orig_val, 64))
+ if (_has_tgran_2(orig_val, 64))
val |= SYS_FIELD_PREP_ENUM(ID_AA64MMFR0_EL1, TGRAN64_2, IMP);
break;
}
diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
index c816db5d67611..98305bbfc095a 100644
--- a/arch/arm64/kvm/pmu-emul.c
+++ b/arch/arm64/kvm/pmu-emul.c
@@ -396,44 +396,31 @@ static bool kvm_pmu_overflow_status(struct kvm_vcpu *vcpu)
static void kvm_pmu_update_state(struct kvm_vcpu *vcpu)
{
struct kvm_pmu *pmu = &vcpu->arch.pmu;
- bool overflow;
- overflow = kvm_pmu_overflow_status(vcpu);
- if (pmu->irq_level == overflow)
+ if (unlikely(!irqchip_in_kernel(vcpu->kvm)))
return;
- pmu->irq_level = overflow;
-
- if (likely(irqchip_in_kernel(vcpu->kvm))) {
- int ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu,
- pmu->irq_num, overflow, pmu);
- WARN_ON(ret);
- }
+ WARN_ON(kvm_vgic_inject_irq(vcpu->kvm, vcpu, pmu->irq_num,
+ kvm_pmu_overflow_status(vcpu), pmu));
}
bool kvm_pmu_should_notify_user(struct kvm_vcpu *vcpu)
{
- struct kvm_pmu *pmu = &vcpu->arch.pmu;
struct kvm_sync_regs *sregs = &vcpu->run->s.regs;
bool run_level = sregs->device_irq_level & KVM_ARM_DEV_PMU;
- if (likely(irqchip_in_kernel(vcpu->kvm)))
- return false;
-
- return pmu->irq_level != run_level;
+ return kvm_pmu_overflow_status(vcpu) != run_level;
}
/*
* Reflect the PMU overflow interrupt output level into the kvm_run structure
*/
-void kvm_pmu_update_run(struct kvm_vcpu *vcpu)
+bool kvm_pmu_update_run(struct kvm_vcpu *vcpu)
{
- struct kvm_sync_regs *regs = &vcpu->run->s.regs;
-
- /* Populate the timer bitmap for user space */
- regs->device_irq_level &= ~KVM_ARM_DEV_PMU;
- if (vcpu->arch.pmu.irq_level)
- regs->device_irq_level |= KVM_ARM_DEV_PMU;
+ bool update = kvm_pmu_should_notify_user(vcpu);
+ if (update)
+ vcpu->run->s.regs.device_irq_level ^= KVM_ARM_DEV_PMU;
+ return update;
}
/**
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index fa5c93c7a1352..febb3a2b9ce78 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -724,6 +724,7 @@ static bool access_gicv5_ppi_enabler(struct kvm_vcpu *vcpu,
{
unsigned long *mask = vcpu->kvm->arch.vgic.gicv5_vm.vgic_ppi_mask;
struct vgic_v5_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v5;
+ unsigned long reg = p->regval;
int i;
/* We never expect to get here with a read! */
@@ -731,27 +732,23 @@ static bool access_gicv5_ppi_enabler(struct kvm_vcpu *vcpu,
return undef_access(vcpu, p, r);
/*
- * If we're only handling architected PPIs and the guest writes to the
- * enable for the non-architected PPIs, we just return as there's
- * nothing to do at all. We don't even allocate the storage for them in
- * this case.
+ * As we're only handling architected PPIs, the guest writes to the
+ * enable for the non-architected PPIs just return as there's
+ * nothing to do at all. We don't even allocate the storage for them.
*/
- if (VGIC_V5_NR_PRIVATE_IRQS == 64 && p->Op2 % 2)
+ if (p->Op2 % 2)
return true;
/*
- * Merge the raw guest write into out bitmap at an offset of either 0 or
- * 64, then and it with our PPI mask.
+ * Merge the raw guest write into out bitmap, anded with our PPI mask.
*/
- bitmap_write(cpu_if->vgic_ppi_enabler, p->regval, 64 * (p->Op2 % 2), 64);
- bitmap_and(cpu_if->vgic_ppi_enabler, cpu_if->vgic_ppi_enabler, mask,
- VGIC_V5_NR_PRIVATE_IRQS);
+ bitmap_and(cpu_if->vgic_ppi_enabler, &reg, mask, VGIC_V5_NR_PRIVATE_IRQS);
/*
* Sync the change in enable states to the vgic_irqs. We consider all
* PPIs as we don't expose many to the guest.
*/
- for_each_set_bit(i, mask, VGIC_V5_NR_PRIVATE_IRQS) {
+ for_each_visible_v5_ppi(i, vcpu->kvm) {
u32 intid = vgic_v5_make_ppi(i);
struct vgic_irq *irq;
diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c
index 933983bb20052..907057881b26a 100644
--- a/arch/arm64/kvm/vgic/vgic-init.c
+++ b/arch/arm64/kvm/vgic/vgic-init.c
@@ -271,18 +271,12 @@ int kvm_vgic_vcpu_nv_init(struct kvm_vcpu *vcpu)
return ret;
}
-static void vgic_allocate_private_irq(struct kvm_vcpu *vcpu, int i, u32 type)
+static void vgic_setup_private_irq(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
+ u32 type)
{
- struct vgic_irq *irq = &vcpu->arch.vgic_cpu.private_irqs[i];
+ irq->intid = irq - &vcpu->arch.vgic_cpu.private_irqs[0];
- INIT_LIST_HEAD(&irq->ap_list);
- raw_spin_lock_init(&irq->irq_lock);
- irq->vcpu = NULL;
- irq->target_vcpu = vcpu;
- refcount_set(&irq->refcount, 0);
-
- irq->intid = i;
- if (vgic_irq_is_sgi(i)) {
+ if (vgic_irq_is_sgi(irq->intid)) {
/* SGIs */
irq->enabled = 1;
irq->config = VGIC_CONFIG_EDGE;
@@ -303,18 +297,11 @@ static void vgic_allocate_private_irq(struct kvm_vcpu *vcpu, int i, u32 type)
}
}
-static void vgic_v5_allocate_private_irq(struct kvm_vcpu *vcpu, int i, u32 type)
+static void vgic_v5_setup_private_irq(struct kvm_vcpu *vcpu, struct vgic_irq *irq)
{
- struct vgic_irq *irq = &vcpu->arch.vgic_cpu.private_irqs[i];
- u32 intid = vgic_v5_make_ppi(i);
-
- INIT_LIST_HEAD(&irq->ap_list);
- raw_spin_lock_init(&irq->irq_lock);
- irq->vcpu = NULL;
- irq->target_vcpu = vcpu;
- refcount_set(&irq->refcount, 0);
+ int i = irq - &vcpu->arch.vgic_cpu.private_irqs[0];
- irq->intid = intid;
+ irq->intid = vgic_v5_make_ppi(i);
/* The only Edge architected PPI is the SW_PPI */
if (i == GICV5_ARCH_PPI_SW_PPI)
@@ -323,7 +310,7 @@ static void vgic_v5_allocate_private_irq(struct kvm_vcpu *vcpu, int i, u32 type)
irq->config = VGIC_CONFIG_LEVEL;
/* Register the GICv5-specific PPI ops */
- vgic_v5_set_ppi_ops(vcpu, intid);
+ vgic_v5_set_ppi_ops(vcpu, irq->intid);
}
static int vgic_allocate_private_irqs_locked(struct kvm_vcpu *vcpu, u32 type)
@@ -349,15 +336,19 @@ static int vgic_allocate_private_irqs_locked(struct kvm_vcpu *vcpu, u32 type)
if (!vgic_cpu->private_irqs)
return -ENOMEM;
- /*
- * Enable and configure all SGIs to be edge-triggered and
- * configure all PPIs as level-triggered.
- */
for (i = 0; i < num_private_irqs; i++) {
+ struct vgic_irq *irq = &vcpu->arch.vgic_cpu.private_irqs[i];
+
+ INIT_LIST_HEAD(&irq->ap_list);
+ raw_spin_lock_init(&irq->irq_lock);
+ irq->vcpu = NULL;
+ irq->target_vcpu = vcpu;
+ refcount_set(&irq->refcount, 0);
+
if (vgic_is_v5(vcpu->kvm))
- vgic_v5_allocate_private_irq(vcpu, i, type);
+ vgic_v5_setup_private_irq(vcpu, irq);
else
- vgic_allocate_private_irq(vcpu, i, type);
+ vgic_setup_private_irq(vcpu, irq, type);
}
return 0;
diff --git a/arch/arm64/kvm/vgic/vgic-irqfd.c b/arch/arm64/kvm/vgic/vgic-irqfd.c
index b9b86e3a6c862..19a1094536e6a 100644
--- a/arch/arm64/kvm/vgic/vgic-irqfd.c
+++ b/arch/arm64/kvm/vgic/vgic-irqfd.c
@@ -20,9 +20,15 @@ static int vgic_irqfd_set_irq(struct kvm_kernel_irq_routing_entry *e,
int level, bool line_status)
{
unsigned int spi_id = e->irqchip.pin + VGIC_NR_PRIVATE_IRQS;
+ int ret;
if (!vgic_valid_spi(kvm, spi_id))
return -EINVAL;
+
+ ret = vgic_lazy_init(kvm);
+ if (ret)
+ return ret;
+
return kvm_vgic_inject_irq(kvm, NULL, spi_id, level, NULL);
}
diff --git a/arch/arm64/kvm/vgic/vgic-kvm-device.c b/arch/arm64/kvm/vgic/vgic-kvm-device.c
index a96c77dccf353..90be99443df3b 100644
--- a/arch/arm64/kvm/vgic/vgic-kvm-device.c
+++ b/arch/arm64/kvm/vgic/vgic-kvm-device.c
@@ -730,18 +730,15 @@ static int vgic_v5_get_userspace_ppis(struct kvm_device *dev,
guard(mutex)(&dev->kvm->arch.config_lock);
/*
- * We either support 64 or 128 PPIs. In the former case, we need to
- * return 0s for the second 64 bits as we have no storage backing those.
+ * We only support 64 PPIs, so, we need to return 0s for the
+ * second 64 bits as we have no storage backing those.
*/
ret = put_user(bitmap_read(gicv5_vm->userspace_ppis, 0, 64), uaddr);
if (ret)
return ret;
uaddr++;
- if (VGIC_V5_NR_PRIVATE_IRQS == 128)
- ret = put_user(bitmap_read(gicv5_vm->userspace_ppis, 64, 128), uaddr);
- else
- ret = put_user(0, uaddr);
+ ret = put_user(0, uaddr);
return ret;
}
diff --git a/arch/arm64/kvm/vgic/vgic-v5.c b/arch/arm64/kvm/vgic/vgic-v5.c
index fdd39ea7f83ec..d4789ff3e7402 100644
--- a/arch/arm64/kvm/vgic/vgic-v5.c
+++ b/arch/arm64/kvm/vgic/vgic-v5.c
@@ -10,7 +10,7 @@
#include "vgic.h"
-static struct vgic_v5_ppi_caps ppi_caps;
+#define ppi_caps kvm_vgic_global_state.vgic_v5_ppi_caps
/*
* Not all PPIs are guaranteed to be implemented for GICv5. Deterermine which
@@ -18,20 +18,17 @@ static struct vgic_v5_ppi_caps ppi_caps;
*/
static void vgic_v5_get_implemented_ppis(void)
{
- if (!cpus_have_final_cap(ARM64_HAS_GICV5_CPUIF))
- return;
-
/*
* If we have KVM, we have EL2, which means that we have support for the
* EL1 and EL2 Physical & Virtual timers.
*/
- __assign_bit(GICV5_ARCH_PPI_CNTHP, ppi_caps.impl_ppi_mask, 1);
- __assign_bit(GICV5_ARCH_PPI_CNTV, ppi_caps.impl_ppi_mask, 1);
- __assign_bit(GICV5_ARCH_PPI_CNTHV, ppi_caps.impl_ppi_mask, 1);
- __assign_bit(GICV5_ARCH_PPI_CNTP, ppi_caps.impl_ppi_mask, 1);
+ __set_bit(GICV5_ARCH_PPI_CNTHP, ppi_caps.impl_ppi_mask);
+ __set_bit(GICV5_ARCH_PPI_CNTV, ppi_caps.impl_ppi_mask);
+ __set_bit(GICV5_ARCH_PPI_CNTHV, ppi_caps.impl_ppi_mask);
+ __set_bit(GICV5_ARCH_PPI_CNTP, ppi_caps.impl_ppi_mask);
/* The SW_PPI should be available */
- __assign_bit(GICV5_ARCH_PPI_SW_PPI, ppi_caps.impl_ppi_mask, 1);
+ __set_bit(GICV5_ARCH_PPI_SW_PPI, ppi_caps.impl_ppi_mask);
/* The PMUIRQ is available if we have the PMU */
__assign_bit(GICV5_ARCH_PPI_PMUIRQ, ppi_caps.impl_ppi_mask, system_supports_pmuv3());
@@ -146,9 +143,7 @@ int vgic_v5_init(struct kvm *kvm)
/* We only allow userspace to drive the SW_PPI, if it is implemented. */
bitmap_zero(kvm->arch.vgic.gicv5_vm.userspace_ppis,
VGIC_V5_NR_PRIVATE_IRQS);
- __assign_bit(GICV5_ARCH_PPI_SW_PPI,
- kvm->arch.vgic.gicv5_vm.userspace_ppis,
- VGIC_V5_NR_PRIVATE_IRQS);
+ __set_bit(GICV5_ARCH_PPI_SW_PPI, kvm->arch.vgic.gicv5_vm.userspace_ppis);
bitmap_and(kvm->arch.vgic.gicv5_vm.userspace_ppis,
kvm->arch.vgic.gicv5_vm.userspace_ppis,
ppi_caps.impl_ppi_mask, VGIC_V5_NR_PRIVATE_IRQS);
@@ -197,7 +192,7 @@ int vgic_v5_finalize_ppi_state(struct kvm *kvm)
/* Expose PPIs with an owner or the SW_PPI, only */
scoped_guard(raw_spinlock_irqsave, &irq->irq_lock) {
if (irq->owner || i == GICV5_ARCH_PPI_SW_PPI) {
- __assign_bit(i, kvm->arch.vgic.gicv5_vm.vgic_ppi_mask, 1);
+ __set_bit(i, kvm->arch.vgic.gicv5_vm.vgic_ppi_mask);
__assign_bit(i, kvm->arch.vgic.gicv5_vm.vgic_ppi_hmr,
irq->config == VGIC_CONFIG_LEVEL);
}
@@ -243,9 +238,9 @@ static u32 vgic_v5_get_effective_priority_mask(struct kvm_vcpu *vcpu)
/*
* For GICv5, the PPIs are mostly directly managed by the hardware. We (the
- * hypervisor) handle the pending, active, enable state save/restore, but don't
- * need the PPIs to be queued on a per-VCPU AP list. Therefore, sanity check the
- * state, unlock, and return.
+ * hypervisor) handle the pending, active, enable state save/restore, but
+ * don't need the PPIs to be queued on a per-VCPU AP list. Therefore,
+ * unlock, kick the vcpu and return.
*/
bool vgic_v5_ppi_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq,
unsigned long flags)
@@ -255,12 +250,7 @@ bool vgic_v5_ppi_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq,
lockdep_assert_held(&irq->irq_lock);
- if (WARN_ON_ONCE(!__irq_is_ppi(KVM_DEV_TYPE_ARM_VGIC_V5, irq->intid)))
- goto out_unlock_fail;
-
vcpu = irq->target_vcpu;
- if (WARN_ON_ONCE(!vcpu))
- goto out_unlock_fail;
raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
@@ -269,11 +259,6 @@ bool vgic_v5_ppi_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq,
kvm_vcpu_kick(vcpu);
return true;
-
-out_unlock_fail:
- raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
-
- return false;
}
/*
@@ -287,10 +272,10 @@ void vgic_v5_set_ppi_dvi(struct kvm_vcpu *vcpu, struct vgic_irq *irq, bool dvi)
lockdep_assert_held(&irq->irq_lock);
ppi = vgic_v5_get_hwirq_id(irq->intid);
- __assign_bit(ppi, cpu_if->vgic_ppi_dvir, dvi);
+ assign_bit(ppi, cpu_if->vgic_ppi_dvir, dvi);
}
-static struct irq_ops vgic_v5_ppi_irq_ops = {
+static const struct irq_ops vgic_v5_ppi_irq_ops = {
.queue_irq_unlock = vgic_v5_ppi_queue_irq_unlock,
.set_direct_injection = vgic_v5_set_ppi_dvi,
};
@@ -316,7 +301,7 @@ static void vgic_v5_sync_ppi_priorities(struct kvm_vcpu *vcpu)
* those actually exposed to the guest by first iterating over the mask
* of exposed PPIs.
*/
- for_each_set_bit(i, vcpu->kvm->arch.vgic.gicv5_vm.vgic_ppi_mask, VGIC_V5_NR_PRIVATE_IRQS) {
+ for_each_visible_v5_ppi(i, vcpu->kvm) {
u32 intid = vgic_v5_make_ppi(i);
struct vgic_irq *irq;
int pri_idx, pri_reg, pri_bit;
@@ -358,7 +343,7 @@ bool vgic_v5_has_pending_ppi(struct kvm_vcpu *vcpu)
if (!priority_mask)
return false;
- for_each_set_bit(i, vcpu->kvm->arch.vgic.gicv5_vm.vgic_ppi_mask, VGIC_V5_NR_PRIVATE_IRQS) {
+ for_each_visible_v5_ppi(i, vcpu->kvm) {
u32 intid = vgic_v5_make_ppi(i);
bool has_pending = false;
struct vgic_irq *irq;
@@ -391,8 +376,7 @@ void vgic_v5_fold_ppi_state(struct kvm_vcpu *vcpu)
activer = host_data_ptr(vgic_v5_ppi_state)->activer_exit;
pendr = host_data_ptr(vgic_v5_ppi_state)->pendr;
- for_each_set_bit(i, vcpu->kvm->arch.vgic.gicv5_vm.vgic_ppi_mask,
- VGIC_V5_NR_PRIVATE_IRQS) {
+ for_each_visible_v5_ppi(i, vcpu->kvm) {
u32 intid = vgic_v5_make_ppi(i);
struct vgic_irq *irq;
@@ -429,8 +413,7 @@ void vgic_v5_flush_ppi_state(struct kvm_vcpu *vcpu)
* ICC_PPI_PENDRx_EL1, however.
*/
bitmap_zero(pendr, VGIC_V5_NR_PRIVATE_IRQS);
- for_each_set_bit(i, vcpu->kvm->arch.vgic.gicv5_vm.vgic_ppi_mask,
- VGIC_V5_NR_PRIVATE_IRQS) {
+ for_each_visible_v5_ppi(i, vcpu->kvm) {
u32 intid = vgic_v5_make_ppi(i);
struct vgic_irq *irq;
diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c
index 1e9fe8764584d..5a4768d8cd4f3 100644
--- a/arch/arm64/kvm/vgic/vgic.c
+++ b/arch/arm64/kvm/vgic/vgic.c
@@ -106,24 +106,23 @@ struct vgic_irq *vgic_get_irq(struct kvm *kvm, u32 intid)
struct vgic_irq *vgic_get_vcpu_irq(struct kvm_vcpu *vcpu, u32 intid)
{
+ enum kvm_device_type type;
+
if (WARN_ON(!vcpu))
return NULL;
- if (vgic_is_v5(vcpu->kvm)) {
- u32 int_num, hwirq_id;
-
- if (!__irq_is_ppi(KVM_DEV_TYPE_ARM_VGIC_V5, intid))
- return NULL;
-
- hwirq_id = FIELD_GET(GICV5_HWIRQ_ID, intid);
- int_num = array_index_nospec(hwirq_id, VGIC_V5_NR_PRIVATE_IRQS);
+ type = vcpu->kvm->arch.vgic.vgic_model;
- return &vcpu->arch.vgic_cpu.private_irqs[int_num];
- }
+ if (__irq_is_sgi(type, intid) || __irq_is_ppi(type, intid)) {
+ switch (type) {
+ case KVM_DEV_TYPE_ARM_VGIC_V5:
+ intid = vgic_v5_get_hwirq_id(intid);
+ intid = array_index_nospec(intid, VGIC_V5_NR_PRIVATE_IRQS);
+ break;
+ default:
+ intid = array_index_nospec(intid, VGIC_NR_PRIVATE_IRQS);
+ }
- /* SGIs and PPIs */
- if (intid < VGIC_NR_PRIVATE_IRQS) {
- intid = array_index_nospec(intid, VGIC_NR_PRIVATE_IRQS);
return &vcpu->arch.vgic_cpu.private_irqs[intid];
}
@@ -534,11 +533,9 @@ int kvm_vgic_inject_irq(struct kvm *kvm, struct kvm_vcpu *vcpu,
{
struct vgic_irq *irq;
unsigned long flags;
- int ret;
- ret = vgic_lazy_init(kvm);
- if (ret)
- return ret;
+ if (unlikely(!vgic_initialized(kvm)))
+ return 0;
if (!vcpu && irq_is_private(kvm, intid))
return -EINVAL;
@@ -573,7 +570,7 @@ int kvm_vgic_inject_irq(struct kvm *kvm, struct kvm_vcpu *vcpu,
}
void kvm_vgic_set_irq_ops(struct kvm_vcpu *vcpu, u32 vintid,
- struct irq_ops *ops)
+ const struct irq_ops *ops)
{
struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, vintid);
diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h
index 9d941241c8a2b..f45f7e3ec4d6e 100644
--- a/arch/arm64/kvm/vgic/vgic.h
+++ b/arch/arm64/kvm/vgic/vgic.h
@@ -378,6 +378,9 @@ void vgic_v5_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
void vgic_v5_restore_state(struct kvm_vcpu *vcpu);
void vgic_v5_save_state(struct kvm_vcpu *vcpu);
+#define for_each_visible_v5_ppi(__i, __k) \
+ for_each_set_bit(__i, (__k)->arch.vgic.gicv5_vm.vgic_ppi_mask, VGIC_V5_NR_PRIVATE_IRQS)
+
static inline int vgic_v3_max_apr_idx(struct kvm_vcpu *vcpu)
{
struct vgic_cpu *cpu_if = &vcpu->arch.vgic_cpu;