aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
authorJakub Kicinski <kuba@kernel.org>2026-05-21 15:02:54 -0700
committerJakub Kicinski <kuba@kernel.org>2026-05-28 14:02:21 -0700
commitd44646fc9eeb423ad50f3043f11f66f491d908a7 (patch)
tree18e6b3eda2c313c246ba18b88b4e06b2e18432c2 /arch
parent0cf905cb9a12dbfb5d14896729b74508f83f73df (diff)
parent3e20009988e2470063824c58b19d1c80816cc46d (diff)
downloadlinux-next-history-d44646fc9eeb423ad50f3043f11f66f491d908a7.tar.gz
Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
Cross-merge networking fixes after downstream PR (net-7.1-rc6). Conflicts: drivers/net/phy/air_en8811h.c d895767c33781 ("net: phy: air_en8811h: add AN8811HB MCU assert/deassert support") dddfadd75197e ("net: phy: Add Airoha phy library for shared code") 5226bb6634cdf ("net: phy: air_phy_lib: Factorize BuckPBus register accessors") e08f0ea6daf2e ("net: phy: Rename Airoha common BuckPBus register accessors") net/sched/sch_netem.c a2f6ed7b4873 ("net/sched: netem: add per-impairment extended statistics") 9552b11e3eda ("net/sched: fix packet loop on netem when duplicate is on") Adjacent changes: drivers/dpll/zl3073x/core.c c1224569cef0 ("dpll: zl3073x: make frequency monitor a per-device attribute") 54e65df8cf18 ("dpll: zl3073x: report FFO as DPLL vs input reference offset") net/iucv/af_iucv.c 347fdd4df85f ("af_iucv: convert to getsockopt_iter") 3589d20a666c ("net/iucv: fix locking in .getsockopt") Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'arch')
-rw-r--r--arch/arm64/include/asm/insn.h2
-rw-r--r--arch/arm64/include/asm/tlb.h3
-rw-r--r--arch/arm64/kvm/arm.c4
-rw-r--r--arch/arm64/kvm/hyp/nvhe/trace.c9
-rw-r--r--arch/arm64/kvm/vgic/vgic-its.c4
-rw-r--r--arch/loongarch/include/asm/efi.h4
-rw-r--r--arch/loongarch/include/asm/paravirt.h6
-rw-r--r--arch/loongarch/include/asm/qspinlock.h5
-rw-r--r--arch/loongarch/kernel/kprobes.c14
-rw-r--r--arch/loongarch/kernel/relocate.c50
-rw-r--r--arch/loongarch/mm/init.c4
-rw-r--r--arch/nios2/include/asm/linkage.h2
-rw-r--r--arch/riscv/kvm/vcpu_insn.c9
-rw-r--r--arch/riscv/kvm/vcpu_pmu.c12
-rw-r--r--arch/riscv/kvm/vcpu_sbi_sta.c2
-rw-r--r--arch/riscv/kvm/vcpu_sbi_v01.c2
-rw-r--r--arch/s390/kernel/perf_pai.c31
-rw-r--r--arch/s390/kernel/topology.c10
-rw-r--r--arch/s390/kvm/dat.c1
-rw-r--r--arch/s390/kvm/dat.h3
-rw-r--r--arch/s390/kvm/gaccess.c1
-rw-r--r--arch/s390/kvm/gmap.c18
-rw-r--r--arch/s390/kvm/gmap.h61
-rw-r--r--arch/x86/entry/Makefile2
-rw-r--r--arch/x86/entry/common.c61
-rw-r--r--arch/x86/entry/entry.S46
-rw-r--r--arch/x86/entry/entry_64_fred.S1
-rw-r--r--arch/x86/entry/vdso/vma.c2
-rw-r--r--arch/x86/include/asm/desc.h4
-rw-r--r--arch/x86/include/asm/desc_defs.h2
-rw-r--r--arch/x86/include/asm/entry-common.h2
-rw-r--r--arch/x86/include/asm/fred.h1
-rw-r--r--arch/x86/kernel/cpu/cpuid-deps.c1
-rw-r--r--arch/x86/kernel/idt.c15
-rw-r--r--arch/x86/kernel/nmi.c1
-rw-r--r--arch/x86/kvm/svm/avic.c12
-rw-r--r--arch/x86/kvm/vmx/vmenter.S46
-rw-r--r--arch/x86/kvm/vmx/vmx.c19
-rw-r--r--arch/x86/kvm/x86.c2
39 files changed, 345 insertions, 129 deletions
diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h
index f463a654a2bbd..cc0702fa64a79 100644
--- a/arch/arm64/include/asm/insn.h
+++ b/arch/arm64/include/asm/insn.h
@@ -409,7 +409,7 @@ __AARCH64_INSN_FUNCS(cbz, 0x7F000000, 0x34000000)
__AARCH64_INSN_FUNCS(cbnz, 0x7F000000, 0x35000000)
__AARCH64_INSN_FUNCS(tbz, 0x7F000000, 0x36000000)
__AARCH64_INSN_FUNCS(tbnz, 0x7F000000, 0x37000000)
-__AARCH64_INSN_FUNCS(bcond, 0xFF000010, 0x54000000)
+__AARCH64_INSN_FUNCS(bcond, 0xFF000000, 0x54000000)
__AARCH64_INSN_FUNCS(svc, 0xFFE0001F, 0xD4000001)
__AARCH64_INSN_FUNCS(hvc, 0xFFE0001F, 0xD4000002)
__AARCH64_INSN_FUNCS(smc, 0xFFE0001F, 0xD4000003)
diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h
index 10869d7731b83..751bd57bc3bac 100644
--- a/arch/arm64/include/asm/tlb.h
+++ b/arch/arm64/include/asm/tlb.h
@@ -53,7 +53,8 @@ static inline int tlb_get_level(struct mmu_gather *tlb)
static inline void tlb_flush(struct mmu_gather *tlb)
{
struct vm_area_struct vma = TLB_FLUSH_VMA(tlb->mm, 0);
- tlbf_t flags = tlb->freed_tables ? TLBF_NONE : TLBF_NOWALKCACHE;
+ tlbf_t flags = (tlb->freed_tables || tlb->unshared_tables) ?
+ TLBF_NONE : TLBF_NOWALKCACHE;
unsigned long stride = tlb_get_unmap_size(tlb);
int tlb_level = tlb_get_level(tlb);
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 34c9950884d5e..9453321ef8c67 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -555,8 +555,10 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
kvm_destroy_mpidr_data(vcpu->kvm);
err = kvm_vgic_vcpu_init(vcpu);
- if (err)
+ if (err) {
+ kvm_vgic_vcpu_destroy(vcpu);
return err;
+ }
err = kvm_share_hyp(vcpu, vcpu + 1);
if (err)
diff --git a/arch/arm64/kvm/hyp/nvhe/trace.c b/arch/arm64/kvm/hyp/nvhe/trace.c
index a6ca27b18e154..e7e150ab265ff 100644
--- a/arch/arm64/kvm/hyp/nvhe/trace.c
+++ b/arch/arm64/kvm/hyp/nvhe/trace.c
@@ -164,13 +164,16 @@ static int hyp_trace_buffer_load(struct hyp_trace_buffer *trace_buffer,
return ret;
}
-static bool hyp_trace_desc_validate(struct hyp_trace_desc *desc, size_t desc_size)
+static bool hyp_trace_desc_is_valid(struct hyp_trace_desc *desc, size_t desc_size)
{
struct ring_buffer_desc *rb_desc;
unsigned int cpu;
size_t nr_bpages;
void *desc_end;
+ if (!is_protected_kvm_enabled())
+ return true;
+
/*
* Both desc_size and bpages_backing_size are untrusted host-provided
* values. We rely on __pkvm_host_donate_hyp() to enforce their validity.
@@ -212,8 +215,10 @@ int __tracing_load(unsigned long desc_hva, size_t desc_size)
if (ret)
return ret;
- if (!hyp_trace_desc_validate(desc, desc_size))
+ if (!hyp_trace_desc_is_valid(desc, desc_size)) {
+ ret = -EINVAL;
goto err_release_desc;
+ }
hyp_spin_lock(&trace_buffer.lock);
diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c
index 2ea9f1c7ebcd0..1d7e5d560af4c 100644
--- a/arch/arm64/kvm/vgic/vgic-its.c
+++ b/arch/arm64/kvm/vgic/vgic-its.c
@@ -2307,6 +2307,10 @@ static int vgic_its_restore_dte(struct vgic_its *its, u32 id,
/* dte entry is valid */
offset = (entry & KVM_ITS_DTE_NEXT_MASK) >> KVM_ITS_DTE_NEXT_SHIFT;
+ /* Mimic the MAPD behaviour and reject invalid EID bits. */
+ if (num_eventid_bits > VITS_TYPER_IDBITS)
+ return -EINVAL;
+
if (!vgic_its_check_id(its, baser, id, NULL))
return -EINVAL;
diff --git a/arch/loongarch/include/asm/efi.h b/arch/loongarch/include/asm/efi.h
index eddc8e79b3fae..1ad764b18c3e8 100644
--- a/arch/loongarch/include/asm/efi.h
+++ b/arch/loongarch/include/asm/efi.h
@@ -30,6 +30,8 @@ static inline unsigned long efi_get_kimg_min_align(void)
return SZ_2M;
}
-#define EFI_KIMG_PREFERRED_ADDRESS PHYSADDR(VMLINUX_LOAD_ADDRESS)
+unsigned long efi_get_kimg_kaslr_address(void);
+
+#define EFI_KIMG_PREFERRED_ADDRESS efi_get_kimg_kaslr_address()
#endif /* _ASM_LOONGARCH_EFI_H */
diff --git a/arch/loongarch/include/asm/paravirt.h b/arch/loongarch/include/asm/paravirt.h
index 0111f0ad5f733..acae1c5e5f882 100644
--- a/arch/loongarch/include/asm/paravirt.h
+++ b/arch/loongarch/include/asm/paravirt.h
@@ -4,6 +4,12 @@
#ifdef CONFIG_PARAVIRT
+#include <linux/jump_label.h>
+
+DECLARE_STATIC_KEY_FALSE(virt_preempt_key);
+DECLARE_STATIC_KEY_FALSE(virt_spin_lock_key);
+DECLARE_PER_CPU(struct kvm_steal_time, steal_time);
+
int __init pv_ipi_init(void);
int __init pv_time_init(void);
int __init pv_spinlock_init(void);
diff --git a/arch/loongarch/include/asm/qspinlock.h b/arch/loongarch/include/asm/qspinlock.h
index 0ee15b3b39372..fbfc6be82f26b 100644
--- a/arch/loongarch/include/asm/qspinlock.h
+++ b/arch/loongarch/include/asm/qspinlock.h
@@ -3,12 +3,9 @@
#define _ASM_LOONGARCH_QSPINLOCK_H
#include <asm/kvm_para.h>
-#include <linux/jump_label.h>
+#include <asm/paravirt.h>
#ifdef CONFIG_PARAVIRT
-DECLARE_STATIC_KEY_FALSE(virt_preempt_key);
-DECLARE_STATIC_KEY_FALSE(virt_spin_lock_key);
-DECLARE_PER_CPU(struct kvm_steal_time, steal_time);
#define virt_spin_lock virt_spin_lock
diff --git a/arch/loongarch/kernel/kprobes.c b/arch/loongarch/kernel/kprobes.c
index 8ba391cfabb00..1985ed30dd16f 100644
--- a/arch/loongarch/kernel/kprobes.c
+++ b/arch/loongarch/kernel/kprobes.c
@@ -60,16 +60,18 @@ NOKPROBE_SYMBOL(arch_prepare_kprobe);
/* Install breakpoint in text */
void arch_arm_kprobe(struct kprobe *p)
{
- *p->addr = KPROBE_BP_INSN;
- flush_insn_slot(p);
+ u32 insn = KPROBE_BP_INSN;
+
+ larch_insn_text_copy(p->addr, &insn, LOONGARCH_INSN_SIZE);
}
NOKPROBE_SYMBOL(arch_arm_kprobe);
/* Remove breakpoint from text */
void arch_disarm_kprobe(struct kprobe *p)
{
- *p->addr = p->opcode;
- flush_insn_slot(p);
+ u32 insn = p->opcode;
+
+ larch_insn_text_copy(p->addr, &insn, LOONGARCH_INSN_SIZE);
}
NOKPROBE_SYMBOL(arch_disarm_kprobe);
@@ -184,16 +186,16 @@ static bool reenter_kprobe(struct kprobe *p, struct pt_regs *regs,
struct kprobe_ctlblk *kcb)
{
switch (kcb->kprobe_status) {
- case KPROBE_HIT_SS:
case KPROBE_HIT_SSDONE:
case KPROBE_HIT_ACTIVE:
kprobes_inc_nmissed_count(p);
setup_singlestep(p, regs, kcb, 1);
break;
+ case KPROBE_HIT_SS:
case KPROBE_REENTER:
pr_warn("Failed to recover from reentered kprobes.\n");
dump_kprobe(p);
- WARN_ON_ONCE(1);
+ BUG();
break;
default:
WARN_ON(1);
diff --git a/arch/loongarch/kernel/relocate.c b/arch/loongarch/kernel/relocate.c
index 16f6a9b39659f..4b61a9632a980 100644
--- a/arch/loongarch/kernel/relocate.c
+++ b/arch/loongarch/kernel/relocate.c
@@ -134,11 +134,23 @@ early_param("nokaslr", nokaslr);
#define KASLR_DISABLED_MESSAGE "KASLR is disabled by %s in %s cmdline.\n"
+/*
+ * Note: strictly-defined KASLR means the kernel's final runtime address
+ * has a random offset from the kernel's load address, which is implemented
+ * in relocate.c; broadly-defined KASLR means the kernel's final runtime
+ * address has a random offset from the kernel's link address (a.k.a.
+ * VMLINUX_LOAD_ADDRESS), which also includes the efistub implementation,
+ * kexec_file implementation and QEMU direct kernel boot. kaslr_disabled()
+ * returning true only means that strictly-defined KASLR is disabled.
+ */
static inline __init bool kaslr_disabled(void)
{
char *str;
const char *builtin_cmdline = CONFIG_CMDLINE;
+ if (kaslr_offset())
+ return true; /* KASLR is performed during early boot. */
+
str = strstr(builtin_cmdline, "nokaslr");
if (str == builtin_cmdline || (str > builtin_cmdline && *(str - 1) == ' ')) {
pr_info(KASLR_DISABLED_MESSAGE, "\'nokaslr\'", "built-in");
@@ -210,14 +222,52 @@ static inline void __init *determine_relocation_address(void)
return RELOCATED_KASLR(destination);
}
+static unsigned long __init determine_initrd_address(unsigned long *size)
+{
+ unsigned long start = 0;
+ unsigned long key_length;
+ char *p, *endp, *key = "initrd=";
+
+ key_length = strlen(key);
+ p = strstr(boot_command_line, key);
+
+ if (!p) {
+ key = "initrdmem=";
+ key_length = strlen(key);
+ p = strstr(boot_command_line, key);
+ }
+
+ if (p == boot_command_line || (p > boot_command_line && *(p - 1) == ' ')) {
+ p += key_length;
+ start = memparse(p, &endp);
+ if (*endp == ',')
+ *size = memparse(endp + 1, NULL);
+ }
+
+ return start;
+}
+
static inline int __init relocation_addr_valid(void *location_new)
{
+ unsigned long kernel_start, kernel_size;
+ unsigned long initrd_start, initrd_size = 0;
+
if ((unsigned long)location_new & 0x00000ffff)
return 0; /* Inappropriately aligned new location */
if ((unsigned long)location_new < (unsigned long)_end)
return 0; /* New location overlaps original kernel */
+ initrd_start = determine_initrd_address(&initrd_size);
+ if (initrd_start && initrd_size) {
+ kernel_start = PHYSADDR(location_new);
+ kernel_size = (unsigned long)_end - (unsigned long)_text;
+
+ if (kernel_start < (initrd_start + initrd_size) &&
+ initrd_start < (kernel_start + kernel_size))
+ return 0; /* initrd/initramfs overlaps kernel */
+ }
+
return 1;
}
#endif
diff --git a/arch/loongarch/mm/init.c b/arch/loongarch/mm/init.c
index 3f9ab54114c51..031b39eb081c5 100644
--- a/arch/loongarch/mm/init.c
+++ b/arch/loongarch/mm/init.c
@@ -123,11 +123,7 @@ void arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap)
{
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
- struct page *page = pfn_to_page(start_pfn);
- /* With altmap the first mapped page is offset from @start */
- if (altmap)
- page += vmem_altmap_offset(altmap);
__remove_pages(start_pfn, nr_pages, altmap);
}
#endif
diff --git a/arch/nios2/include/asm/linkage.h b/arch/nios2/include/asm/linkage.h
index 211302301a8a5..c4073235852bb 100644
--- a/arch/nios2/include/asm/linkage.h
+++ b/arch/nios2/include/asm/linkage.h
@@ -12,4 +12,6 @@
#define __ALIGN .align 4
#define __ALIGN_STR ".align 4"
+#define _THIS_IP_ ({ unsigned long __ip; asm volatile("nextpc %0" : "=r" (__ip)); __ip; })
+
#endif
diff --git a/arch/riscv/kvm/vcpu_insn.c b/arch/riscv/kvm/vcpu_insn.c
index 4d89b94128aea..f09f9251d1f0a 100644
--- a/arch/riscv/kvm/vcpu_insn.c
+++ b/arch/riscv/kvm/vcpu_insn.c
@@ -415,7 +415,6 @@ int kvm_riscv_vcpu_mmio_load(struct kvm_vcpu *vcpu, struct kvm_run *run,
shift = 8 * (sizeof(ulong) - len);
} else if ((insn & INSN_MASK_LBU) == INSN_MATCH_LBU) {
len = 1;
- shift = 8 * (sizeof(ulong) - len);
#ifdef CONFIG_64BIT
} else if ((insn & INSN_MASK_LD) == INSN_MATCH_LD) {
len = 8;
@@ -649,22 +648,22 @@ int kvm_riscv_vcpu_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
case 1:
data8 = *((u8 *)run->mmio.data);
SET_RD(insn, &vcpu->arch.guest_context,
- (ulong)data8 << shift >> shift);
+ (long)((ulong)data8 << shift) >> shift);
break;
case 2:
data16 = *((u16 *)run->mmio.data);
SET_RD(insn, &vcpu->arch.guest_context,
- (ulong)data16 << shift >> shift);
+ (long)((ulong)data16 << shift) >> shift);
break;
case 4:
data32 = *((u32 *)run->mmio.data);
SET_RD(insn, &vcpu->arch.guest_context,
- (ulong)data32 << shift >> shift);
+ (long)((ulong)data32 << shift) >> shift);
break;
case 8:
data64 = *((u64 *)run->mmio.data);
SET_RD(insn, &vcpu->arch.guest_context,
- (ulong)data64 << shift >> shift);
+ (long)((ulong)data64 << shift) >> shift);
break;
default:
return -EOPNOTSUPP;
diff --git a/arch/riscv/kvm/vcpu_pmu.c b/arch/riscv/kvm/vcpu_pmu.c
index a935ed96bc176..bb46dcbfb24da 100644
--- a/arch/riscv/kvm/vcpu_pmu.c
+++ b/arch/riscv/kvm/vcpu_pmu.c
@@ -453,8 +453,10 @@ int kvm_riscv_vcpu_pmu_snapshot_set_shmem(struct kvm_vcpu *vcpu, unsigned long s
}
kvpmu->sdata = kzalloc(snapshot_area_size, GFP_ATOMIC);
- if (!kvpmu->sdata)
- return -ENOMEM;
+ if (!kvpmu->sdata) {
+ sbiret = SBI_ERR_FAILURE;
+ goto out;
+ }
/* No need to check writable slot explicitly as kvm_vcpu_write_guest does it internally */
if (kvm_vcpu_write_guest(vcpu, saddr, kvpmu->sdata, snapshot_area_size)) {
@@ -499,8 +501,10 @@ int kvm_riscv_vcpu_pmu_event_info(struct kvm_vcpu *vcpu, unsigned long saddr_low
}
einfo = kzalloc(shmem_size, GFP_KERNEL);
- if (!einfo)
- return -ENOMEM;
+ if (!einfo) {
+ ret = SBI_ERR_FAILURE;
+ goto out;
+ }
ret = kvm_vcpu_read_guest(vcpu, shmem, einfo, shmem_size);
if (ret) {
diff --git a/arch/riscv/kvm/vcpu_sbi_sta.c b/arch/riscv/kvm/vcpu_sbi_sta.c
index 3b834709b429f..60e50296a0085 100644
--- a/arch/riscv/kvm/vcpu_sbi_sta.c
+++ b/arch/riscv/kvm/vcpu_sbi_sta.c
@@ -46,7 +46,7 @@ void kvm_riscv_vcpu_record_steal_time(struct kvm_vcpu *vcpu)
gfn = shmem >> PAGE_SHIFT;
hva = kvm_vcpu_gfn_to_hva(vcpu, gfn);
- if (WARN_ON(kvm_is_error_hva(hva))) {
+ if (kvm_is_error_hva(hva)) {
vcpu->arch.sta.shmem = INVALID_GPA;
return;
}
diff --git a/arch/riscv/kvm/vcpu_sbi_v01.c b/arch/riscv/kvm/vcpu_sbi_v01.c
index 188d5ea5b3b85..c9c323d4577a9 100644
--- a/arch/riscv/kvm/vcpu_sbi_v01.c
+++ b/arch/riscv/kvm/vcpu_sbi_v01.c
@@ -55,6 +55,8 @@ static int kvm_sbi_ext_v01_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
for_each_set_bit(i, &hmask, BITS_PER_LONG) {
rvcpu = kvm_get_vcpu_by_id(vcpu->kvm, i);
+ if (!rvcpu)
+ continue;
ret = kvm_riscv_vcpu_set_interrupt(rvcpu, IRQ_VS_SOFT);
if (ret < 0)
break;
diff --git a/arch/s390/kernel/perf_pai.c b/arch/s390/kernel/perf_pai.c
index 86f71a3d1ef2d..cdb8006220ca0 100644
--- a/arch/s390/kernel/perf_pai.c
+++ b/arch/s390/kernel/perf_pai.c
@@ -186,6 +186,13 @@ static u64 pai_getctr(unsigned long *page, int nr, unsigned long offset)
return page[nr];
}
+static void pai_setctr(unsigned long *page, int nr, unsigned long offset, u64 v)
+{
+ if (offset)
+ nr += offset / sizeof(*page);
+ page[nr] = v;
+}
+
/* Read the counter values. Return value from location in CMP. For base
* event xxx_ALL sum up all events. Returns counter value.
*/
@@ -551,6 +558,8 @@ static void paicrypt_del(struct perf_event *event, int flags)
/* Create raw data and save it in buffer. Calculate the delta for each
* counter between this invocation and the last invocation.
* Returns number of bytes copied.
+ * After reading from PAI counter page, save the read value to the old
+ * page to calculate PAI counter deltas.
* Saves only entries with positive counter difference of the form
* 2 bytes: Number of counter
* 8 bytes: Value of counter
@@ -562,16 +571,22 @@ static size_t pai_copy(struct pai_userdata *userdata, unsigned long *page,
int i, outidx = 0;
for (i = 1; i <= pp->num_avail; i++) {
- u64 val = 0, val_old = 0;
+ u64 val = 0, val_old = 0, val_k = 0, val_old_k = 0;
if (!exclude_kernel) {
- val += pai_getctr(page, i, pp->kernel_offset);
- val_old += pai_getctr(page_old, i, pp->kernel_offset);
+ val_k = pai_getctr(page, i, pp->kernel_offset);
+ val_old_k = pai_getctr(page_old, i, pp->kernel_offset);
+ if (val_k != val_old_k)
+ pai_setctr(page_old, i, pp->kernel_offset, val_k);
}
if (!exclude_user) {
- val += pai_getctr(page, i, 0);
- val_old += pai_getctr(page_old, i, 0);
+ val = pai_getctr(page, i, 0);
+ val_old = pai_getctr(page_old, i, 0);
+ if (val != val_old)
+ pai_setctr(page_old, i, 0, val);
}
+ val += val_k;
+ val_old += val_old_k;
if (val >= val_old)
val -= val_old;
else
@@ -602,8 +617,6 @@ static size_t pai_copy(struct pai_userdata *userdata, unsigned long *page,
static int pai_push_sample(size_t rawsize, struct pai_map *cpump,
struct perf_event *event)
{
- int idx = PAI_PMU_IDX(event);
- struct pai_pmu *pp = &pai_pmu[idx];
struct perf_sample_data data;
struct perf_raw_record raw;
struct pt_regs regs;
@@ -634,8 +647,6 @@ static int pai_push_sample(size_t rawsize, struct pai_map *cpump,
overflow = perf_event_overflow(event, &data, &regs);
perf_event_update_userpage(event);
- /* Save crypto counter lowcore page after reading event data. */
- memcpy((void *)PAI_SAVE_AREA(event), cpump->area, pp->area_size);
return overflow;
}
@@ -651,7 +662,7 @@ static void pai_have_sample(struct perf_event *event, struct pai_map *cpump)
rawsize = pai_copy(cpump->save, cpump->area, pp,
(unsigned long *)PAI_SAVE_AREA(event),
event->attr.exclude_user,
- event->attr.exclude_kernel);
+ !pp->kernel_offset ? true : event->attr.exclude_kernel);
if (rawsize) /* No incremented counters */
pai_push_sample(rawsize, cpump, event);
}
diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
index 1913a5566ac2b..1377c6f3f6709 100644
--- a/arch/s390/kernel/topology.c
+++ b/arch/s390/kernel/topology.c
@@ -192,17 +192,21 @@ static void tl_to_masks(struct sysinfo_15_1_x *info)
end = (union topology_entry *)((unsigned long)info + info->length);
while (tle < end) {
switch (tle->nl) {
+ /*
+ * Adjust drawer_id, book_id, and socket_id so they match the
+ * numbering scheme of e.g. the hardware management console.
+ */
case 3:
drawer = drawer->next;
- drawer->id = tle->container.id;
+ drawer->id = tle->container.id - 1;
break;
case 2:
book = book->next;
- book->id = tle->container.id;
+ book->id = tle->container.id - 1;
break;
case 1:
socket = socket->next;
- socket->id = tle->container.id;
+ socket->id = tle->container.id - 1;
break;
case 0:
add_cpus_to_mask(&tle->cpu, drawer, book, socket);
diff --git a/arch/s390/kvm/dat.c b/arch/s390/kvm/dat.c
index 7b8d70fe406de..4a41c0247ffa2 100644
--- a/arch/s390/kvm/dat.c
+++ b/arch/s390/kvm/dat.c
@@ -267,6 +267,7 @@ static int dat_split_ste(struct kvm_s390_mmu_cache *mc, union pmd *pmdp, gfn_t g
/* No need to take locks as the page table is not installed yet. */
pgste_init.prefix_notif = old.s.fc1.prefix_notif;
pgste_init.vsie_notif = old.s.fc1.vsie_notif;
+ pgste_init.vsie_gmem = old.s.fc1.vsie_notif;
pgste_init.pcl = uses_skeys && init.h.i;
dat_init_pgstes(pt, pgste_init.val);
} else {
diff --git a/arch/s390/kvm/dat.h b/arch/s390/kvm/dat.h
index 8f8278c448794..873e13ac5a27f 100644
--- a/arch/s390/kvm/dat.h
+++ b/arch/s390/kvm/dat.h
@@ -145,7 +145,8 @@ union pgste {
unsigned long cmma_d : 1; /* Dirty flag for CMMA bits */
unsigned long prefix_notif : 1; /* Guest prefix invalidation notification */
unsigned long vsie_notif : 1; /* Referenced in a shadow table */
- unsigned long : 5;
+ unsigned long vsie_gmem : 1; /* Contains nested guest memory */
+ unsigned long : 4;
unsigned long : 8;
};
struct {
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index b07accd196182..4f8d5592c9a92 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -1445,6 +1445,7 @@ static int _do_shadow_pte(struct gmap *sg, gpa_t raddr, union pte *ptep_h, union
} else {
pgste = _gmap_ptep_xchg(sg->parent, ptep_h, newpte, pgste, f->gfn, false);
pgste.vsie_notif = 1;
+ pgste.vsie_gmem = 1;
}
pgste_set_unlock(ptep_h, pgste);
if (rc)
diff --git a/arch/s390/kvm/gmap.c b/arch/s390/kvm/gmap.c
index 3c26e35af0ef7..957126ab991ca 100644
--- a/arch/s390/kvm/gmap.c
+++ b/arch/s390/kvm/gmap.c
@@ -125,7 +125,7 @@ struct gmap *gmap_new_child(struct gmap *parent, gfn_t limit)
int gmap_set_limit(struct gmap *gmap, gfn_t limit)
{
- struct kvm_s390_mmu_cache *mc;
+ struct kvm_s390_mmu_cache *mc __free(kvm_s390_mmu_cache) = NULL;
int rc, type;
type = gmap_limit_to_type(limit);
@@ -142,7 +142,6 @@ int gmap_set_limit(struct gmap *gmap, gfn_t limit)
rc = dat_set_asce_limit(mc, &gmap->asce, type);
} while (rc == -ENOMEM);
- kvm_s390_free_mmu_cache(mc);
return 0;
}
@@ -822,8 +821,8 @@ int gmap_ucas_translate(struct kvm_s390_mmu_cache *mc, struct gmap *gmap, gpa_t
int gmap_ucas_map(struct gmap *gmap, gfn_t p_gfn, gfn_t c_gfn, unsigned long count)
{
- struct kvm_s390_mmu_cache *mc;
- int rc;
+ struct kvm_s390_mmu_cache *mc __free(kvm_s390_mmu_cache) = NULL;
+ int rc = 0;
mc = kvm_s390_new_mmu_cache();
if (!mc)
@@ -1026,13 +1025,15 @@ int gmap_insert_rmap(struct gmap *sg, gfn_t p_gfn, gfn_t r_gfn, int level)
int gmap_protect_rmap(struct kvm_s390_mmu_cache *mc, struct gmap *sg, gfn_t p_gfn, gfn_t r_gfn,
kvm_pfn_t pfn, int level, bool wr)
{
+ unsigned long bitmask;
union crste *crstep;
union pgste pgste;
union pte *ptep;
union pte pte;
int flags, rc;
- KVM_BUG_ON(!is_shadow(sg), sg->kvm);
+ if (KVM_BUG_ON(!is_shadow(sg) || level <= TABLE_TYPE_PAGE_TABLE, sg->kvm))
+ return -EINVAL;
lockdep_assert_held(&sg->parent->children_lock);
flags = DAT_WALK_SPLIT_ALLOC | (uses_skeys(sg->parent) ? DAT_WALK_USES_SKEYS : 0);
@@ -1041,8 +1042,9 @@ int gmap_protect_rmap(struct kvm_s390_mmu_cache *mc, struct gmap *sg, gfn_t p_gf
if (rc)
return rc;
if (level <= TABLE_TYPE_REGION1) {
+ bitmask = -1UL << (8 + 11 * level);
scoped_guard(spinlock, &sg->host_to_rmap_lock)
- rc = gmap_insert_rmap(sg, p_gfn, r_gfn, level);
+ rc = gmap_insert_rmap(sg, p_gfn, r_gfn & bitmask, level);
}
if (rc)
return rc;
@@ -1143,8 +1145,10 @@ void _gmap_handle_vsie_unshadow_event(struct gmap *parent, gfn_t gfn)
}
scoped_guard(spinlock, &sg->host_to_rmap_lock)
head = radix_tree_delete(&sg->host_to_rmap, gfn);
- gmap_for_each_rmap_safe(rmap, rnext, head)
+ gmap_for_each_rmap_safe(rmap, rnext, head) {
gmap_unshadow_level(sg, rmap->r_gfn, rmap->level);
+ kfree(rmap);
+ }
}
}
diff --git a/arch/s390/kvm/gmap.h b/arch/s390/kvm/gmap.h
index 96ee1395a5925..742e42a317445 100644
--- a/arch/s390/kvm/gmap.h
+++ b/arch/s390/kvm/gmap.h
@@ -167,6 +167,36 @@ static inline bool gmap_unmap_prefix(struct gmap *gmap, gfn_t gfn, gfn_t end)
return _gmap_unmap_prefix(gmap, gfn, end, false);
}
+/**
+ * pte_needs_unshadow() -- Check if a pte operation triggers unshadowing.
+ * @oldpte: the previous value for the guest pte.
+ * @newpte: the new pte being set.
+ * @pgste: the pgste for the pte entry.
+ *
+ * If the pgste.vsie_notif bit is not set, return false: the page is not
+ * involved in vsie and thus should not trigger an unshadow operation.
+ *
+ * If the pgste.vsie_gmem bit is set, this pte represents shadowed guest
+ * memory. The access rights on g3's memory should be synchronized with g1's
+ * and g2's. Therefore unshadowing is triggered if the new and old pte
+ * differ in protection, or if the new pte is invalid.
+ *
+ * If the pgste.vsie_gmem bit is not set, this pte maps the g2 dat tables
+ * for g3. If the entry becomes writable or absent, it becomes impossible to
+ * guarantee that the shadow mapping will match g2's mapping. In that case,
+ * trigger an unshadow event.
+ *
+ * Return: true if an unshadow event should be triggered, otherwise false.
+ */
+static inline bool pte_needs_unshadow(union pte oldpte, union pte newpte, union pgste pgste)
+{
+ if (!pgste.vsie_notif)
+ return false;
+ if (pgste.vsie_gmem)
+ return (oldpte.h.p != newpte.h.p) || newpte.h.i;
+ return !newpte.h.p || !newpte.s.pr;
+}
+
static inline union pgste _gmap_ptep_xchg(struct gmap *gmap, union pte *ptep, union pte newpte,
union pgste pgste, gfn_t gfn, bool needs_lock)
{
@@ -180,8 +210,9 @@ static inline union pgste _gmap_ptep_xchg(struct gmap *gmap, union pte *ptep, un
pgste.prefix_notif = 0;
gmap_unmap_prefix(gmap, gfn, gfn + 1);
}
- if (pgste.vsie_notif && (ptep->h.p != newpte.h.p || newpte.h.i)) {
+ if (pte_needs_unshadow(*ptep, newpte, pgste)) {
pgste.vsie_notif = 0;
+ pgste.vsie_gmem = 0;
if (needs_lock)
gmap_handle_vsie_unshadow_event(gmap, gfn);
else
@@ -189,6 +220,7 @@ static inline union pgste _gmap_ptep_xchg(struct gmap *gmap, union pte *ptep, un
}
if (!ptep->s.d && newpte.s.d && !newpte.s.s)
SetPageDirty(pfn_to_page(newpte.h.pfra));
+ pgste.zero = 0;
return __dat_ptep_xchg(ptep, pgste, newpte, gfn, gmap->asce, uses_skeys(gmap));
}
@@ -198,6 +230,30 @@ static inline union pgste gmap_ptep_xchg(struct gmap *gmap, union pte *ptep, uni
return _gmap_ptep_xchg(gmap, ptep, newpte, pgste, gfn, true);
}
+/**
+ * crste_needs_unshadow() -- Check if a crste operation triggers unshadowing.
+ * @oldcrste: the previous value for the crste.
+ * @newcrste: the new value for the crste.
+ *
+ * If the old crste did not have the vsie_notif bit set, return false: the
+ * page is not involved in vsie and thus should not trigger an unshadow
+ * operation. Conversely, if the bit is set, it can only be g3 memory, since
+ * dat tables are never mapped using large pages.
+ *
+ * Similar to the pgste.vsie_gmem case of pte_needs_unshadow(), if the
+ * protection bit is changing or the new page is invalid, trigger an
+ * unshadow event. Also trigger an unshadow event if the new crste does not
+ * have the vsie_notif bit set.
+ *
+ * Return: true if an unshadow event should be triggered, otherwise false.
+ */
+static inline bool crste_needs_unshadow(union crste oldcrste, union crste newcrste)
+{
+ if (!oldcrste.s.fc1.vsie_notif)
+ return false;
+ return (newcrste.h.p != oldcrste.h.p) || newcrste.h.i || !newcrste.s.fc1.vsie_notif;
+}
+
static inline bool __must_check _gmap_crstep_xchg_atomic(struct gmap *gmap, union crste *crstep,
union crste oldcrste, union crste newcrste,
gfn_t gfn, bool needs_lock)
@@ -216,8 +272,7 @@ static inline bool __must_check _gmap_crstep_xchg_atomic(struct gmap *gmap, unio
newcrste.s.fc1.prefix_notif = 0;
gmap_unmap_prefix(gmap, gfn, gfn + align);
}
- if (crste_leaf(oldcrste) && oldcrste.s.fc1.vsie_notif &&
- (newcrste.h.p || newcrste.h.i || !newcrste.s.fc1.vsie_notif)) {
+ if (crste_leaf(oldcrste) && crste_needs_unshadow(oldcrste, newcrste)) {
newcrste.s.fc1.vsie_notif = 0;
if (needs_lock)
gmap_handle_vsie_unshadow_event(gmap, gfn);
diff --git a/arch/x86/entry/Makefile b/arch/x86/entry/Makefile
index 72cae8e0ce856..83b4762d6ecba 100644
--- a/arch/x86/entry/Makefile
+++ b/arch/x86/entry/Makefile
@@ -13,7 +13,7 @@ CFLAGS_REMOVE_syscall_64.o = $(CC_FLAGS_FTRACE)
CFLAGS_syscall_32.o += -fno-stack-protector
CFLAGS_syscall_64.o += -fno-stack-protector
-obj-y := entry.o entry_$(BITS).o syscall_$(BITS).o
+obj-y := entry.o entry_$(BITS).o syscall_$(BITS).o common.o
obj-y += vdso/
obj-y += vsyscall/
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
new file mode 100644
index 0000000000000..06c7c6ebd6f94
--- /dev/null
+++ b/arch/x86/entry/common.c
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <linux/entry-common.h>
+#include <linux/kvm_types.h>
+#include <linux/hrtimer_rearm.h>
+#include <asm/fred.h>
+#include <asm/desc.h>
+
+#if IS_ENABLED(CONFIG_KVM_INTEL)
+/*
+ * On VMX, NMIs and IRQs (as configured by KVM) are acknowledged by hardware as
+ * part of the VM-Exit, i.e. the event itself is consumed as part of the VM-Exit.
+ * x86_entry_from_kvm() is invoked by KVM to effectively forward NMIs and IRQs
+ * to the kernel for servicing. On SVM, a.k.a. AMD, the NMI/IRQ VM-Exit is
+ * purely a signal that an NMI/IRQ is pending, i.e. the event that triggered
+ * the VM-Exit is held pending until it's unblocked in the host.
+ */
+noinstr void x86_entry_from_kvm(unsigned int event_type, unsigned int vector)
+{
+ if (event_type == EVENT_TYPE_EXTINT) {
+#ifdef CONFIG_X86_64
+ /*
+ * Use FRED dispatch, even when running IDT. The dispatch
+ * tables are kept in sync between FRED and IDT, and the FRED
+ * dispatch works well with CFI.
+ */
+ fred_entry_from_kvm(event_type, vector);
+#else
+ idt_entry_from_kvm(vector);
+#endif
+ /*
+ * Strictly speaking, only the NMI path requires noinstr.
+ */
+ instrumentation_begin();
+ /*
+ * KVM/VMX will dispatch from IRQ-disabled but for a context
+ * that will have IRQs-enabled. This confuses the entry code
+ * and it will not have reprogrammed the timer. Do so now.
+ */
+ hrtimer_rearm_deferred();
+ instrumentation_end();
+
+ return;
+ }
+
+ WARN_ON_ONCE(event_type != EVENT_TYPE_NMI);
+
+#ifdef CONFIG_X86_64
+ if (cpu_feature_enabled(X86_FEATURE_FRED))
+ return fred_entry_from_kvm(event_type, vector);
+#endif
+
+ /*
+ * Notably, we must use IDT dispatch for NMI when running in IDT mode.
+ * The FRED NMI context is significantly different and will not work
+ * right (specifically FRED fixed the NMI recursion issue).
+ */
+ idt_entry_from_kvm(vector);
+}
+EXPORT_SYMBOL_FOR_KVM(x86_entry_from_kvm);
+#endif
diff --git a/arch/x86/entry/entry.S b/arch/x86/entry/entry.S
index 6ba2b3adcef0f..a56e043b266dc 100644
--- a/arch/x86/entry/entry.S
+++ b/arch/x86/entry/entry.S
@@ -75,3 +75,49 @@ THUNK warn_thunk_thunk, __warn_thunk
#if defined(CONFIG_STACKPROTECTOR) && defined(CONFIG_SMP)
EXPORT_SYMBOL(__ref_stack_chk_guard);
#endif
+
+#if IS_ENABLED(CONFIG_KVM_INTEL)
+.macro IDT_DO_EVENT_IRQOFF call_insn call_target
+ /*
+ * Unconditionally create a stack frame, getting the correct RSP on the
+ * stack (for x86-64) would take two instructions anyways, and RBP can
+ * be used to restore RSP to make objtool happy (see below).
+ */
+ push %_ASM_BP
+ mov %_ASM_SP, %_ASM_BP
+
+#ifdef CONFIG_X86_64
+ /*
+ * Align RSP to a 16-byte boundary (to emulate CPU behavior) before
+ * creating the synthetic interrupt stack frame for the IRQ/NMI.
+ */
+ and $-16, %rsp
+ push $__KERNEL_DS
+ push %rbp
+#endif
+ pushf
+ push $__KERNEL_CS
+ \call_insn \call_target
+
+ /*
+ * "Restore" RSP from RBP, even though IRET has already unwound RSP to
+ * the correct value. objtool doesn't know the callee will IRET and,
+ * without the explicit restore, thinks the stack is getting walloped.
+ * Using an unwind hint is problematic due to x86-64's dynamic alignment.
+ */
+ leave
+ RET
+.endm
+
+.pushsection .text, "ax"
+SYM_FUNC_START(idt_do_interrupt_irqoff)
+ IDT_DO_EVENT_IRQOFF CALL_NOSPEC _ASM_ARG1
+SYM_FUNC_END(idt_do_interrupt_irqoff)
+.popsection
+
+.pushsection .noinstr.text, "ax"
+SYM_FUNC_START(idt_do_nmi_irqoff)
+ IDT_DO_EVENT_IRQOFF call asm_exc_nmi_kvm_vmx
+SYM_FUNC_END(idt_do_nmi_irqoff)
+.popsection
+#endif
diff --git a/arch/x86/entry/entry_64_fred.S b/arch/x86/entry/entry_64_fred.S
index 894f7f16eb80a..0d2768ab836c4 100644
--- a/arch/x86/entry/entry_64_fred.S
+++ b/arch/x86/entry/entry_64_fred.S
@@ -147,5 +147,4 @@ SYM_FUNC_START(asm_fred_entry_from_kvm)
RET
SYM_FUNC_END(asm_fred_entry_from_kvm)
-EXPORT_SYMBOL_FOR_KVM(asm_fred_entry_from_kvm);
#endif
diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index a6bfcc8243cd9..d903bce24f15d 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -178,7 +178,7 @@ static int map_vdso(const struct vdso_image *image, unsigned long addr)
if (IS_ERR(vma)) {
ret = PTR_ERR(vma);
do_munmap(mm, text_start, image->size, NULL);
- do_munmap(mm, addr, image->size, NULL);
+ do_munmap(mm, addr, VDSO_NR_PAGES * PAGE_SIZE, NULL);
goto up_fail;
}
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index ec95fe44fa3a0..00aeae8435297 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -438,6 +438,10 @@ extern void idt_setup_traps(void);
extern void idt_setup_apic_and_irq_gates(void);
extern bool idt_is_f00f_address(unsigned long address);
+extern void idt_do_interrupt_irqoff(unsigned long address);
+extern void idt_do_nmi_irqoff(void);
+extern void idt_entry_from_kvm(unsigned int vector);
+
#ifdef CONFIG_X86_64
extern void idt_setup_early_pf(void);
#else
diff --git a/arch/x86/include/asm/desc_defs.h b/arch/x86/include/asm/desc_defs.h
index 7e6b9314758a1..2f2ce8aadf076 100644
--- a/arch/x86/include/asm/desc_defs.h
+++ b/arch/x86/include/asm/desc_defs.h
@@ -145,7 +145,7 @@ struct gate_struct {
typedef struct gate_struct gate_desc;
#ifndef _SETUP
-static inline unsigned long gate_offset(const gate_desc *g)
+static __always_inline unsigned long gate_offset(const gate_desc *g)
{
#ifdef CONFIG_X86_64
return g->offset_low | ((unsigned long)g->offset_middle << 16) |
diff --git a/arch/x86/include/asm/entry-common.h b/arch/x86/include/asm/entry-common.h
index 7535131c711bb..eca24b5e07f4c 100644
--- a/arch/x86/include/asm/entry-common.h
+++ b/arch/x86/include/asm/entry-common.h
@@ -97,4 +97,6 @@ static __always_inline void arch_exit_to_user_mode(void)
}
#define arch_exit_to_user_mode arch_exit_to_user_mode
+extern void x86_entry_from_kvm(unsigned int entry_type, unsigned int vector);
+
#endif
diff --git a/arch/x86/include/asm/fred.h b/arch/x86/include/asm/fred.h
index 2bb65677c079b..18a2f811c3580 100644
--- a/arch/x86/include/asm/fred.h
+++ b/arch/x86/include/asm/fred.h
@@ -110,7 +110,6 @@ static __always_inline unsigned long fred_event_data(struct pt_regs *regs) { ret
static inline void cpu_init_fred_exceptions(void) { }
static inline void cpu_init_fred_rsps(void) { }
static inline void fred_complete_exception_setup(void) { }
-static inline void fred_entry_from_kvm(unsigned int type, unsigned int vector) { }
static inline void fred_sync_rsp0(unsigned long rsp0) { }
static inline void fred_update_rsp0(void) { }
#endif /* CONFIG_X86_FRED */
diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c
index 146f6f8b0650b..99801e844b305 100644
--- a/arch/x86/kernel/cpu/cpuid-deps.c
+++ b/arch/x86/kernel/cpu/cpuid-deps.c
@@ -92,6 +92,7 @@ static const struct cpuid_dep cpuid_deps[] = {
{ X86_FEATURE_FRED, X86_FEATURE_LKGS },
{ X86_FEATURE_SPEC_CTRL_SSBD, X86_FEATURE_SPEC_CTRL },
{ X86_FEATURE_LASS, X86_FEATURE_SMAP },
+ { X86_FEATURE_INVLPGB, X86_FEATURE_PCID },
{}
};
diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c
index 2604565887564..7bcf1decc0349 100644
--- a/arch/x86/kernel/idt.c
+++ b/arch/x86/kernel/idt.c
@@ -268,6 +268,21 @@ void __init idt_setup_early_pf(void)
}
#endif
+#if IS_ENABLED(CONFIG_KVM_INTEL)
+noinstr void idt_entry_from_kvm(unsigned int vector)
+{
+ if (vector == NMI_VECTOR)
+ return idt_do_nmi_irqoff();
+
+ /*
+ * Only the NMI path requires noinstr.
+ */
+ instrumentation_begin();
+ idt_do_interrupt_irqoff(gate_offset(idt_table + vector));
+ instrumentation_end();
+}
+#endif
+
static void __init idt_map_in_cea(void)
{
/*
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 3d239ed127441..52a3afb1b79e9 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -614,7 +614,6 @@ DEFINE_IDTENTRY_RAW(exc_nmi_kvm_vmx)
{
exc_nmi(regs);
}
-EXPORT_SYMBOL_FOR_KVM(asm_exc_nmi_kvm_vmx);
#endif
#ifdef CONFIG_NMI_CHECK_CPU
diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index adf211860949a..993b551180fe9 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -1300,12 +1300,14 @@ bool __init avic_hardware_setup(void)
}
/*
- * Disable IPI virtualization for AMD Family 17h CPUs (Zen1 and Zen2)
- * due to erratum 1235, which results in missed VM-Exits on the sender
- * and thus missed wake events for blocking vCPUs due to the CPU
- * failing to see a software update to clear IsRunning.
+ * Disable IPI virtualization for AMD Family 17h (Zen1 and Zen2) and
+ * Hygon Family 18h (derived from AMD Zen1) CPUs due to erratum 1235,
+ * which results in missed VM-Exits on the sender and thus missed wake
+ * events for blocking vCPUs due to the CPU failing to see a software
+ * update to clear IsRunning.
*/
- enable_ipiv = enable_ipiv && boot_cpu_data.x86 != 0x17;
+ if (boot_cpu_data.x86 == 0x17 || boot_cpu_data.x86 == 0x18)
+ enable_ipiv = false;
amd_iommu_register_ga_log_notifier(&avic_ga_log_notifier);
diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S
index 8a481dae9cae2..ff1f254a0ef4e 100644
--- a/arch/x86/kvm/vmx/vmenter.S
+++ b/arch/x86/kvm/vmx/vmenter.S
@@ -31,38 +31,6 @@
#define VCPU_R15 __VCPU_REGS_R15 * WORD_SIZE
#endif
-.macro VMX_DO_EVENT_IRQOFF call_insn call_target
- /*
- * Unconditionally create a stack frame, getting the correct RSP on the
- * stack (for x86-64) would take two instructions anyways, and RBP can
- * be used to restore RSP to make objtool happy (see below).
- */
- push %_ASM_BP
- mov %_ASM_SP, %_ASM_BP
-
-#ifdef CONFIG_X86_64
- /*
- * Align RSP to a 16-byte boundary (to emulate CPU behavior) before
- * creating the synthetic interrupt stack frame for the IRQ/NMI.
- */
- and $-16, %rsp
- push $__KERNEL_DS
- push %rbp
-#endif
- pushf
- push $__KERNEL_CS
- \call_insn \call_target
-
- /*
- * "Restore" RSP from RBP, even though IRET has already unwound RSP to
- * the correct value. objtool doesn't know the callee will IRET and,
- * without the explicit restore, thinks the stack is getting walloped.
- * Using an unwind hint is problematic due to x86-64's dynamic alignment.
- */
- leave
- RET
-.endm
-
.section .noinstr.text, "ax"
/**
@@ -320,10 +288,6 @@ SYM_INNER_LABEL_ALIGN(vmx_vmexit, SYM_L_GLOBAL)
SYM_FUNC_END(__vmx_vcpu_run)
-SYM_FUNC_START(vmx_do_nmi_irqoff)
- VMX_DO_EVENT_IRQOFF call asm_exc_nmi_kvm_vmx
-SYM_FUNC_END(vmx_do_nmi_irqoff)
-
#ifndef CONFIG_CC_HAS_ASM_GOTO_OUTPUT
/**
@@ -375,13 +339,3 @@ SYM_FUNC_START(vmread_error_trampoline)
RET
SYM_FUNC_END(vmread_error_trampoline)
#endif
-
-.section .text, "ax"
-
-#ifndef CONFIG_X86_FRED
-
-SYM_FUNC_START(vmx_do_interrupt_irqoff)
- VMX_DO_EVENT_IRQOFF CALL_NOSPEC _ASM_ARG1
-SYM_FUNC_END(vmx_do_interrupt_irqoff)
-
-#endif
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 49feecb286b23..b9103de01428e 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -7117,9 +7117,6 @@ void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
vmcs_write64(EOI_EXIT_BITMAP3, eoi_exit_bitmap[3]);
}
-void vmx_do_interrupt_irqoff(unsigned long entry);
-void vmx_do_nmi_irqoff(void);
-
static void handle_nm_fault_irqoff(struct kvm_vcpu *vcpu)
{
/*
@@ -7161,17 +7158,8 @@ static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu,
"unexpected VM-Exit interrupt info: 0x%x", intr_info))
return;
- /*
- * Invoke the kernel's IRQ handler for the vector. Use the FRED path
- * when it's available even if FRED isn't fully enabled, e.g. even if
- * FRED isn't supported in hardware, in order to avoid the indirect
- * CALL in the non-FRED path.
- */
kvm_before_interrupt(vcpu, KVM_HANDLING_IRQ);
- if (IS_ENABLED(CONFIG_X86_FRED))
- fred_entry_from_kvm(EVENT_TYPE_EXTINT, vector);
- else
- vmx_do_interrupt_irqoff(gate_offset((gate_desc *)host_idt_base + vector));
+ x86_entry_from_kvm(EVENT_TYPE_EXTINT, vector);
kvm_after_interrupt(vcpu);
vcpu->arch.at_instruction_boundary = true;
@@ -7481,10 +7469,7 @@ noinstr void vmx_handle_nmi(struct kvm_vcpu *vcpu)
return;
kvm_before_interrupt(vcpu, KVM_HANDLING_NMI);
- if (cpu_feature_enabled(X86_FEATURE_FRED))
- fred_entry_from_kvm(EVENT_TYPE_NMI, NMI_VECTOR);
- else
- vmx_do_nmi_irqoff();
+ x86_entry_from_kvm(EVENT_TYPE_NMI, NMI_VECTOR);
kvm_after_interrupt(vcpu);
}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0a1b63c63d1a9..c1a72d749084f 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4876,7 +4876,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
r = tdp_enabled;
break;
case KVM_CAP_X86_APIC_BUS_CYCLES_NS:
- r = APIC_BUS_CYCLE_NS_DEFAULT;
+ r = kvm ? kvm->arch.apic_bus_cycle_ns : APIC_BUS_CYCLE_NS_DEFAULT;
break;
case KVM_CAP_EXIT_HYPERCALL:
r = KVM_EXIT_HYPERCALL_VALID_MASK;