diff options
| author | Mark Brown <broonie@kernel.org> | 2026-05-29 18:09:21 +0100 |
|---|---|---|
| committer | Mark Brown <broonie@kernel.org> | 2026-05-29 18:09:21 +0100 |
| commit | 8738760e67d0f953ac9f3a46063de52859c4216b (patch) | |
| tree | 0543abcfc368d4fc5ea5e4f4d85e40811dc8f862 /arch | |
| parent | c4a3570507e705aa00c1f71ba812ed5cea19709a (diff) | |
| parent | 6ed60999d33d49251b42976a5511f1bb089797ed (diff) | |
| download | linux-next-history-8738760e67d0f953ac9f3a46063de52859c4216b.tar.gz | |
Merge branch 'next' of https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git
Diffstat (limited to 'arch')
| -rw-r--r-- | arch/powerpc/Kconfig | 1 | ||||
| -rw-r--r-- | arch/powerpc/include/asm/entry-common.h | 533 | ||||
| -rw-r--r-- | arch/powerpc/include/asm/hw_irq.h | 4 | ||||
| -rw-r--r-- | arch/powerpc/include/asm/interrupt.h | 386 | ||||
| -rw-r--r-- | arch/powerpc/include/asm/kasan.h | 15 | ||||
| -rw-r--r-- | arch/powerpc/include/asm/ptrace.h | 6 | ||||
| -rw-r--r-- | arch/powerpc/include/asm/signal.h | 1 | ||||
| -rw-r--r-- | arch/powerpc/include/asm/stacktrace.h | 6 | ||||
| -rw-r--r-- | arch/powerpc/include/asm/syscall.h | 5 | ||||
| -rw-r--r-- | arch/powerpc/include/asm/thread_info.h | 1 | ||||
| -rw-r--r-- | arch/powerpc/include/uapi/asm/ptrace.h | 14 | ||||
| -rw-r--r-- | arch/powerpc/kernel/interrupt.c | 254 | ||||
| -rw-r--r-- | arch/powerpc/kernel/ptrace/ptrace.c | 142 | ||||
| -rw-r--r-- | arch/powerpc/kernel/signal.c | 25 | ||||
| -rw-r--r-- | arch/powerpc/kernel/syscall.c | 119 | ||||
| -rw-r--r-- | arch/powerpc/kernel/traps.c | 2 | ||||
| -rw-r--r-- | arch/powerpc/kernel/watchdog.c | 2 | ||||
| -rw-r--r-- | arch/powerpc/perf/core-book3s.c | 2 |
18 files changed, 690 insertions, 828 deletions
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index e93df95b79e75..81642206f7de3 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -206,6 +206,7 @@ config PPC select GENERIC_CPU_AUTOPROBE select GENERIC_CPU_VULNERABILITIES if PPC_BARRIER_NOSPEC select GENERIC_EARLY_IOREMAP + select GENERIC_ENTRY select GENERIC_GETTIMEOFDAY select GENERIC_IDLE_POLL_SETUP select GENERIC_IOREMAP diff --git a/arch/powerpc/include/asm/entry-common.h b/arch/powerpc/include/asm/entry-common.h new file mode 100644 index 0000000000000..de5601282755e --- /dev/null +++ b/arch/powerpc/include/asm/entry-common.h @@ -0,0 +1,533 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _ASM_PPC_ENTRY_COMMON_H +#define _ASM_PPC_ENTRY_COMMON_H + +#include <asm/cputime.h> +#include <asm/interrupt.h> +#include <asm/runlatch.h> +#include <asm/stacktrace.h> +#include <asm/switch_to.h> +#include <asm/tm.h> + +#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG +/* + * WARN/BUG is handled with a program interrupt so minimise checks here to + * avoid recursion and maximise the chance of getting the first oops handled. + */ +#define INT_SOFT_MASK_BUG_ON(regs, cond) \ +do { \ + if ((user_mode(regs) || (TRAP(regs) != INTERRUPT_PROGRAM))) \ + BUG_ON(cond); \ +} while (0) +#else +#define INT_SOFT_MASK_BUG_ON(regs, cond) +#endif + +#ifdef CONFIG_PPC_BOOK3S_64 +extern char __end_soft_masked[]; +bool search_kernel_soft_mask_table(unsigned long addr); +unsigned long search_kernel_restart_table(unsigned long addr); + +DECLARE_STATIC_KEY_FALSE(interrupt_exit_not_reentrant); + +static inline bool is_implicit_soft_masked(struct pt_regs *regs) +{ + if (user_mode(regs)) + return false; + + if (regs->nip >= (unsigned long)__end_soft_masked) + return false; + + return search_kernel_soft_mask_table(regs->nip); +} + +static inline void srr_regs_clobbered(void) +{ + local_paca->srr_valid = 0; + local_paca->hsrr_valid = 0; +} +#else +static inline unsigned long search_kernel_restart_table(unsigned long addr) +{ + return 0; +} + +static inline bool is_implicit_soft_masked(struct pt_regs *regs) +{ + return false; +} + +static inline void srr_regs_clobbered(void) +{ +} +#endif + +static inline void nap_adjust_return(struct pt_regs *regs) +{ +#ifdef CONFIG_PPC_970_NAP + if (unlikely(test_thread_local_flags(_TLF_NAPPING))) { + /* Can avoid a test-and-clear because NMIs do not call this */ + clear_thread_local_flags(_TLF_NAPPING); + regs_set_return_ip(regs, (unsigned long)power4_idle_nap_return); + } +#endif +} + +static __always_inline void booke_load_dbcr0(void) +{ +#ifdef CONFIG_PPC_ADV_DEBUG_REGS + unsigned long dbcr0 = current->thread.debug.dbcr0; + + if (likely(!(dbcr0 & DBCR0_IDM))) + return; + + /* + * Check to see if the dbcr0 register is set up to debug. + * Use the internal debug mode bit to do this. + */ + mtmsr(mfmsr() & ~MSR_DE); + if (IS_ENABLED(CONFIG_PPC32)) { + isync(); + global_dbcr0[smp_processor_id()] = mfspr(SPRN_DBCR0); + } + mtspr(SPRN_DBCR0, dbcr0); + mtspr(SPRN_DBSR, -1); +#endif +} + +static inline void booke_restore_dbcr0(void) +{ +#ifdef CONFIG_PPC_ADV_DEBUG_REGS + unsigned long dbcr0 = current->thread.debug.dbcr0; + + if (IS_ENABLED(CONFIG_PPC32) && unlikely(dbcr0 & DBCR0_IDM)) { + mtspr(SPRN_DBSR, -1); + mtspr(SPRN_DBCR0, global_dbcr0[smp_processor_id()]); + } +#endif +} + +static inline void check_return_regs_valid(struct pt_regs *regs) +{ +#ifdef CONFIG_PPC_BOOK3S_64 + unsigned long trap, srr0, srr1; + static bool warned; + u8 *validp; + char *h; + + if (trap_is_scv(regs)) + return; + + trap = TRAP(regs); + // EE in HV mode sets HSRRs like 0xea0 + if (cpu_has_feature(CPU_FTR_HVMODE) && trap == INTERRUPT_EXTERNAL) + trap = 0xea0; + + switch (trap) { + case 0x980: + case INTERRUPT_H_DATA_STORAGE: + case 0xe20: + case 0xe40: + case INTERRUPT_HMI: + case 0xe80: + case 0xea0: + case INTERRUPT_H_FAC_UNAVAIL: + case 0x1200: + case 0x1500: + case 0x1600: + case 0x1800: + validp = &local_paca->hsrr_valid; + if (!READ_ONCE(*validp)) + return; + + srr0 = mfspr(SPRN_HSRR0); + srr1 = mfspr(SPRN_HSRR1); + h = "H"; + + break; + default: + validp = &local_paca->srr_valid; + if (!READ_ONCE(*validp)) + return; + + srr0 = mfspr(SPRN_SRR0); + srr1 = mfspr(SPRN_SRR1); + h = ""; + break; + } + + if (srr0 == regs->nip && srr1 == regs->msr) + return; + + /* + * A NMI / soft-NMI interrupt may have come in after we found + * srr_valid and before the SRRs are loaded. The interrupt then + * comes in and clobbers SRRs and clears srr_valid. Then we load + * the SRRs here and test them above and find they don't match. + * + * Test validity again after that, to catch such false positives. + * + * This test in general will have some window for false negatives + * and may not catch and fix all such cases if an NMI comes in + * later and clobbers SRRs without clearing srr_valid, but hopefully + * such things will get caught most of the time, statistically + * enough to be able to get a warning out. + */ + if (!READ_ONCE(*validp)) + return; + + if (!data_race(warned)) { + data_race(warned = true); + pr_warn("%sSRR0 was: %lx should be: %lx\n", h, srr0, regs->nip); + pr_warn("%sSRR1 was: %lx should be: %lx\n", h, srr1, regs->msr); + show_regs(regs); + } + + WRITE_ONCE(*validp, 0); /* fixup */ +#endif +} + +static inline void arch_interrupt_enter_prepare(struct pt_regs *regs) +{ +#ifdef CONFIG_PPC64 + irq_soft_mask_set(IRQS_ALL_DISABLED); + + /* + * If the interrupt was taken with HARD_DIS clear, then enable MSR[EE]. + * Asynchronous interrupts get here with HARD_DIS set (see below), so + * this enables MSR[EE] for synchronous interrupts. IRQs remain + * soft-masked. The interrupt handler may later call + * interrupt_cond_local_irq_enable() to achieve a regular process + * context. + */ + if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) { + INT_SOFT_MASK_BUG_ON(regs, !(regs->msr & MSR_EE)); + __hard_irq_enable(); + } else { + __hard_RI_enable(); + } + /* Enable MSR[RI] early, to support kernel SLB and hash faults */ +#endif + + if (!regs_irqs_disabled(regs)) + trace_hardirqs_off(); + + if (user_mode(regs)) { + kuap_lock(); + account_cpu_user_entry(); + account_stolen_time(); + } else { + kuap_save_and_lock(regs); + /* + * CT_WARN_ON comes here via program_check_exception, + * so avoid recursion. + */ + if (TRAP(regs) != INTERRUPT_PROGRAM) + CT_WARN_ON(ct_state() != CT_STATE_KERNEL && + ct_state() != CT_STATE_IDLE); + INT_SOFT_MASK_BUG_ON(regs, is_implicit_soft_masked(regs)); + INT_SOFT_MASK_BUG_ON(regs, regs_irqs_disabled(regs) && + search_kernel_restart_table(regs->nip)); + } + INT_SOFT_MASK_BUG_ON(regs, !regs_irqs_disabled(regs) && + !(regs->msr & MSR_EE)); + + booke_restore_dbcr0(); +} + +/* + * Care should be taken to note that arch_interrupt_exit_prepare and + * arch_interrupt_async_exit_prepare do not necessarily return immediately to + * regs context (e.g., if regs is usermode, we don't necessarily return to + * user mode). Other interrupts might be taken between here and return, + * context switch / preemption may occur in the exit path after this, or a + * signal may be delivered, etc. + * + * The real interrupt exit code is platform specific, e.g., + * interrupt_exit_user_prepare / interrupt_exit_kernel_prepare for 64s. + * + * However arch_interrupt_nmi_exit_prepare does return directly to regs, because + * NMIs do not do "exit work" or replay soft-masked interrupts. + */ +static inline void arch_interrupt_exit_prepare(struct pt_regs *regs) +{ + if (user_mode(regs)) { + BUG_ON(regs_is_unrecoverable(regs)); + BUG_ON(regs_irqs_disabled(regs)); + /* + * We don't need to restore AMR on the way back to userspace for KUAP. + * AMR can only have been unlocked if we interrupted the kernel. + */ + kuap_assert_locked(); + + local_irq_disable(); + } +} + +static inline void arch_interrupt_async_enter_prepare(struct pt_regs *regs) +{ +#ifdef CONFIG_PPC64 + /* Ensure arch_interrupt_enter_prepare does not enable MSR[EE] */ + local_paca->irq_happened |= PACA_IRQ_HARD_DIS; +#endif + arch_interrupt_enter_prepare(regs); +#ifdef CONFIG_PPC_BOOK3S_64 + /* + * RI=1 is set by arch_interrupt_enter_prepare, so this thread flags access + * has to come afterward (it can cause SLB faults). + */ + if (cpu_has_feature(CPU_FTR_CTRL) && + !test_thread_local_flags(_TLF_RUNLATCH)) + __ppc64_runlatch_on(); +#endif +} + +static inline void arch_interrupt_async_exit_prepare(struct pt_regs *regs) +{ + /* + * Adjust at exit so the main handler sees the true NIA. This must + * come before irq_exit() because irq_exit can enable interrupts, and + * if another interrupt is taken before nap_adjust_return has run + * here, then that interrupt would return directly to idle nap return. + */ + nap_adjust_return(regs); + + arch_interrupt_exit_prepare(regs); +} + +struct interrupt_nmi_state { +#ifdef CONFIG_PPC64 + u8 irq_soft_mask; + u8 irq_happened; + u8 ftrace_enabled; + u64 softe; +#endif +}; + +static inline bool nmi_disables_ftrace(struct pt_regs *regs) +{ + /* Allow DEC and PMI to be traced when they are soft-NMI */ + if (IS_ENABLED(CONFIG_PPC_BOOK3S_64)) { + if (TRAP(regs) == INTERRUPT_DECREMENTER) + return false; + if (TRAP(regs) == INTERRUPT_PERFMON) + return false; + } + if (IS_ENABLED(CONFIG_PPC_BOOK3E_64)) { + if (TRAP(regs) == INTERRUPT_PERFMON) + return false; + } + + return true; +} + +static inline void arch_interrupt_nmi_enter_prepare(struct pt_regs *regs, + struct interrupt_nmi_state *state) +{ +#ifdef CONFIG_PPC64 + state->irq_soft_mask = local_paca->irq_soft_mask; + state->irq_happened = local_paca->irq_happened; + state->softe = regs->softe; + + /* + * Set IRQS_ALL_DISABLED unconditionally so irqs_disabled() does + * the right thing, and set IRQ_HARD_DIS. We do not want to reconcile + * because that goes through irq tracing which we don't want in NMI. + */ + local_paca->irq_soft_mask = IRQS_ALL_DISABLED; + local_paca->irq_happened |= PACA_IRQ_HARD_DIS; + + if (!(regs->msr & MSR_EE) || is_implicit_soft_masked(regs)) { + /* + * Adjust regs->softe to be soft-masked if it had not been + * reconcied (e.g., interrupt entry with MSR[EE]=0 but softe + * not yet set disabled), or if it was in an implicit soft + * masked state. This makes regs_irqs_disabled(regs) + * behave as expected. + */ + regs->softe = IRQS_ALL_DISABLED; + } + + __hard_RI_enable(); + + /* Don't do any per-CPU operations until interrupt state is fixed */ + + if (nmi_disables_ftrace(regs)) { + state->ftrace_enabled = this_cpu_get_ftrace_enabled(); + this_cpu_set_ftrace_enabled(0); + } +#endif +} + +static inline void arch_interrupt_nmi_exit_prepare(struct pt_regs *regs, + struct interrupt_nmi_state *state) +{ + /* + * nmi does not call nap_adjust_return because nmi should not create + * new work to do (must use irq_work for that). + */ + +#ifdef CONFIG_PPC64 +#ifdef CONFIG_PPC_BOOK3S + if (regs_irqs_disabled(regs)) { + unsigned long rst = search_kernel_restart_table(regs->nip); + + if (rst) + regs_set_return_ip(regs, rst); + } +#endif + + if (nmi_disables_ftrace(regs)) + this_cpu_set_ftrace_enabled(state->ftrace_enabled); + + /* Check we didn't change the pending interrupt mask. */ + WARN_ON_ONCE((state->irq_happened | PACA_IRQ_HARD_DIS) != local_paca->irq_happened); + regs->softe = state->softe; + local_paca->irq_happened = state->irq_happened; + local_paca->irq_soft_mask = state->irq_soft_mask; +#endif +} + +static __always_inline void arch_enter_from_user_mode(struct pt_regs *regs) +{ + kuap_lock(); + + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) + BUG_ON(irq_soft_mask_return() != IRQS_ALL_DISABLED); + + BUG_ON(regs_is_unrecoverable(regs)); + BUG_ON(!user_mode(regs)); + BUG_ON(regs_irqs_disabled(regs)); + +#ifdef CONFIG_PPC_PKEY + if (mmu_has_feature(MMU_FTR_PKEY) && trap_is_syscall(regs)) { + unsigned long amr, iamr; + bool flush_needed = false; + /* + * When entering from userspace we mostly have the AMR/IAMR + * different from kernel default values. Hence don't compare. + */ + amr = mfspr(SPRN_AMR); + iamr = mfspr(SPRN_IAMR); + regs->amr = amr; + regs->iamr = iamr; + if (mmu_has_feature(MMU_FTR_KUAP)) { + mtspr(SPRN_AMR, AMR_KUAP_BLOCKED); + flush_needed = true; + } + if (mmu_has_feature(MMU_FTR_BOOK3S_KUEP)) { + mtspr(SPRN_IAMR, AMR_KUEP_BLOCKED); + flush_needed = true; + } + if (flush_needed) + isync(); + } +#endif + kuap_assert_locked(); + booke_restore_dbcr0(); + account_cpu_user_entry(); + account_stolen_time(); + + /* + * This is not required for the syscall exit path, but makes the + * stack frame look nicer. If this was initialised in the first stack + * frame, or if the unwinder was taught the first stack frame always + * returns to user with IRQS_ENABLED, this store could be avoided! + */ + irq_soft_mask_regs_set_state(regs, IRQS_ENABLED); + + /* + * If system call is called with TM active, set _TIF_RESTOREALL to + * prevent RFSCV being used to return to userspace, because POWER9 + * TM implementation has problems with this instruction returning to + * transactional state. Final register values are not relevant because + * the transaction will be aborted upon return anyway. Or in the case + * of unsupported_scv SIGILL fault, the return state does not much + * matter because it's an edge case. + */ + if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) && + unlikely(MSR_TM_TRANSACTIONAL(regs->msr))) + set_bits(_TIF_RESTOREALL, ¤t_thread_info()->flags); + + /* + * If the system call was made with a transaction active, doom it and + * return without performing the system call. Unless it was an + * unsupported scv vector, in which case it's treated like an illegal + * instruction. + */ +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + if (unlikely(MSR_TM_TRANSACTIONAL(regs->msr)) && + !trap_is_unsupported_scv(regs)) { + /* Enable TM in the kernel, and disable EE (for scv) */ + hard_irq_disable(); + mtmsr(mfmsr() | MSR_TM); + + /* tabort, this dooms the transaction, nothing else */ + asm volatile(".long 0x7c00071d | ((%0) << 16)" + :: "r"(TM_CAUSE_SYSCALL | TM_CAUSE_PERSISTENT)); + + /* + * Userspace will never see the return value. Execution will + * resume after the tbegin. of the aborted transaction with the + * checkpointed register state. A context switch could occur + * or signal delivered to the process before resuming the + * doomed transaction context, but that should all be handled + * as expected. + */ + return; + } +#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ +} + +#define arch_enter_from_user_mode arch_enter_from_user_mode + +static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs, + unsigned long ti_work) +{ + unsigned long mathflags; + + if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && IS_ENABLED(CONFIG_PPC_FPU)) { + if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) && + unlikely((ti_work & _TIF_RESTORE_TM))) { + restore_tm_state(regs); + } else { + mathflags = MSR_FP; + + if (cpu_has_feature(CPU_FTR_VSX)) + mathflags |= MSR_VEC | MSR_VSX; + else if (cpu_has_feature(CPU_FTR_ALTIVEC)) + mathflags |= MSR_VEC; + + /* + * If userspace MSR has all available FP bits set, + * then they are live and no need to restore. If not, + * it means the regs were given up and restore_math + * may decide to restore them (to avoid taking an FP + * fault). + */ + if ((regs->msr & mathflags) != mathflags) + restore_math(regs); + } + } + + check_return_regs_valid(regs); +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + local_paca->tm_scratch = regs->msr; +#endif + /* Restore user access locks last */ + kuap_user_restore(regs); +} + +#define arch_exit_to_user_mode_prepare arch_exit_to_user_mode_prepare + +static __always_inline void arch_exit_to_user_mode(void) +{ + booke_load_dbcr0(); + + account_cpu_user_exit(); +} + +#define arch_exit_to_user_mode arch_exit_to_user_mode + +#endif /* _ASM_PPC_ENTRY_COMMON_H */ diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index 9cd945f2acafa..b7eee6385ae5c 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -393,7 +393,7 @@ static inline void do_hard_irq_enable(void) __hard_irq_enable(); } -static inline bool arch_irq_disabled_regs(struct pt_regs *regs) +static inline bool regs_irqs_disabled(struct pt_regs *regs) { return (regs->softe & IRQS_DISABLED); } @@ -466,7 +466,7 @@ static inline bool arch_irqs_disabled(void) #define hard_irq_disable() arch_local_irq_disable() -static inline bool arch_irq_disabled_regs(struct pt_regs *regs) +static inline bool regs_irqs_disabled(struct pt_regs *regs) { return !(regs->msr & MSR_EE); } diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h index eb0e4a20b8188..fb42a664ae54a 100644 --- a/arch/powerpc/include/asm/interrupt.h +++ b/arch/powerpc/include/asm/interrupt.h @@ -66,11 +66,9 @@ #ifndef __ASSEMBLER__ -#include <linux/context_tracking.h> -#include <linux/hardirq.h> -#include <asm/cputime.h> -#include <asm/firmware.h> -#include <asm/ftrace.h> +#include <linux/sched/debug.h> /* for show_regs */ +#include <linux/irq-entry-common.h> + #include <asm/kprobes.h> #include <asm/runlatch.h> @@ -88,308 +86,6 @@ do { \ #define INT_SOFT_MASK_BUG_ON(regs, cond) #endif -#ifdef CONFIG_PPC_BOOK3S_64 -extern char __end_soft_masked[]; -bool search_kernel_soft_mask_table(unsigned long addr); -unsigned long search_kernel_restart_table(unsigned long addr); - -DECLARE_STATIC_KEY_FALSE(interrupt_exit_not_reentrant); - -static inline bool is_implicit_soft_masked(struct pt_regs *regs) -{ - if (user_mode(regs)) - return false; - - if (regs->nip >= (unsigned long)__end_soft_masked) - return false; - - return search_kernel_soft_mask_table(regs->nip); -} - -static inline void srr_regs_clobbered(void) -{ - local_paca->srr_valid = 0; - local_paca->hsrr_valid = 0; -} -#else -static inline unsigned long search_kernel_restart_table(unsigned long addr) -{ - return 0; -} - -static inline bool is_implicit_soft_masked(struct pt_regs *regs) -{ - return false; -} - -static inline void srr_regs_clobbered(void) -{ -} -#endif - -static inline void nap_adjust_return(struct pt_regs *regs) -{ -#ifdef CONFIG_PPC_970_NAP - if (unlikely(test_thread_local_flags(_TLF_NAPPING))) { - /* Can avoid a test-and-clear because NMIs do not call this */ - clear_thread_local_flags(_TLF_NAPPING); - regs_set_return_ip(regs, (unsigned long)power4_idle_nap_return); - } -#endif -} - -static inline void booke_restore_dbcr0(void) -{ -#ifdef CONFIG_PPC_ADV_DEBUG_REGS - unsigned long dbcr0 = current->thread.debug.dbcr0; - - if (IS_ENABLED(CONFIG_PPC32) && unlikely(dbcr0 & DBCR0_IDM)) { - mtspr(SPRN_DBSR, -1); - mtspr(SPRN_DBCR0, global_dbcr0[smp_processor_id()]); - } -#endif -} - -static inline void interrupt_enter_prepare(struct pt_regs *regs) -{ -#ifdef CONFIG_PPC64 - irq_soft_mask_set(IRQS_ALL_DISABLED); - - /* - * If the interrupt was taken with HARD_DIS clear, then enable MSR[EE]. - * Asynchronous interrupts get here with HARD_DIS set (see below), so - * this enables MSR[EE] for synchronous interrupts. IRQs remain - * soft-masked. The interrupt handler may later call - * interrupt_cond_local_irq_enable() to achieve a regular process - * context. - */ - if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) { - INT_SOFT_MASK_BUG_ON(regs, !(regs->msr & MSR_EE)); - __hard_irq_enable(); - } else { - __hard_RI_enable(); - } - /* Enable MSR[RI] early, to support kernel SLB and hash faults */ -#endif - - if (!arch_irq_disabled_regs(regs)) - trace_hardirqs_off(); - - if (user_mode(regs)) { - kuap_lock(); - CT_WARN_ON(ct_state() != CT_STATE_USER); - user_exit_irqoff(); - - account_cpu_user_entry(); - account_stolen_time(); - } else { - kuap_save_and_lock(regs); - /* - * CT_WARN_ON comes here via program_check_exception, - * so avoid recursion. - */ - if (TRAP(regs) != INTERRUPT_PROGRAM) - CT_WARN_ON(ct_state() != CT_STATE_KERNEL && - ct_state() != CT_STATE_IDLE); - INT_SOFT_MASK_BUG_ON(regs, is_implicit_soft_masked(regs)); - INT_SOFT_MASK_BUG_ON(regs, arch_irq_disabled_regs(regs) && - search_kernel_restart_table(regs->nip)); - } - INT_SOFT_MASK_BUG_ON(regs, !arch_irq_disabled_regs(regs) && - !(regs->msr & MSR_EE)); - - booke_restore_dbcr0(); -} - -/* - * Care should be taken to note that interrupt_exit_prepare and - * interrupt_async_exit_prepare do not necessarily return immediately to - * regs context (e.g., if regs is usermode, we don't necessarily return to - * user mode). Other interrupts might be taken between here and return, - * context switch / preemption may occur in the exit path after this, or a - * signal may be delivered, etc. - * - * The real interrupt exit code is platform specific, e.g., - * interrupt_exit_user_prepare / interrupt_exit_kernel_prepare for 64s. - * - * However interrupt_nmi_exit_prepare does return directly to regs, because - * NMIs do not do "exit work" or replay soft-masked interrupts. - */ -static inline void interrupt_exit_prepare(struct pt_regs *regs) -{ -} - -static inline void interrupt_async_enter_prepare(struct pt_regs *regs) -{ -#ifdef CONFIG_PPC64 - /* Ensure interrupt_enter_prepare does not enable MSR[EE] */ - local_paca->irq_happened |= PACA_IRQ_HARD_DIS; -#endif - interrupt_enter_prepare(regs); -#ifdef CONFIG_PPC_BOOK3S_64 - /* - * RI=1 is set by interrupt_enter_prepare, so this thread flags access - * has to come afterward (it can cause SLB faults). - */ - if (cpu_has_feature(CPU_FTR_CTRL) && - !test_thread_local_flags(_TLF_RUNLATCH)) - __ppc64_runlatch_on(); -#endif - irq_enter(); -} - -static inline void interrupt_async_exit_prepare(struct pt_regs *regs) -{ - /* - * Adjust at exit so the main handler sees the true NIA. This must - * come before irq_exit() because irq_exit can enable interrupts, and - * if another interrupt is taken before nap_adjust_return has run - * here, then that interrupt would return directly to idle nap return. - */ - nap_adjust_return(regs); - - irq_exit(); - interrupt_exit_prepare(regs); -} - -struct interrupt_nmi_state { -#ifdef CONFIG_PPC64 - u8 irq_soft_mask; - u8 irq_happened; - u8 ftrace_enabled; - u64 softe; -#endif -}; - -static inline bool nmi_disables_ftrace(struct pt_regs *regs) -{ - /* Allow DEC and PMI to be traced when they are soft-NMI */ - if (IS_ENABLED(CONFIG_PPC_BOOK3S_64)) { - if (TRAP(regs) == INTERRUPT_DECREMENTER) - return false; - if (TRAP(regs) == INTERRUPT_PERFMON) - return false; - } - if (IS_ENABLED(CONFIG_PPC_BOOK3E_64)) { - if (TRAP(regs) == INTERRUPT_PERFMON) - return false; - } - - return true; -} - -static inline void interrupt_nmi_enter_prepare(struct pt_regs *regs, struct interrupt_nmi_state *state) -{ -#ifdef CONFIG_PPC64 - state->irq_soft_mask = local_paca->irq_soft_mask; - state->irq_happened = local_paca->irq_happened; - state->softe = regs->softe; - - /* - * Set IRQS_ALL_DISABLED unconditionally so irqs_disabled() does - * the right thing, and set IRQ_HARD_DIS. We do not want to reconcile - * because that goes through irq tracing which we don't want in NMI. - */ - local_paca->irq_soft_mask = IRQS_ALL_DISABLED; - local_paca->irq_happened |= PACA_IRQ_HARD_DIS; - - if (!(regs->msr & MSR_EE) || is_implicit_soft_masked(regs)) { - /* - * Adjust regs->softe to be soft-masked if it had not been - * reconcied (e.g., interrupt entry with MSR[EE]=0 but softe - * not yet set disabled), or if it was in an implicit soft - * masked state. This makes arch_irq_disabled_regs(regs) - * behave as expected. - */ - regs->softe = IRQS_ALL_DISABLED; - } - - __hard_RI_enable(); - - /* Don't do any per-CPU operations until interrupt state is fixed */ - - if (nmi_disables_ftrace(regs)) { - state->ftrace_enabled = this_cpu_get_ftrace_enabled(); - this_cpu_set_ftrace_enabled(0); - } -#endif - - /* If data relocations are enabled, it's safe to use nmi_enter() */ - if (mfmsr() & MSR_DR) { - nmi_enter(); - return; - } - - /* - * But do not use nmi_enter() for pseries hash guest taking a real-mode - * NMI because not everything it touches is within the RMA limit. - */ - if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && - firmware_has_feature(FW_FEATURE_LPAR) && - !radix_enabled()) - return; - - /* - * Likewise, don't use it if we have some form of instrumentation (like - * KASAN shadow) that is not safe to access in real mode (even on radix) - */ - if (IS_ENABLED(CONFIG_KASAN)) - return; - - /* - * Likewise, do not use it in real mode if percpu first chunk is not - * embedded. With CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK enabled there - * are chances where percpu allocation can come from vmalloc area. - */ - if (percpu_first_chunk_is_paged) - return; - - /* Otherwise, it should be safe to call it */ - nmi_enter(); -} - -static inline void interrupt_nmi_exit_prepare(struct pt_regs *regs, struct interrupt_nmi_state *state) -{ - if (mfmsr() & MSR_DR) { - // nmi_exit if relocations are on - nmi_exit(); - } else if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && - firmware_has_feature(FW_FEATURE_LPAR) && - !radix_enabled()) { - // no nmi_exit for a pseries hash guest taking a real mode exception - } else if (IS_ENABLED(CONFIG_KASAN)) { - // no nmi_exit for KASAN in real mode - } else if (percpu_first_chunk_is_paged) { - // no nmi_exit if percpu first chunk is not embedded - } else { - nmi_exit(); - } - - /* - * nmi does not call nap_adjust_return because nmi should not create - * new work to do (must use irq_work for that). - */ - -#ifdef CONFIG_PPC64 -#ifdef CONFIG_PPC_BOOK3S - if (arch_irq_disabled_regs(regs)) { - unsigned long rst = search_kernel_restart_table(regs->nip); - if (rst) - regs_set_return_ip(regs, rst); - } -#endif - - if (nmi_disables_ftrace(regs)) - this_cpu_set_ftrace_enabled(state->ftrace_enabled); - - /* Check we didn't change the pending interrupt mask. */ - WARN_ON_ONCE((state->irq_happened | PACA_IRQ_HARD_DIS) != local_paca->irq_happened); - regs->softe = state->softe; - local_paca->irq_happened = state->irq_happened; - local_paca->irq_soft_mask = state->irq_soft_mask; -#endif -} - /* * Don't use noinstr here like x86, but rather add NOKPROBE_SYMBOL to each * function definition. The reason for this is the noinstr section is placed @@ -470,11 +166,14 @@ static __always_inline void ____##func(struct pt_regs *regs); \ \ interrupt_handler void func(struct pt_regs *regs) \ { \ - interrupt_enter_prepare(regs); \ - \ + irqentry_state_t state; \ + arch_interrupt_enter_prepare(regs); \ + state = irqentry_enter(regs); \ + instrumentation_begin(); \ ____##func (regs); \ - \ - interrupt_exit_prepare(regs); \ + instrumentation_end(); \ + arch_interrupt_exit_prepare(regs); \ + irqentry_exit(regs, state); \ } \ NOKPROBE_SYMBOL(func); \ \ @@ -504,12 +203,15 @@ static __always_inline long ____##func(struct pt_regs *regs); \ interrupt_handler long func(struct pt_regs *regs) \ { \ long ret; \ + irqentry_state_t state; \ \ - interrupt_enter_prepare(regs); \ - \ + arch_interrupt_enter_prepare(regs); \ + state = irqentry_enter(regs); \ + instrumentation_begin(); \ ret = ____##func (regs); \ - \ - interrupt_exit_prepare(regs); \ + instrumentation_end(); \ + arch_interrupt_exit_prepare(regs); \ + irqentry_exit(regs, state); \ \ return ret; \ } \ @@ -538,11 +240,16 @@ static __always_inline void ____##func(struct pt_regs *regs); \ \ interrupt_handler void func(struct pt_regs *regs) \ { \ - interrupt_async_enter_prepare(regs); \ - \ + irqentry_state_t state; \ + arch_interrupt_async_enter_prepare(regs); \ + state = irqentry_enter(regs); \ + instrumentation_begin(); \ + irq_enter_rcu(); \ ____##func (regs); \ - \ - interrupt_async_exit_prepare(regs); \ + irq_exit_rcu(); \ + instrumentation_end(); \ + arch_interrupt_async_exit_prepare(regs); \ + irqentry_exit(regs, state); \ } \ NOKPROBE_SYMBOL(func); \ \ @@ -572,14 +279,43 @@ ____##func(struct pt_regs *regs); \ \ interrupt_handler long func(struct pt_regs *regs) \ { \ - struct interrupt_nmi_state state; \ + irqentry_state_t state; \ + struct interrupt_nmi_state nmi_state; \ long ret; \ \ - interrupt_nmi_enter_prepare(regs, &state); \ - \ + arch_interrupt_nmi_enter_prepare(regs, &nmi_state); \ + if (mfmsr() & MSR_DR) { \ + /* nmi_entry if relocations are on */ \ + state = irqentry_nmi_enter(regs); \ + } else if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && \ + firmware_has_feature(FW_FEATURE_LPAR) && \ + !radix_enabled()) { \ + /* no nmi_entry for a pseries hash guest \ + * taking a real mode exception */ \ + } else if (IS_ENABLED(CONFIG_KASAN)) { \ + /* no nmi_entry for KASAN in real mode */ \ + } else if (percpu_first_chunk_is_paged) { \ + /* no nmi_entry if percpu first chunk is not embedded */\ + } else { \ + state = irqentry_nmi_enter(regs); \ + } \ ret = ____##func (regs); \ - \ - interrupt_nmi_exit_prepare(regs, &state); \ + arch_interrupt_nmi_exit_prepare(regs, &nmi_state); \ + if (mfmsr() & MSR_DR) { \ + /* nmi_exit if relocations are on */ \ + irqentry_nmi_exit(regs, state); \ + } else if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && \ + firmware_has_feature(FW_FEATURE_LPAR) && \ + !radix_enabled()) { \ + /* no nmi_exit for a pseries hash guest \ + * taking a real mode exception */ \ + } else if (IS_ENABLED(CONFIG_KASAN)) { \ + /* no nmi_exit for KASAN in real mode */ \ + } else if (percpu_first_chunk_is_paged) { \ + /* no nmi_exit if percpu first chunk is not embedded */ \ + } else { \ + irqentry_nmi_exit(regs, state); \ + } \ \ return ret; \ } \ @@ -661,7 +397,7 @@ void replay_soft_interrupts(void); static inline void interrupt_cond_local_irq_enable(struct pt_regs *regs) { - if (!arch_irq_disabled_regs(regs)) + if (!regs_irqs_disabled(regs)) local_irq_enable(); } diff --git a/arch/powerpc/include/asm/kasan.h b/arch/powerpc/include/asm/kasan.h index 045804a86f98d..a690e7da53c27 100644 --- a/arch/powerpc/include/asm/kasan.h +++ b/arch/powerpc/include/asm/kasan.h @@ -3,14 +3,19 @@ #define __ASM_KASAN_H #if defined(CONFIG_KASAN) && !defined(CONFIG_CC_HAS_KASAN_MEMINTRINSIC_PREFIX) -#define _GLOBAL_KASAN(fn) _GLOBAL(__##fn) -#define _GLOBAL_TOC_KASAN(fn) _GLOBAL_TOC(__##fn) -#define EXPORT_SYMBOL_KASAN(fn) EXPORT_SYMBOL(__##fn) -#else +#define _GLOBAL_KASAN(fn) \ + _GLOBAL(fn); \ + _GLOBAL(__##fn) +#define _GLOBAL_TOC_KASAN(fn) \ + _GLOBAL_TOC(fn); \ + _GLOBAL_TOC(__##fn) +#define EXPORT_SYMBOL_KASAN(fn) \ + EXPORT_SYMBOL(__##fn) +#else /* CONFIG_KASAN && !CONFIG_CC_HAS_KASAN_MEMINTRINSIC_PREFIX */ #define _GLOBAL_KASAN(fn) _GLOBAL(fn) #define _GLOBAL_TOC_KASAN(fn) _GLOBAL_TOC(fn) #define EXPORT_SYMBOL_KASAN(fn) -#endif +#endif /* CONFIG_KASAN && !CONFIG_CC_HAS_KASAN_MEMINTRINSIC_PREFIX */ #ifndef __ASSEMBLER__ diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h index 94aa1de2b06e1..fdeb97421785b 100644 --- a/arch/powerpc/include/asm/ptrace.h +++ b/arch/powerpc/include/asm/ptrace.h @@ -53,6 +53,9 @@ struct pt_regs unsigned long esr; }; unsigned long result; + unsigned long exit_flags; + /* Maintain 16 byte interrupt stack alignment */ + unsigned long __pt_regs_pad[3]; }; }; #if defined(CONFIG_PPC64) || defined(CONFIG_PPC_KUAP) @@ -174,9 +177,6 @@ extern unsigned long profile_pc(struct pt_regs *regs); #define profile_pc(regs) instruction_pointer(regs) #endif -long do_syscall_trace_enter(struct pt_regs *regs); -void do_syscall_trace_leave(struct pt_regs *regs); - static inline void set_return_regs_changed(void) { #ifdef CONFIG_PPC_BOOK3S_64 diff --git a/arch/powerpc/include/asm/signal.h b/arch/powerpc/include/asm/signal.h index 922d43700fb41..21af92cdb237f 100644 --- a/arch/powerpc/include/asm/signal.h +++ b/arch/powerpc/include/asm/signal.h @@ -7,7 +7,6 @@ #include <uapi/asm/ptrace.h> struct pt_regs; -void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags); unsigned long get_min_sigframe_size_32(void); unsigned long get_min_sigframe_size_64(void); diff --git a/arch/powerpc/include/asm/stacktrace.h b/arch/powerpc/include/asm/stacktrace.h index 6149b53b3bc8e..987f2e996262f 100644 --- a/arch/powerpc/include/asm/stacktrace.h +++ b/arch/powerpc/include/asm/stacktrace.h @@ -10,4 +10,10 @@ void show_user_instructions(struct pt_regs *regs); +static __always_inline bool on_thread_stack(void) +{ + return !(((unsigned long)(current->stack) ^ current_stack_pointer) + & ~(THREAD_SIZE - 1)); +} + #endif /* _ASM_POWERPC_STACKTRACE_H */ diff --git a/arch/powerpc/include/asm/syscall.h b/arch/powerpc/include/asm/syscall.h index 4b3c52ed6e9d2..834fcc4f7b543 100644 --- a/arch/powerpc/include/asm/syscall.h +++ b/arch/powerpc/include/asm/syscall.h @@ -139,4 +139,9 @@ static inline int syscall_get_arch(struct task_struct *task) else return AUDIT_ARCH_PPC64; } + +static inline bool arch_syscall_is_vdso_sigreturn(struct pt_regs *regs) +{ + return false; +} #endif /* _ASM_SYSCALL_H */ diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h index 97f35f9b1a96e..ee3b9adb5b671 100644 --- a/arch/powerpc/include/asm/thread_info.h +++ b/arch/powerpc/include/asm/thread_info.h @@ -57,6 +57,7 @@ struct thread_info { #ifdef CONFIG_SMP unsigned int cpu; #endif + unsigned long syscall_work; /* SYSCALL_WORK_ flags */ unsigned long local_flags; /* private flags for thread */ #ifdef CONFIG_LIVEPATCH_64 unsigned long *livepatch_sp; diff --git a/arch/powerpc/include/uapi/asm/ptrace.h b/arch/powerpc/include/uapi/asm/ptrace.h index 01e630149d48e..a393b7f2760a2 100644 --- a/arch/powerpc/include/uapi/asm/ptrace.h +++ b/arch/powerpc/include/uapi/asm/ptrace.h @@ -55,6 +55,8 @@ struct pt_regs unsigned long dar; /* Fault registers */ unsigned long dsisr; /* on 4xx/Book-E used for ESR */ unsigned long result; /* Result of a system call */ + unsigned long exit_flags; /* System call exit flags */ + unsigned long __pt_regs_pad[3]; /* Maintain 16 byte interrupt stack alignment */ }; #endif /* __ASSEMBLER__ */ @@ -114,10 +116,12 @@ struct pt_regs #define PT_DAR 41 #define PT_DSISR 42 #define PT_RESULT 43 -#define PT_DSCR 44 -#define PT_REGS_COUNT 44 +#define PT_EXIT_FLAGS 44 +#define PT_PAD 47 /* 3 times */ +#define PT_DSCR 48 +#define PT_REGS_COUNT 48 -#define PT_FPR0 48 /* each FP reg occupies 2 slots in this space */ +#define PT_FPR0 (PT_REGS_COUNT + 4) /* each FP reg occupies 2 slots in this space */ #ifndef __powerpc64__ @@ -129,7 +133,7 @@ struct pt_regs #define PT_FPSCR (PT_FPR0 + 32) /* each FP reg occupies 1 slot in 64-bit space */ -#define PT_VR0 82 /* each Vector reg occupies 2 slots in 64-bit */ +#define PT_VR0 (PT_FPSCR + 2) /* <82> each Vector reg occupies 2 slots in 64-bit */ #define PT_VSCR (PT_VR0 + 32*2 + 1) #define PT_VRSAVE (PT_VR0 + 33*2) @@ -137,7 +141,7 @@ struct pt_regs /* * Only store first 32 VSRs here. The second 32 VSRs in VR0-31 */ -#define PT_VSR0 150 /* each VSR reg occupies 2 slots in 64-bit */ +#define PT_VSR0 (PT_VRSAVE + 2) /* each VSR reg occupies 2 slots in 64-bit */ #define PT_VSR31 (PT_VSR0 + 2*31) #endif /* __powerpc64__ */ diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c index e63bfde13e031..89a999be13521 100644 --- a/arch/powerpc/kernel/interrupt.c +++ b/arch/powerpc/kernel/interrupt.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include <linux/context_tracking.h> +#include <linux/entry-common.h> #include <linux/err.h> #include <linux/compat.h> #include <linux/rseq.h> @@ -25,10 +26,6 @@ unsigned long global_dbcr0[NR_CPUS]; #endif -#if defined(CONFIG_PREEMPT_DYNAMIC) -DEFINE_STATIC_KEY_TRUE(sk_dynamic_irqentry_exit_cond_resched); -#endif - #ifdef CONFIG_PPC_BOOK3S_64 DEFINE_STATIC_KEY_FALSE(interrupt_exit_not_reentrant); static inline bool exit_must_hard_disable(void) @@ -78,181 +75,6 @@ static notrace __always_inline bool prep_irq_for_enabled_exit(bool restartable) return true; } -static notrace void booke_load_dbcr0(void) -{ -#ifdef CONFIG_PPC_ADV_DEBUG_REGS - unsigned long dbcr0 = current->thread.debug.dbcr0; - - if (likely(!(dbcr0 & DBCR0_IDM))) - return; - - /* - * Check to see if the dbcr0 register is set up to debug. - * Use the internal debug mode bit to do this. - */ - mtmsr(mfmsr() & ~MSR_DE); - if (IS_ENABLED(CONFIG_PPC32)) { - isync(); - global_dbcr0[smp_processor_id()] = mfspr(SPRN_DBCR0); - } - mtspr(SPRN_DBCR0, dbcr0); - mtspr(SPRN_DBSR, -1); -#endif -} - -static notrace void check_return_regs_valid(struct pt_regs *regs) -{ -#ifdef CONFIG_PPC_BOOK3S_64 - unsigned long trap, srr0, srr1; - static bool warned; - u8 *validp; - char *h; - - if (trap_is_scv(regs)) - return; - - trap = TRAP(regs); - // EE in HV mode sets HSRRs like 0xea0 - if (cpu_has_feature(CPU_FTR_HVMODE) && trap == INTERRUPT_EXTERNAL) - trap = 0xea0; - - switch (trap) { - case 0x980: - case INTERRUPT_H_DATA_STORAGE: - case 0xe20: - case 0xe40: - case INTERRUPT_HMI: - case 0xe80: - case 0xea0: - case INTERRUPT_H_FAC_UNAVAIL: - case 0x1200: - case 0x1500: - case 0x1600: - case 0x1800: - validp = &local_paca->hsrr_valid; - if (!READ_ONCE(*validp)) - return; - - srr0 = mfspr(SPRN_HSRR0); - srr1 = mfspr(SPRN_HSRR1); - h = "H"; - - break; - default: - validp = &local_paca->srr_valid; - if (!READ_ONCE(*validp)) - return; - - srr0 = mfspr(SPRN_SRR0); - srr1 = mfspr(SPRN_SRR1); - h = ""; - break; - } - - if (srr0 == regs->nip && srr1 == regs->msr) - return; - - /* - * A NMI / soft-NMI interrupt may have come in after we found - * srr_valid and before the SRRs are loaded. The interrupt then - * comes in and clobbers SRRs and clears srr_valid. Then we load - * the SRRs here and test them above and find they don't match. - * - * Test validity again after that, to catch such false positives. - * - * This test in general will have some window for false negatives - * and may not catch and fix all such cases if an NMI comes in - * later and clobbers SRRs without clearing srr_valid, but hopefully - * such things will get caught most of the time, statistically - * enough to be able to get a warning out. - */ - if (!READ_ONCE(*validp)) - return; - - if (!data_race(warned)) { - data_race(warned = true); - printk("%sSRR0 was: %lx should be: %lx\n", h, srr0, regs->nip); - printk("%sSRR1 was: %lx should be: %lx\n", h, srr1, regs->msr); - show_regs(regs); - } - - WRITE_ONCE(*validp, 0); /* fixup */ -#endif -} - -static notrace unsigned long -interrupt_exit_user_prepare_main(unsigned long ret, struct pt_regs *regs) -{ - unsigned long ti_flags; - -again: - ti_flags = read_thread_flags(); - while (unlikely(ti_flags & (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM))) { - local_irq_enable(); - if (ti_flags & (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY)) { - schedule(); - } else { - /* - * SIGPENDING must restore signal handler function - * argument GPRs, and some non-volatiles (e.g., r1). - * Restore all for now. This could be made lighter. - */ - if (ti_flags & _TIF_SIGPENDING) - ret |= _TIF_RESTOREALL; - do_notify_resume(regs, ti_flags); - } - local_irq_disable(); - ti_flags = read_thread_flags(); - } - - if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && IS_ENABLED(CONFIG_PPC_FPU)) { - if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) && - unlikely((ti_flags & _TIF_RESTORE_TM))) { - restore_tm_state(regs); - } else { - unsigned long mathflags = MSR_FP; - - if (cpu_has_feature(CPU_FTR_VSX)) - mathflags |= MSR_VEC | MSR_VSX; - else if (cpu_has_feature(CPU_FTR_ALTIVEC)) - mathflags |= MSR_VEC; - - /* - * If userspace MSR has all available FP bits set, - * then they are live and no need to restore. If not, - * it means the regs were given up and restore_math - * may decide to restore them (to avoid taking an FP - * fault). - */ - if ((regs->msr & mathflags) != mathflags) - restore_math(regs); - } - } - - check_return_regs_valid(regs); - - user_enter_irqoff(); - if (!prep_irq_for_enabled_exit(true)) { - user_exit_irqoff(); - local_irq_enable(); - local_irq_disable(); - goto again; - } - -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM - local_paca->tm_scratch = regs->msr; -#endif - - booke_load_dbcr0(); - - account_cpu_user_exit(); - - /* Restore user access locks last */ - kuap_user_restore(regs); - - return ret; -} - /* * This should be called after a syscall returns, with r3 the return value * from the syscall. If this function returns non-zero, the system call @@ -267,17 +89,12 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3, long scv) { unsigned long ti_flags; - unsigned long ret = 0; bool is_not_scv = !IS_ENABLED(CONFIG_PPC_BOOK3S_64) || !scv; - CT_WARN_ON(ct_state() == CT_STATE_USER); - kuap_assert_locked(); regs->result = r3; - - /* Check whether the syscall is issued inside a restartable sequence */ - rseq_syscall(regs); + regs->exit_flags = 0; ti_flags = read_thread_flags(); @@ -290,7 +107,7 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3, if (unlikely(ti_flags & _TIF_PERSYSCALL_MASK)) { if (ti_flags & _TIF_RESTOREALL) - ret = _TIF_RESTOREALL; + regs->exit_flags = _TIF_RESTOREALL; else regs->gpr[3] = r3; clear_bits(_TIF_PERSYSCALL_MASK, ¤t_thread_info()->flags); @@ -299,18 +116,28 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3, } if (unlikely(ti_flags & _TIF_SYSCALL_DOTRACE)) { - do_syscall_trace_leave(regs); - ret |= _TIF_RESTOREALL; + regs->exit_flags |= _TIF_RESTOREALL; } - local_irq_disable(); - ret = interrupt_exit_user_prepare_main(ret, regs); + syscall_exit_to_user_mode(regs); + +again: + user_enter_irqoff(); + if (!prep_irq_for_enabled_exit(true)) { + user_exit_irqoff(); + local_irq_enable(); + local_irq_disable(); + goto again; + } + + /* Restore user access locks last */ + kuap_user_restore(regs); #ifdef CONFIG_PPC64 - regs->exit_result = ret; + regs->exit_result = regs->exit_flags; #endif - return ret; + return regs->exit_flags; } #ifdef CONFIG_PPC64 @@ -330,13 +157,16 @@ notrace unsigned long syscall_exit_restart(unsigned long r3, struct pt_regs *reg set_kuap(AMR_KUAP_BLOCKED); #endif - trace_hardirqs_off(); - user_exit_irqoff(); - account_cpu_user_entry(); - - BUG_ON(!user_mode(regs)); +again: + user_enter_irqoff(); + if (!prep_irq_for_enabled_exit(true)) { + user_exit_irqoff(); + local_irq_enable(); + local_irq_disable(); + goto again; + } - regs->exit_result = interrupt_exit_user_prepare_main(regs->exit_result, regs); + regs->exit_result |= regs->exit_flags; return regs->exit_result; } @@ -347,8 +177,7 @@ notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs) unsigned long ret; BUG_ON(regs_is_unrecoverable(regs)); - BUG_ON(arch_irq_disabled_regs(regs)); - CT_WARN_ON(ct_state() == CT_STATE_USER); + BUG_ON(regs_irqs_disabled(regs)); /* * We don't need to restore AMR on the way back to userspace for KUAP. @@ -357,8 +186,21 @@ notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs) kuap_assert_locked(); local_irq_disable(); + regs->exit_flags = 0; +again: + check_return_regs_valid(regs); + user_enter_irqoff(); + if (!prep_irq_for_enabled_exit(true)) { + user_exit_irqoff(); + local_irq_enable(); + local_irq_disable(); + goto again; + } - ret = interrupt_exit_user_prepare_main(0, regs); + /* Restore user access locks last */ + kuap_user_restore(regs); + + ret = regs->exit_flags; #ifdef CONFIG_PPC64 regs->exit_result = ret; @@ -396,17 +238,10 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs) local_irq_disable(); - if (!arch_irq_disabled_regs(regs)) { + if (!regs_irqs_disabled(regs)) { /* Returning to a kernel context with local irqs enabled. */ WARN_ON_ONCE(!(regs->msr & MSR_EE)); again: - if (need_irq_preemption()) { - /* Return to preemptible kernel context */ - if (unlikely(read_thread_flags() & _TIF_NEED_RESCHED)) { - if (preempt_count() == 0) - preempt_schedule_irq(); - } - } check_return_regs_valid(regs); @@ -479,7 +314,6 @@ notrace unsigned long interrupt_exit_user_restart(struct pt_regs *regs) #endif trace_hardirqs_off(); - user_exit_irqoff(); account_cpu_user_entry(); BUG_ON(!user_mode(regs)); diff --git a/arch/powerpc/kernel/ptrace/ptrace.c b/arch/powerpc/kernel/ptrace/ptrace.c index c6997df632873..316d4f5ead8ed 100644 --- a/arch/powerpc/kernel/ptrace/ptrace.c +++ b/arch/powerpc/kernel/ptrace/ptrace.c @@ -21,9 +21,6 @@ #include <asm/switch_to.h> #include <asm/debug.h> -#define CREATE_TRACE_POINTS -#include <trace/events/syscalls.h> - #include "ptrace-decl.h" /* @@ -195,144 +192,6 @@ long arch_ptrace(struct task_struct *child, long request, return ret; } -#ifdef CONFIG_SECCOMP -static int do_seccomp(struct pt_regs *regs) -{ - if (!test_thread_flag(TIF_SECCOMP)) - return 0; - - /* - * The ABI we present to seccomp tracers is that r3 contains - * the syscall return value and orig_gpr3 contains the first - * syscall parameter. This is different to the ptrace ABI where - * both r3 and orig_gpr3 contain the first syscall parameter. - */ - regs->gpr[3] = -ENOSYS; - - /* - * We use the __ version here because we have already checked - * TIF_SECCOMP. If this fails, there is nothing left to do, we - * have already loaded -ENOSYS into r3, or seccomp has put - * something else in r3 (via SECCOMP_RET_ERRNO/TRACE). - */ - if (__secure_computing()) - return -1; - - /* - * The syscall was allowed by seccomp, restore the register - * state to what audit expects. - * Note that we use orig_gpr3, which means a seccomp tracer can - * modify the first syscall parameter (in orig_gpr3) and also - * allow the syscall to proceed. - */ - regs->gpr[3] = regs->orig_gpr3; - - return 0; -} -#else -static inline int do_seccomp(struct pt_regs *regs) { return 0; } -#endif /* CONFIG_SECCOMP */ - -/** - * do_syscall_trace_enter() - Do syscall tracing on kernel entry. - * @regs: the pt_regs of the task to trace (current) - * - * Performs various types of tracing on syscall entry. This includes seccomp, - * ptrace, syscall tracepoints and audit. - * - * The pt_regs are potentially visible to userspace via ptrace, so their - * contents is ABI. - * - * One or more of the tracers may modify the contents of pt_regs, in particular - * to modify arguments or even the syscall number itself. - * - * It's also possible that a tracer can choose to reject the system call. In - * that case this function will return an illegal syscall number, and will put - * an appropriate return value in regs->r3. - * - * Return: the (possibly changed) syscall number. - */ -long do_syscall_trace_enter(struct pt_regs *regs) -{ - u32 flags; - - flags = read_thread_flags() & (_TIF_SYSCALL_EMU | _TIF_SYSCALL_TRACE); - - if (flags) { - int rc = ptrace_report_syscall_entry(regs); - - if (unlikely(flags & _TIF_SYSCALL_EMU)) { - /* - * A nonzero return code from - * ptrace_report_syscall_entry() tells us to prevent - * the syscall execution, but we are not going to - * execute it anyway. - * - * Returning -1 will skip the syscall execution. We want - * to avoid clobbering any registers, so we don't goto - * the skip label below. - */ - return -1; - } - - if (rc) { - /* - * The tracer decided to abort the syscall. Note that - * the tracer may also just change regs->gpr[0] to an - * invalid syscall number, that is handled below on the - * exit path. - */ - goto skip; - } - } - - /* Run seccomp after ptrace; allow it to set gpr[3]. */ - if (do_seccomp(regs)) - return -1; - - /* Avoid trace and audit when syscall is invalid. */ - if (regs->gpr[0] >= NR_syscalls) - goto skip; - - if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) - trace_sys_enter(regs, regs->gpr[0]); - - if (!is_32bit_task()) - audit_syscall_entry(regs->gpr[0], regs->gpr[3], regs->gpr[4], - regs->gpr[5], regs->gpr[6]); - else - audit_syscall_entry(regs->gpr[0], - regs->gpr[3] & 0xffffffff, - regs->gpr[4] & 0xffffffff, - regs->gpr[5] & 0xffffffff, - regs->gpr[6] & 0xffffffff); - - /* Return the possibly modified but valid syscall number */ - return regs->gpr[0]; - -skip: - /* - * If we are aborting explicitly, or if the syscall number is - * now invalid, set the return value to -ENOSYS. - */ - regs->gpr[3] = -ENOSYS; - return -1; -} - -void do_syscall_trace_leave(struct pt_regs *regs) -{ - int step; - - audit_syscall_exit(regs); - - if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) - trace_sys_exit(regs, regs->result); - - step = test_thread_flag(TIF_SINGLESTEP); - if (step || test_thread_flag(TIF_SYSCALL_TRACE)) - ptrace_report_syscall_exit(regs, step); -} - void __init pt_regs_check(void); /* @@ -432,6 +291,7 @@ void __init pt_regs_check(void) CHECK_REG(PT_DAR, dar); CHECK_REG(PT_DSISR, dsisr); CHECK_REG(PT_RESULT, result); + CHECK_REG(PT_EXIT_FLAGS, exit_flags); #undef CHECK_REG BUILD_BUG_ON(PT_REGS_COUNT != sizeof(struct user_pt_regs) / sizeof(unsigned long)); diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c index aa17e62f37547..bb42a8b6c6422 100644 --- a/arch/powerpc/kernel/signal.c +++ b/arch/powerpc/kernel/signal.c @@ -6,6 +6,7 @@ * Extracted from signal_32.c and signal_64.c */ +#include <linux/entry-common.h> #include <linux/resume_user_mode.h> #include <linux/signal.h> #include <linux/uprobes.h> @@ -292,23 +293,6 @@ static void do_signal(struct task_struct *tsk) signal_setup_done(ret, &ksig, test_thread_flag(TIF_SINGLESTEP)); } -void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags) -{ - if (thread_info_flags & _TIF_UPROBE) - uprobe_notify_resume(regs); - - if (thread_info_flags & _TIF_PATCH_PENDING) - klp_update_patch_state(current); - - if (thread_info_flags & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL)) { - BUG_ON(regs != current->thread.regs); - do_signal(current); - } - - if (thread_info_flags & _TIF_NOTIFY_RESUME) - resume_user_mode_work(regs); -} - static unsigned long get_tm_stackpointer(struct task_struct *tsk) { /* When in an active transaction that takes a signal, we need to be @@ -368,3 +352,10 @@ void signal_fault(struct task_struct *tsk, struct pt_regs *regs, printk_ratelimited(regs->msr & MSR_64BIT ? fm64 : fm32, tsk->comm, task_pid_nr(tsk), where, ptr, regs->nip, regs->link); } + +void arch_do_signal_or_restart(struct pt_regs *regs) +{ + BUG_ON(regs != current->thread.regs); + regs->exit_flags |= _TIF_RESTOREALL; + do_signal(current); +} diff --git a/arch/powerpc/kernel/syscall.c b/arch/powerpc/kernel/syscall.c index b762677f87371..a9da2af6efa87 100644 --- a/arch/powerpc/kernel/syscall.c +++ b/arch/powerpc/kernel/syscall.c @@ -3,6 +3,7 @@ #include <linux/compat.h> #include <linux/context_tracking.h> #include <linux/randomize_kstack.h> +#include <linux/entry-common.h> #include <asm/interrupt.h> #include <asm/kup.h> @@ -18,124 +19,10 @@ notrace long system_call_exception(struct pt_regs *regs, unsigned long r0) long ret; syscall_fn f; - kuap_lock(); - - if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) - BUG_ON(irq_soft_mask_return() != IRQS_ALL_DISABLED); - - trace_hardirqs_off(); /* finish reconciling */ - - CT_WARN_ON(ct_state() == CT_STATE_KERNEL); - user_exit_irqoff(); - add_random_kstack_offset(); + r0 = syscall_enter_from_user_mode(regs, r0); - BUG_ON(regs_is_unrecoverable(regs)); - BUG_ON(!user_mode(regs)); - BUG_ON(arch_irq_disabled_regs(regs)); - -#ifdef CONFIG_PPC_PKEY - if (mmu_has_feature(MMU_FTR_PKEY)) { - unsigned long amr, iamr; - bool flush_needed = false; - /* - * When entering from userspace we mostly have the AMR/IAMR - * different from kernel default values. Hence don't compare. - */ - amr = mfspr(SPRN_AMR); - iamr = mfspr(SPRN_IAMR); - regs->amr = amr; - regs->iamr = iamr; - if (mmu_has_feature(MMU_FTR_KUAP)) { - mtspr(SPRN_AMR, AMR_KUAP_BLOCKED); - flush_needed = true; - } - if (mmu_has_feature(MMU_FTR_BOOK3S_KUEP)) { - mtspr(SPRN_IAMR, AMR_KUEP_BLOCKED); - flush_needed = true; - } - if (flush_needed) - isync(); - } else -#endif - kuap_assert_locked(); - - booke_restore_dbcr0(); - - account_cpu_user_entry(); - - account_stolen_time(); - - /* - * This is not required for the syscall exit path, but makes the - * stack frame look nicer. If this was initialised in the first stack - * frame, or if the unwinder was taught the first stack frame always - * returns to user with IRQS_ENABLED, this store could be avoided! - */ - irq_soft_mask_regs_set_state(regs, IRQS_ENABLED); - - /* - * If system call is called with TM active, set _TIF_RESTOREALL to - * prevent RFSCV being used to return to userspace, because POWER9 - * TM implementation has problems with this instruction returning to - * transactional state. Final register values are not relevant because - * the transaction will be aborted upon return anyway. Or in the case - * of unsupported_scv SIGILL fault, the return state does not much - * matter because it's an edge case. - */ - if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) && - unlikely(MSR_TM_TRANSACTIONAL(regs->msr))) - set_bits(_TIF_RESTOREALL, ¤t_thread_info()->flags); - - /* - * If the system call was made with a transaction active, doom it and - * return without performing the system call. Unless it was an - * unsupported scv vector, in which case it's treated like an illegal - * instruction. - */ -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM - if (unlikely(MSR_TM_TRANSACTIONAL(regs->msr)) && - !trap_is_unsupported_scv(regs)) { - /* Enable TM in the kernel, and disable EE (for scv) */ - hard_irq_disable(); - mtmsr(mfmsr() | MSR_TM); - - /* tabort, this dooms the transaction, nothing else */ - asm volatile(".long 0x7c00071d | ((%0) << 16)" - :: "r"(TM_CAUSE_SYSCALL|TM_CAUSE_PERSISTENT)); - - /* - * Userspace will never see the return value. Execution will - * resume after the tbegin. of the aborted transaction with the - * checkpointed register state. A context switch could occur - * or signal delivered to the process before resuming the - * doomed transaction context, but that should all be handled - * as expected. - */ - return -ENOSYS; - } -#endif // CONFIG_PPC_TRANSACTIONAL_MEM - - local_irq_enable(); - - if (unlikely(read_thread_flags() & _TIF_SYSCALL_DOTRACE)) { - if (unlikely(trap_is_unsupported_scv(regs))) { - /* Unsupported scv vector */ - _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); - return regs->gpr[3]; - } - /* - * We use the return value of do_syscall_trace_enter() as the - * syscall number. If the syscall was rejected for any reason - * do_syscall_trace_enter() returns an invalid syscall number - * and the test against NR_syscalls will fail and the return - * value to be used is in regs->gpr[3]. - */ - r0 = do_syscall_trace_enter(regs); - if (unlikely(r0 >= NR_syscalls)) - return regs->gpr[3]; - - } else if (unlikely(r0 >= NR_syscalls)) { + if (unlikely(r0 >= NR_syscalls)) { if (unlikely(trap_is_unsupported_scv(regs))) { /* Unsupported scv vector */ _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index cb8e9357383e9..629f2a2d4780e 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -1956,7 +1956,7 @@ DEFINE_INTERRUPT_HANDLER_RAW(performance_monitor_exception) * prevent hash faults on user addresses when reading callchains (and * looks better from an irq tracing perspective). */ - if (IS_ENABLED(CONFIG_PPC64) && unlikely(arch_irq_disabled_regs(regs))) + if (IS_ENABLED(CONFIG_PPC64) && unlikely(regs_irqs_disabled(regs))) performance_monitor_exception_nmi(regs); else performance_monitor_exception_async(regs); diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c index 764001deb0605..c40c693684764 100644 --- a/arch/powerpc/kernel/watchdog.c +++ b/arch/powerpc/kernel/watchdog.c @@ -376,7 +376,7 @@ DEFINE_INTERRUPT_HANDLER_NMI(soft_nmi_interrupt) u64 tb; /* should only arrive from kernel, with irqs disabled */ - WARN_ON_ONCE(!arch_irq_disabled_regs(regs)); + WARN_ON_ONCE(!regs_irqs_disabled(regs)); if (!cpumask_test_cpu(cpu, &wd_cpus_enabled)) return 0; diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 2e6adf5b95c40..802bc9b7ef6fa 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -2483,7 +2483,7 @@ static void __perf_event_interrupt(struct pt_regs *regs) * will trigger a PMI after waking up from idle. Since counter values are _not_ * saved/restored in idle path, can lead to below "Can't find PMC" message. */ - if (unlikely(!found) && !arch_irq_disabled_regs(regs)) + if (unlikely(!found) && !regs_irqs_disabled(regs)) printk_ratelimited(KERN_WARNING "Can't find PMC that caused IRQ\n"); /* |
