diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2025-03-24 20:55:03 -0700 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2025-03-24 20:55:03 -0700 |
| commit | 23608993bb224968a17d6db0df435ddb8e77412b (patch) | |
| tree | f606373c11942b9ad75a89cf43a9ecb00172b0c1 /arch/x86 | |
| parent | 3ba7dfb8da62c43ea02bc278863367c2b0427cc1 (diff) | |
| parent | 35e6b537af85d97e0aafd8f2829dfa884a22df20 (diff) | |
| download | ath-23608993bb224968a17d6db0df435ddb8e77412b.tar.gz | |
Merge tag 'locking-core-2025-03-22' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull locking updates from Ingo Molnar:
"Locking primitives:
- Micro-optimize percpu_{,try_}cmpxchg{64,128}_op() and
{,try_}cmpxchg{64,128} on x86 (Uros Bizjak)
- mutexes: extend debug checks in mutex_lock() (Yunhui Cui)
- Misc cleanups (Uros Bizjak)
Lockdep:
- Fix might_fault() lockdep check of current->mm->mmap_lock (Peter
Zijlstra)
- Don't disable interrupts on RT in disable_irq_nosync_lockdep.*()
(Sebastian Andrzej Siewior)
- Disable KASAN instrumentation of lockdep.c (Waiman Long)
- Add kasan_check_byte() check in lock_acquire() (Waiman Long)
- Misc cleanups (Sebastian Andrzej Siewior)
Rust runtime integration:
- Use Pin for all LockClassKey usages (Mitchell Levy)
- sync: Add accessor for the lock behind a given guard (Alice Ryhl)
- sync: condvar: Add wait_interruptible_freezable() (Alice Ryhl)
- sync: lock: Add an example for Guard:: Lock_ref() (Boqun Feng)
Split-lock detection feature (x86):
- Fix warning mode with disabled mitigation mode (Maksim Davydov)
Locking events:
- Add locking events for rtmutex slow paths (Waiman Long)
- Add locking events for lockdep (Waiman Long)"
* tag 'locking-core-2025-03-22' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
lockdep: Remove disable_irq_lockdep()
lockdep: Don't disable interrupts on RT in disable_irq_nosync_lockdep.*()
rust: lockdep: Use Pin for all LockClassKey usages
rust: sync: condvar: Add wait_interruptible_freezable()
rust: sync: lock: Add an example for Guard:: Lock_ref()
rust: sync: Add accessor for the lock behind a given guard
locking/lockdep: Add kasan_check_byte() check in lock_acquire()
locking/lockdep: Disable KASAN instrumentation of lockdep.c
locking/lock_events: Add locking events for lockdep
locking/lock_events: Add locking events for rtmutex slow paths
x86/split_lock: Fix the delayed detection logic
lockdep/mm: Fix might_fault() lockdep check of current->mm->mmap_lock
x86/locking: Remove semicolon from "lock" prefix
locking/mutex: Add MUTEX_WARN_ON() into fast path
x86/locking: Use asm_inline for {,try_}cmpxchg{64,128} emulations
x86/locking: Use ALT_OUTPUT_SP() for percpu_{,try_}cmpxchg{64,128}_op()
Diffstat (limited to 'arch/x86')
| -rw-r--r-- | arch/x86/include/asm/alternative.h | 2 | ||||
| -rw-r--r-- | arch/x86/include/asm/barrier.h | 8 | ||||
| -rw-r--r-- | arch/x86/include/asm/cmpxchg.h | 4 | ||||
| -rw-r--r-- | arch/x86/include/asm/cmpxchg_32.h | 36 | ||||
| -rw-r--r-- | arch/x86/include/asm/edac.h | 2 | ||||
| -rw-r--r-- | arch/x86/include/asm/percpu.h | 77 | ||||
| -rw-r--r-- | arch/x86/include/asm/sync_bitops.h | 12 | ||||
| -rw-r--r-- | arch/x86/kernel/cpu/bus_lock.c | 20 |
8 files changed, 87 insertions, 74 deletions
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index e3903b731305c..3b3d3aa19acd0 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -48,7 +48,7 @@ ".popsection\n" \ "671:" -#define LOCK_PREFIX LOCK_PREFIX_HERE "\n\tlock; " +#define LOCK_PREFIX LOCK_PREFIX_HERE "\n\tlock " #else /* ! CONFIG_SMP */ #define LOCK_PREFIX_HERE "" diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h index 7b44b3c4cce11..db70832232d4a 100644 --- a/arch/x86/include/asm/barrier.h +++ b/arch/x86/include/asm/barrier.h @@ -12,11 +12,11 @@ */ #ifdef CONFIG_X86_32 -#define mb() asm volatile(ALTERNATIVE("lock; addl $0,-4(%%esp)", "mfence", \ +#define mb() asm volatile(ALTERNATIVE("lock addl $0,-4(%%esp)", "mfence", \ X86_FEATURE_XMM2) ::: "memory", "cc") -#define rmb() asm volatile(ALTERNATIVE("lock; addl $0,-4(%%esp)", "lfence", \ +#define rmb() asm volatile(ALTERNATIVE("lock addl $0,-4(%%esp)", "lfence", \ X86_FEATURE_XMM2) ::: "memory", "cc") -#define wmb() asm volatile(ALTERNATIVE("lock; addl $0,-4(%%esp)", "sfence", \ +#define wmb() asm volatile(ALTERNATIVE("lock addl $0,-4(%%esp)", "sfence", \ X86_FEATURE_XMM2) ::: "memory", "cc") #else #define __mb() asm volatile("mfence":::"memory") @@ -50,7 +50,7 @@ #define __dma_rmb() barrier() #define __dma_wmb() barrier() -#define __smp_mb() asm volatile("lock; addl $0,-4(%%" _ASM_SP ")" ::: "memory", "cc") +#define __smp_mb() asm volatile("lock addl $0,-4(%%" _ASM_SP ")" ::: "memory", "cc") #define __smp_rmb() dma_rmb() #define __smp_wmb() barrier() diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h index 5612648b02022..fd8afc1f5f6bb 100644 --- a/arch/x86/include/asm/cmpxchg.h +++ b/arch/x86/include/asm/cmpxchg.h @@ -134,7 +134,7 @@ extern void __add_wrong_size(void) __raw_cmpxchg((ptr), (old), (new), (size), LOCK_PREFIX) #define __sync_cmpxchg(ptr, old, new, size) \ - __raw_cmpxchg((ptr), (old), (new), (size), "lock; ") + __raw_cmpxchg((ptr), (old), (new), (size), "lock ") #define __cmpxchg_local(ptr, old, new, size) \ __raw_cmpxchg((ptr), (old), (new), (size), "") @@ -222,7 +222,7 @@ extern void __add_wrong_size(void) __raw_try_cmpxchg((ptr), (pold), (new), (size), LOCK_PREFIX) #define __sync_try_cmpxchg(ptr, pold, new, size) \ - __raw_try_cmpxchg((ptr), (pold), (new), (size), "lock; ") + __raw_try_cmpxchg((ptr), (pold), (new), (size), "lock ") #define __try_cmpxchg_local(ptr, pold, new, size) \ __raw_try_cmpxchg((ptr), (pold), (new), (size), "") diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h index fd1282a783ddb..8806c646d452d 100644 --- a/arch/x86/include/asm/cmpxchg_32.h +++ b/arch/x86/include/asm/cmpxchg_32.h @@ -91,19 +91,21 @@ static __always_inline bool __try_cmpxchg64_local(volatile u64 *ptr, u64 *oldp, union __u64_halves o = { .full = (_old), }, \ n = { .full = (_new), }; \ \ - asm volatile(ALTERNATIVE(_lock_loc \ - "call cmpxchg8b_emu", \ - _lock "cmpxchg8b %a[ptr]", X86_FEATURE_CX8) \ - : ALT_OUTPUT_SP("+a" (o.low), "+d" (o.high)) \ - : "b" (n.low), "c" (n.high), [ptr] "S" (_ptr) \ - : "memory"); \ + asm_inline volatile( \ + ALTERNATIVE(_lock_loc \ + "call cmpxchg8b_emu", \ + _lock "cmpxchg8b %a[ptr]", X86_FEATURE_CX8) \ + : ALT_OUTPUT_SP("+a" (o.low), "+d" (o.high)) \ + : "b" (n.low), "c" (n.high), \ + [ptr] "S" (_ptr) \ + : "memory"); \ \ o.full; \ }) static __always_inline u64 arch_cmpxchg64(volatile u64 *ptr, u64 old, u64 new) { - return __arch_cmpxchg64_emu(ptr, old, new, LOCK_PREFIX_HERE, "lock; "); + return __arch_cmpxchg64_emu(ptr, old, new, LOCK_PREFIX_HERE, "lock "); } #define arch_cmpxchg64 arch_cmpxchg64 @@ -119,14 +121,16 @@ static __always_inline u64 arch_cmpxchg64_local(volatile u64 *ptr, u64 old, u64 n = { .full = (_new), }; \ bool ret; \ \ - asm volatile(ALTERNATIVE(_lock_loc \ - "call cmpxchg8b_emu", \ - _lock "cmpxchg8b %a[ptr]", X86_FEATURE_CX8) \ - CC_SET(e) \ - : ALT_OUTPUT_SP(CC_OUT(e) (ret), \ - "+a" (o.low), "+d" (o.high)) \ - : "b" (n.low), "c" (n.high), [ptr] "S" (_ptr) \ - : "memory"); \ + asm_inline volatile( \ + ALTERNATIVE(_lock_loc \ + "call cmpxchg8b_emu", \ + _lock "cmpxchg8b %a[ptr]", X86_FEATURE_CX8) \ + CC_SET(e) \ + : ALT_OUTPUT_SP(CC_OUT(e) (ret), \ + "+a" (o.low), "+d" (o.high)) \ + : "b" (n.low), "c" (n.high), \ + [ptr] "S" (_ptr) \ + : "memory"); \ \ if (unlikely(!ret)) \ *(_oldp) = o.full; \ @@ -136,7 +140,7 @@ static __always_inline u64 arch_cmpxchg64_local(volatile u64 *ptr, u64 old, u64 static __always_inline bool arch_try_cmpxchg64(volatile u64 *ptr, u64 *oldp, u64 new) { - return __arch_try_cmpxchg64_emu(ptr, oldp, new, LOCK_PREFIX_HERE, "lock; "); + return __arch_try_cmpxchg64_emu(ptr, oldp, new, LOCK_PREFIX_HERE, "lock "); } #define arch_try_cmpxchg64 arch_try_cmpxchg64 diff --git a/arch/x86/include/asm/edac.h b/arch/x86/include/asm/edac.h index 426fc53ff8030..dfbd1ebb9f10b 100644 --- a/arch/x86/include/asm/edac.h +++ b/arch/x86/include/asm/edac.h @@ -13,7 +13,7 @@ static inline void edac_atomic_scrub(void *va, u32 size) * are interrupt, DMA and SMP safe. */ for (i = 0; i < size / 4; i++, virt_addr++) - asm volatile("lock; addl $0, %0"::"m" (*virt_addr)); + asm volatile("lock addl $0, %0"::"m" (*virt_addr)); } #endif /* _ASM_X86_EDAC_H */ diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index e525cd85f999f..08f5f61690b7a 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -348,15 +348,14 @@ do { \ old__.var = _oval; \ new__.var = _nval; \ \ - asm qual (ALTERNATIVE("call this_cpu_cmpxchg8b_emu", \ - "cmpxchg8b " __percpu_arg([var]), X86_FEATURE_CX8) \ - : [var] "+m" (__my_cpu_var(_var)), \ - "+a" (old__.low), \ - "+d" (old__.high) \ - : "b" (new__.low), \ - "c" (new__.high), \ - "S" (&(_var)) \ - : "memory"); \ + asm_inline qual ( \ + ALTERNATIVE("call this_cpu_cmpxchg8b_emu", \ + "cmpxchg8b " __percpu_arg([var]), X86_FEATURE_CX8) \ + : ALT_OUTPUT_SP([var] "+m" (__my_cpu_var(_var)), \ + "+a" (old__.low), "+d" (old__.high)) \ + : "b" (new__.low), "c" (new__.high), \ + "S" (&(_var)) \ + : "memory"); \ \ old__.var; \ }) @@ -378,17 +377,16 @@ do { \ old__.var = *_oval; \ new__.var = _nval; \ \ - asm qual (ALTERNATIVE("call this_cpu_cmpxchg8b_emu", \ - "cmpxchg8b " __percpu_arg([var]), X86_FEATURE_CX8) \ - CC_SET(z) \ - : CC_OUT(z) (success), \ - [var] "+m" (__my_cpu_var(_var)), \ - "+a" (old__.low), \ - "+d" (old__.high) \ - : "b" (new__.low), \ - "c" (new__.high), \ - "S" (&(_var)) \ - : "memory"); \ + asm_inline qual ( \ + ALTERNATIVE("call this_cpu_cmpxchg8b_emu", \ + "cmpxchg8b " __percpu_arg([var]), X86_FEATURE_CX8) \ + CC_SET(z) \ + : ALT_OUTPUT_SP(CC_OUT(z) (success), \ + [var] "+m" (__my_cpu_var(_var)), \ + "+a" (old__.low), "+d" (old__.high)) \ + : "b" (new__.low), "c" (new__.high), \ + "S" (&(_var)) \ + : "memory"); \ if (unlikely(!success)) \ *_oval = old__.var; \ \ @@ -419,15 +417,14 @@ do { \ old__.var = _oval; \ new__.var = _nval; \ \ - asm qual (ALTERNATIVE("call this_cpu_cmpxchg16b_emu", \ - "cmpxchg16b " __percpu_arg([var]), X86_FEATURE_CX16) \ - : [var] "+m" (__my_cpu_var(_var)), \ - "+a" (old__.low), \ - "+d" (old__.high) \ - : "b" (new__.low), \ - "c" (new__.high), \ - "S" (&(_var)) \ - : "memory"); \ + asm_inline qual ( \ + ALTERNATIVE("call this_cpu_cmpxchg16b_emu", \ + "cmpxchg16b " __percpu_arg([var]), X86_FEATURE_CX16) \ + : ALT_OUTPUT_SP([var] "+m" (__my_cpu_var(_var)), \ + "+a" (old__.low), "+d" (old__.high)) \ + : "b" (new__.low), "c" (new__.high), \ + "S" (&(_var)) \ + : "memory"); \ \ old__.var; \ }) @@ -449,19 +446,19 @@ do { \ old__.var = *_oval; \ new__.var = _nval; \ \ - asm qual (ALTERNATIVE("call this_cpu_cmpxchg16b_emu", \ - "cmpxchg16b " __percpu_arg([var]), X86_FEATURE_CX16) \ - CC_SET(z) \ - : CC_OUT(z) (success), \ - [var] "+m" (__my_cpu_var(_var)), \ - "+a" (old__.low), \ - "+d" (old__.high) \ - : "b" (new__.low), \ - "c" (new__.high), \ - "S" (&(_var)) \ - : "memory"); \ + asm_inline qual ( \ + ALTERNATIVE("call this_cpu_cmpxchg16b_emu", \ + "cmpxchg16b " __percpu_arg([var]), X86_FEATURE_CX16) \ + CC_SET(z) \ + : ALT_OUTPUT_SP(CC_OUT(z) (success), \ + [var] "+m" (__my_cpu_var(_var)), \ + "+a" (old__.low), "+d" (old__.high)) \ + : "b" (new__.low), "c" (new__.high), \ + "S" (&(_var)) \ + : "memory"); \ if (unlikely(!success)) \ *_oval = old__.var; \ + \ likely(success); \ }) diff --git a/arch/x86/include/asm/sync_bitops.h b/arch/x86/include/asm/sync_bitops.h index 6d8d6bc183b74..cd21a0405ac52 100644 --- a/arch/x86/include/asm/sync_bitops.h +++ b/arch/x86/include/asm/sync_bitops.h @@ -31,7 +31,7 @@ */ static inline void sync_set_bit(long nr, volatile unsigned long *addr) { - asm volatile("lock; " __ASM_SIZE(bts) " %1,%0" + asm volatile("lock " __ASM_SIZE(bts) " %1,%0" : "+m" (ADDR) : "Ir" (nr) : "memory"); @@ -49,7 +49,7 @@ static inline void sync_set_bit(long nr, volatile unsigned long *addr) */ static inline void sync_clear_bit(long nr, volatile unsigned long *addr) { - asm volatile("lock; " __ASM_SIZE(btr) " %1,%0" + asm volatile("lock " __ASM_SIZE(btr) " %1,%0" : "+m" (ADDR) : "Ir" (nr) : "memory"); @@ -66,7 +66,7 @@ static inline void sync_clear_bit(long nr, volatile unsigned long *addr) */ static inline void sync_change_bit(long nr, volatile unsigned long *addr) { - asm volatile("lock; " __ASM_SIZE(btc) " %1,%0" + asm volatile("lock " __ASM_SIZE(btc) " %1,%0" : "+m" (ADDR) : "Ir" (nr) : "memory"); @@ -82,7 +82,7 @@ static inline void sync_change_bit(long nr, volatile unsigned long *addr) */ static inline bool sync_test_and_set_bit(long nr, volatile unsigned long *addr) { - return GEN_BINARY_RMWcc("lock; " __ASM_SIZE(bts), *addr, c, "Ir", nr); + return GEN_BINARY_RMWcc("lock " __ASM_SIZE(bts), *addr, c, "Ir", nr); } /** @@ -95,7 +95,7 @@ static inline bool sync_test_and_set_bit(long nr, volatile unsigned long *addr) */ static inline int sync_test_and_clear_bit(long nr, volatile unsigned long *addr) { - return GEN_BINARY_RMWcc("lock; " __ASM_SIZE(btr), *addr, c, "Ir", nr); + return GEN_BINARY_RMWcc("lock " __ASM_SIZE(btr), *addr, c, "Ir", nr); } /** @@ -108,7 +108,7 @@ static inline int sync_test_and_clear_bit(long nr, volatile unsigned long *addr) */ static inline int sync_test_and_change_bit(long nr, volatile unsigned long *addr) { - return GEN_BINARY_RMWcc("lock; " __ASM_SIZE(btc), *addr, c, "Ir", nr); + return GEN_BINARY_RMWcc("lock " __ASM_SIZE(btc), *addr, c, "Ir", nr); } #define sync_test_bit(nr, addr) test_bit(nr, addr) diff --git a/arch/x86/kernel/cpu/bus_lock.c b/arch/x86/kernel/cpu/bus_lock.c index 6cba85c79d42d..97222efb4d2a6 100644 --- a/arch/x86/kernel/cpu/bus_lock.c +++ b/arch/x86/kernel/cpu/bus_lock.c @@ -192,7 +192,13 @@ static void __split_lock_reenable(struct work_struct *work) { sld_update_msr(true); } -static DECLARE_DELAYED_WORK(sl_reenable, __split_lock_reenable); +/* + * In order for each CPU to schedule its delayed work independently of the + * others, delayed work struct must be per-CPU. This is not required when + * sysctl_sld_mitigate is enabled because of the semaphore that limits + * the number of simultaneously scheduled delayed works to 1. + */ +static DEFINE_PER_CPU(struct delayed_work, sl_reenable); /* * If a CPU goes offline with pending delayed work to re-enable split lock @@ -213,7 +219,7 @@ static int splitlock_cpu_offline(unsigned int cpu) static void split_lock_warn(unsigned long ip) { - struct delayed_work *work; + struct delayed_work *work = NULL; int cpu; if (!current->reported_split_lock) @@ -235,11 +241,17 @@ static void split_lock_warn(unsigned long ip) if (down_interruptible(&buslock_sem) == -EINTR) return; work = &sl_reenable_unlock; - } else { - work = &sl_reenable; } cpu = get_cpu(); + + if (!work) { + work = this_cpu_ptr(&sl_reenable); + /* Deferred initialization of per-CPU struct */ + if (!work->work.func) + INIT_DELAYED_WORK(work, __split_lock_reenable); + } + schedule_delayed_work_on(cpu, work, 2); /* Disable split lock detection on this CPU to make progress */ |
