aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
authorLinus Torvalds <torvalds@linux-foundation.org>2025-03-24 20:55:03 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2025-03-24 20:55:03 -0700
commit23608993bb224968a17d6db0df435ddb8e77412b (patch)
treef606373c11942b9ad75a89cf43a9ecb00172b0c1 /arch/x86
parent3ba7dfb8da62c43ea02bc278863367c2b0427cc1 (diff)
parent35e6b537af85d97e0aafd8f2829dfa884a22df20 (diff)
downloadath-23608993bb224968a17d6db0df435ddb8e77412b.tar.gz
Merge tag 'locking-core-2025-03-22' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull locking updates from Ingo Molnar: "Locking primitives: - Micro-optimize percpu_{,try_}cmpxchg{64,128}_op() and {,try_}cmpxchg{64,128} on x86 (Uros Bizjak) - mutexes: extend debug checks in mutex_lock() (Yunhui Cui) - Misc cleanups (Uros Bizjak) Lockdep: - Fix might_fault() lockdep check of current->mm->mmap_lock (Peter Zijlstra) - Don't disable interrupts on RT in disable_irq_nosync_lockdep.*() (Sebastian Andrzej Siewior) - Disable KASAN instrumentation of lockdep.c (Waiman Long) - Add kasan_check_byte() check in lock_acquire() (Waiman Long) - Misc cleanups (Sebastian Andrzej Siewior) Rust runtime integration: - Use Pin for all LockClassKey usages (Mitchell Levy) - sync: Add accessor for the lock behind a given guard (Alice Ryhl) - sync: condvar: Add wait_interruptible_freezable() (Alice Ryhl) - sync: lock: Add an example for Guard:: Lock_ref() (Boqun Feng) Split-lock detection feature (x86): - Fix warning mode with disabled mitigation mode (Maksim Davydov) Locking events: - Add locking events for rtmutex slow paths (Waiman Long) - Add locking events for lockdep (Waiman Long)" * tag 'locking-core-2025-03-22' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: lockdep: Remove disable_irq_lockdep() lockdep: Don't disable interrupts on RT in disable_irq_nosync_lockdep.*() rust: lockdep: Use Pin for all LockClassKey usages rust: sync: condvar: Add wait_interruptible_freezable() rust: sync: lock: Add an example for Guard:: Lock_ref() rust: sync: Add accessor for the lock behind a given guard locking/lockdep: Add kasan_check_byte() check in lock_acquire() locking/lockdep: Disable KASAN instrumentation of lockdep.c locking/lock_events: Add locking events for lockdep locking/lock_events: Add locking events for rtmutex slow paths x86/split_lock: Fix the delayed detection logic lockdep/mm: Fix might_fault() lockdep check of current->mm->mmap_lock x86/locking: Remove semicolon from "lock" prefix locking/mutex: Add MUTEX_WARN_ON() into fast path x86/locking: Use asm_inline for {,try_}cmpxchg{64,128} emulations x86/locking: Use ALT_OUTPUT_SP() for percpu_{,try_}cmpxchg{64,128}_op()
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/include/asm/alternative.h2
-rw-r--r--arch/x86/include/asm/barrier.h8
-rw-r--r--arch/x86/include/asm/cmpxchg.h4
-rw-r--r--arch/x86/include/asm/cmpxchg_32.h36
-rw-r--r--arch/x86/include/asm/edac.h2
-rw-r--r--arch/x86/include/asm/percpu.h77
-rw-r--r--arch/x86/include/asm/sync_bitops.h12
-rw-r--r--arch/x86/kernel/cpu/bus_lock.c20
8 files changed, 87 insertions, 74 deletions
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index e3903b731305c..3b3d3aa19acd0 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -48,7 +48,7 @@
".popsection\n" \
"671:"
-#define LOCK_PREFIX LOCK_PREFIX_HERE "\n\tlock; "
+#define LOCK_PREFIX LOCK_PREFIX_HERE "\n\tlock "
#else /* ! CONFIG_SMP */
#define LOCK_PREFIX_HERE ""
diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h
index 7b44b3c4cce11..db70832232d4a 100644
--- a/arch/x86/include/asm/barrier.h
+++ b/arch/x86/include/asm/barrier.h
@@ -12,11 +12,11 @@
*/
#ifdef CONFIG_X86_32
-#define mb() asm volatile(ALTERNATIVE("lock; addl $0,-4(%%esp)", "mfence", \
+#define mb() asm volatile(ALTERNATIVE("lock addl $0,-4(%%esp)", "mfence", \
X86_FEATURE_XMM2) ::: "memory", "cc")
-#define rmb() asm volatile(ALTERNATIVE("lock; addl $0,-4(%%esp)", "lfence", \
+#define rmb() asm volatile(ALTERNATIVE("lock addl $0,-4(%%esp)", "lfence", \
X86_FEATURE_XMM2) ::: "memory", "cc")
-#define wmb() asm volatile(ALTERNATIVE("lock; addl $0,-4(%%esp)", "sfence", \
+#define wmb() asm volatile(ALTERNATIVE("lock addl $0,-4(%%esp)", "sfence", \
X86_FEATURE_XMM2) ::: "memory", "cc")
#else
#define __mb() asm volatile("mfence":::"memory")
@@ -50,7 +50,7 @@
#define __dma_rmb() barrier()
#define __dma_wmb() barrier()
-#define __smp_mb() asm volatile("lock; addl $0,-4(%%" _ASM_SP ")" ::: "memory", "cc")
+#define __smp_mb() asm volatile("lock addl $0,-4(%%" _ASM_SP ")" ::: "memory", "cc")
#define __smp_rmb() dma_rmb()
#define __smp_wmb() barrier()
diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h
index 5612648b02022..fd8afc1f5f6bb 100644
--- a/arch/x86/include/asm/cmpxchg.h
+++ b/arch/x86/include/asm/cmpxchg.h
@@ -134,7 +134,7 @@ extern void __add_wrong_size(void)
__raw_cmpxchg((ptr), (old), (new), (size), LOCK_PREFIX)
#define __sync_cmpxchg(ptr, old, new, size) \
- __raw_cmpxchg((ptr), (old), (new), (size), "lock; ")
+ __raw_cmpxchg((ptr), (old), (new), (size), "lock ")
#define __cmpxchg_local(ptr, old, new, size) \
__raw_cmpxchg((ptr), (old), (new), (size), "")
@@ -222,7 +222,7 @@ extern void __add_wrong_size(void)
__raw_try_cmpxchg((ptr), (pold), (new), (size), LOCK_PREFIX)
#define __sync_try_cmpxchg(ptr, pold, new, size) \
- __raw_try_cmpxchg((ptr), (pold), (new), (size), "lock; ")
+ __raw_try_cmpxchg((ptr), (pold), (new), (size), "lock ")
#define __try_cmpxchg_local(ptr, pold, new, size) \
__raw_try_cmpxchg((ptr), (pold), (new), (size), "")
diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h
index fd1282a783ddb..8806c646d452d 100644
--- a/arch/x86/include/asm/cmpxchg_32.h
+++ b/arch/x86/include/asm/cmpxchg_32.h
@@ -91,19 +91,21 @@ static __always_inline bool __try_cmpxchg64_local(volatile u64 *ptr, u64 *oldp,
union __u64_halves o = { .full = (_old), }, \
n = { .full = (_new), }; \
\
- asm volatile(ALTERNATIVE(_lock_loc \
- "call cmpxchg8b_emu", \
- _lock "cmpxchg8b %a[ptr]", X86_FEATURE_CX8) \
- : ALT_OUTPUT_SP("+a" (o.low), "+d" (o.high)) \
- : "b" (n.low), "c" (n.high), [ptr] "S" (_ptr) \
- : "memory"); \
+ asm_inline volatile( \
+ ALTERNATIVE(_lock_loc \
+ "call cmpxchg8b_emu", \
+ _lock "cmpxchg8b %a[ptr]", X86_FEATURE_CX8) \
+ : ALT_OUTPUT_SP("+a" (o.low), "+d" (o.high)) \
+ : "b" (n.low), "c" (n.high), \
+ [ptr] "S" (_ptr) \
+ : "memory"); \
\
o.full; \
})
static __always_inline u64 arch_cmpxchg64(volatile u64 *ptr, u64 old, u64 new)
{
- return __arch_cmpxchg64_emu(ptr, old, new, LOCK_PREFIX_HERE, "lock; ");
+ return __arch_cmpxchg64_emu(ptr, old, new, LOCK_PREFIX_HERE, "lock ");
}
#define arch_cmpxchg64 arch_cmpxchg64
@@ -119,14 +121,16 @@ static __always_inline u64 arch_cmpxchg64_local(volatile u64 *ptr, u64 old, u64
n = { .full = (_new), }; \
bool ret; \
\
- asm volatile(ALTERNATIVE(_lock_loc \
- "call cmpxchg8b_emu", \
- _lock "cmpxchg8b %a[ptr]", X86_FEATURE_CX8) \
- CC_SET(e) \
- : ALT_OUTPUT_SP(CC_OUT(e) (ret), \
- "+a" (o.low), "+d" (o.high)) \
- : "b" (n.low), "c" (n.high), [ptr] "S" (_ptr) \
- : "memory"); \
+ asm_inline volatile( \
+ ALTERNATIVE(_lock_loc \
+ "call cmpxchg8b_emu", \
+ _lock "cmpxchg8b %a[ptr]", X86_FEATURE_CX8) \
+ CC_SET(e) \
+ : ALT_OUTPUT_SP(CC_OUT(e) (ret), \
+ "+a" (o.low), "+d" (o.high)) \
+ : "b" (n.low), "c" (n.high), \
+ [ptr] "S" (_ptr) \
+ : "memory"); \
\
if (unlikely(!ret)) \
*(_oldp) = o.full; \
@@ -136,7 +140,7 @@ static __always_inline u64 arch_cmpxchg64_local(volatile u64 *ptr, u64 old, u64
static __always_inline bool arch_try_cmpxchg64(volatile u64 *ptr, u64 *oldp, u64 new)
{
- return __arch_try_cmpxchg64_emu(ptr, oldp, new, LOCK_PREFIX_HERE, "lock; ");
+ return __arch_try_cmpxchg64_emu(ptr, oldp, new, LOCK_PREFIX_HERE, "lock ");
}
#define arch_try_cmpxchg64 arch_try_cmpxchg64
diff --git a/arch/x86/include/asm/edac.h b/arch/x86/include/asm/edac.h
index 426fc53ff8030..dfbd1ebb9f10b 100644
--- a/arch/x86/include/asm/edac.h
+++ b/arch/x86/include/asm/edac.h
@@ -13,7 +13,7 @@ static inline void edac_atomic_scrub(void *va, u32 size)
* are interrupt, DMA and SMP safe.
*/
for (i = 0; i < size / 4; i++, virt_addr++)
- asm volatile("lock; addl $0, %0"::"m" (*virt_addr));
+ asm volatile("lock addl $0, %0"::"m" (*virt_addr));
}
#endif /* _ASM_X86_EDAC_H */
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index e525cd85f999f..08f5f61690b7a 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -348,15 +348,14 @@ do { \
old__.var = _oval; \
new__.var = _nval; \
\
- asm qual (ALTERNATIVE("call this_cpu_cmpxchg8b_emu", \
- "cmpxchg8b " __percpu_arg([var]), X86_FEATURE_CX8) \
- : [var] "+m" (__my_cpu_var(_var)), \
- "+a" (old__.low), \
- "+d" (old__.high) \
- : "b" (new__.low), \
- "c" (new__.high), \
- "S" (&(_var)) \
- : "memory"); \
+ asm_inline qual ( \
+ ALTERNATIVE("call this_cpu_cmpxchg8b_emu", \
+ "cmpxchg8b " __percpu_arg([var]), X86_FEATURE_CX8) \
+ : ALT_OUTPUT_SP([var] "+m" (__my_cpu_var(_var)), \
+ "+a" (old__.low), "+d" (old__.high)) \
+ : "b" (new__.low), "c" (new__.high), \
+ "S" (&(_var)) \
+ : "memory"); \
\
old__.var; \
})
@@ -378,17 +377,16 @@ do { \
old__.var = *_oval; \
new__.var = _nval; \
\
- asm qual (ALTERNATIVE("call this_cpu_cmpxchg8b_emu", \
- "cmpxchg8b " __percpu_arg([var]), X86_FEATURE_CX8) \
- CC_SET(z) \
- : CC_OUT(z) (success), \
- [var] "+m" (__my_cpu_var(_var)), \
- "+a" (old__.low), \
- "+d" (old__.high) \
- : "b" (new__.low), \
- "c" (new__.high), \
- "S" (&(_var)) \
- : "memory"); \
+ asm_inline qual ( \
+ ALTERNATIVE("call this_cpu_cmpxchg8b_emu", \
+ "cmpxchg8b " __percpu_arg([var]), X86_FEATURE_CX8) \
+ CC_SET(z) \
+ : ALT_OUTPUT_SP(CC_OUT(z) (success), \
+ [var] "+m" (__my_cpu_var(_var)), \
+ "+a" (old__.low), "+d" (old__.high)) \
+ : "b" (new__.low), "c" (new__.high), \
+ "S" (&(_var)) \
+ : "memory"); \
if (unlikely(!success)) \
*_oval = old__.var; \
\
@@ -419,15 +417,14 @@ do { \
old__.var = _oval; \
new__.var = _nval; \
\
- asm qual (ALTERNATIVE("call this_cpu_cmpxchg16b_emu", \
- "cmpxchg16b " __percpu_arg([var]), X86_FEATURE_CX16) \
- : [var] "+m" (__my_cpu_var(_var)), \
- "+a" (old__.low), \
- "+d" (old__.high) \
- : "b" (new__.low), \
- "c" (new__.high), \
- "S" (&(_var)) \
- : "memory"); \
+ asm_inline qual ( \
+ ALTERNATIVE("call this_cpu_cmpxchg16b_emu", \
+ "cmpxchg16b " __percpu_arg([var]), X86_FEATURE_CX16) \
+ : ALT_OUTPUT_SP([var] "+m" (__my_cpu_var(_var)), \
+ "+a" (old__.low), "+d" (old__.high)) \
+ : "b" (new__.low), "c" (new__.high), \
+ "S" (&(_var)) \
+ : "memory"); \
\
old__.var; \
})
@@ -449,19 +446,19 @@ do { \
old__.var = *_oval; \
new__.var = _nval; \
\
- asm qual (ALTERNATIVE("call this_cpu_cmpxchg16b_emu", \
- "cmpxchg16b " __percpu_arg([var]), X86_FEATURE_CX16) \
- CC_SET(z) \
- : CC_OUT(z) (success), \
- [var] "+m" (__my_cpu_var(_var)), \
- "+a" (old__.low), \
- "+d" (old__.high) \
- : "b" (new__.low), \
- "c" (new__.high), \
- "S" (&(_var)) \
- : "memory"); \
+ asm_inline qual ( \
+ ALTERNATIVE("call this_cpu_cmpxchg16b_emu", \
+ "cmpxchg16b " __percpu_arg([var]), X86_FEATURE_CX16) \
+ CC_SET(z) \
+ : ALT_OUTPUT_SP(CC_OUT(z) (success), \
+ [var] "+m" (__my_cpu_var(_var)), \
+ "+a" (old__.low), "+d" (old__.high)) \
+ : "b" (new__.low), "c" (new__.high), \
+ "S" (&(_var)) \
+ : "memory"); \
if (unlikely(!success)) \
*_oval = old__.var; \
+ \
likely(success); \
})
diff --git a/arch/x86/include/asm/sync_bitops.h b/arch/x86/include/asm/sync_bitops.h
index 6d8d6bc183b74..cd21a0405ac52 100644
--- a/arch/x86/include/asm/sync_bitops.h
+++ b/arch/x86/include/asm/sync_bitops.h
@@ -31,7 +31,7 @@
*/
static inline void sync_set_bit(long nr, volatile unsigned long *addr)
{
- asm volatile("lock; " __ASM_SIZE(bts) " %1,%0"
+ asm volatile("lock " __ASM_SIZE(bts) " %1,%0"
: "+m" (ADDR)
: "Ir" (nr)
: "memory");
@@ -49,7 +49,7 @@ static inline void sync_set_bit(long nr, volatile unsigned long *addr)
*/
static inline void sync_clear_bit(long nr, volatile unsigned long *addr)
{
- asm volatile("lock; " __ASM_SIZE(btr) " %1,%0"
+ asm volatile("lock " __ASM_SIZE(btr) " %1,%0"
: "+m" (ADDR)
: "Ir" (nr)
: "memory");
@@ -66,7 +66,7 @@ static inline void sync_clear_bit(long nr, volatile unsigned long *addr)
*/
static inline void sync_change_bit(long nr, volatile unsigned long *addr)
{
- asm volatile("lock; " __ASM_SIZE(btc) " %1,%0"
+ asm volatile("lock " __ASM_SIZE(btc) " %1,%0"
: "+m" (ADDR)
: "Ir" (nr)
: "memory");
@@ -82,7 +82,7 @@ static inline void sync_change_bit(long nr, volatile unsigned long *addr)
*/
static inline bool sync_test_and_set_bit(long nr, volatile unsigned long *addr)
{
- return GEN_BINARY_RMWcc("lock; " __ASM_SIZE(bts), *addr, c, "Ir", nr);
+ return GEN_BINARY_RMWcc("lock " __ASM_SIZE(bts), *addr, c, "Ir", nr);
}
/**
@@ -95,7 +95,7 @@ static inline bool sync_test_and_set_bit(long nr, volatile unsigned long *addr)
*/
static inline int sync_test_and_clear_bit(long nr, volatile unsigned long *addr)
{
- return GEN_BINARY_RMWcc("lock; " __ASM_SIZE(btr), *addr, c, "Ir", nr);
+ return GEN_BINARY_RMWcc("lock " __ASM_SIZE(btr), *addr, c, "Ir", nr);
}
/**
@@ -108,7 +108,7 @@ static inline int sync_test_and_clear_bit(long nr, volatile unsigned long *addr)
*/
static inline int sync_test_and_change_bit(long nr, volatile unsigned long *addr)
{
- return GEN_BINARY_RMWcc("lock; " __ASM_SIZE(btc), *addr, c, "Ir", nr);
+ return GEN_BINARY_RMWcc("lock " __ASM_SIZE(btc), *addr, c, "Ir", nr);
}
#define sync_test_bit(nr, addr) test_bit(nr, addr)
diff --git a/arch/x86/kernel/cpu/bus_lock.c b/arch/x86/kernel/cpu/bus_lock.c
index 6cba85c79d42d..97222efb4d2a6 100644
--- a/arch/x86/kernel/cpu/bus_lock.c
+++ b/arch/x86/kernel/cpu/bus_lock.c
@@ -192,7 +192,13 @@ static void __split_lock_reenable(struct work_struct *work)
{
sld_update_msr(true);
}
-static DECLARE_DELAYED_WORK(sl_reenable, __split_lock_reenable);
+/*
+ * In order for each CPU to schedule its delayed work independently of the
+ * others, delayed work struct must be per-CPU. This is not required when
+ * sysctl_sld_mitigate is enabled because of the semaphore that limits
+ * the number of simultaneously scheduled delayed works to 1.
+ */
+static DEFINE_PER_CPU(struct delayed_work, sl_reenable);
/*
* If a CPU goes offline with pending delayed work to re-enable split lock
@@ -213,7 +219,7 @@ static int splitlock_cpu_offline(unsigned int cpu)
static void split_lock_warn(unsigned long ip)
{
- struct delayed_work *work;
+ struct delayed_work *work = NULL;
int cpu;
if (!current->reported_split_lock)
@@ -235,11 +241,17 @@ static void split_lock_warn(unsigned long ip)
if (down_interruptible(&buslock_sem) == -EINTR)
return;
work = &sl_reenable_unlock;
- } else {
- work = &sl_reenable;
}
cpu = get_cpu();
+
+ if (!work) {
+ work = this_cpu_ptr(&sl_reenable);
+ /* Deferred initialization of per-CPU struct */
+ if (!work->work.func)
+ INIT_DELAYED_WORK(work, __split_lock_reenable);
+ }
+
schedule_delayed_work_on(cpu, work, 2);
/* Disable split lock detection on this CPU to make progress */