author		Vlastimil Babka <vbabka@suse.cz>	2024-02-12 21:36:33 +0100
committer	Vlastimil Babka <vbabka@suse.cz>	2025-03-13 12:12:02 +0100
commit		df4891c13ee02fedadd66a990dd65c911f6b7ada
tree		a25dd941894c525b6509d39d2978ecd803f5d47d
parent		856a00a80a1b2da777f49298047fef3969c6209d
lib/test: add locking-benchmark module
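
Time, one scheme at a time, the locking and atomic-update primitives
considered for protecting per-cpu (pcp) structures: this_cpu_inc_return(),
this_cpu_try_cmpxchg(), try_cmpxchg128(), bit_spin_lock(), percpu_counter
updates, the local_lock and localtry_lock variants, and a page-allocator
style pcpu_spin_trylock() scheme. The module deliberately fails its init
with -EINVAL so the benchmark can be rerun without an rmmod in between.

Every case follows the same timing pattern; a minimal sketch, with all
identifiers taken from lib/locking-benchmark.c below:

	unsigned long long before, after;

	before = rdtsc_ordered();

	for (unsigned long i = 0; i < TIMING_ITERATIONS; i++) {
		/*
		 * Scheme under test; the result is consumed so the
		 * increment cannot be optimized away.
		 */
		if (this_cpu_inc_return(test_pcps.counter) == 0)
			__dummy = true;
	}

	after = rdtsc_ordered();

	cond_resched();
	print_result("this_cpu_inc_return");

With TIMING_ITERATIONS at 10^9, dividing the printed cycle total by 10^9
gives the average cost per iteration.
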
-rw-r--r--	arch/x86/lib/cmpxchg16b_emu.S	|   2 ++
-rw-r--r--	lib/Kconfig.debug		|   9 +++++
-rw-r--r--	lib/Makefile			|   1 +
-rw-r--r--	lib/locking-benchmark.c		| 427 ++++++++++++++++++++++++++
4 files changed, 439 insertions(+), 0 deletions(-)

diff --git a/arch/x86/lib/cmpxchg16b_emu.S b/arch/x86/lib/cmpxchg16b_emu.S
index 4fb44894ad8759..d722d6df71f2b5 100644
--- a/arch/x86/lib/cmpxchg16b_emu.S
+++ b/arch/x86/lib/cmpxchg16b_emu.S
@@ -52,3 +52,5 @@ SYM_FUNC_START(this_cpu_cmpxchg16b_emu)
 	RET
 
 SYM_FUNC_END(this_cpu_cmpxchg16b_emu)
+
+EXPORT_SYMBOL(this_cpu_cmpxchg16b_emu)
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 35796c290ca351..f67f132bd78ddc 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -2574,6 +2574,15 @@ config FIND_BIT_BENCHMARK
 
 	  If unsure, say N.
 
+config LOCKING_BENCHMARK
+	tristate "Benchmark (pcp) locking primitives"
+	help
+	  This builds the "locking-benchmark" module, which measures the
+	  performance of various primitives and schemes for locking or
+	  atomically updating pcp structures.
+
+	  If unsure, say N.
+
 config TEST_FIRMWARE
 	tristate "Test firmware loading via userspace interface"
 	depends on FW_LOADER
diff --git a/lib/Makefile b/lib/Makefile
index d5cfc7afbbb821..7f111fb94157f6 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -138,6 +138,7 @@ obj-$(CONFIG_GENERIC_IOMAP) += iomap.o
 obj-$(CONFIG_HAS_IOMEM) += iomap_copy.o devres.o
 obj-$(CONFIG_CHECK_SIGNATURE) += check_signature.o
 obj-$(CONFIG_DEBUG_LOCKING_API_SELFTESTS) += locking-selftest.o
+obj-$(CONFIG_LOCKING_BENCHMARK) += locking-benchmark.o
 
 lib-y += logic_pio.o
 
diff --git a/lib/locking-benchmark.c b/lib/locking-benchmark.c
new file mode 100644
index 00000000000000..2e699eefca5b89
--- /dev/null
+++ b/lib/locking-benchmark.c
@@ -0,0 +1,427 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/rcupdate.h>
+#include <linux/percpu_counter.h>
+#include <asm/tsc.h>
+
+#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)
+/*
+ * On SMP, spin_trylock is sufficient protection.
+ * On PREEMPT_RT, spin_trylock is equivalent on both SMP and UP.
+ */
+#define pcp_trylock_prepare(flags)	do { } while (0)
+#define pcp_trylock_finish(flags)	do { } while (0)
+#else
+
+/* UP spin_trylock always succeeds so disable IRQs to prevent re-entrancy. */
+#define pcp_trylock_prepare(flags)	local_irq_save(flags)
+#define pcp_trylock_finish(flags)	local_irq_restore(flags)
+#endif
+
+/*
+ * Locking a pcp requires a PCP lookup followed by a spinlock. To avoid
+ * a migration causing the wrong PCP to be locked and remote memory being
+ * potentially allocated, pin the task to the CPU for the lookup+lock.
+ * preempt_disable is used on !RT because it is faster than migrate_disable.
+ * migrate_disable is used on RT because otherwise RT spinlock usage is
+ * interfered with and a high priority task cannot preempt the allocator.
+ */
+#ifndef CONFIG_PREEMPT_RT
+#define pcpu_task_pin()		preempt_disable()
+#define pcpu_task_unpin()	preempt_enable()
+#else
+#define pcpu_task_pin()		migrate_disable()
+#define pcpu_task_unpin()	migrate_enable()
+#endif
+
+/*
+ * Generic helper to look up and lock a per-cpu variable with an embedded
+ * spinlock. Return value should be used with the equivalent unlock helper.
+ */
+#define pcpu_spin_lock(type, member, ptr)			\
+({								\
+	type *_ret;						\
+	pcpu_task_pin();					\
+	_ret = this_cpu_ptr(ptr);				\
+	spin_lock(&_ret->member);				\
+	_ret;							\
+})
+
+#define pcpu_spin_trylock(type, member, ptr)			\
+({								\
+	type *_ret;						\
+	pcpu_task_pin();					\
+	_ret = this_cpu_ptr(ptr);				\
+	if (!spin_trylock(&_ret->member)) {			\
+		pcpu_task_unpin();				\
+		_ret = NULL;					\
+	}							\
+	_ret;							\
+})
+
+#define pcpu_spin_unlock(member, ptr)				\
+({								\
+	spin_unlock(&ptr->member);				\
+	pcpu_task_unpin();					\
+})
+
+/* struct per_cpu_pages specific helpers. */
+#define pcp_spin_lock(ptr)					\
+	pcpu_spin_lock(struct per_cpu_pages, lock, ptr)
+
+#define pcp_spin_trylock(ptr)					\
+	pcpu_spin_trylock(struct per_cpu_pages, lock, ptr)
+
+#define pcp_spin_unlock(ptr)					\
+	pcpu_spin_unlock(lock, ptr)
+
+typedef union {
+	struct {
+		unsigned long counter;
+		void *dummy;
+	};
+	u128 full;
+} counter_ptr_t;
+
+struct test_pcp {
+	local_lock_t llock;
+	localtry_lock_t ltlock;
+	spinlock_t slock;
+	unsigned long counter;
+	counter_ptr_t counter_ptr;
+};
+
+static bool __dummy;
+
+static DEFINE_PER_CPU(struct test_pcp, test_pcps) = {
+	.llock = INIT_LOCAL_LOCK(llock),
+	.ltlock = INIT_LOCALTRY_LOCK(ltlock),
+	.slock = __SPIN_LOCK_UNLOCKED(slock),
+};
+
+static counter_ptr_t counter_ptr;
+
+struct test_bsl {
+	unsigned long page_flags;
+	unsigned long counter;
+};
+
+static struct test_bsl bsl = {};
+
+#define TIMING_ITERATIONS	1000000000
+
+#define print_result(name)					\
+	pr_info("%-35s %12llu cycles\n", name, after - before)
+
+static int __init locking_bench(void)
+{
+	unsigned long long before, after;
+	unsigned long __maybe_unused UP_flags;
+	struct test_pcp *pcp;
+	struct percpu_counter pcpc;
+	unsigned long flags;
+
+	percpu_counter_init(&pcpc, 0, GFP_KERNEL);
+
+	before = rdtsc_ordered();
+
+	for (unsigned long i = 0; i < TIMING_ITERATIONS; i++) {
+		if (this_cpu_inc_return(test_pcps.counter) == 0)
+			__dummy = true;
+	}
+
+	after = rdtsc_ordered();
+
+	cond_resched();
+	print_result("this_cpu_inc_return");
+
+	before = rdtsc_ordered();
+
+	for (unsigned long i = 0; i < TIMING_ITERATIONS; i++) {
+		unsigned long old, new;
+		do {
+			old = this_cpu_read(test_pcps.counter);
+			new = old + 1;
+		} while (!this_cpu_try_cmpxchg(test_pcps.counter, &old, new));
+	}
+
+	after = rdtsc_ordered();
+
+	cond_resched();
+	print_result("this_cpu_try_cmpxchg");
+
+	before = rdtsc_ordered();
+
+	for (unsigned long i = 0; i < TIMING_ITERATIONS; i++) {
+		unsigned long old, new;
+		do {
+			old = raw_cpu_read(test_pcps.counter);
+			new = old + 1;
+		} while (!this_cpu_try_cmpxchg(test_pcps.counter, &old, new));
+	}
+
+	after = rdtsc_ordered();
+
+	cond_resched();
+	print_result("raw+this_cpu_try_cmpxchg");
+
+	before = rdtsc_ordered();
+
+	for (unsigned long i = 0; i < TIMING_ITERATIONS; i++) {
+		counter_ptr_t old, new;
+		do {
+			struct test_pcp *pcp = raw_cpu_ptr(&test_pcps);
+			old.full = pcp->counter_ptr.full;
+			new.counter = old.counter + 1;
+			new.dummy = old.dummy;
+		} while (!this_cpu_try_cmpxchg128(test_pcps.counter_ptr.full,
+						  &old.full, new.full));
+	}
+
+	after = rdtsc_ordered();
+
+	cond_resched();
+	print_result("this_cpu_try_cmpxchg128");
+
+	before = rdtsc_ordered();
+
+	for (unsigned long i = 0; i < TIMING_ITERATIONS; i++) {
+		counter_ptr_t *test;
+		counter_ptr_t old, new;
+		do {
+			test = &counter_ptr;
+			old.full = test->full;
+			new.counter = old.counter + 1;
+			new.dummy = old.dummy;
+		} while (!try_cmpxchg128(&test->full,
+					 &old.full, new.full));
+	}
+
+	after = rdtsc_ordered();
+
+	cond_resched();
+	print_result("try_cmpxchg128");
+
+	before = rdtsc_ordered();
+
+	for (unsigned long i = 0; i < TIMING_ITERATIONS; i++) {
+		struct test_bsl *test = &bsl;
+
+		bit_spin_lock(PG_locked, &test->page_flags);
+		test->counter++;
+		bit_spin_unlock(PG_locked, &test->page_flags);
+	}
+
+	after = rdtsc_ordered();
+
+	cond_resched();
+	print_result("bit_spin_lock");
+
+	before = rdtsc_ordered();
+
+	for (unsigned long i = 0; i < TIMING_ITERATIONS; i++) {
+		percpu_counter_inc(&pcpc);
+	}
+
+	after = rdtsc_ordered();
+
+	cond_resched();
+	print_result("percpu_counter_inc");
+
+	before = rdtsc_ordered();
+
+	for (unsigned long i = 0; i < TIMING_ITERATIONS; i++) {
+		percpu_counter_add_local(&pcpc, 1);
+	}
+
+	after = rdtsc_ordered();
+
+	cond_resched();
+	print_result("percpu_counter_add_local");
+
+	before = rdtsc_ordered();
+
+	for (unsigned long i = 0; i < TIMING_ITERATIONS; i++) {
+		local_lock(&test_pcps.llock);
+
+		pcp = this_cpu_ptr(&test_pcps);
+
+		pcp->counter++;
+
+		local_unlock(&test_pcps.llock);
+	}
+
+	after = rdtsc_ordered();
+
+	cond_resched();
+	print_result("local_lock");
+
+	before = rdtsc_ordered();
+
+	for (unsigned long i = 0; i < TIMING_ITERATIONS; i++) {
+		local_lock_irq(&test_pcps.llock);
+
+		pcp = this_cpu_ptr(&test_pcps);
+
+		pcp->counter++;
+
+		local_unlock_irq(&test_pcps.llock);
+	}
+
+	after = rdtsc_ordered();
+
+	cond_resched();
+	print_result("local_lock_irq");
+
+	before = rdtsc_ordered();
+
+	for (unsigned long i = 0; i < TIMING_ITERATIONS; i++) {
+		local_lock_irqsave(&test_pcps.llock, flags);
+
+		pcp = this_cpu_ptr(&test_pcps);
+
+		pcp->counter++;
+
+		local_unlock_irqrestore(&test_pcps.llock, flags);
+	}
+
+	after = rdtsc_ordered();
+
+	cond_resched();
+	print_result("local_lock_irqsave");
+
+
+	before = rdtsc_ordered();
+
+	for (unsigned j = 0; j < 10; j++) {
+
+		local_irq_disable();
+
+		for (unsigned long i = 0; i < TIMING_ITERATIONS/10; i++) {
+			local_lock_irqsave(&test_pcps.llock, flags);
+
+			pcp = this_cpu_ptr(&test_pcps);
+
+			pcp->counter++;
+
+			local_unlock_irqrestore(&test_pcps.llock, flags);
+		}
+
+		local_irq_enable();
+	}
+
+	after = rdtsc_ordered();
+
+	cond_resched();
+	print_result("irq_dis(local_lock_irqsave)");
+
+	before = rdtsc_ordered();
+
+	for (unsigned long i = 0; i < TIMING_ITERATIONS; i++) {
+		localtry_lock(&test_pcps.ltlock);
+
+		pcp = this_cpu_ptr(&test_pcps);
+
+		pcp->counter++;
+
+		localtry_unlock(&test_pcps.ltlock);
+	}
+
+	after = rdtsc_ordered();
+
+	cond_resched();
+	print_result("localtry_lock");
+
+	before = rdtsc_ordered();
+
+	for (unsigned long i = 0; i < TIMING_ITERATIONS; i++) {
+		localtry_lock_irq(&test_pcps.ltlock);
+
+		pcp = this_cpu_ptr(&test_pcps);
+
+		pcp->counter++;
+
+		localtry_unlock_irq(&test_pcps.ltlock);
+	}
+
+	after = rdtsc_ordered();
+
+	cond_resched();
+	print_result("localtry_lock_irq");
+
+	before = rdtsc_ordered();
+
+	for (unsigned long i = 0; i < TIMING_ITERATIONS; i++) {
+		localtry_lock_irqsave(&test_pcps.ltlock, flags);
+
+		pcp = this_cpu_ptr(&test_pcps);
+
+		pcp->counter++;
+
+		localtry_unlock_irqrestore(&test_pcps.ltlock, flags);
+	}
+
+	after = rdtsc_ordered();
+
+	cond_resched();
+	print_result("localtry_lock_irqsave");
+
+	before = rdtsc_ordered();
+
+	for (unsigned j = 0; j < 10; j++) {
+
+		local_irq_disable();
+
+		for (unsigned long i = 0; i < TIMING_ITERATIONS/10; i++) {
+			localtry_lock_irqsave(&test_pcps.ltlock, flags);
+
+			pcp = this_cpu_ptr(&test_pcps);
+
+			pcp->counter++;
+
+			localtry_unlock_irqrestore(&test_pcps.ltlock, flags);
+		}
+
+		local_irq_enable();
+	}
+
+	after = rdtsc_ordered();
+
+	cond_resched();
+	print_result("irq_dis(localtry_lock_irqsave)");
+
+	before = rdtsc_ordered();
+
+	for (unsigned long i = 0; i < TIMING_ITERATIONS; i++) {
+
+		pcp_trylock_prepare(UP_flags);
+
+		pcp = pcpu_spin_trylock(struct test_pcp, slock, &test_pcps);
+
+		/* cannot fail here: nothing else ever takes this lock */
+
+		pcp->counter++;
+
+		pcpu_spin_unlock(slock, pcp);
+		pcp_trylock_finish(UP_flags);
+	}
+
+	after = rdtsc_ordered();
+
+	cond_resched();
+	print_result("pcpu_spin_trylock");
+
+	percpu_counter_destroy(&pcpc);
+
+	/*
+	 * Everything is OK. Return an error just to let the user run the
+	 * benchmark again without an annoying rmmod.
+	 */
+	return -EINVAL;
+}
+
+module_init(locking_bench);
+
+MODULE_DESCRIPTION("Benchmark for (pcp) locking schemes");
+MODULE_LICENSE("GPL");
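
The final pcpu_spin_trylock case exercises the trylock scheme that the
pcp_spin_* helpers above model. In the benchmark the trylock can never
fail, since nothing else takes the lock, so the failure path is never
exercised; a real caller has to be prepared for it. A minimal sketch
using the module's own helpers (the function name and the fallback are
hypothetical):

	static bool try_inc_pcp_counter(void)
	{
		unsigned long __maybe_unused UP_flags;
		struct test_pcp *pcp;

		/* disables IRQs on UP only, where spin_trylock always succeeds */
		pcp_trylock_prepare(UP_flags);

		/* pins the task, locks this CPU's structure, returns its pointer */
		pcp = pcpu_spin_trylock(struct test_pcp, slock, &test_pcps);
		if (!pcp) {
			pcp_trylock_finish(UP_flags);
			return false;	/* caller would fall back to a slow path */
		}

		pcp->counter++;

		pcpu_spin_unlock(slock, pcp);
		pcp_trylock_finish(UP_flags);
		return true;
	}

Note that pcpu_spin_trylock() already returns the this_cpu_ptr() it has
locked, so no separate lookup is needed between lock and unlock.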