aboutsummaryrefslogtreecommitdiffstats
diff options
authorMark Brown <broonie@kernel.org>2026-05-30 00:25:45 +0100
committerMark Brown <broonie@kernel.org>2026-05-30 00:25:46 +0100
commitfe9618ab266d20638357eff97d84540aeb22d69b (patch)
tree4139a361d7bb521ee94831414b25a1567e4c11a8
parent99befc896988c8b8b3b948b19c9d1a4e40025c07 (diff)
parent1d8f40ed9011a5a660e952235a0e8db991de509a (diff)
downloadlinux-next-history-fe9618ab266d20638357eff97d84540aeb22d69b.tar.gz
Merge branch 'slab/for-next' of https://git.kernel.org/pub/scm/linux/kernel/git/vbabka/slab.git
-rw-r--r--MAINTAINERS6
-rw-r--r--Makefile5
-rw-r--r--drivers/gpu/drm/msm/msm_iommu.c6
-rw-r--r--drivers/gpu/drm/panthor/panthor_mmu.c15
-rw-r--r--include/linux/instruction_pointer.h24
-rw-r--r--include/linux/percpu.h2
-rw-r--r--include/linux/slab.h324
-rw-r--r--init/Kconfig3
-rw-r--r--io_uring/io_uring.c23
-rw-r--r--kernel/configs/hardening.config2
-rw-r--r--lib/test_meminit.c23
-rw-r--r--mm/Kconfig73
-rw-r--r--mm/kasan/kasan_test_c.c5
-rw-r--r--mm/kfence/kfence_test.c13
-rw-r--r--mm/slab.h4
-rw-r--r--mm/slab_common.c52
-rw-r--r--mm/slub.c655
-rw-r--r--net/bpf/test_run.c7
-rw-r--r--net/core/skbuff.c24
-rw-r--r--tools/include/linux/slab.h2
-rw-r--r--tools/mm/slabinfo.c8
-rw-r--r--tools/testing/shared/linux.c19
22 files changed, 753 insertions, 542 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index 84e58f5b1d38f..d540e38f7b18b 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -24844,6 +24844,12 @@ F: mm/mempool.c
F: mm/slab.h
F: mm/slab_common.c
F: mm/slub.c
+F: scripts/gdb/linux/slab.py
+F: tools/cgroup/memcg_slabinfo.py
+F: tools/include/linux/slab.h
+F: tools/lib/slab.c
+F: tools/mm/slabinfo-gnuplot.sh
+F: tools/mm/slabinfo.c
SLCAN CAN NETWORK DRIVER
M: Dario Binacchi <dario.binacchi@amarulasolutions.com>
diff --git a/Makefile b/Makefile
index 0132fcb03f0f5..d59f703f9797a 100644
--- a/Makefile
+++ b/Makefile
@@ -988,6 +988,11 @@ KBUILD_CFLAGS += $(CC_AUTO_VAR_INIT_ZERO_ENABLER)
endif
endif
+ifdef CONFIG_KMALLOC_PARTITION_TYPED
+# KMALLOC_PARTITION_CACHES_NR + 1
+KBUILD_CFLAGS += -falloc-token-max=16
+endif
+
ifdef CONFIG_CC_IS_CLANG
ifdef CONFIG_CC_HAS_COUNTED_BY_PTR
KBUILD_CFLAGS += -fexperimental-late-parse-attributes
diff --git a/drivers/gpu/drm/msm/msm_iommu.c b/drivers/gpu/drm/msm/msm_iommu.c
index 058c71c82cf54..533104d71f6ce 100644
--- a/drivers/gpu/drm/msm/msm_iommu.c
+++ b/drivers/gpu/drm/msm/msm_iommu.c
@@ -330,17 +330,15 @@ static int
msm_iommu_pagetable_prealloc_allocate(struct msm_mmu *mmu, struct msm_mmu_prealloc *p)
{
struct kmem_cache *pt_cache = get_pt_cache(mmu);
- int ret;
p->pages = kvmalloc_objs(*p->pages, p->count);
if (!p->pages)
return -ENOMEM;
- ret = kmem_cache_alloc_bulk(pt_cache, GFP_KERNEL, p->count, p->pages);
- if (ret != p->count) {
+ if (!kmem_cache_alloc_bulk(pt_cache, GFP_KERNEL, p->count, p->pages)) {
kfree(p->pages);
p->pages = NULL;
- p->count = ret;
+ p->count = 0;
return -ENOMEM;
}
diff --git a/drivers/gpu/drm/panthor/panthor_mmu.c b/drivers/gpu/drm/panthor/panthor_mmu.c
index 9d45008505619..10e32fe26f7e4 100644
--- a/drivers/gpu/drm/panthor/panthor_mmu.c
+++ b/drivers/gpu/drm/panthor/panthor_mmu.c
@@ -1248,7 +1248,6 @@ static int panthor_vm_op_ctx_prealloc_pts(struct panthor_vm_op_ctx *op_ctx)
{
u64 size = op_ctx->va.range;
u64 va = op_ctx->va.addr;
- int ret;
/* L1, L2 and L3 page tables.
* We could optimize L3 allocation by iterating over the sgt and merging
@@ -1264,11 +1263,12 @@ static int panthor_vm_op_ctx_prealloc_pts(struct panthor_vm_op_ctx *op_ctx)
if (!op_ctx->rsvd_page_tables.pages)
return -ENOMEM;
- ret = kmem_cache_alloc_bulk(pt_cache, GFP_KERNEL, pt_count,
- op_ctx->rsvd_page_tables.pages);
- op_ctx->rsvd_page_tables.count = ret;
- if (ret != pt_count)
+ if (!kmem_cache_alloc_bulk(pt_cache, GFP_KERNEL, pt_count,
+ op_ctx->rsvd_page_tables.pages)) {
+ op_ctx->rsvd_page_tables.count = 0;
return -ENOMEM;
+ }
+ op_ctx->rsvd_page_tables.count = pt_count;
return 0;
}
@@ -1396,9 +1396,8 @@ static int panthor_vm_prepare_unmap_op_ctx(struct panthor_vm_op_ctx *op_ctx,
goto err_cleanup;
}
- ret = kmem_cache_alloc_bulk(pt_cache, GFP_KERNEL, pt_count,
- op_ctx->rsvd_page_tables.pages);
- if (ret != pt_count) {
+ if (!kmem_cache_alloc_bulk(pt_cache, GFP_KERNEL, pt_count,
+ op_ctx->rsvd_page_tables.pages)) {
ret = -ENOMEM;
goto err_cleanup;
}
diff --git a/include/linux/instruction_pointer.h b/include/linux/instruction_pointer.h
index aa0b3ffea9353..ea5bc756bd99d 100644
--- a/include/linux/instruction_pointer.h
+++ b/include/linux/instruction_pointer.h
@@ -8,6 +8,30 @@
#ifndef _THIS_IP_
#define _THIS_IP_ ({ __label__ __here; __here: (unsigned long)&&__here; })
+/*
+ * The current generic definition of _THIS_IP_ is considered broken by GCC [1]
+ * and Clang [2]. In particular, the address of a label is only expected to be
+ * used with a computed goto.
+ *
+ * [1] https://gcc.gnu.org/bugzilla/show_bug.cgi?id=120071
+ * [2] https://github.com/llvm/llvm-project/issues/138272
+ *
+ * Mark it as broken, so that appropriate fallback options can be implemented
+ * for architectures that do not define their own _THIS_IP_.
+ */
+#define HAS_BROKEN_THIS_IP
+#endif
+
+/*
+ * _CODE_LOCATION_ provides a unique identifier for the current code location.
+ * When _THIS_IP_ is broken (generic version), we fall back to a static marker
+ * which guarantees uniqueness and resolves to a constant address at link time,
+ * avoiding runtime overhead and compiler optimizations breaking it.
+ */
+#ifdef HAS_BROKEN_THIS_IP
+#define _CODE_LOCATION_ ({ static const char __here; (unsigned long)&__here; })
+#else
+#define _CODE_LOCATION_ _THIS_IP_
#endif
#endif /* _LINUX_INSTRUCTION_POINTER_H */
diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index 85bf8dd9f0874..bdb721dac0e32 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -36,7 +36,7 @@
#define PCPU_BITMAP_BLOCK_BITS (PCPU_BITMAP_BLOCK_SIZE >> \
PCPU_MIN_ALLOC_SHIFT)
-#ifdef CONFIG_RANDOM_KMALLOC_CACHES
+#ifdef CONFIG_KMALLOC_PARTITION_CACHES
# if defined(CONFIG_LOCKDEP) && !defined(CONFIG_PAGE_SIZE_4KB)
# define PERCPU_DYNAMIC_SIZE_SHIFT 13
# else
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 2b5ab488e96b0..d4a873a162892 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -499,14 +499,80 @@ int kmem_cache_shrink(struct kmem_cache *s);
.usersize = sizeof_field(struct __struct, __field), \
}, (__flags))
+#ifdef CONFIG_KMALLOC_PARTITION_CACHES
+typedef struct { unsigned long v; } kmalloc_token_t;
+#ifdef CONFIG_KMALLOC_PARTITION_RANDOM
+extern unsigned long random_kmalloc_seed;
+#define __kmalloc_token(...) ((kmalloc_token_t){ .v = _CODE_LOCATION_ })
+#elif defined(CONFIG_KMALLOC_PARTITION_TYPED)
+#define __kmalloc_token(...) ((kmalloc_token_t){ .v = __builtin_infer_alloc_token(__VA_ARGS__) })
+#endif
+#define DECL_TOKEN_PARAM(_token) , kmalloc_token_t (_token)
+#define _PASS_TOKEN_PARAM(_token) , (_token)
+#define PASS_TOKEN_PARAM(_token) (_token)
+#define DECL_TOKEN_PARAMS(_size, _token) size_t (_size), kmalloc_token_t (_token)
+#define PASS_TOKEN_PARAMS(_size, _token) (_size), (_token)
+#else /* !CONFIG_KMALLOC_PARTITION_CACHES */
+typedef struct {} kmalloc_token_t;
+#define __kmalloc_token(...) ((kmalloc_token_t){}) /* no-op */
+#define DECL_TOKEN_PARAM(_token)
+#define _PASS_TOKEN_PARAM(_token)
+#define PASS_TOKEN_PARAM(_token) ((kmalloc_token_t){})
+#define DECL_TOKEN_PARAMS(_size, _token) size_t (_size)
+#define PASS_TOKEN_PARAMS(_size, _token) (_size)
+#endif /* CONFIG_KMALLOC_PARTITION_CACHES */
+
/*
* Common kmalloc functions provided by all allocators
*/
-void * __must_check krealloc_node_align_noprof(const void *objp, size_t new_size,
+void * __must_check krealloc_node_align_noprof(const void *objp,
+ DECL_TOKEN_PARAMS(new_size, token),
unsigned long align,
gfp_t flags, int nid) __realloc_size(2);
-#define krealloc_noprof(_o, _s, _f) krealloc_node_align_noprof(_o, _s, 1, _f, NUMA_NO_NODE)
-#define krealloc_node_align(...) alloc_hooks(krealloc_node_align_noprof(__VA_ARGS__))
+#define krealloc_noprof(_o, _s, _f) krealloc_node_align_noprof(_o, PASS_TOKEN_PARAMS(_s, __kmalloc_token(_s)), 1, _f, NUMA_NO_NODE)
+#if 0 /* kernel-doc */
+/**
+ * krealloc_node_align - reallocate memory. The contents will remain unchanged.
+ * @p: object to reallocate memory for.
+ * @new_size: how many bytes of memory are required.
+ * @align: desired alignment.
+ * @flags: the type of memory to allocate.
+ * @nid: NUMA node or NUMA_NO_NODE
+ *
+ * If @p is %NULL, krealloc() behaves exactly like kmalloc(). If @new_size
+ * is 0 and @p is not a %NULL pointer, the object pointed to is freed.
+ *
+ * Only alignments up to those guaranteed by kmalloc() will be honored. Please see
+ * Documentation/core-api/memory-allocation.rst for more details.
+ *
+ * If __GFP_ZERO logic is requested, callers must ensure that, starting with the
+ * initial memory allocation, every subsequent call to this API for the same
+ * memory allocation is flagged with __GFP_ZERO. Otherwise, it is possible that
+ * __GFP_ZERO is not fully honored by this API.
+ *
+ * When slub_debug_orig_size() is off, krealloc() only knows about the bucket
+ * size of an allocation (but not the exact size it was allocated with) and
+ * hence implements the following semantics for shrinking and growing buffers
+ * with __GFP_ZERO::
+ *
+ * new bucket
+ * 0 size size
+ * |--------|----------------|
+ * | keep | zero |
+ *
+ * Otherwise, the original allocation size 'orig_size' could be used to
+ * precisely clear the requested size, and the new size will also be stored
+ * as the new 'orig_size'.
+ *
+ * In any case, the contents of the object pointed to are preserved up to the
+ * lesser of the new and old sizes.
+ *
+ * Return: pointer to the allocated memory or %NULL in case of error
+ */
+void *krealloc_node_align(const void *p, size_t new_size, unsigned long align, gfp_t flags, int nid);
+#endif
+#define krealloc_node_align(p, new_size, align, flags, nid) \
+ alloc_hooks(krealloc_node_align_noprof(p, PASS_TOKEN_PARAMS(new_size, __kmalloc_token(new_size)), align, flags, nid))
#define krealloc_node(_o, _s, _f, _n) krealloc_node_align(_o, _s, 1, _f, _n)
#define krealloc(...) krealloc_node(__VA_ARGS__, NUMA_NO_NODE)
@@ -612,10 +678,10 @@ static inline unsigned int arch_slab_minalign(void)
#define SLAB_OBJ_MIN_SIZE (KMALLOC_MIN_SIZE < 16 ? \
(KMALLOC_MIN_SIZE) : 16)
-#ifdef CONFIG_RANDOM_KMALLOC_CACHES
-#define RANDOM_KMALLOC_CACHES_NR 15 // # of cache copies
+#ifdef CONFIG_KMALLOC_PARTITION_CACHES
+#define KMALLOC_PARTITION_CACHES_NR 15 // # of cache copies
#else
-#define RANDOM_KMALLOC_CACHES_NR 0
+#define KMALLOC_PARTITION_CACHES_NR 0
#endif
/*
@@ -634,8 +700,8 @@ enum kmalloc_cache_type {
#ifndef CONFIG_MEMCG
KMALLOC_CGROUP = KMALLOC_NORMAL,
#endif
- KMALLOC_RANDOM_START = KMALLOC_NORMAL,
- KMALLOC_RANDOM_END = KMALLOC_RANDOM_START + RANDOM_KMALLOC_CACHES_NR,
+ KMALLOC_PARTITION_START = KMALLOC_NORMAL,
+ KMALLOC_PARTITION_END = KMALLOC_PARTITION_START + KMALLOC_PARTITION_CACHES_NR,
#ifdef CONFIG_SLUB_TINY
KMALLOC_RECLAIM = KMALLOC_NORMAL,
#else
@@ -662,19 +728,19 @@ extern kmem_buckets kmalloc_caches[NR_KMALLOC_TYPES];
(IS_ENABLED(CONFIG_ZONE_DMA) ? __GFP_DMA : 0) | \
(IS_ENABLED(CONFIG_MEMCG) ? __GFP_ACCOUNT : 0))
-extern unsigned long random_kmalloc_seed;
-
-static __always_inline enum kmalloc_cache_type kmalloc_type(gfp_t flags, unsigned long caller)
+static __always_inline enum kmalloc_cache_type kmalloc_type(gfp_t flags, kmalloc_token_t token)
{
/*
* The most common case is KMALLOC_NORMAL, so test for it
* with a single branch for all the relevant flags.
*/
if (likely((flags & KMALLOC_NOT_NORMAL_BITS) == 0))
-#ifdef CONFIG_RANDOM_KMALLOC_CACHES
- /* RANDOM_KMALLOC_CACHES_NR (=15) copies + the KMALLOC_NORMAL */
- return KMALLOC_RANDOM_START + hash_64(caller ^ random_kmalloc_seed,
- ilog2(RANDOM_KMALLOC_CACHES_NR + 1));
+#ifdef CONFIG_KMALLOC_PARTITION_RANDOM
+ /* KMALLOC_PARTITION_CACHES_NR (=15) copies + the KMALLOC_NORMAL */
+ return KMALLOC_PARTITION_START + hash_64(token.v ^ random_kmalloc_seed,
+ ilog2(KMALLOC_PARTITION_CACHES_NR + 1));
+#elif defined(CONFIG_KMALLOC_PARTITION_TYPED)
+ return KMALLOC_PARTITION_START + token.v;
#else
return KMALLOC_NORMAL;
#endif
@@ -815,8 +881,10 @@ kmem_buckets *kmem_buckets_create(const char *name, slab_flags_t flags,
*/
void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p);
-int kmem_cache_alloc_bulk_noprof(struct kmem_cache *s, gfp_t flags, size_t size, void **p);
-#define kmem_cache_alloc_bulk(...) alloc_hooks(kmem_cache_alloc_bulk_noprof(__VA_ARGS__))
+bool kmem_cache_alloc_bulk_noprof(struct kmem_cache *s, gfp_t flags,
+ size_t size, void **p);
+#define kmem_cache_alloc_bulk(...) \
+ alloc_hooks(kmem_cache_alloc_bulk_noprof(__VA_ARGS__))
static __always_inline void kfree_bulk(size_t size, void **p)
{
@@ -858,16 +926,22 @@ unsigned int kmem_cache_sheaf_size(struct slab_sheaf *sheaf);
#define PASS_BUCKET_PARAM(_b) NULL
#endif
+#define DECL_KMALLOC_PARAMS(_size, _b, _token) DECL_BUCKET_PARAMS(_size, _b) \
+ DECL_TOKEN_PARAM(_token)
+
+#define PASS_KMALLOC_PARAMS(_size, _b, _token) PASS_BUCKET_PARAMS(_size, _b) \
+ _PASS_TOKEN_PARAM(_token)
+
/*
* The following functions are not to be used directly and are intended only
* for internal use from kmalloc() and kmalloc_node()
* with the exception of kunit tests
*/
-void *__kmalloc_noprof(size_t size, gfp_t flags)
+void *__kmalloc_noprof(DECL_TOKEN_PARAMS(size, token), gfp_t flags)
__assume_kmalloc_alignment __alloc_size(1);
-void *__kmalloc_node_noprof(DECL_BUCKET_PARAMS(size, b), gfp_t flags, int node)
+void *__kmalloc_node_noprof(DECL_KMALLOC_PARAMS(size, b, token), gfp_t flags, int node)
__assume_kmalloc_alignment __alloc_size(1);
void *__kmalloc_cache_noprof(struct kmem_cache *s, gfp_t flags, size_t size)
@@ -883,6 +957,23 @@ void *__kmalloc_large_noprof(size_t size, gfp_t flags)
void *__kmalloc_large_node_noprof(size_t size, gfp_t flags, int node)
__assume_page_alignment __alloc_size(1);
+static __always_inline __alloc_size(1) void *_kmalloc_noprof(size_t size, gfp_t flags, kmalloc_token_t token)
+{
+ if (__builtin_constant_p(size) && size) {
+ unsigned int index;
+
+ if (size > KMALLOC_MAX_CACHE_SIZE)
+ return __kmalloc_large_noprof(size, flags);
+
+ index = kmalloc_index(size);
+ return __kmalloc_cache_noprof(
+ kmalloc_caches[kmalloc_type(flags, token)][index],
+ flags, size);
+ }
+ return __kmalloc_noprof(PASS_TOKEN_PARAMS(size, token), flags);
+}
+#define kmalloc_noprof(...) _kmalloc_noprof(__VA_ARGS__, __kmalloc_token(__VA_ARGS__))
+#if 0 /* kernel-doc */
/**
* kmalloc - allocate kernel memory
* @size: how many bytes of memory are required.
@@ -938,25 +1029,27 @@ void *__kmalloc_large_node_noprof(size_t size, gfp_t flags, int node)
* Try really hard to succeed the allocation but fail
* eventually.
*/
-static __always_inline __alloc_size(1) void *kmalloc_noprof(size_t size, gfp_t flags)
-{
- if (__builtin_constant_p(size) && size) {
- unsigned int index;
-
- if (size > KMALLOC_MAX_CACHE_SIZE)
- return __kmalloc_large_noprof(size, flags);
-
- index = kmalloc_index(size);
- return __kmalloc_cache_noprof(
- kmalloc_caches[kmalloc_type(flags, _RET_IP_)][index],
- flags, size);
- }
- return __kmalloc_noprof(size, flags);
-}
-#define kmalloc(...) alloc_hooks(kmalloc_noprof(__VA_ARGS__))
+void *kmalloc(size_t size, gfp_t flags);
+#endif
+#define kmalloc(size, flags) alloc_hooks(kmalloc_noprof(size, flags))
-void *kmalloc_nolock_noprof(size_t size, gfp_t gfp_flags, int node);
-#define kmalloc_nolock(...) alloc_hooks(kmalloc_nolock_noprof(__VA_ARGS__))
+void *_kmalloc_nolock_noprof(DECL_TOKEN_PARAMS(size, token), gfp_t gfp_flags, int node);
+#define kmalloc_nolock_noprof(_s, _f, _n) _kmalloc_nolock_noprof(PASS_TOKEN_PARAMS(_s, __kmalloc_token(_s)), _f, _n)
+#if 0 /* kernel-doc */
+/**
+ * kmalloc_nolock - Allocate an object of given size from any context.
+ * @size: size to allocate
+ * @gfp_flags: GFP flags. Only __GFP_ACCOUNT, __GFP_ZERO, __GFP_NO_OBJ_EXT
+ * allowed.
+ * @node: node number of the target node.
+ *
+ * Return: pointer to the new object or NULL in case of error.
+ * NULL does not mean EBUSY or EAGAIN. It means ENOMEM.
+ * There is no reason to call it again and expect !NULL.
+ */
+void *kmalloc_nolock(size_t size, gfp_t gfp_flags, int node);
+#endif
+#define kmalloc_nolock(size, gfp_flags, node) alloc_hooks(kmalloc_nolock_noprof(size, gfp_flags, node))
/**
* __alloc_objs - Allocate objects of a given type using
@@ -1060,12 +1153,12 @@ void *kmalloc_nolock_noprof(size_t size, gfp_t gfp_flags, int node);
__alloc_flex(kvzalloc, default_gfp(__VA_ARGS__), typeof(P), FAM, COUNT)
#define kmem_buckets_alloc(_b, _size, _flags) \
- alloc_hooks(__kmalloc_node_noprof(PASS_BUCKET_PARAMS(_size, _b), _flags, NUMA_NO_NODE))
+ alloc_hooks(__kmalloc_node_noprof(PASS_KMALLOC_PARAMS(_size, _b, __kmalloc_token(_size)), _flags, NUMA_NO_NODE))
#define kmem_buckets_alloc_track_caller(_b, _size, _flags) \
- alloc_hooks(__kmalloc_node_track_caller_noprof(PASS_BUCKET_PARAMS(_size, _b), _flags, NUMA_NO_NODE, _RET_IP_))
+ alloc_hooks(__kmalloc_node_track_caller_noprof(PASS_KMALLOC_PARAMS(_size, _b, __kmalloc_token(_size)), _flags, NUMA_NO_NODE, _RET_IP_))
-static __always_inline __alloc_size(1) void *kmalloc_node_noprof(size_t size, gfp_t flags, int node)
+static __always_inline __alloc_size(1) void *_kmalloc_node_noprof(size_t size, gfp_t flags, int node, kmalloc_token_t token)
{
if (__builtin_constant_p(size) && size) {
unsigned int index;
@@ -1075,29 +1168,48 @@ static __always_inline __alloc_size(1) void *kmalloc_node_noprof(size_t size, gf
index = kmalloc_index(size);
return __kmalloc_cache_node_noprof(
- kmalloc_caches[kmalloc_type(flags, _RET_IP_)][index],
+ kmalloc_caches[kmalloc_type(flags, token)][index],
flags, node, size);
}
- return __kmalloc_node_noprof(PASS_BUCKET_PARAMS(size, NULL), flags, node);
+ return __kmalloc_node_noprof(PASS_KMALLOC_PARAMS(size, NULL, token), flags, node);
}
+#define kmalloc_node_noprof(...) _kmalloc_node_noprof(__VA_ARGS__, __kmalloc_token(__VA_ARGS__))
#define kmalloc_node(...) alloc_hooks(kmalloc_node_noprof(__VA_ARGS__))
+static inline __alloc_size(1, 2) void *_kmalloc_array_noprof(size_t n, size_t size, gfp_t flags, kmalloc_token_t token)
+{
+ size_t bytes;
+
+ if (unlikely(check_mul_overflow(n, size, &bytes)))
+ return NULL;
+ return _kmalloc_noprof(bytes, flags, token);
+}
+#define kmalloc_array_noprof(...) _kmalloc_array_noprof(__VA_ARGS__, __kmalloc_token(__VA_ARGS__))
+#if 0 /* kernel-doc */
/**
* kmalloc_array - allocate memory for an array.
* @n: number of elements.
* @size: element size.
* @flags: the type of memory to allocate (see kmalloc).
*/
-static inline __alloc_size(1, 2) void *kmalloc_array_noprof(size_t n, size_t size, gfp_t flags)
+void *kmalloc_array(size_t n, size_t size, gfp_t flags);
+#endif
+#define kmalloc_array(n, size, flags) alloc_hooks(kmalloc_array_noprof(n, size, flags))
+
+static inline __realloc_size(2, 3) void * __must_check _krealloc_array_noprof(void *p,
+ size_t new_n,
+ size_t new_size,
+ gfp_t flags, kmalloc_token_t token)
{
size_t bytes;
- if (unlikely(check_mul_overflow(n, size, &bytes)))
+ if (unlikely(check_mul_overflow(new_n, new_size, &bytes)))
return NULL;
- return kmalloc_noprof(bytes, flags);
-}
-#define kmalloc_array(...) alloc_hooks(kmalloc_array_noprof(__VA_ARGS__))
+ return krealloc_node_align_noprof(p, PASS_TOKEN_PARAMS(bytes, token), 1, flags, NUMA_NO_NODE);
+}
+#define krealloc_array_noprof(...) _krealloc_array_noprof(__VA_ARGS__, __kmalloc_token(__VA_ARGS__))
+#if 0 /* kernel-doc */
/**
* krealloc_array - reallocate memory for an array.
* @p: pointer to the memory chunk to reallocate
@@ -1115,19 +1227,9 @@ static inline __alloc_size(1, 2) void *kmalloc_array_noprof(size_t n, size_t siz
* In any case, the contents of the object pointed to are preserved up to the
* lesser of the new and old sizes.
*/
-static inline __realloc_size(2, 3) void * __must_check krealloc_array_noprof(void *p,
- size_t new_n,
- size_t new_size,
- gfp_t flags)
-{
- size_t bytes;
-
- if (unlikely(check_mul_overflow(new_n, new_size, &bytes)))
- return NULL;
-
- return krealloc_noprof(p, bytes, flags);
-}
-#define krealloc_array(...) alloc_hooks(krealloc_array_noprof(__VA_ARGS__))
+void *krealloc_array(void *p, size_t new_n, size_t new_size, gfp_t flags);
+#endif
+#define krealloc_array(p, new_n, new_size, flags) alloc_hooks(krealloc_array_noprof(p, new_n, new_size, flags))
/**
* kcalloc - allocate memory for an array. The memory is set to zero.
@@ -1137,10 +1239,10 @@ static inline __realloc_size(2, 3) void * __must_check krealloc_array_noprof(voi
*/
#define kcalloc(n, size, flags) kmalloc_array(n, size, (flags) | __GFP_ZERO)
-void *__kmalloc_node_track_caller_noprof(DECL_BUCKET_PARAMS(size, b), gfp_t flags, int node,
+void *__kmalloc_node_track_caller_noprof(DECL_KMALLOC_PARAMS(size, b, token), gfp_t flags, int node,
unsigned long caller) __alloc_size(1);
#define kmalloc_node_track_caller_noprof(size, flags, node, caller) \
- __kmalloc_node_track_caller_noprof(PASS_BUCKET_PARAMS(size, NULL), flags, node, caller)
+ __kmalloc_node_track_caller_noprof(PASS_KMALLOC_PARAMS(size, NULL, __kmalloc_token(size)), flags, node, caller)
#define kmalloc_node_track_caller(...) \
alloc_hooks(kmalloc_node_track_caller_noprof(__VA_ARGS__, _RET_IP_))
@@ -1157,17 +1259,18 @@ void *__kmalloc_node_track_caller_noprof(DECL_BUCKET_PARAMS(size, b), gfp_t flag
#define kmalloc_track_caller_noprof(...) \
kmalloc_node_track_caller_noprof(__VA_ARGS__, NUMA_NO_NODE, _RET_IP_)
-static inline __alloc_size(1, 2) void *kmalloc_array_node_noprof(size_t n, size_t size, gfp_t flags,
- int node)
+static inline __alloc_size(1, 2) void *_kmalloc_array_node_noprof(size_t n, size_t size, gfp_t flags,
+ int node, kmalloc_token_t token)
{
size_t bytes;
if (unlikely(check_mul_overflow(n, size, &bytes)))
return NULL;
if (__builtin_constant_p(n) && __builtin_constant_p(size))
- return kmalloc_node_noprof(bytes, flags, node);
- return __kmalloc_node_noprof(PASS_BUCKET_PARAMS(bytes, NULL), flags, node);
+ return _kmalloc_node_noprof(bytes, flags, node, token);
+ return __kmalloc_node_noprof(PASS_KMALLOC_PARAMS(bytes, NULL, token), flags, node);
}
+#define kmalloc_array_node_noprof(...) _kmalloc_array_node_noprof(__VA_ARGS__, __kmalloc_token(__VA_ARGS__))
#define kmalloc_array_node(...) alloc_hooks(kmalloc_array_node_noprof(__VA_ARGS__))
#define kcalloc_node(_n, _size, _flags, _node) \
@@ -1178,44 +1281,73 @@ static inline __alloc_size(1, 2) void *kmalloc_array_node_noprof(size_t n, size_
*/
#define kmem_cache_zalloc(_k, _flags) kmem_cache_alloc(_k, (_flags)|__GFP_ZERO)
+static inline __alloc_size(1) void *_kzalloc_noprof(size_t size, gfp_t flags, kmalloc_token_t token)
+{
+ return _kmalloc_noprof(size, flags | __GFP_ZERO, token);
+}
+#define kzalloc_noprof(...) _kzalloc_noprof(__VA_ARGS__, __kmalloc_token(__VA_ARGS__))
+#if 0 /* kernel-doc */
/**
* kzalloc - allocate memory. The memory is set to zero.
* @size: how many bytes of memory are required.
* @flags: the type of memory to allocate (see kmalloc).
*/
-static inline __alloc_size(1) void *kzalloc_noprof(size_t size, gfp_t flags)
-{
- return kmalloc_noprof(size, flags | __GFP_ZERO);
-}
-#define kzalloc(...) alloc_hooks(kzalloc_noprof(__VA_ARGS__))
+void *kzalloc(size_t size, gfp_t flags);
+#endif
+#define kzalloc(size, flags) alloc_hooks(kzalloc_noprof(size, flags))
#define kzalloc_node(_size, _flags, _node) kmalloc_node(_size, (_flags)|__GFP_ZERO, _node)
-void *__kvmalloc_node_noprof(DECL_BUCKET_PARAMS(size, b), unsigned long align,
+void *__kvmalloc_node_noprof(DECL_KMALLOC_PARAMS(size, b, token), unsigned long align,
gfp_t flags, int node) __alloc_size(1);
#define kvmalloc_node_align_noprof(_size, _align, _flags, _node) \
- __kvmalloc_node_noprof(PASS_BUCKET_PARAMS(_size, NULL), _align, _flags, _node)
+ __kvmalloc_node_noprof(PASS_KMALLOC_PARAMS(_size, NULL, __kmalloc_token(_size)), _align, _flags, _node)
#define kvmalloc_node_align(...) \
alloc_hooks(kvmalloc_node_align_noprof(__VA_ARGS__))
-#define kvmalloc_node(_s, _f, _n) kvmalloc_node_align(_s, 1, _f, _n)
+#if 0 /* kernel-doc */
+/**
+ * kvmalloc_node - attempt to allocate physically contiguous memory, but upon
+ * failure, fall back to non-contiguous (vmalloc) allocation.
+ * @size: size of the request.
+ * @flags: gfp mask for the allocation - must be compatible (superset) with GFP_KERNEL.
+ * @node: numa node to allocate from
+ *
+ * Only alignments up to those guaranteed by kmalloc() will be honored. Please see
+ * Documentation/core-api/memory-allocation.rst for more details.
+ *
+ * Uses kmalloc to get the memory but if the allocation fails then falls back
+ * to the vmalloc allocator. Use kvfree for freeing the memory.
+ *
+ * GFP_NOWAIT and GFP_ATOMIC are supported, the __GFP_NORETRY modifier is not.
+ * __GFP_RETRY_MAYFAIL is supported, and it should be used only if kmalloc is
+ * preferable to the vmalloc fallback, due to visible performance drawbacks.
+ *
+ * Return: pointer to the allocated memory or %NULL in case of failure
+ */
+void *kvmalloc_node(size_t size, gfp_t flags, int node);
+#endif
+#define kvmalloc_node(size, flags, node) kvmalloc_node_align(size, 1, flags, node)
+#define kvmalloc_node_noprof(size, flags, node) \
+ kvmalloc_node_align_noprof(size, 1, flags, node)
#define kvmalloc(...) kvmalloc_node(__VA_ARGS__, NUMA_NO_NODE)
+#define kvmalloc_noprof(_size, _flags) kvmalloc_node_noprof(_size, _flags, NUMA_NO_NODE)
#define kvzalloc(_size, _flags) kvmalloc(_size, (_flags)|__GFP_ZERO)
#define kvzalloc_node(_size, _flags, _node) kvmalloc_node(_size, (_flags)|__GFP_ZERO, _node)
#define kmem_buckets_valloc(_b, _size, _flags) \
- alloc_hooks(__kvmalloc_node_noprof(PASS_BUCKET_PARAMS(_size, _b), 1, _flags, NUMA_NO_NODE))
+ alloc_hooks(__kvmalloc_node_noprof(PASS_KMALLOC_PARAMS(_size, _b, __kmalloc_token(_size)), 1, _flags, NUMA_NO_NODE))
static inline __alloc_size(1, 2) void *
-kvmalloc_array_node_noprof(size_t n, size_t size, gfp_t flags, int node)
+_kvmalloc_array_node_noprof(size_t n, size_t size, gfp_t flags, int node, kmalloc_token_t token)
{
size_t bytes;
if (unlikely(check_mul_overflow(n, size, &bytes)))
return NULL;
- return kvmalloc_node_align_noprof(bytes, 1, flags, node);
+ return __kvmalloc_node_noprof(PASS_KMALLOC_PARAMS(bytes, NULL, token), 1, flags, node);
}
-
+#define kvmalloc_array_node_noprof(...) _kvmalloc_array_node_noprof(__VA_ARGS__, __kmalloc_token(__VA_ARGS__))
#define kvmalloc_array_noprof(...) kvmalloc_array_node_noprof(__VA_ARGS__, NUMA_NO_NODE)
#define kvcalloc_node_noprof(_n,_s,_f,_node) kvmalloc_array_node_noprof(_n,_s,(_f)|__GFP_ZERO,_node)
#define kvcalloc_noprof(...) kvcalloc_node_noprof(__VA_ARGS__, NUMA_NO_NODE)
@@ -1224,10 +1356,40 @@ kvmalloc_array_node_noprof(size_t n, size_t size, gfp_t flags, int node)
#define kvcalloc_node(...) alloc_hooks(kvcalloc_node_noprof(__VA_ARGS__))
#define kvcalloc(...) alloc_hooks(kvcalloc_noprof(__VA_ARGS__))
-void *kvrealloc_node_align_noprof(const void *p, size_t size, unsigned long align,
+void *kvrealloc_node_align_noprof(const void *p, DECL_TOKEN_PARAMS(size, token), unsigned long align,
gfp_t flags, int nid) __realloc_size(2);
-#define kvrealloc_node_align(...) \
- alloc_hooks(kvrealloc_node_align_noprof(__VA_ARGS__))
+#if 0 /* kernel-doc */
+/**
+ * kvrealloc_node_align - reallocate memory; contents remain unchanged
+ * @p: object to reallocate memory for
+ * @size: the size to reallocate
+ * @align: desired alignment
+ * @flags: the flags for the page level allocator
+ * @nid: NUMA node id
+ *
+ * If @p is %NULL, kvrealloc() behaves exactly like kvmalloc(). If @size is 0
+ * and @p is not a %NULL pointer, the object pointed to is freed.
+ *
+ * Only alignments up to those guaranteed by kmalloc() will be honored. Please see
+ * Documentation/core-api/memory-allocation.rst for more details.
+ *
+ * If __GFP_ZERO logic is requested, callers must ensure that, starting with the
+ * initial memory allocation, every subsequent call to this API for the same
+ * memory allocation is flagged with __GFP_ZERO. Otherwise, it is possible that
+ * __GFP_ZERO is not fully honored by this API.
+ *
+ * In any case, the contents of the object pointed to are preserved up to the
+ * lesser of the new and old sizes.
+ *
+ * This function must not be called concurrently with itself or kvfree() for the
+ * same memory allocation.
+ *
+ * Return: pointer to the allocated memory or %NULL in case of error
+ */
+void *kvrealloc_node_align(const void *p, size_t size, unsigned long align, gfp_t flags, int nid);
+#endif
+#define kvrealloc_node_align(p, size, align, flags, nid) \
+ alloc_hooks(kvrealloc_node_align_noprof(p, PASS_TOKEN_PARAMS(size, __kmalloc_token(size)), align, flags, nid))
#define kvrealloc_node(_p, _s, _f, _n) kvrealloc_node_align(_p, _s, 1, _f, _n)
#define kvrealloc(...) kvrealloc_node(__VA_ARGS__, NUMA_NO_NODE)
diff --git a/init/Kconfig b/init/Kconfig
index 596e469d99f82..f5a9dfb1cf320 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -170,6 +170,9 @@ config CC_HAS_BROKEN_COUNTED_BY_REF
# https://github.com/llvm/llvm-project/issues/182575
default y if CC_IS_CLANG && CLANG_VERSION < 220100
+config CC_HAS_ALLOC_TOKEN
+ def_bool $(cc-option,-falloc-token-max=123)
+
config CC_HAS_MULTIDIMENSIONAL_NONSTRING
def_bool $(success,echo 'char tag[][4] __attribute__((__nonstring__)) = { };' | $(CC) $(CLANG_FLAGS) -x c - -c -o /dev/null -Werror)
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 753ac23401c56..00b68efedc3ae 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -980,29 +980,24 @@ __cold bool __io_alloc_req_refill(struct io_ring_ctx *ctx)
{
gfp_t gfp = GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO;
void *reqs[IO_REQ_ALLOC_BATCH];
- int ret;
-
- ret = kmem_cache_alloc_bulk(req_cachep, gfp, ARRAY_SIZE(reqs), reqs);
+ int nr_reqs = ARRAY_SIZE(reqs);
/*
- * Bulk alloc is all-or-nothing. If we fail to get a batch,
- * retry single alloc to be on the safe side.
+ * Bulk alloc is all-or-nothing. If we fail to get a batch, retry a
+ * single allocation to be on the safe side.
*/
- if (unlikely(ret <= 0)) {
+ if (!kmem_cache_alloc_bulk(req_cachep, gfp, nr_reqs, reqs)) {
reqs[0] = kmem_cache_alloc(req_cachep, gfp);
if (!reqs[0])
return false;
- ret = 1;
+ nr_reqs = 1;
}
- percpu_ref_get_many(&ctx->refs, ret);
- ctx->nr_req_allocated += ret;
-
- while (ret--) {
- struct io_kiocb *req = reqs[ret];
+ percpu_ref_get_many(&ctx->refs, nr_reqs);
+ ctx->nr_req_allocated += nr_reqs;
- io_req_add_to_cache(req, ctx);
- }
+ while (nr_reqs--)
+ io_req_add_to_cache(reqs[nr_reqs], ctx);
return true;
}
diff --git a/kernel/configs/hardening.config b/kernel/configs/hardening.config
index 7c3924614e01d..26831a2a57392 100644
--- a/kernel/configs/hardening.config
+++ b/kernel/configs/hardening.config
@@ -22,7 +22,7 @@ CONFIG_SLAB_FREELIST_RANDOM=y
CONFIG_SLAB_FREELIST_HARDENED=y
CONFIG_SLAB_BUCKETS=y
CONFIG_SHUFFLE_PAGE_ALLOCATOR=y
-CONFIG_RANDOM_KMALLOC_CACHES=y
+CONFIG_KMALLOC_PARTITION_CACHES=y
# Sanity check userspace page table mappings.
CONFIG_PAGE_TABLE_CHECK=y
diff --git a/lib/test_meminit.c b/lib/test_meminit.c
index d028a6552cd61..68c3b9da090ef 100644
--- a/lib/test_meminit.c
+++ b/lib/test_meminit.c
@@ -229,16 +229,14 @@ static int __init do_kmem_cache_size(size_t size, bool want_ctor,
for (iter = 0; iter < 10; iter++) {
/* Do a test of bulk allocations */
if (!want_rcu && !want_ctor) {
- int ret;
-
- ret = kmem_cache_alloc_bulk(c, alloc_mask, BULK_SIZE, bulk_array);
- if (!ret) {
+ if (!kmem_cache_alloc_bulk(c, alloc_mask, BULK_SIZE,
+ bulk_array)) {
fail = true;
} else {
int i;
- for (i = 0; i < ret; i++)
+ for (i = 0; i < BULK_SIZE; i++)
fail |= check_buf(bulk_array[i], size, want_ctor, want_rcu, want_zero);
- kmem_cache_free_bulk(c, ret, bulk_array);
+ kmem_cache_free_bulk(c, BULK_SIZE, bulk_array);
}
}
@@ -348,23 +346,24 @@ static int __init do_kmem_cache_size_bulk(int size, int *total_failures)
{
struct kmem_cache *c;
int i, iter, maxiter = 1024;
- int num, bytes;
+ int bytes;
bool fail = false;
void *objects[10];
c = kmem_cache_create("test_cache", size, size, 0, NULL);
for (iter = 0; (iter < maxiter) && !fail; iter++) {
- num = kmem_cache_alloc_bulk(c, GFP_KERNEL, ARRAY_SIZE(objects),
- objects);
- for (i = 0; i < num; i++) {
+ if (!kmem_cache_alloc_bulk(c, GFP_KERNEL, ARRAY_SIZE(objects),
+ objects))
+ continue;
+
+ for (i = 0; i < ARRAY_SIZE(objects); i++) {
bytes = count_nonzero_bytes(objects[i], size);
if (bytes)
fail = true;
fill_with_garbage(objects[i], size);
}
- if (num)
- kmem_cache_free_bulk(c, num, objects);
+ kmem_cache_free_bulk(c, ARRAY_SIZE(objects), objects);
}
kmem_cache_destroy(c);
*total_failures += fail;
diff --git a/mm/Kconfig b/mm/Kconfig
index e649a950be93f..9e0ca48249054 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -248,22 +248,75 @@ config SLUB_STATS
out which slabs are relevant to a particular load.
Try running: slabinfo -DA
-config RANDOM_KMALLOC_CACHES
- default n
+config KMALLOC_PARTITION_CACHES
depends on !SLUB_TINY
- bool "Randomize slab caches for normal kmalloc"
+ bool "Partitioned slab caches for normal kmalloc"
+ default RANDOM_KMALLOC_CACHES
help
- A hardening feature that creates multiple copies of slab caches for
- normal kmalloc allocation and makes kmalloc randomly pick one based
- on code address, which makes the attackers more difficult to spray
- vulnerable memory objects on the heap for the purpose of exploiting
- memory vulnerabilities.
+ A hardening feature that creates multiple isolated copies of slab
+ caches for normal kmalloc allocations. This makes it more difficult
+ to exploit memory-safety vulnerabilities by attacking vulnerable
+ co-located memory objects. Several modes are provided.
Currently the number of copies is set to 16, a reasonably large value
that effectively diverges the memory objects allocated for different
subsystems or modules into different caches, at the expense of a
- limited degree of memory and CPU overhead that relates to hardware and
- system workload.
+ limited degree of memory and CPU overhead that relates to hardware
+ and system workload.
+
+choice
+ prompt "Partitioned slab cache mode"
+ depends on KMALLOC_PARTITION_CACHES
+ default KMALLOC_PARTITION_TYPED if CC_HAS_ALLOC_TOKEN
+ default KMALLOC_PARTITION_RANDOM
+ help
+ Selects the slab cache partitioning mode.
+
+config KMALLOC_PARTITION_RANDOM
+ bool "Randomize slab caches for normal kmalloc"
+ help
+ Randomly pick a slab cache based on code address and a per-boot
+ random seed.
+
+ This makes it harder for attackers to predict object co-location.
+ The placement is random: while attackers don't know which kmalloc
+ cache an object will be allocated from, they might circumvent
+ the randomization by retrying attacks across multiple machines until
+ the target objects are co-located.
+
+config KMALLOC_PARTITION_TYPED
+ bool "Type based slab cache selection for normal kmalloc"
+ depends on CC_HAS_ALLOC_TOKEN
+ help
+ Rely on Clang's allocation tokens to choose a slab cache, where token
+ IDs are derived from the allocated type.
+
+ Unlike KMALLOC_PARTITION_RANDOM, cache assignment is deterministic based
+ on type, which guarantees that objects of certain types are not
+ placed in the same cache. This effectively mitigates certain classes
+ of exploits that probabilistic defenses like KMALLOC_PARTITION_RANDOM
+ only make harder but not impossible. However, this also means the
+ cache assignment is predictable.
+
+ Clang's default token ID calculation returns a bounded hash with
+ disjoint ranges for pointer-containing and pointerless objects: when
+ used as the slab cache index, this prevents buffer overflows on
+ primitive buffers from directly corrupting pointer-containing
+ objects.
+
+ The current effectiveness of Clang's type inference can be judged by
+ -Rpass=alloc-token, which provides diagnostics where (after dead-code
+ elimination) type inference failed.
+
+ Requires Clang 22 or later.
+
+endchoice
+
+config RANDOM_KMALLOC_CACHES
+ bool
+ transitional
+ help
+ Transitional config for migration to KMALLOC_PARTITION_CACHES.
endmenu # Slab allocator options
diff --git a/mm/kasan/kasan_test_c.c b/mm/kasan/kasan_test_c.c
index 3f4ed29178b3c..b9e167ed5be32 100644
--- a/mm/kasan/kasan_test_c.c
+++ b/mm/kasan/kasan_test_c.c
@@ -1225,14 +1225,13 @@ static void kmem_cache_bulk(struct kunit *test)
struct kmem_cache *cache;
size_t size = 200;
char *p[10];
- bool ret;
int i;
cache = kmem_cache_create("test_cache", size, 0, 0, NULL);
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, cache);
- ret = kmem_cache_alloc_bulk(cache, GFP_KERNEL, ARRAY_SIZE(p), (void **)&p);
- if (!ret) {
+ if (!kmem_cache_alloc_bulk(cache, GFP_KERNEL, ARRAY_SIZE(p),
+ (void **)&p)) {
kunit_err(test, "Allocation failed: %s\n", __func__);
kmem_cache_destroy(cache);
return;
diff --git a/mm/kfence/kfence_test.c b/mm/kfence/kfence_test.c
index 10424cd25e5a6..de2d0f7d62b15 100644
--- a/mm/kfence/kfence_test.c
+++ b/mm/kfence/kfence_test.c
@@ -214,7 +214,7 @@ static void test_cache_destroy(void)
static inline size_t kmalloc_cache_alignment(size_t size)
{
/* just to get ->align so no need to pass in the real caller */
- enum kmalloc_cache_type type = kmalloc_type(GFP_KERNEL, 0);
+ enum kmalloc_cache_type type = kmalloc_type(GFP_KERNEL, __kmalloc_token(0));
return kmalloc_caches[type][__kmalloc_index(size, false)]->align;
}
@@ -285,7 +285,7 @@ static void *test_alloc(struct kunit *test, size_t size, gfp_t gfp, enum allocat
if (is_kfence_address(alloc)) {
struct slab *slab = virt_to_slab(alloc);
- enum kmalloc_cache_type type = kmalloc_type(GFP_KERNEL, _RET_IP_);
+ enum kmalloc_cache_type type = kmalloc_type(GFP_KERNEL, __kmalloc_token(size));
struct kmem_cache *s = test_cache ?:
kmalloc_caches[type][__kmalloc_index(size, false)];
@@ -761,9 +761,10 @@ static void test_memcache_alloc_bulk(struct kunit *test)
timeout = jiffies + msecs_to_jiffies(100 * kfence_sample_interval);
do {
void *objects[100];
- int i, num = kmem_cache_alloc_bulk(test_cache, GFP_ATOMIC, ARRAY_SIZE(objects),
- objects);
- if (!num)
+ int i;
+
+ if (!kmem_cache_alloc_bulk(test_cache, GFP_ATOMIC,
+ ARRAY_SIZE(objects), objects))
continue;
for (i = 0; i < ARRAY_SIZE(objects); i++) {
if (is_kfence_address(objects[i])) {
@@ -771,7 +772,7 @@ static void test_memcache_alloc_bulk(struct kunit *test)
break;
}
}
- kmem_cache_free_bulk(test_cache, num, objects);
+ kmem_cache_free_bulk(test_cache, ARRAY_SIZE(objects), objects);
/*
* kmem_cache_alloc_bulk() disables interrupts, and calling it
* in a tight loop may not give KFENCE a chance to switch the
diff --git a/mm/slab.h b/mm/slab.h
index bf2f87acf5e3a..1bf9c3021ae3d 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -362,12 +362,12 @@ static inline unsigned int size_index_elem(unsigned int bytes)
* KMALLOC_MAX_CACHE_SIZE and the caller must check that.
*/
static inline struct kmem_cache *
-kmalloc_slab(size_t size, kmem_buckets *b, gfp_t flags, unsigned long caller)
+kmalloc_slab(size_t size, kmem_buckets *b, gfp_t flags, kmalloc_token_t token)
{
unsigned int index;
if (!b)
- b = &kmalloc_caches[kmalloc_type(flags, caller)];
+ b = &kmalloc_caches[kmalloc_type(flags, token)];
if (size <= 192)
index = kmalloc_size_index[size_index_elem(size)];
else
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 8b661fff5eedb..b6426d7ceec92 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -742,7 +742,7 @@ kmem_buckets kmalloc_caches[NR_KMALLOC_TYPES] __ro_after_init =
{ /* initialization for https://llvm.org/pr42570 */ };
EXPORT_SYMBOL(kmalloc_caches);
-#ifdef CONFIG_RANDOM_KMALLOC_CACHES
+#ifdef CONFIG_KMALLOC_PARTITION_RANDOM
unsigned long random_kmalloc_seed __ro_after_init;
EXPORT_SYMBOL(random_kmalloc_seed);
#endif
@@ -787,7 +787,7 @@ size_t kmalloc_size_roundup(size_t size)
* The flags don't matter since size_index is common to all.
* Neither does the caller for just getting ->object_size.
*/
- return kmalloc_slab(size, NULL, GFP_KERNEL, 0)->object_size;
+ return kmalloc_slab(size, NULL, GFP_KERNEL, __kmalloc_token(0))->object_size;
}
/* Above the smaller buckets, size is a multiple of page size. */
@@ -821,26 +821,26 @@ EXPORT_SYMBOL(kmalloc_size_roundup);
#define KMALLOC_RCL_NAME(sz)
#endif
-#ifdef CONFIG_RANDOM_KMALLOC_CACHES
-#define __KMALLOC_RANDOM_CONCAT(a, b) a ## b
-#define KMALLOC_RANDOM_NAME(N, sz) __KMALLOC_RANDOM_CONCAT(KMA_RAND_, N)(sz)
-#define KMA_RAND_1(sz) .name[KMALLOC_RANDOM_START + 1] = "kmalloc-rnd-01-" #sz,
-#define KMA_RAND_2(sz) KMA_RAND_1(sz) .name[KMALLOC_RANDOM_START + 2] = "kmalloc-rnd-02-" #sz,
-#define KMA_RAND_3(sz) KMA_RAND_2(sz) .name[KMALLOC_RANDOM_START + 3] = "kmalloc-rnd-03-" #sz,
-#define KMA_RAND_4(sz) KMA_RAND_3(sz) .name[KMALLOC_RANDOM_START + 4] = "kmalloc-rnd-04-" #sz,
-#define KMA_RAND_5(sz) KMA_RAND_4(sz) .name[KMALLOC_RANDOM_START + 5] = "kmalloc-rnd-05-" #sz,
-#define KMA_RAND_6(sz) KMA_RAND_5(sz) .name[KMALLOC_RANDOM_START + 6] = "kmalloc-rnd-06-" #sz,
-#define KMA_RAND_7(sz) KMA_RAND_6(sz) .name[KMALLOC_RANDOM_START + 7] = "kmalloc-rnd-07-" #sz,
-#define KMA_RAND_8(sz) KMA_RAND_7(sz) .name[KMALLOC_RANDOM_START + 8] = "kmalloc-rnd-08-" #sz,
-#define KMA_RAND_9(sz) KMA_RAND_8(sz) .name[KMALLOC_RANDOM_START + 9] = "kmalloc-rnd-09-" #sz,
-#define KMA_RAND_10(sz) KMA_RAND_9(sz) .name[KMALLOC_RANDOM_START + 10] = "kmalloc-rnd-10-" #sz,
-#define KMA_RAND_11(sz) KMA_RAND_10(sz) .name[KMALLOC_RANDOM_START + 11] = "kmalloc-rnd-11-" #sz,
-#define KMA_RAND_12(sz) KMA_RAND_11(sz) .name[KMALLOC_RANDOM_START + 12] = "kmalloc-rnd-12-" #sz,
-#define KMA_RAND_13(sz) KMA_RAND_12(sz) .name[KMALLOC_RANDOM_START + 13] = "kmalloc-rnd-13-" #sz,
-#define KMA_RAND_14(sz) KMA_RAND_13(sz) .name[KMALLOC_RANDOM_START + 14] = "kmalloc-rnd-14-" #sz,
-#define KMA_RAND_15(sz) KMA_RAND_14(sz) .name[KMALLOC_RANDOM_START + 15] = "kmalloc-rnd-15-" #sz,
-#else // CONFIG_RANDOM_KMALLOC_CACHES
-#define KMALLOC_RANDOM_NAME(N, sz)
+#ifdef CONFIG_KMALLOC_PARTITION_CACHES
+#define __KMALLOC_PARTITION_CONCAT(a, b) a ## b
+#define KMALLOC_PARTITION_NAME(N, sz) __KMALLOC_PARTITION_CONCAT(KMA_PART_, N)(sz)
+#define KMA_PART_1(sz) .name[KMALLOC_PARTITION_START + 1] = "kmalloc-part-01-" #sz,
+#define KMA_PART_2(sz) KMA_PART_1(sz) .name[KMALLOC_PARTITION_START + 2] = "kmalloc-part-02-" #sz,
+#define KMA_PART_3(sz) KMA_PART_2(sz) .name[KMALLOC_PARTITION_START + 3] = "kmalloc-part-03-" #sz,
+#define KMA_PART_4(sz) KMA_PART_3(sz) .name[KMALLOC_PARTITION_START + 4] = "kmalloc-part-04-" #sz,
+#define KMA_PART_5(sz) KMA_PART_4(sz) .name[KMALLOC_PARTITION_START + 5] = "kmalloc-part-05-" #sz,
+#define KMA_PART_6(sz) KMA_PART_5(sz) .name[KMALLOC_PARTITION_START + 6] = "kmalloc-part-06-" #sz,
+#define KMA_PART_7(sz) KMA_PART_6(sz) .name[KMALLOC_PARTITION_START + 7] = "kmalloc-part-07-" #sz,
+#define KMA_PART_8(sz) KMA_PART_7(sz) .name[KMALLOC_PARTITION_START + 8] = "kmalloc-part-08-" #sz,
+#define KMA_PART_9(sz) KMA_PART_8(sz) .name[KMALLOC_PARTITION_START + 9] = "kmalloc-part-09-" #sz,
+#define KMA_PART_10(sz) KMA_PART_9(sz) .name[KMALLOC_PARTITION_START + 10] = "kmalloc-part-10-" #sz,
+#define KMA_PART_11(sz) KMA_PART_10(sz) .name[KMALLOC_PARTITION_START + 11] = "kmalloc-part-11-" #sz,
+#define KMA_PART_12(sz) KMA_PART_11(sz) .name[KMALLOC_PARTITION_START + 12] = "kmalloc-part-12-" #sz,
+#define KMA_PART_13(sz) KMA_PART_12(sz) .name[KMALLOC_PARTITION_START + 13] = "kmalloc-part-13-" #sz,
+#define KMA_PART_14(sz) KMA_PART_13(sz) .name[KMALLOC_PARTITION_START + 14] = "kmalloc-part-14-" #sz,
+#define KMA_PART_15(sz) KMA_PART_14(sz) .name[KMALLOC_PARTITION_START + 15] = "kmalloc-part-15-" #sz,
+#else // CONFIG_KMALLOC_PARTITION_CACHES
+#define KMALLOC_PARTITION_NAME(N, sz)
#endif
#define INIT_KMALLOC_INFO(__size, __short_size) \
@@ -849,7 +849,7 @@ EXPORT_SYMBOL(kmalloc_size_roundup);
KMALLOC_RCL_NAME(__short_size) \
KMALLOC_CGROUP_NAME(__short_size) \
KMALLOC_DMA_NAME(__short_size) \
- KMALLOC_RANDOM_NAME(RANDOM_KMALLOC_CACHES_NR, __short_size) \
+ KMALLOC_PARTITION_NAME(KMALLOC_PARTITION_CACHES_NR, __short_size) \
.size = __size, \
}
@@ -961,8 +961,8 @@ new_kmalloc_cache(int idx, enum kmalloc_cache_type type)
flags |= SLAB_CACHE_DMA;
}
-#ifdef CONFIG_RANDOM_KMALLOC_CACHES
- if (type >= KMALLOC_RANDOM_START && type <= KMALLOC_RANDOM_END)
+#ifdef CONFIG_KMALLOC_PARTITION_CACHES
+ if (type >= KMALLOC_PARTITION_START && type <= KMALLOC_PARTITION_END)
flags |= SLAB_NO_MERGE;
#endif
@@ -1010,7 +1010,7 @@ void __init create_kmalloc_caches(void)
for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++)
new_kmalloc_cache(i, type);
}
-#ifdef CONFIG_RANDOM_KMALLOC_CACHES
+#ifdef CONFIG_KMALLOC_PARTITION_RANDOM
random_kmalloc_seed = get_random_u64();
#endif
diff --git a/mm/slub.c b/mm/slub.c
index a2bf3756ca7d0..67abbbf68fc10 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -227,6 +227,17 @@ struct partial_bulk_context {
struct list_head slabs;
};
+/* Structure used to iterate over objects within a slab */
+struct slab_obj_iter {
+ unsigned long pos;
+ void *start;
+#ifdef CONFIG_SLAB_FREELIST_RANDOM
+ unsigned long freelist_count;
+ unsigned long page_limit;
+ bool random;
+#endif
+};
+
static inline bool kmem_cache_debug(struct kmem_cache *s)
{
return kmem_cache_debug_flags(s, SLAB_DEBUG_FLAGS);
@@ -351,8 +362,8 @@ enum stat_item {
CMPXCHG_DOUBLE_FAIL, /* Failures of slab freelist update */
SHEAF_FLUSH, /* Objects flushed from a sheaf */
SHEAF_REFILL, /* Objects refilled to a sheaf */
- SHEAF_ALLOC, /* Allocation of an empty sheaf */
- SHEAF_FREE, /* Freeing of an empty sheaf */
+ SHEAF_ALLOC, /* Allocation of an empty sheaf including oversized ones */
+ SHEAF_FREE, /* Freeing of an empty sheaf including oversized ones */
BARN_GET, /* Got full sheaf from barn */
BARN_GET_FAIL, /* Failed to get full sheaf from barn */
BARN_PUT, /* Put full sheaf to barn */
@@ -2129,11 +2140,11 @@ static inline size_t obj_exts_alloc_size(struct kmem_cache *s,
if (!is_kmalloc_normal(s))
return sz;
- obj_exts_cache = kmalloc_slab(sz, NULL, gfp, 0);
+ obj_exts_cache = kmalloc_slab(sz, NULL, gfp, __kmalloc_token(0));
/*
- * We can't simply compare s with obj_exts_cache, because random kmalloc
- * caches have multiple caches per size, selected by caller address.
- * Since caller address may differ between kmalloc_slab() and actual
+ * We can't simply compare s with obj_exts_cache, because partitioned kmalloc
+ * caches have multiple caches per size, selected by caller address or type.
+ * Since caller address or type may differ between kmalloc_slab() and actual
* allocation, bump size when sizes are equal.
*/
if (s->object_size == obj_exts_cache->object_size)
@@ -2733,7 +2744,7 @@ bool slab_free_freelist_hook(struct kmem_cache *s, void **head, void **tail,
return *head != NULL;
}
-static void *setup_object(struct kmem_cache *s, void *object)
+static inline void *setup_object(struct kmem_cache *s, void *object)
{
setup_object_debug(s, object);
object = kasan_init_slab_obj(s, object);
@@ -2751,11 +2762,6 @@ static struct slab_sheaf *__alloc_empty_sheaf(struct kmem_cache *s, gfp_t gfp,
struct slab_sheaf *sheaf;
size_t sheaf_size;
- if (gfp & __GFP_NO_OBJ_EXT)
- return NULL;
-
- gfp &= ~OBJCGS_CLEAR_MASK;
-
/*
* Prevent recursion to the same cache, or a deep stack of kmallocs of
* varying sizes (sheaf capacity might differ for each kmalloc size
@@ -2780,6 +2786,11 @@ static struct slab_sheaf *__alloc_empty_sheaf(struct kmem_cache *s, gfp_t gfp,
static inline struct slab_sheaf *alloc_empty_sheaf(struct kmem_cache *s,
gfp_t gfp)
{
+ if (gfp & __GFP_NO_OBJ_EXT)
+ return NULL;
+
+ gfp &= ~OBJCGS_CLEAR_MASK;
+
return __alloc_empty_sheaf(s, gfp, s->sheaf_capacity);
}
@@ -3329,87 +3340,14 @@ static void __init init_freelist_randomization(void)
mutex_unlock(&slab_mutex);
}
-/* Get the next entry on the pre-computed freelist randomized */
-static void *next_freelist_entry(struct kmem_cache *s,
- unsigned long *pos, void *start,
- unsigned long page_limit,
- unsigned long freelist_count)
-{
- unsigned int idx;
-
- /*
- * If the target page allocation failed, the number of objects on the
- * page might be smaller than the usual size defined by the cache.
- */
- do {
- idx = s->random_seq[*pos];
- *pos += 1;
- if (*pos >= freelist_count)
- *pos = 0;
- } while (unlikely(idx >= page_limit));
-
- return (char *)start + idx;
-}
-
static DEFINE_PER_CPU(struct rnd_state, slab_rnd_state);
-/* Shuffle the single linked freelist based on a random pre-computed sequence */
-static bool shuffle_freelist(struct kmem_cache *s, struct slab *slab,
- bool allow_spin)
-{
- void *start;
- void *cur;
- void *next;
- unsigned long idx, pos, page_limit, freelist_count;
-
- if (slab->objects < 2 || !s->random_seq)
- return false;
-
- freelist_count = oo_objects(s->oo);
- if (allow_spin) {
- pos = get_random_u32_below(freelist_count);
- } else {
- struct rnd_state *state;
-
- /*
- * An interrupt or NMI handler might interrupt and change
- * the state in the middle, but that's safe.
- */
- state = &get_cpu_var(slab_rnd_state);
- pos = prandom_u32_state(state) % freelist_count;
- put_cpu_var(slab_rnd_state);
- }
-
- page_limit = slab->objects * s->size;
- start = fixup_red_left(s, slab_address(slab));
-
- /* First entry is used as the base of the freelist */
- cur = next_freelist_entry(s, &pos, start, page_limit, freelist_count);
- cur = setup_object(s, cur);
- slab->freelist = cur;
-
- for (idx = 1; idx < slab->objects; idx++) {
- next = next_freelist_entry(s, &pos, start, page_limit,
- freelist_count);
- next = setup_object(s, next);
- set_freepointer(s, cur, next);
- cur = next;
- }
- set_freepointer(s, cur, NULL);
-
- return true;
-}
#else
static inline int init_cache_random_seq(struct kmem_cache *s)
{
return 0;
}
static inline void init_freelist_randomization(void) { }
-static inline bool shuffle_freelist(struct kmem_cache *s, struct slab *slab,
- bool allow_spin)
-{
- return false;
-}
#endif /* CONFIG_SLAB_FREELIST_RANDOM */
static __always_inline void account_slab(struct slab *slab, int order,
@@ -3438,15 +3376,14 @@ static __always_inline void unaccount_slab(struct slab *slab, int order,
-(PAGE_SIZE << order));
}
+/* Allocate and initialize a slab without building its freelist. */
static struct slab *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
{
bool allow_spin = gfpflags_allow_spinning(flags);
struct slab *slab;
struct kmem_cache_order_objects oo = s->oo;
gfp_t alloc_gfp;
- void *start, *p, *next;
- int idx;
- bool shuffle;
+ void *start;
flags &= gfp_allowed_mask;
@@ -3497,21 +3434,6 @@ static struct slab *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
alloc_slab_obj_exts_early(s, slab);
account_slab(slab, oo_order(oo), s, flags);
- shuffle = shuffle_freelist(s, slab, allow_spin);
-
- if (!shuffle) {
- start = fixup_red_left(s, start);
- start = setup_object(s, start);
- slab->freelist = start;
- for (idx = 0, p = start; idx < slab->objects - 1; idx++) {
- next = p + s->size;
- next = setup_object(s, next);
- set_freepointer(s, p, next);
- p = next;
- }
- set_freepointer(s, p, NULL);
- }
-
return slab;
}
@@ -3599,15 +3521,21 @@ static inline void slab_clear_node_partial(struct slab *slab)
/*
* Management of partially allocated slabs.
*/
+static inline void set_node_partial_state(struct kmem_cache_node *n,
+ struct slab *slab)
+{
+ slab_set_node_partial(slab);
+ n->nr_partial++;
+}
+
static inline void
__add_partial(struct kmem_cache_node *n, struct slab *slab, enum add_mode mode)
{
- n->nr_partial++;
if (mode == ADD_TO_TAIL)
list_add_tail(&slab->slab_list, &n->partial);
else
list_add(&slab->slab_list, &n->partial);
- slab_set_node_partial(slab);
+ set_node_partial_state(n, slab);
}
static inline void add_partial(struct kmem_cache_node *n,
@@ -3617,13 +3545,19 @@ static inline void add_partial(struct kmem_cache_node *n,
__add_partial(n, slab, mode);
}
+static inline void clear_node_partial_state(struct kmem_cache_node *n,
+ struct slab *slab)
+{
+ slab_clear_node_partial(slab);
+ n->nr_partial--;
+}
+
static inline void remove_partial(struct kmem_cache_node *n,
struct slab *slab)
{
lockdep_assert_held(&n->list_lock);
list_del(&slab->slab_list);
- slab_clear_node_partial(slab);
- n->nr_partial--;
+ clear_node_partial_state(n, slab);
}
/*
@@ -3665,30 +3599,112 @@ static void *alloc_single_from_partial(struct kmem_cache *s,
return object;
}
+/* Return the next free object in allocation order. */
+static inline void *next_slab_obj(struct kmem_cache *s,
+ struct slab_obj_iter *iter)
+{
+#ifdef CONFIG_SLAB_FREELIST_RANDOM
+ if (iter->random) {
+ unsigned long idx;
+
+ /*
+ * If the target page allocation failed, the number of objects on the
+ * page might be smaller than the usual size defined by the cache.
+ */
+ do {
+ idx = s->random_seq[iter->pos];
+ iter->pos++;
+ if (iter->pos >= iter->freelist_count)
+ iter->pos = 0;
+ } while (unlikely(idx >= iter->page_limit));
+
+ return setup_object(s, (char *)iter->start + idx);
+ }
+#endif
+ return setup_object(s, (char *)iter->start + iter->pos++ * s->size);
+}
+
+/* Build a freelist from the objects not yet allocated from a fresh slab. */
+static inline void build_slab_freelist(struct kmem_cache *s, struct slab *slab,
+ struct slab_obj_iter *iter)
+{
+ unsigned int nr = slab->objects - slab->inuse;
+ unsigned int i;
+ void *cur, *next;
+
+ if (!nr) {
+ slab->freelist = NULL;
+ return;
+ }
+
+ cur = next_slab_obj(s, iter);
+ slab->freelist = cur;
+
+ for (i = 1; i < nr; i++) {
+ next = next_slab_obj(s, iter);
+ set_freepointer(s, cur, next);
+ cur = next;
+ }
+
+ set_freepointer(s, cur, NULL);
+}
+
+/* Initialize an iterator over free objects in allocation order. */
+static inline void init_slab_obj_iter(struct kmem_cache *s, struct slab *slab,
+ struct slab_obj_iter *iter,
+ bool allow_spin)
+{
+ iter->pos = 0;
+ iter->start = fixup_red_left(s, slab_address(slab));
+
+#ifdef CONFIG_SLAB_FREELIST_RANDOM
+ iter->random = (slab->objects >= 2 && s->random_seq);
+ if (!iter->random)
+ return;
+
+ iter->freelist_count = oo_objects(s->oo);
+ iter->page_limit = slab->objects * s->size;
+
+ if (allow_spin) {
+ iter->pos = get_random_u32_below(iter->freelist_count);
+ } else {
+ struct rnd_state *state;
+
+ /*
+ * An interrupt or NMI handler might interrupt and change
+ * the state in the middle, but that's safe.
+ */
+ state = &get_cpu_var(slab_rnd_state);
+ iter->pos = prandom_u32_state(state) % iter->freelist_count;
+ put_cpu_var(slab_rnd_state);
+ }
+#endif
+}
+
/*
* Called only for kmem_cache_debug() caches to allocate from a freshly
* allocated slab. Allocate a single object instead of whole freelist
* and put the slab to the partial (or full) list.
*/
static void *alloc_single_from_new_slab(struct kmem_cache *s, struct slab *slab,
- int orig_size, gfp_t gfpflags)
+ int orig_size, bool allow_spin)
{
- bool allow_spin = gfpflags_allow_spinning(gfpflags);
- int nid = slab_nid(slab);
- struct kmem_cache_node *n = get_node(s, nid);
+ struct kmem_cache_node *n;
+ struct slab_obj_iter iter;
+ bool needs_add_partial;
unsigned long flags;
void *object;
- if (!allow_spin && !spin_trylock_irqsave(&n->list_lock, flags)) {
- /* Unlucky, discard newly allocated slab. */
- free_new_slab_nolock(s, slab);
- return NULL;
- }
-
- object = slab->freelist;
- slab->freelist = get_freepointer(s, object);
+ init_slab_obj_iter(s, slab, &iter, allow_spin);
+ object = next_slab_obj(s, &iter);
slab->inuse = 1;
+ needs_add_partial = (slab->objects > 1);
+ build_slab_freelist(s, slab, &iter);
+
+ /* alloc_debug_processing() always expects a valid freepointer */
+ set_freepointer(s, object, slab->freelist);
+
if (!alloc_debug_processing(s, slab, object, orig_size)) {
/*
* It's not really expected that this would fail on a
@@ -3696,20 +3712,32 @@ static void *alloc_single_from_new_slab(struct kmem_cache *s, struct slab *slab,
* corruption in theory could cause that.
* Leak memory of allocated slab.
*/
- if (!allow_spin)
- spin_unlock_irqrestore(&n->list_lock, flags);
return NULL;
}
- if (allow_spin)
+ n = get_node(s, slab_nid(slab));
+ if (allow_spin) {
spin_lock_irqsave(&n->list_lock, flags);
+ } else if (!spin_trylock_irqsave(&n->list_lock, flags)) {
+ /*
+ * Unlucky, discard newly allocated slab.
+ * The slab is not fully free, but it's fine as
+ * objects are not allocated to users.
+ */
+ free_new_slab_nolock(s, slab);
+ return NULL;
+ }
- if (slab->inuse == slab->objects)
- add_full(s, n, slab);
- else
+ if (needs_add_partial)
add_partial(n, slab, ADD_TO_HEAD);
+ else
+ add_full(s, n, slab);
- inc_slabs_node(s, nid, slab->objects);
+ /*
+ * Debug caches require nr_slabs updates under n->list_lock so validation
+ * cannot race with slab (de)allocations and observe inconsistent state.
+ */
+ inc_slabs_node(s, slab_nid(slab), slab->objects);
spin_unlock_irqrestore(&n->list_lock, flags);
return object;
@@ -3723,6 +3751,7 @@ static bool get_partial_node_bulk(struct kmem_cache *s,
bool allow_spin)
{
struct slab *slab, *slab2;
+ struct slab *first = NULL, *last = NULL;
unsigned int total_free = 0;
unsigned long flags;
@@ -3741,8 +3770,15 @@ static bool get_partial_node_bulk(struct kmem_cache *s,
struct freelist_counters flc;
unsigned int slab_free;
- if (!pfmemalloc_match(slab, pc->flags))
+ if (!pfmemalloc_match(slab, pc->flags)) {
+ if (first) {
+ list_bulk_move_tail(&pc->slabs,
+ &first->slab_list,
+ &last->slab_list);
+ first = NULL;
+ }
continue;
+ }
/*
* determine the number of free objects in the slab racily
@@ -3759,15 +3795,20 @@ static bool get_partial_node_bulk(struct kmem_cache *s,
&& total_free + slab_free > pc->max_objects)
break;
- remove_partial(n, slab);
-
- list_add(&slab->slab_list, &pc->slabs);
+ if (!first)
+ first = slab;
+ last = slab;
+ clear_node_partial_state(n, slab);
total_free += slab_free;
if (total_free >= pc->max_objects)
break;
}
+ if (first)
+ list_bulk_move_tail(&pc->slabs, &first->slab_list,
+ &last->slab_list);
+
spin_unlock_irqrestore(&n->list_lock, flags);
return total_free > 0;
}
@@ -4311,7 +4352,8 @@ static inline bool pfmemalloc_match(struct slab *slab, gfp_t gfpflags)
* Assumes this is performed only for caches without debugging so we
* don't need to worry about adding the slab to the full list.
*/
-static inline void *get_freelist_nofreeze(struct kmem_cache *s, struct slab *slab)
+static inline void *get_freelist_nofreeze(struct kmem_cache *s, struct slab *slab,
+ unsigned int *count)
{
struct freelist_counters old, new;
@@ -4327,6 +4369,7 @@ static inline void *get_freelist_nofreeze(struct kmem_cache *s, struct slab *sla
} while (!slab_update_freelist(s, slab, &old, &new, "get_freelist_nofreeze"));
+ *count = old.objects - old.inuse;
return old.freelist;
}
@@ -4349,44 +4392,41 @@ static unsigned int alloc_from_new_slab(struct kmem_cache *s, struct slab *slab,
void **p, unsigned int count, bool allow_spin)
{
unsigned int allocated = 0;
- struct kmem_cache_node *n;
- bool needs_add_partial;
+ struct slab_obj_iter iter;
+ bool needs_add_partial = true;
unsigned long flags;
- void *object;
/*
* Are we going to put the slab on the partial list?
* Note slab->inuse is 0 on a new slab.
*/
- needs_add_partial = (slab->objects > count);
-
- if (!allow_spin && needs_add_partial) {
-
- n = get_node(s, slab_nid(slab));
-
- if (!spin_trylock_irqsave(&n->list_lock, flags)) {
- /* Unlucky, discard newly allocated slab */
- free_new_slab_nolock(s, slab);
- return 0;
- }
+ if (count >= slab->objects) {
+ needs_add_partial = false;
+ count = slab->objects;
}
- object = slab->freelist;
- while (object && allocated < count) {
- p[allocated] = object;
- object = get_freepointer(s, object);
- maybe_wipe_obj_freeptr(s, p[allocated]);
+ init_slab_obj_iter(s, slab, &iter, allow_spin);
- slab->inuse++;
+ while (allocated < count) {
+ p[allocated] = next_slab_obj(s, &iter);
allocated++;
}
- slab->freelist = object;
+ slab->inuse = count;
+ build_slab_freelist(s, slab, &iter);
if (needs_add_partial) {
+ struct kmem_cache_node *n = get_node(s, slab_nid(slab));
if (allow_spin) {
- n = get_node(s, slab_nid(slab));
spin_lock_irqsave(&n->list_lock, flags);
+ } else if (!spin_trylock_irqsave(&n->list_lock, flags)) {
+ /*
+ * Unlucky, discard newly allocated slab.
+ * The slab is not fully free, but it's fine as
+ * objects are not allocated to users.
+ */
+ free_new_slab_nolock(s, slab);
+ return 0;
}
add_partial(n, slab, ADD_TO_HEAD);
spin_unlock_irqrestore(&n->list_lock, flags);
@@ -4457,15 +4497,13 @@ new_objects:
stat(s, ALLOC_SLAB);
if (IS_ENABLED(CONFIG_SLUB_TINY) || kmem_cache_debug(s)) {
- object = alloc_single_from_new_slab(s, slab, orig_size, gfpflags);
+ object = alloc_single_from_new_slab(s, slab, orig_size, allow_spin);
if (likely(object))
goto success;
} else {
- alloc_from_new_slab(s, slab, &object, 1, allow_spin);
-
/* we don't need to check SLAB_STORE_USER here */
- if (likely(object))
+ if (alloc_from_new_slab(s, slab, &object, 1, allow_spin))
return object;
}
@@ -4981,8 +5019,8 @@ static int __prefill_sheaf_pfmemalloc(struct kmem_cache *s,
return ret;
}
-static int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags,
- size_t size, void **p);
+static bool __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags,
+ size_t size, void **p);
/*
* returns a sheaf that has at least the requested size
@@ -5002,21 +5040,20 @@ kmem_cache_prefill_sheaf(struct kmem_cache *s, gfp_t gfp, unsigned int size)
if (unlikely(size > s->sheaf_capacity)) {
- sheaf = kzalloc_flex(*sheaf, objects, size, gfp);
+ sheaf = __alloc_empty_sheaf(s, gfp, size);
if (!sheaf)
return NULL;
stat(s, SHEAF_PREFILL_OVERSIZE);
- sheaf->cache = s;
sheaf->capacity = size;
/*
* we do not need to care about pfmemalloc here because oversize
- * sheaves area always flushed and freed when returned
+ * sheaves are always flushed and freed when returned
*/
if (!__kmem_cache_alloc_bulk(s, gfp, size,
&sheaf->objects[0])) {
- kfree(sheaf);
+ free_empty_sheaf(s, sheaf);
return NULL;
}
@@ -5084,7 +5121,7 @@ void kmem_cache_return_sheaf(struct kmem_cache *s, gfp_t gfp,
if (unlikely((sheaf->capacity != s->sheaf_capacity)
|| sheaf->pfmemalloc)) {
sheaf_flush_unused(s, sheaf);
- kfree(sheaf);
+ free_empty_sheaf(s, sheaf);
return;
}
@@ -5154,9 +5191,8 @@ int kmem_cache_refill_sheaf(struct kmem_cache *s, gfp_t gfp,
return __prefill_sheaf_pfmemalloc(s, sheaf, gfp);
if (!__kmem_cache_alloc_bulk(s, gfp, sheaf->capacity - sheaf->size,
- &sheaf->objects[sheaf->size])) {
+ &sheaf->objects[sheaf->size]))
return -ENOMEM;
- }
sheaf->size = sheaf->capacity;
return 0;
@@ -5275,7 +5311,7 @@ EXPORT_SYMBOL(__kmalloc_large_node_noprof);
static __always_inline
void *__do_kmalloc_node(size_t size, kmem_buckets *b, gfp_t flags, int node,
- unsigned long caller)
+ unsigned long caller, kmalloc_token_t token)
{
struct kmem_cache *s;
void *ret;
@@ -5290,37 +5326,28 @@ void *__do_kmalloc_node(size_t size, kmem_buckets *b, gfp_t flags, int node,
if (unlikely(!size))
return ZERO_SIZE_PTR;
- s = kmalloc_slab(size, b, flags, caller);
+ s = kmalloc_slab(size, b, flags, token);
ret = slab_alloc_node(s, NULL, flags, node, caller, size);
ret = kasan_kmalloc(s, ret, size, flags);
trace_kmalloc(caller, ret, size, s->size, flags, node);
return ret;
}
-void *__kmalloc_node_noprof(DECL_BUCKET_PARAMS(size, b), gfp_t flags, int node)
+void *__kmalloc_node_noprof(DECL_KMALLOC_PARAMS(size, b, token), gfp_t flags, int node)
{
- return __do_kmalloc_node(size, PASS_BUCKET_PARAM(b), flags, node, _RET_IP_);
+ return __do_kmalloc_node(size, PASS_BUCKET_PARAM(b), flags, node,
+ _RET_IP_, PASS_TOKEN_PARAM(token));
}
EXPORT_SYMBOL(__kmalloc_node_noprof);
-void *__kmalloc_noprof(size_t size, gfp_t flags)
+void *__kmalloc_noprof(DECL_TOKEN_PARAMS(size, token), gfp_t flags)
{
- return __do_kmalloc_node(size, NULL, flags, NUMA_NO_NODE, _RET_IP_);
+ return __do_kmalloc_node(size, NULL, flags, NUMA_NO_NODE, _RET_IP_,
+ PASS_TOKEN_PARAM(token));
}
EXPORT_SYMBOL(__kmalloc_noprof);
-/**
- * kmalloc_nolock - Allocate an object of given size from any context.
- * @size: size to allocate
- * @gfp_flags: GFP flags. Only __GFP_ACCOUNT, __GFP_ZERO, __GFP_NO_OBJ_EXT
- * allowed.
- * @node: node number of the target node.
- *
- * Return: pointer to the new object or NULL in case of error.
- * NULL does not mean EBUSY or EAGAIN. It means ENOMEM.
- * There is no reason to call it again and expect !NULL.
- */
-void *kmalloc_nolock_noprof(size_t size, gfp_t gfp_flags, int node)
+void *_kmalloc_nolock_noprof(DECL_TOKEN_PARAMS(size, token), gfp_t gfp_flags, int node)
{
gfp_t alloc_gfp = __GFP_NOWARN | __GFP_NOMEMALLOC | gfp_flags;
struct kmem_cache *s;
@@ -5347,7 +5374,7 @@ void *kmalloc_nolock_noprof(size_t size, gfp_t gfp_flags, int node)
retry:
if (unlikely(size > KMALLOC_MAX_CACHE_SIZE))
return NULL;
- s = kmalloc_slab(size, NULL, alloc_gfp, _RET_IP_);
+ s = kmalloc_slab(size, NULL, alloc_gfp, PASS_TOKEN_PARAM(token));
if (!(s->flags & __CMPXCHG_DOUBLE) && !kmem_cache_debug(s))
/*
@@ -5400,12 +5427,13 @@ success:
ret = kasan_kmalloc(s, ret, size, alloc_gfp);
return ret;
}
-EXPORT_SYMBOL_GPL(kmalloc_nolock_noprof);
+EXPORT_SYMBOL_GPL(_kmalloc_nolock_noprof);
-void *__kmalloc_node_track_caller_noprof(DECL_BUCKET_PARAMS(size, b), gfp_t flags,
+void *__kmalloc_node_track_caller_noprof(DECL_KMALLOC_PARAMS(size, b, token), gfp_t flags,
int node, unsigned long caller)
{
- return __do_kmalloc_node(size, PASS_BUCKET_PARAM(b), flags, node, caller);
+ return __do_kmalloc_node(size, PASS_BUCKET_PARAM(b), flags, node,
+ caller, PASS_TOKEN_PARAM(token));
}
EXPORT_SYMBOL(__kmalloc_node_track_caller_noprof);
@@ -5500,6 +5528,34 @@ static noinline void free_to_partial_list(
}
/*
+ * Try returning (remainder of) the freelist that we just detached from the
+ * slab. Optimistically assume the slab is still full, so we don't need to find
+ * the tail of the detached freelist.
+ *
+ * Fail if the slab isn't full anymore due to a concurrent free.
+ */
+static bool __slab_try_return_freelist(struct kmem_cache *s, struct slab *slab,
+ void *head, int cnt)
+{
+ struct freelist_counters old, new;
+
+ old.freelist = slab->freelist;
+ old.counters = slab->counters;
+
+ if (old.freelist)
+ return false;
+
+ new.freelist = head;
+ new.counters = old.counters;
+ new.inuse -= cnt;
+
+ if (!slab_update_freelist(s, slab, &old, &new, "__slab_try_return_freelist"))
+ return false;
+
+ return true;
+}
+
+/*
* Slow path handling. This may still be called frequently since objects
* have a longer lifetime than the cpu slabs in most processing loads.
*
@@ -6636,7 +6692,7 @@ void kfree_nolock(const void *object)
EXPORT_SYMBOL_GPL(kfree_nolock);
static __always_inline __realloc_size(2) void *
-__do_krealloc(const void *p, size_t new_size, unsigned long align, gfp_t flags, int nid)
+__do_krealloc(const void *p, size_t new_size, unsigned long align, gfp_t flags, int nid, kmalloc_token_t token)
{
void *ret;
size_t ks = 0;
@@ -6708,7 +6764,7 @@ __do_krealloc(const void *p, size_t new_size, unsigned long align, gfp_t flags,
return (void *)p;
alloc_new:
- ret = kmalloc_node_track_caller_noprof(new_size, flags, nid, _RET_IP_);
+ ret = __kmalloc_node_track_caller_noprof(PASS_KMALLOC_PARAMS(new_size, NULL, token), flags, nid, _RET_IP_);
if (ret && p) {
/* Disable KASAN checks as the object's redzone is accessed. */
kasan_disable_current();
@@ -6719,45 +6775,7 @@ alloc_new:
return ret;
}
-/**
- * krealloc_node_align - reallocate memory. The contents will remain unchanged.
- * @p: object to reallocate memory for.
- * @new_size: how many bytes of memory are required.
- * @align: desired alignment.
- * @flags: the type of memory to allocate.
- * @nid: NUMA node or NUMA_NO_NODE
- *
- * If @p is %NULL, krealloc() behaves exactly like kmalloc(). If @new_size
- * is 0 and @p is not a %NULL pointer, the object pointed to is freed.
- *
- * Only alignments up to those guaranteed by kmalloc() will be honored. Please see
- * Documentation/core-api/memory-allocation.rst for more details.
- *
- * If __GFP_ZERO logic is requested, callers must ensure that, starting with the
- * initial memory allocation, every subsequent call to this API for the same
- * memory allocation is flagged with __GFP_ZERO. Otherwise, it is possible that
- * __GFP_ZERO is not fully honored by this API.
- *
- * When slub_debug_orig_size() is off, krealloc() only knows about the bucket
- * size of an allocation (but not the exact size it was allocated with) and
- * hence implements the following semantics for shrinking and growing buffers
- * with __GFP_ZERO::
- *
- * new bucket
- * 0 size size
- * |--------|----------------|
- * | keep | zero |
- *
- * Otherwise, the original allocation size 'orig_size' could be used to
- * precisely clear the requested size, and the new size will also be stored
- * as the new 'orig_size'.
- *
- * In any case, the contents of the object pointed to are preserved up to the
- * lesser of the new and old sizes.
- *
- * Return: pointer to the allocated memory or %NULL in case of error
- */
-void *krealloc_node_align_noprof(const void *p, size_t new_size, unsigned long align,
+void *krealloc_node_align_noprof(const void *p, DECL_TOKEN_PARAMS(new_size, token), unsigned long align,
gfp_t flags, int nid)
{
void *ret;
@@ -6767,7 +6785,7 @@ void *krealloc_node_align_noprof(const void *p, size_t new_size, unsigned long a
return ZERO_SIZE_PTR;
}
- ret = __do_krealloc(p, new_size, align, flags, nid);
+ ret = __do_krealloc(p, new_size, align, flags, nid, PASS_TOKEN_PARAM(token));
if (ret && kasan_reset_tag(p) != kasan_reset_tag(ret))
kfree(p);
@@ -6799,28 +6817,7 @@ static gfp_t kmalloc_gfp_adjust(gfp_t flags, size_t size)
return flags;
}
-/**
- * __kvmalloc_node - attempt to allocate physically contiguous memory, but upon
- * failure, fall back to non-contiguous (vmalloc) allocation.
- * @size: size of the request.
- * @b: which set of kmalloc buckets to allocate from.
- * @align: desired alignment.
- * @flags: gfp mask for the allocation - must be compatible (superset) with GFP_KERNEL.
- * @node: numa node to allocate from
- *
- * Only alignments up to those guaranteed by kmalloc() will be honored. Please see
- * Documentation/core-api/memory-allocation.rst for more details.
- *
- * Uses kmalloc to get the memory but if the allocation fails then falls back
- * to the vmalloc allocator. Use kvfree for freeing the memory.
- *
- * GFP_NOWAIT and GFP_ATOMIC are supported, the __GFP_NORETRY modifier is not.
- * __GFP_RETRY_MAYFAIL is supported, and it should be used only if kmalloc is
- * preferable to the vmalloc fallback, due to visible performance drawbacks.
- *
- * Return: pointer to the allocated memory of %NULL in case of failure
- */
-void *__kvmalloc_node_noprof(DECL_BUCKET_PARAMS(size, b), unsigned long align,
+void *__kvmalloc_node_noprof(DECL_KMALLOC_PARAMS(size, b, token), unsigned long align,
gfp_t flags, int node)
{
bool allow_block;
@@ -6832,7 +6829,7 @@ void *__kvmalloc_node_noprof(DECL_BUCKET_PARAMS(size, b), unsigned long align,
*/
ret = __do_kmalloc_node(size, PASS_BUCKET_PARAM(b),
kmalloc_gfp_adjust(flags, size),
- node, _RET_IP_);
+ node, _RET_IP_, PASS_TOKEN_PARAM(token));
if (ret || size <= PAGE_SIZE)
return ret;
@@ -6917,34 +6914,7 @@ void kvfree_sensitive(const void *addr, size_t len)
}
EXPORT_SYMBOL(kvfree_sensitive);
-/**
- * kvrealloc_node_align - reallocate memory; contents remain unchanged
- * @p: object to reallocate memory for
- * @size: the size to reallocate
- * @align: desired alignment
- * @flags: the flags for the page level allocator
- * @nid: NUMA node id
- *
- * If @p is %NULL, kvrealloc() behaves exactly like kvmalloc(). If @size is 0
- * and @p is not a %NULL pointer, the object pointed to is freed.
- *
- * Only alignments up to those guaranteed by kmalloc() will be honored. Please see
- * Documentation/core-api/memory-allocation.rst for more details.
- *
- * If __GFP_ZERO logic is requested, callers must ensure that, starting with the
- * initial memory allocation, every subsequent call to this API for the same
- * memory allocation is flagged with __GFP_ZERO. Otherwise, it is possible that
- * __GFP_ZERO is not fully honored by this API.
- *
- * In any case, the contents of the object pointed to are preserved up to the
- * lesser of the new and old sizes.
- *
- * This function must not be called concurrently with itself or kvfree() for the
- * same memory allocation.
- *
- * Return: pointer to the allocated memory or %NULL in case of error
- */
-void *kvrealloc_node_align_noprof(const void *p, size_t size, unsigned long align,
+void *kvrealloc_node_align_noprof(const void *p, DECL_TOKEN_PARAMS(size, token), unsigned long align,
gfp_t flags, int nid)
{
void *n;
@@ -6952,10 +6922,10 @@ void *kvrealloc_node_align_noprof(const void *p, size_t size, unsigned long alig
if (is_vmalloc_addr(p))
return vrealloc_node_align_noprof(p, size, align, flags, nid);
- n = krealloc_node_align_noprof(p, size, align, kmalloc_gfp_adjust(flags, size), nid);
+ n = krealloc_node_align_noprof(p, PASS_TOKEN_PARAMS(size, token), align, kmalloc_gfp_adjust(flags, size), nid);
if (!n) {
/* We failed to krealloc(), fall back to kvmalloc(). */
- n = kvmalloc_node_align_noprof(size, align, flags, nid);
+ n = __kvmalloc_node_noprof(PASS_KMALLOC_PARAMS(size, NULL, token), align, flags, nid);
if (!n)
return NULL;
@@ -7126,60 +7096,56 @@ __refill_objects_node(struct kmem_cache *s, void **p, gfp_t gfp, unsigned int mi
list_for_each_entry_safe(slab, slab2, &pc.slabs, slab_list) {
+ unsigned int count;
+
list_del(&slab->slab_list);
- object = get_freelist_nofreeze(s, slab);
+ object = get_freelist_nofreeze(s, slab, &count);
- while (object && refilled < max) {
+ while (count && refilled < max) {
p[refilled] = object;
object = get_freepointer(s, object);
maybe_wipe_obj_freeptr(s, p[refilled]);
refilled++;
+ count--;
}
/*
* Freelist had more objects than we can accommodate, we need to
- * free them back. We can treat it like a detached freelist, just
- * need to find the tail object.
+ * free them back. First we try to be optimistic and assume the
+ * slab is still full since we just detached its freelist.
+ * Otherwise we must find the tail object.
*/
- if (unlikely(object)) {
+ if (unlikely(count)) {
void *head = object;
void *tail;
- int cnt = 0;
+
+ if (__slab_try_return_freelist(s, slab, head, count)) {
+ list_add(&slab->slab_list, &pc.slabs);
+ break;
+ }
do {
tail = object;
- cnt++;
object = get_freepointer(s, object);
} while (object);
- __slab_free(s, slab, head, tail, cnt, _RET_IP_);
+ __slab_free(s, slab, head, tail, count, _RET_IP_);
}
if (refilled >= max)
break;
}
- if (unlikely(!list_empty(&pc.slabs))) {
+ if (!list_empty(&pc.slabs)) {
spin_lock_irqsave(&n->list_lock, flags);
- list_for_each_entry_safe(slab, slab2, &pc.slabs, slab_list) {
-
- if (unlikely(!slab->inuse && n->nr_partial >= s->min_partial))
- continue;
+ list_for_each_entry(slab, &pc.slabs, slab_list)
+ set_node_partial_state(n, slab);
- list_del(&slab->slab_list);
- add_partial(n, slab, ADD_TO_HEAD);
- }
+ list_splice_tail(&pc.slabs, &n->partial);
spin_unlock_irqrestore(&n->list_lock, flags);
-
- /* any slabs left are completely free and for discard */
- list_for_each_entry_safe(slab, slab2, &pc.slabs, slab_list) {
-
- list_del(&slab->slab_list);
- discard_slab(s, slab);
- }
}
return refilled;
@@ -7275,10 +7241,6 @@ new_slab:
stat(s, ALLOC_SLAB);
- /*
- * TODO: possible optimization - if we know we will consume the whole
- * slab we might skip creating the freelist?
- */
refilled += alloc_from_new_slab(s, slab, p + refilled, max - refilled,
/* allow_spin = */ true);
@@ -7289,9 +7251,8 @@ out:
return refilled;
}
-static inline
-int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
- void **p)
+static bool __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags,
+ size_t size, void **p)
{
int i;
@@ -7312,30 +7273,43 @@ int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
stat_add(s, ALLOC_SLOWPATH, i);
}
- return i;
+ return true;
error:
__kmem_cache_free_bulk(s, i, p);
- return 0;
-
+ return false;
}
-/*
- * Note that interrupts must be enabled when calling this function and gfp
- * flags must allow spinning.
+/**
+ * kmem_cache_alloc_bulk - Allocate multiple objects
+ * @s: The cache to allocate from
+ * @flags: GFP_* flags. See kmalloc().
+ * @size: Number of objects to allocate
+ * @p: Array of allocated objects
+ *
+ * Allocate @size objects from @s and places them into @p. @size must be larger
+ * than 0.
+ *
+ * Interrupts must be enabled when calling this function and @flags must allow
+ * spinning.
+ *
+ * Unlike alloc_pages_bulk(), this function does not check for already allocated
+ * objects in @p, and thus the caller does not need to zero it.
+ *
+ * Return: %true if the allocation succeeded, or %false if it failed.
*/
-int kmem_cache_alloc_bulk_noprof(struct kmem_cache *s, gfp_t flags, size_t size,
- void **p)
+bool kmem_cache_alloc_bulk_noprof(struct kmem_cache *s, gfp_t flags,
+ size_t size, void **p)
{
unsigned int i = 0;
void *kfence_obj;
if (!size)
- return 0;
+ return false;
s = slab_pre_alloc_hook(s, flags);
if (unlikely(!s))
- return 0;
+ return false;
/*
* to make things simpler, only assume at most once kfence allocated
@@ -7352,18 +7326,18 @@ int kmem_cache_alloc_bulk_noprof(struct kmem_cache *s, gfp_t flags, size_t size,
}
i = alloc_from_pcs_bulk(s, flags, size, p);
-
if (i < size) {
/*
* If we ran out of memory, don't bother with freeing back to
* the percpu sheaves, we have bigger problems.
*/
- if (unlikely(__kmem_cache_alloc_bulk(s, flags, size - i, p + i) == 0)) {
+ if (unlikely(!__kmem_cache_alloc_bulk(s, flags, size - i,
+ p + i))) {
if (i > 0)
__kmem_cache_free_bulk(s, i, p);
if (kfence_obj)
__kfence_free(kfence_obj);
- return 0;
+ return false;
}
}
@@ -7378,16 +7352,9 @@ int kmem_cache_alloc_bulk_noprof(struct kmem_cache *s, gfp_t flags, size_t size,
}
out:
- /*
- * memcg and kmem_cache debug support and memory initialization.
- * Done outside of the IRQ disabled fastpath loop.
- */
- if (unlikely(!slab_post_alloc_hook(s, NULL, flags, size, p,
- slab_want_init_on_alloc(flags, s), s->object_size))) {
- return 0;
- }
-
- return size;
+ /* memcg and kmem_cache debug support and memory initialization */
+ return likely(slab_post_alloc_hook(s, NULL, flags, size, p,
+ slab_want_init_on_alloc(flags, s), s->object_size));
}
EXPORT_SYMBOL(kmem_cache_alloc_bulk_noprof);
@@ -7609,6 +7576,7 @@ static void early_kmem_cache_node_alloc(int node)
{
struct slab *slab;
struct kmem_cache_node *n;
+ struct slab_obj_iter iter;
BUG_ON(kmem_cache_node->size < sizeof(struct kmem_cache_node));
@@ -7620,14 +7588,18 @@ static void early_kmem_cache_node_alloc(int node)
pr_err("SLUB: Allocating a useless per node structure in order to be able to continue\n");
}
- n = slab->freelist;
+ init_slab_obj_iter(kmem_cache_node, slab, &iter, true);
+
+ n = next_slab_obj(kmem_cache_node, &iter);
BUG_ON(!n);
+
+ slab->inuse = 1;
+ build_slab_freelist(kmem_cache_node, slab, &iter);
+
#ifdef CONFIG_SLUB_DEBUG
init_object(kmem_cache_node, n, SLUB_RED_ACTIVE);
#endif
n = kasan_slab_alloc(kmem_cache_node, n, GFP_KERNEL, false);
- slab->freelist = get_freepointer(kmem_cache_node, n);
- slab->inuse = 1;
kmem_cache_node->per_node[node].node = n;
init_kmem_cache_node(n);
inc_slabs_node(kmem_cache_node, node, slab->objects);
@@ -8245,8 +8217,7 @@ static int __kmem_cache_do_shrink(struct kmem_cache *s)
if (free == slab->objects) {
list_move(&slab->slab_list, &discard);
- slab_clear_node_partial(slab);
- n->nr_partial--;
+ clear_node_partial_state(n, slab);
dec_slabs_node(s, node, slab->objects);
} else if (free <= SHRINK_PROMOTE_MAX)
list_move(&slab->slab_list, promote + free - 1);
@@ -8470,7 +8441,7 @@ static void __init bootstrap_kmalloc_sheaves(void)
{
enum kmalloc_cache_type type;
- for (type = KMALLOC_NORMAL; type <= KMALLOC_RANDOM_END; type++) {
+ for (type = KMALLOC_NORMAL; type <= KMALLOC_PARTITION_END; type++) {
for (int idx = 0; idx < KMALLOC_SHIFT_HIGH + 1; idx++) {
if (kmalloc_caches[type][idx])
bootstrap_cache_sheaves(kmalloc_caches[type][idx]);
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index c9aea7052ba72..3e9ef4e79c15b 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -243,12 +243,11 @@ static int xdp_recv_frames(struct xdp_frame **frames, int nframes,
struct net_device *dev)
{
gfp_t gfp = __GFP_ZERO | GFP_ATOMIC;
- int i, n;
+ int i;
LIST_HEAD(list);
- n = kmem_cache_alloc_bulk(net_hotdata.skbuff_cache, gfp, nframes,
- (void **)skbs);
- if (unlikely(n == 0)) {
+ if (unlikely(!kmem_cache_alloc_bulk(net_hotdata.skbuff_cache, gfp,
+ nframes, (void **)skbs))) {
for (i = 0; i < nframes; i++)
xdp_return_frame(frames[i]);
return -ENOMEM;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 78e75b1b26967..9e8ac9b934a89 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -288,11 +288,11 @@ static inline struct sk_buff *napi_skb_cache_get(bool alloc)
local_lock_nested_bh(&napi_alloc_cache.bh_lock);
if (unlikely(!nc->skb_count)) {
- if (alloc)
- nc->skb_count = kmem_cache_alloc_bulk(net_hotdata.skbuff_cache,
- GFP_ATOMIC | __GFP_NOWARN,
- NAPI_SKB_CACHE_BULK,
- nc->skb_cache);
+ if (alloc && kmem_cache_alloc_bulk(net_hotdata.skbuff_cache,
+ GFP_ATOMIC | __GFP_NOWARN,
+ NAPI_SKB_CACHE_BULK,
+ nc->skb_cache))
+ nc->skb_count = NAPI_SKB_CACHE_BULK;
if (unlikely(!nc->skb_count)) {
local_unlock_nested_bh(&napi_alloc_cache.bh_lock);
return NULL;
@@ -353,16 +353,18 @@ u32 napi_skb_cache_get_bulk(void **skbs, u32 n)
/* No enough cached skbs. Try refilling the cache first */
bulk = min(NAPI_SKB_CACHE_SIZE - nc->skb_count, NAPI_SKB_CACHE_BULK);
- nc->skb_count += kmem_cache_alloc_bulk(net_hotdata.skbuff_cache,
- GFP_ATOMIC | __GFP_NOWARN, bulk,
- &nc->skb_cache[nc->skb_count]);
+ if (kmem_cache_alloc_bulk(net_hotdata.skbuff_cache,
+ GFP_ATOMIC | __GFP_NOWARN, bulk,
+ &nc->skb_cache[nc->skb_count]))
+ nc->skb_count += bulk;
if (likely(nc->skb_count >= n))
goto get;
/* Still not enough. Bulk-allocate the missing part directly, zeroed */
- n -= kmem_cache_alloc_bulk(net_hotdata.skbuff_cache,
- GFP_ATOMIC | __GFP_ZERO | __GFP_NOWARN,
- n - nc->skb_count, &skbs[nc->skb_count]);
+ if (kmem_cache_alloc_bulk(net_hotdata.skbuff_cache,
+ GFP_ATOMIC | __GFP_ZERO | __GFP_NOWARN,
+ n - nc->skb_count, &skbs[nc->skb_count]))
+ n = nc->skb_count;
if (likely(nc->skb_count >= n))
goto get;
diff --git a/tools/include/linux/slab.h b/tools/include/linux/slab.h
index 6d8e9413d5a4d..2e63c2e726aaf 100644
--- a/tools/include/linux/slab.h
+++ b/tools/include/linux/slab.h
@@ -183,7 +183,7 @@ __kmem_cache_create(const char *name, unsigned int size, unsigned int align,
default: __kmem_cache_create)(__name, __object_size, __args, __VA_ARGS__)
void kmem_cache_free_bulk(struct kmem_cache *cachep, size_t size, void **list);
-int kmem_cache_alloc_bulk(struct kmem_cache *cachep, gfp_t gfp, size_t size,
+bool kmem_cache_alloc_bulk(struct kmem_cache *cachep, gfp_t gfp, size_t size,
void **list);
struct slab_sheaf *
kmem_cache_prefill_sheaf(struct kmem_cache *s, gfp_t gfp, unsigned int size);
diff --git a/tools/mm/slabinfo.c b/tools/mm/slabinfo.c
index 54c7265ab52d9..87570c22b151c 100644
--- a/tools/mm/slabinfo.c
+++ b/tools/mm/slabinfo.c
@@ -193,10 +193,9 @@ static unsigned long get_obj_and_str(const char *name, char **x)
*x = NULL;
- if (!read_obj(name)) {
- x = NULL;
+ if (!read_obj(name))
return 0;
- }
+
result = strtoul(buffer, &p, 10);
while (*p == ' ')
p++;
@@ -798,7 +797,7 @@ static void slab_debug(struct slabinfo *s)
fprintf(stderr, "%s can only enable trace for one slab at a time\n", s->name);
}
if (!tracing && s->trace)
- set_obj(s, "trace", 1);
+ set_obj(s, "trace", 0);
}
static void totals(void)
@@ -1266,7 +1265,6 @@ static void read_slab_dir(void)
slab->objects_total = get_obj("objects_total");
slab->objs_per_slab = get_obj("objs_per_slab");
slab->order = get_obj("order");
- slab->partial = get_obj("partial");
slab->partial = get_obj_and_str("partial", &t);
decode_numa_list(slab->numa_partial, t);
free(t);
diff --git a/tools/testing/shared/linux.c b/tools/testing/shared/linux.c
index 8c72571559583..e0a0693df08f5 100644
--- a/tools/testing/shared/linux.c
+++ b/tools/testing/shared/linux.c
@@ -154,7 +154,7 @@ void kmem_cache_shrink(struct kmem_cache *cachep)
{
}
-int kmem_cache_alloc_bulk(struct kmem_cache *cachep, gfp_t gfp, size_t size,
+bool kmem_cache_alloc_bulk(struct kmem_cache *cachep, gfp_t gfp, size_t size,
void **p)
{
size_t i;
@@ -213,7 +213,7 @@ int kmem_cache_alloc_bulk(struct kmem_cache *cachep, gfp_t gfp, size_t size,
pthread_mutex_unlock(&cachep->lock);
if (cachep->callback)
cachep->exec_callback = true;
- return 0;
+ return false;
}
for (i = 0; i < size; i++) {
@@ -224,7 +224,7 @@ int kmem_cache_alloc_bulk(struct kmem_cache *cachep, gfp_t gfp, size_t size,
printf("Allocating %p from slab\n", p[i]);
}
- return size;
+ return true;
}
struct kmem_cache *
@@ -271,8 +271,8 @@ kmem_cache_prefill_sheaf(struct kmem_cache *s, gfp_t gfp, unsigned int size)
sheaf->cache = s;
sheaf->capacity = capacity;
- sheaf->size = kmem_cache_alloc_bulk(s, gfp, size, sheaf->objects);
- if (!sheaf->size) {
+ sheaf->size = size;
+ if (!kmem_cache_alloc_bulk(s, gfp, size, sheaf->objects)) {
free(sheaf);
return NULL;
}
@@ -284,7 +284,6 @@ int kmem_cache_refill_sheaf(struct kmem_cache *s, gfp_t gfp,
struct slab_sheaf **sheafp, unsigned int size)
{
struct slab_sheaf *sheaf = *sheafp;
- int refill;
if (sheaf->size >= size)
return 0;
@@ -299,12 +298,10 @@ int kmem_cache_refill_sheaf(struct kmem_cache *s, gfp_t gfp,
return 0;
}
- refill = kmem_cache_alloc_bulk(s, gfp, size - sheaf->size,
- &sheaf->objects[sheaf->size]);
- if (!refill)
+ if (!kmem_cache_alloc_bulk(s, gfp, size - sheaf->size,
+ &sheaf->objects[sheaf->size]))
return -ENOMEM;
-
- sheaf->size += refill;
+ sheaf->size = size;
return 0;
}