diff options
| author | Mark Brown <broonie@kernel.org> | 2026-05-30 00:25:45 +0100 |
|---|---|---|
| committer | Mark Brown <broonie@kernel.org> | 2026-05-30 00:25:46 +0100 |
| commit | fe9618ab266d20638357eff97d84540aeb22d69b (patch) | |
| tree | 4139a361d7bb521ee94831414b25a1567e4c11a8 | |
| parent | 99befc896988c8b8b3b948b19c9d1a4e40025c07 (diff) | |
| parent | 1d8f40ed9011a5a660e952235a0e8db991de509a (diff) | |
| download | linux-next-history-fe9618ab266d20638357eff97d84540aeb22d69b.tar.gz | |
Merge branch 'slab/for-next' of https://git.kernel.org/pub/scm/linux/kernel/git/vbabka/slab.git
| -rw-r--r-- | MAINTAINERS | 6 | ||||
| -rw-r--r-- | Makefile | 5 | ||||
| -rw-r--r-- | drivers/gpu/drm/msm/msm_iommu.c | 6 | ||||
| -rw-r--r-- | drivers/gpu/drm/panthor/panthor_mmu.c | 15 | ||||
| -rw-r--r-- | include/linux/instruction_pointer.h | 24 | ||||
| -rw-r--r-- | include/linux/percpu.h | 2 | ||||
| -rw-r--r-- | include/linux/slab.h | 324 | ||||
| -rw-r--r-- | init/Kconfig | 3 | ||||
| -rw-r--r-- | io_uring/io_uring.c | 23 | ||||
| -rw-r--r-- | kernel/configs/hardening.config | 2 | ||||
| -rw-r--r-- | lib/test_meminit.c | 23 | ||||
| -rw-r--r-- | mm/Kconfig | 73 | ||||
| -rw-r--r-- | mm/kasan/kasan_test_c.c | 5 | ||||
| -rw-r--r-- | mm/kfence/kfence_test.c | 13 | ||||
| -rw-r--r-- | mm/slab.h | 4 | ||||
| -rw-r--r-- | mm/slab_common.c | 52 | ||||
| -rw-r--r-- | mm/slub.c | 655 | ||||
| -rw-r--r-- | net/bpf/test_run.c | 7 | ||||
| -rw-r--r-- | net/core/skbuff.c | 24 | ||||
| -rw-r--r-- | tools/include/linux/slab.h | 2 | ||||
| -rw-r--r-- | tools/mm/slabinfo.c | 8 | ||||
| -rw-r--r-- | tools/testing/shared/linux.c | 19 |
22 files changed, 753 insertions, 542 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index 84e58f5b1d38f..d540e38f7b18b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -24844,6 +24844,12 @@ F: mm/mempool.c F: mm/slab.h F: mm/slab_common.c F: mm/slub.c +F: scripts/gdb/linux/slab.py +F: tools/cgroup/memcg_slabinfo.py +F: tools/include/linux/slab.h +F: tools/lib/slab.c +F: tools/mm/slabinfo-gnuplot.sh +F: tools/mm/slabinfo.c SLCAN CAN NETWORK DRIVER M: Dario Binacchi <dario.binacchi@amarulasolutions.com> diff --git a/Makefile b/Makefile index 0132fcb03f0f5..d59f703f9797a 100644 --- a/Makefile +++ b/Makefile @@ -988,6 +988,11 @@ KBUILD_CFLAGS += $(CC_AUTO_VAR_INIT_ZERO_ENABLER) endif endif +ifdef CONFIG_KMALLOC_PARTITION_TYPED +# KMALLOC_PARTITION_CACHES_NR + 1 +KBUILD_CFLAGS += -falloc-token-max=16 +endif + ifdef CONFIG_CC_IS_CLANG ifdef CONFIG_CC_HAS_COUNTED_BY_PTR KBUILD_CFLAGS += -fexperimental-late-parse-attributes diff --git a/drivers/gpu/drm/msm/msm_iommu.c b/drivers/gpu/drm/msm/msm_iommu.c index 058c71c82cf54..533104d71f6ce 100644 --- a/drivers/gpu/drm/msm/msm_iommu.c +++ b/drivers/gpu/drm/msm/msm_iommu.c @@ -330,17 +330,15 @@ static int msm_iommu_pagetable_prealloc_allocate(struct msm_mmu *mmu, struct msm_mmu_prealloc *p) { struct kmem_cache *pt_cache = get_pt_cache(mmu); - int ret; p->pages = kvmalloc_objs(*p->pages, p->count); if (!p->pages) return -ENOMEM; - ret = kmem_cache_alloc_bulk(pt_cache, GFP_KERNEL, p->count, p->pages); - if (ret != p->count) { + if (!kmem_cache_alloc_bulk(pt_cache, GFP_KERNEL, p->count, p->pages)) { kfree(p->pages); p->pages = NULL; - p->count = ret; + p->count = 0; return -ENOMEM; } diff --git a/drivers/gpu/drm/panthor/panthor_mmu.c b/drivers/gpu/drm/panthor/panthor_mmu.c index 9d45008505619..10e32fe26f7e4 100644 --- a/drivers/gpu/drm/panthor/panthor_mmu.c +++ b/drivers/gpu/drm/panthor/panthor_mmu.c @@ -1248,7 +1248,6 @@ static int panthor_vm_op_ctx_prealloc_pts(struct panthor_vm_op_ctx *op_ctx) { u64 size = op_ctx->va.range; u64 va = op_ctx->va.addr; - int ret; /* L1, L2 and 
L3 page tables. * We could optimize L3 allocation by iterating over the sgt and merging @@ -1264,11 +1263,12 @@ static int panthor_vm_op_ctx_prealloc_pts(struct panthor_vm_op_ctx *op_ctx) if (!op_ctx->rsvd_page_tables.pages) return -ENOMEM; - ret = kmem_cache_alloc_bulk(pt_cache, GFP_KERNEL, pt_count, - op_ctx->rsvd_page_tables.pages); - op_ctx->rsvd_page_tables.count = ret; - if (ret != pt_count) + if (!kmem_cache_alloc_bulk(pt_cache, GFP_KERNEL, pt_count, + op_ctx->rsvd_page_tables.pages)) { + op_ctx->rsvd_page_tables.count = 0; return -ENOMEM; + } + op_ctx->rsvd_page_tables.count = pt_count; return 0; } @@ -1396,9 +1396,8 @@ static int panthor_vm_prepare_unmap_op_ctx(struct panthor_vm_op_ctx *op_ctx, goto err_cleanup; } - ret = kmem_cache_alloc_bulk(pt_cache, GFP_KERNEL, pt_count, - op_ctx->rsvd_page_tables.pages); - if (ret != pt_count) { + if (!kmem_cache_alloc_bulk(pt_cache, GFP_KERNEL, pt_count, + op_ctx->rsvd_page_tables.pages)) { ret = -ENOMEM; goto err_cleanup; } diff --git a/include/linux/instruction_pointer.h b/include/linux/instruction_pointer.h index aa0b3ffea9353..ea5bc756bd99d 100644 --- a/include/linux/instruction_pointer.h +++ b/include/linux/instruction_pointer.h @@ -8,6 +8,30 @@ #ifndef _THIS_IP_ #define _THIS_IP_ ({ __label__ __here; __here: (unsigned long)&&__here; }) +/* + * The current generic definition of _THIS_IP_ is considered broken by GCC [1] + * and Clang [2]. In particular, the address of a label is only expected to be + * used with a computed goto. + * + * [1] https://gcc.gnu.org/bugzilla/show_bug.cgi?id=120071 + * [2] https://github.com/llvm/llvm-project/issues/138272 + * + * Mark it as broken, so that appropriate fallback options can be implemented + * for architectures that do not define their own _THIS_IP_. + */ +#define HAS_BROKEN_THIS_IP +#endif + +/* + * _CODE_LOCATION_ provides a unique identifier for the current code location. 
+ * When _THIS_IP_ is broken (generic version), we fall back to a static marker + * which guarantees uniqueness and resolves to a constant address at link time, + * avoiding runtime overhead and compiler optimizations breaking it. + */ +#ifdef HAS_BROKEN_THIS_IP +#define _CODE_LOCATION_ ({ static const char __here; (unsigned long)&__here; }) +#else +#define _CODE_LOCATION_ _THIS_IP_ #endif #endif /* _LINUX_INSTRUCTION_POINTER_H */ diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 85bf8dd9f0874..bdb721dac0e32 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -36,7 +36,7 @@ #define PCPU_BITMAP_BLOCK_BITS (PCPU_BITMAP_BLOCK_SIZE >> \ PCPU_MIN_ALLOC_SHIFT) -#ifdef CONFIG_RANDOM_KMALLOC_CACHES +#ifdef CONFIG_KMALLOC_PARTITION_CACHES # if defined(CONFIG_LOCKDEP) && !defined(CONFIG_PAGE_SIZE_4KB) # define PERCPU_DYNAMIC_SIZE_SHIFT 13 # else diff --git a/include/linux/slab.h b/include/linux/slab.h index 2b5ab488e96b0..d4a873a162892 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -499,14 +499,80 @@ int kmem_cache_shrink(struct kmem_cache *s); .usersize = sizeof_field(struct __struct, __field), \ }, (__flags)) +#ifdef CONFIG_KMALLOC_PARTITION_CACHES +typedef struct { unsigned long v; } kmalloc_token_t; +#ifdef CONFIG_KMALLOC_PARTITION_RANDOM +extern unsigned long random_kmalloc_seed; +#define __kmalloc_token(...) ((kmalloc_token_t){ .v = _CODE_LOCATION_ }) +#elif defined(CONFIG_KMALLOC_PARTITION_TYPED) +#define __kmalloc_token(...) ((kmalloc_token_t){ .v = __builtin_infer_alloc_token(__VA_ARGS__) }) +#endif +#define DECL_TOKEN_PARAM(_token) , kmalloc_token_t (_token) +#define _PASS_TOKEN_PARAM(_token) , (_token) +#define PASS_TOKEN_PARAM(_token) (_token) +#define DECL_TOKEN_PARAMS(_size, _token) size_t (_size), kmalloc_token_t (_token) +#define PASS_TOKEN_PARAMS(_size, _token) (_size), (_token) +#else /* !CONFIG_KMALLOC_PARTITION_CACHES */ +typedef struct {} kmalloc_token_t; +#define __kmalloc_token(...) 
((kmalloc_token_t){}) /* no-op */ +#define DECL_TOKEN_PARAM(_token) +#define _PASS_TOKEN_PARAM(_token) +#define PASS_TOKEN_PARAM(_token) ((kmalloc_token_t){}) +#define DECL_TOKEN_PARAMS(_size, _token) size_t (_size) +#define PASS_TOKEN_PARAMS(_size, _token) (_size) +#endif /* CONFIG_KMALLOC_PARTITION_CACHES */ + /* * Common kmalloc functions provided by all allocators */ -void * __must_check krealloc_node_align_noprof(const void *objp, size_t new_size, +void * __must_check krealloc_node_align_noprof(const void *objp, + DECL_TOKEN_PARAMS(new_size, token), unsigned long align, gfp_t flags, int nid) __realloc_size(2); -#define krealloc_noprof(_o, _s, _f) krealloc_node_align_noprof(_o, _s, 1, _f, NUMA_NO_NODE) -#define krealloc_node_align(...) alloc_hooks(krealloc_node_align_noprof(__VA_ARGS__)) +#define krealloc_noprof(_o, _s, _f) krealloc_node_align_noprof(_o, PASS_TOKEN_PARAMS(_s, __kmalloc_token(_s)), 1, _f, NUMA_NO_NODE) +#if 0 /* kernel-doc */ +/** + * krealloc_node_align - reallocate memory. The contents will remain unchanged. + * @p: object to reallocate memory for. + * @new_size: how many bytes of memory are required. + * @align: desired alignment. + * @flags: the type of memory to allocate. + * @nid: NUMA node or NUMA_NO_NODE + * + * If @p is %NULL, krealloc() behaves exactly like kmalloc(). If @new_size + * is 0 and @p is not a %NULL pointer, the object pointed to is freed. + * + * Only alignments up to those guaranteed by kmalloc() will be honored. Please see + * Documentation/core-api/memory-allocation.rst for more details. + * + * If __GFP_ZERO logic is requested, callers must ensure that, starting with the + * initial memory allocation, every subsequent call to this API for the same + * memory allocation is flagged with __GFP_ZERO. Otherwise, it is possible that + * __GFP_ZERO is not fully honored by this API. 
+ * + * When slub_debug_orig_size() is off, krealloc() only knows about the bucket + * size of an allocation (but not the exact size it was allocated with) and + * hence implements the following semantics for shrinking and growing buffers + * with __GFP_ZERO:: + * + * new bucket + * 0 size size + * |--------|----------------| + * | keep | zero | + * + * Otherwise, the original allocation size 'orig_size' could be used to + * precisely clear the requested size, and the new size will also be stored + * as the new 'orig_size'. + * + * In any case, the contents of the object pointed to are preserved up to the + * lesser of the new and old sizes. + * + * Return: pointer to the allocated memory or %NULL in case of error + */ +void *krealloc_node_align(const void *p, size_t new_size, unsigned long align, gfp_t flags, int nid); +#endif +#define krealloc_node_align(p, new_size, align, flags, nid) \ + alloc_hooks(krealloc_node_align_noprof(p, PASS_TOKEN_PARAMS(new_size, __kmalloc_token(new_size)), align, flags, nid)) #define krealloc_node(_o, _s, _f, _n) krealloc_node_align(_o, _s, 1, _f, _n) #define krealloc(...) krealloc_node(__VA_ARGS__, NUMA_NO_NODE) @@ -612,10 +678,10 @@ static inline unsigned int arch_slab_minalign(void) #define SLAB_OBJ_MIN_SIZE (KMALLOC_MIN_SIZE < 16 ? 
\ (KMALLOC_MIN_SIZE) : 16) -#ifdef CONFIG_RANDOM_KMALLOC_CACHES -#define RANDOM_KMALLOC_CACHES_NR 15 // # of cache copies +#ifdef CONFIG_KMALLOC_PARTITION_CACHES +#define KMALLOC_PARTITION_CACHES_NR 15 // # of cache copies #else -#define RANDOM_KMALLOC_CACHES_NR 0 +#define KMALLOC_PARTITION_CACHES_NR 0 #endif /* @@ -634,8 +700,8 @@ enum kmalloc_cache_type { #ifndef CONFIG_MEMCG KMALLOC_CGROUP = KMALLOC_NORMAL, #endif - KMALLOC_RANDOM_START = KMALLOC_NORMAL, - KMALLOC_RANDOM_END = KMALLOC_RANDOM_START + RANDOM_KMALLOC_CACHES_NR, + KMALLOC_PARTITION_START = KMALLOC_NORMAL, + KMALLOC_PARTITION_END = KMALLOC_PARTITION_START + KMALLOC_PARTITION_CACHES_NR, #ifdef CONFIG_SLUB_TINY KMALLOC_RECLAIM = KMALLOC_NORMAL, #else @@ -662,19 +728,19 @@ extern kmem_buckets kmalloc_caches[NR_KMALLOC_TYPES]; (IS_ENABLED(CONFIG_ZONE_DMA) ? __GFP_DMA : 0) | \ (IS_ENABLED(CONFIG_MEMCG) ? __GFP_ACCOUNT : 0)) -extern unsigned long random_kmalloc_seed; - -static __always_inline enum kmalloc_cache_type kmalloc_type(gfp_t flags, unsigned long caller) +static __always_inline enum kmalloc_cache_type kmalloc_type(gfp_t flags, kmalloc_token_t token) { /* * The most common case is KMALLOC_NORMAL, so test for it * with a single branch for all the relevant flags. 
*/ if (likely((flags & KMALLOC_NOT_NORMAL_BITS) == 0)) -#ifdef CONFIG_RANDOM_KMALLOC_CACHES - /* RANDOM_KMALLOC_CACHES_NR (=15) copies + the KMALLOC_NORMAL */ - return KMALLOC_RANDOM_START + hash_64(caller ^ random_kmalloc_seed, - ilog2(RANDOM_KMALLOC_CACHES_NR + 1)); +#ifdef CONFIG_KMALLOC_PARTITION_RANDOM + /* KMALLOC_PARTITION_CACHES_NR (=15) copies + the KMALLOC_NORMAL */ + return KMALLOC_PARTITION_START + hash_64(token.v ^ random_kmalloc_seed, + ilog2(KMALLOC_PARTITION_CACHES_NR + 1)); +#elif defined(CONFIG_KMALLOC_PARTITION_TYPED) + return KMALLOC_PARTITION_START + token.v; #else return KMALLOC_NORMAL; #endif @@ -815,8 +881,10 @@ kmem_buckets *kmem_buckets_create(const char *name, slab_flags_t flags, */ void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p); -int kmem_cache_alloc_bulk_noprof(struct kmem_cache *s, gfp_t flags, size_t size, void **p); -#define kmem_cache_alloc_bulk(...) alloc_hooks(kmem_cache_alloc_bulk_noprof(__VA_ARGS__)) +bool kmem_cache_alloc_bulk_noprof(struct kmem_cache *s, gfp_t flags, + size_t size, void **p); +#define kmem_cache_alloc_bulk(...) 
\ + alloc_hooks(kmem_cache_alloc_bulk_noprof(__VA_ARGS__)) static __always_inline void kfree_bulk(size_t size, void **p) { @@ -858,16 +926,22 @@ unsigned int kmem_cache_sheaf_size(struct slab_sheaf *sheaf); #define PASS_BUCKET_PARAM(_b) NULL #endif +#define DECL_KMALLOC_PARAMS(_size, _b, _token) DECL_BUCKET_PARAMS(_size, _b) \ + DECL_TOKEN_PARAM(_token) + +#define PASS_KMALLOC_PARAMS(_size, _b, _token) PASS_BUCKET_PARAMS(_size, _b) \ + _PASS_TOKEN_PARAM(_token) + /* * The following functions are not to be used directly and are intended only * for internal use from kmalloc() and kmalloc_node() * with the exception of kunit tests */ -void *__kmalloc_noprof(size_t size, gfp_t flags) +void *__kmalloc_noprof(DECL_TOKEN_PARAMS(size, token), gfp_t flags) __assume_kmalloc_alignment __alloc_size(1); -void *__kmalloc_node_noprof(DECL_BUCKET_PARAMS(size, b), gfp_t flags, int node) +void *__kmalloc_node_noprof(DECL_KMALLOC_PARAMS(size, b, token), gfp_t flags, int node) __assume_kmalloc_alignment __alloc_size(1); void *__kmalloc_cache_noprof(struct kmem_cache *s, gfp_t flags, size_t size) @@ -883,6 +957,23 @@ void *__kmalloc_large_noprof(size_t size, gfp_t flags) void *__kmalloc_large_node_noprof(size_t size, gfp_t flags, int node) __assume_page_alignment __alloc_size(1); +static __always_inline __alloc_size(1) void *_kmalloc_noprof(size_t size, gfp_t flags, kmalloc_token_t token) +{ + if (__builtin_constant_p(size) && size) { + unsigned int index; + + if (size > KMALLOC_MAX_CACHE_SIZE) + return __kmalloc_large_noprof(size, flags); + + index = kmalloc_index(size); + return __kmalloc_cache_noprof( + kmalloc_caches[kmalloc_type(flags, token)][index], + flags, size); + } + return __kmalloc_noprof(PASS_TOKEN_PARAMS(size, token), flags); +} +#define kmalloc_noprof(...) _kmalloc_noprof(__VA_ARGS__, __kmalloc_token(__VA_ARGS__)) +#if 0 /* kernel-doc */ /** * kmalloc - allocate kernel memory * @size: how many bytes of memory are required. 
@@ -938,25 +1029,27 @@ void *__kmalloc_large_node_noprof(size_t size, gfp_t flags, int node) * Try really hard to succeed the allocation but fail * eventually. */ -static __always_inline __alloc_size(1) void *kmalloc_noprof(size_t size, gfp_t flags) -{ - if (__builtin_constant_p(size) && size) { - unsigned int index; - - if (size > KMALLOC_MAX_CACHE_SIZE) - return __kmalloc_large_noprof(size, flags); - - index = kmalloc_index(size); - return __kmalloc_cache_noprof( - kmalloc_caches[kmalloc_type(flags, _RET_IP_)][index], - flags, size); - } - return __kmalloc_noprof(size, flags); -} -#define kmalloc(...) alloc_hooks(kmalloc_noprof(__VA_ARGS__)) +void *kmalloc(size_t size, gfp_t flags); +#endif +#define kmalloc(size, flags) alloc_hooks(kmalloc_noprof(size, flags)) -void *kmalloc_nolock_noprof(size_t size, gfp_t gfp_flags, int node); -#define kmalloc_nolock(...) alloc_hooks(kmalloc_nolock_noprof(__VA_ARGS__)) +void *_kmalloc_nolock_noprof(DECL_TOKEN_PARAMS(size, token), gfp_t gfp_flags, int node); +#define kmalloc_nolock_noprof(_s, _f, _n) _kmalloc_nolock_noprof(PASS_TOKEN_PARAMS(_s, __kmalloc_token(_s)), _f, _n) +#if 0 /* kernel-doc */ +/** + * kmalloc_nolock - Allocate an object of given size from any context. + * @size: size to allocate + * @gfp_flags: GFP flags. Only __GFP_ACCOUNT, __GFP_ZERO, __GFP_NO_OBJ_EXT + * allowed. + * @node: node number of the target node. + * + * Return: pointer to the new object or NULL in case of error. + * NULL does not mean EBUSY or EAGAIN. It means ENOMEM. + * There is no reason to call it again and expect !NULL. 
+ */ +void *kmalloc_nolock(size_t size, gfp_t gfp_flags, int node); +#endif +#define kmalloc_nolock(size, gfp_flags, node) alloc_hooks(kmalloc_nolock_noprof(size, gfp_flags, node)) /** * __alloc_objs - Allocate objects of a given type using @@ -1060,12 +1153,12 @@ void *kmalloc_nolock_noprof(size_t size, gfp_t gfp_flags, int node); __alloc_flex(kvzalloc, default_gfp(__VA_ARGS__), typeof(P), FAM, COUNT) #define kmem_buckets_alloc(_b, _size, _flags) \ - alloc_hooks(__kmalloc_node_noprof(PASS_BUCKET_PARAMS(_size, _b), _flags, NUMA_NO_NODE)) + alloc_hooks(__kmalloc_node_noprof(PASS_KMALLOC_PARAMS(_size, _b, __kmalloc_token(_size)), _flags, NUMA_NO_NODE)) #define kmem_buckets_alloc_track_caller(_b, _size, _flags) \ - alloc_hooks(__kmalloc_node_track_caller_noprof(PASS_BUCKET_PARAMS(_size, _b), _flags, NUMA_NO_NODE, _RET_IP_)) + alloc_hooks(__kmalloc_node_track_caller_noprof(PASS_KMALLOC_PARAMS(_size, _b, __kmalloc_token(_size)), _flags, NUMA_NO_NODE, _RET_IP_)) -static __always_inline __alloc_size(1) void *kmalloc_node_noprof(size_t size, gfp_t flags, int node) +static __always_inline __alloc_size(1) void *_kmalloc_node_noprof(size_t size, gfp_t flags, int node, kmalloc_token_t token) { if (__builtin_constant_p(size) && size) { unsigned int index; @@ -1075,29 +1168,48 @@ static __always_inline __alloc_size(1) void *kmalloc_node_noprof(size_t size, gf index = kmalloc_index(size); return __kmalloc_cache_node_noprof( - kmalloc_caches[kmalloc_type(flags, _RET_IP_)][index], + kmalloc_caches[kmalloc_type(flags, token)][index], flags, node, size); } - return __kmalloc_node_noprof(PASS_BUCKET_PARAMS(size, NULL), flags, node); + return __kmalloc_node_noprof(PASS_KMALLOC_PARAMS(size, NULL, token), flags, node); } +#define kmalloc_node_noprof(...) _kmalloc_node_noprof(__VA_ARGS__, __kmalloc_token(__VA_ARGS__)) #define kmalloc_node(...) 
alloc_hooks(kmalloc_node_noprof(__VA_ARGS__)) +static inline __alloc_size(1, 2) void *_kmalloc_array_noprof(size_t n, size_t size, gfp_t flags, kmalloc_token_t token) +{ + size_t bytes; + + if (unlikely(check_mul_overflow(n, size, &bytes))) + return NULL; + return _kmalloc_noprof(bytes, flags, token); +} +#define kmalloc_array_noprof(...) _kmalloc_array_noprof(__VA_ARGS__, __kmalloc_token(__VA_ARGS__)) +#if 0 /* kernel-doc */ /** * kmalloc_array - allocate memory for an array. * @n: number of elements. * @size: element size. * @flags: the type of memory to allocate (see kmalloc). */ -static inline __alloc_size(1, 2) void *kmalloc_array_noprof(size_t n, size_t size, gfp_t flags) +void *kmalloc_array(size_t n, size_t size, gfp_t flags); +#endif +#define kmalloc_array(n, size, flags) alloc_hooks(kmalloc_array_noprof(n, size, flags)) + +static inline __realloc_size(2, 3) void * __must_check _krealloc_array_noprof(void *p, + size_t new_n, + size_t new_size, + gfp_t flags, kmalloc_token_t token) { size_t bytes; - if (unlikely(check_mul_overflow(n, size, &bytes))) + if (unlikely(check_mul_overflow(new_n, new_size, &bytes))) return NULL; - return kmalloc_noprof(bytes, flags); -} -#define kmalloc_array(...) alloc_hooks(kmalloc_array_noprof(__VA_ARGS__)) + return krealloc_node_align_noprof(p, PASS_TOKEN_PARAMS(bytes, token), 1, flags, NUMA_NO_NODE); +} +#define krealloc_array_noprof(...) _krealloc_array_noprof(__VA_ARGS__, __kmalloc_token(__VA_ARGS__)) +#if 0 /* kernel-doc */ /** * krealloc_array - reallocate memory for an array. * @p: pointer to the memory chunk to reallocate @@ -1115,19 +1227,9 @@ static inline __alloc_size(1, 2) void *kmalloc_array_noprof(size_t n, size_t siz * In any case, the contents of the object pointed to are preserved up to the * lesser of the new and old sizes. 
*/ -static inline __realloc_size(2, 3) void * __must_check krealloc_array_noprof(void *p, - size_t new_n, - size_t new_size, - gfp_t flags) -{ - size_t bytes; - - if (unlikely(check_mul_overflow(new_n, new_size, &bytes))) - return NULL; - - return krealloc_noprof(p, bytes, flags); -} -#define krealloc_array(...) alloc_hooks(krealloc_array_noprof(__VA_ARGS__)) +void *krealloc_array(void *p, size_t new_n, size_t new_size, gfp_t flags); +#endif +#define krealloc_array(p, new_n, new_size, flags) alloc_hooks(krealloc_array_noprof(p, new_n, new_size, flags)) /** * kcalloc - allocate memory for an array. The memory is set to zero. @@ -1137,10 +1239,10 @@ static inline __realloc_size(2, 3) void * __must_check krealloc_array_noprof(voi */ #define kcalloc(n, size, flags) kmalloc_array(n, size, (flags) | __GFP_ZERO) -void *__kmalloc_node_track_caller_noprof(DECL_BUCKET_PARAMS(size, b), gfp_t flags, int node, +void *__kmalloc_node_track_caller_noprof(DECL_KMALLOC_PARAMS(size, b, token), gfp_t flags, int node, unsigned long caller) __alloc_size(1); #define kmalloc_node_track_caller_noprof(size, flags, node, caller) \ - __kmalloc_node_track_caller_noprof(PASS_BUCKET_PARAMS(size, NULL), flags, node, caller) + __kmalloc_node_track_caller_noprof(PASS_KMALLOC_PARAMS(size, NULL, __kmalloc_token(size)), flags, node, caller) #define kmalloc_node_track_caller(...) \ alloc_hooks(kmalloc_node_track_caller_noprof(__VA_ARGS__, _RET_IP_)) @@ -1157,17 +1259,18 @@ void *__kmalloc_node_track_caller_noprof(DECL_BUCKET_PARAMS(size, b), gfp_t flag #define kmalloc_track_caller_noprof(...) 
\ kmalloc_node_track_caller_noprof(__VA_ARGS__, NUMA_NO_NODE, _RET_IP_) -static inline __alloc_size(1, 2) void *kmalloc_array_node_noprof(size_t n, size_t size, gfp_t flags, - int node) +static inline __alloc_size(1, 2) void *_kmalloc_array_node_noprof(size_t n, size_t size, gfp_t flags, + int node, kmalloc_token_t token) { size_t bytes; if (unlikely(check_mul_overflow(n, size, &bytes))) return NULL; if (__builtin_constant_p(n) && __builtin_constant_p(size)) - return kmalloc_node_noprof(bytes, flags, node); - return __kmalloc_node_noprof(PASS_BUCKET_PARAMS(bytes, NULL), flags, node); + return _kmalloc_node_noprof(bytes, flags, node, token); + return __kmalloc_node_noprof(PASS_KMALLOC_PARAMS(bytes, NULL, token), flags, node); } +#define kmalloc_array_node_noprof(...) _kmalloc_array_node_noprof(__VA_ARGS__, __kmalloc_token(__VA_ARGS__)) #define kmalloc_array_node(...) alloc_hooks(kmalloc_array_node_noprof(__VA_ARGS__)) #define kcalloc_node(_n, _size, _flags, _node) \ @@ -1178,44 +1281,73 @@ static inline __alloc_size(1, 2) void *kmalloc_array_node_noprof(size_t n, size_ */ #define kmem_cache_zalloc(_k, _flags) kmem_cache_alloc(_k, (_flags)|__GFP_ZERO) +static inline __alloc_size(1) void *_kzalloc_noprof(size_t size, gfp_t flags, kmalloc_token_t token) +{ + return _kmalloc_noprof(size, flags | __GFP_ZERO, token); +} +#define kzalloc_noprof(...) _kzalloc_noprof(__VA_ARGS__, __kmalloc_token(__VA_ARGS__)) +#if 0 /* kernel-doc */ /** * kzalloc - allocate memory. The memory is set to zero. * @size: how many bytes of memory are required. * @flags: the type of memory to allocate (see kmalloc). */ -static inline __alloc_size(1) void *kzalloc_noprof(size_t size, gfp_t flags) -{ - return kmalloc_noprof(size, flags | __GFP_ZERO); -} -#define kzalloc(...) 
alloc_hooks(kzalloc_noprof(__VA_ARGS__)) +void *kzalloc(size_t size, gfp_t flags); +#endif +#define kzalloc(size, flags) alloc_hooks(kzalloc_noprof(size, flags)) #define kzalloc_node(_size, _flags, _node) kmalloc_node(_size, (_flags)|__GFP_ZERO, _node) -void *__kvmalloc_node_noprof(DECL_BUCKET_PARAMS(size, b), unsigned long align, +void *__kvmalloc_node_noprof(DECL_KMALLOC_PARAMS(size, b, token), unsigned long align, gfp_t flags, int node) __alloc_size(1); #define kvmalloc_node_align_noprof(_size, _align, _flags, _node) \ - __kvmalloc_node_noprof(PASS_BUCKET_PARAMS(_size, NULL), _align, _flags, _node) + __kvmalloc_node_noprof(PASS_KMALLOC_PARAMS(_size, NULL, __kmalloc_token(_size)), _align, _flags, _node) #define kvmalloc_node_align(...) \ alloc_hooks(kvmalloc_node_align_noprof(__VA_ARGS__)) -#define kvmalloc_node(_s, _f, _n) kvmalloc_node_align(_s, 1, _f, _n) +#if 0 /* kernel-doc */ +/** + * kvmalloc_node - attempt to allocate physically contiguous memory, but upon + * failure, fall back to non-contiguous (vmalloc) allocation. + * @size: size of the request. + * @flags: gfp mask for the allocation - must be compatible (superset) with GFP_KERNEL. + * @node: numa node to allocate from + * + * Only alignments up to those guaranteed by kmalloc() will be honored. Please see + * Documentation/core-api/memory-allocation.rst for more details. + * + * Uses kmalloc to get the memory but if the allocation fails then falls back + * to the vmalloc allocator. Use kvfree for freeing the memory. + * + * GFP_NOWAIT and GFP_ATOMIC are supported, the __GFP_NORETRY modifier is not. + * __GFP_RETRY_MAYFAIL is supported, and it should be used only if kmalloc is + * preferable to the vmalloc fallback, due to visible performance drawbacks. 
+ * + * Return: pointer to the allocated memory or %NULL in case of failure + */ +void *kvmalloc_node(size_t size, gfp_t flags, int node); +#endif +#define kvmalloc_node(size, flags, node) kvmalloc_node_align(size, 1, flags, node) +#define kvmalloc_node_noprof(size, flags, node) \ + kvmalloc_node_align_noprof(size, 1, flags, node) #define kvmalloc(...) kvmalloc_node(__VA_ARGS__, NUMA_NO_NODE) +#define kvmalloc_noprof(_size, _flags) kvmalloc_node_noprof(_size, _flags, NUMA_NO_NODE) #define kvzalloc(_size, _flags) kvmalloc(_size, (_flags)|__GFP_ZERO) #define kvzalloc_node(_size, _flags, _node) kvmalloc_node(_size, (_flags)|__GFP_ZERO, _node) #define kmem_buckets_valloc(_b, _size, _flags) \ - alloc_hooks(__kvmalloc_node_noprof(PASS_BUCKET_PARAMS(_size, _b), 1, _flags, NUMA_NO_NODE)) + alloc_hooks(__kvmalloc_node_noprof(PASS_KMALLOC_PARAMS(_size, _b, __kmalloc_token(_size)), 1, _flags, NUMA_NO_NODE)) static inline __alloc_size(1, 2) void * -kvmalloc_array_node_noprof(size_t n, size_t size, gfp_t flags, int node) +_kvmalloc_array_node_noprof(size_t n, size_t size, gfp_t flags, int node, kmalloc_token_t token) { size_t bytes; if (unlikely(check_mul_overflow(n, size, &bytes))) return NULL; - return kvmalloc_node_align_noprof(bytes, 1, flags, node); + return __kvmalloc_node_noprof(PASS_KMALLOC_PARAMS(bytes, NULL, token), 1, flags, node); } - +#define kvmalloc_array_node_noprof(...) _kvmalloc_array_node_noprof(__VA_ARGS__, __kmalloc_token(__VA_ARGS__)) #define kvmalloc_array_noprof(...) kvmalloc_array_node_noprof(__VA_ARGS__, NUMA_NO_NODE) #define kvcalloc_node_noprof(_n,_s,_f,_node) kvmalloc_array_node_noprof(_n,_s,(_f)|__GFP_ZERO,_node) #define kvcalloc_noprof(...) kvcalloc_node_noprof(__VA_ARGS__, NUMA_NO_NODE) @@ -1224,10 +1356,40 @@ kvmalloc_array_node_noprof(size_t n, size_t size, gfp_t flags, int node) #define kvcalloc_node(...) alloc_hooks(kvcalloc_node_noprof(__VA_ARGS__)) #define kvcalloc(...) 
alloc_hooks(kvcalloc_noprof(__VA_ARGS__)) -void *kvrealloc_node_align_noprof(const void *p, size_t size, unsigned long align, +void *kvrealloc_node_align_noprof(const void *p, DECL_TOKEN_PARAMS(size, token), unsigned long align, gfp_t flags, int nid) __realloc_size(2); -#define kvrealloc_node_align(...) \ - alloc_hooks(kvrealloc_node_align_noprof(__VA_ARGS__)) +#if 0 /* kernel-doc */ +/** + * kvrealloc_node_align - reallocate memory; contents remain unchanged + * @p: object to reallocate memory for + * @size: the size to reallocate + * @align: desired alignment + * @flags: the flags for the page level allocator + * @nid: NUMA node id + * + * If @p is %NULL, kvrealloc() behaves exactly like kvmalloc(). If @size is 0 + * and @p is not a %NULL pointer, the object pointed to is freed. + * + * Only alignments up to those guaranteed by kmalloc() will be honored. Please see + * Documentation/core-api/memory-allocation.rst for more details. + * + * If __GFP_ZERO logic is requested, callers must ensure that, starting with the + * initial memory allocation, every subsequent call to this API for the same + * memory allocation is flagged with __GFP_ZERO. Otherwise, it is possible that + * __GFP_ZERO is not fully honored by this API. + * + * In any case, the contents of the object pointed to are preserved up to the + * lesser of the new and old sizes. + * + * This function must not be called concurrently with itself or kvfree() for the + * same memory allocation. + * + * Return: pointer to the allocated memory or %NULL in case of error + */ +void *kvrealloc_node_align(const void *p, size_t size, unsigned long align, gfp_t flags, int nid); +#endif +#define kvrealloc_node_align(p, size, align, flags, nid) \ + alloc_hooks(kvrealloc_node_align_noprof(p, PASS_TOKEN_PARAMS(size, __kmalloc_token(size)), align, flags, nid)) #define kvrealloc_node(_p, _s, _f, _n) kvrealloc_node_align(_p, _s, 1, _f, _n) #define kvrealloc(...) 
kvrealloc_node(__VA_ARGS__, NUMA_NO_NODE) diff --git a/init/Kconfig b/init/Kconfig index 596e469d99f82..f5a9dfb1cf320 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -170,6 +170,9 @@ config CC_HAS_BROKEN_COUNTED_BY_REF # https://github.com/llvm/llvm-project/issues/182575 default y if CC_IS_CLANG && CLANG_VERSION < 220100 +config CC_HAS_ALLOC_TOKEN + def_bool $(cc-option,-falloc-token-max=123) + config CC_HAS_MULTIDIMENSIONAL_NONSTRING def_bool $(success,echo 'char tag[][4] __attribute__((__nonstring__)) = { };' | $(CC) $(CLANG_FLAGS) -x c - -c -o /dev/null -Werror) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 753ac23401c56..00b68efedc3ae 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -980,29 +980,24 @@ __cold bool __io_alloc_req_refill(struct io_ring_ctx *ctx) { gfp_t gfp = GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO; void *reqs[IO_REQ_ALLOC_BATCH]; - int ret; - - ret = kmem_cache_alloc_bulk(req_cachep, gfp, ARRAY_SIZE(reqs), reqs); + int nr_reqs = ARRAY_SIZE(reqs); /* - * Bulk alloc is all-or-nothing. If we fail to get a batch, - * retry single alloc to be on the safe side. + * Bulk alloc is all-or-nothing. If we fail to get a batch, retry a + * single allocation to be on the safe side. 
*/ - if (unlikely(ret <= 0)) { + if (!kmem_cache_alloc_bulk(req_cachep, gfp, nr_reqs, reqs)) { reqs[0] = kmem_cache_alloc(req_cachep, gfp); if (!reqs[0]) return false; - ret = 1; + nr_reqs = 1; } - percpu_ref_get_many(&ctx->refs, ret); - ctx->nr_req_allocated += ret; - - while (ret--) { - struct io_kiocb *req = reqs[ret]; + percpu_ref_get_many(&ctx->refs, nr_reqs); + ctx->nr_req_allocated += nr_reqs; - io_req_add_to_cache(req, ctx); - } + while (nr_reqs--) + io_req_add_to_cache(reqs[nr_reqs], ctx); return true; } diff --git a/kernel/configs/hardening.config b/kernel/configs/hardening.config index 7c3924614e01d..26831a2a57392 100644 --- a/kernel/configs/hardening.config +++ b/kernel/configs/hardening.config @@ -22,7 +22,7 @@ CONFIG_SLAB_FREELIST_RANDOM=y CONFIG_SLAB_FREELIST_HARDENED=y CONFIG_SLAB_BUCKETS=y CONFIG_SHUFFLE_PAGE_ALLOCATOR=y -CONFIG_RANDOM_KMALLOC_CACHES=y +CONFIG_KMALLOC_PARTITION_CACHES=y # Sanity check userspace page table mappings. CONFIG_PAGE_TABLE_CHECK=y diff --git a/lib/test_meminit.c b/lib/test_meminit.c index d028a6552cd61..68c3b9da090ef 100644 --- a/lib/test_meminit.c +++ b/lib/test_meminit.c @@ -229,16 +229,14 @@ static int __init do_kmem_cache_size(size_t size, bool want_ctor, for (iter = 0; iter < 10; iter++) { /* Do a test of bulk allocations */ if (!want_rcu && !want_ctor) { - int ret; - - ret = kmem_cache_alloc_bulk(c, alloc_mask, BULK_SIZE, bulk_array); - if (!ret) { + if (!kmem_cache_alloc_bulk(c, alloc_mask, BULK_SIZE, + bulk_array)) { fail = true; } else { int i; - for (i = 0; i < ret; i++) + for (i = 0; i < BULK_SIZE; i++) fail |= check_buf(bulk_array[i], size, want_ctor, want_rcu, want_zero); - kmem_cache_free_bulk(c, ret, bulk_array); + kmem_cache_free_bulk(c, BULK_SIZE, bulk_array); } } @@ -348,23 +346,24 @@ static int __init do_kmem_cache_size_bulk(int size, int *total_failures) { struct kmem_cache *c; int i, iter, maxiter = 1024; - int num, bytes; + int bytes; bool fail = false; void *objects[10]; c = 
kmem_cache_create("test_cache", size, size, 0, NULL); for (iter = 0; (iter < maxiter) && !fail; iter++) { - num = kmem_cache_alloc_bulk(c, GFP_KERNEL, ARRAY_SIZE(objects), - objects); - for (i = 0; i < num; i++) { + if (!kmem_cache_alloc_bulk(c, GFP_KERNEL, ARRAY_SIZE(objects), + objects)) + continue; + + for (i = 0; i < ARRAY_SIZE(objects); i++) { bytes = count_nonzero_bytes(objects[i], size); if (bytes) fail = true; fill_with_garbage(objects[i], size); } - if (num) - kmem_cache_free_bulk(c, num, objects); + kmem_cache_free_bulk(c, ARRAY_SIZE(objects), objects); } kmem_cache_destroy(c); *total_failures += fail; diff --git a/mm/Kconfig b/mm/Kconfig index e649a950be93f..9e0ca48249054 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -248,22 +248,75 @@ config SLUB_STATS out which slabs are relevant to a particular load. Try running: slabinfo -DA -config RANDOM_KMALLOC_CACHES - default n +config KMALLOC_PARTITION_CACHES depends on !SLUB_TINY - bool "Randomize slab caches for normal kmalloc" + bool "Partitioned slab caches for normal kmalloc" + default RANDOM_KMALLOC_CACHES help - A hardening feature that creates multiple copies of slab caches for - normal kmalloc allocation and makes kmalloc randomly pick one based - on code address, which makes the attackers more difficult to spray - vulnerable memory objects on the heap for the purpose of exploiting - memory vulnerabilities. + A hardening feature that creates multiple isolated copies of slab + caches for normal kmalloc allocations. This makes it more difficult + to exploit memory-safety vulnerabilities by attacking vulnerable + co-located memory objects. Several modes are provided. Currently the number of copies is set to 16, a reasonably large value that effectively diverges the memory objects allocated for different subsystems or modules into different caches, at the expense of a - limited degree of memory and CPU overhead that relates to hardware and - system workload. 
+ limited degree of memory and CPU overhead that relates to hardware + and system workload. + +choice + prompt "Partitioned slab cache mode" + depends on KMALLOC_PARTITION_CACHES + default KMALLOC_PARTITION_TYPED if CC_HAS_ALLOC_TOKEN + default KMALLOC_PARTITION_RANDOM + help + Selects the slab cache partitioning mode. + +config KMALLOC_PARTITION_RANDOM + bool "Randomize slab caches for normal kmalloc" + help + Randomly pick a slab cache based on code address and a per-boot + random seed. + + This makes it harder for attackers to predict object co-location. + The placement is random: while attackers don't know which kmalloc + cache an object will be allocated from, they might circumvent + the randomization by retrying attacks across multiple machines until + the target objects are co-located. + +config KMALLOC_PARTITION_TYPED + bool "Type based slab cache selection for normal kmalloc" + depends on CC_HAS_ALLOC_TOKEN + help + Rely on Clang's allocation tokens to choose a slab cache, where token + IDs are derived from the allocated type. + + Unlike KMALLOC_PARTITION_RANDOM, cache assignment is deterministic based + on type, which guarantees that objects of certain types are not + placed in the same cache. This effectively mitigates certain classes + of exploits that probabilistic defenses like KMALLOC_PARTITION_RANDOM + only make harder but not impossible. However, this also means the + cache assignment is predictable. + + Clang's default token ID calculation returns a bounded hash with + disjoint ranges for pointer-containing and pointerless objects: when + used as the slab cache index, this prevents buffer overflows on + primitive buffers from directly corrupting pointer-containing + objects. + + The current effectiveness of Clang's type inference can be judged by + -Rpass=alloc-token, which provides diagnostics where (after dead-code + elimination) type inference failed. + + Requires Clang 22 or later. 
+ +endchoice + +config RANDOM_KMALLOC_CACHES + bool + transitional + help + Transitional config for migration to KMALLOC_PARTITION_CACHES. endmenu # Slab allocator options diff --git a/mm/kasan/kasan_test_c.c b/mm/kasan/kasan_test_c.c index 3f4ed29178b3c..b9e167ed5be32 100644 --- a/mm/kasan/kasan_test_c.c +++ b/mm/kasan/kasan_test_c.c @@ -1225,14 +1225,13 @@ static void kmem_cache_bulk(struct kunit *test) struct kmem_cache *cache; size_t size = 200; char *p[10]; - bool ret; int i; cache = kmem_cache_create("test_cache", size, 0, 0, NULL); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, cache); - ret = kmem_cache_alloc_bulk(cache, GFP_KERNEL, ARRAY_SIZE(p), (void **)&p); - if (!ret) { + if (!kmem_cache_alloc_bulk(cache, GFP_KERNEL, ARRAY_SIZE(p), + (void **)&p)) { kunit_err(test, "Allocation failed: %s\n", __func__); kmem_cache_destroy(cache); return; diff --git a/mm/kfence/kfence_test.c b/mm/kfence/kfence_test.c index 10424cd25e5a6..de2d0f7d62b15 100644 --- a/mm/kfence/kfence_test.c +++ b/mm/kfence/kfence_test.c @@ -214,7 +214,7 @@ static void test_cache_destroy(void) static inline size_t kmalloc_cache_alignment(size_t size) { /* just to get ->align so no need to pass in the real caller */ - enum kmalloc_cache_type type = kmalloc_type(GFP_KERNEL, 0); + enum kmalloc_cache_type type = kmalloc_type(GFP_KERNEL, __kmalloc_token(0)); return kmalloc_caches[type][__kmalloc_index(size, false)]->align; } @@ -285,7 +285,7 @@ static void *test_alloc(struct kunit *test, size_t size, gfp_t gfp, enum allocat if (is_kfence_address(alloc)) { struct slab *slab = virt_to_slab(alloc); - enum kmalloc_cache_type type = kmalloc_type(GFP_KERNEL, _RET_IP_); + enum kmalloc_cache_type type = kmalloc_type(GFP_KERNEL, __kmalloc_token(size)); struct kmem_cache *s = test_cache ?: kmalloc_caches[type][__kmalloc_index(size, false)]; @@ -761,9 +761,10 @@ static void test_memcache_alloc_bulk(struct kunit *test) timeout = jiffies + msecs_to_jiffies(100 * kfence_sample_interval); do { void *objects[100]; - int i, 
num = kmem_cache_alloc_bulk(test_cache, GFP_ATOMIC, ARRAY_SIZE(objects), - objects); - if (!num) + int i; + + if (!kmem_cache_alloc_bulk(test_cache, GFP_ATOMIC, + ARRAY_SIZE(objects), objects)) continue; for (i = 0; i < ARRAY_SIZE(objects); i++) { if (is_kfence_address(objects[i])) { @@ -771,7 +772,7 @@ static void test_memcache_alloc_bulk(struct kunit *test) break; } } - kmem_cache_free_bulk(test_cache, num, objects); + kmem_cache_free_bulk(test_cache, ARRAY_SIZE(objects), objects); /* * kmem_cache_alloc_bulk() disables interrupts, and calling it * in a tight loop may not give KFENCE a chance to switch the diff --git a/mm/slab.h b/mm/slab.h index bf2f87acf5e3a..1bf9c3021ae3d 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -362,12 +362,12 @@ static inline unsigned int size_index_elem(unsigned int bytes) * KMALLOC_MAX_CACHE_SIZE and the caller must check that. */ static inline struct kmem_cache * -kmalloc_slab(size_t size, kmem_buckets *b, gfp_t flags, unsigned long caller) +kmalloc_slab(size_t size, kmem_buckets *b, gfp_t flags, kmalloc_token_t token) { unsigned int index; if (!b) - b = &kmalloc_caches[kmalloc_type(flags, caller)]; + b = &kmalloc_caches[kmalloc_type(flags, token)]; if (size <= 192) index = kmalloc_size_index[size_index_elem(size)]; else diff --git a/mm/slab_common.c b/mm/slab_common.c index 8b661fff5eedb..b6426d7ceec92 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -742,7 +742,7 @@ kmem_buckets kmalloc_caches[NR_KMALLOC_TYPES] __ro_after_init = { /* initialization for https://llvm.org/pr42570 */ }; EXPORT_SYMBOL(kmalloc_caches); -#ifdef CONFIG_RANDOM_KMALLOC_CACHES +#ifdef CONFIG_KMALLOC_PARTITION_RANDOM unsigned long random_kmalloc_seed __ro_after_init; EXPORT_SYMBOL(random_kmalloc_seed); #endif @@ -787,7 +787,7 @@ size_t kmalloc_size_roundup(size_t size) * The flags don't matter since size_index is common to all. * Neither does the caller for just getting ->object_size. 
*/ - return kmalloc_slab(size, NULL, GFP_KERNEL, 0)->object_size; + return kmalloc_slab(size, NULL, GFP_KERNEL, __kmalloc_token(0))->object_size; } /* Above the smaller buckets, size is a multiple of page size. */ @@ -821,26 +821,26 @@ EXPORT_SYMBOL(kmalloc_size_roundup); #define KMALLOC_RCL_NAME(sz) #endif -#ifdef CONFIG_RANDOM_KMALLOC_CACHES -#define __KMALLOC_RANDOM_CONCAT(a, b) a ## b -#define KMALLOC_RANDOM_NAME(N, sz) __KMALLOC_RANDOM_CONCAT(KMA_RAND_, N)(sz) -#define KMA_RAND_1(sz) .name[KMALLOC_RANDOM_START + 1] = "kmalloc-rnd-01-" #sz, -#define KMA_RAND_2(sz) KMA_RAND_1(sz) .name[KMALLOC_RANDOM_START + 2] = "kmalloc-rnd-02-" #sz, -#define KMA_RAND_3(sz) KMA_RAND_2(sz) .name[KMALLOC_RANDOM_START + 3] = "kmalloc-rnd-03-" #sz, -#define KMA_RAND_4(sz) KMA_RAND_3(sz) .name[KMALLOC_RANDOM_START + 4] = "kmalloc-rnd-04-" #sz, -#define KMA_RAND_5(sz) KMA_RAND_4(sz) .name[KMALLOC_RANDOM_START + 5] = "kmalloc-rnd-05-" #sz, -#define KMA_RAND_6(sz) KMA_RAND_5(sz) .name[KMALLOC_RANDOM_START + 6] = "kmalloc-rnd-06-" #sz, -#define KMA_RAND_7(sz) KMA_RAND_6(sz) .name[KMALLOC_RANDOM_START + 7] = "kmalloc-rnd-07-" #sz, -#define KMA_RAND_8(sz) KMA_RAND_7(sz) .name[KMALLOC_RANDOM_START + 8] = "kmalloc-rnd-08-" #sz, -#define KMA_RAND_9(sz) KMA_RAND_8(sz) .name[KMALLOC_RANDOM_START + 9] = "kmalloc-rnd-09-" #sz, -#define KMA_RAND_10(sz) KMA_RAND_9(sz) .name[KMALLOC_RANDOM_START + 10] = "kmalloc-rnd-10-" #sz, -#define KMA_RAND_11(sz) KMA_RAND_10(sz) .name[KMALLOC_RANDOM_START + 11] = "kmalloc-rnd-11-" #sz, -#define KMA_RAND_12(sz) KMA_RAND_11(sz) .name[KMALLOC_RANDOM_START + 12] = "kmalloc-rnd-12-" #sz, -#define KMA_RAND_13(sz) KMA_RAND_12(sz) .name[KMALLOC_RANDOM_START + 13] = "kmalloc-rnd-13-" #sz, -#define KMA_RAND_14(sz) KMA_RAND_13(sz) .name[KMALLOC_RANDOM_START + 14] = "kmalloc-rnd-14-" #sz, -#define KMA_RAND_15(sz) KMA_RAND_14(sz) .name[KMALLOC_RANDOM_START + 15] = "kmalloc-rnd-15-" #sz, -#else // CONFIG_RANDOM_KMALLOC_CACHES -#define KMALLOC_RANDOM_NAME(N, sz) +#ifdef 
CONFIG_KMALLOC_PARTITION_CACHES +#define __KMALLOC_PARTITION_CONCAT(a, b) a ## b +#define KMALLOC_PARTITION_NAME(N, sz) __KMALLOC_PARTITION_CONCAT(KMA_PART_, N)(sz) +#define KMA_PART_1(sz) .name[KMALLOC_PARTITION_START + 1] = "kmalloc-part-01-" #sz, +#define KMA_PART_2(sz) KMA_PART_1(sz) .name[KMALLOC_PARTITION_START + 2] = "kmalloc-part-02-" #sz, +#define KMA_PART_3(sz) KMA_PART_2(sz) .name[KMALLOC_PARTITION_START + 3] = "kmalloc-part-03-" #sz, +#define KMA_PART_4(sz) KMA_PART_3(sz) .name[KMALLOC_PARTITION_START + 4] = "kmalloc-part-04-" #sz, +#define KMA_PART_5(sz) KMA_PART_4(sz) .name[KMALLOC_PARTITION_START + 5] = "kmalloc-part-05-" #sz, +#define KMA_PART_6(sz) KMA_PART_5(sz) .name[KMALLOC_PARTITION_START + 6] = "kmalloc-part-06-" #sz, +#define KMA_PART_7(sz) KMA_PART_6(sz) .name[KMALLOC_PARTITION_START + 7] = "kmalloc-part-07-" #sz, +#define KMA_PART_8(sz) KMA_PART_7(sz) .name[KMALLOC_PARTITION_START + 8] = "kmalloc-part-08-" #sz, +#define KMA_PART_9(sz) KMA_PART_8(sz) .name[KMALLOC_PARTITION_START + 9] = "kmalloc-part-09-" #sz, +#define KMA_PART_10(sz) KMA_PART_9(sz) .name[KMALLOC_PARTITION_START + 10] = "kmalloc-part-10-" #sz, +#define KMA_PART_11(sz) KMA_PART_10(sz) .name[KMALLOC_PARTITION_START + 11] = "kmalloc-part-11-" #sz, +#define KMA_PART_12(sz) KMA_PART_11(sz) .name[KMALLOC_PARTITION_START + 12] = "kmalloc-part-12-" #sz, +#define KMA_PART_13(sz) KMA_PART_12(sz) .name[KMALLOC_PARTITION_START + 13] = "kmalloc-part-13-" #sz, +#define KMA_PART_14(sz) KMA_PART_13(sz) .name[KMALLOC_PARTITION_START + 14] = "kmalloc-part-14-" #sz, +#define KMA_PART_15(sz) KMA_PART_14(sz) .name[KMALLOC_PARTITION_START + 15] = "kmalloc-part-15-" #sz, +#else // CONFIG_KMALLOC_PARTITION_CACHES +#define KMALLOC_PARTITION_NAME(N, sz) #endif #define INIT_KMALLOC_INFO(__size, __short_size) \ @@ -849,7 +849,7 @@ EXPORT_SYMBOL(kmalloc_size_roundup); KMALLOC_RCL_NAME(__short_size) \ KMALLOC_CGROUP_NAME(__short_size) \ KMALLOC_DMA_NAME(__short_size) \ - 
KMALLOC_RANDOM_NAME(RANDOM_KMALLOC_CACHES_NR, __short_size) \ + KMALLOC_PARTITION_NAME(KMALLOC_PARTITION_CACHES_NR, __short_size) \ .size = __size, \ } @@ -961,8 +961,8 @@ new_kmalloc_cache(int idx, enum kmalloc_cache_type type) flags |= SLAB_CACHE_DMA; } -#ifdef CONFIG_RANDOM_KMALLOC_CACHES - if (type >= KMALLOC_RANDOM_START && type <= KMALLOC_RANDOM_END) +#ifdef CONFIG_KMALLOC_PARTITION_CACHES + if (type >= KMALLOC_PARTITION_START && type <= KMALLOC_PARTITION_END) flags |= SLAB_NO_MERGE; #endif @@ -1010,7 +1010,7 @@ void __init create_kmalloc_caches(void) for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++) new_kmalloc_cache(i, type); } -#ifdef CONFIG_RANDOM_KMALLOC_CACHES +#ifdef CONFIG_KMALLOC_PARTITION_RANDOM random_kmalloc_seed = get_random_u64(); #endif diff --git a/mm/slub.c b/mm/slub.c index a2bf3756ca7d0..67abbbf68fc10 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -227,6 +227,17 @@ struct partial_bulk_context { struct list_head slabs; }; +/* Structure used to iterate over objects within a slab */ +struct slab_obj_iter { + unsigned long pos; + void *start; +#ifdef CONFIG_SLAB_FREELIST_RANDOM + unsigned long freelist_count; + unsigned long page_limit; + bool random; +#endif +}; + static inline bool kmem_cache_debug(struct kmem_cache *s) { return kmem_cache_debug_flags(s, SLAB_DEBUG_FLAGS); @@ -351,8 +362,8 @@ enum stat_item { CMPXCHG_DOUBLE_FAIL, /* Failures of slab freelist update */ SHEAF_FLUSH, /* Objects flushed from a sheaf */ SHEAF_REFILL, /* Objects refilled to a sheaf */ - SHEAF_ALLOC, /* Allocation of an empty sheaf */ - SHEAF_FREE, /* Freeing of an empty sheaf */ + SHEAF_ALLOC, /* Allocation of an empty sheaf including oversized ones */ + SHEAF_FREE, /* Freeing of an empty sheaf including oversized ones */ BARN_GET, /* Got full sheaf from barn */ BARN_GET_FAIL, /* Failed to get full sheaf from barn */ BARN_PUT, /* Put full sheaf to barn */ @@ -2129,11 +2140,11 @@ static inline size_t obj_exts_alloc_size(struct kmem_cache *s, if 
(!is_kmalloc_normal(s)) return sz; - obj_exts_cache = kmalloc_slab(sz, NULL, gfp, 0); + obj_exts_cache = kmalloc_slab(sz, NULL, gfp, __kmalloc_token(0)); /* - * We can't simply compare s with obj_exts_cache, because random kmalloc - * caches have multiple caches per size, selected by caller address. - * Since caller address may differ between kmalloc_slab() and actual + * We can't simply compare s with obj_exts_cache, because partitioned kmalloc + * caches have multiple caches per size, selected by caller address or type. + * Since caller address or type may differ between kmalloc_slab() and actual * allocation, bump size when sizes are equal. */ if (s->object_size == obj_exts_cache->object_size) @@ -2733,7 +2744,7 @@ bool slab_free_freelist_hook(struct kmem_cache *s, void **head, void **tail, return *head != NULL; } -static void *setup_object(struct kmem_cache *s, void *object) +static inline void *setup_object(struct kmem_cache *s, void *object) { setup_object_debug(s, object); object = kasan_init_slab_obj(s, object); @@ -2751,11 +2762,6 @@ static struct slab_sheaf *__alloc_empty_sheaf(struct kmem_cache *s, gfp_t gfp, struct slab_sheaf *sheaf; size_t sheaf_size; - if (gfp & __GFP_NO_OBJ_EXT) - return NULL; - - gfp &= ~OBJCGS_CLEAR_MASK; - /* * Prevent recursion to the same cache, or a deep stack of kmallocs of * varying sizes (sheaf capacity might differ for each kmalloc size @@ -2780,6 +2786,11 @@ static struct slab_sheaf *__alloc_empty_sheaf(struct kmem_cache *s, gfp_t gfp, static inline struct slab_sheaf *alloc_empty_sheaf(struct kmem_cache *s, gfp_t gfp) { + if (gfp & __GFP_NO_OBJ_EXT) + return NULL; + + gfp &= ~OBJCGS_CLEAR_MASK; + return __alloc_empty_sheaf(s, gfp, s->sheaf_capacity); } @@ -3329,87 +3340,14 @@ static void __init init_freelist_randomization(void) mutex_unlock(&slab_mutex); } -/* Get the next entry on the pre-computed freelist randomized */ -static void *next_freelist_entry(struct kmem_cache *s, - unsigned long *pos, void *start, - unsigned 
long page_limit, - unsigned long freelist_count) -{ - unsigned int idx; - - /* - * If the target page allocation failed, the number of objects on the - * page might be smaller than the usual size defined by the cache. - */ - do { - idx = s->random_seq[*pos]; - *pos += 1; - if (*pos >= freelist_count) - *pos = 0; - } while (unlikely(idx >= page_limit)); - - return (char *)start + idx; -} - static DEFINE_PER_CPU(struct rnd_state, slab_rnd_state); -/* Shuffle the single linked freelist based on a random pre-computed sequence */ -static bool shuffle_freelist(struct kmem_cache *s, struct slab *slab, - bool allow_spin) -{ - void *start; - void *cur; - void *next; - unsigned long idx, pos, page_limit, freelist_count; - - if (slab->objects < 2 || !s->random_seq) - return false; - - freelist_count = oo_objects(s->oo); - if (allow_spin) { - pos = get_random_u32_below(freelist_count); - } else { - struct rnd_state *state; - - /* - * An interrupt or NMI handler might interrupt and change - * the state in the middle, but that's safe. 
- */ - state = &get_cpu_var(slab_rnd_state); - pos = prandom_u32_state(state) % freelist_count; - put_cpu_var(slab_rnd_state); - } - - page_limit = slab->objects * s->size; - start = fixup_red_left(s, slab_address(slab)); - - /* First entry is used as the base of the freelist */ - cur = next_freelist_entry(s, &pos, start, page_limit, freelist_count); - cur = setup_object(s, cur); - slab->freelist = cur; - - for (idx = 1; idx < slab->objects; idx++) { - next = next_freelist_entry(s, &pos, start, page_limit, - freelist_count); - next = setup_object(s, next); - set_freepointer(s, cur, next); - cur = next; - } - set_freepointer(s, cur, NULL); - - return true; -} #else static inline int init_cache_random_seq(struct kmem_cache *s) { return 0; } static inline void init_freelist_randomization(void) { } -static inline bool shuffle_freelist(struct kmem_cache *s, struct slab *slab, - bool allow_spin) -{ - return false; -} #endif /* CONFIG_SLAB_FREELIST_RANDOM */ static __always_inline void account_slab(struct slab *slab, int order, @@ -3438,15 +3376,14 @@ static __always_inline void unaccount_slab(struct slab *slab, int order, -(PAGE_SIZE << order)); } +/* Allocate and initialize a slab without building its freelist. 
*/ static struct slab *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) { bool allow_spin = gfpflags_allow_spinning(flags); struct slab *slab; struct kmem_cache_order_objects oo = s->oo; gfp_t alloc_gfp; - void *start, *p, *next; - int idx; - bool shuffle; + void *start; flags &= gfp_allowed_mask; @@ -3497,21 +3434,6 @@ static struct slab *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) alloc_slab_obj_exts_early(s, slab); account_slab(slab, oo_order(oo), s, flags); - shuffle = shuffle_freelist(s, slab, allow_spin); - - if (!shuffle) { - start = fixup_red_left(s, start); - start = setup_object(s, start); - slab->freelist = start; - for (idx = 0, p = start; idx < slab->objects - 1; idx++) { - next = p + s->size; - next = setup_object(s, next); - set_freepointer(s, p, next); - p = next; - } - set_freepointer(s, p, NULL); - } - return slab; } @@ -3599,15 +3521,21 @@ static inline void slab_clear_node_partial(struct slab *slab) /* * Management of partially allocated slabs. 
*/ +static inline void set_node_partial_state(struct kmem_cache_node *n, + struct slab *slab) +{ + slab_set_node_partial(slab); + n->nr_partial++; +} + static inline void __add_partial(struct kmem_cache_node *n, struct slab *slab, enum add_mode mode) { - n->nr_partial++; if (mode == ADD_TO_TAIL) list_add_tail(&slab->slab_list, &n->partial); else list_add(&slab->slab_list, &n->partial); - slab_set_node_partial(slab); + set_node_partial_state(n, slab); } static inline void add_partial(struct kmem_cache_node *n, @@ -3617,13 +3545,19 @@ static inline void add_partial(struct kmem_cache_node *n, __add_partial(n, slab, mode); } +static inline void clear_node_partial_state(struct kmem_cache_node *n, + struct slab *slab) +{ + slab_clear_node_partial(slab); + n->nr_partial--; +} + static inline void remove_partial(struct kmem_cache_node *n, struct slab *slab) { lockdep_assert_held(&n->list_lock); list_del(&slab->slab_list); - slab_clear_node_partial(slab); - n->nr_partial--; + clear_node_partial_state(n, slab); } /* @@ -3665,30 +3599,112 @@ static void *alloc_single_from_partial(struct kmem_cache *s, return object; } +/* Return the next free object in allocation order. */ +static inline void *next_slab_obj(struct kmem_cache *s, + struct slab_obj_iter *iter) +{ +#ifdef CONFIG_SLAB_FREELIST_RANDOM + if (iter->random) { + unsigned long idx; + + /* + * If the target page allocation failed, the number of objects on the + * page might be smaller than the usual size defined by the cache. + */ + do { + idx = s->random_seq[iter->pos]; + iter->pos++; + if (iter->pos >= iter->freelist_count) + iter->pos = 0; + } while (unlikely(idx >= iter->page_limit)); + + return setup_object(s, (char *)iter->start + idx); + } +#endif + return setup_object(s, (char *)iter->start + iter->pos++ * s->size); +} + +/* Build a freelist from the objects not yet allocated from a fresh slab. 
*/ +static inline void build_slab_freelist(struct kmem_cache *s, struct slab *slab, + struct slab_obj_iter *iter) +{ + unsigned int nr = slab->objects - slab->inuse; + unsigned int i; + void *cur, *next; + + if (!nr) { + slab->freelist = NULL; + return; + } + + cur = next_slab_obj(s, iter); + slab->freelist = cur; + + for (i = 1; i < nr; i++) { + next = next_slab_obj(s, iter); + set_freepointer(s, cur, next); + cur = next; + } + + set_freepointer(s, cur, NULL); +} + +/* Initialize an iterator over free objects in allocation order. */ +static inline void init_slab_obj_iter(struct kmem_cache *s, struct slab *slab, + struct slab_obj_iter *iter, + bool allow_spin) +{ + iter->pos = 0; + iter->start = fixup_red_left(s, slab_address(slab)); + +#ifdef CONFIG_SLAB_FREELIST_RANDOM + iter->random = (slab->objects >= 2 && s->random_seq); + if (!iter->random) + return; + + iter->freelist_count = oo_objects(s->oo); + iter->page_limit = slab->objects * s->size; + + if (allow_spin) { + iter->pos = get_random_u32_below(iter->freelist_count); + } else { + struct rnd_state *state; + + /* + * An interrupt or NMI handler might interrupt and change + * the state in the middle, but that's safe. + */ + state = &get_cpu_var(slab_rnd_state); + iter->pos = prandom_u32_state(state) % iter->freelist_count; + put_cpu_var(slab_rnd_state); + } +#endif +} + /* * Called only for kmem_cache_debug() caches to allocate from a freshly * allocated slab. Allocate a single object instead of whole freelist * and put the slab to the partial (or full) list. 
*/ static void *alloc_single_from_new_slab(struct kmem_cache *s, struct slab *slab, - int orig_size, gfp_t gfpflags) + int orig_size, bool allow_spin) { - bool allow_spin = gfpflags_allow_spinning(gfpflags); - int nid = slab_nid(slab); - struct kmem_cache_node *n = get_node(s, nid); + struct kmem_cache_node *n; + struct slab_obj_iter iter; + bool needs_add_partial; unsigned long flags; void *object; - if (!allow_spin && !spin_trylock_irqsave(&n->list_lock, flags)) { - /* Unlucky, discard newly allocated slab. */ - free_new_slab_nolock(s, slab); - return NULL; - } - - object = slab->freelist; - slab->freelist = get_freepointer(s, object); + init_slab_obj_iter(s, slab, &iter, allow_spin); + object = next_slab_obj(s, &iter); slab->inuse = 1; + needs_add_partial = (slab->objects > 1); + build_slab_freelist(s, slab, &iter); + + /* alloc_debug_processing() always expects a valid freepointer */ + set_freepointer(s, object, slab->freelist); + if (!alloc_debug_processing(s, slab, object, orig_size)) { /* * It's not really expected that this would fail on a @@ -3696,20 +3712,32 @@ static void *alloc_single_from_new_slab(struct kmem_cache *s, struct slab *slab, * corruption in theory could cause that. * Leak memory of allocated slab. */ - if (!allow_spin) - spin_unlock_irqrestore(&n->list_lock, flags); return NULL; } - if (allow_spin) + n = get_node(s, slab_nid(slab)); + if (allow_spin) { spin_lock_irqsave(&n->list_lock, flags); + } else if (!spin_trylock_irqsave(&n->list_lock, flags)) { + /* + * Unlucky, discard newly allocated slab. + * The slab is not fully free, but it's fine as + * objects are not allocated to users. 
+ */ + free_new_slab_nolock(s, slab); + return NULL; + } - if (slab->inuse == slab->objects) - add_full(s, n, slab); - else + if (needs_add_partial) add_partial(n, slab, ADD_TO_HEAD); + else + add_full(s, n, slab); - inc_slabs_node(s, nid, slab->objects); + /* + * Debug caches require nr_slabs updates under n->list_lock so validation + * cannot race with slab (de)allocations and observe inconsistent state. + */ + inc_slabs_node(s, slab_nid(slab), slab->objects); spin_unlock_irqrestore(&n->list_lock, flags); return object; @@ -3723,6 +3751,7 @@ static bool get_partial_node_bulk(struct kmem_cache *s, bool allow_spin) { struct slab *slab, *slab2; + struct slab *first = NULL, *last = NULL; unsigned int total_free = 0; unsigned long flags; @@ -3741,8 +3770,15 @@ static bool get_partial_node_bulk(struct kmem_cache *s, struct freelist_counters flc; unsigned int slab_free; - if (!pfmemalloc_match(slab, pc->flags)) + if (!pfmemalloc_match(slab, pc->flags)) { + if (first) { + list_bulk_move_tail(&pc->slabs, + &first->slab_list, + &last->slab_list); + first = NULL; + } continue; + } /* * determine the number of free objects in the slab racily @@ -3759,15 +3795,20 @@ static bool get_partial_node_bulk(struct kmem_cache *s, && total_free + slab_free > pc->max_objects) break; - remove_partial(n, slab); - - list_add(&slab->slab_list, &pc->slabs); + if (!first) + first = slab; + last = slab; + clear_node_partial_state(n, slab); total_free += slab_free; if (total_free >= pc->max_objects) break; } + if (first) + list_bulk_move_tail(&pc->slabs, &first->slab_list, + &last->slab_list); + spin_unlock_irqrestore(&n->list_lock, flags); return total_free > 0; } @@ -4311,7 +4352,8 @@ static inline bool pfmemalloc_match(struct slab *slab, gfp_t gfpflags) * Assumes this is performed only for caches without debugging so we * don't need to worry about adding the slab to the full list. 
*/ -static inline void *get_freelist_nofreeze(struct kmem_cache *s, struct slab *slab) +static inline void *get_freelist_nofreeze(struct kmem_cache *s, struct slab *slab, + unsigned int *count) { struct freelist_counters old, new; @@ -4327,6 +4369,7 @@ static inline void *get_freelist_nofreeze(struct kmem_cache *s, struct slab *sla } while (!slab_update_freelist(s, slab, &old, &new, "get_freelist_nofreeze")); + *count = old.objects - old.inuse; return old.freelist; } @@ -4349,44 +4392,41 @@ static unsigned int alloc_from_new_slab(struct kmem_cache *s, struct slab *slab, void **p, unsigned int count, bool allow_spin) { unsigned int allocated = 0; - struct kmem_cache_node *n; - bool needs_add_partial; + struct slab_obj_iter iter; + bool needs_add_partial = true; unsigned long flags; - void *object; /* * Are we going to put the slab on the partial list? * Note slab->inuse is 0 on a new slab. */ - needs_add_partial = (slab->objects > count); - - if (!allow_spin && needs_add_partial) { - - n = get_node(s, slab_nid(slab)); - - if (!spin_trylock_irqsave(&n->list_lock, flags)) { - /* Unlucky, discard newly allocated slab */ - free_new_slab_nolock(s, slab); - return 0; - } + if (count >= slab->objects) { + needs_add_partial = false; + count = slab->objects; } - object = slab->freelist; - while (object && allocated < count) { - p[allocated] = object; - object = get_freepointer(s, object); - maybe_wipe_obj_freeptr(s, p[allocated]); + init_slab_obj_iter(s, slab, &iter, allow_spin); - slab->inuse++; + while (allocated < count) { + p[allocated] = next_slab_obj(s, &iter); allocated++; } - slab->freelist = object; + slab->inuse = count; + build_slab_freelist(s, slab, &iter); if (needs_add_partial) { + struct kmem_cache_node *n = get_node(s, slab_nid(slab)); if (allow_spin) { - n = get_node(s, slab_nid(slab)); spin_lock_irqsave(&n->list_lock, flags); + } else if (!spin_trylock_irqsave(&n->list_lock, flags)) { + /* + * Unlucky, discard newly allocated slab. 
+ * The slab is not fully free, but it's fine as + * objects are not allocated to users. + */ + free_new_slab_nolock(s, slab); + return 0; } add_partial(n, slab, ADD_TO_HEAD); spin_unlock_irqrestore(&n->list_lock, flags); @@ -4457,15 +4497,13 @@ new_objects: stat(s, ALLOC_SLAB); if (IS_ENABLED(CONFIG_SLUB_TINY) || kmem_cache_debug(s)) { - object = alloc_single_from_new_slab(s, slab, orig_size, gfpflags); + object = alloc_single_from_new_slab(s, slab, orig_size, allow_spin); if (likely(object)) goto success; } else { - alloc_from_new_slab(s, slab, &object, 1, allow_spin); - /* we don't need to check SLAB_STORE_USER here */ - if (likely(object)) + if (alloc_from_new_slab(s, slab, &object, 1, allow_spin)) return object; } @@ -4981,8 +5019,8 @@ static int __prefill_sheaf_pfmemalloc(struct kmem_cache *s, return ret; } -static int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, - size_t size, void **p); +static bool __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, + size_t size, void **p); /* * returns a sheaf that has at least the requested size @@ -5002,21 +5040,20 @@ kmem_cache_prefill_sheaf(struct kmem_cache *s, gfp_t gfp, unsigned int size) if (unlikely(size > s->sheaf_capacity)) { - sheaf = kzalloc_flex(*sheaf, objects, size, gfp); + sheaf = __alloc_empty_sheaf(s, gfp, size); if (!sheaf) return NULL; stat(s, SHEAF_PREFILL_OVERSIZE); - sheaf->cache = s; sheaf->capacity = size; /* * we do not need to care about pfmemalloc here because oversize - * sheaves area always flushed and freed when returned + * sheaves are always flushed and freed when returned */ if (!__kmem_cache_alloc_bulk(s, gfp, size, &sheaf->objects[0])) { - kfree(sheaf); + free_empty_sheaf(s, sheaf); return NULL; } @@ -5084,7 +5121,7 @@ void kmem_cache_return_sheaf(struct kmem_cache *s, gfp_t gfp, if (unlikely((sheaf->capacity != s->sheaf_capacity) || sheaf->pfmemalloc)) { sheaf_flush_unused(s, sheaf); - kfree(sheaf); + free_empty_sheaf(s, sheaf); return; } @@ -5154,9 +5191,8 @@ 
int kmem_cache_refill_sheaf(struct kmem_cache *s, gfp_t gfp, return __prefill_sheaf_pfmemalloc(s, sheaf, gfp); if (!__kmem_cache_alloc_bulk(s, gfp, sheaf->capacity - sheaf->size, - &sheaf->objects[sheaf->size])) { + &sheaf->objects[sheaf->size])) return -ENOMEM; - } sheaf->size = sheaf->capacity; return 0; @@ -5275,7 +5311,7 @@ EXPORT_SYMBOL(__kmalloc_large_node_noprof); static __always_inline void *__do_kmalloc_node(size_t size, kmem_buckets *b, gfp_t flags, int node, - unsigned long caller) + unsigned long caller, kmalloc_token_t token) { struct kmem_cache *s; void *ret; @@ -5290,37 +5326,28 @@ void *__do_kmalloc_node(size_t size, kmem_buckets *b, gfp_t flags, int node, if (unlikely(!size)) return ZERO_SIZE_PTR; - s = kmalloc_slab(size, b, flags, caller); + s = kmalloc_slab(size, b, flags, token); ret = slab_alloc_node(s, NULL, flags, node, caller, size); ret = kasan_kmalloc(s, ret, size, flags); trace_kmalloc(caller, ret, size, s->size, flags, node); return ret; } -void *__kmalloc_node_noprof(DECL_BUCKET_PARAMS(size, b), gfp_t flags, int node) +void *__kmalloc_node_noprof(DECL_KMALLOC_PARAMS(size, b, token), gfp_t flags, int node) { - return __do_kmalloc_node(size, PASS_BUCKET_PARAM(b), flags, node, _RET_IP_); + return __do_kmalloc_node(size, PASS_BUCKET_PARAM(b), flags, node, + _RET_IP_, PASS_TOKEN_PARAM(token)); } EXPORT_SYMBOL(__kmalloc_node_noprof); -void *__kmalloc_noprof(size_t size, gfp_t flags) +void *__kmalloc_noprof(DECL_TOKEN_PARAMS(size, token), gfp_t flags) { - return __do_kmalloc_node(size, NULL, flags, NUMA_NO_NODE, _RET_IP_); + return __do_kmalloc_node(size, NULL, flags, NUMA_NO_NODE, _RET_IP_, + PASS_TOKEN_PARAM(token)); } EXPORT_SYMBOL(__kmalloc_noprof); -/** - * kmalloc_nolock - Allocate an object of given size from any context. - * @size: size to allocate - * @gfp_flags: GFP flags. Only __GFP_ACCOUNT, __GFP_ZERO, __GFP_NO_OBJ_EXT - * allowed. - * @node: node number of the target node. 
- * - * Return: pointer to the new object or NULL in case of error. - * NULL does not mean EBUSY or EAGAIN. It means ENOMEM. - * There is no reason to call it again and expect !NULL. - */ -void *kmalloc_nolock_noprof(size_t size, gfp_t gfp_flags, int node) +void *_kmalloc_nolock_noprof(DECL_TOKEN_PARAMS(size, token), gfp_t gfp_flags, int node) { gfp_t alloc_gfp = __GFP_NOWARN | __GFP_NOMEMALLOC | gfp_flags; struct kmem_cache *s; @@ -5347,7 +5374,7 @@ void *kmalloc_nolock_noprof(size_t size, gfp_t gfp_flags, int node) retry: if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) return NULL; - s = kmalloc_slab(size, NULL, alloc_gfp, _RET_IP_); + s = kmalloc_slab(size, NULL, alloc_gfp, PASS_TOKEN_PARAM(token)); if (!(s->flags & __CMPXCHG_DOUBLE) && !kmem_cache_debug(s)) /* @@ -5400,12 +5427,13 @@ success: ret = kasan_kmalloc(s, ret, size, alloc_gfp); return ret; } -EXPORT_SYMBOL_GPL(kmalloc_nolock_noprof); +EXPORT_SYMBOL_GPL(_kmalloc_nolock_noprof); -void *__kmalloc_node_track_caller_noprof(DECL_BUCKET_PARAMS(size, b), gfp_t flags, +void *__kmalloc_node_track_caller_noprof(DECL_KMALLOC_PARAMS(size, b, token), gfp_t flags, int node, unsigned long caller) { - return __do_kmalloc_node(size, PASS_BUCKET_PARAM(b), flags, node, caller); + return __do_kmalloc_node(size, PASS_BUCKET_PARAM(b), flags, node, + caller, PASS_TOKEN_PARAM(token)); } EXPORT_SYMBOL(__kmalloc_node_track_caller_noprof); @@ -5500,6 +5528,34 @@ static noinline void free_to_partial_list( } /* + * Try returning (remainder of) the freelist that we just detached from the + * slab. Optimistically assume the slab is still full, so we don't need to find + * the tail of the detached freelist. + * + * Fail if the slab isn't full anymore due to a concurrent free. 
+ */ +static bool __slab_try_return_freelist(struct kmem_cache *s, struct slab *slab, + void *head, int cnt) +{ + struct freelist_counters old, new; + + old.freelist = slab->freelist; + old.counters = slab->counters; + + if (old.freelist) + return false; + + new.freelist = head; + new.counters = old.counters; + new.inuse -= cnt; + + if (!slab_update_freelist(s, slab, &old, &new, "__slab_try_return_freelist")) + return false; + + return true; +} + +/* * Slow path handling. This may still be called frequently since objects * have a longer lifetime than the cpu slabs in most processing loads. * @@ -6636,7 +6692,7 @@ void kfree_nolock(const void *object) EXPORT_SYMBOL_GPL(kfree_nolock); static __always_inline __realloc_size(2) void * -__do_krealloc(const void *p, size_t new_size, unsigned long align, gfp_t flags, int nid) +__do_krealloc(const void *p, size_t new_size, unsigned long align, gfp_t flags, int nid, kmalloc_token_t token) { void *ret; size_t ks = 0; @@ -6708,7 +6764,7 @@ __do_krealloc(const void *p, size_t new_size, unsigned long align, gfp_t flags, return (void *)p; alloc_new: - ret = kmalloc_node_track_caller_noprof(new_size, flags, nid, _RET_IP_); + ret = __kmalloc_node_track_caller_noprof(PASS_KMALLOC_PARAMS(new_size, NULL, token), flags, nid, _RET_IP_); if (ret && p) { /* Disable KASAN checks as the object's redzone is accessed. */ kasan_disable_current(); @@ -6719,45 +6775,7 @@ alloc_new: return ret; } -/** - * krealloc_node_align - reallocate memory. The contents will remain unchanged. - * @p: object to reallocate memory for. - * @new_size: how many bytes of memory are required. - * @align: desired alignment. - * @flags: the type of memory to allocate. - * @nid: NUMA node or NUMA_NO_NODE - * - * If @p is %NULL, krealloc() behaves exactly like kmalloc(). If @new_size - * is 0 and @p is not a %NULL pointer, the object pointed to is freed. - * - * Only alignments up to those guaranteed by kmalloc() will be honored. 
Please see - * Documentation/core-api/memory-allocation.rst for more details. - * - * If __GFP_ZERO logic is requested, callers must ensure that, starting with the - * initial memory allocation, every subsequent call to this API for the same - * memory allocation is flagged with __GFP_ZERO. Otherwise, it is possible that - * __GFP_ZERO is not fully honored by this API. - * - * When slub_debug_orig_size() is off, krealloc() only knows about the bucket - * size of an allocation (but not the exact size it was allocated with) and - * hence implements the following semantics for shrinking and growing buffers - * with __GFP_ZERO:: - * - * new bucket - * 0 size size - * |--------|----------------| - * | keep | zero | - * - * Otherwise, the original allocation size 'orig_size' could be used to - * precisely clear the requested size, and the new size will also be stored - * as the new 'orig_size'. - * - * In any case, the contents of the object pointed to are preserved up to the - * lesser of the new and old sizes. - * - * Return: pointer to the allocated memory or %NULL in case of error - */ -void *krealloc_node_align_noprof(const void *p, size_t new_size, unsigned long align, +void *krealloc_node_align_noprof(const void *p, DECL_TOKEN_PARAMS(new_size, token), unsigned long align, gfp_t flags, int nid) { void *ret; @@ -6767,7 +6785,7 @@ void *krealloc_node_align_noprof(const void *p, size_t new_size, unsigned long a return ZERO_SIZE_PTR; } - ret = __do_krealloc(p, new_size, align, flags, nid); + ret = __do_krealloc(p, new_size, align, flags, nid, PASS_TOKEN_PARAM(token)); if (ret && kasan_reset_tag(p) != kasan_reset_tag(ret)) kfree(p); @@ -6799,28 +6817,7 @@ static gfp_t kmalloc_gfp_adjust(gfp_t flags, size_t size) return flags; } -/** - * __kvmalloc_node - attempt to allocate physically contiguous memory, but upon - * failure, fall back to non-contiguous (vmalloc) allocation. - * @size: size of the request. - * @b: which set of kmalloc buckets to allocate from. 
- * @align: desired alignment. - * @flags: gfp mask for the allocation - must be compatible (superset) with GFP_KERNEL. - * @node: numa node to allocate from - * - * Only alignments up to those guaranteed by kmalloc() will be honored. Please see - * Documentation/core-api/memory-allocation.rst for more details. - * - * Uses kmalloc to get the memory but if the allocation fails then falls back - * to the vmalloc allocator. Use kvfree for freeing the memory. - * - * GFP_NOWAIT and GFP_ATOMIC are supported, the __GFP_NORETRY modifier is not. - * __GFP_RETRY_MAYFAIL is supported, and it should be used only if kmalloc is - * preferable to the vmalloc fallback, due to visible performance drawbacks. - * - * Return: pointer to the allocated memory of %NULL in case of failure - */ -void *__kvmalloc_node_noprof(DECL_BUCKET_PARAMS(size, b), unsigned long align, +void *__kvmalloc_node_noprof(DECL_KMALLOC_PARAMS(size, b, token), unsigned long align, gfp_t flags, int node) { bool allow_block; @@ -6832,7 +6829,7 @@ void *__kvmalloc_node_noprof(DECL_BUCKET_PARAMS(size, b), unsigned long align, */ ret = __do_kmalloc_node(size, PASS_BUCKET_PARAM(b), kmalloc_gfp_adjust(flags, size), - node, _RET_IP_); + node, _RET_IP_, PASS_TOKEN_PARAM(token)); if (ret || size <= PAGE_SIZE) return ret; @@ -6917,34 +6914,7 @@ void kvfree_sensitive(const void *addr, size_t len) } EXPORT_SYMBOL(kvfree_sensitive); -/** - * kvrealloc_node_align - reallocate memory; contents remain unchanged - * @p: object to reallocate memory for - * @size: the size to reallocate - * @align: desired alignment - * @flags: the flags for the page level allocator - * @nid: NUMA node id - * - * If @p is %NULL, kvrealloc() behaves exactly like kvmalloc(). If @size is 0 - * and @p is not a %NULL pointer, the object pointed to is freed. - * - * Only alignments up to those guaranteed by kmalloc() will be honored. Please see - * Documentation/core-api/memory-allocation.rst for more details. 
- * - * If __GFP_ZERO logic is requested, callers must ensure that, starting with the - * initial memory allocation, every subsequent call to this API for the same - * memory allocation is flagged with __GFP_ZERO. Otherwise, it is possible that - * __GFP_ZERO is not fully honored by this API. - * - * In any case, the contents of the object pointed to are preserved up to the - * lesser of the new and old sizes. - * - * This function must not be called concurrently with itself or kvfree() for the - * same memory allocation. - * - * Return: pointer to the allocated memory or %NULL in case of error - */ -void *kvrealloc_node_align_noprof(const void *p, size_t size, unsigned long align, +void *kvrealloc_node_align_noprof(const void *p, DECL_TOKEN_PARAMS(size, token), unsigned long align, gfp_t flags, int nid) { void *n; @@ -6952,10 +6922,10 @@ void *kvrealloc_node_align_noprof(const void *p, size_t size, unsigned long alig if (is_vmalloc_addr(p)) return vrealloc_node_align_noprof(p, size, align, flags, nid); - n = krealloc_node_align_noprof(p, size, align, kmalloc_gfp_adjust(flags, size), nid); + n = krealloc_node_align_noprof(p, PASS_TOKEN_PARAMS(size, token), align, kmalloc_gfp_adjust(flags, size), nid); if (!n) { /* We failed to krealloc(), fall back to kvmalloc(). 
*/ - n = kvmalloc_node_align_noprof(size, align, flags, nid); + n = __kvmalloc_node_noprof(PASS_KMALLOC_PARAMS(size, NULL, token), align, flags, nid); if (!n) return NULL; @@ -7126,60 +7096,56 @@ __refill_objects_node(struct kmem_cache *s, void **p, gfp_t gfp, unsigned int mi list_for_each_entry_safe(slab, slab2, &pc.slabs, slab_list) { + unsigned int count; + list_del(&slab->slab_list); - object = get_freelist_nofreeze(s, slab); + object = get_freelist_nofreeze(s, slab, &count); - while (object && refilled < max) { + while (count && refilled < max) { p[refilled] = object; object = get_freepointer(s, object); maybe_wipe_obj_freeptr(s, p[refilled]); refilled++; + count--; } /* * Freelist had more objects than we can accommodate, we need to - * free them back. We can treat it like a detached freelist, just - * need to find the tail object. + * free them back. First we try to be optimistic and assume the + * slab is still full since we just detached its freelist. + * Otherwise we must find the tail object. 
*/ - if (unlikely(object)) { + if (unlikely(count)) { void *head = object; void *tail; - int cnt = 0; + + if (__slab_try_return_freelist(s, slab, head, count)) { + list_add(&slab->slab_list, &pc.slabs); + break; + } do { tail = object; - cnt++; object = get_freepointer(s, object); } while (object); - __slab_free(s, slab, head, tail, cnt, _RET_IP_); + __slab_free(s, slab, head, tail, count, _RET_IP_); } if (refilled >= max) break; } - if (unlikely(!list_empty(&pc.slabs))) { + if (!list_empty(&pc.slabs)) { spin_lock_irqsave(&n->list_lock, flags); - list_for_each_entry_safe(slab, slab2, &pc.slabs, slab_list) { - - if (unlikely(!slab->inuse && n->nr_partial >= s->min_partial)) - continue; + list_for_each_entry(slab, &pc.slabs, slab_list) + set_node_partial_state(n, slab); - list_del(&slab->slab_list); - add_partial(n, slab, ADD_TO_HEAD); - } + list_splice_tail(&pc.slabs, &n->partial); spin_unlock_irqrestore(&n->list_lock, flags); - - /* any slabs left are completely free and for discard */ - list_for_each_entry_safe(slab, slab2, &pc.slabs, slab_list) { - - list_del(&slab->slab_list); - discard_slab(s, slab); - } } return refilled; @@ -7275,10 +7241,6 @@ new_slab: stat(s, ALLOC_SLAB); - /* - * TODO: possible optimization - if we know we will consume the whole - * slab we might skip creating the freelist? 
- */ refilled += alloc_from_new_slab(s, slab, p + refilled, max - refilled, /* allow_spin = */ true); @@ -7289,9 +7251,8 @@ out: return refilled; } -static inline -int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, - void **p) +static bool __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, + size_t size, void **p) { int i; @@ -7312,30 +7273,43 @@ int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, stat_add(s, ALLOC_SLOWPATH, i); } - return i; + return true; error: __kmem_cache_free_bulk(s, i, p); - return 0; - + return false; } -/* - * Note that interrupts must be enabled when calling this function and gfp - * flags must allow spinning. +/** + * kmem_cache_alloc_bulk - Allocate multiple objects + * @s: The cache to allocate from + * @flags: GFP_* flags. See kmalloc(). + * @size: Number of objects to allocate + * @p: Array of allocated objects + * + * Allocate @size objects from @s and places them into @p. @size must be larger + * than 0. + * + * Interrupts must be enabled when calling this function and @flags must allow + * spinning. + * + * Unlike alloc_pages_bulk(), this function does not check for already allocated + * objects in @p, and thus the caller does not need to zero it. + * + * Return: %true if the allocation succeeded, or %false if it failed. 
*/ -int kmem_cache_alloc_bulk_noprof(struct kmem_cache *s, gfp_t flags, size_t size, - void **p) +bool kmem_cache_alloc_bulk_noprof(struct kmem_cache *s, gfp_t flags, + size_t size, void **p) { unsigned int i = 0; void *kfence_obj; if (!size) - return 0; + return false; s = slab_pre_alloc_hook(s, flags); if (unlikely(!s)) - return 0; + return false; /* * to make things simpler, only assume at most once kfence allocated @@ -7352,18 +7326,18 @@ int kmem_cache_alloc_bulk_noprof(struct kmem_cache *s, gfp_t flags, size_t size, } i = alloc_from_pcs_bulk(s, flags, size, p); - if (i < size) { /* * If we ran out of memory, don't bother with freeing back to * the percpu sheaves, we have bigger problems. */ - if (unlikely(__kmem_cache_alloc_bulk(s, flags, size - i, p + i) == 0)) { + if (unlikely(!__kmem_cache_alloc_bulk(s, flags, size - i, + p + i))) { if (i > 0) __kmem_cache_free_bulk(s, i, p); if (kfence_obj) __kfence_free(kfence_obj); - return 0; + return false; } } @@ -7378,16 +7352,9 @@ int kmem_cache_alloc_bulk_noprof(struct kmem_cache *s, gfp_t flags, size_t size, } out: - /* - * memcg and kmem_cache debug support and memory initialization. - * Done outside of the IRQ disabled fastpath loop. 
- */ - if (unlikely(!slab_post_alloc_hook(s, NULL, flags, size, p, - slab_want_init_on_alloc(flags, s), s->object_size))) { - return 0; - } - - return size; + /* memcg and kmem_cache debug support and memory initialization */ + return likely(slab_post_alloc_hook(s, NULL, flags, size, p, + slab_want_init_on_alloc(flags, s), s->object_size)); } EXPORT_SYMBOL(kmem_cache_alloc_bulk_noprof); @@ -7609,6 +7576,7 @@ static void early_kmem_cache_node_alloc(int node) { struct slab *slab; struct kmem_cache_node *n; + struct slab_obj_iter iter; BUG_ON(kmem_cache_node->size < sizeof(struct kmem_cache_node)); @@ -7620,14 +7588,18 @@ static void early_kmem_cache_node_alloc(int node) pr_err("SLUB: Allocating a useless per node structure in order to be able to continue\n"); } - n = slab->freelist; + init_slab_obj_iter(kmem_cache_node, slab, &iter, true); + + n = next_slab_obj(kmem_cache_node, &iter); BUG_ON(!n); + + slab->inuse = 1; + build_slab_freelist(kmem_cache_node, slab, &iter); + #ifdef CONFIG_SLUB_DEBUG init_object(kmem_cache_node, n, SLUB_RED_ACTIVE); #endif n = kasan_slab_alloc(kmem_cache_node, n, GFP_KERNEL, false); - slab->freelist = get_freepointer(kmem_cache_node, n); - slab->inuse = 1; kmem_cache_node->per_node[node].node = n; init_kmem_cache_node(n); inc_slabs_node(kmem_cache_node, node, slab->objects); @@ -8245,8 +8217,7 @@ static int __kmem_cache_do_shrink(struct kmem_cache *s) if (free == slab->objects) { list_move(&slab->slab_list, &discard); - slab_clear_node_partial(slab); - n->nr_partial--; + clear_node_partial_state(n, slab); dec_slabs_node(s, node, slab->objects); } else if (free <= SHRINK_PROMOTE_MAX) list_move(&slab->slab_list, promote + free - 1); @@ -8470,7 +8441,7 @@ static void __init bootstrap_kmalloc_sheaves(void) { enum kmalloc_cache_type type; - for (type = KMALLOC_NORMAL; type <= KMALLOC_RANDOM_END; type++) { + for (type = KMALLOC_NORMAL; type <= KMALLOC_PARTITION_END; type++) { for (int idx = 0; idx < KMALLOC_SHIFT_HIGH + 1; idx++) { if 
(kmalloc_caches[type][idx]) bootstrap_cache_sheaves(kmalloc_caches[type][idx]); diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index c9aea7052ba72..3e9ef4e79c15b 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -243,12 +243,11 @@ static int xdp_recv_frames(struct xdp_frame **frames, int nframes, struct net_device *dev) { gfp_t gfp = __GFP_ZERO | GFP_ATOMIC; - int i, n; + int i; LIST_HEAD(list); - n = kmem_cache_alloc_bulk(net_hotdata.skbuff_cache, gfp, nframes, - (void **)skbs); - if (unlikely(n == 0)) { + if (unlikely(!kmem_cache_alloc_bulk(net_hotdata.skbuff_cache, gfp, + nframes, (void **)skbs))) { for (i = 0; i < nframes; i++) xdp_return_frame(frames[i]); return -ENOMEM; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 78e75b1b26967..9e8ac9b934a89 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -288,11 +288,11 @@ static inline struct sk_buff *napi_skb_cache_get(bool alloc) local_lock_nested_bh(&napi_alloc_cache.bh_lock); if (unlikely(!nc->skb_count)) { - if (alloc) - nc->skb_count = kmem_cache_alloc_bulk(net_hotdata.skbuff_cache, - GFP_ATOMIC | __GFP_NOWARN, - NAPI_SKB_CACHE_BULK, - nc->skb_cache); + if (alloc && kmem_cache_alloc_bulk(net_hotdata.skbuff_cache, + GFP_ATOMIC | __GFP_NOWARN, + NAPI_SKB_CACHE_BULK, + nc->skb_cache)) + nc->skb_count = NAPI_SKB_CACHE_BULK; if (unlikely(!nc->skb_count)) { local_unlock_nested_bh(&napi_alloc_cache.bh_lock); return NULL; @@ -353,16 +353,18 @@ u32 napi_skb_cache_get_bulk(void **skbs, u32 n) /* No enough cached skbs. Try refilling the cache first */ bulk = min(NAPI_SKB_CACHE_SIZE - nc->skb_count, NAPI_SKB_CACHE_BULK); - nc->skb_count += kmem_cache_alloc_bulk(net_hotdata.skbuff_cache, - GFP_ATOMIC | __GFP_NOWARN, bulk, - &nc->skb_cache[nc->skb_count]); + if (kmem_cache_alloc_bulk(net_hotdata.skbuff_cache, + GFP_ATOMIC | __GFP_NOWARN, bulk, + &nc->skb_cache[nc->skb_count])) + nc->skb_count += bulk; if (likely(nc->skb_count >= n)) goto get; /* Still not enough. 
Bulk-allocate the missing part directly, zeroed */ - n -= kmem_cache_alloc_bulk(net_hotdata.skbuff_cache, - GFP_ATOMIC | __GFP_ZERO | __GFP_NOWARN, - n - nc->skb_count, &skbs[nc->skb_count]); + if (kmem_cache_alloc_bulk(net_hotdata.skbuff_cache, + GFP_ATOMIC | __GFP_ZERO | __GFP_NOWARN, + n - nc->skb_count, &skbs[nc->skb_count])) + n = nc->skb_count; if (likely(nc->skb_count >= n)) goto get; diff --git a/tools/include/linux/slab.h b/tools/include/linux/slab.h index 6d8e9413d5a4d..2e63c2e726aaf 100644 --- a/tools/include/linux/slab.h +++ b/tools/include/linux/slab.h @@ -183,7 +183,7 @@ __kmem_cache_create(const char *name, unsigned int size, unsigned int align, default: __kmem_cache_create)(__name, __object_size, __args, __VA_ARGS__) void kmem_cache_free_bulk(struct kmem_cache *cachep, size_t size, void **list); -int kmem_cache_alloc_bulk(struct kmem_cache *cachep, gfp_t gfp, size_t size, +bool kmem_cache_alloc_bulk(struct kmem_cache *cachep, gfp_t gfp, size_t size, void **list); struct slab_sheaf * kmem_cache_prefill_sheaf(struct kmem_cache *s, gfp_t gfp, unsigned int size); diff --git a/tools/mm/slabinfo.c b/tools/mm/slabinfo.c index 54c7265ab52d9..87570c22b151c 100644 --- a/tools/mm/slabinfo.c +++ b/tools/mm/slabinfo.c @@ -193,10 +193,9 @@ static unsigned long get_obj_and_str(const char *name, char **x) *x = NULL; - if (!read_obj(name)) { - x = NULL; + if (!read_obj(name)) return 0; - } + result = strtoul(buffer, &p, 10); while (*p == ' ') p++; @@ -798,7 +797,7 @@ static void slab_debug(struct slabinfo *s) fprintf(stderr, "%s can only enable trace for one slab at a time\n", s->name); } if (!tracing && s->trace) - set_obj(s, "trace", 1); + set_obj(s, "trace", 0); } static void totals(void) @@ -1266,7 +1265,6 @@ static void read_slab_dir(void) slab->objects_total = get_obj("objects_total"); slab->objs_per_slab = get_obj("objs_per_slab"); slab->order = get_obj("order"); - slab->partial = get_obj("partial"); slab->partial = get_obj_and_str("partial", &t); 
decode_numa_list(slab->numa_partial, t); free(t); diff --git a/tools/testing/shared/linux.c b/tools/testing/shared/linux.c index 8c72571559583..e0a0693df08f5 100644 --- a/tools/testing/shared/linux.c +++ b/tools/testing/shared/linux.c @@ -154,7 +154,7 @@ void kmem_cache_shrink(struct kmem_cache *cachep) { } -int kmem_cache_alloc_bulk(struct kmem_cache *cachep, gfp_t gfp, size_t size, +bool kmem_cache_alloc_bulk(struct kmem_cache *cachep, gfp_t gfp, size_t size, void **p) { size_t i; @@ -213,7 +213,7 @@ int kmem_cache_alloc_bulk(struct kmem_cache *cachep, gfp_t gfp, size_t size, pthread_mutex_unlock(&cachep->lock); if (cachep->callback) cachep->exec_callback = true; - return 0; + return false; } for (i = 0; i < size; i++) { @@ -224,7 +224,7 @@ int kmem_cache_alloc_bulk(struct kmem_cache *cachep, gfp_t gfp, size_t size, printf("Allocating %p from slab\n", p[i]); } - return size; + return true; } struct kmem_cache * @@ -271,8 +271,8 @@ kmem_cache_prefill_sheaf(struct kmem_cache *s, gfp_t gfp, unsigned int size) sheaf->cache = s; sheaf->capacity = capacity; - sheaf->size = kmem_cache_alloc_bulk(s, gfp, size, sheaf->objects); - if (!sheaf->size) { + sheaf->size = size; + if (!kmem_cache_alloc_bulk(s, gfp, size, sheaf->objects)) { free(sheaf); return NULL; } @@ -284,7 +284,6 @@ int kmem_cache_refill_sheaf(struct kmem_cache *s, gfp_t gfp, struct slab_sheaf **sheafp, unsigned int size) { struct slab_sheaf *sheaf = *sheafp; - int refill; if (sheaf->size >= size) return 0; @@ -299,12 +298,10 @@ int kmem_cache_refill_sheaf(struct kmem_cache *s, gfp_t gfp, return 0; } - refill = kmem_cache_alloc_bulk(s, gfp, size - sheaf->size, - &sheaf->objects[sheaf->size]); - if (!refill) + if (!kmem_cache_alloc_bulk(s, gfp, size - sheaf->size, + &sheaf->objects[sheaf->size])) return -ENOMEM; - - sheaf->size += refill; + sheaf->size = size; return 0; } |
