diff options
| author | Mark Brown <broonie@kernel.org> | 2026-05-29 22:46:32 +0100 |
|---|---|---|
| committer | Mark Brown <broonie@kernel.org> | 2026-05-29 22:46:32 +0100 |
| commit | e2bd485974b30605aaa2fd4b8b6551d9a1846a62 (patch) | |
| tree | d91bd2ce403f408e3c8bb7327719cc0df9754ef7 /tools | |
| parent | 505ffd23177fa0ac34abf4bd729b99d0540d3d4b (diff) | |
| parent | ddfd3966d0d4f0a8a3cf4d01d31ebba5fd689e33 (diff) | |
| download | linux-next-history-e2bd485974b30605aaa2fd4b8b6551d9a1846a62.tar.gz | |
Merge branch 'master' of https://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git
# Conflicts:
# drivers/cpufreq/Kconfig.x86
# drivers/cpufreq/Makefile
Diffstat (limited to 'tools')
| -rw-r--r-- | tools/objtool/Build | 2 | ||||
| -rw-r--r-- | tools/objtool/Makefile | 4 | ||||
| -rw-r--r-- | tools/objtool/arch/x86/decode.c | 17 | ||||
| -rw-r--r-- | tools/objtool/builtin-check.c | 20 | ||||
| -rw-r--r-- | tools/objtool/builtin-klp.c | 1 | ||||
| -rw-r--r-- | tools/objtool/check.c | 410 | ||||
| -rw-r--r-- | tools/objtool/disas.c | 22 | ||||
| -rw-r--r-- | tools/objtool/elf.c | 124 | ||||
| -rw-r--r-- | tools/objtool/include/objtool/arch.h | 3 | ||||
| -rw-r--r-- | tools/objtool/include/objtool/builtin.h | 7 | ||||
| -rw-r--r-- | tools/objtool/include/objtool/check.h | 34 | ||||
| -rw-r--r-- | tools/objtool/include/objtool/checksum.h | 53 | ||||
| -rw-r--r-- | tools/objtool/include/objtool/elf.h | 59 | ||||
| -rw-r--r-- | tools/objtool/include/objtool/klp.h | 1 | ||||
| -rw-r--r-- | tools/objtool/include/objtool/warn.h | 49 | ||||
| -rw-r--r-- | tools/objtool/klp-checksum.c | 347 | ||||
| -rw-r--r-- | tools/objtool/klp-diff.c | 796 | ||||
| -rw-r--r-- | tools/objtool/objtool.c | 3 | ||||
| -rw-r--r-- | tools/objtool/trace.c | 8 | ||||
| -rw-r--r-- | tools/testing/selftests/membarrier/Makefile | 5 | ||||
| -rw-r--r-- | tools/testing/selftests/membarrier/membarrier_rseq_stress.c | 951 | ||||
| -rw-r--r-- | tools/testing/selftests/timers/posix_timers.c | 55 |
22 files changed, 2282 insertions, 689 deletions
diff --git a/tools/objtool/Build b/tools/objtool/Build index 600da051af12e..93a37b0dfd313 100644 --- a/tools/objtool/Build +++ b/tools/objtool/Build @@ -12,7 +12,7 @@ objtool-$(BUILD_DISAS) += disas.o objtool-$(BUILD_DISAS) += trace.o objtool-$(BUILD_ORC) += orc_gen.o orc_dump.o -objtool-$(BUILD_KLP) += builtin-klp.o klp-diff.o klp-post-link.o +objtool-$(BUILD_KLP) += builtin-klp.o klp-checksum.o klp-diff.o klp-post-link.o objtool-y += libstring.o objtool-y += libctype.o diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile index b71d1886022e9..a4484fd22a96d 100644 --- a/tools/objtool/Makefile +++ b/tools/objtool/Makefile @@ -58,7 +58,7 @@ INCLUDES := -I$(srctree)/tools/include \ -I$(srctree)/tools/arch/$(SRCARCH)/include \ -I$(srctree)/tools/objtool/include \ -I$(srctree)/tools/objtool/arch/$(SRCARCH)/include \ - -I$(LIBSUBCMD_OUTPUT)/include + -I$(srctree)/tools/lib OBJTOOL_CFLAGS := -std=gnu11 -fomit-frame-pointer -O2 -g $(WARNINGS) \ $(INCLUDES) $(LIBELF_FLAGS) $(LIBXXHASH_CFLAGS) $(HOSTCFLAGS) @@ -135,7 +135,7 @@ $(LIBSUBCMD): fixdep $(LIBSUBCMD_OUTPUT) FORCE $(Q)$(MAKE) -C $(LIBSUBCMD_DIR) O=$(LIBSUBCMD_OUTPUT) \ DESTDIR=$(LIBSUBCMD_OUTPUT) prefix= subdir= \ $(HOST_OVERRIDES) EXTRA_CFLAGS="$(OBJTOOL_CFLAGS)" \ - $@ install_headers + $@ $(LIBSUBCMD)-clean: $(call QUIET_CLEAN, libsubcmd) diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index 350b8ee6e7769..1b387d5a195ba 100644 --- a/tools/objtool/arch/x86/decode.c +++ b/tools/objtool/arch/x86/decode.c @@ -805,14 +805,27 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec break; } - if (ins.immediate.nbytes) + if (ins.immediate.nbytes) { insn->immediate = ins.immediate.value; - else if (ins.displacement.nbytes) + insn->immediate_len = ins.immediate.nbytes; + } else if (ins.displacement.nbytes) { insn->immediate = ins.displacement.value; + insn->immediate_len = ins.displacement.nbytes; + } return 0; } +size_t arch_jump_opcode_bytes(struct objtool_file *file, struct instruction *insn, + unsigned char *buf) +{ + size_t len; + + len = insn->len - insn->immediate_len; + memcpy(buf, insn->sec->data->d_buf + insn->offset, len); + return len; +} + void arch_initial_func_cfi_state(struct cfi_init_state *state) { int i; diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c index b780df5137152..118c3de2f293e 100644 --- a/tools/objtool/builtin-check.c +++ b/tools/objtool/builtin-check.c @@ -73,8 +73,6 @@ static int parse_hacks(const struct option *opt, const char *str, int unset) static const struct option check_options[] = { OPT_GROUP("Actions:"), - OPT_BOOLEAN(0, "checksum", &opts.checksum, "generate per-function checksums"), - OPT_BOOLEAN(0, "cfi", &opts.cfi, "annotate kernel control flow integrity (kCFI) function preambles"), OPT_STRING_OPTARG('d', "disas", &opts.disas, "function-pattern", "disassemble functions", "*"), OPT_CALLBACK_OPTARG('h', "hacks", NULL, NULL, "jump_label,noinstr,skylake", "patch toolchain bugs/limitations", parse_hacks), OPT_BOOLEAN('i', "ibt", &opts.ibt, "validate and annotate IBT"), @@ -85,7 +83,7 @@ static const struct option check_options[] = { OPT_BOOLEAN('r', "retpoline", &opts.retpoline, "validate and annotate retpoline usage"), OPT_BOOLEAN(0, "rethunk", &opts.rethunk, "validate and annotate rethunk usage"), OPT_BOOLEAN(0, "unret", &opts.unret, "validate entry unret placement"), - OPT_INTEGER(0, "prefix", &opts.prefix, "generate prefix symbols"), + OPT_INTEGER(0, "prefix", &opts.prefix, "generate or grow prefix symbols for N-byte function padding"), OPT_BOOLEAN('l', "sls", &opts.sls, "validate straight-line-speculation mitigations"), OPT_BOOLEAN('s', "stackval", &opts.stackval, "validate frame pointer rules"), OPT_BOOLEAN('t', "static-call", &opts.static_call, "annotate static calls"), @@ -93,9 +91,10 @@ static const struct option check_options[] = { OPT_CALLBACK_OPTARG(0, "dump", NULL, NULL, "orc", "dump metadata", parse_dump), OPT_GROUP("Options:"), + OPT_BOOLEAN(0, "cfi", &opts.cfi, "grow kCFI preamble symbols (use with --prefix)"), + OPT_BOOLEAN(0, "fineibt", &opts.fineibt, "create .cfi_sites section for FineIBT"), OPT_BOOLEAN(0, "backtrace", &opts.backtrace, "unwind on error"), OPT_BOOLEAN(0, "backup", &opts.backup, "create backup (.orig) file on warning/error"), - OPT_STRING(0, "debug-checksum", &opts.debug_checksum, "funcs", "enable checksum debug output"), OPT_BOOLEAN(0, "dry-run", &opts.dryrun, "don't write modifications"), OPT_BOOLEAN(0, "link", &opts.link, "object is a linked object"), OPT_BOOLEAN(0, "module", &opts.module, "object is part of a kernel module"), @@ -165,20 +164,17 @@ static bool opts_valid(void) return false; } -#ifndef BUILD_KLP - if (opts.checksum) { - ERROR("--checksum not supported; install xxhash-devel/libxxhash-dev (version >= 0.8) and recompile"); + if (opts.cfi && !opts.prefix) { + ERROR("--cfi requires --prefix"); return false; } -#endif - if (opts.debug_checksum && !opts.checksum) { - ERROR("--debug-checksum requires --checksum"); + if (opts.fineibt && !opts.cfi) { + ERROR("--fineibt requires --cfi"); return false; } - if (opts.checksum || - opts.disas || + if (opts.disas || opts.hack_jump_label || opts.hack_noinstr || opts.ibt || diff --git a/tools/objtool/builtin-klp.c b/tools/objtool/builtin-klp.c index 56d5a5b92f725..58c3b9bda3eb3 100644 --- a/tools/objtool/builtin-klp.c +++ b/tools/objtool/builtin-klp.c @@ -13,6 +13,7 @@ struct subcmd { }; static struct subcmd subcmds[] = { + { "checksum", "Generate per-function checksums", cmd_klp_checksum, }, { "diff", "Generate binary diff of two object files", cmd_klp_diff, }, { "post-link", "Finalize klp symbols/relocs after module linking", cmd_klp_post_link, }, }; diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 9b11cf3193b9c..10b18cf9c3608 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -18,7 +18,6 @@ #include <objtool/special.h> #include <objtool/trace.h> #include <objtool/warn.h> -#include <objtool/checksum.h> #include <objtool/util.h> #include <linux/objtool_types.h> @@ -64,8 +63,8 @@ struct instruction *next_insn_same_sec(struct objtool_file *file, return insn; } -static struct instruction *next_insn_same_func(struct objtool_file *file, - struct instruction *insn) +struct instruction *next_insn_same_func(struct objtool_file *file, + struct instruction *insn) { struct instruction *next = next_insn_same_sec(file, insn); struct symbol *func = insn_func(insn); @@ -113,10 +112,6 @@ static struct instruction *prev_insn_same_sym(struct objtool_file *file, for_each_sec(file->elf, __sec) \ sec_for_each_insn(file, __sec, insn) -#define func_for_each_insn(file, func, insn) \ - for (insn = find_insn(file, func->sec, func->offset); \ - insn; \ - insn = next_insn_same_func(file, insn)) #define sym_for_each_insn(file, sym, insn) \ for (insn = find_insn(file, sym->sec, sym->offset); \ @@ -491,7 +486,7 @@ static int decode_instructions(struct objtool_file *file) return -1; } - if (func->embedded_insn || func->alias != func) + if (func->embedded_insn || is_alias_sym(func)) continue; if (!find_insn(file, sec, func->offset)) { @@ -500,7 +495,7 @@ static int decode_instructions(struct objtool_file *file) } sym_for_each_insn(file, func, insn) { - insn->sym = func; + insn->_sym = func; if (is_func_sym(func) && insn->type == INSN_ENDBR && list_empty(&insn->call_node)) { @@ -864,15 +859,14 @@ static int create_ibt_endbr_seal_sections(struct objtool_file *file) list_for_each_entry(insn, &file->endbr_list, call_node) { int *site = (int *)sec->data->d_buf + idx; - struct symbol *sym = insn->sym; + struct symbol *func = insn_func(insn); *site = 0; - if (opts.module && sym && is_func_sym(sym) && - insn->offset == sym->offset && - (!strcmp(sym->name, "init_module") || - !strcmp(sym->name, "cleanup_module"))) { + if (opts.module && func && insn->offset == func->offset && + (!strcmp(func->name, "init_module") || + !strcmp(func->name, "cleanup_module"))) { ERROR("%s(): Magic init_module() function name is deprecated, use module_init(fn) instead", - sym->name); + func->name); return -1; } @@ -887,6 +881,31 @@ static int create_ibt_endbr_seal_sections(struct objtool_file *file) return 0; } +/* +* Grow __cfi_ symbols to fill the NOP gap between the 'mov <hash>, %rax' and +* the start of the function. +*/ +static int grow_cfi_symbols(struct objtool_file *file) +{ + struct symbol *sym; + + for_each_sym(file->elf, sym) { + if (!is_func_sym(sym) || !strstarts(sym->name, "__cfi_") || + sym->len != 5) + continue; + + if (!find_func_by_offset(sym->sec, sym->offset + sym->len + opts.prefix)) + continue; + + sym->len += opts.prefix; + sym->sym.st_size = sym->len; + if (elf_write_symbol(file->elf, sym)) + return -1; + } + + return 0; +} + static int create_cfi_sections(struct objtool_file *file) { struct section *sec; @@ -1023,59 +1042,6 @@ static int create_direct_call_sections(struct objtool_file *file) return 0; } -#ifdef BUILD_KLP -static int create_sym_checksum_section(struct objtool_file *file) -{ - struct section *sec; - struct symbol *sym; - unsigned int idx = 0; - struct sym_checksum *checksum; - size_t entsize = sizeof(struct sym_checksum); - - sec = find_section_by_name(file->elf, ".discard.sym_checksum"); - if (sec) { - if (!opts.dryrun) - WARN("file already has .discard.sym_checksum section, skipping"); - - return 0; - } - - for_each_sym(file->elf, sym) - if (sym->csum.checksum) - idx++; - - if (!idx) - return 0; - - sec = elf_create_section_pair(file->elf, ".discard.sym_checksum", entsize, - idx, idx); - if (!sec) - return -1; - - idx = 0; - for_each_sym(file->elf, sym) { - if (!sym->csum.checksum) - continue; - - if (!elf_init_reloc(file->elf, sec->rsec, idx, idx * entsize, - sym, 0, R_TEXT64)) - return -1; - - checksum = (struct sym_checksum *)sec->data->d_buf + idx; - checksum->addr = 0; /* reloc */ - checksum->checksum = sym->csum.checksum; - - mark_sec_changed(file->elf, sec, true); - - idx++; - } - - return 0; -} -#else -static int create_sym_checksum_section(struct objtool_file *file) { return -EINVAL; } -#endif - /* * Warnings shouldn't be reported for ignored functions. */ @@ -1349,14 +1315,11 @@ __weak bool arch_is_embedded_insn(struct symbol *sym) return false; } -static struct reloc *insn_reloc(struct objtool_file *file, struct instruction *insn) +struct reloc *insn_reloc(struct objtool_file *file, struct instruction *insn) { struct reloc *reloc; - if (insn->no_reloc) - return NULL; - - if (!file) + if (!file || insn->no_reloc || insn->fake) return NULL; reloc = find_reloc_by_dest_range(file->elf, insn->sec, @@ -1642,7 +1605,7 @@ static int add_jump_destinations(struct objtool_file *file) } if (!dest_sym || is_sec_sym(dest_sym)) { - dest_sym = dest_insn->sym; + dest_sym = insn_sym(dest_insn); if (!dest_sym) goto set_jump_dest; } @@ -1658,7 +1621,7 @@ static int add_jump_destinations(struct objtool_file *file) continue; } - if (!insn->sym || insn->sym->pfunc == dest_sym->pfunc) + if (!insn_sym(insn) || insn_sym(insn)->pfunc == dest_sym->pfunc) goto set_jump_dest; /* @@ -1831,7 +1794,6 @@ static int handle_group_alt(struct objtool_file *file, nop->offset = special_alt->new_off + special_alt->new_len; nop->len = special_alt->orig_len - special_alt->new_len; nop->type = INSN_NOP; - nop->sym = orig_insn->sym; nop->alt_group = new_alt_group; nop->fake = 1; } @@ -1850,7 +1812,6 @@ static int handle_group_alt(struct objtool_file *file, last_new_insn = insn; - insn->sym = orig_insn->sym; insn->alt_group = new_alt_group; /* @@ -2232,7 +2193,7 @@ static int add_jump_table_alts(struct objtool_file *file) return 0; for_each_sym(file->elf, func) { - if (!is_func_sym(func) || func->alias != func) + if (!is_func_sym(func) || is_alias_sym(func)) continue; mark_func_jump_tables(file, func); @@ -2493,12 +2454,12 @@ static int __annotate_late(struct objtool_file *file, int type, struct instructi break; case ANNOTYPE_NOCFI: - sym = insn->sym; + sym = insn_sym(insn); if (!sym) { ERROR_INSN(insn, "dodgy NOCFI annotation"); return -1; } - insn->sym->nocfi = 1; + sym->nocfi = 1; break; default: @@ -2566,7 +2527,6 @@ static int classify_symbols(struct objtool_file *file) static void mark_rodata(struct objtool_file *file) { struct section *sec; - bool found = false; /* * Search for the following rodata sections, each of which can @@ -2579,15 +2539,11 @@ static void mark_rodata(struct objtool_file *file) * .rodata.str1.* sections are ignored; they don't contain jump tables. */ for_each_sec(file->elf, sec) { - if ((!strncmp(sec->name, ".rodata", 7) && - !strstr(sec->name, ".str1.")) || - !strncmp(sec->name, ".data.rel.ro", 12)) { - sec->rodata = true; - found = true; + if (is_rodata_sec(sec)) { + file->rodata = true; + return; } } - - file->rodata = found; } static void mark_holes(struct objtool_file *file) @@ -2604,7 +2560,7 @@ static void mark_holes(struct objtool_file *file) * favour of a regular symbol, but leaves the code in place. */ for_each_insn(file, insn) { - if (insn->sym || !find_symbol_hole_containing(insn->sec, insn->offset)) { + if (insn_sym(insn) || !find_symbol_hole_containing(insn->sec, insn->offset)) { in_hole = false; continue; } @@ -2622,7 +2578,7 @@ static void mark_holes(struct objtool_file *file) if (insn->jump_dest) { struct symbol *dest_func = insn_func(insn->jump_dest); - if (dest_func && dest_func->cold) + if (dest_func && is_cold_func(dest_func)) dest_func->ignore = true; } } @@ -2630,14 +2586,35 @@ static void mark_holes(struct objtool_file *file) static bool validate_branch_enabled(void) { - return opts.stackval || - opts.orc || - opts.uaccess || + return opts.stackval || + opts.orc || + opts.uaccess; +} + +static bool alts_needed(void) +{ + return validate_branch_enabled() || + opts.noinstr || + opts.hack_jump_label || + opts.disas || opts.checksum; } -static int decode_sections(struct objtool_file *file) +int decode_file(struct objtool_file *file) { + arch_initial_func_cfi_state(&initial_func_cfi); + init_cfi_state(&init_cfi); + init_cfi_state(&func_cfi); + set_func_state(&func_cfi); + init_cfi_state(&force_undefined_cfi); + force_undefined_cfi.force_undefined = true; + + if (!cfi_hash_alloc(1UL << (file->elf->symbol_bits - 3))) + return -1; + + cfi_hash_add(&init_cfi); + cfi_hash_add(&func_cfi); + file->klp = is_livepatch_module(file); mark_rodata(file); @@ -2666,7 +2643,7 @@ static int decode_sections(struct objtool_file *file) * Must be before add_jump_destinations(), which depends on 'func' * being set for alternatives, to enable proper sibling call detection. */ - if (validate_branch_enabled() || opts.noinstr || opts.hack_jump_label || opts.disas) { + if (alts_needed()) { if (add_special_section_alts(file)) return -1; } @@ -3027,7 +3004,7 @@ static int update_cfi_state(struct instruction *insn, } if (op->dest.reg == CFI_BP && op->src.reg == CFI_SP && - insn->sym->frame_pointer) { + insn_sym(insn)->frame_pointer) { /* addi.d fp,sp,imm on LoongArch */ if (cfa->base == CFI_SP && cfa->offset == op->src.offset) { cfa->base = CFI_BP; @@ -3039,7 +3016,7 @@ static int update_cfi_state(struct instruction *insn, if (op->dest.reg == CFI_SP && op->src.reg == CFI_BP) { /* addi.d sp,fp,imm on LoongArch */ if (cfa->base == CFI_BP && cfa->offset == 0) { - if (insn->sym->frame_pointer) { + if (insn_sym(insn)->frame_pointer) { cfa->base = CFI_SP; cfa->offset = -op->src.offset; } @@ -3662,88 +3639,6 @@ static bool skip_alt_group(struct instruction *insn) return alt_insn->type == INSN_CLAC || alt_insn->type == INSN_STAC; } -static int checksum_debug_init(struct objtool_file *file) -{ - char *dup, *s; - - if (!opts.debug_checksum) - return 0; - - dup = strdup(opts.debug_checksum); - if (!dup) { - ERROR_GLIBC("strdup"); - return -1; - } - - s = dup; - while (*s) { - struct symbol *func; - char *comma; - - comma = strchr(s, ','); - if (comma) - *comma = '\0'; - - func = find_symbol_by_name(file->elf, s); - if (!func || !is_func_sym(func)) - WARN("--debug-checksum: can't find '%s'", s); - else - func->debug_checksum = 1; - - if (!comma) - break; - - s = comma + 1; - } - - free(dup); - return 0; -} - -static void checksum_update_insn(struct objtool_file *file, struct symbol *func, - struct instruction *insn) -{ - struct reloc *reloc = insn_reloc(file, insn); - unsigned long offset; - struct symbol *sym; - - if (insn->fake) - return; - - checksum_update(func, insn, insn->sec->data->d_buf + insn->offset, insn->len); - - if (!reloc) { - struct symbol *call_dest = insn_call_dest(insn); - - if (call_dest) - checksum_update(func, insn, call_dest->demangled_name, - strlen(call_dest->demangled_name)); - return; - } - - sym = reloc->sym; - offset = arch_insn_adjusted_addend(insn, reloc); - - if (is_string_sec(sym->sec)) { - char *str; - - str = sym->sec->data->d_buf + sym->offset + offset; - checksum_update(func, insn, str, strlen(str)); - return; - } - - if (is_sec_sym(sym)) { - sym = find_symbol_containing(reloc->sym->sec, offset); - if (!sym) - return; - - offset -= sym->offset; - } - - checksum_update(func, insn, sym->demangled_name, strlen(sym->demangled_name)); - checksum_update(func, insn, &offset, sizeof(offset)); -} - static int validate_branch(struct objtool_file *file, struct symbol *func, struct instruction *insn, struct insn_state state); static int do_validate_branch(struct objtool_file *file, struct symbol *func, @@ -4025,9 +3920,6 @@ static int do_validate_branch(struct objtool_file *file, struct symbol *func, insn->trace = 0; next_insn = next_insn_to_validate(file, insn); - if (opts.checksum && func && insn->sec) - checksum_update_insn(file, func, insn); - if (func && insn_func(insn) && func != insn_func(insn)->pfunc) { /* Ignore KCFI type preambles, which always fall through */ if (is_prefix_func(func)) @@ -4093,9 +3985,6 @@ static int validate_unwind_hint(struct objtool_file *file, struct symbol *func = insn_func(insn); int ret; - if (opts.checksum) - checksum_init(func); - ret = validate_branch(file, func, insn, *state); if (ret) BT_INSN(insn, "<=== (hint)"); @@ -4304,7 +4193,7 @@ static int validate_retpoline(struct objtool_file *file) * broken. */ list_for_each_entry(insn, &file->retpoline_call_list, call_node) { - struct symbol *sym = insn->sym; + struct symbol *sym = insn_sym(insn); if (sym && (is_notype_sym(sym) || is_func_sym(sym)) && !sym->nocfi) { @@ -4407,17 +4296,6 @@ static bool ignore_unreachable_insn(struct objtool_file *file, struct instructio * For FineIBT or kCFI, a certain number of bytes preceding the function may be * NOPs. Those NOPs may be rewritten at runtime and executed, so give them a * proper function name: __pfx_<func>. - * - * The NOPs may not exist for the following cases: - * - * - compiler cloned functions (*.cold, *.part0, etc) - * - asm functions created with inline asm or without SYM_FUNC_START() - * - * Also, the function may already have a prefix from a previous objtool run - * (livepatch extracted functions, or manually running objtool multiple times). - * - * So return 0 if the NOPs are missing or the function already has a prefix - * symbol. */ static int create_prefix_symbol(struct objtool_file *file, struct symbol *func) { @@ -4425,10 +4303,6 @@ static int create_prefix_symbol(struct objtool_file *file, struct symbol *func) char name[SYM_NAME_LEN]; struct cfi_state *cfi; - if (!is_func_sym(func) || is_prefix_func(func) || - func->cold || func->static_call_tramp) - return 0; - if ((strlen(func->name) + sizeof("__pfx_") > SYM_NAME_LEN)) { WARN("%s: symbol name too long, can't create __pfx_ symbol", func->name); @@ -4438,59 +4312,21 @@ static int create_prefix_symbol(struct objtool_file *file, struct symbol *func) if (snprintf_check(name, SYM_NAME_LEN, "__pfx_%s", func->name)) return -1; - if (file->klp) { - struct symbol *pfx; - - pfx = find_symbol_by_offset(func->sec, func->offset - opts.prefix); - if (pfx && is_prefix_func(pfx) && !strcmp(pfx->name, name)) - return 0; - } - - insn = find_insn(file, func->sec, func->offset); - if (!insn) { - WARN("%s: can't find starting instruction", func->name); + if (!elf_create_symbol(file->elf, name, func->sec, + GELF_ST_BIND(func->sym.st_info), + GELF_ST_TYPE(func->sym.st_info), + func->offset - opts.prefix, opts.prefix)) return -1; - } - - for (prev = prev_insn_same_sec(file, insn); - prev; - prev = prev_insn_same_sec(file, prev)) { - u64 offset; - if (prev->type != INSN_NOP) - return 0; - - offset = func->offset - prev->offset; - - if (offset > opts.prefix) - return 0; - - if (offset < opts.prefix) - continue; - - if (!elf_create_symbol(file->elf, name, func->sec, - GELF_ST_BIND(func->sym.st_info), - GELF_ST_TYPE(func->sym.st_info), - prev->offset, opts.prefix)) - return -1; - - break; - } - - if (!prev) - return 0; - - if (!insn->cfi) { - /* - * This can happen if stack validation isn't enabled or the - * function is annotated with STACK_FRAME_NON_STANDARD. - */ + /* Propagate insn->cfi to the prefix code */ + insn = find_insn(file, func->sec, func->offset); + if (!insn || !insn->cfi) return 0; - } - /* Propagate insn->cfi to the prefix code */ cfi = cfi_hash_find_or_add(insn->cfi); - for (; prev != insn; prev = next_insn_same_sec(file, prev)) + for (prev = find_insn(file, func->sec, func->offset - opts.prefix); + prev && prev != insn; + prev = next_insn_same_sec(file, prev)) prev->cfi = cfi; return 0; @@ -4498,15 +4334,20 @@ static int create_prefix_symbol(struct objtool_file *file, struct symbol *func) static int create_prefix_symbols(struct objtool_file *file) { - struct section *sec; + struct section *pfe_sec; struct symbol *func; + struct reloc *reloc; - for_each_sec(file->elf, sec) { - if (!is_text_sec(sec)) + for_each_sec(file->elf, pfe_sec) { + if (strcmp(pfe_sec->name, "__patchable_function_entries")) + continue; + if (!pfe_sec->rsec) continue; - sec_for_each_sym(sec, func) { - if (create_prefix_symbol(file, func)) + for_each_reloc(pfe_sec->rsec, reloc) { + func = find_func_by_offset(reloc->sym->sec, + reloc->sym->offset + reloc_addend(reloc) + opts.prefix); + if (func && create_prefix_symbol(file, func)) return -1; } } @@ -4526,7 +4367,7 @@ static int validate_symbol(struct objtool_file *file, struct section *sec, return 1; } - if (sym->pfunc != sym || sym->alias != sym) + if (sym->pfunc != sym || is_alias_sym(sym)) return 0; insn = find_insn(file, sec, sym->offset); @@ -4538,9 +4379,6 @@ static int validate_symbol(struct objtool_file *file, struct section *sec, func = insn_func(insn); - if (opts.checksum) - checksum_init(func); - if (opts.trace && !fnmatch(opts.trace, sym->name, 0)) { trace_enable(); TRACE("%s: validation begin\n", sym->name); @@ -4553,9 +4391,6 @@ static int validate_symbol(struct objtool_file *file, struct section *sec, TRACE("%s: validation %s\n\n", sym->name, ret ? "failed" : "end"); trace_disable(); - if (opts.checksum) - checksum_finish(func); - return ret; } @@ -4948,7 +4783,7 @@ struct insn_chunk { * which can trigger more allocations for .debug_* sections whose data hasn't * been read yet. */ -static void free_insns(struct objtool_file *file) +void free_insns(struct objtool_file *file) { struct instruction *insn; struct insn_chunk *chunks = NULL, *chunk; @@ -4995,26 +4830,7 @@ int check(struct objtool_file *file) objtool_disas_ctx = disas_ctx; } - arch_initial_func_cfi_state(&initial_func_cfi); - init_cfi_state(&init_cfi); - init_cfi_state(&func_cfi); - set_func_state(&func_cfi); - init_cfi_state(&force_undefined_cfi); - force_undefined_cfi.force_undefined = true; - - if (!cfi_hash_alloc(1UL << (file->elf->symbol_bits - 3))) { - ret = -1; - goto out; - } - - cfi_hash_add(&init_cfi); - cfi_hash_add(&func_cfi); - - ret = checksum_debug_init(file); - if (ret) - goto out; - - ret = decode_sections(file); + ret = decode_file(file); if (ret) goto out; @@ -5064,12 +4880,6 @@ int check(struct objtool_file *file) goto out; } - if (opts.cfi) { - ret = create_cfi_sections(file); - if (ret) - goto out; - } - if (opts.rethunk) { ret = create_return_sites_sections(file); if (ret) @@ -5089,9 +4899,21 @@ int check(struct objtool_file *file) } if (opts.prefix) { - ret = create_prefix_symbols(file); - if (ret) - goto out; + if (!opts.cfi) { + ret = create_prefix_symbols(file); + if (ret) + goto out; + } else { + ret = grow_cfi_symbols(file); + if (ret) + goto out; + + if (opts.fineibt) { + ret = create_cfi_sections(file); + if (ret) + goto out; + } + } } if (opts.ibt) { @@ -5103,12 +4925,6 @@ int check(struct objtool_file *file) if (opts.noabs) warnings += check_abs_references(file); - if (opts.checksum) { - ret = create_sym_checksum_section(file); - if (ret) - goto out; - } - if (opts.orc && nr_insns) { ret = orc_create(file); if (ret) diff --git a/tools/objtool/disas.c b/tools/objtool/disas.c index 59090234af190..e6a54a83605c9 100644 --- a/tools/objtool/disas.c +++ b/tools/objtool/disas.c @@ -210,7 +210,7 @@ static bool disas_print_addr_alt(bfd_vma addr, struct disassemble_info *dinfo) offset = addr - alt_group->first_insn->offset; addr = orig_first_insn->offset + offset; - sym = orig_first_insn->sym; + sym = insn_sym(orig_first_insn); disas_print_addr_sym(orig_first_insn->sec, sym, addr, dinfo); @@ -222,15 +222,13 @@ static void disas_print_addr_noreloc(bfd_vma addr, { struct disas_context *dctx = dinfo->application_data; struct instruction *insn = dctx->insn; - struct symbol *sym = NULL; + struct symbol *sym = insn_sym(insn); if (disas_print_addr_alt(addr, dinfo)) return; - if (insn->sym && addr >= insn->sym->offset && - addr < insn->sym->offset + insn->sym->len) { - sym = insn->sym; - } + if (sym && (addr < sym->offset || addr >= sym->offset + sym->len)) + sym = NULL; disas_print_addr_sym(insn->sec, sym, addr, dinfo); } @@ -291,9 +289,9 @@ static void disas_print_address(bfd_vma addr, struct disassemble_info *dinfo) * up. So check it first. */ jump_dest = insn->jump_dest; - if (jump_dest && jump_dest->sym && jump_dest->offset == addr) { + if (jump_dest && insn_sym(jump_dest) && jump_dest->offset == addr) { if (!disas_print_addr_alt(addr, dinfo)) - disas_print_addr_sym(jump_dest->sec, jump_dest->sym, + disas_print_addr_sym(jump_dest->sec, insn_sym(jump_dest), addr, dinfo); return; } @@ -768,8 +766,8 @@ static int disas_alt_jump(struct disas_alt *dalt) if (orig_insn->len == 5) suffix[0] = 'q'; str = strfmt("jmp%-3s %lx <%s+0x%lx>", suffix, - dest_insn->offset, dest_insn->sym->name, - dest_insn->offset - dest_insn->sym->offset); + dest_insn->offset, insn_sym(dest_insn)->name, + dest_insn->offset - insn_sym(dest_insn)->offset); nops = 0; } else { str = strfmt("nop%d", orig_insn->len); @@ -794,8 +792,8 @@ static int disas_alt_extable(struct disas_alt *dalt) alt_insn = dalt->alt->insn; str = strfmt("resume at 0x%lx <%s+0x%lx>", - alt_insn->offset, alt_insn->sym->name, - alt_insn->offset - alt_insn->sym->offset); + alt_insn->offset, insn_sym(alt_insn)->name, + alt_insn->offset - insn_sym(alt_insn)->offset); if (!str) return -1; diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index f3df2bde119fc..33c95a74a51bd 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -27,27 +27,16 @@ static ssize_t demangled_name_len(const char *name); -static inline u32 str_hash(const char *str) -{ - return jhash(str, strlen(str), 0); -} - -static inline u32 str_hash_demangled(const char *str) +u32 str_hash_demangled(const char *str) { return jhash(str, demangled_name_len(str), 0); } -#define __elf_table(name) (elf->name##_hash) -#define __elf_bits(name) (elf->name##_bits) - -#define __elf_table_entry(name, key) \ - __elf_table(name)[hash_min(key, __elf_bits(name))] - #define elf_hash_add(name, node, key) \ ({ \ struct elf_hash_node *__node = node; \ - __node->next = __elf_table_entry(name, key); \ - __elf_table_entry(name, key) = __node; \ + __node->next = __elf_table_entry(elf, name, key); \ + __elf_table_entry(elf, name, key) = __node; \ }) static inline void __elf_hash_del(struct elf_hash_node *node, @@ -69,30 +58,20 @@ static inline void __elf_hash_del(struct elf_hash_node *node, } #define elf_hash_del(name, node, key) \ - __elf_hash_del(node, &__elf_table_entry(name, key)) - -#define elf_list_entry(ptr, type, member) \ -({ \ - typeof(ptr) __ptr = (ptr); \ - __ptr ? container_of(__ptr, type, member) : NULL; \ -}) - -#define elf_hash_for_each_possible(name, obj, member, key) \ - for (obj = elf_list_entry(__elf_table_entry(name, key), typeof(*obj), member); \ - obj; \ - obj = elf_list_entry(obj->member.next, typeof(*(obj)), member)) + __elf_hash_del(node, &__elf_table_entry(elf, name, key)) #define elf_alloc_hash(name, size) \ ({ \ - __elf_bits(name) = max(10, ilog2(size)); \ - __elf_table(name) = mmap(NULL, sizeof(struct elf_hash_node *) << __elf_bits(name), \ + __elf_bits(elf, name) = max(10, ilog2(size)); \ + __elf_table(elf, name) = mmap(NULL, \ + sizeof(struct elf_hash_node *) << __elf_bits(elf, name), \ PROT_READ|PROT_WRITE, \ MAP_PRIVATE|MAP_ANON, -1, 0); \ - if (__elf_table(name) == (void *)-1L) { \ + if (__elf_table(elf, name) == (void *)-1L) { \ ERROR_GLIBC("mmap fail " #name); \ - __elf_table(name) = NULL; \ + __elf_table(elf, name) = NULL; \ } \ - __elf_table(name); \ + __elf_table(elf, name); \ }) static inline unsigned long __sym_start(struct symbol *s) @@ -141,7 +120,7 @@ struct section *find_section_by_name(const struct elf *elf, const char *name) { struct section *sec; - elf_hash_for_each_possible(section_name, sec, name_hash, str_hash(name)) { + elf_hash_for_each_possible(elf, section_name, sec, name_hash, str_hash(name)) { if (!strcmp(sec->name, name)) return sec; } @@ -154,7 +133,7 @@ static struct section *find_section_by_index(struct elf *elf, { struct section *sec; - elf_hash_for_each_possible(section, sec, hash, idx) { + elf_hash_for_each_possible(elf, section, sec, hash, idx) { if (sec->idx == idx) return sec; } @@ -166,7 +145,7 @@ static struct symbol *find_symbol_by_index(struct elf *elf, unsigned int idx) { struct symbol *sym; - elf_hash_for_each_possible(symbol, sym, hash, idx) { + elf_hash_for_each_possible(elf, symbol, sym, hash, idx) { if (sym->idx == idx) return sym; } @@ -229,6 +208,20 @@ struct symbol *find_symbol_containing(const struct section *sec, unsigned long o } /* + * Also match the symbol end address which can be used for a bounds comparison. + */ +struct symbol *find_symbol_containing_inclusive(const struct section *sec, + unsigned long offset) +{ + struct symbol *sym = find_symbol_containing(sec, offset); + + if (!sym && offset) + sym = find_symbol_containing(sec, offset - 1); + + return sym; +} + +/* * Returns size of hole starting at @offset. */ int find_symbol_hole_containing(const struct section *sec, unsigned long offset) @@ -285,7 +278,7 @@ struct symbol *find_symbol_by_name(const struct elf *elf, const char *name) { struct symbol *sym; - elf_hash_for_each_possible(symbol_name, sym, name_hash, str_hash(name)) { + elf_hash_for_each_possible(elf, symbol_name, sym, name_hash, str_hash(name)) { if (!strcmp(sym->name, name)) return sym; } @@ -300,7 +293,7 @@ static struct symbol *find_local_symbol_by_file_and_name(const struct elf *elf, { struct symbol *sym; - elf_hash_for_each_possible(symbol_name, sym, name_hash, str_hash_demangled(name)) { + elf_hash_for_each_possible(elf, symbol_name, sym, name_hash, str_hash_demangled(name)) { if (sym->bind == STB_LOCAL && sym->file == file && !strcmp(sym->name, name)) { return sym; @@ -314,7 +307,7 @@ struct symbol *find_global_symbol_by_name(const struct elf *elf, const char *nam { struct symbol *sym; - elf_hash_for_each_possible(symbol_name, sym, name_hash, str_hash_demangled(name)) { + elf_hash_for_each_possible(elf, symbol_name, sym, name_hash, str_hash_demangled(name)) { if (!strcmp(sym->name, name) && !is_local_sym(sym)) return sym; } @@ -322,21 +315,9 @@ struct symbol *find_global_symbol_by_name(const struct elf *elf, const char *nam return NULL; } -void iterate_global_symbol_by_demangled_name(const struct elf *elf, - const char *demangled_name, - void (*process)(struct symbol *sym, void *data), - void *data) -{ - struct symbol *sym; - - elf_hash_for_each_possible(symbol_name, sym, name_hash, str_hash(demangled_name)) { - if (!strcmp(sym->demangled_name, demangled_name) && !is_local_sym(sym)) - process(sym, data); - } -} - +/* If there are multiple matches, return the first one in the range */ struct reloc *find_reloc_by_dest_range(const struct elf *elf, struct section *sec, - unsigned long offset, unsigned int len) + unsigned long offset, unsigned int len) { struct reloc *reloc, *r = NULL; struct section *rsec; @@ -347,7 +328,7 @@ struct reloc *find_reloc_by_dest_range(const struct elf *elf, struct section *se return NULL; for_offset_range(o, offset, offset + len) { - elf_hash_for_each_possible(reloc, reloc, hash, + elf_hash_for_each_possible(elf, reloc, reloc, hash, sec_offset_hash(rsec, o)) { if (reloc->sec != rsec) continue; @@ -358,11 +339,11 @@ struct reloc *find_reloc_by_dest_range(const struct elf *elf, struct section *se r = reloc; } } - if (r) + if (r && (reloc_offset(r) & OFFSET_STRIDE_MASK) == o) return r; } - return NULL; + return r; } struct reloc *find_reloc_by_dest(const struct elf *elf, struct section *sec, unsigned long offset) @@ -668,7 +649,7 @@ static int read_symbols(struct elf *elf) if (is_file_sym(sym)) file = sym; - else if (sym->bind == STB_LOCAL) + else if (sym->bind == STB_LOCAL && !is_sec_sym(sym)) sym->file = file; } @@ -1016,6 +997,26 @@ non_local: return sym; } +int elf_write_symbol(struct elf *elf, struct symbol *sym) +{ + struct section *symtab, *symtab_shndx; + + symtab = find_section_by_name(elf, ".symtab"); + if (!symtab) { + ERROR("no .symtab"); + return -1; + } + + symtab_shndx = find_section_by_name(elf, ".symtab_shndx"); + + if (elf_update_symbol(elf, symtab, symtab_shndx, sym)) + return -1; + + mark_sec_changed(elf, symtab, true); + + return 0; +} + struct symbol *elf_create_section_symbol(struct elf *elf, struct section *sec) { struct symbol *sym = calloc(1, sizeof(*sym)); @@ -1172,6 +1173,17 @@ static int read_relocs(struct elf *elf) return 0; } +static void mark_rodata(struct elf *elf) +{ + struct section *sec; + + for_each_sec(elf, sec) { + if ((strstarts(sec->name, ".rodata") && !strstr(sec->name, ".str1.")) || + strstarts(sec->name, ".data.rel.ro")) + sec->rodata = true; + } +} + struct elf *elf_open_read(const char *name, int flags) { struct elf *elf; @@ -1222,6 +1234,8 @@ struct elf *elf_open_read(const char *name, int flags) if (read_sections(elf)) goto err; + mark_rodata(elf); + if (read_symbols(elf)) goto err; diff --git a/tools/objtool/include/objtool/arch.h b/tools/objtool/include/objtool/arch.h index 8866158975fcb..96d828a8401fd 100644 --- a/tools/objtool/include/objtool/arch.h +++ b/tools/objtool/include/objtool/arch.h @@ -79,6 +79,9 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec unsigned long offset, unsigned int maxlen, struct instruction *insn); +size_t arch_jump_opcode_bytes(struct objtool_file *file, struct instruction *insn, + unsigned char *buf); + bool arch_callee_saved_reg(unsigned char reg); unsigned long arch_jump_destination(struct instruction *insn); diff --git a/tools/objtool/include/objtool/builtin.h b/tools/objtool/include/objtool/builtin.h index b9e229ed4dc05..e844e9c82b7b2 100644 --- a/tools/objtool/include/objtool/builtin.h +++ b/tools/objtool/include/objtool/builtin.h @@ -9,8 +9,8 @@ struct opts { /* actions: */ - bool cfi; bool checksum; + const char *disas; bool dump_orc; bool hack_jump_label; bool hack_noinstr; @@ -20,6 +20,7 @@ struct opts { bool noabs; bool noinstr; bool orc; + int prefix; bool retpoline; bool rethunk; bool unret; @@ -27,14 +28,14 @@ struct opts { bool stackval; bool static_call; bool uaccess; - int prefix; - const char *disas; /* options: */ bool backtrace; bool backup; + bool cfi; const char *debug_checksum; bool dryrun; + bool fineibt; bool link; bool mnop; bool module; diff --git a/tools/objtool/include/objtool/check.h b/tools/objtool/include/objtool/check.h index 5f2f77bd9b416..063f5985fecd0 100644 --- a/tools/objtool/include/objtool/check.h +++ b/tools/objtool/include/objtool/check.h @@ -68,6 +68,7 @@ struct instruction { s8 instr; u32 idx : INSN_CHUNK_BITS, + immediate_len : 4, dead_end : 1, ignore_alts : 1, hint : 1, @@ -81,7 +82,7 @@ struct instruction { hole : 1, fake : 1, trace : 1; - /* 9 bit hole */ + /* 4 bit hole */ struct alt_group *alt_group; struct instruction *jump_dest; @@ -94,14 +95,30 @@ struct instruction { }; }; struct alternative *alts; - struct symbol *sym; + struct symbol *_sym; struct stack_op *stack_ops; struct cfi_state *cfi; }; +/* + * Return the symbol associated with an instruction. For alternative + * replacements, return the symbol of the original code being replaced rather + * than NULL. insn->_sym reflects the actual location in the ELF file. + */ +static inline struct symbol *insn_sym(struct instruction *insn) +{ + struct symbol *sym = insn->_sym; + + if ((!sym || !is_func_sym(sym)) && + insn->alt_group && insn->alt_group->orig_group) + sym = insn->alt_group->orig_group->first_insn->_sym; + + return sym; +} + static inline struct symbol *insn_func(struct instruction *insn) { - struct symbol *sym = insn->sym; + struct symbol *sym = insn_sym(insn); if (sym && sym->type != STT_FUNC) sym = NULL; @@ -144,6 +161,12 @@ struct instruction *find_insn(struct objtool_file *file, struct section *sec, unsigned long offset); struct instruction *next_insn_same_sec(struct objtool_file *file, struct instruction *insn); +struct instruction *next_insn_same_func(struct objtool_file *file, struct instruction *insn); + +#define func_for_each_insn(file, func, insn) \ + for (insn = find_insn(file, func->sec, func->offset); \ + insn; \ + insn = next_insn_same_func(file, insn)) #define sec_for_each_insn(file, _sec, insn) \ for (insn = find_insn(file, _sec, 0); \ @@ -155,6 +178,11 @@ struct instruction *next_insn_same_sec(struct objtool_file *file, struct instruc insn && insn->offset < sym->offset + sym->len; \ insn = next_insn_same_sec(file, insn)) +struct reloc *insn_reloc(struct objtool_file *file, struct instruction *insn); + +int decode_file(struct objtool_file *file); +void free_insns(struct objtool_file *file); + const char *objtool_disas_insn(struct instruction *insn); extern size_t sym_name_max_len; diff --git a/tools/objtool/include/objtool/checksum.h b/tools/objtool/include/objtool/checksum.h index 7fe21608722ac..d46293f547166 100644 --- a/tools/objtool/include/objtool/checksum.h +++ b/tools/objtool/include/objtool/checksum.h @@ -6,37 +6,54 @@ #ifdef BUILD_KLP -static inline void checksum_init(struct symbol *func) +static inline void checksum_init(struct symbol *sym) { - if (func && !func->csum.state) { - func->csum.state = XXH3_createState(); - XXH3_64bits_reset(func->csum.state); + if (sym && !sym->csum.state) { + sym->csum.state = XXH3_createState(); + XXH3_64bits_reset(sym->csum.state); } } -static inline void checksum_update(struct symbol *func, - struct instruction *insn, - const void *data, size_t size) +static inline void __checksum_update(struct symbol *sym, const void *data, + size_t size) { - XXH3_64bits_update(func->csum.state, data, size); - dbg_checksum(func, insn, XXH3_64bits_digest(func->csum.state)); + XXH3_64bits_update(sym->csum.state, data, size); } -static inline void checksum_finish(struct symbol *func) +static inline void __checksum_update_insn(struct symbol *sym, + struct instruction *insn, + const void *data, size_t size) { - if (func && func->csum.state) { - func->csum.checksum = XXH3_64bits_digest(func->csum.state); - func->csum.state = NULL; + __checksum_update(sym, data, size); + dbg_checksum_insn(sym, insn, XXH3_64bits_digest(sym->csum.state)); +} + +static inline void __checksum_update_object(struct symbol *sym, + unsigned long offset, + const char *what, const void *data, + size_t size) +{ + __checksum_update(sym, &offset, sizeof(offset)); + __checksum_update(sym, data, size); + dbg_checksum_object(sym, offset, what, XXH3_64bits_digest(sym->csum.state)); +} + +static inline void checksum_finish(struct symbol *sym) +{ + if (sym && sym->csum.state) { + sym->csum.checksum = XXH3_64bits_digest(sym->csum.state); + XXH3_freeState(sym->csum.state); + sym->csum.state = NULL; } } +int calculate_checksums(struct objtool_file *file); +int create_sym_checksum_section(struct objtool_file *file); + #else /* !BUILD_KLP */ -static inline void checksum_init(struct symbol *func) {} -static inline void checksum_update(struct symbol *func, - struct instruction *insn, - const void *data, size_t size) {} -static inline void checksum_finish(struct symbol *func) {} +static inline int calculate_checksums(struct objtool_file *file) { return -ENOSYS; } +static inline int create_sym_checksum_section(struct objtool_file *file) { return -EINVAL; } #endif /* !BUILD_KLP */ diff --git a/tools/objtool/include/objtool/elf.h b/tools/objtool/include/objtool/elf.h index 25573e5af76ef..d9c44df9cc76a 100644 --- a/tools/objtool/include/objtool/elf.h +++ b/tools/objtool/include/objtool/elf.h @@ -21,6 +21,13 @@ #define SEC_NAME_LEN 1024 #define SYM_NAME_LEN 512 +static inline u32 str_hash(const char *str) +{ + return jhash(str, strlen(str), 0); +} + +u32 str_hash_demangled(const char *str); + #define bswap_if_needed(elf, val) __bswap_if_needed(&elf->ehdr, val) #ifdef LIBELF_USE_DEPRECATED @@ -89,6 +96,7 @@ struct symbol { u8 changed : 1; u8 included : 1; u8 klp : 1; + u8 dont_correlate : 1; struct list_head pv_target; struct reloc *relocs; struct section *group_sec; @@ -130,6 +138,23 @@ struct elf { struct symbol *symbol_data; }; +#define __elf_table(elf, name) ((elf)->name##_hash) +#define __elf_bits(elf, name) ((elf)->name##_bits) + +#define __elf_table_entry(elf, name, key) \ + __elf_table(elf, name)[hash_min(key, __elf_bits(elf, name))] + +#define elf_list_entry(ptr, type, member) \ +({ \ + typeof(ptr) __ptr = (ptr); \ + __ptr ? container_of(__ptr, type, member) : NULL; \ +}) + +#define elf_hash_for_each_possible(elf, name, obj, member, key) \ + for (obj = elf_list_entry(__elf_table_entry(elf, name, key), typeof(*obj), member); \ + obj; \ + obj = elf_list_entry(obj->member.next, typeof(*(obj)), member)) + struct elf *elf_open_read(const char *name, int flags); struct elf *elf_create_file(GElf_Ehdr *ehdr, const char *name); @@ -175,6 +200,7 @@ struct reloc *elf_init_reloc_data_sym(struct elf *elf, struct section *sec, struct symbol *sym, s64 addend); +int elf_write_symbol(struct elf *elf, struct symbol *sym); int elf_write_insn(struct elf *elf, struct section *sec, unsigned long offset, unsigned int len, const char *insn); @@ -186,10 +212,8 @@ struct symbol *find_func_by_offset(struct section *sec, unsigned long offset); struct symbol *find_symbol_by_offset(struct section *sec, unsigned long offset); struct symbol *find_symbol_by_name(const struct elf *elf, const char *name); struct symbol *find_global_symbol_by_name(const struct elf *elf, const char *name); -void iterate_global_symbol_by_demangled_name(const struct elf *elf, const char *demangled_name, - void (*process)(struct symbol *sym, void *data), - void *data); struct symbol *find_symbol_containing(const struct section *sec, unsigned long offset); +struct symbol *find_symbol_containing_inclusive(const struct section *sec, unsigned long offset); int find_symbol_hole_containing(const struct section *sec, unsigned long offset); struct reloc *find_reloc_by_dest(const struct elf *elf, struct section *sec, unsigned long offset); struct reloc *find_reloc_by_dest_range(const struct elf *elf, struct section *sec, @@ -276,11 +300,21 @@ static inline bool is_local_sym(struct symbol *sym) return sym->bind == STB_LOCAL; } +static inline bool is_alias_sym(struct symbol *sym) +{ + return sym->alias != sym; +} + static inline bool is_prefix_func(struct symbol *sym) { return sym->prefix; } +static inline bool is_cold_func(struct symbol *sym) +{ + return sym->cold; +} + static inline bool is_reloc_sec(struct section *sec) { return sec->sh.sh_type == SHT_RELA || sec->sh.sh_type == SHT_REL; @@ -296,6 +330,11 @@ static inline bool is_text_sec(struct section *sec) return sec->sh.sh_flags & SHF_EXECINSTR; } +static inline bool is_rodata_sec(struct section *sec) +{ + return sec->rodata; +} + static inline bool sec_changed(struct section *sec) { return sec->_changed; @@ -468,6 +507,16 @@ static inline void set_sym_next_reloc(struct reloc *reloc, struct reloc *next) #define for_each_sym_continue(elf, sym) \ list_for_each_entry_continue(sym, &elf->symbols, global_list) +#define for_each_sym_by_name(elf, _name, sym) \ + elf_hash_for_each_possible(elf, symbol_name, sym, name_hash, \ + str_hash_demangled(_name)) \ + if (strcmp(sym->name, _name)) {} else + +#define for_each_sym_by_demangled_name(elf, name, sym) \ + elf_hash_for_each_possible(elf, symbol_name, sym, name_hash, \ + str_hash(name)) \ + if (strcmp(sym->demangled_name, name)) {} else + #define rsec_next_reloc(rsec, reloc) \ reloc_idx(reloc) < sec_num_entries(rsec) - 1 ? reloc + 1 : NULL @@ -491,10 +540,10 @@ static inline struct symbol *get_func_prefix(struct symbol *func) { struct symbol *prev; - if (!is_func_sym(func)) + if (!is_func_sym(func) || !func->offset) return NULL; - prev = sec_prev_sym(func); + prev = find_func_containing(func->sec, func->offset - 1); if (prev && is_prefix_func(prev)) return prev; diff --git a/tools/objtool/include/objtool/klp.h b/tools/objtool/include/objtool/klp.h index e32e5e8bc6312..6f60cf05db864 100644 --- a/tools/objtool/include/objtool/klp.h +++ b/tools/objtool/include/objtool/klp.h @@ -29,6 +29,7 @@ struct klp_reloc { u32 type; }; +int cmd_klp_checksum(int argc, const char **argv); int cmd_klp_diff(int argc, const char **argv); int cmd_klp_post_link(int argc, const char **argv); diff --git a/tools/objtool/include/objtool/warn.h b/tools/objtool/include/objtool/warn.h index fa8b7d292e839..870e147f3a565 100644 --- a/tools/objtool/include/objtool/warn.h +++ b/tools/objtool/include/objtool/warn.h @@ -77,13 +77,13 @@ static inline char *offstr(struct section *sec, unsigned long offset) #define WARN_INSN(insn, format, ...) \ ({ \ struct instruction *_insn = (insn); \ - if (!_insn->sym || !_insn->sym->warned) { \ + if (!insn_sym(_insn) || !insn_sym(_insn)->warned) { \ WARN_FUNC(_insn->sec, _insn->offset, format, \ ##__VA_ARGS__); \ BT_INSN(_insn, ""); \ } \ - if (_insn->sym) \ - _insn->sym->warned = 1; \ + if (insn_sym(_insn)) \ + insn_sym(_insn)->warned = 1; \ }) #define BT_INSN(insn, format, ...) \ @@ -109,7 +109,7 @@ static inline char *offstr(struct section *sec, unsigned long offset) #define ERROR_FUNC(sec, offset, format, ...) __WARN_FUNC(ERROR_STR, sec, offset, format, ##__VA_ARGS__) #define ERROR_INSN(insn, format, ...) ERROR_FUNC(insn->sec, insn->offset, format, ##__VA_ARGS__) -extern bool debug; +extern bool debug, debug_correlate, debug_clone; extern int indent; static inline void unindent(int *unused) { indent--; } @@ -130,32 +130,39 @@ static inline void unindent(int *unused) { indent--; } objname ? ": " : "", \ ##__VA_ARGS__) -#define dbg(args...) \ +#define dbg_checksum_insn(func, insn, checksum) \ ({ \ - if (unlikely(debug)) \ + if (unlikely(func->debug_checksum)) { \ + char *insn_off = offstr(insn->sec, insn->offset); \ + __dbg("checksum: %s(): %s %016llx", \ + func->name, insn_off, (unsigned long long)checksum);\ + free(insn_off); \ + } \ +}) + +#define dbg_checksum_object(sym, offset, what, checksum) \ +({ \ + if (unlikely(sym->debug_checksum)) \ + __dbg("checksum: %s+0x%lx: %s %016llx", \ + sym->name, offset, what, \ + (unsigned long long)checksum); \ +}) + +#define dbg_correlate(args...) \ +({ \ + if (unlikely(debug_correlate)) \ __dbg(args); \ }) -#define __dbg_indent(format, ...) \ +#define __dbg_clone(format, ...) \ ({ \ - if (unlikely(debug)) \ + if (unlikely(debug_clone)) \ __dbg("%*s" format, indent * 8, "", ##__VA_ARGS__); \ }) -#define dbg_indent(args...) \ +#define dbg_clone(args...) \ int __cleanup(unindent) __dummy_##__COUNTER__; \ - __dbg_indent(args); \ + __dbg_clone(args); \ indent++ -#define dbg_checksum(func, insn, checksum) \ -({ \ - if (unlikely(insn->sym && insn->sym->pfunc && \ - insn->sym->pfunc->debug_checksum)) { \ - char *insn_off = offstr(insn->sec, insn->offset); \ - __dbg("checksum: %s %s %016llx", \ - func->name, insn_off, (unsigned long long)checksum);\ - free(insn_off); \ - } \ -}) - #endif /* _WARN_H */ diff --git a/tools/objtool/klp-checksum.c b/tools/objtool/klp-checksum.c new file mode 100644 index 0000000000000..b8e47f28997e9 --- /dev/null +++ b/tools/objtool/klp-checksum.c @@ -0,0 +1,347 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +#include <string.h> +#include <subcmd/parse-options.h> + +#include <objtool/arch.h> +#include <objtool/builtin.h> +#include <objtool/check.h> +#include <objtool/elf.h> +#include <objtool/klp.h> +#include <objtool/objtool.h> +#include <objtool/warn.h> +#include <objtool/checksum.h> + +static int checksum_debug_init(struct objtool_file *file) +{ + char *dup, *s; + + if (!opts.debug_checksum) + return 0; + + dup = strdup(opts.debug_checksum); + if (!dup) { + ERROR_GLIBC("strdup"); + return -1; + } + + s = dup; + while (*s) { + bool found = false; + struct symbol *sym; + char *comma; + + comma = strchr(s, ','); + if (comma) + *comma = '\0'; + + for_each_sym_by_name(file->elf, s, sym) { + if (!is_func_sym(sym) && !is_object_sym(sym)) + continue; + sym->debug_checksum = 1; + found = true; + } + + if (!found) + WARN("--debug-checksum: can't find '%s'", s); + + if (!comma) + break; + + s = comma + 1; + } + + free(dup); + return 0; +} + +static void checksum_update_insn(struct objtool_file *file, struct symbol *func, + struct instruction *insn) +{ + struct reloc *reloc = insn_reloc(file, insn); + struct alternative *alt; + unsigned long offset; + struct symbol *sym; + static bool in_alt; + + if (insn->fake) + return; + + if (!reloc) { + struct symbol *call_dest = insn_call_dest(insn); + struct instruction *jump_dest = insn->jump_dest; + + /* + * For a jump/call non-relocated dest offset embedded in the + * instruction, the offset may vary due to changes in + * surrounding code. Just hash the opcode and a + * position-independent representation of the destination. + */ + + if (call_dest || jump_dest) { + unsigned char buf[16]; + size_t len; + + len = arch_jump_opcode_bytes(file, insn, buf); + __checksum_update_insn(func, insn, buf, len); + + if (call_dest) { + __checksum_update_insn(func, insn, call_dest->demangled_name, + strlen(call_dest->demangled_name)); + + } else if (jump_dest) { + struct symbol *dest_sym; + unsigned long offset; + + /* + * use insn->_sym instead of insn_sym() here. + * For alternative replacements, the latter + * would give the function of the code being + * replaced. + */ + dest_sym = jump_dest->_sym; + if (!dest_sym) + goto alts; + + __checksum_update_insn(func, insn, dest_sym->demangled_name, + strlen(dest_sym->demangled_name)); + + offset = jump_dest->offset - dest_sym->offset; + __checksum_update_insn(func, insn, &offset, sizeof(offset)); + } + + goto alts; + } + } + + __checksum_update_insn(func, insn, insn->sec->data->d_buf + insn->offset, insn->len); + + if (!reloc) + goto alts; + + sym = reloc->sym; + offset = arch_insn_adjusted_addend(insn, reloc); + + if (is_string_sec(sym->sec)) { + char *str; + + str = sym->sec->data->d_buf + sym->offset + offset; + __checksum_update_insn(func, insn, str, strlen(str)); + goto alts; + } + + if (is_sec_sym(sym)) { + sym = find_symbol_containing(reloc->sym->sec, offset); + if (!sym) + goto alts; + + offset -= sym->offset; + } + + __checksum_update_insn(func, insn, sym->demangled_name, + strlen(sym->demangled_name)); + __checksum_update_insn(func, insn, &offset, sizeof(offset)); + +alts: + for (alt = insn->alts; alt; alt = alt->next) { + struct alt_group *alt_group = alt->insn->alt_group; + + /* Prevent __ex_table recursion, e.g. LOAD_SEGMENT() */ + if (in_alt) + break; + in_alt = true; + + __checksum_update_insn(func, insn, &alt->type, + sizeof(alt->type)); + + if (alt_group && alt_group->orig_group) { + struct instruction *alt_insn; + + __checksum_update_insn(func, insn, &alt_group->feature,sizeof(alt_group->feature)); + + for (alt_insn = alt->insn; alt_insn; alt_insn = next_insn_same_sec(file, alt_insn)) { + checksum_update_insn(file, func, alt_insn); + if (!alt_group->last_insn || alt_insn == alt_group->last_insn) + break; + } + } else { + checksum_update_insn(file, func, alt->insn); + } + + in_alt = false; + } +} + +static void checksum_update_object(struct objtool_file *file, struct symbol *sym) +{ + struct reloc *reloc; + + __checksum_update_object(sym, 0, "len", &sym->len, sizeof(sym->len)); + + if (sym->sec->data->d_buf) + __checksum_update_object(sym, 0, "data", + sym->sec->data->d_buf + sym->offset, + sym->len); + + sym_for_each_reloc(file->elf, sym, reloc) { + unsigned long sym_offset = reloc_offset(reloc) - sym->offset; + struct symbol *target = reloc->sym; + s64 offset; + + offset = reloc_addend(reloc); + + if (is_string_sec(target->sec)) { + char *str; + + str = target->sec->data->d_buf + target->offset + offset; + __checksum_update_object(sym, sym_offset, + "reloc string", str, strlen(str)); + continue; + } + + if (is_sec_sym(target)) { + target = find_symbol_containing(reloc->sym->sec, offset); + if (!target) + continue; + + offset -= target->offset; + } + + __checksum_update_object(sym, sym_offset, "reloc name", + target->demangled_name, + strlen(target->demangled_name)); + __checksum_update_object(sym, sym_offset, "reloc addend", + &offset, sizeof(offset)); + } +} + +int calculate_checksums(struct objtool_file *file) +{ + struct instruction *insn; + struct symbol *sym; + + if (checksum_debug_init(file)) + return -1; + + for_each_sym(file->elf, sym) { + + /* + * Skip cold subfunctions and aliases: they share the + * parent's checksum via func_for_each_insn() which + * follows func->cfunc into the cold subfunction. + */ + if (is_cold_func(sym) || is_alias_sym(sym) || !sym->len || + !sym->sec || !sym->sec->data) + continue; + + if (is_func_sym(sym)) { + checksum_init(sym); + func_for_each_insn(file, sym, insn) + checksum_update_insn(file, sym, insn); + checksum_finish(sym); + + } else if (is_object_sym(sym)) { + checksum_init(sym); + checksum_update_object(file, sym); + checksum_finish(sym); + } + + } + + return 0; +} + +int create_sym_checksum_section(struct objtool_file *file) +{ + struct section *sec; + struct symbol *sym; + unsigned int idx = 0; + struct sym_checksum *checksum; + size_t entsize = sizeof(struct sym_checksum); + + sec = find_section_by_name(file->elf, ".discard.sym_checksum"); + if (sec) { + if (!opts.dryrun) + WARN("file already has .discard.sym_checksum section, skipping"); + + return 0; + } + + for_each_sym(file->elf, sym) + if (sym->csum.checksum) + idx++; + + sec = elf_create_section_pair(file->elf, ".discard.sym_checksum", entsize, + idx, idx); + if (!sec) + return -1; + + idx = 0; + for_each_sym(file->elf, sym) { + if (!sym->csum.checksum) + continue; + + if (!elf_init_reloc(file->elf, sec->rsec, idx, idx * entsize, + sym, 0, R_TEXT64)) + return -1; + + checksum = (struct sym_checksum *)sec->data->d_buf + idx; + checksum->addr = 0; /* reloc */ + checksum->checksum = sym->csum.checksum; + + mark_sec_changed(file->elf, sec, true); + + idx++; + } + + return 0; +} + +static const char * const klp_checksum_usage[] = { + "objtool klp checksum [<options>] file.o", + NULL, +}; + +int cmd_klp_checksum(int argc, const char **argv) +{ + struct objtool_file *file; + int ret; + + const struct option options[] = { + OPT_STRING(0, "debug-checksum", &opts.debug_checksum, "syms", "enable checksum debug output"), + OPT_BOOLEAN(0, "dry-run", &opts.dryrun, "don't write modifications"), + OPT_END(), + }; + + argc = parse_options(argc, argv, options, klp_checksum_usage, 0); + if (argc != 1) + usage_with_options(klp_checksum_usage, options); + + opts.checksum = true; + + objname = argv[0]; + + file = objtool_open_read(objname); + if (!file) + return 1; + + ret = decode_file(file); + if (ret) + goto out; + + ret = calculate_checksums(file); + if (ret) + goto out; + + ret = create_sym_checksum_section(file); + +out: + free_insns(file); + + if (ret) + return ret; + + if (!opts.dryrun && file->elf->changed && elf_write(file->elf)) + return 1; + + return elf_close(file->elf); +} diff --git a/tools/objtool/klp-diff.c b/tools/objtool/klp-diff.c index 0b0d1503851ff..f8787d7d14547 100644 --- a/tools/objtool/klp-diff.c +++ b/tools/objtool/klp-diff.c @@ -33,6 +33,9 @@ struct export { char *mod, *sym; }; +bool debug, debug_correlate, debug_clone; +int indent; + static const char * const klp_diff_usage[] = { "objtool klp diff [<options>] <in1.o> <in2.o> <out.o>", NULL, @@ -40,17 +43,14 @@ static const char * const klp_diff_usage[] = { static const struct option klp_diff_options[] = { OPT_GROUP("Options:"), - OPT_BOOLEAN('d', "debug", &debug, "enable debug output"), + OPT_BOOLEAN('d', "debug", &debug, "enable all debug output"), + OPT_BOOLEAN(0, "debug-correlate", &debug_correlate, "enable correlation debug output"), + OPT_BOOLEAN(0, "debug-clone", &debug_clone, "enable cloning debug output"), OPT_END(), }; static DEFINE_HASHTABLE(exports, 15); -static inline u32 str_hash(const char *str) -{ - return jhash(str, strlen(str), 0); -} - static char *escape_str(const char *orig) { size_t len = 0; @@ -171,7 +171,7 @@ static int read_sym_checksums(struct elf *elf) sec = find_section_by_name(elf, ".discard.sym_checksum"); if (!sec) { - ERROR("'%s' missing .discard.sym_checksum section, file not processed by 'objtool --checksum'?", + ERROR("'%s' missing .discard.sym_checksum section, file not processed by 'objtool klp checksum'?", elf->name); return -1; } @@ -206,7 +206,7 @@ static int read_sym_checksums(struct elf *elf) return -1; } - if (is_func_sym(sym)) + if (is_func_sym(sym) || is_object_sym(sym)) sym->csum.checksum = sym_checksum->checksum; } @@ -242,25 +242,39 @@ static struct symbol *next_file_symbol(struct elf *elf, struct symbol *sym) static bool is_uncorrelated_static_local(struct symbol *sym) { static const char * const vars[] = { - "__already_done.", - "__func__.", - "__key.", - "__warned.", - "_entry.", - "_entry_ptr.", - "_rs.", - "descriptor.", - "CSWTCH.", + "__already_done", + "__func__", + "__key", + "__warned", + "_entry", + "_entry_ptr", + "_rs", + "descriptor", + "CSWTCH", }; + const char *dot; if (!is_object_sym(sym) || !is_local_sym(sym)) return false; - if (!strcmp(sym->sec->name, ".data.once")) + /* WARN_ONCE, etc */ + if (!strcmp(sym->sec->name, ".data..once")) return true; + dot = strchr(sym->name, '.'); + if (!dot) + return false; + for (int i = 0; i < ARRAY_SIZE(vars); i++) { - if (strstarts(sym->name, vars[i])) + size_t len = strlen(vars[i]); + + /* GCC: <var>.<id> */ + if (strstarts(sym->name, vars[i]) && (sym->name[len] == '.')) + return true; + + /* Clang: <func>.<var>[.<id>] */ + if (strstarts(dot + 1, vars[i]) && + (dot[1 + len] == '.' || dot[1 + len] == '\0')) return true; } @@ -268,20 +282,21 @@ static bool is_uncorrelated_static_local(struct symbol *sym) } /* - * Clang emits several useless .Ltmp_* code labels. + * .L symbols are assembler-local labels not present in kallsyms. They must + * never become KLP relocations; instead their data is cloned into the patch + * module. This covers .Ltmp* (Clang temp labels), .L__const.* (Clang local + * constants), and any other assembler-local pattern. */ -static bool is_clang_tmp_label(struct symbol *sym) +static bool is_local_label(struct symbol *sym) { - return is_notype_sym(sym) && - is_text_sec(sym->sec) && - strstarts(sym->name, ".Ltmp") && - isdigit(sym->name[5]); + return strstarts(sym->name, ".L"); } static bool is_special_section(struct section *sec) { static const char * const specials[] = { ".altinstructions", + ".kcfi_traps", ".smp_locks", "__bug_table", "__ex_table", @@ -339,6 +354,45 @@ static bool is_special_section_aux(struct section *sec) } /* + * Symbols created by ___ADDRESSABLE() are only used to convince the toolchain + * not to optimize out the referenced symbol. + */ +static bool is_addressable_sym(struct symbol *sym) +{ + return !strcmp(sym->sec->name, ".discard.addressable"); +} + +/* + * ABS symbols are typically assembly .set/.equ constants which are never + * referenced by relocations. (Exclude FILE symbols which are also SHN_ABS.) + */ +static bool is_abs_sym(struct symbol *sym) +{ + return sym->sym.st_shndx == SHN_ABS && !is_file_sym(sym); +} + +static bool is_initcall_sym(struct symbol *sym) +{ + return strstarts(sym->name, "__initcall__") || + strstarts(sym->name, "__initstub__"); +} + +/* + * Some .rodata is anonymous and can't be correlated due to there being no + * symbol names. + * + * The .rodata.cst* sections aren't technically anonymous, they're SHF_MERGE + * constant pool sections containing small fixed-size data (lookup tables, + * bitmasks) which are only read by value, so pointer equivalence isn't needed. + * They are typically referenced by UBSAN data sections. + */ +static bool is_anonymous_rodata(struct symbol *sym) +{ + return is_rodata_sec(sym->sec) && + (!is_object_sym(sym) || strstarts(sym->sec->name, ".rodata.cst")); +} + +/* * These symbols should never be correlated, so their local patched versions * are used instead of linking to the originals. */ @@ -347,96 +401,391 @@ static bool dont_correlate(struct symbol *sym) return is_file_sym(sym) || is_null_sym(sym) || is_sec_sym(sym) || + is_abs_sym(sym) || is_prefix_func(sym) || is_uncorrelated_static_local(sym) || - is_clang_tmp_label(sym) || + is_local_label(sym) || is_string_sec(sym->sec) || + is_anonymous_rodata(sym) || + is_initcall_sym(sym) || + is_addressable_sym(sym) || is_special_section(sym->sec) || - is_special_section_aux(sym->sec) || - strstarts(sym->name, "__initcall__"); + is_special_section_aux(sym->sec); } -struct process_demangled_name_data { - struct symbol *ret; - int count; -}; +static const char *llvm_suffix(const char *name) +{ + return strstr(name, ".llvm."); +} -static void process_demangled_name(struct symbol *sym, void *d) +static bool is_llvm_sym(struct symbol *sym) { - struct process_demangled_name_data *data = d; + return llvm_suffix(sym->name); +} - if (sym->twin) - return; +/* + * Determine if two symbols have compatible source file origins: + * + * - If both symbols are local, only return true if they belong to the same + * ELF file symbol. + * + * - If both symbols are global, always return true, as globals don't have + * file associations. + * + * - If they have different scopes, also return true, as the patch might have + * changed the symbol's scope. + * + * Works for both same-ELF (direct pointer compare) and cross-ELF + * (compare via file->twin) cases. + */ +static bool maybe_same_file(struct symbol *sym1, struct symbol *sym2) +{ + if (!sym1->file || !sym2->file) + return true; + if (sym1->file == sym2->file) + return true; + return sym1->file->twin == sym2->file; +} + +/* + * Similar to maybe_same_file(), but strict: no scope changes allowed. + * + * Works for both same-ELF (direct pointer compare) and cross-ELF + * (compare via file->twin) cases. + */ +static bool same_file(struct symbol *sym1, struct symbol *sym2) +{ + if (llvm_suffix(sym1->name) && llvm_suffix(sym2->name)) + return true; + if (!sym1->file && !sym2->file) + return true; + if (!sym1->file || !sym2->file) + return false; + if (sym1->file == sym2->file) + return true; + return sym1->file->twin == sym2->file; +} - data->count++; - data->ret = sym; +/* + * Is it a local symbol, or at least was it local in the translation unit + * before LLVM promoted it? + */ +static bool is_tu_local_sym(struct symbol *sym) +{ + return is_local_sym(sym) || is_llvm_sym(sym); } /* - * When there is no full name match, try match demangled_name. This would - * match original foo.llvm.123 to patched foo.llvm.456. + * Try to find sym1's twin in patched using deterministic matching. + * + * Multiple symbols can share a demangled name (e.g., static functions in + * different TUs). This function counts same-named candidates through a + * funnel of progressively tighter filters. Each level is a strict subset + * of the previous one. + * + * The widest level that yields a 1:1 match wins. Narrower levels are only + * needed when the wider level is ambiguous (count > 1). + * + * Candidates are pre-filtered by maybe_same_file(), which narrows most + * local symbols to their own TU. For example, 19 different static + * type_show() functions across vmlinux.o each see only one candidate after + * pre-filtering, so they match immediately at Level 1. + * + * Level 1 (name): Works when the demangled name is unique after + * pre-filtering. Handles most symbols: unique globals like copy_signal(), + * or per-TU locals like pcspkr_probe(). + * + * Level 2 (scope): Filters by local-vs-global (TU-local-vs-not). Example: + * parse_header() exists as both a static and a global function. Level 1 + * sees both (same demangled name), but Level 2 separates them by scope. + * + * Level 3 (file): Strict file matching via same_file(), which rejects scope + * changes. Example: LLVM-promoted foo.llvm.12345 (global, no FILE symbol) + * vs genuine local foo (has FILE symbol). Both are TU-local so Level 2 + * can't distinguish them, but same_file() rejects the pair because one has + * a file association and the other doesn't. * - * Note that, in very rare cases, it is possible to have multiple - * foo.llvm.<hash> in the same kernel. When this happens, report error and - * fail the diff. + * Level 4 (checksum): Distinguishes by function checksum. Example: + * usb_devnode.llvm.AAA and usb_devnode.llvm.BBB are two LLVM-promoted + * functions from different TUs with the same demangled name. After a TU + * change, the .llvm. hashes change but the functions themselves may be + * unchanged. Level 4 matches each to the patched candidate with the + * same checksum. */ -static int find_global_symbol_by_demangled_name(struct elf *elf, struct symbol *sym, - struct symbol **out_sym) +static struct symbol *find_twin(struct elfs *e, struct symbol *sym1) { - struct process_demangled_name_data data = {}; + struct symbol *name_last = NULL, *scope_last = NULL, + *file_last = NULL, *csum_last = NULL; + unsigned int name_orig = 0, name_patched = 0; + unsigned int scope_orig = 0, scope_patched = 0; + unsigned int file_orig = 0, file_patched = 0; + unsigned int csum_orig = 0, csum_patched = 0; + struct symbol *sym2, *match = NULL; - iterate_global_symbol_by_demangled_name(elf, sym->demangled_name, - process_demangled_name, - &data); - if (data.count > 1) { - ERROR("Multiple (%d) correlation candidates for %s", data.count, sym->name); - return -1; + /* Count orig candidates */ + for_each_sym_by_demangled_name(e->orig, sym1->demangled_name, sym2) { + if (sym2->twin || sym1->type != sym2->type || sym2->dont_correlate || + (!maybe_same_file(sym1, sym2))) + continue; + + /* Level 1: name match (widest filter) */ + name_orig++; + + /* Level 2: scope (scope changes allowed) */ + if (is_tu_local_sym(sym1) != is_tu_local_sym(sym2)) + continue; + scope_orig++; + + /* Level 3: file (scope changes disallowed) */ + if (!same_file(sym1, sym2)) + continue; + file_orig++; + + /* Level 4: checksum (unchanged symbols) */ + if (sym1->len != sym2->len || !sym1->csum.checksum || + sym1->csum.checksum != sym2->csum.checksum) + continue; + csum_orig++; } - *out_sym = data.ret; - return 0; + + /* Count patched candidates */ + for_each_sym_by_demangled_name(e->patched, sym1->demangled_name, sym2) { + if (sym2->twin || sym1->type != sym2->type || sym2->dont_correlate || + !maybe_same_file(sym1, sym2)) + continue; + + /* Level 1 */ + name_patched++; + name_last = sym2; + + /* Level 2 */ + if (is_tu_local_sym(sym1) != is_tu_local_sym(sym2)) + continue; + scope_patched++; + scope_last = sym2; + + /* Level 3 */ + if (!same_file(sym1, sym2)) + continue; + file_patched++; + file_last = sym2; + + /* Level 4 */ + if (sym1->len != sym2->len || !sym1->csum.checksum || + sym1->csum.checksum != sym2->csum.checksum) + continue; + csum_patched++; + csum_last = sym2; + } + + /* Return the widest level that yields a unique (1:1) match */ + if (name_orig == 1 && name_patched == 1) + match = name_last; + else if (scope_orig == 1 && scope_patched == 1) + match = scope_last; + else if (file_orig == 1 && file_patched == 1) + match = file_last; + else if (csum_orig == 1 && csum_patched == 1) + match = csum_last; + + if (!match) + return NULL; + + if (name_orig != 1 || name_patched != 1) + dbg_correlate("find_twin(): %s%s -> %s%s", + sym1->name, is_func_sym(sym1) ? "()" : "", + match->name, is_func_sym(match) ? "()" : ""); + + return match; } +struct llvm_suffix_pair { + struct hlist_node hash; + const char *orig; + const char *patched; +}; + +static DECLARE_HASHTABLE(suffix_map, 7); + /* - * For each symbol in the original kernel, find its corresponding "twin" in the - * patched kernel. + * Build a mapping of known orig-to-patched LLVM suffixes based on + * already-correlated symbol pairs. All promoted symbols from the same TU + * share the same .llvm.<hash> suffix, so one correlated pair seeds the map + * for the entire TU. */ -static int correlate_symbols(struct elfs *e) +static int update_suffix_map(struct elf *elf) { - struct symbol *file1_sym, *file2_sym; - struct symbol *sym1, *sym2; + struct llvm_suffix_pair *entry; + struct symbol *sym; - file1_sym = first_file_symbol(e->orig); - file2_sym = first_file_symbol(e->patched); + for_each_sym(elf, sym) { + const char *s1, *s2; + bool found; - /* - * Correlate any locals before the first FILE symbol. This has been - * seen when LTO inexplicably strips the initramfs_data.o FILE symbol - * due to the file only containing data and no code. - */ - for_each_sym(e->orig, sym1) { - if (sym1 == file1_sym || !is_local_sym(sym1)) - break; + if (!sym->twin) + continue; + + s1 = llvm_suffix(sym->name); + s2 = llvm_suffix(sym->twin->name); - if (dont_correlate(sym1)) + if (!s1 || !s2) continue; - for_each_sym(e->patched, sym2) { - if (sym2 == file2_sym || !is_local_sym(sym2)) + found = false; + hash_for_each_possible(suffix_map, entry, hash, str_hash(s1)) { + if (!strcmp(entry->orig, s1)) { + found = true; break; + } + } + if (found) + continue; - if (sym2->twin || dont_correlate(sym2)) - continue; + entry = calloc(1, sizeof(*entry)); + if (!entry) { + ERROR_GLIBC("calloc"); + return -1; + } - if (strcmp(sym1->demangled_name, sym2->demangled_name)) - continue; + entry->orig = s1; + entry->patched = s2; + hash_add(suffix_map, &entry->hash, str_hash(s1)); + } - sym1->twin = sym2; - sym2->twin = sym1; + return 0; +} + +/* + * Match by translating the symbol's .llvm.<hash> suffix through the suffix + * map to find the corresponding hash suffix for the patched object. + * + * Example: In the original kernel, TU drivers/base/core.c contains + * foo.llvm.12345 and bar.llvm.12345 (same TU, same hash). After patching, + * they become foo.llvm.67890 and bar.llvm.67890. If foo was already + * correlated by find_twin() (e.g., unique by name), the suffix map records + * .llvm.12345 -> .llvm.67890. When processing bar.llvm.12345, this + * function looks up .llvm.12345, gets .llvm.67890, constructs the name + * bar.llvm.67890, and finds the match. + */ +static struct symbol *find_twin_suffixed(struct elf *elf, struct symbol *sym1) +{ + const char *suffix, *patched_suffix = NULL; + struct symbol *sym2, *match = NULL; + char name[SYM_NAME_LEN]; + struct llvm_suffix_pair *entry; + int count = 0; + + suffix = llvm_suffix(sym1->name); + if (!suffix) + return NULL; + + hash_for_each_possible(suffix_map, entry, hash, str_hash(suffix)) { + if (!strcmp(entry->orig, suffix)) { + patched_suffix = entry->patched; break; } } + if (!patched_suffix) + return NULL; + + if (snprintf_check(name, SYM_NAME_LEN, "%s%s", + sym1->demangled_name, patched_suffix)) + return NULL; + + for_each_sym_by_name(elf, name, sym2) { + if (sym2->twin || sym1->type != sym2->type || sym2->dont_correlate) + continue; + count++; + match = sym2; + } + + if (count != 1) + return NULL; + + dbg_correlate("find_suffixed_twin(): %s%s -> %s%s", + sym1->name, is_func_sym(sym1) ? "()" : "", + match->name, is_func_sym(match) ? "()" : ""); + + return match; +} + +/* + * Last-resort positional matching. + * + * Finds a symbol with the same position in the symbol table among + * same-demangled-name candidates, similar to livepatch sympos. Note that + * LLVM-promoted symbols are globals, which come after locals in the symbol + * table, so we have to be careful not to compare different scopes. + * + * Example: arch/x86/events/intel/core.c defines many __quirk variables via + * X86_MATCH_*() macros. In the symbol table they appear as __quirk.90, + * __quirk.97, __quirk.101, etc., all with demangled name __quirk, same + * scope, and same FILE symbol. No deterministic filter can distinguish + * them, so they're matched by position: the 1st __quirk in orig matches the + * 1st in patched, the 2nd matches the 2nd, etc. + * + * This is less deterministic than the other strategies, so it's done last. + */ +static struct symbol *find_twin_positional(struct elfs *e, struct symbol *sym1) +{ + unsigned int idx_orig = 0, idx_patched = 0; + unsigned int sym1_pos = 0; + struct symbol *sym2, *match = NULL; + + for_each_sym_by_demangled_name(e->orig, sym1->demangled_name, sym2) { + if (sym2->twin || sym1->type != sym2->type || sym2->dont_correlate || + !maybe_same_file(sym1, sym2)) + continue; + if (is_tu_local_sym(sym1) != is_tu_local_sym(sym2) || + is_llvm_sym(sym1) != is_llvm_sym(sym2)) + continue; + if (sym1 == sym2) + sym1_pos = idx_orig; + idx_orig++; + } + + for_each_sym_by_demangled_name(e->patched, sym1->demangled_name, sym2) { + if (sym2->twin || sym1->type != sym2->type || sym2->dont_correlate || + !maybe_same_file(sym1, sym2)) + continue; + if (is_tu_local_sym(sym1) != is_tu_local_sym(sym2) || + is_llvm_sym(sym1) != is_llvm_sym(sym2)) + continue; + if (idx_patched == sym1_pos) + match = sym2; + idx_patched++; + } + + if (idx_orig != idx_patched) + return NULL; + + dbg_correlate("find_twin_positional(): %s%s -> %s%s", + sym1->name, is_func_sym(sym1) ? "()" : "", + match->name, is_func_sym(match) ? "()" : ""); + + return match; +} + +/* + * Correlate symbols between the orig and patched objects. This is a + * prerequisite for detecting changed functions, as well as for properly + * translating relocations so they point to the correct symbol. + */ +static int correlate_symbols(struct elfs *e) +{ + struct symbol *file1_sym, *file2_sym; + struct symbol *sym1, *sym2; + bool progress; + + for_each_sym(e->orig, sym1) + sym1->dont_correlate = dont_correlate(sym1); + for_each_sym(e->patched, sym2) + sym2->dont_correlate = dont_correlate(sym2); + + /* Correlate FILE symbols */ + file1_sym = first_file_symbol(e->orig); + file2_sym = first_file_symbol(e->patched); - /* Correlate locals after the first FILE symbol */ for (; ; file1_sym = next_file_symbol(e->orig, file1_sym), file2_sym = next_file_symbol(e->patched, file2_sym)) { @@ -460,96 +809,56 @@ static int correlate_symbols(struct elfs *e) file1_sym->twin = file2_sym; file2_sym->twin = file1_sym; - - sym1 = file1_sym; - - for_each_sym_continue(e->orig, sym1) { - if (is_file_sym(sym1) || !is_local_sym(sym1)) - break; - - if (dont_correlate(sym1)) - continue; - - sym2 = file2_sym; - for_each_sym_continue(e->patched, sym2) { - if (is_file_sym(sym2) || !is_local_sym(sym2)) - break; - - if (sym2->twin || dont_correlate(sym2)) - continue; - - if (strcmp(sym1->demangled_name, sym2->demangled_name)) - continue; - - sym1->twin = sym2; - sym2->twin = sym1; - break; - } - } } - /* Correlate globals */ - for_each_sym(e->orig, sym1) { - if (sym1->bind == STB_LOCAL) - continue; - - sym2 = find_global_symbol_by_name(e->patched, sym1->name); - if (sym2 && !sym2->twin) { - sym1->twin = sym2; - sym2->twin = sym1; - } - } /* - * Correlate globals with demangled_name. - * A separate loop is needed because we want to finish all the - * full name correlations first. + * Correlate in two phases: loop deterministic levels until no more + * progress, then use positional fallback for the rest. This prevents + * the nondeterministic positional matching from stealing symbols that + * have deterministic matches. */ - for_each_sym(e->orig, sym1) { - if (sym1->bind == STB_LOCAL || sym1->twin) - continue; - - if (find_global_symbol_by_demangled_name(e->patched, sym1, &sym2)) - return -1; - - if (sym2 && !sym2->twin) { + hash_init(suffix_map); + do { + progress = false; + for_each_sym(e->orig, sym1) { + if (sym1->twin || sym1->dont_correlate) + continue; + sym2 = find_twin(e, sym1); + if (!sym2) + continue; sym1->twin = sym2; sym2->twin = sym1; + progress = true; } - } - - /* Correlate original locals with patched globals */ - for_each_sym(e->orig, sym1) { - if (sym1->twin || dont_correlate(sym1) || !is_local_sym(sym1)) - continue; - sym2 = find_global_symbol_by_name(e->patched, sym1->name); - if (!sym2 && find_global_symbol_by_demangled_name(e->patched, sym1, &sym2)) + if (update_suffix_map(e->orig)) return -1; - if (sym2 && !sym2->twin) { + for_each_sym(e->orig, sym1) { + if (sym1->twin || sym1->dont_correlate) + continue; + sym2 = find_twin_suffixed(e->patched, sym1); + if (!sym2) + continue; sym1->twin = sym2; sym2->twin = sym1; + progress = true; } - } + } while (progress); - /* Correlate original globals with patched locals */ - for_each_sym(e->patched, sym2) { - if (sym2->twin || dont_correlate(sym2) || !is_local_sym(sym2)) + for_each_sym(e->orig, sym1) { + if (sym1->twin || sym1->dont_correlate) continue; - - sym1 = find_global_symbol_by_name(e->orig, sym2->name); - if (!sym1 && find_global_symbol_by_demangled_name(e->orig, sym2, &sym1)) - return -1; - - if (sym1 && !sym1->twin) { - sym2->twin = sym1; - sym1->twin = sym2; - } + sym2 = find_twin_positional(e, sym1); + if (!sym2) + continue; + sym1->twin = sym2; + sym2->twin = sym1; } for_each_sym(e->orig, sym1) { - if (sym1->twin || dont_correlate(sym1)) + if (sym1->twin || sym1->dont_correlate) continue; WARN("no correlation: %s", sym1->name); } @@ -655,7 +964,7 @@ static struct symbol *__clone_symbol(struct elf *elf, struct symbol *patched_sym size_t size; /* bss doesn't have data */ - if (patched_sym->sec->data->d_buf) + if (patched_sym->sec->data && patched_sym->sec->data->d_buf) data = patched_sym->sec->data->d_buf + patched_sym->offset; if (is_sec_sym(patched_sym)) @@ -715,7 +1024,7 @@ static struct symbol *clone_symbol(struct elfs *e, struct symbol *patched_sym, if (patched_sym->clone) return patched_sym->clone; - dbg_indent("%s%s", patched_sym->name, data_too ? " [+DATA]" : ""); + dbg_clone("%s%s", patched_sym->name, data_too ? " [+DATA]" : ""); /* Make sure the prefix gets cloned first */ if (is_func_sym(patched_sym) && data_too) { @@ -757,19 +1066,24 @@ static void mark_included_function(struct symbol *func) */ static int mark_changed_functions(struct elfs *e) { - struct symbol *sym_orig, *patched_sym; + struct symbol *orig_sym, *patched_sym; bool changed = false; /* Find changed functions */ - for_each_sym(e->orig, sym_orig) { - if (!is_func_sym(sym_orig) || is_prefix_func(sym_orig)) + for_each_sym(e->orig, orig_sym) { + if (orig_sym->dont_correlate) continue; - patched_sym = sym_orig->twin; + patched_sym = orig_sym->twin; if (!patched_sym) continue; - if (sym_orig->csum.checksum != patched_sym->csum.checksum) { + if (orig_sym->csum.checksum != patched_sym->csum.checksum) { + if (!is_func_sym(orig_sym)) { + ERROR("changed data: %s", orig_sym->name); + return -1; + } + patched_sym->changed = 1; mark_included_function(patched_sym); changed = true; @@ -778,7 +1092,7 @@ static int mark_changed_functions(struct elfs *e) /* Find added functions and print them */ for_each_sym(e->patched, patched_sym) { - if (!is_func_sym(patched_sym) || is_prefix_func(patched_sym)) + if (!is_func_sym(patched_sym) || patched_sym->dont_correlate) continue; if (!patched_sym->twin) { @@ -794,7 +1108,7 @@ static int mark_changed_functions(struct elfs *e) printf("%s: changed function: %s\n", objname, patched_sym->name); } - return !changed ? -1 : 0; + return !changed ? 1 : 0; } static int clone_included_functions(struct elfs *e) @@ -811,39 +1125,6 @@ static int clone_included_functions(struct elfs *e) return 0; } -/* - * Determine whether a relocation should reference the section rather than the - * underlying symbol. - */ -static bool section_reference_needed(struct section *sec) -{ - /* - * String symbols are zero-length and uncorrelated. It's easier to - * deal with them as section symbols. - */ - if (is_string_sec(sec)) - return true; - - /* - * .rodata has mostly anonymous data so there's no way to determine the - * length of a needed reference. just copy the whole section if needed. - */ - if (strstarts(sec->name, ".rodata")) - return true; - - /* UBSAN anonymous data */ - if (strstarts(sec->name, ".data..Lubsan") || /* GCC */ - strstarts(sec->name, ".data..L__unnamed_")) /* Clang */ - return true; - - return false; -} - -static bool is_reloc_allowed(struct reloc *reloc) -{ - return section_reference_needed(reloc->sym->sec) == is_sec_sym(reloc->sym); -} - static struct export *find_export(struct symbol *sym) { struct export *export; @@ -917,7 +1198,7 @@ static bool klp_reloc_needed(struct reloc *patched_reloc) struct export *export; /* no external symbol to reference */ - if (dont_correlate(patched_sym)) + if (patched_sym->dont_correlate) return false; /* For included functions, a regular reloc will do. */ @@ -952,34 +1233,60 @@ static bool klp_reloc_needed(struct reloc *patched_reloc) return true; } +/* Return -1 error, 0 success, 1 skip */ static int convert_reloc_sym_to_secsym(struct elf *elf, struct reloc *reloc) { struct symbol *sym = reloc->sym; struct section *sec = sym->sec; + if (is_sec_sym(sym)) + return 0; + if (!sec->sym && !elf_create_section_symbol(elf, sec)) return -1; reloc->sym = sec->sym; - set_reloc_sym(elf, reloc, sym->idx); + set_reloc_sym(elf, reloc, sec->sym->idx); set_reloc_addend(elf, reloc, sym->offset + reloc_addend(reloc)); return 0; } +/* Return -1 error, 0 success, 1 skip */ static int convert_reloc_secsym_to_sym(struct elf *elf, struct reloc *reloc) { struct symbol *sym = reloc->sym; struct section *sec = sym->sec; + if (!is_sec_sym(sym)) + return 0; + /* If the symbol has a dedicated section, it's easy to find */ sym = find_symbol_by_offset(sec, 0); if (sym && sym->len == sec_size(sec)) goto found_sym; /* No dedicated section; find the symbol manually */ - sym = find_symbol_containing(sec, arch_adjusted_addend(reloc)); + sym = find_symbol_containing_inclusive(sec, arch_adjusted_addend(reloc)); if (!sym) { /* + * This is presumably an .altinstr_replacement section which is + * empty due to it only having zero-length replacement(s). + */ + if (!sec_size(sec)) + return 1; + + /* + * .rodata is a mixed bag of named objects and anonymous data. + * + * Convert section symbol references to named object symbols + * when possible, to preserve pointer identity for const + * structs like file_operations. Otherwise a section symbol is + * fine. + */ + if (is_rodata_sec(sec)) + return 0; + + /* * This can happen for special section references to weak code * whose symbol has been stripped by the linker. */ @@ -994,18 +1301,33 @@ found_sym: } /* + * Sections with anonymous or uncorrelated data (strings, UBSAN data, Clang + * anonymous constants) need section symbol references. + */ +static bool is_uncorrelated_section(struct section *sec) +{ + return is_string_sec(sec) || + strstarts(sec->name, ".data..Lubsan") || /* GCC */ + strstarts(sec->name, ".data..L__unnamed_") || /* Clang */ + strstarts(sec->name, ".data..Lanon."); /* Clang */ +} + +/* * Convert a relocation symbol reference to the needed format: either a section - * symbol or the underlying symbol itself. + * symbol or the underlying symbol itself. Return -1 error, 0 success, 1 skip. */ static int convert_reloc_sym(struct elf *elf, struct reloc *reloc) { - if (is_reloc_allowed(reloc)) - return 0; + struct section *sec = reloc->sym->sec; + + if (reloc_type(reloc) == R_NONE) + return 1; - if (section_reference_needed(reloc->sym->sec)) + if (is_uncorrelated_section(sec)) return convert_reloc_sym_to_secsym(elf, reloc); - else - return convert_reloc_secsym_to_sym(elf, reloc); + + /* Everything else: references should use named symbols. */ + return convert_reloc_secsym_to_sym(elf, reloc); } /* @@ -1079,7 +1401,7 @@ static int clone_reloc_klp(struct elfs *e, struct reloc *patched_reloc, klp_sym = find_symbol_by_name(e->out, sym_name); if (!klp_sym) { - __dbg_indent("%s", sym_name); + __dbg_clone("%s", sym_name); /* STB_WEAK: avoid modpost undefined symbol warnings */ klp_sym = elf_create_symbol(e->out, sym_name, NULL, @@ -1130,7 +1452,7 @@ static int clone_reloc_klp(struct elfs *e, struct reloc *patched_reloc, } #define dbg_clone_reloc(sec, offset, patched_sym, addend, export, klp) \ - dbg_indent("%s+0x%lx: %s%s0x%lx [%s%s%s%s%s%s]", \ + dbg_clone("%s+0x%lx: %s%s0x%lx [%s%s%s%s%s%s]", \ sec->name, offset, patched_sym->name, \ addend >= 0 ? "+" : "-", labs(addend), \ sym_type(patched_sym), \ @@ -1150,13 +1472,6 @@ static int clone_reloc(struct elfs *e, struct reloc *patched_reloc, struct symbol *out_sym; bool klp; - if (!is_reloc_allowed(patched_reloc)) { - ERROR_FUNC(patched_reloc->sec->base, reloc_offset(patched_reloc), - "missing symbol for reference to %s+%ld", - patched_sym->name, addend); - return -1; - } - klp = klp_reloc_needed(patched_reloc); dbg_clone_reloc(sec, offset, patched_sym, addend, export, klp); @@ -1186,13 +1501,13 @@ static int clone_reloc(struct elfs *e, struct reloc *patched_reloc, /* * For strings, all references use section symbols, thanks to - * section_reference_needed(). clone_symbol() has cloned an empty + * convert_reloc_sym(). clone_symbol() has cloned an empty * version of the string section. Now copy the string itself. */ if (is_string_sec(patched_sym->sec)) { const char *str = patched_sym->sec->data->d_buf + addend; - __dbg_indent("\"%s\"", escape_str(str)); + __dbg_clone("\"%s\"", escape_str(str)); addend = elf_add_string(e->out, out_sym->sec, str); if (addend == -1) @@ -1239,6 +1554,7 @@ static int clone_sym_relocs(struct elfs *e, struct symbol *patched_sym) for_each_reloc(patched_rsec, patched_reloc) { unsigned long offset; + int ret; if (reloc_offset(patched_reloc) < start || reloc_offset(patched_reloc) >= end) @@ -1252,12 +1568,15 @@ static int clone_sym_relocs(struct elfs *e, struct symbol *patched_sym) !strcmp(patched_reloc->sym->sec->name, ".altinstr_aux")) continue; - if (convert_reloc_sym(e->patched, patched_reloc)) { + ret = convert_reloc_sym(e->patched, patched_reloc); + if (ret < 0) { ERROR_FUNC(patched_rsec->base, reloc_offset(patched_reloc), "failed to convert reloc sym '%s' to its proper format", patched_reloc->sym->name); return -1; } + if (ret > 0) + continue; offset = out_sym->offset + (reloc_offset(patched_reloc) - patched_sym->offset); @@ -1334,7 +1653,7 @@ static int create_fake_symbols(struct elf *elf) sec = find_section_by_name(elf, ".discard.annotate_data"); if (!sec || !sec->rsec) - return 0; + goto entsize; for_each_reloc(sec->rsec, reloc) { unsigned long offset, size; @@ -1366,7 +1685,7 @@ static int create_fake_symbols(struct elf *elf) /* * 2) Make symbols for sh_entsize, and simple arrays of pointers: */ - +entsize: for_each_sec(elf, sec) { unsigned int entry_size; unsigned long offset; @@ -1400,6 +1719,7 @@ static int create_fake_symbols(struct elf *elf) /* Keep a special section entry if it references an included function */ static bool should_keep_special_sym(struct elf *elf, struct symbol *sym) { + bool annotate_insn = !strcmp(sym->sec->name, ".discard.annotate_insn"); struct reloc *reloc; if (is_sec_sym(sym) || !sym->sec->rsec) @@ -1409,7 +1729,16 @@ static bool should_keep_special_sym(struct elf *elf, struct symbol *sym) if (convert_reloc_sym(elf, reloc)) continue; - if (is_func_sym(reloc->sym) && reloc->sym->included) + if (!reloc->sym->clone || is_undef_sym(reloc->sym->clone)) + continue; + + /* + * Keep special section references to cloned functions. + * In some cases annotate_insn can also reference cloned alt + * replacement fake symbols; keep those references as well. + */ + if (is_func_sym(reloc->sym) || + (annotate_insn && is_notype_sym(reloc->sym))) return true; } @@ -1553,15 +1882,28 @@ static int clone_special_section(struct elfs *e, struct section *patched_sec) /* Extract only the needed bits from special sections */ static int clone_special_sections(struct elfs *e) { - struct section *patched_sec; + struct section *sec, *annotate_insn = NULL; - for_each_sec(e->patched, patched_sec) { - if (is_special_section(patched_sec)) { - if (clone_special_section(e, patched_sec)) + for_each_sec(e->patched, sec) { + if (is_special_section(sec)) { + if (!strcmp(sec->name, ".discard.annotate_insn")) { + annotate_insn = sec; + continue; + } + if (clone_special_section(e, sec)) return -1; } } + /* + * Do .discard.annotate_insn last, it can reference other special + * sections (alt replacements) so they need to be cloned first. + */ + if (annotate_insn) { + if (clone_special_section(e, annotate_insn)) + return -1; + } + return 0; } @@ -1638,7 +1980,8 @@ static int create_klp_sections(struct elfs *e) unsigned long sympos; void *func_data; - if (!is_func_sym(sym) || sym->cold || !sym->clone || !sym->clone->changed) + if (!is_func_sym(sym) || is_cold_func(sym) || + !sym->clone || !sym->clone->changed) continue; /* allocate klp_func_ext */ @@ -1798,11 +2141,17 @@ static int copy_import_ns(struct elfs *e) int cmd_klp_diff(int argc, const char **argv) { struct elfs e = {0}; + int ret; argc = parse_options(argc, argv, klp_diff_options, klp_diff_usage, 0); if (argc != 3) usage_with_options(klp_diff_usage, klp_diff_options); + if (debug) { + debug_correlate = true; + debug_clone = true; + } + objname = argv[0]; e.orig = elf_open_read(argv[0], O_RDONLY); @@ -1824,7 +2173,10 @@ int cmd_klp_diff(int argc, const char **argv) if (correlate_symbols(&e)) return -1; - if (mark_changed_functions(&e)) + ret = mark_changed_functions(&e); + if (ret < 0) + return -1; + if (ret > 0) return 0; e.out = elf_create_file(&e.orig->ehdr, argv[2]); diff --git a/tools/objtool/objtool.c b/tools/objtool/objtool.c index 1c3622117c33c..a4e139dee7e9f 100644 --- a/tools/objtool/objtool.c +++ b/tools/objtool/objtool.c @@ -16,9 +16,6 @@ #include <objtool/objtool.h> #include <objtool/warn.h> -bool debug; -int indent; - static struct objtool_file file; struct objtool_file *objtool_open_read(const char *filename) diff --git a/tools/objtool/trace.c b/tools/objtool/trace.c index 5dec44dab781c..61c6aa302bc34 100644 --- a/tools/objtool/trace.c +++ b/tools/objtool/trace.c @@ -169,8 +169,8 @@ void trace_alt_begin(struct instruction *orig_insn, struct alternative *alt, */ TRACE_ALT_INFO_NOADDR(orig_insn, "/ ", "%s for instruction at 0x%lx <%s+0x%lx>", alt_name, - orig_insn->offset, orig_insn->sym->name, - orig_insn->offset - orig_insn->sym->offset); + orig_insn->offset, insn_sym(orig_insn)->name, + orig_insn->offset - insn_sym(orig_insn)->offset); } else { TRACE_ALT_INFO_NOADDR(orig_insn, "/ ", "%s", alt_name); } @@ -185,8 +185,8 @@ void trace_alt_begin(struct instruction *orig_insn, struct alternative *alt, if (orig_insn->type == INSN_NOP) { suffix[0] = (orig_insn->len == 5) ? 'q' : '\0'; TRACE_ADDR(orig_insn, "jmp%-3s %lx <%s+0x%lx>", suffix, - alt_insn->offset, alt_insn->sym->name, - alt_insn->offset - alt_insn->sym->offset); + alt_insn->offset, insn_sym(alt_insn)->name, + alt_insn->offset - insn_sym(alt_insn)->offset); } else { TRACE_ADDR(orig_insn, "nop%d", orig_insn->len); trace_depth--; diff --git a/tools/testing/selftests/membarrier/Makefile b/tools/testing/selftests/membarrier/Makefile index fc840e06ff565..829f95c835157 100644 --- a/tools/testing/selftests/membarrier/Makefile +++ b/tools/testing/selftests/membarrier/Makefile @@ -1,8 +1,9 @@ # SPDX-License-Identifier: GPL-2.0-only -CFLAGS += -g $(KHDR_INCLUDES) +CFLAGS += -g $(KHDR_INCLUDES) -pthread -I../../../../tools/include LDLIBS += -lpthread TEST_GEN_PROGS := membarrier_test_single_thread \ - membarrier_test_multi_thread + membarrier_test_multi_thread \ + membarrier_rseq_stress include ../lib.mk diff --git a/tools/testing/selftests/membarrier/membarrier_rseq_stress.c b/tools/testing/selftests/membarrier/membarrier_rseq_stress.c new file mode 100644 index 0000000000000..c188d7498610c --- /dev/null +++ b/tools/testing/selftests/membarrier/membarrier_rseq_stress.c @@ -0,0 +1,951 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Membarrier stress test for CFS throttle interactions. + * + * Reproducer for the interaction between CFS throttle and expedited membarrier. + */ + +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <pthread.h> +#include <syscall.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <fcntl.h> +#include <stdint.h> +#include <errno.h> +#include <sched.h> +#include <time.h> +#include <signal.h> +#include <stdatomic.h> +#include <dirent.h> +#include <sys/prctl.h> +#include <sys/mman.h> + +#include "../kselftest.h" + +/* -- Architecture-specific rseq signature -- */ +#if defined(__x86_64__) || defined(__i386__) +# define RSEQ_SIG 0x53053053U +#elif defined(__aarch64__) +# define RSEQ_SIG 0xd428bc00U +#elif defined(__powerpc__) || defined(__powerpc64__) +# define RSEQ_SIG 0x0f000000U +#elif defined(__s390__) || defined(__s390x__) +# define RSEQ_SIG 0x0c000000U +#else +# define RSEQ_SIG 0 +# define UNSUPPORTED_ARCH 1 +#endif + +/* -- rseq ABI (kernel uapi; define locally for portability) -- */ +#define RSEQ_CPU_ID_UNINITIALIZED ((__u32)-1) + +#include <linux/compiler.h> + +struct rseq_abi { + __u32 cpu_id_start; + __u32 cpu_id; + __u64 rseq_cs; + __u32 flags; + __u32 node_id; + __u32 mm_cid; + char end[0]; +} __aligned(32); + +/* -- membarrier constants (not in all distro headers) -- */ +#ifndef MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ +# define MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ (1 << 7) +#endif +#ifndef MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ +# define MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ (1 << 8) +#endif +#ifndef MEMBARRIER_CMD_FLAG_CPU +# define MEMBARRIER_CMD_FLAG_CPU (1 << 0) +#endif + +/* -- Test parameters -- */ +#define N_SIBLINGS 2000 +#define NEST_DEPTH 5 +static char g_cgroup_path[4096]; +static int use_cgroup_v2; + +#define CFS_QUOTA_US 1000 +#define CFS_PERIOD_US 5000 +#define N_HAMMER_PER_CPU 25 +#define N_BURNER_PER_CPU 50 +#define MAX_STRESS_CPUS 1024 +#define TEST_DURATION_SEC 20 + +/* Latency thresholds for the sentinel */ +#define LATENCY_WARN_MS 50 +#define LATENCY_CRITICAL_MS 200 + +/* Sentinel sampling interval */ +#define SENTINEL_INTERVAL_US 500 + +/* -- Shared globals -- */ +static atomic_int g_stop; +static atomic_int g_stop_sentinel; +static atomic_long g_max_latency_us; +static atomic_long g_interval_max_latency_us; +static atomic_long g_mb_ok; +static atomic_long g_mb_err; +static int g_ncpus_stress; +static int *g_stress_cpus; + +static atomic_int g_test_ready; + +/* Per-thread rseq ABI block registered with the kernel */ +static __thread struct rseq_abi tls_rseq + __attribute__((tls_model("initial-exec"))) __aligned(32) = { + .cpu_id = RSEQ_CPU_ID_UNINITIALIZED, +}; + +/* -- Utility -- */ +static int write_file(const char *path, const char *val) +{ + int fd = open(path, O_WRONLY | O_CLOEXEC); + + if (fd < 0) + return -errno; + + size_t len = strlen(val); + ssize_t r = write(fd, val, len); + + close(fd); + if (r < 0) + return -errno; + if ((size_t)r != len) + return -EIO; + return 0; +} + +static uint64_t monotonic_us(void) +{ + struct timespec ts; + + clock_gettime(CLOCK_MONOTONIC, &ts); + return (uint64_t)ts.tv_sec * 1000000ULL + ts.tv_nsec / 1000ULL; +} + +static void update_max_latency(long lat) +{ + long old = atomic_load_explicit(&g_max_latency_us, memory_order_relaxed); + + while (lat > old) { + if (atomic_compare_exchange_weak_explicit(&g_max_latency_us, &old, lat, + memory_order_relaxed, memory_order_relaxed)) + break; + } + + old = atomic_load_explicit(&g_interval_max_latency_us, memory_order_relaxed); + while (lat > old) { + if (atomic_compare_exchange_weak_explicit(&g_interval_max_latency_us, &old, lat, + memory_order_relaxed, memory_order_relaxed)) + break; + } +} + +static void init_stress_cpus(void) +{ + cpu_set_t set; + int capacity = MAX_STRESS_CPUS; + + g_stress_cpus = malloc(capacity * sizeof(int)); + if (!g_stress_cpus) + ksft_exit_fail_msg("malloc failed for g_stress_cpus\n"); + + if (sched_getaffinity(0, sizeof(set), &set) < 0) + ksft_exit_fail_msg("sched_getaffinity failed\n"); + + for (int i = 0; i < CPU_SETSIZE && g_ncpus_stress < capacity; i++) { + if (CPU_ISSET(i, &set)) + g_stress_cpus[g_ncpus_stress++] = i; + } + + if (g_ncpus_stress == 0) + ksft_exit_skip("No CPUs available for stress test\n"); + + ksft_print_msg("Stressing %d CPUs discovered via affinity\n", g_ncpus_stress); +} + +/* -- rseq / membarrier helpers -- */ +static int rseq_register_thread(void) +{ + int r = syscall(SYS_rseq, &tls_rseq, sizeof(tls_rseq), 0, RSEQ_SIG); + + return (r == 0 || errno == EBUSY || errno == EINVAL) ? 0 : -1; +} + +static int rseq_register_thread_at(struct rseq_abi *rseq) +{ + int r = syscall(SYS_rseq, rseq, sizeof(*rseq), 0, RSEQ_SIG); + + return (r == 0 || errno == EBUSY || errno == EINVAL) ? 0 : -1; +} + +static int membarrier_register_rseq_mm(void) +{ + return syscall(SYS_membarrier, + MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ, 0, 0); +} + +/* -- cgroup helpers -- */ +static void rm_cgroup_recursive(const char *path) +{ + DIR *dir = opendir(path); + + if (!dir) + return; + struct dirent *entry; + + while ((entry = readdir(dir)) != NULL) { + if (strcmp(entry->d_name, ".") == 0 || strcmp(entry->d_name, "..") == 0) + continue; + if (entry->d_type == DT_DIR) { + char sub_path[4096]; + + snprintf(sub_path, sizeof(sub_path), "%s/%s", path, entry->d_name); + rm_cgroup_recursive(sub_path); + } + } + closedir(dir); + rmdir(path); +} + +static void cgroup_teardown(void); + +static int cgroup_setup(void) +{ + struct stat st; + + if (stat("/sys/fs/cgroup/cpu", &st) == 0) { + use_cgroup_v2 = 0; + snprintf(g_cgroup_path, sizeof(g_cgroup_path), + "/sys/fs/cgroup/cpu/membarrier_stress_test"); + } else if (stat("/dev/cgroup/cpu", &st) == 0) { + use_cgroup_v2 = 0; + snprintf(g_cgroup_path, sizeof(g_cgroup_path), + "/dev/cgroup/cpu/membarrier_stress_test"); + } else if (stat("/cgroup/cpu", &st) == 0) { + use_cgroup_v2 = 0; + snprintf(g_cgroup_path, sizeof(g_cgroup_path), + "/cgroup/cpu/membarrier_stress_test"); + } else if (stat("/sys/fs/cgroup/cgroup.controllers", &st) == 0) { + use_cgroup_v2 = 1; + snprintf(g_cgroup_path, sizeof(g_cgroup_path), + "/sys/fs/cgroup/membarrier_stress_test"); + } else { + ksft_print_msg("WARN: cgroup mount not found. Using v2 at /sys/fs/cgroup\n"); + use_cgroup_v2 = 1; + snprintf(g_cgroup_path, sizeof(g_cgroup_path), + "/sys/fs/cgroup/membarrier_stress_test"); + } + + /* Robust cleanup before setup */ + cgroup_teardown(); + + if (use_cgroup_v2) { + /* Enable cpu controller in root cgroup */ + if (write_file("/sys/fs/cgroup/cgroup.subtree_control", "+cpu") < 0) + ksft_print_msg("WARN: failed to enable cpu controller in /sys/fs/cgroup\n"); + } + + if (mkdir(g_cgroup_path, 0755) < 0 && errno != EEXIST) { + ksft_print_msg("mkdir base %s failed: %s\n", g_cgroup_path, strerror(errno)); + return -1; + } + + if (use_cgroup_v2) { + char ctrl_path[4096]; + + snprintf(ctrl_path, sizeof(ctrl_path), "%s/cgroup.subtree_control", g_cgroup_path); + if (write_file(ctrl_path, "+cpu") < 0) + ksft_print_msg("WARN: failed to enable cpu controller in %s\n", + g_cgroup_path); + } + + for (int i = 0; i < N_SIBLINGS; i++) { + char sibling_path[4096]; + + snprintf(sibling_path, sizeof(sibling_path), "%s/n%d", g_cgroup_path, i); + if (mkdir(sibling_path, 0755) < 0 && errno != EEXIST) { + ksft_print_msg("mkdir wide %s failed: %s\n", sibling_path, strerror(errno)); + return -1; + } + + if (use_cgroup_v2) { + char ctrl_path[4096]; + + snprintf(ctrl_path, sizeof(ctrl_path), + "%s/cgroup.subtree_control", sibling_path); + if (write_file(ctrl_path, "+cpu") < 0) + ksft_print_msg("WARN: failed to enable cpu controller in %s\n", + sibling_path); + } + + char current_path[4096]; + + snprintf(current_path, sizeof(current_path), "%s", sibling_path); + for (int j = 0; j < NEST_DEPTH; j++) { + snprintf(current_path + strlen(current_path), + sizeof(current_path) - strlen(current_path), "/d%d", j); + if (mkdir(current_path, 0755) < 0 && errno != EEXIST) { + ksft_print_msg("mkdir deep %s failed: %s\n", + current_path, strerror(errno)); + return -1; + } + + /* Enable for all but the leaf */ + if (use_cgroup_v2 && j < NEST_DEPTH - 1) { + char ctrl_path[4096]; + + snprintf(ctrl_path, sizeof(ctrl_path), "%s/cgroup.subtree_control", + current_path); + if (write_file(ctrl_path, "+cpu") < 0) + ksft_print_msg("WARN: cannot enable cpu controller in %s\n", + current_path); + } + } + } + + char quota[64], period[64], max_str[128]; + + snprintf(quota, sizeof(quota), "%d", CFS_QUOTA_US); + snprintf(period, sizeof(period), "%d", CFS_PERIOD_US); + snprintf(max_str, sizeof(max_str), "%d %d", CFS_QUOTA_US, CFS_PERIOD_US); + + if (use_cgroup_v2) { + char max_path[4096]; + + snprintf(max_path, sizeof(max_path), "%s/cpu.max", g_cgroup_path); + if (write_file(max_path, max_str) < 0) { + ksft_print_msg("ERROR: cannot write cpu.max at %s\n", max_path); + return -1; + } + ksft_print_msg("cgroup (v2) %s: cpu.max=%s\n", g_cgroup_path, max_str); + } else { + char quota_path[4096], period_path[4096]; + + snprintf(quota_path, sizeof(quota_path), "%s/cpu.cfs_quota_us", g_cgroup_path); + snprintf(period_path, sizeof(period_path), "%s/cpu.cfs_period_us", g_cgroup_path); + + if (write_file(period_path, period) < 0) { + ksft_print_msg("ERROR: cannot write cpu.cfs_period_us at %s\n", + period_path); + return -1; + } + if (write_file(quota_path, quota) < 0) { + ksft_print_msg("ERROR: cannot write cpu.cfs_quota_us at %s\n", quota_path); + return -1; + } + ksft_print_msg("cgroup (v1) %s: cpu.cfs_quota_us=%d cpu.cfs_period_us=%d\n", + g_cgroup_path, CFS_QUOTA_US, CFS_PERIOD_US); + } + + return 0; +} + +static int cgroup_add_pid_to_path(pid_t pid, const char *path) +{ + char buf[32], file_path[4096]; + + snprintf(buf, sizeof(buf), "%d", (int)pid); + if (use_cgroup_v2) { + snprintf(file_path, sizeof(file_path), "%s/cgroup.procs", path); + return write_file(file_path, buf); + } + /* In v1, try tasks first, fallback to cgroup.procs */ + snprintf(file_path, sizeof(file_path), "%s/tasks", path); + int r = write_file(file_path, buf); + + if (r < 0) { + snprintf(file_path, sizeof(file_path), "%s/cgroup.procs", path); + r = write_file(file_path, buf); + } + return r; +} + +static void cgroup_teardown(void) +{ + rm_cgroup_recursive(g_cgroup_path); +} + +static void cgroup_unthrottle(void) +{ + if (use_cgroup_v2) { + char max_path[4096]; + + snprintf(max_path, sizeof(max_path), "%s/cpu.max", g_cgroup_path); + write_file(max_path, "max"); + } else { + char quota_path[4096]; + + snprintf(quota_path, sizeof(quota_path), "%s/cpu.cfs_quota_us", g_cgroup_path); + write_file(quota_path, "-1"); + } +} + +/* -- CPU burner (inside throttled child process) -- */ +static void *burner_thread_fn(void *arg) +{ + struct rseq_abi my_rseq; + int cpu = (int)(uintptr_t)arg; + + memset(&my_rseq, 0, sizeof(my_rseq)); + my_rseq.cpu_id = RSEQ_CPU_ID_UNINITIALIZED; + + if (rseq_register_thread_at(&my_rseq) < 0) { + perror("rseq_register (burner)"); + return NULL; + } + + cpu_set_t set; + + CPU_ZERO(&set); + CPU_SET(cpu, &set); + if (sched_setaffinity(0, sizeof(set), &set) < 0) + perror("sched_setaffinity (burner)"); + + unsigned long sink = 0; + + while (!atomic_load_explicit(&g_stop, memory_order_relaxed)) { + sink++; + /* Prevent compiler from optimizing the loop away */ + asm volatile("" : "+g"(sink)); + } + + return NULL; +} + +static int burner_thread_fn_wrapper(void *arg) +{ + burner_thread_fn(arg); + return 0; +} + +static int leaf_child_fn(void *arg) +{ + int i = (int)(uintptr_t)arg; + int total_burners = g_ncpus_stress * N_BURNER_PER_CPU; + int n_threads_per_leaf = total_burners / N_SIBLINGS; + + if (i < (total_burners % N_SIBLINGS)) + n_threads_per_leaf++; + + prctl(PR_SET_PDEATHSIG, SIGTERM); + if (getppid() == 1) + _exit(1); + + char leaf_path[4096]; + + snprintf(leaf_path, sizeof(leaf_path), "%s/n%d", g_cgroup_path, i); + for (int j = 0; j < NEST_DEPTH; j++) + snprintf(leaf_path + strlen(leaf_path), + sizeof(leaf_path) - strlen(leaf_path), "/d%d", j); + + int r = cgroup_add_pid_to_path(getpid(), leaf_path); + + if (r < 0) { + char buf[512]; + int len = snprintf(buf, sizeof(buf), + "[leaf child %d] failed to join cgroup %s: err %d\n", + i, leaf_path, -r); + (void)!write(2, buf, len); + _exit(1); + } + + for (int j = 0; j < n_threads_per_leaf; j++) { + int cpu = g_stress_cpus[(i * n_threads_per_leaf + j) % g_ncpus_stress]; + + /* Allocate stack via mmap (bypasses heap) */ + size_t stack_size = 64 * 1024; + void *stack = mmap(NULL, stack_size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (stack == MAP_FAILED) { + const char *msg = "mmap stack failed\n"; + (void)!write(2, msg, strlen(msg)); + _exit(1); + } + + /* Use raw clone to create a thread sharing the VM and thread group */ + pid_t pid = clone(burner_thread_fn_wrapper, stack + stack_size, + CLONE_VM | CLONE_THREAD | CLONE_SIGHAND, + (void *)(uintptr_t)cpu); + if (pid < 0) { + const char *msg = "clone burner failed\n"; + (void)!write(2, msg, strlen(msg)); + _exit(1); + } + } + + // Wait for SIGTERM + sigset_t mask; + + sigemptyset(&mask); + sigaddset(&mask, SIGTERM); + int sig; + + sigwait(&mask, &sig); + + _exit(0); +} + +struct leaf_info { + pid_t pid; + void *stack; +}; + +static int run_throttle_child(void *arg) +{ + (void)arg; + prctl(PR_SET_PDEATHSIG, SIGTERM); + if (getppid() == 1) + _exit(1); + + int n_leafs = N_SIBLINGS; + + /* Block signals before spawning to avoid missing early failures */ + sigset_t mask; + + sigemptyset(&mask); + sigaddset(&mask, SIGTERM); + sigaddset(&mask, SIGCHLD); + sigprocmask(SIG_BLOCK, &mask, NULL); + + /* Use mmap for tracking structures to avoid glibc heap usage */ + struct leaf_info *leaves = mmap(NULL, n_leafs * sizeof(struct leaf_info), + PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (leaves == MAP_FAILED) { + const char *msg = "mmap leaves array failed\n"; + (void)!write(2, msg, strlen(msg)); + _exit(1); + } + + for (int i = 0; i < n_leafs; i++) { + size_t stack_size = 64 * 1024; + void *stack = mmap(NULL, stack_size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (stack == MAP_FAILED) { + const char *msg = "mmap leaf stack failed\n"; + (void)!write(2, msg, strlen(msg)); + _exit(1); + } + + leaves[i].stack = stack; + + pid_t pid = clone(leaf_child_fn, stack + stack_size, + CLONE_VM | SIGCHLD, (void *)(uintptr_t)i); + + if (pid < 0) { + const char *msg = "clone (leaf child) failed\n"; + (void)!write(2, msg, strlen(msg)); + + /* Clean up successfully spawned children */ + for (int j = 0; j < i; j++) { + kill(leaves[j].pid, SIGTERM); + waitpid(leaves[j].pid, NULL, 0); + munmap(leaves[j].stack, stack_size); + } + munmap(leaves, n_leafs * sizeof(struct leaf_info)); + + if (errno == EAGAIN) + _exit(4); + else + _exit(1); + } + leaves[i].pid = pid; + } + + int failed = 0; + + while (1) { + int sig; + + sigwait(&mask, &sig); + + if (sig == SIGTERM) { + break; + } else if (sig == SIGCHLD) { + int status; + pid_t pid; + + // Reap all dead children + while ((pid = waitpid(-1, &status, WNOHANG)) > 0) { + for (int i = 0; i < n_leafs; i++) { + if (leaves[i].pid == pid) { + leaves[i].pid = 0; + break; + } + } + if ((WIFEXITED(status) && WEXITSTATUS(status) != 0) || + WIFSIGNALED(status)) { + char buf[128]; + int len = snprintf(buf, sizeof(buf), + "[manager] child %d died unexpectedly (status %d)\n", + pid, WEXITSTATUS(status)); + (void)!write(2, buf, len); + failed = 1; + } + } + if (failed) + break; + } + } + + // Terminate all leaf kids + for (int i = 0; i < n_leafs; i++) { + if (leaves[i].pid > 0) + kill(leaves[i].pid, SIGTERM); + } + + for (int i = 0; i < n_leafs; i++) { + if (leaves[i].pid > 0) + waitpid(leaves[i].pid, NULL, 0); + munmap(leaves[i].stack, 64 * 1024); + } + + munmap(leaves, n_leafs * sizeof(struct leaf_info)); + + _exit(failed ? 1 : 0); +} + +/* -- Membarrier hammer thread -- */ +static void *hammer_thread_fn(void *arg) +{ + int target_cpu = *(int *)arg; + long local_ok = 0; + long local_err = 0; + int count = 0; + const int batch_size = 1024; + + if (rseq_register_thread() < 0) { + ksft_print_msg("[hammer] rseq_register failed: %s\n", strerror(errno)); + return NULL; + } + + membarrier_register_rseq_mm(); + + while (!atomic_load_explicit(&g_stop, memory_order_relaxed)) { + int r = syscall(SYS_membarrier, + MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ, + MEMBARRIER_CMD_FLAG_CPU, + target_cpu); + if (__builtin_expect(r == 0, 1)) + local_ok++; + else + local_err++; + + count++; + if (__builtin_expect(count >= batch_size, 0)) { + atomic_fetch_add_explicit(&g_mb_ok, local_ok, memory_order_relaxed); + atomic_fetch_add_explicit(&g_mb_err, local_err, memory_order_relaxed); + local_ok = 0; + local_err = 0; + count = 0; + } + } + + /* Flush any remaining counts on exit */ + if (local_ok > 0) + atomic_fetch_add_explicit(&g_mb_ok, local_ok, memory_order_relaxed); + if (local_err > 0) + atomic_fetch_add_explicit(&g_mb_err, local_err, memory_order_relaxed); + + return NULL; +} + +/* -- Latency sentinel -- */ +static void *sentinel_thread_fn(void *arg) +{ + (void)arg; + struct sched_param sp = { .sched_priority = 20 }; + + if (sched_setscheduler(0, SCHED_FIFO, &sp) < 0) + ksft_print_msg("WARN: no SCHED_FIFO for sentinel (less precise)\n"); + + while (!atomic_load_explicit(&g_test_ready, memory_order_relaxed) && + !atomic_load_explicit(&g_stop_sentinel, memory_order_relaxed)) { + struct timespec ts = {0, 1000 * 1000}; /* 1ms */ + + clock_nanosleep(CLOCK_MONOTONIC, 0, &ts, NULL); + } + + uint64_t prev = monotonic_us(); + + while (!atomic_load_explicit(&g_stop_sentinel, memory_order_relaxed)) { + struct timespec ts = { + .tv_sec = 0, + .tv_nsec = SENTINEL_INTERVAL_US * 1000L, + }; + clock_nanosleep(CLOCK_MONOTONIC, 0, &ts, NULL); + + uint64_t now = monotonic_us(); + long latency_us = (long)(now - prev) - SENTINEL_INTERVAL_US; + + prev = now; + + if (latency_us <= 0) + continue; + + update_max_latency(latency_us); + + if (latency_us > LATENCY_CRITICAL_MS * 1000L) { + ksft_print_msg("\n[SENTINEL] CRITICAL: %ld ms delay (lockup precursor!)\n", + latency_us / 1000); + } else if (latency_us > LATENCY_WARN_MS * 1000L) { + ksft_print_msg("\n[SENTINEL] WARN: %ld ms latency spike\n", + latency_us / 1000); + } + } + return NULL; +} + +/* -- Progress reporter -- */ +static void *reporter_thread_fn(void *arg) +{ + (void)arg; + int elapsed = 0; + + while (!atomic_load_explicit(&g_stop_sentinel, memory_order_relaxed)) { + for (int i = 0; i < 5; i++) { + sleep(1); + if (atomic_load_explicit(&g_stop_sentinel, memory_order_relaxed)) + break; + } + if (atomic_load_explicit(&g_stop_sentinel, memory_order_relaxed)) + break; + elapsed += 5; + long interval_max = atomic_exchange_explicit(&g_interval_max_latency_us, + 0, memory_order_relaxed); + + ksft_print_msg("[%3ds] mb: ok=%-10ld err=%-8ld | max_lat=%ld us\n", + elapsed, + atomic_load(&g_mb_ok), + atomic_load(&g_mb_err), + interval_max); + } + return NULL; +} + +/* -- Main -- */ +int main(void) +{ + ksft_print_header(); +#ifdef UNSUPPORTED_ARCH + ksft_exit_skip("Unsupported architecture\n"); +#endif + ksft_set_plan(1); + + if (geteuid() != 0) + ksft_exit_skip("Must run as root (cgroup + SCHED_FIFO)\n"); + + init_stress_cpus(); + + ksft_print_msg("=== membarrier rseq + CFS unthrottle stress ===\n"); + ksft_print_msg("Stressing CPUs: %d\n", g_ncpus_stress); + ksft_print_msg("Quota: %d/%d us (~%d unthrottles/sec/CPU)\n", + CFS_QUOTA_US, CFS_PERIOD_US, + 1000000 / CFS_PERIOD_US); + ksft_print_msg("Hammer threads: %d per CPU (%d total)\n", + N_HAMMER_PER_CPU, g_ncpus_stress * N_HAMMER_PER_CPU); + ksft_print_msg("Duration: %d seconds\n\n", TEST_DURATION_SEC); + + if (cgroup_setup() < 0) { + cgroup_teardown(); + ksft_exit_skip("cgroup_setup failed (missing permissions or v2 ctrls?)\n"); + } + + if (rseq_register_thread() < 0) { + ksft_print_msg("rseq_register (%s) failed: %s\n", __func__, strerror(errno)); + cgroup_teardown(); + ksft_exit_skip("rseq syscall failed or not available\n"); + } + if (membarrier_register_rseq_mm() < 0) { + ksft_print_msg("MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ: %s\n" + "Kernel >= 5.10 with CONFIG_RSEQ required.\n", + strerror(errno)); + cgroup_teardown(); + ksft_exit_skip("membarrier register failed\n"); + } + ksft_print_msg("rseq membarrier registered OK\n"); + + sigset_t sigmask; + + sigemptyset(&sigmask); + sigaddset(&sigmask, SIGTERM); + sigprocmask(SIG_BLOCK, &sigmask, NULL); + + void *stack = malloc(1024 * 1024); + + if (!stack) { + perror("malloc stack"); + cgroup_teardown(); + ksft_exit_fail_msg("Malloc stack failed\n"); + } + pid_t child = clone(run_throttle_child, stack + 1024 * 1024, CLONE_VM | SIGCHLD, NULL); + + if (child < 0) { + perror("clone"); + cgroup_teardown(); + ksft_exit_fail_msg("Clone failed\n"); + } + + sigprocmask(SIG_UNBLOCK, &sigmask, NULL); + ksft_print_msg("Throttle child PID %d started\n", child); + + int n_threads = g_ncpus_stress * N_HAMMER_PER_CPU + 2; + pthread_t *threads = (pthread_t *)calloc(n_threads, sizeof(pthread_t)); + int *cpuargs = (int *)calloc(g_ncpus_stress * N_HAMMER_PER_CPU, sizeof(int)); + + if (!threads || !cpuargs) { + perror("calloc"); + kill(child, SIGTERM); + waitpid(child, NULL, 0); + cgroup_teardown(); + ksft_exit_fail_msg("Thread allocation failed\n"); + } + + int ti = 0, ai = 0; + int r; + + ksft_print_msg("Creating sentinel thread...\n"); + r = pthread_create(&threads[ti], NULL, sentinel_thread_fn, NULL); + if (r != 0) { + kill(child, SIGTERM); + waitpid(child, NULL, 0); + cgroup_teardown(); + free(threads); + free(cpuargs); + free(g_stress_cpus); + ksft_exit_fail_msg("pthread_create (sentinel) failed: %s\n", strerror(r)); + } + ti++; + + ksft_print_msg("Creating reporter thread...\n"); + r = pthread_create(&threads[ti], NULL, reporter_thread_fn, NULL); + if (r != 0) { + atomic_store(&g_stop_sentinel, 1); + pthread_join(threads[0], NULL); + kill(child, SIGTERM); + waitpid(child, NULL, 0); + cgroup_teardown(); + free(threads); + free(cpuargs); + free(g_stress_cpus); + ksft_exit_fail_msg("pthread_create (reporter) failed: %s\n", strerror(r)); + } + ti++; + + ksft_print_msg("Creating %d hammer threads...\n", g_ncpus_stress * N_HAMMER_PER_CPU); + for (int i = 0; i < g_ncpus_stress; i++) { + int cpu = g_stress_cpus[i]; + + for (int j = 0; j < N_HAMMER_PER_CPU; j++) { + cpuargs[ai] = cpu; + r = pthread_create(&threads[ti], NULL, hammer_thread_fn, &cpuargs[ai]); + if (r != 0) { + ksft_print_msg("pthread_create failed at thread %d: %s\n", + ti, strerror(r)); + + atomic_store(&g_stop_sentinel, 1); + pthread_join(threads[0], NULL); + pthread_join(threads[1], NULL); + + atomic_store(&g_stop, 1); + for (int k = 2; k < ti; k++) + pthread_join(threads[k], NULL); + + kill(child, SIGTERM); + waitpid(child, NULL, 0); + cgroup_teardown(); + + free(threads); + free(cpuargs); + free(g_stress_cpus); + + if (r == EAGAIN) + ksft_exit_skip("Resource limits prevent threads\n"); + else + ksft_exit_fail_msg("Failed to create hammer thread\n"); + } + ti++; + ai++; + } + } + + ksft_print_msg("All threads running. Tip: monitor dmesg for lockups\n\n"); + + atomic_store_explicit(&g_test_ready, 1, memory_order_relaxed); + int child_failed = 0; + int child_status = 0; + + for (int i = 0; i < TEST_DURATION_SEC; i++) { + sleep(1); + int r = waitpid(child, &child_status, WNOHANG); + + if (r == child) { + child_failed = 1; + break; + } + } + + atomic_store(&g_stop_sentinel, 1); + pthread_join(threads[0], NULL); + pthread_join(threads[1], NULL); + + atomic_store(&g_stop, 1); + + /* Unthrottle to allow children to exit quickly */ + cgroup_unthrottle(); + + if (!child_failed) { + kill(child, SIGTERM); + waitpid(child, NULL, 0); + } + for (int i = 2; i < ti; i++) + pthread_join(threads[i], NULL); + + long max_lat = atomic_load(&g_max_latency_us); + long total_ok = atomic_load(&g_mb_ok); + long total_err = atomic_load(&g_mb_err); + + ksft_print_msg("\n=== RESULTS ===\n"); + ksft_print_msg("membarrier syscalls : %ld ok %ld errors\n", total_ok, total_err); + ksft_print_msg("Max scheduler latency: %ld us (%ld ms)\n", max_lat, max_lat / 1000); + cgroup_teardown(); + free(threads); + free(cpuargs); + free(g_stress_cpus); + + if (child_failed) { + if (WIFEXITED(child_status) && WEXITSTATUS(child_status) == 4) + ksft_exit_skip("Manager child skipped (resource limits?)\n"); + ksft_test_result_fail("membarrier_rseq_stress: Manager child died early\n"); + ksft_exit_fail(); + } else if (total_ok == 0) { + ksft_test_result_fail("membarrier_rseq_stress: No successful membarrier calls\n"); + ksft_exit_fail(); + } else if (total_err > 0) { + ksft_test_result_fail("membarrier_rseq_stress: syscall errors\n"); + ksft_exit_fail(); + } else if (max_lat > LATENCY_CRITICAL_MS * 1000L) { + ksft_test_result_fail("membarrier_rseq_stress: LOCKUP PRECURSOR\n"); + ksft_exit_fail(); + } else if (max_lat > LATENCY_WARN_MS * 1000L) { + ksft_test_result_fail("membarrier_rseq_stress: significant latency spike\n"); + ksft_exit_fail(); + } else { + ksft_test_result_pass("membarrier_rseq_stress\n"); + ksft_exit_pass(); + } + + return 0; +} diff --git a/tools/testing/selftests/timers/posix_timers.c b/tools/testing/selftests/timers/posix_timers.c index 38512623622a5..2f3bac9fc6e87 100644 --- a/tools/testing/selftests/timers/posix_timers.c +++ b/tools/testing/selftests/timers/posix_timers.c @@ -78,19 +78,25 @@ static void sig_handler(int nr) done = 1; } +static inline int64_t calcdiff_ns(struct timespec t1, struct timespec t2) +{ + int64_t diff; + + diff = NSEC_PER_SEC * (int64_t)((int) t1.tv_sec - (int) t2.tv_sec); + diff += ((int) t1.tv_nsec - (int) t2.tv_nsec); + return diff; +} + /* * Check the expected timer expiration matches the GTOD elapsed delta since * we armed the timer. Keep a 0.5 sec error margin due to various jitter. */ -static int check_diff(struct timeval start, struct timeval end) +static int check_diff(struct timespec start, struct timespec end) { - long long diff; - - diff = end.tv_usec - start.tv_usec; - diff += (end.tv_sec - start.tv_sec) * USEC_PER_SEC; + long long diff = calcdiff_ns(end, start); - if (llabs(diff - DELAY * USEC_PER_SEC) > USEC_PER_SEC / 2) { - printf("Diff too high: %lld..", diff); + if (llabs(diff - DELAY * NSEC_PER_SEC) > NSEC_PER_SEC / 2) { + printf("Diff too high: %lld ns..", diff); return -1; } @@ -99,22 +105,25 @@ static int check_diff(struct timeval start, struct timeval end) static void check_itimer(int which, const char *name) { - struct timeval start, end; + struct timespec start, end; struct itimerval val = { .it_value.tv_sec = DELAY, }; + int clock_id = CLOCK_REALTIME; done = 0; if (which == ITIMER_VIRTUAL) signal(SIGVTALRM, sig_handler); - else if (which == ITIMER_PROF) + else if (which == ITIMER_PROF) { + clock_id = CLOCK_THREAD_CPUTIME_ID; signal(SIGPROF, sig_handler); + } else if (which == ITIMER_REAL) signal(SIGALRM, sig_handler); - if (gettimeofday(&start, NULL) < 0) - fatal_error(name, "gettimeofday()"); + if (clock_gettime(clock_id, &start)) + fatal_error(name, "clock_gettime()"); if (setitimer(which, &val, NULL) < 0) fatal_error(name, "setitimer()"); @@ -126,18 +135,19 @@ static void check_itimer(int which, const char *name) else if (which == ITIMER_REAL) idle_loop(); - if (gettimeofday(&end, NULL) < 0) - fatal_error(name, "gettimeofday()"); + if (clock_gettime(clock_id, &end)) + fatal_error(name, "clock_gettime()"); ksft_test_result(check_diff(start, end) == 0, "%s\n", name); } static void check_timer_create(int which, const char *name) { - struct timeval start, end; + struct timespec start, end; struct itimerspec val = { .it_value.tv_sec = DELAY, }; + int clock_id = CLOCK_REALTIME; timer_t id; done = 0; @@ -148,16 +158,16 @@ static void check_timer_create(int which, const char *name) if (signal(SIGALRM, sig_handler) == SIG_ERR) fatal_error(name, "signal()"); - if (gettimeofday(&start, NULL) < 0) - fatal_error(name, "gettimeofday()"); + if (clock_gettime(clock_id, &start)) + fatal_error(name, "clock_gettime()"); if (timer_settime(id, 0, &val, NULL) < 0) fatal_error(name, "timer_settime()"); user_loop(); - if (gettimeofday(&end, NULL) < 0) - fatal_error(name, "gettimeofday()"); + if (clock_gettime(clock_id, &end)) + fatal_error(name, "clock_gettime()"); ksft_test_result(check_diff(start, end) == 0, "timer_create() per %s\n", name); @@ -445,15 +455,6 @@ static void check_delete(void) ksft_test_result(!tsig.signals, "check_delete\n"); } -static inline int64_t calcdiff_ns(struct timespec t1, struct timespec t2) -{ - int64_t diff; - - diff = NSEC_PER_SEC * (int64_t)((int) t1.tv_sec - (int) t2.tv_sec); - diff += ((int) t1.tv_nsec - (int) t2.tv_nsec); - return diff; -} - static void check_sigev_none(int which, const char *name) { struct timespec start, now; |
