Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

objtool: Remove instruction::list

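Replace instruction::list by allocating instructions in arrays (chunks) of
256 entries and stringing the chunks together with find_insn(): the lookup
is only needed when stepping across a chunk boundary, so its cost is
amortized over the chunk. This shrinks struct instruction by 16 bytes,
bringing it down to 128 bytes (two cachelines).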

struct instruction {
- struct list_head list; /* 0 16 */
- struct hlist_node hash; /* 16 16 */
- struct list_head call_node; /* 32 16 */
- struct section * sec; /* 48 8 */
- long unsigned int offset; /* 56 8 */
- /* --- cacheline 1 boundary (64 bytes) --- */
- long unsigned int immediate; /* 64 8 */
- unsigned int len; /* 72 4 */
- u8 type; /* 76 1 */
-
- /* Bitfield combined with previous fields */
+ struct hlist_node hash; /* 0 16 */
+ struct list_head call_node; /* 16 16 */
+ struct section * sec; /* 32 8 */
+ long unsigned int offset; /* 40 8 */
+ long unsigned int immediate; /* 48 8 */
+ u8 len; /* 56 1 */
+ u8 prev_len; /* 57 1 */
+ u8 type; /* 58 1 */
+ s8 instr; /* 59 1 */
+ u32 idx:8; /* 60: 0 4 */
+ u32 dead_end:1; /* 60: 8 4 */
+ u32 ignore:1; /* 60: 9 4 */
+ u32 ignore_alts:1; /* 60:10 4 */
+ u32 hint:1; /* 60:11 4 */
+ u32 save:1; /* 60:12 4 */
+ u32 restore:1; /* 60:13 4 */
+ u32 retpoline_safe:1; /* 60:14 4 */
+ u32 noendbr:1; /* 60:15 4 */
+ u32 entry:1; /* 60:16 4 */
+ u32 visited:4; /* 60:17 4 */
+ u32 no_reloc:1; /* 60:21 4 */

- u16 dead_end:1; /* 76: 8 2 */
- u16 ignore:1; /* 76: 9 2 */
- u16 ignore_alts:1; /* 76:10 2 */
- u16 hint:1; /* 76:11 2 */
- u16 save:1; /* 76:12 2 */
- u16 restore:1; /* 76:13 2 */
- u16 retpoline_safe:1; /* 76:14 2 */
- u16 noendbr:1; /* 76:15 2 */
- u16 entry:1; /* 78: 0 2 */
- u16 visited:4; /* 78: 1 2 */
- u16 no_reloc:1; /* 78: 5 2 */
+ /* XXX 10 bits hole, try to pack */

- /* XXX 2 bits hole, try to pack */
- /* Bitfield combined with next fields */
-
- s8 instr; /* 79 1 */
- struct alt_group * alt_group; /* 80 8 */
- struct instruction * jump_dest; /* 88 8 */
- struct instruction * first_jump_src; /* 96 8 */
+ /* --- cacheline 1 boundary (64 bytes) --- */
+ struct alt_group * alt_group; /* 64 8 */
+ struct instruction * jump_dest; /* 72 8 */
+ struct instruction * first_jump_src; /* 80 8 */
union {
- struct symbol * _call_dest; /* 104 8 */
- struct reloc * _jump_table; /* 104 8 */
- }; /* 104 8 */
- struct alternative * alts; /* 112 8 */
- struct symbol * sym; /* 120 8 */
- /* --- cacheline 2 boundary (128 bytes) --- */
- struct stack_op * stack_ops; /* 128 8 */
- struct cfi_state * cfi; /* 136 8 */
+ struct symbol * _call_dest; /* 88 8 */
+ struct reloc * _jump_table; /* 88 8 */
+ }; /* 88 8 */
+ struct alternative * alts; /* 96 8 */
+ struct symbol * sym; /* 104 8 */
+ struct stack_op * stack_ops; /* 112 8 */
+ struct cfi_state * cfi; /* 120 8 */

- /* size: 144, cachelines: 3, members: 28 */
- /* sum members: 142 */
- /* sum bitfield members: 14 bits, bit holes: 1, sum bit holes: 2 bits */
- /* last cacheline: 16 bytes */
+ /* size: 128, cachelines: 2, members: 29 */
+ /* sum members: 124 */
+ /* sum bitfield members: 22 bits, bit holes: 1, sum bit holes: 10 bits */
};

pre: 5:38.18 real, 213.25 user, 124.90 sys, 23449040 mem
post: 5:03.34 real, 210.75 user, 88.80 sys, 20241232 mem

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>
Tested-by: Nathan Chancellor <nathan@kernel.org> # build only
Tested-by: Thomas Weißschuh <linux@weissschuh.net> # compile and run
Link: https://lore.kernel.org/r/20230208172245.851307606@infradead.org

Authored by Peter Zijlstra and committed by Ingo Molnar
1c34496e 6ea17e84

+134 -87
+107 -61
tools/objtool/check.c
··· 47 47 return NULL; 48 48 } 49 49 50 - static struct instruction *next_insn_same_sec(struct objtool_file *file, 51 - struct instruction *insn) 50 + struct instruction *next_insn_same_sec(struct objtool_file *file, 51 + struct instruction *insn) 52 52 { 53 - struct instruction *next = list_next_entry(insn, list); 53 + if (insn->idx == INSN_CHUNK_MAX) 54 + return find_insn(file, insn->sec, insn->offset + insn->len); 54 55 55 - if (!next || &next->list == &file->insn_list || next->sec != insn->sec) 56 + insn++; 57 + if (!insn->len) 56 58 return NULL; 57 59 58 - return next; 60 + return insn; 59 61 } 60 62 61 63 static struct instruction *next_insn_same_func(struct objtool_file *file, 62 64 struct instruction *insn) 63 65 { 64 - struct instruction *next = list_next_entry(insn, list); 66 + struct instruction *next = next_insn_same_sec(file, insn); 65 67 struct symbol *func = insn_func(insn); 66 68 67 69 if (!func) 68 70 return NULL; 69 71 70 - if (&next->list != &file->insn_list && insn_func(next) == func) 72 + if (next && insn_func(next) == func) 71 73 return next; 72 74 73 75 /* Check if we're already in the subfunction: */ ··· 80 78 return find_insn(file, func->cfunc->sec, func->cfunc->offset); 81 79 } 82 80 83 - static struct instruction *prev_insn_same_sym(struct objtool_file *file, 84 - struct instruction *insn) 81 + static struct instruction *prev_insn_same_sec(struct objtool_file *file, 82 + struct instruction *insn) 85 83 { 86 - struct instruction *prev = list_prev_entry(insn, list); 84 + if (insn->idx == 0) { 85 + if (insn->prev_len) 86 + return find_insn(file, insn->sec, insn->offset - insn->prev_len); 87 + return NULL; 88 + } 87 89 88 - if (&prev->list != &file->insn_list && insn_func(prev) == insn_func(insn)) 90 + return insn - 1; 91 + } 92 + 93 + static struct instruction *prev_insn_same_sym(struct objtool_file *file, 94 + struct instruction *insn) 95 + { 96 + struct instruction *prev = prev_insn_same_sec(file, insn); 97 + 98 + if (prev && 
insn_func(prev) == insn_func(insn)) 89 99 return prev; 90 100 91 101 return NULL; 92 102 } 103 + 104 + #define for_each_insn(file, insn) \ 105 + for (struct section *__sec, *__fake = (struct section *)1; \ 106 + __fake; __fake = NULL) \ 107 + for_each_sec(file, __sec) \ 108 + sec_for_each_insn(file, __sec, insn) 93 109 94 110 #define func_for_each_insn(file, func, insn) \ 95 111 for (insn = find_insn(file, func->sec, func->offset); \ ··· 116 96 117 97 #define sym_for_each_insn(file, sym, insn) \ 118 98 for (insn = find_insn(file, sym->sec, sym->offset); \ 119 - insn && &insn->list != &file->insn_list && \ 120 - insn->sec == sym->sec && \ 121 - insn->offset < sym->offset + sym->len; \ 122 - insn = list_next_entry(insn, list)) 99 + insn && insn->offset < sym->offset + sym->len; \ 100 + insn = next_insn_same_sec(file, insn)) 123 101 124 102 #define sym_for_each_insn_continue_reverse(file, sym, insn) \ 125 - for (insn = list_prev_entry(insn, list); \ 126 - &insn->list != &file->insn_list && \ 127 - insn->sec == sym->sec && insn->offset >= sym->offset; \ 128 - insn = list_prev_entry(insn, list)) 103 + for (insn = prev_insn_same_sec(file, insn); \ 104 + insn && insn->offset >= sym->offset; \ 105 + insn = prev_insn_same_sec(file, insn)) 129 106 130 107 #define sec_for_each_insn_from(file, insn) \ 131 108 for (; insn; insn = next_insn_same_sec(file, insn)) ··· 401 384 int ret; 402 385 403 386 for_each_sec(file, sec) { 387 + struct instruction *insns = NULL; 388 + u8 prev_len = 0; 389 + u8 idx = 0; 404 390 405 391 if (!(sec->sh.sh_flags & SHF_EXECINSTR)) 406 392 continue; ··· 429 409 sec->init = true; 430 410 431 411 for (offset = 0; offset < sec->sh.sh_size; offset += insn->len) { 432 - insn = malloc(sizeof(*insn)); 433 - if (!insn) { 434 - WARN("malloc failed"); 435 - return -1; 412 + if (!insns || idx == INSN_CHUNK_MAX) { 413 + insns = calloc(sizeof(*insn), INSN_CHUNK_SIZE); 414 + if (!insns) { 415 + WARN("malloc failed"); 416 + return -1; 417 + } 418 + idx = 0; 419 + } 
else { 420 + idx++; 436 421 } 437 - memset(insn, 0, sizeof(*insn)); 438 - INIT_LIST_HEAD(&insn->call_node); 422 + insn = &insns[idx]; 423 + insn->idx = idx; 439 424 425 + INIT_LIST_HEAD(&insn->call_node); 440 426 insn->sec = sec; 441 427 insn->offset = offset; 428 + insn->prev_len = prev_len; 442 429 443 430 ret = arch_decode_instruction(file, sec, offset, 444 431 sec->sh.sh_size - offset, 445 432 insn); 446 433 if (ret) 447 - goto err; 434 + return ret; 435 + 436 + prev_len = insn->len; 448 437 449 438 /* 450 439 * By default, "ud2" is a dead end unless otherwise ··· 464 435 insn->dead_end = true; 465 436 466 437 hash_add(file->insn_hash, &insn->hash, sec_offset_hash(sec, insn->offset)); 467 - list_add_tail(&insn->list, &file->insn_list); 468 438 nr_insns++; 469 439 } 440 + 441 + // printf("%s: last chunk used: %d\n", sec->name, (int)idx); 470 442 471 443 list_for_each_entry(func, &sec->symbol_list, list) { 472 444 if (func->type != STT_NOTYPE && func->type != STT_FUNC) ··· 511 481 printf("nr_insns: %lu\n", nr_insns); 512 482 513 483 return 0; 514 - 515 - err: 516 - free(insn); 517 - return ret; 518 484 } 519 485 520 486 /* ··· 625 599 } 626 600 insn = find_insn(file, reloc->sym->sec, reloc->addend); 627 601 if (insn) 628 - insn = list_prev_entry(insn, list); 602 + insn = prev_insn_same_sec(file, insn); 629 603 else if (reloc->addend == reloc->sym->sec->sh.sh_size) { 630 604 insn = find_last_insn(file, reloc->sym->sec); 631 605 if (!insn) { ··· 660 634 } 661 635 insn = find_insn(file, reloc->sym->sec, reloc->addend); 662 636 if (insn) 663 - insn = list_prev_entry(insn, list); 637 + insn = prev_insn_same_sec(file, insn); 664 638 else if (reloc->addend == reloc->sym->sec->sh.sh_size) { 665 639 insn = find_last_insn(file, reloc->sym->sec); 666 640 if (!insn) { ··· 1801 1775 orig_alt_group->orig_group = NULL; 1802 1776 orig_alt_group->first_insn = orig_insn; 1803 1777 orig_alt_group->last_insn = last_orig_insn; 1778 + orig_alt_group->nop = NULL; 1804 1779 } else { 
1805 1780 if (orig_alt_group->last_insn->offset + orig_alt_group->last_insn->len - 1806 1781 orig_alt_group->first_insn->offset != special_alt->orig_len) { ··· 1903 1876 return -1; 1904 1877 } 1905 1878 1906 - if (nop) 1907 - list_add(&nop->list, &last_new_insn->list); 1908 1879 end: 1909 1880 new_alt_group->orig_group = orig_alt_group; 1910 1881 new_alt_group->first_insn = *new_insn; 1911 - new_alt_group->last_insn = nop ? : last_new_insn; 1882 + new_alt_group->last_insn = last_new_insn; 1883 + new_alt_group->nop = nop; 1912 1884 new_alt_group->cfi = orig_alt_group->cfi; 1913 1885 return 0; 1914 1886 } ··· 1957 1931 else 1958 1932 file->jl_long++; 1959 1933 1960 - *new_insn = list_next_entry(orig_insn, list); 1934 + *new_insn = next_insn_same_sec(file, orig_insn); 1961 1935 return 0; 1962 1936 } 1963 1937 ··· 3548 3522 * Simulate the fact that alternatives are patched in-place. When the 3549 3523 * end of a replacement alt_group is reached, redirect objtool flow to 3550 3524 * the end of the original alt_group. 3525 + * 3526 + * insn->alts->insn -> alt_group->first_insn 3527 + * ... 
3528 + * alt_group->last_insn 3529 + * [alt_group->nop] -> next(orig_group->last_insn) 3551 3530 */ 3552 - if (alt_group && insn == alt_group->last_insn && alt_group->orig_group) 3553 - return next_insn_same_sec(file, alt_group->orig_group->last_insn); 3531 + if (alt_group) { 3532 + if (alt_group->nop) { 3533 + /* ->nop implies ->orig_group */ 3534 + if (insn == alt_group->last_insn) 3535 + return alt_group->nop; 3536 + if (insn == alt_group->nop) 3537 + goto next_orig; 3538 + } 3539 + if (insn == alt_group->last_insn && alt_group->orig_group) 3540 + goto next_orig; 3541 + } 3554 3542 3555 3543 return next_insn_same_sec(file, insn); 3544 + 3545 + next_orig: 3546 + return next_insn_same_sec(file, alt_group->orig_group->last_insn); 3556 3547 } 3557 3548 3558 3549 /* ··· 3820 3777 return 0; 3821 3778 } 3822 3779 3780 + static int validate_unwind_hint(struct objtool_file *file, 3781 + struct instruction *insn, 3782 + struct insn_state *state) 3783 + { 3784 + if (insn->hint && !insn->visited && !insn->ignore) { 3785 + int ret = validate_branch(file, insn_func(insn), insn, *state); 3786 + if (ret && opts.backtrace) 3787 + BT_FUNC("<=== (hint)", insn); 3788 + return ret; 3789 + } 3790 + 3791 + return 0; 3792 + } 3793 + 3823 3794 static int validate_unwind_hints(struct objtool_file *file, struct section *sec) 3824 3795 { 3825 3796 struct instruction *insn; 3826 3797 struct insn_state state; 3827 - int ret, warnings = 0; 3798 + int warnings = 0; 3828 3799 3829 3800 if (!file->hints) 3830 3801 return 0; ··· 3846 3789 init_insn_state(file, &state, sec); 3847 3790 3848 3791 if (sec) { 3849 - insn = find_insn(file, sec, 0); 3850 - if (!insn) 3851 - return 0; 3792 + sec_for_each_insn(file, sec, insn) 3793 + warnings += validate_unwind_hint(file, insn, &state); 3852 3794 } else { 3853 - insn = list_first_entry(&file->insn_list, typeof(*insn), list); 3854 - } 3855 - 3856 - while (&insn->list != &file->insn_list && (!sec || insn->sec == sec)) { 3857 - if (insn->hint && 
!insn->visited && !insn->ignore) { 3858 - ret = validate_branch(file, insn_func(insn), insn, state); 3859 - if (ret && opts.backtrace) 3860 - BT_FUNC("<=== (hint)", insn); 3861 - warnings += ret; 3862 - } 3863 - 3864 - insn = list_next_entry(insn, list); 3795 + for_each_insn(file, insn) 3796 + warnings += validate_unwind_hint(file, insn, &state); 3865 3797 } 3866 3798 3867 3799 return warnings; ··· 4116 4070 * 4117 4071 * It may also insert a UD2 after calling a __noreturn function. 4118 4072 */ 4119 - prev_insn = list_prev_entry(insn, list); 4073 + prev_insn = prev_insn_same_sec(file, insn); 4120 4074 if ((prev_insn->dead_end || 4121 4075 dead_end_function(file, insn_call_dest(prev_insn))) && 4122 4076 (insn->type == INSN_BUG || ··· 4148 4102 if (insn->offset + insn->len >= insn_func(insn)->offset + insn_func(insn)->len) 4149 4103 break; 4150 4104 4151 - insn = list_next_entry(insn, list); 4105 + insn = next_insn_same_sec(file, insn); 4152 4106 } 4153 4107 4154 4108 return false; ··· 4161 4115 return 0; 4162 4116 4163 4117 for (;;) { 4164 - struct instruction *prev = list_prev_entry(insn, list); 4118 + struct instruction *prev = prev_insn_same_sec(file, insn); 4165 4119 u64 offset; 4166 4120 4167 - if (&prev->list == &file->insn_list) 4121 + if (!prev) 4168 4122 break; 4169 4123 4170 4124 if (prev->type != INSN_NOP) ··· 4563 4517 4564 4518 warnings += ret; 4565 4519 4566 - if (list_empty(&file->insn_list)) 4520 + if (!nr_insns) 4567 4521 goto out; 4568 4522 4569 4523 if (opts.retpoline) { ··· 4672 4626 warnings += ret; 4673 4627 } 4674 4628 4675 - if (opts.orc && !list_empty(&file->insn_list)) { 4629 + if (opts.orc && nr_insns) { 4676 4630 ret = orc_create(file); 4677 4631 if (ret < 0) 4678 4632 goto out;
+27 -24
tools/objtool/include/objtool/check.h
··· 27 27 struct alt_group *orig_group; 28 28 29 29 /* First and last instructions in the group */ 30 - struct instruction *first_insn, *last_insn; 30 + struct instruction *first_insn, *last_insn, *nop; 31 31 32 32 /* 33 33 * Byte-offset-addressed len-sized array of pointers to CFI structs. ··· 36 36 struct cfi_state **cfi; 37 37 }; 38 38 39 + #define INSN_CHUNK_BITS 8 40 + #define INSN_CHUNK_SIZE (1 << INSN_CHUNK_BITS) 41 + #define INSN_CHUNK_MAX (INSN_CHUNK_SIZE - 1) 42 + 39 43 struct instruction { 40 - struct list_head list; 41 44 struct hlist_node hash; 42 45 struct list_head call_node; 43 46 struct section *sec; 44 47 unsigned long offset; 45 48 unsigned long immediate; 46 - unsigned int len; 49 + 50 + u8 len; 51 + u8 prev_len; 47 52 u8 type; 48 - 49 - u16 dead_end : 1, 50 - ignore : 1, 51 - ignore_alts : 1, 52 - hint : 1, 53 - save : 1, 54 - restore : 1, 55 - retpoline_safe : 1, 56 - noendbr : 1, 57 - entry : 1, 58 - visited : 4, 59 - no_reloc : 1; 60 - /* 2 bit hole */ 61 - 62 53 s8 instr; 54 + 55 + u32 idx : INSN_CHUNK_BITS, 56 + dead_end : 1, 57 + ignore : 1, 58 + ignore_alts : 1, 59 + hint : 1, 60 + save : 1, 61 + restore : 1, 62 + retpoline_safe : 1, 63 + noendbr : 1, 64 + entry : 1, 65 + visited : 4, 66 + no_reloc : 1; 67 + /* 10 bit hole */ 63 68 64 69 struct alt_group *alt_group; 65 70 struct instruction *jump_dest; ··· 114 109 struct instruction *find_insn(struct objtool_file *file, 115 110 struct section *sec, unsigned long offset); 116 111 117 - #define for_each_insn(file, insn) \ 118 - list_for_each_entry(insn, &file->insn_list, list) 112 + struct instruction *next_insn_same_sec(struct objtool_file *file, struct instruction *insn); 119 113 120 - #define sec_for_each_insn(file, sec, insn) \ 121 - for (insn = find_insn(file, sec, 0); \ 122 - insn && &insn->list != &file->insn_list && \ 123 - insn->sec == sec; \ 124 - insn = list_next_entry(insn, list)) 114 + #define sec_for_each_insn(file, _sec, insn) \ 115 + for (insn = find_insn(file, _sec, 0); \ 
116 + insn && insn->sec == _sec; \ 117 + insn = next_insn_same_sec(file, insn)) 125 118 126 119 #endif /* _CHECK_H */
-1
tools/objtool/include/objtool/objtool.h
··· 21 21 22 22 struct objtool_file { 23 23 struct elf *elf; 24 - struct list_head insn_list; 25 24 DECLARE_HASHTABLE(insn_hash, 20); 26 25 struct list_head retpoline_call_list; 27 26 struct list_head return_thunk_list;
-1
tools/objtool/objtool.c
··· 99 99 return NULL; 100 100 } 101 101 102 - INIT_LIST_HEAD(&file.insn_list); 103 102 hash_init(file.insn_hash); 104 103 INIT_LIST_HEAD(&file.retpoline_call_list); 105 104 INIT_LIST_HEAD(&file.return_thunk_list);