Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

perf/hw_breakpoint: Optimize constant number of breakpoint slots

Optimize internal hw_breakpoint state if the architecture's number of
breakpoint slots is constant. This avoids several kmalloc() calls and
potentially unnecessary failures if the allocations fail, as well as
subtly improves code generation and cache locality.

The protocol is that if an architecture defines hw_breakpoint_slots via
the preprocessor, it must be constant and the same for all types.

Signed-off-by: Marco Elver <elver@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Dmitry Vyukov <dvyukov@google.com>
Acked-by: Ian Rogers <irogers@google.com>
Link: https://lore.kernel.org/r/20220829124719.675715-7-elver@google.com

Authored by Marco Elver; committed by Peter Zijlstra.
be3f1525 db5f6f85

+63 -41
+1 -4
arch/sh/include/asm/hw_breakpoint.h
···
 48  48    /* Maximum number of UBC channels */
 49  49    #define HBP_NUM 2
 50  50
 51      -  static inline int hw_breakpoint_slots(int type)
 52      -  {
 53      -  	return HBP_NUM;
 54      -  }
     51  + #define hw_breakpoint_slots(type) (HBP_NUM)
 55  52
 56  53    /* arch/sh/kernel/hw_breakpoint.c */
 57  54    extern int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint *hw);
+1 -4
arch/x86/include/asm/hw_breakpoint.h
···
 44  44    /* Total number of available HW breakpoint registers */
 45  45    #define HBP_NUM 4
 46  46
 47      -  static inline int hw_breakpoint_slots(int type)
 48      -  {
 49      -  	return HBP_NUM;
 50      -  }
     47  + #define hw_breakpoint_slots(type) (HBP_NUM)
 51  48
 52  49    struct perf_event_attr;
 53  50    struct perf_event;
+61 -33
kernel/events/hw_breakpoint.c
···
 40  40    /* Number of pinned cpu breakpoints in a cpu */
 41  41    unsigned int cpu_pinned;
 42  42    /* tsk_pinned[n] is the number of tasks having n+1 breakpoints */
     43  + #ifdef hw_breakpoint_slots
     44  + 	unsigned int tsk_pinned[hw_breakpoint_slots(0)];
     45  + #else
 43  46    unsigned int *tsk_pinned;
     47  + #endif
 44  48    /* Number of non-pinned cpu/task breakpoints in a cpu */
 45  49    unsigned int flexible; /* XXX: placeholder, see fetch_this_slot() */
 46  50    };
 47  51
 48  52    static DEFINE_PER_CPU(struct bp_cpuinfo, bp_cpuinfo[TYPE_MAX]);
 49      -  static int nr_slots[TYPE_MAX] __ro_after_init;
 50  53
 51  54    static struct bp_cpuinfo *get_bp_info(int cpu, enum bp_type_idx type)
 52  55    {
···
 76  73    /* Serialize accesses to the above constraints */
 77  74    static DEFINE_MUTEX(nr_bp_mutex);
 78  75
     76  + #ifdef hw_breakpoint_slots
     77  + /*
     78  +  * Number of breakpoint slots is constant, and the same for all types.
     79  +  */
     80  + static_assert(hw_breakpoint_slots(TYPE_INST) == hw_breakpoint_slots(TYPE_DATA));
     81  + static inline int hw_breakpoint_slots_cached(int type) { return hw_breakpoint_slots(type); }
     82  + static inline int init_breakpoint_slots(void) { return 0; }
     83  + #else
     84  + /*
     85  +  * Dynamic number of breakpoint slots.
     86  +  */
     87  + static int __nr_bp_slots[TYPE_MAX] __ro_after_init;
     88  +
     89  + static inline int hw_breakpoint_slots_cached(int type)
     90  + {
     91  + 	return __nr_bp_slots[type];
     92  + }
     93  +
     94  + static __init int init_breakpoint_slots(void)
     95  + {
     96  + 	int i, cpu, err_cpu;
     97  +
     98  + 	for (i = 0; i < TYPE_MAX; i++)
     99  + 		__nr_bp_slots[i] = hw_breakpoint_slots(i);
    100  +
    101  + 	for_each_possible_cpu(cpu) {
    102  + 		for (i = 0; i < TYPE_MAX; i++) {
    103  + 			struct bp_cpuinfo *info = get_bp_info(cpu, i);
    104  +
    105  + 			info->tsk_pinned = kcalloc(__nr_bp_slots[i], sizeof(int), GFP_KERNEL);
    106  + 			if (!info->tsk_pinned)
    107  + 				goto err;
    108  + 		}
    109  + 	}
    110  +
    111  + 	return 0;
    112  + err:
    113  + 	for_each_possible_cpu(err_cpu) {
    114  + 		for (i = 0; i < TYPE_MAX; i++)
    115  + 			kfree(get_bp_info(err_cpu, i)->tsk_pinned);
    116  + 		if (err_cpu == cpu)
    117  + 			break;
    118  + 	}
    119  +
    120  + 	return -ENOMEM;
    121  + }
    122  + #endif
    123  +
 79 124    __weak int hw_breakpoint_weight(struct perf_event *bp)
 80 125    {
 81 126    	return 1;
···
146  95    unsigned int *tsk_pinned = get_bp_info(cpu, type)->tsk_pinned;
147  96    int i;
148  97
149      -  for (i = nr_slots[type] - 1; i >= 0; i--) {
     98  + 	for (i = hw_breakpoint_slots_cached(type) - 1; i >= 0; i--) {
150  99    	if (tsk_pinned[i] > 0)
151 100    		return i + 1;
152 101    }
···
363 312    fetch_this_slot(&slots, weight);
364 313
365 314    /* Flexible counters need to keep at least one slot */
366      -  if (slots.pinned + (!!slots.flexible) > nr_slots[type])
    315  + 	if (slots.pinned + (!!slots.flexible) > hw_breakpoint_slots_cached(type))
367 316    	return -ENOSPC;
368 317
369 318    ret = arch_reserve_bp_slot(bp);
···
683 632    if (info->cpu_pinned)
684 633    	return true;
685 634
686      -  for (int slot = 0; slot < nr_slots[type]; ++slot) {
    635  + 	for (int slot = 0; slot < hw_breakpoint_slots_cached(type); ++slot) {
687 636    	if (info->tsk_pinned[slot])
688 637    		return true;
689 638    }
···
767 716
768 717    int __init init_hw_breakpoint(void)
769 718    {
770      -  int cpu, err_cpu;
771      -  int i, ret;
772      -
773      -  for (i = 0; i < TYPE_MAX; i++)
774      -  	nr_slots[i] = hw_breakpoint_slots(i);
775      -
776      -  for_each_possible_cpu(cpu) {
777      -  	for (i = 0; i < TYPE_MAX; i++) {
778      -  		struct bp_cpuinfo *info = get_bp_info(cpu, i);
779      -
780      -  		info->tsk_pinned = kcalloc(nr_slots[i], sizeof(int),
781      -  					   GFP_KERNEL);
782      -  		if (!info->tsk_pinned) {
783      -  			ret = -ENOMEM;
784      -  			goto err;
785      -  		}
786      -  	}
787      -  }
    719  + 	int ret;
788 720
789 721    ret = rhltable_init(&task_bps_ht, &task_bps_ht_params);
790 722    if (ret)
791      -  	goto err;
    723  + 		return ret;
    724  +
    725  + 	ret = init_breakpoint_slots();
    726  + 	if (ret)
    727  + 		return ret;
792 728
793 729    constraints_initialized = true;
794 730
795 731    perf_pmu_register(&perf_breakpoint, "breakpoint", PERF_TYPE_BREAKPOINT);
796 732
797 733    return register_die_notifier(&hw_breakpoint_exceptions_nb);
798      -
799      -  err:
800      -  for_each_possible_cpu(err_cpu) {
801      -  	for (i = 0; i < TYPE_MAX; i++)
802      -  		kfree(get_bp_info(err_cpu, i)->tsk_pinned);
803      -  	if (err_cpu == cpu)
804      -  		break;
805      -  }
806      -
807      -  return ret;
808 734    }