Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

tools lib: Adopt list_sort() from the kernel sources

Add list_sort.[ch] from the main kernel tree. The linux/bug.h #include
is removed due to conflicting definitions. Add check-headers and modify
perf build accordingly.

MANIFEST and python-ext-sources fixes suggested by Arnaldo.

Suggested-by: Arnaldo Carvalho de Melo <acme@kernel.org>
Signed-off-by: Ian Rogers <irogers@google.com>
Acked-by: Andi Kleen <ak@linux.intel.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Antonov <alexander.antonov@linux.intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andrew Kilroy <andrew.kilroy@arm.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Changbin Du <changbin.du@intel.com>
Cc: Denys Zagorui <dzagorui@cisco.com>
Cc: Fabian Hemmer <copy@copy.sh>
Cc: Felix Fietkau <nbd@nbd.name>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jacob Keller <jacob.e.keller@intel.com>
Cc: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Cc: Jin Yao <yao.jin@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Joakim Zhang <qiangqing.zhang@nxp.com>
Cc: John Garry <john.garry@huawei.com>
Cc: Kajol Jain <kjain@linux.ibm.com>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Nicholas Fraser <nfraser@codeweavers.com>
Cc: Nick Desaulniers <ndesaulniers@google.com>
Cc: Paul Clarke <pc@us.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Riccardo Mancini <rickyman7@gmail.com>
Cc: Sami Tolvanen <samitolvanen@google.com>
Cc: ShihCheng Tu <mrtoastcheng@gmail.com>
Cc: Song Liu <songliubraving@fb.com>
Cc: Stephane Eranian <eranian@google.com>
Cc: Sumanth Korikkar <sumanthk@linux.ibm.com>
Cc: Thomas Richter <tmricht@linux.ibm.com>
Cc: Wan Jiabing <wanjiabing@vivo.com>
Cc: Zhen Lei <thunder.leizhen@huawei.com>
Link: https://lore.kernel.org/r/20211015172132.1162559-2-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

authored by

Ian Rogers and committed by
Arnaldo Carvalho de Melo
92ec3cc9 f792cf8a

+275
+14
tools/include/linux/list_sort.h
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_LIST_SORT_H
#define _LINUX_LIST_SORT_H

#include <linux/types.h>

struct list_head;

/*
 * Comparison callback type accepted by list_sort(). The first argument is
 * the caller's private pointer; the second and third are the two list
 * nodes being compared and are both declared nonnull.
 */
typedef int __attribute__((nonnull(2,3))) (*list_cmp_func_t)(void *priv,
		const struct list_head *a, const struct list_head *b);

/*
 * Sort the list anchored at @head with @cmp. @head and @cmp are declared
 * nonnull; @priv carries no such annotation.
 */
__attribute__((nonnull(2,3)))
void list_sort(void *priv, struct list_head *head, list_cmp_func_t cmp);

#endif /* _LINUX_LIST_SORT_H */
+252
tools/lib/list_sort.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + #include <linux/kernel.h> 3 + #include <linux/compiler.h> 4 + #include <linux/export.h> 5 + #include <linux/string.h> 6 + #include <linux/list_sort.h> 7 + #include <linux/list.h> 8 + 9 + /* 10 + * Returns a list organized in an intermediate format suited 11 + * to chaining of merge() calls: null-terminated, no reserved or 12 + * sentinel head node, "prev" links not maintained. 13 + */ 14 + __attribute__((nonnull(2,3,4))) 15 + static struct list_head *merge(void *priv, list_cmp_func_t cmp, 16 + struct list_head *a, struct list_head *b) 17 + { 18 + struct list_head *head, **tail = &head; 19 + 20 + for (;;) { 21 + /* if equal, take 'a' -- important for sort stability */ 22 + if (cmp(priv, a, b) <= 0) { 23 + *tail = a; 24 + tail = &a->next; 25 + a = a->next; 26 + if (!a) { 27 + *tail = b; 28 + break; 29 + } 30 + } else { 31 + *tail = b; 32 + tail = &b->next; 33 + b = b->next; 34 + if (!b) { 35 + *tail = a; 36 + break; 37 + } 38 + } 39 + } 40 + return head; 41 + } 42 + 43 + /* 44 + * Combine final list merge with restoration of standard doubly-linked 45 + * list structure. This approach duplicates code from merge(), but 46 + * runs faster than the tidier alternatives of either a separate final 47 + * prev-link restoration pass, or maintaining the prev links 48 + * throughout. 
49 + */ 50 + __attribute__((nonnull(2,3,4,5))) 51 + static void merge_final(void *priv, list_cmp_func_t cmp, struct list_head *head, 52 + struct list_head *a, struct list_head *b) 53 + { 54 + struct list_head *tail = head; 55 + u8 count = 0; 56 + 57 + for (;;) { 58 + /* if equal, take 'a' -- important for sort stability */ 59 + if (cmp(priv, a, b) <= 0) { 60 + tail->next = a; 61 + a->prev = tail; 62 + tail = a; 63 + a = a->next; 64 + if (!a) 65 + break; 66 + } else { 67 + tail->next = b; 68 + b->prev = tail; 69 + tail = b; 70 + b = b->next; 71 + if (!b) { 72 + b = a; 73 + break; 74 + } 75 + } 76 + } 77 + 78 + /* Finish linking remainder of list b on to tail */ 79 + tail->next = b; 80 + do { 81 + /* 82 + * If the merge is highly unbalanced (e.g. the input is 83 + * already sorted), this loop may run many iterations. 84 + * Continue callbacks to the client even though no 85 + * element comparison is needed, so the client's cmp() 86 + * routine can invoke cond_resched() periodically. 87 + */ 88 + if (unlikely(!++count)) 89 + cmp(priv, b, b); 90 + b->prev = tail; 91 + tail = b; 92 + b = b->next; 93 + } while (b); 94 + 95 + /* And the final links to make a circular doubly-linked list */ 96 + tail->next = head; 97 + head->prev = tail; 98 + } 99 + 100 + /** 101 + * list_sort - sort a list 102 + * @priv: private data, opaque to list_sort(), passed to @cmp 103 + * @head: the list to sort 104 + * @cmp: the elements comparison function 105 + * 106 + * The comparison function @cmp must return > 0 if @a should sort after 107 + * @b ("@a > @b" if you want an ascending sort), and <= 0 if @a should 108 + * sort before @b *or* their original order should be preserved. It is 109 + * always called with the element that came first in the input in @a, 110 + * and list_sort is a stable sort, so it is not necessary to distinguish 111 + * the @a < @b and @a == @b cases. 
112 + * 113 + * This is compatible with two styles of @cmp function: 114 + * - The traditional style which returns <0 / =0 / >0, or 115 + * - Returning a boolean 0/1. 116 + * The latter offers a chance to save a few cycles in the comparison 117 + * (which is used by e.g. plug_ctx_cmp() in block/blk-mq.c). 118 + * 119 + * A good way to write a multi-word comparison is:: 120 + * 121 + * if (a->high != b->high) 122 + * return a->high > b->high; 123 + * if (a->middle != b->middle) 124 + * return a->middle > b->middle; 125 + * return a->low > b->low; 126 + * 127 + * 128 + * This mergesort is as eager as possible while always performing at least 129 + * 2:1 balanced merges. Given two pending sublists of size 2^k, they are 130 + * merged to a size-2^(k+1) list as soon as we have 2^k following elements. 131 + * 132 + * Thus, it will avoid cache thrashing as long as 3*2^k elements can 133 + * fit into the cache. Not quite as good as a fully-eager bottom-up 134 + * mergesort, but it does use 0.2*n fewer comparisons, so is faster in 135 + * the common case that everything fits into L1. 136 + * 137 + * 138 + * The merging is controlled by "count", the number of elements in the 139 + * pending lists. This is beautifully simple code, but rather subtle. 140 + * 141 + * Each time we increment "count", we set one bit (bit k) and clear 142 + * bits k-1 .. 0. Each time this happens (except the very first time 143 + * for each bit, when count increments to 2^k), we merge two lists of 144 + * size 2^k into one list of size 2^(k+1). 145 + * 146 + * This merge happens exactly when the count reaches an odd multiple of 147 + * 2^k, which is when we have 2^k elements pending in smaller lists, 148 + * so it's safe to merge away two lists of size 2^k. 149 + * 150 + * After this happens twice, we have created two lists of size 2^(k+1), 151 + * which will be merged into a list of size 2^(k+2) before we create 152 + * a third list of size 2^(k+1), so there are never more than two pending. 
153 + * 154 + * The number of pending lists of size 2^k is determined by the 155 + * state of bit k of "count" plus two extra pieces of information: 156 + * 157 + * - The state of bit k-1 (when k == 0, consider bit -1 always set), and 158 + * - Whether the higher-order bits are zero or non-zero (i.e. 159 + * is count >= 2^(k+1)). 160 + * 161 + * There are six states we distinguish. "x" represents some arbitrary 162 + * bits, and "y" represents some arbitrary non-zero bits: 163 + * 0: 00x: 0 pending of size 2^k; x pending of sizes < 2^k 164 + * 1: 01x: 0 pending of size 2^k; 2^(k-1) + x pending of sizes < 2^k 165 + * 2: x10x: 0 pending of size 2^k; 2^k + x pending of sizes < 2^k 166 + * 3: x11x: 1 pending of size 2^k; 2^(k-1) + x pending of sizes < 2^k 167 + * 4: y00x: 1 pending of size 2^k; 2^k + x pending of sizes < 2^k 168 + * 5: y01x: 2 pending of size 2^k; 2^(k-1) + x pending of sizes < 2^k 169 + * (merge and loop back to state 2) 170 + * 171 + * We gain lists of size 2^k in the 2->3 and 4->5 transitions (because 172 + * bit k-1 is set while the more significant bits are non-zero) and 173 + * merge them away in the 5->2 transition. Note in particular that just 174 + * before the 5->2 transition, all lower-order bits are 11 (state 3), 175 + * so there is one list of each smaller size. 176 + * 177 + * When we reach the end of the input, we merge all the pending 178 + * lists, from smallest to largest. If you work through cases 2 to 179 + * 5 above, you can see that the number of elements we merge with a list 180 + * of size 2^k varies from 2^(k-1) (cases 3 and 5 when x == 0) to 181 + * 2^(k+1) - 1 (second merge of case 5 when x == 2^(k-1) - 1). 
182 + */ 183 + __attribute__((nonnull(2,3))) 184 + void list_sort(void *priv, struct list_head *head, list_cmp_func_t cmp) 185 + { 186 + struct list_head *list = head->next, *pending = NULL; 187 + size_t count = 0; /* Count of pending */ 188 + 189 + if (list == head->prev) /* Zero or one elements */ 190 + return; 191 + 192 + /* Convert to a null-terminated singly-linked list. */ 193 + head->prev->next = NULL; 194 + 195 + /* 196 + * Data structure invariants: 197 + * - All lists are singly linked and null-terminated; prev 198 + * pointers are not maintained. 199 + * - pending is a prev-linked "list of lists" of sorted 200 + * sublists awaiting further merging. 201 + * - Each of the sorted sublists is power-of-two in size. 202 + * - Sublists are sorted by size and age, smallest & newest at front. 203 + * - There are zero to two sublists of each size. 204 + * - A pair of pending sublists are merged as soon as the number 205 + * of following pending elements equals their size (i.e. 206 + * each time count reaches an odd multiple of that size). 207 + * That ensures each later final merge will be at worst 2:1. 208 + * - Each round consists of: 209 + * - Merging the two sublists selected by the highest bit 210 + * which flips when count is incremented, and 211 + * - Adding an element from the input as a size-1 sublist. 
212 + */ 213 + do { 214 + size_t bits; 215 + struct list_head **tail = &pending; 216 + 217 + /* Find the least-significant clear bit in count */ 218 + for (bits = count; bits & 1; bits >>= 1) 219 + tail = &(*tail)->prev; 220 + /* Do the indicated merge */ 221 + if (likely(bits)) { 222 + struct list_head *a = *tail, *b = a->prev; 223 + 224 + a = merge(priv, cmp, b, a); 225 + /* Install the merged result in place of the inputs */ 226 + a->prev = b->prev; 227 + *tail = a; 228 + } 229 + 230 + /* Move one element from input list to pending */ 231 + list->prev = pending; 232 + pending = list; 233 + list = list->next; 234 + pending->next = NULL; 235 + count++; 236 + } while (list); 237 + 238 + /* End of input; merge together all the pending lists. */ 239 + list = pending; 240 + pending = pending->prev; 241 + for (;;) { 242 + struct list_head *next = pending->prev; 243 + 244 + if (!next) 245 + break; 246 + list = merge(priv, cmp, pending, list); 247 + pending = next; 248 + } 249 + /* The final merge, rebuilding prev links */ 250 + merge_final(priv, cmp, head, pending, list); 251 + } 252 + EXPORT_SYMBOL(list_sort);
+1
tools/perf/MANIFEST
··· 17 17 tools/lib/symbol/kallsyms.h 18 18 tools/lib/find_bit.c 19 19 tools/lib/bitmap.c 20 + tools/lib/list_sort.c 20 21 tools/lib/str_error_r.c 21 22 tools/lib/vsprintf.c 22 23 tools/lib/zalloc.c
+2
tools/perf/check-headers.sh
··· 26 26 include/linux/const.h 27 27 include/vdso/const.h 28 28 include/linux/hash.h 29 + include/linux/list_sort.h 29 30 include/uapi/linux/hw_breakpoint.h 30 31 arch/x86/include/asm/disabled-features.h 31 32 arch/x86/include/asm/required-features.h ··· 151 150 check include/linux/build_bug.h '-I "^#\(ifndef\|endif\)\( \/\/\)* static_assert$"' 152 151 check include/linux/ctype.h '-I "isdigit("' 153 152 check lib/ctype.c '-I "^EXPORT_SYMBOL" -I "^#include <linux/export.h>" -B' 153 + check lib/list_sort.c '-I "^#include <linux/bug.h>"' 154 154 155 155 # diff non-symmetric files 156 156 check_2 tools/perf/arch/x86/entry/syscalls/syscall_64.tbl arch/x86/entry/syscalls/syscall_64.tbl
+5
tools/perf/util/Build
··· 138 138 perf-y += branch.o 139 139 perf-y += mem2node.o 140 140 perf-y += clockid.o 141 + perf-y += list_sort.o 141 142 142 143 perf-$(CONFIG_LIBBPF) += bpf-loader.o 143 144 perf-$(CONFIG_LIBBPF) += bpf_map.o ··· 314 313 $(call if_changed_dep,cc_o_c) 315 314 316 315 $(OUTPUT)util/vsprintf.o: ../lib/vsprintf.c FORCE 316 + $(call rule_mkdir) 317 + $(call if_changed_dep,cc_o_c) 318 + 319 + $(OUTPUT)util/list_sort.o: ../lib/list_sort.c FORCE 317 320 $(call rule_mkdir) 318 321 $(call if_changed_dep,cc_o_c)
+1
tools/perf/util/python-ext-sources
··· 18 18 util/namespaces.c 19 19 ../lib/bitmap.c 20 20 ../lib/find_bit.c 21 + ../lib/list_sort.c 21 22 ../lib/hweight.c 22 23 ../lib/string.c 23 24 ../lib/vsprintf.c