Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

rseq/selftests: Provide basic percpu ops test

"basic_percpu_ops_test" is a slightly more "realistic" variant,
implementing a few simple per-cpu operations and testing their
correctness.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Joel Fernandes <joelaf@google.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Dave Watson <davejwatson@fb.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Shuah Khan <shuahkh@osg.samsung.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: linux-kselftest@vger.kernel.org
Cc: "H . Peter Anvin" <hpa@zytor.com>
Cc: Chris Lameter <cl@linux.com>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Andrew Hunter <ahh@google.com>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: "Paul E . McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Paul Turner <pjt@google.com>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Josh Triplett <josh@joshtriplett.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Ben Maurer <bmaurer@fb.com>
Cc: linux-api@vger.kernel.org
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: https://lkml.kernel.org/r/20180602124408.8430-15-mathieu.desnoyers@efficios.com
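
The core pattern this test exercises is a retry loop around an rseq
compare-and-store: read the current CPU with rseq_cpu_start(), attempt
an update on that CPU's slot, and retry whenever the comparison fails
or the kernel aborts the critical section (for instance on migration,
preemption, or signal delivery). Below is a minimal sketch of a per-cpu
counter built from the same rseq.h helpers the test uses; the counters
array and function name are illustrative, not part of the patch:

#include <sched.h>      /* CPU_SETSIZE */
#include <stdint.h>

#include "rseq.h"

static intptr_t counters[CPU_SETSIZE];

/*
 * Increment this CPU's counter. The calling thread must already be
 * registered via rseq_register_current_thread().
 */
static void this_cpu_counter_inc(void)
{
        for (;;) {
                int cpu = rseq_cpu_start();
                intptr_t old = RSEQ_READ_ONCE(counters[cpu]);

                /* Commit old + 1 only if we are still on @cpu. */
                if (rseq_likely(!rseq_cmpeqv_storev(&counters[cpu],
                                                    old, old + 1, cpu)))
                        break;
                /* Aborted or value changed: re-read the cpu and retry. */
        }
}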

Authored by Mathieu Desnoyers, committed by Thomas Gleixner (01a5ec42 1badac4f)

tools/testing/selftests/rseq/basic_percpu_ops_test.c (+312 lines, new file)
// SPDX-License-Identifier: LGPL-2.1
#define _GNU_SOURCE
#include <assert.h>
#include <pthread.h>
#include <sched.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stddef.h>

#include "rseq.h"

#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))

struct percpu_lock_entry {
        intptr_t v;
} __attribute__((aligned(128)));

struct percpu_lock {
        struct percpu_lock_entry c[CPU_SETSIZE];
};

struct test_data_entry {
        intptr_t count;
} __attribute__((aligned(128)));

struct spinlock_test_data {
        struct percpu_lock lock;
        struct test_data_entry c[CPU_SETSIZE];
        int reps;
};

struct percpu_list_node {
        intptr_t data;
        struct percpu_list_node *next;
};

struct percpu_list_entry {
        struct percpu_list_node *head;
} __attribute__((aligned(128)));

struct percpu_list {
        struct percpu_list_entry c[CPU_SETSIZE];
};

/* A simple percpu spinlock. Returns the cpu lock was acquired on. */
int rseq_this_cpu_lock(struct percpu_lock *lock)
{
        int cpu;

        for (;;) {
                int ret;

                cpu = rseq_cpu_start();
                ret = rseq_cmpeqv_storev(&lock->c[cpu].v,
                                         0, 1, cpu);
                if (rseq_likely(!ret))
                        break;
                /* Retry if comparison fails or rseq aborts. */
        }
        /*
         * Acquire semantic when taking lock after control dependency.
         * Matches rseq_smp_store_release().
         */
        rseq_smp_acquire__after_ctrl_dep();
        return cpu;
}

void rseq_percpu_unlock(struct percpu_lock *lock, int cpu)
{
        assert(lock->c[cpu].v == 1);
        /*
         * Release lock, with release semantic. Matches
         * rseq_smp_acquire__after_ctrl_dep().
         */
        rseq_smp_store_release(&lock->c[cpu].v, 0);
}

void *test_percpu_spinlock_thread(void *arg)
{
        struct spinlock_test_data *data = arg;
        int i, cpu;

        if (rseq_register_current_thread()) {
                fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
                        errno, strerror(errno));
                abort();
        }
        for (i = 0; i < data->reps; i++) {
                cpu = rseq_this_cpu_lock(&data->lock);
                data->c[cpu].count++;
                rseq_percpu_unlock(&data->lock, cpu);
        }
        if (rseq_unregister_current_thread()) {
                fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
                        errno, strerror(errno));
                abort();
        }

        return NULL;
}

/*
 * A simple test which implements a sharded counter using a per-cpu
 * lock. Obviously real applications might prefer to simply use a
 * per-cpu increment; however, this is reasonable for a test and the
 * lock can be extended to synchronize more complicated operations.
 */
void test_percpu_spinlock(void)
{
        const int num_threads = 200;
        int i;
        uint64_t sum;
        pthread_t test_threads[num_threads];
        struct spinlock_test_data data;

        memset(&data, 0, sizeof(data));
        data.reps = 5000;

        for (i = 0; i < num_threads; i++)
                pthread_create(&test_threads[i], NULL,
                               test_percpu_spinlock_thread, &data);

        for (i = 0; i < num_threads; i++)
                pthread_join(test_threads[i], NULL);

        sum = 0;
        for (i = 0; i < CPU_SETSIZE; i++)
                sum += data.c[i].count;

        assert(sum == (uint64_t)data.reps * num_threads);
}

void this_cpu_list_push(struct percpu_list *list,
                        struct percpu_list_node *node,
                        int *_cpu)
{
        int cpu;

        for (;;) {
                intptr_t *targetptr, newval, expect;
                int ret;

                cpu = rseq_cpu_start();
                /* Load list->c[cpu].head with single-copy atomicity. */
                expect = (intptr_t)RSEQ_READ_ONCE(list->c[cpu].head);
                newval = (intptr_t)node;
                targetptr = (intptr_t *)&list->c[cpu].head;
                node->next = (struct percpu_list_node *)expect;
                ret = rseq_cmpeqv_storev(targetptr, expect, newval, cpu);
                if (rseq_likely(!ret))
                        break;
                /* Retry if comparison fails or rseq aborts. */
        }
        if (_cpu)
                *_cpu = cpu;
}

/*
 * Unlike a traditional lock-less linked list, the availability of a
 * rseq primitive allows us to implement pop without concerns over
 * ABA-type races.
 */
struct percpu_list_node *this_cpu_list_pop(struct percpu_list *list,
                                           int *_cpu)
{
        for (;;) {
                struct percpu_list_node *head;
                intptr_t *targetptr, expectnot, *load;
                off_t offset;
                int ret, cpu;

                cpu = rseq_cpu_start();
                targetptr = (intptr_t *)&list->c[cpu].head;
                expectnot = (intptr_t)NULL;
                offset = offsetof(struct percpu_list_node, next);
                load = (intptr_t *)&head;
                ret = rseq_cmpnev_storeoffp_load(targetptr, expectnot,
                                                 offset, load, cpu);
                if (rseq_likely(!ret)) {
                        if (_cpu)
                                *_cpu = cpu;
                        return head;
                }
                if (ret > 0)
                        return NULL;
                /* Retry if rseq aborts. */
        }
}

/*
 * __percpu_list_pop is not safe against concurrent accesses. Should
 * only be used on lists that are not concurrently modified.
 */
struct percpu_list_node *__percpu_list_pop(struct percpu_list *list, int cpu)
{
        struct percpu_list_node *node;

        node = list->c[cpu].head;
        if (!node)
                return NULL;
        list->c[cpu].head = node->next;
        return node;
}

void *test_percpu_list_thread(void *arg)
{
        int i;
        struct percpu_list *list = (struct percpu_list *)arg;

        if (rseq_register_current_thread()) {
                fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
                        errno, strerror(errno));
                abort();
        }

        for (i = 0; i < 100000; i++) {
                struct percpu_list_node *node;

                node = this_cpu_list_pop(list, NULL);
                sched_yield();  /* encourage shuffling */
                if (node)
                        this_cpu_list_push(list, node, NULL);
        }

        if (rseq_unregister_current_thread()) {
                fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
                        errno, strerror(errno));
                abort();
        }

        return NULL;
}

/* Simultaneous modification to a per-cpu linked list from many threads. */
void test_percpu_list(void)
{
        int i, j;
        uint64_t sum = 0, expected_sum = 0;
        struct percpu_list list;
        pthread_t test_threads[200];
        cpu_set_t allowed_cpus;

        memset(&list, 0, sizeof(list));

        /* Generate list entries for every usable cpu. */
        sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
        for (i = 0; i < CPU_SETSIZE; i++) {
                if (!CPU_ISSET(i, &allowed_cpus))
                        continue;
                for (j = 1; j <= 100; j++) {
                        struct percpu_list_node *node;

                        expected_sum += j;

                        node = malloc(sizeof(*node));
                        assert(node);
                        node->data = j;
                        node->next = list.c[i].head;
                        list.c[i].head = node;
                }
        }

        for (i = 0; i < 200; i++)
                pthread_create(&test_threads[i], NULL,
                               test_percpu_list_thread, &list);

        for (i = 0; i < 200; i++)
                pthread_join(test_threads[i], NULL);

        for (i = 0; i < CPU_SETSIZE; i++) {
                struct percpu_list_node *node;

                if (!CPU_ISSET(i, &allowed_cpus))
                        continue;

                while ((node = __percpu_list_pop(&list, i))) {
                        sum += node->data;
                        free(node);
                }
        }

        /*
         * All entries should now be accounted for (unless some external
         * actor is interfering with our allowed affinity while this
         * test is running).
         */
        assert(sum == expected_sum);
}

int main(int argc, char **argv)
{
        if (rseq_register_current_thread()) {
                fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
                        errno, strerror(errno));
                goto error;
        }
        printf("spinlock\n");
        test_percpu_spinlock();
        printf("percpu_list\n");
        test_percpu_list();
        if (rseq_unregister_current_thread()) {
                fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
                        errno, strerror(errno));
                goto error;
        }
        return 0;

error:
        return -1;
}