// SPDX-License-Identifier: GPL-2.0

/*
 * Test module for stress and analyze performance of vmalloc allocator.
 * (C) 2018 Uladzislau Rezki (Sony) <urezki@gmail.com>
 */
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/vmalloc.h>
#include <linux/random.h>
#include <linux/kthread.h>
#include <linux/moduleparam.h>
#include <linux/completion.h>
#include <linux/delay.h>
#include <linux/rwsem.h>
#include <linux/mm.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

#define __param(type, name, init, msg)		\
	static type name = init;		\
	module_param(name, type, 0444);		\
	MODULE_PARM_DESC(name, msg)		\

__param(int, nr_threads, 0,
	"Number of workers to perform tests(min: 1 max: USHRT_MAX)");

__param(bool, sequential_test_order, false,
	"Use sequential stress tests order");

__param(int, test_repeat_count, 1,
	"Set test repeat counter");

__param(int, test_loop_count, 1000000,
	"Set test loop counter");

__param(int, run_test_mask, INT_MAX,
	"Set tests specified in the mask.\n\n"
		"\t\tid: 1,   name: fix_size_alloc_test\n"
		"\t\tid: 2,   name: full_fit_alloc_test\n"
		"\t\tid: 4,   name: long_busy_list_alloc_test\n"
		"\t\tid: 8,   name: random_size_alloc_test\n"
		"\t\tid: 16,  name: fix_align_alloc_test\n"
		"\t\tid: 32,  name: random_size_align_alloc_test\n"
		"\t\tid: 64,  name: align_shift_alloc_test\n"
		"\t\tid: 128, name: pcpu_alloc_test\n"
		"\t\tid: 256, name: kvfree_rcu_1_arg_vmalloc_test\n"
		"\t\tid: 512, name: kvfree_rcu_2_arg_vmalloc_test\n"
		/* Add a new test case description here. */
);

/*
 * Read write semaphore for synchronization of setup
 * phase that is done in main thread and workers.
 */
static DECLARE_RWSEM(prepare_for_test_rwsem);

/*
 * Completion tracking for worker threads.
 */
static DECLARE_COMPLETION(test_all_done_comp);
static atomic_t test_n_undone = ATOMIC_INIT(0);

static inline void
test_report_one_done(void)
{
	if (atomic_dec_and_test(&test_n_undone))
		complete(&test_all_done_comp);
}

static int random_size_align_alloc_test(void)
{
	unsigned long size, align, rnd;
	void *ptr;
	int i;

	for (i = 0; i < test_loop_count; i++) {
		get_random_bytes(&rnd, sizeof(rnd));

		/*
		 * Maximum 1024 pages, if PAGE_SIZE is 4096.
		 */
		align = 1 << (rnd % 23);

		/*
		 * Maximum 10 pages.
		 */
		size = ((rnd % 10) + 1) * PAGE_SIZE;

		ptr = __vmalloc_node(size, align, GFP_KERNEL | __GFP_ZERO, 0,
				__builtin_return_address(0));
		if (!ptr)
			return -1;

		vfree(ptr);
	}

	return 0;
}

/*
 * This test case is supposed to be failed.
 */
static int align_shift_alloc_test(void)
{
	unsigned long align;
	void *ptr;
	int i;

	for (i = 0; i < BITS_PER_LONG; i++) {
		align = ((unsigned long) 1) << i;

		ptr = __vmalloc_node(PAGE_SIZE, align, GFP_KERNEL|__GFP_ZERO, 0,
				__builtin_return_address(0));
		if (!ptr)
			return -1;

		vfree(ptr);
	}

	return 0;
}

static int fix_align_alloc_test(void)
{
	void *ptr;
	int i;

	for (i = 0; i < test_loop_count; i++) {
		ptr = __vmalloc_node(5 * PAGE_SIZE, THREAD_ALIGN << 1,
				GFP_KERNEL | __GFP_ZERO, 0,
				__builtin_return_address(0));
		if (!ptr)
			return -1;

		vfree(ptr);
	}

	return 0;
}

static int random_size_alloc_test(void)
{
	unsigned int n;
	void *p;
	int i;

	for (i = 0; i < test_loop_count; i++) {
		get_random_bytes(&n, sizeof(i));
		n = (n % 100) + 1;

		p = vmalloc(n * PAGE_SIZE);

		if (!p)
			return -1;

		*((__u8 *)p) = 1;
		vfree(p);
	}

	return 0;
}

static int long_busy_list_alloc_test(void)
{
	void *ptr_1, *ptr_2;
	void **ptr;
	int rv = -1;
	int i;

	ptr = vmalloc(sizeof(void *) * 15000);
	if (!ptr)
		return rv;

	for (i = 0; i < 15000; i++)
		ptr[i] = vmalloc(1 * PAGE_SIZE);

	for (i = 0; i < test_loop_count; i++) {
		ptr_1 = vmalloc(100 * PAGE_SIZE);
		if (!ptr_1)
			goto leave;

		ptr_2 = vmalloc(1 * PAGE_SIZE);
		if (!ptr_2) {
			vfree(ptr_1);
			goto leave;
		}

		*((__u8 *)ptr_1) = 0;
		*((__u8 *)ptr_2) = 1;

		vfree(ptr_1);
		vfree(ptr_2);
	}

	/* Success */
	rv = 0;

leave:
	for (i = 0; i < 15000; i++)
		vfree(ptr[i]);

	vfree(ptr);
	return rv;
}

static int full_fit_alloc_test(void)
{
	void **ptr, **junk_ptr, *tmp;
	int junk_length;
	int rv = -1;
	int i;

	junk_length = fls(num_online_cpus());
	junk_length *= (32 * 1024 * 1024 / PAGE_SIZE);

	ptr = vmalloc(sizeof(void *) * junk_length);
	if (!ptr)
		return rv;

	junk_ptr = vmalloc(sizeof(void *) * junk_length);
	if (!junk_ptr) {
		vfree(ptr);
		return rv;
	}

	for (i = 0; i < junk_length; i++) {
		ptr[i] = vmalloc(1 * PAGE_SIZE);
		junk_ptr[i] = vmalloc(1 * PAGE_SIZE);
	}

	for (i = 0; i < junk_length; i++)
		vfree(junk_ptr[i]);

	for (i = 0; i < test_loop_count; i++) {
		tmp = vmalloc(1 * PAGE_SIZE);

		if (!tmp)
			goto error;

		*((__u8 *)tmp) = 1;
		vfree(tmp);
	}

	/* Success */
	rv = 0;

error:
	for (i = 0; i < junk_length; i++)
		vfree(ptr[i]);

	vfree(ptr);
	vfree(junk_ptr);

	return rv;
}

static int fix_size_alloc_test(void)
{
	void *ptr;
	int i;

	for (i = 0; i < test_loop_count; i++) {
		ptr = vmalloc(3 * PAGE_SIZE);

		if (!ptr)
			return -1;

		*((__u8 *)ptr) = 0;

		vfree(ptr);
	}

	return 0;
}

static int
pcpu_alloc_test(void)
{
	int rv = 0;
#ifndef CONFIG_NEED_PER_CPU_KM
	void __percpu **pcpu;
	size_t size, align;
	int i;

	pcpu = vmalloc(sizeof(void __percpu *) * 35000);
	if (!pcpu)
		return -1;

	for (i = 0; i < 35000; i++) {
		unsigned int r;

		get_random_bytes(&r, sizeof(i));
		size = (r % (PAGE_SIZE / 4)) + 1;

		/*
		 * Maximum PAGE_SIZE
		 */
		get_random_bytes(&r, sizeof(i));
		align = 1 << ((i % 11) + 1);

		pcpu[i] = __alloc_percpu(size, align);
		if (!pcpu[i])
			rv = -1;
	}

	for (i = 0; i < 35000; i++)
		free_percpu(pcpu[i]);

	vfree(pcpu);
#endif
	return rv;
}

struct test_kvfree_rcu {
	struct rcu_head rcu;
	unsigned char array[20];
};

static int
kvfree_rcu_1_arg_vmalloc_test(void)
{
	struct test_kvfree_rcu *p;
	int i;

	for (i = 0; i < test_loop_count; i++) {
		p = vmalloc(1 * PAGE_SIZE);
		if (!p)
			return -1;

		p->array[0] = 'a';
		kvfree_rcu(p);
	}

	return 0;
}

static int
kvfree_rcu_2_arg_vmalloc_test(void)
{
	struct test_kvfree_rcu *p;
	int i;

	for (i = 0; i < test_loop_count; i++) {
		p = vmalloc(1 * PAGE_SIZE);
		if (!p)
			return -1;

		p->array[0] = 'a';
		kvfree_rcu(p, rcu);
	}

	return 0;
}

struct test_case_desc {
	const char *test_name;
	int (*test_func)(void);
};

static struct test_case_desc test_case_array[] = {
	{ "fix_size_alloc_test", fix_size_alloc_test },
	{ "full_fit_alloc_test", full_fit_alloc_test },
	{ "long_busy_list_alloc_test", long_busy_list_alloc_test },
	{ "random_size_alloc_test", random_size_alloc_test },
	{ "fix_align_alloc_test", fix_align_alloc_test },
	{ "random_size_align_alloc_test", random_size_align_alloc_test },
	{ "align_shift_alloc_test", align_shift_alloc_test },
	{ "pcpu_alloc_test", pcpu_alloc_test },
	{ "kvfree_rcu_1_arg_vmalloc_test", kvfree_rcu_1_arg_vmalloc_test },
	{ "kvfree_rcu_2_arg_vmalloc_test", kvfree_rcu_2_arg_vmalloc_test },
	/* Add a new test case here. */
};

struct test_case_data {
	int test_failed;
	int test_passed;
	u64 time;
};

static struct test_driver {
	struct task_struct *task;
	struct test_case_data data[ARRAY_SIZE(test_case_array)];

	unsigned long start;
	unsigned long stop;
} *tdriver;

static void shuffle_array(int *arr, int n)
{
	unsigned int rnd;
	int i, j, x;

	for (i = n - 1; i > 0; i--) {
		get_random_bytes(&rnd, sizeof(rnd));

		/* Cut the range. */
		j = rnd % i;

		/* Swap indexes. */
		x = arr[i];
		arr[i] = arr[j];
		arr[j] = x;
	}
}

static int test_func(void *private)
{
	struct test_driver *t = private;
	int random_array[ARRAY_SIZE(test_case_array)];
	int index, i, j;
	ktime_t kt;
	u64 delta;

	for (i = 0; i < ARRAY_SIZE(test_case_array); i++)
		random_array[i] = i;

	if (!sequential_test_order)
		shuffle_array(random_array, ARRAY_SIZE(test_case_array));

	/*
	 * Block until initialization is done.
	 */
	down_read(&prepare_for_test_rwsem);

	t->start = get_cycles();
	for (i = 0; i < ARRAY_SIZE(test_case_array); i++) {
		index = random_array[i];

		/*
		 * Skip tests if run_test_mask has been specified.
		 */
		if (!((run_test_mask & (1 << index)) >> index))
			continue;

		kt = ktime_get();
		for (j = 0; j < test_repeat_count; j++) {
			if (!test_case_array[index].test_func())
				t->data[index].test_passed++;
			else
				t->data[index].test_failed++;
		}

		/*
		 * Take an average time that test took.
		 */
		delta = (u64) ktime_us_delta(ktime_get(), kt);
		do_div(delta, (u32) test_repeat_count);

		t->data[index].time = delta;
	}
	t->stop = get_cycles();

	up_read(&prepare_for_test_rwsem);
	test_report_one_done();

	/*
	 * Wait for the kthread_stop() call.
	 */
	while (!kthread_should_stop())
		msleep(10);

	return 0;
}

static int
init_test_configurtion(void)
{
	/*
	 * A maximum number of workers is defined as hard-coded
	 * value and set to USHRT_MAX. We add such gap just in
	 * case and for potential heavy stressing.
	 */
	nr_threads = clamp(nr_threads, 1, (int) USHRT_MAX);

	/* Allocate the space for test instances. */
	tdriver = kvcalloc(nr_threads, sizeof(*tdriver), GFP_KERNEL);
	if (tdriver == NULL)
		return -1;

	if (test_repeat_count <= 0)
		test_repeat_count = 1;

	if (test_loop_count <= 0)
		test_loop_count = 1;

	return 0;
}

static void do_concurrent_test(void)
{
	int i, ret;

	/*
	 * Set some basic configurations plus sanity check.
	 */
	ret = init_test_configurtion();
	if (ret < 0)
		return;

	/*
	 * Put on hold all workers.
	 */
	down_write(&prepare_for_test_rwsem);

	for (i = 0; i < nr_threads; i++) {
		struct test_driver *t = &tdriver[i];

		t->task = kthread_run(test_func, t, "vmalloc_test/%d", i);

		if (!IS_ERR(t->task))
			/* Success. */
			atomic_inc(&test_n_undone);
		else
			pr_err("Failed to start %d kthread\n", i);
	}

	/*
	 * Now let the workers do their job.
	 */
	up_write(&prepare_for_test_rwsem);

	/*
	 * Sleep quiet until all workers are done with 1 second
	 * interval. Since the test can take a lot of time we
	 * can run into a stack trace of the hung task. That is
	 * why we go with completion_timeout and HZ value.
	 */
	do {
		ret = wait_for_completion_timeout(&test_all_done_comp, HZ);
	} while (!ret);

	for (i = 0; i < nr_threads; i++) {
		struct test_driver *t = &tdriver[i];
		int j;

		if (!IS_ERR(t->task))
			kthread_stop(t->task);

		for (j = 0; j < ARRAY_SIZE(test_case_array); j++) {
			if (!((run_test_mask & (1 << j)) >> j))
				continue;

			pr_info(
				"Summary: %s passed: %d failed: %d repeat: %d loops: %d avg: %llu usec\n",
				test_case_array[j].test_name,
				t->data[j].test_passed,
				t->data[j].test_failed,
				test_repeat_count, test_loop_count,
				t->data[j].time);
		}

		pr_info("All test took worker%d=%lu cycles\n",
			i, t->stop - t->start);
	}

	kvfree(tdriver);
}

static int vmalloc_test_init(void)
{
	do_concurrent_test();
	return -EAGAIN; /* Fail will directly unload the module */
}

static void vmalloc_test_exit(void)
{
}

module_init(vmalloc_test_init)
module_exit(vmalloc_test_exit)

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Uladzislau Rezki");
MODULE_DESCRIPTION("vmalloc test module");
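
/*
 * Usage sketch (editorial note, not part of the upstream v5.13 file; the
 * parameter values below are arbitrary examples, only the parameter names
 * come from the __param() declarations above):
 *
 *	modprobe test_vmalloc nr_threads=2 run_test_mask=7 test_repeat_count=2
 *
 * run_test_mask=7 selects the tests with ids 1, 2 and 4 listed in the
 * run_test_mask description. Per-test summaries are printed to the kernel
 * log via pr_info(), and vmalloc_test_init() intentionally returns -EAGAIN,
 * so the module never stays loaded and can simply be run again.
 */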