/* Snapshot of this file as of v6.1 (about 11 kB); web-viewer header, not part of the source. */
1// SPDX-License-Identifier: GPL-2.0 2 3/* 4 * Test module for stress and analyze performance of vmalloc allocator. 5 * (C) 2018 Uladzislau Rezki (Sony) <urezki@gmail.com> 6 */ 7#include <linux/init.h> 8#include <linux/kernel.h> 9#include <linux/module.h> 10#include <linux/vmalloc.h> 11#include <linux/random.h> 12#include <linux/kthread.h> 13#include <linux/moduleparam.h> 14#include <linux/completion.h> 15#include <linux/delay.h> 16#include <linux/rwsem.h> 17#include <linux/mm.h> 18#include <linux/rcupdate.h> 19#include <linux/slab.h> 20 21#define __param(type, name, init, msg) \ 22 static type name = init; \ 23 module_param(name, type, 0444); \ 24 MODULE_PARM_DESC(name, msg) \ 25 26__param(int, nr_threads, 0, 27 "Number of workers to perform tests(min: 1 max: USHRT_MAX)"); 28 29__param(bool, sequential_test_order, false, 30 "Use sequential stress tests order"); 31 32__param(int, test_repeat_count, 1, 33 "Set test repeat counter"); 34 35__param(int, test_loop_count, 1000000, 36 "Set test loop counter"); 37 38__param(int, nr_pages, 0, 39 "Set number of pages for fix_size_alloc_test(default: 1)"); 40 41__param(int, run_test_mask, INT_MAX, 42 "Set tests specified in the mask.\n\n" 43 "\t\tid: 1, name: fix_size_alloc_test\n" 44 "\t\tid: 2, name: full_fit_alloc_test\n" 45 "\t\tid: 4, name: long_busy_list_alloc_test\n" 46 "\t\tid: 8, name: random_size_alloc_test\n" 47 "\t\tid: 16, name: fix_align_alloc_test\n" 48 "\t\tid: 32, name: random_size_align_alloc_test\n" 49 "\t\tid: 64, name: align_shift_alloc_test\n" 50 "\t\tid: 128, name: pcpu_alloc_test\n" 51 "\t\tid: 256, name: kvfree_rcu_1_arg_vmalloc_test\n" 52 "\t\tid: 512, name: kvfree_rcu_2_arg_vmalloc_test\n" 53 /* Add a new test case description here. */ 54); 55 56/* 57 * Read write semaphore for synchronization of setup 58 * phase that is done in main thread and workers. 59 */ 60static DECLARE_RWSEM(prepare_for_test_rwsem); 61 62/* 63 * Completion tracking for worker threads. 
64 */ 65static DECLARE_COMPLETION(test_all_done_comp); 66static atomic_t test_n_undone = ATOMIC_INIT(0); 67 68static inline void 69test_report_one_done(void) 70{ 71 if (atomic_dec_and_test(&test_n_undone)) 72 complete(&test_all_done_comp); 73} 74 75static int random_size_align_alloc_test(void) 76{ 77 unsigned long size, align; 78 unsigned int rnd; 79 void *ptr; 80 int i; 81 82 for (i = 0; i < test_loop_count; i++) { 83 rnd = get_random_u8(); 84 85 /* 86 * Maximum 1024 pages, if PAGE_SIZE is 4096. 87 */ 88 align = 1 << (rnd % 23); 89 90 /* 91 * Maximum 10 pages. 92 */ 93 size = ((rnd % 10) + 1) * PAGE_SIZE; 94 95 ptr = __vmalloc_node(size, align, GFP_KERNEL | __GFP_ZERO, 0, 96 __builtin_return_address(0)); 97 if (!ptr) 98 return -1; 99 100 vfree(ptr); 101 } 102 103 return 0; 104} 105 106/* 107 * This test case is supposed to be failed. 108 */ 109static int align_shift_alloc_test(void) 110{ 111 unsigned long align; 112 void *ptr; 113 int i; 114 115 for (i = 0; i < BITS_PER_LONG; i++) { 116 align = ((unsigned long) 1) << i; 117 118 ptr = __vmalloc_node(PAGE_SIZE, align, GFP_KERNEL|__GFP_ZERO, 0, 119 __builtin_return_address(0)); 120 if (!ptr) 121 return -1; 122 123 vfree(ptr); 124 } 125 126 return 0; 127} 128 129static int fix_align_alloc_test(void) 130{ 131 void *ptr; 132 int i; 133 134 for (i = 0; i < test_loop_count; i++) { 135 ptr = __vmalloc_node(5 * PAGE_SIZE, THREAD_ALIGN << 1, 136 GFP_KERNEL | __GFP_ZERO, 0, 137 __builtin_return_address(0)); 138 if (!ptr) 139 return -1; 140 141 vfree(ptr); 142 } 143 144 return 0; 145} 146 147static int random_size_alloc_test(void) 148{ 149 unsigned int n; 150 void *p; 151 int i; 152 153 for (i = 0; i < test_loop_count; i++) { 154 n = prandom_u32_max(100) + 1; 155 p = vmalloc(n * PAGE_SIZE); 156 157 if (!p) 158 return -1; 159 160 *((__u8 *)p) = 1; 161 vfree(p); 162 } 163 164 return 0; 165} 166 167static int long_busy_list_alloc_test(void) 168{ 169 void *ptr_1, *ptr_2; 170 void **ptr; 171 int rv = -1; 172 int i; 173 174 ptr = 
vmalloc(sizeof(void *) * 15000); 175 if (!ptr) 176 return rv; 177 178 for (i = 0; i < 15000; i++) 179 ptr[i] = vmalloc(1 * PAGE_SIZE); 180 181 for (i = 0; i < test_loop_count; i++) { 182 ptr_1 = vmalloc(100 * PAGE_SIZE); 183 if (!ptr_1) 184 goto leave; 185 186 ptr_2 = vmalloc(1 * PAGE_SIZE); 187 if (!ptr_2) { 188 vfree(ptr_1); 189 goto leave; 190 } 191 192 *((__u8 *)ptr_1) = 0; 193 *((__u8 *)ptr_2) = 1; 194 195 vfree(ptr_1); 196 vfree(ptr_2); 197 } 198 199 /* Success */ 200 rv = 0; 201 202leave: 203 for (i = 0; i < 15000; i++) 204 vfree(ptr[i]); 205 206 vfree(ptr); 207 return rv; 208} 209 210static int full_fit_alloc_test(void) 211{ 212 void **ptr, **junk_ptr, *tmp; 213 int junk_length; 214 int rv = -1; 215 int i; 216 217 junk_length = fls(num_online_cpus()); 218 junk_length *= (32 * 1024 * 1024 / PAGE_SIZE); 219 220 ptr = vmalloc(sizeof(void *) * junk_length); 221 if (!ptr) 222 return rv; 223 224 junk_ptr = vmalloc(sizeof(void *) * junk_length); 225 if (!junk_ptr) { 226 vfree(ptr); 227 return rv; 228 } 229 230 for (i = 0; i < junk_length; i++) { 231 ptr[i] = vmalloc(1 * PAGE_SIZE); 232 junk_ptr[i] = vmalloc(1 * PAGE_SIZE); 233 } 234 235 for (i = 0; i < junk_length; i++) 236 vfree(junk_ptr[i]); 237 238 for (i = 0; i < test_loop_count; i++) { 239 tmp = vmalloc(1 * PAGE_SIZE); 240 241 if (!tmp) 242 goto error; 243 244 *((__u8 *)tmp) = 1; 245 vfree(tmp); 246 } 247 248 /* Success */ 249 rv = 0; 250 251error: 252 for (i = 0; i < junk_length; i++) 253 vfree(ptr[i]); 254 255 vfree(ptr); 256 vfree(junk_ptr); 257 258 return rv; 259} 260 261static int fix_size_alloc_test(void) 262{ 263 void *ptr; 264 int i; 265 266 for (i = 0; i < test_loop_count; i++) { 267 ptr = vmalloc((nr_pages > 0 ? 
nr_pages:1) * PAGE_SIZE); 268 269 if (!ptr) 270 return -1; 271 272 *((__u8 *)ptr) = 0; 273 274 vfree(ptr); 275 } 276 277 return 0; 278} 279 280static int 281pcpu_alloc_test(void) 282{ 283 int rv = 0; 284#ifndef CONFIG_NEED_PER_CPU_KM 285 void __percpu **pcpu; 286 size_t size, align; 287 int i; 288 289 pcpu = vmalloc(sizeof(void __percpu *) * 35000); 290 if (!pcpu) 291 return -1; 292 293 for (i = 0; i < 35000; i++) { 294 size = prandom_u32_max(PAGE_SIZE / 4) + 1; 295 296 /* 297 * Maximum PAGE_SIZE 298 */ 299 align = 1 << (prandom_u32_max(11) + 1); 300 301 pcpu[i] = __alloc_percpu(size, align); 302 if (!pcpu[i]) 303 rv = -1; 304 } 305 306 for (i = 0; i < 35000; i++) 307 free_percpu(pcpu[i]); 308 309 vfree(pcpu); 310#endif 311 return rv; 312} 313 314struct test_kvfree_rcu { 315 struct rcu_head rcu; 316 unsigned char array[20]; 317}; 318 319static int 320kvfree_rcu_1_arg_vmalloc_test(void) 321{ 322 struct test_kvfree_rcu *p; 323 int i; 324 325 for (i = 0; i < test_loop_count; i++) { 326 p = vmalloc(1 * PAGE_SIZE); 327 if (!p) 328 return -1; 329 330 p->array[0] = 'a'; 331 kvfree_rcu(p); 332 } 333 334 return 0; 335} 336 337static int 338kvfree_rcu_2_arg_vmalloc_test(void) 339{ 340 struct test_kvfree_rcu *p; 341 int i; 342 343 for (i = 0; i < test_loop_count; i++) { 344 p = vmalloc(1 * PAGE_SIZE); 345 if (!p) 346 return -1; 347 348 p->array[0] = 'a'; 349 kvfree_rcu(p, rcu); 350 } 351 352 return 0; 353} 354 355struct test_case_desc { 356 const char *test_name; 357 int (*test_func)(void); 358}; 359 360static struct test_case_desc test_case_array[] = { 361 { "fix_size_alloc_test", fix_size_alloc_test }, 362 { "full_fit_alloc_test", full_fit_alloc_test }, 363 { "long_busy_list_alloc_test", long_busy_list_alloc_test }, 364 { "random_size_alloc_test", random_size_alloc_test }, 365 { "fix_align_alloc_test", fix_align_alloc_test }, 366 { "random_size_align_alloc_test", random_size_align_alloc_test }, 367 { "align_shift_alloc_test", align_shift_alloc_test }, 368 { 
"pcpu_alloc_test", pcpu_alloc_test }, 369 { "kvfree_rcu_1_arg_vmalloc_test", kvfree_rcu_1_arg_vmalloc_test }, 370 { "kvfree_rcu_2_arg_vmalloc_test", kvfree_rcu_2_arg_vmalloc_test }, 371 /* Add a new test case here. */ 372}; 373 374struct test_case_data { 375 int test_failed; 376 int test_passed; 377 u64 time; 378}; 379 380static struct test_driver { 381 struct task_struct *task; 382 struct test_case_data data[ARRAY_SIZE(test_case_array)]; 383 384 unsigned long start; 385 unsigned long stop; 386} *tdriver; 387 388static void shuffle_array(int *arr, int n) 389{ 390 int i, j; 391 392 for (i = n - 1; i > 0; i--) { 393 /* Cut the range. */ 394 j = prandom_u32_max(i); 395 396 /* Swap indexes. */ 397 swap(arr[i], arr[j]); 398 } 399} 400 401static int test_func(void *private) 402{ 403 struct test_driver *t = private; 404 int random_array[ARRAY_SIZE(test_case_array)]; 405 int index, i, j; 406 ktime_t kt; 407 u64 delta; 408 409 for (i = 0; i < ARRAY_SIZE(test_case_array); i++) 410 random_array[i] = i; 411 412 if (!sequential_test_order) 413 shuffle_array(random_array, ARRAY_SIZE(test_case_array)); 414 415 /* 416 * Block until initialization is done. 417 */ 418 down_read(&prepare_for_test_rwsem); 419 420 t->start = get_cycles(); 421 for (i = 0; i < ARRAY_SIZE(test_case_array); i++) { 422 index = random_array[i]; 423 424 /* 425 * Skip tests if run_test_mask has been specified. 426 */ 427 if (!((run_test_mask & (1 << index)) >> index)) 428 continue; 429 430 kt = ktime_get(); 431 for (j = 0; j < test_repeat_count; j++) { 432 if (!test_case_array[index].test_func()) 433 t->data[index].test_passed++; 434 else 435 t->data[index].test_failed++; 436 } 437 438 /* 439 * Take an average time that test took. 
440 */ 441 delta = (u64) ktime_us_delta(ktime_get(), kt); 442 do_div(delta, (u32) test_repeat_count); 443 444 t->data[index].time = delta; 445 } 446 t->stop = get_cycles(); 447 448 up_read(&prepare_for_test_rwsem); 449 test_report_one_done(); 450 451 /* 452 * Wait for the kthread_stop() call. 453 */ 454 while (!kthread_should_stop()) 455 msleep(10); 456 457 return 0; 458} 459 460static int 461init_test_configurtion(void) 462{ 463 /* 464 * A maximum number of workers is defined as hard-coded 465 * value and set to USHRT_MAX. We add such gap just in 466 * case and for potential heavy stressing. 467 */ 468 nr_threads = clamp(nr_threads, 1, (int) USHRT_MAX); 469 470 /* Allocate the space for test instances. */ 471 tdriver = kvcalloc(nr_threads, sizeof(*tdriver), GFP_KERNEL); 472 if (tdriver == NULL) 473 return -1; 474 475 if (test_repeat_count <= 0) 476 test_repeat_count = 1; 477 478 if (test_loop_count <= 0) 479 test_loop_count = 1; 480 481 return 0; 482} 483 484static void do_concurrent_test(void) 485{ 486 int i, ret; 487 488 /* 489 * Set some basic configurations plus sanity check. 490 */ 491 ret = init_test_configurtion(); 492 if (ret < 0) 493 return; 494 495 /* 496 * Put on hold all workers. 497 */ 498 down_write(&prepare_for_test_rwsem); 499 500 for (i = 0; i < nr_threads; i++) { 501 struct test_driver *t = &tdriver[i]; 502 503 t->task = kthread_run(test_func, t, "vmalloc_test/%d", i); 504 505 if (!IS_ERR(t->task)) 506 /* Success. */ 507 atomic_inc(&test_n_undone); 508 else 509 pr_err("Failed to start %d kthread\n", i); 510 } 511 512 /* 513 * Now let the workers do their job. 514 */ 515 up_write(&prepare_for_test_rwsem); 516 517 /* 518 * Sleep quiet until all workers are done with 1 second 519 * interval. Since the test can take a lot of time we 520 * can run into a stack trace of the hung task. That is 521 * why we go with completion_timeout and HZ value. 
522 */ 523 do { 524 ret = wait_for_completion_timeout(&test_all_done_comp, HZ); 525 } while (!ret); 526 527 for (i = 0; i < nr_threads; i++) { 528 struct test_driver *t = &tdriver[i]; 529 int j; 530 531 if (!IS_ERR(t->task)) 532 kthread_stop(t->task); 533 534 for (j = 0; j < ARRAY_SIZE(test_case_array); j++) { 535 if (!((run_test_mask & (1 << j)) >> j)) 536 continue; 537 538 pr_info( 539 "Summary: %s passed: %d failed: %d repeat: %d loops: %d avg: %llu usec\n", 540 test_case_array[j].test_name, 541 t->data[j].test_passed, 542 t->data[j].test_failed, 543 test_repeat_count, test_loop_count, 544 t->data[j].time); 545 } 546 547 pr_info("All test took worker%d=%lu cycles\n", 548 i, t->stop - t->start); 549 } 550 551 kvfree(tdriver); 552} 553 554static int vmalloc_test_init(void) 555{ 556 do_concurrent_test(); 557 return -EAGAIN; /* Fail will directly unload the module */ 558} 559 560static void vmalloc_test_exit(void) 561{ 562} 563 564module_init(vmalloc_test_init) 565module_exit(vmalloc_test_exit) 566 567MODULE_LICENSE("GPL"); 568MODULE_AUTHOR("Uladzislau Rezki"); 569MODULE_DESCRIPTION("vmalloc test module");