perf tests: Add option to run tests in parallel

Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

kernel os linux

By default tests are forked. Add an option (-p or --parallel) so that
the forked tests are all started in parallel and then their output is
gathered serially. This is opt-in as running in parallel can cause
test flakes.

Rather than forking within the code, start_command/finish_command
from libsubcmd are used. This changes how stderr and stdout are
handled. The child's stderr and stdout are always read to avoid the
child blocking. If verbose is 1 (-v), the child's stdout and stderr
are displayed only if the test fails. If verbose is >1 (e.g. -vv),
the child's stdout and stderr are displayed immediately.

An unscientific test on my laptop shows the wall clock time for perf
test without parallel being 5 minutes 21 seconds and with parallel
(-p) being 1 minute 50 seconds.

Signed-off-by: Ian Rogers <irogers@google.com>
Cc: James Clark <james.clark@arm.com>
Cc: Justin Stitt <justinstitt@google.com>
Cc: Bill Wendling <morbo@google.com>
Cc: Nick Desaulniers <ndesaulniers@google.com>
Cc: Yang Jihong <yangjihong1@huawei.com>
Cc: Nathan Chancellor <nathan@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Athira Jajeev <atrajeev@linux.vnet.ibm.com>
Cc: llvm@lists.linux.dev
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Link: https://lore.kernel.org/r/20240221034155.1500118-9-irogers@google.com

authored by

Ian Rogers and committed by

Namhyung Kim 2 years ago b482f5f8 964461ee

+218 -102

1 changed file

expand all

tools

perf

tests

builtin-test.c

+218 -102

tools/perf/tests/builtin-test.c

··· 6 6 */ 7 7 #include <fcntl.h> 8 8 #include <errno.h> 9 + #include <poll.h> 9 10 #include <unistd.h> 10 11 #include <string.h> 11 12 #include <stdlib.h> ··· 22 21 #include "debug.h" 23 22 #include "color.h" 24 23 #include <subcmd/parse-options.h> 24 + #include <subcmd/run-command.h> 25 25 #include "string2.h" 26 26 #include "symbol.h" 27 27 #include "util/rlimit.h" 28 + #include "util/strbuf.h" 28 29 #include <linux/kernel.h> 29 30 #include <linux/string.h> 30 31 #include <subcmd/exec-cmd.h> ··· 34 31 35 32 #include "tests-scripts.h" 36 33 34 + /* 35 + * Command line option to not fork the test running in the same process and 36 + * making them easier to debug. 37 + */ 37 38 static bool dont_fork; 39 + /* Fork the tests in parallel and then wait for their completion. */ 40 + static bool parallel; 38 41 const char *dso_to_test; 39 42 const char *test_objdump_path = "objdump"; 40 43 ··· 218 209 return false; 219 210 } 220 211 221 - static int run_test(struct test_suite *test, int subtest) 212 + struct child_test { 213 + struct child_process process; 214 + struct test_suite *test; 215 + int test_num; 216 + int subtest; 217 + }; 218 + 219 + static int run_test_child(struct child_process *process) 222 220 { 223 - int status, err = -1, child = dont_fork ? 
0 : fork(); 224 - char sbuf[STRERR_BUFSIZE]; 225 - 226 - if (child < 0) { 227 - pr_err("failed to fork test: %s\n", 228 - str_error_r(errno, sbuf, sizeof(sbuf))); 229 - return -1; 230 - } 231 - 232 - if (!child) { 233 - if (!dont_fork) { 234 - pr_debug("test child forked, pid %d\n", getpid()); 235 - 236 - if (verbose <= 0) { 237 - int nullfd = open("/dev/null", O_WRONLY); 238 - 239 - if (nullfd >= 0) { 240 - close(STDERR_FILENO); 241 - close(STDOUT_FILENO); 242 - 243 - dup2(nullfd, STDOUT_FILENO); 244 - dup2(STDOUT_FILENO, STDERR_FILENO); 245 - close(nullfd); 246 - } 247 - } else { 248 - signal(SIGSEGV, sighandler_dump_stack); 249 - signal(SIGFPE, sighandler_dump_stack); 250 - } 251 - } 252 - 253 - err = test_function(test, subtest)(test, subtest); 254 - if (!dont_fork) 255 - exit(err); 256 - } 257 - 258 - if (!dont_fork) { 259 - wait(&status); 260 - 261 - if (WIFEXITED(status)) { 262 - err = (signed char)WEXITSTATUS(status); 263 - pr_debug("test child finished with %d\n", err); 264 - } else if (WIFSIGNALED(status)) { 265 - err = -1; 266 - pr_debug("test child interrupted\n"); 267 - } 268 - } 269 - 270 - return err; 271 - } 272 - 273 - #define for_each_test(j, k, t) \ 274 - for (j = 0, k = 0; j < ARRAY_SIZE(tests); j++, k = 0) \ 275 - while ((t = tests[j][k++]) != NULL) 276 - 277 - static int test_and_print(struct test_suite *t, int subtest) 278 - { 221 + struct child_test *child = container_of(process, struct child_test, process); 279 222 int err; 280 223 281 - pr_debug("\n--- start ---\n"); 282 - err = run_test(t, subtest); 283 - pr_debug("---- end ----\n"); 224 + pr_debug("--- start ---\n"); 225 + pr_debug("test child forked, pid %d\n", getpid()); 226 + err = test_function(child->test, child->subtest)(child->test, child->subtest); 227 + pr_debug("---- end(%d) ----\n", err); 228 + fflush(NULL); 229 + return -err; 230 + } 284 231 285 - if (!has_subtests(t)) 286 - pr_debug("%s:", t->desc); 287 - else 288 - pr_debug("%s subtest %d:", t->desc, subtest + 1); 232 + 
static int print_test_result(struct test_suite *t, int i, int subtest, int result, int width) 233 + { 234 + if (has_subtests(t)) { 235 + int subw = width > 2 ? width - 2 : width; 289 236 290 - switch (err) { 237 + pr_info("%3d.%1d: %-*s:", i + 1, subtest + 1, subw, test_description(t, subtest)); 238 + } else 239 + pr_info("%3d: %-*s:", i + 1, width, test_description(t, subtest)); 240 + 241 + switch (result) { 291 242 case TEST_OK: 292 243 pr_info(" Ok\n"); 293 244 break; ··· 266 297 break; 267 298 } 268 299 269 - return err; 300 + return 0; 270 301 } 302 + 303 + static int finish_test(struct child_test *child_test, int width) 304 + { 305 + struct test_suite *t = child_test->test; 306 + int i = child_test->test_num; 307 + int subi = child_test->subtest; 308 + int out = child_test->process.out; 309 + int err = child_test->process.err; 310 + bool out_done = out <= 0; 311 + bool err_done = err <= 0; 312 + struct strbuf out_output = STRBUF_INIT; 313 + struct strbuf err_output = STRBUF_INIT; 314 + int ret; 315 + 316 + /* 317 + * For test suites with subtests, display the suite name ahead of the 318 + * sub test names. 319 + */ 320 + if (has_subtests(t) && subi == 0) 321 + pr_info("%3d: %-*s:\n", i + 1, width, test_description(t, -1)); 322 + 323 + /* 324 + * Busy loop reading from the child's stdout and stderr that are set to 325 + * be non-blocking until EOF. 
326 + */ 327 + if (!out_done) 328 + fcntl(out, F_SETFL, O_NONBLOCK); 329 + if (!err_done) 330 + fcntl(err, F_SETFL, O_NONBLOCK); 331 + if (verbose > 1) { 332 + if (has_subtests(t)) 333 + pr_info("%3d.%1d: %s:\n", i + 1, subi + 1, test_description(t, subi)); 334 + else 335 + pr_info("%3d: %s:\n", i + 1, test_description(t, -1)); 336 + } 337 + while (!out_done || !err_done) { 338 + struct pollfd pfds[2] = { 339 + { .fd = out, 340 + .events = POLLIN | POLLERR | POLLHUP | POLLNVAL, 341 + }, 342 + { .fd = err, 343 + .events = POLLIN | POLLERR | POLLHUP | POLLNVAL, 344 + }, 345 + }; 346 + char buf[512]; 347 + ssize_t len; 348 + 349 + /* Poll to avoid excessive spinning, timeout set for 1000ms. */ 350 + poll(pfds, ARRAY_SIZE(pfds), /*timeout=*/1000); 351 + if (!out_done && pfds[0].revents) { 352 + errno = 0; 353 + len = read(out, buf, sizeof(buf) - 1); 354 + 355 + if (len <= 0) { 356 + out_done = errno != EAGAIN; 357 + } else { 358 + buf[len] = '\0'; 359 + if (verbose > 1) 360 + fprintf(stdout, "%s", buf); 361 + else 362 + strbuf_addstr(&out_output, buf); 363 + } 364 + } 365 + if (!err_done && pfds[1].revents) { 366 + errno = 0; 367 + len = read(err, buf, sizeof(buf) - 1); 368 + 369 + if (len <= 0) { 370 + err_done = errno != EAGAIN; 371 + } else { 372 + buf[len] = '\0'; 373 + if (verbose > 1) 374 + fprintf(stdout, "%s", buf); 375 + else 376 + strbuf_addstr(&err_output, buf); 377 + } 378 + } 379 + } 380 + /* Clean up child process. */ 381 + ret = finish_command(&child_test->process); 382 + if (verbose == 1 && ret == TEST_FAIL) { 383 + /* Add header for test that was skipped above. 
*/ 384 + if (has_subtests(t)) 385 + pr_info("%3d.%1d: %s:\n", i + 1, subi + 1, test_description(t, subi)); 386 + else 387 + pr_info("%3d: %s:\n", i + 1, test_description(t, -1)); 388 + fprintf(stdout, "%s", out_output.buf); 389 + fprintf(stderr, "%s", err_output.buf); 390 + } 391 + strbuf_release(&out_output); 392 + strbuf_release(&err_output); 393 + print_test_result(t, i, subi, ret, width); 394 + if (out > 0) 395 + close(out); 396 + if (err > 0) 397 + close(err); 398 + return 0; 399 + } 400 + 401 + static int start_test(struct test_suite *test, int i, int subi, struct child_test **child, 402 + int width) 403 + { 404 + int err; 405 + 406 + *child = NULL; 407 + if (dont_fork) { 408 + pr_debug("--- start ---\n"); 409 + err = test_function(test, subi)(test, subi); 410 + pr_debug("---- end ----\n"); 411 + print_test_result(test, i, subi, err, width); 412 + return 0; 413 + } 414 + 415 + *child = zalloc(sizeof(**child)); 416 + if (!*child) 417 + return -ENOMEM; 418 + 419 + (*child)->test = test; 420 + (*child)->test_num = i; 421 + (*child)->subtest = subi; 422 + (*child)->process.pid = -1; 423 + (*child)->process.no_stdin = 1; 424 + if (verbose <= 0) { 425 + (*child)->process.no_stdout = 1; 426 + (*child)->process.no_stderr = 1; 427 + } else { 428 + (*child)->process.out = -1; 429 + (*child)->process.err = -1; 430 + } 431 + (*child)->process.no_exec_cmd = run_test_child; 432 + err = start_command(&(*child)->process); 433 + if (err || parallel) 434 + return err; 435 + return finish_test(*child, width); 436 + } 437 + 438 + #define for_each_test(j, k, t) \ 439 + for (j = 0, k = 0; j < ARRAY_SIZE(tests); j++, k = 0) \ 440 + while ((t = tests[j][k++]) != NULL) 271 441 272 442 static int __cmd_test(int argc, const char *argv[], struct intlist *skiplist) 273 443 { ··· 414 306 unsigned int j, k; 415 307 int i = 0; 416 308 int width = 0; 309 + size_t num_tests = 0; 310 + struct child_test **child_tests; 311 + int child_test_num = 0; 417 312 418 313 for_each_test(j, k, t) { 419 
314 int len = strlen(test_description(t, -1)); 420 315 421 316 if (width < len) 422 317 width = len; 318 + 319 + if (has_subtests(t)) { 320 + for (int subi = 0, subn = num_subtests(t); subi < subn; subi++) { 321 + len = strlen(test_description(t, subi)); 322 + if (width < len) 323 + width = len; 324 + num_tests++; 325 + } 326 + } else { 327 + num_tests++; 328 + } 423 329 } 330 + child_tests = calloc(num_tests, sizeof(*child_tests)); 331 + if (!child_tests) 332 + return -ENOMEM; 424 333 425 334 for_each_test(j, k, t) { 426 335 int curr = i++; 427 - int subi; 428 336 429 337 if (!perf_test__matches(test_description(t, -1), curr, argc, argv)) { 430 338 bool skip = true; 431 - int subn; 432 339 433 - subn = num_subtests(t); 434 - 435 - for (subi = 0; subi < subn; subi++) { 340 + for (int subi = 0, subn = num_subtests(t); subi < subn; subi++) { 436 341 if (perf_test__matches(test_description(t, subi), 437 342 curr, argc, argv)) 438 343 skip = false; ··· 455 334 continue; 456 335 } 457 336 458 - pr_info("%3d: %-*s:", i, width, test_description(t, -1)); 459 - 460 337 if (intlist__find(skiplist, i)) { 338 + pr_info("%3d: %-*s:", curr + 1, width, test_description(t, -1)); 461 339 color_fprintf(stderr, PERF_COLOR_YELLOW, " Skip (user override)\n"); 462 340 continue; 463 341 } 464 342 465 343 if (!has_subtests(t)) { 466 - test_and_print(t, -1); 344 + int err = start_test(t, curr, -1, &child_tests[child_test_num++], width); 345 + 346 + if (err) { 347 + /* TODO: if parallel waitpid the already forked children. */ 348 + free(child_tests); 349 + return err; 350 + } 467 351 } else { 468 - int subn = num_subtests(t); 469 - /* 470 - * minus 2 to align with normal testcases. 471 - * For subtest we print additional '.x' in number. 472 - * for example: 473 - * 474 - * 35: Test LLVM searching and compiling : 475 - * 35.1: Basic BPF llvm compiling test : Ok 476 - */ 477 - int subw = width > 2 ? 
width - 2 : width; 352 + for (int subi = 0, subn = num_subtests(t); subi < subn; subi++) { 353 + int err; 478 354 479 - if (subn <= 0) { 480 - color_fprintf(stderr, PERF_COLOR_YELLOW, 481 - " Skip (not compiled in)\n"); 482 - continue; 483 - } 484 - pr_info("\n"); 485 - 486 - for (subi = 0; subi < subn; subi++) { 487 - int len = strlen(test_description(t, subi)); 488 - 489 - if (subw < len) 490 - subw = len; 491 - } 492 - 493 - for (subi = 0; subi < subn; subi++) { 494 355 if (!perf_test__matches(test_description(t, subi), 495 356 curr, argc, argv)) 496 357 continue; 497 358 498 - pr_info("%3d.%1d: %-*s:", i, subi + 1, subw, 499 - test_description(t, subi)); 500 - test_and_print(t, subi); 359 + err = start_test(t, curr, subi, &child_tests[child_test_num++], 360 + width); 361 + if (err) 362 + return err; 501 363 } 502 364 } 503 365 } 366 + for (i = 0; i < child_test_num; i++) { 367 + if (parallel) { 368 + int ret = finish_test(child_tests[i], width); 369 + 370 + if (ret) 371 + return ret; 372 + } 373 + free(child_tests[i]); 374 + } 375 + free(child_tests); 504 376 return 0; 505 377 } 506 378 ··· 561 447 "be more verbose (show symbol address, etc)"), 562 448 OPT_BOOLEAN('F', "dont-fork", &dont_fork, 563 449 "Do not fork for testcase"), 450 + OPT_BOOLEAN('p', "parallel", &parallel, 451 + "Run the tests altogether in parallel"), 564 452 OPT_STRING('w', "workload", &workload, "work", "workload to run for testing"), 565 453 OPT_STRING(0, "dso", &dso_to_test, "dso", "dso to test"), 566 454 OPT_STRING(0, "objdump", &test_objdump_path, "path",