Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

perf tests record: Update testcase to fix usage of affinity for machines with #CPUs > 1K

The perf record testcase fails on systems with more than 1K CPUs.

Testcase: perf test -vv "PERF_RECORD_* events & perf_sample fields"

PERF_RECORD_* events & perf_sample fields :
--- start ---
test child forked, pid 272482
sched_getaffinity: Invalid argument
sched__get_first_possible_cpu: Invalid argument
test child finished with -1
---- end ----
PERF_RECORD_* events & perf_sample fields: FAILED!

sched__get_first_possible_cpu uses "sched_getaffinity" to get the
cpumask and this call is returning EINVAL (Invalid argument).

This happens because the default mask size in glibc is 1024.

To overcome this 1024-CPU mask size limitation of cpu_set_t, change the
mask size using the CPU_*_S macros, i.e., use CPU_ALLOC to allocate the
cpumask and CPU_ALLOC_SIZE for its size.

The same fix is needed for the mask used in sched_setaffinity, so that
the mask size is large enough to represent the number of possible CPUs
in the system.

Reported-by: Tejas Manhas <tejas05@linux.ibm.com>
Signed-off-by: Athira Rajeev <atrajeev@linux.ibm.com>
Tested-by: Ian Rogers <irogers@google.com>
Tested-by: Tejas Manhas <tejas05@linux.ibm.com>
Tested-by: Venkat Rao Bagalkote <venkat88@linux.ibm.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Aditya Bodkhe <Aditya.Bodkhe1@ibm.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Hari Bathini <hbathini@linux.vnet.ibm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kajol Jain <kjain@linux.ibm.com>
Cc: Madhavan Srinivasan <maddy@linux.ibm.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

authored by

Athira Rajeev and committed by
Arnaldo Carvalho de Melo
c0e885e9 e7ace97f

+27 -11
+27 -11
tools/perf/tests/perf-record.c
··· 13 13 #include "tests.h" 14 14 #include "util/mmap.h" 15 15 #include "util/sample.h" 16 + #include "util/cpumap.h" 16 17 17 18 static int sched__get_first_possible_cpu(pid_t pid, cpu_set_t *maskp) 18 19 { 19 - int i, cpu = -1, nrcpus = 1024; 20 - realloc: 21 - CPU_ZERO(maskp); 20 + int i, cpu = -1; 21 + int nrcpus = cpu__max_cpu().cpu; 22 + size_t size = CPU_ALLOC_SIZE(nrcpus); 22 23 23 - if (sched_getaffinity(pid, sizeof(*maskp), maskp) == -1) { 24 - if (errno == EINVAL && nrcpus < (1024 << 8)) { 24 + realloc: 25 + CPU_ZERO_S(size, maskp); 26 + 27 + if (sched_getaffinity(pid, size, maskp) == -1) { 28 + if (errno == EINVAL && nrcpus < (cpu__max_cpu().cpu << 8)) { 25 29 nrcpus = nrcpus << 2; 26 30 goto realloc; 27 31 } ··· 34 30 } 35 31 36 32 for (i = 0; i < nrcpus; i++) { 37 - if (CPU_ISSET(i, maskp)) { 33 + if (CPU_ISSET_S(i, size, maskp)) { 38 34 if (cpu == -1) 39 35 cpu = i; 40 36 else 41 - CPU_CLR(i, maskp); 37 + CPU_CLR_S(i, size, maskp); 42 38 } 43 39 } 44 40 ··· 54 50 .no_buffering = true, 55 51 .mmap_pages = 256, 56 52 }; 57 - cpu_set_t cpu_mask; 58 - size_t cpu_mask_size = sizeof(cpu_mask); 53 + int nrcpus = cpu__max_cpu().cpu; 54 + cpu_set_t *cpu_mask; 55 + size_t cpu_mask_size; 59 56 struct evlist *evlist = evlist__new_dummy(); 60 57 struct evsel *evsel; 61 58 struct perf_sample sample; ··· 74 69 int total_events = 0, nr_events[PERF_RECORD_MAX] = { 0, }; 75 70 char sbuf[STRERR_BUFSIZE]; 76 71 72 + cpu_mask = CPU_ALLOC(nrcpus); 73 + if (!cpu_mask) { 74 + pr_debug("failed to create cpumask\n"); 75 + goto out; 76 + } 77 + 78 + cpu_mask_size = CPU_ALLOC_SIZE(nrcpus); 79 + CPU_ZERO_S(cpu_mask_size, cpu_mask); 80 + 77 81 perf_sample__init(&sample, /*all=*/false); 78 82 if (evlist == NULL) /* Fallback for kernels lacking PERF_COUNT_SW_DUMMY */ 79 83 evlist = evlist__new_default(); 80 84 81 85 if (evlist == NULL) { 82 86 pr_debug("Not enough memory to create evlist\n"); 87 + CPU_FREE(cpu_mask); 83 88 goto out; 84 89 } 85 90 ··· 126 111 
evsel__set_sample_bit(evsel, TIME); 127 112 evlist__config(evlist, &opts, NULL); 128 113 129 - err = sched__get_first_possible_cpu(evlist->workload.pid, &cpu_mask); 114 + err = sched__get_first_possible_cpu(evlist->workload.pid, cpu_mask); 130 115 if (err < 0) { 131 116 pr_debug("sched__get_first_possible_cpu: %s\n", 132 117 str_error_r(errno, sbuf, sizeof(sbuf))); ··· 138 123 /* 139 124 * So that we can check perf_sample.cpu on all the samples. 140 125 */ 141 - if (sched_setaffinity(evlist->workload.pid, cpu_mask_size, &cpu_mask) < 0) { 126 + if (sched_setaffinity(evlist->workload.pid, cpu_mask_size, cpu_mask) < 0) { 142 127 pr_debug("sched_setaffinity: %s\n", 143 128 str_error_r(errno, sbuf, sizeof(sbuf))); 144 129 goto out_delete_evlist; ··· 343 328 ++errs; 344 329 } 345 330 out_delete_evlist: 331 + CPU_FREE(cpu_mask); 346 332 evlist__delete(evlist); 347 333 out: 348 334 perf_sample__exit(&sample);