Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

perf arm-spe/cs-etm: Directly iterate CPU maps

Rather than iterating over all CPUs and checking whether each one is in
the CPU maps, iterate the CPU map directly. Similarly, make use of the
intersect function, taking care of the case where the "any" CPU is
specified. Switch perf_cpu_map__has_any_cpu_or_is_empty() to the more
appropriate alternatives perf_cpu_map__has_any_cpu() and
perf_cpu_map__is_any_cpu_or_is_empty().

Reviewed-by: James Clark <james.clark@arm.com>
Signed-off-by: Ian Rogers <irogers@google.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Alexandre Ghiti <alexghiti@rivosinc.com>
Cc: Andrew Jones <ajones@ventanamicro.com>
Cc: André Almeida <andrealmeid@igalia.com>
Cc: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
Cc: Atish Patra <atishp@rivosinc.com>
Cc: Changbin Du <changbin.du@huawei.com>
Cc: Darren Hart <dvhart@infradead.org>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Huacai Chen <chenhuacai@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: John Garry <john.g.garry@oracle.com>
Cc: K Prateek Nayak <kprateek.nayak@amd.com>
Cc: Kajol Jain <kjain@linux.ibm.com>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mike Leach <mike.leach@linaro.org>
Cc: Nick Desaulniers <ndesaulniers@google.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Paran Lee <p4ranlee@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ravi Bangoria <ravi.bangoria@amd.com>
Cc: Sandipan Das <sandipan.das@amd.com>
Cc: Sean Christopherson <seanjc@google.com>
Cc: Steinar H. Gunderson <sesse@google.com>
Cc: Suzuki Poulose <suzuki.poulose@arm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will@kernel.org>
Cc: Yang Jihong <yangjihong1@huawei.com>
Cc: Yang Li <yang.lee@linux.alibaba.com>
Cc: Yanteng Si <siyanteng@loongson.cn>
Link: https://lore.kernel.org/r/20240202234057.2085863-4-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

Authored by Ian Rogers and committed by Arnaldo Carvalho de Melo.
e28ee123 dcd45b37

+54 -70
+52 -68
tools/perf/arch/arm/util/cs-etm.c
··· 197 197 static int cs_etm_validate_config(struct auxtrace_record *itr, 198 198 struct evsel *evsel) 199 199 { 200 - int i, err = -EINVAL; 200 + int idx, err = 0; 201 201 struct perf_cpu_map *event_cpus = evsel->evlist->core.user_requested_cpus; 202 - struct perf_cpu_map *online_cpus = perf_cpu_map__new_online_cpus(); 202 + struct perf_cpu_map *intersect_cpus; 203 + struct perf_cpu cpu; 203 204 204 - /* Set option of each CPU we have */ 205 - for (i = 0; i < cpu__max_cpu().cpu; i++) { 206 - struct perf_cpu cpu = { .cpu = i, }; 205 + /* 206 + * Set option of each CPU we have. In per-cpu case, do the validation 207 + * for CPUs to work with. In per-thread case, the CPU map has the "any" 208 + * CPU value. Since the traced program can run on any CPUs in this case, 209 + * thus don't skip validation. 210 + */ 211 + if (!perf_cpu_map__has_any_cpu(event_cpus)) { 212 + struct perf_cpu_map *online_cpus = perf_cpu_map__new_online_cpus(); 207 213 208 - /* 209 - * In per-cpu case, do the validation for CPUs to work with. 210 - * In per-thread case, the CPU map is empty. Since the traced 211 - * program can run on any CPUs in this case, thus don't skip 212 - * validation. 
213 - */ 214 - if (!perf_cpu_map__has_any_cpu_or_is_empty(event_cpus) && 215 - !perf_cpu_map__has(event_cpus, cpu)) 216 - continue; 217 - 218 - if (!perf_cpu_map__has(online_cpus, cpu)) 219 - continue; 220 - 221 - err = cs_etm_validate_context_id(itr, evsel, i); 222 - if (err) 223 - goto out; 224 - err = cs_etm_validate_timestamp(itr, evsel, i); 225 - if (err) 226 - goto out; 214 + intersect_cpus = perf_cpu_map__intersect(event_cpus, online_cpus); 215 + perf_cpu_map__put(online_cpus); 216 + } else { 217 + intersect_cpus = perf_cpu_map__new_online_cpus(); 227 218 } 228 219 229 - err = 0; 230 - out: 231 - perf_cpu_map__put(online_cpus); 220 + perf_cpu_map__for_each_cpu_skip_any(cpu, idx, intersect_cpus) { 221 + err = cs_etm_validate_context_id(itr, evsel, cpu.cpu); 222 + if (err) 223 + break; 224 + 225 + err = cs_etm_validate_timestamp(itr, evsel, cpu.cpu); 226 + if (err) 227 + break; 228 + } 229 + 230 + perf_cpu_map__put(intersect_cpus); 232 231 return err; 233 232 } 234 233 ··· 434 435 * Also the case of per-cpu mmaps, need the contextID in order to be notified 435 436 * when a context switch happened. 
436 437 */ 437 - if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus)) { 438 + if (!perf_cpu_map__is_any_cpu_or_is_empty(cpus)) { 438 439 evsel__set_config_if_unset(cs_etm_pmu, cs_etm_evsel, 439 440 "timestamp", 1); 440 441 evsel__set_config_if_unset(cs_etm_pmu, cs_etm_evsel, ··· 460 461 evsel->core.attr.sample_period = 1; 461 462 462 463 /* In per-cpu case, always need the time of mmap events etc */ 463 - if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus)) 464 + if (!perf_cpu_map__is_any_cpu_or_is_empty(cpus)) 464 465 evsel__set_sample_bit(evsel, TIME); 465 466 466 467 err = cs_etm_validate_config(itr, cs_etm_evsel); ··· 532 533 cs_etm_info_priv_size(struct auxtrace_record *itr __maybe_unused, 533 534 struct evlist *evlist __maybe_unused) 534 535 { 535 - int i; 536 + int idx; 536 537 int etmv3 = 0, etmv4 = 0, ete = 0; 537 538 struct perf_cpu_map *event_cpus = evlist->core.user_requested_cpus; 538 - struct perf_cpu_map *online_cpus = perf_cpu_map__new_online_cpus(); 539 + struct perf_cpu_map *intersect_cpus; 540 + struct perf_cpu cpu; 539 541 540 - /* cpu map is not empty, we have specific CPUs to work with */ 541 - if (!perf_cpu_map__has_any_cpu_or_is_empty(event_cpus)) { 542 - for (i = 0; i < cpu__max_cpu().cpu; i++) { 543 - struct perf_cpu cpu = { .cpu = i, }; 542 + if (!perf_cpu_map__has_any_cpu(event_cpus)) { 543 + /* cpu map is not "any" CPU , we have specific CPUs to work with */ 544 + struct perf_cpu_map *online_cpus = perf_cpu_map__new_online_cpus(); 544 545 545 - if (!perf_cpu_map__has(event_cpus, cpu) || 546 - !perf_cpu_map__has(online_cpus, cpu)) 547 - continue; 548 - 549 - if (cs_etm_is_ete(itr, i)) 550 - ete++; 551 - else if (cs_etm_is_etmv4(itr, i)) 552 - etmv4++; 553 - else 554 - etmv3++; 555 - } 546 + intersect_cpus = perf_cpu_map__intersect(event_cpus, online_cpus); 547 + perf_cpu_map__put(online_cpus); 556 548 } else { 557 - /* get configuration for all CPUs in the system */ 558 - for (i = 0; i < cpu__max_cpu().cpu; i++) { 559 - struct perf_cpu cpu = { 
.cpu = i, }; 560 - 561 - if (!perf_cpu_map__has(online_cpus, cpu)) 562 - continue; 563 - 564 - if (cs_etm_is_ete(itr, i)) 565 - ete++; 566 - else if (cs_etm_is_etmv4(itr, i)) 567 - etmv4++; 568 - else 569 - etmv3++; 570 - } 549 + /* Event can be "any" CPU so count all online CPUs. */ 550 + intersect_cpus = perf_cpu_map__new_online_cpus(); 571 551 } 572 - 573 - perf_cpu_map__put(online_cpus); 552 + perf_cpu_map__for_each_cpu_skip_any(cpu, idx, intersect_cpus) { 553 + if (cs_etm_is_ete(itr, cpu.cpu)) 554 + ete++; 555 + else if (cs_etm_is_etmv4(itr, cpu.cpu)) 556 + etmv4++; 557 + else 558 + etmv3++; 559 + } 560 + perf_cpu_map__put(intersect_cpus); 574 561 575 562 return (CS_ETM_HEADER_SIZE + 576 563 (ete * CS_ETE_PRIV_SIZE) + ··· 798 813 if (!session->evlist->core.nr_mmaps) 799 814 return -EINVAL; 800 815 801 - /* If the cpu_map is empty all online CPUs are involved */ 802 - if (perf_cpu_map__has_any_cpu_or_is_empty(event_cpus)) { 816 + /* If the cpu_map has the "any" CPU all online CPUs are involved */ 817 + if (perf_cpu_map__has_any_cpu(event_cpus)) { 803 818 cpu_map = online_cpus; 804 819 } else { 805 820 /* Make sure all specified CPUs are online */ 806 - for (i = 0; i < perf_cpu_map__nr(event_cpus); i++) { 807 - struct perf_cpu cpu = { .cpu = i, }; 821 + struct perf_cpu cpu; 808 822 809 - if (perf_cpu_map__has(event_cpus, cpu) && 810 - !perf_cpu_map__has(online_cpus, cpu)) 823 + perf_cpu_map__for_each_cpu(cpu, i, event_cpus) { 824 + if (!perf_cpu_map__has(online_cpus, cpu)) 811 825 return -EINVAL; 812 826 } 813 827
+2 -2
tools/perf/arch/arm64/util/arm-spe.c
··· 232 232 * In the case of per-cpu mmaps, sample CPU for AUX event; 233 233 * also enable the timestamp tracing for samples correlation. 234 234 */ 235 - if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus)) { 235 + if (!perf_cpu_map__is_any_cpu_or_is_empty(cpus)) { 236 236 evsel__set_sample_bit(arm_spe_evsel, CPU); 237 237 evsel__set_config_if_unset(arm_spe_pmu, arm_spe_evsel, 238 238 "ts_enable", 1); ··· 265 265 tracking_evsel->core.attr.sample_period = 1; 266 266 267 267 /* In per-cpu case, always need the time of mmap events etc */ 268 - if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus)) { 268 + if (!perf_cpu_map__is_any_cpu_or_is_empty(cpus)) { 269 269 evsel__set_sample_bit(tracking_evsel, TIME); 270 270 evsel__set_sample_bit(tracking_evsel, CPU); 271 271