Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

perf record: Apply affinity masks when reading mmap buffers

Build node CPU masks for mmap data buffers. Apply the node CPU mask to the tool
thread every time it references data buffers across nodes or across CPUs.

Signed-off-by: Alexey Budankov <alexey.budankov@linux.intel.com>
Reviewed-by: Jiri Olsa <jolsa@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/b25e4ebc-078d-2c7b-216c-f0bed108d073@linux.intel.com
[ Use cpu-set-sched.h to get the CPU_{EQUAL,OR}() fallbacks for older systems ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

Authored by Alexey Budankov and committed by Arnaldo Carvalho de Melo.
f13de660 de20e320

+53 -1
+15
tools/perf/builtin-record.c
··· 38 38 #include "util/bpf-loader.h" 39 39 #include "util/trigger.h" 40 40 #include "util/perf-hooks.h" 41 + #include "util/cpu-set-sched.h" 41 42 #include "util/time-utils.h" 42 43 #include "util/units.h" 43 44 #include "util/bpf-event.h" ··· 537 536 struct record_opts *opts = &rec->opts; 538 537 char msg[512]; 539 538 539 + if (opts->affinity != PERF_AFFINITY_SYS) 540 + cpu__setup_cpunode_map(); 541 + 540 542 if (perf_evlist__mmap_ex(evlist, opts->mmap_pages, 541 543 opts->auxtrace_mmap_pages, 542 544 opts->auxtrace_snapshot_mode, ··· 723 719 .type = PERF_RECORD_FINISHED_ROUND, 724 720 }; 725 721 722 + static void record__adjust_affinity(struct record *rec, struct perf_mmap *map) 723 + { 724 + if (rec->opts.affinity != PERF_AFFINITY_SYS && 725 + !CPU_EQUAL(&rec->affinity_mask, &map->affinity_mask)) { 726 + CPU_ZERO(&rec->affinity_mask); 727 + CPU_OR(&rec->affinity_mask, &rec->affinity_mask, &map->affinity_mask); 728 + sched_setaffinity(0, sizeof(rec->affinity_mask), &rec->affinity_mask); 729 + } 730 + } 731 + 726 732 static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist, 727 733 bool overwrite) 728 734 { ··· 760 746 struct perf_mmap *map = &maps[i]; 761 747 762 748 if (map->base) { 749 + record__adjust_affinity(rec, map); 763 750 if (!record__aio_enabled(rec)) { 764 751 if (perf_mmap__push(map, rec, record__pushfn) != 0) { 765 752 rc = -1;
+10
tools/perf/util/cpumap.c
··· 730 730 buf[size - 1] = '\0'; 731 731 return ptr - buf; 732 732 } 733 + 734 + const struct cpu_map *cpu_map__online(void) /* thread unsafe */ 735 + { 736 + static const struct cpu_map *online = NULL; 737 + 738 + if (!online) 739 + online = cpu_map__new(NULL); /* from /sys/devices/system/cpu/online */ 740 + 741 + return online; 742 + }
+1
tools/perf/util/cpumap.h
··· 29 29 int cpu_map__get_core(struct cpu_map *map, int idx, void *data); 30 30 int cpu_map__build_socket_map(struct cpu_map *cpus, struct cpu_map **sockp); 31 31 int cpu_map__build_core_map(struct cpu_map *cpus, struct cpu_map **corep); 32 + const struct cpu_map *cpu_map__online(void); /* thread unsafe */ 32 33 33 34 struct cpu_map *cpu_map__get(struct cpu_map *map); 34 35 void cpu_map__put(struct cpu_map *map);
+27 -1
tools/perf/util/mmap.c
··· 383 383 auxtrace_mmap__munmap(&map->auxtrace_mmap); 384 384 } 385 385 386 + static void build_node_mask(int node, cpu_set_t *mask) 387 + { 388 + int c, cpu, nr_cpus; 389 + const struct cpu_map *cpu_map = NULL; 390 + 391 + cpu_map = cpu_map__online(); 392 + if (!cpu_map) 393 + return; 394 + 395 + nr_cpus = cpu_map__nr(cpu_map); 396 + for (c = 0; c < nr_cpus; c++) { 397 + cpu = cpu_map->map[c]; /* map c index to online cpu index */ 398 + if (cpu__get_node(cpu) == node) 399 + CPU_SET(cpu, mask); 400 + } 401 + } 402 + 403 + static void perf_mmap__setup_affinity_mask(struct perf_mmap *map, struct mmap_params *mp) 404 + { 405 + CPU_ZERO(&map->affinity_mask); 406 + if (mp->affinity == PERF_AFFINITY_NODE && cpu__max_node() > 1) 407 + build_node_mask(cpu__get_node(map->cpu), &map->affinity_mask); 408 + else if (mp->affinity == PERF_AFFINITY_CPU) 409 + CPU_SET(map->cpu, &map->affinity_mask); 410 + } 411 + 386 412 int perf_mmap__mmap(struct perf_mmap *map, struct mmap_params *mp, int fd, int cpu) 387 413 { 388 414 /* ··· 438 412 map->fd = fd; 439 413 map->cpu = cpu; 440 414 441 - CPU_ZERO(&map->affinity_mask); 415 + perf_mmap__setup_affinity_mask(map, mp); 442 416 443 417 if (auxtrace_mmap__mmap(&map->auxtrace_mmap, 444 418 &mp->auxtrace_mp, map->base, fd))