Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

libperf cpumap: Reduce allocations and sorting in intersect

On hybrid platforms the CPU maps are often disjoint. Rather than copy
CPUs and trim, compute the number of common CPUs; if there are none, exit
early, otherwise copy them in sorted order. This avoids memory allocation in
the disjoint case and avoids a second malloc and useless sort in the
previous trim cases.

Signed-off-by: Ian Rogers <irogers@google.com>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>

authored by

Ian Rogers and committed by
Namhyung Kim
af9e8d12 28981501

+19 -10
+19 -10
tools/lib/perf/cpumap.c
··· 453 453 struct perf_cpu_map *perf_cpu_map__intersect(struct perf_cpu_map *orig, 454 454 struct perf_cpu_map *other) 455 455 { 456 - struct perf_cpu *tmp_cpus; 457 - int tmp_len; 458 456 int i, j, k; 459 - struct perf_cpu_map *merged = NULL; 457 + struct perf_cpu_map *merged; 460 458 461 459 if (perf_cpu_map__is_subset(other, orig)) 462 460 return perf_cpu_map__get(orig); 463 461 if (perf_cpu_map__is_subset(orig, other)) 464 462 return perf_cpu_map__get(other); 465 463 466 - tmp_len = max(__perf_cpu_map__nr(orig), __perf_cpu_map__nr(other)); 467 - tmp_cpus = malloc(tmp_len * sizeof(struct perf_cpu)); 468 - if (!tmp_cpus) 464 + i = j = k = 0; 465 + while (i < __perf_cpu_map__nr(orig) && j < __perf_cpu_map__nr(other)) { 466 + if (__perf_cpu_map__cpu(orig, i).cpu < __perf_cpu_map__cpu(other, j).cpu) 467 + i++; 468 + else if (__perf_cpu_map__cpu(orig, i).cpu > __perf_cpu_map__cpu(other, j).cpu) 469 + j++; 470 + else { /* CPUs match. */ 471 + i++; 472 + j++; 473 + k++; 474 + } 475 + } 476 + if (k == 0) /* Maps are completely disjoint. */ 469 477 return NULL; 470 478 479 + merged = perf_cpu_map__alloc(k); 480 + if (!merged) 481 + return NULL; 482 + /* Entries are added to merged in sorted order, so no need to sort again. */ 471 483 i = j = k = 0; 472 484 while (i < __perf_cpu_map__nr(orig) && j < __perf_cpu_map__nr(other)) { 473 485 if (__perf_cpu_map__cpu(orig, i).cpu < __perf_cpu_map__cpu(other, j).cpu) ··· 488 476 j++; 489 477 else { 490 478 j++; 491 - tmp_cpus[k++] = __perf_cpu_map__cpu(orig, i++); 479 + RC_CHK_ACCESS(merged)->map[k++] = __perf_cpu_map__cpu(orig, i++); 492 480 } 493 481 } 494 - if (k) 495 - merged = cpu_map__trim_new(k, tmp_cpus); 496 - free(tmp_cpus); 497 482 return merged; 498 483 }