tools/perf/builtin-stat.c at v5.8-rc3

tjh.dev / kernel
fork
Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
fork
kernel / tools / perf / builtin-stat.c
at v5.8-rc3 2313 lines 62 kB view raw
wrap content
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * builtin-stat.c
   4 *
   5 * Builtin stat command: Give a precise performance counters summary
   6 * overview about any workload, CPU or specific PID.
   7 *
   8 * Sample output:
   9
  10   $ perf stat ./hackbench 10
  11
  12  Time: 0.118
  13
  14  Performance counter stats for './hackbench 10':
  15
  16       1708.761321 task-clock                #   11.037 CPUs utilized
  17            41,190 context-switches          #    0.024 M/sec
  18             6,735 CPU-migrations            #    0.004 M/sec
  19            17,318 page-faults               #    0.010 M/sec
  20     5,205,202,243 cycles                    #    3.046 GHz
  21     3,856,436,920 stalled-cycles-frontend   #   74.09% frontend cycles idle
  22     1,600,790,871 stalled-cycles-backend    #   30.75% backend  cycles idle
  23     2,603,501,247 instructions              #    0.50  insns per cycle
  24                                             #    1.48  stalled cycles per insn
  25       484,357,498 branches                  #  283.455 M/sec
  26         6,388,934 branch-misses             #    1.32% of all branches
  27
  28        0.154822978  seconds time elapsed
  29
  30 *
  31 * Copyright (C) 2008-2011, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
  32 *
  33 * Improvements and fixes by:
  34 *
  35 *   Arjan van de Ven <arjan@linux.intel.com>
  36 *   Yanmin Zhang <yanmin.zhang@intel.com>
  37 *   Wu Fengguang <fengguang.wu@intel.com>
  38 *   Mike Galbraith <efault@gmx.de>
  39 *   Paul Mackerras <paulus@samba.org>
  40 *   Jaswinder Singh Rajput <jaswinder@kernel.org>
  41 */
  42
  43#include "builtin.h"
  44#include "perf.h"
  45#include "util/cgroup.h"
  46#include <subcmd/parse-options.h>
  47#include "util/parse-events.h"
  48#include "util/pmu.h"
  49#include "util/event.h"
  50#include "util/evlist.h"
  51#include "util/evsel.h"
  52#include "util/debug.h"
  53#include "util/color.h"
  54#include "util/stat.h"
  55#include "util/header.h"
  56#include "util/cpumap.h"
  57#include "util/thread_map.h"
  58#include "util/counts.h"
  59#include "util/group.h"
  60#include "util/session.h"
  61#include "util/tool.h"
  62#include "util/string2.h"
  63#include "util/metricgroup.h"
  64#include "util/synthetic-events.h"
  65#include "util/target.h"
  66#include "util/time-utils.h"
  67#include "util/top.h"
  68#include "util/affinity.h"
  69#include "util/pfm.h"
  70#include "asm/bug.h"
  71
  72#include <linux/time64.h>
  73#include <linux/zalloc.h>
  74#include <api/fs/fs.h>
  75#include <errno.h>
  76#include <signal.h>
  77#include <stdlib.h>
  78#include <sys/prctl.h>
  79#include <inttypes.h>
  80#include <locale.h>
  81#include <math.h>
  82#include <sys/types.h>
  83#include <sys/stat.h>
  84#include <sys/wait.h>
  85#include <unistd.h>
  86#include <sys/time.h>
  87#include <sys/resource.h>
  88#include <linux/err.h>
  89
  90#include <linux/ctype.h>
  91#include <perf/evlist.h>
  92
   /* Field separator string. NOTE(review): presumably the default when -x/--field-separator is not given -- confirm at its use site. */
   93#define DEFAULT_SEPARATOR	" "
   /* Location of the freeze_on_smi control file. NOTE(review): looks relative to the sysfs mount point -- confirm at its use site. */
   94#define FREEZE_ON_SMI_PATH	"devices/cpu/freeze_on_smi"
   95
   /* Forward declaration: process_interval() calls print_counters() before its definition further down. */
   96static void print_counters(struct timespec *ts, int argc, const char **argv);
  97
  98/* Default events used for perf stat -T */
  99static const char *transaction_attrs = {
 100	"task-clock,"
 101	"{"
 102	"instructions,"
 103	"cycles,"
 104	"cpu/cycles-t/,"
 105	"cpu/tx-start/,"
 106	"cpu/el-start/,"
 107	"cpu/cycles-ct/"
 108	"}"
 109};
 110
 111/* More limited version when the CPU does not have all events. */
 112static const char * transaction_limited_attrs = {
 113	"task-clock,"
 114	"{"
 115	"instructions,"
 116	"cycles,"
 117	"cpu/cycles-t/,"
 118	"cpu/tx-start/"
 119	"}"
 120};
 121
 122static const char * topdown_attrs[] = {
 123	"topdown-total-slots",
 124	"topdown-slots-retired",
 125	"topdown-recovery-bubbles",
 126	"topdown-fetch-bubbles",
 127	"topdown-slots-issued",
 128	NULL,
 129};
 130
 131static const char *smi_cost_attrs = {
 132	"{"
 133	"msr/aperf/,"
 134	"msr/smi/,"
 135	"cycles"
 136	"}"
 137};
 138
  /* The event list every function in this file operates on (opened, read and closed in __run_perf_stat()). */
  139static struct evlist	*evsel_list;
  140
  /* What is being measured; filled in by the -p/-t/-a/-C options. */
  141static struct target target = {
  142	.uid	= UINT_MAX,
  143};
  144
  /* Initial value of stat_config.metric_only_len. */
  145#define METRIC_ONLY_LEN 20
  146
  /*
   * pid of the forked workload (set in __run_perf_stat()), or -1 when there
   * is nothing to signal; skip_signal() resets it to -1.
   */
  147static volatile pid_t		child_pid			= -1;
  /* Option-backed flags: -d (counted), -T, --topdown, --smi-cost. */
  148static int			detailed_run			=  0;
  149static bool			transaction_run;
  150static bool			topdown_run			= false;
  151static bool			smi_cost			= false;
  /* NOTE(review): not referenced in the visible part of the file -- confirm its role. */
  152static bool			smi_reset			= false;
  /* -1 until -B/--big-num (or its negation) is seen, then 1 or 0; see stat__set_big_num(). */
  153static int			big_num_opt			=  -1;
  /* -g: __run_perf_stat() calls perf_evlist__set_leader() when set. */
  154static bool			group				= false;
  /* --pre / --post shell commands run via system() around each run; see run_perf_stat(). */
  155static const char		*pre_cmd			= NULL;
  156static const char		*post_cmd			= NULL;
  /* -S: call sync() before each run. */
  157static bool			sync_run			= false;
  /* NOTE(review): presumably set for "-r 0" (repeat forever) -- not visible here, confirm. */
  158static bool			forever				= false;
  /* Set once --metric-only (or its negation) was given explicitly; see enable_metric_only(). */
  159static bool			force_metric_only		= false;
  /* CLOCK_MONOTONIC timestamp taken just before counting starts; interval times are relative to it. */
  160static struct timespec		ref_time;
  /* --append. */
  161static bool			append_file;
  /* When true the interval loops stop after stat_config.times rounds. */
  162static bool			interval_count;
  /* -o/--output file name and --log-fd descriptor. */
  163static const char		*output_name;
  164static int			output_fd;
 165
  /* State of the recording mode, in which counts are also written out as synthesized events. */
  166struct perf_stat {
  /* True when recording; tested everywhere through STAT_RECORD. */
  167	bool			 record;
  /* Output that process_synthesized_event() writes to. */
  168	struct perf_data	 data;
  /* Session whose header is written in __run_perf_stat(). */
  169	struct perf_session	*session;
  /* Running total of event bytes written to @data. */
  170	u64			 bytes_written;
  171	struct perf_tool	 tool;
  /* NOTE(review): the fields below are not used in the visible part of the file. */
  172	bool			 maps_allocated;
  173	struct perf_cpu_map	*cpus;
  174	struct perf_thread_map *threads;
  175	enum aggr_mode		 aggr_mode;
  176};
  177
  178static struct perf_stat		perf_stat;
  179#define STAT_RECORD		perf_stat.record
  180
  /* Set by skip_signal(); polled by the no-workload wait loop in __run_perf_stat(). */
  181static volatile int done = 0;
 182
  /*
   * Global 'perf stat' configuration with its non-zero defaults. Many fields
   * are bound directly to command-line options in stat_options[].
   */
  183static struct perf_stat_config stat_config = {
  184	.aggr_mode		= AGGR_GLOBAL,
  185	.scale			= true,
  186	.unit_width		= 4, /* strlen("unit") */
  187	.run_count		= 1,
  188	.metric_only_len	= METRIC_ONLY_LEN,
  189	.walltime_nsecs_stats	= &walltime_nsecs_stats,
  190	.big_num		= true,
  191};
 192
 193static bool cpus_map_matched(struct evsel *a, struct evsel *b)
 194{
 195	if (!a->core.cpus && !b->core.cpus)
 196		return true;
 197
 198	if (!a->core.cpus || !b->core.cpus)
 199		return false;
 200
 201	if (a->core.cpus->nr != b->core.cpus->nr)
 202		return false;
 203
 204	for (int i = 0; i < a->core.cpus->nr; i++) {
 205		if (a->core.cpus->map[i] != b->core.cpus->map[i])
 206			return false;
 207	}
 208
 209	return true;
 210}
 211
 212static void evlist__check_cpu_maps(struct evlist *evlist)
 213{
 214	struct evsel *evsel, *pos, *leader;
 215	char buf[1024];
 216
 217	evlist__for_each_entry(evlist, evsel) {
 218		leader = evsel->leader;
 219
 220		/* Check that leader matches cpus with each member. */
 221		if (leader == evsel)
 222			continue;
 223		if (cpus_map_matched(leader, evsel))
 224			continue;
 225
 226		/* If there's mismatch disable the group and warn user. */
 227		WARN_ONCE(1, "WARNING: grouped events cpus do not match, disabling group:\n");
 228		evsel__group_desc(leader, buf, sizeof(buf));
 229		pr_warning("  %s\n", buf);
 230
 231		if (verbose) {
 232			cpu_map__snprint(leader->core.cpus, buf, sizeof(buf));
 233			pr_warning("     %s: %s\n", leader->name, buf);
 234			cpu_map__snprint(evsel->core.cpus, buf, sizeof(buf));
 235			pr_warning("     %s: %s\n", evsel->name, buf);
 236		}
 237
 238		for_each_group_evsel(pos, leader) {
 239			pos->leader = pos;
 240			pos->core.nr_members = 0;
 241		}
 242		evsel->leader->core.nr_members = 0;
 243	}
 244}
 245
 246static inline void diff_timespec(struct timespec *r, struct timespec *a,
 247				 struct timespec *b)
 248{
 249	r->tv_sec = a->tv_sec - b->tv_sec;
 250	if (a->tv_nsec < b->tv_nsec) {
 251		r->tv_nsec = a->tv_nsec + NSEC_PER_SEC - b->tv_nsec;
 252		r->tv_sec--;
 253	} else {
 254		r->tv_nsec = a->tv_nsec - b->tv_nsec ;
 255	}
 256}
 257
 258static void perf_stat__reset_stats(void)
 259{
 260	int i;
 261
 262	perf_evlist__reset_stats(evsel_list);
 263	perf_stat__reset_shadow_stats();
 264
 265	for (i = 0; i < stat_config.stats_num; i++)
 266		perf_stat__reset_shadow_per_stat(&stat_config.stats[i]);
 267}
 268
 269static int process_synthesized_event(struct perf_tool *tool __maybe_unused,
 270				     union perf_event *event,
 271				     struct perf_sample *sample __maybe_unused,
 272				     struct machine *machine __maybe_unused)
 273{
 274	if (perf_data__write(&perf_stat.data, event, event->header.size) < 0) {
 275		pr_err("failed to write perf data, error: %m\n");
 276		return -1;
 277	}
 278
 279	perf_stat.bytes_written += event->header.size;
 280	return 0;
 281}
 282
 283static int write_stat_round_event(u64 tm, u64 type)
 284{
 285	return perf_event__synthesize_stat_round(NULL, tm, type,
 286						 process_synthesized_event,
 287						 NULL);
 288}
 289
  /*
   * Emit a stat-round event of kind PERF_STAT_ROUND_TYPE__<interval>, stamped
   * with @time, through write_stat_round_event().
   */
  290#define WRITE_STAT_ROUND_EVENT(time, interval) \
  291	write_stat_round_event(time, PERF_STAT_ROUND_TYPE__ ## interval)
  292
  /* Look up the sample-id slot of event @e at xyarray coordinates (@x, @y). */
  293#define SID(e, x, y) xyarray__entry(e->core.sample_id, x, y)
 294
 295static int evsel__write_stat_event(struct evsel *counter, u32 cpu, u32 thread,
 296				   struct perf_counts_values *count)
 297{
 298	struct perf_sample_id *sid = SID(counter, cpu, thread);
 299
 300	return perf_event__synthesize_stat(NULL, cpu, thread, sid->id, count,
 301					   process_synthesized_event, NULL);
 302}
 303
 304static int read_single_counter(struct evsel *counter, int cpu,
 305			       int thread, struct timespec *rs)
 306{
 307	if (counter->tool_event == PERF_TOOL_DURATION_TIME) {
 308		u64 val = rs->tv_nsec + rs->tv_sec*1000000000ULL;
 309		struct perf_counts_values *count =
 310			perf_counts(counter->counts, cpu, thread);
 311		count->ena = count->run = val;
 312		count->val = val;
 313		return 0;
 314	}
 315	return evsel__read_counter(counter, cpu, thread);
 316}
 317
 318/*
 319 * Read out the results of a single counter:
 320 * do not aggregate counts across CPUs in system-wide mode
 321 */
 322static int read_counter_cpu(struct evsel *counter, struct timespec *rs, int cpu)
 323{
 324	int nthreads = perf_thread_map__nr(evsel_list->core.threads);
 325	int thread;
 326
 327	if (!counter->supported)
 328		return -ENOENT;
 329
 330	if (counter->core.system_wide)
 331		nthreads = 1;
 332
 333	for (thread = 0; thread < nthreads; thread++) {
 334		struct perf_counts_values *count;
 335
 336		count = perf_counts(counter->counts, cpu, thread);
 337
 338		/*
 339		 * The leader's group read loads data into its group members
 340		 * (via evsel__read_counter()) and sets their count->loaded.
 341		 */
 342		if (!perf_counts__is_loaded(counter->counts, cpu, thread) &&
 343		    read_single_counter(counter, cpu, thread, rs)) {
 344			counter->counts->scaled = -1;
 345			perf_counts(counter->counts, cpu, thread)->ena = 0;
 346			perf_counts(counter->counts, cpu, thread)->run = 0;
 347			return -1;
 348		}
 349
 350		perf_counts__set_loaded(counter->counts, cpu, thread, false);
 351
 352		if (STAT_RECORD) {
 353			if (evsel__write_stat_event(counter, cpu, thread, count)) {
 354				pr_err("failed to write stat event\n");
 355				return -1;
 356			}
 357		}
 358
 359		if (verbose > 1) {
 360			fprintf(stat_config.output,
 361				"%s: %d: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
 362					evsel__name(counter),
 363					cpu,
 364					count->val, count->ena, count->run);
 365		}
 366	}
 367
 368	return 0;
 369}
 370
 371static int read_affinity_counters(struct timespec *rs)
 372{
 373	struct evsel *counter;
 374	struct affinity affinity;
 375	int i, ncpus, cpu;
 376
 377	if (affinity__setup(&affinity) < 0)
 378		return -1;
 379
 380	ncpus = perf_cpu_map__nr(evsel_list->core.all_cpus);
 381	if (!target__has_cpu(&target) || target__has_per_thread(&target))
 382		ncpus = 1;
 383	evlist__for_each_cpu(evsel_list, i, cpu) {
 384		if (i >= ncpus)
 385			break;
 386		affinity__set(&affinity, cpu);
 387
 388		evlist__for_each_entry(evsel_list, counter) {
 389			if (evsel__cpu_iter_skip(counter, cpu))
 390				continue;
 391			if (!counter->err) {
 392				counter->err = read_counter_cpu(counter, rs,
 393								counter->cpu_iter - 1);
 394			}
 395		}
 396	}
 397	affinity__cleanup(&affinity);
 398	return 0;
 399}
 400
 401static void read_counters(struct timespec *rs)
 402{
 403	struct evsel *counter;
 404
 405	if (!stat_config.summary && (read_affinity_counters(rs) < 0))
 406		return;
 407
 408	evlist__for_each_entry(evsel_list, counter) {
 409		if (counter->err)
 410			pr_debug("failed to read counter %s\n", counter->name);
 411		if (counter->err == 0 && perf_stat_process_counter(&stat_config, counter))
 412			pr_warning("failed to process counter %s\n", counter->name);
 413		counter->err = 0;
 414	}
 415}
 416
 417static int runtime_stat_new(struct perf_stat_config *config, int nthreads)
 418{
 419	int i;
 420
 421	config->stats = calloc(nthreads, sizeof(struct runtime_stat));
 422	if (!config->stats)
 423		return -1;
 424
 425	config->stats_num = nthreads;
 426
 427	for (i = 0; i < nthreads; i++)
 428		runtime_stat__init(&config->stats[i]);
 429
 430	return 0;
 431}
 432
 433static void runtime_stat_delete(struct perf_stat_config *config)
 434{
 435	int i;
 436
 437	if (!config->stats)
 438		return;
 439
 440	for (i = 0; i < config->stats_num; i++)
 441		runtime_stat__exit(&config->stats[i]);
 442
 443	zfree(&config->stats);
 444}
 445
 446static void runtime_stat_reset(struct perf_stat_config *config)
 447{
 448	int i;
 449
 450	if (!config->stats)
 451		return;
 452
 453	for (i = 0; i < config->stats_num; i++)
 454		perf_stat__reset_shadow_per_stat(&config->stats[i]);
 455}
 456
 457static void process_interval(void)
 458{
 459	struct timespec ts, rs;
 460
 461	clock_gettime(CLOCK_MONOTONIC, &ts);
 462	diff_timespec(&rs, &ts, &ref_time);
 463
 464	perf_stat__reset_shadow_per_stat(&rt_stat);
 465	runtime_stat_reset(&stat_config);
 466	read_counters(&rs);
 467
 468	if (STAT_RECORD) {
 469		if (WRITE_STAT_ROUND_EVENT(rs.tv_sec * NSEC_PER_SEC + rs.tv_nsec, INTERVAL))
 470			pr_err("failed to write stat round event\n");
 471	}
 472
 473	init_stats(&walltime_nsecs_stats);
 474	update_stats(&walltime_nsecs_stats, stat_config.interval * 1000000ULL);
 475	print_counters(&rs, 0, NULL);
 476}
 477
 478static void enable_counters(void)
 479{
 480	if (stat_config.initial_delay)
 481		usleep(stat_config.initial_delay * USEC_PER_MSEC);
 482
 483	/*
 484	 * We need to enable counters only if:
 485	 * - we don't have tracee (attaching to task or cpu)
 486	 * - we have initial delay configured
 487	 */
 488	if (!target__none(&target) || stat_config.initial_delay)
 489		evlist__enable(evsel_list);
 490}
 491
 492static void disable_counters(void)
 493{
 494	/*
 495	 * If we don't have tracee (attaching to task or cpu), counters may
 496	 * still be running. To get accurate group ratios, we must stop groups
 497	 * from counting before reading their constituent counters.
 498	 */
 499	if (!target__none(&target))
 500		evlist__disable(evsel_list);
 501}
 502
 503static volatile int workload_exec_errno;
 504
 505/*
 506 * perf_evlist__prepare_workload will send a SIGUSR1
 507 * if the fork fails, since we asked by setting its
 508 * want_signal to true.
 509 */
 510static void workload_exec_failed_signal(int signo __maybe_unused, siginfo_t *info,
 511					void *ucontext __maybe_unused)
 512{
 513	workload_exec_errno = info->si_value.sival_int;
 514}
 515
 516static bool evsel__should_store_id(struct evsel *counter)
 517{
 518	return STAT_RECORD || counter->core.attr.read_format & PERF_FORMAT_ID;
 519}
 520
 521static bool is_target_alive(struct target *_target,
 522			    struct perf_thread_map *threads)
 523{
 524	struct stat st;
 525	int i;
 526
 527	if (!target__has_task(_target))
 528		return true;
 529
 530	for (i = 0; i < threads->nr; i++) {
 531		char path[PATH_MAX];
 532
 533		scnprintf(path, PATH_MAX, "%s/%d", procfs__mountpoint(),
 534			  threads->map[i].pid);
 535
 536		if (!stat(path, &st))
 537			return true;
 538	}
 539
 540	return false;
 541}
 542
 543enum counter_recovery {
 544	COUNTER_SKIP,
 545	COUNTER_RETRY,
 546	COUNTER_FATAL,
 547};
 548
 549static enum counter_recovery stat_handle_error(struct evsel *counter)
 550{
 551	char msg[BUFSIZ];
 552	/*
 553	 * PPC returns ENXIO for HW counters until 2.6.37
 554	 * (behavior changed with commit b0a873e).
 555	 */
 556	if (errno == EINVAL || errno == ENOSYS ||
 557	    errno == ENOENT || errno == EOPNOTSUPP ||
 558	    errno == ENXIO) {
 559		if (verbose > 0)
 560			ui__warning("%s event is not supported by the kernel.\n",
 561				    evsel__name(counter));
 562		counter->supported = false;
 563		/*
 564		 * errored is a sticky flag that means one of the counter's
 565		 * cpu event had a problem and needs to be reexamined.
 566		 */
 567		counter->errored = true;
 568
 569		if ((counter->leader != counter) ||
 570		    !(counter->leader->core.nr_members > 1))
 571			return COUNTER_SKIP;
 572	} else if (evsel__fallback(counter, errno, msg, sizeof(msg))) {
 573		if (verbose > 0)
 574			ui__warning("%s\n", msg);
 575		return COUNTER_RETRY;
 576	} else if (target__has_per_thread(&target) &&
 577		   evsel_list->core.threads &&
 578		   evsel_list->core.threads->err_thread != -1) {
 579		/*
 580		 * For global --per-thread case, skip current
 581		 * error thread.
 582		 */
 583		if (!thread_map__remove(evsel_list->core.threads,
 584					evsel_list->core.threads->err_thread)) {
 585			evsel_list->core.threads->err_thread = -1;
 586			return COUNTER_RETRY;
 587		}
 588	}
 589
 590	evsel__open_strerror(counter, &target, errno, msg, sizeof(msg));
 591	ui__error("%s\n", msg);
 592
 593	if (child_pid != -1)
 594		kill(child_pid, SIGTERM);
 595	return COUNTER_FATAL;
 596}
 597
 598static int __run_perf_stat(int argc, const char **argv, int run_idx)
 599{
 600	int interval = stat_config.interval;
 601	int times = stat_config.times;
 602	int timeout = stat_config.timeout;
 603	char msg[BUFSIZ];
 604	unsigned long long t0, t1;
 605	struct evsel *counter;
 606	struct timespec ts;
 607	size_t l;
 608	int status = 0;
 609	const bool forks = (argc > 0);
 610	bool is_pipe = STAT_RECORD ? perf_stat.data.is_pipe : false;
 611	struct affinity affinity;
 612	int i, cpu;
 613	bool second_pass = false;
 614
 615	if (interval) {
 616		ts.tv_sec  = interval / USEC_PER_MSEC;
 617		ts.tv_nsec = (interval % USEC_PER_MSEC) * NSEC_PER_MSEC;
 618	} else if (timeout) {
 619		ts.tv_sec  = timeout / USEC_PER_MSEC;
 620		ts.tv_nsec = (timeout % USEC_PER_MSEC) * NSEC_PER_MSEC;
 621	} else {
 622		ts.tv_sec  = 1;
 623		ts.tv_nsec = 0;
 624	}
 625
 626	if (forks) {
 627		if (perf_evlist__prepare_workload(evsel_list, &target, argv, is_pipe,
 628						  workload_exec_failed_signal) < 0) {
 629			perror("failed to prepare workload");
 630			return -1;
 631		}
 632		child_pid = evsel_list->workload.pid;
 633	}
 634
 635	if (group)
 636		perf_evlist__set_leader(evsel_list);
 637
 638	if (affinity__setup(&affinity) < 0)
 639		return -1;
 640
 641	evlist__for_each_cpu (evsel_list, i, cpu) {
 642		affinity__set(&affinity, cpu);
 643
 644		evlist__for_each_entry(evsel_list, counter) {
 645			if (evsel__cpu_iter_skip(counter, cpu))
 646				continue;
 647			if (counter->reset_group || counter->errored)
 648				continue;
 649try_again:
 650			if (create_perf_stat_counter(counter, &stat_config, &target,
 651						     counter->cpu_iter - 1) < 0) {
 652
 653				/*
 654				 * Weak group failed. We cannot just undo this here
 655				 * because earlier CPUs might be in group mode, and the kernel
 656				 * doesn't support mixing group and non group reads. Defer
 657				 * it to later.
 658				 * Don't close here because we're in the wrong affinity.
 659				 */
 660				if ((errno == EINVAL || errno == EBADF) &&
 661				    counter->leader != counter &&
 662				    counter->weak_group) {
 663					perf_evlist__reset_weak_group(evsel_list, counter, false);
 664					assert(counter->reset_group);
 665					second_pass = true;
 666					continue;
 667				}
 668
 669				switch (stat_handle_error(counter)) {
 670				case COUNTER_FATAL:
 671					return -1;
 672				case COUNTER_RETRY:
 673					goto try_again;
 674				case COUNTER_SKIP:
 675					continue;
 676				default:
 677					break;
 678				}
 679
 680			}
 681			counter->supported = true;
 682		}
 683	}
 684
 685	if (second_pass) {
 686		/*
 687		 * Now redo all the weak group after closing them,
 688		 * and also close errored counters.
 689		 */
 690
 691		evlist__for_each_cpu(evsel_list, i, cpu) {
 692			affinity__set(&affinity, cpu);
 693			/* First close errored or weak retry */
 694			evlist__for_each_entry(evsel_list, counter) {
 695				if (!counter->reset_group && !counter->errored)
 696					continue;
 697				if (evsel__cpu_iter_skip_no_inc(counter, cpu))
 698					continue;
 699				perf_evsel__close_cpu(&counter->core, counter->cpu_iter);
 700			}
 701			/* Now reopen weak */
 702			evlist__for_each_entry(evsel_list, counter) {
 703				if (!counter->reset_group && !counter->errored)
 704					continue;
 705				if (evsel__cpu_iter_skip(counter, cpu))
 706					continue;
 707				if (!counter->reset_group)
 708					continue;
 709try_again_reset:
 710				pr_debug2("reopening weak %s\n", evsel__name(counter));
 711				if (create_perf_stat_counter(counter, &stat_config, &target,
 712							     counter->cpu_iter - 1) < 0) {
 713
 714					switch (stat_handle_error(counter)) {
 715					case COUNTER_FATAL:
 716						return -1;
 717					case COUNTER_RETRY:
 718						goto try_again_reset;
 719					case COUNTER_SKIP:
 720						continue;
 721					default:
 722						break;
 723					}
 724				}
 725				counter->supported = true;
 726			}
 727		}
 728	}
 729	affinity__cleanup(&affinity);
 730
 731	evlist__for_each_entry(evsel_list, counter) {
 732		if (!counter->supported) {
 733			perf_evsel__free_fd(&counter->core);
 734			continue;
 735		}
 736
 737		l = strlen(counter->unit);
 738		if (l > stat_config.unit_width)
 739			stat_config.unit_width = l;
 740
 741		if (evsel__should_store_id(counter) &&
 742		    evsel__store_ids(counter, evsel_list))
 743			return -1;
 744	}
 745
 746	if (perf_evlist__apply_filters(evsel_list, &counter)) {
 747		pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
 748			counter->filter, evsel__name(counter), errno,
 749			str_error_r(errno, msg, sizeof(msg)));
 750		return -1;
 751	}
 752
 753	if (STAT_RECORD) {
 754		int err, fd = perf_data__fd(&perf_stat.data);
 755
 756		if (is_pipe) {
 757			err = perf_header__write_pipe(perf_data__fd(&perf_stat.data));
 758		} else {
 759			err = perf_session__write_header(perf_stat.session, evsel_list,
 760							 fd, false);
 761		}
 762
 763		if (err < 0)
 764			return err;
 765
 766		err = perf_event__synthesize_stat_events(&stat_config, NULL, evsel_list,
 767							 process_synthesized_event, is_pipe);
 768		if (err < 0)
 769			return err;
 770	}
 771
 772	/*
 773	 * Enable counters and exec the command:
 774	 */
 775	t0 = rdclock();
 776	clock_gettime(CLOCK_MONOTONIC, &ref_time);
 777
 778	if (forks) {
 779		perf_evlist__start_workload(evsel_list);
 780		enable_counters();
 781
 782		if (interval || timeout) {
 783			while (!waitpid(child_pid, &status, WNOHANG)) {
 784				nanosleep(&ts, NULL);
 785				if (timeout)
 786					break;
 787				process_interval();
 788				if (interval_count && !(--times))
 789					break;
 790			}
 791		}
 792		if (child_pid != -1) {
 793			if (timeout)
 794				kill(child_pid, SIGTERM);
 795			wait4(child_pid, &status, 0, &stat_config.ru_data);
 796		}
 797
 798		if (workload_exec_errno) {
 799			const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
 800			pr_err("Workload failed: %s\n", emsg);
 801			return -1;
 802		}
 803
 804		if (WIFSIGNALED(status))
 805			psignal(WTERMSIG(status), argv[0]);
 806	} else {
 807		enable_counters();
 808		while (!done) {
 809			nanosleep(&ts, NULL);
 810			if (!is_target_alive(&target, evsel_list->core.threads))
 811				break;
 812			if (timeout)
 813				break;
 814			if (interval) {
 815				process_interval();
 816				if (interval_count && !(--times))
 817					break;
 818			}
 819		}
 820	}
 821
 822	disable_counters();
 823
 824	t1 = rdclock();
 825
 826	if (stat_config.walltime_run_table)
 827		stat_config.walltime_run[run_idx] = t1 - t0;
 828
 829	if (interval) {
 830		stat_config.interval = 0;
 831		stat_config.summary = true;
 832		init_stats(&walltime_nsecs_stats);
 833		update_stats(&walltime_nsecs_stats, t1 - t0);
 834
 835		if (stat_config.aggr_mode == AGGR_GLOBAL)
 836			perf_evlist__save_aggr_prev_raw_counts(evsel_list);
 837
 838		perf_evlist__copy_prev_raw_counts(evsel_list);
 839		perf_evlist__reset_prev_raw_counts(evsel_list);
 840		runtime_stat_reset(&stat_config);
 841		perf_stat__reset_shadow_per_stat(&rt_stat);
 842	} else
 843		update_stats(&walltime_nsecs_stats, t1 - t0);
 844
 845	/*
 846	 * Closing a group leader splits the group, and as we only disable
 847	 * group leaders, results in remaining events becoming enabled. To
 848	 * avoid arbitrary skew, we must read all counters before closing any
 849	 * group leaders.
 850	 */
 851	read_counters(&(struct timespec) { .tv_nsec = t1-t0 });
 852
 853	/*
 854	 * We need to keep evsel_list alive, because it's processed
 855	 * later the evsel_list will be closed after.
 856	 */
 857	if (!STAT_RECORD)
 858		evlist__close(evsel_list);
 859
 860	return WEXITSTATUS(status);
 861}
 862
 863static int run_perf_stat(int argc, const char **argv, int run_idx)
 864{
 865	int ret;
 866
 867	if (pre_cmd) {
 868		ret = system(pre_cmd);
 869		if (ret)
 870			return ret;
 871	}
 872
 873	if (sync_run)
 874		sync();
 875
 876	ret = __run_perf_stat(argc, argv, run_idx);
 877	if (ret)
 878		return ret;
 879
 880	if (post_cmd) {
 881		ret = system(post_cmd);
 882		if (ret)
 883			return ret;
 884	}
 885
 886	return ret;
 887}
 888
 889static void print_counters(struct timespec *ts, int argc, const char **argv)
 890{
 891	/* Do not print anything if we record to the pipe. */
 892	if (STAT_RECORD && perf_stat.data.is_pipe)
 893		return;
 894
 895	perf_evlist__print_counters(evsel_list, &stat_config, &target,
 896				    ts, argc, argv);
 897}
 898
 899static volatile int signr = -1;
 900
 901static void skip_signal(int signo)
 902{
 903	if ((child_pid == -1) || stat_config.interval)
 904		done = 1;
 905
 906	signr = signo;
 907	/*
 908	 * render child_pid harmless
 909	 * won't send SIGTERM to a random
 910	 * process in case of race condition
 911	 * and fast PID recycling
 912	 */
 913	child_pid = -1;
 914}
 915
 916static void sig_atexit(void)
 917{
 918	sigset_t set, oset;
 919
 920	/*
 921	 * avoid race condition with SIGCHLD handler
 922	 * in skip_signal() which is modifying child_pid
 923	 * goal is to avoid send SIGTERM to a random
 924	 * process
 925	 */
 926	sigemptyset(&set);
 927	sigaddset(&set, SIGCHLD);
 928	sigprocmask(SIG_BLOCK, &set, &oset);
 929
 930	if (child_pid != -1)
 931		kill(child_pid, SIGTERM);
 932
 933	sigprocmask(SIG_SETMASK, &oset, NULL);
 934
 935	if (signr == -1)
 936		return;
 937
 938	signal(signr, SIG_DFL);
 939	kill(getpid(), signr);
 940}
 941
 942void perf_stat__set_big_num(int set)
 943{
 944	stat_config.big_num = (set != 0);
 945}
 946
 947static int stat__set_big_num(const struct option *opt __maybe_unused,
 948			     const char *s __maybe_unused, int unset)
 949{
 950	big_num_opt = unset ? 0 : 1;
 951	perf_stat__set_big_num(!unset);
 952	return 0;
 953}
 954
 955static int enable_metric_only(const struct option *opt __maybe_unused,
 956			      const char *s __maybe_unused, int unset)
 957{
 958	force_metric_only = true;
 959	stat_config.metric_only = !unset;
 960	return 0;
 961}
 962
 963static int parse_metric_groups(const struct option *opt,
 964			       const char *str,
 965			       int unset __maybe_unused)
 966{
 967	return metricgroup__parse_groups(opt, str,
 968					 stat_config.metric_no_group,
 969					 stat_config.metric_no_merge,
 970					 &stat_config.metric_events);
 971}
 972
  /*
   * Command-line options of 'perf stat'. Each entry binds a flag either to the
   * file-scope variable or stat_config/target field it sets, or to a callback.
   * The --pfm-events entry is only present when HAVE_LIBPFM is defined.
   */
  973static struct option stat_options[] = {
  974	OPT_BOOLEAN('T', "transaction", &transaction_run,
  975		    "hardware transaction statistics"),
  976	OPT_CALLBACK('e', "event", &evsel_list, "event",
  977		     "event selector. use 'perf list' to list available events",
  978		     parse_events_option),
  979	OPT_CALLBACK(0, "filter", &evsel_list, "filter",
  980		     "event filter", parse_filter),
  981	OPT_BOOLEAN('i', "no-inherit", &stat_config.no_inherit,
  982		    "child tasks do not inherit counters"),
  983	OPT_STRING('p', "pid", &target.pid, "pid",
  984		   "stat events on existing process id"),
  985	OPT_STRING('t', "tid", &target.tid, "tid",
  986		   "stat events on existing thread id"),
  987	OPT_BOOLEAN('a', "all-cpus", &target.system_wide,
  988		    "system-wide collection from all CPUs"),
  989	OPT_BOOLEAN('g', "group", &group,
  990		    "put the counters into a counter group"),
  991	OPT_BOOLEAN(0, "scale", &stat_config.scale,
  992		    "Use --no-scale to disable counter scaling for multiplexing"),
  993	OPT_INCR('v', "verbose", &verbose,
  994		    "be more verbose (show counter open errors, etc)"),
  995	OPT_INTEGER('r', "repeat", &stat_config.run_count,
  996		    "repeat command and print average + stddev (max: 100, forever: 0)"),
  997	OPT_BOOLEAN(0, "table", &stat_config.walltime_run_table,
  998		    "display details about each run (only with -r option)"),
  999	OPT_BOOLEAN('n', "null", &stat_config.null_run,
 1000		    "null run - dont start any counters"),
 1001	OPT_INCR('d', "detailed", &detailed_run,
 1002		    "detailed run - start a lot of events"),
 1003	OPT_BOOLEAN('S', "sync", &sync_run,
 1004		    "call sync() before starting a run"),
 1005	OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL,
 1006			   "print large numbers with thousands\' separators",
 1007			   stat__set_big_num),
 1008	OPT_STRING('C', "cpu", &target.cpu_list, "cpu",
 1009		    "list of cpus to monitor in system-wide"),
 1010	OPT_SET_UINT('A', "no-aggr", &stat_config.aggr_mode,
 1011		    "disable CPU count aggregation", AGGR_NONE),
 1012	OPT_BOOLEAN(0, "no-merge", &stat_config.no_merge, "Do not merge identical named events"),
 1013	OPT_STRING('x', "field-separator", &stat_config.csv_sep, "separator",
 1014		   "print counts with custom separator"),
 1015	OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
 1016		     "monitor event in cgroup name only", parse_cgroups),
 1017	OPT_STRING('o', "output", &output_name, "file", "output file name"),
 1018	OPT_BOOLEAN(0, "append", &append_file, "append to the output file"),
 1019	OPT_INTEGER(0, "log-fd", &output_fd,
 1020		    "log output to fd, instead of stderr"),
 1021	OPT_STRING(0, "pre", &pre_cmd, "command",
 1022			"command to run prior to the measured command"),
 1023	OPT_STRING(0, "post", &post_cmd, "command",
 1024			"command to run after to the measured command"),
 1025	OPT_UINTEGER('I', "interval-print", &stat_config.interval,
 1026		    "print counts at regular interval in ms "
 1027		    "(overhead is possible for values <= 100ms)"),
 1028	OPT_INTEGER(0, "interval-count", &stat_config.times,
 1029		    "print counts for fixed number of times"),
 1030	OPT_BOOLEAN(0, "interval-clear", &stat_config.interval_clear,
 1031		    "clear screen in between new interval"),
 1032	OPT_UINTEGER(0, "timeout", &stat_config.timeout,
 1033		    "stop workload and print counts after a timeout period in ms (>= 10ms)"),
 1034	OPT_SET_UINT(0, "per-socket", &stat_config.aggr_mode,
 1035		     "aggregate counts per processor socket", AGGR_SOCKET),
 1036	OPT_SET_UINT(0, "per-die", &stat_config.aggr_mode,
 1037		     "aggregate counts per processor die", AGGR_DIE),
 1038	OPT_SET_UINT(0, "per-core", &stat_config.aggr_mode,
 1039		     "aggregate counts per physical processor core", AGGR_CORE),
 1040	OPT_SET_UINT(0, "per-thread", &stat_config.aggr_mode,
 1041		     "aggregate counts per thread", AGGR_THREAD),
 1042	OPT_SET_UINT(0, "per-node", &stat_config.aggr_mode,
 1043		     "aggregate counts per numa node", AGGR_NODE),
 1044	OPT_UINTEGER('D', "delay", &stat_config.initial_delay,
 1045		     "ms to wait before starting measurement after program start"),
 1046	OPT_CALLBACK_NOOPT(0, "metric-only", &stat_config.metric_only, NULL,
 1047			"Only print computed metrics. No raw values", enable_metric_only),
 1048	OPT_BOOLEAN(0, "metric-no-group", &stat_config.metric_no_group,
 1049		       "don't group metric events, impacts multiplexing"),
 1050	OPT_BOOLEAN(0, "metric-no-merge", &stat_config.metric_no_merge,
 1051		       "don't try to share events between metrics in a group"),
 1052	OPT_BOOLEAN(0, "topdown", &topdown_run,
 1053			"measure topdown level 1 statistics"),
 1054	OPT_BOOLEAN(0, "smi-cost", &smi_cost,
 1055			"measure SMI cost"),
 1056	OPT_CALLBACK('M', "metrics", &evsel_list, "metric/metric group list",
 1057		     "monitor specified metrics or metric groups (separated by ,)",
 1058		     parse_metric_groups),
 1059	OPT_BOOLEAN_FLAG(0, "all-kernel", &stat_config.all_kernel,
 1060			 "Configure all used events to run in kernel space.",
 1061			 PARSE_OPT_EXCLUSIVE),
 1062	OPT_BOOLEAN_FLAG(0, "all-user", &stat_config.all_user,
 1063			 "Configure all used events to run in user space.",
 1064			 PARSE_OPT_EXCLUSIVE),
 1065	OPT_BOOLEAN(0, "percore-show-thread", &stat_config.percore_show_thread,
 1066		    "Use with 'percore' event qualifier to show the event "
 1067		    "counts of one hardware thread by sum up total hardware "
 1068		    "threads of same physical core"),
 1069#ifdef HAVE_LIBPFM
 1070	OPT_CALLBACK(0, "pfm-events", &evsel_list, "event",
 1071		"libpfm4 event selector. use 'perf list' to list available events",
 1072		parse_libpfm_events_option),
 1073#endif
 1074	OPT_END()
 1075};
1076
1077static int perf_stat__get_socket(struct perf_stat_config *config __maybe_unused,
1078				 struct perf_cpu_map *map, int cpu)
1079{
1080	return cpu_map__get_socket(map, cpu, NULL);
1081}
1082
1083static int perf_stat__get_die(struct perf_stat_config *config __maybe_unused,
1084			      struct perf_cpu_map *map, int cpu)
1085{
1086	return cpu_map__get_die(map, cpu, NULL);
1087}
1088
1089static int perf_stat__get_core(struct perf_stat_config *config __maybe_unused,
1090			       struct perf_cpu_map *map, int cpu)
1091{
1092	return cpu_map__get_core(map, cpu, NULL);
1093}
1094
1095static int perf_stat__get_node(struct perf_stat_config *config __maybe_unused,
1096			       struct perf_cpu_map *map, int cpu)
1097{
1098	return cpu_map__get_node(map, cpu, NULL);
1099}
1100
1101static int perf_stat__get_aggr(struct perf_stat_config *config,
1102			       aggr_get_id_t get_id, struct perf_cpu_map *map, int idx)
1103{
1104	int cpu;
1105
1106	if (idx >= map->nr)
1107		return -1;
1108
1109	cpu = map->map[idx];
1110
1111	if (config->cpus_aggr_map->map[cpu] == -1)
1112		config->cpus_aggr_map->map[cpu] = get_id(config, map, idx);
1113
1114	return config->cpus_aggr_map->map[cpu];
1115}
1116
1117static int perf_stat__get_socket_cached(struct perf_stat_config *config,
1118					struct perf_cpu_map *map, int idx)
1119{
1120	return perf_stat__get_aggr(config, perf_stat__get_socket, map, idx);
1121}
1122
1123static int perf_stat__get_die_cached(struct perf_stat_config *config,
1124					struct perf_cpu_map *map, int idx)
1125{
1126	return perf_stat__get_aggr(config, perf_stat__get_die, map, idx);
1127}
1128
1129static int perf_stat__get_core_cached(struct perf_stat_config *config,
1130				      struct perf_cpu_map *map, int idx)
1131{
1132	return perf_stat__get_aggr(config, perf_stat__get_core, map, idx);
1133}
1134
1135static int perf_stat__get_node_cached(struct perf_stat_config *config,
1136				      struct perf_cpu_map *map, int idx)
1137{
1138	return perf_stat__get_aggr(config, perf_stat__get_node, map, idx);
1139}
1140
1141static bool term_percore_set(void)
1142{
1143	struct evsel *counter;
1144
1145	evlist__for_each_entry(evsel_list, counter) {
1146		if (counter->percore)
1147			return true;
1148	}
1149
1150	return false;
1151}
1152
1153static int perf_stat_init_aggr_mode(void)
1154{
1155	int nr;
1156
1157	switch (stat_config.aggr_mode) {
1158	case AGGR_SOCKET:
1159		if (cpu_map__build_socket_map(evsel_list->core.cpus, &stat_config.aggr_map)) {
1160			perror("cannot build socket map");
1161			return -1;
1162		}
1163		stat_config.aggr_get_id = perf_stat__get_socket_cached;
1164		break;
1165	case AGGR_DIE:
1166		if (cpu_map__build_die_map(evsel_list->core.cpus, &stat_config.aggr_map)) {
1167			perror("cannot build die map");
1168			return -1;
1169		}
1170		stat_config.aggr_get_id = perf_stat__get_die_cached;
1171		break;
1172	case AGGR_CORE:
1173		if (cpu_map__build_core_map(evsel_list->core.cpus, &stat_config.aggr_map)) {
1174			perror("cannot build core map");
1175			return -1;
1176		}
1177		stat_config.aggr_get_id = perf_stat__get_core_cached;
1178		break;
1179	case AGGR_NODE:
1180		if (cpu_map__build_node_map(evsel_list->core.cpus, &stat_config.aggr_map)) {
1181			perror("cannot build core map");
1182			return -1;
1183		}
1184		stat_config.aggr_get_id = perf_stat__get_node_cached;
1185		break;
1186	case AGGR_NONE:
1187		if (term_percore_set()) {
1188			if (cpu_map__build_core_map(evsel_list->core.cpus,
1189						    &stat_config.aggr_map)) {
1190				perror("cannot build core map");
1191				return -1;
1192			}
1193			stat_config.aggr_get_id = perf_stat__get_core_cached;
1194		}
1195		break;
1196	case AGGR_GLOBAL:
1197	case AGGR_THREAD:
1198	case AGGR_UNSET:
1199	default:
1200		break;
1201	}
1202
1203	/*
1204	 * The evsel_list->cpus is the base we operate on,
1205	 * taking the highest cpu number to be the size of
1206	 * the aggregation translate cpumap.
1207	 */
1208	nr = perf_cpu_map__max(evsel_list->core.cpus);
1209	stat_config.cpus_aggr_map = perf_cpu_map__empty_new(nr + 1);
1210	return stat_config.cpus_aggr_map ? 0 : -ENOMEM;
1211}
1212
1213static void perf_stat__exit_aggr_mode(void)
1214{
1215	perf_cpu_map__put(stat_config.aggr_map);
1216	perf_cpu_map__put(stat_config.cpus_aggr_map);
1217	stat_config.aggr_map = NULL;
1218	stat_config.cpus_aggr_map = NULL;
1219}
1220
1221static inline int perf_env__get_cpu(struct perf_env *env, struct perf_cpu_map *map, int idx)
1222{
1223	int cpu;
1224
1225	if (idx > map->nr)
1226		return -1;
1227
1228	cpu = map->map[idx];
1229
1230	if (cpu >= env->nr_cpus_avail)
1231		return -1;
1232
1233	return cpu;
1234}
1235
1236static int perf_env__get_socket(struct perf_cpu_map *map, int idx, void *data)
1237{
1238	struct perf_env *env = data;
1239	int cpu = perf_env__get_cpu(env, map, idx);
1240
1241	return cpu == -1 ? -1 : env->cpu[cpu].socket_id;
1242}
1243
1244static int perf_env__get_die(struct perf_cpu_map *map, int idx, void *data)
1245{
1246	struct perf_env *env = data;
1247	int die_id = -1, cpu = perf_env__get_cpu(env, map, idx);
1248
1249	if (cpu != -1) {
1250		/*
1251		 * Encode socket in bit range 15:8
1252		 * die_id is relative to socket,
1253		 * we need a global id. So we combine
1254		 * socket + die id
1255		 */
1256		if (WARN_ONCE(env->cpu[cpu].socket_id >> 8, "The socket id number is too big.\n"))
1257			return -1;
1258
1259		if (WARN_ONCE(env->cpu[cpu].die_id >> 8, "The die id number is too big.\n"))
1260			return -1;
1261
1262		die_id = (env->cpu[cpu].socket_id << 8) | (env->cpu[cpu].die_id & 0xff);
1263	}
1264
1265	return die_id;
1266}
1267
1268static int perf_env__get_core(struct perf_cpu_map *map, int idx, void *data)
1269{
1270	struct perf_env *env = data;
1271	int core = -1, cpu = perf_env__get_cpu(env, map, idx);
1272
1273	if (cpu != -1) {
1274		/*
1275		 * Encode socket in bit range 31:24
1276		 * encode die id in bit range 23:16
1277		 * core_id is relative to socket and die,
1278		 * we need a global id. So we combine
1279		 * socket + die id + core id
1280		 */
1281		if (WARN_ONCE(env->cpu[cpu].socket_id >> 8, "The socket id number is too big.\n"))
1282			return -1;
1283
1284		if (WARN_ONCE(env->cpu[cpu].die_id >> 8, "The die id number is too big.\n"))
1285			return -1;
1286
1287		if (WARN_ONCE(env->cpu[cpu].core_id >> 16, "The core id number is too big.\n"))
1288			return -1;
1289
1290		core = (env->cpu[cpu].socket_id << 24) |
1291		       (env->cpu[cpu].die_id << 16) |
1292		       (env->cpu[cpu].core_id & 0xffff);
1293	}
1294
1295	return core;
1296}
1297
/*
 * NUMA node lookup against the perf_env passed as @data: resolve position
 * @idx of @map to a cpu and hand it to perf_env__numa_node(). The cpu is
 * passed on unchecked, so a -1 from perf_env__get_cpu() is left for
 * perf_env__numa_node() to deal with.
 */
static int perf_env__get_node(struct perf_cpu_map *map, int idx, void *data)
{
	struct perf_env *env = data;
	int cpu = perf_env__get_cpu(env, map, idx);

	return perf_env__numa_node(env, cpu);
}
1304
1305static int perf_env__build_socket_map(struct perf_env *env, struct perf_cpu_map *cpus,
1306				      struct perf_cpu_map **sockp)
1307{
1308	return cpu_map__build_map(cpus, sockp, perf_env__get_socket, env);
1309}
1310
1311static int perf_env__build_die_map(struct perf_env *env, struct perf_cpu_map *cpus,
1312				   struct perf_cpu_map **diep)
1313{
1314	return cpu_map__build_map(cpus, diep, perf_env__get_die, env);
1315}
1316
1317static int perf_env__build_core_map(struct perf_env *env, struct perf_cpu_map *cpus,
1318				    struct perf_cpu_map **corep)
1319{
1320	return cpu_map__build_map(cpus, corep, perf_env__get_core, env);
1321}
1322
1323static int perf_env__build_node_map(struct perf_env *env, struct perf_cpu_map *cpus,
1324				    struct perf_cpu_map **nodep)
1325{
1326	return cpu_map__build_map(cpus, nodep, perf_env__get_node, env);
1327}
1328
1329static int perf_stat__get_socket_file(struct perf_stat_config *config __maybe_unused,
1330				      struct perf_cpu_map *map, int idx)
1331{
1332	return perf_env__get_socket(map, idx, &perf_stat.session->header.env);
1333}
1334static int perf_stat__get_die_file(struct perf_stat_config *config __maybe_unused,
1335				   struct perf_cpu_map *map, int idx)
1336{
1337	return perf_env__get_die(map, idx, &perf_stat.session->header.env);
1338}
1339
1340static int perf_stat__get_core_file(struct perf_stat_config *config __maybe_unused,
1341				    struct perf_cpu_map *map, int idx)
1342{
1343	return perf_env__get_core(map, idx, &perf_stat.session->header.env);
1344}
1345
1346static int perf_stat__get_node_file(struct perf_stat_config *config __maybe_unused,
1347				    struct perf_cpu_map *map, int idx)
1348{
1349	return perf_env__get_node(map, idx, &perf_stat.session->header.env);
1350}
1351
1352static int perf_stat_init_aggr_mode_file(struct perf_stat *st)
1353{
1354	struct perf_env *env = &st->session->header.env;
1355
1356	switch (stat_config.aggr_mode) {
1357	case AGGR_SOCKET:
1358		if (perf_env__build_socket_map(env, evsel_list->core.cpus, &stat_config.aggr_map)) {
1359			perror("cannot build socket map");
1360			return -1;
1361		}
1362		stat_config.aggr_get_id = perf_stat__get_socket_file;
1363		break;
1364	case AGGR_DIE:
1365		if (perf_env__build_die_map(env, evsel_list->core.cpus, &stat_config.aggr_map)) {
1366			perror("cannot build die map");
1367			return -1;
1368		}
1369		stat_config.aggr_get_id = perf_stat__get_die_file;
1370		break;
1371	case AGGR_CORE:
1372		if (perf_env__build_core_map(env, evsel_list->core.cpus, &stat_config.aggr_map)) {
1373			perror("cannot build core map");
1374			return -1;
1375		}
1376		stat_config.aggr_get_id = perf_stat__get_core_file;
1377		break;
1378	case AGGR_NODE:
1379		if (perf_env__build_node_map(env, evsel_list->core.cpus, &stat_config.aggr_map)) {
1380			perror("cannot build core map");
1381			return -1;
1382		}
1383		stat_config.aggr_get_id = perf_stat__get_node_file;
1384		break;
1385	case AGGR_NONE:
1386	case AGGR_GLOBAL:
1387	case AGGR_THREAD:
1388	case AGGR_UNSET:
1389	default:
1390		break;
1391	}
1392
1393	return 0;
1394}
1395
/*
 * Filter a NULL-terminated event name list down to the events the "cpu"
 * PMU has, and build a comma-separated event string from the survivors.
 *
 * @attr:      NULL-terminated array of event names; compacted in place so
 *             that only supported names remain (still NULL-terminated).
 * @str:       receives a malloc()ed string; an empty string when no event
 *             is supported. With @use_group the list is wrapped in '{' '}'.
 * @use_group: whether to emit the events as one group.
 *
 * Returns 0 on success, -1 when the allocation fails.
 */
static int topdown_filter_events(const char **attr, char **str, bool use_group)
{
	int kept = 0;
	int len = 0;
	int i;
	char *out;

	/* Pass 1: compact the list and size the output (name + separator). */
	for (i = 0; attr[i]; i++) {
		if (!pmu_have_event("cpu", attr[i]))
			continue;

		len += strlen(attr[i]) + 1;
		attr[kept++] = attr[i];
	}
	attr[kept] = NULL;

	/* One byte for the terminator plus room for the optional braces. */
	*str = malloc(len + 1 + 2);
	if (!*str)
		return -1;

	out = *str;
	if (kept == 0) {
		*out = 0;
		return 0;
	}

	if (use_group)
		*out++ = '{';

	/* Pass 2: emit "name," for every surviving event. */
	for (i = 0; i < kept; i++) {
		size_t n = strlen(attr[i]);

		memcpy(out, attr[i], n);
		out += n;
		*out++ = ',';
	}

	/* The trailing ',' becomes either the closing brace or the terminator. */
	if (use_group) {
		out[-1] = '}';
		*out = 0;
	} else {
		out[-1] = 0;
	}

	return 0;
}
1434
1435__weak bool arch_topdown_check_group(bool *warn)
1436{
1437	*warn = false;
1438	return false;
1439}
1440
/*
 * Weak default for the architecture hook that add_default_attributes()
 * calls when arch_topdown_check_group() set its warn flag. This default
 * does nothing.
 */
__weak void arch_topdown_group_warn(void)
{
}
1444
1445/*
1446 * Add default attributes, if there were no attributes specified or
1447 * if -d/--detailed, -d -d or -d -d -d is used:
1448 */
1449static int add_default_attributes(void)
1450{
1451	int err;
1452	struct perf_event_attr default_attrs0[] = {
1453
1454  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK		},
1455  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES	},
1456  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS		},
1457  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS		},
1458
1459  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES		},
1460};
1461	struct perf_event_attr frontend_attrs[] = {
1462  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND	},
1463};
1464	struct perf_event_attr backend_attrs[] = {
1465  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND	},
1466};
1467	struct perf_event_attr default_attrs1[] = {
1468  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS		},
1469  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS	},
1470  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES		},
1471
1472};
1473
1474/*
1475 * Detailed stats (-d), covering the L1 and last level data caches:
1476 */
1477	struct perf_event_attr detailed_attrs[] = {
1478
1479  { .type = PERF_TYPE_HW_CACHE,
1480    .config =
1481	 PERF_COUNT_HW_CACHE_L1D		<<  0  |
1482	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
1483	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},
1484
1485  { .type = PERF_TYPE_HW_CACHE,
1486    .config =
1487	 PERF_COUNT_HW_CACHE_L1D		<<  0  |
1488	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
1489	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},
1490
1491  { .type = PERF_TYPE_HW_CACHE,
1492    .config =
1493	 PERF_COUNT_HW_CACHE_LL			<<  0  |
1494	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
1495	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},
1496
1497  { .type = PERF_TYPE_HW_CACHE,
1498    .config =
1499	 PERF_COUNT_HW_CACHE_LL			<<  0  |
1500	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
1501	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},
1502};
1503
1504/*
1505 * Very detailed stats (-d -d), covering the instruction cache and the TLB caches:
1506 */
1507	struct perf_event_attr very_detailed_attrs[] = {
1508
1509  { .type = PERF_TYPE_HW_CACHE,
1510    .config =
1511	 PERF_COUNT_HW_CACHE_L1I		<<  0  |
1512	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
1513	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},
1514
1515  { .type = PERF_TYPE_HW_CACHE,
1516    .config =
1517	 PERF_COUNT_HW_CACHE_L1I		<<  0  |
1518	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
1519	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},
1520
1521  { .type = PERF_TYPE_HW_CACHE,
1522    .config =
1523	 PERF_COUNT_HW_CACHE_DTLB		<<  0  |
1524	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
1525	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},
1526
1527  { .type = PERF_TYPE_HW_CACHE,
1528    .config =
1529	 PERF_COUNT_HW_CACHE_DTLB		<<  0  |
1530	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
1531	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},
1532
1533  { .type = PERF_TYPE_HW_CACHE,
1534    .config =
1535	 PERF_COUNT_HW_CACHE_ITLB		<<  0  |
1536	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
1537	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},
1538
1539  { .type = PERF_TYPE_HW_CACHE,
1540    .config =
1541	 PERF_COUNT_HW_CACHE_ITLB		<<  0  |
1542	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
1543	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},
1544
1545};
1546
1547/*
1548 * Very, very detailed stats (-d -d -d), adding prefetch events:
1549 */
1550	struct perf_event_attr very_very_detailed_attrs[] = {
1551
1552  { .type = PERF_TYPE_HW_CACHE,
1553    .config =
1554	 PERF_COUNT_HW_CACHE_L1D		<<  0  |
1555	(PERF_COUNT_HW_CACHE_OP_PREFETCH	<<  8) |
1556	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},
1557
1558  { .type = PERF_TYPE_HW_CACHE,
1559    .config =
1560	 PERF_COUNT_HW_CACHE_L1D		<<  0  |
1561	(PERF_COUNT_HW_CACHE_OP_PREFETCH	<<  8) |
1562	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},
1563};
1564	struct parse_events_error errinfo;
1565
1566	/* Set attrs if no event is selected and !null_run: */
1567	if (stat_config.null_run)
1568		return 0;
1569
1570	bzero(&errinfo, sizeof(errinfo));
1571	if (transaction_run) {
1572		/* Handle -T as -M transaction. Once platform specific metrics
1573		 * support has been added to the json files, all archictures
1574		 * will use this approach. To determine transaction support
1575		 * on an architecture test for such a metric name.
1576		 */
1577		if (metricgroup__has_metric("transaction")) {
1578			struct option opt = { .value = &evsel_list };
1579
1580			return metricgroup__parse_groups(&opt, "transaction",
1581							 stat_config.metric_no_group,
1582							stat_config.metric_no_merge,
1583							 &stat_config.metric_events);
1584		}
1585
1586		if (pmu_have_event("cpu", "cycles-ct") &&
1587		    pmu_have_event("cpu", "el-start"))
1588			err = parse_events(evsel_list, transaction_attrs,
1589					   &errinfo);
1590		else
1591			err = parse_events(evsel_list,
1592					   transaction_limited_attrs,
1593					   &errinfo);
1594		if (err) {
1595			fprintf(stderr, "Cannot set up transaction events\n");
1596			parse_events_print_error(&errinfo, transaction_attrs);
1597			return -1;
1598		}
1599		return 0;
1600	}
1601
1602	if (smi_cost) {
1603		int smi;
1604
1605		if (sysfs__read_int(FREEZE_ON_SMI_PATH, &smi) < 0) {
1606			fprintf(stderr, "freeze_on_smi is not supported.\n");
1607			return -1;
1608		}
1609
1610		if (!smi) {
1611			if (sysfs__write_int(FREEZE_ON_SMI_PATH, 1) < 0) {
1612				fprintf(stderr, "Failed to set freeze_on_smi.\n");
1613				return -1;
1614			}
1615			smi_reset = true;
1616		}
1617
1618		if (pmu_have_event("msr", "aperf") &&
1619		    pmu_have_event("msr", "smi")) {
1620			if (!force_metric_only)
1621				stat_config.metric_only = true;
1622			err = parse_events(evsel_list, smi_cost_attrs, &errinfo);
1623		} else {
1624			fprintf(stderr, "To measure SMI cost, it needs "
1625				"msr/aperf/, msr/smi/ and cpu/cycles/ support\n");
1626			parse_events_print_error(&errinfo, smi_cost_attrs);
1627			return -1;
1628		}
1629		if (err) {
1630			parse_events_print_error(&errinfo, smi_cost_attrs);
1631			fprintf(stderr, "Cannot set up SMI cost events\n");
1632			return -1;
1633		}
1634		return 0;
1635	}
1636
1637	if (topdown_run) {
1638		char *str = NULL;
1639		bool warn = false;
1640
1641		if (stat_config.aggr_mode != AGGR_GLOBAL &&
1642		    stat_config.aggr_mode != AGGR_CORE) {
1643			pr_err("top down event configuration requires --per-core mode\n");
1644			return -1;
1645		}
1646		stat_config.aggr_mode = AGGR_CORE;
1647		if (nr_cgroups || !target__has_cpu(&target)) {
1648			pr_err("top down event configuration requires system-wide mode (-a)\n");
1649			return -1;
1650		}
1651
1652		if (!force_metric_only)
1653			stat_config.metric_only = true;
1654		if (topdown_filter_events(topdown_attrs, &str,
1655				arch_topdown_check_group(&warn)) < 0) {
1656			pr_err("Out of memory\n");
1657			return -1;
1658		}
1659		if (topdown_attrs[0] && str) {
1660			if (warn)
1661				arch_topdown_group_warn();
1662			err = parse_events(evsel_list, str, &errinfo);
1663			if (err) {
1664				fprintf(stderr,
1665					"Cannot set up top down events %s: %d\n",
1666					str, err);
1667				parse_events_print_error(&errinfo, str);
1668				free(str);
1669				return -1;
1670			}
1671		} else {
1672			fprintf(stderr, "System does not support topdown\n");
1673			return -1;
1674		}
1675		free(str);
1676	}
1677
1678	if (!evsel_list->core.nr_entries) {
1679		if (target__has_cpu(&target))
1680			default_attrs0[0].config = PERF_COUNT_SW_CPU_CLOCK;
1681
1682		if (perf_evlist__add_default_attrs(evsel_list, default_attrs0) < 0)
1683			return -1;
1684		if (pmu_have_event("cpu", "stalled-cycles-frontend")) {
1685			if (perf_evlist__add_default_attrs(evsel_list,
1686						frontend_attrs) < 0)
1687				return -1;
1688		}
1689		if (pmu_have_event("cpu", "stalled-cycles-backend")) {
1690			if (perf_evlist__add_default_attrs(evsel_list,
1691						backend_attrs) < 0)
1692				return -1;
1693		}
1694		if (perf_evlist__add_default_attrs(evsel_list, default_attrs1) < 0)
1695			return -1;
1696	}
1697
1698	/* Detailed events get appended to the event list: */
1699
1700	if (detailed_run <  1)
1701		return 0;
1702
1703	/* Append detailed run extra attributes: */
1704	if (perf_evlist__add_default_attrs(evsel_list, detailed_attrs) < 0)
1705		return -1;
1706
1707	if (detailed_run < 2)
1708		return 0;
1709
1710	/* Append very detailed run extra attributes: */
1711	if (perf_evlist__add_default_attrs(evsel_list, very_detailed_attrs) < 0)
1712		return -1;
1713
1714	if (detailed_run < 3)
1715		return 0;
1716
1717	/* Append very, very detailed run extra attributes: */
1718	return perf_evlist__add_default_attrs(evsel_list, very_very_detailed_attrs);
1719}
1720
/* NULL-terminated usage text handed to parse_options() by __cmd_record(). */
static const char * const stat_record_usage[] = {
	"perf stat record [<options>]", NULL,
};
1725
1726static void init_features(struct perf_session *session)
1727{
1728	int feat;
1729
1730	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
1731		perf_header__set_feat(&session->header, feat);
1732
1733	perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT);
1734	perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
1735	perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
1736	perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
1737	perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
1738}
1739
1740static int __cmd_record(int argc, const char **argv)
1741{
1742	struct perf_session *session;
1743	struct perf_data *data = &perf_stat.data;
1744
1745	argc = parse_options(argc, argv, stat_options, stat_record_usage,
1746			     PARSE_OPT_STOP_AT_NON_OPTION);
1747
1748	if (output_name)
1749		data->path = output_name;
1750
1751	if (stat_config.run_count != 1 || forever) {
1752		pr_err("Cannot use -r option with perf stat record.\n");
1753		return -1;
1754	}
1755
1756	session = perf_session__new(data, false, NULL);
1757	if (IS_ERR(session)) {
1758		pr_err("Perf session creation failed\n");
1759		return PTR_ERR(session);
1760	}
1761
1762	init_features(session);
1763
1764	session->evlist   = evsel_list;
1765	perf_stat.session = session;
1766	perf_stat.record  = true;
1767	return argc;
1768}
1769
1770static int process_stat_round_event(struct perf_session *session,
1771				    union perf_event *event)
1772{
1773	struct perf_record_stat_round *stat_round = &event->stat_round;
1774	struct evsel *counter;
1775	struct timespec tsh, *ts = NULL;
1776	const char **argv = session->header.env.cmdline_argv;
1777	int argc = session->header.env.nr_cmdline;
1778
1779	evlist__for_each_entry(evsel_list, counter)
1780		perf_stat_process_counter(&stat_config, counter);
1781
1782	if (stat_round->type == PERF_STAT_ROUND_TYPE__FINAL)
1783		update_stats(&walltime_nsecs_stats, stat_round->time);
1784
1785	if (stat_config.interval && stat_round->time) {
1786		tsh.tv_sec  = stat_round->time / NSEC_PER_SEC;
1787		tsh.tv_nsec = stat_round->time % NSEC_PER_SEC;
1788		ts = &tsh;
1789	}
1790
1791	print_counters(ts, argc, argv);
1792	return 0;
1793}
1794
1795static
1796int process_stat_config_event(struct perf_session *session,
1797			      union perf_event *event)
1798{
1799	struct perf_tool *tool = session->tool;
1800	struct perf_stat *st = container_of(tool, struct perf_stat, tool);
1801
1802	perf_event__read_stat_config(&stat_config, &event->stat_config);
1803
1804	if (perf_cpu_map__empty(st->cpus)) {
1805		if (st->aggr_mode != AGGR_UNSET)
1806			pr_warning("warning: processing task data, aggregation mode not set\n");
1807		return 0;
1808	}
1809
1810	if (st->aggr_mode != AGGR_UNSET)
1811		stat_config.aggr_mode = st->aggr_mode;
1812
1813	if (perf_stat.data.is_pipe)
1814		perf_stat_init_aggr_mode();
1815	else
1816		perf_stat_init_aggr_mode_file(st);
1817
1818	return 0;
1819}
1820
1821static int set_maps(struct perf_stat *st)
1822{
1823	if (!st->cpus || !st->threads)
1824		return 0;
1825
1826	if (WARN_ONCE(st->maps_allocated, "stats double allocation\n"))
1827		return -EINVAL;
1828
1829	perf_evlist__set_maps(&evsel_list->core, st->cpus, st->threads);
1830
1831	if (perf_evlist__alloc_stats(evsel_list, true))
1832		return -ENOMEM;
1833
1834	st->maps_allocated = true;
1835	return 0;
1836}
1837
1838static
1839int process_thread_map_event(struct perf_session *session,
1840			     union perf_event *event)
1841{
1842	struct perf_tool *tool = session->tool;
1843	struct perf_stat *st = container_of(tool, struct perf_stat, tool);
1844
1845	if (st->threads) {
1846		pr_warning("Extra thread map event, ignoring.\n");
1847		return 0;
1848	}
1849
1850	st->threads = thread_map__new_event(&event->thread_map);
1851	if (!st->threads)
1852		return -ENOMEM;
1853
1854	return set_maps(st);
1855}
1856
1857static
1858int process_cpu_map_event(struct perf_session *session,
1859			  union perf_event *event)
1860{
1861	struct perf_tool *tool = session->tool;
1862	struct perf_stat *st = container_of(tool, struct perf_stat, tool);
1863	struct perf_cpu_map *cpus;
1864
1865	if (st->cpus) {
1866		pr_warning("Extra cpu map event, ignoring.\n");
1867		return 0;
1868	}
1869
1870	cpus = cpu_map__new_data(&event->cpu_map.data);
1871	if (!cpus)
1872		return -ENOMEM;
1873
1874	st->cpus = cpus;
1875	return set_maps(st);
1876}
1877
/* NULL-terminated usage text handed to parse_options() by __cmd_report(). */
static const char * const stat_report_usage[] = {
	"perf stat report [<options>]", NULL,
};
1882
1883static struct perf_stat perf_stat = {
1884	.tool = {
1885		.attr		= perf_event__process_attr,
1886		.event_update	= perf_event__process_event_update,
1887		.thread_map	= process_thread_map_event,
1888		.cpu_map	= process_cpu_map_event,
1889		.stat_config	= process_stat_config_event,
1890		.stat		= perf_event__process_stat_event,
1891		.stat_round	= process_stat_round_event,
1892	},
1893	.aggr_mode = AGGR_UNSET,
1894};
1895
1896static int __cmd_report(int argc, const char **argv)
1897{
1898	struct perf_session *session;
1899	const struct option options[] = {
1900	OPT_STRING('i', "input", &input_name, "file", "input file name"),
1901	OPT_SET_UINT(0, "per-socket", &perf_stat.aggr_mode,
1902		     "aggregate counts per processor socket", AGGR_SOCKET),
1903	OPT_SET_UINT(0, "per-die", &perf_stat.aggr_mode,
1904		     "aggregate counts per processor die", AGGR_DIE),
1905	OPT_SET_UINT(0, "per-core", &perf_stat.aggr_mode,
1906		     "aggregate counts per physical processor core", AGGR_CORE),
1907	OPT_SET_UINT(0, "per-node", &perf_stat.aggr_mode,
1908		     "aggregate counts per numa node", AGGR_NODE),
1909	OPT_SET_UINT('A', "no-aggr", &perf_stat.aggr_mode,
1910		     "disable CPU count aggregation", AGGR_NONE),
1911	OPT_END()
1912	};
1913	struct stat st;
1914	int ret;
1915
1916	argc = parse_options(argc, argv, options, stat_report_usage, 0);
1917
1918	if (!input_name || !strlen(input_name)) {
1919		if (!fstat(STDIN_FILENO, &st) && S_ISFIFO(st.st_mode))
1920			input_name = "-";
1921		else
1922			input_name = "perf.data";
1923	}
1924
1925	perf_stat.data.path = input_name;
1926	perf_stat.data.mode = PERF_DATA_MODE_READ;
1927
1928	session = perf_session__new(&perf_stat.data, false, &perf_stat.tool);
1929	if (IS_ERR(session))
1930		return PTR_ERR(session);
1931
1932	perf_stat.session  = session;
1933	stat_config.output = stderr;
1934	evsel_list         = session->evlist;
1935
1936	ret = perf_session__process_events(session);
1937	if (ret)
1938		return ret;
1939
1940	perf_session__delete(session);
1941	return 0;
1942}
1943
1944static void setup_system_wide(int forks)
1945{
1946	/*
1947	 * Make system wide (-a) the default target if
1948	 * no target was specified and one of following
1949	 * conditions is met:
1950	 *
1951	 *   - there's no workload specified
1952	 *   - there is workload specified but all requested
1953	 *     events are system wide events
1954	 */
1955	if (!target__none(&target))
1956		return;
1957
1958	if (!forks)
1959		target.system_wide = true;
1960	else {
1961		struct evsel *counter;
1962
1963		evlist__for_each_entry(evsel_list, counter) {
1964			if (!counter->core.system_wide)
1965				return;
1966		}
1967
1968		if (evsel_list->core.nr_entries)
1969			target.system_wide = true;
1970	}
1971}
1972
1973int cmd_stat(int argc, const char **argv)
1974{
1975	const char * const stat_usage[] = {
1976		"perf stat [<options>] [<command>]",
1977		NULL
1978	};
1979	int status = -EINVAL, run_idx;
1980	const char *mode;
1981	FILE *output = stderr;
1982	unsigned int interval, timeout;
1983	const char * const stat_subcommands[] = { "record", "report" };
1984
1985	setlocale(LC_ALL, "");
1986
1987	evsel_list = evlist__new();
1988	if (evsel_list == NULL)
1989		return -ENOMEM;
1990
1991	parse_events__shrink_config_terms();
1992
1993	/* String-parsing callback-based options would segfault when negated */
1994	set_option_flag(stat_options, 'e', "event", PARSE_OPT_NONEG);
1995	set_option_flag(stat_options, 'M', "metrics", PARSE_OPT_NONEG);
1996	set_option_flag(stat_options, 'G', "cgroup", PARSE_OPT_NONEG);
1997
1998	argc = parse_options_subcommand(argc, argv, stat_options, stat_subcommands,
1999					(const char **) stat_usage,
2000					PARSE_OPT_STOP_AT_NON_OPTION);
2001	perf_stat__collect_metric_expr(evsel_list);
2002	perf_stat__init_shadow_stats();
2003
2004	if (stat_config.csv_sep) {
2005		stat_config.csv_output = true;
2006		if (!strcmp(stat_config.csv_sep, "\\t"))
2007			stat_config.csv_sep = "\t";
2008	} else
2009		stat_config.csv_sep = DEFAULT_SEPARATOR;
2010
2011	if (argc && !strncmp(argv[0], "rec", 3)) {
2012		argc = __cmd_record(argc, argv);
2013		if (argc < 0)
2014			return -1;
2015	} else if (argc && !strncmp(argv[0], "rep", 3))
2016		return __cmd_report(argc, argv);
2017
2018	interval = stat_config.interval;
2019	timeout = stat_config.timeout;
2020
2021	/*
2022	 * For record command the -o is already taken care of.
2023	 */
2024	if (!STAT_RECORD && output_name && strcmp(output_name, "-"))
2025		output = NULL;
2026
2027	if (output_name && output_fd) {
2028		fprintf(stderr, "cannot use both --output and --log-fd\n");
2029		parse_options_usage(stat_usage, stat_options, "o", 1);
2030		parse_options_usage(NULL, stat_options, "log-fd", 0);
2031		goto out;
2032	}
2033
2034	if (stat_config.metric_only && stat_config.aggr_mode == AGGR_THREAD) {
2035		fprintf(stderr, "--metric-only is not supported with --per-thread\n");
2036		goto out;
2037	}
2038
2039	if (stat_config.metric_only && stat_config.run_count > 1) {
2040		fprintf(stderr, "--metric-only is not supported with -r\n");
2041		goto out;
2042	}
2043
2044	if (stat_config.walltime_run_table && stat_config.run_count <= 1) {
2045		fprintf(stderr, "--table is only supported with -r\n");
2046		parse_options_usage(stat_usage, stat_options, "r", 1);
2047		parse_options_usage(NULL, stat_options, "table", 0);
2048		goto out;
2049	}
2050
2051	if (output_fd < 0) {
2052		fprintf(stderr, "argument to --log-fd must be a > 0\n");
2053		parse_options_usage(stat_usage, stat_options, "log-fd", 0);
2054		goto out;
2055	}
2056
2057	if (!output) {
2058		struct timespec tm;
2059		mode = append_file ? "a" : "w";
2060
2061		output = fopen(output_name, mode);
2062		if (!output) {
2063			perror("failed to create output file");
2064			return -1;
2065		}
2066		clock_gettime(CLOCK_REALTIME, &tm);
2067		fprintf(output, "# started on %s\n", ctime(&tm.tv_sec));
2068	} else if (output_fd > 0) {
2069		mode = append_file ? "a" : "w";
2070		output = fdopen(output_fd, mode);
2071		if (!output) {
2072			perror("Failed opening logfd");
2073			return -errno;
2074		}
2075	}
2076
2077	stat_config.output = output;
2078
2079	/*
2080	 * let the spreadsheet do the pretty-printing
2081	 */
2082	if (stat_config.csv_output) {
2083		/* User explicitly passed -B? */
2084		if (big_num_opt == 1) {
2085			fprintf(stderr, "-B option not supported with -x\n");
2086			parse_options_usage(stat_usage, stat_options, "B", 1);
2087			parse_options_usage(NULL, stat_options, "x", 1);
2088			goto out;
2089		} else /* Nope, so disable big number formatting */
2090			stat_config.big_num = false;
2091	} else if (big_num_opt == 0) /* User passed --no-big-num */
2092		stat_config.big_num = false;
2093
2094	setup_system_wide(argc);
2095
2096	/*
2097	 * Display user/system times only for single
2098	 * run and when there's specified tracee.
2099	 */
2100	if ((stat_config.run_count == 1) && target__none(&target))
2101		stat_config.ru_display = true;
2102
2103	if (stat_config.run_count < 0) {
2104		pr_err("Run count must be a positive number\n");
2105		parse_options_usage(stat_usage, stat_options, "r", 1);
2106		goto out;
2107	} else if (stat_config.run_count == 0) {
2108		forever = true;
2109		stat_config.run_count = 1;
2110	}
2111
2112	if (stat_config.walltime_run_table) {
2113		stat_config.walltime_run = zalloc(stat_config.run_count * sizeof(stat_config.walltime_run[0]));
2114		if (!stat_config.walltime_run) {
2115			pr_err("failed to setup -r option");
2116			goto out;
2117		}
2118	}
2119
2120	if ((stat_config.aggr_mode == AGGR_THREAD) &&
2121		!target__has_task(&target)) {
2122		if (!target.system_wide || target.cpu_list) {
2123			fprintf(stderr, "The --per-thread option is only "
2124				"available when monitoring via -p -t -a "
2125				"options or only --per-thread.\n");
2126			parse_options_usage(NULL, stat_options, "p", 1);
2127			parse_options_usage(NULL, stat_options, "t", 1);
2128			goto out;
2129		}
2130	}
2131
2132	/*
2133	 * no_aggr, cgroup are for system-wide only
2134	 * --per-thread is aggregated per thread, we don't mix it with cpu mode
2135	 */
2136	if (((stat_config.aggr_mode != AGGR_GLOBAL &&
2137	      stat_config.aggr_mode != AGGR_THREAD) || nr_cgroups) &&
2138	    !target__has_cpu(&target)) {
2139		fprintf(stderr, "both cgroup and no-aggregation "
2140			"modes only available in system-wide mode\n");
2141
2142		parse_options_usage(stat_usage, stat_options, "G", 1);
2143		parse_options_usage(NULL, stat_options, "A", 1);
2144		parse_options_usage(NULL, stat_options, "a", 1);
2145		goto out;
2146	}
2147
2148	if (add_default_attributes())
2149		goto out;
2150
2151	target__validate(&target);
2152
2153	if ((stat_config.aggr_mode == AGGR_THREAD) && (target.system_wide))
2154		target.per_thread = true;
2155
2156	if (perf_evlist__create_maps(evsel_list, &target) < 0) {
2157		if (target__has_task(&target)) {
2158			pr_err("Problems finding threads of monitor\n");
2159			parse_options_usage(stat_usage, stat_options, "p", 1);
2160			parse_options_usage(NULL, stat_options, "t", 1);
2161		} else if (target__has_cpu(&target)) {
2162			perror("failed to parse CPUs map");
2163			parse_options_usage(stat_usage, stat_options, "C", 1);
2164			parse_options_usage(NULL, stat_options, "a", 1);
2165		}
2166		goto out;
2167	}
2168
2169	evlist__check_cpu_maps(evsel_list);
2170
2171	/*
2172	 * Initialize thread_map with comm names,
2173	 * so we could print it out on output.
2174	 */
2175	if (stat_config.aggr_mode == AGGR_THREAD) {
2176		thread_map__read_comms(evsel_list->core.threads);
2177		if (target.system_wide) {
2178			if (runtime_stat_new(&stat_config,
2179				perf_thread_map__nr(evsel_list->core.threads))) {
2180				goto out;
2181			}
2182		}
2183	}
2184
2185	if (stat_config.aggr_mode == AGGR_NODE)
2186		cpu__setup_cpunode_map();
2187
2188	if (stat_config.times && interval)
2189		interval_count = true;
2190	else if (stat_config.times && !interval) {
2191		pr_err("interval-count option should be used together with "
2192				"interval-print.\n");
2193		parse_options_usage(stat_usage, stat_options, "interval-count", 0);
2194		parse_options_usage(stat_usage, stat_options, "I", 1);
2195		goto out;
2196	}
2197
2198	if (timeout && timeout < 100) {
2199		if (timeout < 10) {
2200			pr_err("timeout must be >= 10ms.\n");
2201			parse_options_usage(stat_usage, stat_options, "timeout", 0);
2202			goto out;
2203		} else
2204			pr_warning("timeout < 100ms. "
2205				   "The overhead percentage could be high in some cases. "
2206				   "Please proceed with caution.\n");
2207	}
2208	if (timeout && interval) {
2209		pr_err("timeout option is not supported with interval-print.\n");
2210		parse_options_usage(stat_usage, stat_options, "timeout", 0);
2211		parse_options_usage(stat_usage, stat_options, "I", 1);
2212		goto out;
2213	}
2214
2215	if (perf_evlist__alloc_stats(evsel_list, interval))
2216		goto out;
2217
2218	if (perf_stat_init_aggr_mode())
2219		goto out;
2220
2221	/*
2222	 * Set sample_type to PERF_SAMPLE_IDENTIFIER, which should be harmless
2223	 * while avoiding that older tools show confusing messages.
2224	 *
2225	 * However for pipe sessions we need to keep it zero,
2226	 * because script's perf_evsel__check_attr is triggered
2227	 * by attr->sample_type != 0, and we can't run it on
2228	 * stat sessions.
2229	 */
2230	stat_config.identifier = !(STAT_RECORD && perf_stat.data.is_pipe);
2231
2232	/*
2233	 * We don't want to block the signals - that would cause
2234	 * child tasks to inherit that and Ctrl-C would not work.
2235	 * What we want is for Ctrl-C to work in the exec()-ed
2236	 * task, but being ignored by perf stat itself:
2237	 */
2238	atexit(sig_atexit);
2239	if (!forever)
2240		signal(SIGINT,  skip_signal);
2241	signal(SIGCHLD, skip_signal);
2242	signal(SIGALRM, skip_signal);
2243	signal(SIGABRT, skip_signal);
2244
2245	status = 0;
2246	for (run_idx = 0; forever || run_idx < stat_config.run_count; run_idx++) {
2247		if (stat_config.run_count != 1 && verbose > 0)
2248			fprintf(output, "[ perf stat: executing run #%d ... ]\n",
2249				run_idx + 1);
2250
2251		if (run_idx != 0)
2252			perf_evlist__reset_prev_raw_counts(evsel_list);
2253
2254		status = run_perf_stat(argc, argv, run_idx);
2255		if (forever && status != -1 && !interval) {
2256			print_counters(NULL, argc, argv);
2257			perf_stat__reset_stats();
2258		}
2259	}
2260
2261	if (!forever && status != -1 && (!interval || stat_config.summary))
2262		print_counters(NULL, argc, argv);
2263
2264	if (STAT_RECORD) {
2265		/*
2266		 * We synthesize the kernel mmap record just so that older tools
2267		 * don't emit warnings about not being able to resolve symbols
2268		 * due to /proc/sys/kernel/kptr_restrict settings and instead provide
2269		 * a saner message about no samples being in the perf.data file.
2270		 *
2271		 * This also serves to suppress a warning about f_header.data.size == 0
2272		 * in header.c at the moment 'perf stat record' gets introduced, which
2273		 * is not really needed once we start adding the stat specific PERF_RECORD_
2274		 * records, but the need to suppress the kptr_restrict messages in older
2275		 * tools remain  -acme
2276		 */
2277		int fd = perf_data__fd(&perf_stat.data);
2278		int err = perf_event__synthesize_kernel_mmap((void *)&perf_stat,
2279							     process_synthesized_event,
2280							     &perf_stat.session->machines.host);
2281		if (err) {
2282			pr_warning("Couldn't synthesize the kernel mmap record, harmless, "
2283				   "older tools may produce warnings about this file\n.");
2284		}
2285
2286		if (!interval) {
2287			if (WRITE_STAT_ROUND_EVENT(walltime_nsecs_stats.max, FINAL))
2288				pr_err("failed to write stat round event\n");
2289		}
2290
2291		if (!perf_stat.data.is_pipe) {
2292			perf_stat.session->header.data_size += perf_stat.bytes_written;
2293			perf_session__write_header(perf_stat.session, evsel_list, fd, true);
2294		}
2295
2296		evlist__close(evsel_list);
2297		perf_session__delete(perf_stat.session);
2298	}
2299
2300	perf_stat__exit_aggr_mode();
2301	perf_evlist__free_stats(evsel_list);
2302out:
2303	zfree(&stat_config.walltime_run);
2304
2305	if (smi_cost && smi_reset)
2306		sysfs__write_int(FREEZE_ON_SMI_PATH, 0);
2307
2308	evlist__delete(evsel_list);
2309
2310	runtime_stat_delete(&stat_config);
2311
2312	return status;
2313}
Configure Feed

Configure Feed