Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (106 commits)
perf kvm: Fix copy & paste error in description
perf script: Kill script_spec__delete
perf top: Fix a memory leak
perf stat: Introduce get_ratio_color() helper
perf session: Remove impossible condition check
perf tools: Fix feature-bits rework fallout, remove unused variable
perf script: Add generic perl handler to process events
perf tools: Use for_each_set_bit() to iterate over feature flags
perf tools: Unify handling of features when writing feature section
perf report: Accept fifos as input file
perf tools: Moving code in some files
perf tools: Fix out-of-bound access to struct perf_session
perf tools: Continue processing header on unknown features
perf tools: Improve macros for struct feature_ops
perf: builtin-record: Document and check that mmap_pages must be a power of two.
perf: builtin-record: Provide advice if mmap'ing fails with EPERM.
perf tools: Fix truncated annotation
perf script: look up thread using tid instead of pid
perf tools: Look up thread names for system wide profiling
perf tools: Fix comm for processes with named threads
...

+5591 -3012
+5
Documentation/kernel-parameters.txt
··· 1885 1885 arch_perfmon: [X86] Force use of architectural 1886 1886 perfmon on Intel CPUs instead of the 1887 1887 CPU specific event set. 1888 + timer: [X86] Force use of architectural NMI 1889 + timer mode (see also oprofile.timer 1890 + for generic hr timer mode) 1891 + [s390] Force legacy basic mode sampling 1892 + (report cpu_type "timer") 1888 1893 1889 1894 oops=panic Always panic on oopses. Default is to just kill the 1890 1895 process, but there is a small probability of
-2
Documentation/trace/events.txt
··· 191 191 192 192 Currently, only exact string matches are supported. 193 193 194 - Currently, the maximum number of predicates in a filter is 16. 195 - 196 194 5.2 Setting filters 197 195 ------------------- 198 196
+4
arch/Kconfig
··· 30 30 config HAVE_OPROFILE 31 31 bool 32 32 33 + config OPROFILE_NMI_TIMER 34 + def_bool y 35 + depends on PERF_EVENTS && HAVE_PERF_EVENTS_NMI 36 + 33 37 config KPROBES 34 38 bool "Kprobes" 35 39 depends on MODULES
+6 -1
arch/s390/oprofile/hwsampler.c
··· 22 22 #include <asm/irq.h> 23 23 24 24 #include "hwsampler.h" 25 + #include "op_counter.h" 25 26 26 27 #define MAX_NUM_SDB 511 27 28 #define MIN_NUM_SDB 1 ··· 897 896 if (sample_data_ptr->P == 1) { 898 897 /* userspace sample */ 899 898 unsigned int pid = sample_data_ptr->prim_asn; 899 + if (!counter_config.user) 900 + goto skip_sample; 900 901 rcu_read_lock(); 901 902 tsk = pid_task(find_vpid(pid), PIDTYPE_PID); 902 903 if (tsk) ··· 906 903 rcu_read_unlock(); 907 904 } else { 908 905 /* kernelspace sample */ 906 + if (!counter_config.kernel) 907 + goto skip_sample; 909 908 regs = task_pt_regs(current); 910 909 } 911 910 ··· 915 910 oprofile_add_ext_hw_sample(sample_data_ptr->ia, regs, 0, 916 911 !sample_data_ptr->P, tsk); 917 912 mutex_unlock(&hws_sem); 918 - 913 + skip_sample: 919 914 sample_data_ptr++; 920 915 } 921 916 }
+344 -31
arch/s390/oprofile/init.c
··· 2 2 * arch/s390/oprofile/init.c 3 3 * 4 4 * S390 Version 5 - * Copyright (C) 2003 IBM Deutschland Entwicklung GmbH, IBM Corporation 5 + * Copyright (C) 2002-2011 IBM Deutschland Entwicklung GmbH, IBM Corporation 6 6 * Author(s): Thomas Spatzier (tspat@de.ibm.com) 7 7 * Author(s): Mahesh Salgaonkar (mahesh@linux.vnet.ibm.com) 8 8 * Author(s): Heinz Graalfs (graalfs@linux.vnet.ibm.com) 9 + * Author(s): Andreas Krebbel (krebbel@linux.vnet.ibm.com) 9 10 * 10 11 * @remark Copyright 2002-2011 OProfile authors 11 12 */ ··· 15 14 #include <linux/init.h> 16 15 #include <linux/errno.h> 17 16 #include <linux/fs.h> 17 + #include <linux/module.h> 18 + #include <asm/processor.h> 18 19 19 20 #include "../../../drivers/oprofile/oprof.h" 20 21 ··· 25 22 #ifdef CONFIG_64BIT 26 23 27 24 #include "hwsampler.h" 25 + #include "op_counter.h" 28 26 29 27 #define DEFAULT_INTERVAL 4127518 30 28 ··· 39 35 static unsigned long oprofile_sdbt_blocks = DEFAULT_SDBT_BLOCKS; 40 36 static unsigned long oprofile_sdb_blocks = DEFAULT_SDB_BLOCKS; 41 37 42 - static int hwsampler_file; 38 + static int hwsampler_enabled; 43 39 static int hwsampler_running; /* start_mutex must be held to change */ 40 + static int hwsampler_available; 44 41 45 42 static struct oprofile_operations timer_ops; 43 + 44 + struct op_counter_config counter_config; 45 + 46 + enum __force_cpu_type { 47 + reserved = 0, /* do not force */ 48 + timer, 49 + }; 50 + static int force_cpu_type; 51 + 52 + static int set_cpu_type(const char *str, struct kernel_param *kp) 53 + { 54 + if (!strcmp(str, "timer")) { 55 + force_cpu_type = timer; 56 + printk(KERN_INFO "oprofile: forcing timer to be returned " 57 + "as cpu type\n"); 58 + } else { 59 + force_cpu_type = 0; 60 + } 61 + 62 + return 0; 63 + } 64 + module_param_call(cpu_type, set_cpu_type, NULL, NULL, 0); 65 + MODULE_PARM_DESC(cpu_type, "Force legacy basic mode sampling" 66 + "(report cpu_type \"timer\""); 46 67 47 68 static int oprofile_hwsampler_start(void) 48 69 { 49 70 int 
retval; 50 71 51 - hwsampler_running = hwsampler_file; 72 + hwsampler_running = hwsampler_enabled; 52 73 53 74 if (!hwsampler_running) 54 75 return timer_ops.start(); ··· 101 72 return; 102 73 } 103 74 75 + /* 76 + * File ops used for: 77 + * /dev/oprofile/0/enabled 78 + * /dev/oprofile/hwsampling/hwsampler (cpu_type = timer) 79 + */ 80 + 104 81 static ssize_t hwsampler_read(struct file *file, char __user *buf, 105 82 size_t count, loff_t *offset) 106 83 { 107 - return oprofilefs_ulong_to_user(hwsampler_file, buf, count, offset); 84 + return oprofilefs_ulong_to_user(hwsampler_enabled, buf, count, offset); 108 85 } 109 86 110 87 static ssize_t hwsampler_write(struct file *file, char const __user *buf, ··· 126 91 if (retval <= 0) 127 92 return retval; 128 93 94 + if (val != 0 && val != 1) 95 + return -EINVAL; 96 + 129 97 if (oprofile_started) 130 98 /* 131 99 * save to do without locking as we set ··· 137 99 */ 138 100 return -EBUSY; 139 101 140 - hwsampler_file = val; 102 + hwsampler_enabled = val; 141 103 142 104 return count; 143 105 } ··· 147 109 .write = hwsampler_write, 148 110 }; 149 111 150 - static int oprofile_create_hwsampling_files(struct super_block *sb, 151 - struct dentry *root) 112 + /* 113 + * File ops used for: 114 + * /dev/oprofile/0/count 115 + * /dev/oprofile/hwsampling/hw_interval (cpu_type = timer) 116 + * 117 + * Make sure that the value is within the hardware range. 
118 + */ 119 + 120 + static ssize_t hw_interval_read(struct file *file, char __user *buf, 121 + size_t count, loff_t *offset) 152 122 { 153 - struct dentry *hw_dir; 123 + return oprofilefs_ulong_to_user(oprofile_hw_interval, buf, 124 + count, offset); 125 + } 154 126 155 - /* reinitialize default values */ 156 - hwsampler_file = 1; 127 + static ssize_t hw_interval_write(struct file *file, char const __user *buf, 128 + size_t count, loff_t *offset) 129 + { 130 + unsigned long val; 131 + int retval; 157 132 158 - hw_dir = oprofilefs_mkdir(sb, root, "hwsampling"); 159 - if (!hw_dir) 133 + if (*offset) 134 + return -EINVAL; 135 + retval = oprofilefs_ulong_from_user(&val, buf, count); 136 + if (retval) 137 + return retval; 138 + if (val < oprofile_min_interval) 139 + oprofile_hw_interval = oprofile_min_interval; 140 + else if (val > oprofile_max_interval) 141 + oprofile_hw_interval = oprofile_max_interval; 142 + else 143 + oprofile_hw_interval = val; 144 + 145 + return count; 146 + } 147 + 148 + static const struct file_operations hw_interval_fops = { 149 + .read = hw_interval_read, 150 + .write = hw_interval_write, 151 + }; 152 + 153 + /* 154 + * File ops used for: 155 + * /dev/oprofile/0/event 156 + * Only a single event with number 0 is supported with this counter. 157 + * 158 + * /dev/oprofile/0/unit_mask 159 + * This is a dummy file needed by the user space tools. 160 + * No value other than 0 is accepted or returned. 
161 + */ 162 + 163 + static ssize_t hwsampler_zero_read(struct file *file, char __user *buf, 164 + size_t count, loff_t *offset) 165 + { 166 + return oprofilefs_ulong_to_user(0, buf, count, offset); 167 + } 168 + 169 + static ssize_t hwsampler_zero_write(struct file *file, char const __user *buf, 170 + size_t count, loff_t *offset) 171 + { 172 + unsigned long val; 173 + int retval; 174 + 175 + if (*offset) 160 176 return -EINVAL; 161 177 162 - oprofilefs_create_file(sb, hw_dir, "hwsampler", &hwsampler_fops); 163 - oprofilefs_create_ulong(sb, hw_dir, "hw_interval", 164 - &oprofile_hw_interval); 165 - oprofilefs_create_ro_ulong(sb, hw_dir, "hw_min_interval", 166 - &oprofile_min_interval); 167 - oprofilefs_create_ro_ulong(sb, hw_dir, "hw_max_interval", 168 - &oprofile_max_interval); 169 - oprofilefs_create_ulong(sb, hw_dir, "hw_sdbt_blocks", 170 - &oprofile_sdbt_blocks); 178 + retval = oprofilefs_ulong_from_user(&val, buf, count); 179 + if (retval) 180 + return retval; 181 + if (val != 0) 182 + return -EINVAL; 183 + return count; 184 + } 171 185 186 + static const struct file_operations zero_fops = { 187 + .read = hwsampler_zero_read, 188 + .write = hwsampler_zero_write, 189 + }; 190 + 191 + /* /dev/oprofile/0/kernel file ops. 
*/ 192 + 193 + static ssize_t hwsampler_kernel_read(struct file *file, char __user *buf, 194 + size_t count, loff_t *offset) 195 + { 196 + return oprofilefs_ulong_to_user(counter_config.kernel, 197 + buf, count, offset); 198 + } 199 + 200 + static ssize_t hwsampler_kernel_write(struct file *file, char const __user *buf, 201 + size_t count, loff_t *offset) 202 + { 203 + unsigned long val; 204 + int retval; 205 + 206 + if (*offset) 207 + return -EINVAL; 208 + 209 + retval = oprofilefs_ulong_from_user(&val, buf, count); 210 + if (retval) 211 + return retval; 212 + 213 + if (val != 0 && val != 1) 214 + return -EINVAL; 215 + 216 + counter_config.kernel = val; 217 + 218 + return count; 219 + } 220 + 221 + static const struct file_operations kernel_fops = { 222 + .read = hwsampler_kernel_read, 223 + .write = hwsampler_kernel_write, 224 + }; 225 + 226 + /* /dev/oprofile/0/user file ops. */ 227 + 228 + static ssize_t hwsampler_user_read(struct file *file, char __user *buf, 229 + size_t count, loff_t *offset) 230 + { 231 + return oprofilefs_ulong_to_user(counter_config.user, 232 + buf, count, offset); 233 + } 234 + 235 + static ssize_t hwsampler_user_write(struct file *file, char const __user *buf, 236 + size_t count, loff_t *offset) 237 + { 238 + unsigned long val; 239 + int retval; 240 + 241 + if (*offset) 242 + return -EINVAL; 243 + 244 + retval = oprofilefs_ulong_from_user(&val, buf, count); 245 + if (retval) 246 + return retval; 247 + 248 + if (val != 0 && val != 1) 249 + return -EINVAL; 250 + 251 + counter_config.user = val; 252 + 253 + return count; 254 + } 255 + 256 + static const struct file_operations user_fops = { 257 + .read = hwsampler_user_read, 258 + .write = hwsampler_user_write, 259 + }; 260 + 261 + 262 + /* 263 + * File ops used for: /dev/oprofile/timer/enabled 264 + * The value always has to be the inverted value of hwsampler_enabled. So 265 + * no separate variable is created. That way we do not need locking. 
266 + */ 267 + 268 + static ssize_t timer_enabled_read(struct file *file, char __user *buf, 269 + size_t count, loff_t *offset) 270 + { 271 + return oprofilefs_ulong_to_user(!hwsampler_enabled, buf, count, offset); 272 + } 273 + 274 + static ssize_t timer_enabled_write(struct file *file, char const __user *buf, 275 + size_t count, loff_t *offset) 276 + { 277 + unsigned long val; 278 + int retval; 279 + 280 + if (*offset) 281 + return -EINVAL; 282 + 283 + retval = oprofilefs_ulong_from_user(&val, buf, count); 284 + if (retval) 285 + return retval; 286 + 287 + if (val != 0 && val != 1) 288 + return -EINVAL; 289 + 290 + /* Timer cannot be disabled without having hardware sampling. */ 291 + if (val == 0 && !hwsampler_available) 292 + return -EINVAL; 293 + 294 + if (oprofile_started) 295 + /* 296 + * save to do without locking as we set 297 + * hwsampler_running in start() when start_mutex is 298 + * held 299 + */ 300 + return -EBUSY; 301 + 302 + hwsampler_enabled = !val; 303 + 304 + return count; 305 + } 306 + 307 + static const struct file_operations timer_enabled_fops = { 308 + .read = timer_enabled_read, 309 + .write = timer_enabled_write, 310 + }; 311 + 312 + 313 + static int oprofile_create_hwsampling_files(struct super_block *sb, 314 + struct dentry *root) 315 + { 316 + struct dentry *dir; 317 + 318 + dir = oprofilefs_mkdir(sb, root, "timer"); 319 + if (!dir) 320 + return -EINVAL; 321 + 322 + oprofilefs_create_file(sb, dir, "enabled", &timer_enabled_fops); 323 + 324 + if (!hwsampler_available) 325 + return 0; 326 + 327 + /* reinitialize default values */ 328 + hwsampler_enabled = 1; 329 + counter_config.kernel = 1; 330 + counter_config.user = 1; 331 + 332 + if (!force_cpu_type) { 333 + /* 334 + * Create the counter file system. A single virtual 335 + * counter is created which can be used to 336 + * enable/disable hardware sampling dynamically from 337 + * user space. The user space will configure a single 338 + * counter with a single event. 
The value of 'event' 339 + * and 'unit_mask' are not evaluated by the kernel code 340 + * and can only be set to 0. 341 + */ 342 + 343 + dir = oprofilefs_mkdir(sb, root, "0"); 344 + if (!dir) 345 + return -EINVAL; 346 + 347 + oprofilefs_create_file(sb, dir, "enabled", &hwsampler_fops); 348 + oprofilefs_create_file(sb, dir, "event", &zero_fops); 349 + oprofilefs_create_file(sb, dir, "count", &hw_interval_fops); 350 + oprofilefs_create_file(sb, dir, "unit_mask", &zero_fops); 351 + oprofilefs_create_file(sb, dir, "kernel", &kernel_fops); 352 + oprofilefs_create_file(sb, dir, "user", &user_fops); 353 + oprofilefs_create_ulong(sb, dir, "hw_sdbt_blocks", 354 + &oprofile_sdbt_blocks); 355 + 356 + } else { 357 + /* 358 + * Hardware sampling can be used but the cpu_type is 359 + * forced to timer in order to deal with legacy user 360 + * space tools. The /dev/oprofile/hwsampling fs is 361 + * provided in that case. 362 + */ 363 + dir = oprofilefs_mkdir(sb, root, "hwsampling"); 364 + if (!dir) 365 + return -EINVAL; 366 + 367 + oprofilefs_create_file(sb, dir, "hwsampler", 368 + &hwsampler_fops); 369 + oprofilefs_create_file(sb, dir, "hw_interval", 370 + &hw_interval_fops); 371 + oprofilefs_create_ro_ulong(sb, dir, "hw_min_interval", 372 + &oprofile_min_interval); 373 + oprofilefs_create_ro_ulong(sb, dir, "hw_max_interval", 374 + &oprofile_max_interval); 375 + oprofilefs_create_ulong(sb, dir, "hw_sdbt_blocks", 376 + &oprofile_sdbt_blocks); 377 + } 172 378 return 0; 173 379 } 174 380 175 381 static int oprofile_hwsampler_init(struct oprofile_operations *ops) 176 382 { 383 + /* 384 + * Initialize the timer mode infrastructure as well in order 385 + * to be able to switch back dynamically. oprofile_timer_init 386 + * is not supposed to fail. 
387 + */ 388 + if (oprofile_timer_init(ops)) 389 + BUG(); 390 + 391 + memcpy(&timer_ops, ops, sizeof(timer_ops)); 392 + ops->create_files = oprofile_create_hwsampling_files; 393 + 394 + /* 395 + * If the user space tools do not support newer cpu types, 396 + * the force_cpu_type module parameter 397 + * can be used to always return \"timer\" as cpu type. 398 + */ 399 + if (force_cpu_type != timer) { 400 + struct cpuid id; 401 + 402 + get_cpu_id (&id); 403 + 404 + switch (id.machine) { 405 + case 0x2097: case 0x2098: ops->cpu_type = "s390/z10"; break; 406 + case 0x2817: case 0x2818: ops->cpu_type = "s390/z196"; break; 407 + default: return -ENODEV; 408 + } 409 + } 410 + 177 411 if (hwsampler_setup()) 178 412 return -ENODEV; 179 413 180 414 /* 181 - * create hwsampler files only if hwsampler_setup() succeeds. 415 + * Query the range for the sampling interval from the 416 + * hardware. 182 417 */ 183 418 oprofile_min_interval = hwsampler_query_min_interval(); 184 419 if (oprofile_min_interval == 0) ··· 466 155 if (oprofile_hw_interval > oprofile_max_interval) 467 156 oprofile_hw_interval = oprofile_max_interval; 468 157 469 - if (oprofile_timer_init(ops)) 470 - return -ENODEV; 471 - 472 - printk(KERN_INFO "oprofile: using hardware sampling\n"); 473 - 474 - memcpy(&timer_ops, ops, sizeof(timer_ops)); 158 + printk(KERN_INFO "oprofile: System z hardware sampling " 159 + "facility found.\n"); 475 160 476 161 ops->start = oprofile_hwsampler_start; 477 162 ops->stop = oprofile_hwsampler_stop; 478 - ops->create_files = oprofile_create_hwsampling_files; 479 163 480 164 return 0; 481 165 } 482 166 483 167 static void oprofile_hwsampler_exit(void) 484 168 { 485 - oprofile_timer_exit(); 486 169 hwsampler_shutdown(); 487 170 } 488 171 ··· 487 182 ops->backtrace = s390_backtrace; 488 183 489 184 #ifdef CONFIG_64BIT 490 - return oprofile_hwsampler_init(ops); 185 + 186 + /* 187 + * -ENODEV is not reported to the caller. 
The module itself 188 + * will use the timer mode sampling as fallback and this is 189 + * always available. 190 + */ 191 + hwsampler_available = oprofile_hwsampler_init(ops) == 0; 192 + 193 + return 0; 491 194 #else 492 195 return -ENODEV; 493 196 #endif
+23
arch/s390/oprofile/op_counter.h
··· 1 + /** 2 + * arch/s390/oprofile/op_counter.h 3 + * 4 + * Copyright (C) 2011 IBM Deutschland Entwicklung GmbH, IBM Corporation 5 + * Author(s): Andreas Krebbel (krebbel@linux.vnet.ibm.com) 6 + * 7 + * @remark Copyright 2011 OProfile authors 8 + */ 9 + 10 + #ifndef OP_COUNTER_H 11 + #define OP_COUNTER_H 12 + 13 + struct op_counter_config { 14 + /* `enabled' maps to the hwsampler_file variable. */ 15 + /* `count' maps to the oprofile_hw_interval variable. */ 16 + /* `event' and `unit_mask' are unused. */ 17 + unsigned long kernel; 18 + unsigned long user; 19 + }; 20 + 21 + extern struct op_counter_config counter_config; 22 + 23 + #endif /* OP_COUNTER_H */
+7
arch/x86/include/asm/insn.h
··· 137 137 return (insn->vex_prefix.value != 0); 138 138 } 139 139 140 + /* Ensure this instruction is decoded completely */ 141 + static inline int insn_complete(struct insn *insn) 142 + { 143 + return insn->opcode.got && insn->modrm.got && insn->sib.got && 144 + insn->displacement.got && insn->immediate.got; 145 + } 146 + 140 147 static inline insn_byte_t insn_vex_m_bits(struct insn *insn) 141 148 { 142 149 if (insn->vex_prefix.nbytes == 2) /* 2 bytes VEX */
+37 -7
arch/x86/include/asm/perf_event.h
··· 57 57 (1 << (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX)) 58 58 59 59 #define ARCH_PERFMON_BRANCH_MISSES_RETIRED 6 60 + #define ARCH_PERFMON_EVENTS_COUNT 7 60 61 61 62 /* 62 63 * Intel "Architectural Performance Monitoring" CPUID ··· 73 72 unsigned int full; 74 73 }; 75 74 75 + union cpuid10_ebx { 76 + struct { 77 + unsigned int no_unhalted_core_cycles:1; 78 + unsigned int no_instructions_retired:1; 79 + unsigned int no_unhalted_reference_cycles:1; 80 + unsigned int no_llc_reference:1; 81 + unsigned int no_llc_misses:1; 82 + unsigned int no_branch_instruction_retired:1; 83 + unsigned int no_branch_misses_retired:1; 84 + } split; 85 + unsigned int full; 86 + }; 87 + 76 88 union cpuid10_edx { 77 89 struct { 78 90 unsigned int num_counters_fixed:5; ··· 95 81 unsigned int full; 96 82 }; 97 83 84 + struct x86_pmu_capability { 85 + int version; 86 + int num_counters_gp; 87 + int num_counters_fixed; 88 + int bit_width_gp; 89 + int bit_width_fixed; 90 + unsigned int events_mask; 91 + int events_mask_len; 92 + }; 98 93 99 94 /* 100 95 * Fixed-purpose performance events: ··· 112 89 /* 113 90 * All 3 fixed-mode PMCs are configured via this single MSR: 114 91 */ 115 - #define MSR_ARCH_PERFMON_FIXED_CTR_CTRL 0x38d 92 + #define MSR_ARCH_PERFMON_FIXED_CTR_CTRL 0x38d 116 93 117 94 /* 118 95 * The counts are available in three separate MSRs: 119 96 */ 120 97 121 98 /* Instr_Retired.Any: */ 122 - #define MSR_ARCH_PERFMON_FIXED_CTR0 0x309 123 - #define X86_PMC_IDX_FIXED_INSTRUCTIONS (X86_PMC_IDX_FIXED + 0) 99 + #define MSR_ARCH_PERFMON_FIXED_CTR0 0x309 100 + #define X86_PMC_IDX_FIXED_INSTRUCTIONS (X86_PMC_IDX_FIXED + 0) 124 101 125 102 /* CPU_CLK_Unhalted.Core: */ 126 - #define MSR_ARCH_PERFMON_FIXED_CTR1 0x30a 127 - #define X86_PMC_IDX_FIXED_CPU_CYCLES (X86_PMC_IDX_FIXED + 1) 103 + #define MSR_ARCH_PERFMON_FIXED_CTR1 0x30a 104 + #define X86_PMC_IDX_FIXED_CPU_CYCLES (X86_PMC_IDX_FIXED + 1) 128 105 129 106 /* CPU_CLK_Unhalted.Ref: */ 130 - #define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b 
131 - #define X86_PMC_IDX_FIXED_BUS_CYCLES (X86_PMC_IDX_FIXED + 2) 107 + #define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b 108 + #define X86_PMC_IDX_FIXED_REF_CYCLES (X86_PMC_IDX_FIXED + 2) 109 + #define X86_PMC_MSK_FIXED_REF_CYCLES (1ULL << X86_PMC_IDX_FIXED_REF_CYCLES) 132 110 133 111 /* 134 112 * We model BTS tracing as another fixed-mode PMC. ··· 226 202 }; 227 203 228 204 extern struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr); 205 + extern void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap); 229 206 #else 230 207 static inline perf_guest_switch_msr *perf_guest_get_msrs(int *nr) 231 208 { 232 209 *nr = 0; 233 210 return NULL; 211 + } 212 + 213 + static inline void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap) 214 + { 215 + memset(cap, 0, sizeof(*cap)); 234 216 } 235 217 236 218 static inline void perf_events_lapic_init(void) { }
+205 -57
arch/x86/kernel/cpu/perf_event.c
··· 484 484 return event->pmu == &pmu; 485 485 } 486 486 487 + /* 488 + * Event scheduler state: 489 + * 490 + * Assign events iterating over all events and counters, beginning 491 + * with events with least weights first. Keep the current iterator 492 + * state in struct sched_state. 493 + */ 494 + struct sched_state { 495 + int weight; 496 + int event; /* event index */ 497 + int counter; /* counter index */ 498 + int unassigned; /* number of events to be assigned left */ 499 + unsigned long used[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; 500 + }; 501 + 502 + /* Total max is X86_PMC_IDX_MAX, but we are O(n!) limited */ 503 + #define SCHED_STATES_MAX 2 504 + 505 + struct perf_sched { 506 + int max_weight; 507 + int max_events; 508 + struct event_constraint **constraints; 509 + struct sched_state state; 510 + int saved_states; 511 + struct sched_state saved[SCHED_STATES_MAX]; 512 + }; 513 + 514 + /* 515 + * Initialize interator that runs through all events and counters. 516 + */ 517 + static void perf_sched_init(struct perf_sched *sched, struct event_constraint **c, 518 + int num, int wmin, int wmax) 519 + { 520 + int idx; 521 + 522 + memset(sched, 0, sizeof(*sched)); 523 + sched->max_events = num; 524 + sched->max_weight = wmax; 525 + sched->constraints = c; 526 + 527 + for (idx = 0; idx < num; idx++) { 528 + if (c[idx]->weight == wmin) 529 + break; 530 + } 531 + 532 + sched->state.event = idx; /* start with min weight */ 533 + sched->state.weight = wmin; 534 + sched->state.unassigned = num; 535 + } 536 + 537 + static void perf_sched_save_state(struct perf_sched *sched) 538 + { 539 + if (WARN_ON_ONCE(sched->saved_states >= SCHED_STATES_MAX)) 540 + return; 541 + 542 + sched->saved[sched->saved_states] = sched->state; 543 + sched->saved_states++; 544 + } 545 + 546 + static bool perf_sched_restore_state(struct perf_sched *sched) 547 + { 548 + if (!sched->saved_states) 549 + return false; 550 + 551 + sched->saved_states--; 552 + sched->state = sched->saved[sched->saved_states]; 
553 + 554 + /* continue with next counter: */ 555 + clear_bit(sched->state.counter++, sched->state.used); 556 + 557 + return true; 558 + } 559 + 560 + /* 561 + * Select a counter for the current event to schedule. Return true on 562 + * success. 563 + */ 564 + static bool __perf_sched_find_counter(struct perf_sched *sched) 565 + { 566 + struct event_constraint *c; 567 + int idx; 568 + 569 + if (!sched->state.unassigned) 570 + return false; 571 + 572 + if (sched->state.event >= sched->max_events) 573 + return false; 574 + 575 + c = sched->constraints[sched->state.event]; 576 + 577 + /* Prefer fixed purpose counters */ 578 + if (x86_pmu.num_counters_fixed) { 579 + idx = X86_PMC_IDX_FIXED; 580 + for_each_set_bit_cont(idx, c->idxmsk, X86_PMC_IDX_MAX) { 581 + if (!__test_and_set_bit(idx, sched->state.used)) 582 + goto done; 583 + } 584 + } 585 + /* Grab the first unused counter starting with idx */ 586 + idx = sched->state.counter; 587 + for_each_set_bit_cont(idx, c->idxmsk, X86_PMC_IDX_FIXED) { 588 + if (!__test_and_set_bit(idx, sched->state.used)) 589 + goto done; 590 + } 591 + 592 + return false; 593 + 594 + done: 595 + sched->state.counter = idx; 596 + 597 + if (c->overlap) 598 + perf_sched_save_state(sched); 599 + 600 + return true; 601 + } 602 + 603 + static bool perf_sched_find_counter(struct perf_sched *sched) 604 + { 605 + while (!__perf_sched_find_counter(sched)) { 606 + if (!perf_sched_restore_state(sched)) 607 + return false; 608 + } 609 + 610 + return true; 611 + } 612 + 613 + /* 614 + * Go through all unassigned events and find the next one to schedule. 615 + * Take events with the least weight first. Return true on success. 
616 + */ 617 + static bool perf_sched_next_event(struct perf_sched *sched) 618 + { 619 + struct event_constraint *c; 620 + 621 + if (!sched->state.unassigned || !--sched->state.unassigned) 622 + return false; 623 + 624 + do { 625 + /* next event */ 626 + sched->state.event++; 627 + if (sched->state.event >= sched->max_events) { 628 + /* next weight */ 629 + sched->state.event = 0; 630 + sched->state.weight++; 631 + if (sched->state.weight > sched->max_weight) 632 + return false; 633 + } 634 + c = sched->constraints[sched->state.event]; 635 + } while (c->weight != sched->state.weight); 636 + 637 + sched->state.counter = 0; /* start with first counter */ 638 + 639 + return true; 640 + } 641 + 642 + /* 643 + * Assign a counter for each event. 644 + */ 645 + static int perf_assign_events(struct event_constraint **constraints, int n, 646 + int wmin, int wmax, int *assign) 647 + { 648 + struct perf_sched sched; 649 + 650 + perf_sched_init(&sched, constraints, n, wmin, wmax); 651 + 652 + do { 653 + if (!perf_sched_find_counter(&sched)) 654 + break; /* failed */ 655 + if (assign) 656 + assign[sched.state.event] = sched.state.counter; 657 + } while (perf_sched_next_event(&sched)); 658 + 659 + return sched.state.unassigned; 660 + } 661 + 487 662 int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) 488 663 { 489 664 struct event_constraint *c, *constraints[X86_PMC_IDX_MAX]; 490 665 unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; 491 - int i, j, w, wmax, num = 0; 666 + int i, wmin, wmax, num = 0; 492 667 struct hw_perf_event *hwc; 493 668 494 669 bitmap_zero(used_mask, X86_PMC_IDX_MAX); 495 670 496 - for (i = 0; i < n; i++) { 671 + for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) { 497 672 c = x86_pmu.get_event_constraints(cpuc, cpuc->event_list[i]); 498 673 constraints[i] = c; 674 + wmin = min(wmin, c->weight); 675 + wmax = max(wmax, c->weight); 499 676 } 500 677 501 678 /* ··· 698 521 if (assign) 699 522 assign[i] = hwc->idx; 700 523 } 
701 - if (i == n) 702 - goto done; 703 524 704 - /* 705 - * begin slow path 706 - */ 525 + /* slow path */ 526 + if (i != n) 527 + num = perf_assign_events(constraints, n, wmin, wmax, assign); 707 528 708 - bitmap_zero(used_mask, X86_PMC_IDX_MAX); 709 - 710 - /* 711 - * weight = number of possible counters 712 - * 713 - * 1 = most constrained, only works on one counter 714 - * wmax = least constrained, works on any counter 715 - * 716 - * assign events to counters starting with most 717 - * constrained events. 718 - */ 719 - wmax = x86_pmu.num_counters; 720 - 721 - /* 722 - * when fixed event counters are present, 723 - * wmax is incremented by 1 to account 724 - * for one more choice 725 - */ 726 - if (x86_pmu.num_counters_fixed) 727 - wmax++; 728 - 729 - for (w = 1, num = n; num && w <= wmax; w++) { 730 - /* for each event */ 731 - for (i = 0; num && i < n; i++) { 732 - c = constraints[i]; 733 - hwc = &cpuc->event_list[i]->hw; 734 - 735 - if (c->weight != w) 736 - continue; 737 - 738 - for_each_set_bit(j, c->idxmsk, X86_PMC_IDX_MAX) { 739 - if (!test_bit(j, used_mask)) 740 - break; 741 - } 742 - 743 - if (j == X86_PMC_IDX_MAX) 744 - break; 745 - 746 - __set_bit(j, used_mask); 747 - 748 - if (assign) 749 - assign[i] = j; 750 - num--; 751 - } 752 - } 753 - done: 754 529 /* 755 530 * scheduling failed or is just a simulation, 756 531 * free resources if necessary ··· 1248 1119 1249 1120 static int __init init_hw_perf_events(void) 1250 1121 { 1122 + struct x86_pmu_quirk *quirk; 1251 1123 struct event_constraint *c; 1252 1124 int err; 1253 1125 ··· 1277 1147 1278 1148 pr_cont("%s PMU driver.\n", x86_pmu.name); 1279 1149 1280 - if (x86_pmu.quirks) 1281 - x86_pmu.quirks(); 1150 + for (quirk = x86_pmu.quirks; quirk; quirk = quirk->next) 1151 + quirk->func(); 1282 1152 1283 1153 if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) { 1284 1154 WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!", ··· 1301 1171 1302 1172 unconstrained = (struct event_constraint) 1303 1173 
__EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1, 1304 - 0, x86_pmu.num_counters); 1174 + 0, x86_pmu.num_counters, 0); 1305 1175 1306 1176 if (x86_pmu.event_constraints) { 1177 + /* 1178 + * event on fixed counter2 (REF_CYCLES) only works on this 1179 + * counter, so do not extend mask to generic counters 1180 + */ 1307 1181 for_each_event_constraint(c, x86_pmu.event_constraints) { 1308 - if (c->cmask != X86_RAW_EVENT_MASK) 1182 + if (c->cmask != X86_RAW_EVENT_MASK 1183 + || c->idxmsk64 == X86_PMC_MSK_FIXED_REF_CYCLES) { 1309 1184 continue; 1185 + } 1310 1186 1311 1187 c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1; 1312 1188 c->weight += x86_pmu.num_counters; ··· 1702 1566 1703 1567 return misc; 1704 1568 } 1569 + 1570 + void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap) 1571 + { 1572 + cap->version = x86_pmu.version; 1573 + cap->num_counters_gp = x86_pmu.num_counters; 1574 + cap->num_counters_fixed = x86_pmu.num_counters_fixed; 1575 + cap->bit_width_gp = x86_pmu.cntval_bits; 1576 + cap->bit_width_fixed = x86_pmu.cntval_bits; 1577 + cap->events_mask = (unsigned int)x86_pmu.events_maskl; 1578 + cap->events_mask_len = x86_pmu.events_mask_len; 1579 + } 1580 + EXPORT_SYMBOL_GPL(perf_get_x86_pmu_capability);
+48 -3
arch/x86/kernel/cpu/perf_event.h
··· 45 45 u64 code; 46 46 u64 cmask; 47 47 int weight; 48 + int overlap; 48 49 }; 49 50 50 51 struct amd_nb { ··· 152 151 void *kfree_on_online; 153 152 }; 154 153 155 - #define __EVENT_CONSTRAINT(c, n, m, w) {\ 154 + #define __EVENT_CONSTRAINT(c, n, m, w, o) {\ 156 155 { .idxmsk64 = (n) }, \ 157 156 .code = (c), \ 158 157 .cmask = (m), \ 159 158 .weight = (w), \ 159 + .overlap = (o), \ 160 160 } 161 161 162 162 #define EVENT_CONSTRAINT(c, n, m) \ 163 - __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n)) 163 + __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 0) 164 + 165 + /* 166 + * The overlap flag marks event constraints with overlapping counter 167 + * masks. This is the case if the counter mask of such an event is not 168 + * a subset of any other counter mask of a constraint with an equal or 169 + * higher weight, e.g.: 170 + * 171 + * c_overlaps = EVENT_CONSTRAINT_OVERLAP(0, 0x09, 0); 172 + * c_another1 = EVENT_CONSTRAINT(0, 0x07, 0); 173 + * c_another2 = EVENT_CONSTRAINT(0, 0x38, 0); 174 + * 175 + * The event scheduler may not select the correct counter in the first 176 + * cycle because it needs to know which subsequent events will be 177 + * scheduled. It may fail to schedule the events then. So we set the 178 + * overlap flag for such constraints to give the scheduler a hint which 179 + * events to select for counter rescheduling. 180 + * 181 + * Care must be taken as the rescheduling algorithm is O(n!) which 182 + * will increase scheduling cycles for an over-commited system 183 + * dramatically. The number of such EVENT_CONSTRAINT_OVERLAP() macros 184 + * and its counter masks must be kept at a minimum. 185 + */ 186 + #define EVENT_CONSTRAINT_OVERLAP(c, n, m) \ 187 + __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 1) 164 188 165 189 /* 166 190 * Constraint on the Event code. 
··· 261 235 u64 capabilities; 262 236 }; 263 237 238 + struct x86_pmu_quirk { 239 + struct x86_pmu_quirk *next; 240 + void (*func)(void); 241 + }; 242 + 264 243 /* 265 244 * struct x86_pmu - generic x86 pmu 266 245 */ ··· 290 259 int num_counters_fixed; 291 260 int cntval_bits; 292 261 u64 cntval_mask; 262 + union { 263 + unsigned long events_maskl; 264 + unsigned long events_mask[BITS_TO_LONGS(ARCH_PERFMON_EVENTS_COUNT)]; 265 + }; 266 + int events_mask_len; 293 267 int apic; 294 268 u64 max_period; 295 269 struct event_constraint * ··· 304 268 void (*put_event_constraints)(struct cpu_hw_events *cpuc, 305 269 struct perf_event *event); 306 270 struct event_constraint *event_constraints; 307 - void (*quirks)(void); 271 + struct x86_pmu_quirk *quirks; 308 272 int perfctr_second_write; 309 273 310 274 int (*cpu_prepare)(int cpu); ··· 344 308 */ 345 309 struct perf_guest_switch_msr *(*guest_get_msrs)(int *nr); 346 310 }; 311 + 312 + #define x86_add_quirk(func_) \ 313 + do { \ 314 + static struct x86_pmu_quirk __quirk __initdata = { \ 315 + .func = func_, \ 316 + }; \ 317 + __quirk.next = x86_pmu.quirks; \ 318 + x86_pmu.quirks = &__quirk; \ 319 + } while (0) 347 320 348 321 #define ERF_NO_HT_SHARING 1 349 322 #define ERF_HAS_RSP_1 2
+1 -1
arch/x86/kernel/cpu/perf_event_amd.c
··· 492 492 static struct event_constraint amd_f15_PMC0 = EVENT_CONSTRAINT(0, 0x01, 0); 493 493 static struct event_constraint amd_f15_PMC20 = EVENT_CONSTRAINT(0, 0x07, 0); 494 494 static struct event_constraint amd_f15_PMC3 = EVENT_CONSTRAINT(0, 0x08, 0); 495 - static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT(0, 0x09, 0); 495 + static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT_OVERLAP(0, 0x09, 0); 496 496 static struct event_constraint amd_f15_PMC50 = EVENT_CONSTRAINT(0, 0x3F, 0); 497 497 static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0); 498 498
+61 -27
arch/x86/kernel/cpu/perf_event_intel.c
··· 28 28 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, 29 29 [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, 30 30 [PERF_COUNT_HW_BUS_CYCLES] = 0x013c, 31 + [PERF_COUNT_HW_REF_CPU_CYCLES] = 0x0300, /* pseudo-encoding */ 31 32 }; 32 33 33 34 static struct event_constraint intel_core_event_constraints[] __read_mostly = ··· 46 45 { 47 46 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ 48 47 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ 49 - /* 50 - * Core2 has Fixed Counter 2 listed as CPU_CLK_UNHALTED.REF and event 51 - * 0x013c as CPU_CLK_UNHALTED.BUS and specifies there is a fixed 52 - * ratio between these counters. 53 - */ 54 - /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */ 48 + FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ 55 49 INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */ 56 50 INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */ 57 51 INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ ··· 64 68 { 65 69 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ 66 70 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ 67 - /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */ 71 + FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ 68 72 INTEL_EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */ 69 73 INTEL_EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */ 70 74 INTEL_EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */ ··· 86 90 { 87 91 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ 88 92 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ 89 - /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */ 93 + FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ 90 94 INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */ 91 95 INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */ 92 96 INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */ ··· 98 102 { 99 103 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ 100 104 
FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ 101 - /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */ 105 + FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ 102 106 INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.PENDING */ 103 107 INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */ 104 108 INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */ ··· 121 125 { 122 126 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ 123 127 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ 124 - /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */ 128 + FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ 125 129 EVENT_CONSTRAINT_END 126 130 }; 127 131 ··· 1515 1519 .guest_get_msrs = intel_guest_get_msrs, 1516 1520 }; 1517 1521 1518 - static void intel_clovertown_quirks(void) 1522 + static __init void intel_clovertown_quirk(void) 1519 1523 { 1520 1524 /* 1521 1525 * PEBS is unreliable due to: ··· 1541 1545 x86_pmu.pebs_constraints = NULL; 1542 1546 } 1543 1547 1544 - static void intel_sandybridge_quirks(void) 1548 + static __init void intel_sandybridge_quirk(void) 1545 1549 { 1546 1550 printk(KERN_WARNING "PEBS disabled due to CPU errata.\n"); 1547 1551 x86_pmu.pebs = 0; 1548 1552 x86_pmu.pebs_constraints = NULL; 1549 1553 } 1550 1554 1555 + static const struct { int id; char *name; } intel_arch_events_map[] __initconst = { 1556 + { PERF_COUNT_HW_CPU_CYCLES, "cpu cycles" }, 1557 + { PERF_COUNT_HW_INSTRUCTIONS, "instructions" }, 1558 + { PERF_COUNT_HW_BUS_CYCLES, "bus cycles" }, 1559 + { PERF_COUNT_HW_CACHE_REFERENCES, "cache references" }, 1560 + { PERF_COUNT_HW_CACHE_MISSES, "cache misses" }, 1561 + { PERF_COUNT_HW_BRANCH_INSTRUCTIONS, "branch instructions" }, 1562 + { PERF_COUNT_HW_BRANCH_MISSES, "branch misses" }, 1563 + }; 1564 + 1565 + static __init void intel_arch_events_quirk(void) 1566 + { 1567 + int bit; 1568 + 1569 + /* disable event that reported as not 
presend by cpuid */ 1570 + for_each_set_bit(bit, x86_pmu.events_mask, ARRAY_SIZE(intel_arch_events_map)) { 1571 + intel_perfmon_event_map[intel_arch_events_map[bit].id] = 0; 1572 + printk(KERN_WARNING "CPUID marked event: \'%s\' unavailable\n", 1573 + intel_arch_events_map[bit].name); 1574 + } 1575 + } 1576 + 1577 + static __init void intel_nehalem_quirk(void) 1578 + { 1579 + union cpuid10_ebx ebx; 1580 + 1581 + ebx.full = x86_pmu.events_maskl; 1582 + if (ebx.split.no_branch_misses_retired) { 1583 + /* 1584 + * Erratum AAJ80 detected, we work it around by using 1585 + * the BR_MISP_EXEC.ANY event. This will over-count 1586 + * branch-misses, but it's still much better than the 1587 + * architectural event which is often completely bogus: 1588 + */ 1589 + intel_perfmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x7f89; 1590 + ebx.split.no_branch_misses_retired = 0; 1591 + x86_pmu.events_maskl = ebx.full; 1592 + printk(KERN_INFO "CPU erratum AAJ80 worked around\n"); 1593 + } 1594 + } 1595 + 1551 1596 __init int intel_pmu_init(void) 1552 1597 { 1553 1598 union cpuid10_edx edx; 1554 1599 union cpuid10_eax eax; 1600 + union cpuid10_ebx ebx; 1555 1601 unsigned int unused; 1556 - unsigned int ebx; 1557 1602 int version; 1558 1603 1559 1604 if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { ··· 1611 1574 * Check whether the Architectural PerfMon supports 1612 1575 * Branch Misses Retired hw_event or not. 
1613 1576 */ 1614 - cpuid(10, &eax.full, &ebx, &unused, &edx.full); 1615 - if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED) 1577 + cpuid(10, &eax.full, &ebx.full, &unused, &edx.full); 1578 + if (eax.split.mask_length < ARCH_PERFMON_EVENTS_COUNT) 1616 1579 return -ENODEV; 1617 1580 1618 1581 version = eax.split.version_id; ··· 1625 1588 x86_pmu.num_counters = eax.split.num_counters; 1626 1589 x86_pmu.cntval_bits = eax.split.bit_width; 1627 1590 x86_pmu.cntval_mask = (1ULL << eax.split.bit_width) - 1; 1591 + 1592 + x86_pmu.events_maskl = ebx.full; 1593 + x86_pmu.events_mask_len = eax.split.mask_length; 1628 1594 1629 1595 /* 1630 1596 * Quirk: v2 perfmon does not report fixed-purpose events, so ··· 1648 1608 1649 1609 intel_ds_init(); 1650 1610 1611 + x86_add_quirk(intel_arch_events_quirk); /* Install first, so it runs last */ 1612 + 1651 1613 /* 1652 1614 * Install the hw-cache-events table: 1653 1615 */ ··· 1659 1617 break; 1660 1618 1661 1619 case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */ 1662 - x86_pmu.quirks = intel_clovertown_quirks; 1620 + x86_add_quirk(intel_clovertown_quirk); 1663 1621 case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */ 1664 1622 case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */ 1665 1623 case 29: /* six-core 45 nm xeon "Dunnington" */ ··· 1693 1651 /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */ 1694 1652 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x1803fb1; 1695 1653 1696 - if (ebx & 0x40) { 1697 - /* 1698 - * Erratum AAJ80 detected, we work it around by using 1699 - * the BR_MISP_EXEC.ANY event. 
This will over-count 1700 - * branch-misses, but it's still much better than the 1701 - * architectural event which is often completely bogus: 1702 - */ 1703 - intel_perfmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x7f89; 1654 + x86_add_quirk(intel_nehalem_quirk); 1704 1655 1705 - pr_cont("erratum AAJ80 worked around, "); 1706 - } 1707 1656 pr_cont("Nehalem events, "); 1708 1657 break; 1709 1658 ··· 1734 1701 break; 1735 1702 1736 1703 case 42: /* SandyBridge */ 1737 - x86_pmu.quirks = intel_sandybridge_quirks; 1704 + x86_add_quirk(intel_sandybridge_quirk); 1738 1705 case 45: /* SandyBridge, "Romely-EP" */ 1739 1706 memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, 1740 1707 sizeof(hw_cache_event_ids)); ··· 1771 1738 break; 1772 1739 } 1773 1740 } 1741 + 1774 1742 return 0; 1775 1743 }
+1 -1
arch/x86/kernel/jump_label.c
··· 50 50 put_online_cpus(); 51 51 } 52 52 53 - void arch_jump_label_transform_static(struct jump_entry *entry, 53 + __init_or_module void arch_jump_label_transform_static(struct jump_entry *entry, 54 54 enum jump_label_type type) 55 55 { 56 56 __jump_label_transform(entry, type, text_poke_early);
+8 -1
arch/x86/lib/inat.c
··· 82 82 const insn_attr_t *table; 83 83 if (vex_m > X86_VEX_M_MAX || vex_p > INAT_LSTPFX_MAX) 84 84 return 0; 85 - table = inat_avx_tables[vex_m][vex_p]; 85 + /* At first, this checks the master table */ 86 + table = inat_avx_tables[vex_m][0]; 86 87 if (!table) 87 88 return 0; 89 + if (!inat_is_group(table[opcode]) && vex_p) { 90 + /* If this is not a group, get attribute directly */ 91 + table = inat_avx_tables[vex_m][vex_p]; 92 + if (!table) 93 + return 0; 94 + } 88 95 return table[opcode]; 89 96 } 90 97
+3 -1
arch/x86/lib/insn.c
··· 202 202 m = insn_vex_m_bits(insn); 203 203 p = insn_vex_p_bits(insn); 204 204 insn->attr = inat_get_avx_attribute(op, m, p); 205 - if (!inat_accept_vex(insn->attr)) 205 + if (!inat_accept_vex(insn->attr) && !inat_is_group(insn->attr)) 206 206 insn->attr = 0; /* This instruction is bad */ 207 207 goto end; /* VEX has only 1 byte for opcode */ 208 208 } ··· 249 249 pfx = insn_last_prefix(insn); 250 250 insn->attr = inat_get_group_attribute(mod, pfx, 251 251 insn->attr); 252 + if (insn_is_avx(insn) && !inat_accept_vex(insn->attr)) 253 + insn->attr = 0; /* This is bad */ 252 254 } 253 255 } 254 256
+333 -273
arch/x86/lib/x86-opcode-map.txt
··· 1 1 # x86 Opcode Maps 2 2 # 3 + # This is (mostly) based on following documentations. 4 + # - Intel(R) 64 and IA-32 Architectures Software Developer's Manual Vol.2 5 + # (#325383-040US, October 2011) 6 + # - Intel(R) Advanced Vector Extensions Programming Reference 7 + # (#319433-011,JUNE 2011). 8 + # 3 9 #<Opcode maps> 4 10 # Table: table-name 5 11 # Referrer: escaped-name ··· 21 15 # EndTable 22 16 # 23 17 # AVX Superscripts 24 - # (VEX): this opcode can accept VEX prefix. 25 - # (oVEX): this opcode requires VEX prefix. 26 - # (o128): this opcode only supports 128bit VEX. 27 - # (o256): this opcode only supports 256bit VEX. 18 + # (v): this opcode requires VEX prefix. 19 + # (v1): this opcode only supports 128bit VEX. 20 + # 21 + # Last Prefix Superscripts 22 + # - (66): the last prefix is 0x66 23 + # - (F3): the last prefix is 0xF3 24 + # - (F2): the last prefix is 0xF2 28 25 # 29 26 30 27 Table: one byte opcode ··· 208 199 a1: MOV rAX,Ov 209 200 a2: MOV Ob,AL 210 201 a3: MOV Ov,rAX 211 - a4: MOVS/B Xb,Yb 212 - a5: MOVS/W/D/Q Xv,Yv 202 + a4: MOVS/B Yb,Xb 203 + a5: MOVS/W/D/Q Yv,Xv 213 204 a6: CMPS/B Xb,Yb 214 205 a7: CMPS/W/D Xv,Yv 215 206 a8: TEST AL,Ib ··· 242 233 c1: Grp2 Ev,Ib (1A) 243 234 c2: RETN Iw (f64) 244 235 c3: RETN 245 - c4: LES Gz,Mp (i64) | 3bytes-VEX (Prefix) 246 - c5: LDS Gz,Mp (i64) | 2bytes-VEX (Prefix) 236 + c4: LES Gz,Mp (i64) | VEX+2byte (Prefix) 237 + c5: LDS Gz,Mp (i64) | VEX+1byte (Prefix) 247 238 c6: Grp11 Eb,Ib (1A) 248 239 c7: Grp11 Ev,Iz (1A) 249 240 c8: ENTER Iw,Ib ··· 329 320 # 3DNow! uses the last imm byte as opcode extension. 330 321 0f: 3DNow! 
Pq,Qq,Ib 331 322 # 0x0f 0x10-0x1f 332 - 10: movups Vps,Wps (VEX) | movss Vss,Wss (F3),(VEX),(o128) | movupd Vpd,Wpd (66),(VEX) | movsd Vsd,Wsd (F2),(VEX),(o128) 333 - 11: movups Wps,Vps (VEX) | movss Wss,Vss (F3),(VEX),(o128) | movupd Wpd,Vpd (66),(VEX) | movsd Wsd,Vsd (F2),(VEX),(o128) 334 - 12: movlps Vq,Mq (VEX),(o128) | movlpd Vq,Mq (66),(VEX),(o128) | movhlps Vq,Uq (VEX),(o128) | movddup Vq,Wq (F2),(VEX) | movsldup Vq,Wq (F3),(VEX) 335 - 13: mpvlps Mq,Vq (VEX),(o128) | movlpd Mq,Vq (66),(VEX),(o128) 336 - 14: unpcklps Vps,Wq (VEX) | unpcklpd Vpd,Wq (66),(VEX) 337 - 15: unpckhps Vps,Wq (VEX) | unpckhpd Vpd,Wq (66),(VEX) 338 - 16: movhps Vq,Mq (VEX),(o128) | movhpd Vq,Mq (66),(VEX),(o128) | movlsps Vq,Uq (VEX),(o128) | movshdup Vq,Wq (F3),(VEX) 339 - 17: movhps Mq,Vq (VEX),(o128) | movhpd Mq,Vq (66),(VEX),(o128) 323 + # NOTE: According to Intel SDM opcode map, vmovups and vmovupd has no operands 324 + # but it actually has operands. And also, vmovss and vmovsd only accept 128bit. 325 + # MOVSS/MOVSD has too many forms(3) on SDM. This map just shows a typical form. 
326 + # Many AVX instructions lack v1 superscript, according to Intel AVX-Prgramming 327 + # Reference A.1 328 + 10: vmovups Vps,Wps | vmovupd Vpd,Wpd (66) | vmovss Vx,Hx,Wss (F3),(v1) | vmovsd Vx,Hx,Wsd (F2),(v1) 329 + 11: vmovups Wps,Vps | vmovupd Wpd,Vpd (66) | vmovss Wss,Hx,Vss (F3),(v1) | vmovsd Wsd,Hx,Vsd (F2),(v1) 330 + 12: vmovlps Vq,Hq,Mq (v1) | vmovhlps Vq,Hq,Uq (v1) | vmovlpd Vq,Hq,Mq (66),(v1) | vmovsldup Vx,Wx (F3) | vmovddup Vx,Wx (F2) 331 + 13: vmovlps Mq,Vq (v1) | vmovlpd Mq,Vq (66),(v1) 332 + 14: vunpcklps Vx,Hx,Wx | vunpcklpd Vx,Hx,Wx (66) 333 + 15: vunpckhps Vx,Hx,Wx | vunpckhpd Vx,Hx,Wx (66) 334 + 16: vmovhps Vdq,Hq,Mq (v1) | vmovlhps Vdq,Hq,Uq (v1) | vmovhpd Vdq,Hq,Mq (66),(v1) | vmovshdup Vx,Wx (F3) 335 + 17: vmovhps Mq,Vq (v1) | vmovhpd Mq,Vq (66),(v1) 340 336 18: Grp16 (1A) 341 337 19: 342 338 1a: ··· 359 345 25: 360 346 26: 361 347 27: 362 - 28: movaps Vps,Wps (VEX) | movapd Vpd,Wpd (66),(VEX) 363 - 29: movaps Wps,Vps (VEX) | movapd Wpd,Vpd (66),(VEX) 364 - 2a: cvtpi2ps Vps,Qpi | cvtsi2ss Vss,Ed/q (F3),(VEX),(o128) | cvtpi2pd Vpd,Qpi (66) | cvtsi2sd Vsd,Ed/q (F2),(VEX),(o128) 365 - 2b: movntps Mps,Vps (VEX) | movntpd Mpd,Vpd (66),(VEX) 366 - 2c: cvttps2pi Ppi,Wps | cvttss2si Gd/q,Wss (F3),(VEX),(o128) | cvttpd2pi Ppi,Wpd (66) | cvttsd2si Gd/q,Wsd (F2),(VEX),(o128) 367 - 2d: cvtps2pi Ppi,Wps | cvtss2si Gd/q,Wss (F3),(VEX),(o128) | cvtpd2pi Qpi,Wpd (66) | cvtsd2si Gd/q,Wsd (F2),(VEX),(o128) 368 - 2e: ucomiss Vss,Wss (VEX),(o128) | ucomisd Vsd,Wsd (66),(VEX),(o128) 369 - 2f: comiss Vss,Wss (VEX),(o128) | comisd Vsd,Wsd (66),(VEX),(o128) 348 + 28: vmovaps Vps,Wps | vmovapd Vpd,Wpd (66) 349 + 29: vmovaps Wps,Vps | vmovapd Wpd,Vpd (66) 350 + 2a: cvtpi2ps Vps,Qpi | cvtpi2pd Vpd,Qpi (66) | vcvtsi2ss Vss,Hss,Ey (F3),(v1) | vcvtsi2sd Vsd,Hsd,Ey (F2),(v1) 351 + 2b: vmovntps Mps,Vps | vmovntpd Mpd,Vpd (66) 352 + 2c: cvttps2pi Ppi,Wps | cvttpd2pi Ppi,Wpd (66) | vcvttss2si Gy,Wss (F3),(v1) | vcvttsd2si Gy,Wsd (F2),(v1) 353 + 2d: cvtps2pi Ppi,Wps | 
cvtpd2pi Qpi,Wpd (66) | vcvtss2si Gy,Wss (F3),(v1) | vcvtsd2si Gy,Wsd (F2),(v1) 354 + 2e: vucomiss Vss,Wss (v1) | vucomisd Vsd,Wsd (66),(v1) 355 + 2f: vcomiss Vss,Wss (v1) | vcomisd Vsd,Wsd (66),(v1) 370 356 # 0x0f 0x30-0x3f 371 357 30: WRMSR 372 358 31: RDTSC ··· 402 388 4e: CMOVLE/NG Gv,Ev 403 389 4f: CMOVNLE/G Gv,Ev 404 390 # 0x0f 0x50-0x5f 405 - 50: movmskps Gd/q,Ups (VEX) | movmskpd Gd/q,Upd (66),(VEX) 406 - 51: sqrtps Vps,Wps (VEX) | sqrtss Vss,Wss (F3),(VEX),(o128) | sqrtpd Vpd,Wpd (66),(VEX) | sqrtsd Vsd,Wsd (F2),(VEX),(o128) 407 - 52: rsqrtps Vps,Wps (VEX) | rsqrtss Vss,Wss (F3),(VEX),(o128) 408 - 53: rcpps Vps,Wps (VEX) | rcpss Vss,Wss (F3),(VEX),(o128) 409 - 54: andps Vps,Wps (VEX) | andpd Vpd,Wpd (66),(VEX) 410 - 55: andnps Vps,Wps (VEX) | andnpd Vpd,Wpd (66),(VEX) 411 - 56: orps Vps,Wps (VEX) | orpd Vpd,Wpd (66),(VEX) 412 - 57: xorps Vps,Wps (VEX) | xorpd Vpd,Wpd (66),(VEX) 413 - 58: addps Vps,Wps (VEX) | addss Vss,Wss (F3),(VEX),(o128) | addpd Vpd,Wpd (66),(VEX) | addsd Vsd,Wsd (F2),(VEX),(o128) 414 - 59: mulps Vps,Wps (VEX) | mulss Vss,Wss (F3),(VEX),(o128) | mulpd Vpd,Wpd (66),(VEX) | mulsd Vsd,Wsd (F2),(VEX),(o128) 415 - 5a: cvtps2pd Vpd,Wps (VEX) | cvtss2sd Vsd,Wss (F3),(VEX),(o128) | cvtpd2ps Vps,Wpd (66),(VEX) | cvtsd2ss Vsd,Wsd (F2),(VEX),(o128) 416 - 5b: cvtdq2ps Vps,Wdq (VEX) | cvtps2dq Vdq,Wps (66),(VEX) | cvttps2dq Vdq,Wps (F3),(VEX) 417 - 5c: subps Vps,Wps (VEX) | subss Vss,Wss (F3),(VEX),(o128) | subpd Vpd,Wpd (66),(VEX) | subsd Vsd,Wsd (F2),(VEX),(o128) 418 - 5d: minps Vps,Wps (VEX) | minss Vss,Wss (F3),(VEX),(o128) | minpd Vpd,Wpd (66),(VEX) | minsd Vsd,Wsd (F2),(VEX),(o128) 419 - 5e: divps Vps,Wps (VEX) | divss Vss,Wss (F3),(VEX),(o128) | divpd Vpd,Wpd (66),(VEX) | divsd Vsd,Wsd (F2),(VEX),(o128) 420 - 5f: maxps Vps,Wps (VEX) | maxss Vss,Wss (F3),(VEX),(o128) | maxpd Vpd,Wpd (66),(VEX) | maxsd Vsd,Wsd (F2),(VEX),(o128) 391 + 50: vmovmskps Gy,Ups | vmovmskpd Gy,Upd (66) 392 + 51: vsqrtps Vps,Wps | vsqrtpd Vpd,Wpd (66) | vsqrtss 
Vss,Hss,Wss (F3),(v1) | vsqrtsd Vsd,Hsd,Wsd (F2),(v1) 393 + 52: vrsqrtps Vps,Wps | vrsqrtss Vss,Hss,Wss (F3),(v1) 394 + 53: vrcpps Vps,Wps | vrcpss Vss,Hss,Wss (F3),(v1) 395 + 54: vandps Vps,Hps,Wps | vandpd Vpd,Hpd,Wpd (66) 396 + 55: vandnps Vps,Hps,Wps | vandnpd Vpd,Hpd,Wpd (66) 397 + 56: vorps Vps,Hps,Wps | vorpd Vpd,Hpd,Wpd (66) 398 + 57: vxorps Vps,Hps,Wps | vxorpd Vpd,Hpd,Wpd (66) 399 + 58: vaddps Vps,Hps,Wps | vaddpd Vpd,Hpd,Wpd (66) | vaddss Vss,Hss,Wss (F3),(v1) | vaddsd Vsd,Hsd,Wsd (F2),(v1) 400 + 59: vmulps Vps,Hps,Wps | vmulpd Vpd,Hpd,Wpd (66) | vmulss Vss,Hss,Wss (F3),(v1) | vmulsd Vsd,Hsd,Wsd (F2),(v1) 401 + 5a: vcvtps2pd Vpd,Wps | vcvtpd2ps Vps,Wpd (66) | vcvtss2sd Vsd,Hx,Wss (F3),(v1) | vcvtsd2ss Vss,Hx,Wsd (F2),(v1) 402 + 5b: vcvtdq2ps Vps,Wdq | vcvtps2dq Vdq,Wps (66) | vcvttps2dq Vdq,Wps (F3) 403 + 5c: vsubps Vps,Hps,Wps | vsubpd Vpd,Hpd,Wpd (66) | vsubss Vss,Hss,Wss (F3),(v1) | vsubsd Vsd,Hsd,Wsd (F2),(v1) 404 + 5d: vminps Vps,Hps,Wps | vminpd Vpd,Hpd,Wpd (66) | vminss Vss,Hss,Wss (F3),(v1) | vminsd Vsd,Hsd,Wsd (F2),(v1) 405 + 5e: vdivps Vps,Hps,Wps | vdivpd Vpd,Hpd,Wpd (66) | vdivss Vss,Hss,Wss (F3),(v1) | vdivsd Vsd,Hsd,Wsd (F2),(v1) 406 + 5f: vmaxps Vps,Hps,Wps | vmaxpd Vpd,Hpd,Wpd (66) | vmaxss Vss,Hss,Wss (F3),(v1) | vmaxsd Vsd,Hsd,Wsd (F2),(v1) 421 407 # 0x0f 0x60-0x6f 422 - 60: punpcklbw Pq,Qd | punpcklbw Vdq,Wdq (66),(VEX),(o128) 423 - 61: punpcklwd Pq,Qd | punpcklwd Vdq,Wdq (66),(VEX),(o128) 424 - 62: punpckldq Pq,Qd | punpckldq Vdq,Wdq (66),(VEX),(o128) 425 - 63: packsswb Pq,Qq | packsswb Vdq,Wdq (66),(VEX),(o128) 426 - 64: pcmpgtb Pq,Qq | pcmpgtb Vdq,Wdq (66),(VEX),(o128) 427 - 65: pcmpgtw Pq,Qq | pcmpgtw Vdq,Wdq (66),(VEX),(o128) 428 - 66: pcmpgtd Pq,Qq | pcmpgtd Vdq,Wdq (66),(VEX),(o128) 429 - 67: packuswb Pq,Qq | packuswb Vdq,Wdq (66),(VEX),(o128) 430 - 68: punpckhbw Pq,Qd | punpckhbw Vdq,Wdq (66),(VEX),(o128) 431 - 69: punpckhwd Pq,Qd | punpckhwd Vdq,Wdq (66),(VEX),(o128) 432 - 6a: punpckhdq Pq,Qd | punpckhdq Vdq,Wdq 
(66),(VEX),(o128) 433 - 6b: packssdw Pq,Qd | packssdw Vdq,Wdq (66),(VEX),(o128) 434 - 6c: punpcklqdq Vdq,Wdq (66),(VEX),(o128) 435 - 6d: punpckhqdq Vdq,Wdq (66),(VEX),(o128) 436 - 6e: movd/q/ Pd,Ed/q | movd/q Vdq,Ed/q (66),(VEX),(o128) 437 - 6f: movq Pq,Qq | movdqa Vdq,Wdq (66),(VEX) | movdqu Vdq,Wdq (F3),(VEX) 408 + 60: punpcklbw Pq,Qd | vpunpcklbw Vx,Hx,Wx (66),(v1) 409 + 61: punpcklwd Pq,Qd | vpunpcklwd Vx,Hx,Wx (66),(v1) 410 + 62: punpckldq Pq,Qd | vpunpckldq Vx,Hx,Wx (66),(v1) 411 + 63: packsswb Pq,Qq | vpacksswb Vx,Hx,Wx (66),(v1) 412 + 64: pcmpgtb Pq,Qq | vpcmpgtb Vx,Hx,Wx (66),(v1) 413 + 65: pcmpgtw Pq,Qq | vpcmpgtw Vx,Hx,Wx (66),(v1) 414 + 66: pcmpgtd Pq,Qq | vpcmpgtd Vx,Hx,Wx (66),(v1) 415 + 67: packuswb Pq,Qq | vpackuswb Vx,Hx,Wx (66),(v1) 416 + 68: punpckhbw Pq,Qd | vpunpckhbw Vx,Hx,Wx (66),(v1) 417 + 69: punpckhwd Pq,Qd | vpunpckhwd Vx,Hx,Wx (66),(v1) 418 + 6a: punpckhdq Pq,Qd | vpunpckhdq Vx,Hx,Wx (66),(v1) 419 + 6b: packssdw Pq,Qd | vpackssdw Vx,Hx,Wx (66),(v1) 420 + 6c: vpunpcklqdq Vx,Hx,Wx (66),(v1) 421 + 6d: vpunpckhqdq Vx,Hx,Wx (66),(v1) 422 + 6e: movd/q Pd,Ey | vmovd/q Vy,Ey (66),(v1) 423 + 6f: movq Pq,Qq | vmovdqa Vx,Wx (66) | vmovdqu Vx,Wx (F3) 438 424 # 0x0f 0x70-0x7f 439 - 70: pshufw Pq,Qq,Ib | pshufd Vdq,Wdq,Ib (66),(VEX),(o128) | pshufhw Vdq,Wdq,Ib (F3),(VEX),(o128) | pshuflw VdqWdq,Ib (F2),(VEX),(o128) 425 + 70: pshufw Pq,Qq,Ib | vpshufd Vx,Wx,Ib (66),(v1) | vpshufhw Vx,Wx,Ib (F3),(v1) | vpshuflw Vx,Wx,Ib (F2),(v1) 440 426 71: Grp12 (1A) 441 427 72: Grp13 (1A) 442 428 73: Grp14 (1A) 443 - 74: pcmpeqb Pq,Qq | pcmpeqb Vdq,Wdq (66),(VEX),(o128) 444 - 75: pcmpeqw Pq,Qq | pcmpeqw Vdq,Wdq (66),(VEX),(o128) 445 - 76: pcmpeqd Pq,Qq | pcmpeqd Vdq,Wdq (66),(VEX),(o128) 446 - 77: emms/vzeroupper/vzeroall (VEX) 447 - 78: VMREAD Ed/q,Gd/q 448 - 79: VMWRITE Gd/q,Ed/q 429 + 74: pcmpeqb Pq,Qq | vpcmpeqb Vx,Hx,Wx (66),(v1) 430 + 75: pcmpeqw Pq,Qq | vpcmpeqw Vx,Hx,Wx (66),(v1) 431 + 76: pcmpeqd Pq,Qq | vpcmpeqd Vx,Hx,Wx (66),(v1) 432 + # Note: Remove (v), 
because vzeroall and vzeroupper becomes emms without VEX. 433 + 77: emms | vzeroupper | vzeroall 434 + 78: VMREAD Ey,Gy 435 + 79: VMWRITE Gy,Ey 449 436 7a: 450 437 7b: 451 - 7c: haddps Vps,Wps (F2),(VEX) | haddpd Vpd,Wpd (66),(VEX) 452 - 7d: hsubps Vps,Wps (F2),(VEX) | hsubpd Vpd,Wpd (66),(VEX) 453 - 7e: movd/q Ed/q,Pd | movd/q Ed/q,Vdq (66),(VEX),(o128) | movq Vq,Wq (F3),(VEX),(o128) 454 - 7f: movq Qq,Pq | movdqa Wdq,Vdq (66),(VEX) | movdqu Wdq,Vdq (F3),(VEX) 438 + 7c: vhaddpd Vpd,Hpd,Wpd (66) | vhaddps Vps,Hps,Wps (F2) 439 + 7d: vhsubpd Vpd,Hpd,Wpd (66) | vhsubps Vps,Hps,Wps (F2) 440 + 7e: movd/q Ey,Pd | vmovd/q Ey,Vy (66),(v1) | vmovq Vq,Wq (F3),(v1) 441 + 7f: movq Qq,Pq | vmovdqa Wx,Vx (66) | vmovdqu Wx,Vx (F3) 455 442 # 0x0f 0x80-0x8f 456 443 80: JO Jz (f64) 457 444 81: JNO Jz (f64) 458 - 82: JB/JNAE/JC Jz (f64) 459 - 83: JNB/JAE/JNC Jz (f64) 460 - 84: JZ/JE Jz (f64) 461 - 85: JNZ/JNE Jz (f64) 445 + 82: JB/JC/JNAE Jz (f64) 446 + 83: JAE/JNB/JNC Jz (f64) 447 + 84: JE/JZ Jz (f64) 448 + 85: JNE/JNZ Jz (f64) 462 449 86: JBE/JNA Jz (f64) 463 - 87: JNBE/JA Jz (f64) 450 + 87: JA/JNBE Jz (f64) 464 451 88: JS Jz (f64) 465 452 89: JNS Jz (f64) 466 453 8a: JP/JPE Jz (f64) ··· 517 502 b9: Grp10 (1A) 518 503 ba: Grp8 Ev,Ib (1A) 519 504 bb: BTC Ev,Gv 520 - bc: BSF Gv,Ev 521 - bd: BSR Gv,Ev 505 + bc: BSF Gv,Ev | TZCNT Gv,Ev (F3) 506 + bd: BSR Gv,Ev | LZCNT Gv,Ev (F3) 522 507 be: MOVSX Gv,Eb 523 508 bf: MOVSX Gv,Ew 524 509 # 0x0f 0xc0-0xcf 525 510 c0: XADD Eb,Gb 526 511 c1: XADD Ev,Gv 527 - c2: cmpps Vps,Wps,Ib (VEX) | cmpss Vss,Wss,Ib (F3),(VEX),(o128) | cmppd Vpd,Wpd,Ib (66),(VEX) | cmpsd Vsd,Wsd,Ib (F2),(VEX) 528 - c3: movnti Md/q,Gd/q 529 - c4: pinsrw Pq,Rd/q/Mw,Ib | pinsrw Vdq,Rd/q/Mw,Ib (66),(VEX),(o128) 530 - c5: pextrw Gd,Nq,Ib | pextrw Gd,Udq,Ib (66),(VEX),(o128) 531 - c6: shufps Vps,Wps,Ib (VEX) | shufpd Vpd,Wpd,Ib (66),(VEX) 512 + c2: vcmpps Vps,Hps,Wps,Ib | vcmppd Vpd,Hpd,Wpd,Ib (66) | vcmpss Vss,Hss,Wss,Ib (F3),(v1) | vcmpsd Vsd,Hsd,Wsd,Ib (F2),(v1) 513 + c3: 
movnti My,Gy 514 + c4: pinsrw Pq,Ry/Mw,Ib | vpinsrw Vdq,Hdq,Ry/Mw,Ib (66),(v1) 515 + c5: pextrw Gd,Nq,Ib | vpextrw Gd,Udq,Ib (66),(v1) 516 + c6: vshufps Vps,Hps,Wps,Ib | vshufpd Vpd,Hpd,Wpd,Ib (66) 532 517 c7: Grp9 (1A) 533 518 c8: BSWAP RAX/EAX/R8/R8D 534 519 c9: BSWAP RCX/ECX/R9/R9D ··· 539 524 ce: BSWAP RSI/ESI/R14/R14D 540 525 cf: BSWAP RDI/EDI/R15/R15D 541 526 # 0x0f 0xd0-0xdf 542 - d0: addsubps Vps,Wps (F2),(VEX) | addsubpd Vpd,Wpd (66),(VEX) 543 - d1: psrlw Pq,Qq | psrlw Vdq,Wdq (66),(VEX),(o128) 544 - d2: psrld Pq,Qq | psrld Vdq,Wdq (66),(VEX),(o128) 545 - d3: psrlq Pq,Qq | psrlq Vdq,Wdq (66),(VEX),(o128) 546 - d4: paddq Pq,Qq | paddq Vdq,Wdq (66),(VEX),(o128) 547 - d5: pmullw Pq,Qq | pmullw Vdq,Wdq (66),(VEX),(o128) 548 - d6: movq Wq,Vq (66),(VEX),(o128) | movq2dq Vdq,Nq (F3) | movdq2q Pq,Uq (F2) 549 - d7: pmovmskb Gd,Nq | pmovmskb Gd,Udq (66),(VEX),(o128) 550 - d8: psubusb Pq,Qq | psubusb Vdq,Wdq (66),(VEX),(o128) 551 - d9: psubusw Pq,Qq | psubusw Vdq,Wdq (66),(VEX),(o128) 552 - da: pminub Pq,Qq | pminub Vdq,Wdq (66),(VEX),(o128) 553 - db: pand Pq,Qq | pand Vdq,Wdq (66),(VEX),(o128) 554 - dc: paddusb Pq,Qq | paddusb Vdq,Wdq (66),(VEX),(o128) 555 - dd: paddusw Pq,Qq | paddusw Vdq,Wdq (66),(VEX),(o128) 556 - de: pmaxub Pq,Qq | pmaxub Vdq,Wdq (66),(VEX),(o128) 557 - df: pandn Pq,Qq | pandn Vdq,Wdq (66),(VEX),(o128) 527 + d0: vaddsubpd Vpd,Hpd,Wpd (66) | vaddsubps Vps,Hps,Wps (F2) 528 + d1: psrlw Pq,Qq | vpsrlw Vx,Hx,Wx (66),(v1) 529 + d2: psrld Pq,Qq | vpsrld Vx,Hx,Wx (66),(v1) 530 + d3: psrlq Pq,Qq | vpsrlq Vx,Hx,Wx (66),(v1) 531 + d4: paddq Pq,Qq | vpaddq Vx,Hx,Wx (66),(v1) 532 + d5: pmullw Pq,Qq | vpmullw Vx,Hx,Wx (66),(v1) 533 + d6: vmovq Wq,Vq (66),(v1) | movq2dq Vdq,Nq (F3) | movdq2q Pq,Uq (F2) 534 + d7: pmovmskb Gd,Nq | vpmovmskb Gd,Ux (66),(v1) 535 + d8: psubusb Pq,Qq | vpsubusb Vx,Hx,Wx (66),(v1) 536 + d9: psubusw Pq,Qq | vpsubusw Vx,Hx,Wx (66),(v1) 537 + da: pminub Pq,Qq | vpminub Vx,Hx,Wx (66),(v1) 538 + db: pand Pq,Qq | vpand Vx,Hx,Wx (66),(v1) 
539 + dc: paddusb Pq,Qq | vpaddusb Vx,Hx,Wx (66),(v1) 540 + dd: paddusw Pq,Qq | vpaddusw Vx,Hx,Wx (66),(v1) 541 + de: pmaxub Pq,Qq | vpmaxub Vx,Hx,Wx (66),(v1) 542 + df: pandn Pq,Qq | vpandn Vx,Hx,Wx (66),(v1) 558 543 # 0x0f 0xe0-0xef 559 - e0: pavgb Pq,Qq | pavgb Vdq,Wdq (66),(VEX),(o128) 560 - e1: psraw Pq,Qq | psraw Vdq,Wdq (66),(VEX),(o128) 561 - e2: psrad Pq,Qq | psrad Vdq,Wdq (66),(VEX),(o128) 562 - e3: pavgw Pq,Qq | pavgw Vdq,Wdq (66),(VEX),(o128) 563 - e4: pmulhuw Pq,Qq | pmulhuw Vdq,Wdq (66),(VEX),(o128) 564 - e5: pmulhw Pq,Qq | pmulhw Vdq,Wdq (66),(VEX),(o128) 565 - e6: cvtpd2dq Vdq,Wpd (F2),(VEX) | cvttpd2dq Vdq,Wpd (66),(VEX) | cvtdq2pd Vpd,Wdq (F3),(VEX) 566 - e7: movntq Mq,Pq | movntdq Mdq,Vdq (66),(VEX) 567 - e8: psubsb Pq,Qq | psubsb Vdq,Wdq (66),(VEX),(o128) 568 - e9: psubsw Pq,Qq | psubsw Vdq,Wdq (66),(VEX),(o128) 569 - ea: pminsw Pq,Qq | pminsw Vdq,Wdq (66),(VEX),(o128) 570 - eb: por Pq,Qq | por Vdq,Wdq (66),(VEX),(o128) 571 - ec: paddsb Pq,Qq | paddsb Vdq,Wdq (66),(VEX),(o128) 572 - ed: paddsw Pq,Qq | paddsw Vdq,Wdq (66),(VEX),(o128) 573 - ee: pmaxsw Pq,Qq | pmaxsw Vdq,Wdq (66),(VEX),(o128) 574 - ef: pxor Pq,Qq | pxor Vdq,Wdq (66),(VEX),(o128) 544 + e0: pavgb Pq,Qq | vpavgb Vx,Hx,Wx (66),(v1) 545 + e1: psraw Pq,Qq | vpsraw Vx,Hx,Wx (66),(v1) 546 + e2: psrad Pq,Qq | vpsrad Vx,Hx,Wx (66),(v1) 547 + e3: pavgw Pq,Qq | vpavgw Vx,Hx,Wx (66),(v1) 548 + e4: pmulhuw Pq,Qq | vpmulhuw Vx,Hx,Wx (66),(v1) 549 + e5: pmulhw Pq,Qq | vpmulhw Vx,Hx,Wx (66),(v1) 550 + e6: vcvttpd2dq Vx,Wpd (66) | vcvtdq2pd Vx,Wdq (F3) | vcvtpd2dq Vx,Wpd (F2) 551 + e7: movntq Mq,Pq | vmovntdq Mx,Vx (66) 552 + e8: psubsb Pq,Qq | vpsubsb Vx,Hx,Wx (66),(v1) 553 + e9: psubsw Pq,Qq | vpsubsw Vx,Hx,Wx (66),(v1) 554 + ea: pminsw Pq,Qq | vpminsw Vx,Hx,Wx (66),(v1) 555 + eb: por Pq,Qq | vpor Vx,Hx,Wx (66),(v1) 556 + ec: paddsb Pq,Qq | vpaddsb Vx,Hx,Wx (66),(v1) 557 + ed: paddsw Pq,Qq | vpaddsw Vx,Hx,Wx (66),(v1) 558 + ee: pmaxsw Pq,Qq | vpmaxsw Vx,Hx,Wx (66),(v1) 559 + ef: pxor Pq,Qq | 
vpxor Vx,Hx,Wx (66),(v1) 575 560 # 0x0f 0xf0-0xff 576 - f0: lddqu Vdq,Mdq (F2),(VEX) 577 - f1: psllw Pq,Qq | psllw Vdq,Wdq (66),(VEX),(o128) 578 - f2: pslld Pq,Qq | pslld Vdq,Wdq (66),(VEX),(o128) 579 - f3: psllq Pq,Qq | psllq Vdq,Wdq (66),(VEX),(o128) 580 - f4: pmuludq Pq,Qq | pmuludq Vdq,Wdq (66),(VEX),(o128) 581 - f5: pmaddwd Pq,Qq | pmaddwd Vdq,Wdq (66),(VEX),(o128) 582 - f6: psadbw Pq,Qq | psadbw Vdq,Wdq (66),(VEX),(o128) 583 - f7: maskmovq Pq,Nq | maskmovdqu Vdq,Udq (66),(VEX),(o128) 584 - f8: psubb Pq,Qq | psubb Vdq,Wdq (66),(VEX),(o128) 585 - f9: psubw Pq,Qq | psubw Vdq,Wdq (66),(VEX),(o128) 586 - fa: psubd Pq,Qq | psubd Vdq,Wdq (66),(VEX),(o128) 587 - fb: psubq Pq,Qq | psubq Vdq,Wdq (66),(VEX),(o128) 588 - fc: paddb Pq,Qq | paddb Vdq,Wdq (66),(VEX),(o128) 589 - fd: paddw Pq,Qq | paddw Vdq,Wdq (66),(VEX),(o128) 590 - fe: paddd Pq,Qq | paddd Vdq,Wdq (66),(VEX),(o128) 561 + f0: vlddqu Vx,Mx (F2) 562 + f1: psllw Pq,Qq | vpsllw Vx,Hx,Wx (66),(v1) 563 + f2: pslld Pq,Qq | vpslld Vx,Hx,Wx (66),(v1) 564 + f3: psllq Pq,Qq | vpsllq Vx,Hx,Wx (66),(v1) 565 + f4: pmuludq Pq,Qq | vpmuludq Vx,Hx,Wx (66),(v1) 566 + f5: pmaddwd Pq,Qq | vpmaddwd Vx,Hx,Wx (66),(v1) 567 + f6: psadbw Pq,Qq | vpsadbw Vx,Hx,Wx (66),(v1) 568 + f7: maskmovq Pq,Nq | vmaskmovdqu Vx,Ux (66),(v1) 569 + f8: psubb Pq,Qq | vpsubb Vx,Hx,Wx (66),(v1) 570 + f9: psubw Pq,Qq | vpsubw Vx,Hx,Wx (66),(v1) 571 + fa: psubd Pq,Qq | vpsubd Vx,Hx,Wx (66),(v1) 572 + fb: psubq Pq,Qq | vpsubq Vx,Hx,Wx (66),(v1) 573 + fc: paddb Pq,Qq | vpaddb Vx,Hx,Wx (66),(v1) 574 + fd: paddw Pq,Qq | vpaddw Vx,Hx,Wx (66),(v1) 575 + fe: paddd Pq,Qq | vpaddd Vx,Hx,Wx (66),(v1) 591 576 ff: 592 577 EndTable 593 578 ··· 595 580 Referrer: 3-byte escape 1 596 581 AVXcode: 2 597 582 # 0x0f 0x38 0x00-0x0f 598 - 00: pshufb Pq,Qq | pshufb Vdq,Wdq (66),(VEX),(o128) 599 - 01: phaddw Pq,Qq | phaddw Vdq,Wdq (66),(VEX),(o128) 600 - 02: phaddd Pq,Qq | phaddd Vdq,Wdq (66),(VEX),(o128) 601 - 03: phaddsw Pq,Qq | phaddsw Vdq,Wdq (66),(VEX),(o128) 602 - 04: 
pmaddubsw Pq,Qq | pmaddubsw Vdq,Wdq (66),(VEX),(o128) 603 - 05: phsubw Pq,Qq | phsubw Vdq,Wdq (66),(VEX),(o128) 604 - 06: phsubd Pq,Qq | phsubd Vdq,Wdq (66),(VEX),(o128) 605 - 07: phsubsw Pq,Qq | phsubsw Vdq,Wdq (66),(VEX),(o128) 606 - 08: psignb Pq,Qq | psignb Vdq,Wdq (66),(VEX),(o128) 607 - 09: psignw Pq,Qq | psignw Vdq,Wdq (66),(VEX),(o128) 608 - 0a: psignd Pq,Qq | psignd Vdq,Wdq (66),(VEX),(o128) 609 - 0b: pmulhrsw Pq,Qq | pmulhrsw Vdq,Wdq (66),(VEX),(o128) 610 - 0c: Vpermilps /r (66),(oVEX) 611 - 0d: Vpermilpd /r (66),(oVEX) 612 - 0e: vtestps /r (66),(oVEX) 613 - 0f: vtestpd /r (66),(oVEX) 583 + 00: pshufb Pq,Qq | vpshufb Vx,Hx,Wx (66),(v1) 584 + 01: phaddw Pq,Qq | vphaddw Vx,Hx,Wx (66),(v1) 585 + 02: phaddd Pq,Qq | vphaddd Vx,Hx,Wx (66),(v1) 586 + 03: phaddsw Pq,Qq | vphaddsw Vx,Hx,Wx (66),(v1) 587 + 04: pmaddubsw Pq,Qq | vpmaddubsw Vx,Hx,Wx (66),(v1) 588 + 05: phsubw Pq,Qq | vphsubw Vx,Hx,Wx (66),(v1) 589 + 06: phsubd Pq,Qq | vphsubd Vx,Hx,Wx (66),(v1) 590 + 07: phsubsw Pq,Qq | vphsubsw Vx,Hx,Wx (66),(v1) 591 + 08: psignb Pq,Qq | vpsignb Vx,Hx,Wx (66),(v1) 592 + 09: psignw Pq,Qq | vpsignw Vx,Hx,Wx (66),(v1) 593 + 0a: psignd Pq,Qq | vpsignd Vx,Hx,Wx (66),(v1) 594 + 0b: pmulhrsw Pq,Qq | vpmulhrsw Vx,Hx,Wx (66),(v1) 595 + 0c: vpermilps Vx,Hx,Wx (66),(v) 596 + 0d: vpermilpd Vx,Hx,Wx (66),(v) 597 + 0e: vtestps Vx,Wx (66),(v) 598 + 0f: vtestpd Vx,Wx (66),(v) 614 599 # 0x0f 0x38 0x10-0x1f 615 600 10: pblendvb Vdq,Wdq (66) 616 601 11: 617 602 12: 618 - 13: 603 + 13: vcvtph2ps Vx,Wx,Ib (66),(v) 619 604 14: blendvps Vdq,Wdq (66) 620 605 15: blendvpd Vdq,Wdq (66) 621 - 16: 622 - 17: ptest Vdq,Wdq (66),(VEX) 623 - 18: vbroadcastss /r (66),(oVEX) 624 - 19: vbroadcastsd /r (66),(oVEX),(o256) 625 - 1a: vbroadcastf128 /r (66),(oVEX),(o256) 606 + 16: vpermps Vqq,Hqq,Wqq (66),(v) 607 + 17: vptest Vx,Wx (66) 608 + 18: vbroadcastss Vx,Wd (66),(v) 609 + 19: vbroadcastsd Vqq,Wq (66),(v) 610 + 1a: vbroadcastf128 Vqq,Mdq (66),(v) 626 611 1b: 627 - 1c: pabsb Pq,Qq | pabsb Vdq,Wdq 
(66),(VEX),(o128) 628 - 1d: pabsw Pq,Qq | pabsw Vdq,Wdq (66),(VEX),(o128) 629 - 1e: pabsd Pq,Qq | pabsd Vdq,Wdq (66),(VEX),(o128) 612 + 1c: pabsb Pq,Qq | vpabsb Vx,Wx (66),(v1) 613 + 1d: pabsw Pq,Qq | vpabsw Vx,Wx (66),(v1) 614 + 1e: pabsd Pq,Qq | vpabsd Vx,Wx (66),(v1) 630 615 1f: 631 616 # 0x0f 0x38 0x20-0x2f 632 - 20: pmovsxbw Vdq,Udq/Mq (66),(VEX),(o128) 633 - 21: pmovsxbd Vdq,Udq/Md (66),(VEX),(o128) 634 - 22: pmovsxbq Vdq,Udq/Mw (66),(VEX),(o128) 635 - 23: pmovsxwd Vdq,Udq/Mq (66),(VEX),(o128) 636 - 24: pmovsxwq Vdq,Udq/Md (66),(VEX),(o128) 637 - 25: pmovsxdq Vdq,Udq/Mq (66),(VEX),(o128) 617 + 20: vpmovsxbw Vx,Ux/Mq (66),(v1) 618 + 21: vpmovsxbd Vx,Ux/Md (66),(v1) 619 + 22: vpmovsxbq Vx,Ux/Mw (66),(v1) 620 + 23: vpmovsxwd Vx,Ux/Mq (66),(v1) 621 + 24: vpmovsxwq Vx,Ux/Md (66),(v1) 622 + 25: vpmovsxdq Vx,Ux/Mq (66),(v1) 638 623 26: 639 624 27: 640 - 28: pmuldq Vdq,Wdq (66),(VEX),(o128) 641 - 29: pcmpeqq Vdq,Wdq (66),(VEX),(o128) 642 - 2a: movntdqa Vdq,Mdq (66),(VEX),(o128) 643 - 2b: packusdw Vdq,Wdq (66),(VEX),(o128) 644 - 2c: vmaskmovps(ld) /r (66),(oVEX) 645 - 2d: vmaskmovpd(ld) /r (66),(oVEX) 646 - 2e: vmaskmovps(st) /r (66),(oVEX) 647 - 2f: vmaskmovpd(st) /r (66),(oVEX) 625 + 28: vpmuldq Vx,Hx,Wx (66),(v1) 626 + 29: vpcmpeqq Vx,Hx,Wx (66),(v1) 627 + 2a: vmovntdqa Vx,Mx (66),(v1) 628 + 2b: vpackusdw Vx,Hx,Wx (66),(v1) 629 + 2c: vmaskmovps Vx,Hx,Mx (66),(v) 630 + 2d: vmaskmovpd Vx,Hx,Mx (66),(v) 631 + 2e: vmaskmovps Mx,Hx,Vx (66),(v) 632 + 2f: vmaskmovpd Mx,Hx,Vx (66),(v) 648 633 # 0x0f 0x38 0x30-0x3f 649 - 30: pmovzxbw Vdq,Udq/Mq (66),(VEX),(o128) 650 - 31: pmovzxbd Vdq,Udq/Md (66),(VEX),(o128) 651 - 32: pmovzxbq Vdq,Udq/Mw (66),(VEX),(o128) 652 - 33: pmovzxwd Vdq,Udq/Mq (66),(VEX),(o128) 653 - 34: pmovzxwq Vdq,Udq/Md (66),(VEX),(o128) 654 - 35: pmovzxdq Vdq,Udq/Mq (66),(VEX),(o128) 655 - 36: 656 - 37: pcmpgtq Vdq,Wdq (66),(VEX),(o128) 657 - 38: pminsb Vdq,Wdq (66),(VEX),(o128) 658 - 39: pminsd Vdq,Wdq (66),(VEX),(o128) 659 - 3a: pminuw Vdq,Wdq 
(66),(VEX),(o128) 660 - 3b: pminud Vdq,Wdq (66),(VEX),(o128) 661 - 3c: pmaxsb Vdq,Wdq (66),(VEX),(o128) 662 - 3d: pmaxsd Vdq,Wdq (66),(VEX),(o128) 663 - 3e: pmaxuw Vdq,Wdq (66),(VEX),(o128) 664 - 3f: pmaxud Vdq,Wdq (66),(VEX),(o128) 634 + 30: vpmovzxbw Vx,Ux/Mq (66),(v1) 635 + 31: vpmovzxbd Vx,Ux/Md (66),(v1) 636 + 32: vpmovzxbq Vx,Ux/Mw (66),(v1) 637 + 33: vpmovzxwd Vx,Ux/Mq (66),(v1) 638 + 34: vpmovzxwq Vx,Ux/Md (66),(v1) 639 + 35: vpmovzxdq Vx,Ux/Mq (66),(v1) 640 + 36: vpermd Vqq,Hqq,Wqq (66),(v) 641 + 37: vpcmpgtq Vx,Hx,Wx (66),(v1) 642 + 38: vpminsb Vx,Hx,Wx (66),(v1) 643 + 39: vpminsd Vx,Hx,Wx (66),(v1) 644 + 3a: vpminuw Vx,Hx,Wx (66),(v1) 645 + 3b: vpminud Vx,Hx,Wx (66),(v1) 646 + 3c: vpmaxsb Vx,Hx,Wx (66),(v1) 647 + 3d: vpmaxsd Vx,Hx,Wx (66),(v1) 648 + 3e: vpmaxuw Vx,Hx,Wx (66),(v1) 649 + 3f: vpmaxud Vx,Hx,Wx (66),(v1) 665 650 # 0x0f 0x38 0x40-0x8f 666 - 40: pmulld Vdq,Wdq (66),(VEX),(o128) 667 - 41: phminposuw Vdq,Wdq (66),(VEX),(o128) 668 - 80: INVEPT Gd/q,Mdq (66) 669 - 81: INVPID Gd/q,Mdq (66) 651 + 40: vpmulld Vx,Hx,Wx (66),(v1) 652 + 41: vphminposuw Vdq,Wdq (66),(v1) 653 + 42: 654 + 43: 655 + 44: 656 + 45: vpsrlvd/q Vx,Hx,Wx (66),(v) 657 + 46: vpsravd Vx,Hx,Wx (66),(v) 658 + 47: vpsllvd/q Vx,Hx,Wx (66),(v) 659 + # Skip 0x48-0x57 660 + 58: vpbroadcastd Vx,Wx (66),(v) 661 + 59: vpbroadcastq Vx,Wx (66),(v) 662 + 5a: vbroadcasti128 Vqq,Mdq (66),(v) 663 + # Skip 0x5b-0x77 664 + 78: vpbroadcastb Vx,Wx (66),(v) 665 + 79: vpbroadcastw Vx,Wx (66),(v) 666 + # Skip 0x7a-0x7f 667 + 80: INVEPT Gy,Mdq (66) 668 + 81: INVPID Gy,Mdq (66) 669 + 82: INVPCID Gy,Mdq (66) 670 + 8c: vpmaskmovd/q Vx,Hx,Mx (66),(v) 671 + 8e: vpmaskmovd/q Mx,Vx,Hx (66),(v) 670 672 # 0x0f 0x38 0x90-0xbf (FMA) 671 - 96: vfmaddsub132pd/ps /r (66),(VEX) 672 - 97: vfmsubadd132pd/ps /r (66),(VEX) 673 - 98: vfmadd132pd/ps /r (66),(VEX) 674 - 99: vfmadd132sd/ss /r (66),(VEX),(o128) 675 - 9a: vfmsub132pd/ps /r (66),(VEX) 676 - 9b: vfmsub132sd/ss /r (66),(VEX),(o128) 677 - 9c: vfnmadd132pd/ps /r 
(66),(VEX) 678 - 9d: vfnmadd132sd/ss /r (66),(VEX),(o128) 679 - 9e: vfnmsub132pd/ps /r (66),(VEX) 680 - 9f: vfnmsub132sd/ss /r (66),(VEX),(o128) 681 - a6: vfmaddsub213pd/ps /r (66),(VEX) 682 - a7: vfmsubadd213pd/ps /r (66),(VEX) 683 - a8: vfmadd213pd/ps /r (66),(VEX) 684 - a9: vfmadd213sd/ss /r (66),(VEX),(o128) 685 - aa: vfmsub213pd/ps /r (66),(VEX) 686 - ab: vfmsub213sd/ss /r (66),(VEX),(o128) 687 - ac: vfnmadd213pd/ps /r (66),(VEX) 688 - ad: vfnmadd213sd/ss /r (66),(VEX),(o128) 689 - ae: vfnmsub213pd/ps /r (66),(VEX) 690 - af: vfnmsub213sd/ss /r (66),(VEX),(o128) 691 - b6: vfmaddsub231pd/ps /r (66),(VEX) 692 - b7: vfmsubadd231pd/ps /r (66),(VEX) 693 - b8: vfmadd231pd/ps /r (66),(VEX) 694 - b9: vfmadd231sd/ss /r (66),(VEX),(o128) 695 - ba: vfmsub231pd/ps /r (66),(VEX) 696 - bb: vfmsub231sd/ss /r (66),(VEX),(o128) 697 - bc: vfnmadd231pd/ps /r (66),(VEX) 698 - bd: vfnmadd231sd/ss /r (66),(VEX),(o128) 699 - be: vfnmsub231pd/ps /r (66),(VEX) 700 - bf: vfnmsub231sd/ss /r (66),(VEX),(o128) 673 + 90: vgatherdd/q Vx,Hx,Wx (66),(v) 674 + 91: vgatherqd/q Vx,Hx,Wx (66),(v) 675 + 92: vgatherdps/d Vx,Hx,Wx (66),(v) 676 + 93: vgatherqps/d Vx,Hx,Wx (66),(v) 677 + 94: 678 + 95: 679 + 96: vfmaddsub132ps/d Vx,Hx,Wx (66),(v) 680 + 97: vfmsubadd132ps/d Vx,Hx,Wx (66),(v) 681 + 98: vfmadd132ps/d Vx,Hx,Wx (66),(v) 682 + 99: vfmadd132ss/d Vx,Hx,Wx (66),(v),(v1) 683 + 9a: vfmsub132ps/d Vx,Hx,Wx (66),(v) 684 + 9b: vfmsub132ss/d Vx,Hx,Wx (66),(v),(v1) 685 + 9c: vfnmadd132ps/d Vx,Hx,Wx (66),(v) 686 + 9d: vfnmadd132ss/d Vx,Hx,Wx (66),(v),(v1) 687 + 9e: vfnmsub132ps/d Vx,Hx,Wx (66),(v) 688 + 9f: vfnmsub132ss/d Vx,Hx,Wx (66),(v),(v1) 689 + a6: vfmaddsub213ps/d Vx,Hx,Wx (66),(v) 690 + a7: vfmsubadd213ps/d Vx,Hx,Wx (66),(v) 691 + a8: vfmadd213ps/d Vx,Hx,Wx (66),(v) 692 + a9: vfmadd213ss/d Vx,Hx,Wx (66),(v),(v1) 693 + aa: vfmsub213ps/d Vx,Hx,Wx (66),(v) 694 + ab: vfmsub213ss/d Vx,Hx,Wx (66),(v),(v1) 695 + ac: vfnmadd213ps/d Vx,Hx,Wx (66),(v) 696 + ad: vfnmadd213ss/d Vx,Hx,Wx (66),(v),(v1) 697 + 
ae: vfnmsub213ps/d Vx,Hx,Wx (66),(v) 698 + af: vfnmsub213ss/d Vx,Hx,Wx (66),(v),(v1) 699 + b6: vfmaddsub231ps/d Vx,Hx,Wx (66),(v) 700 + b7: vfmsubadd231ps/d Vx,Hx,Wx (66),(v) 701 + b8: vfmadd231ps/d Vx,Hx,Wx (66),(v) 702 + b9: vfmadd231ss/d Vx,Hx,Wx (66),(v),(v1) 703 + ba: vfmsub231ps/d Vx,Hx,Wx (66),(v) 704 + bb: vfmsub231ss/d Vx,Hx,Wx (66),(v),(v1) 705 + bc: vfnmadd231ps/d Vx,Hx,Wx (66),(v) 706 + bd: vfnmadd231ss/d Vx,Hx,Wx (66),(v),(v1) 707 + be: vfnmsub231ps/d Vx,Hx,Wx (66),(v) 708 + bf: vfnmsub231ss/d Vx,Hx,Wx (66),(v),(v1) 701 709 # 0x0f 0x38 0xc0-0xff 702 - db: aesimc Vdq,Wdq (66),(VEX),(o128) 703 - dc: aesenc Vdq,Wdq (66),(VEX),(o128) 704 - dd: aesenclast Vdq,Wdq (66),(VEX),(o128) 705 - de: aesdec Vdq,Wdq (66),(VEX),(o128) 706 - df: aesdeclast Vdq,Wdq (66),(VEX),(o128) 707 - f0: MOVBE Gv,Mv | CRC32 Gd,Eb (F2) 708 - f1: MOVBE Mv,Gv | CRC32 Gd,Ev (F2) 710 + db: VAESIMC Vdq,Wdq (66),(v1) 711 + dc: VAESENC Vdq,Hdq,Wdq (66),(v1) 712 + dd: VAESENCLAST Vdq,Hdq,Wdq (66),(v1) 713 + de: VAESDEC Vdq,Hdq,Wdq (66),(v1) 714 + df: VAESDECLAST Vdq,Hdq,Wdq (66),(v1) 715 + f0: MOVBE Gy,My | MOVBE Gw,Mw (66) | CRC32 Gd,Eb (F2) 716 + f1: MOVBE My,Gy | MOVBE Mw,Gw (66) | CRC32 Gd,Ey (F2) 717 + f3: ANDN Gy,By,Ey (v) 718 + f4: Grp17 (1A) 719 + f5: BZHI Gy,Ey,By (v) | PEXT Gy,By,Ey (F3),(v) | PDEP Gy,By,Ey (F2),(v) 720 + f6: MULX By,Gy,rDX,Ey (F2),(v) 721 + f7: BEXTR Gy,Ey,By (v) | SHLX Gy,Ey,By (66),(v) | SARX Gy,Ey,By (F3),(v) | SHRX Gy,Ey,By (F2),(v) 709 722 EndTable 710 723 711 724 Table: 3-byte opcode 2 (0x0f 0x3a) 712 725 Referrer: 3-byte escape 2 713 726 AVXcode: 3 714 727 # 0x0f 0x3a 0x00-0xff 715 - 04: vpermilps /r,Ib (66),(oVEX) 716 - 05: vpermilpd /r,Ib (66),(oVEX) 717 - 06: vperm2f128 /r,Ib (66),(oVEX),(o256) 718 - 08: roundps Vdq,Wdq,Ib (66),(VEX) 719 - 09: roundpd Vdq,Wdq,Ib (66),(VEX) 720 - 0a: roundss Vss,Wss,Ib (66),(VEX),(o128) 721 - 0b: roundsd Vsd,Wsd,Ib (66),(VEX),(o128) 722 - 0c: blendps Vdq,Wdq,Ib (66),(VEX) 723 - 0d: blendpd Vdq,Wdq,Ib (66),(VEX) 724 - 0e: 
pblendw Vdq,Wdq,Ib (66),(VEX),(o128) 725 - 0f: palignr Pq,Qq,Ib | palignr Vdq,Wdq,Ib (66),(VEX),(o128) 726 - 14: pextrb Rd/Mb,Vdq,Ib (66),(VEX),(o128) 727 - 15: pextrw Rd/Mw,Vdq,Ib (66),(VEX),(o128) 728 - 16: pextrd/pextrq Ed/q,Vdq,Ib (66),(VEX),(o128) 729 - 17: extractps Ed,Vdq,Ib (66),(VEX),(o128) 730 - 18: vinsertf128 /r,Ib (66),(oVEX),(o256) 731 - 19: vextractf128 /r,Ib (66),(oVEX),(o256) 732 - 20: pinsrb Vdq,Rd/q/Mb,Ib (66),(VEX),(o128) 733 - 21: insertps Vdq,Udq/Md,Ib (66),(VEX),(o128) 734 - 22: pinsrd/pinsrq Vdq,Ed/q,Ib (66),(VEX),(o128) 735 - 40: dpps Vdq,Wdq,Ib (66),(VEX) 736 - 41: dppd Vdq,Wdq,Ib (66),(VEX),(o128) 737 - 42: mpsadbw Vdq,Wdq,Ib (66),(VEX),(o128) 738 - 44: pclmulq Vdq,Wdq,Ib (66),(VEX),(o128) 739 - 4a: vblendvps /r,Ib (66),(oVEX) 740 - 4b: vblendvpd /r,Ib (66),(oVEX) 741 - 4c: vpblendvb /r,Ib (66),(oVEX),(o128) 742 - 60: pcmpestrm Vdq,Wdq,Ib (66),(VEX),(o128) 743 - 61: pcmpestri Vdq,Wdq,Ib (66),(VEX),(o128) 744 - 62: pcmpistrm Vdq,Wdq,Ib (66),(VEX),(o128) 745 - 63: pcmpistri Vdq,Wdq,Ib (66),(VEX),(o128) 746 - df: aeskeygenassist Vdq,Wdq,Ib (66),(VEX),(o128) 728 + 00: vpermq Vqq,Wqq,Ib (66),(v) 729 + 01: vpermpd Vqq,Wqq,Ib (66),(v) 730 + 02: vpblendd Vx,Hx,Wx,Ib (66),(v) 731 + 03: 732 + 04: vpermilps Vx,Wx,Ib (66),(v) 733 + 05: vpermilpd Vx,Wx,Ib (66),(v) 734 + 06: vperm2f128 Vqq,Hqq,Wqq,Ib (66),(v) 735 + 07: 736 + 08: vroundps Vx,Wx,Ib (66) 737 + 09: vroundpd Vx,Wx,Ib (66) 738 + 0a: vroundss Vss,Wss,Ib (66),(v1) 739 + 0b: vroundsd Vsd,Wsd,Ib (66),(v1) 740 + 0c: vblendps Vx,Hx,Wx,Ib (66) 741 + 0d: vblendpd Vx,Hx,Wx,Ib (66) 742 + 0e: vpblendw Vx,Hx,Wx,Ib (66),(v1) 743 + 0f: palignr Pq,Qq,Ib | vpalignr Vx,Hx,Wx,Ib (66),(v1) 744 + 14: vpextrb Rd/Mb,Vdq,Ib (66),(v1) 745 + 15: vpextrw Rd/Mw,Vdq,Ib (66),(v1) 746 + 16: vpextrd/q Ey,Vdq,Ib (66),(v1) 747 + 17: vextractps Ed,Vdq,Ib (66),(v1) 748 + 18: vinsertf128 Vqq,Hqq,Wqq,Ib (66),(v) 749 + 19: vextractf128 Wdq,Vqq,Ib (66),(v) 750 + 1d: vcvtps2ph Wx,Vx,Ib (66),(v) 751 + 20: vpinsrb Vdq,Hdq,Ry/Mb,Ib 
(66),(v1) 752 + 21: vinsertps Vdq,Hdq,Udq/Md,Ib (66),(v1) 753 + 22: vpinsrd/q Vdq,Hdq,Ey,Ib (66),(v1) 754 + 38: vinserti128 Vqq,Hqq,Wqq,Ib (66),(v) 755 + 39: vextracti128 Wdq,Vqq,Ib (66),(v) 756 + 40: vdpps Vx,Hx,Wx,Ib (66) 757 + 41: vdppd Vdq,Hdq,Wdq,Ib (66),(v1) 758 + 42: vmpsadbw Vx,Hx,Wx,Ib (66),(v1) 759 + 44: vpclmulqdq Vdq,Hdq,Wdq,Ib (66),(v1) 760 + 46: vperm2i128 Vqq,Hqq,Wqq,Ib (66),(v) 761 + 4a: vblendvps Vx,Hx,Wx,Lx (66),(v) 762 + 4b: vblendvpd Vx,Hx,Wx,Lx (66),(v) 763 + 4c: vpblendvb Vx,Hx,Wx,Lx (66),(v1) 764 + 60: vpcmpestrm Vdq,Wdq,Ib (66),(v1) 765 + 61: vpcmpestri Vdq,Wdq,Ib (66),(v1) 766 + 62: vpcmpistrm Vdq,Wdq,Ib (66),(v1) 767 + 63: vpcmpistri Vdq,Wdq,Ib (66),(v1) 768 + df: VAESKEYGEN Vdq,Wdq,Ib (66),(v1) 769 + f0: RORX Gy,Ey,Ib (F2),(v) 747 770 EndTable 748 771 749 772 GrpTable: Grp1 ··· 843 790 2: CALLN Ev (f64) 844 791 3: CALLF Ep 845 792 4: JMPN Ev (f64) 846 - 5: JMPF Ep 793 + 5: JMPF Mp 847 794 6: PUSH Ev (d64) 848 795 7: 849 796 EndTable ··· 860 807 GrpTable: Grp7 861 808 0: SGDT Ms | VMCALL (001),(11B) | VMLAUNCH (010),(11B) | VMRESUME (011),(11B) | VMXOFF (100),(11B) 862 809 1: SIDT Ms | MONITOR (000),(11B) | MWAIT (001) 863 - 2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) 810 + 2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) | VMFUNC (100),(11B) 864 811 3: LIDT Ms 865 812 4: SMSW Mw/Rv 866 813 5: ··· 877 824 878 825 GrpTable: Grp9 879 826 1: CMPXCHG8B/16B Mq/Mdq 880 - 6: VMPTRLD Mq | VMCLEAR Mq (66) | VMXON Mq (F3) 881 - 7: VMPTRST Mq 827 + 6: VMPTRLD Mq | VMCLEAR Mq (66) | VMXON Mq (F3) | RDRAND Rv (11B) 828 + 7: VMPTRST Mq | VMPTRST Mq (F3) 882 829 EndTable 883 830 884 831 GrpTable: Grp10 885 832 EndTable 886 833 887 834 GrpTable: Grp11 835 + # Note: the operands are given by group opcode 888 836 0: MOV 889 837 EndTable 890 838 891 839 GrpTable: Grp12 892 - 2: psrlw Nq,Ib (11B) | psrlw Udq,Ib (66),(11B),(VEX),(o128) 893 - 4: psraw Nq,Ib (11B) | psraw Udq,Ib (66),(11B),(VEX),(o128) 894 - 6: psllw Nq,Ib (11B) | psllw Udq,Ib 
(66),(11B),(VEX),(o128) 840 + 2: psrlw Nq,Ib (11B) | vpsrlw Hx,Ux,Ib (66),(11B),(v1) 841 + 4: psraw Nq,Ib (11B) | vpsraw Hx,Ux,Ib (66),(11B),(v1) 842 + 6: psllw Nq,Ib (11B) | vpsllw Hx,Ux,Ib (66),(11B),(v1) 895 843 EndTable 896 844 897 845 GrpTable: Grp13 898 - 2: psrld Nq,Ib (11B) | psrld Udq,Ib (66),(11B),(VEX),(o128) 899 - 4: psrad Nq,Ib (11B) | psrad Udq,Ib (66),(11B),(VEX),(o128) 900 - 6: pslld Nq,Ib (11B) | pslld Udq,Ib (66),(11B),(VEX),(o128) 846 + 2: psrld Nq,Ib (11B) | vpsrld Hx,Ux,Ib (66),(11B),(v1) 847 + 4: psrad Nq,Ib (11B) | vpsrad Hx,Ux,Ib (66),(11B),(v1) 848 + 6: pslld Nq,Ib (11B) | vpslld Hx,Ux,Ib (66),(11B),(v1) 901 849 EndTable 902 850 903 851 GrpTable: Grp14 904 - 2: psrlq Nq,Ib (11B) | psrlq Udq,Ib (66),(11B),(VEX),(o128) 905 - 3: psrldq Udq,Ib (66),(11B),(VEX),(o128) 906 - 6: psllq Nq,Ib (11B) | psllq Udq,Ib (66),(11B),(VEX),(o128) 907 - 7: pslldq Udq,Ib (66),(11B),(VEX),(o128) 852 + 2: psrlq Nq,Ib (11B) | vpsrlq Hx,Ux,Ib (66),(11B),(v1) 853 + 3: vpsrldq Hx,Ux,Ib (66),(11B),(v1) 854 + 6: psllq Nq,Ib (11B) | vpsllq Hx,Ux,Ib (66),(11B),(v1) 855 + 7: vpslldq Hx,Ux,Ib (66),(11B),(v1) 908 856 EndTable 909 857 910 858 GrpTable: Grp15 911 - 0: fxsave 912 - 1: fxstor 913 - 2: ldmxcsr (VEX) 914 - 3: stmxcsr (VEX) 859 + 0: fxsave | RDFSBASE Ry (F3),(11B) 860 + 1: fxstor | RDGSBASE Ry (F3),(11B) 861 + 2: vldmxcsr Md (v1) | WRFSBASE Ry (F3),(11B) 862 + 3: vstmxcsr Md (v1) | WRGSBASE Ry (F3),(11B) 915 863 4: XSAVE 916 864 5: XRSTOR | lfence (11B) 917 - 6: mfence (11B) 865 + 6: XSAVEOPT | mfence (11B) 918 866 7: clflush | sfence (11B) 919 867 EndTable 920 868 ··· 924 870 1: prefetch T0 925 871 2: prefetch T1 926 872 3: prefetch T2 873 + EndTable 874 + 875 + GrpTable: Grp17 876 + 1: BLSR By,Ey (v) 877 + 2: BLSMSK By,Ey (v) 878 + 3: BLSI By,Ey (v) 927 879 EndTable 928 880 929 881 # AMD's Prefetch Group
+1 -2
arch/x86/oprofile/Makefile
··· 4 4 oprof.o cpu_buffer.o buffer_sync.o \ 5 5 event_buffer.o oprofile_files.o \ 6 6 oprofilefs.o oprofile_stats.o \ 7 - timer_int.o ) 7 + timer_int.o nmi_timer_int.o ) 8 8 9 9 oprofile-y := $(DRIVER_OBJS) init.o backtrace.o 10 10 oprofile-$(CONFIG_X86_LOCAL_APIC) += nmi_int.o op_model_amd.o \ 11 11 op_model_ppro.o op_model_p4.o 12 - oprofile-$(CONFIG_X86_IO_APIC) += nmi_timer_int.o
+8 -22
arch/x86/oprofile/init.c
··· 16 16 * with the NMI mode driver. 17 17 */ 18 18 19 + #ifdef CONFIG_X86_LOCAL_APIC 19 20 extern int op_nmi_init(struct oprofile_operations *ops); 20 - extern int op_nmi_timer_init(struct oprofile_operations *ops); 21 21 extern void op_nmi_exit(void); 22 - extern void x86_backtrace(struct pt_regs * const regs, unsigned int depth); 22 + #else 23 + static int op_nmi_init(struct oprofile_operations *ops) { return -ENODEV; } 24 + static void op_nmi_exit(void) { } 25 + #endif 23 26 24 - static int nmi_timer; 27 + extern void x86_backtrace(struct pt_regs * const regs, unsigned int depth); 25 28 26 29 int __init oprofile_arch_init(struct oprofile_operations *ops) 27 30 { 28 - int ret; 29 - 30 - ret = -ENODEV; 31 - 32 - #ifdef CONFIG_X86_LOCAL_APIC 33 - ret = op_nmi_init(ops); 34 - #endif 35 - nmi_timer = (ret != 0); 36 - #ifdef CONFIG_X86_IO_APIC 37 - if (nmi_timer) 38 - ret = op_nmi_timer_init(ops); 39 - #endif 40 31 ops->backtrace = x86_backtrace; 41 - 42 - return ret; 32 + return op_nmi_init(ops); 43 33 } 44 - 45 34 46 35 void oprofile_arch_exit(void) 47 36 { 48 - #ifdef CONFIG_X86_LOCAL_APIC 49 - if (!nmi_timer) 50 - op_nmi_exit(); 51 - #endif 37 + op_nmi_exit(); 52 38 }
+21 -6
arch/x86/oprofile/nmi_int.c
··· 595 595 return 0; 596 596 } 597 597 598 - static int force_arch_perfmon; 599 - static int force_cpu_type(const char *str, struct kernel_param *kp) 598 + enum __force_cpu_type { 599 + reserved = 0, /* do not force */ 600 + timer, 601 + arch_perfmon, 602 + }; 603 + 604 + static int force_cpu_type; 605 + 606 + static int set_cpu_type(const char *str, struct kernel_param *kp) 600 607 { 601 - if (!strcmp(str, "arch_perfmon")) { 602 - force_arch_perfmon = 1; 608 + if (!strcmp(str, "timer")) { 609 + force_cpu_type = timer; 610 + printk(KERN_INFO "oprofile: forcing NMI timer mode\n"); 611 + } else if (!strcmp(str, "arch_perfmon")) { 612 + force_cpu_type = arch_perfmon; 603 613 printk(KERN_INFO "oprofile: forcing architectural perfmon\n"); 614 + } else { 615 + force_cpu_type = 0; 604 616 } 605 617 606 618 return 0; 607 619 } 608 - module_param_call(cpu_type, force_cpu_type, NULL, NULL, 0); 620 + module_param_call(cpu_type, set_cpu_type, NULL, NULL, 0); 609 621 610 622 static int __init ppro_init(char **cpu_type) 611 623 { 612 624 __u8 cpu_model = boot_cpu_data.x86_model; 613 625 struct op_x86_model_spec *spec = &op_ppro_spec; /* default */ 614 626 615 - if (force_arch_perfmon && cpu_has_arch_perfmon) 627 + if (force_cpu_type == arch_perfmon && cpu_has_arch_perfmon) 616 628 return 0; 617 629 618 630 /* ··· 689 677 int ret = 0; 690 678 691 679 if (!cpu_has_apic) 680 + return -ENODEV; 681 + 682 + if (force_cpu_type == timer) 692 683 return -ENODEV; 693 684 694 685 switch (vendor) {
-50
arch/x86/oprofile/nmi_timer_int.c
··· 1 - /** 2 - * @file nmi_timer_int.c 3 - * 4 - * @remark Copyright 2003 OProfile authors 5 - * @remark Read the file COPYING 6 - * 7 - * @author Zwane Mwaikambo <zwane@linuxpower.ca> 8 - */ 9 - 10 - #include <linux/init.h> 11 - #include <linux/smp.h> 12 - #include <linux/errno.h> 13 - #include <linux/oprofile.h> 14 - #include <linux/rcupdate.h> 15 - #include <linux/kdebug.h> 16 - 17 - #include <asm/nmi.h> 18 - #include <asm/apic.h> 19 - #include <asm/ptrace.h> 20 - 21 - static int profile_timer_exceptions_notify(unsigned int val, struct pt_regs *regs) 22 - { 23 - oprofile_add_sample(regs, 0); 24 - return NMI_HANDLED; 25 - } 26 - 27 - static int timer_start(void) 28 - { 29 - if (register_nmi_handler(NMI_LOCAL, profile_timer_exceptions_notify, 30 - 0, "oprofile-timer")) 31 - return 1; 32 - return 0; 33 - } 34 - 35 - 36 - static void timer_stop(void) 37 - { 38 - unregister_nmi_handler(NMI_LOCAL, "oprofile-timer"); 39 - synchronize_sched(); /* Allow already-started NMIs to complete. */ 40 - } 41 - 42 - 43 - int __init op_nmi_timer_init(struct oprofile_operations *ops) 44 - { 45 - ops->start = timer_start; 46 - ops->stop = timer_stop; 47 - ops->cpu_type = "timer"; 48 - printk(KERN_INFO "oprofile: using NMI timer interrupt.\n"); 49 - return 0; 50 - }
+10 -3
arch/x86/tools/Makefile
··· 18 18 quiet_cmd_posttest = TEST $@ 19 19 cmd_posttest = ($(OBJDUMP) -v | $(AWK) -f $(chkobjdump)) || $(OBJDUMP) -d -j .text $(objtree)/vmlinux | $(AWK) -f $(distill_awk) | $(obj)/test_get_len $(posttest_64bit) $(posttest_verbose) 20 20 21 - posttest: $(obj)/test_get_len vmlinux 22 - $(call cmd,posttest) 21 + quiet_cmd_sanitytest = TEST $@ 22 + cmd_sanitytest = $(obj)/insn_sanity $(posttest_64bit) -m 1000000 23 23 24 - hostprogs-y := test_get_len 24 + posttest: $(obj)/test_get_len vmlinux $(obj)/insn_sanity 25 + $(call cmd,posttest) 26 + $(call cmd,sanitytest) 27 + 28 + hostprogs-y += test_get_len insn_sanity 25 29 26 30 # -I needed for generated C source and C source which in the kernel tree. 27 31 HOSTCFLAGS_test_get_len.o := -Wall -I$(objtree)/arch/x86/lib/ -I$(srctree)/arch/x86/include/ -I$(srctree)/arch/x86/lib/ -I$(srctree)/include/ 28 32 33 + HOSTCFLAGS_insn_sanity.o := -Wall -I$(objtree)/arch/x86/lib/ -I$(srctree)/arch/x86/include/ -I$(srctree)/arch/x86/lib/ -I$(srctree)/include/ 34 + 29 35 # Dependencies are also needed. 30 36 $(obj)/test_get_len.o: $(srctree)/arch/x86/lib/insn.c $(srctree)/arch/x86/lib/inat.c $(srctree)/arch/x86/include/asm/inat_types.h $(srctree)/arch/x86/include/asm/inat.h $(srctree)/arch/x86/include/asm/insn.h $(objtree)/arch/x86/lib/inat-tables.c 31 37 38 + $(obj)/insn_sanity.o: $(srctree)/arch/x86/lib/insn.c $(srctree)/arch/x86/lib/inat.c $(srctree)/arch/x86/include/asm/inat_types.h $(srctree)/arch/x86/include/asm/inat.h $(srctree)/arch/x86/include/asm/insn.h $(objtree)/arch/x86/lib/inat-tables.c
+12 -9
arch/x86/tools/gen-insn-attr-x86.awk
··· 47 47 sep_expr = "^\\|$" 48 48 group_expr = "^Grp[0-9A-Za-z]+" 49 49 50 - imm_expr = "^[IJAO][a-z]" 50 + imm_expr = "^[IJAOL][a-z]" 51 51 imm_flag["Ib"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)" 52 52 imm_flag["Jb"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)" 53 53 imm_flag["Iw"] = "INAT_MAKE_IMM(INAT_IMM_WORD)" ··· 59 59 imm_flag["Iv"] = "INAT_MAKE_IMM(INAT_IMM_VWORD)" 60 60 imm_flag["Ob"] = "INAT_MOFFSET" 61 61 imm_flag["Ov"] = "INAT_MOFFSET" 62 + imm_flag["Lx"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)" 62 63 63 64 modrm_expr = "^([CDEGMNPQRSUVW/][a-z]+|NTA|T[012])" 64 65 force64_expr = "\\([df]64\\)" ··· 71 70 lprefix3_expr = "\\(F2\\)" 72 71 max_lprefix = 4 73 72 74 - vexok_expr = "\\(VEX\\)" 75 - vexonly_expr = "\\(oVEX\\)" 73 + # All opcodes starting with lower-case 'v' or with (v1) superscript 74 + # accepts VEX prefix 75 + vexok_opcode_expr = "^v.*" 76 + vexok_expr = "\\(v1\\)" 77 + # All opcodes with (v) superscript supports *only* VEX prefix 78 + vexonly_expr = "\\(v\\)" 76 79 77 80 prefix_expr = "\\(Prefix\\)" 78 81 prefix_num["Operand-Size"] = "INAT_PFX_OPNDSZ" ··· 90 85 prefix_num["SEG=GS"] = "INAT_PFX_GS" 91 86 prefix_num["SEG=SS"] = "INAT_PFX_SS" 92 87 prefix_num["Address-Size"] = "INAT_PFX_ADDRSZ" 93 - prefix_num["2bytes-VEX"] = "INAT_PFX_VEX2" 94 - prefix_num["3bytes-VEX"] = "INAT_PFX_VEX3" 88 + prefix_num["VEX+1byte"] = "INAT_PFX_VEX2" 89 + prefix_num["VEX+2byte"] = "INAT_PFX_VEX3" 95 90 96 91 clear_vars() 97 92 } ··· 315 310 if (match(opcode, fpu_expr)) 316 311 flags = add_flags(flags, "INAT_MODRM") 317 312 318 - # check VEX only code 313 + # check VEX codes 319 314 if (match(ext, vexonly_expr)) 320 315 flags = add_flags(flags, "INAT_VEXOK | INAT_VEXONLY") 321 - 322 - # check VEX only code 323 - if (match(ext, vexok_expr)) 316 + else if (match(ext, vexok_expr) || match(opcode, vexok_opcode_expr)) 324 317 flags = add_flags(flags, "INAT_VEXOK") 325 318 326 319 # check prefixes
+275
arch/x86/tools/insn_sanity.c
··· 1 + /* 2 + * x86 decoder sanity test - based on test_get_insn.c 3 + * 4 + * This program is free software; you can redistribute it and/or modify 5 + * it under the terms of the GNU General Public License as published by 6 + * the Free Software Foundation; either version 2 of the License, or 7 + * (at your option) any later version. 8 + * 9 + * This program is distributed in the hope that it will be useful, 10 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 + * GNU General Public License for more details. 13 + * 14 + * You should have received a copy of the GNU General Public License 15 + * along with this program; if not, write to the Free Software 16 + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 17 + * 18 + * Copyright (C) IBM Corporation, 2009 19 + * Copyright (C) Hitachi, Ltd., 2011 20 + */ 21 + 22 + #include <stdlib.h> 23 + #include <stdio.h> 24 + #include <string.h> 25 + #include <assert.h> 26 + #include <unistd.h> 27 + #include <sys/types.h> 28 + #include <sys/stat.h> 29 + #include <fcntl.h> 30 + 31 + #define unlikely(cond) (cond) 32 + #define ARRAY_SIZE(a) (sizeof(a)/sizeof(a[0])) 33 + 34 + #include <asm/insn.h> 35 + #include <inat.c> 36 + #include <insn.c> 37 + 38 + /* 39 + * Test of instruction analysis against tampering. 40 + * Feed random binary to instruction decoder and ensure not to 41 + * access out-of-instruction-buffer. 
42 + */ 43 + 44 + #define DEFAULT_MAX_ITER 10000 45 + #define INSN_NOP 0x90 46 + 47 + static const char *prog; /* Program name */ 48 + static int verbose; /* Verbosity */ 49 + static int x86_64; /* x86-64 bit mode flag */ 50 + static unsigned int seed; /* Random seed */ 51 + static unsigned long iter_start; /* Start of iteration number */ 52 + static unsigned long iter_end = DEFAULT_MAX_ITER; /* End of iteration number */ 53 + static FILE *input_file; /* Input file name */ 54 + 55 + static void usage(const char *err) 56 + { 57 + if (err) 58 + fprintf(stderr, "Error: %s\n\n", err); 59 + fprintf(stderr, "Usage: %s [-y|-n|-v] [-s seed[,no]] [-m max] [-i input]\n", prog); 60 + fprintf(stderr, "\t-y 64bit mode\n"); 61 + fprintf(stderr, "\t-n 32bit mode\n"); 62 + fprintf(stderr, "\t-v Verbosity(-vv dumps any decoded result)\n"); 63 + fprintf(stderr, "\t-s Give a random seed (and iteration number)\n"); 64 + fprintf(stderr, "\t-m Give a maximum iteration number\n"); 65 + fprintf(stderr, "\t-i Give an input file with decoded binary\n"); 66 + exit(1); 67 + } 68 + 69 + static void dump_field(FILE *fp, const char *name, const char *indent, 70 + struct insn_field *field) 71 + { 72 + fprintf(fp, "%s.%s = {\n", indent, name); 73 + fprintf(fp, "%s\t.value = %d, bytes[] = {%x, %x, %x, %x},\n", 74 + indent, field->value, field->bytes[0], field->bytes[1], 75 + field->bytes[2], field->bytes[3]); 76 + fprintf(fp, "%s\t.got = %d, .nbytes = %d},\n", indent, 77 + field->got, field->nbytes); 78 + } 79 + 80 + static void dump_insn(FILE *fp, struct insn *insn) 81 + { 82 + fprintf(fp, "Instruction = {\n"); 83 + dump_field(fp, "prefixes", "\t", &insn->prefixes); 84 + dump_field(fp, "rex_prefix", "\t", &insn->rex_prefix); 85 + dump_field(fp, "vex_prefix", "\t", &insn->vex_prefix); 86 + dump_field(fp, "opcode", "\t", &insn->opcode); 87 + dump_field(fp, "modrm", "\t", &insn->modrm); 88 + dump_field(fp, "sib", "\t", &insn->sib); 89 + dump_field(fp, "displacement", "\t", &insn->displacement); 90 + 
dump_field(fp, "immediate1", "\t", &insn->immediate1); 91 + dump_field(fp, "immediate2", "\t", &insn->immediate2); 92 + fprintf(fp, "\t.attr = %x, .opnd_bytes = %d, .addr_bytes = %d,\n", 93 + insn->attr, insn->opnd_bytes, insn->addr_bytes); 94 + fprintf(fp, "\t.length = %d, .x86_64 = %d, .kaddr = %p}\n", 95 + insn->length, insn->x86_64, insn->kaddr); 96 + } 97 + 98 + static void dump_stream(FILE *fp, const char *msg, unsigned long nr_iter, 99 + unsigned char *insn_buf, struct insn *insn) 100 + { 101 + int i; 102 + 103 + fprintf(fp, "%s:\n", msg); 104 + 105 + dump_insn(fp, insn); 106 + 107 + fprintf(fp, "You can reproduce this with below command(s);\n"); 108 + 109 + /* Input a decoded instruction sequence directly */ 110 + fprintf(fp, " $ echo "); 111 + for (i = 0; i < MAX_INSN_SIZE; i++) 112 + fprintf(fp, " %02x", insn_buf[i]); 113 + fprintf(fp, " | %s -i -\n", prog); 114 + 115 + if (!input_file) { 116 + fprintf(fp, "Or \n"); 117 + /* Give a seed and iteration number */ 118 + fprintf(fp, " $ %s -s 0x%x,%lu\n", prog, seed, nr_iter); 119 + } 120 + } 121 + 122 + static void init_random_seed(void) 123 + { 124 + int fd; 125 + 126 + fd = open("/dev/urandom", O_RDONLY); 127 + if (fd < 0) 128 + goto fail; 129 + 130 + if (read(fd, &seed, sizeof(seed)) != sizeof(seed)) 131 + goto fail; 132 + 133 + close(fd); 134 + return; 135 + fail: 136 + usage("Failed to open /dev/urandom"); 137 + } 138 + 139 + /* Read given instruction sequence from the input file */ 140 + static int read_next_insn(unsigned char *insn_buf) 141 + { 142 + char buf[256] = "", *tmp; 143 + int i; 144 + 145 + tmp = fgets(buf, ARRAY_SIZE(buf), input_file); 146 + if (tmp == NULL || feof(input_file)) 147 + return 0; 148 + 149 + for (i = 0; i < MAX_INSN_SIZE; i++) { 150 + insn_buf[i] = (unsigned char)strtoul(tmp, &tmp, 16); 151 + if (*tmp != ' ') 152 + break; 153 + } 154 + 155 + return i; 156 + } 157 + 158 + static int generate_insn(unsigned char *insn_buf) 159 + { 160 + int i; 161 + 162 + if (input_file) 163 + 
return read_next_insn(insn_buf); 164 + 165 + /* Fills buffer with random binary up to MAX_INSN_SIZE */ 166 + for (i = 0; i < MAX_INSN_SIZE - 1; i += 2) 167 + *(unsigned short *)(&insn_buf[i]) = random() & 0xffff; 168 + 169 + while (i < MAX_INSN_SIZE) 170 + insn_buf[i++] = random() & 0xff; 171 + 172 + return i; 173 + } 174 + 175 + static void parse_args(int argc, char **argv) 176 + { 177 + int c; 178 + char *tmp = NULL; 179 + int set_seed = 0; 180 + 181 + prog = argv[0]; 182 + while ((c = getopt(argc, argv, "ynvs:m:i:")) != -1) { 183 + switch (c) { 184 + case 'y': 185 + x86_64 = 1; 186 + break; 187 + case 'n': 188 + x86_64 = 0; 189 + break; 190 + case 'v': 191 + verbose++; 192 + break; 193 + case 'i': 194 + if (strcmp("-", optarg) == 0) 195 + input_file = stdin; 196 + else 197 + input_file = fopen(optarg, "r"); 198 + if (!input_file) 199 + usage("Failed to open input file"); 200 + break; 201 + case 's': 202 + seed = (unsigned int)strtoul(optarg, &tmp, 0); 203 + if (*tmp == ',') { 204 + optarg = tmp + 1; 205 + iter_start = strtoul(optarg, &tmp, 0); 206 + } 207 + if (*tmp != '\0' || tmp == optarg) 208 + usage("Failed to parse seed"); 209 + set_seed = 1; 210 + break; 211 + case 'm': 212 + iter_end = strtoul(optarg, &tmp, 0); 213 + if (*tmp != '\0' || tmp == optarg) 214 + usage("Failed to parse max_iter"); 215 + break; 216 + default: 217 + usage(NULL); 218 + } 219 + } 220 + 221 + /* Check errors */ 222 + if (iter_end < iter_start) 223 + usage("Max iteration number must be bigger than iter-num"); 224 + 225 + if (set_seed && input_file) 226 + usage("Don't use input file (-i) with random seed (-s)"); 227 + 228 + /* Initialize random seed */ 229 + if (!input_file) { 230 + if (!set_seed) /* No seed is given */ 231 + init_random_seed(); 232 + srand(seed); 233 + } 234 + } 235 + 236 + int main(int argc, char **argv) 237 + { 238 + struct insn insn; 239 + int insns = 0; 240 + int errors = 0; 241 + unsigned long i; 242 + unsigned char insn_buf[MAX_INSN_SIZE * 2]; 243 + 244 + 
parse_args(argc, argv); 245 + 246 + /* Prepare stop bytes with NOPs */ 247 + memset(insn_buf + MAX_INSN_SIZE, INSN_NOP, MAX_INSN_SIZE); 248 + 249 + for (i = 0; i < iter_end; i++) { 250 + if (generate_insn(insn_buf) <= 0) 251 + break; 252 + 253 + if (i < iter_start) /* Skip to given iteration number */ 254 + continue; 255 + 256 + /* Decode an instruction */ 257 + insn_init(&insn, insn_buf, x86_64); 258 + insn_get_length(&insn); 259 + 260 + if (insn.next_byte <= insn.kaddr || 261 + insn.kaddr + MAX_INSN_SIZE < insn.next_byte) { 262 + /* Access out-of-range memory */ 263 + dump_stream(stderr, "Error: Found an access violation", i, insn_buf, &insn); 264 + errors++; 265 + } else if (verbose && !insn_complete(&insn)) 266 + dump_stream(stdout, "Info: Found an undecodable input", i, insn_buf, &insn); 267 + else if (verbose >= 2) 268 + dump_insn(stdout, &insn); 269 + insns++; 270 + } 271 + 272 + fprintf(stdout, "%s: decoded and checked %d %s instructions with %d errors (seed:0x%x)\n", (errors) ? "Failure" : "Success", insns, (input_file) ? "given" : "random", errors, seed); 273 + 274 + return errors ? 1 : 0; 275 + }
+173
drivers/oprofile/nmi_timer_int.c
··· 1 + /** 2 + * @file nmi_timer_int.c 3 + * 4 + * @remark Copyright 2011 Advanced Micro Devices, Inc. 5 + * 6 + * @author Robert Richter <robert.richter@amd.com> 7 + */ 8 + 9 + #include <linux/init.h> 10 + #include <linux/smp.h> 11 + #include <linux/errno.h> 12 + #include <linux/oprofile.h> 13 + #include <linux/perf_event.h> 14 + 15 + #ifdef CONFIG_OPROFILE_NMI_TIMER 16 + 17 + static DEFINE_PER_CPU(struct perf_event *, nmi_timer_events); 18 + static int ctr_running; 19 + 20 + static struct perf_event_attr nmi_timer_attr = { 21 + .type = PERF_TYPE_HARDWARE, 22 + .config = PERF_COUNT_HW_CPU_CYCLES, 23 + .size = sizeof(struct perf_event_attr), 24 + .pinned = 1, 25 + .disabled = 1, 26 + }; 27 + 28 + static void nmi_timer_callback(struct perf_event *event, 29 + struct perf_sample_data *data, 30 + struct pt_regs *regs) 31 + { 32 + event->hw.interrupts = 0; /* don't throttle interrupts */ 33 + oprofile_add_sample(regs, 0); 34 + } 35 + 36 + static int nmi_timer_start_cpu(int cpu) 37 + { 38 + struct perf_event *event = per_cpu(nmi_timer_events, cpu); 39 + 40 + if (!event) { 41 + event = perf_event_create_kernel_counter(&nmi_timer_attr, cpu, NULL, 42 + nmi_timer_callback, NULL); 43 + if (IS_ERR(event)) 44 + return PTR_ERR(event); 45 + per_cpu(nmi_timer_events, cpu) = event; 46 + } 47 + 48 + if (event && ctr_running) 49 + perf_event_enable(event); 50 + 51 + return 0; 52 + } 53 + 54 + static void nmi_timer_stop_cpu(int cpu) 55 + { 56 + struct perf_event *event = per_cpu(nmi_timer_events, cpu); 57 + 58 + if (event && ctr_running) 59 + perf_event_disable(event); 60 + } 61 + 62 + static int nmi_timer_cpu_notifier(struct notifier_block *b, unsigned long action, 63 + void *data) 64 + { 65 + int cpu = (unsigned long)data; 66 + switch (action) { 67 + case CPU_DOWN_FAILED: 68 + case CPU_ONLINE: 69 + nmi_timer_start_cpu(cpu); 70 + break; 71 + case CPU_DOWN_PREPARE: 72 + nmi_timer_stop_cpu(cpu); 73 + break; 74 + } 75 + return NOTIFY_DONE; 76 + } 77 + 78 + static struct notifier_block 
nmi_timer_cpu_nb = { 79 + .notifier_call = nmi_timer_cpu_notifier 80 + }; 81 + 82 + static int nmi_timer_start(void) 83 + { 84 + int cpu; 85 + 86 + get_online_cpus(); 87 + ctr_running = 1; 88 + for_each_online_cpu(cpu) 89 + nmi_timer_start_cpu(cpu); 90 + put_online_cpus(); 91 + 92 + return 0; 93 + } 94 + 95 + static void nmi_timer_stop(void) 96 + { 97 + int cpu; 98 + 99 + get_online_cpus(); 100 + for_each_online_cpu(cpu) 101 + nmi_timer_stop_cpu(cpu); 102 + ctr_running = 0; 103 + put_online_cpus(); 104 + } 105 + 106 + static void nmi_timer_shutdown(void) 107 + { 108 + struct perf_event *event; 109 + int cpu; 110 + 111 + get_online_cpus(); 112 + unregister_cpu_notifier(&nmi_timer_cpu_nb); 113 + for_each_possible_cpu(cpu) { 114 + event = per_cpu(nmi_timer_events, cpu); 115 + if (!event) 116 + continue; 117 + perf_event_disable(event); 118 + per_cpu(nmi_timer_events, cpu) = NULL; 119 + perf_event_release_kernel(event); 120 + } 121 + 122 + put_online_cpus(); 123 + } 124 + 125 + static int nmi_timer_setup(void) 126 + { 127 + int cpu, err; 128 + u64 period; 129 + 130 + /* clock cycles per tick: */ 131 + period = (u64)cpu_khz * 1000; 132 + do_div(period, HZ); 133 + nmi_timer_attr.sample_period = period; 134 + 135 + get_online_cpus(); 136 + err = register_cpu_notifier(&nmi_timer_cpu_nb); 137 + if (err) 138 + goto out; 139 + /* can't attach events to offline cpus: */ 140 + for_each_online_cpu(cpu) { 141 + err = nmi_timer_start_cpu(cpu); 142 + if (err) 143 + break; 144 + } 145 + if (err) 146 + nmi_timer_shutdown(); 147 + out: 148 + put_online_cpus(); 149 + return err; 150 + } 151 + 152 + int __init op_nmi_timer_init(struct oprofile_operations *ops) 153 + { 154 + int err = 0; 155 + 156 + err = nmi_timer_setup(); 157 + if (err) 158 + return err; 159 + nmi_timer_shutdown(); /* only check, don't alloc */ 160 + 161 + ops->create_files = NULL; 162 + ops->setup = nmi_timer_setup; 163 + ops->shutdown = nmi_timer_shutdown; 164 + ops->start = nmi_timer_start; 165 + ops->stop = 
nmi_timer_stop; 166 + ops->cpu_type = "timer"; 167 + 168 + printk(KERN_INFO "oprofile: using NMI timer interrupt.\n"); 169 + 170 + return 0; 171 + } 172 + 173 + #endif
+12 -18
drivers/oprofile/oprof.c
··· 246 246 int err; 247 247 248 248 /* always init architecture to setup backtrace support */ 249 + timer_mode = 0; 249 250 err = oprofile_arch_init(&oprofile_ops); 251 + if (!err) { 252 + if (!timer && !oprofilefs_register()) 253 + return 0; 254 + oprofile_arch_exit(); 255 + } 250 256 251 - timer_mode = err || timer; /* fall back to timer mode on errors */ 252 - if (timer_mode) { 253 - if (!err) 254 - oprofile_arch_exit(); 257 + /* setup timer mode: */ 258 + timer_mode = 1; 259 + /* no nmi timer mode if oprofile.timer is set */ 260 + if (timer || op_nmi_timer_init(&oprofile_ops)) { 255 261 err = oprofile_timer_init(&oprofile_ops); 256 262 if (err) 257 263 return err; 258 264 } 259 265 260 - err = oprofilefs_register(); 261 - if (!err) 262 - return 0; 263 - 264 - /* failed */ 265 - if (timer_mode) 266 - oprofile_timer_exit(); 267 - else 268 - oprofile_arch_exit(); 269 - 270 - return err; 266 + return oprofilefs_register(); 271 267 } 272 268 273 269 274 270 static void __exit oprofile_exit(void) 275 271 { 276 272 oprofilefs_unregister(); 277 - if (timer_mode) 278 - oprofile_timer_exit(); 279 - else 273 + if (!timer_mode) 280 274 oprofile_arch_exit(); 281 275 } 282 276
+9 -1
drivers/oprofile/oprof.h
··· 35 35 36 36 void oprofile_create_files(struct super_block *sb, struct dentry *root); 37 37 int oprofile_timer_init(struct oprofile_operations *ops); 38 - void oprofile_timer_exit(void); 38 + #ifdef CONFIG_OPROFILE_NMI_TIMER 39 + int op_nmi_timer_init(struct oprofile_operations *ops); 40 + #else 41 + static inline int op_nmi_timer_init(struct oprofile_operations *ops) 42 + { 43 + return -ENODEV; 44 + } 45 + #endif 46 + 39 47 40 48 int oprofile_set_ulong(unsigned long *addr, unsigned long val); 41 49 int oprofile_set_timeout(unsigned long time);
+15 -15
drivers/oprofile/timer_int.c
··· 97 97 .notifier_call = oprofile_cpu_notify, 98 98 }; 99 99 100 - int oprofile_timer_init(struct oprofile_operations *ops) 100 + static int oprofile_hrtimer_setup(void) 101 101 { 102 - int rc; 103 - 104 - rc = register_hotcpu_notifier(&oprofile_cpu_notifier); 105 - if (rc) 106 - return rc; 107 - ops->create_files = NULL; 108 - ops->setup = NULL; 109 - ops->shutdown = NULL; 110 - ops->start = oprofile_hrtimer_start; 111 - ops->stop = oprofile_hrtimer_stop; 112 - ops->cpu_type = "timer"; 113 - printk(KERN_INFO "oprofile: using timer interrupt.\n"); 114 - return 0; 102 + return register_hotcpu_notifier(&oprofile_cpu_notifier); 115 103 } 116 104 117 - void oprofile_timer_exit(void) 105 + static void oprofile_hrtimer_shutdown(void) 118 106 { 119 107 unregister_hotcpu_notifier(&oprofile_cpu_notifier); 108 + } 109 + 110 + int oprofile_timer_init(struct oprofile_operations *ops) 111 + { 112 + ops->create_files = NULL; 113 + ops->setup = oprofile_hrtimer_setup; 114 + ops->shutdown = oprofile_hrtimer_shutdown; 115 + ops->start = oprofile_hrtimer_start; 116 + ops->stop = oprofile_hrtimer_stop; 117 + ops->cpu_type = "timer"; 118 + printk(KERN_INFO "oprofile: using timer interrupt.\n"); 119 + return 0; 120 120 }
+8 -2
include/linux/bitops.h
··· 22 22 #include <asm/bitops.h> 23 23 24 24 #define for_each_set_bit(bit, addr, size) \ 25 - for ((bit) = find_first_bit((addr), (size)); \ 26 - (bit) < (size); \ 25 + for ((bit) = find_first_bit((addr), (size)); \ 26 + (bit) < (size); \ 27 + (bit) = find_next_bit((addr), (size), (bit) + 1)) 28 + 29 + /* same as for_each_set_bit() but use bit as value to start with */ 30 + #define for_each_set_bit_cont(bit, addr, size) \ 31 + for ((bit) = find_next_bit((addr), (size), (bit)); \ 32 + (bit) < (size); \ 27 33 (bit) = find_next_bit((addr), (size), (bit) + 1)) 28 34 29 35 static __inline__ int get_bitmask_order(unsigned int count)
+27
include/linux/jump_label.h
··· 3 3 4 4 #include <linux/types.h> 5 5 #include <linux/compiler.h> 6 + #include <linux/workqueue.h> 6 7 7 8 #if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_JUMP_LABEL) 8 9 ··· 13 12 #ifdef CONFIG_MODULES 14 13 struct jump_label_mod *next; 15 14 #endif 15 + }; 16 + 17 + struct jump_label_key_deferred { 18 + struct jump_label_key key; 19 + unsigned long timeout; 20 + struct delayed_work work; 16 21 }; 17 22 18 23 # include <asm/jump_label.h> ··· 58 51 extern int jump_label_text_reserved(void *start, void *end); 59 52 extern void jump_label_inc(struct jump_label_key *key); 60 53 extern void jump_label_dec(struct jump_label_key *key); 54 + extern void jump_label_dec_deferred(struct jump_label_key_deferred *key); 61 55 extern bool jump_label_enabled(struct jump_label_key *key); 62 56 extern void jump_label_apply_nops(struct module *mod); 57 + extern void jump_label_rate_limit(struct jump_label_key_deferred *key, 58 + unsigned long rl); 63 59 64 60 #else /* !HAVE_JUMP_LABEL */ 65 61 ··· 77 67 static __always_inline void jump_label_init(void) 78 68 { 79 69 } 70 + 71 + struct jump_label_key_deferred { 72 + struct jump_label_key key; 73 + }; 80 74 81 75 static __always_inline bool static_branch(struct jump_label_key *key) 82 76 { ··· 97 83 static inline void jump_label_dec(struct jump_label_key *key) 98 84 { 99 85 atomic_dec(&key->enabled); 86 + } 87 + 88 + static inline void jump_label_dec_deferred(struct jump_label_key_deferred *key) 89 + { 90 + jump_label_dec(&key->key); 100 91 } 101 92 102 93 static inline int jump_label_text_reserved(void *start, void *end) ··· 121 102 { 122 103 return 0; 123 104 } 105 + 106 + static inline void jump_label_rate_limit(struct jump_label_key_deferred *key, 107 + unsigned long rl) 108 + { 109 + } 124 110 #endif /* HAVE_JUMP_LABEL */ 111 + 112 + #define jump_label_key_enabled ((struct jump_label_key){ .enabled = ATOMIC_INIT(1), }) 113 + #define jump_label_key_disabled ((struct jump_label_key){ .enabled = ATOMIC_INIT(0), }) 125 114 126 115 
#endif /* _LINUX_JUMP_LABEL_H */
+5 -3
include/linux/perf_event.h
··· 54 54 PERF_COUNT_HW_BUS_CYCLES = 6, 55 55 PERF_COUNT_HW_STALLED_CYCLES_FRONTEND = 7, 56 56 PERF_COUNT_HW_STALLED_CYCLES_BACKEND = 8, 57 + PERF_COUNT_HW_REF_CPU_CYCLES = 9, 57 58 58 59 PERF_COUNT_HW_MAX, /* non-ABI */ 59 60 }; ··· 891 890 int nr_active; 892 891 int is_active; 893 892 int nr_stat; 893 + int nr_freq; 894 894 int rotate_disable; 895 895 atomic_t refcount; 896 896 struct task_struct *task; ··· 1065 1063 } 1066 1064 } 1067 1065 1068 - extern struct jump_label_key perf_sched_events; 1066 + extern struct jump_label_key_deferred perf_sched_events; 1069 1067 1070 1068 static inline void perf_event_task_sched_in(struct task_struct *prev, 1071 1069 struct task_struct *task) 1072 1070 { 1073 - if (static_branch(&perf_sched_events)) 1071 + if (static_branch(&perf_sched_events.key)) 1074 1072 __perf_event_task_sched_in(prev, task); 1075 1073 } 1076 1074 ··· 1079 1077 { 1080 1078 perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, NULL, 0); 1081 1079 1082 - if (static_branch(&perf_sched_events)) 1080 + if (static_branch(&perf_sched_events.key)) 1083 1081 __perf_event_task_sched_out(prev, next); 1084 1082 } 1085 1083
+1 -1
kernel/events/Makefile
··· 2 2 CFLAGS_REMOVE_core.o = -pg 3 3 endif 4 4 5 - obj-y := core.o ring_buffer.o 5 + obj-y := core.o ring_buffer.o callchain.o 6 6 obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
+191
kernel/events/callchain.c
··· 1 + /* 2 + * Performance events callchain code, extracted from core.c: 3 + * 4 + * Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de> 5 + * Copyright (C) 2008-2011 Red Hat, Inc., Ingo Molnar 6 + * Copyright (C) 2008-2011 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com> 7 + * Copyright © 2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com> 8 + * 9 + * For licensing details see kernel-base/COPYING 10 + */ 11 + 12 + #include <linux/perf_event.h> 13 + #include <linux/slab.h> 14 + #include "internal.h" 15 + 16 + struct callchain_cpus_entries { 17 + struct rcu_head rcu_head; 18 + struct perf_callchain_entry *cpu_entries[0]; 19 + }; 20 + 21 + static DEFINE_PER_CPU(int, callchain_recursion[PERF_NR_CONTEXTS]); 22 + static atomic_t nr_callchain_events; 23 + static DEFINE_MUTEX(callchain_mutex); 24 + static struct callchain_cpus_entries *callchain_cpus_entries; 25 + 26 + 27 + __weak void perf_callchain_kernel(struct perf_callchain_entry *entry, 28 + struct pt_regs *regs) 29 + { 30 + } 31 + 32 + __weak void perf_callchain_user(struct perf_callchain_entry *entry, 33 + struct pt_regs *regs) 34 + { 35 + } 36 + 37 + static void release_callchain_buffers_rcu(struct rcu_head *head) 38 + { 39 + struct callchain_cpus_entries *entries; 40 + int cpu; 41 + 42 + entries = container_of(head, struct callchain_cpus_entries, rcu_head); 43 + 44 + for_each_possible_cpu(cpu) 45 + kfree(entries->cpu_entries[cpu]); 46 + 47 + kfree(entries); 48 + } 49 + 50 + static void release_callchain_buffers(void) 51 + { 52 + struct callchain_cpus_entries *entries; 53 + 54 + entries = callchain_cpus_entries; 55 + rcu_assign_pointer(callchain_cpus_entries, NULL); 56 + call_rcu(&entries->rcu_head, release_callchain_buffers_rcu); 57 + } 58 + 59 + static int alloc_callchain_buffers(void) 60 + { 61 + int cpu; 62 + int size; 63 + struct callchain_cpus_entries *entries; 64 + 65 + /* 66 + * We can't use the percpu allocation API for data that can be 67 + * accessed from NMI. 
Use a temporary manual per cpu allocation 68 + * until that gets sorted out. 69 + */ 70 + size = offsetof(struct callchain_cpus_entries, cpu_entries[nr_cpu_ids]); 71 + 72 + entries = kzalloc(size, GFP_KERNEL); 73 + if (!entries) 74 + return -ENOMEM; 75 + 76 + size = sizeof(struct perf_callchain_entry) * PERF_NR_CONTEXTS; 77 + 78 + for_each_possible_cpu(cpu) { 79 + entries->cpu_entries[cpu] = kmalloc_node(size, GFP_KERNEL, 80 + cpu_to_node(cpu)); 81 + if (!entries->cpu_entries[cpu]) 82 + goto fail; 83 + } 84 + 85 + rcu_assign_pointer(callchain_cpus_entries, entries); 86 + 87 + return 0; 88 + 89 + fail: 90 + for_each_possible_cpu(cpu) 91 + kfree(entries->cpu_entries[cpu]); 92 + kfree(entries); 93 + 94 + return -ENOMEM; 95 + } 96 + 97 + int get_callchain_buffers(void) 98 + { 99 + int err = 0; 100 + int count; 101 + 102 + mutex_lock(&callchain_mutex); 103 + 104 + count = atomic_inc_return(&nr_callchain_events); 105 + if (WARN_ON_ONCE(count < 1)) { 106 + err = -EINVAL; 107 + goto exit; 108 + } 109 + 110 + if (count > 1) { 111 + /* If the allocation failed, give up */ 112 + if (!callchain_cpus_entries) 113 + err = -ENOMEM; 114 + goto exit; 115 + } 116 + 117 + err = alloc_callchain_buffers(); 118 + if (err) 119 + release_callchain_buffers(); 120 + exit: 121 + mutex_unlock(&callchain_mutex); 122 + 123 + return err; 124 + } 125 + 126 + void put_callchain_buffers(void) 127 + { 128 + if (atomic_dec_and_mutex_lock(&nr_callchain_events, &callchain_mutex)) { 129 + release_callchain_buffers(); 130 + mutex_unlock(&callchain_mutex); 131 + } 132 + } 133 + 134 + static struct perf_callchain_entry *get_callchain_entry(int *rctx) 135 + { 136 + int cpu; 137 + struct callchain_cpus_entries *entries; 138 + 139 + *rctx = get_recursion_context(__get_cpu_var(callchain_recursion)); 140 + if (*rctx == -1) 141 + return NULL; 142 + 143 + entries = rcu_dereference(callchain_cpus_entries); 144 + if (!entries) 145 + return NULL; 146 + 147 + cpu = smp_processor_id(); 148 + 149 + return 
&entries->cpu_entries[cpu][*rctx]; 150 + } 151 + 152 + static void 153 + put_callchain_entry(int rctx) 154 + { 155 + put_recursion_context(__get_cpu_var(callchain_recursion), rctx); 156 + } 157 + 158 + struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) 159 + { 160 + int rctx; 161 + struct perf_callchain_entry *entry; 162 + 163 + 164 + entry = get_callchain_entry(&rctx); 165 + if (rctx == -1) 166 + return NULL; 167 + 168 + if (!entry) 169 + goto exit_put; 170 + 171 + entry->nr = 0; 172 + 173 + if (!user_mode(regs)) { 174 + perf_callchain_store(entry, PERF_CONTEXT_KERNEL); 175 + perf_callchain_kernel(entry, regs); 176 + if (current->mm) 177 + regs = task_pt_regs(current); 178 + else 179 + regs = NULL; 180 + } 181 + 182 + if (regs) { 183 + perf_callchain_store(entry, PERF_CONTEXT_USER); 184 + perf_callchain_user(entry, regs); 185 + } 186 + 187 + exit_put: 188 + put_callchain_entry(rctx); 189 + 190 + return entry; 191 + }
+53 -243
kernel/events/core.c
··· 128 128 * perf_sched_events : >0 events exist 129 129 * perf_cgroup_events: >0 per-cpu cgroup events exist on this cpu 130 130 */ 131 - struct jump_label_key perf_sched_events __read_mostly; 131 + struct jump_label_key_deferred perf_sched_events __read_mostly; 132 132 static DEFINE_PER_CPU(atomic_t, perf_cgroup_events); 133 133 134 134 static atomic_t nr_mmap_events __read_mostly; ··· 1130 1130 if (!is_software_event(event)) 1131 1131 cpuctx->active_oncpu--; 1132 1132 ctx->nr_active--; 1133 + if (event->attr.freq && event->attr.sample_freq) 1134 + ctx->nr_freq--; 1133 1135 if (event->attr.exclusive || !cpuctx->active_oncpu) 1134 1136 cpuctx->exclusive = 0; 1135 1137 } ··· 1327 1325 } 1328 1326 raw_spin_unlock_irq(&ctx->lock); 1329 1327 } 1328 + EXPORT_SYMBOL_GPL(perf_event_disable); 1330 1329 1331 1330 static void perf_set_shadow_time(struct perf_event *event, 1332 1331 struct perf_event_context *ctx, ··· 1409 1406 if (!is_software_event(event)) 1410 1407 cpuctx->active_oncpu++; 1411 1408 ctx->nr_active++; 1409 + if (event->attr.freq && event->attr.sample_freq) 1410 + ctx->nr_freq++; 1412 1411 1413 1412 if (event->attr.exclusive) 1414 1413 cpuctx->exclusive = 1; ··· 1667 1662 * Note: this works for group members as well as group leaders 1668 1663 * since the non-leader members' sibling_lists will be empty. 
1669 1664 */ 1670 - static void __perf_event_mark_enabled(struct perf_event *event, 1671 - struct perf_event_context *ctx) 1665 + static void __perf_event_mark_enabled(struct perf_event *event) 1672 1666 { 1673 1667 struct perf_event *sub; 1674 1668 u64 tstamp = perf_event_time(event); ··· 1705 1701 */ 1706 1702 perf_cgroup_set_timestamp(current, ctx); 1707 1703 1708 - __perf_event_mark_enabled(event, ctx); 1704 + __perf_event_mark_enabled(event); 1709 1705 1710 1706 if (!event_filter_match(event)) { 1711 1707 if (is_cgroup_event(event)) ··· 1786 1782 1787 1783 retry: 1788 1784 if (!ctx->is_active) { 1789 - __perf_event_mark_enabled(event, ctx); 1785 + __perf_event_mark_enabled(event); 1790 1786 goto out; 1791 1787 } 1792 1788 ··· 1813 1809 out: 1814 1810 raw_spin_unlock_irq(&ctx->lock); 1815 1811 } 1812 + EXPORT_SYMBOL_GPL(perf_event_enable); 1816 1813 1817 1814 int perf_event_refresh(struct perf_event *event, int refresh) 1818 1815 { ··· 2332 2327 u64 interrupts, now; 2333 2328 s64 delta; 2334 2329 2330 + if (!ctx->nr_freq) 2331 + return; 2332 + 2335 2333 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { 2336 2334 if (event->state != PERF_EVENT_STATE_ACTIVE) 2337 2335 continue; ··· 2390 2382 { 2391 2383 u64 interval = (u64)cpuctx->jiffies_interval * TICK_NSEC; 2392 2384 struct perf_event_context *ctx = NULL; 2393 - int rotate = 0, remove = 1; 2385 + int rotate = 0, remove = 1, freq = 0; 2394 2386 2395 2387 if (cpuctx->ctx.nr_events) { 2396 2388 remove = 0; 2397 2389 if (cpuctx->ctx.nr_events != cpuctx->ctx.nr_active) 2398 2390 rotate = 1; 2391 + if (cpuctx->ctx.nr_freq) 2392 + freq = 1; 2399 2393 } 2400 2394 2401 2395 ctx = cpuctx->task_ctx; ··· 2405 2395 remove = 0; 2406 2396 if (ctx->nr_events != ctx->nr_active) 2407 2397 rotate = 1; 2398 + if (ctx->nr_freq) 2399 + freq = 1; 2408 2400 } 2401 + 2402 + if (!rotate && !freq) 2403 + goto done; 2409 2404 2410 2405 perf_ctx_lock(cpuctx, cpuctx->task_ctx); 2411 2406 perf_pmu_disable(cpuctx->ctx.pmu); 
2412 - perf_ctx_adjust_freq(&cpuctx->ctx, interval); 2413 - if (ctx) 2414 - perf_ctx_adjust_freq(ctx, interval); 2415 2407 2416 - if (!rotate) 2417 - goto done; 2408 + if (freq) { 2409 + perf_ctx_adjust_freq(&cpuctx->ctx, interval); 2410 + if (ctx) 2411 + perf_ctx_adjust_freq(ctx, interval); 2412 + } 2418 2413 2419 - cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE); 2420 - if (ctx) 2421 - ctx_sched_out(ctx, cpuctx, EVENT_FLEXIBLE); 2414 + if (rotate) { 2415 + cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE); 2416 + if (ctx) 2417 + ctx_sched_out(ctx, cpuctx, EVENT_FLEXIBLE); 2422 2418 2423 - rotate_ctx(&cpuctx->ctx); 2424 - if (ctx) 2425 - rotate_ctx(ctx); 2419 + rotate_ctx(&cpuctx->ctx); 2420 + if (ctx) 2421 + rotate_ctx(ctx); 2426 2422 2427 - perf_event_sched_in(cpuctx, ctx, current); 2423 + perf_event_sched_in(cpuctx, ctx, current); 2424 + } 2425 + 2426 + perf_pmu_enable(cpuctx->ctx.pmu); 2427 + perf_ctx_unlock(cpuctx, cpuctx->task_ctx); 2428 2428 2429 2429 done: 2430 2430 if (remove) 2431 2431 list_del_init(&cpuctx->rotation_list); 2432 - 2433 - perf_pmu_enable(cpuctx->ctx.pmu); 2434 - perf_ctx_unlock(cpuctx, cpuctx->task_ctx); 2435 2432 } 2436 2433 2437 2434 void perf_event_task_tick(void) ··· 2465 2448 if (event->state >= PERF_EVENT_STATE_INACTIVE) 2466 2449 return 0; 2467 2450 2468 - __perf_event_mark_enabled(event, ctx); 2451 + __perf_event_mark_enabled(event); 2469 2452 2470 2453 return 1; 2471 2454 } ··· 2497 2480 raw_spin_lock(&ctx->lock); 2498 2481 task_ctx_sched_out(ctx); 2499 2482 2500 - list_for_each_entry(event, &ctx->pinned_groups, group_entry) { 2501 - ret = event_enable_on_exec(event, ctx); 2502 - if (ret) 2503 - enabled = 1; 2504 - } 2505 - 2506 - list_for_each_entry(event, &ctx->flexible_groups, group_entry) { 2483 + list_for_each_entry(event, &ctx->event_list, event_entry) { 2507 2484 ret = event_enable_on_exec(event, ctx); 2508 2485 if (ret) 2509 2486 enabled = 1; ··· 2582 2571 } 2583 2572 2584 2573 return perf_event_count(event); 2585 - } 2586 - 2587 - /* 
2588 - * Callchain support 2589 - */ 2590 - 2591 - struct callchain_cpus_entries { 2592 - struct rcu_head rcu_head; 2593 - struct perf_callchain_entry *cpu_entries[0]; 2594 - }; 2595 - 2596 - static DEFINE_PER_CPU(int, callchain_recursion[PERF_NR_CONTEXTS]); 2597 - static atomic_t nr_callchain_events; 2598 - static DEFINE_MUTEX(callchain_mutex); 2599 - struct callchain_cpus_entries *callchain_cpus_entries; 2600 - 2601 - 2602 - __weak void perf_callchain_kernel(struct perf_callchain_entry *entry, 2603 - struct pt_regs *regs) 2604 - { 2605 - } 2606 - 2607 - __weak void perf_callchain_user(struct perf_callchain_entry *entry, 2608 - struct pt_regs *regs) 2609 - { 2610 - } 2611 - 2612 - static void release_callchain_buffers_rcu(struct rcu_head *head) 2613 - { 2614 - struct callchain_cpus_entries *entries; 2615 - int cpu; 2616 - 2617 - entries = container_of(head, struct callchain_cpus_entries, rcu_head); 2618 - 2619 - for_each_possible_cpu(cpu) 2620 - kfree(entries->cpu_entries[cpu]); 2621 - 2622 - kfree(entries); 2623 - } 2624 - 2625 - static void release_callchain_buffers(void) 2626 - { 2627 - struct callchain_cpus_entries *entries; 2628 - 2629 - entries = callchain_cpus_entries; 2630 - rcu_assign_pointer(callchain_cpus_entries, NULL); 2631 - call_rcu(&entries->rcu_head, release_callchain_buffers_rcu); 2632 - } 2633 - 2634 - static int alloc_callchain_buffers(void) 2635 - { 2636 - int cpu; 2637 - int size; 2638 - struct callchain_cpus_entries *entries; 2639 - 2640 - /* 2641 - * We can't use the percpu allocation API for data that can be 2642 - * accessed from NMI. Use a temporary manual per cpu allocation 2643 - * until that gets sorted out. 
2644 - */ 2645 - size = offsetof(struct callchain_cpus_entries, cpu_entries[nr_cpu_ids]); 2646 - 2647 - entries = kzalloc(size, GFP_KERNEL); 2648 - if (!entries) 2649 - return -ENOMEM; 2650 - 2651 - size = sizeof(struct perf_callchain_entry) * PERF_NR_CONTEXTS; 2652 - 2653 - for_each_possible_cpu(cpu) { 2654 - entries->cpu_entries[cpu] = kmalloc_node(size, GFP_KERNEL, 2655 - cpu_to_node(cpu)); 2656 - if (!entries->cpu_entries[cpu]) 2657 - goto fail; 2658 - } 2659 - 2660 - rcu_assign_pointer(callchain_cpus_entries, entries); 2661 - 2662 - return 0; 2663 - 2664 - fail: 2665 - for_each_possible_cpu(cpu) 2666 - kfree(entries->cpu_entries[cpu]); 2667 - kfree(entries); 2668 - 2669 - return -ENOMEM; 2670 - } 2671 - 2672 - static int get_callchain_buffers(void) 2673 - { 2674 - int err = 0; 2675 - int count; 2676 - 2677 - mutex_lock(&callchain_mutex); 2678 - 2679 - count = atomic_inc_return(&nr_callchain_events); 2680 - if (WARN_ON_ONCE(count < 1)) { 2681 - err = -EINVAL; 2682 - goto exit; 2683 - } 2684 - 2685 - if (count > 1) { 2686 - /* If the allocation failed, give up */ 2687 - if (!callchain_cpus_entries) 2688 - err = -ENOMEM; 2689 - goto exit; 2690 - } 2691 - 2692 - err = alloc_callchain_buffers(); 2693 - if (err) 2694 - release_callchain_buffers(); 2695 - exit: 2696 - mutex_unlock(&callchain_mutex); 2697 - 2698 - return err; 2699 - } 2700 - 2701 - static void put_callchain_buffers(void) 2702 - { 2703 - if (atomic_dec_and_mutex_lock(&nr_callchain_events, &callchain_mutex)) { 2704 - release_callchain_buffers(); 2705 - mutex_unlock(&callchain_mutex); 2706 - } 2707 - } 2708 - 2709 - static int get_recursion_context(int *recursion) 2710 - { 2711 - int rctx; 2712 - 2713 - if (in_nmi()) 2714 - rctx = 3; 2715 - else if (in_irq()) 2716 - rctx = 2; 2717 - else if (in_softirq()) 2718 - rctx = 1; 2719 - else 2720 - rctx = 0; 2721 - 2722 - if (recursion[rctx]) 2723 - return -1; 2724 - 2725 - recursion[rctx]++; 2726 - barrier(); 2727 - 2728 - return rctx; 2729 - } 2730 - 2731 - 
static inline void put_recursion_context(int *recursion, int rctx) 2732 - { 2733 - barrier(); 2734 - recursion[rctx]--; 2735 - } 2736 - 2737 - static struct perf_callchain_entry *get_callchain_entry(int *rctx) 2738 - { 2739 - int cpu; 2740 - struct callchain_cpus_entries *entries; 2741 - 2742 - *rctx = get_recursion_context(__get_cpu_var(callchain_recursion)); 2743 - if (*rctx == -1) 2744 - return NULL; 2745 - 2746 - entries = rcu_dereference(callchain_cpus_entries); 2747 - if (!entries) 2748 - return NULL; 2749 - 2750 - cpu = smp_processor_id(); 2751 - 2752 - return &entries->cpu_entries[cpu][*rctx]; 2753 - } 2754 - 2755 - static void 2756 - put_callchain_entry(int rctx) 2757 - { 2758 - put_recursion_context(__get_cpu_var(callchain_recursion), rctx); 2759 - } 2760 - 2761 - static struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) 2762 - { 2763 - int rctx; 2764 - struct perf_callchain_entry *entry; 2765 - 2766 - 2767 - entry = get_callchain_entry(&rctx); 2768 - if (rctx == -1) 2769 - return NULL; 2770 - 2771 - if (!entry) 2772 - goto exit_put; 2773 - 2774 - entry->nr = 0; 2775 - 2776 - if (!user_mode(regs)) { 2777 - perf_callchain_store(entry, PERF_CONTEXT_KERNEL); 2778 - perf_callchain_kernel(entry, regs); 2779 - if (current->mm) 2780 - regs = task_pt_regs(current); 2781 - else 2782 - regs = NULL; 2783 - } 2784 - 2785 - if (regs) { 2786 - perf_callchain_store(entry, PERF_CONTEXT_USER); 2787 - perf_callchain_user(entry, regs); 2788 - } 2789 - 2790 - exit_put: 2791 - put_callchain_entry(rctx); 2792 - 2793 - return entry; 2794 2574 } 2795 2575 2796 2576 /* ··· 2748 2946 2749 2947 if (!event->parent) { 2750 2948 if (event->attach_state & PERF_ATTACH_TASK) 2751 - jump_label_dec(&perf_sched_events); 2949 + jump_label_dec_deferred(&perf_sched_events); 2752 2950 if (event->attr.mmap || event->attr.mmap_data) 2753 2951 atomic_dec(&nr_mmap_events); 2754 2952 if (event->attr.comm) ··· 2759 2957 put_callchain_buffers(); 2760 2958 if (is_cgroup_event(event)) { 
2761 2959 atomic_dec(&per_cpu(perf_cgroup_events, event->cpu)); 2762 - jump_label_dec(&perf_sched_events); 2960 + jump_label_dec_deferred(&perf_sched_events); 2763 2961 } 2764 2962 } 2765 2963 ··· 4622 4820 struct hw_perf_event *hwc = &event->hw; 4623 4821 int throttle = 0; 4624 4822 4625 - data->period = event->hw.last_period; 4626 4823 if (!overflow) 4627 4824 overflow = perf_swevent_set_period(event); 4628 4825 ··· 4654 4853 4655 4854 if (!is_sampling_event(event)) 4656 4855 return; 4856 + 4857 + if ((event->attr.sample_type & PERF_SAMPLE_PERIOD) && !event->attr.freq) { 4858 + data->period = nr; 4859 + return perf_swevent_overflow(event, 1, data, regs); 4860 + } else 4861 + data->period = event->hw.last_period; 4657 4862 4658 4863 if (nr == 1 && hwc->sample_period == 1 && !event->attr.freq) 4659 4864 return perf_swevent_overflow(event, 1, data, regs); ··· 5788 5981 5789 5982 if (!event->parent) { 5790 5983 if (event->attach_state & PERF_ATTACH_TASK) 5791 - jump_label_inc(&perf_sched_events); 5984 + jump_label_inc(&perf_sched_events.key); 5792 5985 if (event->attr.mmap || event->attr.mmap_data) 5793 5986 atomic_inc(&nr_mmap_events); 5794 5987 if (event->attr.comm) ··· 6026 6219 * - that may need work on context switch 6027 6220 */ 6028 6221 atomic_inc(&per_cpu(perf_cgroup_events, event->cpu)); 6029 - jump_label_inc(&perf_sched_events); 6222 + jump_label_inc(&perf_sched_events.key); 6030 6223 } 6031 6224 6032 6225 /* ··· 6872 7065 6873 7066 ret = init_hw_breakpoint(); 6874 7067 WARN(ret, "hw_breakpoint initialization failed with: %d", ret); 7068 + 7069 + /* do not patch jump label more than once per second */ 7070 + jump_label_rate_limit(&perf_sched_events, HZ); 6875 7071 } 6876 7072 6877 7073 static int __init perf_event_sysfs_init(void)
+38 -1
kernel/events/internal.h
··· 1 1 #ifndef _KERNEL_EVENTS_INTERNAL_H 2 2 #define _KERNEL_EVENTS_INTERNAL_H 3 3 4 + #include <linux/hardirq.h> 5 + 6 + /* Buffer handling */ 7 + 4 8 #define RING_BUFFER_WRITABLE 0x01 5 9 6 10 struct ring_buffer { ··· 71 67 } 72 68 #endif 73 69 74 - static unsigned long perf_data_size(struct ring_buffer *rb) 70 + static inline unsigned long perf_data_size(struct ring_buffer *rb) 75 71 { 76 72 return rb->nr_pages << (PAGE_SHIFT + page_order(rb)); 77 73 } ··· 98 94 handle->size = PAGE_SIZE << page_order(rb); 99 95 } 100 96 } while (len); 97 + } 98 + 99 + /* Callchain handling */ 100 + extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs); 101 + extern int get_callchain_buffers(void); 102 + extern void put_callchain_buffers(void); 103 + 104 + static inline int get_recursion_context(int *recursion) 105 + { 106 + int rctx; 107 + 108 + if (in_nmi()) 109 + rctx = 3; 110 + else if (in_irq()) 111 + rctx = 2; 112 + else if (in_softirq()) 113 + rctx = 1; 114 + else 115 + rctx = 0; 116 + 117 + if (recursion[rctx]) 118 + return -1; 119 + 120 + recursion[rctx]++; 121 + barrier(); 122 + 123 + return rctx; 124 + } 125 + 126 + static inline void put_recursion_context(int *recursion, int rctx) 127 + { 128 + barrier(); 129 + recursion[rctx]--; 101 130 } 102 131 103 132 #endif /* _KERNEL_EVENTS_INTERNAL_H */
+42 -7
kernel/jump_label.c
··· 72 72 jump_label_unlock(); 73 73 } 74 74 75 - void jump_label_dec(struct jump_label_key *key) 75 + static void __jump_label_dec(struct jump_label_key *key, 76 + unsigned long rate_limit, struct delayed_work *work) 76 77 { 77 78 if (!atomic_dec_and_mutex_lock(&key->enabled, &jump_label_mutex)) 78 79 return; 79 80 80 - jump_label_update(key, JUMP_LABEL_DISABLE); 81 + if (rate_limit) { 82 + atomic_inc(&key->enabled); 83 + schedule_delayed_work(work, rate_limit); 84 + } else 85 + jump_label_update(key, JUMP_LABEL_DISABLE); 86 + 81 87 jump_label_unlock(); 88 + } 89 + 90 + static void jump_label_update_timeout(struct work_struct *work) 91 + { 92 + struct jump_label_key_deferred *key = 93 + container_of(work, struct jump_label_key_deferred, work.work); 94 + __jump_label_dec(&key->key, 0, NULL); 95 + } 96 + 97 + void jump_label_dec(struct jump_label_key *key) 98 + { 99 + __jump_label_dec(key, 0, NULL); 100 + } 101 + 102 + void jump_label_dec_deferred(struct jump_label_key_deferred *key) 103 + { 104 + __jump_label_dec(&key->key, key->timeout, &key->work); 105 + } 106 + 107 + 108 + void jump_label_rate_limit(struct jump_label_key_deferred *key, 109 + unsigned long rl) 110 + { 111 + key->timeout = rl; 112 + INIT_DELAYED_WORK(&key->work, jump_label_update_timeout); 82 113 } 83 114 84 115 static int addr_conflict(struct jump_entry *entry, void *start, void *end) ··· 142 111 * running code can override this to make the non-live update case 143 112 * cheaper. 
144 113 */ 145 - void __weak arch_jump_label_transform_static(struct jump_entry *entry, 114 + void __weak __init_or_module arch_jump_label_transform_static(struct jump_entry *entry, 146 115 enum jump_label_type type) 147 116 { 148 117 arch_jump_label_transform(entry, type); ··· 248 217 if (iter_start == iter_stop) 249 218 return; 250 219 251 - for (iter = iter_start; iter < iter_stop; iter++) 252 - arch_jump_label_transform_static(iter, JUMP_LABEL_DISABLE); 220 + for (iter = iter_start; iter < iter_stop; iter++) { 221 + struct jump_label_key *iterk; 222 + 223 + iterk = (struct jump_label_key *)(unsigned long)iter->key; 224 + arch_jump_label_transform_static(iter, jump_label_enabled(iterk) ? 225 + JUMP_LABEL_ENABLE : JUMP_LABEL_DISABLE); 226 + } 253 227 } 254 228 255 229 static int jump_label_add_module(struct module *mod) ··· 294 258 key->next = jlm; 295 259 296 260 if (jump_label_enabled(key)) 297 - __jump_label_update(key, iter, iter_stop, 298 - JUMP_LABEL_ENABLE); 261 + __jump_label_update(key, iter, iter_stop, JUMP_LABEL_ENABLE); 299 262 } 300 263 301 264 return 0;
+13 -17
kernel/lockdep.c
··· 500 500 usage[i] = '\0'; 501 501 } 502 502 503 - static int __print_lock_name(struct lock_class *class) 503 + static void __print_lock_name(struct lock_class *class) 504 504 { 505 505 char str[KSYM_NAME_LEN]; 506 506 const char *name; 507 507 508 508 name = class->name; 509 - if (!name) 510 - name = __get_key_name(class->key, str); 511 - 512 - return printk("%s", name); 513 - } 514 - 515 - static void print_lock_name(struct lock_class *class) 516 - { 517 - char str[KSYM_NAME_LEN], usage[LOCK_USAGE_CHARS]; 518 - const char *name; 519 - 520 - get_usage_chars(class, usage); 521 - 522 - name = class->name; 523 509 if (!name) { 524 510 name = __get_key_name(class->key, str); 525 - printk(" (%s", name); 511 + printk("%s", name); 526 512 } else { 527 - printk(" (%s", name); 513 + printk("%s", name); 528 514 if (class->name_version > 1) 529 515 printk("#%d", class->name_version); 530 516 if (class->subclass) 531 517 printk("/%d", class->subclass); 532 518 } 519 + } 520 + 521 + static void print_lock_name(struct lock_class *class) 522 + { 523 + char usage[LOCK_USAGE_CHARS]; 524 + 525 + get_usage_chars(class, usage); 526 + 527 + printk(" ("); 528 + __print_lock_name(class); 533 529 printk("){%s}", usage); 534 530 } 535 531
+79 -26
kernel/trace/trace.c
··· 338 338 /* trace_flags holds trace_options default values */ 339 339 unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | 340 340 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME | 341 - TRACE_ITER_GRAPH_TIME | TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE; 341 + TRACE_ITER_GRAPH_TIME | TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE | 342 + TRACE_ITER_IRQ_INFO; 342 343 343 344 static int trace_stop_count; 344 345 static DEFINE_RAW_SPINLOCK(tracing_start_lock); ··· 427 426 "record-cmd", 428 427 "overwrite", 429 428 "disable_on_free", 429 + "irq-info", 430 430 NULL 431 431 }; 432 432 ··· 1845 1843 trace_event_read_unlock(); 1846 1844 } 1847 1845 1846 + static void 1847 + get_total_entries(struct trace_array *tr, unsigned long *total, unsigned long *entries) 1848 + { 1849 + unsigned long count; 1850 + int cpu; 1851 + 1852 + *total = 0; 1853 + *entries = 0; 1854 + 1855 + for_each_tracing_cpu(cpu) { 1856 + count = ring_buffer_entries_cpu(tr->buffer, cpu); 1857 + /* 1858 + * If this buffer has skipped entries, then we hold all 1859 + * entries for the trace and we need to ignore the 1860 + * ones before the time stamp. 
1861 + */ 1862 + if (tr->data[cpu]->skipped_entries) { 1863 + count -= tr->data[cpu]->skipped_entries; 1864 + /* total is the same as the entries */ 1865 + *total += count; 1866 + } else 1867 + *total += count + 1868 + ring_buffer_overrun_cpu(tr->buffer, cpu); 1869 + *entries += count; 1870 + } 1871 + } 1872 + 1848 1873 static void print_lat_help_header(struct seq_file *m) 1849 1874 { 1850 1875 seq_puts(m, "# _------=> CPU# \n"); ··· 1884 1855 seq_puts(m, "# \\ / ||||| \\ | / \n"); 1885 1856 } 1886 1857 1887 - static void print_func_help_header(struct seq_file *m) 1858 + static void print_event_info(struct trace_array *tr, struct seq_file *m) 1888 1859 { 1889 - seq_puts(m, "# TASK-PID CPU# TIMESTAMP FUNCTION\n"); 1860 + unsigned long total; 1861 + unsigned long entries; 1862 + 1863 + get_total_entries(tr, &total, &entries); 1864 + seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu #P:%d\n", 1865 + entries, total, num_online_cpus()); 1866 + seq_puts(m, "#\n"); 1867 + } 1868 + 1869 + static void print_func_help_header(struct trace_array *tr, struct seq_file *m) 1870 + { 1871 + print_event_info(tr, m); 1872 + seq_puts(m, "# TASK-PID CPU# TIMESTAMP FUNCTION\n"); 1890 1873 seq_puts(m, "# | | | | |\n"); 1891 1874 } 1892 1875 1876 + static void print_func_help_header_irq(struct trace_array *tr, struct seq_file *m) 1877 + { 1878 + print_event_info(tr, m); 1879 + seq_puts(m, "# _-----=> irqs-off\n"); 1880 + seq_puts(m, "# / _----=> need-resched\n"); 1881 + seq_puts(m, "# | / _---=> hardirq/softirq\n"); 1882 + seq_puts(m, "# || / _--=> preempt-depth\n"); 1883 + seq_puts(m, "# ||| / delay\n"); 1884 + seq_puts(m, "# TASK-PID CPU# |||| TIMESTAMP FUNCTION\n"); 1885 + seq_puts(m, "# | | | |||| | |\n"); 1886 + } 1893 1887 1894 1888 void 1895 1889 print_trace_header(struct seq_file *m, struct trace_iterator *iter) ··· 1921 1869 struct trace_array *tr = iter->tr; 1922 1870 struct trace_array_cpu *data = tr->data[tr->cpu]; 1923 1871 struct tracer *type = current_trace; 1924 
- unsigned long entries = 0; 1925 - unsigned long total = 0; 1926 - unsigned long count; 1872 + unsigned long entries; 1873 + unsigned long total; 1927 1874 const char *name = "preemption"; 1928 - int cpu; 1929 1875 1930 1876 if (type) 1931 1877 name = type->name; 1932 1878 1933 - 1934 - for_each_tracing_cpu(cpu) { 1935 - count = ring_buffer_entries_cpu(tr->buffer, cpu); 1936 - /* 1937 - * If this buffer has skipped entries, then we hold all 1938 - * entries for the trace and we need to ignore the 1939 - * ones before the time stamp. 1940 - */ 1941 - if (tr->data[cpu]->skipped_entries) { 1942 - count -= tr->data[cpu]->skipped_entries; 1943 - /* total is the same as the entries */ 1944 - total += count; 1945 - } else 1946 - total += count + 1947 - ring_buffer_overrun_cpu(tr->buffer, cpu); 1948 - entries += count; 1949 - } 1879 + get_total_entries(tr, &total, &entries); 1950 1880 1951 1881 seq_printf(m, "# %s latency trace v1.1.5 on %s\n", 1952 1882 name, UTS_RELEASE); ··· 2174 2140 return print_trace_fmt(iter); 2175 2141 } 2176 2142 2143 + void trace_latency_header(struct seq_file *m) 2144 + { 2145 + struct trace_iterator *iter = m->private; 2146 + 2147 + /* print nothing if the buffers are empty */ 2148 + if (trace_empty(iter)) 2149 + return; 2150 + 2151 + if (iter->iter_flags & TRACE_FILE_LAT_FMT) 2152 + print_trace_header(m, iter); 2153 + 2154 + if (!(trace_flags & TRACE_ITER_VERBOSE)) 2155 + print_lat_help_header(m); 2156 + } 2157 + 2177 2158 void trace_default_header(struct seq_file *m) 2178 2159 { 2179 2160 struct trace_iterator *iter = m->private; ··· 2204 2155 if (!(trace_flags & TRACE_ITER_VERBOSE)) 2205 2156 print_lat_help_header(m); 2206 2157 } else { 2207 - if (!(trace_flags & TRACE_ITER_VERBOSE)) 2208 - print_func_help_header(m); 2158 + if (!(trace_flags & TRACE_ITER_VERBOSE)) { 2159 + if (trace_flags & TRACE_ITER_IRQ_INFO) 2160 + print_func_help_header_irq(iter->tr, m); 2161 + else 2162 + print_func_help_header(iter->tr, m); 2163 + } 2209 2164 } 2210 
2165 } 2211 2166
+2
kernel/trace/trace.h
··· 370 370 unsigned long ip, 371 371 unsigned long parent_ip, 372 372 unsigned long flags, int pc); 373 + void trace_latency_header(struct seq_file *m); 373 374 void trace_default_header(struct seq_file *m); 374 375 void print_trace_header(struct seq_file *m, struct trace_iterator *iter); 375 376 int trace_empty(struct trace_iterator *iter); ··· 655 654 TRACE_ITER_RECORD_CMD = 0x100000, 656 655 TRACE_ITER_OVERWRITE = 0x200000, 657 656 TRACE_ITER_STOP_ON_FREE = 0x400000, 657 + TRACE_ITER_IRQ_INFO = 0x800000, 658 658 }; 659 659 660 660 /*
+19 -7
kernel/trace/trace_events_filter.c
··· 27 27 #include "trace.h" 28 28 #include "trace_output.h" 29 29 30 + #define DEFAULT_SYS_FILTER_MESSAGE \ 31 + "### global filter ###\n" \ 32 + "# Use this to set filters for multiple events.\n" \ 33 + "# Only events with the given fields will be affected.\n" \ 34 + "# If no events are modified, an error message will be displayed here" 35 + 30 36 enum filter_op_ids 31 37 { 32 38 OP_OR, ··· 652 646 if (filter && filter->filter_string) 653 647 trace_seq_printf(s, "%s\n", filter->filter_string); 654 648 else 655 - trace_seq_printf(s, "none\n"); 649 + trace_seq_printf(s, DEFAULT_SYS_FILTER_MESSAGE "\n"); 656 650 mutex_unlock(&event_mutex); 657 651 } 658 652 ··· 1844 1838 if (!filter) 1845 1839 goto out; 1846 1840 1847 - replace_filter_string(filter, filter_string); 1841 + /* System filters just show a default message */ 1842 + kfree(filter->filter_string); 1843 + filter->filter_string = NULL; 1844 + 1848 1845 /* 1849 1846 * No event actually uses the system filter 1850 1847 * we can free it without synchronize_sched(). ··· 1857 1848 1858 1849 parse_init(ps, filter_ops, filter_string); 1859 1850 err = filter_parse(ps); 1860 - if (err) { 1861 - append_filter_err(ps, system->filter); 1862 - goto out; 1863 - } 1851 + if (err) 1852 + goto err_filter; 1864 1853 1865 1854 err = replace_system_preds(system, ps, filter_string); 1866 1855 if (err) 1867 - append_filter_err(ps, system->filter); 1856 + goto err_filter; 1868 1857 1869 1858 out: 1870 1859 filter_opstack_clear(ps); ··· 1872 1865 mutex_unlock(&event_mutex); 1873 1866 1874 1867 return err; 1868 + 1869 + err_filter: 1870 + replace_filter_string(filter, filter_string); 1871 + append_filter_err(ps, system->filter); 1872 + goto out; 1875 1873 } 1876 1874 1877 1875 #ifdef CONFIG_PERF_EVENTS
+12 -1
kernel/trace/trace_irqsoff.c
··· 280 280 } 281 281 282 282 static void irqsoff_graph_return(struct ftrace_graph_ret *trace) { } 283 - static void irqsoff_print_header(struct seq_file *s) { } 284 283 static void irqsoff_trace_open(struct trace_iterator *iter) { } 285 284 static void irqsoff_trace_close(struct trace_iterator *iter) { } 285 + 286 + #ifdef CONFIG_FUNCTION_TRACER 287 + static void irqsoff_print_header(struct seq_file *s) 288 + { 289 + trace_default_header(s); 290 + } 291 + #else 292 + static void irqsoff_print_header(struct seq_file *s) 293 + { 294 + trace_latency_header(s); 295 + } 296 + #endif /* CONFIG_FUNCTION_TRACER */ 286 297 #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ 287 298 288 299 /*
+14 -2
kernel/trace/trace_output.c
··· 627 627 unsigned long usec_rem = do_div(t, USEC_PER_SEC); 628 628 unsigned long secs = (unsigned long)t; 629 629 char comm[TASK_COMM_LEN]; 630 + int ret; 630 631 631 632 trace_find_cmdline(entry->pid, comm); 632 633 633 - return trace_seq_printf(s, "%16s-%-5d [%03d] %5lu.%06lu: ", 634 - comm, entry->pid, iter->cpu, secs, usec_rem); 634 + ret = trace_seq_printf(s, "%16s-%-5d [%03d] ", 635 + comm, entry->pid, iter->cpu); 636 + if (!ret) 637 + return 0; 638 + 639 + if (trace_flags & TRACE_ITER_IRQ_INFO) { 640 + ret = trace_print_lat_fmt(s, entry); 641 + if (!ret) 642 + return 0; 643 + } 644 + 645 + return trace_seq_printf(s, " %5lu.%06lu: ", 646 + secs, usec_rem); 635 647 } 636 648 637 649 int trace_print_lat_context(struct trace_iterator *iter)
+12 -1
kernel/trace/trace_sched_wakeup.c
··· 280 280 } 281 281 282 282 static void wakeup_graph_return(struct ftrace_graph_ret *trace) { } 283 - static void wakeup_print_header(struct seq_file *s) { } 284 283 static void wakeup_trace_open(struct trace_iterator *iter) { } 285 284 static void wakeup_trace_close(struct trace_iterator *iter) { } 285 + 286 + #ifdef CONFIG_FUNCTION_TRACER 287 + static void wakeup_print_header(struct seq_file *s) 288 + { 289 + trace_default_header(s); 290 + } 291 + #else 292 + static void wakeup_print_header(struct seq_file *s) 293 + { 294 + trace_latency_header(s); 295 + } 296 + #endif /* CONFIG_FUNCTION_TRACER */ 286 297 #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ 287 298 288 299 /*
+2 -2
tools/perf/Documentation/perf-annotate.txt
··· 22 22 ------- 23 23 -i:: 24 24 --input=:: 25 - Input file name. (default: perf.data) 25 + Input file name. (default: perf.data unless stdin is a fifo) 26 26 27 27 -d:: 28 28 --dsos=<dso[,dso...]>:: ··· 66 66 used. This interfaces starts by centering on the line with more 67 67 samples, TAB/UNTAB cycles through the lines with more samples. 68 68 69 - -c:: 69 + -C:: 70 70 --cpu:: Only report samples for the list of CPUs provided. Multiple CPUs can 71 71 be provided as a comma-separated list with no space: 0,1. Ranges of 72 72 CPUs are specified with -: 0-2. Default is to report samples on all
+1 -1
tools/perf/Documentation/perf-buildid-list.txt
··· 26 26 Show only DSOs with hits. 27 27 -i:: 28 28 --input=:: 29 - Input file name. (default: perf.data) 29 + Input file name. (default: perf.data unless stdin is a fifo) 30 30 -f:: 31 31 --force:: 32 32 Don't do ownership validation.
+1 -1
tools/perf/Documentation/perf-evlist.txt
··· 18 18 ------- 19 19 -i:: 20 20 --input=:: 21 - Input file name. (default: perf.data) 21 + Input file name. (default: perf.data unless stdin is a fifo) 22 22 23 23 SEE ALSO 24 24 --------
+1 -1
tools/perf/Documentation/perf-kmem.txt
··· 23 23 ------- 24 24 -i <file>:: 25 25 --input=<file>:: 26 - Select the input file (default: perf.data) 26 + Select the input file (default: perf.data unless stdin is a fifo) 27 27 28 28 --caller:: 29 29 Show per-callsite statistics
+1 -1
tools/perf/Documentation/perf-lock.txt
··· 29 29 30 30 -i:: 31 31 --input=<file>:: 32 - Input file name. 32 + Input file name. (default: perf.data unless stdin is a fifo) 33 33 34 34 -v:: 35 35 --verbose::
+1 -1
tools/perf/Documentation/perf-record.txt
··· 89 89 90 90 -m:: 91 91 --mmap-pages=:: 92 - Number of mmap data pages. 92 + Number of mmap data pages. Must be a power of two. 93 93 94 94 -g:: 95 95 --call-graph::
+6 -5
tools/perf/Documentation/perf-report.txt
··· 19 19 ------- 20 20 -i:: 21 21 --input=:: 22 - Input file name. (default: perf.data) 22 + Input file name. (default: perf.data unless stdin is a fifo) 23 23 24 24 -v:: 25 25 --verbose:: ··· 39 39 -T:: 40 40 --threads:: 41 41 Show per-thread event counters 42 - -C:: 42 + -c:: 43 43 --comms=:: 44 44 Only consider symbols in these comms. CSV that understands 45 45 file://filename entries. ··· 80 80 --dump-raw-trace:: 81 81 Dump raw trace in ASCII. 82 82 83 - -g [type,min,order]:: 83 + -g [type,min[,limit],order]:: 84 84 --call-graph:: 85 - Display call chains using type, min percent threshold and order. 85 + Display call chains using type, min percent threshold, optional print 86 + limit and order. 86 87 type can be either: 87 88 - flat: single column, linear exposure of call chains. 88 89 - graph: use a graph tree, displaying absolute overhead rates. ··· 129 128 --symfs=<directory>:: 130 129 Look for files with symbols relative to this directory. 131 130 132 - -c:: 131 + -C:: 133 132 --cpu:: Only report samples for the list of CPUs provided. Multiple CPUs can 134 133 be provided as a comma-separated list with no space: 0,1. Ranges of 135 134 CPUs are specified with -: 0-2. Default is to report samples on all
+1 -1
tools/perf/Documentation/perf-sched.txt
··· 40 40 ------- 41 41 -i:: 42 42 --input=<file>:: 43 - Input file name. (default: perf.data) 43 + Input file name. (default: perf.data unless stdin is a fifo) 44 44 45 45 -v:: 46 46 --verbose::
+7 -2
tools/perf/Documentation/perf-script.txt
··· 106 106 107 107 -i:: 108 108 --input=:: 109 - Input file name. 109 + Input file name. (default: perf.data unless stdin is a fifo) 110 110 111 111 -d:: 112 112 --debug-mode:: ··· 182 182 --hide-call-graph:: 183 183 When printing symbols do not display call chain. 184 184 185 - -c:: 185 + -C:: 186 186 --cpu:: Only report samples for the list of CPUs provided. Multiple CPUs can 187 187 be provided as a comma-separated list with no space: 0,1. Ranges of 188 188 CPUs are specified with -: 0-2. Default is to report samples on all 189 189 CPUs. 190 + 191 + -c:: 192 + --comms=:: 193 + Only display events for these comms. CSV that understands 194 + file://filename entries. 190 195 191 196 -I:: 192 197 --show-info::
+7 -1
tools/perf/Documentation/perf-test.txt
··· 8 8 SYNOPSIS 9 9 -------- 10 10 [verse] 11 - 'perf test <options>' 11 + 'perf test [<options>] [{list <test-name-fragment>|[<test-name-fragments>|<test-numbers>]}]' 12 12 13 13 DESCRIPTION 14 14 ----------- 15 15 This command does assorted sanity tests, initially through linked routines but 16 16 also will look for a directory with more tests in the form of scripts. 17 + 18 + To get a list of available tests use 'perf test list', specifying a test name 19 + fragment will show all tests that have it. 20 + 21 + To run just specific tests, inform test name fragments or the numbers obtained 22 + from 'perf test list'. 17 23 18 24 OPTIONS 19 25 -------
+1 -1
tools/perf/Documentation/perf-timechart.txt
··· 27 27 Select the output file (default: output.svg) 28 28 -i:: 29 29 --input=:: 30 - Select the input file (default: perf.data) 30 + Select the input file (default: perf.data unless stdin is a fifo) 31 31 -w:: 32 32 --width=:: 33 33 Select the width of the SVG file (default: 1000)
+1
tools/perf/Makefile
··· 278 278 LIB_H += util/strlist.h 279 279 LIB_H += util/strfilter.h 280 280 LIB_H += util/svghelper.h 281 + LIB_H += util/tool.h 281 282 LIB_H += util/run-command.h 282 283 LIB_H += util/sigchain.h 283 284 LIB_H += util/symbol.h
+67 -65
tools/perf/builtin-annotate.c
··· 27 27 #include "util/sort.h" 28 28 #include "util/hist.h" 29 29 #include "util/session.h" 30 + #include "util/tool.h" 30 31 31 32 #include <linux/bitmap.h> 32 33 33 - static char const *input_name = "perf.data"; 34 + struct perf_annotate { 35 + struct perf_tool tool; 36 + char const *input_name; 37 + bool force, use_tui, use_stdio; 38 + bool full_paths; 39 + bool print_line; 40 + const char *sym_hist_filter; 41 + const char *cpu_list; 42 + DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); 43 + }; 34 44 35 - static bool force, use_tui, use_stdio; 36 - 37 - static bool full_paths; 38 - 39 - static bool print_line; 40 - 41 - static const char *sym_hist_filter; 42 - 43 - static const char *cpu_list; 44 - static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); 45 - 46 - static int perf_evlist__add_sample(struct perf_evlist *evlist, 47 - struct perf_sample *sample, 48 - struct perf_evsel *evsel, 49 - struct addr_location *al) 45 + static int perf_evsel__add_sample(struct perf_evsel *evsel, 46 + struct perf_sample *sample, 47 + struct addr_location *al, 48 + struct perf_annotate *ann) 50 49 { 51 50 struct hist_entry *he; 52 51 int ret; 53 52 54 - if (sym_hist_filter != NULL && 55 - (al->sym == NULL || strcmp(sym_hist_filter, al->sym->name) != 0)) { 53 + if (ann->sym_hist_filter != NULL && 54 + (al->sym == NULL || 55 + strcmp(ann->sym_hist_filter, al->sym->name) != 0)) { 56 56 /* We're only interested in a symbol named sym_hist_filter */ 57 57 if (al->sym != NULL) { 58 58 rb_erase(&al->sym->rb_node, ··· 69 69 ret = 0; 70 70 if (he->ms.sym != NULL) { 71 71 struct annotation *notes = symbol__annotation(he->ms.sym); 72 - if (notes->src == NULL && 73 - symbol__alloc_hist(he->ms.sym, evlist->nr_entries) < 0) 72 + if (notes->src == NULL && symbol__alloc_hist(he->ms.sym) < 0) 74 73 return -ENOMEM; 75 74 76 75 ret = hist_entry__inc_addr_samples(he, evsel->idx, al->addr); ··· 80 81 return ret; 81 82 } 82 83 83 - static int process_sample_event(union perf_event *event, 84 + static int 
process_sample_event(struct perf_tool *tool, 85 + union perf_event *event, 84 86 struct perf_sample *sample, 85 87 struct perf_evsel *evsel, 86 - struct perf_session *session) 88 + struct machine *machine) 87 89 { 90 + struct perf_annotate *ann = container_of(tool, struct perf_annotate, tool); 88 91 struct addr_location al; 89 92 90 - if (perf_event__preprocess_sample(event, session, &al, sample, 93 + if (perf_event__preprocess_sample(event, machine, &al, sample, 91 94 symbol__annotate_init) < 0) { 92 95 pr_warning("problem processing %d event, skipping it.\n", 93 96 event->header.type); 94 97 return -1; 95 98 } 96 99 97 - if (cpu_list && !test_bit(sample->cpu, cpu_bitmap)) 100 + if (ann->cpu_list && !test_bit(sample->cpu, ann->cpu_bitmap)) 98 101 return 0; 99 102 100 - if (!al.filtered && 101 - perf_evlist__add_sample(session->evlist, sample, evsel, &al)) { 103 + if (!al.filtered && perf_evsel__add_sample(evsel, sample, &al, ann)) { 102 104 pr_warning("problem incrementing symbol count, " 103 105 "skipping event\n"); 104 106 return -1; ··· 108 108 return 0; 109 109 } 110 110 111 - static int hist_entry__tty_annotate(struct hist_entry *he, int evidx) 111 + static int hist_entry__tty_annotate(struct hist_entry *he, int evidx, 112 + struct perf_annotate *ann) 112 113 { 113 114 return symbol__tty_annotate(he->ms.sym, he->ms.map, evidx, 114 - print_line, full_paths, 0, 0); 115 + ann->print_line, ann->full_paths, 0, 0); 115 116 } 116 117 117 118 static void hists__find_annotations(struct hists *self, int evidx, 118 - int nr_events) 119 + struct perf_annotate *ann) 119 120 { 120 121 struct rb_node *nd = rb_first(&self->entries), *next; 121 122 int key = K_RIGHT; ··· 139 138 } 140 139 141 140 if (use_browser > 0) { 142 - key = hist_entry__tui_annotate(he, evidx, nr_events, 143 - NULL, NULL, 0); 141 + key = hist_entry__tui_annotate(he, evidx, NULL, NULL, 0); 144 142 switch (key) { 145 143 case K_RIGHT: 146 144 next = rb_next(nd); ··· 154 154 if (next != NULL) 155 155 nd = 
next; 156 156 } else { 157 - hist_entry__tty_annotate(he, evidx); 157 + hist_entry__tty_annotate(he, evidx, ann); 158 158 nd = rb_next(nd); 159 159 /* 160 160 * Since we have a hist_entry per IP for the same ··· 167 167 } 168 168 } 169 169 170 - static struct perf_event_ops event_ops = { 171 - .sample = process_sample_event, 172 - .mmap = perf_event__process_mmap, 173 - .comm = perf_event__process_comm, 174 - .fork = perf_event__process_task, 175 - .ordered_samples = true, 176 - .ordering_requires_timestamps = true, 177 - }; 178 - 179 - static int __cmd_annotate(void) 170 + static int __cmd_annotate(struct perf_annotate *ann) 180 171 { 181 172 int ret; 182 173 struct perf_session *session; 183 174 struct perf_evsel *pos; 184 175 u64 total_nr_samples; 185 176 186 - session = perf_session__new(input_name, O_RDONLY, force, false, &event_ops); 177 + session = perf_session__new(ann->input_name, O_RDONLY, 178 + ann->force, false, &ann->tool); 187 179 if (session == NULL) 188 180 return -ENOMEM; 189 181 190 - if (cpu_list) { 191 - ret = perf_session__cpu_bitmap(session, cpu_list, cpu_bitmap); 182 + if (ann->cpu_list) { 183 + ret = perf_session__cpu_bitmap(session, ann->cpu_list, 184 + ann->cpu_bitmap); 192 185 if (ret) 193 186 goto out_delete; 194 187 } 195 188 196 - ret = perf_session__process_events(session, &event_ops); 189 + ret = perf_session__process_events(session, &ann->tool); 197 190 if (ret) 198 191 goto out_delete; 199 192 ··· 210 217 total_nr_samples += nr_samples; 211 218 hists__collapse_resort(hists); 212 219 hists__output_resort(hists); 213 - hists__find_annotations(hists, pos->idx, 214 - session->evlist->nr_entries); 220 + hists__find_annotations(hists, pos->idx, ann); 215 221 } 216 222 } 217 223 218 224 if (total_nr_samples == 0) { 219 - ui__warning("The %s file has no samples!\n", input_name); 225 + ui__warning("The %s file has no samples!\n", session->filename); 220 226 goto out_delete; 221 227 } 222 228 out_delete: ··· 239 247 NULL 240 248 }; 241 249 
242 - static const struct option options[] = { 243 - OPT_STRING('i', "input", &input_name, "file", 250 + int cmd_annotate(int argc, const char **argv, const char *prefix __used) 251 + { 252 + struct perf_annotate annotate = { 253 + .tool = { 254 + .sample = process_sample_event, 255 + .mmap = perf_event__process_mmap, 256 + .comm = perf_event__process_comm, 257 + .fork = perf_event__process_task, 258 + .ordered_samples = true, 259 + .ordering_requires_timestamps = true, 260 + }, 261 + }; 262 + const struct option options[] = { 263 + OPT_STRING('i', "input", &annotate.input_name, "file", 244 264 "input file name"), 245 265 OPT_STRING('d', "dsos", &symbol_conf.dso_list_str, "dso[,dso...]", 246 266 "only consider symbols in these dsos"), 247 - OPT_STRING('s', "symbol", &sym_hist_filter, "symbol", 267 + OPT_STRING('s', "symbol", &annotate.sym_hist_filter, "symbol", 248 268 "symbol to annotate"), 249 - OPT_BOOLEAN('f', "force", &force, "don't complain, do it"), 269 + OPT_BOOLEAN('f', "force", &annotate.force, "don't complain, do it"), 250 270 OPT_INCR('v', "verbose", &verbose, 251 271 "be more verbose (show symbol address, etc)"), 252 272 OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, 253 273 "dump raw trace in ASCII"), 254 - OPT_BOOLEAN(0, "tui", &use_tui, "Use the TUI interface"), 255 - OPT_BOOLEAN(0, "stdio", &use_stdio, "Use the stdio interface"), 274 + OPT_BOOLEAN(0, "tui", &annotate.use_tui, "Use the TUI interface"), 275 + OPT_BOOLEAN(0, "stdio", &annotate.use_stdio, "Use the stdio interface"), 256 276 OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name, 257 277 "file", "vmlinux pathname"), 258 278 OPT_BOOLEAN('m', "modules", &symbol_conf.use_modules, 259 279 "load module symbols - WARNING: use only with -k and LIVE kernel"), 260 - OPT_BOOLEAN('l', "print-line", &print_line, 280 + OPT_BOOLEAN('l', "print-line", &annotate.print_line, 261 281 "print matching source lines (may be slow)"), 262 - OPT_BOOLEAN('P', "full-paths", &full_paths, 282 + OPT_BOOLEAN('P', 
"full-paths", &annotate.full_paths, 263 283 "Don't shorten the displayed pathnames"), 264 - OPT_STRING('c', "cpu", &cpu_list, "cpu", "list of cpus to profile"), 284 + OPT_STRING('C', "cpu", &annotate.cpu_list, "cpu", "list of cpus to profile"), 265 285 OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory", 266 286 "Look for files with symbols relative to this directory"), 267 287 OPT_BOOLEAN(0, "source", &symbol_conf.annotate_src, ··· 283 279 OPT_STRING('M', "disassembler-style", &disassembler_style, "disassembler style", 284 280 "Specify disassembler style (e.g. -M intel for intel syntax)"), 285 281 OPT_END() 286 - }; 282 + }; 287 283 288 - int cmd_annotate(int argc, const char **argv, const char *prefix __used) 289 - { 290 284 argc = parse_options(argc, argv, options, annotate_usage, 0); 291 285 292 - if (use_stdio) 286 + if (annotate.use_stdio) 293 287 use_browser = 0; 294 - else if (use_tui) 288 + else if (annotate.use_tui) 295 289 use_browser = 1; 296 290 297 291 setup_browser(true); ··· 310 308 if (argc > 1) 311 309 usage_with_options(annotate_usage, options); 312 310 313 - sym_hist_filter = argv[0]; 311 + annotate.sym_hist_filter = argv[0]; 314 312 } 315 313 316 314 if (field_sep && *field_sep == '.') { ··· 318 316 return -1; 319 317 } 320 318 321 - return __cmd_annotate(); 319 + return __cmd_annotate(&annotate); 322 320 }
+27 -26
tools/perf/builtin-buildid-list.c
··· 18 18 19 19 #include <libelf.h> 20 20 21 - static char const *input_name = "perf.data"; 21 + static const char *input_name; 22 22 static bool force; 23 23 static bool show_kernel; 24 24 static bool with_hits; ··· 38 38 "be more verbose"), 39 39 OPT_END() 40 40 }; 41 - 42 - static int perf_session__list_build_ids(void) 43 - { 44 - struct perf_session *session; 45 - 46 - session = perf_session__new(input_name, O_RDONLY, force, false, 47 - &build_id__mark_dso_hit_ops); 48 - if (session == NULL) 49 - return -1; 50 - 51 - if (with_hits) 52 - perf_session__process_events(session, &build_id__mark_dso_hit_ops); 53 - 54 - perf_session__fprintf_dsos_buildid(session, stdout, with_hits); 55 - 56 - perf_session__delete(session); 57 - return 0; 58 - } 59 41 60 42 static int sysfs__fprintf_build_id(FILE *fp) 61 43 { ··· 67 85 return fprintf(fp, "%s\n", sbuild_id); 68 86 } 69 87 88 + static int perf_session__list_build_ids(void) 89 + { 90 + struct perf_session *session; 91 + 92 + elf_version(EV_CURRENT); 93 + 94 + session = perf_session__new(input_name, O_RDONLY, force, false, 95 + &build_id__mark_dso_hit_ops); 96 + if (session == NULL) 97 + return -1; 98 + 99 + /* 100 + * See if this is an ELF file first: 101 + */ 102 + if (filename__fprintf_build_id(session->filename, stdout)) 103 + goto out; 104 + 105 + if (with_hits) 106 + perf_session__process_events(session, &build_id__mark_dso_hit_ops); 107 + 108 + perf_session__fprintf_dsos_buildid(session, stdout, with_hits); 109 + out: 110 + perf_session__delete(session); 111 + return 0; 112 + } 113 + 70 114 static int __cmd_buildid_list(void) 71 115 { 72 116 if (show_kernel) 73 117 return sysfs__fprintf_build_id(stdout); 74 - 75 - elf_version(EV_CURRENT); 76 - /* 77 - * See if this is an ELF file first: 78 - */ 79 - if (filename__fprintf_build_id(input_name, stdout)) 80 - return 0; 81 118 82 119 return perf_session__list_build_ids(); 83 120 }
+12 -9
tools/perf/builtin-diff.c
··· 9 9 #include "util/debug.h" 10 10 #include "util/event.h" 11 11 #include "util/hist.h" 12 + #include "util/evsel.h" 12 13 #include "util/session.h" 14 + #include "util/tool.h" 13 15 #include "util/sort.h" 14 16 #include "util/symbol.h" 15 17 #include "util/util.h" ··· 32 30 return -ENOMEM; 33 31 } 34 32 35 - static int diff__process_sample_event(union perf_event *event, 33 + static int diff__process_sample_event(struct perf_tool *tool __used, 34 + union perf_event *event, 36 35 struct perf_sample *sample, 37 36 struct perf_evsel *evsel __used, 38 - struct perf_session *session) 37 + struct machine *machine) 39 38 { 40 39 struct addr_location al; 41 40 42 - if (perf_event__preprocess_sample(event, session, &al, sample, NULL) < 0) { 41 + if (perf_event__preprocess_sample(event, machine, &al, sample, NULL) < 0) { 43 42 pr_warning("problem processing %d event, skipping it.\n", 44 43 event->header.type); 45 44 return -1; ··· 49 46 if (al.filtered || al.sym == NULL) 50 47 return 0; 51 48 52 - if (hists__add_entry(&session->hists, &al, sample->period)) { 49 + if (hists__add_entry(&evsel->hists, &al, sample->period)) { 53 50 pr_warning("problem incrementing symbol period, skipping event\n"); 54 51 return -1; 55 52 } 56 53 57 - session->hists.stats.total_period += sample->period; 54 + evsel->hists.stats.total_period += sample->period; 58 55 return 0; 59 56 } 60 57 61 - static struct perf_event_ops event_ops = { 58 + static struct perf_tool perf_diff = { 62 59 .sample = diff__process_sample_event, 63 60 .mmap = perf_event__process_mmap, 64 61 .comm = perf_event__process_comm, ··· 148 145 int ret, i; 149 146 struct perf_session *session[2]; 150 147 151 - session[0] = perf_session__new(input_old, O_RDONLY, force, false, &event_ops); 152 - session[1] = perf_session__new(input_new, O_RDONLY, force, false, &event_ops); 148 + session[0] = perf_session__new(input_old, O_RDONLY, force, false, &perf_diff); 149 + session[1] = perf_session__new(input_new, O_RDONLY, force, false, 
&perf_diff); 153 150 if (session[0] == NULL || session[1] == NULL) 154 151 return -ENOMEM; 155 152 156 153 for (i = 0; i < 2; ++i) { 157 - ret = perf_session__process_events(session[i], &event_ops); 154 + ret = perf_session__process_events(session[i], &perf_diff); 158 155 if (ret) 159 156 goto out_delete; 160 157 }
+1 -1
tools/perf/builtin-evlist.c
··· 15 15 #include "util/parse-options.h" 16 16 #include "util/session.h" 17 17 18 - static char const *input_name = "perf.data"; 18 + static const char *input_name; 19 19 20 20 static int __cmd_evlist(void) 21 21 {
+73 -47
tools/perf/builtin-inject.c
··· 9 9 10 10 #include "perf.h" 11 11 #include "util/session.h" 12 + #include "util/tool.h" 12 13 #include "util/debug.h" 13 14 14 15 #include "util/parse-options.h" ··· 17 16 static char const *input_name = "-"; 18 17 static bool inject_build_ids; 19 18 20 - static int perf_event__repipe_synth(union perf_event *event, 21 - struct perf_session *session __used) 19 + static int perf_event__repipe_synth(struct perf_tool *tool __used, 20 + union perf_event *event, 21 + struct machine *machine __used) 22 22 { 23 23 uint32_t size; 24 24 void *buf = event; ··· 38 36 return 0; 39 37 } 40 38 41 - static int perf_event__repipe(union perf_event *event, 42 - struct perf_sample *sample __used, 43 - struct perf_session *session) 39 + static int perf_event__repipe_op2_synth(struct perf_tool *tool, 40 + union perf_event *event, 41 + struct perf_session *session __used) 44 42 { 45 - return perf_event__repipe_synth(event, session); 43 + return perf_event__repipe_synth(tool, event, NULL); 46 44 } 47 45 48 - static int perf_event__repipe_sample(union perf_event *event, 46 + static int perf_event__repipe_event_type_synth(struct perf_tool *tool, 47 + union perf_event *event) 48 + { 49 + return perf_event__repipe_synth(tool, event, NULL); 50 + } 51 + 52 + static int perf_event__repipe_tracing_data_synth(union perf_event *event, 53 + struct perf_session *session __used) 54 + { 55 + return perf_event__repipe_synth(NULL, event, NULL); 56 + } 57 + 58 + static int perf_event__repipe_attr(union perf_event *event, 59 + struct perf_evlist **pevlist __used) 60 + { 61 + return perf_event__repipe_synth(NULL, event, NULL); 62 + } 63 + 64 + static int perf_event__repipe(struct perf_tool *tool, 65 + union perf_event *event, 66 + struct perf_sample *sample __used, 67 + struct machine *machine) 68 + { 69 + return perf_event__repipe_synth(tool, event, machine); 70 + } 71 + 72 + static int perf_event__repipe_sample(struct perf_tool *tool, 73 + union perf_event *event, 49 74 struct perf_sample *sample 
__used, 50 75 struct perf_evsel *evsel __used, 51 - struct perf_session *session) 76 + struct machine *machine) 52 77 { 53 - return perf_event__repipe_synth(event, session); 78 + return perf_event__repipe_synth(tool, event, machine); 54 79 } 55 80 56 - static int perf_event__repipe_mmap(union perf_event *event, 81 + static int perf_event__repipe_mmap(struct perf_tool *tool, 82 + union perf_event *event, 57 83 struct perf_sample *sample, 58 - struct perf_session *session) 84 + struct machine *machine) 59 85 { 60 86 int err; 61 87 62 - err = perf_event__process_mmap(event, sample, session); 63 - perf_event__repipe(event, sample, session); 88 + err = perf_event__process_mmap(tool, event, sample, machine); 89 + perf_event__repipe(tool, event, sample, machine); 64 90 65 91 return err; 66 92 } 67 93 68 - static int perf_event__repipe_task(union perf_event *event, 94 + static int perf_event__repipe_task(struct perf_tool *tool, 95 + union perf_event *event, 69 96 struct perf_sample *sample, 70 - struct perf_session *session) 97 + struct machine *machine) 71 98 { 72 99 int err; 73 100 74 - err = perf_event__process_task(event, sample, session); 75 - perf_event__repipe(event, sample, session); 101 + err = perf_event__process_task(tool, event, sample, machine); 102 + perf_event__repipe(tool, event, sample, machine); 76 103 77 104 return err; 78 105 } ··· 111 80 { 112 81 int err; 113 82 114 - perf_event__repipe_synth(event, session); 83 + perf_event__repipe_synth(NULL, event, NULL); 115 84 err = perf_event__process_tracing_data(event, session); 116 85 117 86 return err; ··· 131 100 return -1; 132 101 } 133 102 134 - static int dso__inject_build_id(struct dso *self, struct perf_session *session) 103 + static int dso__inject_build_id(struct dso *self, struct perf_tool *tool, 104 + struct machine *machine) 135 105 { 136 106 u16 misc = PERF_RECORD_MISC_USER; 137 - struct machine *machine; 138 107 int err; 139 108 140 109 if (dso__read_build_id(self) < 0) { ··· 142 111 return -1; 
143 112 } 144 113 145 - machine = perf_session__find_host_machine(session); 146 - if (machine == NULL) { 147 - pr_err("Can't find machine for session\n"); 148 - return -1; 149 - } 150 - 151 114 if (self->kernel) 152 115 misc = PERF_RECORD_MISC_KERNEL; 153 116 154 - err = perf_event__synthesize_build_id(self, misc, perf_event__repipe, 155 - machine, session); 117 + err = perf_event__synthesize_build_id(tool, self, misc, perf_event__repipe, 118 + machine); 156 119 if (err) { 157 120 pr_err("Can't synthesize build_id event for %s\n", self->long_name); 158 121 return -1; ··· 155 130 return 0; 156 131 } 157 132 158 - static int perf_event__inject_buildid(union perf_event *event, 133 + static int perf_event__inject_buildid(struct perf_tool *tool, 134 + union perf_event *event, 159 135 struct perf_sample *sample, 160 136 struct perf_evsel *evsel __used, 161 - struct perf_session *session) 137 + struct machine *machine) 162 138 { 163 139 struct addr_location al; 164 140 struct thread *thread; ··· 167 141 168 142 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; 169 143 170 - thread = perf_session__findnew(session, event->ip.pid); 144 + thread = machine__findnew_thread(machine, event->ip.pid); 171 145 if (thread == NULL) { 172 146 pr_err("problem processing %d event, skipping it.\n", 173 147 event->header.type); 174 148 goto repipe; 175 149 } 176 150 177 - thread__find_addr_map(thread, session, cpumode, MAP__FUNCTION, 178 - event->ip.pid, event->ip.ip, &al); 151 + thread__find_addr_map(thread, machine, cpumode, MAP__FUNCTION, 152 + event->ip.ip, &al); 179 153 180 154 if (al.map != NULL) { 181 155 if (!al.map->dso->hit) { 182 156 al.map->dso->hit = 1; 183 157 if (map__load(al.map, NULL) >= 0) { 184 - dso__inject_build_id(al.map->dso, session); 158 + dso__inject_build_id(al.map->dso, tool, machine); 185 159 /* 186 160 * If this fails, too bad, let the other side 187 161 * account this as unresolved. 
··· 194 168 } 195 169 196 170 repipe: 197 - perf_event__repipe(event, sample, session); 171 + perf_event__repipe(tool, event, sample, machine); 198 172 return 0; 199 173 } 200 174 201 - struct perf_event_ops inject_ops = { 175 + struct perf_tool perf_inject = { 202 176 .sample = perf_event__repipe_sample, 203 177 .mmap = perf_event__repipe, 204 178 .comm = perf_event__repipe, 205 179 .fork = perf_event__repipe, 206 180 .exit = perf_event__repipe, 207 181 .lost = perf_event__repipe, 208 - .read = perf_event__repipe, 182 + .read = perf_event__repipe_sample, 209 183 .throttle = perf_event__repipe, 210 184 .unthrottle = perf_event__repipe, 211 - .attr = perf_event__repipe_synth, 212 - .event_type = perf_event__repipe_synth, 213 - .tracing_data = perf_event__repipe_synth, 214 - .build_id = perf_event__repipe_synth, 185 + .attr = perf_event__repipe_attr, 186 + .event_type = perf_event__repipe_event_type_synth, 187 + .tracing_data = perf_event__repipe_tracing_data_synth, 188 + .build_id = perf_event__repipe_op2_synth, 215 189 }; 216 190 217 191 extern volatile int session_done; ··· 229 203 signal(SIGINT, sig_handler); 230 204 231 205 if (inject_build_ids) { 232 - inject_ops.sample = perf_event__inject_buildid; 233 - inject_ops.mmap = perf_event__repipe_mmap; 234 - inject_ops.fork = perf_event__repipe_task; 235 - inject_ops.tracing_data = perf_event__repipe_tracing_data; 206 + perf_inject.sample = perf_event__inject_buildid; 207 + perf_inject.mmap = perf_event__repipe_mmap; 208 + perf_inject.fork = perf_event__repipe_task; 209 + perf_inject.tracing_data = perf_event__repipe_tracing_data; 236 210 } 237 211 238 - session = perf_session__new(input_name, O_RDONLY, false, true, &inject_ops); 212 + session = perf_session__new(input_name, O_RDONLY, false, true, &perf_inject); 239 213 if (session == NULL) 240 214 return -ENOMEM; 241 215 242 - ret = perf_session__process_events(session, &inject_ops); 216 + ret = perf_session__process_events(session, &perf_inject); 243 217 244 218 
perf_session__delete(session); 245 219
+9 -7
tools/perf/builtin-kmem.c
··· 7 7 #include "util/thread.h" 8 8 #include "util/header.h" 9 9 #include "util/session.h" 10 + #include "util/tool.h" 10 11 11 12 #include "util/parse-options.h" 12 13 #include "util/trace-event.h" ··· 19 18 struct alloc_stat; 20 19 typedef int (*sort_fn_t)(struct alloc_stat *, struct alloc_stat *); 21 20 22 - static char const *input_name = "perf.data"; 21 + static const char *input_name; 23 22 24 23 static int alloc_flag; 25 24 static int caller_flag; ··· 304 303 } 305 304 } 306 305 307 - static int process_sample_event(union perf_event *event, 306 + static int process_sample_event(struct perf_tool *tool __used, 307 + union perf_event *event, 308 308 struct perf_sample *sample, 309 309 struct perf_evsel *evsel __used, 310 - struct perf_session *session) 310 + struct machine *machine) 311 311 { 312 - struct thread *thread = perf_session__findnew(session, event->ip.pid); 312 + struct thread *thread = machine__findnew_thread(machine, event->ip.pid); 313 313 314 314 if (thread == NULL) { 315 315 pr_debug("problem processing %d event, skipping it.\n", ··· 326 324 return 0; 327 325 } 328 326 329 - static struct perf_event_ops event_ops = { 327 + static struct perf_tool perf_kmem = { 330 328 .sample = process_sample_event, 331 329 .comm = perf_event__process_comm, 332 330 .ordered_samples = true, ··· 485 483 { 486 484 int err = -EINVAL; 487 485 struct perf_session *session = perf_session__new(input_name, O_RDONLY, 488 - 0, false, &event_ops); 486 + 0, false, &perf_kmem); 489 487 if (session == NULL) 490 488 return -ENOMEM; 491 489 ··· 496 494 goto out_delete; 497 495 498 496 setup_pager(); 499 - err = perf_session__process_events(session, &event_ops); 497 + err = perf_session__process_events(session, &perf_kmem); 500 498 if (err != 0) 501 499 goto out_delete; 502 500 sort_result();
+1 -1
tools/perf/builtin-kvm.c
··· 38 38 OPT_BOOLEAN(0, "guest", &perf_guest, 39 39 "Collect guest os data"), 40 40 OPT_BOOLEAN(0, "host", &perf_host, 41 - "Collect guest os data"), 41 + "Collect host os data"), 42 42 OPT_STRING(0, "guestmount", &symbol_conf.guestmount, "directory", 43 43 "guest mount directory under which every guest os" 44 44 " instance has a subdir"),
+7 -5
tools/perf/builtin-lock.c
··· 12 12 13 13 #include "util/debug.h" 14 14 #include "util/session.h" 15 + #include "util/tool.h" 15 16 16 17 #include <sys/types.h> 17 18 #include <sys/prctl.h> ··· 326 325 die("memory allocation failed\n"); 327 326 } 328 327 329 - static char const *input_name = "perf.data"; 328 + static const char *input_name; 330 329 331 330 struct raw_event_sample { 332 331 u32 size; ··· 846 845 die("Unknown type of information\n"); 847 846 } 848 847 849 - static int process_sample_event(union perf_event *event, 848 + static int process_sample_event(struct perf_tool *tool __used, 849 + union perf_event *event, 850 850 struct perf_sample *sample, 851 851 struct perf_evsel *evsel __used, 852 - struct perf_session *s) 852 + struct machine *machine) 853 853 { 854 - struct thread *thread = perf_session__findnew(s, sample->tid); 854 + struct thread *thread = machine__findnew_thread(machine, sample->tid); 855 855 856 856 if (thread == NULL) { 857 857 pr_debug("problem processing %d event, skipping it.\n", ··· 865 863 return 0; 866 864 } 867 865 868 - static struct perf_event_ops eops = { 866 + static struct perf_tool eops = { 869 867 .sample = process_sample_event, 870 868 .comm = perf_event__process_comm, 871 869 .ordered_samples = true,
-1
tools/perf/builtin-probe.c
··· 46 46 47 47 #define DEFAULT_VAR_FILTER "!__k???tab_* & !__crc_*" 48 48 #define DEFAULT_FUNC_FILTER "!_*" 49 - #define MAX_PATH_LEN 256 50 49 51 50 /* Session management structure */ 52 51 static struct {
+257 -344
tools/perf/builtin-record.c
··· 22 22 #include "util/evsel.h" 23 23 #include "util/debug.h" 24 24 #include "util/session.h" 25 + #include "util/tool.h" 25 26 #include "util/symbol.h" 26 27 #include "util/cpumap.h" 27 28 #include "util/thread_map.h" ··· 36 35 WRITE_APPEND 37 36 }; 38 37 39 - static u64 user_interval = ULLONG_MAX; 40 - static u64 default_interval = 0; 38 + struct perf_record { 39 + struct perf_tool tool; 40 + struct perf_record_opts opts; 41 + u64 bytes_written; 42 + const char *output_name; 43 + struct perf_evlist *evlist; 44 + struct perf_session *session; 45 + const char *progname; 46 + int output; 47 + unsigned int page_size; 48 + int realtime_prio; 49 + enum write_mode_t write_mode; 50 + bool no_buildid; 51 + bool no_buildid_cache; 52 + bool force; 53 + bool file_new; 54 + bool append_file; 55 + long samples; 56 + off_t post_processing_offset; 57 + }; 41 58 42 - static unsigned int page_size; 43 - static unsigned int mmap_pages = UINT_MAX; 44 - static unsigned int user_freq = UINT_MAX; 45 - static int freq = 1000; 46 - static int output; 47 - static int pipe_output = 0; 48 - static const char *output_name = NULL; 49 - static bool group = false; 50 - static int realtime_prio = 0; 51 - static bool nodelay = false; 52 - static bool raw_samples = false; 53 - static bool sample_id_all_avail = true; 54 - static bool system_wide = false; 55 - static pid_t target_pid = -1; 56 - static pid_t target_tid = -1; 57 - static pid_t child_pid = -1; 58 - static bool no_inherit = false; 59 - static enum write_mode_t write_mode = WRITE_FORCE; 60 - static bool call_graph = false; 61 - static bool inherit_stat = false; 62 - static bool no_samples = false; 63 - static bool sample_address = false; 64 - static bool sample_time = false; 65 - static bool no_buildid = false; 66 - static bool no_buildid_cache = false; 67 - static struct perf_evlist *evsel_list; 68 - 69 - static long samples = 0; 70 - static u64 bytes_written = 0; 71 - 72 - static int file_new = 1; 73 - static off_t 
post_processing_offset; 74 - 75 - static struct perf_session *session; 76 - static const char *cpu_list; 77 - static const char *progname; 78 - 79 - static void advance_output(size_t size) 59 + static void advance_output(struct perf_record *rec, size_t size) 80 60 { 81 - bytes_written += size; 61 + rec->bytes_written += size; 82 62 } 83 63 84 - static void write_output(void *buf, size_t size) 64 + static void write_output(struct perf_record *rec, void *buf, size_t size) 85 65 { 86 66 while (size) { 87 - int ret = write(output, buf, size); 67 + int ret = write(rec->output, buf, size); 88 68 89 69 if (ret < 0) 90 70 die("failed to write"); ··· 73 91 size -= ret; 74 92 buf += ret; 75 93 76 - bytes_written += ret; 94 + rec->bytes_written += ret; 77 95 } 78 96 } 79 97 80 - static int process_synthesized_event(union perf_event *event, 98 + static int process_synthesized_event(struct perf_tool *tool, 99 + union perf_event *event, 81 100 struct perf_sample *sample __used, 82 - struct perf_session *self __used) 101 + struct machine *machine __used) 83 102 { 84 - write_output(event, event->header.size); 103 + struct perf_record *rec = container_of(tool, struct perf_record, tool); 104 + write_output(rec, event, event->header.size); 85 105 return 0; 86 106 } 87 107 88 - static void mmap_read(struct perf_mmap *md) 108 + static void perf_record__mmap_read(struct perf_record *rec, 109 + struct perf_mmap *md) 89 110 { 90 111 unsigned int head = perf_mmap__read_head(md); 91 112 unsigned int old = md->prev; 92 - unsigned char *data = md->base + page_size; 113 + unsigned char *data = md->base + rec->page_size; 93 114 unsigned long size; 94 115 void *buf; 95 116 96 117 if (old == head) 97 118 return; 98 119 99 - samples++; 120 + rec->samples++; 100 121 101 122 size = head - old; 102 123 ··· 108 123 size = md->mask + 1 - (old & md->mask); 109 124 old += size; 110 125 111 - write_output(buf, size); 126 + write_output(rec, buf, size); 112 127 } 113 128 114 129 buf = &data[old & 
md->mask]; 115 130 size = head - old; 116 131 old += size; 117 132 118 - write_output(buf, size); 133 + write_output(rec, buf, size); 119 134 120 135 md->prev = old; 121 136 perf_mmap__write_tail(md, old); ··· 134 149 signr = sig; 135 150 } 136 151 137 - static void sig_atexit(void) 152 + static void perf_record__sig_exit(int exit_status __used, void *arg) 138 153 { 154 + struct perf_record *rec = arg; 139 155 int status; 140 156 141 - if (child_pid > 0) { 157 + if (rec->evlist->workload.pid > 0) { 142 158 if (!child_finished) 143 - kill(child_pid, SIGTERM); 159 + kill(rec->evlist->workload.pid, SIGTERM); 144 160 145 161 wait(&status); 146 162 if (WIFSIGNALED(status)) 147 - psignal(WTERMSIG(status), progname); 163 + psignal(WTERMSIG(status), rec->progname); 148 164 } 149 165 150 166 if (signr == -1 || signr == SIGUSR1) ··· 153 167 154 168 signal(signr, SIG_DFL); 155 169 kill(getpid(), signr); 156 - } 157 - 158 - static void config_attr(struct perf_evsel *evsel, struct perf_evlist *evlist) 159 - { 160 - struct perf_event_attr *attr = &evsel->attr; 161 - int track = !evsel->idx; /* only the first counter needs these */ 162 - 163 - attr->disabled = 1; 164 - attr->inherit = !no_inherit; 165 - attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | 166 - PERF_FORMAT_TOTAL_TIME_RUNNING | 167 - PERF_FORMAT_ID; 168 - 169 - attr->sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID; 170 - 171 - if (evlist->nr_entries > 1) 172 - attr->sample_type |= PERF_SAMPLE_ID; 173 - 174 - /* 175 - * We default some events to a 1 default interval. But keep 176 - * it a weak assumption overridable by the user. 
177 - */ 178 - if (!attr->sample_period || (user_freq != UINT_MAX && 179 - user_interval != ULLONG_MAX)) { 180 - if (freq) { 181 - attr->sample_type |= PERF_SAMPLE_PERIOD; 182 - attr->freq = 1; 183 - attr->sample_freq = freq; 184 - } else { 185 - attr->sample_period = default_interval; 186 - } 187 - } 188 - 189 - if (no_samples) 190 - attr->sample_freq = 0; 191 - 192 - if (inherit_stat) 193 - attr->inherit_stat = 1; 194 - 195 - if (sample_address) { 196 - attr->sample_type |= PERF_SAMPLE_ADDR; 197 - attr->mmap_data = track; 198 - } 199 - 200 - if (call_graph) 201 - attr->sample_type |= PERF_SAMPLE_CALLCHAIN; 202 - 203 - if (system_wide) 204 - attr->sample_type |= PERF_SAMPLE_CPU; 205 - 206 - if (sample_id_all_avail && 207 - (sample_time || system_wide || !no_inherit || cpu_list)) 208 - attr->sample_type |= PERF_SAMPLE_TIME; 209 - 210 - if (raw_samples) { 211 - attr->sample_type |= PERF_SAMPLE_TIME; 212 - attr->sample_type |= PERF_SAMPLE_RAW; 213 - attr->sample_type |= PERF_SAMPLE_CPU; 214 - } 215 - 216 - if (nodelay) { 217 - attr->watermark = 0; 218 - attr->wakeup_events = 1; 219 - } 220 - 221 - attr->mmap = track; 222 - attr->comm = track; 223 - 224 - if (target_pid == -1 && target_tid == -1 && !system_wide) { 225 - attr->disabled = 1; 226 - attr->enable_on_exec = 1; 227 - } 228 170 } 229 171 230 172 static bool perf_evlist__equal(struct perf_evlist *evlist, ··· 174 260 return true; 175 261 } 176 262 177 - static void open_counters(struct perf_evlist *evlist) 263 + static void perf_record__open(struct perf_record *rec) 178 264 { 179 265 struct perf_evsel *pos, *first; 180 - 181 - if (evlist->cpus->map[0] < 0) 182 - no_inherit = true; 266 + struct perf_evlist *evlist = rec->evlist; 267 + struct perf_session *session = rec->session; 268 + struct perf_record_opts *opts = &rec->opts; 183 269 184 270 first = list_entry(evlist->entries.next, struct perf_evsel, node); 271 + 272 + perf_evlist__config_attrs(evlist, opts); 185 273 186 274 list_for_each_entry(pos, 
&evlist->entries, node) { 187 275 struct perf_event_attr *attr = &pos->attr; ··· 202 286 */ 203 287 bool time_needed = attr->sample_type & PERF_SAMPLE_TIME; 204 288 205 - if (group && pos != first) 289 + if (opts->group && pos != first) 206 290 group_fd = first->fd; 207 - 208 - config_attr(pos, evlist); 209 291 retry_sample_id: 210 - attr->sample_id_all = sample_id_all_avail ? 1 : 0; 292 + attr->sample_id_all = opts->sample_id_all_avail ? 1 : 0; 211 293 try_again: 212 - if (perf_evsel__open(pos, evlist->cpus, evlist->threads, group, 213 - group_fd) < 0) { 294 + if (perf_evsel__open(pos, evlist->cpus, evlist->threads, 295 + opts->group, group_fd) < 0) { 214 296 int err = errno; 215 297 216 298 if (err == EPERM || err == EACCES) { 217 299 ui__error_paranoid(); 218 300 exit(EXIT_FAILURE); 219 - } else if (err == ENODEV && cpu_list) { 301 + } else if (err == ENODEV && opts->cpu_list) { 220 302 die("No such device - did you specify" 221 303 " an out-of-range profile CPU?\n"); 222 - } else if (err == EINVAL && sample_id_all_avail) { 304 + } else if (err == EINVAL && opts->sample_id_all_avail) { 223 305 /* 224 306 * Old kernel, no attr->sample_id_type_all field 225 307 */ 226 - sample_id_all_avail = false; 227 - if (!sample_time && !raw_samples && !time_needed) 308 + opts->sample_id_all_avail = false; 309 + if (!opts->sample_time && !opts->raw_samples && !time_needed) 228 310 attr->sample_type &= ~PERF_SAMPLE_TIME; 229 311 230 312 goto retry_sample_id; ··· 272 358 exit(-1); 273 359 } 274 360 275 - if (perf_evlist__mmap(evlist, mmap_pages, false) < 0) 276 - die("failed to mmap with %d (%s)\n", errno, strerror(errno)); 361 + if (perf_evlist__mmap(evlist, opts->mmap_pages, false) < 0) { 362 + if (errno == EPERM) 363 + die("Permission error mapping pages.\n" 364 + "Consider increasing " 365 + "/proc/sys/kernel/perf_event_mlock_kb,\n" 366 + "or try again with a smaller value of -m/--mmap_pages.\n" 367 + "(current value: %d)\n", opts->mmap_pages); 368 + else if 
(!is_power_of_2(opts->mmap_pages)) 369 + die("--mmap_pages/-m value must be a power of two."); 277 370 278 - if (file_new) 371 + die("failed to mmap with %d (%s)\n", errno, strerror(errno)); 372 + } 373 + 374 + if (rec->file_new) 279 375 session->evlist = evlist; 280 376 else { 281 377 if (!perf_evlist__equal(session->evlist, evlist)) { ··· 297 373 perf_session__update_sample_type(session); 298 374 } 299 375 300 - static int process_buildids(void) 376 + static int process_buildids(struct perf_record *rec) 301 377 { 302 - u64 size = lseek(output, 0, SEEK_CUR); 378 + u64 size = lseek(rec->output, 0, SEEK_CUR); 303 379 304 380 if (size == 0) 305 381 return 0; 306 382 307 - session->fd = output; 308 - return __perf_session__process_events(session, post_processing_offset, 309 - size - post_processing_offset, 383 + rec->session->fd = rec->output; 384 + return __perf_session__process_events(rec->session, rec->post_processing_offset, 385 + size - rec->post_processing_offset, 310 386 size, &build_id__mark_dso_hit_ops); 311 387 } 312 388 313 - static void atexit_header(void) 389 + static void perf_record__exit(int status __used, void *arg) 314 390 { 315 - if (!pipe_output) { 316 - session->header.data_size += bytes_written; 391 + struct perf_record *rec = arg; 317 392 318 - if (!no_buildid) 319 - process_buildids(); 320 - perf_session__write_header(session, evsel_list, output, true); 321 - perf_session__delete(session); 322 - perf_evlist__delete(evsel_list); 393 + if (!rec->opts.pipe_output) { 394 + rec->session->header.data_size += rec->bytes_written; 395 + 396 + if (!rec->no_buildid) 397 + process_buildids(rec); 398 + perf_session__write_header(rec->session, rec->evlist, 399 + rec->output, true); 400 + perf_session__delete(rec->session); 401 + perf_evlist__delete(rec->evlist); 323 402 symbol__exit(); 324 403 } 325 404 } ··· 330 403 static void perf_event__synthesize_guest_os(struct machine *machine, void *data) 331 404 { 332 405 int err; 333 - struct perf_session *psession 
= data; 406 + struct perf_tool *tool = data; 334 407 335 408 if (machine__is_host(machine)) 336 409 return; ··· 343 416 *method is used to avoid symbol missing when the first addr is 344 417 *in module instead of in guest kernel. 345 418 */ 346 - err = perf_event__synthesize_modules(process_synthesized_event, 347 - psession, machine); 419 + err = perf_event__synthesize_modules(tool, process_synthesized_event, 420 + machine); 348 421 if (err < 0) 349 422 pr_err("Couldn't record guest kernel [%d]'s reference" 350 423 " relocation symbol.\n", machine->pid); ··· 353 426 * We use _stext for guest kernel because guest kernel's /proc/kallsyms 354 427 * have no _text sometimes. 355 428 */ 356 - err = perf_event__synthesize_kernel_mmap(process_synthesized_event, 357 - psession, machine, "_text"); 429 + err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event, 430 + machine, "_text"); 358 431 if (err < 0) 359 - err = perf_event__synthesize_kernel_mmap(process_synthesized_event, 360 - psession, machine, 361 - "_stext"); 432 + err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event, 433 + machine, "_stext"); 362 434 if (err < 0) 363 435 pr_err("Couldn't record guest kernel [%d]'s reference" 364 436 " relocation symbol.\n", machine->pid); ··· 368 442 .type = PERF_RECORD_FINISHED_ROUND, 369 443 }; 370 444 371 - static void mmap_read_all(void) 445 + static void perf_record__mmap_read_all(struct perf_record *rec) 372 446 { 373 447 int i; 374 448 375 - for (i = 0; i < evsel_list->nr_mmaps; i++) { 376 - if (evsel_list->mmap[i].base) 377 - mmap_read(&evsel_list->mmap[i]); 449 + for (i = 0; i < rec->evlist->nr_mmaps; i++) { 450 + if (rec->evlist->mmap[i].base) 451 + perf_record__mmap_read(rec, &rec->evlist->mmap[i]); 378 452 } 379 453 380 - if (perf_header__has_feat(&session->header, HEADER_TRACE_INFO)) 381 - write_output(&finished_round_event, sizeof(finished_round_event)); 454 + if (perf_header__has_feat(&rec->session->header, HEADER_TRACE_INFO)) 455 
+ write_output(rec, &finished_round_event, sizeof(finished_round_event)); 382 456 } 383 457 384 - static int __cmd_record(int argc, const char **argv) 458 + static int __cmd_record(struct perf_record *rec, int argc, const char **argv) 385 459 { 386 460 struct stat st; 387 461 int flags; 388 - int err; 462 + int err, output; 389 463 unsigned long waking = 0; 390 - int child_ready_pipe[2], go_pipe[2]; 391 464 const bool forks = argc > 0; 392 - char buf; 393 465 struct machine *machine; 466 + struct perf_tool *tool = &rec->tool; 467 + struct perf_record_opts *opts = &rec->opts; 468 + struct perf_evlist *evsel_list = rec->evlist; 469 + const char *output_name = rec->output_name; 470 + struct perf_session *session; 394 471 395 - progname = argv[0]; 472 + rec->progname = argv[0]; 396 473 397 - page_size = sysconf(_SC_PAGE_SIZE); 474 + rec->page_size = sysconf(_SC_PAGE_SIZE); 398 475 399 - atexit(sig_atexit); 476 + on_exit(perf_record__sig_exit, rec); 400 477 signal(SIGCHLD, sig_handler); 401 478 signal(SIGINT, sig_handler); 402 479 signal(SIGUSR1, sig_handler); 403 480 404 - if (forks && (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0)) { 405 - perror("failed to create pipes"); 406 - exit(-1); 407 - } 408 - 409 481 if (!output_name) { 410 482 if (!fstat(STDOUT_FILENO, &st) && S_ISFIFO(st.st_mode)) 411 - pipe_output = 1; 483 + opts->pipe_output = true; 412 484 else 413 - output_name = "perf.data"; 485 + rec->output_name = output_name = "perf.data"; 414 486 } 415 487 if (output_name) { 416 488 if (!strcmp(output_name, "-")) 417 - pipe_output = 1; 489 + opts->pipe_output = true; 418 490 else if (!stat(output_name, &st) && st.st_size) { 419 - if (write_mode == WRITE_FORCE) { 491 + if (rec->write_mode == WRITE_FORCE) { 420 492 char oldname[PATH_MAX]; 421 493 snprintf(oldname, sizeof(oldname), "%s.old", 422 494 output_name); 423 495 unlink(oldname); 424 496 rename(output_name, oldname); 425 497 } 426 - } else if (write_mode == WRITE_APPEND) { 427 - write_mode = WRITE_FORCE; 
498 + } else if (rec->write_mode == WRITE_APPEND) { 499 + rec->write_mode = WRITE_FORCE; 428 500 } 429 501 } 430 502 431 503 flags = O_CREAT|O_RDWR; 432 - if (write_mode == WRITE_APPEND) 433 - file_new = 0; 504 + if (rec->write_mode == WRITE_APPEND) 505 + rec->file_new = 0; 434 506 else 435 507 flags |= O_TRUNC; 436 508 437 - if (pipe_output) 509 + if (opts->pipe_output) 438 510 output = STDOUT_FILENO; 439 511 else 440 512 output = open(output_name, flags, S_IRUSR | S_IWUSR); ··· 441 517 exit(-1); 442 518 } 443 519 520 + rec->output = output; 521 + 444 522 session = perf_session__new(output_name, O_WRONLY, 445 - write_mode == WRITE_FORCE, false, NULL); 523 + rec->write_mode == WRITE_FORCE, false, NULL); 446 524 if (session == NULL) { 447 525 pr_err("Not enough memory for reading perf file header\n"); 448 526 return -1; 449 527 } 450 528 451 - if (!no_buildid) 529 + rec->session = session; 530 + 531 + if (!rec->no_buildid) 452 532 perf_header__set_feat(&session->header, HEADER_BUILD_ID); 453 533 454 - if (!file_new) { 534 + if (!rec->file_new) { 455 535 err = perf_session__read_header(session, output); 456 536 if (err < 0) 457 537 goto out_delete_session; ··· 477 549 perf_header__set_feat(&session->header, HEADER_NUMA_TOPOLOGY); 478 550 perf_header__set_feat(&session->header, HEADER_CPUID); 479 551 480 - /* 512 kiB: default amount of unprivileged mlocked memory */ 481 - if (mmap_pages == UINT_MAX) 482 - mmap_pages = (512 * 1024) / page_size; 483 - 484 552 if (forks) { 485 - child_pid = fork(); 486 - if (child_pid < 0) { 487 - perror("failed to fork"); 488 - exit(-1); 553 + err = perf_evlist__prepare_workload(evsel_list, opts, argv); 554 + if (err < 0) { 555 + pr_err("Couldn't run the workload!\n"); 556 + goto out_delete_session; 489 557 } 490 - 491 - if (!child_pid) { 492 - if (pipe_output) 493 - dup2(2, 1); 494 - close(child_ready_pipe[0]); 495 - close(go_pipe[1]); 496 - fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC); 497 - 498 - /* 499 - * Do a dummy execvp to get the PLT 
entry resolved, 500 - * so we avoid the resolver overhead on the real 501 - * execvp call. 502 - */ 503 - execvp("", (char **)argv); 504 - 505 - /* 506 - * Tell the parent we're ready to go 507 - */ 508 - close(child_ready_pipe[1]); 509 - 510 - /* 511 - * Wait until the parent tells us to go. 512 - */ 513 - if (read(go_pipe[0], &buf, 1) == -1) 514 - perror("unable to read pipe"); 515 - 516 - execvp(argv[0], (char **)argv); 517 - 518 - perror(argv[0]); 519 - kill(getppid(), SIGUSR1); 520 - exit(-1); 521 - } 522 - 523 - if (!system_wide && target_tid == -1 && target_pid == -1) 524 - evsel_list->threads->map[0] = child_pid; 525 - 526 - close(child_ready_pipe[1]); 527 - close(go_pipe[0]); 528 - /* 529 - * wait for child to settle 530 - */ 531 - if (read(child_ready_pipe[0], &buf, 1) == -1) { 532 - perror("unable to read pipe"); 533 - exit(-1); 534 - } 535 - close(child_ready_pipe[0]); 536 558 } 537 559 538 - open_counters(evsel_list); 560 + perf_record__open(rec); 539 561 540 562 /* 541 - * perf_session__delete(session) will be called at atexit_header() 563 + * perf_session__delete(session) will be called at perf_record__exit() 542 564 */ 543 - atexit(atexit_header); 565 + on_exit(perf_record__exit, rec); 544 566 545 - if (pipe_output) { 567 + if (opts->pipe_output) { 546 568 err = perf_header__write_pipe(output); 547 569 if (err < 0) 548 570 return err; 549 - } else if (file_new) { 571 + } else if (rec->file_new) { 550 572 err = perf_session__write_header(session, evsel_list, 551 573 output, false); 552 574 if (err < 0) 553 575 return err; 554 576 } 555 577 556 - post_processing_offset = lseek(output, 0, SEEK_CUR); 578 + if (!!rec->no_buildid 579 + && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) { 580 + pr_err("Couldn't generating buildids. 
" 581 + "Use --no-buildid to profile anyway.\n"); 582 + return -1; 583 + } 557 584 558 - if (pipe_output) { 559 - err = perf_session__synthesize_attrs(session, 560 - process_synthesized_event); 585 + rec->post_processing_offset = lseek(output, 0, SEEK_CUR); 586 + 587 + machine = perf_session__find_host_machine(session); 588 + if (!machine) { 589 + pr_err("Couldn't find native kernel information.\n"); 590 + return -1; 591 + } 592 + 593 + if (opts->pipe_output) { 594 + err = perf_event__synthesize_attrs(tool, session, 595 + process_synthesized_event); 561 596 if (err < 0) { 562 597 pr_err("Couldn't synthesize attrs.\n"); 563 598 return err; 564 599 } 565 600 566 - err = perf_event__synthesize_event_types(process_synthesized_event, 567 - session); 601 + err = perf_event__synthesize_event_types(tool, process_synthesized_event, 602 + machine); 568 603 if (err < 0) { 569 604 pr_err("Couldn't synthesize event_types.\n"); 570 605 return err; ··· 542 651 * return this more properly and also 543 652 * propagate errors that now are calling die() 544 653 */ 545 - err = perf_event__synthesize_tracing_data(output, evsel_list, 546 - process_synthesized_event, 547 - session); 654 + err = perf_event__synthesize_tracing_data(tool, output, evsel_list, 655 + process_synthesized_event); 548 656 if (err <= 0) { 549 657 pr_err("Couldn't record tracing data.\n"); 550 658 return err; 551 659 } 552 - advance_output(err); 660 + advance_output(rec, err); 553 661 } 554 662 } 555 663 556 - machine = perf_session__find_host_machine(session); 557 - if (!machine) { 558 - pr_err("Couldn't find native kernel information.\n"); 559 - return -1; 560 - } 561 - 562 - err = perf_event__synthesize_kernel_mmap(process_synthesized_event, 563 - session, machine, "_text"); 664 + err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event, 665 + machine, "_text"); 564 666 if (err < 0) 565 - err = perf_event__synthesize_kernel_mmap(process_synthesized_event, 566 - session, machine, "_stext"); 667 + 
err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event, 668 + machine, "_stext"); 567 669 if (err < 0) 568 670 pr_err("Couldn't record kernel reference relocation symbol\n" 569 671 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n" 570 672 "Check /proc/kallsyms permission or run as root.\n"); 571 673 572 - err = perf_event__synthesize_modules(process_synthesized_event, 573 - session, machine); 674 + err = perf_event__synthesize_modules(tool, process_synthesized_event, 675 + machine); 574 676 if (err < 0) 575 677 pr_err("Couldn't record kernel module information.\n" 576 678 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n" 577 679 "Check /proc/modules permission or run as root.\n"); 578 680 579 681 if (perf_guest) 580 - perf_session__process_machines(session, 682 + perf_session__process_machines(session, tool, 581 683 perf_event__synthesize_guest_os); 582 684 583 - if (!system_wide) 584 - perf_event__synthesize_thread_map(evsel_list->threads, 685 + if (!opts->system_wide) 686 + perf_event__synthesize_thread_map(tool, evsel_list->threads, 585 687 process_synthesized_event, 586 - session); 688 + machine); 587 689 else 588 - perf_event__synthesize_threads(process_synthesized_event, 589 - session); 690 + perf_event__synthesize_threads(tool, process_synthesized_event, 691 + machine); 590 692 591 - if (realtime_prio) { 693 + if (rec->realtime_prio) { 592 694 struct sched_param param; 593 695 594 - param.sched_priority = realtime_prio; 696 + param.sched_priority = rec->realtime_prio; 595 697 if (sched_setscheduler(0, SCHED_FIFO, &param)) { 596 698 pr_err("Could not set realtime priority.\n"); 597 699 exit(-1); ··· 597 713 * Let the child rip 598 714 */ 599 715 if (forks) 600 - close(go_pipe[1]); 716 + perf_evlist__start_workload(evsel_list); 601 717 602 718 for (;;) { 603 - int hits = samples; 719 + int hits = rec->samples; 604 720 605 - mmap_read_all(); 721 + perf_record__mmap_read_all(rec); 606 722 607 - if 
(hits == samples) { 723 + if (hits == rec->samples) { 608 724 if (done) 609 725 break; 610 726 err = poll(evsel_list->pollfd, evsel_list->nr_fds, -1); ··· 625 741 */ 626 742 fprintf(stderr, 627 743 "[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n", 628 - (double)bytes_written / 1024.0 / 1024.0, 744 + (double)rec->bytes_written / 1024.0 / 1024.0, 629 745 output_name, 630 - bytes_written / 24); 746 + rec->bytes_written / 24); 631 747 632 748 return 0; 633 749 ··· 642 758 NULL 643 759 }; 644 760 645 - static bool force, append_file; 761 + /* 762 + * XXX Ideally would be local to cmd_record() and passed to a perf_record__new 763 + * because we need to have access to it in perf_record__exit, that is called 764 + * after cmd_record() exits, but since record_options need to be accessible to 765 + * builtin-script, leave it here. 766 + * 767 + * At least we don't ouch it in all the other functions here directly. 768 + * 769 + * Just say no to tons of global variables, sigh. 770 + */ 771 + static struct perf_record record = { 772 + .opts = { 773 + .target_pid = -1, 774 + .target_tid = -1, 775 + .mmap_pages = UINT_MAX, 776 + .user_freq = UINT_MAX, 777 + .user_interval = ULLONG_MAX, 778 + .freq = 1000, 779 + .sample_id_all_avail = true, 780 + }, 781 + .write_mode = WRITE_FORCE, 782 + .file_new = true, 783 + }; 646 784 785 + /* 786 + * XXX Will stay a global variable till we fix builtin-script.c to stop messing 787 + * with it and switch to use the library functions in perf_evlist that came 788 + * from builtin-record.c, i.e. use perf_record_opts, 789 + * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record', 790 + * using pipes, etc. 791 + */ 647 792 const struct option record_options[] = { 648 - OPT_CALLBACK('e', "event", &evsel_list, "event", 793 + OPT_CALLBACK('e', "event", &record.evlist, "event", 649 794 "event selector. 
use 'perf list' to list available events", 650 795 parse_events_option), 651 - OPT_CALLBACK(0, "filter", &evsel_list, "filter", 796 + OPT_CALLBACK(0, "filter", &record.evlist, "filter", 652 797 "event filter", parse_filter), 653 - OPT_INTEGER('p', "pid", &target_pid, 798 + OPT_INTEGER('p', "pid", &record.opts.target_pid, 654 799 "record events on existing process id"), 655 - OPT_INTEGER('t', "tid", &target_tid, 800 + OPT_INTEGER('t', "tid", &record.opts.target_tid, 656 801 "record events on existing thread id"), 657 - OPT_INTEGER('r', "realtime", &realtime_prio, 802 + OPT_INTEGER('r', "realtime", &record.realtime_prio, 658 803 "collect data with this RT SCHED_FIFO priority"), 659 - OPT_BOOLEAN('D', "no-delay", &nodelay, 804 + OPT_BOOLEAN('D', "no-delay", &record.opts.no_delay, 660 805 "collect data without buffering"), 661 - OPT_BOOLEAN('R', "raw-samples", &raw_samples, 806 + OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples, 662 807 "collect raw sample records from all opened counters"), 663 - OPT_BOOLEAN('a', "all-cpus", &system_wide, 808 + OPT_BOOLEAN('a', "all-cpus", &record.opts.system_wide, 664 809 "system-wide collection from all CPUs"), 665 - OPT_BOOLEAN('A', "append", &append_file, 810 + OPT_BOOLEAN('A', "append", &record.append_file, 666 811 "append to the output file to do incremental profiling"), 667 - OPT_STRING('C', "cpu", &cpu_list, "cpu", 812 + OPT_STRING('C', "cpu", &record.opts.cpu_list, "cpu", 668 813 "list of cpus to monitor"), 669 - OPT_BOOLEAN('f', "force", &force, 814 + OPT_BOOLEAN('f', "force", &record.force, 670 815 "overwrite existing data file (deprecated)"), 671 - OPT_U64('c', "count", &user_interval, "event period to sample"), 672 - OPT_STRING('o', "output", &output_name, "file", 816 + OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"), 817 + OPT_STRING('o', "output", &record.output_name, "file", 673 818 "output file name"), 674 - OPT_BOOLEAN('i', "no-inherit", &no_inherit, 819 + OPT_BOOLEAN('i', 
"no-inherit", &record.opts.no_inherit, 675 820 "child tasks do not inherit counters"), 676 - OPT_UINTEGER('F', "freq", &user_freq, "profile at this frequency"), 677 - OPT_UINTEGER('m', "mmap-pages", &mmap_pages, "number of mmap data pages"), 678 - OPT_BOOLEAN(0, "group", &group, 821 + OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"), 822 + OPT_UINTEGER('m', "mmap-pages", &record.opts.mmap_pages, 823 + "number of mmap data pages"), 824 + OPT_BOOLEAN(0, "group", &record.opts.group, 679 825 "put the counters into a counter group"), 680 - OPT_BOOLEAN('g', "call-graph", &call_graph, 826 + OPT_BOOLEAN('g', "call-graph", &record.opts.call_graph, 681 827 "do call-graph (stack chain/backtrace) recording"), 682 828 OPT_INCR('v', "verbose", &verbose, 683 829 "be more verbose (show counter open errors, etc)"), 684 830 OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"), 685 - OPT_BOOLEAN('s', "stat", &inherit_stat, 831 + OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat, 686 832 "per thread counts"), 687 - OPT_BOOLEAN('d', "data", &sample_address, 833 + OPT_BOOLEAN('d', "data", &record.opts.sample_address, 688 834 "Sample addresses"), 689 - OPT_BOOLEAN('T', "timestamp", &sample_time, "Sample timestamps"), 690 - OPT_BOOLEAN('n', "no-samples", &no_samples, 835 + OPT_BOOLEAN('T', "timestamp", &record.opts.sample_time, "Sample timestamps"), 836 + OPT_BOOLEAN('P', "period", &record.opts.period, "Sample period"), 837 + OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples, 691 838 "don't sample"), 692 - OPT_BOOLEAN('N', "no-buildid-cache", &no_buildid_cache, 839 + OPT_BOOLEAN('N', "no-buildid-cache", &record.no_buildid_cache, 693 840 "do not update the buildid cache"), 694 - OPT_BOOLEAN('B', "no-buildid", &no_buildid, 841 + OPT_BOOLEAN('B', "no-buildid", &record.no_buildid, 695 842 "do not collect buildids in perf.data"), 696 - OPT_CALLBACK('G', "cgroup", &evsel_list, "name", 843 + OPT_CALLBACK('G', "cgroup", &record.evlist, "name", 697 844 
"monitor event in cgroup name only", 698 845 parse_cgroups), 699 846 OPT_END() ··· 734 819 { 735 820 int err = -ENOMEM; 736 821 struct perf_evsel *pos; 822 + struct perf_evlist *evsel_list; 823 + struct perf_record *rec = &record; 737 824 738 825 perf_header__set_cmdline(argc, argv); 739 826 ··· 743 826 if (evsel_list == NULL) 744 827 return -ENOMEM; 745 828 829 + rec->evlist = evsel_list; 830 + 746 831 argc = parse_options(argc, argv, record_options, record_usage, 747 832 PARSE_OPT_STOP_AT_NON_OPTION); 748 - if (!argc && target_pid == -1 && target_tid == -1 && 749 - !system_wide && !cpu_list) 833 + if (!argc && rec->opts.target_pid == -1 && rec->opts.target_tid == -1 && 834 + !rec->opts.system_wide && !rec->opts.cpu_list) 750 835 usage_with_options(record_usage, record_options); 751 836 752 - if (force && append_file) { 837 + if (rec->force && rec->append_file) { 753 838 fprintf(stderr, "Can't overwrite and append at the same time." 754 839 " You need to choose between -f and -A"); 755 840 usage_with_options(record_usage, record_options); 756 - } else if (append_file) { 757 - write_mode = WRITE_APPEND; 841 + } else if (rec->append_file) { 842 + rec->write_mode = WRITE_APPEND; 758 843 } else { 759 - write_mode = WRITE_FORCE; 844 + rec->write_mode = WRITE_FORCE; 760 845 } 761 846 762 - if (nr_cgroups && !system_wide) { 847 + if (nr_cgroups && !rec->opts.system_wide) { 763 848 fprintf(stderr, "cgroup monitoring only available in" 764 849 " system-wide mode\n"); 765 850 usage_with_options(record_usage, record_options); ··· 779 860 "If some relocation was applied (e.g. 
kexec) symbols may be misresolved\n" 780 861 "even with a suitable vmlinux or kallsyms file.\n\n"); 781 862 782 - if (no_buildid_cache || no_buildid) 863 + if (rec->no_buildid_cache || rec->no_buildid) 783 864 disable_buildid_cache(); 784 865 785 866 if (evsel_list->nr_entries == 0 && ··· 788 869 goto out_symbol_exit; 789 870 } 790 871 791 - if (target_pid != -1) 792 - target_tid = target_pid; 872 + if (rec->opts.target_pid != -1) 873 + rec->opts.target_tid = rec->opts.target_pid; 793 874 794 - if (perf_evlist__create_maps(evsel_list, target_pid, 795 - target_tid, cpu_list) < 0) 875 + if (perf_evlist__create_maps(evsel_list, rec->opts.target_pid, 876 + rec->opts.target_tid, rec->opts.cpu_list) < 0) 796 877 usage_with_options(record_usage, record_options); 797 878 798 879 list_for_each_entry(pos, &evsel_list->entries, node) { 799 - if (perf_evsel__alloc_fd(pos, evsel_list->cpus->nr, 800 - evsel_list->threads->nr) < 0) 801 - goto out_free_fd; 802 880 if (perf_header__push_event(pos->attr.config, event_name(pos))) 803 881 goto out_free_fd; 804 882 } 805 883 806 - if (perf_evlist__alloc_pollfd(evsel_list) < 0) 807 - goto out_free_fd; 808 - 809 - if (user_interval != ULLONG_MAX) 810 - default_interval = user_interval; 811 - if (user_freq != UINT_MAX) 812 - freq = user_freq; 884 + if (rec->opts.user_interval != ULLONG_MAX) 885 + rec->opts.default_interval = rec->opts.user_interval; 886 + if (rec->opts.user_freq != UINT_MAX) 887 + rec->opts.freq = rec->opts.user_freq; 813 888 814 889 /* 815 890 * User specified count overrides default frequency. 
816 891 */ 817 - if (default_interval) 818 - freq = 0; 819 - else if (freq) { 820 - default_interval = freq; 892 + if (rec->opts.default_interval) 893 + rec->opts.freq = 0; 894 + else if (rec->opts.freq) { 895 + rec->opts.default_interval = rec->opts.freq; 821 896 } else { 822 897 fprintf(stderr, "frequency and count are zero, aborting\n"); 823 898 err = -EINVAL; 824 899 goto out_free_fd; 825 900 } 826 901 827 - err = __cmd_record(argc, argv); 902 + err = __cmd_record(&record, argc, argv); 828 903 out_free_fd: 829 904 perf_evlist__delete_maps(evsel_list); 830 905 out_symbol_exit:
+130 -106
tools/perf/builtin-report.c
··· 25 25 #include "util/evsel.h" 26 26 #include "util/header.h" 27 27 #include "util/session.h" 28 + #include "util/tool.h" 28 29 29 30 #include "util/parse-options.h" 30 31 #include "util/parse-events.h" ··· 36 35 37 36 #include <linux/bitmap.h> 38 37 39 - static char const *input_name = "perf.data"; 38 + struct perf_report { 39 + struct perf_tool tool; 40 + struct perf_session *session; 41 + char const *input_name; 42 + bool force, use_tui, use_stdio; 43 + bool hide_unresolved; 44 + bool dont_use_callchains; 45 + bool show_full_info; 46 + bool show_threads; 47 + bool inverted_callchain; 48 + struct perf_read_values show_threads_values; 49 + const char *pretty_printing_style; 50 + symbol_filter_t annotate_init; 51 + const char *cpu_list; 52 + DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); 53 + }; 40 54 41 - static bool force, use_tui, use_stdio; 42 - static bool hide_unresolved; 43 - static bool dont_use_callchains; 44 - static bool show_full_info; 45 - 46 - static bool show_threads; 47 - static struct perf_read_values show_threads_values; 48 - 49 - static const char default_pretty_printing_style[] = "normal"; 50 - static const char *pretty_printing_style = default_pretty_printing_style; 51 - 52 - static char callchain_default_opt[] = "fractal,0.5,callee"; 53 - static bool inverted_callchain; 54 - static symbol_filter_t annotate_init; 55 - 56 - static const char *cpu_list; 57 - static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); 58 - 59 - static int perf_session__add_hist_entry(struct perf_session *session, 60 - struct addr_location *al, 61 - struct perf_sample *sample, 62 - struct perf_evsel *evsel) 55 + static int perf_evsel__add_hist_entry(struct perf_evsel *evsel, 56 + struct addr_location *al, 57 + struct perf_sample *sample, 58 + struct machine *machine) 63 59 { 64 60 struct symbol *parent = NULL; 65 61 int err = 0; 66 62 struct hist_entry *he; 67 63 68 64 if ((sort__has_parent || symbol_conf.use_callchain) && sample->callchain) { 69 - err = 
perf_session__resolve_callchain(session, al->thread, 70 - sample->callchain, &parent); 65 + err = machine__resolve_callchain(machine, evsel, al->thread, 66 + sample->callchain, &parent); 71 67 if (err) 72 68 return err; 73 69 } ··· 74 76 return -ENOMEM; 75 77 76 78 if (symbol_conf.use_callchain) { 77 - err = callchain_append(he->callchain, &session->callchain_cursor, 79 + err = callchain_append(he->callchain, 80 + &evsel->hists.callchain_cursor, 78 81 sample->period); 79 82 if (err) 80 83 return err; ··· 91 92 assert(evsel != NULL); 92 93 93 94 err = -ENOMEM; 94 - if (notes->src == NULL && 95 - symbol__alloc_hist(he->ms.sym, session->evlist->nr_entries) < 0) 95 + if (notes->src == NULL && symbol__alloc_hist(he->ms.sym) < 0) 96 96 goto out; 97 97 98 98 err = hist_entry__inc_addr_samples(he, evsel->idx, al->addr); ··· 104 106 } 105 107 106 108 107 - static int process_sample_event(union perf_event *event, 109 + static int process_sample_event(struct perf_tool *tool, 110 + union perf_event *event, 108 111 struct perf_sample *sample, 109 112 struct perf_evsel *evsel, 110 - struct perf_session *session) 113 + struct machine *machine) 111 114 { 115 + struct perf_report *rep = container_of(tool, struct perf_report, tool); 112 116 struct addr_location al; 113 117 114 - if (perf_event__preprocess_sample(event, session, &al, sample, 115 - annotate_init) < 0) { 118 + if (perf_event__preprocess_sample(event, machine, &al, sample, 119 + rep->annotate_init) < 0) { 116 120 fprintf(stderr, "problem processing %d event, skipping it.\n", 117 121 event->header.type); 118 122 return -1; 119 123 } 120 124 121 - if (al.filtered || (hide_unresolved && al.sym == NULL)) 125 + if (al.filtered || (rep->hide_unresolved && al.sym == NULL)) 122 126 return 0; 123 127 124 - if (cpu_list && !test_bit(sample->cpu, cpu_bitmap)) 128 + if (rep->cpu_list && !test_bit(sample->cpu, rep->cpu_bitmap)) 125 129 return 0; 126 130 127 131 if (al.map != NULL) 128 132 al.map->dso->hit = 1; 129 133 130 - if 
(perf_session__add_hist_entry(session, &al, sample, evsel)) { 134 + if (perf_evsel__add_hist_entry(evsel, &al, sample, machine)) { 131 135 pr_debug("problem incrementing symbol period, skipping event\n"); 132 136 return -1; 133 137 } ··· 137 137 return 0; 138 138 } 139 139 140 - static int process_read_event(union perf_event *event, 140 + static int process_read_event(struct perf_tool *tool, 141 + union perf_event *event, 141 142 struct perf_sample *sample __used, 142 - struct perf_session *session) 143 + struct perf_evsel *evsel, 144 + struct machine *machine __used) 143 145 { 144 - struct perf_evsel *evsel = perf_evlist__id2evsel(session->evlist, 145 - event->read.id); 146 - if (show_threads) { 146 + struct perf_report *rep = container_of(tool, struct perf_report, tool); 147 + 148 + if (rep->show_threads) { 147 149 const char *name = evsel ? event_name(evsel) : "unknown"; 148 - perf_read_values_add_value(&show_threads_values, 150 + perf_read_values_add_value(&rep->show_threads_values, 149 151 event->read.pid, event->read.tid, 150 152 event->read.id, 151 153 name, ··· 161 159 return 0; 162 160 } 163 161 164 - static int perf_session__setup_sample_type(struct perf_session *self) 162 + static int perf_report__setup_sample_type(struct perf_report *rep) 165 163 { 164 + struct perf_session *self = rep->session; 165 + 166 166 if (!(self->sample_type & PERF_SAMPLE_CALLCHAIN)) { 167 167 if (sort__has_parent) { 168 168 ui__warning("Selected --sort parent, but no " ··· 177 173 "you call 'perf record' without -g?\n"); 178 174 return -1; 179 175 } 180 - } else if (!dont_use_callchains && callchain_param.mode != CHAIN_NONE && 176 + } else if (!rep->dont_use_callchains && 177 + callchain_param.mode != CHAIN_NONE && 181 178 !symbol_conf.use_callchain) { 182 179 symbol_conf.use_callchain = true; 183 180 if (callchain_register_param(&callchain_param) < 0) { ··· 190 185 191 186 return 0; 192 187 } 193 - 194 - static struct perf_event_ops event_ops = { 195 - .sample = 
process_sample_event, 196 - .mmap = perf_event__process_mmap, 197 - .comm = perf_event__process_comm, 198 - .exit = perf_event__process_task, 199 - .fork = perf_event__process_task, 200 - .lost = perf_event__process_lost, 201 - .read = process_read_event, 202 - .attr = perf_event__process_attr, 203 - .event_type = perf_event__process_event_type, 204 - .tracing_data = perf_event__process_tracing_data, 205 - .build_id = perf_event__process_build_id, 206 - .ordered_samples = true, 207 - .ordering_requires_timestamps = true, 208 - }; 209 188 210 189 extern volatile int session_done; 211 190 ··· 213 224 } 214 225 215 226 static int perf_evlist__tty_browse_hists(struct perf_evlist *evlist, 227 + struct perf_report *rep, 216 228 const char *help) 217 229 { 218 230 struct perf_evsel *pos; ··· 231 241 parent_pattern == default_parent_pattern) { 232 242 fprintf(stdout, "#\n# (%s)\n#\n", help); 233 243 234 - if (show_threads) { 235 - bool style = !strcmp(pretty_printing_style, "raw"); 236 - perf_read_values_display(stdout, &show_threads_values, 244 + if (rep->show_threads) { 245 + bool style = !strcmp(rep->pretty_printing_style, "raw"); 246 + perf_read_values_display(stdout, &rep->show_threads_values, 237 247 style); 238 - perf_read_values_destroy(&show_threads_values); 248 + perf_read_values_destroy(&rep->show_threads_values); 239 249 } 240 250 } 241 251 242 252 return 0; 243 253 } 244 254 245 - static int __cmd_report(void) 255 + static int __cmd_report(struct perf_report *rep) 246 256 { 247 257 int ret = -EINVAL; 248 258 u64 nr_samples; ··· 254 264 255 265 signal(SIGINT, sig_handler); 256 266 257 - session = perf_session__new(input_name, O_RDONLY, force, false, &event_ops); 267 + session = perf_session__new(rep->input_name, O_RDONLY, 268 + rep->force, false, &rep->tool); 258 269 if (session == NULL) 259 270 return -ENOMEM; 260 271 261 - if (cpu_list) { 262 - ret = perf_session__cpu_bitmap(session, cpu_list, cpu_bitmap); 272 + rep->session = session; 273 + 274 + if 
(rep->cpu_list) { 275 + ret = perf_session__cpu_bitmap(session, rep->cpu_list, 276 + rep->cpu_bitmap); 263 277 if (ret) 264 278 goto out_delete; 265 279 } 266 280 267 281 if (use_browser <= 0) 268 - perf_session__fprintf_info(session, stdout, show_full_info); 282 + perf_session__fprintf_info(session, stdout, rep->show_full_info); 269 283 270 - if (show_threads) 271 - perf_read_values_init(&show_threads_values); 284 + if (rep->show_threads) 285 + perf_read_values_init(&rep->show_threads_values); 272 286 273 - ret = perf_session__setup_sample_type(session); 287 + ret = perf_report__setup_sample_type(rep); 274 288 if (ret) 275 289 goto out_delete; 276 290 277 - ret = perf_session__process_events(session, &event_ops); 291 + ret = perf_session__process_events(session, &rep->tool); 278 292 if (ret) 279 293 goto out_delete; 280 294 ··· 321 327 } 322 328 323 329 if (nr_samples == 0) { 324 - ui__warning("The %s file has no samples!\n", input_name); 330 + ui__warning("The %s file has no samples!\n", session->filename); 325 331 goto out_delete; 326 332 } 327 333 ··· 329 335 perf_evlist__tui_browse_hists(session->evlist, help, 330 336 NULL, NULL, 0); 331 337 } else 332 - perf_evlist__tty_browse_hists(session->evlist, help); 338 + perf_evlist__tty_browse_hists(session->evlist, rep, help); 333 339 334 340 out_delete: 335 341 /* ··· 348 354 } 349 355 350 356 static int 351 - parse_callchain_opt(const struct option *opt __used, const char *arg, 352 - int unset) 357 + parse_callchain_opt(const struct option *opt, const char *arg, int unset) 353 358 { 359 + struct perf_report *rep = (struct perf_report *)opt->value; 354 360 char *tok, *tok2; 355 361 char *endptr; 356 362 ··· 358 364 * --no-call-graph 359 365 */ 360 366 if (unset) { 361 - dont_use_callchains = true; 367 + rep->dont_use_callchains = true; 362 368 return 0; 363 369 } 364 370 ··· 406 412 goto setup; 407 413 408 414 if (tok2[0] != 'c') { 409 - callchain_param.print_limit = strtod(tok2, &endptr); 415 + 
callchain_param.print_limit = strtoul(tok2, &endptr, 0); 410 416 tok2 = strtok(NULL, ","); 411 417 if (!tok2) 412 418 goto setup; ··· 427 433 return 0; 428 434 } 429 435 430 - static const char * const report_usage[] = { 431 - "perf report [<options>] <command>", 432 - NULL 433 - }; 434 - 435 - static const struct option options[] = { 436 - OPT_STRING('i', "input", &input_name, "file", 436 + int cmd_report(int argc, const char **argv, const char *prefix __used) 437 + { 438 + struct stat st; 439 + char callchain_default_opt[] = "fractal,0.5,callee"; 440 + const char * const report_usage[] = { 441 + "perf report [<options>]", 442 + NULL 443 + }; 444 + struct perf_report report = { 445 + .tool = { 446 + .sample = process_sample_event, 447 + .mmap = perf_event__process_mmap, 448 + .comm = perf_event__process_comm, 449 + .exit = perf_event__process_task, 450 + .fork = perf_event__process_task, 451 + .lost = perf_event__process_lost, 452 + .read = process_read_event, 453 + .attr = perf_event__process_attr, 454 + .event_type = perf_event__process_event_type, 455 + .tracing_data = perf_event__process_tracing_data, 456 + .build_id = perf_event__process_build_id, 457 + .ordered_samples = true, 458 + .ordering_requires_timestamps = true, 459 + }, 460 + .pretty_printing_style = "normal", 461 + }; 462 + const struct option options[] = { 463 + OPT_STRING('i', "input", &report.input_name, "file", 437 464 "input file name"), 438 465 OPT_INCR('v', "verbose", &verbose, 439 466 "be more verbose (show symbol address, etc)"), ··· 464 449 "file", "vmlinux pathname"), 465 450 OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name, 466 451 "file", "kallsyms pathname"), 467 - OPT_BOOLEAN('f', "force", &force, "don't complain, do it"), 452 + OPT_BOOLEAN('f', "force", &report.force, "don't complain, do it"), 468 453 OPT_BOOLEAN('m', "modules", &symbol_conf.use_modules, 469 454 "load module symbols - WARNING: use only with -k and LIVE kernel"), 470 455 OPT_BOOLEAN('n', "show-nr-samples", 
&symbol_conf.show_nr_samples, 471 456 "Show a column with the number of samples"), 472 - OPT_BOOLEAN('T', "threads", &show_threads, 457 + OPT_BOOLEAN('T', "threads", &report.show_threads, 473 458 "Show per-thread event counters"), 474 - OPT_STRING(0, "pretty", &pretty_printing_style, "key", 459 + OPT_STRING(0, "pretty", &report.pretty_printing_style, "key", 475 460 "pretty printing style key: normal raw"), 476 - OPT_BOOLEAN(0, "tui", &use_tui, "Use the TUI interface"), 477 - OPT_BOOLEAN(0, "stdio", &use_stdio, "Use the stdio interface"), 461 + OPT_BOOLEAN(0, "tui", &report.use_tui, "Use the TUI interface"), 462 + OPT_BOOLEAN(0, "stdio", &report.use_stdio, 463 + "Use the stdio interface"), 478 464 OPT_STRING('s', "sort", &sort_order, "key[,key2...]", 479 465 "sort by key(s): pid, comm, dso, symbol, parent"), 480 466 OPT_BOOLEAN(0, "showcpuutilization", &symbol_conf.show_cpu_utilization, ··· 484 468 "regex filter to identify parent, see: '--sort parent'"), 485 469 OPT_BOOLEAN('x', "exclude-other", &symbol_conf.exclude_other, 486 470 "Only display entries with parent-match"), 487 - OPT_CALLBACK_DEFAULT('g', "call-graph", NULL, "output_type,min_percent, call_order", 488 - "Display callchains using output_type (graph, flat, fractal, or none) , min percent threshold and callchain order. " 471 + OPT_CALLBACK_DEFAULT('g', "call-graph", &report, "output_type,min_percent[,print_limit],call_order", 472 + "Display callchains using output_type (graph, flat, fractal, or none) , min percent threshold, optional print limit and callchain order. 
" 489 473 "Default: fractal,0.5,callee", &parse_callchain_opt, callchain_default_opt), 490 - OPT_BOOLEAN('G', "inverted", &inverted_callchain, "alias for inverted call graph"), 474 + OPT_BOOLEAN('G', "inverted", &report.inverted_callchain, 475 + "alias for inverted call graph"), 491 476 OPT_STRING('d', "dsos", &symbol_conf.dso_list_str, "dso[,dso...]", 492 477 "only consider symbols in these dsos"), 493 - OPT_STRING('C', "comms", &symbol_conf.comm_list_str, "comm[,comm...]", 478 + OPT_STRING('c', "comms", &symbol_conf.comm_list_str, "comm[,comm...]", 494 479 "only consider symbols in these comms"), 495 480 OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]", 496 481 "only consider these symbols"), ··· 501 484 OPT_STRING('t', "field-separator", &symbol_conf.field_sep, "separator", 502 485 "separator for columns, no spaces will be added between " 503 486 "columns '.' is reserved."), 504 - OPT_BOOLEAN('U', "hide-unresolved", &hide_unresolved, 487 + OPT_BOOLEAN('U', "hide-unresolved", &report.hide_unresolved, 505 488 "Only display entries resolved to a symbol"), 506 489 OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory", 507 490 "Look for files with symbols relative to this directory"), 508 - OPT_STRING('c', "cpu", &cpu_list, "cpu", "list of cpus to profile"), 509 - OPT_BOOLEAN('I', "show-info", &show_full_info, 491 + OPT_STRING('C', "cpu", &report.cpu_list, "cpu", 492 + "list of cpus to profile"), 493 + OPT_BOOLEAN('I', "show-info", &report.show_full_info, 510 494 "Display extended information about perf.data file"), 511 495 OPT_BOOLEAN(0, "source", &symbol_conf.annotate_src, 512 496 "Interleave source code with assembly code (default)"), ··· 518 500 OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period, 519 501 "Show a column with the sum of periods"), 520 502 OPT_END() 521 - }; 503 + }; 522 504 523 - int cmd_report(int argc, const char **argv, const char *prefix __used) 524 - { 525 505 argc = parse_options(argc, argv, options, 
report_usage, 0); 526 506 527 - if (use_stdio) 507 + if (report.use_stdio) 528 508 use_browser = 0; 529 - else if (use_tui) 509 + else if (report.use_tui) 530 510 use_browser = 1; 531 511 532 - if (inverted_callchain) 512 + if (report.inverted_callchain) 533 513 callchain_param.order = ORDER_CALLER; 534 514 535 - if (strcmp(input_name, "-") != 0) 515 + if (!report.input_name || !strlen(report.input_name)) { 516 + if (!fstat(STDIN_FILENO, &st) && S_ISFIFO(st.st_mode)) 517 + report.input_name = "-"; 518 + else 519 + report.input_name = "perf.data"; 520 + } 521 + 522 + if (strcmp(report.input_name, "-") != 0) 536 523 setup_browser(true); 537 524 else 538 525 use_browser = 0; 526 + 539 527 /* 540 528 * Only in the newt browser we are doing integrated annotation, 541 529 * so don't allocate extra space that won't be used in the stdio ··· 549 525 */ 550 526 if (use_browser > 0) { 551 527 symbol_conf.priv_size = sizeof(struct annotation); 552 - annotate_init = symbol__annotate_init; 528 + report.annotate_init = symbol__annotate_init; 553 529 /* 554 530 * For searching by name on the "Browse map details". 555 531 * providing it only in verbose mode not to bloat too ··· 596 572 sort_entry__setup_elide(&sort_comm, symbol_conf.comm_list, "comm", stdout); 597 573 sort_entry__setup_elide(&sort_sym, symbol_conf.sym_list, "symbol", stdout); 598 574 599 - return __cmd_report(); 575 + return __cmd_report(&report); 600 576 }
+102 -100
tools/perf/builtin-sched.c
··· 2 2 #include "perf.h" 3 3 4 4 #include "util/util.h" 5 + #include "util/evlist.h" 5 6 #include "util/cache.h" 7 + #include "util/evsel.h" 6 8 #include "util/symbol.h" 7 9 #include "util/thread.h" 8 10 #include "util/header.h" 9 11 #include "util/session.h" 12 + #include "util/tool.h" 10 13 11 14 #include "util/parse-options.h" 12 15 #include "util/trace-event.h" ··· 22 19 #include <pthread.h> 23 20 #include <math.h> 24 21 25 - static char const *input_name = "perf.data"; 22 + static const char *input_name; 26 23 27 24 static char default_sort_order[] = "avg, max, switch, runtime"; 28 25 static const char *sort_order = default_sort_order; ··· 726 723 727 724 struct trace_sched_handler { 728 725 void (*switch_event)(struct trace_switch_event *, 729 - struct perf_session *, 726 + struct machine *, 730 727 struct event *, 731 728 int cpu, 732 729 u64 timestamp, 733 730 struct thread *thread); 734 731 735 732 void (*runtime_event)(struct trace_runtime_event *, 736 - struct perf_session *, 733 + struct machine *, 737 734 struct event *, 738 735 int cpu, 739 736 u64 timestamp, 740 737 struct thread *thread); 741 738 742 739 void (*wakeup_event)(struct trace_wakeup_event *, 743 - struct perf_session *, 740 + struct machine *, 744 741 struct event *, 745 742 int cpu, 746 743 u64 timestamp, ··· 753 750 struct thread *thread); 754 751 755 752 void (*migrate_task_event)(struct trace_migrate_task_event *, 756 - struct perf_session *session, 753 + struct machine *machine, 757 754 struct event *, 758 755 int cpu, 759 756 u64 timestamp, ··· 763 760 764 761 static void 765 762 replay_wakeup_event(struct trace_wakeup_event *wakeup_event, 766 - struct perf_session *session __used, 763 + struct machine *machine __used, 767 764 struct event *event, 768 765 int cpu __used, 769 766 u64 timestamp __used, ··· 790 787 791 788 static void 792 789 replay_switch_event(struct trace_switch_event *switch_event, 793 - struct perf_session *session __used, 790 + struct machine *machine __used, 
794 791 struct event *event, 795 792 int cpu, 796 793 u64 timestamp, ··· 1024 1021 1025 1022 static void 1026 1023 latency_switch_event(struct trace_switch_event *switch_event, 1027 - struct perf_session *session, 1024 + struct machine *machine, 1028 1025 struct event *event __used, 1029 1026 int cpu, 1030 1027 u64 timestamp, ··· 1048 1045 die("hm, delta: %" PRIu64 " < 0 ?\n", delta); 1049 1046 1050 1047 1051 - sched_out = perf_session__findnew(session, switch_event->prev_pid); 1052 - sched_in = perf_session__findnew(session, switch_event->next_pid); 1048 + sched_out = machine__findnew_thread(machine, switch_event->prev_pid); 1049 + sched_in = machine__findnew_thread(machine, switch_event->next_pid); 1053 1050 1054 1051 out_events = thread_atoms_search(&atom_root, sched_out, &cmp_pid); 1055 1052 if (!out_events) { ··· 1077 1074 1078 1075 static void 1079 1076 latency_runtime_event(struct trace_runtime_event *runtime_event, 1080 - struct perf_session *session, 1077 + struct machine *machine, 1081 1078 struct event *event __used, 1082 1079 int cpu, 1083 1080 u64 timestamp, 1084 1081 struct thread *this_thread __used) 1085 1082 { 1086 - struct thread *thread = perf_session__findnew(session, runtime_event->pid); 1083 + struct thread *thread = machine__findnew_thread(machine, runtime_event->pid); 1087 1084 struct work_atoms *atoms = thread_atoms_search(&atom_root, thread, &cmp_pid); 1088 1085 1089 1086 BUG_ON(cpu >= MAX_CPUS || cpu < 0); ··· 1100 1097 1101 1098 static void 1102 1099 latency_wakeup_event(struct trace_wakeup_event *wakeup_event, 1103 - struct perf_session *session, 1100 + struct machine *machine, 1104 1101 struct event *__event __used, 1105 1102 int cpu __used, 1106 1103 u64 timestamp, ··· 1114 1111 if (!wakeup_event->success) 1115 1112 return; 1116 1113 1117 - wakee = perf_session__findnew(session, wakeup_event->pid); 1114 + wakee = machine__findnew_thread(machine, wakeup_event->pid); 1118 1115 atoms = thread_atoms_search(&atom_root, wakee, &cmp_pid); 
1119 1116 if (!atoms) { 1120 1117 thread_atoms_insert(wakee); ··· 1148 1145 1149 1146 static void 1150 1147 latency_migrate_task_event(struct trace_migrate_task_event *migrate_task_event, 1151 - struct perf_session *session, 1148 + struct machine *machine, 1152 1149 struct event *__event __used, 1153 1150 int cpu __used, 1154 1151 u64 timestamp, ··· 1164 1161 if (profile_cpu == -1) 1165 1162 return; 1166 1163 1167 - migrant = perf_session__findnew(session, migrate_task_event->pid); 1164 + migrant = machine__findnew_thread(machine, migrate_task_event->pid); 1168 1165 atoms = thread_atoms_search(&atom_root, migrant, &cmp_pid); 1169 1166 if (!atoms) { 1170 1167 thread_atoms_insert(migrant); ··· 1359 1356 static struct trace_sched_handler *trace_handler; 1360 1357 1361 1358 static void 1362 - process_sched_wakeup_event(void *data, struct perf_session *session, 1359 + process_sched_wakeup_event(struct perf_tool *tool __used, 1363 1360 struct event *event, 1364 - int cpu __used, 1365 - u64 timestamp __used, 1366 - struct thread *thread __used) 1361 + struct perf_sample *sample, 1362 + struct machine *machine, 1363 + struct thread *thread) 1367 1364 { 1365 + void *data = sample->raw_data; 1368 1366 struct trace_wakeup_event wakeup_event; 1369 1367 1370 1368 FILL_COMMON_FIELDS(wakeup_event, event, data); ··· 1377 1373 FILL_FIELD(wakeup_event, cpu, event, data); 1378 1374 1379 1375 if (trace_handler->wakeup_event) 1380 - trace_handler->wakeup_event(&wakeup_event, session, event, 1381 - cpu, timestamp, thread); 1376 + trace_handler->wakeup_event(&wakeup_event, machine, event, 1377 + sample->cpu, sample->time, thread); 1382 1378 } 1383 1379 1384 1380 /* ··· 1396 1392 1397 1393 static void 1398 1394 map_switch_event(struct trace_switch_event *switch_event, 1399 - struct perf_session *session, 1395 + struct machine *machine, 1400 1396 struct event *event __used, 1401 1397 int this_cpu, 1402 1398 u64 timestamp, ··· 1424 1420 die("hm, delta: %" PRIu64 " < 0 ?\n", delta); 1425 
1421 1426 1422 1427 - sched_out = perf_session__findnew(session, switch_event->prev_pid); 1428 - sched_in = perf_session__findnew(session, switch_event->next_pid); 1423 + sched_out = machine__findnew_thread(machine, switch_event->prev_pid); 1424 + sched_in = machine__findnew_thread(machine, switch_event->next_pid); 1429 1425 1430 1426 curr_thread[this_cpu] = sched_in; 1431 1427 ··· 1473 1469 } 1474 1470 } 1475 1471 1476 - 1477 1472 static void 1478 - process_sched_switch_event(void *data, struct perf_session *session, 1473 + process_sched_switch_event(struct perf_tool *tool __used, 1479 1474 struct event *event, 1480 - int this_cpu, 1481 - u64 timestamp __used, 1482 - struct thread *thread __used) 1475 + struct perf_sample *sample, 1476 + struct machine *machine, 1477 + struct thread *thread) 1483 1478 { 1479 + int this_cpu = sample->cpu; 1480 + void *data = sample->raw_data; 1484 1481 struct trace_switch_event switch_event; 1485 1482 1486 1483 FILL_COMMON_FIELDS(switch_event, event, data); ··· 1503 1498 nr_context_switch_bugs++; 1504 1499 } 1505 1500 if (trace_handler->switch_event) 1506 - trace_handler->switch_event(&switch_event, session, event, 1507 - this_cpu, timestamp, thread); 1501 + trace_handler->switch_event(&switch_event, machine, event, 1502 + this_cpu, sample->time, thread); 1508 1503 1509 1504 curr_pid[this_cpu] = switch_event.next_pid; 1510 1505 } 1511 1506 1512 1507 static void 1513 - process_sched_runtime_event(void *data, struct perf_session *session, 1514 - struct event *event, 1515 - int cpu __used, 1516 - u64 timestamp __used, 1517 - struct thread *thread __used) 1508 + process_sched_runtime_event(struct perf_tool *tool __used, 1509 + struct event *event, 1510 + struct perf_sample *sample, 1511 + struct machine *machine, 1512 + struct thread *thread) 1518 1513 { 1514 + void *data = sample->raw_data; 1519 1515 struct trace_runtime_event runtime_event; 1520 1516 1521 1517 FILL_ARRAY(runtime_event, comm, event, data); ··· 1525 1519 
FILL_FIELD(runtime_event, vruntime, event, data); 1526 1520 1527 1521 if (trace_handler->runtime_event) 1528 - trace_handler->runtime_event(&runtime_event, session, event, cpu, timestamp, thread); 1522 + trace_handler->runtime_event(&runtime_event, machine, event, 1523 + sample->cpu, sample->time, thread); 1529 1524 } 1530 1525 1531 1526 static void 1532 - process_sched_fork_event(void *data, 1527 + process_sched_fork_event(struct perf_tool *tool __used, 1533 1528 struct event *event, 1534 - int cpu __used, 1535 - u64 timestamp __used, 1536 - struct thread *thread __used) 1529 + struct perf_sample *sample, 1530 + struct machine *machine __used, 1531 + struct thread *thread) 1537 1532 { 1533 + void *data = sample->raw_data; 1538 1534 struct trace_fork_event fork_event; 1539 1535 1540 1536 FILL_COMMON_FIELDS(fork_event, event, data); ··· 1548 1540 1549 1541 if (trace_handler->fork_event) 1550 1542 trace_handler->fork_event(&fork_event, event, 1551 - cpu, timestamp, thread); 1543 + sample->cpu, sample->time, thread); 1552 1544 } 1553 1545 1554 1546 static void 1555 - process_sched_exit_event(struct event *event, 1556 - int cpu __used, 1557 - u64 timestamp __used, 1547 + process_sched_exit_event(struct perf_tool *tool __used, 1548 + struct event *event, 1549 + struct perf_sample *sample __used, 1550 + struct machine *machine __used, 1558 1551 struct thread *thread __used) 1559 1552 { 1560 1553 if (verbose) ··· 1563 1554 } 1564 1555 1565 1556 static void 1566 - process_sched_migrate_task_event(void *data, struct perf_session *session, 1567 - struct event *event, 1568 - int cpu __used, 1569 - u64 timestamp __used, 1570 - struct thread *thread __used) 1557 + process_sched_migrate_task_event(struct perf_tool *tool __used, 1558 + struct event *event, 1559 + struct perf_sample *sample, 1560 + struct machine *machine, 1561 + struct thread *thread) 1571 1562 { 1563 + void *data = sample->raw_data; 1572 1564 struct trace_migrate_task_event migrate_task_event; 1573 1565 1574 
1566 FILL_COMMON_FIELDS(migrate_task_event, event, data); ··· 1580 1570 FILL_FIELD(migrate_task_event, cpu, event, data); 1581 1571 1582 1572 if (trace_handler->migrate_task_event) 1583 - trace_handler->migrate_task_event(&migrate_task_event, session, 1584 - event, cpu, timestamp, thread); 1573 + trace_handler->migrate_task_event(&migrate_task_event, machine, 1574 + event, sample->cpu, 1575 + sample->time, thread); 1585 1576 } 1586 1577 1587 - static void process_raw_event(union perf_event *raw_event __used, 1588 - struct perf_session *session, void *data, int cpu, 1589 - u64 timestamp, struct thread *thread) 1578 + typedef void (*tracepoint_handler)(struct perf_tool *tool, struct event *event, 1579 + struct perf_sample *sample, 1580 + struct machine *machine, 1581 + struct thread *thread); 1582 + 1583 + static int perf_sched__process_tracepoint_sample(struct perf_tool *tool, 1584 + union perf_event *event __used, 1585 + struct perf_sample *sample, 1586 + struct perf_evsel *evsel, 1587 + struct machine *machine) 1590 1588 { 1591 - struct event *event; 1592 - int type; 1589 + struct thread *thread = machine__findnew_thread(machine, sample->pid); 1593 1590 1594 - 1595 - type = trace_parse_common_type(data); 1596 - event = trace_find_event(type); 1597 - 1598 - if (!strcmp(event->name, "sched_switch")) 1599 - process_sched_switch_event(data, session, event, cpu, timestamp, thread); 1600 - if (!strcmp(event->name, "sched_stat_runtime")) 1601 - process_sched_runtime_event(data, session, event, cpu, timestamp, thread); 1602 - if (!strcmp(event->name, "sched_wakeup")) 1603 - process_sched_wakeup_event(data, session, event, cpu, timestamp, thread); 1604 - if (!strcmp(event->name, "sched_wakeup_new")) 1605 - process_sched_wakeup_event(data, session, event, cpu, timestamp, thread); 1606 - if (!strcmp(event->name, "sched_process_fork")) 1607 - process_sched_fork_event(data, event, cpu, timestamp, thread); 1608 - if (!strcmp(event->name, "sched_process_exit")) 1609 - 
process_sched_exit_event(event, cpu, timestamp, thread); 1610 - if (!strcmp(event->name, "sched_migrate_task")) 1611 - process_sched_migrate_task_event(data, session, event, cpu, timestamp, thread); 1612 - } 1613 - 1614 - static int process_sample_event(union perf_event *event, 1615 - struct perf_sample *sample, 1616 - struct perf_evsel *evsel __used, 1617 - struct perf_session *session) 1618 - { 1619 - struct thread *thread; 1620 - 1621 - if (!(session->sample_type & PERF_SAMPLE_RAW)) 1622 - return 0; 1623 - 1624 - thread = perf_session__findnew(session, sample->pid); 1625 1591 if (thread == NULL) { 1626 - pr_debug("problem processing %d event, skipping it.\n", 1627 - event->header.type); 1592 + pr_debug("problem processing %s event, skipping it.\n", 1593 + evsel->name); 1628 1594 return -1; 1629 1595 } 1630 1596 1631 - dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid); 1597 + evsel->hists.stats.total_period += sample->period; 1598 + hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE); 1632 1599 1633 - if (profile_cpu != -1 && profile_cpu != (int)sample->cpu) 1634 - return 0; 1600 + if (evsel->handler.func != NULL) { 1601 + tracepoint_handler f = evsel->handler.func; 1635 1602 1636 - process_raw_event(event, session, sample->raw_data, sample->cpu, 1637 - sample->time, thread); 1603 + if (evsel->handler.data == NULL) 1604 + evsel->handler.data = trace_find_event(evsel->attr.config); 1605 + 1606 + f(tool, evsel->handler.data, sample, machine, thread); 1607 + } 1638 1608 1639 1609 return 0; 1640 1610 } 1641 1611 1642 - static struct perf_event_ops event_ops = { 1643 - .sample = process_sample_event, 1612 + static struct perf_tool perf_sched = { 1613 + .sample = perf_sched__process_tracepoint_sample, 1644 1614 .comm = perf_event__process_comm, 1645 1615 .lost = perf_event__process_lost, 1646 1616 .fork = perf_event__process_task, ··· 1630 1640 static void read_events(bool destroy, struct perf_session **psession) 1631 1641 { 1632 1642 int err = 
-EINVAL; 1643 + const struct perf_evsel_str_handler handlers[] = { 1644 + { "sched:sched_switch", process_sched_switch_event, }, 1645 + { "sched:sched_stat_runtime", process_sched_runtime_event, }, 1646 + { "sched:sched_wakeup", process_sched_wakeup_event, }, 1647 + { "sched:sched_wakeup_new", process_sched_wakeup_event, }, 1648 + { "sched:sched_process_fork", process_sched_fork_event, }, 1649 + { "sched:sched_process_exit", process_sched_exit_event, }, 1650 + { "sched:sched_migrate_task", process_sched_migrate_task_event, }, 1651 + }; 1633 1652 struct perf_session *session = perf_session__new(input_name, O_RDONLY, 1634 - 0, false, &event_ops); 1653 + 0, false, &perf_sched); 1635 1654 if (session == NULL) 1636 1655 die("No Memory"); 1637 1656 1657 + err = perf_evlist__set_tracepoints_handlers_array(session->evlist, handlers); 1658 + assert(err == 0); 1659 + 1638 1660 if (perf_session__has_traces(session, "record -R")) { 1639 - err = perf_session__process_events(session, &event_ops); 1661 + err = perf_session__process_events(session, &perf_sched); 1640 1662 if (err) 1641 1663 die("Failed to process events, error %d", err); 1642 1664
+74 -56
tools/perf/builtin-script.c
··· 7 7 #include "util/header.h" 8 8 #include "util/parse-options.h" 9 9 #include "util/session.h" 10 + #include "util/tool.h" 10 11 #include "util/symbol.h" 11 12 #include "util/thread.h" 12 13 #include "util/trace-event.h" ··· 24 23 extern const struct option record_options[]; 25 24 static bool no_callchain; 26 25 static bool show_full_info; 26 + static bool system_wide; 27 27 static const char *cpu_list; 28 28 static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); 29 29 ··· 317 315 318 316 static void print_sample_addr(union perf_event *event, 319 317 struct perf_sample *sample, 320 - struct perf_session *session, 318 + struct machine *machine, 321 319 struct thread *thread, 322 320 struct perf_event_attr *attr) 323 321 { ··· 330 328 if (!sample_addr_correlates_sym(attr)) 331 329 return; 332 330 333 - thread__find_addr_map(thread, session, cpumode, MAP__FUNCTION, 334 - event->ip.pid, sample->addr, &al); 331 + thread__find_addr_map(thread, machine, cpumode, MAP__FUNCTION, 332 + sample->addr, &al); 335 333 if (!al.map) 336 - thread__find_addr_map(thread, session, cpumode, MAP__VARIABLE, 337 - event->ip.pid, sample->addr, &al); 334 + thread__find_addr_map(thread, machine, cpumode, MAP__VARIABLE, 335 + sample->addr, &al); 338 336 339 337 al.cpu = sample->cpu; 340 338 al.sym = NULL; ··· 364 362 static void process_event(union perf_event *event __unused, 365 363 struct perf_sample *sample, 366 364 struct perf_evsel *evsel, 367 - struct perf_session *session, 365 + struct machine *machine, 368 366 struct thread *thread) 369 367 { 370 368 struct perf_event_attr *attr = &evsel->attr; ··· 379 377 sample->raw_size); 380 378 381 379 if (PRINT_FIELD(ADDR)) 382 - print_sample_addr(event, sample, session, thread, attr); 380 + print_sample_addr(event, sample, machine, thread, attr); 383 381 384 382 if (PRINT_FIELD(IP)) { 385 383 if (!symbol_conf.use_callchain) 386 384 printf(" "); 387 385 else 388 386 printf("\n"); 389 - perf_session__print_ip(event, sample, session, 390 - 
PRINT_FIELD(SYM), PRINT_FIELD(DSO)); 387 + perf_event__print_ip(event, sample, machine, evsel, 388 + PRINT_FIELD(SYM), PRINT_FIELD(DSO)); 391 389 } 392 390 393 391 printf("\n"); ··· 434 432 return scripting_ops->stop_script(); 435 433 } 436 434 437 - static char const *input_name = "perf.data"; 435 + static const char *input_name; 438 436 439 - static int process_sample_event(union perf_event *event, 437 + static int process_sample_event(struct perf_tool *tool __used, 438 + union perf_event *event, 440 439 struct perf_sample *sample, 441 440 struct perf_evsel *evsel, 442 - struct perf_session *session) 441 + struct machine *machine) 443 442 { 444 - struct thread *thread = perf_session__findnew(session, event->ip.pid); 443 + struct addr_location al; 444 + struct thread *thread = machine__findnew_thread(machine, event->ip.tid); 445 445 446 446 if (thread == NULL) { 447 447 pr_debug("problem processing %d event, skipping it.\n", ··· 462 458 return 0; 463 459 } 464 460 461 + if (perf_event__preprocess_sample(event, machine, &al, sample, 0) < 0) { 462 + pr_err("problem processing %d event, skipping it.\n", 463 + event->header.type); 464 + return -1; 465 + } 466 + 467 + if (al.filtered) 468 + return 0; 469 + 465 470 if (cpu_list && !test_bit(sample->cpu, cpu_bitmap)) 466 471 return 0; 467 472 468 - scripting_ops->process_event(event, sample, evsel, session, thread); 473 + scripting_ops->process_event(event, sample, evsel, machine, thread); 469 474 470 - session->hists.stats.total_period += sample->period; 475 + evsel->hists.stats.total_period += sample->period; 471 476 return 0; 472 477 } 473 478 474 - static struct perf_event_ops event_ops = { 479 + static struct perf_tool perf_script = { 475 480 .sample = process_sample_event, 476 481 .mmap = perf_event__process_mmap, 477 482 .comm = perf_event__process_comm, ··· 507 494 508 495 signal(SIGINT, sig_handler); 509 496 510 - ret = perf_session__process_events(session, &event_ops); 497 + ret = 
perf_session__process_events(session, &perf_script); 511 498 512 499 if (debug_mode) 513 500 pr_err("Misordered timestamps: %" PRIu64 "\n", nr_unordered); ··· 536 523 return s; 537 524 } 538 525 539 - static void script_spec__delete(struct script_spec *s) 540 - { 541 - free(s->spec); 542 - free(s); 543 - } 544 - 545 526 static void script_spec__add(struct script_spec *s) 546 527 { 547 528 list_add_tail(&s->node, &script_specs); ··· 561 554 562 555 s = script_spec__new(spec, ops); 563 556 if (!s) 564 - goto out_delete_spec; 557 + return NULL; 565 558 566 559 script_spec__add(s); 567 560 568 561 return s; 569 - 570 - out_delete_spec: 571 - script_spec__delete(s); 572 - 573 - return NULL; 574 562 } 575 563 576 564 int script_spec_register(const char *spec, struct scripting_ops *ops) ··· 683 681 type = PERF_TYPE_RAW; 684 682 else { 685 683 fprintf(stderr, "Invalid event type in field string.\n"); 686 - return -EINVAL; 684 + rc = -EINVAL; 685 + goto out; 687 686 } 688 687 689 688 if (output[type].user_set) ··· 926 923 return 0; 927 924 } 928 925 926 + static char *get_script_root(struct dirent *script_dirent, const char *suffix) 927 + { 928 + char *script_root, *str; 929 + 930 + script_root = strdup(script_dirent->d_name); 931 + if (!script_root) 932 + return NULL; 933 + 934 + str = (char *)ends_with(script_root, suffix); 935 + if (!str) { 936 + free(script_root); 937 + return NULL; 938 + } 939 + 940 + *str = '\0'; 941 + return script_root; 942 + } 943 + 929 944 static int list_available_scripts(const struct option *opt __used, 930 945 const char *s __used, int unset __used) 931 946 { ··· 955 934 struct script_desc *desc; 956 935 char first_half[BUFSIZ]; 957 936 char *script_root; 958 - char *str; 959 937 960 938 snprintf(scripts_path, MAXPATHLEN, "%s/scripts", perf_exec_path()); 961 939 ··· 970 950 continue; 971 951 972 952 for_each_script(lang_path, lang_dir, script_dirent, script_next) { 973 - script_root = strdup(script_dirent.d_name); 974 - str = (char 
*)ends_with(script_root, REPORT_SUFFIX); 975 - if (str) { 976 - *str = '\0'; 953 + script_root = get_script_root(&script_dirent, REPORT_SUFFIX); 954 + if (script_root) { 977 955 desc = script_desc__findnew(script_root); 978 956 snprintf(script_path, MAXPATHLEN, "%s/%s", 979 957 lang_path, script_dirent.d_name); 980 958 read_script_info(desc, script_path); 959 + free(script_root); 981 960 } 982 - free(script_root); 983 961 } 984 962 } 985 963 ··· 999 981 char script_path[MAXPATHLEN]; 1000 982 DIR *scripts_dir, *lang_dir; 1001 983 char lang_path[MAXPATHLEN]; 1002 - char *str, *__script_root; 1003 - char *path = NULL; 984 + char *__script_root; 1004 985 1005 986 snprintf(scripts_path, MAXPATHLEN, "%s/scripts", perf_exec_path()); 1006 987 ··· 1015 998 continue; 1016 999 1017 1000 for_each_script(lang_path, lang_dir, script_dirent, script_next) { 1018 - __script_root = strdup(script_dirent.d_name); 1019 - str = (char *)ends_with(__script_root, suffix); 1020 - if (str) { 1021 - *str = '\0'; 1022 - if (strcmp(__script_root, script_root)) 1023 - continue; 1001 + __script_root = get_script_root(&script_dirent, suffix); 1002 + if (__script_root && !strcmp(script_root, __script_root)) { 1003 + free(__script_root); 1024 1004 snprintf(script_path, MAXPATHLEN, "%s/%s", 1025 1005 lang_path, script_dirent.d_name); 1026 - path = strdup(script_path); 1027 - free(__script_root); 1028 - break; 1006 + return strdup(script_path); 1029 1007 } 1030 1008 free(__script_root); 1031 1009 } 1032 1010 } 1033 1011 1034 - return path; 1012 + return NULL; 1035 1013 } 1036 1014 1037 1015 static bool is_top_script(const char *script_path) ··· 1095 1083 OPT_CALLBACK('f', "fields", NULL, "str", 1096 1084 "comma separated output fields prepend with 'type:'. Valid types: hw,sw,trace,raw. 
Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,addr", 1097 1085 parse_output_fields), 1098 - OPT_STRING('c', "cpu", &cpu_list, "cpu", "list of cpus to profile"), 1086 + OPT_BOOLEAN('a', "all-cpus", &system_wide, 1087 + "system-wide collection from all CPUs"), 1088 + OPT_STRING('C', "cpu", &cpu_list, "cpu", "list of cpus to profile"), 1089 + OPT_STRING('c', "comms", &symbol_conf.comm_list_str, "comm[,comm...]", 1090 + "only display events for these comms"), 1099 1091 OPT_BOOLEAN('I', "show-info", &show_full_info, 1100 1092 "display extended information from perf.data file"), 1101 1093 OPT_END() ··· 1126 1110 struct perf_session *session; 1127 1111 char *script_path = NULL; 1128 1112 const char **__argv; 1129 - bool system_wide; 1130 1113 int i, j, err; 1131 1114 1132 1115 setup_scripting(); ··· 1193 1178 } 1194 1179 1195 1180 if (!pid) { 1196 - system_wide = true; 1197 1181 j = 0; 1198 1182 1199 1183 dup2(live_pipe[1], 1); 1200 1184 close(live_pipe[0]); 1201 1185 1202 - if (!is_top_script(argv[0])) 1186 + if (is_top_script(argv[0])) { 1187 + system_wide = true; 1188 + } else if (!system_wide) { 1203 1189 system_wide = !have_cmd(argc - rep_args, 1204 1190 &argv[rep_args]); 1191 + } 1205 1192 1206 1193 __argv = malloc((argc + 6) * sizeof(const char *)); 1207 1194 if (!__argv) ··· 1251 1234 script_path = rep_script_path; 1252 1235 1253 1236 if (script_path) { 1254 - system_wide = false; 1255 1237 j = 0; 1256 1238 1257 - if (rec_script_path) 1239 + if (!rec_script_path) 1240 + system_wide = false; 1241 + else if (!system_wide) 1258 1242 system_wide = !have_cmd(argc - 1, &argv[1]); 1259 1243 1260 1244 __argv = malloc((argc + 2) * sizeof(const char *)); ··· 1279 1261 if (!script_name) 1280 1262 setup_pager(); 1281 1263 1282 - session = perf_session__new(input_name, O_RDONLY, 0, false, &event_ops); 1264 + session = perf_session__new(input_name, O_RDONLY, 0, false, &perf_script); 1283 1265 if (session == NULL) 1284 1266 return -ENOMEM; 1285 1267 ··· 1305 1287 return 
-1; 1306 1288 } 1307 1289 1308 - input = open(input_name, O_RDONLY); 1290 + input = open(session->filename, O_RDONLY); /* input_name */ 1309 1291 if (input < 0) { 1310 1292 perror("failed to open file"); 1311 1293 exit(-1);
+43 -91
tools/perf/builtin-stat.c
··· 578 578 avg / avg_stats(&walltime_nsecs_stats)); 579 579 } 580 580 581 + /* used for get_ratio_color() */ 582 + enum grc_type { 583 + GRC_STALLED_CYCLES_FE, 584 + GRC_STALLED_CYCLES_BE, 585 + GRC_CACHE_MISSES, 586 + GRC_MAX_NR 587 + }; 588 + 589 + static const char *get_ratio_color(enum grc_type type, double ratio) 590 + { 591 + static const double grc_table[GRC_MAX_NR][3] = { 592 + [GRC_STALLED_CYCLES_FE] = { 50.0, 30.0, 10.0 }, 593 + [GRC_STALLED_CYCLES_BE] = { 75.0, 50.0, 20.0 }, 594 + [GRC_CACHE_MISSES] = { 20.0, 10.0, 5.0 }, 595 + }; 596 + const char *color = PERF_COLOR_NORMAL; 597 + 598 + if (ratio > grc_table[type][0]) 599 + color = PERF_COLOR_RED; 600 + else if (ratio > grc_table[type][1]) 601 + color = PERF_COLOR_MAGENTA; 602 + else if (ratio > grc_table[type][2]) 603 + color = PERF_COLOR_YELLOW; 604 + 605 + return color; 606 + } 607 + 581 608 static void print_stalled_cycles_frontend(int cpu, struct perf_evsel *evsel __used, double avg) 582 609 { 583 610 double total, ratio = 0.0; ··· 615 588 if (total) 616 589 ratio = avg / total * 100.0; 617 590 618 - color = PERF_COLOR_NORMAL; 619 - if (ratio > 50.0) 620 - color = PERF_COLOR_RED; 621 - else if (ratio > 30.0) 622 - color = PERF_COLOR_MAGENTA; 623 - else if (ratio > 10.0) 624 - color = PERF_COLOR_YELLOW; 591 + color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio); 625 592 626 593 fprintf(output, " # "); 627 594 color_fprintf(output, color, "%6.2f%%", ratio); ··· 632 611 if (total) 633 612 ratio = avg / total * 100.0; 634 613 635 - color = PERF_COLOR_NORMAL; 636 - if (ratio > 75.0) 637 - color = PERF_COLOR_RED; 638 - else if (ratio > 50.0) 639 - color = PERF_COLOR_MAGENTA; 640 - else if (ratio > 20.0) 641 - color = PERF_COLOR_YELLOW; 614 + color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio); 642 615 643 616 fprintf(output, " # "); 644 617 color_fprintf(output, color, "%6.2f%%", ratio); ··· 649 634 if (total) 650 635 ratio = avg / total * 100.0; 651 636 652 - color = PERF_COLOR_NORMAL; 653 - if 
(ratio > 20.0) 654 - color = PERF_COLOR_RED; 655 - else if (ratio > 10.0) 656 - color = PERF_COLOR_MAGENTA; 657 - else if (ratio > 5.0) 658 - color = PERF_COLOR_YELLOW; 637 + color = get_ratio_color(GRC_CACHE_MISSES, ratio); 659 638 660 639 fprintf(output, " # "); 661 640 color_fprintf(output, color, "%6.2f%%", ratio); ··· 666 657 if (total) 667 658 ratio = avg / total * 100.0; 668 659 669 - color = PERF_COLOR_NORMAL; 670 - if (ratio > 20.0) 671 - color = PERF_COLOR_RED; 672 - else if (ratio > 10.0) 673 - color = PERF_COLOR_MAGENTA; 674 - else if (ratio > 5.0) 675 - color = PERF_COLOR_YELLOW; 660 + color = get_ratio_color(GRC_CACHE_MISSES, ratio); 676 661 677 662 fprintf(output, " # "); 678 663 color_fprintf(output, color, "%6.2f%%", ratio); ··· 683 680 if (total) 684 681 ratio = avg / total * 100.0; 685 682 686 - color = PERF_COLOR_NORMAL; 687 - if (ratio > 20.0) 688 - color = PERF_COLOR_RED; 689 - else if (ratio > 10.0) 690 - color = PERF_COLOR_MAGENTA; 691 - else if (ratio > 5.0) 692 - color = PERF_COLOR_YELLOW; 683 + color = get_ratio_color(GRC_CACHE_MISSES, ratio); 693 684 694 685 fprintf(output, " # "); 695 686 color_fprintf(output, color, "%6.2f%%", ratio); ··· 700 703 if (total) 701 704 ratio = avg / total * 100.0; 702 705 703 - color = PERF_COLOR_NORMAL; 704 - if (ratio > 20.0) 705 - color = PERF_COLOR_RED; 706 - else if (ratio > 10.0) 707 - color = PERF_COLOR_MAGENTA; 708 - else if (ratio > 5.0) 709 - color = PERF_COLOR_YELLOW; 706 + color = get_ratio_color(GRC_CACHE_MISSES, ratio); 710 707 711 708 fprintf(output, " # "); 712 709 color_fprintf(output, color, "%6.2f%%", ratio); ··· 717 726 if (total) 718 727 ratio = avg / total * 100.0; 719 728 720 - color = PERF_COLOR_NORMAL; 721 - if (ratio > 20.0) 722 - color = PERF_COLOR_RED; 723 - else if (ratio > 10.0) 724 - color = PERF_COLOR_MAGENTA; 725 - else if (ratio > 5.0) 726 - color = PERF_COLOR_YELLOW; 729 + color = get_ratio_color(GRC_CACHE_MISSES, ratio); 727 730 728 731 fprintf(output, " # "); 729 732 
color_fprintf(output, color, "%6.2f%%", ratio); ··· 734 749 if (total) 735 750 ratio = avg / total * 100.0; 736 751 737 - color = PERF_COLOR_NORMAL; 738 - if (ratio > 20.0) 739 - color = PERF_COLOR_RED; 740 - else if (ratio > 10.0) 741 - color = PERF_COLOR_MAGENTA; 742 - else if (ratio > 5.0) 743 - color = PERF_COLOR_YELLOW; 752 + color = get_ratio_color(GRC_CACHE_MISSES, ratio); 744 753 745 754 fprintf(output, " # "); 746 755 color_fprintf(output, color, "%6.2f%%", ratio); ··· 1087 1108 */ 1088 1109 static int add_default_attributes(void) 1089 1110 { 1090 - struct perf_evsel *pos; 1091 - size_t attr_nr = 0; 1092 - size_t c; 1093 - 1094 1111 /* Set attrs if no event is selected and !null_run: */ 1095 1112 if (null_run) 1096 1113 return 0; 1097 1114 1098 1115 if (!evsel_list->nr_entries) { 1099 - for (c = 0; c < ARRAY_SIZE(default_attrs); c++) { 1100 - pos = perf_evsel__new(default_attrs + c, c + attr_nr); 1101 - if (pos == NULL) 1102 - return -1; 1103 - perf_evlist__add(evsel_list, pos); 1104 - } 1105 - attr_nr += c; 1116 + if (perf_evlist__add_attrs_array(evsel_list, default_attrs) < 0) 1117 + return -1; 1106 1118 } 1107 1119 1108 1120 /* Detailed events get appended to the event list: */ ··· 1102 1132 return 0; 1103 1133 1104 1134 /* Append detailed run extra attributes: */ 1105 - for (c = 0; c < ARRAY_SIZE(detailed_attrs); c++) { 1106 - pos = perf_evsel__new(detailed_attrs + c, c + attr_nr); 1107 - if (pos == NULL) 1108 - return -1; 1109 - perf_evlist__add(evsel_list, pos); 1110 - } 1111 - attr_nr += c; 1135 + if (perf_evlist__add_attrs_array(evsel_list, detailed_attrs) < 0) 1136 + return -1; 1112 1137 1113 1138 if (detailed_run < 2) 1114 1139 return 0; 1115 1140 1116 1141 /* Append very detailed run extra attributes: */ 1117 - for (c = 0; c < ARRAY_SIZE(very_detailed_attrs); c++) { 1118 - pos = perf_evsel__new(very_detailed_attrs + c, c + attr_nr); 1119 - if (pos == NULL) 1120 - return -1; 1121 - perf_evlist__add(evsel_list, pos); 1122 - } 1142 + if 
(perf_evlist__add_attrs_array(evsel_list, very_detailed_attrs) < 0) 1143 + return -1; 1123 1144 1124 1145 if (detailed_run < 3) 1125 1146 return 0; 1126 1147 1127 1148 /* Append very, very detailed run extra attributes: */ 1128 - for (c = 0; c < ARRAY_SIZE(very_very_detailed_attrs); c++) { 1129 - pos = perf_evsel__new(very_very_detailed_attrs + c, c + attr_nr); 1130 - if (pos == NULL) 1131 - return -1; 1132 - perf_evlist__add(evsel_list, pos); 1133 - } 1134 - 1135 - 1136 - return 0; 1149 + return perf_evlist__add_attrs_array(evsel_list, very_very_detailed_attrs); 1137 1150 } 1138 1151 1139 1152 int cmd_stat(int argc, const char **argv, const char *prefix __used) ··· 1220 1267 1221 1268 list_for_each_entry(pos, &evsel_list->entries, node) { 1222 1269 if (perf_evsel__alloc_stat_priv(pos) < 0 || 1223 - perf_evsel__alloc_counts(pos, evsel_list->cpus->nr) < 0 || 1224 - perf_evsel__alloc_fd(pos, evsel_list->cpus->nr, evsel_list->threads->nr) < 0) 1270 + perf_evsel__alloc_counts(pos, evsel_list->cpus->nr) < 0) 1225 1271 goto out_free_fd; 1226 1272 } 1227 1273
+523 -24
tools/perf/builtin-test.c
··· 7 7 8 8 #include "util/cache.h" 9 9 #include "util/debug.h" 10 + #include "util/debugfs.h" 10 11 #include "util/evlist.h" 11 12 #include "util/parse-options.h" 12 13 #include "util/parse-events.h" 13 14 #include "util/symbol.h" 14 15 #include "util/thread_map.h" 15 16 #include "../../include/linux/hw_breakpoint.h" 16 - 17 - static long page_size; 18 17 19 18 static int vmlinux_matches_kallsyms_filter(struct map *map __used, struct symbol *sym) 20 19 { ··· 30 31 struct map *kallsyms_map, *vmlinux_map; 31 32 struct machine kallsyms, vmlinux; 32 33 enum map_type type = MAP__FUNCTION; 34 + long page_size = sysconf(_SC_PAGE_SIZE); 33 35 struct ref_reloc_sym ref_reloc_sym = { .name = "_stext", }; 34 36 35 37 /* ··· 247 247 248 248 if (asprintf(&filename, 249 249 "%s/syscalls/%s/id", 250 - debugfs_path, evname) < 0) 250 + tracing_events_path, evname) < 0) 251 251 return -1; 252 252 253 253 fd = open(filename, O_RDONLY); ··· 603 603 604 604 #define TEST_ASSERT_VAL(text, cond) \ 605 605 do { \ 606 - if (!cond) { \ 606 + if (!(cond)) { \ 607 607 pr_debug("FAILED %s:%d %s\n", __FILE__, __LINE__, text); \ 608 608 return -1; \ 609 609 } \ ··· 759 759 return 0; 760 760 } 761 761 762 + static int test__checkevent_tracepoint_modifier(struct perf_evlist *evlist) 763 + { 764 + struct perf_evsel *evsel = list_entry(evlist->entries.next, 765 + struct perf_evsel, node); 766 + 767 + TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user); 768 + TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel); 769 + TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv); 770 + TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); 771 + 772 + return test__checkevent_tracepoint(evlist); 773 + } 774 + 775 + static int 776 + test__checkevent_tracepoint_multi_modifier(struct perf_evlist *evlist) 777 + { 778 + struct perf_evsel *evsel; 779 + 780 + TEST_ASSERT_VAL("wrong number of entries", evlist->nr_entries > 1); 781 + 782 + list_for_each_entry(evsel, 
&evlist->entries, node) { 783 + TEST_ASSERT_VAL("wrong exclude_user", 784 + !evsel->attr.exclude_user); 785 + TEST_ASSERT_VAL("wrong exclude_kernel", 786 + evsel->attr.exclude_kernel); 787 + TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv); 788 + TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); 789 + } 790 + 791 + return test__checkevent_tracepoint_multi(evlist); 792 + } 793 + 794 + static int test__checkevent_raw_modifier(struct perf_evlist *evlist) 795 + { 796 + struct perf_evsel *evsel = list_entry(evlist->entries.next, 797 + struct perf_evsel, node); 798 + 799 + TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user); 800 + TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel); 801 + TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv); 802 + TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip); 803 + 804 + return test__checkevent_raw(evlist); 805 + } 806 + 807 + static int test__checkevent_numeric_modifier(struct perf_evlist *evlist) 808 + { 809 + struct perf_evsel *evsel = list_entry(evlist->entries.next, 810 + struct perf_evsel, node); 811 + 812 + TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user); 813 + TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel); 814 + TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv); 815 + TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip); 816 + 817 + return test__checkevent_numeric(evlist); 818 + } 819 + 820 + static int test__checkevent_symbolic_name_modifier(struct perf_evlist *evlist) 821 + { 822 + struct perf_evsel *evsel = list_entry(evlist->entries.next, 823 + struct perf_evsel, node); 824 + 825 + TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user); 826 + TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel); 827 + TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv); 828 + TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); 829 + 830 + return 
test__checkevent_symbolic_name(evlist); 831 + } 832 + 833 + static int test__checkevent_symbolic_alias_modifier(struct perf_evlist *evlist) 834 + { 835 + struct perf_evsel *evsel = list_entry(evlist->entries.next, 836 + struct perf_evsel, node); 837 + 838 + TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user); 839 + TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel); 840 + TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv); 841 + TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); 842 + 843 + return test__checkevent_symbolic_alias(evlist); 844 + } 845 + 846 + static int test__checkevent_genhw_modifier(struct perf_evlist *evlist) 847 + { 848 + struct perf_evsel *evsel = list_entry(evlist->entries.next, 849 + struct perf_evsel, node); 850 + 851 + TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user); 852 + TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel); 853 + TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv); 854 + TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip); 855 + 856 + return test__checkevent_genhw(evlist); 857 + } 858 + 762 859 static struct test__event_st { 763 860 const char *name; 764 861 __u32 type; ··· 905 808 .name = "mem:0:w", 906 809 .check = test__checkevent_breakpoint_w, 907 810 }, 811 + { 812 + .name = "syscalls:sys_enter_open:k", 813 + .check = test__checkevent_tracepoint_modifier, 814 + }, 815 + { 816 + .name = "syscalls:*:u", 817 + .check = test__checkevent_tracepoint_multi_modifier, 818 + }, 819 + { 820 + .name = "r1:kp", 821 + .check = test__checkevent_raw_modifier, 822 + }, 823 + { 824 + .name = "1:1:hp", 825 + .check = test__checkevent_numeric_modifier, 826 + }, 827 + { 828 + .name = "instructions:h", 829 + .check = test__checkevent_symbolic_name_modifier, 830 + }, 831 + { 832 + .name = "faults:u", 833 + .check = test__checkevent_symbolic_alias_modifier, 834 + }, 835 + { 836 + .name = "L1-dcache-load-miss:kp", 837 + .check = 
test__checkevent_genhw_modifier, 838 + }, 908 839 }; 909 840 910 841 #define TEST__EVENTS_CNT (sizeof(test__events) / sizeof(struct test__event_st)) ··· 966 841 967 842 return ret; 968 843 } 844 + 845 + static int sched__get_first_possible_cpu(pid_t pid, cpu_set_t **maskp, 846 + size_t *sizep) 847 + { 848 + cpu_set_t *mask; 849 + size_t size; 850 + int i, cpu = -1, nrcpus = 1024; 851 + realloc: 852 + mask = CPU_ALLOC(nrcpus); 853 + size = CPU_ALLOC_SIZE(nrcpus); 854 + CPU_ZERO_S(size, mask); 855 + 856 + if (sched_getaffinity(pid, size, mask) == -1) { 857 + CPU_FREE(mask); 858 + if (errno == EINVAL && nrcpus < (1024 << 8)) { 859 + nrcpus = nrcpus << 2; 860 + goto realloc; 861 + } 862 + perror("sched_getaffinity"); 863 + return -1; 864 + } 865 + 866 + for (i = 0; i < nrcpus; i++) { 867 + if (CPU_ISSET_S(i, size, mask)) { 868 + if (cpu == -1) { 869 + cpu = i; 870 + *maskp = mask; 871 + *sizep = size; 872 + } else 873 + CPU_CLR_S(i, size, mask); 874 + } 875 + } 876 + 877 + if (cpu == -1) 878 + CPU_FREE(mask); 879 + 880 + return cpu; 881 + } 882 + 883 + static int test__PERF_RECORD(void) 884 + { 885 + struct perf_record_opts opts = { 886 + .target_pid = -1, 887 + .target_tid = -1, 888 + .no_delay = true, 889 + .freq = 10, 890 + .mmap_pages = 256, 891 + .sample_id_all_avail = true, 892 + }; 893 + cpu_set_t *cpu_mask = NULL; 894 + size_t cpu_mask_size = 0; 895 + struct perf_evlist *evlist = perf_evlist__new(NULL, NULL); 896 + struct perf_evsel *evsel; 897 + struct perf_sample sample; 898 + const char *cmd = "sleep"; 899 + const char *argv[] = { cmd, "1", NULL, }; 900 + char *bname; 901 + u64 sample_type, prev_time = 0; 902 + bool found_cmd_mmap = false, 903 + found_libc_mmap = false, 904 + found_vdso_mmap = false, 905 + found_ld_mmap = false; 906 + int err = -1, errs = 0, i, wakeups = 0, sample_size; 907 + u32 cpu; 908 + int total_events = 0, nr_events[PERF_RECORD_MAX] = { 0, }; 909 + 910 + if (evlist == NULL || argv == NULL) { 911 + pr_debug("Not enough memory to create 
evlist\n"); 912 + goto out; 913 + } 914 + 915 + /* 916 + * We need at least one evsel in the evlist, use the default 917 + * one: "cycles". 918 + */ 919 + err = perf_evlist__add_default(evlist); 920 + if (err < 0) { 921 + pr_debug("Not enough memory to create evsel\n"); 922 + goto out_delete_evlist; 923 + } 924 + 925 + /* 926 + * Create maps of threads and cpus to monitor. In this case 927 + * we start with all threads and cpus (-1, -1) but then in 928 + * perf_evlist__prepare_workload we'll fill in the only thread 929 + * we're monitoring, the one forked there. 930 + */ 931 + err = perf_evlist__create_maps(evlist, opts.target_pid, 932 + opts.target_tid, opts.cpu_list); 933 + if (err < 0) { 934 + pr_debug("Not enough memory to create thread/cpu maps\n"); 935 + goto out_delete_evlist; 936 + } 937 + 938 + /* 939 + * Prepare the workload in argv[] to run, it'll fork it, and then wait 940 + * for perf_evlist__start_workload() to exec it. This is done this way 941 + * so that we have time to open the evlist (calling sys_perf_event_open 942 + * on all the fds) and then mmap them. 943 + */ 944 + err = perf_evlist__prepare_workload(evlist, &opts, argv); 945 + if (err < 0) { 946 + pr_debug("Couldn't run the workload!\n"); 947 + goto out_delete_evlist; 948 + } 949 + 950 + /* 951 + * Config the evsels, setting attr->comm on the first one, etc. 952 + */ 953 + evsel = list_entry(evlist->entries.next, struct perf_evsel, node); 954 + evsel->attr.sample_type |= PERF_SAMPLE_CPU; 955 + evsel->attr.sample_type |= PERF_SAMPLE_TID; 956 + evsel->attr.sample_type |= PERF_SAMPLE_TIME; 957 + perf_evlist__config_attrs(evlist, &opts); 958 + 959 + err = sched__get_first_possible_cpu(evlist->workload.pid, &cpu_mask, 960 + &cpu_mask_size); 961 + if (err < 0) { 962 + pr_debug("sched__get_first_possible_cpu: %s\n", strerror(errno)); 963 + goto out_delete_evlist; 964 + } 965 + 966 + cpu = err; 967 + 968 + /* 969 + * So that we can check perf_sample.cpu on all the samples. 
970 + */ 971 + if (sched_setaffinity(evlist->workload.pid, cpu_mask_size, cpu_mask) < 0) { 972 + pr_debug("sched_setaffinity: %s\n", strerror(errno)); 973 + goto out_free_cpu_mask; 974 + } 975 + 976 + /* 977 + * Call sys_perf_event_open on all the fds on all the evsels, 978 + * grouping them if asked to. 979 + */ 980 + err = perf_evlist__open(evlist, opts.group); 981 + if (err < 0) { 982 + pr_debug("perf_evlist__open: %s\n", strerror(errno)); 983 + goto out_delete_evlist; 984 + } 985 + 986 + /* 987 + * mmap the first fd on a given CPU and ask for events for the other 988 + * fds in the same CPU to be injected in the same mmap ring buffer 989 + * (using ioctl(PERF_EVENT_IOC_SET_OUTPUT)). 990 + */ 991 + err = perf_evlist__mmap(evlist, opts.mmap_pages, false); 992 + if (err < 0) { 993 + pr_debug("perf_evlist__mmap: %s\n", strerror(errno)); 994 + goto out_delete_evlist; 995 + } 996 + 997 + /* 998 + * We'll need these two to parse the PERF_SAMPLE_* fields in each 999 + * event. 1000 + */ 1001 + sample_type = perf_evlist__sample_type(evlist); 1002 + sample_size = __perf_evsel__sample_size(sample_type); 1003 + 1004 + /* 1005 + * Now that all is properly set up, enable the events, they will 1006 + * count just on workload.pid, which will start... 1007 + */ 1008 + perf_evlist__enable(evlist); 1009 + 1010 + /* 1011 + * Now! 
1012 + */ 1013 + perf_evlist__start_workload(evlist); 1014 + 1015 + while (1) { 1016 + int before = total_events; 1017 + 1018 + for (i = 0; i < evlist->nr_mmaps; i++) { 1019 + union perf_event *event; 1020 + 1021 + while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) { 1022 + const u32 type = event->header.type; 1023 + const char *name = perf_event__name(type); 1024 + 1025 + ++total_events; 1026 + if (type < PERF_RECORD_MAX) 1027 + nr_events[type]++; 1028 + 1029 + err = perf_event__parse_sample(event, sample_type, 1030 + sample_size, true, 1031 + &sample, false); 1032 + if (err < 0) { 1033 + if (verbose) 1034 + perf_event__fprintf(event, stderr); 1035 + pr_debug("Couldn't parse sample\n"); 1036 + goto out_err; 1037 + } 1038 + 1039 + if (verbose) { 1040 + pr_info("%" PRIu64" %d ", sample.time, sample.cpu); 1041 + perf_event__fprintf(event, stderr); 1042 + } 1043 + 1044 + if (prev_time > sample.time) { 1045 + pr_debug("%s going backwards in time, prev=%" PRIu64 ", curr=%" PRIu64 "\n", 1046 + name, prev_time, sample.time); 1047 + ++errs; 1048 + } 1049 + 1050 + prev_time = sample.time; 1051 + 1052 + if (sample.cpu != cpu) { 1053 + pr_debug("%s with unexpected cpu, expected %d, got %d\n", 1054 + name, cpu, sample.cpu); 1055 + ++errs; 1056 + } 1057 + 1058 + if ((pid_t)sample.pid != evlist->workload.pid) { 1059 + pr_debug("%s with unexpected pid, expected %d, got %d\n", 1060 + name, evlist->workload.pid, sample.pid); 1061 + ++errs; 1062 + } 1063 + 1064 + if ((pid_t)sample.tid != evlist->workload.pid) { 1065 + pr_debug("%s with unexpected tid, expected %d, got %d\n", 1066 + name, evlist->workload.pid, sample.tid); 1067 + ++errs; 1068 + } 1069 + 1070 + if ((type == PERF_RECORD_COMM || 1071 + type == PERF_RECORD_MMAP || 1072 + type == PERF_RECORD_FORK || 1073 + type == PERF_RECORD_EXIT) && 1074 + (pid_t)event->comm.pid != evlist->workload.pid) { 1075 + pr_debug("%s with unexpected pid/tid\n", name); 1076 + ++errs; 1077 + } 1078 + 1079 + if ((type == PERF_RECORD_COMM 
|| 1080 + type == PERF_RECORD_MMAP) && 1081 + event->comm.pid != event->comm.tid) { 1082 + pr_debug("%s with different pid/tid!\n", name); 1083 + ++errs; 1084 + } 1085 + 1086 + switch (type) { 1087 + case PERF_RECORD_COMM: 1088 + if (strcmp(event->comm.comm, cmd)) { 1089 + pr_debug("%s with unexpected comm!\n", name); 1090 + ++errs; 1091 + } 1092 + break; 1093 + case PERF_RECORD_EXIT: 1094 + goto found_exit; 1095 + case PERF_RECORD_MMAP: 1096 + bname = strrchr(event->mmap.filename, '/'); 1097 + if (bname != NULL) { 1098 + if (!found_cmd_mmap) 1099 + found_cmd_mmap = !strcmp(bname + 1, cmd); 1100 + if (!found_libc_mmap) 1101 + found_libc_mmap = !strncmp(bname + 1, "libc", 4); 1102 + if (!found_ld_mmap) 1103 + found_ld_mmap = !strncmp(bname + 1, "ld", 2); 1104 + } else if (!found_vdso_mmap) 1105 + found_vdso_mmap = !strcmp(event->mmap.filename, "[vdso]"); 1106 + break; 1107 + 1108 + case PERF_RECORD_SAMPLE: 1109 + /* Just ignore samples for now */ 1110 + break; 1111 + default: 1112 + pr_debug("Unexpected perf_event->header.type %d!\n", 1113 + type); 1114 + ++errs; 1115 + } 1116 + } 1117 + } 1118 + 1119 + /* 1120 + * We don't use poll here because at least at 3.1 times the 1121 + * PERF_RECORD_{!SAMPLE} events don't honour 1122 + * perf_event_attr.wakeup_events, just PERF_EVENT_SAMPLE does. 
1123 + */ 1124 + if (total_events == before && false) 1125 + poll(evlist->pollfd, evlist->nr_fds, -1); 1126 + 1127 + sleep(1); 1128 + if (++wakeups > 5) { 1129 + pr_debug("No PERF_RECORD_EXIT event!\n"); 1130 + break; 1131 + } 1132 + } 1133 + 1134 + found_exit: 1135 + if (nr_events[PERF_RECORD_COMM] > 1) { 1136 + pr_debug("Excessive number of PERF_RECORD_COMM events!\n"); 1137 + ++errs; 1138 + } 1139 + 1140 + if (nr_events[PERF_RECORD_COMM] == 0) { 1141 + pr_debug("Missing PERF_RECORD_COMM for %s!\n", cmd); 1142 + ++errs; 1143 + } 1144 + 1145 + if (!found_cmd_mmap) { 1146 + pr_debug("PERF_RECORD_MMAP for %s missing!\n", cmd); 1147 + ++errs; 1148 + } 1149 + 1150 + if (!found_libc_mmap) { 1151 + pr_debug("PERF_RECORD_MMAP for %s missing!\n", "libc"); 1152 + ++errs; 1153 + } 1154 + 1155 + if (!found_ld_mmap) { 1156 + pr_debug("PERF_RECORD_MMAP for %s missing!\n", "ld"); 1157 + ++errs; 1158 + } 1159 + 1160 + if (!found_vdso_mmap) { 1161 + pr_debug("PERF_RECORD_MMAP for %s missing!\n", "[vdso]"); 1162 + ++errs; 1163 + } 1164 + out_err: 1165 + perf_evlist__munmap(evlist); 1166 + out_free_cpu_mask: 1167 + CPU_FREE(cpu_mask); 1168 + out_delete_evlist: 1169 + perf_evlist__delete(evlist); 1170 + out: 1171 + return (err < 0 || errs > 0) ? 
-1 : 0; 1172 + } 1173 + 969 1174 static struct test { 970 1175 const char *desc; 971 1176 int (*func)(void); ··· 1321 866 .func = test__parse_events, 1322 867 }, 1323 868 { 869 + .desc = "Validate PERF_RECORD_* events & perf_sample fields", 870 + .func = test__PERF_RECORD, 871 + }, 872 + { 1324 873 .func = NULL, 1325 874 }, 1326 875 }; 1327 876 1328 - static int __cmd_test(void) 877 + static bool perf_test__matches(int curr, int argc, const char *argv[]) 878 + { 879 + int i; 880 + 881 + if (argc == 0) 882 + return true; 883 + 884 + for (i = 0; i < argc; ++i) { 885 + char *end; 886 + long nr = strtoul(argv[i], &end, 10); 887 + 888 + if (*end == '\0') { 889 + if (nr == curr + 1) 890 + return true; 891 + continue; 892 + } 893 + 894 + if (strstr(tests[curr].desc, argv[i])) 895 + return true; 896 + } 897 + 898 + return false; 899 + } 900 + 901 + static int __cmd_test(int argc, const char *argv[]) 1329 902 { 1330 903 int i = 0; 1331 904 1332 - page_size = sysconf(_SC_PAGE_SIZE); 1333 - 1334 905 while (tests[i].func) { 1335 - int err; 1336 - pr_info("%2d: %s:", i + 1, tests[i].desc); 906 + int curr = i++, err; 907 + 908 + if (!perf_test__matches(curr, argc, argv)) 909 + continue; 910 + 911 + pr_info("%2d: %s:", i, tests[curr].desc); 1337 912 pr_debug("\n--- start ---\n"); 1338 - err = tests[i].func(); 1339 - pr_debug("---- end ----\n%s:", tests[i].desc); 913 + err = tests[curr].func(); 914 + pr_debug("---- end ----\n%s:", tests[curr].desc); 1340 915 pr_info(" %s\n", err ? 
"FAILED!\n" : "Ok"); 1341 - ++i; 1342 916 } 1343 917 1344 918 return 0; 1345 919 } 1346 920 1347 - static const char * const test_usage[] = { 1348 - "perf test [<options>]", 1349 - NULL, 1350 - }; 921 + static int perf_test__list(int argc, const char **argv) 922 + { 923 + int i = 0; 1351 924 1352 - static const struct option test_options[] = { 1353 - OPT_INTEGER('v', "verbose", &verbose, 1354 - "be more verbose (show symbol address, etc)"), 1355 - OPT_END() 1356 - }; 925 + while (tests[i].func) { 926 + int curr = i++; 927 + 928 + if (argc > 1 && !strstr(tests[curr].desc, argv[1])) 929 + continue; 930 + 931 + pr_info("%2d: %s\n", i, tests[curr].desc); 932 + } 933 + 934 + return 0; 935 + } 1357 936 1358 937 int cmd_test(int argc, const char **argv, const char *prefix __used) 1359 938 { 939 + const char * const test_usage[] = { 940 + "perf test [<options>] [{list <test-name-fragment>|[<test-name-fragments>|<test-numbers>]}]", 941 + NULL, 942 + }; 943 + const struct option test_options[] = { 944 + OPT_INTEGER('v', "verbose", &verbose, 945 + "be more verbose (show symbol address, etc)"), 946 + OPT_END() 947 + }; 948 + 1360 949 argc = parse_options(argc, argv, test_options, test_usage, 0); 1361 - if (argc) 1362 - usage_with_options(test_usage, test_options); 950 + if (argc >= 1 && !strcmp(argv[0], "list")) 951 + return perf_test__list(argc, argv); 1363 952 1364 953 symbol_conf.priv_size = sizeof(int); 1365 954 symbol_conf.sort_by_name = true; ··· 1414 915 1415 916 setup_pager(); 1416 917 1417 - return __cmd_test(); 918 + return __cmd_test(argc, argv); 1418 919 }
+22 -16
tools/perf/builtin-timechart.c
··· 19 19 #include "util/color.h" 20 20 #include <linux/list.h> 21 21 #include "util/cache.h" 22 + #include "util/evsel.h" 22 23 #include <linux/rbtree.h> 23 24 #include "util/symbol.h" 24 25 #include "util/callchain.h" ··· 32 31 #include "util/event.h" 33 32 #include "util/session.h" 34 33 #include "util/svghelper.h" 34 + #include "util/tool.h" 35 35 36 36 #define SUPPORT_OLD_POWER_EVENTS 1 37 37 #define PWR_EVENT_EXIT -1 38 38 39 39 40 - static char const *input_name = "perf.data"; 41 - static char const *output_name = "output.svg"; 40 + static const char *input_name; 41 + static const char *output_name = "output.svg"; 42 42 43 43 static unsigned int numcpus; 44 44 static u64 min_freq; /* Lowest CPU frequency seen */ ··· 275 273 static u64 cpus_pstate_start_times[MAX_CPUS]; 276 274 static u64 cpus_pstate_state[MAX_CPUS]; 277 275 278 - static int process_comm_event(union perf_event *event, 276 + static int process_comm_event(struct perf_tool *tool __used, 277 + union perf_event *event, 279 278 struct perf_sample *sample __used, 280 - struct perf_session *session __used) 279 + struct machine *machine __used) 281 280 { 282 281 pid_set_comm(event->comm.tid, event->comm.comm); 283 282 return 0; 284 283 } 285 284 286 - static int process_fork_event(union perf_event *event, 285 + static int process_fork_event(struct perf_tool *tool __used, 286 + union perf_event *event, 287 287 struct perf_sample *sample __used, 288 - struct perf_session *session __used) 288 + struct machine *machine __used) 289 289 { 290 290 pid_fork(event->fork.pid, event->fork.ppid, event->fork.time); 291 291 return 0; 292 292 } 293 293 294 - static int process_exit_event(union perf_event *event, 294 + static int process_exit_event(struct perf_tool *tool __used, 295 + union perf_event *event, 295 296 struct perf_sample *sample __used, 296 - struct perf_session *session __used) 297 + struct machine *machine __used) 297 298 { 298 299 pid_exit(event->fork.pid, event->fork.time); 299 300 return 0; ··· 
491 486 } 492 487 493 488 494 - static int process_sample_event(union perf_event *event __used, 489 + static int process_sample_event(struct perf_tool *tool __used, 490 + union perf_event *event __used, 495 491 struct perf_sample *sample, 496 - struct perf_evsel *evsel __used, 497 - struct perf_session *session) 492 + struct perf_evsel *evsel, 493 + struct machine *machine __used) 498 494 { 499 495 struct trace_entry *te; 500 496 501 - if (session->sample_type & PERF_SAMPLE_TIME) { 497 + if (evsel->attr.sample_type & PERF_SAMPLE_TIME) { 502 498 if (!first_time || first_time > sample->time) 503 499 first_time = sample->time; 504 500 if (last_time < sample->time) ··· 507 501 } 508 502 509 503 te = (void *)sample->raw_data; 510 - if (session->sample_type & PERF_SAMPLE_RAW && sample->raw_size > 0) { 504 + if ((evsel->attr.sample_type & PERF_SAMPLE_RAW) && sample->raw_size > 0) { 511 505 char *event_str; 512 506 #ifdef SUPPORT_OLD_POWER_EVENTS 513 507 struct power_entry_old *peo; ··· 980 974 svg_close(); 981 975 } 982 976 983 - static struct perf_event_ops event_ops = { 977 + static struct perf_tool perf_timechart = { 984 978 .comm = process_comm_event, 985 979 .fork = process_fork_event, 986 980 .exit = process_exit_event, ··· 991 985 static int __cmd_timechart(void) 992 986 { 993 987 struct perf_session *session = perf_session__new(input_name, O_RDONLY, 994 - 0, false, &event_ops); 988 + 0, false, &perf_timechart); 995 989 int ret = -EINVAL; 996 990 997 991 if (session == NULL) ··· 1000 994 if (!perf_session__has_traces(session, "timechart record")) 1001 995 goto out_delete; 1002 996 1003 - ret = perf_session__process_events(session, &event_ops); 997 + ret = perf_session__process_events(session, &perf_timechart); 1004 998 if (ret) 1005 999 goto out_delete; 1006 1000
+272 -286
tools/perf/builtin-top.c
··· 64 64 #include <linux/unistd.h> 65 65 #include <linux/types.h> 66 66 67 - static struct perf_top top = { 68 - .count_filter = 5, 69 - .delay_secs = 2, 70 - .target_pid = -1, 71 - .target_tid = -1, 72 - .freq = 1000, /* 1 KHz */ 73 - }; 74 - 75 - static bool system_wide = false; 76 - 77 - static bool use_tui, use_stdio; 78 - 79 - static bool sort_has_symbols; 80 - 81 - static bool dont_use_callchains; 82 - static char callchain_default_opt[] = "fractal,0.5,callee"; 83 - 84 - 85 - static int default_interval = 0; 86 - 87 - static bool kptr_restrict_warned; 88 - static bool vmlinux_warned; 89 - static bool inherit = false; 90 - static int realtime_prio = 0; 91 - static bool group = false; 92 - static bool sample_id_all_avail = true; 93 - static unsigned int mmap_pages = 128; 94 - 95 - static bool dump_symtab = false; 96 - 97 - static struct winsize winsize; 98 - 99 - static const char *sym_filter = NULL; 100 - static int sym_pcnt_filter = 5; 101 - 102 - /* 103 - * Source functions 104 - */ 105 67 106 68 void get_term_dimensions(struct winsize *ws) 107 69 { ··· 87 125 ws->ws_col = 80; 88 126 } 89 127 90 - static void update_print_entries(struct winsize *ws) 128 + static void perf_top__update_print_entries(struct perf_top *top) 91 129 { 92 - top.print_entries = ws->ws_row; 130 + top->print_entries = top->winsize.ws_row; 93 131 94 - if (top.print_entries > 9) 95 - top.print_entries -= 9; 132 + if (top->print_entries > 9) 133 + top->print_entries -= 9; 96 134 } 97 135 98 - static void sig_winch_handler(int sig __used) 136 + static void perf_top__sig_winch(int sig __used, siginfo_t *info __used, void *arg) 99 137 { 100 - get_term_dimensions(&winsize); 101 - update_print_entries(&winsize); 138 + struct perf_top *top = arg; 139 + 140 + get_term_dimensions(&top->winsize); 141 + perf_top__update_print_entries(top); 102 142 } 103 143 104 - static int parse_source(struct hist_entry *he) 144 + static int perf_top__parse_source(struct perf_top *top, struct hist_entry *he) 105 
145 { 106 146 struct symbol *sym; 107 147 struct annotation *notes; ··· 134 170 135 171 pthread_mutex_lock(&notes->lock); 136 172 137 - if (symbol__alloc_hist(sym, top.evlist->nr_entries) < 0) { 173 + if (symbol__alloc_hist(sym) < 0) { 138 174 pthread_mutex_unlock(&notes->lock); 139 175 pr_err("Not enough memory for annotating '%s' symbol!\n", 140 176 sym->name); ··· 145 181 err = symbol__annotate(sym, map, 0); 146 182 if (err == 0) { 147 183 out_assign: 148 - top.sym_filter_entry = he; 184 + top->sym_filter_entry = he; 149 185 } 150 186 151 187 pthread_mutex_unlock(&notes->lock); ··· 158 194 symbol__annotate_zero_histograms(sym); 159 195 } 160 196 161 - static void record_precise_ip(struct hist_entry *he, int counter, u64 ip) 197 + static void perf_top__record_precise_ip(struct perf_top *top, 198 + struct hist_entry *he, 199 + int counter, u64 ip) 162 200 { 163 201 struct annotation *notes; 164 202 struct symbol *sym; 165 203 166 204 if (he == NULL || he->ms.sym == NULL || 167 - ((top.sym_filter_entry == NULL || 168 - top.sym_filter_entry->ms.sym != he->ms.sym) && use_browser != 1)) 205 + ((top->sym_filter_entry == NULL || 206 + top->sym_filter_entry->ms.sym != he->ms.sym) && use_browser != 1)) 169 207 return; 170 208 171 209 sym = he->ms.sym; ··· 176 210 if (pthread_mutex_trylock(&notes->lock)) 177 211 return; 178 212 179 - if (notes->src == NULL && 180 - symbol__alloc_hist(sym, top.evlist->nr_entries) < 0) { 213 + if (notes->src == NULL && symbol__alloc_hist(sym) < 0) { 181 214 pthread_mutex_unlock(&notes->lock); 182 215 pr_err("Not enough memory for annotating '%s' symbol!\n", 183 216 sym->name); ··· 190 225 pthread_mutex_unlock(&notes->lock); 191 226 } 192 227 193 - static void show_details(struct hist_entry *he) 228 + static void perf_top__show_details(struct perf_top *top) 194 229 { 230 + struct hist_entry *he = top->sym_filter_entry; 195 231 struct annotation *notes; 196 232 struct symbol *symbol; 197 233 int more; ··· 208 242 if (notes->src == NULL) 209 
243 goto out_unlock; 210 244 211 - printf("Showing %s for %s\n", event_name(top.sym_evsel), symbol->name); 212 - printf(" Events Pcnt (>=%d%%)\n", sym_pcnt_filter); 245 + printf("Showing %s for %s\n", event_name(top->sym_evsel), symbol->name); 246 + printf(" Events Pcnt (>=%d%%)\n", top->sym_pcnt_filter); 213 247 214 - more = symbol__annotate_printf(symbol, he->ms.map, top.sym_evsel->idx, 215 - 0, sym_pcnt_filter, top.print_entries, 4); 216 - if (top.zero) 217 - symbol__annotate_zero_histogram(symbol, top.sym_evsel->idx); 248 + more = symbol__annotate_printf(symbol, he->ms.map, top->sym_evsel->idx, 249 + 0, top->sym_pcnt_filter, top->print_entries, 4); 250 + if (top->zero) 251 + symbol__annotate_zero_histogram(symbol, top->sym_evsel->idx); 218 252 else 219 - symbol__annotate_decay_histogram(symbol, top.sym_evsel->idx); 253 + symbol__annotate_decay_histogram(symbol, top->sym_evsel->idx); 220 254 if (more != 0) 221 255 printf("%d lines not displayed, maybe increase display entries [e]\n", more); 222 256 out_unlock: ··· 225 259 226 260 static const char CONSOLE_CLEAR[] = ""; 227 261 228 - static struct hist_entry * 229 - perf_session__add_hist_entry(struct perf_session *session, 230 - struct addr_location *al, 231 - struct perf_sample *sample, 232 - struct perf_evsel *evsel) 262 + static struct hist_entry *perf_evsel__add_hist_entry(struct perf_evsel *evsel, 263 + struct addr_location *al, 264 + struct perf_sample *sample) 233 265 { 234 266 struct hist_entry *he; 235 267 ··· 235 271 if (he == NULL) 236 272 return NULL; 237 273 238 - session->hists.stats.total_period += sample->period; 274 + evsel->hists.stats.total_period += sample->period; 239 275 hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE); 240 276 return he; 241 277 } 242 278 243 - static void print_sym_table(void) 279 + static void perf_top__print_sym_table(struct perf_top *top) 244 280 { 245 281 char bf[160]; 246 282 int printed = 0; 247 - const int win_width = winsize.ws_col - 1; 283 + const int 
win_width = top->winsize.ws_col - 1; 248 284 249 285 puts(CONSOLE_CLEAR); 250 286 251 - perf_top__header_snprintf(&top, bf, sizeof(bf)); 287 + perf_top__header_snprintf(top, bf, sizeof(bf)); 252 288 printf("%s\n", bf); 253 289 254 - perf_top__reset_sample_counters(&top); 290 + perf_top__reset_sample_counters(top); 255 291 256 292 printf("%-*.*s\n", win_width, win_width, graph_dotted_line); 257 293 258 - if (top.sym_evsel->hists.stats.nr_lost_warned != 259 - top.sym_evsel->hists.stats.nr_events[PERF_RECORD_LOST]) { 260 - top.sym_evsel->hists.stats.nr_lost_warned = 261 - top.sym_evsel->hists.stats.nr_events[PERF_RECORD_LOST]; 294 + if (top->sym_evsel->hists.stats.nr_lost_warned != 295 + top->sym_evsel->hists.stats.nr_events[PERF_RECORD_LOST]) { 296 + top->sym_evsel->hists.stats.nr_lost_warned = 297 + top->sym_evsel->hists.stats.nr_events[PERF_RECORD_LOST]; 262 298 color_fprintf(stdout, PERF_COLOR_RED, 263 299 "WARNING: LOST %d chunks, Check IO/CPU overload", 264 - top.sym_evsel->hists.stats.nr_lost_warned); 300 + top->sym_evsel->hists.stats.nr_lost_warned); 265 301 ++printed; 266 302 } 267 303 268 - if (top.sym_filter_entry) { 269 - show_details(top.sym_filter_entry); 304 + if (top->sym_filter_entry) { 305 + perf_top__show_details(top); 270 306 return; 271 307 } 272 308 273 - hists__collapse_resort_threaded(&top.sym_evsel->hists); 274 - hists__output_resort_threaded(&top.sym_evsel->hists); 275 - hists__decay_entries_threaded(&top.sym_evsel->hists, 276 - top.hide_user_symbols, 277 - top.hide_kernel_symbols); 278 - hists__output_recalc_col_len(&top.sym_evsel->hists, winsize.ws_row - 3); 309 + hists__collapse_resort_threaded(&top->sym_evsel->hists); 310 + hists__output_resort_threaded(&top->sym_evsel->hists); 311 + hists__decay_entries_threaded(&top->sym_evsel->hists, 312 + top->hide_user_symbols, 313 + top->hide_kernel_symbols); 314 + hists__output_recalc_col_len(&top->sym_evsel->hists, 315 + top->winsize.ws_row - 3); 279 316 putchar('\n'); 280 - 
hists__fprintf(&top.sym_evsel->hists, NULL, false, false, 281 - winsize.ws_row - 4 - printed, win_width, stdout); 317 + hists__fprintf(&top->sym_evsel->hists, NULL, false, false, 318 + top->winsize.ws_row - 4 - printed, win_width, stdout); 282 319 } 283 320 284 321 static void prompt_integer(int *target, const char *msg) ··· 317 352 *target = tmp; 318 353 } 319 354 320 - static void prompt_symbol(struct hist_entry **target, const char *msg) 355 + static void perf_top__prompt_symbol(struct perf_top *top, const char *msg) 321 356 { 322 357 char *buf = malloc(0), *p; 323 - struct hist_entry *syme = *target, *n, *found = NULL; 358 + struct hist_entry *syme = top->sym_filter_entry, *n, *found = NULL; 324 359 struct rb_node *next; 325 360 size_t dummy = 0; 326 361 327 362 /* zero counters of active symbol */ 328 363 if (syme) { 329 364 __zero_source_counters(syme); 330 - *target = NULL; 365 + top->sym_filter_entry = NULL; 331 366 } 332 367 333 368 fprintf(stdout, "\n%s: ", msg); ··· 338 373 if (p) 339 374 *p = 0; 340 375 341 - next = rb_first(&top.sym_evsel->hists.entries); 376 + next = rb_first(&top->sym_evsel->hists.entries); 342 377 while (next) { 343 378 n = rb_entry(next, struct hist_entry, rb_node); 344 379 if (n->ms.sym && !strcmp(buf, n->ms.sym->name)) { ··· 351 386 if (!found) { 352 387 fprintf(stderr, "Sorry, %s is not active.\n", buf); 353 388 sleep(1); 354 - return; 355 389 } else 356 - parse_source(found); 390 + perf_top__parse_source(top, found); 357 391 358 392 out_free: 359 393 free(buf); 360 394 } 361 395 362 - static void print_mapped_keys(void) 396 + static void perf_top__print_mapped_keys(struct perf_top *top) 363 397 { 364 398 char *name = NULL; 365 399 366 - if (top.sym_filter_entry) { 367 - struct symbol *sym = top.sym_filter_entry->ms.sym; 400 + if (top->sym_filter_entry) { 401 + struct symbol *sym = top->sym_filter_entry->ms.sym; 368 402 name = sym->name; 369 403 } 370 404 371 405 fprintf(stdout, "\nMapped keys:\n"); 372 - fprintf(stdout, "\t[d] 
display refresh delay. \t(%d)\n", top.delay_secs); 373 - fprintf(stdout, "\t[e] display entries (lines). \t(%d)\n", top.print_entries); 406 + fprintf(stdout, "\t[d] display refresh delay. \t(%d)\n", top->delay_secs); 407 + fprintf(stdout, "\t[e] display entries (lines). \t(%d)\n", top->print_entries); 374 408 375 - if (top.evlist->nr_entries > 1) 376 - fprintf(stdout, "\t[E] active event counter. \t(%s)\n", event_name(top.sym_evsel)); 409 + if (top->evlist->nr_entries > 1) 410 + fprintf(stdout, "\t[E] active event counter. \t(%s)\n", event_name(top->sym_evsel)); 377 411 378 - fprintf(stdout, "\t[f] profile display filter (count). \t(%d)\n", top.count_filter); 412 + fprintf(stdout, "\t[f] profile display filter (count). \t(%d)\n", top->count_filter); 379 413 380 - fprintf(stdout, "\t[F] annotate display filter (percent). \t(%d%%)\n", sym_pcnt_filter); 414 + fprintf(stdout, "\t[F] annotate display filter (percent). \t(%d%%)\n", top->sym_pcnt_filter); 381 415 fprintf(stdout, "\t[s] annotate symbol. \t(%s)\n", name?: "NULL"); 382 416 fprintf(stdout, "\t[S] stop annotation.\n"); 383 417 384 418 fprintf(stdout, 385 419 "\t[K] hide kernel_symbols symbols. \t(%s)\n", 386 - top.hide_kernel_symbols ? "yes" : "no"); 420 + top->hide_kernel_symbols ? "yes" : "no"); 387 421 fprintf(stdout, 388 422 "\t[U] hide user symbols. \t(%s)\n", 389 - top.hide_user_symbols ? "yes" : "no"); 390 - fprintf(stdout, "\t[z] toggle sample zeroing. \t(%d)\n", top.zero ? 1 : 0); 423 + top->hide_user_symbols ? "yes" : "no"); 424 + fprintf(stdout, "\t[z] toggle sample zeroing. \t(%d)\n", top->zero ? 1 : 0); 391 425 fprintf(stdout, "\t[qQ] quit.\n"); 392 426 } 393 427 394 - static int key_mapped(int c) 428 + static int perf_top__key_mapped(struct perf_top *top, int c) 395 429 { 396 430 switch (c) { 397 431 case 'd': ··· 406 442 case 'S': 407 443 return 1; 408 444 case 'E': 409 - return top.evlist->nr_entries > 1 ? 1 : 0; 445 + return top->evlist->nr_entries > 1 ? 
1 : 0; 410 446 default: 411 447 break; 412 448 } ··· 414 450 return 0; 415 451 } 416 452 417 - static void handle_keypress(int c) 453 + static void perf_top__handle_keypress(struct perf_top *top, int c) 418 454 { 419 - if (!key_mapped(c)) { 455 + if (!perf_top__key_mapped(top, c)) { 420 456 struct pollfd stdin_poll = { .fd = 0, .events = POLLIN }; 421 457 struct termios tc, save; 422 458 423 - print_mapped_keys(); 459 + perf_top__print_mapped_keys(top); 424 460 fprintf(stdout, "\nEnter selection, or unmapped key to continue: "); 425 461 fflush(stdout); 426 462 ··· 435 471 c = getc(stdin); 436 472 437 473 tcsetattr(0, TCSAFLUSH, &save); 438 - if (!key_mapped(c)) 474 + if (!perf_top__key_mapped(top, c)) 439 475 return; 440 476 } 441 477 442 478 switch (c) { 443 479 case 'd': 444 - prompt_integer(&top.delay_secs, "Enter display delay"); 445 - if (top.delay_secs < 1) 446 - top.delay_secs = 1; 480 + prompt_integer(&top->delay_secs, "Enter display delay"); 481 + if (top->delay_secs < 1) 482 + top->delay_secs = 1; 447 483 break; 448 484 case 'e': 449 - prompt_integer(&top.print_entries, "Enter display entries (lines)"); 450 - if (top.print_entries == 0) { 451 - sig_winch_handler(SIGWINCH); 452 - signal(SIGWINCH, sig_winch_handler); 485 + prompt_integer(&top->print_entries, "Enter display entries (lines)"); 486 + if (top->print_entries == 0) { 487 + struct sigaction act = { 488 + .sa_sigaction = perf_top__sig_winch, 489 + .sa_flags = SA_SIGINFO, 490 + }; 491 + perf_top__sig_winch(SIGWINCH, NULL, top); 492 + sigaction(SIGWINCH, &act, NULL); 453 493 } else 454 494 signal(SIGWINCH, SIG_DFL); 455 495 break; 456 496 case 'E': 457 - if (top.evlist->nr_entries > 1) { 497 + if (top->evlist->nr_entries > 1) { 458 498 /* Select 0 as the default event: */ 459 499 int counter = 0; 460 500 461 501 fprintf(stderr, "\nAvailable events:"); 462 502 463 - list_for_each_entry(top.sym_evsel, &top.evlist->entries, node) 464 - fprintf(stderr, "\n\t%d %s", top.sym_evsel->idx, 
event_name(top.sym_evsel)); 503 + list_for_each_entry(top->sym_evsel, &top->evlist->entries, node) 504 + fprintf(stderr, "\n\t%d %s", top->sym_evsel->idx, event_name(top->sym_evsel)); 465 505 466 506 prompt_integer(&counter, "Enter details event counter"); 467 507 468 - if (counter >= top.evlist->nr_entries) { 469 - top.sym_evsel = list_entry(top.evlist->entries.next, struct perf_evsel, node); 470 - fprintf(stderr, "Sorry, no such event, using %s.\n", event_name(top.sym_evsel)); 508 + if (counter >= top->evlist->nr_entries) { 509 + top->sym_evsel = list_entry(top->evlist->entries.next, struct perf_evsel, node); 510 + fprintf(stderr, "Sorry, no such event, using %s.\n", event_name(top->sym_evsel)); 471 511 sleep(1); 472 512 break; 473 513 } 474 - list_for_each_entry(top.sym_evsel, &top.evlist->entries, node) 475 - if (top.sym_evsel->idx == counter) 514 + list_for_each_entry(top->sym_evsel, &top->evlist->entries, node) 515 + if (top->sym_evsel->idx == counter) 476 516 break; 477 517 } else 478 - top.sym_evsel = list_entry(top.evlist->entries.next, struct perf_evsel, node); 518 + top->sym_evsel = list_entry(top->evlist->entries.next, struct perf_evsel, node); 479 519 break; 480 520 case 'f': 481 - prompt_integer(&top.count_filter, "Enter display event count filter"); 521 + prompt_integer(&top->count_filter, "Enter display event count filter"); 482 522 break; 483 523 case 'F': 484 - prompt_percent(&sym_pcnt_filter, "Enter details display event filter (percent)"); 524 + prompt_percent(&top->sym_pcnt_filter, 525 + "Enter details display event filter (percent)"); 485 526 break; 486 527 case 'K': 487 - top.hide_kernel_symbols = !top.hide_kernel_symbols; 528 + top->hide_kernel_symbols = !top->hide_kernel_symbols; 488 529 break; 489 530 case 'q': 490 531 case 'Q': 491 532 printf("exiting.\n"); 492 - if (dump_symtab) 493 - perf_session__fprintf_dsos(top.session, stderr); 533 + if (top->dump_symtab) 534 + perf_session__fprintf_dsos(top->session, stderr); 494 535 exit(0); 495 
536 case 's': 496 - prompt_symbol(&top.sym_filter_entry, "Enter details symbol"); 537 + perf_top__prompt_symbol(top, "Enter details symbol"); 497 538 break; 498 539 case 'S': 499 - if (!top.sym_filter_entry) 540 + if (!top->sym_filter_entry) 500 541 break; 501 542 else { 502 - struct hist_entry *syme = top.sym_filter_entry; 543 + struct hist_entry *syme = top->sym_filter_entry; 503 544 504 - top.sym_filter_entry = NULL; 545 + top->sym_filter_entry = NULL; 505 546 __zero_source_counters(syme); 506 547 } 507 548 break; 508 549 case 'U': 509 - top.hide_user_symbols = !top.hide_user_symbols; 550 + top->hide_user_symbols = !top->hide_user_symbols; 510 551 break; 511 552 case 'z': 512 - top.zero = !top.zero; 553 + top->zero = !top->zero; 513 554 break; 514 555 default: 515 556 break; ··· 532 563 hists__collapse_resort_threaded(&t->sym_evsel->hists); 533 564 hists__output_resort_threaded(&t->sym_evsel->hists); 534 565 hists__decay_entries_threaded(&t->sym_evsel->hists, 535 - top.hide_user_symbols, 536 - top.hide_kernel_symbols); 566 + t->hide_user_symbols, 567 + t->hide_kernel_symbols); 537 568 } 538 569 539 - static void *display_thread_tui(void *arg __used) 570 + static void *display_thread_tui(void *arg) 540 571 { 572 + struct perf_top *top = arg; 541 573 const char *help = "For a higher level overview, try: perf top --sort comm,dso"; 542 574 543 - perf_top__sort_new_samples(&top); 544 - perf_evlist__tui_browse_hists(top.evlist, help, 575 + perf_top__sort_new_samples(top); 576 + perf_evlist__tui_browse_hists(top->evlist, help, 545 577 perf_top__sort_new_samples, 546 - &top, top.delay_secs); 578 + top, top->delay_secs); 547 579 548 580 exit_browser(0); 549 581 exit(0); 550 582 return NULL; 551 583 } 552 584 553 - static void *display_thread(void *arg __used) 585 + static void *display_thread(void *arg) 554 586 { 555 587 struct pollfd stdin_poll = { .fd = 0, .events = POLLIN }; 556 588 struct termios tc, save; 589 + struct perf_top *top = arg; 557 590 int delay_msecs, c; 
558 591 559 592 tcgetattr(0, &save); ··· 566 595 567 596 pthread__unblock_sigwinch(); 568 597 repeat: 569 - delay_msecs = top.delay_secs * 1000; 598 + delay_msecs = top->delay_secs * 1000; 570 599 tcsetattr(0, TCSANOW, &tc); 571 600 /* trash return*/ 572 601 getc(stdin); 573 602 574 603 while (1) { 575 - print_sym_table(); 604 + perf_top__print_sym_table(top); 576 605 /* 577 606 * Either timeout expired or we got an EINTR due to SIGWINCH, 578 607 * refresh screen in both cases. ··· 592 621 c = getc(stdin); 593 622 tcsetattr(0, TCSAFLUSH, &save); 594 623 595 - handle_keypress(c); 624 + perf_top__handle_keypress(top, c); 596 625 goto repeat; 597 626 598 627 return NULL; ··· 644 673 return 0; 645 674 } 646 675 647 - static void perf_event__process_sample(const union perf_event *event, 676 + static void perf_event__process_sample(struct perf_tool *tool, 677 + const union perf_event *event, 648 678 struct perf_evsel *evsel, 649 679 struct perf_sample *sample, 650 - struct perf_session *session) 680 + struct machine *machine) 651 681 { 682 + struct perf_top *top = container_of(tool, struct perf_top, tool); 652 683 struct symbol *parent = NULL; 653 684 u64 ip = event->ip.ip; 654 685 struct addr_location al; 655 - struct machine *machine; 656 686 int err; 657 - u8 origin = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; 658 - 659 - ++top.samples; 660 - 661 - switch (origin) { 662 - case PERF_RECORD_MISC_USER: 663 - ++top.us_samples; 664 - if (top.hide_user_symbols) 665 - return; 666 - machine = perf_session__find_host_machine(session); 667 - break; 668 - case PERF_RECORD_MISC_KERNEL: 669 - ++top.kernel_samples; 670 - if (top.hide_kernel_symbols) 671 - return; 672 - machine = perf_session__find_host_machine(session); 673 - break; 674 - case PERF_RECORD_MISC_GUEST_KERNEL: 675 - ++top.guest_kernel_samples; 676 - machine = perf_session__find_machine(session, event->ip.pid); 677 - break; 678 - case PERF_RECORD_MISC_GUEST_USER: 679 - ++top.guest_us_samples; 680 - /* 681 - * 
TODO: we don't process guest user from host side 682 - * except simple counting. 683 - */ 684 - return; 685 - default: 686 - return; 687 - } 688 687 689 688 if (!machine && perf_guest) { 690 689 pr_err("Can't find guest [%d]'s kernel information\n", ··· 663 722 } 664 723 665 724 if (event->header.misc & PERF_RECORD_MISC_EXACT_IP) 666 - top.exact_samples++; 725 + top->exact_samples++; 667 726 668 - if (perf_event__preprocess_sample(event, session, &al, sample, 727 + if (perf_event__preprocess_sample(event, machine, &al, sample, 669 728 symbol_filter) < 0 || 670 729 al.filtered) 671 730 return; 672 731 673 - if (!kptr_restrict_warned && 732 + if (!top->kptr_restrict_warned && 674 733 symbol_conf.kptr_restrict && 675 734 al.cpumode == PERF_RECORD_MISC_KERNEL) { 676 735 ui__warning( ··· 681 740 " modules" : ""); 682 741 if (use_browser <= 0) 683 742 sleep(5); 684 - kptr_restrict_warned = true; 743 + top->kptr_restrict_warned = true; 685 744 } 686 745 687 746 if (al.sym == NULL) { ··· 697 756 * --hide-kernel-symbols, even if the user specifies an 698 757 * invalid --vmlinux ;-) 699 758 */ 700 - if (!kptr_restrict_warned && !vmlinux_warned && 759 + if (!top->kptr_restrict_warned && !top->vmlinux_warned && 701 760 al.map == machine->vmlinux_maps[MAP__FUNCTION] && 702 761 RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION])) { 703 762 if (symbol_conf.vmlinux_name) { ··· 710 769 711 770 if (use_browser <= 0) 712 771 sleep(5); 713 - vmlinux_warned = true; 772 + top->vmlinux_warned = true; 714 773 } 715 774 } 716 775 ··· 719 778 720 779 if ((sort__has_parent || symbol_conf.use_callchain) && 721 780 sample->callchain) { 722 - err = perf_session__resolve_callchain(session, al.thread, 723 - sample->callchain, &parent); 781 + err = machine__resolve_callchain(machine, evsel, al.thread, 782 + sample->callchain, &parent); 724 783 if (err) 725 784 return; 726 785 } 727 786 728 - he = perf_session__add_hist_entry(session, &al, sample, evsel); 787 + he = 
perf_evsel__add_hist_entry(evsel, &al, sample); 729 788 if (he == NULL) { 730 789 pr_err("Problem incrementing symbol period, skipping event\n"); 731 790 return; 732 791 } 733 792 734 793 if (symbol_conf.use_callchain) { 735 - err = callchain_append(he->callchain, &session->callchain_cursor, 794 + err = callchain_append(he->callchain, &evsel->hists.callchain_cursor, 736 795 sample->period); 737 796 if (err) 738 797 return; 739 798 } 740 799 741 - if (sort_has_symbols) 742 - record_precise_ip(he, evsel->idx, ip); 800 + if (top->sort_has_symbols) 801 + perf_top__record_precise_ip(top, he, evsel->idx, ip); 743 802 } 744 803 745 804 return; 746 805 } 747 806 748 - static void perf_session__mmap_read_idx(struct perf_session *self, int idx) 807 + static void perf_top__mmap_read_idx(struct perf_top *top, int idx) 749 808 { 750 809 struct perf_sample sample; 751 810 struct perf_evsel *evsel; 811 + struct perf_session *session = top->session; 752 812 union perf_event *event; 813 + struct machine *machine; 814 + u8 origin; 753 815 int ret; 754 816 755 - while ((event = perf_evlist__mmap_read(top.evlist, idx)) != NULL) { 756 - ret = perf_session__parse_sample(self, event, &sample); 817 + while ((event = perf_evlist__mmap_read(top->evlist, idx)) != NULL) { 818 + ret = perf_session__parse_sample(session, event, &sample); 757 819 if (ret) { 758 820 pr_err("Can't parse sample, err = %d\n", ret); 759 821 continue; 760 822 } 761 823 762 - evsel = perf_evlist__id2evsel(self->evlist, sample.id); 824 + evsel = perf_evlist__id2evsel(session->evlist, sample.id); 763 825 assert(evsel != NULL); 764 826 827 + origin = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; 828 + 765 829 if (event->header.type == PERF_RECORD_SAMPLE) 766 - perf_event__process_sample(event, evsel, &sample, self); 767 - else if (event->header.type < PERF_RECORD_MAX) { 830 + ++top->samples; 831 + 832 + switch (origin) { 833 + case PERF_RECORD_MISC_USER: 834 + ++top->us_samples; 835 + if (top->hide_user_symbols) 836 
+ continue; 837 + machine = perf_session__find_host_machine(session); 838 + break; 839 + case PERF_RECORD_MISC_KERNEL: 840 + ++top->kernel_samples; 841 + if (top->hide_kernel_symbols) 842 + continue; 843 + machine = perf_session__find_host_machine(session); 844 + break; 845 + case PERF_RECORD_MISC_GUEST_KERNEL: 846 + ++top->guest_kernel_samples; 847 + machine = perf_session__find_machine(session, event->ip.pid); 848 + break; 849 + case PERF_RECORD_MISC_GUEST_USER: 850 + ++top->guest_us_samples; 851 + /* 852 + * TODO: we don't process guest user from host side 853 + * except simple counting. 854 + */ 855 + /* Fall thru */ 856 + default: 857 + continue; 858 + } 859 + 860 + 861 + if (event->header.type == PERF_RECORD_SAMPLE) { 862 + perf_event__process_sample(&top->tool, event, evsel, 863 + &sample, machine); 864 + } else if (event->header.type < PERF_RECORD_MAX) { 768 865 hists__inc_nr_events(&evsel->hists, event->header.type); 769 - perf_event__process(event, &sample, self); 866 + perf_event__process(&top->tool, event, &sample, machine); 770 867 } else 771 - ++self->hists.stats.nr_unknown_events; 868 + ++session->hists.stats.nr_unknown_events; 772 869 } 773 870 } 774 871 775 - static void perf_session__mmap_read(struct perf_session *self) 872 + static void perf_top__mmap_read(struct perf_top *top) 776 873 { 777 874 int i; 778 875 779 - for (i = 0; i < top.evlist->nr_mmaps; i++) 780 - perf_session__mmap_read_idx(self, i); 876 + for (i = 0; i < top->evlist->nr_mmaps; i++) 877 + perf_top__mmap_read_idx(top, i); 781 878 } 782 879 783 - static void start_counters(struct perf_evlist *evlist) 880 + static void perf_top__start_counters(struct perf_top *top) 784 881 { 785 882 struct perf_evsel *counter, *first; 883 + struct perf_evlist *evlist = top->evlist; 786 884 787 885 first = list_entry(evlist->entries.next, struct perf_evsel, node); 788 886 ··· 829 849 struct perf_event_attr *attr = &counter->attr; 830 850 struct xyarray *group_fd = NULL; 831 851 832 - if (group && 
counter != first) 852 + if (top->group && counter != first) 833 853 group_fd = first->fd; 834 854 835 855 attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID; 836 856 837 - if (top.freq) { 857 + if (top->freq) { 838 858 attr->sample_type |= PERF_SAMPLE_PERIOD; 839 859 attr->freq = 1; 840 - attr->sample_freq = top.freq; 860 + attr->sample_freq = top->freq; 841 861 } 842 862 843 863 if (evlist->nr_entries > 1) { ··· 850 870 851 871 attr->mmap = 1; 852 872 attr->comm = 1; 853 - attr->inherit = inherit; 873 + attr->inherit = top->inherit; 854 874 retry_sample_id: 855 - attr->sample_id_all = sample_id_all_avail ? 1 : 0; 875 + attr->sample_id_all = top->sample_id_all_avail ? 1 : 0; 856 876 try_again: 857 - if (perf_evsel__open(counter, top.evlist->cpus, 858 - top.evlist->threads, group, 877 + if (perf_evsel__open(counter, top->evlist->cpus, 878 + top->evlist->threads, top->group, 859 879 group_fd) < 0) { 860 880 int err = errno; 861 881 862 882 if (err == EPERM || err == EACCES) { 863 883 ui__error_paranoid(); 864 884 goto out_err; 865 - } else if (err == EINVAL && sample_id_all_avail) { 885 + } else if (err == EINVAL && top->sample_id_all_avail) { 866 886 /* 867 887 * Old kernel, no attr->sample_id_type_all field 868 888 */ 869 - sample_id_all_avail = false; 889 + top->sample_id_all_avail = false; 870 890 goto retry_sample_id; 871 891 } 872 892 /* ··· 900 920 } 901 921 } 902 922 903 - if (perf_evlist__mmap(evlist, mmap_pages, false) < 0) { 923 + if (perf_evlist__mmap(evlist, top->mmap_pages, false) < 0) { 904 924 ui__warning("Failed to mmap with %d (%s)\n", 905 925 errno, strerror(errno)); 906 926 goto out_err; ··· 913 933 exit(0); 914 934 } 915 935 916 - static int setup_sample_type(void) 936 + static int perf_top__setup_sample_type(struct perf_top *top) 917 937 { 918 - if (!sort_has_symbols) { 938 + if (!top->sort_has_symbols) { 919 939 if (symbol_conf.use_callchain) { 920 940 ui__warning("Selected -g but \"sym\" not present in --sort/-s."); 921 941 return -EINVAL; 
922 942 } 923 - } else if (!dont_use_callchains && callchain_param.mode != CHAIN_NONE) { 943 + } else if (!top->dont_use_callchains && callchain_param.mode != CHAIN_NONE) { 924 944 if (callchain_register_param(&callchain_param) < 0) { 925 945 ui__warning("Can't register callchain params.\n"); 926 946 return -EINVAL; ··· 930 950 return 0; 931 951 } 932 952 933 - static int __cmd_top(void) 953 + static int __cmd_top(struct perf_top *top) 934 954 { 935 955 pthread_t thread; 936 956 int ret; ··· 938 958 * FIXME: perf_session__new should allow passing a O_MMAP, so that all this 939 959 * mmap reading, etc is encapsulated in it. Use O_WRONLY for now. 940 960 */ 941 - top.session = perf_session__new(NULL, O_WRONLY, false, false, NULL); 942 - if (top.session == NULL) 961 + top->session = perf_session__new(NULL, O_WRONLY, false, false, NULL); 962 + if (top->session == NULL) 943 963 return -ENOMEM; 944 964 945 - ret = setup_sample_type(); 965 + ret = perf_top__setup_sample_type(top); 946 966 if (ret) 947 967 goto out_delete; 948 968 949 - if (top.target_tid != -1) 950 - perf_event__synthesize_thread_map(top.evlist->threads, 951 - perf_event__process, top.session); 969 + if (top->target_tid != -1) 970 + perf_event__synthesize_thread_map(&top->tool, top->evlist->threads, 971 + perf_event__process, 972 + &top->session->host_machine); 952 973 else 953 - perf_event__synthesize_threads(perf_event__process, top.session); 954 - 955 - start_counters(top.evlist); 956 - top.session->evlist = top.evlist; 957 - perf_session__update_sample_type(top.session); 974 + perf_event__synthesize_threads(&top->tool, perf_event__process, 975 + &top->session->host_machine); 976 + perf_top__start_counters(top); 977 + top->session->evlist = top->evlist; 978 + perf_session__update_sample_type(top->session); 958 979 959 980 /* Wait for a minimal set of events before starting the snapshot */ 960 - poll(top.evlist->pollfd, top.evlist->nr_fds, 100); 981 + poll(top->evlist->pollfd, top->evlist->nr_fds, 100); 
961 982 962 - perf_session__mmap_read(top.session); 983 + perf_top__mmap_read(top); 963 984 964 985 if (pthread_create(&thread, NULL, (use_browser > 0 ? display_thread_tui : 965 - display_thread), NULL)) { 986 + display_thread), top)) { 966 987 printf("Could not create display thread.\n"); 967 988 exit(-1); 968 989 } 969 990 970 - if (realtime_prio) { 991 + if (top->realtime_prio) { 971 992 struct sched_param param; 972 993 973 - param.sched_priority = realtime_prio; 994 + param.sched_priority = top->realtime_prio; 974 995 if (sched_setscheduler(0, SCHED_FIFO, &param)) { 975 996 printf("Could not set realtime priority.\n"); 976 997 exit(-1); ··· 979 998 } 980 999 981 1000 while (1) { 982 - u64 hits = top.samples; 1001 + u64 hits = top->samples; 983 1002 984 - perf_session__mmap_read(top.session); 1003 + perf_top__mmap_read(top); 985 1004 986 - if (hits == top.samples) 987 - ret = poll(top.evlist->pollfd, top.evlist->nr_fds, 100); 1005 + if (hits == top->samples) 1006 + ret = poll(top->evlist->pollfd, top->evlist->nr_fds, 100); 988 1007 } 989 1008 990 1009 out_delete: 991 - perf_session__delete(top.session); 992 - top.session = NULL; 1010 + perf_session__delete(top->session); 1011 + top->session = NULL; 993 1012 994 1013 return 0; 995 1014 } 996 1015 997 1016 static int 998 - parse_callchain_opt(const struct option *opt __used, const char *arg, 999 - int unset) 1017 + parse_callchain_opt(const struct option *opt, const char *arg, int unset) 1000 1018 { 1019 + struct perf_top *top = (struct perf_top *)opt->value; 1001 1020 char *tok, *tok2; 1002 1021 char *endptr; 1003 1022 ··· 1005 1024 * --no-call-graph 1006 1025 */ 1007 1026 if (unset) { 1008 - dont_use_callchains = true; 1027 + top->dont_use_callchains = true; 1009 1028 return 0; 1010 1029 } 1011 1030 ··· 1033 1052 symbol_conf.use_callchain = false; 1034 1053 1035 1054 return 0; 1036 - } 1037 - 1038 - else 1055 + } else 1039 1056 return -1; 1040 1057 1041 1058 /* get the min percentage */ ··· 1077 1098 NULL 1078 
1099 }; 1079 1100 1080 - static const struct option options[] = { 1101 + int cmd_top(int argc, const char **argv, const char *prefix __used) 1102 + { 1103 + struct perf_evsel *pos; 1104 + int status = -ENOMEM; 1105 + struct perf_top top = { 1106 + .count_filter = 5, 1107 + .delay_secs = 2, 1108 + .target_pid = -1, 1109 + .target_tid = -1, 1110 + .freq = 1000, /* 1 KHz */ 1111 + .sample_id_all_avail = true, 1112 + .mmap_pages = 128, 1113 + .sym_pcnt_filter = 5, 1114 + }; 1115 + char callchain_default_opt[] = "fractal,0.5,callee"; 1116 + const struct option options[] = { 1081 1117 OPT_CALLBACK('e', "event", &top.evlist, "event", 1082 1118 "event selector. use 'perf list' to list available events", 1083 1119 parse_events_option), 1084 - OPT_INTEGER('c', "count", &default_interval, 1120 + OPT_INTEGER('c', "count", &top.default_interval, 1085 1121 "event period to sample"), 1086 1122 OPT_INTEGER('p', "pid", &top.target_pid, 1087 1123 "profile events on existing process id"), 1088 1124 OPT_INTEGER('t', "tid", &top.target_tid, 1089 1125 "profile events on existing thread id"), 1090 - OPT_BOOLEAN('a', "all-cpus", &system_wide, 1126 + OPT_BOOLEAN('a', "all-cpus", &top.system_wide, 1091 1127 "system-wide collection from all CPUs"), 1092 1128 OPT_STRING('C', "cpu", &top.cpu_list, "cpu", 1093 1129 "list of cpus to monitor"), ··· 1110 1116 "file", "vmlinux pathname"), 1111 1117 OPT_BOOLEAN('K', "hide_kernel_symbols", &top.hide_kernel_symbols, 1112 1118 "hide kernel symbols"), 1113 - OPT_UINTEGER('m', "mmap-pages", &mmap_pages, "number of mmap data pages"), 1114 - OPT_INTEGER('r', "realtime", &realtime_prio, 1119 + OPT_UINTEGER('m', "mmap-pages", &top.mmap_pages, "number of mmap data pages"), 1120 + OPT_INTEGER('r', "realtime", &top.realtime_prio, 1115 1121 "collect data with this RT SCHED_FIFO priority"), 1116 1122 OPT_INTEGER('d', "delay", &top.delay_secs, 1117 1123 "number of seconds to delay between refreshes"), 1118 - OPT_BOOLEAN('D', "dump-symtab", &dump_symtab, 1124 + 
OPT_BOOLEAN('D', "dump-symtab", &top.dump_symtab, 1119 1125 "dump the symbol table used for profiling"), 1120 1126 OPT_INTEGER('f', "count-filter", &top.count_filter, 1121 1127 "only display functions with more events than this"), 1122 - OPT_BOOLEAN('g', "group", &group, 1128 + OPT_BOOLEAN('g', "group", &top.group, 1123 1129 "put the counters into a counter group"), 1124 - OPT_BOOLEAN('i', "inherit", &inherit, 1130 + OPT_BOOLEAN('i', "inherit", &top.inherit, 1125 1131 "child tasks inherit counters"), 1126 - OPT_STRING(0, "sym-annotate", &sym_filter, "symbol name", 1132 + OPT_STRING(0, "sym-annotate", &top.sym_filter, "symbol name", 1127 1133 "symbol to annotate"), 1128 1134 OPT_BOOLEAN('z', "zero", &top.zero, 1129 1135 "zero history across updates"), ··· 1133 1139 "display this many functions"), 1134 1140 OPT_BOOLEAN('U', "hide_user_symbols", &top.hide_user_symbols, 1135 1141 "hide user symbols"), 1136 - OPT_BOOLEAN(0, "tui", &use_tui, "Use the TUI interface"), 1137 - OPT_BOOLEAN(0, "stdio", &use_stdio, "Use the stdio interface"), 1142 + OPT_BOOLEAN(0, "tui", &top.use_tui, "Use the TUI interface"), 1143 + OPT_BOOLEAN(0, "stdio", &top.use_stdio, "Use the stdio interface"), 1138 1144 OPT_INCR('v', "verbose", &verbose, 1139 1145 "be more verbose (show counter open errors, etc)"), 1140 1146 OPT_STRING('s', "sort", &sort_order, "key[,key2...]", 1141 1147 "sort by key(s): pid, comm, dso, symbol, parent"), 1142 1148 OPT_BOOLEAN('n', "show-nr-samples", &symbol_conf.show_nr_samples, 1143 1149 "Show a column with the number of samples"), 1144 - OPT_CALLBACK_DEFAULT('G', "call-graph", NULL, "output_type,min_percent, call_order", 1150 + OPT_CALLBACK_DEFAULT('G', "call-graph", &top, "output_type,min_percent, call_order", 1145 1151 "Display callchains using output_type (graph, flat, fractal, or none), min percent threshold and callchain order. 
" 1146 1152 "Default: fractal,0.5,callee", &parse_callchain_opt, 1147 1153 callchain_default_opt), ··· 1160 1166 OPT_STRING('M', "disassembler-style", &disassembler_style, "disassembler style", 1161 1167 "Specify disassembler style (e.g. -M intel for intel syntax)"), 1162 1168 OPT_END() 1163 - }; 1164 - 1165 - int cmd_top(int argc, const char **argv, const char *prefix __used) 1166 - { 1167 - struct perf_evsel *pos; 1168 - int status = -ENOMEM; 1169 + }; 1169 1170 1170 1171 top.evlist = perf_evlist__new(NULL, NULL); 1171 1172 if (top.evlist == NULL) ··· 1177 1188 1178 1189 setup_sorting(top_usage, options); 1179 1190 1180 - if (use_stdio) 1191 + if (top.use_stdio) 1181 1192 use_browser = 0; 1182 - else if (use_tui) 1193 + else if (top.use_tui) 1183 1194 use_browser = 1; 1184 1195 1185 1196 setup_browser(false); ··· 1204 1215 return -ENOMEM; 1205 1216 } 1206 1217 1218 + symbol_conf.nr_events = top.evlist->nr_entries; 1219 + 1207 1220 if (top.delay_secs < 1) 1208 1221 top.delay_secs = 1; 1209 1222 1210 1223 /* 1211 1224 * User specified count overrides default frequency. 
1212 1225 */ 1213 - if (default_interval) 1226 + if (top.default_interval) 1214 1227 top.freq = 0; 1215 1228 else if (top.freq) { 1216 - default_interval = top.freq; 1229 + top.default_interval = top.freq; 1217 1230 } else { 1218 1231 fprintf(stderr, "frequency and count are zero, aborting\n"); 1219 1232 exit(EXIT_FAILURE); 1220 1233 } 1221 1234 1222 1235 list_for_each_entry(pos, &top.evlist->entries, node) { 1223 - if (perf_evsel__alloc_fd(pos, top.evlist->cpus->nr, 1224 - top.evlist->threads->nr) < 0) 1225 - goto out_free_fd; 1226 1236 /* 1227 1237 * Fill in the ones not specifically initialized via -c: 1228 1238 */ 1229 - if (pos->attr.sample_period) 1230 - continue; 1231 - 1232 - pos->attr.sample_period = default_interval; 1239 + if (!pos->attr.sample_period) 1240 + pos->attr.sample_period = top.default_interval; 1233 1241 } 1234 - 1235 - if (perf_evlist__alloc_pollfd(top.evlist) < 0 || 1236 - perf_evlist__alloc_mmap(top.evlist) < 0) 1237 - goto out_free_fd; 1238 1242 1239 1243 top.sym_evsel = list_entry(top.evlist->entries.next, struct perf_evsel, node); 1240 1244 ··· 1245 1263 * Avoid annotation data structures overhead when symbols aren't on the 1246 1264 * sort list. 1247 1265 */ 1248 - sort_has_symbols = sort_sym.list.next != NULL; 1266 + top.sort_has_symbols = sort_sym.list.next != NULL; 1249 1267 1250 - get_term_dimensions(&winsize); 1268 + get_term_dimensions(&top.winsize); 1251 1269 if (top.print_entries == 0) { 1252 - update_print_entries(&winsize); 1253 - signal(SIGWINCH, sig_winch_handler); 1270 + struct sigaction act = { 1271 + .sa_sigaction = perf_top__sig_winch, 1272 + .sa_flags = SA_SIGINFO, 1273 + }; 1274 + perf_top__update_print_entries(&top); 1275 + sigaction(SIGWINCH, &act, NULL); 1254 1276 } 1255 1277 1256 - status = __cmd_top(); 1257 - out_free_fd: 1278 + status = __cmd_top(&top); 1279 + 1258 1280 perf_evlist__delete(top.evlist); 1259 1281 1260 1282 return status;
+4 -29
tools/perf/perf.c
··· 29 29 int val; 30 30 }; 31 31 32 - static char debugfs_mntpt[MAXPATHLEN]; 33 - 34 32 static int pager_command_config(const char *var, const char *value, void *data) 35 33 { 36 34 struct pager_config *c = data; ··· 77 79 default: 78 80 break; 79 81 } 80 - } 81 - 82 - static void set_debugfs_path(void) 83 - { 84 - char *path; 85 - 86 - path = getenv(PERF_DEBUGFS_ENVIRONMENT); 87 - snprintf(debugfs_path, MAXPATHLEN, "%s/%s", path ?: debugfs_mntpt, 88 - "tracing/events"); 89 82 } 90 83 91 84 static int handle_options(const char ***argv, int *argc, int *envchanged) ··· 150 161 fprintf(stderr, "No directory given for --debugfs-dir.\n"); 151 162 usage(perf_usage_string); 152 163 } 153 - strncpy(debugfs_mntpt, (*argv)[1], MAXPATHLEN); 154 - debugfs_mntpt[MAXPATHLEN - 1] = '\0'; 164 + debugfs_set_path((*argv)[1]); 155 165 if (envchanged) 156 166 *envchanged = 1; 157 167 (*argv)++; 158 168 (*argc)--; 159 169 } else if (!prefixcmp(cmd, CMD_DEBUGFS_DIR)) { 160 - strncpy(debugfs_mntpt, cmd + strlen(CMD_DEBUGFS_DIR), MAXPATHLEN); 161 - debugfs_mntpt[MAXPATHLEN - 1] = '\0'; 170 + debugfs_set_path(cmd + strlen(CMD_DEBUGFS_DIR)); 171 + fprintf(stderr, "dir: %s\n", debugfs_mountpoint); 162 172 if (envchanged) 163 173 *envchanged = 1; 164 174 } else { ··· 269 281 if (use_pager == -1 && p->option & USE_PAGER) 270 282 use_pager = 1; 271 283 commit_pager_choice(); 272 - set_debugfs_path(); 273 284 274 285 status = p->fn(argc, argv, prefix); 275 286 exit_browser(status); ··· 403 416 return done_alias; 404 417 } 405 418 406 - /* mini /proc/mounts parser: searching for "^blah /mount/point debugfs" */ 407 - static void get_debugfs_mntpt(void) 408 - { 409 - const char *path = debugfs_mount(NULL); 410 - 411 - if (path) 412 - strncpy(debugfs_mntpt, path, sizeof(debugfs_mntpt)); 413 - else 414 - debugfs_mntpt[0] = '\0'; 415 - } 416 - 417 419 static void pthread__block_sigwinch(void) 418 420 { 419 421 sigset_t set; ··· 429 453 if (!cmd) 430 454 cmd = "perf-help"; 431 455 /* get debugfs mount 
point from /proc/mounts */ 432 - get_debugfs_mntpt(); 456 + debugfs_mount(NULL); 433 457 /* 434 458 * "perf-xxxx" is the same as "perf xxxx", but we obviously: 435 459 * ··· 452 476 argc--; 453 477 handle_options(&argv, &argc, NULL); 454 478 commit_pager_choice(); 455 - set_debugfs_path(); 456 479 set_buildid_dir(); 457 480 458 481 if (argc > 0) {
+24
tools/perf/perf.h
··· 185 185 186 186 void pthread__unblock_sigwinch(void); 187 187 188 + struct perf_record_opts { 189 + pid_t target_pid; 190 + pid_t target_tid; 191 + bool call_graph; 192 + bool group; 193 + bool inherit_stat; 194 + bool no_delay; 195 + bool no_inherit; 196 + bool no_samples; 197 + bool pipe_output; 198 + bool raw_samples; 199 + bool sample_address; 200 + bool sample_time; 201 + bool sample_id_all_avail; 202 + bool system_wide; 203 + bool period; 204 + unsigned int freq; 205 + unsigned int mmap_pages; 206 + unsigned int user_freq; 207 + u64 default_interval; 208 + u64 user_interval; 209 + const char *cpu_list; 210 + }; 211 + 188 212 #endif
+4 -4
tools/perf/util/annotate.c
··· 25 25 return 0; 26 26 } 27 27 28 - int symbol__alloc_hist(struct symbol *sym, int nevents) 28 + int symbol__alloc_hist(struct symbol *sym) 29 29 { 30 30 struct annotation *notes = symbol__annotation(sym); 31 31 size_t sizeof_sym_hist = (sizeof(struct sym_hist) + 32 32 (sym->end - sym->start) * sizeof(u64)); 33 33 34 - notes->src = zalloc(sizeof(*notes->src) + nevents * sizeof_sym_hist); 34 + notes->src = zalloc(sizeof(*notes->src) + symbol_conf.nr_events * sizeof_sym_hist); 35 35 if (notes->src == NULL) 36 36 return -1; 37 37 notes->src->sizeof_sym_hist = sizeof_sym_hist; 38 - notes->src->nr_histograms = nevents; 38 + notes->src->nr_histograms = symbol_conf.nr_events; 39 39 INIT_LIST_HEAD(&notes->src->source); 40 40 return 0; 41 41 } ··· 334 334 disassembler_style ? "-M " : "", 335 335 disassembler_style ? disassembler_style : "", 336 336 map__rip_2objdump(map, sym->start), 337 - map__rip_2objdump(map, sym->end), 337 + map__rip_2objdump(map, sym->end+1), 338 338 symbol_conf.annotate_asm_raw ? "" : "--no-show-raw", 339 339 symbol_conf.annotate_src ? "-S" : "", 340 340 symfs_filename, filename);
+2 -3
tools/perf/util/annotate.h
··· 72 72 73 73 int symbol__inc_addr_samples(struct symbol *sym, struct map *map, 74 74 int evidx, u64 addr); 75 - int symbol__alloc_hist(struct symbol *sym, int nevents); 75 + int symbol__alloc_hist(struct symbol *sym); 76 76 void symbol__annotate_zero_histograms(struct symbol *sym); 77 77 78 78 int symbol__annotate(struct symbol *sym, struct map *map, size_t privsize); ··· 99 99 } 100 100 #else 101 101 int symbol__tui_annotate(struct symbol *sym, struct map *map, int evidx, 102 - int nr_events, void(*timer)(void *arg), void *arg, 103 - int delay_secs); 102 + void(*timer)(void *arg), void *arg, int delay_secs); 104 103 #endif 105 104 106 105 extern const char *disassembler_style;
+15 -11
tools/perf/util/build-id.c
··· 13 13 #include "symbol.h" 14 14 #include <linux/kernel.h> 15 15 #include "debug.h" 16 + #include "session.h" 17 + #include "tool.h" 16 18 17 - static int build_id__mark_dso_hit(union perf_event *event, 19 + static int build_id__mark_dso_hit(struct perf_tool *tool __used, 20 + union perf_event *event, 18 21 struct perf_sample *sample __used, 19 22 struct perf_evsel *evsel __used, 20 - struct perf_session *session) 23 + struct machine *machine) 21 24 { 22 25 struct addr_location al; 23 26 u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; 24 - struct thread *thread = perf_session__findnew(session, event->ip.pid); 27 + struct thread *thread = machine__findnew_thread(machine, event->ip.pid); 25 28 26 29 if (thread == NULL) { 27 30 pr_err("problem processing %d event, skipping it.\n", ··· 32 29 return -1; 33 30 } 34 31 35 - thread__find_addr_map(thread, session, cpumode, MAP__FUNCTION, 36 - event->ip.pid, event->ip.ip, &al); 32 + thread__find_addr_map(thread, machine, cpumode, MAP__FUNCTION, 33 + event->ip.ip, &al); 37 34 38 35 if (al.map != NULL) 39 36 al.map->dso->hit = 1; ··· 41 38 return 0; 42 39 } 43 40 44 - static int perf_event__exit_del_thread(union perf_event *event, 41 + static int perf_event__exit_del_thread(struct perf_tool *tool __used, 42 + union perf_event *event, 45 43 struct perf_sample *sample __used, 46 - struct perf_session *session) 44 + struct machine *machine) 47 45 { 48 - struct thread *thread = perf_session__findnew(session, event->fork.tid); 46 + struct thread *thread = machine__findnew_thread(machine, event->fork.tid); 49 47 50 48 dump_printf("(%d:%d):(%d:%d)\n", event->fork.pid, event->fork.tid, 51 49 event->fork.ppid, event->fork.ptid); 52 50 53 51 if (thread) { 54 - rb_erase(&thread->rb_node, &session->threads); 55 - session->last_match = NULL; 52 + rb_erase(&thread->rb_node, &machine->threads); 53 + machine->last_match = NULL; 56 54 thread__delete(thread); 57 55 } 58 56 59 57 return 0; 60 58 } 61 59 62 - struct 
perf_event_ops build_id__mark_dso_hit_ops = { 60 + struct perf_tool build_id__mark_dso_hit_ops = { 63 61 .sample = build_id__mark_dso_hit, 64 62 .mmap = perf_event__process_mmap, 65 63 .fork = perf_event__process_task,
+1 -1
tools/perf/util/build-id.h
··· 3 3 4 4 #include "session.h" 5 5 6 - extern struct perf_event_ops build_id__mark_dso_hit_ops; 6 + extern struct perf_tool build_id__mark_dso_hit_ops; 7 7 8 8 char *dso__build_id_filename(struct dso *self, char *bf, size_t size); 9 9
+3
tools/perf/util/callchain.h
··· 101 101 int callchain_merge(struct callchain_cursor *cursor, 102 102 struct callchain_root *dst, struct callchain_root *src); 103 103 104 + struct ip_callchain; 105 + union perf_event; 106 + 104 107 bool ip_callchain__valid(struct ip_callchain *chain, 105 108 const union perf_event *event); 106 109 /*
+7 -8
tools/perf/util/cgroup.c
··· 3 3 #include "parse-options.h" 4 4 #include "evsel.h" 5 5 #include "cgroup.h" 6 - #include "debugfs.h" /* MAX_PATH, STR() */ 7 6 #include "evlist.h" 8 7 9 8 int nr_cgroups; ··· 11 12 cgroupfs_find_mountpoint(char *buf, size_t maxlen) 12 13 { 13 14 FILE *fp; 14 - char mountpoint[MAX_PATH+1], tokens[MAX_PATH+1], type[MAX_PATH+1]; 15 + char mountpoint[PATH_MAX + 1], tokens[PATH_MAX + 1], type[PATH_MAX + 1]; 15 16 char *token, *saved_ptr = NULL; 16 17 int found = 0; 17 18 ··· 24 25 * and inspect every cgroupfs mount point to find one that has 25 26 * perf_event subsystem 26 27 */ 27 - while (fscanf(fp, "%*s %"STR(MAX_PATH)"s %"STR(MAX_PATH)"s %" 28 - STR(MAX_PATH)"s %*d %*d\n", 28 + while (fscanf(fp, "%*s %"STR(PATH_MAX)"s %"STR(PATH_MAX)"s %" 29 + STR(PATH_MAX)"s %*d %*d\n", 29 30 mountpoint, type, tokens) == 3) { 30 31 31 32 if (!strcmp(type, "cgroup")) { ··· 56 57 57 58 static int open_cgroup(char *name) 58 59 { 59 - char path[MAX_PATH+1]; 60 - char mnt[MAX_PATH+1]; 60 + char path[PATH_MAX + 1]; 61 + char mnt[PATH_MAX + 1]; 61 62 int fd; 62 63 63 64 64 - if (cgroupfs_find_mountpoint(mnt, MAX_PATH+1)) 65 + if (cgroupfs_find_mountpoint(mnt, PATH_MAX + 1)) 65 66 return -1; 66 67 67 - snprintf(path, MAX_PATH, "%s/%s", mnt, name); 68 + snprintf(path, PATH_MAX, "%s/%s", mnt, name); 68 69 69 70 fd = open(path, O_RDONLY); 70 71 if (fd == -1)
+4 -1
tools/perf/util/config.c
··· 1 1 /* 2 - * GIT - The information manager from hell 2 + * config.c 3 + * 4 + * Helper functions for parsing config items. 5 + * Originally copied from GIT source. 3 6 * 4 7 * Copyright (C) Linus Torvalds, 2005 5 8 * Copyright (C) Johannes Schindelin, 2005
+25 -10
tools/perf/util/debugfs.c
··· 2 2 #include "debugfs.h" 3 3 #include "cache.h" 4 4 5 + #include <linux/kernel.h> 6 + #include <sys/mount.h> 7 + 5 8 static int debugfs_premounted; 6 - static char debugfs_mountpoint[MAX_PATH+1]; 9 + char debugfs_mountpoint[PATH_MAX + 1] = "/sys/kernel/debug"; 10 + char tracing_events_path[PATH_MAX + 1] = "/sys/kernel/debug/tracing/events"; 7 11 8 12 static const char *debugfs_known_mountpoints[] = { 9 13 "/sys/kernel/debug/", ··· 66 62 /* give up and parse /proc/mounts */ 67 63 fp = fopen("/proc/mounts", "r"); 68 64 if (fp == NULL) 69 - die("Can't open /proc/mounts for read"); 65 + return NULL; 70 66 71 - while (fscanf(fp, "%*s %" 72 - STR(MAX_PATH) 73 - "s %99s %*s %*d %*d\n", 67 + while (fscanf(fp, "%*s %" STR(PATH_MAX) "s %99s %*s %*d %*d\n", 74 68 debugfs_mountpoint, type) == 2) { 75 69 if (strcmp(type, "debugfs") == 0) 76 70 break; ··· 108 106 return 0; 109 107 } 110 108 109 + static void debugfs_set_tracing_events_path(const char *mountpoint) 110 + { 111 + snprintf(tracing_events_path, sizeof(tracing_events_path), "%s/%s", 112 + mountpoint, "tracing/events"); 113 + } 114 + 111 115 /* mount the debugfs somewhere if it's not mounted */ 112 116 113 117 char *debugfs_mount(const char *mountpoint) ··· 121 113 /* see if it's already mounted */ 122 114 if (debugfs_find_mountpoint()) { 123 115 debugfs_premounted = 1; 124 - return debugfs_mountpoint; 116 + goto out; 125 117 } 126 118 127 119 /* if not mounted and no argument */ ··· 137 129 return NULL; 138 130 139 131 /* save the mountpoint */ 140 - strncpy(debugfs_mountpoint, mountpoint, sizeof(debugfs_mountpoint)); 141 132 debugfs_found = 1; 142 - 133 + strncpy(debugfs_mountpoint, mountpoint, sizeof(debugfs_mountpoint)); 134 + out: 135 + debugfs_set_tracing_events_path(debugfs_mountpoint); 143 136 return debugfs_mountpoint; 137 + } 138 + 139 + void debugfs_set_path(const char *mountpoint) 140 + { 141 + snprintf(debugfs_mountpoint, sizeof(debugfs_mountpoint), "%s", mountpoint); 142 + 
debugfs_set_tracing_events_path(mountpoint); 144 143 } 145 144 146 145 /* umount the debugfs */ ··· 173 158 174 159 int debugfs_write(const char *entry, const char *value) 175 160 { 176 - char path[MAX_PATH+1]; 161 + char path[PATH_MAX + 1]; 177 162 int ret, count; 178 163 int fd; 179 164 ··· 218 203 */ 219 204 int debugfs_read(const char *entry, char *buffer, size_t size) 220 205 { 221 - char path[MAX_PATH+1]; 206 + char path[PATH_MAX + 1]; 222 207 int ret; 223 208 int fd; 224 209
+12 -19
tools/perf/util/debugfs.h
··· 1 1 #ifndef __DEBUGFS_H__ 2 2 #define __DEBUGFS_H__ 3 3 4 - #include <sys/mount.h> 4 + const char *debugfs_find_mountpoint(void); 5 + int debugfs_valid_mountpoint(const char *debugfs); 6 + int debugfs_valid_entry(const char *path); 7 + char *debugfs_mount(const char *mountpoint); 8 + int debugfs_umount(void); 9 + void debugfs_set_path(const char *mountpoint); 10 + int debugfs_write(const char *entry, const char *value); 11 + int debugfs_read(const char *entry, char *buffer, size_t size); 12 + void debugfs_force_cleanup(void); 13 + int debugfs_make_path(const char *element, char *buffer, int size); 5 14 6 - #ifndef MAX_PATH 7 - # define MAX_PATH 256 8 - #endif 9 - 10 - #ifndef STR 11 - # define _STR(x) #x 12 - # define STR(x) _STR(x) 13 - #endif 14 - 15 - extern const char *debugfs_find_mountpoint(void); 16 - extern int debugfs_valid_mountpoint(const char *debugfs); 17 - extern int debugfs_valid_entry(const char *path); 18 - extern char *debugfs_mount(const char *mountpoint); 19 - extern int debugfs_umount(void); 20 - extern int debugfs_write(const char *entry, const char *value); 21 - extern int debugfs_read(const char *entry, char *buffer, size_t size); 22 - extern void debugfs_force_cleanup(void); 23 - extern int debugfs_make_path(const char *element, char *buffer, int size); 15 + extern char debugfs_mountpoint[]; 16 + extern char tracing_events_path[]; 24 17 25 18 #endif /* __DEBUGFS_H__ */
+230 -140
tools/perf/util/event.c
··· 1 1 #include <linux/types.h> 2 2 #include "event.h" 3 3 #include "debug.h" 4 - #include "session.h" 5 4 #include "sort.h" 6 5 #include "string.h" 7 6 #include "strlist.h" ··· 43 44 .period = 1, 44 45 }; 45 46 46 - static pid_t perf_event__synthesize_comm(union perf_event *event, pid_t pid, 47 - int full, perf_event__handler_t process, 48 - struct perf_session *session) 47 + static pid_t perf_event__get_comm_tgid(pid_t pid, char *comm, size_t len) 49 48 { 50 49 char filename[PATH_MAX]; 51 50 char bf[BUFSIZ]; 52 51 FILE *fp; 53 52 size_t size = 0; 54 - DIR *tasks; 55 - struct dirent dirent, *next; 56 - pid_t tgid = 0; 53 + pid_t tgid = -1; 57 54 58 55 snprintf(filename, sizeof(filename), "/proc/%d/status", pid); 59 56 60 57 fp = fopen(filename, "r"); 61 58 if (fp == NULL) { 62 - out_race: 63 - /* 64 - * We raced with a task exiting - just return: 65 - */ 66 59 pr_debug("couldn't open %s\n", filename); 67 60 return 0; 68 61 } 69 62 70 - memset(&event->comm, 0, sizeof(event->comm)); 71 - 72 - while (!event->comm.comm[0] || !event->comm.pid) { 63 + while (!comm[0] || (tgid < 0)) { 73 64 if (fgets(bf, sizeof(bf), fp) == NULL) { 74 - pr_warning("couldn't get COMM and pgid, malformed %s\n", filename); 75 - goto out; 65 + pr_warning("couldn't get COMM and pgid, malformed %s\n", 66 + filename); 67 + break; 76 68 } 77 69 78 70 if (memcmp(bf, "Name:", 5) == 0) { ··· 71 81 while (*name && isspace(*name)) 72 82 ++name; 73 83 size = strlen(name) - 1; 74 - memcpy(event->comm.comm, name, size++); 84 + if (size >= len) 85 + size = len - 1; 86 + memcpy(comm, name, size); 87 + 75 88 } else if (memcmp(bf, "Tgid:", 5) == 0) { 76 89 char *tgids = bf + 5; 77 90 while (*tgids && isspace(*tgids)) 78 91 ++tgids; 79 - tgid = event->comm.pid = atoi(tgids); 92 + tgid = atoi(tgids); 80 93 } 81 94 } 82 95 96 + fclose(fp); 97 + 98 + return tgid; 99 + } 100 + 101 + static pid_t perf_event__synthesize_comm(struct perf_tool *tool, 102 + union perf_event *event, pid_t pid, 103 + int full, 104 + 
perf_event__handler_t process, 105 + struct machine *machine) 106 + { 107 + char filename[PATH_MAX]; 108 + size_t size; 109 + DIR *tasks; 110 + struct dirent dirent, *next; 111 + pid_t tgid; 112 + 113 + memset(&event->comm, 0, sizeof(event->comm)); 114 + 115 + tgid = perf_event__get_comm_tgid(pid, event->comm.comm, 116 + sizeof(event->comm.comm)); 117 + if (tgid < 0) 118 + goto out; 119 + 120 + event->comm.pid = tgid; 83 121 event->comm.header.type = PERF_RECORD_COMM; 122 + 123 + size = strlen(event->comm.comm) + 1; 84 124 size = ALIGN(size, sizeof(u64)); 85 - memset(event->comm.comm + size, 0, session->id_hdr_size); 125 + memset(event->comm.comm + size, 0, machine->id_hdr_size); 86 126 event->comm.header.size = (sizeof(event->comm) - 87 127 (sizeof(event->comm.comm) - size) + 88 - session->id_hdr_size); 128 + machine->id_hdr_size); 89 129 if (!full) { 90 130 event->comm.tid = pid; 91 131 92 - process(event, &synth_sample, session); 132 + process(tool, event, &synth_sample, machine); 93 133 goto out; 94 134 } 95 135 96 136 snprintf(filename, sizeof(filename), "/proc/%d/task", pid); 97 137 98 138 tasks = opendir(filename); 99 - if (tasks == NULL) 100 - goto out_race; 139 + if (tasks == NULL) { 140 + pr_debug("couldn't open %s\n", filename); 141 + return 0; 142 + } 101 143 102 144 while (!readdir_r(tasks, &dirent, &next) && next) { 103 145 char *end; ··· 137 115 if (*end) 138 116 continue; 139 117 118 + /* already have tgid; jut want to update the comm */ 119 + (void) perf_event__get_comm_tgid(pid, event->comm.comm, 120 + sizeof(event->comm.comm)); 121 + 122 + size = strlen(event->comm.comm) + 1; 123 + size = ALIGN(size, sizeof(u64)); 124 + memset(event->comm.comm + size, 0, machine->id_hdr_size); 125 + event->comm.header.size = (sizeof(event->comm) - 126 + (sizeof(event->comm.comm) - size) + 127 + machine->id_hdr_size); 128 + 140 129 event->comm.tid = pid; 141 130 142 - process(event, &synth_sample, session); 131 + process(tool, event, &synth_sample, machine); 143 
132 } 144 133 145 134 closedir(tasks); 146 135 out: 147 - fclose(fp); 148 - 149 136 return tgid; 150 137 } 151 138 152 - static int perf_event__synthesize_mmap_events(union perf_event *event, 139 + static int perf_event__synthesize_mmap_events(struct perf_tool *tool, 140 + union perf_event *event, 153 141 pid_t pid, pid_t tgid, 154 142 perf_event__handler_t process, 155 - struct perf_session *session) 143 + struct machine *machine) 156 144 { 157 145 char filename[PATH_MAX]; 158 146 FILE *fp; ··· 225 193 event->mmap.len -= event->mmap.start; 226 194 event->mmap.header.size = (sizeof(event->mmap) - 227 195 (sizeof(event->mmap.filename) - size)); 228 - memset(event->mmap.filename + size, 0, session->id_hdr_size); 229 - event->mmap.header.size += session->id_hdr_size; 196 + memset(event->mmap.filename + size, 0, machine->id_hdr_size); 197 + event->mmap.header.size += machine->id_hdr_size; 230 198 event->mmap.pid = tgid; 231 199 event->mmap.tid = pid; 232 200 233 - process(event, &synth_sample, session); 201 + process(tool, event, &synth_sample, machine); 234 202 } 235 203 } 236 204 ··· 238 206 return 0; 239 207 } 240 208 241 - int perf_event__synthesize_modules(perf_event__handler_t process, 242 - struct perf_session *session, 209 + int perf_event__synthesize_modules(struct perf_tool *tool, 210 + perf_event__handler_t process, 243 211 struct machine *machine) 244 212 { 245 213 struct rb_node *nd; 246 214 struct map_groups *kmaps = &machine->kmaps; 247 215 union perf_event *event = zalloc((sizeof(event->mmap) + 248 - session->id_hdr_size)); 216 + machine->id_hdr_size)); 249 217 if (event == NULL) { 250 218 pr_debug("Not enough memory synthesizing mmap event " 251 219 "for kernel modules\n"); ··· 275 243 event->mmap.header.type = PERF_RECORD_MMAP; 276 244 event->mmap.header.size = (sizeof(event->mmap) - 277 245 (sizeof(event->mmap.filename) - size)); 278 - memset(event->mmap.filename + size, 0, session->id_hdr_size); 279 - event->mmap.header.size += session->id_hdr_size; 
246 + memset(event->mmap.filename + size, 0, machine->id_hdr_size); 247 + event->mmap.header.size += machine->id_hdr_size; 280 248 event->mmap.start = pos->start; 281 249 event->mmap.len = pos->end - pos->start; 282 250 event->mmap.pid = machine->pid; 283 251 284 252 memcpy(event->mmap.filename, pos->dso->long_name, 285 253 pos->dso->long_name_len + 1); 286 - process(event, &synth_sample, session); 254 + process(tool, event, &synth_sample, machine); 287 255 } 288 256 289 257 free(event); ··· 292 260 293 261 static int __event__synthesize_thread(union perf_event *comm_event, 294 262 union perf_event *mmap_event, 295 - pid_t pid, perf_event__handler_t process, 296 - struct perf_session *session) 263 + pid_t pid, int full, 264 + perf_event__handler_t process, 265 + struct perf_tool *tool, 266 + struct machine *machine) 297 267 { 298 - pid_t tgid = perf_event__synthesize_comm(comm_event, pid, 1, process, 299 - session); 268 + pid_t tgid = perf_event__synthesize_comm(tool, comm_event, pid, full, 269 + process, machine); 300 270 if (tgid == -1) 301 271 return -1; 302 - return perf_event__synthesize_mmap_events(mmap_event, pid, tgid, 303 - process, session); 272 + return perf_event__synthesize_mmap_events(tool, mmap_event, pid, tgid, 273 + process, machine); 304 274 } 305 275 306 - int perf_event__synthesize_thread_map(struct thread_map *threads, 276 + int perf_event__synthesize_thread_map(struct perf_tool *tool, 277 + struct thread_map *threads, 307 278 perf_event__handler_t process, 308 - struct perf_session *session) 279 + struct machine *machine) 309 280 { 310 281 union perf_event *comm_event, *mmap_event; 311 - int err = -1, thread; 282 + int err = -1, thread, j; 312 283 313 - comm_event = malloc(sizeof(comm_event->comm) + session->id_hdr_size); 284 + comm_event = malloc(sizeof(comm_event->comm) + machine->id_hdr_size); 314 285 if (comm_event == NULL) 315 286 goto out; 316 287 317 - mmap_event = malloc(sizeof(mmap_event->mmap) + session->id_hdr_size); 288 + 
mmap_event = malloc(sizeof(mmap_event->mmap) + machine->id_hdr_size); 318 289 if (mmap_event == NULL) 319 290 goto out_free_comm; 320 291 321 292 err = 0; 322 293 for (thread = 0; thread < threads->nr; ++thread) { 323 294 if (__event__synthesize_thread(comm_event, mmap_event, 324 - threads->map[thread], 325 - process, session)) { 295 + threads->map[thread], 0, 296 + process, tool, machine)) { 326 297 err = -1; 327 298 break; 299 + } 300 + 301 + /* 302 + * comm.pid is set to thread group id by 303 + * perf_event__synthesize_comm 304 + */ 305 + if ((int) comm_event->comm.pid != threads->map[thread]) { 306 + bool need_leader = true; 307 + 308 + /* is thread group leader in thread_map? */ 309 + for (j = 0; j < threads->nr; ++j) { 310 + if ((int) comm_event->comm.pid == threads->map[j]) { 311 + need_leader = false; 312 + break; 313 + } 314 + } 315 + 316 + /* if not, generate events for it */ 317 + if (need_leader && 318 + __event__synthesize_thread(comm_event, 319 + mmap_event, 320 + comm_event->comm.pid, 0, 321 + process, tool, machine)) { 322 + err = -1; 323 + break; 324 + } 328 325 } 329 326 } 330 327 free(mmap_event); ··· 363 302 return err; 364 303 } 365 304 366 - int perf_event__synthesize_threads(perf_event__handler_t process, 367 - struct perf_session *session) 305 + int perf_event__synthesize_threads(struct perf_tool *tool, 306 + perf_event__handler_t process, 307 + struct machine *machine) 368 308 { 369 309 DIR *proc; 370 310 struct dirent dirent, *next; 371 311 union perf_event *comm_event, *mmap_event; 372 312 int err = -1; 373 313 374 - comm_event = malloc(sizeof(comm_event->comm) + session->id_hdr_size); 314 + comm_event = malloc(sizeof(comm_event->comm) + machine->id_hdr_size); 375 315 if (comm_event == NULL) 376 316 goto out; 377 317 378 - mmap_event = malloc(sizeof(mmap_event->mmap) + session->id_hdr_size); 318 + mmap_event = malloc(sizeof(mmap_event->mmap) + machine->id_hdr_size); 379 319 if (mmap_event == NULL) 380 320 goto out_free_comm; 381 321 ··· 
391 329 if (*end) /* only interested in proper numerical dirents */ 392 330 continue; 393 331 394 - __event__synthesize_thread(comm_event, mmap_event, pid, 395 - process, session); 332 + __event__synthesize_thread(comm_event, mmap_event, pid, 1, 333 + process, tool, machine); 396 334 } 397 335 398 336 closedir(proc); ··· 427 365 return 1; 428 366 } 429 367 430 - int perf_event__synthesize_kernel_mmap(perf_event__handler_t process, 431 - struct perf_session *session, 368 + int perf_event__synthesize_kernel_mmap(struct perf_tool *tool, 369 + perf_event__handler_t process, 432 370 struct machine *machine, 433 371 const char *symbol_name) 434 372 { ··· 445 383 */ 446 384 struct process_symbol_args args = { .name = symbol_name, }; 447 385 union perf_event *event = zalloc((sizeof(event->mmap) + 448 - session->id_hdr_size)); 386 + machine->id_hdr_size)); 449 387 if (event == NULL) { 450 388 pr_debug("Not enough memory synthesizing mmap event " 451 389 "for kernel modules\n"); ··· 479 417 size = ALIGN(size, sizeof(u64)); 480 418 event->mmap.header.type = PERF_RECORD_MMAP; 481 419 event->mmap.header.size = (sizeof(event->mmap) - 482 - (sizeof(event->mmap.filename) - size) + session->id_hdr_size); 420 + (sizeof(event->mmap.filename) - size) + machine->id_hdr_size); 483 421 event->mmap.pgoff = args.start; 484 422 event->mmap.start = map->start; 485 423 event->mmap.len = map->end - event->mmap.start; 486 424 event->mmap.pid = machine->pid; 487 425 488 - err = process(event, &synth_sample, session); 426 + err = process(tool, event, &synth_sample, machine); 489 427 free(event); 490 428 491 429 return err; 492 430 } 493 431 494 - int perf_event__process_comm(union perf_event *event, 495 - struct perf_sample *sample __used, 496 - struct perf_session *session) 432 + size_t perf_event__fprintf_comm(union perf_event *event, FILE *fp) 497 433 { 498 - struct thread *thread = perf_session__findnew(session, event->comm.tid); 434 + return fprintf(fp, ": %s:%d\n", event->comm.comm, 
event->comm.tid); 435 + } 499 436 500 - dump_printf(": %s:%d\n", event->comm.comm, event->comm.tid); 437 + int perf_event__process_comm(struct perf_tool *tool __used, 438 + union perf_event *event, 439 + struct perf_sample *sample __used, 440 + struct machine *machine) 441 + { 442 + struct thread *thread = machine__findnew_thread(machine, event->comm.tid); 443 + 444 + if (dump_trace) 445 + perf_event__fprintf_comm(event, stdout); 501 446 502 447 if (thread == NULL || thread__set_comm(thread, event->comm.comm)) { 503 448 dump_printf("problem processing PERF_RECORD_COMM, skipping event.\n"); ··· 514 445 return 0; 515 446 } 516 447 517 - int perf_event__process_lost(union perf_event *event, 448 + int perf_event__process_lost(struct perf_tool *tool __used, 449 + union perf_event *event, 518 450 struct perf_sample *sample __used, 519 - struct perf_session *session) 451 + struct machine *machine __used) 520 452 { 521 453 dump_printf(": id:%" PRIu64 ": lost:%" PRIu64 "\n", 522 454 event->lost.id, event->lost.lost); 523 - session->hists.stats.total_lost += event->lost.lost; 524 455 return 0; 525 456 } 526 457 ··· 537 468 maps[MAP__FUNCTION]->end = ~0ULL; 538 469 } 539 470 540 - static int perf_event__process_kernel_mmap(union perf_event *event, 541 - struct perf_session *session) 471 + static int perf_event__process_kernel_mmap(struct perf_tool *tool __used, 472 + union perf_event *event, 473 + struct machine *machine) 542 474 { 543 475 struct map *map; 544 476 char kmmap_prefix[PATH_MAX]; 545 - struct machine *machine; 546 477 enum dso_kernel_type kernel_type; 547 478 bool is_kernel_mmap; 548 - 549 - machine = perf_session__findnew_machine(session, event->mmap.pid); 550 - if (!machine) { 551 - pr_err("Can't find id %d's machine\n", event->mmap.pid); 552 - goto out_problem; 553 - } 554 479 555 480 machine__mmap_name(machine, kmmap_prefix, sizeof(kmmap_prefix)); 556 481 if (machine__is_host(machine)) ··· 612 549 * time /proc/sys/kernel/kptr_restrict was non zero. 
613 550 */ 614 551 if (event->mmap.pgoff != 0) { 615 - perf_session__set_kallsyms_ref_reloc_sym(machine->vmlinux_maps, 616 - symbol_name, 617 - event->mmap.pgoff); 552 + maps__set_kallsyms_ref_reloc_sym(machine->vmlinux_maps, 553 + symbol_name, 554 + event->mmap.pgoff); 618 555 } 619 556 620 557 if (machine__is_default_guest(machine)) { ··· 630 567 return -1; 631 568 } 632 569 633 - int perf_event__process_mmap(union perf_event *event, 634 - struct perf_sample *sample __used, 635 - struct perf_session *session) 570 + size_t perf_event__fprintf_mmap(union perf_event *event, FILE *fp) 636 571 { 637 - struct machine *machine; 572 + return fprintf(fp, " %d/%d: [%#" PRIx64 "(%#" PRIx64 ") @ %#" PRIx64 "]: %s\n", 573 + event->mmap.pid, event->mmap.tid, event->mmap.start, 574 + event->mmap.len, event->mmap.pgoff, event->mmap.filename); 575 + } 576 + 577 + int perf_event__process_mmap(struct perf_tool *tool, 578 + union perf_event *event, 579 + struct perf_sample *sample __used, 580 + struct machine *machine) 581 + { 638 582 struct thread *thread; 639 583 struct map *map; 640 584 u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; 641 585 int ret = 0; 642 586 643 - dump_printf(" %d/%d: [%#" PRIx64 "(%#" PRIx64 ") @ %#" PRIx64 "]: %s\n", 644 - event->mmap.pid, event->mmap.tid, event->mmap.start, 645 - event->mmap.len, event->mmap.pgoff, event->mmap.filename); 587 + if (dump_trace) 588 + perf_event__fprintf_mmap(event, stdout); 646 589 647 590 if (cpumode == PERF_RECORD_MISC_GUEST_KERNEL || 648 591 cpumode == PERF_RECORD_MISC_KERNEL) { 649 - ret = perf_event__process_kernel_mmap(event, session); 592 + ret = perf_event__process_kernel_mmap(tool, event, machine); 650 593 if (ret < 0) 651 594 goto out_problem; 652 595 return 0; 653 596 } 654 597 655 - machine = perf_session__find_host_machine(session); 656 - if (machine == NULL) 657 - goto out_problem; 658 - thread = perf_session__findnew(session, event->mmap.pid); 598 + thread = machine__findnew_thread(machine, 
event->mmap.pid); 659 599 if (thread == NULL) 660 600 goto out_problem; 661 601 map = map__new(&machine->user_dsos, event->mmap.start, ··· 676 610 return 0; 677 611 } 678 612 679 - int perf_event__process_task(union perf_event *event, 680 - struct perf_sample *sample __used, 681 - struct perf_session *session) 613 + size_t perf_event__fprintf_task(union perf_event *event, FILE *fp) 682 614 { 683 - struct thread *thread = perf_session__findnew(session, event->fork.tid); 684 - struct thread *parent = perf_session__findnew(session, event->fork.ptid); 615 + return fprintf(fp, "(%d:%d):(%d:%d)\n", 616 + event->fork.pid, event->fork.tid, 617 + event->fork.ppid, event->fork.ptid); 618 + } 685 619 686 - dump_printf("(%d:%d):(%d:%d)\n", event->fork.pid, event->fork.tid, 687 - event->fork.ppid, event->fork.ptid); 620 + int perf_event__process_task(struct perf_tool *tool __used, 621 + union perf_event *event, 622 + struct perf_sample *sample __used, 623 + struct machine *machine) 624 + { 625 + struct thread *thread = machine__findnew_thread(machine, event->fork.tid); 626 + struct thread *parent = machine__findnew_thread(machine, event->fork.ptid); 627 + 628 + if (dump_trace) 629 + perf_event__fprintf_task(event, stdout); 688 630 689 631 if (event->header.type == PERF_RECORD_EXIT) { 690 - perf_session__remove_thread(session, thread); 632 + machine__remove_thread(machine, thread); 691 633 return 0; 692 634 } 693 635 ··· 708 634 return 0; 709 635 } 710 636 711 - int perf_event__process(union perf_event *event, struct perf_sample *sample, 712 - struct perf_session *session) 637 + size_t perf_event__fprintf(union perf_event *event, FILE *fp) 713 638 { 639 + size_t ret = fprintf(fp, "PERF_RECORD_%s", 640 + perf_event__name(event->header.type)); 641 + 714 642 switch (event->header.type) { 715 643 case PERF_RECORD_COMM: 716 - perf_event__process_comm(event, sample, session); 717 - break; 718 - case PERF_RECORD_MMAP: 719 - perf_event__process_mmap(event, sample, session); 644 + ret += 
perf_event__fprintf_comm(event, fp); 720 645 break; 721 646 case PERF_RECORD_FORK: 722 647 case PERF_RECORD_EXIT: 723 - perf_event__process_task(event, sample, session); 648 + ret += perf_event__fprintf_task(event, fp); 649 + break; 650 + case PERF_RECORD_MMAP: 651 + ret += perf_event__fprintf_mmap(event, fp); 652 + break; 653 + default: 654 + ret += fprintf(fp, "\n"); 655 + } 656 + 657 + return ret; 658 + } 659 + 660 + int perf_event__process(struct perf_tool *tool, union perf_event *event, 661 + struct perf_sample *sample, struct machine *machine) 662 + { 663 + switch (event->header.type) { 664 + case PERF_RECORD_COMM: 665 + perf_event__process_comm(tool, event, sample, machine); 666 + break; 667 + case PERF_RECORD_MMAP: 668 + perf_event__process_mmap(tool, event, sample, machine); 669 + break; 670 + case PERF_RECORD_FORK: 671 + case PERF_RECORD_EXIT: 672 + perf_event__process_task(tool, event, sample, machine); 724 673 break; 725 674 case PERF_RECORD_LOST: 726 - perf_event__process_lost(event, sample, session); 675 + perf_event__process_lost(tool, event, sample, machine); 727 676 default: 728 677 break; 729 678 } ··· 755 658 } 756 659 757 660 void thread__find_addr_map(struct thread *self, 758 - struct perf_session *session, u8 cpumode, 759 - enum map_type type, pid_t pid, u64 addr, 661 + struct machine *machine, u8 cpumode, 662 + enum map_type type, u64 addr, 760 663 struct addr_location *al) 761 664 { 762 665 struct map_groups *mg = &self->mg; 763 - struct machine *machine = NULL; 764 666 765 667 al->thread = self; 766 668 al->addr = addr; 767 669 al->cpumode = cpumode; 768 670 al->filtered = false; 769 671 672 + if (machine == NULL) { 673 + al->map = NULL; 674 + return; 675 + } 676 + 770 677 if (cpumode == PERF_RECORD_MISC_KERNEL && perf_host) { 771 678 al->level = 'k'; 772 - machine = perf_session__find_host_machine(session); 773 - if (machine == NULL) { 774 - al->map = NULL; 775 - return; 776 - } 777 679 mg = &machine->kmaps; 778 680 } else if (cpumode == 
PERF_RECORD_MISC_USER && perf_host) { 779 681 al->level = '.'; 780 - machine = perf_session__find_host_machine(session); 781 682 } else if (cpumode == PERF_RECORD_MISC_GUEST_KERNEL && perf_guest) { 782 683 al->level = 'g'; 783 - machine = perf_session__find_machine(session, pid); 784 - if (machine == NULL) { 785 - al->map = NULL; 786 - return; 787 - } 788 684 mg = &machine->kmaps; 789 685 } else { 790 686 /* ··· 823 733 al->addr = al->map->map_ip(al->map, al->addr); 824 734 } 825 735 826 - void thread__find_addr_location(struct thread *self, 827 - struct perf_session *session, u8 cpumode, 828 - enum map_type type, pid_t pid, u64 addr, 736 + void thread__find_addr_location(struct thread *thread, struct machine *machine, 737 + u8 cpumode, enum map_type type, u64 addr, 829 738 struct addr_location *al, 830 739 symbol_filter_t filter) 831 740 { 832 - thread__find_addr_map(self, session, cpumode, type, pid, addr, al); 741 + thread__find_addr_map(thread, machine, cpumode, type, addr, al); 833 742 if (al->map != NULL) 834 743 al->sym = map__find_symbol(al->map, al->addr, filter); 835 744 else ··· 836 747 } 837 748 838 749 int perf_event__preprocess_sample(const union perf_event *event, 839 - struct perf_session *session, 750 + struct machine *machine, 840 751 struct addr_location *al, 841 752 struct perf_sample *sample, 842 753 symbol_filter_t filter) 843 754 { 844 755 u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; 845 - struct thread *thread = perf_session__findnew(session, event->ip.pid); 756 + struct thread *thread = machine__findnew_thread(machine, event->ip.pid); 846 757 847 758 if (thread == NULL) 848 759 return -1; ··· 853 764 854 765 dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid); 855 766 /* 856 - * Have we already created the kernel maps for the host machine? 767 + * Have we already created the kernel maps for this machine? 
857 768 * 858 769 * This should have happened earlier, when we processed the kernel MMAP 859 770 * events, but for older perf.data files there was no such thing, so do 860 771 * it now. 861 772 */ 862 773 if (cpumode == PERF_RECORD_MISC_KERNEL && 863 - session->host_machine.vmlinux_maps[MAP__FUNCTION] == NULL) 864 - machine__create_kernel_maps(&session->host_machine); 774 + machine->vmlinux_maps[MAP__FUNCTION] == NULL) 775 + machine__create_kernel_maps(machine); 865 776 866 - thread__find_addr_map(thread, session, cpumode, MAP__FUNCTION, 867 - event->ip.pid, event->ip.ip, al); 777 + thread__find_addr_map(thread, machine, cpumode, MAP__FUNCTION, 778 + event->ip.ip, al); 868 779 dump_printf(" ...... dso: %s\n", 869 780 al->map ? al->map->dso->long_name : 870 781 al->level == 'H' ? "[hypervisor]" : "<not found>"); ··· 872 783 al->cpu = sample->cpu; 873 784 874 785 if (al->map) { 786 + struct dso *dso = al->map->dso; 787 + 875 788 if (symbol_conf.dso_list && 876 - (!al->map || !al->map->dso || 877 - !(strlist__has_entry(symbol_conf.dso_list, 878 - al->map->dso->short_name) || 879 - (al->map->dso->short_name != al->map->dso->long_name && 880 - strlist__has_entry(symbol_conf.dso_list, 881 - al->map->dso->long_name))))) 789 + (!dso || !(strlist__has_entry(symbol_conf.dso_list, 790 + dso->short_name) || 791 + (dso->short_name != dso->long_name && 792 + strlist__has_entry(symbol_conf.dso_list, 793 + dso->long_name))))) 882 794 goto out_filtered; 883 795 884 796 al->sym = map__find_symbol(al->map, al->addr, filter);
+44 -24
tools/perf/util/event.h
··· 2 2 #define __PERF_RECORD_H 3 3 4 4 #include <limits.h> 5 + #include <stdio.h> 5 6 6 7 #include "../perf.h" 7 8 #include "map.h" ··· 142 141 143 142 void perf_event__print_totals(void); 144 143 145 - struct perf_session; 144 + struct perf_tool; 146 145 struct thread_map; 147 146 148 - typedef int (*perf_event__handler_synth_t)(union perf_event *event, 149 - struct perf_session *session); 150 - typedef int (*perf_event__handler_t)(union perf_event *event, 147 + typedef int (*perf_event__handler_t)(struct perf_tool *tool, 148 + union perf_event *event, 151 149 struct perf_sample *sample, 152 - struct perf_session *session); 150 + struct machine *machine); 153 151 154 - int perf_event__synthesize_thread_map(struct thread_map *threads, 152 + int perf_event__synthesize_thread_map(struct perf_tool *tool, 153 + struct thread_map *threads, 155 154 perf_event__handler_t process, 156 - struct perf_session *session); 157 - int perf_event__synthesize_threads(perf_event__handler_t process, 158 - struct perf_session *session); 159 - int perf_event__synthesize_kernel_mmap(perf_event__handler_t process, 160 - struct perf_session *session, 155 + struct machine *machine); 156 + int perf_event__synthesize_threads(struct perf_tool *tool, 157 + perf_event__handler_t process, 158 + struct machine *machine); 159 + int perf_event__synthesize_kernel_mmap(struct perf_tool *tool, 160 + perf_event__handler_t process, 161 161 struct machine *machine, 162 162 const char *symbol_name); 163 163 164 - int perf_event__synthesize_modules(perf_event__handler_t process, 165 - struct perf_session *session, 164 + int perf_event__synthesize_modules(struct perf_tool *tool, 165 + perf_event__handler_t process, 166 166 struct machine *machine); 167 167 168 - int perf_event__process_comm(union perf_event *event, struct perf_sample *sample, 169 - struct perf_session *session); 170 - int perf_event__process_lost(union perf_event *event, struct perf_sample *sample, 171 - struct perf_session *session); 172 - 
int perf_event__process_mmap(union perf_event *event, struct perf_sample *sample, 173 - struct perf_session *session); 174 - int perf_event__process_task(union perf_event *event, struct perf_sample *sample, 175 - struct perf_session *session); 176 - int perf_event__process(union perf_event *event, struct perf_sample *sample, 177 - struct perf_session *session); 168 + int perf_event__process_comm(struct perf_tool *tool, 169 + union perf_event *event, 170 + struct perf_sample *sample, 171 + struct machine *machine); 172 + int perf_event__process_lost(struct perf_tool *tool, 173 + union perf_event *event, 174 + struct perf_sample *sample, 175 + struct machine *machine); 176 + int perf_event__process_mmap(struct perf_tool *tool, 177 + union perf_event *event, 178 + struct perf_sample *sample, 179 + struct machine *machine); 180 + int perf_event__process_task(struct perf_tool *tool, 181 + union perf_event *event, 182 + struct perf_sample *sample, 183 + struct machine *machine); 184 + int perf_event__process(struct perf_tool *tool, 185 + union perf_event *event, 186 + struct perf_sample *sample, 187 + struct machine *machine); 178 188 179 189 struct addr_location; 180 190 int perf_event__preprocess_sample(const union perf_event *self, 181 - struct perf_session *session, 191 + struct machine *machine, 182 192 struct addr_location *al, 183 193 struct perf_sample *sample, 184 194 symbol_filter_t filter); ··· 199 187 int perf_event__parse_sample(const union perf_event *event, u64 type, 200 188 int sample_size, bool sample_id_all, 201 189 struct perf_sample *sample, bool swapped); 190 + int perf_event__synthesize_sample(union perf_event *event, u64 type, 191 + const struct perf_sample *sample, 192 + bool swapped); 193 + 194 + size_t perf_event__fprintf_comm(union perf_event *event, FILE *fp); 195 + size_t perf_event__fprintf_mmap(union perf_event *event, FILE *fp); 196 + size_t perf_event__fprintf_task(union perf_event *event, FILE *fp); 197 + size_t perf_event__fprintf(union 
perf_event *event, FILE *fp); 202 198 203 199 #endif /* __PERF_RECORD_H */
+292 -7
tools/perf/util/evlist.c
··· 6 6 * 7 7 * Released under the GPL v2. (and only v2, not any later version) 8 8 */ 9 + #include "util.h" 10 + #include "debugfs.h" 9 11 #include <poll.h> 10 12 #include "cpumap.h" 11 13 #include "thread_map.h" 12 14 #include "evlist.h" 13 15 #include "evsel.h" 14 - #include "util.h" 16 + #include <unistd.h> 17 + 18 + #include "parse-events.h" 15 19 16 20 #include <sys/mman.h> 17 21 ··· 34 30 INIT_HLIST_HEAD(&evlist->heads[i]); 35 31 INIT_LIST_HEAD(&evlist->entries); 36 32 perf_evlist__set_maps(evlist, cpus, threads); 33 + evlist->workload.pid = -1; 37 34 } 38 35 39 36 struct perf_evlist *perf_evlist__new(struct cpu_map *cpus, ··· 46 41 perf_evlist__init(evlist, cpus, threads); 47 42 48 43 return evlist; 44 + } 45 + 46 + void perf_evlist__config_attrs(struct perf_evlist *evlist, 47 + struct perf_record_opts *opts) 48 + { 49 + struct perf_evsel *evsel; 50 + 51 + if (evlist->cpus->map[0] < 0) 52 + opts->no_inherit = true; 53 + 54 + list_for_each_entry(evsel, &evlist->entries, node) { 55 + perf_evsel__config(evsel, opts); 56 + 57 + if (evlist->nr_entries > 1) 58 + evsel->attr.sample_type |= PERF_SAMPLE_ID; 59 + } 49 60 } 50 61 51 62 static void perf_evlist__purge(struct perf_evlist *evlist) ··· 97 76 ++evlist->nr_entries; 98 77 } 99 78 79 + static void perf_evlist__splice_list_tail(struct perf_evlist *evlist, 80 + struct list_head *list, 81 + int nr_entries) 82 + { 83 + list_splice_tail(list, &evlist->entries); 84 + evlist->nr_entries += nr_entries; 85 + } 86 + 100 87 int perf_evlist__add_default(struct perf_evlist *evlist) 101 88 { 102 89 struct perf_event_attr attr = { ··· 127 98 perf_evsel__delete(evsel); 128 99 error: 129 100 return -ENOMEM; 101 + } 102 + 103 + int perf_evlist__add_attrs(struct perf_evlist *evlist, 104 + struct perf_event_attr *attrs, size_t nr_attrs) 105 + { 106 + struct perf_evsel *evsel, *n; 107 + LIST_HEAD(head); 108 + size_t i; 109 + 110 + for (i = 0; i < nr_attrs; i++) { 111 + evsel = perf_evsel__new(attrs + i, evlist->nr_entries + i); 
112 + if (evsel == NULL) 113 + goto out_delete_partial_list; 114 + list_add_tail(&evsel->node, &head); 115 + } 116 + 117 + perf_evlist__splice_list_tail(evlist, &head, nr_attrs); 118 + 119 + return 0; 120 + 121 + out_delete_partial_list: 122 + list_for_each_entry_safe(evsel, n, &head, node) 123 + perf_evsel__delete(evsel); 124 + return -1; 125 + } 126 + 127 + static int trace_event__id(const char *evname) 128 + { 129 + char *filename, *colon; 130 + int err = -1, fd; 131 + 132 + if (asprintf(&filename, "%s/%s/id", tracing_events_path, evname) < 0) 133 + return -1; 134 + 135 + colon = strrchr(filename, ':'); 136 + if (colon != NULL) 137 + *colon = '/'; 138 + 139 + fd = open(filename, O_RDONLY); 140 + if (fd >= 0) { 141 + char id[16]; 142 + if (read(fd, id, sizeof(id)) > 0) 143 + err = atoi(id); 144 + close(fd); 145 + } 146 + 147 + free(filename); 148 + return err; 149 + } 150 + 151 + int perf_evlist__add_tracepoints(struct perf_evlist *evlist, 152 + const char *tracepoints[], 153 + size_t nr_tracepoints) 154 + { 155 + int err; 156 + size_t i; 157 + struct perf_event_attr *attrs = zalloc(nr_tracepoints * sizeof(*attrs)); 158 + 159 + if (attrs == NULL) 160 + return -1; 161 + 162 + for (i = 0; i < nr_tracepoints; i++) { 163 + err = trace_event__id(tracepoints[i]); 164 + 165 + if (err < 0) 166 + goto out_free_attrs; 167 + 168 + attrs[i].type = PERF_TYPE_TRACEPOINT; 169 + attrs[i].config = err; 170 + attrs[i].sample_type = (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME | 171 + PERF_SAMPLE_CPU); 172 + attrs[i].sample_period = 1; 173 + } 174 + 175 + err = perf_evlist__add_attrs(evlist, attrs, nr_tracepoints); 176 + out_free_attrs: 177 + free(attrs); 178 + return err; 179 + } 180 + 181 + static struct perf_evsel * 182 + perf_evlist__find_tracepoint_by_id(struct perf_evlist *evlist, int id) 183 + { 184 + struct perf_evsel *evsel; 185 + 186 + list_for_each_entry(evsel, &evlist->entries, node) { 187 + if (evsel->attr.type == PERF_TYPE_TRACEPOINT && 188 + (int)evsel->attr.config == id) 189 
+ return evsel; 190 + } 191 + 192 + return NULL; 193 + } 194 + 195 + int perf_evlist__set_tracepoints_handlers(struct perf_evlist *evlist, 196 + const struct perf_evsel_str_handler *assocs, 197 + size_t nr_assocs) 198 + { 199 + struct perf_evsel *evsel; 200 + int err; 201 + size_t i; 202 + 203 + for (i = 0; i < nr_assocs; i++) { 204 + err = trace_event__id(assocs[i].name); 205 + if (err < 0) 206 + goto out; 207 + 208 + evsel = perf_evlist__find_tracepoint_by_id(evlist, err); 209 + if (evsel == NULL) 210 + continue; 211 + 212 + err = -EEXIST; 213 + if (evsel->handler.func != NULL) 214 + goto out; 215 + evsel->handler.func = assocs[i].handler; 216 + } 217 + 218 + err = 0; 219 + out: 220 + return err; 130 221 } 131 222 132 223 void perf_evlist__disable(struct perf_evlist *evlist) ··· 275 126 } 276 127 } 277 128 278 - int perf_evlist__alloc_pollfd(struct perf_evlist *evlist) 129 + static int perf_evlist__alloc_pollfd(struct perf_evlist *evlist) 279 130 { 280 131 int nfds = evlist->cpus->nr * evlist->threads->nr * evlist->nr_entries; 281 132 evlist->pollfd = malloc(sizeof(struct pollfd) * nfds); ··· 431 282 evlist->mmap = NULL; 432 283 } 433 284 434 - int perf_evlist__alloc_mmap(struct perf_evlist *evlist) 285 + static int perf_evlist__alloc_mmap(struct perf_evlist *evlist) 435 286 { 436 287 evlist->nr_mmaps = evlist->cpus->nr; 437 288 if (evlist->cpus->map[0] == -1) ··· 447 298 evlist->mmap[idx].mask = mask; 448 299 evlist->mmap[idx].base = mmap(NULL, evlist->mmap_len, prot, 449 300 MAP_SHARED, fd, 0); 450 - if (evlist->mmap[idx].base == MAP_FAILED) 301 + if (evlist->mmap[idx].base == MAP_FAILED) { 302 + evlist->mmap[idx].base = NULL; 451 303 return -1; 304 + } 452 305 453 306 perf_evlist__add_pollfd(evlist, fd); 454 307 return 0; ··· 551 400 * 552 401 * Using perf_evlist__read_on_cpu does this automatically. 
553 402 */ 554 - int perf_evlist__mmap(struct perf_evlist *evlist, int pages, bool overwrite) 403 + int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages, 404 + bool overwrite) 555 405 { 556 406 unsigned int page_size = sysconf(_SC_PAGE_SIZE); 557 - int mask = pages * page_size - 1; 558 407 struct perf_evsel *evsel; 559 408 const struct cpu_map *cpus = evlist->cpus; 560 409 const struct thread_map *threads = evlist->threads; 561 - int prot = PROT_READ | (overwrite ? 0 : PROT_WRITE); 410 + int prot = PROT_READ | (overwrite ? 0 : PROT_WRITE), mask; 411 + 412 + /* 512 kiB: default amount of unprivileged mlocked memory */ 413 + if (pages == UINT_MAX) 414 + pages = (512 * 1024) / page_size; 415 + else if (!is_power_of_2(pages)) 416 + return -EINVAL; 417 + 418 + mask = pages * page_size - 1; 562 419 563 420 if (evlist->mmap == NULL && perf_evlist__alloc_mmap(evlist) < 0) 564 421 return -ENOMEM; ··· 671 512 return first->attr.sample_type; 672 513 } 673 514 515 + u16 perf_evlist__id_hdr_size(const struct perf_evlist *evlist) 516 + { 517 + struct perf_evsel *first; 518 + struct perf_sample *data; 519 + u64 sample_type; 520 + u16 size = 0; 521 + 522 + first = list_entry(evlist->entries.next, struct perf_evsel, node); 523 + 524 + if (!first->attr.sample_id_all) 525 + goto out; 526 + 527 + sample_type = first->attr.sample_type; 528 + 529 + if (sample_type & PERF_SAMPLE_TID) 530 + size += sizeof(data->tid) * 2; 531 + 532 + if (sample_type & PERF_SAMPLE_TIME) 533 + size += sizeof(data->time); 534 + 535 + if (sample_type & PERF_SAMPLE_ID) 536 + size += sizeof(data->id); 537 + 538 + if (sample_type & PERF_SAMPLE_STREAM_ID) 539 + size += sizeof(data->stream_id); 540 + 541 + if (sample_type & PERF_SAMPLE_CPU) 542 + size += sizeof(data->cpu) * 2; 543 + out: 544 + return size; 545 + } 546 + 674 547 bool perf_evlist__valid_sample_id_all(const struct perf_evlist *evlist) 675 548 { 676 549 struct perf_evsel *pos, *first; ··· 759 568 perf_evsel__close(evsel, ncpus, 
nthreads); 760 569 761 570 return err; 571 + } 572 + 573 + int perf_evlist__prepare_workload(struct perf_evlist *evlist, 574 + struct perf_record_opts *opts, 575 + const char *argv[]) 576 + { 577 + int child_ready_pipe[2], go_pipe[2]; 578 + char bf; 579 + 580 + if (pipe(child_ready_pipe) < 0) { 581 + perror("failed to create 'ready' pipe"); 582 + return -1; 583 + } 584 + 585 + if (pipe(go_pipe) < 0) { 586 + perror("failed to create 'go' pipe"); 587 + goto out_close_ready_pipe; 588 + } 589 + 590 + evlist->workload.pid = fork(); 591 + if (evlist->workload.pid < 0) { 592 + perror("failed to fork"); 593 + goto out_close_pipes; 594 + } 595 + 596 + if (!evlist->workload.pid) { 597 + if (opts->pipe_output) 598 + dup2(2, 1); 599 + 600 + close(child_ready_pipe[0]); 601 + close(go_pipe[1]); 602 + fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC); 603 + 604 + /* 605 + * Do a dummy execvp to get the PLT entry resolved, 606 + * so we avoid the resolver overhead on the real 607 + * execvp call. 608 + */ 609 + execvp("", (char **)argv); 610 + 611 + /* 612 + * Tell the parent we're ready to go 613 + */ 614 + close(child_ready_pipe[1]); 615 + 616 + /* 617 + * Wait until the parent tells us to go. 
618 + */ 619 + if (read(go_pipe[0], &bf, 1) == -1) 620 + perror("unable to read pipe"); 621 + 622 + execvp(argv[0], (char **)argv); 623 + 624 + perror(argv[0]); 625 + kill(getppid(), SIGUSR1); 626 + exit(-1); 627 + } 628 + 629 + if (!opts->system_wide && opts->target_tid == -1 && opts->target_pid == -1) 630 + evlist->threads->map[0] = evlist->workload.pid; 631 + 632 + close(child_ready_pipe[1]); 633 + close(go_pipe[0]); 634 + /* 635 + * wait for child to settle 636 + */ 637 + if (read(child_ready_pipe[0], &bf, 1) == -1) { 638 + perror("unable to read pipe"); 639 + goto out_close_pipes; 640 + } 641 + 642 + evlist->workload.cork_fd = go_pipe[1]; 643 + close(child_ready_pipe[0]); 644 + return 0; 645 + 646 + out_close_pipes: 647 + close(go_pipe[0]); 648 + close(go_pipe[1]); 649 + out_close_ready_pipe: 650 + close(child_ready_pipe[0]); 651 + close(child_ready_pipe[1]); 652 + return -1; 653 + } 654 + 655 + int perf_evlist__start_workload(struct perf_evlist *evlist) 656 + { 657 + if (evlist->workload.cork_fd > 0) { 658 + /* 659 + * Remove the cork, let it rip! 660 + */ 661 + return close(evlist->workload.cork_fd); 662 + } 663 + 664 + return 0; 762 665 }
+40 -3
tools/perf/util/evlist.h
··· 2 2 #define __PERF_EVLIST_H 1 3 3 4 4 #include <linux/list.h> 5 + #include <stdio.h> 5 6 #include "../perf.h" 6 7 #include "event.h" 8 + #include "util.h" 9 + #include <unistd.h> 7 10 8 11 struct pollfd; 9 12 struct thread_map; 10 13 struct cpu_map; 14 + struct perf_record_opts; 11 15 12 16 #define PERF_EVLIST__HLIST_BITS 8 13 17 #define PERF_EVLIST__HLIST_SIZE (1 << PERF_EVLIST__HLIST_BITS) ··· 23 19 int nr_fds; 24 20 int nr_mmaps; 25 21 int mmap_len; 22 + struct { 23 + int cork_fd; 24 + pid_t pid; 25 + } workload; 26 26 bool overwrite; 27 27 union perf_event event_copy; 28 28 struct perf_mmap *mmap; ··· 34 26 struct thread_map *threads; 35 27 struct cpu_map *cpus; 36 28 struct perf_evsel *selected; 29 + }; 30 + 31 + struct perf_evsel_str_handler { 32 + const char *name; 33 + void *handler; 37 34 }; 38 35 39 36 struct perf_evsel; ··· 52 39 53 40 void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry); 54 41 int perf_evlist__add_default(struct perf_evlist *evlist); 42 + int perf_evlist__add_attrs(struct perf_evlist *evlist, 43 + struct perf_event_attr *attrs, size_t nr_attrs); 44 + int perf_evlist__add_tracepoints(struct perf_evlist *evlist, 45 + const char *tracepoints[], size_t nr_tracepoints); 46 + int perf_evlist__set_tracepoints_handlers(struct perf_evlist *evlist, 47 + const struct perf_evsel_str_handler *assocs, 48 + size_t nr_assocs); 49 + 50 + #define perf_evlist__add_attrs_array(evlist, array) \ 51 + perf_evlist__add_attrs(evlist, array, ARRAY_SIZE(array)) 52 + 53 + #define perf_evlist__add_tracepoints_array(evlist, array) \ 54 + perf_evlist__add_tracepoints(evlist, array, ARRAY_SIZE(array)) 55 + 56 + #define perf_evlist__set_tracepoints_handlers_array(evlist, array) \ 57 + perf_evlist__set_tracepoints_handlers(evlist, array, ARRAY_SIZE(array)) 55 58 56 59 void perf_evlist__id_add(struct perf_evlist *evlist, struct perf_evsel *evsel, 57 60 int cpu, int thread, u64 id); 58 61 59 - int perf_evlist__alloc_pollfd(struct perf_evlist 
*evlist); 60 62 void perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd); 61 63 62 64 struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id); ··· 80 52 81 53 int perf_evlist__open(struct perf_evlist *evlist, bool group); 82 54 83 - int perf_evlist__alloc_mmap(struct perf_evlist *evlist); 84 - int perf_evlist__mmap(struct perf_evlist *evlist, int pages, bool overwrite); 55 + void perf_evlist__config_attrs(struct perf_evlist *evlist, 56 + struct perf_record_opts *opts); 57 + 58 + int perf_evlist__prepare_workload(struct perf_evlist *evlist, 59 + struct perf_record_opts *opts, 60 + const char *argv[]); 61 + int perf_evlist__start_workload(struct perf_evlist *evlist); 62 + 63 + int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages, 64 + bool overwrite); 85 65 void perf_evlist__munmap(struct perf_evlist *evlist); 86 66 87 67 void perf_evlist__disable(struct perf_evlist *evlist); ··· 113 77 114 78 u64 perf_evlist__sample_type(const struct perf_evlist *evlist); 115 79 bool perf_evlist__sample_id_all(const const struct perf_evlist *evlist); 80 + u16 perf_evlist__id_hdr_size(const struct perf_evlist *evlist); 116 81 117 82 bool perf_evlist__valid_sample_type(const struct perf_evlist *evlist); 118 83 bool perf_evlist__valid_sample_id_all(const struct perf_evlist *evlist);
+153 -1
tools/perf/util/evsel.c
··· 63 63 return evsel; 64 64 } 65 65 66 + void perf_evsel__config(struct perf_evsel *evsel, struct perf_record_opts *opts) 67 + { 68 + struct perf_event_attr *attr = &evsel->attr; 69 + int track = !evsel->idx; /* only the first counter needs these */ 70 + 71 + attr->sample_id_all = opts->sample_id_all_avail ? 1 : 0; 72 + attr->inherit = !opts->no_inherit; 73 + attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | 74 + PERF_FORMAT_TOTAL_TIME_RUNNING | 75 + PERF_FORMAT_ID; 76 + 77 + attr->sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID; 78 + 79 + /* 80 + * We default some events to a 1 default interval. But keep 81 + * it a weak assumption overridable by the user. 82 + */ 83 + if (!attr->sample_period || (opts->user_freq != UINT_MAX && 84 + opts->user_interval != ULLONG_MAX)) { 85 + if (opts->freq) { 86 + attr->sample_type |= PERF_SAMPLE_PERIOD; 87 + attr->freq = 1; 88 + attr->sample_freq = opts->freq; 89 + } else { 90 + attr->sample_period = opts->default_interval; 91 + } 92 + } 93 + 94 + if (opts->no_samples) 95 + attr->sample_freq = 0; 96 + 97 + if (opts->inherit_stat) 98 + attr->inherit_stat = 1; 99 + 100 + if (opts->sample_address) { 101 + attr->sample_type |= PERF_SAMPLE_ADDR; 102 + attr->mmap_data = track; 103 + } 104 + 105 + if (opts->call_graph) 106 + attr->sample_type |= PERF_SAMPLE_CALLCHAIN; 107 + 108 + if (opts->system_wide) 109 + attr->sample_type |= PERF_SAMPLE_CPU; 110 + 111 + if (opts->period) 112 + attr->sample_type |= PERF_SAMPLE_PERIOD; 113 + 114 + if (opts->sample_id_all_avail && 115 + (opts->sample_time || opts->system_wide || 116 + !opts->no_inherit || opts->cpu_list)) 117 + attr->sample_type |= PERF_SAMPLE_TIME; 118 + 119 + if (opts->raw_samples) { 120 + attr->sample_type |= PERF_SAMPLE_TIME; 121 + attr->sample_type |= PERF_SAMPLE_RAW; 122 + attr->sample_type |= PERF_SAMPLE_CPU; 123 + } 124 + 125 + if (opts->no_delay) { 126 + attr->watermark = 0; 127 + attr->wakeup_events = 1; 128 + } 129 + 130 + attr->mmap = track; 131 + attr->comm = track; 
132 + 133 + if (opts->target_pid == -1 && opts->target_tid == -1 && !opts->system_wide) { 134 + attr->disabled = 1; 135 + attr->enable_on_exec = 1; 136 + } 137 + } 138 + 66 139 int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads) 67 140 { 68 141 int cpu, thread; ··· 460 387 u32 val32[2]; 461 388 } u; 462 389 463 - 390 + memset(data, 0, sizeof(*data)); 464 391 data->cpu = data->pid = data->tid = -1; 465 392 data->stream_id = data->id = data->time = -1ULL; 466 393 ··· 573 500 return -EFAULT; 574 501 575 502 data->raw_data = (void *) pdata; 503 + } 504 + 505 + return 0; 506 + } 507 + 508 + int perf_event__synthesize_sample(union perf_event *event, u64 type, 509 + const struct perf_sample *sample, 510 + bool swapped) 511 + { 512 + u64 *array; 513 + 514 + /* 515 + * used for cross-endian analysis. See git commit 65014ab3 516 + * for why this goofiness is needed. 517 + */ 518 + union { 519 + u64 val64; 520 + u32 val32[2]; 521 + } u; 522 + 523 + array = event->sample.array; 524 + 525 + if (type & PERF_SAMPLE_IP) { 526 + event->ip.ip = sample->ip; 527 + array++; 528 + } 529 + 530 + if (type & PERF_SAMPLE_TID) { 531 + u.val32[0] = sample->pid; 532 + u.val32[1] = sample->tid; 533 + if (swapped) { 534 + /* 535 + * Inverse of what is done in perf_event__parse_sample 536 + */ 537 + u.val32[0] = bswap_32(u.val32[0]); 538 + u.val32[1] = bswap_32(u.val32[1]); 539 + u.val64 = bswap_64(u.val64); 540 + } 541 + 542 + *array = u.val64; 543 + array++; 544 + } 545 + 546 + if (type & PERF_SAMPLE_TIME) { 547 + *array = sample->time; 548 + array++; 549 + } 550 + 551 + if (type & PERF_SAMPLE_ADDR) { 552 + *array = sample->addr; 553 + array++; 554 + } 555 + 556 + if (type & PERF_SAMPLE_ID) { 557 + *array = sample->id; 558 + array++; 559 + } 560 + 561 + if (type & PERF_SAMPLE_STREAM_ID) { 562 + *array = sample->stream_id; 563 + array++; 564 + } 565 + 566 + if (type & PERF_SAMPLE_CPU) { 567 + u.val32[0] = sample->cpu; 568 + if (swapped) { 569 + /* 570 + * Inverse of what 
is done in perf_event__parse_sample 571 + */ 572 + u.val32[0] = bswap_32(u.val32[0]); 573 + u.val64 = bswap_64(u.val64); 574 + } 575 + *array = u.val64; 576 + array++; 577 + } 578 + 579 + if (type & PERF_SAMPLE_PERIOD) { 580 + *array = sample->period; 581 + array++; 576 582 } 577 583 578 584 return 0;
+8
tools/perf/util/evsel.h
··· 61 61 off_t id_offset; 62 62 }; 63 63 struct cgroup_sel *cgrp; 64 + struct { 65 + void *func; 66 + void *data; 67 + } handler; 64 68 bool supported; 65 69 }; 66 70 67 71 struct cpu_map; 68 72 struct thread_map; 69 73 struct perf_evlist; 74 + struct perf_record_opts; 70 75 71 76 struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr, int idx); 72 77 void perf_evsel__init(struct perf_evsel *evsel, 73 78 struct perf_event_attr *attr, int idx); 74 79 void perf_evsel__exit(struct perf_evsel *evsel); 75 80 void perf_evsel__delete(struct perf_evsel *evsel); 81 + 82 + void perf_evsel__config(struct perf_evsel *evsel, 83 + struct perf_record_opts *opts); 76 84 77 85 int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads); 78 86 int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads);
+338 -403
tools/perf/util/header.c
··· 8 8 #include <stdlib.h> 9 9 #include <linux/list.h> 10 10 #include <linux/kernel.h> 11 + #include <linux/bitops.h> 11 12 #include <sys/utsname.h> 12 13 13 14 #include "evlist.h" ··· 28 27 29 28 static u32 header_argc; 30 29 static const char **header_argv; 31 - 32 - static int dsos__write_buildid_table(struct perf_header *header, int fd); 33 - static int perf_session__cache_build_ids(struct perf_session *session); 34 30 35 31 int perf_header__push_event(u64 id, const char *name) 36 32 { ··· 185 187 return 0; 186 188 } 187 189 190 + #define dsos__for_each_with_build_id(pos, head) \ 191 + list_for_each_entry(pos, head, node) \ 192 + if (!pos->has_build_id) \ 193 + continue; \ 194 + else 195 + 196 + static int __dsos__write_buildid_table(struct list_head *head, pid_t pid, 197 + u16 misc, int fd) 198 + { 199 + struct dso *pos; 200 + 201 + dsos__for_each_with_build_id(pos, head) { 202 + int err; 203 + struct build_id_event b; 204 + size_t len; 205 + 206 + if (!pos->hit) 207 + continue; 208 + len = pos->long_name_len + 1; 209 + len = ALIGN(len, NAME_ALIGN); 210 + memset(&b, 0, sizeof(b)); 211 + memcpy(&b.build_id, pos->build_id, sizeof(pos->build_id)); 212 + b.pid = pid; 213 + b.header.misc = misc; 214 + b.header.size = sizeof(b) + len; 215 + err = do_write(fd, &b, sizeof(b)); 216 + if (err < 0) 217 + return err; 218 + err = write_padded(fd, pos->long_name, 219 + pos->long_name_len + 1, len); 220 + if (err < 0) 221 + return err; 222 + } 223 + 224 + return 0; 225 + } 226 + 227 + static int machine__write_buildid_table(struct machine *machine, int fd) 228 + { 229 + int err; 230 + u16 kmisc = PERF_RECORD_MISC_KERNEL, 231 + umisc = PERF_RECORD_MISC_USER; 232 + 233 + if (!machine__is_host(machine)) { 234 + kmisc = PERF_RECORD_MISC_GUEST_KERNEL; 235 + umisc = PERF_RECORD_MISC_GUEST_USER; 236 + } 237 + 238 + err = __dsos__write_buildid_table(&machine->kernel_dsos, machine->pid, 239 + kmisc, fd); 240 + if (err == 0) 241 + err = 
__dsos__write_buildid_table(&machine->user_dsos, 242 + machine->pid, umisc, fd); 243 + return err; 244 + } 245 + 246 + static int dsos__write_buildid_table(struct perf_header *header, int fd) 247 + { 248 + struct perf_session *session = container_of(header, 249 + struct perf_session, header); 250 + struct rb_node *nd; 251 + int err = machine__write_buildid_table(&session->host_machine, fd); 252 + 253 + if (err) 254 + return err; 255 + 256 + for (nd = rb_first(&session->machines); nd; nd = rb_next(nd)) { 257 + struct machine *pos = rb_entry(nd, struct machine, rb_node); 258 + err = machine__write_buildid_table(pos, fd); 259 + if (err) 260 + break; 261 + } 262 + return err; 263 + } 264 + 265 + int build_id_cache__add_s(const char *sbuild_id, const char *debugdir, 266 + const char *name, bool is_kallsyms) 267 + { 268 + const size_t size = PATH_MAX; 269 + char *realname, *filename = zalloc(size), 270 + *linkname = zalloc(size), *targetname; 271 + int len, err = -1; 272 + 273 + if (is_kallsyms) { 274 + if (symbol_conf.kptr_restrict) { 275 + pr_debug("Not caching a kptr_restrict'ed /proc/kallsyms\n"); 276 + return 0; 277 + } 278 + realname = (char *)name; 279 + } else 280 + realname = realpath(name, NULL); 281 + 282 + if (realname == NULL || filename == NULL || linkname == NULL) 283 + goto out_free; 284 + 285 + len = snprintf(filename, size, "%s%s%s", 286 + debugdir, is_kallsyms ? 
"/" : "", realname); 287 + if (mkdir_p(filename, 0755)) 288 + goto out_free; 289 + 290 + snprintf(filename + len, sizeof(filename) - len, "/%s", sbuild_id); 291 + 292 + if (access(filename, F_OK)) { 293 + if (is_kallsyms) { 294 + if (copyfile("/proc/kallsyms", filename)) 295 + goto out_free; 296 + } else if (link(realname, filename) && copyfile(name, filename)) 297 + goto out_free; 298 + } 299 + 300 + len = snprintf(linkname, size, "%s/.build-id/%.2s", 301 + debugdir, sbuild_id); 302 + 303 + if (access(linkname, X_OK) && mkdir_p(linkname, 0755)) 304 + goto out_free; 305 + 306 + snprintf(linkname + len, size - len, "/%s", sbuild_id + 2); 307 + targetname = filename + strlen(debugdir) - 5; 308 + memcpy(targetname, "../..", 5); 309 + 310 + if (symlink(targetname, linkname) == 0) 311 + err = 0; 312 + out_free: 313 + if (!is_kallsyms) 314 + free(realname); 315 + free(filename); 316 + free(linkname); 317 + return err; 318 + } 319 + 320 + static int build_id_cache__add_b(const u8 *build_id, size_t build_id_size, 321 + const char *name, const char *debugdir, 322 + bool is_kallsyms) 323 + { 324 + char sbuild_id[BUILD_ID_SIZE * 2 + 1]; 325 + 326 + build_id__sprintf(build_id, build_id_size, sbuild_id); 327 + 328 + return build_id_cache__add_s(sbuild_id, debugdir, name, is_kallsyms); 329 + } 330 + 331 + int build_id_cache__remove_s(const char *sbuild_id, const char *debugdir) 332 + { 333 + const size_t size = PATH_MAX; 334 + char *filename = zalloc(size), 335 + *linkname = zalloc(size); 336 + int err = -1; 337 + 338 + if (filename == NULL || linkname == NULL) 339 + goto out_free; 340 + 341 + snprintf(linkname, size, "%s/.build-id/%.2s/%s", 342 + debugdir, sbuild_id, sbuild_id + 2); 343 + 344 + if (access(linkname, F_OK)) 345 + goto out_free; 346 + 347 + if (readlink(linkname, filename, size - 1) < 0) 348 + goto out_free; 349 + 350 + if (unlink(linkname)) 351 + goto out_free; 352 + 353 + /* 354 + * Since the link is relative, we must make it absolute: 355 + */ 356 + 
snprintf(linkname, size, "%s/.build-id/%.2s/%s", 357 + debugdir, sbuild_id, filename); 358 + 359 + if (unlink(linkname)) 360 + goto out_free; 361 + 362 + err = 0; 363 + out_free: 364 + free(filename); 365 + free(linkname); 366 + return err; 367 + } 368 + 369 + static int dso__cache_build_id(struct dso *dso, const char *debugdir) 370 + { 371 + bool is_kallsyms = dso->kernel && dso->long_name[0] != '/'; 372 + 373 + return build_id_cache__add_b(dso->build_id, sizeof(dso->build_id), 374 + dso->long_name, debugdir, is_kallsyms); 375 + } 376 + 377 + static int __dsos__cache_build_ids(struct list_head *head, const char *debugdir) 378 + { 379 + struct dso *pos; 380 + int err = 0; 381 + 382 + dsos__for_each_with_build_id(pos, head) 383 + if (dso__cache_build_id(pos, debugdir)) 384 + err = -1; 385 + 386 + return err; 387 + } 388 + 389 + static int machine__cache_build_ids(struct machine *machine, const char *debugdir) 390 + { 391 + int ret = __dsos__cache_build_ids(&machine->kernel_dsos, debugdir); 392 + ret |= __dsos__cache_build_ids(&machine->user_dsos, debugdir); 393 + return ret; 394 + } 395 + 396 + static int perf_session__cache_build_ids(struct perf_session *session) 397 + { 398 + struct rb_node *nd; 399 + int ret; 400 + char debugdir[PATH_MAX]; 401 + 402 + snprintf(debugdir, sizeof(debugdir), "%s", buildid_dir); 403 + 404 + if (mkdir(debugdir, 0755) != 0 && errno != EEXIST) 405 + return -1; 406 + 407 + ret = machine__cache_build_ids(&session->host_machine, debugdir); 408 + 409 + for (nd = rb_first(&session->machines); nd; nd = rb_next(nd)) { 410 + struct machine *pos = rb_entry(nd, struct machine, rb_node); 411 + ret |= machine__cache_build_ids(pos, debugdir); 412 + } 413 + return ret ? 
-1 : 0; 414 + } 415 + 416 + static bool machine__read_build_ids(struct machine *machine, bool with_hits) 417 + { 418 + bool ret = __dsos__read_build_ids(&machine->kernel_dsos, with_hits); 419 + ret |= __dsos__read_build_ids(&machine->user_dsos, with_hits); 420 + return ret; 421 + } 422 + 423 + static bool perf_session__read_build_ids(struct perf_session *session, bool with_hits) 424 + { 425 + struct rb_node *nd; 426 + bool ret = machine__read_build_ids(&session->host_machine, with_hits); 427 + 428 + for (nd = rb_first(&session->machines); nd; nd = rb_next(nd)) { 429 + struct machine *pos = rb_entry(nd, struct machine, rb_node); 430 + ret |= machine__read_build_ids(pos, with_hits); 431 + } 432 + 433 + return ret; 434 + } 435 + 188 436 static int write_trace_info(int fd, struct perf_header *h __used, 189 437 struct perf_evlist *evlist) 190 438 { ··· 445 201 int err; 446 202 447 203 session = container_of(h, struct perf_session, header); 204 + 205 + if (!perf_session__read_build_ids(session, true)) 206 + return -1; 448 207 449 208 err = dsos__write_buildid_table(h, fd); 450 209 if (err < 0) { ··· 1312 1065 bool full_only; 1313 1066 }; 1314 1067 1315 - #define FEAT_OPA(n, w, p) \ 1316 - [n] = { .name = #n, .write = w, .print = p } 1317 - #define FEAT_OPF(n, w, p) \ 1318 - [n] = { .name = #n, .write = w, .print = p, .full_only = true } 1068 + #define FEAT_OPA(n, func) \ 1069 + [n] = { .name = #n, .write = write_##func, .print = print_##func } 1070 + #define FEAT_OPF(n, func) \ 1071 + [n] = { .name = #n, .write = write_##func, .print = print_##func, .full_only = true } 1072 + 1073 + /* feature_ops not implemented: */ 1074 + #define print_trace_info NULL 1075 + #define print_build_id NULL 1319 1076 1320 1077 static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = { 1321 - FEAT_OPA(HEADER_TRACE_INFO, write_trace_info, NULL), 1322 - FEAT_OPA(HEADER_BUILD_ID, write_build_id, NULL), 1323 - FEAT_OPA(HEADER_HOSTNAME, write_hostname, print_hostname), 1324 - 
FEAT_OPA(HEADER_OSRELEASE, write_osrelease, print_osrelease), 1325 - FEAT_OPA(HEADER_VERSION, write_version, print_version), 1326 - FEAT_OPA(HEADER_ARCH, write_arch, print_arch), 1327 - FEAT_OPA(HEADER_NRCPUS, write_nrcpus, print_nrcpus), 1328 - FEAT_OPA(HEADER_CPUDESC, write_cpudesc, print_cpudesc), 1329 - FEAT_OPA(HEADER_CPUID, write_cpuid, print_cpuid), 1330 - FEAT_OPA(HEADER_TOTAL_MEM, write_total_mem, print_total_mem), 1331 - FEAT_OPA(HEADER_EVENT_DESC, write_event_desc, print_event_desc), 1332 - FEAT_OPA(HEADER_CMDLINE, write_cmdline, print_cmdline), 1333 - FEAT_OPF(HEADER_CPU_TOPOLOGY, write_cpu_topology, print_cpu_topology), 1334 - FEAT_OPF(HEADER_NUMA_TOPOLOGY, write_numa_topology, print_numa_topology), 1078 + FEAT_OPA(HEADER_TRACE_INFO, trace_info), 1079 + FEAT_OPA(HEADER_BUILD_ID, build_id), 1080 + FEAT_OPA(HEADER_HOSTNAME, hostname), 1081 + FEAT_OPA(HEADER_OSRELEASE, osrelease), 1082 + FEAT_OPA(HEADER_VERSION, version), 1083 + FEAT_OPA(HEADER_ARCH, arch), 1084 + FEAT_OPA(HEADER_NRCPUS, nrcpus), 1085 + FEAT_OPA(HEADER_CPUDESC, cpudesc), 1086 + FEAT_OPA(HEADER_CPUID, cpuid), 1087 + FEAT_OPA(HEADER_TOTAL_MEM, total_mem), 1088 + FEAT_OPA(HEADER_EVENT_DESC, event_desc), 1089 + FEAT_OPA(HEADER_CMDLINE, cmdline), 1090 + FEAT_OPF(HEADER_CPU_TOPOLOGY, cpu_topology), 1091 + FEAT_OPF(HEADER_NUMA_TOPOLOGY, numa_topology), 1335 1092 }; 1336 1093 1337 1094 struct header_print_data { ··· 1354 1103 "%d, continuing...\n", section->offset, feat); 1355 1104 return 0; 1356 1105 } 1357 - if (feat < HEADER_TRACE_INFO || feat >= HEADER_LAST_FEATURE) { 1106 + if (feat >= HEADER_LAST_FEATURE) { 1358 1107 pr_warning("unknown feature %d\n", feat); 1359 - return -1; 1108 + return 0; 1360 1109 } 1361 1110 if (!feat_ops[feat].print) 1362 1111 return 0; ··· 1383 1132 return 0; 1384 1133 } 1385 1134 1386 - #define dsos__for_each_with_build_id(pos, head) \ 1387 - list_for_each_entry(pos, head, node) \ 1388 - if (!pos->has_build_id) \ 1389 - continue; \ 1390 - else 1391 - 1392 - static 
int __dsos__write_buildid_table(struct list_head *head, pid_t pid, 1393 - u16 misc, int fd) 1394 - { 1395 - struct dso *pos; 1396 - 1397 - dsos__for_each_with_build_id(pos, head) { 1398 - int err; 1399 - struct build_id_event b; 1400 - size_t len; 1401 - 1402 - if (!pos->hit) 1403 - continue; 1404 - len = pos->long_name_len + 1; 1405 - len = ALIGN(len, NAME_ALIGN); 1406 - memset(&b, 0, sizeof(b)); 1407 - memcpy(&b.build_id, pos->build_id, sizeof(pos->build_id)); 1408 - b.pid = pid; 1409 - b.header.misc = misc; 1410 - b.header.size = sizeof(b) + len; 1411 - err = do_write(fd, &b, sizeof(b)); 1412 - if (err < 0) 1413 - return err; 1414 - err = write_padded(fd, pos->long_name, 1415 - pos->long_name_len + 1, len); 1416 - if (err < 0) 1417 - return err; 1418 - } 1419 - 1420 - return 0; 1421 - } 1422 - 1423 - static int machine__write_buildid_table(struct machine *machine, int fd) 1424 - { 1425 - int err; 1426 - u16 kmisc = PERF_RECORD_MISC_KERNEL, 1427 - umisc = PERF_RECORD_MISC_USER; 1428 - 1429 - if (!machine__is_host(machine)) { 1430 - kmisc = PERF_RECORD_MISC_GUEST_KERNEL; 1431 - umisc = PERF_RECORD_MISC_GUEST_USER; 1432 - } 1433 - 1434 - err = __dsos__write_buildid_table(&machine->kernel_dsos, machine->pid, 1435 - kmisc, fd); 1436 - if (err == 0) 1437 - err = __dsos__write_buildid_table(&machine->user_dsos, 1438 - machine->pid, umisc, fd); 1439 - return err; 1440 - } 1441 - 1442 - static int dsos__write_buildid_table(struct perf_header *header, int fd) 1443 - { 1444 - struct perf_session *session = container_of(header, 1445 - struct perf_session, header); 1446 - struct rb_node *nd; 1447 - int err = machine__write_buildid_table(&session->host_machine, fd); 1448 - 1449 - if (err) 1450 - return err; 1451 - 1452 - for (nd = rb_first(&session->machines); nd; nd = rb_next(nd)) { 1453 - struct machine *pos = rb_entry(nd, struct machine, rb_node); 1454 - err = machine__write_buildid_table(pos, fd); 1455 - if (err) 1456 - break; 1457 - } 1458 - return err; 1459 - } 1460 - 
1461 - int build_id_cache__add_s(const char *sbuild_id, const char *debugdir, 1462 - const char *name, bool is_kallsyms) 1463 - { 1464 - const size_t size = PATH_MAX; 1465 - char *realname, *filename = zalloc(size), 1466 - *linkname = zalloc(size), *targetname; 1467 - int len, err = -1; 1468 - 1469 - if (is_kallsyms) { 1470 - if (symbol_conf.kptr_restrict) { 1471 - pr_debug("Not caching a kptr_restrict'ed /proc/kallsyms\n"); 1472 - return 0; 1473 - } 1474 - realname = (char *)name; 1475 - } else 1476 - realname = realpath(name, NULL); 1477 - 1478 - if (realname == NULL || filename == NULL || linkname == NULL) 1479 - goto out_free; 1480 - 1481 - len = snprintf(filename, size, "%s%s%s", 1482 - debugdir, is_kallsyms ? "/" : "", realname); 1483 - if (mkdir_p(filename, 0755)) 1484 - goto out_free; 1485 - 1486 - snprintf(filename + len, sizeof(filename) - len, "/%s", sbuild_id); 1487 - 1488 - if (access(filename, F_OK)) { 1489 - if (is_kallsyms) { 1490 - if (copyfile("/proc/kallsyms", filename)) 1491 - goto out_free; 1492 - } else if (link(realname, filename) && copyfile(name, filename)) 1493 - goto out_free; 1494 - } 1495 - 1496 - len = snprintf(linkname, size, "%s/.build-id/%.2s", 1497 - debugdir, sbuild_id); 1498 - 1499 - if (access(linkname, X_OK) && mkdir_p(linkname, 0755)) 1500 - goto out_free; 1501 - 1502 - snprintf(linkname + len, size - len, "/%s", sbuild_id + 2); 1503 - targetname = filename + strlen(debugdir) - 5; 1504 - memcpy(targetname, "../..", 5); 1505 - 1506 - if (symlink(targetname, linkname) == 0) 1507 - err = 0; 1508 - out_free: 1509 - if (!is_kallsyms) 1510 - free(realname); 1511 - free(filename); 1512 - free(linkname); 1513 - return err; 1514 - } 1515 - 1516 - static int build_id_cache__add_b(const u8 *build_id, size_t build_id_size, 1517 - const char *name, const char *debugdir, 1518 - bool is_kallsyms) 1519 - { 1520 - char sbuild_id[BUILD_ID_SIZE * 2 + 1]; 1521 - 1522 - build_id__sprintf(build_id, build_id_size, sbuild_id); 1523 - 1524 - return 
build_id_cache__add_s(sbuild_id, debugdir, name, is_kallsyms); 1525 - } 1526 - 1527 - int build_id_cache__remove_s(const char *sbuild_id, const char *debugdir) 1528 - { 1529 - const size_t size = PATH_MAX; 1530 - char *filename = zalloc(size), 1531 - *linkname = zalloc(size); 1532 - int err = -1; 1533 - 1534 - if (filename == NULL || linkname == NULL) 1535 - goto out_free; 1536 - 1537 - snprintf(linkname, size, "%s/.build-id/%.2s/%s", 1538 - debugdir, sbuild_id, sbuild_id + 2); 1539 - 1540 - if (access(linkname, F_OK)) 1541 - goto out_free; 1542 - 1543 - if (readlink(linkname, filename, size - 1) < 0) 1544 - goto out_free; 1545 - 1546 - if (unlink(linkname)) 1547 - goto out_free; 1548 - 1549 - /* 1550 - * Since the link is relative, we must make it absolute: 1551 - */ 1552 - snprintf(linkname, size, "%s/.build-id/%.2s/%s", 1553 - debugdir, sbuild_id, filename); 1554 - 1555 - if (unlink(linkname)) 1556 - goto out_free; 1557 - 1558 - err = 0; 1559 - out_free: 1560 - free(filename); 1561 - free(linkname); 1562 - return err; 1563 - } 1564 - 1565 - static int dso__cache_build_id(struct dso *dso, const char *debugdir) 1566 - { 1567 - bool is_kallsyms = dso->kernel && dso->long_name[0] != '/'; 1568 - 1569 - return build_id_cache__add_b(dso->build_id, sizeof(dso->build_id), 1570 - dso->long_name, debugdir, is_kallsyms); 1571 - } 1572 - 1573 - static int __dsos__cache_build_ids(struct list_head *head, const char *debugdir) 1574 - { 1575 - struct dso *pos; 1576 - int err = 0; 1577 - 1578 - dsos__for_each_with_build_id(pos, head) 1579 - if (dso__cache_build_id(pos, debugdir)) 1580 - err = -1; 1581 - 1582 - return err; 1583 - } 1584 - 1585 - static int machine__cache_build_ids(struct machine *machine, const char *debugdir) 1586 - { 1587 - int ret = __dsos__cache_build_ids(&machine->kernel_dsos, debugdir); 1588 - ret |= __dsos__cache_build_ids(&machine->user_dsos, debugdir); 1589 - return ret; 1590 - } 1591 - 1592 - static int perf_session__cache_build_ids(struct perf_session 
*session) 1593 - { 1594 - struct rb_node *nd; 1595 - int ret; 1596 - char debugdir[PATH_MAX]; 1597 - 1598 - snprintf(debugdir, sizeof(debugdir), "%s", buildid_dir); 1599 - 1600 - if (mkdir(debugdir, 0755) != 0 && errno != EEXIST) 1601 - return -1; 1602 - 1603 - ret = machine__cache_build_ids(&session->host_machine, debugdir); 1604 - 1605 - for (nd = rb_first(&session->machines); nd; nd = rb_next(nd)) { 1606 - struct machine *pos = rb_entry(nd, struct machine, rb_node); 1607 - ret |= machine__cache_build_ids(pos, debugdir); 1608 - } 1609 - return ret ? -1 : 0; 1610 - } 1611 - 1612 - static bool machine__read_build_ids(struct machine *machine, bool with_hits) 1613 - { 1614 - bool ret = __dsos__read_build_ids(&machine->kernel_dsos, with_hits); 1615 - ret |= __dsos__read_build_ids(&machine->user_dsos, with_hits); 1616 - return ret; 1617 - } 1618 - 1619 - static bool perf_session__read_build_ids(struct perf_session *session, bool with_hits) 1620 - { 1621 - struct rb_node *nd; 1622 - bool ret = machine__read_build_ids(&session->host_machine, with_hits); 1623 - 1624 - for (nd = rb_first(&session->machines); nd; nd = rb_next(nd)) { 1625 - struct machine *pos = rb_entry(nd, struct machine, rb_node); 1626 - ret |= machine__read_build_ids(pos, with_hits); 1627 - } 1628 - 1629 - return ret; 1630 - } 1631 - 1632 1135 static int do_write_feat(int fd, struct perf_header *h, int type, 1633 1136 struct perf_file_section **p, 1634 1137 struct perf_evlist *evlist) ··· 1391 1386 int ret = 0; 1392 1387 1393 1388 if (perf_header__has_feat(h, type)) { 1389 + if (!feat_ops[type].write) 1390 + return -1; 1394 1391 1395 1392 (*p)->offset = lseek(fd, 0, SEEK_CUR); 1396 1393 ··· 1415 1408 struct perf_evlist *evlist, int fd) 1416 1409 { 1417 1410 int nr_sections; 1418 - struct perf_session *session; 1419 1411 struct perf_file_section *feat_sec, *p; 1420 1412 int sec_size; 1421 1413 u64 sec_start; 1414 + int feat; 1422 1415 int err; 1423 - 1424 - session = container_of(header, struct 
perf_session, header); 1425 - 1426 - if (perf_header__has_feat(header, HEADER_BUILD_ID && 1427 - !perf_session__read_build_ids(session, true))) 1428 - perf_header__clear_feat(header, HEADER_BUILD_ID); 1429 1416 1430 1417 nr_sections = bitmap_weight(header->adds_features, HEADER_FEAT_BITS); 1431 1418 if (!nr_sections) ··· 1434 1433 sec_start = header->data_offset + header->data_size; 1435 1434 lseek(fd, sec_start + sec_size, SEEK_SET); 1436 1435 1437 - err = do_write_feat(fd, header, HEADER_TRACE_INFO, &p, evlist); 1438 - if (err) 1439 - goto out_free; 1440 - 1441 - err = do_write_feat(fd, header, HEADER_BUILD_ID, &p, evlist); 1442 - if (err) { 1443 - perf_header__clear_feat(header, HEADER_BUILD_ID); 1444 - goto out_free; 1436 + for_each_set_bit(feat, header->adds_features, HEADER_FEAT_BITS) { 1437 + if (do_write_feat(fd, header, feat, &p, evlist)) 1438 + perf_header__clear_feat(header, feat); 1445 1439 } 1446 - 1447 - err = do_write_feat(fd, header, HEADER_HOSTNAME, &p, evlist); 1448 - if (err) 1449 - perf_header__clear_feat(header, HEADER_HOSTNAME); 1450 - 1451 - err = do_write_feat(fd, header, HEADER_OSRELEASE, &p, evlist); 1452 - if (err) 1453 - perf_header__clear_feat(header, HEADER_OSRELEASE); 1454 - 1455 - err = do_write_feat(fd, header, HEADER_VERSION, &p, evlist); 1456 - if (err) 1457 - perf_header__clear_feat(header, HEADER_VERSION); 1458 - 1459 - err = do_write_feat(fd, header, HEADER_ARCH, &p, evlist); 1460 - if (err) 1461 - perf_header__clear_feat(header, HEADER_ARCH); 1462 - 1463 - err = do_write_feat(fd, header, HEADER_NRCPUS, &p, evlist); 1464 - if (err) 1465 - perf_header__clear_feat(header, HEADER_NRCPUS); 1466 - 1467 - err = do_write_feat(fd, header, HEADER_CPUDESC, &p, evlist); 1468 - if (err) 1469 - perf_header__clear_feat(header, HEADER_CPUDESC); 1470 - 1471 - err = do_write_feat(fd, header, HEADER_CPUID, &p, evlist); 1472 - if (err) 1473 - perf_header__clear_feat(header, HEADER_CPUID); 1474 - 1475 - err = do_write_feat(fd, header, 
HEADER_TOTAL_MEM, &p, evlist); 1476 - if (err) 1477 - perf_header__clear_feat(header, HEADER_TOTAL_MEM); 1478 - 1479 - err = do_write_feat(fd, header, HEADER_CMDLINE, &p, evlist); 1480 - if (err) 1481 - perf_header__clear_feat(header, HEADER_CMDLINE); 1482 - 1483 - err = do_write_feat(fd, header, HEADER_EVENT_DESC, &p, evlist); 1484 - if (err) 1485 - perf_header__clear_feat(header, HEADER_EVENT_DESC); 1486 - 1487 - err = do_write_feat(fd, header, HEADER_CPU_TOPOLOGY, &p, evlist); 1488 - if (err) 1489 - perf_header__clear_feat(header, HEADER_CPU_TOPOLOGY); 1490 - 1491 - err = do_write_feat(fd, header, HEADER_NUMA_TOPOLOGY, &p, evlist); 1492 - if (err) 1493 - perf_header__clear_feat(header, HEADER_NUMA_TOPOLOGY); 1494 1440 1495 1441 lseek(fd, sec_start, SEEK_SET); 1496 1442 /* ··· 1447 1499 err = do_write(fd, feat_sec, sec_size); 1448 1500 if (err < 0) 1449 1501 pr_debug("failed to write feature section\n"); 1450 - out_free: 1451 1502 free(feat_sec); 1452 1503 return err; 1453 1504 } ··· 1584 1637 int perf_header__process_sections(struct perf_header *header, int fd, 1585 1638 void *data, 1586 1639 int (*process)(struct perf_file_section *section, 1587 - struct perf_header *ph, 1588 - int feat, int fd, void *data)) 1640 + struct perf_header *ph, 1641 + int feat, int fd, void *data)) 1589 1642 { 1590 - struct perf_file_section *feat_sec; 1643 + struct perf_file_section *feat_sec, *sec; 1591 1644 int nr_sections; 1592 1645 int sec_size; 1593 - int idx = 0; 1594 - int err = -1, feat = 1; 1646 + int feat; 1647 + int err; 1595 1648 1596 1649 nr_sections = bitmap_weight(header->adds_features, HEADER_FEAT_BITS); 1597 1650 if (!nr_sections) 1598 1651 return 0; 1599 1652 1600 - feat_sec = calloc(sizeof(*feat_sec), nr_sections); 1653 + feat_sec = sec = calloc(sizeof(*feat_sec), nr_sections); 1601 1654 if (!feat_sec) 1602 1655 return -1; 1603 1656 ··· 1605 1658 1606 1659 lseek(fd, header->data_offset + header->data_size, SEEK_SET); 1607 1660 1608 - if 
(perf_header__getbuffer64(header, fd, feat_sec, sec_size)) 1661 + err = perf_header__getbuffer64(header, fd, feat_sec, sec_size); 1662 + if (err < 0) 1609 1663 goto out_free; 1610 1664 1611 - err = 0; 1612 - while (idx < nr_sections && feat < HEADER_LAST_FEATURE) { 1613 - if (perf_header__has_feat(header, feat)) { 1614 - struct perf_file_section *sec = &feat_sec[idx++]; 1615 - 1616 - err = process(sec, header, feat, fd, data); 1617 - if (err < 0) 1618 - break; 1619 - } 1620 - ++feat; 1665 + for_each_set_bit(feat, header->adds_features, HEADER_LAST_FEATURE) { 1666 + err = process(sec++, header, feat, fd, data); 1667 + if (err < 0) 1668 + goto out_free; 1621 1669 } 1670 + err = 0; 1622 1671 out_free: 1623 1672 free(feat_sec); 1624 1673 return err; ··· 1849 1906 return 0; 1850 1907 } 1851 1908 1909 + if (feat >= HEADER_LAST_FEATURE) { 1910 + pr_debug("unknown feature %d, continuing...\n", feat); 1911 + return 0; 1912 + } 1913 + 1852 1914 switch (feat) { 1853 1915 case HEADER_TRACE_INFO: 1854 1916 trace_report(fd, false); 1855 1917 break; 1856 - 1857 1918 case HEADER_BUILD_ID: 1858 1919 if (perf_header__read_build_ids(ph, fd, section->offset, section->size)) 1859 1920 pr_debug("Failed to read buildids, continuing...\n"); 1860 1921 break; 1861 - 1862 - case HEADER_HOSTNAME: 1863 - case HEADER_OSRELEASE: 1864 - case HEADER_VERSION: 1865 - case HEADER_ARCH: 1866 - case HEADER_NRCPUS: 1867 - case HEADER_CPUDESC: 1868 - case HEADER_CPUID: 1869 - case HEADER_TOTAL_MEM: 1870 - case HEADER_CMDLINE: 1871 - case HEADER_EVENT_DESC: 1872 - case HEADER_CPU_TOPOLOGY: 1873 - case HEADER_NUMA_TOPOLOGY: 1874 - break; 1875 - 1876 1922 default: 1877 - pr_debug("unknown feature %d, continuing...\n", feat); 1923 + break; 1878 1924 } 1879 1925 1880 1926 return 0; ··· 1973 2041 lseek(fd, tmp, SEEK_SET); 1974 2042 } 1975 2043 2044 + symbol_conf.nr_events = nr_attrs; 2045 + 1976 2046 if (f_header.event_types.size) { 1977 2047 lseek(fd, f_header.event_types.offset, SEEK_SET); 1978 2048 events = 
malloc(f_header.event_types.size); ··· 2002 2068 return -ENOMEM; 2003 2069 } 2004 2070 2005 - int perf_event__synthesize_attr(struct perf_event_attr *attr, u16 ids, u64 *id, 2006 - perf_event__handler_t process, 2007 - struct perf_session *session) 2071 + int perf_event__synthesize_attr(struct perf_tool *tool, 2072 + struct perf_event_attr *attr, u16 ids, u64 *id, 2073 + perf_event__handler_t process) 2008 2074 { 2009 2075 union perf_event *ev; 2010 2076 size_t size; ··· 2026 2092 ev->attr.header.type = PERF_RECORD_HEADER_ATTR; 2027 2093 ev->attr.header.size = size; 2028 2094 2029 - err = process(ev, NULL, session); 2095 + err = process(tool, ev, NULL, NULL); 2030 2096 2031 2097 free(ev); 2032 2098 2033 2099 return err; 2034 2100 } 2035 2101 2036 - int perf_session__synthesize_attrs(struct perf_session *session, 2102 + int perf_event__synthesize_attrs(struct perf_tool *tool, 2103 + struct perf_session *session, 2037 2104 perf_event__handler_t process) 2038 2105 { 2039 2106 struct perf_evsel *attr; 2040 2107 int err = 0; 2041 2108 2042 2109 list_for_each_entry(attr, &session->evlist->entries, node) { 2043 - err = perf_event__synthesize_attr(&attr->attr, attr->ids, 2044 - attr->id, process, session); 2110 + err = perf_event__synthesize_attr(tool, &attr->attr, attr->ids, 2111 + attr->id, process); 2045 2112 if (err) { 2046 2113 pr_debug("failed to create perf header attribute\n"); 2047 2114 return err; ··· 2053 2118 } 2054 2119 2055 2120 int perf_event__process_attr(union perf_event *event, 2056 - struct perf_session *session) 2121 + struct perf_evlist **pevlist) 2057 2122 { 2058 2123 unsigned int i, ids, n_ids; 2059 2124 struct perf_evsel *evsel; 2125 + struct perf_evlist *evlist = *pevlist; 2060 2126 2061 - if (session->evlist == NULL) { 2062 - session->evlist = perf_evlist__new(NULL, NULL); 2063 - if (session->evlist == NULL) 2127 + if (evlist == NULL) { 2128 + *pevlist = evlist = perf_evlist__new(NULL, NULL); 2129 + if (evlist == NULL) 2064 2130 return -ENOMEM; 
2065 2131 } 2066 2132 2067 - evsel = perf_evsel__new(&event->attr.attr, 2068 - session->evlist->nr_entries); 2133 + evsel = perf_evsel__new(&event->attr.attr, evlist->nr_entries); 2069 2134 if (evsel == NULL) 2070 2135 return -ENOMEM; 2071 2136 2072 - perf_evlist__add(session->evlist, evsel); 2137 + perf_evlist__add(evlist, evsel); 2073 2138 2074 2139 ids = event->header.size; 2075 2140 ids -= (void *)&event->attr.id - (void *)event; ··· 2083 2148 return -ENOMEM; 2084 2149 2085 2150 for (i = 0; i < n_ids; i++) { 2086 - perf_evlist__id_add(session->evlist, evsel, 0, i, 2087 - event->attr.id[i]); 2151 + perf_evlist__id_add(evlist, evsel, 0, i, event->attr.id[i]); 2088 2152 } 2089 - 2090 - perf_session__update_sample_type(session); 2091 2153 2092 2154 return 0; 2093 2155 } 2094 2156 2095 - int perf_event__synthesize_event_type(u64 event_id, char *name, 2157 + int perf_event__synthesize_event_type(struct perf_tool *tool, 2158 + u64 event_id, char *name, 2096 2159 perf_event__handler_t process, 2097 - struct perf_session *session) 2160 + struct machine *machine) 2098 2161 { 2099 2162 union perf_event ev; 2100 2163 size_t size = 0; ··· 2110 2177 ev.event_type.header.size = sizeof(ev.event_type) - 2111 2178 (sizeof(ev.event_type.event_type.name) - size); 2112 2179 2113 - err = process(&ev, NULL, session); 2180 + err = process(tool, &ev, NULL, machine); 2114 2181 2115 2182 return err; 2116 2183 } 2117 2184 2118 - int perf_event__synthesize_event_types(perf_event__handler_t process, 2119 - struct perf_session *session) 2185 + int perf_event__synthesize_event_types(struct perf_tool *tool, 2186 + perf_event__handler_t process, 2187 + struct machine *machine) 2120 2188 { 2121 2189 struct perf_trace_event_type *type; 2122 2190 int i, err = 0; ··· 2125 2191 for (i = 0; i < event_count; i++) { 2126 2192 type = &events[i]; 2127 2193 2128 - err = perf_event__synthesize_event_type(type->event_id, 2194 + err = perf_event__synthesize_event_type(tool, type->event_id, 2129 2195 
type->name, process, 2130 - session); 2196 + machine); 2131 2197 if (err) { 2132 2198 pr_debug("failed to create perf header event type\n"); 2133 2199 return err; ··· 2137 2203 return err; 2138 2204 } 2139 2205 2140 - int perf_event__process_event_type(union perf_event *event, 2141 - struct perf_session *session __unused) 2206 + int perf_event__process_event_type(struct perf_tool *tool __unused, 2207 + union perf_event *event) 2142 2208 { 2143 2209 if (perf_header__push_event(event->event_type.event_type.event_id, 2144 2210 event->event_type.event_type.name) < 0) ··· 2147 2213 return 0; 2148 2214 } 2149 2215 2150 - int perf_event__synthesize_tracing_data(int fd, struct perf_evlist *evlist, 2151 - perf_event__handler_t process, 2152 - struct perf_session *session __unused) 2216 + int perf_event__synthesize_tracing_data(struct perf_tool *tool, int fd, 2217 + struct perf_evlist *evlist, 2218 + perf_event__handler_t process) 2153 2219 { 2154 2220 union perf_event ev; 2155 2221 struct tracing_data *tdata; ··· 2180 2246 ev.tracing_data.header.size = sizeof(ev.tracing_data); 2181 2247 ev.tracing_data.size = aligned_size; 2182 2248 2183 - process(&ev, NULL, session); 2249 + process(tool, &ev, NULL, NULL); 2184 2250 2185 2251 /* 2186 2252 * The put function will copy all the tracing data ··· 2222 2288 return size_read + padding; 2223 2289 } 2224 2290 2225 - int perf_event__synthesize_build_id(struct dso *pos, u16 misc, 2291 + int perf_event__synthesize_build_id(struct perf_tool *tool, 2292 + struct dso *pos, u16 misc, 2226 2293 perf_event__handler_t process, 2227 - struct machine *machine, 2228 - struct perf_session *session) 2294 + struct machine *machine) 2229 2295 { 2230 2296 union perf_event ev; 2231 2297 size_t len; ··· 2245 2311 ev.build_id.header.size = sizeof(ev.build_id) + len; 2246 2312 memcpy(&ev.build_id.filename, pos->long_name, pos->long_name_len); 2247 2313 2248 - err = process(&ev, NULL, session); 2314 + err = process(tool, &ev, NULL, machine); 2249 2315 
2250 2316 return err; 2251 2317 } 2252 2318 2253 - int perf_event__process_build_id(union perf_event *event, 2319 + int perf_event__process_build_id(struct perf_tool *tool __used, 2320 + union perf_event *event, 2254 2321 struct perf_session *session) 2255 2322 { 2256 2323 __event_process_build_id(&event->build_id,
+27 -22
tools/perf/util/header.h
··· 10 10 #include <linux/bitmap.h> 11 11 12 12 enum { 13 - HEADER_TRACE_INFO = 1, 13 + HEADER_RESERVED = 0, /* always cleared */ 14 + HEADER_TRACE_INFO = 1, 14 15 HEADER_BUILD_ID, 15 16 16 17 HEADER_HOSTNAME, ··· 28 27 HEADER_NUMA_TOPOLOGY, 29 28 30 29 HEADER_LAST_FEATURE, 30 + HEADER_FEAT_BITS = 256, 31 31 }; 32 - 33 - #define HEADER_FEAT_BITS 256 34 32 35 33 struct perf_file_section { 36 34 u64 offset; ··· 68 68 }; 69 69 70 70 struct perf_evlist; 71 + struct perf_session; 71 72 72 73 int perf_session__read_header(struct perf_session *session, int fd); 73 74 int perf_session__write_header(struct perf_session *session, ··· 97 96 const char *name, bool is_kallsyms); 98 97 int build_id_cache__remove_s(const char *sbuild_id, const char *debugdir); 99 98 100 - int perf_event__synthesize_attr(struct perf_event_attr *attr, u16 ids, u64 *id, 101 - perf_event__handler_t process, 102 - struct perf_session *session); 103 - int perf_session__synthesize_attrs(struct perf_session *session, 104 - perf_event__handler_t process); 105 - int perf_event__process_attr(union perf_event *event, struct perf_session *session); 99 + int perf_event__synthesize_attr(struct perf_tool *tool, 100 + struct perf_event_attr *attr, u16 ids, u64 *id, 101 + perf_event__handler_t process); 102 + int perf_event__synthesize_attrs(struct perf_tool *tool, 103 + struct perf_session *session, 104 + perf_event__handler_t process); 105 + int perf_event__process_attr(union perf_event *event, struct perf_evlist **pevlist); 106 106 107 - int perf_event__synthesize_event_type(u64 event_id, char *name, 107 + int perf_event__synthesize_event_type(struct perf_tool *tool, 108 + u64 event_id, char *name, 108 109 perf_event__handler_t process, 109 - struct perf_session *session); 110 - int perf_event__synthesize_event_types(perf_event__handler_t process, 111 - struct perf_session *session); 112 - int perf_event__process_event_type(union perf_event *event, 113 - struct perf_session *session); 110 + struct machine 
*machine); 111 + int perf_event__synthesize_event_types(struct perf_tool *tool, 112 + perf_event__handler_t process, 113 + struct machine *machine); 114 + int perf_event__process_event_type(struct perf_tool *tool, 115 + union perf_event *event); 114 116 115 - int perf_event__synthesize_tracing_data(int fd, struct perf_evlist *evlist, 116 - perf_event__handler_t process, 117 - struct perf_session *session); 117 + int perf_event__synthesize_tracing_data(struct perf_tool *tool, 118 + int fd, struct perf_evlist *evlist, 119 + perf_event__handler_t process); 118 120 int perf_event__process_tracing_data(union perf_event *event, 119 121 struct perf_session *session); 120 122 121 - int perf_event__synthesize_build_id(struct dso *pos, u16 misc, 123 + int perf_event__synthesize_build_id(struct perf_tool *tool, 124 + struct dso *pos, u16 misc, 122 125 perf_event__handler_t process, 123 - struct machine *machine, 124 - struct perf_session *session); 125 - int perf_event__process_build_id(union perf_event *event, 126 + struct machine *machine); 127 + int perf_event__process_build_id(struct perf_tool *tool, 128 + union perf_event *event, 126 129 struct perf_session *session); 127 130 128 131 /*
+1 -2
tools/perf/util/hist.h
··· 117 117 118 118 static inline int hist_entry__tui_annotate(struct hist_entry *self __used, 119 119 int evidx __used, 120 - int nr_events __used, 121 120 void(*timer)(void *arg) __used, 122 121 void *arg __used, 123 122 int delay_secs __used) ··· 127 128 #define K_RIGHT -2 128 129 #else 129 130 #include "ui/keysyms.h" 130 - int hist_entry__tui_annotate(struct hist_entry *he, int evidx, int nr_events, 131 + int hist_entry__tui_annotate(struct hist_entry *he, int evidx, 131 132 void(*timer)(void *arg), void *arg, int delay_secs); 132 133 133 134 int perf_evlist__tui_browse_hists(struct perf_evlist *evlist, const char *help,
+118
tools/perf/util/include/linux/bitops.h
··· 9 9 #define BITS_PER_BYTE 8 10 10 #define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(long)) 11 11 12 + #define for_each_set_bit(bit, addr, size) \ 13 + for ((bit) = find_first_bit((addr), (size)); \ 14 + (bit) < (size); \ 15 + (bit) = find_next_bit((addr), (size), (bit) + 1)) 16 + 17 + /* same as for_each_set_bit() but use bit as value to start with */ 18 + #define for_each_set_bit_cont(bit, addr, size) \ 19 + for ((bit) = find_next_bit((addr), (size), (bit)); \ 20 + (bit) < (size); \ 21 + (bit) = find_next_bit((addr), (size), (bit) + 1)) 22 + 12 23 static inline void set_bit(int nr, unsigned long *addr) 13 24 { 14 25 addr[nr / BITS_PER_LONG] |= 1UL << (nr % BITS_PER_LONG); ··· 39 28 static inline unsigned long hweight_long(unsigned long w) 40 29 { 41 30 return sizeof(w) == 4 ? hweight32(w) : hweight64(w); 31 + } 32 + 33 + #define BITOP_WORD(nr) ((nr) / BITS_PER_LONG) 34 + 35 + /** 36 + * __ffs - find first bit in word. 37 + * @word: The word to search 38 + * 39 + * Undefined if no bit exists, so code should check against 0 first. 40 + */ 41 + static __always_inline unsigned long __ffs(unsigned long word) 42 + { 43 + int num = 0; 44 + 45 + #if BITS_PER_LONG == 64 46 + if ((word & 0xffffffff) == 0) { 47 + num += 32; 48 + word >>= 32; 49 + } 50 + #endif 51 + if ((word & 0xffff) == 0) { 52 + num += 16; 53 + word >>= 16; 54 + } 55 + if ((word & 0xff) == 0) { 56 + num += 8; 57 + word >>= 8; 58 + } 59 + if ((word & 0xf) == 0) { 60 + num += 4; 61 + word >>= 4; 62 + } 63 + if ((word & 0x3) == 0) { 64 + num += 2; 65 + word >>= 2; 66 + } 67 + if ((word & 0x1) == 0) 68 + num += 1; 69 + return num; 70 + } 71 + 72 + /* 73 + * Find the first set bit in a memory region. 
74 + */ 75 + static inline unsigned long 76 + find_first_bit(const unsigned long *addr, unsigned long size) 77 + { 78 + const unsigned long *p = addr; 79 + unsigned long result = 0; 80 + unsigned long tmp; 81 + 82 + while (size & ~(BITS_PER_LONG-1)) { 83 + if ((tmp = *(p++))) 84 + goto found; 85 + result += BITS_PER_LONG; 86 + size -= BITS_PER_LONG; 87 + } 88 + if (!size) 89 + return result; 90 + 91 + tmp = (*p) & (~0UL >> (BITS_PER_LONG - size)); 92 + if (tmp == 0UL) /* Are any bits set? */ 93 + return result + size; /* Nope. */ 94 + found: 95 + return result + __ffs(tmp); 96 + } 97 + 98 + /* 99 + * Find the next set bit in a memory region. 100 + */ 101 + static inline unsigned long 102 + find_next_bit(const unsigned long *addr, unsigned long size, unsigned long offset) 103 + { 104 + const unsigned long *p = addr + BITOP_WORD(offset); 105 + unsigned long result = offset & ~(BITS_PER_LONG-1); 106 + unsigned long tmp; 107 + 108 + if (offset >= size) 109 + return size; 110 + size -= result; 111 + offset %= BITS_PER_LONG; 112 + if (offset) { 113 + tmp = *(p++); 114 + tmp &= (~0UL << offset); 115 + if (size < BITS_PER_LONG) 116 + goto found_first; 117 + if (tmp) 118 + goto found_middle; 119 + size -= BITS_PER_LONG; 120 + result += BITS_PER_LONG; 121 + } 122 + while (size & ~(BITS_PER_LONG-1)) { 123 + if ((tmp = *(p++))) 124 + goto found_middle; 125 + result += BITS_PER_LONG; 126 + size -= BITS_PER_LONG; 127 + } 128 + if (!size) 129 + return result; 130 + tmp = *p; 131 + 132 + found_first: 133 + tmp &= (~0UL >> (BITS_PER_LONG - size)); 134 + if (tmp == 0UL) /* Are any bits set? */ 135 + return result + size; /* Nope. */ 136 + found_middle: 137 + return result + __ffs(tmp); 42 138 } 43 139 44 140 #endif
+4
tools/perf/util/map.c
··· 562 562 INIT_LIST_HEAD(&self->user_dsos); 563 563 INIT_LIST_HEAD(&self->kernel_dsos); 564 564 565 + self->threads = RB_ROOT; 566 + INIT_LIST_HEAD(&self->dead_threads); 567 + self->last_match = NULL; 568 + 565 569 self->kmaps.machine = self; 566 570 self->pid = pid; 567 571 self->root_dir = strdup(root_dir);
+19
tools/perf/util/map.h
··· 18 18 extern const char *map_type__name[MAP__NR_TYPES]; 19 19 20 20 struct dso; 21 + struct ip_callchain; 21 22 struct ref_reloc_sym; 22 23 struct map_groups; 23 24 struct machine; 25 + struct perf_evsel; 24 26 25 27 struct map { 26 28 union { ··· 63 61 struct machine { 64 62 struct rb_node rb_node; 65 63 pid_t pid; 64 + u16 id_hdr_size; 66 65 char *root_dir; 66 + struct rb_root threads; 67 + struct list_head dead_threads; 68 + struct thread *last_match; 67 69 struct list_head user_dsos; 68 70 struct list_head kernel_dsos; 69 71 struct map_groups kmaps; ··· 154 148 void machine__exit(struct machine *self); 155 149 void machine__delete(struct machine *self); 156 150 151 + int machine__resolve_callchain(struct machine *machine, 152 + struct perf_evsel *evsel, struct thread *thread, 153 + struct ip_callchain *chain, 154 + struct symbol **parent); 155 + int maps__set_kallsyms_ref_reloc_sym(struct map **maps, const char *symbol_name, 156 + u64 addr); 157 + 157 158 /* 158 159 * Default guest kernel is defined by parameter --guestkallsyms 159 160 * and --guestmodules ··· 202 189 const char *name, 203 190 struct map **mapp, 204 191 symbol_filter_t filter); 192 + 193 + 194 + struct thread *machine__findnew_thread(struct machine *machine, pid_t pid); 195 + void machine__remove_thread(struct machine *machine, struct thread *th); 196 + 197 + size_t machine__fprintf(struct machine *machine, FILE *fp); 205 198 206 199 static inline 207 200 struct symbol *machine__find_kernel_symbol(struct machine *self,
+15 -15
tools/perf/util/parse-events.c
··· 25 25 EVT_HANDLED_ALL 26 26 }; 27 27 28 - char debugfs_path[MAXPATHLEN]; 29 - 30 28 #define CHW(x) .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_##x 31 29 #define CSW(x) .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_##x 32 30 ··· 38 40 { CHW(BRANCH_INSTRUCTIONS), "branch-instructions", "branches" }, 39 41 { CHW(BRANCH_MISSES), "branch-misses", "" }, 40 42 { CHW(BUS_CYCLES), "bus-cycles", "" }, 43 + { CHW(REF_CPU_CYCLES), "ref-cycles", "" }, 41 44 42 45 { CSW(CPU_CLOCK), "cpu-clock", "" }, 43 46 { CSW(TASK_CLOCK), "task-clock", "" }, ··· 69 70 "bus-cycles", 70 71 "stalled-cycles-frontend", 71 72 "stalled-cycles-backend", 73 + "ref-cycles", 72 74 }; 73 75 74 76 static const char *sw_event_names[PERF_COUNT_SW_MAX] = { ··· 140 140 char evt_path[MAXPATHLEN]; 141 141 int fd; 142 142 143 - snprintf(evt_path, MAXPATHLEN, "%s/%s/%s/id", debugfs_path, 143 + snprintf(evt_path, MAXPATHLEN, "%s/%s/%s/id", tracing_events_path, 144 144 sys_dir->d_name, evt_dir->d_name); 145 145 fd = open(evt_path, O_RDONLY); 146 146 if (fd < 0) ··· 171 171 char evt_path[MAXPATHLEN]; 172 172 char dir_path[MAXPATHLEN]; 173 173 174 - if (debugfs_valid_mountpoint(debugfs_path)) 174 + if (debugfs_valid_mountpoint(tracing_events_path)) 175 175 return NULL; 176 176 177 - sys_dir = opendir(debugfs_path); 177 + sys_dir = opendir(tracing_events_path); 178 178 if (!sys_dir) 179 179 return NULL; 180 180 181 181 for_each_subsystem(sys_dir, sys_dirent, sys_next) { 182 182 183 - snprintf(dir_path, MAXPATHLEN, "%s/%s", debugfs_path, 183 + snprintf(dir_path, MAXPATHLEN, "%s/%s", tracing_events_path, 184 184 sys_dirent.d_name); 185 185 evt_dir = opendir(dir_path); 186 186 if (!evt_dir) ··· 447 447 u64 id; 448 448 int fd; 449 449 450 - snprintf(evt_path, MAXPATHLEN, "%s/%s/%s/id", debugfs_path, 450 + snprintf(evt_path, MAXPATHLEN, "%s/%s/%s/id", tracing_events_path, 451 451 sys_name, evt_name); 452 452 453 453 fd = open(evt_path, O_RDONLY); ··· 485 485 struct dirent *evt_ent; 486 486 DIR *evt_dir; 
487 487 488 - snprintf(evt_path, MAXPATHLEN, "%s/%s", debugfs_path, sys_name); 488 + snprintf(evt_path, MAXPATHLEN, "%s/%s", tracing_events_path, sys_name); 489 489 evt_dir = opendir(evt_path); 490 490 491 491 if (!evt_dir) { ··· 528 528 char sys_name[MAX_EVENT_LENGTH]; 529 529 unsigned int sys_length, evt_length; 530 530 531 - if (debugfs_valid_mountpoint(debugfs_path)) 531 + if (debugfs_valid_mountpoint(tracing_events_path)) 532 532 return 0; 533 533 534 534 evt_name = strchr(*strp, ':'); ··· 920 920 char evt_path[MAXPATHLEN]; 921 921 char dir_path[MAXPATHLEN]; 922 922 923 - if (debugfs_valid_mountpoint(debugfs_path)) 923 + if (debugfs_valid_mountpoint(tracing_events_path)) 924 924 return; 925 925 926 - sys_dir = opendir(debugfs_path); 926 + sys_dir = opendir(tracing_events_path); 927 927 if (!sys_dir) 928 928 return; 929 929 ··· 932 932 !strglobmatch(sys_dirent.d_name, subsys_glob)) 933 933 continue; 934 934 935 - snprintf(dir_path, MAXPATHLEN, "%s/%s", debugfs_path, 935 + snprintf(dir_path, MAXPATHLEN, "%s/%s", tracing_events_path, 936 936 sys_dirent.d_name); 937 937 evt_dir = opendir(dir_path); 938 938 if (!evt_dir) ··· 964 964 char evt_path[MAXPATHLEN]; 965 965 char dir_path[MAXPATHLEN]; 966 966 967 - if (debugfs_valid_mountpoint(debugfs_path)) 967 + if (debugfs_valid_mountpoint(tracing_events_path)) 968 968 return 0; 969 969 970 - sys_dir = opendir(debugfs_path); 970 + sys_dir = opendir(tracing_events_path); 971 971 if (!sys_dir) 972 972 return 0; 973 973 974 974 for_each_subsystem(sys_dir, sys_dirent, sys_next) { 975 975 976 - snprintf(dir_path, MAXPATHLEN, "%s/%s", debugfs_path, 976 + snprintf(dir_path, MAXPATHLEN, "%s/%s", tracing_events_path, 977 977 sys_dirent.d_name); 978 978 evt_dir = opendir(dir_path); 979 979 if (!evt_dir)
-1
tools/perf/util/parse-events.h
··· 39 39 int print_hwcache_events(const char *event_glob); 40 40 extern int is_valid_tracepoint(const char *event_string); 41 41 42 - extern char debugfs_path[]; 43 42 extern int valid_debugfs_mount(const char *debugfs); 44 43 45 44 #endif /* __PERF_PARSE_EVENTS_H */
-1
tools/perf/util/probe-finder.h
··· 5 5 #include "util.h" 6 6 #include "probe-event.h" 7 7 8 - #define MAX_PATH_LEN 256 9 8 #define MAX_PROBE_BUFFER 1024 10 9 #define MAX_PROBES 128 11 10
+69 -6
tools/perf/util/scripting-engines/trace-event-perl.c
··· 27 27 28 28 #include "../../perf.h" 29 29 #include "../util.h" 30 + #include "../thread.h" 31 + #include "../event.h" 30 32 #include "../trace-event.h" 33 + #include "../evsel.h" 31 34 32 35 #include <EXTERN.h> 33 36 #include <perl.h> ··· 248 245 return event; 249 246 } 250 247 251 - static void perl_process_event(union perf_event *pevent __unused, 252 - struct perf_sample *sample, 253 - struct perf_evsel *evsel, 254 - struct perf_session *session __unused, 255 - struct thread *thread) 248 + static void perl_process_tracepoint(union perf_event *pevent __unused, 249 + struct perf_sample *sample, 250 + struct perf_evsel *evsel, 251 + struct machine *machine __unused, 252 + struct thread *thread) 256 253 { 257 254 struct format_field *field; 258 255 static char handler[256]; ··· 267 264 char *comm = thread->comm; 268 265 269 266 dSP; 267 + 268 + if (evsel->attr.type != PERF_TYPE_TRACEPOINT) 269 + return; 270 270 271 271 type = trace_parse_common_type(data); 272 272 ··· 336 330 PUTBACK; 337 331 FREETMPS; 338 332 LEAVE; 333 + } 334 + 335 + static void perl_process_event_generic(union perf_event *pevent __unused, 336 + struct perf_sample *sample, 337 + struct perf_evsel *evsel __unused, 338 + struct machine *machine __unused, 339 + struct thread *thread __unused) 340 + { 341 + dSP; 342 + 343 + if (!get_cv("process_event", 0)) 344 + return; 345 + 346 + ENTER; 347 + SAVETMPS; 348 + PUSHMARK(SP); 349 + XPUSHs(sv_2mortal(newSVpvn((const char *)pevent, pevent->header.size))); 350 + XPUSHs(sv_2mortal(newSVpvn((const char *)&evsel->attr, sizeof(evsel->attr)))); 351 + XPUSHs(sv_2mortal(newSVpvn((const char *)sample, sizeof(*sample)))); 352 + XPUSHs(sv_2mortal(newSVpvn((const char *)sample->raw_data, sample->raw_size))); 353 + PUTBACK; 354 + call_pv("process_event", G_SCALAR); 355 + SPAGAIN; 356 + PUTBACK; 357 + FREETMPS; 358 + LEAVE; 359 + } 360 + 361 + static void perl_process_event(union perf_event *pevent, 362 + struct perf_sample *sample, 363 + struct perf_evsel *evsel, 
364 + struct machine *machine, 365 + struct thread *thread) 366 + { 367 + perl_process_tracepoint(pevent, sample, evsel, machine, thread); 368 + perl_process_event_generic(pevent, sample, evsel, machine, thread); 339 369 } 340 370 341 371 static void run_start_sub(void) ··· 595 553 fprintf(ofp, "sub print_header\n{\n" 596 554 "\tmy ($event_name, $cpu, $secs, $nsecs, $pid, $comm) = @_;\n\n" 597 555 "\tprintf(\"%%-20s %%5u %%05u.%%09u %%8u %%-20s \",\n\t " 598 - "$event_name, $cpu, $secs, $nsecs, $pid, $comm);\n}"); 556 + "$event_name, $cpu, $secs, $nsecs, $pid, $comm);\n}\n"); 557 + 558 + fprintf(ofp, 559 + "\n# Packed byte string args of process_event():\n" 560 + "#\n" 561 + "# $event:\tunion perf_event\tutil/event.h\n" 562 + "# $attr:\tstruct perf_event_attr\tlinux/perf_event.h\n" 563 + "# $sample:\tstruct perf_sample\tutil/event.h\n" 564 + "# $raw_data:\tperf_sample->raw_data\tutil/event.h\n" 565 + "\n" 566 + "sub process_event\n" 567 + "{\n" 568 + "\tmy ($event, $attr, $sample, $raw_data) = @_;\n" 569 + "\n" 570 + "\tmy @event\t= unpack(\"LSS\", $event);\n" 571 + "\tmy @attr\t= unpack(\"LLQQQQQLLQQ\", $attr);\n" 572 + "\tmy @sample\t= unpack(\"QLLQQQQQLL\", $sample);\n" 573 + "\tmy @raw_data\t= unpack(\"C*\", $raw_data);\n" 574 + "\n" 575 + "\tuse Data::Dumper;\n" 576 + "\tprint Dumper \\@event, \\@attr, \\@sample, \\@raw_data;\n" 577 + "}\n"); 599 578 600 579 fclose(ofp); 601 580
+3 -1
tools/perf/util/scripting-engines/trace-event-python.c
··· 29 29 30 30 #include "../../perf.h" 31 31 #include "../util.h" 32 + #include "../event.h" 33 + #include "../thread.h" 32 34 #include "../trace-event.h" 33 35 34 36 PyMODINIT_FUNC initperf_trace_context(void); ··· 209 207 static void python_process_event(union perf_event *pevent __unused, 210 208 struct perf_sample *sample, 211 209 struct perf_evsel *evsel __unused, 212 - struct perf_session *session __unused, 210 + struct machine *machine __unused, 213 211 struct thread *thread) 214 212 { 215 213 PyObject *handler, *retval, *context, *t, *obj, *dict = NULL;
+199 -145
tools/perf/util/session.c
··· 10 10 #include "evlist.h" 11 11 #include "evsel.h" 12 12 #include "session.h" 13 + #include "tool.h" 13 14 #include "sort.h" 14 15 #include "util.h" 15 16 #include "cpumap.h" ··· 79 78 return -1; 80 79 } 81 80 82 - static void perf_session__id_header_size(struct perf_session *session) 83 - { 84 - struct perf_sample *data; 85 - u64 sample_type = session->sample_type; 86 - u16 size = 0; 87 - 88 - if (!session->sample_id_all) 89 - goto out; 90 - 91 - if (sample_type & PERF_SAMPLE_TID) 92 - size += sizeof(data->tid) * 2; 93 - 94 - if (sample_type & PERF_SAMPLE_TIME) 95 - size += sizeof(data->time); 96 - 97 - if (sample_type & PERF_SAMPLE_ID) 98 - size += sizeof(data->id); 99 - 100 - if (sample_type & PERF_SAMPLE_STREAM_ID) 101 - size += sizeof(data->stream_id); 102 - 103 - if (sample_type & PERF_SAMPLE_CPU) 104 - size += sizeof(data->cpu) * 2; 105 - out: 106 - session->id_hdr_size = size; 107 - } 108 - 109 81 void perf_session__update_sample_type(struct perf_session *self) 110 82 { 111 83 self->sample_type = perf_evlist__sample_type(self->evlist); 112 84 self->sample_size = __perf_evsel__sample_size(self->sample_type); 113 85 self->sample_id_all = perf_evlist__sample_id_all(self->evlist); 114 - perf_session__id_header_size(self); 86 + self->id_hdr_size = perf_evlist__id_hdr_size(self->evlist); 87 + self->host_machine.id_hdr_size = self->id_hdr_size; 115 88 } 116 89 117 90 int perf_session__create_kernel_maps(struct perf_session *self) ··· 105 130 106 131 struct perf_session *perf_session__new(const char *filename, int mode, 107 132 bool force, bool repipe, 108 - struct perf_event_ops *ops) 133 + struct perf_tool *tool) 109 134 { 110 - size_t len = filename ? 
strlen(filename) + 1 : 0; 111 - struct perf_session *self = zalloc(sizeof(*self) + len); 135 + struct perf_session *self; 136 + struct stat st; 137 + size_t len; 138 + 139 + if (!filename || !strlen(filename)) { 140 + if (!fstat(STDIN_FILENO, &st) && S_ISFIFO(st.st_mode)) 141 + filename = "-"; 142 + else 143 + filename = "perf.data"; 144 + } 145 + 146 + len = strlen(filename); 147 + self = zalloc(sizeof(*self) + len); 112 148 113 149 if (self == NULL) 114 150 goto out; 115 151 116 152 memcpy(self->filename, filename, len); 117 - self->threads = RB_ROOT; 118 - INIT_LIST_HEAD(&self->dead_threads); 119 - self->last_match = NULL; 120 153 /* 121 154 * On 64bit we can mmap the data file in one go. No need for tiny mmap 122 155 * slices. On 32bit we use 32MB. ··· 154 171 goto out_delete; 155 172 } 156 173 157 - if (ops && ops->ordering_requires_timestamps && 158 - ops->ordered_samples && !self->sample_id_all) { 174 + if (tool && tool->ordering_requires_timestamps && 175 + tool->ordered_samples && !self->sample_id_all) { 159 176 dump_printf("WARNING: No sample_id_all support, falling back to unordered processing\n"); 160 - ops->ordered_samples = false; 177 + tool->ordered_samples = false; 161 178 } 162 179 163 180 out: ··· 167 184 return NULL; 168 185 } 169 186 170 - static void perf_session__delete_dead_threads(struct perf_session *self) 187 + static void machine__delete_dead_threads(struct machine *machine) 171 188 { 172 189 struct thread *n, *t; 173 190 174 - list_for_each_entry_safe(t, n, &self->dead_threads, node) { 191 + list_for_each_entry_safe(t, n, &machine->dead_threads, node) { 175 192 list_del(&t->node); 176 193 thread__delete(t); 177 194 } 178 195 } 179 196 180 - static void perf_session__delete_threads(struct perf_session *self) 197 + static void perf_session__delete_dead_threads(struct perf_session *session) 198 + { 199 + machine__delete_dead_threads(&session->host_machine); 200 + } 201 + 202 + static void machine__delete_threads(struct machine *self) 181 
203 { 182 204 struct rb_node *nd = rb_first(&self->threads); 183 205 ··· 195 207 } 196 208 } 197 209 210 + static void perf_session__delete_threads(struct perf_session *session) 211 + { 212 + machine__delete_threads(&session->host_machine); 213 + } 214 + 198 215 void perf_session__delete(struct perf_session *self) 199 216 { 200 217 perf_session__destroy_kernel_maps(self); ··· 210 217 free(self); 211 218 } 212 219 213 - void perf_session__remove_thread(struct perf_session *self, struct thread *th) 220 + void machine__remove_thread(struct machine *self, struct thread *th) 214 221 { 215 222 self->last_match = NULL; 216 223 rb_erase(&th->rb_node, &self->threads); ··· 229 236 return 0; 230 237 } 231 238 232 - int perf_session__resolve_callchain(struct perf_session *self, 233 - struct thread *thread, 234 - struct ip_callchain *chain, 235 - struct symbol **parent) 239 + int machine__resolve_callchain(struct machine *self, struct perf_evsel *evsel, 240 + struct thread *thread, 241 + struct ip_callchain *chain, 242 + struct symbol **parent) 236 243 { 237 244 u8 cpumode = PERF_RECORD_MISC_USER; 238 245 unsigned int i; 239 246 int err; 240 247 241 - callchain_cursor_reset(&self->callchain_cursor); 248 + callchain_cursor_reset(&evsel->hists.callchain_cursor); 242 249 243 250 for (i = 0; i < chain->nr; i++) { 244 251 u64 ip; ··· 265 272 266 273 al.filtered = false; 267 274 thread__find_addr_location(thread, self, cpumode, 268 - MAP__FUNCTION, thread->pid, ip, &al, NULL); 275 + MAP__FUNCTION, ip, &al, NULL); 269 276 if (al.sym != NULL) { 270 277 if (sort__has_parent && !*parent && 271 278 symbol__match_parent_regex(al.sym)) ··· 274 281 break; 275 282 } 276 283 277 - err = callchain_cursor_append(&self->callchain_cursor, 284 + err = callchain_cursor_append(&evsel->hists.callchain_cursor, 278 285 ip, al.map, al.sym); 279 286 if (err) 280 287 return err; ··· 283 290 return 0; 284 291 } 285 292 286 - static int process_event_synth_stub(union perf_event *event __used, 287 - struct 
perf_session *session __used) 293 + static int process_event_synth_tracing_data_stub(union perf_event *event __used, 294 + struct perf_session *session __used) 288 295 { 289 296 dump_printf(": unhandled!\n"); 290 297 return 0; 291 298 } 292 299 293 - static int process_event_sample_stub(union perf_event *event __used, 300 + static int process_event_synth_attr_stub(union perf_event *event __used, 301 + struct perf_evlist **pevlist __used) 302 + { 303 + dump_printf(": unhandled!\n"); 304 + return 0; 305 + } 306 + 307 + static int process_event_sample_stub(struct perf_tool *tool __used, 308 + union perf_event *event __used, 294 309 struct perf_sample *sample __used, 295 310 struct perf_evsel *evsel __used, 296 - struct perf_session *session __used) 311 + struct machine *machine __used) 297 312 { 298 313 dump_printf(": unhandled!\n"); 299 314 return 0; 300 315 } 301 316 302 - static int process_event_stub(union perf_event *event __used, 317 + static int process_event_stub(struct perf_tool *tool __used, 318 + union perf_event *event __used, 303 319 struct perf_sample *sample __used, 304 - struct perf_session *session __used) 320 + struct machine *machine __used) 305 321 { 306 322 dump_printf(": unhandled!\n"); 307 323 return 0; 308 324 } 309 325 310 - static int process_finished_round_stub(union perf_event *event __used, 311 - struct perf_session *session __used, 312 - struct perf_event_ops *ops __used) 326 + static int process_finished_round_stub(struct perf_tool *tool __used, 327 + union perf_event *event __used, 328 + struct perf_session *perf_session __used) 313 329 { 314 330 dump_printf(": unhandled!\n"); 315 331 return 0; 316 332 } 317 333 318 - static int process_finished_round(union perf_event *event, 319 - struct perf_session *session, 320 - struct perf_event_ops *ops); 321 - 322 - static void perf_event_ops__fill_defaults(struct perf_event_ops *handler) 334 + static int process_event_type_stub(struct perf_tool *tool __used, 335 + union perf_event *event 
__used) 323 336 { 324 - if (handler->sample == NULL) 325 - handler->sample = process_event_sample_stub; 326 - if (handler->mmap == NULL) 327 - handler->mmap = process_event_stub; 328 - if (handler->comm == NULL) 329 - handler->comm = process_event_stub; 330 - if (handler->fork == NULL) 331 - handler->fork = process_event_stub; 332 - if (handler->exit == NULL) 333 - handler->exit = process_event_stub; 334 - if (handler->lost == NULL) 335 - handler->lost = perf_event__process_lost; 336 - if (handler->read == NULL) 337 - handler->read = process_event_stub; 338 - if (handler->throttle == NULL) 339 - handler->throttle = process_event_stub; 340 - if (handler->unthrottle == NULL) 341 - handler->unthrottle = process_event_stub; 342 - if (handler->attr == NULL) 343 - handler->attr = process_event_synth_stub; 344 - if (handler->event_type == NULL) 345 - handler->event_type = process_event_synth_stub; 346 - if (handler->tracing_data == NULL) 347 - handler->tracing_data = process_event_synth_stub; 348 - if (handler->build_id == NULL) 349 - handler->build_id = process_event_synth_stub; 350 - if (handler->finished_round == NULL) { 351 - if (handler->ordered_samples) 352 - handler->finished_round = process_finished_round; 337 + dump_printf(": unhandled!\n"); 338 + return 0; 339 + } 340 + 341 + static int process_finished_round(struct perf_tool *tool, 342 + union perf_event *event, 343 + struct perf_session *session); 344 + 345 + static void perf_tool__fill_defaults(struct perf_tool *tool) 346 + { 347 + if (tool->sample == NULL) 348 + tool->sample = process_event_sample_stub; 349 + if (tool->mmap == NULL) 350 + tool->mmap = process_event_stub; 351 + if (tool->comm == NULL) 352 + tool->comm = process_event_stub; 353 + if (tool->fork == NULL) 354 + tool->fork = process_event_stub; 355 + if (tool->exit == NULL) 356 + tool->exit = process_event_stub; 357 + if (tool->lost == NULL) 358 + tool->lost = perf_event__process_lost; 359 + if (tool->read == NULL) 360 + tool->read = 
process_event_sample_stub; 361 + if (tool->throttle == NULL) 362 + tool->throttle = process_event_stub; 363 + if (tool->unthrottle == NULL) 364 + tool->unthrottle = process_event_stub; 365 + if (tool->attr == NULL) 366 + tool->attr = process_event_synth_attr_stub; 367 + if (tool->event_type == NULL) 368 + tool->event_type = process_event_type_stub; 369 + if (tool->tracing_data == NULL) 370 + tool->tracing_data = process_event_synth_tracing_data_stub; 371 + if (tool->build_id == NULL) 372 + tool->build_id = process_finished_round_stub; 373 + if (tool->finished_round == NULL) { 374 + if (tool->ordered_samples) 375 + tool->finished_round = process_finished_round; 353 376 else 354 - handler->finished_round = process_finished_round_stub; 377 + tool->finished_round = process_finished_round_stub; 355 378 } 356 379 } 357 380 ··· 499 490 static int perf_session_deliver_event(struct perf_session *session, 500 491 union perf_event *event, 501 492 struct perf_sample *sample, 502 - struct perf_event_ops *ops, 493 + struct perf_tool *tool, 503 494 u64 file_offset); 504 495 505 496 static void flush_sample_queue(struct perf_session *s, 506 - struct perf_event_ops *ops) 497 + struct perf_tool *tool) 507 498 { 508 499 struct ordered_samples *os = &s->ordered_samples; 509 500 struct list_head *head = &os->samples; ··· 514 505 unsigned idx = 0, progress_next = os->nr_samples / 16; 515 506 int ret; 516 507 517 - if (!ops->ordered_samples || !limit) 508 + if (!tool->ordered_samples || !limit) 518 509 return; 519 510 520 511 list_for_each_entry_safe(iter, tmp, head, list) { ··· 525 516 if (ret) 526 517 pr_err("Can't parse sample, err = %d\n", ret); 527 518 else 528 - perf_session_deliver_event(s, iter->event, &sample, ops, 519 + perf_session_deliver_event(s, iter->event, &sample, tool, 529 520 iter->file_offset); 530 521 531 522 os->last_flush = iter->timestamp; ··· 587 578 * Flush every events below timestamp 7 588 579 * etc... 
589 580 */ 590 - static int process_finished_round(union perf_event *event __used, 591 - struct perf_session *session, 592 - struct perf_event_ops *ops) 581 + static int process_finished_round(struct perf_tool *tool, 582 + union perf_event *event __used, 583 + struct perf_session *session) 593 584 { 594 - flush_sample_queue(session, ops); 585 + flush_sample_queue(session, tool); 595 586 session->ordered_samples.next_flush = session->ordered_samples.max_timestamp; 596 587 597 588 return 0; ··· 746 737 callchain__printf(sample); 747 738 } 748 739 740 + static struct machine * 741 + perf_session__find_machine_for_cpumode(struct perf_session *session, 742 + union perf_event *event) 743 + { 744 + const u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; 745 + 746 + if (cpumode == PERF_RECORD_MISC_GUEST_KERNEL && perf_guest) 747 + return perf_session__find_machine(session, event->ip.pid); 748 + 749 + return perf_session__find_host_machine(session); 750 + } 751 + 749 752 static int perf_session_deliver_event(struct perf_session *session, 750 753 union perf_event *event, 751 754 struct perf_sample *sample, 752 - struct perf_event_ops *ops, 755 + struct perf_tool *tool, 753 756 u64 file_offset) 754 757 { 755 758 struct perf_evsel *evsel; 759 + struct machine *machine; 756 760 757 761 dump_event(session, event, file_offset, sample); 758 762 ··· 787 765 hists__inc_nr_events(&evsel->hists, event->header.type); 788 766 } 789 767 768 + machine = perf_session__find_machine_for_cpumode(session, event); 769 + 790 770 switch (event->header.type) { 791 771 case PERF_RECORD_SAMPLE: 792 772 dump_sample(session, event, sample); ··· 796 772 ++session->hists.stats.nr_unknown_id; 797 773 return -1; 798 774 } 799 - return ops->sample(event, sample, evsel, session); 775 + return tool->sample(tool, event, sample, evsel, machine); 800 776 case PERF_RECORD_MMAP: 801 - return ops->mmap(event, sample, session); 777 + return tool->mmap(tool, event, sample, machine); 802 778 case 
PERF_RECORD_COMM: 803 - return ops->comm(event, sample, session); 779 + return tool->comm(tool, event, sample, machine); 804 780 case PERF_RECORD_FORK: 805 - return ops->fork(event, sample, session); 781 + return tool->fork(tool, event, sample, machine); 806 782 case PERF_RECORD_EXIT: 807 - return ops->exit(event, sample, session); 783 + return tool->exit(tool, event, sample, machine); 808 784 case PERF_RECORD_LOST: 809 - return ops->lost(event, sample, session); 785 + if (tool->lost == perf_event__process_lost) 786 + session->hists.stats.total_lost += event->lost.lost; 787 + return tool->lost(tool, event, sample, machine); 810 788 case PERF_RECORD_READ: 811 - return ops->read(event, sample, session); 789 + return tool->read(tool, event, sample, evsel, machine); 812 790 case PERF_RECORD_THROTTLE: 813 - return ops->throttle(event, sample, session); 791 + return tool->throttle(tool, event, sample, machine); 814 792 case PERF_RECORD_UNTHROTTLE: 815 - return ops->unthrottle(event, sample, session); 793 + return tool->unthrottle(tool, event, sample, machine); 816 794 default: 817 795 ++session->hists.stats.nr_unknown_events; 818 796 return -1; ··· 838 812 } 839 813 840 814 static int perf_session__process_user_event(struct perf_session *session, union perf_event *event, 841 - struct perf_event_ops *ops, u64 file_offset) 815 + struct perf_tool *tool, u64 file_offset) 842 816 { 817 + int err; 818 + 843 819 dump_event(session, event, file_offset, NULL); 844 820 845 821 /* These events are processed right away */ 846 822 switch (event->header.type) { 847 823 case PERF_RECORD_HEADER_ATTR: 848 - return ops->attr(event, session); 824 + err = tool->attr(event, &session->evlist); 825 + if (err == 0) 826 + perf_session__update_sample_type(session); 827 + return err; 849 828 case PERF_RECORD_HEADER_EVENT_TYPE: 850 - return ops->event_type(event, session); 829 + return tool->event_type(tool, event); 851 830 case PERF_RECORD_HEADER_TRACING_DATA: 852 831 /* setup for reading amidst 
mmap */ 853 832 lseek(session->fd, file_offset, SEEK_SET); 854 - return ops->tracing_data(event, session); 833 + return tool->tracing_data(event, session); 855 834 case PERF_RECORD_HEADER_BUILD_ID: 856 - return ops->build_id(event, session); 835 + return tool->build_id(tool, event, session); 857 836 case PERF_RECORD_FINISHED_ROUND: 858 - return ops->finished_round(event, session, ops); 837 + return tool->finished_round(tool, event, session); 859 838 default: 860 839 return -EINVAL; 861 840 } ··· 868 837 869 838 static int perf_session__process_event(struct perf_session *session, 870 839 union perf_event *event, 871 - struct perf_event_ops *ops, 840 + struct perf_tool *tool, 872 841 u64 file_offset) 873 842 { 874 843 struct perf_sample sample; ··· 884 853 hists__inc_nr_events(&session->hists, event->header.type); 885 854 886 855 if (event->header.type >= PERF_RECORD_USER_TYPE_START) 887 - return perf_session__process_user_event(session, event, ops, file_offset); 856 + return perf_session__process_user_event(session, event, tool, file_offset); 888 857 889 858 /* 890 859 * For all kernel events we get the sample data ··· 897 866 if (perf_session__preprocess_sample(session, event, &sample)) 898 867 return 0; 899 868 900 - if (ops->ordered_samples) { 869 + if (tool->ordered_samples) { 901 870 ret = perf_session_queue_event(session, event, &sample, 902 871 file_offset); 903 872 if (ret != -ETIME) 904 873 return ret; 905 874 } 906 875 907 - return perf_session_deliver_event(session, event, &sample, ops, 876 + return perf_session_deliver_event(session, event, &sample, tool, 908 877 file_offset); 909 878 } 910 879 ··· 913 882 self->type = bswap_32(self->type); 914 883 self->misc = bswap_16(self->misc); 915 884 self->size = bswap_16(self->size); 885 + } 886 + 887 + struct thread *perf_session__findnew(struct perf_session *session, pid_t pid) 888 + { 889 + return machine__findnew_thread(&session->host_machine, pid); 916 890 } 917 891 918 892 static struct thread 
*perf_session__register_idle_thread(struct perf_session *self) ··· 933 897 } 934 898 935 899 static void perf_session__warn_about_errors(const struct perf_session *session, 936 - const struct perf_event_ops *ops) 900 + const struct perf_tool *tool) 937 901 { 938 - if (ops->lost == perf_event__process_lost && 902 + if (tool->lost == perf_event__process_lost && 939 903 session->hists.stats.nr_events[PERF_RECORD_LOST] != 0) { 940 904 ui__warning("Processed %d events and lost %d chunks!\n\n" 941 905 "Check IO/CPU overload!\n\n", ··· 970 934 volatile int session_done; 971 935 972 936 static int __perf_session__process_pipe_events(struct perf_session *self, 973 - struct perf_event_ops *ops) 937 + struct perf_tool *tool) 974 938 { 975 939 union perf_event event; 976 940 uint32_t size; ··· 979 943 int err; 980 944 void *p; 981 945 982 - perf_event_ops__fill_defaults(ops); 946 + perf_tool__fill_defaults(tool); 983 947 984 948 head = 0; 985 949 more: ··· 1015 979 } 1016 980 } 1017 981 1018 - if (size == 0 || 1019 - (skip = perf_session__process_event(self, &event, ops, head)) < 0) { 982 + if ((skip = perf_session__process_event(self, &event, tool, head)) < 0) { 1020 983 dump_printf("%#" PRIx64 " [%#x]: skipping unknown header type: %d\n", 1021 984 head, event.header.size, event.header.type); 1022 985 /* ··· 1038 1003 done: 1039 1004 err = 0; 1040 1005 out_err: 1041 - perf_session__warn_about_errors(self, ops); 1006 + perf_session__warn_about_errors(self, tool); 1042 1007 perf_session_free_sample_buffers(self); 1043 1008 return err; 1044 1009 } ··· 1069 1034 1070 1035 int __perf_session__process_events(struct perf_session *session, 1071 1036 u64 data_offset, u64 data_size, 1072 - u64 file_size, struct perf_event_ops *ops) 1037 + u64 file_size, struct perf_tool *tool) 1073 1038 { 1074 1039 u64 head, page_offset, file_offset, file_pos, progress_next; 1075 1040 int err, mmap_prot, mmap_flags, map_idx = 0; ··· 1078 1043 union perf_event *event; 1079 1044 uint32_t size; 1080 1045 
1081 - perf_event_ops__fill_defaults(ops); 1046 + perf_tool__fill_defaults(tool); 1082 1047 1083 1048 page_size = sysconf(_SC_PAGESIZE); 1084 1049 ··· 1133 1098 size = event->header.size; 1134 1099 1135 1100 if (size == 0 || 1136 - perf_session__process_event(session, event, ops, file_pos) < 0) { 1101 + perf_session__process_event(session, event, tool, file_pos) < 0) { 1137 1102 dump_printf("%#" PRIx64 " [%#x]: skipping unknown header type: %d\n", 1138 1103 file_offset + head, event->header.size, 1139 1104 event->header.type); ··· 1162 1127 err = 0; 1163 1128 /* do the final flush for ordered samples */ 1164 1129 session->ordered_samples.next_flush = ULLONG_MAX; 1165 - flush_sample_queue(session, ops); 1130 + flush_sample_queue(session, tool); 1166 1131 out_err: 1167 - perf_session__warn_about_errors(session, ops); 1132 + perf_session__warn_about_errors(session, tool); 1168 1133 perf_session_free_sample_buffers(session); 1169 1134 return err; 1170 1135 } 1171 1136 1172 1137 int perf_session__process_events(struct perf_session *self, 1173 - struct perf_event_ops *ops) 1138 + struct perf_tool *tool) 1174 1139 { 1175 1140 int err; 1176 1141 ··· 1181 1146 err = __perf_session__process_events(self, 1182 1147 self->header.data_offset, 1183 1148 self->header.data_size, 1184 - self->size, ops); 1149 + self->size, tool); 1185 1150 else 1186 - err = __perf_session__process_pipe_events(self, ops); 1151 + err = __perf_session__process_pipe_events(self, tool); 1187 1152 1188 1153 return err; 1189 1154 } ··· 1198 1163 return true; 1199 1164 } 1200 1165 1201 - int perf_session__set_kallsyms_ref_reloc_sym(struct map **maps, 1202 - const char *symbol_name, 1203 - u64 addr) 1166 + int maps__set_kallsyms_ref_reloc_sym(struct map **maps, 1167 + const char *symbol_name, u64 addr) 1204 1168 { 1205 1169 char *bracket; 1206 1170 enum map_type i; ··· 1258 1224 return ret; 1259 1225 } 1260 1226 1227 + size_t perf_session__fprintf(struct perf_session *session, FILE *fp) 1228 + { 1229 + /* 
1230 + * FIXME: Here we have to actually print all the machines in this 1231 + * session, not just the host... 1232 + */ 1233 + return machine__fprintf(&session->host_machine, fp); 1234 + } 1235 + 1236 + void perf_session__remove_thread(struct perf_session *session, 1237 + struct thread *th) 1238 + { 1239 + /* 1240 + * FIXME: This one makes no sense, we need to remove the thread from 1241 + * the machine it belongs to, perf_session can have many machines, so 1242 + * doing it always on ->host_machine is wrong. Fix when auditing all 1243 + * the 'perf kvm' code. 1244 + */ 1245 + machine__remove_thread(&session->host_machine, th); 1246 + } 1247 + 1261 1248 struct perf_evsel *perf_session__find_first_evtype(struct perf_session *session, 1262 1249 unsigned int type) 1263 1250 { ··· 1291 1236 return NULL; 1292 1237 } 1293 1238 1294 - void perf_session__print_ip(union perf_event *event, 1295 - struct perf_sample *sample, 1296 - struct perf_session *session, 1297 - int print_sym, int print_dso) 1239 + void perf_event__print_ip(union perf_event *event, struct perf_sample *sample, 1240 + struct machine *machine, struct perf_evsel *evsel, 1241 + int print_sym, int print_dso) 1298 1242 { 1299 1243 struct addr_location al; 1300 1244 const char *symname, *dsoname; 1301 - struct callchain_cursor *cursor = &session->callchain_cursor; 1245 + struct callchain_cursor *cursor = &evsel->hists.callchain_cursor; 1302 1246 struct callchain_cursor_node *node; 1303 1247 1304 - if (perf_event__preprocess_sample(event, session, &al, sample, 1248 + if (perf_event__preprocess_sample(event, machine, &al, sample, 1305 1249 NULL) < 0) { 1306 1250 error("problem processing %d event, skipping it.\n", 1307 1251 event->header.type); ··· 1309 1255 1310 1256 if (symbol_conf.use_callchain && sample->callchain) { 1311 1257 1312 - if (perf_session__resolve_callchain(session, al.thread, 1258 + if (machine__resolve_callchain(machine, evsel, al.thread, 1313 1259 sample->callchain, NULL) != 0) { 1314 1260 if 
(verbose) 1315 1261 error("Failed to resolve callchain. Skipping\n");
+23 -49
tools/perf/util/session.h
··· 30 30 struct perf_header header; 31 31 unsigned long size; 32 32 unsigned long mmap_window; 33 - struct rb_root threads; 34 - struct list_head dead_threads; 35 - struct thread *last_match; 36 33 struct machine host_machine; 37 34 struct rb_root machines; 38 35 struct perf_evlist *evlist; ··· 50 53 int cwdlen; 51 54 char *cwd; 52 55 struct ordered_samples ordered_samples; 53 - struct callchain_cursor callchain_cursor; 54 - char filename[0]; 56 + char filename[1]; 55 57 }; 56 58 57 - struct perf_evsel; 58 - struct perf_event_ops; 59 - 60 - typedef int (*event_sample)(union perf_event *event, struct perf_sample *sample, 61 - struct perf_evsel *evsel, struct perf_session *session); 62 - typedef int (*event_op)(union perf_event *self, struct perf_sample *sample, 63 - struct perf_session *session); 64 - typedef int (*event_synth_op)(union perf_event *self, 65 - struct perf_session *session); 66 - typedef int (*event_op2)(union perf_event *self, struct perf_session *session, 67 - struct perf_event_ops *ops); 68 - 69 - struct perf_event_ops { 70 - event_sample sample; 71 - event_op mmap, 72 - comm, 73 - fork, 74 - exit, 75 - lost, 76 - read, 77 - throttle, 78 - unthrottle; 79 - event_synth_op attr, 80 - event_type, 81 - tracing_data, 82 - build_id; 83 - event_op2 finished_round; 84 - bool ordered_samples; 85 - bool ordering_requires_timestamps; 86 - }; 59 + struct perf_tool; 87 60 88 61 struct perf_session *perf_session__new(const char *filename, int mode, 89 62 bool force, bool repipe, 90 - struct perf_event_ops *ops); 63 + struct perf_tool *tool); 91 64 void perf_session__delete(struct perf_session *self); 92 65 93 66 void perf_event_header__bswap(struct perf_event_header *self); 94 67 95 68 int __perf_session__process_events(struct perf_session *self, 96 69 u64 data_offset, u64 data_size, u64 size, 97 - struct perf_event_ops *ops); 70 + struct perf_tool *tool); 98 71 int perf_session__process_events(struct perf_session *self, 99 - struct perf_event_ops *event_ops); 
72 + struct perf_tool *tool); 100 73 101 - int perf_session__resolve_callchain(struct perf_session *self, 74 + int perf_session__resolve_callchain(struct perf_session *self, struct perf_evsel *evsel, 102 75 struct thread *thread, 103 76 struct ip_callchain *chain, 104 77 struct symbol **parent); 105 78 106 79 bool perf_session__has_traces(struct perf_session *self, const char *msg); 107 - 108 - int perf_session__set_kallsyms_ref_reloc_sym(struct map **maps, 109 - const char *symbol_name, 110 - u64 addr); 111 80 112 81 void mem_bswap_64(void *src, int byte_size); 113 82 void perf_event__attr_swap(struct perf_event_attr *attr); ··· 107 144 108 145 static inline 109 146 void perf_session__process_machines(struct perf_session *self, 147 + struct perf_tool *tool, 110 148 machine__process_t process) 111 149 { 112 - process(&self->host_machine, self); 113 - return machines__process(&self->machines, process, self); 150 + process(&self->host_machine, tool); 151 + return machines__process(&self->machines, process, tool); 114 152 } 153 + 154 + struct thread *perf_session__findnew(struct perf_session *self, pid_t pid); 155 + size_t perf_session__fprintf(struct perf_session *self, FILE *fp); 115 156 116 157 size_t perf_session__fprintf_dsos(struct perf_session *self, FILE *fp); 117 158 ··· 134 167 session->header.needs_swap); 135 168 } 136 169 170 + static inline int perf_session__synthesize_sample(struct perf_session *session, 171 + union perf_event *event, 172 + const struct perf_sample *sample) 173 + { 174 + return perf_event__synthesize_sample(event, session->sample_type, 175 + sample, session->header.needs_swap); 176 + } 177 + 137 178 struct perf_evsel *perf_session__find_first_evtype(struct perf_session *session, 138 179 unsigned int type); 139 180 140 - void perf_session__print_ip(union perf_event *event, 141 - struct perf_sample *sample, 142 - struct perf_session *session, 143 - int print_sym, int print_dso); 181 + void perf_event__print_ip(union perf_event *event, 
struct perf_sample *sample, 182 + struct machine *machine, struct perf_evsel *evsel, 183 + int print_sym, int print_dso); 144 184 145 185 int perf_session__cpu_bitmap(struct perf_session *session, 146 186 const char *cpu_list, unsigned long *cpu_bitmap);
+2 -1
tools/perf/util/setup.py
··· 27 27 perf = Extension('perf', 28 28 sources = ['util/python.c', 'util/ctype.c', 'util/evlist.c', 29 29 'util/evsel.c', 'util/cpumap.c', 'util/thread_map.c', 30 - 'util/util.c', 'util/xyarray.c', 'util/cgroup.c'], 30 + 'util/util.c', 'util/xyarray.c', 'util/cgroup.c', 31 + 'util/debugfs.c'], 31 32 include_dirs = ['util/include'], 32 33 extra_compile_args = cflags, 33 34 )
+3 -8
tools/perf/util/symbol.c
··· 1757 1757 struct stat st; 1758 1758 1759 1759 /*sshfs might return bad dent->d_type, so we have to stat*/ 1760 - sprintf(path, "%s/%s", dir_name, dent->d_name); 1760 + snprintf(path, sizeof(path), "%s/%s", dir_name, dent->d_name); 1761 1761 if (stat(path, &st)) 1762 1762 continue; 1763 1763 ··· 1766 1766 !strcmp(dent->d_name, "..")) 1767 1767 continue; 1768 1768 1769 - snprintf(path, sizeof(path), "%s/%s", 1770 - dir_name, dent->d_name); 1771 1769 ret = map_groups__set_modules_path_dir(mg, path); 1772 1770 if (ret < 0) 1773 1771 goto out; ··· 1785 1787 dso_name); 1786 1788 if (map == NULL) 1787 1789 continue; 1788 - 1789 - snprintf(path, sizeof(path), "%s/%s", 1790 - dir_name, dent->d_name); 1791 1790 1792 1791 long_name = strdup(path); 1793 1792 if (long_name == NULL) { ··· 2604 2609 symbol_conf.initialized = true; 2605 2610 return 0; 2606 2611 2607 - out_free_dso_list: 2608 - strlist__delete(symbol_conf.dso_list); 2609 2612 out_free_comm_list: 2610 2613 strlist__delete(symbol_conf.comm_list); 2614 + out_free_dso_list: 2615 + strlist__delete(symbol_conf.dso_list); 2611 2616 return -1; 2612 2617 } 2613 2618
+1
tools/perf/util/symbol.h
··· 68 68 69 69 struct symbol_conf { 70 70 unsigned short priv_size; 71 + unsigned short nr_events; 71 72 bool try_vmlinux_path, 72 73 use_modules, 73 74 sort_by_name,
+3 -3
tools/perf/util/thread.c
··· 61 61 map_groups__fprintf(&self->mg, verbose, fp); 62 62 } 63 63 64 - struct thread *perf_session__findnew(struct perf_session *self, pid_t pid) 64 + struct thread *machine__findnew_thread(struct machine *self, pid_t pid) 65 65 { 66 66 struct rb_node **p = &self->threads.rb_node; 67 67 struct rb_node *parent = NULL; ··· 125 125 return 0; 126 126 } 127 127 128 - size_t perf_session__fprintf(struct perf_session *self, FILE *fp) 128 + size_t machine__fprintf(struct machine *machine, FILE *fp) 129 129 { 130 130 size_t ret = 0; 131 131 struct rb_node *nd; 132 132 133 - for (nd = rb_first(&self->threads); nd; nd = rb_next(nd)) { 133 + for (nd = rb_first(&machine->threads); nd; nd = rb_next(nd)) { 134 134 struct thread *pos = rb_entry(nd, struct thread, rb_node); 135 135 136 136 ret += thread__fprintf(pos, fp);
+5 -9
tools/perf/util/thread.h
··· 18 18 int comm_len; 19 19 }; 20 20 21 - struct perf_session; 21 + struct machine; 22 22 23 23 void thread__delete(struct thread *self); 24 24 25 25 int thread__set_comm(struct thread *self, const char *comm); 26 26 int thread__comm_len(struct thread *self); 27 - struct thread *perf_session__findnew(struct perf_session *self, pid_t pid); 28 27 void thread__insert_map(struct thread *self, struct map *map); 29 28 int thread__fork(struct thread *self, struct thread *parent); 30 - size_t perf_session__fprintf(struct perf_session *self, FILE *fp); 31 29 32 30 static inline struct map *thread__find_map(struct thread *self, 33 31 enum map_type type, u64 addr) ··· 33 35 return self ? map_groups__find(&self->mg, type, addr) : NULL; 34 36 } 35 37 36 - void thread__find_addr_map(struct thread *self, 37 - struct perf_session *session, u8 cpumode, 38 - enum map_type type, pid_t pid, u64 addr, 38 + void thread__find_addr_map(struct thread *thread, struct machine *machine, 39 + u8 cpumode, enum map_type type, u64 addr, 39 40 struct addr_location *al); 40 41 41 - void thread__find_addr_location(struct thread *self, 42 - struct perf_session *session, u8 cpumode, 43 - enum map_type type, pid_t pid, u64 addr, 42 + void thread__find_addr_location(struct thread *thread, struct machine *machine, 43 + u8 cpumode, enum map_type type, u64 addr, 44 44 struct addr_location *al, 45 45 symbol_filter_t filter); 46 46 #endif /* __PERF_THREAD_H */
+50
tools/perf/util/tool.h
··· 1 + #ifndef __PERF_TOOL_H 2 + #define __PERF_TOOL_H 3 + 4 + #include <stdbool.h> 5 + 6 + struct perf_session; 7 + union perf_event; 8 + struct perf_evlist; 9 + struct perf_evsel; 10 + struct perf_sample; 11 + struct perf_tool; 12 + struct machine; 13 + 14 + typedef int (*event_sample)(struct perf_tool *tool, union perf_event *event, 15 + struct perf_sample *sample, 16 + struct perf_evsel *evsel, struct machine *machine); 17 + 18 + typedef int (*event_op)(struct perf_tool *tool, union perf_event *event, 19 + struct perf_sample *sample, struct machine *machine); 20 + 21 + typedef int (*event_attr_op)(union perf_event *event, 22 + struct perf_evlist **pevlist); 23 + typedef int (*event_simple_op)(struct perf_tool *tool, union perf_event *event); 24 + 25 + typedef int (*event_synth_op)(union perf_event *event, 26 + struct perf_session *session); 27 + 28 + typedef int (*event_op2)(struct perf_tool *tool, union perf_event *event, 29 + struct perf_session *session); 30 + 31 + struct perf_tool { 32 + event_sample sample, 33 + read; 34 + event_op mmap, 35 + comm, 36 + fork, 37 + exit, 38 + lost, 39 + throttle, 40 + unthrottle; 41 + event_attr_op attr; 42 + event_synth_op tracing_data; 43 + event_simple_op event_type; 44 + event_op2 finished_round, 45 + build_id; 46 + bool ordered_samples; 47 + bool ordering_requires_timestamps; 48 + }; 49 + 50 + #endif /* __PERF_TOOL_H */
+19 -1
tools/perf/util/top.h
··· 1 1 #ifndef __PERF_TOP_H 2 2 #define __PERF_TOP_H 1 3 3 4 + #include "tool.h" 4 5 #include "types.h" 5 - #include "../perf.h" 6 6 #include <stddef.h> 7 + #include <stdbool.h> 7 8 8 9 struct perf_evlist; 9 10 struct perf_evsel; 10 11 struct perf_session; 11 12 12 13 struct perf_top { 14 + struct perf_tool tool; 13 15 struct perf_evlist *evlist; 14 16 /* 15 17 * Symbols will be added here in perf_event__process_sample and will ··· 25 23 int freq; 26 24 pid_t target_pid, target_tid; 27 25 bool hide_kernel_symbols, hide_user_symbols, zero; 26 + bool system_wide; 27 + bool use_tui, use_stdio; 28 + bool sort_has_symbols; 29 + bool dont_use_callchains; 30 + bool kptr_restrict_warned; 31 + bool vmlinux_warned; 32 + bool inherit; 33 + bool group; 34 + bool sample_id_all_avail; 35 + bool dump_symtab; 28 36 const char *cpu_list; 29 37 struct hist_entry *sym_filter_entry; 30 38 struct perf_evsel *sym_evsel; 31 39 struct perf_session *session; 40 + struct winsize winsize; 41 + unsigned int mmap_pages; 42 + int default_interval; 43 + int realtime_prio; 44 + int sym_pcnt_filter; 45 + const char *sym_filter; 32 46 }; 33 47 34 48 size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size);
+2 -26
tools/perf/util/trace-event-info.c
··· 18 18 * 19 19 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 20 20 */ 21 - #define _GNU_SOURCE 21 + #include <ctype.h> 22 + #include "util.h" 22 23 #include <dirent.h> 23 24 #include <mntent.h> 24 25 #include <stdio.h> ··· 32 31 #include <pthread.h> 33 32 #include <fcntl.h> 34 33 #include <unistd.h> 35 - #include <ctype.h> 36 34 #include <errno.h> 37 35 #include <stdbool.h> 38 36 #include <linux/list.h> ··· 43 43 #include "evsel.h" 44 44 45 45 #define VERSION "0.5" 46 - 47 - #define _STR(x) #x 48 - #define STR(x) _STR(x) 49 - #define MAX_PATH 256 50 46 51 47 #define TRACE_CTRL "tracing_on" 52 48 #define TRACE "trace" ··· 68 72 char *name; 69 73 }; 70 74 71 - 72 - 73 - static void die(const char *fmt, ...) 74 - { 75 - va_list ap; 76 - int ret = errno; 77 - 78 - if (errno) 79 - perror("perf"); 80 - else 81 - ret = -1; 82 - 83 - va_start(ap, fmt); 84 - fprintf(stderr, " "); 85 - vfprintf(stderr, fmt, ap); 86 - va_end(ap); 87 - 88 - fprintf(stderr, "\n"); 89 - exit(ret); 90 - } 91 75 92 76 void *malloc_or_die(unsigned int size) 93 77 {
+1 -1
tools/perf/util/trace-event-scripting.c
··· 39 39 static void process_event_unsupported(union perf_event *event __unused, 40 40 struct perf_sample *sample __unused, 41 41 struct perf_evsel *evsel __unused, 42 - struct perf_session *session __unused, 42 + struct machine *machine __unused, 43 43 struct thread *thread __unused) 44 44 { 45 45 }
+6 -2
tools/perf/util/trace-event.h
··· 3 3 4 4 #include <stdbool.h> 5 5 #include "parse-events.h" 6 - #include "session.h" 6 + 7 + struct machine; 8 + struct perf_sample; 9 + union perf_event; 10 + struct thread; 7 11 8 12 #define __unused __attribute__((unused)) 9 13 ··· 296 292 void (*process_event) (union perf_event *event, 297 293 struct perf_sample *sample, 298 294 struct perf_evsel *evsel, 299 - struct perf_session *session, 295 + struct machine *machine, 300 296 struct thread *thread); 301 297 int (*generate_script) (const char *outfile); 302 298 };
+7 -9
tools/perf/util/ui/browsers/annotate.c
··· 224 224 } 225 225 226 226 static int annotate_browser__run(struct annotate_browser *self, int evidx, 227 - int nr_events, void(*timer)(void *arg), 227 + void(*timer)(void *arg), 228 228 void *arg, int delay_secs) 229 229 { 230 230 struct rb_node *nd = NULL; ··· 328 328 notes = symbol__annotation(target); 329 329 pthread_mutex_lock(&notes->lock); 330 330 331 - if (notes->src == NULL && 332 - symbol__alloc_hist(target, nr_events) < 0) { 331 + if (notes->src == NULL && symbol__alloc_hist(target) < 0) { 333 332 pthread_mutex_unlock(&notes->lock); 334 333 ui__warning("Not enough memory for annotating '%s' symbol!\n", 335 334 target->name); ··· 336 337 } 337 338 338 339 pthread_mutex_unlock(&notes->lock); 339 - symbol__tui_annotate(target, ms->map, evidx, nr_events, 340 + symbol__tui_annotate(target, ms->map, evidx, 340 341 timer, arg, delay_secs); 341 342 } 342 343 continue; ··· 357 358 return key; 358 359 } 359 360 360 - int hist_entry__tui_annotate(struct hist_entry *he, int evidx, int nr_events, 361 + int hist_entry__tui_annotate(struct hist_entry *he, int evidx, 361 362 void(*timer)(void *arg), void *arg, int delay_secs) 362 363 { 363 - return symbol__tui_annotate(he->ms.sym, he->ms.map, evidx, nr_events, 364 + return symbol__tui_annotate(he->ms.sym, he->ms.map, evidx, 364 365 timer, arg, delay_secs); 365 366 } 366 367 367 368 int symbol__tui_annotate(struct symbol *sym, struct map *map, int evidx, 368 - int nr_events, void(*timer)(void *arg), void *arg, 369 + void(*timer)(void *arg), void *arg, 369 370 int delay_secs) 370 371 { 371 372 struct objdump_line *pos, *n; ··· 418 419 browser.b.nr_entries = browser.nr_entries; 419 420 browser.b.entries = &notes->src->source, 420 421 browser.b.width += 18; /* Percentage */ 421 - ret = annotate_browser__run(&browser, evidx, nr_events, 422 - timer, arg, delay_secs); 422 + ret = annotate_browser__run(&browser, evidx, timer, arg, delay_secs); 423 423 list_for_each_entry_safe(pos, n, &notes->src->source, node) { 424 424 
list_del(&pos->node); 425 425 objdump_line__free(pos);
+1 -1
tools/perf/util/ui/browsers/hists.c
··· 1020 1020 * Don't let this be freed, say, by hists__decay_entry. 1021 1021 */ 1022 1022 he->used = true; 1023 - err = hist_entry__tui_annotate(he, evsel->idx, nr_events, 1023 + err = hist_entry__tui_annotate(he, evsel->idx, 1024 1024 timer, arg, delay_secs); 1025 1025 he->used = false; 1026 1026 ui_browser__update_nr_entries(&browser->b, browser->hists->nr_entries);
+3
tools/perf/util/ui/progress.c
··· 14 14 if (use_browser <= 0) 15 15 return; 16 16 17 + if (total == 0) 18 + return; 19 + 17 20 ui__refresh_dimensions(true); 18 21 pthread_mutex_lock(&ui__lock); 19 22 y = SLtt_Screen_Rows / 2 - 2;
+4 -1
tools/perf/util/usage.c
··· 1 1 /* 2 - * GIT - The information manager from hell 2 + * usage.c 3 + * 4 + * Various reporting routines. 5 + * Originally copied from GIT source. 3 6 * 4 7 * Copyright (C) Linus Torvalds, 2005 5 8 */
+11
tools/perf/util/util.h
··· 245 245 #define _STR(x) #x 246 246 #define STR(x) _STR(x) 247 247 248 + /* 249 + * Determine whether some value is a power of two, where zero is 250 + * *not* considered a power of two. 251 + */ 252 + 253 + static inline __attribute__((const)) 254 + bool is_power_of_2(unsigned long n) 255 + { 256 + return (n != 0 && ((n & (n - 1)) == 0)); 257 + } 258 + 248 259 #endif
+1
tools/perf/util/values.c
··· 32 32 33 33 for (i = 0; i < values->threads; i++) 34 34 free(values->value[i]); 35 + free(values->value); 35 36 free(values->pid); 36 37 free(values->tid); 37 38 free(values->counterrawid);