Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

ia64: Remove perfmon

perfmon has been marked broken and thus been disabled for all builds
for more than two years. Remove it entirely.

Cc: Anant Thazhemadam <anant.thazhemadam@gmail.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Enthusiastically-ACKed-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Tony Luck <tony.luck@intel.com>
Link: https://lore.kernel.org/r/20200911094920.1173631-1-hch@lst.de

Authored by Christoph Hellwig, committed by Tony Luck
ecf5b72d f4d51dff

+4 -6998
-9
arch/ia64/Kconfig
···
  config IA64_MCA_RECOVERY
  	tristate "MCA recovery from errors other than TLB."

- config PERFMON
- 	bool "Performance monitor support"
- 	depends on BROKEN
- 	help
- 	  Selects whether support for the IA-64 performance monitor hardware
- 	  is included in the kernel. This makes some kernel data-structures a
- 	  little bigger and slows down execution a bit, but it is generally
- 	  a good idea to turn this on. If you're unsure, say Y.
-
  config IA64_PALINFO
  	tristate "/proc/pal support"
  	help
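
The "depends on BROKEN" line is what the commit message refers to: BROKEN is a Kconfig symbol with no prompt and no default, so it is never set, and any option that depends on it cannot be enabled in any configuration. That is how perfmon had already been "disabled for all builds" before this removal.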
-1
arch/ia64/configs/bigsur_defconfig
···
  CONFIG_NR_CPUS=2
  CONFIG_PREEMPT=y
  # CONFIG_VIRTUAL_MEM_MAP is not set
- CONFIG_PERFMON=y
  CONFIG_IA64_PALINFO=y
  CONFIG_EFI_VARS=y
  CONFIG_BINFMT_MISC=m
-1
arch/ia64/configs/generic_defconfig
···
  CONFIG_SMP=y
  CONFIG_HOTPLUG_CPU=y
  CONFIG_IA64_MCA_RECOVERY=y
- CONFIG_PERFMON=y
  CONFIG_IA64_PALINFO=y
  CONFIG_KEXEC=y
  CONFIG_CRASH_DUMP=y
-1
arch/ia64/configs/gensparse_defconfig
···
  CONFIG_HOTPLUG_CPU=y
  CONFIG_SPARSEMEM_MANUAL=y
  CONFIG_IA64_MCA_RECOVERY=y
- CONFIG_PERFMON=y
  CONFIG_IA64_PALINFO=y
  CONFIG_EFI_VARS=y
  CONFIG_BINFMT_MISC=m
-1
arch/ia64/configs/tiger_defconfig
···
  CONFIG_PERMIT_BSP_REMOVE=y
  CONFIG_FORCE_CPEI_RETARGET=y
  CONFIG_IA64_MCA_RECOVERY=y
- CONFIG_PERFMON=y
  CONFIG_IA64_PALINFO=y
  CONFIG_KEXEC=y
  CONFIG_EFI_VARS=y
-1
arch/ia64/configs/zx1_defconfig
···
  CONFIG_HOTPLUG_CPU=y
  CONFIG_FLATMEM_MANUAL=y
  CONFIG_IA64_MCA_RECOVERY=y
- CONFIG_PERFMON=y
  CONFIG_IA64_PALINFO=y
  CONFIG_CRASH_DUMP=y
  CONFIG_EFI_VARS=y
-10
arch/ia64/include/asm/processor.h
···
  	__u64 map_base;		/* base address for get_unmapped_area() */
  	__u64 rbs_bot;		/* the base address for the RBS */
  	int last_fph_cpu;	/* CPU that may hold the contents of f32-f127 */
-
- #ifdef CONFIG_PERFMON
- 	void *pfm_context;		   /* pointer to detailed PMU context */
- 	unsigned long pfm_needs_checking;  /* when >0, pending perfmon work on kernel exit */
- # define INIT_THREAD_PM		.pfm_context =		NULL,	\
- 				.pfm_needs_checking =	0UL,
- #else
- # define INIT_THREAD_PM
- #endif
  	unsigned long dbr[IA64_NUM_DBG_REGS];
  	unsigned long ibr[IA64_NUM_DBG_REGS];
  	struct ia64_fpreg fph[96];	/* saved/loaded on demand */
···
  	.map_base =	DEFAULT_MAP_BASE,			\
  	.rbs_bot =	STACK_TOP - DEFAULT_USER_STACK_SIZE,	\
  	.last_fph_cpu =	-1,					\
- 	INIT_THREAD_PM						\
  	.dbr =		{0, },					\
  	.ibr =		{0, },					\
  	.fph =		{{{{0}}}, }				\
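
For readers unfamiliar with the idiom being deleted here: a config-dependent struct member is paired with an INIT_* helper macro that expands either to the member's designated initializers or to nothing, so a single static initializer works in both configurations. A minimal stand-alone sketch of the pattern, with hypothetical names (example_thread, feature_ctx) rather than the kernel's own:

    #include <stddef.h>

    #define CONFIG_EXAMPLE_FEATURE 1	/* stands in for CONFIG_PERFMON */

    struct example_thread {
    	int last_fph_cpu;
    #ifdef CONFIG_EXAMPLE_FEATURE
    	void *feature_ctx;		/* per-thread feature state */
    	unsigned long feature_pending;	/* pending work on kernel exit */
    # define INIT_THREAD_FEATURE	.feature_ctx = NULL,		\
    				.feature_pending = 0UL,
    #else
    # define INIT_THREAD_FEATURE
    #endif
    	unsigned long dbr[8];
    };

    /* The helper macro splices in the optional members' defaults (note the
     * trailing comma it carries), so this one definition is valid whether
     * or not the feature is compiled in. */
    #define INIT_EXAMPLE_THREAD {		\
    	.last_fph_cpu = -1,		\
    	INIT_THREAD_FEATURE		\
    	.dbr = { 0, },			\
    }

    static struct example_thread init_thread = INIT_EXAMPLE_THREAD;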
+1 -9
arch/ia64/include/asm/switch_to.h
···
  extern void ia64_save_extra (struct task_struct *task);
  extern void ia64_load_extra (struct task_struct *task);

- #ifdef CONFIG_PERFMON
- DECLARE_PER_CPU(unsigned long, pfm_syst_info);
- # define PERFMON_IS_SYSWIDE() (__this_cpu_read(pfm_syst_info) & 0x1)
- #else
- # define PERFMON_IS_SYSWIDE() (0)
- #endif
-
  #define IA64_HAS_EXTRA_STATE(t)						\
- 	((t)->thread.flags & (IA64_THREAD_DBG_VALID|IA64_THREAD_PM_VALID)	\
- 		|| PERFMON_IS_SYSWIDE())
+ 	((t)->thread.flags & (IA64_THREAD_DBG_VALID|IA64_THREAD_PM_VALID))

  #define __switch_to(prev,next,last) do {	\
  	if (IA64_HAS_EXTRA_STATE(prev))		\
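
The PERFMON_IS_SYSWIDE() stub being removed followed the usual compile-out pattern: with the feature disabled, the macro expands to the constant 0, so the || in IA64_HAS_EXTRA_STATE() folds away and no per-CPU read is ever emitted. A small sketch of that pattern under hypothetical names:

    #include <stdbool.h>

    /* #define CONFIG_EXAMPLE_FEATURE 1 */	/* flip to compile the feature in */

    #ifdef CONFIG_EXAMPLE_FEATURE
    extern unsigned long example_syst_info;	/* stands in for the per-CPU word */
    # define EXAMPLE_IS_SYSWIDE()	(example_syst_info & 0x1)
    #else
    # define EXAMPLE_IS_SYSWIDE()	(0)	/* constant: branch is folded away */
    #endif

    #define EXAMPLE_FLAG_DBG_VALID	0x1UL
    #define EXAMPLE_FLAG_PM_VALID	0x2UL

    struct example_task { unsigned long flags; };

    /* With the feature off, this reduces to the plain flags test, which is
     * exactly what the +1 line in the diff above hard-codes. */
    static bool has_extra_state(const struct example_task *t)
    {
    	return (t->flags & (EXAMPLE_FLAG_DBG_VALID | EXAMPLE_FLAG_PM_VALID))
    		|| EXAMPLE_IS_SYSWIDE();
    }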
+1 -2
arch/ia64/kernel/Makefile
···
  extra-y	:= head.o vmlinux.lds

  obj-y := entry.o efi.o efi_stub.o gate-data.o fsys.o ia64_ksyms.o irq.o irq_ia64.o	\
- 	 irq_lsapic.o ivt.o pal.o patch.o process.o perfmon.o ptrace.o sal.o		\
+ 	 irq_lsapic.o ivt.o pal.o patch.o process.o ptrace.o sal.o			\
  	 salinfo.o setup.o signal.o sys_ia64.o time.o traps.o unaligned.o		\
  	 unwind.o mca.o mca_asm.o topology.o dma-mapping.o iosapic.o acpi.o		\
  	 acpi-ext.o
···
  obj-$(CONFIG_MODULES)		+= module.o
  obj-$(CONFIG_SMP)		+= smp.o smpboot.o
  obj-$(CONFIG_NUMA)		+= numa.o
- obj-$(CONFIG_PERFMON)		+= perfmon_default_smpl.o
  obj-$(CONFIG_IA64_CYCLONE)	+= cyclone.o
  obj-$(CONFIG_IA64_MCA_RECOVERY)	+= mca_recovery.o
  obj-$(CONFIG_KPROBES)		+= kprobes.o
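
Note the asymmetry being cleaned up here: perfmon.o was linked unconditionally via obj-y, with its body guarded by #ifdef CONFIG_PERFMON inside the source file (visible at the top of the perfmon.c diff below), while only the default sampling format perfmon_default_smpl.o was gated through obj-$(CONFIG_PERFMON) in the Makefile.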
-7
arch/ia64/kernel/irq_ia64.c
···
  #include <asm/hw_irq.h>
  #include <asm/tlbflush.h>

- #ifdef CONFIG_PERFMON
- # include <asm/perfmon.h>
- #endif
-
  #define IRQ_DEBUG	0

  #define IRQ_VECTOR_UNASSIGNED	(0)
···
  			smp_irq_move_cleanup_interrupt, 0,
  			"irq_move");
  	}
- #endif
- #ifdef CONFIG_PERFMON
- 	pfm_init_percpu();
  #endif
  }
-6703
arch/ia64/kernel/perfmon.c
··· 1 - // SPDX-License-Identifier: GPL-2.0-only 2 - /* 3 - * This file implements the perfmon-2 subsystem which is used 4 - * to program the IA-64 Performance Monitoring Unit (PMU). 5 - * 6 - * The initial version of perfmon.c was written by 7 - * Ganesh Venkitachalam, IBM Corp. 8 - * 9 - * Then it was modified for perfmon-1.x by Stephane Eranian and 10 - * David Mosberger, Hewlett Packard Co. 11 - * 12 - * Version Perfmon-2.x is a rewrite of perfmon-1.x 13 - * by Stephane Eranian, Hewlett Packard Co. 14 - * 15 - * Copyright (C) 1999-2005 Hewlett Packard Co 16 - * Stephane Eranian <eranian@hpl.hp.com> 17 - * David Mosberger-Tang <davidm@hpl.hp.com> 18 - * 19 - * More information about perfmon available at: 20 - * http://www.hpl.hp.com/research/linux/perfmon 21 - */ 22 - 23 - #include <linux/module.h> 24 - #include <linux/kernel.h> 25 - #include <linux/sched.h> 26 - #include <linux/sched/task.h> 27 - #include <linux/sched/task_stack.h> 28 - #include <linux/interrupt.h> 29 - #include <linux/proc_fs.h> 30 - #include <linux/seq_file.h> 31 - #include <linux/init.h> 32 - #include <linux/vmalloc.h> 33 - #include <linux/mm.h> 34 - #include <linux/sysctl.h> 35 - #include <linux/list.h> 36 - #include <linux/file.h> 37 - #include <linux/poll.h> 38 - #include <linux/vfs.h> 39 - #include <linux/smp.h> 40 - #include <linux/pagemap.h> 41 - #include <linux/mount.h> 42 - #include <linux/pseudo_fs.h> 43 - #include <linux/bitops.h> 44 - #include <linux/capability.h> 45 - #include <linux/rcupdate.h> 46 - #include <linux/completion.h> 47 - #include <linux/tracehook.h> 48 - #include <linux/slab.h> 49 - #include <linux/cpu.h> 50 - 51 - #include <asm/errno.h> 52 - #include <asm/intrinsics.h> 53 - #include <asm/page.h> 54 - #include <asm/perfmon.h> 55 - #include <asm/processor.h> 56 - #include <asm/signal.h> 57 - #include <linux/uaccess.h> 58 - #include <asm/delay.h> 59 - 60 - #include "irq.h" 61 - 62 - #ifdef CONFIG_PERFMON 63 - /* 64 - * perfmon context state 65 - */ 66 - #define PFM_CTX_UNLOADED 1 /* context is not loaded onto any task */ 67 - #define PFM_CTX_LOADED 2 /* context is loaded onto a task */ 68 - #define PFM_CTX_MASKED 3 /* context is loaded but monitoring is masked due to overflow */ 69 - #define PFM_CTX_ZOMBIE 4 /* owner of the context is closing it */ 70 - 71 - #define PFM_INVALID_ACTIVATION (~0UL) 72 - 73 - #define PFM_NUM_PMC_REGS 64 /* PMC save area for ctxsw */ 74 - #define PFM_NUM_PMD_REGS 64 /* PMD save area for ctxsw */ 75 - 76 - /* 77 - * depth of message queue 78 - */ 79 - #define PFM_MAX_MSGS 32 80 - #define PFM_CTXQ_EMPTY(g) ((g)->ctx_msgq_head == (g)->ctx_msgq_tail) 81 - 82 - /* 83 - * type of a PMU register (bitmask). 
84 - * bitmask structure: 85 - * bit0 : register implemented 86 - * bit1 : end marker 87 - * bit2-3 : reserved 88 - * bit4 : pmc has pmc.pm 89 - * bit5 : pmc controls a counter (has pmc.oi), pmd is used as counter 90 - * bit6-7 : register type 91 - * bit8-31: reserved 92 - */ 93 - #define PFM_REG_NOTIMPL 0x0 /* not implemented at all */ 94 - #define PFM_REG_IMPL 0x1 /* register implemented */ 95 - #define PFM_REG_END 0x2 /* end marker */ 96 - #define PFM_REG_MONITOR (0x1<<4|PFM_REG_IMPL) /* a PMC with a pmc.pm field only */ 97 - #define PFM_REG_COUNTING (0x2<<4|PFM_REG_MONITOR) /* a monitor + pmc.oi+ PMD used as a counter */ 98 - #define PFM_REG_CONTROL (0x4<<4|PFM_REG_IMPL) /* PMU control register */ 99 - #define PFM_REG_CONFIG (0x8<<4|PFM_REG_IMPL) /* configuration register */ 100 - #define PFM_REG_BUFFER (0xc<<4|PFM_REG_IMPL) /* PMD used as buffer */ 101 - 102 - #define PMC_IS_LAST(i) (pmu_conf->pmc_desc[i].type & PFM_REG_END) 103 - #define PMD_IS_LAST(i) (pmu_conf->pmd_desc[i].type & PFM_REG_END) 104 - 105 - #define PMC_OVFL_NOTIFY(ctx, i) ((ctx)->ctx_pmds[i].flags & PFM_REGFL_OVFL_NOTIFY) 106 - 107 - /* i assumed unsigned */ 108 - #define PMC_IS_IMPL(i) (i< PMU_MAX_PMCS && (pmu_conf->pmc_desc[i].type & PFM_REG_IMPL)) 109 - #define PMD_IS_IMPL(i) (i< PMU_MAX_PMDS && (pmu_conf->pmd_desc[i].type & PFM_REG_IMPL)) 110 - 111 - /* XXX: these assume that register i is implemented */ 112 - #define PMD_IS_COUNTING(i) ((pmu_conf->pmd_desc[i].type & PFM_REG_COUNTING) == PFM_REG_COUNTING) 113 - #define PMC_IS_COUNTING(i) ((pmu_conf->pmc_desc[i].type & PFM_REG_COUNTING) == PFM_REG_COUNTING) 114 - #define PMC_IS_MONITOR(i) ((pmu_conf->pmc_desc[i].type & PFM_REG_MONITOR) == PFM_REG_MONITOR) 115 - #define PMC_IS_CONTROL(i) ((pmu_conf->pmc_desc[i].type & PFM_REG_CONTROL) == PFM_REG_CONTROL) 116 - 117 - #define PMC_DFL_VAL(i) pmu_conf->pmc_desc[i].default_value 118 - #define PMC_RSVD_MASK(i) pmu_conf->pmc_desc[i].reserved_mask 119 - #define PMD_PMD_DEP(i) pmu_conf->pmd_desc[i].dep_pmd[0] 120 - #define PMC_PMD_DEP(i) pmu_conf->pmc_desc[i].dep_pmd[0] 121 - 122 - #define PFM_NUM_IBRS IA64_NUM_DBG_REGS 123 - #define PFM_NUM_DBRS IA64_NUM_DBG_REGS 124 - 125 - #define CTX_OVFL_NOBLOCK(c) ((c)->ctx_fl_block == 0) 126 - #define CTX_HAS_SMPL(c) ((c)->ctx_fl_is_sampling) 127 - #define PFM_CTX_TASK(h) (h)->ctx_task 128 - 129 - #define PMU_PMC_OI 5 /* position of pmc.oi bit */ 130 - 131 - /* XXX: does not support more than 64 PMDs */ 132 - #define CTX_USED_PMD(ctx, mask) (ctx)->ctx_used_pmds[0] |= (mask) 133 - #define CTX_IS_USED_PMD(ctx, c) (((ctx)->ctx_used_pmds[0] & (1UL << (c))) != 0UL) 134 - 135 - #define CTX_USED_MONITOR(ctx, mask) (ctx)->ctx_used_monitors[0] |= (mask) 136 - 137 - #define CTX_USED_IBR(ctx,n) (ctx)->ctx_used_ibrs[(n)>>6] |= 1UL<< ((n) % 64) 138 - #define CTX_USED_DBR(ctx,n) (ctx)->ctx_used_dbrs[(n)>>6] |= 1UL<< ((n) % 64) 139 - #define CTX_USES_DBREGS(ctx) (((pfm_context_t *)(ctx))->ctx_fl_using_dbreg==1) 140 - #define PFM_CODE_RR 0 /* requesting code range restriction */ 141 - #define PFM_DATA_RR 1 /* requestion data range restriction */ 142 - 143 - #define PFM_CPUINFO_CLEAR(v) pfm_get_cpu_var(pfm_syst_info) &= ~(v) 144 - #define PFM_CPUINFO_SET(v) pfm_get_cpu_var(pfm_syst_info) |= (v) 145 - #define PFM_CPUINFO_GET() pfm_get_cpu_var(pfm_syst_info) 146 - 147 - #define RDEP(x) (1UL<<(x)) 148 - 149 - /* 150 - * context protection macros 151 - * in SMP: 152 - * - we need to protect against CPU concurrency (spin_lock) 153 - * - we need to protect against PMU overflow interrupts (local_irq_disable) 
154 - * in UP: 155 - * - we need to protect against PMU overflow interrupts (local_irq_disable) 156 - * 157 - * spin_lock_irqsave()/spin_unlock_irqrestore(): 158 - * in SMP: local_irq_disable + spin_lock 159 - * in UP : local_irq_disable 160 - * 161 - * spin_lock()/spin_lock(): 162 - * in UP : removed automatically 163 - * in SMP: protect against context accesses from other CPU. interrupts 164 - * are not masked. This is useful for the PMU interrupt handler 165 - * because we know we will not get PMU concurrency in that code. 166 - */ 167 - #define PROTECT_CTX(c, f) \ 168 - do { \ 169 - DPRINT(("spinlock_irq_save ctx %p by [%d]\n", c, task_pid_nr(current))); \ 170 - spin_lock_irqsave(&(c)->ctx_lock, f); \ 171 - DPRINT(("spinlocked ctx %p by [%d]\n", c, task_pid_nr(current))); \ 172 - } while(0) 173 - 174 - #define UNPROTECT_CTX(c, f) \ 175 - do { \ 176 - DPRINT(("spinlock_irq_restore ctx %p by [%d]\n", c, task_pid_nr(current))); \ 177 - spin_unlock_irqrestore(&(c)->ctx_lock, f); \ 178 - } while(0) 179 - 180 - #define PROTECT_CTX_NOPRINT(c, f) \ 181 - do { \ 182 - spin_lock_irqsave(&(c)->ctx_lock, f); \ 183 - } while(0) 184 - 185 - 186 - #define UNPROTECT_CTX_NOPRINT(c, f) \ 187 - do { \ 188 - spin_unlock_irqrestore(&(c)->ctx_lock, f); \ 189 - } while(0) 190 - 191 - 192 - #define PROTECT_CTX_NOIRQ(c) \ 193 - do { \ 194 - spin_lock(&(c)->ctx_lock); \ 195 - } while(0) 196 - 197 - #define UNPROTECT_CTX_NOIRQ(c) \ 198 - do { \ 199 - spin_unlock(&(c)->ctx_lock); \ 200 - } while(0) 201 - 202 - 203 - #ifdef CONFIG_SMP 204 - 205 - #define GET_ACTIVATION() pfm_get_cpu_var(pmu_activation_number) 206 - #define INC_ACTIVATION() pfm_get_cpu_var(pmu_activation_number)++ 207 - #define SET_ACTIVATION(c) (c)->ctx_last_activation = GET_ACTIVATION() 208 - 209 - #else /* !CONFIG_SMP */ 210 - #define SET_ACTIVATION(t) do {} while(0) 211 - #define GET_ACTIVATION(t) do {} while(0) 212 - #define INC_ACTIVATION(t) do {} while(0) 213 - #endif /* CONFIG_SMP */ 214 - 215 - #define SET_PMU_OWNER(t, c) do { pfm_get_cpu_var(pmu_owner) = (t); pfm_get_cpu_var(pmu_ctx) = (c); } while(0) 216 - #define GET_PMU_OWNER() pfm_get_cpu_var(pmu_owner) 217 - #define GET_PMU_CTX() pfm_get_cpu_var(pmu_ctx) 218 - 219 - #define LOCK_PFS(g) spin_lock_irqsave(&pfm_sessions.pfs_lock, g) 220 - #define UNLOCK_PFS(g) spin_unlock_irqrestore(&pfm_sessions.pfs_lock, g) 221 - 222 - #define PFM_REG_RETFLAG_SET(flags, val) do { flags &= ~PFM_REG_RETFL_MASK; flags |= (val); } while(0) 223 - 224 - /* 225 - * cmp0 must be the value of pmc0 226 - */ 227 - #define PMC0_HAS_OVFL(cmp0) (cmp0 & ~0x1UL) 228 - 229 - #define PFMFS_MAGIC 0xa0b4d889 230 - 231 - /* 232 - * debugging 233 - */ 234 - #define PFM_DEBUGGING 1 235 - #ifdef PFM_DEBUGGING 236 - #define DPRINT(a) \ 237 - do { \ 238 - if (unlikely(pfm_sysctl.debug >0)) { printk("%s.%d: CPU%d [%d] ", __func__, __LINE__, smp_processor_id(), task_pid_nr(current)); printk a; } \ 239 - } while (0) 240 - 241 - #define DPRINT_ovfl(a) \ 242 - do { \ 243 - if (unlikely(pfm_sysctl.debug > 0 && pfm_sysctl.debug_ovfl >0)) { printk("%s.%d: CPU%d [%d] ", __func__, __LINE__, smp_processor_id(), task_pid_nr(current)); printk a; } \ 244 - } while (0) 245 - #endif 246 - 247 - /* 248 - * 64-bit software counter structure 249 - * 250 - * the next_reset_type is applied to the next call to pfm_reset_regs() 251 - */ 252 - typedef struct { 253 - unsigned long val; /* virtual 64bit counter value */ 254 - unsigned long lval; /* last reset value */ 255 - unsigned long long_reset; /* reset value on sampling overflow */ 256 - unsigned 
long short_reset; /* reset value on overflow */ 257 - unsigned long reset_pmds[4]; /* which other pmds to reset when this counter overflows */ 258 - unsigned long smpl_pmds[4]; /* which pmds are accessed when counter overflow */ 259 - unsigned long seed; /* seed for random-number generator */ 260 - unsigned long mask; /* mask for random-number generator */ 261 - unsigned int flags; /* notify/do not notify */ 262 - unsigned long eventid; /* overflow event identifier */ 263 - } pfm_counter_t; 264 - 265 - /* 266 - * context flags 267 - */ 268 - typedef struct { 269 - unsigned int block:1; /* when 1, task will blocked on user notifications */ 270 - unsigned int system:1; /* do system wide monitoring */ 271 - unsigned int using_dbreg:1; /* using range restrictions (debug registers) */ 272 - unsigned int is_sampling:1; /* true if using a custom format */ 273 - unsigned int excl_idle:1; /* exclude idle task in system wide session */ 274 - unsigned int going_zombie:1; /* context is zombie (MASKED+blocking) */ 275 - unsigned int trap_reason:2; /* reason for going into pfm_handle_work() */ 276 - unsigned int no_msg:1; /* no message sent on overflow */ 277 - unsigned int can_restart:1; /* allowed to issue a PFM_RESTART */ 278 - unsigned int reserved:22; 279 - } pfm_context_flags_t; 280 - 281 - #define PFM_TRAP_REASON_NONE 0x0 /* default value */ 282 - #define PFM_TRAP_REASON_BLOCK 0x1 /* we need to block on overflow */ 283 - #define PFM_TRAP_REASON_RESET 0x2 /* we need to reset PMDs */ 284 - 285 - 286 - /* 287 - * perfmon context: encapsulates all the state of a monitoring session 288 - */ 289 - 290 - typedef struct pfm_context { 291 - spinlock_t ctx_lock; /* context protection */ 292 - 293 - pfm_context_flags_t ctx_flags; /* bitmask of flags (block reason incl.) 
*/ 294 - unsigned int ctx_state; /* state: active/inactive (no bitfield) */ 295 - 296 - struct task_struct *ctx_task; /* task to which context is attached */ 297 - 298 - unsigned long ctx_ovfl_regs[4]; /* which registers overflowed (notification) */ 299 - 300 - struct completion ctx_restart_done; /* use for blocking notification mode */ 301 - 302 - unsigned long ctx_used_pmds[4]; /* bitmask of PMD used */ 303 - unsigned long ctx_all_pmds[4]; /* bitmask of all accessible PMDs */ 304 - unsigned long ctx_reload_pmds[4]; /* bitmask of force reload PMD on ctxsw in */ 305 - 306 - unsigned long ctx_all_pmcs[4]; /* bitmask of all accessible PMCs */ 307 - unsigned long ctx_reload_pmcs[4]; /* bitmask of force reload PMC on ctxsw in */ 308 - unsigned long ctx_used_monitors[4]; /* bitmask of monitor PMC being used */ 309 - 310 - unsigned long ctx_pmcs[PFM_NUM_PMC_REGS]; /* saved copies of PMC values */ 311 - 312 - unsigned int ctx_used_ibrs[1]; /* bitmask of used IBR (speedup ctxsw in) */ 313 - unsigned int ctx_used_dbrs[1]; /* bitmask of used DBR (speedup ctxsw in) */ 314 - unsigned long ctx_dbrs[IA64_NUM_DBG_REGS]; /* DBR values (cache) when not loaded */ 315 - unsigned long ctx_ibrs[IA64_NUM_DBG_REGS]; /* IBR values (cache) when not loaded */ 316 - 317 - pfm_counter_t ctx_pmds[PFM_NUM_PMD_REGS]; /* software state for PMDS */ 318 - 319 - unsigned long th_pmcs[PFM_NUM_PMC_REGS]; /* PMC thread save state */ 320 - unsigned long th_pmds[PFM_NUM_PMD_REGS]; /* PMD thread save state */ 321 - 322 - unsigned long ctx_saved_psr_up; /* only contains psr.up value */ 323 - 324 - unsigned long ctx_last_activation; /* context last activation number for last_cpu */ 325 - unsigned int ctx_last_cpu; /* CPU id of current or last CPU used (SMP only) */ 326 - unsigned int ctx_cpu; /* cpu to which perfmon is applied (system wide) */ 327 - 328 - int ctx_fd; /* file descriptor used my this context */ 329 - pfm_ovfl_arg_t ctx_ovfl_arg; /* argument to custom buffer format handler */ 330 - 331 - pfm_buffer_fmt_t *ctx_buf_fmt; /* buffer format callbacks */ 332 - void *ctx_smpl_hdr; /* points to sampling buffer header kernel vaddr */ 333 - unsigned long ctx_smpl_size; /* size of sampling buffer */ 334 - void *ctx_smpl_vaddr; /* user level virtual address of smpl buffer */ 335 - 336 - wait_queue_head_t ctx_msgq_wait; 337 - pfm_msg_t ctx_msgq[PFM_MAX_MSGS]; 338 - int ctx_msgq_head; 339 - int ctx_msgq_tail; 340 - struct fasync_struct *ctx_async_queue; 341 - 342 - wait_queue_head_t ctx_zombieq; /* termination cleanup wait queue */ 343 - } pfm_context_t; 344 - 345 - /* 346 - * magic number used to verify that structure is really 347 - * a perfmon context 348 - */ 349 - #define PFM_IS_FILE(f) ((f)->f_op == &pfm_file_ops) 350 - 351 - #define PFM_GET_CTX(t) ((pfm_context_t *)(t)->thread.pfm_context) 352 - 353 - #ifdef CONFIG_SMP 354 - #define SET_LAST_CPU(ctx, v) (ctx)->ctx_last_cpu = (v) 355 - #define GET_LAST_CPU(ctx) (ctx)->ctx_last_cpu 356 - #else 357 - #define SET_LAST_CPU(ctx, v) do {} while(0) 358 - #define GET_LAST_CPU(ctx) do {} while(0) 359 - #endif 360 - 361 - 362 - #define ctx_fl_block ctx_flags.block 363 - #define ctx_fl_system ctx_flags.system 364 - #define ctx_fl_using_dbreg ctx_flags.using_dbreg 365 - #define ctx_fl_is_sampling ctx_flags.is_sampling 366 - #define ctx_fl_excl_idle ctx_flags.excl_idle 367 - #define ctx_fl_going_zombie ctx_flags.going_zombie 368 - #define ctx_fl_trap_reason ctx_flags.trap_reason 369 - #define ctx_fl_no_msg ctx_flags.no_msg 370 - #define ctx_fl_can_restart ctx_flags.can_restart 371 - 372 - 
#define PFM_SET_WORK_PENDING(t, v) do { (t)->thread.pfm_needs_checking = v; } while(0); 373 - #define PFM_GET_WORK_PENDING(t) (t)->thread.pfm_needs_checking 374 - 375 - /* 376 - * global information about all sessions 377 - * mostly used to synchronize between system wide and per-process 378 - */ 379 - typedef struct { 380 - spinlock_t pfs_lock; /* lock the structure */ 381 - 382 - unsigned int pfs_task_sessions; /* number of per task sessions */ 383 - unsigned int pfs_sys_sessions; /* number of per system wide sessions */ 384 - unsigned int pfs_sys_use_dbregs; /* incremented when a system wide session uses debug regs */ 385 - unsigned int pfs_ptrace_use_dbregs; /* incremented when a process uses debug regs */ 386 - struct task_struct *pfs_sys_session[NR_CPUS]; /* point to task owning a system-wide session */ 387 - } pfm_session_t; 388 - 389 - /* 390 - * information about a PMC or PMD. 391 - * dep_pmd[]: a bitmask of dependent PMD registers 392 - * dep_pmc[]: a bitmask of dependent PMC registers 393 - */ 394 - typedef int (*pfm_reg_check_t)(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs); 395 - typedef struct { 396 - unsigned int type; 397 - int pm_pos; 398 - unsigned long default_value; /* power-on default value */ 399 - unsigned long reserved_mask; /* bitmask of reserved bits */ 400 - pfm_reg_check_t read_check; 401 - pfm_reg_check_t write_check; 402 - unsigned long dep_pmd[4]; 403 - unsigned long dep_pmc[4]; 404 - } pfm_reg_desc_t; 405 - 406 - /* assume cnum is a valid monitor */ 407 - #define PMC_PM(cnum, val) (((val) >> (pmu_conf->pmc_desc[cnum].pm_pos)) & 0x1) 408 - 409 - /* 410 - * This structure is initialized at boot time and contains 411 - * a description of the PMU main characteristics. 
412 - * 413 - * If the probe function is defined, detection is based 414 - * on its return value: 415 - * - 0 means recognized PMU 416 - * - anything else means not supported 417 - * When the probe function is not defined, then the pmu_family field 418 - * is used and it must match the host CPU family such that: 419 - * - cpu->family & config->pmu_family != 0 420 - */ 421 - typedef struct { 422 - unsigned long ovfl_val; /* overflow value for counters */ 423 - 424 - pfm_reg_desc_t *pmc_desc; /* detailed PMC register dependencies descriptions */ 425 - pfm_reg_desc_t *pmd_desc; /* detailed PMD register dependencies descriptions */ 426 - 427 - unsigned int num_pmcs; /* number of PMCS: computed at init time */ 428 - unsigned int num_pmds; /* number of PMDS: computed at init time */ 429 - unsigned long impl_pmcs[4]; /* bitmask of implemented PMCS */ 430 - unsigned long impl_pmds[4]; /* bitmask of implemented PMDS */ 431 - 432 - char *pmu_name; /* PMU family name */ 433 - unsigned int pmu_family; /* cpuid family pattern used to identify pmu */ 434 - unsigned int flags; /* pmu specific flags */ 435 - unsigned int num_ibrs; /* number of IBRS: computed at init time */ 436 - unsigned int num_dbrs; /* number of DBRS: computed at init time */ 437 - unsigned int num_counters; /* PMC/PMD counting pairs : computed at init time */ 438 - int (*probe)(void); /* customized probe routine */ 439 - unsigned int use_rr_dbregs:1; /* set if debug registers used for range restriction */ 440 - } pmu_config_t; 441 - /* 442 - * PMU specific flags 443 - */ 444 - #define PFM_PMU_IRQ_RESEND 1 /* PMU needs explicit IRQ resend */ 445 - 446 - /* 447 - * debug register related type definitions 448 - */ 449 - typedef struct { 450 - unsigned long ibr_mask:56; 451 - unsigned long ibr_plm:4; 452 - unsigned long ibr_ig:3; 453 - unsigned long ibr_x:1; 454 - } ibr_mask_reg_t; 455 - 456 - typedef struct { 457 - unsigned long dbr_mask:56; 458 - unsigned long dbr_plm:4; 459 - unsigned long dbr_ig:2; 460 - unsigned long dbr_w:1; 461 - unsigned long dbr_r:1; 462 - } dbr_mask_reg_t; 463 - 464 - typedef union { 465 - unsigned long val; 466 - ibr_mask_reg_t ibr; 467 - dbr_mask_reg_t dbr; 468 - } dbreg_t; 469 - 470 - 471 - /* 472 - * perfmon command descriptions 473 - */ 474 - typedef struct { 475 - int (*cmd_func)(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs); 476 - char *cmd_name; 477 - int cmd_flags; 478 - unsigned int cmd_narg; 479 - size_t cmd_argsize; 480 - int (*cmd_getsize)(void *arg, size_t *sz); 481 - } pfm_cmd_desc_t; 482 - 483 - #define PFM_CMD_FD 0x01 /* command requires a file descriptor */ 484 - #define PFM_CMD_ARG_READ 0x02 /* command must read argument(s) */ 485 - #define PFM_CMD_ARG_RW 0x04 /* command must read/write argument(s) */ 486 - #define PFM_CMD_STOP 0x08 /* command does not work on zombie context */ 487 - 488 - 489 - #define PFM_CMD_NAME(cmd) pfm_cmd_tab[(cmd)].cmd_name 490 - #define PFM_CMD_READ_ARG(cmd) (pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_ARG_READ) 491 - #define PFM_CMD_RW_ARG(cmd) (pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_ARG_RW) 492 - #define PFM_CMD_USE_FD(cmd) (pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_FD) 493 - #define PFM_CMD_STOPPED(cmd) (pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_STOP) 494 - 495 - #define PFM_CMD_ARG_MANY -1 /* cannot be zero */ 496 - 497 - typedef struct { 498 - unsigned long pfm_spurious_ovfl_intr_count; /* keep track of spurious ovfl interrupts */ 499 - unsigned long pfm_replay_ovfl_intr_count; /* keep track of replayed ovfl interrupts */ 500 - unsigned long 
pfm_ovfl_intr_count; /* keep track of ovfl interrupts */ 501 - unsigned long pfm_ovfl_intr_cycles; /* cycles spent processing ovfl interrupts */ 502 - unsigned long pfm_ovfl_intr_cycles_min; /* min cycles spent processing ovfl interrupts */ 503 - unsigned long pfm_ovfl_intr_cycles_max; /* max cycles spent processing ovfl interrupts */ 504 - unsigned long pfm_smpl_handler_calls; 505 - unsigned long pfm_smpl_handler_cycles; 506 - char pad[SMP_CACHE_BYTES] ____cacheline_aligned; 507 - } pfm_stats_t; 508 - 509 - /* 510 - * perfmon internal variables 511 - */ 512 - static pfm_stats_t pfm_stats[NR_CPUS]; 513 - static pfm_session_t pfm_sessions; /* global sessions information */ 514 - 515 - static DEFINE_SPINLOCK(pfm_alt_install_check); 516 - static pfm_intr_handler_desc_t *pfm_alt_intr_handler; 517 - 518 - static struct proc_dir_entry *perfmon_dir; 519 - static pfm_uuid_t pfm_null_uuid = {0,}; 520 - 521 - static spinlock_t pfm_buffer_fmt_lock; 522 - static LIST_HEAD(pfm_buffer_fmt_list); 523 - 524 - static pmu_config_t *pmu_conf; 525 - 526 - /* sysctl() controls */ 527 - pfm_sysctl_t pfm_sysctl; 528 - EXPORT_SYMBOL(pfm_sysctl); 529 - 530 - static struct ctl_table pfm_ctl_table[] = { 531 - { 532 - .procname = "debug", 533 - .data = &pfm_sysctl.debug, 534 - .maxlen = sizeof(int), 535 - .mode = 0666, 536 - .proc_handler = proc_dointvec, 537 - }, 538 - { 539 - .procname = "debug_ovfl", 540 - .data = &pfm_sysctl.debug_ovfl, 541 - .maxlen = sizeof(int), 542 - .mode = 0666, 543 - .proc_handler = proc_dointvec, 544 - }, 545 - { 546 - .procname = "fastctxsw", 547 - .data = &pfm_sysctl.fastctxsw, 548 - .maxlen = sizeof(int), 549 - .mode = 0600, 550 - .proc_handler = proc_dointvec, 551 - }, 552 - { 553 - .procname = "expert_mode", 554 - .data = &pfm_sysctl.expert_mode, 555 - .maxlen = sizeof(int), 556 - .mode = 0600, 557 - .proc_handler = proc_dointvec, 558 - }, 559 - {} 560 - }; 561 - static struct ctl_table pfm_sysctl_dir[] = { 562 - { 563 - .procname = "perfmon", 564 - .mode = 0555, 565 - .child = pfm_ctl_table, 566 - }, 567 - {} 568 - }; 569 - static struct ctl_table pfm_sysctl_root[] = { 570 - { 571 - .procname = "kernel", 572 - .mode = 0555, 573 - .child = pfm_sysctl_dir, 574 - }, 575 - {} 576 - }; 577 - static struct ctl_table_header *pfm_sysctl_header; 578 - 579 - static int pfm_context_unload(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs); 580 - 581 - #define pfm_get_cpu_var(v) __ia64_per_cpu_var(v) 582 - #define pfm_get_cpu_data(a,b) per_cpu(a, b) 583 - 584 - static inline void 585 - pfm_put_task(struct task_struct *task) 586 - { 587 - if (task != current) put_task_struct(task); 588 - } 589 - 590 - static inline unsigned long 591 - pfm_protect_ctx_ctxsw(pfm_context_t *x) 592 - { 593 - spin_lock(&(x)->ctx_lock); 594 - return 0UL; 595 - } 596 - 597 - static inline void 598 - pfm_unprotect_ctx_ctxsw(pfm_context_t *x, unsigned long f) 599 - { 600 - spin_unlock(&(x)->ctx_lock); 601 - } 602 - 603 - /* forward declaration */ 604 - static const struct dentry_operations pfmfs_dentry_operations; 605 - 606 - static int pfmfs_init_fs_context(struct fs_context *fc) 607 - { 608 - struct pseudo_fs_context *ctx = init_pseudo(fc, PFMFS_MAGIC); 609 - if (!ctx) 610 - return -ENOMEM; 611 - ctx->dops = &pfmfs_dentry_operations; 612 - return 0; 613 - } 614 - 615 - static struct file_system_type pfm_fs_type = { 616 - .name = "pfmfs", 617 - .init_fs_context = pfmfs_init_fs_context, 618 - .kill_sb = kill_anon_super, 619 - }; 620 - MODULE_ALIAS_FS("pfmfs"); 621 - 622 - DEFINE_PER_CPU(unsigned long, 
pfm_syst_info); 623 - DEFINE_PER_CPU(struct task_struct *, pmu_owner); 624 - DEFINE_PER_CPU(pfm_context_t *, pmu_ctx); 625 - DEFINE_PER_CPU(unsigned long, pmu_activation_number); 626 - EXPORT_PER_CPU_SYMBOL_GPL(pfm_syst_info); 627 - 628 - 629 - /* forward declaration */ 630 - static const struct file_operations pfm_file_ops; 631 - 632 - /* 633 - * forward declarations 634 - */ 635 - #ifndef CONFIG_SMP 636 - static void pfm_lazy_save_regs (struct task_struct *ta); 637 - #endif 638 - 639 - void dump_pmu_state(const char *); 640 - static int pfm_write_ibr_dbr(int mode, pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs); 641 - 642 - #include "perfmon_itanium.h" 643 - #include "perfmon_mckinley.h" 644 - #include "perfmon_montecito.h" 645 - #include "perfmon_generic.h" 646 - 647 - static pmu_config_t *pmu_confs[]={ 648 - &pmu_conf_mont, 649 - &pmu_conf_mck, 650 - &pmu_conf_ita, 651 - &pmu_conf_gen, /* must be last */ 652 - NULL 653 - }; 654 - 655 - 656 - static int pfm_end_notify_user(pfm_context_t *ctx); 657 - 658 - static inline void 659 - pfm_clear_psr_pp(void) 660 - { 661 - ia64_rsm(IA64_PSR_PP); 662 - ia64_srlz_i(); 663 - } 664 - 665 - static inline void 666 - pfm_set_psr_pp(void) 667 - { 668 - ia64_ssm(IA64_PSR_PP); 669 - ia64_srlz_i(); 670 - } 671 - 672 - static inline void 673 - pfm_clear_psr_up(void) 674 - { 675 - ia64_rsm(IA64_PSR_UP); 676 - ia64_srlz_i(); 677 - } 678 - 679 - static inline void 680 - pfm_set_psr_up(void) 681 - { 682 - ia64_ssm(IA64_PSR_UP); 683 - ia64_srlz_i(); 684 - } 685 - 686 - static inline unsigned long 687 - pfm_get_psr(void) 688 - { 689 - unsigned long tmp; 690 - tmp = ia64_getreg(_IA64_REG_PSR); 691 - ia64_srlz_i(); 692 - return tmp; 693 - } 694 - 695 - static inline void 696 - pfm_set_psr_l(unsigned long val) 697 - { 698 - ia64_setreg(_IA64_REG_PSR_L, val); 699 - ia64_srlz_i(); 700 - } 701 - 702 - static inline void 703 - pfm_freeze_pmu(void) 704 - { 705 - ia64_set_pmc(0,1UL); 706 - ia64_srlz_d(); 707 - } 708 - 709 - static inline void 710 - pfm_unfreeze_pmu(void) 711 - { 712 - ia64_set_pmc(0,0UL); 713 - ia64_srlz_d(); 714 - } 715 - 716 - static inline void 717 - pfm_restore_ibrs(unsigned long *ibrs, unsigned int nibrs) 718 - { 719 - int i; 720 - 721 - for (i=0; i < nibrs; i++) { 722 - ia64_set_ibr(i, ibrs[i]); 723 - ia64_dv_serialize_instruction(); 724 - } 725 - ia64_srlz_i(); 726 - } 727 - 728 - static inline void 729 - pfm_restore_dbrs(unsigned long *dbrs, unsigned int ndbrs) 730 - { 731 - int i; 732 - 733 - for (i=0; i < ndbrs; i++) { 734 - ia64_set_dbr(i, dbrs[i]); 735 - ia64_dv_serialize_data(); 736 - } 737 - ia64_srlz_d(); 738 - } 739 - 740 - /* 741 - * PMD[i] must be a counter. no check is made 742 - */ 743 - static inline unsigned long 744 - pfm_read_soft_counter(pfm_context_t *ctx, int i) 745 - { 746 - return ctx->ctx_pmds[i].val + (ia64_get_pmd(i) & pmu_conf->ovfl_val); 747 - } 748 - 749 - /* 750 - * PMD[i] must be a counter. 
no check is made 751 - */ 752 - static inline void 753 - pfm_write_soft_counter(pfm_context_t *ctx, int i, unsigned long val) 754 - { 755 - unsigned long ovfl_val = pmu_conf->ovfl_val; 756 - 757 - ctx->ctx_pmds[i].val = val & ~ovfl_val; 758 - /* 759 - * writing to unimplemented part is ignore, so we do not need to 760 - * mask off top part 761 - */ 762 - ia64_set_pmd(i, val & ovfl_val); 763 - } 764 - 765 - static pfm_msg_t * 766 - pfm_get_new_msg(pfm_context_t *ctx) 767 - { 768 - int idx, next; 769 - 770 - next = (ctx->ctx_msgq_tail+1) % PFM_MAX_MSGS; 771 - 772 - DPRINT(("ctx_fd=%p head=%d tail=%d\n", ctx, ctx->ctx_msgq_head, ctx->ctx_msgq_tail)); 773 - if (next == ctx->ctx_msgq_head) return NULL; 774 - 775 - idx = ctx->ctx_msgq_tail; 776 - ctx->ctx_msgq_tail = next; 777 - 778 - DPRINT(("ctx=%p head=%d tail=%d msg=%d\n", ctx, ctx->ctx_msgq_head, ctx->ctx_msgq_tail, idx)); 779 - 780 - return ctx->ctx_msgq+idx; 781 - } 782 - 783 - static pfm_msg_t * 784 - pfm_get_next_msg(pfm_context_t *ctx) 785 - { 786 - pfm_msg_t *msg; 787 - 788 - DPRINT(("ctx=%p head=%d tail=%d\n", ctx, ctx->ctx_msgq_head, ctx->ctx_msgq_tail)); 789 - 790 - if (PFM_CTXQ_EMPTY(ctx)) return NULL; 791 - 792 - /* 793 - * get oldest message 794 - */ 795 - msg = ctx->ctx_msgq+ctx->ctx_msgq_head; 796 - 797 - /* 798 - * and move forward 799 - */ 800 - ctx->ctx_msgq_head = (ctx->ctx_msgq_head+1) % PFM_MAX_MSGS; 801 - 802 - DPRINT(("ctx=%p head=%d tail=%d type=%d\n", ctx, ctx->ctx_msgq_head, ctx->ctx_msgq_tail, msg->pfm_gen_msg.msg_type)); 803 - 804 - return msg; 805 - } 806 - 807 - static void 808 - pfm_reset_msgq(pfm_context_t *ctx) 809 - { 810 - ctx->ctx_msgq_head = ctx->ctx_msgq_tail = 0; 811 - DPRINT(("ctx=%p msgq reset\n", ctx)); 812 - } 813 - 814 - static pfm_context_t * 815 - pfm_context_alloc(int ctx_flags) 816 - { 817 - pfm_context_t *ctx; 818 - 819 - /* 820 - * allocate context descriptor 821 - * must be able to free with interrupts disabled 822 - */ 823 - ctx = kzalloc(sizeof(pfm_context_t), GFP_KERNEL); 824 - if (ctx) { 825 - DPRINT(("alloc ctx @%p\n", ctx)); 826 - 827 - /* 828 - * init context protection lock 829 - */ 830 - spin_lock_init(&ctx->ctx_lock); 831 - 832 - /* 833 - * context is unloaded 834 - */ 835 - ctx->ctx_state = PFM_CTX_UNLOADED; 836 - 837 - /* 838 - * initialization of context's flags 839 - */ 840 - ctx->ctx_fl_block = (ctx_flags & PFM_FL_NOTIFY_BLOCK) ? 1 : 0; 841 - ctx->ctx_fl_system = (ctx_flags & PFM_FL_SYSTEM_WIDE) ? 1: 0; 842 - ctx->ctx_fl_no_msg = (ctx_flags & PFM_FL_OVFL_NO_MSG) ? 1: 0; 843 - /* 844 - * will move to set properties 845 - * ctx->ctx_fl_excl_idle = (ctx_flags & PFM_FL_EXCL_IDLE) ? 
1: 0; 846 - */ 847 - 848 - /* 849 - * init restart semaphore to locked 850 - */ 851 - init_completion(&ctx->ctx_restart_done); 852 - 853 - /* 854 - * activation is used in SMP only 855 - */ 856 - ctx->ctx_last_activation = PFM_INVALID_ACTIVATION; 857 - SET_LAST_CPU(ctx, -1); 858 - 859 - /* 860 - * initialize notification message queue 861 - */ 862 - ctx->ctx_msgq_head = ctx->ctx_msgq_tail = 0; 863 - init_waitqueue_head(&ctx->ctx_msgq_wait); 864 - init_waitqueue_head(&ctx->ctx_zombieq); 865 - 866 - } 867 - return ctx; 868 - } 869 - 870 - static void 871 - pfm_context_free(pfm_context_t *ctx) 872 - { 873 - if (ctx) { 874 - DPRINT(("free ctx @%p\n", ctx)); 875 - kfree(ctx); 876 - } 877 - } 878 - 879 - static void 880 - pfm_mask_monitoring(struct task_struct *task) 881 - { 882 - pfm_context_t *ctx = PFM_GET_CTX(task); 883 - unsigned long mask, val, ovfl_mask; 884 - int i; 885 - 886 - DPRINT_ovfl(("masking monitoring for [%d]\n", task_pid_nr(task))); 887 - 888 - ovfl_mask = pmu_conf->ovfl_val; 889 - /* 890 - * monitoring can only be masked as a result of a valid 891 - * counter overflow. In UP, it means that the PMU still 892 - * has an owner. Note that the owner can be different 893 - * from the current task. However the PMU state belongs 894 - * to the owner. 895 - * In SMP, a valid overflow only happens when task is 896 - * current. Therefore if we come here, we know that 897 - * the PMU state belongs to the current task, therefore 898 - * we can access the live registers. 899 - * 900 - * So in both cases, the live register contains the owner's 901 - * state. We can ONLY touch the PMU registers and NOT the PSR. 902 - * 903 - * As a consequence to this call, the ctx->th_pmds[] array 904 - * contains stale information which must be ignored 905 - * when context is reloaded AND monitoring is active (see 906 - * pfm_restart). 
907 - */ 908 - mask = ctx->ctx_used_pmds[0]; 909 - for (i = 0; mask; i++, mask>>=1) { 910 - /* skip non used pmds */ 911 - if ((mask & 0x1) == 0) continue; 912 - val = ia64_get_pmd(i); 913 - 914 - if (PMD_IS_COUNTING(i)) { 915 - /* 916 - * we rebuild the full 64 bit value of the counter 917 - */ 918 - ctx->ctx_pmds[i].val += (val & ovfl_mask); 919 - } else { 920 - ctx->ctx_pmds[i].val = val; 921 - } 922 - DPRINT_ovfl(("pmd[%d]=0x%lx hw_pmd=0x%lx\n", 923 - i, 924 - ctx->ctx_pmds[i].val, 925 - val & ovfl_mask)); 926 - } 927 - /* 928 - * mask monitoring by setting the privilege level to 0 929 - * we cannot use psr.pp/psr.up for this, it is controlled by 930 - * the user 931 - * 932 - * if task is current, modify actual registers, otherwise modify 933 - * thread save state, i.e., what will be restored in pfm_load_regs() 934 - */ 935 - mask = ctx->ctx_used_monitors[0] >> PMU_FIRST_COUNTER; 936 - for(i= PMU_FIRST_COUNTER; mask; i++, mask>>=1) { 937 - if ((mask & 0x1) == 0UL) continue; 938 - ia64_set_pmc(i, ctx->th_pmcs[i] & ~0xfUL); 939 - ctx->th_pmcs[i] &= ~0xfUL; 940 - DPRINT_ovfl(("pmc[%d]=0x%lx\n", i, ctx->th_pmcs[i])); 941 - } 942 - /* 943 - * make all of this visible 944 - */ 945 - ia64_srlz_d(); 946 - } 947 - 948 - /* 949 - * must always be done with task == current 950 - * 951 - * context must be in MASKED state when calling 952 - */ 953 - static void 954 - pfm_restore_monitoring(struct task_struct *task) 955 - { 956 - pfm_context_t *ctx = PFM_GET_CTX(task); 957 - unsigned long mask, ovfl_mask; 958 - unsigned long psr, val; 959 - int i, is_system; 960 - 961 - is_system = ctx->ctx_fl_system; 962 - ovfl_mask = pmu_conf->ovfl_val; 963 - 964 - if (task != current) { 965 - printk(KERN_ERR "perfmon.%d: invalid task[%d] current[%d]\n", __LINE__, task_pid_nr(task), task_pid_nr(current)); 966 - return; 967 - } 968 - if (ctx->ctx_state != PFM_CTX_MASKED) { 969 - printk(KERN_ERR "perfmon.%d: task[%d] current[%d] invalid state=%d\n", __LINE__, 970 - task_pid_nr(task), task_pid_nr(current), ctx->ctx_state); 971 - return; 972 - } 973 - psr = pfm_get_psr(); 974 - /* 975 - * monitoring is masked via the PMC. 976 - * As we restore their value, we do not want each counter to 977 - * restart right away. We stop monitoring using the PSR, 978 - * restore the PMC (and PMD) and then re-establish the psr 979 - * as it was. Note that there can be no pending overflow at 980 - * this point, because monitoring was MASKED. 
981 - * 982 - * system-wide session are pinned and self-monitoring 983 - */ 984 - if (is_system && (PFM_CPUINFO_GET() & PFM_CPUINFO_DCR_PP)) { 985 - /* disable dcr pp */ 986 - ia64_setreg(_IA64_REG_CR_DCR, ia64_getreg(_IA64_REG_CR_DCR) & ~IA64_DCR_PP); 987 - pfm_clear_psr_pp(); 988 - } else { 989 - pfm_clear_psr_up(); 990 - } 991 - /* 992 - * first, we restore the PMD 993 - */ 994 - mask = ctx->ctx_used_pmds[0]; 995 - for (i = 0; mask; i++, mask>>=1) { 996 - /* skip non used pmds */ 997 - if ((mask & 0x1) == 0) continue; 998 - 999 - if (PMD_IS_COUNTING(i)) { 1000 - /* 1001 - * we split the 64bit value according to 1002 - * counter width 1003 - */ 1004 - val = ctx->ctx_pmds[i].val & ovfl_mask; 1005 - ctx->ctx_pmds[i].val &= ~ovfl_mask; 1006 - } else { 1007 - val = ctx->ctx_pmds[i].val; 1008 - } 1009 - ia64_set_pmd(i, val); 1010 - 1011 - DPRINT(("pmd[%d]=0x%lx hw_pmd=0x%lx\n", 1012 - i, 1013 - ctx->ctx_pmds[i].val, 1014 - val)); 1015 - } 1016 - /* 1017 - * restore the PMCs 1018 - */ 1019 - mask = ctx->ctx_used_monitors[0] >> PMU_FIRST_COUNTER; 1020 - for(i= PMU_FIRST_COUNTER; mask; i++, mask>>=1) { 1021 - if ((mask & 0x1) == 0UL) continue; 1022 - ctx->th_pmcs[i] = ctx->ctx_pmcs[i]; 1023 - ia64_set_pmc(i, ctx->th_pmcs[i]); 1024 - DPRINT(("[%d] pmc[%d]=0x%lx\n", 1025 - task_pid_nr(task), i, ctx->th_pmcs[i])); 1026 - } 1027 - ia64_srlz_d(); 1028 - 1029 - /* 1030 - * must restore DBR/IBR because could be modified while masked 1031 - * XXX: need to optimize 1032 - */ 1033 - if (ctx->ctx_fl_using_dbreg) { 1034 - pfm_restore_ibrs(ctx->ctx_ibrs, pmu_conf->num_ibrs); 1035 - pfm_restore_dbrs(ctx->ctx_dbrs, pmu_conf->num_dbrs); 1036 - } 1037 - 1038 - /* 1039 - * now restore PSR 1040 - */ 1041 - if (is_system && (PFM_CPUINFO_GET() & PFM_CPUINFO_DCR_PP)) { 1042 - /* enable dcr pp */ 1043 - ia64_setreg(_IA64_REG_CR_DCR, ia64_getreg(_IA64_REG_CR_DCR) | IA64_DCR_PP); 1044 - ia64_srlz_i(); 1045 - } 1046 - pfm_set_psr_l(psr); 1047 - } 1048 - 1049 - static inline void 1050 - pfm_save_pmds(unsigned long *pmds, unsigned long mask) 1051 - { 1052 - int i; 1053 - 1054 - ia64_srlz_d(); 1055 - 1056 - for (i=0; mask; i++, mask>>=1) { 1057 - if (mask & 0x1) pmds[i] = ia64_get_pmd(i); 1058 - } 1059 - } 1060 - 1061 - /* 1062 - * reload from thread state (used for ctxw only) 1063 - */ 1064 - static inline void 1065 - pfm_restore_pmds(unsigned long *pmds, unsigned long mask) 1066 - { 1067 - int i; 1068 - unsigned long val, ovfl_val = pmu_conf->ovfl_val; 1069 - 1070 - for (i=0; mask; i++, mask>>=1) { 1071 - if ((mask & 0x1) == 0) continue; 1072 - val = PMD_IS_COUNTING(i) ? pmds[i] & ovfl_val : pmds[i]; 1073 - ia64_set_pmd(i, val); 1074 - } 1075 - ia64_srlz_d(); 1076 - } 1077 - 1078 - /* 1079 - * propagate PMD from context to thread-state 1080 - */ 1081 - static inline void 1082 - pfm_copy_pmds(struct task_struct *task, pfm_context_t *ctx) 1083 - { 1084 - unsigned long ovfl_val = pmu_conf->ovfl_val; 1085 - unsigned long mask = ctx->ctx_all_pmds[0]; 1086 - unsigned long val; 1087 - int i; 1088 - 1089 - DPRINT(("mask=0x%lx\n", mask)); 1090 - 1091 - for (i=0; mask; i++, mask>>=1) { 1092 - 1093 - val = ctx->ctx_pmds[i].val; 1094 - 1095 - /* 1096 - * We break up the 64 bit value into 2 pieces 1097 - * the lower bits go to the machine state in the 1098 - * thread (will be reloaded on ctxsw in). 1099 - * The upper part stays in the soft-counter. 
1100 - */ 1101 - if (PMD_IS_COUNTING(i)) { 1102 - ctx->ctx_pmds[i].val = val & ~ovfl_val; 1103 - val &= ovfl_val; 1104 - } 1105 - ctx->th_pmds[i] = val; 1106 - 1107 - DPRINT(("pmd[%d]=0x%lx soft_val=0x%lx\n", 1108 - i, 1109 - ctx->th_pmds[i], 1110 - ctx->ctx_pmds[i].val)); 1111 - } 1112 - } 1113 - 1114 - /* 1115 - * propagate PMC from context to thread-state 1116 - */ 1117 - static inline void 1118 - pfm_copy_pmcs(struct task_struct *task, pfm_context_t *ctx) 1119 - { 1120 - unsigned long mask = ctx->ctx_all_pmcs[0]; 1121 - int i; 1122 - 1123 - DPRINT(("mask=0x%lx\n", mask)); 1124 - 1125 - for (i=0; mask; i++, mask>>=1) { 1126 - /* masking 0 with ovfl_val yields 0 */ 1127 - ctx->th_pmcs[i] = ctx->ctx_pmcs[i]; 1128 - DPRINT(("pmc[%d]=0x%lx\n", i, ctx->th_pmcs[i])); 1129 - } 1130 - } 1131 - 1132 - 1133 - 1134 - static inline void 1135 - pfm_restore_pmcs(unsigned long *pmcs, unsigned long mask) 1136 - { 1137 - int i; 1138 - 1139 - for (i=0; mask; i++, mask>>=1) { 1140 - if ((mask & 0x1) == 0) continue; 1141 - ia64_set_pmc(i, pmcs[i]); 1142 - } 1143 - ia64_srlz_d(); 1144 - } 1145 - 1146 - static inline int 1147 - pfm_uuid_cmp(pfm_uuid_t a, pfm_uuid_t b) 1148 - { 1149 - return memcmp(a, b, sizeof(pfm_uuid_t)); 1150 - } 1151 - 1152 - static inline int 1153 - pfm_buf_fmt_exit(pfm_buffer_fmt_t *fmt, struct task_struct *task, void *buf, struct pt_regs *regs) 1154 - { 1155 - int ret = 0; 1156 - if (fmt->fmt_exit) ret = (*fmt->fmt_exit)(task, buf, regs); 1157 - return ret; 1158 - } 1159 - 1160 - static inline int 1161 - pfm_buf_fmt_getsize(pfm_buffer_fmt_t *fmt, struct task_struct *task, unsigned int flags, int cpu, void *arg, unsigned long *size) 1162 - { 1163 - int ret = 0; 1164 - if (fmt->fmt_getsize) ret = (*fmt->fmt_getsize)(task, flags, cpu, arg, size); 1165 - return ret; 1166 - } 1167 - 1168 - 1169 - static inline int 1170 - pfm_buf_fmt_validate(pfm_buffer_fmt_t *fmt, struct task_struct *task, unsigned int flags, 1171 - int cpu, void *arg) 1172 - { 1173 - int ret = 0; 1174 - if (fmt->fmt_validate) ret = (*fmt->fmt_validate)(task, flags, cpu, arg); 1175 - return ret; 1176 - } 1177 - 1178 - static inline int 1179 - pfm_buf_fmt_init(pfm_buffer_fmt_t *fmt, struct task_struct *task, void *buf, unsigned int flags, 1180 - int cpu, void *arg) 1181 - { 1182 - int ret = 0; 1183 - if (fmt->fmt_init) ret = (*fmt->fmt_init)(task, buf, flags, cpu, arg); 1184 - return ret; 1185 - } 1186 - 1187 - static inline int 1188 - pfm_buf_fmt_restart(pfm_buffer_fmt_t *fmt, struct task_struct *task, pfm_ovfl_ctrl_t *ctrl, void *buf, struct pt_regs *regs) 1189 - { 1190 - int ret = 0; 1191 - if (fmt->fmt_restart) ret = (*fmt->fmt_restart)(task, ctrl, buf, regs); 1192 - return ret; 1193 - } 1194 - 1195 - static inline int 1196 - pfm_buf_fmt_restart_active(pfm_buffer_fmt_t *fmt, struct task_struct *task, pfm_ovfl_ctrl_t *ctrl, void *buf, struct pt_regs *regs) 1197 - { 1198 - int ret = 0; 1199 - if (fmt->fmt_restart_active) ret = (*fmt->fmt_restart_active)(task, ctrl, buf, regs); 1200 - return ret; 1201 - } 1202 - 1203 - static pfm_buffer_fmt_t * 1204 - __pfm_find_buffer_fmt(pfm_uuid_t uuid) 1205 - { 1206 - struct list_head * pos; 1207 - pfm_buffer_fmt_t * entry; 1208 - 1209 - list_for_each(pos, &pfm_buffer_fmt_list) { 1210 - entry = list_entry(pos, pfm_buffer_fmt_t, fmt_list); 1211 - if (pfm_uuid_cmp(uuid, entry->fmt_uuid) == 0) 1212 - return entry; 1213 - } 1214 - return NULL; 1215 - } 1216 - 1217 - /* 1218 - * find a buffer format based on its uuid 1219 - */ 1220 - static pfm_buffer_fmt_t * 1221 - 
pfm_find_buffer_fmt(pfm_uuid_t uuid) 1222 - { 1223 - pfm_buffer_fmt_t * fmt; 1224 - spin_lock(&pfm_buffer_fmt_lock); 1225 - fmt = __pfm_find_buffer_fmt(uuid); 1226 - spin_unlock(&pfm_buffer_fmt_lock); 1227 - return fmt; 1228 - } 1229 - 1230 - int 1231 - pfm_register_buffer_fmt(pfm_buffer_fmt_t *fmt) 1232 - { 1233 - int ret = 0; 1234 - 1235 - /* some sanity checks */ 1236 - if (fmt == NULL || fmt->fmt_name == NULL) return -EINVAL; 1237 - 1238 - /* we need at least a handler */ 1239 - if (fmt->fmt_handler == NULL) return -EINVAL; 1240 - 1241 - /* 1242 - * XXX: need check validity of fmt_arg_size 1243 - */ 1244 - 1245 - spin_lock(&pfm_buffer_fmt_lock); 1246 - 1247 - if (__pfm_find_buffer_fmt(fmt->fmt_uuid)) { 1248 - printk(KERN_ERR "perfmon: duplicate sampling format: %s\n", fmt->fmt_name); 1249 - ret = -EBUSY; 1250 - goto out; 1251 - } 1252 - list_add(&fmt->fmt_list, &pfm_buffer_fmt_list); 1253 - printk(KERN_INFO "perfmon: added sampling format %s\n", fmt->fmt_name); 1254 - 1255 - out: 1256 - spin_unlock(&pfm_buffer_fmt_lock); 1257 - return ret; 1258 - } 1259 - EXPORT_SYMBOL(pfm_register_buffer_fmt); 1260 - 1261 - int 1262 - pfm_unregister_buffer_fmt(pfm_uuid_t uuid) 1263 - { 1264 - pfm_buffer_fmt_t *fmt; 1265 - int ret = 0; 1266 - 1267 - spin_lock(&pfm_buffer_fmt_lock); 1268 - 1269 - fmt = __pfm_find_buffer_fmt(uuid); 1270 - if (!fmt) { 1271 - printk(KERN_ERR "perfmon: cannot unregister format, not found\n"); 1272 - ret = -EINVAL; 1273 - goto out; 1274 - } 1275 - list_del_init(&fmt->fmt_list); 1276 - printk(KERN_INFO "perfmon: removed sampling format: %s\n", fmt->fmt_name); 1277 - 1278 - out: 1279 - spin_unlock(&pfm_buffer_fmt_lock); 1280 - return ret; 1281 - 1282 - } 1283 - EXPORT_SYMBOL(pfm_unregister_buffer_fmt); 1284 - 1285 - static int 1286 - pfm_reserve_session(struct task_struct *task, int is_syswide, unsigned int cpu) 1287 - { 1288 - unsigned long flags; 1289 - /* 1290 - * validity checks on cpu_mask have been done upstream 1291 - */ 1292 - LOCK_PFS(flags); 1293 - 1294 - DPRINT(("in sys_sessions=%u task_sessions=%u dbregs=%u syswide=%d cpu=%u\n", 1295 - pfm_sessions.pfs_sys_sessions, 1296 - pfm_sessions.pfs_task_sessions, 1297 - pfm_sessions.pfs_sys_use_dbregs, 1298 - is_syswide, 1299 - cpu)); 1300 - 1301 - if (is_syswide) { 1302 - /* 1303 - * cannot mix system wide and per-task sessions 1304 - */ 1305 - if (pfm_sessions.pfs_task_sessions > 0UL) { 1306 - DPRINT(("system wide not possible, %u conflicting task_sessions\n", 1307 - pfm_sessions.pfs_task_sessions)); 1308 - goto abort; 1309 - } 1310 - 1311 - if (pfm_sessions.pfs_sys_session[cpu]) goto error_conflict; 1312 - 1313 - DPRINT(("reserving system wide session on CPU%u currently on CPU%u\n", cpu, smp_processor_id())); 1314 - 1315 - pfm_sessions.pfs_sys_session[cpu] = task; 1316 - 1317 - pfm_sessions.pfs_sys_sessions++ ; 1318 - 1319 - } else { 1320 - if (pfm_sessions.pfs_sys_sessions) goto abort; 1321 - pfm_sessions.pfs_task_sessions++; 1322 - } 1323 - 1324 - DPRINT(("out sys_sessions=%u task_sessions=%u dbregs=%u syswide=%d cpu=%u\n", 1325 - pfm_sessions.pfs_sys_sessions, 1326 - pfm_sessions.pfs_task_sessions, 1327 - pfm_sessions.pfs_sys_use_dbregs, 1328 - is_syswide, 1329 - cpu)); 1330 - 1331 - /* 1332 - * Force idle() into poll mode 1333 - */ 1334 - cpu_idle_poll_ctrl(true); 1335 - 1336 - UNLOCK_PFS(flags); 1337 - 1338 - return 0; 1339 - 1340 - error_conflict: 1341 - DPRINT(("system wide not possible, conflicting session [%d] on CPU%d\n", 1342 - task_pid_nr(pfm_sessions.pfs_sys_session[cpu]), 1343 - cpu)); 1344 - abort: 1345 - 
UNLOCK_PFS(flags); 1346 - 1347 - return -EBUSY; 1348 - 1349 - } 1350 - 1351 - static int 1352 - pfm_unreserve_session(pfm_context_t *ctx, int is_syswide, unsigned int cpu) 1353 - { 1354 - unsigned long flags; 1355 - /* 1356 - * validity checks on cpu_mask have been done upstream 1357 - */ 1358 - LOCK_PFS(flags); 1359 - 1360 - DPRINT(("in sys_sessions=%u task_sessions=%u dbregs=%u syswide=%d cpu=%u\n", 1361 - pfm_sessions.pfs_sys_sessions, 1362 - pfm_sessions.pfs_task_sessions, 1363 - pfm_sessions.pfs_sys_use_dbregs, 1364 - is_syswide, 1365 - cpu)); 1366 - 1367 - 1368 - if (is_syswide) { 1369 - pfm_sessions.pfs_sys_session[cpu] = NULL; 1370 - /* 1371 - * would not work with perfmon+more than one bit in cpu_mask 1372 - */ 1373 - if (ctx && ctx->ctx_fl_using_dbreg) { 1374 - if (pfm_sessions.pfs_sys_use_dbregs == 0) { 1375 - printk(KERN_ERR "perfmon: invalid release for ctx %p sys_use_dbregs=0\n", ctx); 1376 - } else { 1377 - pfm_sessions.pfs_sys_use_dbregs--; 1378 - } 1379 - } 1380 - pfm_sessions.pfs_sys_sessions--; 1381 - } else { 1382 - pfm_sessions.pfs_task_sessions--; 1383 - } 1384 - DPRINT(("out sys_sessions=%u task_sessions=%u dbregs=%u syswide=%d cpu=%u\n", 1385 - pfm_sessions.pfs_sys_sessions, 1386 - pfm_sessions.pfs_task_sessions, 1387 - pfm_sessions.pfs_sys_use_dbregs, 1388 - is_syswide, 1389 - cpu)); 1390 - 1391 - /* Undo forced polling. Last session reenables pal_halt */ 1392 - cpu_idle_poll_ctrl(false); 1393 - 1394 - UNLOCK_PFS(flags); 1395 - 1396 - return 0; 1397 - } 1398 - 1399 - /* 1400 - * removes virtual mapping of the sampling buffer. 1401 - * IMPORTANT: cannot be called with interrupts disable, e.g. inside 1402 - * a PROTECT_CTX() section. 1403 - */ 1404 - static int 1405 - pfm_remove_smpl_mapping(void *vaddr, unsigned long size) 1406 - { 1407 - struct task_struct *task = current; 1408 - int r; 1409 - 1410 - /* sanity checks */ 1411 - if (task->mm == NULL || size == 0UL || vaddr == NULL) { 1412 - printk(KERN_ERR "perfmon: pfm_remove_smpl_mapping [%d] invalid context mm=%p\n", task_pid_nr(task), task->mm); 1413 - return -EINVAL; 1414 - } 1415 - 1416 - DPRINT(("smpl_vaddr=%p size=%lu\n", vaddr, size)); 1417 - 1418 - /* 1419 - * does the actual unmapping 1420 - */ 1421 - r = vm_munmap((unsigned long)vaddr, size); 1422 - 1423 - if (r !=0) { 1424 - printk(KERN_ERR "perfmon: [%d] unable to unmap sampling buffer @%p size=%lu\n", task_pid_nr(task), vaddr, size); 1425 - } 1426 - 1427 - DPRINT(("do_unmap(%p, %lu)=%d\n", vaddr, size, r)); 1428 - 1429 - return 0; 1430 - } 1431 - 1432 - /* 1433 - * free actual physical storage used by sampling buffer 1434 - */ 1435 - #if 0 1436 - static int 1437 - pfm_free_smpl_buffer(pfm_context_t *ctx) 1438 - { 1439 - pfm_buffer_fmt_t *fmt; 1440 - 1441 - if (ctx->ctx_smpl_hdr == NULL) goto invalid_free; 1442 - 1443 - /* 1444 - * we won't use the buffer format anymore 1445 - */ 1446 - fmt = ctx->ctx_buf_fmt; 1447 - 1448 - DPRINT(("sampling buffer @%p size %lu vaddr=%p\n", 1449 - ctx->ctx_smpl_hdr, 1450 - ctx->ctx_smpl_size, 1451 - ctx->ctx_smpl_vaddr)); 1452 - 1453 - pfm_buf_fmt_exit(fmt, current, NULL, NULL); 1454 - 1455 - /* 1456 - * free the buffer 1457 - */ 1458 - vfree(ctx->ctx_smpl_hdr); 1459 - 1460 - ctx->ctx_smpl_hdr = NULL; 1461 - ctx->ctx_smpl_size = 0UL; 1462 - 1463 - return 0; 1464 - 1465 - invalid_free: 1466 - printk(KERN_ERR "perfmon: pfm_free_smpl_buffer [%d] no buffer\n", task_pid_nr(current)); 1467 - return -EINVAL; 1468 - } 1469 - #endif 1470 - 1471 - static inline void 1472 - pfm_exit_smpl_buffer(pfm_buffer_fmt_t *fmt) 1473 - { 1474 
- if (fmt == NULL) return; 1475 - 1476 - pfm_buf_fmt_exit(fmt, current, NULL, NULL); 1477 - 1478 - } 1479 - 1480 - /* 1481 - * pfmfs should _never_ be mounted by userland - too much of security hassle, 1482 - * no real gain from having the whole whorehouse mounted. So we don't need 1483 - * any operations on the root directory. However, we need a non-trivial 1484 - * d_name - pfm: will go nicely and kill the special-casing in procfs. 1485 - */ 1486 - static struct vfsmount *pfmfs_mnt __read_mostly; 1487 - 1488 - static int __init 1489 - init_pfm_fs(void) 1490 - { 1491 - int err = register_filesystem(&pfm_fs_type); 1492 - if (!err) { 1493 - pfmfs_mnt = kern_mount(&pfm_fs_type); 1494 - err = PTR_ERR(pfmfs_mnt); 1495 - if (IS_ERR(pfmfs_mnt)) 1496 - unregister_filesystem(&pfm_fs_type); 1497 - else 1498 - err = 0; 1499 - } 1500 - return err; 1501 - } 1502 - 1503 - static ssize_t 1504 - pfm_read(struct file *filp, char __user *buf, size_t size, loff_t *ppos) 1505 - { 1506 - pfm_context_t *ctx; 1507 - pfm_msg_t *msg; 1508 - ssize_t ret; 1509 - unsigned long flags; 1510 - DECLARE_WAITQUEUE(wait, current); 1511 - if (PFM_IS_FILE(filp) == 0) { 1512 - printk(KERN_ERR "perfmon: pfm_poll: bad magic [%d]\n", task_pid_nr(current)); 1513 - return -EINVAL; 1514 - } 1515 - 1516 - ctx = filp->private_data; 1517 - if (ctx == NULL) { 1518 - printk(KERN_ERR "perfmon: pfm_read: NULL ctx [%d]\n", task_pid_nr(current)); 1519 - return -EINVAL; 1520 - } 1521 - 1522 - /* 1523 - * check even when there is no message 1524 - */ 1525 - if (size < sizeof(pfm_msg_t)) { 1526 - DPRINT(("message is too small ctx=%p (>=%ld)\n", ctx, sizeof(pfm_msg_t))); 1527 - return -EINVAL; 1528 - } 1529 - 1530 - PROTECT_CTX(ctx, flags); 1531 - 1532 - /* 1533 - * put ourselves on the wait queue 1534 - */ 1535 - add_wait_queue(&ctx->ctx_msgq_wait, &wait); 1536 - 1537 - 1538 - for(;;) { 1539 - /* 1540 - * check wait queue 1541 - */ 1542 - 1543 - set_current_state(TASK_INTERRUPTIBLE); 1544 - 1545 - DPRINT(("head=%d tail=%d\n", ctx->ctx_msgq_head, ctx->ctx_msgq_tail)); 1546 - 1547 - ret = 0; 1548 - if(PFM_CTXQ_EMPTY(ctx) == 0) break; 1549 - 1550 - UNPROTECT_CTX(ctx, flags); 1551 - 1552 - /* 1553 - * check non-blocking read 1554 - */ 1555 - ret = -EAGAIN; 1556 - if(filp->f_flags & O_NONBLOCK) break; 1557 - 1558 - /* 1559 - * check pending signals 1560 - */ 1561 - if(signal_pending(current)) { 1562 - ret = -EINTR; 1563 - break; 1564 - } 1565 - /* 1566 - * no message, so wait 1567 - */ 1568 - schedule(); 1569 - 1570 - PROTECT_CTX(ctx, flags); 1571 - } 1572 - DPRINT(("[%d] back to running ret=%ld\n", task_pid_nr(current), ret)); 1573 - set_current_state(TASK_RUNNING); 1574 - remove_wait_queue(&ctx->ctx_msgq_wait, &wait); 1575 - 1576 - if (ret < 0) goto abort; 1577 - 1578 - ret = -EINVAL; 1579 - msg = pfm_get_next_msg(ctx); 1580 - if (msg == NULL) { 1581 - printk(KERN_ERR "perfmon: pfm_read no msg for ctx=%p [%d]\n", ctx, task_pid_nr(current)); 1582 - goto abort_locked; 1583 - } 1584 - 1585 - DPRINT(("fd=%d type=%d\n", msg->pfm_gen_msg.msg_ctx_fd, msg->pfm_gen_msg.msg_type)); 1586 - 1587 - ret = -EFAULT; 1588 - if(copy_to_user(buf, msg, sizeof(pfm_msg_t)) == 0) ret = sizeof(pfm_msg_t); 1589 - 1590 - abort_locked: 1591 - UNPROTECT_CTX(ctx, flags); 1592 - abort: 1593 - return ret; 1594 - } 1595 - 1596 - static ssize_t 1597 - pfm_write(struct file *file, const char __user *ubuf, 1598 - size_t size, loff_t *ppos) 1599 - { 1600 - DPRINT(("pfm_write called\n")); 1601 - return -EINVAL; 1602 - } 1603 - 1604 - static __poll_t 1605 - pfm_poll(struct file 
*filp, poll_table * wait) 1606 - { 1607 - pfm_context_t *ctx; 1608 - unsigned long flags; 1609 - __poll_t mask = 0; 1610 - 1611 - if (PFM_IS_FILE(filp) == 0) { 1612 - printk(KERN_ERR "perfmon: pfm_poll: bad magic [%d]\n", task_pid_nr(current)); 1613 - return 0; 1614 - } 1615 - 1616 - ctx = filp->private_data; 1617 - if (ctx == NULL) { 1618 - printk(KERN_ERR "perfmon: pfm_poll: NULL ctx [%d]\n", task_pid_nr(current)); 1619 - return 0; 1620 - } 1621 - 1622 - 1623 - DPRINT(("pfm_poll ctx_fd=%d before poll_wait\n", ctx->ctx_fd)); 1624 - 1625 - poll_wait(filp, &ctx->ctx_msgq_wait, wait); 1626 - 1627 - PROTECT_CTX(ctx, flags); 1628 - 1629 - if (PFM_CTXQ_EMPTY(ctx) == 0) 1630 - mask = EPOLLIN | EPOLLRDNORM; 1631 - 1632 - UNPROTECT_CTX(ctx, flags); 1633 - 1634 - DPRINT(("pfm_poll ctx_fd=%d mask=0x%x\n", ctx->ctx_fd, mask)); 1635 - 1636 - return mask; 1637 - } 1638 - 1639 - static long 1640 - pfm_ioctl(struct file *file, unsigned int cmd, unsigned long arg) 1641 - { 1642 - DPRINT(("pfm_ioctl called\n")); 1643 - return -EINVAL; 1644 - } 1645 - 1646 - /* 1647 - * interrupt cannot be masked when coming here 1648 - */ 1649 - static inline int 1650 - pfm_do_fasync(int fd, struct file *filp, pfm_context_t *ctx, int on) 1651 - { 1652 - int ret; 1653 - 1654 - ret = fasync_helper (fd, filp, on, &ctx->ctx_async_queue); 1655 - 1656 - DPRINT(("pfm_fasync called by [%d] on ctx_fd=%d on=%d async_queue=%p ret=%d\n", 1657 - task_pid_nr(current), 1658 - fd, 1659 - on, 1660 - ctx->ctx_async_queue, ret)); 1661 - 1662 - return ret; 1663 - } 1664 - 1665 - static int 1666 - pfm_fasync(int fd, struct file *filp, int on) 1667 - { 1668 - pfm_context_t *ctx; 1669 - int ret; 1670 - 1671 - if (PFM_IS_FILE(filp) == 0) { 1672 - printk(KERN_ERR "perfmon: pfm_fasync bad magic [%d]\n", task_pid_nr(current)); 1673 - return -EBADF; 1674 - } 1675 - 1676 - ctx = filp->private_data; 1677 - if (ctx == NULL) { 1678 - printk(KERN_ERR "perfmon: pfm_fasync NULL ctx [%d]\n", task_pid_nr(current)); 1679 - return -EBADF; 1680 - } 1681 - /* 1682 - * we cannot mask interrupts during this call because this may 1683 - * may go to sleep if memory is not readily avalaible. 1684 - * 1685 - * We are protected from the conetxt disappearing by the get_fd()/put_fd() 1686 - * done in caller. Serialization of this function is ensured by caller. 1687 - */ 1688 - ret = pfm_do_fasync(fd, filp, ctx, on); 1689 - 1690 - 1691 - DPRINT(("pfm_fasync called on ctx_fd=%d on=%d async_queue=%p ret=%d\n", 1692 - fd, 1693 - on, 1694 - ctx->ctx_async_queue, ret)); 1695 - 1696 - return ret; 1697 - } 1698 - 1699 - #ifdef CONFIG_SMP 1700 - /* 1701 - * this function is exclusively called from pfm_close(). 1702 - * The context is not protected at that time, nor are interrupts 1703 - * on the remote CPU. That's necessary to avoid deadlocks. 
1704 - */ 1705 - static void 1706 - pfm_syswide_force_stop(void *info) 1707 - { 1708 - pfm_context_t *ctx = (pfm_context_t *)info; 1709 - struct pt_regs *regs = task_pt_regs(current); 1710 - struct task_struct *owner; 1711 - unsigned long flags; 1712 - int ret; 1713 - 1714 - if (ctx->ctx_cpu != smp_processor_id()) { 1715 - printk(KERN_ERR "perfmon: pfm_syswide_force_stop for CPU%d but on CPU%d\n", 1716 - ctx->ctx_cpu, 1717 - smp_processor_id()); 1718 - return; 1719 - } 1720 - owner = GET_PMU_OWNER(); 1721 - if (owner != ctx->ctx_task) { 1722 - printk(KERN_ERR "perfmon: pfm_syswide_force_stop CPU%d unexpected owner [%d] instead of [%d]\n", 1723 - smp_processor_id(), 1724 - task_pid_nr(owner), task_pid_nr(ctx->ctx_task)); 1725 - return; 1726 - } 1727 - if (GET_PMU_CTX() != ctx) { 1728 - printk(KERN_ERR "perfmon: pfm_syswide_force_stop CPU%d unexpected ctx %p instead of %p\n", 1729 - smp_processor_id(), 1730 - GET_PMU_CTX(), ctx); 1731 - return; 1732 - } 1733 - 1734 - DPRINT(("on CPU%d forcing system wide stop for [%d]\n", smp_processor_id(), task_pid_nr(ctx->ctx_task))); 1735 - /* 1736 - * the context is already protected in pfm_close(), we simply 1737 - * need to mask interrupts to avoid a PMU interrupt race on 1738 - * this CPU 1739 - */ 1740 - local_irq_save(flags); 1741 - 1742 - ret = pfm_context_unload(ctx, NULL, 0, regs); 1743 - if (ret) { 1744 - DPRINT(("context_unload returned %d\n", ret)); 1745 - } 1746 - 1747 - /* 1748 - * unmask interrupts, PMU interrupts are now spurious here 1749 - */ 1750 - local_irq_restore(flags); 1751 - } 1752 - 1753 - static void 1754 - pfm_syswide_cleanup_other_cpu(pfm_context_t *ctx) 1755 - { 1756 - int ret; 1757 - 1758 - DPRINT(("calling CPU%d for cleanup\n", ctx->ctx_cpu)); 1759 - ret = smp_call_function_single(ctx->ctx_cpu, pfm_syswide_force_stop, ctx, 1); 1760 - DPRINT(("called CPU%d for cleanup ret=%d\n", ctx->ctx_cpu, ret)); 1761 - } 1762 - #endif /* CONFIG_SMP */ 1763 - 1764 - /* 1765 - * called for each close(). Partially free resources. 1766 - * When caller is self-monitoring, the context is unloaded. 1767 - */ 1768 - static int 1769 - pfm_flush(struct file *filp, fl_owner_t id) 1770 - { 1771 - pfm_context_t *ctx; 1772 - struct task_struct *task; 1773 - struct pt_regs *regs; 1774 - unsigned long flags; 1775 - unsigned long smpl_buf_size = 0UL; 1776 - void *smpl_buf_vaddr = NULL; 1777 - int state, is_system; 1778 - 1779 - if (PFM_IS_FILE(filp) == 0) { 1780 - DPRINT(("bad magic for\n")); 1781 - return -EBADF; 1782 - } 1783 - 1784 - ctx = filp->private_data; 1785 - if (ctx == NULL) { 1786 - printk(KERN_ERR "perfmon: pfm_flush: NULL ctx [%d]\n", task_pid_nr(current)); 1787 - return -EBADF; 1788 - } 1789 - 1790 - /* 1791 - * remove our file from the async queue, if we use this mode. 1792 - * This can be done without the context being protected. We come 1793 - * here when the context has become unreachable by other tasks. 1794 - * 1795 - * We may still have active monitoring at this point and we may 1796 - * end up in pfm_overflow_handler(). However, fasync_helper() 1797 - * operates with interrupts disabled and it cleans up the 1798 - * queue. If the PMU handler is called prior to entering 1799 - * fasync_helper() then it will send a signal. If it is 1800 - * invoked after, it will find an empty queue and no 1801 - * signal will be sent. 
In both cases, we are safe 1802 - */ 1803 - PROTECT_CTX(ctx, flags); 1804 - 1805 - state = ctx->ctx_state; 1806 - is_system = ctx->ctx_fl_system; 1807 - 1808 - task = PFM_CTX_TASK(ctx); 1809 - regs = task_pt_regs(task); 1810 - 1811 - DPRINT(("ctx_state=%d is_current=%d\n", 1812 - state, 1813 - task == current ? 1 : 0)); 1814 - 1815 - /* 1816 - * if state == UNLOADED, then task is NULL 1817 - */ 1818 - 1819 - /* 1820 - * we must stop and unload because we are losing access to the context. 1821 - */ 1822 - if (task == current) { 1823 - #ifdef CONFIG_SMP 1824 - /* 1825 - * the task IS the owner but it migrated to another CPU: that's bad 1826 - * but we must handle this cleanly. Unfortunately, the kernel does 1827 - * not provide a mechanism to block migration (while the context is loaded). 1828 - * 1829 - * We need to release the resource on the ORIGINAL cpu. 1830 - */ 1831 - if (is_system && ctx->ctx_cpu != smp_processor_id()) { 1832 - 1833 - DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu)); 1834 - /* 1835 - * keep context protected but unmask interrupt for IPI 1836 - */ 1837 - local_irq_restore(flags); 1838 - 1839 - pfm_syswide_cleanup_other_cpu(ctx); 1840 - 1841 - /* 1842 - * restore interrupt masking 1843 - */ 1844 - local_irq_save(flags); 1845 - 1846 - /* 1847 - * context is unloaded at this point 1848 - */ 1849 - } else 1850 - #endif /* CONFIG_SMP */ 1851 - { 1852 - 1853 - DPRINT(("forcing unload\n")); 1854 - /* 1855 - * stop and unload, returning with state UNLOADED 1856 - * and session unreserved. 1857 - */ 1858 - pfm_context_unload(ctx, NULL, 0, regs); 1859 - 1860 - DPRINT(("ctx_state=%d\n", ctx->ctx_state)); 1861 - } 1862 - } 1863 - 1864 - /* 1865 - * remove virtual mapping, if any, for the calling task. 1866 - * cannot reset ctx field until last user is calling close(). 1867 - * 1868 - * ctx_smpl_vaddr must never be cleared because it is needed 1869 - * by every task with access to the context 1870 - * 1871 - * When called from do_exit(), the mm context is gone already, therefore 1872 - * mm is NULL, i.e., the VMA is already gone and we do not have to 1873 - * do anything here 1874 - */ 1875 - if (ctx->ctx_smpl_vaddr && current->mm) { 1876 - smpl_buf_vaddr = ctx->ctx_smpl_vaddr; 1877 - smpl_buf_size = ctx->ctx_smpl_size; 1878 - } 1879 - 1880 - UNPROTECT_CTX(ctx, flags); 1881 - 1882 - /* 1883 - * if there was a mapping, then we systematically remove it 1884 - * at this point. Cannot be done inside critical section 1885 - * because some VM function reenables interrupts. 1886 - * 1887 - */ 1888 - if (smpl_buf_vaddr) pfm_remove_smpl_mapping(smpl_buf_vaddr, smpl_buf_size); 1889 - 1890 - return 0; 1891 - } 1892 - /* 1893 - * called either on explicit close() or from exit_files(). 1894 - * Only the LAST user of the file gets to this point, i.e., it is 1895 - * called only ONCE. 1896 - * 1897 - * IMPORTANT: we get called ONLY when the refcnt on the file gets to zero 1898 - * (fput()), i.e., last task to access the file. Nobody else can access the 1899 - * file at this point. 1900 - * 1901 - * When called from exit_files(), the VMA has been freed because exit_mm() 1902 - * is executed before exit_files(). 1903 - * 1904 - * When called from exit_files(), the current task is not yet ZOMBIE but we 1905 - * flush the PMU state to the context.
1906 - */ 1907 - static int 1908 - pfm_close(struct inode *inode, struct file *filp) 1909 - { 1910 - pfm_context_t *ctx; 1911 - struct task_struct *task; 1912 - struct pt_regs *regs; 1913 - DECLARE_WAITQUEUE(wait, current); 1914 - unsigned long flags; 1915 - unsigned long smpl_buf_size = 0UL; 1916 - void *smpl_buf_addr = NULL; 1917 - int free_possible = 1; 1918 - int state, is_system; 1919 - 1920 - DPRINT(("pfm_close called private=%p\n", filp->private_data)); 1921 - 1922 - if (PFM_IS_FILE(filp) == 0) { 1923 - DPRINT(("bad magic\n")); 1924 - return -EBADF; 1925 - } 1926 - 1927 - ctx = filp->private_data; 1928 - if (ctx == NULL) { 1929 - printk(KERN_ERR "perfmon: pfm_close: NULL ctx [%d]\n", task_pid_nr(current)); 1930 - return -EBADF; 1931 - } 1932 - 1933 - PROTECT_CTX(ctx, flags); 1934 - 1935 - state = ctx->ctx_state; 1936 - is_system = ctx->ctx_fl_system; 1937 - 1938 - task = PFM_CTX_TASK(ctx); 1939 - regs = task_pt_regs(task); 1940 - 1941 - DPRINT(("ctx_state=%d is_current=%d\n", 1942 - state, 1943 - task == current ? 1 : 0)); 1944 - 1945 - /* 1946 - * if task == current, then pfm_flush() unloaded the context 1947 - */ 1948 - if (state == PFM_CTX_UNLOADED) goto doit; 1949 - 1950 - /* 1951 - * context is loaded/masked and task != current, we need to 1952 - * either force an unload or go zombie 1953 - */ 1954 - 1955 - /* 1956 - * The task is currently blocked or will block after an overflow. 1957 - * We must force it to wake up to get out of the 1958 - * MASKED state and transition to the unloaded state by itself. 1959 - * 1960 - * This situation is only possible for per-task mode 1961 - */ 1962 - if (state == PFM_CTX_MASKED && CTX_OVFL_NOBLOCK(ctx) == 0) { 1963 - 1964 - /* 1965 - * set a "partial" zombie state to be checked 1966 - * upon return from down() in pfm_handle_work(). 1967 - * 1968 - * We cannot use the ZOMBIE state, because it is checked 1969 - * by pfm_load_regs() which is called upon wakeup from down(). 1970 - * In such a case, it would free the context and then we would 1971 - * return to pfm_handle_work() which would access the 1972 - * stale context. Instead, we set a flag invisible to pfm_load_regs() 1973 - * but visible to pfm_handle_work(). 1974 - * 1975 - * For some window of time, we have a zombie context with 1976 - * ctx_state = MASKED and not ZOMBIE 1977 - */ 1978 - ctx->ctx_fl_going_zombie = 1; 1979 - 1980 - /* 1981 - * force task to wake up from MASKED state 1982 - */ 1983 - complete(&ctx->ctx_restart_done); 1984 - 1985 - DPRINT(("waking up ctx_state=%d\n", state)); 1986 - 1987 - /* 1988 - * put ourselves to sleep waiting for the other 1989 - * task to report completion 1990 - * 1991 - * the context is protected by the mutex, therefore there 1992 - * is no risk of being notified of completion before 1993 - * being actually on the waitq.
1994 - */ 1995 - set_current_state(TASK_INTERRUPTIBLE); 1996 - add_wait_queue(&ctx->ctx_zombieq, &wait); 1997 - 1998 - UNPROTECT_CTX(ctx, flags); 1999 - 2000 - /* 2001 - * XXX: check for signals : 2002 - * - ok for explicit close 2003 - * - not ok when coming from exit_files() 2004 - */ 2005 - schedule(); 2006 - 2007 - 2008 - PROTECT_CTX(ctx, flags); 2009 - 2010 - 2011 - remove_wait_queue(&ctx->ctx_zombieq, &wait); 2012 - set_current_state(TASK_RUNNING); 2013 - 2014 - /* 2015 - * context is unloaded at this point 2016 - */ 2017 - DPRINT(("after zombie wakeup ctx_state=%d for\n", state)); 2018 - } 2019 - else if (task != current) { 2020 - #ifdef CONFIG_SMP 2021 - /* 2022 - * switch context to zombie state 2023 - */ 2024 - ctx->ctx_state = PFM_CTX_ZOMBIE; 2025 - 2026 - DPRINT(("zombie ctx for [%d]\n", task_pid_nr(task))); 2027 - /* 2028 - * cannot free the context on the spot. deferred until 2029 - * the task notices the ZOMBIE state 2030 - */ 2031 - free_possible = 0; 2032 - #else 2033 - pfm_context_unload(ctx, NULL, 0, regs); 2034 - #endif 2035 - } 2036 - 2037 - doit: 2038 - /* reload state, may have changed during opening of critical section */ 2039 - state = ctx->ctx_state; 2040 - 2041 - /* 2042 - * the context is still attached to a task (possibly current) 2043 - * we cannot destroy it right now 2044 - */ 2045 - 2046 - /* 2047 - * we must free the sampling buffer right here because 2048 - * we cannot rely on it being cleaned up later by the 2049 - * monitored task. It is not possible to free vmalloc'ed 2050 - * memory in pfm_load_regs(). Instead, we remove the buffer 2051 - * now. Should there be subsequent PMU overflows originally 2052 - * meant for sampling, they will be converted to spurious 2053 - * and that's fine because the monitoring tool is gone anyway. 2054 - */ 2055 - if (ctx->ctx_smpl_hdr) { 2056 - smpl_buf_addr = ctx->ctx_smpl_hdr; 2057 - smpl_buf_size = ctx->ctx_smpl_size; 2058 - /* no more sampling */ 2059 - ctx->ctx_smpl_hdr = NULL; 2060 - ctx->ctx_fl_is_sampling = 0; 2061 - } 2062 - 2063 - DPRINT(("ctx_state=%d free_possible=%d addr=%p size=%lu\n", 2064 - state, 2065 - free_possible, 2066 - smpl_buf_addr, 2067 - smpl_buf_size)); 2068 - 2069 - if (smpl_buf_addr) pfm_exit_smpl_buffer(ctx->ctx_buf_fmt); 2070 - 2071 - /* 2072 - * UNLOADED means that the session has already been unreserved. 2073 - */ 2074 - if (state == PFM_CTX_ZOMBIE) { 2075 - pfm_unreserve_session(ctx, ctx->ctx_fl_system, ctx->ctx_cpu); 2076 - } 2077 - 2078 - /* 2079 - * disconnecting the file descriptor from the context must be done 2080 - * before we unlock. 2081 - */ 2082 - filp->private_data = NULL; 2083 - 2084 - /* 2085 - * if we free on the spot, the context is now completely unreachable 2086 - * from the caller's side. The monitored task side is also cut, so we 2087 - * can freely cut. 2088 - * 2089 - * If we have a deferred free, only the caller side is disconnected. 2090 - */ 2091 - UNPROTECT_CTX(ctx, flags); 2092 - 2093 - /* 2094 - * All memory free operations (especially for vmalloc'ed memory) 2095 - * MUST be done with interrupts ENABLED.
2096 - */ 2097 - vfree(smpl_buf_addr); 2098 - 2099 - /* 2100 - * return the memory used by the context 2101 - */ 2102 - if (free_possible) pfm_context_free(ctx); 2103 - 2104 - return 0; 2105 - } 2106 - 2107 - static const struct file_operations pfm_file_ops = { 2108 - .llseek = no_llseek, 2109 - .read = pfm_read, 2110 - .write = pfm_write, 2111 - .poll = pfm_poll, 2112 - .unlocked_ioctl = pfm_ioctl, 2113 - .fasync = pfm_fasync, 2114 - .release = pfm_close, 2115 - .flush = pfm_flush 2116 - }; 2117 - 2118 - static char *pfmfs_dname(struct dentry *dentry, char *buffer, int buflen) 2119 - { 2120 - return dynamic_dname(dentry, buffer, buflen, "pfm:[%lu]", 2121 - d_inode(dentry)->i_ino); 2122 - } 2123 - 2124 - static const struct dentry_operations pfmfs_dentry_operations = { 2125 - .d_delete = always_delete_dentry, 2126 - .d_dname = pfmfs_dname, 2127 - }; 2128 - 2129 - 2130 - static struct file * 2131 - pfm_alloc_file(pfm_context_t *ctx) 2132 - { 2133 - struct file *file; 2134 - struct inode *inode; 2135 - struct path path; 2136 - struct qstr this = { .name = "" }; 2137 - 2138 - /* 2139 - * allocate a new inode 2140 - */ 2141 - inode = new_inode(pfmfs_mnt->mnt_sb); 2142 - if (!inode) 2143 - return ERR_PTR(-ENOMEM); 2144 - 2145 - DPRINT(("new inode ino=%ld @%p\n", inode->i_ino, inode)); 2146 - 2147 - inode->i_mode = S_IFCHR|S_IRUGO; 2148 - inode->i_uid = current_fsuid(); 2149 - inode->i_gid = current_fsgid(); 2150 - 2151 - /* 2152 - * allocate a new dcache entry 2153 - */ 2154 - path.dentry = d_alloc(pfmfs_mnt->mnt_root, &this); 2155 - if (!path.dentry) { 2156 - iput(inode); 2157 - return ERR_PTR(-ENOMEM); 2158 - } 2159 - path.mnt = mntget(pfmfs_mnt); 2160 - 2161 - d_add(path.dentry, inode); 2162 - 2163 - file = alloc_file(&path, FMODE_READ, &pfm_file_ops); 2164 - if (IS_ERR(file)) { 2165 - path_put(&path); 2166 - return file; 2167 - } 2168 - 2169 - file->f_flags = O_RDONLY; 2170 - file->private_data = ctx; 2171 - 2172 - return file; 2173 - } 2174 - 2175 - static int 2176 - pfm_remap_buffer(struct vm_area_struct *vma, unsigned long buf, unsigned long addr, unsigned long size) 2177 - { 2178 - DPRINT(("CPU%d buf=0x%lx addr=0x%lx size=%ld\n", smp_processor_id(), buf, addr, size)); 2179 - 2180 - while (size > 0) { 2181 - unsigned long pfn = ia64_tpa(buf) >> PAGE_SHIFT; 2182 - 2183 - 2184 - if (remap_pfn_range(vma, addr, pfn, PAGE_SIZE, PAGE_READONLY)) 2185 - return -ENOMEM; 2186 - 2187 - addr += PAGE_SIZE; 2188 - buf += PAGE_SIZE; 2189 - size -= PAGE_SIZE; 2190 - } 2191 - return 0; 2192 - } 2193 - 2194 - /* 2195 - * allocate a sampling buffer and remaps it into the user address space of the task 2196 - */ 2197 - static int 2198 - pfm_smpl_buffer_alloc(struct task_struct *task, struct file *filp, pfm_context_t *ctx, unsigned long rsize, void **user_vaddr) 2199 - { 2200 - struct mm_struct *mm = task->mm; 2201 - struct vm_area_struct *vma = NULL; 2202 - unsigned long size; 2203 - void *smpl_buf; 2204 - 2205 - 2206 - /* 2207 - * the fixed header + requested size and align to page boundary 2208 - */ 2209 - size = PAGE_ALIGN(rsize); 2210 - 2211 - DPRINT(("sampling buffer rsize=%lu size=%lu bytes\n", rsize, size)); 2212 - 2213 - /* 2214 - * check requested size to avoid Denial-of-service attacks 2215 - * XXX: may have to refine this test 2216 - * Check against address space limit. 
2217 - * 2218 - * if ((mm->total_vm << PAGE_SHIFT) + len> task->rlim[RLIMIT_AS].rlim_cur) 2219 - * return -ENOMEM; 2220 - */ 2221 - if (size > task_rlimit(task, RLIMIT_MEMLOCK)) 2222 - return -ENOMEM; 2223 - 2224 - /* 2225 - * We do the easy to undo allocations first. 2226 - */ 2227 - smpl_buf = vzalloc(size); 2228 - if (smpl_buf == NULL) { 2229 - DPRINT(("Can't allocate sampling buffer\n")); 2230 - return -ENOMEM; 2231 - } 2232 - 2233 - DPRINT(("smpl_buf @%p\n", smpl_buf)); 2234 - 2235 - /* allocate vma */ 2236 - vma = vm_area_alloc(mm); 2237 - if (!vma) { 2238 - DPRINT(("Cannot allocate vma\n")); 2239 - goto error_kmem; 2240 - } 2241 - 2242 - /* 2243 - * partially initialize the vma for the sampling buffer 2244 - */ 2245 - vma->vm_file = get_file(filp); 2246 - vma->vm_flags = VM_READ|VM_MAYREAD|VM_DONTEXPAND|VM_DONTDUMP; 2247 - vma->vm_page_prot = PAGE_READONLY; /* XXX may need to change */ 2248 - 2249 - /* 2250 - * Now we have everything we need and we can initialize 2251 - * and connect all the data structures 2252 - */ 2253 - 2254 - ctx->ctx_smpl_hdr = smpl_buf; 2255 - ctx->ctx_smpl_size = size; /* aligned size */ 2256 - 2257 - /* 2258 - * Let's do the difficult operations next. 2259 - * 2260 - * now we atomically find some area in the address space and 2261 - * remap the buffer in it. 2262 - */ 2263 - mmap_write_lock(task->mm); 2264 - 2265 - /* find some free area in address space, must have mmap sem held */ 2266 - vma->vm_start = get_unmapped_area(NULL, 0, size, 0, MAP_PRIVATE|MAP_ANONYMOUS); 2267 - if (IS_ERR_VALUE(vma->vm_start)) { 2268 - DPRINT(("Cannot find unmapped area for size %ld\n", size)); 2269 - mmap_write_unlock(task->mm); 2270 - goto error; 2271 - } 2272 - vma->vm_end = vma->vm_start + size; 2273 - vma->vm_pgoff = vma->vm_start >> PAGE_SHIFT; 2274 - 2275 - DPRINT(("aligned size=%ld, hdr=%p mapped @0x%lx\n", size, ctx->ctx_smpl_hdr, vma->vm_start)); 2276 - 2277 - /* can only be applied to current task, need to have the mm semaphore held when called */ 2278 - if (pfm_remap_buffer(vma, (unsigned long)smpl_buf, vma->vm_start, size)) { 2279 - DPRINT(("Can't remap buffer\n")); 2280 - mmap_write_unlock(task->mm); 2281 - goto error; 2282 - } 2283 - 2284 - /* 2285 - * now insert the vma in the vm list for the process, must be 2286 - * done with mmap lock held 2287 - */ 2288 - insert_vm_struct(mm, vma); 2289 - 2290 - vm_stat_account(vma->vm_mm, vma->vm_flags, vma_pages(vma)); 2291 - mmap_write_unlock(task->mm); 2292 - 2293 - /* 2294 - * keep track of user level virtual address 2295 - */ 2296 - ctx->ctx_smpl_vaddr = (void *)vma->vm_start; 2297 - *(unsigned long *)user_vaddr = vma->vm_start; 2298 - 2299 - return 0; 2300 - 2301 - error: 2302 - vm_area_free(vma); 2303 - error_kmem: 2304 - vfree(smpl_buf); 2305 - 2306 - return -ENOMEM; 2307 - } 2308 - 2309 - /* 2310 - * XXX: do something better here 2311 - */ 2312 - static int 2313 - pfm_bad_permissions(struct task_struct *task) 2314 - { 2315 - const struct cred *tcred; 2316 - kuid_t uid = current_uid(); 2317 - kgid_t gid = current_gid(); 2318 - int ret; 2319 - 2320 - rcu_read_lock(); 2321 - tcred = __task_cred(task); 2322 - 2323 - /* inspired by ptrace_attach() */ 2324 - DPRINT(("cur: uid=%d gid=%d task: euid=%d suid=%d uid=%d egid=%d sgid=%d\n", 2325 - from_kuid(&init_user_ns, uid), 2326 - from_kgid(&init_user_ns, gid), 2327 - from_kuid(&init_user_ns, tcred->euid), 2328 - from_kuid(&init_user_ns, tcred->suid), 2329 - from_kuid(&init_user_ns, tcred->uid), 2330 - from_kgid(&init_user_ns, tcred->egid), 2331 - from_kgid(&init_user_ns, 
tcred->sgid))); 2332 - 2333 - ret = ((!uid_eq(uid, tcred->euid)) 2334 - || (!uid_eq(uid, tcred->suid)) 2335 - || (!uid_eq(uid, tcred->uid)) 2336 - || (!gid_eq(gid, tcred->egid)) 2337 - || (!gid_eq(gid, tcred->sgid)) 2338 - || (!gid_eq(gid, tcred->gid))) && !capable(CAP_SYS_PTRACE); 2339 - 2340 - rcu_read_unlock(); 2341 - return ret; 2342 - } 2343 - 2344 - static int 2345 - pfarg_is_sane(struct task_struct *task, pfarg_context_t *pfx) 2346 - { 2347 - int ctx_flags; 2348 - 2349 - /* valid signal */ 2350 - 2351 - ctx_flags = pfx->ctx_flags; 2352 - 2353 - if (ctx_flags & PFM_FL_SYSTEM_WIDE) { 2354 - 2355 - /* 2356 - * cannot block in this mode 2357 - */ 2358 - if (ctx_flags & PFM_FL_NOTIFY_BLOCK) { 2359 - DPRINT(("cannot use blocking mode when in system wide monitoring\n")); 2360 - return -EINVAL; 2361 - } 2362 - } else { 2363 - } 2364 - /* probably more to add here */ 2365 - 2366 - return 0; 2367 - } 2368 - 2369 - static int 2370 - pfm_setup_buffer_fmt(struct task_struct *task, struct file *filp, pfm_context_t *ctx, unsigned int ctx_flags, 2371 - unsigned int cpu, pfarg_context_t *arg) 2372 - { 2373 - pfm_buffer_fmt_t *fmt = NULL; 2374 - unsigned long size = 0UL; 2375 - void *uaddr = NULL; 2376 - void *fmt_arg = NULL; 2377 - int ret = 0; 2378 - #define PFM_CTXARG_BUF_ARG(a) (pfm_buffer_fmt_t *)(a+1) 2379 - 2380 - /* invoke and lock buffer format, if found */ 2381 - fmt = pfm_find_buffer_fmt(arg->ctx_smpl_buf_id); 2382 - if (fmt == NULL) { 2383 - DPRINT(("[%d] cannot find buffer format\n", task_pid_nr(task))); 2384 - return -EINVAL; 2385 - } 2386 - 2387 - /* 2388 - * buffer argument MUST be contiguous to pfarg_context_t 2389 - */ 2390 - if (fmt->fmt_arg_size) fmt_arg = PFM_CTXARG_BUF_ARG(arg); 2391 - 2392 - ret = pfm_buf_fmt_validate(fmt, task, ctx_flags, cpu, fmt_arg); 2393 - 2394 - DPRINT(("[%d] after validate(0x%x,%d,%p)=%d\n", task_pid_nr(task), ctx_flags, cpu, fmt_arg, ret)); 2395 - 2396 - if (ret) goto error; 2397 - 2398 - /* link buffer format and context */ 2399 - ctx->ctx_buf_fmt = fmt; 2400 - ctx->ctx_fl_is_sampling = 1; /* assume record() is defined */ 2401 - 2402 - /* 2403 - * check if buffer format wants to use perfmon buffer allocation/mapping service 2404 - */ 2405 - ret = pfm_buf_fmt_getsize(fmt, task, ctx_flags, cpu, fmt_arg, &size); 2406 - if (ret) goto error; 2407 - 2408 - if (size) { 2409 - /* 2410 - * buffer is always remapped into the caller's address space 2411 - */ 2412 - ret = pfm_smpl_buffer_alloc(current, filp, ctx, size, &uaddr); 2413 - if (ret) goto error; 2414 - 2415 - /* keep track of user address of buffer */ 2416 - arg->ctx_smpl_vaddr = uaddr; 2417 - } 2418 - ret = pfm_buf_fmt_init(fmt, task, ctx->ctx_smpl_hdr, ctx_flags, cpu, fmt_arg); 2419 - 2420 - error: 2421 - return ret; 2422 - } 2423 - 2424 - static void 2425 - pfm_reset_pmu_state(pfm_context_t *ctx) 2426 - { 2427 - int i; 2428 - 2429 - /* 2430 - * install reset values for PMC. 2431 - */ 2432 - for (i=1; PMC_IS_LAST(i) == 0; i++) { 2433 - if (PMC_IS_IMPL(i) == 0) continue; 2434 - ctx->ctx_pmcs[i] = PMC_DFL_VAL(i); 2435 - DPRINT(("pmc[%d]=0x%lx\n", i, ctx->ctx_pmcs[i])); 2436 - } 2437 - /* 2438 - * PMD registers are set to 0UL when the context in memset() 2439 - */ 2440 - 2441 - /* 2442 - * On context switched restore, we must restore ALL pmc and ALL pmd even 2443 - * when they are not actively used by the task. In UP, the incoming process 2444 - * may otherwise pick up left over PMC, PMD state from the previous process. 
2445 - * As opposed to PMD, stale PMC can cause harm to the incoming 2446 - * process because they may change what is being measured. 2447 - * Therefore, we must systematically reinstall the entire 2448 - * PMC state. In SMP, the same thing is possible on the 2449 - * same CPU but also between 2 CPUs. 2450 - * 2451 - * The problem with PMD is information leaking especially 2452 - * to user level when psr.sp=0 2453 - * 2454 - * There is unfortunately no easy way to avoid this problem 2455 - * on either UP or SMP. This definitely slows down the 2456 - * pfm_load_regs() function. 2457 - */ 2458 - 2459 - /* 2460 - * bitmask of all PMCs accessible to this context 2461 - * 2462 - * PMC0 is treated differently. 2463 - */ 2464 - ctx->ctx_all_pmcs[0] = pmu_conf->impl_pmcs[0] & ~0x1; 2465 - 2466 - /* 2467 - * bitmask of all PMDs that are accessible to this context 2468 - */ 2469 - ctx->ctx_all_pmds[0] = pmu_conf->impl_pmds[0]; 2470 - 2471 - DPRINT(("<%d> all_pmcs=0x%lx all_pmds=0x%lx\n", ctx->ctx_fd, ctx->ctx_all_pmcs[0], ctx->ctx_all_pmds[0])); 2472 - 2473 - /* 2474 - * useful in case of re-enable after disable 2475 - */ 2476 - ctx->ctx_used_ibrs[0] = 0UL; 2477 - ctx->ctx_used_dbrs[0] = 0UL; 2478 - } 2479 - 2480 - static int 2481 - pfm_ctx_getsize(void *arg, size_t *sz) 2482 - { 2483 - pfarg_context_t *req = (pfarg_context_t *)arg; 2484 - pfm_buffer_fmt_t *fmt; 2485 - 2486 - *sz = 0; 2487 - 2488 - if (!pfm_uuid_cmp(req->ctx_smpl_buf_id, pfm_null_uuid)) return 0; 2489 - 2490 - fmt = pfm_find_buffer_fmt(req->ctx_smpl_buf_id); 2491 - if (fmt == NULL) { 2492 - DPRINT(("cannot find buffer format\n")); 2493 - return -EINVAL; 2494 - } 2495 - /* get just enough to copy in user parameters */ 2496 - *sz = fmt->fmt_arg_size; 2497 - DPRINT(("arg_size=%lu\n", *sz)); 2498 - 2499 - return 0; 2500 - } 2501 - 2502 - 2503 - 2504 - /* 2505 - * cannot attach if: 2506 - * - kernel task 2507 - * - task not owned by caller 2508 - * - task incompatible with context mode 2509 - */ 2510 - static int 2511 - pfm_task_incompatible(pfm_context_t *ctx, struct task_struct *task) 2512 - { 2513 - /* 2514 - * no kernel task or task not owned by caller 2515 - */ 2516 - if (task->mm == NULL) { 2517 - DPRINT(("task [%d] has no memory context (kernel thread)\n", task_pid_nr(task))); 2518 - return -EPERM; 2519 - } 2520 - if (pfm_bad_permissions(task)) { 2521 - DPRINT(("no permission to attach to [%d]\n", task_pid_nr(task))); 2522 - return -EPERM; 2523 - } 2524 - /* 2525 - * cannot block in self-monitoring mode 2526 - */ 2527 - if (CTX_OVFL_NOBLOCK(ctx) == 0 && task == current) { 2528 - DPRINT(("cannot load a blocking context on self for [%d]\n", task_pid_nr(task))); 2529 - return -EINVAL; 2530 - } 2531 - 2532 - if (task->exit_state == EXIT_ZOMBIE) { 2533 - DPRINT(("cannot attach to zombie task [%d]\n", task_pid_nr(task))); 2534 - return -EBUSY; 2535 - } 2536 - 2537 - /* 2538 - * always ok for self 2539 - */ 2540 - if (task == current) return 0; 2541 - 2542 - if (!task_is_stopped_or_traced(task)) { 2543 - DPRINT(("cannot attach to non-stopped task [%d] state=%ld\n", task_pid_nr(task), task->state)); 2544 - return -EBUSY; 2545 - } 2546 - /* 2547 - * make sure the task is off any CPU 2548 - */ 2549 - wait_task_inactive(task, 0); 2550 - 2551 - /* more to come... 
*/ 2552 - 2553 - return 0; 2554 - } 2555 - 2556 - static int 2557 - pfm_get_task(pfm_context_t *ctx, pid_t pid, struct task_struct **task) 2558 - { 2559 - struct task_struct *p = current; 2560 - int ret; 2561 - 2562 - /* XXX: need to add more checks here */ 2563 - if (pid < 2) return -EPERM; 2564 - 2565 - if (pid != task_pid_vnr(current)) { 2566 - /* make sure task cannot go away while we operate on it */ 2567 - p = find_get_task_by_vpid(pid); 2568 - if (!p) 2569 - return -ESRCH; 2570 - } 2571 - 2572 - ret = pfm_task_incompatible(ctx, p); 2573 - if (ret == 0) { 2574 - *task = p; 2575 - } else if (p != current) { 2576 - pfm_put_task(p); 2577 - } 2578 - return ret; 2579 - } 2580 - 2581 - 2582 - 2583 - static int 2584 - pfm_context_create(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) 2585 - { 2586 - pfarg_context_t *req = (pfarg_context_t *)arg; 2587 - struct file *filp; 2588 - struct path path; 2589 - int ctx_flags; 2590 - int fd; 2591 - int ret; 2592 - 2593 - /* let's check the arguments first */ 2594 - ret = pfarg_is_sane(current, req); 2595 - if (ret < 0) 2596 - return ret; 2597 - 2598 - ctx_flags = req->ctx_flags; 2599 - 2600 - ret = -ENOMEM; 2601 - 2602 - fd = get_unused_fd_flags(0); 2603 - if (fd < 0) 2604 - return fd; 2605 - 2606 - ctx = pfm_context_alloc(ctx_flags); 2607 - if (!ctx) 2608 - goto error; 2609 - 2610 - filp = pfm_alloc_file(ctx); 2611 - if (IS_ERR(filp)) { 2612 - ret = PTR_ERR(filp); 2613 - goto error_file; 2614 - } 2615 - 2616 - req->ctx_fd = ctx->ctx_fd = fd; 2617 - 2618 - /* 2619 - * does the user want to sample? 2620 - */ 2621 - if (pfm_uuid_cmp(req->ctx_smpl_buf_id, pfm_null_uuid)) { 2622 - ret = pfm_setup_buffer_fmt(current, filp, ctx, ctx_flags, 0, req); 2623 - if (ret) 2624 - goto buffer_error; 2625 - } 2626 - 2627 - DPRINT(("ctx=%p flags=0x%x system=%d notify_block=%d excl_idle=%d no_msg=%d ctx_fd=%d\n", 2628 - ctx, 2629 - ctx_flags, 2630 - ctx->ctx_fl_system, 2631 - ctx->ctx_fl_block, 2632 - ctx->ctx_fl_excl_idle, 2633 - ctx->ctx_fl_no_msg, 2634 - ctx->ctx_fd)); 2635 - 2636 - /* 2637 - * initialize soft PMU state 2638 - */ 2639 - pfm_reset_pmu_state(ctx); 2640 - 2641 - fd_install(fd, filp); 2642 - 2643 - return 0; 2644 - 2645 - buffer_error: 2646 - path = filp->f_path; 2647 - put_filp(filp); 2648 - path_put(&path); 2649 - 2650 - if (ctx->ctx_buf_fmt) { 2651 - pfm_buf_fmt_exit(ctx->ctx_buf_fmt, current, NULL, regs); 2652 - } 2653 - error_file: 2654 - pfm_context_free(ctx); 2655 - 2656 - error: 2657 - put_unused_fd(fd); 2658 - return ret; 2659 - } 2660 - 2661 - static inline unsigned long 2662 - pfm_new_counter_value (pfm_counter_t *reg, int is_long_reset) 2663 - { 2664 - unsigned long val = is_long_reset ? reg->long_reset : reg->short_reset; 2665 - unsigned long new_seed, old_seed = reg->seed, mask = reg->mask; 2666 - extern unsigned long carta_random32 (unsigned long seed); 2667 - 2668 - if (reg->flags & PFM_REGFL_RANDOM) { 2669 - new_seed = carta_random32(old_seed); 2670 - val -= (old_seed & mask); /* counter values are negative numbers! 
*/ 2671 - if ((mask >> 32) != 0) 2672 - /* construct a full 64-bit random value: */ 2673 - new_seed |= carta_random32(old_seed >> 32) << 32; 2674 - reg->seed = new_seed; 2675 - } 2676 - reg->lval = val; 2677 - return val; 2678 - } 2679 - 2680 - static void 2681 - pfm_reset_regs_masked(pfm_context_t *ctx, unsigned long *ovfl_regs, int is_long_reset) 2682 - { 2683 - unsigned long mask = ovfl_regs[0]; 2684 - unsigned long reset_others = 0UL; 2685 - unsigned long val; 2686 - int i; 2687 - 2688 - /* 2689 - * now restore reset value on sampling overflowed counters 2690 - */ 2691 - mask >>= PMU_FIRST_COUNTER; 2692 - for(i = PMU_FIRST_COUNTER; mask; i++, mask >>= 1) { 2693 - 2694 - if ((mask & 0x1UL) == 0UL) continue; 2695 - 2696 - ctx->ctx_pmds[i].val = val = pfm_new_counter_value(ctx->ctx_pmds+ i, is_long_reset); 2697 - reset_others |= ctx->ctx_pmds[i].reset_pmds[0]; 2698 - 2699 - DPRINT_ovfl((" %s reset ctx_pmds[%d]=%lx\n", is_long_reset ? "long" : "short", i, val)); 2700 - } 2701 - 2702 - /* 2703 - * Now take care of resetting the other registers 2704 - */ 2705 - for(i = 0; reset_others; i++, reset_others >>= 1) { 2706 - 2707 - if ((reset_others & 0x1) == 0) continue; 2708 - 2709 - ctx->ctx_pmds[i].val = val = pfm_new_counter_value(ctx->ctx_pmds + i, is_long_reset); 2710 - 2711 - DPRINT_ovfl(("%s reset_others pmd[%d]=%lx\n", 2712 - is_long_reset ? "long" : "short", i, val)); 2713 - } 2714 - } 2715 - 2716 - static void 2717 - pfm_reset_regs(pfm_context_t *ctx, unsigned long *ovfl_regs, int is_long_reset) 2718 - { 2719 - unsigned long mask = ovfl_regs[0]; 2720 - unsigned long reset_others = 0UL; 2721 - unsigned long val; 2722 - int i; 2723 - 2724 - DPRINT_ovfl(("ovfl_regs=0x%lx is_long_reset=%d\n", ovfl_regs[0], is_long_reset)); 2725 - 2726 - if (ctx->ctx_state == PFM_CTX_MASKED) { 2727 - pfm_reset_regs_masked(ctx, ovfl_regs, is_long_reset); 2728 - return; 2729 - } 2730 - 2731 - /* 2732 - * now restore reset value on sampling overflowed counters 2733 - */ 2734 - mask >>= PMU_FIRST_COUNTER; 2735 - for(i = PMU_FIRST_COUNTER; mask; i++, mask >>= 1) { 2736 - 2737 - if ((mask & 0x1UL) == 0UL) continue; 2738 - 2739 - val = pfm_new_counter_value(ctx->ctx_pmds+ i, is_long_reset); 2740 - reset_others |= ctx->ctx_pmds[i].reset_pmds[0]; 2741 - 2742 - DPRINT_ovfl((" %s reset ctx_pmds[%d]=%lx\n", is_long_reset ? "long" : "short", i, val)); 2743 - 2744 - pfm_write_soft_counter(ctx, i, val); 2745 - } 2746 - 2747 - /* 2748 - * Now take care of resetting the other registers 2749 - */ 2750 - for(i = 0; reset_others; i++, reset_others >>= 1) { 2751 - 2752 - if ((reset_others & 0x1) == 0) continue; 2753 - 2754 - val = pfm_new_counter_value(ctx->ctx_pmds + i, is_long_reset); 2755 - 2756 - if (PMD_IS_COUNTING(i)) { 2757 - pfm_write_soft_counter(ctx, i, val); 2758 - } else { 2759 - ia64_set_pmd(i, val); 2760 - } 2761 - DPRINT_ovfl(("%s reset_others pmd[%d]=%lx\n", 2762 - is_long_reset ? 
"long" : "short", i, val)); 2763 - } 2764 - ia64_srlz_d(); 2765 - } 2766 - 2767 - static int 2768 - pfm_write_pmcs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) 2769 - { 2770 - struct task_struct *task; 2771 - pfarg_reg_t *req = (pfarg_reg_t *)arg; 2772 - unsigned long value, pmc_pm; 2773 - unsigned long smpl_pmds, reset_pmds, impl_pmds; 2774 - unsigned int cnum, reg_flags, flags, pmc_type; 2775 - int i, can_access_pmu = 0, is_loaded, is_system, expert_mode; 2776 - int is_monitor, is_counting, state; 2777 - int ret = -EINVAL; 2778 - pfm_reg_check_t wr_func; 2779 - #define PFM_CHECK_PMC_PM(x, y, z) ((x)->ctx_fl_system ^ PMC_PM(y, z)) 2780 - 2781 - state = ctx->ctx_state; 2782 - is_loaded = state == PFM_CTX_LOADED ? 1 : 0; 2783 - is_system = ctx->ctx_fl_system; 2784 - task = ctx->ctx_task; 2785 - impl_pmds = pmu_conf->impl_pmds[0]; 2786 - 2787 - if (state == PFM_CTX_ZOMBIE) return -EINVAL; 2788 - 2789 - if (is_loaded) { 2790 - /* 2791 - * In system wide and when the context is loaded, access can only happen 2792 - * when the caller is running on the CPU being monitored by the session. 2793 - * It does not have to be the owner (ctx_task) of the context per se. 2794 - */ 2795 - if (is_system && ctx->ctx_cpu != smp_processor_id()) { 2796 - DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu)); 2797 - return -EBUSY; 2798 - } 2799 - can_access_pmu = GET_PMU_OWNER() == task || is_system ? 1 : 0; 2800 - } 2801 - expert_mode = pfm_sysctl.expert_mode; 2802 - 2803 - for (i = 0; i < count; i++, req++) { 2804 - 2805 - cnum = req->reg_num; 2806 - reg_flags = req->reg_flags; 2807 - value = req->reg_value; 2808 - smpl_pmds = req->reg_smpl_pmds[0]; 2809 - reset_pmds = req->reg_reset_pmds[0]; 2810 - flags = 0; 2811 - 2812 - 2813 - if (cnum >= PMU_MAX_PMCS) { 2814 - DPRINT(("pmc%u is invalid\n", cnum)); 2815 - goto error; 2816 - } 2817 - 2818 - pmc_type = pmu_conf->pmc_desc[cnum].type; 2819 - pmc_pm = (value >> pmu_conf->pmc_desc[cnum].pm_pos) & 0x1; 2820 - is_counting = (pmc_type & PFM_REG_COUNTING) == PFM_REG_COUNTING ? 1 : 0; 2821 - is_monitor = (pmc_type & PFM_REG_MONITOR) == PFM_REG_MONITOR ? 1 : 0; 2822 - 2823 - /* 2824 - * we reject all non implemented PMC as well 2825 - * as attempts to modify PMC[0-3] which are used 2826 - * as status registers by the PMU 2827 - */ 2828 - if ((pmc_type & PFM_REG_IMPL) == 0 || (pmc_type & PFM_REG_CONTROL) == PFM_REG_CONTROL) { 2829 - DPRINT(("pmc%u is unimplemented or no-access pmc_type=%x\n", cnum, pmc_type)); 2830 - goto error; 2831 - } 2832 - wr_func = pmu_conf->pmc_desc[cnum].write_check; 2833 - /* 2834 - * If the PMC is a monitor, then if the value is not the default: 2835 - * - system-wide session: PMCx.pm=1 (privileged monitor) 2836 - * - per-task : PMCx.pm=0 (user monitor) 2837 - */ 2838 - if (is_monitor && value != PMC_DFL_VAL(cnum) && is_system ^ pmc_pm) { 2839 - DPRINT(("pmc%u pmc_pm=%lu is_system=%d\n", 2840 - cnum, 2841 - pmc_pm, 2842 - is_system)); 2843 - goto error; 2844 - } 2845 - 2846 - if (is_counting) { 2847 - /* 2848 - * enforce generation of overflow interrupt. Necessary on all 2849 - * CPUs. 
2850 - */ 2851 - value |= 1 << PMU_PMC_OI; 2852 - 2853 - if (reg_flags & PFM_REGFL_OVFL_NOTIFY) { 2854 - flags |= PFM_REGFL_OVFL_NOTIFY; 2855 - } 2856 - 2857 - if (reg_flags & PFM_REGFL_RANDOM) flags |= PFM_REGFL_RANDOM; 2858 - 2859 - /* verify validity of smpl_pmds */ 2860 - if ((smpl_pmds & impl_pmds) != smpl_pmds) { 2861 - DPRINT(("invalid smpl_pmds 0x%lx for pmc%u\n", smpl_pmds, cnum)); 2862 - goto error; 2863 - } 2864 - 2865 - /* verify validity of reset_pmds */ 2866 - if ((reset_pmds & impl_pmds) != reset_pmds) { 2867 - DPRINT(("invalid reset_pmds 0x%lx for pmc%u\n", reset_pmds, cnum)); 2868 - goto error; 2869 - } 2870 - } else { 2871 - if (reg_flags & (PFM_REGFL_OVFL_NOTIFY|PFM_REGFL_RANDOM)) { 2872 - DPRINT(("cannot set ovfl_notify or random on pmc%u\n", cnum)); 2873 - goto error; 2874 - } 2875 - /* eventid on non-counting monitors is ignored */ 2876 - } 2877 - 2878 - /* 2879 - * execute write checker, if any 2880 - */ 2881 - if (likely(expert_mode == 0 && wr_func)) { 2882 - ret = (*wr_func)(task, ctx, cnum, &value, regs); 2883 - if (ret) goto error; 2884 - ret = -EINVAL; 2885 - } 2886 - 2887 - /* 2888 - * no error on this register 2889 - */ 2890 - PFM_REG_RETFLAG_SET(req->reg_flags, 0); 2891 - 2892 - /* 2893 - * Now we commit the changes to the software state 2894 - */ 2895 - 2896 - /* 2897 - * update overflow information 2898 - */ 2899 - if (is_counting) { 2900 - /* 2901 - * full flag update each time a register is programmed 2902 - */ 2903 - ctx->ctx_pmds[cnum].flags = flags; 2904 - 2905 - ctx->ctx_pmds[cnum].reset_pmds[0] = reset_pmds; 2906 - ctx->ctx_pmds[cnum].smpl_pmds[0] = smpl_pmds; 2907 - ctx->ctx_pmds[cnum].eventid = req->reg_smpl_eventid; 2908 - 2909 - /* 2910 - * Mark all PMDs to be accessed as used. 2911 - * 2912 - * We do not keep track of PMC because we have to 2913 - * systematically restore ALL of them. 2914 - * 2915 - * We do not update the used_monitors mask, because 2916 - * if we have not programmed them, then they will be in 2917 - * a quiescent state, therefore we will not need to 2918 - * mask/restore them when context is MASKED. 2919 - */ 2920 - CTX_USED_PMD(ctx, reset_pmds); 2921 - CTX_USED_PMD(ctx, smpl_pmds); 2922 - /* 2923 - * make sure we do not try to reset on 2924 - * restart because we have established new values 2925 - */ 2926 - if (state == PFM_CTX_MASKED) ctx->ctx_ovfl_regs[0] &= ~1UL << cnum; 2927 - } 2928 - /* 2929 - * Needed in case the user does not initialize the equivalent 2930 - * PMD. Clearing is done indirectly via pfm_reset_pmu_state() so there is no 2931 - * possible leak here. 2932 - */ 2933 - CTX_USED_PMD(ctx, pmu_conf->pmc_desc[cnum].dep_pmd[0]); 2934 - 2935 - /* 2936 - * keep track of the monitor PMC that we are using. 2937 - * we save the value of the pmc in ctx_pmcs[] and if 2938 - * the monitoring is not stopped for the context we also 2939 - * place it in the saved state area so that it will be 2940 - * picked up later by the context switch code. 2941 - * 2942 - * The value in ctx_pmcs[] can only be changed in pfm_write_pmcs(). 2943 - * 2944 - * The value in th_pmcs[] may be modified on overflow, i.e., when 2945 - * monitoring needs to be stopped.
2946 - */ 2947 - if (is_monitor) CTX_USED_MONITOR(ctx, 1UL << cnum); 2948 - 2949 - /* 2950 - * update context state 2951 - */ 2952 - ctx->ctx_pmcs[cnum] = value; 2953 - 2954 - if (is_loaded) { 2955 - /* 2956 - * write thread state 2957 - */ 2958 - if (is_system == 0) ctx->th_pmcs[cnum] = value; 2959 - 2960 - /* 2961 - * write hardware register if we can 2962 - */ 2963 - if (can_access_pmu) { 2964 - ia64_set_pmc(cnum, value); 2965 - } 2966 - #ifdef CONFIG_SMP 2967 - else { 2968 - /* 2969 - * per-task SMP only here 2970 - * 2971 - * we are guaranteed that the task is not running on the other CPU, 2972 - * we indicate that this PMD will need to be reloaded if the task 2973 - * is rescheduled on the CPU it ran last on. 2974 - */ 2975 - ctx->ctx_reload_pmcs[0] |= 1UL << cnum; 2976 - } 2977 - #endif 2978 - } 2979 - 2980 - DPRINT(("pmc[%u]=0x%lx ld=%d apmu=%d flags=0x%x all_pmcs=0x%lx used_pmds=0x%lx eventid=%ld smpl_pmds=0x%lx reset_pmds=0x%lx reloads_pmcs=0x%lx used_monitors=0x%lx ovfl_regs=0x%lx\n", 2981 - cnum, 2982 - value, 2983 - is_loaded, 2984 - can_access_pmu, 2985 - flags, 2986 - ctx->ctx_all_pmcs[0], 2987 - ctx->ctx_used_pmds[0], 2988 - ctx->ctx_pmds[cnum].eventid, 2989 - smpl_pmds, 2990 - reset_pmds, 2991 - ctx->ctx_reload_pmcs[0], 2992 - ctx->ctx_used_monitors[0], 2993 - ctx->ctx_ovfl_regs[0])); 2994 - } 2995 - 2996 - /* 2997 - * make sure the changes are visible 2998 - */ 2999 - if (can_access_pmu) ia64_srlz_d(); 3000 - 3001 - return 0; 3002 - error: 3003 - PFM_REG_RETFLAG_SET(req->reg_flags, PFM_REG_RETFL_EINVAL); 3004 - return ret; 3005 - } 3006 - 3007 - static int 3008 - pfm_write_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) 3009 - { 3010 - struct task_struct *task; 3011 - pfarg_reg_t *req = (pfarg_reg_t *)arg; 3012 - unsigned long value, hw_value, ovfl_mask; 3013 - unsigned int cnum; 3014 - int i, can_access_pmu = 0, state; 3015 - int is_counting, is_loaded, is_system, expert_mode; 3016 - int ret = -EINVAL; 3017 - pfm_reg_check_t wr_func; 3018 - 3019 - 3020 - state = ctx->ctx_state; 3021 - is_loaded = state == PFM_CTX_LOADED ? 1 : 0; 3022 - is_system = ctx->ctx_fl_system; 3023 - ovfl_mask = pmu_conf->ovfl_val; 3024 - task = ctx->ctx_task; 3025 - 3026 - if (unlikely(state == PFM_CTX_ZOMBIE)) return -EINVAL; 3027 - 3028 - /* 3029 - * on both UP and SMP, we can only write to the PMC when the task is 3030 - * the owner of the local PMU. 3031 - */ 3032 - if (likely(is_loaded)) { 3033 - /* 3034 - * In system wide and when the context is loaded, access can only happen 3035 - * when the caller is running on the CPU being monitored by the session. 3036 - * It does not have to be the owner (ctx_task) of the context per se. 3037 - */ 3038 - if (unlikely(is_system && ctx->ctx_cpu != smp_processor_id())) { 3039 - DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu)); 3040 - return -EBUSY; 3041 - } 3042 - can_access_pmu = GET_PMU_OWNER() == task || is_system ? 
1 : 0; 3043 - } 3044 - expert_mode = pfm_sysctl.expert_mode; 3045 - 3046 - for (i = 0; i < count; i++, req++) { 3047 - 3048 - cnum = req->reg_num; 3049 - value = req->reg_value; 3050 - 3051 - if (!PMD_IS_IMPL(cnum)) { 3052 - DPRINT(("pmd[%u] is unimplemented or invalid\n", cnum)); 3053 - goto abort_mission; 3054 - } 3055 - is_counting = PMD_IS_COUNTING(cnum); 3056 - wr_func = pmu_conf->pmd_desc[cnum].write_check; 3057 - 3058 - /* 3059 - * execute write checker, if any 3060 - */ 3061 - if (unlikely(expert_mode == 0 && wr_func)) { 3062 - unsigned long v = value; 3063 - 3064 - ret = (*wr_func)(task, ctx, cnum, &v, regs); 3065 - if (ret) goto abort_mission; 3066 - 3067 - value = v; 3068 - ret = -EINVAL; 3069 - } 3070 - 3071 - /* 3072 - * no error on this register 3073 - */ 3074 - PFM_REG_RETFLAG_SET(req->reg_flags, 0); 3075 - 3076 - /* 3077 - * now commit changes to software state 3078 - */ 3079 - hw_value = value; 3080 - 3081 - /* 3082 - * update virtualized (64bits) counter 3083 - */ 3084 - if (is_counting) { 3085 - /* 3086 - * write context state 3087 - */ 3088 - ctx->ctx_pmds[cnum].lval = value; 3089 - 3090 - /* 3091 - * when context is load we use the split value 3092 - */ 3093 - if (is_loaded) { 3094 - hw_value = value & ovfl_mask; 3095 - value = value & ~ovfl_mask; 3096 - } 3097 - } 3098 - /* 3099 - * update reset values (not just for counters) 3100 - */ 3101 - ctx->ctx_pmds[cnum].long_reset = req->reg_long_reset; 3102 - ctx->ctx_pmds[cnum].short_reset = req->reg_short_reset; 3103 - 3104 - /* 3105 - * update randomization parameters (not just for counters) 3106 - */ 3107 - ctx->ctx_pmds[cnum].seed = req->reg_random_seed; 3108 - ctx->ctx_pmds[cnum].mask = req->reg_random_mask; 3109 - 3110 - /* 3111 - * update context value 3112 - */ 3113 - ctx->ctx_pmds[cnum].val = value; 3114 - 3115 - /* 3116 - * Keep track of what we use 3117 - * 3118 - * We do not keep track of PMC because we have to 3119 - * systematically restore ALL of them. 3120 - */ 3121 - CTX_USED_PMD(ctx, PMD_PMD_DEP(cnum)); 3122 - 3123 - /* 3124 - * mark this PMD register used as well 3125 - */ 3126 - CTX_USED_PMD(ctx, RDEP(cnum)); 3127 - 3128 - /* 3129 - * make sure we do not try to reset on 3130 - * restart because we have established new values 3131 - */ 3132 - if (is_counting && state == PFM_CTX_MASKED) { 3133 - ctx->ctx_ovfl_regs[0] &= ~1UL << cnum; 3134 - } 3135 - 3136 - if (is_loaded) { 3137 - /* 3138 - * write thread state 3139 - */ 3140 - if (is_system == 0) ctx->th_pmds[cnum] = hw_value; 3141 - 3142 - /* 3143 - * write hardware register if we can 3144 - */ 3145 - if (can_access_pmu) { 3146 - ia64_set_pmd(cnum, hw_value); 3147 - } else { 3148 - #ifdef CONFIG_SMP 3149 - /* 3150 - * we are guaranteed that the task is not running on the other CPU, 3151 - * we indicate that this PMD will need to be reloaded if the task 3152 - * is rescheduled on the CPU it ran last on. 3153 - */ 3154 - ctx->ctx_reload_pmds[0] |= 1UL << cnum; 3155 - #endif 3156 - } 3157 - } 3158 - 3159 - DPRINT(("pmd[%u]=0x%lx ld=%d apmu=%d, hw_value=0x%lx ctx_pmd=0x%lx short_reset=0x%lx " 3160 - "long_reset=0x%lx notify=%c seed=0x%lx mask=0x%lx used_pmds=0x%lx reset_pmds=0x%lx reload_pmds=0x%lx all_pmds=0x%lx ovfl_regs=0x%lx\n", 3161 - cnum, 3162 - value, 3163 - is_loaded, 3164 - can_access_pmu, 3165 - hw_value, 3166 - ctx->ctx_pmds[cnum].val, 3167 - ctx->ctx_pmds[cnum].short_reset, 3168 - ctx->ctx_pmds[cnum].long_reset, 3169 - PMC_OVFL_NOTIFY(ctx, cnum) ? 
'Y':'N', 3170 - ctx->ctx_pmds[cnum].seed, 3171 - ctx->ctx_pmds[cnum].mask, 3172 - ctx->ctx_used_pmds[0], 3173 - ctx->ctx_pmds[cnum].reset_pmds[0], 3174 - ctx->ctx_reload_pmds[0], 3175 - ctx->ctx_all_pmds[0], 3176 - ctx->ctx_ovfl_regs[0])); 3177 - } 3178 - 3179 - /* 3180 - * make changes visible 3181 - */ 3182 - if (can_access_pmu) ia64_srlz_d(); 3183 - 3184 - return 0; 3185 - 3186 - abort_mission: 3187 - /* 3188 - * for now, we have only one possibility for error 3189 - */ 3190 - PFM_REG_RETFLAG_SET(req->reg_flags, PFM_REG_RETFL_EINVAL); 3191 - return ret; 3192 - } 3193 - 3194 - /* 3195 - * By way of PROTECT_CONTEXT(), interrupts are masked while we are in this function. 3196 - * Therefore we know we do not have to worry about the PMU overflow interrupt. If an 3197 - * interrupt is delivered during the call, it will be kept pending until we leave, making 3198 - * it appear as if it had been generated at the UNPROTECT_CONTEXT(). At least we are 3199 - * guaranteed to return consistent data to the user; it may simply be old. It is not 3200 - * trivial to treat the overflow while inside the call because you may end up in 3201 - * some module sampling buffer code causing deadlocks. 3202 - */ 3203 - static int 3204 - pfm_read_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) 3205 - { 3206 - struct task_struct *task; 3207 - unsigned long val = 0UL, lval, ovfl_mask, sval; 3208 - pfarg_reg_t *req = (pfarg_reg_t *)arg; 3209 - unsigned int cnum, reg_flags = 0; 3210 - int i, can_access_pmu = 0, state; 3211 - int is_loaded, is_system, is_counting, expert_mode; 3212 - int ret = -EINVAL; 3213 - pfm_reg_check_t rd_func; 3214 - 3215 - /* 3216 - * access is possible when loaded only for 3217 - * self-monitoring tasks or in UP mode 3218 - */ 3219 - 3220 - state = ctx->ctx_state; 3221 - is_loaded = state == PFM_CTX_LOADED ? 1 : 0; 3222 - is_system = ctx->ctx_fl_system; 3223 - ovfl_mask = pmu_conf->ovfl_val; 3224 - task = ctx->ctx_task; 3225 - 3226 - if (state == PFM_CTX_ZOMBIE) return -EINVAL; 3227 - 3228 - if (likely(is_loaded)) { 3229 - /* 3230 - * In system wide and when the context is loaded, access can only happen 3231 - * when the caller is running on the CPU being monitored by the session. 3232 - * It does not have to be the owner (ctx_task) of the context per se. 3233 - */ 3234 - if (unlikely(is_system && ctx->ctx_cpu != smp_processor_id())) { 3235 - DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu)); 3236 - return -EBUSY; 3237 - } 3238 - /* 3239 - * this can be true when not self-monitoring only in UP 3240 - */ 3241 - can_access_pmu = GET_PMU_OWNER() == task || is_system ? 1 : 0; 3242 - 3243 - if (can_access_pmu) ia64_srlz_d(); 3244 - } 3245 - expert_mode = pfm_sysctl.expert_mode; 3246 - 3247 - DPRINT(("ld=%d apmu=%d ctx_state=%d\n", 3248 - is_loaded, 3249 - can_access_pmu, 3250 - state)); 3251 - 3252 - /* 3253 - * on both UP and SMP, we can only read the PMD from the hardware register when 3254 - * the task is the owner of the local PMU. 3255 - */ 3256 - 3257 - for (i = 0; i < count; i++, req++) { 3258 - 3259 - cnum = req->reg_num; 3260 - reg_flags = req->reg_flags; 3261 - 3262 - if (unlikely(!PMD_IS_IMPL(cnum))) goto error; 3263 - /* 3264 - * we can only read the registers that we use. That includes 3265 - * the ones we explicitly initialize AND the ones we want included 3266 - * in the sampling buffer (smpl_regs). 
3267 - * 3268 - * Having this restriction allows optimization in the ctxsw routine 3269 - * without compromising security (leaks) 3270 - */ 3271 - if (unlikely(!CTX_IS_USED_PMD(ctx, cnum))) goto error; 3272 - 3273 - sval = ctx->ctx_pmds[cnum].val; 3274 - lval = ctx->ctx_pmds[cnum].lval; 3275 - is_counting = PMD_IS_COUNTING(cnum); 3276 - 3277 - /* 3278 - * If the task is not the current one, then we check if the 3279 - * PMU state is still in the local live register due to lazy ctxsw. 3280 - * If true, then we read directly from the registers. 3281 - */ 3282 - if (can_access_pmu) { 3283 - val = ia64_get_pmd(cnum); 3284 - } else { 3285 - /* 3286 - * context has been saved 3287 - * if context is zombie, then task does not exist anymore. 3288 - * In this case, we use the full value saved in the context (pfm_flush_regs()). 3289 - */ 3290 - val = is_loaded ? ctx->th_pmds[cnum] : 0UL; 3291 - } 3292 - rd_func = pmu_conf->pmd_desc[cnum].read_check; 3293 - 3294 - if (is_counting) { 3295 - /* 3296 - * XXX: need to check for overflow when loaded 3297 - */ 3298 - val &= ovfl_mask; 3299 - val += sval; 3300 - } 3301 - 3302 - /* 3303 - * execute read checker, if any 3304 - */ 3305 - if (unlikely(expert_mode == 0 && rd_func)) { 3306 - unsigned long v = val; 3307 - ret = (*rd_func)(ctx->ctx_task, ctx, cnum, &v, regs); 3308 - if (ret) goto error; 3309 - val = v; 3310 - ret = -EINVAL; 3311 - } 3312 - 3313 - PFM_REG_RETFLAG_SET(reg_flags, 0); 3314 - 3315 - DPRINT(("pmd[%u]=0x%lx\n", cnum, val)); 3316 - 3317 - /* 3318 - * update register return value, abort all if problem during copy. 3319 - * we only modify the reg_flags field. no check mode is fine because 3320 - * access has been verified upfront in sys_perfmonctl(). 3321 - */ 3322 - req->reg_value = val; 3323 - req->reg_flags = reg_flags; 3324 - req->reg_last_reset_val = lval; 3325 - } 3326 - 3327 - return 0; 3328 - 3329 - error: 3330 - PFM_REG_RETFLAG_SET(req->reg_flags, PFM_REG_RETFL_EINVAL); 3331 - return ret; 3332 - } 3333 - 3334 - int 3335 - pfm_mod_write_pmcs(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs) 3336 - { 3337 - pfm_context_t *ctx; 3338 - 3339 - if (req == NULL) return -EINVAL; 3340 - 3341 - ctx = GET_PMU_CTX(); 3342 - 3343 - if (ctx == NULL) return -EINVAL; 3344 - 3345 - /* 3346 - * for now limit to current task, which is enough when calling 3347 - * from overflow handler 3348 - */ 3349 - if (task != current && ctx->ctx_fl_system == 0) return -EBUSY; 3350 - 3351 - return pfm_write_pmcs(ctx, req, nreq, regs); 3352 - } 3353 - EXPORT_SYMBOL(pfm_mod_write_pmcs); 3354 - 3355 - int 3356 - pfm_mod_read_pmds(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs) 3357 - { 3358 - pfm_context_t *ctx; 3359 - 3360 - if (req == NULL) return -EINVAL; 3361 - 3362 - ctx = GET_PMU_CTX(); 3363 - 3364 - if (ctx == NULL) return -EINVAL; 3365 - 3366 - /* 3367 - * for now limit to current task, which is enough when calling 3368 - * from overflow handler 3369 - */ 3370 - if (task != current && ctx->ctx_fl_system == 0) return -EBUSY; 3371 - 3372 - return pfm_read_pmds(ctx, req, nreq, regs); 3373 - } 3374 - EXPORT_SYMBOL(pfm_mod_read_pmds); 3375 - 3376 - /* 3377 - * Only call this function when a process is trying to 3378 - * write the debug registers (reading is always allowed) 3379 - */ 3380 - int 3381 - pfm_use_debug_registers(struct task_struct *task) 3382 - { 3383 - pfm_context_t *ctx = task->thread.pfm_context; 3384 - unsigned long flags; 3385 - int ret = 0; 3386 - 3387 - if (pmu_conf->use_rr_dbregs == 0) 
return 0; 3388 - 3389 - DPRINT(("called for [%d]\n", task_pid_nr(task))); 3390 - 3391 - /* 3392 - * do it only once 3393 - */ 3394 - if (task->thread.flags & IA64_THREAD_DBG_VALID) return 0; 3395 - 3396 - /* 3397 - * Even on SMP, we do not need to use an atomic here because 3398 - * the only way in is via ptrace() and this is possible only when the 3399 - * process is stopped. Even in the case where the ctxsw out is not totally 3400 - * completed by the time we come here, there is no way the 'stopped' process 3401 - * could be in the middle of fiddling with the pfm_write_ibr_dbr() routine. 3402 - * So this is always safe. 3403 - */ 3404 - if (ctx && ctx->ctx_fl_using_dbreg == 1) return -1; 3405 - 3406 - LOCK_PFS(flags); 3407 - 3408 - /* 3409 - * We cannot allow setting breakpoints when system wide monitoring 3410 - * sessions are using the debug registers. 3411 - */ 3412 - if (pfm_sessions.pfs_sys_use_dbregs> 0) 3413 - ret = -1; 3414 - else 3415 - pfm_sessions.pfs_ptrace_use_dbregs++; 3416 - 3417 - DPRINT(("ptrace_use_dbregs=%u sys_use_dbregs=%u by [%d] ret = %d\n", 3418 - pfm_sessions.pfs_ptrace_use_dbregs, 3419 - pfm_sessions.pfs_sys_use_dbregs, 3420 - task_pid_nr(task), ret)); 3421 - 3422 - UNLOCK_PFS(flags); 3423 - 3424 - return ret; 3425 - } 3426 - 3427 - /* 3428 - * This function is called for every task that exits with the 3429 - * IA64_THREAD_DBG_VALID set. This indicates a task which was 3430 - * able to use the debug registers for debugging purposes via 3431 - * ptrace(). Therefore we know it was not using them for 3432 - * performance monitoring, so we only decrement the number 3433 - * of "ptraced" debug register users to keep the count up to date 3434 - */ 3435 - int 3436 - pfm_release_debug_registers(struct task_struct *task) 3437 - { 3438 - unsigned long flags; 3439 - int ret; 3440 - 3441 - if (pmu_conf->use_rr_dbregs == 0) return 0; 3442 - 3443 - LOCK_PFS(flags); 3444 - if (pfm_sessions.pfs_ptrace_use_dbregs == 0) { 3445 - printk(KERN_ERR "perfmon: invalid release for [%d] ptrace_use_dbregs=0\n", task_pid_nr(task)); 3446 - ret = -1; 3447 - } else { 3448 - pfm_sessions.pfs_ptrace_use_dbregs--; 3449 - ret = 0; 3450 - } 3451 - UNLOCK_PFS(flags); 3452 - 3453 - return ret; 3454 - } 3455 - 3456 - static int 3457 - pfm_restart(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) 3458 - { 3459 - struct task_struct *task; 3460 - pfm_buffer_fmt_t *fmt; 3461 - pfm_ovfl_ctrl_t rst_ctrl; 3462 - int state, is_system; 3463 - int ret = 0; 3464 - 3465 - state = ctx->ctx_state; 3466 - fmt = ctx->ctx_buf_fmt; 3467 - is_system = ctx->ctx_fl_system; 3468 - task = PFM_CTX_TASK(ctx); 3469 - 3470 - switch(state) { 3471 - case PFM_CTX_MASKED: 3472 - break; 3473 - case PFM_CTX_LOADED: 3474 - if (CTX_HAS_SMPL(ctx) && fmt->fmt_restart_active) break; 3475 - fallthrough; 3476 - case PFM_CTX_UNLOADED: 3477 - case PFM_CTX_ZOMBIE: 3478 - DPRINT(("invalid state=%d\n", state)); 3479 - return -EBUSY; 3480 - default: 3481 - DPRINT(("state=%d, cannot operate (no active_restart handler)\n", state)); 3482 - return -EINVAL; 3483 - } 3484 - 3485 - /* 3486 - * In system wide and when the context is loaded, access can only happen 3487 - * when the caller is running on the CPU being monitored by the session. 3488 - * It does not have to be the owner (ctx_task) of the context per se. 
3489 - */ 3490 - if (is_system && ctx->ctx_cpu != smp_processor_id()) { 3491 - DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu)); 3492 - return -EBUSY; 3493 - } 3494 - 3495 - /* sanity check */ 3496 - if (unlikely(task == NULL)) { 3497 - printk(KERN_ERR "perfmon: [%d] pfm_restart no task\n", task_pid_nr(current)); 3498 - return -EINVAL; 3499 - } 3500 - 3501 - if (task == current || is_system) { 3502 - 3503 - fmt = ctx->ctx_buf_fmt; 3504 - 3505 - DPRINT(("restarting self %d ovfl=0x%lx\n", 3506 - task_pid_nr(task), 3507 - ctx->ctx_ovfl_regs[0])); 3508 - 3509 - if (CTX_HAS_SMPL(ctx)) { 3510 - 3511 - prefetch(ctx->ctx_smpl_hdr); 3512 - 3513 - rst_ctrl.bits.mask_monitoring = 0; 3514 - rst_ctrl.bits.reset_ovfl_pmds = 0; 3515 - 3516 - if (state == PFM_CTX_LOADED) 3517 - ret = pfm_buf_fmt_restart_active(fmt, task, &rst_ctrl, ctx->ctx_smpl_hdr, regs); 3518 - else 3519 - ret = pfm_buf_fmt_restart(fmt, task, &rst_ctrl, ctx->ctx_smpl_hdr, regs); 3520 - } else { 3521 - rst_ctrl.bits.mask_monitoring = 0; 3522 - rst_ctrl.bits.reset_ovfl_pmds = 1; 3523 - } 3524 - 3525 - if (ret == 0) { 3526 - if (rst_ctrl.bits.reset_ovfl_pmds) 3527 - pfm_reset_regs(ctx, ctx->ctx_ovfl_regs, PFM_PMD_LONG_RESET); 3528 - 3529 - if (rst_ctrl.bits.mask_monitoring == 0) { 3530 - DPRINT(("resuming monitoring for [%d]\n", task_pid_nr(task))); 3531 - 3532 - if (state == PFM_CTX_MASKED) pfm_restore_monitoring(task); 3533 - } else { 3534 - DPRINT(("keeping monitoring stopped for [%d]\n", task_pid_nr(task))); 3535 - 3536 - // cannot use pfm_stop_monitoring(task, regs); 3537 - } 3538 - } 3539 - /* 3540 - * clear overflowed PMD mask to remove any stale information 3541 - */ 3542 - ctx->ctx_ovfl_regs[0] = 0UL; 3543 - 3544 - /* 3545 - * back to LOADED state 3546 - */ 3547 - ctx->ctx_state = PFM_CTX_LOADED; 3548 - 3549 - /* 3550 - * XXX: not really useful for self monitoring 3551 - */ 3552 - ctx->ctx_fl_can_restart = 0; 3553 - 3554 - return 0; 3555 - } 3556 - 3557 - /* 3558 - * restart another task 3559 - */ 3560 - 3561 - /* 3562 - * When PFM_CTX_MASKED, we cannot issue a restart before the previous 3563 - * one is seen by the task. 3564 - */ 3565 - if (state == PFM_CTX_MASKED) { 3566 - if (ctx->ctx_fl_can_restart == 0) return -EINVAL; 3567 - /* 3568 - * will prevent subsequent restart before this one is 3569 - * seen by other task 3570 - */ 3571 - ctx->ctx_fl_can_restart = 0; 3572 - } 3573 - 3574 - /* 3575 - * if blocking, then post the semaphore is PFM_CTX_MASKED, i.e. 3576 - * the task is blocked or on its way to block. That's the normal 3577 - * restart path. If the monitoring is not masked, then the task 3578 - * can be actively monitoring and we cannot directly intervene. 3579 - * Therefore we use the trap mechanism to catch the task and 3580 - * force it to reset the buffer/reset PMDs. 3581 - * 3582 - * if non-blocking, then we ensure that the task will go into 3583 - * pfm_handle_work() before returning to user mode. 3584 - * 3585 - * We cannot explicitly reset another task, it MUST always 3586 - * be done by the task itself. This works for system wide because 3587 - * the tool that is controlling the session is logically doing 3588 - * "self-monitoring". 
3589 - */ 3590 - if (CTX_OVFL_NOBLOCK(ctx) == 0 && state == PFM_CTX_MASKED) { 3591 - DPRINT(("unblocking [%d]\n", task_pid_nr(task))); 3592 - complete(&ctx->ctx_restart_done); 3593 - } else { 3594 - DPRINT(("[%d] armed exit trap\n", task_pid_nr(task))); 3595 - 3596 - ctx->ctx_fl_trap_reason = PFM_TRAP_REASON_RESET; 3597 - 3598 - PFM_SET_WORK_PENDING(task, 1); 3599 - 3600 - set_notify_resume(task); 3601 - 3602 - /* 3603 - * XXX: send reschedule if task runs on another CPU 3604 - */ 3605 - } 3606 - return 0; 3607 - } 3608 - 3609 - static int 3610 - pfm_debug(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) 3611 - { 3612 - unsigned int m = *(unsigned int *)arg; 3613 - 3614 - pfm_sysctl.debug = m == 0 ? 0 : 1; 3615 - 3616 - printk(KERN_INFO "perfmon debugging %s (timing reset)\n", pfm_sysctl.debug ? "on" : "off"); 3617 - 3618 - if (m == 0) { 3619 - memset(pfm_stats, 0, sizeof(pfm_stats)); 3620 - for(m=0; m < NR_CPUS; m++) pfm_stats[m].pfm_ovfl_intr_cycles_min = ~0UL; 3621 - } 3622 - return 0; 3623 - } 3624 - 3625 - /* 3626 - * arg can be NULL and count can be zero for this function 3627 - */ 3628 - static int 3629 - pfm_write_ibr_dbr(int mode, pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) 3630 - { 3631 - struct thread_struct *thread = NULL; 3632 - struct task_struct *task; 3633 - pfarg_dbreg_t *req = (pfarg_dbreg_t *)arg; 3634 - unsigned long flags; 3635 - dbreg_t dbreg; 3636 - unsigned int rnum; 3637 - int first_time; 3638 - int ret = 0, state; 3639 - int i, can_access_pmu = 0; 3640 - int is_system, is_loaded; 3641 - 3642 - if (pmu_conf->use_rr_dbregs == 0) return -EINVAL; 3643 - 3644 - state = ctx->ctx_state; 3645 - is_loaded = state == PFM_CTX_LOADED ? 1 : 0; 3646 - is_system = ctx->ctx_fl_system; 3647 - task = ctx->ctx_task; 3648 - 3649 - if (state == PFM_CTX_ZOMBIE) return -EINVAL; 3650 - 3651 - /* 3652 - * on both UP and SMP, we can only write to the PMC when the task is 3653 - * the owner of the local PMU. 3654 - */ 3655 - if (is_loaded) { 3656 - thread = &task->thread; 3657 - /* 3658 - * In system wide and when the context is loaded, access can only happen 3659 - * when the caller is running on the CPU being monitored by the session. 3660 - * It does not have to be the owner (ctx_task) of the context per se. 3661 - */ 3662 - if (unlikely(is_system && ctx->ctx_cpu != smp_processor_id())) { 3663 - DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu)); 3664 - return -EBUSY; 3665 - } 3666 - can_access_pmu = GET_PMU_OWNER() == task || is_system ? 1 : 0; 3667 - } 3668 - 3669 - /* 3670 - * we do not need to check for ipsr.db because we do clear ibr.x, dbr.r, and dbr.w 3671 - * ensuring that no real breakpoint can be installed via this call. 
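
The sanitization referred to just above relies on ia64 keeping the breakpoint enable bits in the odd-numbered registers: with ibr.x and dbr.r/dbr.w forced off, whatever value is written can never fire as a real breakpoint. A sketch of the idea with made-up bit positions (the real ia64 encoding differs):

    #include <stdint.h>
    #include <stdio.h>

    /* Hypothetical control-bit positions; the real ia64 layout differs. */
    #define IBR_X_BIT   (1ULL << 63)                   /* execute enable */
    #define DBR_RW_BITS ((1ULL << 63) | (1ULL << 62))  /* read/write enables */

    /* Mirrors the sanitization step in the removed pfm_write_ibr_dbr():
     * odd-numbered registers carry the enables, so strip them there. */
    static uint64_t sanitize_dbreg(unsigned rnum, int is_code, uint64_t val)
    {
        if (rnum & 0x1)
            val &= is_code ? ~IBR_X_BIT : ~DBR_RW_BITS;
        return val;
    }

    int main(void)
    {
        printf("0x%llx\n", (unsigned long long)sanitize_dbreg(1, 1, ~0ULL));
        return 0;
    }
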
3672 - * 3673 - * IMPORTANT: regs can be NULL in this function 3674 - */ 3675 - 3676 - first_time = ctx->ctx_fl_using_dbreg == 0; 3677 - 3678 - /* 3679 - * don't bother if we are loaded and task is being debugged 3680 - */ 3681 - if (is_loaded && (thread->flags & IA64_THREAD_DBG_VALID) != 0) { 3682 - DPRINT(("debug registers already in use for [%d]\n", task_pid_nr(task))); 3683 - return -EBUSY; 3684 - } 3685 - 3686 - /* 3687 - * check for debug registers in system wide mode 3688 - * 3689 - * Even though a check is done in pfm_context_load(), 3690 - * we must repeat it here, in case the registers are 3691 - * written after the context is loaded 3692 - */ 3693 - if (is_loaded) { 3694 - LOCK_PFS(flags); 3695 - 3696 - if (first_time && is_system) { 3697 - if (pfm_sessions.pfs_ptrace_use_dbregs) 3698 - ret = -EBUSY; 3699 - else 3700 - pfm_sessions.pfs_sys_use_dbregs++; 3701 - } 3702 - UNLOCK_PFS(flags); 3703 - } 3704 - 3705 - if (ret != 0) return ret; 3706 - 3707 - /* 3708 - * mark ourself as user of the debug registers for 3709 - * perfmon purposes. 3710 - */ 3711 - ctx->ctx_fl_using_dbreg = 1; 3712 - 3713 - /* 3714 - * clear hardware registers to make sure we don't 3715 - * pick up stale state. 3716 - * 3717 - * for a system wide session, we do not use 3718 - * thread.dbr, thread.ibr because this process 3719 - * never leaves the current CPU and the state 3720 - * is shared by all processes running on it 3721 - */ 3722 - if (first_time && can_access_pmu) { 3723 - DPRINT(("[%d] clearing ibrs, dbrs\n", task_pid_nr(task))); 3724 - for (i=0; i < pmu_conf->num_ibrs; i++) { 3725 - ia64_set_ibr(i, 0UL); 3726 - ia64_dv_serialize_instruction(); 3727 - } 3728 - ia64_srlz_i(); 3729 - for (i=0; i < pmu_conf->num_dbrs; i++) { 3730 - ia64_set_dbr(i, 0UL); 3731 - ia64_dv_serialize_data(); 3732 - } 3733 - ia64_srlz_d(); 3734 - } 3735 - 3736 - /* 3737 - * Now install the values into the registers 3738 - */ 3739 - for (i = 0; i < count; i++, req++) { 3740 - 3741 - rnum = req->dbreg_num; 3742 - dbreg.val = req->dbreg_value; 3743 - 3744 - ret = -EINVAL; 3745 - 3746 - if ((mode == PFM_CODE_RR && rnum >= PFM_NUM_IBRS) || ((mode == PFM_DATA_RR) && rnum >= PFM_NUM_DBRS)) { 3747 - DPRINT(("invalid register %u val=0x%lx mode=%d i=%d count=%d\n", 3748 - rnum, dbreg.val, mode, i, count)); 3749 - 3750 - goto abort_mission; 3751 - } 3752 - 3753 - /* 3754 - * make sure we do not install an enabled breakpoint 3755 - */ 3756 - if (rnum & 0x1) { 3757 - if (mode == PFM_CODE_RR) 3758 - dbreg.ibr.ibr_x = 0; 3759 - else 3760 - dbreg.dbr.dbr_r = dbreg.dbr.dbr_w = 0; 3761 - } 3762 - 3763 - PFM_REG_RETFLAG_SET(req->dbreg_flags, 0); 3764 - 3765 - /* 3766 - * Debug registers, just like PMC, can only be modified 3767 - * by a kernel call. Moreover, perfmon() accesses to those 3768 - * registers are centralized in this routine. The hardware 3769 - * does not modify the value of these registers, therefore, 3770 - * if we save them as they are written, we can avoid having 3771 - * to save them on context switch out. This is made possible 3772 - * by the fact that when perfmon uses debug registers, ptrace() 3773 - * won't be able to modify them concurrently. 
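
The LOCK_PFS section above is one half of a simple arbitration scheme: ptrace() users and perfmon sessions each bump their own counter under the sessions lock, and each side may claim the debug registers only while the other side's count is zero. Modeled in user space with a mutex and hypothetical names:

    #include <pthread.h>
    #include <stdio.h>

    /* Model of the pfs_ptrace_use_dbregs / pfs_sys_use_dbregs arbitration. */
    static pthread_mutex_t pfs_lock = PTHREAD_MUTEX_INITIALIZER;
    static unsigned ptrace_users, perfmon_users;

    static int reserve_for_perfmon(void)
    {
        int ok;
        pthread_mutex_lock(&pfs_lock);
        ok = (ptrace_users == 0);     /* busy if ptrace holds the dbregs */
        if (ok) perfmon_users++;
        pthread_mutex_unlock(&pfs_lock);
        return ok ? 0 : -1;           /* -EBUSY in the original */
    }

    int main(void)
    {
        printf("%d\n", reserve_for_perfmon());
        return 0;
    }
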
3774 - */ 3775 - if (mode == PFM_CODE_RR) { 3776 - CTX_USED_IBR(ctx, rnum); 3777 - 3778 - if (can_access_pmu) { 3779 - ia64_set_ibr(rnum, dbreg.val); 3780 - ia64_dv_serialize_instruction(); 3781 - } 3782 - 3783 - ctx->ctx_ibrs[rnum] = dbreg.val; 3784 - 3785 - DPRINT(("write ibr%u=0x%lx used_ibrs=0x%x ld=%d apmu=%d\n", 3786 - rnum, dbreg.val, ctx->ctx_used_ibrs[0], is_loaded, can_access_pmu)); 3787 - } else { 3788 - CTX_USED_DBR(ctx, rnum); 3789 - 3790 - if (can_access_pmu) { 3791 - ia64_set_dbr(rnum, dbreg.val); 3792 - ia64_dv_serialize_data(); 3793 - } 3794 - ctx->ctx_dbrs[rnum] = dbreg.val; 3795 - 3796 - DPRINT(("write dbr%u=0x%lx used_dbrs=0x%x ld=%d apmu=%d\n", 3797 - rnum, dbreg.val, ctx->ctx_used_dbrs[0], is_loaded, can_access_pmu)); 3798 - } 3799 - } 3800 - 3801 - return 0; 3802 - 3803 - abort_mission: 3804 - /* 3805 - * in case it was our first attempt, we undo the global modifications 3806 - */ 3807 - if (first_time) { 3808 - LOCK_PFS(flags); 3809 - if (ctx->ctx_fl_system) { 3810 - pfm_sessions.pfs_sys_use_dbregs--; 3811 - } 3812 - UNLOCK_PFS(flags); 3813 - ctx->ctx_fl_using_dbreg = 0; 3814 - } 3815 - /* 3816 - * install error return flag 3817 - */ 3818 - PFM_REG_RETFLAG_SET(req->dbreg_flags, PFM_REG_RETFL_EINVAL); 3819 - 3820 - return ret; 3821 - } 3822 - 3823 - static int 3824 - pfm_write_ibrs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) 3825 - { 3826 - return pfm_write_ibr_dbr(PFM_CODE_RR, ctx, arg, count, regs); 3827 - } 3828 - 3829 - static int 3830 - pfm_write_dbrs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) 3831 - { 3832 - return pfm_write_ibr_dbr(PFM_DATA_RR, ctx, arg, count, regs); 3833 - } 3834 - 3835 - int 3836 - pfm_mod_write_ibrs(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs) 3837 - { 3838 - pfm_context_t *ctx; 3839 - 3840 - if (req == NULL) return -EINVAL; 3841 - 3842 - ctx = GET_PMU_CTX(); 3843 - 3844 - if (ctx == NULL) return -EINVAL; 3845 - 3846 - /* 3847 - * for now limit to current task, which is enough when calling 3848 - * from overflow handler 3849 - */ 3850 - if (task != current && ctx->ctx_fl_system == 0) return -EBUSY; 3851 - 3852 - return pfm_write_ibrs(ctx, req, nreq, regs); 3853 - } 3854 - EXPORT_SYMBOL(pfm_mod_write_ibrs); 3855 - 3856 - int 3857 - pfm_mod_write_dbrs(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs) 3858 - { 3859 - pfm_context_t *ctx; 3860 - 3861 - if (req == NULL) return -EINVAL; 3862 - 3863 - ctx = GET_PMU_CTX(); 3864 - 3865 - if (ctx == NULL) return -EINVAL; 3866 - 3867 - /* 3868 - * for now limit to current task, which is enough when calling 3869 - * from overflow handler 3870 - */ 3871 - if (task != current && ctx->ctx_fl_system == 0) return -EBUSY; 3872 - 3873 - return pfm_write_dbrs(ctx, req, nreq, regs); 3874 - } 3875 - EXPORT_SYMBOL(pfm_mod_write_dbrs); 3876 - 3877 - 3878 - static int 3879 - pfm_get_features(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) 3880 - { 3881 - pfarg_features_t *req = (pfarg_features_t *)arg; 3882 - 3883 - req->ft_version = PFM_VERSION; 3884 - return 0; 3885 - } 3886 - 3887 - static int 3888 - pfm_stop(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) 3889 - { 3890 - struct pt_regs *tregs; 3891 - struct task_struct *task = PFM_CTX_TASK(ctx); 3892 - int state, is_system; 3893 - 3894 - state = ctx->ctx_state; 3895 - is_system = ctx->ctx_fl_system; 3896 - 3897 - /* 3898 - * context must be attached to issue the stop command (includes LOADED,MASKED,ZOMBIE) 3899 - */ 3900 - if 
(state == PFM_CTX_UNLOADED) return -EINVAL; 3901 - 3902 - /* 3903 - * In system wide and when the context is loaded, access can only happen 3904 - * when the caller is running on the CPU being monitored by the session. 3905 - * It does not have to be the owner (ctx_task) of the context per se. 3906 - */ 3907 - if (is_system && ctx->ctx_cpu != smp_processor_id()) { 3908 - DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu)); 3909 - return -EBUSY; 3910 - } 3911 - DPRINT(("task [%d] ctx_state=%d is_system=%d\n", 3912 - task_pid_nr(PFM_CTX_TASK(ctx)), 3913 - state, 3914 - is_system)); 3915 - /* 3916 - * in system mode, we need to update the PMU directly 3917 - * and the user level state of the caller, which may not 3918 - * necessarily be the creator of the context. 3919 - */ 3920 - if (is_system) { 3921 - /* 3922 - * Update local PMU first 3923 - * 3924 - * disable dcr pp 3925 - */ 3926 - ia64_setreg(_IA64_REG_CR_DCR, ia64_getreg(_IA64_REG_CR_DCR) & ~IA64_DCR_PP); 3927 - ia64_srlz_i(); 3928 - 3929 - /* 3930 - * update local cpuinfo 3931 - */ 3932 - PFM_CPUINFO_CLEAR(PFM_CPUINFO_DCR_PP); 3933 - 3934 - /* 3935 - * stop monitoring, does srlz.i 3936 - */ 3937 - pfm_clear_psr_pp(); 3938 - 3939 - /* 3940 - * stop monitoring in the caller 3941 - */ 3942 - ia64_psr(regs)->pp = 0; 3943 - 3944 - return 0; 3945 - } 3946 - /* 3947 - * per-task mode 3948 - */ 3949 - 3950 - if (task == current) { 3951 - /* stop monitoring at kernel level */ 3952 - pfm_clear_psr_up(); 3953 - 3954 - /* 3955 - * stop monitoring at the user level 3956 - */ 3957 - ia64_psr(regs)->up = 0; 3958 - } else { 3959 - tregs = task_pt_regs(task); 3960 - 3961 - /* 3962 - * stop monitoring at the user level 3963 - */ 3964 - ia64_psr(tregs)->up = 0; 3965 - 3966 - /* 3967 - * monitoring disabled in kernel at next reschedule 3968 - */ 3969 - ctx->ctx_saved_psr_up = 0; 3970 - DPRINT(("task=[%d]\n", task_pid_nr(task))); 3971 - } 3972 - return 0; 3973 - } 3974 - 3975 - 3976 - static int 3977 - pfm_start(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) 3978 - { 3979 - struct pt_regs *tregs; 3980 - int state, is_system; 3981 - 3982 - state = ctx->ctx_state; 3983 - is_system = ctx->ctx_fl_system; 3984 - 3985 - if (state != PFM_CTX_LOADED) return -EINVAL; 3986 - 3987 - /* 3988 - * In system wide and when the context is loaded, access can only happen 3989 - * when the caller is running on the CPU being monitored by the session. 3990 - * It does not have to be the owner (ctx_task) of the context per se. 3991 - */ 3992 - if (is_system && ctx->ctx_cpu != smp_processor_id()) { 3993 - DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu)); 3994 - return -EBUSY; 3995 - } 3996 - 3997 - /* 3998 - * in system mode, we need to update the PMU directly 3999 - * and the user level state of the caller, which may not 4000 - * necessarily be the creator of the context. 
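
Three enable bits drive everything in the stop/start paths here: psr.up gates per-task monitoring, psr.pp gates system-wide monitoring at user level, and dcr.pp extends psr.pp into kernel mode on interruptions. A toy model of the stop path, not the real register interface:

    #include <stdio.h>

    /* Toy model of the monitoring enables touched by pfm_stop()/pfm_start(). */
    struct psr_model { int up, pp; };
    struct cpu_model { int dcr_pp; struct psr_model psr; };

    static void stop_monitoring(struct cpu_model *cpu, int system_wide)
    {
        if (system_wide) {
            cpu->dcr_pp = 0;     /* kernel-level system-wide off */
            cpu->psr.pp = 0;     /* user-level system-wide off */
        } else {
            cpu->psr.up = 0;     /* per-task monitoring off */
        }
    }

    int main(void)
    {
        struct cpu_model cpu = { 1, { 1, 1 } };
        stop_monitoring(&cpu, 1);
        printf("dcr.pp=%d psr.pp=%d psr.up=%d\n", cpu.dcr_pp, cpu.psr.pp, cpu.psr.up);
        return 0;
    }
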
4001 - */ 4002 - if (is_system) { 4003 - 4004 - /* 4005 - * set user level psr.pp for the caller 4006 - */ 4007 - ia64_psr(regs)->pp = 1; 4008 - 4009 - /* 4010 - * now update the local PMU and cpuinfo 4011 - */ 4012 - PFM_CPUINFO_SET(PFM_CPUINFO_DCR_PP); 4013 - 4014 - /* 4015 - * start monitoring at kernel level 4016 - */ 4017 - pfm_set_psr_pp(); 4018 - 4019 - /* enable dcr pp */ 4020 - ia64_setreg(_IA64_REG_CR_DCR, ia64_getreg(_IA64_REG_CR_DCR) | IA64_DCR_PP); 4021 - ia64_srlz_i(); 4022 - 4023 - return 0; 4024 - } 4025 - 4026 - /* 4027 - * per-process mode 4028 - */ 4029 - 4030 - if (ctx->ctx_task == current) { 4031 - 4032 - /* start monitoring at kernel level */ 4033 - pfm_set_psr_up(); 4034 - 4035 - /* 4036 - * activate monitoring at user level 4037 - */ 4038 - ia64_psr(regs)->up = 1; 4039 - 4040 - } else { 4041 - tregs = task_pt_regs(ctx->ctx_task); 4042 - 4043 - /* 4044 - * start monitoring at the kernel level the next 4045 - * time the task is scheduled 4046 - */ 4047 - ctx->ctx_saved_psr_up = IA64_PSR_UP; 4048 - 4049 - /* 4050 - * activate monitoring at user level 4051 - */ 4052 - ia64_psr(tregs)->up = 1; 4053 - } 4054 - return 0; 4055 - } 4056 - 4057 - static int 4058 - pfm_get_pmc_reset(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) 4059 - { 4060 - pfarg_reg_t *req = (pfarg_reg_t *)arg; 4061 - unsigned int cnum; 4062 - int i; 4063 - int ret = -EINVAL; 4064 - 4065 - for (i = 0; i < count; i++, req++) { 4066 - 4067 - cnum = req->reg_num; 4068 - 4069 - if (!PMC_IS_IMPL(cnum)) goto abort_mission; 4070 - 4071 - req->reg_value = PMC_DFL_VAL(cnum); 4072 - 4073 - PFM_REG_RETFLAG_SET(req->reg_flags, 0); 4074 - 4075 - DPRINT(("pmc_reset_val pmc[%u]=0x%lx\n", cnum, req->reg_value)); 4076 - } 4077 - return 0; 4078 - 4079 - abort_mission: 4080 - PFM_REG_RETFLAG_SET(req->reg_flags, PFM_REG_RETFL_EINVAL); 4081 - return ret; 4082 - } 4083 - 4084 - static int 4085 - pfm_check_task_exist(pfm_context_t *ctx) 4086 - { 4087 - struct task_struct *g, *t; 4088 - int ret = -ESRCH; 4089 - 4090 - read_lock(&tasklist_lock); 4091 - 4092 - do_each_thread (g, t) { 4093 - if (t->thread.pfm_context == ctx) { 4094 - ret = 0; 4095 - goto out; 4096 - } 4097 - } while_each_thread (g, t); 4098 - out: 4099 - read_unlock(&tasklist_lock); 4100 - 4101 - DPRINT(("pfm_check_task_exist: ret=%d ctx=%p\n", ret, ctx)); 4102 - 4103 - return ret; 4104 - } 4105 - 4106 - static int 4107 - pfm_context_load(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) 4108 - { 4109 - struct task_struct *task; 4110 - struct thread_struct *thread; 4111 - struct pfm_context_t *old; 4112 - unsigned long flags; 4113 - #ifndef CONFIG_SMP 4114 - struct task_struct *owner_task = NULL; 4115 - #endif 4116 - pfarg_load_t *req = (pfarg_load_t *)arg; 4117 - unsigned long *pmcs_source, *pmds_source; 4118 - int the_cpu; 4119 - int ret = 0; 4120 - int state, is_system, set_dbregs = 0; 4121 - 4122 - state = ctx->ctx_state; 4123 - is_system = ctx->ctx_fl_system; 4124 - /* 4125 - * can only load from unloaded or terminated state 4126 - */ 4127 - if (state != PFM_CTX_UNLOADED) { 4128 - DPRINT(("cannot load to [%d], invalid ctx_state=%d\n", 4129 - req->load_pid, 4130 - ctx->ctx_state)); 4131 - return -EBUSY; 4132 - } 4133 - 4134 - DPRINT(("load_pid [%d] using_dbreg=%d\n", req->load_pid, ctx->ctx_fl_using_dbreg)); 4135 - 4136 - if (CTX_OVFL_NOBLOCK(ctx) == 0 && req->load_pid == current->pid) { 4137 - DPRINT(("cannot use blocking mode on self\n")); 4138 - return -EINVAL; 4139 - } 4140 - 4141 - ret = pfm_get_task(ctx, req->load_pid, 
&task); 4142 - if (ret) { 4143 - DPRINT(("load_pid [%d] get_task=%d\n", req->load_pid, ret)); 4144 - return ret; 4145 - } 4146 - 4147 - ret = -EINVAL; 4148 - 4149 - /* 4150 - * system wide is self monitoring only 4151 - */ 4152 - if (is_system && task != current) { 4153 - DPRINT(("system wide is self monitoring only load_pid=%d\n", 4154 - req->load_pid)); 4155 - goto error; 4156 - } 4157 - 4158 - thread = &task->thread; 4159 - 4160 - ret = 0; 4161 - /* 4162 - * cannot load a context which is using range restrictions, 4163 - * into a task that is being debugged. 4164 - */ 4165 - if (ctx->ctx_fl_using_dbreg) { 4166 - if (thread->flags & IA64_THREAD_DBG_VALID) { 4167 - ret = -EBUSY; 4168 - DPRINT(("load_pid [%d] task is debugged, cannot load range restrictions\n", req->load_pid)); 4169 - goto error; 4170 - } 4171 - LOCK_PFS(flags); 4172 - 4173 - if (is_system) { 4174 - if (pfm_sessions.pfs_ptrace_use_dbregs) { 4175 - DPRINT(("cannot load [%d] dbregs in use\n", 4176 - task_pid_nr(task))); 4177 - ret = -EBUSY; 4178 - } else { 4179 - pfm_sessions.pfs_sys_use_dbregs++; 4180 - DPRINT(("load [%d] increased sys_use_dbreg=%u\n", task_pid_nr(task), pfm_sessions.pfs_sys_use_dbregs)); 4181 - set_dbregs = 1; 4182 - } 4183 - } 4184 - 4185 - UNLOCK_PFS(flags); 4186 - 4187 - if (ret) goto error; 4188 - } 4189 - 4190 - /* 4191 - * SMP system-wide monitoring implies self-monitoring. 4192 - * 4193 - * The programming model expects the task to 4194 - * be pinned on a CPU throughout the session. 4195 - * Here we take note of the current CPU at the 4196 - * time the context is loaded. No call from 4197 - * another CPU will be allowed. 4198 - * 4199 - * The pinning via sched_setaffinity() 4200 - * must be done by the calling task prior 4201 - * to this call. 4202 - * 4203 - * systemwide: keep track of CPU this session is supposed to run on 4204 - */ 4205 - the_cpu = ctx->ctx_cpu = smp_processor_id(); 4206 - 4207 - ret = -EBUSY; 4208 - /* 4209 - * now reserve the session 4210 - */ 4211 - ret = pfm_reserve_session(current, is_system, the_cpu); 4212 - if (ret) goto error; 4213 - 4214 - /* 4215 - * task is necessarily stopped at this point. 4216 - * 4217 - * If the previous context was zombie, then it got removed in 4218 - * pfm_save_regs(). Therefore we should not see it here. 
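
Attaching the context to the target thread (just below) is done with a compare-and-exchange against NULL, so a racing load fails with -EBUSY instead of silently overwriting an existing context. A user-space equivalent of the ia64_cmpxchg(acq, ...) idiom using C11 atomics:

    #include <stdatomic.h>
    #include <stdio.h>

    struct pfm_ctx;  /* opaque for the sketch */

    /* Equivalent of ia64_cmpxchg(acq, &thread->pfm_context, NULL, ctx, ...):
     * install ctx only if no context is attached yet. */
    static int attach_context(struct pfm_ctx *_Atomic *slot, struct pfm_ctx *ctx)
    {
        struct pfm_ctx *expected = NULL;
        return atomic_compare_exchange_strong(slot, &expected, ctx)
               ? 0 : -1;  /* -EBUSY: somebody attached first */
    }

    int main(void)
    {
        static struct pfm_ctx *_Atomic slot;
        printf("%d %d\n",
               attach_context(&slot, (struct pfm_ctx *)0x1),
               attach_context(&slot, (struct pfm_ctx *)0x2));
        return 0;
    }

The acquire ordering on the kernel side guarantees that, once the swap is observed, the fully initialized context is observed with it.
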
4219 - * If we see a context, then this is an active context 4220 - * 4221 - * XXX: needs to be atomic 4222 - */ 4223 - DPRINT(("before cmpxchg() old_ctx=%p new_ctx=%p\n", 4224 - thread->pfm_context, ctx)); 4225 - 4226 - ret = -EBUSY; 4227 - old = ia64_cmpxchg(acq, &thread->pfm_context, NULL, ctx, sizeof(pfm_context_t *)); 4228 - if (old != NULL) { 4229 - DPRINT(("load_pid [%d] already has a context\n", req->load_pid)); 4230 - goto error_unres; 4231 - } 4232 - 4233 - pfm_reset_msgq(ctx); 4234 - 4235 - ctx->ctx_state = PFM_CTX_LOADED; 4236 - 4237 - /* 4238 - * link context to task 4239 - */ 4240 - ctx->ctx_task = task; 4241 - 4242 - if (is_system) { 4243 - /* 4244 - * we load as stopped 4245 - */ 4246 - PFM_CPUINFO_SET(PFM_CPUINFO_SYST_WIDE); 4247 - PFM_CPUINFO_CLEAR(PFM_CPUINFO_DCR_PP); 4248 - 4249 - if (ctx->ctx_fl_excl_idle) PFM_CPUINFO_SET(PFM_CPUINFO_EXCL_IDLE); 4250 - } else { 4251 - thread->flags |= IA64_THREAD_PM_VALID; 4252 - } 4253 - 4254 - /* 4255 - * propagate into thread-state 4256 - */ 4257 - pfm_copy_pmds(task, ctx); 4258 - pfm_copy_pmcs(task, ctx); 4259 - 4260 - pmcs_source = ctx->th_pmcs; 4261 - pmds_source = ctx->th_pmds; 4262 - 4263 - /* 4264 - * always the case for system-wide 4265 - */ 4266 - if (task == current) { 4267 - 4268 - if (is_system == 0) { 4269 - 4270 - /* allow user level control */ 4271 - ia64_psr(regs)->sp = 0; 4272 - DPRINT(("clearing psr.sp for [%d]\n", task_pid_nr(task))); 4273 - 4274 - SET_LAST_CPU(ctx, smp_processor_id()); 4275 - INC_ACTIVATION(); 4276 - SET_ACTIVATION(ctx); 4277 - #ifndef CONFIG_SMP 4278 - /* 4279 - * push the other task out, if any 4280 - */ 4281 - owner_task = GET_PMU_OWNER(); 4282 - if (owner_task) pfm_lazy_save_regs(owner_task); 4283 - #endif 4284 - } 4285 - /* 4286 - * load all PMD from ctx to PMU (as opposed to thread state) 4287 - * restore all PMC from ctx to PMU 4288 - */ 4289 - pfm_restore_pmds(pmds_source, ctx->ctx_all_pmds[0]); 4290 - pfm_restore_pmcs(pmcs_source, ctx->ctx_all_pmcs[0]); 4291 - 4292 - ctx->ctx_reload_pmcs[0] = 0UL; 4293 - ctx->ctx_reload_pmds[0] = 0UL; 4294 - 4295 - /* 4296 - * guaranteed safe by earlier check against DBG_VALID 4297 - */ 4298 - if (ctx->ctx_fl_using_dbreg) { 4299 - pfm_restore_ibrs(ctx->ctx_ibrs, pmu_conf->num_ibrs); 4300 - pfm_restore_dbrs(ctx->ctx_dbrs, pmu_conf->num_dbrs); 4301 - } 4302 - /* 4303 - * set new ownership 4304 - */ 4305 - SET_PMU_OWNER(task, ctx); 4306 - 4307 - DPRINT(("context loaded on PMU for [%d]\n", task_pid_nr(task))); 4308 - } else { 4309 - /* 4310 - * when not current, task MUST be stopped, so this is safe 4311 - */ 4312 - regs = task_pt_regs(task); 4313 - 4314 - /* force a full reload */ 4315 - ctx->ctx_last_activation = PFM_INVALID_ACTIVATION; 4316 - SET_LAST_CPU(ctx, -1); 4317 - 4318 - /* initial saved psr (stopped) */ 4319 - ctx->ctx_saved_psr_up = 0UL; 4320 - ia64_psr(regs)->up = ia64_psr(regs)->pp = 0; 4321 - } 4322 - 4323 - ret = 0; 4324 - 4325 - error_unres: 4326 - if (ret) pfm_unreserve_session(ctx, ctx->ctx_fl_system, the_cpu); 4327 - error: 4328 - /* 4329 - * we must undo the dbregs setting (for system-wide) 4330 - */ 4331 - if (ret && set_dbregs) { 4332 - LOCK_PFS(flags); 4333 - pfm_sessions.pfs_sys_use_dbregs--; 4334 - UNLOCK_PFS(flags); 4335 - } 4336 - /* 4337 - * release task, there is now a link with the context 4338 - */ 4339 - if (is_system == 0 && task != current) { 4340 - pfm_put_task(task); 4341 - 4342 - if (ret == 0) { 4343 - ret = pfm_check_task_exist(ctx); 4344 - if (ret) { 4345 - ctx->ctx_state = PFM_CTX_UNLOADED; 4346 - ctx->ctx_task = 
NULL; 4347 - } 4348 - } 4349 - } 4350 - return ret; 4351 - } 4352 - 4353 - /* 4354 - * in this function, we do not need to increase the use count 4355 - * for the task via get_task_struct(), because we hold the 4356 - * context lock. If the task were to disappear while having 4357 - * a context attached, it would go through pfm_exit_thread() 4358 - * which also grabs the context lock and would therefore be blocked 4359 - * until we are here. 4360 - */ 4361 - static void pfm_flush_pmds(struct task_struct *, pfm_context_t *ctx); 4362 - 4363 - static int 4364 - pfm_context_unload(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) 4365 - { 4366 - struct task_struct *task = PFM_CTX_TASK(ctx); 4367 - struct pt_regs *tregs; 4368 - int prev_state, is_system; 4369 - int ret; 4370 - 4371 - DPRINT(("ctx_state=%d task [%d]\n", ctx->ctx_state, task ? task_pid_nr(task) : -1)); 4372 - 4373 - prev_state = ctx->ctx_state; 4374 - is_system = ctx->ctx_fl_system; 4375 - 4376 - /* 4377 - * unload only when necessary 4378 - */ 4379 - if (prev_state == PFM_CTX_UNLOADED) { 4380 - DPRINT(("ctx_state=%d, nothing to do\n", prev_state)); 4381 - return 0; 4382 - } 4383 - 4384 - /* 4385 - * clear psr and dcr bits 4386 - */ 4387 - ret = pfm_stop(ctx, NULL, 0, regs); 4388 - if (ret) return ret; 4389 - 4390 - ctx->ctx_state = PFM_CTX_UNLOADED; 4391 - 4392 - /* 4393 - * in system mode, we need to update the PMU directly 4394 - * and the user level state of the caller, which may not 4395 - * necessarily be the creator of the context. 4396 - */ 4397 - if (is_system) { 4398 - 4399 - /* 4400 - * Update cpuinfo 4401 - * 4402 - * local PMU is taken care of in pfm_stop() 4403 - */ 4404 - PFM_CPUINFO_CLEAR(PFM_CPUINFO_SYST_WIDE); 4405 - PFM_CPUINFO_CLEAR(PFM_CPUINFO_EXCL_IDLE); 4406 - 4407 - /* 4408 - * save PMDs in context 4409 - * release ownership 4410 - */ 4411 - pfm_flush_pmds(current, ctx); 4412 - 4413 - /* 4414 - * at this point we are done with the PMU 4415 - * so we can unreserve the resource. 4416 - */ 4417 - if (prev_state != PFM_CTX_ZOMBIE) 4418 - pfm_unreserve_session(ctx, 1 , ctx->ctx_cpu); 4419 - 4420 - /* 4421 - * disconnect context from task 4422 - */ 4423 - task->thread.pfm_context = NULL; 4424 - /* 4425 - * disconnect task from context 4426 - */ 4427 - ctx->ctx_task = NULL; 4428 - 4429 - /* 4430 - * There is nothing more to cleanup here. 4431 - */ 4432 - return 0; 4433 - } 4434 - 4435 - /* 4436 - * per-task mode 4437 - */ 4438 - tregs = task == current ? regs : task_pt_regs(task); 4439 - 4440 - if (task == current) { 4441 - /* 4442 - * cancel user level control 4443 - */ 4444 - ia64_psr(regs)->sp = 1; 4445 - 4446 - DPRINT(("setting psr.sp for [%d]\n", task_pid_nr(task))); 4447 - } 4448 - /* 4449 - * save PMDs to context 4450 - * release ownership 4451 - */ 4452 - pfm_flush_pmds(task, ctx); 4453 - 4454 - /* 4455 - * at this point we are done with the PMU 4456 - * so we can unreserve the resource. 4457 - * 4458 - * when state was ZOMBIE, we have already unreserved. 
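
One detail worth keeping in mind for the unload path that follows: a ZOMBIE context already gave back its session reservation on an earlier path, so the unreserve step must be skipped for it. A trivial sketch of that guard, with abbreviated state names:

    #include <stdio.h>

    enum ctx_state { UNLOADED, LOADED, MASKED, ZOMBIE };

    /* Sketch of the guard in the removed unload path: a ZOMBIE context has
     * already dropped its session reservation, so don't release it twice. */
    static int must_unreserve(enum ctx_state prev)
    {
        return prev != ZOMBIE && prev != UNLOADED;
    }

    int main(void)
    {
        printf("%d %d\n", must_unreserve(LOADED), must_unreserve(ZOMBIE));
        return 0;
    }
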
4459 - */ 4460 - if (prev_state != PFM_CTX_ZOMBIE) 4461 - pfm_unreserve_session(ctx, 0 , ctx->ctx_cpu); 4462 - 4463 - /* 4464 - * reset activation counter and psr 4465 - */ 4466 - ctx->ctx_last_activation = PFM_INVALID_ACTIVATION; 4467 - SET_LAST_CPU(ctx, -1); 4468 - 4469 - /* 4470 - * PMU state will not be restored 4471 - */ 4472 - task->thread.flags &= ~IA64_THREAD_PM_VALID; 4473 - 4474 - /* 4475 - * break links between context and task 4476 - */ 4477 - task->thread.pfm_context = NULL; 4478 - ctx->ctx_task = NULL; 4479 - 4480 - PFM_SET_WORK_PENDING(task, 0); 4481 - 4482 - ctx->ctx_fl_trap_reason = PFM_TRAP_REASON_NONE; 4483 - ctx->ctx_fl_can_restart = 0; 4484 - ctx->ctx_fl_going_zombie = 0; 4485 - 4486 - DPRINT(("disconnected [%d] from context\n", task_pid_nr(task))); 4487 - 4488 - return 0; 4489 - } 4490 - 4491 - 4492 - /* 4493 - * called only from exit_thread() 4494 - * we come here only if the task has a context attached (loaded or masked) 4495 - */ 4496 - void 4497 - pfm_exit_thread(struct task_struct *task) 4498 - { 4499 - pfm_context_t *ctx; 4500 - unsigned long flags; 4501 - struct pt_regs *regs = task_pt_regs(task); 4502 - int ret, state; 4503 - int free_ok = 0; 4504 - 4505 - ctx = PFM_GET_CTX(task); 4506 - 4507 - PROTECT_CTX(ctx, flags); 4508 - 4509 - DPRINT(("state=%d task [%d]\n", ctx->ctx_state, task_pid_nr(task))); 4510 - 4511 - state = ctx->ctx_state; 4512 - switch(state) { 4513 - case PFM_CTX_UNLOADED: 4514 - /* 4515 - * only comes to this function if pfm_context is not NULL, i.e., cannot 4516 - * be in unloaded state 4517 - */ 4518 - printk(KERN_ERR "perfmon: pfm_exit_thread [%d] ctx unloaded\n", task_pid_nr(task)); 4519 - break; 4520 - case PFM_CTX_LOADED: 4521 - case PFM_CTX_MASKED: 4522 - ret = pfm_context_unload(ctx, NULL, 0, regs); 4523 - if (ret) { 4524 - printk(KERN_ERR "perfmon: pfm_exit_thread [%d] state=%d unload failed %d\n", task_pid_nr(task), state, ret); 4525 - } 4526 - DPRINT(("ctx unloaded for current state was %d\n", state)); 4527 - 4528 - pfm_end_notify_user(ctx); 4529 - break; 4530 - case PFM_CTX_ZOMBIE: 4531 - ret = pfm_context_unload(ctx, NULL, 0, regs); 4532 - if (ret) { 4533 - printk(KERN_ERR "perfmon: pfm_exit_thread [%d] state=%d unload failed %d\n", task_pid_nr(task), state, ret); 4534 - } 4535 - free_ok = 1; 4536 - break; 4537 - default: 4538 - printk(KERN_ERR "perfmon: pfm_exit_thread [%d] unexpected state=%d\n", task_pid_nr(task), state); 4539 - break; 4540 - } 4541 - UNPROTECT_CTX(ctx, flags); 4542 - 4543 - { u64 psr = pfm_get_psr(); 4544 - BUG_ON(psr & (IA64_PSR_UP|IA64_PSR_PP)); 4545 - BUG_ON(GET_PMU_OWNER()); 4546 - BUG_ON(ia64_psr(regs)->up); 4547 - BUG_ON(ia64_psr(regs)->pp); 4548 - } 4549 - 4550 - /* 4551 - * All memory free operations (especially for vmalloc'ed memory) 4552 - * MUST be done with interrupts ENABLED. 
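
The constraint stated above is why pfm_exit_thread() only records a free_ok flag under the lock and calls pfm_context_free() after UNPROTECT_CTX() has re-enabled interrupts: freeing vmalloc'ed memory may sleep. The shape of the pattern, modeled with a mutex and free():

    #include <pthread.h>
    #include <stdlib.h>

    static pthread_mutex_t ctx_lock = PTHREAD_MUTEX_INITIALIZER;

    /* Pattern from the removed pfm_exit_thread(): decide under the lock,
     * free only after it is released (interrupts re-enabled in the kernel). */
    static void exit_path(void *ctx, int ctx_is_dead)
    {
        int free_ok;

        pthread_mutex_lock(&ctx_lock);
        free_ok = ctx_is_dead;        /* the inspection must be serialized */
        pthread_mutex_unlock(&ctx_lock);

        if (free_ok)
            free(ctx);                /* never inside the critical section */
    }

    int main(void) { exit_path(malloc(16), 1); return 0; }
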
4553 - */ 4554 - if (free_ok) pfm_context_free(ctx); 4555 - } 4556 - 4557 - /* 4558 - * functions MUST be listed in the increasing order of their index (see perfmon.h) 4559 - */ 4560 - #define PFM_CMD(name, flags, arg_count, arg_type, getsz) { name, #name, flags, arg_count, sizeof(arg_type), getsz } 4561 - #define PFM_CMD_S(name, flags) { name, #name, flags, 0, 0, NULL } 4562 - #define PFM_CMD_PCLRWS (PFM_CMD_FD|PFM_CMD_ARG_RW|PFM_CMD_STOP) 4563 - #define PFM_CMD_PCLRW (PFM_CMD_FD|PFM_CMD_ARG_RW) 4564 - #define PFM_CMD_NONE { NULL, "no-cmd", 0, 0, 0, NULL} 4565 - 4566 - static pfm_cmd_desc_t pfm_cmd_tab[]={ 4567 - /* 0 */PFM_CMD_NONE, 4568 - /* 1 */PFM_CMD(pfm_write_pmcs, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, pfarg_reg_t, NULL), 4569 - /* 2 */PFM_CMD(pfm_write_pmds, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, pfarg_reg_t, NULL), 4570 - /* 3 */PFM_CMD(pfm_read_pmds, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, pfarg_reg_t, NULL), 4571 - /* 4 */PFM_CMD_S(pfm_stop, PFM_CMD_PCLRWS), 4572 - /* 5 */PFM_CMD_S(pfm_start, PFM_CMD_PCLRWS), 4573 - /* 6 */PFM_CMD_NONE, 4574 - /* 7 */PFM_CMD_NONE, 4575 - /* 8 */PFM_CMD(pfm_context_create, PFM_CMD_ARG_RW, 1, pfarg_context_t, pfm_ctx_getsize), 4576 - /* 9 */PFM_CMD_NONE, 4577 - /* 10 */PFM_CMD_S(pfm_restart, PFM_CMD_PCLRW), 4578 - /* 11 */PFM_CMD_NONE, 4579 - /* 12 */PFM_CMD(pfm_get_features, PFM_CMD_ARG_RW, 1, pfarg_features_t, NULL), 4580 - /* 13 */PFM_CMD(pfm_debug, 0, 1, unsigned int, NULL), 4581 - /* 14 */PFM_CMD_NONE, 4582 - /* 15 */PFM_CMD(pfm_get_pmc_reset, PFM_CMD_ARG_RW, PFM_CMD_ARG_MANY, pfarg_reg_t, NULL), 4583 - /* 16 */PFM_CMD(pfm_context_load, PFM_CMD_PCLRWS, 1, pfarg_load_t, NULL), 4584 - /* 17 */PFM_CMD_S(pfm_context_unload, PFM_CMD_PCLRWS), 4585 - /* 18 */PFM_CMD_NONE, 4586 - /* 19 */PFM_CMD_NONE, 4587 - /* 20 */PFM_CMD_NONE, 4588 - /* 21 */PFM_CMD_NONE, 4589 - /* 22 */PFM_CMD_NONE, 4590 - /* 23 */PFM_CMD_NONE, 4591 - /* 24 */PFM_CMD_NONE, 4592 - /* 25 */PFM_CMD_NONE, 4593 - /* 26 */PFM_CMD_NONE, 4594 - /* 27 */PFM_CMD_NONE, 4595 - /* 28 */PFM_CMD_NONE, 4596 - /* 29 */PFM_CMD_NONE, 4597 - /* 30 */PFM_CMD_NONE, 4598 - /* 31 */PFM_CMD_NONE, 4599 - /* 32 */PFM_CMD(pfm_write_ibrs, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, pfarg_dbreg_t, NULL), 4600 - /* 33 */PFM_CMD(pfm_write_dbrs, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, pfarg_dbreg_t, NULL) 4601 - }; 4602 - #define PFM_CMD_COUNT (sizeof(pfm_cmd_tab)/sizeof(pfm_cmd_desc_t)) 4603 - 4604 - static int 4605 - pfm_check_task_state(pfm_context_t *ctx, int cmd, unsigned long flags) 4606 - { 4607 - struct task_struct *task; 4608 - int state, old_state; 4609 - 4610 - recheck: 4611 - state = ctx->ctx_state; 4612 - task = ctx->ctx_task; 4613 - 4614 - if (task == NULL) { 4615 - DPRINT(("context %d no task, state=%d\n", ctx->ctx_fd, state)); 4616 - return 0; 4617 - } 4618 - 4619 - DPRINT(("context %d state=%d [%d] task_state=%ld must_stop=%d\n", 4620 - ctx->ctx_fd, 4621 - state, 4622 - task_pid_nr(task), 4623 - task->state, PFM_CMD_STOPPED(cmd))); 4624 - 4625 - /* 4626 - * self-monitoring always ok. 4627 - * 4628 - * for system-wide the caller can either be the creator of the 4629 - * context (the one to which the context is attached) OR 4630 - * a task running on the same CPU as the session. 
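
The command table that appears above is a classic table-driven syscall front end: each slot carries the handler, the expected argument count and element size, and flags such as "needs an fd" or "target must be stopped", with PFM_CMD_NONE holes for retired numbers. A condensed model of the scheme with hypothetical commands:

    #include <stdio.h>
    #include <stddef.h>

    typedef int (*cmd_fn)(void *arg, int count);

    struct cmd_desc {
        cmd_fn      func;      /* NULL marks a hole, like PFM_CMD_NONE */
        const char *name;
        int         narg;      /* expected count, or -1 for "many" */
        size_t      argsz;     /* per-element argument size */
    };

    static int do_start(void *arg, int count) { (void)arg; (void)count; return 0; }

    static const struct cmd_desc cmd_tab[] = {
        { NULL,     "no-cmd", 0, 0 },
        { do_start, "start",  0, 0 },
    };

    static int dispatch(int cmd, void *arg, int count)
    {
        if (cmd < 0 || (size_t)cmd >= sizeof(cmd_tab)/sizeof(cmd_tab[0]))
            return -1;                 /* -EINVAL */
        if (cmd_tab[cmd].func == NULL)
            return -1;                 /* hole in the table */
        return cmd_tab[cmd].func(arg, count);
    }

    int main(void) { printf("%d\n", dispatch(1, NULL, 0)); return 0; }
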
4631 - */ 4632 - if (task == current || ctx->ctx_fl_system) return 0; 4633 - 4634 - /* 4635 - * we are monitoring another thread 4636 - */ 4637 - switch(state) { 4638 - case PFM_CTX_UNLOADED: 4639 - /* 4640 - * if context is UNLOADED we are safe to go 4641 - */ 4642 - return 0; 4643 - case PFM_CTX_ZOMBIE: 4644 - /* 4645 - * no command can operate on a zombie context 4646 - */ 4647 - DPRINT(("cmd %d state zombie cannot operate on context\n", cmd)); 4648 - return -EINVAL; 4649 - case PFM_CTX_MASKED: 4650 - /* 4651 - * PMU state has been saved to software even though 4652 - * the thread may still be running. 4653 - */ 4654 - if (cmd != PFM_UNLOAD_CONTEXT) return 0; 4655 - } 4656 - 4657 - /* 4658 - * context is LOADED or MASKED. Some commands may need to have 4659 - * the task stopped. 4660 - * 4661 - * We could lift this restriction for UP but it would mean that 4662 - * the user has no guarantee the task would not run between 4663 - * two successive calls to perfmonctl(). That's probably OK. 4664 - * If the user wants to ensure the task does not run, then 4665 - * the task must be stopped. 4666 - */ 4667 - if (PFM_CMD_STOPPED(cmd)) { 4668 - if (!task_is_stopped_or_traced(task)) { 4669 - DPRINT(("[%d] task not in stopped state\n", task_pid_nr(task))); 4670 - return -EBUSY; 4671 - } 4672 - /* 4673 - * task is now stopped, wait for ctxsw out 4674 - * 4675 - * This is an interesting point in the code. 4676 - * We need to unprotect the context because 4677 - * the pfm_save_regs() routine needs to grab 4678 - * the same lock. There is danger in doing 4679 - * this because it leaves a window open for 4680 - * another task to get access to the context 4681 - * and possibly change its state. The one thing 4682 - * that is not possible is for the context to disappear 4683 - * because we are protected by the VFS layer, i.e., 4684 - * get_fd()/put_fd(). 
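
That unlock/wait/relock dance is the standard shape for sleeping on another task while holding a lock-protected object: drop the lock, wait, retake it, and loop if the guarded state moved underneath. Sketched in user space, with a stub standing in for wait_task_inactive():

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t ctx_lock = PTHREAD_MUTEX_INITIALIZER;

    /* stand-in for wait_task_inactive(); may block in the real kernel */
    static void wait_until_target_inactive(void) {}

    /* Shape of the recheck loop in the removed pfm_check_task_state(). */
    static int wait_stopped(volatile int *state)
    {
        int old_state;
    recheck:
        old_state = *state;

        pthread_mutex_unlock(&ctx_lock);   /* cannot sleep holding the lock */
        wait_until_target_inactive();
        pthread_mutex_lock(&ctx_lock);

        if (*state != old_state)           /* raced: somebody changed it */
            goto recheck;
        return 0;
    }

    int main(void)
    {
        volatile int state = 2;            /* e.g. LOADED */
        pthread_mutex_lock(&ctx_lock);
        printf("%d\n", wait_stopped(&state));
        pthread_mutex_unlock(&ctx_lock);
        return 0;
    }
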
4685 - */ 4686 - old_state = state; 4687 - 4688 - UNPROTECT_CTX(ctx, flags); 4689 - 4690 - wait_task_inactive(task, 0); 4691 - 4692 - PROTECT_CTX(ctx, flags); 4693 - 4694 - /* 4695 - * we must recheck to verify if state has changed 4696 - */ 4697 - if (ctx->ctx_state != old_state) { 4698 - DPRINT(("old_state=%d new_state=%d\n", old_state, ctx->ctx_state)); 4699 - goto recheck; 4700 - } 4701 - } 4702 - return 0; 4703 - } 4704 - 4705 - /* 4706 - * system-call entry point (must return long) 4707 - */ 4708 - asmlinkage long 4709 - sys_perfmonctl (int fd, int cmd, void __user *arg, int count) 4710 - { 4711 - struct fd f = {NULL, 0}; 4712 - pfm_context_t *ctx = NULL; 4713 - unsigned long flags = 0UL; 4714 - void *args_k = NULL; 4715 - long ret; /* will expand int return types */ 4716 - size_t base_sz, sz, xtra_sz = 0; 4717 - int narg, completed_args = 0, call_made = 0, cmd_flags; 4718 - int (*func)(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs); 4719 - int (*getsize)(void *arg, size_t *sz); 4720 - #define PFM_MAX_ARGSIZE 4096 4721 - 4722 - /* 4723 - * reject any call if perfmon was disabled at initialization 4724 - */ 4725 - if (unlikely(pmu_conf == NULL)) return -ENOSYS; 4726 - 4727 - if (unlikely(cmd < 0 || cmd >= PFM_CMD_COUNT)) { 4728 - DPRINT(("invalid cmd=%d\n", cmd)); 4729 - return -EINVAL; 4730 - } 4731 - 4732 - func = pfm_cmd_tab[cmd].cmd_func; 4733 - narg = pfm_cmd_tab[cmd].cmd_narg; 4734 - base_sz = pfm_cmd_tab[cmd].cmd_argsize; 4735 - getsize = pfm_cmd_tab[cmd].cmd_getsize; 4736 - cmd_flags = pfm_cmd_tab[cmd].cmd_flags; 4737 - 4738 - if (unlikely(func == NULL)) { 4739 - DPRINT(("invalid cmd=%d\n", cmd)); 4740 - return -EINVAL; 4741 - } 4742 - 4743 - DPRINT(("cmd=%s idx=%d narg=0x%x argsz=%lu count=%d\n", 4744 - PFM_CMD_NAME(cmd), 4745 - cmd, 4746 - narg, 4747 - base_sz, 4748 - count)); 4749 - 4750 - /* 4751 - * check if number of arguments matches what the command expects 4752 - */ 4753 - if (unlikely((narg == PFM_CMD_ARG_MANY && count <= 0) || (narg > 0 && narg != count))) 4754 - return -EINVAL; 4755 - 4756 - restart_args: 4757 - sz = xtra_sz + base_sz*count; 4758 - /* 4759 - * limit abuse to min page size 4760 - */ 4761 - if (unlikely(sz > PFM_MAX_ARGSIZE)) { 4762 - printk(KERN_ERR "perfmon: [%d] argument too big %lu\n", task_pid_nr(current), sz); 4763 - return -E2BIG; 4764 - } 4765 - 4766 - /* 4767 - * allocate default-sized argument buffer 4768 - */ 4769 - if (likely(count && args_k == NULL)) { 4770 - args_k = kmalloc(PFM_MAX_ARGSIZE, GFP_KERNEL); 4771 - if (args_k == NULL) return -ENOMEM; 4772 - } 4773 - 4774 - ret = -EFAULT; 4775 - 4776 - /* 4777 - * copy arguments 4778 - * 4779 - * assume sz = 0 for command without parameters 4780 - */ 4781 - if (sz && copy_from_user(args_k, arg, sz)) { 4782 - DPRINT(("cannot copy_from_user %lu bytes @%p\n", sz, arg)); 4783 - goto error_args; 4784 - } 4785 - 4786 - /* 4787 - * check if command supports extra parameters 4788 - */ 4789 - if (completed_args == 0 && getsize) { 4790 - /* 4791 - * get extra parameters size (based on main argument) 4792 - */ 4793 - ret = (*getsize)(args_k, &xtra_sz); 4794 - if (ret) goto error_args; 4795 - 4796 - completed_args = 1; 4797 - 4798 - DPRINT(("restart_args sz=%lu xtra_sz=%lu\n", sz, xtra_sz)); 4799 - 4800 - /* retry if necessary */ 4801 - if (likely(xtra_sz)) goto restart_args; 4802 - } 4803 - 4804 - if (unlikely((cmd_flags & PFM_CMD_FD) == 0)) goto skip_fd; 4805 - 4806 - ret = -EBADF; 4807 - 4808 - f = fdget(fd); 4809 - if (unlikely(f.file == NULL)) { 4810 - DPRINT(("invalid fd %d\n", 
fd)); 4811 - goto error_args; 4812 - } 4813 - if (unlikely(PFM_IS_FILE(f.file) == 0)) { 4814 - DPRINT(("fd %d not related to perfmon\n", fd)); 4815 - goto error_args; 4816 - } 4817 - 4818 - ctx = f.file->private_data; 4819 - if (unlikely(ctx == NULL)) { 4820 - DPRINT(("no context for fd %d\n", fd)); 4821 - goto error_args; 4822 - } 4823 - prefetch(&ctx->ctx_state); 4824 - 4825 - PROTECT_CTX(ctx, flags); 4826 - 4827 - /* 4828 - * check task is stopped 4829 - */ 4830 - ret = pfm_check_task_state(ctx, cmd, flags); 4831 - if (unlikely(ret)) goto abort_locked; 4832 - 4833 - skip_fd: 4834 - ret = (*func)(ctx, args_k, count, task_pt_regs(current)); 4835 - 4836 - call_made = 1; 4837 - 4838 - abort_locked: 4839 - if (likely(ctx)) { 4840 - DPRINT(("context unlocked\n")); 4841 - UNPROTECT_CTX(ctx, flags); 4842 - } 4843 - 4844 - /* copy argument back to user, if needed */ 4845 - if (call_made && PFM_CMD_RW_ARG(cmd) && copy_to_user(arg, args_k, base_sz*count)) ret = -EFAULT; 4846 - 4847 - error_args: 4848 - if (f.file) 4849 - fdput(f); 4850 - 4851 - kfree(args_k); 4852 - 4853 - DPRINT(("cmd=%s ret=%ld\n", PFM_CMD_NAME(cmd), ret)); 4854 - 4855 - return ret; 4856 - } 4857 - 4858 - static void 4859 - pfm_resume_after_ovfl(pfm_context_t *ctx, unsigned long ovfl_regs, struct pt_regs *regs) 4860 - { 4861 - pfm_buffer_fmt_t *fmt = ctx->ctx_buf_fmt; 4862 - pfm_ovfl_ctrl_t rst_ctrl; 4863 - int state; 4864 - int ret = 0; 4865 - 4866 - state = ctx->ctx_state; 4867 - /* 4868 - * Unlock sampling buffer and reset index atomically 4869 - * XXX: not really needed when blocking 4870 - */ 4871 - if (CTX_HAS_SMPL(ctx)) { 4872 - 4873 - rst_ctrl.bits.mask_monitoring = 0; 4874 - rst_ctrl.bits.reset_ovfl_pmds = 0; 4875 - 4876 - if (state == PFM_CTX_LOADED) 4877 - ret = pfm_buf_fmt_restart_active(fmt, current, &rst_ctrl, ctx->ctx_smpl_hdr, regs); 4878 - else 4879 - ret = pfm_buf_fmt_restart(fmt, current, &rst_ctrl, ctx->ctx_smpl_hdr, regs); 4880 - } else { 4881 - rst_ctrl.bits.mask_monitoring = 0; 4882 - rst_ctrl.bits.reset_ovfl_pmds = 1; 4883 - } 4884 - 4885 - if (ret == 0) { 4886 - if (rst_ctrl.bits.reset_ovfl_pmds) { 4887 - pfm_reset_regs(ctx, &ovfl_regs, PFM_PMD_LONG_RESET); 4888 - } 4889 - if (rst_ctrl.bits.mask_monitoring == 0) { 4890 - DPRINT(("resuming monitoring\n")); 4891 - if (ctx->ctx_state == PFM_CTX_MASKED) pfm_restore_monitoring(current); 4892 - } else { 4893 - DPRINT(("stopping monitoring\n")); 4894 - //pfm_stop_monitoring(current, regs); 4895 - } 4896 - ctx->ctx_state = PFM_CTX_LOADED; 4897 - } 4898 - } 4899 - 4900 - /* 4901 - * context MUST BE LOCKED when calling 4902 - * can only be called for current 4903 - */ 4904 - static void 4905 - pfm_context_force_terminate(pfm_context_t *ctx, struct pt_regs *regs) 4906 - { 4907 - int ret; 4908 - 4909 - DPRINT(("entering for [%d]\n", task_pid_nr(current))); 4910 - 4911 - ret = pfm_context_unload(ctx, NULL, 0, regs); 4912 - if (ret) { 4913 - printk(KERN_ERR "pfm_context_force_terminate: [%d] unloaded failed with %d\n", task_pid_nr(current), ret); 4914 - } 4915 - 4916 - /* 4917 - * and wakeup controlling task, indicating we are now disconnected 4918 - */ 4919 - wake_up_interruptible(&ctx->ctx_zombieq); 4920 - 4921 - /* 4922 - * given that context is still locked, the controlling 4923 - * task will only get access when we return from 4924 - * pfm_handle_work(). 
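
sys_perfmonctl()'s argument handling above is a two-pass copy-in: copy the base-sized payload, ask the command's getsize() callback how much variable-length data the main argument implies, then restart the copy with the extra size, everything capped at one page. A compact model with memcpy() standing in for copy_from_user():

    #include <stdio.h>
    #include <string.h>

    #define MAX_ARGSZ 4096

    /* Two-pass copy-in scheme modeled on the removed sys_perfmonctl(). */
    static int copy_args(const void *uarg, size_t base_sz,
                         size_t (*getsize)(const void *), char *buf)
    {
        size_t xtra = 0, sz;
        int completed = 0;

    restart:
        sz = base_sz + xtra;
        if (sz > MAX_ARGSZ)
            return -1;                    /* -E2BIG */
        memcpy(buf, uarg, sz);
        if (!completed && getsize) {
            xtra = getsize(buf);          /* size implied by main argument */
            completed = 1;
            if (xtra) goto restart;       /* second pass with the full size */
        }
        return 0;
    }

    static size_t need8(const void *p) { (void)p; return 8; }

    int main(void)
    {
        char u[64] = {0}, k[MAX_ARGSZ];
        printf("%d\n", copy_args(u, 16, need8, k));
        return 0;
    }
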
4925 - */ 4926 - } 4927 - 4928 - static int pfm_ovfl_notify_user(pfm_context_t *ctx, unsigned long ovfl_pmds); 4929 - 4930 - /* 4931 - * pfm_handle_work() can be called with interrupts enabled 4932 - * (TIF_NEED_RESCHED) or disabled. The down_interruptible 4933 - * call may sleep, therefore we must re-enable interrupts 4934 - * to avoid deadlocks. It is safe to do so because this function 4935 - * is called ONLY when returning to user level (pUStk=1), in which case 4936 - * there is no risk of kernel stack overflow due to deep 4937 - * interrupt nesting. 4938 - */ 4939 - void 4940 - pfm_handle_work(void) 4941 - { 4942 - pfm_context_t *ctx; 4943 - struct pt_regs *regs; 4944 - unsigned long flags, dummy_flags; 4945 - unsigned long ovfl_regs; 4946 - unsigned int reason; 4947 - int ret; 4948 - 4949 - ctx = PFM_GET_CTX(current); 4950 - if (ctx == NULL) { 4951 - printk(KERN_ERR "perfmon: [%d] has no PFM context\n", 4952 - task_pid_nr(current)); 4953 - return; 4954 - } 4955 - 4956 - PROTECT_CTX(ctx, flags); 4957 - 4958 - PFM_SET_WORK_PENDING(current, 0); 4959 - 4960 - regs = task_pt_regs(current); 4961 - 4962 - /* 4963 - * extract reason for being here and clear 4964 - */ 4965 - reason = ctx->ctx_fl_trap_reason; 4966 - ctx->ctx_fl_trap_reason = PFM_TRAP_REASON_NONE; 4967 - ovfl_regs = ctx->ctx_ovfl_regs[0]; 4968 - 4969 - DPRINT(("reason=%d state=%d\n", reason, ctx->ctx_state)); 4970 - 4971 - /* 4972 - * must be done before we check for simple-reset mode 4973 - */ 4974 - if (ctx->ctx_fl_going_zombie || ctx->ctx_state == PFM_CTX_ZOMBIE) 4975 - goto do_zombie; 4976 - 4977 - //if (CTX_OVFL_NOBLOCK(ctx)) goto skip_blocking; 4978 - if (reason == PFM_TRAP_REASON_RESET) 4979 - goto skip_blocking; 4980 - 4981 - /* 4982 - * restore interrupt mask to what it was on entry. 4983 - * Could be enabled/disabled. 4984 - */ 4985 - UNPROTECT_CTX(ctx, flags); 4986 - 4987 - /* 4988 - * force interrupt enable because of down_interruptible() 4989 - */ 4990 - local_irq_enable(); 4991 - 4992 - DPRINT(("before block sleeping\n")); 4993 - 4994 - /* 4995 - * may go through without blocking on SMP systems 4996 - * if restart has been received already by the time we call down() 4997 - */ 4998 - ret = wait_for_completion_interruptible(&ctx->ctx_restart_done); 4999 - 5000 - DPRINT(("after block sleeping ret=%d\n", ret)); 5001 - 5002 - /* 5003 - * lock context and mask interrupts again 5004 - * We save flags into a dummy because we may have 5005 - * altered the interrupt mask compared to entry in this 5006 - * function. 5007 - */ 5008 - PROTECT_CTX(ctx, dummy_flags); 5009 - 5010 - /* 5011 - * we need to read the ovfl_regs only after wake-up 5012 - * because we may have had pfm_write_pmds() in between 5013 - * and that can change PMD values and therefore 5014 - * ovfl_regs is reset for these new PMD values. 
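
The point made in this comment deserves emphasis: any snapshot of ctx_ovfl_regs taken before the sleep is stale, so the woken task re-reads it only after the context lock is held again. The shape of that sequence, with a stub replacing wait_for_completion_interruptible():

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t ctx_lock = PTHREAD_MUTEX_INITIALIZER;

    /* Shape of pfm_handle_work(): a snapshot taken before sleeping is stale;
     * re-read the overflow set after reacquiring the lock. */
    static unsigned long resume_after_block(const unsigned long *ovfl_regs,
                                            void (*block)(void))
    {
        unsigned long ovfl;

        pthread_mutex_unlock(&ctx_lock);  /* lock/interrupts dropped to sleep */
        block();                          /* completion wait in the original */
        pthread_mutex_lock(&ctx_lock);

        ovfl = *ovfl_regs;                /* may differ from pre-sleep value */
        return ovfl;
    }

    static void noblock(void) {}

    int main(void)
    {
        unsigned long regs = 0x5;
        pthread_mutex_lock(&ctx_lock);
        printf("0x%lx\n", resume_after_block(&regs, noblock));
        pthread_mutex_unlock(&ctx_lock);
        return 0;
    }
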
5015 - */ 5016 - ovfl_regs = ctx->ctx_ovfl_regs[0]; 5017 - 5018 - if (ctx->ctx_fl_going_zombie) { 5019 - do_zombie: 5020 - DPRINT(("context is zombie, bailing out\n")); 5021 - pfm_context_force_terminate(ctx, regs); 5022 - goto nothing_to_do; 5023 - } 5024 - /* 5025 - * in case of interruption of down() we don't restart anything 5026 - */ 5027 - if (ret < 0) 5028 - goto nothing_to_do; 5029 - 5030 - skip_blocking: 5031 - pfm_resume_after_ovfl(ctx, ovfl_regs, regs); 5032 - ctx->ctx_ovfl_regs[0] = 0UL; 5033 - 5034 - nothing_to_do: 5035 - /* 5036 - * restore flags as they were upon entry 5037 - */ 5038 - UNPROTECT_CTX(ctx, flags); 5039 - } 5040 - 5041 - static int 5042 - pfm_notify_user(pfm_context_t *ctx, pfm_msg_t *msg) 5043 - { 5044 - if (ctx->ctx_state == PFM_CTX_ZOMBIE) { 5045 - DPRINT(("ignoring overflow notification, owner is zombie\n")); 5046 - return 0; 5047 - } 5048 - 5049 - DPRINT(("waking up somebody\n")); 5050 - 5051 - if (msg) wake_up_interruptible(&ctx->ctx_msgq_wait); 5052 - 5053 - /* 5054 - * safe, we are not in intr handler, nor in ctxsw when 5055 - * we come here 5056 - */ 5057 - kill_fasync (&ctx->ctx_async_queue, SIGIO, POLL_IN); 5058 - 5059 - return 0; 5060 - } 5061 - 5062 - static int 5063 - pfm_ovfl_notify_user(pfm_context_t *ctx, unsigned long ovfl_pmds) 5064 - { 5065 - pfm_msg_t *msg = NULL; 5066 - 5067 - if (ctx->ctx_fl_no_msg == 0) { 5068 - msg = pfm_get_new_msg(ctx); 5069 - if (msg == NULL) { 5070 - printk(KERN_ERR "perfmon: pfm_ovfl_notify_user no more notification msgs\n"); 5071 - return -1; 5072 - } 5073 - 5074 - msg->pfm_ovfl_msg.msg_type = PFM_MSG_OVFL; 5075 - msg->pfm_ovfl_msg.msg_ctx_fd = ctx->ctx_fd; 5076 - msg->pfm_ovfl_msg.msg_active_set = 0; 5077 - msg->pfm_ovfl_msg.msg_ovfl_pmds[0] = ovfl_pmds; 5078 - msg->pfm_ovfl_msg.msg_ovfl_pmds[1] = 0UL; 5079 - msg->pfm_ovfl_msg.msg_ovfl_pmds[2] = 0UL; 5080 - msg->pfm_ovfl_msg.msg_ovfl_pmds[3] = 0UL; 5081 - msg->pfm_ovfl_msg.msg_tstamp = 0UL; 5082 - } 5083 - 5084 - DPRINT(("ovfl msg: msg=%p no_msg=%d fd=%d ovfl_pmds=0x%lx\n", 5085 - msg, 5086 - ctx->ctx_fl_no_msg, 5087 - ctx->ctx_fd, 5088 - ovfl_pmds)); 5089 - 5090 - return pfm_notify_user(ctx, msg); 5091 - } 5092 - 5093 - static int 5094 - pfm_end_notify_user(pfm_context_t *ctx) 5095 - { 5096 - pfm_msg_t *msg; 5097 - 5098 - msg = pfm_get_new_msg(ctx); 5099 - if (msg == NULL) { 5100 - printk(KERN_ERR "perfmon: pfm_end_notify_user no more notification msgs\n"); 5101 - return -1; 5102 - } 5103 - /* no leak */ 5104 - memset(msg, 0, sizeof(*msg)); 5105 - 5106 - msg->pfm_end_msg.msg_type = PFM_MSG_END; 5107 - msg->pfm_end_msg.msg_ctx_fd = ctx->ctx_fd; 5108 - msg->pfm_ovfl_msg.msg_tstamp = 0UL; 5109 - 5110 - DPRINT(("end msg: msg=%p no_msg=%d ctx_fd=%d\n", 5111 - msg, 5112 - ctx->ctx_fl_no_msg, 5113 - ctx->ctx_fd)); 5114 - 5115 - return pfm_notify_user(ctx, msg); 5116 - } 5117 - 5118 - /* 5119 - * main overflow processing routine. 
5120 - * it can be called from the interrupt path or explicitly during the context switch code 5121 - */ 5122 - static void pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx, 5123 - unsigned long pmc0, struct pt_regs *regs) 5124 - { 5125 - pfm_ovfl_arg_t *ovfl_arg; 5126 - unsigned long mask; 5127 - unsigned long old_val, ovfl_val, new_val; 5128 - unsigned long ovfl_notify = 0UL, ovfl_pmds = 0UL, smpl_pmds = 0UL, reset_pmds; 5129 - unsigned long tstamp; 5130 - pfm_ovfl_ctrl_t ovfl_ctrl; 5131 - unsigned int i, has_smpl; 5132 - int must_notify = 0; 5133 - 5134 - if (unlikely(ctx->ctx_state == PFM_CTX_ZOMBIE)) goto stop_monitoring; 5135 - 5136 - /* 5137 - * sanity test. Should never happen 5138 - */ 5139 - if (unlikely((pmc0 & 0x1) == 0)) goto sanity_check; 5140 - 5141 - tstamp = ia64_get_itc(); 5142 - mask = pmc0 >> PMU_FIRST_COUNTER; 5143 - ovfl_val = pmu_conf->ovfl_val; 5144 - has_smpl = CTX_HAS_SMPL(ctx); 5145 - 5146 - DPRINT_ovfl(("pmc0=0x%lx pid=%d iip=0x%lx, %s " 5147 - "used_pmds=0x%lx\n", 5148 - pmc0, 5149 - task ? task_pid_nr(task): -1, 5150 - (regs ? regs->cr_iip : 0), 5151 - CTX_OVFL_NOBLOCK(ctx) ? "nonblocking" : "blocking", 5152 - ctx->ctx_used_pmds[0])); 5153 - 5154 - 5155 - /* 5156 - * first we update the virtual counters 5157 - * assume there was a prior ia64_srlz_d() issued 5158 - */ 5159 - for (i = PMU_FIRST_COUNTER; mask ; i++, mask >>= 1) { 5160 - 5161 - /* skip pmd which did not overflow */ 5162 - if ((mask & 0x1) == 0) continue; 5163 - 5164 - /* 5165 - * Note that the pmd is not necessarily 0 at this point as qualified events 5166 - * may have happened before the PMU was frozen. The residual count is not 5167 - * taken into consideration here but will be with any read of the pmd via 5168 - * pfm_read_pmds(). 5169 - */ 5170 - old_val = new_val = ctx->ctx_pmds[i].val; 5171 - new_val += 1 + ovfl_val; 5172 - ctx->ctx_pmds[i].val = new_val; 5173 - 5174 - /* 5175 - * check for overflow condition 5176 - */ 5177 - if (likely(old_val > new_val)) { 5178 - ovfl_pmds |= 1UL << i; 5179 - if (PMC_OVFL_NOTIFY(ctx, i)) ovfl_notify |= 1UL << i; 5180 - } 5181 - 5182 - DPRINT_ovfl(("ctx_pmd[%d].val=0x%lx old_val=0x%lx pmd=0x%lx ovfl_pmds=0x%lx ovfl_notify=0x%lx\n", 5183 - i, 5184 - new_val, 5185 - old_val, 5186 - ia64_get_pmd(i) & ovfl_val, 5187 - ovfl_pmds, 5188 - ovfl_notify)); 5189 - } 5190 - 5191 - /* 5192 - * there was no 64-bit overflow, nothing else to do 5193 - */ 5194 - if (ovfl_pmds == 0UL) return; 5195 - 5196 - /* 5197 - * reset all control bits 5198 - */ 5199 - ovfl_ctrl.val = 0; 5200 - reset_pmds = 0UL; 5201 - 5202 - /* 5203 - * if a sampling format module exists, then we "cache" the overflow by 5204 - * calling the module's handler() routine. 5205 - */ 5206 - if (has_smpl) { 5207 - unsigned long start_cycles, end_cycles; 5208 - unsigned long pmd_mask; 5209 - int j, k, ret = 0; 5210 - int this_cpu = smp_processor_id(); 5211 - 5212 - pmd_mask = ovfl_pmds >> PMU_FIRST_COUNTER; 5213 - ovfl_arg = &ctx->ctx_ovfl_arg; 5214 - 5215 - prefetch(ctx->ctx_smpl_hdr); 5216 - 5217 - for(i=PMU_FIRST_COUNTER; pmd_mask && ret == 0; i++, pmd_mask >>=1) { 5218 - 5219 - mask = 1UL << i; 5220 - 5221 - if ((pmd_mask & 0x1) == 0) continue; 5222 - 5223 - ovfl_arg->ovfl_pmd = (unsigned char )i; 5224 - ovfl_arg->ovfl_notify = ovfl_notify & mask ? 
1 : 0; 5225 - ovfl_arg->active_set = 0; 5226 - ovfl_arg->ovfl_ctrl.val = 0; /* module must fill in all fields */ 5227 - ovfl_arg->smpl_pmds[0] = smpl_pmds = ctx->ctx_pmds[i].smpl_pmds[0]; 5228 - 5229 - ovfl_arg->pmd_value = ctx->ctx_pmds[i].val; 5230 - ovfl_arg->pmd_last_reset = ctx->ctx_pmds[i].lval; 5231 - ovfl_arg->pmd_eventid = ctx->ctx_pmds[i].eventid; 5232 - 5233 - /* 5234 - * copy values of pmds of interest. Sampling format may copy them 5235 - * into sampling buffer. 5236 - */ 5237 - if (smpl_pmds) { 5238 - for(j=0, k=0; smpl_pmds; j++, smpl_pmds >>=1) { 5239 - if ((smpl_pmds & 0x1) == 0) continue; 5240 - ovfl_arg->smpl_pmds_values[k++] = PMD_IS_COUNTING(j) ? pfm_read_soft_counter(ctx, j) : ia64_get_pmd(j); 5241 - DPRINT_ovfl(("smpl_pmd[%d]=pmd%u=0x%lx\n", k-1, j, ovfl_arg->smpl_pmds_values[k-1])); 5242 - } 5243 - } 5244 - 5245 - pfm_stats[this_cpu].pfm_smpl_handler_calls++; 5246 - 5247 - start_cycles = ia64_get_itc(); 5248 - 5249 - /* 5250 - * call custom buffer format record (handler) routine 5251 - */ 5252 - ret = (*ctx->ctx_buf_fmt->fmt_handler)(task, ctx->ctx_smpl_hdr, ovfl_arg, regs, tstamp); 5253 - 5254 - end_cycles = ia64_get_itc(); 5255 - 5256 - /* 5257 - * For those controls, we take the union because they have 5258 - * an all or nothing behavior. 5259 - */ 5260 - ovfl_ctrl.bits.notify_user |= ovfl_arg->ovfl_ctrl.bits.notify_user; 5261 - ovfl_ctrl.bits.block_task |= ovfl_arg->ovfl_ctrl.bits.block_task; 5262 - ovfl_ctrl.bits.mask_monitoring |= ovfl_arg->ovfl_ctrl.bits.mask_monitoring; 5263 - /* 5264 - * build the bitmask of pmds to reset now 5265 - */ 5266 - if (ovfl_arg->ovfl_ctrl.bits.reset_ovfl_pmds) reset_pmds |= mask; 5267 - 5268 - pfm_stats[this_cpu].pfm_smpl_handler_cycles += end_cycles - start_cycles; 5269 - } 5270 - /* 5271 - * when the module cannot handle the rest of the overflows, we abort right here 5272 - */ 5273 - if (ret && pmd_mask) { 5274 - DPRINT(("handler aborts leftover ovfl_pmds=0x%lx\n", 5275 - pmd_mask<<PMU_FIRST_COUNTER)); 5276 - } 5277 - /* 5278 - * remove the pmds we reset now from the set of pmds to reset in pfm_restart() 5279 - */ 5280 - ovfl_pmds &= ~reset_pmds; 5281 - } else { 5282 - /* 5283 - * when no sampling module is used, then the default 5284 - * is to notify on overflow if requested by user 5285 - */ 5286 - ovfl_ctrl.bits.notify_user = ovfl_notify ? 1 : 0; 5287 - ovfl_ctrl.bits.block_task = ovfl_notify ? 1 : 0; 5288 - ovfl_ctrl.bits.mask_monitoring = ovfl_notify ? 1 : 0; /* XXX: change for saturation */ 5289 - ovfl_ctrl.bits.reset_ovfl_pmds = ovfl_notify ? 
0 : 1; 5290 - /* 5291 - * if needed, we reset all overflowed pmds 5292 - */ 5293 - if (ovfl_notify == 0) reset_pmds = ovfl_pmds; 5294 - } 5295 - 5296 - DPRINT_ovfl(("ovfl_pmds=0x%lx reset_pmds=0x%lx\n", ovfl_pmds, reset_pmds)); 5297 - 5298 - /* 5299 - * reset the requested PMD registers using the short reset values 5300 - */ 5301 - if (reset_pmds) { 5302 - unsigned long bm = reset_pmds; 5303 - pfm_reset_regs(ctx, &bm, PFM_PMD_SHORT_RESET); 5304 - } 5305 - 5306 - if (ovfl_notify && ovfl_ctrl.bits.notify_user) { 5307 - /* 5308 - * keep track of what to reset when unblocking 5309 - */ 5310 - ctx->ctx_ovfl_regs[0] = ovfl_pmds; 5311 - 5312 - /* 5313 - * check for blocking context 5314 - */ 5315 - if (CTX_OVFL_NOBLOCK(ctx) == 0 && ovfl_ctrl.bits.block_task) { 5316 - 5317 - ctx->ctx_fl_trap_reason = PFM_TRAP_REASON_BLOCK; 5318 - 5319 - /* 5320 - * set the perfmon specific checking pending work for the task 5321 - */ 5322 - PFM_SET_WORK_PENDING(task, 1); 5323 - 5324 - /* 5325 - * when coming from ctxsw, current still points to the 5326 - * previous task, therefore we must work with task and not current. 5327 - */ 5328 - set_notify_resume(task); 5329 - } 5330 - /* 5331 - * defer until state is changed (shorten spin window). the context is locked 5332 - * anyway, so the signal receiver would come spin for nothing. 5333 - */ 5334 - must_notify = 1; 5335 - } 5336 - 5337 - DPRINT_ovfl(("owner [%d] pending=%ld reason=%u ovfl_pmds=0x%lx ovfl_notify=0x%lx masked=%d\n", 5338 - GET_PMU_OWNER() ? task_pid_nr(GET_PMU_OWNER()) : -1, 5339 - PFM_GET_WORK_PENDING(task), 5340 - ctx->ctx_fl_trap_reason, 5341 - ovfl_pmds, 5342 - ovfl_notify, 5343 - ovfl_ctrl.bits.mask_monitoring ? 1 : 0)); 5344 - /* 5345 - * in case monitoring must be stopped, we toggle the psr bits 5346 - */ 5347 - if (ovfl_ctrl.bits.mask_monitoring) { 5348 - pfm_mask_monitoring(task); 5349 - ctx->ctx_state = PFM_CTX_MASKED; 5350 - ctx->ctx_fl_can_restart = 1; 5351 - } 5352 - 5353 - /* 5354 - * send notification now 5355 - */ 5356 - if (must_notify) pfm_ovfl_notify_user(ctx, ovfl_notify); 5357 - 5358 - return; 5359 - 5360 - sanity_check: 5361 - printk(KERN_ERR "perfmon: CPU%d overflow handler [%d] pmc0=0x%lx\n", 5362 - smp_processor_id(), 5363 - task ? task_pid_nr(task) : -1, 5364 - pmc0); 5365 - return; 5366 - 5367 - stop_monitoring: 5368 - /* 5369 - * in SMP, zombie context is never restored but reclaimed in pfm_load_regs(). 5370 - * Moreover, zombies are also reclaimed in pfm_save_regs(). Therefore we can 5371 - * come here as zombie only if the task is the current task. In which case, we 5372 - * can access the PMU hardware directly. 5373 - * 5374 - * Note that zombies do have PM_VALID set. So here we do the minimal. 5375 - * 5376 - * In case the context was zombified it could not be reclaimed at the time 5377 - * the monitoring program exited. At this point, the PMU reservation has been 5378 - * returned, the sampling buffer has been freed. We must convert this call 5379 - * into a spurious interrupt. However, we must also avoid infinite overflows 5380 - * by stopping monitoring for this task. We can only come here for a per-task 5381 - * context. All we need to do is to stop monitoring using the psr bits which 5382 - * are always task private. By re-enabling secure monitoring, we ensure that 5383 - * the monitored task will not be able to re-activate monitoring. 5384 - * The task will eventually be context switched out, at which point the context 5385 - * will be reclaimed (that includes releasing ownership of the PMU). 
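
The counter arithmetic at the heart of this handler is easy to restate: hardware PMDs are narrower than 64 bits, so each overflow interrupt adds 1 + ovfl_val (the wrap modulus) to a 64-bit software copy, and a genuine 64-bit wrap is detected when the new value comes out smaller than the old one. A self-contained sketch of that widening step:

    #include <stdint.h>
    #include <stdio.h>

    /* Widening arithmetic from the removed pfm_overflow_handler().
     * ovfl_val is the mask of implemented counter bits (e.g. 47-bit PMDs). */
    static int virtual_overflow(uint64_t *soft, uint64_t ovfl_val)
    {
        uint64_t old = *soft;
        *soft = old + 1 + ovfl_val;   /* account one hardware wrap */
        return old > *soft;           /* 64-bit software counter wrapped too */
    }

    int main(void)
    {
        uint64_t v = UINT64_MAX - 10;
        printf("%d 0x%llx\n", virtual_overflow(&v, (1ULL << 47) - 1),
               (unsigned long long)v);
        return 0;
    }
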
5386 - * 5387 - * So there might be a window of time where the number of per-task sessions is zero 5388 - * yet one PMU might have an owner and get at most one overflow interrupt for a zombie 5389 - * context. This is safe because if a per-task session comes in, it will push this one 5390 - * out and by virtue of pfm_save_regs(), this one will disappear. If a system wide 5391 - * session is forced on that CPU, given that we use task pinning, pfm_save_regs() will 5392 - * also push our zombie context out. 5393 - * 5394 - * Overall pretty hairy stuff.... 5395 - */ 5396 - DPRINT(("ctx is zombie for [%d], converted to spurious\n", task ? task_pid_nr(task): -1)); 5397 - pfm_clear_psr_up(); 5398 - ia64_psr(regs)->up = 0; 5399 - ia64_psr(regs)->sp = 1; 5400 - return; 5401 - } 5402 - 5403 - static int 5404 - pfm_do_interrupt_handler(void *arg, struct pt_regs *regs) 5405 - { 5406 - struct task_struct *task; 5407 - pfm_context_t *ctx; 5408 - unsigned long flags; 5409 - u64 pmc0; 5410 - int this_cpu = smp_processor_id(); 5411 - int retval = 0; 5412 - 5413 - pfm_stats[this_cpu].pfm_ovfl_intr_count++; 5414 - 5415 - /* 5416 - * srlz.d done before arriving here 5417 - */ 5418 - pmc0 = ia64_get_pmc(0); 5419 - 5420 - task = GET_PMU_OWNER(); 5421 - ctx = GET_PMU_CTX(); 5422 - 5423 - /* 5424 - * if we have some pending bits set 5425 - * assumes : if any PMC0.bit[63-1] is set, then PMC0.fr = 1 5426 - */ 5427 - if (PMC0_HAS_OVFL(pmc0) && task) { 5428 - /* 5429 - * we assume that pmc0.fr is always set here 5430 - */ 5431 - 5432 - /* sanity check */ 5433 - if (!ctx) goto report_spurious1; 5434 - 5435 - if (ctx->ctx_fl_system == 0 && (task->thread.flags & IA64_THREAD_PM_VALID) == 0) 5436 - goto report_spurious2; 5437 - 5438 - PROTECT_CTX_NOPRINT(ctx, flags); 5439 - 5440 - pfm_overflow_handler(task, ctx, pmc0, regs); 5441 - 5442 - UNPROTECT_CTX_NOPRINT(ctx, flags); 5443 - 5444 - } else { 5445 - pfm_stats[this_cpu].pfm_spurious_ovfl_intr_count++; 5446 - retval = -1; 5447 - } 5448 - /* 5449 - * keep it unfrozen at all times 5450 - */ 5451 - pfm_unfreeze_pmu(); 5452 - 5453 - return retval; 5454 - 5455 - report_spurious1: 5456 - printk(KERN_INFO "perfmon: spurious overflow interrupt on CPU%d: process %d has no PFM context\n", 5457 - this_cpu, task_pid_nr(task)); 5458 - pfm_unfreeze_pmu(); 5459 - return -1; 5460 - report_spurious2: 5461 - printk(KERN_INFO "perfmon: spurious overflow interrupt on CPU%d: process %d, invalid flag\n", 5462 - this_cpu, 5463 - task_pid_nr(task)); 5464 - pfm_unfreeze_pmu(); 5465 - return -1; 5466 - } 5467 - 5468 - static irqreturn_t 5469 - pfm_interrupt_handler(int irq, void *arg) 5470 - { 5471 - unsigned long start_cycles, total_cycles; 5472 - unsigned long min, max; 5473 - int this_cpu; 5474 - int ret; 5475 - struct pt_regs *regs = get_irq_regs(); 5476 - 5477 - this_cpu = get_cpu(); 5478 - if (likely(!pfm_alt_intr_handler)) { 5479 - min = pfm_stats[this_cpu].pfm_ovfl_intr_cycles_min; 5480 - max = pfm_stats[this_cpu].pfm_ovfl_intr_cycles_max; 5481 - 5482 - start_cycles = ia64_get_itc(); 5483 - 5484 - ret = pfm_do_interrupt_handler(arg, regs); 5485 - 5486 - total_cycles = ia64_get_itc(); 5487 - 5488 - /* 5489 - * don't measure spurious interrupts 5490 - */ 5491 - if (likely(ret == 0)) { 5492 - total_cycles -= start_cycles; 5493 - 5494 - if (total_cycles < min) pfm_stats[this_cpu].pfm_ovfl_intr_cycles_min = total_cycles; 5495 - if (total_cycles > max) pfm_stats[this_cpu].pfm_ovfl_intr_cycles_max = total_cycles; 5496 - 5497 - pfm_stats[this_cpu].pfm_ovfl_intr_cycles += total_cycles; 5498 - } 
5499 - } 5500 - else { 5501 - (*pfm_alt_intr_handler->handler)(irq, arg, regs); 5502 - } 5503 - 5504 - put_cpu(); 5505 - return IRQ_HANDLED; 5506 - } 5507 - 5508 - /* 5509 - * /proc/perfmon interface, for debug only 5510 - */ 5511 - 5512 - #define PFM_PROC_SHOW_HEADER ((void *)(long)nr_cpu_ids+1) 5513 - 5514 - static void * 5515 - pfm_proc_start(struct seq_file *m, loff_t *pos) 5516 - { 5517 - if (*pos == 0) { 5518 - return PFM_PROC_SHOW_HEADER; 5519 - } 5520 - 5521 - while (*pos <= nr_cpu_ids) { 5522 - if (cpu_online(*pos - 1)) { 5523 - return (void *)*pos; 5524 - } 5525 - ++*pos; 5526 - } 5527 - return NULL; 5528 - } 5529 - 5530 - static void * 5531 - pfm_proc_next(struct seq_file *m, void *v, loff_t *pos) 5532 - { 5533 - ++*pos; 5534 - return pfm_proc_start(m, pos); 5535 - } 5536 - 5537 - static void 5538 - pfm_proc_stop(struct seq_file *m, void *v) 5539 - { 5540 - } 5541 - 5542 - static void 5543 - pfm_proc_show_header(struct seq_file *m) 5544 - { 5545 - struct list_head * pos; 5546 - pfm_buffer_fmt_t * entry; 5547 - unsigned long flags; 5548 - 5549 - seq_printf(m, 5550 - "perfmon version : %u.%u\n" 5551 - "model : %s\n" 5552 - "fastctxsw : %s\n" 5553 - "expert mode : %s\n" 5554 - "ovfl_mask : 0x%lx\n" 5555 - "PMU flags : 0x%x\n", 5556 - PFM_VERSION_MAJ, PFM_VERSION_MIN, 5557 - pmu_conf->pmu_name, 5558 - pfm_sysctl.fastctxsw > 0 ? "Yes": "No", 5559 - pfm_sysctl.expert_mode > 0 ? "Yes": "No", 5560 - pmu_conf->ovfl_val, 5561 - pmu_conf->flags); 5562 - 5563 - LOCK_PFS(flags); 5564 - 5565 - seq_printf(m, 5566 - "proc_sessions : %u\n" 5567 - "sys_sessions : %u\n" 5568 - "sys_use_dbregs : %u\n" 5569 - "ptrace_use_dbregs : %u\n", 5570 - pfm_sessions.pfs_task_sessions, 5571 - pfm_sessions.pfs_sys_sessions, 5572 - pfm_sessions.pfs_sys_use_dbregs, 5573 - pfm_sessions.pfs_ptrace_use_dbregs); 5574 - 5575 - UNLOCK_PFS(flags); 5576 - 5577 - spin_lock(&pfm_buffer_fmt_lock); 5578 - 5579 - list_for_each(pos, &pfm_buffer_fmt_list) { 5580 - entry = list_entry(pos, pfm_buffer_fmt_t, fmt_list); 5581 - seq_printf(m, "format : %16phD %s\n", 5582 - entry->fmt_uuid, entry->fmt_name); 5583 - } 5584 - spin_unlock(&pfm_buffer_fmt_lock); 5585 - 5586 - } 5587 - 5588 - static int 5589 - pfm_proc_show(struct seq_file *m, void *v) 5590 - { 5591 - unsigned long psr; 5592 - unsigned int i; 5593 - int cpu; 5594 - 5595 - if (v == PFM_PROC_SHOW_HEADER) { 5596 - pfm_proc_show_header(m); 5597 - return 0; 5598 - } 5599 - 5600 - /* show info for CPU (v - 1) */ 5601 - 5602 - cpu = (long)v - 1; 5603 - seq_printf(m, 5604 - "CPU%-2d overflow intrs : %lu\n" 5605 - "CPU%-2d overflow cycles : %lu\n" 5606 - "CPU%-2d overflow min : %lu\n" 5607 - "CPU%-2d overflow max : %lu\n" 5608 - "CPU%-2d smpl handler calls : %lu\n" 5609 - "CPU%-2d smpl handler cycles : %lu\n" 5610 - "CPU%-2d spurious intrs : %lu\n" 5611 - "CPU%-2d replay intrs : %lu\n" 5612 - "CPU%-2d syst_wide : %d\n" 5613 - "CPU%-2d dcr_pp : %d\n" 5614 - "CPU%-2d exclude idle : %d\n" 5615 - "CPU%-2d owner : %d\n" 5616 - "CPU%-2d context : %p\n" 5617 - "CPU%-2d activations : %lu\n", 5618 - cpu, pfm_stats[cpu].pfm_ovfl_intr_count, 5619 - cpu, pfm_stats[cpu].pfm_ovfl_intr_cycles, 5620 - cpu, pfm_stats[cpu].pfm_ovfl_intr_cycles_min, 5621 - cpu, pfm_stats[cpu].pfm_ovfl_intr_cycles_max, 5622 - cpu, pfm_stats[cpu].pfm_smpl_handler_calls, 5623 - cpu, pfm_stats[cpu].pfm_smpl_handler_cycles, 5624 - cpu, pfm_stats[cpu].pfm_spurious_ovfl_intr_count, 5625 - cpu, pfm_stats[cpu].pfm_replay_ovfl_intr_count, 5626 - cpu, pfm_get_cpu_data(pfm_syst_info, cpu) & PFM_CPUINFO_SYST_WIDE ? 
1 : 0, 5627 - cpu, pfm_get_cpu_data(pfm_syst_info, cpu) & PFM_CPUINFO_DCR_PP ? 1 : 0, 5628 - cpu, pfm_get_cpu_data(pfm_syst_info, cpu) & PFM_CPUINFO_EXCL_IDLE ? 1 : 0, 5629 - cpu, pfm_get_cpu_data(pmu_owner, cpu) ? pfm_get_cpu_data(pmu_owner, cpu)->pid: -1, 5630 - cpu, pfm_get_cpu_data(pmu_ctx, cpu), 5631 - cpu, pfm_get_cpu_data(pmu_activation_number, cpu)); 5632 - 5633 - if (num_online_cpus() == 1 && pfm_sysctl.debug > 0) { 5634 - 5635 - psr = pfm_get_psr(); 5636 - 5637 - ia64_srlz_d(); 5638 - 5639 - seq_printf(m, 5640 - "CPU%-2d psr : 0x%lx\n" 5641 - "CPU%-2d pmc0 : 0x%lx\n", 5642 - cpu, psr, 5643 - cpu, ia64_get_pmc(0)); 5644 - 5645 - for (i=0; PMC_IS_LAST(i) == 0; i++) { 5646 - if (PMC_IS_COUNTING(i) == 0) continue; 5647 - seq_printf(m, 5648 - "CPU%-2d pmc%u : 0x%lx\n" 5649 - "CPU%-2d pmd%u : 0x%lx\n", 5650 - cpu, i, ia64_get_pmc(i), 5651 - cpu, i, ia64_get_pmd(i)); 5652 - } 5653 - } 5654 - return 0; 5655 - } 5656 - 5657 - const struct seq_operations pfm_seq_ops = { 5658 - .start = pfm_proc_start, 5659 - .next = pfm_proc_next, 5660 - .stop = pfm_proc_stop, 5661 - .show = pfm_proc_show 5662 - }; 5663 - 5664 - /* 5665 - * we come here as soon as local_cpu_data->pfm_syst_wide is set. this happens 5666 - * during pfm_enable() hence before pfm_start(). We cannot assume monitoring 5667 - * is active or inactive based on mode. We must rely on the value in 5668 - * local_cpu_data->pfm_syst_info 5669 - */ 5670 - void 5671 - pfm_syst_wide_update_task(struct task_struct *task, unsigned long info, int is_ctxswin) 5672 - { 5673 - struct pt_regs *regs; 5674 - unsigned long dcr; 5675 - unsigned long dcr_pp; 5676 - 5677 - dcr_pp = info & PFM_CPUINFO_DCR_PP ? 1 : 0; 5678 - 5679 - /* 5680 - * pid 0 is guaranteed to be the idle task. There is one such task with pid 0 5681 - * on every CPU, so we can rely on the pid to identify the idle task. 5682 - */ 5683 - if ((info & PFM_CPUINFO_EXCL_IDLE) == 0 || task->pid) { 5684 - regs = task_pt_regs(task); 5685 - ia64_psr(regs)->pp = is_ctxswin ? dcr_pp : 0; 5686 - return; 5687 - } 5688 - /* 5689 - * if monitoring has started 5690 - */ 5691 - if (dcr_pp) { 5692 - dcr = ia64_getreg(_IA64_REG_CR_DCR); 5693 - /* 5694 - * context switching in? 5695 - */ 5696 - if (is_ctxswin) { 5697 - /* mask monitoring for the idle task */ 5698 - ia64_setreg(_IA64_REG_CR_DCR, dcr & ~IA64_DCR_PP); 5699 - pfm_clear_psr_pp(); 5700 - ia64_srlz_i(); 5701 - return; 5702 - } 5703 - /* 5704 - * context switching out 5705 - * restore monitoring for next task 5706 - * 5707 - * Due to inlining this odd if-then-else construction generates 5708 - * better code. 
5709 - */ 5710 - ia64_setreg(_IA64_REG_CR_DCR, dcr |IA64_DCR_PP); 5711 - pfm_set_psr_pp(); 5712 - ia64_srlz_i(); 5713 - } 5714 - } 5715 - 5716 - #ifdef CONFIG_SMP 5717 - 5718 - static void 5719 - pfm_force_cleanup(pfm_context_t *ctx, struct pt_regs *regs) 5720 - { 5721 - struct task_struct *task = ctx->ctx_task; 5722 - 5723 - ia64_psr(regs)->up = 0; 5724 - ia64_psr(regs)->sp = 1; 5725 - 5726 - if (GET_PMU_OWNER() == task) { 5727 - DPRINT(("cleared ownership for [%d]\n", 5728 - task_pid_nr(ctx->ctx_task))); 5729 - SET_PMU_OWNER(NULL, NULL); 5730 - } 5731 - 5732 - /* 5733 - * disconnect the task from the context and vice-versa 5734 - */ 5735 - PFM_SET_WORK_PENDING(task, 0); 5736 - 5737 - task->thread.pfm_context = NULL; 5738 - task->thread.flags &= ~IA64_THREAD_PM_VALID; 5739 - 5740 - DPRINT(("force cleanup for [%d]\n", task_pid_nr(task))); 5741 - } 5742 - 5743 - 5744 - /* 5745 - * in 2.6, interrupts are masked when we come here and the runqueue lock is held 5746 - */ 5747 - void 5748 - pfm_save_regs(struct task_struct *task) 5749 - { 5750 - pfm_context_t *ctx; 5751 - unsigned long flags; 5752 - u64 psr; 5753 - 5754 - 5755 - ctx = PFM_GET_CTX(task); 5756 - if (ctx == NULL) return; 5757 - 5758 - /* 5759 - * we always come here with interrupts ALREADY disabled by 5760 - * the scheduler. So we simply need to protect against concurrent 5761 - * access, not CPU concurrency. 5762 - */ 5763 - flags = pfm_protect_ctx_ctxsw(ctx); 5764 - 5765 - if (ctx->ctx_state == PFM_CTX_ZOMBIE) { 5766 - struct pt_regs *regs = task_pt_regs(task); 5767 - 5768 - pfm_clear_psr_up(); 5769 - 5770 - pfm_force_cleanup(ctx, regs); 5771 - 5772 - BUG_ON(ctx->ctx_smpl_hdr); 5773 - 5774 - pfm_unprotect_ctx_ctxsw(ctx, flags); 5775 - 5776 - pfm_context_free(ctx); 5777 - return; 5778 - } 5779 - 5780 - /* 5781 - * save current PSR: needed because we modify it 5782 - */ 5783 - ia64_srlz_d(); 5784 - psr = pfm_get_psr(); 5785 - 5786 - BUG_ON(psr & (IA64_PSR_I)); 5787 - 5788 - /* 5789 - * stop monitoring: 5790 - * This is the last instruction which may generate an overflow 5791 - * 5792 - * We do not need to set psr.sp because, it is irrelevant in kernel. 5793 - * It will be restored from ipsr when going back to user level 5794 - */ 5795 - pfm_clear_psr_up(); 5796 - 5797 - /* 5798 - * keep a copy of psr.up (for reload) 5799 - */ 5800 - ctx->ctx_saved_psr_up = psr & IA64_PSR_UP; 5801 - 5802 - /* 5803 - * release ownership of this PMU. 5804 - * PM interrupts are masked, so nothing 5805 - * can happen. 5806 - */ 5807 - SET_PMU_OWNER(NULL, NULL); 5808 - 5809 - /* 5810 - * we systematically save the PMD as we have no 5811 - * guarantee we will be schedule at that same 5812 - * CPU again. 5813 - */ 5814 - pfm_save_pmds(ctx->th_pmds, ctx->ctx_used_pmds[0]); 5815 - 5816 - /* 5817 - * save pmc0 ia64_srlz_d() done in pfm_save_pmds() 5818 - * we will need it on the restore path to check 5819 - * for pending overflow. 5820 - */ 5821 - ctx->th_pmcs[0] = ia64_get_pmc(0); 5822 - 5823 - /* 5824 - * unfreeze PMU if had pending overflows 5825 - */ 5826 - if (ctx->th_pmcs[0] & ~0x1UL) pfm_unfreeze_pmu(); 5827 - 5828 - /* 5829 - * finally, allow context access. 5830 - * interrupts will still be masked after this call. 
5831 - */ 5832 - pfm_unprotect_ctx_ctxsw(ctx, flags); 5833 - } 5834 - 5835 - #else /* !CONFIG_SMP */ 5836 - void 5837 - pfm_save_regs(struct task_struct *task) 5838 - { 5839 - pfm_context_t *ctx; 5840 - u64 psr; 5841 - 5842 - ctx = PFM_GET_CTX(task); 5843 - if (ctx == NULL) return; 5844 - 5845 - /* 5846 - * save current PSR: needed because we modify it 5847 - */ 5848 - psr = pfm_get_psr(); 5849 - 5850 - BUG_ON(psr & (IA64_PSR_I)); 5851 - 5852 - /* 5853 - * stop monitoring: 5854 - * This is the last instruction which may generate an overflow 5855 - * 5856 - * We do not need to set psr.sp because, it is irrelevant in kernel. 5857 - * It will be restored from ipsr when going back to user level 5858 - */ 5859 - pfm_clear_psr_up(); 5860 - 5861 - /* 5862 - * keep a copy of psr.up (for reload) 5863 - */ 5864 - ctx->ctx_saved_psr_up = psr & IA64_PSR_UP; 5865 - } 5866 - 5867 - static void 5868 - pfm_lazy_save_regs (struct task_struct *task) 5869 - { 5870 - pfm_context_t *ctx; 5871 - unsigned long flags; 5872 - 5873 - { u64 psr = pfm_get_psr(); 5874 - BUG_ON(psr & IA64_PSR_UP); 5875 - } 5876 - 5877 - ctx = PFM_GET_CTX(task); 5878 - 5879 - /* 5880 - * we need to mask PMU overflow here to 5881 - * make sure that we maintain pmc0 until 5882 - * we save it. overflow interrupts are 5883 - * treated as spurious if there is no 5884 - * owner. 5885 - * 5886 - * XXX: I don't think this is necessary 5887 - */ 5888 - PROTECT_CTX(ctx,flags); 5889 - 5890 - /* 5891 - * release ownership of this PMU. 5892 - * must be done before we save the registers. 5893 - * 5894 - * after this call any PMU interrupt is treated 5895 - * as spurious. 5896 - */ 5897 - SET_PMU_OWNER(NULL, NULL); 5898 - 5899 - /* 5900 - * save all the pmds we use 5901 - */ 5902 - pfm_save_pmds(ctx->th_pmds, ctx->ctx_used_pmds[0]); 5903 - 5904 - /* 5905 - * save pmc0 ia64_srlz_d() done in pfm_save_pmds() 5906 - * it is needed to check for pended overflow 5907 - * on the restore path 5908 - */ 5909 - ctx->th_pmcs[0] = ia64_get_pmc(0); 5910 - 5911 - /* 5912 - * unfreeze PMU if had pending overflows 5913 - */ 5914 - if (ctx->th_pmcs[0] & ~0x1UL) pfm_unfreeze_pmu(); 5915 - 5916 - /* 5917 - * now get can unmask PMU interrupts, they will 5918 - * be treated as purely spurious and we will not 5919 - * lose any information 5920 - */ 5921 - UNPROTECT_CTX(ctx,flags); 5922 - } 5923 - #endif /* CONFIG_SMP */ 5924 - 5925 - #ifdef CONFIG_SMP 5926 - /* 5927 - * in 2.6, interrupts are masked when we come here and the runqueue lock is held 5928 - */ 5929 - void 5930 - pfm_load_regs (struct task_struct *task) 5931 - { 5932 - pfm_context_t *ctx; 5933 - unsigned long pmc_mask = 0UL, pmd_mask = 0UL; 5934 - unsigned long flags; 5935 - u64 psr, psr_up; 5936 - int need_irq_resend; 5937 - 5938 - ctx = PFM_GET_CTX(task); 5939 - if (unlikely(ctx == NULL)) return; 5940 - 5941 - BUG_ON(GET_PMU_OWNER()); 5942 - 5943 - /* 5944 - * possible on unload 5945 - */ 5946 - if (unlikely((task->thread.flags & IA64_THREAD_PM_VALID) == 0)) return; 5947 - 5948 - /* 5949 - * we always come here with interrupts ALREADY disabled by 5950 - * the scheduler. So we simply need to protect against concurrent 5951 - * access, not CPU concurrency. 
5952 - */ 5953 - flags = pfm_protect_ctx_ctxsw(ctx); 5954 - psr = pfm_get_psr(); 5955 - 5956 - need_irq_resend = pmu_conf->flags & PFM_PMU_IRQ_RESEND; 5957 - 5958 - BUG_ON(psr & (IA64_PSR_UP|IA64_PSR_PP)); 5959 - BUG_ON(psr & IA64_PSR_I); 5960 - 5961 - if (unlikely(ctx->ctx_state == PFM_CTX_ZOMBIE)) { 5962 - struct pt_regs *regs = task_pt_regs(task); 5963 - 5964 - BUG_ON(ctx->ctx_smpl_hdr); 5965 - 5966 - pfm_force_cleanup(ctx, regs); 5967 - 5968 - pfm_unprotect_ctx_ctxsw(ctx, flags); 5969 - 5970 - /* 5971 - * this one (kmalloc'ed) is fine with interrupts disabled 5972 - */ 5973 - pfm_context_free(ctx); 5974 - 5975 - return; 5976 - } 5977 - 5978 - /* 5979 - * we restore ALL the debug registers to avoid picking up 5980 - * stale state. 5981 - */ 5982 - if (ctx->ctx_fl_using_dbreg) { 5983 - pfm_restore_ibrs(ctx->ctx_ibrs, pmu_conf->num_ibrs); 5984 - pfm_restore_dbrs(ctx->ctx_dbrs, pmu_conf->num_dbrs); 5985 - } 5986 - /* 5987 - * retrieve saved psr.up 5988 - */ 5989 - psr_up = ctx->ctx_saved_psr_up; 5990 - 5991 - /* 5992 - * if we were the last user of the PMU on that CPU, 5993 - * then nothing to do except restore psr 5994 - */ 5995 - if (GET_LAST_CPU(ctx) == smp_processor_id() && ctx->ctx_last_activation == GET_ACTIVATION()) { 5996 - 5997 - /* 5998 - * retrieve partial reload masks (due to user modifications) 5999 - */ 6000 - pmc_mask = ctx->ctx_reload_pmcs[0]; 6001 - pmd_mask = ctx->ctx_reload_pmds[0]; 6002 - 6003 - } else { 6004 - /* 6005 - * To avoid leaking information to the user level when psr.sp=0, 6006 - * we must reload ALL implemented pmds (even the ones we don't use). 6007 - * In the kernel we only allow PFM_READ_PMDS on registers which 6008 - * we initialized or requested (sampling) so there is no risk there. 6009 - */ 6010 - pmd_mask = pfm_sysctl.fastctxsw ? ctx->ctx_used_pmds[0] : ctx->ctx_all_pmds[0]; 6011 - 6012 - /* 6013 - * ALL accessible PMCs are systematically reloaded, unused registers 6014 - * get their default (from pfm_reset_pmu_state()) values to avoid picking 6015 - * up stale configuration. 6016 - * 6017 - * PMC0 is never in the mask. It is always restored separately. 6018 - */ 6019 - pmc_mask = ctx->ctx_all_pmcs[0]; 6020 - } 6021 - /* 6022 - * when context is MASKED, we will restore PMC with plm=0 6023 - * and PMD with stale information, but that's ok, nothing 6024 - * will be captured. 6025 - * 6026 - * XXX: optimize here 6027 - */ 6028 - if (pmd_mask) pfm_restore_pmds(ctx->th_pmds, pmd_mask); 6029 - if (pmc_mask) pfm_restore_pmcs(ctx->th_pmcs, pmc_mask); 6030 - 6031 - /* 6032 - * check for pending overflow at the time the state 6033 - * was saved. 
6034 - */ 6035 - if (unlikely(PMC0_HAS_OVFL(ctx->th_pmcs[0]))) { 6036 - /* 6037 - * reload pmc0 with the overflow information 6038 - * On McKinley PMU, this will trigger a PMU interrupt 6039 - */ 6040 - ia64_set_pmc(0, ctx->th_pmcs[0]); 6041 - ia64_srlz_d(); 6042 - ctx->th_pmcs[0] = 0UL; 6043 - 6044 - /* 6045 - * will replay the PMU interrupt 6046 - */ 6047 - if (need_irq_resend) ia64_resend_irq(IA64_PERFMON_VECTOR); 6048 - 6049 - pfm_stats[smp_processor_id()].pfm_replay_ovfl_intr_count++; 6050 - } 6051 - 6052 - /* 6053 - * we just did a reload, so we reset the partial reload fields 6054 - */ 6055 - ctx->ctx_reload_pmcs[0] = 0UL; 6056 - ctx->ctx_reload_pmds[0] = 0UL; 6057 - 6058 - SET_LAST_CPU(ctx, smp_processor_id()); 6059 - 6060 - /* 6061 - * dump activation value for this PMU 6062 - */ 6063 - INC_ACTIVATION(); 6064 - /* 6065 - * record current activation for this context 6066 - */ 6067 - SET_ACTIVATION(ctx); 6068 - 6069 - /* 6070 - * establish new ownership. 6071 - */ 6072 - SET_PMU_OWNER(task, ctx); 6073 - 6074 - /* 6075 - * restore the psr.up bit. measurement 6076 - * is active again. 6077 - * no PMU interrupt can happen at this point 6078 - * because we still have interrupts disabled. 6079 - */ 6080 - if (likely(psr_up)) pfm_set_psr_up(); 6081 - 6082 - /* 6083 - * allow concurrent access to context 6084 - */ 6085 - pfm_unprotect_ctx_ctxsw(ctx, flags); 6086 - } 6087 - #else /* !CONFIG_SMP */ 6088 - /* 6089 - * reload PMU state for UP kernels 6090 - * in 2.5 we come here with interrupts disabled 6091 - */ 6092 - void 6093 - pfm_load_regs (struct task_struct *task) 6094 - { 6095 - pfm_context_t *ctx; 6096 - struct task_struct *owner; 6097 - unsigned long pmd_mask, pmc_mask; 6098 - u64 psr, psr_up; 6099 - int need_irq_resend; 6100 - 6101 - owner = GET_PMU_OWNER(); 6102 - ctx = PFM_GET_CTX(task); 6103 - psr = pfm_get_psr(); 6104 - 6105 - BUG_ON(psr & (IA64_PSR_UP|IA64_PSR_PP)); 6106 - BUG_ON(psr & IA64_PSR_I); 6107 - 6108 - /* 6109 - * we restore ALL the debug registers to avoid picking up 6110 - * stale state. 6111 - * 6112 - * This must be done even when the task is still the owner 6113 - * as the registers may have been modified via ptrace() 6114 - * (not perfmon) by the previous task. 6115 - */ 6116 - if (ctx->ctx_fl_using_dbreg) { 6117 - pfm_restore_ibrs(ctx->ctx_ibrs, pmu_conf->num_ibrs); 6118 - pfm_restore_dbrs(ctx->ctx_dbrs, pmu_conf->num_dbrs); 6119 - } 6120 - 6121 - /* 6122 - * retrieved saved psr.up 6123 - */ 6124 - psr_up = ctx->ctx_saved_psr_up; 6125 - need_irq_resend = pmu_conf->flags & PFM_PMU_IRQ_RESEND; 6126 - 6127 - /* 6128 - * short path, our state is still there, just 6129 - * need to restore psr and we go 6130 - * 6131 - * we do not touch either PMC nor PMD. the psr is not touched 6132 - * by the overflow_handler. So we are safe w.r.t. to interrupt 6133 - * concurrency even without interrupt masking. 6134 - */ 6135 - if (likely(owner == task)) { 6136 - if (likely(psr_up)) pfm_set_psr_up(); 6137 - return; 6138 - } 6139 - 6140 - /* 6141 - * someone else is still using the PMU, first push it out and 6142 - * then we'll be able to install our stuff ! 6143 - * 6144 - * Upon return, there will be no owner for the current PMU 6145 - */ 6146 - if (owner) pfm_lazy_save_regs(owner); 6147 - 6148 - /* 6149 - * To avoid leaking information to the user level when psr.sp=0, 6150 - * we must reload ALL implemented pmds (even the ones we don't use). 
6151 - * In the kernel we only allow PFM_READ_PMDS on registers which 6152 - * we initialized or requested (sampling) so there is no risk there. 6153 - */ 6154 - pmd_mask = pfm_sysctl.fastctxsw ? ctx->ctx_used_pmds[0] : ctx->ctx_all_pmds[0]; 6155 - 6156 - /* 6157 - * ALL accessible PMCs are systematically reloaded, unused registers 6158 - * get their default (from pfm_reset_pmu_state()) values to avoid picking 6159 - * up stale configuration. 6160 - * 6161 - * PMC0 is never in the mask. It is always restored separately 6162 - */ 6163 - pmc_mask = ctx->ctx_all_pmcs[0]; 6164 - 6165 - pfm_restore_pmds(ctx->th_pmds, pmd_mask); 6166 - pfm_restore_pmcs(ctx->th_pmcs, pmc_mask); 6167 - 6168 - /* 6169 - * check for pending overflow at the time the state 6170 - * was saved. 6171 - */ 6172 - if (unlikely(PMC0_HAS_OVFL(ctx->th_pmcs[0]))) { 6173 - /* 6174 - * reload pmc0 with the overflow information 6175 - * On McKinley PMU, this will trigger a PMU interrupt 6176 - */ 6177 - ia64_set_pmc(0, ctx->th_pmcs[0]); 6178 - ia64_srlz_d(); 6179 - 6180 - ctx->th_pmcs[0] = 0UL; 6181 - 6182 - /* 6183 - * will replay the PMU interrupt 6184 - */ 6185 - if (need_irq_resend) ia64_resend_irq(IA64_PERFMON_VECTOR); 6186 - 6187 - pfm_stats[smp_processor_id()].pfm_replay_ovfl_intr_count++; 6188 - } 6189 - 6190 - /* 6191 - * establish new ownership. 6192 - */ 6193 - SET_PMU_OWNER(task, ctx); 6194 - 6195 - /* 6196 - * restore the psr.up bit. measurement 6197 - * is active again. 6198 - * no PMU interrupt can happen at this point 6199 - * because we still have interrupts disabled. 6200 - */ 6201 - if (likely(psr_up)) pfm_set_psr_up(); 6202 - } 6203 - #endif /* CONFIG_SMP */ 6204 - 6205 - /* 6206 - * this function assumes monitoring is stopped 6207 - */ 6208 - static void 6209 - pfm_flush_pmds(struct task_struct *task, pfm_context_t *ctx) 6210 - { 6211 - u64 pmc0; 6212 - unsigned long mask2, val, pmd_val, ovfl_val; 6213 - int i, can_access_pmu = 0; 6214 - int is_self; 6215 - 6216 - /* 6217 - * is the caller the task being monitored (or which initiated the 6218 - * session for system wide measurements) 6219 - */ 6220 - is_self = ctx->ctx_task == task ? 1 : 0; 6221 - 6222 - /* 6223 - * can access PMU is task is the owner of the PMU state on the current CPU 6224 - * or if we are running on the CPU bound to the context in system-wide mode 6225 - * (that is not necessarily the task the context is attached to in this mode). 6226 - * In system-wide we always have can_access_pmu true because a task running on an 6227 - * invalid processor is flagged earlier in the call stack (see pfm_stop). 6228 - */ 6229 - can_access_pmu = (GET_PMU_OWNER() == task) || (ctx->ctx_fl_system && ctx->ctx_cpu == smp_processor_id()); 6230 - if (can_access_pmu) { 6231 - /* 6232 - * Mark the PMU as not owned 6233 - * This will cause the interrupt handler to do nothing in case an overflow 6234 - * interrupt was in-flight 6235 - * This also guarantees that pmc0 will contain the final state 6236 - * It virtually gives us full control on overflow processing from that point 6237 - * on. 
6238 - */ 6239 - SET_PMU_OWNER(NULL, NULL); 6240 - DPRINT(("releasing ownership\n")); 6241 - 6242 - /* 6243 - * read current overflow status: 6244 - * 6245 - * we are guaranteed to read the final stable state 6246 - */ 6247 - ia64_srlz_d(); 6248 - pmc0 = ia64_get_pmc(0); /* slow */ 6249 - 6250 - /* 6251 - * reset freeze bit, overflow status information destroyed 6252 - */ 6253 - pfm_unfreeze_pmu(); 6254 - } else { 6255 - pmc0 = ctx->th_pmcs[0]; 6256 - /* 6257 - * clear whatever overflow status bits there were 6258 - */ 6259 - ctx->th_pmcs[0] = 0; 6260 - } 6261 - ovfl_val = pmu_conf->ovfl_val; 6262 - /* 6263 - * we save all the used pmds 6264 - * we take care of overflows for counting PMDs 6265 - * 6266 - * XXX: sampling situation is not taken into account here 6267 - */ 6268 - mask2 = ctx->ctx_used_pmds[0]; 6269 - 6270 - DPRINT(("is_self=%d ovfl_val=0x%lx mask2=0x%lx\n", is_self, ovfl_val, mask2)); 6271 - 6272 - for (i = 0; mask2; i++, mask2>>=1) { 6273 - 6274 - /* skip non used pmds */ 6275 - if ((mask2 & 0x1) == 0) continue; 6276 - 6277 - /* 6278 - * can access PMU always true in system wide mode 6279 - */ 6280 - val = pmd_val = can_access_pmu ? ia64_get_pmd(i) : ctx->th_pmds[i]; 6281 - 6282 - if (PMD_IS_COUNTING(i)) { 6283 - DPRINT(("[%d] pmd[%d] ctx_pmd=0x%lx hw_pmd=0x%lx\n", 6284 - task_pid_nr(task), 6285 - i, 6286 - ctx->ctx_pmds[i].val, 6287 - val & ovfl_val)); 6288 - 6289 - /* 6290 - * we rebuild the full 64 bit value of the counter 6291 - */ 6292 - val = ctx->ctx_pmds[i].val + (val & ovfl_val); 6293 - 6294 - /* 6295 - * now everything is in ctx_pmds[] and we need 6296 - * to clear the saved context from save_regs() such that 6297 - * pfm_read_pmds() gets the correct value 6298 - */ 6299 - pmd_val = 0UL; 6300 - 6301 - /* 6302 - * take care of overflow inline 6303 - */ 6304 - if (pmc0 & (1UL << i)) { 6305 - val += 1 + ovfl_val; 6306 - DPRINT(("[%d] pmd[%d] overflowed\n", task_pid_nr(task), i)); 6307 - } 6308 - } 6309 - 6310 - DPRINT(("[%d] ctx_pmd[%d]=0x%lx pmd_val=0x%lx\n", task_pid_nr(task), i, val, pmd_val)); 6311 - 6312 - if (is_self) ctx->th_pmds[i] = pmd_val; 6313 - 6314 - ctx->ctx_pmds[i].val = val; 6315 - } 6316 - } 6317 - 6318 - static void 6319 - pfm_alt_save_pmu_state(void *data) 6320 - { 6321 - struct pt_regs *regs; 6322 - 6323 - regs = task_pt_regs(current); 6324 - 6325 - DPRINT(("called\n")); 6326 - 6327 - /* 6328 - * should not be necessary but 6329 - * let's take not risk 6330 - */ 6331 - pfm_clear_psr_up(); 6332 - pfm_clear_psr_pp(); 6333 - ia64_psr(regs)->pp = 0; 6334 - 6335 - /* 6336 - * This call is required 6337 - * May cause a spurious interrupt on some processors 6338 - */ 6339 - pfm_freeze_pmu(); 6340 - 6341 - ia64_srlz_d(); 6342 - } 6343 - 6344 - void 6345 - pfm_alt_restore_pmu_state(void *data) 6346 - { 6347 - struct pt_regs *regs; 6348 - 6349 - regs = task_pt_regs(current); 6350 - 6351 - DPRINT(("called\n")); 6352 - 6353 - /* 6354 - * put PMU back in state expected 6355 - * by perfmon 6356 - */ 6357 - pfm_clear_psr_up(); 6358 - pfm_clear_psr_pp(); 6359 - ia64_psr(regs)->pp = 0; 6360 - 6361 - /* 6362 - * perfmon runs with PMU unfrozen at all times 6363 - */ 6364 - pfm_unfreeze_pmu(); 6365 - 6366 - ia64_srlz_d(); 6367 - } 6368 - 6369 - int 6370 - pfm_install_alt_pmu_interrupt(pfm_intr_handler_desc_t *hdl) 6371 - { 6372 - int ret, i; 6373 - int reserve_cpu; 6374 - 6375 - /* some sanity checks */ 6376 - if (hdl == NULL || hdl->handler == NULL) return -EINVAL; 6377 - 6378 - /* do the easy test first */ 6379 - if (pfm_alt_intr_handler) return -EBUSY; 6380 - 6381 
- /* one at a time in the install or remove, just fail the others */ 6382 - if (!spin_trylock(&pfm_alt_install_check)) { 6383 - return -EBUSY; 6384 - } 6385 - 6386 - /* reserve our session */ 6387 - for_each_online_cpu(reserve_cpu) { 6388 - ret = pfm_reserve_session(NULL, 1, reserve_cpu); 6389 - if (ret) goto cleanup_reserve; 6390 - } 6391 - 6392 - /* save the current system wide pmu states */ 6393 - on_each_cpu(pfm_alt_save_pmu_state, NULL, 1); 6394 - 6395 - /* officially change to the alternate interrupt handler */ 6396 - pfm_alt_intr_handler = hdl; 6397 - 6398 - spin_unlock(&pfm_alt_install_check); 6399 - 6400 - return 0; 6401 - 6402 - cleanup_reserve: 6403 - for_each_online_cpu(i) { 6404 - /* don't unreserve more than we reserved */ 6405 - if (i >= reserve_cpu) break; 6406 - 6407 - pfm_unreserve_session(NULL, 1, i); 6408 - } 6409 - 6410 - spin_unlock(&pfm_alt_install_check); 6411 - 6412 - return ret; 6413 - } 6414 - EXPORT_SYMBOL_GPL(pfm_install_alt_pmu_interrupt); 6415 - 6416 - int 6417 - pfm_remove_alt_pmu_interrupt(pfm_intr_handler_desc_t *hdl) 6418 - { 6419 - int i; 6420 - 6421 - if (hdl == NULL) return -EINVAL; 6422 - 6423 - /* cannot remove someone else's handler! */ 6424 - if (pfm_alt_intr_handler != hdl) return -EINVAL; 6425 - 6426 - /* one at a time in the install or remove, just fail the others */ 6427 - if (!spin_trylock(&pfm_alt_install_check)) { 6428 - return -EBUSY; 6429 - } 6430 - 6431 - pfm_alt_intr_handler = NULL; 6432 - 6433 - on_each_cpu(pfm_alt_restore_pmu_state, NULL, 1); 6434 - 6435 - for_each_online_cpu(i) { 6436 - pfm_unreserve_session(NULL, 1, i); 6437 - } 6438 - 6439 - spin_unlock(&pfm_alt_install_check); 6440 - 6441 - return 0; 6442 - } 6443 - EXPORT_SYMBOL_GPL(pfm_remove_alt_pmu_interrupt); 6444 - 6445 - /* 6446 - * perfmon initialization routine, called from the initcall() table 6447 - */ 6448 - static int init_pfm_fs(void); 6449 - 6450 - static int __init 6451 - pfm_probe_pmu(void) 6452 - { 6453 - pmu_config_t **p; 6454 - int family; 6455 - 6456 - family = local_cpu_data->family; 6457 - p = pmu_confs; 6458 - 6459 - while(*p) { 6460 - if ((*p)->probe) { 6461 - if ((*p)->probe() == 0) goto found; 6462 - } else if ((*p)->pmu_family == family || (*p)->pmu_family == 0xff) { 6463 - goto found; 6464 - } 6465 - p++; 6466 - } 6467 - return -1; 6468 - found: 6469 - pmu_conf = *p; 6470 - return 0; 6471 - } 6472 - 6473 - int __init 6474 - pfm_init(void) 6475 - { 6476 - unsigned int n, n_counters, i; 6477 - 6478 - printk("perfmon: version %u.%u IRQ %u\n", 6479 - PFM_VERSION_MAJ, 6480 - PFM_VERSION_MIN, 6481 - IA64_PERFMON_VECTOR); 6482 - 6483 - if (pfm_probe_pmu()) { 6484 - printk(KERN_INFO "perfmon: disabled, there is no support for processor family %d\n", 6485 - local_cpu_data->family); 6486 - return -ENODEV; 6487 - } 6488 - 6489 - /* 6490 - * compute the number of implemented PMD/PMC from the 6491 - * description tables 6492 - */ 6493 - n = 0; 6494 - for (i=0; PMC_IS_LAST(i) == 0; i++) { 6495 - if (PMC_IS_IMPL(i) == 0) continue; 6496 - pmu_conf->impl_pmcs[i>>6] |= 1UL << (i&63); 6497 - n++; 6498 - } 6499 - pmu_conf->num_pmcs = n; 6500 - 6501 - n = 0; n_counters = 0; 6502 - for (i=0; PMD_IS_LAST(i) == 0; i++) { 6503 - if (PMD_IS_IMPL(i) == 0) continue; 6504 - pmu_conf->impl_pmds[i>>6] |= 1UL << (i&63); 6505 - n++; 6506 - if (PMD_IS_COUNTING(i)) n_counters++; 6507 - } 6508 - pmu_conf->num_pmds = n; 6509 - pmu_conf->num_counters = n_counters; 6510 - 6511 - /* 6512 - * sanity checks on the number of debug registers 6513 - */ 6514 - if (pmu_conf->use_rr_dbregs) { 6515 - 
if (pmu_conf->num_ibrs > IA64_NUM_DBG_REGS) { 6516 - printk(KERN_INFO "perfmon: unsupported number of code debug registers (%u)\n", pmu_conf->num_ibrs); 6517 - pmu_conf = NULL; 6518 - return -1; 6519 - } 6520 - if (pmu_conf->num_dbrs > IA64_NUM_DBG_REGS) { 6521 - printk(KERN_INFO "perfmon: unsupported number of data debug registers (%u)\n", pmu_conf->num_ibrs); 6522 - pmu_conf = NULL; 6523 - return -1; 6524 - } 6525 - } 6526 - 6527 - printk("perfmon: %s PMU detected, %u PMCs, %u PMDs, %u counters (%lu bits)\n", 6528 - pmu_conf->pmu_name, 6529 - pmu_conf->num_pmcs, 6530 - pmu_conf->num_pmds, 6531 - pmu_conf->num_counters, 6532 - ffz(pmu_conf->ovfl_val)); 6533 - 6534 - /* sanity check */ 6535 - if (pmu_conf->num_pmds >= PFM_NUM_PMD_REGS || pmu_conf->num_pmcs >= PFM_NUM_PMC_REGS) { 6536 - printk(KERN_ERR "perfmon: not enough pmc/pmd, perfmon disabled\n"); 6537 - pmu_conf = NULL; 6538 - return -1; 6539 - } 6540 - 6541 - /* 6542 - * create /proc/perfmon (mostly for debugging purposes) 6543 - */ 6544 - perfmon_dir = proc_create_seq("perfmon", S_IRUGO, NULL, &pfm_seq_ops); 6545 - if (perfmon_dir == NULL) { 6546 - printk(KERN_ERR "perfmon: cannot create /proc entry, perfmon disabled\n"); 6547 - pmu_conf = NULL; 6548 - return -1; 6549 - } 6550 - 6551 - /* 6552 - * create /proc/sys/kernel/perfmon (for debugging purposes) 6553 - */ 6554 - pfm_sysctl_header = register_sysctl_table(pfm_sysctl_root); 6555 - 6556 - /* 6557 - * initialize all our spinlocks 6558 - */ 6559 - spin_lock_init(&pfm_sessions.pfs_lock); 6560 - spin_lock_init(&pfm_buffer_fmt_lock); 6561 - 6562 - init_pfm_fs(); 6563 - 6564 - for(i=0; i < NR_CPUS; i++) pfm_stats[i].pfm_ovfl_intr_cycles_min = ~0UL; 6565 - 6566 - return 0; 6567 - } 6568 - 6569 - __initcall(pfm_init); 6570 - 6571 - /* 6572 - * this function is called before pfm_init() 6573 - */ 6574 - void 6575 - pfm_init_percpu (void) 6576 - { 6577 - static int first_time=1; 6578 - /* 6579 - * make sure no measurement is active 6580 - * (may inherit programmed PMCs from EFI). 6581 - */ 6582 - pfm_clear_psr_pp(); 6583 - pfm_clear_psr_up(); 6584 - 6585 - /* 6586 - * we run with the PMU not frozen at all times 6587 - */ 6588 - pfm_unfreeze_pmu(); 6589 - 6590 - if (first_time) { 6591 - register_percpu_irq(IA64_PERFMON_VECTOR, pfm_interrupt_handler, 6592 - 0, "perfmon"); 6593 - first_time=0; 6594 - } 6595 - 6596 - ia64_setreg(_IA64_REG_CR_PMV, IA64_PERFMON_VECTOR); 6597 - ia64_srlz_d(); 6598 - } 6599 - 6600 - /* 6601 - * used for debug purposes only 6602 - */ 6603 - void 6604 - dump_pmu_state(const char *from) 6605 - { 6606 - struct task_struct *task; 6607 - struct pt_regs *regs; 6608 - pfm_context_t *ctx; 6609 - unsigned long psr, dcr, info, flags; 6610 - int i, this_cpu; 6611 - 6612 - local_irq_save(flags); 6613 - 6614 - this_cpu = smp_processor_id(); 6615 - regs = task_pt_regs(current); 6616 - info = PFM_CPUINFO_GET(); 6617 - dcr = ia64_getreg(_IA64_REG_CR_DCR); 6618 - 6619 - if (info == 0 && ia64_psr(regs)->pp == 0 && (dcr & IA64_DCR_PP) == 0) { 6620 - local_irq_restore(flags); 6621 - return; 6622 - } 6623 - 6624 - printk("CPU%d from %s() current [%d] iip=0x%lx %s\n", 6625 - this_cpu, 6626 - from, 6627 - task_pid_nr(current), 6628 - regs->cr_iip, 6629 - current->comm); 6630 - 6631 - task = GET_PMU_OWNER(); 6632 - ctx = GET_PMU_CTX(); 6633 - 6634 - printk("->CPU%d owner [%d] ctx=%p\n", this_cpu, task ? 
task_pid_nr(task) : -1, ctx); 6635 - 6636 - psr = pfm_get_psr(); 6637 - 6638 - printk("->CPU%d pmc0=0x%lx psr.pp=%d psr.up=%d dcr.pp=%d syst_info=0x%lx user_psr.up=%d user_psr.pp=%d\n", 6639 - this_cpu, 6640 - ia64_get_pmc(0), 6641 - psr & IA64_PSR_PP ? 1 : 0, 6642 - psr & IA64_PSR_UP ? 1 : 0, 6643 - dcr & IA64_DCR_PP ? 1 : 0, 6644 - info, 6645 - ia64_psr(regs)->up, 6646 - ia64_psr(regs)->pp); 6647 - 6648 - ia64_psr(regs)->up = 0; 6649 - ia64_psr(regs)->pp = 0; 6650 - 6651 - for (i=1; PMC_IS_LAST(i) == 0; i++) { 6652 - if (PMC_IS_IMPL(i) == 0) continue; 6653 - printk("->CPU%d pmc[%d]=0x%lx thread_pmc[%d]=0x%lx\n", this_cpu, i, ia64_get_pmc(i), i, ctx->th_pmcs[i]); 6654 - } 6655 - 6656 - for (i=1; PMD_IS_LAST(i) == 0; i++) { 6657 - if (PMD_IS_IMPL(i) == 0) continue; 6658 - printk("->CPU%d pmd[%d]=0x%lx thread_pmd[%d]=0x%lx\n", this_cpu, i, ia64_get_pmd(i), i, ctx->th_pmds[i]); 6659 - } 6660 - 6661 - if (ctx) { 6662 - printk("->CPU%d ctx_state=%d vaddr=%p addr=%p fd=%d ctx_task=[%d] saved_psr_up=0x%lx\n", 6663 - this_cpu, 6664 - ctx->ctx_state, 6665 - ctx->ctx_smpl_vaddr, 6666 - ctx->ctx_smpl_hdr, 6667 - ctx->ctx_msgq_head, 6668 - ctx->ctx_msgq_tail, 6669 - ctx->ctx_saved_psr_up); 6670 - } 6671 - local_irq_restore(flags); 6672 - } 6673 - 6674 - /* 6675 - * called from process.c:copy_thread(). task is new child. 6676 - */ 6677 - void 6678 - pfm_inherit(struct task_struct *task, struct pt_regs *regs) 6679 - { 6680 - struct thread_struct *thread; 6681 - 6682 - DPRINT(("perfmon: pfm_inherit clearing state for [%d]\n", task_pid_nr(task))); 6683 - 6684 - thread = &task->thread; 6685 - 6686 - /* 6687 - * cut links inherited from parent (current) 6688 - */ 6689 - thread->pfm_context = NULL; 6690 - 6691 - PFM_SET_WORK_PENDING(task, 0); 6692 - 6693 - /* 6694 - * the psr bits are already set properly in copy_threads() 6695 - */ 6696 - } 6697 - #else /* !CONFIG_PERFMON */ 6698 - asmlinkage long 6699 - sys_perfmonctl (int fd, int cmd, void *arg, int count) 6700 - { 6701 - return -ENOSYS; 6702 - } 6703 - #endif /* CONFIG_PERFMON */
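The /proc/perfmon code deleted above follows the standard seq_file iterator pattern: start/next/stop/show callbacks registered through proc_create_seq(), with position 0 reserved for a header token. A minimal sketch of that pattern, assuming hypothetical demo_* names (the API calls themselves are the real kernel interfaces):

#include <linux/cpumask.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

#define DEMO_SHOW_HEADER ((void *)1UL)   /* position 0 prints a header */

static void *demo_start(struct seq_file *m, loff_t *pos)
{
        if (*pos == 0)
                return DEMO_SHOW_HEADER;
        /* positions 1..nr_cpu_ids map to CPU (*pos - 1) */
        return *pos <= nr_cpu_ids ? (void *)(unsigned long)*pos : NULL;
}

static void *demo_next(struct seq_file *m, void *v, loff_t *pos)
{
        ++*pos;
        return demo_start(m, pos);
}

static void demo_stop(struct seq_file *m, void *v)
{
}

static int demo_show(struct seq_file *m, void *v)
{
        if (v == DEMO_SHOW_HEADER)
                seq_puts(m, "header line\n");
        else
                seq_printf(m, "CPU%ld\n", (long)v - 1);
        return 0;
}

static const struct seq_operations demo_seq_ops = {
        .start = demo_start,
        .next  = demo_next,
        .stop  = demo_stop,
        .show  = demo_show,
};

/* registration, as pfm_init() did:
 *      proc_create_seq("demo", 0444, NULL, &demo_seq_ops);
 */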
-53
arch/ia64/kernel/process.c
··· 51 51
52 52 #include "entry.h"
53 53
54 - #ifdef CONFIG_PERFMON
55 - # include <asm/perfmon.h>
56 - #endif
57 -
58 54 #include "sigframe.h"
59 55
60 56 void (*ia64_mark_idle)(int);
··· 170 174 return;
171 175 }
172 176
173 - #ifdef CONFIG_PERFMON
174 - if (current->thread.pfm_needs_checking)
175 - /*
176 - * Note: pfm_handle_work() allows us to call it with interrupts
177 - * disabled, and may enable interrupts within the function.
178 - */
179 - pfm_handle_work();
180 - #endif
181 -
182 177 /* deal with pending signal delivery */
183 178 if (test_thread_flag(TIF_SIGPENDING)) {
184 179 local_irq_enable(); /* force interrupt enable */
··· 251 264 void
252 265 ia64_save_extra (struct task_struct *task)
253 266 {
254 - #ifdef CONFIG_PERFMON
255 - unsigned long info;
256 - #endif
257 -
258 267 if ((task->thread.flags & IA64_THREAD_DBG_VALID) != 0)
259 268 ia64_save_debug_regs(&task->thread.dbr[0]);
260 -
261 - #ifdef CONFIG_PERFMON
262 - if ((task->thread.flags & IA64_THREAD_PM_VALID) != 0)
263 - pfm_save_regs(task);
264 -
265 - info = __this_cpu_read(pfm_syst_info);
266 - if (info & PFM_CPUINFO_SYST_WIDE)
267 - pfm_syst_wide_update_task(task, info, 0);
268 - #endif
269 269 }
270 270
271 271 void
272 272 ia64_load_extra (struct task_struct *task)
273 273 {
274 - #ifdef CONFIG_PERFMON
275 - unsigned long info;
276 - #endif
277 -
278 274 if ((task->thread.flags & IA64_THREAD_DBG_VALID) != 0)
279 275 ia64_load_debug_regs(&task->thread.dbr[0]);
280 -
281 - #ifdef CONFIG_PERFMON
282 - if ((task->thread.flags & IA64_THREAD_PM_VALID) != 0)
283 - pfm_load_regs(task);
284 -
285 - info = __this_cpu_read(pfm_syst_info);
286 - if (info & PFM_CPUINFO_SYST_WIDE)
287 - pfm_syst_wide_update_task(task, info, 1);
288 - #endif
289 276 }
290 277
291 278 /*
··· 393 432 */
394 433 child_ptregs->cr_ipsr = ((child_ptregs->cr_ipsr | IA64_PSR_BITS_TO_SET)
395 434 & ~(IA64_PSR_BITS_TO_CLEAR | IA64_PSR_PP | IA64_PSR_UP));
396 -
397 - #ifdef CONFIG_PERFMON
398 - if (current->thread.pfm_context)
399 - pfm_inherit(p, child_ptregs);
400 - #endif
401 435 return retval;
402 436
403 437
··· 519 563 {
520 564
521 565 ia64_drop_fpu(tsk);
522 - #ifdef CONFIG_PERFMON
523 - /* if needed, stop monitoring and flush state to perfmon context */
524 - if (tsk->thread.pfm_context)
525 - pfm_exit_thread(tsk);
526 -
527 - /* free debug register resources */
528 - if (tsk->thread.flags & IA64_THREAD_DBG_VALID)
529 - pfm_release_debug_registers(tsk);
530 - #endif
531 566 }
532 567
533 568 unsigned long
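The do_notify_resume_user() hunk above drops perfmon's deferred-work hook. The mechanism behind it is generic: interrupt-context code marks a task with set_notify_resume(), and the flagged task then runs the real work in process context on its way back to user mode. A hedged sketch of the pattern (handler names are hypothetical; set_notify_resume() is the real helper, declared in <linux/tracehook.h> at the time of this commit):

#include <linux/sched.h>
#include <linux/tracehook.h>    /* set_notify_resume() */

/* interrupt context: too restrictive to do real work here, so just
 * flag the task; it will take the slow return-to-user path */
static void demo_queue_exit_work(struct task_struct *task)
{
        set_notify_resume(task);        /* sets TIF_NOTIFY_RESUME */
}

/* process context, reached from the arch's do_notify_resume path:
 * interrupts may be re-enabled here and the handler may block */
static void demo_handle_exit_work(void)
{
        /* ... drain whatever was queued for current ... */
}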
-24
arch/ia64/kernel/ptrace.c
··· 30 30 #include <asm/rse.h>
31 31 #include <linux/uaccess.h>
32 32 #include <asm/unwind.h>
33 - #ifdef CONFIG_PERFMON
34 - #include <asm/perfmon.h>
35 - #endif
36 33
37 34 #include "entry.h"
38 35
··· 1948 1951 "address 0x%lx\n", addr);
1949 1952 return -1;
1950 1953 }
1951 - #ifdef CONFIG_PERFMON
1952 - /*
1953 - * Check if debug registers are used by perfmon. This
1954 - * test must be done once we know that we can do the
1955 - * operation, i.e. the arguments are all valid, but
1956 - * before we start modifying the state.
1957 - *
1958 - * Perfmon needs to keep a count of how many processes
1959 - * are trying to modify the debug registers for system
1960 - * wide monitoring sessions.
1961 - *
1962 - * We also include read access here, because they may
1963 - * cause the PMU-installed debug register state
1964 - * (dbr[], ibr[]) to be reset. The two arrays are also
1965 - * used by perfmon, but we do not use
1966 - * IA64_THREAD_DBG_VALID. The registers are restored
1967 - * by the PMU context switch code.
1968 - */
1969 - if (pfm_use_debug_registers(child))
1970 - return -1;
1971 - #endif
1972 1954
1973 1955 if (!(child->thread.flags & IA64_THREAD_DBG_VALID)) {
1974 1956 child->thread.flags |= IA64_THREAD_DBG_VALID;
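The removed ptrace hook relied on pfm_use_debug_registers() to keep ptrace and perfmon from clobbering each other's ibr/dbr state, essentially by refcounting both classes of users under a lock. A rough sketch of that arbitration idea (all names below are hypothetical):

#include <linux/errno.h>
#include <linux/spinlock.h>

static unsigned int dbg_ptrace_users;  /* tasks touching dbr/ibr via ptrace */
static unsigned int dbg_pmu_users;     /* system-wide PMU sessions using them */
static DEFINE_SPINLOCK(dbg_owner_lock);

static int demo_claim_dbregs_for_ptrace(void)
{
        int ret = 0;

        spin_lock(&dbg_owner_lock);
        if (dbg_pmu_users)
                ret = -EBUSY;           /* PMU sessions own the registers */
        else
                dbg_ptrace_users++;
        spin_unlock(&dbg_owner_lock);
        return ret;
}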
-8
arch/ia64/kernel/smpboot.c
··· 355 355 extern void ia64_init_itm(void);
356 356 extern volatile int time_keeper_id;
357 357
358 - #ifdef CONFIG_PERFMON
359 - extern void pfm_init_percpu(void);
360 - #endif
361 -
362 358 cpuid = smp_processor_id();
363 359 phys_id = hard_smp_processor_id();
364 360 itc_master = time_keeper_id;
··· 384 388 smp_setup_percpu_timer();
385 389
386 390 ia64_mca_cmc_vector_setup(); /* Setup vector on AP */
387 -
388 - #ifdef CONFIG_PERFMON
389 - pfm_init_percpu();
390 - #endif
391 391
392 392 local_irq_enable();
393 393
+1 -1
arch/ia64/kernel/syscalls/syscall.tbl
··· 160 160 148 common mmap2 sys_mmap2
161 161 149 common pciconfig_read sys_pciconfig_read
162 162 150 common pciconfig_write sys_pciconfig_write
163 - 151 common perfmonctl sys_perfmonctl
163 + 151 common perfmonctl sys_ni_syscall
164 164 152 common sigaltstack sys_sigaltstack
165 165 153 common rt_sigaction sys_rt_sigaction
166 166 154 common rt_sigpending sys_rt_sigpending
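Pointing slot 151 at sys_ni_syscall keeps the syscall number reserved while making every call fail with ENOSYS. A userspace probe would observe something like the sketch below (assuming the libc headers still define __NR_perfmonctl, which is 1024 + 151 = 1175 on ia64):

#include <errno.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>

int main(void)
{
#ifdef __NR_perfmonctl
        long ret = syscall(__NR_perfmonctl, -1, 0, NULL, 0);

        if (ret == -1 && errno == ENOSYS)
                printf("perfmonctl: wired off (ENOSYS)\n");
#endif
        return 0;
}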
-1
arch/ia64/lib/Makefile
··· 12 12
13 13 lib-$(CONFIG_ITANIUM) += copy_page.o copy_user.o memcpy.o
14 14 lib-$(CONFIG_MCKINLEY) += copy_page_mck.o memcpy_mck.o
15 - lib-$(CONFIG_PERFMON) += carta_random.o
16 15
17 16 AFLAGS___divdi3.o =
18 17 AFLAGS___udivdi3.o = -DUNSIGNED
-55
arch/ia64/lib/carta_random.S
··· 1 - /* SPDX-License-Identifier: GPL-2.0 */
2 - /*
3 - * Fast, simple, yet decent quality random number generator based on
4 - * a paper by David G. Carta ("Two Fast Implementations of the
5 - * `Minimal Standard' Random Number Generator," Communications of the
6 - * ACM, January, 1990).
7 - *
8 - * Copyright (C) 2002 Hewlett-Packard Co
9 - * David Mosberger-Tang <davidm@hpl.hp.com>
10 - */
11 -
12 - #include <asm/asmmacro.h>
13 -
14 - #define a r2
15 - #define m r3
16 - #define lo r8
17 - #define hi r9
18 - #define t0 r16
19 - #define t1 r17
20 - #define seed r32
21 -
22 - GLOBAL_ENTRY(carta_random32)
23 - movl a = (16807 << 16) | 16807
24 - ;;
25 - pmpyshr2.u t0 = a, seed, 0
26 - pmpyshr2.u t1 = a, seed, 16
27 - ;;
28 - unpack2.l t0 = t1, t0
29 - dep m = -1, r0, 0, 31
30 - ;;
31 - zxt4 lo = t0
32 - shr.u hi = t0, 32
33 - ;;
34 - dep t0 = 0, hi, 15, 49 // t0 = (hi & 0x7fff)
35 - ;;
36 - shl t0 = t0, 16 // t0 = (hi & 0x7fff) << 16
37 - shr t1 = hi, 15 // t1 = (hi >> 15)
38 - ;;
39 - add lo = lo, t0
40 - ;;
41 - cmp.gtu p6, p0 = lo, m
42 - ;;
43 - (p6) and lo = lo, m
44 - ;;
45 - (p6) add lo = 1, lo
46 - ;;
47 - add lo = lo, t1
48 - ;;
49 - cmp.gtu p6, p0 = lo, m
50 - ;;
51 - (p6) and lo = lo, m
52 - ;;
53 - (p6) add lo = 1, lo
54 - br.ret.sptk.many rp
55 - END(carta_random32)
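The deleted assembly is Carta's division-free reduction for the Park-Miller "minimal standard" generator: compute 16807 * seed and fold the product modulo 2^31 - 1 using the identity 2^31 == 1 (mod 2^31 - 1). A plain C rendering of the same algorithm, as a sketch rather than a drop-in replacement (the ia64 version splits the multiply differently to use pmpyshr2):

#include <stdint.h>

#define PM_M 0x7fffffffu                /* 2^31 - 1, a Mersenne prime */

static uint32_t carta_random32(uint32_t seed)
{
        uint64_t p  = 16807ULL * seed;          /* at most a 46-bit product */
        uint32_t lo = p & PM_M;                 /* low 31 bits */
        uint32_t hi = (uint32_t)(p >> 31);      /* high bits; 2^31 == 1 mod M */
        uint32_t s  = lo + hi;                  /* fold; one correction suffices */

        if (s > PM_M)
                s -= PM_M;
        return s;                               /* = (16807 * seed) % (2^31 - 1) */
}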
-1
arch/ia64/oprofile/Makefile
··· 8 8 timer_int.o )
9 9
10 10 oprofile-y := $(DRIVER_OBJS) init.o backtrace.o
11 - oprofile-$(CONFIG_PERFMON) += perfmon.o
+1 -11
arch/ia64/oprofile/init.c
··· 18 18
19 19 int __init oprofile_arch_init(struct oprofile_operations *ops)
20 20 {
21 - int ret = -ENODEV;
22 -
23 - #ifdef CONFIG_PERFMON
24 - /* perfmon_init() can fail, but we have no way to report it */
25 - ret = perfmon_init(ops);
26 - #endif
27 21 ops->backtrace = ia64_backtrace;
28 -
29 - return ret;
22 + return -ENODEV;
30 23 }
31 24
32 25
33 26 void oprofile_arch_exit(void)
34 27 {
35 - #ifdef CONFIG_PERFMON
36 - perfmon_exit();
37 - #endif
38 28 }
-99
arch/ia64/oprofile/perfmon.c
··· 1 - /**
2 - * @file perfmon.c
3 - *
4 - * @remark Copyright 2003 OProfile authors
5 - * @remark Read the file COPYING
6 - *
7 - * @author John Levon <levon@movementarian.org>
8 - */
9 -
10 - #include <linux/kernel.h>
11 - #include <linux/oprofile.h>
12 - #include <linux/sched.h>
13 - #include <asm/perfmon.h>
14 - #include <asm/ptrace.h>
15 - #include <asm/errno.h>
16 -
17 - static int allow_ints;
18 -
19 - static int
20 - perfmon_handler(struct task_struct *task, void *buf, pfm_ovfl_arg_t *arg,
21 - struct pt_regs *regs, unsigned long stamp)
22 - {
23 - int event = arg->pmd_eventid;
24 -
25 - arg->ovfl_ctrl.bits.reset_ovfl_pmds = 1;
26 -
27 - /* the owner of the oprofile event buffer may have exited
28 - * without perfmon being shut down (e.g. SIGSEGV)
29 - */
30 - if (allow_ints)
31 - oprofile_add_sample(regs, event);
32 - return 0;
33 - }
34 -
35 -
36 - static int perfmon_start(void)
37 - {
38 - allow_ints = 1;
39 - return 0;
40 - }
41 -
42 -
43 - static void perfmon_stop(void)
44 - {
45 - allow_ints = 0;
46 - }
47 -
48 -
49 - #define OPROFILE_FMT_UUID { \
50 - 0x77, 0x7a, 0x6e, 0x61, 0x20, 0x65, 0x73, 0x69, 0x74, 0x6e, 0x72, 0x20, 0x61, 0x65, 0x0a, 0x6c }
51 -
52 - static pfm_buffer_fmt_t oprofile_fmt = {
53 - .fmt_name = "oprofile_format",
54 - .fmt_uuid = OPROFILE_FMT_UUID,
55 - .fmt_handler = perfmon_handler,
56 - };
57 -
58 -
59 - static char *get_cpu_type(void)
60 - {
61 - __u8 family = local_cpu_data->family;
62 -
63 - switch (family) {
64 - case 0x07:
65 - return "ia64/itanium";
66 - case 0x1f:
67 - return "ia64/itanium2";
68 - default:
69 - return "ia64/ia64";
70 - }
71 - }
72 -
73 -
74 - /* all the ops are handled via userspace for IA64 perfmon */
75 -
76 - static int using_perfmon;
77 -
78 - int perfmon_init(struct oprofile_operations *ops)
79 - {
80 - int ret = pfm_register_buffer_fmt(&oprofile_fmt);
81 - if (ret)
82 - return -ENODEV;
83 -
84 - ops->cpu_type = get_cpu_type();
85 - ops->start = perfmon_start;
86 - ops->stop = perfmon_stop;
87 - using_perfmon = 1;
88 - printk(KERN_INFO "oprofile: using perfmon.\n");
89 - return 0;
90 - }
91 -
92 -
93 - void perfmon_exit(void)
94 - {
95 - if (!using_perfmon)
96 - return;
97 -
98 - pfm_unregister_buffer_fmt(oprofile_fmt.fmt_uuid);
99 - }