Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc-2.6

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc-2.6:
sparc: using HZ needs an include of linux/param.h
sparc32: convert to asm-generic/hardirq.h
sparc64: Cache per-cpu %pcr register value in perf code.
sparc64: Fix comment typo in perf_event.c
sparc64: Minor coding style fixups in perf code.
sparc64: Add a basic conflict engine in preparation for multi-counter support.
sparc64: Increase vmalloc size to fix percpu regressions.
sparc64: Add initial perf event conflict resolution and checks.
sparc: Niagara1 perf event support.
sparc: Add Niagara2 HW cache event support.
sparc: Support all ultra3 and ultra4 derivatives.
sparc: Support HW cache events.

+547 -59
+1 -11
arch/sparc/include/asm/hardirq_32.h
··· 7 7 #ifndef __SPARC_HARDIRQ_H 8 8 #define __SPARC_HARDIRQ_H 9 9 10 - #include <linux/threads.h> 11 - #include <linux/spinlock.h> 12 - #include <linux/cache.h> 13 - 14 - /* entry.S is sensitive to the offsets of these fields */ /* XXX P3 Is it? */ 15 - typedef struct { 16 - unsigned int __softirq_pending; 17 - } ____cacheline_aligned irq_cpustat_t; 18 - 19 - #include <linux/irq_cpustat.h> /* Standard mappings for irq_cpustat_t above */ 20 - 21 10 #define HARDIRQ_BITS 8 11 + #include <asm-generic/hardirq.h> 22 12 23 13 #endif /* __SPARC_HARDIRQ_H */
+2 -2
arch/sparc/include/asm/irq_32.h
··· 6 6 #ifndef _SPARC_IRQ_H 7 7 #define _SPARC_IRQ_H 8 8 9 - #include <linux/interrupt.h> 10 - 11 9 #define NR_IRQS 16 10 + 11 + #include <linux/interrupt.h> 12 12 13 13 #define irq_canonicalize(irq) (irq) 14 14
+2 -2
arch/sparc/include/asm/pgtable_64.h
··· 41 41 #define LOW_OBP_ADDRESS _AC(0x00000000f0000000,UL) 42 42 #define HI_OBP_ADDRESS _AC(0x0000000100000000,UL) 43 43 #define VMALLOC_START _AC(0x0000000100000000,UL) 44 - #define VMALLOC_END _AC(0x0000000200000000,UL) 45 - #define VMEMMAP_BASE _AC(0x0000000200000000,UL) 44 + #define VMALLOC_END _AC(0x0000010000000000,UL) 45 + #define VMEMMAP_BASE _AC(0x0000010000000000,UL) 46 46 47 47 #define vmemmap ((struct page *)VMEMMAP_BASE) 48 48
+4 -4
arch/sparc/kernel/ktlb.S
··· 280 280 281 281 #ifdef CONFIG_SPARSEMEM_VMEMMAP 282 282 /* Do not use the TSB for vmemmap. */ 283 - mov (VMEMMAP_BASE >> 24), %g5 284 - sllx %g5, 24, %g5 283 + mov (VMEMMAP_BASE >> 40), %g5 284 + sllx %g5, 40, %g5 285 285 cmp %g4,%g5 286 286 bgeu,pn %xcc, kvmap_vmemmap 287 287 nop ··· 293 293 sethi %hi(MODULES_VADDR), %g5 294 294 cmp %g4, %g5 295 295 blu,pn %xcc, kvmap_dtlb_longpath 296 - mov (VMALLOC_END >> 24), %g5 297 - sllx %g5, 24, %g5 296 + mov (VMALLOC_END >> 40), %g5 297 + sllx %g5, 40, %g5 298 298 cmp %g4, %g5 299 299 bgeu,pn %xcc, kvmap_dtlb_longpath 300 300 nop
+537 -40
arch/sparc/kernel/perf_event.c
··· 56 56 struct perf_event *events[MAX_HWEVENTS]; 57 57 unsigned long used_mask[BITS_TO_LONGS(MAX_HWEVENTS)]; 58 58 unsigned long active_mask[BITS_TO_LONGS(MAX_HWEVENTS)]; 59 - int enabled; 59 + u64 pcr; 60 + int enabled; 60 61 }; 61 62 DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { .enabled = 1, }; 62 63 ··· 69 68 #define PIC_LOWER 0x02 70 69 }; 71 70 71 + static unsigned long perf_event_encode(const struct perf_event_map *pmap) 72 + { 73 + return ((unsigned long) pmap->encoding << 16) | pmap->pic_mask; 74 + } 75 + 76 + static void perf_event_decode(unsigned long val, u16 *enc, u8 *msk) 77 + { 78 + *msk = val & 0xff; 79 + *enc = val >> 16; 80 + } 81 + 82 + #define C(x) PERF_COUNT_HW_CACHE_##x 83 + 84 + #define CACHE_OP_UNSUPPORTED 0xfffe 85 + #define CACHE_OP_NONSENSE 0xffff 86 + 87 + typedef struct perf_event_map cache_map_t 88 + [PERF_COUNT_HW_CACHE_MAX] 89 + [PERF_COUNT_HW_CACHE_OP_MAX] 90 + [PERF_COUNT_HW_CACHE_RESULT_MAX]; 91 + 72 92 struct sparc_pmu { 73 93 const struct perf_event_map *(*event_map)(int); 94 + const cache_map_t *cache_map; 74 95 int max_events; 75 96 int upper_shift; 76 97 int lower_shift; ··· 103 80 int lower_nop; 104 81 }; 105 82 106 - static const struct perf_event_map ultra3i_perfmon_event_map[] = { 83 + static const struct perf_event_map ultra3_perfmon_event_map[] = { 107 84 [PERF_COUNT_HW_CPU_CYCLES] = { 0x0000, PIC_UPPER | PIC_LOWER }, 108 85 [PERF_COUNT_HW_INSTRUCTIONS] = { 0x0001, PIC_UPPER | PIC_LOWER }, 109 86 [PERF_COUNT_HW_CACHE_REFERENCES] = { 0x0009, PIC_LOWER }, 110 87 [PERF_COUNT_HW_CACHE_MISSES] = { 0x0009, PIC_UPPER }, 111 88 }; 112 89 113 - static const struct perf_event_map *ultra3i_event_map(int event_id) 90 + static const struct perf_event_map *ultra3_event_map(int event_id) 114 91 { 115 - return &ultra3i_perfmon_event_map[event_id]; 92 + return &ultra3_perfmon_event_map[event_id]; 116 93 } 117 94 118 - static const struct sparc_pmu ultra3i_pmu = { 119 - .event_map = ultra3i_event_map, 120 - .max_events = ARRAY_SIZE(ultra3i_perfmon_event_map), 95 + static const cache_map_t ultra3_cache_map = { 96 + [C(L1D)] = { 97 + [C(OP_READ)] = { 98 + [C(RESULT_ACCESS)] = { 0x09, PIC_LOWER, }, 99 + [C(RESULT_MISS)] = { 0x09, PIC_UPPER, }, 100 + }, 101 + [C(OP_WRITE)] = { 102 + [C(RESULT_ACCESS)] = { 0x0a, PIC_LOWER }, 103 + [C(RESULT_MISS)] = { 0x0a, PIC_UPPER }, 104 + }, 105 + [C(OP_PREFETCH)] = { 106 + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, 107 + [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED }, 108 + }, 109 + }, 110 + [C(L1I)] = { 111 + [C(OP_READ)] = { 112 + [C(RESULT_ACCESS)] = { 0x09, PIC_LOWER, }, 113 + [C(RESULT_MISS)] = { 0x09, PIC_UPPER, }, 114 + }, 115 + [ C(OP_WRITE) ] = { 116 + [ C(RESULT_ACCESS) ] = { CACHE_OP_NONSENSE }, 117 + [ C(RESULT_MISS) ] = { CACHE_OP_NONSENSE }, 118 + }, 119 + [ C(OP_PREFETCH) ] = { 120 + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, 121 + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, 122 + }, 123 + }, 124 + [C(LL)] = { 125 + [C(OP_READ)] = { 126 + [C(RESULT_ACCESS)] = { 0x0c, PIC_LOWER, }, 127 + [C(RESULT_MISS)] = { 0x0c, PIC_UPPER, }, 128 + }, 129 + [C(OP_WRITE)] = { 130 + [C(RESULT_ACCESS)] = { 0x0c, PIC_LOWER }, 131 + [C(RESULT_MISS)] = { 0x0c, PIC_UPPER }, 132 + }, 133 + [C(OP_PREFETCH)] = { 134 + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, 135 + [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED }, 136 + }, 137 + }, 138 + [C(DTLB)] = { 139 + [C(OP_READ)] = { 140 + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, 141 + [C(RESULT_MISS)] = { 0x12, PIC_UPPER, }, 142 + }, 143 + [ C(OP_WRITE) ] = { 144 + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, 145 + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, 146 + }, 147 + [ C(OP_PREFETCH) ] = { 148 + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, 149 + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, 150 + }, 151 + }, 152 + [C(ITLB)] = { 153 + [C(OP_READ)] = { 154 + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, 155 + [C(RESULT_MISS)] = { 0x11, PIC_UPPER, }, 156 + }, 157 + [ C(OP_WRITE) ] = { 158 + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, 159 + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, 160 + }, 161 + [ C(OP_PREFETCH) ] = { 162 + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, 163 + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, 164 + }, 165 + }, 166 + [C(BPU)] = { 167 + [C(OP_READ)] = { 168 + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, 169 + [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED }, 170 + }, 171 + [ C(OP_WRITE) ] = { 172 + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, 173 + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, 174 + }, 175 + [ C(OP_PREFETCH) ] = { 176 + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, 177 + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, 178 + }, 179 + }, 180 + }; 181 + 182 + static const struct sparc_pmu ultra3_pmu = { 183 + .event_map = ultra3_event_map, 184 + .cache_map = &ultra3_cache_map, 185 + .max_events = ARRAY_SIZE(ultra3_perfmon_event_map), 121 186 .upper_shift = 11, 122 187 .lower_shift = 4, 123 188 .event_mask = 0x3f, 124 189 .upper_nop = 0x1c, 125 190 .lower_nop = 0x14, 191 + }; 192 + 193 + /* Niagara1 is very limited. The upper PIC is hard-locked to count 194 + * only instructions, so it is free running which creates all kinds of 195 + * problems. Some hardware designs make one wonder if the creator 196 + * even looked at how this stuff gets used by software. 197 + */ 198 + static const struct perf_event_map niagara1_perfmon_event_map[] = { 199 + [PERF_COUNT_HW_CPU_CYCLES] = { 0x00, PIC_UPPER }, 200 + [PERF_COUNT_HW_INSTRUCTIONS] = { 0x00, PIC_UPPER }, 201 + [PERF_COUNT_HW_CACHE_REFERENCES] = { 0, PIC_NONE }, 202 + [PERF_COUNT_HW_CACHE_MISSES] = { 0x03, PIC_LOWER }, 203 + }; 204 + 205 + static const struct perf_event_map *niagara1_event_map(int event_id) 206 + { 207 + return &niagara1_perfmon_event_map[event_id]; 208 + } 209 + 210 + static const cache_map_t niagara1_cache_map = { 211 + [C(L1D)] = { 212 + [C(OP_READ)] = { 213 + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, 214 + [C(RESULT_MISS)] = { 0x03, PIC_LOWER, }, 215 + }, 216 + [C(OP_WRITE)] = { 217 + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, 218 + [C(RESULT_MISS)] = { 0x03, PIC_LOWER, }, 219 + }, 220 + [C(OP_PREFETCH)] = { 221 + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, 222 + [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED }, 223 + }, 224 + }, 225 + [C(L1I)] = { 226 + [C(OP_READ)] = { 227 + [C(RESULT_ACCESS)] = { 0x00, PIC_UPPER }, 228 + [C(RESULT_MISS)] = { 0x02, PIC_LOWER, }, 229 + }, 230 + [ C(OP_WRITE) ] = { 231 + [ C(RESULT_ACCESS) ] = { CACHE_OP_NONSENSE }, 232 + [ C(RESULT_MISS) ] = { CACHE_OP_NONSENSE }, 233 + }, 234 + [ C(OP_PREFETCH) ] = { 235 + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, 236 + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, 237 + }, 238 + }, 239 + [C(LL)] = { 240 + [C(OP_READ)] = { 241 + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, 242 + [C(RESULT_MISS)] = { 0x07, PIC_LOWER, }, 243 + }, 244 + [C(OP_WRITE)] = { 245 + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, 246 + [C(RESULT_MISS)] = { 0x07, PIC_LOWER, }, 247 + }, 248 + [C(OP_PREFETCH)] = { 249 + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, 250 + [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED }, 251 + }, 252 + }, 253 + [C(DTLB)] = { 254 + [C(OP_READ)] = { 255 + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, 256 + [C(RESULT_MISS)] = { 0x05, PIC_LOWER, }, 257 + }, 258 + [ C(OP_WRITE) ] = { 259 + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, 260 + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, 261 + }, 262 + [ C(OP_PREFETCH) ] = { 263 + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, 264 + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, 265 + }, 266 + }, 267 + [C(ITLB)] = { 268 + [C(OP_READ)] = { 269 + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, 270 + [C(RESULT_MISS)] = { 0x04, PIC_LOWER, }, 271 + }, 272 + [ C(OP_WRITE) ] = { 273 + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, 274 + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, 275 + }, 276 + [ C(OP_PREFETCH) ] = { 277 + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, 278 + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, 279 + }, 280 + }, 281 + [C(BPU)] = { 282 + [C(OP_READ)] = { 283 + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, 284 + [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED }, 285 + }, 286 + [ C(OP_WRITE) ] = { 287 + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, 288 + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, 289 + }, 290 + [ C(OP_PREFETCH) ] = { 291 + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, 292 + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, 293 + }, 294 + }, 295 + }; 296 + 297 + static const struct sparc_pmu niagara1_pmu = { 298 + .event_map = niagara1_event_map, 299 + .cache_map = &niagara1_cache_map, 300 + .max_events = ARRAY_SIZE(niagara1_perfmon_event_map), 301 + .upper_shift = 0, 302 + .lower_shift = 4, 303 + .event_mask = 0x7, 304 + .upper_nop = 0x0, 305 + .lower_nop = 0x0, 126 306 }; 127 307 128 308 static const struct perf_event_map niagara2_perfmon_event_map[] = { ··· 342 116 return &niagara2_perfmon_event_map[event_id]; 343 117 } 344 118 119 + static const cache_map_t niagara2_cache_map = { 120 + [C(L1D)] = { 121 + [C(OP_READ)] = { 122 + [C(RESULT_ACCESS)] = { 0x0208, PIC_UPPER | PIC_LOWER, }, 123 + [C(RESULT_MISS)] = { 0x0302, PIC_UPPER | PIC_LOWER, }, 124 + }, 125 + [C(OP_WRITE)] = { 126 + [C(RESULT_ACCESS)] = { 0x0210, PIC_UPPER | PIC_LOWER, }, 127 + [C(RESULT_MISS)] = { 0x0302, PIC_UPPER | PIC_LOWER, }, 128 + }, 129 + [C(OP_PREFETCH)] = { 130 + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, 131 + [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED }, 132 + }, 133 + }, 134 + [C(L1I)] = { 135 + [C(OP_READ)] = { 136 + [C(RESULT_ACCESS)] = { 0x02ff, PIC_UPPER | PIC_LOWER, }, 137 + [C(RESULT_MISS)] = { 0x0301, PIC_UPPER | PIC_LOWER, }, 138 + }, 139 + [ C(OP_WRITE) ] = { 140 + [ C(RESULT_ACCESS) ] = { CACHE_OP_NONSENSE }, 141 + [ C(RESULT_MISS) ] = { CACHE_OP_NONSENSE }, 142 + }, 143 + [ C(OP_PREFETCH) ] = { 144 + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, 145 + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, 146 + }, 147 + }, 148 + [C(LL)] = { 149 + [C(OP_READ)] = { 150 + [C(RESULT_ACCESS)] = { 0x0208, PIC_UPPER | PIC_LOWER, }, 151 + [C(RESULT_MISS)] = { 0x0330, PIC_UPPER | PIC_LOWER, }, 152 + }, 153 + [C(OP_WRITE)] = { 154 + [C(RESULT_ACCESS)] = { 0x0210, PIC_UPPER | PIC_LOWER, }, 155 + [C(RESULT_MISS)] = { 0x0320, PIC_UPPER | PIC_LOWER, }, 156 + }, 157 + [C(OP_PREFETCH)] = { 158 + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, 159 + [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED }, 160 + }, 161 + }, 162 + [C(DTLB)] = { 163 + [C(OP_READ)] = { 164 + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, 165 + [C(RESULT_MISS)] = { 0x0b08, PIC_UPPER | PIC_LOWER, }, 166 + }, 167 + [ C(OP_WRITE) ] = { 168 + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, 169 + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, 170 + }, 171 + [ C(OP_PREFETCH) ] = { 172 + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, 173 + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, 174 + }, 175 + }, 176 + [C(ITLB)] = { 177 + [C(OP_READ)] = { 178 + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, 179 + [C(RESULT_MISS)] = { 0xb04, PIC_UPPER | PIC_LOWER, }, 180 + }, 181 + [ C(OP_WRITE) ] = { 182 + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, 183 + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, 184 + }, 185 + [ C(OP_PREFETCH) ] = { 186 + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, 187 + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, 188 + }, 189 + }, 190 + [C(BPU)] = { 191 + [C(OP_READ)] = { 192 + [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, 193 + [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED }, 194 + }, 195 + [ C(OP_WRITE) ] = { 196 + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, 197 + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, 198 + }, 199 + [ C(OP_PREFETCH) ] = { 200 + [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, 201 + [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, 202 + }, 203 + }, 204 + }; 205 + 345 206 static const struct sparc_pmu niagara2_pmu = { 346 207 .event_map = niagara2_event_map, 208 + .cache_map = &niagara2_cache_map, 347 209 .max_events = ARRAY_SIZE(niagara2_perfmon_event_map), 348 210 .upper_shift = 19, 349 211 .lower_shift = 6, ··· 465 151 sparc_pmu->lower_nop, idx); 466 152 } 467 153 468 - static inline void sparc_pmu_enable_event(struct hw_perf_event *hwc, 469 - int idx) 154 + static inline void sparc_pmu_enable_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc, int idx) 470 155 { 471 156 u64 val, mask = mask_for_index(idx); 472 157 473 - val = pcr_ops->read(); 474 - pcr_ops->write((val & ~mask) | hwc->config); 158 + val = cpuc->pcr; 159 + val &= ~mask; 160 + val |= hwc->config; 161 + cpuc->pcr = val; 162 + 163 + pcr_ops->write(cpuc->pcr); 475 164 } 476 165 477 - static inline void sparc_pmu_disable_event(struct hw_perf_event *hwc, 478 - int idx) 166 + static inline void sparc_pmu_disable_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc, int idx) 479 167 { 480 168 u64 mask = mask_for_index(idx); 481 169 u64 nop = nop_for_index(idx); 482 - u64 val = pcr_ops->read(); 170 + u64 val; 483 171 484 - pcr_ops->write((val & ~mask) | nop); 172 + val = cpuc->pcr; 173 + val &= ~mask; 174 + val |= nop; 175 + cpuc->pcr = val; 176 + 177 + pcr_ops->write(cpuc->pcr); 485 178 } 486 179 487 180 void hw_perf_enable(void) ··· 503 182 cpuc->enabled = 1; 504 183 barrier(); 505 184 506 - val = pcr_ops->read(); 185 + val = cpuc->pcr; 507 186 508 187 for (i = 0; i < MAX_HWEVENTS; i++) { 509 188 struct perf_event *cp = cpuc->events[i]; ··· 515 194 val |= hwc->config_base; 516 195 } 517 196 518 - pcr_ops->write(val); 197 + cpuc->pcr = val; 198 + 199 + pcr_ops->write(cpuc->pcr); 519 200 } 520 201 521 202 void hw_perf_disable(void) ··· 530 207 531 208 cpuc->enabled = 0; 532 209 533 - val = pcr_ops->read(); 210 + val = cpuc->pcr; 534 211 val &= ~(PCR_UTRACE | PCR_STRACE | 535 212 sparc_pmu->hv_bit | sparc_pmu->irq_bit); 536 - pcr_ops->write(val); 213 + cpuc->pcr = val; 214 + 215 + pcr_ops->write(cpuc->pcr); 537 216 } 538 217 539 218 static u32 read_pmc(int idx) ··· 567 242 } 568 243 569 244 static int sparc_perf_event_set_period(struct perf_event *event, 570 - struct hw_perf_event *hwc, int idx) 245 + struct hw_perf_event *hwc, int idx) 571 246 { 572 247 s64 left = atomic64_read(&hwc->period_left); 573 248 s64 period = hwc->sample_period; ··· 607 282 if (test_and_set_bit(idx, cpuc->used_mask)) 608 283 return -EAGAIN; 609 284 610 - sparc_pmu_disable_event(hwc, idx); 285 + sparc_pmu_disable_event(cpuc, hwc, idx); 611 286 612 287 cpuc->events[idx] = event; 613 288 set_bit(idx, cpuc->active_mask); 614 289 615 290 sparc_perf_event_set_period(event, hwc, idx); 616 - sparc_pmu_enable_event(hwc, idx); 291 + sparc_pmu_enable_event(cpuc, hwc, idx); 617 292 perf_event_update_userpage(event); 618 293 return 0; 619 294 } 620 295 621 296 static u64 sparc_perf_event_update(struct perf_event *event, 622 - struct hw_perf_event *hwc, int idx) 297 + struct hw_perf_event *hwc, int idx) 623 298 { 624 299 int shift = 64 - 32; 625 300 u64 prev_raw_count, new_raw_count; ··· 649 324 int idx = hwc->idx; 650 325 651 326 clear_bit(idx, cpuc->active_mask); 652 - sparc_pmu_disable_event(hwc, idx); 327 + sparc_pmu_disable_event(cpuc, hwc, idx); 653 328 654 329 barrier(); 655 330 ··· 663 338 static void sparc_pmu_read(struct perf_event *event) 664 339 { 665 340 struct hw_perf_event *hwc = &event->hw; 341 + 666 342 sparc_perf_event_update(event, hwc, hwc->idx); 667 343 } 668 344 669 345 static void sparc_pmu_unthrottle(struct perf_event *event) 670 346 { 347 + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 671 348 struct hw_perf_event *hwc = &event->hw; 672 - sparc_pmu_enable_event(hwc, hwc->idx); 349 + 350 + sparc_pmu_enable_event(cpuc, hwc, hwc->idx); 673 351 } 674 352 675 353 static atomic_t active_events = ATOMIC_INIT(0); 676 354 static DEFINE_MUTEX(pmc_grab_mutex); 355 + 356 + static void perf_stop_nmi_watchdog(void *unused) 357 + { 358 + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 359 + 360 + stop_nmi_watchdog(NULL); 361 + cpuc->pcr = pcr_ops->read(); 362 + } 677 363 678 364 void perf_event_grab_pmc(void) 679 365 { ··· 694 358 mutex_lock(&pmc_grab_mutex); 695 359 if (atomic_read(&active_events) == 0) { 696 360 if (atomic_read(&nmi_active) > 0) { 697 - on_each_cpu(stop_nmi_watchdog, NULL, 1); 361 + on_each_cpu(perf_stop_nmi_watchdog, NULL, 1); 698 362 BUG_ON(atomic_read(&nmi_active) != 0); 699 363 } 700 364 atomic_inc(&active_events); ··· 711 375 } 712 376 } 713 377 378 + static const struct perf_event_map *sparc_map_cache_event(u64 config) 379 + { 380 + unsigned int cache_type, cache_op, cache_result; 381 + const struct perf_event_map *pmap; 382 + 383 + if (!sparc_pmu->cache_map) 384 + return ERR_PTR(-ENOENT); 385 + 386 + cache_type = (config >> 0) & 0xff; 387 + if (cache_type >= PERF_COUNT_HW_CACHE_MAX) 388 + return ERR_PTR(-EINVAL); 389 + 390 + cache_op = (config >> 8) & 0xff; 391 + if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX) 392 + return ERR_PTR(-EINVAL); 393 + 394 + cache_result = (config >> 16) & 0xff; 395 + if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX) 396 + return ERR_PTR(-EINVAL); 397 + 398 + pmap = &((*sparc_pmu->cache_map)[cache_type][cache_op][cache_result]); 399 + 400 + if (pmap->encoding == CACHE_OP_UNSUPPORTED) 401 + return ERR_PTR(-ENOENT); 402 + 403 + if (pmap->encoding == CACHE_OP_NONSENSE) 404 + return ERR_PTR(-EINVAL); 405 + 406 + return pmap; 407 + } 408 + 714 409 static void hw_perf_event_destroy(struct perf_event *event) 715 410 { 716 411 perf_event_release_pmc(); 717 412 } 718 413 414 + /* Make sure all events can be scheduled into the hardware at 415 + * the same time. This is simplified by the fact that we only 416 + * need to support 2 simultaneous HW events. 417 + */ 418 + static int sparc_check_constraints(unsigned long *events, int n_ev) 419 + { 420 + if (n_ev <= perf_max_events) { 421 + u8 msk1, msk2; 422 + u16 dummy; 423 + 424 + if (n_ev == 1) 425 + return 0; 426 + BUG_ON(n_ev != 2); 427 + perf_event_decode(events[0], &dummy, &msk1); 428 + perf_event_decode(events[1], &dummy, &msk2); 429 + 430 + /* If both events can go on any counter, OK. */ 431 + if (msk1 == (PIC_UPPER | PIC_LOWER) && 432 + msk2 == (PIC_UPPER | PIC_LOWER)) 433 + return 0; 434 + 435 + /* If one event is limited to a specific counter, 436 + * and the other can go on both, OK. 437 + */ 438 + if ((msk1 == PIC_UPPER || msk1 == PIC_LOWER) && 439 + msk2 == (PIC_UPPER | PIC_LOWER)) 440 + return 0; 441 + if ((msk2 == PIC_UPPER || msk2 == PIC_LOWER) && 442 + msk1 == (PIC_UPPER | PIC_LOWER)) 443 + return 0; 444 + 445 + /* If the events are fixed to different counters, OK. */ 446 + if ((msk1 == PIC_UPPER && msk2 == PIC_LOWER) || 447 + (msk1 == PIC_LOWER && msk2 == PIC_UPPER)) 448 + return 0; 449 + 450 + /* Otherwise, there is a conflict. */ 451 + } 452 + 453 + return -1; 454 + } 455 + 456 + static int check_excludes(struct perf_event **evts, int n_prev, int n_new) 457 + { 458 + int eu = 0, ek = 0, eh = 0; 459 + struct perf_event *event; 460 + int i, n, first; 461 + 462 + n = n_prev + n_new; 463 + if (n <= 1) 464 + return 0; 465 + 466 + first = 1; 467 + for (i = 0; i < n; i++) { 468 + event = evts[i]; 469 + if (first) { 470 + eu = event->attr.exclude_user; 471 + ek = event->attr.exclude_kernel; 472 + eh = event->attr.exclude_hv; 473 + first = 0; 474 + } else if (event->attr.exclude_user != eu || 475 + event->attr.exclude_kernel != ek || 476 + event->attr.exclude_hv != eh) { 477 + return -EAGAIN; 478 + } 479 + } 480 + 481 + return 0; 482 + } 483 + 484 + static int collect_events(struct perf_event *group, int max_count, 485 + struct perf_event *evts[], unsigned long *events) 486 + { 487 + struct perf_event *event; 488 + int n = 0; 489 + 490 + if (!is_software_event(group)) { 491 + if (n >= max_count) 492 + return -1; 493 + evts[n] = group; 494 + events[n++] = group->hw.event_base; 495 + } 496 + list_for_each_entry(event, &group->sibling_list, group_entry) { 497 + if (!is_software_event(event) && 498 + event->state != PERF_EVENT_STATE_OFF) { 499 + if (n >= max_count) 500 + return -1; 501 + evts[n] = event; 502 + events[n++] = event->hw.event_base; 503 + } 504 + } 505 + return n; 506 + } 507 + 719 508 static int __hw_perf_event_init(struct perf_event *event) 720 509 { 721 510 struct perf_event_attr *attr = &event->attr; 511 + struct perf_event *evts[MAX_HWEVENTS]; 722 512 struct hw_perf_event *hwc = &event->hw; 513 + unsigned long events[MAX_HWEVENTS]; 723 514 const struct perf_event_map *pmap; 724 515 u64 enc; 516 + int n; 725 517 726 518 if (atomic_read(&nmi_active) < 0) 727 519 return -ENODEV; 728 520 729 - if (attr->type != PERF_TYPE_HARDWARE) 521 + if (attr->type == PERF_TYPE_HARDWARE) { 522 + if (attr->config >= sparc_pmu->max_events) 523 + return -EINVAL; 524 + pmap = sparc_pmu->event_map(attr->config); 525 + } else if (attr->type == PERF_TYPE_HW_CACHE) { 526 + pmap = sparc_map_cache_event(attr->config); 527 + if (IS_ERR(pmap)) 528 + return PTR_ERR(pmap); 529 + } else 730 530 return -EOPNOTSUPP; 731 - 732 - if (attr->config >= sparc_pmu->max_events) 733 - return -EINVAL; 734 - 735 - perf_event_grab_pmc(); 736 - event->destroy = hw_perf_event_destroy; 737 531 738 532 /* We save the enable bits in the config_base. So to 739 533 * turn off sampling just write 'config', and to enable ··· 877 411 if (!attr->exclude_hv) 878 412 hwc->config_base |= sparc_pmu->hv_bit; 879 413 414 + hwc->event_base = perf_event_encode(pmap); 415 + 416 + enc = pmap->encoding; 417 + 418 + n = 0; 419 + if (event->group_leader != event) { 420 + n = collect_events(event->group_leader, 421 + perf_max_events - 1, 422 + evts, events); 423 + if (n < 0) 424 + return -EINVAL; 425 + } 426 + events[n] = hwc->event_base; 427 + evts[n] = event; 428 + 429 + if (check_excludes(evts, n, 1)) 430 + return -EINVAL; 431 + 432 + if (sparc_check_constraints(events, n + 1)) 433 + return -EINVAL; 434 + 435 + /* Try to do all error checking before this point, as unwinding 436 + * state after grabbing the PMC is difficult. 437 + */ 438 + perf_event_grab_pmc(); 439 + event->destroy = hw_perf_event_destroy; 440 + 880 441 if (!hwc->sample_period) { 881 442 hwc->sample_period = MAX_PERIOD; 882 443 hwc->last_period = hwc->sample_period; 883 444 atomic64_set(&hwc->period_left, hwc->sample_period); 884 445 } 885 446 886 - pmap = sparc_pmu->event_map(attr->config); 887 - 888 - enc = pmap->encoding; 889 447 if (pmap->pic_mask & PIC_UPPER) { 890 448 hwc->idx = PIC_UPPER_INDEX; 891 449 enc <<= sparc_pmu->upper_shift; ··· 962 472 } 963 473 964 474 static int __kprobes perf_event_nmi_handler(struct notifier_block *self, 965 - unsigned long cmd, void *__args) 475 + unsigned long cmd, void *__args) 966 476 { 967 477 struct die_args *args = __args; 968 478 struct perf_sample_data data; ··· 1003 513 continue; 1004 514 1005 515 if (perf_event_overflow(event, 1, &data, regs)) 1006 - sparc_pmu_disable_event(hwc, idx); 516 + sparc_pmu_disable_event(cpuc, hwc, idx); 1007 517 } 1008 518 1009 519 return NOTIFY_STOP; ··· 1015 525 1016 526 static bool __init supported_pmu(void) 1017 527 { 1018 - if (!strcmp(sparc_pmu_type, "ultra3i")) { 1019 - sparc_pmu = &ultra3i_pmu; 528 + if (!strcmp(sparc_pmu_type, "ultra3") || 529 + !strcmp(sparc_pmu_type, "ultra3+") || 530 + !strcmp(sparc_pmu_type, "ultra3i") || 531 + !strcmp(sparc_pmu_type, "ultra4+")) { 532 + sparc_pmu = &ultra3_pmu; 533 + return true; 534 + } 535 + if (!strcmp(sparc_pmu_type, "niagara")) { 536 + sparc_pmu = &niagara1_pmu; 1020 537 return true; 1021 538 } 1022 539 if (!strcmp(sparc_pmu_type, "niagara2")) {
+1
arch/sparc/oprofile/init.c
··· 11 11 #include <linux/oprofile.h> 12 12 #include <linux/errno.h> 13 13 #include <linux/init.h> 14 + #include <linux/param.h> /* for HZ */ 14 15 15 16 #ifdef CONFIG_SPARC64 16 17 #include <linux/notifier.h>