Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

[POWERPC] cell: PPU Oprofile cleanup patch

This is a clean up patch that includes the following changes:

-Some comments were added to clarify the code based on feedback
from the community.
-The write_pm_cntrl() and set_count_mode() functions were passed a
structure element from a global variable. The argument was
removed, so the functions now operate on the global directly.
-The set_pm_event() function call in the cell_virtual_cntr()
routine was moved into a for-loop placed before the for_each_cpu
loop.
Signed-off-by: Carl Love <carll@us.ibm.com>
Signed-off-by: Maynard Johnson <mpjohn@us.ibm.com>
Signed-off-by: Arnd Bergmann <arnd.bergmann@de.ibm.com>

authored by

Carl Love and committed by
Arnd Bergmann
bcb63e25 128b8546

+73 -60
+63 -41
arch/powerpc/oprofile/op_model_cell.c
··· 41 41 #define PPU_CYCLES_EVENT_NUM 1 /* event number for CYCLES */ 42 42 #define CBE_COUNT_ALL_CYCLES 0x42800000 /* PPU cycle event specifier */ 43 43 44 - #define NUM_THREADS 2 45 - #define VIRT_CNTR_SW_TIME_NS 100000000 // 0.5 seconds 44 + #define NUM_THREADS 2 /* number of physical threads in 45 + * physical processor 46 + */ 47 + #define NUM_TRACE_BUS_WORDS 4 48 + #define NUM_INPUT_BUS_WORDS 2 49 + 46 50 47 51 struct pmc_cntrl_data { 48 52 unsigned long vcntr; ··· 97 93 u32 pm07_cntrl[NR_PHYS_CTRS]; 98 94 } pm_regs; 99 95 100 - 101 96 #define GET_SUB_UNIT(x) ((x & 0x0000f000) >> 12) 102 97 #define GET_BUS_WORD(x) ((x & 0x000000f0) >> 4) 103 98 #define GET_BUS_TYPE(x) ((x & 0x00000300) >> 8) 104 99 #define GET_POLARITY(x) ((x & 0x00000002) >> 1) 105 100 #define GET_COUNT_CYCLES(x) (x & 0x00000001) 106 101 #define GET_INPUT_CONTROL(x) ((x & 0x00000004) >> 2) 107 - 108 102 109 103 static DEFINE_PER_CPU(unsigned long[NR_PHYS_CTRS], pmc_values); 110 104 ··· 131 129 132 130 static u32 ctr_enabled; 133 131 134 - static unsigned char trace_bus[4]; 135 - static unsigned char input_bus[2]; 132 + static unsigned char trace_bus[NUM_TRACE_BUS_WORDS]; 133 + static unsigned char input_bus[NUM_INPUT_BUS_WORDS]; 136 134 137 135 /* 138 136 * Firmware interface functions ··· 185 183 for (j = 0; j < count; j++) { 186 184 /* fw expects physical cpu # */ 187 185 pm_signal_local[j].cpu = node; 188 - pm_signal_local[j].signal_group = pm_signal[j].signal_group; 186 + pm_signal_local[j].signal_group 187 + = pm_signal[j].signal_group; 189 188 pm_signal_local[j].bus_word = pm_signal[j].bus_word; 190 189 pm_signal_local[j].sub_unit = pm_signal[j].sub_unit; 191 190 pm_signal_local[j].bit = pm_signal[j].bit; ··· 235 232 236 233 p->signal_group = event / 100; 237 234 p->bus_word = bus_word; 238 - p->sub_unit = unit_mask & 0x0000f000; 235 + p->sub_unit = (unit_mask & 0x0000f000) >> 12; 239 236 240 237 pm_regs.pm07_cntrl[ctr] = 0; 241 238 pm_regs.pm07_cntrl[ctr] |= 
PM07_CTR_COUNT_CYCLES(count_cycles); 242 239 pm_regs.pm07_cntrl[ctr] |= PM07_CTR_POLARITY(polarity); 243 240 pm_regs.pm07_cntrl[ctr] |= PM07_CTR_INPUT_CONTROL(input_control); 244 241 242 + /* Some of the islands signal selection is based on 64 bit words. 243 + * The debug bus words are 32 bits, the input words to the performance 244 + * counters are defined as 32 bits. Need to convert the 64 bit island 245 + * specification to the appropriate 32 input bit and bus word for the 246 + * performance counter event selection. See the CELL Performance 247 + * monitoring signals manual and the Perf cntr hardware descriptions 248 + * for the details. 249 + */ 245 250 if (input_control == 0) { 246 251 if (signal_bit > 31) { 247 252 signal_bit -= 32; ··· 270 259 p->bit = signal_bit; 271 260 } 272 261 273 - for (i = 0; i < 4; i++) { 262 + for (i = 0; i < NUM_TRACE_BUS_WORDS; i++) { 274 263 if (bus_word & (1 << i)) { 275 264 pm_regs.debug_bus_control |= 276 265 (bus_type << (31 - (2 * i) + 1)); 277 266 278 - for (j = 0; j < 2; j++) { 267 + for (j = 0; j < NUM_INPUT_BUS_WORDS; j++) { 279 268 if (input_bus[j] == 0xff) { 280 269 input_bus[j] = i; 281 270 pm_regs.group_control |= ··· 289 278 ; 290 279 } 291 280 292 - static void write_pm_cntrl(int cpu, struct pm_cntrl *pm_cntrl) 281 + static void write_pm_cntrl(int cpu) 293 282 { 294 - /* Oprofile will use 32 bit counters, set bits 7:10 to 0 */ 283 + /* Oprofile will use 32 bit counters, set bits 7:10 to 0 284 + * pmregs.pm_cntrl is a global 285 + */ 286 + 295 287 u32 val = 0; 296 - if (pm_cntrl->enable == 1) 288 + if (pm_regs.pm_cntrl.enable == 1) 297 289 val |= CBE_PM_ENABLE_PERF_MON; 298 290 299 - if (pm_cntrl->stop_at_max == 1) 291 + if (pm_regs.pm_cntrl.stop_at_max == 1) 300 292 val |= CBE_PM_STOP_AT_MAX; 301 293 302 - if (pm_cntrl->trace_mode == 1) 303 - val |= CBE_PM_TRACE_MODE_SET(pm_cntrl->trace_mode); 294 + if (pm_regs.pm_cntrl.trace_mode == 1) 295 + val |= CBE_PM_TRACE_MODE_SET(pm_regs.pm_cntrl.trace_mode); 304 296 305 - 
if (pm_cntrl->freeze == 1) 297 + if (pm_regs.pm_cntrl.freeze == 1) 306 298 val |= CBE_PM_FREEZE_ALL_CTRS; 307 299 308 300 /* Routine set_count_mode must be called previously to set 309 301 * the count mode based on the user selection of user and kernel. 310 302 */ 311 - val |= CBE_PM_COUNT_MODE_SET(pm_cntrl->count_mode); 303 + val |= CBE_PM_COUNT_MODE_SET(pm_regs.pm_cntrl.count_mode); 312 304 cbe_write_pm(cpu, pm_control, val); 313 305 } 314 306 315 307 static inline void 316 - set_count_mode(u32 kernel, u32 user, struct pm_cntrl *pm_cntrl) 308 + set_count_mode(u32 kernel, u32 user) 317 309 { 318 310 /* The user must specify user and kernel if they want them. If 319 - * neither is specified, OProfile will count in hypervisor mode 311 + * neither is specified, OProfile will count in hypervisor mode. 312 + * pm_regs.pm_cntrl is a global 320 313 */ 321 314 if (kernel) { 322 315 if (user) 323 - pm_cntrl->count_mode = CBE_COUNT_ALL_MODES; 316 + pm_regs.pm_cntrl.count_mode = CBE_COUNT_ALL_MODES; 324 317 else 325 - pm_cntrl->count_mode = CBE_COUNT_SUPERVISOR_MODE; 318 + pm_regs.pm_cntrl.count_mode = 319 + CBE_COUNT_SUPERVISOR_MODE; 326 320 } else { 327 321 if (user) 328 - pm_cntrl->count_mode = CBE_COUNT_PROBLEM_MODE; 322 + pm_regs.pm_cntrl.count_mode = CBE_COUNT_PROBLEM_MODE; 329 323 else 330 - pm_cntrl->count_mode = CBE_COUNT_HYPERVISOR_MODE; 324 + pm_regs.pm_cntrl.count_mode = 325 + CBE_COUNT_HYPERVISOR_MODE; 331 326 } 332 327 } 333 328 334 329 static inline void enable_ctr(u32 cpu, u32 ctr, u32 * pm07_cntrl) 335 330 { 336 331 337 - pm07_cntrl[ctr] |= PM07_CTR_ENABLE(1); 332 + pm07_cntrl[ctr] |= CBE_PM_CTR_ENABLE; 338 333 cbe_write_pm07_control(cpu, ctr, pm07_cntrl[ctr]); 339 334 } 340 335 ··· 382 365 hdw_thread = 1 ^ hdw_thread; 383 366 next_hdw_thread = hdw_thread; 384 367 368 + for (i = 0; i < num_counters; i++) 369 + /* There are some per thread events. 
Must do the 370 + * set event, for the thread that is being started 371 + */ 372 + set_pm_event(i, 373 + pmc_cntrl[next_hdw_thread][i].evnts, 374 + pmc_cntrl[next_hdw_thread][i].masks); 375 + 385 376 /* The following is done only once per each node, but 386 377 * we need cpu #, not node #, to pass to the cbe_xxx functions. 387 378 */ ··· 410 385 == 0xFFFFFFFF) 411 386 /* If the cntr value is 0xffffffff, we must 412 387 * reset that to 0xfffffff0 when the current 413 - * thread is restarted. This will generate a new 414 - * interrupt and make sure that we never restore 415 - * the counters to the max value. If the counters 416 - * were restored to the max value, they do not 417 - * increment and no interrupts are generated. Hence 418 - * no more samples will be collected on that cpu. 388 + * thread is restarted. This will generate a 389 + * new interrupt and make sure that we never 390 + * restore the counters to the max value. If 391 + * the counters were restored to the max value, 392 + * they do not increment and no interrupts are 393 + * generated. Hence no more samples will be 394 + * collected on that cpu. 419 395 */ 420 396 cbe_write_ctr(cpu, i, 0xFFFFFFF0); 421 397 else ··· 436 410 * Must do the set event, enable_cntr 437 411 * for each cpu. 
438 412 */ 439 - set_pm_event(i, 440 - pmc_cntrl[next_hdw_thread][i].evnts, 441 - pmc_cntrl[next_hdw_thread][i].masks); 442 413 enable_ctr(cpu, i, 443 414 pm_regs.pm07_cntrl); 444 415 } else { ··· 488 465 pm_regs.pm_cntrl.trace_mode = 0; 489 466 pm_regs.pm_cntrl.freeze = 1; 490 467 491 - set_count_mode(sys->enable_kernel, sys->enable_user, 492 - &pm_regs.pm_cntrl); 468 + set_count_mode(sys->enable_kernel, sys->enable_user); 493 469 494 470 /* Setup the thread 0 events */ 495 471 for (i = 0; i < num_ctrs; ++i) { ··· 520 498 pmc_cntrl[1][i].vcntr = i; 521 499 } 522 500 523 - for (i = 0; i < 4; i++) 501 + for (i = 0; i < NUM_TRACE_BUS_WORDS; i++) 524 502 trace_bus[i] = 0xff; 525 503 526 - for (i = 0; i < 2; i++) 504 + for (i = 0; i < NUM_INPUT_BUS_WORDS; i++) 527 505 input_bus[i] = 0xff; 528 506 529 507 /* Our counters count up, and "count" refers to ··· 582 560 cbe_write_pm(cpu, pm_start_stop, 0); 583 561 cbe_write_pm(cpu, group_control, pm_regs.group_control); 584 562 cbe_write_pm(cpu, debug_bus_control, pm_regs.debug_bus_control); 585 - write_pm_cntrl(cpu, &pm_regs.pm_cntrl); 563 + write_pm_cntrl(cpu); 586 564 587 565 for (i = 0; i < num_counters; ++i) { 588 566 if (ctr_enabled & (1 << i)) { ··· 624 602 } 625 603 } 626 604 627 - cbe_clear_pm_interrupts(cpu); 605 + cbe_get_and_clear_pm_interrupts(cpu); 628 606 cbe_enable_pm_interrupts(cpu, hdw_thread, interrupt_mask); 629 607 cbe_enable_pm(cpu); 630 608 } ··· 694 672 695 673 cbe_disable_pm(cpu); 696 674 697 - interrupt_mask = cbe_clear_pm_interrupts(cpu); 675 + interrupt_mask = cbe_get_and_clear_pm_interrupts(cpu); 698 676 699 677 /* If the interrupt mask has been cleared, then the virt cntr 700 678 * has cleared the interrupt. When the thread that generated
+4 -10
arch/powerpc/platforms/cell/pmu.c
··· 345 345 * Enabling/disabling interrupts for the entire performance monitoring unit. 346 346 */ 347 347 348 - u32 cbe_query_pm_interrupts(u32 cpu) 349 - { 350 - return cbe_read_pm(cpu, pm_status); 351 - } 352 - EXPORT_SYMBOL_GPL(cbe_query_pm_interrupts); 353 - 354 - u32 cbe_clear_pm_interrupts(u32 cpu) 348 + u32 cbe_get_and_clear_pm_interrupts(u32 cpu) 355 349 { 356 350 /* Reading pm_status clears the interrupt bits. */ 357 - return cbe_query_pm_interrupts(cpu); 351 + return cbe_read_pm(cpu, pm_status); 358 352 } 359 - EXPORT_SYMBOL_GPL(cbe_clear_pm_interrupts); 353 + EXPORT_SYMBOL_GPL(cbe_get_and_clear_pm_interrupts); 360 354 361 355 void cbe_enable_pm_interrupts(u32 cpu, u32 thread, u32 mask) 362 356 { ··· 365 371 366 372 void cbe_disable_pm_interrupts(u32 cpu) 367 373 { 368 - cbe_clear_pm_interrupts(cpu); 374 + cbe_get_and_clear_pm_interrupts(cpu); 369 375 cbe_write_pm(cpu, pm_status, 0); 370 376 } 371 377 EXPORT_SYMBOL_GPL(cbe_disable_pm_interrupts);
+6 -9
include/asm-powerpc/cell-pmu.h
··· 53 53 #define CBE_PM_CTR_POLARITY 0x01000000 54 54 #define CBE_PM_CTR_COUNT_CYCLES 0x00800000 55 55 #define CBE_PM_CTR_ENABLE 0x00400000 56 + #define PM07_CTR_INPUT_MUX(x) (((x) & 0x3F) << 26) 57 + #define PM07_CTR_INPUT_CONTROL(x) (((x) & 1) << 25) 58 + #define PM07_CTR_POLARITY(x) (((x) & 1) << 24) 59 + #define PM07_CTR_COUNT_CYCLES(x) (((x) & 1) << 23) 60 + #define PM07_CTR_ENABLE(x) (((x) & 1) << 22) 56 61 57 62 /* Macros for the pm_status register. */ 58 63 #define CBE_PM_CTR_OVERFLOW_INTR(ctr) (1 << (31 - ((ctr) & 7))) ··· 94 89 95 90 extern void cbe_enable_pm_interrupts(u32 cpu, u32 thread, u32 mask); 96 91 extern void cbe_disable_pm_interrupts(u32 cpu); 97 - extern u32 cbe_query_pm_interrupts(u32 cpu); 98 - extern u32 cbe_clear_pm_interrupts(u32 cpu); 92 + extern u32 cbe_get_and_clear_pm_interrupts(u32 cpu); 99 93 extern void cbe_sync_irq(int node); 100 94 101 95 /* Utility functions, macros */ ··· 106 102 #define CBE_COUNT_HYPERVISOR_MODE 1 107 103 #define CBE_COUNT_PROBLEM_MODE 2 108 104 #define CBE_COUNT_ALL_MODES 3 109 - 110 - /* Macros for the pm07_control registers. */ 111 - #define PM07_CTR_INPUT_MUX(x) (((x) & 0x3F) << 26) 112 - #define PM07_CTR_INPUT_CONTROL(x) (((x) & 1) << 25) 113 - #define PM07_CTR_POLARITY(x) (((x) & 1) << 24) 114 - #define PM07_CTR_COUNT_CYCLES(x) (((x) & 1) << 23) 115 - #define PM07_CTR_ENABLE(x) (((x) & 1) << 22) 116 105 117 106 #endif /* __ASM_CELL_PMU_H__ */