Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

powerpc/oprofile: IBM CELL: add SPU event profiling support

This patch adds the SPU event based profiling functionality for the
IBM Cell processor. Previously, the CELL OProfile kernel code supported
PPU event, PPU cycle profiling and SPU cycle profiling. The addition of
SPU event profiling allows the users to identify where in their SPU code
various SPU events are occurring. This should help users further identify
issues with their code. Note, SPU profiling has some limitations due to HW
constraints. Only one event at a time can be used for profiling and SPU event
profiling must be time sliced across all of the SPUs in a node.

The patch adds a new arch specific file to the OProfile file system. The
file has bit 0 set to indicate that the kernel supports SPU event profiling.
The user tool must check this file/bit to make sure the kernel supports
SPU event profiling before trying to do SPU event profiling. The user tool
check is part of the user tool patch for SPU event profiling.

Signed-off-by: Carl Love <carll@us.ibm.com>
Signed-off-by: Robert Richter <robert.richter@amd.com>

authored by

Carl Love and committed by
Robert Richter
88382329 014cef91

+545 -16
+2
arch/powerpc/include/asm/cell-pmu.h
··· 37 37 #define CBE_PM_STOP_AT_MAX 0x40000000 38 38 #define CBE_PM_TRACE_MODE_GET(pm_control) (((pm_control) >> 28) & 0x3) 39 39 #define CBE_PM_TRACE_MODE_SET(mode) (((mode) & 0x3) << 28) 40 + #define CBE_PM_TRACE_BUF_OVFLW(bit) (((bit) & 0x1) << 17) 40 41 #define CBE_PM_COUNT_MODE_SET(count) (((count) & 0x3) << 18) 41 42 #define CBE_PM_FREEZE_ALL_CTRS 0x00100000 42 43 #define CBE_PM_ENABLE_EXT_TRACE 0x00008000 44 + #define CBE_PM_SPU_ADDR_TRACE_SET(msk) (((msk) & 0x3) << 9) 43 45 44 46 /* Macros for the trace_address register. */ 45 47 #define CBE_PM_TRACE_BUF_FULL 0x00000800
+6
arch/powerpc/include/asm/oprofile_impl.h
··· 32 32 unsigned long mmcr0; 33 33 unsigned long mmcr1; 34 34 unsigned long mmcra; 35 + #ifdef CONFIG_OPROFILE_CELL 36 + /* Register for oprofile user tool to check cell kernel profiling 37 + * suport. 38 + */ 39 + unsigned long cell_support; 40 + #endif 35 41 #endif 36 42 unsigned long enable_kernel; 37 43 unsigned long enable_user;
+6 -1
arch/powerpc/oprofile/cell/pr_util.h
··· 30 30 extern struct delayed_work spu_work; 31 31 extern int spu_prof_running; 32 32 33 + #define TRACE_ARRAY_SIZE 1024 34 + 35 + extern spinlock_t oprof_spu_smpl_arry_lck; 36 + 33 37 struct spu_overlay_info { /* map of sections within an SPU overlay */ 34 38 unsigned int vma; /* SPU virtual memory address from elf */ 35 39 unsigned int size; /* size of section from elf */ ··· 94 90 * cycles_reset is the SPU_CYCLES count value specified by the user. 95 91 */ 96 92 int start_spu_profiling_cycles(unsigned int cycles_reset); 93 + void start_spu_profiling_events(void); 97 94 98 95 void stop_spu_profiling_cycles(void); 99 - 96 + void stop_spu_profiling_events(void); 100 97 101 98 /* add the necessary profiling hooks */ 102 99 int spu_sync_start(void);
+32 -2
arch/powerpc/oprofile/cell/spu_profiler.c
··· 18 18 #include <asm/cell-pmu.h> 19 19 #include "pr_util.h" 20 20 21 - #define TRACE_ARRAY_SIZE 1024 22 21 #define SCALE_SHIFT 14 23 22 24 23 static u32 *samples; 25 24 25 + /* spu_prof_running is a flag used to indicate if spu profiling is enabled 26 + * or not. It is set by the routines start_spu_profiling_cycles() and 27 + * start_spu_profiling_events(). The flag is cleared by the routines 28 + * stop_spu_profiling_cycles() and stop_spu_profiling_events(). These 29 + * routines are called via global_start() and global_stop() which are called in 30 + * op_powerpc_start() and op_powerpc_stop(). These routines are called once 31 + * per system as a result of the user starting/stopping oprofile. Hence, only 32 + * one CPU per user at a time will be changing the value of spu_prof_running. 33 + * In general, OProfile does not protect against multiple users trying to run 34 + * OProfile at a time. 35 + */ 26 36 int spu_prof_running; 27 37 static unsigned int profiling_interval; 28 38 ··· 41 31 42 32 #define SPU_PC_MASK 0xFFFF 43 33 44 - static DEFINE_SPINLOCK(oprof_spu_smpl_arry_lck); 34 + DEFINE_SPINLOCK(oprof_spu_smpl_arry_lck); 45 35 unsigned long oprof_spu_smpl_arry_lck_flags; 46 36 47 37 void set_spu_profiling_frequency(unsigned int freq_khz, unsigned int cycles_reset) ··· 222 212 return 0; 223 213 } 224 214 215 + /* 216 + * Entry point for SPU event profiling. 217 + * NOTE: SPU profiling is done system-wide, not per-CPU. 218 + * 219 + * cycles_reset is the count value specified by the user when 220 + * setting up OProfile to count SPU_CYCLES. 
221 + */ 222 + void start_spu_profiling_events(void) 223 + { 224 + spu_prof_running = 1; 225 + schedule_delayed_work(&spu_work, DEFAULT_TIMER_EXPIRE); 226 + 227 + return; 228 + } 229 + 225 230 void stop_spu_profiling_cycles(void) 226 231 { 227 232 spu_prof_running = 0; 228 233 hrtimer_cancel(&timer); 229 234 kfree(samples); 230 235 pr_debug("SPU_PROF: stop_spu_profiling_cycles issued\n"); 236 + } 237 + 238 + void stop_spu_profiling_events(void) 239 + { 240 + spu_prof_running = 0; 231 241 }
+22
arch/powerpc/oprofile/common.c
··· 132 132 oprofilefs_create_ulong(sb, root, "mmcr0", &sys.mmcr0); 133 133 oprofilefs_create_ulong(sb, root, "mmcr1", &sys.mmcr1); 134 134 oprofilefs_create_ulong(sb, root, "mmcra", &sys.mmcra); 135 + #ifdef CONFIG_OPROFILE_CELL 136 + /* create a file the user tool can check to see what level of profiling 137 + * support exits with this kernel. Initialize bit mask to indicate 138 + * what support the kernel has: 139 + * bit 0 - Supports SPU event profiling in addition to PPU 140 + * event and cycles; and SPU cycle profiling 141 + * bits 1-31 - Currently unused. 142 + * 143 + * If the file does not exist, then the kernel only supports SPU 144 + * cycle profiling, PPU event and cycle profiling. 145 + */ 146 + oprofilefs_create_ulong(sb, root, "cell_support", &sys.cell_support); 147 + sys.cell_support = 0x1; /* Note, the user OProfile tool must check 148 + * that this bit is set before attempting to 149 + * user SPU event profiling. Older kernels 150 + * will not have this file, hence the user 151 + * tool is not allowed to do SPU event 152 + * profiling on older kernels. Older kernels 153 + * will accept SPU events but collected data 154 + * is garbage. 155 + */ 156 + #endif 135 157 #endif 136 158 137 159 for (i = 0; i < model->num_counters; ++i) {
+477 -13
arch/powerpc/oprofile/op_model_cell.c
··· 44 44 #define SPU_PROFILING_CYCLES 1 45 45 #define SPU_PROFILING_EVENTS 2 46 46 47 + #define SPU_EVENT_NUM_START 4100 48 + #define SPU_EVENT_NUM_STOP 4399 49 + #define SPU_PROFILE_EVENT_ADDR 4363 /* spu, address trace, decimal */ 50 + #define SPU_PROFILE_EVENT_ADDR_MASK_A 0x146 /* sub unit set to zero */ 51 + #define SPU_PROFILE_EVENT_ADDR_MASK_B 0x186 /* sub unit set to zero */ 52 + 47 53 #define NUM_SPUS_PER_NODE 8 48 54 #define SPU_CYCLES_EVENT_NUM 2 /* event number for SPU_CYCLES */ 49 55 ··· 67 61 68 62 #define MAX_SPU_COUNT 0xFFFFFF /* maximum 24 bit LFSR value */ 69 63 64 + /* Minumum HW interval timer setting to send value to trace buffer is 10 cycle. 65 + * To configure counter to send value every N cycles set counter to 66 + * 2^32 - 1 - N. 67 + */ 68 + #define NUM_INTERVAL_CYC 0xFFFFFFFF - 10 69 + 70 70 /* 71 71 * spu_cycle_reset is the number of cycles between samples. 72 72 * This variable is used for SPU profiling and should ONLY be set ··· 80 68 */ 81 69 static unsigned int spu_cycle_reset; 82 70 static unsigned int profiling_mode; 71 + static int spu_evnt_phys_spu_indx; 83 72 84 73 struct pmc_cntrl_data { 85 74 unsigned long vcntr; ··· 121 108 u16 trace_mode; 122 109 u16 freeze; 123 110 u16 count_mode; 111 + u16 spu_addr_trace; 112 + u8 trace_buf_ovflw; 124 113 }; 125 114 126 115 static struct { ··· 140 125 #define GET_INPUT_CONTROL(x) ((x & 0x00000004) >> 2) 141 126 142 127 static DEFINE_PER_CPU(unsigned long[NR_PHYS_CTRS], pmc_values); 128 + static unsigned long spu_pm_cnt[MAX_NUMNODES * NUM_SPUS_PER_NODE]; 143 129 static struct pmc_cntrl_data pmc_cntrl[NUM_THREADS][NR_PHYS_CTRS]; 144 130 145 131 /* ··· 170 154 171 155 static u32 virt_cntr_inter_mask; 172 156 static struct timer_list timer_virt_cntr; 157 + static struct timer_list timer_spu_event_swap; 173 158 174 159 /* 175 160 * pm_signal needs to be global since it is initialized in ··· 389 372 if (pm_regs.pm_cntrl.trace_mode != 0) 390 373 val |= 
CBE_PM_TRACE_MODE_SET(pm_regs.pm_cntrl.trace_mode); 391 374 375 + if (pm_regs.pm_cntrl.trace_buf_ovflw == 1) 376 + val |= CBE_PM_TRACE_BUF_OVFLW(pm_regs.pm_cntrl.trace_buf_ovflw); 392 377 if (pm_regs.pm_cntrl.freeze == 1) 393 378 val |= CBE_PM_FREEZE_ALL_CTRS; 379 + 380 + val |= CBE_PM_SPU_ADDR_TRACE_SET(pm_regs.pm_cntrl.spu_addr_trace); 394 381 395 382 /* 396 383 * Routine set_count_mode must be called previously to set ··· 584 563 return 0; 585 564 } 586 565 566 + /* Unfortunately, the hardware will only support event profiling 567 + * on one SPU per node at a time. Therefore, we must time slice 568 + * the profiling across all SPUs in the node. Note, we do this 569 + * in parallel for each node. The following routine is called 570 + * periodically based on kernel timer to switch which SPU is 571 + * being monitored in a round robbin fashion. 572 + */ 573 + static void spu_evnt_swap(unsigned long data) 574 + { 575 + int node; 576 + int cur_phys_spu, nxt_phys_spu, cur_spu_evnt_phys_spu_indx; 577 + unsigned long flags; 578 + int cpu; 579 + int ret; 580 + u32 interrupt_mask; 581 + 582 + 583 + /* enable interrupts on cntr 0 */ 584 + interrupt_mask = CBE_PM_CTR_OVERFLOW_INTR(0); 585 + 586 + hdw_thread = 0; 587 + 588 + /* Make sure spu event interrupt handler and spu event swap 589 + * don't access the counters simultaneously. 
590 + */ 591 + spin_lock_irqsave(&cntr_lock, flags); 592 + 593 + cur_spu_evnt_phys_spu_indx = spu_evnt_phys_spu_indx; 594 + 595 + if (++(spu_evnt_phys_spu_indx) == NUM_SPUS_PER_NODE) 596 + spu_evnt_phys_spu_indx = 0; 597 + 598 + pm_signal[0].sub_unit = spu_evnt_phys_spu_indx; 599 + pm_signal[1].sub_unit = spu_evnt_phys_spu_indx; 600 + pm_signal[2].sub_unit = spu_evnt_phys_spu_indx; 601 + 602 + /* switch the SPU being profiled on each node */ 603 + for_each_online_cpu(cpu) { 604 + if (cbe_get_hw_thread_id(cpu)) 605 + continue; 606 + 607 + node = cbe_cpu_to_node(cpu); 608 + cur_phys_spu = (node * NUM_SPUS_PER_NODE) 609 + + cur_spu_evnt_phys_spu_indx; 610 + nxt_phys_spu = (node * NUM_SPUS_PER_NODE) 611 + + spu_evnt_phys_spu_indx; 612 + 613 + /* 614 + * stop counters, save counter values, restore counts 615 + * for previous physical SPU 616 + */ 617 + cbe_disable_pm(cpu); 618 + cbe_disable_pm_interrupts(cpu); 619 + 620 + spu_pm_cnt[cur_phys_spu] 621 + = cbe_read_ctr(cpu, 0); 622 + 623 + /* restore previous count for the next spu to sample */ 624 + /* NOTE, hardware issue, counter will not start if the 625 + * counter value is at max (0xFFFFFFFF). 
626 + */ 627 + if (spu_pm_cnt[nxt_phys_spu] >= 0xFFFFFFFF) 628 + cbe_write_ctr(cpu, 0, 0xFFFFFFF0); 629 + else 630 + cbe_write_ctr(cpu, 0, spu_pm_cnt[nxt_phys_spu]); 631 + 632 + pm_rtas_reset_signals(cbe_cpu_to_node(cpu)); 633 + 634 + /* setup the debug bus measure the one event and 635 + * the two events to route the next SPU's PC on 636 + * the debug bus 637 + */ 638 + ret = pm_rtas_activate_signals(cbe_cpu_to_node(cpu), 3); 639 + if (ret) 640 + printk(KERN_ERR 641 + "%s: pm_rtas_activate_signals failed, SPU event swap\n", 642 + __func__); 643 + 644 + /* clear the trace buffer, don't want to take PC for 645 + * previous SPU*/ 646 + cbe_write_pm(cpu, trace_address, 0); 647 + 648 + enable_ctr(cpu, 0, pm_regs.pm07_cntrl); 649 + 650 + /* Enable interrupts on the CPU thread that is starting */ 651 + cbe_enable_pm_interrupts(cpu, hdw_thread, 652 + interrupt_mask); 653 + cbe_enable_pm(cpu); 654 + } 655 + 656 + spin_unlock_irqrestore(&cntr_lock, flags); 657 + 658 + /* swap approximately every 0.1 seconds */ 659 + mod_timer(&timer_spu_event_swap, jiffies + HZ / 25); 660 + } 661 + 662 + static void start_spu_event_swap(void) 663 + { 664 + init_timer(&timer_spu_event_swap); 665 + timer_spu_event_swap.function = spu_evnt_swap; 666 + timer_spu_event_swap.data = 0UL; 667 + timer_spu_event_swap.expires = jiffies + HZ / 25; 668 + add_timer(&timer_spu_event_swap); 669 + } 670 + 671 + static int cell_reg_setup_spu_events(struct op_counter_config *ctr, 672 + struct op_system_config *sys, int num_ctrs) 673 + { 674 + int i; 675 + 676 + /* routine is called once for all nodes */ 677 + 678 + spu_evnt_phys_spu_indx = 0; 679 + /* 680 + * For all events except PPU CYCLEs, each node will need to make 681 + * the rtas cbe-perftools call to setup and reset the debug bus. 682 + * Make the token lookup call once and store it in the global 683 + * variable pm_rtas_token. 
684 + */ 685 + pm_rtas_token = rtas_token("ibm,cbe-perftools"); 686 + 687 + if (unlikely(pm_rtas_token == RTAS_UNKNOWN_SERVICE)) { 688 + printk(KERN_ERR 689 + "%s: rtas token ibm,cbe-perftools unknown\n", 690 + __func__); 691 + return -EIO; 692 + } 693 + 694 + /* setup the pm_control register settings, 695 + * settings will be written per node by the 696 + * cell_cpu_setup() function. 697 + */ 698 + pm_regs.pm_cntrl.trace_buf_ovflw = 1; 699 + 700 + /* Use the occurrence trace mode to have SPU PC saved 701 + * to the trace buffer. Occurrence data in trace buffer 702 + * is not used. Bit 2 must be set to store SPU addresses. 703 + */ 704 + pm_regs.pm_cntrl.trace_mode = 2; 705 + 706 + pm_regs.pm_cntrl.spu_addr_trace = 0x1; /* using debug bus 707 + event 2 & 3 */ 708 + 709 + /* setup the debug bus event array with the SPU PC routing events. 710 + * Note, pm_signal[0] will be filled in by set_pm_event() call below. 711 + */ 712 + pm_signal[1].signal_group = SPU_PROFILE_EVENT_ADDR / 100; 713 + pm_signal[1].bus_word = GET_BUS_WORD(SPU_PROFILE_EVENT_ADDR_MASK_A); 714 + pm_signal[1].bit = SPU_PROFILE_EVENT_ADDR % 100; 715 + pm_signal[1].sub_unit = spu_evnt_phys_spu_indx; 716 + 717 + pm_signal[2].signal_group = SPU_PROFILE_EVENT_ADDR / 100; 718 + pm_signal[2].bus_word = GET_BUS_WORD(SPU_PROFILE_EVENT_ADDR_MASK_B); 719 + pm_signal[2].bit = SPU_PROFILE_EVENT_ADDR % 100; 720 + pm_signal[2].sub_unit = spu_evnt_phys_spu_indx; 721 + 722 + /* Set the user selected spu event to profile on, 723 + * note, only one SPU profiling event is supported 724 + */ 725 + num_counters = 1; /* Only support one SPU event at a time */ 726 + set_pm_event(0, ctr[0].event, ctr[0].unit_mask); 727 + 728 + reset_value[0] = 0xFFFFFFFF - ctr[0].count; 729 + 730 + /* global, used by cell_cpu_setup */ 731 + ctr_enabled |= 1; 732 + 733 + /* Initialize the count for each SPU to the reset value */ 734 + for (i=0; i < MAX_NUMNODES * NUM_SPUS_PER_NODE; i++) 735 + spu_pm_cnt[i] = reset_value[0]; 736 + 737 + return 
0; 738 + } 739 + 587 740 static int cell_reg_setup_ppu(struct op_counter_config *ctr, 588 741 struct op_system_config *sys, int num_ctrs) 589 742 { 743 + /* routine is called once for all nodes */ 590 744 int i, j, cpu; 591 745 592 746 num_counters = num_ctrs; ··· 773 577 __func__); 774 578 return -EIO; 775 579 } 776 - pm_regs.group_control = 0; 777 - pm_regs.debug_bus_control = 0; 778 - 779 - /* setup the pm_control register */ 780 - memset(&pm_regs.pm_cntrl, 0, sizeof(struct pm_cntrl)); 781 - pm_regs.pm_cntrl.stop_at_max = 1; 782 - pm_regs.pm_cntrl.trace_mode = 0; 783 - pm_regs.pm_cntrl.freeze = 1; 784 580 785 581 set_count_mode(sys->enable_kernel, sys->enable_user); 786 582 ··· 845 657 static int cell_reg_setup(struct op_counter_config *ctr, 846 658 struct op_system_config *sys, int num_ctrs) 847 659 { 848 - int ret; 849 - 660 + int ret=0; 850 661 spu_cycle_reset = 0; 662 + 663 + /* initialize the spu_arr_trace value, will be reset if 664 + * doing spu event profiling. 665 + */ 666 + pm_regs.group_control = 0; 667 + pm_regs.debug_bus_control = 0; 668 + pm_regs.pm_cntrl.stop_at_max = 1; 669 + pm_regs.pm_cntrl.trace_mode = 0; 670 + pm_regs.pm_cntrl.freeze = 1; 671 + pm_regs.pm_cntrl.trace_buf_ovflw = 0; 672 + pm_regs.pm_cntrl.spu_addr_trace = 0; 851 673 852 674 /* 853 675 * For all events except PPU CYCLEs, each node will need to make ··· 877 679 if (ctr[0].event == SPU_CYCLES_EVENT_NUM) { 878 680 profiling_mode = SPU_PROFILING_CYCLES; 879 681 ret = cell_reg_setup_spu_cycles(ctr, sys, num_ctrs); 682 + } else if ((ctr[0].event >= SPU_EVENT_NUM_START) && 683 + (ctr[0].event <= SPU_EVENT_NUM_STOP)) { 684 + profiling_mode = SPU_PROFILING_EVENTS; 685 + spu_cycle_reset = ctr[0].count; 686 + 687 + /* for SPU event profiling, need to setup the 688 + * pm_signal array with the events to route the 689 + * SPU PC before making the FW call. Note, only 690 + * one SPU event for profiling can be specified 691 + * at a time. 
692 + */ 693 + cell_reg_setup_spu_events(ctr, sys, num_ctrs); 880 694 } else { 881 695 profiling_mode = PPU_PROFILING; 882 696 ret = cell_reg_setup_ppu(ctr, sys, num_ctrs); ··· 905 695 u32 cpu = smp_processor_id(); 906 696 u32 num_enabled = 0; 907 697 int i; 698 + int ret; 908 699 909 700 /* Cycle based SPU profiling does not use the performance 910 701 * counters. The trace array is configured to collect ··· 940 729 * The pm_rtas_activate_signals will return -EIO if the FW 941 730 * call failed. 942 731 */ 943 - return pm_rtas_activate_signals(cbe_cpu_to_node(cpu), num_enabled); 732 + if (profiling_mode == SPU_PROFILING_EVENTS) { 733 + /* For SPU event profiling also need to setup the 734 + * pm interval timer 735 + */ 736 + ret = pm_rtas_activate_signals(cbe_cpu_to_node(cpu), 737 + num_enabled+2); 738 + /* store PC from debug bus to Trace buffer as often 739 + * as possible (every 10 cycles) 740 + */ 741 + cbe_write_pm(cpu, pm_interval, NUM_INTERVAL_CYC); 742 + return ret; 743 + } else 744 + return pm_rtas_activate_signals(cbe_cpu_to_node(cpu), 745 + num_enabled); 944 746 } 945 747 946 748 #define ENTRIES 303 ··· 1150 926 int cpu; 1151 927 1152 928 oprofile_running = 0; 929 + smp_wmb(); 1153 930 1154 931 #ifdef CONFIG_CPU_FREQ 1155 932 cpufreq_unregister_notifier(&cpu_freq_notifier_block, ··· 1182 957 pm_rtas_reset_signals(cbe_cpu_to_node(cpu)); 1183 958 } 1184 959 1185 - if (profiling_mode == SPU_PROFILING_CYCLES) 1186 - stop_spu_profiling_cycles(); 960 + stop_spu_profiling_cycles(); 961 + } 962 + 963 + static void cell_global_stop_spu_events(void) 964 + { 965 + int cpu; 966 + oprofile_running = 0; 967 + 968 + stop_spu_profiling_events(); 969 + smp_wmb(); 970 + 971 + for_each_online_cpu(cpu) { 972 + if (cbe_get_hw_thread_id(cpu)) 973 + continue; 974 + 975 + cbe_sync_irq(cbe_cpu_to_node(cpu)); 976 + /* Stop the counters */ 977 + cbe_disable_pm(cpu); 978 + cbe_write_pm07_control(cpu, 0, 0); 979 + 980 + /* Deactivate the signals */ 981 + 
pm_rtas_reset_signals(cbe_cpu_to_node(cpu)); 982 + 983 + /* Deactivate interrupts */ 984 + cbe_disable_pm_interrupts(cpu); 985 + } 986 + del_timer_sync(&timer_spu_event_swap); 1187 987 } 1188 988 1189 989 static void cell_global_stop_ppu(void) ··· 1244 994 { 1245 995 if (profiling_mode == PPU_PROFILING) 1246 996 cell_global_stop_ppu(); 997 + else if (profiling_mode == SPU_PROFILING_EVENTS) 998 + cell_global_stop_spu_events(); 1247 999 else 1248 1000 cell_global_stop_spu_cycles(); 1249 1001 } ··· 1340 1088 return rtas_error; 1341 1089 } 1342 1090 1091 + static int cell_global_start_spu_events(struct op_counter_config *ctr) 1092 + { 1093 + int cpu; 1094 + u32 interrupt_mask = 0; 1095 + int rtn = 0; 1096 + 1097 + hdw_thread = 0; 1098 + 1099 + /* spu event profiling, uses the performance counters to generate 1100 + * an interrupt. The hardware is setup to store the SPU program 1101 + * counter into the trace array. The occurrence mode is used to 1102 + * enable storing data to the trace buffer. The bits are set 1103 + * to send/store the SPU address in the trace buffer. The debug 1104 + * bus must be setup to route the SPU program counter onto the 1105 + * debug bus. The occurrence data in the trace buffer is not used. 1106 + */ 1107 + 1108 + /* This routine gets called once for the system. 1109 + * There is one performance monitor per node, so we 1110 + * only need to perform this function once per node. 1111 + */ 1112 + 1113 + for_each_online_cpu(cpu) { 1114 + if (cbe_get_hw_thread_id(cpu)) 1115 + continue; 1116 + 1117 + /* 1118 + * Setup SPU event-based profiling. 1119 + * Set perf_mon_control bit 0 to a zero before 1120 + * enabling spu collection hardware. 1121 + * 1122 + * Only support one SPU event on one SPU per node. 
1123 + */ 1124 + if (ctr_enabled & 1) { 1125 + cbe_write_ctr(cpu, 0, reset_value[0]); 1126 + enable_ctr(cpu, 0, pm_regs.pm07_cntrl); 1127 + interrupt_mask |= 1128 + CBE_PM_CTR_OVERFLOW_INTR(0); 1129 + } else { 1130 + /* Disable counter */ 1131 + cbe_write_pm07_control(cpu, 0, 0); 1132 + } 1133 + 1134 + cbe_get_and_clear_pm_interrupts(cpu); 1135 + cbe_enable_pm_interrupts(cpu, hdw_thread, interrupt_mask); 1136 + cbe_enable_pm(cpu); 1137 + 1138 + /* clear the trace buffer */ 1139 + cbe_write_pm(cpu, trace_address, 0); 1140 + } 1141 + 1142 + /* Start the timer to time slice collecting the event profile 1143 + * on each of the SPUs. Note, can collect profile on one SPU 1144 + * per node at a time. 1145 + */ 1146 + start_spu_event_swap(); 1147 + start_spu_profiling_events(); 1148 + oprofile_running = 1; 1149 + smp_wmb(); 1150 + 1151 + return rtn; 1152 + } 1153 + 1343 1154 static int cell_global_start_ppu(struct op_counter_config *ctr) 1344 1155 { 1345 1156 u32 cpu, i; ··· 1454 1139 { 1455 1140 if (profiling_mode == SPU_PROFILING_CYCLES) 1456 1141 return cell_global_start_spu_cycles(ctr); 1142 + else if (profiling_mode == SPU_PROFILING_EVENTS) 1143 + return cell_global_start_spu_events(ctr); 1457 1144 else 1458 1145 return cell_global_start_ppu(ctr); 1459 1146 } 1460 1147 1148 + 1149 + /* The SPU interrupt handler 1150 + * 1151 + * SPU event profiling works as follows: 1152 + * The pm_signal[0] holds the one SPU event to be measured. It is routed on 1153 + * the debug bus using word 0 or 1. The value of pm_signal[1] and 1154 + * pm_signal[2] contain the necessary events to route the SPU program 1155 + * counter for the selected SPU onto the debug bus using words 2 and 3. 1156 + * The pm_interval register is setup to write the SPU PC value into the 1157 + * trace buffer at the maximum rate possible. The trace buffer is configured 1158 + * to store the PCs, wrapping when it is full. 
The performance counter is 1159 + * intialized to the max hardware count minus the number of events, N, between 1160 + * samples. Once the N events have occured, a HW counter overflow occurs 1161 + * causing the generation of a HW counter interrupt which also stops the 1162 + * writing of the SPU PC values to the trace buffer. Hence the last PC 1163 + * written to the trace buffer is the SPU PC that we want. Unfortunately, 1164 + * we have to read from the beginning of the trace buffer to get to the 1165 + * last value written. We just hope the PPU has nothing better to do then 1166 + * service this interrupt. The PC for the specific SPU being profiled is 1167 + * extracted from the trace buffer processed and stored. The trace buffer 1168 + * is cleared, interrupts are cleared, the counter is reset to max - N. 1169 + * A kernel timer is used to periodically call the routine spu_evnt_swap() 1170 + * to switch to the next physical SPU in the node to profile in round robbin 1171 + * order. This way data is collected for all SPUs on the node. It does mean 1172 + * that we need to use a relatively small value of N to ensure enough samples 1173 + * on each SPU are collected each SPU is being profiled 1/8 of the time. 1174 + * It may also be necessary to use a longer sample collection period. 1175 + */ 1176 + static void cell_handle_interrupt_spu(struct pt_regs *regs, 1177 + struct op_counter_config *ctr) 1178 + { 1179 + u32 cpu, cpu_tmp; 1180 + u64 trace_entry; 1181 + u32 interrupt_mask; 1182 + u64 trace_buffer[2]; 1183 + u64 last_trace_buffer; 1184 + u32 sample; 1185 + u32 trace_addr; 1186 + unsigned long sample_array_lock_flags; 1187 + int spu_num; 1188 + unsigned long flags; 1189 + 1190 + /* Make sure spu event interrupt handler and spu event swap 1191 + * don't access the counters simultaneously. 
1192 + */ 1193 + cpu = smp_processor_id(); 1194 + spin_lock_irqsave(&cntr_lock, flags); 1195 + 1196 + cpu_tmp = cpu; 1197 + cbe_disable_pm(cpu); 1198 + 1199 + interrupt_mask = cbe_get_and_clear_pm_interrupts(cpu); 1200 + 1201 + sample = 0xABCDEF; 1202 + trace_entry = 0xfedcba; 1203 + last_trace_buffer = 0xdeadbeaf; 1204 + 1205 + if ((oprofile_running == 1) && (interrupt_mask != 0)) { 1206 + /* disable writes to trace buff */ 1207 + cbe_write_pm(cpu, pm_interval, 0); 1208 + 1209 + /* only have one perf cntr being used, cntr 0 */ 1210 + if ((interrupt_mask & CBE_PM_CTR_OVERFLOW_INTR(0)) 1211 + && ctr[0].enabled) 1212 + /* The SPU PC values will be read 1213 + * from the trace buffer, reset counter 1214 + */ 1215 + 1216 + cbe_write_ctr(cpu, 0, reset_value[0]); 1217 + 1218 + trace_addr = cbe_read_pm(cpu, trace_address); 1219 + 1220 + while (!(trace_addr & CBE_PM_TRACE_BUF_EMPTY)) { 1221 + /* There is data in the trace buffer to process 1222 + * Read the buffer until you get to the last 1223 + * entry. This is the value we want. 
1224 + */ 1225 + 1226 + cbe_read_trace_buffer(cpu, trace_buffer); 1227 + trace_addr = cbe_read_pm(cpu, trace_address); 1228 + } 1229 + 1230 + /* SPU Address 16 bit count format for 128 bit 1231 + * HW trace buffer is used for the SPU PC storage 1232 + * HDR bits 0:15 1233 + * SPU Addr 0 bits 16:31 1234 + * SPU Addr 1 bits 32:47 1235 + * unused bits 48:127 1236 + * 1237 + * HDR: bit4 = 1 SPU Address 0 valid 1238 + * HDR: bit5 = 1 SPU Address 1 valid 1239 + * - unfortunately, the valid bits don't seem to work 1240 + * 1241 + * Note trace_buffer[0] holds bits 0:63 of the HW 1242 + * trace buffer, trace_buffer[1] holds bits 64:127 1243 + */ 1244 + 1245 + trace_entry = trace_buffer[0] 1246 + & 0x00000000FFFF0000; 1247 + 1248 + /* only top 16 of the 18 bit SPU PC address 1249 + * is stored in trace buffer, hence shift right 1250 + * by 16 -2 bits */ 1251 + sample = trace_entry >> 14; 1252 + last_trace_buffer = trace_buffer[0]; 1253 + 1254 + spu_num = spu_evnt_phys_spu_indx 1255 + + (cbe_cpu_to_node(cpu) * NUM_SPUS_PER_NODE); 1256 + 1257 + /* make sure only one process at a time is calling 1258 + * spu_sync_buffer() 1259 + */ 1260 + spin_lock_irqsave(&oprof_spu_smpl_arry_lck, 1261 + sample_array_lock_flags); 1262 + spu_sync_buffer(spu_num, &sample, 1); 1263 + spin_unlock_irqrestore(&oprof_spu_smpl_arry_lck, 1264 + sample_array_lock_flags); 1265 + 1266 + smp_wmb(); /* insure spu event buffer updates are written 1267 + * don't want events intermingled... */ 1268 + 1269 + /* The counters were frozen by the interrupt. 1270 + * Reenable the interrupt and restart the counters. 
1271 + */ 1272 + cbe_write_pm(cpu, pm_interval, NUM_INTERVAL_CYC); 1273 + cbe_enable_pm_interrupts(cpu, hdw_thread, 1274 + virt_cntr_inter_mask); 1275 + 1276 + /* clear the trace buffer, re-enable writes to trace buff */ 1277 + cbe_write_pm(cpu, trace_address, 0); 1278 + cbe_write_pm(cpu, pm_interval, NUM_INTERVAL_CYC); 1279 + 1280 + /* The writes to the various performance counters only writes 1281 + * to a latch. The new values (interrupt setting bits, reset 1282 + * counter value etc.) are not copied to the actual registers 1283 + * until the performance monitor is enabled. In order to get 1284 + * this to work as desired, the permormance monitor needs to 1285 + * be disabled while writing to the latches. This is a 1286 + * HW design issue. 1287 + */ 1288 + write_pm_cntrl(cpu); 1289 + cbe_enable_pm(cpu); 1290 + } 1291 + spin_unlock_irqrestore(&cntr_lock, flags); 1292 + } 1461 1293 1462 1294 static void cell_handle_interrupt_ppu(struct pt_regs *regs, 1463 1295 struct op_counter_config *ctr) ··· 1684 1222 { 1685 1223 if (profiling_mode == PPU_PROFILING) 1686 1224 cell_handle_interrupt_ppu(regs, ctr); 1225 + else 1226 + cell_handle_interrupt_spu(regs, ctr); 1687 1227 } 1688 1228 1689 1229 /*