Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rric/oprofile

+2

arch/powerpc/include/asm/cell-pmu.h

··· 37 37 #define CBE_PM_STOP_AT_MAX 0x40000000 38 38 #define CBE_PM_TRACE_MODE_GET(pm_control) (((pm_control) >> 28) & 0x3) 39 39 #define CBE_PM_TRACE_MODE_SET(mode) (((mode) & 0x3) << 28) 40 + #define CBE_PM_TRACE_BUF_OVFLW(bit) (((bit) & 0x1) << 17) 40 41 #define CBE_PM_COUNT_MODE_SET(count) (((count) & 0x3) << 18) 41 42 #define CBE_PM_FREEZE_ALL_CTRS 0x00100000 42 43 #define CBE_PM_ENABLE_EXT_TRACE 0x00008000 44 + #define CBE_PM_SPU_ADDR_TRACE_SET(msk) (((msk) & 0x3) << 9) 43 45 44 46 /* Macros for the trace_address register. */ 45 47 #define CBE_PM_TRACE_BUF_FULL 0x00000800

+6

arch/powerpc/include/asm/oprofile_impl.h

··· 32 32 unsigned long mmcr0; 33 33 unsigned long mmcr1; 34 34 unsigned long mmcra; 35 + #ifdef CONFIG_OPROFILE_CELL 36 + /* Register for oprofile user tool to check cell kernel profiling 37 + * support. 38 + */ 39 + unsigned long cell_support; 40 + #endif 35 41 #endif 36 42 unsigned long enable_kernel; 37 43 unsigned long enable_user;

+8 -3

arch/powerpc/oprofile/cell/pr_util.h

··· 30 30 extern struct delayed_work spu_work; 31 31 extern int spu_prof_running; 32 32 33 + #define TRACE_ARRAY_SIZE 1024 34 + 35 + extern spinlock_t oprof_spu_smpl_arry_lck; 36 + 33 37 struct spu_overlay_info { /* map of sections within an SPU overlay */ 34 38 unsigned int vma; /* SPU virtual memory address from elf */ 35 39 unsigned int size; /* size of section from elf */ ··· 93 89 * Entry point for SPU profiling. 94 90 * cycles_reset is the SPU_CYCLES count value specified by the user. 95 91 */ 96 - int start_spu_profiling(unsigned int cycles_reset); 92 + int start_spu_profiling_cycles(unsigned int cycles_reset); 93 + void start_spu_profiling_events(void); 97 94 98 - void stop_spu_profiling(void); 99 - 95 + void stop_spu_profiling_cycles(void); 96 + void stop_spu_profiling_events(void); 100 97 101 98 /* add the necessary profiling hooks */ 102 99 int spu_sync_start(void);

+43 -13

arch/powerpc/oprofile/cell/spu_profiler.c

··· 18 18 #include <asm/cell-pmu.h> 19 19 #include "pr_util.h" 20 20 21 - #define TRACE_ARRAY_SIZE 1024 22 21 #define SCALE_SHIFT 14 23 22 24 23 static u32 *samples; 25 24 25 + /* spu_prof_running is a flag used to indicate if spu profiling is enabled 26 + * or not. It is set by the routines start_spu_profiling_cycles() and 27 + * start_spu_profiling_events(). The flag is cleared by the routines 28 + * stop_spu_profiling_cycles() and stop_spu_profiling_events(). These 29 + * routines are called via global_start() and global_stop() which are called in 30 + * op_powerpc_start() and op_powerpc_stop(). These routines are called once 31 + * per system as a result of the user starting/stopping oprofile. Hence, only 32 + * one CPU per user at a time will be changing the value of spu_prof_running. 33 + * In general, OProfile does not protect against multiple users trying to run 34 + * OProfile at a time. 35 + */ 26 36 int spu_prof_running; 27 37 static unsigned int profiling_interval; 28 38 ··· 41 31 42 32 #define SPU_PC_MASK 0xFFFF 43 33 44 - static DEFINE_SPINLOCK(sample_array_lock); 45 - unsigned long sample_array_lock_flags; 34 + DEFINE_SPINLOCK(oprof_spu_smpl_arry_lck); 35 + unsigned long oprof_spu_smpl_arry_lck_flags; 46 36 47 37 void set_spu_profiling_frequency(unsigned int freq_khz, unsigned int cycles_reset) 48 38 { ··· 155 145 * sample array must be loaded and then processed for a given 156 146 * cpu. The sample array is not per cpu. 
157 147 */ 158 - spin_lock_irqsave(&sample_array_lock, 159 - sample_array_lock_flags); 148 + spin_lock_irqsave(&oprof_spu_smpl_arry_lck, 149 + oprof_spu_smpl_arry_lck_flags); 160 150 num_samples = cell_spu_pc_collection(cpu); 161 151 162 152 if (num_samples == 0) { 163 - spin_unlock_irqrestore(&sample_array_lock, 164 - sample_array_lock_flags); 153 + spin_unlock_irqrestore(&oprof_spu_smpl_arry_lck, 154 + oprof_spu_smpl_arry_lck_flags); 165 155 continue; 166 156 } 167 157 ··· 172 162 num_samples); 173 163 } 174 164 175 - spin_unlock_irqrestore(&sample_array_lock, 176 - sample_array_lock_flags); 165 + spin_unlock_irqrestore(&oprof_spu_smpl_arry_lck, 166 + oprof_spu_smpl_arry_lck_flags); 177 167 178 168 } 179 169 smp_wmb(); /* insure spu event buffer updates are written */ ··· 192 182 193 183 static struct hrtimer timer; 194 184 /* 195 - * Entry point for SPU profiling. 185 + * Entry point for SPU cycle profiling. 196 186 * NOTE: SPU profiling is done system-wide, not per-CPU. 197 187 * 198 188 * cycles_reset is the count value specified by the user when 199 189 * setting up OProfile to count SPU_CYCLES. 200 190 */ 201 - int start_spu_profiling(unsigned int cycles_reset) 191 + int start_spu_profiling_cycles(unsigned int cycles_reset) 202 192 { 203 193 ktime_t kt; 204 194 ··· 222 212 return 0; 223 213 } 224 214 225 - void stop_spu_profiling(void) 215 + /* 216 + * Entry point for SPU event profiling. 217 + * NOTE: SPU profiling is done system-wide, not per-CPU. 218 + * 219 + * cycles_reset is the count value specified by the user when 220 + * setting up OProfile to count SPU_CYCLES. 
221 + */ 222 + void start_spu_profiling_events(void) 223 + { 224 + spu_prof_running = 1; 225 + schedule_delayed_work(&spu_work, DEFAULT_TIMER_EXPIRE); 226 + 227 + return; 228 + } 229 + 230 + void stop_spu_profiling_cycles(void) 226 231 { 227 232 spu_prof_running = 0; 228 233 hrtimer_cancel(&timer); 229 234 kfree(samples); 230 - pr_debug("SPU_PROF: stop_spu_profiling issued\n"); 235 + pr_debug("SPU_PROF: stop_spu_profiling_cycles issued\n"); 236 + } 237 + 238 + void stop_spu_profiling_events(void) 239 + { 240 + spu_prof_running = 0; 231 241 }

+22

arch/powerpc/oprofile/common.c

··· 132 132 oprofilefs_create_ulong(sb, root, "mmcr0", &sys.mmcr0); 133 133 oprofilefs_create_ulong(sb, root, "mmcr1", &sys.mmcr1); 134 134 oprofilefs_create_ulong(sb, root, "mmcra", &sys.mmcra); 135 + #ifdef CONFIG_OPROFILE_CELL 136 + /* create a file the user tool can check to see what level of profiling 137 + * support exists with this kernel. Initialize bit mask to indicate 138 + * what support the kernel has: 139 + * bit 0 - Supports SPU event profiling in addition to PPU 140 + * event and cycles; and SPU cycle profiling 141 + * bits 1-31 - Currently unused. 142 + * 143 + * If the file does not exist, then the kernel only supports SPU 144 + * cycle profiling, PPU event and cycle profiling. 145 + */ 146 + oprofilefs_create_ulong(sb, root, "cell_support", &sys.cell_support); 147 + sys.cell_support = 0x1; /* Note, the user OProfile tool must check 148 + * that this bit is set before attempting to 149 + * use SPU event profiling. Older kernels 150 + * will not have this file, hence the user 151 + * tool is not allowed to do SPU event 152 + * profiling on older kernels. Older kernels 153 + * will accept SPU events but collected data 154 + * is garbage. 155 + */ 156 + #endif 135 157 #endif 136 158 137 159 for (i = 0; i < model->num_counters; ++i) {

+638 -138

arch/powerpc/oprofile/op_model_cell.c

··· 40 40 #include "../platforms/cell/interrupt.h" 41 41 #include "cell/pr_util.h" 42 42 43 - static void cell_global_stop_spu(void); 43 + #define PPU_PROFILING 0 44 + #define SPU_PROFILING_CYCLES 1 45 + #define SPU_PROFILING_EVENTS 2 44 46 45 - /* 46 - * spu_cycle_reset is the number of cycles between samples. 47 - * This variable is used for SPU profiling and should ONLY be set 48 - * at the beginning of cell_reg_setup; otherwise, it's read-only. 49 - */ 50 - static unsigned int spu_cycle_reset; 47 + #define SPU_EVENT_NUM_START 4100 48 + #define SPU_EVENT_NUM_STOP 4399 49 + #define SPU_PROFILE_EVENT_ADDR 4363 /* spu, address trace, decimal */ 50 + #define SPU_PROFILE_EVENT_ADDR_MASK_A 0x146 /* sub unit set to zero */ 51 + #define SPU_PROFILE_EVENT_ADDR_MASK_B 0x186 /* sub unit set to zero */ 51 52 52 53 #define NUM_SPUS_PER_NODE 8 53 54 #define SPU_CYCLES_EVENT_NUM 2 /* event number for SPU_CYCLES */ ··· 66 65 #define NUM_INPUT_BUS_WORDS 2 67 66 68 67 #define MAX_SPU_COUNT 0xFFFFFF /* maximum 24 bit LFSR value */ 68 + 69 + /* Minumum HW interval timer setting to send value to trace buffer is 10 cycle. 70 + * To configure counter to send value every N cycles set counter to 71 + * 2^32 - 1 - N. 72 + */ 73 + #define NUM_INTERVAL_CYC 0xFFFFFFFF - 10 74 + 75 + /* 76 + * spu_cycle_reset is the number of cycles between samples. 77 + * This variable is used for SPU profiling and should ONLY be set 78 + * at the beginning of cell_reg_setup; otherwise, it's read-only. 
79 + */ 80 + static unsigned int spu_cycle_reset; 81 + static unsigned int profiling_mode; 82 + static int spu_evnt_phys_spu_indx; 69 83 70 84 struct pmc_cntrl_data { 71 85 unsigned long vcntr; ··· 121 105 u16 trace_mode; 122 106 u16 freeze; 123 107 u16 count_mode; 108 + u16 spu_addr_trace; 109 + u8 trace_buf_ovflw; 124 110 }; 125 111 126 112 static struct { ··· 140 122 #define GET_INPUT_CONTROL(x) ((x & 0x00000004) >> 2) 141 123 142 124 static DEFINE_PER_CPU(unsigned long[NR_PHYS_CTRS], pmc_values); 143 - 125 + static unsigned long spu_pm_cnt[MAX_NUMNODES * NUM_SPUS_PER_NODE]; 144 126 static struct pmc_cntrl_data pmc_cntrl[NUM_THREADS][NR_PHYS_CTRS]; 145 127 146 128 /* ··· 170 152 171 153 static u32 virt_cntr_inter_mask; 172 154 static struct timer_list timer_virt_cntr; 155 + static struct timer_list timer_spu_event_swap; 173 156 174 157 /* 175 158 * pm_signal needs to be global since it is initialized in ··· 184 165 static u32 reset_value[NR_PHYS_CTRS]; 185 166 static int num_counters; 186 167 static int oprofile_running; 187 - static DEFINE_SPINLOCK(virt_cntr_lock); 168 + static DEFINE_SPINLOCK(cntr_lock); 188 169 189 170 static u32 ctr_enabled; 190 171 ··· 355 336 for (i = 0; i < NUM_DEBUG_BUS_WORDS; i++) { 356 337 if (bus_word & (1 << i)) { 357 338 pm_regs.debug_bus_control |= 358 - (bus_type << (30 - (2 * i))); 339 + (bus_type << (30 - (2 * i))); 359 340 360 341 for (j = 0; j < NUM_INPUT_BUS_WORDS; j++) { 361 342 if (input_bus[j] == 0xff) { 362 343 input_bus[j] = i; 363 344 pm_regs.group_control |= 364 - (i << (30 - (2 * j))); 345 + (i << (30 - (2 * j))); 365 346 366 347 break; 367 348 } ··· 386 367 if (pm_regs.pm_cntrl.stop_at_max == 1) 387 368 val |= CBE_PM_STOP_AT_MAX; 388 369 389 - if (pm_regs.pm_cntrl.trace_mode == 1) 370 + if (pm_regs.pm_cntrl.trace_mode != 0) 390 371 val |= CBE_PM_TRACE_MODE_SET(pm_regs.pm_cntrl.trace_mode); 391 372 373 + if (pm_regs.pm_cntrl.trace_buf_ovflw == 1) 374 + val |= CBE_PM_TRACE_BUF_OVFLW(pm_regs.pm_cntrl.trace_buf_ovflw); 
392 375 if (pm_regs.pm_cntrl.freeze == 1) 393 376 val |= CBE_PM_FREEZE_ALL_CTRS; 377 + 378 + val |= CBE_PM_SPU_ADDR_TRACE_SET(pm_regs.pm_cntrl.spu_addr_trace); 394 379 395 380 /* 396 381 * Routine set_count_mode must be called previously to set ··· 464 441 * not both playing with the counters on the same node. 465 442 */ 466 443 467 - spin_lock_irqsave(&virt_cntr_lock, flags); 444 + spin_lock_irqsave(&cntr_lock, flags); 468 445 469 446 prev_hdw_thread = hdw_thread; 470 447 ··· 503 480 cbe_disable_pm_interrupts(cpu); 504 481 for (i = 0; i < num_counters; i++) { 505 482 per_cpu(pmc_values, cpu + prev_hdw_thread)[i] 506 - = cbe_read_ctr(cpu, i); 483 + = cbe_read_ctr(cpu, i); 507 484 508 485 if (per_cpu(pmc_values, cpu + next_hdw_thread)[i] 509 486 == 0xFFFFFFFF) ··· 550 527 cbe_enable_pm(cpu); 551 528 } 552 529 553 - spin_unlock_irqrestore(&virt_cntr_lock, flags); 530 + spin_unlock_irqrestore(&cntr_lock, flags); 554 531 555 532 mod_timer(&timer_virt_cntr, jiffies + HZ / 10); 556 533 } ··· 564 541 add_timer(&timer_virt_cntr); 565 542 } 566 543 567 - /* This function is called once for all cpus combined */ 568 - static int cell_reg_setup(struct op_counter_config *ctr, 544 + static int cell_reg_setup_spu_cycles(struct op_counter_config *ctr, 569 545 struct op_system_config *sys, int num_ctrs) 570 546 { 571 - int i, j, cpu; 572 - spu_cycle_reset = 0; 573 - 574 - if (ctr[0].event == SPU_CYCLES_EVENT_NUM) { 575 - spu_cycle_reset = ctr[0].count; 576 - 577 - /* 578 - * Each node will need to make the rtas call to start 579 - * and stop SPU profiling. Get the token once and store it. 
580 - */ 581 - spu_rtas_token = rtas_token("ibm,cbe-spu-perftools"); 582 - 583 - if (unlikely(spu_rtas_token == RTAS_UNKNOWN_SERVICE)) { 584 - printk(KERN_ERR 585 - "%s: rtas token ibm,cbe-spu-perftools unknown\n", 586 - __func__); 587 - return -EIO; 588 - } 589 - } 590 - 591 - pm_rtas_token = rtas_token("ibm,cbe-perftools"); 547 + spu_cycle_reset = ctr[0].count; 592 548 593 549 /* 594 - * For all events excetp PPU CYCLEs, each node will need to make 550 + * Each node will need to make the rtas call to start 551 + * and stop SPU profiling. Get the token once and store it. 552 + */ 553 + spu_rtas_token = rtas_token("ibm,cbe-spu-perftools"); 554 + 555 + if (unlikely(spu_rtas_token == RTAS_UNKNOWN_SERVICE)) { 556 + printk(KERN_ERR 557 + "%s: rtas token ibm,cbe-spu-perftools unknown\n", 558 + __func__); 559 + return -EIO; 560 + } 561 + return 0; 562 + } 563 + 564 + /* Unfortunately, the hardware will only support event profiling 565 + * on one SPU per node at a time. Therefore, we must time slice 566 + * the profiling across all SPUs in the node. Note, we do this 567 + * in parallel for each node. The following routine is called 568 + * periodically based on kernel timer to switch which SPU is 569 + * being monitored in a round robbin fashion. 570 + */ 571 + static void spu_evnt_swap(unsigned long data) 572 + { 573 + int node; 574 + int cur_phys_spu, nxt_phys_spu, cur_spu_evnt_phys_spu_indx; 575 + unsigned long flags; 576 + int cpu; 577 + int ret; 578 + u32 interrupt_mask; 579 + 580 + 581 + /* enable interrupts on cntr 0 */ 582 + interrupt_mask = CBE_PM_CTR_OVERFLOW_INTR(0); 583 + 584 + hdw_thread = 0; 585 + 586 + /* Make sure spu event interrupt handler and spu event swap 587 + * don't access the counters simultaneously. 
588 + */ 589 + spin_lock_irqsave(&cntr_lock, flags); 590 + 591 + cur_spu_evnt_phys_spu_indx = spu_evnt_phys_spu_indx; 592 + 593 + if (++(spu_evnt_phys_spu_indx) == NUM_SPUS_PER_NODE) 594 + spu_evnt_phys_spu_indx = 0; 595 + 596 + pm_signal[0].sub_unit = spu_evnt_phys_spu_indx; 597 + pm_signal[1].sub_unit = spu_evnt_phys_spu_indx; 598 + pm_signal[2].sub_unit = spu_evnt_phys_spu_indx; 599 + 600 + /* switch the SPU being profiled on each node */ 601 + for_each_online_cpu(cpu) { 602 + if (cbe_get_hw_thread_id(cpu)) 603 + continue; 604 + 605 + node = cbe_cpu_to_node(cpu); 606 + cur_phys_spu = (node * NUM_SPUS_PER_NODE) 607 + + cur_spu_evnt_phys_spu_indx; 608 + nxt_phys_spu = (node * NUM_SPUS_PER_NODE) 609 + + spu_evnt_phys_spu_indx; 610 + 611 + /* 612 + * stop counters, save counter values, restore counts 613 + * for previous physical SPU 614 + */ 615 + cbe_disable_pm(cpu); 616 + cbe_disable_pm_interrupts(cpu); 617 + 618 + spu_pm_cnt[cur_phys_spu] 619 + = cbe_read_ctr(cpu, 0); 620 + 621 + /* restore previous count for the next spu to sample */ 622 + /* NOTE, hardware issue, counter will not start if the 623 + * counter value is at max (0xFFFFFFFF). 
624 + */ 625 + if (spu_pm_cnt[nxt_phys_spu] >= 0xFFFFFFFF) 626 + cbe_write_ctr(cpu, 0, 0xFFFFFFF0); 627 + else 628 + cbe_write_ctr(cpu, 0, spu_pm_cnt[nxt_phys_spu]); 629 + 630 + pm_rtas_reset_signals(cbe_cpu_to_node(cpu)); 631 + 632 + /* setup the debug bus measure the one event and 633 + * the two events to route the next SPU's PC on 634 + * the debug bus 635 + */ 636 + ret = pm_rtas_activate_signals(cbe_cpu_to_node(cpu), 3); 637 + if (ret) 638 + printk(KERN_ERR "%s: pm_rtas_activate_signals failed, " 639 + "SPU event swap\n", __func__); 640 + 641 + /* clear the trace buffer, don't want to take PC for 642 + * previous SPU*/ 643 + cbe_write_pm(cpu, trace_address, 0); 644 + 645 + enable_ctr(cpu, 0, pm_regs.pm07_cntrl); 646 + 647 + /* Enable interrupts on the CPU thread that is starting */ 648 + cbe_enable_pm_interrupts(cpu, hdw_thread, 649 + interrupt_mask); 650 + cbe_enable_pm(cpu); 651 + } 652 + 653 + spin_unlock_irqrestore(&cntr_lock, flags); 654 + 655 + /* swap approximately every 0.1 seconds */ 656 + mod_timer(&timer_spu_event_swap, jiffies + HZ / 25); 657 + } 658 + 659 + static void start_spu_event_swap(void) 660 + { 661 + init_timer(&timer_spu_event_swap); 662 + timer_spu_event_swap.function = spu_evnt_swap; 663 + timer_spu_event_swap.data = 0UL; 664 + timer_spu_event_swap.expires = jiffies + HZ / 25; 665 + add_timer(&timer_spu_event_swap); 666 + } 667 + 668 + static int cell_reg_setup_spu_events(struct op_counter_config *ctr, 669 + struct op_system_config *sys, int num_ctrs) 670 + { 671 + int i; 672 + 673 + /* routine is called once for all nodes */ 674 + 675 + spu_evnt_phys_spu_indx = 0; 676 + /* 677 + * For all events except PPU CYCLEs, each node will need to make 595 678 * the rtas cbe-perftools call to setup and reset the debug bus. 596 679 * Make the token lookup call once and store it in the global 597 680 * variable pm_rtas_token. 
598 681 */ 682 + pm_rtas_token = rtas_token("ibm,cbe-perftools"); 683 + 599 684 if (unlikely(pm_rtas_token == RTAS_UNKNOWN_SERVICE)) { 600 685 printk(KERN_ERR 601 686 "%s: rtas token ibm,cbe-perftools unknown\n", 602 687 __func__); 603 688 return -EIO; 604 689 } 690 + 691 + /* setup the pm_control register settings, 692 + * settings will be written per node by the 693 + * cell_cpu_setup() function. 694 + */ 695 + pm_regs.pm_cntrl.trace_buf_ovflw = 1; 696 + 697 + /* Use the occurrence trace mode to have SPU PC saved 698 + * to the trace buffer. Occurrence data in trace buffer 699 + * is not used. Bit 2 must be set to store SPU addresses. 700 + */ 701 + pm_regs.pm_cntrl.trace_mode = 2; 702 + 703 + pm_regs.pm_cntrl.spu_addr_trace = 0x1; /* using debug bus 704 + event 2 & 3 */ 705 + 706 + /* setup the debug bus event array with the SPU PC routing events. 707 + * Note, pm_signal[0] will be filled in by set_pm_event() call below. 708 + */ 709 + pm_signal[1].signal_group = SPU_PROFILE_EVENT_ADDR / 100; 710 + pm_signal[1].bus_word = GET_BUS_WORD(SPU_PROFILE_EVENT_ADDR_MASK_A); 711 + pm_signal[1].bit = SPU_PROFILE_EVENT_ADDR % 100; 712 + pm_signal[1].sub_unit = spu_evnt_phys_spu_indx; 713 + 714 + pm_signal[2].signal_group = SPU_PROFILE_EVENT_ADDR / 100; 715 + pm_signal[2].bus_word = GET_BUS_WORD(SPU_PROFILE_EVENT_ADDR_MASK_B); 716 + pm_signal[2].bit = SPU_PROFILE_EVENT_ADDR % 100; 717 + pm_signal[2].sub_unit = spu_evnt_phys_spu_indx; 718 + 719 + /* Set the user selected spu event to profile on, 720 + * note, only one SPU profiling event is supported 721 + */ 722 + num_counters = 1; /* Only support one SPU event at a time */ 723 + set_pm_event(0, ctr[0].event, ctr[0].unit_mask); 724 + 725 + reset_value[0] = 0xFFFFFFFF - ctr[0].count; 726 + 727 + /* global, used by cell_cpu_setup */ 728 + ctr_enabled |= 1; 729 + 730 + /* Initialize the count for each SPU to the reset value */ 731 + for (i=0; i < MAX_NUMNODES * NUM_SPUS_PER_NODE; i++) 732 + spu_pm_cnt[i] = reset_value[0]; 733 
+ 734 + return 0; 735 + } 736 + 737 + static int cell_reg_setup_ppu(struct op_counter_config *ctr, 738 + struct op_system_config *sys, int num_ctrs) 739 + { 740 + /* routine is called once for all nodes */ 741 + int i, j, cpu; 605 742 606 743 num_counters = num_ctrs; 607 744 ··· 772 589 __func__); 773 590 return -EIO; 774 591 } 775 - pm_regs.group_control = 0; 776 - pm_regs.debug_bus_control = 0; 777 - 778 - /* setup the pm_control register */ 779 - memset(&pm_regs.pm_cntrl, 0, sizeof(struct pm_cntrl)); 780 - pm_regs.pm_cntrl.stop_at_max = 1; 781 - pm_regs.pm_cntrl.trace_mode = 0; 782 - pm_regs.pm_cntrl.freeze = 1; 783 592 784 593 set_count_mode(sys->enable_kernel, sys->enable_user); 785 594 ··· 840 665 } 841 666 842 667 668 + /* This function is called once for all cpus combined */ 669 + static int cell_reg_setup(struct op_counter_config *ctr, 670 + struct op_system_config *sys, int num_ctrs) 671 + { 672 + int ret=0; 673 + spu_cycle_reset = 0; 674 + 675 + /* initialize the spu_arr_trace value, will be reset if 676 + * doing spu event profiling. 677 + */ 678 + pm_regs.group_control = 0; 679 + pm_regs.debug_bus_control = 0; 680 + pm_regs.pm_cntrl.stop_at_max = 1; 681 + pm_regs.pm_cntrl.trace_mode = 0; 682 + pm_regs.pm_cntrl.freeze = 1; 683 + pm_regs.pm_cntrl.trace_buf_ovflw = 0; 684 + pm_regs.pm_cntrl.spu_addr_trace = 0; 685 + 686 + /* 687 + * For all events except PPU CYCLEs, each node will need to make 688 + * the rtas cbe-perftools call to setup and reset the debug bus. 689 + * Make the token lookup call once and store it in the global 690 + * variable pm_rtas_token. 
691 + */ 692 + pm_rtas_token = rtas_token("ibm,cbe-perftools"); 693 + 694 + if (unlikely(pm_rtas_token == RTAS_UNKNOWN_SERVICE)) { 695 + printk(KERN_ERR 696 + "%s: rtas token ibm,cbe-perftools unknown\n", 697 + __func__); 698 + return -EIO; 699 + } 700 + 701 + if (ctr[0].event == SPU_CYCLES_EVENT_NUM) { 702 + profiling_mode = SPU_PROFILING_CYCLES; 703 + ret = cell_reg_setup_spu_cycles(ctr, sys, num_ctrs); 704 + } else if ((ctr[0].event >= SPU_EVENT_NUM_START) && 705 + (ctr[0].event <= SPU_EVENT_NUM_STOP)) { 706 + profiling_mode = SPU_PROFILING_EVENTS; 707 + spu_cycle_reset = ctr[0].count; 708 + 709 + /* for SPU event profiling, need to setup the 710 + * pm_signal array with the events to route the 711 + * SPU PC before making the FW call. Note, only 712 + * one SPU event for profiling can be specified 713 + * at a time. 714 + */ 715 + cell_reg_setup_spu_events(ctr, sys, num_ctrs); 716 + } else { 717 + profiling_mode = PPU_PROFILING; 718 + ret = cell_reg_setup_ppu(ctr, sys, num_ctrs); 719 + } 720 + 721 + return ret; 722 + } 723 + 724 + 843 725 844 726 /* This function is called once for each cpu */ 845 727 static int cell_cpu_setup(struct op_counter_config *cntr) ··· 904 672 u32 cpu = smp_processor_id(); 905 673 u32 num_enabled = 0; 906 674 int i; 675 + int ret; 907 676 908 - if (spu_cycle_reset) 677 + /* Cycle based SPU profiling does not use the performance 678 + * counters. The trace array is configured to collect 679 + * the data. 680 + */ 681 + if (profiling_mode == SPU_PROFILING_CYCLES) 909 682 return 0; 910 683 911 684 /* There is one performance monitor per processor chip (i.e. 
node), ··· 923 686 cbe_disable_pm(cpu); 924 687 cbe_disable_pm_interrupts(cpu); 925 688 926 - cbe_write_pm(cpu, pm_interval, 0); 927 689 cbe_write_pm(cpu, pm_start_stop, 0); 928 690 cbe_write_pm(cpu, group_control, pm_regs.group_control); 929 691 cbe_write_pm(cpu, debug_bus_control, pm_regs.debug_bus_control); ··· 939 703 * The pm_rtas_activate_signals will return -EIO if the FW 940 704 * call failed. 941 705 */ 942 - return pm_rtas_activate_signals(cbe_cpu_to_node(cpu), num_enabled); 706 + if (profiling_mode == SPU_PROFILING_EVENTS) { 707 + /* For SPU event profiling also need to setup the 708 + * pm interval timer 709 + */ 710 + ret = pm_rtas_activate_signals(cbe_cpu_to_node(cpu), 711 + num_enabled+2); 712 + /* store PC from debug bus to Trace buffer as often 713 + * as possible (every 10 cycles) 714 + */ 715 + cbe_write_pm(cpu, pm_interval, NUM_INTERVAL_CYC); 716 + return ret; 717 + } else 718 + return pm_rtas_activate_signals(cbe_cpu_to_node(cpu), 719 + num_enabled); 943 720 } 944 721 945 722 #define ENTRIES 303 ··· 1134 885 }; 1135 886 #endif 1136 887 1137 - static int cell_global_start_spu(struct op_counter_config *ctr) 888 + /* 889 + * Note the generic OProfile stop calls do not support returning 890 + * an error on stop. Hence, will not return an error if the FW 891 + * calls fail on stop. Failure to reset the debug bus is not an issue. 892 + * Failure to disable the SPU profiling is not an issue. The FW calls 893 + * to enable the performance counters and debug bus will work even if 894 + * the hardware was not cleanly reset. 
895 + */ 896 + static void cell_global_stop_spu_cycles(void) 897 + { 898 + int subfunc, rtn_value; 899 + unsigned int lfsr_value; 900 + int cpu; 901 + 902 + oprofile_running = 0; 903 + smp_wmb(); 904 + 905 + #ifdef CONFIG_CPU_FREQ 906 + cpufreq_unregister_notifier(&cpu_freq_notifier_block, 907 + CPUFREQ_TRANSITION_NOTIFIER); 908 + #endif 909 + 910 + for_each_online_cpu(cpu) { 911 + if (cbe_get_hw_thread_id(cpu)) 912 + continue; 913 + 914 + subfunc = 3; /* 915 + * 2 - activate SPU tracing, 916 + * 3 - deactivate 917 + */ 918 + lfsr_value = 0x8f100000; 919 + 920 + rtn_value = rtas_call(spu_rtas_token, 3, 1, NULL, 921 + subfunc, cbe_cpu_to_node(cpu), 922 + lfsr_value); 923 + 924 + if (unlikely(rtn_value != 0)) { 925 + printk(KERN_ERR 926 + "%s: rtas call ibm,cbe-spu-perftools " \ 927 + "failed, return = %d\n", 928 + __func__, rtn_value); 929 + } 930 + 931 + /* Deactivate the signals */ 932 + pm_rtas_reset_signals(cbe_cpu_to_node(cpu)); 933 + } 934 + 935 + stop_spu_profiling_cycles(); 936 + } 937 + 938 + static void cell_global_stop_spu_events(void) 939 + { 940 + int cpu; 941 + oprofile_running = 0; 942 + 943 + stop_spu_profiling_events(); 944 + smp_wmb(); 945 + 946 + for_each_online_cpu(cpu) { 947 + if (cbe_get_hw_thread_id(cpu)) 948 + continue; 949 + 950 + cbe_sync_irq(cbe_cpu_to_node(cpu)); 951 + /* Stop the counters */ 952 + cbe_disable_pm(cpu); 953 + cbe_write_pm07_control(cpu, 0, 0); 954 + 955 + /* Deactivate the signals */ 956 + pm_rtas_reset_signals(cbe_cpu_to_node(cpu)); 957 + 958 + /* Deactivate interrupts */ 959 + cbe_disable_pm_interrupts(cpu); 960 + } 961 + del_timer_sync(&timer_spu_event_swap); 962 + } 963 + 964 + static void cell_global_stop_ppu(void) 965 + { 966 + int cpu; 967 + 968 + /* 969 + * This routine will be called once for the system. 970 + * There is one performance monitor per node, so we 971 + * only need to perform this function once per node. 
972 + */ 973 + del_timer_sync(&timer_virt_cntr); 974 + oprofile_running = 0; 975 + smp_wmb(); 976 + 977 + for_each_online_cpu(cpu) { 978 + if (cbe_get_hw_thread_id(cpu)) 979 + continue; 980 + 981 + cbe_sync_irq(cbe_cpu_to_node(cpu)); 982 + /* Stop the counters */ 983 + cbe_disable_pm(cpu); 984 + 985 + /* Deactivate the signals */ 986 + pm_rtas_reset_signals(cbe_cpu_to_node(cpu)); 987 + 988 + /* Deactivate interrupts */ 989 + cbe_disable_pm_interrupts(cpu); 990 + } 991 + } 992 + 993 + static void cell_global_stop(void) 994 + { 995 + if (profiling_mode == PPU_PROFILING) 996 + cell_global_stop_ppu(); 997 + else if (profiling_mode == SPU_PROFILING_EVENTS) 998 + cell_global_stop_spu_events(); 999 + else 1000 + cell_global_stop_spu_cycles(); 1001 + } 1002 + 1003 + static int cell_global_start_spu_cycles(struct op_counter_config *ctr) 1138 1004 { 1139 1005 int subfunc; 1140 1006 unsigned int lfsr_value; ··· 1315 951 1316 952 /* start profiling */ 1317 953 ret = rtas_call(spu_rtas_token, 3, 1, NULL, subfunc, 1318 - cbe_cpu_to_node(cpu), lfsr_value); 954 + cbe_cpu_to_node(cpu), lfsr_value); 1319 955 1320 956 if (unlikely(ret != 0)) { 1321 957 printk(KERN_ERR 1322 - "%s: rtas call ibm,cbe-spu-perftools failed, return = %d\n", 1323 - __func__, ret); 958 + "%s: rtas call ibm,cbe-spu-perftools failed, " \ 959 + "return = %d\n", __func__, ret); 1324 960 rtas_error = -EIO; 1325 961 goto out; 1326 962 } 1327 963 } 1328 964 1329 - rtas_error = start_spu_profiling(spu_cycle_reset); 965 + rtas_error = start_spu_profiling_cycles(spu_cycle_reset); 1330 966 if (rtas_error) 1331 967 goto out_stop; 1332 968 ··· 1334 970 return 0; 1335 971 1336 972 out_stop: 1337 - cell_global_stop_spu(); /* clean up the PMU/debug bus */ 973 + cell_global_stop_spu_cycles(); /* clean up the PMU/debug bus */ 1338 974 out: 1339 975 return rtas_error; 976 + } 977 + 978 + static int cell_global_start_spu_events(struct op_counter_config *ctr) 979 + { 980 + int cpu; 981 + u32 interrupt_mask = 0; 982 + int rtn = 
0; 983 + 984 + hdw_thread = 0; 985 + 986 + /* spu event profiling, uses the performance counters to generate 987 + * an interrupt. The hardware is setup to store the SPU program 988 + * counter into the trace array. The occurrence mode is used to 989 + * enable storing data to the trace buffer. The bits are set 990 + * to send/store the SPU address in the trace buffer. The debug 991 + * bus must be setup to route the SPU program counter onto the 992 + * debug bus. The occurrence data in the trace buffer is not used. 993 + */ 994 + 995 + /* This routine gets called once for the system. 996 + * There is one performance monitor per node, so we 997 + * only need to perform this function once per node. 998 + */ 999 + 1000 + for_each_online_cpu(cpu) { 1001 + if (cbe_get_hw_thread_id(cpu)) 1002 + continue; 1003 + 1004 + /* 1005 + * Setup SPU event-based profiling. 1006 + * Set perf_mon_control bit 0 to a zero before 1007 + * enabling spu collection hardware. 1008 + * 1009 + * Only support one SPU event on one SPU per node. 1010 + */ 1011 + if (ctr_enabled & 1) { 1012 + cbe_write_ctr(cpu, 0, reset_value[0]); 1013 + enable_ctr(cpu, 0, pm_regs.pm07_cntrl); 1014 + interrupt_mask |= 1015 + CBE_PM_CTR_OVERFLOW_INTR(0); 1016 + } else { 1017 + /* Disable counter */ 1018 + cbe_write_pm07_control(cpu, 0, 0); 1019 + } 1020 + 1021 + cbe_get_and_clear_pm_interrupts(cpu); 1022 + cbe_enable_pm_interrupts(cpu, hdw_thread, interrupt_mask); 1023 + cbe_enable_pm(cpu); 1024 + 1025 + /* clear the trace buffer */ 1026 + cbe_write_pm(cpu, trace_address, 0); 1027 + } 1028 + 1029 + /* Start the timer to time slice collecting the event profile 1030 + * on each of the SPUs. Note, can collect profile on one SPU 1031 + * per node at a time. 
1032 + */ 1033 + start_spu_event_swap(); 1034 + start_spu_profiling_events(); 1035 + oprofile_running = 1; 1036 + smp_wmb(); 1037 + 1038 + return rtn; 1340 1039 } 1341 1040 1342 1041 static int cell_global_start_ppu(struct op_counter_config *ctr) ··· 1421 994 if (ctr_enabled & (1 << i)) { 1422 995 cbe_write_ctr(cpu, i, reset_value[i]); 1423 996 enable_ctr(cpu, i, pm_regs.pm07_cntrl); 1424 - interrupt_mask |= 1425 - CBE_PM_CTR_OVERFLOW_INTR(i); 997 + interrupt_mask |= CBE_PM_CTR_OVERFLOW_INTR(i); 1426 998 } else { 1427 999 /* Disable counter */ 1428 1000 cbe_write_pm07_control(cpu, i, 0); ··· 1450 1024 1451 1025 static int cell_global_start(struct op_counter_config *ctr) 1452 1026 { 1453 - if (spu_cycle_reset) 1454 - return cell_global_start_spu(ctr); 1027 + if (profiling_mode == SPU_PROFILING_CYCLES) 1028 + return cell_global_start_spu_cycles(ctr); 1029 + else if (profiling_mode == SPU_PROFILING_EVENTS) 1030 + return cell_global_start_spu_events(ctr); 1455 1031 else 1456 1032 return cell_global_start_ppu(ctr); 1457 1033 } 1458 1034 1459 - /* 1460 - * Note the generic OProfile stop calls do not support returning 1461 - * an error on stop. Hence, will not return an error if the FW 1462 - * calls fail on stop. Failure to reset the debug bus is not an issue. 1463 - * Failure to disable the SPU profiling is not an issue. The FW calls 1464 - * to enable the performance counters and debug bus will work even if 1465 - * the hardware was not cleanly reset. 1035 + 1036 + /* The SPU interrupt handler 1037 + * 1038 + * SPU event profiling works as follows: 1039 + * The pm_signal[0] holds the one SPU event to be measured. It is routed on 1040 + * the debug bus using word 0 or 1. The value of pm_signal[1] and 1041 + * pm_signal[2] contain the necessary events to route the SPU program 1042 + * counter for the selected SPU onto the debug bus using words 2 and 3. 
1043 + * The pm_interval register is setup to write the SPU PC value into the 1044 + * trace buffer at the maximum rate possible. The trace buffer is configured 1045 + * to store the PCs, wrapping when it is full. The performance counter is 1046 + * intialized to the max hardware count minus the number of events, N, between 1047 + * samples. Once the N events have occured, a HW counter overflow occurs 1048 + * causing the generation of a HW counter interrupt which also stops the 1049 + * writing of the SPU PC values to the trace buffer. Hence the last PC 1050 + * written to the trace buffer is the SPU PC that we want. Unfortunately, 1051 + * we have to read from the beginning of the trace buffer to get to the 1052 + * last value written. We just hope the PPU has nothing better to do then 1053 + * service this interrupt. The PC for the specific SPU being profiled is 1054 + * extracted from the trace buffer processed and stored. The trace buffer 1055 + * is cleared, interrupts are cleared, the counter is reset to max - N. 1056 + * A kernel timer is used to periodically call the routine spu_evnt_swap() 1057 + * to switch to the next physical SPU in the node to profile in round robbin 1058 + * order. This way data is collected for all SPUs on the node. It does mean 1059 + * that we need to use a relatively small value of N to ensure enough samples 1060 + * on each SPU are collected each SPU is being profiled 1/8 of the time. 1061 + * It may also be necessary to use a longer sample collection period. 
1466 1062 */ 1467 - static void cell_global_stop_spu(void) 1063 + static void cell_handle_interrupt_spu(struct pt_regs *regs, 1064 + struct op_counter_config *ctr) 1468 1065 { 1469 - int subfunc, rtn_value; 1470 - unsigned int lfsr_value; 1471 - int cpu; 1066 + u32 cpu, cpu_tmp; 1067 + u64 trace_entry; 1068 + u32 interrupt_mask; 1069 + u64 trace_buffer[2]; 1070 + u64 last_trace_buffer; 1071 + u32 sample; 1072 + u32 trace_addr; 1073 + unsigned long sample_array_lock_flags; 1074 + int spu_num; 1075 + unsigned long flags; 1472 1076 1473 - oprofile_running = 0; 1077 + /* Make sure spu event interrupt handler and spu event swap 1078 + * don't access the counters simultaneously. 1079 + */ 1080 + cpu = smp_processor_id(); 1081 + spin_lock_irqsave(&cntr_lock, flags); 1474 1082 1475 - #ifdef CONFIG_CPU_FREQ 1476 - cpufreq_unregister_notifier(&cpu_freq_notifier_block, 1477 - CPUFREQ_TRANSITION_NOTIFIER); 1478 - #endif 1083 + cpu_tmp = cpu; 1084 + cbe_disable_pm(cpu); 1479 1085 1480 - for_each_online_cpu(cpu) { 1481 - if (cbe_get_hw_thread_id(cpu)) 1482 - continue; 1086 + interrupt_mask = cbe_get_and_clear_pm_interrupts(cpu); 1483 1087 1484 - subfunc = 3; /* 1485 - * 2 - activate SPU tracing, 1486 - * 3 - deactivate 1487 - */ 1488 - lfsr_value = 0x8f100000; 1088 + sample = 0xABCDEF; 1089 + trace_entry = 0xfedcba; 1090 + last_trace_buffer = 0xdeadbeaf; 1489 1091 1490 - rtn_value = rtas_call(spu_rtas_token, 3, 1, NULL, 1491 - subfunc, cbe_cpu_to_node(cpu), 1492 - lfsr_value); 1092 + if ((oprofile_running == 1) && (interrupt_mask != 0)) { 1093 + /* disable writes to trace buff */ 1094 + cbe_write_pm(cpu, pm_interval, 0); 1493 1095 1494 - if (unlikely(rtn_value != 0)) { 1495 - printk(KERN_ERR 1496 - "%s: rtas call ibm,cbe-spu-perftools failed, return = %d\n", 1497 - __func__, rtn_value); 1096 + /* only have one perf cntr being used, cntr 0 */ 1097 + if ((interrupt_mask & CBE_PM_CTR_OVERFLOW_INTR(0)) 1098 + && ctr[0].enabled) 1099 + /* The SPU PC values will be read 1100 + * from 
the trace buffer, reset counter 1101 + */ 1102 + 1103 + cbe_write_ctr(cpu, 0, reset_value[0]); 1104 + 1105 + trace_addr = cbe_read_pm(cpu, trace_address); 1106 + 1107 + while (!(trace_addr & CBE_PM_TRACE_BUF_EMPTY)) { 1108 + /* There is data in the trace buffer to process 1109 + * Read the buffer until you get to the last 1110 + * entry. This is the value we want. 1111 + */ 1112 + 1113 + cbe_read_trace_buffer(cpu, trace_buffer); 1114 + trace_addr = cbe_read_pm(cpu, trace_address); 1498 1115 } 1499 1116 1500 - /* Deactivate the signals */ 1501 - pm_rtas_reset_signals(cbe_cpu_to_node(cpu)); 1117 + /* SPU Address 16 bit count format for 128 bit 1118 + * HW trace buffer is used for the SPU PC storage 1119 + * HDR bits 0:15 1120 + * SPU Addr 0 bits 16:31 1121 + * SPU Addr 1 bits 32:47 1122 + * unused bits 48:127 1123 + * 1124 + * HDR: bit4 = 1 SPU Address 0 valid 1125 + * HDR: bit5 = 1 SPU Address 1 valid 1126 + * - unfortunately, the valid bits don't seem to work 1127 + * 1128 + * Note trace_buffer[0] holds bits 0:63 of the HW 1129 + * trace buffer, trace_buffer[1] holds bits 64:127 1130 + */ 1131 + 1132 + trace_entry = trace_buffer[0] 1133 + & 0x00000000FFFF0000; 1134 + 1135 + /* only top 16 of the 18 bit SPU PC address 1136 + * is stored in trace buffer, hence shift right 1137 + * by 16 -2 bits */ 1138 + sample = trace_entry >> 14; 1139 + last_trace_buffer = trace_buffer[0]; 1140 + 1141 + spu_num = spu_evnt_phys_spu_indx 1142 + + (cbe_cpu_to_node(cpu) * NUM_SPUS_PER_NODE); 1143 + 1144 + /* make sure only one process at a time is calling 1145 + * spu_sync_buffer() 1146 + */ 1147 + spin_lock_irqsave(&oprof_spu_smpl_arry_lck, 1148 + sample_array_lock_flags); 1149 + spu_sync_buffer(spu_num, &sample, 1); 1150 + spin_unlock_irqrestore(&oprof_spu_smpl_arry_lck, 1151 + sample_array_lock_flags); 1152 + 1153 + smp_wmb(); /* insure spu event buffer updates are written 1154 + * don't want events intermingled... */ 1155 + 1156 + /* The counters were frozen by the interrupt. 
1157 + * Re-enable the interrupt and restart the counters. 1158 + */ 1159 + cbe_write_pm(cpu, pm_interval, NUM_INTERVAL_CYC); 1160 + cbe_enable_pm_interrupts(cpu, hdw_thread, 1161 + virt_cntr_inter_mask); 1162 + 1163 + /* clear the trace buffer, re-enable writes to trace buff */ 1164 + cbe_write_pm(cpu, trace_address, 0); 1165 + cbe_write_pm(cpu, pm_interval, NUM_INTERVAL_CYC); 1166 + 1167 + /* The writes to the various performance counters only write 1168 + * to a latch. The new values (interrupt setting bits, reset 1169 + * counter value etc.) are not copied to the actual registers 1170 + * until the performance monitor is enabled. In order to get 1171 + * this to work as desired, the performance monitor needs to 1172 + * be disabled while writing to the latches. This is a 1173 + * HW design issue. 1174 + */ 1175 + write_pm_cntrl(cpu); 1176 + cbe_enable_pm(cpu); 1502 1177 } 1503 - 1504 - stop_spu_profiling(); 1178 + spin_unlock_irqrestore(&cntr_lock, flags); 1505 1179 } 1506 1180 1507 - static void cell_global_stop_ppu(void) 1508 - { 1509 - int cpu; 1510 - 1511 - /* 1512 - * This routine will be called once for the system. 1513 - * There is one performance monitor per node, so we 1514 - * only need to perform this function once per node. 
1515 - */ 1516 - del_timer_sync(&timer_virt_cntr); 1517 - oprofile_running = 0; 1518 - smp_wmb(); 1519 - 1520 - for_each_online_cpu(cpu) { 1521 - if (cbe_get_hw_thread_id(cpu)) 1522 - continue; 1523 - 1524 - cbe_sync_irq(cbe_cpu_to_node(cpu)); 1525 - /* Stop the counters */ 1526 - cbe_disable_pm(cpu); 1527 - 1528 - /* Deactivate the signals */ 1529 - pm_rtas_reset_signals(cbe_cpu_to_node(cpu)); 1530 - 1531 - /* Deactivate interrupts */ 1532 - cbe_disable_pm_interrupts(cpu); 1533 - } 1534 - } 1535 - 1536 - static void cell_global_stop(void) 1537 - { 1538 - if (spu_cycle_reset) 1539 - cell_global_stop_spu(); 1540 - else 1541 - cell_global_stop_ppu(); 1542 - } 1543 - 1544 - static void cell_handle_interrupt(struct pt_regs *regs, 1545 - struct op_counter_config *ctr) 1181 + static void cell_handle_interrupt_ppu(struct pt_regs *regs, 1182 + struct op_counter_config *ctr) 1546 1183 { 1547 1184 u32 cpu; 1548 1185 u64 pc; ··· 1621 1132 * routine are not running at the same time. See the 1622 1133 * cell_virtual_cntr() routine for additional comments. 
1623 1134 */ 1624 - spin_lock_irqsave(&virt_cntr_lock, flags); 1135 + spin_lock_irqsave(&cntr_lock, flags); 1625 1136 1626 1137 /* 1627 1138 * Need to disable and reenable the performance counters ··· 1674 1185 */ 1675 1186 cbe_enable_pm(cpu); 1676 1187 } 1677 - spin_unlock_irqrestore(&virt_cntr_lock, flags); 1188 + spin_unlock_irqrestore(&cntr_lock, flags); 1189 + } 1190 + 1191 + static void cell_handle_interrupt(struct pt_regs *regs, 1192 + struct op_counter_config *ctr) 1193 + { 1194 + if (profiling_mode == PPU_PROFILING) 1195 + cell_handle_interrupt_ppu(regs, ctr); 1196 + else 1197 + cell_handle_interrupt_spu(regs, ctr); 1678 1198 } 1679 1199 1680 1200 /* ··· 1693 1195 */ 1694 1196 static int cell_sync_start(void) 1695 1197 { 1696 - if (spu_cycle_reset) 1198 + if ((profiling_mode == SPU_PROFILING_CYCLES) || 1199 + (profiling_mode == SPU_PROFILING_EVENTS)) 1697 1200 return spu_sync_start(); 1698 1201 else 1699 1202 return DO_GENERIC_SYNC; ··· 1702 1203 1703 1204 static int cell_sync_stop(void) 1704 1205 { 1705 - if (spu_cycle_reset) 1206 + if ((profiling_mode == SPU_PROFILING_CYCLES) || 1207 + (profiling_mode == SPU_PROFILING_EVENTS)) 1706 1208 return spu_sync_stop(); 1707 1209 else 1708 1210 return 1;

+52 -97

arch/x86/oprofile/op_model_amd.c

··· 2 2 * @file op_model_amd.c 3 3 * athlon / K7 / K8 / Family 10h model-specific MSR operations 4 4 * 5 - * @remark Copyright 2002-2008 OProfile authors 5 + * @remark Copyright 2002-2009 OProfile authors 6 6 * @remark Read the file COPYING 7 7 * 8 8 * @author John Levon ··· 10 10 * @author Graydon Hoare 11 11 * @author Robert Richter <robert.richter@amd.com> 12 12 * @author Barry Kasindorf 13 - */ 13 + */ 14 14 15 15 #include <linux/oprofile.h> 16 16 #include <linux/device.h> ··· 60 60 #define IBS_OP_LOW_VALID_BIT (1ULL<<18) /* bit 18 */ 61 61 #define IBS_OP_LOW_ENABLE (1ULL<<17) /* bit 17 */ 62 62 63 - /* Codes used in cpu_buffer.c */ 64 - /* This produces duplicate code, need to be fixed */ 65 - #define IBS_FETCH_BEGIN 3 66 - #define IBS_OP_BEGIN 4 63 + #define IBS_FETCH_SIZE 6 64 + #define IBS_OP_SIZE 12 67 65 68 - /* 69 - * The function interface needs to be fixed, something like add 70 - * data. Should then be added to linux/oprofile.h. 71 - */ 72 - extern void 73 - oprofile_add_ibs_sample(struct pt_regs * const regs, 74 - unsigned int * const ibs_sample, int ibs_code); 75 - 76 - struct ibs_fetch_sample { 77 - /* MSRC001_1031 IBS Fetch Linear Address Register */ 78 - unsigned int ibs_fetch_lin_addr_low; 79 - unsigned int ibs_fetch_lin_addr_high; 80 - /* MSRC001_1030 IBS Fetch Control Register */ 81 - unsigned int ibs_fetch_ctl_low; 82 - unsigned int ibs_fetch_ctl_high; 83 - /* MSRC001_1032 IBS Fetch Physical Address Register */ 84 - unsigned int ibs_fetch_phys_addr_low; 85 - unsigned int ibs_fetch_phys_addr_high; 86 - }; 87 - 88 - struct ibs_op_sample { 89 - /* MSRC001_1034 IBS Op Logical Address Register (IbsRIP) */ 90 - unsigned int ibs_op_rip_low; 91 - unsigned int ibs_op_rip_high; 92 - /* MSRC001_1035 IBS Op Data Register */ 93 - unsigned int ibs_op_data1_low; 94 - unsigned int ibs_op_data1_high; 95 - /* MSRC001_1036 IBS Op Data 2 Register */ 96 - unsigned int ibs_op_data2_low; 97 - unsigned int ibs_op_data2_high; 98 - /* MSRC001_1037 IBS Op Data 3 
Register */ 99 - unsigned int ibs_op_data3_low; 100 - unsigned int ibs_op_data3_high; 101 - /* MSRC001_1038 IBS DC Linear Address Register (IbsDcLinAd) */ 102 - unsigned int ibs_dc_linear_low; 103 - unsigned int ibs_dc_linear_high; 104 - /* MSRC001_1039 IBS DC Physical Address Register (IbsDcPhysAd) */ 105 - unsigned int ibs_dc_phys_low; 106 - unsigned int ibs_dc_phys_high; 107 - }; 108 - 109 - static int ibs_allowed; /* AMD Family10h and later */ 66 + static int has_ibs; /* AMD Family10h and later */ 110 67 111 68 struct op_ibs_config { 112 69 unsigned long op_enabled; ··· 154 197 op_amd_handle_ibs(struct pt_regs * const regs, 155 198 struct op_msrs const * const msrs) 156 199 { 157 - unsigned int low, high; 158 - struct ibs_fetch_sample ibs_fetch; 159 - struct ibs_op_sample ibs_op; 200 + u32 low, high; 201 + u64 msr; 202 + struct op_entry entry; 160 203 161 - if (!ibs_allowed) 204 + if (!has_ibs) 162 205 return 1; 163 206 164 207 if (ibs_config.fetch_enabled) { 165 208 rdmsr(MSR_AMD64_IBSFETCHCTL, low, high); 166 209 if (high & IBS_FETCH_HIGH_VALID_BIT) { 167 - ibs_fetch.ibs_fetch_ctl_high = high; 168 - ibs_fetch.ibs_fetch_ctl_low = low; 169 - rdmsr(MSR_AMD64_IBSFETCHLINAD, low, high); 170 - ibs_fetch.ibs_fetch_lin_addr_high = high; 171 - ibs_fetch.ibs_fetch_lin_addr_low = low; 172 - rdmsr(MSR_AMD64_IBSFETCHPHYSAD, low, high); 173 - ibs_fetch.ibs_fetch_phys_addr_high = high; 174 - ibs_fetch.ibs_fetch_phys_addr_low = low; 175 - 176 - oprofile_add_ibs_sample(regs, 177 - (unsigned int *)&ibs_fetch, 178 - IBS_FETCH_BEGIN); 210 + rdmsrl(MSR_AMD64_IBSFETCHLINAD, msr); 211 + oprofile_write_reserve(&entry, regs, msr, 212 + IBS_FETCH_CODE, IBS_FETCH_SIZE); 213 + oprofile_add_data(&entry, (u32)msr); 214 + oprofile_add_data(&entry, (u32)(msr >> 32)); 215 + oprofile_add_data(&entry, low); 216 + oprofile_add_data(&entry, high); 217 + rdmsrl(MSR_AMD64_IBSFETCHPHYSAD, msr); 218 + oprofile_add_data(&entry, (u32)msr); 219 + oprofile_add_data(&entry, (u32)(msr >> 32)); 220 + 
oprofile_write_commit(&entry); 179 221 180 222 /* reenable the IRQ */ 181 - rdmsr(MSR_AMD64_IBSFETCHCTL, low, high); 182 223 high &= ~IBS_FETCH_HIGH_VALID_BIT; 183 224 high |= IBS_FETCH_HIGH_ENABLE; 184 225 low &= IBS_FETCH_LOW_MAX_CNT_MASK; ··· 187 232 if (ibs_config.op_enabled) { 188 233 rdmsr(MSR_AMD64_IBSOPCTL, low, high); 189 234 if (low & IBS_OP_LOW_VALID_BIT) { 190 - rdmsr(MSR_AMD64_IBSOPRIP, low, high); 191 - ibs_op.ibs_op_rip_low = low; 192 - ibs_op.ibs_op_rip_high = high; 193 - rdmsr(MSR_AMD64_IBSOPDATA, low, high); 194 - ibs_op.ibs_op_data1_low = low; 195 - ibs_op.ibs_op_data1_high = high; 196 - rdmsr(MSR_AMD64_IBSOPDATA2, low, high); 197 - ibs_op.ibs_op_data2_low = low; 198 - ibs_op.ibs_op_data2_high = high; 199 - rdmsr(MSR_AMD64_IBSOPDATA3, low, high); 200 - ibs_op.ibs_op_data3_low = low; 201 - ibs_op.ibs_op_data3_high = high; 202 - rdmsr(MSR_AMD64_IBSDCLINAD, low, high); 203 - ibs_op.ibs_dc_linear_low = low; 204 - ibs_op.ibs_dc_linear_high = high; 205 - rdmsr(MSR_AMD64_IBSDCPHYSAD, low, high); 206 - ibs_op.ibs_dc_phys_low = low; 207 - ibs_op.ibs_dc_phys_high = high; 235 + rdmsrl(MSR_AMD64_IBSOPRIP, msr); 236 + oprofile_write_reserve(&entry, regs, msr, 237 + IBS_OP_CODE, IBS_OP_SIZE); 238 + oprofile_add_data(&entry, (u32)msr); 239 + oprofile_add_data(&entry, (u32)(msr >> 32)); 240 + rdmsrl(MSR_AMD64_IBSOPDATA, msr); 241 + oprofile_add_data(&entry, (u32)msr); 242 + oprofile_add_data(&entry, (u32)(msr >> 32)); 243 + rdmsrl(MSR_AMD64_IBSOPDATA2, msr); 244 + oprofile_add_data(&entry, (u32)msr); 245 + oprofile_add_data(&entry, (u32)(msr >> 32)); 246 + rdmsrl(MSR_AMD64_IBSOPDATA3, msr); 247 + oprofile_add_data(&entry, (u32)msr); 248 + oprofile_add_data(&entry, (u32)(msr >> 32)); 249 + rdmsrl(MSR_AMD64_IBSDCLINAD, msr); 250 + oprofile_add_data(&entry, (u32)msr); 251 + oprofile_add_data(&entry, (u32)(msr >> 32)); 252 + rdmsrl(MSR_AMD64_IBSDCPHYSAD, msr); 253 + oprofile_add_data(&entry, (u32)msr); 254 + oprofile_add_data(&entry, (u32)(msr >> 32)); 255 + 
oprofile_write_commit(&entry); 208 256 209 257 /* reenable the IRQ */ 210 - oprofile_add_ibs_sample(regs, 211 - (unsigned int *)&ibs_op, 212 - IBS_OP_BEGIN); 213 - rdmsr(MSR_AMD64_IBSOPCTL, low, high); 214 258 high = 0; 215 259 low &= ~IBS_OP_LOW_VALID_BIT; 216 260 low |= IBS_OP_LOW_ENABLE; ··· 259 305 } 260 306 261 307 #ifdef CONFIG_OPROFILE_IBS 262 - if (ibs_allowed && ibs_config.fetch_enabled) { 308 + if (has_ibs && ibs_config.fetch_enabled) { 263 309 low = (ibs_config.max_cnt_fetch >> 4) & 0xFFFF; 264 310 high = ((ibs_config.rand_en & 0x1) << 25) /* bit 57 */ 265 311 + IBS_FETCH_HIGH_ENABLE; 266 312 wrmsr(MSR_AMD64_IBSFETCHCTL, low, high); 267 313 } 268 314 269 - if (ibs_allowed && ibs_config.op_enabled) { 315 + if (has_ibs && ibs_config.op_enabled) { 270 316 low = ((ibs_config.max_cnt_op >> 4) & 0xFFFF) 271 317 + ((ibs_config.dispatched_ops & 0x1) << 19) /* bit 19 */ 272 318 + IBS_OP_LOW_ENABLE; ··· 295 341 } 296 342 297 343 #ifdef CONFIG_OPROFILE_IBS 298 - if (ibs_allowed && ibs_config.fetch_enabled) { 344 + if (has_ibs && ibs_config.fetch_enabled) { 299 345 /* clear max count and enable */ 300 346 low = 0; 301 347 high = 0; 302 348 wrmsr(MSR_AMD64_IBSFETCHCTL, low, high); 303 349 } 304 350 305 - if (ibs_allowed && ibs_config.op_enabled) { 351 + if (has_ibs && ibs_config.op_enabled) { 306 352 /* clear max count and enable */ 307 353 low = 0; 308 354 high = 0; ··· 363 409 | IBSCTL_LVTOFFSETVAL); 364 410 pci_read_config_dword(cpu_cfg, IBSCTL, &value); 365 411 if (value != (ibs_eilvt_off | IBSCTL_LVTOFFSETVAL)) { 412 + pci_dev_put(cpu_cfg); 366 413 printk(KERN_DEBUG "Failed to setup IBS LVT offset, " 367 414 "IBSCTL = 0x%08x", value); 368 415 return 1; ··· 391 436 /* uninitialize the APIC for the IBS interrupts if needed */ 392 437 static void clear_ibs_nmi(void) 393 438 { 394 - if (ibs_allowed) 439 + if (has_ibs) 395 440 on_each_cpu(apic_clear_ibs_nmi_per_cpu, NULL, 1); 396 441 } 397 442 398 443 /* initialize the APIC for the IBS interrupts if available */ 399 
444 static void ibs_init(void) 400 445 { 401 - ibs_allowed = boot_cpu_has(X86_FEATURE_IBS); 446 + has_ibs = boot_cpu_has(X86_FEATURE_IBS); 402 447 403 - if (!ibs_allowed) 448 + if (!has_ibs) 404 449 return; 405 450 406 451 if (init_ibs_nmi()) { 407 - ibs_allowed = 0; 452 + has_ibs = 0; 408 453 return; 409 454 } 410 455 ··· 413 458 414 459 static void ibs_exit(void) 415 460 { 416 - if (!ibs_allowed) 461 + if (!has_ibs) 417 462 return; 418 463 419 464 clear_ibs_nmi(); ··· 433 478 if (ret) 434 479 return ret; 435 480 436 - if (!ibs_allowed) 481 + if (!has_ibs) 437 482 return ret; 438 483 439 484 /* model specific files */

+80 -110

drivers/oprofile/buffer_sync.c

··· 1 1 /** 2 2 * @file buffer_sync.c 3 3 * 4 - * @remark Copyright 2002 OProfile authors 4 + * @remark Copyright 2002-2009 OProfile authors 5 5 * @remark Read the file COPYING 6 6 * 7 7 * @author John Levon <levon@movementarian.org> 8 8 * @author Barry Kasindorf 9 + * @author Robert Richter <robert.richter@amd.com> 9 10 * 10 11 * This is the core of the buffer management. Each 11 12 * CPU buffer is processed and entered into the ··· 316 315 add_event_entry(TRACE_BEGIN_CODE); 317 316 } 318 317 319 - #ifdef CONFIG_OPROFILE_IBS 320 - 321 - #define IBS_FETCH_CODE_SIZE 2 322 - #define IBS_OP_CODE_SIZE 5 323 - 324 - /* 325 - * Add IBS fetch and op entries to event buffer 326 - */ 327 - static void add_ibs_begin(int cpu, int code, struct mm_struct *mm) 318 + static void add_data(struct op_entry *entry, struct mm_struct *mm) 328 319 { 329 - unsigned long rip; 330 - int i, count; 331 - unsigned long ibs_cookie = 0; 320 + unsigned long code, pc, val; 321 + unsigned long cookie; 332 322 off_t offset; 333 - struct op_sample *sample; 334 323 335 - sample = cpu_buffer_read_entry(cpu); 336 - if (!sample) 337 - goto Error; 338 - rip = sample->eip; 339 - 340 - #ifdef __LP64__ 341 - rip += sample->event << 32; 342 - #endif 324 + if (!op_cpu_buffer_get_data(entry, &code)) 325 + return; 326 + if (!op_cpu_buffer_get_data(entry, &pc)) 327 + return; 328 + if (!op_cpu_buffer_get_size(entry)) 329 + return; 343 330 344 331 if (mm) { 345 - ibs_cookie = lookup_dcookie(mm, rip, &offset); 332 + cookie = lookup_dcookie(mm, pc, &offset); 346 333 347 - if (ibs_cookie == NO_COOKIE) 348 - offset = rip; 349 - if (ibs_cookie == INVALID_COOKIE) { 334 + if (cookie == NO_COOKIE) 335 + offset = pc; 336 + if (cookie == INVALID_COOKIE) { 350 337 atomic_inc(&oprofile_stats.sample_lost_no_mapping); 351 - offset = rip; 338 + offset = pc; 352 339 } 353 - if (ibs_cookie != last_cookie) { 354 - add_cookie_switch(ibs_cookie); 355 - last_cookie = ibs_cookie; 340 + if (cookie != last_cookie) { 341 + 
add_cookie_switch(cookie); 342 + last_cookie = cookie; 356 343 } 357 344 } else 358 - offset = rip; 345 + offset = pc; 359 346 360 347 add_event_entry(ESCAPE_CODE); 361 348 add_event_entry(code); 362 349 add_event_entry(offset); /* Offset from Dcookie */ 363 350 364 - /* we send the Dcookie offset, but send the raw Linear Add also*/ 365 - add_event_entry(sample->eip); 366 - add_event_entry(sample->event); 367 - 368 - if (code == IBS_FETCH_CODE) 369 - count = IBS_FETCH_CODE_SIZE; /*IBS FETCH is 2 int64s*/ 370 - else 371 - count = IBS_OP_CODE_SIZE; /*IBS OP is 5 int64s*/ 372 - 373 - for (i = 0; i < count; i++) { 374 - sample = cpu_buffer_read_entry(cpu); 375 - if (!sample) 376 - goto Error; 377 - add_event_entry(sample->eip); 378 - add_event_entry(sample->event); 379 - } 380 - 381 - return; 382 - 383 - Error: 384 - return; 351 + while (op_cpu_buffer_get_data(entry, &val)) 352 + add_event_entry(val); 385 353 } 386 354 387 - #endif 388 - 389 - static void add_sample_entry(unsigned long offset, unsigned long event) 355 + static inline void add_sample_entry(unsigned long offset, unsigned long event) 390 356 { 391 357 add_event_entry(offset); 392 358 add_event_entry(event); 393 359 } 394 360 395 361 396 - static int add_us_sample(struct mm_struct *mm, struct op_sample *s) 362 + /* 363 + * Add a sample to the global event buffer. If possible the 364 + * sample is converted into a persistent dentry/offset pair 365 + * for later lookup from userspace. Return 0 on failure. 
366 + */ 367 + static int 368 + add_sample(struct mm_struct *mm, struct op_sample *s, int in_kernel) 397 369 { 398 370 unsigned long cookie; 399 371 off_t offset; 372 + 373 + if (in_kernel) { 374 + add_sample_entry(s->eip, s->event); 375 + return 1; 376 + } 377 + 378 + /* add userspace sample */ 379 + 380 + if (!mm) { 381 + atomic_inc(&oprofile_stats.sample_lost_no_mm); 382 + return 0; 383 + } 400 384 401 385 cookie = lookup_dcookie(mm, s->eip, &offset); 402 386 ··· 398 412 add_sample_entry(offset, s->event); 399 413 400 414 return 1; 401 - } 402 - 403 - 404 - /* Add a sample to the global event buffer. If possible the 405 - * sample is converted into a persistent dentry/offset pair 406 - * for later lookup from userspace. 407 - */ 408 - static int 409 - add_sample(struct mm_struct *mm, struct op_sample *s, int in_kernel) 410 - { 411 - if (in_kernel) { 412 - add_sample_entry(s->eip, s->event); 413 - return 1; 414 - } else if (mm) { 415 - return add_us_sample(mm, s); 416 - } else { 417 - atomic_inc(&oprofile_stats.sample_lost_no_mm); 418 - } 419 - return 0; 420 415 } 421 416 422 417 ··· 493 526 { 494 527 struct mm_struct *mm = NULL; 495 528 struct mm_struct *oldmm; 529 + unsigned long val; 496 530 struct task_struct *new; 497 531 unsigned long cookie = 0; 498 532 int in_kernel = 1; 499 533 sync_buffer_state state = sb_buffer_start; 500 534 unsigned int i; 501 535 unsigned long available; 536 + unsigned long flags; 537 + struct op_entry entry; 538 + struct op_sample *sample; 502 539 503 540 mutex_lock(&buffer_mutex); 504 541 505 542 add_cpu_switch(cpu); 506 543 507 - cpu_buffer_reset(cpu); 508 - available = cpu_buffer_entries(cpu); 544 + op_cpu_buffer_reset(cpu); 545 + available = op_cpu_buffer_entries(cpu); 509 546 510 547 for (i = 0; i < available; ++i) { 511 - struct op_sample *s = cpu_buffer_read_entry(cpu); 512 - if (!s) 548 + sample = op_cpu_buffer_read_entry(&entry, cpu); 549 + if (!sample) 513 550 break; 514 551 515 - if (is_code(s->eip)) { 516 - switch 
(s->event) { 517 - case 0: 518 - case CPU_IS_KERNEL: 519 - /* kernel/userspace switch */ 520 - in_kernel = s->event; 521 - if (state == sb_buffer_start) 522 - state = sb_sample_start; 523 - add_kernel_ctx_switch(s->event); 524 - break; 525 - case CPU_TRACE_BEGIN: 552 + if (is_code(sample->eip)) { 553 + flags = sample->event; 554 + if (flags & TRACE_BEGIN) { 526 555 state = sb_bt_start; 527 556 add_trace_begin(); 528 - break; 529 - #ifdef CONFIG_OPROFILE_IBS 530 - case IBS_FETCH_BEGIN: 531 - state = sb_bt_start; 532 - add_ibs_begin(cpu, IBS_FETCH_CODE, mm); 533 - break; 534 - case IBS_OP_BEGIN: 535 - state = sb_bt_start; 536 - add_ibs_begin(cpu, IBS_OP_CODE, mm); 537 - break; 538 - #endif 539 - default: 557 + } 558 + if (flags & KERNEL_CTX_SWITCH) { 559 + /* kernel/userspace switch */ 560 + in_kernel = flags & IS_KERNEL; 561 + if (state == sb_buffer_start) 562 + state = sb_sample_start; 563 + add_kernel_ctx_switch(flags & IS_KERNEL); 564 + } 565 + if (flags & USER_CTX_SWITCH 566 + && op_cpu_buffer_get_data(&entry, &val)) { 540 567 /* userspace context switch */ 568 + new = (struct task_struct *)val; 541 569 oldmm = mm; 542 - new = (struct task_struct *)s->event; 543 570 release_mm(oldmm); 544 571 mm = take_tasks_mm(new); 545 572 if (mm != oldmm) 546 573 cookie = get_exec_dcookie(mm); 547 574 add_user_ctx_switch(new, cookie); 548 - break; 549 575 } 550 - } else if (state >= sb_bt_start && 551 - !add_sample(mm, s, in_kernel)) { 552 - if (state == sb_bt_start) { 553 - state = sb_bt_ignore; 554 - atomic_inc(&oprofile_stats.bt_lost_no_mapping); 555 - } 576 + if (op_cpu_buffer_get_size(&entry)) 577 + add_data(&entry, mm); 578 + continue; 579 + } 580 + 581 + if (state < sb_bt_start) 582 + /* ignore sample */ 583 + continue; 584 + 585 + if (add_sample(mm, sample, in_kernel)) 586 + continue; 587 + 588 + /* ignore backtraces if failed to add a sample */ 589 + if (state == sb_bt_start) { 590 + state = sb_bt_ignore; 591 + atomic_inc(&oprofile_stats.bt_lost_no_mapping); 556 592 
} 557 593 } 558 594 release_mm(mm);

+204 -118

drivers/oprofile/cpu_buffer.c

··· 1 1 /** 2 2 * @file cpu_buffer.c 3 3 * 4 - * @remark Copyright 2002 OProfile authors 4 + * @remark Copyright 2002-2009 OProfile authors 5 5 * @remark Read the file COPYING 6 6 * 7 7 * @author John Levon <levon@movementarian.org> 8 8 * @author Barry Kasindorf <barry.kasindorf@amd.com> 9 + * @author Robert Richter <robert.richter@amd.com> 9 10 * 10 11 * Each CPU has a local buffer that stores PC value/event 11 12 * pairs. We also log context switches when we notice them. ··· 46 45 * can be changed to a single buffer solution when the ring buffer 47 46 * access is implemented as non-locking atomic code. 48 47 */ 49 - struct ring_buffer *op_ring_buffer_read; 50 - struct ring_buffer *op_ring_buffer_write; 48 + static struct ring_buffer *op_ring_buffer_read; 49 + static struct ring_buffer *op_ring_buffer_write; 51 50 DEFINE_PER_CPU(struct oprofile_cpu_buffer, cpu_buffer); 52 51 53 52 static void wq_sync_buffer(struct work_struct *work); 54 53 55 54 #define DEFAULT_TIMER_EXPIRE (HZ / 10) 56 55 static int work_enabled; 56 + 57 + unsigned long oprofile_get_cpu_buffer_size(void) 58 + { 59 + return oprofile_cpu_buffer_size; 60 + } 61 + 62 + void oprofile_cpu_buffer_inc_smpl_lost(void) 63 + { 64 + struct oprofile_cpu_buffer *cpu_buf 65 + = &__get_cpu_var(cpu_buffer); 66 + 67 + cpu_buf->sample_lost_overflow++; 68 + } 57 69 58 70 void free_cpu_buffers(void) 59 71 { ··· 78 64 op_ring_buffer_write = NULL; 79 65 } 80 66 81 - unsigned long oprofile_get_cpu_buffer_size(void) 82 - { 83 - return fs_cpu_buffer_size; 84 - } 85 - 86 - void oprofile_cpu_buffer_inc_smpl_lost(void) 87 - { 88 - struct oprofile_cpu_buffer *cpu_buf 89 - = &__get_cpu_var(cpu_buffer); 90 - 91 - cpu_buf->sample_lost_overflow++; 92 - } 93 - 94 67 int alloc_cpu_buffers(void) 95 68 { 96 69 int i; 97 70 98 - unsigned long buffer_size = fs_cpu_buffer_size; 71 + unsigned long buffer_size = oprofile_cpu_buffer_size; 99 72 100 73 op_ring_buffer_read = ring_buffer_alloc(buffer_size, OP_BUFFER_FLAGS); 101 74 if 
(!op_ring_buffer_read) ··· 98 97 b->last_is_kernel = -1; 99 98 b->tracing = 0; 100 99 b->buffer_size = buffer_size; 101 - b->tail_pos = 0; 102 - b->head_pos = 0; 103 100 b->sample_received = 0; 104 101 b->sample_lost_overflow = 0; 105 102 b->backtrace_aborted = 0; ··· 144 145 flush_scheduled_work(); 145 146 } 146 147 147 - static inline int 148 - add_sample(struct oprofile_cpu_buffer *cpu_buf, 149 - unsigned long pc, unsigned long event) 148 + /* 149 + * This function prepares the cpu buffer to write a sample. 150 + * 151 + * Struct op_entry is used during operations on the ring buffer while 152 + * struct op_sample contains the data that is stored in the ring 153 + * buffer. Struct entry can be uninitialized. The function reserves a 154 + * data array that is specified by size. Use 155 + * op_cpu_buffer_write_commit() after preparing the sample. In case of 156 + * errors a null pointer is returned, otherwise the pointer to the 157 + * sample. 158 + * 159 + */ 160 + struct op_sample 161 + *op_cpu_buffer_write_reserve(struct op_entry *entry, unsigned long size) 162 + { 163 + entry->event = ring_buffer_lock_reserve 164 + (op_ring_buffer_write, sizeof(struct op_sample) + 165 + size * sizeof(entry->sample->data[0]), &entry->irq_flags); 166 + if (entry->event) 167 + entry->sample = ring_buffer_event_data(entry->event); 168 + else 169 + entry->sample = NULL; 170 + 171 + if (!entry->sample) 172 + return NULL; 173 + 174 + entry->size = size; 175 + entry->data = entry->sample->data; 176 + 177 + return entry->sample; 178 + } 179 + 180 + int op_cpu_buffer_write_commit(struct op_entry *entry) 181 + { 182 + return ring_buffer_unlock_commit(op_ring_buffer_write, entry->event, 183 + entry->irq_flags); 184 + } 185 + 186 + struct op_sample *op_cpu_buffer_read_entry(struct op_entry *entry, int cpu) 187 + { 188 + struct ring_buffer_event *e; 189 + e = ring_buffer_consume(op_ring_buffer_read, cpu, NULL); 190 + if (e) 191 + goto event; 192 + if 
(ring_buffer_swap_cpu(op_ring_buffer_read, 193 + op_ring_buffer_write, 194 + cpu)) 195 + return NULL; 196 + e = ring_buffer_consume(op_ring_buffer_read, cpu, NULL); 197 + if (e) 198 + goto event; 199 + return NULL; 200 + 201 + event: 202 + entry->event = e; 203 + entry->sample = ring_buffer_event_data(e); 204 + entry->size = (ring_buffer_event_length(e) - sizeof(struct op_sample)) 205 + / sizeof(entry->sample->data[0]); 206 + entry->data = entry->sample->data; 207 + return entry->sample; 208 + } 209 + 210 + unsigned long op_cpu_buffer_entries(int cpu) 211 + { 212 + return ring_buffer_entries_cpu(op_ring_buffer_read, cpu) 213 + + ring_buffer_entries_cpu(op_ring_buffer_write, cpu); 214 + } 215 + 216 + static int 217 + op_add_code(struct oprofile_cpu_buffer *cpu_buf, unsigned long backtrace, 218 + int is_kernel, struct task_struct *task) 150 219 { 151 220 struct op_entry entry; 152 - int ret; 221 + struct op_sample *sample; 222 + unsigned long flags; 223 + int size; 153 224 154 - ret = cpu_buffer_write_entry(&entry); 155 - if (ret) 156 - return ret; 225 + flags = 0; 157 226 158 - entry.sample->eip = pc; 159 - entry.sample->event = event; 227 + if (backtrace) 228 + flags |= TRACE_BEGIN; 160 229 161 - ret = cpu_buffer_write_commit(&entry); 162 - if (ret) 163 - return ret; 230 + /* notice a switch from user->kernel or vice versa */ 231 + is_kernel = !!is_kernel; 232 + if (cpu_buf->last_is_kernel != is_kernel) { 233 + cpu_buf->last_is_kernel = is_kernel; 234 + flags |= KERNEL_CTX_SWITCH; 235 + if (is_kernel) 236 + flags |= IS_KERNEL; 237 + } 238 + 239 + /* notice a task switch */ 240 + if (cpu_buf->last_task != task) { 241 + cpu_buf->last_task = task; 242 + flags |= USER_CTX_SWITCH; 243 + } 244 + 245 + if (!flags) 246 + /* nothing to do */ 247 + return 0; 248 + 249 + if (flags & USER_CTX_SWITCH) 250 + size = 1; 251 + else 252 + size = 0; 253 + 254 + sample = op_cpu_buffer_write_reserve(&entry, size); 255 + if (!sample) 256 + return -ENOMEM; 257 + 258 + sample->eip = 
ESCAPE_CODE; 259 + sample->event = flags; 260 + 261 + if (size) 262 + op_cpu_buffer_add_data(&entry, (unsigned long)task); 263 + 264 + op_cpu_buffer_write_commit(&entry); 164 265 165 266 return 0; 166 267 } 167 268 168 269 static inline int 169 - add_code(struct oprofile_cpu_buffer *buffer, unsigned long value) 270 + op_add_sample(struct oprofile_cpu_buffer *cpu_buf, 271 + unsigned long pc, unsigned long event) 170 272 { 171 - return add_sample(buffer, ESCAPE_CODE, value); 273 + struct op_entry entry; 274 + struct op_sample *sample; 275 + 276 + sample = op_cpu_buffer_write_reserve(&entry, 0); 277 + if (!sample) 278 + return -ENOMEM; 279 + 280 + sample->eip = pc; 281 + sample->event = event; 282 + 283 + return op_cpu_buffer_write_commit(&entry); 172 284 } 173 285 174 - /* This must be safe from any context. It's safe writing here 175 - * because of the head/tail separation of the writer and reader 176 - * of the CPU buffer. 286 + /* 287 + * This must be safe from any context. 177 288 * 178 289 * is_kernel is needed because on some architectures you cannot 179 290 * tell if you are in kernel or user space simply by looking at 180 291 * pc. 
We tag this in the buffer by generating kernel enter/exit 181 292 * events whenever is_kernel changes 182 293 */ 183 - static int log_sample(struct oprofile_cpu_buffer *cpu_buf, unsigned long pc, 184 - int is_kernel, unsigned long event) 294 + static int 295 + log_sample(struct oprofile_cpu_buffer *cpu_buf, unsigned long pc, 296 + unsigned long backtrace, int is_kernel, unsigned long event) 185 297 { 186 - struct task_struct *task; 187 - 188 298 cpu_buf->sample_received++; 189 299 190 300 if (pc == ESCAPE_CODE) { ··· 301 193 return 0; 302 194 } 303 195 304 - is_kernel = !!is_kernel; 196 + if (op_add_code(cpu_buf, backtrace, is_kernel, current)) 197 + goto fail; 305 198 306 - task = current; 307 - 308 - /* notice a switch from user->kernel or vice versa */ 309 - if (cpu_buf->last_is_kernel != is_kernel) { 310 - cpu_buf->last_is_kernel = is_kernel; 311 - if (add_code(cpu_buf, is_kernel)) 312 - goto fail; 313 - } 314 - 315 - /* notice a task switch */ 316 - if (cpu_buf->last_task != task) { 317 - cpu_buf->last_task = task; 318 - if (add_code(cpu_buf, (unsigned long)task)) 319 - goto fail; 320 - } 321 - 322 - if (add_sample(cpu_buf, pc, event)) 199 + if (op_add_sample(cpu_buf, pc, event)) 323 200 goto fail; 324 201 325 202 return 1; ··· 314 221 return 0; 315 222 } 316 223 317 - static int oprofile_begin_trace(struct oprofile_cpu_buffer *cpu_buf) 224 + static inline void oprofile_begin_trace(struct oprofile_cpu_buffer *cpu_buf) 318 225 { 319 - add_code(cpu_buf, CPU_TRACE_BEGIN); 320 226 cpu_buf->tracing = 1; 321 - return 1; 322 227 } 323 228 324 - static void oprofile_end_trace(struct oprofile_cpu_buffer *cpu_buf) 229 + static inline void oprofile_end_trace(struct oprofile_cpu_buffer *cpu_buf) 325 230 { 326 231 cpu_buf->tracing = 0; 327 232 } 328 233 329 - void oprofile_add_ext_sample(unsigned long pc, struct pt_regs * const regs, 330 - unsigned long event, int is_kernel) 234 + static inline void 235 + __oprofile_add_ext_sample(unsigned long pc, struct pt_regs * const 
regs, 236 + unsigned long event, int is_kernel) 331 237 { 332 238 struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(cpu_buffer); 333 - 334 - if (!backtrace_depth) { 335 - log_sample(cpu_buf, pc, is_kernel, event); 336 - return; 337 - } 338 - 339 - if (!oprofile_begin_trace(cpu_buf)) 340 - return; 239 + unsigned long backtrace = oprofile_backtrace_depth; 341 240 342 241 /* 343 242 * if log_sample() fail we can't backtrace since we lost the 344 243 * source of this event 345 244 */ 346 - if (log_sample(cpu_buf, pc, is_kernel, event)) 347 - oprofile_ops.backtrace(regs, backtrace_depth); 245 + if (!log_sample(cpu_buf, pc, backtrace, is_kernel, event)) 246 + /* failed */ 247 + return; 248 + 249 + if (!backtrace) 250 + return; 251 + 252 + oprofile_begin_trace(cpu_buf); 253 + oprofile_ops.backtrace(regs, backtrace); 348 254 oprofile_end_trace(cpu_buf); 255 + } 256 + 257 + void oprofile_add_ext_sample(unsigned long pc, struct pt_regs * const regs, 258 + unsigned long event, int is_kernel) 259 + { 260 + __oprofile_add_ext_sample(pc, regs, event, is_kernel); 349 261 } 350 262 351 263 void oprofile_add_sample(struct pt_regs * const regs, unsigned long event) ··· 358 260 int is_kernel = !user_mode(regs); 359 261 unsigned long pc = profile_pc(regs); 360 262 361 - oprofile_add_ext_sample(pc, regs, event, is_kernel); 263 + __oprofile_add_ext_sample(pc, regs, event, is_kernel); 362 264 } 363 265 364 - #ifdef CONFIG_OPROFILE_IBS 365 - 366 - #define MAX_IBS_SAMPLE_SIZE 14 367 - 368 - void oprofile_add_ibs_sample(struct pt_regs * const regs, 369 - unsigned int * const ibs_sample, int ibs_code) 266 + /* 267 + * Add samples with data to the ring buffer. 268 + * 269 + * Use oprofile_add_data(&entry, val) to add data and 270 + * oprofile_write_commit(&entry) to commit the sample. 
271 + */ 272 + void 273 + oprofile_write_reserve(struct op_entry *entry, struct pt_regs * const regs, 274 + unsigned long pc, int code, int size) 370 275 { 276 + struct op_sample *sample; 371 277 int is_kernel = !user_mode(regs); 372 278 struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(cpu_buffer); 373 - struct task_struct *task; 374 - int fail = 0; 375 279 376 280 cpu_buf->sample_received++; 377 281 378 - /* notice a switch from user->kernel or vice versa */ 379 - if (cpu_buf->last_is_kernel != is_kernel) { 380 - if (add_code(cpu_buf, is_kernel)) 381 - goto fail; 382 - cpu_buf->last_is_kernel = is_kernel; 383 - } 384 - 385 - /* notice a task switch */ 386 - if (!is_kernel) { 387 - task = current; 388 - if (cpu_buf->last_task != task) { 389 - if (add_code(cpu_buf, (unsigned long)task)) 390 - goto fail; 391 - cpu_buf->last_task = task; 392 - } 393 - } 394 - 395 - fail = fail || add_code(cpu_buf, ibs_code); 396 - fail = fail || add_sample(cpu_buf, ibs_sample[0], ibs_sample[1]); 397 - fail = fail || add_sample(cpu_buf, ibs_sample[2], ibs_sample[3]); 398 - fail = fail || add_sample(cpu_buf, ibs_sample[4], ibs_sample[5]); 399 - 400 - if (ibs_code == IBS_OP_BEGIN) { 401 - fail = fail || add_sample(cpu_buf, ibs_sample[6], ibs_sample[7]); 402 - fail = fail || add_sample(cpu_buf, ibs_sample[8], ibs_sample[9]); 403 - fail = fail || add_sample(cpu_buf, ibs_sample[10], ibs_sample[11]); 404 - } 405 - 406 - if (fail) 282 + /* no backtraces for samples with data */ 283 + if (op_add_code(cpu_buf, 0, is_kernel, current)) 407 284 goto fail; 408 285 409 - if (backtrace_depth) 410 - oprofile_ops.backtrace(regs, backtrace_depth); 286 + sample = op_cpu_buffer_write_reserve(entry, size + 2); 287 + if (!sample) 288 + goto fail; 289 + sample->eip = ESCAPE_CODE; 290 + sample->event = 0; /* no flags */ 291 + 292 + op_cpu_buffer_add_data(entry, code); 293 + op_cpu_buffer_add_data(entry, pc); 411 294 412 295 return; 413 296 414 297 fail: 415 298 cpu_buf->sample_lost_overflow++; 416 - 
return; 417 299 } 418 300 419 - #endif 301 + int oprofile_add_data(struct op_entry *entry, unsigned long val) 302 + { 303 + return op_cpu_buffer_add_data(entry, val); 304 + } 305 + 306 + int oprofile_write_commit(struct op_entry *entry) 307 + { 308 + return op_cpu_buffer_write_commit(entry); 309 + } 420 310 421 311 void oprofile_add_pc(unsigned long pc, int is_kernel, unsigned long event) 422 312 { 423 313 struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(cpu_buffer); 424 - log_sample(cpu_buf, pc, is_kernel, event); 314 + log_sample(cpu_buf, pc, 0, is_kernel, event); 425 315 } 426 316 427 317 void oprofile_add_trace(unsigned long pc) ··· 426 340 if (pc == ESCAPE_CODE) 427 341 goto fail; 428 342 429 - if (add_sample(cpu_buf, pc, 0)) 343 + if (op_add_sample(cpu_buf, pc, 0)) 430 344 goto fail; 431 345 432 346 return;

+39 -52

drivers/oprofile/cpu_buffer.h

··· 1 1 /** 2 2 * @file cpu_buffer.h 3 3 * 4 - * @remark Copyright 2002 OProfile authors 4 + * @remark Copyright 2002-2009 OProfile authors 5 5 * @remark Read the file COPYING 6 6 * 7 7 * @author John Levon <levon@movementarian.org> 8 + * @author Robert Richter <robert.richter@amd.com> 8 9 */ 9 10 10 11 #ifndef OPROFILE_CPU_BUFFER_H ··· 32 31 struct op_sample { 33 32 unsigned long eip; 34 33 unsigned long event; 34 + unsigned long data[0]; 35 35 }; 36 36 37 - struct op_entry { 38 - struct ring_buffer_event *event; 39 - struct op_sample *sample; 40 - unsigned long irq_flags; 41 - }; 37 + struct op_entry; 42 38 43 39 struct oprofile_cpu_buffer { 44 - volatile unsigned long head_pos; 45 - volatile unsigned long tail_pos; 46 40 unsigned long buffer_size; 47 41 struct task_struct *last_task; 48 42 int last_is_kernel; ··· 50 54 struct delayed_work work; 51 55 }; 52 56 53 - extern struct ring_buffer *op_ring_buffer_read; 54 - extern struct ring_buffer *op_ring_buffer_write; 55 57 DECLARE_PER_CPU(struct oprofile_cpu_buffer, cpu_buffer); 56 58 57 59 /* ··· 58 64 * reset these to invalid values; the next sample collected will 59 65 * populate the buffer with proper values to initialize the buffer 60 66 */ 61 - static inline void cpu_buffer_reset(int cpu) 67 + static inline void op_cpu_buffer_reset(int cpu) 62 68 { 63 69 struct oprofile_cpu_buffer *cpu_buf = &per_cpu(cpu_buffer, cpu); 64 70 ··· 66 72 cpu_buf->last_task = NULL; 67 73 } 68 74 69 - static inline int cpu_buffer_write_entry(struct op_entry *entry) 75 + struct op_sample 76 + *op_cpu_buffer_write_reserve(struct op_entry *entry, unsigned long size); 77 + int op_cpu_buffer_write_commit(struct op_entry *entry); 78 + struct op_sample *op_cpu_buffer_read_entry(struct op_entry *entry, int cpu); 79 + unsigned long op_cpu_buffer_entries(int cpu); 80 + 81 + /* returns the remaining free size of data in the entry */ 82 + static inline 83 + int op_cpu_buffer_add_data(struct op_entry *entry, unsigned long val) 70 84 { 71 - 
entry->event = ring_buffer_lock_reserve(op_ring_buffer_write, 72 - sizeof(struct op_sample), 73 - &entry->irq_flags); 74 - if (entry->event) 75 - entry->sample = ring_buffer_event_data(entry->event); 76 - else 77 - entry->sample = NULL; 78 - 79 - if (!entry->sample) 80 - return -ENOMEM; 81 - 82 - return 0; 85 + if (!entry->size) 86 + return 0; 87 + *entry->data = val; 88 + entry->size--; 89 + entry->data++; 90 + return entry->size; 83 91 } 84 92 85 - static inline int cpu_buffer_write_commit(struct op_entry *entry) 93 + /* returns the size of data in the entry */ 94 + static inline 95 + int op_cpu_buffer_get_size(struct op_entry *entry) 86 96 { 87 - return ring_buffer_unlock_commit(op_ring_buffer_write, entry->event, 88 - entry->irq_flags); 97 + return entry->size; 89 98 } 90 99 91 - static inline struct op_sample *cpu_buffer_read_entry(int cpu) 100 + /* returns 0 if empty or the size of data including the current value */ 101 + static inline 102 + int op_cpu_buffer_get_data(struct op_entry *entry, unsigned long *val) 92 103 { 93 - struct ring_buffer_event *e; 94 - e = ring_buffer_consume(op_ring_buffer_read, cpu, NULL); 95 - if (e) 96 - return ring_buffer_event_data(e); 97 - if (ring_buffer_swap_cpu(op_ring_buffer_read, 98 - op_ring_buffer_write, 99 - cpu)) 100 - return NULL; 101 - e = ring_buffer_consume(op_ring_buffer_read, cpu, NULL); 102 - if (e) 103 - return ring_buffer_event_data(e); 104 - return NULL; 104 + int size = entry->size; 105 + if (!size) 106 + return 0; 107 + *val = *entry->data; 108 + entry->size--; 109 + entry->data++; 110 + return size; 105 111 } 106 112 107 - /* "acquire" as many cpu buffer slots as we can */ 108 - static inline unsigned long cpu_buffer_entries(int cpu) 109 - { 110 - return ring_buffer_entries_cpu(op_ring_buffer_read, cpu) 111 - + ring_buffer_entries_cpu(op_ring_buffer_write, cpu); 112 - } 113 - 114 - /* transient events for the CPU buffer -> event buffer */ 115 - #define CPU_IS_KERNEL 1 116 - #define CPU_TRACE_BEGIN 2 117 - 
#define IBS_FETCH_BEGIN 3 118 - #define IBS_OP_BEGIN 4 113 + /* extra data flags */ 114 + #define KERNEL_CTX_SWITCH (1UL << 0) 115 + #define IS_KERNEL (1UL << 1) 116 + #define TRACE_BEGIN (1UL << 2) 117 + #define USER_CTX_SWITCH (1UL << 3) 119 118 120 119 #endif /* OPROFILE_CPU_BUFFER_H */

+2 -2

drivers/oprofile/event_buffer.c

··· 73 73 unsigned long flags; 74 74 75 75 spin_lock_irqsave(&oprofilefs_lock, flags); 76 - buffer_size = fs_buffer_size; 77 - buffer_watershed = fs_buffer_watershed; 76 + buffer_size = oprofile_buffer_size; 77 + buffer_watershed = oprofile_buffer_watershed; 78 78 spin_unlock_irqrestore(&oprofilefs_lock, flags); 79 79 80 80 if (buffer_watershed >= buffer_size)

+2 -2

drivers/oprofile/oprof.c

··· 23 23 struct oprofile_operations oprofile_ops; 24 24 25 25 unsigned long oprofile_started; 26 - unsigned long backtrace_depth; 26 + unsigned long oprofile_backtrace_depth; 27 27 static unsigned long is_setup; 28 28 static DEFINE_MUTEX(start_mutex); 29 29 ··· 172 172 goto out; 173 173 } 174 174 175 - backtrace_depth = val; 175 + oprofile_backtrace_depth = val; 176 176 177 177 out: 178 178 mutex_unlock(&start_mutex);

+4 -4

drivers/oprofile/oprof.h

··· 21 21 22 22 struct oprofile_operations; 23 23 24 - extern unsigned long fs_buffer_size; 25 - extern unsigned long fs_cpu_buffer_size; 26 - extern unsigned long fs_buffer_watershed; 24 + extern unsigned long oprofile_buffer_size; 25 + extern unsigned long oprofile_cpu_buffer_size; 26 + extern unsigned long oprofile_buffer_watershed; 27 27 extern struct oprofile_operations oprofile_ops; 28 28 extern unsigned long oprofile_started; 29 - extern unsigned long backtrace_depth; 29 + extern unsigned long oprofile_backtrace_depth; 30 30 31 31 struct super_block; 32 32 struct dentry;

+14 -13

drivers/oprofile/oprofile_files.c

··· 14 14 #include "oprofile_stats.h" 15 15 #include "oprof.h" 16 16 17 - #define FS_BUFFER_SIZE_DEFAULT 131072 18 - #define FS_CPU_BUFFER_SIZE_DEFAULT 8192 19 - #define FS_BUFFER_WATERSHED_DEFAULT 32768 /* FIXME: tune */ 17 + #define BUFFER_SIZE_DEFAULT 131072 18 + #define CPU_BUFFER_SIZE_DEFAULT 8192 19 + #define BUFFER_WATERSHED_DEFAULT 32768 /* FIXME: tune */ 20 20 21 - unsigned long fs_buffer_size; 22 - unsigned long fs_cpu_buffer_size; 23 - unsigned long fs_buffer_watershed; 21 + unsigned long oprofile_buffer_size; 22 + unsigned long oprofile_cpu_buffer_size; 23 + unsigned long oprofile_buffer_watershed; 24 24 25 25 static ssize_t depth_read(struct file *file, char __user *buf, size_t count, loff_t *offset) 26 26 { 27 - return oprofilefs_ulong_to_user(backtrace_depth, buf, count, offset); 27 + return oprofilefs_ulong_to_user(oprofile_backtrace_depth, buf, count, 28 + offset); 28 29 } 29 30 30 31 ··· 126 125 void oprofile_create_files(struct super_block *sb, struct dentry *root) 127 126 { 128 127 /* reinitialize default values */ 129 - fs_buffer_size = FS_BUFFER_SIZE_DEFAULT; 130 - fs_cpu_buffer_size = FS_CPU_BUFFER_SIZE_DEFAULT; 131 - fs_buffer_watershed = FS_BUFFER_WATERSHED_DEFAULT; 128 + oprofile_buffer_size = BUFFER_SIZE_DEFAULT; 129 + oprofile_cpu_buffer_size = CPU_BUFFER_SIZE_DEFAULT; 130 + oprofile_buffer_watershed = BUFFER_WATERSHED_DEFAULT; 132 131 133 132 oprofilefs_create_file(sb, root, "enable", &enable_fops); 134 133 oprofilefs_create_file_perm(sb, root, "dump", &dump_fops, 0666); 135 134 oprofilefs_create_file(sb, root, "buffer", &event_buffer_fops); 136 - oprofilefs_create_ulong(sb, root, "buffer_size", &fs_buffer_size); 137 - oprofilefs_create_ulong(sb, root, "buffer_watershed", &fs_buffer_watershed); 138 - oprofilefs_create_ulong(sb, root, "cpu_buffer_size", &fs_cpu_buffer_size); 135 + oprofilefs_create_ulong(sb, root, "buffer_size", &oprofile_buffer_size); 136 + oprofilefs_create_ulong(sb, root, "buffer_watershed", 
&oprofile_buffer_watershed); 137 + oprofilefs_create_ulong(sb, root, "cpu_buffer_size", &oprofile_cpu_buffer_size); 139 138 oprofilefs_create_file(sb, root, "cpu_type", &cpu_type_fops); 140 139 oprofilefs_create_file(sb, root, "backtrace_depth", &depth_fops); 141 140 oprofilefs_create_file(sb, root, "pointer_size", &pointer_size_fops);

+18

include/linux/oprofile.h

··· 164 164 unsigned long oprofile_get_cpu_buffer_size(void); 165 165 void oprofile_cpu_buffer_inc_smpl_lost(void); 166 166 167 + /* cpu buffer functions */ 168 + 169 + struct op_sample; 170 + 171 + struct op_entry { 172 + struct ring_buffer_event *event; 173 + struct op_sample *sample; 174 + unsigned long irq_flags; 175 + unsigned long size; 176 + unsigned long *data; 177 + }; 178 + 179 + void oprofile_write_reserve(struct op_entry *entry, 180 + struct pt_regs * const regs, 181 + unsigned long pc, int code, int size); 182 + int oprofile_add_data(struct op_entry *entry, unsigned long val); 183 + int oprofile_write_commit(struct op_entry *entry); 184 + 167 185 #endif /* OPROFILE_H */

+7 -1

kernel/trace/ring_buffer.c

··· 168 168 */ 169 169 unsigned ring_buffer_event_length(struct ring_buffer_event *event) 170 170 { 171 - return rb_event_length(event); 171 + unsigned length = rb_event_length(event); 172 + if (event->type != RINGBUF_TYPE_DATA) 173 + return length; 174 + length -= RB_EVNT_HDR_SIZE; 175 + if (length > RB_MAX_SMALL_DATA + sizeof(event->array[0])) 176 + length -= sizeof(event->array[0]); 177 + return length; 172 178 } 173 179 EXPORT_SYMBOL_GPL(ring_buffer_event_length); 174 180