Merge branch 'for-2.6.23' of master.kernel.org:/pub/scm/linux/kernel/git/arnd/cell-2.6

* 'for-2.6.23' of master.kernel.org:/pub/scm/linux/kernel/git/arnd/cell-2.6: (37 commits)
[CELL] spufs: rework list management and associated locking
[CELL] oprofile: add support to OProfile for profiling CELL BE SPUs
[CELL] oprofile: enable SPU switch notification to detect currently active SPU tasks
[CELL] spu_base: locking cleanup
[CELL] cell: indexing of SPUs based on firmware vicinity properties
[CELL] spufs: integration of SPE affinity with the scheduler
[CELL] cell: add placement computation for scheduling of affinity contexts
[CELL] spufs: extension of spu_create to support affinity definition
[CELL] cell: add hardcoded spu vicinity information for QS20
[CELL] cell: add vicinity information on spus
[CELL] cell: add per BE structure with info about its SPUs
[CELL] spufs: use find_first_bit() instead of sched_find_first_bit()
[CELL] spufs: remove unused file argument from spufs_run_spu()
[CELL] spufs: change decrementer restore timing
[CELL] spufs: don't halt decrementer at restore step 47
[CELL] spufs: limit saving MFC_CNTL bits
[CELL] spufs: fix read and write for decr_status file
[CELL] spufs: fix decr_status meanings
[CELL] spufs: remove needless context save/restore code
[CELL] spufs: fix array size of channel index
...

+4330 -941
+2 -1
arch/powerpc/configs/cell_defconfig
···
  # Instrumentation Support
  #
  CONFIG_PROFILING=y
- CONFIG_OPROFILE=y
+ CONFIG_OPROFILE=m
+ CONFIG_OPROFILE_CELL=y
  # CONFIG_KPROBES is not set

  #
+67
arch/powerpc/kernel/crash.c
···
      cpus_in_sr = CPU_MASK_NONE;
  }
  #endif
+ #ifdef CONFIG_SPU_BASE
+
+ #include <asm/spu.h>
+ #include <asm/spu_priv1.h>
+
+ struct crash_spu_info {
+     struct spu *spu;
+     u32 saved_spu_runcntl_RW;
+     u32 saved_spu_status_R;
+     u32 saved_spu_npc_RW;
+     u64 saved_mfc_sr1_RW;
+     u64 saved_mfc_dar;
+     u64 saved_mfc_dsisr;
+ };
+
+ #define CRASH_NUM_SPUS 16 /* Enough for current hardware */
+ static struct crash_spu_info crash_spu_info[CRASH_NUM_SPUS];
+
+ static void crash_kexec_stop_spus(void)
+ {
+     struct spu *spu;
+     int i;
+     u64 tmp;
+
+     for (i = 0; i < CRASH_NUM_SPUS; i++) {
+         if (!crash_spu_info[i].spu)
+             continue;
+
+         spu = crash_spu_info[i].spu;
+
+         crash_spu_info[i].saved_spu_runcntl_RW =
+             in_be32(&spu->problem->spu_runcntl_RW);
+         crash_spu_info[i].saved_spu_status_R =
+             in_be32(&spu->problem->spu_status_R);
+         crash_spu_info[i].saved_spu_npc_RW =
+             in_be32(&spu->problem->spu_npc_RW);
+
+         crash_spu_info[i].saved_mfc_dar = spu_mfc_dar_get(spu);
+         crash_spu_info[i].saved_mfc_dsisr = spu_mfc_dsisr_get(spu);
+         tmp = spu_mfc_sr1_get(spu);
+         crash_spu_info[i].saved_mfc_sr1_RW = tmp;
+
+         tmp &= ~MFC_STATE1_MASTER_RUN_CONTROL_MASK;
+         spu_mfc_sr1_set(spu, tmp);
+
+         __delay(200);
+     }
+ }
+
+ void crash_register_spus(struct list_head *list)
+ {
+     struct spu *spu;
+
+     list_for_each_entry(spu, list, full_list) {
+         if (WARN_ON(spu->number >= CRASH_NUM_SPUS))
+             continue;
+
+         crash_spu_info[spu->number].spu = spu;
+     }
+ }
+
+ #else
+ static inline void crash_kexec_stop_spus(void)
+ {
+ }
+ #endif /* CONFIG_SPU_BASE */

  void default_machine_crash_shutdown(struct pt_regs *regs)
  {
···
      crash_save_cpu(regs, crashing_cpu);
      crash_kexec_prepare_cpus(crashing_cpu);
      cpu_set(crashing_cpu, cpus_in_crash);
+     crash_kexec_stop_spus();
      if (ppc_md.kexec_cpu_down)
          ppc_md.kexec_cpu_down(1, 0);
  }
+1
arch/powerpc/kernel/time.c
···
  static long timezone_offset;

  unsigned long ppc_proc_freq;
+ EXPORT_SYMBOL(ppc_proc_freq);
  unsigned long ppc_tb_freq;

  static u64 tb_last_jiffy __cacheline_aligned_in_smp;
+7
arch/powerpc/oprofile/Kconfig
···

      If unsure, say N.

+ config OPROFILE_CELL
+     bool "OProfile for Cell Broadband Engine"
+     depends on (SPU_FS = y && OPROFILE = m) || (SPU_FS = y && OPROFILE = y) || (SPU_FS = m && OPROFILE = m)
+     default y
+     help
+       Profiling of Cell BE SPUs requires special support enabled
+       by this option.
+3 -1
arch/powerpc/oprofile/Makefile
···
          timer_int.o )

  oprofile-y := $(DRIVER_OBJS) common.o backtrace.o
- oprofile-$(CONFIG_PPC_CELL_NATIVE) += op_model_cell.o
+ oprofile-$(CONFIG_OPROFILE_CELL) += op_model_cell.o \
+         cell/spu_profiler.o cell/vma_map.o \
+         cell/spu_task_sync.o
  oprofile-$(CONFIG_PPC64) += op_model_rs64.o op_model_power4.o op_model_pa6t.o
  oprofile-$(CONFIG_FSL_BOOKE) += op_model_fsl_booke.o
  oprofile-$(CONFIG_6xx) += op_model_7450.o
+97
arch/powerpc/oprofile/cell/pr_util.h
···
+ /*
+  * Cell Broadband Engine OProfile Support
+  *
+  * (C) Copyright IBM Corporation 2006
+  *
+  * Author: Maynard Johnson <maynardj@us.ibm.com>
+  *
+  * This program is free software; you can redistribute it and/or
+  * modify it under the terms of the GNU General Public License
+  * as published by the Free Software Foundation; either version
+  * 2 of the License, or (at your option) any later version.
+  */
+
+ #ifndef PR_UTIL_H
+ #define PR_UTIL_H
+
+ #include <linux/cpumask.h>
+ #include <linux/oprofile.h>
+ #include <asm/cell-pmu.h>
+ #include <asm/spu.h>
+
+ #include "../../platforms/cell/cbe_regs.h"
+
+ /* Defines used for sync_start */
+ #define SKIP_GENERIC_SYNC 0
+ #define SYNC_START_ERROR -1
+ #define DO_GENERIC_SYNC 1
+
+ struct spu_overlay_info {   /* map of sections within an SPU overlay */
+     unsigned int vma;       /* SPU virtual memory address from elf */
+     unsigned int size;      /* size of section from elf */
+     unsigned int offset;    /* offset of section into elf file */
+     unsigned int buf;
+ };
+
+ struct vma_to_fileoffset_map {  /* map of sections within an SPU program */
+     struct vma_to_fileoffset_map *next; /* list pointer */
+     unsigned int vma;       /* SPU virtual memory address from elf */
+     unsigned int size;      /* size of section from elf */
+     unsigned int offset;    /* offset of section into elf file */
+     unsigned int guard_ptr;
+     unsigned int guard_val;
+     /*
+      * The guard pointer is an entry in the _ovly_buf_table,
+      * computed using ovly.buf as the index into the table. Since
+      * ovly.buf values begin at '1' to reference the first (or 0th)
+      * entry in the _ovly_buf_table, the computation subtracts 1
+      * from ovly.buf.
+      * The guard value is stored in the _ovly_buf_table entry and
+      * is an index (starting at 1) back to the _ovly_table entry
+      * that is pointing at this _ovly_buf_table entry. So, for
+      * example, for an overlay scenario with one overlay segment
+      * and two overlay sections:
+      *   - Section 1 points to the first entry of the
+      *     _ovly_buf_table, which contains a guard value
+      *     of '1', referencing the first (index=0) entry of
+      *     _ovly_table.
+      *   - Section 2 points to the second entry of the
+      *     _ovly_buf_table, which contains a guard value
+      *     of '2', referencing the second (index=1) entry of
+      *     _ovly_table.
+      */
+
+ };
+
+ /* The three functions below are for maintaining and accessing
+  * the vma-to-fileoffset map.
+  */
+ struct vma_to_fileoffset_map *create_vma_map(const struct spu *spu,
+                                              u64 objectid);
+ unsigned int vma_map_lookup(struct vma_to_fileoffset_map *map,
+                             unsigned int vma, const struct spu *aSpu,
+                             int *grd_val);
+ void vma_map_free(struct vma_to_fileoffset_map *map);
+
+ /*
+  * Entry point for SPU profiling.
+  * cycles_reset is the SPU_CYCLES count value specified by the user.
+  */
+ int start_spu_profiling(unsigned int cycles_reset);
+
+ void stop_spu_profiling(void);
+
+
+ /* add the necessary profiling hooks */
+ int spu_sync_start(void);
+
+ /* remove the hooks */
+ int spu_sync_stop(void);
+
+ /* Record SPU program counter samples to the oprofile event buffer. */
+ void spu_sync_buffer(int spu_num, unsigned int *samples,
+                      int num_samples);
+
+ void set_spu_profiling_frequency(unsigned int freq_khz, unsigned int cycles_reset);
+
+ #endif /* PR_UTIL_H */
+221
arch/powerpc/oprofile/cell/spu_profiler.c
··· 1 + /* 2 + * Cell Broadband Engine OProfile Support 3 + * 4 + * (C) Copyright IBM Corporation 2006 5 + * 6 + * Authors: Maynard Johnson <maynardj@us.ibm.com> 7 + * Carl Love <carll@us.ibm.com> 8 + * 9 + * This program is free software; you can redistribute it and/or 10 + * modify it under the terms of the GNU General Public License 11 + * as published by the Free Software Foundation; either version 12 + * 2 of the License, or (at your option) any later version. 13 + */ 14 + 15 + #include <linux/hrtimer.h> 16 + #include <linux/smp.h> 17 + #include <linux/slab.h> 18 + #include <asm/cell-pmu.h> 19 + #include "pr_util.h" 20 + 21 + #define TRACE_ARRAY_SIZE 1024 22 + #define SCALE_SHIFT 14 23 + 24 + static u32 *samples; 25 + 26 + static int spu_prof_running; 27 + static unsigned int profiling_interval; 28 + 29 + #define NUM_SPU_BITS_TRBUF 16 30 + #define SPUS_PER_TB_ENTRY 4 31 + #define SPUS_PER_NODE 8 32 + 33 + #define SPU_PC_MASK 0xFFFF 34 + 35 + static DEFINE_SPINLOCK(sample_array_lock); 36 + unsigned long sample_array_lock_flags; 37 + 38 + void set_spu_profiling_frequency(unsigned int freq_khz, unsigned int cycles_reset) 39 + { 40 + unsigned long ns_per_cyc; 41 + 42 + if (!freq_khz) 43 + freq_khz = ppc_proc_freq/1000; 44 + 45 + /* To calculate a timeout in nanoseconds, the basic 46 + * formula is ns = cycles_reset * (NSEC_PER_SEC / cpu frequency). 47 + * To avoid floating point math, we use the scale math 48 + * technique as described in linux/jiffies.h. We use 49 + * a scale factor of SCALE_SHIFT, which provides 4 decimal places 50 + * of precision. This is close enough for the purpose at hand. 51 + * 52 + * The value of the timeout should be small enough that the hw 53 + * trace buffer will not get more then about 1/3 full for the 54 + * maximum user specified (the LFSR value) hw sampling frequency. 55 + * This is to ensure the trace buffer will never fill even if the 56 + * kernel thread scheduling varies under a heavy system load. 57 + */ 58 + 59 + ns_per_cyc = (USEC_PER_SEC << SCALE_SHIFT)/freq_khz; 60 + profiling_interval = (ns_per_cyc * cycles_reset) >> SCALE_SHIFT; 61 + 62 + } 63 + 64 + /* 65 + * Extract SPU PC from trace buffer entry 66 + */ 67 + static void spu_pc_extract(int cpu, int entry) 68 + { 69 + /* the trace buffer is 128 bits */ 70 + u64 trace_buffer[2]; 71 + u64 spu_mask; 72 + int spu; 73 + 74 + spu_mask = SPU_PC_MASK; 75 + 76 + /* Each SPU PC is 16 bits; hence, four spus in each of 77 + * the two 64-bit buffer entries that make up the 78 + * 128-bit trace_buffer entry. Process two 64-bit values 79 + * simultaneously. 
80 + * trace[0] SPU PC contents are: 0 1 2 3 81 + * trace[1] SPU PC contents are: 4 5 6 7 82 + */ 83 + 84 + cbe_read_trace_buffer(cpu, trace_buffer); 85 + 86 + for (spu = SPUS_PER_TB_ENTRY-1; spu >= 0; spu--) { 87 + /* spu PC trace entry is upper 16 bits of the 88 + * 18 bit SPU program counter 89 + */ 90 + samples[spu * TRACE_ARRAY_SIZE + entry] 91 + = (spu_mask & trace_buffer[0]) << 2; 92 + samples[(spu + SPUS_PER_TB_ENTRY) * TRACE_ARRAY_SIZE + entry] 93 + = (spu_mask & trace_buffer[1]) << 2; 94 + 95 + trace_buffer[0] = trace_buffer[0] >> NUM_SPU_BITS_TRBUF; 96 + trace_buffer[1] = trace_buffer[1] >> NUM_SPU_BITS_TRBUF; 97 + } 98 + } 99 + 100 + static int cell_spu_pc_collection(int cpu) 101 + { 102 + u32 trace_addr; 103 + int entry; 104 + 105 + /* process the collected SPU PC for the node */ 106 + 107 + entry = 0; 108 + 109 + trace_addr = cbe_read_pm(cpu, trace_address); 110 + while (!(trace_addr & CBE_PM_TRACE_BUF_EMPTY)) { 111 + /* there is data in the trace buffer to process */ 112 + spu_pc_extract(cpu, entry); 113 + 114 + entry++; 115 + 116 + if (entry >= TRACE_ARRAY_SIZE) 117 + /* spu_samples is full */ 118 + break; 119 + 120 + trace_addr = cbe_read_pm(cpu, trace_address); 121 + } 122 + 123 + return entry; 124 + } 125 + 126 + 127 + static enum hrtimer_restart profile_spus(struct hrtimer *timer) 128 + { 129 + ktime_t kt; 130 + int cpu, node, k, num_samples, spu_num; 131 + 132 + if (!spu_prof_running) 133 + goto stop; 134 + 135 + for_each_online_cpu(cpu) { 136 + if (cbe_get_hw_thread_id(cpu)) 137 + continue; 138 + 139 + node = cbe_cpu_to_node(cpu); 140 + 141 + /* There should only be one kernel thread at a time processing 142 + * the samples. In the very unlikely case that the processing 143 + * is taking a very long time and multiple kernel threads are 144 + * started to process the samples. Make sure only one kernel 145 + * thread is working on the samples array at a time. The 146 + * sample array must be loaded and then processed for a given 147 + * cpu. The sample array is not per cpu. 148 + */ 149 + spin_lock_irqsave(&sample_array_lock, 150 + sample_array_lock_flags); 151 + num_samples = cell_spu_pc_collection(cpu); 152 + 153 + if (num_samples == 0) { 154 + spin_unlock_irqrestore(&sample_array_lock, 155 + sample_array_lock_flags); 156 + continue; 157 + } 158 + 159 + for (k = 0; k < SPUS_PER_NODE; k++) { 160 + spu_num = k + (node * SPUS_PER_NODE); 161 + spu_sync_buffer(spu_num, 162 + samples + (k * TRACE_ARRAY_SIZE), 163 + num_samples); 164 + } 165 + 166 + spin_unlock_irqrestore(&sample_array_lock, 167 + sample_array_lock_flags); 168 + 169 + } 170 + smp_wmb(); /* insure spu event buffer updates are written */ 171 + /* don't want events intermingled... */ 172 + 173 + kt = ktime_set(0, profiling_interval); 174 + if (!spu_prof_running) 175 + goto stop; 176 + hrtimer_forward(timer, timer->base->get_time(), kt); 177 + return HRTIMER_RESTART; 178 + 179 + stop: 180 + printk(KERN_INFO "SPU_PROF: spu-prof timer ending\n"); 181 + return HRTIMER_NORESTART; 182 + } 183 + 184 + static struct hrtimer timer; 185 + /* 186 + * Entry point for SPU profiling. 187 + * NOTE: SPU profiling is done system-wide, not per-CPU. 188 + * 189 + * cycles_reset is the count value specified by the user when 190 + * setting up OProfile to count SPU_CYCLES. 
191 + */ 192 + int start_spu_profiling(unsigned int cycles_reset) 193 + { 194 + ktime_t kt; 195 + 196 + pr_debug("timer resolution: %lu\n", TICK_NSEC); 197 + kt = ktime_set(0, profiling_interval); 198 + hrtimer_init(&timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); 199 + timer.expires = kt; 200 + timer.function = profile_spus; 201 + 202 + /* Allocate arrays for collecting SPU PC samples */ 203 + samples = kzalloc(SPUS_PER_NODE * 204 + TRACE_ARRAY_SIZE * sizeof(u32), GFP_KERNEL); 205 + 206 + if (!samples) 207 + return -ENOMEM; 208 + 209 + spu_prof_running = 1; 210 + hrtimer_start(&timer, kt, HRTIMER_MODE_REL); 211 + 212 + return 0; 213 + } 214 + 215 + void stop_spu_profiling(void) 216 + { 217 + spu_prof_running = 0; 218 + hrtimer_cancel(&timer); 219 + kfree(samples); 220 + pr_debug("SPU_PROF: stop_spu_profiling issued\n"); 221 + }
+484
arch/powerpc/oprofile/cell/spu_task_sync.c
··· 1 + /* 2 + * Cell Broadband Engine OProfile Support 3 + * 4 + * (C) Copyright IBM Corporation 2006 5 + * 6 + * Author: Maynard Johnson <maynardj@us.ibm.com> 7 + * 8 + * This program is free software; you can redistribute it and/or 9 + * modify it under the terms of the GNU General Public License 10 + * as published by the Free Software Foundation; either version 11 + * 2 of the License, or (at your option) any later version. 12 + */ 13 + 14 + /* The purpose of this file is to handle SPU event task switching 15 + * and to record SPU context information into the OProfile 16 + * event buffer. 17 + * 18 + * Additionally, the spu_sync_buffer function is provided as a helper 19 + * for recoding actual SPU program counter samples to the event buffer. 20 + */ 21 + #include <linux/dcookies.h> 22 + #include <linux/kref.h> 23 + #include <linux/mm.h> 24 + #include <linux/module.h> 25 + #include <linux/notifier.h> 26 + #include <linux/numa.h> 27 + #include <linux/oprofile.h> 28 + #include <linux/spinlock.h> 29 + #include "pr_util.h" 30 + 31 + #define RELEASE_ALL 9999 32 + 33 + static DEFINE_SPINLOCK(buffer_lock); 34 + static DEFINE_SPINLOCK(cache_lock); 35 + static int num_spu_nodes; 36 + int spu_prof_num_nodes; 37 + int last_guard_val[MAX_NUMNODES * 8]; 38 + 39 + /* Container for caching information about an active SPU task. */ 40 + struct cached_info { 41 + struct vma_to_fileoffset_map *map; 42 + struct spu *the_spu; /* needed to access pointer to local_store */ 43 + struct kref cache_ref; 44 + }; 45 + 46 + static struct cached_info *spu_info[MAX_NUMNODES * 8]; 47 + 48 + static void destroy_cached_info(struct kref *kref) 49 + { 50 + struct cached_info *info; 51 + 52 + info = container_of(kref, struct cached_info, cache_ref); 53 + vma_map_free(info->map); 54 + kfree(info); 55 + module_put(THIS_MODULE); 56 + } 57 + 58 + /* Return the cached_info for the passed SPU number. 59 + * ATTENTION: Callers are responsible for obtaining the 60 + * cache_lock if needed prior to invoking this function. 61 + */ 62 + static struct cached_info *get_cached_info(struct spu *the_spu, int spu_num) 63 + { 64 + struct kref *ref; 65 + struct cached_info *ret_info; 66 + 67 + if (spu_num >= num_spu_nodes) { 68 + printk(KERN_ERR "SPU_PROF: " 69 + "%s, line %d: Invalid index %d into spu info cache\n", 70 + __FUNCTION__, __LINE__, spu_num); 71 + ret_info = NULL; 72 + goto out; 73 + } 74 + if (!spu_info[spu_num] && the_spu) { 75 + ref = spu_get_profile_private_kref(the_spu->ctx); 76 + if (ref) { 77 + spu_info[spu_num] = container_of(ref, struct cached_info, cache_ref); 78 + kref_get(&spu_info[spu_num]->cache_ref); 79 + } 80 + } 81 + 82 + ret_info = spu_info[spu_num]; 83 + out: 84 + return ret_info; 85 + } 86 + 87 + 88 + /* Looks for cached info for the passed spu. If not found, the 89 + * cached info is created for the passed spu. 90 + * Returns 0 for success; otherwise, -1 for error. 91 + */ 92 + static int 93 + prepare_cached_spu_info(struct spu *spu, unsigned long objectId) 94 + { 95 + unsigned long flags; 96 + struct vma_to_fileoffset_map *new_map; 97 + int retval = 0; 98 + struct cached_info *info; 99 + 100 + /* We won't bother getting cache_lock here since 101 + * don't do anything with the cached_info that's returned. 102 + */ 103 + info = get_cached_info(spu, spu->number); 104 + 105 + if (info) { 106 + pr_debug("Found cached SPU info.\n"); 107 + goto out; 108 + } 109 + 110 + /* Create cached_info and set spu_info[spu->number] to point to it. 111 + * spu->number is a system-wide value, not a per-node value. 
112 + */ 113 + info = kzalloc(sizeof(struct cached_info), GFP_KERNEL); 114 + if (!info) { 115 + printk(KERN_ERR "SPU_PROF: " 116 + "%s, line %d: create vma_map failed\n", 117 + __FUNCTION__, __LINE__); 118 + retval = -ENOMEM; 119 + goto err_alloc; 120 + } 121 + new_map = create_vma_map(spu, objectId); 122 + if (!new_map) { 123 + printk(KERN_ERR "SPU_PROF: " 124 + "%s, line %d: create vma_map failed\n", 125 + __FUNCTION__, __LINE__); 126 + retval = -ENOMEM; 127 + goto err_alloc; 128 + } 129 + 130 + pr_debug("Created vma_map\n"); 131 + info->map = new_map; 132 + info->the_spu = spu; 133 + kref_init(&info->cache_ref); 134 + spin_lock_irqsave(&cache_lock, flags); 135 + spu_info[spu->number] = info; 136 + /* Increment count before passing off ref to SPUFS. */ 137 + kref_get(&info->cache_ref); 138 + 139 + /* We increment the module refcount here since SPUFS is 140 + * responsible for the final destruction of the cached_info, 141 + * and it must be able to access the destroy_cached_info() 142 + * function defined in the OProfile module. We decrement 143 + * the module refcount in destroy_cached_info. 144 + */ 145 + try_module_get(THIS_MODULE); 146 + spu_set_profile_private_kref(spu->ctx, &info->cache_ref, 147 + destroy_cached_info); 148 + spin_unlock_irqrestore(&cache_lock, flags); 149 + goto out; 150 + 151 + err_alloc: 152 + kfree(info); 153 + out: 154 + return retval; 155 + } 156 + 157 + /* 158 + * NOTE: The caller is responsible for locking the 159 + * cache_lock prior to calling this function. 160 + */ 161 + static int release_cached_info(int spu_index) 162 + { 163 + int index, end; 164 + 165 + if (spu_index == RELEASE_ALL) { 166 + end = num_spu_nodes; 167 + index = 0; 168 + } else { 169 + if (spu_index >= num_spu_nodes) { 170 + printk(KERN_ERR "SPU_PROF: " 171 + "%s, line %d: " 172 + "Invalid index %d into spu info cache\n", 173 + __FUNCTION__, __LINE__, spu_index); 174 + goto out; 175 + } 176 + end = spu_index + 1; 177 + index = spu_index; 178 + } 179 + for (; index < end; index++) { 180 + if (spu_info[index]) { 181 + kref_put(&spu_info[index]->cache_ref, 182 + destroy_cached_info); 183 + spu_info[index] = NULL; 184 + } 185 + } 186 + 187 + out: 188 + return 0; 189 + } 190 + 191 + /* The source code for fast_get_dcookie was "borrowed" 192 + * from drivers/oprofile/buffer_sync.c. 193 + */ 194 + 195 + /* Optimisation. We can manage without taking the dcookie sem 196 + * because we cannot reach this code without at least one 197 + * dcookie user still being registered (namely, the reader 198 + * of the event buffer). 199 + */ 200 + static inline unsigned long fast_get_dcookie(struct dentry *dentry, 201 + struct vfsmount *vfsmnt) 202 + { 203 + unsigned long cookie; 204 + 205 + if (dentry->d_cookie) 206 + return (unsigned long)dentry; 207 + get_dcookie(dentry, vfsmnt, &cookie); 208 + return cookie; 209 + } 210 + 211 + /* Look up the dcookie for the task's first VM_EXECUTABLE mapping, 212 + * which corresponds loosely to "application name". Also, determine 213 + * the offset for the SPU ELF object. If computed offset is 214 + * non-zero, it implies an embedded SPU object; otherwise, it's a 215 + * separate SPU binary, in which case we retrieve it's dcookie. 216 + * For the embedded case, we must determine if SPU ELF is embedded 217 + * in the executable application or another file (i.e., shared lib). 218 + * If embedded in a shared lib, we must get the dcookie and return 219 + * that to the caller. 
220 + */ 221 + static unsigned long 222 + get_exec_dcookie_and_offset(struct spu *spu, unsigned int *offsetp, 223 + unsigned long *spu_bin_dcookie, 224 + unsigned long spu_ref) 225 + { 226 + unsigned long app_cookie = 0; 227 + unsigned int my_offset = 0; 228 + struct file *app = NULL; 229 + struct vm_area_struct *vma; 230 + struct mm_struct *mm = spu->mm; 231 + 232 + if (!mm) 233 + goto out; 234 + 235 + down_read(&mm->mmap_sem); 236 + 237 + for (vma = mm->mmap; vma; vma = vma->vm_next) { 238 + if (!vma->vm_file) 239 + continue; 240 + if (!(vma->vm_flags & VM_EXECUTABLE)) 241 + continue; 242 + app_cookie = fast_get_dcookie(vma->vm_file->f_dentry, 243 + vma->vm_file->f_vfsmnt); 244 + pr_debug("got dcookie for %s\n", 245 + vma->vm_file->f_dentry->d_name.name); 246 + app = vma->vm_file; 247 + break; 248 + } 249 + 250 + for (vma = mm->mmap; vma; vma = vma->vm_next) { 251 + if (vma->vm_start > spu_ref || vma->vm_end <= spu_ref) 252 + continue; 253 + my_offset = spu_ref - vma->vm_start; 254 + if (!vma->vm_file) 255 + goto fail_no_image_cookie; 256 + 257 + pr_debug("Found spu ELF at %X(object-id:%lx) for file %s\n", 258 + my_offset, spu_ref, 259 + vma->vm_file->f_dentry->d_name.name); 260 + *offsetp = my_offset; 261 + break; 262 + } 263 + 264 + *spu_bin_dcookie = fast_get_dcookie(vma->vm_file->f_dentry, 265 + vma->vm_file->f_vfsmnt); 266 + pr_debug("got dcookie for %s\n", vma->vm_file->f_dentry->d_name.name); 267 + 268 + up_read(&mm->mmap_sem); 269 + 270 + out: 271 + return app_cookie; 272 + 273 + fail_no_image_cookie: 274 + up_read(&mm->mmap_sem); 275 + 276 + printk(KERN_ERR "SPU_PROF: " 277 + "%s, line %d: Cannot find dcookie for SPU binary\n", 278 + __FUNCTION__, __LINE__); 279 + goto out; 280 + } 281 + 282 + 283 + 284 + /* This function finds or creates cached context information for the 285 + * passed SPU and records SPU context information into the OProfile 286 + * event buffer. 287 + */ 288 + static int process_context_switch(struct spu *spu, unsigned long objectId) 289 + { 290 + unsigned long flags; 291 + int retval; 292 + unsigned int offset = 0; 293 + unsigned long spu_cookie = 0, app_dcookie; 294 + 295 + retval = prepare_cached_spu_info(spu, objectId); 296 + if (retval) 297 + goto out; 298 + 299 + /* Get dcookie first because a mutex_lock is taken in that 300 + * code path, so interrupts must not be disabled. 301 + */ 302 + app_dcookie = get_exec_dcookie_and_offset(spu, &offset, &spu_cookie, objectId); 303 + if (!app_dcookie || !spu_cookie) { 304 + retval = -ENOENT; 305 + goto out; 306 + } 307 + 308 + /* Record context info in event buffer */ 309 + spin_lock_irqsave(&buffer_lock, flags); 310 + add_event_entry(ESCAPE_CODE); 311 + add_event_entry(SPU_CTX_SWITCH_CODE); 312 + add_event_entry(spu->number); 313 + add_event_entry(spu->pid); 314 + add_event_entry(spu->tgid); 315 + add_event_entry(app_dcookie); 316 + add_event_entry(spu_cookie); 317 + add_event_entry(offset); 318 + spin_unlock_irqrestore(&buffer_lock, flags); 319 + smp_wmb(); /* insure spu event buffer updates are written */ 320 + /* don't want entries intermingled... */ 321 + out: 322 + return retval; 323 + } 324 + 325 + /* 326 + * This function is invoked on either a bind_context or unbind_context. 327 + * If called for an unbind_context, the val arg is 0; otherwise, 328 + * it is the object-id value for the spu context. 329 + * The data arg is of type 'struct spu *'. 
330 + */ 331 + static int spu_active_notify(struct notifier_block *self, unsigned long val, 332 + void *data) 333 + { 334 + int retval; 335 + unsigned long flags; 336 + struct spu *the_spu = data; 337 + 338 + pr_debug("SPU event notification arrived\n"); 339 + if (!val) { 340 + spin_lock_irqsave(&cache_lock, flags); 341 + retval = release_cached_info(the_spu->number); 342 + spin_unlock_irqrestore(&cache_lock, flags); 343 + } else { 344 + retval = process_context_switch(the_spu, val); 345 + } 346 + return retval; 347 + } 348 + 349 + static struct notifier_block spu_active = { 350 + .notifier_call = spu_active_notify, 351 + }; 352 + 353 + static int number_of_online_nodes(void) 354 + { 355 + u32 cpu; u32 tmp; 356 + int nodes = 0; 357 + for_each_online_cpu(cpu) { 358 + tmp = cbe_cpu_to_node(cpu) + 1; 359 + if (tmp > nodes) 360 + nodes++; 361 + } 362 + return nodes; 363 + } 364 + 365 + /* The main purpose of this function is to synchronize 366 + * OProfile with SPUFS by registering to be notified of 367 + * SPU task switches. 368 + * 369 + * NOTE: When profiling SPUs, we must ensure that only 370 + * spu_sync_start is invoked and not the generic sync_start 371 + * in drivers/oprofile/oprof.c. A return value of 372 + * SKIP_GENERIC_SYNC or SYNC_START_ERROR will 373 + * accomplish this. 374 + */ 375 + int spu_sync_start(void) 376 + { 377 + int k; 378 + int ret = SKIP_GENERIC_SYNC; 379 + int register_ret; 380 + unsigned long flags = 0; 381 + 382 + spu_prof_num_nodes = number_of_online_nodes(); 383 + num_spu_nodes = spu_prof_num_nodes * 8; 384 + 385 + spin_lock_irqsave(&buffer_lock, flags); 386 + add_event_entry(ESCAPE_CODE); 387 + add_event_entry(SPU_PROFILING_CODE); 388 + add_event_entry(num_spu_nodes); 389 + spin_unlock_irqrestore(&buffer_lock, flags); 390 + 391 + /* Register for SPU events */ 392 + register_ret = spu_switch_event_register(&spu_active); 393 + if (register_ret) { 394 + ret = SYNC_START_ERROR; 395 + goto out; 396 + } 397 + 398 + for (k = 0; k < (MAX_NUMNODES * 8); k++) 399 + last_guard_val[k] = 0; 400 + pr_debug("spu_sync_start -- running.\n"); 401 + out: 402 + return ret; 403 + } 404 + 405 + /* Record SPU program counter samples to the oprofile event buffer. */ 406 + void spu_sync_buffer(int spu_num, unsigned int *samples, 407 + int num_samples) 408 + { 409 + unsigned long long file_offset; 410 + unsigned long flags; 411 + int i; 412 + struct vma_to_fileoffset_map *map; 413 + struct spu *the_spu; 414 + unsigned long long spu_num_ll = spu_num; 415 + unsigned long long spu_num_shifted = spu_num_ll << 32; 416 + struct cached_info *c_info; 417 + 418 + /* We need to obtain the cache_lock here because it's 419 + * possible that after getting the cached_info, the SPU job 420 + * corresponding to this cached_info may end, thus resulting 421 + * in the destruction of the cached_info. 422 + */ 423 + spin_lock_irqsave(&cache_lock, flags); 424 + c_info = get_cached_info(NULL, spu_num); 425 + if (!c_info) { 426 + /* This legitimately happens when the SPU task ends before all 427 + * samples are recorded. 428 + * No big deal -- so we just drop a few samples. 429 + */ 430 + pr_debug("SPU_PROF: No cached SPU contex " 431 + "for SPU #%d. 
Dropping samples.\n", spu_num); 432 + goto out; 433 + } 434 + 435 + map = c_info->map; 436 + the_spu = c_info->the_spu; 437 + spin_lock(&buffer_lock); 438 + for (i = 0; i < num_samples; i++) { 439 + unsigned int sample = *(samples+i); 440 + int grd_val = 0; 441 + file_offset = 0; 442 + if (sample == 0) 443 + continue; 444 + file_offset = vma_map_lookup( map, sample, the_spu, &grd_val); 445 + 446 + /* If overlays are used by this SPU application, the guard 447 + * value is non-zero, indicating which overlay section is in 448 + * use. We need to discard samples taken during the time 449 + * period which an overlay occurs (i.e., guard value changes). 450 + */ 451 + if (grd_val && grd_val != last_guard_val[spu_num]) { 452 + last_guard_val[spu_num] = grd_val; 453 + /* Drop the rest of the samples. */ 454 + break; 455 + } 456 + 457 + add_event_entry(file_offset | spu_num_shifted); 458 + } 459 + spin_unlock(&buffer_lock); 460 + out: 461 + spin_unlock_irqrestore(&cache_lock, flags); 462 + } 463 + 464 + 465 + int spu_sync_stop(void) 466 + { 467 + unsigned long flags = 0; 468 + int ret = spu_switch_event_unregister(&spu_active); 469 + if (ret) { 470 + printk(KERN_ERR "SPU_PROF: " 471 + "%s, line %d: spu_switch_event_unregister returned %d\n", 472 + __FUNCTION__, __LINE__, ret); 473 + goto out; 474 + } 475 + 476 + spin_lock_irqsave(&cache_lock, flags); 477 + ret = release_cached_info(RELEASE_ALL); 478 + spin_unlock_irqrestore(&cache_lock, flags); 479 + out: 480 + pr_debug("spu_sync_stop -- done.\n"); 481 + return ret; 482 + } 483 + 484 +
+287
arch/powerpc/oprofile/cell/vma_map.c
··· 1 + /* 2 + * Cell Broadband Engine OProfile Support 3 + * 4 + * (C) Copyright IBM Corporation 2006 5 + * 6 + * Author: Maynard Johnson <maynardj@us.ibm.com> 7 + * 8 + * This program is free software; you can redistribute it and/or 9 + * modify it under the terms of the GNU General Public License 10 + * as published by the Free Software Foundation; either version 11 + * 2 of the License, or (at your option) any later version. 12 + */ 13 + 14 + /* The code in this source file is responsible for generating 15 + * vma-to-fileOffset maps for both overlay and non-overlay SPU 16 + * applications. 17 + */ 18 + 19 + #include <linux/mm.h> 20 + #include <linux/string.h> 21 + #include <linux/uaccess.h> 22 + #include <linux/elf.h> 23 + #include "pr_util.h" 24 + 25 + 26 + void vma_map_free(struct vma_to_fileoffset_map *map) 27 + { 28 + while (map) { 29 + struct vma_to_fileoffset_map *next = map->next; 30 + kfree(map); 31 + map = next; 32 + } 33 + } 34 + 35 + unsigned int 36 + vma_map_lookup(struct vma_to_fileoffset_map *map, unsigned int vma, 37 + const struct spu *aSpu, int *grd_val) 38 + { 39 + /* 40 + * Default the offset to the physical address + a flag value. 41 + * Addresses of dynamically generated code can't be found in the vma 42 + * map. For those addresses the flagged value will be sent on to 43 + * the user space tools so they can be reported rather than just 44 + * thrown away. 45 + */ 46 + u32 offset = 0x10000000 + vma; 47 + u32 ovly_grd; 48 + 49 + for (; map; map = map->next) { 50 + if (vma < map->vma || vma >= map->vma + map->size) 51 + continue; 52 + 53 + if (map->guard_ptr) { 54 + ovly_grd = *(u32 *)(aSpu->local_store + map->guard_ptr); 55 + if (ovly_grd != map->guard_val) 56 + continue; 57 + *grd_val = ovly_grd; 58 + } 59 + offset = vma - map->vma + map->offset; 60 + break; 61 + } 62 + 63 + return offset; 64 + } 65 + 66 + static struct vma_to_fileoffset_map * 67 + vma_map_add(struct vma_to_fileoffset_map *map, unsigned int vma, 68 + unsigned int size, unsigned int offset, unsigned int guard_ptr, 69 + unsigned int guard_val) 70 + { 71 + struct vma_to_fileoffset_map *new = 72 + kzalloc(sizeof(struct vma_to_fileoffset_map), GFP_KERNEL); 73 + if (!new) { 74 + printk(KERN_ERR "SPU_PROF: %s, line %d: malloc failed\n", 75 + __FUNCTION__, __LINE__); 76 + vma_map_free(map); 77 + return NULL; 78 + } 79 + 80 + new->next = map; 81 + new->vma = vma; 82 + new->size = size; 83 + new->offset = offset; 84 + new->guard_ptr = guard_ptr; 85 + new->guard_val = guard_val; 86 + 87 + return new; 88 + } 89 + 90 + 91 + /* Parse SPE ELF header and generate a list of vma_maps. 92 + * A pointer to the first vma_map in the generated list 93 + * of vma_maps is returned. 
*/ 94 + struct vma_to_fileoffset_map *create_vma_map(const struct spu *aSpu, 95 + unsigned long spu_elf_start) 96 + { 97 + static const unsigned char expected[EI_PAD] = { 98 + [EI_MAG0] = ELFMAG0, 99 + [EI_MAG1] = ELFMAG1, 100 + [EI_MAG2] = ELFMAG2, 101 + [EI_MAG3] = ELFMAG3, 102 + [EI_CLASS] = ELFCLASS32, 103 + [EI_DATA] = ELFDATA2MSB, 104 + [EI_VERSION] = EV_CURRENT, 105 + [EI_OSABI] = ELFOSABI_NONE 106 + }; 107 + 108 + int grd_val; 109 + struct vma_to_fileoffset_map *map = NULL; 110 + struct spu_overlay_info ovly; 111 + unsigned int overlay_tbl_offset = -1; 112 + unsigned long phdr_start, shdr_start; 113 + Elf32_Ehdr ehdr; 114 + Elf32_Phdr phdr; 115 + Elf32_Shdr shdr, shdr_str; 116 + Elf32_Sym sym; 117 + int i, j; 118 + char name[32]; 119 + 120 + unsigned int ovly_table_sym = 0; 121 + unsigned int ovly_buf_table_sym = 0; 122 + unsigned int ovly_table_end_sym = 0; 123 + unsigned int ovly_buf_table_end_sym = 0; 124 + unsigned long ovly_table; 125 + unsigned int n_ovlys; 126 + 127 + /* Get and validate ELF header. */ 128 + 129 + if (copy_from_user(&ehdr, (void *) spu_elf_start, sizeof (ehdr))) 130 + goto fail; 131 + 132 + if (memcmp(ehdr.e_ident, expected, EI_PAD) != 0) { 133 + printk(KERN_ERR "SPU_PROF: " 134 + "%s, line %d: Unexpected e_ident parsing SPU ELF\n", 135 + __FUNCTION__, __LINE__); 136 + goto fail; 137 + } 138 + if (ehdr.e_machine != EM_SPU) { 139 + printk(KERN_ERR "SPU_PROF: " 140 + "%s, line %d: Unexpected e_machine parsing SPU ELF\n", 141 + __FUNCTION__, __LINE__); 142 + goto fail; 143 + } 144 + if (ehdr.e_type != ET_EXEC) { 145 + printk(KERN_ERR "SPU_PROF: " 146 + "%s, line %d: Unexpected e_type parsing SPU ELF\n", 147 + __FUNCTION__, __LINE__); 148 + goto fail; 149 + } 150 + phdr_start = spu_elf_start + ehdr.e_phoff; 151 + shdr_start = spu_elf_start + ehdr.e_shoff; 152 + 153 + /* Traverse program headers. */ 154 + for (i = 0; i < ehdr.e_phnum; i++) { 155 + if (copy_from_user(&phdr, 156 + (void *) (phdr_start + i * sizeof(phdr)), 157 + sizeof(phdr))) 158 + goto fail; 159 + 160 + if (phdr.p_type != PT_LOAD) 161 + continue; 162 + if (phdr.p_flags & (1 << 27)) 163 + continue; 164 + 165 + map = vma_map_add(map, phdr.p_vaddr, phdr.p_memsz, 166 + phdr.p_offset, 0, 0); 167 + if (!map) 168 + goto fail; 169 + } 170 + 171 + pr_debug("SPU_PROF: Created non-overlay maps\n"); 172 + /* Traverse section table and search for overlay-related symbols. 
*/ 173 + for (i = 0; i < ehdr.e_shnum; i++) { 174 + if (copy_from_user(&shdr, 175 + (void *) (shdr_start + i * sizeof(shdr)), 176 + sizeof(shdr))) 177 + goto fail; 178 + 179 + if (shdr.sh_type != SHT_SYMTAB) 180 + continue; 181 + if (shdr.sh_entsize != sizeof (sym)) 182 + continue; 183 + 184 + if (copy_from_user(&shdr_str, 185 + (void *) (shdr_start + shdr.sh_link * 186 + sizeof(shdr)), 187 + sizeof(shdr))) 188 + goto fail; 189 + 190 + if (shdr_str.sh_type != SHT_STRTAB) 191 + goto fail;; 192 + 193 + for (j = 0; j < shdr.sh_size / sizeof (sym); j++) { 194 + if (copy_from_user(&sym, (void *) (spu_elf_start + 195 + shdr.sh_offset + j * 196 + sizeof (sym)), 197 + sizeof (sym))) 198 + goto fail; 199 + 200 + if (copy_from_user(name, (void *) 201 + (spu_elf_start + shdr_str.sh_offset + 202 + sym.st_name), 203 + 20)) 204 + goto fail; 205 + 206 + if (memcmp(name, "_ovly_table", 12) == 0) 207 + ovly_table_sym = sym.st_value; 208 + if (memcmp(name, "_ovly_buf_table", 16) == 0) 209 + ovly_buf_table_sym = sym.st_value; 210 + if (memcmp(name, "_ovly_table_end", 16) == 0) 211 + ovly_table_end_sym = sym.st_value; 212 + if (memcmp(name, "_ovly_buf_table_end", 20) == 0) 213 + ovly_buf_table_end_sym = sym.st_value; 214 + } 215 + } 216 + 217 + /* If we don't have overlays, we're done. */ 218 + if (ovly_table_sym == 0 || ovly_buf_table_sym == 0 219 + || ovly_table_end_sym == 0 || ovly_buf_table_end_sym == 0) { 220 + pr_debug("SPU_PROF: No overlay table found\n"); 221 + goto out; 222 + } else { 223 + pr_debug("SPU_PROF: Overlay table found\n"); 224 + } 225 + 226 + /* The _ovly_table symbol represents a table with one entry 227 + * per overlay section. The _ovly_buf_table symbol represents 228 + * a table with one entry per overlay region. 229 + * The struct spu_overlay_info gives the structure of the _ovly_table 230 + * entries. The structure of _ovly_table_buf is simply one 231 + * u32 word per entry. 232 + */ 233 + overlay_tbl_offset = vma_map_lookup(map, ovly_table_sym, 234 + aSpu, &grd_val); 235 + if (overlay_tbl_offset < 0) { 236 + printk(KERN_ERR "SPU_PROF: " 237 + "%s, line %d: Error finding SPU overlay table\n", 238 + __FUNCTION__, __LINE__); 239 + goto fail; 240 + } 241 + ovly_table = spu_elf_start + overlay_tbl_offset; 242 + 243 + n_ovlys = (ovly_table_end_sym - 244 + ovly_table_sym) / sizeof (ovly); 245 + 246 + /* Traverse overlay table. */ 247 + for (i = 0; i < n_ovlys; i++) { 248 + if (copy_from_user(&ovly, (void *) 249 + (ovly_table + i * sizeof (ovly)), 250 + sizeof (ovly))) 251 + goto fail; 252 + 253 + /* The ovly.vma/size/offset arguments are analogous to the same 254 + * arguments used above for non-overlay maps. The final two 255 + * args are referred to as the guard pointer and the guard 256 + * value. 257 + * The guard pointer is an entry in the _ovly_buf_table, 258 + * computed using ovly.buf as the index into the table. Since 259 + * ovly.buf values begin at '1' to reference the first (or 0th) 260 + * entry in the _ovly_buf_table, the computation subtracts 1 261 + * from ovly.buf. 262 + * The guard value is stored in the _ovly_buf_table entry and 263 + * is an index (starting at 1) back to the _ovly_table entry 264 + * that is pointing at this _ovly_buf_table entry. So, for 265 + * example, for an overlay scenario with one overlay segment 266 + * and two overlay sections: 267 + * - Section 1 points to the first entry of the 268 + * _ovly_buf_table, which contains a guard value 269 + * of '1', referencing the first (index=0) entry of 270 + * _ovly_table. 
271 + * - Section 2 points to the second entry of the 272 + * _ovly_buf_table, which contains a guard value 273 + * of '2', referencing the second (index=1) entry of 274 + * _ovly_table. 275 + */ 276 + map = vma_map_add(map, ovly.vma, ovly.size, ovly.offset, 277 + ovly_buf_table_sym + (ovly.buf-1) * 4, i+1); 278 + if (!map) 279 + goto fail; 280 + } 281 + goto out; 282 + 283 + fail: 284 + map = NULL; 285 + out: 286 + return map; 287 + }
+42 -9
arch/powerpc/oprofile/common.c
···
  static struct op_counter_config ctr[OP_MAX_COUNTER];
  static struct op_system_config sys;

+ static int op_per_cpu_rc;
+
  static void op_handle_interrupt(struct pt_regs *regs)
  {
      model->handle_interrupt(regs, ctr);
···

  static void op_powerpc_cpu_setup(void *dummy)
  {
-     model->cpu_setup(ctr);
+     int ret;
+
+     ret = model->cpu_setup(ctr);
+
+     if (ret != 0)
+         op_per_cpu_rc = ret;
  }

  static int op_powerpc_setup(void)
  {
      int err;
+
+     op_per_cpu_rc = 0;

      /* Grab the hardware */
      err = reserve_pmc_hardware(op_handle_interrupt);
···
          return err;

      /* Pre-compute the values to stuff in the hardware registers. */
-     model->reg_setup(ctr, &sys, model->num_counters);
+     op_per_cpu_rc = model->reg_setup(ctr, &sys, model->num_counters);

-     /* Configure the registers on all cpus. */
+     if (op_per_cpu_rc)
+         goto out;
+
+     /* Configure the registers on all cpus. If an error occurs on one
+      * of the cpus, op_per_cpu_rc will be set to the error */
      on_each_cpu(op_powerpc_cpu_setup, NULL, 0, 1);

-     return 0;
+ out: if (op_per_cpu_rc) {
+         /* error on setup release the performance counter hardware */
+         release_pmc_hardware();
+     }
+
+     return op_per_cpu_rc;
  }

  static void op_powerpc_shutdown(void)
···

  static void op_powerpc_cpu_start(void *dummy)
  {
-     model->start(ctr);
+     /* If any of the cpus have return an error, set the
+      * global flag to the error so it can be returned
+      * to the generic OProfile caller.
+      */
+     int ret;
+
+     ret = model->start(ctr);
+     if (ret != 0)
+         op_per_cpu_rc = ret;
  }

  static int op_powerpc_start(void)
  {
+     op_per_cpu_rc = 0;
+
      if (model->global_start)
-         model->global_start(ctr);
-     if (model->start)
+         return model->global_start(ctr);
+     if (model->start) {
          on_each_cpu(op_powerpc_cpu_start, NULL, 0, 1);
-     return 0;
+         return op_per_cpu_rc;
+     }
+     return -EIO; /* No start function is defined for this
+                     power architecture */
  }

  static inline void op_powerpc_cpu_stop(void *dummy)
···

      switch (cur_cpu_spec->oprofile_type) {
  #ifdef CONFIG_PPC64
- #ifdef CONFIG_PPC_CELL_NATIVE
+ #ifdef CONFIG_OPROFILE_CELL
          case PPC_OPROFILE_CELL:
              if (firmware_has_feature(FW_FEATURE_LPAR))
                  return -ENODEV;
              model = &op_model_cell;
+             ops->sync_start = model->sync_start;
+             ops->sync_stop = model->sync_stop;
              break;
  #endif
          case PPC_OPROFILE_RS64:
+10 -4
arch/powerpc/oprofile/op_model_7450.c
···

  /* Configures the counters on this CPU based on the global
   * settings */
- static void fsl7450_cpu_setup(struct op_counter_config *ctr)
+ static int fsl7450_cpu_setup(struct op_counter_config *ctr)
  {
      /* freeze all counters */
      pmc_stop_ctrs();
···
      mtspr(SPRN_MMCR0, mmcr0_val);
      mtspr(SPRN_MMCR1, mmcr1_val);
      mtspr(SPRN_MMCR2, mmcr2_val);
+
+     return 0;
  }

  #define NUM_CTRS 6

  /* Configures the global settings for the countes on all CPUs. */
- static void fsl7450_reg_setup(struct op_counter_config *ctr,
+ static int fsl7450_reg_setup(struct op_counter_config *ctr,
                                struct op_system_config *sys,
                                int num_ctrs)
  {
···
          | mmcr1_event6(ctr[5].event);

      mmcr2_val = 0;
+
+     return 0;
  }

  /* Sets the counters on this CPU to the chosen values, and starts them */
- static void fsl7450_start(struct op_counter_config *ctr)
+ static int fsl7450_start(struct op_counter_config *ctr)
  {
      int i;

···
      pmc_start_ctrs();

      oprofile_running = 1;
+
+     return 0;
  }

  /* Stop the counters on this CPU */
···
      /* The freeze bit was set by the interrupt. */
      /* Clear the freeze bit, and reenable the interrupt.
       * The counters won't actually start until the rfi clears
-      * the PMM bit */
+      * the PM/M bit */
      pmc_start_ctrs();

+528 -81
arch/powerpc/oprofile/op_model_cell.c
··· 5 5 * 6 6 * Author: David Erb (djerb@us.ibm.com) 7 7 * Modifications: 8 - * Carl Love <carll@us.ibm.com> 9 - * Maynard Johnson <maynardj@us.ibm.com> 8 + * Carl Love <carll@us.ibm.com> 9 + * Maynard Johnson <maynardj@us.ibm.com> 10 10 * 11 11 * This program is free software; you can redistribute it and/or 12 12 * modify it under the terms of the GNU General Public License ··· 38 38 39 39 #include "../platforms/cell/interrupt.h" 40 40 #include "../platforms/cell/cbe_regs.h" 41 + #include "cell/pr_util.h" 42 + 43 + static void cell_global_stop_spu(void); 44 + 45 + /* 46 + * spu_cycle_reset is the number of cycles between samples. 47 + * This variable is used for SPU profiling and should ONLY be set 48 + * at the beginning of cell_reg_setup; otherwise, it's read-only. 49 + */ 50 + static unsigned int spu_cycle_reset; 51 + 52 + #define NUM_SPUS_PER_NODE 8 53 + #define SPU_CYCLES_EVENT_NUM 2 /* event number for SPU_CYCLES */ 41 54 42 55 #define PPU_CYCLES_EVENT_NUM 1 /* event number for CYCLES */ 43 - #define PPU_CYCLES_GRP_NUM 1 /* special group number for identifying 44 - * PPU_CYCLES event 45 - */ 46 - #define CBE_COUNT_ALL_CYCLES 0x42800000 /* PPU cycle event specifier */ 56 + #define PPU_CYCLES_GRP_NUM 1 /* special group number for identifying 57 + * PPU_CYCLES event 58 + */ 59 + #define CBE_COUNT_ALL_CYCLES 0x42800000 /* PPU cycle event specifier */ 47 60 48 61 #define NUM_THREADS 2 /* number of physical threads in 49 62 * physical processor ··· 64 51 #define NUM_TRACE_BUS_WORDS 4 65 52 #define NUM_INPUT_BUS_WORDS 2 66 53 54 + #define MAX_SPU_COUNT 0xFFFFFF /* maximum 24 bit LFSR value */ 67 55 68 56 struct pmc_cntrl_data { 69 57 unsigned long vcntr; ··· 76 62 /* 77 63 * ibm,cbe-perftools rtas parameters 78 64 */ 79 - 80 65 struct pm_signal { 81 66 u16 cpu; /* Processor to modify */ 82 - u16 sub_unit; /* hw subunit this applies to (if applicable) */ 83 - short int signal_group; /* Signal Group to Enable/Disable */ 67 + u16 sub_unit; /* hw subunit this applies to (if applicable)*/ 68 + short int signal_group; /* Signal Group to Enable/Disable */ 84 69 u8 bus_word; /* Enable/Disable on this Trace/Trigger/Event 85 70 * Bus Word(s) (bitmask) 86 71 */ ··· 125 112 126 113 static struct pmc_cntrl_data pmc_cntrl[NUM_THREADS][NR_PHYS_CTRS]; 127 114 128 - /* Interpetation of hdw_thread: 115 + /* 116 + * The CELL profiling code makes rtas calls to setup the debug bus to 117 + * route the performance signals. Additionally, SPU profiling requires 118 + * a second rtas call to setup the hardware to capture the SPU PCs. 119 + * The EIO error value is returned if the token lookups or the rtas 120 + * call fail. The EIO error number is the best choice of the existing 121 + * error numbers. The probability of rtas related error is very low. But 122 + * by returning EIO and printing additional information to dmsg the user 123 + * will know that OProfile did not start and dmesg will tell them why. 124 + * OProfile does not support returning errors on Stop. Not a huge issue 125 + * since failure to reset the debug bus or stop the SPU PC collection is 126 + * not a fatel issue. Chances are if the Stop failed, Start doesn't work 127 + * either. 128 + */ 129 + 130 + /* 131 + * Interpetation of hdw_thread: 129 132 * 0 - even virtual cpus 0, 2, 4,... 130 133 * 1 - odd virtual cpus 1, 3, 5, ... 134 + * 135 + * FIXME: this is strictly wrong, we need to clean this up in a number 136 + * of places. It works for now. 
-arnd 131 137 */ 132 138 static u32 hdw_thread; 133 139 134 140 static u32 virt_cntr_inter_mask; 135 141 static struct timer_list timer_virt_cntr; 136 142 137 - /* pm_signal needs to be global since it is initialized in 143 + /* 144 + * pm_signal needs to be global since it is initialized in 138 145 * cell_reg_setup at the time when the necessary information 139 146 * is available. 140 147 */ 141 148 static struct pm_signal pm_signal[NR_PHYS_CTRS]; 142 - static int pm_rtas_token; 149 + static int pm_rtas_token; /* token for debug bus setup call */ 150 + static int spu_rtas_token; /* token for SPU cycle profiling */ 143 151 144 152 static u32 reset_value[NR_PHYS_CTRS]; 145 153 static int num_counters; ··· 181 147 { 182 148 u64 paddr = __pa(address); 183 149 184 - return rtas_call(pm_rtas_token, 5, 1, NULL, subfunc, passthru, 185 - paddr >> 32, paddr & 0xffffffff, length); 150 + return rtas_call(pm_rtas_token, 5, 1, NULL, subfunc, 151 + passthru, paddr >> 32, paddr & 0xffffffff, length); 186 152 } 187 153 188 154 static void pm_rtas_reset_signals(u32 node) ··· 190 156 int ret; 191 157 struct pm_signal pm_signal_local; 192 158 193 - /* The debug bus is being set to the passthru disable state. 194 - * However, the FW still expects atleast one legal signal routing 195 - * entry or it will return an error on the arguments. If we don't 196 - * supply a valid entry, we must ignore all return values. Ignoring 197 - * all return values means we might miss an error we should be 198 - * concerned about. 159 + /* 160 + * The debug bus is being set to the passthru disable state. 161 + * However, the FW still expects atleast one legal signal routing 162 + * entry or it will return an error on the arguments. If we don't 163 + * supply a valid entry, we must ignore all return values. Ignoring 164 + * all return values means we might miss an error we should be 165 + * concerned about. 199 166 */ 200 167 201 168 /* fw expects physical cpu #. */ ··· 210 175 &pm_signal_local, 211 176 sizeof(struct pm_signal)); 212 177 213 - if (ret) 178 + if (unlikely(ret)) 179 + /* 180 + * Not a fatal error. For Oprofile stop, the oprofile 181 + * functions do not support returning an error for 182 + * failure to stop OProfile. 183 + */ 214 184 printk(KERN_WARNING "%s: rtas returned: %d\n", 215 185 __FUNCTION__, ret); 216 186 } 217 187 218 - static void pm_rtas_activate_signals(u32 node, u32 count) 188 + static int pm_rtas_activate_signals(u32 node, u32 count) 219 189 { 220 190 int ret; 221 191 int i, j; 222 192 struct pm_signal pm_signal_local[NR_PHYS_CTRS]; 223 193 224 - /* There is no debug setup required for the cycles event. 194 + /* 195 + * There is no debug setup required for the cycles event. 225 196 * Note that only events in the same group can be used. 226 197 * Otherwise, there will be conflicts in correctly routing 227 198 * the signals on the debug bus. It is the responsiblity ··· 254 213 pm_signal_local, 255 214 i * sizeof(struct pm_signal)); 256 215 257 - if (ret) 216 + if (unlikely(ret)) { 258 217 printk(KERN_WARNING "%s: rtas returned: %d\n", 259 218 __FUNCTION__, ret); 219 + return -EIO; 220 + } 260 221 } 222 + 223 + return 0; 261 224 } 262 225 263 226 /* ··· 305 260 pm_regs.pm07_cntrl[ctr] |= PM07_CTR_POLARITY(polarity); 306 261 pm_regs.pm07_cntrl[ctr] |= PM07_CTR_INPUT_CONTROL(input_control); 307 262 308 - /* Some of the islands signal selection is based on 64 bit words. 263 + /* 264 + * Some of the islands signal selection is based on 64 bit words. 
309 265 * The debug bus words are 32 bits, the input words to the performance 310 266 * counters are defined as 32 bits. Need to convert the 64 bit island 311 267 * specification to the appropriate 32 input bit and bus word for the 312 - * performance counter event selection. See the CELL Performance 268 + * performance counter event selection. See the CELL Performance 313 269 * monitoring signals manual and the Perf cntr hardware descriptions 314 270 * for the details. 315 271 */ ··· 344 298 input_bus[j] = i; 345 299 pm_regs.group_control |= 346 300 (i << (31 - i)); 301 + 347 302 break; 348 303 } 349 304 } ··· 356 309 357 310 static void write_pm_cntrl(int cpu) 358 311 { 359 - /* Oprofile will use 32 bit counters, set bits 7:10 to 0 312 + /* 313 + * Oprofile will use 32 bit counters, set bits 7:10 to 0 360 314 * pmregs.pm_cntrl is a global 361 315 */ 362 316 ··· 374 326 if (pm_regs.pm_cntrl.freeze == 1) 375 327 val |= CBE_PM_FREEZE_ALL_CTRS; 376 328 377 - /* Routine set_count_mode must be called previously to set 329 + /* 330 + * Routine set_count_mode must be called previously to set 378 331 * the count mode based on the user selection of user and kernel. 379 332 */ 380 333 val |= CBE_PM_COUNT_MODE_SET(pm_regs.pm_cntrl.count_mode); ··· 385 336 static inline void 386 337 set_count_mode(u32 kernel, u32 user) 387 338 { 388 - /* The user must specify user and kernel if they want them. If 339 + /* 340 + * The user must specify user and kernel if they want them. If 389 341 * neither is specified, OProfile will count in hypervisor mode. 390 342 * pm_regs.pm_cntrl is a global 391 343 */ ··· 414 364 415 365 /* 416 366 * Oprofile is expected to collect data on all CPUs simultaneously. 417 - * However, there is one set of performance counters per node. There are 367 + * However, there is one set of performance counters per node. There are 418 368 * two hardware threads or virtual CPUs on each node. Hence, OProfile must 419 369 * multiplex in time the performance counter collection on the two virtual 420 370 * CPUs. The multiplexing of the performance counters is done by this ··· 427 377 * pair of per-cpu arrays is used for storing the previous and next 428 378 * pmc values for a given node. 429 379 * NOTE: We use the per-cpu variable to improve cache performance. 380 + * 381 + * This routine will alternate loading the virtual counters for 382 + * virtual CPUs 430 383 */ 431 384 static void cell_virtual_cntr(unsigned long data) 432 385 { 433 - /* This routine will alternate loading the virtual counters for 434 - * virtual CPUs 435 - */ 436 386 int i, prev_hdw_thread, next_hdw_thread; 437 387 u32 cpu; 438 388 unsigned long flags; 439 389 440 - /* Make sure that the interrupt_hander and 441 - * the virt counter are not both playing with 442 - * the counters on the same node. 390 + /* 391 + * Make sure that the interrupt_hander and the virt counter are 392 + * not both playing with the counters on the same node. 443 393 */ 444 394 445 395 spin_lock_irqsave(&virt_cntr_lock, flags); ··· 450 400 hdw_thread = 1 ^ hdw_thread; 451 401 next_hdw_thread = hdw_thread; 452 402 453 - for (i = 0; i < num_counters; i++) 454 - /* There are some per thread events. Must do the 403 + /* 404 + * There are some per thread events. 
Must do the 455 405 * set event, for the thread that is being started 456 406 */ 407 + for (i = 0; i < num_counters; i++) 457 408 set_pm_event(i, 458 409 pmc_cntrl[next_hdw_thread][i].evnts, 459 410 pmc_cntrl[next_hdw_thread][i].masks); 460 411 461 - /* The following is done only once per each node, but 412 + /* 413 + * The following is done only once per each node, but 462 414 * we need cpu #, not node #, to pass to the cbe_xxx functions. 463 415 */ 464 416 for_each_online_cpu(cpu) { 465 417 if (cbe_get_hw_thread_id(cpu)) 466 418 continue; 467 419 468 - /* stop counters, save counter values, restore counts 420 + /* 421 + * stop counters, save counter values, restore counts 469 422 * for previous thread 470 423 */ 471 424 cbe_disable_pm(cpu); ··· 481 428 == 0xFFFFFFFF) 482 429 /* If the cntr value is 0xffffffff, we must 483 430 * reset that to 0xfffffff0 when the current 484 - * thread is restarted. This will generate a 431 + * thread is restarted. This will generate a 485 432 * new interrupt and make sure that we never 486 433 * restore the counters to the max value. If 487 434 * the counters were restored to the max value, ··· 497 444 next_hdw_thread)[i]); 498 445 } 499 446 500 - /* Switch to the other thread. Change the interrupt 447 + /* 448 + * Switch to the other thread. Change the interrupt 501 449 * and control regs to be scheduled on the CPU 502 450 * corresponding to the thread to execute. 503 451 */ 504 452 for (i = 0; i < num_counters; i++) { 505 453 if (pmc_cntrl[next_hdw_thread][i].enabled) { 506 - /* There are some per thread events. 454 + /* 455 + * There are some per thread events. 507 456 * Must do the set event, enable_cntr 508 457 * for each cpu. 509 458 */ ··· 537 482 } 538 483 539 484 /* This function is called once for all cpus combined */ 540 - static void 541 - cell_reg_setup(struct op_counter_config *ctr, 542 - struct op_system_config *sys, int num_ctrs) 485 + static int cell_reg_setup(struct op_counter_config *ctr, 486 + struct op_system_config *sys, int num_ctrs) 543 487 { 544 488 int i, j, cpu; 489 + spu_cycle_reset = 0; 490 + 491 + if (ctr[0].event == SPU_CYCLES_EVENT_NUM) { 492 + spu_cycle_reset = ctr[0].count; 493 + 494 + /* 495 + * Each node will need to make the rtas call to start 496 + * and stop SPU profiling. Get the token once and store it. 497 + */ 498 + spu_rtas_token = rtas_token("ibm,cbe-spu-perftools"); 499 + 500 + if (unlikely(spu_rtas_token == RTAS_UNKNOWN_SERVICE)) { 501 + printk(KERN_ERR 502 + "%s: rtas token ibm,cbe-spu-perftools unknown\n", 503 + __FUNCTION__); 504 + return -EIO; 505 + } 506 + } 545 507 546 508 pm_rtas_token = rtas_token("ibm,cbe-perftools"); 547 - if (pm_rtas_token == RTAS_UNKNOWN_SERVICE) { 548 - printk(KERN_WARNING "%s: RTAS_UNKNOWN_SERVICE\n", 509 + 510 + /* 511 + * For all events excetp PPU CYCLEs, each node will need to make 512 + * the rtas cbe-perftools call to setup and reset the debug bus. 513 + * Make the token lookup call once and store it in the global 514 + * variable pm_rtas_token. 515 + */ 516 + if (unlikely(pm_rtas_token == RTAS_UNKNOWN_SERVICE)) { 517 + printk(KERN_ERR 518 + "%s: rtas token ibm,cbe-perftools unknown\n", 549 519 __FUNCTION__); 550 - goto out; 520 + return -EIO; 551 521 } 552 522 553 523 num_counters = num_ctrs; ··· 600 520 per_cpu(pmc_values, j)[i] = 0; 601 521 } 602 522 603 - /* Setup the thread 1 events, map the thread 0 event to the 523 + /* 524 + * Setup the thread 1 events, map the thread 0 event to the 604 525 * equivalent thread 1 event. 
605 526 */ 606 527 for (i = 0; i < num_ctrs; ++i) { ··· 625 544 for (i = 0; i < NUM_INPUT_BUS_WORDS; i++) 626 545 input_bus[i] = 0xff; 627 546 628 - /* Our counters count up, and "count" refers to 547 + /* 548 + * Our counters count up, and "count" refers to 629 549 * how much before the next interrupt, and we interrupt 630 - * on overflow. So we calculate the starting value 550 + * on overflow. So we calculate the starting value 631 551 * which will give us "count" until overflow. 632 552 * Then we set the events on the enabled counters. 633 553 */ ··· 651 569 for (i = 0; i < num_counters; ++i) { 652 570 per_cpu(pmc_values, cpu)[i] = reset_value[i]; 653 571 } 654 - out: 655 - ; 572 + 573 + return 0; 656 574 } 657 575 576 + 577 + 658 578 /* This function is called once for each cpu */ 659 - static void cell_cpu_setup(struct op_counter_config *cntr) 579 + static int cell_cpu_setup(struct op_counter_config *cntr) 660 580 { 661 581 u32 cpu = smp_processor_id(); 662 582 u32 num_enabled = 0; 663 583 int i; 664 584 585 + if (spu_cycle_reset) 586 + return 0; 587 + 665 588 /* There is one performance monitor per processor chip (i.e. node), 666 589 * so we only need to perform this function once per node. 667 590 */ 668 591 if (cbe_get_hw_thread_id(cpu)) 669 - goto out; 670 - 671 - if (pm_rtas_token == RTAS_UNKNOWN_SERVICE) { 672 - printk(KERN_WARNING "%s: RTAS_UNKNOWN_SERVICE\n", 673 - __FUNCTION__); 674 - goto out; 675 - } 592 + return 0; 676 593 677 594 /* Stop all counters */ 678 595 cbe_disable_pm(cpu); ··· 690 609 } 691 610 } 692 611 693 - pm_rtas_activate_signals(cbe_cpu_to_node(cpu), num_enabled); 694 - out: 695 - ; 612 + /* 613 + * The pm_rtas_activate_signals will return -EIO if the FW 614 + * call failed. 615 + */ 616 + return pm_rtas_activate_signals(cbe_cpu_to_node(cpu), num_enabled); 696 617 } 697 618 698 - static void cell_global_start(struct op_counter_config *ctr) 619 + #define ENTRIES 303 620 + #define MAXLFSR 0xFFFFFF 621 + 622 + /* precomputed table of 24 bit LFSR values */ 623 + static int initial_lfsr[] = { 624 + 8221349, 12579195, 5379618, 10097839, 7512963, 7519310, 3955098, 10753424, 625 + 15507573, 7458917, 285419, 2641121, 9780088, 3915503, 6668768, 1548716, 626 + 4885000, 8774424, 9650099, 2044357, 2304411, 9326253, 10332526, 4421547, 627 + 3440748, 10179459, 13332843, 10375561, 1313462, 8375100, 5198480, 6071392, 628 + 9341783, 1526887, 3985002, 1439429, 13923762, 7010104, 11969769, 4547026, 629 + 2040072, 4025602, 3437678, 7939992, 11444177, 4496094, 9803157, 10745556, 630 + 3671780, 4257846, 5662259, 13196905, 3237343, 12077182, 16222879, 7587769, 631 + 14706824, 2184640, 12591135, 10420257, 7406075, 3648978, 11042541, 15906893, 632 + 11914928, 4732944, 10695697, 12928164, 11980531, 4430912, 11939291, 2917017, 633 + 6119256, 4172004, 9373765, 8410071, 14788383, 5047459, 5474428, 1737756, 634 + 15967514, 13351758, 6691285, 8034329, 2856544, 14394753, 11310160, 12149558, 635 + 7487528, 7542781, 15668898, 12525138, 12790975, 3707933, 9106617, 1965401, 636 + 16219109, 12801644, 2443203, 4909502, 8762329, 3120803, 6360315, 9309720, 637 + 15164599, 10844842, 4456529, 6667610, 14924259, 884312, 6234963, 3326042, 638 + 15973422, 13919464, 5272099, 6414643, 3909029, 2764324, 5237926, 4774955, 639 + 10445906, 4955302, 5203726, 10798229, 11443419, 2303395, 333836, 9646934, 640 + 3464726, 4159182, 568492, 995747, 10318756, 13299332, 4836017, 8237783, 641 + 3878992, 2581665, 11394667, 5672745, 14412947, 3159169, 9094251, 16467278, 642 + 8671392, 15230076, 4843545, 7009238, 
15504095, 1494895, 9627886, 14485051, 643 + 8304291, 252817, 12421642, 16085736, 4774072, 2456177, 4160695, 15409741, 644 + 4902868, 5793091, 13162925, 16039714, 782255, 11347835, 14884586, 366972, 645 + 16308990, 11913488, 13390465, 2958444, 10340278, 1177858, 1319431, 10426302, 646 + 2868597, 126119, 5784857, 5245324, 10903900, 16436004, 3389013, 1742384, 647 + 14674502, 10279218, 8536112, 10364279, 6877778, 14051163, 1025130, 6072469, 648 + 1988305, 8354440, 8216060, 16342977, 13112639, 3976679, 5913576, 8816697, 649 + 6879995, 14043764, 3339515, 9364420, 15808858, 12261651, 2141560, 5636398, 650 + 10345425, 10414756, 781725, 6155650, 4746914, 5078683, 7469001, 6799140, 651 + 10156444, 9667150, 10116470, 4133858, 2121972, 1124204, 1003577, 1611214, 652 + 14304602, 16221850, 13878465, 13577744, 3629235, 8772583, 10881308, 2410386, 653 + 7300044, 5378855, 9301235, 12755149, 4977682, 8083074, 10327581, 6395087, 654 + 9155434, 15501696, 7514362, 14520507, 15808945, 3244584, 4741962, 9658130, 655 + 14336147, 8654727, 7969093, 15759799, 14029445, 5038459, 9894848, 8659300, 656 + 13699287, 8834306, 10712885, 14753895, 10410465, 3373251, 309501, 9561475, 657 + 5526688, 14647426, 14209836, 5339224, 207299, 14069911, 8722990, 2290950, 658 + 3258216, 12505185, 6007317, 9218111, 14661019, 10537428, 11731949, 9027003, 659 + 6641507, 9490160, 200241, 9720425, 16277895, 10816638, 1554761, 10431375, 660 + 7467528, 6790302, 3429078, 14633753, 14428997, 11463204, 3576212, 2003426, 661 + 6123687, 820520, 9992513, 15784513, 5778891, 6428165, 8388607 662 + }; 663 + 664 + /* 665 + * The hardware uses an LFSR counting sequence to determine when to capture 666 + * the SPU PCs. An LFSR sequence is like a puesdo random number sequence 667 + * where each number occurs once in the sequence but the sequence is not in 668 + * numerical order. The SPU PC capture is done when the LFSR sequence reaches 669 + * the last value in the sequence. Hence the user specified value N 670 + * corresponds to the LFSR number that is N from the end of the sequence. 671 + * 672 + * To avoid the time to compute the LFSR, a lookup table is used. The 24 bit 673 + * LFSR sequence is broken into four ranges. The spacing of the precomputed 674 + * values is adjusted in each range so the error between the user specifed 675 + * number (N) of events between samples and the actual number of events based 676 + * on the precomputed value will be les then about 6.2%. Note, if the user 677 + * specifies N < 2^16, the LFSR value that is 2^16 from the end will be used. 678 + * This is to prevent the loss of samples because the trace buffer is full. 679 + * 680 + * User specified N Step between Index in 681 + * precomputed values precomputed 682 + * table 683 + * 0 to 2^16-1 ---- 0 684 + * 2^16 to 2^16+2^19-1 2^12 1 to 128 685 + * 2^16+2^19 to 2^16+2^19+2^22-1 2^15 129 to 256 686 + * 2^16+2^19+2^22 to 2^24-1 2^18 257 to 302 687 + * 688 + * 689 + * For example, the LFSR values in the second range are computed for 2^16, 690 + * 2^16+2^12, ... , 2^19-2^16, 2^19 and stored in the table at indicies 691 + * 1, 2,..., 127, 128. 
692 + * 693 + * The 24 bit LFSR value for the nth number in the sequence can be 694 + * calculated using the following code: 695 + * 696 + * #define size 24 697 + * int calculate_lfsr(int n) 698 + * { 699 + * int i; 700 + * unsigned int newlfsr0; 701 + * unsigned int lfsr = 0xFFFFFF; 702 + * unsigned int howmany = n; 703 + * 704 + * for (i = 2; i < howmany + 2; i++) { 705 + * newlfsr0 = (((lfsr >> (size - 1 - 0)) & 1) ^ 706 + * ((lfsr >> (size - 1 - 1)) & 1) ^ 707 + * (((lfsr >> (size - 1 - 6)) & 1) ^ 708 + * ((lfsr >> (size - 1 - 23)) & 1))); 709 + * 710 + * lfsr >>= 1; 711 + * lfsr = lfsr | (newlfsr0 << (size - 1)); 712 + * } 713 + * return lfsr; 714 + * } 715 + */ 716 + 717 + #define V2_16 (0x1 << 16) 718 + #define V2_19 (0x1 << 19) 719 + #define V2_22 (0x1 << 22) 720 + 721 + static int calculate_lfsr(int n) 699 722 { 700 - u32 cpu; 723 + /* 724 + * The ranges and steps are in powers of 2 so the calculations 725 + * can be done using shifts rather then divide. 726 + */ 727 + int index; 728 + 729 + if ((n >> 16) == 0) 730 + index = 0; 731 + else if (((n - V2_16) >> 19) == 0) 732 + index = ((n - V2_16) >> 12) + 1; 733 + else if (((n - V2_16 - V2_19) >> 22) == 0) 734 + index = ((n - V2_16 - V2_19) >> 15 ) + 1 + 128; 735 + else if (((n - V2_16 - V2_19 - V2_22) >> 24) == 0) 736 + index = ((n - V2_16 - V2_19 - V2_22) >> 18 ) + 1 + 256; 737 + else 738 + index = ENTRIES-1; 739 + 740 + /* make sure index is valid */ 741 + if ((index > ENTRIES) || (index < 0)) 742 + index = ENTRIES-1; 743 + 744 + return initial_lfsr[index]; 745 + } 746 + 747 + static int pm_rtas_activate_spu_profiling(u32 node) 748 + { 749 + int ret, i; 750 + struct pm_signal pm_signal_local[NR_PHYS_CTRS]; 751 + 752 + /* 753 + * Set up the rtas call to configure the debug bus to 754 + * route the SPU PCs. Setup the pm_signal for each SPU 755 + */ 756 + for (i = 0; i < NUM_SPUS_PER_NODE; i++) { 757 + pm_signal_local[i].cpu = node; 758 + pm_signal_local[i].signal_group = 41; 759 + /* spu i on word (i/2) */ 760 + pm_signal_local[i].bus_word = 1 << i / 2; 761 + /* spu i */ 762 + pm_signal_local[i].sub_unit = i; 763 + pm_signal_local[i].bit = 63; 764 + } 765 + 766 + ret = rtas_ibm_cbe_perftools(SUBFUNC_ACTIVATE, 767 + PASSTHRU_ENABLE, pm_signal_local, 768 + (NUM_SPUS_PER_NODE 769 + * sizeof(struct pm_signal))); 770 + 771 + if (unlikely(ret)) { 772 + printk(KERN_WARNING "%s: rtas returned: %d\n", 773 + __FUNCTION__, ret); 774 + return -EIO; 775 + } 776 + 777 + return 0; 778 + } 779 + 780 + #ifdef CONFIG_CPU_FREQ 781 + static int 782 + oprof_cpufreq_notify(struct notifier_block *nb, unsigned long val, void *data) 783 + { 784 + int ret = 0; 785 + struct cpufreq_freqs *frq = data; 786 + if ((val == CPUFREQ_PRECHANGE && frq->old < frq->new) || 787 + (val == CPUFREQ_POSTCHANGE && frq->old > frq->new) || 788 + (val == CPUFREQ_RESUMECHANGE || val == CPUFREQ_SUSPENDCHANGE)) 789 + set_spu_profiling_frequency(frq->new, spu_cycle_reset); 790 + return ret; 791 + } 792 + 793 + static struct notifier_block cpu_freq_notifier_block = { 794 + .notifier_call = oprof_cpufreq_notify 795 + }; 796 + #endif 797 + 798 + static int cell_global_start_spu(struct op_counter_config *ctr) 799 + { 800 + int subfunc; 801 + unsigned int lfsr_value; 802 + int cpu; 803 + int ret; 804 + int rtas_error; 805 + unsigned int cpu_khzfreq = 0; 806 + 807 + /* The SPU profiling uses time-based profiling based on 808 + * cpu frequency, so if configured with the CPU_FREQ 809 + * option, we should detect frequency changes and react 810 + * accordingly. 
811 + */ 812 + #ifdef CONFIG_CPU_FREQ 813 + ret = cpufreq_register_notifier(&cpu_freq_notifier_block, 814 + CPUFREQ_TRANSITION_NOTIFIER); 815 + if (ret < 0) 816 + /* this is not a fatal error */ 817 + printk(KERN_ERR "CPU freq change registration failed: %d\n", 818 + ret); 819 + 820 + else 821 + cpu_khzfreq = cpufreq_quick_get(smp_processor_id()); 822 + #endif 823 + 824 + set_spu_profiling_frequency(cpu_khzfreq, spu_cycle_reset); 825 + 826 + for_each_online_cpu(cpu) { 827 + if (cbe_get_hw_thread_id(cpu)) 828 + continue; 829 + 830 + /* 831 + * Setup SPU cycle-based profiling. 832 + * Set perf_mon_control bit 0 to a zero before 833 + * enabling spu collection hardware. 834 + */ 835 + cbe_write_pm(cpu, pm_control, 0); 836 + 837 + if (spu_cycle_reset > MAX_SPU_COUNT) 838 + /* use largest possible value */ 839 + lfsr_value = calculate_lfsr(MAX_SPU_COUNT-1); 840 + else 841 + lfsr_value = calculate_lfsr(spu_cycle_reset); 842 + 843 + /* must use a non zero value. Zero disables data collection. */ 844 + if (lfsr_value == 0) 845 + lfsr_value = calculate_lfsr(1); 846 + 847 + lfsr_value = lfsr_value << 8; /* shift lfsr to correct 848 + * register location 849 + */ 850 + 851 + /* debug bus setup */ 852 + ret = pm_rtas_activate_spu_profiling(cbe_cpu_to_node(cpu)); 853 + 854 + if (unlikely(ret)) { 855 + rtas_error = ret; 856 + goto out; 857 + } 858 + 859 + 860 + subfunc = 2; /* 2 - activate SPU tracing, 3 - deactivate */ 861 + 862 + /* start profiling */ 863 + ret = rtas_call(spu_rtas_token, 3, 1, NULL, subfunc, 864 + cbe_cpu_to_node(cpu), lfsr_value); 865 + 866 + if (unlikely(ret != 0)) { 867 + printk(KERN_ERR 868 + "%s: rtas call ibm,cbe-spu-perftools failed, return = %d\n", 869 + __FUNCTION__, ret); 870 + rtas_error = -EIO; 871 + goto out; 872 + } 873 + } 874 + 875 + rtas_error = start_spu_profiling(spu_cycle_reset); 876 + if (rtas_error) 877 + goto out_stop; 878 + 879 + oprofile_running = 1; 880 + return 0; 881 + 882 + out_stop: 883 + cell_global_stop_spu(); /* clean up the PMU/debug bus */ 884 + out: 885 + return rtas_error; 886 + } 887 + 888 + static int cell_global_start_ppu(struct op_counter_config *ctr) 889 + { 890 + u32 cpu, i; 701 891 u32 interrupt_mask = 0; 702 - u32 i; 703 892 704 893 /* This routine gets called once for the system. 705 894 * There is one performance monitor per node, so we ··· 1002 651 oprofile_running = 1; 1003 652 smp_wmb(); 1004 653 1005 - /* NOTE: start_virt_cntrs will result in cell_virtual_cntr() being 1006 - * executed which manipulates the PMU. We start the "virtual counter" 654 + /* 655 + * NOTE: start_virt_cntrs will result in cell_virtual_cntr() being 656 + * executed which manipulates the PMU. We start the "virtual counter" 1007 657 * here so that we do not need to synchronize access to the PMU in 1008 658 * the above for-loop. 1009 659 */ 1010 660 start_virt_cntrs(); 661 + 662 + return 0; 1011 663 } 1012 664 1013 - static void cell_global_stop(void) 665 + static int cell_global_start(struct op_counter_config *ctr) 666 + { 667 + if (spu_cycle_reset) 668 + return cell_global_start_spu(ctr); 669 + else 670 + return cell_global_start_ppu(ctr); 671 + } 672 + 673 + /* 674 + * Note the generic OProfile stop calls do not support returning 675 + * an error on stop. Hence, will not return an error if the FW 676 + * calls fail on stop. Failure to reset the debug bus is not an issue. 677 + * Failure to disable the SPU profiling is not an issue. 
The FW calls 678 + * to enable the performance counters and debug bus will work even if 679 + * the hardware was not cleanly reset. 680 + */ 681 + static void cell_global_stop_spu(void) 682 + { 683 + int subfunc, rtn_value; 684 + unsigned int lfsr_value; 685 + int cpu; 686 + 687 + oprofile_running = 0; 688 + 689 + #ifdef CONFIG_CPU_FREQ 690 + cpufreq_unregister_notifier(&cpu_freq_notifier_block, 691 + CPUFREQ_TRANSITION_NOTIFIER); 692 + #endif 693 + 694 + for_each_online_cpu(cpu) { 695 + if (cbe_get_hw_thread_id(cpu)) 696 + continue; 697 + 698 + subfunc = 3; /* 699 + * 2 - activate SPU tracing, 700 + * 3 - deactivate 701 + */ 702 + lfsr_value = 0x8f100000; 703 + 704 + rtn_value = rtas_call(spu_rtas_token, 3, 1, NULL, 705 + subfunc, cbe_cpu_to_node(cpu), 706 + lfsr_value); 707 + 708 + if (unlikely(rtn_value != 0)) { 709 + printk(KERN_ERR 710 + "%s: rtas call ibm,cbe-spu-perftools failed, return = %d\n", 711 + __FUNCTION__, rtn_value); 712 + } 713 + 714 + /* Deactivate the signals */ 715 + pm_rtas_reset_signals(cbe_cpu_to_node(cpu)); 716 + } 717 + 718 + stop_spu_profiling(); 719 + } 720 + 721 + static void cell_global_stop_ppu(void) 1014 722 { 1015 723 int cpu; 1016 724 1017 - /* This routine will be called once for the system. 725 + /* 726 + * This routine will be called once for the system. 1018 727 * There is one performance monitor per node, so we 1019 728 * only need to perform this function once per node. 1020 729 */ ··· 1098 687 } 1099 688 } 1100 689 1101 - static void 1102 - cell_handle_interrupt(struct pt_regs *regs, struct op_counter_config *ctr) 690 + static void cell_global_stop(void) 691 + { 692 + if (spu_cycle_reset) 693 + cell_global_stop_spu(); 694 + else 695 + cell_global_stop_ppu(); 696 + } 697 + 698 + static void cell_handle_interrupt(struct pt_regs *regs, 699 + struct op_counter_config *ctr) 1103 700 { 1104 701 u32 cpu; 1105 702 u64 pc; ··· 1118 699 1119 700 cpu = smp_processor_id(); 1120 701 1121 - /* Need to make sure the interrupt handler and the virt counter 702 + /* 703 + * Need to make sure the interrupt handler and the virt counter 1122 704 * routine are not running at the same time. See the 1123 705 * cell_virtual_cntr() routine for additional comments. 1124 706 */ 1125 707 spin_lock_irqsave(&virt_cntr_lock, flags); 1126 708 1127 - /* Need to disable and reenable the performance counters 709 + /* 710 + * Need to disable and reenable the performance counters 1128 711 * to get the desired behavior from the hardware. This 1129 712 * is hardware specific. 1130 713 */ ··· 1135 714 1136 715 interrupt_mask = cbe_get_and_clear_pm_interrupts(cpu); 1137 716 1138 - /* If the interrupt mask has been cleared, then the virt cntr 717 + /* 718 + * If the interrupt mask has been cleared, then the virt cntr 1139 719 * has cleared the interrupt. When the thread that generated 1140 720 * the interrupt is restored, the data count will be restored to 1141 721 * 0xffffff0 to cause the interrupt to be regenerated. ··· 1154 732 } 1155 733 } 1156 734 1157 - /* The counters were frozen by the interrupt. 735 + /* 736 + * The counters were frozen by the interrupt. 1158 737 * Reenable the interrupt and restart the counters. 1159 738 * If there was a race between the interrupt handler and 1160 - * the virtual counter routine. The virutal counter 739 + * the virtual counter routine. The virutal counter 1161 740 * routine may have cleared the interrupts. Hence must 1162 741 * use the virt_cntr_inter_mask to re-enable the interrupts. 
1163 742 */ 1164 743 cbe_enable_pm_interrupts(cpu, hdw_thread, 1165 744 virt_cntr_inter_mask); 1166 745 1167 - /* The writes to the various performance counters only writes 1168 - * to a latch. The new values (interrupt setting bits, reset 746 + /* 747 + * The writes to the various performance counters only writes 748 + * to a latch. The new values (interrupt setting bits, reset 1169 749 * counter value etc.) are not copied to the actual registers 1170 750 * until the performance monitor is enabled. In order to get 1171 751 * this to work as desired, the permormance monitor needs to ··· 1179 755 spin_unlock_irqrestore(&virt_cntr_lock, flags); 1180 756 } 1181 757 758 + /* 759 + * This function is called from the generic OProfile 760 + * driver. When profiling PPUs, we need to do the 761 + * generic sync start; otherwise, do spu_sync_start. 762 + */ 763 + static int cell_sync_start(void) 764 + { 765 + if (spu_cycle_reset) 766 + return spu_sync_start(); 767 + else 768 + return DO_GENERIC_SYNC; 769 + } 770 + 771 + static int cell_sync_stop(void) 772 + { 773 + if (spu_cycle_reset) 774 + return spu_sync_stop(); 775 + else 776 + return 1; 777 + } 778 + 1182 779 struct op_powerpc_model op_model_cell = { 1183 780 .reg_setup = cell_reg_setup, 1184 781 .cpu_setup = cell_cpu_setup, 1185 782 .global_start = cell_global_start, 1186 783 .global_stop = cell_global_stop, 784 + .sync_start = cell_sync_start, 785 + .sync_stop = cell_sync_stop, 1187 786 .handle_interrupt = cell_handle_interrupt, 1188 787 };
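The range-to-index mapping that calculate_lfsr() performs on the precomputed table is easier to follow in isolation. The sketch below is a stand-alone user-space version of just that mapping; ENTRIES, the range boundaries and the step sizes are taken from the code above, while lfsr_table_index() and the sample periods in main() are names and values chosen only for this illustration.

        #include <stdio.h>

        #define V2_16   (1 << 16)
        #define V2_19   (1 << 19)
        #define V2_22   (1 << 22)
        #define ENTRIES 303

        /* Map a requested sample period N onto an index into the precomputed
         * 24 bit LFSR table, using the four ranges and step sizes documented
         * in the comment above. */
        static int lfsr_table_index(int n)
        {
                int index;

                if ((n >> 16) == 0)
                        index = 0;                                              /* N < 2^16 */
                else if (((n - V2_16) >> 19) == 0)
                        index = ((n - V2_16) >> 12) + 1;                        /* step 2^12 */
                else if (((n - V2_16 - V2_19) >> 22) == 0)
                        index = ((n - V2_16 - V2_19) >> 15) + 1 + 128;          /* step 2^15 */
                else if (((n - V2_16 - V2_19 - V2_22) >> 24) == 0)
                        index = ((n - V2_16 - V2_19 - V2_22) >> 18) + 1 + 256;  /* step 2^18 */
                else
                        index = ENTRIES - 1;

                /* clamp to a valid table slot */
                if (index >= ENTRIES || index < 0)
                        index = ENTRIES - 1;

                return index;
        }

        int main(void)
        {
                /* one sample period from each of the four ranges */
                int n[] = { 1000, 1 << 17, (1 << 19) + (1 << 17), 1 << 23 };
                int i;

                for (i = 0; i < 4; i++)
                        printf("N=%d -> table index %d\n", n[i], lfsr_table_index(n[i]));

                return 0;
        }

Running this prints indices 0, 17, 131 and 270, one from each of the four documented ranges, which makes the spacing of the precomputed values easy to verify by hand.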
+8 -3
arch/powerpc/oprofile/op_model_fsl_booke.c
··· 244 244 mfpmr(PMRN_PMLCA3), mfpmr(PMRN_PMLCB3)); 245 245 } 246 246 247 - static void fsl_booke_cpu_setup(struct op_counter_config *ctr) 247 + static int fsl_booke_cpu_setup(struct op_counter_config *ctr) 248 248 { 249 249 int i; 250 250 ··· 258 258 259 259 set_pmc_user_kernel(i, ctr[i].user, ctr[i].kernel); 260 260 } 261 + 262 + return 0; 261 263 } 262 264 263 - static void fsl_booke_reg_setup(struct op_counter_config *ctr, 265 + static int fsl_booke_reg_setup(struct op_counter_config *ctr, 264 266 struct op_system_config *sys, 265 267 int num_ctrs) 266 268 { ··· 278 276 for (i = 0; i < num_counters; ++i) 279 277 reset_value[i] = 0x80000000UL - ctr[i].count; 280 278 279 + return 0; 281 280 } 282 281 283 - static void fsl_booke_start(struct op_counter_config *ctr) 282 + static int fsl_booke_start(struct op_counter_config *ctr) 284 283 { 285 284 int i; 286 285 ··· 311 308 312 309 pr_debug("start on cpu %d, pmgc0 %x\n", smp_processor_id(), 313 310 mfpmr(PMRN_PMGC0)); 311 + 312 + return 0; 314 313 } 315 314 316 315 static void fsl_booke_stop(void)
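As on the other PowerPC oprofile models, fsl_booke_reg_setup() arms each counter at 0x80000000 - count, so the counter's top bit sets (the "overflow" condition that raises the interrupt) after exactly count events. A trivial stand-alone illustration of that arithmetic follows; the count value is arbitrary and the program is not part of the patch.

        #include <stdio.h>

        int main(void)
        {
                unsigned long count = 100000;                   /* desired events between samples */
                unsigned long reset_value = 0x80000000UL - count;

                /* Counting up from reset_value, the top bit sets after
                 * exactly 'count' increments. */
                printf("arm counter at 0x%08lx; %lu events until overflow\n",
                       reset_value, 0x80000000UL - reset_value);
                return 0;
        }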
+9 -3
arch/powerpc/oprofile/op_model_pa6t.c
··· 89 89 90 90 91 91 /* precompute the values to stuff in the hardware registers */ 92 - static void pa6t_reg_setup(struct op_counter_config *ctr, 92 + static int pa6t_reg_setup(struct op_counter_config *ctr, 93 93 struct op_system_config *sys, 94 94 int num_ctrs) 95 95 { ··· 135 135 pr_debug("reset_value for pmc%u inited to 0x%lx\n", 136 136 pmc, reset_value[pmc]); 137 137 } 138 + 139 + return 0; 138 140 } 139 141 140 142 /* configure registers on this cpu */ 141 - static void pa6t_cpu_setup(struct op_counter_config *ctr) 143 + static int pa6t_cpu_setup(struct op_counter_config *ctr) 142 144 { 143 145 u64 mmcr0 = mmcr0_val; 144 146 u64 mmcr1 = mmcr1_val; ··· 156 154 mfspr(SPRN_PA6T_MMCR0)); 157 155 pr_debug("setup on cpu %d, mmcr1 %016lx\n", smp_processor_id(), 158 156 mfspr(SPRN_PA6T_MMCR1)); 157 + 158 + return 0; 159 159 } 160 160 161 - static void pa6t_start(struct op_counter_config *ctr) 161 + static int pa6t_start(struct op_counter_config *ctr) 162 162 { 163 163 int i; 164 164 ··· 178 174 oprofile_running = 1; 179 175 180 176 pr_debug("start on cpu %d, mmcr0 %lx\n", smp_processor_id(), mmcr0); 177 + 178 + return 0; 181 179 } 182 180 183 181 static void pa6t_stop(void)
+8 -3
arch/powerpc/oprofile/op_model_power4.c
··· 32 32 static u64 mmcr1_val; 33 33 static u64 mmcra_val; 34 34 35 - static void power4_reg_setup(struct op_counter_config *ctr, 35 + static int power4_reg_setup(struct op_counter_config *ctr, 36 36 struct op_system_config *sys, 37 37 int num_ctrs) 38 38 { ··· 60 60 mmcr0_val &= ~MMCR0_PROBLEM_DISABLE; 61 61 else 62 62 mmcr0_val |= MMCR0_PROBLEM_DISABLE; 63 + 64 + return 0; 63 65 } 64 66 65 67 extern void ppc64_enable_pmcs(void); ··· 86 84 return 0; 87 85 } 88 86 89 - static void power4_cpu_setup(struct op_counter_config *ctr) 87 + static int power4_cpu_setup(struct op_counter_config *ctr) 90 88 { 91 89 unsigned int mmcr0 = mmcr0_val; 92 90 unsigned long mmcra = mmcra_val; ··· 113 111 mfspr(SPRN_MMCR1)); 114 112 dbg("setup on cpu %d, mmcra %lx\n", smp_processor_id(), 115 113 mfspr(SPRN_MMCRA)); 114 + 115 + return 0; 116 116 } 117 117 118 - static void power4_start(struct op_counter_config *ctr) 118 + static int power4_start(struct op_counter_config *ctr) 119 119 { 120 120 int i; 121 121 unsigned int mmcr0; ··· 152 148 oprofile_running = 1; 153 149 154 150 dbg("start on cpu %d, mmcr0 %x\n", smp_processor_id(), mmcr0); 151 + return 0; 155 152 } 156 153 157 154 static void power4_stop(void)
+7 -3
arch/powerpc/oprofile/op_model_rs64.c
··· 88 88 89 89 static int num_counters; 90 90 91 - static void rs64_reg_setup(struct op_counter_config *ctr, 91 + static int rs64_reg_setup(struct op_counter_config *ctr, 92 92 struct op_system_config *sys, 93 93 int num_ctrs) 94 94 { ··· 100 100 reset_value[i] = 0x80000000UL - ctr[i].count; 101 101 102 102 /* XXX setup user and kernel profiling */ 103 + return 0; 103 104 } 104 105 105 - static void rs64_cpu_setup(struct op_counter_config *ctr) 106 + static int rs64_cpu_setup(struct op_counter_config *ctr) 106 107 { 107 108 unsigned int mmcr0; 108 109 ··· 126 125 mfspr(SPRN_MMCR0)); 127 126 dbg("setup on cpu %d, mmcr1 %lx\n", smp_processor_id(), 128 127 mfspr(SPRN_MMCR1)); 128 + 129 + return 0; 129 130 } 130 131 131 - static void rs64_start(struct op_counter_config *ctr) 132 + static int rs64_start(struct op_counter_config *ctr) 132 133 { 133 134 int i; 134 135 unsigned int mmcr0; ··· 158 155 mtspr(SPRN_MMCR0, mmcr0); 159 156 160 157 dbg("start on cpu %d, mmcr0 %x\n", smp_processor_id(), mmcr0); 158 + return 0; 161 159 } 162 160 163 161 static void rs64_stop(void)
+10
arch/powerpc/platforms/Kconfig
··· 272 272 you wish to build a kernel for a machine with a CPM2 coprocessor 273 273 on it (826x, 827x, 8560). 274 274 275 + config AXON_RAM 276 + tristate "Axon DDR2 memory device driver" 277 + depends on PPC_IBM_CELL_BLADE 278 + default m 279 + help 280 + It registers one block device per Axon's DDR2 memory bank found 281 + on a system. Block devices are called axonram?; their major and 282 + minor numbers are available in /proc/devices, /proc/partitions, or 283 + in /sys/block/axonram?/dev. 284 + 275 285 endmenu
+10
arch/powerpc/platforms/cell/Kconfig
··· 73 73 For details, take a look at <file:Documentation/cpu-freq/>. 74 74 If you don't have such processor, say N 75 75 76 + config CBE_CPUFREQ_PMI 77 + tristate "CBE frequency scaling using PMI interface" 78 + depends on CBE_CPUFREQ && PPC_PMI && EXPERIMENTAL 79 + default n 80 + help 81 + Select this if you want to use the PMI interface 82 + to switch frequencies. Using PMI, the 83 + processor will not only be able to run at lower speed, 84 + but also at lower core voltage. 85 + 76 86 endmenu
+5 -1
arch/powerpc/platforms/cell/Makefile
··· 4 4 obj-$(CONFIG_CBE_RAS) += ras.o 5 5 6 6 obj-$(CONFIG_CBE_THERM) += cbe_thermal.o 7 - obj-$(CONFIG_CBE_CPUFREQ) += cbe_cpufreq.o 7 + obj-$(CONFIG_CBE_CPUFREQ_PMI) += cbe_cpufreq_pmi.o 8 + obj-$(CONFIG_CBE_CPUFREQ) += cbe-cpufreq.o 9 + cbe-cpufreq-y += cbe_cpufreq_pervasive.o cbe_cpufreq.o 8 10 9 11 ifeq ($(CONFIG_SMP),y) 10 12 obj-$(CONFIG_PPC_CELL_NATIVE) += smp.o ··· 25 23 $(spu-priv1-y) \ 26 24 $(spu-manage-y) \ 27 25 spufs/ 26 + 27 + obj-$(CONFIG_PCI_MSI) += axon_msi.o
+445
arch/powerpc/platforms/cell/axon_msi.c
··· 1 + /* 2 + * Copyright 2007, Michael Ellerman, IBM Corporation. 3 + * 4 + * This program is free software; you can redistribute it and/or 5 + * modify it under the terms of the GNU General Public License 6 + * as published by the Free Software Foundation; either version 7 + * 2 of the License, or (at your option) any later version. 8 + */ 9 + 10 + 11 + #include <linux/interrupt.h> 12 + #include <linux/irq.h> 13 + #include <linux/kernel.h> 14 + #include <linux/pci.h> 15 + #include <linux/msi.h> 16 + #include <linux/reboot.h> 17 + 18 + #include <asm/dcr.h> 19 + #include <asm/machdep.h> 20 + #include <asm/prom.h> 21 + 22 + 23 + /* 24 + * MSIC registers, specified as offsets from dcr_base 25 + */ 26 + #define MSIC_CTRL_REG 0x0 27 + 28 + /* Base Address registers specify FIFO location in BE memory */ 29 + #define MSIC_BASE_ADDR_HI_REG 0x3 30 + #define MSIC_BASE_ADDR_LO_REG 0x4 31 + 32 + /* Hold the read/write offsets into the FIFO */ 33 + #define MSIC_READ_OFFSET_REG 0x5 34 + #define MSIC_WRITE_OFFSET_REG 0x6 35 + 36 + 37 + /* MSIC control register flags */ 38 + #define MSIC_CTRL_ENABLE 0x0001 39 + #define MSIC_CTRL_FIFO_FULL_ENABLE 0x0002 40 + #define MSIC_CTRL_IRQ_ENABLE 0x0008 41 + #define MSIC_CTRL_FULL_STOP_ENABLE 0x0010 42 + 43 + /* 44 + * The MSIC can be configured to use a FIFO of 32KB, 64KB, 128KB or 256KB. 45 + * Currently we're using a 64KB FIFO size. 46 + */ 47 + #define MSIC_FIFO_SIZE_SHIFT 16 48 + #define MSIC_FIFO_SIZE_BYTES (1 << MSIC_FIFO_SIZE_SHIFT) 49 + 50 + /* 51 + * To configure the FIFO size as (1 << n) bytes, we write (n - 15) into bits 52 + * 8-9 of the MSIC control reg. 53 + */ 54 + #define MSIC_CTRL_FIFO_SIZE (((MSIC_FIFO_SIZE_SHIFT - 15) << 8) & 0x300) 55 + 56 + /* 57 + * We need to mask the read/write offsets to make sure they stay within 58 + * the bounds of the FIFO. Also they should always be 16-byte aligned. 
59 + */ 60 + #define MSIC_FIFO_SIZE_MASK ((MSIC_FIFO_SIZE_BYTES - 1) & ~0xFu) 61 + 62 + /* Each entry in the FIFO is 16 bytes, the first 4 bytes hold the irq # */ 63 + #define MSIC_FIFO_ENTRY_SIZE 0x10 64 + 65 + 66 + struct axon_msic { 67 + struct device_node *dn; 68 + struct irq_host *irq_host; 69 + __le32 *fifo; 70 + dcr_host_t dcr_host; 71 + struct list_head list; 72 + u32 read_offset; 73 + u32 dcr_base; 74 + }; 75 + 76 + static LIST_HEAD(axon_msic_list); 77 + 78 + static void msic_dcr_write(struct axon_msic *msic, unsigned int dcr_n, u32 val) 79 + { 80 + pr_debug("axon_msi: dcr_write(0x%x, 0x%x)\n", val, dcr_n); 81 + 82 + dcr_write(msic->dcr_host, msic->dcr_base + dcr_n, val); 83 + } 84 + 85 + static u32 msic_dcr_read(struct axon_msic *msic, unsigned int dcr_n) 86 + { 87 + return dcr_read(msic->dcr_host, msic->dcr_base + dcr_n); 88 + } 89 + 90 + static void axon_msi_cascade(unsigned int irq, struct irq_desc *desc) 91 + { 92 + struct axon_msic *msic = get_irq_data(irq); 93 + u32 write_offset, msi; 94 + int idx; 95 + 96 + write_offset = msic_dcr_read(msic, MSIC_WRITE_OFFSET_REG); 97 + pr_debug("axon_msi: original write_offset 0x%x\n", write_offset); 98 + 99 + /* write_offset doesn't wrap properly, so we have to mask it */ 100 + write_offset &= MSIC_FIFO_SIZE_MASK; 101 + 102 + while (msic->read_offset != write_offset) { 103 + idx = msic->read_offset / sizeof(__le32); 104 + msi = le32_to_cpu(msic->fifo[idx]); 105 + msi &= 0xFFFF; 106 + 107 + pr_debug("axon_msi: woff %x roff %x msi %x\n", 108 + write_offset, msic->read_offset, msi); 109 + 110 + msic->read_offset += MSIC_FIFO_ENTRY_SIZE; 111 + msic->read_offset &= MSIC_FIFO_SIZE_MASK; 112 + 113 + if (msi < NR_IRQS && irq_map[msi].host == msic->irq_host) 114 + generic_handle_irq(msi); 115 + else 116 + pr_debug("axon_msi: invalid irq 0x%x!\n", msi); 117 + } 118 + 119 + desc->chip->eoi(irq); 120 + } 121 + 122 + static struct axon_msic *find_msi_translator(struct pci_dev *dev) 123 + { 124 + struct irq_host *irq_host; 125 + struct device_node *dn, *tmp; 126 + const phandle *ph; 127 + struct axon_msic *msic = NULL; 128 + 129 + dn = pci_device_to_OF_node(dev); 130 + if (!dn) { 131 + dev_dbg(&dev->dev, "axon_msi: no pci_dn found\n"); 132 + return NULL; 133 + } 134 + 135 + for (; dn; tmp = of_get_parent(dn), of_node_put(dn), dn = tmp) { 136 + ph = of_get_property(dn, "msi-translator", NULL); 137 + if (ph) 138 + break; 139 + } 140 + 141 + if (!ph) { 142 + dev_dbg(&dev->dev, 143 + "axon_msi: no msi-translator property found\n"); 144 + goto out_error; 145 + } 146 + 147 + tmp = dn; 148 + dn = of_find_node_by_phandle(*ph); 149 + if (!dn) { 150 + dev_dbg(&dev->dev, 151 + "axon_msi: msi-translator doesn't point to a node\n"); 152 + goto out_error; 153 + } 154 + 155 + irq_host = irq_find_host(dn); 156 + if (!irq_host) { 157 + dev_dbg(&dev->dev, "axon_msi: no irq_host found for node %s\n", 158 + dn->full_name); 159 + goto out_error; 160 + } 161 + 162 + msic = irq_host->host_data; 163 + 164 + out_error: 165 + of_node_put(dn); 166 + of_node_put(tmp); 167 + 168 + return msic; 169 + } 170 + 171 + static int axon_msi_check_device(struct pci_dev *dev, int nvec, int type) 172 + { 173 + if (!find_msi_translator(dev)) 174 + return -ENODEV; 175 + 176 + return 0; 177 + } 178 + 179 + static int setup_msi_msg_address(struct pci_dev *dev, struct msi_msg *msg) 180 + { 181 + struct device_node *dn, *tmp; 182 + struct msi_desc *entry; 183 + int len; 184 + const u32 *prop; 185 + 186 + dn = pci_device_to_OF_node(dev); 187 + if (!dn) { 188 + dev_dbg(&dev->dev, "axon_msi: no 
pci_dn found\n"); 189 + return -ENODEV; 190 + } 191 + 192 + entry = list_first_entry(&dev->msi_list, struct msi_desc, list); 193 + 194 + for (; dn; tmp = of_get_parent(dn), of_node_put(dn), dn = tmp) { 195 + if (entry->msi_attrib.is_64) { 196 + prop = of_get_property(dn, "msi-address-64", &len); 197 + if (prop) 198 + break; 199 + } 200 + 201 + prop = of_get_property(dn, "msi-address-32", &len); 202 + if (prop) 203 + break; 204 + } 205 + 206 + if (!prop) { 207 + dev_dbg(&dev->dev, 208 + "axon_msi: no msi-address-(32|64) properties found\n"); 209 + return -ENOENT; 210 + } 211 + 212 + switch (len) { 213 + case 8: 214 + msg->address_hi = prop[0]; 215 + msg->address_lo = prop[1]; 216 + break; 217 + case 4: 218 + msg->address_hi = 0; 219 + msg->address_lo = prop[0]; 220 + break; 221 + default: 222 + dev_dbg(&dev->dev, 223 + "axon_msi: malformed msi-address-(32|64) property\n"); 224 + of_node_put(dn); 225 + return -EINVAL; 226 + } 227 + 228 + of_node_put(dn); 229 + 230 + return 0; 231 + } 232 + 233 + static int axon_msi_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) 234 + { 235 + unsigned int virq, rc; 236 + struct msi_desc *entry; 237 + struct msi_msg msg; 238 + struct axon_msic *msic; 239 + 240 + msic = find_msi_translator(dev); 241 + if (!msic) 242 + return -ENODEV; 243 + 244 + rc = setup_msi_msg_address(dev, &msg); 245 + if (rc) 246 + return rc; 247 + 248 + /* We rely on being able to stash a virq in a u16 */ 249 + BUILD_BUG_ON(NR_IRQS > 65536); 250 + 251 + list_for_each_entry(entry, &dev->msi_list, list) { 252 + virq = irq_create_direct_mapping(msic->irq_host); 253 + if (virq == NO_IRQ) { 254 + dev_warn(&dev->dev, 255 + "axon_msi: virq allocation failed!\n"); 256 + return -1; 257 + } 258 + dev_dbg(&dev->dev, "axon_msi: allocated virq 0x%x\n", virq); 259 + 260 + set_irq_msi(virq, entry); 261 + msg.data = virq; 262 + write_msi_msg(virq, &msg); 263 + } 264 + 265 + return 0; 266 + } 267 + 268 + static void axon_msi_teardown_msi_irqs(struct pci_dev *dev) 269 + { 270 + struct msi_desc *entry; 271 + 272 + dev_dbg(&dev->dev, "axon_msi: tearing down msi irqs\n"); 273 + 274 + list_for_each_entry(entry, &dev->msi_list, list) { 275 + if (entry->irq == NO_IRQ) 276 + continue; 277 + 278 + set_irq_msi(entry->irq, NULL); 279 + irq_dispose_mapping(entry->irq); 280 + } 281 + } 282 + 283 + static struct irq_chip msic_irq_chip = { 284 + .mask = mask_msi_irq, 285 + .unmask = unmask_msi_irq, 286 + .shutdown = unmask_msi_irq, 287 + .typename = "AXON-MSI", 288 + }; 289 + 290 + static int msic_host_map(struct irq_host *h, unsigned int virq, 291 + irq_hw_number_t hw) 292 + { 293 + set_irq_chip_and_handler(virq, &msic_irq_chip, handle_simple_irq); 294 + 295 + return 0; 296 + } 297 + 298 + static int msic_host_match(struct irq_host *host, struct device_node *dn) 299 + { 300 + struct axon_msic *msic = host->host_data; 301 + 302 + return msic->dn == dn; 303 + } 304 + 305 + static struct irq_host_ops msic_host_ops = { 306 + .match = msic_host_match, 307 + .map = msic_host_map, 308 + }; 309 + 310 + static int axon_msi_notify_reboot(struct notifier_block *nb, 311 + unsigned long code, void *data) 312 + { 313 + struct axon_msic *msic; 314 + u32 tmp; 315 + 316 + list_for_each_entry(msic, &axon_msic_list, list) { 317 + pr_debug("axon_msi: disabling %s\n", msic->dn->full_name); 318 + tmp = msic_dcr_read(msic, MSIC_CTRL_REG); 319 + tmp &= ~MSIC_CTRL_ENABLE & ~MSIC_CTRL_IRQ_ENABLE; 320 + msic_dcr_write(msic, MSIC_CTRL_REG, tmp); 321 + } 322 + 323 + return 0; 324 + } 325 + 326 + static struct notifier_block 
axon_msi_reboot_notifier = { 327 + .notifier_call = axon_msi_notify_reboot 328 + }; 329 + 330 + static int axon_msi_setup_one(struct device_node *dn) 331 + { 332 + struct page *page; 333 + struct axon_msic *msic; 334 + unsigned int virq; 335 + int dcr_len; 336 + 337 + pr_debug("axon_msi: setting up dn %s\n", dn->full_name); 338 + 339 + msic = kzalloc(sizeof(struct axon_msic), GFP_KERNEL); 340 + if (!msic) { 341 + printk(KERN_ERR "axon_msi: couldn't allocate msic for %s\n", 342 + dn->full_name); 343 + goto out; 344 + } 345 + 346 + msic->dcr_base = dcr_resource_start(dn, 0); 347 + dcr_len = dcr_resource_len(dn, 0); 348 + 349 + if (msic->dcr_base == 0 || dcr_len == 0) { 350 + printk(KERN_ERR 351 + "axon_msi: couldn't parse dcr properties on %s\n", 352 + dn->full_name); 353 + goto out; 354 + } 355 + 356 + msic->dcr_host = dcr_map(dn, msic->dcr_base, dcr_len); 357 + if (!DCR_MAP_OK(msic->dcr_host)) { 358 + printk(KERN_ERR "axon_msi: dcr_map failed for %s\n", 359 + dn->full_name); 360 + goto out_free_msic; 361 + } 362 + 363 + page = alloc_pages_node(of_node_to_nid(dn), GFP_KERNEL, 364 + get_order(MSIC_FIFO_SIZE_BYTES)); 365 + if (!page) { 366 + printk(KERN_ERR "axon_msi: couldn't allocate fifo for %s\n", 367 + dn->full_name); 368 + goto out_free_msic; 369 + } 370 + 371 + msic->fifo = page_address(page); 372 + 373 + msic->irq_host = irq_alloc_host(IRQ_HOST_MAP_NOMAP, NR_IRQS, 374 + &msic_host_ops, 0); 375 + if (!msic->irq_host) { 376 + printk(KERN_ERR "axon_msi: couldn't allocate irq_host for %s\n", 377 + dn->full_name); 378 + goto out_free_fifo; 379 + } 380 + 381 + msic->irq_host->host_data = msic; 382 + 383 + virq = irq_of_parse_and_map(dn, 0); 384 + if (virq == NO_IRQ) { 385 + printk(KERN_ERR "axon_msi: irq parse and map failed for %s\n", 386 + dn->full_name); 387 + goto out_free_host; 388 + } 389 + 390 + msic->dn = of_node_get(dn); 391 + 392 + set_irq_data(virq, msic); 393 + set_irq_chained_handler(virq, axon_msi_cascade); 394 + pr_debug("axon_msi: irq 0x%x setup for axon_msi\n", virq); 395 + 396 + /* Enable the MSIC hardware */ 397 + msic_dcr_write(msic, MSIC_BASE_ADDR_HI_REG, (u64)msic->fifo >> 32); 398 + msic_dcr_write(msic, MSIC_BASE_ADDR_LO_REG, 399 + (u64)msic->fifo & 0xFFFFFFFF); 400 + msic_dcr_write(msic, MSIC_CTRL_REG, 401 + MSIC_CTRL_IRQ_ENABLE | MSIC_CTRL_ENABLE | 402 + MSIC_CTRL_FIFO_SIZE); 403 + 404 + list_add(&msic->list, &axon_msic_list); 405 + 406 + printk(KERN_DEBUG "axon_msi: setup MSIC on %s\n", dn->full_name); 407 + 408 + return 0; 409 + 410 + out_free_host: 411 + kfree(msic->irq_host); 412 + out_free_fifo: 413 + __free_pages(virt_to_page(msic->fifo), get_order(MSIC_FIFO_SIZE_BYTES)); 414 + out_free_msic: 415 + kfree(msic); 416 + out: 417 + 418 + return -1; 419 + } 420 + 421 + static int axon_msi_init(void) 422 + { 423 + struct device_node *dn; 424 + int found = 0; 425 + 426 + pr_debug("axon_msi: initialising ...\n"); 427 + 428 + for_each_compatible_node(dn, NULL, "ibm,axon-msic") { 429 + if (axon_msi_setup_one(dn) == 0) 430 + found++; 431 + } 432 + 433 + if (found) { 434 + ppc_md.setup_msi_irqs = axon_msi_setup_msi_irqs; 435 + ppc_md.teardown_msi_irqs = axon_msi_teardown_msi_irqs; 436 + ppc_md.msi_check_device = axon_msi_check_device; 437 + 438 + register_reboot_notifier(&axon_msi_reboot_notifier); 439 + 440 + pr_debug("axon_msi: registered callbacks!\n"); 441 + } 442 + 443 + return 0; 444 + } 445 + arch_initcall(axon_msi_init);
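axon_msi_cascade() is essentially a single-consumer walk over a ring of 16-byte entries whose hardware write offset has to be masked back into the FIFO before use. The stand-alone model below mirrors that walk; the FIFO size, entry size and masking mirror the constants above, while drain_fifo(), the host-endian accesses and the fake entries in main() are simplifications made for this sketch.

        #include <stdio.h>
        #include <stdint.h>

        #define FIFO_SIZE   (1 << 16)                           /* 64KB, as in the patch */
        #define FIFO_MASK   ((FIFO_SIZE - 1) & ~0xFu)           /* in range and 16-byte aligned */
        #define ENTRY_SIZE  0x10                                /* 16 bytes, irq # in first word */

        /* Consume entries between read_offset and the (masked) hardware
         * write offset, like the cascade handler does. */
        static void drain_fifo(const uint32_t *fifo, uint32_t *read_offset,
                               uint32_t write_offset)
        {
                write_offset &= FIFO_MASK;      /* hardware offset does not wrap cleanly */

                while (*read_offset != write_offset) {
                        uint32_t msi = fifo[*read_offset / sizeof(uint32_t)] & 0xFFFF;

                        printf("entry at offset 0x%x -> irq %u\n",
                               (unsigned)*read_offset, (unsigned)msi);

                        *read_offset = (*read_offset + ENTRY_SIZE) & FIFO_MASK;
                }
        }

        int main(void)
        {
                static uint32_t fifo[FIFO_SIZE / sizeof(uint32_t)];
                uint32_t read_offset = 0;

                fifo[0] = 17;                   /* pretend the MSIC wrote two entries */
                fifo[ENTRY_SIZE / 4] = 42;

                drain_fifo(fifo, &read_offset, 2 * ENTRY_SIZE);
                return 0;
        }

The kernel version additionally byte-swaps each __le32 entry and validates the irq number against its irq_host before dispatching it.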
+36 -185
arch/powerpc/platforms/cell/cbe_cpufreq.c
··· 1 1 /* 2 2 * cpufreq driver for the cell processor 3 3 * 4 - * (C) Copyright IBM Deutschland Entwicklung GmbH 2005 4 + * (C) Copyright IBM Deutschland Entwicklung GmbH 2005-2007 5 5 * 6 6 * Author: Christian Krafft <krafft@de.ibm.com> 7 7 * ··· 21 21 */ 22 22 23 23 #include <linux/cpufreq.h> 24 - #include <linux/timer.h> 25 - 26 - #include <asm/hw_irq.h> 27 - #include <asm/io.h> 28 24 #include <asm/machdep.h> 29 - #include <asm/processor.h> 30 - #include <asm/prom.h> 31 - #include <asm/time.h> 32 - #include <asm/pmi.h> 33 25 #include <asm/of_platform.h> 34 - 26 + #include <asm/prom.h> 35 27 #include "cbe_regs.h" 28 + #include "cbe_cpufreq.h" 36 29 37 30 static DEFINE_MUTEX(cbe_switch_mutex); 38 31 ··· 43 50 {0, CPUFREQ_TABLE_END}, 44 51 }; 45 52 46 - /* to write to MIC register */ 47 - static u64 MIC_Slow_Fast_Timer_table[] = { 48 - [0 ... 7] = 0x007fc00000000000ull, 49 - }; 50 - 51 - /* more values for the MIC */ 52 - static u64 MIC_Slow_Next_Timer_table[] = { 53 - 0x0000240000000000ull, 54 - 0x0000268000000000ull, 55 - 0x000029C000000000ull, 56 - 0x00002D0000000000ull, 57 - 0x0000300000000000ull, 58 - 0x0000334000000000ull, 59 - 0x000039C000000000ull, 60 - 0x00003FC000000000ull, 61 - }; 62 - 63 - static unsigned int pmi_frequency_limit = 0; 64 53 /* 65 54 * hardware specific functions 66 55 */ 67 56 68 - static struct of_device *pmi_dev; 69 - 70 - #ifdef CONFIG_PPC_PMI 71 - static int set_pmode_pmi(int cpu, unsigned int pmode) 57 + static int set_pmode(unsigned int cpu, unsigned int slow_mode) 72 58 { 73 - int ret; 74 - pmi_message_t pmi_msg; 75 - #ifdef DEBUG 76 - u64 time; 77 - #endif 59 + int rc; 78 60 79 - pmi_msg.type = PMI_TYPE_FREQ_CHANGE; 80 - pmi_msg.data1 = cbe_cpu_to_node(cpu); 81 - pmi_msg.data2 = pmode; 82 - 83 - #ifdef DEBUG 84 - time = (u64) get_cycles(); 85 - #endif 86 - 87 - pmi_send_message(pmi_dev, pmi_msg); 88 - ret = pmi_msg.data2; 89 - 90 - pr_debug("PMI returned slow mode %d\n", ret); 91 - 92 - #ifdef DEBUG 93 - time = (u64) get_cycles() - time; /* actual cycles (not cpu cycles!) 
*/ 94 - time = 1000000000 * time / CLOCK_TICK_RATE; /* time in ns (10^-9) */ 95 - pr_debug("had to wait %lu ns for a transition\n", time); 96 - #endif 97 - return ret; 98 - } 99 - #endif 100 - 101 - static int get_pmode(int cpu) 102 - { 103 - int ret; 104 - struct cbe_pmd_regs __iomem *pmd_regs; 105 - 106 - pmd_regs = cbe_get_cpu_pmd_regs(cpu); 107 - ret = in_be64(&pmd_regs->pmsr) & 0x07; 108 - 109 - return ret; 110 - } 111 - 112 - static int set_pmode_reg(int cpu, unsigned int pmode) 113 - { 114 - struct cbe_pmd_regs __iomem *pmd_regs; 115 - struct cbe_mic_tm_regs __iomem *mic_tm_regs; 116 - u64 flags; 117 - u64 value; 118 - 119 - local_irq_save(flags); 120 - 121 - mic_tm_regs = cbe_get_cpu_mic_tm_regs(cpu); 122 - pmd_regs = cbe_get_cpu_pmd_regs(cpu); 123 - 124 - pr_debug("pm register is mapped at %p\n", &pmd_regs->pmcr); 125 - pr_debug("mic register is mapped at %p\n", &mic_tm_regs->slow_fast_timer_0); 126 - 127 - out_be64(&mic_tm_regs->slow_fast_timer_0, MIC_Slow_Fast_Timer_table[pmode]); 128 - out_be64(&mic_tm_regs->slow_fast_timer_1, MIC_Slow_Fast_Timer_table[pmode]); 129 - 130 - out_be64(&mic_tm_regs->slow_next_timer_0, MIC_Slow_Next_Timer_table[pmode]); 131 - out_be64(&mic_tm_regs->slow_next_timer_1, MIC_Slow_Next_Timer_table[pmode]); 132 - 133 - value = in_be64(&pmd_regs->pmcr); 134 - /* set bits to zero */ 135 - value &= 0xFFFFFFFFFFFFFFF8ull; 136 - /* set bits to next pmode */ 137 - value |= pmode; 138 - 139 - out_be64(&pmd_regs->pmcr, value); 140 - 141 - /* wait until new pmode appears in status register */ 142 - value = in_be64(&pmd_regs->pmsr) & 0x07; 143 - while(value != pmode) { 144 - cpu_relax(); 145 - value = in_be64(&pmd_regs->pmsr) & 0x07; 146 - } 147 - 148 - local_irq_restore(flags); 149 - 150 - return 0; 151 - } 152 - 153 - static int set_pmode(int cpu, unsigned int slow_mode) { 154 - #ifdef CONFIG_PPC_PMI 155 - if (pmi_dev) 156 - return set_pmode_pmi(cpu, slow_mode); 61 + if (cbe_cpufreq_has_pmi) 62 + rc = cbe_cpufreq_set_pmode_pmi(cpu, slow_mode); 157 63 else 158 - #endif 159 - return set_pmode_reg(cpu, slow_mode); 64 + rc = cbe_cpufreq_set_pmode(cpu, slow_mode); 65 + 66 + pr_debug("register contains slow mode %d\n", cbe_cpufreq_get_pmode(cpu)); 67 + 68 + return rc; 160 69 } 161 - 162 - static void cbe_cpufreq_handle_pmi(struct of_device *dev, pmi_message_t pmi_msg) 163 - { 164 - u8 cpu; 165 - u8 cbe_pmode_new; 166 - 167 - BUG_ON(pmi_msg.type != PMI_TYPE_FREQ_CHANGE); 168 - 169 - cpu = cbe_node_to_cpu(pmi_msg.data1); 170 - cbe_pmode_new = pmi_msg.data2; 171 - 172 - pmi_frequency_limit = cbe_freqs[cbe_pmode_new].frequency; 173 - 174 - pr_debug("cbe_handle_pmi: max freq=%d\n", pmi_frequency_limit); 175 - } 176 - 177 - static int pmi_notifier(struct notifier_block *nb, 178 - unsigned long event, void *data) 179 - { 180 - struct cpufreq_policy *policy = data; 181 - 182 - if (event != CPUFREQ_INCOMPATIBLE) 183 - return 0; 184 - 185 - cpufreq_verify_within_limits(policy, 0, pmi_frequency_limit); 186 - return 0; 187 - } 188 - 189 - static struct notifier_block pmi_notifier_block = { 190 - .notifier_call = pmi_notifier, 191 - }; 192 - 193 - static struct pmi_handler cbe_pmi_handler = { 194 - .type = PMI_TYPE_FREQ_CHANGE, 195 - .handle_pmi_message = cbe_cpufreq_handle_pmi, 196 - }; 197 - 198 70 199 71 /* 200 72 * cpufreq functions ··· 79 221 80 222 pr_debug("init cpufreq on CPU %d\n", policy->cpu); 81 223 224 + /* 225 + * Let's check we can actually get to the CELL regs 226 + */ 227 + if (!cbe_get_cpu_pmd_regs(policy->cpu) || 228 + !cbe_get_cpu_mic_tm_regs(policy->cpu)) { 229 
+ pr_info("invalid CBE regs pointers for cpufreq\n"); 230 + return -EINVAL; 231 + } 232 + 82 233 max_freqp = of_get_property(cpu, "clock-frequency", NULL); 234 + 235 + of_node_put(cpu); 83 236 84 237 if (!max_freqp) 85 238 return -EINVAL; ··· 108 239 } 109 240 110 241 policy->governor = CPUFREQ_DEFAULT_GOVERNOR; 111 - /* if DEBUG is enabled set_pmode() measures the correct latency of a transition */ 242 + 243 + /* if DEBUG is enabled set_pmode() measures the latency 244 + * of a transition */ 112 245 policy->cpuinfo.transition_latency = 25000; 113 246 114 - cur_pmode = get_pmode(policy->cpu); 247 + cur_pmode = cbe_cpufreq_get_pmode(policy->cpu); 115 248 pr_debug("current pmode is at %d\n",cur_pmode); 116 249 117 250 policy->cur = cbe_freqs[cur_pmode].frequency; ··· 124 253 125 254 cpufreq_frequency_table_get_attr(cbe_freqs, policy->cpu); 126 255 127 - if (pmi_dev) { 128 - /* frequency might get limited later, initialize limit with max_freq */ 129 - pmi_frequency_limit = max_freq; 130 - cpufreq_register_notifier(&pmi_notifier_block, CPUFREQ_POLICY_NOTIFIER); 131 - } 132 - 133 - /* this ensures that policy->cpuinfo_min and policy->cpuinfo_max are set correctly */ 256 + /* this ensures that policy->cpuinfo_min 257 + * and policy->cpuinfo_max are set correctly */ 134 258 return cpufreq_frequency_table_cpuinfo(policy, cbe_freqs); 135 259 } 136 260 137 261 static int cbe_cpufreq_cpu_exit(struct cpufreq_policy *policy) 138 262 { 139 - if (pmi_dev) 140 - cpufreq_unregister_notifier(&pmi_notifier_block, CPUFREQ_POLICY_NOTIFIER); 141 - 142 263 cpufreq_frequency_table_put_attr(policy->cpu); 143 264 return 0; 144 265 } ··· 140 277 return cpufreq_frequency_table_verify(policy, cbe_freqs); 141 278 } 142 279 143 - 144 - static int cbe_cpufreq_target(struct cpufreq_policy *policy, unsigned int target_freq, 145 - unsigned int relation) 280 + static int cbe_cpufreq_target(struct cpufreq_policy *policy, 281 + unsigned int target_freq, 282 + unsigned int relation) 146 283 { 147 284 int rc; 148 285 struct cpufreq_freqs freqs; 149 - int cbe_pmode_new; 286 + unsigned int cbe_pmode_new; 150 287 151 288 cpufreq_frequency_table_target(policy, 152 289 cbe_freqs, ··· 161 298 mutex_lock(&cbe_switch_mutex); 162 299 cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); 163 300 164 - pr_debug("setting frequency for cpu %d to %d kHz, 1/%d of max frequency\n", 301 + pr_debug("setting frequency for cpu %d to %d kHz, " \ 302 + "1/%d of max frequency\n", 165 303 policy->cpu, 166 304 cbe_freqs[cbe_pmode_new].frequency, 167 305 cbe_freqs[cbe_pmode_new].index); 168 306 169 307 rc = set_pmode(policy->cpu, cbe_pmode_new); 308 + 170 309 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); 171 310 mutex_unlock(&cbe_switch_mutex); 172 311 ··· 191 326 192 327 static int __init cbe_cpufreq_init(void) 193 328 { 194 - #ifdef CONFIG_PPC_PMI 195 - struct device_node *np; 196 - #endif 197 329 if (!machine_is(cell)) 198 330 return -ENODEV; 199 - #ifdef CONFIG_PPC_PMI 200 - np = of_find_node_by_type(NULL, "ibm,pmi"); 201 331 202 - pmi_dev = of_find_device_by_node(np); 203 - 204 - if (pmi_dev) 205 - pmi_register_handler(pmi_dev, &cbe_pmi_handler); 206 - #endif 207 332 return cpufreq_register_driver(&cbe_cpufreq_driver); 208 333 } 209 334 210 335 static void __exit cbe_cpufreq_exit(void) 211 336 { 212 - #ifdef CONFIG_PPC_PMI 213 - if (pmi_dev) 214 - pmi_unregister_handler(pmi_dev, &cbe_pmi_handler); 215 - #endif 216 337 cpufreq_unregister_driver(&cbe_cpufreq_driver); 217 338 } 218 339
+24
arch/powerpc/platforms/cell/cbe_cpufreq.h
··· 1 + /* 2 + * cbe_cpufreq.h 3 + * 4 + * This file contains the definitions used by the cbe_cpufreq driver. 5 + * 6 + * (C) Copyright IBM Deutschland Entwicklung GmbH 2005-2007 7 + * 8 + * Author: Christian Krafft <krafft@de.ibm.com> 9 + * 10 + */ 11 + 12 + #include <linux/cpufreq.h> 13 + #include <linux/types.h> 14 + 15 + int cbe_cpufreq_set_pmode(int cpu, unsigned int pmode); 16 + int cbe_cpufreq_get_pmode(int cpu); 17 + 18 + int cbe_cpufreq_set_pmode_pmi(int cpu, unsigned int pmode); 19 + 20 + #if defined(CONFIG_CBE_CPUFREQ_PMI) || defined(CONFIG_CBE_CPUFREQ_PMI_MODULE) 21 + extern bool cbe_cpufreq_has_pmi; 22 + #else 23 + #define cbe_cpufreq_has_pmi (0) 24 + #endif
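The #if block at the end of this header is the usual built-in/module/not-configured idiom: when the PMI backend is absent, cbe_cpufreq_has_pmi collapses to the constant 0 and the branch that depends on it becomes dead code. Below is a stand-alone sketch of the same idiom; the CONFIG_EXAMPLE_BACKEND switch and all function names are made up, and the inline stub is an addition so the sketch also links without optimization.

        #include <stdio.h>
        #include <stdbool.h>

        /* Stand-in for CONFIG_CBE_CPUFREQ_PMI / CONFIG_CBE_CPUFREQ_PMI_MODULE. */
        #define CONFIG_EXAMPLE_BACKEND 0

        #if CONFIG_EXAMPLE_BACKEND
        extern bool example_has_backend;
        int example_backend_call(int arg);
        #else
        #define example_has_backend (0)
        static inline int example_backend_call(int arg) { (void)arg; return -1; }
        #endif

        int main(void)
        {
                /* With the backend configured out, the condition is the constant 0,
                 * so the compiler can drop the whole branch as dead code. */
                if (example_has_backend)
                        printf("backend returned %d\n", example_backend_call(5));
                else
                        printf("falling back to the direct path\n");
                return 0;
        }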
+115
arch/powerpc/platforms/cell/cbe_cpufreq_pervasive.c
··· 1 + /* 2 + * pervasive backend for the cbe_cpufreq driver 3 + * 4 + * This driver makes use of the pervasive unit to 5 + * engage the desired frequency. 6 + * 7 + * (C) Copyright IBM Deutschland Entwicklung GmbH 2005-2007 8 + * 9 + * Author: Christian Krafft <krafft@de.ibm.com> 10 + * 11 + * This program is free software; you can redistribute it and/or modify 12 + * it under the terms of the GNU General Public License as published by 13 + * the Free Software Foundation; either version 2, or (at your option) 14 + * any later version. 15 + * 16 + * This program is distributed in the hope that it will be useful, 17 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 18 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 19 + * GNU General Public License for more details. 20 + * 21 + * You should have received a copy of the GNU General Public License 22 + * along with this program; if not, write to the Free Software 23 + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 24 + */ 25 + 26 + #include <linux/io.h> 27 + #include <linux/kernel.h> 28 + #include <linux/time.h> 29 + #include <asm/machdep.h> 30 + #include <asm/hw_irq.h> 31 + 32 + #include "cbe_regs.h" 33 + #include "cbe_cpufreq.h" 34 + 35 + /* to write to MIC register */ 36 + static u64 MIC_Slow_Fast_Timer_table[] = { 37 + [0 ... 7] = 0x007fc00000000000ull, 38 + }; 39 + 40 + /* more values for the MIC */ 41 + static u64 MIC_Slow_Next_Timer_table[] = { 42 + 0x0000240000000000ull, 43 + 0x0000268000000000ull, 44 + 0x000029C000000000ull, 45 + 0x00002D0000000000ull, 46 + 0x0000300000000000ull, 47 + 0x0000334000000000ull, 48 + 0x000039C000000000ull, 49 + 0x00003FC000000000ull, 50 + }; 51 + 52 + 53 + int cbe_cpufreq_set_pmode(int cpu, unsigned int pmode) 54 + { 55 + struct cbe_pmd_regs __iomem *pmd_regs; 56 + struct cbe_mic_tm_regs __iomem *mic_tm_regs; 57 + u64 flags; 58 + u64 value; 59 + #ifdef DEBUG 60 + long time; 61 + #endif 62 + 63 + local_irq_save(flags); 64 + 65 + mic_tm_regs = cbe_get_cpu_mic_tm_regs(cpu); 66 + pmd_regs = cbe_get_cpu_pmd_regs(cpu); 67 + 68 + #ifdef DEBUG 69 + time = jiffies; 70 + #endif 71 + 72 + out_be64(&mic_tm_regs->slow_fast_timer_0, MIC_Slow_Fast_Timer_table[pmode]); 73 + out_be64(&mic_tm_regs->slow_fast_timer_1, MIC_Slow_Fast_Timer_table[pmode]); 74 + 75 + out_be64(&mic_tm_regs->slow_next_timer_0, MIC_Slow_Next_Timer_table[pmode]); 76 + out_be64(&mic_tm_regs->slow_next_timer_1, MIC_Slow_Next_Timer_table[pmode]); 77 + 78 + value = in_be64(&pmd_regs->pmcr); 79 + /* set bits to zero */ 80 + value &= 0xFFFFFFFFFFFFFFF8ull; 81 + /* set bits to next pmode */ 82 + value |= pmode; 83 + 84 + out_be64(&pmd_regs->pmcr, value); 85 + 86 + #ifdef DEBUG 87 + /* wait until new pmode appears in status register */ 88 + value = in_be64(&pmd_regs->pmsr) & 0x07; 89 + while (value != pmode) { 90 + cpu_relax(); 91 + value = in_be64(&pmd_regs->pmsr) & 0x07; 92 + } 93 + 94 + time = jiffies - time; 95 + time = jiffies_to_msecs(time); 96 + pr_debug("had to wait %lu ms for a transition using " \ 97 + "pervasive unit\n", time); 98 + #endif 99 + local_irq_restore(flags); 100 + 101 + return 0; 102 + } 103 + 104 + 105 + int cbe_cpufreq_get_pmode(int cpu) 106 + { 107 + int ret; 108 + struct cbe_pmd_regs __iomem *pmd_regs; 109 + 110 + pmd_regs = cbe_get_cpu_pmd_regs(cpu); 111 + ret = in_be64(&pmd_regs->pmsr) & 0x07; 112 + 113 + return ret; 114 + } 115 +
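cbe_cpufreq_set_pmode() changes only the low three bits of the PMCR, so the update is a read-modify-write that must preserve the rest of the register; under DEBUG it then polls the PMSR until the new pmode shows up. A minimal stand-alone model of the bit manipulation is given below; the register value and helper name are made up, and the explicit 0x7 mask is an addition for clarity.

        #include <stdio.h>
        #include <stdint.h>

        /* Only the low three bits of the PMCR carry the pmode; everything
         * else must be preserved across the update. */
        static uint64_t pmcr_set_pmode(uint64_t pmcr, unsigned int pmode)
        {
                pmcr &= 0xFFFFFFFFFFFFFFF8ull;  /* clear the old pmode bits */
                pmcr |= pmode & 0x7;            /* insert the new pmode */
                return pmcr;
        }

        int main(void)
        {
                uint64_t pmcr = 0xABCD000000000005ull;  /* fake register content, pmode 5 */

                pmcr = pmcr_set_pmode(pmcr, 2);
                printf("pmcr now 0x%016llx (pmode %llu)\n",
                       (unsigned long long)pmcr,
                       (unsigned long long)(pmcr & 0x7));
                return 0;
        }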
+148
arch/powerpc/platforms/cell/cbe_cpufreq_pmi.c
··· 1 + /* 2 + * pmi backend for the cbe_cpufreq driver 3 + * 4 + * (C) Copyright IBM Deutschland Entwicklung GmbH 2005-2007 5 + * 6 + * Author: Christian Krafft <krafft@de.ibm.com> 7 + * 8 + * This program is free software; you can redistribute it and/or modify 9 + * it under the terms of the GNU General Public License as published by 10 + * the Free Software Foundation; either version 2, or (at your option) 11 + * any later version. 12 + * 13 + * This program is distributed in the hope that it will be useful, 14 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 + * GNU General Public License for more details. 17 + * 18 + * You should have received a copy of the GNU General Public License 19 + * along with this program; if not, write to the Free Software 20 + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 21 + */ 22 + 23 + #include <linux/kernel.h> 24 + #include <linux/types.h> 25 + #include <linux/timer.h> 26 + #include <asm/of_platform.h> 27 + #include <asm/processor.h> 28 + #include <asm/prom.h> 29 + #include <asm/pmi.h> 30 + 31 + #ifdef DEBUG 32 + #include <asm/time.h> 33 + #endif 34 + 35 + #include "cbe_regs.h" 36 + #include "cbe_cpufreq.h" 37 + 38 + static u8 pmi_slow_mode_limit[MAX_CBE]; 39 + 40 + bool cbe_cpufreq_has_pmi = false; 41 + EXPORT_SYMBOL_GPL(cbe_cpufreq_has_pmi); 42 + 43 + /* 44 + * hardware specific functions 45 + */ 46 + 47 + int cbe_cpufreq_set_pmode_pmi(int cpu, unsigned int pmode) 48 + { 49 + int ret; 50 + pmi_message_t pmi_msg; 51 + #ifdef DEBUG 52 + long time; 53 + #endif 54 + pmi_msg.type = PMI_TYPE_FREQ_CHANGE; 55 + pmi_msg.data1 = cbe_cpu_to_node(cpu); 56 + pmi_msg.data2 = pmode; 57 + 58 + #ifdef DEBUG 59 + time = jiffies; 60 + #endif 61 + pmi_send_message(pmi_msg); 62 + 63 + #ifdef DEBUG 64 + time = jiffies - time; 65 + time = jiffies_to_msecs(time); 66 + pr_debug("had to wait %lu ms for a transition using " \ 67 + "PMI\n", time); 68 + #endif 69 + ret = pmi_msg.data2; 70 + pr_debug("PMI returned slow mode %d\n", ret); 71 + 72 + return ret; 73 + } 74 + EXPORT_SYMBOL_GPL(cbe_cpufreq_set_pmode_pmi); 75 + 76 + 77 + static void cbe_cpufreq_handle_pmi(pmi_message_t pmi_msg) 78 + { 79 + u8 node, slow_mode; 80 + 81 + BUG_ON(pmi_msg.type != PMI_TYPE_FREQ_CHANGE); 82 + 83 + node = pmi_msg.data1; 84 + slow_mode = pmi_msg.data2; 85 + 86 + pmi_slow_mode_limit[node] = slow_mode; 87 + 88 + pr_debug("cbe_handle_pmi: node: %d max_freq: %d\n", node, slow_mode); 89 + } 90 + 91 + static int pmi_notifier(struct notifier_block *nb, 92 + unsigned long event, void *data) 93 + { 94 + struct cpufreq_policy *policy = data; 95 + struct cpufreq_frequency_table *cbe_freqs; 96 + u8 node; 97 + 98 + cbe_freqs = cpufreq_frequency_get_table(policy->cpu); 99 + node = cbe_cpu_to_node(policy->cpu); 100 + 101 + pr_debug("got notified, event=%lu, node=%u\n", event, node); 102 + 103 + if (pmi_slow_mode_limit[node] != 0) { 104 + pr_debug("limiting node %d to slow mode %d\n", 105 + node, pmi_slow_mode_limit[node]); 106 + 107 + cpufreq_verify_within_limits(policy, 0, 108 + 109 + cbe_freqs[pmi_slow_mode_limit[node]].frequency); 110 + } 111 + 112 + return 0; 113 + } 114 + 115 + static struct notifier_block pmi_notifier_block = { 116 + .notifier_call = pmi_notifier, 117 + }; 118 + 119 + static struct pmi_handler cbe_pmi_handler = { 120 + .type = PMI_TYPE_FREQ_CHANGE, 121 + .handle_pmi_message = cbe_cpufreq_handle_pmi, 122 + }; 123 + 124 + 125 + 126 + static int __init cbe_cpufreq_pmi_init(void) 127 + { 128 
+ cbe_cpufreq_has_pmi = pmi_register_handler(&cbe_pmi_handler) == 0; 129 + 130 + if (!cbe_cpufreq_has_pmi) 131 + return -ENODEV; 132 + 133 + cpufreq_register_notifier(&pmi_notifier_block, CPUFREQ_POLICY_NOTIFIER); 134 + 135 + return 0; 136 + } 137 + 138 + static void __exit cbe_cpufreq_pmi_exit(void) 139 + { 140 + cpufreq_unregister_notifier(&pmi_notifier_block, CPUFREQ_POLICY_NOTIFIER); 141 + pmi_unregister_handler(&cbe_pmi_handler); 142 + } 143 + 144 + module_init(cbe_cpufreq_pmi_init); 145 + module_exit(cbe_cpufreq_pmi_exit); 146 + 147 + MODULE_LICENSE("GPL"); 148 + MODULE_AUTHOR("Christian Krafft <krafft@de.ibm.com>");
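The policy notifier above turns the firmware's per-node slow-mode announcement into an upper bound on the cpufreq policy: the node may not run faster than the frequency that belongs to the reported pmode, and a limit of 0 means no restriction. A toy stand-alone version of that clamping follows; the table contents and all names are invented for the sketch.

        #include <stdio.h>

        struct freq_entry { unsigned int pmode; unsigned int khz; };

        /* made-up frequency table, fastest pmode first */
        static const struct freq_entry freq_table[] = {
                { 0, 3200000 }, { 1, 1600000 }, { 2, 1066000 }, { 3, 800000 },
        };

        static void apply_slow_mode_limit(unsigned int *policy_max_khz,
                                          unsigned int slow_mode_limit)
        {
                if (slow_mode_limit == 0)       /* 0 == no restriction, as above */
                        return;

                if (freq_table[slow_mode_limit].khz < *policy_max_khz)
                        *policy_max_khz = freq_table[slow_mode_limit].khz;
        }

        int main(void)
        {
                unsigned int policy_max_khz = 3200000;

                apply_slow_mode_limit(&policy_max_khz, 2);      /* firmware limits node to pmode 2 */
                printf("policy max is now %u kHz\n", policy_max_khz);
                return 0;
        }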
+7
arch/powerpc/platforms/cell/cbe_regs.c
··· 174 174 175 175 cpu_handle = of_get_property(np, "cpus", &len); 176 176 177 + /* 178 + * the CAB SLOF tree is non compliant, so we just assume 179 + * there is only one node 180 + */ 181 + if (WARN_ON_ONCE(!cpu_handle)) 182 + return np; 183 + 177 184 for (i=0; i<len; i++) 178 185 if (of_find_node_by_phandle(cpu_handle[i]) == of_get_cpu_node(cpu_id, NULL)) 179 186 return np;
+20 -5
arch/powerpc/platforms/cell/cbe_thermal.c
··· 292 292 /* 293 293 * initialize throttling with default values 294 294 */ 295 - static void __init init_default_values(void) 295 + static int __init init_default_values(void) 296 296 { 297 297 int cpu; 298 298 struct cbe_pmd_regs __iomem *pmd_regs; ··· 339 339 for_each_possible_cpu (cpu) { 340 340 pr_debug("processing cpu %d\n", cpu); 341 341 sysdev = get_cpu_sysdev(cpu); 342 + 343 + if (!sysdev) { 344 + pr_info("invalid sysdev pointer for cbe_thermal\n"); 345 + return -EINVAL; 346 + } 347 + 342 348 pmd_regs = cbe_get_cpu_pmd_regs(sysdev->id); 349 + 350 + if (!pmd_regs) { 351 + pr_info("invalid CBE regs pointer for cbe_thermal\n"); 352 + return -EINVAL; 353 + } 343 354 344 355 out_be64(&pmd_regs->tm_str2, str2); 345 356 out_be64(&pmd_regs->tm_str1.val, str1.val); ··· 358 347 out_be64(&pmd_regs->tm_cr1.val, cr1.val); 359 348 out_be64(&pmd_regs->tm_cr2, cr2); 360 349 } 350 + 351 + return 0; 361 352 } 362 353 363 354 364 355 static int __init thermal_init(void) 365 356 { 366 - init_default_values(); 357 + int rc = init_default_values(); 367 358 368 - spu_add_sysdev_attr_group(&spu_attribute_group); 369 - cpu_add_sysdev_attr_group(&ppe_attribute_group); 359 + if (rc == 0) { 360 + spu_add_sysdev_attr_group(&spu_attribute_group); 361 + cpu_add_sysdev_attr_group(&ppe_attribute_group); 362 + } 370 363 371 - return 0; 364 + return rc; 372 365 } 373 366 module_init(thermal_init); 374 367
+219 -80
arch/powerpc/platforms/cell/spu_base.c
··· 35 35 #include <asm/spu.h> 36 36 #include <asm/spu_priv1.h> 37 37 #include <asm/xmon.h> 38 + #include <asm/prom.h> 39 + #include "spu_priv1_mmio.h" 38 40 39 41 const struct spu_management_ops *spu_management_ops; 40 42 EXPORT_SYMBOL_GPL(spu_management_ops); 41 43 42 44 const struct spu_priv1_ops *spu_priv1_ops; 43 - 44 - static struct list_head spu_list[MAX_NUMNODES]; 45 - static LIST_HEAD(spu_full_list); 46 - static DEFINE_MUTEX(spu_mutex); 47 - static DEFINE_SPINLOCK(spu_list_lock); 48 - 49 45 EXPORT_SYMBOL_GPL(spu_priv1_ops); 46 + 47 + struct cbe_spu_info cbe_spu_info[MAX_NUMNODES]; 48 + EXPORT_SYMBOL_GPL(cbe_spu_info); 49 + 50 + /* 51 + * Protects cbe_spu_info and spu->number. 52 + */ 53 + static DEFINE_SPINLOCK(spu_lock); 54 + 55 + /* 56 + * List of all spus in the system. 57 + * 58 + * This list is iterated by callers from irq context and callers that 59 + * want to sleep. Thus modifications need to be done with both 60 + * spu_full_list_lock and spu_full_list_mutex held, while iterating 61 + * through it requires either of these locks. 62 + * 63 + * In addition spu_full_list_lock protects all assignmens to 64 + * spu->mm. 65 + */ 66 + static LIST_HEAD(spu_full_list); 67 + static DEFINE_SPINLOCK(spu_full_list_lock); 68 + static DEFINE_MUTEX(spu_full_list_mutex); 50 69 51 70 void spu_invalidate_slbs(struct spu *spu) 52 71 { ··· 84 65 struct spu *spu; 85 66 unsigned long flags; 86 67 87 - spin_lock_irqsave(&spu_list_lock, flags); 68 + spin_lock_irqsave(&spu_full_list_lock, flags); 88 69 list_for_each_entry(spu, &spu_full_list, full_list) { 89 70 if (spu->mm == mm) 90 71 spu_invalidate_slbs(spu); 91 72 } 92 - spin_unlock_irqrestore(&spu_list_lock, flags); 73 + spin_unlock_irqrestore(&spu_full_list_lock, flags); 93 74 } 94 75 95 76 /* The hack below stinks... 
try to do something better one of ··· 107 88 { 108 89 unsigned long flags; 109 90 110 - spin_lock_irqsave(&spu_list_lock, flags); 91 + spin_lock_irqsave(&spu_full_list_lock, flags); 111 92 spu->mm = mm; 112 - spin_unlock_irqrestore(&spu_list_lock, flags); 93 + spin_unlock_irqrestore(&spu_full_list_lock, flags); 113 94 if (mm) 114 95 mm_needs_global_tlbie(mm); 115 96 } ··· 409 390 free_irq(spu->irqs[2], spu); 410 391 } 411 392 412 - static void spu_init_channels(struct spu *spu) 393 + void spu_init_channels(struct spu *spu) 413 394 { 414 395 static const struct { 415 396 unsigned channel; ··· 442 423 out_be64(&priv2->spu_chnlcnt_RW, count_list[i].count); 443 424 } 444 425 } 445 - 446 - struct spu *spu_alloc_node(int node) 447 - { 448 - struct spu *spu = NULL; 449 - 450 - mutex_lock(&spu_mutex); 451 - if (!list_empty(&spu_list[node])) { 452 - spu = list_entry(spu_list[node].next, struct spu, list); 453 - list_del_init(&spu->list); 454 - pr_debug("Got SPU %d %d\n", spu->number, spu->node); 455 - } 456 - mutex_unlock(&spu_mutex); 457 - 458 - if (spu) 459 - spu_init_channels(spu); 460 - return spu; 461 - } 462 - EXPORT_SYMBOL_GPL(spu_alloc_node); 463 - 464 - struct spu *spu_alloc(void) 465 - { 466 - struct spu *spu = NULL; 467 - int node; 468 - 469 - for (node = 0; node < MAX_NUMNODES; node++) { 470 - spu = spu_alloc_node(node); 471 - if (spu) 472 - break; 473 - } 474 - 475 - return spu; 476 - } 477 - 478 - void spu_free(struct spu *spu) 479 - { 480 - mutex_lock(&spu_mutex); 481 - list_add_tail(&spu->list, &spu_list[spu->node]); 482 - mutex_unlock(&spu_mutex); 483 - } 484 - EXPORT_SYMBOL_GPL(spu_free); 426 + EXPORT_SYMBOL_GPL(spu_init_channels); 485 427 486 428 static int spu_shutdown(struct sys_device *sysdev) 487 429 { ··· 461 481 int spu_add_sysdev_attr(struct sysdev_attribute *attr) 462 482 { 463 483 struct spu *spu; 464 - mutex_lock(&spu_mutex); 465 484 485 + mutex_lock(&spu_full_list_mutex); 466 486 list_for_each_entry(spu, &spu_full_list, full_list) 467 487 sysdev_create_file(&spu->sysdev, attr); 488 + mutex_unlock(&spu_full_list_mutex); 468 489 469 - mutex_unlock(&spu_mutex); 470 490 return 0; 471 491 } 472 492 EXPORT_SYMBOL_GPL(spu_add_sysdev_attr); ··· 474 494 int spu_add_sysdev_attr_group(struct attribute_group *attrs) 475 495 { 476 496 struct spu *spu; 477 - mutex_lock(&spu_mutex); 478 497 498 + mutex_lock(&spu_full_list_mutex); 479 499 list_for_each_entry(spu, &spu_full_list, full_list) 480 500 sysfs_create_group(&spu->sysdev.kobj, attrs); 501 + mutex_unlock(&spu_full_list_mutex); 481 502 482 - mutex_unlock(&spu_mutex); 483 503 return 0; 484 504 } 485 505 EXPORT_SYMBOL_GPL(spu_add_sysdev_attr_group); ··· 488 508 void spu_remove_sysdev_attr(struct sysdev_attribute *attr) 489 509 { 490 510 struct spu *spu; 491 - mutex_lock(&spu_mutex); 492 511 512 + mutex_lock(&spu_full_list_mutex); 493 513 list_for_each_entry(spu, &spu_full_list, full_list) 494 514 sysdev_remove_file(&spu->sysdev, attr); 495 - 496 - mutex_unlock(&spu_mutex); 515 + mutex_unlock(&spu_full_list_mutex); 497 516 } 498 517 EXPORT_SYMBOL_GPL(spu_remove_sysdev_attr); 499 518 500 519 void spu_remove_sysdev_attr_group(struct attribute_group *attrs) 501 520 { 502 521 struct spu *spu; 503 - mutex_lock(&spu_mutex); 504 522 523 + mutex_lock(&spu_full_list_mutex); 505 524 list_for_each_entry(spu, &spu_full_list, full_list) 506 525 sysfs_remove_group(&spu->sysdev.kobj, attrs); 507 - 508 - mutex_unlock(&spu_mutex); 526 + mutex_unlock(&spu_full_list_mutex); 509 527 } 510 528 EXPORT_SYMBOL_GPL(spu_remove_sysdev_attr_group); 511 529 
··· 531 553 int ret; 532 554 static int number; 533 555 unsigned long flags; 556 + struct timespec ts; 534 557 535 558 ret = -ENOMEM; 536 559 spu = kzalloc(sizeof (*spu), GFP_KERNEL); 537 560 if (!spu) 538 561 goto out; 539 562 563 + spu->alloc_state = SPU_FREE; 564 + 540 565 spin_lock_init(&spu->register_lock); 541 - mutex_lock(&spu_mutex); 566 + spin_lock(&spu_lock); 542 567 spu->number = number++; 543 - mutex_unlock(&spu_mutex); 568 + spin_unlock(&spu_lock); 544 569 545 570 ret = spu_create_spu(spu, data); 546 571 ··· 560 579 if (ret) 561 580 goto out_free_irqs; 562 581 563 - mutex_lock(&spu_mutex); 564 - spin_lock_irqsave(&spu_list_lock, flags); 565 - list_add(&spu->list, &spu_list[spu->node]); 566 - list_add(&spu->full_list, &spu_full_list); 567 - spin_unlock_irqrestore(&spu_list_lock, flags); 568 - mutex_unlock(&spu_mutex); 582 + mutex_lock(&cbe_spu_info[spu->node].list_mutex); 583 + list_add(&spu->cbe_list, &cbe_spu_info[spu->node].spus); 584 + cbe_spu_info[spu->node].n_spus++; 585 + mutex_unlock(&cbe_spu_info[spu->node].list_mutex); 569 586 570 - spu->stats.utilization_state = SPU_UTIL_IDLE; 571 - spu->stats.tstamp = jiffies; 587 + mutex_lock(&spu_full_list_mutex); 588 + spin_lock_irqsave(&spu_full_list_lock, flags); 589 + list_add(&spu->full_list, &spu_full_list); 590 + spin_unlock_irqrestore(&spu_full_list_lock, flags); 591 + mutex_unlock(&spu_full_list_mutex); 592 + 593 + spu->stats.util_state = SPU_UTIL_IDLE_LOADED; 594 + ktime_get_ts(&ts); 595 + spu->stats.tstamp = timespec_to_ns(&ts); 596 + 597 + INIT_LIST_HEAD(&spu->aff_list); 572 598 573 599 goto out; 574 600 ··· 596 608 static unsigned long long spu_acct_time(struct spu *spu, 597 609 enum spu_utilization_state state) 598 610 { 611 + struct timespec ts; 599 612 unsigned long long time = spu->stats.times[state]; 600 613 601 - if (spu->stats.utilization_state == state) 602 - time += jiffies - spu->stats.tstamp; 614 + /* 615 + * If the spu is idle or the context is stopped, utilization 616 + * statistics are not updated. Apply the time delta from the 617 + * last recorded state of the spu. 
618 + */ 619 + if (spu->stats.util_state == state) { 620 + ktime_get_ts(&ts); 621 + time += timespec_to_ns(&ts) - spu->stats.tstamp; 622 + } 603 623 604 - return jiffies_to_msecs(time); 624 + return time / NSEC_PER_MSEC; 605 625 } 606 626 607 627 ··· 619 623 620 624 return sprintf(buf, "%s %llu %llu %llu %llu " 621 625 "%llu %llu %llu %llu %llu %llu %llu %llu\n", 622 - spu_state_names[spu->stats.utilization_state], 626 + spu_state_names[spu->stats.util_state], 623 627 spu_acct_time(spu, SPU_UTIL_USER), 624 628 spu_acct_time(spu, SPU_UTIL_SYSTEM), 625 629 spu_acct_time(spu, SPU_UTIL_IOWAIT), 626 - spu_acct_time(spu, SPU_UTIL_IDLE), 630 + spu_acct_time(spu, SPU_UTIL_IDLE_LOADED), 627 631 spu->stats.vol_ctx_switch, 628 632 spu->stats.invol_ctx_switch, 629 633 spu->stats.slb_flt, ··· 636 640 637 641 static SYSDEV_ATTR(stat, 0644, spu_stat_show, NULL); 638 642 643 + /* Hardcoded affinity idxs for QS20 */ 644 + #define SPES_PER_BE 8 645 + static int QS20_reg_idxs[SPES_PER_BE] = { 0, 2, 4, 6, 7, 5, 3, 1 }; 646 + static int QS20_reg_memory[SPES_PER_BE] = { 1, 1, 0, 0, 0, 0, 0, 0 }; 647 + 648 + static struct spu *spu_lookup_reg(int node, u32 reg) 649 + { 650 + struct spu *spu; 651 + 652 + list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) { 653 + if (*(u32 *)get_property(spu_devnode(spu), "reg", NULL) == reg) 654 + return spu; 655 + } 656 + return NULL; 657 + } 658 + 659 + static void init_aff_QS20_harcoded(void) 660 + { 661 + int node, i; 662 + struct spu *last_spu, *spu; 663 + u32 reg; 664 + 665 + for (node = 0; node < MAX_NUMNODES; node++) { 666 + last_spu = NULL; 667 + for (i = 0; i < SPES_PER_BE; i++) { 668 + reg = QS20_reg_idxs[i]; 669 + spu = spu_lookup_reg(node, reg); 670 + if (!spu) 671 + continue; 672 + spu->has_mem_affinity = QS20_reg_memory[reg]; 673 + if (last_spu) 674 + list_add_tail(&spu->aff_list, 675 + &last_spu->aff_list); 676 + last_spu = spu; 677 + } 678 + } 679 + } 680 + 681 + static int of_has_vicinity(void) 682 + { 683 + struct spu* spu; 684 + 685 + spu = list_entry(cbe_spu_info[0].spus.next, struct spu, cbe_list); 686 + return of_find_property(spu_devnode(spu), "vicinity", NULL) != NULL; 687 + } 688 + 689 + static struct spu *aff_devnode_spu(int cbe, struct device_node *dn) 690 + { 691 + struct spu *spu; 692 + 693 + list_for_each_entry(spu, &cbe_spu_info[cbe].spus, cbe_list) 694 + if (spu_devnode(spu) == dn) 695 + return spu; 696 + return NULL; 697 + } 698 + 699 + static struct spu * 700 + aff_node_next_to(int cbe, struct device_node *target, struct device_node *avoid) 701 + { 702 + struct spu *spu; 703 + const phandle *vic_handles; 704 + int lenp, i; 705 + 706 + list_for_each_entry(spu, &cbe_spu_info[cbe].spus, cbe_list) { 707 + if (spu_devnode(spu) == avoid) 708 + continue; 709 + vic_handles = get_property(spu_devnode(spu), "vicinity", &lenp); 710 + for (i=0; i < (lenp / sizeof(phandle)); i++) { 711 + if (vic_handles[i] == target->linux_phandle) 712 + return spu; 713 + } 714 + } 715 + return NULL; 716 + } 717 + 718 + static void init_aff_fw_vicinity_node(int cbe) 719 + { 720 + struct spu *spu, *last_spu; 721 + struct device_node *vic_dn, *last_spu_dn; 722 + phandle avoid_ph; 723 + const phandle *vic_handles; 724 + const char *name; 725 + int lenp, i, added, mem_aff; 726 + 727 + last_spu = list_entry(cbe_spu_info[cbe].spus.next, struct spu, cbe_list); 728 + avoid_ph = 0; 729 + for (added = 1; added < cbe_spu_info[cbe].n_spus; added++) { 730 + last_spu_dn = spu_devnode(last_spu); 731 + vic_handles = get_property(last_spu_dn, "vicinity", &lenp); 732 + 733 + for (i = 
0; i < (lenp / sizeof(phandle)); i++) { 734 + if (vic_handles[i] == avoid_ph) 735 + continue; 736 + 737 + vic_dn = of_find_node_by_phandle(vic_handles[i]); 738 + if (!vic_dn) 739 + continue; 740 + 741 + name = get_property(vic_dn, "name", NULL); 742 + if (strcmp(name, "spe") == 0) { 743 + spu = aff_devnode_spu(cbe, vic_dn); 744 + avoid_ph = last_spu_dn->linux_phandle; 745 + } 746 + else { 747 + mem_aff = strcmp(name, "mic-tm") == 0; 748 + spu = aff_node_next_to(cbe, vic_dn, last_spu_dn); 749 + if (!spu) 750 + continue; 751 + if (mem_aff) { 752 + last_spu->has_mem_affinity = 1; 753 + spu->has_mem_affinity = 1; 754 + } 755 + avoid_ph = vic_dn->linux_phandle; 756 + } 757 + list_add_tail(&spu->aff_list, &last_spu->aff_list); 758 + last_spu = spu; 759 + break; 760 + } 761 + } 762 + } 763 + 764 + static void init_aff_fw_vicinity(void) 765 + { 766 + int cbe; 767 + 768 + /* sets has_mem_affinity for each spu, as long as the 769 + * spu->aff_list list, linking each spu to its neighbors 770 + */ 771 + for (cbe = 0; cbe < MAX_NUMNODES; cbe++) 772 + init_aff_fw_vicinity_node(cbe); 773 + } 774 + 639 775 static int __init init_spu_base(void) 640 776 { 641 777 int i, ret = 0; 642 778 643 - for (i = 0; i < MAX_NUMNODES; i++) 644 - INIT_LIST_HEAD(&spu_list[i]); 779 + for (i = 0; i < MAX_NUMNODES; i++) { 780 + mutex_init(&cbe_spu_info[i].list_mutex); 781 + INIT_LIST_HEAD(&cbe_spu_info[i].spus); 782 + } 645 783 646 784 if (!spu_management_ops) 647 785 goto out; ··· 805 675 fb_append_extra_logo(&logo_spe_clut224, ret); 806 676 } 807 677 678 + mutex_lock(&spu_full_list_mutex); 808 679 xmon_register_spus(&spu_full_list); 809 - 680 + crash_register_spus(&spu_full_list); 681 + mutex_unlock(&spu_full_list_mutex); 810 682 spu_add_sysdev_attr(&attr_stat); 683 + 684 + if (of_has_vicinity()) { 685 + init_aff_fw_vicinity(); 686 + } else { 687 + long root = of_get_flat_dt_root(); 688 + if (of_flat_dt_is_compatible(root, "IBM,CPBW-1.0")) 689 + init_aff_QS20_harcoded(); 690 + } 811 691 812 692 return 0; 813 693 814 694 out_unregister_sysdev_class: 815 695 sysdev_class_unregister(&spu_sysdev_class); 816 696 out: 817 - 818 697 return ret; 819 698 } 820 699 module_init(init_spu_base);
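
The QS20 tables above encode the SPEs' physical order by their "reg" property, and init_aff_QS20_harcoded() chains them in exactly that order; the firmware "vicinity" path derives the same chain from device-tree phandles. A toy userspace model of what the hardcoded tables mean (table data copied from the diff, everything else invented):

#include <stdio.h>

#define SPES_PER_BE 8

static const int qs20_reg_idxs[SPES_PER_BE]   = { 0, 2, 4, 6, 7, 5, 3, 1 };
static const int qs20_reg_memory[SPES_PER_BE] = { 1, 1, 0, 0, 0, 0, 0, 0 };

int main(void)
{
	int i;

	/* the idx table is the physical order, so chaining in that order
	 * gives each SPE its neighbours; '*' marks memory affinity */
	printf("physical chain (by reg):");
	for (i = 0; i < SPES_PER_BE; i++)
		printf(" %d%s", qs20_reg_idxs[i],
		       qs20_reg_memory[qs20_reg_idxs[i]] ? "*" : "");
	printf("   (* = close to the memory interface)\n");

	/* neighbours of the SPE with reg == 7 (position 4 in the chain) */
	printf("reg 7 sits between reg %d and reg %d\n",
	       qs20_reg_idxs[3], qs20_reg_idxs[5]);
	return 0;
}
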
+15 -2
arch/powerpc/platforms/cell/spu_syscalls.c
··· 34 34 * this file is not used and the syscalls directly enter the fs code */ 35 35 36 36 asmlinkage long sys_spu_create(const char __user *name, 37 - unsigned int flags, mode_t mode) 37 + unsigned int flags, mode_t mode, int neighbor_fd) 38 38 { 39 39 long ret; 40 40 struct module *owner = spufs_calls.owner; 41 + struct file *neighbor; 42 + int fput_needed; 41 43 42 44 ret = -ENOSYS; 43 45 if (owner && try_module_get(owner)) { 44 - ret = spufs_calls.create_thread(name, flags, mode); 46 + if (flags & SPU_CREATE_AFFINITY_SPU) { 47 + neighbor = fget_light(neighbor_fd, &fput_needed); 48 + if (neighbor) { 49 + ret = spufs_calls.create_thread(name, flags, 50 + mode, neighbor); 51 + fput_light(neighbor, fput_needed); 52 + } 53 + } 54 + else { 55 + ret = spufs_calls.create_thread(name, flags, 56 + mode, NULL); 57 + } 45 58 module_put(owner); 46 59 } 47 60 return ret;
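
From userspace the extra argument is simply the file descriptor returned by an earlier spu_create() call; it is only examined when SPU_CREATE_AFFINITY_SPU is set. A hedged, untested sketch of the call sequence (the spufs mount point, the modes and the flag bit values are assumptions; take the real definitions from the kernel's spu.h):

#include <stdio.h>
#include <sys/types.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Placeholder bit values, confirm against include/asm-powerpc/spu.h. */
#ifndef SPU_CREATE_GANG
#define SPU_CREATE_GANG			0x0002
#define SPU_CREATE_AFFINITY_SPU		0x0010
#define SPU_CREATE_AFFINITY_MEM		0x0020
#endif

static long spu_create(const char *path, unsigned int flags, mode_t mode,
		       int neighbor_fd)
{
	/* __NR_spu_create is only present on powerpc kernels with spufs */
	return syscall(__NR_spu_create, path, flags, mode, neighbor_fd);
}

int main(void)
{
	int gang, a, b;

	gang = spu_create("/spu/mygang", SPU_CREATE_GANG, 0755, -1);
	a = spu_create("/spu/mygang/a", SPU_CREATE_AFFINITY_MEM, 0755, -1);
	/* place "b" next to "a": the fd of "a" rides in the new argument */
	b = spu_create("/spu/mygang/b", SPU_CREATE_AFFINITY_SPU, 0755, a);

	if (gang < 0 || a < 0 || b < 0)
		perror("spu_create");
	return 0;
}
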
+39 -3
arch/powerpc/platforms/cell/spufs/context.c
··· 22 22 23 23 #include <linux/fs.h> 24 24 #include <linux/mm.h> 25 + #include <linux/module.h> 25 26 #include <linux/slab.h> 26 27 #include <asm/atomic.h> 27 28 #include <asm/spu.h> ··· 56 55 ctx->ops = &spu_backing_ops; 57 56 ctx->owner = get_task_mm(current); 58 57 INIT_LIST_HEAD(&ctx->rq); 58 + INIT_LIST_HEAD(&ctx->aff_list); 59 59 if (gang) 60 60 spu_gang_add_ctx(gang, ctx); 61 61 ctx->cpus_allowed = current->cpus_allowed; 62 62 spu_set_timeslice(ctx); 63 - ctx->stats.execution_state = SPUCTX_UTIL_USER; 64 - ctx->stats.tstamp = jiffies; 63 + ctx->stats.util_state = SPU_UTIL_IDLE_LOADED; 65 64 66 65 atomic_inc(&nr_spu_contexts); 67 66 goto out; ··· 82 81 spu_fini_csa(&ctx->csa); 83 82 if (ctx->gang) 84 83 spu_gang_remove_ctx(ctx->gang, ctx); 84 + if (ctx->prof_priv_kref) 85 + kref_put(ctx->prof_priv_kref, ctx->prof_priv_release); 85 86 BUG_ON(!list_empty(&ctx->rq)); 86 87 atomic_dec(&nr_spu_contexts); 87 88 kfree(ctx); ··· 169 166 void spu_acquire_saved(struct spu_context *ctx) 170 167 { 171 168 spu_acquire(ctx); 172 - if (ctx->state != SPU_STATE_SAVED) 169 + if (ctx->state != SPU_STATE_SAVED) { 170 + set_bit(SPU_SCHED_WAS_ACTIVE, &ctx->sched_flags); 173 171 spu_deactivate(ctx); 172 + } 174 173 } 174 + 175 + /** 176 + * spu_release_saved - unlock spu context and return it to the runqueue 177 + * @ctx: context to unlock 178 + */ 179 + void spu_release_saved(struct spu_context *ctx) 180 + { 181 + BUG_ON(ctx->state != SPU_STATE_SAVED); 182 + 183 + if (test_and_clear_bit(SPU_SCHED_WAS_ACTIVE, &ctx->sched_flags)) 184 + spu_activate(ctx, 0); 185 + 186 + spu_release(ctx); 187 + } 188 + 189 + void spu_set_profile_private_kref(struct spu_context *ctx, 190 + struct kref *prof_info_kref, 191 + void ( * prof_info_release) (struct kref *kref)) 192 + { 193 + ctx->prof_priv_kref = prof_info_kref; 194 + ctx->prof_priv_release = prof_info_release; 195 + } 196 + EXPORT_SYMBOL_GPL(spu_set_profile_private_kref); 197 + 198 + void *spu_get_profile_private_kref(struct spu_context *ctx) 199 + { 200 + return ctx->prof_priv_kref; 201 + } 202 + EXPORT_SYMBOL_GPL(spu_get_profile_private_kref); 203 + 204 +
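
The new profile-private hooks let a profiler (the OProfile SPU support in this series) hang a refcounted blob off a context, and destroy_spu_context() drops that reference. A userspace model of the ownership rule; names and payload are invented, only the attach/release pattern mirrors the code above:

#include <stdio.h>
#include <stdlib.h>

struct blob {				/* stands in for the kref'd profile data */
	int refs;
	void (*release)(struct blob *);
	int samples;
};

static void blob_put(struct blob *b)	/* kref_put() equivalent */
{
	if (b && --b->refs == 0)
		b->release(b);
}

static void blob_release(struct blob *b)
{
	printf("releasing profile data (%d samples)\n", b->samples);
	free(b);
}

struct ctx { struct blob *prof_priv; };	/* stands in for spu_context */

int main(void)
{
	struct ctx c = { NULL };
	struct blob *b = malloc(sizeof(*b));

	b->refs = 1;			/* kref_init() done by the profiler */
	b->release = blob_release;
	b->samples = 42;
	c.prof_priv = b;		/* spu_set_profile_private_kref() */

	blob_put(c.prof_priv);		/* destroy_spu_context() path frees it */
	return 0;
}
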
+1 -1
arch/powerpc/platforms/cell/spufs/coredump.c
··· 226 226 spu_acquire_saved(ctx_info->ctx); 227 227 for (j = 0; j < spufs_coredump_num_notes; j++) 228 228 spufs_arch_write_note(ctx_info, j, file); 229 - spu_release(ctx_info->ctx); 229 + spu_release_saved(ctx_info->ctx); 230 230 list_del(&ctx_info->list); 231 231 kfree(ctx_info); 232 232 }
+3 -5
arch/powerpc/platforms/cell/spufs/fault.c
··· 179 179 if (!(dsisr & (MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED))) 180 180 return 0; 181 181 182 - spuctx_switch_state(ctx, SPUCTX_UTIL_IOWAIT); 182 + spuctx_switch_state(ctx, SPU_UTIL_IOWAIT); 183 183 184 184 pr_debug("ctx %p: ea %016lx, dsisr %016lx state %d\n", ctx, ea, 185 185 dsisr, ctx->state); 186 186 187 187 ctx->stats.hash_flt++; 188 - if (ctx->state == SPU_STATE_RUNNABLE) { 188 + if (ctx->state == SPU_STATE_RUNNABLE) 189 189 ctx->spu->stats.hash_flt++; 190 - spu_switch_state(ctx->spu, SPU_UTIL_IOWAIT); 191 - } 192 190 193 191 /* we must not hold the lock when entering spu_handle_mm_fault */ 194 192 spu_release(ctx); ··· 224 226 } else 225 227 spufs_handle_dma_error(ctx, ea, SPE_EVENT_SPE_DATA_STORAGE); 226 228 227 - spuctx_switch_state(ctx, SPUCTX_UTIL_SYSTEM); 229 + spuctx_switch_state(ctx, SPU_UTIL_SYSTEM); 228 230 return ret; 229 231 } 230 232 EXPORT_SYMBOL_GPL(spufs_handle_class1);
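
The class 1 path above must drop the context lock across spu_handle_mm_fault(), which can sleep, and retake it afterwards. The same shape in a self-contained pthread sketch (purely illustrative; the lock and the fault stub are invented):

#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t state_lock = PTHREAD_MUTEX_INITIALIZER;

static void handle_fault(void)		/* stands in for spu_handle_mm_fault() */
{
	usleep(1000);			/* may sleep */
}

static void class1_handler(void)
{
	pthread_mutex_lock(&state_lock);
	/* ... read DAR/DSISR, bump the fault counters ... */

	pthread_mutex_unlock(&state_lock);	/* must not hold it here */
	handle_fault();
	pthread_mutex_lock(&state_lock);	/* spu_acquire() again */

	/* ... restart the MFC, mark the context runnable ... */
	pthread_mutex_unlock(&state_lock);
}

int main(void)
{
	class1_handler();
	puts("fault handled");
	return 0;
}
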
+67 -37
arch/powerpc/platforms/cell/spufs/file.c
··· 370 370 371 371 spu_acquire_saved(ctx); 372 372 ret = __spufs_regs_read(ctx, buffer, size, pos); 373 - spu_release(ctx); 373 + spu_release_saved(ctx); 374 374 return ret; 375 375 } 376 376 ··· 392 392 ret = copy_from_user(lscsa->gprs + *pos - size, 393 393 buffer, size) ? -EFAULT : size; 394 394 395 - spu_release(ctx); 395 + spu_release_saved(ctx); 396 396 return ret; 397 397 } 398 398 ··· 421 421 422 422 spu_acquire_saved(ctx); 423 423 ret = __spufs_fpcr_read(ctx, buffer, size, pos); 424 - spu_release(ctx); 424 + spu_release_saved(ctx); 425 425 return ret; 426 426 } 427 427 ··· 443 443 ret = copy_from_user((char *)&lscsa->fpcr + *pos - size, 444 444 buffer, size) ? -EFAULT : size; 445 445 446 - spu_release(ctx); 446 + spu_release_saved(ctx); 447 447 return ret; 448 448 } 449 449 ··· 868 868 869 869 spu_acquire_saved(ctx); 870 870 ret = __spufs_signal1_read(ctx, buf, len, pos); 871 - spu_release(ctx); 871 + spu_release_saved(ctx); 872 872 873 873 return ret; 874 874 } ··· 934 934 .mmap = spufs_signal1_mmap, 935 935 }; 936 936 937 + static const struct file_operations spufs_signal1_nosched_fops = { 938 + .open = spufs_signal1_open, 939 + .release = spufs_signal1_release, 940 + .write = spufs_signal1_write, 941 + .mmap = spufs_signal1_mmap, 942 + }; 943 + 937 944 static int spufs_signal2_open(struct inode *inode, struct file *file) 938 945 { 939 946 struct spufs_inode_info *i = SPUFS_I(inode); ··· 999 992 1000 993 spu_acquire_saved(ctx); 1001 994 ret = __spufs_signal2_read(ctx, buf, len, pos); 1002 - spu_release(ctx); 995 + spu_release_saved(ctx); 1003 996 1004 997 return ret; 1005 998 } ··· 1065 1058 .open = spufs_signal2_open, 1066 1059 .release = spufs_signal2_release, 1067 1060 .read = spufs_signal2_read, 1061 + .write = spufs_signal2_write, 1062 + .mmap = spufs_signal2_mmap, 1063 + }; 1064 + 1065 + static const struct file_operations spufs_signal2_nosched_fops = { 1066 + .open = spufs_signal2_open, 1067 + .release = spufs_signal2_release, 1068 1068 .write = spufs_signal2_write, 1069 1069 .mmap = spufs_signal2_mmap, 1070 1070 }; ··· 1626 1612 struct spu_lscsa *lscsa = ctx->csa.lscsa; 1627 1613 spu_acquire_saved(ctx); 1628 1614 lscsa->decr.slot[0] = (u32) val; 1629 - spu_release(ctx); 1615 + spu_release_saved(ctx); 1630 1616 } 1631 1617 1632 1618 static u64 __spufs_decr_get(void *data) ··· 1642 1628 u64 ret; 1643 1629 spu_acquire_saved(ctx); 1644 1630 ret = __spufs_decr_get(data); 1645 - spu_release(ctx); 1631 + spu_release_saved(ctx); 1646 1632 return ret; 1647 1633 } 1648 1634 DEFINE_SIMPLE_ATTRIBUTE(spufs_decr_ops, spufs_decr_get, spufs_decr_set, ··· 1651 1637 static void spufs_decr_status_set(void *data, u64 val) 1652 1638 { 1653 1639 struct spu_context *ctx = data; 1654 - struct spu_lscsa *lscsa = ctx->csa.lscsa; 1655 1640 spu_acquire_saved(ctx); 1656 - lscsa->decr_status.slot[0] = (u32) val; 1657 - spu_release(ctx); 1641 + if (val) 1642 + ctx->csa.priv2.mfc_control_RW |= MFC_CNTL_DECREMENTER_RUNNING; 1643 + else 1644 + ctx->csa.priv2.mfc_control_RW &= ~MFC_CNTL_DECREMENTER_RUNNING; 1645 + spu_release_saved(ctx); 1658 1646 } 1659 1647 1660 1648 static u64 __spufs_decr_status_get(void *data) 1661 1649 { 1662 1650 struct spu_context *ctx = data; 1663 - struct spu_lscsa *lscsa = ctx->csa.lscsa; 1664 - return lscsa->decr_status.slot[0]; 1651 + if (ctx->csa.priv2.mfc_control_RW & MFC_CNTL_DECREMENTER_RUNNING) 1652 + return SPU_DECR_STATUS_RUNNING; 1653 + else 1654 + return 0; 1665 1655 } 1666 1656 1667 1657 static u64 spufs_decr_status_get(void *data) ··· 1674 1656 u64 ret; 1675 1657 
spu_acquire_saved(ctx); 1676 1658 ret = __spufs_decr_status_get(data); 1677 - spu_release(ctx); 1659 + spu_release_saved(ctx); 1678 1660 return ret; 1679 1661 } 1680 1662 DEFINE_SIMPLE_ATTRIBUTE(spufs_decr_status_ops, spufs_decr_status_get, ··· 1686 1668 struct spu_lscsa *lscsa = ctx->csa.lscsa; 1687 1669 spu_acquire_saved(ctx); 1688 1670 lscsa->event_mask.slot[0] = (u32) val; 1689 - spu_release(ctx); 1671 + spu_release_saved(ctx); 1690 1672 } 1691 1673 1692 1674 static u64 __spufs_event_mask_get(void *data) ··· 1702 1684 u64 ret; 1703 1685 spu_acquire_saved(ctx); 1704 1686 ret = __spufs_event_mask_get(data); 1705 - spu_release(ctx); 1687 + spu_release_saved(ctx); 1706 1688 return ret; 1707 1689 } 1708 1690 DEFINE_SIMPLE_ATTRIBUTE(spufs_event_mask_ops, spufs_event_mask_get, ··· 1726 1708 1727 1709 spu_acquire_saved(ctx); 1728 1710 ret = __spufs_event_status_get(data); 1729 - spu_release(ctx); 1711 + spu_release_saved(ctx); 1730 1712 return ret; 1731 1713 } 1732 1714 DEFINE_SIMPLE_ATTRIBUTE(spufs_event_status_ops, spufs_event_status_get, ··· 1738 1720 struct spu_lscsa *lscsa = ctx->csa.lscsa; 1739 1721 spu_acquire_saved(ctx); 1740 1722 lscsa->srr0.slot[0] = (u32) val; 1741 - spu_release(ctx); 1723 + spu_release_saved(ctx); 1742 1724 } 1743 1725 1744 1726 static u64 spufs_srr0_get(void *data) ··· 1748 1730 u64 ret; 1749 1731 spu_acquire_saved(ctx); 1750 1732 ret = lscsa->srr0.slot[0]; 1751 - spu_release(ctx); 1733 + spu_release_saved(ctx); 1752 1734 return ret; 1753 1735 } 1754 1736 DEFINE_SIMPLE_ATTRIBUTE(spufs_srr0_ops, spufs_srr0_get, spufs_srr0_set, ··· 1804 1786 1805 1787 spu_acquire_saved(ctx); 1806 1788 ret = __spufs_lslr_get(data); 1807 - spu_release(ctx); 1789 + spu_release_saved(ctx); 1808 1790 1809 1791 return ret; 1810 1792 } ··· 1868 1850 spin_lock(&ctx->csa.register_lock); 1869 1851 ret = __spufs_mbox_info_read(ctx, buf, len, pos); 1870 1852 spin_unlock(&ctx->csa.register_lock); 1871 - spu_release(ctx); 1853 + spu_release_saved(ctx); 1872 1854 1873 1855 return ret; 1874 1856 } ··· 1906 1888 spin_lock(&ctx->csa.register_lock); 1907 1889 ret = __spufs_ibox_info_read(ctx, buf, len, pos); 1908 1890 spin_unlock(&ctx->csa.register_lock); 1909 - spu_release(ctx); 1891 + spu_release_saved(ctx); 1910 1892 1911 1893 return ret; 1912 1894 } ··· 1947 1929 spin_lock(&ctx->csa.register_lock); 1948 1930 ret = __spufs_wbox_info_read(ctx, buf, len, pos); 1949 1931 spin_unlock(&ctx->csa.register_lock); 1950 - spu_release(ctx); 1932 + spu_release_saved(ctx); 1951 1933 1952 1934 return ret; 1953 1935 } ··· 1997 1979 spin_lock(&ctx->csa.register_lock); 1998 1980 ret = __spufs_dma_info_read(ctx, buf, len, pos); 1999 1981 spin_unlock(&ctx->csa.register_lock); 2000 - spu_release(ctx); 1982 + spu_release_saved(ctx); 2001 1983 2002 1984 return ret; 2003 1985 } ··· 2048 2030 spin_lock(&ctx->csa.register_lock); 2049 2031 ret = __spufs_proxydma_info_read(ctx, buf, len, pos); 2050 2032 spin_unlock(&ctx->csa.register_lock); 2051 - spu_release(ctx); 2033 + spu_release_saved(ctx); 2052 2034 2053 2035 return ret; 2054 2036 } ··· 2083 2065 }; 2084 2066 2085 2067 static unsigned long long spufs_acct_time(struct spu_context *ctx, 2086 - enum spuctx_execution_state state) 2068 + enum spu_utilization_state state) 2087 2069 { 2088 - unsigned long time = ctx->stats.times[state]; 2070 + struct timespec ts; 2071 + unsigned long long time = ctx->stats.times[state]; 2089 2072 2090 - if (ctx->stats.execution_state == state) 2091 - time += jiffies - ctx->stats.tstamp; 2073 + /* 2074 + * In general, utilization statistics 
are updated by the controlling 2075 + * thread as the spu context moves through various well defined 2076 + * state transitions, but if the context is lazily loaded its 2077 + * utilization statistics are not updated as the controlling thread 2078 + * is not tightly coupled with the execution of the spu context. We 2079 + * calculate and apply the time delta from the last recorded state 2080 + * of the spu context. 2081 + */ 2082 + if (ctx->spu && ctx->stats.util_state == state) { 2083 + ktime_get_ts(&ts); 2084 + time += timespec_to_ns(&ts) - ctx->stats.tstamp; 2085 + } 2092 2086 2093 - return jiffies_to_msecs(time); 2087 + return time / NSEC_PER_MSEC; 2094 2088 } 2095 2089 2096 2090 static unsigned long long spufs_slb_flts(struct spu_context *ctx) ··· 2137 2107 spu_acquire(ctx); 2138 2108 seq_printf(s, "%s %llu %llu %llu %llu " 2139 2109 "%llu %llu %llu %llu %llu %llu %llu %llu\n", 2140 - ctx_state_names[ctx->stats.execution_state], 2141 - spufs_acct_time(ctx, SPUCTX_UTIL_USER), 2142 - spufs_acct_time(ctx, SPUCTX_UTIL_SYSTEM), 2143 - spufs_acct_time(ctx, SPUCTX_UTIL_IOWAIT), 2144 - spufs_acct_time(ctx, SPUCTX_UTIL_LOADED), 2110 + ctx_state_names[ctx->stats.util_state], 2111 + spufs_acct_time(ctx, SPU_UTIL_USER), 2112 + spufs_acct_time(ctx, SPU_UTIL_SYSTEM), 2113 + spufs_acct_time(ctx, SPU_UTIL_IOWAIT), 2114 + spufs_acct_time(ctx, SPU_UTIL_IDLE_LOADED), 2145 2115 ctx->stats.vol_ctx_switch, 2146 2116 ctx->stats.invol_ctx_switch, 2147 2117 spufs_slb_flts(ctx), ··· 2214 2184 { "mbox_stat", &spufs_mbox_stat_fops, 0444, }, 2215 2185 { "ibox_stat", &spufs_ibox_stat_fops, 0444, }, 2216 2186 { "wbox_stat", &spufs_wbox_stat_fops, 0444, }, 2217 - { "signal1", &spufs_signal1_fops, 0666, }, 2218 - { "signal2", &spufs_signal2_fops, 0666, }, 2187 + { "signal1", &spufs_signal1_nosched_fops, 0222, }, 2188 + { "signal2", &spufs_signal2_nosched_fops, 0222, }, 2219 2189 { "signal1_type", &spufs_signal1_type, 0666, }, 2220 2190 { "signal2_type", &spufs_signal2_type, 0666, }, 2221 2191 { "mss", &spufs_mss_fops, 0666, },
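
The accounting above now keeps per-state times in nanoseconds and, when the context is still in the state being queried, adds the delta since the last recorded timestamp before reporting milliseconds. A standalone model with CLOCK_MONOTONIC standing in for ktime_get_ts() (state numbers and figures are invented):

#include <stdio.h>
#include <time.h>

#define NSEC_PER_SEC	1000000000ULL
#define NSEC_PER_MSEC	1000000ULL

static unsigned long long now_ns(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec;
}

struct stats {
	int cur_state;
	unsigned long long tstamp;	/* when cur_state was entered */
	unsigned long long times[4];	/* accumulated ns per state */
};

static unsigned long long acct_time_ms(const struct stats *s, int state)
{
	unsigned long long t = s->times[state];

	if (s->cur_state == state)	/* still in this state: add the delta */
		t += now_ns() - s->tstamp;
	return t / NSEC_PER_MSEC;
}

int main(void)
{
	struct stats s = { .cur_state = 1, .tstamp = now_ns() };

	s.times[1] = 250 * NSEC_PER_MSEC;	/* 250ms from earlier runs */
	printf("state 1: %llums so far\n", acct_time_ms(&s, 1));
	return 0;
}
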
+6
arch/powerpc/platforms/cell/spufs/gang.c
··· 35 35 36 36 kref_init(&gang->kref); 37 37 mutex_init(&gang->mutex); 38 + mutex_init(&gang->aff_mutex); 38 39 INIT_LIST_HEAD(&gang->list); 40 + INIT_LIST_HEAD(&gang->aff_list_head); 39 41 40 42 out: 41 43 return gang; ··· 75 73 { 76 74 mutex_lock(&gang->mutex); 77 75 WARN_ON(ctx->gang != gang); 76 + if (!list_empty(&ctx->aff_list)) { 77 + list_del_init(&ctx->aff_list); 78 + gang->aff_flags &= ~AFF_OFFSETS_SET; 79 + } 78 80 list_del_init(&ctx->gang_list); 79 81 gang->contexts--; 80 82 mutex_unlock(&gang->mutex);
+126 -6
arch/powerpc/platforms/cell/spufs/inode.c
··· 316 316 return ret; 317 317 } 318 318 319 - static int spufs_create_context(struct inode *inode, 320 - struct dentry *dentry, 321 - struct vfsmount *mnt, int flags, int mode) 319 + static struct spu_context * 320 + spufs_assert_affinity(unsigned int flags, struct spu_gang *gang, 321 + struct file *filp) 322 + { 323 + struct spu_context *tmp, *neighbor; 324 + int count, node; 325 + int aff_supp; 326 + 327 + aff_supp = !list_empty(&(list_entry(cbe_spu_info[0].spus.next, 328 + struct spu, cbe_list))->aff_list); 329 + 330 + if (!aff_supp) 331 + return ERR_PTR(-EINVAL); 332 + 333 + if (flags & SPU_CREATE_GANG) 334 + return ERR_PTR(-EINVAL); 335 + 336 + if (flags & SPU_CREATE_AFFINITY_MEM && 337 + gang->aff_ref_ctx && 338 + gang->aff_ref_ctx->flags & SPU_CREATE_AFFINITY_MEM) 339 + return ERR_PTR(-EEXIST); 340 + 341 + if (gang->aff_flags & AFF_MERGED) 342 + return ERR_PTR(-EBUSY); 343 + 344 + neighbor = NULL; 345 + if (flags & SPU_CREATE_AFFINITY_SPU) { 346 + if (!filp || filp->f_op != &spufs_context_fops) 347 + return ERR_PTR(-EINVAL); 348 + 349 + neighbor = get_spu_context( 350 + SPUFS_I(filp->f_dentry->d_inode)->i_ctx); 351 + 352 + if (!list_empty(&neighbor->aff_list) && !(neighbor->aff_head) && 353 + !list_is_last(&neighbor->aff_list, &gang->aff_list_head) && 354 + !list_entry(neighbor->aff_list.next, struct spu_context, 355 + aff_list)->aff_head) 356 + return ERR_PTR(-EEXIST); 357 + 358 + if (gang != neighbor->gang) 359 + return ERR_PTR(-EINVAL); 360 + 361 + count = 1; 362 + list_for_each_entry(tmp, &gang->aff_list_head, aff_list) 363 + count++; 364 + if (list_empty(&neighbor->aff_list)) 365 + count++; 366 + 367 + for (node = 0; node < MAX_NUMNODES; node++) { 368 + if ((cbe_spu_info[node].n_spus - atomic_read( 369 + &cbe_spu_info[node].reserved_spus)) >= count) 370 + break; 371 + } 372 + 373 + if (node == MAX_NUMNODES) 374 + return ERR_PTR(-EEXIST); 375 + } 376 + 377 + return neighbor; 378 + } 379 + 380 + static void 381 + spufs_set_affinity(unsigned int flags, struct spu_context *ctx, 382 + struct spu_context *neighbor) 383 + { 384 + if (flags & SPU_CREATE_AFFINITY_MEM) 385 + ctx->gang->aff_ref_ctx = ctx; 386 + 387 + if (flags & SPU_CREATE_AFFINITY_SPU) { 388 + if (list_empty(&neighbor->aff_list)) { 389 + list_add_tail(&neighbor->aff_list, 390 + &ctx->gang->aff_list_head); 391 + neighbor->aff_head = 1; 392 + } 393 + 394 + if (list_is_last(&neighbor->aff_list, &ctx->gang->aff_list_head) 395 + || list_entry(neighbor->aff_list.next, struct spu_context, 396 + aff_list)->aff_head) { 397 + list_add(&ctx->aff_list, &neighbor->aff_list); 398 + } else { 399 + list_add_tail(&ctx->aff_list, &neighbor->aff_list); 400 + if (neighbor->aff_head) { 401 + neighbor->aff_head = 0; 402 + ctx->aff_head = 1; 403 + } 404 + } 405 + 406 + if (!ctx->gang->aff_ref_ctx) 407 + ctx->gang->aff_ref_ctx = ctx; 408 + } 409 + } 410 + 411 + static int 412 + spufs_create_context(struct inode *inode, struct dentry *dentry, 413 + struct vfsmount *mnt, int flags, int mode, 414 + struct file *aff_filp) 322 415 { 323 416 int ret; 417 + int affinity; 418 + struct spu_gang *gang; 419 + struct spu_context *neighbor; 324 420 325 421 ret = -EPERM; 326 422 if ((flags & SPU_CREATE_NOSCHED) && ··· 432 336 if ((flags & SPU_CREATE_ISOLATE) && !isolated_loader) 433 337 goto out_unlock; 434 338 339 + gang = NULL; 340 + neighbor = NULL; 341 + affinity = flags & (SPU_CREATE_AFFINITY_MEM | SPU_CREATE_AFFINITY_SPU); 342 + if (affinity) { 343 + gang = SPUFS_I(inode)->i_gang; 344 + ret = -EINVAL; 345 + if (!gang) 346 + goto out_unlock; 347 + 
mutex_lock(&gang->aff_mutex); 348 + neighbor = spufs_assert_affinity(flags, gang, aff_filp); 349 + if (IS_ERR(neighbor)) { 350 + ret = PTR_ERR(neighbor); 351 + goto out_aff_unlock; 352 + } 353 + } 354 + 435 355 ret = spufs_mkdir(inode, dentry, flags, mode & S_IRWXUGO); 436 356 if (ret) 437 - goto out_unlock; 357 + goto out_aff_unlock; 358 + 359 + if (affinity) 360 + spufs_set_affinity(flags, SPUFS_I(dentry->d_inode)->i_ctx, 361 + neighbor); 438 362 439 363 /* 440 364 * get references for dget and mntget, will be released ··· 468 352 goto out; 469 353 } 470 354 355 + out_aff_unlock: 356 + if (affinity) 357 + mutex_unlock(&gang->aff_mutex); 471 358 out_unlock: 472 359 mutex_unlock(&inode->i_mutex); 473 360 out: ··· 569 450 570 451 static struct file_system_type spufs_type; 571 452 572 - long spufs_create(struct nameidata *nd, unsigned int flags, mode_t mode) 453 + long spufs_create(struct nameidata *nd, unsigned int flags, mode_t mode, 454 + struct file *filp) 573 455 { 574 456 struct dentry *dentry; 575 457 int ret; ··· 607 487 dentry, nd->mnt, mode); 608 488 else 609 489 return spufs_create_context(nd->dentry->d_inode, 610 - dentry, nd->mnt, flags, mode); 490 + dentry, nd->mnt, flags, mode, filp); 611 491 612 492 out_dput: 613 493 dput(dentry);
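
spufs_assert_affinity() above sizes the would-be chain (existing aff_list members plus the new context, plus the neighbour if it is not chained yet) and refuses the create with -EEXIST unless some node still has that many non-reserved SPEs. The check reduced to arithmetic (numbers invented):

#include <stdio.h>

struct node { int n_spus, reserved; };

static int fits_somewhere(const struct node *nodes, int nr_nodes, int count)
{
	int i;

	for (i = 0; i < nr_nodes; i++)
		if (nodes[i].n_spus - nodes[i].reserved >= count)
			return 1;
	return 0;
}

int main(void)
{
	struct node be[2] = { { 8, 6 }, { 8, 3 } };

	/* the gang already chains 4 contexts and is adding a 5th */
	printf("chain of 5 fits: %s\n",
	       fits_somewhere(be, 2, 5) ? "yes" : "no (-EEXIST)");
	return 0;
}
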
+28 -8
arch/powerpc/platforms/cell/spufs/run.c
··· 18 18 wake_up_all(&ctx->stop_wq); 19 19 } 20 20 21 - static inline int spu_stopped(struct spu_context *ctx, u32 * stat) 21 + static inline int spu_stopped(struct spu_context *ctx, u32 *stat) 22 22 { 23 23 struct spu *spu; 24 24 u64 pte_fault; 25 25 26 26 *stat = ctx->ops->status_read(ctx); 27 - if (ctx->state != SPU_STATE_RUNNABLE) 28 - return 1; 27 + 29 28 spu = ctx->spu; 29 + if (ctx->state != SPU_STATE_RUNNABLE || 30 + test_bit(SPU_SCHED_NOTIFY_ACTIVE, &ctx->sched_flags)) 31 + return 1; 30 32 pte_fault = spu->dsisr & 31 33 (MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED); 32 34 return (!(*stat & SPU_STATUS_RUNNING) || pte_fault || spu->class_0_pending) ? ··· 126 124 return ret; 127 125 } 128 126 129 - static int spu_run_init(struct spu_context *ctx, u32 * npc) 127 + static int spu_run_init(struct spu_context *ctx, u32 *npc) 130 128 { 129 + spuctx_switch_state(ctx, SPU_UTIL_SYSTEM); 130 + 131 131 if (ctx->flags & SPU_CREATE_ISOLATE) { 132 132 unsigned long runcntl; 133 133 ··· 155 151 ctx->ops->runcntl_write(ctx, SPU_RUNCNTL_RUNNABLE); 156 152 } 157 153 154 + spuctx_switch_state(ctx, SPU_UTIL_USER); 155 + 158 156 return 0; 159 157 } 160 158 161 - static int spu_run_fini(struct spu_context *ctx, u32 * npc, 162 - u32 * status) 159 + static int spu_run_fini(struct spu_context *ctx, u32 *npc, 160 + u32 *status) 163 161 { 164 162 int ret = 0; 165 163 166 164 *status = ctx->ops->status_read(ctx); 167 165 *npc = ctx->ops->npc_read(ctx); 166 + 167 + spuctx_switch_state(ctx, SPU_UTIL_IDLE_LOADED); 168 168 spu_release(ctx); 169 169 170 170 if (signal_pending(current)) ··· 297 289 return ret; 298 290 } 299 291 300 - long spufs_run_spu(struct file *file, struct spu_context *ctx, 301 - u32 *npc, u32 *event) 292 + long spufs_run_spu(struct spu_context *ctx, u32 *npc, u32 *event) 302 293 { 303 294 int ret; 295 + struct spu *spu; 304 296 u32 status; 305 297 306 298 if (mutex_lock_interruptible(&ctx->run_mutex)) ··· 336 328 ret = spufs_wait(ctx->stop_wq, spu_stopped(ctx, &status)); 337 329 if (unlikely(ret)) 338 330 break; 331 + spu = ctx->spu; 332 + if (unlikely(test_and_clear_bit(SPU_SCHED_NOTIFY_ACTIVE, 333 + &ctx->sched_flags))) { 334 + if (!(status & SPU_STATUS_STOPPED_BY_STOP)) { 335 + spu_switch_notify(spu, ctx); 336 + continue; 337 + } 338 + } 339 + 340 + spuctx_switch_state(ctx, SPU_UTIL_SYSTEM); 341 + 339 342 if ((status & SPU_STATUS_STOPPED_BY_STOP) && 340 343 (status >> SPU_STOP_STATUS_SHIFT == 0x2104)) { 341 344 ret = spu_process_callback(ctx); ··· 374 355 (((status >> SPU_STOP_STATUS_SHIFT) & 0x3f00) == 0x2100) && 375 356 (ctx->state == SPU_STATE_RUNNABLE)) 376 357 ctx->stats.libassist++; 358 + 377 359 378 360 ctx->ops->master_stop(ctx); 379 361 ret = spu_run_fini(ctx, npc, &status);
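
The run loop now cooperates with profiler registration: the registering side sets SPU_SCHED_NOTIFY_ACTIVE and wakes stop_wq, and spufs_run_spu() test-and-clears the bit and emits spu_switch_notify() before carrying on. A compact pthread/C11 model of that handshake (all names here are stand-ins):

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t stop_wq = PTHREAD_COND_INITIALIZER;
static atomic_int notify_active;

static void *profiler(void *arg)
{
	(void)arg;
	atomic_store(&notify_active, 1);	/* set_bit() + mb() */
	pthread_mutex_lock(&lock);
	pthread_cond_broadcast(&stop_wq);	/* wake_up_all(&ctx->stop_wq) */
	pthread_mutex_unlock(&lock);
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, profiler, NULL);

	pthread_mutex_lock(&lock);
	while (!atomic_load(&notify_active))	/* spu_stopped() condition */
		pthread_cond_wait(&stop_wq, &lock);
	pthread_mutex_unlock(&lock);

	if (atomic_exchange(&notify_active, 0))	/* test_and_clear_bit() */
		puts("spu_switch_notify(spu, ctx)");

	pthread_join(t, NULL);
	return 0;
}
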
+277 -104
arch/powerpc/platforms/cell/spufs/sched.c
··· 51 51 DECLARE_BITMAP(bitmap, MAX_PRIO); 52 52 struct list_head runq[MAX_PRIO]; 53 53 spinlock_t runq_lock; 54 - struct list_head active_list[MAX_NUMNODES]; 55 - struct mutex active_mutex[MAX_NUMNODES]; 56 - int nr_active[MAX_NUMNODES]; 57 54 int nr_waiting; 58 55 }; 59 56 ··· 124 127 ctx->policy = current->policy; 125 128 126 129 /* 127 - * A lot of places that don't hold active_mutex poke into 130 + * A lot of places that don't hold list_mutex poke into 128 131 * cpus_allowed, including grab_runnable_context which 129 132 * already holds the runq_lock. So abuse runq_lock 130 133 * to protect this field aswell. ··· 138 141 { 139 142 int node = ctx->spu->node; 140 143 141 - mutex_lock(&spu_prio->active_mutex[node]); 144 + mutex_lock(&cbe_spu_info[node].list_mutex); 142 145 __spu_update_sched_info(ctx); 143 - mutex_unlock(&spu_prio->active_mutex[node]); 146 + mutex_unlock(&cbe_spu_info[node].list_mutex); 144 147 } 145 148 146 149 static int __node_allowed(struct spu_context *ctx, int node) ··· 166 169 return rval; 167 170 } 168 171 169 - /** 170 - * spu_add_to_active_list - add spu to active list 171 - * @spu: spu to add to the active list 172 - */ 173 - static void spu_add_to_active_list(struct spu *spu) 174 - { 175 - int node = spu->node; 176 - 177 - mutex_lock(&spu_prio->active_mutex[node]); 178 - spu_prio->nr_active[node]++; 179 - list_add_tail(&spu->list, &spu_prio->active_list[node]); 180 - mutex_unlock(&spu_prio->active_mutex[node]); 181 - } 182 - 183 - static void __spu_remove_from_active_list(struct spu *spu) 184 - { 185 - list_del_init(&spu->list); 186 - spu_prio->nr_active[spu->node]--; 187 - } 188 - 189 - /** 190 - * spu_remove_from_active_list - remove spu from active list 191 - * @spu: spu to remove from the active list 192 - */ 193 - static void spu_remove_from_active_list(struct spu *spu) 194 - { 195 - int node = spu->node; 196 - 197 - mutex_lock(&spu_prio->active_mutex[node]); 198 - __spu_remove_from_active_list(spu); 199 - mutex_unlock(&spu_prio->active_mutex[node]); 200 - } 201 - 202 172 static BLOCKING_NOTIFIER_HEAD(spu_switch_notifier); 203 173 204 - static void spu_switch_notify(struct spu *spu, struct spu_context *ctx) 174 + void spu_switch_notify(struct spu *spu, struct spu_context *ctx) 205 175 { 206 176 blocking_notifier_call_chain(&spu_switch_notifier, 207 177 ctx ? ctx->object_id : 0, spu); 208 178 } 209 179 180 + static void notify_spus_active(void) 181 + { 182 + int node; 183 + 184 + /* 185 + * Wake up the active spu_contexts. 
186 + * 187 + * When the awakened processes see their "notify_active" flag is set, 188 + * they will call spu_switch_notify(); 189 + */ 190 + for_each_online_node(node) { 191 + struct spu *spu; 192 + 193 + mutex_lock(&cbe_spu_info[node].list_mutex); 194 + list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) { 195 + if (spu->alloc_state != SPU_FREE) { 196 + struct spu_context *ctx = spu->ctx; 197 + set_bit(SPU_SCHED_NOTIFY_ACTIVE, 198 + &ctx->sched_flags); 199 + mb(); 200 + wake_up_all(&ctx->stop_wq); 201 + } 202 + } 203 + mutex_unlock(&cbe_spu_info[node].list_mutex); 204 + } 205 + } 206 + 210 207 int spu_switch_event_register(struct notifier_block * n) 211 208 { 212 - return blocking_notifier_chain_register(&spu_switch_notifier, n); 209 + int ret; 210 + ret = blocking_notifier_chain_register(&spu_switch_notifier, n); 211 + if (!ret) 212 + notify_spus_active(); 213 + return ret; 213 214 } 215 + EXPORT_SYMBOL_GPL(spu_switch_event_register); 214 216 215 217 int spu_switch_event_unregister(struct notifier_block * n) 216 218 { 217 219 return blocking_notifier_chain_unregister(&spu_switch_notifier, n); 218 220 } 221 + EXPORT_SYMBOL_GPL(spu_switch_event_unregister); 219 222 220 223 /** 221 224 * spu_bind_context - bind spu context to physical spu ··· 226 229 { 227 230 pr_debug("%s: pid=%d SPU=%d NODE=%d\n", __FUNCTION__, current->pid, 228 231 spu->number, spu->node); 232 + spuctx_switch_state(ctx, SPU_UTIL_SYSTEM); 233 + 234 + if (ctx->flags & SPU_CREATE_NOSCHED) 235 + atomic_inc(&cbe_spu_info[spu->node].reserved_spus); 236 + if (!list_empty(&ctx->aff_list)) 237 + atomic_inc(&ctx->gang->aff_sched_count); 229 238 230 239 ctx->stats.slb_flt_base = spu->stats.slb_flt; 231 240 ctx->stats.class2_intr_base = spu->stats.class2_intr; ··· 241 238 ctx->spu = spu; 242 239 ctx->ops = &spu_hw_ops; 243 240 spu->pid = current->pid; 241 + spu->tgid = current->tgid; 244 242 spu_associate_mm(spu, ctx->owner); 245 243 spu->ibox_callback = spufs_ibox_callback; 246 244 spu->wbox_callback = spufs_wbox_callback; ··· 255 251 spu_cpu_affinity_set(spu, raw_smp_processor_id()); 256 252 spu_switch_notify(spu, ctx); 257 253 ctx->state = SPU_STATE_RUNNABLE; 258 - spu_switch_state(spu, SPU_UTIL_SYSTEM); 254 + 255 + spuctx_switch_state(ctx, SPU_UTIL_IDLE_LOADED); 256 + } 257 + 258 + /* 259 + * Must be used with the list_mutex held. 
260 + */ 261 + static inline int sched_spu(struct spu *spu) 262 + { 263 + BUG_ON(!mutex_is_locked(&cbe_spu_info[spu->node].list_mutex)); 264 + 265 + return (!spu->ctx || !(spu->ctx->flags & SPU_CREATE_NOSCHED)); 266 + } 267 + 268 + static void aff_merge_remaining_ctxs(struct spu_gang *gang) 269 + { 270 + struct spu_context *ctx; 271 + 272 + list_for_each_entry(ctx, &gang->aff_list_head, aff_list) { 273 + if (list_empty(&ctx->aff_list)) 274 + list_add(&ctx->aff_list, &gang->aff_list_head); 275 + } 276 + gang->aff_flags |= AFF_MERGED; 277 + } 278 + 279 + static void aff_set_offsets(struct spu_gang *gang) 280 + { 281 + struct spu_context *ctx; 282 + int offset; 283 + 284 + offset = -1; 285 + list_for_each_entry_reverse(ctx, &gang->aff_ref_ctx->aff_list, 286 + aff_list) { 287 + if (&ctx->aff_list == &gang->aff_list_head) 288 + break; 289 + ctx->aff_offset = offset--; 290 + } 291 + 292 + offset = 0; 293 + list_for_each_entry(ctx, gang->aff_ref_ctx->aff_list.prev, aff_list) { 294 + if (&ctx->aff_list == &gang->aff_list_head) 295 + break; 296 + ctx->aff_offset = offset++; 297 + } 298 + 299 + gang->aff_flags |= AFF_OFFSETS_SET; 300 + } 301 + 302 + static struct spu *aff_ref_location(struct spu_context *ctx, int mem_aff, 303 + int group_size, int lowest_offset) 304 + { 305 + struct spu *spu; 306 + int node, n; 307 + 308 + /* 309 + * TODO: A better algorithm could be used to find a good spu to be 310 + * used as reference location for the ctxs chain. 311 + */ 312 + node = cpu_to_node(raw_smp_processor_id()); 313 + for (n = 0; n < MAX_NUMNODES; n++, node++) { 314 + node = (node < MAX_NUMNODES) ? node : 0; 315 + if (!node_allowed(ctx, node)) 316 + continue; 317 + mutex_lock(&cbe_spu_info[node].list_mutex); 318 + list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) { 319 + if ((!mem_aff || spu->has_mem_affinity) && 320 + sched_spu(spu)) { 321 + mutex_unlock(&cbe_spu_info[node].list_mutex); 322 + return spu; 323 + } 324 + } 325 + mutex_unlock(&cbe_spu_info[node].list_mutex); 326 + } 327 + return NULL; 328 + } 329 + 330 + static void aff_set_ref_point_location(struct spu_gang *gang) 331 + { 332 + int mem_aff, gs, lowest_offset; 333 + struct spu_context *ctx; 334 + struct spu *tmp; 335 + 336 + mem_aff = gang->aff_ref_ctx->flags & SPU_CREATE_AFFINITY_MEM; 337 + lowest_offset = 0; 338 + gs = 0; 339 + 340 + list_for_each_entry(tmp, &gang->aff_list_head, aff_list) 341 + gs++; 342 + 343 + list_for_each_entry_reverse(ctx, &gang->aff_ref_ctx->aff_list, 344 + aff_list) { 345 + if (&ctx->aff_list == &gang->aff_list_head) 346 + break; 347 + lowest_offset = ctx->aff_offset; 348 + } 349 + 350 + gang->aff_ref_spu = aff_ref_location(ctx, mem_aff, gs, lowest_offset); 351 + } 352 + 353 + static struct spu *ctx_location(struct spu *ref, int offset, int node) 354 + { 355 + struct spu *spu; 356 + 357 + spu = NULL; 358 + if (offset >= 0) { 359 + list_for_each_entry(spu, ref->aff_list.prev, aff_list) { 360 + BUG_ON(spu->node != node); 361 + if (offset == 0) 362 + break; 363 + if (sched_spu(spu)) 364 + offset--; 365 + } 366 + } else { 367 + list_for_each_entry_reverse(spu, ref->aff_list.next, aff_list) { 368 + BUG_ON(spu->node != node); 369 + if (offset == 0) 370 + break; 371 + if (sched_spu(spu)) 372 + offset++; 373 + } 374 + } 375 + 376 + return spu; 377 + } 378 + 379 + /* 380 + * affinity_check is called each time a context is going to be scheduled. 381 + * It returns the spu ptr on which the context must run. 
382 + */ 383 + static int has_affinity(struct spu_context *ctx) 384 + { 385 + struct spu_gang *gang = ctx->gang; 386 + 387 + if (list_empty(&ctx->aff_list)) 388 + return 0; 389 + 390 + mutex_lock(&gang->aff_mutex); 391 + if (!gang->aff_ref_spu) { 392 + if (!(gang->aff_flags & AFF_MERGED)) 393 + aff_merge_remaining_ctxs(gang); 394 + if (!(gang->aff_flags & AFF_OFFSETS_SET)) 395 + aff_set_offsets(gang); 396 + aff_set_ref_point_location(gang); 397 + } 398 + mutex_unlock(&gang->aff_mutex); 399 + 400 + return gang->aff_ref_spu != NULL; 259 401 } 260 402 261 403 /** ··· 413 263 { 414 264 pr_debug("%s: unbind pid=%d SPU=%d NODE=%d\n", __FUNCTION__, 415 265 spu->pid, spu->number, spu->node); 266 + spuctx_switch_state(ctx, SPU_UTIL_SYSTEM); 416 267 417 - spu_switch_state(spu, SPU_UTIL_IDLE); 418 - 268 + if (spu->ctx->flags & SPU_CREATE_NOSCHED) 269 + atomic_dec(&cbe_spu_info[spu->node].reserved_spus); 270 + if (!list_empty(&ctx->aff_list)) 271 + if (atomic_dec_and_test(&ctx->gang->aff_sched_count)) 272 + ctx->gang->aff_ref_spu = NULL; 419 273 spu_switch_notify(spu, NULL); 420 274 spu_unmap_mappings(ctx); 421 275 spu_save(&ctx->csa, spu); ··· 432 278 spu->dma_callback = NULL; 433 279 spu_associate_mm(spu, NULL); 434 280 spu->pid = 0; 281 + spu->tgid = 0; 435 282 ctx->ops = &spu_backing_ops; 436 - ctx->spu = NULL; 437 283 spu->flags = 0; 438 284 spu->ctx = NULL; 439 285 ··· 441 287 (spu->stats.slb_flt - ctx->stats.slb_flt_base); 442 288 ctx->stats.class2_intr += 443 289 (spu->stats.class2_intr - ctx->stats.class2_intr_base); 290 + 291 + /* This maps the underlying spu state to idle */ 292 + spuctx_switch_state(ctx, SPU_UTIL_IDLE_LOADED); 293 + ctx->spu = NULL; 444 294 } 445 295 446 296 /** ··· 510 352 511 353 static struct spu *spu_get_idle(struct spu_context *ctx) 512 354 { 513 - struct spu *spu = NULL; 514 - int node = cpu_to_node(raw_smp_processor_id()); 515 - int n; 355 + struct spu *spu; 356 + int node, n; 516 357 358 + if (has_affinity(ctx)) { 359 + node = ctx->gang->aff_ref_spu->node; 360 + 361 + mutex_lock(&cbe_spu_info[node].list_mutex); 362 + spu = ctx_location(ctx->gang->aff_ref_spu, ctx->aff_offset, node); 363 + if (spu && spu->alloc_state == SPU_FREE) 364 + goto found; 365 + mutex_unlock(&cbe_spu_info[node].list_mutex); 366 + return NULL; 367 + } 368 + 369 + node = cpu_to_node(raw_smp_processor_id()); 517 370 for (n = 0; n < MAX_NUMNODES; n++, node++) { 518 371 node = (node < MAX_NUMNODES) ? 
node : 0; 519 372 if (!node_allowed(ctx, node)) 520 373 continue; 521 - spu = spu_alloc_node(node); 522 - if (spu) 523 - break; 374 + 375 + mutex_lock(&cbe_spu_info[node].list_mutex); 376 + list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) { 377 + if (spu->alloc_state == SPU_FREE) 378 + goto found; 379 + } 380 + mutex_unlock(&cbe_spu_info[node].list_mutex); 524 381 } 382 + 383 + return NULL; 384 + 385 + found: 386 + spu->alloc_state = SPU_USED; 387 + mutex_unlock(&cbe_spu_info[node].list_mutex); 388 + pr_debug("Got SPU %d %d\n", spu->number, spu->node); 389 + spu_init_channels(spu); 525 390 return spu; 526 391 } 527 392 ··· 574 393 if (!node_allowed(ctx, node)) 575 394 continue; 576 395 577 - mutex_lock(&spu_prio->active_mutex[node]); 578 - list_for_each_entry(spu, &spu_prio->active_list[node], list) { 396 + mutex_lock(&cbe_spu_info[node].list_mutex); 397 + list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) { 579 398 struct spu_context *tmp = spu->ctx; 580 399 581 400 if (tmp->prio > ctx->prio && 582 401 (!victim || tmp->prio > victim->prio)) 583 402 victim = spu->ctx; 584 403 } 585 - mutex_unlock(&spu_prio->active_mutex[node]); 404 + mutex_unlock(&cbe_spu_info[node].list_mutex); 586 405 587 406 if (victim) { 588 407 /* ··· 607 426 victim = NULL; 608 427 goto restart; 609 428 } 610 - spu_remove_from_active_list(spu); 429 + 430 + mutex_lock(&cbe_spu_info[node].list_mutex); 431 + cbe_spu_info[node].nr_active--; 432 + mutex_unlock(&cbe_spu_info[node].list_mutex); 433 + 611 434 spu_unbind_context(spu, victim); 612 435 victim->stats.invol_ctx_switch++; 613 436 spu->stats.invol_ctx_switch++; ··· 640 455 */ 641 456 int spu_activate(struct spu_context *ctx, unsigned long flags) 642 457 { 643 - spuctx_switch_state(ctx, SPUCTX_UTIL_SYSTEM); 644 - 645 458 do { 646 459 struct spu *spu; 647 460 ··· 660 477 if (!spu && rt_prio(ctx->prio)) 661 478 spu = find_victim(ctx); 662 479 if (spu) { 480 + int node = spu->node; 481 + 482 + mutex_lock(&cbe_spu_info[node].list_mutex); 663 483 spu_bind_context(spu, ctx); 664 - spu_add_to_active_list(spu); 484 + cbe_spu_info[node].nr_active++; 485 + mutex_unlock(&cbe_spu_info[node].list_mutex); 665 486 return 0; 666 487 } 667 488 ··· 687 500 int best; 688 501 689 502 spin_lock(&spu_prio->runq_lock); 690 - best = sched_find_first_bit(spu_prio->bitmap); 503 + best = find_first_bit(spu_prio->bitmap, prio); 691 504 while (best < prio) { 692 505 struct list_head *rq = &spu_prio->runq[best]; 693 506 ··· 714 527 if (spu) { 715 528 new = grab_runnable_context(max_prio, spu->node); 716 529 if (new || force) { 717 - spu_remove_from_active_list(spu); 530 + int node = spu->node; 531 + 532 + mutex_lock(&cbe_spu_info[node].list_mutex); 718 533 spu_unbind_context(spu, ctx); 534 + spu->alloc_state = SPU_FREE; 535 + cbe_spu_info[node].nr_active--; 536 + mutex_unlock(&cbe_spu_info[node].list_mutex); 537 + 719 538 ctx->stats.vol_ctx_switch++; 720 539 spu->stats.vol_ctx_switch++; 721 - spu_free(spu); 540 + 722 541 if (new) 723 542 wake_up(&new->stop_wq); 724 543 } ··· 743 550 */ 744 551 void spu_deactivate(struct spu_context *ctx) 745 552 { 746 - /* 747 - * We must never reach this for a nosched context, 748 - * but handle the case gracefull instead of panicing. 
749 - */ 750 - if (ctx->flags & SPU_CREATE_NOSCHED) { 751 - WARN_ON(1); 752 - return; 753 - } 754 - 755 553 __spu_deactivate(ctx, 1, MAX_PRIO); 756 - spuctx_switch_state(ctx, SPUCTX_UTIL_USER); 757 554 } 758 555 759 556 /** 760 - * spu_yield - yield a physical spu if others are waiting 557 + * spu_yield - yield a physical spu if others are waiting 761 558 * @ctx: spu context to yield 762 559 * 763 560 * Check if there is a higher priority context waiting and if yes ··· 758 575 { 759 576 if (!(ctx->flags & SPU_CREATE_NOSCHED)) { 760 577 mutex_lock(&ctx->state_mutex); 761 - if (__spu_deactivate(ctx, 0, MAX_PRIO)) 762 - spuctx_switch_state(ctx, SPUCTX_UTIL_USER); 763 - else { 764 - spuctx_switch_state(ctx, SPUCTX_UTIL_LOADED); 765 - spu_switch_state(ctx->spu, SPU_UTIL_USER); 766 - } 578 + __spu_deactivate(ctx, 0, MAX_PRIO); 767 579 mutex_unlock(&ctx->state_mutex); 768 580 } 769 581 } 770 582 771 - static void spusched_tick(struct spu_context *ctx) 583 + static noinline void spusched_tick(struct spu_context *ctx) 772 584 { 773 585 if (ctx->flags & SPU_CREATE_NOSCHED) 774 586 return; ··· 774 596 return; 775 597 776 598 /* 777 - * Unfortunately active_mutex ranks outside of state_mutex, so 599 + * Unfortunately list_mutex ranks outside of state_mutex, so 778 600 * we have to trylock here. If we fail give the context another 779 601 * tick and try again. 780 602 */ ··· 784 606 785 607 new = grab_runnable_context(ctx->prio + 1, spu->node); 786 608 if (new) { 787 - 788 - __spu_remove_from_active_list(spu); 789 609 spu_unbind_context(spu, ctx); 790 610 ctx->stats.invol_ctx_switch++; 791 611 spu->stats.invol_ctx_switch++; 792 - spu_free(spu); 612 + spu->alloc_state = SPU_FREE; 613 + cbe_spu_info[spu->node].nr_active--; 793 614 wake_up(&new->stop_wq); 794 615 /* 795 616 * We need to break out of the wait loop in ··· 809 632 * 810 633 * Return the number of tasks currently running or waiting to run. 811 634 * 812 - * Note that we don't take runq_lock / active_mutex here. Reading 635 + * Note that we don't take runq_lock / list_mutex here. Reading 813 636 * a single 32bit value is atomic on powerpc, and we don't care 814 637 * about memory ordering issues here. 
815 638 */ ··· 818 641 int nr_active = 0, node; 819 642 820 643 for (node = 0; node < MAX_NUMNODES; node++) 821 - nr_active += spu_prio->nr_active[node]; 644 + nr_active += cbe_spu_info[node].nr_active; 822 645 nr_active += spu_prio->nr_waiting; 823 646 824 647 return nr_active; ··· 858 681 859 682 static int spusched_thread(void *unused) 860 683 { 861 - struct spu *spu, *next; 684 + struct spu *spu; 862 685 int node; 863 686 864 687 while (!kthread_should_stop()) { 865 688 set_current_state(TASK_INTERRUPTIBLE); 866 689 schedule(); 867 690 for (node = 0; node < MAX_NUMNODES; node++) { 868 - mutex_lock(&spu_prio->active_mutex[node]); 869 - list_for_each_entry_safe(spu, next, 870 - &spu_prio->active_list[node], 871 - list) 872 - spusched_tick(spu->ctx); 873 - mutex_unlock(&spu_prio->active_mutex[node]); 691 + mutex_lock(&cbe_spu_info[node].list_mutex); 692 + list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) 693 + if (spu->ctx) 694 + spusched_tick(spu->ctx); 695 + mutex_unlock(&cbe_spu_info[node].list_mutex); 874 696 } 875 697 } 876 698 ··· 927 751 INIT_LIST_HEAD(&spu_prio->runq[i]); 928 752 __clear_bit(i, spu_prio->bitmap); 929 753 } 930 - __set_bit(MAX_PRIO, spu_prio->bitmap); 931 754 for (i = 0; i < MAX_NUMNODES; i++) { 932 - mutex_init(&spu_prio->active_mutex[i]); 933 - INIT_LIST_HEAD(&spu_prio->active_list[i]); 755 + mutex_init(&cbe_spu_info[i].list_mutex); 756 + INIT_LIST_HEAD(&cbe_spu_info[i].spus); 934 757 } 935 758 spin_lock_init(&spu_prio->runq_lock); 936 759 ··· 958 783 return err; 959 784 } 960 785 961 - void __exit spu_sched_exit(void) 786 + void spu_sched_exit(void) 962 787 { 963 - struct spu *spu, *tmp; 788 + struct spu *spu; 964 789 int node; 965 790 966 791 remove_proc_entry("spu_loadavg", NULL); ··· 969 794 kthread_stop(spusched_task); 970 795 971 796 for (node = 0; node < MAX_NUMNODES; node++) { 972 - mutex_lock(&spu_prio->active_mutex[node]); 973 - list_for_each_entry_safe(spu, tmp, &spu_prio->active_list[node], 974 - list) { 975 - list_del_init(&spu->list); 976 - spu_free(spu); 977 - } 978 - mutex_unlock(&spu_prio->active_mutex[node]); 797 + mutex_lock(&cbe_spu_info[node].list_mutex); 798 + list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) 799 + if (spu->alloc_state != SPU_FREE) 800 + spu->alloc_state = SPU_FREE; 801 + mutex_unlock(&cbe_spu_info[node].list_mutex); 979 802 } 980 803 kfree(spu_prio); 981 804 }
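
The placement math above boils down to two steps: aff_set_offsets() numbers each context relative to the reference context (negative offsets sit before it), and ctx_location() walks that many schedulable SPEs away from gang->aff_ref_spu. A toy linear model of the walk (the kernel list is circular; all data here is invented):

#include <stdio.h>

#define NR_SPUS 8

int main(void)
{
	int schedulable[NR_SPUS] = { 1, 1, 0, 1, 1, 1, 1, 1 };	/* spu 2 is NOSCHED */
	int ref_spu = 3;			/* where the reference ctx lands */
	int offsets[] = { -2, -1, 0, 1, 2 };	/* 5-context chain, ref in the middle */
	int i;

	for (i = 0; i < 5; i++) {
		int off = offsets[i], spu = ref_spu;

		while (off != 0) {		/* step, counting only schedulable SPEs */
			spu += (off > 0) ? 1 : -1;
			if (spu < 0 || spu >= NR_SPUS) { spu = -1; break; }
			if (schedulable[spu])
				off += (off > 0) ? -1 : 1;
		}
		printf("ctx with offset %+d -> spu %d\n", offsets[i], spu);
	}
	return 0;
}
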
+3 -3
arch/powerpc/platforms/cell/spufs/spu_restore.c
··· 84 84 unsigned int decr_running; 85 85 unsigned int decr; 86 86 87 - /* Restore, Step 6: 87 + /* Restore, Step 6(moved): 88 88 * If the LSCSA "decrementer running" flag is set 89 89 * then write the SPU_WrDec channel with the 90 90 * decrementer value from LSCSA. 91 91 */ 92 92 offset = LSCSA_QW_OFFSET(decr_status); 93 - decr_running = regs_spill[offset].slot[0]; 93 + decr_running = regs_spill[offset].slot[0] & SPU_DECR_STATUS_RUNNING; 94 94 if (decr_running) { 95 95 offset = LSCSA_QW_OFFSET(decr); 96 96 decr = regs_spill[offset].slot[0]; ··· 318 318 build_dma_list(lscsa_ea); /* Step 3. */ 319 319 restore_upper_240kb(lscsa_ea); /* Step 4. */ 320 320 /* Step 5: done by 'exit'. */ 321 - restore_decr(); /* Step 6. */ 322 321 enqueue_putllc(lscsa_ea); /* Step 7. */ 323 322 set_tag_update(); /* Step 8. */ 324 323 read_tag_status(); /* Step 9. */ 324 + restore_decr(); /* moved Step 6. */ 325 325 read_llar_status(); /* Step 10. */ 326 326 write_ppu_mb(); /* Step 11. */ 327 327 write_ppuint_mb(); /* Step 12. */
+277 -245
arch/powerpc/platforms/cell/spufs/spu_restore_dump.h_shipped
··· 10 10 0x24fd8081, 11 11 0x1cd80081, 12 12 0x33001180, 13 - 0x42030003, 13 + 0x42034003, 14 14 0x33800284, 15 15 0x1c010204, 16 16 0x40200000, ··· 24 24 0x23fffd84, 25 25 0x1c100183, 26 26 0x217ffa85, 27 - 0x3080a000, 28 - 0x3080a201, 29 - 0x3080a402, 30 - 0x3080a603, 31 - 0x3080a804, 32 - 0x3080aa05, 33 - 0x3080ac06, 34 - 0x3080ae07, 35 - 0x3080b008, 36 - 0x3080b209, 37 - 0x3080b40a, 38 - 0x3080b60b, 39 - 0x3080b80c, 40 - 0x3080ba0d, 41 - 0x3080bc0e, 42 - 0x3080be0f, 27 + 0x3080b000, 28 + 0x3080b201, 29 + 0x3080b402, 30 + 0x3080b603, 31 + 0x3080b804, 32 + 0x3080ba05, 33 + 0x3080bc06, 34 + 0x3080be07, 35 + 0x3080c008, 36 + 0x3080c209, 37 + 0x3080c40a, 38 + 0x3080c60b, 39 + 0x3080c80c, 40 + 0x3080ca0d, 41 + 0x3080cc0e, 42 + 0x3080ce0f, 43 43 0x00003ffc, 44 44 0x00000000, 45 45 0x00000000, ··· 48 48 0x3ec00083, 49 49 0xb0a14103, 50 50 0x01a00204, 51 - 0x3ec10082, 52 - 0x4202800e, 53 - 0x04000703, 54 - 0xb0a14202, 55 - 0x21a00803, 56 - 0x3fbf028d, 57 - 0x3f20068d, 58 - 0x3fbe0682, 51 + 0x3ec10083, 52 + 0x4202c002, 53 + 0xb0a14203, 54 + 0x21a00802, 55 + 0x3fbf028a, 56 + 0x3f20050a, 57 + 0x3fbe0502, 59 58 0x3fe30102, 60 59 0x21a00882, 61 - 0x3f82028f, 62 - 0x3fe3078f, 63 - 0x3fbf0784, 60 + 0x3f82028b, 61 + 0x3fe3058b, 62 + 0x3fbf0584, 64 63 0x3f200204, 65 64 0x3fbe0204, 66 65 0x3fe30204, ··· 74 75 0x21a00083, 75 76 0x40800082, 76 77 0x21a00b02, 77 - 0x10002818, 78 - 0x42a00002, 79 - 0x32800007, 80 - 0x4207000c, 81 - 0x18008208, 82 - 0x40a0000b, 83 - 0x4080020a, 84 - 0x40800709, 85 - 0x00200000, 86 - 0x42070002, 87 - 0x3ac30384, 78 + 0x10002612, 79 + 0x42a00003, 80 + 0x42074006, 81 + 0x1800c204, 82 + 0x40a00008, 83 + 0x40800789, 84 + 0x1c010305, 85 + 0x34000302, 88 86 0x1cffc489, 89 - 0x00200000, 90 - 0x18008383, 91 - 0x38830382, 92 - 0x4cffc486, 93 - 0x3ac28185, 94 - 0xb0408584, 95 - 0x28830382, 96 - 0x1c020387, 97 - 0x38828182, 98 - 0xb0408405, 99 - 0x1802c408, 100 - 0x28828182, 101 - 0x217ff886, 102 - 0x04000583, 103 - 0x21a00803, 104 - 0x3fbe0682, 105 - 0x3fe30102, 106 - 0x04000106, 107 - 0x21a00886, 108 - 0x04000603, 109 - 0x21a00903, 110 - 0x40803c02, 111 - 0x21a00982, 112 - 0x40800003, 113 - 0x04000184, 114 - 0x21a00a04, 87 + 0x3ec00303, 88 + 0x3ec00287, 89 + 0xb0408403, 90 + 0x24000302, 91 + 0x34000282, 92 + 0x1c020306, 93 + 0xb0408207, 94 + 0x18020204, 95 + 0x24000282, 96 + 0x217ffa09, 97 + 0x04000402, 98 + 0x21a00802, 99 + 0x3fbe0504, 100 + 0x3fe30204, 101 + 0x21a00884, 102 + 0x42074002, 103 + 0x21a00902, 104 + 0x40803c03, 105 + 0x21a00983, 106 + 0x04000485, 107 + 0x21a00a05, 115 108 0x40802202, 116 109 0x21a00a82, 117 - 0x42028005, 118 - 0x34208702, 119 - 0x21002282, 120 - 0x21a00804, 121 - 0x21a00886, 122 - 0x3fbf0782, 110 + 0x21a00805, 111 + 0x21a00884, 112 + 0x3fbf0582, 123 113 0x3f200102, 124 114 0x3fbe0102, 125 115 0x3fe30102, 126 116 0x21a00902, 127 117 0x40804003, 128 118 0x21a00983, 129 - 0x21a00a04, 119 + 0x21a00a05, 130 120 0x40805a02, 131 121 0x21a00a82, 132 122 0x40800083, 133 123 0x21a00b83, 134 124 0x01a00c02, 135 - 0x01a00d83, 136 - 0x3420c282, 125 + 0x30809c03, 126 + 0x34000182, 127 + 0x14004102, 128 + 0x21002082, 129 + 0x01a00d82, 130 + 0x3080a003, 131 + 0x34000182, 137 132 0x21a00e02, 138 - 0x34210283, 139 - 0x21a00f03, 140 - 0x34200284, 141 - 0x77400200, 142 - 0x3421c282, 133 + 0x3080a203, 134 + 0x34000182, 135 + 0x21a00f02, 136 + 0x3080a403, 137 + 0x34000182, 138 + 0x77400100, 139 + 0x3080a603, 140 + 0x34000182, 143 141 0x21a00702, 144 - 0x34218283, 145 - 0x21a00083, 146 - 0x34214282, 142 + 0x3080a803, 143 + 0x34000182, 144 + 0x21a00082, 145 + 0x3080aa03, 146 + 
0x34000182, 147 147 0x21a00b02, 148 - 0x4200480c, 149 - 0x00200000, 150 - 0x1c010286, 151 - 0x34220284, 152 - 0x34220302, 153 - 0x0f608203, 154 - 0x5c024204, 155 - 0x3b81810b, 156 - 0x42013c02, 157 - 0x00200000, 158 - 0x18008185, 159 - 0x38808183, 160 - 0x3b814182, 161 - 0x21004e84, 148 + 0x4020007f, 149 + 0x3080ae02, 150 + 0x42004805, 151 + 0x3080ac04, 152 + 0x34000103, 153 + 0x34000202, 154 + 0x1cffc183, 155 + 0x3b810106, 156 + 0x0f608184, 157 + 0x42013802, 158 + 0x5c020183, 159 + 0x38810102, 160 + 0x3b810102, 161 + 0x21000e83, 162 162 0x4020007f, 163 163 0x35000100, 164 - 0x000004e0, 165 - 0x000002a0, 166 - 0x000002e8, 167 - 0x00000428, 164 + 0x00000470, 165 + 0x000002f8, 166 + 0x00000430, 168 167 0x00000360, 169 - 0x000002e8, 170 - 0x000004a0, 171 - 0x00000468, 168 + 0x000002f8, 172 169 0x000003c8, 170 + 0x000004a8, 171 + 0x00000298, 173 172 0x00000360, 174 - 0x409ffe02, 175 - 0x30801203, 176 - 0x40800204, 177 - 0x3ec40085, 178 - 0x10009c09, 179 - 0x3ac10606, 180 - 0xb060c105, 181 - 0x4020007f, 182 - 0x4020007f, 183 - 0x20801203, 184 - 0x38810602, 185 - 0xb0408586, 186 - 0x28810602, 187 - 0x32004180, 188 - 0x34204702, 189 - 0x21a00382, 190 - 0x4020007f, 191 - 0x327fdc80, 192 - 0x409ffe02, 193 - 0x30801203, 194 - 0x40800204, 195 - 0x3ec40087, 196 - 0x40800405, 197 173 0x00200000, 198 - 0x40800606, 199 - 0x3ac10608, 200 - 0x3ac14609, 201 - 0x3ac1860a, 202 - 0xb060c107, 174 + 0x409ffe02, 175 + 0x30801203, 176 + 0x40800208, 177 + 0x3ec40084, 178 + 0x40800407, 179 + 0x3ac20289, 180 + 0xb060c104, 181 + 0x3ac1c284, 203 182 0x20801203, 183 + 0x38820282, 204 184 0x41004003, 205 - 0x38810602, 206 - 0x4020007f, 207 - 0xb0408188, 208 - 0x4020007f, 209 - 0x28810602, 210 - 0x41201002, 211 - 0x38814603, 212 - 0x10009c09, 213 - 0xb060c109, 214 - 0x4020007f, 215 - 0x28814603, 216 - 0x41193f83, 217 - 0x38818602, 218 - 0x60ffc003, 219 - 0xb040818a, 220 - 0x28818602, 221 - 0x32003080, 222 - 0x409ffe02, 223 - 0x30801203, 224 - 0x40800204, 225 - 0x3ec40087, 226 - 0x41201008, 227 - 0x10009c14, 228 - 0x40800405, 229 - 0x3ac10609, 230 - 0x40800606, 231 - 0x3ac1460a, 232 - 0xb060c107, 233 - 0x3ac1860b, 234 - 0x20801203, 235 - 0x38810602, 236 - 0xb0408409, 237 - 0x28810602, 238 - 0x38814603, 239 - 0xb060c40a, 240 - 0x4020007f, 241 - 0x28814603, 242 - 0x41193f83, 243 - 0x38818602, 244 - 0x60ffc003, 245 - 0xb040818b, 246 - 0x28818602, 247 - 0x32002380, 248 - 0x409ffe02, 249 - 0x30801204, 250 - 0x40800205, 251 - 0x3ec40083, 252 - 0x40800406, 253 - 0x3ac14607, 254 - 0x3ac18608, 255 - 0xb0810103, 256 - 0x41004002, 257 - 0x20801204, 258 - 0x4020007f, 259 - 0x38814603, 260 - 0x10009c0b, 261 - 0xb060c107, 262 - 0x4020007f, 263 - 0x4020007f, 264 - 0x28814603, 265 - 0x38818602, 266 - 0x4020007f, 267 - 0x4020007f, 268 - 0xb0408588, 269 - 0x28818602, 270 - 0x4020007f, 271 - 0x32001780, 272 - 0x409ffe02, 273 - 0x1000640e, 274 - 0x40800204, 275 - 0x30801203, 276 - 0x40800405, 277 - 0x3ec40087, 278 - 0x40800606, 279 - 0x3ac10608, 280 - 0x3ac14609, 281 - 0x3ac1860a, 282 - 0xb060c107, 283 - 0x20801203, 284 - 0x413d8003, 285 - 0x38810602, 286 - 0x4020007f, 287 - 0x327fd780, 288 - 0x409ffe02, 289 - 0x10007f0c, 290 - 0x40800205, 291 - 0x30801204, 292 - 0x40800406, 293 - 0x3ec40083, 294 - 0x3ac14607, 295 - 0x3ac18608, 296 - 0xb0810103, 297 - 0x413d8002, 298 - 0x20801204, 299 - 0x38814603, 300 - 0x4020007f, 301 - 0x327feb80, 302 - 0x409ffe02, 303 - 0x30801203, 304 - 0x40800204, 305 - 0x3ec40087, 306 - 0x40800405, 307 - 0x1000650a, 308 - 0x40800606, 309 - 0x3ac10608, 310 - 0x3ac14609, 311 - 0x3ac1860a, 312 - 0xb060c107, 313 - 
0x20801203, 314 - 0x38810602, 315 - 0xb0408588, 316 - 0x4020007f, 317 - 0x327fc980, 185 + 0xb0408189, 186 + 0x28820282, 187 + 0x3881c282, 188 + 0xb0408304, 189 + 0x2881c282, 318 190 0x00400000, 319 191 0x40800003, 320 - 0x4020007f, 321 192 0x35000000, 193 + 0x30809e03, 194 + 0x34000182, 195 + 0x21a00382, 196 + 0x4020007f, 197 + 0x327fde00, 198 + 0x409ffe02, 199 + 0x30801203, 200 + 0x40800206, 201 + 0x3ec40084, 202 + 0x40800407, 203 + 0x40800608, 204 + 0x3ac1828a, 205 + 0x3ac20289, 206 + 0xb060c104, 207 + 0x3ac1c284, 208 + 0x20801203, 209 + 0x38818282, 210 + 0x41004003, 211 + 0xb040818a, 212 + 0x10005b0b, 213 + 0x41201003, 214 + 0x28818282, 215 + 0x3881c282, 216 + 0xb0408184, 217 + 0x41193f83, 218 + 0x60ffc003, 219 + 0x2881c282, 220 + 0x38820282, 221 + 0xb0408189, 222 + 0x28820282, 223 + 0x327fef80, 224 + 0x409ffe02, 225 + 0x30801203, 226 + 0x40800207, 227 + 0x3ec40086, 228 + 0x4120100b, 229 + 0x10005b14, 230 + 0x40800404, 231 + 0x3ac1c289, 232 + 0x40800608, 233 + 0xb060c106, 234 + 0x3ac10286, 235 + 0x3ac2028a, 236 + 0x20801203, 237 + 0x3881c282, 238 + 0x41193f83, 239 + 0x60ffc003, 240 + 0xb0408589, 241 + 0x2881c282, 242 + 0x38810282, 243 + 0xb0408586, 244 + 0x28810282, 245 + 0x38820282, 246 + 0xb040818a, 247 + 0x28820282, 248 + 0x4020007f, 249 + 0x327fe280, 250 + 0x409ffe02, 251 + 0x30801203, 252 + 0x40800207, 253 + 0x3ec40084, 254 + 0x40800408, 255 + 0x10005b14, 256 + 0x40800609, 257 + 0x3ac1c28a, 258 + 0x3ac2028b, 259 + 0xb060c104, 260 + 0x3ac24284, 261 + 0x20801203, 262 + 0x41201003, 263 + 0x3881c282, 264 + 0xb040830a, 265 + 0x2881c282, 266 + 0x38820282, 267 + 0xb040818b, 268 + 0x41193f83, 269 + 0x60ffc003, 270 + 0x28820282, 271 + 0x38824282, 272 + 0xb0408184, 273 + 0x28824282, 274 + 0x4020007f, 275 + 0x327fd580, 276 + 0x409ffe02, 277 + 0x1000658e, 278 + 0x40800206, 279 + 0x30801203, 280 + 0x40800407, 281 + 0x3ec40084, 282 + 0x40800608, 283 + 0x3ac1828a, 284 + 0x3ac20289, 285 + 0xb060c104, 286 + 0x3ac1c284, 287 + 0x20801203, 288 + 0x413d8003, 289 + 0x38818282, 290 + 0x4020007f, 291 + 0x327fd800, 292 + 0x409ffe03, 293 + 0x30801202, 294 + 0x40800207, 295 + 0x3ec40084, 296 + 0x10005b09, 297 + 0x3ac1c288, 298 + 0xb0408184, 299 + 0x4020007f, 300 + 0x4020007f, 301 + 0x20801202, 302 + 0x3881c282, 303 + 0xb0408308, 304 + 0x2881c282, 305 + 0x327fc680, 306 + 0x409ffe02, 307 + 0x1000588b, 308 + 0x40800208, 309 + 0x30801203, 310 + 0x40800407, 311 + 0x3ec40084, 312 + 0x3ac20289, 313 + 0xb060c104, 314 + 0x3ac1c284, 315 + 0x20801203, 316 + 0x413d8003, 317 + 0x38820282, 318 + 0x327fbd80, 319 + 0x00200000, 320 + 0x00000da0, 322 321 0x00000000, 322 + 0x00000000, 323 + 0x00000000, 324 + 0x00000d90, 325 + 0x00000000, 326 + 0x00000000, 327 + 0x00000000, 328 + 0x00000db0, 329 + 0x00000000, 330 + 0x00000000, 331 + 0x00000000, 332 + 0x00000dc0, 333 + 0x00000000, 334 + 0x00000000, 335 + 0x00000000, 336 + 0x00000d80, 337 + 0x00000000, 338 + 0x00000000, 339 + 0x00000000, 340 + 0x00000df0, 341 + 0x00000000, 342 + 0x00000000, 343 + 0x00000000, 344 + 0x00000de0, 345 + 0x00000000, 346 + 0x00000000, 347 + 0x00000000, 348 + 0x00000dd0, 349 + 0x00000000, 350 + 0x00000000, 351 + 0x00000000, 352 + 0x00000e04, 353 + 0x00000000, 354 + 0x00000000, 355 + 0x00000000, 356 + 0x00000e00, 323 357 0x00000000, 324 358 0x00000000, 325 359 0x00000000,
+59 -38
arch/powerpc/platforms/cell/spufs/spufs.h
··· 40 40 struct spu_context_ops; 41 41 struct spu_gang; 42 42 43 - /* 44 - * This is the state for spu utilization reporting to userspace. 45 - * Because this state is visible to userspace it must never change and needs 46 - * to be kept strictly separate from any internal state kept by the kernel. 47 - */ 48 - enum spuctx_execution_state { 49 - SPUCTX_UTIL_USER = 0, 50 - SPUCTX_UTIL_SYSTEM, 51 - SPUCTX_UTIL_IOWAIT, 52 - SPUCTX_UTIL_LOADED, 53 - SPUCTX_UTIL_MAX 43 + enum { 44 + SPU_SCHED_WAS_ACTIVE, /* was active upon spu_acquire_saved() */ 45 + }; 46 + 47 + /* ctx->sched_flags */ 48 + enum { 49 + SPU_SCHED_NOTIFY_ACTIVE, 54 50 }; 55 51 56 52 struct spu_context { ··· 85 89 86 90 struct list_head gang_list; 87 91 struct spu_gang *gang; 92 + struct kref *prof_priv_kref; 93 + void ( * prof_priv_release) (struct kref *kref); 88 94 89 95 /* owner thread */ 90 96 pid_t tid; ··· 102 104 /* statistics */ 103 105 struct { 104 106 /* updates protected by ctx->state_mutex */ 105 - enum spuctx_execution_state execution_state; 106 - unsigned long tstamp; /* time of last ctx switch */ 107 - unsigned long times[SPUCTX_UTIL_MAX]; 107 + enum spu_utilization_state util_state; 108 + unsigned long long tstamp; /* time of last state switch */ 109 + unsigned long long times[SPU_UTIL_MAX]; 108 110 unsigned long long vol_ctx_switch; 109 111 unsigned long long invol_ctx_switch; 110 112 unsigned long long min_flt; ··· 116 118 unsigned long long class2_intr_base; /* # at last ctx switch */ 117 119 unsigned long long libassist; 118 120 } stats; 121 + 122 + struct list_head aff_list; 123 + int aff_head; 124 + int aff_offset; 119 125 }; 120 126 121 127 struct spu_gang { ··· 127 125 struct mutex mutex; 128 126 struct kref kref; 129 127 int contexts; 128 + 129 + struct spu_context *aff_ref_ctx; 130 + struct list_head aff_list_head; 131 + struct mutex aff_mutex; 132 + int aff_flags; 133 + struct spu *aff_ref_spu; 134 + atomic_t aff_sched_count; 130 135 }; 136 + 137 + /* Flag bits for spu_gang aff_flags */ 138 + #define AFF_OFFSETS_SET 1 139 + #define AFF_MERGED 2 131 140 132 141 struct mfc_dma_command { 133 142 int32_t pad; /* reserved */ ··· 203 190 extern struct tree_descr spufs_dir_nosched_contents[]; 204 191 205 192 /* system call implementation */ 206 - long spufs_run_spu(struct file *file, 207 - struct spu_context *ctx, u32 *npc, u32 *status); 208 - long spufs_create(struct nameidata *nd, 209 - unsigned int flags, mode_t mode); 193 + long spufs_run_spu(struct spu_context *ctx, u32 *npc, u32 *status); 194 + long spufs_create(struct nameidata *nd, unsigned int flags, 195 + mode_t mode, struct file *filp); 210 196 extern const struct file_operations spufs_context_fops; 211 197 212 198 /* gang management */ ··· 217 205 218 206 /* fault handling */ 219 207 int spufs_handle_class1(struct spu_context *ctx); 208 + 209 + /* affinity */ 210 + struct spu *affinity_check(struct spu_context *ctx); 220 211 221 212 /* context management */ 222 213 extern atomic_t nr_spu_contexts; ··· 242 227 void spu_forget(struct spu_context *ctx); 243 228 int spu_acquire_runnable(struct spu_context *ctx, unsigned long flags); 244 229 void spu_acquire_saved(struct spu_context *ctx); 230 + void spu_release_saved(struct spu_context *ctx); 245 231 246 232 int spu_activate(struct spu_context *ctx, unsigned long flags); 247 233 void spu_deactivate(struct spu_context *ctx); 248 234 void spu_yield(struct spu_context *ctx); 235 + void spu_switch_notify(struct spu *spu, struct spu_context *ctx); 249 236 void spu_set_timeslice(struct spu_context *ctx); 
250 237 void spu_update_sched_info(struct spu_context *ctx); 251 238 void __spu_update_sched_info(struct spu_context *ctx); 252 239 int __init spu_sched_init(void); 253 - void __exit spu_sched_exit(void); 240 + void spu_sched_exit(void); 254 241 255 242 extern char *isolated_loader; 256 243 ··· 310 293 * line. 311 294 */ 312 295 static inline void spuctx_switch_state(struct spu_context *ctx, 313 - enum spuctx_execution_state new_state) 296 + enum spu_utilization_state new_state) 314 297 { 298 + unsigned long long curtime; 299 + signed long long delta; 300 + struct timespec ts; 301 + struct spu *spu; 302 + enum spu_utilization_state old_state; 303 + 304 + ktime_get_ts(&ts); 305 + curtime = timespec_to_ns(&ts); 306 + delta = curtime - ctx->stats.tstamp; 307 + 315 308 WARN_ON(!mutex_is_locked(&ctx->state_mutex)); 309 + WARN_ON(delta < 0); 316 310 317 - if (ctx->stats.execution_state != new_state) { 318 - unsigned long curtime = jiffies; 311 + spu = ctx->spu; 312 + old_state = ctx->stats.util_state; 313 + ctx->stats.util_state = new_state; 314 + ctx->stats.tstamp = curtime; 319 315 320 - ctx->stats.times[ctx->stats.execution_state] += 321 - curtime - ctx->stats.tstamp; 322 - ctx->stats.tstamp = curtime; 323 - ctx->stats.execution_state = new_state; 324 - } 325 - } 326 - 327 - static inline void spu_switch_state(struct spu *spu, 328 - enum spuctx_execution_state new_state) 329 - { 330 - if (spu->stats.utilization_state != new_state) { 331 - unsigned long curtime = jiffies; 332 - 333 - spu->stats.times[spu->stats.utilization_state] += 334 - curtime - spu->stats.tstamp; 316 + /* 317 + * Update the physical SPU utilization statistics. 318 + */ 319 + if (spu) { 320 + ctx->stats.times[old_state] += delta; 321 + spu->stats.times[old_state] += delta; 322 + spu->stats.util_state = new_state; 335 323 spu->stats.tstamp = curtime; 336 - spu->stats.utilization_state = new_state; 337 324 } 338 325 } 339 326
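For readers following the new utilization accounting above, a minimal illustrative sketch (not part of the patch) of how a spufs path might drive spuctx_switch_state(); spufs_do_fault() is a made-up placeholder, and ctx->state_mutex must already be held, as the WARN_ON() checks:

/*
 * Illustrative only, not part of the patch: spufs_do_fault() is a
 * hypothetical helper. ctx->state_mutex must be held by the caller.
 */
#include "spufs.h"

extern int spufs_do_fault(struct spu_context *ctx);	/* hypothetical */

static int my_handle_fault(struct spu_context *ctx)
{
	int ret;

	spuctx_switch_state(ctx, SPU_UTIL_IOWAIT);	/* charge wait time */
	ret = spufs_do_fault(ctx);
	spuctx_switch_state(ctx, SPU_UTIL_SYSTEM);	/* back to system time */
	return ret;
}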
+39 -33
arch/powerpc/platforms/cell/spufs/switch.c
··· 180 180 case MFC_CNTL_SUSPEND_COMPLETE: 181 181 if (csa) { 182 182 csa->priv2.mfc_control_RW = 183 - in_be64(&priv2->mfc_control_RW) | 183 + MFC_CNTL_SUSPEND_MASK | 184 184 MFC_CNTL_SUSPEND_DMA_QUEUE; 185 185 } 186 186 break; ··· 190 190 MFC_CNTL_SUSPEND_DMA_STATUS_MASK) == 191 191 MFC_CNTL_SUSPEND_COMPLETE); 192 192 if (csa) { 193 - csa->priv2.mfc_control_RW = 194 - in_be64(&priv2->mfc_control_RW) & 195 - ~MFC_CNTL_SUSPEND_DMA_QUEUE; 193 + csa->priv2.mfc_control_RW = 0; 196 194 } 197 195 break; 198 196 } ··· 249 251 * Read MFC_CNTL[Ds]. Update saved copy of 250 252 * CSA.MFC_CNTL[Ds]. 251 253 */ 252 - if (in_be64(&priv2->mfc_control_RW) & MFC_CNTL_DECREMENTER_RUNNING) { 253 - csa->priv2.mfc_control_RW |= MFC_CNTL_DECREMENTER_RUNNING; 254 - csa->suspend_time = get_cycles(); 255 - out_be64(&priv2->spu_chnlcntptr_RW, 7ULL); 256 - eieio(); 257 - csa->spu_chnldata_RW[7] = in_be64(&priv2->spu_chnldata_RW); 258 - eieio(); 259 - } else { 260 - csa->priv2.mfc_control_RW &= ~MFC_CNTL_DECREMENTER_RUNNING; 261 - } 254 + csa->priv2.mfc_control_RW |= 255 + in_be64(&priv2->mfc_control_RW) & MFC_CNTL_DECREMENTER_RUNNING; 262 256 } 263 257 264 258 static inline void halt_mfc_decr(struct spu_state *csa, struct spu *spu) ··· 261 271 * Write MFC_CNTL[Dh] set to a '1' to halt 262 272 * the decrementer. 263 273 */ 264 - out_be64(&priv2->mfc_control_RW, MFC_CNTL_DECREMENTER_HALTED); 274 + out_be64(&priv2->mfc_control_RW, 275 + MFC_CNTL_DECREMENTER_HALTED | MFC_CNTL_SUSPEND_MASK); 265 276 eieio(); 266 277 } 267 278 ··· 606 615 static inline void save_ch_part1(struct spu_state *csa, struct spu *spu) 607 616 { 608 617 struct spu_priv2 __iomem *priv2 = spu->priv2; 609 - u64 idx, ch_indices[7] = { 0UL, 3UL, 4UL, 24UL, 25UL, 27UL }; 618 + u64 idx, ch_indices[] = { 0UL, 3UL, 4UL, 24UL, 25UL, 27UL }; 610 619 int i; 611 620 612 621 /* Save, Step 42: ··· 617 626 csa->spu_chnldata_RW[1] = in_be64(&priv2->spu_chnldata_RW); 618 627 619 628 /* Save the following CH: [0,3,4,24,25,27] */ 620 - for (i = 0; i < 7; i++) { 629 + for (i = 0; i < ARRAY_SIZE(ch_indices); i++) { 621 630 idx = ch_indices[i]; 622 631 out_be64(&priv2->spu_chnlcntptr_RW, idx); 623 632 eieio(); ··· 974 983 */ 975 984 } 976 985 977 - static inline void suspend_mfc(struct spu_state *csa, struct spu *spu) 986 + static inline void suspend_mfc_and_halt_decr(struct spu_state *csa, 987 + struct spu *spu) 978 988 { 979 989 struct spu_priv2 __iomem *priv2 = spu->priv2; 980 990 981 991 /* Restore, Step 7: 982 - * Restore, Step 47. 983 - * Write MFC_Cntl[Dh,Sc]='1','1' to suspend 992 + * Write MFC_Cntl[Dh,Sc,Sm]='1','1','0' to suspend 984 993 * the queue and halt the decrementer. 
985 994 */ 986 995 out_be64(&priv2->mfc_control_RW, MFC_CNTL_SUSPEND_DMA_QUEUE | ··· 1081 1090 static inline void reset_ch_part1(struct spu_state *csa, struct spu *spu) 1082 1091 { 1083 1092 struct spu_priv2 __iomem *priv2 = spu->priv2; 1084 - u64 ch_indices[7] = { 0UL, 3UL, 4UL, 24UL, 25UL, 27UL }; 1093 + u64 ch_indices[] = { 0UL, 3UL, 4UL, 24UL, 25UL, 27UL }; 1085 1094 u64 idx; 1086 1095 int i; 1087 1096 ··· 1093 1102 out_be64(&priv2->spu_chnldata_RW, 0UL); 1094 1103 1095 1104 /* Reset the following CH: [0,3,4,24,25,27] */ 1096 - for (i = 0; i < 7; i++) { 1105 + for (i = 0; i < ARRAY_SIZE(ch_indices); i++) { 1097 1106 idx = ch_indices[i]; 1098 1107 out_be64(&priv2->spu_chnlcntptr_RW, idx); 1099 1108 eieio(); ··· 1280 1289 cycles_t resume_time = get_cycles(); 1281 1290 cycles_t delta_time = resume_time - csa->suspend_time; 1282 1291 1292 + csa->lscsa->decr_status.slot[0] = SPU_DECR_STATUS_RUNNING; 1293 + if (csa->lscsa->decr.slot[0] < delta_time) { 1294 + csa->lscsa->decr_status.slot[0] |= 1295 + SPU_DECR_STATUS_WRAPPED; 1296 + } 1297 + 1283 1298 csa->lscsa->decr.slot[0] -= delta_time; 1299 + } else { 1300 + csa->lscsa->decr_status.slot[0] = 0; 1284 1301 } 1285 1302 } 1286 1303 ··· 1395 1396 * 16kb of local storage from CSA. 1396 1397 */ 1397 1398 send_mfc_dma(spu, addr, ls_offset, size, tag, rclass, cmd); 1399 + } 1400 + 1401 + static inline void suspend_mfc(struct spu_state *csa, struct spu *spu) 1402 + { 1403 + struct spu_priv2 __iomem *priv2 = spu->priv2; 1404 + 1405 + /* Restore, Step 47. 1406 + * Write MFC_Cntl[Sc,Sm]='1','0' to suspend 1407 + * the queue. 1408 + */ 1409 + out_be64(&priv2->mfc_control_RW, MFC_CNTL_SUSPEND_DMA_QUEUE); 1410 + eieio(); 1398 1411 } 1399 1412 1400 1413 static inline void clear_interrupts(struct spu_state *csa, struct spu *spu) ··· 1559 1548 * "wrapped" flag is set, OR in a '1' to 1560 1549 * CSA.SPU_Event_Status[Tm]. 1561 1550 */ 1562 - if (csa->lscsa->decr_status.slot[0] == 1) { 1551 + if (csa->lscsa->decr_status.slot[0] & SPU_DECR_STATUS_WRAPPED) { 1563 1552 csa->spu_chnldata_RW[0] |= 0x20; 1564 1553 } 1565 - if ((csa->lscsa->decr_status.slot[0] == 1) && 1554 + if ((csa->lscsa->decr_status.slot[0] & SPU_DECR_STATUS_WRAPPED) && 1566 1555 (csa->spu_chnlcnt_RW[0] == 0 && 1567 1556 ((csa->spu_chnldata_RW[2] & 0x20) == 0x0) && 1568 1557 ((csa->spu_chnldata_RW[0] & 0x20) != 0x1))) { ··· 1573 1562 static inline void restore_ch_part1(struct spu_state *csa, struct spu *spu) 1574 1563 { 1575 1564 struct spu_priv2 __iomem *priv2 = spu->priv2; 1576 - u64 idx, ch_indices[7] = { 0UL, 3UL, 4UL, 24UL, 25UL, 27UL }; 1565 + u64 idx, ch_indices[] = { 0UL, 3UL, 4UL, 24UL, 25UL, 27UL }; 1577 1566 int i; 1578 1567 1579 1568 /* Restore, Step 59: 1569 + * Restore the following CH: [0,3,4,24,25,27] 1580 1570 */ 1581 - 1582 - /* Restore CH 1 without count */ 1583 - out_be64(&priv2->spu_chnlcntptr_RW, 1); 1584 - out_be64(&priv2->spu_chnldata_RW, csa->spu_chnldata_RW[1]); 1585 - 1586 - /* Restore the following CH: [0,3,4,24,25,27] */ 1587 - for (i = 0; i < 7; i++) { 1571 + for (i = 0; i < ARRAY_SIZE(ch_indices); i++) { 1588 1572 idx = ch_indices[i]; 1589 1573 out_be64(&priv2->spu_chnlcntptr_RW, idx); 1590 1574 eieio(); ··· 1938 1932 set_switch_pending(prev, spu); /* Step 5. */ 1939 1933 stop_spu_isolate(spu); /* NEW. */ 1940 1934 remove_other_spu_access(prev, spu); /* Step 6. */ 1941 - suspend_mfc(prev, spu); /* Step 7. */ 1935 + suspend_mfc_and_halt_decr(prev, spu); /* Step 7. */ 1942 1936 wait_suspend_mfc_complete(prev, spu); /* Step 8. 
*/ 1943 1937 if (!suspend_spe(prev, spu)) /* Step 9. */ 1944 1938 clear_spu_status(prev, spu); /* Step 10. */
+29 -5
arch/powerpc/platforms/cell/spufs/syscalls.c
··· 47 47 goto out; 48 48 49 49 i = SPUFS_I(filp->f_path.dentry->d_inode); 50 - ret = spufs_run_spu(filp, i->i_ctx, &npc, &status); 50 + ret = spufs_run_spu(i->i_ctx, &npc, &status); 51 51 52 52 if (put_user(npc, unpc)) 53 53 ret = -EFAULT; ··· 76 76 } 77 77 #endif 78 78 79 - asmlinkage long sys_spu_create(const char __user *pathname, 80 - unsigned int flags, mode_t mode) 79 + asmlinkage long do_spu_create(const char __user *pathname, unsigned int flags, 80 + mode_t mode, struct file *neighbor) 81 81 { 82 82 char *tmp; 83 83 int ret; ··· 90 90 ret = path_lookup(tmp, LOOKUP_PARENT| 91 91 LOOKUP_OPEN|LOOKUP_CREATE, &nd); 92 92 if (!ret) { 93 - ret = spufs_create(&nd, flags, mode); 93 + ret = spufs_create(&nd, flags, mode, neighbor); 94 94 path_release(&nd); 95 95 } 96 96 putname(tmp); ··· 99 99 return ret; 100 100 } 101 101 102 + #ifndef MODULE 103 + asmlinkage long sys_spu_create(const char __user *pathname, unsigned int flags, 104 + mode_t mode, int neighbor_fd) 105 + { 106 + int fput_needed; 107 + struct file *neighbor; 108 + long ret; 109 + 110 + if (flags & SPU_CREATE_AFFINITY_SPU) { 111 + ret = -EBADF; 112 + neighbor = fget_light(neighbor_fd, &fput_needed); 113 + if (neighbor) { 114 + ret = do_spu_create(pathname, flags, mode, neighbor); 115 + fput_light(neighbor, fput_needed); 116 + } 117 + } 118 + else { 119 + ret = do_spu_create(pathname, flags, mode, NULL); 120 + } 121 + 122 + return ret; 123 + } 124 + #endif 125 + 102 126 struct spufs_calls spufs_calls = { 103 - .create_thread = sys_spu_create, 127 + .create_thread = do_spu_create, 104 128 .spu_run = do_spu_run, 105 129 .owner = THIS_MODULE, 106 130 };
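To illustrate the extended system call interface, a hedged userspace sketch follows; the paths are invented, error handling is omitted, it assumes a powerpc libc exposing __NR_spu_create, and any further constraints (such as gang membership for affinity contexts) are enforced by spufs itself:

/*
 * Illustrative userspace sketch, not part of the patch. Paths are
 * made up and error handling is omitted.
 */
#include <sys/syscall.h>
#include <unistd.h>

#define SPU_CREATE_AFFINITY_SPU	0x0010	/* value from asm-powerpc/spu.h */

int create_affine_pair(void)
{
	int ref, ctx;

	/* reference context the new one should be placed close to */
	ref = syscall(__NR_spu_create, "/spu/mygang/ref", 0, 0755);

	/* the fourth argument is only used with SPU_CREATE_AFFINITY_SPU */
	ctx = syscall(__NR_spu_create, "/spu/mygang/worker",
		      SPU_CREATE_AFFINITY_SPU, 0755, ref);
	return ctx;
}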
+1
arch/powerpc/sysdev/Makefile
··· 17 17 mv64x60-$(CONFIG_PCI) += mv64x60_pci.o 18 18 obj-$(CONFIG_MV64X60) += $(mv64x60-y) mv64x60_pic.o mv64x60_dev.o 19 19 obj-$(CONFIG_RTC_DRV_CMOS) += rtc_cmos_setup.o 20 + obj-$(CONFIG_AXON_RAM) += axonram.o 20 21 21 22 # contains only the suspend handler for time 22 23 ifeq ($(CONFIG_RTC_CLASS),)
+381
arch/powerpc/sysdev/axonram.c
··· 1 + /* 2 + * (C) Copyright IBM Deutschland Entwicklung GmbH 2006 3 + * 4 + * Author: Maxim Shchetynin <maxim@de.ibm.com> 5 + * 6 + * Axon DDR2 device driver. 7 + * It registers one block device per Axon's DDR2 memory bank found on a system. 8 + * Block devices are called axonram?, their major and minor numbers are 9 + * available in /proc/devices, /proc/partitions or in /sys/block/axonram?/dev. 10 + * 11 + * This program is free software; you can redistribute it and/or modify 12 + * it under the terms of the GNU General Public License as published by 13 + * the Free Software Foundation; either version 2, or (at your option) 14 + * any later version. 15 + * 16 + * This program is distributed in the hope that it will be useful, 17 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 18 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 19 + * GNU General Public License for more details. 20 + * 21 + * You should have received a copy of the GNU General Public License 22 + * along with this program; if not, write to the Free Software 23 + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 24 + */ 25 + 26 + #include <linux/bio.h> 27 + #include <linux/blkdev.h> 28 + #include <linux/buffer_head.h> 29 + #include <linux/device.h> 30 + #include <linux/errno.h> 31 + #include <linux/fs.h> 32 + #include <linux/genhd.h> 33 + #include <linux/interrupt.h> 34 + #include <linux/io.h> 35 + #include <linux/ioport.h> 36 + #include <linux/irq.h> 37 + #include <linux/irqreturn.h> 38 + #include <linux/kernel.h> 39 + #include <linux/mm.h> 40 + #include <linux/mod_devicetable.h> 41 + #include <linux/module.h> 42 + #include <linux/slab.h> 43 + #include <linux/string.h> 44 + #include <linux/types.h> 45 + #include <asm/of_device.h> 46 + #include <asm/of_platform.h> 47 + #include <asm/page.h> 48 + #include <asm/prom.h> 49 + 50 + #define AXON_RAM_MODULE_NAME "axonram" 51 + #define AXON_RAM_DEVICE_NAME "axonram" 52 + #define AXON_RAM_MINORS_PER_DISK 16 53 + #define AXON_RAM_BLOCK_SHIFT PAGE_SHIFT 54 + #define AXON_RAM_BLOCK_SIZE 1 << AXON_RAM_BLOCK_SHIFT 55 + #define AXON_RAM_SECTOR_SHIFT 9 56 + #define AXON_RAM_SECTOR_SIZE 1 << AXON_RAM_SECTOR_SHIFT 57 + #define AXON_RAM_IRQ_FLAGS IRQF_SHARED | IRQF_TRIGGER_RISING 58 + 59 + struct axon_ram_bank { 60 + struct of_device *device; 61 + struct gendisk *disk; 62 + unsigned int irq_correctable; 63 + unsigned int irq_uncorrectable; 64 + unsigned long ph_addr; 65 + unsigned long io_addr; 66 + unsigned long size; 67 + unsigned long ecc_counter; 68 + }; 69 + 70 + static ssize_t 71 + axon_ram_sysfs_ecc(struct device *dev, struct device_attribute *attr, char *buf) 72 + { 73 + struct of_device *device = to_of_device(dev); 74 + struct axon_ram_bank *bank = device->dev.platform_data; 75 + 76 + BUG_ON(!bank); 77 + 78 + return sprintf(buf, "%ld\n", bank->ecc_counter); 79 + } 80 + 81 + static DEVICE_ATTR(ecc, S_IRUGO, axon_ram_sysfs_ecc, NULL); 82 + 83 + /** 84 + * axon_ram_irq_handler - interrupt handler for Axon RAM ECC 85 + * @irq: interrupt ID 86 + * @dev: pointer to of_device 87 + */ 88 + static irqreturn_t 89 + axon_ram_irq_handler(int irq, void *dev) 90 + { 91 + struct of_device *device = dev; 92 + struct axon_ram_bank *bank = device->dev.platform_data; 93 + 94 + BUG_ON(!bank); 95 + 96 + if (irq == bank->irq_correctable) { 97 + dev_err(&device->dev, "Correctable memory error occured\n"); 98 + bank->ecc_counter++; 99 + return IRQ_HANDLED; 100 + } else if (irq == bank->irq_uncorrectable) { 101 + dev_err(&device->dev, "Uncorrectable memory error 
occured\n"); 102 + panic("Critical ECC error on %s", device->node->full_name); 103 + } 104 + 105 + return IRQ_NONE; 106 + } 107 + 108 + /** 109 + * axon_ram_make_request - make_request() method for block device 110 + * @queue, @bio: see blk_queue_make_request() 111 + */ 112 + static int 113 + axon_ram_make_request(struct request_queue *queue, struct bio *bio) 114 + { 115 + struct axon_ram_bank *bank = bio->bi_bdev->bd_disk->private_data; 116 + unsigned long phys_mem, phys_end; 117 + void *user_mem; 118 + struct bio_vec *vec; 119 + unsigned int transfered; 120 + unsigned short idx; 121 + int rc = 0; 122 + 123 + phys_mem = bank->io_addr + (bio->bi_sector << AXON_RAM_SECTOR_SHIFT); 124 + phys_end = bank->io_addr + bank->size; 125 + transfered = 0; 126 + bio_for_each_segment(vec, bio, idx) { 127 + if (unlikely(phys_mem + vec->bv_len > phys_end)) { 128 + bio_io_error(bio, bio->bi_size); 129 + rc = -ERANGE; 130 + break; 131 + } 132 + 133 + user_mem = page_address(vec->bv_page) + vec->bv_offset; 134 + if (bio_data_dir(bio) == READ) 135 + memcpy(user_mem, (void *) phys_mem, vec->bv_len); 136 + else 137 + memcpy((void *) phys_mem, user_mem, vec->bv_len); 138 + 139 + phys_mem += vec->bv_len; 140 + transfered += vec->bv_len; 141 + } 142 + bio_endio(bio, transfered, 0); 143 + 144 + return rc; 145 + } 146 + 147 + /** 148 + * axon_ram_direct_access - direct_access() method for block device 149 + * @device, @sector, @data: see block_device_operations method 150 + */ 151 + static int 152 + axon_ram_direct_access(struct block_device *device, sector_t sector, 153 + unsigned long *data) 154 + { 155 + struct axon_ram_bank *bank = device->bd_disk->private_data; 156 + loff_t offset; 157 + 158 + offset = sector << AXON_RAM_SECTOR_SHIFT; 159 + if (offset >= bank->size) { 160 + dev_err(&bank->device->dev, "Access outside of address space\n"); 161 + return -ERANGE; 162 + } 163 + 164 + *data = bank->ph_addr + offset; 165 + 166 + return 0; 167 + } 168 + 169 + static struct block_device_operations axon_ram_devops = { 170 + .owner = THIS_MODULE, 171 + .direct_access = axon_ram_direct_access 172 + }; 173 + 174 + /** 175 + * axon_ram_probe - probe() method for platform driver 176 + * @device, @device_id: see of_platform_driver method 177 + */ 178 + static int 179 + axon_ram_probe(struct of_device *device, const struct of_device_id *device_id) 180 + { 181 + static int axon_ram_bank_id = -1; 182 + struct axon_ram_bank *bank; 183 + struct resource resource; 184 + int rc = 0; 185 + 186 + axon_ram_bank_id++; 187 + 188 + dev_info(&device->dev, "Found memory controller on %s\n", 189 + device->node->full_name); 190 + 191 + bank = kzalloc(sizeof(struct axon_ram_bank), GFP_KERNEL); 192 + if (bank == NULL) { 193 + dev_err(&device->dev, "Out of memory\n"); 194 + rc = -ENOMEM; 195 + goto failed; 196 + } 197 + 198 + device->dev.platform_data = bank; 199 + 200 + bank->device = device; 201 + 202 + if (of_address_to_resource(device->node, 0, &resource) != 0) { 203 + dev_err(&device->dev, "Cannot access device tree\n"); 204 + rc = -EFAULT; 205 + goto failed; 206 + } 207 + 208 + bank->size = resource.end - resource.start + 1; 209 + 210 + if (bank->size == 0) { 211 + dev_err(&device->dev, "No DDR2 memory found for %s%d\n", 212 + AXON_RAM_DEVICE_NAME, axon_ram_bank_id); 213 + rc = -ENODEV; 214 + goto failed; 215 + } 216 + 217 + dev_info(&device->dev, "Register DDR2 memory device %s%d with %luMB\n", 218 + AXON_RAM_DEVICE_NAME, axon_ram_bank_id, bank->size >> 20); 219 + 220 + bank->ph_addr = resource.start; 221 + bank->io_addr = (unsigned 
long) ioremap_flags( 222 + bank->ph_addr, bank->size, _PAGE_NO_CACHE); 223 + if (bank->io_addr == 0) { 224 + dev_err(&device->dev, "ioremap() failed\n"); 225 + rc = -EFAULT; 226 + goto failed; 227 + } 228 + 229 + bank->disk = alloc_disk(AXON_RAM_MINORS_PER_DISK); 230 + if (bank->disk == NULL) { 231 + dev_err(&device->dev, "Cannot register disk\n"); 232 + rc = -EFAULT; 233 + goto failed; 234 + } 235 + 236 + bank->disk->first_minor = 0; 237 + bank->disk->fops = &axon_ram_devops; 238 + bank->disk->private_data = bank; 239 + bank->disk->driverfs_dev = &device->dev; 240 + 241 + sprintf(bank->disk->disk_name, "%s%d", 242 + AXON_RAM_DEVICE_NAME, axon_ram_bank_id); 243 + bank->disk->major = register_blkdev(0, bank->disk->disk_name); 244 + if (bank->disk->major < 0) { 245 + dev_err(&device->dev, "Cannot register block device\n"); 246 + rc = -EFAULT; 247 + goto failed; 248 + } 249 + 250 + bank->disk->queue = blk_alloc_queue(GFP_KERNEL); 251 + if (bank->disk->queue == NULL) { 252 + dev_err(&device->dev, "Cannot register disk queue\n"); 253 + rc = -EFAULT; 254 + goto failed; 255 + } 256 + 257 + set_capacity(bank->disk, bank->size >> AXON_RAM_SECTOR_SHIFT); 258 + blk_queue_make_request(bank->disk->queue, axon_ram_make_request); 259 + blk_queue_hardsect_size(bank->disk->queue, AXON_RAM_SECTOR_SIZE); 260 + add_disk(bank->disk); 261 + 262 + bank->irq_correctable = irq_of_parse_and_map(device->node, 0); 263 + bank->irq_uncorrectable = irq_of_parse_and_map(device->node, 1); 264 + if ((bank->irq_correctable <= 0) || (bank->irq_uncorrectable <= 0)) { 265 + dev_err(&device->dev, "Cannot access ECC interrupt ID\n"); 266 + rc = -EFAULT; 267 + goto failed; 268 + } 269 + 270 + rc = request_irq(bank->irq_correctable, axon_ram_irq_handler, 271 + AXON_RAM_IRQ_FLAGS, bank->disk->disk_name, device); 272 + if (rc != 0) { 273 + dev_err(&device->dev, "Cannot register ECC interrupt handler\n"); 274 + bank->irq_correctable = bank->irq_uncorrectable = 0; 275 + rc = -EFAULT; 276 + goto failed; 277 + } 278 + 279 + rc = request_irq(bank->irq_uncorrectable, axon_ram_irq_handler, 280 + AXON_RAM_IRQ_FLAGS, bank->disk->disk_name, device); 281 + if (rc != 0) { 282 + dev_err(&device->dev, "Cannot register ECC interrupt handler\n"); 283 + bank->irq_uncorrectable = 0; 284 + rc = -EFAULT; 285 + goto failed; 286 + } 287 + 288 + rc = device_create_file(&device->dev, &dev_attr_ecc); 289 + if (rc != 0) { 290 + dev_err(&device->dev, "Cannot create sysfs file\n"); 291 + rc = -EFAULT; 292 + goto failed; 293 + } 294 + 295 + return 0; 296 + 297 + failed: 298 + if (bank != NULL) { 299 + if (bank->irq_uncorrectable > 0) 300 + free_irq(bank->irq_uncorrectable, device); 301 + if (bank->irq_correctable > 0) 302 + free_irq(bank->irq_correctable, device); 303 + if (bank->disk != NULL) { 304 + if (bank->disk->queue != NULL) 305 + blk_cleanup_queue(bank->disk->queue); 306 + if (bank->disk->major > 0) 307 + unregister_blkdev(bank->disk->major, 308 + bank->disk->disk_name); 309 + del_gendisk(bank->disk); 310 + } 311 + device->dev.platform_data = NULL; 312 + if (bank->io_addr != 0) 313 + iounmap((void __iomem *) bank->io_addr); 314 + kfree(bank); 315 + } 316 + 317 + return rc; 318 + } 319 + 320 + /** 321 + * axon_ram_remove - remove() method for platform driver 322 + * @device: see of_platform_driver method 323 + */ 324 + static int 325 + axon_ram_remove(struct of_device *device) 326 + { 327 + struct axon_ram_bank *bank = device->dev.platform_data; 328 + 329 + BUG_ON(!bank || !bank->disk); 330 + 331 + device_remove_file(&device->dev, &dev_attr_ecc); 332 + 
free_irq(bank->irq_uncorrectable, device); 333 + free_irq(bank->irq_correctable, device); 334 + blk_cleanup_queue(bank->disk->queue); 335 + unregister_blkdev(bank->disk->major, bank->disk->disk_name); 336 + del_gendisk(bank->disk); 337 + iounmap((void __iomem *) bank->io_addr); 338 + kfree(bank); 339 + 340 + return 0; 341 + } 342 + 343 + static struct of_device_id axon_ram_device_id[] = { 344 + { 345 + .type = "dma-memory" 346 + }, 347 + {} 348 + }; 349 + 350 + static struct of_platform_driver axon_ram_driver = { 351 + .owner = THIS_MODULE, 352 + .name = AXON_RAM_MODULE_NAME, 353 + .match_table = axon_ram_device_id, 354 + .probe = axon_ram_probe, 355 + .remove = axon_ram_remove 356 + }; 357 + 358 + /** 359 + * axon_ram_init 360 + */ 361 + static int __init 362 + axon_ram_init(void) 363 + { 364 + return of_register_platform_driver(&axon_ram_driver); 365 + } 366 + 367 + /** 368 + * axon_ram_exit 369 + */ 370 + static void __exit 371 + axon_ram_exit(void) 372 + { 373 + of_unregister_platform_driver(&axon_ram_driver); 374 + } 375 + 376 + module_init(axon_ram_init); 377 + module_exit(axon_ram_exit); 378 + 379 + MODULE_LICENSE("GPL"); 380 + MODULE_AUTHOR("Maxim Shchetynin <maxim@de.ibm.com>"); 381 + MODULE_DESCRIPTION("Axon DDR2 RAM device driver for IBM Cell BE");
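As a purely speculative illustration of the driver's direct_access() method, the sketch below shows a caller retrieving the bank address backing a sector; the function name and the pr_debug() output are placeholders:

/*
 * Illustrative only, not part of the patch: shows a caller of the
 * direct_access() method registered above.
 */
#include <linux/blkdev.h>
#include <linux/genhd.h>
#include <linux/kernel.h>

static int my_probe_axonram_sector(struct block_device *bdev, sector_t sector)
{
	unsigned long addr;
	int rc;

	rc = bdev->bd_disk->fops->direct_access(bdev, sector, &addr);
	if (rc)
		return rc;	/* -ERANGE past the end of the bank */

	pr_debug("sector %llu is backed by address %#lx\n",
		 (unsigned long long) sector, addr);
	return 0;
}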
+22 -29
arch/powerpc/sysdev/pmi.c
··· 48 48 struct work_struct work; 49 49 }; 50 50 51 + static struct pmi_data *data; 51 52 52 53 static int pmi_irq_handler(int irq, void *dev_id) 53 54 { 54 - struct pmi_data *data; 55 55 u8 type; 56 56 int rc; 57 - 58 - data = dev_id; 59 57 60 58 spin_lock(&data->pmi_spinlock); 61 59 ··· 109 111 110 112 static void pmi_notify_handlers(struct work_struct *work) 111 113 { 112 - struct pmi_data *data; 113 114 struct pmi_handler *handler; 114 - 115 - data = container_of(work, struct pmi_data, work); 116 115 117 116 spin_lock(&data->handler_spinlock); 118 117 list_for_each_entry(handler, &data->handler, node) { 119 118 pr_debug(KERN_INFO "pmi: notifying handler %p\n", handler); 120 119 if (handler->type == data->msg.type) 121 - handler->handle_pmi_message(data->dev, data->msg); 120 + handler->handle_pmi_message(data->msg); 122 121 } 123 122 spin_unlock(&data->handler_spinlock); 124 123 } ··· 124 129 const struct of_device_id *match) 125 130 { 126 131 struct device_node *np = dev->node; 127 - struct pmi_data *data; 128 132 int rc; 133 + 134 + if (data) { 135 + printk(KERN_ERR "pmi: driver has already been initialized.\n"); 136 + rc = -EBUSY; 137 + goto out; 138 + } 129 139 130 140 data = kzalloc(sizeof(struct pmi_data), GFP_KERNEL); 131 141 if (!data) { ··· 154 154 155 155 INIT_WORK(&data->work, pmi_notify_handlers); 156 156 157 - dev->dev.driver_data = data; 158 157 data->dev = dev; 159 158 160 159 data->irq = irq_of_parse_and_map(np, 0); ··· 163 164 goto error_cleanup_iomap; 164 165 } 165 166 166 - rc = request_irq(data->irq, pmi_irq_handler, 0, "pmi", data); 167 + rc = request_irq(data->irq, pmi_irq_handler, 0, "pmi", NULL); 167 168 if (rc) { 168 169 printk(KERN_ERR "pmi: can't request IRQ %d: returned %d\n", 169 170 data->irq, rc); ··· 186 187 187 188 static int pmi_of_remove(struct of_device *dev) 188 189 { 189 - struct pmi_data *data; 190 190 struct pmi_handler *handler, *tmp; 191 191 192 - data = dev->dev.driver_data; 193 - 194 - free_irq(data->irq, data); 192 + free_irq(data->irq, NULL); 195 193 iounmap(data->pmi_reg); 196 194 197 195 spin_lock(&data->handler_spinlock); ··· 198 202 199 203 spin_unlock(&data->handler_spinlock); 200 204 201 - kfree(dev->dev.driver_data); 205 + kfree(data); 206 + data = NULL; 202 207 203 208 return 0; 204 209 } ··· 223 226 } 224 227 module_exit(pmi_module_exit); 225 228 226 - void pmi_send_message(struct of_device *device, pmi_message_t msg) 229 + int pmi_send_message(pmi_message_t msg) 227 230 { 228 - struct pmi_data *data; 229 231 unsigned long flags; 230 232 DECLARE_COMPLETION_ONSTACK(completion); 231 233 232 - data = device->dev.driver_data; 234 + if (!data) 235 + return -ENODEV; 233 236 234 237 mutex_lock(&data->msg_mutex); 235 238 ··· 253 256 data->completion = NULL; 254 257 255 258 mutex_unlock(&data->msg_mutex); 259 + 260 + return 0; 256 261 } 257 262 EXPORT_SYMBOL_GPL(pmi_send_message); 258 263 259 - void pmi_register_handler(struct of_device *device, 260 - struct pmi_handler *handler) 264 + int pmi_register_handler(struct pmi_handler *handler) 261 265 { 262 - struct pmi_data *data; 263 - data = device->dev.driver_data; 264 - 265 266 if (!data) 266 - return; 267 + return -ENODEV; 267 268 268 269 spin_lock(&data->handler_spinlock); 269 270 list_add_tail(&handler->node, &data->handler); 270 271 spin_unlock(&data->handler_spinlock); 272 + 273 + return 0; 271 274 } 272 275 EXPORT_SYMBOL_GPL(pmi_register_handler); 273 276 274 - void pmi_unregister_handler(struct of_device *device, 275 - struct pmi_handler *handler) 277 + void 
pmi_unregister_handler(struct pmi_handler *handler) 276 278 { 277 - struct pmi_data *data; 278 - data = device->dev.driver_data; 279 - 280 279 if (!data) 281 280 return; 282 281
+2 -1
drivers/oprofile/buffer_sync.c
··· 26 26 #include <linux/profile.h> 27 27 #include <linux/module.h> 28 28 #include <linux/fs.h> 29 + #include <linux/oprofile.h> 29 30 #include <linux/sched.h> 30 - 31 + 31 32 #include "oprofile_stats.h" 32 33 #include "event_buffer.h" 33 34 #include "cpu_buffer.h"
+1 -19
drivers/oprofile/event_buffer.h
··· 19 19 20 20 /* wake up the process sleeping on the event file */ 21 21 void wake_up_buffer_waiter(void); 22 - 23 - /* Each escaped entry is prefixed by ESCAPE_CODE 24 - * then one of the following codes, then the 25 - * relevant data. 26 - */ 27 - #define ESCAPE_CODE ~0UL 28 - #define CTX_SWITCH_CODE 1 29 - #define CPU_SWITCH_CODE 2 30 - #define COOKIE_SWITCH_CODE 3 31 - #define KERNEL_ENTER_SWITCH_CODE 4 32 - #define KERNEL_EXIT_SWITCH_CODE 5 33 - #define MODULE_LOADED_CODE 6 34 - #define CTX_TGID_CODE 7 35 - #define TRACE_BEGIN_CODE 8 36 - #define TRACE_END_CODE 9 37 - 22 + 38 23 #define INVALID_COOKIE ~0UL 39 24 #define NO_COOKIE 0UL 40 25 41 - /* add data to the event buffer */ 42 - void add_event_entry(unsigned long data); 43 - 44 26 extern const struct file_operations event_buffer_fops; 45 27 46 28 /* mutex between sync_cpu_buffers() and the
+28
drivers/oprofile/oprof.c
··· 53 53 * us missing task deaths and eventually oopsing 54 54 * when trying to process the event buffer. 55 55 */ 56 + if (oprofile_ops.sync_start) { 57 + int sync_ret = oprofile_ops.sync_start(); 58 + switch (sync_ret) { 59 + case 0: 60 + goto post_sync; 61 + case 1: 62 + goto do_generic; 63 + case -1: 64 + goto out3; 65 + default: 66 + goto out3; 67 + } 68 + } 69 + do_generic: 56 70 if ((err = sync_start())) 57 71 goto out3; 58 72 73 + post_sync: 59 74 is_setup = 1; 60 75 mutex_unlock(&start_mutex); 61 76 return 0; ··· 133 118 void oprofile_shutdown(void) 134 119 { 135 120 mutex_lock(&start_mutex); 121 + if (oprofile_ops.sync_stop) { 122 + int sync_ret = oprofile_ops.sync_stop(); 123 + switch (sync_ret) { 124 + case 0: 125 + goto post_sync; 126 + case 1: 127 + goto do_generic; 128 + default: 129 + goto post_sync; 130 + } 131 + } 132 + do_generic: 136 133 sync_stop(); 134 + post_sync: 137 135 if (oprofile_ops.shutdown) 138 136 oprofile_ops.shutdown(); 139 137 is_setup = 0;
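The dispatch above relies on the return convention of the new hooks: 0 means the architecture performed its own buffer sync, 1 falls back to the generic sync_start()/sync_stop(), and a negative value aborts setup, so existing architectures without the hooks keep their old behaviour. A minimal illustrative arch-side sketch, with all my_* names invented:

/*
 * Illustrative only, not part of the patch. All my_* names are
 * placeholders for a real architecture implementation.
 */
static int my_spu_profiling;		/* hypothetical mode flag */

static int my_start_spu_sync(void)	/* hypothetical helper */
{
	return 0;			/* 0 on success */
}

static int my_arch_sync_start(void)
{
	if (!my_spu_profiling)
		return 1;	/* run the generic sync_start() */

	if (my_start_spu_sync())
		return -1;	/* error: setup is aborted */

	return 0;		/* arch-specific sync is in place */
}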
+6 -4
include/asm-powerpc/oprofile_impl.h
··· 39 39 40 40 /* Per-arch configuration */ 41 41 struct op_powerpc_model { 42 - void (*reg_setup) (struct op_counter_config *, 42 + int (*reg_setup) (struct op_counter_config *, 43 43 struct op_system_config *, 44 44 int num_counters); 45 - void (*cpu_setup) (struct op_counter_config *); 46 - void (*start) (struct op_counter_config *); 47 - void (*global_start) (struct op_counter_config *); 45 + int (*cpu_setup) (struct op_counter_config *); 46 + int (*start) (struct op_counter_config *); 47 + int (*global_start) (struct op_counter_config *); 48 48 void (*stop) (void); 49 49 void (*global_stop) (void); 50 + int (*sync_start)(void); 51 + int (*sync_stop)(void); 50 52 void (*handle_interrupt) (struct pt_regs *, 51 53 struct op_counter_config *); 52 54 int num_counters;
+4 -4
include/asm-powerpc/pmi.h
··· 55 55 struct pmi_handler { 56 56 struct list_head node; 57 57 u8 type; 58 - void (*handle_pmi_message) (struct of_device *, pmi_message_t); 58 + void (*handle_pmi_message) (pmi_message_t); 59 59 }; 60 60 61 - void pmi_register_handler(struct of_device *, struct pmi_handler *); 62 - void pmi_unregister_handler(struct of_device *, struct pmi_handler *); 61 + int pmi_register_handler(struct pmi_handler *); 62 + void pmi_unregister_handler(struct pmi_handler *); 63 63 64 - void pmi_send_message(struct of_device *, pmi_message_t); 64 + int pmi_send_message(pmi_message_t); 65 65 66 66 #endif /* __KERNEL__ */ 67 67 #endif /* _POWERPC_PMI_H */
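A hedged sketch of a client of the reworked PMI interface (the message type value and the my_* names are invented): since the driver now keeps a single global instance, handlers no longer carry a struct of_device, and registration returns -ENODEV when no PMI device has been probed:

/*
 * Illustrative only, not part of the patch. PMI_TYPE_EXAMPLE and the
 * my_* names are invented.
 */
#include <linux/init.h>
#include <linux/kernel.h>
#include <asm/pmi.h>

#define PMI_TYPE_EXAMPLE	3	/* hypothetical message type */

static void my_handle_pmi_message(pmi_message_t msg)
{
	pr_debug("pmi: got message of type %d\n", msg.type);
}

static struct pmi_handler my_pmi_handler = {
	.type			= PMI_TYPE_EXAMPLE,
	.handle_pmi_message	= my_handle_pmi_message,
};

static int __init my_pmi_client_init(void)
{
	/* returns -ENODEV if no PMI device has been probed */
	return pmi_register_handler(&my_pmi_handler);
}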
+50 -12
include/asm-powerpc/spu.h
··· 107 107 struct device_node; 108 108 109 109 enum spu_utilization_state { 110 - SPU_UTIL_SYSTEM, 111 110 SPU_UTIL_USER, 111 + SPU_UTIL_SYSTEM, 112 112 SPU_UTIL_IOWAIT, 113 - SPU_UTIL_IDLE, 113 + SPU_UTIL_IDLE_LOADED, 114 114 SPU_UTIL_MAX 115 115 }; 116 116 ··· 121 121 unsigned long problem_phys; 122 122 struct spu_problem __iomem *problem; 123 123 struct spu_priv2 __iomem *priv2; 124 - struct list_head list; 125 - struct list_head sched_list; 124 + struct list_head cbe_list; 126 125 struct list_head full_list; 126 + enum { SPU_FREE, SPU_USED } alloc_state; 127 127 int number; 128 128 unsigned int irqs[3]; 129 129 u32 node; ··· 137 137 struct spu_runqueue *rq; 138 138 unsigned long long timestamp; 139 139 pid_t pid; 140 + pid_t tgid; 140 141 int class_0_pending; 141 142 spinlock_t register_lock; 142 143 ··· 166 165 167 166 struct sys_device sysdev; 168 167 168 + int has_mem_affinity; 169 + struct list_head aff_list; 170 + 169 171 struct { 170 172 /* protected by interrupt reentrancy */ 171 - enum spu_utilization_state utilization_state; 172 - unsigned long tstamp; /* time of last ctx switch */ 173 - unsigned long times[SPU_UTIL_MAX]; 173 + enum spu_utilization_state util_state; 174 + unsigned long long tstamp; 175 + unsigned long long times[SPU_UTIL_MAX]; 174 176 unsigned long long vol_ctx_switch; 175 177 unsigned long long invol_ctx_switch; 176 178 unsigned long long min_flt; ··· 185 181 } stats; 186 182 }; 187 183 188 - struct spu *spu_alloc(void); 189 - struct spu *spu_alloc_node(int node); 190 - void spu_free(struct spu *spu); 184 + struct cbe_spu_info { 185 + struct mutex list_mutex; 186 + struct list_head spus; 187 + int n_spus; 188 + int nr_active; 189 + atomic_t reserved_spus; 190 + }; 191 + 192 + extern struct cbe_spu_info cbe_spu_info[]; 193 + 194 + void spu_init_channels(struct spu *spu); 191 195 int spu_irq_class_0_bottom(struct spu *spu); 192 196 int spu_irq_class_1_bottom(struct spu *spu); 193 197 void spu_irq_setaffinity(struct spu *spu, int cpu); 198 + 199 + #ifdef CONFIG_KEXEC 200 + void crash_register_spus(struct list_head *list); 201 + #else 202 + static inline void crash_register_spus(struct list_head *list) 203 + { 204 + } 205 + #endif 194 206 195 207 extern void spu_invalidate_slbs(struct spu *spu); 196 208 extern void spu_associate_mm(struct spu *spu, struct mm_struct *mm); ··· 214 194 /* Calls from the memory management to the SPU */ 215 195 struct mm_struct; 216 196 extern void spu_flush_all_slbs(struct mm_struct *mm); 197 + 198 + /* This interface allows a profiler (e.g., OProfile) to store a ref 199 + * to spu context information that it creates. This caching technique 200 + * avoids the need to recreate this information after a save/restore operation. 201 + * 202 + * Assumes the caller has already incremented the ref count to 203 + * profile_info; then spu_context_destroy must call kref_put 204 + * on prof_info_kref. 
205 + */ 206 + void spu_set_profile_private_kref(struct spu_context *ctx, 207 + struct kref *prof_info_kref, 208 + void ( * prof_info_release) (struct kref *kref)); 209 + 210 + void *spu_get_profile_private_kref(struct spu_context *ctx); 217 211 218 212 /* system callbacks from the SPU */ 219 213 struct spu_syscall_block { ··· 240 206 struct file; 241 207 extern struct spufs_calls { 242 208 asmlinkage long (*create_thread)(const char __user *name, 243 - unsigned int flags, mode_t mode); 209 + unsigned int flags, mode_t mode, 210 + struct file *neighbor); 244 211 asmlinkage long (*spu_run)(struct file *filp, __u32 __user *unpc, 245 212 __u32 __user *ustatus); 246 213 struct module *owner; ··· 268 233 #define SPU_CREATE_GANG 0x0002 269 234 #define SPU_CREATE_NOSCHED 0x0004 270 235 #define SPU_CREATE_ISOLATE 0x0008 236 + #define SPU_CREATE_AFFINITY_SPU 0x0010 237 + #define SPU_CREATE_AFFINITY_MEM 0x0020 271 238 272 - #define SPU_CREATE_FLAG_ALL 0x000f /* mask of all valid flags */ 239 + #define SPU_CREATE_FLAG_ALL 0x003f /* mask of all valid flags */ 273 240 274 241 275 242 #ifdef CONFIG_SPU_FS_MODULE ··· 440 403 #define MFC_CNTL_RESUME_DMA_QUEUE (0ull << 0) 441 404 #define MFC_CNTL_SUSPEND_DMA_QUEUE (1ull << 0) 442 405 #define MFC_CNTL_SUSPEND_DMA_QUEUE_MASK (1ull << 0) 406 + #define MFC_CNTL_SUSPEND_MASK (1ull << 4) 443 407 #define MFC_CNTL_NORMAL_DMA_QUEUE_OPERATION (0ull << 8) 444 408 #define MFC_CNTL_SUSPEND_IN_PROGRESS (1ull << 8) 445 409 #define MFC_CNTL_SUSPEND_COMPLETE (3ull << 8)
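A speculative profiler-side sketch of the private-data hooks declared above; my_prof_info and its helpers are invented. The caller embeds a struct kref in its own per-context object, hands it to spufs, and spufs drops the reference when the context is destroyed:

/*
 * Illustrative only, not part of the patch; my_prof_info is a made-up
 * per-context bookkeeping structure.
 */
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/slab.h>
#include <asm/spu.h>

struct my_prof_info {
	struct kref kref;
	unsigned long spu_cookie;
};

static void my_prof_info_release(struct kref *kref)
{
	kfree(container_of(kref, struct my_prof_info, kref));
}

static int my_attach_prof_info(struct spu_context *ctx, unsigned long cookie)
{
	struct my_prof_info *info;

	info = kzalloc(sizeof(*info), GFP_KERNEL);
	if (!info)
		return -ENOMEM;

	kref_init(&info->kref);		/* caller holds the initial reference */
	info->spu_cookie = cookie;
	spu_set_profile_private_kref(ctx, &info->kref, my_prof_info_release);
	return 0;
}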
+7 -1
include/asm-powerpc/spu_csa.h
··· 50 50 #define SPU_STOPPED_STATUS_P_I 8 51 51 #define SPU_STOPPED_STATUS_R 9 52 52 53 + /* 54 + * Definitions for software decrementer status flag. 55 + */ 56 + #define SPU_DECR_STATUS_RUNNING 0x1 57 + #define SPU_DECR_STATUS_WRAPPED 0x2 58 + 53 59 #ifndef __ASSEMBLY__ 54 60 /** 55 61 * spu_reg128 - generic 128-bit register definition. ··· 69 63 * @gprs: Array of saved registers. 70 64 * @fpcr: Saved floating point status control register. 71 65 * @decr: Saved decrementer value. 72 - * @decr_status: Indicates decrementer run status. 66 + * @decr_status: Indicates software decrementer status flags. 73 67 * @ppu_mb: Saved PPU mailbox data. 74 68 * @ppuint_mb: Saved PPU interrupting mailbox data. 75 69 * @tag_mask: Saved tag group mask.
+1
include/linux/dcookies.h
··· 12 12 13 13 #ifdef CONFIG_PROFILING 14 14 15 + #include <linux/dcache.h> 15 16 #include <linux/types.h> 16 17 17 18 struct dcookie_user;
+2 -1
include/linux/elf-em.h
··· 20 20 #define EM_PARISC 15 /* HPPA */ 21 21 #define EM_SPARC32PLUS 18 /* Sun's "v8plus" */ 22 22 #define EM_PPC 20 /* PowerPC */ 23 - #define EM_PPC64 21 /* PowerPC64 */ 23 + #define EM_PPC64 21 /* PowerPC64 */ 24 + #define EM_SPU 23 /* Cell BE SPU */ 24 25 #define EM_SH 42 /* SuperH */ 25 26 #define EM_SPARCV9 43 /* SPARC v9 64-bit */ 26 27 #define EM_IA_64 50 /* HP/Intel IA-64 */
+35
include/linux/oprofile.h
··· 17 17 #include <linux/spinlock.h> 18 18 #include <asm/atomic.h> 19 19 20 + /* Each escaped entry is prefixed by ESCAPE_CODE 21 + * then one of the following codes, then the 22 + * relevant data. 23 + * These #defines live in this file so that arch-specific 24 + * buffer sync'ing code can access them. 25 + */ 26 + #define ESCAPE_CODE ~0UL 27 + #define CTX_SWITCH_CODE 1 28 + #define CPU_SWITCH_CODE 2 29 + #define COOKIE_SWITCH_CODE 3 30 + #define KERNEL_ENTER_SWITCH_CODE 4 31 + #define KERNEL_EXIT_SWITCH_CODE 5 32 + #define MODULE_LOADED_CODE 6 33 + #define CTX_TGID_CODE 7 34 + #define TRACE_BEGIN_CODE 8 35 + #define TRACE_END_CODE 9 36 + #define XEN_ENTER_SWITCH_CODE 10 37 + #define SPU_PROFILING_CODE 11 38 + #define SPU_CTX_SWITCH_CODE 12 39 + 20 40 struct super_block; 21 41 struct dentry; 22 42 struct file_operations; ··· 55 35 int (*start)(void); 56 36 /* Stop delivering interrupts. */ 57 37 void (*stop)(void); 38 + /* Arch-specific buffer sync functions. 39 + * Return value = 0: Success 40 + * Return value = -1: Failure 41 + * Return value = 1: Run generic sync function 42 + */ 43 + int (*sync_start)(void); 44 + int (*sync_stop)(void); 45 + 58 46 /* Initiate a stack backtrace. Optional. */ 59 47 void (*backtrace)(struct pt_regs * const regs, unsigned int depth); 60 48 /* CPU identification string. */ ··· 82 54 * One-time exit/cleanup for the arch. 83 55 */ 84 56 void oprofile_arch_exit(void); 57 + 58 + /** 59 + * Add data to the event buffer. 60 + * The data passed is free-form, but typically consists of 61 + * file offsets, dcookies, context information, and ESCAPE codes. 62 + */ 63 + void add_event_entry(unsigned long data); 85 64 86 65 /** 87 66 * Add a sample. This may be called from any context. Pass
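With add_event_entry() now exported here for arch-specific sync code, a minimal illustrative sketch of emitting one of the new escaped records follows; the payload after the code word is invented, and a real implementation must match what the userspace daemon parses and follow the generic event-buffer locking:

/*
 * Illustrative only, not part of the patch. The record layout after
 * SPU_CTX_SWITCH_CODE is invented.
 */
#include <linux/oprofile.h>

static void my_emit_spu_ctx_switch(unsigned long spu_num, unsigned long pid,
				   unsigned long objectid)
{
	add_event_entry(ESCAPE_CODE);
	add_event_entry(SPU_CTX_SWITCH_CODE);
	add_event_entry(spu_num);
	add_event_entry(pid);
	add_event_entry(objectid);
}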
+1 -1
include/linux/syscalls.h
··· 549 549 asmlinkage long sys_spu_run(int fd, __u32 __user *unpc, 550 550 __u32 __user *ustatus); 551 551 asmlinkage long sys_spu_create(const char __user *name, 552 - unsigned int flags, mode_t mode); 552 + unsigned int flags, mode_t mode, int fd); 553 553 554 554 asmlinkage long sys_mknodat(int dfd, const char __user * filename, int mode, 555 555 unsigned dev);