Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

at v2.6.17 296 lines 8.3 kB view raw
/*
 * Copyright (C) 1996 David S. Miller (dm@engr.sgi.com)
 * Copyright (C) 1997, 2001 Ralf Baechle (ralf@gnu.org)
 * Copyright (C) 2000 SiByte, Inc.
 * Copyright (C) 2005 Thiemo Seufer
 *
 * Written by Justin Carlson of SiByte, Inc.
 * and Kip Walker of Broadcom Corp.
 *
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 */
#include <linux/config.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/smp.h>

#include <asm/io.h>
#include <asm/sibyte/sb1250.h>
#include <asm/sibyte/sb1250_regs.h>
#include <asm/sibyte/sb1250_dma.h>

/*
 * Hint values for the MIPS "pref" instruction, spliced into the asm
 * strings below.  On pass-1 SB1 parts the plain load/store hints (0/1)
 * are used instead of the streamed hints (4/5).
 * NOTE(review): presumably a pass-1 erratum workaround for the streamed
 * prefetch hints -- confirm against the SB1 errata documentation.
 */
#ifdef CONFIG_SB1_PASS_1_WORKAROUNDS
#define SB1_PREF_LOAD_STREAMED_HINT "0"
#define SB1_PREF_STORE_STREAMED_HINT "1"
#else
#define SB1_PREF_LOAD_STREAMED_HINT "4"
#define SB1_PREF_STORE_STREAMED_HINT "5"
#endif

/*
 * clear_page_cpu - zero one full page (PAGE_SIZE bytes) with CPU stores.
 * @page: kernel virtual address of the page to clear; assumed to be at
 *        least doubleword-aligned (the loop uses 64-bit "sd" stores --
 *        page alignment guarantees this for real pages).
 *
 * Uses hand-written inline assembly: 64-bit stores of $0 (the zero
 * register), 32 bytes (one cache line) per loop iteration, with store
 * prefetches running four lines ahead of the store loop when the CPU
 * supports prefetch.
 */
static inline void clear_page_cpu(void *page)
{
	unsigned char *addr = (unsigned char *) page;
	unsigned char *end = addr + PAGE_SIZE;

	/*
	 * JDCXXX - This should be bottlenecked by the write buffer, but these
	 * things tend to be mildly unpredictable...should check this on the
	 * performance model
	 *
	 * We prefetch 4 lines ahead.  We're also "cheating" slightly here...
	 * since we know we're on an SB1, we force the assembler to take
	 * 64-bit operands to speed things up
	 */
	__asm__ __volatile__(
	"	.set push		\n"
	"	.set mips4		\n"
	"	.set noreorder		\n"
#ifdef CONFIG_CPU_HAS_PREFETCH
	/*
	 * Prefetching variant: %0 is biased forward by 128 bytes so the
	 * prefetches at -32(%0) stay 4 lines ahead of the stores at
	 * -128(%0).  The loop exits (bnel not taken) with %0 == end, i.e.
	 * with the last 128 bytes of the page still unwritten; the
	 * "daddiu %0, %0, -128" rewinds so the tail loop below clears them.
	 */
	"	daddiu	%0, %0, 128	\n"
	"	pref	" SB1_PREF_STORE_STREAMED_HINT ", -128(%0)  \n"
	/* Prefetch the first 4 lines */
	"	pref	" SB1_PREF_STORE_STREAMED_HINT ",  -96(%0)  \n"
	"	pref	" SB1_PREF_STORE_STREAMED_HINT ",  -64(%0)  \n"
	"	pref	" SB1_PREF_STORE_STREAMED_HINT ",  -32(%0)  \n"
	"1:	sd	$0, -128(%0)	\n"  /* Throw out a cacheline of 0's */
	"	sd	$0, -120(%0)	\n"
	"	sd	$0, -112(%0)	\n"
	"	sd	$0, -104(%0)	\n"
	"	daddiu	%0, %0, 32	\n"
	/*
	 * Branch-likely: the prefetch in the delay slot only executes
	 * when the branch is taken, so no prefetch runs past the page.
	 */
	"	bnel	%0, %1, 1b	\n"
	"	pref	" SB1_PREF_STORE_STREAMED_HINT ",  -32(%0)  \n"
	"	daddiu	%0, %0, -128	\n"
#endif
	/*
	 * Non-prefetch / tail loop: clears [%0, end) one 32-byte line per
	 * iteration.  Without CONFIG_CPU_HAS_PREFETCH this clears the whole
	 * page; with it, only the final 128 bytes left by the loop above.
	 * The first "sd" of each line sits in the bnel delay slot (again
	 * nullified on loop exit), hence the store at 0(%0) before the label.
	 */
	"	sd	$0, 0(%0)	\n"  /* Throw out a cacheline of 0's */
	"1:	sd	$0, 8(%0)	\n"
	"	sd	$0, 16(%0)	\n"
	"	sd	$0, 24(%0)	\n"
	"	daddiu	%0, %0, 32	\n"
	"	bnel	%0, %1, 1b	\n"
	"	sd	$0, 0(%0)	\n"
	"	.set pop		\n"
	: "+r" (addr)		/* in/out: cursor, advanced to end */
	: "r" (end)		/* loop bound, one past the page */
	: "memory");
}

/*
 * copy_page_cpu - copy one full page (PAGE_SIZE bytes) with CPU
 * loads/stores.
 * @to:   destination kernel virtual address (page-aligned).
 * @from: source kernel virtual address (page-aligned).
 *
 * Same structure as clear_page_cpu: a 32-byte-per-iteration block copy,
 * with "streamed" load/store prefetches running four lines ahead when
 * prefetch is available.  The 64-bit build moves a line with four
 * ld/sd pairs in $8-$11; the 32-bit build uses eight lw/sw pairs in
 * $2,$3,$6-$11 (both register sets are declared in the clobber list).
 */
static inline void copy_page_cpu(void *to, void *from)
{
	unsigned char *src = (unsigned char *)from;
	unsigned char *dst = (unsigned char *)to;
	unsigned char *end = src + PAGE_SIZE;

	/*
	 * The pref's used here are using "streaming" hints, which cause the
	 * copied data to be kicked out of the cache sooner.  A page copy often
	 * ends up copying a lot more data than is commonly used, so this seems
	 * to make sense in terms of reducing cache pollution, but I've no real
	 * performance data to back this up
	 */
	__asm__ __volatile__(
	"	.set push		\n"
	"	.set mips4		\n"
	"	.set noreorder		\n"
#ifdef CONFIG_CPU_HAS_PREFETCH
	/*
	 * Prefetching variant: both cursors biased forward by 128 bytes,
	 * load prefetches on the source, store prefetches on the
	 * destination.  As in clear_page_cpu, this loop leaves the last
	 * 128 bytes for the tail loop below and rewinds both cursors.
	 */
	"	daddiu	%0, %0, 128	\n"
	"	daddiu	%1, %1, 128	\n"
	"	pref	" SB1_PREF_LOAD_STREAMED_HINT  ", -128(%0)\n"
	/* Prefetch the first 4 lines */
	"	pref	" SB1_PREF_STORE_STREAMED_HINT ", -128(%1)\n"
	"	pref	" SB1_PREF_LOAD_STREAMED_HINT  ",  -96(%0)\n"
	"	pref	" SB1_PREF_STORE_STREAMED_HINT ",  -96(%1)\n"
	"	pref	" SB1_PREF_LOAD_STREAMED_HINT  ",  -64(%0)\n"
	"	pref	" SB1_PREF_STORE_STREAMED_HINT ",  -64(%1)\n"
	"	pref	" SB1_PREF_LOAD_STREAMED_HINT  ",  -32(%0)\n"
	"1:	pref	" SB1_PREF_STORE_STREAMED_HINT ",  -32(%1)\n"
# ifdef CONFIG_64BIT
	"	ld	$8, -128(%0)	\n"  /* Block copy a cacheline */
	"	ld	$9, -120(%0)	\n"
	"	ld	$10, -112(%0)	\n"
	"	ld	$11, -104(%0)	\n"
	"	sd	$8, -128(%1)	\n"
	"	sd	$9, -120(%1)	\n"
	"	sd	$10, -112(%1)	\n"
	"	sd	$11, -104(%1)	\n"
# else
	"	lw	$2, -128(%0)	\n"  /* Block copy a cacheline */
	"	lw	$3, -124(%0)	\n"
	"	lw	$6, -120(%0)	\n"
	"	lw	$7, -116(%0)	\n"
	"	lw	$8, -112(%0)	\n"
	"	lw	$9, -108(%0)	\n"
	"	lw	$10, -104(%0)	\n"
	"	lw	$11, -100(%0)	\n"
	"	sw	$2, -128(%1)	\n"
	"	sw	$3, -124(%1)	\n"
	"	sw	$6, -120(%1)	\n"
	"	sw	$7, -116(%1)	\n"
	"	sw	$8, -112(%1)	\n"
	"	sw	$9, -108(%1)	\n"
	"	sw	$10, -104(%1)	\n"
	"	sw	$11, -100(%1)	\n"
# endif
	"	daddiu	%0, %0, 32	\n"
	"	daddiu	%1, %1, 32	\n"
	/* Delay-slot load prefetch only executes when the branch is taken */
	"	bnel	%0, %2, 1b	\n"
	"	pref	" SB1_PREF_LOAD_STREAMED_HINT  ",  -32(%0)\n"
	"	daddiu	%0, %0, -128	\n"
	"	daddiu	%1, %1, -128	\n"
#endif
	/*
	 * Non-prefetch / tail loop: copies [src, end) line by line.  The
	 * first load of each line sits in the bnel delay slot, so it is
	 * issued up front before the label and re-issued per iteration.
	 */
#ifdef CONFIG_64BIT
	"	ld	$8, 0(%0)	\n"  /* Block copy a cacheline */
	"1:	ld	$9, 8(%0)	\n"
	"	ld	$10, 16(%0)	\n"
	"	ld	$11, 24(%0)	\n"
	"	sd	$8, 0(%1)	\n"
	"	sd	$9, 8(%1)	\n"
	"	sd	$10, 16(%1)	\n"
	"	sd	$11, 24(%1)	\n"
#else
	"	lw	$2, 0(%0)	\n"  /* Block copy a cacheline */
	"1:	lw	$3, 4(%0)	\n"
	"	lw	$6, 8(%0)	\n"
	"	lw	$7, 12(%0)	\n"
	"	lw	$8, 16(%0)	\n"
	"	lw	$9, 20(%0)	\n"
	"	lw	$10, 24(%0)	\n"
	"	lw	$11, 28(%0)	\n"
	"	sw	$2, 0(%1)	\n"
	"	sw	$3, 4(%1)	\n"
	"	sw	$6, 8(%1)	\n"
	"	sw	$7, 12(%1)	\n"
	"	sw	$8, 16(%1)	\n"
	"	sw	$9, 20(%1)	\n"
	"	sw	$10, 24(%1)	\n"
	"	sw	$11, 28(%1)	\n"
#endif
	"	daddiu	%0, %0, 32	\n"
	"	daddiu	%1, %1, 32	\n"
	"	bnel	%0, %2, 1b	\n"
#ifdef CONFIG_64BIT
	"	ld	$8, 0(%0)	\n"
#else
	"	lw	$2, 0(%0)	\n"
#endif
	"	.set pop		\n"
	: "+r" (src), "+r" (dst)	/* in/out: cursors, advanced past the page */
	: "r" (end)			/* loop bound, one past the source page */
#ifdef CONFIG_64BIT
	: "$8","$9","$10","$11","memory");
#else
	: "$2","$3","$6","$7","$8","$9","$10","$11","memory");
#endif
}


#ifdef CONFIG_SIBYTE_DMA_PAGEOPS

/*
 * Pad descriptors to cacheline, since each is exclusively owned by a
 * particular CPU.
 */
/* One SB1250 data-mover descriptor; pad_a/pad_b round it up to a cache line */
typedef struct dmadscr_s {
	u64 dscr_a;	/* destination physical address + control flags */
	u64 dscr_b;	/* source physical address / transfer length */
	u64 pad_a;
	u64 pad_b;
} dmadscr_t;

/* One single-entry descriptor ring per data-mover channel (one per CPU) */
static dmadscr_t page_descr[DM_NUM_CHANNELS]
	__attribute__((aligned(SMP_CACHE_BYTES)));

/*
 * sb1_dma_init - program every data-mover channel with its descriptor
 * ring base (ring size 1), then reset and enable the channel.
 *
 * The three writes are intentionally ordered: base first, then
 * base|RESET, then base|ENABL.
 */
void sb1_dma_init(void)
{
	int i;

	for (i = 0; i < DM_NUM_CHANNELS; i++) {
		const u64 base_val = CPHYSADDR(&page_descr[i]) |
				     V_DM_DSCR_BASE_RINGSZ(1);
		volatile void *base_reg =
			IOADDR(A_DM_REGISTER(i, R_DM_DSCR_BASE));

		__raw_writeq(base_val, base_reg);
		__raw_writeq(base_val | M_DM_DSCR_BASE_RESET, base_reg);
		__raw_writeq(base_val | M_DM_DSCR_BASE_ENABL, base_reg);
	}
}

/*
 * clear_page - zero a page using this CPU's data-mover channel,
 * falling back to clear_page_cpu() for pages outside KSEG0.
 * @page: kernel virtual address of the page to clear.
 *
 * Fills in this CPU's descriptor (ZERO_MEM, write through L2, raise
 * the completion interrupt flag), kicks the channel by writing 1 to
 * DSCR_COUNT, then spins until the INTERRUPT bit shows up in
 * DSCR_BASE_DEBUG.
 *
 * NOTE(review): the per-CPU descriptor indexed by smp_processor_id()
 * presumably requires the caller to be non-preemptible here; confirm.
 */
void clear_page(void *page)
{
	u64 to_phys = CPHYSADDR(page);
	unsigned int cpu = smp_processor_id();

	/* if the page is not in KSEG0, use old way */
	if ((long)KSEGX(page) != (long)CKSEG0)
		return clear_page_cpu(page);

	page_descr[cpu].dscr_a = to_phys | M_DM_DSCRA_ZERO_MEM |
				 M_DM_DSCRA_L2C_DEST | M_DM_DSCRA_INTERRUPT;
	page_descr[cpu].dscr_b = V_DM_DSCRB_SRC_LENGTH(PAGE_SIZE);
	/* hand one descriptor to the channel; this starts the transfer */
	__raw_writeq(1, IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_COUNT)));

	/*
	 * Don't really want to do it this way, but there's no
	 * reliable way to delay completion detection.
	 */
	while (!(__raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE_DEBUG)))
		 & M_DM_DSCR_BASE_INTERRUPT))
		;
	/*
	 * NOTE(review): this read of DSCR_BASE appears to acknowledge /
	 * clear the completion status for the next operation -- confirm
	 * against the BCM1250 data mover documentation.
	 */
	__raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE)));
}

/*
 * copy_page - copy a page using this CPU's data-mover channel,
 * falling back to copy_page_cpu() if either page is outside KSEG0.
 * @to:   destination kernel virtual address.
 * @from: source kernel virtual address.
 *
 * Same descriptor/kick/poll sequence as clear_page() above, with the
 * source physical address in dscr_b instead of ZERO_MEM in dscr_a.
 */
void copy_page(void *to, void *from)
{
	u64 from_phys = CPHYSADDR(from);
	u64 to_phys = CPHYSADDR(to);
	unsigned int cpu = smp_processor_id();

	/* if any page is not in KSEG0, use old way */
	if ((long)KSEGX(to) != (long)CKSEG0
	    || (long)KSEGX(from) != (long)CKSEG0)
		return copy_page_cpu(to, from);

	page_descr[cpu].dscr_a = to_phys | M_DM_DSCRA_L2C_DEST |
				 M_DM_DSCRA_INTERRUPT;
	page_descr[cpu].dscr_b = from_phys | V_DM_DSCRB_SRC_LENGTH(PAGE_SIZE);
	/* hand one descriptor to the channel; this starts the transfer */
	__raw_writeq(1, IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_COUNT)));

	/*
	 * Don't really want to do it this way, but there's no
	 * reliable way to delay completion detection.
	 */
	while (!(__raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE_DEBUG)))
		 & M_DM_DSCR_BASE_INTERRUPT))
		;
	/* see NOTE(review) in clear_page(): presumed status acknowledge */
	__raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE)));
}

#else /* !CONFIG_SIBYTE_DMA_PAGEOPS */

/* Without the DMA engine, clear_page is just the CPU store loop */
void clear_page(void *page)
{
	return clear_page_cpu(page);
}

/* Without the DMA engine, copy_page is just the CPU load/store loop */
void copy_page(void *to, void *from)
{
	return copy_page_cpu(to, from);
}

#endif /* !CONFIG_SIBYTE_DMA_PAGEOPS */

EXPORT_SYMBOL(clear_page);
EXPORT_SYMBOL(copy_page);