Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

C6X: library code

Original port to early 2.6 kernel using TI COFF toolchain.
Brought up to date by Mark Salter <msalter@redhat.com>

Signed-off-by: Aurelien Jacquiot <a-jacquiot@ti.com>
Signed-off-by: Mark Salter <msalter@redhat.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>

authored by

Aurelien Jacquiot and committed by
Mark Salter
09831ca7 a7f626c1

+1349
+34
arch/c6x/include/asm/checksum.h
··· 1 + /* 2 + * Copyright (C) 2011 Texas Instruments Incorporated 3 + * Author: Mark Salter <msalter@redhat.com> 4 + * 5 + * This program is free software; you can redistribute it and/or modify 6 + * it under the terms of the GNU General Public License version 2 as 7 + * published by the Free Software Foundation. 8 + */ 9 + #ifndef _ASM_C6X_CHECKSUM_H 10 + #define _ASM_C6X_CHECKSUM_H 11 + 12 + static inline __wsum 13 + csum_tcpudp_nofold(__be32 saddr, __be32 daddr, unsigned short len, 14 + unsigned short proto, __wsum sum) 15 + { 16 + unsigned long long tmp; 17 + 18 + asm ("add .d1 %1,%5,%1\n" 19 + "|| addu .l1 %3,%4,%0\n" 20 + "addu .l1 %2,%0,%0\n" 21 + #ifndef CONFIG_CPU_BIG_ENDIAN 22 + "|| shl .s1 %1,8,%1\n" 23 + #endif 24 + "addu .l1 %1,%0,%0\n" 25 + "add .l1 %P0,%p0,%2\n" 26 + : "=&a"(tmp), "+a"(len), "+a"(sum) 27 + : "a" (saddr), "a" (daddr), "a" (proto)); 28 + return sum; 29 + } 30 + #define csum_tcpudp_nofold csum_tcpudp_nofold 31 + 32 + #include <asm-generic/checksum.h> 33 + 34 + #endif /* _ASM_C6X_CHECKSUM_H */
+36
arch/c6x/lib/checksum.c
··· 1 + /* 2 + * This program is free software; you can redistribute it and/or 3 + * modify it under the terms of the GNU General Public License 4 + * as published by the Free Software Foundation; either version 5 + * 2 of the License, or (at your option) any later version. 6 + */ 7 + #include <linux/module.h> 8 + #include <net/checksum.h> 9 + 10 + #include <asm/byteorder.h> 11 + 12 + /* 13 + * copy from fs while checksumming, otherwise like csum_partial 14 + */ 15 + __wsum 16 + csum_partial_copy_from_user(const void __user *src, void *dst, int len, 17 + __wsum sum, int *csum_err) 18 + { 19 + int missing; 20 + 21 + missing = __copy_from_user(dst, src, len); 22 + if (missing) { 23 + memset(dst + len - missing, 0, missing); 24 + *csum_err = -EFAULT; 25 + } else 26 + *csum_err = 0; 27 + 28 + return csum_partial(dst, len, sum); 29 + } 30 + EXPORT_SYMBOL(csum_partial_copy_from_user); 31 + 32 + /* These are from csum_64plus.S */ 33 + EXPORT_SYMBOL(csum_partial); 34 + EXPORT_SYMBOL(csum_partial_copy); 35 + EXPORT_SYMBOL(ip_compute_csum); 36 + EXPORT_SYMBOL(ip_fast_csum);
+419
arch/c6x/lib/csum_64plus.S
··· 1 + ; 2 + ; linux/arch/c6x/lib/csum_64plus.s 3 + ; 4 + ; Port on Texas Instruments TMS320C6x architecture 5 + ; 6 + ; Copyright (C) 2006, 2009, 2010, 2011 Texas Instruments Incorporated 7 + ; Author: Aurelien Jacquiot (aurelien.jacquiot@jaluna.com) 8 + ; 9 + ; This program is free software; you can redistribute it and/or modify 10 + ; it under the terms of the GNU General Public License version 2 as 11 + ; published by the Free Software Foundation. 12 + ; 13 + #include <linux/linkage.h> 14 + 15 + ; 16 + ;unsigned int csum_partial_copy(const char *src, char * dst, 17 + ; int len, int sum) 18 + ; 19 + ; A4: src 20 + ; B4: dst 21 + ; A6: len 22 + ; B6: sum 23 + ; return csum in A4 24 + ; 25 + 26 + .text 27 + ENTRY(csum_partial_copy) 28 + MVC .S2 ILC,B30 29 + 30 + MV .D1X B6,A31 ; given csum 31 + ZERO .D1 A9 ; csum (a side) 32 + || ZERO .D2 B9 ; csum (b side) 33 + || SHRU .S2X A6,2,B5 ; len / 4 34 + 35 + ;; Check alignment and size 36 + AND .S1 3,A4,A1 37 + || AND .S2 3,B4,B0 38 + OR .L2X B0,A1,B0 ; non aligned condition 39 + || MVC .S2 B5,ILC 40 + || MVK .D2 1,B2 41 + || MV .D1X B5,A1 ; words condition 42 + [!A1] B .S1 L8 43 + [B0] BNOP .S1 L6,5 44 + 45 + SPLOOP 1 46 + 47 + ;; Main loop for aligned words 48 + LDW .D1T1 *A4++,A7 49 + NOP 4 50 + MV .S2X A7,B7 51 + || EXTU .S1 A7,0,16,A16 52 + STW .D2T2 B7,*B4++ 53 + || MPYU .M2 B7,B2,B8 54 + || ADD .L1 A16,A9,A9 55 + NOP 56 + SPKERNEL 8,0 57 + || ADD .L2 B8,B9,B9 58 + 59 + ZERO .D1 A1 60 + || ADD .L1X A9,B9,A9 ; add csum from a and b sides 61 + 62 + L6: 63 + [!A1] BNOP .S1 L8,5 64 + 65 + ;; Main loop for non-aligned words 66 + SPLOOP 2 67 + || MVK .L1 1,A2 68 + 69 + LDNW .D1T1 *A4++,A7 70 + NOP 3 71 + 72 + NOP 73 + MV .S2X A7,B7 74 + || EXTU .S1 A7,0,16,A16 75 + || MPYU .M1 A7,A2,A8 76 + 77 + ADD .L1 A16,A9,A9 78 + SPKERNEL 6,0 79 + || STNW .D2T2 B7,*B4++ 80 + || ADD .L1 A8,A9,A9 81 + 82 + L8: AND .S2X 2,A6,B5 83 + CMPGT .L2 B5,0,B0 84 + [!B0] BNOP .S1 L82,4 85 + 86 + ;; Manage half-word 87 + ZERO .L1 A7 88 + || 
ZERO .D1 A8 89 + 90 + #ifdef CONFIG_CPU_BIG_ENDIAN 91 + 92 + LDBU .D1T1 *A4++,A7 93 + LDBU .D1T1 *A4++,A8 94 + NOP 3 95 + SHL .S1 A7,8,A0 96 + ADD .S1 A8,A9,A9 97 + STB .D2T1 A7,*B4++ 98 + || ADD .S1 A0,A9,A9 99 + STB .D2T1 A8,*B4++ 100 + 101 + #else 102 + 103 + LDBU .D1T1 *A4++,A7 104 + LDBU .D1T1 *A4++,A8 105 + NOP 3 106 + ADD .S1 A7,A9,A9 107 + SHL .S1 A8,8,A0 108 + 109 + STB .D2T1 A7,*B4++ 110 + || ADD .S1 A0,A9,A9 111 + STB .D2T1 A8,*B4++ 112 + 113 + #endif 114 + 115 + ;; Manage eventually the last byte 116 + L82: AND .S2X 1,A6,B0 117 + [!B0] BNOP .S1 L9,5 118 + 119 + || ZERO .L1 A7 120 + 121 + L83: LDBU .D1T1 *A4++,A7 122 + NOP 4 123 + 124 + MV .L2X A7,B7 125 + 126 + #ifdef CONFIG_CPU_BIG_ENDIAN 127 + 128 + STB .D2T2 B7,*B4++ 129 + || SHL .S1 A7,8,A7 130 + ADD .S1 A7,A9,A9 131 + 132 + #else 133 + 134 + STB .D2T2 B7,*B4++ 135 + || ADD .S1 A7,A9,A9 136 + 137 + #endif 138 + 139 + ;; Fold the csum 140 + L9: SHRU .S2X A9,16,B0 141 + [!B0] BNOP .S1 L10,5 142 + 143 + L91: SHRU .S2X A9,16,B4 144 + || EXTU .S1 A9,16,16,A3 145 + ADD .D1X A3,B4,A9 146 + 147 + SHRU .S1 A9,16,A0 148 + [A0] BNOP .S1 L91,5 149 + 150 + L10: ADD .D1 A31,A9,A9 151 + MV .D1 A9,A4 152 + 153 + BNOP .S2 B3,4 154 + MVC .S2 B30,ILC 155 + ENDPROC(csum_partial_copy) 156 + 157 + ; 158 + ;unsigned short 159 + ;ip_fast_csum(unsigned char *iph, unsigned int ihl) 160 + ;{ 161 + ; unsigned int checksum = 0; 162 + ; unsigned short *tosum = (unsigned short *) iph; 163 + ; int len; 164 + ; 165 + ; len = ihl*4; 166 + ; 167 + ; if (len <= 0) 168 + ; return 0; 169 + ; 170 + ; while(len) { 171 + ; len -= 2; 172 + ; checksum += *tosum++; 173 + ; } 174 + ; if (len & 1) 175 + ; checksum += *(unsigned char*) tosum; 176 + ; 177 + ; while(checksum >> 16) 178 + ; checksum = (checksum & 0xffff) + (checksum >> 16); 179 + ; 180 + ; return ~checksum; 181 + ;} 182 + ; 183 + ; A4: iph 184 + ; B4: ihl 185 + ; return checksum in A4 186 + ; 187 + .text 188 + 189 + ENTRY(ip_fast_csum) 190 + ZERO .D1 A5 191 + || MVC .S2 ILC,B30 192 
+ SHL .S2 B4,2,B0 193 + CMPGT .L2 B0,0,B1 194 + [!B1] BNOP .S1 L15,4 195 + [!B1] ZERO .D1 A3 196 + 197 + [!B0] B .S1 L12 198 + SHRU .S2 B0,1,B0 199 + MVC .S2 B0,ILC 200 + NOP 3 201 + 202 + SPLOOP 1 203 + LDHU .D1T1 *A4++,A3 204 + NOP 3 205 + NOP 206 + SPKERNEL 5,0 207 + || ADD .L1 A3,A5,A5 208 + 209 + L12: SHRU .S1 A5,16,A0 210 + [!A0] BNOP .S1 L14,5 211 + 212 + L13: SHRU .S2X A5,16,B4 213 + EXTU .S1 A5,16,16,A3 214 + ADD .D1X A3,B4,A5 215 + SHRU .S1 A5,16,A0 216 + [A0] BNOP .S1 L13,5 217 + 218 + L14: NOT .D1 A5,A3 219 + EXTU .S1 A3,16,16,A3 220 + 221 + L15: BNOP .S2 B3,3 222 + MVC .S2 B30,ILC 223 + MV .D1 A3,A4 224 + ENDPROC(ip_fast_csum) 225 + 226 + ; 227 + ;unsigned short 228 + ;do_csum(unsigned char *buff, unsigned int len) 229 + ;{ 230 + ; int odd, count; 231 + ; unsigned int result = 0; 232 + ; 233 + ; if (len <= 0) 234 + ; goto out; 235 + ; odd = 1 & (unsigned long) buff; 236 + ; if (odd) { 237 + ;#ifdef __LITTLE_ENDIAN 238 + ; result += (*buff << 8); 239 + ;#else 240 + ; result = *buff; 241 + ;#endif 242 + ; len--; 243 + ; buff++; 244 + ; } 245 + ; count = len >> 1; /* nr of 16-bit words.. */ 246 + ; if (count) { 247 + ; if (2 & (unsigned long) buff) { 248 + ; result += *(unsigned short *) buff; 249 + ; count--; 250 + ; len -= 2; 251 + ; buff += 2; 252 + ; } 253 + ; count >>= 1; /* nr of 32-bit words.. */ 254 + ; if (count) { 255 + ; unsigned int carry = 0; 256 + ; do { 257 + ; unsigned int w = *(unsigned int *) buff; 258 + ; count--; 259 + ; buff += 4; 260 + ; result += carry; 261 + ; result += w; 262 + ; carry = (w > result); 263 + ; } while (count); 264 + ; result += carry; 265 + ; result = (result & 0xffff) + (result >> 16); 266 + ; } 267 + ; if (len & 2) { 268 + ; result += *(unsigned short *) buff; 269 + ; buff += 2; 270 + ; } 271 + ; } 272 + ; if (len & 1) 273 + ;#ifdef __LITTLE_ENDIAN 274 + ; result += *buff; 275 + ;#else 276 + ; result += (*buff << 8); 277 + ;#endif 278 + ; result = (result & 0xffff) + (result >> 16); 279 + ; /* add up carry.. 
*/ 280 + ; result = (result & 0xffff) + (result >> 16); 281 + ; if (odd) 282 + ; result = ((result >> 8) & 0xff) | ((result & 0xff) << 8); 283 + ;out: 284 + ; return result; 285 + ;} 286 + ; 287 + ; A4: buff 288 + ; B4: len 289 + ; return checksum in A4 290 + ; 291 + 292 + ENTRY(do_csum) 293 + CMPGT .L2 B4,0,B0 294 + [!B0] BNOP .S1 L26,3 295 + EXTU .S1 A4,31,31,A0 296 + 297 + MV .L1 A0,A3 298 + || MV .S1X B3,A5 299 + || MV .L2 B4,B3 300 + || ZERO .D1 A1 301 + 302 + #ifdef CONFIG_CPU_BIG_ENDIAN 303 + [A0] SUB .L2 B3,1,B3 304 + || [A0] LDBU .D1T1 *A4++,A1 305 + #else 306 + [!A0] BNOP .S1 L21,5 307 + || [A0] LDBU .D1T1 *A4++,A0 308 + SUB .L2 B3,1,B3 309 + || SHL .S1 A0,8,A1 310 + L21: 311 + #endif 312 + SHR .S2 B3,1,B0 313 + [!B0] BNOP .S1 L24,3 314 + MVK .L1 2,A0 315 + AND .L1 A4,A0,A0 316 + 317 + [!A0] BNOP .S1 L22,5 318 + || [A0] LDHU .D1T1 *A4++,A0 319 + SUB .L2 B0,1,B0 320 + || SUB .S2 B3,2,B3 321 + || ADD .L1 A0,A1,A1 322 + L22: 323 + SHR .S2 B0,1,B0 324 + || ZERO .L1 A0 325 + 326 + [!B0] BNOP .S1 L23,5 327 + || [B0] MVC .S2 B0,ILC 328 + 329 + SPLOOP 3 330 + SPMASK L1 331 + || MV .L1 A1,A2 332 + || LDW .D1T1 *A4++,A1 333 + 334 + NOP 4 335 + ADD .L1 A0,A1,A0 336 + ADD .L1 A2,A0,A2 337 + 338 + SPKERNEL 1,2 339 + || CMPGTU .L1 A1,A2,A0 340 + 341 + ADD .L1 A0,A2,A6 342 + EXTU .S1 A6,16,16,A7 343 + SHRU .S2X A6,16,B0 344 + NOP 1 345 + ADD .L1X A7,B0,A1 346 + L23: 347 + MVK .L2 2,B0 348 + AND .L2 B3,B0,B0 349 + [B0] LDHU .D1T1 *A4++,A0 350 + NOP 4 351 + [B0] ADD .L1 A0,A1,A1 352 + L24: 353 + EXTU .S2 B3,31,31,B0 354 + #ifdef CONFIG_CPU_BIG_ENDIAN 355 + [!B0] BNOP .S1 L25,4 356 + || [B0] LDBU .D1T1 *A4,A0 357 + SHL .S1 A0,8,A0 358 + ADD .L1 A0,A1,A1 359 + L25: 360 + #else 361 + [B0] LDBU .D1T1 *A4,A0 362 + NOP 4 363 + [B0] ADD .L1 A0,A1,A1 364 + #endif 365 + EXTU .S1 A1,16,16,A0 366 + SHRU .S2X A1,16,B0 367 + NOP 1 368 + ADD .L1X A0,B0,A0 369 + SHRU .S1 A0,16,A1 370 + ADD .L1 A0,A1,A0 371 + EXTU .S1 A0,16,16,A1 372 + EXTU .S1 A1,16,24,A2 373 + 374 + EXTU .S1 
A1,24,16,A0 375 + || MV .L2X A3,B0 376 + 377 + [B0] OR .L1 A0,A2,A1 378 + L26: 379 + NOP 1 380 + BNOP .S2X A5,4 381 + MV .L1 A1,A4 382 + ENDPROC(do_csum) 383 + 384 + ;__wsum csum_partial(const void *buff, int len, __wsum wsum) 385 + ;{ 386 + ; unsigned int sum = (__force unsigned int)wsum; 387 + ; unsigned int result = do_csum(buff, len); 388 + ; 389 + ; /* add in old sum, and carry.. */ 390 + ; result += sum; 391 + ; if (sum > result) 392 + ; result += 1; 393 + ; return (__force __wsum)result; 394 + ;} 395 + ; 396 + ENTRY(csum_partial) 397 + MV .L1X B3,A9 398 + || CALLP .S2 do_csum,B3 399 + || MV .S1 A6,A8 400 + BNOP .S2X A9,2 401 + ADD .L1 A8,A4,A1 402 + CMPGTU .L1 A8,A1,A0 403 + ADD .L1 A1,A0,A4 404 + ENDPROC(csum_partial) 405 + 406 + ;unsigned short 407 + ;ip_compute_csum(unsigned char *buff, unsigned int len) 408 + ; 409 + ; A4: buff 410 + ; B4: len 411 + ; return checksum in A4 412 + 413 + ENTRY(ip_compute_csum) 414 + MV .L1X B3,A9 415 + || CALLP .S2 do_csum,B3 416 + BNOP .S2X A9,3 417 + NOT .S1 A4,A4 418 + CLR .S1 A4,16,31,A4 419 + ENDPROC(ip_compute_csum)
+53
arch/c6x/lib/divi.S
··· 1 + ;; Copyright 2010 Free Software Foundation, Inc. 2 + ;; Contributed by Bernd Schmidt <bernds@codesourcery.com>. 3 + ;; 4 + ;; This program is free software; you can redistribute it and/or modify 5 + ;; it under the terms of the GNU General Public License as published by 6 + ;; the Free Software Foundation; either version 2 of the License, or 7 + ;; (at your option) any later version. 8 + ;; 9 + ;; This program is distributed in the hope that it will be useful, 10 + ;; but WITHOUT ANY WARRANTY; without even the implied warranty of 11 + ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 + ;; GNU General Public License for more details. 13 + ;; 14 + ;; You should have received a copy of the GNU General Public License 15 + ;; along with this program; if not, write to the Free Software 16 + ;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 17 + 18 + #include <linux/linkage.h> 19 + 20 + ;; ABI considerations for the divide functions 21 + ;; The following registers are call-used: 22 + ;; __c6xabi_divi A0,A1,A2,A4,A6,B0,B1,B2,B4,B5 23 + ;; __c6xabi_divu A0,A1,A2,A4,A6,B0,B1,B2,B4 24 + ;; __c6xabi_remi A1,A2,A4,A5,A6,B0,B1,B2,B4 25 + ;; __c6xabi_remu A1,A4,A5,A7,B0,B1,B2,B4 26 + ;; 27 + ;; In our implementation, divu and remu are leaf functions, 28 + ;; while both divi and remi call into divu. 29 + ;; A0 is not clobbered by any of the functions. 30 + ;; divu does not clobber B2 either, which is taken advantage of 31 + ;; in remi. 32 + ;; divi uses B5 to hold the original return address during 33 + ;; the call to divu. 34 + ;; remi uses B2 and A5 to hold the input values during the 35 + ;; call to divu. It stores B3 in on the stack. 
36 + 37 + .text 38 + ENTRY(__c6xabi_divi) 39 + call .s2 __c6xabi_divu 40 + || mv .d2 B3, B5 41 + || cmpgt .l1 0, A4, A1 42 + || cmpgt .l2 0, B4, B1 43 + 44 + [A1] neg .l1 A4, A4 45 + || [B1] neg .l2 B4, B4 46 + || xor .s1x A1, B1, A1 47 + [A1] addkpc .s2 _divu_ret, B3, 4 48 + _divu_ret: 49 + neg .l1 A4, A4 50 + || mv .l2 B3,B5 51 + || ret .s2 B5 52 + nop 5 53 + ENDPROC(__c6xabi_divi)
+46
arch/c6x/lib/divremi.S
··· 1 + ;; Copyright 2010 Free Software Foundation, Inc. 2 + ;; Contributed by Bernd Schmidt <bernds@codesourcery.com>. 3 + ;; 4 + ;; This program is free software; you can redistribute it and/or modify 5 + ;; it under the terms of the GNU General Public License as published by 6 + ;; the Free Software Foundation; either version 2 of the License, or 7 + ;; (at your option) any later version. 8 + ;; 9 + ;; This program is distributed in the hope that it will be useful, 10 + ;; but WITHOUT ANY WARRANTY; without even the implied warranty of 11 + ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 + ;; GNU General Public License for more details. 13 + ;; 14 + ;; You should have received a copy of the GNU General Public License 15 + ;; along with this program; if not, write to the Free Software 16 + ;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 17 + 18 + #include <linux/linkage.h> 19 + 20 + .text 21 + ENTRY(__c6xabi_divremi) 22 + stw .d2t2 B3, *B15--[2] 23 + || cmpgt .l1 0, A4, A1 24 + || cmpgt .l2 0, B4, B2 25 + || mv .s1 A4, A5 26 + || call .s2 __c6xabi_divu 27 + 28 + [A1] neg .l1 A4, A4 29 + || [B2] neg .l2 B4, B4 30 + || xor .s2x B2, A1, B0 31 + || mv .d2 B4, B2 32 + 33 + [B0] addkpc .s2 _divu_ret_1, B3, 1 34 + [!B0] addkpc .s2 _divu_ret_2, B3, 1 35 + nop 2 36 + _divu_ret_1: 37 + neg .l1 A4, A4 38 + _divu_ret_2: 39 + ldw .d2t2 *++B15[2], B3 40 + 41 + mpy32 .m1x A4, B2, A6 42 + nop 3 43 + ret .s2 B3 44 + sub .l1 A5, A6, A5 45 + nop 4 46 + ENDPROC(__c6xabi_divremi)
+87
arch/c6x/lib/divremu.S
··· 1 + ;; Copyright 2011 Free Software Foundation, Inc. 2 + ;; Contributed by Bernd Schmidt <bernds@codesourcery.com>. 3 + ;; 4 + ;; This program is free software; you can redistribute it and/or modify 5 + ;; it under the terms of the GNU General Public License as published by 6 + ;; the Free Software Foundation; either version 2 of the License, or 7 + ;; (at your option) any later version. 8 + ;; 9 + ;; This program is distributed in the hope that it will be useful, 10 + ;; but WITHOUT ANY WARRANTY; without even the implied warranty of 11 + ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 + ;; GNU General Public License for more details. 13 + ;; 14 + ;; You should have received a copy of the GNU General Public License 15 + ;; along with this program; if not, write to the Free Software 16 + ;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 17 + 18 + #include <linux/linkage.h> 19 + 20 + .text 21 + ENTRY(__c6xabi_divremu) 22 + ;; We use a series of up to 31 subc instructions. First, we find 23 + ;; out how many leading zero bits there are in the divisor. This 24 + ;; gives us both a shift count for aligning (shifting) the divisor 25 + ;; to the, and the number of times we have to execute subc. 26 + 27 + ;; At the end, we have both the remainder and most of the quotient 28 + ;; in A4. The top bit of the quotient is computed first and is 29 + ;; placed in A2. 30 + 31 + ;; Return immediately if the dividend is zero. Setting B4 to 1 32 + ;; is a trick to allow us to leave the following insns in the jump 33 + ;; delay slot without affecting the result. 34 + mv .s2x A4, B1 35 + 36 + [b1] lmbd .l2 1, B4, B1 37 + ||[!b1] b .s2 B3 ; RETURN A 38 + ||[!b1] mvk .d2 1, B4 39 + 40 + ||[!b1] zero .s1 A5 41 + mv .l1x B1, A6 42 + || shl .s2 B4, B1, B4 43 + 44 + ;; The loop performs a maximum of 28 steps, so we do the 45 + ;; first 3 here. 
46 + cmpltu .l1x A4, B4, A2 47 + [!A2] sub .l1x A4, B4, A4 48 + || shru .s2 B4, 1, B4 49 + || xor .s1 1, A2, A2 50 + 51 + shl .s1 A2, 31, A2 52 + || [b1] subc .l1x A4,B4,A4 53 + || [b1] add .s2 -1, B1, B1 54 + [b1] subc .l1x A4,B4,A4 55 + || [b1] add .s2 -1, B1, B1 56 + 57 + ;; RETURN A may happen here (note: must happen before the next branch) 58 + __divremu0: 59 + cmpgt .l2 B1, 7, B0 60 + || [b1] subc .l1x A4,B4,A4 61 + || [b1] add .s2 -1, B1, B1 62 + [b1] subc .l1x A4,B4,A4 63 + || [b1] add .s2 -1, B1, B1 64 + || [b0] b .s1 __divremu0 65 + [b1] subc .l1x A4,B4,A4 66 + || [b1] add .s2 -1, B1, B1 67 + [b1] subc .l1x A4,B4,A4 68 + || [b1] add .s2 -1, B1, B1 69 + [b1] subc .l1x A4,B4,A4 70 + || [b1] add .s2 -1, B1, B1 71 + [b1] subc .l1x A4,B4,A4 72 + || [b1] add .s2 -1, B1, B1 73 + [b1] subc .l1x A4,B4,A4 74 + || [b1] add .s2 -1, B1, B1 75 + ;; loop backwards branch happens here 76 + 77 + ret .s2 B3 78 + || mvk .s1 32, A1 79 + sub .l1 A1, A6, A6 80 + || extu .s1 A4, A6, A5 81 + shl .s1 A4, A6, A4 82 + shru .s1 A4, 1, A4 83 + || sub .l1 A6, 1, A6 84 + or .l1 A2, A4, A4 85 + shru .s1 A4, A6, A4 86 + nop 87 + ENDPROC(__c6xabi_divremu)
+98
arch/c6x/lib/divu.S
··· 1 + ;; Copyright 2010 Free Software Foundation, Inc. 2 + ;; Contributed by Bernd Schmidt <bernds@codesourcery.com>. 3 + ;; 4 + ;; This program is free software; you can redistribute it and/or modify 5 + ;; it under the terms of the GNU General Public License as published by 6 + ;; the Free Software Foundation; either version 2 of the License, or 7 + ;; (at your option) any later version. 8 + ;; 9 + ;; This program is distributed in the hope that it will be useful, 10 + ;; but WITHOUT ANY WARRANTY; without even the implied warranty of 11 + ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 + ;; GNU General Public License for more details. 13 + ;; 14 + ;; You should have received a copy of the GNU General Public License 15 + ;; along with this program; if not, write to the Free Software 16 + ;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 17 + 18 + #include <linux/linkage.h> 19 + 20 + ;; ABI considerations for the divide functions 21 + ;; The following registers are call-used: 22 + ;; __c6xabi_divi A0,A1,A2,A4,A6,B0,B1,B2,B4,B5 23 + ;; __c6xabi_divu A0,A1,A2,A4,A6,B0,B1,B2,B4 24 + ;; __c6xabi_remi A1,A2,A4,A5,A6,B0,B1,B2,B4 25 + ;; __c6xabi_remu A1,A4,A5,A7,B0,B1,B2,B4 26 + ;; 27 + ;; In our implementation, divu and remu are leaf functions, 28 + ;; while both divi and remi call into divu. 29 + ;; A0 is not clobbered by any of the functions. 30 + ;; divu does not clobber B2 either, which is taken advantage of 31 + ;; in remi. 32 + ;; divi uses B5 to hold the original return address during 33 + ;; the call to divu. 34 + ;; remi uses B2 and A5 to hold the input values during the 35 + ;; call to divu. It stores B3 in on the stack. 36 + 37 + .text 38 + ENTRY(__c6xabi_divu) 39 + ;; We use a series of up to 31 subc instructions. First, we find 40 + ;; out how many leading zero bits there are in the divisor. 
This 41 + ;; gives us both a shift count for aligning (shifting) the divisor 42 + ;; to the, and the number of times we have to execute subc. 43 + 44 + ;; At the end, we have both the remainder and most of the quotient 45 + ;; in A4. The top bit of the quotient is computed first and is 46 + ;; placed in A2. 47 + 48 + ;; Return immediately if the dividend is zero. 49 + mv .s2x A4, B1 50 + [B1] lmbd .l2 1, B4, B1 51 + || [!B1] b .s2 B3 ; RETURN A 52 + || [!B1] mvk .d2 1, B4 53 + mv .l1x B1, A6 54 + || shl .s2 B4, B1, B4 55 + 56 + ;; The loop performs a maximum of 28 steps, so we do the 57 + ;; first 3 here. 58 + cmpltu .l1x A4, B4, A2 59 + [!A2] sub .l1x A4, B4, A4 60 + || shru .s2 B4, 1, B4 61 + || xor .s1 1, A2, A2 62 + 63 + shl .s1 A2, 31, A2 64 + || [B1] subc .l1x A4,B4,A4 65 + || [B1] add .s2 -1, B1, B1 66 + [B1] subc .l1x A4,B4,A4 67 + || [B1] add .s2 -1, B1, B1 68 + 69 + ;; RETURN A may happen here (note: must happen before the next branch) 70 + _divu_loop: 71 + cmpgt .l2 B1, 7, B0 72 + || [B1] subc .l1x A4,B4,A4 73 + || [B1] add .s2 -1, B1, B1 74 + [B1] subc .l1x A4,B4,A4 75 + || [B1] add .s2 -1, B1, B1 76 + || [B0] b .s1 _divu_loop 77 + [B1] subc .l1x A4,B4,A4 78 + || [B1] add .s2 -1, B1, B1 79 + [B1] subc .l1x A4,B4,A4 80 + || [B1] add .s2 -1, B1, B1 81 + [B1] subc .l1x A4,B4,A4 82 + || [B1] add .s2 -1, B1, B1 83 + [B1] subc .l1x A4,B4,A4 84 + || [B1] add .s2 -1, B1, B1 85 + [B1] subc .l1x A4,B4,A4 86 + || [B1] add .s2 -1, B1, B1 87 + ;; loop backwards branch happens here 88 + 89 + ret .s2 B3 90 + || mvk .s1 32, A1 91 + sub .l1 A1, A6, A6 92 + shl .s1 A4, A6, A4 93 + shru .s1 A4, 1, A4 94 + || sub .l1 A6, 1, A6 95 + or .l1 A2, A4, A4 96 + shru .s1 A4, A6, A4 97 + nop 98 + ENDPROC(__c6xabi_divu)
+37
arch/c6x/lib/llshl.S
··· 1 + ;; Copyright (C) 2010 Texas Instruments Incorporated 2 + ;; Contributed by Mark Salter <msalter@redhat.com>. 3 + ;; 4 + ;; This program is free software; you can redistribute it and/or modify 5 + ;; it under the terms of the GNU General Public License as published by 6 + ;; the Free Software Foundation; either version 2 of the License, or 7 + ;; (at your option) any later version. 8 + ;; 9 + ;; This program is distributed in the hope that it will be useful, 10 + ;; but WITHOUT ANY WARRANTY; without even the implied warranty of 11 + ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 + ;; GNU General Public License for more details. 13 + ;; 14 + ;; You should have received a copy of the GNU General Public License 15 + ;; along with this program; if not, write to the Free Software 16 + ;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 17 + 18 + ;; uint64_t __c6xabi_llshl(uint64_t val, uint shift) 19 + 20 + #include <linux/linkage.h> 21 + 22 + .text 23 + ENTRY(__c6xabi_llshl) 24 + mv .l1x B4,A1 25 + [!A1] b .s2 B3 ; just return if zero shift 26 + mvk .s1 32,A0 27 + sub .d1 A0,A1,A0 28 + cmplt .l1 0,A0,A2 29 + [A2] shru .s1 A4,A0,A0 30 + [!A2] neg .l1 A0,A5 31 + || [A2] shl .s1 A5,A1,A5 32 + [!A2] shl .s1 A4,A5,A5 33 + || [A2] or .d1 A5,A0,A5 34 + || [!A2] mvk .l1 0,A4 35 + [A2] shl .s1 A4,A1,A4 36 + bnop .s2 B3,5 37 + ENDPROC(__c6xabi_llshl)
+38
arch/c6x/lib/llshr.S
··· 1 + ;; Copyright (C) 2010 Texas Instruments Incorporated 2 + ;; Contributed by Mark Salter <msalter@redhat.com>. 3 + ;; 4 + ;; This program is free software; you can redistribute it and/or modify 5 + ;; it under the terms of the GNU General Public License as published by 6 + ;; the Free Software Foundation; either version 2 of the License, or 7 + ;; (at your option) any later version. 8 + ;; 9 + ;; This program is distributed in the hope that it will be useful, 10 + ;; but WITHOUT ANY WARRANTY; without even the implied warranty of 11 + ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 + ;; GNU General Public License for more details. 13 + ;; 14 + ;; You should have received a copy of the GNU General Public License 15 + ;; along with this program; if not, write to the Free Software 16 + ;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 17 + 18 + ;; uint64_t __c6xabi_llshr(uint64_t val, uint shift) 19 + 20 + #include <linux/linkage.h> 21 + 22 + .text 23 + ENTRY(__c6xabi_llshr) 24 + mv .l1x B4,A1 25 + [!A1] b .s2 B3 ; return if zero shift count 26 + mvk .s1 32,A0 27 + sub .d1 A0,A1,A0 28 + cmplt .l1 0,A0,A2 29 + [A2] shl .s1 A5,A0,A0 30 + nop 31 + [!A2] neg .l1 A0,A4 32 + || [A2] shru .s1 A4,A1,A4 33 + [!A2] shr .s1 A5,A4,A4 34 + || [A2] or .d1 A4,A0,A4 35 + [!A2] shr .s1 A5,0x1f,A5 36 + [A2] shr .s1 A5,A1,A5 37 + bnop .s2 B3,5 38 + ENDPROC(__c6xabi_llshr)
+38
arch/c6x/lib/llshru.S
··· 1 + ;; Copyright (C) 2010 Texas Instruments Incorporated 2 + ;; Contributed by Mark Salter <msalter@redhat.com>. 3 + ;; 4 + ;; This program is free software; you can redistribute it and/or modify 5 + ;; it under the terms of the GNU General Public License as published by 6 + ;; the Free Software Foundation; either version 2 of the License, or 7 + ;; (at your option) any later version. 8 + ;; 9 + ;; This program is distributed in the hope that it will be useful, 10 + ;; but WITHOUT ANY WARRANTY; without even the implied warranty of 11 + ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 + ;; GNU General Public License for more details. 13 + ;; 14 + ;; You should have received a copy of the GNU General Public License 15 + ;; along with this program; if not, write to the Free Software 16 + ;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 17 + 18 + ;; uint64_t __c6xabi_llshru(uint64_t val, uint shift) 19 + 20 + #include <linux/linkage.h> 21 + 22 + .text 23 + ENTRY(__c6xabi_llshru) 24 + mv .l1x B4,A1 25 + [!A1] b .s2 B3 ; return if zero shift count 26 + mvk .s1 32,A0 27 + sub .d1 A0,A1,A0 28 + cmplt .l1 0,A0,A2 29 + [A2] shl .s1 A5,A0,A0 30 + nop 31 + [!A2] neg .l1 A0,A4 32 + || [A2] shru .s1 A4,A1,A4 33 + [!A2] shru .s1 A5,A4,A4 34 + || [A2] or .d1 A4,A0,A4 35 + || [!A2] mvk .l1 0,A5 36 + [A2] shru .s1 A5,A1,A5 37 + bnop .s2 B3,5 38 + ENDPROC(__c6xabi_llshru)
+46
arch/c6x/lib/memcpy_64plus.S
··· 1 + ; Port on Texas Instruments TMS320C6x architecture 2 + ; 3 + ; Copyright (C) 2006, 2009, 2010 Texas Instruments Incorporated 4 + ; Author: Aurelien Jacquiot (aurelien.jacquiot@jaluna.com) 5 + ; 6 + ; This program is free software; you can redistribute it and/or modify 7 + ; it under the terms of the GNU General Public License version 2 as 8 + ; published by the Free Software Foundation. 9 + ; 10 + 11 + #include <linux/linkage.h> 12 + 13 + .text 14 + 15 + ENTRY(memcpy) 16 + AND .L1 0x1,A6,A0 17 + || AND .S1 0x2,A6,A1 18 + || AND .L2X 0x4,A6,B0 19 + || MV .D1 A4,A3 20 + || MVC .S2 ILC,B2 21 + 22 + [A0] LDB .D2T1 *B4++,A5 23 + [A1] LDB .D2T1 *B4++,A7 24 + [A1] LDB .D2T1 *B4++,A8 25 + [B0] LDNW .D2T1 *B4++,A9 26 + || SHRU .S2X A6,0x3,B1 27 + [!B1] BNOP .S2 B3,1 28 + 29 + [A0] STB .D1T1 A5,*A3++ 30 + ||[B1] MVC .S2 B1,ILC 31 + [A1] STB .D1T1 A7,*A3++ 32 + [A1] STB .D1T1 A8,*A3++ 33 + [B0] STNW .D1T1 A9,*A3++ ; return when len < 8 34 + 35 + SPLOOP 2 36 + 37 + LDNDW .D2T1 *B4++,A9:A8 38 + NOP 3 39 + 40 + NOP 41 + SPKERNEL 0,0 42 + || STNDW .D1T1 A9:A8,*A3++ 43 + 44 + BNOP .S2 B3,4 45 + MVC .S2 B2,ILC 46 + ENDPROC(memcpy)
+49
arch/c6x/lib/mpyll.S
··· 1 + ;; Copyright (C) 2010 Texas Instruments Incorporated 2 + ;; Contributed by Mark Salter <msalter@redhat.com>. 3 + ;; 4 + ;; This program is free software; you can redistribute it and/or modify 5 + ;; it under the terms of the GNU General Public License as published by 6 + ;; the Free Software Foundation; either version 2 of the License, or 7 + ;; (at your option) any later version. 8 + ;; 9 + ;; This program is distributed in the hope that it will be useful, 10 + ;; but WITHOUT ANY WARRANTY; without even the implied warranty of 11 + ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 + ;; GNU General Public License for more details. 13 + ;; 14 + ;; You should have received a copy of the GNU General Public License 15 + ;; along with this program; if not, write to the Free Software 16 + ;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 17 + 18 + #include <linux/linkage.h> 19 + 20 + ;; uint64_t __c6xabi_mpyll(uint64_t x, uint64_t y) 21 + ;; 22 + ;; 64x64 multiply 23 + ;; First compute partial results using 32-bit parts of x and y: 24 + ;; 25 + ;; b63 b32 b31 b0 26 + ;; ----------------------------- 27 + ;; | 1 | 0 | 28 + ;; ----------------------------- 29 + ;; 30 + ;; P0 = X0*Y0 31 + ;; P1 = X0*Y1 + X1*Y0 32 + ;; P2 = X1*Y1 33 + ;; 34 + ;; result = (P2 << 64) + (P1 << 32) + P0 35 + ;; 36 + ;; Since the result is also 64-bit, we can skip the P2 term. 37 + 38 + .text 39 + ENTRY(__c6xabi_mpyll) 40 + mpy32u .m1x A4,B4,A1:A0 ; X0*Y0 41 + b .s2 B3 42 + || mpy32u .m2x B5,A4,B1:B0 ; X0*Y1 (don't need upper 32-bits) 43 + || mpy32u .m1x A5,B4,A3:A2 ; X1*Y0 (don't need upper 32-bits) 44 + nop 45 + nop 46 + mv .s1 A0,A4 47 + add .l1x A2,B0,A5 48 + add .s1 A1,A5,A5 49 + ENDPROC(__c6xabi_mpyll)
+31
arch/c6x/lib/negll.S
··· 1 + ;; Copyright (C) 2010 Texas Instruments Incorporated 2 + ;; Contributed by Mark Salter <msalter@redhat.com>. 3 + ;; 4 + ;; This program is free software; you can redistribute it and/or modify 5 + ;; it under the terms of the GNU General Public License as published by 6 + ;; the Free Software Foundation; either version 2 of the License, or 7 + ;; (at your option) any later version. 8 + ;; 9 + ;; This program is distributed in the hope that it will be useful, 10 + ;; but WITHOUT ANY WARRANTY; without even the implied warranty of 11 + ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 + ;; GNU General Public License for more details. 13 + ;; 14 + ;; You should have received a copy of the GNU General Public License 15 + ;; along with this program; if not, write to the Free Software 16 + ;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 17 + 18 + ;; int64_t __c6xabi_negll(int64_t val) 19 + 20 + #include <linux/linkage.h> 21 + 22 + .text 23 + ENTRY(__c6xabi_negll) 24 + b .s2 B3 25 + mvk .l1 0,A0 26 + subu .l1 A0,A4,A3:A2 27 + sub .l1 A0,A5,A0 28 + || ext .s1 A3,24,24,A5 29 + add .l1 A5,A0,A5 30 + mv .s1 A2,A4 31 + ENDPROC(__c6xabi_negll)
+32
arch/c6x/lib/pop_rts.S
··· 1 + ;; Copyright 2010 Free Software Foundation, Inc. 2 + ;; Contributed by Bernd Schmidt <bernds@codesourcery.com>. 3 + ;; 4 + ;; This program is free software; you can redistribute it and/or modify 5 + ;; it under the terms of the GNU General Public License as published by 6 + ;; the Free Software Foundation; either version 2 of the License, or 7 + ;; (at your option) any later version. 8 + ;; 9 + ;; This program is distributed in the hope that it will be useful, 10 + ;; but WITHOUT ANY WARRANTY; without even the implied warranty of 11 + ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 + ;; GNU General Public License for more details. 13 + ;; 14 + ;; You should have received a copy of the GNU General Public License 15 + ;; along with this program; if not, write to the Free Software 16 + ;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 17 + 18 + #include <linux/linkage.h> 19 + 20 + .text 21 + 22 + ENTRY(__c6xabi_pop_rts) 23 + lddw .d2t2 *++B15, B3:B2 24 + lddw .d2t1 *++B15, A11:A10 25 + lddw .d2t2 *++B15, B11:B10 26 + lddw .d2t1 *++B15, A13:A12 27 + lddw .d2t2 *++B15, B13:B12 28 + lddw .d2t1 *++B15, A15:A14 29 + || b .s2 B3 30 + ldw .d2t2 *++B15[2], B14 31 + nop 4 32 + ENDPROC(__c6xabi_pop_rts)
+31
arch/c6x/lib/push_rts.S
··· 1 + ;; Copyright 2010 Free Software Foundation, Inc. 2 + ;; Contributed by Bernd Schmidt <bernds@codesourcery.com>. 3 + ;; 4 + ;; This program is free software; you can redistribute it and/or modify 5 + ;; it under the terms of the GNU General Public License as published by 6 + ;; the Free Software Foundation; either version 2 of the License, or 7 + ;; (at your option) any later version. 8 + ;; 9 + ;; This program is distributed in the hope that it will be useful, 10 + ;; but WITHOUT ANY WARRANTY; without even the implied warranty of 11 + ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 + ;; GNU General Public License for more details. 13 + ;; 14 + ;; You should have received a copy of the GNU General Public License 15 + ;; along with this program; if not, write to the Free Software 16 + ;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 17 + 18 + #include <linux/linkage.h> 19 + 20 + .text 21 + 22 + ENTRY(__c6xabi_push_rts) 23 + stw .d2t2 B14, *B15--[2] 24 + stdw .d2t1 A15:A14, *B15-- 25 + || b .s2x A3 26 + stdw .d2t2 B13:B12, *B15-- 27 + stdw .d2t1 A13:A12, *B15-- 28 + stdw .d2t2 B11:B10, *B15-- 29 + stdw .d2t1 A11:A10, *B15-- 30 + stdw .d2t2 B3:B2, *B15-- 31 + ENDPROC(__c6xabi_push_rts)
+64
arch/c6x/lib/remi.S
;; Copyright 2010 Free Software Foundation, Inc.
;; Contributed by Bernd Schmidt <bernds@codesourcery.com>.
;;
;; This program is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 2 of the License, or
;; (at your option) any later version.
;;
;; This program is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with this program; if not, write to the Free Software
;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

#include <linux/linkage.h>

;; ABI considerations for the divide functions
;; The following registers are call-used:
;; __c6xabi_divi A0,A1,A2,A4,A6,B0,B1,B2,B4,B5
;; __c6xabi_divu A0,A1,A2,A4,A6,B0,B1,B2,B4
;; __c6xabi_remi A1,A2,A4,A5,A6,B0,B1,B2,B4
;; __c6xabi_remu A1,A4,A5,A7,B0,B1,B2,B4
;;
;; In our implementation, divu and remu are leaf functions,
;; while both divi and remi call into divu.
;; A0 is not clobbered by any of the functions.
;; divu does not clobber B2 either, which is taken advantage of
;; in remi.
;; divi uses B5 to hold the original return address during
;; the call to divu.
;; remi uses B2 and A5 to hold the input values during the
;; call to divu.  It stores B3 on the stack.

	.text

;; __c6xabi_remi: signed 32-bit remainder, A4 = A4 % B4.
;;
;; Strategy: save the signed inputs (A5 = dividend, B2 = divisor),
;; take absolute values, call __c6xabi_divu for the unsigned quotient,
;; fix up the quotient's sign, then compute
;;   remainder = dividend - quotient * divisor
;; so the remainder takes the sign of the dividend.
ENTRY(__c6xabi_remi)
	;; Save return address; record the sign of each operand
	;; (A1 = dividend < 0, B2 = divisor < 0); keep the original
	;; dividend in A5; and issue the call to divu — its delay slots
	;; are filled by the packets below.
	stw	.d2t2	B3, *B15--[2]
||	cmpgt	.l1	0, A4, A1
||	cmpgt	.l2	0, B4, B2
||	mv	.s1	A4, A5
||	call	.s2	__c6xabi_divu

	;; Take |dividend| and |divisor|.  All four instructions are one
	;; execute packet, so the reads see pre-packet values: B0 gets
	;; (divisor<0) XOR (dividend<0) — the sign of the quotient — and
	;; B2 receives the ORIGINAL signed divisor (read before neg).
	[A1]	neg	.l1	A4, A4
||	[B2]	neg	.l2	B4, B4
||	xor	.s2x	B2, A1, B0
||	mv	.d2	B4, B2

	;; Choose divu's return point: ret_1 (negate quotient) when the
	;; operand signs differ, ret_2 otherwise.
	[B0]	addkpc	.s2	_divu_ret_1, B3, 1
	[!B0]	addkpc	.s2	_divu_ret_2, B3, 1
	nop	2
_divu_ret_1:
	neg	.l1	A4, A4			; quotient takes negative sign
_divu_ret_2:
	ldw	.d2t2	*++B15[2], B3		; restore our return address

	;; remainder = dividend - quotient * divisor.
	;; mpy32 has a latency covered by "nop 3"; the ret's delay slots
	;; are filled by the sub + "nop 4".
	mpy32	.m1x	A4, B2, A6
	nop	3
	ret	.s2	B3
	sub	.l1	A5, A6, A4
	nop	4
ENDPROC(__c6xabi_remi)
+82
arch/c6x/lib/remu.S
;; Copyright 2010 Free Software Foundation, Inc.
;; Contributed by Bernd Schmidt <bernds@codesourcery.com>.
;;
;; This program is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 2 of the License, or
;; (at your option) any later version.
;;
;; This program is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with this program; if not, write to the Free Software
;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

#include <linux/linkage.h>

;; ABI considerations for the divide functions
;; The following registers are call-used:
;; __c6xabi_divi A0,A1,A2,A4,A6,B0,B1,B2,B4,B5
;; __c6xabi_divu A0,A1,A2,A4,A6,B0,B1,B2,B4
;; __c6xabi_remi A1,A2,A4,A5,A6,B0,B1,B2,B4
;; __c6xabi_remu A1,A4,A5,A7,B0,B1,B2,B4
;;
;; In our implementation, divu and remu are leaf functions,
;; while both divi and remi call into divu.
;; A0 is not clobbered by any of the functions.
;; divu does not clobber B2 either, which is taken advantage of
;; in remi.
;; divi uses B5 to hold the original return address during
;; the call to divu.
;; remi uses B2 and A5 to hold the input values during the
;; call to divu.  It stores B3 on the stack.


	.text

;; __c6xabi_remu: unsigned 32-bit remainder, A4 = A4 % B4.
;;
;; Classic shift-and-conditional-subtract (subc) division, unrolled
;; 8x and software-pipelined: the remainder is left in the low bits
;; of A4 and extracted at the end with extu.
ENTRY(__c6xabi_remu)
	;; The ABI seems designed to prevent these functions calling each other,
	;; so we duplicate most of the divsi3 code here.
	;;
	;; B1 = dividend (for the zero test); lmbd counts the leading
	;; zeros of the divisor into B1, which is the number of subc
	;; iterations needed.  The predicated branch reads the OLD B1
	;; (the dividend, same execute packet): a zero dividend returns
	;; immediately with A4 == 0 already the correct remainder, and
	;; B4 is forced to 1 on that early-out path.
	mv	.s2x	A4, B1
	lmbd	.l2	1, B4, B1
||	[!B1]	b	.s2	B3	; RETURN A
||	[!B1]	mvk	.d2	1, B4

	;; A7 keeps the shift count for the final extu; normalize the
	;; divisor so its top set bit is at the MSB.
	mv	.l1x	B1, A7
||	shl	.s2	B4, B1, B4

	;; First trial subtraction at full shift, then drop the divisor
	;; one bit so the subc loop handles the remaining B1 steps.
	cmpltu	.l1x	A4, B4, A1
	[!A1]	sub	.l1x	A4, B4, A4
	shru	.s2	B4, 1, B4

_remu_loop:
	;; 8 subc steps per trip; B0 = (more than 7 steps remain) decides
	;; whether to take the loop-back branch, which is issued early so
	;; its 5 delay slots overlap the last subc steps of this trip.
	cmpgt	.l2	B1, 7, B0
||	[B1]	subc	.l1x	A4,B4,A4
||	[B1]	add	.s2	-1, B1, B1
	;; RETURN A may happen here (note: must happen before the next branch)
	[B1]	subc	.l1x	A4,B4,A4
||	[B1]	add	.s2	-1, B1, B1
||	[B0]	b	.s1	_remu_loop
	[B1]	subc	.l1x	A4,B4,A4
||	[B1]	add	.s2	-1, B1, B1
	[B1]	subc	.l1x	A4,B4,A4
||	[B1]	add	.s2	-1, B1, B1
	[B1]	subc	.l1x	A4,B4,A4
||	[B1]	add	.s2	-1, B1, B1
	[B1]	subc	.l1x	A4,B4,A4
||	[B1]	add	.s2	-1, B1, B1
	[B1]	subc	.l1x	A4,B4,A4
||	[B1]	add	.s2	-1, B1, B1
	;; loop backwards branch happens here

	;; Final trip: ret's 5 delay slots cover the last subc steps and
	;; the remainder extraction.  extu shifts the quotient bits (A7 of
	;; them) out of A4, leaving the remainder.
	ret	.s2	B3
	[B1]	subc	.l1x	A4,B4,A4
||	[B1]	add	.s2	-1, B1, B1
	[B1]	subc	.l1x	A4,B4,A4

	extu	.s1	A4, A7, A4
	nop	2
ENDPROC(__c6xabi_remu)
+89
arch/c6x/lib/strasgi.S
;; Copyright 2010 Free Software Foundation, Inc.
;; Contributed by Bernd Schmidt <bernds@codesourcery.com>.
;;
;; This program is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 2 of the License, or
;; (at your option) any later version.
;;
;; This program is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with this program; if not, write to the Free Software
;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

#include <linux/linkage.h>

	.text

;; __c6xabi_strasgi: block copy (structure assignment) helper.
;; Arguments: A4 = destination, B4 = source, A6 = byte count.
;;
;; Software-pipelined word copy: six words are pre-loaded into
;; A0/A1/A5/A7/A8/A9, then each loop trip stores six words while
;; loading the next six, staging each value through B5 on its way to
;; the store unit.  B6/B7 track the remaining byte count; B1 (16) and
;; B2 (20) are threshold constants for the tail comparisons.
ENTRY(__c6xabi_strasgi)
	;; This is essentially memcpy, with alignment known to be at least
	;; 4, and the size a multiple of 4 greater than or equal to 28.
	ldw	.d2t1	*B4++, A0	; pre-load six words; set up constants
||	mvk	.s2	16, B1
	ldw	.d2t1	*B4++, A1
||	mvk	.s2	20, B2
||	sub	.d1	A6, 24, A6	; account for the 24 bytes in flight
	ldw	.d2t1	*B4++, A5
	ldw	.d2t1	*B4++, A7
||	mv	.l2x	A6, B7		; B7 = remaining count
	ldw	.d2t1	*B4++, A8
	ldw	.d2t1	*B4++, A9
||	mv	.s2x	A0, B5		; stage first word for the store
||	cmpltu	.l2	B2, B7, B0	; B0 = more than 20 bytes left?

_strasgi_loop:
	;; Each packet: store the staged word, conditionally load a
	;; replacement, stage the next value, and update the tail
	;; predicate for the packet after next.
	stw	.d1t2	B5, *A4++
||	[B0]	ldw	.d2t1	*B4++, A0
||	mv	.s2x	A1, B5
||	mv	.l2	B7, B6

	[B0]	sub	.d2	B6, 24, B7	; consume another 24 bytes
||	[B0]	b	.s2	_strasgi_loop	; branch early; 5 delay slots below
||	cmpltu	.l2	B1, B6, B0

	[B0]	ldw	.d2t1	*B4++, A1
||	stw	.d1t2	B5, *A4++
||	mv	.s2x	A5, B5
||	cmpltu	.l2	12, B6, B0

	[B0]	ldw	.d2t1	*B4++, A5
||	stw	.d1t2	B5, *A4++
||	mv	.s2x	A7, B5
||	cmpltu	.l2	8, B6, B0

	[B0]	ldw	.d2t1	*B4++, A7
||	stw	.d1t2	B5, *A4++
||	mv	.s2x	A8, B5
||	cmpltu	.l2	4, B6, B0

	[B0]	ldw	.d2t1	*B4++, A8
||	stw	.d1t2	B5, *A4++
||	mv	.s2x	A9, B5
||	cmpltu	.l2	0, B6, B0

	[B0]	ldw	.d2t1	*B4++, A9
||	stw	.d1t2	B5, *A4++
||	mv	.s2x	A0, B5
||	cmpltu	.l2	B2, B7, B0

	;; loop back branch happens here

	;; Epilogue: drain the words still held in registers, each store
	;; predicated on how many bytes actually remain; the ret's delay
	;; slots overlap the drain.
	cmpltu	.l2	B1, B6, B0
||	ret	.s2	b3

	[B0]	stw	.d1t1	A1, *A4++
||	cmpltu	.l2	12, B6, B0
	[B0]	stw	.d1t1	A5, *A4++
||	cmpltu	.l2	8, B6, B0
	[B0]	stw	.d1t1	A7, *A4++
||	cmpltu	.l2	4, B6, B0
	[B0]	stw	.d1t1	A8, *A4++
||	cmpltu	.l2	0, B6, B0
	[B0]	stw	.d1t1	A9, *A4++

	;; return happens here
ENDPROC(__c6xabi_strasgi)
+39
arch/c6x/lib/strasgi_64plus.S
;; Copyright 2010 Free Software Foundation, Inc.
;; Contributed by Bernd Schmidt <bernds@codesourcery.com>.
;;
;; This program is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 2 of the License, or
;; (at your option) any later version.
;;
;; This program is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with this program; if not, write to the Free Software
;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

#include <linux/linkage.h>

	.text

;; __c6xabi_strasgi_64plus: block copy helper for C64x+ and later,
;; using the SPLOOP hardware loop buffer instead of an explicit
;; software-pipelined loop (compare strasgi.S).
;; Arguments: a4 = destination, b4 = source, a6 = byte count.
;;
;; The loop copies a6/4 words.  ILC (inner loop count) is biased by -4
;; relative to the word count; the sploopd/spkernel encoding accounts
;; for the pipeline stages of the single-cycle-initiated load/store
;; chain (5-cycle ldw latency bridged by "nop 4" and the a31 staging
;; move).
ENTRY(__c6xabi_strasgi_64plus)
	shru	.s2x	a6, 2, b31	; b31 = word count (a6 / 4)
||	mv	.s1	a4, a30		; a30 = working destination pointer
||	mv	.d2	b4, b30		; b30 = working source pointer

	add	.s2	-4, b31, b31	; bias for SPLOOP epilogue stages

	sploopd	1			; hardware loop, ii = 1 cycle
||	mvc	.s2	b31, ilc	; set inner loop count
	ldw	.d2t2	*b30++, b31	; load next word
	nop	4			; cover ldw latency
	mv	.s1x	b31,a31		; cross to the A side for the store
	spkernel	6, 0
||	stw	.d1t1	a31, *a30++	; store word

	ret	.s2	b3
	nop	5			; fill return delay slots
ENDPROC(__c6xabi_strasgi_64plus)