at v2.6.13 526 lines 16 kB view raw
#ifndef XTENSA_COREASM_H
#define XTENSA_COREASM_H

/*
 * THIS FILE IS GENERATED -- DO NOT MODIFY BY HAND
 *
 * include/asm-xtensa/xtensa/coreasm.h -- assembler-specific
 * definitions that depend on CORE configuration.
 *
 * Source for configuration-independent binaries (which link in a
 * configuration-specific HAL library) must NEVER include this file.
 * It is perfectly normal, however, for the HAL itself to include this
 * file.
 *
 * This file must NOT include xtensa/config/system.h.  Any assembler
 * header file that depends on system information should likely go in
 * a new systemasm.h (or sysasm.h) header file.
 *
 * NOTE: macro beqi32 is NOT configuration-dependent, and is placed
 * here until we have a configuration-independent header file.
 *
 * This file is subject to the terms and conditions of the GNU General
 * Public License.  See the file "COPYING" in the main directory of
 * this archive for more details.
 *
 * Copyright (C) 2002 Tensilica Inc.
 */


#include <xtensa/config/core.h>
#include <xtensa/config/specreg.h>

/*
 * Assembly-language specific definitions (assembly macros, etc.).
 */

/*----------------------------------------------------------------------
 * find_ms_setbit
 *
 * This macro finds the most significant bit that is set in <as>
 * and returns its index + <base> in <ad>, or <base> - 1 if <as> is zero.
 * The index counts starting at zero for the lsbit, so the return
 * value ranges from <base>-1 (no bit set) to <base>+31 (msbit set).
 *
 * Parameters:
 *	<ad>	destination address register (any register)
 *	<as>	source address register
 *	<at>	temporary address register (must be different than <as>)
 *	<base>	constant value added to result (usually 0 or 1)
 * On entry:
 *	<ad> = undefined if different than <as>
 *	<as> = value whose most significant set bit is to be found
 *	<at> = undefined
 *	no other registers are used by this macro.
*/
/*
 * find_ms_setbit, continued: exit state, example and implementation notes.
 *
 * On exit:
 *	<ad> = <base> + index of the most significant set bit of the
 *	       original <as>; <base> - 1 if the original <as> was zero.
 *	<as> clobbered (if not <ad>)
 *	<at> clobbered (if not <ad>)
 * Example:
 *	find_ms_setbit a0, a4, a0, 0	-- a0 = index of msbit set in a4
 *
 * Two implementations are selected at preprocessing time:
 *   - XCHAL_HAVE_NSA set: one NSAU, whose result is numbered from the
 *     msbit (32 when no bit is set), subtracted from 31 + <base>.
 *   - otherwise: a binary search that narrows <as> by 16, 8, 4 and 2
 *     bits and then tests the last bit, accumulating the index in <at>.
 */

	.macro	find_ms_setbit ad, as, at, base
#if XCHAL_HAVE_NSA
	movi	\at, 31+\base		// \at = <base> + 31, the result for the msbit itself
	nsau	\as, \as		// \as = position counted from the msbit (32 if no bit is set)
	sub	\ad, \at, \as		// renumber from the lsbit: <base>+0..31, or <base>-1 if none
#else /* !XCHAL_HAVE_NSA */
	movi	\at, \base		// running result, starts at the lsbit position

	beqz	\as, 2f			// zero input: only the final -1 adjustment applies
	bltui	\as, 0x10000, 1f	// value fits in the low 16 bits?  then search those
	addi	\at, \at, 16		// no: the msbit is somewhere in bits 16..31
	extui	\as, \as, 16, 16	//     continue with the upper halfword (a right shift by 16)
1:	bltui	\as, 0x100, 1f		// fits in the low 8 bits (of 16)?
	addi	\at, \at, 8		// no: account for the skipped low byte
	srli	\as, \as, 8		//     and continue with the upper byte
1:	bltui	\as, 0x10, 1f		// fits in the low 4 bits (of 8)?
	addi	\at, \at, 4		// no: account for the skipped low nibble
	srli	\as, \as, 4		//     and continue with the upper nibble
1:	bltui	\as, 0x4, 1f		// fits in the low 2 bits (of 4)?
	addi	\at, \at, 2		// no: account for the skipped low pair
	srli	\as, \as, 2		//     and continue with the upper pair
1:	bltui	\as, 0x2, 1f		// only bit 0 left?  then \at is already final
	addi	\at, \at, 2		// no: +2 here and -1 just below give the net +1 for bit 1
2:	addi	\at, \at, -1		// shared tail: net +1 from above, <base>-1 from the beqz
1:					// \at = <base> + msbit index, or <base>-1 if no bit was set
	.ifne	0x\ad - 0x\at		// \ad and \at differ?  (compares the names as hex numbers,
					//  which works because the registers are a0-a15)
	mov	\ad, \at		// then copy the result into the destination
	.endif
#endif /* XCHAL_HAVE_NSA */
	.endm	// find_ms_setbit

/*----------------------------------------------------------------------
 * find_ls_setbit
 *
 * This macro finds the least significant bit that is set in <as>
 * and returns its index + <base> in <ad>, or <base> - 1 if <as> is zero.
 * Parameters and register usage are the same as for the find_ms_setbit
 * macro; in particular <at> must be different than <as>.
 * Example:
 *	find_ls_setbit a0, a4, a0, 0	-- a0 = index of lsbit set in a4
 */

	.macro	find_ls_setbit ad, as, at, base
	neg	\at, \as		// \at = -\as
	and	\as, \at, \as		// \as & -\as isolates the least-significant set bit
	find_ms_setbit	\ad, \as, \at, \base	// with one bit left, its msbit is the lsbit wanted
	.endm	// find_ls_setbit

/*----------------------------------------------------------------------
 * find_ls_one
 *
 * Same as find_ls_setbit with base zero; <ad> doubles as the temporary.
 * Source (as) and destination (ad) registers must be different.
 * Provided for backward compatibility.
 */

	.macro	find_ls_one ad, as
	find_ls_setbit	\ad, \as, \ad, 0
	.endm	// find_ls_one

/*----------------------------------------------------------------------
 * floop, floopnez, floopgtz, floopend
 *
 * These macros are used for fast inner loops that
 * work whether or not the Loops option is configured.
 * If the Loops option is configured, they simply use
 * the zero-overhead LOOP instructions; otherwise
 * they use explicit decrement and branch instructions.
 *
 * They are used in pairs, with floop, floopnez or floopgtz
 * at the beginning of the loop, and floopend at the end.
 *
 * Each pair of loop macro calls must be given the loop count
 * address register and a unique label for that loop.
*/
/*
 * floop, floopnez, floopgtz, floopend -- usage notes, continued.
 *
 * Example:
 *
 *	movi	 a3, 16     // loop 16 times
 *	floop    a3, myloop1
 *	:
 *	bnez     a7, end1	// exit loop if a7 != 0
 *	:
 *	floopend a3, myloop1
 * end1:
 *
 * Like the LOOP instructions, these macros cannot be
 * nested, must include at least one instruction,
 * cannot call functions inside the loop, etc.
 * The loop can be exited by jumping to the instruction
 * following floopend (or elsewhere outside the loop),
 * or continued by jumping to a NOP instruction placed
 * immediately before floopend.
 *
 * Unlike LOOP instructions, the register passed to floop*
 * cannot be used inside the loop, because it is used as
 * the loop counter if the Loops option is not configured.
 * And its value is undefined after exiting the loop.
 * And because the loop counter register is active inside
 * the loop, you can't easily use this construct to loop
 * across a register file using ROTW as you might with LOOP
 * instructions, unless you copy the loop register along.
 *
 * Implementation notes:
 *   - The assembler symbol _infloop_ is a flag: the floopXXX macros
 *     set it to 1 and floopend sets it back to 0.  It is used only to
 *     report nesting and unmatched floopend at assembly time.
 *   - Without the Loops option the counter is decremented at the top
 *     of the loop (start label) and tested with BNEZ at the bottom.
 */

	/*
	 * Named label version of the macros.
	 * Both internal labels are derived from <endlabel>, so the opening
	 * macro and its floopend must be given the same name.
	 */

	.macro	floop		ar, endlabel
	floop_		\ar, .Lfloopstart_\endlabel, .Lfloopend_\endlabel
	.endm

	.macro	floopnez	ar, endlabel
	floopnez_	\ar, .Lfloopstart_\endlabel, .Lfloopend_\endlabel
	.endm

	.macro	floopgtz	ar, endlabel
	floopgtz_	\ar, .Lfloopstart_\endlabel, .Lfloopend_\endlabel
	.endm

	.macro	floopend	ar, endlabel
	floopend_	\ar, .Lfloopstart_\endlabel, .Lfloopend_\endlabel
	.endm

	/*
	 * Numbered local label version of the macros (labels 8 and 9).
	 * Disabled: marked untested in the original source.
	 */
#if 0 /*UNTESTED*/
	.macro	floop89		ar
	floop_		\ar, 8, 9f
	.endm

	.macro	floopnez89	ar
	floopnez_	\ar, 8, 9f
	.endm

	.macro	floopgtz89	ar
	floopgtz_	\ar, 8, 9f
	.endm

	.macro	floopend89	ar
	floopend_	\ar, 8b, 9
	.endm
#endif /*0*/

	/*
	 * Underlying version of the macros.
	 *
	 *	<ar>		loop count register
	 *	<startlabel>	label to define at the top of the loop body
	 *			(defined only without the Loops option)
	 *	<endlabelref>	reference to the label that floopend_ defines
	 */

	/* floop_: loop <ar> times, with no check of the initial count. */
	.macro	floop_	ar, startlabel, endlabelref
	.ifdef	_infloop_
	.if	_infloop_
	.err	// Error:  floop cannot be nested
	.endif
	.endif
	.set	_infloop_, 1
#if XCHAL_HAVE_LOOPS
	loop	\ar, \endlabelref
#else /* !XCHAL_HAVE_LOOPS */
\startlabel:
	addi	\ar, \ar, -1		// count this iteration; floopend_ tests the result
#endif /* XCHAL_HAVE_LOOPS */
	.endm	// floop_

	/* floopnez_: like floop_, but skips the loop when <ar> is zero. */
	.macro	floopnez_	ar, startlabel, endlabelref
	.ifdef	_infloop_
	.if	_infloop_
	.err	// Error:  floopnez cannot be nested
	.endif
	.endif
	.set	_infloop_, 1
#if XCHAL_HAVE_LOOPS
	loopnez	\ar, \endlabelref
#else /* !XCHAL_HAVE_LOOPS */
	beqz	\ar, \endlabelref	// zero count: skip the whole loop
\startlabel:
	addi	\ar, \ar, -1
#endif /* XCHAL_HAVE_LOOPS */
	.endm	// floopnez_

	/* floopgtz_: like floop_, but skips the loop when <ar> is zero or negative. */
	.macro	floopgtz_	ar, startlabel, endlabelref
	.ifdef	_infloop_
	.if	_infloop_
	.err	// Error:  floopgtz cannot be nested
	.endif
	.endif
	.set	_infloop_, 1
#if XCHAL_HAVE_LOOPS
	loopgtz	\ar, \endlabelref
#else /* !XCHAL_HAVE_LOOPS */
	bltz	\ar, \endlabelref	// negative count: skip the whole loop
	beqz	\ar, \endlabelref	// zero count: skip the whole loop
\startlabel:
	addi	\ar, \ar, -1
#endif /* XCHAL_HAVE_LOOPS */
	.endm	// floopgtz_


	/*
	 * floopend_: closes a loop opened by floop_, floopnez_ or floopgtz_.
	 *
	 *	<ar>		loop count register (same as in the opening macro)
	 *	<startlabelref>	reference to the start label of the opening macro
	 *	<endlabel>	label to define just past the end of the loop
	 *
	 * The value of _infloop_ is tested only when the symbol is defined,
	 * mirroring the guard in the opening macros, so that a floopend
	 * with no preceding floopXXX reports the one intended error and
	 * the assembler never evaluates an undefined symbol.
	 */
	.macro	floopend_	ar, startlabelref, endlabel
	.ifdef	_infloop_
	.ifeq	_infloop_
	.err	// Error:  floopend without matching floopXXX
	.endif
	.else
	.err	// Error:  floopend without matching floopXXX
	.endif
	.set	_infloop_, 0
#if ! XCHAL_HAVE_LOOPS
	bnez	\ar, \startlabelref	// iterations left: branch back to the decrement
#endif /* XCHAL_HAVE_LOOPS */
\endlabel:
	.endm	// floopend_

/*----------------------------------------------------------------------
 * crsil  --  conditional RSIL (read/set interrupt level)
 *
 * Executes the RSIL instruction if it exists, else just reads PS.
 * The RSIL instruction does not exist in the new exception architecture
 * if the interrupt option is not selected.
 *
 * Parameters:
 *	<ar>		register operand of the RSIL (or RSR) instruction
 *	<newlevel>	level operand passed to RSIL; unused when only
 *			PS is read
 */

	.macro	crsil	ar, newlevel
#if XCHAL_HAVE_OLD_EXC_ARCH || XCHAL_HAVE_INTERRUPTS
	rsil	\ar, \newlevel
#else
	rsr	\ar, PS
#endif
	.endm	// crsil

/*----------------------------------------------------------------------
 * window_spill{4,8,12}
 *
 * These macros spill callers' register windows to the stack.
 * They work for both privileged and non-privileged tasks.
 * Must be called from a windowed ABI context, eg. within
 * a windowed ABI function (ie. valid stack frame, window
 * exceptions enabled, not in exception mode, etc).
 *
 * This macro requires a single invocation of the window_spill_common
 * macro in the same assembly unit and section.
 *
 * Note that using window_spill{4,8,12} macros is more efficient
 * than calling a function implemented using window_spill_function,
 * because the latter needs extra code to figure out the size of
 * the call to the spilling function.
*/
/*
 * window_spill{4,8,12} -- usage, continued.
 *
 * Example usage:
 *
 *		.text
 *		.align	4
 *		.global	some_function
 *		.type	some_function,@function
 *	some_function:
 *		entry	a1, 16
 *		:
 *		:
 *
 *		window_spill4	// spill windows of some_function's callers; preserves a0..a3 only;
 *				// to use window_spill{8,12} in this example function we'd have
 *				// to increase space allocated by the entry instruction, because
 *				// 16 bytes only allows call4; 32 or 48 bytes (+locals) are needed
 *				// for call8/window_spill8 or call12/window_spill12 respectively.
 *		:
 *
 *		retw
 *
 *		window_spill_common	// instantiates code used by window_spill4
 *
 *
 * On entry:
 *	none (if window_spill4)
 *	stack frame has enough space allocated for call8 (if window_spill8)
 *	stack frame has enough space allocated for call12 (if window_spill12)
 * On exit:
 *	a4..a15 clobbered (if window_spill4)
 *	a8..a15 clobbered (if window_spill8)
 *	a12..a15 clobbered (if window_spill12)
 *	no caller windows are in live registers
 *
 * Implementation: with a 16-entry register file a write to a15 is
 * enough; with 32 or 64 entries the macro calls into the chain of
 * .L__wdwspill_assist<NN> helpers emitted by window_spill_common.
 * Nothing is emitted when the windowed option is not configured.
 */

	.macro	window_spill4
#if XCHAL_HAVE_WINDOWED
# if XCHAL_NUM_AREGS == 16
	movi	a15, 0			// for 16-register files, no need to call to reach the end
# elif XCHAL_NUM_AREGS == 32
	call4	.L__wdwspill_assist28	// call deep enough to clear out any live callers
# elif XCHAL_NUM_AREGS == 64
	call4	.L__wdwspill_assist60	// call deep enough to clear out any live callers
# endif
#endif
	.endm	// window_spill4

	.macro	window_spill8
#if XCHAL_HAVE_WINDOWED
# if XCHAL_NUM_AREGS == 16
	movi	a15, 0			// for 16-register files, no need to call to reach the end
# elif XCHAL_NUM_AREGS == 32
	call8	.L__wdwspill_assist24	// call deep enough to clear out any live callers
# elif XCHAL_NUM_AREGS == 64
	call8	.L__wdwspill_assist56	// call deep enough to clear out any live callers
# endif
#endif
	.endm	// window_spill8

	.macro	window_spill12
#if XCHAL_HAVE_WINDOWED
# if XCHAL_NUM_AREGS == 16
	movi	a15, 0			// for 16-register files, no need to call to reach the end
# elif XCHAL_NUM_AREGS == 32
	call12	.L__wdwspill_assist20	// call deep enough to clear out any live callers
# elif XCHAL_NUM_AREGS == 64
	call12	.L__wdwspill_assist52	// call deep enough to clear out any live callers
# endif
#endif
	.endm	// window_spill12

/*----------------------------------------------------------------------
 * window_spill_function
 *
 * This macro outputs a function that will spill its caller's callers'
 * register windows to the stack.  Eg. it could be used to implement
 * a version of xthal_window_spill() that works in non-privileged tasks.
 * This works for both privileged and non-privileged tasks.
 *
 * Typical usage:
 *
 *		.text
 *		.align	4
 *		.global	my_spill_function
 *		.type	my_spill_function,@function
 *	my_spill_function:
 *		window_spill_function
 *
 * On entry to resulting function:
 *	none
 * On exit from resulting function:
 *	none (no caller windows are in live registers)
 *
 * The generated function looks at bits 31 and 30 of a0 to tell how it
 * was called: bit 31 clear means call4, bits 31 and 30 both set mean
 * call12, anything else is call8.  It then makes the one call (or, for
 * 16 registers, the one register write) still needed to reach the end
 * of the register file.  window_spill_common is instantiated
 * automatically after the function body.
 */

	.macro	window_spill_function
#if XCHAL_HAVE_WINDOWED
# if XCHAL_NUM_AREGS == 32
	entry	sp, 48
	bbci.l	a0, 31, 1f		// branch if called with call4
	bbsi.l	a0, 30, 2f		// branch if called with call12
	call8	.L__wdwspill_assist16	// called with call8, only need another 8
	retw
1:	call12	.L__wdwspill_assist16	// called with call4, only need another 12
	retw
2:	call4	.L__wdwspill_assist16	// called with call12, only need another 4
	retw
# elif XCHAL_NUM_AREGS == 64
	entry	sp, 48
	bbci.l	a0, 31, 1f		// branch if called with call4
	bbsi.l	a0, 30, 2f		// branch if called with call12
	call4	.L__wdwspill_assist52	// called with call8, only need a call4
	retw
1:	call8	.L__wdwspill_assist52	// called with call4, only need a call8
	retw
2:	call12	.L__wdwspill_assist40	// called with call12, can skip a call12
	retw
# elif XCHAL_NUM_AREGS == 16
	entry	sp, 16
	bbci.l	a0, 31, 1f		// branch if called with call4
	bbsi.l	a0, 30, 2f		// branch if called with call12
	movi	a7, 0			// called with call8
	retw
1:	movi	a11, 0			// called with call4
2:	retw				// if called with call12, everything already spilled

	// Alternative kept from the original source, disabled:
//	movi	a15, 0			// trick to spill all but the direct caller
//	j	1f
//	// The entry instruction is magical in the assembler (gets auto-aligned)
//	// so we have to jump to it to avoid falling through the padding.
//	// We need entry/retw to know where to return.
//1:	entry	sp, 16
//	retw
# else
#  error "unrecognized address register file size"
# endif
#endif /* XCHAL_HAVE_WINDOWED */
	window_spill_common
	.endm	// window_spill_function

/*----------------------------------------------------------------------
 * window_spill_common
 *
 * Common code used by any number of invocations of the window_spill##
 * and window_spill_function macros.
 *
 * Must be instantiated exactly once within a given assembly unit,
 * within call/j range of and same section as window_spill##
 * macro invocations for that assembly unit.
 * (Is automatically instantiated by the window_spill_function macro.)
*/

	/*
	 * window_spill_common body.
	 *
	 * Emits the .L__wdwspill_assist<NN> helper functions, for 32- and
	 * 64-entry register files only.  Each helper does an ENTRY, calls
	 * the next helper in the chain and returns; the last one,
	 * .L__wdwspill_assist16, writes a15 instead of calling further.
	 * NOTE(review): <NN> looks like the number of address registers
	 * still to be covered from that helper on -- confirm.
	 *
	 * The symbol .L__wdwspill_defined is set after the first expansion
	 * and tested on entry, so a second instantiation in the same
	 * assembly unit emits nothing.
	 */
	.macro	window_spill_common
#if XCHAL_HAVE_WINDOWED && (XCHAL_NUM_AREGS == 32 || XCHAL_NUM_AREGS == 64)
	.ifndef	.L__wdwspill_defined
# if XCHAL_NUM_AREGS >= 64
.L__wdwspill_assist60:
	entry	sp, 32
	call8	.L__wdwspill_assist52
	retw
.L__wdwspill_assist56:
	entry	sp, 16
	call4	.L__wdwspill_assist52
	retw
.L__wdwspill_assist52:
	entry	sp, 48
	call12	.L__wdwspill_assist40
	retw
.L__wdwspill_assist40:
	entry	sp, 48
	call12	.L__wdwspill_assist28
	retw
# endif
.L__wdwspill_assist28:
	entry	sp, 48
	call12	.L__wdwspill_assist16
	retw
.L__wdwspill_assist24:
	entry	sp, 32
	call8	.L__wdwspill_assist16
	retw
.L__wdwspill_assist20:
	entry	sp, 16
	call4	.L__wdwspill_assist16
	retw
.L__wdwspill_assist16:
	entry	sp, 16
	movi	a15, 0			// end of the chain: write the last register of this window
	retw
	.set	.L__wdwspill_defined, 1	// remember that the helpers were emitted
	.endif
#endif /* XCHAL_HAVE_WINDOWED with 32 or 64 aregs */
	.endm	// window_spill_common

/*----------------------------------------------------------------------
 * beqi32
 *
 * This macro implements a version of beqi for an arbitrary 32-bit
 * immediate value.
 *
 *	beqi32	ax, ay, imm32, label
 *
 * Compares the value in register ax with imm32 and jumps to label if
 * they are equal.  Clobbers register ay if needed.
 *
 * Selection, done at assembly time on the value of <imm>:
 *	1..8	beqi  ax, imm, label
 *	-1	beqi  ax, imm, label
 *	0	beqz  ax, label
 *	other	movi  ay, imm ; beq ax, ay, label	(ay clobbered)
 */
	.macro	beqi32	ax, ay, imm, label
	.ifeq	((\imm-1) & ~7)		// 1..8 ?
	beqi	\ax, \imm, \label
	.else
	.ifeq	(\imm+1)		// -1 ?
	beqi	\ax, \imm, \label
	.else
	.ifeq	(\imm)			// 0 ?
	beqz	\ax, \label
	.else
	// The immediates 10,12,16,32,64,128,256 could be given
	// the same treatment, at the cost of a longer macro.
	movi	\ay, \imm		// general case: load the constant into \ay
	beq	\ax, \ay, \label	// and compare register to register
	.endif
	.endif
	.endif
	.endm	// beqi32

#endif /*XTENSA_COREASM_H*/