/*
 * xtensa/coreasm.h -- assembler-specific definitions that depend on CORE configuration
 *
 * Source for configuration-independent binaries (which link in a
 * configuration-specific HAL library) must NEVER include this file.
 * It is perfectly normal, however, for the HAL itself to include this file.
 *
 * This file must NOT include xtensa/config/system.h.  Any assembler
 * header file that depends on system information should likely go
 * in a new systemasm.h (or sysasm.h) header file.
 *
 * NOTE: macro beqi32 is NOT configuration-dependent, and is placed
 * here until we have a proper configuration-independent header file.
 */

/* $Id: //depot/rel/Foxhill/dot.8/Xtensa/OS/include/xtensa/coreasm.h#1 $ */

/*
 * Copyright (c) 2000-2014 Tensilica Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#ifndef XTENSA_COREASM_H
#define XTENSA_COREASM_H

/*
 * Tell header files this is assembly source, so they can avoid non-assembler
 * definitions (eg. C types etc):
 */
#ifndef _ASMLANGUAGE	/* conditionalize to avoid cpp warnings (3rd parties might use same macro) */
#define _ASMLANGUAGE
#endif

#include <xtensa/config/core.h>
#include <xtensa/config/specreg.h>
#include <xtensa/config/system.h>

/*
 * Assembly-language specific definitions (assembly macros, etc.).
 */
/*----------------------------------------------------------------------
 *  find_ms_setbit
 *
 *  This macro finds the most significant bit that is set in <as>
 *  and returns its index + <base> in <ad>, or <base> - 1 if <as> is zero.
 *  The index counts starting at zero for the lsbit, so the return
 *  value ranges from <base>-1 (no bit set) to <base>+31 (msbit set).
 *
 *  Parameters:
 *	<ad>	destination address register (any register)
 *	<as>	source address register
 *	<at>	temporary address register (must be different than <as>)
 *	<base>	constant value added to result (usually 0 or 1)
 *  On entry:
 *	<ad> = undefined if different than <as>
 *	<as> = value whose most significant set bit is to be found
 *	<at> = undefined
 *	no other registers are used by this macro.
 *  On exit:
 *	<ad> = <base> + index of msbit set in original <as>,
 *	     = <base> - 1 if original <as> was zero.
 *	<as> clobbered (if not <ad>)
 *	<at> clobbered (if not <ad>)
 *  Example:
 *	find_ms_setbit a0, a4, a0, 0		-- return in a0 index of msbit set in a4
 */

	.macro	find_ms_setbit ad, as, at, base
#if XCHAL_HAVE_NSA
	movi	\at, 31+\base
	nsau	\as, \as	// get index of \as, numbered from msbit (32 if absent)
	sub	\ad, \at, \as	// get numbering from lsbit (0..31, -1 if absent)
#else /* XCHAL_HAVE_NSA */
	movi	\at, \base	// start with result of 0 (point to lsbit of 32)

	beqz	\as, 2f		// special case for zero argument: return -1
	bltui	\as, 0x10000, 1f	// is it one of the 16 lsbits? (if so, check lower 16 bits)
	addi	\at, \at, 16	// no, increment result to upper 16 bits (of 32)
	//srli	\as, \as, 16	// check upper half (shift right 16 bits)
	extui	\as, \as, 16, 16	// check upper half (shift right 16 bits)
1:	bltui	\as, 0x100, 1f	// is it one of the 8 lsbits? (if so, check lower 8 bits)
	addi	\at, \at, 8	// no, increment result to upper 8 bits (of 16)
	srli	\as, \as, 8	// shift right to check upper 8 bits
1:	bltui	\as, 0x10, 1f	// is it one of the 4 lsbits? (if so, check lower 4 bits)
	addi	\at, \at, 4	// no, increment result to upper 4 bits (of 8)
	srli	\as, \as, 4	// shift right 4 bits to check upper half
1:	bltui	\as, 0x4, 1f	// is it one of the 2 lsbits? (if so, check lower 2 bits)
	addi	\at, \at, 2	// no, increment result to upper 2 bits (of 4)
	srli	\as, \as, 2	// shift right 2 bits to check upper half
1:	bltui	\as, 0x2, 1f	// is it the lsbit?
	addi	\at, \at, 2	// no, increment result to upper bit (of 2)
2:	addi	\at, \at, -1	// (from just above: add 1;  from beqz: return -1)
	//srli	\as, \as, 1
1:				// done! \at contains index of msbit set (or -1 if none set)
	.if	0x\ad - 0x\at	// destination different than \at ? (works because regs are a0-a15)
	mov	\ad, \at	// then move result to \ad
	.endif
#endif /* XCHAL_HAVE_NSA */
	.endm	// find_ms_setbit

/*----------------------------------------------------------------------
 *  find_ls_setbit
 *
 *  This macro finds the least significant bit that is set in <as>,
 *  and returns its index in <ad>.
 *  Usage is the same as for the find_ms_setbit macro.
 *  Example:
 *	find_ls_setbit a0, a4, a0, 0	-- return in a0 index of lsbit set in a4
 */

	.macro	find_ls_setbit ad, as, at, base
	neg	\at, \as	// keep only the least-significant bit that is set...
	and	\as, \at, \as	// ... in \as
	find_ms_setbit	\ad, \as, \at, \base
	.endm	// find_ls_setbit

/*----------------------------------------------------------------------
 *  find_ls_one
 *
 *  Same as find_ls_setbit with base zero.
 *  Source (as) and destination (ad) registers must be different.
 *  Provided for backward compatibility.
 */

	.macro	find_ls_one ad, as
	find_ls_setbit	\ad, \as, \ad, 0
	.endm	// find_ls_one
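/*
 *  For illustration (an added sketch, not part of the original interface):
 *  with base 0, find_ms_setbit computes floor(log2(x)) for nonzero x, and
 *  find_ls_setbit computes the number of trailing zero bits.  E.g. if
 *  a4 = 0x00400060 (bits 5, 6 and 22 set), then
 *	find_ls_setbit a2, a4, a3, 0
 *  leaves a2 = 5 (bit 5 is the lowest set bit); a3 and a4 are clobbered
 *  as documented above.
 */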
/*----------------------------------------------------------------------
 *  floop, floopnez, floopgtz, floopend
 *
 *  These macros are used for fast inner loops that
 *  work whether or not the Loops option is configured.
 *  If the Loops option is configured, they simply use
 *  the zero-overhead LOOP instructions; otherwise
 *  they use explicit decrement and branch instructions.
 *
 *  They are used in pairs, with floop, floopnez or floopgtz
 *  at the beginning of the loop, and floopend at the end.
 *
 *  Each pair of loop macro calls must be given the loop count
 *  address register and a unique label for that loop.
 *
 *  Example:
 *
 *	movi	 a3, 16     // loop 16 times
 *	floop	 a3, myloop1
 *	:
 *	bnez	 a7, end1   // exit loop if a7 != 0
 *	:
 *	floopend a3, myloop1
 *  end1:
 *
 *  Like the LOOP instructions, these macros cannot be
 *  nested, must include at least one instruction,
 *  cannot call functions inside the loop, etc.
 *  The loop can be exited by jumping to the instruction
 *  following floopend (or elsewhere outside the loop),
 *  or continued by jumping to a NOP instruction placed
 *  immediately before floopend.
 *
 *  Unlike LOOP instructions, the register passed to floop*
 *  cannot be used inside the loop, because it is used as
 *  the loop counter if the Loops option is not configured.
 *  And its value is undefined after exiting the loop.
 *  And because the loop counter register is active inside
 *  the loop, you can't easily use this construct to loop
 *  across a register file using ROTW as you might with LOOP
 *  instructions, unless you copy the loop register along.
 */

	/*  Named label version of the macros:  */

	.macro	floop		ar, endlabel
	floop_		\ar, .Lfloopstart_\endlabel, .Lfloopend_\endlabel
	.endm

	.macro	floopnez	ar, endlabel
	floopnez_	\ar, .Lfloopstart_\endlabel, .Lfloopend_\endlabel
	.endm

	.macro	floopgtz	ar, endlabel
	floopgtz_	\ar, .Lfloopstart_\endlabel, .Lfloopend_\endlabel
	.endm

	.macro	floopend	ar, endlabel
	floopend_	\ar, .Lfloopstart_\endlabel, .Lfloopend_\endlabel
	.endm

	/*  Numbered local label version of the macros:  */
#if 0 /*UNTESTED*/
	.macro	floop89		ar
	floop_		\ar, 8, 9f
	.endm

	.macro	floopnez89	ar
	floopnez_	\ar, 8, 9f
	.endm

	.macro	floopgtz89	ar
	floopgtz_	\ar, 8, 9f
	.endm

	.macro	floopend89	ar
	floopend_	\ar, 8b, 9
	.endm
#endif /*0*/

	/*  Underlying version of the macros:  */

	.macro	floop_	ar, startlabel, endlabelref
	.ifdef	_infloop_
	.if	_infloop_
	.err	// Error: floop cannot be nested
	.endif
	.endif
	.set	_infloop_, 1
#if XCHAL_HAVE_LOOPS
	loop	\ar, \endlabelref
#else /* XCHAL_HAVE_LOOPS */
\startlabel:
	addi	\ar, \ar, -1
#endif /* XCHAL_HAVE_LOOPS */
	.endm	// floop_

	.macro	floopnez_	ar, startlabel, endlabelref
	.ifdef	_infloop_
	.if	_infloop_
	.err	// Error: floopnez cannot be nested
	.endif
	.endif
	.set	_infloop_, 1
#if XCHAL_HAVE_LOOPS
	loopnez	\ar, \endlabelref
#else /* XCHAL_HAVE_LOOPS */
	beqz	\ar, \endlabelref
\startlabel:
	addi	\ar, \ar, -1
#endif /* XCHAL_HAVE_LOOPS */
	.endm	// floopnez_

	.macro	floopgtz_	ar, startlabel, endlabelref
	.ifdef	_infloop_
	.if	_infloop_
	.err	// Error: floopgtz cannot be nested
	.endif
	.endif
	.set	_infloop_, 1
#if XCHAL_HAVE_LOOPS
	loopgtz	\ar, \endlabelref
#else /* XCHAL_HAVE_LOOPS */
	bltz	\ar, \endlabelref
	beqz	\ar, \endlabelref
\startlabel:
	addi	\ar, \ar, -1
#endif /* XCHAL_HAVE_LOOPS */
	.endm	// floopgtz_


	.macro	floopend_	ar, startlabelref, endlabel
	.ifndef	_infloop_
	.err	// Error: floopend without matching floopXXX
	.endif
	.ifeq	_infloop_
	.err	// Error: floopend without matching floopXXX
	.endif
	.set	_infloop_, 0
#if !XCHAL_HAVE_LOOPS
	bnez	\ar, \startlabelref
#endif /* !XCHAL_HAVE_LOOPS */
\endlabel:
	.endm	// floopend_
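/*
 *  A concrete sketch (an addition, using only the macros above): zero out
 *  a4 words starting at the address in a3; the loop is skipped entirely
 *  when a4 is zero.  Clobbers a3..a5.
 *
 *	movi	 a5, 0
 *	floopnez a4, zloop
 *	s32i	 a5, a3, 0	// store one zero word
 *	addi	 a3, a3, 4	// advance the pointer
 *	floopend a4, zloop
 */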
/*----------------------------------------------------------------------
 *  crsil  --  conditional RSIL (read/set interrupt level)
 *
 *  Executes the RSIL instruction if it exists, else just reads PS.
 *  The RSIL instruction does not exist in the new exception architecture
 *  if the interrupt option is not selected.
 */

	.macro	crsil	ar, newlevel
#if XCHAL_HAVE_OLD_EXC_ARCH || XCHAL_HAVE_INTERRUPTS
	rsil	\ar, \newlevel
#else
	rsr.ps	\ar
#endif
	.endm	// crsil

/*----------------------------------------------------------------------
 *  safe_movi_a0  --  move constant into a0 when L32R is not safe
 *
 *  This macro is typically used by interrupt/exception handlers.
 *  Loads a 32-bit constant into a0, without using any other register,
 *  and without corrupting the LITBASE register, even when the
 *  value of the LITBASE register is unknown (eg. when application
 *  code and interrupt/exception handling code are built independently,
 *  and thus with independent values of the LITBASE register;
 *  debug monitors are one example of this).
 *
 *  Worst-case size of resulting code:  17 bytes.
 */

	.macro	safe_movi_a0	constant
#if XCHAL_HAVE_ABSOLUTE_LITERALS
	/*  Contort a PC-relative literal load even though we may be in litbase-relative mode:  */
	j	1f
	.begin	no-transform			// ensure what follows is assembled exactly as-is
	.align	4				// ensure constant and call0 target ...
	.byte	0				// ... are 4-byte aligned (call0 instruction is 3 bytes long)
1:	call0	2f				// read PC (that follows call0) in a0
	.long	\constant			// 32-bit constant to load into a0
2:
	.end	no-transform
	l32i	a0, a0, 0			// load constant
#else
	movi	a0, \constant			// no LITBASE, can assume PC-relative L32R
#endif
	.endm
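/*
 *  A hypothetical sketch (not part of the original) combining the two
 *  macros above in a debug-monitor style handler prologue; the symbol
 *  my_save_area, the use of EXCSAVE1 and the level-3 argument are all
 *  assumptions for illustration:
 *
 *	wsr.excsave1	a0		// stash a0 (assumes EXCSAVE1 is free here)
 *	safe_movi_a0	my_save_area	// a0 = &my_save_area, without relying on LITBASE
 *	s32i	a2, a0, 0		// free up a2 ...
 *	crsil	a2, 3			// ... then read old PS (and set INTLEVEL=3 if RSIL exists)
 */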
/*----------------------------------------------------------------------
 *  window_spill{4,8,12}
 *
 *  These macros spill callers' register windows to the stack.
 *  They work for both privileged and non-privileged tasks.
 *  Must be called from a windowed ABI context, eg. within
 *  a windowed ABI function (ie. valid stack frame, window
 *  exceptions enabled, not in exception mode, etc).
 *
 *  These macros require a single invocation of the window_spill_common
 *  macro in the same assembly unit and section.
 *
 *  Note that using the window_spill{4,8,12} macros is more efficient
 *  than calling a function implemented using window_spill_function,
 *  because the latter needs extra code to figure out the size of
 *  the call to the spilling function.
 *
 *  Example usage:
 *
 *		.text
 *		.align	4
 *		.global	some_function
 *		.type	some_function,@function
 *	some_function:
 *		entry	a1, 16
 *		:
 *		:
 *
 *		window_spill4	// Spill windows of some_function's callers; preserves a0..a3 only;
 *				// to use window_spill{8,12} in this example function we'd have
 *				// to increase space allocated by the entry instruction, because
 *				// 16 bytes only allows call4; 32 or 48 bytes (+locals) are needed
 *				// for call8/window_spill8 or call12/window_spill12 respectively.
 *		:
 *
 *		retw
 *
 *		window_spill_common	// instantiates code used by window_spill4
 *
 *
 *  On entry:
 *	none (if window_spill4)
 *	stack frame has enough space allocated for call8 (if window_spill8)
 *	stack frame has enough space allocated for call12 (if window_spill12)
 *  On exit:
 *	 a4..a15 clobbered (if window_spill4)
 *	 a8..a15 clobbered (if window_spill8)
 *	a12..a15 clobbered (if window_spill12)
 *	no caller windows are in live registers
 */

	.macro	window_spill4
#if XCHAL_HAVE_WINDOWED
# if XCHAL_NUM_AREGS == 16
	movi	a15, 0			// for 16-register files, no need to call to reach the end
# elif XCHAL_NUM_AREGS == 32
	call4	.L__wdwspill_assist28	// call deep enough to clear out any live callers
# elif XCHAL_NUM_AREGS == 64
	call4	.L__wdwspill_assist60	// call deep enough to clear out any live callers
# endif
#endif
	.endm	// window_spill4

	.macro	window_spill8
#if XCHAL_HAVE_WINDOWED
# if XCHAL_NUM_AREGS == 16
	movi	a15, 0			// for 16-register files, no need to call to reach the end
# elif XCHAL_NUM_AREGS == 32
	call8	.L__wdwspill_assist24	// call deep enough to clear out any live callers
# elif XCHAL_NUM_AREGS == 64
	call8	.L__wdwspill_assist56	// call deep enough to clear out any live callers
# endif
#endif
	.endm	// window_spill8

	.macro	window_spill12
#if XCHAL_HAVE_WINDOWED
# if XCHAL_NUM_AREGS == 16
	movi	a15, 0			// for 16-register files, no need to call to reach the end
# elif XCHAL_NUM_AREGS == 32
	call12	.L__wdwspill_assist20	// call deep enough to clear out any live callers
# elif XCHAL_NUM_AREGS == 64
	call12	.L__wdwspill_assist52	// call deep enough to clear out any live callers
# endif
#endif
	.endm	// window_spill12
/*----------------------------------------------------------------------
 *  window_spill_function
 *
 *  This macro outputs a function that will spill its caller's callers'
 *  register windows to the stack.  Eg. it could be used to implement
 *  a version of xthal_window_spill() that works in non-privileged tasks.
 *  This works for both privileged and non-privileged tasks.
 *
 *  Typical usage:
 *
 *		.text
 *		.align	4
 *		.global	my_spill_function
 *		.type	my_spill_function,@function
 *	my_spill_function:
 *		window_spill_function
 *
 *  On entry to resulting function:
 *	none
 *  On exit from resulting function:
 *	none (no caller windows are in live registers)
 */

	.macro	window_spill_function
#if XCHAL_HAVE_WINDOWED
# if XCHAL_NUM_AREGS == 32
	entry	sp, 48
	bbci.l	a0, 31, 1f		// branch if called with call4
	bbsi.l	a0, 30, 2f		// branch if called with call12
	call8	.L__wdwspill_assist16	// called with call8, only need another 8
	retw
1:	call12	.L__wdwspill_assist16	// called with call4, only need another 12
	retw
2:	call4	.L__wdwspill_assist16	// called with call12, only need another 4
	retw
# elif XCHAL_NUM_AREGS == 64
	entry	sp, 48
	bbci.l	a0, 31, 1f		// branch if called with call4
	bbsi.l	a0, 30, 2f		// branch if called with call12
	call4	.L__wdwspill_assist52	// called with call8, only need a call4
	retw
1:	call8	.L__wdwspill_assist52	// called with call4, only need a call8
	retw
2:	call12	.L__wdwspill_assist40	// called with call12, can skip a call12
	retw
# elif XCHAL_NUM_AREGS == 16
	entry	sp, 16
	bbci.l	a0, 31, 1f		// branch if called with call4
	bbsi.l	a0, 30, 2f		// branch if called with call12
	movi	a7, 0			// called with call8
	retw
1:	movi	a11, 0			// called with call4
2:	retw				// if called with call12, everything already spilled

//	movi	a15, 0		// trick to spill all but the direct caller
//	j	1f
//	//  The entry instruction is magical in the assembler (gets auto-aligned)
//	//  so we have to jump to it to avoid falling through the padding.
//	//  We need entry/retw to know where to return.
//1:	entry	sp, 16
//	retw
# else
#  error "unrecognized address register file size"
# endif
#endif /* XCHAL_HAVE_WINDOWED */
	window_spill_common
	.endm	// window_spill_function
/*----------------------------------------------------------------------
 *  window_spill_common
 *
 *  Common code used by any number of invocations of the window_spill##
 *  and window_spill_function macros.
 *
 *  Must be instantiated exactly once within a given assembly unit,
 *  within call/j range of and same section as window_spill##
 *  macro invocations for that assembly unit.
 *  (Is automatically instantiated by the window_spill_function macro.)
 */

	.macro	window_spill_common
#if XCHAL_HAVE_WINDOWED && (XCHAL_NUM_AREGS == 32 || XCHAL_NUM_AREGS == 64)
	.ifndef	.L__wdwspill_defined
# if XCHAL_NUM_AREGS >= 64
.L__wdwspill_assist60:
	entry	sp, 32
	call8	.L__wdwspill_assist52
	retw
.L__wdwspill_assist56:
	entry	sp, 16
	call4	.L__wdwspill_assist52
	retw
.L__wdwspill_assist52:
	entry	sp, 48
	call12	.L__wdwspill_assist40
	retw
.L__wdwspill_assist40:
	entry	sp, 48
	call12	.L__wdwspill_assist28
	retw
# endif
.L__wdwspill_assist28:
	entry	sp, 48
	call12	.L__wdwspill_assist16
	retw
.L__wdwspill_assist24:
	entry	sp, 32
	call8	.L__wdwspill_assist16
	retw
.L__wdwspill_assist20:
	entry	sp, 16
	call4	.L__wdwspill_assist16
	retw
.L__wdwspill_assist16:
	entry	sp, 16
	movi	a15, 0
	retw
	.set	.L__wdwspill_defined, 1
	.endif
#endif /* XCHAL_HAVE_WINDOWED with 32 or 64 aregs */
	.endm	// window_spill_common

/*----------------------------------------------------------------------
 *  beqi32
 *
 *  This macro implements a version of BEQI for an arbitrary 32-bit
 *  immediate value:
 *
 *	beqi32 ax, ay, imm32, label
 *
 *  Compares the value in register ax with the imm32 value and jumps to
 *  label if equal.  Clobbers register ay if needed.
 */
	.macro	beqi32	ax, ay, imm, label
	.ifeq	((\imm-1) & ~7)	// 1..8 ?
	beqi	\ax, \imm, \label
	.else
	.ifeq	(\imm+1)	// -1 ?
	beqi	\ax, \imm, \label
	.else
	.ifeq	(\imm)		// 0 ?
	beqz	\ax, \label
	.else
	//  We could also handle immediates 10,12,16,32,64,128,256
	//  but it would be a long macro...
	movi	\ay, \imm
	beq	\ax, \ay, \label
	.endif
	.endif
	.endif
	.endm	// beqi32
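/*
 *  For illustration (an added sketch, with a hypothetical label): branch
 *  to .Lmagic if a2 holds an arbitrary 32-bit constant; a3 is used as
 *  scratch only when the immediate is not directly encodable in BEQI:
 *
 *	beqi32	a2, a3, 0x12345678, .Lmagic
 */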
/*----------------------------------------------------------------------
 *  isync_retw_nop
 *
 *  This macro must be invoked immediately after ISYNC if ISYNC
 *  would otherwise be immediately followed by RETW (or other instruction
 *  modifying WindowBase or WindowStart), in a context where
 *  kernel vector mode may be selected, and level-one interrupts
 *  and window overflows may be enabled, on an XEA1 configuration.
 *
 *  On hardware with erratum "XEA1KWIN" (see <xtensa/core.h> for details),
 *  XEA1 code must have at least one instruction between ISYNC and RETW if
 *  run in kernel vector mode with interrupts and window overflows enabled.
 */
	.macro	isync_retw_nop
#if XCHAL_MAYHAVE_ERRATUM_XEA1KWIN
	nop
#endif
	.endm

/*----------------------------------------------------------------------
 *  isync_return_nop
 *
 *  This macro should be used instead of isync_retw_nop in code that is
 *  intended to run on both the windowed and call0 ABIs.
 */
	.macro	isync_return_nop
#ifdef __XTENSA_WINDOWED_ABI__
	isync_retw_nop
#endif
	.endm

/*----------------------------------------------------------------------
 *  isync_erratum453
 *
 *  This macro must be invoked at certain points in the code,
 *  such as in exception and interrupt vectors in particular,
 *  to work around erratum 453.
 */
	.macro	isync_erratum453
#if XCHAL_ERRATUM_453
	isync
#endif
	.endm


/*----------------------------------------------------------------------
 *  readsr
 *
 *  wrapper for 'rsr' that constructs register names that involve levels
 *  e.g. EPCn etc.  Use like so:
 *	readsr epc XCHAL_DEBUGLEVEL a2
 */
	.macro	readsr	reg suf ar
	rsr.\reg\suf	\ar
	.endm

/*----------------------------------------------------------------------
 *  writesr
 *
 *  wrapper for 'wsr' that constructs register names that involve levels
 *  e.g. EPCn etc.  Use like so:
 *	writesr epc XCHAL_DEBUGLEVEL a2
 */
	.macro	writesr	reg suf ar
	wsr.\reg\suf	\ar
	.endm

/*----------------------------------------------------------------------
 *  xchgsr
 *
 *  wrapper for 'xsr' that constructs register names that involve levels
 *  e.g. EPCn etc.  Use like so:
 *	xchgsr epc XCHAL_DEBUGLEVEL a2
 */
	.macro	xchgsr	reg suf ar
	xsr.\reg\suf	\ar
	.endm

/*----------------------------------------------------------------------
 *  INDEX_SR
 *
 *  indexing wrapper for rsr/wsr/xsr that constructs register names from
 *  the provided base name and the current index.  Use like so:
 *	.set _idx, 0
 *	INDEX_SR rsr.ccompare a2
 *
 *  this yields:  rsr.ccompare0 a2
 */
	.macro	INDEX_SR  instr ar
.ifeq (_idx)
	&instr&0	\ar
.endif
.ifeq (_idx-1)
	&instr&1	\ar
.endif
.ifeq (_idx-2)
	&instr&2	\ar
.endif
.ifeq (_idx-3)
	&instr&3	\ar
.endif
.ifeq (_idx-4)
	&instr&4	\ar
.endif
.ifeq (_idx-5)
	&instr&5	\ar
.endif
.ifeq (_idx-6)
	&instr&6	\ar
.endif
.ifeq (_idx-7)
	&instr&7	\ar
.endif
	.endm
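/*
 *  An added sketch (assumes the Timer option is configured, so that
 *  XCHAL_NUM_TIMERS and the CCOMPARE registers exist): clear every
 *  configured CCOMPARE register by stepping _idx with an assemble-time
 *  .rept loop:
 *
 *	movi	a2, 0
 *	.set	_idx, 0
 *	.rept	XCHAL_NUM_TIMERS
 *	INDEX_SR wsr.ccompare a2	// expands to wsr.ccompare0/1/2... a2
 *	.set	_idx, _idx+1
 *	.endr
 */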
/*----------------------------------------------------------------------
 *  abs
 *
 *  implements abs on machines that do not have it configured
 */

#if !XCHAL_HAVE_ABS
	.macro abs arr, ars
	.ifc \arr, \ars
	// src equal dest is less efficient
	bgez \arr, 1f
	neg \arr, \arr
1:
	.else
	neg \arr, \ars
	movgez \arr, \ars, \ars
	.endif
	.endm
#endif /* !XCHAL_HAVE_ABS */


/*----------------------------------------------------------------------
 *  addx2
 *
 *  implements addx2 on machines that do not have it configured
 */

#if !XCHAL_HAVE_ADDX
	.macro addx2 arr, ars, art
	.ifc \arr, \art
	.ifc \arr, \ars
	// addx2 a, a, a	(not common)
	.err
	.else
	// addx2 a, b, a -- can't shift into \arr without losing \art, so add twice:
	add \arr, \ars, \art	// \arr = \ars + \art
	add \arr, \ars, \art	// \arr = 2*\ars + original \art
	.endif
	.else
	//addx2 a, b, c
	//addx2 a, a, b
	//addx2 a, b, b
	slli \arr, \ars, 1
	add \arr, \arr, \art
	.endif
	.endm
#endif /* !XCHAL_HAVE_ADDX */

/*----------------------------------------------------------------------
 *  addx4
 *
 *  implements addx4 on machines that do not have it configured
 */

#if !XCHAL_HAVE_ADDX
	.macro addx4 arr, ars, art
	.ifc \arr, \art
	.ifc \arr, \ars
	// addx4 a, a, a	(not common)
	.err
	.else
	// addx4 a, b, a -- accumulate \ars four times into \arr:
	add \arr, \ars, \art
	add \arr, \ars, \art
	add \arr, \ars, \art
	add \arr, \ars, \art
	.endif
	.else
	//addx4 a, b, c
	//addx4 a, a, b
	//addx4 a, b, b
	slli \arr, \ars, 2
	add \arr, \arr, \art
	.endif
	.endm
#endif /* !XCHAL_HAVE_ADDX */

/*----------------------------------------------------------------------
 *  addx8
 *
 *  implements addx8 on machines that do not have it configured
 */

#if !XCHAL_HAVE_ADDX
	.macro addx8 arr, ars, art
	.ifc \arr, \art
	.ifc \arr, \ars
	//addx8 a, a, a		(not common)
	.err
	.else
	//addx8 a, b, a -- accumulate \ars eight times into \arr:
	add \arr, \ars, \art
	add \arr, \ars, \art
	add \arr, \ars, \art
	add \arr, \ars, \art
	add \arr, \ars, \art
	add \arr, \ars, \art
	add \arr, \ars, \art
	add \arr, \ars, \art
	.endif
	.else
	//addx8 a, b, c
	//addx8 a, a, b
	//addx8 a, b, b
	slli \arr, \ars, 3
	add \arr, \arr, \art
	.endif
	.endm
#endif /* !XCHAL_HAVE_ADDX */


/*----------------------------------------------------------------------
 *  rfe_rfue
 *
 *  Maps to RFUE on XEA1, and RFE on XEA2.  No mapping on XEAX.
 */

#if XCHAL_HAVE_XEA1
	.macro	rfe_rfue
	rfue
	.endm
#elif XCHAL_HAVE_XEA2
	.macro	rfe_rfue
	rfe
	.endm
#endif


/*----------------------------------------------------------------------
 *  abi_entry
 *
 *  Generate proper function entry sequence for the current ABI
 *  (windowed or call0).  Takes care of allocating stack space
 *  and saving the return PC, if necessary.  The corresponding
 *  abi_return macro performs the matching stack deallocation and
 *  restores the return PC.
 *
 *  Parameters are:
 *
 *	locsize		Number of bytes to allocate on the stack
 *			for local variables (and for args to pass to
 *			callees, if any calls are made).  Defaults to zero.
 *			The macro rounds this up to a multiple of 16.
 *			NOTE:  large values are allowed (e.g. up to 1 GB).
 *
 *	callsize	Maximum call size made by this function.
 *			Leave zero (default) for leaf functions, i.e. if
 *			this function makes no calls to other functions.
 *			Otherwise must be set to 4, 8, or 12 according
 *			to whether the "largest" call made is a call[x]4,
 *			call[x]8, or call[x]12 (for call0 ABI, it makes
 *			no difference whether this is set to 4, 8 or 12,
 *			but it must be set to one of these values).
 *
 *  NOTE:  It is up to the caller to align the entry point, declare the
 *  function symbol, make it global, etc.
 *
 *  NOTE:  This macro relies on assembler relaxation for large values
 *  of locsize.  It might not work with the no-transform directive.
 *  NOTE:  For the call0 ABI, this macro ensures SP is allocated or
 *  de-allocated cleanly, i.e. without temporarily allocating too much
 *  (or allocating negatively!) due to addi relaxation.
 *
 *  NOTE:  Generating the proper sequence and register allocation for
 *  making calls in an ABI independent manner is a separate topic not
 *  covered by this macro.
 *
 *  NOTE:  To access arguments, you can't use a fixed offset from SP.
 *  The offset depends on the ABI, whether the function is leaf, etc.
 *  The simplest method is probably to use the .locsz symbol, which
 *  is set by this macro to the actual number of bytes allocated on
 *  the stack, in other words, to the offset from SP to the arguments.
 *  E.g. for a function whose arguments are all 32-bit integers, you
 *  can get the 7th and 8th arguments (1st and 2nd args stored on stack)
 *  using:
 *	l32i	a2, sp, .locsz
 *	l32i	a3, sp, .locsz+4
 *  (this example works as long as locsize is under L32I's offset limit
 *  of 1020 minus up to 48 bytes of ABI-specific stack usage;
 *  otherwise you might first need to do "addi a?, sp, .locsz"
 *  or similar sequence).
 *
 *  NOTE:  For call0 ABI, this macro (and abi_return) may clobber a9
 *  (a caller-saved register).
 *
 *  Examples:
 *		abi_entry
 *		abi_entry  5
 *		abi_entry  22, 8
 *		abi_entry  0, 4
 */

/*
 * Compute .locsz and .callsz without emitting any instructions.
 * Used by both abi_entry and abi_return.
 * Assumes locsize >= 0.
 */
	.macro	abi_entry_size locsize=0, callsize=0
#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__
	.ifeq	\callsize
	.set	.callsz, 16
	.else
	.ifeq	\callsize-4
	.set	.callsz, 16
	.else
	.ifeq	\callsize-8
	.set	.callsz, 32
	.else
	.ifeq	\callsize-12
	.set	.callsz, 48
	.else
	.error	"abi_entry: invalid call size \callsize"
	.endif
	.endif
	.endif
	.endif
	.set	.locsz, .callsz + ((\locsize + 15) & -16)
#else
	.set	.callsz, \callsize
	.if	.callsz		/* if calls, need space for return PC */
	.set	.locsz, (\locsize + 4 + 15) & -16
	.else
	.set	.locsz, (\locsize + 15) & -16
	.endif
#endif
	.endm

	.macro abi_entry locsize=0, callsize=0
	.iflt	\locsize
	.error	"abi_entry: invalid negative size of locals (\locsize)"
	.endif
	abi_entry_size	\locsize, \callsize
#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__
# define ABI_ENTRY_MINSIZE	3	/* size of abi_entry (no arguments) instructions in bytes */
	.ifgt	.locsz - 32760	/* .locsz > 32760 (ENTRY's max range)? */
	/*  Funky computation to try to have assembler use addmi efficiently if possible:  */
	entry	sp, 0x7F00 + (.locsz & 0xF0)
	addi	a12, sp, - ((.locsz & -0x100) - 0x7F00)
	movsp	sp, a12
	.else
	entry	sp, .locsz
	.endif
#else
# define ABI_ENTRY_MINSIZE	0	/* size of abi_entry (no arguments) instructions in bytes */
	.if	.locsz
	.ifle	.locsz - 128	/* if locsz <= 128 */
	addi	sp, sp, -.locsz
	.if	.callsz
	s32i	a0, sp, .locsz - 4
	.endif
	.elseif	.callsz		/* locsz > 128, with calls: */
	movi	a9, .locsz - 16	/* note: a9 is caller-saved */
	addi	sp, sp, -16
	s32i	a0, sp, 12
	sub	sp, sp, a9
	.else			/* locsz > 128, no calls: */
	movi	a9, .locsz
	sub	sp, sp, a9
	.endif	/* end */
	.endif
#endif
	.endm


/*----------------------------------------------------------------------
 *  abi_return
 *
 *  Generate proper function exit sequence for the current ABI
 *  (windowed or call0).  Takes care of freeing stack space and
 *  restoring the return PC, if necessary.
 *  NOTE:  This macro MUST be invoked following a corresponding
 *  abi_entry macro invocation.  For call0 ABI in particular,
 *  all stack and PC restoration is done according to the last
 *  abi_entry macro invoked before this macro in the assembly file.
 *
 *  Normally this macro takes no arguments.  However to allow
 *  for placing abi_return *before* abi_entry (as must be done
 *  for some highly optimized assembly), it optionally takes
 *  exactly the same arguments as abi_entry.
 */

	.macro abi_return	locsize=-1, callsize=0
	.ifge	\locsize
	abi_entry_size	\locsize, \callsize
	.endif
#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__
	retw
#else
	.if	.locsz
	.iflt	.locsz - 128	/* if locsz < 128 */
	.if	.callsz
	l32i	a0, sp, .locsz - 4
	.endif
	addi	sp, sp, .locsz
	.elseif	.callsz		/* locsz >= 128, with calls: */
	addi	a9, sp, .locsz - 16
	l32i	a0, a9, 12
	addi	sp, a9, 16
	.else			/* locsz >= 128, no calls: */
	movi	a9, .locsz
	add	sp, sp, a9
	.endif	/* end */
	.endif
	ret
#endif
	.endm
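/*
 *  A minimal sketch (an addition; my_function and its body are
 *  hypothetical): an ABI-portable non-leaf function with 32 bytes of
 *  locals that makes call[x]8-sized calls, valid under both the
 *  windowed and call0 ABIs:
 *
 *		.text
 *		.align	4
 *		.global	my_function
 *		.type	my_function,@function
 *	my_function:
 *		abi_entry	32, 8
 *		...			// function body
 *		abi_return
 */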
/*
 *  HW erratum fixes.
 */

	.macro hw_erratum_487_fix
#if defined XSHAL_ERRATUM_487_FIX
	isync
#endif
	.endm

/*
 * These macros are internal, subject to change, and should not be used in
 * any new code.
 */

#define _GBL(x)   .global x
#define _TYP(x)   .type x,@function
#define _ALN(x)   .align x
#define _SIZ(x)   .size x, . - x
#define _MKEND(x) .purgem endfunc ; .macro endfunc ; _SIZ(x) ; .purgem endfunc ; .macro endfunc ; .endm ; .endm
#define _SYMT(x)  _GBL(x); _MKEND(x); _TYP(x); _ALN(4); x:
#define _SYM2(x)  _GBL(x); _TYP(x); x:
#define _SYM(x)   _GBL(x); _MKEND(x); _ALN(4); x:
	.macro endfunc ; .endm

/*
 * The DECLFUNC() macro provides a mechanism for implementing both the
 * standard and _nw interface with a single copy of the code.
 *
 * For call0 ABI there is one function definition which is labeled with
 * both the xthal_..._nw and xthal_... symbols.
 *
 * For windowed ABI, two compilations are involved (one with the __NW_FUNCTION__
 * symbol defined) resulting in two separate functions (the _nw one without
 * the window adjustments).
 */

#if defined(__NW_FUNCTION__)
# define DECLFUNC(x) _SYMT(x ## _nw)
#else
# if defined (__XTENSA_CALL0_ABI__)
#  define DECLFUNC(x) _SYMT(x); _SYM2(x ## _nw)
# else
#  define DECLFUNC(x) _SYMT(x)
# endif
#endif
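/*
 *  A sketch of the intended use (the function name and body here are
 *  hypothetical; endfunc emits the matching .size directive).  Under
 *  call0 ABI this single definition carries both the xthal_example and
 *  xthal_example_nw labels; under windowed ABI the _nw variant comes
 *  from a second compilation with __NW_FUNCTION__ defined:
 *
 *	DECLFUNC(xthal_example)
 *		abi_entry
 *		...
 *		abi_return
 *		endfunc
 */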
#endif /*XTENSA_COREASM_H*/