// // windowspill.S -- register window spill routine // // $Id: //depot/rel/Foxhill/dot.8/Xtensa/OS/hal/windowspill_asm.S#1 $ // Copyright (c) 1999-2010 Tensilica Inc. // // Permission is hereby granted, free of charge, to any person obtaining // a copy of this software and associated documentation files (the // "Software"), to deal in the Software without restriction, including // without limitation the rights to use, copy, modify, merge, publish, // distribute, sublicense, and/or sell copies of the Software, and to // permit persons to whom the Software is furnished to do so, subject to // the following conditions: // // The above copyright notice and this permission notice shall be included // in all copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. // IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY // CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, // TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE // SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include // xthal_window_spill_nw // // Spill live register windows to the stack. // // Required entry conditions: // PS.WOE = 0 // PS.INTLEVEL >= XCHAL_EXCM_LEVEL // a1 = valid stack pointer (note: some regs may be spilled at a1-16) // a0 = return PC (usually set by call0 or callx0 when calling this function) // a2,a3 undefined // a4 thru a15 valid, if they are part of window(s) to be spilled // (Current window a0..a15 saved if necessary.) 
// WINDOWSTART[WINDOWBASE] = 1 // // Exit conditions: // PS.WOE, PS.INTLEVEL = same as on entry // WINDOWBASE = same as on entry // WINDOWSTART updated to reflect spilled windows // (equals 1<<WINDOWBASE if successful) // a0 = return PC // a1 = same as on entry // a2 = error code: // 0 --> successful // (WINDOWSTART = 1<<WINDOWBASE) // 1 --> invalid WINDOWSTART (WINDOWBASE bit not set) // (WINDOWSTART unchanged) // 2 --> invalid window size (not 4, 8 or 12 regs) // (WINDOWSTART bits of successfully spilled // windows are cleared, others left intact) // a3 clobbered // a4,a5,a8,a9,a12,a13 = same as on entry // a6,a7,a10,a11,a14,a15 clobbered if they were part of window(s) // to be spilled, otherwise they are the same as on entry // loop registers (LCOUNT,LBEG,LEND) are NOT affected (they were in earlier versions) // SAR clobbered // // All non-spilled register windows will be spilled. // Beware that this may include a4..a15 of the current window, // so generally these should not have been clobbered by the // caller if it is at all possible that these registers // are part of an unspilled window (it often is possible) // (otherwise the spilled stack would be invalid). // // THIS MEANS: the caller is responsible for saving a0-a15 but // the caller must leave a4-a15 intact when control is transferred // here. // // It may be reentrant (but stack pointer is invalid during // execution due to window rotations, so can't take interrupts // and exceptions in the usual manner, so ... what does // reentrancy really mean here?). // The xthal_spill_registers_into_stack_nw entry point // is kept here only for backwards compatibility. // It will be removed in the very near future. .global xthal_spill_registers_into_stack_nw .text .align 4 .global xthal_window_spill_nw xthal_window_spill_nw: xthal_spill_registers_into_stack_nw: // BACKWARD COMPATIBILITY ONLY - see above #if ! XCHAL_HAVE_WINDOWED // Nothing to do -- window option was not selected. 
movi a2, 0 // always report success ret #else /* XCHAL_HAVE_WINDOWED */ #define WSBITS (XCHAL_NUM_AREGS / 4) /* width of WINDOWSTART register in bits */ #define WBBITS (XCHAL_NUM_AREGS_LOG2 - 2) /* width of WINDOWBASE register in bits */ /* * Rearrange (rotate) window start bits relative to the current * window (WINDOWBASE). WINDOWSTART currently looks like this: * * a15-a0 * NAREG-1 | | 0 * | vvvv | * xxxxxxxxxx1yyyyy * ^ * | * WINDOWBASE * * The start bit pointed to by WINDOWBASE must be set * (we return an error if it isn't), as it corresponds * to the start of the current window (shown as a0-a15). * * We want the window start bits rotated to look like this: * 1yyyyyxxxxxxxxxx * * Note that there is one start bit for every four registers; * and the total number of registers (NAREG) can be 32 or 64; * so the number of start bits in WINDOWSTART is NAREG/4, * and the size of WINDOWSTART can be 8 or 16. */ rsr.windowbase a2 addi a2, a2, 1 ssr a2 // sar = WINDOWBASE + 1 rsr.windowstart a3 srl a2, a3 // a2 is 0... | 000000xxxxxxxxxx = WINDOWSTART >> sar sll a3, a3 // a3 is 1yyyyy0000000000 | 0... = WINDOWSTART << (32 - sar) bgez a3, .Linvalid_ws // verify that msbit is indeed set srli a3, a3, 32-WSBITS // a3 is 0... | 1yyyyy0000000000 = a3 >> (32-NAREG/4) or a2, a2, a3 // a2 is 0... | 1yyyyyxxxxxxxxxx /* * FIND THE FIRST ONE * * Now we have (in a2) the window start bits rotated in order * from oldest (closest to lsbit) to current (msbit set). * Each start bit (that is set), other than the current one, * corresponds to a window frame to spill. * * Now find the first start bit, ie. the first frame to spill, * by looking for the first bit set in a2 (from lsbit side). */ #if XCHAL_HAVE_NSA neg a3, a2 // keep only the least-significant bit set of a2 ... and a3, a3, a2 // ... 
in a3 nsau a3, a3 // get index of that bit, numbered from msbit (32 if absent) ssl a3 // set sar = 32 - a3 = bit index numbered from lsbit + 1 #else /* XCHAL_HAVE_NSA */ wsr.windowstart a2 // temporarily save rotated start bits // (we can use WINDOWSTART because WOE=0) // NOTE: this could be optimized a bit, by explicit coding rather than the macro. find_ls_one a3, a2 // set a3 to index of lsmost bit set in a2 (a2 clobbered) addi a2, a3, 1 // index+1 ssr a2 // set sar = index + 1 rsr.windowstart a2 // restore a2 (rotated start bits) #endif /* XCHAL_HAVE_NSA */ srl a2, a2 // right-justify the rotated start bits (dropping lsbit set) wsr.windowstart a2 // save rotated + justified window start bits, // because a2 will disappear when modifying WINDOWBASE // again, we can use WINDOWSTART because WOE=0 /* * Rotate WindowBase so that a0 of the next window to spill is in a4 * (ie. leaving us with a2 and a3 to play with, because a0 and a1 * may be those of the original window which we must preserve). */ rsr.windowbase a2 #if XCHAL_HAVE_NSA addi a2, a2, 31 sub a3, a2, a3 // a3 = WINDOWBASE + index = WINDOWBASE + (31 - msbit_index) #else /* XCHAL_HAVE_NSA */ add a3, a2, a3 // a3 = WINDOWBASE + index #endif /* XCHAL_HAVE_NSA */ wsr.windowbase a3 // effectively do: rotw index rsync // wait for write to WINDOWBASE to complete // Now our registers have changed! rsr.windowstart a2 // restore a2 (rotated + justified window start bits) /* * We are now ready to start the window spill loop. * Relative to the above, a2 and WINDOWBASE are now as follows: * * 1yyyyyxxxxxxxxxx = rotated start bits as shown above * 1yyyyyxxxx100000 = actual rotated start bits (example) * 0000001yyyyyxxxx ^ = a2 = rotated + justified start bits * ^ xxx1^ = window being spilled * ^ ^ * | | * original current * WINDOWBASE WINDOWBASE * * The first window to spill (save) starts at what is now a4. * The spill loop maintains the adjusted start bits in a2, * shifting them right as each window is spilled. 
*/ .Lspill_loop: // Top of save loop. // Find the size of this call and branch to the appropriate save routine. beqz a2, .Ldone // if no start bit remaining, we're done bbsi.l a2, 0, .Lspill4 // if next start bit is set, it's a call4 bbsi.l a2, 1, .Lspill8 // if 2nd next bit set, it's a call8 bbsi.l a2, 2, .Lspill12 // if 3rd next bit set, it's a call12 j .Linvalid_window // else it's an invalid window! // SAVE A CALL4 .Lspill4: addi a3, a9, -16 // a3 gets call[i+1]'s sp - 16 s32i a4, a3, 0 // store call[i]'s a0 s32i a5, a3, 4 // store call[i]'s a1 s32i a6, a3, 8 // store call[i]'s a2 s32i a7, a3, 12 // store call[i]'s a3 srli a6, a2, 1 // move and shift the start bits rotw 1 // rotate the window j .Lspill_loop // SAVE A CALL8 .Lspill8: addi a3, a13, -16 // a0 gets call[i+1]'s sp - 16 s32i a4, a3, 0 // store call[i]'s a0 s32i a5, a3, 4 // store call[i]'s a1 s32i a6, a3, 8 // store call[i]'s a2 s32i a7, a3, 12 // store call[i]'s a3 addi a3, a5, -12 // call[i-1]'s sp address l32i a3, a3, 0 // a3 is call[i-1]'s sp // (load slot) addi a3, a3, -32 // a3 points to our spill area s32i a8, a3, 0 // store call[i]'s a4 s32i a9, a3, 4 // store call[i]'s a5 s32i a10, a3, 8 // store call[i]'s a6 s32i a11, a3, 12 // store call[i]'s a7 srli a10, a2, 2 // move and shift the start bits rotw 2 // rotate the window j .Lspill_loop // SAVE A CALL12 .Lspill12: rotw 1 // rotate to see call[i+1]'s sp addi a13, a13, -16 // set to the reg save area s32i a0, a13, 0 // store call[i]'s a0 s32i a1, a13, 4 // store call[i]'s a1 s32i a2, a13, 8 // store call[i]'s a2 s32i a3, a13, 12 // store call[i]'s a3 addi a3, a1, -12 // call[i-1]'s sp address l32i a3, a3, 0 // a3 has call[i-1]'s sp addi a13, a13, 16 // restore call[i+1]'s sp (here to fill load slot) addi a3, a3, -48 // a3 points to our save area s32i a4, a3, 0 // store call[i]'s a4 s32i a5, a3, 4 // store call[i]'s a5 s32i a6, a3, 8 // store call[i]'s a6 s32i a7, a3, 12 // store call[i]'s a7 s32i a8, a3, 16 // store call[i]'s a4 s32i a9, a3, 
20 // store call[i]'s a5 s32i a10, a3, 24 // store call[i]'s a6 s32i a11, a3, 28 // store call[i]'s a7 rotw -1 // rotate to see start bits (a2) srli a14, a2, 3 // move and shift the start bits rotw 3 // rotate to next window j .Lspill_loop .Ldone: rotw 1 // back to the original window rsr.windowbase a2 // get (original) window base ssl a2 // setup for shift left by WINDOWBASE movi a2, 1 sll a2, a2 // compute new WINDOWSTART = 1<