//
// windowspill.S -- register window spill routine
//
// $Id: //depot/rel/Foxhill/dot.8/Xtensa/OS/hal/windowspill_asm.S#1 $

// Copyright (c) 1999-2010 Tensilica Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining
// a copy of this software and associated documentation files (the
// "Software"), to deal in the Software without restriction, including
// without limitation the rights to use, copy, modify, merge, publish,
// distribute, sublicense, and/or sell copies of the Software, and to
// permit persons to whom the Software is furnished to do so, subject to
// the following conditions:
//
// The above copyright notice and this permission notice shall be included
// in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
// IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
// CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
// SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

#include <xtensa/coreasm.h>


// xthal_window_spill_nw
//
// Spill live register windows to the stack.
//
// Required entry conditions:
//	PS.WOE = 0
//	PS.INTLEVEL >= XCHAL_EXCM_LEVEL
//	a1 = valid stack pointer (note: some regs may be spilled at a1-16)
//	a0 = return PC (usually set by call0 or callx0 when calling this function)
//	a2,a3 undefined
//	a4 thru a15 valid, if they are part of window(s) to be spilled
//		(Current window a0..a15 saved if necessary.)
//	WINDOWSTART[WINDOWBASE] = 1
//
// Exit conditions:
//	PS.WOE, PS.INTLEVEL = same as on entry
//	WINDOWBASE = same as on entry
//	WINDOWSTART updated to reflect spilled windows
//		(equals 1<<WINDOWBASE if successful return)
//	a0 = return PC
//	a1 = same as on entry
//	a2 = error code:
//		0 --> successful
//			(WINDOWSTART = 1<<WINDOWBASE)
//		1 --> invalid WINDOWSTART (WINDOWBASE bit not set)
//			(WINDOWSTART unchanged)
//		2 --> invalid window size (not 4, 8 or 12 regs)
//			(WINDOWSTART bits of successfully spilled
//			 windows are cleared, others left intact)
//	a3 clobbered
//	a4,a5,a8,a9,a12,a13 = same as on entry
//	a6,a7,a10,a11,a14,a15 clobbered if they were part of window(s)
//		to be spilled, otherwise they are the same as on entry
//	loop registers (LCOUNT,LBEG,LEND) are NOT affected (they were in earlier versions)
//	SAR clobbered
//
// All non-spilled register windows will be spilled.
// Beware that this may include a4..a15 of the current window,
// so generally these should not have been clobbered by the
// caller if it is at all possible that these registers
// are part of an unspilled window (it often is possible)
// (otherwise the spilled stack would be invalid).
//
// THIS MEANS: the caller is responsible for saving a0-a15 but
// the caller must leave a4-a15 intact when control is transferred
// here.
//
// It may be reentrant (but stack pointer is invalid during
// execution due to window rotations, so can't take interrupts
// and exceptions in the usual manner, so ... what does
// reentrancy really mean here?).


	//  The xthal_spill_registers_into_stack_nw entry point
	//  is kept here only for backwards compatibility.
	//  It will be removed in the very near future.
//  xthal_spill_registers_into_stack_nw is a second label on the very same
//  code as xthal_window_spill_nw.  It is exported for BACKWARD COMPATIBILITY
//  ONLY; new code should use xthal_window_spill_nw.
	.global	xthal_spill_registers_into_stack_nw
	.type	xthal_spill_registers_into_stack_nw,@function

//  xthal_window_spill_nw
//
//  Non-windowed ("_nw") routine: it returns with 'ret' through a0, so it
//  is entered with call0/callx0.  It walks the live register windows from
//  the oldest one up to (but excluding) the current one and stores each
//  to its stack frame.
//
//  Result in a2:
//	0 = success                (WINDOWSTART rewritten as 1<<WINDOWBASE)
//	1 = WINDOWSTART[WINDOWBASE] was not set on entry (nothing modified)
//	2 = a window spanning more than 12 registers was found
//	    (WINDOWBASE restored; WINDOWSTART rewritten with only the
//	     start bits of already spilled windows cleared)
//
//  Scratch usage visible in the code below:
//	a2, a3       always used as temporaries
//	SAR          used for every variable shift
//	WINDOWSTART  used as a temporary holding register while WINDOWBASE
//	             is being changed (the code relies on PS.WOE = 0 for this)
//
	.text
	.align	4
	.global	xthal_window_spill_nw
	.type	xthal_window_spill_nw,@function
xthal_window_spill_nw:
xthal_spill_registers_into_stack_nw:	// BACKWARD COMPATIBILITY ONLY - alias of the above

#if ! XCHAL_HAVE_WINDOWED
	//  Nothing to do -- window option was not selected.
	movi	a2, 0			// always report success
	ret
#else /* XCHAL_HAVE_WINDOWED */
#define WSBITS	(XCHAL_NUM_AREGS / 4)		/* width of WINDOWSTART register in bits */
#define WBBITS	(XCHAL_NUM_AREGS_LOG2 - 2)	/* width of WINDOWBASE register in bits */
	/*
	 *  Rearrange (rotate) window start bits relative to the current
	 *  window (WINDOWBASE).  WINDOWSTART currently looks like this:
	 *
	 *                 a15-a0
	 *      NAREG-1     |  |     0
	 *         |        vvvv     |
	 *         xxxxxxxxxx1yyyyy
	 *                   ^
	 *                   |
	 *               WINDOWBASE
	 *
	 *  The start bit pointed to by WINDOWBASE must be set
	 *  (we return an error if it isn't), as it corresponds
	 *  to the start of the current window (shown as a0-a15).
	 *
	 *  We want the window start bits rotated to look like this:
	 *         1yyyyyxxxxxxxxxx
	 *
	 *  Note that there is one start bit for every four registers;
	 *  and the total number of registers (NAREG) can be 32 or 64;
	 *  so the number of start bits in WINDOWSTART is NAREG/4,
	 *  and the size of WINDOWSTART can be 8 or 16.
	 *
	 *  In other words this is a rotate RIGHT by WINDOWBASE+1 within a
	 *  WSBITS-wide field: afterwards bit k of the rotated value belongs
	 *  to window (WINDOWBASE + 1 + k) mod WSBITS, and bit WSBITS-1
	 *  belongs to the current window.
	 */

	rsr.windowbase	a2
	addi	a2, a2, 1
	ssr	a2			// sar = WINDOWBASE + 1
	rsr.windowstart	a3
	srl	a2, a3			// a2 is 0... | 000000xxxxxxxxxx = WINDOWSTART >> sar
	sll	a3, a3			// a3 is 1yyyyy0000000000 | 0... = WINDOWSTART << (32 - sar)
	bgez	a3, .Linvalid_ws	// verify that msbit is indeed set

	srli	a3, a3, 32-WSBITS	// a3 is 0... | 1yyyyy0000000000 = a3 >> (32-NAREG/4)
	or	a2, a2, a3		// a2 is 0... | 1yyyyyxxxxxxxxxx

	/*
	 *  FIND THE FIRST ONE
	 *
	 *  Now we have (in a2) the window start bits rotated in order
	 *  from oldest (closest to lsbit) to current (msbit set).
	 *  Each start bit (that is set), other than the current one,
	 *  corresponds to a window frame to spill.
	 *
	 *  Now find the first start bit, ie. the first frame to spill,
	 *  by looking for the first bit set in a2 (from lsbit side).
	 */

#if XCHAL_HAVE_NSA
	neg	a3, a2			// keep only the least-significant bit set of a2 ...
	and	a3, a3, a2		// ... in a3
	nsau	a3, a3			// get index of that bit, numbered from msbit (32 if absent)
	ssl	a3			// set sar = 32 - a3 = bit index numbered from lsbit + 1
#else /* XCHAL_HAVE_NSA */
	wsr.windowstart	a2		// temporarily save rotated start bits
					// (we can use WINDOWSTART because WOE=0)

	//  NOTE:  this could be optimized a bit, by explicit coding rather than the macro.
	find_ls_one	a3, a2		// set a3 to index of lsmost bit set in a2 (a2 clobbered)

	addi	a2, a3, 1		// index+1
	ssr	a2			// set sar = index + 1
	rsr.windowstart	a2		// restore a2 (rotated start bits)
#endif /* XCHAL_HAVE_NSA */
	srl	a2, a2			// right-justify the rotated start bits (dropping lsbit set)
	wsr.windowstart	a2		// save rotated + justified window start bits,
					// because a2 will disappear when modifying WINDOWBASE
					// again, we can use WINDOWSTART because WOE=0

	/*
	 *  Rotate WindowBase so that a0 of the next window to spill is in a4
	 *  (ie. leaving us with a2 and a3 to play with, because a0 and a1
	 *  may be those of the original window which we must preserve).
	 */
	rsr.windowbase	a2
#if XCHAL_HAVE_NSA
	addi	a2, a2, 31
	sub	a3, a2, a3		// a3 = WINDOWBASE + index = WINDOWBASE + (31 - msbit_index)
#else /* XCHAL_HAVE_NSA */
	add	a3, a2, a3		// a3 = WINDOWBASE + index
#endif /* XCHAL_HAVE_NSA */
	wsr.windowbase	a3		// effectively do:  rotw index
	rsync				// wait for write to WINDOWBASE to complete
	//  Now our registers have changed!

	rsr.windowstart	a2		// restore a2 (rotated + justified window start bits)

	/*
	 *  We are now ready to start the window spill loop.
	 *  Relative to the above, a2 and WINDOWBASE are now as follows:
	 *
	 *      1yyyyyxxxxxxxxxx        = rotated start bits as shown above
	 *      1yyyyyxxxx100000        = actual rotated start bits (example)
	 *      0000001yyyyyxxxx ^      = a2 = rotated + justified start bits
	 *            ^     xxx1^       = window being spilled
	 *            ^         ^
	 *            |         |
	 *        original   current
	 *       WINDOWBASE WINDOWBASE
	 *
	 *  The first window to spill (save) starts at what is now a4.
	 *  The spill loop maintains the adjusted start bits in a2,
	 *  shifting them right as each window is spilled.
	 *
	 *  Loop invariant: bit 0 of a2 is the start bit of the window that
	 *  begins 4 registers after the frame at a4, bit 1 the one 8
	 *  registers after, bit 2 the one 12 registers after.
	 */

.Lspill_loop:
	//  Top of save loop.
	//  Find the size of this call and branch to the appropriate save routine.

	beqz	a2, .Ldone		// if no start bit remaining, we're done
	bbsi.l	a2, 0, .Lspill4		// if next start bit is set, it's a call4
	bbsi.l	a2, 1, .Lspill8		// if 2nd next bit set, it's a call8
	bbsi.l	a2, 2, .Lspill12	// if 3rd next bit set, it's a call12
	j	.Linvalid_window	// else it's an invalid window!



	//  SAVE A CALL4
.Lspill4:
	addi	a3, a9, -16		// a3 gets call[i+1]'s sp - 16
	s32i	a4, a3, 0		// store call[i]'s a0
	s32i	a5, a3, 4		// store call[i]'s a1
	s32i	a6, a3, 8		// store call[i]'s a2
	s32i	a7, a3, 12		// store call[i]'s a3

	srli	a6, a2, 1		// move and shift the start bits
					// (a6 here becomes a2 after the rotw 1)
	rotw	1			// rotate the window

	j	.Lspill_loop

	//  SAVE A CALL8
.Lspill8:
	addi	a3, a13, -16		// a3 gets call[i+1]'s sp - 16
	s32i	a4, a3, 0		// store call[i]'s a0
	s32i	a5, a3, 4		// store call[i]'s a1
	s32i	a6, a3, 8		// store call[i]'s a2
	s32i	a7, a3, 12		// store call[i]'s a3

	addi	a3, a5, -12		// call[i-1]'s sp address
	l32i	a3, a3, 0		// a3 is call[i-1]'s sp
					// (load slot)
	addi	a3, a3, -32		// a3 points to our spill area

	s32i	a8, a3, 0		// store call[i]'s a4
	s32i	a9, a3, 4		// store call[i]'s a5
	s32i	a10, a3, 8		// store call[i]'s a6
	s32i	a11, a3, 12		// store call[i]'s a7

	srli	a10, a2, 2		// move and shift the start bits
					// (a10 here becomes a2 after the rotw 2)
	rotw	2			// rotate the window

	j	.Lspill_loop

	//  SAVE A CALL12
.Lspill12:
	rotw	1			// rotate to see call[i+1]'s sp
					// (call[i]'s a0..a15 are now our a0..a15)

	addi	a13, a13, -16		// set to the reg save area
	s32i	a0, a13, 0		// store call[i]'s a0
	s32i	a1, a13, 4		// store call[i]'s a1
	s32i	a2, a13, 8		// store call[i]'s a2
	s32i	a3, a13, 12		// store call[i]'s a3

	addi	a3, a1, -12		// call[i-1]'s sp address
	l32i	a3, a3, 0		// a3 has call[i-1]'s sp
	addi	a13, a13, 16		// restore call[i+1]'s sp (here to fill load slot)
	addi	a3, a3, -48		// a3 points to our save area

	s32i	a4, a3, 0		// store call[i]'s a4
	s32i	a5, a3, 4		// store call[i]'s a5
	s32i	a6, a3, 8		// store call[i]'s a6
	s32i	a7, a3, 12		// store call[i]'s a7
	s32i	a8, a3, 16		// store call[i]'s a8
	s32i	a9, a3, 20		// store call[i]'s a9
	s32i	a10, a3, 24		// store call[i]'s a10
	s32i	a11, a3, 28		// store call[i]'s a11

	rotw	-1			// rotate to see start bits (a2)
	srli	a14, a2, 3		// move and shift the start bits
					// (a14 here becomes a2 after the rotw 3)
	rotw	3			// rotate to next window

	j	.Lspill_loop



.Ldone:
	rotw	1			// back to the original window
	rsr.windowbase	a2		// get (original) window base
	ssl	a2			// setup for shift left by WINDOWBASE
	movi	a2, 1
	sll	a2, a2			// compute new WINDOWSTART = 1<<WINDOWBASE
	wsr.windowstart	a2		// and apply it
	rsync
	movi	a2, 0			// done!
	ret
	//jx	a0


	//  Invalid WINDOWSTART register.
	//
.Linvalid_ws:
	movi	a2, 1			// indicate invalid WINDOWSTART
	ret				// return from subroutine


	//  Invalid window size!
	//  The three bits following the start bit are all clear, so
	//  we have an invalid window state (can't determine a window size).
	//
	//  So we exit with an error, but to do that we must first restore
	//  the original WINDOWBASE.  We also compute a sensible
	//  WINDOWSTART that has the start bits of spilled windows
	//  cleared, but all other start bits intact, so someone debugging
	//  the failure can look at WINDOWSTART to see which window
	//  failed to spill.
	//
.Linvalid_window:
	slli	a2, a2, 1		// space for missing start bit
	addi	a2, a2, 1		// add missing start bit
	rsr.windowbase	a3		// get current WINDOWBASE
	//  Invariant from here to label 2:
	//  bit k of a2 belongs to window (a3 + 1 + k) mod WSBITS.
	bbsi.l	a2, WSBITS-1, 2f	// branch if current WINDOWBASE==original
1:	addi	a3, a3, -1		// decrement towards original WINDOWBASE
	slli	a2, a2, 1		// shift towards original WINDOWSTART alignment
	bbci.l	a2, WSBITS-1, 1b	// repeat until ms start bit set
	extui	a3, a3, 0, WBBITS	// mask out upper base bits, in case of carry-over
2:	//  Here, a3 = original WINDOWBASE;
	//  and msbit of start bits in a2 is set, and no other bits above it.
	//  Now rotate a2 to become the correct WINDOWSTART.
	//
	//  a2 is in the same layout the entry code produced by rotating
	//  WINDOWSTART right by WINDOWBASE+1, so it has to be rotated LEFT
	//  by WINDOWBASE+1 (not by WINDOWBASE) to line up again.
	//  Example, WSBITS=8, original WINDOWBASE=2, a2=0b10000100:
	//	rotate left by 3 -> 0b00100100  (bit 2 = current window: correct)
	//	rotate left by 2 -> 0b00010010  (bit 2 clear: off by one)
	addi	a3, a3, 1		// a3 = orig WB + 1  (1..WSBITS, so at most 16)
	ssl	a3			// set shift left ... (sar = 32 - (orig WB + 1))
	slli	a3, a2, 32-WSBITS	// left-justify start bits
	src	a2, a2, a3		// rotate left by original WINDOWBASE + 1
	extui	a2, a2, 0, WSBITS	// keep only significant start bits
	wsr.windowstart	a2		// we've cleared only start bits of spilled windows
	rsr.sar	a3			// retrieve 32 - (original WINDOWBASE + 1)
	movi	a2, 31
	sub	a3, a2, a3		// 31 - (31 - orig WB) = original WINDOWBASE
	wsr.windowbase	a3		// back to original WINDOWBASE
	rsync

	movi	a2, 2			// indicate invalid window size
	ret

#endif /* XCHAL_HAVE_WINDOWED */

	.size	xthal_window_spill_nw, . - xthal_window_spill_nw
	.size	xthal_spill_registers_into_stack_nw, . - xthal_spill_registers_into_stack_nw


//  void  xthal_window_spill (void);
//
//  Spill live register windows to the stack.
//
//  This will spill all register windows except this
//  function's window, and possibly that of its caller.
//  (Currently, the caller's window is spilled and reloaded
//  when this function returns.  This may change with
//  future optimisations.)
//
//  Another, simpler way to implement this might be
//  to use an appropriate sequence of call/entry/retw
//  instructions to force overflow of any live windows.
//
//  Assumes that PS.INTLEVEL=0 and PS.WOE=1 on entry/exit.
//
//  Implementation: clear PS.WOE, raise PS.INTLEVEL to XCHAL_EXCM_LEVEL,
//  call0 xthal_window_spill_nw, then write the saved PS back.
//  The value xthal_window_spill_nw leaves in a2 is not examined here.
//  Register use:
//	a4 = saved a0 (call0 overwrites a0 with its return address)
//	a5 = saved PS
//	a6 = PS mask, a2 = modified PS
//  abi_entry / abi_return are macros defined outside this file
//  (presumably by <xtensa/coreasm.h> -- confirm).
//
	.text
	.align	4
	.global	xthal_window_spill
	.type	xthal_window_spill,@function
xthal_window_spill:
	abi_entry
#if XCHAL_HAVE_WINDOWED
	movi	a6, ~(PS_WOE_MASK|PS_INTLEVEL_MASK)	// (using a6 ensures any window using this a4..a7 is spilled)
	rsr.ps	a5
	mov	a4, a0			// save a0
	and	a2, a5, a6		// clear WOE, INTLEVEL
	addi	a2, a2, XCHAL_EXCM_LEVEL	// set INTLEVEL = XCHAL_EXCM_LEVEL
	wsr.ps	a2			// apply to PS
	rsync
	call0	xthal_window_spill_nw
	mov	a0, a4			// restore a0
	wsr.ps	a5			// restore PS
	rsync
#endif /* XCHAL_HAVE_WINDOWED */
	abi_return

	.size	xthal_window_spill, . - xthal_window_spill
