/*
 * Copyright (c) 2017, Intel Corporation
 *
 * SPDX-License-Identifier: Apache-2.0
 */

#ifndef ZEPHYR_ARCH_XTENSA_INCLUDE_XTENSA_ASM2_S_H
#define ZEPHYR_ARCH_XTENSA_INCLUDE_XTENSA_ASM2_S_H

#include <zephyr/zsr.h>
#include "xtensa_asm2_context.h"

#include <zephyr/offsets.h>

/* Assembler header!  This file contains macros designed to be included
 * only by the assembler.
 */

#if defined(CONFIG_XTENSA_HIFI_SHARING)
.extern _xtensa_hifi_save
#endif

/*
 * SPILL_ALL_WINDOWS
 *
 * Spills all windowed registers (i.e. registers not visible as
 * A0-A15) to their ABI-defined spill regions on the stack.
 *
 * Unlike the Xtensa HAL implementation, this code requires that the
 * EXCM and WOE bit be enabled in PS, and relies on repeated hardware
 * exception handling to do the register spills.  The trick is to do a
 * noop write to the high registers, which the hardware will trap
 * (into an overflow exception) in the case where those registers are
 * already used by an existing call frame.  Then it rotates the window
 * and repeats until all but the A0-A3 registers of the original frame
 * are guaranteed to be spilled, eventually rotating back around into
 * the original frame.  Advantages:
 *
 * - Vastly smaller code size
 *
 * - More easily maintained if changes are needed to window over/underflow
 *   exception handling.
 *
 * - Requires no scratch registers to do its work, so can be used safely in any
 *   context.
 *
 * - If the WOE bit is not enabled (for example, in code written for
 *   the CALL0 ABI), this becomes a silent noop and operates compatibly.
 *
 * - In memory protection situations, this relies on the existing
 *   exception handlers (and thus their use of the L/S32E
 *   instructions) to execute stores in the protected space.  AFAICT,
 *   the HAL routine does not handle this situation and isn't safe: it
 *   will happily write through the "stack pointers" found in
 *   registers regardless of where they might point.
 *
 * - Hilariously it's ACTUALLY FASTER than the HAL routine.  And not
 *   just a little bit, it's MUCH faster.  With a mostly full register
 *   file on an LX6 core (ESP-32) I'm measuring 145 cycles to spill
 *   registers with this vs. 279 (!) to do it with
 *   xthal_spill_windows().  Apparently Xtensa exception handling is
 *   really fast, and no one told their software people.
 *
 * Note that as with the Xtensa HAL spill routine, and unlike context
 * switching code on most sane architectures, the intermediate states
 * here will have an invalid stack pointer.  That means that this code
 * must not be preempted in any context (i.e. all Zephyr situations)
 * where the interrupt code will need to use the stack to save the
 * context.  But unlike the HAL, which runs with exceptions masked via
 * EXCM, this will not: it needs the overflow handlers unmasked.  Use
 * INTLEVEL instead (which, happily, is what Zephyr's locking does
 * anyway).
 *
 * The rotations in each variant add up to XCHAL_NUM_AREGS / 4 windows,
 * so execution ends in the frame it started in.
 */
.macro SPILL_ALL_WINDOWS
#if XCHAL_NUM_AREGS == 64
	and a12, a12, a12
	rotw 3
	and a12, a12, a12
	rotw 3
	and a12, a12, a12
	rotw 3
	and a12, a12, a12
	rotw 3
	and a12, a12, a12
	rotw 4
#elif XCHAL_NUM_AREGS == 32
	and a12, a12, a12
	rotw 3
	and a12, a12, a12
	rotw 3
	and a4, a4, a4
	rotw 2
#else
#error Unrecognized XCHAL_NUM_AREGS
#endif
.endm

#if XCHAL_HAVE_FP && defined(CONFIG_CPU_HAS_FPU) && defined(CONFIG_FPU_SHARING)
/*
 * FPU_REG_SAVE
 *
 * Saves the Floating Point Unit context registers in the base save
 * area pointed to by the current stack pointer A1.  The Floating-Point
 * Coprocessor Option adds the FR register file and two User Registers
 * called FCR and FSR.  The FR register file consists of 16 registers of
 * 32 bits each and is used for all data computation.
 *
 * A0 is used as scratch and is clobbered.
 */
.macro FPU_REG_SAVE
	rur.fcr	a0
	s32i	a0, a1, ___xtensa_irq_bsa_t_fcr_OFFSET
	rur.fsr	a0
	s32i	a0, a1, ___xtensa_irq_bsa_t_fsr_OFFSET
	ssi	f0, a1, ___xtensa_irq_bsa_t_fpu0_OFFSET
	ssi	f1, a1, ___xtensa_irq_bsa_t_fpu1_OFFSET
	ssi	f2, a1, ___xtensa_irq_bsa_t_fpu2_OFFSET
	ssi	f3, a1, ___xtensa_irq_bsa_t_fpu3_OFFSET
	ssi	f4, a1, ___xtensa_irq_bsa_t_fpu4_OFFSET
	ssi	f5, a1, ___xtensa_irq_bsa_t_fpu5_OFFSET
	ssi	f6, a1, ___xtensa_irq_bsa_t_fpu6_OFFSET
	ssi	f7, a1, ___xtensa_irq_bsa_t_fpu7_OFFSET
	ssi	f8, a1, ___xtensa_irq_bsa_t_fpu8_OFFSET
	ssi	f9, a1, ___xtensa_irq_bsa_t_fpu9_OFFSET
	ssi	f10, a1, ___xtensa_irq_bsa_t_fpu10_OFFSET
	ssi	f11, a1, ___xtensa_irq_bsa_t_fpu11_OFFSET
	ssi	f12, a1, ___xtensa_irq_bsa_t_fpu12_OFFSET
	ssi	f13, a1, ___xtensa_irq_bsa_t_fpu13_OFFSET
	ssi	f14, a1, ___xtensa_irq_bsa_t_fpu14_OFFSET
	ssi	f15, a1, ___xtensa_irq_bsa_t_fpu15_OFFSET
.endm

/*
 * FPU_REG_RESTORE
 *
 * Counterpart of FPU_REG_SAVE: reloads FCR, FSR and F0-F15 from the
 * base save area pointed to by the current stack pointer A1.
 *
 * A0 is used as scratch and is clobbered.
 */
.macro FPU_REG_RESTORE
	l32i.n	a0, a1, ___xtensa_irq_bsa_t_fcr_OFFSET
	wur.fcr	a0
	l32i.n	a0, a1, ___xtensa_irq_bsa_t_fsr_OFFSET
	wur.fsr	a0
	lsi	f0, a1, ___xtensa_irq_bsa_t_fpu0_OFFSET
	lsi	f1, a1, ___xtensa_irq_bsa_t_fpu1_OFFSET
	lsi	f2, a1, ___xtensa_irq_bsa_t_fpu2_OFFSET
	lsi	f3, a1, ___xtensa_irq_bsa_t_fpu3_OFFSET
	lsi	f4, a1, ___xtensa_irq_bsa_t_fpu4_OFFSET
	lsi	f5, a1, ___xtensa_irq_bsa_t_fpu5_OFFSET
	lsi	f6, a1, ___xtensa_irq_bsa_t_fpu6_OFFSET
	lsi	f7, a1, ___xtensa_irq_bsa_t_fpu7_OFFSET
	lsi	f8, a1, ___xtensa_irq_bsa_t_fpu8_OFFSET
	lsi	f9, a1, ___xtensa_irq_bsa_t_fpu9_OFFSET
	lsi	f10, a1, ___xtensa_irq_bsa_t_fpu10_OFFSET
	lsi	f11, a1, ___xtensa_irq_bsa_t_fpu11_OFFSET
	lsi	f12, a1, ___xtensa_irq_bsa_t_fpu12_OFFSET
	lsi	f13, a1, ___xtensa_irq_bsa_t_fpu13_OFFSET
	lsi	f14, a1, ___xtensa_irq_bsa_t_fpu14_OFFSET
	lsi	f15, a1, ___xtensa_irq_bsa_t_fpu15_OFFSET
.endm
#endif

/*
 * ODD_REG_SAVE
 *
 * Stashes the oddball shift/loop context registers in the base save
 * area pointed to by the current stack pointer.  On exit, A0 will
 * have been modified but A2/A3 have not, and the shift/loop
 * instructions can be used freely (though note loops don't work in
 * exceptions for other reasons!).
 *
 * Does not populate or modify the PS/PC save locations.
 */
.macro ODD_REG_SAVE
	rsr.sar a0
	s32i a0, a1, ___xtensa_irq_bsa_t_sar_OFFSET
#if XCHAL_HAVE_LOOPS
	rsr.lbeg a0
	s32i a0, a1, ___xtensa_irq_bsa_t_lbeg_OFFSET
	rsr.lend a0
	s32i a0, a1, ___xtensa_irq_bsa_t_lend_OFFSET
	rsr.lcount a0
	s32i a0, a1, ___xtensa_irq_bsa_t_lcount_OFFSET
#endif
	rsr.exccause a0
	s32i a0, a1, ___xtensa_irq_bsa_t_exccause_OFFSET
#if XCHAL_HAVE_S32C1I
	rsr.scompare1 a0
	s32i a0, a1, ___xtensa_irq_bsa_t_scompare1_OFFSET
#endif
#if XCHAL_HAVE_THREADPTR && \
	(defined(CONFIG_USERSPACE) || defined(CONFIG_THREAD_LOCAL_STORAGE))
	rur.THREADPTR a0
	s32i a0, a1, ___xtensa_irq_bsa_t_threadptr_OFFSET
#endif
#if XCHAL_HAVE_FP && defined(CONFIG_CPU_HAS_FPU) && defined(CONFIG_FPU_SHARING)
	FPU_REG_SAVE
#endif

.endm

#ifdef CONFIG_XTENSA_MMU
/*
 * CALC_PTEVADDR_BASE
 *
 * This calculates the virtual address of the first PTE page
 * (PTEVADDR base, the one mapping 0x00000000) so that we can
 * use this to obtain the virtual address of the PTE page we are
 * interested in.  This can be obtained via
 * (1 << CONFIG_XTENSA_MMU_PTEVADDR_SHIFT).
 *
 * Note that it is computed this way to avoid any TLB miss
 * that could occur if we were to use l32r to load the
 * PTEVADDR base.  If the page containing the PTEVADDR base
 * address is not in TLB, we will need to handle the TLB miss
 * which we are trying to avoid here.
 *
 * @param ADDR_REG Register to store the calculated
 *                 PTEVADDR base address.
 *
 * @note The content of ADDR_REG will be modified.
 *       Save and restore it around this macro usage.
 */
.macro CALC_PTEVADDR_BASE ADDR_REG
	movi \ADDR_REG, 1
	slli \ADDR_REG, \ADDR_REG, CONFIG_XTENSA_MMU_PTEVADDR_SHIFT
.endm

/*
 * PRELOAD_PTEVADDR
 *
 * This preloads the page table entries for a 4MB region to avoid TLB
 * misses.  This 4MB region is mapped via a page (4KB) of page table
 * entries (PTE).  Each entry is 4 bytes mapping a 4KB region.  Each page,
 * then, has 1024 entries mapping a 4MB region.  Filling TLB entries is
 * automatically done via hardware, as long as the PTE page associated
 * with a particular address is also in TLB.  If the PTE page is not in
 * TLB, an exception will be raised that must be handled.  This TLB miss
 * is problematic when we are in the middle of dealing with another
 * exception or handling an interrupt.  So we need to put the PTE page
 * into TLB by simply doing a load operation.
 *
 * @param ADDR_REG Register containing the target address
 * @param PTEVADDR_BASE_REG Register containing the PTEVADDR base
 *
 * @note The content of ADDR_REG will be modified.
 *       Save and restore it around this macro usage.
 *       PTEVADDR_BASE_REG is only read.
 */
.macro PRELOAD_PTEVADDR ADDR_REG, PTEVADDR_BASE_REG
	/*
	 * Calculate the offset to first PTE page of all memory.
	 *
	 * Every page (4KB) of page table entries contains
	 * 1024 entries (as each entry is 4 bytes).  Each entry
	 * maps one 4KB page.  So one page of entries maps 4MB of
	 * memory.
	 *
	 * 1. We need to find the virtual address of the PTE page
	 *    having the page table entry mapping the address in
	 *    register ADDR_REG.  To do this, we first need to find
	 *    the offset of this PTE page from the first PTE page
	 *    (the one mapping memory 0x00000000):
	 *    a. Find the beginning address of the 4KB page
	 *       containing address in ADDR_REG.  This can simply
	 *       be done by discarding the low 12 bits (or shifting
	 *       right and then left 12 bits).
	 *    b. Since each PTE page contains 1024 entries,
	 *       we divide the address obtained in step (a) by
	 *       further dividing it by 1024 (shifting right and
	 *       then left 10 bits) to obtain the offset of
	 *       the PTE page.
	 *
	 *    Step (a) and (b) can be obtained together so that
	 *    we can shift right 22 bits, and then shift left
	 *    12 bits.
	 *
	 * 2. Combine the result from step (1) with
	 *    PTEVADDR_BASE_REG to get the virtual address of
	 *    the PTE page.
	 *
	 * 3. Do a l32i to force the PTE page to be in TLB.
	 */

	/* Step 1 */
	srli \ADDR_REG, \ADDR_REG, 22
	slli \ADDR_REG, \ADDR_REG, 12

	/* Step 2 */
	add \ADDR_REG, \ADDR_REG, \PTEVADDR_BASE_REG

	/* Step 3 */
	l32i \ADDR_REG, \ADDR_REG, 0
.endm
#endif /* CONFIG_XTENSA_MMU */

/*
 * CROSS_STACK_CALL
 *
 * Sets the stack up carefully such that a "cross stack" call can spill
 * correctly, then invokes an immediate handler.  Note that:
 *
 * 0. When spilling a frame, functions find their callEE's stack pointer
 *    (to save A0-A3) from registers.  But they find their
 *    already-spilled callER's stack pointer (to save higher GPRs) from
 *    their own stack memory.
 *
 * 1. The function that was interrupted ("interruptee") does not need to
 *    be spilled, because it already has been as part of the context
 *    save.  So it doesn't need registers allocated for it anywhere.
 *
 * 2. Interruptee's caller needs to spill into the space below the
 *    interrupted stack frame, which means that the A1 register it finds
 *    below it needs to contain the old/interrupted stack and not the
 *    context saved one.
 *
 * 3. The ISR dispatcher (called "underneath" interruptee) needs to spill
 *    high registers into the space immediately above its own stack frame,
 *    so it needs to find a caller with the "new" stack pointer instead.
 *
 * We make this work by inserting TWO 4-register frames between
 * "interruptee's caller" and "ISR dispatcher".  The top one (which
 * occupies the slot formerly held by "interruptee", whose registers
 * were saved via external means) holds the "interrupted A1" and the
 * bottom has the "top of the interrupt stack" which can be either the
 * word above a new memory area (when handling an interrupt from user
 * mode) OR the existing "post-context-save" stack pointer (when
 * handling a nested interrupt).  The code works either way.  Because
 * these are both only 4-registers, neither needs its own caller for
 * spilling.
 *
 * The net cost is 32 wasted bytes on the interrupt stack frame to
 * spill our two "phantom frames" (actually not quite, as we'd need a
 * few of those words used somewhere for tracking the stack pointers
 * anyway).  But the benefit is that NO REGISTER FRAMES NEED TO BE
 * SPILLED on interrupt entry.  And if we return back into the same
 * context we interrupted (a common case) no windows need to be
 * explicitly spilled at all.  And in fact in the case where the ISR
 * uses significant depth on its own stack, the interrupted frames
 * will be spilled naturally as a standard cost of a function call,
 * giving register windows something like "zero cost interrupts".
 *
 * FIXME: a terrible awful really nifty idea to fix the stack waste
 * problem would be to use a SINGLE frame between the two stacks,
 * pre-spill it with one stack pointer for the "lower" call to see and
 * leave the register SP in place for the "upper" frame to use.
 * Would require modifying the Window{Over|Under}flow4 exceptions to
 * know not to spill/fill these special frames, but that's not too
 * hard, maybe...
 *
 * Enter this macro with a valid "context saved" pointer (i.e. SP
 * should point to a stored pointer which points to one BSA below the
 * interrupted/old stack) in A1, a handler function in A2, and a "new"
 * stack pointer (i.e. a pointer to the word ABOVE the allocated stack
 * area) in A3.  Exceptions should be enabled via PS.EXCM, but
 * PS.INTLEVEL must (!) be set such that no nested interrupts can
 * arrive (we restore the natural INTLEVEL from the value in ZSR_EPS
 * just before entering the call).  On return A1 will be unchanged,
 * A0 holds the interrupted context's A0 (reloaded from the BSA during
 * setup), A2 has the return value of the called function, and A3 is
 * clobbered.  A4-A15 become part of called frames and MUST NOT BE IN
 * USE by the code that expands this macro.  The called function gets
 * the context save handle (the A1 value on entry to this macro) as
 * its first argument.
 */
.macro CROSS_STACK_CALL
	mov a6, a3		/* place "new sp" in the next frame's A2 */
	mov a10, a1		/* pass "context handle" in 2nd frame's A2 */
	mov a3, a1		/* stash it locally in A3 too */
	mov a11, a2		/* handler in 2nd frame's A3, next frame's A7 */

	/* Recover the interrupted SP from the BSA */
	l32i a1, a1, 0
	l32i a0, a1, ___xtensa_irq_bsa_t_a0_OFFSET
	addi a1, a1, ___xtensa_irq_bsa_t_SIZEOF

	call4 _xstack_call0_\@
	mov a1, a3		/* restore original SP */
	mov a2, a6		/* copy return value */
	j _xstack_returned_\@
.align 4
_xstack_call0_\@:
	/* We want an ENTRY to set a bit in windowstart and do the
	 * rotation, but we want our own SP.  After that, we are
	 * running in a valid frame, so re-enable interrupts.
	 */
	entry a1, 16
	mov a1, a2
	rsr.ZSR_EPS a2
	wsr.ps a2
	call4 _xstack_call1_\@
	mov a2, a6		/* copy return value */
	retw
.align 4
_xstack_call1_\@:
	/* Remember the handler is going to do our ENTRY, so the
	 * window has not rotated yet: the handler pointer is still
	 * in A7 (not A3), and its argument in A6 (not A2), even
	 * though this is after the second CALL4.
	 */
	jx a7
_xstack_returned_\@:
.endm

/* Entry setup for all exceptions and interrupts.  Arrive here with
 * the stack pointer decremented across a base save area, A0-A3 and
 * PS/PC already spilled to the stack in the BSA, and A2 containing a
 * level-specific C handler function.
 *
 * This is a macro (to allow for unit testing) that expands to a
 * handler body to which the vectors can jump.  It takes two static
 * (!) arguments: offsets within the per-CPU record struct (whose
 * address is read from the ZSR_CPU special register) of a "nest
 * count" word (NEST_OFF) and of the interrupt stack top
 * (INTSTACK_OFF).
 *
 * All labels defined here carry the \@ suffix so that the macro can
 * be expanded more than once in the same assembly unit.
 */
.macro EXCINT_HANDLER NEST_OFF, INTSTACK_OFF
	/* A2 contains our handler function which will get clobbered
	 * by the save.  Stash it into the "scratch" slot in the
	 * BSA and recover it immediately after.  Kind of a hack.
	 */
	s32i a2, a1, ___xtensa_irq_bsa_t_scratch_OFFSET

	ODD_REG_SAVE

#if defined(CONFIG_XTENSA_HIFI_SHARING)
	call0 _xtensa_hifi_save		/* Save HiFi registers */
#endif

	call0 xtensa_save_high_regs

	l32i a2, a1, 0
	l32i a2, a2, ___xtensa_irq_bsa_t_scratch_OFFSET

#if XCHAL_HAVE_THREADPTR && defined(CONFIG_USERSPACE)
	/* Clear up the threadptr because it is used
	 * to check if a thread is running in user mode.  Since
	 * we are in an interrupt we don't want the system
	 * thinking it is possibly running in user mode.
	 */
	movi.n a0, 0
	wur.THREADPTR a0
#endif /* XCHAL_HAVE_THREADPTR && CONFIG_USERSPACE */

#ifdef CONFIG_XTENSA_INTERRUPT_NONPREEMPTABLE

	/* Setting the interrupt mask to the max non-debug level
	 * to prevent lower priority interrupts being preempted by
	 * high level interrupts until processing of that lower level
	 * interrupt has completed.
	 */
	rsr.ps a0
	movi a3, ~(PS_INTLEVEL_MASK)
	and a0, a0, a3
	movi a3, PS_INTLEVEL(ZSR_RFI_LEVEL)
	or a0, a0, a3
	wsr.ps a0

#else

	/* There's a gotcha with level 1 handlers: the INTLEVEL field
	 * gets left at zero and not set like high priority interrupts
	 * do.  That works fine for exceptions, but for L1 interrupts,
	 * when we unmask EXCM below, the CPU will just fire the
	 * interrupt again and get stuck in a loop blasting save
	 * frames down the stack to the bottom of memory.  It would be
	 * good to put this code into the L1 handler only, but there's
	 * not enough room in the vector without some work there to
	 * squash it some.  Next choice would be to make this a macro
	 * argument and expand two versions of this handler.  An
	 * optimization FIXME, I guess.
	 */
	rsr.ps a0
	movi a3, PS_INTLEVEL_MASK
	and a0, a0, a3
	bnez a0, _not_l1_\@
	rsr.ps a0
	movi a3, PS_INTLEVEL(1)
	or a0, a0, a3
	wsr.ps a0

_not_l1_\@:
#endif /* CONFIG_XTENSA_INTERRUPT_NONPREEMPTABLE */

	/* Setting up the cross stack call below has states where the
	 * resulting frames are invalid/non-reentrant, so we can't
	 * allow nested interrupts.  But we do need EXCM unmasked, as
	 * we use CALL/ENTRY instructions in the process and need to
	 * handle exceptions to spill caller/interruptee frames.  Use
	 * PS.INTLEVEL at maximum to mask all interrupts and stash the
	 * current value in our designated EPS register (which is
	 * guaranteed unused across the call)
	 */
	rsil a0, 0xf

	/* Since we are unmasking EXCM, we need to set RING bits to kernel
	 * mode, otherwise we won't be able to run the exception handler in C.
	 */
	movi a3, ~(PS_EXCM_MASK) & ~(PS_RING_MASK)
	and a0, a0, a3
	wsr.ZSR_EPS a0
	wsr.ps a0
	rsync

	/* A1 already contains our saved stack, and A2 our handler.
	 * So all that's needed for CROSS_STACK_CALL is to put the
	 * "new" stack into A3.  This can be either a copy of A1 (when
	 * the nest count is already nonzero) or the interrupt stack
	 * top (when the nest count is zero).
	 */
	rsr.ZSR_CPU a3
	l32i a0, a3, \NEST_OFF
	beqz a0, _switch_stacks_\@

	/* Use the same stack, just copy A1 to A3 after incrementing NEST */
	addi a0, a0, 1
	s32i a0, a3, \NEST_OFF
	mov a3, a1
	j _do_call_\@

_switch_stacks_\@:
	addi a0, a0, 1
	s32i a0, a3, \NEST_OFF
	l32i a3, a3, \INTSTACK_OFF

_do_call_\@:
	CROSS_STACK_CALL

	/* Mask interrupts (which have been unmasked during the handler
	 * execution) while we muck with the windows and decrement the nested
	 * count.  The restore will unmask them correctly.
	 */
	rsil a0, XCHAL_NUM_INTLEVELS

	/* Decrement nest count */
	rsr.ZSR_CPU a3
	l32i a0, a3, \NEST_OFF
	addi a0, a0, -1
	s32i a0, a3, \NEST_OFF

	/* Last trick: the called function returned the "next" handle
	 * to restore to in A6 (the call4'd function's A2).  If this
	 * is not the same handle as we started with, we need to do a
	 * register spill before restoring, for obvious reasons.
	 * Remember to restore the A1 stack pointer as it existed at
	 * interrupt time so the caller of the interrupted function
	 * spills to the right place.
	 */
	beq a6, a1, _restore_\@

#ifndef CONFIG_USERSPACE
	l32i a1, a1, 0
	l32i a0, a1, ___xtensa_irq_bsa_t_a0_OFFSET
	addi a1, a1, ___xtensa_irq_bsa_t_SIZEOF
#ifndef CONFIG_KERNEL_COHERENCE
	/* When using coherence, the registers of the interrupted
	 * context got spilled upstream in arch_cohere_stacks()
	 */
	SPILL_ALL_WINDOWS
#endif

	/* Restore A1 stack pointer from "next" handle. */
	mov a1, a6
#else
	/* With userspace, we cannot simply restore A1 stack pointer
	 * at this point because we need to swap page tables to
	 * the incoming thread, and we do not want to call that
	 * function with thread's stack.  So we stash the new stack
	 * pointer into A2 first, then move it to A1 after we have
	 * swapped the page table.
	 */
	mov a2, a6

	/* Need to switch page tables because the "next" handle
	 * returned above is not the same handle as we started
	 * with.  This means we are being restored to another
	 * thread.
	 */
	rsr a6, ZSR_CPU
	l32i a6, a6, ___cpu_t_current_OFFSET

#ifdef CONFIG_XTENSA_MMU
	call4 xtensa_swap_update_page_tables
#endif
#ifdef CONFIG_XTENSA_MPU
	call4 xtensa_mpu_map_write
#endif
	l32i a1, a1, 0
	l32i a0, a1, ___xtensa_irq_bsa_t_a0_OFFSET
	addi a1, a1, ___xtensa_irq_bsa_t_SIZEOF

	SPILL_ALL_WINDOWS

	/* Move stashed stack pointer to A1 to restore stack. */
	mov a1, a2
#endif

_restore_\@:
	j _restore_context
.endm

/* Defines an exception/interrupt vector for a specified level.  Saves
 * off the interrupted A0-A3 registers and the per-level PS/PC
 * registers to the stack before jumping to a handler (defined with
 * EXCINT_HANDLER) to do the rest of the work.
 *
 * Arguments are a numeric interrupt level and symbol names for the
 * entry code (defined via EXCINT_HANDLER) and a C handler for this
 * particular level.
 *
 * Note that the linker sections for some levels get special names for
 * no particularly good reason.  Only level 1 has any code generation
 * difference, because it is the legacy exception level that predates
 * the EPS/EPC registers.  It also lives in the "iram0.text" segment
 * (which is linked immediately after the vectors) so that an assembly
 * stub can be loaded into the vector area instead and reach this code
 * with a simple jump instruction.
 */
.macro DEF_EXCINT LVL, ENTRY_SYM, C_HANDLER_SYM
#if defined(CONFIG_XTENSA_SMALL_VECTOR_TABLE_ENTRY)
.pushsection .iram.text, "ax"
.global _Level\LVL\()VectorHelper
_Level\LVL\()VectorHelper :
#else
.if \LVL == 1
.pushsection .iram0.text, "ax"
.elseif \LVL == XCHAL_DEBUGLEVEL
.pushsection .DebugExceptionVector.text, "ax"
.elseif \LVL == XCHAL_NMILEVEL
.pushsection .NMIExceptionVector.text, "ax"
.else
.pushsection .Level\LVL\()InterruptVector.text, "ax"
.endif
.global _Level\LVL\()Vector
_Level\LVL\()Vector:
#endif

#ifdef CONFIG_XTENSA_MMU
.if \LVL == 1
	/* If there are any TLB misses during interrupt handling,
	 * the user/kernel/double exception vector will be triggered
	 * to handle these misses.  This results in DEPC and EXCCAUSE
	 * being overwritten, and then execution returned back to
	 * this site of TLB misses.  When it gets to the C handler,
	 * it will not see the original cause.  So stash
	 * the EXCCAUSE here so C handler can see the original cause.
	 *
	 * For double exception, DEPC is saved in earlier vector
	 * code.
	 */
	wsr a0, ZSR_EXCCAUSE_SAVE

	esync

	rsr a0, ZSR_DEPC_SAVE
	beqz a0, _not_triple_fault

	/* If stashed DEPC is not zero, we have started servicing
	 * a double exception and yet we are here because there is
	 * another exception (through user/kernel if PS.EXCM is
	 * cleared, or through double if PS.EXCM is set).  This can
	 * be considered a triple fault, although there are no triple
	 * faults on Xtensa.  Once PS.EXCM is set, it keeps going
	 * through double exception vector for any new exceptions.
	 * However, our exception code needs to unmask PS.EXCM to
	 * enable register window operations.  So after that, any
	 * new exceptions will go through the kernel or user vectors
	 * depending on PS.UM.  If there are continuous faults, it may
	 * keep ping-ponging between double and kernel/user exception
	 * vectors that may never get resolved.  Since we stash DEPC
	 * during double exception, and the stashed one is only cleared
	 * once the double exception has been processed, we can use
	 * the stashed DEPC value to detect if the next exception could
	 * be considered a triple fault.  If such a case exists, simply
	 * jump to an infinite loop, or quit the simulator, or invoke
	 * debugger.
	 */
	rsr a0, ZSR_EXCCAUSE_SAVE
	j _TripleFault

_not_triple_fault:
	rsr.exccause a0

	xsr a0, ZSR_EXCCAUSE_SAVE

	esync
.endif
#endif

	addi a1, a1, -___xtensa_irq_bsa_t_SIZEOF
	s32i a0, a1, ___xtensa_irq_bsa_t_a0_OFFSET
	s32i a2, a1, ___xtensa_irq_bsa_t_a2_OFFSET
	s32i a3, a1, ___xtensa_irq_bsa_t_a3_OFFSET

	/* Level "1" is the exception handler, which uses a different
	 * calling convention.  No special register holds the
	 * interrupted PS, instead we just assume that the CPU has
	 * turned on the EXCM bit and set INTLEVEL.
	 */
.if \LVL == 1
	rsr.ps a0
#ifdef CONFIG_XTENSA_MMU
	/* TLB misses also come through level 1 interrupts.
	 * We do not want to unconditionally unmask interrupts.
	 * Execution continues after a TLB miss is handled,
	 * and we need to preserve the interrupt mask.
	 * The interrupt mask will be cleared for non-TLB-misses
	 * level 1 interrupt later in the handler code.
	 */
	movi a2, ~PS_EXCM_MASK
#else
	movi a2, ~(PS_EXCM_MASK | PS_INTLEVEL_MASK)
#endif
	and a0, a0, a2
	s32i a0, a1, ___xtensa_irq_bsa_t_ps_OFFSET
.else
	rsr.eps\LVL a0
	s32i a0, a1, ___xtensa_irq_bsa_t_ps_OFFSET
.endif

	rsr.epc\LVL a0
	s32i a0, a1, ___xtensa_irq_bsa_t_pc_OFFSET

	/* What's happening with this jump is that the L32R
	 * instruction to load a full 32 bit immediate must use an
	 * offset that is negative from PC.  Normally the assembler
	 * fixes this up for you by putting the "literal pool"
	 * somewhere at the start of the section.  But vectors start
	 * at a fixed address in their own section, and don't (in our
	 * current linker setup) have anywhere "definitely before
	 * vectors" to place immediates.  Some platforms and apps will
	 * link by dumb luck, others won't.  We add an extra jump just
	 * to clear space we know to be legal.
	 *
	 * The right way to fix this would be to use a "literal_prefix"
	 * to put the literals into a per-vector section, then link
	 * that section into the PREVIOUS vector's area right after
	 * the vector code.  Requires touching a lot of linker scripts
	 * though.
	 */
	j _after_imms\LVL\()
.align 4
_handle_excint_imm\LVL:
	.word \ENTRY_SYM
_c_handler_imm\LVL:
	.word \C_HANDLER_SYM
_after_imms\LVL:
	l32r a2, _c_handler_imm\LVL
	l32r a0, _handle_excint_imm\LVL
	jx a0
.popsection

#if defined(CONFIG_XTENSA_SMALL_VECTOR_TABLE_ENTRY)
	/* Small vector entry: the vector itself is only a jump to the
	 * helper emitted above.
	 */
.if \LVL == 1
.pushsection .iram0.text, "ax"
.elseif \LVL == XCHAL_DEBUGLEVEL
.pushsection .DebugExceptionVector.text, "ax"
.elseif \LVL == XCHAL_NMILEVEL
.pushsection .NMIExceptionVector.text, "ax"
.else
.pushsection .Level\LVL\()InterruptVector.text, "ax"
.endif
.global _Level\LVL\()Vector
_Level\LVL\()Vector :
	j _Level\LVL\()VectorHelper
.popsection
#endif

.endm

#endif /* ZEPHYR_ARCH_XTENSA_INCLUDE_XTENSA_ASM2_S_H */