/*
 * Copyright (c) 2017, Intel Corporation
 *
 * SPDX-License-Identifier: Apache-2.0
 */

#ifndef ZEPHYR_ARCH_XTENSA_INCLUDE_XTENSA_ASM2_S_H
#define ZEPHYR_ARCH_XTENSA_INCLUDE_XTENSA_ASM2_S_H

#include <zephyr/zsr.h>
#include "xtensa_asm2_context.h"

#include <zephyr/offsets.h>

/* Assembler header! This file contains macros designed to be included
 * only by the assembler.
 */

#if defined(CONFIG_XTENSA_HIFI_SHARING)
.extern _xtensa_hifi_save
#endif

/*
 * SPILL_ALL_WINDOWS
 *
 * Spills all windowed registers (i.e. registers not visible as
 * A0-A15) to their ABI-defined spill regions on the stack.
 *
 * Unlike the Xtensa HAL implementation, this code requires that the
 * WOE bit be set and the EXCM bit be clear in PS, and relies on
 * repeated hardware exception handling to do the register spills. The
 * trick is to do a noop write to the high registers, which the
 * hardware will trap (into an overflow exception) in the case where
 * those registers are already used by an existing call frame. Then it
 * rotates the window and repeats until all but the A0-A3 registers of
 * the original frame are guaranteed to be spilled, eventually rotating
 * back around into the original frame. Advantages:
 *
 * - Vastly smaller code size
 *
 * - More easily maintained if changes are needed to window over/underflow
 *   exception handling.
 *
 * - Requires no scratch registers to do its work, so can be used safely in any
 *   context.
 *
 * - If the WOE bit is not enabled (for example, in code written for
 *   the CALL0 ABI), this becomes a silent noop and operates compatibly.
 *
 * - In memory protection situations, this relies on the existing
 *   exception handlers (and thus their use of the L/S32E
 *   instructions) to execute stores in the protected space. AFAICT,
 *   the HAL routine does not handle this situation and isn't safe: it
 *   will happily write through the "stack pointers" found in
 *   registers regardless of where they might point.
 *
 * - Hilariously it's ACTUALLY FASTER than the HAL routine. And not
 *   just a little bit, it's MUCH faster. With a mostly full register
 *   file on an LX6 core (ESP-32) I'm measuring 145 cycles to spill
 *   registers with this vs. 279 (!) to do it with
 *   xthal_spill_windows(). Apparently Xtensa exception handling is
 *   really fast, and no one told their software people.
 *
 * Note that as with the Xtensa HAL spill routine, and unlike context
 * switching code on most sane architectures, the intermediate states
 * here will have an invalid stack pointer. That means that this code
 * must not be preempted in any context (i.e. all Zephyr situations)
 * where the interrupt code will need to use the stack to save the
 * context. But unlike the HAL, which runs with exceptions masked via
 * EXCM, this will not: it needs the overflow handlers unmasked. Use
 * INTLEVEL instead (which, happily, is what Zephyr's locking does
 * anyway).
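 *
 * As a worked sketch of the arithmetic in the 64-register variant
 * below (illustrative, not normative): ROTW counts in quads of four
 * registers, so rotations of 3+3+3+3+4 quads move the window by 16
 * quads (64 registers) in total, returning WindowBase to its starting
 * value. At each step the no-op AND touches the highest quad visible
 * in the current 16-register window, so any live frame occupying it
 * takes a WindowOverflow exception and is spilled by the normal
 * handler before the next rotation.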
 */
.macro SPILL_ALL_WINDOWS
#if XCHAL_NUM_AREGS == 64
	and a12, a12, a12
	rotw 3
	and a12, a12, a12
	rotw 3
	and a12, a12, a12
	rotw 3
	and a12, a12, a12
	rotw 3
	and a12, a12, a12
	rotw 4
#elif XCHAL_NUM_AREGS == 32
	and a12, a12, a12
	rotw 3
	and a12, a12, a12
	rotw 3
	and a4, a4, a4
	rotw 2
#else
#error Unrecognized XCHAL_NUM_AREGS
#endif
.endm

#if XCHAL_HAVE_FP && defined(CONFIG_CPU_HAS_FPU) && defined(CONFIG_FPU_SHARING)
/*
 * FPU_REG_SAVE
 *
 * Saves the Floating Point Unit context registers in the base save
 * area pointed to by the current stack pointer A1. The Floating-Point
 * Coprocessor Option adds the FR register file and two User Registers
 * called FCR and FSR. The FR register file consists of 16 registers of
 * 32 bits each and is used for all data computation.
 */
.macro FPU_REG_SAVE
	rur.fcr a0
	s32i a0, a1, ___xtensa_irq_bsa_t_fcr_OFFSET
	rur.fsr a0
	s32i a0, a1, ___xtensa_irq_bsa_t_fsr_OFFSET
	ssi f0, a1, ___xtensa_irq_bsa_t_fpu0_OFFSET
	ssi f1, a1, ___xtensa_irq_bsa_t_fpu1_OFFSET
	ssi f2, a1, ___xtensa_irq_bsa_t_fpu2_OFFSET
	ssi f3, a1, ___xtensa_irq_bsa_t_fpu3_OFFSET
	ssi f4, a1, ___xtensa_irq_bsa_t_fpu4_OFFSET
	ssi f5, a1, ___xtensa_irq_bsa_t_fpu5_OFFSET
	ssi f6, a1, ___xtensa_irq_bsa_t_fpu6_OFFSET
	ssi f7, a1, ___xtensa_irq_bsa_t_fpu7_OFFSET
	ssi f8, a1, ___xtensa_irq_bsa_t_fpu8_OFFSET
	ssi f9, a1, ___xtensa_irq_bsa_t_fpu9_OFFSET
	ssi f10, a1, ___xtensa_irq_bsa_t_fpu10_OFFSET
	ssi f11, a1, ___xtensa_irq_bsa_t_fpu11_OFFSET
	ssi f12, a1, ___xtensa_irq_bsa_t_fpu12_OFFSET
	ssi f13, a1, ___xtensa_irq_bsa_t_fpu13_OFFSET
	ssi f14, a1, ___xtensa_irq_bsa_t_fpu14_OFFSET
	ssi f15, a1, ___xtensa_irq_bsa_t_fpu15_OFFSET
.endm

.macro FPU_REG_RESTORE
	l32i.n a0, a1, ___xtensa_irq_bsa_t_fcr_OFFSET
	wur.fcr a0
	l32i.n a0, a1, ___xtensa_irq_bsa_t_fsr_OFFSET
	wur.fsr a0
	lsi f0, a1, ___xtensa_irq_bsa_t_fpu0_OFFSET
	lsi f1, a1, ___xtensa_irq_bsa_t_fpu1_OFFSET
	lsi f2, a1, ___xtensa_irq_bsa_t_fpu2_OFFSET
	lsi f3, a1, ___xtensa_irq_bsa_t_fpu3_OFFSET
	lsi f4, a1, ___xtensa_irq_bsa_t_fpu4_OFFSET
	lsi f5, a1, ___xtensa_irq_bsa_t_fpu5_OFFSET
	lsi f6, a1, ___xtensa_irq_bsa_t_fpu6_OFFSET
	lsi f7, a1, ___xtensa_irq_bsa_t_fpu7_OFFSET
	lsi f8, a1, ___xtensa_irq_bsa_t_fpu8_OFFSET
	lsi f9, a1, ___xtensa_irq_bsa_t_fpu9_OFFSET
	lsi f10, a1, ___xtensa_irq_bsa_t_fpu10_OFFSET
	lsi f11, a1, ___xtensa_irq_bsa_t_fpu11_OFFSET
	lsi f12, a1, ___xtensa_irq_bsa_t_fpu12_OFFSET
	lsi f13, a1, ___xtensa_irq_bsa_t_fpu13_OFFSET
	lsi f14, a1, ___xtensa_irq_bsa_t_fpu14_OFFSET
	lsi f15, a1, ___xtensa_irq_bsa_t_fpu15_OFFSET
.endm
#endif

/*
 * ODD_REG_SAVE
 *
 * Stashes the oddball shift/loop context registers in the base save
 * area pointed to by the current stack pointer. On exit, A0 will
 * have been modified but A2/A3 have not, and the shift/loop
 * instructions can be used freely (though note loops don't work in
 * exceptions for other reasons!).
 *
 * Does not populate or modify the PS/PC save locations.
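 *
 * A minimal illustrative invocation (a sketch only; the real use is
 * in EXCINT_HANDLER below), assuming A1 already points at a freshly
 * allocated BSA with A0-A3 and PS/PC stored by the caller:
 *
 *     ODD_REG_SAVE
 *     call0 xtensa_save_high_regs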
 */
.macro ODD_REG_SAVE
	rsr.sar a0
	s32i a0, a1, ___xtensa_irq_bsa_t_sar_OFFSET
#if XCHAL_HAVE_LOOPS
	rsr.lbeg a0
	s32i a0, a1, ___xtensa_irq_bsa_t_lbeg_OFFSET
	rsr.lend a0
	s32i a0, a1, ___xtensa_irq_bsa_t_lend_OFFSET
	rsr.lcount a0
	s32i a0, a1, ___xtensa_irq_bsa_t_lcount_OFFSET
#endif
	rsr.exccause a0
	s32i a0, a1, ___xtensa_irq_bsa_t_exccause_OFFSET
#if XCHAL_HAVE_S32C1I
	rsr.scompare1 a0
	s32i a0, a1, ___xtensa_irq_bsa_t_scompare1_OFFSET
#endif
#if XCHAL_HAVE_THREADPTR && \
	(defined(CONFIG_USERSPACE) || defined(CONFIG_THREAD_LOCAL_STORAGE))
	rur.THREADPTR a0
	s32i a0, a1, ___xtensa_irq_bsa_t_threadptr_OFFSET
#endif
#if XCHAL_HAVE_FP && defined(CONFIG_CPU_HAS_FPU) && defined(CONFIG_FPU_SHARING)
	FPU_REG_SAVE
#endif

.endm

#ifdef CONFIG_XTENSA_MMU
/*
 * CALC_PTEVADDR_BASE
 *
 * This calculates the virtual address of the first PTE page
 * (the PTEVADDR base, the one mapping 0x00000000) so that we can
 * use it to obtain the virtual address of the PTE page we are
 * interested in. The base itself is simply
 * (1 << CONFIG_XTENSA_MMU_PTEVADDR_SHIFT).
 *
 * Note that it is done this way to avoid any TLB miss that would
 * occur if we used l32r to load the PTEVADDR base: if the page
 * containing the literal is not in the TLB, we would have to handle
 * the very TLB miss we are trying to avoid here.
 *
 * @param ADDR_REG Register to store the calculated
 *                 PTEVADDR base address.
 *
 * @note The content of ADDR_REG will be modified.
 *       Save and restore it around this macro usage.
 */
.macro CALC_PTEVADDR_BASE ADDR_REG
	movi \ADDR_REG, 1
	slli \ADDR_REG, \ADDR_REG, CONFIG_XTENSA_MMU_PTEVADDR_SHIFT
.endm

/*
 * PRELOAD_PTEVADDR
 *
 * This preloads the page table entries for a 4MB region to avoid TLB
 * misses. This 4MB region is mapped via a page (4KB) of page table
 * entries (PTE). Each entry is 4 bytes mapping a 4KB region. Each page,
 * then, has 1024 entries mapping a 4MB region. Filling TLB entries is
 * done automatically by hardware, as long as the PTE page associated
 * with a particular address is also in the TLB. If the PTE page is not
 * in the TLB, an exception will be raised that must be handled. This
 * TLB miss is problematic when we are in the middle of dealing with
 * another exception or handling an interrupt. So we need to put the
 * PTE page into the TLB by simply doing a load operation.
 *
 * @param ADDR_REG Register containing the target address
 * @param PTEVADDR_BASE_REG Register containing the PTEVADDR base
 *
 * @note The content of ADDR_REG will be modified.
 *       Save and restore it around this macro usage.
 */
.macro PRELOAD_PTEVADDR ADDR_REG, PTEVADDR_BASE_REG
	/*
	 * Calculate the offset to the first PTE page of all memory.
	 *
	 * Every page (4KB) of page table entries contains
	 * 1024 entries (as each entry is 4 bytes). Each entry
	 * maps one 4KB page. So one page of entries maps 4MB of
	 * memory.
	 *
	 * 1. We need to find the virtual address of the PTE page
	 *    holding the page table entry that maps the address in
	 *    register ADDR_REG. To do this, we first need to find
	 *    the offset of this PTE page from the first PTE page
	 *    (the one mapping memory 0x00000000):
	 *    a. Find the index of the 4KB page containing the
	 *       address in ADDR_REG.
	 *       This can simply be done by shifting right 12 bits
	 *       (discarding the 12-bit offset within the page).
	 *    b. Since each PTE page contains 1024 entries, divide
	 *       the page index obtained in step (a) by 1024 (shift
	 *       right another 10 bits) to obtain the index of the
	 *       PTE page, then multiply by the 4KB PTE page size
	 *       (shift left 12 bits) to turn that index into a
	 *       byte offset from the first PTE page.
	 *
	 *    Steps (a) and (b) together reduce to shifting right
	 *    22 bits and then shifting left 12 bits.
	 *
	 * 2. Add the result from step (1) to PTEVADDR_BASE_REG to
	 *    get the virtual address of the PTE page.
	 *
	 * 3. Do an l32i to force the PTE page into the TLB.
	 */

	/* Step 1 */
	srli \ADDR_REG, \ADDR_REG, 22
	slli \ADDR_REG, \ADDR_REG, 12

	/* Step 2 */
	add \ADDR_REG, \ADDR_REG, \PTEVADDR_BASE_REG

	/* Step 3 */
	l32i \ADDR_REG, \ADDR_REG, 0
.endm
#endif /* CONFIG_XTENSA_MMU */

/*
 * CROSS_STACK_CALL
 *
 * Sets the stack up carefully such that a "cross stack" call can spill
 * correctly, then invokes an immediate handler. Note that:
 *
 * 0. When spilling a frame, functions find their callEE's stack pointer
 *    (to save A0-A3) from registers. But they find their
 *    already-spilled callER's stack pointer (to save higher GPRs) from
 *    their own stack memory.
 *
 * 1. The function that was interrupted ("interruptee") does not need to
 *    be spilled, because it already has been as part of the context
 *    save. So it doesn't need registers allocated for it anywhere.
 *
 * 2. Interruptee's caller needs to spill into the space below the
 *    interrupted stack frame, which means that the A1 register it finds
 *    below it needs to contain the old/interrupted stack and not the
 *    context saved one.
 *
 * 3. The ISR dispatcher (called "underneath" interruptee) needs to spill
 *    high registers into the space immediately above its own stack frame,
 *    so it needs to find a caller with the "new" stack pointer instead.
 *
 * We make this work by inserting TWO 4-register frames between
 * "interruptee's caller" and "ISR dispatcher". The top one (which
 * occupies the slot formerly held by "interruptee", whose registers
 * were saved via external means) holds the "interrupted A1" and the
 * bottom has the "top of the interrupt stack" which can be either the
 * word above a new memory area (when handling an interrupt from user
 * mode) OR the existing "post-context-save" stack pointer (when
 * handling a nested interrupt). The code works either way. Because
 * these are both only 4-register frames, neither needs its own caller
 * for spilling.
 *
 * The net cost is 32 wasted bytes on the interrupt stack frame to
 * spill our two "phantom frames" (actually not quite, as we'd need a
 * few of those words used somewhere for tracking the stack pointers
 * anyway). But the benefit is that NO REGISTER FRAMES NEED TO BE
 * SPILLED on interrupt entry. And if we return back into the same
 * context we interrupted (a common case) no windows need to be
 * explicitly spilled at all. And in fact in the case where the ISR
 * uses significant depth on its own stack, the interrupted frames
 * will be spilled naturally as a standard cost of a function call,
 * giving register windows something like "zero cost interrupts".
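 *
 * A rough, illustrative sketch of the chain the macro below builds
 * (register numbers as seen before the handler performs its own
 * ENTRY):
 *
 *   expansion site: A1 = interrupted SP, A6 = "new" SP,
 *                   A10 = context handle, A11 = handler
 *     -> call4 _xstack_call0  (phantom frame #1: does an ENTRY, then
 *                              replaces its A1 with the "new" SP)
 *     -> call4 _xstack_call1  (phantom frame #2: no ENTRY of its own)
 *     -> jx to the handler    (its ENTRY completes the rotation and
 *                              it receives the context handle as its
 *                              argument)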
 *
 * FIXME: a terrible awful really nifty idea to fix the stack waste
 * problem would be to use a SINGLE frame between the two stacks,
 * pre-spill it with one stack pointer for the "lower" call to see and
 * leave the register SP in place for the "upper" frame to use.
 * Would require modifying the Window{Over|Under}flow4 exceptions to
 * know not to spill/fill these special frames, but that's not too
 * hard, maybe...
 *
 * Enter this macro with a valid "context saved" pointer (i.e. SP
 * should point to a stored pointer which points to one BSA below the
 * interrupted/old stack) in A1, a handler function in A2, and a "new"
 * stack pointer (i.e. a pointer to the word ABOVE the allocated stack
 * area) in A3. Window exceptions must be enabled (PS.EXCM clear), but
 * PS.INTLEVEL must (!) be set such that no nested interrupts can
 * arrive (we restore the natural INTLEVEL from the value in ZSR_EPS
 * just before entering the call). On return A0/1 will be unchanged,
 * A2 has the return value of the called function, and A3 is
 * clobbered. A4-A15 become part of called frames and MUST NOT BE IN
 * USE by the code that expands this macro. The called function gets
 * the context save handle in A1 as its first argument.
 */
.macro CROSS_STACK_CALL
	mov a6, a3	/* place "new sp" in the next frame's A2 */
	mov a10, a1	/* pass "context handle" in 2nd frame's A2 */
	mov a3, a1	/* stash it locally in A3 too */
	mov a11, a2	/* handler in 2nd frame's A3, next frame's A7 */

	/* Recover the interrupted SP from the BSA */
	l32i a1, a1, 0
	l32i a0, a1, ___xtensa_irq_bsa_t_a0_OFFSET
	addi a1, a1, ___xtensa_irq_bsa_t_SIZEOF

	call4 _xstack_call0_\@
	mov a1, a3	/* restore original SP */
	mov a2, a6	/* copy return value */
	j _xstack_returned_\@
.align 4
_xstack_call0_\@:
	/* We want an ENTRY to set a bit in windowstart and do the
	 * rotation, but we want our own SP. After that, we are
	 * running in a valid frame, so re-enable interrupts.
	 */
	entry a1, 16
	mov a1, a2
	rsr.ZSR_EPS a2
	wsr.ps a2
	call4 _xstack_call1_\@
	mov a2, a6	/* copy return value */
	retw
.align 4
_xstack_call1_\@:
	/* Remember the handler is going to do our ENTRY, so the
	 * handler pointer is still in A7 (not A3) even though this is
	 * after the second CALL4.
	 */
	jx a7
_xstack_returned_\@:
.endm

/* Entry setup for all exceptions and interrupts. Arrive here with
 * the stack pointer decremented across a base save area, A0-A3 and
 * PS/PC already spilled to the stack in the BSA, and A2 containing a
 * level-specific C handler function.
 *
 * This is a macro (to allow for unit testing) that expands to a
 * handler body to which the vectors can jump. It takes two static
 * (!) arguments: offsets within the per-CPU record struct (which the
 * ZSR_CPU special register is set up to point to) of a "nest count"
 * word and of the interrupt stack top.
 */
.macro EXCINT_HANDLER NEST_OFF, INTSTACK_OFF
	/* A2 contains our handler function which will get clobbered
	 * by the save. Stash it into the scratch slot in the
	 * BSA and recover it immediately after. Kind of a hack.
	 */
	s32i a2, a1, ___xtensa_irq_bsa_t_scratch_OFFSET

	ODD_REG_SAVE

#if defined(CONFIG_XTENSA_HIFI_SHARING)
	call0 _xtensa_hifi_save	/* Save HiFi registers */
#endif

	call0 xtensa_save_high_regs

	l32i a2, a1, 0
	l32i a2, a2, ___xtensa_irq_bsa_t_scratch_OFFSET

#if XCHAL_HAVE_THREADPTR && defined(CONFIG_USERSPACE)
	/* Clear the threadptr because it is used to check whether a
	 * thread is running in user mode. Since we are handling an
	 * interrupt, we don't want the system to think it is possibly
	 * running in user mode.
	 */
	movi.n a0, 0
	wur.THREADPTR a0
#endif /* XCHAL_HAVE_THREADPTR && CONFIG_USERSPACE */

	/* There's a gotcha with level 1 handlers: the INTLEVEL field
	 * gets left at zero and not set like high priority interrupts
	 * do. That works fine for exceptions, but for L1 interrupts,
	 * when we unmask EXCM below, the CPU will just fire the
	 * interrupt again and get stuck in a loop blasting save
	 * frames down the stack to the bottom of memory. It would be
	 * good to put this code into the L1 handler only, but there's
	 * not enough room in the vector without some work there to
	 * squash it some. Next choice would be to make this a macro
	 * argument and expand two versions of this handler. An
	 * optimization FIXME, I guess.
	 */
	rsr.ps a0
	movi a3, PS_INTLEVEL_MASK
	and a0, a0, a3
	bnez a0, _not_l1
	rsr.ps a0
	movi a3, PS_INTLEVEL(1)
	or a0, a0, a3
	wsr.ps a0
_not_l1:

	/* Setting up the cross stack call below has states where the
	 * resulting frames are invalid/non-reentrant, so we can't
	 * allow nested interrupts. But we do need EXCM unmasked, as
	 * we use CALL/ENTRY instructions in the process and need to
	 * handle exceptions to spill caller/interruptee frames. Use
	 * PS.INTLEVEL at maximum to mask all interrupts and stash the
	 * current value in our designated EPS register (which is
	 * guaranteed unused across the call).
	 */
	rsil a0, 0xf

	/* Since we are unmasking EXCM, we need to set RING bits to kernel
	 * mode, otherwise we won't be able to run the exception handler in C.
	 */
	movi a3, ~(PS_EXCM_MASK) & ~(PS_RING_MASK)
	and a0, a0, a3
	wsr.ZSR_EPS a0
	wsr.ps a0
	rsync

	/* A1 already contains our saved stack, and A2 our handler.
	 * So all that's needed for CROSS_STACK_CALL is to put the
	 * "new" stack into A3. This can be either a copy of A1 or an
	 * entirely new area depending on whether the nest count found
	 * at offset \NEST_OFF is zero.
	 */
	rsr.ZSR_CPU a3
	l32i a0, a3, \NEST_OFF
	beqz a0, _switch_stacks_\@

	/* Use the same stack, just copy A1 to A3 after incrementing NEST */
	addi a0, a0, 1
	s32i a0, a3, \NEST_OFF
	mov a3, a1
	j _do_call_\@

_switch_stacks_\@:
	addi a0, a0, 1
	s32i a0, a3, \NEST_OFF
	l32i a3, a3, \INTSTACK_OFF

_do_call_\@:
	CROSS_STACK_CALL

	/* Mask interrupts (which have been unmasked during the handler
	 * execution) while we muck with the windows and decrement the
	 * nest count. The restore will unmask them correctly.
	 */
	rsil a0, XCHAL_NUM_INTLEVELS

	/* Decrement nest count */
	rsr.ZSR_CPU a3
	l32i a0, a3, \NEST_OFF
	addi a0, a0, -1
	s32i a0, a3, \NEST_OFF

	/* Last trick: the called function returned the "next" handle
	 * to restore to in A6 (the call4'd function's A2).
	 * If this is not the same handle as we started with, we need
	 * to do a register spill before restoring, for obvious
	 * reasons. Remember to restore the A1 stack pointer as it
	 * existed at interrupt time so the caller of the interrupted
	 * function spills to the right place.
	 */
	beq a6, a1, _restore_\@

#ifndef CONFIG_USERSPACE
	l32i a1, a1, 0
	l32i a0, a1, ___xtensa_irq_bsa_t_a0_OFFSET
	addi a1, a1, ___xtensa_irq_bsa_t_SIZEOF
#ifndef CONFIG_KERNEL_COHERENCE
	/* When using coherence, the registers of the interrupted
	 * context got spilled upstream in arch_cohere_stacks()
	 */
	SPILL_ALL_WINDOWS
#endif

	/* Restore A1 stack pointer from "next" handle. */
	mov a1, a6
#else
	/* With userspace, we cannot simply restore the A1 stack
	 * pointer at this point because we need to swap page tables
	 * to the incoming thread, and we do not want to call that
	 * function on the thread's stack. So we stash the new stack
	 * pointer into A2 first, then move it to A1 after we have
	 * swapped the page table.
	 */
	mov a2, a6

	/* Need to switch page tables because the "next" handle
	 * returned above is not the same handle as we started
	 * with. This means we are being restored to another
	 * thread.
	 */
	rsr a6, ZSR_CPU
	l32i a6, a6, ___cpu_t_current_OFFSET

#ifdef CONFIG_XTENSA_MMU
	call4 xtensa_swap_update_page_tables
#endif
#ifdef CONFIG_XTENSA_MPU
	call4 xtensa_mpu_map_write
#endif
	l32i a1, a1, 0
	l32i a0, a1, ___xtensa_irq_bsa_t_a0_OFFSET
	addi a1, a1, ___xtensa_irq_bsa_t_SIZEOF

	SPILL_ALL_WINDOWS

	/* Move the stashed stack pointer to A1 to restore the stack. */
	mov a1, a2
#endif

_restore_\@:
	j _restore_context
.endm

/* Defines an exception/interrupt vector for a specified level. Saves
 * off the interrupted A0-A3 registers and the per-level PS/PC
 * registers to the stack before jumping to a handler (defined with
 * EXCINT_HANDLER) to do the rest of the work.
 *
 * Arguments are a numeric interrupt level and symbol names for the
 * entry code (defined via EXCINT_HANDLER) and a C handler for this
 * particular level.
 *
 * Note that the linker sections for some levels get special names for
 * no particularly good reason. Only level 1 has any code generation
 * difference, because it is the legacy exception level that predates
 * the EPS/EPC registers. It also lives in the "iram0.text" segment
 * (which is linked immediately after the vectors) so that an assembly
 * stub can be loaded into the vector area instead and reach this code
 * with a simple jump instruction.
 */
.macro DEF_EXCINT LVL, ENTRY_SYM, C_HANDLER_SYM
#if defined(CONFIG_XTENSA_SMALL_VECTOR_TABLE_ENTRY)
.pushsection .iram.text, "ax"
.global _Level\LVL\()VectorHelper
_Level\LVL\()VectorHelper :
#else
.if \LVL == 1
.pushsection .iram0.text, "ax"
.elseif \LVL == XCHAL_DEBUGLEVEL
.pushsection .DebugExceptionVector.text, "ax"
.elseif \LVL == XCHAL_NMILEVEL
.pushsection .NMIExceptionVector.text, "ax"
.else
.pushsection .Level\LVL\()InterruptVector.text, "ax"
.endif
.global _Level\LVL\()Vector
_Level\LVL\()Vector:
#endif

#ifdef CONFIG_XTENSA_MMU
.if \LVL == 1
	/* If there are any TLB misses during interrupt handling,
	 * the user/kernel/double exception vector will be triggered
	 * to handle these misses.
	 * This results in DEPC and EXCCAUSE being overwritten, and
	 * then execution returns back to the site of the TLB miss.
	 * When it gets to the C handler, it will not see the original
	 * cause. So stash the EXCCAUSE here so the C handler can see
	 * the original cause.
	 *
	 * For double exceptions, DEPC is saved in earlier vector
	 * code.
	 */
	wsr a0, ZSR_EXCCAUSE_SAVE

	esync

	rsr a0, ZSR_DEPC_SAVE
	beqz a0, _not_triple_fault

	/* If the stashed DEPC is not zero, we have started servicing
	 * a double exception and yet we are here because there is
	 * another exception (through user/kernel if PS.EXCM is
	 * cleared, or through double if PS.EXCM is set). This can
	 * be considered a triple fault, although there are no triple
	 * faults on Xtensa. Once PS.EXCM is set, it keeps going
	 * through the double exception vector for any new exceptions.
	 * However, our exception code needs to unmask PS.EXCM to
	 * enable register window operations. So after that, any
	 * new exceptions will go through the kernel or user vectors
	 * depending on PS.UM. If there are continuous faults, it may
	 * keep ping-ponging between the double and kernel/user
	 * exception vectors and never get resolved. Since we stash
	 * DEPC during double exceptions, and the stashed value is
	 * only cleared once the double exception has been processed,
	 * we can use the stashed DEPC value to detect if the next
	 * exception could be considered a triple fault. If such a
	 * case exists, simply jump to an infinite loop, quit the
	 * simulator, or invoke the debugger.
	 */
	rsr a0, ZSR_EXCCAUSE_SAVE
	j _TripleFault

_not_triple_fault:
	rsr.exccause a0

	xsr a0, ZSR_EXCCAUSE_SAVE

	esync
.endif
#endif

	addi a1, a1, -___xtensa_irq_bsa_t_SIZEOF
	s32i a0, a1, ___xtensa_irq_bsa_t_a0_OFFSET
	s32i a2, a1, ___xtensa_irq_bsa_t_a2_OFFSET
	s32i a3, a1, ___xtensa_irq_bsa_t_a3_OFFSET

	/* Level "1" is the exception handler, which uses a different
	 * calling convention. No special register holds the
	 * interrupted PS, instead we just assume that the CPU has
	 * turned on the EXCM bit and set INTLEVEL.
	 */
.if \LVL == 1
	rsr.ps a0
#ifdef CONFIG_XTENSA_MMU
	/* TLB misses also come through level 1 interrupts.
	 * We do not want to unconditionally unmask interrupts.
	 * Execution continues after a TLB miss is handled,
	 * and we need to preserve the interrupt mask.
	 * The interrupt mask will be cleared for non-TLB-miss
	 * level 1 interrupts later in the handler code.
	 */
	movi a2, ~PS_EXCM_MASK
#else
	movi a2, ~(PS_EXCM_MASK | PS_INTLEVEL_MASK)
#endif
	and a0, a0, a2
	s32i a0, a1, ___xtensa_irq_bsa_t_ps_OFFSET
.else
	rsr.eps\LVL a0
	s32i a0, a1, ___xtensa_irq_bsa_t_ps_OFFSET
.endif

	rsr.epc\LVL a0
	s32i a0, a1, ___xtensa_irq_bsa_t_pc_OFFSET

	/* What's happening with this jump is that the L32R
	 * instruction to load a full 32 bit immediate must use an
	 * offset that is negative from PC. Normally the assembler
	 * fixes this up for you by putting the "literal pool"
	 * somewhere at the start of the section. But vectors start
	 * at a fixed address in their own section, and don't (in our
	 * current linker setup) have anywhere "definitely before
	 * vectors" to place immediates. Some platforms and apps will
	 * link by dumb luck, others won't. We add an extra jump just
	 * to clear space we know to be legal.
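	 *
	 * Roughly, the layout emitted below looks like this (an
	 * illustrative sketch):
	 *
	 *       j _after_imms<LVL>   <- skip over the literals at runtime
	 *       .word ENTRY_SYM      <- literals end up at a lower address
	 *       .word C_HANDLER_SYM     than the L32Rs that reference them
	 *   _after_imms<LVL>:
	 *       l32r ...             <- so the (negative) PC-relative
	 *       l32r ...                offsets are guaranteed to work
	 *       jx ...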
	 *
	 * The right way to fix this would be to use a "literal_prefix"
	 * to put the literals into a per-vector section, then link
	 * that section into the PREVIOUS vector's area right after
	 * the vector code. Requires touching a lot of linker scripts
	 * though.
	 */
	j _after_imms\LVL\()
.align 4
_handle_excint_imm\LVL:
	.word \ENTRY_SYM
_c_handler_imm\LVL:
	.word \C_HANDLER_SYM
_after_imms\LVL:
	l32r a2, _c_handler_imm\LVL
	l32r a0, _handle_excint_imm\LVL
	jx a0
.popsection

#if defined(CONFIG_XTENSA_SMALL_VECTOR_TABLE_ENTRY)
.if \LVL == 1
.pushsection .iram0.text, "ax"
.elseif \LVL == XCHAL_DEBUGLEVEL
.pushsection .DebugExceptionVector.text, "ax"
.elseif \LVL == XCHAL_NMILEVEL
.pushsection .NMIExceptionVector.text, "ax"
.else
.pushsection .Level\LVL\()InterruptVector.text, "ax"
.endif
.global _Level\LVL\()Vector
_Level\LVL\()Vector :
	j _Level\LVL\()VectorHelper
.popsection
#endif

.endm

#endif /* ZEPHYR_ARCH_XTENSA_INCLUDE_XTENSA_ASM2_S_H */