/*
 * Copyright (c) 2017, Intel Corporation
 *
 * SPDX-License-Identifier: Apache-2.0
 */
#include <xtensa_asm2_s.h>
#include <zephyr/offsets.h>
#include <zephyr/zsr.h>

#if defined(CONFIG_SIMULATOR_XTENSA) || defined(XT_SIMULATOR)
#include <xtensa/simcall.h>
#endif

/*
 * xtensa_spill_reg_windows
 *
 * Spill all register windows. Not a C function, enter this via CALL0
 * (so you have to save off A0, but no other registers need to be
 * spilled). On return, all registers not part of the current
 * function will be spilled to memory. The WINDOWSTART SR will have a
 * single 1 bit corresponding to the current frame at WINDOWBASE.
 */
.global xtensa_spill_reg_windows
.align 4
xtensa_spill_reg_windows:
	SPILL_ALL_WINDOWS
	ret

/*
 * xtensa_save_high_regs
 *
 * Call with CALL0, with A2/A3 available as scratch. Pushes the high
 * A4-A15 GPRs to the stack if needed (i.e. if those registers are not
 * part of wrapped-around frames higher up the call stack), returning
 * to the caller with the stack pointer HAVING BEEN MODIFIED to
 * contain them.
 */
.global xtensa_save_high_regs
.align 4
xtensa_save_high_regs:
	/* Generate a rotated (modulo NREGS/4 bits!) WINDOWSTART in A2
	 * by duplicating the bits twice and shifting down by WINDOWBASE
	 * bits. Now the LSB is the register quad at WINDOWBASE.
	 */
	rsr a2, WINDOWSTART
	slli a3, a2, (XCHAL_NUM_AREGS / 4)
	or a2, a2, a3
	rsr a3, WINDOWBASE
	ssr a3
	srl a2, a2

	mov a3, a1 /* Stash our original stack pointer */

	/* For the next three bits in WINDOWSTART (which correspond to
	 * the A4-A7, A8-A11 and A12-A15 quads), if we find a one,
	 * that means that the quad is owned by a wrapped-around call
	 * in the registers, so we don't need to spill it or any
	 * further registers from the GPRs and can skip to the end.
	 */
	bbsi a2, 1, _high_gpr_spill_done
	addi a1, a1, -16
	s32i a4, a1, 0
	s32i a5, a1, 4
	s32i a6, a1, 8
	s32i a7, a1, 12

	bbsi a2, 2, _high_gpr_spill_done
	addi a1, a1, -16
	s32i a8, a1, 0
	s32i a9, a1, 4
	s32i a10, a1, 8
	s32i a11, a1, 12

	bbsi a2, 3, _high_gpr_spill_done
	addi a1, a1, -16
	s32i a12, a1, 0
	s32i a13, a1, 4
	s32i a14, a1, 8
	s32i a15, a1, 12

_high_gpr_spill_done:
	/* Push the original stack pointer so we know at restore
	 * time how many registers were spilled, then return, leaving the
	 * modified SP in A1.
	 */
	addi a1, a1, -4
	s32i a3, a1, 0

	ret

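/*
 * Illustrative example of the spill decision above (numbers chosen
 * arbitrarily, assuming XCHAL_NUM_AREGS == 64, i.e. 16 register quads):
 * with WINDOWBASE == 3 and WINDOWSTART == 0b101001 (live frames based
 * at quads 0, 3 and 5), duplicating the bits and shifting right by 3
 * leaves 0x2005 in A2. Bit 0 is our own frame; bit 1 is clear, so
 * A4-A7 get pushed; bit 2 is set (the frame based at quad 5 owns those
 * physical registers), so A8-A15 are left alone for the normal window
 * spill machinery to handle. The resulting stack, reading up from the
 * returned A1, is: [saved original SP][A4][A5][A6][A7][caller data...].
 */
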
/*
 * xtensa_restore_high_regs
 *
 * Does the inverse of xtensa_save_high_regs, taking in A1 the stack
 * pointer it produced and restoring the A4-A15 state (and the stack
 * pointer) to what they were at the earlier call. Call with CALL0,
 * leaving A2/A3 available as scratch.
 */
.global xtensa_restore_high_regs
.align 4
xtensa_restore_high_regs:
	/* pop our "original" stack pointer into a2, stash in a3 also */
	l32i a2, a1, 0
	addi a1, a1, 4
	mov a3, a2

	beq a1, a2, _high_restore_done
	addi a2, a2, -16
	l32i a4, a2, 0
	l32i a5, a2, 4
	l32i a6, a2, 8
	l32i a7, a2, 12

	beq a1, a2, _high_restore_done
	addi a2, a2, -16
	l32i a8, a2, 0
	l32i a9, a2, 4
	l32i a10, a2, 8
	l32i a11, a2, 12

	beq a1, a2, _high_restore_done
	addi a2, a2, -16
	l32i a12, a2, 0
	l32i a13, a2, 4
	l32i a14, a2, 8
	l32i a15, a2, 12

_high_restore_done:
	mov a1, a3 /* Original stack */
	ret

/*
 * _restore_context
 *
 * Arrive here via a jump. Enters into the restored context and does
 * not return. A1 should have a context pointer in it as received
 * from switch or an interrupt exit. Interrupts must be disabled,
 * and register windows should have been spilled.
 *
 * Note that exit from the restore is done with the RFI instruction,
 * using the EPCn/EPSn registers. Those will have been saved already
 * by any interrupt entry so they are safe to use. Note that EPC1 and
 * RFE are NOT usable (they can't preserve PS). Per the ISA spec, all
 * RFI levels do the same thing and differ only in the special
 * registers used to hold PC/PS, but Qemu has been observed to behave
 * strangely when RFI doesn't "return" to an INTLEVEL strictly lower
 * than it started from. So we leverage the zsr.h framework to pick
 * the highest level available for our specific platform.
 */
.global _restore_context
_restore_context:
	call0 xtensa_restore_high_regs

	l32i a0, a1, ___xtensa_irq_bsa_t_pc_OFFSET
	wsr a0, ZSR_EPC
	l32i a0, a1, ___xtensa_irq_bsa_t_ps_OFFSET
	wsr a0, ZSR_EPS

#if XCHAL_HAVE_FP && defined(CONFIG_CPU_HAS_FPU) && defined(CONFIG_FPU_SHARING)
	FPU_REG_RESTORE
#endif

#if defined(CONFIG_XTENSA_HIFI_SHARING)
.extern _xtensa_hifi_load
	call0 _xtensa_hifi_load
#endif

	l32i a0, a1, ___xtensa_irq_bsa_t_sar_OFFSET
	wsr a0, SAR
#if XCHAL_HAVE_LOOPS
	l32i a0, a1, ___xtensa_irq_bsa_t_lbeg_OFFSET
	wsr a0, LBEG
	l32i a0, a1, ___xtensa_irq_bsa_t_lend_OFFSET
	wsr a0, LEND
	l32i a0, a1, ___xtensa_irq_bsa_t_lcount_OFFSET
	wsr a0, LCOUNT
#endif
#if XCHAL_HAVE_S32C1I
	l32i a0, a1, ___xtensa_irq_bsa_t_scompare1_OFFSET
	wsr a0, SCOMPARE1
#endif
#if XCHAL_HAVE_THREADPTR && \
	(defined(CONFIG_USERSPACE) || defined(CONFIG_THREAD_LOCAL_STORAGE))
	l32i a0, a1, ___xtensa_irq_bsa_t_threadptr_OFFSET
	wur a0, THREADPTR
#endif
	rsync

	l32i a0, a1, ___xtensa_irq_bsa_t_a0_OFFSET
	l32i a2, a1, ___xtensa_irq_bsa_t_a2_OFFSET
	l32i a3, a1, ___xtensa_irq_bsa_t_a3_OFFSET
	addi a1, a1, ___xtensa_irq_bsa_t_SIZEOF

	rfi ZSR_RFI_LEVEL

/*
 * void xtensa_arch_except(int reason_p);
 *
 * Implements the hardware exception used by ARCH_EXCEPT on Xtensa,
 * capturing the interrupted stack frame and reason_p for use in the
 * exception handler and coredump.
 */
.global xtensa_arch_except
.global xtensa_arch_except_epc
.align 4
xtensa_arch_except:
	entry a1, 16
xtensa_arch_except_epc:
	ill
	retw

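/*
 * Note on the trap mechanism used above and by xtensa_arch_kernel_oops
 * below: the 'ill' opcode sits at an exported *_epc label, so the
 * resulting illegal-instruction exception carries a saved PC equal to
 * that label. That gives the C-level exception code a way to recognize
 * these deliberate traps (by comparing the faulting PC against the
 * labels) instead of treating them as stray illegal instructions, and
 * reason_p is still sitting in A2 of the trapping frame because the
 * 'entry' placed the argument there and nothing clobbers it.
 */
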
/*
 * void xtensa_arch_kernel_oops(int reason_p, void *ssf);
 *
 * Simply raises a hardware exception for a kernel OOPS.
 */
.global xtensa_arch_kernel_oops
.global xtensa_arch_kernel_oops_epc
.align 4
xtensa_arch_kernel_oops:
	entry a1, 16
xtensa_arch_kernel_oops_epc:
	ill
	retw

/*
 * void xtensa_switch(void *new, void **old_return);
 *
 * Context switches into the previously-saved "new" handle, placing
 * the saved "old" handle into the address provided by old_return.
 */
.global xtensa_switch
.align 4
xtensa_switch:
	entry a1, 16
	SPILL_ALL_WINDOWS
	addi a1, a1, -___xtensa_irq_bsa_t_SIZEOF

	/* Stash our A0/2/3 and the shift/loop registers into the base
	 * save area so they get restored as they are now. A2/A3
	 * don't actually get used post-restore, but they need to be
	 * stashed across the xtensa_save_high_regs call and this is a
	 * convenient place.
	 */
	s32i a0, a1, ___xtensa_irq_bsa_t_a0_OFFSET
	s32i a2, a1, ___xtensa_irq_bsa_t_a2_OFFSET
	s32i a3, a1, ___xtensa_irq_bsa_t_a3_OFFSET
	ODD_REG_SAVE

	/* Stash our PS register contents and a "restore" PC. */
	rsr a0, PS
	s32i a0, a1, ___xtensa_irq_bsa_t_ps_OFFSET
	movi a0, _switch_restore_pc
	s32i a0, a1, ___xtensa_irq_bsa_t_pc_OFFSET

#if defined(CONFIG_XTENSA_HIFI_SHARING)
	call0 _xtensa_hifi_save
#endif

	/* Now the high registers */
	call0 xtensa_save_high_regs

#ifdef CONFIG_KERNEL_COHERENCE
	/* Flush the stack. The top of stack was stored for us by
	 * arch_cohere_stacks(). It can be NULL for a dummy thread.
	 */
	rsr a0, ZSR_FLUSH
	beqz a0, noflush
	mov a3, a1
flushloop:
	dhwb a3, 0
	addi a3, a3, XCHAL_DCACHE_LINESIZE
	blt a3, a0, flushloop
noflush:
#endif

	/* Restore the A3 argument we spilled earlier (via the base
	 * save pointer pushed at the bottom of the stack) and set the
	 * stack to the "new" context out of the A2 spill slot.
	 */
	l32i a2, a1, 0
	l32i a3, a2, ___xtensa_irq_bsa_t_a3_OFFSET
	s32i a1, a3, 0

#ifdef CONFIG_USERSPACE
	/* Switch page tables */
	rsr a6, ZSR_CPU
	l32i a6, a6, ___cpu_t_current_OFFSET
#ifdef CONFIG_XTENSA_MMU
	call4 xtensa_swap_update_page_tables
#endif
#ifdef CONFIG_XTENSA_MPU
	call4 xtensa_mpu_map_write
#endif

	l32i a2, a3, 0
	l32i a2, a2, 0
#endif

	/* Switch stack pointer and restore. The jump to
	 * _restore_context does not return as such, but we arrange
	 * for the restored "next" address to be immediately after for
	 * sanity.
	 */
	l32i a1, a2, ___xtensa_irq_bsa_t_a2_OFFSET

#ifdef CONFIG_INSTRUMENT_THREAD_SWITCHING
	call4 z_thread_mark_switched_in
#endif
	j _restore_context
_switch_restore_pc:
	retw

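/*
 * For reference (derived from the code above, not a separate ABI
 * document): a context "handle" as passed between xtensa_switch() and
 * _restore_context is just the saved thread's stack pointer after the
 * base save area and the xtensa_save_high_regs spill have been pushed.
 * Storing A1 through old_return above publishes such a handle for the
 * outgoing thread, and switching in is nothing more than loading a
 * previously-published handle into A1 and jumping to _restore_context.
 */
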
/* Define our entry handler to load the struct kernel_t from the
 * MISC0 special register, and to find the nest and irq_stack values
 * at the precomputed offsets.
 */
.align 4
_handle_excint:
	EXCINT_HANDLER ___cpu_t_nested_OFFSET, ___cpu_t_irq_stack_OFFSET

/* Define the actual vectors for the hardware-defined levels with
 * DEF_EXCINT. These load a C handler address and jump to our handler
 * above.
 */

DEF_EXCINT 1, _handle_excint, xtensa_excint1_c

/* In the code below we use XCHAL_NMILEVEL when the core has an NMI,
 * and XCHAL_NUM_INTLEVELS otherwise, since not all Xtensa
 * configurations support NMI.
 */
#if XCHAL_HAVE_NMI
#define MAX_INTR_LEVEL XCHAL_NMILEVEL
#elif XCHAL_HAVE_INTERRUPTS
#define MAX_INTR_LEVEL XCHAL_NUM_INTLEVELS
#else
#error Xtensa core with no interrupt support is used
#define MAX_INTR_LEVEL 0
#endif

#if MAX_INTR_LEVEL >= 2
#if !(defined(CONFIG_GDBSTUB) && (XCHAL_DEBUGLEVEL == 2))
DEF_EXCINT 2, _handle_excint, xtensa_int2_c
#endif
#endif

#if MAX_INTR_LEVEL >= 3
#if !(defined(CONFIG_GDBSTUB) && (XCHAL_DEBUGLEVEL == 3))
DEF_EXCINT 3, _handle_excint, xtensa_int3_c
#endif
#endif

#if MAX_INTR_LEVEL >= 4
#if !(defined(CONFIG_GDBSTUB) && (XCHAL_DEBUGLEVEL == 4))
DEF_EXCINT 4, _handle_excint, xtensa_int4_c
#endif
#endif

#if MAX_INTR_LEVEL >= 5
#if !(defined(CONFIG_GDBSTUB) && (XCHAL_DEBUGLEVEL == 5))
DEF_EXCINT 5, _handle_excint, xtensa_int5_c
#endif
#endif

#if MAX_INTR_LEVEL >= 6
#if !(defined(CONFIG_GDBSTUB) && (XCHAL_DEBUGLEVEL == 6))
DEF_EXCINT 6, _handle_excint, xtensa_int6_c
#endif
#endif

#if MAX_INTR_LEVEL >= 7
#if !(defined(CONFIG_GDBSTUB) && (XCHAL_DEBUGLEVEL == 7))
DEF_EXCINT 7, _handle_excint, xtensa_int7_c
#endif
#endif

#if defined(CONFIG_GDBSTUB)
DEF_EXCINT XCHAL_DEBUGLEVEL, _handle_excint, xtensa_debugint_c
#endif

/* The user exception vector is defined here, as we need to handle
 * MOVSP exceptions in assembly (the result has to be to unspill the
 * caller function of the code that took the exception, and that can't
 * be done in C). A prototype exists which mucks with the stack frame
 * from the C handler instead, but that would add a LARGE overhead to
 * some alloca() calls (those where the caller has been spilled) just
 * to save these five cycles during other exceptions and L1
 * interrupts. Maybe revisit at some point, with better benchmarking.
 * Note that _xt_alloca_exc is Xtensa-authored code which expects A0
 * to have been saved to EXCSAVE1; we've modified it to use the zsr.h
 * API to get assigned a scratch register.
 */
.pushsection .UserExceptionVector.text, "ax"
.global _Level1RealVector
_Level1RealVector:
	wsr a0, ZSR_A0SAVE
	rsync
	rsr.exccause a0
#ifdef CONFIG_XTENSA_MMU
	beqi a0, EXCCAUSE_ITLB_MISS, _handle_tlb_miss_user
#endif /* CONFIG_XTENSA_MMU */
#ifdef CONFIG_USERSPACE
	beqi a0, EXCCAUSE_SYSCALL, _syscall
#endif /* CONFIG_USERSPACE */
#ifdef CONFIG_XTENSA_MMU
	addi a0, a0, -EXCCAUSE_DTLB_MISS
	beqz a0, _handle_tlb_miss_user
	rsr.exccause a0
#endif /* CONFIG_XTENSA_MMU */
	bnei a0, EXCCAUSE_ALLOCA, _not_alloca

	j _xt_alloca_exc
_not_alloca:
	rsr a0, ZSR_A0SAVE
	j _Level1Vector
#ifdef CONFIG_XTENSA_MMU
_handle_tlb_miss_user:
	/* Handle a TLB miss by touching the page table entry:
	 * when we access an address that is not mapped, the hardware
	 * takes a miss and tries to fetch the corresponding PTE from
	 * the page table. Because the page table itself is not mapped,
	 * that fetch misses too, and we end up here with an exception.
	 * All we do here is load from the PTE address (still unmapped),
	 * which causes another miss; this time the hardware refill
	 * succeeds, because the pages needed to map the page table
	 * itself are wired mappings. On return the original access is
	 * retried and now refills normally.
	 */
	rsr.ptevaddr a0
	l32i a0, a0, 0
	rsr a0, ZSR_A0SAVE
	rfe
#endif /* CONFIG_XTENSA_MMU */
#ifdef CONFIG_USERSPACE
_syscall:
	rsr a0, ZSR_A0SAVE
	j xtensa_do_syscall
#endif /* CONFIG_USERSPACE */
.popsection

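/*
 * Dispatch summary for _Level1RealVector above (for readers coming
 * from the C side): ITLB and DTLB misses (MMU builds only) are
 * refilled inline, SYSCALL goes to xtensa_do_syscall (userspace builds
 * only), MOVSP/alloca exceptions go to the Xtensa-provided
 * _xt_alloca_exc, and every other cause falls through to the generic
 * _Level1Vector path.
 */
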
/* In theory you can have levels up to 15, but known hardware only uses 7. */
#if XCHAL_NMILEVEL > 7
#error More interrupts than expected.
#endif

/* We don't actually use "kernel mode" currently. Populate the vector
 * out of simple caution in case app code clears the UM bit by mistake.
 */
.pushsection .KernelExceptionVector.text, "ax"
.global _KernelExceptionVector
_KernelExceptionVector:
#ifdef CONFIG_XTENSA_MMU
	wsr a0, ZSR_A0SAVE
	rsr.exccause a0
	beqi a0, EXCCAUSE_ITLB_MISS, _handle_tlb_miss_kernel
	addi a0, a0, -EXCCAUSE_DTLB_MISS
	beqz a0, _handle_tlb_miss_kernel
	rsr a0, ZSR_A0SAVE
#endif
	j _Level1Vector
#ifdef CONFIG_XTENSA_MMU
_handle_tlb_miss_kernel:
	/* This TLB miss handling is used only during xtensa_mmu_init(),
	 * where vecbase is at a different address and the offset encoded
	 * in the jump ('j') instruction would not reach the correct
	 * address (remember, vecbase has been moved). So we handle TLB
	 * misses in a very simple way here until we move back to using
	 * the UserExceptionVector above.
	 */
	rsr.ptevaddr a0
	l32i a0, a0, 0
	rsr a0, ZSR_A0SAVE
	rfe
#endif
.popsection

#ifdef XCHAL_DOUBLEEXC_VECTOR_VADDR
.pushsection .DoubleExceptionVector.text, "ax"
.global _DoubleExceptionVector
_DoubleExceptionVector:
#ifdef CONFIG_XTENSA_MMU
	wsr a0, ZSR_DBLEXC
	rsync

	rsr.exccause a0
	addi a0, a0, -EXCCAUSE_DTLB_MISS
	beqz a0, _handle_tlb_miss_dblexc

	/* Need to stash the DEPC for use by the C handler.
	 * If we encounter any DTLB misses while PS.EXCM is set,
	 * this vector will be used again and DEPC will then hold
	 * the new address instead of the one that resulted in this
	 * double exception.
	 */
	rsr.depc a0
	wsr a0, ZSR_DEPC_SAVE

	rsr a0, ZSR_DBLEXC

	j _Level1Vector

_TripleFault:
#endif /* CONFIG_XTENSA_MMU */

#if XCHAL_HAVE_DEBUG && defined(CONFIG_XTENSA_BREAK_ON_UNRECOVERABLE_EXCEPTIONS)
	/* Signals an unhandled double exception, or another unrecoverable
	 * exception. A debugger definitely needs to be attached to the
	 * hardware or simulator to catch this.
	 */
	break 1, 4
#elif defined(CONFIG_SIMULATOR_XTENSA) || defined(XT_SIMULATOR)
/* Tell the simulator to stop executing here, instead of spinning in
 * the infinite loop below. This greatly helps with tracing in the
 * simulator, as traces will not contain endless iterations of the
 * jump.
 */
	movi a3, 1
	movi a2, SYS_exit
	simcall
#endif
1:
	j 1b

#ifdef CONFIG_XTENSA_MMU
_handle_tlb_miss_dblexc:
	/* Handle all data TLB misses here.
	 * These data TLB misses are mostly caused by preloading
	 * page table entries in the level 1 exception handler.
	 * Failure to load the PTE will result in another exception
	 * with a different cause (exccause), which can be handled
	 * when the CPU re-enters the double exception handler.
	 */
	rsr.ptevaddr a0
	l32i a0, a0, 0

	rsr a0, ZSR_DBLEXC
	rfde
#endif
.popsection

#endif