/*******************************************************************************
 * Copyright 2019-2021 Microchip Corporation.
 *
 * SPDX-License-Identifier: MIT
 *
 * MPFS HAL Embedded Software
 *
 */

/*******************************************************************************
 * @file entry.S
 * @author Microchip-FPGA Embedded Systems Solutions
 * @brief entry functions.
 *
 */

#include "../common/bits.h"
#include "../common/encoding.h"
#include "../common/mss_mtrap.h"
#include "system_startup_defs.h"
#include "mpfs_hal_config/mss_sw_config.h"

    .option norvc
    .section .text.init,"ax", %progbits
    .globl reset_vector
    .globl _start

/*******************************************************************************
 * reset_vector / _start
 *
 * Program entry point. Two variants are selected at build time:
 *
 *  IMAGE_LOADED_BY_BOOTLOADER == 0
 *      Every hart runs this code. Each hart installs trap_vector in mtvec,
 *      masks interrupts, zeroes the integer registers, selects its own stack
 *      from the __stack_bottom_hN$ / __stack_top_hN$ linker symbols, zeroes
 *      that stack and reserves HLS_DEBUG_AREA_SIZE bytes at the top of it
 *      (tp points at that area).
 *      The hart whose ID equals MPFS_HAL_FIRST_HART additionally clears the
 *      heap (and optionally DTIM / L2-LIM) and jumps to main_first_hart.
 *      All other harts park in wfi until a software interrupt is pending and
 *      then jump to main_other_hart.
 *
 *  IMAGE_LOADED_BY_BOOTLOADER != 0
 *      a0 = hart ID, a1 = pointer to the HLS supplied by the boot-loader
 *      (or zero). The hart installs trap_vector, clears the application stack
 *      and heap and jumps to u54_single_hart with a0 = HLS pointer.
 *
 * All memory clearing loops compare addresses with bltu (unsigned), as
 * addresses are not signed quantities.
 */
reset_vector:
_start:
#if (IMAGE_LOADED_BY_BOOTLOADER == 0)
    /*
     * clear the Return Address Stack
     */
    call    .clear_ras
    /* Setup trap handler */
    la      a4, trap_vector
    csrw    mtvec, a4               # initialise machine trap vector address
    /* Make sure that mtvec is updated before continuing */
1:
    csrr    a5, mtvec
    bne     a4, a5, 1b
    /* Disable and clear all interrupts */
    li      a2, MSTATUS_MIE
    csrc    mstatus, a2             # clear interrupt enable bit
    csrw    mie, zero
    csrw    mip, zero
    # Init delegation registers, mideleg, medeleg, if a U54
    # These are not initialised by the hardware and come up in a random state
    csrr    a0, mhartid
    beqz    a0, .skip_e51
    csrw    mideleg, 0
    csrw    medeleg, 0
.skip_e51:
    # mscratch must be init to zero- we are not using scratch memory
    csrw    mscratch, zero
    csrw    mcause, zero
    csrw    mepc, zero
    /*
     * clear PMP enables
     */
    csrw    pmpcfg0, zero
    csrw    pmpcfg2, zero
    /*
     * clear regs
     */
    li      x1, 0
    li      x2, 0
    li      x3, 0
    li      x4, 0
    li      x5, 0
    li      x6, 0
    li      x7, 0
    li      x8, 0
    li      x9, 0
    li      x10,0
    li      x11,0
    li      x12,0
    li      x13,0
    li      x14,0
    li      x15,0
    li      x16,0
    li      x17,0
    li      x18,0
    li      x19,0
    li      x20,0
    li      x21,0
    li      x22,0
    li      x23,0
    li      x24,0
    li      x25,0
    li      x26,0
    li      x27,0
    li      x28,0
    li      x29,0
    li      x30,0
    li      x31,0

    # enable FPU and accelerator if present, setting ignored on E51
    li      t0, MSTATUS_FS | MSTATUS_XS
    csrs    mstatus, t0

    # Init floating point control register to zero
    # skip if e51
    csrr    a0, mhartid
    beqz    a0, .no_float
#ifdef __riscv_flen
    fscsr   x0
#endif
.no_float:

    # make sure XLEN agrees with compilation choice, if not will loop here
.LxlenCheck:
    csrr    t0, misa
#if __riscv_xlen == 64
    bltz    t0, .LxlenPass
#else
    bgez    t0, .LxlenPass
#endif
    j       .LxlenCheck
.LxlenPass:

    # initialize global pointer, global data
    # The __global_pointer is allocated in the linker script. It points to a
    # location 2k after sdata start as the offsets used in the gp are +/- 2k
    # See https://www.sifive.com/blog/2017/08/28/all-aboard-part-3-linker-relaxation-in-riscv-toolchain/
    # see: http://www.rowleydownload.co.uk/arm/documentation/gnu/as/RISC_002dV_002dDirectives.html
    .option push
    .option norelax
    la      gp, __global_pointer$
    .option pop

    # get core id
    /* Dispatch on hart ID; an ID that matches none of 0..4 falls through
     * into .hart0 below. */
    csrr    a0, mhartid
    li      a1, 0
    beq     a0, a1, .hart0
    li      a1, 1
    beq     a0, a1, .hart1
    li      a1, 2
    beq     a0, a1, .hart2
    li      a1, 3
    beq     a0, a1, .hart3
    li      a1, 4
    beq     a0, a1, .hart4

.hart0:
    la      a4, __stack_bottom_h0$  # keep bottom of stack in a4 so we can init later
    la      sp, __stack_top_h0$
    j       .continue
.hart1:
    la      a4, __stack_bottom_h1$  # keep bottom of stack in a4 so we can init later
    la      sp, __stack_top_h1$
    j       .continue
.hart2:
    la      a4, __stack_bottom_h2$  # keep bottom of stack in a4 so we can init later
    la      sp, __stack_top_h2$
    j       .continue
.hart3:
    la      a4, __stack_bottom_h3$  # keep bottom of stack in a4 so we can init later
    la      sp, __stack_top_h3$
    j       .continue
.hart4:
    la      a4, __stack_bottom_h4$  # keep bottom of stack in a4 so we can init later
    la      sp, __stack_top_h4$

.continue:
    # clear HLS and stack
    /* a4 = current address (starts at stack bottom), a5 = stack top */
    mv      a5, sp
.init_stack:
    #csrw mepc, zero
    STORE   x0, 0(a4)
    add     a4, a4, __SIZEOF_POINTER__
    bltu    a4, a5, .init_stack     /* unsigned: these are addresses */
    # Allocate some space at top of stack for the HLS
    addi    sp, sp, -HLS_DEBUG_AREA_SIZE
    # HLS grows up from new top of stack
    mv      tp, sp
    # get core id
    csrr    a0, mhartid
    li      a1, MPFS_HAL_FIRST_HART
    bne     a0, a1, .LOtherHartstoWFI
    # clear the common heap
    la      a4, __heap_start
    la      a5, __heap_end
.init_heap:
    #csrw mepc, zero
    STORE   x0, 0(a4)
    add     a4, a4, __SIZEOF_POINTER__
    bltu    a4, a5, .init_heap      /* unsigned: these are addresses */
    #
    # clear DTIM - this is required to stop memory errors on initial access by
    # cache
    # Also, stops x propagation in simulation, when cache/stack reads unused
    # area
    #
    li      a2, MPFS_HAL_CLEAR_MEMORY
    beq     x0, a2, .skip_mem_clear
    call    .clear_dtim
    call    .clear_l2lim
.skip_mem_clear:
    /*
     * Clear bus error unit accrued register on start-up
     * This is cleared by the first hart only
     * Five registers are written, spaced 0x1000 apart (presumably one bus
     * error unit per hart - confirm against the MPFS memory map).
     */
    la      a4,0x01700020UL
    sb      x0, 0(a4)
    la      a4,0x01701020UL
    sb      x0, 0(a4)
    la      a4,0x01702020UL
    sb      x0, 0(a4)
    la      a4,0x01703020UL
    sb      x0, 0(a4)
    la      a4,0x01704020UL
    sb      x0, 0(a4)
    # now core MPFS_HAL_FIRST_HART jumps to main_first_hart
.main_hart:
    # pass HLS address
    mv      a0, tp
    j       main_first_hart
.LoopForeverMain:
    #in case of return, loop forever. nop's added so can be seen in debugger
    nop
    nop
    j       .LoopForeverMain

.LOtherHartstoWFI:
    li      a2, MSTATUS_MIE
    csrc    mstatus, a2             # clear interrupt enable bit
    csrw    mie, zero
    csrw    mip, zero
    li      a2, MIP_MSIP
    csrw    mie, a2                 # Set MSIE bit to receive IPI. This needs to be
                                    # enabled- otherwise stays in wfi.
                                    # Other interrupts appear to bring out of wfi,even if
                                    # not enabled.
    #
    # Wait here until main hart is up and running
    #
    /* Poll the first word of the HLS area at the top of the hart 0 stack
     * until it reads HLS_MAIN_HART_STARTED. */
    li      a3, HLS_MAIN_HART_STARTED
    la      a1, (__stack_top_h0$ - HLS_DEBUG_AREA_SIZE)
.wait_main_hart:
    LWU     a2, 0(a1)
    bne     a3, a2, .wait_main_hart
    # Flag we are here to the main hart
    li      a1, HLS_OTHER_HART_IN_WFI
    sw      a1, 0(tp)
    /* flush the instruction cache */
    fence.i
.LwaitOtherHart:
    # We assume wfi instruction will be run before main hart attempts to take
    # out of wfi
    wfi
    # Only start if MIP_MSIP is set - the wfi will ensure this, but adding
    # breakpoints in the debugger (halt)
    # will wakeup wfi, so the following code will make sure we remain here until
    # we get a software interrupt
    csrr    a2, mip
    andi    a2, a2, MIP_MSIP
    beqz    a2, .LwaitOtherHart
    /* Disable and clear all interrupts- should be only a sw interrupt */
    li      a2, MSTATUS_MIE
    csrc    mstatus, a2             # clear interrupt enable bit
    csrw    mie, zero
    csrw    mip, zero
    # set marker as to where we are
    li      a1, HLS_OTHER_HART_PASSED_WFI
    sw      a1, 0(tp)
    # pass HLS address
    mv      a0, tp
    j       main_other_hart
.LoopForeverOther:
    #in case of return, loop forever. nop's added so can be seen in debugger
    nop
    nop
    j       .LoopForeverOther

#else /* IMAGE_LOADED_BY_BOOTLOADER == 1 */

/***********************************************************************************
 *The program has been loaded by a bootloader
 * a0 - contains the hart ID
 * a1 - contains pointer to bootloader -Hart Local Storage, for this hart.
 */
_start_non_bootloader_image:
    /* ebreak called at the start of the program if required when debugging. */
    /* DEBUG_EBREAK_AT_START is set to one in the debug build, 0 in the      */
    /* release build                                                         */
    /* uncomment the 3 lines below if you want to use this method to for     */
    /* debugging                                                             */
    /*  li      a2, DEBUG_EBREAK_AT_START
        beq     x0, a2, 1f
        ebreak */
1:
    /* store the value here received from boot-loader */
    /* a0 will always contain the hart ID */
    /* If a1 is null, boot-loader is not passing pointer to the HLS */
    /* If this is the case, point HLS to our own and fill with hart ID */
    /* Setup trap handler */
    /* we are currently only supporting mmode */
    /* m-mode/s-mode set-up option will be added here */
    la      a4, trap_vector
    csrw    mtvec, a4               # initialise machine trap vector address
    /* Make sure that mtvec is updated before continuing */
2:
    csrr    a5, mtvec
    bne     a4, a5, 2b
    /* Disable and clear all interrupts */
    /* assumption is this has been done by the Boot-loader */
    # Init delegation registers, mideleg, medeleg, if a U54
    # These are not initialised by the hardware and come up in a random state
    # mhartid is in a0
    beqz    a0, 3f
    csrw    mideleg, 0
    csrw    medeleg, 0
3:
    # mscratch must be init to zero- we are not using scratch memory
    csrw    mscratch, zero
    csrw    mcause, zero
    csrw    mepc, zero

    # Init floating point control register to zero
    # skip if e51
    # mhartid is in a0
    beqz    a0, 1f
#ifdef __riscv_flen
    fscsr   x0
#endif
1:  # no float
    # make sure XLEN agrees with compilation choice, if not will loop here
    csrr    t0, misa
#if __riscv_xlen == 64
    bltz    t0, 2f
#else
    bgez    t0, 2f
#endif
    j       1b
2:
    # initialize global pointer, global data
    # The __global_pointer is allocated in the linker script. It points to a
    # location 2k after sdata start as the offsets used in the gp are +/- 2k
    # See https://www.sifive.com/blog/2017/08/28/all-aboard-part-3-linker-relaxation-in-riscv-toolchain/
    # see: http://www.rowleydownload.co.uk/arm/documentation/gnu/as/RISC_002dV_002dDirectives.html
    .option push
    .option norelax
    la      gp, __global_pointer$
    .option pop

    la      a4, __app_stack_bottom  # keep bottom of stack in a4 so we can init later
    la      a5, __app_stack_top
    la      sp, __app_stack_top
1:
    STORE   x0, 0(a4)
    add     a4, a4, __SIZEOF_POINTER__
    bltu    a4, a5, 1b              /* unsigned: these are addresses */
    # clear the common heap
    la      a4, __heap_start
    la      a5, __heap_end
2:
    STORE   x0, 0(a4)
    add     a4, a4, __SIZEOF_POINTER__
    bltu    a4, a5, 2b              /* unsigned: these are addresses */
    # check if HLS passed by BL, if not allocate one here
    bnez    a1, 1f
    # Allocate some space at top of stack for the HLS, as HLS mem not passed
    addi    sp, sp, -HLS_DEBUG_AREA_SIZE
    # HLS grows up from new top of stack
    mv      tp, sp
    mv      a0, tp
    j       u54_single_hart
1:
    # pass HLS address from the boot-loader
    mv      a0, a1
    j       u54_single_hart
2:
    # in case of return, loop forever. nop's added so can be seen in debugger
    nop
    nop
    j       2b
#endif /* IMAGE_LOADED_BY_BOOTLOADER */

/******************************************************************************/
/******************************interrupt handling below here*******************/
/******************************************************************************/

/*
 * trap_vector
 *
 * Machine-mode trap entry (installed in mtvec by the start-up code above).
 * Pushes an INTEGER_CONTEXT_SIZE frame on the current stack, stores x1..x31
 * in it at index*REGBYTES (slot 0 is not written), then calls
 * trap_from_machine_mode with:
 *      a0 = pointer to the saved register frame
 *      a1 = mbadaddr (mtval)
 *      a2 = mepc
 * On return every register is reloaded from the frame and mret is executed.
 *
 * When USING_FREERTOS is defined, a machine timer interrupt (mcause with the
 * top bit set and code 7) is passed straight to TIMER_CMP_INT with t0 and sp
 * restored to their values at trap entry.
 */
trap_vector:
#if defined USING_FREERTOS
    addi    sp, sp, -REGBYTES       /* Save t0 for now */
    STORE   t0, 0x0(sp)
    csrr    t0, mcause
    bge     t0,x0,.Le51_other       /* Not an interrupt... */
    slli    t0,t0,1                 /* shift the top (interrupt) bit out ... */
    srli    t0,t0,1                 /* ... leaving only the cause code */
    addi    t0,t0,-7
    bne     t0,x0,.Le51_other       /* Not Timer interrupt... */
    /* Interrupt is timer interrupt so let FreeRTOS handle it */
    LOAD    t0, 0x0(sp)             # Restore t0 for proper context save by FreeRTOS
    addi    sp, sp, REGBYTES
    j       TIMER_CMP_INT
    mret                            /* not reached: the jump above does not link */

.Le51_other:                        # Re-enter mainline here if not timer interrupt
    LOAD    t0, 0x0(sp)             # Restore t0 for proper context save by HAL
    addi    sp, sp, REGBYTES
#endif
    # The mscratch register is an XLEN-bit read/write register dedicated for use by machine mode.
    # Typically, it is used to hold a pointer to a machine-mode hart-local context space and swapped
    # with a user register upon entry to an M-mode trap handler.
    # In this implementation, we are not using HLS
    # csrrw sp, mscratch, sp    #copy sp to mscratch, and mscratch to sp

    addi    sp, sp, -INTEGER_CONTEXT_SIZE   /* move sp down the stack to make an
                                             * INTEGER_CONTEXT_SIZE area */
    # Preserve the registers.
    STORE   sp, 2*REGBYTES(sp)      # sp (value after the frame was allocated)
    STORE   a0, 10*REGBYTES(sp)     # save a0,a1 in the created CONTEXT
    STORE   a1, 11*REGBYTES(sp)
    STORE   ra, 1*REGBYTES(sp)
    STORE   gp, 3*REGBYTES(sp)
    STORE   tp, 4*REGBYTES(sp)
    STORE   t0, 5*REGBYTES(sp)
    STORE   t1, 6*REGBYTES(sp)
    STORE   t2, 7*REGBYTES(sp)
    STORE   s0, 8*REGBYTES(sp)
    STORE   s1, 9*REGBYTES(sp)
    STORE   a2,12*REGBYTES(sp)
    STORE   a3,13*REGBYTES(sp)
    STORE   a4,14*REGBYTES(sp)
    STORE   a5,15*REGBYTES(sp)
    STORE   a6,16*REGBYTES(sp)
    STORE   a7,17*REGBYTES(sp)
    STORE   s2,18*REGBYTES(sp)
    STORE   s3,19*REGBYTES(sp)
    STORE   s4,20*REGBYTES(sp)
    STORE   s5,21*REGBYTES(sp)
    STORE   s6,22*REGBYTES(sp)
    STORE   s7,23*REGBYTES(sp)
    STORE   s8,24*REGBYTES(sp)
    STORE   s9,25*REGBYTES(sp)
    STORE   s10,26*REGBYTES(sp)
    STORE   s11,27*REGBYTES(sp)
    STORE   t3,28*REGBYTES(sp)
    STORE   t4,29*REGBYTES(sp)
    STORE   t5,30*REGBYTES(sp)
    STORE   t6,31*REGBYTES(sp)
    # Invoke the handler.
    mv      a0, sp                  # a0 <- regs
    # Please note: mtval is the newer name for register mbadaddr
    # If you get a compile failure here, use the newer name
    # At this point (2019), both are supported in latest compiler
    # older compiler versions only support mbadaddr, so going with this.
    # See: https://github.com/riscv/riscv-gcc/issues/133
    csrr    a1, mbadaddr            # useful for analysis when things go wrong
    csrr    a2, mepc
    jal     trap_from_machine_mode

restore_regs:
    # Restore all of the registers.
    LOAD    ra, 1*REGBYTES(sp)
    LOAD    gp, 3*REGBYTES(sp)
    LOAD    tp, 4*REGBYTES(sp)
    LOAD    t0, 5*REGBYTES(sp)
    LOAD    t1, 6*REGBYTES(sp)
    LOAD    t2, 7*REGBYTES(sp)
    LOAD    s0, 8*REGBYTES(sp)
    LOAD    s1, 9*REGBYTES(sp)
    LOAD    a0,10*REGBYTES(sp)
    LOAD    a1,11*REGBYTES(sp)
    LOAD    a2,12*REGBYTES(sp)
    LOAD    a3,13*REGBYTES(sp)
    LOAD    a4,14*REGBYTES(sp)
    LOAD    a5,15*REGBYTES(sp)
    LOAD    a6,16*REGBYTES(sp)
    LOAD    a7,17*REGBYTES(sp)
    LOAD    s2,18*REGBYTES(sp)
    LOAD    s3,19*REGBYTES(sp)
    LOAD    s4,20*REGBYTES(sp)
    LOAD    s5,21*REGBYTES(sp)
    LOAD    s6,22*REGBYTES(sp)
    LOAD    s7,23*REGBYTES(sp)
    LOAD    s8,24*REGBYTES(sp)
    LOAD    s9,25*REGBYTES(sp)
    LOAD    s10,26*REGBYTES(sp)
    LOAD    s11,27*REGBYTES(sp)
    LOAD    t3,28*REGBYTES(sp)
    LOAD    t4,29*REGBYTES(sp)
    LOAD    t5,30*REGBYTES(sp)
    LOAD    t6,31*REGBYTES(sp)
    LOAD    sp, 2*REGBYTES(sp)      /* must be last: the frame is addressed via sp */
    addi    sp, sp, +INTEGER_CONTEXT_SIZE   /* move sp up the stack to reclaim the
                                             * INTEGER_CONTEXT_SIZE area */
    mret

 /*****************************************************************************/
 /******************************interrupt handling above here******************/
 /*****************************************************************************/

/*
 * .enable_sw_int
 *
 * Writes MIP_MSIP to mie (so only the machine software interrupt is enabled
 * there), sets MSTATUS_MIE in mstatus and executes fence.i.
 * Clobbers: a2.
 */
.enable_sw_int:
    li      a2, MIP_MSIP
    csrw    mie, a2                 # Set MSIE bit to receive IPI
    li      a2, MSTATUS_MIE
    csrs    mstatus, a2             # enable interrupts
    /* flush the instruction cache */
    fence.i
    ret

/***********************************************************************************
 *
 * The following init_memory() symbol overrides the weak symbol in the HAL and does
 * a safe copy of RW data and clears zero-init memory
 *
 */
    /*
     * zero_section helper function:
     *   a0 = exec_start_addr
     *   a1 = exec_end_addr
     *
     * Stores 64-bit zeros from a0 up to (not including) a1, 8 bytes per
     * iteration. Returns immediately when a0 >= a1 (unsigned compare, as
     * these are addresses).
     * Clobbers: a0.
     */
    .globl zero_section
    .type zero_section, @function
zero_section:
    bgeu    a0, a1, .zero_section_done
    sd      zero, (a0)
    addi    a0, a0, 8
    j       zero_section
.zero_section_done:
    ret

    /*
     * count_section helper function:
     *   a0 = exec_start_addr
     *   a1 = exec_end_addr
     *   a2 = start count
     *
     * Fills a0 up to (not including) a1 with 64-bit values: the first
     * double-word receives a2 and each following one receives the previous
     * value plus 8.
     * The loop ends when a0 >= a1 (unsigned), so a range whose length is not
     * a multiple of 8, or a start beyond the end, terminates instead of
     * running on through memory.
     * Clobbers: a0, a2.
     */
    .globl count_section
    .type count_section, @function
count_section:
    bgeu    a0, a1, .count_section_done
    sd      a2, (a0)
    addi    a0, a0, 8
    addi    a2, a2, 8
    j       count_section
.count_section_done:
    ret

    /*
     * copy_section helper function:
     *   a0 = load_addr
     *   a1 = exec_start_addr
     *   a2 = exec_end_addr
     *
     * Copies 64-bit words from a0 to a1 until a1 reaches a2. Nothing is
     * copied when load_addr == exec_start_addr (already in place).
     * The loop ends when a1 >= a2 (unsigned), so a misaligned or inverted
     * range terminates.
     * Clobbers: a0, a1, a3.
     */
    .globl copy_section
    .type copy_section, @function
copy_section:
    beq     a1, a0, .copy_section_done      // if load_addr == exec_start_addr, goto copy_section_done
.check_if_copy_section_done:
    bgeu    a1, a2, .copy_section_done      // if exec_start_addr >= exec_end_addr, goto copy_section_done
.keep_copying:
    ld      a3, 0(a0)                       // val = *load_addr
    sd      a3, 0(a1)                       // *exec_start_addr = val;
    addi    a0, a0, 8                       // load_addr = load_addr + 8
    addi    a1, a1, 8                       // exec_start_addr = exec_start_addr + 8
    j       .check_if_copy_section_done
.copy_section_done:
    ret


/***********************************************************************************
 *
 * The following copy_switch_code() symbol overrides the weak symbol in the HAL and does
 * a safe copy of HW config data
 *
 * Copies 32-bit words from __sc_load to __sc_start until __sc_end is reached.
 * Nothing is copied when __sc_start == __sc_load or __sc_start == __sc_end.
 * Clobbers: a2, a3, a4, a5.
 */
    .globl copy_switch_code
    .type copy_switch_code, @function
copy_switch_code:
    la      a5, __sc_start                  // a5 = __sc_start
    la      a4, __sc_load                   // a4 = __sc_load
    beq     a5,a4,.copy_switch_code_done    // if a5 == a4, goto copy_switch_code_done
    la      a3, __sc_end                    // a3 = __sc_end
    beq     a5,a3,.copy_switch_code_done    // if a5 == a3, goto copy_switch_code_done
.copy_switch_code_loop:
    lw      a2,0(a4)                        // a2 = *a4
    sw      a2,0(a5)                        // *a5 = a2
    addi    a5,a5,4                         // a5+=4
    addi    a4,a4,4                         // a4+=4

    bltu    a5,a3,.copy_switch_code_loop    // if a5 < a3, goto copy_switch_code_loop
.copy_switch_code_done:
    ret

/*******************************************************************************
 * Address ranges cleared by .clear_l2lim and .clear_dtim below
 */
#define START__OF_LIM           0x08000000
#define END__OF_LIM             0x08200000
#define START__OF_DTM           0x01000000
#define END__OF_DTM             0x01002000


/*
 * .clear_l2lim / .clear_dtim
 *
 * Zero the range START__OF_LIM..END__OF_LIM or START__OF_DTM..END__OF_DTM
 * respectively, 8 bytes per iteration. The two entry points share one loop.
 * .clear_l2lim does nothing when the address of _start has any bit of
 * START__OF_LIM set.
 * Clobbers: a2 (.clear_l2lim only), a4, a5.
 */
.clear_l2lim:
    // Clear the LIM
    //
    // On reset, the first 15 ways are L2 and the last way is cache
    // We can initialize all, as cache write through to DDR is blocked
    // until DDR in initialized, so will have no effect other than clear ECC
    //
    // NOTE: we need to check if we are debugging from LIM,if so do not
    // initialize.
    //
    la      a2, _start
    la      a4, START__OF_LIM       # start of LIM address
    and     a2, a2, a4
    bnez    a2, .done_clear
    la      a5, END__OF_LIM         # end of LIM address
    j       1f
.clear_dtim:
    //
    // Clear the E51 DTIM to prevent any ECC memory errors on initial access
    //
    la      a4, START__OF_DTM       # DTIM start
    la      a5, END__OF_DTM         # DTIM end
1:
    // common loop used by both .clear_l2lim and .clear_dtim
    sd      x0, 0(a4)
    add     a4, a4, __SIZEOF_POINTER__
    bltu    a4, a5, 1b              /* unsigned: these are addresses */
.done_clear:
    ret

/*
 * record_ecc_error_counts on reset
 * These are non-zero in the coreplex.
 * Can be checked later on to see if values have changed
 * a0 = mECCDataFailCount save address
 * a1 = mECCDataCorrectionCount save address
 * a2 = mECCDirFixCount save address
 *
 * Each counter is read as a 32-bit word and stored as a 32-bit word at the
 * corresponding save address.
 * Clobbers: a4, a5, t2.
 */
.record_ecc_error_counts:
    # Store initial ECC errors
    #define mECCDataFailCount           0x02010168U
    la      a5, mECCDataFailCount
    mv      a4, a0  // e.g. use the start of DTIM if not used for anything else, 0x01000100
    lw      t2,0(a5)
    sw      t2,0(a4)
    #define mECCDataCorrectionCount     0x02010148U
    la      a5, mECCDataCorrectionCount
    mv      a4, a1  // e.g. use the start of DTIM if not used for anything else, 0x01000110
    lw      t2,0(a5)
    sw      t2,0(a4)
    #define mECCDirFixCount             0x02010108u
    la      a5, mECCDirFixCount
    mv      a4, a2  // e.g. use the start of DTIM if not used for anything else, 0x01000120
    lw      t2,0(a5)
    sw      t2,0(a4)
    ret

/*
 * clear_ras , clear_ras_2_deep
 * Two deep function calls.
 * Used to clear the internal processor Return Address Stack
 * This is belt and braces, may not be required
 *
 * .clear_ras keeps the return address (x1) of its caller in a5 across the
 * nested call and restores it before returning.
 * Clobbers: a5.
 */
.clear_ras:
    mv      a5, x1
    nop
    call    .clear_ras_2_deep
    nop
    nop
    nop
    nop
    nop
    nop
    mv      x1, a5
    ret

.clear_ras_2_deep:
    nop
    nop
    nop
    nop
    nop
    nop
    ret
