1/*******************************************************************************
2 * Copyright 2019-2021 Microchip Corporation.
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 * MPFS HAL Embedded Software
7 *
8 */
9
10/*******************************************************************************
11 * @file entry.S
12 * @author Microchip-FPGA Embedded Systems Solutions
13 * @brief entry functions.
14 *
15 */
16
17#include "../common/bits.h"
18#include "../common/encoding.h"
19#include "../common/mss_mtrap.h"
20#include "system_startup_defs.h"
21#include "mpfs_hal_config/mss_sw_config.h"
22
23  .option norvc
24  .section .text.init,"ax", %progbits
25  .globl reset_vector
26  .globl _start
27
reset_vector:
_start:
#if (IMAGE_LOADED_BY_BOOTLOADER == 0)
    /*
     * Cold-boot entry, executed by every hart when the image is not loaded
     * by a boot-loader.
     *
     * Sequence:
     *   1. install trap_vector in mtvec and mask/clear all interrupts
     *   2. initialise CSRs, clear PMP config, zero x1..x31
     *   3. set gp, select the per-hart stack and zero it
     *   4. reserve the HLS area at the top of the stack (tp points at it)
     *   5. hart MPFS_HAL_FIRST_HART clears the heap (and optionally
     *      DTIM/LIM) and calls main_first_hart(a0 = HLS pointer); all other
     *      harts branch to .LOtherHartstoWFI
     */

    /*
     * clear the Return Address Stack
     */
    call .clear_ras
    /* Setup trap handler */
    la a4, trap_vector
    csrw mtvec, a4          # initialise machine trap vector address
    /* Make sure that mtvec is updated before continuing */
1:
    csrr    a5, mtvec
    bne a4, a5, 1b
    /* Disable and clear all interrupts */
    li a2,  MSTATUS_MIE
    csrc mstatus, a2        # clear interrupt enable bit
    csrw mie, zero
    csrw mip, zero
    /* Init delegation registers, mideleg, medeleg, if a U54 (hart ID != 0). */
    /* These are not initialised by the hardware and come up in a random state */
    csrr a0, mhartid
    beqz a0, .skip_e51
    csrw mideleg, 0
    csrw medeleg, 0
.skip_e51:
    /* mscratch must be init to zero - we are not using scratch memory */
    csrw mscratch, zero
    csrw mcause, zero
    csrw mepc, zero
    /*
     * clear PMP enables
     */
    csrw pmpcfg0, zero
    csrw pmpcfg2, zero
    /*
     * clear regs
     */
    li  x1, 0
    li  x2, 0
    li  x3, 0
    li  x4, 0
    li  x5, 0
    li  x6, 0
    li  x7, 0
    li  x8, 0
    li  x9, 0
    li  x10,0
    li  x11,0
    li  x12,0
    li  x13,0
    li  x14,0
    li  x15,0
    li  x16,0
    li  x17,0
    li  x18,0
    li  x19,0
    li  x20,0
    li  x21,0
    li  x22,0
    li  x23,0
    li  x24,0
    li  x25,0
    li  x26,0
    li  x27,0
    li  x28,0
    li  x29,0
    li  x30,0
    li  x31,0

    /* enable FPU and accelerator if present, setting ignored on E51 */
    li t0, MSTATUS_FS | MSTATUS_XS
    csrs mstatus, t0

    /* Init floating point control register to zero, skipped on the E51 */
    csrr a0, mhartid
    beqz a0, .no_float
#ifdef __riscv_flen
    fscsr x0
#endif
.no_float:

    /* make sure XLEN agrees with compilation choice, if not will loop here */
.LxlenCheck:
    csrr t0, misa
#if __riscv_xlen == 64
    bltz t0, .LxlenPass
#else
    bgez t0, .LxlenPass
#endif
    j .LxlenCheck
.LxlenPass:

    /*
     * initialize global pointer, global data
     * The __global_pointer is allocated in the linker script. It points to a
     * location 2k after sdata start as the offsets used in the gp are +/- 2k
     * See https://www.sifive.com/blog/2017/08/28/all-aboard-part-3-linker-relaxation-in-riscv-toolchain/
     * see: http://www.rowleydownload.co.uk/arm/documentation/gnu/as/RISC_002dV_002dDirectives.html
     * Relaxation is switched off so the load of gp is not itself relaxed
     * into a gp-relative access.
     */
    .option push
    .option norelax
    la gp, __global_pointer$
    .option pop

    /*
     * Select the stack for this hart: a4 = bottom of stack, sp = top of stack.
     * A hart ID outside 0..4 falls through to the hart 0 stack.
     */
    csrr a0, mhartid
    li a1,  0
    beq a0, a1, .hart0
    li a1,  1
    beq a0, a1, .hart1
    li a1,  2
    beq a0, a1, .hart2
    li a1,  3
    beq a0, a1, .hart3
    li a1,  4
    beq a0, a1, .hart4

.hart0:
    la a4, __stack_bottom_h0$  # keep bottom of stack in a4 so we can init later
    la sp, __stack_top_h0$
    j .continue
.hart1:
    la a4, __stack_bottom_h1$  # keep bottom of stack in a4 so we can init later
    la sp, __stack_top_h1$
    j .continue
.hart2:
    la a4, __stack_bottom_h2$  # keep bottom of stack in a4 so we can init later
    la sp, __stack_top_h2$
    j .continue
.hart3:
    la a4, __stack_bottom_h3$  # keep bottom of stack in a4 so we can init later
    la sp, __stack_top_h3$
    j .continue
.hart4:
    la a4, __stack_bottom_h4$  # keep bottom of stack in a4 so we can init later
    la sp, __stack_top_h4$

.continue:
    /* clear HLS and stack: zero every word in [a4, a5) */
    mv  a5, sp
.init_stack:
    STORE x0, 0(a4)
    addi a4, a4, __SIZEOF_POINTER__
    bltu a4, a5, .init_stack           # addresses are unsigned quantities
    /* Allocate some space at top of stack for the HLS */
    addi sp, sp, -HLS_DEBUG_AREA_SIZE
    /* HLS grows up from new top of stack */
    mv tp, sp
    /* only MPFS_HAL_FIRST_HART continues below, the others park in wfi */
    csrr a0, mhartid
    li a1, MPFS_HAL_FIRST_HART
    bne a0, a1, .LOtherHartstoWFI
    /* clear the common heap: zero every word in [__heap_start, __heap_end) */
    la  a4, __heap_start
    la  a5, __heap_end
    bgeu a4, a5, .init_heap_done       # empty heap: do not write past its end
.init_heap:
    STORE x0, 0(a4)
    addi a4, a4, __SIZEOF_POINTER__
    bltu a4, a5, .init_heap
.init_heap_done:
    /*
     * clear DTIM - this is required to stop memory errors on initial access by
     * cache
     * Also, stops x propagation in simulation, when cache/stack reads unused
     * area
     */
    li a2, MPFS_HAL_CLEAR_MEMORY
    beq x0, a2, .skip_mem_clear
    call    .clear_dtim
    call    .clear_l2lim
.skip_mem_clear:
    /*
     * Clear bus error unit accrued register on start-up
     * This is cleared by the first hart only
     * One register per hart, spaced 0x1000 apart.
     */
    la a4,0x01700020UL
    sb   x0, 0(a4)
    la a4,0x01701020UL
    sb   x0, 0(a4)
    la a4,0x01702020UL
    sb   x0, 0(a4)
    la a4,0x01703020UL
    sb   x0, 0(a4)
    la a4,0x01704020UL
    sb   x0, 0(a4)
    /* now core MPFS_HAL_FIRST_HART enters main_first_hart */
.main_hart:
    /* pass HLS address */
    mv  a0, tp
    /*
     * Use call (not a plain jump) so ra points at the loop below; a return
     * from main_first_hart then lands in .LoopForeverMain instead of at
     * whatever stale address was left in ra by the earlier calls.
     */
    call main_first_hart
.LoopForeverMain:
    /* in case of return, loop forever. nops added so can be seen in debugger */
    nop
    nop
    j .LoopForeverMain
224
.LOtherHartstoWFI:
    /*
     * Parking path for every hart other than MPFS_HAL_FIRST_HART.
     *
     * On entry tp points at this hart's HLS area. The hart waits until the
     * main hart has flagged HLS_MAIN_HART_STARTED, records
     * HLS_OTHER_HART_IN_WFI in its own HLS, sleeps in wfi until a machine
     * software interrupt is pending, records HLS_OTHER_HART_PASSED_WFI and
     * then calls main_other_hart(a0 = HLS pointer).
     */
    li a2,  MSTATUS_MIE
    csrc mstatus, a2       # clear interrupt enable bit
    csrw mie, zero
    csrw mip, zero
    li a2, MIP_MSIP
    csrw mie, a2           # Set MSIE bit to receive IPI. This needs to be
                           # enabled - otherwise stays in wfi.
                           # Other interrupts appear to bring out of wfi, even
                           # if not enabled.
    /*
     * Wait here until main hart is up and running
     *
     * NOTE(review): the flag is polled in the HLS at the top of the hart 0
     * stack. This looks like it assumes MPFS_HAL_FIRST_HART is 0 - confirm
     * against the code that writes HLS_MAIN_HART_STARTED.
     */
    li a3, HLS_MAIN_HART_STARTED
    la a1, (__stack_top_h0$ - HLS_DEBUG_AREA_SIZE)
.wait_main_hart:
    LWU a2, 0(a1)
    bne a3, a2, .wait_main_hart
    /* Flag we are here to the main hart */
    li a1, HLS_OTHER_HART_IN_WFI
    sw a1, 0(tp)
    /* flush the instruction cache */
    fence.i
.LwaitOtherHart:
    /*
     * We assume wfi instruction will be run before main hart attempts to
     * take us out of wfi
     */
    wfi
    /*
     * Only start if MIP_MSIP is set - the wfi will ensure this, but adding
     * breakpoints in the debugger (halt) will wake up wfi, so the following
     * code makes sure we remain here until we get a software interrupt
     */
    csrr a2, mip
    andi a2, a2, MIP_MSIP
    beqz a2, .LwaitOtherHart
    /* Disable and clear all interrupts - should be only a sw interrupt */
    li a2,  MSTATUS_MIE
    csrc mstatus, a2        # clear interrupt enable bit
    csrw mie, zero
    csrw mip, zero
    /* set marker as to where we are */
    li a1, HLS_OTHER_HART_PASSED_WFI
    sw a1, 0(tp)
    /* pass HLS address */
    mv  a0, tp
    /*
     * Use call (not a plain jump) so ra points at the loop below; a return
     * from main_other_hart then lands in .LoopForeverOther instead of at a
     * stale return address.
     */
    call main_other_hart
.LoopForeverOther:
    /* in case of return, loop forever. nops added so can be seen in debugger */
    nop
    nop
    j .LoopForeverOther
275
#else /* IMAGE_LOADED_BY_BOOTLOADER == 1 */

/***********************************************************************************
 * The program has been loaded by a bootloader
 * a0 - contains the hart ID
 * a1 - contains pointer to bootloader -Hart Local Storage, for this hart.
 *      If a1 is null, an HLS area is carved out of the top of the
 *      application stack instead.
 *
 * Installs trap_vector, initialises CSRs, sets gp, zeroes the application
 * stack and the heap, then calls u54_single_hart(a0 = HLS pointer).
 */
_start_non_bootloader_image:
    /* ebreak called at the start of the program if required when debugging.  */
    /* DEBUG_EBREAK_AT_START is set to one in the debug build, 0 in the       */
    /* release build                                                          */
    /* uncomment the 3 lines below if you want to use this method for         */
    /* debugging                                                              */
    /* li a2, DEBUG_EBREAK_AT_START
    beq x0, a2, 1f
    ebreak */
1:
    /* store the value here received from boot-loader */
    /* a0 will always contain the hart ID */
    /* If a1 is null, boot-loader is not passing pointer to the HLS */
    /* If this is the case, point HLS to our own */
    /* Setup trap handler */
    /* we are currently only supporting mmode */
    /* m-mode/s-mode set-up option will be added here */
    la a4, trap_vector
    csrw mtvec, a4          # initialise machine trap vector address
    /* Make sure that mtvec is updated before continuing */
2:
    csrr    a5, mtvec
    bne a4, a5, 2b
    /* Disable and clear all interrupts */
    /* assumption is this has been done by the Boot-loader */
    /* Init delegation registers, mideleg, medeleg, if a U54 (hart ID != 0). */
    /* These are not initialised by the hardware and come up in a random state */
    /* mhartid is in a0 */
    beqz a0, 3f
    csrw mideleg, 0
    csrw medeleg, 0
3:
    /* mscratch must be init to zero - we are not using scratch memory */
    csrw mscratch, zero
    csrw mcause, zero
    csrw mepc, zero

    /* Init floating point control register to zero, skipped on the E51 */
    /* mhartid is in a0 */
    beqz a0, 1f
#ifdef __riscv_flen
    fscsr x0
#endif
1:  # no float
    /* make sure XLEN agrees with compilation choice, if not will loop here */
    csrr t0, misa
#if __riscv_xlen == 64
    bltz t0, 2f
#else
    bgez t0, 2f
#endif
    j 1b
2:
    /*
     * initialize global pointer, global data
     * The __global_pointer is allocated in the linker script. It points to a
     * location 2k after sdata start as the offsets used in the gp are +/- 2k
     * See https://www.sifive.com/blog/2017/08/28/all-aboard-part-3-linker-relaxation-in-riscv-toolchain/
     * see: http://www.rowleydownload.co.uk/arm/documentation/gnu/as/RISC_002dV_002dDirectives.html
     */
    .option push
    .option norelax
    la gp, __global_pointer$
    .option pop

    la a4, __app_stack_bottom  # keep bottom of stack in a4 so we can init later
    la a5, __app_stack_top
    la sp, __app_stack_top
    /* zero every word in [__app_stack_bottom, __app_stack_top) */
1:
    STORE x0, 0(a4)
    addi a4, a4, __SIZEOF_POINTER__
    bltu a4, a5, 1b                # addresses are unsigned quantities
    /* clear the common heap: zero every word in [__heap_start, __heap_end) */
    la  a4, __heap_start
    la  a5, __heap_end
    bgeu a4, a5, 3f                # empty heap: do not write past its end
2:
    STORE x0, 0(a4)
    addi a4, a4, __SIZEOF_POINTER__
    bltu a4, a5, 2b
3:
    /* check if HLS passed by BL, if not allocate one here */
    bnez a1, 1f
    /* Allocate some space at top of stack for the HLS, as HLS mem not passed */
    addi sp, sp, -HLS_DEBUG_AREA_SIZE
    /* HLS grows up from new top of stack */
    mv tp, sp
    mv a0, tp
    j 4f
1:
    /* pass HLS address from the boot-loader */
    mv a0, a1
4:
    /*
     * Use call (not a plain jump) so ra points at the loop below; a return
     * from u54_single_hart then lands in that loop instead of at a stale
     * return address.
     */
    call u54_single_hart
2:
    /* in case of return, loop forever. nops added so can be seen in debugger */
    nop
    nop
    j 2b
#endif /* IMAGE_LOADED_BY_BOOTLOADER */
379
380/******************************************************************************/
/******************************interrupt handling below here*******************/
382/******************************************************************************/
383
trap_vector:
    /*
     * Machine-mode trap entry (address written to mtvec by the start-up code).
     *
     * Builds an INTEGER_CONTEXT_SIZE frame on the current stack, stores
     * x1..x31 in slots 1..31 (slot n holds xn, REGBYTES each), then calls
     *     trap_from_machine_mode(a0 = frame, a1 = mbadaddr, a2 = mepc)
     * and finally reloads every register from the frame and executes mret.
     */
#if defined USING_FREERTOS
    /* Borrow one stack slot so t0 can be used to inspect mcause. */
    addi    sp, sp, -REGBYTES
    STORE   t0, 0x0(sp)
    csrr    t0, mcause
    bge     t0, x0, .Le51_other     /* top bit clear: not an interrupt */
    slli    t0, t0, 1               /* drop the top (interrupt) bit ... */
    srli    t0, t0, 1               /* ... leaving only the cause code */
    addi    t0, t0, -7
    bne     t0, x0, .Le51_other     /* cause is not 7: not the timer interrupt */
    /* Timer interrupt: give t0 and the slot back, FreeRTOS handles the rest */
    LOAD    t0, 0x0(sp)
    addi    sp, sp, REGBYTES
    j       TIMER_CMP_INT
    mret

.Le51_other:
    /* Anything else: undo the temporary slot and take the HAL path below */
    LOAD    t0, 0x0(sp)
    addi    sp, sp, REGBYTES
#endif
    /*
     * mscratch is not used to swap in a separate trap stack here; the
     * context frame is carved out of the interrupted stack.
     */
    addi    sp, sp, -INTEGER_CONTEXT_SIZE

    /* Save x1..x31, slot index == register number */
    STORE   ra,   1*REGBYTES(sp)
    STORE   sp,   2*REGBYTES(sp)        /* value after the frame allocation */
    STORE   gp,   3*REGBYTES(sp)
    STORE   tp,   4*REGBYTES(sp)
    STORE   t0,   5*REGBYTES(sp)
    STORE   t1,   6*REGBYTES(sp)
    STORE   t2,   7*REGBYTES(sp)
    STORE   s0,   8*REGBYTES(sp)
    STORE   s1,   9*REGBYTES(sp)
    STORE   a0,  10*REGBYTES(sp)
    STORE   a1,  11*REGBYTES(sp)
    STORE   a2,  12*REGBYTES(sp)
    STORE   a3,  13*REGBYTES(sp)
    STORE   a4,  14*REGBYTES(sp)
    STORE   a5,  15*REGBYTES(sp)
    STORE   a6,  16*REGBYTES(sp)
    STORE   a7,  17*REGBYTES(sp)
    STORE   s2,  18*REGBYTES(sp)
    STORE   s3,  19*REGBYTES(sp)
    STORE   s4,  20*REGBYTES(sp)
    STORE   s5,  21*REGBYTES(sp)
    STORE   s6,  22*REGBYTES(sp)
    STORE   s7,  23*REGBYTES(sp)
    STORE   s8,  24*REGBYTES(sp)
    STORE   s9,  25*REGBYTES(sp)
    STORE   s10, 26*REGBYTES(sp)
    STORE   s11, 27*REGBYTES(sp)
    STORE   t3,  28*REGBYTES(sp)
    STORE   t4,  29*REGBYTES(sp)
    STORE   t5,  30*REGBYTES(sp)
    STORE   t6,  31*REGBYTES(sp)

    /* Hand over to the C handler */
    mv      a0, sp                      /* a0 = pointer to the saved registers */
    /*
     * mtval is the newer name for mbadaddr. Older assemblers only know
     * mbadaddr, so that name is used; switch to mtval if this line fails to
     * assemble. See: https://github.com/riscv/riscv-gcc/issues/133
     */
    csrr    a1, mbadaddr                /* useful for analysis when things go wrong */
    csrr    a2, mepc
    jal     trap_from_machine_mode

restore_regs:
    /* Reload x1..x31 from the frame; sp goes last as it addresses the frame */
    LOAD    ra,   1*REGBYTES(sp)
    LOAD    gp,   3*REGBYTES(sp)
    LOAD    tp,   4*REGBYTES(sp)
    LOAD    t0,   5*REGBYTES(sp)
    LOAD    t1,   6*REGBYTES(sp)
    LOAD    t2,   7*REGBYTES(sp)
    LOAD    s0,   8*REGBYTES(sp)
    LOAD    s1,   9*REGBYTES(sp)
    LOAD    a0,  10*REGBYTES(sp)
    LOAD    a1,  11*REGBYTES(sp)
    LOAD    a2,  12*REGBYTES(sp)
    LOAD    a3,  13*REGBYTES(sp)
    LOAD    a4,  14*REGBYTES(sp)
    LOAD    a5,  15*REGBYTES(sp)
    LOAD    a6,  16*REGBYTES(sp)
    LOAD    a7,  17*REGBYTES(sp)
    LOAD    s2,  18*REGBYTES(sp)
    LOAD    s3,  19*REGBYTES(sp)
    LOAD    s4,  20*REGBYTES(sp)
    LOAD    s5,  21*REGBYTES(sp)
    LOAD    s6,  22*REGBYTES(sp)
    LOAD    s7,  23*REGBYTES(sp)
    LOAD    s8,  24*REGBYTES(sp)
    LOAD    s9,  25*REGBYTES(sp)
    LOAD    s10, 26*REGBYTES(sp)
    LOAD    s11, 27*REGBYTES(sp)
    LOAD    t3,  28*REGBYTES(sp)
    LOAD    t4,  29*REGBYTES(sp)
    LOAD    t5,  30*REGBYTES(sp)
    LOAD    t6,  31*REGBYTES(sp)
    LOAD    sp,   2*REGBYTES(sp)
    addi    sp, sp, +INTEGER_CONTEXT_SIZE   /* release the context frame */
    mret
491
492 /*****************************************************************************/
 /******************************interrupt handling above here******************/
494 /*****************************************************************************/
495
/*
 * .enable_sw_int
 * Leaves only the machine software interrupt enabled in mie, sets the global
 * machine interrupt enable in mstatus, then issues fence.i.
 * Clobbers: a2
 */
.enable_sw_int:
    li      a2, MIP_MSIP
    csrrw   zero, mie, a2           /* mie = MSIE only, so an IPI can be received */
    li      a2, MSTATUS_MIE
    csrrs   zero, mstatus, a2       /* set the global interrupt enable bit */
    fence.i                         /* flush the instruction cache */
    jr      ra
504
505 /***********************************************************************************
506 *
507 * The following init_memory() symbol overrides the weak symbol in the HAL and does
508 * a safe copy of RW data and clears zero-init memory
509 *
510 */
511    // zero_section helper function:
512    //       a0 = exec_start_addr
513    //       a1 = exec_end_addr
514    //
515    .globl  zero_section
516    .type   zero_section, @function
517zero_section:
518    bge a0, a1, .zero_section_done
519    sd  zero, (a0)
520    addi    a0, a0, 8
521    j   zero_section
522.zero_section_done:
523    ret
524
525    // zero_section helper function:
526    //       a0 = exec_start_addr
527    //       a1 = exec_end_addr
528    //       a2 = start count
529    //
530    .globl  count_section
531    .type   count_section, @function
532count_section:
533    beq a0, a1, .count_section_done
534    sd  a2, (a0)
535    addi    a0, a0, 8
536    addi    a2, a2, 8
537    j   count_section
538.count_section_done:
539    ret
540
541    // copy_section helper function:
542    //  a0 = load_addr
543    //  a1 = exec_start_addr
544    //  a2 = exec_end_addr
545    .globl  copy_section
546    .type   copy_section, @function
547copy_section:
548    beq a1, a0, .copy_section_done // if load_addr == exec_start_addr, goto copy_section_done
549.check_if_copy_section_done:
550    beq a1, a2, .copy_section_done // if offset != length, goto keep_copying
551.keep_copying:
552    ld  a3, 0(a0)                  // val = *load_addr
553    sd  a3, 0(a1)                  // *exec_start_addr = val;
554    addi    a0, a0, 8              // load_addr = load_addr + 8
555    addi    a1, a1, 8              // exec_start_addr = exec_start_addr + 8
556    j   .check_if_copy_section_done
557.copy_section_done:
558    ret
559
560
561/***********************************************************************************
562 *
563 * The following copy_switch_code() symbol overrides the weak symbol in the HAL and does
564 * a safe copy of HW config data
565 */
566    .globl  copy_switch_code
567    .type   copy_switch_code, @function
568copy_switch_code:
569    la      a5, __sc_start           // a5 = __sc_start
570    la      a4, __sc_load            // a4 = __sc_load
571    beq a5,a4,.copy_switch_code_done // if a5 == a4, goto copy_switch_code_done
572    la      a3, __sc_end             // a3 = __sc_end
573    beq a5,a3,.copy_switch_code_done // if a5 == a3, goto copy_switch_code_done
574.copy_switch_code_loop:
575    lw  a2,0(a4)                     // a2 = *a4
576    sw  a2,0(a5)                     // *a5 = a2
577    addi    a5,a5,4                  // a5+=4
578    addi    a4,a4,4                  // a4+=4
579
580    bltu    a5,a3,.copy_switch_code_loop // if a5 < a3, goto copy_switch_code_loop
581.copy_switch_code_done:
582    ret
583
584/*******************************************************************************
585 *
586 */
#define START__OF_LIM 0x08000000
#define END__OF_LIM   0x08200000
#define START__OF_DTM 0x01000000
#define END__OF_DTM   0x01002000

/*
 * .clear_l2lim / .clear_dtim
 * Two entry points sharing one store loop. Each zeroes the doublewords in
 * [a4, a5) where:
 *   .clear_l2lim: a4 = START__OF_LIM, a5 = END__OF_LIM
 *   .clear_dtim : a4 = START__OF_DTM, a5 = END__OF_DTM
 * Clobbers: a2 (.clear_l2lim only), a4, a5
 */
.clear_l2lim:
    // Clear the LIM
    //
    // On reset, the first 15 ways are L2 and the last way is cache
    // We can initialize all, as cache write through to DDR is blocked
    // until DDR in initialized, so will have no effect other than clear ECC
    //
    // NOTE: we need to check if we are debugging from LIM, if so do not
    // initialize. The check is whether the address of _start has the
    // START__OF_LIM bit set.
    //
    la      a2, _start
    li      a4, START__OF_LIM           // start of LIM address
    and     a2, a2, a4
    bnez    a2, .done_clear             // image runs from LIM: leave it alone
    li      a5, END__OF_LIM             // end of LIM address
    j       1f
.clear_dtim:
    //
    // Clear the E51 DTIM to prevent any ECC memory errors on initial access
    //
    li      a4, START__OF_DTM           // DTIM start
    li      a5, END__OF_DTM             // DTIM end
1:
    // common loop used by both .clear_l2lim and .clear_dtim
    sd      x0, 0(a4)
    add     a4, a4, __SIZEOF_POINTER__
    bltu    a4, a5, 1b                  // addresses are unsigned quantities
.done_clear:
    ret
622
/*
 * record_ecc_error_counts on reset
 * These are non-zero in the coreplex.
 * Can be checked later on to see if values have changed.
 * Each 32-bit counter is read and written to the address supplied by the
 * caller:
 *      a0 = mECCDataFailCount save address        (e.g. 0x01000100 in DTIM)
 *      a1 = mECCDataCorrectionCount save address  (e.g. 0x01000110 in DTIM)
 *      a2 = mECCDirFixCount save address          (e.g. 0x01000120 in DTIM)
 * Clobbers: a4, a5, t2
 */
#define mECCDataFailCount               0x02010168U
#define mECCDataCorrectionCount         0x02010148U
#define mECCDirFixCount                 0x02010108u
.record_ecc_error_counts:
    /* data fail count -> *a0 */
    li      a5, mECCDataFailCount
    mv      a4, a0
    lw      t2, 0(a5)
    sw      t2, 0(a4)
    /* data correction count -> *a1 */
    li      a5, mECCDataCorrectionCount
    mv      a4, a1
    lw      t2, 0(a5)
    sw      t2, 0(a4)
    /* directory fix count -> *a2 */
    li      a5, mECCDirFixCount
    mv      a4, a2
    lw      t2, 0(a5)
    sw      t2, 0(a4)
    ret
649
650/*
651 * clear_ras , clear_ras_2_deep
652 * Two deep function calls.
 * Used to clear the internal processor Return Address Stack
654 * This is belt and braces, may not be required
655 */
.clear_ras:
    /* Outer level of the two-deep call; ra is parked in a5 across it. */
    mv      a5, ra
    nop
    call    .clear_ras_2_deep
    .rept   6
    nop
    .endr
    mv      ra, a5                  /* restore the caller return address */
    ret
668
.clear_ras_2_deep:
    /* Inner level of the two-deep call: six nops, then return. */
    .rept   6
    nop
    .endr
    ret
677
678