/*
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright © 2019 Keith Packard
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above
 *    copyright notice, this list of conditions and the following
 *    disclaimer in the documentation and/or other materials provided
 *    with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <sys/cdefs.h>
#include <sys/types.h>

/* The size of the thread control block.
 * TLS relocations are generated relative to
 * a location this far *before* the first thread
 * variable (!)
 * NB: The actual size before tp also includes padding
 * to align up to the alignment of .tdata/.tbss.
 */
#if __SIZE_WIDTH__ == 32
extern char __arm32_tls_tcb_offset;
#define TP_OFFSET ((size_t)&__arm32_tls_tcb_offset)
#else
extern char __arm64_tls_tcb_offset;
#define TP_OFFSET ((size_t)&__arm64_tls_tcb_offset)
#endif

#pragma GCC diagnostic ignored "-Warray-bounds"

static inline void
_set_tls(void *tls)
{
        __asm__ volatile("msr tpidr_el0, %x0" : : "r" (tls - TP_OFFSET));
}
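
/*
 * Illustrative sketch (not part of the runtime, hence compiled out):
 * because _set_tls() stores tls - TP_OFFSET into tpidr_el0, a
 * local-exec TLS access of the form tpidr_el0 + TP_OFFSET + offset
 * yields tls + offset, i.e. the first thread variable lands exactly at
 * the start of the block handed to _set_tls(). The helper name and its
 * var_offset parameter are hypothetical.
 */
#if 0
static inline void *
_tls_var_addr(size_t var_offset)
{
        void *tp;

        /* Read the thread pointer back and undo the TCB bias */
        __asm__ volatile("mrs %x0, tpidr_el0" : "=r" (tp));
        return (char *)tp + TP_OFFSET + var_offset;
}
#endif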

#include "../../crt0.h"

/*
 * We need four 1GB mappings to cover the usual Normal memory space,
 * which runs from 0x00000000 to 0x7fffffff, along with the usual
 * Device space, which runs from 0x80000000 to 0xffffffff. However,
 * it looks like the smallest VA space we can construct is 8GB, so we
 * pad the remainder with invalid PTEs.
 */
#define MMU_NORMAL_COUNT        2
#define MMU_DEVICE_COUNT        2
#define MMU_INVALID_COUNT       4
extern uint64_t __identity_page_table[MMU_NORMAL_COUNT + MMU_DEVICE_COUNT + MMU_INVALID_COUNT];

#define MMU_DESCRIPTOR_VALID    (1 << 0)
#define MMU_DESCRIPTOR_BLOCK    (0 << 1)
#define MMU_DESCRIPTOR_TABLE    (1 << 1)

#define MMU_BLOCK_XN            (1LL << 54)
#define MMU_BLOCK_PXN           (1LL << 53)
#define MMU_BLOCK_CONTIG        (1LL << 52)
#define MMU_BLOCK_DBM           (1LL << 51)
#define MMU_BLOCK_GP            (1LL << 50)

#define MMU_BLOCK_NT            (1 << 16)
#define MMU_BLOCK_OA_BIT        12
#define MMU_BLOCK_NG            (1 << 11)
#define MMU_BLOCK_AF            (1 << 10)
#define MMU_BLOCK_SH_BIT        8
#define MMU_BLOCK_SH_NS         (0 << MMU_BLOCK_SH_BIT)
#define MMU_BLOCK_SH_OS         (2 << MMU_BLOCK_SH_BIT)
#define MMU_BLOCK_SH_IS         (3 << MMU_BLOCK_SH_BIT)
#define MMU_BLOCK_AP_BIT        6
#define MMU_BLOCK_NS            (1 << 5)
#define MMU_BLOCK_ATTR_BIT      2

#define MMU_NORMAL_FLAGS        (MMU_DESCRIPTOR_VALID |         \
                                 MMU_DESCRIPTOR_BLOCK |         \
                                 MMU_BLOCK_AF |                 \
                                 MMU_BLOCK_SH_IS |              \
                                 (0 << MMU_BLOCK_ATTR_BIT))

#define MMU_DEVICE_FLAGS        (MMU_DESCRIPTOR_VALID | \
                                 MMU_DESCRIPTOR_BLOCK | \
                                 MMU_BLOCK_AF | \
                                 (1 << MMU_BLOCK_ATTR_BIT))

#define MMU_INVALID_FLAGS       0
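
/*
 * Hedged sketch of what the assembly below emits: eight 64-bit level-1
 * block descriptors, each identity-mapping 1GB. The array name here is
 * hypothetical and the code is compiled out; the real table is built
 * by the asm that follows.
 */
#if 0
static const uint64_t identity_page_table_sketch[8] = {
    (0ULL << 30) | MMU_NORMAL_FLAGS,    /* 0x00000000-0x3fffffff Normal */
    (1ULL << 30) | MMU_NORMAL_FLAGS,    /* 0x40000000-0x7fffffff Normal */
    (2ULL << 30) | MMU_DEVICE_FLAGS,    /* 0x80000000-0xbfffffff Device */
    (3ULL << 30) | MMU_DEVICE_FLAGS,    /* 0xc0000000-0xffffffff Device */
    MMU_INVALID_FLAGS,                  /* 4GB-8GB: invalid padding     */
    MMU_INVALID_FLAGS,
    MMU_INVALID_FLAGS,
    MMU_INVALID_FLAGS,
};
#endif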

__asm__(
    ".section .rodata\n"
    ".global __identity_page_table\n"
    ".balign 65536\n"
    "__identity_page_table:\n"
    ".set _i, 0\n"
    ".rept " __XSTRING(MMU_NORMAL_COUNT) "\n"
    "  .8byte (_i << 30) |" __XSTRING(MMU_NORMAL_FLAGS) "\n"
    "  .set _i, _i + 1\n"
    ".endr\n"
    ".set _i, 0\n"
    ".rept " __XSTRING(MMU_DEVICE_COUNT) "\n"
    "  .8byte (1 << 31) | (_i << 30) |" __XSTRING(MMU_DEVICE_FLAGS) "\n"
    "  .set _i, _i + 1\n"
    ".endr\n"
    ".set _i, 0\n"
    ".rept " __XSTRING(MMU_INVALID_COUNT) "\n"
    "  .8byte " __XSTRING(MMU_INVALID_FLAGS) "\n"
    "  .set _i, _i + 1\n"
    ".endr\n"
    ".size __identity_page_table, " __XSTRING((MMU_NORMAL_COUNT + MMU_DEVICE_COUNT + MMU_INVALID_COUNT) * 8) "\n"
);
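
/*
 * Sanity-check sketch (assuming a C11 toolchain, left compiled out):
 * the table above is 8 entries * 8 bytes = 64 bytes, so the generous
 * 64KB .balign comfortably covers the TTBR0 alignment requirement.
 */
#if 0
_Static_assert(sizeof(__identity_page_table) == 64,
               "identity page table is eight 8-byte descriptors");
#endif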

#define SCTLR_MMU       (1 << 0)
#define SCTLR_A         (1 << 1)
#define SCTLR_C         (1 << 2)
#define SCTLR_ICACHE    (1 << 12)
#define SCTLR_WXN       (1 << 19)
#define TCR_T0SZ_BIT    0
#define TCR_EPD0        (1 << 7)
#define TCR_IRGN0_BIT   8
#define TCR_IRGN0_NC    (0 << TCR_IRGN0_BIT)
#define TCR_IRGN0_WB_WA (1 << TCR_IRGN0_BIT)
#define TCR_IRGN0_WT    (2 << TCR_IRGN0_BIT)
#define TCR_IRGN0_WB    (3 << TCR_IRGN0_BIT)
#define TCR_ORGN0_BIT   10
#define TCR_ORGN0_NC    (0 << TCR_ORGN0_BIT)
#define TCR_ORGN0_WB_WA (1 << TCR_ORGN0_BIT)
#define TCR_ORGN0_WT    (2 << TCR_ORGN0_BIT)
#define TCR_ORGN0_WB    (3 << TCR_ORGN0_BIT)
#define TCR_SH0_BIT     12
#define TCR_SH0_NS      (0 << TCR_SH0_BIT)
#define TCR_SH0_OS      (2 << TCR_SH0_BIT)
#define TCR_SH0_IS      (3 << TCR_SH0_BIT)
#define TCR_TG0_BIT     14
#define TCR_TG0_4KB     (0 << TCR_TG0_BIT)
#define TCR_TG0_64KB    (1 << TCR_TG0_BIT)
#define TCR_TG0_16KB    (2 << TCR_TG0_BIT)
#define TCR_EPD1        (1 << 23)
#define TCR_IPS_BIT     32
#define TCR_IPS_4GB     (0LL << TCR_IPS_BIT)

extern const void *__vector_table[];

static void __attribute__((used))
_cstart(void)
{
        uint64_t        sctlr_el1;

        /* Invalidate the instruction cache */
        __asm__("ic iallu");
        __asm__("isb");

        /*
         * Set up the TCR register to provide a 33-bit VA space using
         * 4KB pages over 4GB of PA
         */
        __asm__("msr    tcr_el1, %x0" ::
                "r" ((0x1f << TCR_T0SZ_BIT) |
                     TCR_IRGN0_WB_WA |
                     TCR_ORGN0_WB_WA |
                     TCR_SH0_IS |
                     TCR_TG0_4KB |
                     TCR_EPD1 |
                     TCR_IPS_4GB));
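
        /*
         * Sketch decoding the TCR value programmed above (my reading
         * of the field encodings; the TCR_VALUE name is hypothetical):
         * T0SZ = 0x1f gives a 64 - 31 = 33-bit VA space, i.e. the 8GB
         * covered by the 8-entry table, with write-back write-allocate
         * inner/outer cacheability, inner-shareable walks, 4KB granule,
         * TTBR1 walks disabled, and a 4GB IPA space.
         */
#if 0
#define TCR_VALUE ((0x1fULL << TCR_T0SZ_BIT) | TCR_IRGN0_WB_WA | \
                   TCR_ORGN0_WB_WA | TCR_SH0_IS | TCR_TG0_4KB |  \
                   TCR_EPD1 | TCR_IPS_4GB)
_Static_assert((64 - (TCR_VALUE & 0x3f)) == 33, "33-bit VA space");
#endif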

        /* Load the page table base */
        __asm__("msr    ttbr0_el1, %x0" :: "r" (__identity_page_table));

        /*
         * Set the memory attributes in the MAIR register:
         *
         * Region 0 is Normal memory
         * Region 1 is Device memory
         */
        __asm__("msr    mair_el1, %x0" ::
                "r" ((0xffLL << 0) | (0x00LL << 8)));
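
        /*
         * Hedged sketch spelling out the two MAIR attribute encodings
         * used above (the macro names are hypothetical): index 0, 0xff,
         * is Normal memory, inner and outer write-back read/write-
         * allocate; index 1, 0x00, is Device-nGnRnE.
         */
#if 0
#define MAIR_ATTR_NORMAL_WB     0xffULL  /* Normal, WB RW-allocate */
#define MAIR_ATTR_DEVICE_nGnRnE 0x00ULL  /* strongly ordered device */
#define MAIR_VALUE ((MAIR_ATTR_NORMAL_WB << 0) | \
                    (MAIR_ATTR_DEVICE_nGnRnE << 8))
#endif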

        /*
         * Enable the caches and the MMU; disable strict alignment
         * checking and write-implies-XN
         */
        __asm__("mrs    %x0, sctlr_el1" : "=r" (sctlr_el1));
        sctlr_el1 |= SCTLR_ICACHE | SCTLR_C | SCTLR_MMU;
        sctlr_el1 &= ~(SCTLR_A | SCTLR_WXN);
        __asm__("msr    sctlr_el1, %x0" :: "r" (sctlr_el1));
        __asm__("isb");

        /* Set the vector base address register */
        __asm__("msr    vbar_el1, %x0" :: "r" (__vector_table));
        __start();
}

void __section(".text.init.enter")
_start(void)
{
        /* Select the dedicated stack pointer (SP_ELx) for this EL */
        __asm__("msr     SPSel, #1");

        /* Initialize stack */
        __asm__("adrp x1, __stack");
        __asm__("add  x1, x1, :lo12:__stack");
        __asm__("mov sp, x1");
#if __ARM_FP
        /* Enable FPU: CPACR_EL1.FPEN = 0b11, no FP/SIMD traps */
        __asm__("mov x1, #(0x3 << 20)");
        __asm__("msr cpacr_el1,x1");
#endif
        /* Jump into C code */
        __asm__("bl _cstart");
}
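
/*
 * Aside on the stack setup above: the adrp/add pair is PC-relative, so
 * it yields the correct address of __stack wherever the image actually
 * runs and needs no literal pool. A compiled-out equivalent using an
 * absolute literal load would be (function name is hypothetical):
 */
#if 0
void
_start_alt(void)
{
        __asm__("ldr x1, =__stack");    /* literal-pool, link-time address */
        __asm__("mov sp, x1");
}
#endif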

#ifdef CRT0_SEMIHOST

/*
 * Trap faults, print a message, and exit when running under semihost
 */

#include <semihost.h>
#include <unistd.h>
#include <stdio.h>

#define _REASON(r) #r
#define REASON(r) _REASON(r)

/* Print a label followed by a 64-bit value as 16 lower-case hex digits */
static void aarch64_fault_write_reg(const char *prefix, uint64_t reg)
{
    fputs(prefix, stdout);

    for (unsigned i = 0; i < 16; i++) {
        unsigned digitval = 0xF & (reg >> (60 - 4*i));
        char digitchr = '0' + digitval + (digitval >= 10 ? 'a'-'0'-10 : 0);
        putchar(digitchr);
    }

    putchar('\n');
}
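
/*
 * Usage sketch: each call prints the label followed by exactly sixteen
 * fixed-width hex digits, e.g.
 *
 *     aarch64_fault_write_reg("\tPC:    0x", 0x40001000ULL);
 *
 * emits "\tPC:    0x0000000040001000" and a newline.
 */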

struct fault {
    uint64_t    x[31];
    uint64_t    pc;
    uint64_t    esr;
    uint64_t    far;
};
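
/*
 * Layout sketch: VECTOR_COMMON below stores x0..x30 at offsets 0..240,
 * then ELR (pc) at 248, ESR at 256 and FAR at 264, so struct fault must
 * keep exactly this member order. A compile-time check, assuming a C11
 * toolchain (left compiled out):
 */
#if 0
#include <stddef.h>
_Static_assert(offsetof(struct fault, pc) == 248 &&
               offsetof(struct fault, esr) == 256 &&
               offsetof(struct fault, far) == 264,
               "struct fault must match the VECTOR_COMMON stores");
#endif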

static const char *const reasons[] = {
    "sync\n",
    "irq\n",
    "fiq\n",
    "serror\n"
};

#define REASON_SYNC     0
#define REASON_IRQ      1
#define REASON_FIQ      2
#define REASON_SERROR   3

static void __attribute__((used))
aarch64_fault(struct fault *f, int reason)
{
    int r;
    fputs("AARCH64 fault: ", stdout);
    fputs(reasons[reason], stdout);
    char prefix[] = "\tX##:   0x";
    for (r = 0; r <= 30; r++) {
        prefix[2] = '0' + r / 10;    /* overwrite ## with register number */
        prefix[3] = '0' + r % 10;
        aarch64_fault_write_reg(prefix, f->x[r]);
    }
    aarch64_fault_write_reg("\tPC:    0x", f->pc);
    aarch64_fault_write_reg("\tESR:   0x", f->esr);
    aarch64_fault_write_reg("\tFAR:   0x", f->far);
    _exit(1);
}
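
/*
 * The resulting dump looks like this (register values illustrative):
 *
 *     AARCH64 fault: sync
 *             X00:   0x0000000000000000
 *             ...
 *             X30:   0x0000000040000123
 *             PC:    0x0000000040001234
 *             ESR:   0x0000000096000045
 *             FAR:   0x0000000000000008
 */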

/*
 * Save x0-x30 plus ELR, ESR and FAR into a struct fault on the stack,
 * leaving x0 pointing at it for aarch64_fault(). The frame needs
 * 34 * 8 = 272 bytes, which also keeps sp 16-byte aligned.
 */
#define VECTOR_COMMON \
    __asm__("sub sp, sp, #272"); \
    __asm__("str x0, [sp, #0]"); \
    __asm__("str x1, [sp, #8]"); \
    __asm__("str x2, [sp, #16]"); \
    __asm__("str x3, [sp, #24]"); \
    __asm__("str x4, [sp, #32]"); \
    __asm__("str x5, [sp, #40]"); \
    __asm__("str x6, [sp, #48]"); \
    __asm__("str x7, [sp, #56]"); \
    __asm__("str x8, [sp, #64]"); \
    __asm__("str x9, [sp, #72]"); \
    __asm__("str x10, [sp, #80]"); \
    __asm__("str x11, [sp, #88]"); \
    __asm__("str x12, [sp, #96]"); \
    __asm__("str x13, [sp, #104]"); \
    __asm__("str x14, [sp, #112]"); \
    __asm__("str x15, [sp, #120]"); \
    __asm__("str x16, [sp, #128]"); \
    __asm__("str x17, [sp, #136]"); \
    __asm__("str x18, [sp, #144]"); \
    __asm__("str x19, [sp, #152]"); \
    __asm__("str x20, [sp, #160]"); \
    __asm__("str x21, [sp, #168]"); \
    __asm__("str x22, [sp, #176]"); \
    __asm__("str x23, [sp, #184]"); \
    __asm__("str x24, [sp, #192]"); \
    __asm__("str x25, [sp, #200]"); \
    __asm__("str x26, [sp, #208]"); \
    __asm__("str x27, [sp, #216]"); \
    __asm__("str x28, [sp, #224]"); \
    __asm__("str x29, [sp, #232]"); \
    __asm__("str x30, [sp, #240]"); \
    __asm__("mrs x0, ELR_EL1"); \
    __asm__("str x0, [sp, #248]"); \
    __asm__("mrs x0, ESR_EL1"); \
    __asm__("str x0, [sp, #256]"); \
    __asm__("mrs x0, FAR_EL1"); \
    __asm__("str x0, [sp, #264]"); \
    __asm__("mov x0, sp")

void __section(".init")
aarch64_sync_vector(void)
{
    VECTOR_COMMON;
    __asm__("mov x1, #" REASON(REASON_SYNC));
    __asm__("b  aarch64_fault");
}

void __section(".init")
aarch64_irq_vector(void)
{
    VECTOR_COMMON;
    __asm__("mov x1, #" REASON(REASON_IRQ));
    __asm__("b  aarch64_fault");
}

void __section(".init")
aarch64_fiq_vector(void)
{
    VECTOR_COMMON;
    __asm__("mov x1, #" REASON(REASON_FIQ));
    __asm__("b  aarch64_fault");
}
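
/*
 * Hedged sketch of how these handlers relate to __vector_table (the
 * real table is defined elsewhere): since _start selects SPSel = 1,
 * the current-EL-with-SP_ELx entries apply, which live at offsets
 * 0x200/0x280/0x300 in the 2KB-aligned table VBAR_EL1 points at. The
 * label below is hypothetical and the block is compiled out.
 */
#if 0
__asm__(
    ".section .init\n"
    ".balign 2048\n"
    "vector_table_sketch:\n"
    ".org vector_table_sketch + 0x200\n"
    "  b aarch64_sync_vector\n"
    ".org vector_table_sketch + 0x280\n"
    "  b aarch64_irq_vector\n"
    ".org vector_table_sketch + 0x300\n"
    "  b aarch64_fiq_vector\n"
);
#endif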

#endif /* CRT0_SEMIHOST */