/*
 * Copyright 2023 Google LLC
 * SPDX-License-Identifier: Apache-2.0
 */
#include <stdint.h>
#include <stdbool.h>
#include <zephyr/kernel.h>
#include <xtensa/config/core-isa.h>
#include <xtensa_mmu_priv.h>
#include <zephyr/cache.h>

#ifdef CONFIG_USERSPACE
BUILD_ASSERT((CONFIG_PRIVILEGED_STACK_SIZE > 0) &&
             (CONFIG_PRIVILEGED_STACK_SIZE % CONFIG_MMU_PAGE_SIZE) == 0);
#endif

#define ASID_INVALID 0

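/* Values programmed into the MMU on a page table switch: rasid and
 * ptevaddr are written to the RASID and PTEVADDR special registers,
 * and the two as/at pairs are the operands of the wdtlb instructions
 * that pin the page-table and vector refill entries in the data TLB.
 */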
struct tlb_regs {
        uint32_t rasid;
        uint32_t ptevaddr;
        uint32_t ptepin_as;
        uint32_t ptepin_at;
        uint32_t vecpin_as;
        uint32_t vecpin_at;
};

static void compute_regs(uint32_t user_asid, uint32_t *l1_page, struct tlb_regs *regs)
{
        uint32_t vecbase = XTENSA_RSR("VECBASE");

        __ASSERT_NO_MSG((((uint32_t)l1_page) & 0xfff) == 0);
        __ASSERT_NO_MSG((user_asid == 0) || ((user_asid > 2) &&
                        (user_asid < XTENSA_MMU_SHARED_ASID)));

        /* We don't use ring 1; the ring 0 ASID must be 1 */
        regs->rasid = (XTENSA_MMU_SHARED_ASID << 24) |
                      (user_asid << 16) | 0x000201;
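        /* RASID holds one 8-bit ASID per ring, ring 0 in the low byte:
         * ring0=1, ring1=2 (unused), ring2=user_asid and
         * ring3=XTENSA_MMU_SHARED_ASID.  For illustration, with
         * user_asid == 5 and a shared ASID of 0xff (the value the
         * 0xff030201 constant in xtensa_init_paging() suggests), this
         * evaluates to 0xff050201.
         */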

        /* Derive PTEVADDR from ASID so each domain gets its own PTE area */
        regs->ptevaddr = CONFIG_XTENSA_MMU_PTEVADDR + user_asid * 0x400000;
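        /* The 0x400000 (4 MiB) stride is the size of one complete PTE
         * area: with 4 KiB pages, mapping the 4 GiB address space takes
         * 1M PTEs of 4 bytes each, so consecutive ASIDs land in disjoint
         * 4 MiB windows.
         */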

        /* The ptables code doesn't add the mapping for the l1 page itself */
        l1_page[XTENSA_MMU_L1_POS(regs->ptevaddr)] =
                (uint32_t)l1_page | XTENSA_MMU_PAGE_TABLE_ATTR;

        regs->ptepin_at = (uint32_t)l1_page;
        regs->ptepin_as = XTENSA_MMU_PTE_ENTRY_VADDR(regs->ptevaddr, regs->ptevaddr)
                          | XTENSA_MMU_PTE_WAY;
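        /* For wdtlb, "at" carries the entry contents (physical page plus
         * attributes) and "as" selects the entry (virtual page plus way
         * index).  This pair pins the L1 table at the spot in the PTE
         * window where a hardware refill of the page tables themselves
         * would look, so that refill can never miss in turn.
         */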

        /* Pin mapping for refilling the vector address into the ITLB
         * (for handling TLB miss exceptions). Note: this is NOT an
         * instruction TLB entry for the vector code itself, it's a
         * DATA TLB entry for the page containing the vector mapping
         * so the refill on instruction fetch can find it. The
         * hardware doesn't have a 4k pinnable instruction TLB way,
         * frustratingly.
         */
        uint32_t vb_pte = l1_page[XTENSA_MMU_L1_POS(vecbase)];

        regs->vecpin_at = vb_pte;
        regs->vecpin_as = XTENSA_MMU_PTE_ENTRY_VADDR(regs->ptevaddr, vecbase)
                          | XTENSA_MMU_VECBASE_WAY;
}

/* Switch to a new page table. There are four items we have to set in
 * the hardware: the PTE virtual address, the ring/ASID mapping
 * register, and two pinned entries in the data TLB handling refills
 * for the page tables and the vector handlers.
 *
 * These can be done in any order, provided we ensure that no memory
 * access which causes a TLB miss can happen during the process. This
 * means we must work entirely within registers in a single asm block.
 * Also note that instruction fetches are memory accesses too, which
 * means we cannot cross a page boundary that might reach a new page
 * not in the TLB. A single jump to an aligned address that holds our
 * five instructions is sufficient to guarantee that; I couldn't think
 * of a way to do the alignment statically that also interoperated
 * well with inline assembly.
 */
void xtensa_set_paging(uint32_t user_asid, uint32_t *l1_page)
{
        /* Optimization note: the registers computed here are pure
         * functions of the two arguments. With a minor API tweak,
         * they could be cached in e.g. a thread struct instead of
         * being recomputed. This is called on context switch paths
         * and is performance-sensitive.
         */
        struct tlb_regs regs;

        compute_regs(user_asid, l1_page, &regs);

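        /* Five 3-byte instructions fit within one 16-byte aligned block,
         * so once the jump below lands at the label the whole sequence
         * executes without crossing a page boundary.
         */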
        __asm__ volatile("j 1f\n"
                         ".align 16\n" /* enough for 5 insns */
                         "1:\n"
                         "wsr %0, PTEVADDR\n"
                         "wsr %1, RASID\n"
                         "wdtlb %2, %3\n"
                         "wdtlb %4, %5\n"
                         "isync"
                         :: "r"(regs.ptevaddr), "r"(regs.rasid),
                            "r"(regs.ptepin_at), "r"(regs.ptepin_as),
                            "r"(regs.vecpin_at), "r"(regs.vecpin_as));
}

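/* A minimal sketch of the caching idea mentioned in the optimization
 * note above; the struct and function names here are hypothetical and
 * not part of the current code:
 *
 *      struct thread_paging_cache {
 *              struct tlb_regs regs;
 *              bool valid;
 *      };
 *
 *      static void cache_paging_regs(struct thread_paging_cache *c,
 *                                    uint32_t asid, uint32_t *l1_page)
 *      {
 *              compute_regs(asid, l1_page, &c->regs);
 *              c->valid = true;
 *      }
 *
 * The context switch path could then write the cached values directly
 * instead of recomputing them every time.
 */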
/* This is effectively the same algorithm as xtensa_set_paging(), but
 * it also disables the hardware-initialized 512M TLB entries in way 6
 * (because the hardware disallows duplicate TLB mappings). For
 * instruction fetches this produces a critical ordering constraint:
 * the instruction following the invalidation of the ITLB entry mapping
 * the current PC will by definition create a refill condition, which
 * will (because the data TLB was invalidated) cause a refill
 * exception. Therefore this step must be the very last one, once
 * everything else is set up and working, which includes the
 * invalidation of the virtual PTEVADDR area so that the resulting
 * refill can complete.
 *
 * Note that we can't guarantee that the compiler won't insert a data
 * fetch from our stack memory after exit from the asm block (while it
 * might be double-mapped), so we invalidate that data TLB entry inside
 * the asm for correctness. The other 13 entries get invalidated in a C
 * loop at the end.
 */
void xtensa_init_paging(uint32_t *l1_page)
{
        extern char z_xt_init_pc; /* defined in asm below */
        struct tlb_regs regs;
        unsigned int initial_rasid;

        /* The initial RASID after hardware initialization is 0x04030201.
         * ASID 1 is hardwired to ring 0; the other slots must be
         * different from each other and must not be 0.
         *
         * For our initial implementation we just set the 4th slot
         * (ring 3) to the ASID value used for memory that is shared
         * with all threads.
         */
        initial_rasid = 0xff030201;

#if CONFIG_MP_MAX_NUM_CPUS > 1
        /* The incoherent cache can get into terrible trouble if it's
         * allowed to cache PTEs differently across CPUs. We require
         * that all page tables supplied by the OS have exclusively
         * uncached mappings for page data, but can't do anything
         * about earlier code/firmware. Dump the cache to be safe.
         */
        sys_cache_data_flush_and_invd_all();
#endif

        compute_regs(ASID_INVALID, l1_page, &regs);

        uint32_t idtlb_pte = (regs.ptevaddr & 0xe0000000) | XCHAL_SPANNING_WAY;
        uint32_t idtlb_stk = (((uint32_t)&regs) & ~0xfff) | XCHAL_SPANNING_WAY;
        uint32_t iitlb_pc = (((uint32_t)&z_xt_init_pc) & ~0xfff) | XCHAL_SPANNING_WAY;
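        /* These name the boot-time 512M entries in the spanning way that
         * cover the PTE window, this function's stack frame and this
         * code's PC; they are the ones that must be invalidated from
         * inside the asm block below rather than in the trailing loop.
         */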

        /* Note: the jump is mostly pedantry, as it's almost
         * inconceivable that a hardware memory region at boot is
         * going to cross a 512M page boundary. But we need the entry
         * symbol to get the address above, so the jump is here for
         * symmetry with the set_paging() code.
         */
        __asm__ volatile("j z_xt_init_pc\n"
                         ".align 32\n" /* room for 10 insns */
                         ".globl z_xt_init_pc\n"
                         "z_xt_init_pc:\n"
                         "wsr %0, PTEVADDR\n"
                         "wsr %1, RASID\n"
                         "wdtlb %2, %3\n"
                         "wdtlb %4, %5\n"
                         "idtlb %6\n" /* invalidate pte */
                         "idtlb %7\n" /* invalidate stk */
                         "isync\n"
                         "iitlb %8\n" /* invalidate pc */
                         "isync\n" /* <--- traps an ITLB miss */
                         :: "r"(regs.ptevaddr), "r"(initial_rasid),
                            "r"(regs.ptepin_at), "r"(regs.ptepin_as),
                            "r"(regs.vecpin_at), "r"(regs.vecpin_as),
                            "r"(idtlb_pte), "r"(idtlb_stk), "r"(iitlb_pc));

        /* Invalidate the remaining (unused by this function)
         * initialization entries. Now we're flying free with our own
         * page table.
         */
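        /* Eight 512M spanning-way entries cover the whole 4G address
         * space; the three already dropped in the asm block above are
         * skipped here.
         */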
        for (uint32_t i = 0; i < 8; i++) {
                uint32_t ixtlb = (i * 0x20000000) | XCHAL_SPANNING_WAY;

                if (ixtlb != iitlb_pc) {
                        __asm__ volatile("iitlb %0" :: "r"(ixtlb));
                }
                if (ixtlb != idtlb_stk && ixtlb != idtlb_pte) {
                        __asm__ volatile("idtlb %0" :: "r"(ixtlb));
                }
        }
        __asm__ volatile("isync");
}