/*
 * Copyright 2019 Broadcom
 * The term "Broadcom" refers to Broadcom Inc. and/or its subsidiaries.
 *
 * Copyright (c) 2021 BayLibre, SAS
 *
 * SPDX-License-Identifier: Apache-2.0
 */

#include <zephyr/cache.h>
#include <zephyr/device.h>
#include <zephyr/init.h>
#include <zephyr/kernel.h>
#include <kernel_arch_func.h>
#include <kernel_arch_interface.h>
#include <kernel_internal.h>
#include <zephyr/logging/log.h>
#include <zephyr/arch/arm64/cpu.h>
#include <zephyr/arch/arm64/lib_helpers.h>
#include <zephyr/arch/arm64/mm.h>
#include <zephyr/linker/linker-defs.h>
#include <zephyr/spinlock.h>
#include <zephyr/sys/util.h>

#include "mmu.h"

LOG_MODULE_DECLARE(os, CONFIG_KERNEL_LOG_LEVEL);

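/*
 * Statically allocated pool of translation tables: CONFIG_MAX_XLAT_TABLES
 * tables of Ln_XLAT_NUM_ENTRIES descriptors each. xlat_use_count[] tracks
 * per-table usage (0 means the table is free); xlat_lock serializes all
 * page table updates.
 */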
static uint64_t xlat_tables[CONFIG_MAX_XLAT_TABLES * Ln_XLAT_NUM_ENTRIES]
		__aligned(Ln_XLAT_NUM_ENTRIES * sizeof(uint64_t));
static uint16_t xlat_use_count[CONFIG_MAX_XLAT_TABLES];
static struct k_spinlock xlat_lock;

/* Returns a reference to a free table */
static uint64_t *new_table(void)
{
	uint64_t *table;
	unsigned int i;

	/* Look for a free table. */
	for (i = 0U; i < CONFIG_MAX_XLAT_TABLES; i++) {
		if (xlat_use_count[i] == 0U) {
			table = &xlat_tables[i * Ln_XLAT_NUM_ENTRIES];
			xlat_use_count[i] = 1U;
			MMU_DEBUG("allocating table [%d]%p\n", i, table);
			return table;
		}
	}

	LOG_ERR("CONFIG_MAX_XLAT_TABLES is too small");
	return NULL;
}

static inline unsigned int table_index(uint64_t *pte)
{
	unsigned int i = (pte - xlat_tables) / Ln_XLAT_NUM_ENTRIES;

	__ASSERT(i < CONFIG_MAX_XLAT_TABLES, "table %p out of range", pte);
	return i;
}

/* Makes a table free for reuse. */
static void free_table(uint64_t *table)
{
	unsigned int i = table_index(table);

	MMU_DEBUG("freeing table [%d]%p\n", i, table);
	__ASSERT(xlat_use_count[i] == 1U, "table still in use");
	xlat_use_count[i] = 0U;
}

/* Adjusts usage count and returns current count. */
static int table_usage(uint64_t *table, int adjustment)
{
	unsigned int i = table_index(table);

	xlat_use_count[i] += adjustment;
	__ASSERT(xlat_use_count[i] > 0, "usage count underflow");
	return xlat_use_count[i];
}

static inline bool is_table_unused(uint64_t *table)
{
	return table_usage(table, 0) == 1;
}

static inline bool is_free_desc(uint64_t desc)
{
	return (desc & PTE_DESC_TYPE_MASK) == PTE_INVALID_DESC;
}

static inline bool is_table_desc(uint64_t desc, unsigned int level)
{
	return level != XLAT_LAST_LEVEL &&
	       (desc & PTE_DESC_TYPE_MASK) == PTE_TABLE_DESC;
}

static inline bool is_block_desc(uint64_t desc)
{
	return (desc & PTE_DESC_TYPE_MASK) == PTE_BLOCK_DESC;
}

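/*
 * Extract the next-level table address from a table descriptor: bits
 * [47:PAGE_SIZE_SHIFT] hold the address, everything else is type/attribute
 * bits. The value is used directly as a pointer, which assumes the
 * translation tables are reachable through a flat VA == PA mapping.
 */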
static inline uint64_t *pte_desc_table(uint64_t desc)
{
	uint64_t address = desc & GENMASK(47, PAGE_SIZE_SHIFT);

	return (uint64_t *)address;
}

static inline bool is_desc_block_aligned(uint64_t desc, unsigned int level_size)
{
	uint64_t mask = GENMASK(47, PAGE_SIZE_SHIFT);
	bool aligned = !((desc & mask) & (level_size - 1));

	if (!aligned) {
		MMU_DEBUG("misaligned desc 0x%016llx for block size 0x%x\n",
			  desc, level_size);
	}

	return aligned;
}

static inline bool is_desc_superset(uint64_t desc1, uint64_t desc2,
				    unsigned int level)
{
	uint64_t mask = DESC_ATTRS_MASK | GENMASK(47, LEVEL_TO_VA_SIZE_SHIFT(level));

	return (desc1 & mask) == (desc2 & mask);
}

#if DUMP_PTE
static void debug_show_pte(uint64_t *pte, unsigned int level)
{
	MMU_DEBUG("%.*s", level * 2U, ". . . ");
	MMU_DEBUG("[%d]%p: ", table_index(pte), pte);

	if (is_free_desc(*pte)) {
		MMU_DEBUG("---\n");
		return;
	}

	if (is_table_desc(*pte, level)) {
		uint64_t *table = pte_desc_table(*pte);

		MMU_DEBUG("[Table] [%d]%p\n", table_index(table), table);
		return;
	}

	if (is_block_desc(*pte)) {
		MMU_DEBUG("[Block] ");
	} else {
		MMU_DEBUG("[Page] ");
	}

	uint8_t mem_type = (*pte >> 2) & MT_TYPE_MASK;

	MMU_DEBUG((mem_type == MT_NORMAL) ? "MEM" :
		  ((mem_type == MT_NORMAL_NC) ? "NC" : "DEV"));
	MMU_DEBUG((*pte & PTE_BLOCK_DESC_AP_RO) ? "-RO" : "-RW");
	MMU_DEBUG((*pte & PTE_BLOCK_DESC_NS) ? "-NS" : "-S");
	MMU_DEBUG((*pte & PTE_BLOCK_DESC_AP_ELx) ? "-ELx" : "-ELh");
	MMU_DEBUG((*pte & PTE_BLOCK_DESC_PXN) ? "-PXN" : "-PX");
	MMU_DEBUG((*pte & PTE_BLOCK_DESC_UXN) ? "-UXN" : "-UX");
	MMU_DEBUG("\n");
}
#else
static inline void debug_show_pte(uint64_t *pte, unsigned int level) { }
#endif

static void set_pte_table_desc(uint64_t *pte, uint64_t *table, unsigned int level)
{
	/* Point pte to new table */
	*pte = PTE_TABLE_DESC | (uint64_t)table;
	debug_show_pte(pte, level);
}

static void set_pte_block_desc(uint64_t *pte, uint64_t desc, unsigned int level)
{
	if (desc) {
		desc |= (level == XLAT_LAST_LEVEL) ? PTE_PAGE_DESC : PTE_BLOCK_DESC;
	}
	*pte = desc;
	debug_show_pte(pte, level);
}

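/*
 * Replace the block (or invalid) entry at *pte with a table descriptor
 * pointing to a freshly allocated next-level table. If *pte was a valid
 * block mapping, the new table is populated with equivalent finer-grained
 * entries so the effective mapping is unchanged.
 */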
static uint64_t *expand_to_table(uint64_t *pte, unsigned int level)
{
	uint64_t *table;

	__ASSERT(level < XLAT_LAST_LEVEL, "can't expand last level");

	table = new_table();
	if (!table) {
		return NULL;
	}

	if (!is_free_desc(*pte)) {
		/*
		 * If entry at current level was already populated
		 * then we need to reflect that in the new table.
		 */
		uint64_t desc = *pte;
		unsigned int i, stride_shift;

		MMU_DEBUG("expanding PTE 0x%016llx into table [%d]%p\n",
			  desc, table_index(table), table);
		__ASSERT(is_block_desc(desc), "");

		if (level + 1 == XLAT_LAST_LEVEL) {
			desc |= PTE_PAGE_DESC;
		}

		stride_shift = LEVEL_TO_VA_SIZE_SHIFT(level + 1);
		for (i = 0U; i < Ln_XLAT_NUM_ENTRIES; i++) {
			table[i] = desc | (i << stride_shift);
		}
		table_usage(table, Ln_XLAT_NUM_ENTRIES);
	} else {
		/*
		 * Adjust usage count for parent table's entry
		 * that will no longer be free.
		 */
		table_usage(pte, 1);
	}

	/* Link the new table in place of the pte it replaces */
	set_pte_table_desc(pte, table, level);
	table_usage(table, 1);

	return table;
}

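/*
 * Install (desc != 0) or erase (desc == 0) the mapping for [virt, virt+size)
 * in the given page tables. The walk restarts from the base table for each
 * chunk: existing table descriptors are followed, blocks are split via
 * expand_to_table() when the range or descriptor alignment requires it, and
 * tables left completely unused after an erase are freed.
 */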
static int set_mapping(struct arm_mmu_ptables *ptables,
		       uintptr_t virt, size_t size,
		       uint64_t desc, bool may_overwrite)
{
	uint64_t *pte, *ptes[XLAT_LAST_LEVEL + 1];
	uint64_t level_size;
	uint64_t *table = ptables->base_xlat_table;
	unsigned int level = BASE_XLAT_LEVEL;
	int ret = 0;

	while (size) {
		__ASSERT(level <= XLAT_LAST_LEVEL,
			 "max translation table level exceeded\n");

		/* Locate PTE for given virtual address and page table level */
		pte = &table[XLAT_TABLE_VA_IDX(virt, level)];
		ptes[level] = pte;

		if (is_table_desc(*pte, level)) {
			/* Move to the next translation table level */
			level++;
			table = pte_desc_table(*pte);
			continue;
		}

		if (!may_overwrite && !is_free_desc(*pte)) {
			/* the entry is already allocated */
			LOG_ERR("entry already in use: "
				"level %d pte %p *pte 0x%016llx",
				level, pte, *pte);
			ret = -EBUSY;
			break;
		}

		level_size = 1ULL << LEVEL_TO_VA_SIZE_SHIFT(level);

		if (is_desc_superset(*pte, desc, level)) {
			/* This block already covers our range */
			level_size -= (virt & (level_size - 1));
			if (level_size > size) {
				level_size = size;
			}
			goto move_on;
		}

		if ((size < level_size) || (virt & (level_size - 1)) ||
		    !is_desc_block_aligned(desc, level_size)) {
			/* Range doesn't fit, create subtable */
			table = expand_to_table(pte, level);
			if (!table) {
				ret = -ENOMEM;
				break;
			}
			level++;
			continue;
		}

		/* Adjust usage count for corresponding table */
		if (is_free_desc(*pte)) {
			table_usage(pte, 1);
		}
		if (!desc) {
			table_usage(pte, -1);
		}
		/* Create (or erase) block/page descriptor */
		set_pte_block_desc(pte, desc, level);

		/* recursively free unused tables if any */
		while (level != BASE_XLAT_LEVEL &&
		       is_table_unused(pte)) {
			free_table(pte);
			pte = ptes[--level];
			set_pte_block_desc(pte, 0, level);
			table_usage(pte, -1);
		}

move_on:
		virt += level_size;
		desc += desc ? level_size : 0;
		size -= level_size;

		/* Range is mapped, start again for next range */
		table = ptables->base_xlat_table;
		level = BASE_XLAT_LEVEL;
	}

	return ret;
}

#ifdef CONFIG_USERSPACE

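/*
 * Shallow-copy one level of page tables: block/page entries are copied as
 * is (forced non-global), while entries pointing to sub-tables keep pointing
 * to the same shared sub-tables, whose reference counts are bumped.
 */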
static uint64_t *dup_table(uint64_t *src_table, unsigned int level)
{
	uint64_t *dst_table = new_table();
	int i;

	if (!dst_table) {
		return NULL;
	}

	MMU_DEBUG("dup (level %d) [%d]%p to [%d]%p\n", level,
		  table_index(src_table), src_table,
		  table_index(dst_table), dst_table);

	for (i = 0; i < Ln_XLAT_NUM_ENTRIES; i++) {
		/*
		 * After the table duplication, each table can be independently
		 * updated. Thus, entries may become non-global.
		 * To keep the invariants very simple, we thus force the non-global
		 * bit on duplication. Moreover, there is no process to revert this
		 * (e.g. in `globalize_table`). Could be improved in future work.
		 */
		if (!is_free_desc(src_table[i]) && !is_table_desc(src_table[i], level)) {
			src_table[i] |= PTE_BLOCK_DESC_NG;
		}

		dst_table[i] = src_table[i];
		if (is_table_desc(src_table[i], level)) {
			table_usage(pte_desc_table(src_table[i]), 1);
		}
		if (!is_free_desc(dst_table[i])) {
			table_usage(dst_table, 1);
		}
	}

	return dst_table;
}

static int privatize_table(uint64_t *dst_table, uint64_t *src_table,
			   uintptr_t virt, size_t size, unsigned int level)
{
	size_t step, level_size = 1ULL << LEVEL_TO_VA_SIZE_SHIFT(level);
	unsigned int i;
	int ret;

	for ( ; size; virt += step, size -= step) {
		step = level_size - (virt & (level_size - 1));
		if (step > size) {
			step = size;
		}
		i = XLAT_TABLE_VA_IDX(virt, level);

		if (!is_table_desc(dst_table[i], level) ||
		    !is_table_desc(src_table[i], level)) {
			/* this entry is already private */
			continue;
		}

		uint64_t *dst_subtable = pte_desc_table(dst_table[i]);
		uint64_t *src_subtable = pte_desc_table(src_table[i]);

		if (dst_subtable == src_subtable) {
			/* need to make a private copy of this table */
			dst_subtable = dup_table(src_subtable, level + 1);
			if (!dst_subtable) {
				return -ENOMEM;
			}
			set_pte_table_desc(&dst_table[i], dst_subtable, level);
			table_usage(dst_subtable, 1);
			table_usage(src_subtable, -1);
		}

		ret = privatize_table(dst_subtable, src_subtable,
				      virt, step, level + 1);
		if (ret) {
			return ret;
		}
	}

	return 0;
}

/*
 * Make the given virtual address range private in dst_pt with regard to
 * src_pt. "Private" means that the corresponding page tables in dst_pt
 * are duplicated so as not to share the same table(s) with src_pt.
 * If the corresponding page tables in dst_pt are already distinct from
 * src_pt then nothing is done. This allows subsequent mapping changes in
 * that range to affect only dst_pt.
 */
static int privatize_page_range(struct arm_mmu_ptables *dst_pt,
				struct arm_mmu_ptables *src_pt,
				uintptr_t virt_start, size_t size,
				const char *name)
{
	k_spinlock_key_t key;
	int ret;

	MMU_DEBUG("privatize [%s]: virt %lx size %lx\n",
		  name, virt_start, size);

	key = k_spin_lock(&xlat_lock);

	ret = privatize_table(dst_pt->base_xlat_table, src_pt->base_xlat_table,
			      virt_start, size, BASE_XLAT_LEVEL);

	k_spin_unlock(&xlat_lock, key);
	return ret;
}

static void discard_table(uint64_t *table, unsigned int level)
{
	unsigned int i;

	for (i = 0U; i < Ln_XLAT_NUM_ENTRIES; i++) {
		if (is_table_desc(table[i], level)) {
			table_usage(pte_desc_table(table[i]), -1);
			discard_table(pte_desc_table(table[i]), level + 1);
		}
		if (!is_free_desc(table[i])) {
			table[i] = 0U;
			table_usage(table, -1);
		}
	}
	free_table(table);
}

static int globalize_table(uint64_t *dst_table, uint64_t *src_table,
			   uintptr_t virt, size_t size, unsigned int level)
{
	size_t step, level_size = 1ULL << LEVEL_TO_VA_SIZE_SHIFT(level);
	unsigned int i;
	int ret;

	for ( ; size; virt += step, size -= step) {
		step = level_size - (virt & (level_size - 1));
		if (step > size) {
			step = size;
		}
		i = XLAT_TABLE_VA_IDX(virt, level);

		if (dst_table[i] == src_table[i]) {
			/* already identical to global table */
			continue;
		}

		if (step != level_size) {
			/* boundary falls in the middle of this pte */
			__ASSERT(is_table_desc(src_table[i], level),
				 "can't have partial block pte here");
			if (!is_table_desc(dst_table[i], level)) {
				/* we need more fine grained boundaries */
				if (!expand_to_table(&dst_table[i], level)) {
					return -ENOMEM;
				}
			}
			ret = globalize_table(pte_desc_table(dst_table[i]),
					      pte_desc_table(src_table[i]),
					      virt, step, level + 1);
			if (ret) {
				return ret;
			}
			continue;
		}

		/* we discard current pte and replace with global one */

		uint64_t *old_table = is_table_desc(dst_table[i], level) ?
				      pte_desc_table(dst_table[i]) : NULL;

		if (is_free_desc(dst_table[i])) {
			table_usage(dst_table, 1);
		}
		if (is_free_desc(src_table[i])) {
			table_usage(dst_table, -1);
		}
		if (is_table_desc(src_table[i], level)) {
			table_usage(pte_desc_table(src_table[i]), 1);
		}
		dst_table[i] = src_table[i];
		debug_show_pte(&dst_table[i], level);

		if (old_table) {
			/* we can discard the whole branch */
			table_usage(old_table, -1);
			discard_table(old_table, level + 1);
		}
	}

	return 0;
}

/*
 * Globalize the given virtual address range in dst_pt from src_pt. We make
 * it global by sharing as much page table content from src_pt as possible,
 * including page tables themselves, and corresponding private tables in
 * dst_pt are then discarded. If page tables in the given range are already
 * shared then nothing is done. If page table sharing is not possible then
 * page table entries in dst_pt are synchronized with those from src_pt.
 */
static int globalize_page_range(struct arm_mmu_ptables *dst_pt,
				struct arm_mmu_ptables *src_pt,
				uintptr_t virt_start, size_t size,
				const char *name)
{
	k_spinlock_key_t key;
	int ret;

	MMU_DEBUG("globalize [%s]: virt %lx size %lx\n",
		  name, virt_start, size);

	key = k_spin_lock(&xlat_lock);

	ret = globalize_table(dst_pt->base_xlat_table, src_pt->base_xlat_table,
			      virt_start, size, BASE_XLAT_LEVEL);

	k_spin_unlock(&xlat_lock, key);
	return ret;
}

#endif /* CONFIG_USERSPACE */

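/*
 * Translate the MT_* attribute flags used by the rest of the kernel into
 * the corresponding AArch64 block/page descriptor attribute bits
 * (permissions, access flag, memory type, shareability, XN, nG).
 */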
static uint64_t get_region_desc(uint32_t attrs)
{
	unsigned int mem_type;
	uint64_t desc = 0U;

	/* NS bit for non-secure memory access from secure state */
	desc |= (attrs & MT_NS) ? PTE_BLOCK_DESC_NS : 0;

	/*
	 * AP bits for EL0 / ELh Data access permission
	 *
	 *   AP[2:1]   ELh  EL0
	 * +--------------------+
	 *     00       RW   NA
	 *     01       RW   RW
	 *     10       RO   NA
	 *     11       RO   RO
	 */

	/* AP bits for Data access permission */
	desc |= (attrs & MT_RW) ? PTE_BLOCK_DESC_AP_RW : PTE_BLOCK_DESC_AP_RO;

	/* Mirror permissions to EL0 */
	desc |= (attrs & MT_RW_AP_ELx) ?
		PTE_BLOCK_DESC_AP_ELx : PTE_BLOCK_DESC_AP_EL_HIGHER;

	/* the access flag */
	desc |= PTE_BLOCK_DESC_AF;

	/* memory attribute index field */
	mem_type = MT_TYPE(attrs);
	desc |= PTE_BLOCK_DESC_MEMTYPE(mem_type);

	switch (mem_type) {
	case MT_DEVICE_nGnRnE:
	case MT_DEVICE_nGnRE:
	case MT_DEVICE_GRE:
		/* Access to Device memory and non-cacheable memory is coherent
		 * for all observers in the system and is treated as
		 * Outer Shareable, so for these types of memory it is not
		 * strictly needed to set the shareability field.
		 */
		desc |= PTE_BLOCK_DESC_OUTER_SHARE;
		/* Map device memory as execute-never */
		desc |= PTE_BLOCK_DESC_PXN;
		desc |= PTE_BLOCK_DESC_UXN;
		break;
	case MT_NORMAL_NC:
	case MT_NORMAL:
		/* Mark Normal RW memory as execute-never */
		if ((attrs & MT_RW) || (attrs & MT_P_EXECUTE_NEVER)) {
			desc |= PTE_BLOCK_DESC_PXN;
		}

		if (((attrs & MT_RW) && (attrs & MT_RW_AP_ELx)) ||
		    (attrs & MT_U_EXECUTE_NEVER)) {
			desc |= PTE_BLOCK_DESC_UXN;
		}

		if (mem_type == MT_NORMAL) {
			desc |= PTE_BLOCK_DESC_INNER_SHARE;
		} else {
			desc |= PTE_BLOCK_DESC_OUTER_SHARE;
		}
	}

	/* non-Global bit */
	if (attrs & MT_NG) {
		desc |= PTE_BLOCK_DESC_NG;
	}

	return desc;
}

static int __add_map(struct arm_mmu_ptables *ptables, const char *name,
		     uintptr_t phys, uintptr_t virt, size_t size, uint32_t attrs)
{
	uint64_t desc = get_region_desc(attrs);
	bool may_overwrite = !(attrs & MT_NO_OVERWRITE);

	MMU_DEBUG("mmap [%s]: virt %lx phys %lx size %lx attr %llx %s overwrite\n",
		  name, virt, phys, size, desc,
		  may_overwrite ? "may" : "no");
	__ASSERT(((virt | phys | size) & (CONFIG_MMU_PAGE_SIZE - 1)) == 0,
		 "address/size are not page aligned\n");
	desc |= phys;
	return set_mapping(ptables, virt, size, desc, may_overwrite);
}

static int add_map(struct arm_mmu_ptables *ptables, const char *name,
		   uintptr_t phys, uintptr_t virt, size_t size, uint32_t attrs)
{
	k_spinlock_key_t key;
	int ret;

	key = k_spin_lock(&xlat_lock);
	ret = __add_map(ptables, name, phys, virt, size, attrs);
	k_spin_unlock(&xlat_lock, key);
	return ret;
}

static int remove_map(struct arm_mmu_ptables *ptables, const char *name,
		      uintptr_t virt, size_t size)
{
	k_spinlock_key_t key;
	int ret;

	MMU_DEBUG("unmap [%s]: virt %lx size %lx\n", name, virt, size);
	__ASSERT(((virt | size) & (CONFIG_MMU_PAGE_SIZE - 1)) == 0,
		 "address/size are not page aligned\n");

	key = k_spin_lock(&xlat_lock);
	ret = set_mapping(ptables, virt, size, 0, true);
	k_spin_unlock(&xlat_lock, key);
	return ret;
}

static void invalidate_tlb_all(void)
{
	__asm__ volatile (
		"dsb ishst; tlbi vmalle1; dsb ish; isb"
		: : : "memory");
}

/* zephyr execution regions with appropriate attributes */

struct arm_mmu_flat_range {
	char *name;
	void *start;
	void *end;
	uint32_t attrs;
};

static const struct arm_mmu_flat_range mmu_zephyr_ranges[] = {

	/* Mark the zephyr execution regions (data, bss, noinit, etc.)
	 * cacheable, read-write
	 * Note: read-write region is marked execute-never internally
	 */
	{ .name = "zephyr_data",
	  .start = _image_ram_start,
	  .end = _image_ram_end,
	  .attrs = MT_NORMAL | MT_P_RW_U_NA | MT_DEFAULT_SECURE_STATE },

	/* Mark text segment cacheable, read only and executable */
	{ .name = "zephyr_code",
	  .start = __text_region_start,
	  .end = __text_region_end,
	  .attrs = MT_NORMAL | MT_P_RX_U_RX | MT_DEFAULT_SECURE_STATE },

	/* Mark rodata segment cacheable, read only and execute-never */
	{ .name = "zephyr_rodata",
	  .start = __rodata_region_start,
	  .end = __rodata_region_end,
	  .attrs = MT_NORMAL | MT_P_RO_U_RO | MT_DEFAULT_SECURE_STATE },

#ifdef CONFIG_NOCACHE_MEMORY
	/* Mark nocache segment non-cacheable, read-write and execute-never */
	{ .name = "nocache_data",
	  .start = _nocache_ram_start,
	  .end = _nocache_ram_end,
	  .attrs = MT_NORMAL_NC | MT_P_RW_U_RW | MT_DEFAULT_SECURE_STATE },
#endif
};

static inline void add_arm_mmu_flat_range(struct arm_mmu_ptables *ptables,
					  const struct arm_mmu_flat_range *range,
					  uint32_t extra_flags)
{
	uintptr_t address = (uintptr_t)range->start;
	size_t size = (uintptr_t)range->end - address;

	if (size) {
		/* MMU not yet active: must use unlocked version */
		__add_map(ptables, range->name, address, address,
			  size, range->attrs | extra_flags);
	}
}

static inline void add_arm_mmu_region(struct arm_mmu_ptables *ptables,
				      const struct arm_mmu_region *region,
				      uint32_t extra_flags)
{
	if (region->size || region->attrs) {
		/* MMU not yet active: must use unlocked version */
		__add_map(ptables, region->name, region->base_pa, region->base_va,
			  region->size, region->attrs | extra_flags);
	}
}

static inline void inv_dcache_after_map_helper(void *virt, size_t size, uint32_t attrs)
{
	if (MT_TYPE(attrs) == MT_NORMAL || MT_TYPE(attrs) == MT_NORMAL_WT) {
		sys_cache_data_invd_range(virt, size);
	}
}

static void setup_page_tables(struct arm_mmu_ptables *ptables)
{
	unsigned int index;
	const struct arm_mmu_flat_range *range;
	const struct arm_mmu_region *region;
	uintptr_t max_va = 0, max_pa = 0;

	MMU_DEBUG("xlat tables:\n");
	for (index = 0U; index < CONFIG_MAX_XLAT_TABLES; index++) {
		MMU_DEBUG("%d: %p\n", index, xlat_tables + index * Ln_XLAT_NUM_ENTRIES);
	}

	for (index = 0U; index < mmu_config.num_regions; index++) {
		region = &mmu_config.mmu_regions[index];
		max_va = MAX(max_va, region->base_va + region->size);
		max_pa = MAX(max_pa, region->base_pa + region->size);
	}

	__ASSERT(max_va <= (1ULL << CONFIG_ARM64_VA_BITS),
		 "Maximum VA not supported\n");
	__ASSERT(max_pa <= (1ULL << CONFIG_ARM64_PA_BITS),
		 "Maximum PA not supported\n");

	/* setup translation table for zephyr execution regions */
	for (index = 0U; index < ARRAY_SIZE(mmu_zephyr_ranges); index++) {
		range = &mmu_zephyr_ranges[index];
		add_arm_mmu_flat_range(ptables, range, 0);
	}

	/*
	 * Create translation tables for user provided platform regions.
	 * Those must not conflict with our default mapping.
	 */
	for (index = 0U; index < mmu_config.num_regions; index++) {
		region = &mmu_config.mmu_regions[index];
		add_arm_mmu_region(ptables, region, MT_NO_OVERWRITE);
	}

	invalidate_tlb_all();

	for (index = 0U; index < ARRAY_SIZE(mmu_zephyr_ranges); index++) {
		size_t size;

		range = &mmu_zephyr_ranges[index];
		size = POINTER_TO_UINT(range->end) - POINTER_TO_UINT(range->start);
		inv_dcache_after_map_helper(range->start, size, range->attrs);
	}

	for (index = 0U; index < mmu_config.num_regions; index++) {
		region = &mmu_config.mmu_regions[index];
		inv_dcache_after_map_helper(UINT_TO_POINTER(region->base_va), region->size,
					    region->attrs);
	}
}

/* Translation table control register settings */
static uint64_t get_tcr(int el)
{
	uint64_t tcr;
	uint64_t va_bits = CONFIG_ARM64_VA_BITS;
	uint64_t tcr_ps_bits;

	tcr_ps_bits = TCR_PS_BITS;

	if (el == 1) {
		tcr = (tcr_ps_bits << TCR_EL1_IPS_SHIFT);
		/*
		 * TCR_EL1.EPD1: Disable translation table walk for addresses
		 * that are translated using TTBR1_EL1.
		 */
		tcr |= TCR_EPD1_DISABLE;
	} else {
		tcr = (tcr_ps_bits << TCR_EL3_PS_SHIFT);
	}

	tcr |= TCR_T0SZ(va_bits);

	/*
	 * Translation table walk is cacheable, inner/outer WBWA and
	 * inner shareable. Due to Cortex-A57 erratum #822227 we must
	 * also program TG1 for a 4KB granule even though TTBR1 walks
	 * are disabled.
	 */
	tcr |= TCR_TG1_4K | TCR_TG0_4K | TCR_SHARED_INNER |
	       TCR_ORGN_WBWA | TCR_IRGN_WBWA;

	return tcr;
}

static void enable_mmu_el1(struct arm_mmu_ptables *ptables, unsigned int flags)
{
	ARG_UNUSED(flags);
	uint64_t val;

	/* Set MAIR, TCR and TTBR registers */
	write_mair_el1(MEMORY_ATTRIBUTES);
	write_tcr_el1(get_tcr(1));
	write_ttbr0_el1((uint64_t)ptables->base_xlat_table);

	/* Ensure these changes are seen before MMU is enabled */
	barrier_isync_fence_full();

	/* Enable the MMU and data cache */
	val = read_sctlr_el1();
	write_sctlr_el1(val | SCTLR_M_BIT | SCTLR_C_BIT);

	/* Ensure the MMU enable takes effect immediately */
	barrier_isync_fence_full();

	MMU_DEBUG("MMU enabled with dcache\n");
}

/* ARM MMU Driver Initial Setup */

static struct arm_mmu_ptables kernel_ptables;
#ifdef CONFIG_USERSPACE
static sys_slist_t domain_list;
#endif

/*
 * @brief MMU default configuration
 *
 * This function provides the default configuration mechanism for the Memory
 * Management Unit (MMU).
 */
void z_arm64_mm_init(bool is_primary_core)
{
	unsigned int flags = 0U;

	__ASSERT(CONFIG_MMU_PAGE_SIZE == KB(4),
		 "Only 4K page size is supported\n");

	__ASSERT(GET_EL(read_currentel()) == MODE_EL1,
		 "Exception level not EL1, MMU not enabled!\n");

	/* Ensure that the MMU is not already enabled */
	__ASSERT((read_sctlr_el1() & SCTLR_M_BIT) == 0, "MMU is already enabled\n");

	/*
	 * Only the primary core sets up the page tables.
	 */
	if (is_primary_core) {
		kernel_ptables.base_xlat_table = new_table();
		setup_page_tables(&kernel_ptables);
	}

	/* currently only EL1 is supported */
	enable_mmu_el1(&kernel_ptables, flags);
}

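/*
 * Propagate a kernel page table update to every user memory domain's page
 * tables, so the affected range stays in sync with the kernel mapping.
 * No-op when CONFIG_USERSPACE is disabled.
 */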
static void sync_domains(uintptr_t virt, size_t size)
{
#ifdef CONFIG_USERSPACE
	sys_snode_t *node;
	struct arch_mem_domain *domain;
	struct arm_mmu_ptables *domain_ptables;
	k_spinlock_key_t key;
	int ret;

	key = k_spin_lock(&z_mem_domain_lock);
	SYS_SLIST_FOR_EACH_NODE(&domain_list, node) {
		domain = CONTAINER_OF(node, struct arch_mem_domain, node);
		domain_ptables = &domain->ptables;
		ret = globalize_page_range(domain_ptables, &kernel_ptables,
					   virt, size, "generic");
		if (ret) {
			LOG_ERR("globalize_page_range() returned %d", ret);
		}
	}
	k_spin_unlock(&z_mem_domain_lock, key);
#endif
}

static int __arch_mem_map(void *virt, uintptr_t phys, size_t size, uint32_t flags)
{
	struct arm_mmu_ptables *ptables;
	uint32_t entry_flags = MT_DEFAULT_SECURE_STATE | MT_P_RX_U_NA | MT_NO_OVERWRITE;

	/* Always map in the kernel page tables */
	ptables = &kernel_ptables;

	/* Translate flags argument into HW-recognized entry flags. */
	switch (flags & K_MEM_CACHE_MASK) {
	/*
	 * K_MEM_CACHE_NONE, K_MEM_ARM_DEVICE_nGnRnE => MT_DEVICE_nGnRnE
	 *			(Device memory nGnRnE)
	 * K_MEM_ARM_DEVICE_nGnRE => MT_DEVICE_nGnRE
	 *			(Device memory nGnRE)
	 * K_MEM_ARM_DEVICE_GRE => MT_DEVICE_GRE
	 *			(Device memory GRE)
	 * K_MEM_ARM_NORMAL_NC => MT_NORMAL_NC
	 *			(Normal memory Non-cacheable)
	 * K_MEM_CACHE_WB => MT_NORMAL
	 *			(Normal memory Outer WB + Inner WB)
	 * K_MEM_CACHE_WT => MT_NORMAL_WT
	 *			(Normal memory Outer WT + Inner WT)
	 */
	case K_MEM_CACHE_NONE:
	/* K_MEM_CACHE_NONE is equal to K_MEM_ARM_DEVICE_nGnRnE */
	/* case K_MEM_ARM_DEVICE_nGnRnE: */
		entry_flags |= MT_DEVICE_nGnRnE;
		break;
	case K_MEM_ARM_DEVICE_nGnRE:
		entry_flags |= MT_DEVICE_nGnRE;
		break;
	case K_MEM_ARM_DEVICE_GRE:
		entry_flags |= MT_DEVICE_GRE;
		break;
	case K_MEM_ARM_NORMAL_NC:
		entry_flags |= MT_NORMAL_NC;
		break;
	case K_MEM_CACHE_WT:
		entry_flags |= MT_NORMAL_WT;
		break;
	case K_MEM_CACHE_WB:
		entry_flags |= MT_NORMAL;
		break;
	default:
		return -ENOTSUP;
	}

	if ((flags & K_MEM_PERM_RW) != 0U) {
		entry_flags |= MT_RW;
	}

	if ((flags & K_MEM_PERM_EXEC) == 0U) {
		entry_flags |= MT_P_EXECUTE_NEVER;
	}

	if ((flags & K_MEM_PERM_USER) != 0U) {
		entry_flags |= MT_RW_AP_ELx;
	}

	return add_map(ptables, "generic", phys, (uintptr_t)virt, size, entry_flags);
}

void arch_mem_map(void *virt, uintptr_t phys, size_t size, uint32_t flags)
{
	int ret = __arch_mem_map(virt, phys, size, flags);

	if (ret) {
		LOG_ERR("__arch_mem_map() returned %d", ret);
		k_panic();
	} else {
		uint32_t mem_flags = flags & K_MEM_CACHE_MASK;

		sync_domains((uintptr_t)virt, size);
		invalidate_tlb_all();

		switch (mem_flags) {
		case K_MEM_CACHE_WB:
		case K_MEM_CACHE_WT:
			mem_flags = (mem_flags == K_MEM_CACHE_WB) ? MT_NORMAL : MT_NORMAL_WT;
			inv_dcache_after_map_helper(virt, size, mem_flags);
		default:
			break;
		}
	}
}

void arch_mem_unmap(void *addr, size_t size)
{
	int ret = remove_map(&kernel_ptables, "generic", (uintptr_t)addr, size);

	if (ret) {
		LOG_ERR("remove_map() returned %d", ret);
	} else {
		sync_domains((uintptr_t)addr, size);
		invalidate_tlb_all();
	}
}

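/*
 * Resolve the physical address backing a virtual address by issuing an
 * AT S1E1R (stage-1 EL1 read) address translation and reading the result
 * from PAR_EL1. PAR_EL1 bit 0 set means the translation faulted.
 */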
int arch_page_phys_get(void *virt, uintptr_t *phys)
{
	uint64_t par;
	int key;

	key = arch_irq_lock();
	__asm__ volatile ("at S1E1R, %0" : : "r" (virt));
	barrier_isync_fence_full();
	par = read_par_el1();
	arch_irq_unlock(key);

	if (par & BIT(0)) {
		return -EFAULT;
	}

	if (phys) {
		*phys = par & GENMASK(47, 12);
	}
	return 0;
}

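/*
 * Return the largest translation granularity (page or block size) that both
 * the physical address and the region size allow, so callers can pick a
 * virtual address alignment that lets the region be mapped with large blocks.
 */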
size_t arch_virt_region_align(uintptr_t phys, size_t size)
{
	size_t alignment = CONFIG_MMU_PAGE_SIZE;
	size_t level_size;
	int level;

	for (level = XLAT_LAST_LEVEL; level >= BASE_XLAT_LEVEL; level--) {
		level_size = 1ULL << LEVEL_TO_VA_SIZE_SHIFT(level);

		if (size < level_size) {
			break;
		}

		if ((phys & (level_size - 1))) {
			break;
		}

		alignment = level_size;
	}

	return alignment;
}

#ifdef CONFIG_USERSPACE

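/*
 * Each memory domain gets its own ASID so TLB entries belonging to different
 * domains can coexist. ASIDs are handed out round-robin starting from 1
 * (effectively leaving ASID 0 for the kernel tables) and are stored in the
 * upper bits of TTBR0 alongside the domain's base translation table address.
 */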
static uint16_t next_asid = 1;

static uint16_t get_asid(uint64_t ttbr0)
{
	return ttbr0 >> TTBR_ASID_SHIFT;
}

static void z_arm64_swap_ptables(struct k_thread *incoming);

int arch_mem_domain_max_partitions_get(void)
{
	return CONFIG_MAX_DOMAIN_PARTITIONS;
}

int arch_mem_domain_init(struct k_mem_domain *domain)
{
	struct arm_mmu_ptables *domain_ptables = &domain->arch.ptables;
	k_spinlock_key_t key;
	uint16_t asid;

	MMU_DEBUG("%s\n", __func__);

	key = k_spin_lock(&xlat_lock);

	/*
	 * Pick a new ASID. We use round-robin.
	 * Note: `next_asid` is a uint16_t and `VM_ASID_BITS` could
	 * be up to 16, hence `next_asid` might overflow to 0 below.
	 */
	asid = next_asid++;
	if ((next_asid >= (1UL << VM_ASID_BITS)) || (next_asid == 0)) {
		next_asid = 1;
	}

	domain_ptables->base_xlat_table =
		dup_table(kernel_ptables.base_xlat_table, BASE_XLAT_LEVEL);
	k_spin_unlock(&xlat_lock, key);
	if (!domain_ptables->base_xlat_table) {
		return -ENOMEM;
	}

	domain_ptables->ttbr0 = (((uint64_t)asid) << TTBR_ASID_SHIFT) |
				((uint64_t)(uintptr_t)domain_ptables->base_xlat_table);

	sys_slist_append(&domain_list, &domain->arch.node);
	return 0;
}

static int private_map(struct arm_mmu_ptables *ptables, const char *name,
		       uintptr_t phys, uintptr_t virt, size_t size, uint32_t attrs)
{
	int ret;

	ret = privatize_page_range(ptables, &kernel_ptables, virt, size, name);
	__ASSERT(ret == 0, "privatize_page_range() returned %d", ret);
	ret = add_map(ptables, name, phys, virt, size, attrs | MT_NG);
	__ASSERT(ret == 0, "add_map() returned %d", ret);
	invalidate_tlb_all();

	inv_dcache_after_map_helper(UINT_TO_POINTER(virt), size, attrs);
	return ret;
}

static int reset_map(struct arm_mmu_ptables *ptables, const char *name,
		     uintptr_t addr, size_t size)
{
	int ret;

	ret = globalize_page_range(ptables, &kernel_ptables, addr, size, name);
	__ASSERT(ret == 0, "globalize_page_range() returned %d", ret);
	invalidate_tlb_all();

	return ret;
}

int arch_mem_domain_partition_add(struct k_mem_domain *domain,
				  uint32_t partition_id)
{
	struct arm_mmu_ptables *domain_ptables = &domain->arch.ptables;
	struct k_mem_partition *ptn = &domain->partitions[partition_id];

	return private_map(domain_ptables, "partition", ptn->start, ptn->start,
			   ptn->size, ptn->attr.attrs | MT_NORMAL);
}

int arch_mem_domain_partition_remove(struct k_mem_domain *domain,
				     uint32_t partition_id)
{
	struct arm_mmu_ptables *domain_ptables = &domain->arch.ptables;
	struct k_mem_partition *ptn = &domain->partitions[partition_id];

	return reset_map(domain_ptables, "partition removal",
			 ptn->start, ptn->size);
}

static int map_thread_stack(struct k_thread *thread,
			    struct arm_mmu_ptables *ptables)
{
	return private_map(ptables, "thread_stack", thread->stack_info.start,
			   thread->stack_info.start, thread->stack_info.size,
			   MT_P_RW_U_RW | MT_NORMAL);
}

int arch_mem_domain_thread_add(struct k_thread *thread)
{
	struct arm_mmu_ptables *old_ptables, *domain_ptables;
	struct k_mem_domain *domain;
	bool is_user, is_migration;
	int ret = 0;

	domain = thread->mem_domain_info.mem_domain;
	domain_ptables = &domain->arch.ptables;
	old_ptables = thread->arch.ptables;

	is_user = (thread->base.user_options & K_USER) != 0;
	is_migration = (old_ptables != NULL) && is_user;

	if (is_migration) {
		ret = map_thread_stack(thread, domain_ptables);
	}

	thread->arch.ptables = domain_ptables;
	if (thread == _current) {
		z_arm64_swap_ptables(thread);
	} else {
#ifdef CONFIG_SMP
		/* the thread could be running on another CPU right now */
		z_arm64_mem_cfg_ipi();
#endif
	}

	if (is_migration) {
		ret = reset_map(old_ptables, __func__, thread->stack_info.start,
				thread->stack_info.size);
	}

	return ret;
}

int arch_mem_domain_thread_remove(struct k_thread *thread)
{
	struct arm_mmu_ptables *domain_ptables;
	struct k_mem_domain *domain;

	domain = thread->mem_domain_info.mem_domain;
	domain_ptables = &domain->arch.ptables;

	if ((thread->base.user_options & K_USER) == 0) {
		return 0;
	}

	if ((thread->base.thread_state & _THREAD_DEAD) == 0) {
		return 0;
	}

	return reset_map(domain_ptables, __func__, thread->stack_info.start,
			 thread->stack_info.size);
}

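/*
 * Switch TTBR0_EL1 to the incoming thread's page tables. Because TLB entries
 * are tagged with the ASID, a full TLB invalidation is only required when the
 * new tables reuse the same ASID as the outgoing ones.
 */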
static void z_arm64_swap_ptables(struct k_thread *incoming)
{
	struct arm_mmu_ptables *ptables = incoming->arch.ptables;
	uint64_t curr_ttbr0 = read_ttbr0_el1();
	uint64_t new_ttbr0 = ptables->ttbr0;

	if (curr_ttbr0 == new_ttbr0) {
		return; /* Already the right tables */
	}

	z_arm64_set_ttbr0(new_ttbr0);

	if (get_asid(curr_ttbr0) == get_asid(new_ttbr0)) {
		invalidate_tlb_all();
	}
}

void z_arm64_thread_mem_domains_init(struct k_thread *incoming)
{
	struct arm_mmu_ptables *ptables;

	if ((incoming->base.user_options & K_USER) == 0) {
		return;
	}

	ptables = incoming->arch.ptables;

	/* Map the thread stack */
	map_thread_stack(incoming, ptables);

	z_arm64_swap_ptables(incoming);
}

void z_arm64_swap_mem_domains(struct k_thread *incoming)
{
	z_arm64_swap_ptables(incoming);
}

#endif /* CONFIG_USERSPACE */