1 /*
2 * Copyright 2019 Broadcom
3 * The term "Broadcom" refers to Broadcom Inc. and/or its subsidiaries.
4 *
5 * Copyright (c) 2021 BayLibre, SAS
6 *
7 * SPDX-License-Identifier: Apache-2.0
8 */
9
10 #include <zephyr/cache.h>
11 #include <zephyr/device.h>
12 #include <zephyr/init.h>
13 #include <zephyr/kernel.h>
14 #include <zephyr/kernel/mm/demand_paging.h>
15 #include <kernel_arch_func.h>
16 #include <kernel_arch_interface.h>
17 #include <kernel_internal.h>
18 #include <zephyr/logging/log.h>
19 #include <zephyr/arch/arm64/cpu.h>
20 #include <zephyr/arch/arm64/lib_helpers.h>
21 #include <zephyr/arch/arm64/mm.h>
22 #include <zephyr/linker/linker-defs.h>
23 #include <zephyr/spinlock.h>
24 #include <zephyr/sys/util.h>
25 #include <mmu.h>
26
27 #include "mmu.h"
28 #include "paging.h"
29
30 LOG_MODULE_DECLARE(os, CONFIG_KERNEL_LOG_LEVEL);
31
32 static uint64_t xlat_tables[CONFIG_MAX_XLAT_TABLES * Ln_XLAT_NUM_ENTRIES]
33 __aligned(Ln_XLAT_NUM_ENTRIES * sizeof(uint64_t));
34 static int xlat_use_count[CONFIG_MAX_XLAT_TABLES];
35 static struct k_spinlock xlat_lock;
36
37 /* Usage count value range */
38 #define XLAT_PTE_COUNT_MASK GENMASK(15, 0)
39 #define XLAT_REF_COUNT_UNIT BIT(16)
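/*
 * Worked example (illustrative): a table referenced by two parent entries
 * and holding three populated PTEs has a use count of
 * 2 * XLAT_REF_COUNT_UNIT + 3 = 0x20003. The low 16 bits count the PTEs
 * in use within the table, the upper bits count references to the table
 * itself.
 */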
40
41 /* Returns a reference to a free table */
static uint64_t *new_table(void)
43 {
44 uint64_t *table;
45 unsigned int i;
46
47 /* Look for a free table. */
48 for (i = 0U; i < CONFIG_MAX_XLAT_TABLES; i++) {
49 if (xlat_use_count[i] == 0) {
50 table = &xlat_tables[i * Ln_XLAT_NUM_ENTRIES];
51 xlat_use_count[i] = XLAT_REF_COUNT_UNIT;
52 MMU_DEBUG("allocating table [%d]%p\n", i, table);
53 return table;
54 }
55 }
56
LOG_ERR("CONFIG_MAX_XLAT_TABLES too small");
58 return NULL;
59 }
60
static inline unsigned int table_index(uint64_t *pte)
62 {
63 unsigned int i = (pte - xlat_tables) / Ln_XLAT_NUM_ENTRIES;
64
65 __ASSERT(i < CONFIG_MAX_XLAT_TABLES, "table %p out of range", pte);
66 return i;
67 }
68
69 /* Adjusts usage count and returns current count. */
static int table_usage(uint64_t *table, int adjustment)
71 {
72 unsigned int i = table_index(table);
73 int prev_count = xlat_use_count[i];
74 int new_count = prev_count + adjustment;
75
76 /* be reasonable not to always create a debug flood */
77 if ((IS_ENABLED(DUMP_PTE) && adjustment != 0) || new_count == 0) {
78 MMU_DEBUG("table [%d]%p: usage %#x -> %#x\n", i, table, prev_count, new_count);
79 }
80
81 __ASSERT(new_count >= 0,
82 "table use count underflow");
83 __ASSERT(new_count == 0 || new_count >= XLAT_REF_COUNT_UNIT,
84 "table in use with no reference to it");
85 __ASSERT((new_count & XLAT_PTE_COUNT_MASK) <= Ln_XLAT_NUM_ENTRIES,
86 "table PTE count overflow");
87
88 xlat_use_count[i] = new_count;
89 return new_count;
90 }
91
static inline void inc_table_ref(uint64_t *table)
93 {
94 table_usage(table, XLAT_REF_COUNT_UNIT);
95 }
96
static inline void dec_table_ref(uint64_t *table)
98 {
99 int ref_unit = XLAT_REF_COUNT_UNIT;
100
101 table_usage(table, -ref_unit);
102 }
103
static inline bool is_table_unused(uint64_t *table)
105 {
106 return (table_usage(table, 0) & XLAT_PTE_COUNT_MASK) == 0;
107 }
108
static inline bool is_table_single_referenced(uint64_t *table)
110 {
111 return table_usage(table, 0) < (2 * XLAT_REF_COUNT_UNIT);
112 }
113
114 #ifdef CONFIG_TEST
115 /* Hooks to let test code peek at table states */
116
int arm64_mmu_nb_free_tables(void)
118 {
119 int count = 0;
120
121 for (int i = 0; i < CONFIG_MAX_XLAT_TABLES; i++) {
122 if (xlat_use_count[i] == 0) {
123 count++;
124 }
125 }
126
127 return count;
128 }
129
int arm64_mmu_tables_total_usage(void)
131 {
132 int count = 0;
133
134 for (int i = 0; i < CONFIG_MAX_XLAT_TABLES; i++) {
135 count += xlat_use_count[i];
136 }
137
138 return count;
139 }
140
141 #endif /* CONFIG_TEST */
142
static inline bool is_free_desc(uint64_t desc)
144 {
145 return desc == 0;
146 }
147
static inline bool is_inval_desc(uint64_t desc)
149 {
150 /* invalid descriptors aren't necessarily free */
151 return (desc & PTE_DESC_TYPE_MASK) == PTE_INVALID_DESC;
152 }
153
static inline bool is_table_desc(uint64_t desc, unsigned int level)
155 {
156 return level != XLAT_LAST_LEVEL &&
157 (desc & PTE_DESC_TYPE_MASK) == PTE_TABLE_DESC;
158 }
159
static inline bool is_block_desc(uint64_t desc)
161 {
162 return (desc & PTE_DESC_TYPE_MASK) == PTE_BLOCK_DESC;
163 }
164
static inline uint64_t *pte_desc_table(uint64_t desc)
166 {
167 uint64_t address = desc & PTE_PHYSADDR_MASK;
168
169 /* tables use a 1:1 physical:virtual mapping */
170 return (uint64_t *)address;
171 }
172
static inline bool is_desc_block_aligned(uint64_t desc, unsigned int level_size)
174 {
175 bool aligned = (desc & PTE_PHYSADDR_MASK & (level_size - 1)) == 0;
176
177 if (!aligned) {
178 MMU_DEBUG("misaligned desc 0x%016llx for block size 0x%x\n",
179 desc, level_size);
180 }
181
182 return aligned;
183 }
184
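/*
 * Two descriptors are treated as equivalent here when their attributes and
 * the output-address bits above the span of one entry at this level match.
 * For example, with the 4KB granule used by this driver, a level 2 entry
 * spans 2MB, so only the attribute bits and address bits [47:21] are
 * compared; lower bits are just the offset within the same block.
 */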
static inline bool is_desc_superset(uint64_t desc1, uint64_t desc2,
                                    unsigned int level)
187 {
188 uint64_t mask = DESC_ATTRS_MASK | GENMASK64(47, LEVEL_TO_VA_SIZE_SHIFT(level));
189
190 return (desc1 & mask) == (desc2 & mask);
191 }
192
193 #if DUMP_PTE
static void debug_show_pte(uint64_t *pte, unsigned int level)
195 {
196 MMU_DEBUG("%.*s", level * 2U, ". . . ");
197 MMU_DEBUG("[%d]%p: ", table_index(pte), pte);
198
199 if (is_free_desc(*pte)) {
200 MMU_DEBUG("---\n");
201 return;
202 }
203
204 MMU_DEBUG("0x%016llx ", *pte);
205
206 if (is_table_desc(*pte, level)) {
207 uint64_t *table = pte_desc_table(*pte);
208
209 MMU_DEBUG("[Table] [%d]%p\n", table_index(table), table);
210 return;
211 }
212
213 if (is_block_desc(*pte)) {
214 MMU_DEBUG("[Block] ");
215 } else if (!is_inval_desc(*pte)) {
216 MMU_DEBUG("[Page] ");
217 } else {
218 MMU_DEBUG("[paged-out] ");
219 }
220
221 uint8_t mem_type = (*pte >> 2) & MT_TYPE_MASK;
222
223 MMU_DEBUG((mem_type == MT_NORMAL) ? "MEM" :
224 ((mem_type == MT_NORMAL_NC) ? "NC" : "DEV"));
225 MMU_DEBUG((*pte & PTE_BLOCK_DESC_AP_RO) ? "-RO" : "-RW");
226 MMU_DEBUG((*pte & PTE_BLOCK_DESC_NS) ? "-NS" : "-S");
227 MMU_DEBUG((*pte & PTE_BLOCK_DESC_AP_ELx) ? "-ELx" : "-ELh");
228 MMU_DEBUG((*pte & PTE_BLOCK_DESC_PXN) ? "-PXN" : "-PX");
229 MMU_DEBUG((*pte & PTE_BLOCK_DESC_UXN) ? "-UXN" : "-UX");
230 MMU_DEBUG((*pte & PTE_SW_WRITABLE) ? "-WRITABLE" : "");
231 MMU_DEBUG("\n");
232 }
233 #else
static inline void debug_show_pte(uint64_t *pte, unsigned int level) { }
235 #endif
236
static void set_pte_table_desc(uint64_t *pte, uint64_t *table, unsigned int level)
238 {
239 /* Point pte to new table */
240 *pte = PTE_TABLE_DESC | (uint64_t)table;
241 debug_show_pte(pte, level);
242 }
243
static void set_pte_block_desc(uint64_t *pte, uint64_t desc, unsigned int level)
245 {
246 if (level != XLAT_LAST_LEVEL) {
247 desc |= PTE_BLOCK_DESC;
248 } else if (!IS_ENABLED(CONFIG_DEMAND_PAGING) || (desc & PTE_BLOCK_DESC_AF) != 0) {
249 desc |= PTE_PAGE_DESC;
250 } else {
251 /*
252 * Demand paging configured and AF unset: leave the descriptor
253 * type to "invalid" as in arch_mem_page_out().
254 */
255 }
256 *pte = desc;
257 debug_show_pte(pte, level);
258 }
259
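/*
 * Replace the block (or free) entry pointed to by pte with a next-level
 * table. If the entry was a block mapping, every entry of the new table
 * inherits its attributes so the effective mapping is unchanged: with the
 * 4KB granule, a 2MB level 2 block becomes a level 3 table of 512 page
 * descriptors whose output addresses increase in 4KB steps.
 */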
static uint64_t *expand_to_table(uint64_t *pte, unsigned int level)
261 {
262 uint64_t *table;
263
264 __ASSERT(level < XLAT_LAST_LEVEL, "can't expand last level");
265
266 table = new_table();
267 if (!table) {
268 return NULL;
269 }
270
271 if (!is_free_desc(*pte)) {
272 /*
273 * If entry at current level was already populated
274 * then we need to reflect that in the new table.
275 */
276 uint64_t desc = *pte;
277 unsigned int i, stride_shift;
278
279 MMU_DEBUG("expanding PTE 0x%016llx into table [%d]%p\n",
280 desc, table_index(table), table);
281 __ASSERT(is_block_desc(desc), "");
282
283 if (level + 1 == XLAT_LAST_LEVEL) {
284 desc |= PTE_PAGE_DESC;
285 }
286
287 stride_shift = LEVEL_TO_VA_SIZE_SHIFT(level + 1);
288 for (i = 0U; i < Ln_XLAT_NUM_ENTRIES; i++) {
289 table[i] = desc | (i << stride_shift);
290 }
291 table_usage(table, Ln_XLAT_NUM_ENTRIES);
292 } else {
293 /*
294 * Adjust usage count for parent table's entry
295 * that will no longer be free.
296 */
297 table_usage(pte, 1);
298 }
299
300 /* Link the new table in place of the pte it replaces */
301 set_pte_table_desc(pte, table, level);
302
303 return table;
304 }
305
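/*
 * Walk the tables from the top and install the given descriptor over the
 * whole [virt, virt + size) range. Chunks that are large enough and
 * sufficiently aligned get a single block/page descriptor at the current
 * level; anything smaller or misaligned is pushed down one level via
 * expand_to_table(). For example, mapping 4MB at a 2MB-aligned address
 * with the 4KB granule produces two level 2 block descriptors.
 */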
static int set_mapping(uint64_t *top_table, uintptr_t virt, size_t size,
                       uint64_t desc, bool may_overwrite)
308 {
309 uint64_t *table = top_table;
310 uint64_t *pte;
311 uint64_t level_size;
312 unsigned int level = BASE_XLAT_LEVEL;
313
314 while (size) {
315 __ASSERT(level <= XLAT_LAST_LEVEL,
316 "max translation table level exceeded\n");
317
318 /* Locate PTE for given virtual address and page table level */
319 pte = &table[XLAT_TABLE_VA_IDX(virt, level)];
320
321 if (is_table_desc(*pte, level)) {
322 /* Move to the next translation table level */
323 level++;
324 table = pte_desc_table(*pte);
325 continue;
326 }
327
328 if (!may_overwrite && !is_free_desc(*pte)) {
329 /* the entry is already allocated */
330 LOG_ERR("entry already in use: "
331 "level %d pte %p *pte 0x%016llx",
332 level, pte, *pte);
333 return -EBUSY;
334 }
335
336 level_size = 1ULL << LEVEL_TO_VA_SIZE_SHIFT(level);
337
338 if (is_desc_superset(*pte, desc, level)) {
339 /* This block already covers our range */
340 level_size -= (virt & (level_size - 1));
341 if (level_size > size) {
342 level_size = size;
343 }
344 goto move_on;
345 }
346
347 if ((size < level_size) || (virt & (level_size - 1)) ||
348 !is_desc_block_aligned(desc, level_size)) {
349 /* Range doesn't fit, create subtable */
350 table = expand_to_table(pte, level);
351 if (!table) {
352 return -ENOMEM;
353 }
354 level++;
355 continue;
356 }
357
358 /* Adjust usage count for corresponding table */
359 if (is_free_desc(*pte)) {
360 table_usage(pte, 1);
361 }
362 /* Create block/page descriptor */
363 set_pte_block_desc(pte, desc, level);
364
365 move_on:
366 virt += level_size;
367 desc += level_size;
368 size -= level_size;
369
370 /* Range is mapped, start again for next range */
371 table = top_table;
372 level = BASE_XLAT_LEVEL;
373 }
374
375 return 0;
376 }
377
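/*
 * Remove the mappings covering [virt, virt + size) at the given level,
 * recursing into subtables as needed. A block mapping that is only
 * partially covered is first split with expand_to_table(). Subtables that
 * end up empty are unreferenced so they return to the free pool.
 */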
static void del_mapping(uint64_t *table, uintptr_t virt, size_t size,
                        unsigned int level)
380 {
381 size_t step, level_size = 1ULL << LEVEL_TO_VA_SIZE_SHIFT(level);
382 uint64_t *pte, *subtable;
383
384 for ( ; size; virt += step, size -= step) {
385 step = level_size - (virt & (level_size - 1));
386 if (step > size) {
387 step = size;
388 }
389 pte = &table[XLAT_TABLE_VA_IDX(virt, level)];
390
391 if (is_free_desc(*pte)) {
392 continue;
393 }
394
395 if (step != level_size && is_block_desc(*pte)) {
396 /* need to split this block mapping */
397 expand_to_table(pte, level);
398 }
399
400 if (is_table_desc(*pte, level)) {
401 subtable = pte_desc_table(*pte);
402 del_mapping(subtable, virt, step, level + 1);
403 if (!is_table_unused(subtable)) {
404 continue;
405 }
406 dec_table_ref(subtable);
407 }
408
409 /* free this entry */
410 *pte = 0;
411 table_usage(pte, -1);
412 }
413 }
414
415 #ifdef CONFIG_USERSPACE
416
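/*
 * Make a shallow copy of src_table: table descriptors are shared (only
 * their reference count is bumped) rather than copied recursively, so the
 * two trees diverge lazily, one level at a time, as privatize_table()
 * walks down into them.
 */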
static uint64_t *dup_table(uint64_t *src_table, unsigned int level)
418 {
419 uint64_t *dst_table = new_table();
420 int i, usage_count = 0;
421
422 if (!dst_table) {
423 return NULL;
424 }
425
426 MMU_DEBUG("dup (level %d) [%d]%p to [%d]%p\n", level,
427 table_index(src_table), src_table,
428 table_index(dst_table), dst_table);
429
430 for (i = 0; i < Ln_XLAT_NUM_ENTRIES; i++) {
431 /*
432 * After the table duplication, each table can be independently
433 * updated. Thus, entries may become non-global.
434 * To keep the invariants very simple, we thus force the non-global
435 * bit on duplication. Moreover, there is no process to revert this
436 * (e.g. in `globalize_table`). Could be improved in future work.
437 */
438 if (!is_free_desc(src_table[i]) && !is_table_desc(src_table[i], level)) {
439 src_table[i] |= PTE_BLOCK_DESC_NG;
440 }
441
442 dst_table[i] = src_table[i];
443 if (is_table_desc(dst_table[i], level)) {
444 inc_table_ref(pte_desc_table(dst_table[i]));
445 }
446 if (!is_free_desc(dst_table[i])) {
447 usage_count++;
448 }
449 }
450 table_usage(dst_table, usage_count);
451
452 return dst_table;
453 }
454
static int privatize_table(uint64_t *dst_table, uint64_t *src_table,
                           uintptr_t virt, size_t size, unsigned int level)
457 {
458 size_t step, level_size = 1ULL << LEVEL_TO_VA_SIZE_SHIFT(level);
459 unsigned int i;
460 int ret;
461
462 for ( ; size; virt += step, size -= step) {
463 step = level_size - (virt & (level_size - 1));
464 if (step > size) {
465 step = size;
466 }
467 i = XLAT_TABLE_VA_IDX(virt, level);
468
469 if (!is_table_desc(dst_table[i], level) ||
470 !is_table_desc(src_table[i], level)) {
471 /* this entry is already private */
472 continue;
473 }
474
475 uint64_t *dst_subtable = pte_desc_table(dst_table[i]);
476 uint64_t *src_subtable = pte_desc_table(src_table[i]);
477
478 if (dst_subtable == src_subtable) {
479 /* need to make a private copy of this table */
480 dst_subtable = dup_table(src_subtable, level + 1);
481 if (!dst_subtable) {
482 return -ENOMEM;
483 }
484 set_pte_table_desc(&dst_table[i], dst_subtable, level);
485 dec_table_ref(src_subtable);
486 }
487
488 ret = privatize_table(dst_subtable, src_subtable,
489 virt, step, level + 1);
490 if (ret) {
491 return ret;
492 }
493 }
494
495 return 0;
496 }
497
498 /*
* Make the given virtual address range private in dst_pt with regard to
* src_pt: corresponding page tables in dst_pt will be duplicated so as
* not to share the same table(s) with src_pt.
502 * If corresponding page tables in dst_pt are already distinct from src_pt
503 * then nothing is done. This allows for subsequent mapping changes in that
504 * range to affect only dst_pt.
505 */
static int privatize_page_range(struct arm_mmu_ptables *dst_pt,
                                struct arm_mmu_ptables *src_pt,
                                uintptr_t virt_start, size_t size,
                                const char *name)
510 {
511 k_spinlock_key_t key;
512 int ret;
513
514 MMU_DEBUG("privatize [%s]: virt %lx size %lx\n",
515 name, virt_start, size);
516
517 key = k_spin_lock(&xlat_lock);
518
519 ret = privatize_table(dst_pt->base_xlat_table, src_pt->base_xlat_table,
520 virt_start, size, BASE_XLAT_LEVEL);
521
522 k_spin_unlock(&xlat_lock, key);
523 return ret;
524 }
525
static void discard_table(uint64_t *table, unsigned int level)
527 {
528 unsigned int i;
529 int free_count = 0;
530
531 for (i = 0U; i < Ln_XLAT_NUM_ENTRIES; i++) {
532 if (is_table_desc(table[i], level)) {
533 uint64_t *subtable = pte_desc_table(table[i]);
534
535 if (is_table_single_referenced(subtable)) {
536 discard_table(subtable, level + 1);
537 }
538 dec_table_ref(subtable);
539 }
540 if (!is_free_desc(table[i])) {
541 table[i] = 0U;
542 free_count++;
543 }
544 }
545 table_usage(table, -free_count);
546 }
547
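/*
 * Make dst_table identical to src_table over [virt, virt + size). Per
 * entry there are three early cases: already identical (skip), present
 * only in dst (unmap and possibly free the private subtable), or a partial
 * overlap (recurse with finer-grained tables). Otherwise the private entry
 * is replaced by the global one and any private subtree it pointed to is
 * discarded.
 */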
static int globalize_table(uint64_t *dst_table, uint64_t *src_table,
                           uintptr_t virt, size_t size, unsigned int level)
550 {
551 size_t step, level_size = 1ULL << LEVEL_TO_VA_SIZE_SHIFT(level);
552 unsigned int i;
553 int ret;
554
555 for ( ; size; virt += step, size -= step) {
556 step = level_size - (virt & (level_size - 1));
557 if (step > size) {
558 step = size;
559 }
560 i = XLAT_TABLE_VA_IDX(virt, level);
561
562 if (dst_table[i] == src_table[i]) {
563 /* already identical to global table */
564 continue;
565 }
566
567 if (is_free_desc(src_table[i]) &&
568 is_table_desc(dst_table[i], level)) {
569 uint64_t *subtable = pte_desc_table(dst_table[i]);
570
571 del_mapping(subtable, virt, step, level + 1);
572 if (is_table_unused(subtable)) {
573 /* unreference the empty table */
574 dst_table[i] = 0;
575 table_usage(dst_table, -1);
576 dec_table_ref(subtable);
577 }
578 continue;
579 }
580
581 if (step != level_size) {
582 /* boundary falls in the middle of this pte */
583 __ASSERT(is_table_desc(src_table[i], level),
584 "can't have partial block pte here");
585 if (!is_table_desc(dst_table[i], level)) {
586 /* we need more fine grained boundaries */
587 if (!expand_to_table(&dst_table[i], level)) {
588 return -ENOMEM;
589 }
590 }
591 ret = globalize_table(pte_desc_table(dst_table[i]),
592 pte_desc_table(src_table[i]),
593 virt, step, level + 1);
594 if (ret) {
595 return ret;
596 }
597 continue;
598 }
599
600 /* we discard current pte and replace with global one */
601
602 uint64_t *old_table = is_table_desc(dst_table[i], level) ?
603 pte_desc_table(dst_table[i]) : NULL;
604
605 if (is_free_desc(dst_table[i])) {
606 table_usage(dst_table, 1);
607 }
608 if (is_free_desc(src_table[i])) {
609 table_usage(dst_table, -1);
610 }
611 if (is_table_desc(src_table[i], level)) {
612 inc_table_ref(pte_desc_table(src_table[i]));
613 }
614 dst_table[i] = src_table[i];
615 debug_show_pte(&dst_table[i], level);
616
617 if (old_table) {
618 /* we can discard the whole branch */
619 discard_table(old_table, level + 1);
620 dec_table_ref(old_table);
621 }
622 }
623
624 return 0;
625 }
626
627 /*
628 * Globalize the given virtual address range in dst_pt from src_pt. We make
629 * it global by sharing as much page table content from src_pt as possible,
630 * including page tables themselves, and corresponding private tables in
631 * dst_pt are then discarded. If page tables in the given range are already
632 * shared then nothing is done. If page table sharing is not possible then
633 * page table entries in dst_pt are synchronized with those from src_pt.
634 */
static int globalize_page_range(struct arm_mmu_ptables *dst_pt,
                                struct arm_mmu_ptables *src_pt,
                                uintptr_t virt_start, size_t size,
                                const char *name)
639 {
640 k_spinlock_key_t key;
641 int ret;
642
643 MMU_DEBUG("globalize [%s]: virt %lx size %lx\n",
644 name, virt_start, size);
645
646 key = k_spin_lock(&xlat_lock);
647
648 ret = globalize_table(dst_pt->base_xlat_table, src_pt->base_xlat_table,
649 virt_start, size, BASE_XLAT_LEVEL);
650
651 k_spin_unlock(&xlat_lock, key);
652 return ret;
653 }
654
655 #endif /* CONFIG_USERSPACE */
656
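/*
 * Example (illustrative): an attrs value of
 * MT_NORMAL | MT_P_RW_U_NA | MT_DEFAULT_SECURE_STATE, as used for the
 * kernel data region below, yields roughly: AF set, inner-shareable
 * normal memory, AP read-write for the privileged level only, and PXN set
 * because writable normal memory is never mapped privileged-executable.
 */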
static uint64_t get_region_desc(uint32_t attrs)
658 {
659 unsigned int mem_type;
660 uint64_t desc = 0U;
661
/* NS bit: when translating from the secure state, access non-secure memory */
663 desc |= (attrs & MT_NS) ? PTE_BLOCK_DESC_NS : 0;
664
665 /*
666 * AP bits for EL0 / ELh Data access permission
667 *
668 * AP[2:1] ELh EL0
669 * +--------------------+
670 * 00 RW NA
671 * 01 RW RW
672 * 10 RO NA
673 * 11 RO RO
674 */
675
676 /* AP bits for Data access permission */
677 desc |= (attrs & MT_RW) ? PTE_BLOCK_DESC_AP_RW : PTE_BLOCK_DESC_AP_RO;
678 desc |= (IS_ENABLED(CONFIG_DEMAND_PAGING) && (attrs & MT_RW)) ?
679 PTE_SW_WRITABLE : 0;
680
681 /* Mirror permissions to EL0 */
682 desc |= (attrs & MT_RW_AP_ELx) ?
683 PTE_BLOCK_DESC_AP_ELx : PTE_BLOCK_DESC_AP_EL_HIGHER;
684
685 /* the access flag */
686 desc |= PTE_BLOCK_DESC_AF;
687 if (IS_ENABLED(CONFIG_DEMAND_PAGING) && (attrs & MT_PAGED_OUT) != 0) {
688 /* set it up for demand paging like arch_mem_page_out() */
689 desc &= ~PTE_BLOCK_DESC_AF;
690 desc |= PTE_BLOCK_DESC_AP_RO;
691 }
692
693 /* memory attribute index field */
694 mem_type = MT_TYPE(attrs);
695 desc |= PTE_BLOCK_DESC_MEMTYPE(mem_type);
696
697 switch (mem_type) {
698 case MT_DEVICE_nGnRnE:
699 case MT_DEVICE_nGnRE:
700 case MT_DEVICE_GRE:
/* Accesses to device memory and non-cacheable memory are coherent
* for all observers in the system and are treated as
* Outer Shareable, so for these two memory types it is not
* strictly necessary to set the shareability field.
*/
706 desc |= PTE_BLOCK_DESC_OUTER_SHARE;
707 /* Map device memory as execute-never */
708 desc |= PTE_BLOCK_DESC_PXN;
709 desc |= PTE_BLOCK_DESC_UXN;
710 break;
711 case MT_NORMAL_NC:
712 case MT_NORMAL:
/* Mark normal RW memory as execute-never */
714 if ((attrs & MT_RW) || (attrs & MT_P_EXECUTE_NEVER)) {
715 desc |= PTE_BLOCK_DESC_PXN;
716 }
717
718 if (((attrs & MT_RW) && (attrs & MT_RW_AP_ELx)) ||
719 (attrs & MT_U_EXECUTE_NEVER)) {
720 desc |= PTE_BLOCK_DESC_UXN;
721 }
722
723 if (mem_type == MT_NORMAL) {
724 desc |= PTE_BLOCK_DESC_INNER_SHARE;
725 } else {
726 desc |= PTE_BLOCK_DESC_OUTER_SHARE;
727 }
728 }
729
730 /* non-Global bit */
731 if (attrs & MT_NG) {
732 desc |= PTE_BLOCK_DESC_NG;
733 }
734
735 return desc;
736 }
737
static int __add_map(struct arm_mmu_ptables *ptables, const char *name,
                     uintptr_t phys, uintptr_t virt, size_t size, uint32_t attrs)
740 {
741 uint64_t desc = get_region_desc(attrs);
742 bool may_overwrite = !(attrs & MT_NO_OVERWRITE);
743
744 MMU_DEBUG("mmap [%s]: virt %lx phys %lx size %lx attr %llx %s overwrite\n",
745 name, virt, phys, size, desc,
746 may_overwrite ? "may" : "no");
747 __ASSERT(((virt | phys | size) & (CONFIG_MMU_PAGE_SIZE - 1)) == 0,
748 "address/size are not page aligned\n");
749 desc |= phys;
750 return set_mapping(ptables->base_xlat_table, virt, size, desc, may_overwrite);
751 }
752
static int add_map(struct arm_mmu_ptables *ptables, const char *name,
                   uintptr_t phys, uintptr_t virt, size_t size, uint32_t attrs)
755 {
756 k_spinlock_key_t key;
757 int ret;
758
759 key = k_spin_lock(&xlat_lock);
760 ret = __add_map(ptables, name, phys, virt, size, attrs);
761 k_spin_unlock(&xlat_lock, key);
762 return ret;
763 }
764
static void remove_map(struct arm_mmu_ptables *ptables, const char *name,
                       uintptr_t virt, size_t size)
767 {
768 k_spinlock_key_t key;
769
MMU_DEBUG("unmap [%s]: virt %lx size %lx\n", name, virt, size);
771 __ASSERT(((virt | size) & (CONFIG_MMU_PAGE_SIZE - 1)) == 0,
772 "address/size are not page aligned\n");
773
774 key = k_spin_lock(&xlat_lock);
775 del_mapping(ptables->base_xlat_table, virt, size, BASE_XLAT_LEVEL);
776 k_spin_unlock(&xlat_lock, key);
777 }
778
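/*
 * Barrier sequence: dsb ishst makes prior page table updates visible to
 * the table walker, tlbi vmalle1 drops all stage 1 EL1&0 TLB entries for
 * the current VMID, dsb ish waits for the invalidation to complete across
 * the inner-shareable domain, and isb resynchronizes the instruction
 * stream.
 */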
static void invalidate_tlb_all(void)
780 {
781 __asm__ volatile (
782 "dsb ishst; tlbi vmalle1; dsb ish; isb"
783 : : : "memory");
784 }
785
static inline void invalidate_tlb_page(uintptr_t virt)
787 {
788 /* to be refined */
789 invalidate_tlb_all();
790 }
791
792 /* zephyr execution regions with appropriate attributes */
793
794 struct arm_mmu_flat_range {
795 char *name;
796 void *start;
797 void *end;
798 uint32_t attrs;
799 };
800
801 static const struct arm_mmu_flat_range mmu_zephyr_ranges[] = {
802
803 /* Mark the zephyr execution regions (data, bss, noinit, etc.)
804 * cacheable, read-write
805 * Note: read-write region is marked execute-never internally
806 */
807 { .name = "zephyr_data",
808 .start = _image_ram_start,
809 .end = _image_ram_end,
810 .attrs = MT_NORMAL | MT_P_RW_U_NA | MT_DEFAULT_SECURE_STATE },
811
/* Mark the text segment cacheable, read-only and executable */
813 { .name = "zephyr_code",
814 .start = __text_region_start,
815 .end = __text_region_end,
816 .attrs = MT_NORMAL | MT_P_RX_U_RX | MT_DEFAULT_SECURE_STATE },
817
/* Mark the rodata segment cacheable, read-only and execute-never */
819 { .name = "zephyr_rodata",
820 .start = __rodata_region_start,
821 .end = __rodata_region_end,
822 .attrs = MT_NORMAL | MT_P_RO_U_RO | MT_DEFAULT_SECURE_STATE },
823
824 #ifdef CONFIG_NOCACHE_MEMORY
/* Mark the nocache segment non-cacheable, read-write and execute-never */
826 { .name = "nocache_data",
827 .start = _nocache_ram_start,
828 .end = _nocache_ram_end,
829 .attrs = MT_NORMAL_NC | MT_P_RW_U_RW | MT_DEFAULT_SECURE_STATE },
830 #endif
831 };
832
static inline void add_arm_mmu_flat_range(struct arm_mmu_ptables *ptables,
                                          const struct arm_mmu_flat_range *range,
                                          uint32_t extra_flags)
836 {
837 uintptr_t address = (uintptr_t)range->start;
838 size_t size = (uintptr_t)range->end - address;
839
840 if (size) {
841 /* MMU not yet active: must use unlocked version */
842 __add_map(ptables, range->name, address, address,
843 size, range->attrs | extra_flags);
844 }
845 }
846
static inline void add_arm_mmu_region(struct arm_mmu_ptables *ptables,
                                      const struct arm_mmu_region *region,
                                      uint32_t extra_flags)
850 {
851 if (region->size || region->attrs) {
852 /* MMU not yet active: must use unlocked version */
853 __add_map(ptables, region->name, region->base_pa, region->base_va,
854 region->size, region->attrs | extra_flags);
855 }
856 }
857
static inline void inv_dcache_after_map_helper(void *virt, size_t size, uint32_t attrs)
859 {
860 /*
861 * DC IVAC instruction requires write access permission to the VA,
862 * otherwise it can generate a permission fault
863 */
864 if ((attrs & MT_RW) != MT_RW) {
865 return;
866 }
867
868 if (MT_TYPE(attrs) == MT_NORMAL || MT_TYPE(attrs) == MT_NORMAL_WT) {
869 sys_cache_data_invd_range(virt, size);
870 }
871 }
872
static void setup_page_tables(struct arm_mmu_ptables *ptables)
874 {
875 unsigned int index;
876 const struct arm_mmu_flat_range *range;
877 const struct arm_mmu_region *region;
878 uintptr_t max_va = 0, max_pa = 0;
879
880 MMU_DEBUG("xlat tables:\n");
881 for (index = 0U; index < CONFIG_MAX_XLAT_TABLES; index++) {
882 MMU_DEBUG("%d: %p\n", index, xlat_tables + index * Ln_XLAT_NUM_ENTRIES);
883 }
884
885 for (index = 0U; index < mmu_config.num_regions; index++) {
886 region = &mmu_config.mmu_regions[index];
887 max_va = MAX(max_va, region->base_va + region->size);
888 max_pa = MAX(max_pa, region->base_pa + region->size);
889 }
890
891 __ASSERT(max_va <= (1ULL << CONFIG_ARM64_VA_BITS),
892 "Maximum VA not supported\n");
893 __ASSERT(max_pa <= (1ULL << CONFIG_ARM64_PA_BITS),
894 "Maximum PA not supported\n");
895
896 /* setup translation table for zephyr execution regions */
897 for (index = 0U; index < ARRAY_SIZE(mmu_zephyr_ranges); index++) {
898 range = &mmu_zephyr_ranges[index];
899 add_arm_mmu_flat_range(ptables, range, 0);
900 }
901
902 /*
903 * Create translation tables for user provided platform regions.
904 * Those must not conflict with our default mapping.
905 */
906 for (index = 0U; index < mmu_config.num_regions; index++) {
907 region = &mmu_config.mmu_regions[index];
908 add_arm_mmu_region(ptables, region, MT_NO_OVERWRITE);
909 }
910
911 invalidate_tlb_all();
912
913 for (index = 0U; index < ARRAY_SIZE(mmu_zephyr_ranges); index++) {
914 size_t size;
915
916 range = &mmu_zephyr_ranges[index];
917 size = POINTER_TO_UINT(range->end) - POINTER_TO_UINT(range->start);
918 inv_dcache_after_map_helper(range->start, size, range->attrs);
919 }
920
921 for (index = 0U; index < mmu_config.num_regions; index++) {
922 region = &mmu_config.mmu_regions[index];
923 inv_dcache_after_map_helper(UINT_TO_POINTER(region->base_va), region->size,
924 region->attrs);
925 }
926 }
927
928 /* Translation table control register settings */
static uint64_t get_tcr(int el)
930 {
931 uint64_t tcr;
932 uint64_t va_bits = CONFIG_ARM64_VA_BITS;
933 uint64_t tcr_ps_bits;
934
935 tcr_ps_bits = TCR_PS_BITS;
936
937 if (el == 1) {
938 tcr = (tcr_ps_bits << TCR_EL1_IPS_SHIFT);
939 /*
940 * TCR_EL1.EPD1: Disable translation table walk for addresses
941 * that are translated using TTBR1_EL1.
942 */
943 tcr |= TCR_EPD1_DISABLE;
944 } else {
945 tcr = (tcr_ps_bits << TCR_EL3_PS_SHIFT);
946 }
947
948 tcr |= TCR_T0SZ(va_bits);
949
950 /*
951 * Translation table walk is cacheable, inner/outer WBWA and
* inner shareable. Due to Cortex-A57 erratum #822227 we must
* program TG1 for the 4KB granule (TG1[1] = 1) even though
* TTBR1 walks are disabled via EPD1.
954 */
955 tcr |= TCR_TG1_4K | TCR_TG0_4K | TCR_SHARED_INNER |
956 TCR_ORGN_WBWA | TCR_IRGN_WBWA;
957
958 return tcr;
959 }
960
static void enable_mmu_el1(struct arm_mmu_ptables *ptables, unsigned int flags)
962 {
963 ARG_UNUSED(flags);
964 uint64_t val;
965
966 /* Set MAIR, TCR and TBBR registers */
967 write_mair_el1(MEMORY_ATTRIBUTES);
968 write_tcr_el1(get_tcr(1));
969 write_ttbr0_el1((uint64_t)ptables->base_xlat_table);
970
971 /* Ensure these changes are seen before MMU is enabled */
972 barrier_isync_fence_full();
973
974 /* Enable the MMU and data cache */
975 val = read_sctlr_el1();
976 write_sctlr_el1(val | SCTLR_M_BIT | SCTLR_C_BIT);
977
978 /* Ensure the MMU enable takes effect immediately */
979 barrier_isync_fence_full();
980
981 MMU_DEBUG("MMU enabled with dcache\n");
982 }
983
984 /* ARM MMU Driver Initial Setup */
985
986 static struct arm_mmu_ptables kernel_ptables;
987 #ifdef CONFIG_USERSPACE
988 static sys_slist_t domain_list;
989 #endif
990
991 /*
992 * @brief MMU default configuration
993 *
994 * This function provides the default configuration mechanism for the Memory
995 * Management Unit (MMU).
996 */
void z_arm64_mm_init(bool is_primary_core)
998 {
999 unsigned int flags = 0U;
1000
1001 __ASSERT(CONFIG_MMU_PAGE_SIZE == KB(4),
1002 "Only 4K page size is supported\n");
1003
1004 __ASSERT(GET_EL(read_currentel()) == MODE_EL1,
1005 "Exception level not EL1, MMU not enabled!\n");
1006
/* Ensure that the MMU is not already enabled */
1008 __ASSERT((read_sctlr_el1() & SCTLR_M_BIT) == 0, "MMU is already enabled\n");
1009
1010 /*
* Only the booting core sets up the page tables.
1012 */
1013 if (is_primary_core) {
1014 kernel_ptables.base_xlat_table = new_table();
1015 setup_page_tables(&kernel_ptables);
1016 }
1017
1018 /* currently only EL1 is supported */
1019 enable_mmu_el1(&kernel_ptables, flags);
1020 }
1021
static void sync_domains(uintptr_t virt, size_t size, const char *name)
1023 {
1024 #ifdef CONFIG_USERSPACE
1025 sys_snode_t *node;
1026 struct arch_mem_domain *domain;
1027 struct arm_mmu_ptables *domain_ptables;
1028 k_spinlock_key_t key;
1029 int ret;
1030
1031 key = k_spin_lock(&z_mem_domain_lock);
1032 SYS_SLIST_FOR_EACH_NODE(&domain_list, node) {
1033 domain = CONTAINER_OF(node, struct arch_mem_domain, node);
1034 domain_ptables = &domain->ptables;
1035 ret = globalize_page_range(domain_ptables, &kernel_ptables,
1036 virt, size, name);
1037 if (ret) {
1038 LOG_ERR("globalize_page_range() returned %d", ret);
1039 }
1040 }
1041 k_spin_unlock(&z_mem_domain_lock, key);
1042 #endif
1043 }
1044
static int __arch_mem_map(void *virt, uintptr_t phys, size_t size, uint32_t flags)
1046 {
1047 struct arm_mmu_ptables *ptables;
1048 uint32_t entry_flags = MT_DEFAULT_SECURE_STATE | MT_P_RX_U_NA | MT_NO_OVERWRITE;
1049
1050 /* Always map in the kernel page tables */
1051 ptables = &kernel_ptables;
1052
1053 /* Translate flags argument into HW-recognized entry flags. */
1054 switch (flags & K_MEM_CACHE_MASK) {
1055 /*
1056 * K_MEM_CACHE_NONE, K_MEM_ARM_DEVICE_nGnRnE => MT_DEVICE_nGnRnE
1057 * (Device memory nGnRnE)
1058 * K_MEM_ARM_DEVICE_nGnRE => MT_DEVICE_nGnRE
1059 * (Device memory nGnRE)
1060 * K_MEM_ARM_DEVICE_GRE => MT_DEVICE_GRE
1061 * (Device memory GRE)
1062 * K_MEM_ARM_NORMAL_NC => MT_NORMAL_NC
1063 * (Normal memory Non-cacheable)
1064 * K_MEM_CACHE_WB => MT_NORMAL
1065 * (Normal memory Outer WB + Inner WB)
1066 * K_MEM_CACHE_WT => MT_NORMAL_WT
1067 * (Normal memory Outer WT + Inner WT)
1068 */
1069 case K_MEM_CACHE_NONE:
/* K_MEM_CACHE_NONE is equivalent to K_MEM_ARM_DEVICE_nGnRnE */
1071 /* case K_MEM_ARM_DEVICE_nGnRnE: */
1072 entry_flags |= MT_DEVICE_nGnRnE;
1073 break;
1074 case K_MEM_ARM_DEVICE_nGnRE:
1075 entry_flags |= MT_DEVICE_nGnRE;
1076 break;
1077 case K_MEM_ARM_DEVICE_GRE:
1078 entry_flags |= MT_DEVICE_GRE;
1079 break;
1080 case K_MEM_ARM_NORMAL_NC:
1081 entry_flags |= MT_NORMAL_NC;
1082 break;
1083 case K_MEM_CACHE_WT:
1084 entry_flags |= MT_NORMAL_WT;
1085 break;
1086 case K_MEM_CACHE_WB:
1087 entry_flags |= MT_NORMAL;
1088 break;
1089 default:
1090 return -ENOTSUP;
1091 }
1092
1093 if ((flags & K_MEM_PERM_RW) != 0U) {
1094 entry_flags |= MT_RW;
1095 }
1096
1097 if ((flags & K_MEM_PERM_EXEC) == 0U) {
1098 entry_flags |= MT_P_EXECUTE_NEVER;
1099 }
1100
1101 if ((flags & K_MEM_PERM_USER) != 0U) {
1102 entry_flags |= MT_RW_AP_ELx;
1103 }
1104
1105 if (IS_ENABLED(CONFIG_DEMAND_PAGING) && (flags & K_MEM_MAP_UNPAGED) != 0) {
1106 entry_flags |= MT_PAGED_OUT;
1107 }
1108
1109 return add_map(ptables, "generic", phys, (uintptr_t)virt, size, entry_flags);
1110 }
1111
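/*
 * Usage sketch (illustrative; virt_page and the physical address are
 * hypothetical): mapping one page of device registers for kernel-only,
 * uncached access would look like
 *
 *   arch_mem_map(virt_page, 0x40000000, CONFIG_MMU_PAGE_SIZE,
 *                K_MEM_CACHE_NONE | K_MEM_PERM_RW);
 *
 * which __arch_mem_map() above translates to MT_DEVICE_nGnRnE | MT_RW |
 * MT_P_EXECUTE_NEVER on top of the default secure/no-overwrite flags
 * before calling add_map().
 */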
void arch_mem_map(void *virt, uintptr_t phys, size_t size, uint32_t flags)
1113 {
1114 int ret = __arch_mem_map(virt, phys, size, flags);
1115
1116 if (ret) {
1117 LOG_ERR("__arch_mem_map() returned %d", ret);
1118 k_panic();
1119 } else {
1120 uint32_t mem_flags = flags & K_MEM_CACHE_MASK;
1121
1122 sync_domains((uintptr_t)virt, size, "mem_map");
1123 invalidate_tlb_all();
1124
1125 switch (mem_flags) {
1126 case K_MEM_CACHE_WB:
1127 case K_MEM_CACHE_WT:
1128 mem_flags = (mem_flags == K_MEM_CACHE_WB) ? MT_NORMAL : MT_NORMAL_WT;
1129 mem_flags |= (flags & K_MEM_PERM_RW) ? MT_RW : 0;
1130 inv_dcache_after_map_helper(virt, size, mem_flags);
1131 default:
1132 break;
1133 }
1134 }
1135 }
1136
void arch_mem_unmap(void *addr, size_t size)
1138 {
1139 remove_map(&kernel_ptables, "generic", (uintptr_t)addr, size);
1140 sync_domains((uintptr_t)addr, size, "mem_unmap");
1141 invalidate_tlb_all();
1142 }
1143
int arch_page_phys_get(void *virt, uintptr_t *phys)
1145 {
1146 uint64_t par;
1147 int key;
1148
1149 key = arch_irq_lock();
1150 __asm__ volatile ("at S1E1R, %0" : : "r" (virt));
1151 barrier_isync_fence_full();
1152 par = read_par_el1();
1153 arch_irq_unlock(key);
1154
1155 if (par & BIT(0)) {
1156 return -EFAULT;
1157 }
1158
1159 if (phys) {
1160 *phys = par & GENMASK64(47, 12);
1161 }
1162 return 0;
1163 }
1164
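/*
 * Return the largest block size the physical region can use so that the
 * virtual mapping may later be built from block descriptors. For example,
 * with the 4KB granule, a 2MB region whose physical base is 2MB-aligned
 * returns a 2MB alignment; otherwise the page size is returned.
 */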
size_t arch_virt_region_align(uintptr_t phys, size_t size)
1166 {
1167 size_t alignment = CONFIG_MMU_PAGE_SIZE;
1168 size_t level_size;
1169 int level;
1170
1171 for (level = XLAT_LAST_LEVEL; level >= BASE_XLAT_LEVEL; level--) {
1172 level_size = 1 << LEVEL_TO_VA_SIZE_SHIFT(level);
1173
1174 if (size < level_size) {
1175 break;
1176 }
1177
1178 if ((phys & (level_size - 1))) {
1179 break;
1180 }
1181
1182 alignment = level_size;
1183 }
1184
1185 return alignment;
1186 }
1187
1188 #ifdef CONFIG_USERSPACE
1189
1190 static uint16_t next_asid = 1;
1191
static uint16_t get_asid(uint64_t ttbr0)
1193 {
1194 return ttbr0 >> TTBR_ASID_SHIFT;
1195 }
1196
1197 static void z_arm64_swap_ptables(struct k_thread *incoming);
1198
int arch_mem_domain_max_partitions_get(void)
1200 {
1201 return CONFIG_MAX_DOMAIN_PARTITIONS;
1202 }
1203
int arch_mem_domain_init(struct k_mem_domain *domain)
1205 {
1206 struct arm_mmu_ptables *domain_ptables = &domain->arch.ptables;
1207 k_spinlock_key_t key;
1208 uint16_t asid;
1209
1210 MMU_DEBUG("%s\n", __func__);
1211
1212 key = k_spin_lock(&xlat_lock);
1213
1214 /*
* Pick a new ASID using round-robin allocation.
* Note: `next_asid` is a uint16_t and `VM_ASID_BITS` could
1217 * be up to 16, hence `next_asid` might overflow to 0 below.
1218 */
1219 asid = next_asid++;
1220 if ((next_asid >= (1UL << VM_ASID_BITS)) || (next_asid == 0)) {
1221 next_asid = 1;
1222 }
1223
1224 domain_ptables->base_xlat_table =
1225 dup_table(kernel_ptables.base_xlat_table, BASE_XLAT_LEVEL);
1226 k_spin_unlock(&xlat_lock, key);
1227 if (!domain_ptables->base_xlat_table) {
1228 return -ENOMEM;
1229 }
1230
1231 domain_ptables->ttbr0 = (((uint64_t)asid) << TTBR_ASID_SHIFT) |
1232 ((uint64_t)(uintptr_t)domain_ptables->base_xlat_table);
1233
1234 sys_slist_append(&domain_list, &domain->arch.node);
1235 return 0;
1236 }
1237
static int private_map(struct arm_mmu_ptables *ptables, const char *name,
                       uintptr_t phys, uintptr_t virt, size_t size, uint32_t attrs)
1240 {
1241 int ret;
1242
1243 ret = privatize_page_range(ptables, &kernel_ptables, virt, size, name);
1244 __ASSERT(ret == 0, "privatize_page_range() returned %d", ret);
1245 ret = add_map(ptables, name, phys, virt, size, attrs | MT_NG);
1246 __ASSERT(ret == 0, "add_map() returned %d", ret);
1247 invalidate_tlb_all();
1248
1249 inv_dcache_after_map_helper(UINT_TO_POINTER(virt), size, attrs);
1250 return ret;
1251 }
1252
static int reset_map(struct arm_mmu_ptables *ptables, const char *name,
                     uintptr_t addr, size_t size)
1255 {
1256 int ret;
1257
1258 ret = globalize_page_range(ptables, &kernel_ptables, addr, size, name);
1259 __ASSERT(ret == 0, "globalize_page_range() returned %d", ret);
1260 invalidate_tlb_all();
1261
1262 return ret;
1263 }
1264
int arch_mem_domain_partition_add(struct k_mem_domain *domain,
                                  uint32_t partition_id)
1267 {
1268 struct arm_mmu_ptables *domain_ptables = &domain->arch.ptables;
1269 struct k_mem_partition *ptn = &domain->partitions[partition_id];
1270
1271 return private_map(domain_ptables, "partition", ptn->start, ptn->start,
1272 ptn->size, ptn->attr.attrs | MT_NORMAL);
1273 }
1274
int arch_mem_domain_partition_remove(struct k_mem_domain *domain,
                                     uint32_t partition_id)
1277 {
1278 struct arm_mmu_ptables *domain_ptables = &domain->arch.ptables;
1279 struct k_mem_partition *ptn = &domain->partitions[partition_id];
1280
1281 return reset_map(domain_ptables, "partition removal",
1282 ptn->start, ptn->size);
1283 }
1284
static int map_thread_stack(struct k_thread *thread,
                            struct arm_mmu_ptables *ptables)
1287 {
1288 return private_map(ptables, "thread_stack", thread->stack_info.start,
1289 thread->stack_info.start, thread->stack_info.size,
1290 MT_P_RW_U_RW | MT_NORMAL);
1291 }
1292
int arch_mem_domain_thread_add(struct k_thread *thread)
1294 {
1295 struct arm_mmu_ptables *old_ptables, *domain_ptables;
1296 struct k_mem_domain *domain;
1297 bool is_user, is_migration;
1298 int ret = 0;
1299
1300 domain = thread->mem_domain_info.mem_domain;
1301 domain_ptables = &domain->arch.ptables;
1302 old_ptables = thread->arch.ptables;
1303
1304 is_user = (thread->base.user_options & K_USER) != 0;
1305 is_migration = (old_ptables != NULL) && is_user;
1306
1307 if (is_migration) {
1308 ret = map_thread_stack(thread, domain_ptables);
1309 }
1310
1311 thread->arch.ptables = domain_ptables;
1312 if (thread == arch_current_thread()) {
1313 z_arm64_swap_ptables(thread);
1314 } else {
1315 #ifdef CONFIG_SMP
1316 /* the thread could be running on another CPU right now */
1317 z_arm64_mem_cfg_ipi();
1318 #endif
1319 }
1320
1321 if (is_migration) {
1322 ret = reset_map(old_ptables, __func__, thread->stack_info.start,
1323 thread->stack_info.size);
1324 }
1325
1326 return ret;
1327 }
1328
int arch_mem_domain_thread_remove(struct k_thread *thread)
1330 {
1331 struct arm_mmu_ptables *domain_ptables;
1332 struct k_mem_domain *domain;
1333
1334 domain = thread->mem_domain_info.mem_domain;
1335 domain_ptables = &domain->arch.ptables;
1336
1337 if ((thread->base.user_options & K_USER) == 0) {
1338 return 0;
1339 }
1340
1341 if ((thread->base.thread_state & _THREAD_DEAD) == 0) {
1342 return 0;
1343 }
1344
1345 return reset_map(domain_ptables, __func__, thread->stack_info.start,
1346 thread->stack_info.size);
1347 }
1348
static void z_arm64_swap_ptables(struct k_thread *incoming)
1350 {
1351 struct arm_mmu_ptables *ptables = incoming->arch.ptables;
1352 uint64_t curr_ttbr0 = read_ttbr0_el1();
1353 uint64_t new_ttbr0 = ptables->ttbr0;
1354
1355 if (curr_ttbr0 == new_ttbr0) {
1356 return; /* Already the right tables */
1357 }
1358
1359 MMU_DEBUG("TTBR0 switch from %#llx to %#llx\n", curr_ttbr0, new_ttbr0);
1360 z_arm64_set_ttbr0(new_ttbr0);
1361
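/*
 * Each domain carries its own ASID in TTBR0, so TLB entries tagged with a
 * different ASID cannot be hit after the switch and no flush is needed.
 * Only when the ASID is reused (e.g. after the round-robin counter in
 * arch_mem_domain_init() wraps) must stale entries be invalidated.
 */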
1362 if (get_asid(curr_ttbr0) == get_asid(new_ttbr0)) {
1363 invalidate_tlb_all();
1364 }
1365 }
1366
void z_arm64_thread_mem_domains_init(struct k_thread *incoming)
1368 {
1369 struct arm_mmu_ptables *ptables;
1370
1371 if ((incoming->base.user_options & K_USER) == 0) {
1372 return;
1373 }
1374
1375 ptables = incoming->arch.ptables;
1376
1377 /* Map the thread stack */
1378 map_thread_stack(incoming, ptables);
1379
1380 z_arm64_swap_ptables(incoming);
1381 }
1382
void z_arm64_swap_mem_domains(struct k_thread *incoming)
1384 {
1385 z_arm64_swap_ptables(incoming);
1386 }
1387
1388 #endif /* CONFIG_USERSPACE */
1389
1390 #ifdef CONFIG_DEMAND_PAGING
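/*
 * Demand paging PTE encoding used below: a paged-out page keeps its last
 * level PTE but with the descriptor type set to PTE_INVALID_DESC and the
 * physical-address bits reused to hold the backing store location token.
 * For resident pages, AF clear means "not accessed yet" and AP_RO on an
 * otherwise writable page means "clean"; the resulting access-flag and
 * permission faults are resolved in z_arm64_do_demand_paging().
 */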
1391
static uint64_t *get_pte_location(struct arm_mmu_ptables *ptables,
                                  uintptr_t virt)
1394 {
1395 uint64_t *pte;
1396 uint64_t *table = ptables->base_xlat_table;
1397 unsigned int level = BASE_XLAT_LEVEL;
1398
1399 for (;;) {
1400 pte = &table[XLAT_TABLE_VA_IDX(virt, level)];
1401 if (level == XLAT_LAST_LEVEL) {
1402 return pte;
1403 }
1404
1405 if (is_table_desc(*pte, level)) {
1406 level++;
1407 table = pte_desc_table(*pte);
1408 continue;
1409 }
1410
1411 /* anything else is unexpected */
1412 return NULL;
1413 }
1414 }
1415
void arch_mem_page_out(void *addr, uintptr_t location)
1417 {
1418 uintptr_t virt = (uintptr_t)addr;
1419 uint64_t *pte = get_pte_location(&kernel_ptables, virt);
1420 uint64_t desc;
1421
1422 __ASSERT(pte != NULL, "");
1423 desc = *pte;
1424
1425 /* mark the entry invalid to the hardware */
1426 desc &= ~PTE_DESC_TYPE_MASK;
1427 desc |= PTE_INVALID_DESC;
1428
1429 /* store the location token in place of the physical address */
1430 __ASSERT((location & ~PTE_PHYSADDR_MASK) == 0, "");
1431 desc &= ~PTE_PHYSADDR_MASK;
1432 desc |= location;
1433
1434 /*
1435 * The location token may be 0. Make sure the whole descriptor
1436 * doesn't end up being zero as this would be seen as a free entry.
1437 */
1438 desc |= PTE_BLOCK_DESC_AP_RO;
1439
1440 *pte = desc;
1441 MMU_DEBUG("page_out: virt=%#lx location=%#lx\n", virt, location);
1442 debug_show_pte(pte, XLAT_LAST_LEVEL);
1443
1444 sync_domains(virt, CONFIG_MMU_PAGE_SIZE, "page_out");
1445 invalidate_tlb_page(virt);
1446 }
1447
void arch_mem_page_in(void *addr, uintptr_t phys)
1449 {
1450 uintptr_t virt = (uintptr_t)addr;
1451 uint64_t *pte = get_pte_location(&kernel_ptables, virt);
1452 uint64_t desc;
1453
1454 __ASSERT((phys & ~PTE_PHYSADDR_MASK) == 0, "");
1455
1456 __ASSERT(pte != NULL, "");
1457 desc = *pte;
1458 __ASSERT(!is_free_desc(desc), "");
1459
1460 /* mark the entry valid again to the hardware */
1461 desc &= ~PTE_DESC_TYPE_MASK;
1462 desc |= PTE_PAGE_DESC;
1463
1464 /* store the physical address */
1465 desc &= ~PTE_PHYSADDR_MASK;
1466 desc |= phys;
1467
1468 /* mark as clean */
1469 desc |= PTE_BLOCK_DESC_AP_RO;
1470
/* and make it initially inaccessible to track unaccessed pages */
1472 desc &= ~PTE_BLOCK_DESC_AF;
1473
1474 *pte = desc;
1475 MMU_DEBUG("page_in: virt=%#lx phys=%#lx\n", virt, phys);
1476 debug_show_pte(pte, XLAT_LAST_LEVEL);
1477
1478 sync_domains(virt, CONFIG_MMU_PAGE_SIZE, "page_in");
1479 invalidate_tlb_page(virt);
1480 }
1481
enum arch_page_location arch_page_location_get(void *addr, uintptr_t *location)
1483 {
1484 uintptr_t virt = (uintptr_t)addr;
1485 uint64_t *pte = get_pte_location(&kernel_ptables, virt);
1486 uint64_t desc;
1487 enum arch_page_location status;
1488
1489 if (!pte) {
1490 return ARCH_PAGE_LOCATION_BAD;
1491 }
1492 desc = *pte;
1493 if (is_free_desc(desc)) {
1494 return ARCH_PAGE_LOCATION_BAD;
1495 }
1496
1497 switch (desc & PTE_DESC_TYPE_MASK) {
1498 case PTE_PAGE_DESC:
1499 status = ARCH_PAGE_LOCATION_PAGED_IN;
1500 break;
1501 case PTE_INVALID_DESC:
1502 status = ARCH_PAGE_LOCATION_PAGED_OUT;
1503 break;
1504 default:
1505 return ARCH_PAGE_LOCATION_BAD;
1506 }
1507
1508 *location = desc & PTE_PHYSADDR_MASK;
1509 return status;
1510 }
1511
uintptr_t arch_page_info_get(void *addr, uintptr_t *phys, bool clear_accessed)
1513 {
1514 uintptr_t virt = (uintptr_t)addr;
1515 uint64_t *pte = get_pte_location(&kernel_ptables, virt);
1516 uint64_t desc;
1517 uintptr_t status = 0;
1518
1519 if (!pte) {
1520 return ARCH_DATA_PAGE_NOT_MAPPED;
1521 }
1522 desc = *pte;
1523 if (is_free_desc(desc)) {
1524 return ARCH_DATA_PAGE_NOT_MAPPED;
1525 }
1526
1527 switch (desc & PTE_DESC_TYPE_MASK) {
1528 case PTE_PAGE_DESC:
1529 status |= ARCH_DATA_PAGE_LOADED;
1530 break;
1531 case PTE_INVALID_DESC:
1532 /* page not loaded */
1533 break;
1534 default:
1535 return ARCH_DATA_PAGE_NOT_MAPPED;
1536 }
1537
1538 if (phys) {
1539 *phys = desc & PTE_PHYSADDR_MASK;
1540 }
1541
1542 if ((status & ARCH_DATA_PAGE_LOADED) == 0) {
1543 return status;
1544 }
1545
1546 if ((desc & PTE_BLOCK_DESC_AF) != 0) {
1547 status |= ARCH_DATA_PAGE_ACCESSED;
1548 }
1549
1550 if ((desc & PTE_BLOCK_DESC_AP_RO) == 0) {
1551 status |= ARCH_DATA_PAGE_DIRTY;
1552 }
1553
1554 if (clear_accessed) {
1555 desc &= ~PTE_BLOCK_DESC_AF;
1556 *pte = desc;
1557 MMU_DEBUG("page_info: virt=%#lx (clearing AF)\n", virt);
1558 debug_show_pte(pte, XLAT_LAST_LEVEL);
1559 sync_domains(virt, CONFIG_MMU_PAGE_SIZE, "unaccessible");
1560 invalidate_tlb_page(virt);
1561 }
1562
1563 return status;
1564 }
1565
1566 #define MT_SCRATCH (MT_NORMAL | MT_P_RW_U_NA | MT_DEFAULT_SECURE_STATE)
1567
void arch_mem_scratch(uintptr_t phys)
1569 {
1570 uintptr_t virt = (uintptr_t)K_MEM_SCRATCH_PAGE;
1571 size_t size = CONFIG_MMU_PAGE_SIZE;
1572 int ret = add_map(&kernel_ptables, "scratch", phys, virt, size, MT_SCRATCH);
1573
1574 if (ret) {
1575 LOG_ERR("add_map() returned %d", ret);
1576 } else {
1577 sync_domains(virt, size, "scratch");
1578 invalidate_tlb_page(virt);
1579 }
1580 }
1581
static bool do_mem_page_fault(struct arch_esf *esf, uintptr_t virt)
1583 {
1584 /*
1585 * The k_mem_page_fault() code expects to be called with IRQs enabled
1586 * if the fault happened in a context where IRQs were enabled.
1587 */
1588 if (arch_irq_unlocked(esf->spsr)) {
1589 enable_irq();
1590 }
1591
1592 bool ok = k_mem_page_fault((void *)virt);
1593
1594 disable_irq();
1595 return ok;
1596 }
1597
1598 /* Called from the fault handler. Returns true if the fault is resolved. */
bool z_arm64_do_demand_paging(struct arch_esf *esf, uint64_t esr, uint64_t far)
1600 {
1601 uintptr_t virt = far;
1602 uint64_t *pte, desc;
1603 uintptr_t phys;
1604
1605 /* filter relevant exceptions */
1606 switch (GET_ESR_EC(esr)) {
1607 case 0x21: /* insn abort from current EL */
1608 case 0x25: /* data abort from current EL */
1609 break;
1610 default:
1611 return false;
1612 }
1613
1614 /* make sure the fault happened in the expected range */
1615 if (!IN_RANGE(virt,
1616 (uintptr_t)K_MEM_VIRT_RAM_START,
1617 ((uintptr_t)K_MEM_VIRT_RAM_END - 1))) {
1618 return false;
1619 }
1620
1621 virt = ROUND_DOWN(virt, CONFIG_MMU_PAGE_SIZE);
1622
1623 pte = get_pte_location(&kernel_ptables, virt);
1624 if (!pte) {
1625 /* page mapping doesn't exist, let the core code do its thing */
1626 return do_mem_page_fault(esf, virt);
1627 }
1628 desc = *pte;
1629 if ((desc & PTE_DESC_TYPE_MASK) != PTE_PAGE_DESC) {
1630 /* page is not loaded/mapped */
1631 return do_mem_page_fault(esf, virt);
1632 }
1633
1634 /*
1635 * From this point, we expect only 2 cases:
1636 *
1637 * 1) the Access Flag was not set so we set it marking the page
1638 * as accessed;
1639 *
1640 * 2) the page was read-only and a write occurred so we clear the
1641 * RO flag marking the page dirty.
1642 *
1643 * We bail out on anything else.
1644 *
1645 * Fault status codes for Data aborts (DFSC):
1646 * 0b0010LL Access flag fault
1647 * 0b0011LL Permission fault
1648 */
1649 uint32_t dfsc = GET_ESR_ISS(esr) & GENMASK(5, 0);
1650 bool write = (GET_ESR_ISS(esr) & BIT(6)) != 0; /* WnR */
1651
1652 if (dfsc == (0b001000 | XLAT_LAST_LEVEL) &&
1653 (desc & PTE_BLOCK_DESC_AF) == 0) {
1654 /* page is being accessed: set the access flag */
1655 desc |= PTE_BLOCK_DESC_AF;
1656 if (write) {
1657 if ((desc & PTE_SW_WRITABLE) == 0) {
1658 /* we don't actually have write permission */
1659 return false;
1660 }
1661 /*
1662 * Let's avoid another fault immediately after
1663 * returning by making the page read-write right away
1664 * effectively marking it "dirty" as well.
1665 */
1666 desc &= ~PTE_BLOCK_DESC_AP_RO;
1667 }
1668 *pte = desc;
1669 sync_domains(virt, CONFIG_MMU_PAGE_SIZE, "accessed");
1670 /* no TLB inval needed after setting AF */
1671
1672 /* tell the eviction algorithm about it */
1673 phys = desc & PTE_PHYSADDR_MASK;
1674 k_mem_paging_eviction_accessed(phys);
1675 return true;
1676 }
1677
1678 if (dfsc == (0b001100 | XLAT_LAST_LEVEL) && write &&
1679 (desc & PTE_BLOCK_DESC_AP_RO) != 0 &&
1680 (desc & PTE_SW_WRITABLE) != 0) {
1681 /* make it "dirty" i.e. read-write */
1682 desc &= ~PTE_BLOCK_DESC_AP_RO;
1683 *pte = desc;
1684 sync_domains(virt, CONFIG_MMU_PAGE_SIZE, "dirtied");
1685 invalidate_tlb_page(virt);
1686
1687 /* this also counts as an access refresh */
1688 phys = desc & PTE_PHYSADDR_MASK;
1689 k_mem_paging_eviction_accessed(phys);
1690 return true;
1691 }
1692
1693 return false;
1694 }
1695
1696 #endif /* CONFIG_DEMAND_PAGING */
1697