1 /*
2 * Copyright (c) 2020 Intel Corporation
3 *
4 * SPDX-License-Identifier: Apache-2.0
5 *
6 * Routines for managing virtual address spaces
7 */
8
9 #include <stdint.h>
10 #include <kernel_arch_interface.h>
11 #include <zephyr/spinlock.h>
12 #include <mmu.h>
13 #include <zephyr/init.h>
14 #include <kernel_internal.h>
15 #include <zephyr/syscall_handler.h>
16 #include <zephyr/toolchain.h>
17 #include <zephyr/linker/linker-defs.h>
18 #include <zephyr/sys/bitarray.h>
19 #include <zephyr/timing/timing.h>
20 #include <zephyr/logging/log.h>
21 LOG_MODULE_DECLARE(os, CONFIG_KERNEL_LOG_LEVEL);
22
23 /*
24 * General terminology:
25 * - A page frame is a page-sized physical memory region in RAM. It is a
26 * container where a data page may be placed. It is always referred to by
27 * physical address. We have a convention of using uintptr_t for physical
28 * addresses. We instantiate a struct z_page_frame to store metadata for
29 * every page frame.
30 *
31 * - A data page is a page-sized region of data. It may exist in a page frame,
32 * or be paged out to some backing store. Its location can always be looked
33 * up in the CPU's page tables (or equivalent) by virtual address.
34 * The data type will always be void * or in some cases uint8_t * when we
35 * want to do pointer arithmetic.
36 */
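/* Illustrative example (addresses are hypothetical): with 4 KiB pages, the
 * data page backing a given buffer might currently sit in the page frame at
 * physical address 0x80001000, or it might be paged out with only a backing
 * store location recorded for it; the page tables (or equivalent) always
 * record which of the two is the case.
 */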
37
38 /* Spinlock to protect any globals in this file and serialize page table
39 * updates in arch code
40 */
41 struct k_spinlock z_mm_lock;
42
43 /*
44 * General page frame management
45 */
46
47 /* Database of all RAM page frames */
48 struct z_page_frame z_page_frames[Z_NUM_PAGE_FRAMES];
49
50 #if __ASSERT_ON
51 /* Indicator that z_page_frames has been initialized; many of these APIs do
52 * not work before POST_KERNEL
53 */
54 static bool page_frames_initialized;
55 #endif
56
57 /* Add colors to page table dumps to indicate mapping type */
58 #define COLOR_PAGE_FRAMES 1
59
60 #if COLOR_PAGE_FRAMES
61 #define ANSI_DEFAULT "\x1B" "[0m"
62 #define ANSI_RED "\x1B" "[1;31m"
63 #define ANSI_GREEN "\x1B" "[1;32m"
64 #define ANSI_YELLOW "\x1B" "[1;33m"
65 #define ANSI_BLUE "\x1B" "[1;34m"
66 #define ANSI_MAGENTA "\x1B" "[1;35m"
67 #define ANSI_CYAN "\x1B" "[1;36m"
68 #define ANSI_GREY "\x1B" "[1;90m"
69
70 #define COLOR(x) printk(_CONCAT(ANSI_, x))
71 #else
72 #define COLOR(x) do { } while (false)
73 #endif
74
75 /* LCOV_EXCL_START */
76 static void page_frame_dump(struct z_page_frame *pf)
77 {
78 if (z_page_frame_is_reserved(pf)) {
79 COLOR(CYAN);
80 printk("R");
81 } else if (z_page_frame_is_busy(pf)) {
82 COLOR(MAGENTA);
83 printk("B");
84 } else if (z_page_frame_is_pinned(pf)) {
85 COLOR(YELLOW);
86 printk("P");
87 } else if (z_page_frame_is_available(pf)) {
88 COLOR(GREY);
89 printk(".");
90 } else if (z_page_frame_is_mapped(pf)) {
91 COLOR(DEFAULT);
92 printk("M");
93 } else {
94 COLOR(RED);
95 printk("?");
96 }
97 }
98
99 void z_page_frames_dump(void)
100 {
101 int column = 0;
102
103 __ASSERT(page_frames_initialized, "%s called too early", __func__);
104 printk("Physical memory from 0x%lx to 0x%lx\n",
105 Z_PHYS_RAM_START, Z_PHYS_RAM_END);
106
107 for (int i = 0; i < Z_NUM_PAGE_FRAMES; i++) {
108 struct z_page_frame *pf = &z_page_frames[i];
109
110 page_frame_dump(pf);
111
112 column++;
113 if (column == 64) {
114 column = 0;
115 printk("\n");
116 }
117 }
118
119 COLOR(DEFAULT);
120 if (column != 0) {
121 printk("\n");
122 }
123 }
124 /* LCOV_EXCL_STOP */
125
126 #define VIRT_FOREACH(_base, _size, _pos) \
127 for (_pos = _base; \
128 _pos < ((uint8_t *)_base + _size); _pos += CONFIG_MMU_PAGE_SIZE)
129
130 #define PHYS_FOREACH(_base, _size, _pos) \
131 for (_pos = _base; \
132 _pos < ((uintptr_t)_base + _size); _pos += CONFIG_MMU_PAGE_SIZE)
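/* Typical use of these helpers (illustrative sketch):
 *
 *   uint8_t *pos;
 *
 *   VIRT_FOREACH(dst, size, pos) {
 *       (void)map_anon_page(pos, flags);
 *   }
 *
 * which visits every page-sized slot in [dst, dst + size), much as
 * k_mem_map() does further down in this file.
 */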
133
134
135 /*
136 * Virtual address space management
137 *
138 * Call all of these functions with z_mm_lock held.
139 *
140 * Overall virtual memory map: When the kernel starts, it resides in
141 * virtual memory in the region Z_KERNEL_VIRT_START to
142 * Z_KERNEL_VIRT_END. Unused virtual memory past this, up to the limit
143 * noted by CONFIG_KERNEL_VM_SIZE may be used for runtime memory mappings.
144 *
145 * If CONFIG_ARCH_MAPS_ALL_RAM is set, we do not just map the kernel image,
146 * but have a mapping for all RAM in place. This is for special architectural
147 * purposes and does not otherwise affect page frame accounting or flags;
148 * the only guarantee is that such RAM mapping outside of the Zephyr image
149 * won't be disturbed by subsequent memory mapping calls.
150 *
151 * +--------------+ <- Z_VIRT_RAM_START
152 * | Undefined VM | <- May contain ancillary regions like x86_64's locore
153 * +--------------+ <- Z_KERNEL_VIRT_START (often == Z_VIRT_RAM_START)
154 * | Mapping for |
155 * | main kernel |
156 * | image |
157 * | |
158 * | |
159 * +--------------+ <- Z_FREE_VM_START
160 * | |
161 * | Unused, |
162 * | Available VM |
163 * | |
164 * |..............| <- mapping_pos (grows downward as more mappings are made)
165 * | Mapping |
166 * +--------------+
167 * | Mapping |
168 * +--------------+
169 * | ... |
170 * +--------------+
171 * | Mapping |
172 * +--------------+ <- mappings start here
173 * | Reserved | <- special purpose virtual page(s) of size Z_VM_RESERVED
174 * +--------------+ <- Z_VIRT_RAM_END
175 */
176
177 /* Bitmap of virtual addresses where one bit corresponds to one page.
178 * This is being used for virt_region_alloc() to figure out which
179 * region of virtual addresses can be used for memory mapping.
180 *
181 * Note that bit #0 is the highest address so that allocation is
182 * done in reverse from highest address.
183 */
184 SYS_BITARRAY_DEFINE_STATIC(virt_region_bitmap,
185 CONFIG_KERNEL_VM_SIZE / CONFIG_MMU_PAGE_SIZE);
186
187 static bool virt_region_inited;
188
189 #define Z_VIRT_REGION_START_ADDR Z_FREE_VM_START
190 #define Z_VIRT_REGION_END_ADDR (Z_VIRT_RAM_END - Z_VM_RESERVED)
191
192 static inline uintptr_t virt_from_bitmap_offset(size_t offset, size_t size)
193 {
194 return POINTER_TO_UINT(Z_VIRT_RAM_END)
195 - (offset * CONFIG_MMU_PAGE_SIZE) - size;
196 }
197
198 static inline size_t virt_to_bitmap_offset(void *vaddr, size_t size)
199 {
200 return (POINTER_TO_UINT(Z_VIRT_RAM_END)
201 - POINTER_TO_UINT(vaddr) - size) / CONFIG_MMU_PAGE_SIZE;
202 }
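/* Worked example (illustrative, assuming CONFIG_MMU_PAGE_SIZE == 4096): for a
 * one-page region ending exactly at Z_VIRT_RAM_END, bit #0 is the one that
 * covers it, so
 *
 *   virt_from_bitmap_offset(0, 4096) == Z_VIRT_RAM_END - 4096
 *   virt_to_bitmap_offset(Z_VIRT_RAM_END - 4096, 4096) == 0
 *
 * i.e. the two helpers are inverses of each other for page-aligned regions.
 */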
203
204 static void virt_region_init(void)
205 {
206 size_t offset, num_bits;
207
208 /* There are regions where we should never map via
209 * k_mem_map() and z_phys_map(). Mark them as
210 * already allocated so they will never be used.
211 */
212
213 if (Z_VM_RESERVED > 0) {
214 /* Mark reserved region at end of virtual address space */
215 num_bits = Z_VM_RESERVED / CONFIG_MMU_PAGE_SIZE;
216 (void)sys_bitarray_set_region(&virt_region_bitmap,
217 num_bits, 0);
218 }
219
220 /* Mark all bits up to Z_FREE_VM_START as allocated */
221 num_bits = POINTER_TO_UINT(Z_FREE_VM_START)
222 - POINTER_TO_UINT(Z_VIRT_RAM_START);
223 offset = virt_to_bitmap_offset(Z_VIRT_RAM_START, num_bits);
224 num_bits /= CONFIG_MMU_PAGE_SIZE;
225 (void)sys_bitarray_set_region(&virt_region_bitmap,
226 num_bits, offset);
227
228 virt_region_inited = true;
229 }
230
231 static void virt_region_free(void *vaddr, size_t size)
232 {
233 size_t offset, num_bits;
234 uint8_t *vaddr_u8 = (uint8_t *)vaddr;
235
236 if (unlikely(!virt_region_inited)) {
237 virt_region_init();
238 }
239
240 #ifndef CONFIG_KERNEL_DIRECT_MAP
241 __ASSERT((vaddr_u8 >= Z_VIRT_REGION_START_ADDR)
242 && ((vaddr_u8 + size - 1) < Z_VIRT_REGION_END_ADDR),
243 "invalid virtual address region %p (%zu)", vaddr_u8, size);
244 #endif
245 if (!((vaddr_u8 >= Z_VIRT_REGION_START_ADDR)
246 && ((vaddr_u8 + size - 1) < Z_VIRT_REGION_END_ADDR))) {
247 return;
248 }
249
250 offset = virt_to_bitmap_offset(vaddr, size);
251 num_bits = size / CONFIG_MMU_PAGE_SIZE;
252 (void)sys_bitarray_free(&virt_region_bitmap, num_bits, offset);
253 }
254
255 static void *virt_region_alloc(size_t size, size_t align)
256 {
257 uintptr_t dest_addr;
258 size_t alloc_size;
259 size_t offset;
260 size_t num_bits;
261 int ret;
262
263 if (unlikely(!virt_region_inited)) {
264 virt_region_init();
265 }
266
267 /* Possibly request more pages to ensure we can get an aligned virtual address */
268 num_bits = (size + align - CONFIG_MMU_PAGE_SIZE) / CONFIG_MMU_PAGE_SIZE;
269 alloc_size = num_bits * CONFIG_MMU_PAGE_SIZE;
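/* Illustrative example (assuming CONFIG_MMU_PAGE_SIZE == 4096): a request for
 * size == 8192 with align == 16384 gives num_bits == (8192 + 16384 - 4096)
 * / 4096 == 5 pages. A 20 KiB window is the smallest that is guaranteed to
 * contain a 16 KiB-aligned run of 8 KiB, whatever page-aligned address the
 * bit array hands back.
 */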
270 ret = sys_bitarray_alloc(&virt_region_bitmap, num_bits, &offset);
271 if (ret != 0) {
272 LOG_ERR("insufficient virtual address space (requested %zu)",
273 size);
274 return NULL;
275 }
276
277 /* Remember that bit #0 in bitmap corresponds to the highest
278 * virtual address. So here we need to go downwards (backwards?)
279 * to get the starting address of the allocated region.
280 */
281 dest_addr = virt_from_bitmap_offset(offset, alloc_size);
282
283 if (alloc_size > size) {
284 uintptr_t aligned_dest_addr = ROUND_UP(dest_addr, align);
285
286 /* Here is the memory organization when trying to get an aligned
287 * virtual address:
288 *
289 * +--------------+ <- Z_VIRT_RAM_START
290 * | Undefined VM |
291 * +--------------+ <- Z_KERNEL_VIRT_START (often == Z_VIRT_RAM_START)
292 * | Mapping for |
293 * | main kernel |
294 * | image |
295 * | |
296 * | |
297 * +--------------+ <- Z_FREE_VM_START
298 * | ... |
299 * +==============+ <- dest_addr
300 * | Unused |
301 * |..............| <- aligned_dest_addr
302 * | |
303 * | Aligned |
304 * | Mapping |
305 * | |
306 * |..............| <- aligned_dest_addr + size
307 * | Unused |
308 * +==============+ <- offset from Z_VIRT_RAM_END == dest_addr + alloc_size
309 * | ... |
310 * +--------------+
311 * | Mapping |
312 * +--------------+
313 * | Reserved |
314 * +--------------+ <- Z_VIRT_RAM_END
315 */
316
317 /* Free the two unused regions */
318 virt_region_free(UINT_TO_POINTER(dest_addr),
319 aligned_dest_addr - dest_addr);
320 if (((dest_addr + alloc_size) - (aligned_dest_addr + size)) > 0) {
321 virt_region_free(UINT_TO_POINTER(aligned_dest_addr + size),
322 (dest_addr + alloc_size) - (aligned_dest_addr + size));
323 }
324
325 dest_addr = aligned_dest_addr;
326 }
327
328 /* Need to make sure this does not step into kernel memory */
329 if (dest_addr < POINTER_TO_UINT(Z_VIRT_REGION_START_ADDR)) {
330 (void)sys_bitarray_free(&virt_region_bitmap, size / CONFIG_MMU_PAGE_SIZE, virt_to_bitmap_offset(UINT_TO_POINTER(dest_addr), size));
331 return NULL;
332 }
333
334 return UINT_TO_POINTER(dest_addr);
335 }
336
337 /*
338 * Free page frames management
339 *
340 * Call all of these functions with z_mm_lock held.
341 */
342
343 /* Linked list of unused and available page frames.
344 *
345 * TODO: This is very simple and treats all free page frames as being equal.
346 * However, there are use-cases to consolidate free pages such that entire
347 * SRAM banks can be switched off to save power, and so obtaining free pages
348 * may require a more complex ontology which prefers page frames in RAM banks
349 * which are still active.
350 *
351 * This implies in the future there may be multiple slists managing physical
352 * pages. Each page frame will still just have one snode link.
353 */
354 static sys_slist_t free_page_frame_list;
355
356 /* Number of unused and available free page frames */
357 size_t z_free_page_count;
358
359 #define PF_ASSERT(pf, expr, fmt, ...) \
360 __ASSERT(expr, "page frame 0x%lx: " fmt, z_page_frame_to_phys(pf), \
361 ##__VA_ARGS__)
362
363 /* Get an unused page frame; we don't care which one. Returns NULL if there are none */
364 static struct z_page_frame *free_page_frame_list_get(void)
365 {
366 sys_snode_t *node;
367 struct z_page_frame *pf = NULL;
368
369 node = sys_slist_get(&free_page_frame_list);
370 if (node != NULL) {
371 z_free_page_count--;
372 pf = CONTAINER_OF(node, struct z_page_frame, node);
373 PF_ASSERT(pf, z_page_frame_is_available(pf),
374 "unavailable but somehow on free list");
375 }
376
377 return pf;
378 }
379
380 /* Release a page frame back into the list of free pages */
381 static void free_page_frame_list_put(struct z_page_frame *pf)
382 {
383 PF_ASSERT(pf, z_page_frame_is_available(pf),
384 "unavailable page put on free list");
385 /* The structure is packed, which ensures that this pointer conversion is valid */
386 void *node = pf;
387
388 sys_slist_append(&free_page_frame_list, node);
389 z_free_page_count++;
390 }
391
392 static void free_page_frame_list_init(void)
393 {
394 sys_slist_init(&free_page_frame_list);
395 }
396
397 static void page_frame_free_locked(struct z_page_frame *pf)
398 {
399 pf->flags = 0;
400 free_page_frame_list_put(pf);
401 }
402
403 /*
404 * Memory Mapping
405 */
406
407 /* Called after the frame is mapped in the arch layer, to update our
408 * local ontology (and do some assertions while we're at it)
409 */
410 static void frame_mapped_set(struct z_page_frame *pf, void *addr)
411 {
412 PF_ASSERT(pf, !z_page_frame_is_reserved(pf),
413 "attempted to map a reserved page frame");
414
415 /* We do allow multiple mappings for pinned page frames
416 * since we will never need to reverse map them.
417 * This is uncommon; use-cases are for things like the
418 * Zephyr equivalent of VDSOs
419 */
420 PF_ASSERT(pf, !z_page_frame_is_mapped(pf) || z_page_frame_is_pinned(pf),
421 "non-pinned and already mapped to %p", pf->addr);
422
423 pf->flags |= Z_PAGE_FRAME_MAPPED;
424 pf->addr = addr;
425 }
426
427 /* LCOV_EXCL_START */
428 /* Go through page frames to find the physical address mapped
429 * by a virtual address.
430 *
431 * @param[in] virt Virtual Address
432 * @param[out] phys Physical address mapped to the input virtual address
433 * if such mapping exists.
434 *
435 * @retval 0 if mapping is found and valid
436 * @retval -EFAULT if virtual address is not mapped
437 */
438 static int virt_to_page_frame(void *virt, uintptr_t *phys)
439 {
440 uintptr_t paddr;
441 struct z_page_frame *pf;
442 int ret = -EFAULT;
443
444 Z_PAGE_FRAME_FOREACH(paddr, pf) {
445 if (z_page_frame_is_mapped(pf)) {
446 if (virt == pf->addr) {
447 ret = 0;
448 *phys = z_page_frame_to_phys(pf);
449 break;
450 }
451 }
452 }
453
454 return ret;
455 }
456 /* LCOV_EXCL_STOP */
457
458 __weak FUNC_ALIAS(virt_to_page_frame, arch_page_phys_get, int);
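/* Note: the page frame walk above is only a generic fallback; architectures
 * that can resolve a virtual address by walking their page tables directly
 * may provide their own arch_page_phys_get() to override this weak alias.
 */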
459
460 #ifdef CONFIG_DEMAND_PAGING
461 static int page_frame_prepare_locked(struct z_page_frame *pf, bool *dirty_ptr,
462 bool page_in, uintptr_t *location_ptr);
463
464 static inline void do_backing_store_page_in(uintptr_t location);
465 static inline void do_backing_store_page_out(uintptr_t location);
466 #endif /* CONFIG_DEMAND_PAGING */
467
468 /* Allocate a free page frame, and map it to a specified virtual address
469 *
470 * TODO: Add optional support for copy-on-write mappings to a zero page instead
471 * of allocating, in which case page frames will be allocated lazily as
472 * the mappings to the zero page get touched. This will avoid expensive
473 * page-ins as memory is mapped, and physical RAM or backing store space will
474 * not be used if the mapped memory is unused. The cost is an empty physical
475 * page of zeroes.
476 */
477 static int map_anon_page(void *addr, uint32_t flags)
478 {
479 struct z_page_frame *pf;
480 uintptr_t phys;
481 bool lock = (flags & K_MEM_MAP_LOCK) != 0U;
482 bool uninit = (flags & K_MEM_MAP_UNINIT) != 0U;
483
484 pf = free_page_frame_list_get();
485 if (pf == NULL) {
486 #ifdef CONFIG_DEMAND_PAGING
487 uintptr_t location;
488 bool dirty;
489 int ret;
490
491 pf = k_mem_paging_eviction_select(&dirty);
492 __ASSERT(pf != NULL, "failed to get a page frame");
493 LOG_DBG("evicting %p at 0x%lx", pf->addr,
494 z_page_frame_to_phys(pf));
495 ret = page_frame_prepare_locked(pf, &dirty, false, &location);
496 if (ret != 0) {
497 return -ENOMEM;
498 }
499 if (dirty) {
500 do_backing_store_page_out(location);
501 }
502 pf->flags = 0;
503 #else
504 return -ENOMEM;
505 #endif /* CONFIG_DEMAND_PAGING */
506 }
507
508 phys = z_page_frame_to_phys(pf);
509 arch_mem_map(addr, phys, CONFIG_MMU_PAGE_SIZE, flags | K_MEM_CACHE_WB);
510
511 if (lock) {
512 pf->flags |= Z_PAGE_FRAME_PINNED;
513 }
514 frame_mapped_set(pf, addr);
515
516 LOG_DBG("memory mapping anon page %p -> 0x%lx", addr, phys);
517
518 if (!uninit) {
519 /* If we later implement mappings to a copy-on-write
520 * zero page, won't need this step
521 */
522 memset(addr, 0, CONFIG_MMU_PAGE_SIZE);
523 }
524
525 return 0;
526 }
527
528 void *k_mem_map(size_t size, uint32_t flags)
529 {
530 uint8_t *dst;
531 size_t total_size;
532 int ret;
533 k_spinlock_key_t key;
534 uint8_t *pos;
535
536 __ASSERT(!(((flags & K_MEM_PERM_USER) != 0U) &&
537 ((flags & K_MEM_MAP_UNINIT) != 0U)),
538 "user access to anonymous uninitialized pages is forbidden");
539 __ASSERT(size % CONFIG_MMU_PAGE_SIZE == 0U,
540 "unaligned size %zu passed to %s", size, __func__);
541 __ASSERT(size != 0, "zero sized memory mapping");
542 __ASSERT(page_frames_initialized, "%s called too early", __func__);
543 __ASSERT((flags & K_MEM_CACHE_MASK) == 0U,
544 "%s does not support explicit cache settings", __func__);
545
546 key = k_spin_lock(&z_mm_lock);
547
548 /* Need extra for the guard pages (before and after) which we
549 * won't map.
550 */
551 total_size = size + CONFIG_MMU_PAGE_SIZE * 2;
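/* Resulting virtual layout (illustrative):
 *
 *   [guard page][ size bytes returned to the caller ][guard page]
 *
 * Only the middle region is backed by page frames; both guards are left
 * unmapped so that an out-of-bounds access faults immediately.
 */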
552
553 dst = virt_region_alloc(total_size, CONFIG_MMU_PAGE_SIZE);
554 if (dst == NULL) {
555 /* Address space has no free region */
556 goto out;
557 }
558
559 /* Unmap both guard pages to make sure accessing them
560 * will generate a fault.
561 */
562 arch_mem_unmap(dst, CONFIG_MMU_PAGE_SIZE);
563 arch_mem_unmap(dst + CONFIG_MMU_PAGE_SIZE + size,
564 CONFIG_MMU_PAGE_SIZE);
565
566 /* Skip over the "before" guard page in returned address. */
567 dst += CONFIG_MMU_PAGE_SIZE;
568
569 VIRT_FOREACH(dst, size, pos) {
570 ret = map_anon_page(pos, flags);
571
572 if (ret != 0) {
573 /* TODO: call k_mem_unmap(dst, pos - dst) when
574 * implemented in #28990 and release any guard virtual
575 * page as well.
576 */
577 dst = NULL;
578 goto out;
579 }
580 }
581 out:
582 k_spin_unlock(&z_mm_lock, key);
583 return dst;
584 }
585
586 void k_mem_unmap(void *addr, size_t size)
587 {
588 uintptr_t phys;
589 uint8_t *pos;
590 struct z_page_frame *pf;
591 k_spinlock_key_t key;
592 size_t total_size;
593 int ret;
594
595 /* Need space for the "before" guard page */
596 __ASSERT_NO_MSG(POINTER_TO_UINT(addr) >= CONFIG_MMU_PAGE_SIZE);
597
598 /* Make sure address range is still valid after accounting
599 * for two guard pages.
600 */
601 pos = (uint8_t *)addr - CONFIG_MMU_PAGE_SIZE;
602 z_mem_assert_virtual_region(pos, size + (CONFIG_MMU_PAGE_SIZE * 2));
603
604 key = k_spin_lock(&z_mm_lock);
605
606 /* Check if both guard pages are unmapped.
607 * Bail if not, as this is probably a region not mapped
608 * using k_mem_map().
609 */
610 pos = addr;
611 ret = arch_page_phys_get(pos - CONFIG_MMU_PAGE_SIZE, NULL);
612 if (ret == 0) {
613 __ASSERT(ret != 0,
614 "%s: cannot find preceding guard page for (%p, %zu)",
615 __func__, addr, size);
616 goto out;
617 }
618
619 ret = arch_page_phys_get(pos + size, NULL);
620 if (ret == 0) {
621 __ASSERT(ret != 0,
622 "%s: cannot find succeeding guard page for (%p, %zu)",
623 __func__, addr, size);
624 goto out;
625 }
626
627 VIRT_FOREACH(addr, size, pos) {
628 ret = arch_page_phys_get(pos, &phys);
629
630 __ASSERT(ret == 0,
631 "%s: cannot unmap an unmapped address %p",
632 __func__, pos);
633 if (ret != 0) {
634 /* Found an address not mapped. Do not continue. */
635 goto out;
636 }
637
638 __ASSERT(z_is_page_frame(phys),
639 "%s: 0x%lx is not a page frame", __func__, phys);
640 if (!z_is_page_frame(phys)) {
641 /* Physical address has no corresponding page frame
642 * description in the page frame array.
643 * This should not happen. Do not continue.
644 */
645 goto out;
646 }
647
648 /* Grab the corresponding page frame from physical address */
649 pf = z_phys_to_page_frame(phys);
650
651 __ASSERT(z_page_frame_is_mapped(pf),
652 "%s: 0x%lx is not a mapped page frame", __func__, phys);
653 if (!z_page_frame_is_mapped(pf)) {
654 /* Page frame is not marked mapped.
655 * This should not happen. Do not continue.
656 */
657 goto out;
658 }
659
660 arch_mem_unmap(pos, CONFIG_MMU_PAGE_SIZE);
661
662 /* Put the page frame back into free list */
663 page_frame_free_locked(pf);
664 }
665
666 /* There are guard pages just before and after the mapped
667 * region. So we also need to free them from the bitmap.
668 */
669 pos = (uint8_t *)addr - CONFIG_MMU_PAGE_SIZE;
670 total_size = size + CONFIG_MMU_PAGE_SIZE * 2;
671 virt_region_free(pos, total_size);
672
673 out:
674 k_spin_unlock(&z_mm_lock, key);
675 }
676
677 size_t k_mem_free_get(void)
678 {
679 size_t ret;
680 k_spinlock_key_t key;
681
682 __ASSERT(page_frames_initialized, "%s called too early", __func__);
683
684 key = k_spin_lock(&z_mm_lock);
685 #ifdef CONFIG_DEMAND_PAGING
686 if (z_free_page_count > CONFIG_DEMAND_PAGING_PAGE_FRAMES_RESERVE) {
687 ret = z_free_page_count - CONFIG_DEMAND_PAGING_PAGE_FRAMES_RESERVE;
688 } else {
689 ret = 0;
690 }
691 #else
692 ret = z_free_page_count;
693 #endif
694 k_spin_unlock(&z_mm_lock, key);
695
696 return ret * (size_t)CONFIG_MMU_PAGE_SIZE;
697 }
698
699 /* Get the default virtual region alignment, here the default MMU page size
700 *
701 * @param[in] phys Physical address of region to be mapped, aligned to MMU_PAGE_SIZE
702 * @param[in] size Size of region to be mapped, aligned to MMU_PAGE_SIZE
703 *
704 * @retval alignment to apply on the virtual address of this region
705 */
706 static size_t virt_region_align(uintptr_t phys, size_t size)
707 {
708 ARG_UNUSED(phys);
709 ARG_UNUSED(size);
710
711 return CONFIG_MMU_PAGE_SIZE;
712 }
713
714 __weak FUNC_ALIAS(virt_region_align, arch_virt_region_align, size_t);
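/* Architectures may override this weak alias, e.g. to request a larger
 * alignment so that bigger hardware mapping granules can be used for large
 * regions.
 */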
715
716 /* This may be called from arch early boot code before z_cstart() is invoked.
717 * Data will be copied and BSS zeroed, but this function must not rely on
718 * any other initialization having run in order to work correctly.
719 */
720 void z_phys_map(uint8_t **virt_ptr, uintptr_t phys, size_t size, uint32_t flags)
721 {
722 uintptr_t aligned_phys, addr_offset;
723 size_t aligned_size, align_boundary;
724 k_spinlock_key_t key;
725 uint8_t *dest_addr;
726 size_t num_bits;
727 size_t offset;
728
729 #ifndef CONFIG_KERNEL_DIRECT_MAP
730 __ASSERT(!(flags & K_MEM_DIRECT_MAP), "The direct-map is not enabled");
731 #endif
732 addr_offset = k_mem_region_align(&aligned_phys, &aligned_size,
733 phys, size,
734 CONFIG_MMU_PAGE_SIZE);
735 __ASSERT(aligned_size != 0U, "0-length mapping at 0x%lx", aligned_phys);
736 __ASSERT(aligned_phys < (aligned_phys + (aligned_size - 1)),
737 "wraparound for physical address 0x%lx (size %zu)",
738 aligned_phys, aligned_size);
739
740 align_boundary = arch_virt_region_align(aligned_phys, aligned_size);
741
742 key = k_spin_lock(&z_mm_lock);
743 if (flags & K_MEM_DIRECT_MAP) {
744 dest_addr = (uint8_t *)aligned_phys;
745 /* Reserve from the virtual memory space */
746 if (!(dest_addr + aligned_size < Z_VIRT_RAM_START ||
747 dest_addr > Z_VIRT_RAM_END)) {
748 num_bits = aligned_size / CONFIG_MMU_PAGE_SIZE;
749 offset = virt_to_bitmap_offset(dest_addr, aligned_size);
750 if (sys_bitarray_test_and_set_region(
751 &virt_region_bitmap, num_bits, offset, true))
752 goto fail;
753 }
754 } else {
755 /* Obtain an appropriately sized chunk of virtual memory */
756 dest_addr = virt_region_alloc(aligned_size, align_boundary);
757 if (!dest_addr) {
758 goto fail;
759 }
760 }
761
762 /* If this fails there's something amiss with virt_region_alloc() */
763 __ASSERT((uintptr_t)dest_addr <
764 ((uintptr_t)dest_addr + (size - 1)),
765 "wraparound for virtual address %p (size %zu)",
766 dest_addr, size);
767
768 LOG_DBG("arch_mem_map(%p, 0x%lx, %zu, %x) offset %lu", dest_addr,
769 aligned_phys, aligned_size, flags, addr_offset);
770
771 arch_mem_map(dest_addr, aligned_phys, aligned_size, flags);
772 k_spin_unlock(&z_mm_lock, key);
773
774 *virt_ptr = dest_addr + addr_offset;
775 return;
776 fail:
777 /* May re-visit this in the future, but for now running out of
778 * virtual address space or failing the arch_mem_map() call is
779 * an unrecoverable situation.
780 *
781 * Other problems not related to resource exhaustion we leave as
782 * assertions since they are clearly programming mistakes.
783 */
784 LOG_ERR("memory mapping 0x%lx (size %zu, flags 0x%x) failed",
785 phys, size, flags);
786 k_panic();
787 }
788
789 void z_phys_unmap(uint8_t *virt, size_t size)
790 {
791 uintptr_t aligned_virt, addr_offset;
792 size_t aligned_size;
793 k_spinlock_key_t key;
794
795 addr_offset = k_mem_region_align(&aligned_virt, &aligned_size,
796 POINTER_TO_UINT(virt), size,
797 CONFIG_MMU_PAGE_SIZE);
798 __ASSERT(aligned_size != 0U, "0-length mapping at 0x%lx", aligned_virt);
799 __ASSERT(aligned_virt < (aligned_virt + (aligned_size - 1)),
800 "wraparound for virtual address 0x%lx (size %zu)",
801 aligned_virt, aligned_size);
802
803 key = k_spin_lock(&z_mm_lock);
804
805 LOG_DBG("arch_mem_unmap(0x%lx, %zu) offset %lu",
806 aligned_virt, aligned_size, addr_offset);
807
808 arch_mem_unmap(UINT_TO_POINTER(aligned_virt), aligned_size);
809 virt_region_free(UINT_TO_POINTER(aligned_virt), aligned_size);
810 k_spin_unlock(&z_mm_lock, key);
811 }
812
813 /*
814 * Miscellaneous
815 */
816
817 size_t k_mem_region_align(uintptr_t *aligned_addr, size_t *aligned_size,
818 uintptr_t addr, size_t size, size_t align)
819 {
820 size_t addr_offset;
821
822 /* The actual mapped region must be page-aligned. Round down the
823 * physical address and pad the region size appropriately
824 */
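/* Worked example (illustrative): addr == 0x10234, size == 0x100 and
 * align == 0x1000 yield *aligned_addr == 0x10000, addr_offset == 0x234 and
 * *aligned_size == ROUND_UP(0x334, 0x1000) == 0x1000, i.e. one page that
 * fully covers the requested region.
 */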
825 *aligned_addr = ROUND_DOWN(addr, align);
826 addr_offset = addr - *aligned_addr;
827 *aligned_size = ROUND_UP(size + addr_offset, align);
828
829 return addr_offset;
830 }
831
832 #if defined(CONFIG_LINKER_USE_BOOT_SECTION) || defined(CONFIG_LINKER_USE_PINNED_SECTION)
833 static void mark_linker_section_pinned(void *start_addr, void *end_addr,
834 bool pin)
835 {
836 struct z_page_frame *pf;
837 uint8_t *addr;
838
839 uintptr_t pinned_start = ROUND_DOWN(POINTER_TO_UINT(start_addr),
840 CONFIG_MMU_PAGE_SIZE);
841 uintptr_t pinned_end = ROUND_UP(POINTER_TO_UINT(end_addr),
842 CONFIG_MMU_PAGE_SIZE);
843 size_t pinned_size = pinned_end - pinned_start;
844
845 VIRT_FOREACH(UINT_TO_POINTER(pinned_start), pinned_size, addr)
846 {
847 pf = z_phys_to_page_frame(Z_BOOT_VIRT_TO_PHYS(addr));
848 frame_mapped_set(pf, addr);
849
850 if (pin) {
851 pf->flags |= Z_PAGE_FRAME_PINNED;
852 } else {
853 pf->flags &= ~Z_PAGE_FRAME_PINNED;
854 }
855 }
856 }
857 #endif /* CONFIG_LINKER_USE_BOOT_SECTION) || CONFIG_LINKER_USE_PINNED_SECTION */
858
859 void z_mem_manage_init(void)
860 {
861 uintptr_t phys;
862 uint8_t *addr;
863 struct z_page_frame *pf;
864 k_spinlock_key_t key = k_spin_lock(&z_mm_lock);
865
866 free_page_frame_list_init();
867
868 ARG_UNUSED(addr);
869
870 #ifdef CONFIG_ARCH_HAS_RESERVED_PAGE_FRAMES
871 /* If some page frames are unavailable for use as memory, arch
872 * code will mark Z_PAGE_FRAME_RESERVED in their flags
873 */
874 arch_reserved_pages_update();
875 #endif /* CONFIG_ARCH_HAS_RESERVED_PAGE_FRAMES */
876
877 #ifdef CONFIG_LINKER_GENERIC_SECTIONS_PRESENT_AT_BOOT
878 /* All pages composing the Zephyr image are mapped at boot in a
879 * predictable way. This can change at runtime.
880 */
881 VIRT_FOREACH(Z_KERNEL_VIRT_START, Z_KERNEL_VIRT_SIZE, addr)
882 {
883 pf = z_phys_to_page_frame(Z_BOOT_VIRT_TO_PHYS(addr));
884 frame_mapped_set(pf, addr);
885
886 /* TODO: for now we pin the whole Zephyr image. Demand paging is
887 * currently tested with anonymously-mapped pages, which are not
888 * pinned.
889 *
890 * We will need to setup linker regions for a subset of kernel
891 * code/data pages which are pinned in memory and
892 * may not be evicted. This will contain critical CPU data
893 * structures, and any code used to perform page fault
894 * handling, page-ins, etc.
895 */
896 pf->flags |= Z_PAGE_FRAME_PINNED;
897 }
898 #endif /* CONFIG_LINKER_GENERIC_SECTIONS_PRESENT_AT_BOOT */
899
900 #ifdef CONFIG_LINKER_USE_BOOT_SECTION
901 /* Pin the boot section to prevent it from being swapped out during
902 * boot process. Will be un-pinned once boot process completes.
903 */
904 mark_linker_section_pinned(lnkr_boot_start, lnkr_boot_end, true);
905 #endif
906
907 #ifdef CONFIG_LINKER_USE_PINNED_SECTION
908 /* Pin the page frames corresponding to the pinned symbols */
909 mark_linker_section_pinned(lnkr_pinned_start, lnkr_pinned_end, true);
910 #endif
911
912 /* Any remaining pages that aren't mapped, reserved, or pinned get
913 * added to the free pages list
914 */
915 Z_PAGE_FRAME_FOREACH(phys, pf) {
916 if (z_page_frame_is_available(pf)) {
917 free_page_frame_list_put(pf);
918 }
919 }
920 LOG_DBG("free page frames: %zu", z_free_page_count);
921
922 #ifdef CONFIG_DEMAND_PAGING
923 #ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
924 z_paging_histogram_init();
925 #endif
926 k_mem_paging_backing_store_init();
927 k_mem_paging_eviction_init();
928 #endif
929 #if __ASSERT_ON
930 page_frames_initialized = true;
931 #endif
932 k_spin_unlock(&z_mm_lock, key);
933
934 #ifndef CONFIG_LINKER_GENERIC_SECTIONS_PRESENT_AT_BOOT
935 /* If the BSS section is not present in memory at boot,
936 * it will not have been cleared. This needs to be
937 * done now, since the paging mechanism has been initialized
938 * and the BSS pages can be brought into physical
939 * memory to be cleared.
940 */
941 z_bss_zero();
942 #endif
943 }
944
945 void z_mem_manage_boot_finish(void)
946 {
947 #ifdef CONFIG_LINKER_USE_BOOT_SECTION
948 /* At the end of boot process, unpin the boot sections
949 * as they don't need to be in memory all the time anymore.
950 */
951 mark_linker_section_pinned(lnkr_boot_start, lnkr_boot_end, false);
952 #endif
953 }
954
955 #ifdef CONFIG_DEMAND_PAGING
956
957 #ifdef CONFIG_DEMAND_PAGING_STATS
958 struct k_mem_paging_stats_t paging_stats;
959 extern struct k_mem_paging_histogram_t z_paging_histogram_eviction;
960 extern struct k_mem_paging_histogram_t z_paging_histogram_backing_store_page_in;
961 extern struct k_mem_paging_histogram_t z_paging_histogram_backing_store_page_out;
962 #endif
963
964 static inline void do_backing_store_page_in(uintptr_t location)
965 {
966 #ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
967 uint32_t time_diff;
968
969 #ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS
970 timing_t time_start, time_end;
971
972 time_start = timing_counter_get();
973 #else
974 uint32_t time_start;
975
976 time_start = k_cycle_get_32();
977 #endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */
978 #endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */
979
980 k_mem_paging_backing_store_page_in(location);
981
982 #ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
983 #ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS
984 time_end = timing_counter_get();
985 time_diff = (uint32_t)timing_cycles_get(&time_start, &time_end);
986 #else
987 time_diff = k_cycle_get_32() - time_start;
988 #endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */
989
990 z_paging_histogram_inc(&z_paging_histogram_backing_store_page_in,
991 time_diff);
992 #endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */
993 }
994
995 static inline void do_backing_store_page_out(uintptr_t location)
996 {
997 #ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
998 uint32_t time_diff;
999
1000 #ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS
1001 timing_t time_start, time_end;
1002
1003 time_start = timing_counter_get();
1004 #else
1005 uint32_t time_start;
1006
1007 time_start = k_cycle_get_32();
1008 #endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */
1009 #endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */
1010
1011 k_mem_paging_backing_store_page_out(location);
1012
1013 #ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
1014 #ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS
1015 time_end = timing_counter_get();
1016 time_diff = (uint32_t)timing_cycles_get(&time_start, &time_end);
1017 #else
1018 time_diff = k_cycle_get_32() - time_start;
1019 #endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */
1020
1021 z_paging_histogram_inc(&z_paging_histogram_backing_store_page_out,
1022 time_diff);
1023 #endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */
1024 }
1025
1026 /* The current implementation relies on interrupt locking to prevent any page
1027 * table access, which falls over if other CPUs are active. Addressing this is
1028 * not as simple as using spinlocks, as regular memory reads/writes constitute
1029 * "access" in this sense.
1030 *
1031 * Current needs for demand paging are on uniprocessor systems.
1032 */
1033 BUILD_ASSERT(!IS_ENABLED(CONFIG_SMP));
1034
1035 static void virt_region_foreach(void *addr, size_t size,
1036 void (*func)(void *))
1037 {
1038 z_mem_assert_virtual_region(addr, size);
1039
1040 for (size_t offset = 0; offset < size; offset += CONFIG_MMU_PAGE_SIZE) {
1041 func((uint8_t *)addr + offset);
1042 }
1043 }
1044
1045 /*
1046 * Perform some preparatory steps before paging out. The provided page frame
1047 * must be evicted to the backing store immediately after this is called
1048 * with a call to k_mem_paging_backing_store_page_out() if it contains
1049 * a data page.
1050 *
1051 * - Map page frame to scratch area if requested. This is always true if we're
1052 * doing a page fault, but is only set on manual evictions if the page is
1053 * dirty.
1054 * - If mapped:
1055 * - obtain backing store location and populate location parameter
1056 * - Update page tables with location
1057 * - Mark page frame as busy
1058 *
1059 * Returns -ENOMEM if the backing store is full
1060 */
1061 static int page_frame_prepare_locked(struct z_page_frame *pf, bool *dirty_ptr,
1062 bool page_fault, uintptr_t *location_ptr)
1063 {
1064 uintptr_t phys;
1065 int ret;
1066 bool dirty = *dirty_ptr;
1067
1068 phys = z_page_frame_to_phys(pf);
1069 __ASSERT(!z_page_frame_is_pinned(pf), "page frame 0x%lx is pinned",
1070 phys);
1071
1072 /* If the backing store doesn't have a copy of the page, even if it
1073 * wasn't modified, treat as dirty. This can happen for a few
1074 * reasons:
1075 * 1) Page has never been swapped out before, and the backing store
1076 * wasn't pre-populated with this data page.
1077 * 2) Page was swapped out before, but the page contents were not
1078 * preserved after swapping back in.
1079 * 3) Page contents were preserved when swapped back in, but were later
1080 * evicted from the backing store to make room for other evicted
1081 * pages.
1082 */
1083 if (z_page_frame_is_mapped(pf)) {
1084 dirty = dirty || !z_page_frame_is_backed(pf);
1085 }
1086
1087 if (dirty || page_fault) {
1088 arch_mem_scratch(phys);
1089 }
1090
1091 if (z_page_frame_is_mapped(pf)) {
1092 ret = k_mem_paging_backing_store_location_get(pf, location_ptr,
1093 page_fault);
1094 if (ret != 0) {
1095 LOG_ERR("out of backing store memory");
1096 return -ENOMEM;
1097 }
1098 arch_mem_page_out(pf->addr, *location_ptr);
1099 } else {
1100 /* Shouldn't happen unless this function is mis-used */
1101 __ASSERT(!dirty, "un-mapped page determined to be dirty");
1102 }
1103 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1104 /* Mark as busy so that z_page_frame_is_evictable() returns false */
1105 __ASSERT(!z_page_frame_is_busy(pf), "page frame 0x%lx is already busy",
1106 phys);
1107 pf->flags |= Z_PAGE_FRAME_BUSY;
1108 #endif
1109 /* Update the dirty parameter, since we set it to true if the page wasn't
1110 * backed even if it was otherwise clean
1111 */
1112 *dirty_ptr = dirty;
1113
1114 return 0;
1115 }
1116
1117 static int do_mem_evict(void *addr)
1118 {
1119 bool dirty;
1120 struct z_page_frame *pf;
1121 uintptr_t location;
1122 int key, ret;
1123 uintptr_t flags, phys;
1124
1125 #if CONFIG_DEMAND_PAGING_ALLOW_IRQ
1126 __ASSERT(!k_is_in_isr(),
1127 "%s is unavailable in ISRs with CONFIG_DEMAND_PAGING_ALLOW_IRQ",
1128 __func__);
1129 k_sched_lock();
1130 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1131 key = irq_lock();
1132 flags = arch_page_info_get(addr, &phys, false);
1133 __ASSERT((flags & ARCH_DATA_PAGE_NOT_MAPPED) == 0,
1134 "address %p isn't mapped", addr);
1135 if ((flags & ARCH_DATA_PAGE_LOADED) == 0) {
1136 /* Un-mapped or already evicted. Nothing to do */
1137 ret = 0;
1138 goto out;
1139 }
1140
1141 dirty = (flags & ARCH_DATA_PAGE_DIRTY) != 0;
1142 pf = z_phys_to_page_frame(phys);
1143 __ASSERT(pf->addr == addr, "page frame address mismatch");
1144 ret = page_frame_prepare_locked(pf, &dirty, false, &location);
1145 if (ret != 0) {
1146 goto out;
1147 }
1148
1149 __ASSERT(ret == 0, "failed to prepare page frame");
1150 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1151 irq_unlock(key);
1152 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1153 if (dirty) {
1154 do_backing_store_page_out(location);
1155 }
1156 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1157 key = irq_lock();
1158 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1159 page_frame_free_locked(pf);
1160 out:
1161 irq_unlock(key);
1162 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1163 k_sched_unlock();
1164 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1165 return ret;
1166 }
1167
1168 int k_mem_page_out(void *addr, size_t size)
1169 {
1170 __ASSERT(page_frames_initialized, "%s called on %p too early", __func__,
1171 addr);
1172 z_mem_assert_virtual_region(addr, size);
1173
1174 for (size_t offset = 0; offset < size; offset += CONFIG_MMU_PAGE_SIZE) {
1175 void *pos = (uint8_t *)addr + offset;
1176 int ret;
1177
1178 ret = do_mem_evict(pos);
1179 if (ret != 0) {
1180 return ret;
1181 }
1182 }
1183
1184 return 0;
1185 }
1186
1187 int z_page_frame_evict(uintptr_t phys)
1188 {
1189 int key, ret;
1190 struct z_page_frame *pf;
1191 bool dirty;
1192 uintptr_t flags;
1193 uintptr_t location;
1194
1195 __ASSERT(page_frames_initialized, "%s called on 0x%lx too early",
1196 __func__, phys);
1197
1198 /* Implementation is similar to do_page_fault() except there is no
1199 * data page to page-in, see comments in that function.
1200 */
1201
1202 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1203 __ASSERT(!k_is_in_isr(),
1204 "%s is unavailable in ISRs with CONFIG_DEMAND_PAGING_ALLOW_IRQ",
1205 __func__);
1206 k_sched_lock();
1207 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1208 key = irq_lock();
1209 pf = z_phys_to_page_frame(phys);
1210 if (!z_page_frame_is_mapped(pf)) {
1211 /* Nothing to do, free page */
1212 ret = 0;
1213 goto out;
1214 }
1215 flags = arch_page_info_get(pf->addr, NULL, false);
1216 /* Shouldn't ever happen */
1217 __ASSERT((flags & ARCH_DATA_PAGE_LOADED) != 0, "data page not loaded");
1218 dirty = (flags & ARCH_DATA_PAGE_DIRTY) != 0;
1219 ret = page_frame_prepare_locked(pf, &dirty, false, &location);
1220 if (ret != 0) {
1221 goto out;
1222 }
1223
1224 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1225 irq_unlock(key);
1226 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1227 if (dirty) {
1228 do_backing_store_page_out(location);
1229 }
1230 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1231 key = irq_lock();
1232 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1233 page_frame_free_locked(pf);
1234 out:
1235 irq_unlock(key);
1236 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1237 k_sched_unlock();
1238 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1239 return ret;
1240 }
1241
1242 static inline void paging_stats_faults_inc(struct k_thread *faulting_thread,
1243 int key)
1244 {
1245 #ifdef CONFIG_DEMAND_PAGING_STATS
1246 bool is_irq_unlocked = arch_irq_unlocked(key);
1247
1248 paging_stats.pagefaults.cnt++;
1249
1250 if (is_irq_unlocked) {
1251 paging_stats.pagefaults.irq_unlocked++;
1252 } else {
1253 paging_stats.pagefaults.irq_locked++;
1254 }
1255
1256 #ifdef CONFIG_DEMAND_PAGING_THREAD_STATS
1257 faulting_thread->paging_stats.pagefaults.cnt++;
1258
1259 if (is_irq_unlocked) {
1260 faulting_thread->paging_stats.pagefaults.irq_unlocked++;
1261 } else {
1262 faulting_thread->paging_stats.pagefaults.irq_locked++;
1263 }
1264 #else
1265 ARG_UNUSED(faulting_thread);
1266 #endif
1267
1268 #ifndef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1269 if (k_is_in_isr()) {
1270 paging_stats.pagefaults.in_isr++;
1271
1272 #ifdef CONFIG_DEMAND_PAGING_THREAD_STATS
1273 faulting_thread->paging_stats.pagefaults.in_isr++;
1274 #endif
1275 }
1276 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1277 #endif /* CONFIG_DEMAND_PAGING_STATS */
1278 }
1279
1280 static inline void paging_stats_eviction_inc(struct k_thread *faulting_thread,
1281 bool dirty)
1282 {
1283 #ifdef CONFIG_DEMAND_PAGING_STATS
1284 if (dirty) {
1285 paging_stats.eviction.dirty++;
1286 } else {
1287 paging_stats.eviction.clean++;
1288 }
1289 #ifdef CONFIG_DEMAND_PAGING_THREAD_STATS
1290 if (dirty) {
1291 faulting_thread->paging_stats.eviction.dirty++;
1292 } else {
1293 faulting_thread->paging_stats.eviction.clean++;
1294 }
1295 #else
1296 ARG_UNUSED(faulting_thread);
1297 #endif /* CONFIG_DEMAND_PAGING_THREAD_STATS */
1298 #endif /* CONFIG_DEMAND_PAGING_STATS */
1299 }
1300
1301 static inline struct z_page_frame *do_eviction_select(bool *dirty)
1302 {
1303 struct z_page_frame *pf;
1304
1305 #ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
1306 uint32_t time_diff;
1307
1308 #ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS
1309 timing_t time_start, time_end;
1310
1311 time_start = timing_counter_get();
1312 #else
1313 uint32_t time_start;
1314
1315 time_start = k_cycle_get_32();
1316 #endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */
1317 #endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */
1318
1319 pf = k_mem_paging_eviction_select(dirty);
1320
1321 #ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
1322 #ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS
1323 time_end = timing_counter_get();
1324 time_diff = (uint32_t)timing_cycles_get(&time_start, &time_end);
1325 #else
1326 time_diff = k_cycle_get_32() - time_start;
1327 #endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */
1328
1329 z_paging_histogram_inc(&z_paging_histogram_eviction, time_diff);
1330 #endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */
1331
1332 return pf;
1333 }
1334
1335 static bool do_page_fault(void *addr, bool pin)
1336 {
1337 struct z_page_frame *pf;
1338 int key, ret;
1339 uintptr_t page_in_location, page_out_location;
1340 enum arch_page_location status;
1341 bool result;
1342 bool dirty = false;
1343 struct k_thread *faulting_thread = _current_cpu->current;
1344
1345 __ASSERT(page_frames_initialized, "page fault at %p happened too early",
1346 addr);
1347
1348 LOG_DBG("page fault at %p", addr);
1349
1350 /*
1351 * TODO: Add performance accounting:
1352 * - k_mem_paging_eviction_select() metrics
1353 * * periodic timer execution time histogram (if implemented)
1354 */
1355
1356 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1357 /* We lock the scheduler so that other threads are never scheduled
1358 * during the page-in/out operation.
1359 *
1360 * We do however re-enable interrupts during the page-in/page-out
1361 * operation iff interrupts were enabled when the exception was taken;
1362 * in this configuration page faults in an ISR are a bug; all their
1363 * code/data must be pinned.
1364 *
1365 * If interrupts were disabled when the exception was taken, the
1366 * arch code is responsible for keeping them that way when entering
1367 * this function.
1368 *
1369 * If this is not enabled, then interrupts are always locked for the
1370 * entire operation. This is far worse for system interrupt latency
1371 * but requires fewer pinned pages, and ISRs may also take page faults.
1372 *
1373 * Support for allowing k_mem_paging_backing_store_page_out() and
1374 * k_mem_paging_backing_store_page_in() to also sleep and allow
1375 * other threads to run (such as in the case where the transfer is
1376 * async DMA) is not implemented. Even if limited to thread context,
1377 * arbitrary memory access triggering exceptions that put a thread to
1378 * sleep on a contended page fault operation will break scheduling
1379 * assumptions of cooperative threads or threads that implement
1380 * critical sections with spinlocks or disabling IRQs.
1381 */
1382 k_sched_lock();
1383 __ASSERT(!k_is_in_isr(), "ISR page faults are forbidden");
1384 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1385
1386 key = irq_lock();
1387 status = arch_page_location_get(addr, &page_in_location);
1388 if (status == ARCH_PAGE_LOCATION_BAD) {
1389 /* Return false to treat as a fatal error */
1390 result = false;
1391 goto out;
1392 }
1393 result = true;
1394
1395 if (status == ARCH_PAGE_LOCATION_PAGED_IN) {
1396 if (pin) {
1397 /* It's a physical memory address */
1398 uintptr_t phys = page_in_location;
1399
1400 pf = z_phys_to_page_frame(phys);
1401 pf->flags |= Z_PAGE_FRAME_PINNED;
1402 }
1403
1404 /* This if-block is to pin the page if it is
1405 * already present in physical memory. There is
1406 * no need to go through the following code to
1407 * pull in the data pages. So skip to the end.
1408 */
1409 goto out;
1410 }
1411 __ASSERT(status == ARCH_PAGE_LOCATION_PAGED_OUT,
1412 "unexpected status value %d", status);
1413
1414 paging_stats_faults_inc(faulting_thread, key);
1415
1416 pf = free_page_frame_list_get();
1417 if (pf == NULL) {
1418 /* Need to evict a page frame */
1419 pf = do_eviction_select(&dirty);
1420 __ASSERT(pf != NULL, "failed to get a page frame");
1421 LOG_DBG("evicting %p at 0x%lx", pf->addr,
1422 z_page_frame_to_phys(pf));
1423
1424 paging_stats_eviction_inc(faulting_thread, dirty);
1425 }
1426 ret = page_frame_prepare_locked(pf, &dirty, true, &page_out_location);
1427 __ASSERT(ret == 0, "failed to prepare page frame");
1428
1429 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1430 irq_unlock(key);
1431 /* Interrupts are now unlocked if they were not locked when we entered
1432 * this function, and we may service ISRs. The scheduler is still
1433 * locked.
1434 */
1435 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1436 if (dirty) {
1437 do_backing_store_page_out(page_out_location);
1438 }
1439 do_backing_store_page_in(page_in_location);
1440
1441 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1442 key = irq_lock();
1443 pf->flags &= ~Z_PAGE_FRAME_BUSY;
1444 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1445 if (pin) {
1446 pf->flags |= Z_PAGE_FRAME_PINNED;
1447 }
1448 pf->flags |= Z_PAGE_FRAME_MAPPED;
1449 pf->addr = UINT_TO_POINTER(POINTER_TO_UINT(addr)
1450 & ~(CONFIG_MMU_PAGE_SIZE - 1));
1451
1452 arch_mem_page_in(addr, z_page_frame_to_phys(pf));
1453 k_mem_paging_backing_store_page_finalize(pf, page_in_location);
1454 out:
1455 irq_unlock(key);
1456 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1457 k_sched_unlock();
1458 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1459
1460 return result;
1461 }
1462
1463 static void do_page_in(void *addr)
1464 {
1465 bool ret;
1466
1467 ret = do_page_fault(addr, false);
1468 __ASSERT(ret, "unmapped memory address %p", addr);
1469 (void)ret;
1470 }
1471
1472 void k_mem_page_in(void *addr, size_t size)
1473 {
1474 __ASSERT(!IS_ENABLED(CONFIG_DEMAND_PAGING_ALLOW_IRQ) || !k_is_in_isr(),
1475 "%s may not be called in ISRs if CONFIG_DEMAND_PAGING_ALLOW_IRQ is enabled",
1476 __func__);
1477 virt_region_foreach(addr, size, do_page_in);
1478 }
1479
1480 static void do_mem_pin(void *addr)
1481 {
1482 bool ret;
1483
1484 ret = do_page_fault(addr, true);
1485 __ASSERT(ret, "unmapped memory address %p", addr);
1486 (void)ret;
1487 }
1488
1489 void k_mem_pin(void *addr, size_t size)
1490 {
1491 __ASSERT(!IS_ENABLED(CONFIG_DEMAND_PAGING_ALLOW_IRQ) || !k_is_in_isr(),
1492 "%s may not be called in ISRs if CONFIG_DEMAND_PAGING_ALLOW_IRQ is enabled",
1493 __func__);
1494 virt_region_foreach(addr, size, do_mem_pin);
1495 }
1496
1497 bool z_page_fault(void *addr)
1498 {
1499 return do_page_fault(addr, false);
1500 }
1501
1502 static void do_mem_unpin(void *addr)
1503 {
1504 struct z_page_frame *pf;
1505 unsigned int key;
1506 uintptr_t flags, phys;
1507
1508 key = irq_lock();
1509 flags = arch_page_info_get(addr, &phys, false);
1510 __ASSERT((flags & ARCH_DATA_PAGE_NOT_MAPPED) == 0,
1511 "invalid data page at %p", addr);
1512 if ((flags & ARCH_DATA_PAGE_LOADED) != 0) {
1513 pf = z_phys_to_page_frame(phys);
1514 pf->flags &= ~Z_PAGE_FRAME_PINNED;
1515 }
1516 irq_unlock(key);
1517 }
1518
1519 void k_mem_unpin(void *addr, size_t size)
1520 {
1521 __ASSERT(page_frames_initialized, "%s called on %p too early", __func__,
1522 addr);
1523 virt_region_foreach(addr, size, do_mem_unpin);
1524 }
1525
1526 #endif /* CONFIG_DEMAND_PAGING */
1527