/*
 * Copyright (c) 2020 Intel Corporation
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Routines for managing virtual address spaces
 */

#include <stdint.h>
#include <kernel_arch_interface.h>
#include <spinlock.h>
#include <mmu.h>
#include <init.h>
#include <kernel_internal.h>
#include <syscall_handler.h>
#include <toolchain.h>
#include <linker/linker-defs.h>
#include <sys/bitarray.h>
#include <timing/timing.h>
#include <logging/log.h>
LOG_MODULE_DECLARE(os, CONFIG_KERNEL_LOG_LEVEL);

/*
 * General terminology:
 * - A page frame is a page-sized physical memory region in RAM. It is a
 *   container where a data page may be placed. It is always referred to by
 *   physical address. We have a convention of using uintptr_t for physical
 *   addresses. We instantiate a struct z_page_frame to store metadata for
 *   every page frame.
 *
 * - A data page is a page-sized region of data. It may exist in a page frame,
 *   or be paged out to some backing store. Its location can always be looked
 *   up in the CPU's page tables (or equivalent) by virtual address.
 *   The data type will always be void * or in some cases uint8_t * when we
 *   want to do pointer arithmetic.
 */
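
/* Worked example (illustrative values only, not from any particular build):
 * assuming 4 KiB pages and Z_PHYS_RAM_START == 0x80000000, the data page at
 * physical address 0x80003abc lives in the page frame spanning
 * 0x80003000..0x80003fff, whose metadata is z_page_frames[3]. The
 * z_phys_to_page_frame() and z_page_frame_to_phys() helpers used throughout
 * this file perform exactly this index arithmetic.
 */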

/* Spinlock to protect any globals in this file and serialize page table
 * updates in arch code
 */
struct k_spinlock z_mm_lock;

/*
 * General page frame management
 */

/* Database of all RAM page frames */
struct z_page_frame z_page_frames[Z_NUM_PAGE_FRAMES];

#if __ASSERT_ON
/* Indicator that z_page_frames has been initialized, many of these APIs do
 * not work before POST_KERNEL
 */
static bool page_frames_initialized;
#endif

/* Add colors to page table dumps to indicate mapping type */
#define COLOR_PAGE_FRAMES 1

#if COLOR_PAGE_FRAMES
#define ANSI_DEFAULT "\x1B[0m"
#define ANSI_RED     "\x1B[1;31m"
#define ANSI_GREEN   "\x1B[1;32m"
#define ANSI_YELLOW  "\x1B[1;33m"
#define ANSI_BLUE    "\x1B[1;34m"
#define ANSI_MAGENTA "\x1B[1;35m"
#define ANSI_CYAN    "\x1B[1;36m"
#define ANSI_GREY    "\x1B[1;90m"

#define COLOR(x) printk(_CONCAT(ANSI_, x))
#else
#define COLOR(x) do { } while (0)
#endif

static void page_frame_dump(struct z_page_frame *pf)
{
	if (z_page_frame_is_reserved(pf)) {
		COLOR(CYAN);
		printk("R");
	} else if (z_page_frame_is_busy(pf)) {
		COLOR(MAGENTA);
		printk("B");
	} else if (z_page_frame_is_pinned(pf)) {
		COLOR(YELLOW);
		printk("P");
	} else if (z_page_frame_is_available(pf)) {
		COLOR(GREY);
		printk(".");
	} else if (z_page_frame_is_mapped(pf)) {
		COLOR(DEFAULT);
		printk("M");
	} else {
		COLOR(RED);
		printk("?");
	}
}

void z_page_frames_dump(void)
{
	int column = 0;

	__ASSERT(page_frames_initialized, "%s called too early", __func__);
	printk("Physical memory from 0x%lx to 0x%lx\n",
	       Z_PHYS_RAM_START, Z_PHYS_RAM_END);

	for (int i = 0; i < Z_NUM_PAGE_FRAMES; i++) {
		struct z_page_frame *pf = &z_page_frames[i];

		page_frame_dump(pf);

		column++;
		if (column == 64) {
			column = 0;
			printk("\n");
		}
	}

	COLOR(DEFAULT);
	if (column != 0) {
		printk("\n");
	}
}

#define VIRT_FOREACH(_base, _size, _pos) \
	for (_pos = _base; \
	     _pos < ((uint8_t *)_base + _size); _pos += CONFIG_MMU_PAGE_SIZE)

#define PHYS_FOREACH(_base, _size, _pos) \
	for (_pos = _base; \
	     _pos < ((uintptr_t)_base + _size); _pos += CONFIG_MMU_PAGE_SIZE)


/*
 * Virtual address space management
 *
 * Call all of these functions with z_mm_lock held.
 *
 * Overall virtual memory map: When the kernel starts, it resides in
 * virtual memory in the region Z_KERNEL_VIRT_START to
 * Z_KERNEL_VIRT_END. Unused virtual memory past this, up to the limit
 * noted by CONFIG_KERNEL_VM_SIZE may be used for runtime memory mappings.
 *
 * If CONFIG_ARCH_MAPS_ALL_RAM is set, we do not just map the kernel image,
 * but have a mapping for all RAM in place. This is for special architectural
 * purposes and does not otherwise affect page frame accounting or flags;
 * the only guarantee is that such RAM mapping outside of the Zephyr image
 * won't be disturbed by subsequent memory mapping calls.
 *
 * +--------------+ <- Z_VIRT_RAM_START
 * | Undefined VM | <- May contain ancillary regions like x86_64's locore
 * +--------------+ <- Z_KERNEL_VIRT_START (often == Z_VIRT_RAM_START)
 * | Mapping for  |
 * | main kernel  |
 * | image        |
 * |              |
 * |              |
 * +--------------+ <- Z_FREE_VM_START
 * |              |
 * | Unused,      |
 * | Available VM |
 * |              |
 * |..............| <- mapping_pos (grows downward as more mappings are made)
 * | Mapping      |
 * +--------------+
 * | Mapping      |
 * +--------------+
 * | ...          |
 * +--------------+
 * | Mapping      |
 * +--------------+ <- mappings start here
 * | Reserved     | <- special purpose virtual page(s) of size Z_VM_RESERVED
 * +--------------+ <- Z_VIRT_RAM_END
 */

/* Bitmap of virtual addresses where one bit corresponds to one page.
 * This is being used for virt_region_alloc() to figure out which
 * region of virtual addresses can be used for memory mapping.
 *
 * Note that bit #0 is the highest address so that allocation is
 * done in reverse from highest address.
 */
SYS_BITARRAY_DEFINE(virt_region_bitmap,
		    CONFIG_KERNEL_VM_SIZE / CONFIG_MMU_PAGE_SIZE);

static bool virt_region_inited;

#define Z_VIRT_REGION_START_ADDR Z_FREE_VM_START
#define Z_VIRT_REGION_END_ADDR (Z_VIRT_RAM_END - Z_VM_RESERVED)

static inline uintptr_t virt_from_bitmap_offset(size_t offset, size_t size)
{
	return POINTER_TO_UINT(Z_VIRT_RAM_END)
	       - (offset * CONFIG_MMU_PAGE_SIZE) - size;
}

static inline size_t virt_to_bitmap_offset(void *vaddr, size_t size)
{
	return (POINTER_TO_UINT(Z_VIRT_RAM_END)
		- POINTER_TO_UINT(vaddr) - size) / CONFIG_MMU_PAGE_SIZE;
}
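
/* Worked example of the two conversions above (illustrative values only,
 * assuming 4 KiB pages and Z_VIRT_RAM_END == 0xC0100000): a region of size
 * 0x2000 starting at vaddr 0xC00FC000 ends 0x2000 bytes below Z_VIRT_RAM_END,
 * so virt_to_bitmap_offset() returns
 * (0xC0100000 - 0xC00FC000 - 0x2000) / 0x1000 = 2, i.e. bit #2 counted down
 * from the top of the address space. virt_from_bitmap_offset(2, 0x2000) maps
 * back to 0xC0100000 - 2 * 0x1000 - 0x2000 = 0xC00FC000.
 */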

static void virt_region_init(void)
{
	size_t offset, num_bits;

	/* There are regions where we should never map via
	 * k_mem_map() and z_phys_map(). Mark them as
	 * already allocated so they will never be used.
	 */

	if (Z_VM_RESERVED > 0) {
		/* Mark reserved region at end of virtual address space */
		num_bits = Z_VM_RESERVED / CONFIG_MMU_PAGE_SIZE;
		(void)sys_bitarray_set_region(&virt_region_bitmap,
					      num_bits, 0);
	}

	/* Mark all bits up to Z_FREE_VM_START as allocated */
	num_bits = POINTER_TO_UINT(Z_FREE_VM_START)
		   - POINTER_TO_UINT(Z_VIRT_RAM_START);
	offset = virt_to_bitmap_offset(Z_VIRT_RAM_START, num_bits);
	num_bits /= CONFIG_MMU_PAGE_SIZE;
	(void)sys_bitarray_set_region(&virt_region_bitmap,
				      num_bits, offset);

	virt_region_inited = true;
}

static void *virt_region_alloc(size_t size)
{
	uintptr_t dest_addr;
	size_t offset;
	size_t num_bits;
	int ret;

	if (unlikely(!virt_region_inited)) {
		virt_region_init();
	}

	num_bits = size / CONFIG_MMU_PAGE_SIZE;
	ret = sys_bitarray_alloc(&virt_region_bitmap, num_bits, &offset);
	if (ret != 0) {
		LOG_ERR("insufficient virtual address space (requested %zu)",
			size);
		return NULL;
	}

	/* Remember that bit #0 in bitmap corresponds to the highest
	 * virtual address. So here we need to go downwards (backwards?)
	 * to get the starting address of the allocated region.
	 */
	dest_addr = virt_from_bitmap_offset(offset, size);

	/* Need to make sure this does not step into kernel memory */
	if (dest_addr < POINTER_TO_UINT(Z_VIRT_REGION_START_ADDR)) {
		(void)sys_bitarray_free(&virt_region_bitmap, num_bits, offset);
		return NULL;
	}

	return UINT_TO_POINTER(dest_addr);
}

static void virt_region_free(void *vaddr, size_t size)
{
	size_t offset, num_bits;
	uint8_t *vaddr_u8 = (uint8_t *)vaddr;

	if (unlikely(!virt_region_inited)) {
		virt_region_init();
	}

	__ASSERT((vaddr_u8 >= Z_VIRT_REGION_START_ADDR)
		 && ((vaddr_u8 + size) < Z_VIRT_REGION_END_ADDR),
		 "invalid virtual address region %p (%zu)", vaddr_u8, size);
	if (!((vaddr_u8 >= Z_VIRT_REGION_START_ADDR)
	      && ((vaddr_u8 + size) < Z_VIRT_REGION_END_ADDR))) {
		return;
	}

	offset = virt_to_bitmap_offset(vaddr, size);
	num_bits = size / CONFIG_MMU_PAGE_SIZE;
	(void)sys_bitarray_free(&virt_region_bitmap, num_bits, offset);
}

/*
 * Free page frames management
 *
 * Call all of these functions with z_mm_lock held.
 */

/* Linked list of unused and available page frames.
 *
 * TODO: This is very simple and treats all free page frames as being equal.
 * However, there are use-cases to consolidate free pages such that entire
 * SRAM banks can be switched off to save power, and so obtaining free pages
 * may require a more complex ontology which prefers page frames in RAM banks
 * which are still active.
 *
 * This implies in the future there may be multiple slists managing physical
 * pages. Each page frame will still just have one snode link.
 */
static sys_slist_t free_page_frame_list;

/* Number of unused and available free page frames */
size_t z_free_page_count;

#define PF_ASSERT(pf, expr, fmt, ...) \
	__ASSERT(expr, "page frame 0x%lx: " fmt, z_page_frame_to_phys(pf), \
		 ##__VA_ARGS__)

/* Get an unused page frame (we don't care which); NULL if there are none */
static struct z_page_frame *free_page_frame_list_get(void)
{
	sys_snode_t *node;
	struct z_page_frame *pf = NULL;

	node = sys_slist_get(&free_page_frame_list);
	if (node != NULL) {
		z_free_page_count--;
		pf = CONTAINER_OF(node, struct z_page_frame, node);
		PF_ASSERT(pf, z_page_frame_is_available(pf),
			  "unavailable but somehow on free list");
	}

	return pf;
}

/* Release a page frame back into the list of free pages */
static void free_page_frame_list_put(struct z_page_frame *pf)
{
	PF_ASSERT(pf, z_page_frame_is_available(pf),
		  "unavailable page put on free list");
	sys_slist_append(&free_page_frame_list, &pf->node);
	z_free_page_count++;
}

static void free_page_frame_list_init(void)
{
	sys_slist_init(&free_page_frame_list);
}

static void page_frame_free_locked(struct z_page_frame *pf)
{
	pf->flags = 0;
	free_page_frame_list_put(pf);
}

/*
 * Memory Mapping
 */

/* Called after the frame is mapped in the arch layer, to update our
 * local ontology (and do some assertions while we're at it)
 */
static void frame_mapped_set(struct z_page_frame *pf, void *addr)
{
	PF_ASSERT(pf, !z_page_frame_is_reserved(pf),
		  "attempted to map a reserved page frame");

	/* We do allow multiple mappings for pinned page frames
	 * since we will never need to reverse map them.
	 * This is uncommon, use-cases are for things like the
	 * Zephyr equivalent of vDSOs
	 */
	PF_ASSERT(pf, !z_page_frame_is_mapped(pf) || z_page_frame_is_pinned(pf),
		  "non-pinned and already mapped to %p", pf->addr);

	pf->flags |= Z_PAGE_FRAME_MAPPED;
	pf->addr = addr;
}

/* Go through page frames to find the physical address mapped
 * by a virtual address.
 *
 * @param[in] virt Virtual Address
 * @param[out] phys Physical address mapped to the input virtual address
 *                  if such mapping exists.
 *
 * @retval 0 if mapping is found and valid
 * @retval -EFAULT if virtual address is not mapped
 */
static int virt_to_page_frame(void *virt, uintptr_t *phys)
{
	uintptr_t paddr;
	struct z_page_frame *pf;
	int ret = -EFAULT;

	Z_PAGE_FRAME_FOREACH(paddr, pf) {
		if (z_page_frame_is_mapped(pf)) {
			if (virt == pf->addr) {
				ret = 0;
				*phys = z_page_frame_to_phys(pf);
				break;
			}
		}
	}

	return ret;
}
__weak FUNC_ALIAS(virt_to_page_frame, arch_page_phys_get, int);

#ifdef CONFIG_DEMAND_PAGING
static int page_frame_prepare_locked(struct z_page_frame *pf, bool *dirty_ptr,
				     bool page_in, uintptr_t *location_ptr);

static inline void do_backing_store_page_in(uintptr_t location);
static inline void do_backing_store_page_out(uintptr_t location);
#endif /* CONFIG_DEMAND_PAGING */

/* Allocate a free page frame, and map it to a specified virtual address
 *
 * TODO: Add optional support for copy-on-write mappings to a zero page instead
 * of allocating, in which case page frames will be allocated lazily as
 * the mappings to the zero page get touched. This will avoid expensive
 * page-ins as memory is mapped and physical RAM or backing store storage will
 * not be used if the mapped memory is unused. The cost is an empty physical
 * page of zeroes.
 */
static int map_anon_page(void *addr, uint32_t flags)
{
	struct z_page_frame *pf;
	uintptr_t phys;
	bool lock = (flags & K_MEM_MAP_LOCK) != 0U;
	bool uninit = (flags & K_MEM_MAP_UNINIT) != 0U;

	pf = free_page_frame_list_get();
	if (pf == NULL) {
#ifdef CONFIG_DEMAND_PAGING
		uintptr_t location;
		bool dirty;
		int ret;

		pf = k_mem_paging_eviction_select(&dirty);
		__ASSERT(pf != NULL, "failed to get a page frame");
		LOG_DBG("evicting %p at 0x%lx", pf->addr,
			z_page_frame_to_phys(pf));
		ret = page_frame_prepare_locked(pf, &dirty, false, &location);
		if (ret != 0) {
			return -ENOMEM;
		}
		if (dirty) {
			do_backing_store_page_out(location);
		}
		pf->flags = 0;
#else
		return -ENOMEM;
#endif /* CONFIG_DEMAND_PAGING */
	}

	phys = z_page_frame_to_phys(pf);
	arch_mem_map(addr, phys, CONFIG_MMU_PAGE_SIZE, flags | K_MEM_CACHE_WB);

	if (lock) {
		pf->flags |= Z_PAGE_FRAME_PINNED;
	}
	frame_mapped_set(pf, addr);

	LOG_DBG("memory mapping anon page %p -> 0x%lx", addr, phys);

	if (!uninit) {
		/* If we later implement mappings to a copy-on-write
		 * zero page, won't need this step
		 */
		memset(addr, 0, CONFIG_MMU_PAGE_SIZE);
	}

	return 0;
}

void *k_mem_map(size_t size, uint32_t flags)
{
	uint8_t *dst;
	size_t total_size;
	int ret;
	k_spinlock_key_t key;
	uint8_t *pos;

	__ASSERT(!(((flags & K_MEM_PERM_USER) != 0U) &&
		   ((flags & K_MEM_MAP_UNINIT) != 0U)),
		 "user access to anonymous uninitialized pages is forbidden");
	__ASSERT(size % CONFIG_MMU_PAGE_SIZE == 0U,
		 "unaligned size %zu passed to %s", size, __func__);
	__ASSERT(size != 0, "zero sized memory mapping");
	__ASSERT(page_frames_initialized, "%s called too early", __func__);
	__ASSERT((flags & K_MEM_CACHE_MASK) == 0U,
		 "%s does not support explicit cache settings", __func__);

	key = k_spin_lock(&z_mm_lock);

	/* Need extra for the guard pages (before and after) which we
	 * won't map.
	 */
	total_size = size + CONFIG_MMU_PAGE_SIZE * 2;

	dst = virt_region_alloc(total_size);
	if (dst == NULL) {
		/* Address space has no free region */
		goto out;
	}

	/* Unmap both guard pages to make sure accessing them
	 * will generate fault.
	 */
	arch_mem_unmap(dst, CONFIG_MMU_PAGE_SIZE);
	arch_mem_unmap(dst + CONFIG_MMU_PAGE_SIZE + size,
		       CONFIG_MMU_PAGE_SIZE);
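
	/* At this point the allocated virtual region looks like this
	 * (illustrative):
	 *
	 *   dst                               -> unmapped "before" guard page
	 *   dst + CONFIG_MMU_PAGE_SIZE        -> start of the size-byte region
	 *                                        about to be backed by anon pages
	 *   dst + CONFIG_MMU_PAGE_SIZE + size -> unmapped "after" guard page
	 */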

	/* Skip over the "before" guard page in returned address. */
	dst += CONFIG_MMU_PAGE_SIZE;

	VIRT_FOREACH(dst, size, pos) {
		ret = map_anon_page(pos, flags);

		if (ret != 0) {
			/* TODO: call k_mem_unmap(dst, pos - dst) when
			 * implemented in #28990 and release any guard virtual
			 * page as well.
			 */
			dst = NULL;
			goto out;
		}
	}
out:
	k_spin_unlock(&z_mm_lock, key);
	return dst;
}
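
/* Illustrative k_mem_map()/k_mem_unmap() usage from a caller's point of view
 * (a sketch, not part of this file; assumes a page-aligned size and the
 * standard K_MEM_PERM_RW permission flag from sys/mem_manage.h):
 *
 *	void *buf = k_mem_map(4 * CONFIG_MMU_PAGE_SIZE, K_MEM_PERM_RW);
 *
 *	if (buf != NULL) {
 *		... use the zeroed, page-aligned, guard-page-protected buffer ...
 *		k_mem_unmap(buf, 4 * CONFIG_MMU_PAGE_SIZE);
 *	}
 */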

void k_mem_unmap(void *addr, size_t size)
{
	uintptr_t phys;
	uint8_t *pos;
	struct z_page_frame *pf;
	k_spinlock_key_t key;
	size_t total_size;
	int ret;

	/* Need space for the "before" guard page */
	__ASSERT_NO_MSG(POINTER_TO_UINT(addr) >= CONFIG_MMU_PAGE_SIZE);

	/* Make sure address range is still valid after accounting
	 * for two guard pages.
	 */
	pos = (uint8_t *)addr - CONFIG_MMU_PAGE_SIZE;
	z_mem_assert_virtual_region(pos, size + (CONFIG_MMU_PAGE_SIZE * 2));

	key = k_spin_lock(&z_mm_lock);

	/* Check if both guard pages are unmapped.
	 * Bail if not, as this is probably a region not mapped
	 * using k_mem_map().
	 */
	pos = addr;
	ret = arch_page_phys_get(pos - CONFIG_MMU_PAGE_SIZE, NULL);
	if (ret == 0) {
		__ASSERT(ret != 0,
			 "%s: cannot find preceding guard page for (%p, %zu)",
			 __func__, addr, size);
		goto out;
	}

	ret = arch_page_phys_get(pos + size, NULL);
	if (ret == 0) {
		__ASSERT(ret != 0,
			 "%s: cannot find succeeding guard page for (%p, %zu)",
			 __func__, addr, size);
		goto out;
	}

	VIRT_FOREACH(addr, size, pos) {
		ret = arch_page_phys_get(pos, &phys);

		__ASSERT(ret == 0,
			 "%s: cannot unmap an unmapped address %p",
			 __func__, pos);
		if (ret != 0) {
			/* Found an address not mapped. Do not continue. */
			goto out;
		}

		__ASSERT(z_is_page_frame(phys),
			 "%s: 0x%lx is not a page frame", __func__, phys);
		if (!z_is_page_frame(phys)) {
			/* Physical address has no corresponding page frame
			 * description in the page frame array.
			 * This should not happen. Do not continue.
			 */
			goto out;
		}

		/* Grab the corresponding page frame from physical address */
		pf = z_phys_to_page_frame(phys);

		__ASSERT(z_page_frame_is_mapped(pf),
			 "%s: 0x%lx is not a mapped page frame", __func__, phys);
		if (!z_page_frame_is_mapped(pf)) {
			/* Page frame is not marked mapped.
			 * This should not happen. Do not continue.
			 */
			goto out;
		}

		arch_mem_unmap(pos, CONFIG_MMU_PAGE_SIZE);

		/* Put the page frame back into free list */
		page_frame_free_locked(pf);
	}

	/* There are guard pages just before and after the mapped
	 * region. So we also need to free them from the bitmap.
	 */
	pos = (uint8_t *)addr - CONFIG_MMU_PAGE_SIZE;
	total_size = size + CONFIG_MMU_PAGE_SIZE * 2;
	virt_region_free(pos, total_size);

out:
	k_spin_unlock(&z_mm_lock, key);
}

size_t k_mem_free_get(void)
{
	size_t ret;
	k_spinlock_key_t key;

	__ASSERT(page_frames_initialized, "%s called too early", __func__);

	key = k_spin_lock(&z_mm_lock);
#ifdef CONFIG_DEMAND_PAGING
	if (z_free_page_count > CONFIG_DEMAND_PAGING_PAGE_FRAMES_RESERVE) {
		ret = z_free_page_count - CONFIG_DEMAND_PAGING_PAGE_FRAMES_RESERVE;
	} else {
		ret = 0;
	}
#else
	ret = z_free_page_count;
#endif
	k_spin_unlock(&z_mm_lock, key);

	return ret * (size_t)CONFIG_MMU_PAGE_SIZE;
}

/* This may be called from arch early boot code before z_cstart() is invoked.
 * Data will be copied and BSS zeroed, but this function must not rely on any
 * initialization functions having been called beforehand in order to work
 * correctly.
 */
void z_phys_map(uint8_t **virt_ptr, uintptr_t phys, size_t size, uint32_t flags)
{
	uintptr_t aligned_phys, addr_offset;
	size_t aligned_size;
	k_spinlock_key_t key;
	uint8_t *dest_addr;

	addr_offset = k_mem_region_align(&aligned_phys, &aligned_size,
					 phys, size,
					 CONFIG_MMU_PAGE_SIZE);
	__ASSERT(aligned_size != 0U, "0-length mapping at 0x%lx", aligned_phys);
	__ASSERT(aligned_phys < (aligned_phys + (aligned_size - 1)),
		 "wraparound for physical address 0x%lx (size %zu)",
		 aligned_phys, aligned_size);

	key = k_spin_lock(&z_mm_lock);
	/* Obtain an appropriately sized chunk of virtual memory */
	dest_addr = virt_region_alloc(aligned_size);
	if (!dest_addr) {
		goto fail;
	}

	/* If this fails there's something amiss with virt_region_alloc() */
	__ASSERT((uintptr_t)dest_addr <
		 ((uintptr_t)dest_addr + (size - 1)),
		 "wraparound for virtual address %p (size %zu)",
		 dest_addr, size);

	LOG_DBG("arch_mem_map(%p, 0x%lx, %zu, %x) offset %lu", dest_addr,
		aligned_phys, aligned_size, flags, addr_offset);

	arch_mem_map(dest_addr, aligned_phys, aligned_size, flags);
	k_spin_unlock(&z_mm_lock, key);

	*virt_ptr = dest_addr + addr_offset;
	return;
fail:
	/* May re-visit this in the future, but for now running out of
	 * virtual address space or failing the arch_mem_map() call is
	 * an unrecoverable situation.
	 *
	 * Other problems not related to resource exhaustion we leave as
	 * assertions since they are clearly programming mistakes.
	 */
	LOG_ERR("memory mapping 0x%lx (size %zu, flags 0x%x) failed",
		phys, size, flags);
	k_panic();
}
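
/* Illustrative z_phys_map() usage, e.g. mapping a device MMIO region
 * (a sketch, not from this file; the physical address and REG_CTRL_OFFSET
 * register offset are hypothetical example values):
 *
 *	uint8_t *regs;
 *
 *	z_phys_map(&regs, 0xF0001000, 0x1000,
 *		   K_MEM_PERM_RW | K_MEM_CACHE_NONE);
 *	sys_write32(0x1, (mem_addr_t)(regs + REG_CTRL_OFFSET));
 *
 * Unlike k_mem_map(), the caller supplies the physical address and cache
 * attributes, and no page frame accounting is done for the mapped region.
 */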

void z_phys_unmap(uint8_t *virt, size_t size)
{
	uintptr_t aligned_virt, addr_offset;
	size_t aligned_size;
	k_spinlock_key_t key;

	addr_offset = k_mem_region_align(&aligned_virt, &aligned_size,
					 POINTER_TO_UINT(virt), size,
					 CONFIG_MMU_PAGE_SIZE);
	__ASSERT(aligned_size != 0U, "0-length mapping at 0x%lx", aligned_virt);
	__ASSERT(aligned_virt < (aligned_virt + (aligned_size - 1)),
		 "wraparound for virtual address 0x%lx (size %zu)",
		 aligned_virt, aligned_size);

	key = k_spin_lock(&z_mm_lock);
	arch_mem_unmap(UINT_TO_POINTER(aligned_virt), aligned_size);
	virt_region_free(virt, size);
	k_spin_unlock(&z_mm_lock, key);
}

/*
 * Miscellaneous
 */

size_t k_mem_region_align(uintptr_t *aligned_addr, size_t *aligned_size,
			  uintptr_t addr, size_t size, size_t align)
{
	size_t addr_offset;

	/* The actual mapped region must be page-aligned. Round down the
	 * physical address and pad the region size appropriately
	 */
	*aligned_addr = ROUND_DOWN(addr, align);
	addr_offset = addr - *aligned_addr;
	*aligned_size = ROUND_UP(size + addr_offset, align);

	return addr_offset;
}
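
/* Worked example for k_mem_region_align() (illustrative values only): with
 * addr = 0x80001234, size = 0x2000 and align = 0x1000, the aligned base is
 * 0x80001000 and addr_offset is 0x234, so the padded size is
 * ROUND_UP(0x2000 + 0x234, 0x1000) = 0x3000. Three pages get mapped and the
 * caller's returned pointer is then advanced by the 0x234-byte offset.
 */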

#if defined(CONFIG_LINKER_USE_BOOT_SECTION) || defined(CONFIG_LINKER_USE_PINNED_SECTION)
static void mark_linker_section_pinned(void *start_addr, void *end_addr,
				       bool pin)
{
	struct z_page_frame *pf;
	uint8_t *addr;

	uintptr_t pinned_start = ROUND_DOWN(POINTER_TO_UINT(start_addr),
					    CONFIG_MMU_PAGE_SIZE);
	uintptr_t pinned_end = ROUND_UP(POINTER_TO_UINT(end_addr),
					CONFIG_MMU_PAGE_SIZE);
	size_t pinned_size = pinned_end - pinned_start;

	VIRT_FOREACH(UINT_TO_POINTER(pinned_start), pinned_size, addr)
	{
		pf = z_phys_to_page_frame(Z_BOOT_VIRT_TO_PHYS(addr));
		frame_mapped_set(pf, addr);

		if (pin) {
			pf->flags |= Z_PAGE_FRAME_PINNED;
		} else {
			pf->flags &= ~Z_PAGE_FRAME_PINNED;
		}
	}
}
#endif /* CONFIG_LINKER_USE_BOOT_SECTION || CONFIG_LINKER_USE_PINNED_SECTION */

void z_mem_manage_init(void)
{
	uintptr_t phys;
	uint8_t *addr;
	struct z_page_frame *pf;
	k_spinlock_key_t key = k_spin_lock(&z_mm_lock);

	free_page_frame_list_init();

	ARG_UNUSED(addr);

#ifdef CONFIG_ARCH_HAS_RESERVED_PAGE_FRAMES
	/* If some page frames are unavailable for use as memory, arch
	 * code will mark Z_PAGE_FRAME_RESERVED in their flags
	 */
	arch_reserved_pages_update();
#endif /* CONFIG_ARCH_HAS_RESERVED_PAGE_FRAMES */

#ifdef CONFIG_LINKER_GENERIC_SECTIONS_PRESENT_AT_BOOT
	/* All pages composing the Zephyr image are mapped at boot in a
	 * predictable way. This can change at runtime.
	 */
	VIRT_FOREACH(Z_KERNEL_VIRT_START, Z_KERNEL_VIRT_SIZE, addr)
	{
		pf = z_phys_to_page_frame(Z_BOOT_VIRT_TO_PHYS(addr));
		frame_mapped_set(pf, addr);

		/* TODO: for now we pin the whole Zephyr image. Demand paging
		 * currently tested with anonymously-mapped pages which are not
		 * pinned.
		 *
		 * We will need to setup linker regions for a subset of kernel
		 * code/data pages which are pinned in memory and
		 * may not be evicted. This will contain critical CPU data
		 * structures, and any code used to perform page fault
		 * handling, page-ins, etc.
		 */
		pf->flags |= Z_PAGE_FRAME_PINNED;
	}
#endif /* CONFIG_LINKER_GENERIC_SECTIONS_PRESENT_AT_BOOT */

#ifdef CONFIG_LINKER_USE_BOOT_SECTION
	/* Pin the boot section to prevent it from being swapped out during
	 * boot process. Will be un-pinned once boot process completes.
	 */
	mark_linker_section_pinned(lnkr_boot_start, lnkr_boot_end, true);
#endif

#ifdef CONFIG_LINKER_USE_PINNED_SECTION
	/* Pin the page frames corresponding to the pinned symbols */
	mark_linker_section_pinned(lnkr_pinned_start, lnkr_pinned_end, true);
#endif

	/* Any remaining pages that aren't mapped, reserved, or pinned get
	 * added to the free pages list
	 */
	Z_PAGE_FRAME_FOREACH(phys, pf) {
		if (z_page_frame_is_available(pf)) {
			free_page_frame_list_put(pf);
		}
	}
	LOG_DBG("free page frames: %zu", z_free_page_count);

#ifdef CONFIG_DEMAND_PAGING
#ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
	z_paging_histogram_init();
#endif
	k_mem_paging_backing_store_init();
	k_mem_paging_eviction_init();
#endif
#if __ASSERT_ON
	page_frames_initialized = true;
#endif
	k_spin_unlock(&z_mm_lock, key);

#ifndef CONFIG_LINKER_GENERIC_SECTIONS_PRESENT_AT_BOOT
	/* If BSS section is not present in memory at boot,
	 * it would not have been cleared. This needs to be
	 * done now since paging mechanism has been initialized
	 * and the BSS pages can be brought into physical
	 * memory to be cleared.
	 */
	z_bss_zero();
#endif
}

void z_mem_manage_boot_finish(void)
{
#ifdef CONFIG_LINKER_USE_BOOT_SECTION
	/* At the end of boot process, unpin the boot sections
	 * as they don't need to be in memory all the time anymore.
	 */
	mark_linker_section_pinned(lnkr_boot_start, lnkr_boot_end, false);
#endif
}

#ifdef CONFIG_DEMAND_PAGING

#ifdef CONFIG_DEMAND_PAGING_STATS
struct k_mem_paging_stats_t paging_stats;
extern struct k_mem_paging_histogram_t z_paging_histogram_eviction;
extern struct k_mem_paging_histogram_t z_paging_histogram_backing_store_page_in;
extern struct k_mem_paging_histogram_t z_paging_histogram_backing_store_page_out;
#endif

static inline void do_backing_store_page_in(uintptr_t location)
{
#ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
	uint32_t time_diff;

#ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS
	timing_t time_start, time_end;

	time_start = timing_counter_get();
#else
	uint32_t time_start;

	time_start = k_cycle_get_32();
#endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */
#endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */

	k_mem_paging_backing_store_page_in(location);

#ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
#ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS
	time_end = timing_counter_get();
	time_diff = (uint32_t)timing_cycles_get(&time_start, &time_end);
#else
	time_diff = k_cycle_get_32() - time_start;
#endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */

	z_paging_histogram_inc(&z_paging_histogram_backing_store_page_in,
			       time_diff);
#endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */
}

static inline void do_backing_store_page_out(uintptr_t location)
{
#ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
	uint32_t time_diff;

#ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS
	timing_t time_start, time_end;

	time_start = timing_counter_get();
#else
	uint32_t time_start;

	time_start = k_cycle_get_32();
#endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */
#endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */

	k_mem_paging_backing_store_page_out(location);

#ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
#ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS
	time_end = timing_counter_get();
	time_diff = (uint32_t)timing_cycles_get(&time_start, &time_end);
#else
	time_diff = k_cycle_get_32() - time_start;
#endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */

	z_paging_histogram_inc(&z_paging_histogram_backing_store_page_out,
			       time_diff);
#endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */
}

/* Current implementation relies on interrupt locking to prevent any page table
 * access, which falls over if other CPUs are active. Addressing this is not
 * as simple as using spinlocks as regular memory reads/writes constitute
 * "access" in this sense.
 *
 * Current needs for demand paging are on uniprocessor systems.
 */
BUILD_ASSERT(!IS_ENABLED(CONFIG_SMP));

static void virt_region_foreach(void *addr, size_t size,
				void (*func)(void *))
{
	z_mem_assert_virtual_region(addr, size);

	for (size_t offset = 0; offset < size; offset += CONFIG_MMU_PAGE_SIZE) {
		func((uint8_t *)addr + offset);
	}
}

/*
 * Perform some preparatory steps before paging out. The provided page frame
 * must be evicted to the backing store immediately after this is called
 * with a call to k_mem_paging_backing_store_page_out() if it contains
 * a data page.
 *
 * - Map page frame to scratch area if requested. This is always true if we're
 *   doing a page fault, but is only set on manual evictions if the page is
 *   dirty.
 * - If mapped:
 *    - obtain backing store location and populate location parameter
 *    - Update page tables with location
 * - Mark page frame as busy
 *
 * Returns -ENOMEM if the backing store is full
 */
static int page_frame_prepare_locked(struct z_page_frame *pf, bool *dirty_ptr,
				     bool page_fault, uintptr_t *location_ptr)
{
	uintptr_t phys;
	int ret;
	bool dirty = *dirty_ptr;

	phys = z_page_frame_to_phys(pf);
	__ASSERT(!z_page_frame_is_pinned(pf), "page frame 0x%lx is pinned",
		 phys);

	/* If the backing store doesn't have a copy of the page, even if it
	 * wasn't modified, treat as dirty. This can happen for a few
	 * reasons:
	 * 1) Page has never been swapped out before, and the backing store
	 *    wasn't pre-populated with this data page.
	 * 2) Page was swapped out before, but the page contents were not
	 *    preserved after swapping back in.
	 * 3) Page contents were preserved when swapped back in, but were later
	 *    evicted from the backing store to make room for other evicted
	 *    pages.
	 */
	if (z_page_frame_is_mapped(pf)) {
		dirty = dirty || !z_page_frame_is_backed(pf);
	}

	if (dirty || page_fault) {
		arch_mem_scratch(phys);
	}

	if (z_page_frame_is_mapped(pf)) {
		ret = k_mem_paging_backing_store_location_get(pf, location_ptr,
							      page_fault);
		if (ret != 0) {
			LOG_ERR("out of backing store memory");
			return -ENOMEM;
		}
		arch_mem_page_out(pf->addr, *location_ptr);
	} else {
		/* Shouldn't happen unless this function is mis-used */
		__ASSERT(!dirty, "un-mapped page determined to be dirty");
	}
#ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
	/* Mark as busy so that z_page_frame_is_evictable() returns false */
	__ASSERT(!z_page_frame_is_busy(pf), "page frame 0x%lx is already busy",
		 phys);
	pf->flags |= Z_PAGE_FRAME_BUSY;
#endif
	/* Update dirty parameter, since we set to true if it wasn't backed
	 * even if otherwise clean
	 */
	*dirty_ptr = dirty;

	return 0;
}

static int do_mem_evict(void *addr)
{
	bool dirty;
	struct z_page_frame *pf;
	uintptr_t location;
	int key, ret;
	uintptr_t flags, phys;

#if CONFIG_DEMAND_PAGING_ALLOW_IRQ
	__ASSERT(!k_is_in_isr(),
		 "%s is unavailable in ISRs with CONFIG_DEMAND_PAGING_ALLOW_IRQ",
		 __func__);
	k_sched_lock();
#endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
	key = irq_lock();
	flags = arch_page_info_get(addr, &phys, false);
	__ASSERT((flags & ARCH_DATA_PAGE_NOT_MAPPED) == 0,
		 "address %p isn't mapped", addr);
	if ((flags & ARCH_DATA_PAGE_LOADED) == 0) {
		/* Un-mapped or already evicted. Nothing to do */
		ret = 0;
		goto out;
	}

	dirty = (flags & ARCH_DATA_PAGE_DIRTY) != 0;
	pf = z_phys_to_page_frame(phys);
	__ASSERT(pf->addr == addr, "page frame address mismatch");
	ret = page_frame_prepare_locked(pf, &dirty, false, &location);
	if (ret != 0) {
		goto out;
	}

	__ASSERT(ret == 0, "failed to prepare page frame");
#ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
	irq_unlock(key);
#endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
	if (dirty) {
		do_backing_store_page_out(location);
	}
#ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
	key = irq_lock();
#endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
	page_frame_free_locked(pf);
out:
	irq_unlock(key);
#ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
	k_sched_unlock();
#endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
	return ret;
}

int k_mem_page_out(void *addr, size_t size)
{
	__ASSERT(page_frames_initialized, "%s called on %p too early", __func__,
		 addr);
	z_mem_assert_virtual_region(addr, size);

	for (size_t offset = 0; offset < size; offset += CONFIG_MMU_PAGE_SIZE) {
		void *pos = (uint8_t *)addr + offset;
		int ret;

		ret = do_mem_evict(pos);
		if (ret != 0) {
			return ret;
		}
	}

	return 0;
}

int z_page_frame_evict(uintptr_t phys)
{
	int key, ret;
	struct z_page_frame *pf;
	bool dirty;
	uintptr_t flags;
	uintptr_t location;

	__ASSERT(page_frames_initialized, "%s called on 0x%lx too early",
		 __func__, phys);

	/* Implementation is similar to do_page_fault() except there is no
	 * data page to page-in, see comments in that function.
	 */

#ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
	__ASSERT(!k_is_in_isr(),
		 "%s is unavailable in ISRs with CONFIG_DEMAND_PAGING_ALLOW_IRQ",
		 __func__);
	k_sched_lock();
#endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
	key = irq_lock();
	pf = z_phys_to_page_frame(phys);
	if (!z_page_frame_is_mapped(pf)) {
		/* Nothing to do, free page */
		ret = 0;
		goto out;
	}
	flags = arch_page_info_get(pf->addr, NULL, false);
	/* Shouldn't ever happen */
	__ASSERT((flags & ARCH_DATA_PAGE_LOADED) != 0, "data page not loaded");
	dirty = (flags & ARCH_DATA_PAGE_DIRTY) != 0;
	ret = page_frame_prepare_locked(pf, &dirty, false, &location);
	if (ret != 0) {
		goto out;
	}

#ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
	irq_unlock(key);
#endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
	if (dirty) {
		do_backing_store_page_out(location);
	}
#ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
	key = irq_lock();
#endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
	page_frame_free_locked(pf);
out:
	irq_unlock(key);
#ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
	k_sched_unlock();
#endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
	return ret;
}

static inline void paging_stats_faults_inc(struct k_thread *faulting_thread,
					   int key)
{
#ifdef CONFIG_DEMAND_PAGING_STATS
	bool is_irq_unlocked = arch_irq_unlocked(key);

	paging_stats.pagefaults.cnt++;

	if (is_irq_unlocked) {
		paging_stats.pagefaults.irq_unlocked++;
	} else {
		paging_stats.pagefaults.irq_locked++;
	}

#ifdef CONFIG_DEMAND_PAGING_THREAD_STATS
	faulting_thread->paging_stats.pagefaults.cnt++;

	if (is_irq_unlocked) {
		faulting_thread->paging_stats.pagefaults.irq_unlocked++;
	} else {
		faulting_thread->paging_stats.pagefaults.irq_locked++;
	}
#else
	ARG_UNUSED(faulting_thread);
#endif

#ifndef CONFIG_DEMAND_PAGING_ALLOW_IRQ
	if (k_is_in_isr()) {
		paging_stats.pagefaults.in_isr++;

#ifdef CONFIG_DEMAND_PAGING_THREAD_STATS
		faulting_thread->paging_stats.pagefaults.in_isr++;
#endif
	}
#endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
#endif /* CONFIG_DEMAND_PAGING_STATS */
}

static inline void paging_stats_eviction_inc(struct k_thread *faulting_thread,
					     bool dirty)
{
#ifdef CONFIG_DEMAND_PAGING_STATS
	if (dirty) {
		paging_stats.eviction.dirty++;
	} else {
		paging_stats.eviction.clean++;
	}
#ifdef CONFIG_DEMAND_PAGING_THREAD_STATS
	if (dirty) {
		faulting_thread->paging_stats.eviction.dirty++;
	} else {
		faulting_thread->paging_stats.eviction.clean++;
	}
#else
	ARG_UNUSED(faulting_thread);
#endif /* CONFIG_DEMAND_PAGING_THREAD_STATS */
#endif /* CONFIG_DEMAND_PAGING_STATS */
}

static inline struct z_page_frame *do_eviction_select(bool *dirty)
{
	struct z_page_frame *pf;

#ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
	uint32_t time_diff;

#ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS
	timing_t time_start, time_end;

	time_start = timing_counter_get();
#else
	uint32_t time_start;

	time_start = k_cycle_get_32();
#endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */
#endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */

	pf = k_mem_paging_eviction_select(dirty);

#ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
#ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS
	time_end = timing_counter_get();
	time_diff = (uint32_t)timing_cycles_get(&time_start, &time_end);
#else
	time_diff = k_cycle_get_32() - time_start;
#endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */

	z_paging_histogram_inc(&z_paging_histogram_eviction, time_diff);
#endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */

	return pf;
}

static bool do_page_fault(void *addr, bool pin)
{
	struct z_page_frame *pf;
	int key, ret;
	uintptr_t page_in_location, page_out_location;
	enum arch_page_location status;
	bool result;
	bool dirty = false;
	struct k_thread *faulting_thread = _current_cpu->current;

	__ASSERT(page_frames_initialized, "page fault at %p happened too early",
		 addr);

	LOG_DBG("page fault at %p", addr);

	/*
	 * TODO: Add performance accounting:
	 * - k_mem_paging_eviction_select() metrics
	 *   * periodic timer execution time histogram (if implemented)
	 */

#ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
	/* We lock the scheduler so that other threads are never scheduled
	 * during the page-in/out operation.
	 *
	 * We do however re-enable interrupts during the page-in/page-out
	 * operation iff interrupts were enabled when the exception was taken;
	 * in this configuration page faults in an ISR are a bug; all their
	 * code/data must be pinned.
	 *
	 * If interrupts were disabled when the exception was taken, the
	 * arch code is responsible for keeping them that way when entering
	 * this function.
	 *
	 * If this is not enabled, then interrupts are always locked for the
	 * entire operation. This is far worse for system interrupt latency
	 * but requires less pinned pages and ISRs may also take page faults.
	 *
	 * Support for allowing k_mem_paging_backing_store_page_out() and
	 * k_mem_paging_backing_store_page_in() to also sleep and allow
	 * other threads to run (such as in the case where the transfer is
	 * async DMA) is not implemented. Even if limited to thread context,
	 * arbitrary memory access triggering exceptions that put a thread to
	 * sleep on a contended page fault operation will break scheduling
	 * assumptions of cooperative threads or threads that implement
	 * critical sections with spinlocks or disabling IRQs.
	 */
	k_sched_lock();
	__ASSERT(!k_is_in_isr(), "ISR page faults are forbidden");
#endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */

	key = irq_lock();
	status = arch_page_location_get(addr, &page_in_location);
	if (status == ARCH_PAGE_LOCATION_BAD) {
		/* Return false to treat as a fatal error */
		result = false;
		goto out;
	}
	result = true;

	if (status == ARCH_PAGE_LOCATION_PAGED_IN) {
		if (pin) {
			/* It's a physical memory address */
			uintptr_t phys = page_in_location;

			pf = z_phys_to_page_frame(phys);
			pf->flags |= Z_PAGE_FRAME_PINNED;
		}

		/* This if-block is to pin the page if it is
		 * already present in physical memory. There is
		 * no need to go through the following code to
		 * pull in the data pages. So skip to the end.
		 */
		goto out;
	}
	__ASSERT(status == ARCH_PAGE_LOCATION_PAGED_OUT,
		 "unexpected status value %d", status);

	paging_stats_faults_inc(faulting_thread, key);

	pf = free_page_frame_list_get();
	if (pf == NULL) {
		/* Need to evict a page frame */
		pf = do_eviction_select(&dirty);
		__ASSERT(pf != NULL, "failed to get a page frame");
		LOG_DBG("evicting %p at 0x%lx", pf->addr,
			z_page_frame_to_phys(pf));

		paging_stats_eviction_inc(faulting_thread, dirty);
	}
	ret = page_frame_prepare_locked(pf, &dirty, true, &page_out_location);
	__ASSERT(ret == 0, "failed to prepare page frame");

#ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
	irq_unlock(key);
	/* Interrupts are now unlocked if they were not locked when we entered
	 * this function, and we may service ISRs. The scheduler is still
	 * locked.
	 */
#endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
	if (dirty) {
		do_backing_store_page_out(page_out_location);
	}
	do_backing_store_page_in(page_in_location);

#ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
	key = irq_lock();
	pf->flags &= ~Z_PAGE_FRAME_BUSY;
#endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
	if (pin) {
		pf->flags |= Z_PAGE_FRAME_PINNED;
	}
	pf->flags |= Z_PAGE_FRAME_MAPPED;
	pf->addr = UINT_TO_POINTER(POINTER_TO_UINT(addr)
				   & ~(CONFIG_MMU_PAGE_SIZE - 1));

	arch_mem_page_in(addr, z_page_frame_to_phys(pf));
	k_mem_paging_backing_store_page_finalize(pf, page_in_location);
out:
	irq_unlock(key);
#ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
	k_sched_unlock();
#endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */

	return result;
}

static void do_page_in(void *addr)
{
	bool ret;

	ret = do_page_fault(addr, false);
	__ASSERT(ret, "unmapped memory address %p", addr);
	(void)ret;
}

void k_mem_page_in(void *addr, size_t size)
{
	__ASSERT(!IS_ENABLED(CONFIG_DEMAND_PAGING_ALLOW_IRQ) || !k_is_in_isr(),
		 "%s may not be called in ISRs if CONFIG_DEMAND_PAGING_ALLOW_IRQ is enabled",
		 __func__);
	virt_region_foreach(addr, size, do_page_in);
}

static void do_mem_pin(void *addr)
{
	bool ret;

	ret = do_page_fault(addr, true);
	__ASSERT(ret, "unmapped memory address %p", addr);
	(void)ret;
}

void k_mem_pin(void *addr, size_t size)
{
	__ASSERT(!IS_ENABLED(CONFIG_DEMAND_PAGING_ALLOW_IRQ) || !k_is_in_isr(),
		 "%s may not be called in ISRs if CONFIG_DEMAND_PAGING_ALLOW_IRQ is enabled",
		 __func__);
	virt_region_foreach(addr, size, do_mem_pin);
}

bool z_page_fault(void *addr)
{
	return do_page_fault(addr, false);
}

static void do_mem_unpin(void *addr)
{
	struct z_page_frame *pf;
	int key;
	uintptr_t flags, phys;

	key = irq_lock();
	flags = arch_page_info_get(addr, &phys, false);
	__ASSERT((flags & ARCH_DATA_PAGE_NOT_MAPPED) == 0,
		 "invalid data page at %p", addr);
	if ((flags & ARCH_DATA_PAGE_LOADED) != 0) {
		pf = z_phys_to_page_frame(phys);
		pf->flags &= ~Z_PAGE_FRAME_PINNED;
	}
	irq_unlock(key);
}

void k_mem_unpin(void *addr, size_t size)
{
	__ASSERT(page_frames_initialized, "%s called on %p too early", __func__,
		 addr);
	virt_region_foreach(addr, size, do_mem_unpin);
}

#endif /* CONFIG_DEMAND_PAGING */