1 /*
2 * Copyright (c) 2020 Intel Corporation
3 *
4 * SPDX-License-Identifier: Apache-2.0
5 *
6 * Routines for managing virtual address spaces
7 */
8
9 #include <stdint.h>
10 #include <kernel_arch_interface.h>
11 #include <zephyr/spinlock.h>
12 #include <mmu.h>
13 #include <zephyr/init.h>
14 #include <kernel_internal.h>
15 #include <zephyr/internal/syscall_handler.h>
16 #include <zephyr/toolchain.h>
17 #include <zephyr/linker/linker-defs.h>
18 #include <zephyr/sys/bitarray.h>
19 #include <zephyr/timing/timing.h>
20 #include <zephyr/logging/log.h>
21 LOG_MODULE_DECLARE(os, CONFIG_KERNEL_LOG_LEVEL);
22
23 #ifdef CONFIG_DEMAND_PAGING
24 #include <zephyr/kernel/mm/demand_paging.h>
25 #endif
26
27 /*
28 * General terminology:
29 * - A page frame is a page-sized physical memory region in RAM. It is a
30 * container where a data page may be placed. It is always referred to by
31 * physical address. We have a convention of using uintptr_t for physical
32 * addresses. We instantiate a struct z_page_frame to store metadata for
33 * every page frame.
34 *
35 * - A data page is a page-sized region of data. It may exist in a page frame,
36 * or be paged out to some backing store. Its location can always be looked
37 * up in the CPU's page tables (or equivalent) by virtual address.
38 * The data type will always be void * or in some cases uint8_t * when we
39 * want to do pointer arithmetic.
40 */
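/*
 * A minimal sketch of the relationship described above (illustrative only;
 * the real accessors are z_page_frame_to_phys() and z_phys_to_page_frame()):
 *
 *     struct z_page_frame *pf = &z_page_frames[idx];
 *     uintptr_t phys = Z_PHYS_RAM_START + ((uintptr_t)idx * CONFIG_MMU_PAGE_SIZE);
 *     void *data_page_va = pf->addr;  // only meaningful if the frame is mapped
 */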
41
42 /* Spinlock to protect any globals in this file and serialize page table
43 * updates in arch code
44 */
45 struct k_spinlock z_mm_lock;
46
47 /*
48 * General page frame management
49 */
50
51 /* Database of all RAM page frames */
52 struct z_page_frame z_page_frames[Z_NUM_PAGE_FRAMES];
53
54 #if __ASSERT_ON
55 /* Indicator that z_page_frames has been initialized; many of these APIs do
56 * not work before POST_KERNEL
57 */
58 static bool page_frames_initialized;
59 #endif
60
61 /* Add colors to page table dumps to indicate mapping type */
62 #define COLOR_PAGE_FRAMES 1
63
64 #if COLOR_PAGE_FRAMES
65 #define ANSI_DEFAULT "\x1B" "[0m"
66 #define ANSI_RED "\x1B" "[1;31m"
67 #define ANSI_GREEN "\x1B" "[1;32m"
68 #define ANSI_YELLOW "\x1B" "[1;33m"
69 #define ANSI_BLUE "\x1B" "[1;34m"
70 #define ANSI_MAGENTA "\x1B" "[1;35m"
71 #define ANSI_CYAN "\x1B" "[1;36m"
72 #define ANSI_GREY "\x1B" "[1;90m"
73
74 #define COLOR(x) printk(_CONCAT(ANSI_, x))
75 #else
76 #define COLOR(x) do { } while (false)
77 #endif
78
79 /* LCOV_EXCL_START */
80 static void page_frame_dump(struct z_page_frame *pf)
81 {
82 if (z_page_frame_is_reserved(pf)) {
83 COLOR(CYAN);
84 printk("R");
85 } else if (z_page_frame_is_busy(pf)) {
86 COLOR(MAGENTA);
87 printk("B");
88 } else if (z_page_frame_is_pinned(pf)) {
89 COLOR(YELLOW);
90 printk("P");
91 } else if (z_page_frame_is_available(pf)) {
92 COLOR(GREY);
93 printk(".");
94 } else if (z_page_frame_is_mapped(pf)) {
95 COLOR(DEFAULT);
96 printk("M");
97 } else {
98 COLOR(RED);
99 printk("?");
100 }
101 }
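/* Legend for the characters printed above: R = reserved, B = busy,
 * P = pinned, . = available, M = mapped, ? = unknown state.
 */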
102
103 void z_page_frames_dump(void)
104 {
105 int column = 0;
106
107 __ASSERT(page_frames_initialized, "%s called too early", __func__);
108 printk("Physical memory from 0x%lx to 0x%lx\n",
109 Z_PHYS_RAM_START, Z_PHYS_RAM_END);
110
111 for (int i = 0; i < Z_NUM_PAGE_FRAMES; i++) {
112 struct z_page_frame *pf = &z_page_frames[i];
113
114 page_frame_dump(pf);
115
116 column++;
117 if (column == 64) {
118 column = 0;
119 printk("\n");
120 }
121 }
122
123 COLOR(DEFAULT);
124 if (column != 0) {
125 printk("\n");
126 }
127 }
128 /* LCOV_EXCL_STOP */
129
130 #define VIRT_FOREACH(_base, _size, _pos) \
131 for (_pos = _base; \
132 _pos < ((uint8_t *)_base + _size); _pos += CONFIG_MMU_PAGE_SIZE)
133
134 #define PHYS_FOREACH(_base, _size, _pos) \
135 for (_pos = _base; \
136 _pos < ((uintptr_t)_base + _size); _pos += CONFIG_MMU_PAGE_SIZE)
137
138
139 /*
140 * Virtual address space management
141 *
142 * Call all of these functions with z_mm_lock held.
143 *
144 * Overall virtual memory map: When the kernel starts, it resides in
145 * virtual memory in the region Z_KERNEL_VIRT_START to
146 * Z_KERNEL_VIRT_END. Unused virtual memory past this, up to the limit
147 * noted by CONFIG_KERNEL_VM_SIZE may be used for runtime memory mappings.
148 *
149 * If CONFIG_ARCH_MAPS_ALL_RAM is set, we do not just map the kernel image,
150 * but have a mapping for all RAM in place. This is for special architectural
151 * purposes and does not otherwise affect page frame accounting or flags;
152 * the only guarantee is that such RAM mapping outside of the Zephyr image
153 * won't be disturbed by subsequent memory mapping calls.
154 *
155 * +--------------+ <- Z_VIRT_RAM_START
156 * | Undefined VM | <- May contain ancillary regions like x86_64's locore
157 * +--------------+ <- Z_KERNEL_VIRT_START (often == Z_VIRT_RAM_START)
158 * | Mapping for |
159 * | main kernel |
160 * | image |
161 * | |
162 * | |
163 * +--------------+ <- Z_FREE_VM_START
164 * | |
165 * | Unused, |
166 * | Available VM |
167 * | |
168 * |..............| <- mapping_pos (grows downward as more mappings are made)
169 * | Mapping |
170 * +--------------+
171 * | Mapping |
172 * +--------------+
173 * | ... |
174 * +--------------+
175 * | Mapping |
176 * +--------------+ <- mappings start here
177 * | Reserved | <- special purpose virtual page(s) of size Z_VM_RESERVED
178 * +--------------+ <- Z_VIRT_RAM_END
179 */
180
181 /* Bitmap of virtual addresses where one bit corresponds to one page.
182 * This is used by virt_region_alloc() to figure out which
183 * region of virtual addresses can be used for memory mapping.
184 *
185 * Note that bit #0 corresponds to the highest address, so allocation
186 * is done in reverse, from the highest address downward.
187 */
188 SYS_BITARRAY_DEFINE_STATIC(virt_region_bitmap,
189 CONFIG_KERNEL_VM_SIZE / CONFIG_MMU_PAGE_SIZE);
190
191 static bool virt_region_inited;
192
193 #define Z_VIRT_REGION_START_ADDR Z_FREE_VM_START
194 #define Z_VIRT_REGION_END_ADDR (Z_VIRT_RAM_END - Z_VM_RESERVED)
195
196 static inline uintptr_t virt_from_bitmap_offset(size_t offset, size_t size)
197 {
198 return POINTER_TO_UINT(Z_VIRT_RAM_END)
199 - (offset * CONFIG_MMU_PAGE_SIZE) - size;
200 }
201
202 static inline size_t virt_to_bitmap_offset(void *vaddr, size_t size)
203 {
204 return (POINTER_TO_UINT(Z_VIRT_RAM_END)
205 - POINTER_TO_UINT(vaddr) - size) / CONFIG_MMU_PAGE_SIZE;
206 }
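/*
 * Worked example for the two conversions above (illustrative; the end address
 * is made up): assume Z_VIRT_RAM_END == (uint8_t *)0x90000000 and a 4 KiB
 * page size. A two-page region starting at 0x8FFFE000 maps to bitmap offset
 *
 *     (0x90000000 - 0x8FFFE000 - 0x2000) / 0x1000 == 0
 *
 * and virt_from_bitmap_offset(0, 0x2000) gives back 0x90000000 - 0 - 0x2000
 * == 0x8FFFE000, i.e. the two helpers are inverses for page-aligned regions.
 */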
207
208 static void virt_region_init(void)
209 {
210 size_t offset, num_bits;
211
212 /* There are regions where we should never map via
213 * k_mem_map() and z_phys_map(). Mark them as
214 * already allocated so they will never be used.
215 */
216
217 if (Z_VM_RESERVED > 0) {
218 /* Mark reserved region at end of virtual address space */
219 num_bits = Z_VM_RESERVED / CONFIG_MMU_PAGE_SIZE;
220 (void)sys_bitarray_set_region(&virt_region_bitmap,
221 num_bits, 0);
222 }
223
224 /* Mark all bits up to Z_FREE_VM_START as allocated */
225 num_bits = POINTER_TO_UINT(Z_FREE_VM_START)
226 - POINTER_TO_UINT(Z_VIRT_RAM_START);
227 offset = virt_to_bitmap_offset(Z_VIRT_RAM_START, num_bits);
228 num_bits /= CONFIG_MMU_PAGE_SIZE;
229 (void)sys_bitarray_set_region(&virt_region_bitmap,
230 num_bits, offset);
231
232 virt_region_inited = true;
233 }
234
235 static void virt_region_free(void *vaddr, size_t size)
236 {
237 size_t offset, num_bits;
238 uint8_t *vaddr_u8 = (uint8_t *)vaddr;
239
240 if (unlikely(!virt_region_inited)) {
241 virt_region_init();
242 }
243
244 #ifndef CONFIG_KERNEL_DIRECT_MAP
245 /* Without the need to support K_MEM_DIRECT_MAP, the region can
246 * always be represented in the bitmap. So this case is
247 * simple.
248 */
249
250 __ASSERT((vaddr_u8 >= Z_VIRT_REGION_START_ADDR)
251 && ((vaddr_u8 + size - 1) < Z_VIRT_REGION_END_ADDR),
252 "invalid virtual address region %p (%zu)", vaddr_u8, size);
253 if (!((vaddr_u8 >= Z_VIRT_REGION_START_ADDR)
254 && ((vaddr_u8 + size - 1) < Z_VIRT_REGION_END_ADDR))) {
255 return;
256 }
257
258 offset = virt_to_bitmap_offset(vaddr, size);
259 num_bits = size / CONFIG_MMU_PAGE_SIZE;
260 (void)sys_bitarray_free(&virt_region_bitmap, num_bits, offset);
261 #else /* !CONFIG_KERNEL_DIRECT_MAP */
262 /* With K_MEM_DIRECT_MAP, the region can be outside of the virtual
263 * memory space, wholly within it, or overlap partially.
264 * So additional processing is needed to make sure we only
265 * mark the pages within the bitmap.
266 */
267 if (((vaddr_u8 >= Z_VIRT_REGION_START_ADDR) &&
268 (vaddr_u8 < Z_VIRT_REGION_END_ADDR)) ||
269 (((vaddr_u8 + size - 1) >= Z_VIRT_REGION_START_ADDR) &&
270 ((vaddr_u8 + size - 1) < Z_VIRT_REGION_END_ADDR))) {
271 uint8_t *adjusted_start = MAX(vaddr_u8, Z_VIRT_REGION_START_ADDR);
272 uint8_t *adjusted_end = MIN(vaddr_u8 + size,
273 Z_VIRT_REGION_END_ADDR);
274 size_t adjusted_sz = adjusted_end - adjusted_start;
275
276 offset = virt_to_bitmap_offset(adjusted_start, adjusted_sz);
277 num_bits = adjusted_sz / CONFIG_MMU_PAGE_SIZE;
278 (void)sys_bitarray_free(&virt_region_bitmap, num_bits, offset);
279 }
280 #endif /* !CONFIG_KERNEL_DIRECT_MAP */
281 }
282
283 static void *virt_region_alloc(size_t size, size_t align)
284 {
285 uintptr_t dest_addr;
286 size_t alloc_size;
287 size_t offset;
288 size_t num_bits;
289 int ret;
290
291 if (unlikely(!virt_region_inited)) {
292 virt_region_init();
293 }
294
295 /* Possibly request more pages to ensure we can get an aligned virtual address */
296 num_bits = (size + align - CONFIG_MMU_PAGE_SIZE) / CONFIG_MMU_PAGE_SIZE;
297 alloc_size = num_bits * CONFIG_MMU_PAGE_SIZE;
298 ret = sys_bitarray_alloc(&virt_region_bitmap, num_bits, &offset);
299 if (ret != 0) {
300 LOG_ERR("insufficient virtual address space (requested %zu)",
301 size);
302 return NULL;
303 }
304
305 /* Remember that bit #0 in the bitmap corresponds to the highest
306 * virtual address. So here we need to work downward from the end
307 * to get the starting address of the allocated region.
308 */
309 dest_addr = virt_from_bitmap_offset(offset, alloc_size);
310
311 if (alloc_size > size) {
312 uintptr_t aligned_dest_addr = ROUND_UP(dest_addr, align);
313
314 /* Here is the memory organization when trying to get an aligned
315 * virtual address:
316 *
317 * +--------------+ <- Z_VIRT_RAM_START
318 * | Undefined VM |
319 * +--------------+ <- Z_KERNEL_VIRT_START (often == Z_VIRT_RAM_START)
320 * | Mapping for |
321 * | main kernel |
322 * | image |
323 * | |
324 * | |
325 * +--------------+ <- Z_FREE_VM_START
326 * | ... |
327 * +==============+ <- dest_addr
328 * | Unused |
329 * |..............| <- aligned_dest_addr
330 * | |
331 * | Aligned |
332 * | Mapping |
333 * | |
334 * |..............| <- aligned_dest_addr + size
335 * | Unused |
336 * +==============+ <- offset from Z_VIRT_RAM_END == dest_addr + alloc_size
337 * | ... |
338 * +--------------+
339 * | Mapping |
340 * +--------------+
341 * | Reserved |
342 * +--------------+ <- Z_VIRT_RAM_END
343 */
344
345 /* Free the two unused regions */
346 virt_region_free(UINT_TO_POINTER(dest_addr),
347 aligned_dest_addr - dest_addr);
348 if (((dest_addr + alloc_size) - (aligned_dest_addr + size)) > 0) {
349 virt_region_free(UINT_TO_POINTER(aligned_dest_addr + size),
350 (dest_addr + alloc_size) - (aligned_dest_addr + size));
351 }
352
353 dest_addr = aligned_dest_addr;
354 }
355
356 /* Need to make sure this does not step into kernel memory */
357 if (dest_addr < POINTER_TO_UINT(Z_VIRT_REGION_START_ADDR)) {
358 (void)sys_bitarray_free(&virt_region_bitmap, size, offset);
359 return NULL;
360 }
361
362 return UINT_TO_POINTER(dest_addr);
363 }
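/*
 * Worked example for the aligned-allocation case above (illustrative numbers
 * only): with size = 0x3000, align = 0x4000 and a 4 KiB page size,
 * num_bits = (0x3000 + 0x4000 - 0x1000) / 0x1000 = 6, so alloc_size = 0x6000.
 * If the bitmap hands back dest_addr = 0x8FFF9000, then aligned_dest_addr =
 * ROUND_UP(0x8FFF9000, 0x4000) = 0x8FFFC000; the 0x3000-byte head
 * [0x8FFF9000, 0x8FFFC000) is freed back to the bitmap, the tail is empty,
 * and the caller receives the aligned region [0x8FFFC000, 0x8FFFF000).
 */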
364
365 /*
366 * Free page frames management
367 *
368 * Call all of these functions with z_mm_lock held.
369 */
370
371 /* Linked list of unused and available page frames.
372 *
373 * TODO: This is very simple and treats all free page frames as being equal.
374 * However, there are use-cases to consolidate free pages such that entire
375 * SRAM banks can be switched off to save power, and so obtaining free pages
376 * may require a more complex ontology which prefers page frames in RAM banks
377 * which are still active.
378 *
379 * This implies in the future there may be multiple slists managing physical
380 * pages. Each page frame will still just have one snode link.
381 */
382 static sys_slist_t free_page_frame_list;
383
384 /* Number of unused and available free page frames.
385 * This information may go stale immediately.
386 */
387 static size_t z_free_page_count;
388
389 #define PF_ASSERT(pf, expr, fmt, ...) \
390 __ASSERT(expr, "page frame 0x%lx: " fmt, z_page_frame_to_phys(pf), \
391 ##__VA_ARGS__)
392
393 /* Get an unused page frame (we don't care which one), or NULL if there are none */
394 static struct z_page_frame *free_page_frame_list_get(void)
395 {
396 sys_snode_t *node;
397 struct z_page_frame *pf = NULL;
398
399 node = sys_slist_get(&free_page_frame_list);
400 if (node != NULL) {
401 z_free_page_count--;
402 pf = CONTAINER_OF(node, struct z_page_frame, node);
403 PF_ASSERT(pf, z_page_frame_is_available(pf),
404 "unavailable but somehow on free list");
405 }
406
407 return pf;
408 }
409
410 /* Release a page frame back into the list of free pages */
411 static void free_page_frame_list_put(struct z_page_frame *pf)
412 {
413 PF_ASSERT(pf, z_page_frame_is_available(pf),
414 "unavailable page put on free list");
415 /* The structure is packed, which ensures that this is true */
416 void *node = pf;
417
418 sys_slist_append(&free_page_frame_list, node);
419 z_free_page_count++;
420 }
421
422 static void free_page_frame_list_init(void)
423 {
424 sys_slist_init(&free_page_frame_list);
425 }
426
427 static void page_frame_free_locked(struct z_page_frame *pf)
428 {
429 pf->flags = 0;
430 free_page_frame_list_put(pf);
431 }
432
433 /*
434 * Memory Mapping
435 */
436
437 /* Called after the frame is mapped in the arch layer, to update our
438 * local ontology (and do some assertions while we're at it)
439 */
440 static void frame_mapped_set(struct z_page_frame *pf, void *addr)
441 {
442 PF_ASSERT(pf, !z_page_frame_is_reserved(pf),
443 "attempted to map a reserved page frame");
444
445 /* We do allow multiple mappings for pinned page frames
446 * since we will never need to reverse map them.
447 * This is uncommon; use-cases are for things like the
448 * Zephyr equivalent of VDSOs.
449 */
450 PF_ASSERT(pf, !z_page_frame_is_mapped(pf) || z_page_frame_is_pinned(pf),
451 "non-pinned and already mapped to %p", pf->addr);
452
453 pf->flags |= Z_PAGE_FRAME_MAPPED;
454 pf->addr = addr;
455 }
456
457 /* LCOV_EXCL_START */
458 /* Go through page frames to find the physical address mapped
459 * by a virtual address.
460 *
461 * @param[in] virt Virtual Address
462 * @param[out] phys Physical address mapped to the input virtual address
463 * if such mapping exists.
464 *
465 * @retval 0 if mapping is found and valid
466 * @retval -EFAULT if virtual address is not mapped
467 */
468 static int virt_to_page_frame(void *virt, uintptr_t *phys)
469 {
470 uintptr_t paddr;
471 struct z_page_frame *pf;
472 int ret = -EFAULT;
473
474 Z_PAGE_FRAME_FOREACH(paddr, pf) {
475 if (z_page_frame_is_mapped(pf)) {
476 if (virt == pf->addr) {
477 ret = 0;
478 if (phys != NULL) {
479 *phys = z_page_frame_to_phys(pf);
480 }
481 break;
482 }
483 }
484 }
485
486 return ret;
487 }
488 /* LCOV_EXCL_STOP */
489
490 __weak FUNC_ALIAS(virt_to_page_frame, arch_page_phys_get, int);
491
492 #ifdef CONFIG_DEMAND_PAGING
493 static int page_frame_prepare_locked(struct z_page_frame *pf, bool *dirty_ptr,
494 bool page_in, uintptr_t *location_ptr);
495
496 static inline void do_backing_store_page_in(uintptr_t location);
497 static inline void do_backing_store_page_out(uintptr_t location);
498 #endif /* CONFIG_DEMAND_PAGING */
499
500 /* Allocate a free page frame, and map it to a specified virtual address
501 *
502 * TODO: Add optional support for copy-on-write mappings to a zero page instead
503 * of allocating, in which case page frames will be allocated lazily as
504 * the mappings to the zero page get touched. This will avoid expensive
505 * page-ins as memory is mapped and physical RAM or backing store storage will
506 * not be used if the mapped memory is unused. The cost is an empty physical
507 * page of zeroes.
508 */
509 static int map_anon_page(void *addr, uint32_t flags)
510 {
511 struct z_page_frame *pf;
512 uintptr_t phys;
513 bool lock = (flags & K_MEM_MAP_LOCK) != 0U;
514 bool uninit = (flags & K_MEM_MAP_UNINIT) != 0U;
515
516 pf = free_page_frame_list_get();
517 if (pf == NULL) {
518 #ifdef CONFIG_DEMAND_PAGING
519 uintptr_t location;
520 bool dirty;
521 int ret;
522
523 pf = k_mem_paging_eviction_select(&dirty);
524 __ASSERT(pf != NULL, "failed to get a page frame");
525 LOG_DBG("evicting %p at 0x%lx", pf->addr,
526 z_page_frame_to_phys(pf));
527 ret = page_frame_prepare_locked(pf, &dirty, false, &location);
528 if (ret != 0) {
529 return -ENOMEM;
530 }
531 if (dirty) {
532 do_backing_store_page_out(location);
533 }
534 pf->flags = 0;
535 #else
536 return -ENOMEM;
537 #endif /* CONFIG_DEMAND_PAGING */
538 }
539
540 phys = z_page_frame_to_phys(pf);
541 arch_mem_map(addr, phys, CONFIG_MMU_PAGE_SIZE, flags | K_MEM_CACHE_WB);
542
543 if (lock) {
544 pf->flags |= Z_PAGE_FRAME_PINNED;
545 }
546 frame_mapped_set(pf, addr);
547
548 LOG_DBG("memory mapping anon page %p -> 0x%lx", addr, phys);
549
550 if (!uninit) {
551 /* If we later implement mappings to a copy-on-write
552 * zero page, we won't need this step
553 */
554 memset(addr, 0, CONFIG_MMU_PAGE_SIZE);
555 }
556
557 return 0;
558 }
559
560 void *k_mem_map(size_t size, uint32_t flags)
561 {
562 uint8_t *dst;
563 size_t total_size;
564 int ret;
565 k_spinlock_key_t key;
566 uint8_t *pos;
567
568 __ASSERT(!(((flags & K_MEM_PERM_USER) != 0U) &&
569 ((flags & K_MEM_MAP_UNINIT) != 0U)),
570 "user access to anonymous uninitialized pages is forbidden");
571 __ASSERT(size % CONFIG_MMU_PAGE_SIZE == 0U,
572 "unaligned size %zu passed to %s", size, __func__);
573 __ASSERT(size != 0, "zero sized memory mapping");
574 __ASSERT(page_frames_initialized, "%s called too early", __func__);
575 __ASSERT((flags & K_MEM_CACHE_MASK) == 0U,
576 "%s does not support explicit cache settings", __func__);
577
578 key = k_spin_lock(&z_mm_lock);
579
580 /* Need extra for the guard pages (before and after) which we
581 * won't map.
582 */
583 total_size = size + CONFIG_MMU_PAGE_SIZE * 2;
584
585 dst = virt_region_alloc(total_size, CONFIG_MMU_PAGE_SIZE);
586 if (dst == NULL) {
587 /* Address space has no free region */
588 goto out;
589 }
590
591 /* Unmap both guard pages to make sure accessing them
592 * will generate a fault.
593 */
594 arch_mem_unmap(dst, CONFIG_MMU_PAGE_SIZE);
595 arch_mem_unmap(dst + CONFIG_MMU_PAGE_SIZE + size,
596 CONFIG_MMU_PAGE_SIZE);
597
598 /* Skip over the "before" guard page in returned address. */
599 dst += CONFIG_MMU_PAGE_SIZE;
600
601 VIRT_FOREACH(dst, size, pos) {
602 ret = map_anon_page(pos, flags);
603
604 if (ret != 0) {
605 /* TODO: call k_mem_unmap(dst, pos - dst) when
606 * implemented in #28990 and release any guard virtual
607 * page as well.
608 */
609 dst = NULL;
610 goto out;
611 }
612 }
613 out:
614 k_spin_unlock(&z_mm_lock, key);
615 return dst;
616 }
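/*
 * Example usage of k_mem_map()/k_mem_unmap() (illustrative sketch):
 *
 *     void *buf = k_mem_map(4 * CONFIG_MMU_PAGE_SIZE, K_MEM_PERM_RW);
 *
 *     if (buf != NULL) {
 *         // ... use the zeroed, page-aligned region ...
 *         k_mem_unmap(buf, 4 * CONFIG_MMU_PAGE_SIZE);
 *     }
 *
 * The returned pointer skips the preceding guard page, and the same
 * address/size pair must be passed back to k_mem_unmap().
 */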
617
618 void k_mem_unmap(void *addr, size_t size)
619 {
620 uintptr_t phys;
621 uint8_t *pos;
622 struct z_page_frame *pf;
623 k_spinlock_key_t key;
624 size_t total_size;
625 int ret;
626
627 /* Need space for the "before" guard page */
628 __ASSERT_NO_MSG(POINTER_TO_UINT(addr) >= CONFIG_MMU_PAGE_SIZE);
629
630 /* Make sure address range is still valid after accounting
631 * for two guard pages.
632 */
633 pos = (uint8_t *)addr - CONFIG_MMU_PAGE_SIZE;
634 z_mem_assert_virtual_region(pos, size + (CONFIG_MMU_PAGE_SIZE * 2));
635
636 key = k_spin_lock(&z_mm_lock);
637
638 /* Check if both guard pages are unmapped.
639 * Bail if not, as this is probably a region not mapped
640 * using k_mem_map().
641 */
642 pos = addr;
643 ret = arch_page_phys_get(pos - CONFIG_MMU_PAGE_SIZE, NULL);
644 if (ret == 0) {
645 __ASSERT(ret != 0,
646 "%s: cannot find preceding guard page for (%p, %zu)",
647 __func__, addr, size);
648 goto out;
649 }
650
651 ret = arch_page_phys_get(pos + size, NULL);
652 if (ret == 0) {
653 __ASSERT(ret != 0,
654 "%s: cannot find succeeding guard page for (%p, %zu)",
655 __func__, addr, size);
656 goto out;
657 }
658
659 VIRT_FOREACH(addr, size, pos) {
660 ret = arch_page_phys_get(pos, &phys);
661
662 __ASSERT(ret == 0,
663 "%s: cannot unmap an unmapped address %p",
664 __func__, pos);
665 if (ret != 0) {
666 /* Found an address not mapped. Do not continue. */
667 goto out;
668 }
669
670 __ASSERT(z_is_page_frame(phys),
671 "%s: 0x%lx is not a page frame", __func__, phys);
672 if (!z_is_page_frame(phys)) {
673 /* Physical address has no corresponding page frame
674 * description in the page frame array.
675 * This should not happen. Do not continue.
676 */
677 goto out;
678 }
679
680 /* Grab the corresponding page frame from physical address */
681 pf = z_phys_to_page_frame(phys);
682
683 __ASSERT(z_page_frame_is_mapped(pf),
684 "%s: 0x%lx is not a mapped page frame", __func__, phys);
685 if (!z_page_frame_is_mapped(pf)) {
686 /* Page frame is not marked mapped.
687 * This should not happen. Do not continue.
688 */
689 goto out;
690 }
691
692 arch_mem_unmap(pos, CONFIG_MMU_PAGE_SIZE);
693
694 /* Put the page frame back into free list */
695 page_frame_free_locked(pf);
696 }
697
698 /* There are guard pages just before and after the mapped
699 * region. So we also need to free them from the bitmap.
700 */
701 pos = (uint8_t *)addr - CONFIG_MMU_PAGE_SIZE;
702 total_size = size + CONFIG_MMU_PAGE_SIZE * 2;
703 virt_region_free(pos, total_size);
704
705 out:
706 k_spin_unlock(&z_mm_lock, key);
707 }
708
709 size_t k_mem_free_get(void)
710 {
711 size_t ret;
712 k_spinlock_key_t key;
713
714 __ASSERT(page_frames_initialized, "%s called too early", __func__);
715
716 key = k_spin_lock(&z_mm_lock);
717 #ifdef CONFIG_DEMAND_PAGING
718 if (z_free_page_count > CONFIG_DEMAND_PAGING_PAGE_FRAMES_RESERVE) {
719 ret = z_free_page_count - CONFIG_DEMAND_PAGING_PAGE_FRAMES_RESERVE;
720 } else {
721 ret = 0;
722 }
723 #else
724 ret = z_free_page_count;
725 #endif
726 k_spin_unlock(&z_mm_lock, key);
727
728 return ret * (size_t)CONFIG_MMU_PAGE_SIZE;
729 }
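/*
 * Illustrative sketch: k_mem_free_get() reports how much more anonymous
 * memory can still be requested, e.g.
 *
 *     if (k_mem_free_get() >= CONFIG_MMU_PAGE_SIZE) {
 *         void *page = k_mem_map(CONFIG_MMU_PAGE_SIZE, K_MEM_PERM_RW);
 *         ...
 *     }
 *
 * though the value may already be stale by the time the mapping is attempted.
 */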
730
731 /* Get the default virtual region alignment, here the default MMU page size
732 *
733 * @param[in] phys Physical address of region to be mapped, aligned to MMU_PAGE_SIZE
734 * @param[in] size Size of region to be mapped, aligned to MMU_PAGE_SIZE
735 *
736 * @retval alignment to apply on the virtual address of this region
737 */
738 static size_t virt_region_align(uintptr_t phys, size_t size)
739 {
740 ARG_UNUSED(phys);
741 ARG_UNUSED(size);
742
743 return CONFIG_MMU_PAGE_SIZE;
744 }
745
746 __weak FUNC_ALIAS(virt_region_align, arch_virt_region_align, size_t);
747
748 /* This may be called from arch early boot code before z_cstart() is invoked.
749 * Data will be copied and BSS zeroed, but this function must not rely on
750 * any other initialization functions having been called in order to work correctly.
751 */
752 void z_phys_map(uint8_t **virt_ptr, uintptr_t phys, size_t size, uint32_t flags)
753 {
754 uintptr_t aligned_phys, addr_offset;
755 size_t aligned_size, align_boundary;
756 k_spinlock_key_t key;
757 uint8_t *dest_addr;
758 size_t num_bits;
759 size_t offset;
760
761 #ifndef CONFIG_KERNEL_DIRECT_MAP
762 __ASSERT(!(flags & K_MEM_DIRECT_MAP), "The direct-map is not enabled");
763 #endif
764 addr_offset = k_mem_region_align(&aligned_phys, &aligned_size,
765 phys, size,
766 CONFIG_MMU_PAGE_SIZE);
767 __ASSERT(aligned_size != 0U, "0-length mapping at 0x%lx", aligned_phys);
768 __ASSERT(aligned_phys < (aligned_phys + (aligned_size - 1)),
769 "wraparound for physical address 0x%lx (size %zu)",
770 aligned_phys, aligned_size);
771
772 align_boundary = arch_virt_region_align(aligned_phys, aligned_size);
773
774 key = k_spin_lock(&z_mm_lock);
775
776 if (IS_ENABLED(CONFIG_KERNEL_DIRECT_MAP) &&
777 (flags & K_MEM_DIRECT_MAP)) {
778 dest_addr = (uint8_t *)aligned_phys;
779
780 /* Mark the region of virtual memory bitmap as used
781 * if the region overlaps the virtual memory space.
782 *
783 * Basically if either end of region is within
784 * virtual memory space, we need to mark the bits.
785 */
786
787 if (IN_RANGE(aligned_phys,
788 (uintptr_t)Z_VIRT_RAM_START,
789 (uintptr_t)(Z_VIRT_RAM_END - 1)) ||
790 IN_RANGE(aligned_phys + aligned_size - 1,
791 (uintptr_t)Z_VIRT_RAM_START,
792 (uintptr_t)(Z_VIRT_RAM_END - 1))) {
793 uint8_t *adjusted_start = MAX(dest_addr, Z_VIRT_RAM_START);
794 uint8_t *adjusted_end = MIN(dest_addr + aligned_size,
795 Z_VIRT_RAM_END);
796 size_t adjusted_sz = adjusted_end - adjusted_start;
797
798 num_bits = adjusted_sz / CONFIG_MMU_PAGE_SIZE;
799 offset = virt_to_bitmap_offset(adjusted_start, adjusted_sz);
800 if (sys_bitarray_test_and_set_region(
801 &virt_region_bitmap, num_bits, offset, true))
802 goto fail;
803 }
804 } else {
805 /* Obtain an appropriately sized chunk of virtual memory */
806 dest_addr = virt_region_alloc(aligned_size, align_boundary);
807 if (!dest_addr) {
808 goto fail;
809 }
810 }
811
812 /* If this fails there's something amiss with virt_region_alloc() */
813 __ASSERT((uintptr_t)dest_addr <
814 ((uintptr_t)dest_addr + (size - 1)),
815 "wraparound for virtual address %p (size %zu)",
816 dest_addr, size);
817
818 LOG_DBG("arch_mem_map(%p, 0x%lx, %zu, %x) offset %lu", dest_addr,
819 aligned_phys, aligned_size, flags, addr_offset);
820
821 arch_mem_map(dest_addr, aligned_phys, aligned_size, flags);
822 k_spin_unlock(&z_mm_lock, key);
823
824 *virt_ptr = dest_addr + addr_offset;
825 return;
826 fail:
827 /* May re-visit this in the future, but for now running out of
828 * virtual address space or failing the arch_mem_map() call is
829 * an unrecoverable situation.
830 *
831 * Other problems not related to resource exhaustion we leave as
832 * assertions since they are clearly programming mistakes.
833 */
834 LOG_ERR("memory mapping 0x%lx (size %zu, flags 0x%x) failed",
835 phys, size, flags);
836 k_panic();
837 }
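/*
 * Example usage of z_phys_map() (illustrative sketch; the device address is
 * made up):
 *
 *     uint8_t *regs;
 *
 *     z_phys_map(&regs, 0xABCD0000UL, 0x1000,
 *                K_MEM_PERM_RW | K_MEM_CACHE_NONE);
 *     // regs now points at an uncached virtual mapping of the 4 KiB
 *     // physical region starting at 0xABCD0000
 */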
838
839 void z_phys_unmap(uint8_t *virt, size_t size)
840 {
841 uintptr_t aligned_virt, addr_offset;
842 size_t aligned_size;
843 k_spinlock_key_t key;
844
845 addr_offset = k_mem_region_align(&aligned_virt, &aligned_size,
846 POINTER_TO_UINT(virt), size,
847 CONFIG_MMU_PAGE_SIZE);
848 __ASSERT(aligned_size != 0U, "0-length mapping at 0x%lx", aligned_virt);
849 __ASSERT(aligned_virt < (aligned_virt + (aligned_size - 1)),
850 "wraparound for virtual address 0x%lx (size %zu)",
851 aligned_virt, aligned_size);
852
853 key = k_spin_lock(&z_mm_lock);
854
855 LOG_DBG("arch_mem_unmap(0x%lx, %zu) offset %lu",
856 aligned_virt, aligned_size, addr_offset);
857
858 arch_mem_unmap(UINT_TO_POINTER(aligned_virt), aligned_size);
859 virt_region_free(UINT_TO_POINTER(aligned_virt), aligned_size);
860 k_spin_unlock(&z_mm_lock, key);
861 }
862
863 /*
864 * Miscellaneous
865 */
866
867 size_t k_mem_region_align(uintptr_t *aligned_addr, size_t *aligned_size,
868 uintptr_t addr, size_t size, size_t align)
869 {
870 size_t addr_offset;
871
872 /* The actual mapped region must be page-aligned. Round down the
873 * physical address and pad the region size appropriately
874 */
875 *aligned_addr = ROUND_DOWN(addr, align);
876 addr_offset = addr - *aligned_addr;
877 *aligned_size = ROUND_UP(size + addr_offset, align);
878
879 return addr_offset;
880 }
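/*
 * Worked example (illustrative): k_mem_region_align(&pa, &sz, 0x12345, 0x1000,
 * 0x1000) sets pa = 0x12000 and sz = 0x2000 and returns 0x345, widening the
 * request so that both pages touched by [0x12345, 0x13345) are covered.
 */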
881
882 #if defined(CONFIG_LINKER_USE_BOOT_SECTION) || defined(CONFIG_LINKER_USE_PINNED_SECTION)
883 static void mark_linker_section_pinned(void *start_addr, void *end_addr,
884 bool pin)
885 {
886 struct z_page_frame *pf;
887 uint8_t *addr;
888
889 uintptr_t pinned_start = ROUND_DOWN(POINTER_TO_UINT(start_addr),
890 CONFIG_MMU_PAGE_SIZE);
891 uintptr_t pinned_end = ROUND_UP(POINTER_TO_UINT(end_addr),
892 CONFIG_MMU_PAGE_SIZE);
893 size_t pinned_size = pinned_end - pinned_start;
894
895 VIRT_FOREACH(UINT_TO_POINTER(pinned_start), pinned_size, addr)
896 {
897 pf = z_phys_to_page_frame(Z_BOOT_VIRT_TO_PHYS(addr));
898 frame_mapped_set(pf, addr);
899
900 if (pin) {
901 pf->flags |= Z_PAGE_FRAME_PINNED;
902 } else {
903 pf->flags &= ~Z_PAGE_FRAME_PINNED;
904 }
905 }
906 }
907 #endif /* CONFIG_LINKER_USE_BOOT_SECTION || CONFIG_LINKER_USE_PINNED_SECTION */
908
909 void z_mem_manage_init(void)
910 {
911 uintptr_t phys;
912 uint8_t *addr;
913 struct z_page_frame *pf;
914 k_spinlock_key_t key = k_spin_lock(&z_mm_lock);
915
916 free_page_frame_list_init();
917
918 ARG_UNUSED(addr);
919
920 #ifdef CONFIG_ARCH_HAS_RESERVED_PAGE_FRAMES
921 /* If some page frames are unavailable for use as memory, arch
922 * code will mark Z_PAGE_FRAME_RESERVED in their flags
923 */
924 arch_reserved_pages_update();
925 #endif /* CONFIG_ARCH_HAS_RESERVED_PAGE_FRAMES */
926
927 #ifdef CONFIG_LINKER_GENERIC_SECTIONS_PRESENT_AT_BOOT
928 /* All pages composing the Zephyr image are mapped at boot in a
929 * predictable way. This can change at runtime.
930 */
931 VIRT_FOREACH(Z_KERNEL_VIRT_START, Z_KERNEL_VIRT_SIZE, addr)
932 {
933 pf = z_phys_to_page_frame(Z_BOOT_VIRT_TO_PHYS(addr));
934 frame_mapped_set(pf, addr);
935
936 /* TODO: for now we pin the whole Zephyr image. Demand paging
937 * is currently tested with anonymously-mapped pages which are not
938 * pinned.
939 *
940 * We will need to setup linker regions for a subset of kernel
941 * code/data pages which are pinned in memory and
942 * may not be evicted. This will contain critical CPU data
943 * structures, and any code used to perform page fault
944 * handling, page-ins, etc.
945 */
946 pf->flags |= Z_PAGE_FRAME_PINNED;
947 }
948 #endif /* CONFIG_LINKER_GENERIC_SECTIONS_PRESENT_AT_BOOT */
949
950 #ifdef CONFIG_LINKER_USE_BOOT_SECTION
951 /* Pin the boot section to prevent it from being swapped out during
952 * boot process. Will be un-pinned once boot process completes.
953 */
954 mark_linker_section_pinned(lnkr_boot_start, lnkr_boot_end, true);
955 #endif
956
957 #ifdef CONFIG_LINKER_USE_PINNED_SECTION
958 /* Pin the page frames corresponding to the pinned symbols */
959 mark_linker_section_pinned(lnkr_pinned_start, lnkr_pinned_end, true);
960 #endif
961
962 /* Any remaining pages that aren't mapped, reserved, or pinned get
963 * added to the free pages list
964 */
965 Z_PAGE_FRAME_FOREACH(phys, pf) {
966 if (z_page_frame_is_available(pf)) {
967 free_page_frame_list_put(pf);
968 }
969 }
970 LOG_DBG("free page frames: %zu", z_free_page_count);
971
972 #ifdef CONFIG_DEMAND_PAGING
973 #ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
974 z_paging_histogram_init();
975 #endif
976 k_mem_paging_backing_store_init();
977 k_mem_paging_eviction_init();
978 #endif
979 #if __ASSERT_ON
980 page_frames_initialized = true;
981 #endif
982 k_spin_unlock(&z_mm_lock, key);
983
984 #ifndef CONFIG_LINKER_GENERIC_SECTIONS_PRESENT_AT_BOOT
985 /* If the BSS section is not present in memory at boot,
986 * it will not have been cleared. This needs to be
987 * done now, since the paging mechanism has been initialized
988 * and the BSS pages can be brought into physical
989 * memory to be cleared.
990 */
991 z_bss_zero();
992 #endif
993 }
994
995 void z_mem_manage_boot_finish(void)
996 {
997 #ifdef CONFIG_LINKER_USE_BOOT_SECTION
998 /* At the end of boot process, unpin the boot sections
999 * as they don't need to be in memory all the time anymore.
1000 */
1001 mark_linker_section_pinned(lnkr_boot_start, lnkr_boot_end, false);
1002 #endif
1003 }
1004
1005 #ifdef CONFIG_DEMAND_PAGING
1006
1007 #ifdef CONFIG_DEMAND_PAGING_STATS
1008 struct k_mem_paging_stats_t paging_stats;
1009 extern struct k_mem_paging_histogram_t z_paging_histogram_eviction;
1010 extern struct k_mem_paging_histogram_t z_paging_histogram_backing_store_page_in;
1011 extern struct k_mem_paging_histogram_t z_paging_histogram_backing_store_page_out;
1012 #endif
1013
1014 static inline void do_backing_store_page_in(uintptr_t location)
1015 {
1016 #ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
1017 uint32_t time_diff;
1018
1019 #ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS
1020 timing_t time_start, time_end;
1021
1022 time_start = timing_counter_get();
1023 #else
1024 uint32_t time_start;
1025
1026 time_start = k_cycle_get_32();
1027 #endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */
1028 #endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */
1029
1030 k_mem_paging_backing_store_page_in(location);
1031
1032 #ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
1033 #ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS
1034 time_end = timing_counter_get();
1035 time_diff = (uint32_t)timing_cycles_get(&time_start, &time_end);
1036 #else
1037 time_diff = k_cycle_get_32() - time_start;
1038 #endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */
1039
1040 z_paging_histogram_inc(&z_paging_histogram_backing_store_page_in,
1041 time_diff);
1042 #endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */
1043 }
1044
1045 static inline void do_backing_store_page_out(uintptr_t location)
1046 {
1047 #ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
1048 uint32_t time_diff;
1049
1050 #ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS
1051 timing_t time_start, time_end;
1052
1053 time_start = timing_counter_get();
1054 #else
1055 uint32_t time_start;
1056
1057 time_start = k_cycle_get_32();
1058 #endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */
1059 #endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */
1060
1061 k_mem_paging_backing_store_page_out(location);
1062
1063 #ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
1064 #ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS
1065 time_end = timing_counter_get();
1066 time_diff = (uint32_t)timing_cycles_get(&time_start, &time_end);
1067 #else
1068 time_diff = k_cycle_get_32() - time_start;
1069 #endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */
1070
1071 z_paging_histogram_inc(&z_paging_histogram_backing_store_page_out,
1072 time_diff);
1073 #endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */
1074 }
1075
1076 /* Current implementation relies on interrupt locking to prevent any page table
1077 * access, which falls over if other CPUs are active. Addressing this is not
1078 * as simple as using spinlocks as regular memory reads/writes constitute
1079 * "access" in this sense.
1080 *
1081 * Current needs for demand paging are on uniprocessor systems.
1082 */
1083 BUILD_ASSERT(!IS_ENABLED(CONFIG_SMP));
1084
1085 static void virt_region_foreach(void *addr, size_t size,
1086 void (*func)(void *))
1087 {
1088 z_mem_assert_virtual_region(addr, size);
1089
1090 for (size_t offset = 0; offset < size; offset += CONFIG_MMU_PAGE_SIZE) {
1091 func((uint8_t *)addr + offset);
1092 }
1093 }
1094
1095 /*
1096 * Perform some preparatory steps before paging out. The provided page frame
1097 * must be evicted to the backing store immediately after this is called
1098 * with a call to k_mem_paging_backing_store_page_out() if it contains
1099 * a data page.
1100 *
1101 * - Map page frame to scratch area if requested. This is always true if we're
1102 * doing a page fault, but is only set on manual evictions if the page is
1103 * dirty.
1104 * - If mapped:
1105 * - obtain backing store location and populate location parameter
1106 * - Update page tables with location
1107 * - Mark page frame as busy
1108 *
1109 * Returns -ENOMEM if the backing store is full
1110 */
1111 static int page_frame_prepare_locked(struct z_page_frame *pf, bool *dirty_ptr,
1112 bool page_fault, uintptr_t *location_ptr)
1113 {
1114 uintptr_t phys;
1115 int ret;
1116 bool dirty = *dirty_ptr;
1117
1118 phys = z_page_frame_to_phys(pf);
1119 __ASSERT(!z_page_frame_is_pinned(pf), "page frame 0x%lx is pinned",
1120 phys);
1121
1122 /* If the backing store doesn't have a copy of the page, even if it
1123 * wasn't modified, treat as dirty. This can happen for a few
1124 * reasons:
1125 * 1) Page has never been swapped out before, and the backing store
1126 * wasn't pre-populated with this data page.
1127 * 2) Page was swapped out before, but the page contents were not
1128 * preserved after swapping back in.
1129 * 3) Page contents were preserved when swapped back in, but were later
1130 * evicted from the backing store to make room for other evicted
1131 * pages.
1132 */
1133 if (z_page_frame_is_mapped(pf)) {
1134 dirty = dirty || !z_page_frame_is_backed(pf);
1135 }
1136
1137 if (dirty || page_fault) {
1138 arch_mem_scratch(phys);
1139 }
1140
1141 if (z_page_frame_is_mapped(pf)) {
1142 ret = k_mem_paging_backing_store_location_get(pf, location_ptr,
1143 page_fault);
1144 if (ret != 0) {
1145 LOG_ERR("out of backing store memory");
1146 return -ENOMEM;
1147 }
1148 arch_mem_page_out(pf->addr, *location_ptr);
1149 } else {
1150 /* Shouldn't happen unless this function is mis-used */
1151 __ASSERT(!dirty, "un-mapped page determined to be dirty");
1152 }
1153 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1154 /* Mark as busy so that z_page_frame_is_evictable() returns false */
1155 __ASSERT(!z_page_frame_is_busy(pf), "page frame 0x%lx is already busy",
1156 phys);
1157 pf->flags |= Z_PAGE_FRAME_BUSY;
1158 #endif
1159 /* Update the dirty parameter, since we may have set it to true if the page
1160 * wasn't backed, even if it was otherwise clean
1161 */
1162 *dirty_ptr = dirty;
1163
1164 return 0;
1165 }
1166
1167 static int do_mem_evict(void *addr)
1168 {
1169 bool dirty;
1170 struct z_page_frame *pf;
1171 uintptr_t location;
1172 int key, ret;
1173 uintptr_t flags, phys;
1174
1175 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1176 __ASSERT(!k_is_in_isr(),
1177 "%s is unavailable in ISRs with CONFIG_DEMAND_PAGING_ALLOW_IRQ",
1178 __func__);
1179 k_sched_lock();
1180 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1181 key = irq_lock();
1182 flags = arch_page_info_get(addr, &phys, false);
1183 __ASSERT((flags & ARCH_DATA_PAGE_NOT_MAPPED) == 0,
1184 "address %p isn't mapped", addr);
1185 if ((flags & ARCH_DATA_PAGE_LOADED) == 0) {
1186 /* Un-mapped or already evicted. Nothing to do */
1187 ret = 0;
1188 goto out;
1189 }
1190
1191 dirty = (flags & ARCH_DATA_PAGE_DIRTY) != 0;
1192 pf = z_phys_to_page_frame(phys);
1193 __ASSERT(pf->addr == addr, "page frame address mismatch");
1194 ret = page_frame_prepare_locked(pf, &dirty, false, &location);
1195 if (ret != 0) {
1196 goto out;
1197 }
1198
1199 __ASSERT(ret == 0, "failed to prepare page frame");
1200 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1201 irq_unlock(key);
1202 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1203 if (dirty) {
1204 do_backing_store_page_out(location);
1205 }
1206 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1207 key = irq_lock();
1208 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1209 page_frame_free_locked(pf);
1210 out:
1211 irq_unlock(key);
1212 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1213 k_sched_unlock();
1214 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1215 return ret;
1216 }
1217
1218 int k_mem_page_out(void *addr, size_t size)
1219 {
1220 __ASSERT(page_frames_initialized, "%s called on %p too early", __func__,
1221 addr);
1222 z_mem_assert_virtual_region(addr, size);
1223
1224 for (size_t offset = 0; offset < size; offset += CONFIG_MMU_PAGE_SIZE) {
1225 void *pos = (uint8_t *)addr + offset;
1226 int ret;
1227
1228 ret = do_mem_evict(pos);
1229 if (ret != 0) {
1230 return ret;
1231 }
1232 }
1233
1234 return 0;
1235 }
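/*
 * Example usage (illustrative): proactively evict a page-aligned buffer that
 * will not be needed for a while, then fault it back in before reuse:
 *
 *     if (k_mem_page_out(buf, buf_size) == 0) {
 *         ...
 *         k_mem_page_in(buf, buf_size);
 *     }
 *
 * where buf and buf_size are hypothetical, page-aligned values.
 */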
1236
1237 int z_page_frame_evict(uintptr_t phys)
1238 {
1239 int key, ret;
1240 struct z_page_frame *pf;
1241 bool dirty;
1242 uintptr_t flags;
1243 uintptr_t location;
1244
1245 __ASSERT(page_frames_initialized, "%s called on 0x%lx too early",
1246 __func__, phys);
1247
1248 /* Implementation is similar to do_page_fault() except there is no
1249 * data page to page-in, see comments in that function.
1250 */
1251
1252 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1253 __ASSERT(!k_is_in_isr(),
1254 "%s is unavailable in ISRs with CONFIG_DEMAND_PAGING_ALLOW_IRQ",
1255 __func__);
1256 k_sched_lock();
1257 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1258 key = irq_lock();
1259 pf = z_phys_to_page_frame(phys);
1260 if (!z_page_frame_is_mapped(pf)) {
1261 /* Nothing to do, free page */
1262 ret = 0;
1263 goto out;
1264 }
1265 flags = arch_page_info_get(pf->addr, NULL, false);
1266 /* Shouldn't ever happen */
1267 __ASSERT((flags & ARCH_DATA_PAGE_LOADED) != 0, "data page not loaded");
1268 dirty = (flags & ARCH_DATA_PAGE_DIRTY) != 0;
1269 ret = page_frame_prepare_locked(pf, &dirty, false, &location);
1270 if (ret != 0) {
1271 goto out;
1272 }
1273
1274 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1275 irq_unlock(key);
1276 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1277 if (dirty) {
1278 do_backing_store_page_out(location);
1279 }
1280 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1281 key = irq_lock();
1282 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1283 page_frame_free_locked(pf);
1284 out:
1285 irq_unlock(key);
1286 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1287 k_sched_unlock();
1288 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1289 return ret;
1290 }
1291
1292 static inline void paging_stats_faults_inc(struct k_thread *faulting_thread,
1293 int key)
1294 {
1295 #ifdef CONFIG_DEMAND_PAGING_STATS
1296 bool is_irq_unlocked = arch_irq_unlocked(key);
1297
1298 paging_stats.pagefaults.cnt++;
1299
1300 if (is_irq_unlocked) {
1301 paging_stats.pagefaults.irq_unlocked++;
1302 } else {
1303 paging_stats.pagefaults.irq_locked++;
1304 }
1305
1306 #ifdef CONFIG_DEMAND_PAGING_THREAD_STATS
1307 faulting_thread->paging_stats.pagefaults.cnt++;
1308
1309 if (is_irq_unlocked) {
1310 faulting_thread->paging_stats.pagefaults.irq_unlocked++;
1311 } else {
1312 faulting_thread->paging_stats.pagefaults.irq_locked++;
1313 }
1314 #else
1315 ARG_UNUSED(faulting_thread);
1316 #endif
1317
1318 #ifndef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1319 if (k_is_in_isr()) {
1320 paging_stats.pagefaults.in_isr++;
1321
1322 #ifdef CONFIG_DEMAND_PAGING_THREAD_STATS
1323 faulting_thread->paging_stats.pagefaults.in_isr++;
1324 #endif
1325 }
1326 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1327 #endif /* CONFIG_DEMAND_PAGING_STATS */
1328 }
1329
1330 static inline void paging_stats_eviction_inc(struct k_thread *faulting_thread,
1331 bool dirty)
1332 {
1333 #ifdef CONFIG_DEMAND_PAGING_STATS
1334 if (dirty) {
1335 paging_stats.eviction.dirty++;
1336 } else {
1337 paging_stats.eviction.clean++;
1338 }
1339 #ifdef CONFIG_DEMAND_PAGING_THREAD_STATS
1340 if (dirty) {
1341 faulting_thread->paging_stats.eviction.dirty++;
1342 } else {
1343 faulting_thread->paging_stats.eviction.clean++;
1344 }
1345 #else
1346 ARG_UNUSED(faulting_thread);
1347 #endif /* CONFIG_DEMAND_PAGING_THREAD_STATS */
1348 #endif /* CONFIG_DEMAND_PAGING_STATS */
1349 }
1350
1351 static inline struct z_page_frame *do_eviction_select(bool *dirty)
1352 {
1353 struct z_page_frame *pf;
1354
1355 #ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
1356 uint32_t time_diff;
1357
1358 #ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS
1359 timing_t time_start, time_end;
1360
1361 time_start = timing_counter_get();
1362 #else
1363 uint32_t time_start;
1364
1365 time_start = k_cycle_get_32();
1366 #endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */
1367 #endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */
1368
1369 pf = k_mem_paging_eviction_select(dirty);
1370
1371 #ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
1372 #ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS
1373 time_end = timing_counter_get();
1374 time_diff = (uint32_t)timing_cycles_get(&time_start, &time_end);
1375 #else
1376 time_diff = k_cycle_get_32() - time_start;
1377 #endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */
1378
1379 z_paging_histogram_inc(&z_paging_histogram_eviction, time_diff);
1380 #endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */
1381
1382 return pf;
1383 }
1384
1385 static bool do_page_fault(void *addr, bool pin)
1386 {
1387 struct z_page_frame *pf;
1388 int key, ret;
1389 uintptr_t page_in_location, page_out_location;
1390 enum arch_page_location status;
1391 bool result;
1392 bool dirty = false;
1393 struct k_thread *faulting_thread = _current_cpu->current;
1394
1395 __ASSERT(page_frames_initialized, "page fault at %p happened too early",
1396 addr);
1397
1398 LOG_DBG("page fault at %p", addr);
1399
1400 /*
1401 * TODO: Add performance accounting:
1402 * - k_mem_paging_eviction_select() metrics
1403 * * periodic timer execution time histogram (if implemented)
1404 */
1405
1406 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1407 /* We lock the scheduler so that other threads are never scheduled
1408 * during the page-in/out operation.
1409 *
1410 * We do however re-enable interrupts during the page-in/page-out
1411 * operation iff interrupts were enabled when the exception was taken;
1412 * in this configuration page faults in an ISR are a bug; all their
1413 * code/data must be pinned.
1414 *
1415 * If interrupts were disabled when the exception was taken, the
1416 * arch code is responsible for keeping them that way when entering
1417 * this function.
1418 *
1419 * If this is not enabled, then interrupts are always locked for the
1420 * entire operation. This is far worse for system interrupt latency
1421 * but requires less pinned pages and ISRs may also take page faults.
1422 *
1423 * Support for allowing k_mem_paging_backing_store_page_out() and
1424 * k_mem_paging_backing_store_page_in() to also sleep and allow
1425 * other threads to run (such as in the case where the transfer is
1426 * async DMA) is not implemented. Even if limited to thread context,
1427 * arbitrary memory access triggering exceptions that put a thread to
1428 * sleep on a contended page fault operation will break scheduling
1429 * assumptions of cooperative threads or threads that implement
1430 * critical sections with spinlocks or disabling IRQs.
1431 */
1432 k_sched_lock();
1433 __ASSERT(!k_is_in_isr(), "ISR page faults are forbidden");
1434 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1435
1436 key = irq_lock();
1437 status = arch_page_location_get(addr, &page_in_location);
1438 if (status == ARCH_PAGE_LOCATION_BAD) {
1439 /* Return false to treat as a fatal error */
1440 result = false;
1441 goto out;
1442 }
1443 result = true;
1444
1445 if (status == ARCH_PAGE_LOCATION_PAGED_IN) {
1446 if (pin) {
1447 /* It's a physical memory address */
1448 uintptr_t phys = page_in_location;
1449
1450 pf = z_phys_to_page_frame(phys);
1451 pf->flags |= Z_PAGE_FRAME_PINNED;
1452 }
1453
1454 /* This if-block is to pin the page if it is
1455 * already present in physical memory. There is
1456 * no need to go through the following code to
1457 * pull in the data pages. So skip to the end.
1458 */
1459 goto out;
1460 }
1461 __ASSERT(status == ARCH_PAGE_LOCATION_PAGED_OUT,
1462 "unexpected status value %d", status);
1463
1464 paging_stats_faults_inc(faulting_thread, key);
1465
1466 pf = free_page_frame_list_get();
1467 if (pf == NULL) {
1468 /* Need to evict a page frame */
1469 pf = do_eviction_select(&dirty);
1470 __ASSERT(pf != NULL, "failed to get a page frame");
1471 LOG_DBG("evicting %p at 0x%lx", pf->addr,
1472 z_page_frame_to_phys(pf));
1473
1474 paging_stats_eviction_inc(faulting_thread, dirty);
1475 }
1476 ret = page_frame_prepare_locked(pf, &dirty, true, &page_out_location);
1477 __ASSERT(ret == 0, "failed to prepare page frame");
1478
1479 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1480 irq_unlock(key);
1481 /* Interrupts are now unlocked if they were not locked when we entered
1482 * this function, and we may service ISRs. The scheduler is still
1483 * locked.
1484 */
1485 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1486 if (dirty) {
1487 do_backing_store_page_out(page_out_location);
1488 }
1489 do_backing_store_page_in(page_in_location);
1490
1491 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1492 key = irq_lock();
1493 pf->flags &= ~Z_PAGE_FRAME_BUSY;
1494 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1495 if (pin) {
1496 pf->flags |= Z_PAGE_FRAME_PINNED;
1497 }
1498 pf->flags |= Z_PAGE_FRAME_MAPPED;
1499 pf->addr = UINT_TO_POINTER(POINTER_TO_UINT(addr)
1500 & ~(CONFIG_MMU_PAGE_SIZE - 1));
1501
1502 arch_mem_page_in(addr, z_page_frame_to_phys(pf));
1503 k_mem_paging_backing_store_page_finalize(pf, page_in_location);
1504 out:
1505 irq_unlock(key);
1506 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1507 k_sched_unlock();
1508 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1509
1510 return result;
1511 }
1512
1513 static void do_page_in(void *addr)
1514 {
1515 bool ret;
1516
1517 ret = do_page_fault(addr, false);
1518 __ASSERT(ret, "unmapped memory address %p", addr);
1519 (void)ret;
1520 }
1521
1522 void k_mem_page_in(void *addr, size_t size)
1523 {
1524 __ASSERT(!IS_ENABLED(CONFIG_DEMAND_PAGING_ALLOW_IRQ) || !k_is_in_isr(),
1525 "%s may not be called in ISRs if CONFIG_DEMAND_PAGING_ALLOW_IRQ is enabled",
1526 __func__);
1527 virt_region_foreach(addr, size, do_page_in);
1528 }
1529
1530 static void do_mem_pin(void *addr)
1531 {
1532 bool ret;
1533
1534 ret = do_page_fault(addr, true);
1535 __ASSERT(ret, "unmapped memory address %p", addr);
1536 (void)ret;
1537 }
1538
1539 void k_mem_pin(void *addr, size_t size)
1540 {
1541 __ASSERT(!IS_ENABLED(CONFIG_DEMAND_PAGING_ALLOW_IRQ) || !k_is_in_isr(),
1542 "%s may not be called in ISRs if CONFIG_DEMAND_PAGING_ALLOW_IRQ is enabled",
1543 __func__);
1544 virt_region_foreach(addr, size, do_mem_pin);
1545 }
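/*
 * Example usage (illustrative): keep a region resident while it is accessed
 * from a context that must not take page faults, then allow eviction again:
 *
 *     k_mem_pin(isr_buf, isr_buf_size);
 *     // ... code that may touch isr_buf from an ISR runs here ...
 *     k_mem_unpin(isr_buf, isr_buf_size);
 *
 * isr_buf and isr_buf_size are hypothetical, page-aligned values.
 */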
1546
1547 bool z_page_fault(void *addr)
1548 {
1549 return do_page_fault(addr, false);
1550 }
1551
1552 static void do_mem_unpin(void *addr)
1553 {
1554 struct z_page_frame *pf;
1555 unsigned int key;
1556 uintptr_t flags, phys;
1557
1558 key = irq_lock();
1559 flags = arch_page_info_get(addr, &phys, false);
1560 __ASSERT((flags & ARCH_DATA_PAGE_NOT_MAPPED) == 0,
1561 "invalid data page at %p", addr);
1562 if ((flags & ARCH_DATA_PAGE_LOADED) != 0) {
1563 pf = z_phys_to_page_frame(phys);
1564 pf->flags &= ~Z_PAGE_FRAME_PINNED;
1565 }
1566 irq_unlock(key);
1567 }
1568
1569 void k_mem_unpin(void *addr, size_t size)
1570 {
1571 __ASSERT(page_frames_initialized, "%s called on %p too early", __func__,
1572 addr);
1573 virt_region_foreach(addr, size, do_mem_unpin);
1574 }
1575
1576 #endif /* CONFIG_DEMAND_PAGING */
1577