1 /*
2 * Copyright (c) 2020 Intel Corporation
3 *
4 * SPDX-License-Identifier: Apache-2.0
5 *
6 * Routines for managing virtual address spaces
7 */
8
9 #include <stdint.h>
10 #include <kernel_arch_interface.h>
11 #include <zephyr/spinlock.h>
12 #include <mmu.h>
13 #include <zephyr/init.h>
14 #include <kernel_internal.h>
15 #include <zephyr/syscall_handler.h>
16 #include <zephyr/toolchain.h>
17 #include <zephyr/linker/linker-defs.h>
18 #include <zephyr/sys/bitarray.h>
19 #include <zephyr/timing/timing.h>
20 #include <zephyr/logging/log.h>
21 LOG_MODULE_DECLARE(os, CONFIG_KERNEL_LOG_LEVEL);
22
23 /*
24 * General terminology:
25 * - A page frame is a page-sized physical memory region in RAM. It is a
26 * container where a data page may be placed. It is always referred to by
27 * physical address. We have a convention of using uintptr_t for physical
28 * addresses. We instantiate a struct z_page_frame to store metadata for
29 * every page frame.
30 *
31 * - A data page is a page-sized region of data. It may exist in a page frame,
32 * or be paged out to some backing store. Its location can always be looked
33 * up in the CPU's page tables (or equivalent) by virtual address.
34 * The data type will always be void * or in some cases uint8_t * when we
35 * want to do pointer arithmetic.
36 */
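
/* Illustrative sketch (not part of the original sources): the page frame
 * array is indexed by physical page number relative to the start of RAM,
 * so converting between a struct z_page_frame pointer and its physical
 * address is plain pointer arithmetic, conceptually:
 *
 *   phys = Z_PHYS_RAM_START + (pf - z_page_frames) * CONFIG_MMU_PAGE_SIZE;
 *   pf = &z_page_frames[(phys - Z_PHYS_RAM_START) / CONFIG_MMU_PAGE_SIZE];
 *
 * The actual helpers for these conversions are z_page_frame_to_phys() and
 * z_phys_to_page_frame() in the MMU headers.
 */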
37
38 /* Spinlock to protect any globals in this file and serialize page table
39 * updates in arch code
40 */
41 struct k_spinlock z_mm_lock;
42
43 /*
44 * General page frame management
45 */
46
47 /* Database of all RAM page frames */
48 struct z_page_frame z_page_frames[Z_NUM_PAGE_FRAMES];
49
50 #if __ASSERT_ON
51 /* Indicator that z_page_frames has been initialized; many of these APIs do
52 * not work before POST_KERNEL
53 */
54 static bool page_frames_initialized;
55 #endif
56
57 /* Add colors to page table dumps to indicate mapping type */
58 #define COLOR_PAGE_FRAMES 1
59
60 #if COLOR_PAGE_FRAMES
61 #define ANSI_DEFAULT "\x1B" "[0m"
62 #define ANSI_RED "\x1B" "[1;31m"
63 #define ANSI_GREEN "\x1B" "[1;32m"
64 #define ANSI_YELLOW "\x1B" "[1;33m"
65 #define ANSI_BLUE "\x1B" "[1;34m"
66 #define ANSI_MAGENTA "\x1B" "[1;35m"
67 #define ANSI_CYAN "\x1B" "[1;36m"
68 #define ANSI_GREY "\x1B" "[1;90m"
69
70 #define COLOR(x) printk(_CONCAT(ANSI_, x))
71 #else
72 #define COLOR(x) do { } while (false)
73 #endif
74
75 /* LCOV_EXCL_START */
76 static void page_frame_dump(struct z_page_frame *pf)
77 {
78 if (z_page_frame_is_reserved(pf)) {
79 COLOR(CYAN);
80 printk("R");
81 } else if (z_page_frame_is_busy(pf)) {
82 COLOR(MAGENTA);
83 printk("B");
84 } else if (z_page_frame_is_pinned(pf)) {
85 COLOR(YELLOW);
86 printk("P");
87 } else if (z_page_frame_is_available(pf)) {
88 COLOR(GREY);
89 printk(".");
90 } else if (z_page_frame_is_mapped(pf)) {
91 COLOR(DEFAULT);
92 printk("M");
93 } else {
94 COLOR(RED);
95 printk("?");
96 }
97 }
98
99 void z_page_frames_dump(void)
100 {
101 int column = 0;
102
103 __ASSERT(page_frames_initialized, "%s called too early", __func__);
104 printk("Physical memory from 0x%lx to 0x%lx\n",
105 Z_PHYS_RAM_START, Z_PHYS_RAM_END);
106
107 for (int i = 0; i < Z_NUM_PAGE_FRAMES; i++) {
108 struct z_page_frame *pf = &z_page_frames[i];
109
110 page_frame_dump(pf);
111
112 column++;
113 if (column == 64) {
114 column = 0;
115 printk("\n");
116 }
117 }
118
119 COLOR(DEFAULT);
120 if (column != 0) {
121 printk("\n");
122 }
123 }
124 /* LCOV_EXCL_STOP */
125
126 #define VIRT_FOREACH(_base, _size, _pos) \
127 for (_pos = _base; \
128 _pos < ((uint8_t *)_base + _size); _pos += CONFIG_MMU_PAGE_SIZE)
129
130 #define PHYS_FOREACH(_base, _size, _pos) \
131 for (_pos = _base; \
132 _pos < ((uintptr_t)_base + _size); _pos += CONFIG_MMU_PAGE_SIZE)
133
134
135 /*
136 * Virtual address space management
137 *
138 * Call all of these functions with z_mm_lock held.
139 *
140 * Overall virtual memory map: When the kernel starts, it resides in
141 * virtual memory in the region Z_KERNEL_VIRT_START to
142 * Z_KERNEL_VIRT_END. Unused virtual memory past this, up to the limit
143 * noted by CONFIG_KERNEL_VM_SIZE may be used for runtime memory mappings.
144 *
145 * If CONFIG_ARCH_MAPS_ALL_RAM is set, we do not just map the kernel image,
146 * but have a mapping for all RAM in place. This is for special architectural
147 * purposes and does not otherwise affect page frame accounting or flags;
148 * the only guarantee is that such RAM mapping outside of the Zephyr image
149 * won't be disturbed by subsequent memory mapping calls.
150 *
151 * +--------------+ <- Z_VIRT_RAM_START
152 * | Undefined VM | <- May contain ancillary regions like x86_64's locore
153 * +--------------+ <- Z_KERNEL_VIRT_START (often == Z_VIRT_RAM_START)
154 * | Mapping for |
155 * | main kernel |
156 * | image |
157 * | |
158 * | |
159 * +--------------+ <- Z_FREE_VM_START
160 * | |
161 * | Unused, |
162 * | Available VM |
163 * | |
164 * |..............| <- mapping_pos (grows downward as more mappings are made)
165 * | Mapping |
166 * +--------------+
167 * | Mapping |
168 * +--------------+
169 * | ... |
170 * +--------------+
171 * | Mapping |
172 * +--------------+ <- mappings start here
173 * | Reserved | <- special purpose virtual page(s) of size Z_VM_RESERVED
174 * +--------------+ <- Z_VIRT_RAM_END
175 */
176
177 /* Bitmap of virtual addresses where one bit corresponds to one page.
178 * This is being used for virt_region_alloc() to figure out which
179 * region of virtual addresses can be used for memory mapping.
180 *
181 * Note that bit #0 is the highest address so that allocation is
182 * done in reverse from highest address.
183 */
184 SYS_BITARRAY_DEFINE_STATIC(virt_region_bitmap,
185 CONFIG_KERNEL_VM_SIZE / CONFIG_MMU_PAGE_SIZE);
186
187 static bool virt_region_inited;
188
189 #define Z_VIRT_REGION_START_ADDR Z_FREE_VM_START
190 #define Z_VIRT_REGION_END_ADDR (Z_VIRT_RAM_END - Z_VM_RESERVED)
191
192 static inline uintptr_t virt_from_bitmap_offset(size_t offset, size_t size)
193 {
194 return POINTER_TO_UINT(Z_VIRT_RAM_END)
195 - (offset * CONFIG_MMU_PAGE_SIZE) - size;
196 }
197
198 static inline size_t virt_to_bitmap_offset(void *vaddr, size_t size)
199 {
200 return (POINTER_TO_UINT(Z_VIRT_RAM_END)
201 - POINTER_TO_UINT(vaddr) - size) / CONFIG_MMU_PAGE_SIZE;
202 }
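
/* Worked example (illustrative, with assumed 4 KiB pages): bit #0 covers
 * the page just below Z_VIRT_RAM_END, so for a one-page region starting
 * at Z_VIRT_RAM_END - 0x1000:
 *
 *   virt_to_bitmap_offset(Z_VIRT_RAM_END - 0x1000, 0x1000) == 0
 *   virt_from_bitmap_offset(0, 0x1000) == POINTER_TO_UINT(Z_VIRT_RAM_END) - 0x1000
 *
 * i.e. the two helpers above are inverses of each other for page-aligned
 * regions.
 */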
203
204 static void virt_region_init(void)
205 {
206 size_t offset, num_bits;
207
208 /* There are regions where we should never map via
209 * k_mem_map() and z_phys_map(). Mark them as
210 * already allocated so they will never be used.
211 */
212
213 if (Z_VM_RESERVED > 0) {
214 /* Mark reserved region at end of virtual address space */
215 num_bits = Z_VM_RESERVED / CONFIG_MMU_PAGE_SIZE;
216 (void)sys_bitarray_set_region(&virt_region_bitmap,
217 num_bits, 0);
218 }
219
220 /* Mark all bits up to Z_FREE_VM_START as allocated */
221 num_bits = POINTER_TO_UINT(Z_FREE_VM_START)
222 - POINTER_TO_UINT(Z_VIRT_RAM_START);
223 offset = virt_to_bitmap_offset(Z_VIRT_RAM_START, num_bits);
224 num_bits /= CONFIG_MMU_PAGE_SIZE;
225 (void)sys_bitarray_set_region(&virt_region_bitmap,
226 num_bits, offset);
227
228 virt_region_inited = true;
229 }
230
231 static void virt_region_free(void *vaddr, size_t size)
232 {
233 size_t offset, num_bits;
234 uint8_t *vaddr_u8 = (uint8_t *)vaddr;
235
236 if (unlikely(!virt_region_inited)) {
237 virt_region_init();
238 }
239
240 #ifndef CONFIG_KERNEL_DIRECT_MAP
241 /* Without the need to support K_MEM_DIRECT_MAP, the region can
242 * always be represented in the bitmap, so this case is
243 * simple.
244 */
245
246 __ASSERT((vaddr_u8 >= Z_VIRT_REGION_START_ADDR)
247 && ((vaddr_u8 + size - 1) < Z_VIRT_REGION_END_ADDR),
248 "invalid virtual address region %p (%zu)", vaddr_u8, size);
249 if (!((vaddr_u8 >= Z_VIRT_REGION_START_ADDR)
250 && ((vaddr_u8 + size - 1) < Z_VIRT_REGION_END_ADDR))) {
251 return;
252 }
253
254 offset = virt_to_bitmap_offset(vaddr, size);
255 num_bits = size / CONFIG_MMU_PAGE_SIZE;
256 (void)sys_bitarray_free(&virt_region_bitmap, num_bits, offset);
257 #else /* !CONFIG_KERNEL_DIRECT_MAP */
258 /* With K_DIRECT_MAP, the region can be outside of the virtual
259 * memory space, wholly within it, or overlap partially.
260 * So additional processing is needed to make sure we only
261 * mark the pages within the bitmap.
262 */
263 if (((vaddr_u8 >= Z_VIRT_REGION_START_ADDR) &&
264 (vaddr_u8 < Z_VIRT_REGION_END_ADDR)) ||
265 (((vaddr_u8 + size - 1) >= Z_VIRT_REGION_START_ADDR) &&
266 ((vaddr_u8 + size - 1) < Z_VIRT_REGION_END_ADDR))) {
267 uint8_t *adjusted_start = MAX(vaddr_u8, Z_VIRT_REGION_START_ADDR);
268 uint8_t *adjusted_end = MIN(vaddr_u8 + size,
269 Z_VIRT_REGION_END_ADDR);
270 size_t adjusted_sz = adjusted_end - adjusted_start;
271
272 offset = virt_to_bitmap_offset(adjusted_start, adjusted_sz);
273 num_bits = adjusted_sz / CONFIG_MMU_PAGE_SIZE;
274 (void)sys_bitarray_free(&virt_region_bitmap, num_bits, offset);
275 }
276 #endif /* !CONFIG_KERNEL_DIRECT_MAP */
277 }
278
279 static void *virt_region_alloc(size_t size, size_t align)
280 {
281 uintptr_t dest_addr;
282 size_t alloc_size;
283 size_t offset;
284 size_t num_bits;
285 int ret;
286
287 if (unlikely(!virt_region_inited)) {
288 virt_region_init();
289 }
290
291 /* Possibly request more pages to ensure we can get an aligned virtual address */
292 num_bits = (size + align - CONFIG_MMU_PAGE_SIZE) / CONFIG_MMU_PAGE_SIZE;
293 alloc_size = num_bits * CONFIG_MMU_PAGE_SIZE;
294 ret = sys_bitarray_alloc(&virt_region_bitmap, num_bits, &offset);
295 if (ret != 0) {
296 LOG_ERR("insufficient virtual address space (requested %zu)",
297 size);
298 return NULL;
299 }
300
301 /* Remember that bit #0 in bitmap corresponds to the highest
302 * virtual address. So here we need to go downwards
303 * to get the starting address of the allocated region.
304 */
305 dest_addr = virt_from_bitmap_offset(offset, alloc_size);
306
307 if (alloc_size > size) {
308 uintptr_t aligned_dest_addr = ROUND_UP(dest_addr, align);
309
310 /* Here is the memory organization when trying to get an aligned
311 * virtual address:
312 *
313 * +--------------+ <- Z_VIRT_RAM_START
314 * | Undefined VM |
315 * +--------------+ <- Z_KERNEL_VIRT_START (often == Z_VIRT_RAM_START)
316 * | Mapping for |
317 * | main kernel |
318 * | image |
319 * | |
320 * | |
321 * +--------------+ <- Z_FREE_VM_START
322 * | ... |
323 * +==============+ <- dest_addr
324 * | Unused |
325 * |..............| <- aligned_dest_addr
326 * | |
327 * | Aligned |
328 * | Mapping |
329 * | |
330 * |..............| <- aligned_dest_addr + size
331 * | Unused |
332 * +==============+ <- offset from Z_VIRT_RAM_END == dest_addr + alloc_size
333 * | ... |
334 * +--------------+
335 * | Mapping |
336 * +--------------+
337 * | Reserved |
338 * +--------------+ <- Z_VIRT_RAM_END
339 */
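
/* Worked example (illustrative, with assumed values): with 4 KiB pages,
 * a request for size = 0x1000 and align = 0x10000 over-allocates
 * num_bits = (0x1000 + 0x10000 - 0x1000) / 0x1000 = 16 pages, so
 * alloc_size = 0x10000. dest_addr is then rounded up to the next 64 KiB
 * boundary, and the unused head and tail of the over-allocation are
 * handed back to the bitmap below.
 */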
340
341 /* Free the two unused regions */
342 virt_region_free(UINT_TO_POINTER(dest_addr),
343 aligned_dest_addr - dest_addr);
344 if (((dest_addr + alloc_size) - (aligned_dest_addr + size)) > 0) {
345 virt_region_free(UINT_TO_POINTER(aligned_dest_addr + size),
346 (dest_addr + alloc_size) - (aligned_dest_addr + size));
347 }
348
349 dest_addr = aligned_dest_addr;
350 }
351
352 /* Need to make sure this does not step into kernel memory */
353 if (dest_addr < POINTER_TO_UINT(Z_VIRT_REGION_START_ADDR)) {
354 (void)sys_bitarray_free(&virt_region_bitmap, size, offset);
355 return NULL;
356 }
357
358 return UINT_TO_POINTER(dest_addr);
359 }
360
361 /*
362 * Free page frames management
363 *
364 * Call all of these functions with z_mm_lock held.
365 */
366
367 /* Linked list of unused and available page frames.
368 *
369 * TODO: This is very simple and treats all free page frames as being equal.
370 * However, there are use-cases to consolidate free pages such that entire
371 * SRAM banks can be switched off to save power, and so obtaining free pages
372 * may require a more complex ontology which prefers page frames in RAM banks
373 * which are still active.
374 *
375 * This implies in the future there may be multiple slists managing physical
376 * pages. Each page frame will still just have one snode link.
377 */
378 static sys_slist_t free_page_frame_list;
379
380 /* Number of unused and available free page frames */
381 size_t z_free_page_count;
382
383 #define PF_ASSERT(pf, expr, fmt, ...) \
384 __ASSERT(expr, "page frame 0x%lx: " fmt, z_page_frame_to_phys(pf), \
385 ##__VA_ARGS__)
386
387 /* Get an unused page frame; we don't care which one. Returns NULL if there are none. */
388 static struct z_page_frame *free_page_frame_list_get(void)
389 {
390 sys_snode_t *node;
391 struct z_page_frame *pf = NULL;
392
393 node = sys_slist_get(&free_page_frame_list);
394 if (node != NULL) {
395 z_free_page_count--;
396 pf = CONTAINER_OF(node, struct z_page_frame, node);
397 PF_ASSERT(pf, z_page_frame_is_available(pf),
398 "unavailable but somehow on free list");
399 }
400
401 return pf;
402 }
403
404 /* Release a page frame back into the list of free pages */
405 static void free_page_frame_list_put(struct z_page_frame *pf)
406 {
407 PF_ASSERT(pf, z_page_frame_is_available(pf),
408 "unavailable page put on free list");
409 /* The structure is packed, which ensures that this is true */
410 void *node = pf;
411
412 sys_slist_append(&free_page_frame_list, node);
413 z_free_page_count++;
414 }
415
416 static void free_page_frame_list_init(void)
417 {
418 sys_slist_init(&free_page_frame_list);
419 }
420
421 static void page_frame_free_locked(struct z_page_frame *pf)
422 {
423 pf->flags = 0;
424 free_page_frame_list_put(pf);
425 }
426
427 /*
428 * Memory Mapping
429 */
430
431 /* Called after the frame is mapped in the arch layer, to update our
432 * local ontology (and do some assertions while we're at it)
433 */
434 static void frame_mapped_set(struct z_page_frame *pf, void *addr)
435 {
436 PF_ASSERT(pf, !z_page_frame_is_reserved(pf),
437 "attempted to map a reserved page frame");
438
439 /* We do allow multiple mappings for pinned page frames
440 * since we will never need to reverse map them.
441 * This is uncommon, use-cases are for things like the
442 * Zephyr equivalent of VDSOs
443 */
444 PF_ASSERT(pf, !z_page_frame_is_mapped(pf) || z_page_frame_is_pinned(pf),
445 "non-pinned and already mapped to %p", pf->addr);
446
447 pf->flags |= Z_PAGE_FRAME_MAPPED;
448 pf->addr = addr;
449 }
450
451 /* LCOV_EXCL_START */
452 /* Go through page frames to find the physical address mapped
453 * by a virtual address.
454 *
455 * @param[in] virt Virtual Address
456 * @param[out] phys Physical address mapped to the input virtual address
457 * if such mapping exists.
458 *
459 * @retval 0 if mapping is found and valid
460 * @retval -EFAULT if virtual address is not mapped
461 */
462 static int virt_to_page_frame(void *virt, uintptr_t *phys)
463 {
464 uintptr_t paddr;
465 struct z_page_frame *pf;
466 int ret = -EFAULT;
467
468 Z_PAGE_FRAME_FOREACH(paddr, pf) {
469 if (z_page_frame_is_mapped(pf)) {
470 if (virt == pf->addr) {
471 ret = 0;
472 *phys = z_page_frame_to_phys(pf);
473 break;
474 }
475 }
476 }
477
478 return ret;
479 }
480 /* LCOV_EXCL_STOP */
481
482 __weak FUNC_ALIAS(virt_to_page_frame, arch_page_phys_get, int);
483
484 #ifdef CONFIG_DEMAND_PAGING
485 static int page_frame_prepare_locked(struct z_page_frame *pf, bool *dirty_ptr,
486 bool page_in, uintptr_t *location_ptr);
487
488 static inline void do_backing_store_page_in(uintptr_t location);
489 static inline void do_backing_store_page_out(uintptr_t location);
490 #endif /* CONFIG_DEMAND_PAGING */
491
492 /* Allocate a free page frame, and map it to a specified virtual address
493 *
494 * TODO: Add optional support for copy-on-write mappings to a zero page instead
495 * of allocating, in which case page frames will be allocated lazily as
496 * the mappings to the zero page get touched. This will avoid expensive
497 * page-ins as memory is mapped and physical RAM or backing store storage will
498 * not be used if the mapped memory is unused. The cost is an empty physical
499 * page of zeroes.
500 */
501 static int map_anon_page(void *addr, uint32_t flags)
502 {
503 struct z_page_frame *pf;
504 uintptr_t phys;
505 bool lock = (flags & K_MEM_MAP_LOCK) != 0U;
506 bool uninit = (flags & K_MEM_MAP_UNINIT) != 0U;
507
508 pf = free_page_frame_list_get();
509 if (pf == NULL) {
510 #ifdef CONFIG_DEMAND_PAGING
511 uintptr_t location;
512 bool dirty;
513 int ret;
514
515 pf = k_mem_paging_eviction_select(&dirty);
516 __ASSERT(pf != NULL, "failed to get a page frame");
517 LOG_DBG("evicting %p at 0x%lx", pf->addr,
518 z_page_frame_to_phys(pf));
519 ret = page_frame_prepare_locked(pf, &dirty, false, &location);
520 if (ret != 0) {
521 return -ENOMEM;
522 }
523 if (dirty) {
524 do_backing_store_page_out(location);
525 }
526 pf->flags = 0;
527 #else
528 return -ENOMEM;
529 #endif /* CONFIG_DEMAND_PAGING */
530 }
531
532 phys = z_page_frame_to_phys(pf);
533 arch_mem_map(addr, phys, CONFIG_MMU_PAGE_SIZE, flags | K_MEM_CACHE_WB);
534
535 if (lock) {
536 pf->flags |= Z_PAGE_FRAME_PINNED;
537 }
538 frame_mapped_set(pf, addr);
539
540 LOG_DBG("memory mapping anon page %p -> 0x%lx", addr, phys);
541
542 if (!uninit) {
543 /* If we later implement mappings to a copy-on-write
544 * zero page, won't need this step
545 */
546 memset(addr, 0, CONFIG_MMU_PAGE_SIZE);
547 }
548
549 return 0;
550 }
551
552 void *k_mem_map(size_t size, uint32_t flags)
553 {
554 uint8_t *dst;
555 size_t total_size;
556 int ret;
557 k_spinlock_key_t key;
558 uint8_t *pos;
559
560 __ASSERT(!(((flags & K_MEM_PERM_USER) != 0U) &&
561 ((flags & K_MEM_MAP_UNINIT) != 0U)),
562 "user access to anonymous uninitialized pages is forbidden");
563 __ASSERT(size % CONFIG_MMU_PAGE_SIZE == 0U,
564 "unaligned size %zu passed to %s", size, __func__);
565 __ASSERT(size != 0, "zero sized memory mapping");
566 __ASSERT(page_frames_initialized, "%s called too early", __func__);
567 __ASSERT((flags & K_MEM_CACHE_MASK) == 0U,
568 "%s does not support explicit cache settings", __func__);
569
570 key = k_spin_lock(&z_mm_lock);
571
572 /* Need extra for the guard pages (before and after) which we
573 * won't map.
574 */
575 total_size = size + CONFIG_MMU_PAGE_SIZE * 2;
576
577 dst = virt_region_alloc(total_size, CONFIG_MMU_PAGE_SIZE);
578 if (dst == NULL) {
579 /* Address space has no free region */
580 goto out;
581 }
582
583 /* Unmap both guard pages to make sure accessing them
584 * will generate a fault.
585 */
586 arch_mem_unmap(dst, CONFIG_MMU_PAGE_SIZE);
587 arch_mem_unmap(dst + CONFIG_MMU_PAGE_SIZE + size,
588 CONFIG_MMU_PAGE_SIZE);
589
590 /* Skip over the "before" guard page in returned address. */
591 dst += CONFIG_MMU_PAGE_SIZE;
592
593 VIRT_FOREACH(dst, size, pos) {
594 ret = map_anon_page(pos, flags);
595
596 if (ret != 0) {
597 /* TODO: call k_mem_unmap(dst, pos - dst) when
598 * implemented in #28990 and release any guard virtual
599 * page as well.
600 */
601 dst = NULL;
602 goto out;
603 }
604 }
605 out:
606 k_spin_unlock(&z_mm_lock, key);
607 return dst;
608 }
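
/* Usage sketch (illustrative, not from the original sources): callers
 * request whole pages and keep the original size around for the matching
 * unmap, e.g.:
 *
 *   size_t sz = 4 * CONFIG_MMU_PAGE_SIZE;
 *   uint8_t *buf = k_mem_map(sz, K_MEM_PERM_RW);
 *
 *   if (buf != NULL) {
 *           ... use buf[0..sz-1] ...
 *           k_mem_unmap(buf, sz);
 *   }
 *
 * The guard pages allocated above are invisible to the caller; buf points
 * just past the "before" guard page.
 */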
609
610 void k_mem_unmap(void *addr, size_t size)
611 {
612 uintptr_t phys;
613 uint8_t *pos;
614 struct z_page_frame *pf;
615 k_spinlock_key_t key;
616 size_t total_size;
617 int ret;
618
619 /* Need space for the "before" guard page */
620 __ASSERT_NO_MSG(POINTER_TO_UINT(addr) >= CONFIG_MMU_PAGE_SIZE);
621
622 /* Make sure address range is still valid after accounting
623 * for two guard pages.
624 */
625 pos = (uint8_t *)addr - CONFIG_MMU_PAGE_SIZE;
626 z_mem_assert_virtual_region(pos, size + (CONFIG_MMU_PAGE_SIZE * 2));
627
628 key = k_spin_lock(&z_mm_lock);
629
630 /* Check if both guard pages are unmapped.
631 * Bail if not, as this is probably a region not mapped
632 * using k_mem_map().
633 */
634 pos = addr;
635 ret = arch_page_phys_get(pos - CONFIG_MMU_PAGE_SIZE, NULL);
636 if (ret == 0) {
637 __ASSERT(ret != 0,
638 "%s: cannot find preceding guard page for (%p, %zu)",
639 __func__, addr, size);
640 goto out;
641 }
642
643 ret = arch_page_phys_get(pos + size, NULL);
644 if (ret == 0) {
645 __ASSERT(ret != 0,
646 "%s: cannot find succeeding guard page for (%p, %zu)",
647 __func__, addr, size);
648 goto out;
649 }
650
651 VIRT_FOREACH(addr, size, pos) {
652 ret = arch_page_phys_get(pos, &phys);
653
654 __ASSERT(ret == 0,
655 "%s: cannot unmap an unmapped address %p",
656 __func__, pos);
657 if (ret != 0) {
658 /* Found an address not mapped. Do not continue. */
659 goto out;
660 }
661
662 __ASSERT(z_is_page_frame(phys),
663 "%s: 0x%lx is not a page frame", __func__, phys);
664 if (!z_is_page_frame(phys)) {
665 /* Physical address has no corresponding page frame
666 * description in the page frame array.
667 * This should not happen. Do not continue.
668 */
669 goto out;
670 }
671
672 /* Grab the corresponding page frame from physical address */
673 pf = z_phys_to_page_frame(phys);
674
675 __ASSERT(z_page_frame_is_mapped(pf),
676 "%s: 0x%lx is not a mapped page frame", __func__, phys);
677 if (!z_page_frame_is_mapped(pf)) {
678 /* Page frame is not marked mapped.
679 * This should not happen. Do not continue.
680 */
681 goto out;
682 }
683
684 arch_mem_unmap(pos, CONFIG_MMU_PAGE_SIZE);
685
686 /* Put the page frame back into free list */
687 page_frame_free_locked(pf);
688 }
689
690 /* There are guard pages just before and after the mapped
691 * region. So we also need to free them from the bitmap.
692 */
693 pos = (uint8_t *)addr - CONFIG_MMU_PAGE_SIZE;
694 total_size = size + CONFIG_MMU_PAGE_SIZE * 2;
695 virt_region_free(pos, total_size);
696
697 out:
698 k_spin_unlock(&z_mm_lock, key);
699 }
700
701 size_t k_mem_free_get(void)
702 {
703 size_t ret;
704 k_spinlock_key_t key;
705
706 __ASSERT(page_frames_initialized, "%s called too early", __func__);
707
708 key = k_spin_lock(&z_mm_lock);
709 #ifdef CONFIG_DEMAND_PAGING
710 if (z_free_page_count > CONFIG_DEMAND_PAGING_PAGE_FRAMES_RESERVE) {
711 ret = z_free_page_count - CONFIG_DEMAND_PAGING_PAGE_FRAMES_RESERVE;
712 } else {
713 ret = 0;
714 }
715 #else
716 ret = z_free_page_count;
717 #endif
718 k_spin_unlock(&z_mm_lock, key);
719
720 return ret * (size_t)CONFIG_MMU_PAGE_SIZE;
721 }
722
723 /* Get the default virtual region alignment, here the default MMU page size
724 *
725 * @param[in] phys Physical address of region to be mapped, aligned to MMU_PAGE_SIZE
726 * @param[in] size Size of region to be mapped, aligned to MMU_PAGE_SIZE
727 *
728 * @retval alignment to apply on the virtual address of this region
729 */
730 static size_t virt_region_align(uintptr_t phys, size_t size)
731 {
732 ARG_UNUSED(phys);
733 ARG_UNUSED(size);
734
735 return CONFIG_MMU_PAGE_SIZE;
736 }
737
738 __weak FUNC_ALIAS(virt_region_align, arch_virt_region_align, size_t);
739
740 /* This may be called from arch early boot code before z_cstart() is invoked.
741 * Data will be copied and BSS zeroed, but this function must not rely on
742 * any initialization functions having been called in order to work correctly.
743 */
744 void z_phys_map(uint8_t **virt_ptr, uintptr_t phys, size_t size, uint32_t flags)
745 {
746 uintptr_t aligned_phys, addr_offset;
747 size_t aligned_size, align_boundary;
748 k_spinlock_key_t key;
749 uint8_t *dest_addr;
750 size_t num_bits;
751 size_t offset;
752
753 #ifndef CONFIG_KERNEL_DIRECT_MAP
754 __ASSERT(!(flags & K_MEM_DIRECT_MAP), "The direct-map is not enabled");
755 #endif
756 addr_offset = k_mem_region_align(&aligned_phys, &aligned_size,
757 phys, size,
758 CONFIG_MMU_PAGE_SIZE);
759 __ASSERT(aligned_size != 0U, "0-length mapping at 0x%lx", aligned_phys);
760 __ASSERT(aligned_phys < (aligned_phys + (aligned_size - 1)),
761 "wraparound for physical address 0x%lx (size %zu)",
762 aligned_phys, aligned_size);
763
764 align_boundary = arch_virt_region_align(aligned_phys, aligned_size);
765
766 key = k_spin_lock(&z_mm_lock);
767
768 if (IS_ENABLED(CONFIG_KERNEL_DIRECT_MAP) &&
769 (flags & K_MEM_DIRECT_MAP)) {
770 dest_addr = (uint8_t *)aligned_phys;
771
772 /* Mark the region of virtual memory bitmap as used
773 * if the region overlaps the virtual memory space.
774 *
775 * Basically if either end of region is within
776 * virtual memory space, we need to mark the bits.
777 */
778 if (((dest_addr >= Z_VIRT_RAM_START) &&
779 (dest_addr < Z_VIRT_RAM_END)) ||
780 (((dest_addr + aligned_size) >= Z_VIRT_RAM_START) &&
781 ((dest_addr + aligned_size) < Z_VIRT_RAM_END))) {
782 uint8_t *adjusted_start = MAX(dest_addr, Z_VIRT_RAM_START);
783 uint8_t *adjusted_end = MIN(dest_addr + aligned_size,
784 Z_VIRT_RAM_END);
785 size_t adjusted_sz = adjusted_end - adjusted_start;
786
787 num_bits = adjusted_sz / CONFIG_MMU_PAGE_SIZE;
788 offset = virt_to_bitmap_offset(adjusted_start, adjusted_sz);
789 if (sys_bitarray_test_and_set_region(
790 &virt_region_bitmap, num_bits, offset, true))
791 goto fail;
792 }
793 } else {
794 /* Obtain an appropriately sized chunk of virtual memory */
795 dest_addr = virt_region_alloc(aligned_size, align_boundary);
796 if (!dest_addr) {
797 goto fail;
798 }
799 }
800
801 /* If this fails there's something amiss with virt_region_alloc() */
802 __ASSERT((uintptr_t)dest_addr <
803 ((uintptr_t)dest_addr + (size - 1)),
804 "wraparound for virtual address %p (size %zu)",
805 dest_addr, size);
806
807 LOG_DBG("arch_mem_map(%p, 0x%lx, %zu, %x) offset %lu", dest_addr,
808 aligned_phys, aligned_size, flags, addr_offset);
809
810 arch_mem_map(dest_addr, aligned_phys, aligned_size, flags);
811 k_spin_unlock(&z_mm_lock, key);
812
813 *virt_ptr = dest_addr + addr_offset;
814 return;
815 fail:
816 /* May re-visit this in the future, but for now running out of
817 * virtual address space or failing the arch_mem_map() call is
818 * an unrecoverable situation.
819 *
820 * Other problems not related to resource exhaustion we leave as
821 * assertions since they are clearly programming mistakes.
822 */
823 LOG_ERR("memory mapping 0x%lx (size %zu, flags 0x%x) failed",
824 phys, size, flags);
825 k_panic();
826 }
827
828 void z_phys_unmap(uint8_t *virt, size_t size)
829 {
830 uintptr_t aligned_virt, addr_offset;
831 size_t aligned_size;
832 k_spinlock_key_t key;
833
834 addr_offset = k_mem_region_align(&aligned_virt, &aligned_size,
835 POINTER_TO_UINT(virt), size,
836 CONFIG_MMU_PAGE_SIZE);
837 __ASSERT(aligned_size != 0U, "0-length mapping at 0x%lx", aligned_virt);
838 __ASSERT(aligned_virt < (aligned_virt + (aligned_size - 1)),
839 "wraparound for virtual address 0x%lx (size %zu)",
840 aligned_virt, aligned_size);
841
842 key = k_spin_lock(&z_mm_lock);
843
844 LOG_DBG("arch_mem_unmap(0x%lx, %zu) offset %lu",
845 aligned_virt, aligned_size, addr_offset);
846
847 arch_mem_unmap(UINT_TO_POINTER(aligned_virt), aligned_size);
848 virt_region_free(UINT_TO_POINTER(aligned_virt), aligned_size);
849 k_spin_unlock(&z_mm_lock, key);
850 }
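
/* Usage sketch (illustrative; buf_phys and buf_size are assumed values):
 * a driver needing CPU access to a physical buffer outside the kernel
 * image can obtain a virtual mapping and later release it:
 *
 *   uint8_t *virt;
 *
 *   z_phys_map(&virt, buf_phys, buf_size, K_MEM_PERM_RW | K_MEM_CACHE_NONE);
 *   ... access the buffer through virt ...
 *   z_phys_unmap(virt, buf_size);
 *
 * Unaligned physical addresses are handled via k_mem_region_align() below;
 * the returned pointer preserves the original offset within the page.
 */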
851
852 /*
853 * Miscellaneous
854 */
855
856 size_t k_mem_region_align(uintptr_t *aligned_addr, size_t *aligned_size,
857 uintptr_t addr, size_t size, size_t align)
858 {
859 size_t addr_offset;
860
861 /* The actual mapped region must be page-aligned. Round down the
862 * physical address and pad the region size appropriately
863 */
864 *aligned_addr = ROUND_DOWN(addr, align);
865 addr_offset = addr - *aligned_addr;
866 *aligned_size = ROUND_UP(size + addr_offset, align);
867
868 return addr_offset;
869 }
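
/* Worked example (illustrative, with assumed values): for addr = 0x20001234,
 * size = 0x100 and align = 0x1000, this yields *aligned_addr = 0x20001000,
 * *aligned_size = 0x1000 and returns an addr_offset of 0x234, which callers
 * add back to the mapped virtual address to recover the original, unaligned
 * location.
 */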
870
871 #if defined(CONFIG_LINKER_USE_BOOT_SECTION) || defined(CONFIG_LINKER_USE_PINNED_SECTION)
872 static void mark_linker_section_pinned(void *start_addr, void *end_addr,
873 bool pin)
874 {
875 struct z_page_frame *pf;
876 uint8_t *addr;
877
878 uintptr_t pinned_start = ROUND_DOWN(POINTER_TO_UINT(start_addr),
879 CONFIG_MMU_PAGE_SIZE);
880 uintptr_t pinned_end = ROUND_UP(POINTER_TO_UINT(end_addr),
881 CONFIG_MMU_PAGE_SIZE);
882 size_t pinned_size = pinned_end - pinned_start;
883
884 VIRT_FOREACH(UINT_TO_POINTER(pinned_start), pinned_size, addr)
885 {
886 pf = z_phys_to_page_frame(Z_BOOT_VIRT_TO_PHYS(addr));
887 frame_mapped_set(pf, addr);
888
889 if (pin) {
890 pf->flags |= Z_PAGE_FRAME_PINNED;
891 } else {
892 pf->flags &= ~Z_PAGE_FRAME_PINNED;
893 }
894 }
895 }
896 #endif /* CONFIG_LINKER_USE_BOOT_SECTION || CONFIG_LINKER_USE_PINNED_SECTION */
897
898 void z_mem_manage_init(void)
899 {
900 uintptr_t phys;
901 uint8_t *addr;
902 struct z_page_frame *pf;
903 k_spinlock_key_t key = k_spin_lock(&z_mm_lock);
904
905 free_page_frame_list_init();
906
907 ARG_UNUSED(addr);
908
909 #ifdef CONFIG_ARCH_HAS_RESERVED_PAGE_FRAMES
910 /* If some page frames are unavailable for use as memory, arch
911 * code will mark Z_PAGE_FRAME_RESERVED in their flags
912 */
913 arch_reserved_pages_update();
914 #endif /* CONFIG_ARCH_HAS_RESERVED_PAGE_FRAMES */
915
916 #ifdef CONFIG_LINKER_GENERIC_SECTIONS_PRESENT_AT_BOOT
917 /* All pages composing the Zephyr image are mapped at boot in a
918 * predictable way. This can change at runtime.
919 */
920 VIRT_FOREACH(Z_KERNEL_VIRT_START, Z_KERNEL_VIRT_SIZE, addr)
921 {
922 pf = z_phys_to_page_frame(Z_BOOT_VIRT_TO_PHYS(addr));
923 frame_mapped_set(pf, addr);
924
925 /* TODO: for now we pin the whole Zephyr image. Demand paging
926 * is currently tested with anonymously-mapped pages which are not
927 * pinned.
928 *
929 * We will need to setup linker regions for a subset of kernel
930 * code/data pages which are pinned in memory and
931 * may not be evicted. This will contain critical CPU data
932 * structures, and any code used to perform page fault
933 * handling, page-ins, etc.
934 */
935 pf->flags |= Z_PAGE_FRAME_PINNED;
936 }
937 #endif /* CONFIG_LINKER_GENERIC_SECTIONS_PRESENT_AT_BOOT */
938
939 #ifdef CONFIG_LINKER_USE_BOOT_SECTION
940 /* Pin the boot section to prevent it from being swapped out during
941 * boot process. Will be un-pinned once boot process completes.
942 */
943 mark_linker_section_pinned(lnkr_boot_start, lnkr_boot_end, true);
944 #endif
945
946 #ifdef CONFIG_LINKER_USE_PINNED_SECTION
947 /* Pin the page frames corresponding to the pinned symbols */
948 mark_linker_section_pinned(lnkr_pinned_start, lnkr_pinned_end, true);
949 #endif
950
951 /* Any remaining pages that aren't mapped, reserved, or pinned get
952 * added to the free pages list
953 */
954 Z_PAGE_FRAME_FOREACH(phys, pf) {
955 if (z_page_frame_is_available(pf)) {
956 free_page_frame_list_put(pf);
957 }
958 }
959 LOG_DBG("free page frames: %zu", z_free_page_count);
960
961 #ifdef CONFIG_DEMAND_PAGING
962 #ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
963 z_paging_histogram_init();
964 #endif
965 k_mem_paging_backing_store_init();
966 k_mem_paging_eviction_init();
967 #endif
968 #if __ASSERT_ON
969 page_frames_initialized = true;
970 #endif
971 k_spin_unlock(&z_mm_lock, key);
972
973 #ifndef CONFIG_LINKER_GENERIC_SECTIONS_PRESENT_AT_BOOT
974 /* If BSS section is not present in memory at boot,
975 * it would not have been cleared. This needs to be
976 * done now since paging mechanism has been initialized
977 * done now since the paging mechanism has been initialized
978 * memory to be cleared.
979 */
980 z_bss_zero();
981 #endif
982 }
983
984 void z_mem_manage_boot_finish(void)
985 {
986 #ifdef CONFIG_LINKER_USE_BOOT_SECTION
987 /* At the end of boot process, unpin the boot sections
988 * as they don't need to be in memory all the time anymore.
989 */
990 mark_linker_section_pinned(lnkr_boot_start, lnkr_boot_end, false);
991 #endif
992 }
993
994 #ifdef CONFIG_DEMAND_PAGING
995
996 #ifdef CONFIG_DEMAND_PAGING_STATS
997 struct k_mem_paging_stats_t paging_stats;
998 extern struct k_mem_paging_histogram_t z_paging_histogram_eviction;
999 extern struct k_mem_paging_histogram_t z_paging_histogram_backing_store_page_in;
1000 extern struct k_mem_paging_histogram_t z_paging_histogram_backing_store_page_out;
1001 #endif
1002
1003 static inline void do_backing_store_page_in(uintptr_t location)
1004 {
1005 #ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
1006 uint32_t time_diff;
1007
1008 #ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS
1009 timing_t time_start, time_end;
1010
1011 time_start = timing_counter_get();
1012 #else
1013 uint32_t time_start;
1014
1015 time_start = k_cycle_get_32();
1016 #endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */
1017 #endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */
1018
1019 k_mem_paging_backing_store_page_in(location);
1020
1021 #ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
1022 #ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS
1023 time_end = timing_counter_get();
1024 time_diff = (uint32_t)timing_cycles_get(&time_start, &time_end);
1025 #else
1026 time_diff = k_cycle_get_32() - time_start;
1027 #endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */
1028
1029 z_paging_histogram_inc(&z_paging_histogram_backing_store_page_in,
1030 time_diff);
1031 #endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */
1032 }
1033
1034 static inline void do_backing_store_page_out(uintptr_t location)
1035 {
1036 #ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
1037 uint32_t time_diff;
1038
1039 #ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS
1040 timing_t time_start, time_end;
1041
1042 time_start = timing_counter_get();
1043 #else
1044 uint32_t time_start;
1045
1046 time_start = k_cycle_get_32();
1047 #endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */
1048 #endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */
1049
1050 k_mem_paging_backing_store_page_out(location);
1051
1052 #ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
1053 #ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS
1054 time_end = timing_counter_get();
1055 time_diff = (uint32_t)timing_cycles_get(&time_start, &time_end);
1056 #else
1057 time_diff = k_cycle_get_32() - time_start;
1058 #endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */
1059
1060 z_paging_histogram_inc(&z_paging_histogram_backing_store_page_out,
1061 time_diff);
1062 #endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */
1063 }
1064
1065 /* Current implementation relies on interrupt locking to prevent any page table
1066 * access, which falls over if other CPUs are active. Addressing this is not
1067 * as simple as using spinlocks as regular memory reads/writes constitute
1068 * "access" in this sense.
1069 *
1070 * Current needs for demand paging are on uniprocessor systems.
1071 */
1072 BUILD_ASSERT(!IS_ENABLED(CONFIG_SMP));
1073
1074 static void virt_region_foreach(void *addr, size_t size,
1075 void (*func)(void *))
1076 {
1077 z_mem_assert_virtual_region(addr, size);
1078
1079 for (size_t offset = 0; offset < size; offset += CONFIG_MMU_PAGE_SIZE) {
1080 func((uint8_t *)addr + offset);
1081 }
1082 }
1083
1084 /*
1085 * Perform some preparatory steps before paging out. The provided page frame
1086 * must be evicted to the backing store immediately after this is called
1087 * with a call to k_mem_paging_backing_store_page_out() if it contains
1088 * a data page.
1089 *
1090 * - Map page frame to scratch area if requested. This is always true if we're
1091 * doing a page fault, but is only set on manual evictions if the page is
1092 * dirty.
1093 * - If mapped:
1094 * - obtain backing store location and populate location parameter
1095 * - Update page tables with location
1096 * - Mark page frame as busy
1097 *
1098 * Returns -ENOMEM if the backing store is full
1099 */
1100 static int page_frame_prepare_locked(struct z_page_frame *pf, bool *dirty_ptr,
1101 bool page_fault, uintptr_t *location_ptr)
1102 {
1103 uintptr_t phys;
1104 int ret;
1105 bool dirty = *dirty_ptr;
1106
1107 phys = z_page_frame_to_phys(pf);
1108 __ASSERT(!z_page_frame_is_pinned(pf), "page frame 0x%lx is pinned",
1109 phys);
1110
1111 /* If the backing store doesn't have a copy of the page, even if it
1112 * wasn't modified, treat as dirty. This can happen for a few
1113 * reasons:
1114 * 1) Page has never been swapped out before, and the backing store
1115 * wasn't pre-populated with this data page.
1116 * 2) Page was swapped out before, but the page contents were not
1117 * preserved after swapping back in.
1118 * 3) Page contents were preserved when swapped back in, but were later
1119 * evicted from the backing store to make room for other evicted
1120 * pages.
1121 */
1122 if (z_page_frame_is_mapped(pf)) {
1123 dirty = dirty || !z_page_frame_is_backed(pf);
1124 }
1125
1126 if (dirty || page_fault) {
1127 arch_mem_scratch(phys);
1128 }
1129
1130 if (z_page_frame_is_mapped(pf)) {
1131 ret = k_mem_paging_backing_store_location_get(pf, location_ptr,
1132 page_fault);
1133 if (ret != 0) {
1134 LOG_ERR("out of backing store memory");
1135 return -ENOMEM;
1136 }
1137 arch_mem_page_out(pf->addr, *location_ptr);
1138 } else {
1139 /* Shouldn't happen unless this function is mis-used */
1140 __ASSERT(!dirty, "un-mapped page determined to be dirty");
1141 }
1142 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1143 /* Mark as busy so that z_page_frame_is_evictable() returns false */
1144 __ASSERT(!z_page_frame_is_busy(pf), "page frame 0x%lx is already busy",
1145 phys);
1146 pf->flags |= Z_PAGE_FRAME_BUSY;
1147 #endif
1148 /* Update dirty parameter, since we set to true if it wasn't backed
1149 * even if otherwise clean
1150 */
1151 *dirty_ptr = dirty;
1152
1153 return 0;
1154 }
1155
1156 static int do_mem_evict(void *addr)
1157 {
1158 bool dirty;
1159 struct z_page_frame *pf;
1160 uintptr_t location;
1161 int key, ret;
1162 uintptr_t flags, phys;
1163
1164 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1165 __ASSERT(!k_is_in_isr(),
1166 "%s is unavailable in ISRs with CONFIG_DEMAND_PAGING_ALLOW_IRQ",
1167 __func__);
1168 k_sched_lock();
1169 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1170 key = irq_lock();
1171 flags = arch_page_info_get(addr, &phys, false);
1172 __ASSERT((flags & ARCH_DATA_PAGE_NOT_MAPPED) == 0,
1173 "address %p isn't mapped", addr);
1174 if ((flags & ARCH_DATA_PAGE_LOADED) == 0) {
1175 /* Un-mapped or already evicted. Nothing to do */
1176 ret = 0;
1177 goto out;
1178 }
1179
1180 dirty = (flags & ARCH_DATA_PAGE_DIRTY) != 0;
1181 pf = z_phys_to_page_frame(phys);
1182 __ASSERT(pf->addr == addr, "page frame address mismatch");
1183 ret = page_frame_prepare_locked(pf, &dirty, false, &location);
1184 if (ret != 0) {
1185 goto out;
1186 }
1187
1188 __ASSERT(ret == 0, "failed to prepare page frame");
1189 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1190 irq_unlock(key);
1191 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1192 if (dirty) {
1193 do_backing_store_page_out(location);
1194 }
1195 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1196 key = irq_lock();
1197 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1198 page_frame_free_locked(pf);
1199 out:
1200 irq_unlock(key);
1201 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1202 k_sched_unlock();
1203 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1204 return ret;
1205 }
1206
1207 int k_mem_page_out(void *addr, size_t size)
1208 {
1209 __ASSERT(page_frames_initialized, "%s called on %p too early", __func__,
1210 addr);
1211 z_mem_assert_virtual_region(addr, size);
1212
1213 for (size_t offset = 0; offset < size; offset += CONFIG_MMU_PAGE_SIZE) {
1214 void *pos = (uint8_t *)addr + offset;
1215 int ret;
1216
1217 ret = do_mem_evict(pos);
1218 if (ret != 0) {
1219 return ret;
1220 }
1221 }
1222
1223 return 0;
1224 }
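
/* Usage sketch (illustrative; big_buf is an assumed page-aligned buffer
 * whose size is a multiple of CONFIG_MMU_PAGE_SIZE): a subsystem can
 * proactively evict a rarely used region to free up page frames and fault
 * it back in before the next use:
 *
 *   k_mem_page_out(big_buf, sizeof(big_buf));
 *   ...
 *   k_mem_page_in(big_buf, sizeof(big_buf));
 */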
1225
1226 int z_page_frame_evict(uintptr_t phys)
1227 {
1228 int key, ret;
1229 struct z_page_frame *pf;
1230 bool dirty;
1231 uintptr_t flags;
1232 uintptr_t location;
1233
1234 __ASSERT(page_frames_initialized, "%s called on 0x%lx too early",
1235 __func__, phys);
1236
1237 /* Implementation is similar to do_page_fault() except there is no
1238 * data page to page-in, see comments in that function.
1239 */
1240
1241 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1242 __ASSERT(!k_is_in_isr(),
1243 "%s is unavailable in ISRs with CONFIG_DEMAND_PAGING_ALLOW_IRQ",
1244 __func__);
1245 k_sched_lock();
1246 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1247 key = irq_lock();
1248 pf = z_phys_to_page_frame(phys);
1249 if (!z_page_frame_is_mapped(pf)) {
1250 /* Nothing to do, free page */
1251 ret = 0;
1252 goto out;
1253 }
1254 flags = arch_page_info_get(pf->addr, NULL, false);
1255 /* Shouldn't ever happen */
1256 __ASSERT((flags & ARCH_DATA_PAGE_LOADED) != 0, "data page not loaded");
1257 dirty = (flags & ARCH_DATA_PAGE_DIRTY) != 0;
1258 ret = page_frame_prepare_locked(pf, &dirty, false, &location);
1259 if (ret != 0) {
1260 goto out;
1261 }
1262
1263 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1264 irq_unlock(key);
1265 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1266 if (dirty) {
1267 do_backing_store_page_out(location);
1268 }
1269 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1270 key = irq_lock();
1271 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1272 page_frame_free_locked(pf);
1273 out:
1274 irq_unlock(key);
1275 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1276 k_sched_unlock();
1277 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1278 return ret;
1279 }
1280
1281 static inline void paging_stats_faults_inc(struct k_thread *faulting_thread,
1282 int key)
1283 {
1284 #ifdef CONFIG_DEMAND_PAGING_STATS
1285 bool is_irq_unlocked = arch_irq_unlocked(key);
1286
1287 paging_stats.pagefaults.cnt++;
1288
1289 if (is_irq_unlocked) {
1290 paging_stats.pagefaults.irq_unlocked++;
1291 } else {
1292 paging_stats.pagefaults.irq_locked++;
1293 }
1294
1295 #ifdef CONFIG_DEMAND_PAGING_THREAD_STATS
1296 faulting_thread->paging_stats.pagefaults.cnt++;
1297
1298 if (is_irq_unlocked) {
1299 faulting_thread->paging_stats.pagefaults.irq_unlocked++;
1300 } else {
1301 faulting_thread->paging_stats.pagefaults.irq_locked++;
1302 }
1303 #else
1304 ARG_UNUSED(faulting_thread);
1305 #endif
1306
1307 #ifndef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1308 if (k_is_in_isr()) {
1309 paging_stats.pagefaults.in_isr++;
1310
1311 #ifdef CONFIG_DEMAND_PAGING_THREAD_STATS
1312 faulting_thread->paging_stats.pagefaults.in_isr++;
1313 #endif
1314 }
1315 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1316 #endif /* CONFIG_DEMAND_PAGING_STATS */
1317 }
1318
1319 static inline void paging_stats_eviction_inc(struct k_thread *faulting_thread,
1320 bool dirty)
1321 {
1322 #ifdef CONFIG_DEMAND_PAGING_STATS
1323 if (dirty) {
1324 paging_stats.eviction.dirty++;
1325 } else {
1326 paging_stats.eviction.clean++;
1327 }
1328 #ifdef CONFIG_DEMAND_PAGING_THREAD_STATS
1329 if (dirty) {
1330 faulting_thread->paging_stats.eviction.dirty++;
1331 } else {
1332 faulting_thread->paging_stats.eviction.clean++;
1333 }
1334 #else
1335 ARG_UNUSED(faulting_thread);
1336 #endif /* CONFIG_DEMAND_PAGING_THREAD_STATS */
1337 #endif /* CONFIG_DEMAND_PAGING_STATS */
1338 }
1339
1340 static inline struct z_page_frame *do_eviction_select(bool *dirty)
1341 {
1342 struct z_page_frame *pf;
1343
1344 #ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
1345 uint32_t time_diff;
1346
1347 #ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS
1348 timing_t time_start, time_end;
1349
1350 time_start = timing_counter_get();
1351 #else
1352 uint32_t time_start;
1353
1354 time_start = k_cycle_get_32();
1355 #endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */
1356 #endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */
1357
1358 pf = k_mem_paging_eviction_select(dirty);
1359
1360 #ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
1361 #ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS
1362 time_end = timing_counter_get();
1363 time_diff = (uint32_t)timing_cycles_get(&time_start, &time_end);
1364 #else
1365 time_diff = k_cycle_get_32() - time_start;
1366 #endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */
1367
1368 z_paging_histogram_inc(&z_paging_histogram_eviction, time_diff);
1369 #endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */
1370
1371 return pf;
1372 }
1373
1374 static bool do_page_fault(void *addr, bool pin)
1375 {
1376 struct z_page_frame *pf;
1377 int key, ret;
1378 uintptr_t page_in_location, page_out_location;
1379 enum arch_page_location status;
1380 bool result;
1381 bool dirty = false;
1382 struct k_thread *faulting_thread = _current_cpu->current;
1383
1384 __ASSERT(page_frames_initialized, "page fault at %p happened too early",
1385 addr);
1386
1387 LOG_DBG("page fault at %p", addr);
1388
1389 /*
1390 * TODO: Add performance accounting:
1391 * - k_mem_paging_eviction_select() metrics
1392 * * periodic timer execution time histogram (if implemented)
1393 */
1394
1395 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1396 /* We lock the scheduler so that other threads are never scheduled
1397 * during the page-in/out operation.
1398 *
1399 * We do however re-enable interrupts during the page-in/page-out
1400 * operation iff interrupts were enabled when the exception was taken;
1401 * in this configuration page faults in an ISR are a bug; all their
1402 * code/data must be pinned.
1403 *
1404 * If interrupts were disabled when the exception was taken, the
1405 * arch code is responsible for keeping them that way when entering
1406 * this function.
1407 *
1408 * If this is not enabled, then interrupts are always locked for the
1409 * entire operation. This is far worse for system interrupt latency
1410 * but requires less pinned pages and ISRs may also take page faults.
1411 *
1412 * Support for allowing k_mem_paging_backing_store_page_out() and
1413 * k_mem_paging_backing_store_page_in() to also sleep and allow
1414 * other threads to run (such as in the case where the transfer is
1415 * async DMA) is not implemented. Even if limited to thread context,
1416 * arbitrary memory access triggering exceptions that put a thread to
1417 * sleep on a contended page fault operation will break scheduling
1418 * assumptions of cooperative threads or threads that implement
1419 * critical sections with spinlocks or disabling IRQs.
1420 */
1421 k_sched_lock();
1422 __ASSERT(!k_is_in_isr(), "ISR page faults are forbidden");
1423 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1424
1425 key = irq_lock();
1426 status = arch_page_location_get(addr, &page_in_location);
1427 if (status == ARCH_PAGE_LOCATION_BAD) {
1428 /* Return false to treat as a fatal error */
1429 result = false;
1430 goto out;
1431 }
1432 result = true;
1433
1434 if (status == ARCH_PAGE_LOCATION_PAGED_IN) {
1435 if (pin) {
1436 /* It's a physical memory address */
1437 uintptr_t phys = page_in_location;
1438
1439 pf = z_phys_to_page_frame(phys);
1440 pf->flags |= Z_PAGE_FRAME_PINNED;
1441 }
1442
1443 /* This if-block is to pin the page if it is
1444 * already present in physical memory. There is
1445 * no need to go through the following code to
1446 * pull in the data pages. So skip to the end.
1447 */
1448 goto out;
1449 }
1450 __ASSERT(status == ARCH_PAGE_LOCATION_PAGED_OUT,
1451 "unexpected status value %d", status);
1452
1453 paging_stats_faults_inc(faulting_thread, key);
1454
1455 pf = free_page_frame_list_get();
1456 if (pf == NULL) {
1457 /* Need to evict a page frame */
1458 pf = do_eviction_select(&dirty);
1459 __ASSERT(pf != NULL, "failed to get a page frame");
1460 LOG_DBG("evicting %p at 0x%lx", pf->addr,
1461 z_page_frame_to_phys(pf));
1462
1463 paging_stats_eviction_inc(faulting_thread, dirty);
1464 }
1465 ret = page_frame_prepare_locked(pf, &dirty, true, &page_out_location);
1466 __ASSERT(ret == 0, "failed to prepare page frame");
1467
1468 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1469 irq_unlock(key);
1470 /* Interrupts are now unlocked if they were not locked when we entered
1471 * this function, and we may service ISRs. The scheduler is still
1472 * locked.
1473 */
1474 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1475 if (dirty) {
1476 do_backing_store_page_out(page_out_location);
1477 }
1478 do_backing_store_page_in(page_in_location);
1479
1480 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1481 key = irq_lock();
1482 pf->flags &= ~Z_PAGE_FRAME_BUSY;
1483 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1484 if (pin) {
1485 pf->flags |= Z_PAGE_FRAME_PINNED;
1486 }
1487 pf->flags |= Z_PAGE_FRAME_MAPPED;
1488 pf->addr = UINT_TO_POINTER(POINTER_TO_UINT(addr)
1489 & ~(CONFIG_MMU_PAGE_SIZE - 1));
1490
1491 arch_mem_page_in(addr, z_page_frame_to_phys(pf));
1492 k_mem_paging_backing_store_page_finalize(pf, page_in_location);
1493 out:
1494 irq_unlock(key);
1495 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1496 k_sched_unlock();
1497 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1498
1499 return result;
1500 }
1501
1502 static void do_page_in(void *addr)
1503 {
1504 bool ret;
1505
1506 ret = do_page_fault(addr, false);
1507 __ASSERT(ret, "unmapped memory address %p", addr);
1508 (void)ret;
1509 }
1510
1511 void k_mem_page_in(void *addr, size_t size)
1512 {
1513 __ASSERT(!IS_ENABLED(CONFIG_DEMAND_PAGING_ALLOW_IRQ) || !k_is_in_isr(),
1514 "%s may not be called in ISRs if CONFIG_DEMAND_PAGING_ALLOW_IRQ is enabled",
1515 __func__);
1516 virt_region_foreach(addr, size, do_page_in);
1517 }
1518
1519 static void do_mem_pin(void *addr)
1520 {
1521 bool ret;
1522
1523 ret = do_page_fault(addr, true);
1524 __ASSERT(ret, "unmapped memory address %p", addr);
1525 (void)ret;
1526 }
1527
1528 void k_mem_pin(void *addr, size_t size)
1529 {
1530 __ASSERT(!IS_ENABLED(CONFIG_DEMAND_PAGING_ALLOW_IRQ) || !k_is_in_isr(),
1531 "%s may not be called in ISRs if CONFIG_DEMAND_PAGING_ALLOW_IRQ is enabled",
1532 __func__);
1533 virt_region_foreach(addr, size, do_mem_pin);
1534 }
1535
1536 bool z_page_fault(void *addr)
1537 {
1538 return do_page_fault(addr, false);
1539 }
1540
1541 static void do_mem_unpin(void *addr)
1542 {
1543 struct z_page_frame *pf;
1544 unsigned int key;
1545 uintptr_t flags, phys;
1546
1547 key = irq_lock();
1548 flags = arch_page_info_get(addr, &phys, false);
1549 __ASSERT((flags & ARCH_DATA_PAGE_NOT_MAPPED) == 0,
1550 "invalid data page at %p", addr);
1551 if ((flags & ARCH_DATA_PAGE_LOADED) != 0) {
1552 pf = z_phys_to_page_frame(phys);
1553 pf->flags &= ~Z_PAGE_FRAME_PINNED;
1554 }
1555 irq_unlock(key);
1556 }
1557
1558 void k_mem_unpin(void *addr, size_t size)
1559 {
1560 __ASSERT(page_frames_initialized, "%s called on %p too early", __func__,
1561 addr);
1562 virt_region_foreach(addr, size, do_mem_unpin);
1563 }
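
/* Usage sketch (illustrative; pinned_buf is an assumed page-aligned buffer):
 * data that is about to be touched from a context where page faults are not
 * tolerated (e.g. an ISR when CONFIG_DEMAND_PAGING_ALLOW_IRQ is enabled) can
 * be temporarily pinned:
 *
 *   k_mem_pin(pinned_buf, CONFIG_MMU_PAGE_SIZE);
 *   ... window where pinned_buf must stay resident ...
 *   k_mem_unpin(pinned_buf, CONFIG_MMU_PAGE_SIZE);
 */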
1564
1565 #endif /* CONFIG_DEMAND_PAGING */
1566