1 /*
2 * Copyright (c) 2020 Intel Corporation
3 *
4 * SPDX-License-Identifier: Apache-2.0
5 *
6 * Routines for managing virtual address spaces
7 */
8
9 #include <stdint.h>
10 #include <kernel_arch_interface.h>
11 #include <zephyr/spinlock.h>
12 #include <mmu.h>
13 #include <zephyr/init.h>
14 #include <kernel_internal.h>
15 #include <zephyr/internal/syscall_handler.h>
16 #include <zephyr/toolchain.h>
17 #include <zephyr/linker/linker-defs.h>
18 #include <zephyr/sys/bitarray.h>
19 #include <zephyr/sys/check.h>
20 #include <zephyr/sys/math_extras.h>
21 #include <zephyr/timing/timing.h>
22 #include <zephyr/logging/log.h>
23 LOG_MODULE_DECLARE(os, CONFIG_KERNEL_LOG_LEVEL);
24
25 #ifdef CONFIG_DEMAND_PAGING
26 #include <zephyr/kernel/mm/demand_paging.h>
27 #endif /* CONFIG_DEMAND_PAGING */
28
29 /*
30 * General terminology:
31 * - A page frame is a page-sized physical memory region in RAM. It is a
32 * container where a data page may be placed. It is always referred to by
33 * physical address. We have a convention of using uintptr_t for physical
34 * addresses. We instantiate a struct k_mem_page_frame to store metadata for
35 * every page frame.
36 *
37 * - A data page is a page-sized region of data. It may exist in a page frame,
38 * or be paged out to some backing store. Its location can always be looked
39 * up in the CPU's page tables (or equivalent) by virtual address.
40 * The data type will always be void * or in some cases uint8_t * when we
41 * want to do pointer arithmetic.
42 */
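
/* Illustrative sketch of how these terms relate in code (a hypothetical
 * debug helper, not part of this file's API): given a page frame, its
 * physical address and, when mapped, its current data page's virtual
 * address can be derived with the accessors used throughout this file.
 *
 *	static void dump_frame_terminology(void)
 *	{
 *		uintptr_t phys;			// physical address of a page frame
 *		struct k_mem_page_frame *pf;	// per-frame metadata
 *
 *		K_MEM_PAGE_FRAME_FOREACH(phys, pf) {
 *			if (k_mem_page_frame_is_mapped(pf)) {
 *				// a data page currently resides in this frame
 *				void *virt = k_mem_page_frame_to_virt(pf);
 *
 *				printk("frame 0x%lx holds data page %p\n", phys, virt);
 *			}
 *		}
 *	}
 */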
43
44 /* Spinlock to protect any globals in this file and serialize page table
45 * updates in arch code
46 */
47 struct k_spinlock z_mm_lock;
48
49 /*
50 * General page frame management
51 */
52
53 /* Database of all RAM page frames */
54 struct k_mem_page_frame k_mem_page_frames[K_MEM_NUM_PAGE_FRAMES];
55
56 #if __ASSERT_ON
/* Indicator that k_mem_page_frames has been initialized; many of these APIs do
58 * not work before POST_KERNEL
59 */
60 static bool page_frames_initialized;
61 #endif
62
63 /* Add colors to page table dumps to indicate mapping type */
64 #define COLOR_PAGE_FRAMES 1
65
66 #if COLOR_PAGE_FRAMES
67 #define ANSI_DEFAULT "\x1B" "[0m"
68 #define ANSI_RED "\x1B" "[1;31m"
69 #define ANSI_GREEN "\x1B" "[1;32m"
70 #define ANSI_YELLOW "\x1B" "[1;33m"
71 #define ANSI_BLUE "\x1B" "[1;34m"
72 #define ANSI_MAGENTA "\x1B" "[1;35m"
73 #define ANSI_CYAN "\x1B" "[1;36m"
74 #define ANSI_GREY "\x1B" "[1;90m"
75
76 #define COLOR(x) printk(_CONCAT(ANSI_, x))
77 #else
78 #define COLOR(x) do { } while (false)
79 #endif /* COLOR_PAGE_FRAMES */
80
81 /* LCOV_EXCL_START */
static void page_frame_dump(struct k_mem_page_frame *pf)
83 {
84 if (k_mem_page_frame_is_free(pf)) {
85 COLOR(GREY);
86 printk("-");
87 } else if (k_mem_page_frame_is_reserved(pf)) {
88 COLOR(CYAN);
89 printk("R");
90 } else if (k_mem_page_frame_is_busy(pf)) {
91 COLOR(MAGENTA);
92 printk("B");
93 } else if (k_mem_page_frame_is_pinned(pf)) {
94 COLOR(YELLOW);
95 printk("P");
96 } else if (k_mem_page_frame_is_available(pf)) {
97 COLOR(GREY);
98 printk(".");
99 } else if (k_mem_page_frame_is_mapped(pf)) {
100 COLOR(DEFAULT);
101 printk("M");
102 } else {
103 COLOR(RED);
104 printk("?");
105 }
106 }
107
void k_mem_page_frames_dump(void)
109 {
110 int column = 0;
111
112 __ASSERT(page_frames_initialized, "%s called too early", __func__);
113 printk("Physical memory from 0x%lx to 0x%lx\n",
114 K_MEM_PHYS_RAM_START, K_MEM_PHYS_RAM_END);
115
116 for (int i = 0; i < K_MEM_NUM_PAGE_FRAMES; i++) {
117 struct k_mem_page_frame *pf = &k_mem_page_frames[i];
118
119 page_frame_dump(pf);
120
121 column++;
122 if (column == 64) {
123 column = 0;
124 printk("\n");
125 }
126 }
127
128 COLOR(DEFAULT);
129 if (column != 0) {
130 printk("\n");
131 }
132 }
133 /* LCOV_EXCL_STOP */
134
135 #define VIRT_FOREACH(_base, _size, _pos) \
136 for ((_pos) = (_base); \
137 (_pos) < ((uint8_t *)(_base) + (_size)); (_pos) += CONFIG_MMU_PAGE_SIZE)
138
139 #define PHYS_FOREACH(_base, _size, _pos) \
140 for ((_pos) = (_base); \
141 (_pos) < ((uintptr_t)(_base) + (_size)); (_pos) += CONFIG_MMU_PAGE_SIZE)
142
143
144 /*
145 * Virtual address space management
146 *
147 * Call all of these functions with z_mm_lock held.
148 *
149 * Overall virtual memory map: When the kernel starts, it resides in
150 * virtual memory in the region K_MEM_KERNEL_VIRT_START to
151 * K_MEM_KERNEL_VIRT_END. Unused virtual memory past this, up to the limit
152 * noted by CONFIG_KERNEL_VM_SIZE may be used for runtime memory mappings.
153 *
154 * If CONFIG_ARCH_MAPS_ALL_RAM is set, we do not just map the kernel image,
155 * but have a mapping for all RAM in place. This is for special architectural
156 * purposes and does not otherwise affect page frame accounting or flags;
157 * the only guarantee is that such RAM mapping outside of the Zephyr image
158 * won't be disturbed by subsequent memory mapping calls.
159 *
160 * +--------------+ <- K_MEM_VIRT_RAM_START
161 * | Undefined VM | <- May contain ancillary regions like x86_64's locore
162 * +--------------+ <- K_MEM_KERNEL_VIRT_START (often == K_MEM_VIRT_RAM_START)
163 * | Mapping for |
164 * | main kernel |
165 * | image |
166 * | |
167 * | |
168 * +--------------+ <- K_MEM_VM_FREE_START
169 * | |
170 * | Unused, |
171 * | Available VM |
172 * | |
173 * |..............| <- mapping_pos (grows downward as more mappings are made)
174 * | Mapping |
175 * +--------------+
176 * | Mapping |
177 * +--------------+
178 * | ... |
179 * +--------------+
180 * | Mapping |
181 * +--------------+ <- mappings start here
182 * | Reserved | <- special purpose virtual page(s) of size K_MEM_VM_RESERVED
183 * +--------------+ <- K_MEM_VIRT_RAM_END
184 */
185
/* Bitmap of virtual addresses where one bit corresponds to one page.
 * This is used by virt_region_alloc() to figure out which
 * virtual address regions are free for memory mapping.
 *
 * Note that bit #0 corresponds to the highest address, so
 * allocation is done in reverse, from the highest address down.
 */
193 SYS_BITARRAY_DEFINE_STATIC(virt_region_bitmap,
194 CONFIG_KERNEL_VM_SIZE / CONFIG_MMU_PAGE_SIZE);
195
196 static bool virt_region_inited;
197
198 #define Z_VIRT_REGION_START_ADDR K_MEM_VM_FREE_START
199 #define Z_VIRT_REGION_END_ADDR (K_MEM_VIRT_RAM_END - K_MEM_VM_RESERVED)
200
static inline uintptr_t virt_from_bitmap_offset(size_t offset, size_t size)
202 {
203 return POINTER_TO_UINT(K_MEM_VIRT_RAM_END)
204 - (offset * CONFIG_MMU_PAGE_SIZE) - size;
205 }
206
static inline size_t virt_to_bitmap_offset(void *vaddr, size_t size)
208 {
209 return (POINTER_TO_UINT(K_MEM_VIRT_RAM_END)
210 - POINTER_TO_UINT(vaddr) - size) / CONFIG_MMU_PAGE_SIZE;
211 }
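
/* Worked example of the reverse-indexed bitmap (illustrative numbers,
 * assuming 4 KiB pages): for the one-page region ending at K_MEM_VIRT_RAM_END,
 * virt_to_bitmap_offset(K_MEM_VIRT_RAM_END - 0x1000, 0x1000) == 0, i.e. the
 * highest page corresponds to bit #0. Converting back,
 * virt_from_bitmap_offset(0, 0x1000) == K_MEM_VIRT_RAM_END - 0x1000, so the
 * two helpers are inverses of each other for page-aligned addresses and sizes.
 */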
212
static void virt_region_init(void)
214 {
215 size_t offset, num_bits;
216
217 /* There are regions where we should never map via
218 * k_mem_map() and k_mem_map_phys_bare(). Mark them as
219 * already allocated so they will never be used.
220 */
221
222 if (K_MEM_VM_RESERVED > 0) {
223 /* Mark reserved region at end of virtual address space */
224 num_bits = K_MEM_VM_RESERVED / CONFIG_MMU_PAGE_SIZE;
225 (void)sys_bitarray_set_region(&virt_region_bitmap,
226 num_bits, 0);
227 }
228
	/* Mark all bits up to K_MEM_VM_FREE_START as allocated */
230 num_bits = POINTER_TO_UINT(K_MEM_VM_FREE_START)
231 - POINTER_TO_UINT(K_MEM_VIRT_RAM_START);
232 offset = virt_to_bitmap_offset(K_MEM_VIRT_RAM_START, num_bits);
233 num_bits /= CONFIG_MMU_PAGE_SIZE;
234 (void)sys_bitarray_set_region(&virt_region_bitmap,
235 num_bits, offset);
236
237 virt_region_inited = true;
238 }
239
static void virt_region_free(void *vaddr, size_t size)
241 {
242 size_t offset, num_bits;
243 uint8_t *vaddr_u8 = (uint8_t *)vaddr;
244
245 if (unlikely(!virt_region_inited)) {
246 virt_region_init();
247 }
248
249 #ifndef CONFIG_KERNEL_DIRECT_MAP
	/* Without CONFIG_KERNEL_DIRECT_MAP, the region is always
	 * representable in the bitmap, so this case is simple.
	 */
254
255 __ASSERT((vaddr_u8 >= Z_VIRT_REGION_START_ADDR)
256 && ((vaddr_u8 + size - 1) < Z_VIRT_REGION_END_ADDR),
257 "invalid virtual address region %p (%zu)", vaddr_u8, size);
258 if (!((vaddr_u8 >= Z_VIRT_REGION_START_ADDR)
259 && ((vaddr_u8 + size - 1) < Z_VIRT_REGION_END_ADDR))) {
260 return;
261 }
262
263 offset = virt_to_bitmap_offset(vaddr, size);
264 num_bits = size / CONFIG_MMU_PAGE_SIZE;
265 (void)sys_bitarray_free(&virt_region_bitmap, num_bits, offset);
266 #else /* !CONFIG_KERNEL_DIRECT_MAP */
267 /* With K_MEM_DIRECT_MAP, the region can be outside of the virtual
268 * memory space, wholly within it, or overlap partially.
269 * So additional processing is needed to make sure we only
270 * mark the pages within the bitmap.
271 */
272 if (((vaddr_u8 >= Z_VIRT_REGION_START_ADDR) &&
273 (vaddr_u8 < Z_VIRT_REGION_END_ADDR)) ||
274 (((vaddr_u8 + size - 1) >= Z_VIRT_REGION_START_ADDR) &&
275 ((vaddr_u8 + size - 1) < Z_VIRT_REGION_END_ADDR))) {
276 uint8_t *adjusted_start = MAX(vaddr_u8, Z_VIRT_REGION_START_ADDR);
277 uint8_t *adjusted_end = MIN(vaddr_u8 + size,
278 Z_VIRT_REGION_END_ADDR);
279 size_t adjusted_sz = adjusted_end - adjusted_start;
280
281 offset = virt_to_bitmap_offset(adjusted_start, adjusted_sz);
282 num_bits = adjusted_sz / CONFIG_MMU_PAGE_SIZE;
283 (void)sys_bitarray_free(&virt_region_bitmap, num_bits, offset);
284 }
285 #endif /* !CONFIG_KERNEL_DIRECT_MAP */
286 }
287
static void *virt_region_alloc(size_t size, size_t align)
289 {
290 uintptr_t dest_addr;
291 size_t alloc_size;
292 size_t offset;
293 size_t num_bits;
294 int ret;
295
296 if (unlikely(!virt_region_inited)) {
297 virt_region_init();
298 }
299
300 /* Possibly request more pages to ensure we can get an aligned virtual address */
301 num_bits = (size + align - CONFIG_MMU_PAGE_SIZE) / CONFIG_MMU_PAGE_SIZE;
302 alloc_size = num_bits * CONFIG_MMU_PAGE_SIZE;
303 ret = sys_bitarray_alloc(&virt_region_bitmap, num_bits, &offset);
304 if (ret != 0) {
305 LOG_ERR("insufficient virtual address space (requested %zu)",
306 size);
307 return NULL;
308 }
309
310 /* Remember that bit #0 in bitmap corresponds to the highest
311 * virtual address. So here we need to go downwards (backwards?)
312 * to get the starting address of the allocated region.
313 */
314 dest_addr = virt_from_bitmap_offset(offset, alloc_size);
315
316 if (alloc_size > size) {
317 uintptr_t aligned_dest_addr = ROUND_UP(dest_addr, align);
318
319 /* Here is the memory organization when trying to get an aligned
320 * virtual address:
321 *
322 * +--------------+ <- K_MEM_VIRT_RAM_START
323 * | Undefined VM |
324 * +--------------+ <- K_MEM_KERNEL_VIRT_START (often == K_MEM_VIRT_RAM_START)
325 * | Mapping for |
326 * | main kernel |
327 * | image |
328 * | |
329 * | |
330 * +--------------+ <- K_MEM_VM_FREE_START
331 * | ... |
332 * +==============+ <- dest_addr
333 * | Unused |
334 * |..............| <- aligned_dest_addr
335 * | |
336 * | Aligned |
337 * | Mapping |
338 * | |
339 * |..............| <- aligned_dest_addr + size
340 * | Unused |
341 * +==============+ <- offset from K_MEM_VIRT_RAM_END == dest_addr + alloc_size
342 * | ... |
343 * +--------------+
344 * | Mapping |
345 * +--------------+
346 * | Reserved |
347 * +--------------+ <- K_MEM_VIRT_RAM_END
348 */
349
350 /* Free the two unused regions */
351 virt_region_free(UINT_TO_POINTER(dest_addr),
352 aligned_dest_addr - dest_addr);
353 if (((dest_addr + alloc_size) - (aligned_dest_addr + size)) > 0) {
354 virt_region_free(UINT_TO_POINTER(aligned_dest_addr + size),
355 (dest_addr + alloc_size) - (aligned_dest_addr + size));
356 }
357
358 dest_addr = aligned_dest_addr;
359 }
360
361 /* Need to make sure this does not step into kernel memory */
362 if (dest_addr < POINTER_TO_UINT(Z_VIRT_REGION_START_ADDR)) {
363 (void)sys_bitarray_free(&virt_region_bitmap, size, offset);
364 return NULL;
365 }
366
367 return UINT_TO_POINTER(dest_addr);
368 }
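
/* Worked example of the aligned-allocation path above (illustrative numbers,
 * assuming 4 KiB pages): a request with size = 0x2000 and align = 0x4000
 * allocates num_bits = (0x2000 + 0x4000 - 0x1000) / 0x1000 = 5 pages, so
 * alloc_size = 0x5000. If dest_addr comes back as 0x8065000, it is rounded
 * up to aligned_dest_addr = 0x8068000; the three leading slack pages at
 * 0x8065000..0x8067fff and the zero bytes of trailing slack are returned to
 * the bitmap, leaving exactly the aligned two-page window for the caller.
 */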
369
370 /*
371 * Free page frames management
372 *
373 * Call all of these functions with z_mm_lock held.
374 */
375
376 /* Linked list of unused and available page frames.
377 *
378 * TODO: This is very simple and treats all free page frames as being equal.
379 * However, there are use-cases to consolidate free pages such that entire
380 * SRAM banks can be switched off to save power, and so obtaining free pages
381 * may require a more complex ontology which prefers page frames in RAM banks
382 * which are still active.
383 *
384 * This implies in the future there may be multiple slists managing physical
385 * pages. Each page frame will still just have one snode link.
386 */
387 static sys_sflist_t free_page_frame_list;
388
389 /* Number of unused and available free page frames.
390 * This information may go stale immediately.
391 */
392 static size_t z_free_page_count;
393
394 #define PF_ASSERT(pf, expr, fmt, ...) \
395 __ASSERT(expr, "page frame 0x%lx: " fmt, k_mem_page_frame_to_phys(pf), \
396 ##__VA_ARGS__)
397
/* Get an unused page frame; we don't care which one. Returns NULL if there are none. */
static struct k_mem_page_frame *free_page_frame_list_get(void)
400 {
401 sys_sfnode_t *node;
402 struct k_mem_page_frame *pf = NULL;
403
404 node = sys_sflist_get(&free_page_frame_list);
405 if (node != NULL) {
406 z_free_page_count--;
407 pf = CONTAINER_OF(node, struct k_mem_page_frame, node);
408 PF_ASSERT(pf, k_mem_page_frame_is_free(pf),
409 "on free list but not free");
410 pf->va_and_flags = 0;
411 }
412
413 return pf;
414 }
415
416 /* Release a page frame back into the list of free pages */
static void free_page_frame_list_put(struct k_mem_page_frame *pf)
418 {
419 PF_ASSERT(pf, k_mem_page_frame_is_available(pf),
420 "unavailable page put on free list");
421
422 sys_sfnode_init(&pf->node, K_MEM_PAGE_FRAME_FREE);
423 sys_sflist_append(&free_page_frame_list, &pf->node);
424 z_free_page_count++;
425 }
426
static void free_page_frame_list_init(void)
428 {
429 sys_sflist_init(&free_page_frame_list);
430 }
431
static void page_frame_free_locked(struct k_mem_page_frame *pf)
433 {
434 pf->va_and_flags = 0;
435 free_page_frame_list_put(pf);
436 }
437
438 /*
439 * Memory Mapping
440 */
441
442 /* Called after the frame is mapped in the arch layer, to update our
443 * local ontology (and do some assertions while we're at it)
444 */
static void frame_mapped_set(struct k_mem_page_frame *pf, void *addr)
446 {
447 PF_ASSERT(pf, !k_mem_page_frame_is_free(pf),
448 "attempted to map a page frame on the free list");
449 PF_ASSERT(pf, !k_mem_page_frame_is_reserved(pf),
450 "attempted to map a reserved page frame");
451
452 /* We do allow multiple mappings for pinned page frames
453 * since we will never need to reverse map them.
	 * This is uncommon; use cases are things like the
	 * Zephyr equivalent of vDSOs.
456 */
457 PF_ASSERT(pf, !k_mem_page_frame_is_mapped(pf) || k_mem_page_frame_is_pinned(pf),
458 "non-pinned and already mapped to %p",
459 k_mem_page_frame_to_virt(pf));
460
461 uintptr_t flags_mask = CONFIG_MMU_PAGE_SIZE - 1;
462 uintptr_t va = (uintptr_t)addr & ~flags_mask;
463
464 pf->va_and_flags &= flags_mask;
465 pf->va_and_flags |= va | K_MEM_PAGE_FRAME_MAPPED;
466 }
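
/* Worked example of the va_and_flags packing above (assuming 4 KiB pages):
 * flags_mask is 0xfff, so mapping a frame at virtual address 0x80a42000
 * stores 0x80a42000 | K_MEM_PAGE_FRAME_MAPPED, with the page-aligned VA in
 * the upper bits and the per-frame flag bits in the low 12 bits. This works
 * because data pages are always page-aligned, so the low bits of a mapped
 * virtual address are guaranteed to be zero and can be reused for flags.
 */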
467
468 /* LCOV_EXCL_START */
469 /* Go through page frames to find the physical address mapped
470 * by a virtual address.
471 *
472 * @param[in] virt Virtual Address
473 * @param[out] phys Physical address mapped to the input virtual address
474 * if such mapping exists.
475 *
476 * @retval 0 if mapping is found and valid
477 * @retval -EFAULT if virtual address is not mapped
478 */
static int virt_to_page_frame(void *virt, uintptr_t *phys)
480 {
481 uintptr_t paddr;
482 struct k_mem_page_frame *pf;
483 int ret = -EFAULT;
484
485 K_MEM_PAGE_FRAME_FOREACH(paddr, pf) {
486 if (k_mem_page_frame_is_mapped(pf)) {
487 if (virt == k_mem_page_frame_to_virt(pf)) {
488 ret = 0;
489 if (phys != NULL) {
490 *phys = k_mem_page_frame_to_phys(pf);
491 }
492 break;
493 }
494 }
495 }
496
497 return ret;
498 }
499 /* LCOV_EXCL_STOP */
500
501 __weak FUNC_ALIAS(virt_to_page_frame, arch_page_phys_get, int);
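
/* Hypothetical usage sketch of the lookup above: an arch may override
 * arch_page_phys_get(), but the contract matches virt_to_page_frame():
 * 0 on success, -EFAULT if the virtual address is unmapped.
 *
 *	uintptr_t phys;
 *	int ret = arch_page_phys_get(some_vaddr, &phys);	// some_vaddr is illustrative
 *
 *	if (ret == 0) {
 *		printk("%p is backed by physical 0x%lx\n", some_vaddr, phys);
 *	} else {
 *		printk("%p is not currently mapped\n", some_vaddr);
 *	}
 *
 * Passing NULL as the output pointer simply tests whether a mapping exists,
 * as k_mem_unmap_phys_guard() does below when probing for guard pages.
 */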
502
503 #ifdef CONFIG_DEMAND_PAGING
504 static int page_frame_prepare_locked(struct k_mem_page_frame *pf, bool *dirty_ptr,
505 bool page_in, uintptr_t *location_ptr);
506
507 static inline void do_backing_store_page_in(uintptr_t location);
508 static inline void do_backing_store_page_out(uintptr_t location);
509 #endif /* CONFIG_DEMAND_PAGING */
510
511 /* Allocate a free page frame, and map it to a specified virtual address
512 *
513 * TODO: Add optional support for copy-on-write mappings to a zero page instead
514 * of allocating, in which case page frames will be allocated lazily as
515 * the mappings to the zero page get touched. This will avoid expensive
 * page-ins as memory is mapped, and physical RAM or backing store will
517 * not be used if the mapped memory is unused. The cost is an empty physical
518 * page of zeroes.
519 */
static int map_anon_page(void *addr, uint32_t flags)
521 {
522 struct k_mem_page_frame *pf;
523 uintptr_t phys;
524 bool lock = (flags & K_MEM_MAP_LOCK) != 0U;
525
526 pf = free_page_frame_list_get();
527 if (pf == NULL) {
528 #ifdef CONFIG_DEMAND_PAGING
529 uintptr_t location;
530 bool dirty;
531 int ret;
532
533 pf = k_mem_paging_eviction_select(&dirty);
534 __ASSERT(pf != NULL, "failed to get a page frame");
535 LOG_DBG("evicting %p at 0x%lx",
536 k_mem_page_frame_to_virt(pf),
537 k_mem_page_frame_to_phys(pf));
538 ret = page_frame_prepare_locked(pf, &dirty, false, &location);
539 if (ret != 0) {
540 return -ENOMEM;
541 }
542 if (dirty) {
543 do_backing_store_page_out(location);
544 }
545 pf->va_and_flags = 0;
546 #else
547 return -ENOMEM;
548 #endif /* CONFIG_DEMAND_PAGING */
549 }
550
551 phys = k_mem_page_frame_to_phys(pf);
552 arch_mem_map(addr, phys, CONFIG_MMU_PAGE_SIZE, flags | K_MEM_CACHE_WB);
553
554 if (lock) {
555 k_mem_page_frame_set(pf, K_MEM_PAGE_FRAME_PINNED);
556 }
557 frame_mapped_set(pf, addr);
558 #ifdef CONFIG_DEMAND_PAGING
559 if (!lock) {
560 k_mem_paging_eviction_add(pf);
561 }
562 #endif
563
564 LOG_DBG("memory mapping anon page %p -> 0x%lx", addr, phys);
565
566 return 0;
567 }
568
void *k_mem_map_phys_guard(uintptr_t phys, size_t size, uint32_t flags, bool is_anon)
570 {
571 uint8_t *dst;
572 size_t total_size;
573 int ret;
574 k_spinlock_key_t key;
575 uint8_t *pos;
576 bool uninit = (flags & K_MEM_MAP_UNINIT) != 0U;
577
578 __ASSERT(!(((flags & K_MEM_PERM_USER) != 0U) &&
579 ((flags & K_MEM_MAP_UNINIT) != 0U)),
580 "user access to anonymous uninitialized pages is forbidden");
581 __ASSERT(size % CONFIG_MMU_PAGE_SIZE == 0U,
582 "unaligned size %zu passed to %s", size, __func__);
583 __ASSERT(size != 0, "zero sized memory mapping");
584 __ASSERT(!is_anon || (is_anon && page_frames_initialized),
585 "%s called too early", __func__);
586 __ASSERT((flags & K_MEM_CACHE_MASK) == 0U,
587 "%s does not support explicit cache settings", __func__);
588
589 CHECKIF(size_add_overflow(size, CONFIG_MMU_PAGE_SIZE * 2, &total_size)) {
590 LOG_ERR("too large size %zu passed to %s", size, __func__);
591 return NULL;
592 }
593
594 key = k_spin_lock(&z_mm_lock);
595
596 /* Need extra for the guard pages (before and after) which we
597 * won't map.
598 */
599 total_size = size + (CONFIG_MMU_PAGE_SIZE * 2);
600
601 dst = virt_region_alloc(total_size, CONFIG_MMU_PAGE_SIZE);
602 if (dst == NULL) {
603 /* Address space has no free region */
604 goto out;
605 }
606
607 /* Unmap both guard pages to make sure accessing them
	 * will generate a fault.
609 */
610 arch_mem_unmap(dst, CONFIG_MMU_PAGE_SIZE);
611 arch_mem_unmap(dst + CONFIG_MMU_PAGE_SIZE + size,
612 CONFIG_MMU_PAGE_SIZE);
613
614 /* Skip over the "before" guard page in returned address. */
615 dst += CONFIG_MMU_PAGE_SIZE;
616
617 if (is_anon) {
618 /* Mapping from anonymous memory */
619 VIRT_FOREACH(dst, size, pos) {
620 ret = map_anon_page(pos, flags);
621
622 if (ret != 0) {
623 /* TODO: call k_mem_unmap(dst, pos - dst) when
624 * implemented in #28990 and release any guard virtual
625 * page as well.
626 */
627 dst = NULL;
628 goto out;
629 }
630 }
631 } else {
632 /* Mapping known physical memory.
633 *
634 * arch_mem_map() is a void function and does not return
635 * anything. Arch code usually uses ASSERT() to catch
636 * mapping errors. Assume this works correctly for now.
637 */
638 arch_mem_map(dst, phys, size, flags);
639 }
640
641 if (!uninit) {
642 /* If we later implement mappings to a copy-on-write
643 * zero page, won't need this step
644 */
645 memset(dst, 0, size);
646 }
647
648 out:
649 k_spin_unlock(&z_mm_lock, key);
650 return dst;
651 }
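
/* Hedged usage sketch for the function above: application code normally uses
 * the public k_mem_map()/k_mem_unmap() wrappers (assumed here, declared in
 * <zephyr/kernel/mm.h>), which end up in k_mem_map_phys_guard() and
 * k_mem_unmap_phys_guard() with is_anon == true. A minimal anonymous mapping
 * might look like:
 *
 *	size_t sz = 4 * CONFIG_MMU_PAGE_SIZE;
 *	uint8_t *buf = k_mem_map(sz, K_MEM_PERM_RW);
 *
 *	if (buf != NULL) {
 *		// buf[0..sz-1] is zero-filled; touching buf[-1] or buf[sz]
 *		// lands on an unmapped guard page and faults.
 *		k_mem_unmap(buf, sz);
 *	}
 */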
652
void k_mem_unmap_phys_guard(void *addr, size_t size, bool is_anon)
654 {
655 uintptr_t phys;
656 uint8_t *pos;
657 struct k_mem_page_frame *pf;
658 k_spinlock_key_t key;
659 size_t total_size;
660 int ret;
661
662 /* Need space for the "before" guard page */
663 __ASSERT_NO_MSG(POINTER_TO_UINT(addr) >= CONFIG_MMU_PAGE_SIZE);
664
665 /* Make sure address range is still valid after accounting
666 * for two guard pages.
667 */
668 pos = (uint8_t *)addr - CONFIG_MMU_PAGE_SIZE;
669 k_mem_assert_virtual_region(pos, size + (CONFIG_MMU_PAGE_SIZE * 2));
670
671 key = k_spin_lock(&z_mm_lock);
672
673 /* Check if both guard pages are unmapped.
674 * Bail if not, as this is probably a region not mapped
675 * using k_mem_map().
676 */
677 pos = addr;
678 ret = arch_page_phys_get(pos - CONFIG_MMU_PAGE_SIZE, NULL);
679 if (ret == 0) {
		__ASSERT(ret != 0,
681 "%s: cannot find preceding guard page for (%p, %zu)",
682 __func__, addr, size);
683 goto out;
684 }
685
686 ret = arch_page_phys_get(pos + size, NULL);
687 if (ret == 0) {
		__ASSERT(ret != 0,
689 "%s: cannot find succeeding guard page for (%p, %zu)",
690 __func__, addr, size);
691 goto out;
692 }
693
694 if (is_anon) {
695 /* Unmapping anonymous memory */
696 VIRT_FOREACH(addr, size, pos) {
697 #ifdef CONFIG_DEMAND_PAGING
698 enum arch_page_location status;
699 uintptr_t location;
700
701 status = arch_page_location_get(pos, &location);
702 switch (status) {
703 case ARCH_PAGE_LOCATION_PAGED_OUT:
704 /*
705 * No pf is associated with this mapping.
706 * Simply get rid of the MMU entry and free
707 * corresponding backing store.
708 */
709 arch_mem_unmap(pos, CONFIG_MMU_PAGE_SIZE);
710 k_mem_paging_backing_store_location_free(location);
711 continue;
712 case ARCH_PAGE_LOCATION_PAGED_IN:
713 /*
			 * The page is in memory, but it may not be
			 * accessible: the arch layer may make it inaccessible
			 * in order to track the ARCH_DATA_PAGE_ACCESSED flag,
			 * so arch_page_phys_get() could fail.
718 * Still, we know the actual phys address.
719 */
720 phys = location;
721 ret = 0;
722 break;
723 default:
724 ret = arch_page_phys_get(pos, &phys);
725 break;
726 }
727 #else
728 ret = arch_page_phys_get(pos, &phys);
729 #endif
730 __ASSERT(ret == 0,
731 "%s: cannot unmap an unmapped address %p",
732 __func__, pos);
733 if (ret != 0) {
734 /* Found an address not mapped. Do not continue. */
735 goto out;
736 }
737
738 __ASSERT(k_mem_is_page_frame(phys),
739 "%s: 0x%lx is not a page frame", __func__, phys);
740 if (!k_mem_is_page_frame(phys)) {
741 /* Physical address has no corresponding page frame
742 * description in the page frame array.
743 * This should not happen. Do not continue.
744 */
745 goto out;
746 }
747
748 /* Grab the corresponding page frame from physical address */
749 pf = k_mem_phys_to_page_frame(phys);
750
751 __ASSERT(k_mem_page_frame_is_mapped(pf),
752 "%s: 0x%lx is not a mapped page frame", __func__, phys);
753 if (!k_mem_page_frame_is_mapped(pf)) {
754 /* Page frame is not marked mapped.
755 * This should not happen. Do not continue.
756 */
757 goto out;
758 }
759
760 arch_mem_unmap(pos, CONFIG_MMU_PAGE_SIZE);
761 #ifdef CONFIG_DEMAND_PAGING
762 if (!k_mem_page_frame_is_pinned(pf)) {
763 k_mem_paging_eviction_remove(pf);
764 }
765 #endif
766
767 /* Put the page frame back into free list */
768 page_frame_free_locked(pf);
769 }
770 } else {
771 /*
		 * Unmapping previously mapped memory with a specific physical address.
		 *
		 * Note that we don't have to unmap the guard pages, as they should
		 * have been unmapped already. We just need to unmap the in-between
		 * region [addr, (addr + size)).
777 */
778 arch_mem_unmap(addr, size);
779 }
780
781 /* There are guard pages just before and after the mapped
782 * region. So we also need to free them from the bitmap.
783 */
784 pos = (uint8_t *)addr - CONFIG_MMU_PAGE_SIZE;
785 total_size = size + (CONFIG_MMU_PAGE_SIZE * 2);
786 virt_region_free(pos, total_size);
787
788 out:
789 k_spin_unlock(&z_mm_lock, key);
790 }
791
size_t k_mem_free_get(void)
793 {
794 size_t ret;
795 k_spinlock_key_t key;
796
797 __ASSERT(page_frames_initialized, "%s called too early", __func__);
798
799 key = k_spin_lock(&z_mm_lock);
800 #ifdef CONFIG_DEMAND_PAGING
801 if (z_free_page_count > CONFIG_DEMAND_PAGING_PAGE_FRAMES_RESERVE) {
802 ret = z_free_page_count - CONFIG_DEMAND_PAGING_PAGE_FRAMES_RESERVE;
803 } else {
804 ret = 0;
805 }
806 #else
807 ret = z_free_page_count;
808 #endif /* CONFIG_DEMAND_PAGING */
809 k_spin_unlock(&z_mm_lock, key);
810
811 return ret * (size_t)CONFIG_MMU_PAGE_SIZE;
812 }
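
/* Usage sketch (hypothetical caller): k_mem_free_get() estimates how much
 * additional anonymous memory could be mapped right now without evicting
 * anything, with the demand paging reserve already subtracted:
 *
 *	size_t avail = k_mem_free_get();
 *
 *	if (avail >= 16 * CONFIG_MMU_PAGE_SIZE) {
 *		// enough free page frames for a 16-page anonymous mapping
 *	}
 *
 * The value is a snapshot and may already be stale by the time it is used.
 */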
813
814 /* Get the default virtual region alignment, here the default MMU page size
815 *
816 * @param[in] phys Physical address of region to be mapped, aligned to MMU_PAGE_SIZE
817 * @param[in] size Size of region to be mapped, aligned to MMU_PAGE_SIZE
818 *
819 * @retval alignment to apply on the virtual address of this region
820 */
static size_t virt_region_align(uintptr_t phys, size_t size)
822 {
823 ARG_UNUSED(phys);
824 ARG_UNUSED(size);
825
826 return CONFIG_MMU_PAGE_SIZE;
827 }
828
829 __weak FUNC_ALIAS(virt_region_align, arch_virt_region_align, size_t);
830
831 /* This may be called from arch early boot code before z_cstart() is invoked.
 * Data will be copied and BSS zeroed, but this function must not rely on
 * any other initialization having been performed before it is called.
834 */
void k_mem_map_phys_bare(uint8_t **virt_ptr, uintptr_t phys, size_t size, uint32_t flags)
836 {
837 uintptr_t aligned_phys, addr_offset;
838 size_t aligned_size, align_boundary;
839 k_spinlock_key_t key;
840 uint8_t *dest_addr;
841 size_t num_bits;
842 size_t offset;
843
844 #ifndef CONFIG_KERNEL_DIRECT_MAP
845 __ASSERT(!(flags & K_MEM_DIRECT_MAP), "The direct-map is not enabled");
846 #endif /* CONFIG_KERNEL_DIRECT_MAP */
847 addr_offset = k_mem_region_align(&aligned_phys, &aligned_size,
848 phys, size,
849 CONFIG_MMU_PAGE_SIZE);
850 __ASSERT(aligned_size != 0U, "0-length mapping at 0x%lx", aligned_phys);
851 __ASSERT(aligned_phys < (aligned_phys + (aligned_size - 1)),
852 "wraparound for physical address 0x%lx (size %zu)",
853 aligned_phys, aligned_size);
854
855 align_boundary = arch_virt_region_align(aligned_phys, aligned_size);
856
857 key = k_spin_lock(&z_mm_lock);
858
859 if (IS_ENABLED(CONFIG_KERNEL_DIRECT_MAP) &&
860 (flags & K_MEM_DIRECT_MAP)) {
861 dest_addr = (uint8_t *)aligned_phys;
862
863 /* Mark the region of virtual memory bitmap as used
864 * if the region overlaps the virtual memory space.
865 *
866 * Basically if either end of region is within
867 * virtual memory space, we need to mark the bits.
868 */
869
870 if (IN_RANGE(aligned_phys,
871 (uintptr_t)K_MEM_VIRT_RAM_START,
872 (uintptr_t)(K_MEM_VIRT_RAM_END - 1)) ||
873 IN_RANGE(aligned_phys + aligned_size - 1,
874 (uintptr_t)K_MEM_VIRT_RAM_START,
875 (uintptr_t)(K_MEM_VIRT_RAM_END - 1))) {
876 uint8_t *adjusted_start = MAX(dest_addr, K_MEM_VIRT_RAM_START);
877 uint8_t *adjusted_end = MIN(dest_addr + aligned_size,
878 K_MEM_VIRT_RAM_END);
879 size_t adjusted_sz = adjusted_end - adjusted_start;
880
881 num_bits = adjusted_sz / CONFIG_MMU_PAGE_SIZE;
882 offset = virt_to_bitmap_offset(adjusted_start, adjusted_sz);
			if (sys_bitarray_test_and_set_region(
				    &virt_region_bitmap, num_bits, offset, true)) {
				goto fail;
			}
886 }
887 } else {
888 /* Obtain an appropriately sized chunk of virtual memory */
889 dest_addr = virt_region_alloc(aligned_size, align_boundary);
890 if (!dest_addr) {
891 goto fail;
892 }
893 }
894
	/* If this fails there's something amiss with virt_region_alloc() */
896 __ASSERT((uintptr_t)dest_addr <
897 ((uintptr_t)dest_addr + (size - 1)),
898 "wraparound for virtual address %p (size %zu)",
899 dest_addr, size);
900
901 LOG_DBG("arch_mem_map(%p, 0x%lx, %zu, %x) offset %lu", dest_addr,
902 aligned_phys, aligned_size, flags, addr_offset);
903
904 arch_mem_map(dest_addr, aligned_phys, aligned_size, flags);
905 k_spin_unlock(&z_mm_lock, key);
906
907 *virt_ptr = dest_addr + addr_offset;
908 return;
909 fail:
910 /* May re-visit this in the future, but for now running out of
911 * virtual address space or failing the arch_mem_map() call is
912 * an unrecoverable situation.
913 *
914 * Other problems not related to resource exhaustion we leave as
915 * assertions since they are clearly programming mistakes.
916 */
917 LOG_ERR("memory mapping 0x%lx (size %zu, flags 0x%x) failed",
918 phys, size, flags);
919 k_panic();
920 }
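
/* Hedged usage sketch: arch and early-boot code use this to establish a
 * virtual mapping for a known physical region, for example a device MMIO
 * window (the address, size and flag choice below are illustrative):
 *
 *	uint8_t *regs;
 *
 *	k_mem_map_phys_bare(&regs, 0xfedc0000UL, 0x1000,
 *			    K_MEM_PERM_RW | K_MEM_CACHE_NONE);
 *	// regs now points at the start of the mapped window; note that a
 *	// mapping failure is fatal (k_panic()) rather than reported back.
 */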
921
void k_mem_unmap_phys_bare(uint8_t *virt, size_t size)
923 {
924 uintptr_t aligned_virt, addr_offset;
925 size_t aligned_size;
926 k_spinlock_key_t key;
927
928 addr_offset = k_mem_region_align(&aligned_virt, &aligned_size,
929 POINTER_TO_UINT(virt), size,
930 CONFIG_MMU_PAGE_SIZE);
931 __ASSERT(aligned_size != 0U, "0-length mapping at 0x%lx", aligned_virt);
932 __ASSERT(aligned_virt < (aligned_virt + (aligned_size - 1)),
933 "wraparound for virtual address 0x%lx (size %zu)",
934 aligned_virt, aligned_size);
935
936 key = k_spin_lock(&z_mm_lock);
937
938 LOG_DBG("arch_mem_unmap(0x%lx, %zu) offset %lu",
939 aligned_virt, aligned_size, addr_offset);
940
941 arch_mem_unmap(UINT_TO_POINTER(aligned_virt), aligned_size);
942 virt_region_free(UINT_TO_POINTER(aligned_virt), aligned_size);
943 k_spin_unlock(&z_mm_lock, key);
944 }
945
946 /*
947 * Miscellaneous
948 */
949
size_t k_mem_region_align(uintptr_t *aligned_addr, size_t *aligned_size,
			  uintptr_t addr, size_t size, size_t align)
952 {
953 size_t addr_offset;
954
955 /* The actual mapped region must be page-aligned. Round down the
956 * physical address and pad the region size appropriately
957 */
958 *aligned_addr = ROUND_DOWN(addr, align);
959 addr_offset = addr - *aligned_addr;
960 *aligned_size = ROUND_UP(size + addr_offset, align);
961
962 return addr_offset;
963 }
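
/* Worked example (illustrative numbers): aligning addr = 0x10012345 with
 * size = 0x400 to a 0x1000 page yields aligned_addr = 0x10012000,
 * addr_offset = 0x345 and aligned_size = ROUND_UP(0x400 + 0x345, 0x1000)
 * = 0x1000, i.e. the single page that contains the whole requested window.
 * Callers map the aligned region and add addr_offset back onto the returned
 * virtual address, as k_mem_map_phys_bare() does above.
 */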
964
965 #if defined(CONFIG_LINKER_USE_BOOT_SECTION) || defined(CONFIG_LINKER_USE_PINNED_SECTION)
static void mark_linker_section_pinned(void *start_addr, void *end_addr,
				       bool pin)
968 {
969 struct k_mem_page_frame *pf;
970 uint8_t *addr;
971
972 uintptr_t pinned_start = ROUND_DOWN(POINTER_TO_UINT(start_addr),
973 CONFIG_MMU_PAGE_SIZE);
974 uintptr_t pinned_end = ROUND_UP(POINTER_TO_UINT(end_addr),
975 CONFIG_MMU_PAGE_SIZE);
976 size_t pinned_size = pinned_end - pinned_start;
977
978 VIRT_FOREACH(UINT_TO_POINTER(pinned_start), pinned_size, addr)
979 {
980 pf = k_mem_phys_to_page_frame(K_MEM_BOOT_VIRT_TO_PHYS(addr));
981 frame_mapped_set(pf, addr);
982
983 if (pin) {
984 k_mem_page_frame_set(pf, K_MEM_PAGE_FRAME_PINNED);
985 } else {
986 k_mem_page_frame_clear(pf, K_MEM_PAGE_FRAME_PINNED);
987 #ifdef CONFIG_DEMAND_PAGING
988 if (k_mem_page_frame_is_evictable(pf)) {
989 k_mem_paging_eviction_add(pf);
990 }
991 #endif
992 }
993 }
994 }
995 #endif /* CONFIG_LINKER_USE_BOOT_SECTION) || CONFIG_LINKER_USE_PINNED_SECTION */
996
void z_mem_manage_init(void)
998 {
999 uintptr_t phys;
1000 uint8_t *addr;
1001 struct k_mem_page_frame *pf;
1002 k_spinlock_key_t key = k_spin_lock(&z_mm_lock);
1003
1004 free_page_frame_list_init();
1005
1006 ARG_UNUSED(addr);
1007
1008 #ifdef CONFIG_ARCH_HAS_RESERVED_PAGE_FRAMES
1009 /* If some page frames are unavailable for use as memory, arch
1010 * code will mark K_MEM_PAGE_FRAME_RESERVED in their flags
1011 */
1012 arch_reserved_pages_update();
1013 #endif /* CONFIG_ARCH_HAS_RESERVED_PAGE_FRAMES */
1014
1015 #ifdef CONFIG_LINKER_GENERIC_SECTIONS_PRESENT_AT_BOOT
1016 /* All pages composing the Zephyr image are mapped at boot in a
1017 * predictable way. This can change at runtime.
1018 */
1019 VIRT_FOREACH(K_MEM_KERNEL_VIRT_START, K_MEM_KERNEL_VIRT_SIZE, addr)
1020 {
1021 pf = k_mem_phys_to_page_frame(K_MEM_BOOT_VIRT_TO_PHYS(addr));
1022 frame_mapped_set(pf, addr);
1023
1024 /* TODO: for now we pin the whole Zephyr image. Demand paging
1025 * currently tested with anonymously-mapped pages which are not
1026 * pinned.
1027 *
1028 * We will need to setup linker regions for a subset of kernel
1029 * code/data pages which are pinned in memory and
1030 * may not be evicted. This will contain critical CPU data
1031 * structures, and any code used to perform page fault
1032 * handling, page-ins, etc.
1033 */
1034 k_mem_page_frame_set(pf, K_MEM_PAGE_FRAME_PINNED);
1035 }
1036 #endif /* CONFIG_LINKER_GENERIC_SECTIONS_PRESENT_AT_BOOT */
1037
1038 #ifdef CONFIG_LINKER_USE_BOOT_SECTION
1039 /* Pin the boot section to prevent it from being swapped out during
1040 * boot process. Will be un-pinned once boot process completes.
1041 */
1042 mark_linker_section_pinned(lnkr_boot_start, lnkr_boot_end, true);
1043 #endif /* CONFIG_LINKER_USE_BOOT_SECTION */
1044
1045 #ifdef CONFIG_LINKER_USE_PINNED_SECTION
	/* Pin the page frames corresponding to the pinned symbols */
1047 mark_linker_section_pinned(lnkr_pinned_start, lnkr_pinned_end, true);
1048 #endif /* CONFIG_LINKER_USE_PINNED_SECTION */
1049
1050 /* Any remaining pages that aren't mapped, reserved, or pinned get
1051 * added to the free pages list
1052 */
1053 K_MEM_PAGE_FRAME_FOREACH(phys, pf) {
1054 if (k_mem_page_frame_is_available(pf)) {
1055 free_page_frame_list_put(pf);
1056 }
1057 }
1058 LOG_DBG("free page frames: %zu", z_free_page_count);
1059
1060 #ifdef CONFIG_DEMAND_PAGING
1061 #ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
1062 z_paging_histogram_init();
1063 #endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */
1064 k_mem_paging_backing_store_init();
1065 k_mem_paging_eviction_init();
	/* start tracking any evictable page frames installed above */
1067 K_MEM_PAGE_FRAME_FOREACH(phys, pf) {
1068 if (k_mem_page_frame_is_evictable(pf)) {
1069 k_mem_paging_eviction_add(pf);
1070 }
1071 }
1072 #endif /* CONFIG_DEMAND_PAGING */
1073 #if __ASSERT_ON
1074 page_frames_initialized = true;
1075 #endif
1076 k_spin_unlock(&z_mm_lock, key);
1077
1078 #ifndef CONFIG_LINKER_GENERIC_SECTIONS_PRESENT_AT_BOOT
1079 /* If BSS section is not present in memory at boot,
1080 * it would not have been cleared. This needs to be
1081 * done now since paging mechanism has been initialized
1082 * and the BSS pages can be brought into physical
1083 * memory to be cleared.
1084 */
1085 z_bss_zero();
1086 #endif /* CONFIG_LINKER_GENERIC_SECTIONS_PRESENT_AT_BOOT */
1087 }
1088
void z_mem_manage_boot_finish(void)
1090 {
1091 #ifdef CONFIG_LINKER_USE_BOOT_SECTION
1092 /* At the end of boot process, unpin the boot sections
1093 * as they don't need to be in memory all the time anymore.
1094 */
1095 mark_linker_section_pinned(lnkr_boot_start, lnkr_boot_end, false);
1096 #endif /* CONFIG_LINKER_USE_BOOT_SECTION */
1097 }
1098
1099 #ifdef CONFIG_DEMAND_PAGING
1100
1101 #ifdef CONFIG_DEMAND_PAGING_STATS
1102 struct k_mem_paging_stats_t paging_stats;
1103 extern struct k_mem_paging_histogram_t z_paging_histogram_eviction;
1104 extern struct k_mem_paging_histogram_t z_paging_histogram_backing_store_page_in;
1105 extern struct k_mem_paging_histogram_t z_paging_histogram_backing_store_page_out;
1106 #endif /* CONFIG_DEMAND_PAGING_STATS */
1107
static inline void do_backing_store_page_in(uintptr_t location)
1109 {
1110 #ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
1111 uint32_t time_diff;
1112
1113 #ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS
1114 timing_t time_start, time_end;
1115
1116 time_start = timing_counter_get();
1117 #else
1118 uint32_t time_start;
1119
1120 time_start = k_cycle_get_32();
1121 #endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */
1122 #endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */
1123
1124 k_mem_paging_backing_store_page_in(location);
1125
1126 #ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
1127 #ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS
1128 time_end = timing_counter_get();
1129 time_diff = (uint32_t)timing_cycles_get(&time_start, &time_end);
1130 #else
1131 time_diff = k_cycle_get_32() - time_start;
1132 #endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */
1133
1134 z_paging_histogram_inc(&z_paging_histogram_backing_store_page_in,
1135 time_diff);
1136 #endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */
1137 }
1138
static inline void do_backing_store_page_out(uintptr_t location)
1140 {
1141 #ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
1142 uint32_t time_diff;
1143
1144 #ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS
1145 timing_t time_start, time_end;
1146
1147 time_start = timing_counter_get();
1148 #else
1149 uint32_t time_start;
1150
1151 time_start = k_cycle_get_32();
1152 #endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */
1153 #endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */
1154
1155 k_mem_paging_backing_store_page_out(location);
1156
1157 #ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
1158 #ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS
1159 time_end = timing_counter_get();
1160 time_diff = (uint32_t)timing_cycles_get(&time_start, &time_end);
1161 #else
1162 time_diff = k_cycle_get_32() - time_start;
1163 #endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */
1164
1165 z_paging_histogram_inc(&z_paging_histogram_backing_store_page_out,
1166 time_diff);
1167 #endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */
1168 }
1169
/* The current implementation relies on interrupt locking to prevent any page table
1171 * access, which falls over if other CPUs are active. Addressing this is not
1172 * as simple as using spinlocks as regular memory reads/writes constitute
1173 * "access" in this sense.
1174 *
1175 * Current needs for demand paging are on uniprocessor systems.
1176 */
1177 BUILD_ASSERT(!IS_ENABLED(CONFIG_SMP));
1178
static void virt_region_foreach(void *addr, size_t size,
				void (*func)(void *))
1181 {
1182 k_mem_assert_virtual_region(addr, size);
1183
1184 for (size_t offset = 0; offset < size; offset += CONFIG_MMU_PAGE_SIZE) {
1185 func((uint8_t *)addr + offset);
1186 }
1187 }
1188
1189 /*
1190 * Perform some preparatory steps before paging out. The provided page frame
1191 * must be evicted to the backing store immediately after this is called
1192 * with a call to k_mem_paging_backing_store_page_out() if it contains
1193 * a data page.
1194 *
1195 * - Map page frame to scratch area if requested. This always is true if we're
1196 * doing a page fault, but is only set on manual evictions if the page is
1197 * dirty.
1198 * - If mapped:
1199 * - obtain backing store location and populate location parameter
1200 * - Update page tables with location
1201 * - Mark page frame as busy
1202 *
1203 * Returns -ENOMEM if the backing store is full
1204 */
static int page_frame_prepare_locked(struct k_mem_page_frame *pf, bool *dirty_ptr,
				     bool page_fault, uintptr_t *location_ptr)
1207 {
1208 uintptr_t phys;
1209 int ret;
1210 bool dirty = *dirty_ptr;
1211
1212 phys = k_mem_page_frame_to_phys(pf);
1213 __ASSERT(!k_mem_page_frame_is_pinned(pf), "page frame 0x%lx is pinned",
1214 phys);
1215
1216 /* If the backing store doesn't have a copy of the page, even if it
1217 * wasn't modified, treat as dirty. This can happen for a few
1218 * reasons:
1219 * 1) Page has never been swapped out before, and the backing store
1220 * wasn't pre-populated with this data page.
1221 * 2) Page was swapped out before, but the page contents were not
1222 * preserved after swapping back in.
1223 * 3) Page contents were preserved when swapped back in, but were later
1224 * evicted from the backing store to make room for other evicted
1225 * pages.
1226 */
1227 if (k_mem_page_frame_is_mapped(pf)) {
1228 dirty = dirty || !k_mem_page_frame_is_backed(pf);
1229 }
1230
1231 if (dirty || page_fault) {
1232 arch_mem_scratch(phys);
1233 }
1234
1235 if (k_mem_page_frame_is_mapped(pf)) {
1236 ret = k_mem_paging_backing_store_location_get(pf, location_ptr,
1237 page_fault);
1238 if (ret != 0) {
1239 LOG_ERR("out of backing store memory");
1240 return -ENOMEM;
1241 }
1242 arch_mem_page_out(k_mem_page_frame_to_virt(pf), *location_ptr);
1243 k_mem_paging_eviction_remove(pf);
1244 } else {
1245 /* Shouldn't happen unless this function is mis-used */
1246 __ASSERT(!dirty, "un-mapped page determined to be dirty");
1247 }
1248 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1249 /* Mark as busy so that k_mem_page_frame_is_evictable() returns false */
1250 __ASSERT(!k_mem_page_frame_is_busy(pf), "page frame 0x%lx is already busy",
1251 phys);
1252 k_mem_page_frame_set(pf, K_MEM_PAGE_FRAME_BUSY);
1253 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1254 /* Update dirty parameter, since we set to true if it wasn't backed
1255 * even if otherwise clean
1256 */
1257 *dirty_ptr = dirty;
1258
1259 return 0;
1260 }
1261
static int do_mem_evict(void *addr)
1263 {
1264 bool dirty;
1265 struct k_mem_page_frame *pf;
1266 uintptr_t location;
1267 int key, ret;
1268 uintptr_t flags, phys;
1269
#ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1271 __ASSERT(!k_is_in_isr(),
1272 "%s is unavailable in ISRs with CONFIG_DEMAND_PAGING_ALLOW_IRQ",
1273 __func__);
1274 k_sched_lock();
1275 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1276 key = irq_lock();
1277 flags = arch_page_info_get(addr, &phys, false);
1278 __ASSERT((flags & ARCH_DATA_PAGE_NOT_MAPPED) == 0,
1279 "address %p isn't mapped", addr);
1280 if ((flags & ARCH_DATA_PAGE_LOADED) == 0) {
1281 /* Un-mapped or already evicted. Nothing to do */
1282 ret = 0;
1283 goto out;
1284 }
1285
1286 dirty = (flags & ARCH_DATA_PAGE_DIRTY) != 0;
1287 pf = k_mem_phys_to_page_frame(phys);
1288 __ASSERT(k_mem_page_frame_to_virt(pf) == addr, "page frame address mismatch");
1289 ret = page_frame_prepare_locked(pf, &dirty, false, &location);
1290 if (ret != 0) {
1291 goto out;
1292 }
1293
1294 __ASSERT(ret == 0, "failed to prepare page frame");
1295 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1296 irq_unlock(key);
1297 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1298 if (dirty) {
1299 do_backing_store_page_out(location);
1300 }
1301 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1302 key = irq_lock();
1303 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1304 page_frame_free_locked(pf);
1305 out:
1306 irq_unlock(key);
1307 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1308 k_sched_unlock();
1309 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1310 return ret;
1311 }
1312
int k_mem_page_out(void *addr, size_t size)
1314 {
1315 __ASSERT(page_frames_initialized, "%s called on %p too early", __func__,
1316 addr);
1317 k_mem_assert_virtual_region(addr, size);
1318
1319 for (size_t offset = 0; offset < size; offset += CONFIG_MMU_PAGE_SIZE) {
1320 void *pos = (uint8_t *)addr + offset;
1321 int ret;
1322
1323 ret = do_mem_evict(pos);
1324 if (ret != 0) {
1325 return ret;
1326 }
1327 }
1328
1329 return 0;
1330 }
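
/* Hedged usage sketch: anonymous pages obtained via the public k_mem_map()
 * wrapper (assumed here) are evictable, so a caller that knows a buffer will
 * sit idle for a while can push it out early and pull it back in before the
 * next burst of accesses:
 *
 *	size_t sz = 8 * CONFIG_MMU_PAGE_SIZE;
 *	uint8_t *buf = k_mem_map(sz, K_MEM_PERM_RW);
 *	int ret = k_mem_page_out(buf, sz);	// push idle pages to backing store
 *
 *	if (ret != 0) {
 *		// backing store full; the pages simply stay resident
 *	}
 *	k_mem_page_in(buf, sz);			// fault-free access from here on
 */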
1331
int k_mem_page_frame_evict(uintptr_t phys)
1333 {
1334 int key, ret;
1335 struct k_mem_page_frame *pf;
1336 bool dirty;
1337 uintptr_t flags;
1338 uintptr_t location;
1339
1340 __ASSERT(page_frames_initialized, "%s called on 0x%lx too early",
1341 __func__, phys);
1342
1343 /* Implementation is similar to do_page_fault() except there is no
1344 * data page to page-in, see comments in that function.
1345 */
1346
1347 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1348 __ASSERT(!k_is_in_isr(),
1349 "%s is unavailable in ISRs with CONFIG_DEMAND_PAGING_ALLOW_IRQ",
1350 __func__);
1351 k_sched_lock();
1352 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1353 key = irq_lock();
1354 pf = k_mem_phys_to_page_frame(phys);
1355 if (!k_mem_page_frame_is_mapped(pf)) {
1356 /* Nothing to do, free page */
1357 ret = 0;
1358 goto out;
1359 }
1360 flags = arch_page_info_get(k_mem_page_frame_to_virt(pf), NULL, false);
1361 /* Shouldn't ever happen */
1362 __ASSERT((flags & ARCH_DATA_PAGE_LOADED) != 0, "data page not loaded");
1363 dirty = (flags & ARCH_DATA_PAGE_DIRTY) != 0;
1364 ret = page_frame_prepare_locked(pf, &dirty, false, &location);
1365 if (ret != 0) {
1366 goto out;
1367 }
1368
1369 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1370 irq_unlock(key);
1371 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1372 if (dirty) {
1373 do_backing_store_page_out(location);
1374 }
1375 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1376 key = irq_lock();
1377 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1378 page_frame_free_locked(pf);
1379 out:
1380 irq_unlock(key);
1381 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1382 k_sched_unlock();
1383 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1384 return ret;
1385 }
1386
static inline void paging_stats_faults_inc(struct k_thread *faulting_thread,
					   int key)
1389 {
1390 #ifdef CONFIG_DEMAND_PAGING_STATS
1391 bool is_irq_unlocked = arch_irq_unlocked(key);
1392
1393 paging_stats.pagefaults.cnt++;
1394
1395 if (is_irq_unlocked) {
1396 paging_stats.pagefaults.irq_unlocked++;
1397 } else {
1398 paging_stats.pagefaults.irq_locked++;
1399 }
1400
1401 #ifdef CONFIG_DEMAND_PAGING_THREAD_STATS
1402 faulting_thread->paging_stats.pagefaults.cnt++;
1403
1404 if (is_irq_unlocked) {
1405 faulting_thread->paging_stats.pagefaults.irq_unlocked++;
1406 } else {
1407 faulting_thread->paging_stats.pagefaults.irq_locked++;
1408 }
1409 #else
1410 ARG_UNUSED(faulting_thread);
1411 #endif /* CONFIG_DEMAND_PAGING_THREAD_STATS */
1412
1413 #ifndef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1414 if (k_is_in_isr()) {
1415 paging_stats.pagefaults.in_isr++;
1416
1417 #ifdef CONFIG_DEMAND_PAGING_THREAD_STATS
1418 faulting_thread->paging_stats.pagefaults.in_isr++;
1419 #endif /* CONFIG_DEMAND_PAGING_THREAD_STATS */
1420 }
1421 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1422 #endif /* CONFIG_DEMAND_PAGING_STATS */
1423 }
1424
static inline void paging_stats_eviction_inc(struct k_thread *faulting_thread,
					     bool dirty)
1427 {
1428 #ifdef CONFIG_DEMAND_PAGING_STATS
1429 if (dirty) {
1430 paging_stats.eviction.dirty++;
1431 } else {
1432 paging_stats.eviction.clean++;
1433 }
1434 #ifdef CONFIG_DEMAND_PAGING_THREAD_STATS
1435 if (dirty) {
1436 faulting_thread->paging_stats.eviction.dirty++;
1437 } else {
1438 faulting_thread->paging_stats.eviction.clean++;
1439 }
1440 #else
1441 ARG_UNUSED(faulting_thread);
1442 #endif /* CONFIG_DEMAND_PAGING_THREAD_STATS */
1443 #endif /* CONFIG_DEMAND_PAGING_STATS */
1444 }
1445
static inline struct k_mem_page_frame *do_eviction_select(bool *dirty)
1447 {
1448 struct k_mem_page_frame *pf;
1449
1450 #ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
1451 uint32_t time_diff;
1452
1453 #ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS
1454 timing_t time_start, time_end;
1455
1456 time_start = timing_counter_get();
1457 #else
1458 uint32_t time_start;
1459
1460 time_start = k_cycle_get_32();
1461 #endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */
1462 #endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */
1463
1464 pf = k_mem_paging_eviction_select(dirty);
1465
1466 #ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
1467 #ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS
1468 time_end = timing_counter_get();
1469 time_diff = (uint32_t)timing_cycles_get(&time_start, &time_end);
1470 #else
1471 time_diff = k_cycle_get_32() - time_start;
1472 #endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */
1473
1474 z_paging_histogram_inc(&z_paging_histogram_eviction, time_diff);
1475 #endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */
1476
1477 return pf;
1478 }
1479
static bool do_page_fault(void *addr, bool pin)
1481 {
1482 struct k_mem_page_frame *pf;
1483 int key, ret;
1484 uintptr_t page_in_location, page_out_location;
1485 enum arch_page_location status;
1486 bool result;
1487 bool dirty = false;
1488 struct k_thread *faulting_thread = _current_cpu->current;
1489
1490 __ASSERT(page_frames_initialized, "page fault at %p happened too early",
1491 addr);
1492
1493 LOG_DBG("page fault at %p", addr);
1494
1495 /*
1496 * TODO: Add performance accounting:
1497 * - k_mem_paging_eviction_select() metrics
1498 * * periodic timer execution time histogram (if implemented)
1499 */
1500
1501 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1502 /* We lock the scheduler so that other threads are never scheduled
1503 * during the page-in/out operation.
1504 *
1505 * We do however re-enable interrupts during the page-in/page-out
1506 * operation if and only if interrupts were enabled when the exception
1507 * was taken; in this configuration page faults in an ISR are a bug;
1508 * all their code/data must be pinned.
1509 *
1510 * If interrupts were disabled when the exception was taken, the
1511 * arch code is responsible for keeping them that way when entering
1512 * this function.
1513 *
1514 * If this is not enabled, then interrupts are always locked for the
1515 * entire operation. This is far worse for system interrupt latency
1516 * but requires less pinned pages and ISRs may also take page faults.
1517 *
1518 * Support for allowing k_mem_paging_backing_store_page_out() and
1519 * k_mem_paging_backing_store_page_in() to also sleep and allow
1520 * other threads to run (such as in the case where the transfer is
1521 * async DMA) is not implemented. Even if limited to thread context,
1522 * arbitrary memory access triggering exceptions that put a thread to
1523 * sleep on a contended page fault operation will break scheduling
1524 * assumptions of cooperative threads or threads that implement
	 * critical sections with spinlocks or disabling IRQs.
1526 */
1527 k_sched_lock();
1528 __ASSERT(!k_is_in_isr(), "ISR page faults are forbidden");
1529 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1530
1531 key = irq_lock();
1532 status = arch_page_location_get(addr, &page_in_location);
1533 if (status == ARCH_PAGE_LOCATION_BAD) {
1534 /* Return false to treat as a fatal error */
1535 result = false;
1536 goto out;
1537 }
1538 result = true;
1539
1540 if (status == ARCH_PAGE_LOCATION_PAGED_IN) {
1541 if (pin) {
1542 /* It's a physical memory address */
1543 uintptr_t phys = page_in_location;
1544
1545 pf = k_mem_phys_to_page_frame(phys);
1546 if (!k_mem_page_frame_is_pinned(pf)) {
1547 k_mem_paging_eviction_remove(pf);
1548 k_mem_page_frame_set(pf, K_MEM_PAGE_FRAME_PINNED);
1549 }
1550 }
1551
1552 /* This if-block is to pin the page if it is
1553 * already present in physical memory. There is
1554 * no need to go through the following code to
1555 * pull in the data pages. So skip to the end.
1556 */
1557 goto out;
1558 }
1559 __ASSERT(status == ARCH_PAGE_LOCATION_PAGED_OUT,
1560 "unexpected status value %d", status);
1561
1562 paging_stats_faults_inc(faulting_thread, key);
1563
1564 pf = free_page_frame_list_get();
1565 if (pf == NULL) {
1566 /* Need to evict a page frame */
1567 pf = do_eviction_select(&dirty);
1568 __ASSERT(pf != NULL, "failed to get a page frame");
1569 LOG_DBG("evicting %p at 0x%lx",
1570 k_mem_page_frame_to_virt(pf),
1571 k_mem_page_frame_to_phys(pf));
1572
1573 paging_stats_eviction_inc(faulting_thread, dirty);
1574 }
1575 ret = page_frame_prepare_locked(pf, &dirty, true, &page_out_location);
1576 __ASSERT(ret == 0, "failed to prepare page frame");
1577
1578 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1579 irq_unlock(key);
1580 /* Interrupts are now unlocked if they were not locked when we entered
1581 * this function, and we may service ISRs. The scheduler is still
1582 * locked.
1583 */
1584 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1585 if (dirty) {
1586 do_backing_store_page_out(page_out_location);
1587 }
1588 do_backing_store_page_in(page_in_location);
1589
1590 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1591 key = irq_lock();
1592 k_mem_page_frame_clear(pf, K_MEM_PAGE_FRAME_BUSY);
1593 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1594 k_mem_page_frame_clear(pf, K_MEM_PAGE_FRAME_MAPPED);
1595 frame_mapped_set(pf, addr);
1596 if (pin) {
1597 k_mem_page_frame_set(pf, K_MEM_PAGE_FRAME_PINNED);
1598 }
1599
1600 arch_mem_page_in(addr, k_mem_page_frame_to_phys(pf));
1601 k_mem_paging_backing_store_page_finalize(pf, page_in_location);
1602 if (!pin) {
1603 k_mem_paging_eviction_add(pf);
1604 }
1605 out:
1606 irq_unlock(key);
1607 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1608 k_sched_unlock();
1609 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1610
1611 return result;
1612 }
1613
static void do_page_in(void *addr)
1615 {
1616 bool ret;
1617
1618 ret = do_page_fault(addr, false);
1619 __ASSERT(ret, "unmapped memory address %p", addr);
1620 (void)ret;
1621 }
1622
void k_mem_page_in(void *addr, size_t size)
1624 {
1625 __ASSERT(!IS_ENABLED(CONFIG_DEMAND_PAGING_ALLOW_IRQ) || !k_is_in_isr(),
1626 "%s may not be called in ISRs if CONFIG_DEMAND_PAGING_ALLOW_IRQ is enabled",
1627 __func__);
1628 virt_region_foreach(addr, size, do_page_in);
1629 }
1630
static void do_mem_pin(void *addr)
1632 {
1633 bool ret;
1634
1635 ret = do_page_fault(addr, true);
1636 __ASSERT(ret, "unmapped memory address %p", addr);
1637 (void)ret;
1638 }
1639
void k_mem_pin(void *addr, size_t size)
1641 {
1642 __ASSERT(!IS_ENABLED(CONFIG_DEMAND_PAGING_ALLOW_IRQ) || !k_is_in_isr(),
1643 "%s may not be called in ISRs if CONFIG_DEMAND_PAGING_ALLOW_IRQ is enabled",
1644 __func__);
1645 virt_region_foreach(addr, size, do_mem_pin);
1646 }
1647
bool k_mem_page_fault(void *addr)
1649 {
1650 return do_page_fault(addr, false);
1651 }
1652
static void do_mem_unpin(void *addr)
1654 {
1655 struct k_mem_page_frame *pf;
1656 unsigned int key;
1657 uintptr_t flags, phys;
1658
1659 key = irq_lock();
1660 flags = arch_page_info_get(addr, &phys, false);
1661 __ASSERT((flags & ARCH_DATA_PAGE_NOT_MAPPED) == 0,
1662 "invalid data page at %p", addr);
1663 if ((flags & ARCH_DATA_PAGE_LOADED) != 0) {
1664 pf = k_mem_phys_to_page_frame(phys);
1665 if (k_mem_page_frame_is_pinned(pf)) {
1666 k_mem_page_frame_clear(pf, K_MEM_PAGE_FRAME_PINNED);
1667 k_mem_paging_eviction_add(pf);
1668 }
1669 }
1670 irq_unlock(key);
1671 }
1672
void k_mem_unpin(void *addr, size_t size)
1674 {
1675 __ASSERT(page_frames_initialized, "%s called on %p too early", __func__,
1676 addr);
1677 virt_region_foreach(addr, size, do_mem_unpin);
1678 }
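
/* Hedged usage sketch for the pin/unpin pair: code about to access a buffer
 * in a context that cannot tolerate page faults (e.g. with interrupts locked)
 * pins the pages first and releases them afterwards so they become evictable
 * again (buf is assumed to be an evictable k_mem_map()'d region;
 * do_latency_critical_work() is a hypothetical worker):
 *
 *	k_mem_pin(buf, sz);			// fault in and pin the pages
 *	do_latency_critical_work(buf, sz);	// no page faults during this
 *	k_mem_unpin(buf, sz);			// pages may be evicted again
 */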
1679
1680 #endif /* CONFIG_DEMAND_PAGING */
1681