1 /*
2 * Copyright (c) 2020 Intel Corporation
3 *
4 * SPDX-License-Identifier: Apache-2.0
5 *
6 * Routines for managing virtual address spaces
7 */
8
9 #include <stdint.h>
10 #include <kernel_arch_interface.h>
11 #include <zephyr/spinlock.h>
12 #include <mmu.h>
13 #include <zephyr/init.h>
14 #include <kernel_internal.h>
15 #include <zephyr/internal/syscall_handler.h>
16 #include <zephyr/toolchain.h>
17 #include <zephyr/linker/linker-defs.h>
18 #include <zephyr/sys/bitarray.h>
19 #include <zephyr/sys/check.h>
20 #include <zephyr/sys/math_extras.h>
21 #include <zephyr/timing/timing.h>
22 #include <zephyr/logging/log.h>
23 LOG_MODULE_DECLARE(os, CONFIG_KERNEL_LOG_LEVEL);
24
25 #ifdef CONFIG_DEMAND_PAGING
26 #include <zephyr/kernel/mm/demand_paging.h>
27 #endif /* CONFIG_DEMAND_PAGING */
28
29 /*
30 * General terminology:
31 * - A page frame is a page-sized physical memory region in RAM. It is a
32 * container where a data page may be placed. It is always referred to by
33 * physical address. We have a convention of using uintptr_t for physical
34 * addresses. We instantiate a struct k_mem_page_frame to store metadata for
35 * every page frame.
36 *
37 * - A data page is a page-sized region of data. It may exist in a page frame,
38 * or be paged out to some backing store. Its location can always be looked
39 * up in the CPU's page tables (or equivalent) by virtual address.
40 * The data type will always be void * or in some cases uint8_t * when we
41 * want to do pointer arithmetic.
42 */
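
/* Illustrative sketch (not part of the implementation): converting between
 * the two representations described above. Every identifier used here
 * appears elsewhere in this file; only the loop body is hypothetical.
 *
 *   uintptr_t phys;
 *   struct k_mem_page_frame *pf;
 *
 *   K_MEM_PAGE_FRAME_FOREACH(phys, pf) {
 *       if (k_mem_page_frame_is_mapped(pf)) {
 *           void *data_page = k_mem_page_frame_to_virt(pf);
 *
 *           __ASSERT(k_mem_page_frame_to_phys(pf) == phys,
 *                    "frame/phys mismatch for %p", data_page);
 *       }
 *   }
 */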
43
44 /* Spinlock to protect any globals in this file and serialize page table
45 * updates in arch code
46 */
47 struct k_spinlock z_mm_lock;
48
49 /*
50 * General page frame management
51 */
52
53 /* Database of all RAM page frames */
54 struct k_mem_page_frame k_mem_page_frames[K_MEM_NUM_PAGE_FRAMES];
55
56 #if __ASSERT_ON
57 /* Indicator that k_mem_page_frames has been initialized; many of these APIs do
58 * not work before POST_KERNEL
59 */
60 static bool page_frames_initialized;
61 #endif
62
63 /* Add colors to page table dumps to indicate mapping type */
64 #define COLOR_PAGE_FRAMES 1
65
66 #if COLOR_PAGE_FRAMES
67 #define ANSI_DEFAULT "\x1B" "[0m"
68 #define ANSI_RED "\x1B" "[1;31m"
69 #define ANSI_GREEN "\x1B" "[1;32m"
70 #define ANSI_YELLOW "\x1B" "[1;33m"
71 #define ANSI_BLUE "\x1B" "[1;34m"
72 #define ANSI_MAGENTA "\x1B" "[1;35m"
73 #define ANSI_CYAN "\x1B" "[1;36m"
74 #define ANSI_GREY "\x1B" "[1;90m"
75
76 #define COLOR(x) printk(_CONCAT(ANSI_, x))
77 #else
78 #define COLOR(x) do { } while (false)
79 #endif /* COLOR_PAGE_FRAMES */
80
81 /* LCOV_EXCL_START */
82 static void page_frame_dump(struct k_mem_page_frame *pf)
83 {
84 if (k_mem_page_frame_is_free(pf)) {
85 COLOR(GREY);
86 printk("-");
87 } else if (k_mem_page_frame_is_reserved(pf)) {
88 COLOR(CYAN);
89 printk("R");
90 } else if (k_mem_page_frame_is_busy(pf)) {
91 COLOR(MAGENTA);
92 printk("B");
93 } else if (k_mem_page_frame_is_pinned(pf)) {
94 COLOR(YELLOW);
95 printk("P");
96 } else if (k_mem_page_frame_is_available(pf)) {
97 COLOR(GREY);
98 printk(".");
99 } else if (k_mem_page_frame_is_mapped(pf)) {
100 COLOR(DEFAULT);
101 printk("M");
102 } else {
103 COLOR(RED);
104 printk("?");
105 }
106 }
107
108 void k_mem_page_frames_dump(void)
109 {
110 int column = 0;
111
112 __ASSERT(page_frames_initialized, "%s called too early", __func__);
113 printk("Physical memory from 0x%lx to 0x%lx\n",
114 K_MEM_PHYS_RAM_START, K_MEM_PHYS_RAM_END);
115
116 for (int i = 0; i < K_MEM_NUM_PAGE_FRAMES; i++) {
117 struct k_mem_page_frame *pf = &k_mem_page_frames[i];
118
119 page_frame_dump(pf);
120
121 column++;
122 if (column == 64) {
123 column = 0;
124 printk("\n");
125 }
126 }
127
128 COLOR(DEFAULT);
129 if (column != 0) {
130 printk("\n");
131 }
132 }
133 /* LCOV_EXCL_STOP */
134
135 #define VIRT_FOREACH(_base, _size, _pos) \
136 for ((_pos) = (_base); \
137 (_pos) < ((uint8_t *)(_base) + (_size)); (_pos) += CONFIG_MMU_PAGE_SIZE)
138
139 #define PHYS_FOREACH(_base, _size, _pos) \
140 for ((_pos) = (_base); \
141 (_pos) < ((uintptr_t)(_base) + (_size)); (_pos) += CONFIG_MMU_PAGE_SIZE)
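
/* Usage sketch for the iterators above (illustrative only): visit every
 * page-sized slot of a virtual region. `region` and `region_size` are
 * hypothetical placeholders; the pattern mirrors how VIRT_FOREACH is used
 * later in this file.
 *
 *   uint8_t *pos;
 *
 *   VIRT_FOREACH(region, region_size, pos) {
 *       arch_mem_unmap(pos, CONFIG_MMU_PAGE_SIZE);
 *   }
 */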
142
143
144 /*
145 * Virtual address space management
146 *
147 * Call all of these functions with z_mm_lock held.
148 *
149 * Overall virtual memory map: When the kernel starts, it resides in
150 * virtual memory in the region K_MEM_KERNEL_VIRT_START to
151 * K_MEM_KERNEL_VIRT_END. Unused virtual memory past this, up to the limit
152 * noted by CONFIG_KERNEL_VM_SIZE may be used for runtime memory mappings.
153 *
154 * If CONFIG_ARCH_MAPS_ALL_RAM is set, we do not just map the kernel image,
155 * but have a mapping for all RAM in place. This is for special architectural
156 * purposes and does not otherwise affect page frame accounting or flags;
157 * the only guarantee is that such RAM mapping outside of the Zephyr image
158 * won't be disturbed by subsequent memory mapping calls.
159 *
160 * +--------------+ <- K_MEM_VIRT_RAM_START
161 * | Undefined VM | <- May contain ancillary regions like x86_64's locore
162 * +--------------+ <- K_MEM_KERNEL_VIRT_START (often == K_MEM_VIRT_RAM_START)
163 * | Mapping for |
164 * | main kernel |
165 * | image |
166 * | |
167 * | |
168 * +--------------+ <- K_MEM_VM_FREE_START
169 * | |
170 * | Unused, |
171 * | Available VM |
172 * | |
173 * |..............| <- mapping_pos (grows downward as more mappings are made)
174 * | Mapping |
175 * +--------------+
176 * | Mapping |
177 * +--------------+
178 * | ... |
179 * +--------------+
180 * | Mapping |
181 * +--------------+ <- mappings start here
182 * | Reserved | <- special purpose virtual page(s) of size K_MEM_VM_RESERVED
183 * +--------------+ <- K_MEM_VIRT_RAM_END
184 */
185
186 /* Bitmap of virtual addresses where one bit corresponds to one page.
187 * This is being used for virt_region_alloc() to figure out which
188 * region of virtual addresses can be used for memory mapping.
189 *
190 * Note that bit #0 is the highest address so that allocation is
191 * done in reverse from highest address.
192 */
193 SYS_BITARRAY_DEFINE_STATIC(virt_region_bitmap,
194 CONFIG_KERNEL_VM_SIZE / CONFIG_MMU_PAGE_SIZE);
195
196 static bool virt_region_inited;
197
198 #define Z_VIRT_REGION_START_ADDR K_MEM_VM_FREE_START
199 #define Z_VIRT_REGION_END_ADDR (K_MEM_VIRT_RAM_END - K_MEM_VM_RESERVED)
200
201 static inline uintptr_t virt_from_bitmap_offset(size_t offset, size_t size)
202 {
203 return POINTER_TO_UINT(K_MEM_VIRT_RAM_END)
204 - (offset * CONFIG_MMU_PAGE_SIZE) - size;
205 }
206
207 static inline size_t virt_to_bitmap_offset(void *vaddr, size_t size)
208 {
209 return (POINTER_TO_UINT(K_MEM_VIRT_RAM_END)
210 - POINTER_TO_UINT(vaddr) - size) / CONFIG_MMU_PAGE_SIZE;
211 }
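
/* Worked example (hypothetical numbers): with K_MEM_VIRT_RAM_END at
 * 0x90000000 and 4 KiB pages, a two-page region at the very top of the
 * address space starts at 0x8FFFE000 with size 0x2000, so
 * virt_to_bitmap_offset(0x8FFFE000, 0x2000)
 *   = (0x90000000 - 0x8FFFE000 - 0x2000) / 0x1000 = 0,
 * matching the convention that bit #0 is the highest address. Converting
 * back, virt_from_bitmap_offset(0, 0x2000)
 *   = 0x90000000 - 0 - 0x2000 = 0x8FFFE000.
 */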
212
213 static void virt_region_init(void)
214 {
215 size_t offset, num_bits;
216
217 /* There are regions where we should never map via
218 * k_mem_map() and k_mem_map_phys_bare(). Mark them as
219 * already allocated so they will never be used.
220 */
221
222 if (K_MEM_VM_RESERVED > 0) {
223 /* Mark reserved region at end of virtual address space */
224 num_bits = K_MEM_VM_RESERVED / CONFIG_MMU_PAGE_SIZE;
225 (void)sys_bitarray_set_region(&virt_region_bitmap,
226 num_bits, 0);
227 }
228
229 /* Mark all bits up to K_MEM_VM_FREE_START as allocated */
230 num_bits = POINTER_TO_UINT(K_MEM_VM_FREE_START)
231 - POINTER_TO_UINT(K_MEM_VIRT_RAM_START);
232 offset = virt_to_bitmap_offset(K_MEM_VIRT_RAM_START, num_bits);
233 num_bits /= CONFIG_MMU_PAGE_SIZE;
234 (void)sys_bitarray_set_region(&virt_region_bitmap,
235 num_bits, offset);
236
237 virt_region_inited = true;
238 }
239
240 static void virt_region_free(void *vaddr, size_t size)
241 {
242 size_t offset, num_bits;
243 uint8_t *vaddr_u8 = (uint8_t *)vaddr;
244
245 if (unlikely(!virt_region_inited)) {
246 virt_region_init();
247 }
248
249 #ifndef CONFIG_KERNEL_DIRECT_MAP
250 /* Without the need to support K_MEM_DIRECT_MAP, the region is always
251 * fully representable in the bitmap, so this case is
252 * simple.
253 */
254
255 __ASSERT((vaddr_u8 >= Z_VIRT_REGION_START_ADDR)
256 && ((vaddr_u8 + size - 1) < Z_VIRT_REGION_END_ADDR),
257 "invalid virtual address region %p (%zu)", vaddr_u8, size);
258 if (!((vaddr_u8 >= Z_VIRT_REGION_START_ADDR)
259 && ((vaddr_u8 + size - 1) < Z_VIRT_REGION_END_ADDR))) {
260 return;
261 }
262
263 offset = virt_to_bitmap_offset(vaddr, size);
264 num_bits = size / CONFIG_MMU_PAGE_SIZE;
265 (void)sys_bitarray_free(&virt_region_bitmap, num_bits, offset);
266 #else /* !CONFIG_KERNEL_DIRECT_MAP */
267 /* With K_MEM_DIRECT_MAP, the region can be outside of the virtual
268 * memory space, wholly within it, or overlap partially.
269 * So additional processing is needed to make sure we only
270 * mark the pages within the bitmap.
271 */
272 if (((vaddr_u8 >= Z_VIRT_REGION_START_ADDR) &&
273 (vaddr_u8 < Z_VIRT_REGION_END_ADDR)) ||
274 (((vaddr_u8 + size - 1) >= Z_VIRT_REGION_START_ADDR) &&
275 ((vaddr_u8 + size - 1) < Z_VIRT_REGION_END_ADDR))) {
276 uint8_t *adjusted_start = MAX(vaddr_u8, Z_VIRT_REGION_START_ADDR);
277 uint8_t *adjusted_end = MIN(vaddr_u8 + size,
278 Z_VIRT_REGION_END_ADDR);
279 size_t adjusted_sz = adjusted_end - adjusted_start;
280
281 offset = virt_to_bitmap_offset(adjusted_start, adjusted_sz);
282 num_bits = adjusted_sz / CONFIG_MMU_PAGE_SIZE;
283 (void)sys_bitarray_free(&virt_region_bitmap, num_bits, offset);
284 }
285 #endif /* !CONFIG_KERNEL_DIRECT_MAP */
286 }
287
288 static void *virt_region_alloc(size_t size, size_t align)
289 {
290 uintptr_t dest_addr;
291 size_t alloc_size;
292 size_t offset;
293 size_t num_bits;
294 int ret;
295
296 if (unlikely(!virt_region_inited)) {
297 virt_region_init();
298 }
299
300 /* Possibly request more pages to ensure we can get an aligned virtual address */
301 num_bits = (size + align - CONFIG_MMU_PAGE_SIZE) / CONFIG_MMU_PAGE_SIZE;
302 alloc_size = num_bits * CONFIG_MMU_PAGE_SIZE;
303 ret = sys_bitarray_alloc(&virt_region_bitmap, num_bits, &offset);
304 if (ret != 0) {
305 LOG_ERR("insufficient virtual address space (requested %zu)",
306 size);
307 return NULL;
308 }
309
310 /* Remember that bit #0 in bitmap corresponds to the highest
311 * virtual address. So here we need to go downwards (backwards?)
312 * to get the starting address of the allocated region.
313 */
314 dest_addr = virt_from_bitmap_offset(offset, alloc_size);
315
316 if (alloc_size > size) {
317 uintptr_t aligned_dest_addr = ROUND_UP(dest_addr, align);
318
319 /* Here is the memory organization when trying to get an aligned
320 * virtual address:
321 *
322 * +--------------+ <- K_MEM_VIRT_RAM_START
323 * | Undefined VM |
324 * +--------------+ <- K_MEM_KERNEL_VIRT_START (often == K_MEM_VIRT_RAM_START)
325 * | Mapping for |
326 * | main kernel |
327 * | image |
328 * | |
329 * | |
330 * +--------------+ <- K_MEM_VM_FREE_START
331 * | ... |
332 * +==============+ <- dest_addr
333 * | Unused |
334 * |..............| <- aligned_dest_addr
335 * | |
336 * | Aligned |
337 * | Mapping |
338 * | |
339 * |..............| <- aligned_dest_addr + size
340 * | Unused |
341 * +==============+ <- offset from K_MEM_VIRT_RAM_END == dest_addr + alloc_size
342 * | ... |
343 * +--------------+
344 * | Mapping |
345 * +--------------+
346 * | Reserved |
347 * +--------------+ <- K_MEM_VIRT_RAM_END
348 */
349
350 /* Free the two unused regions */
351 virt_region_free(UINT_TO_POINTER(dest_addr),
352 aligned_dest_addr - dest_addr);
353 if (((dest_addr + alloc_size) - (aligned_dest_addr + size)) > 0) {
354 virt_region_free(UINT_TO_POINTER(aligned_dest_addr + size),
355 (dest_addr + alloc_size) - (aligned_dest_addr + size));
356 }
357
358 dest_addr = aligned_dest_addr;
359 }
360
361 /* Need to make sure this does not step into kernel memory */
362 if (dest_addr < POINTER_TO_UINT(Z_VIRT_REGION_START_ADDR)) {
363 (void)sys_bitarray_free(&virt_region_bitmap, size, offset);
364 return NULL;
365 }
366
367 return UINT_TO_POINTER(dest_addr);
368 }
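
/* Illustrative pairing of the two helpers above (sketch only; as noted
 * earlier, z_mm_lock must be held by the caller):
 *
 *   void *va = virt_region_alloc(4 * CONFIG_MMU_PAGE_SIZE,
 *                                CONFIG_MMU_PAGE_SIZE);
 *
 *   if (va != NULL) {
 *       arch_mem_map(va, some_phys, 4 * CONFIG_MMU_PAGE_SIZE, flags);
 *       ...
 *       arch_mem_unmap(va, 4 * CONFIG_MMU_PAGE_SIZE);
 *       virt_region_free(va, 4 * CONFIG_MMU_PAGE_SIZE);
 *   }
 *
 * `some_phys` and `flags` are placeholders.
 */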
369
370 /*
371 * Free page frames management
372 *
373 * Call all of these functions with z_mm_lock held.
374 */
375
376 /* Linked list of unused and available page frames.
377 *
378 * TODO: This is very simple and treats all free page frames as being equal.
379 * However, there are use-cases to consolidate free pages such that entire
380 * SRAM banks can be switched off to save power, and so obtaining free pages
381 * may require a more complex ontology which prefers page frames in RAM banks
382 * which are still active.
383 *
384 * This implies in the future there may be multiple slists managing physical
385 * pages. Each page frame will still just have one snode link.
386 */
387 static sys_sflist_t free_page_frame_list;
388
389 /* Number of unused and available free page frames.
390 * This information may go stale immediately.
391 */
392 static size_t z_free_page_count;
393
394 #define PF_ASSERT(pf, expr, fmt, ...) \
395 __ASSERT(expr, "page frame 0x%lx: " fmt, k_mem_page_frame_to_phys(pf), \
396 ##__VA_ARGS__)
397
398 /* Get an unused page frame; we don't care which one. Returns NULL if there are none */
399 static struct k_mem_page_frame *free_page_frame_list_get(void)
400 {
401 sys_sfnode_t *node;
402 struct k_mem_page_frame *pf = NULL;
403
404 node = sys_sflist_get(&free_page_frame_list);
405 if (node != NULL) {
406 z_free_page_count--;
407 pf = CONTAINER_OF(node, struct k_mem_page_frame, node);
408 PF_ASSERT(pf, k_mem_page_frame_is_free(pf),
409 "on free list but not free");
410 pf->va_and_flags = 0;
411 }
412
413 return pf;
414 }
415
416 /* Release a page frame back into the list of free pages */
417 static void free_page_frame_list_put(struct k_mem_page_frame *pf)
418 {
419 PF_ASSERT(pf, k_mem_page_frame_is_available(pf),
420 "unavailable page put on free list");
421
422 sys_sfnode_init(&pf->node, K_MEM_PAGE_FRAME_FREE);
423 sys_sflist_append(&free_page_frame_list, &pf->node);
424 z_free_page_count++;
425 }
426
427 static void free_page_frame_list_init(void)
428 {
429 sys_sflist_init(&free_page_frame_list);
430 }
431
432 static void page_frame_free_locked(struct k_mem_page_frame *pf)
433 {
434 pf->va_and_flags = 0;
435 free_page_frame_list_put(pf);
436 }
437
438 /*
439 * Memory Mapping
440 */
441
442 /* Called after the frame is mapped in the arch layer, to update our
443 * local ontology (and do some assertions while we're at it)
444 */
445 static void frame_mapped_set(struct k_mem_page_frame *pf, void *addr)
446 {
447 PF_ASSERT(pf, !k_mem_page_frame_is_free(pf),
448 "attempted to map a page frame on the free list");
449 PF_ASSERT(pf, !k_mem_page_frame_is_reserved(pf),
450 "attempted to map a reserved page frame");
451
452 /* We do allow multiple mappings for pinned page frames
453 * since we will never need to reverse map them.
454 * This is uncommon; use-cases are for things like the
455 * Zephyr equivalent of VDSOs
456 */
457 PF_ASSERT(pf, !k_mem_page_frame_is_mapped(pf) || k_mem_page_frame_is_pinned(pf),
458 "non-pinned and already mapped to %p",
459 k_mem_page_frame_to_virt(pf));
460
461 uintptr_t flags_mask = CONFIG_MMU_PAGE_SIZE - 1;
462 uintptr_t va = (uintptr_t)addr & ~flags_mask;
463
464 pf->va_and_flags &= flags_mask;
465 pf->va_and_flags |= va | K_MEM_PAGE_FRAME_MAPPED;
466 }
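
/* Worked example of the va_and_flags packing above (values assumed only for
 * illustration): with 4 KiB pages, flags_mask is 0xFFF. Mapping a frame to
 * virtual address 0x80042000 keeps whatever flag bits were already set in
 * the low 12 bits and ORs in the page-aligned VA plus
 * K_MEM_PAGE_FRAME_MAPPED, so the upper bits always hold the VA and the
 * low bits hold the per-frame flags.
 */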
467
468 /* LCOV_EXCL_START */
469 /* Go through page frames to find the physical address mapped
470 * by a virtual address.
471 *
472 * @param[in] virt Virtual Address
473 * @param[out] phys Physical address mapped to the input virtual address
474 * if such mapping exists.
475 *
476 * @retval 0 if mapping is found and valid
477 * @retval -EFAULT if virtual address is not mapped
478 */
479 static int virt_to_page_frame(void *virt, uintptr_t *phys)
480 {
481 uintptr_t paddr;
482 struct k_mem_page_frame *pf;
483 int ret = -EFAULT;
484
485 K_MEM_PAGE_FRAME_FOREACH(paddr, pf) {
486 if (k_mem_page_frame_is_mapped(pf)) {
487 if (virt == k_mem_page_frame_to_virt(pf)) {
488 ret = 0;
489 if (phys != NULL) {
490 *phys = k_mem_page_frame_to_phys(pf);
491 }
492 break;
493 }
494 }
495 }
496
497 return ret;
498 }
499 /* LCOV_EXCL_STOP */
500
501 __weak FUNC_ALIAS(virt_to_page_frame, arch_page_phys_get, int);
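
/* Usage sketch for the reverse lookup above (illustrative; `some_vaddr` is
 * a placeholder). Architectures normally override this weak alias with a
 * real page table walk, but the contract is the same:
 *
 *   uintptr_t phys;
 *
 *   if (arch_page_phys_get(some_vaddr, &phys) == 0) {
 *       ... some_vaddr is currently mapped at physical address phys ...
 *   } else {
 *       ... -EFAULT: no mapping for some_vaddr ...
 *   }
 */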
502
503 #ifdef CONFIG_DEMAND_PAGING
504 static int page_frame_prepare_locked(struct k_mem_page_frame *pf, bool *dirty_ptr,
505 bool page_in, uintptr_t *location_ptr);
506
507 static inline void do_backing_store_page_in(uintptr_t location);
508 static inline void do_backing_store_page_out(uintptr_t location);
509 #endif /* CONFIG_DEMAND_PAGING */
510
511 /* Allocate a free page frame, and map it to a specified virtual address
512 *
513 * TODO: Add optional support for copy-on-write mappings to a zero page instead
514 * of allocating, in which case page frames will be allocated lazily as
515 * the mappings to the zero page get touched. This will avoid expensive
516 * page-ins as memory is mapped and physical RAM or backing store storage will
517 * not be used if the mapped memory is unused. The cost is an empty physical
518 * page of zeroes.
519 */
520 static int map_anon_page(void *addr, uint32_t flags)
521 {
522 struct k_mem_page_frame *pf;
523 uintptr_t phys;
524 bool lock = (flags & K_MEM_MAP_LOCK) != 0U;
525
526 pf = free_page_frame_list_get();
527 if (pf == NULL) {
528 #ifdef CONFIG_DEMAND_PAGING
529 uintptr_t location;
530 bool dirty;
531 int ret;
532
533 pf = k_mem_paging_eviction_select(&dirty);
534 __ASSERT(pf != NULL, "failed to get a page frame");
535 LOG_DBG("evicting %p at 0x%lx",
536 k_mem_page_frame_to_virt(pf),
537 k_mem_page_frame_to_phys(pf));
538 ret = page_frame_prepare_locked(pf, &dirty, false, &location);
539 if (ret != 0) {
540 return -ENOMEM;
541 }
542 if (dirty) {
543 do_backing_store_page_out(location);
544 }
545 pf->va_and_flags = 0;
546 #else
547 return -ENOMEM;
548 #endif /* CONFIG_DEMAND_PAGING */
549 }
550
551 phys = k_mem_page_frame_to_phys(pf);
552 arch_mem_map(addr, phys, CONFIG_MMU_PAGE_SIZE, flags);
553
554 if (lock) {
555 k_mem_page_frame_set(pf, K_MEM_PAGE_FRAME_PINNED);
556 }
557 frame_mapped_set(pf, addr);
558 #ifdef CONFIG_DEMAND_PAGING
559 if (IS_ENABLED(CONFIG_EVICTION_TRACKING) && (!lock)) {
560 k_mem_paging_eviction_add(pf);
561 }
562 #endif
563
564 LOG_DBG("memory mapping anon page %p -> 0x%lx", addr, phys);
565
566 return 0;
567 }
568
569 void *k_mem_map_phys_guard(uintptr_t phys, size_t size, uint32_t flags, bool is_anon)
570 {
571 uint8_t *dst;
572 size_t total_size;
573 int ret;
574 k_spinlock_key_t key;
575 uint8_t *pos;
576 bool uninit = (flags & K_MEM_MAP_UNINIT) != 0U;
577
578 __ASSERT(!is_anon || (is_anon && page_frames_initialized),
579 "%s called too early", __func__);
580 __ASSERT((flags & K_MEM_CACHE_MASK) == 0U,
581 "%s does not support explicit cache settings", __func__);
582
583 if (((flags & K_MEM_PERM_USER) != 0U) &&
584 ((flags & K_MEM_MAP_UNINIT) != 0U)) {
585 LOG_ERR("user access to anonymous uninitialized pages is forbidden");
586 return NULL;
587 }
588 if ((size % CONFIG_MMU_PAGE_SIZE) != 0U) {
589 LOG_ERR("unaligned size %zu passed to %s", size, __func__);
590 return NULL;
591 }
592 if (size == 0) {
593 LOG_ERR("zero sized memory mapping");
594 return NULL;
595 }
596
597 /* Need extra for the guard pages (before and after) which we
598 * won't map.
599 */
600 if (size_add_overflow(size, CONFIG_MMU_PAGE_SIZE * 2, &total_size)) {
601 LOG_ERR("too large size %zu passed to %s", size, __func__);
602 return NULL;
603 }
604
605 key = k_spin_lock(&z_mm_lock);
606
607 dst = virt_region_alloc(total_size, CONFIG_MMU_PAGE_SIZE);
608 if (dst == NULL) {
609 /* Address space has no free region */
610 goto out;
611 }
612
613 /* Unmap both guard pages to make sure accessing them
614 * will generate a fault.
615 */
616 arch_mem_unmap(dst, CONFIG_MMU_PAGE_SIZE);
617 arch_mem_unmap(dst + CONFIG_MMU_PAGE_SIZE + size,
618 CONFIG_MMU_PAGE_SIZE);
619
620 /* Skip over the "before" guard page in returned address. */
621 dst += CONFIG_MMU_PAGE_SIZE;
622
623 if (is_anon) {
624 /* Mapping from anonymous memory */
625 flags |= K_MEM_CACHE_WB;
626 #ifdef CONFIG_DEMAND_MAPPING
627 if ((flags & K_MEM_MAP_LOCK) == 0) {
628 flags |= K_MEM_MAP_UNPAGED;
629 VIRT_FOREACH(dst, size, pos) {
630 arch_mem_map(pos,
631 uninit ? ARCH_UNPAGED_ANON_UNINIT
632 : ARCH_UNPAGED_ANON_ZERO,
633 CONFIG_MMU_PAGE_SIZE, flags);
634 }
635 LOG_DBG("memory mapping anon pages %p to %p unpaged", dst, pos-1);
636 /* skip the memset() below */
637 uninit = true;
638 } else
639 #endif
640 {
641 VIRT_FOREACH(dst, size, pos) {
642 ret = map_anon_page(pos, flags);
643
644 if (ret != 0) {
645 /* TODO:
646 * call k_mem_unmap(dst, pos - dst)
647 * when implemented in #28990 and
648 * release any guard virtual page as well.
649 */
650 dst = NULL;
651 goto out;
652 }
653 }
654 }
655 } else {
656 /* Mapping known physical memory.
657 *
658 * arch_mem_map() is a void function and does not return
659 * anything. Arch code usually uses ASSERT() to catch
660 * mapping errors. Assume this works correctly for now.
661 */
662 arch_mem_map(dst, phys, size, flags);
663 }
664
665 out:
666 k_spin_unlock(&z_mm_lock, key);
667
668 if (dst != NULL && !uninit) {
669 /* If we later implement mappings to a copy-on-write
670 * zero page, won't need this step
671 */
672 memset(dst, 0, size);
673 }
674
675 return dst;
676 }
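
/* Sketch of how this function is typically reached through the public
 * k_mem_map()/k_mem_unmap() wrappers (an assumption about the public API;
 * they forward here with is_anon == true):
 *
 *   size_t sz = 4 * CONFIG_MMU_PAGE_SIZE;
 *   void *buf = k_mem_map(sz, K_MEM_PERM_RW);
 *
 *   if (buf != NULL) {
 *       memset(buf, 0xAA, sz);     // backed by anonymous page frames
 *       k_mem_unmap(buf, sz);      // returns frames, VA and guard pages
 *   }
 *
 * Accessing buf[-1] or buf[sz] would hit an unmapped guard page and fault
 * instead of silently corrupting an adjacent mapping.
 */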
677
678 void k_mem_unmap_phys_guard(void *addr, size_t size, bool is_anon)
679 {
680 uintptr_t phys;
681 uint8_t *pos;
682 struct k_mem_page_frame *pf;
683 k_spinlock_key_t key;
684 size_t total_size;
685 int ret;
686
687 /* Need space for the "before" guard page */
688 __ASSERT_NO_MSG(POINTER_TO_UINT(addr) >= CONFIG_MMU_PAGE_SIZE);
689
690 /* Make sure address range is still valid after accounting
691 * for two guard pages.
692 */
693 pos = (uint8_t *)addr - CONFIG_MMU_PAGE_SIZE;
694 k_mem_assert_virtual_region(pos, size + (CONFIG_MMU_PAGE_SIZE * 2));
695
696 key = k_spin_lock(&z_mm_lock);
697
698 /* Check if both guard pages are unmapped.
699 * Bail if not, as this is probably a region not mapped
700 * using k_mem_map().
701 */
702 pos = addr;
703 ret = arch_page_phys_get(pos - CONFIG_MMU_PAGE_SIZE, NULL);
704 if (ret == 0) {
705 __ASSERT(ret != 0,
706 "%s: cannot find preceding guard page for (%p, %zu)",
707 __func__, addr, size);
708 goto out;
709 }
710
711 ret = arch_page_phys_get(pos + size, NULL);
712 if (ret == 0) {
713 __ASSERT(ret != 0,
714 "%s: cannot find succeeding guard page for (%p, %zu)",
715 __func__, addr, size);
716 goto out;
717 }
718
719 if (is_anon) {
720 /* Unmapping anonymous memory */
721 VIRT_FOREACH(addr, size, pos) {
722 #ifdef CONFIG_DEMAND_PAGING
723 enum arch_page_location status;
724 uintptr_t location;
725
726 status = arch_page_location_get(pos, &location);
727 switch (status) {
728 case ARCH_PAGE_LOCATION_PAGED_OUT:
729 /*
730 * No pf is associated with this mapping.
731 * Simply get rid of the MMU entry and free
732 * corresponding backing store.
733 */
734 arch_mem_unmap(pos, CONFIG_MMU_PAGE_SIZE);
735 k_mem_paging_backing_store_location_free(location);
736 continue;
737 case ARCH_PAGE_LOCATION_PAGED_IN:
738 /*
739 * The page is in memory but it may not be
740 * accessible in order to manage tracking
741 * of the ARCH_DATA_PAGE_ACCESSED flag
742 * meaning arch_page_phys_get() could fail.
743 * Still, we know the actual phys address.
744 */
745 phys = location;
746 ret = 0;
747 break;
748 default:
749 ret = arch_page_phys_get(pos, &phys);
750 break;
751 }
752 #else
753 ret = arch_page_phys_get(pos, &phys);
754 #endif
755 __ASSERT(ret == 0,
756 "%s: cannot unmap an unmapped address %p",
757 __func__, pos);
758 if (ret != 0) {
759 /* Found an address not mapped. Do not continue. */
760 goto out;
761 }
762
763 __ASSERT(k_mem_is_page_frame(phys),
764 "%s: 0x%lx is not a page frame", __func__, phys);
765 if (!k_mem_is_page_frame(phys)) {
766 /* Physical address has no corresponding page frame
767 * description in the page frame array.
768 * This should not happen. Do not continue.
769 */
770 goto out;
771 }
772
773 /* Grab the corresponding page frame from physical address */
774 pf = k_mem_phys_to_page_frame(phys);
775
776 __ASSERT(k_mem_page_frame_is_mapped(pf),
777 "%s: 0x%lx is not a mapped page frame", __func__, phys);
778 if (!k_mem_page_frame_is_mapped(pf)) {
779 /* Page frame is not marked mapped.
780 * This should not happen. Do not continue.
781 */
782 goto out;
783 }
784
785 arch_mem_unmap(pos, CONFIG_MMU_PAGE_SIZE);
786 #ifdef CONFIG_DEMAND_PAGING
787 if (IS_ENABLED(CONFIG_EVICTION_TRACKING) &&
788 (!k_mem_page_frame_is_pinned(pf))) {
789 k_mem_paging_eviction_remove(pf);
790 }
791 #endif
792
793 /* Put the page frame back into free list */
794 page_frame_free_locked(pf);
795 }
796 } else {
797 /*
798 * Unmapping previous mapped memory with specific physical address.
799 *
800 * Note that we don't have to unmap the guard pages, as they should
801 * have been unmapped. We just need to unmap the in-between
802 * region [addr, (addr + size)).
803 */
804 arch_mem_unmap(addr, size);
805 }
806
807 /* There are guard pages just before and after the mapped
808 * region. So we also need to free them from the bitmap.
809 */
810 pos = (uint8_t *)addr - CONFIG_MMU_PAGE_SIZE;
811 total_size = size + (CONFIG_MMU_PAGE_SIZE * 2);
812 virt_region_free(pos, total_size);
813
814 out:
815 k_spin_unlock(&z_mm_lock, key);
816 }
817
818 int k_mem_update_flags(void *addr, size_t size, uint32_t flags)
819 {
820 uintptr_t phys;
821 k_spinlock_key_t key;
822 int ret;
823
824 k_mem_assert_virtual_region(addr, size);
825
826 key = k_spin_lock(&z_mm_lock);
827
828 /*
829 * We can achieve desired result without explicit architecture support
830 * by unmapping and remapping the same physical memory using new flags.
831 */
832
833 ret = arch_page_phys_get(addr, &phys);
834 if (ret < 0) {
835 goto out;
836 }
837
838 /* TODO: detect and handle paged-out memory as well */
839
840 arch_mem_unmap(addr, size);
841 arch_mem_map(addr, phys, size, flags);
842
843 out:
844 k_spin_unlock(&z_mm_lock, key);
845 return ret;
846 }
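
/* Usage sketch (illustrative): make a previously mapped, page-aligned
 * region read-only by remapping it without K_MEM_PERM_RW. `buf` and `sz`
 * are placeholders.
 *
 *   int err = k_mem_update_flags(buf, sz, K_MEM_CACHE_WB);
 *
 *   if (err == 0) {
 *       ... subsequent writes to buf will now fault ...
 *   }
 */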
847
848 size_t k_mem_free_get(void)
849 {
850 size_t ret;
851 k_spinlock_key_t key;
852
853 __ASSERT(page_frames_initialized, "%s called too early", __func__);
854
855 key = k_spin_lock(&z_mm_lock);
856 #ifdef CONFIG_DEMAND_PAGING
857 if (z_free_page_count > CONFIG_DEMAND_PAGING_PAGE_FRAMES_RESERVE) {
858 ret = z_free_page_count - CONFIG_DEMAND_PAGING_PAGE_FRAMES_RESERVE;
859 } else {
860 ret = 0;
861 }
862 #else
863 ret = z_free_page_count;
864 #endif /* CONFIG_DEMAND_PAGING */
865 k_spin_unlock(&z_mm_lock, key);
866
867 return ret * (size_t)CONFIG_MMU_PAGE_SIZE;
868 }
869
870 /* Get the default virtual region alignment, here the default MMU page size
871 *
872 * @param[in] phys Physical address of region to be mapped, aligned to MMU_PAGE_SIZE
873 * @param[in] size Size of region to be mapped, aligned to MMU_PAGE_SIZE
874 *
875 * @retval alignment to apply on the virtual address of this region
876 */
877 static size_t virt_region_align(uintptr_t phys, size_t size)
878 {
879 ARG_UNUSED(phys);
880 ARG_UNUSED(size);
881
882 return CONFIG_MMU_PAGE_SIZE;
883 }
884
885 __weak FUNC_ALIAS(virt_region_align, arch_virt_region_align, size_t);
886
887 /* This may be called from arch early boot code before z_cstart() is invoked.
888 * Data will have been copied and BSS zeroed, but this function must not
889 * rely on any other initialization having been done in order to work correctly.
890 */
891 void k_mem_map_phys_bare(uint8_t **virt_ptr, uintptr_t phys, size_t size, uint32_t flags)
892 {
893 uintptr_t aligned_phys, addr_offset;
894 size_t aligned_size, align_boundary;
895 k_spinlock_key_t key;
896 uint8_t *dest_addr;
897 size_t num_bits;
898 size_t offset;
899
900 #ifndef CONFIG_KERNEL_DIRECT_MAP
901 __ASSERT(!(flags & K_MEM_DIRECT_MAP), "The direct-map is not enabled");
902 #endif /* CONFIG_KERNEL_DIRECT_MAP */
903 addr_offset = k_mem_region_align(&aligned_phys, &aligned_size,
904 phys, size,
905 CONFIG_MMU_PAGE_SIZE);
906 __ASSERT(aligned_size != 0U, "0-length mapping at 0x%lx", aligned_phys);
907 __ASSERT(aligned_phys < (aligned_phys + (aligned_size - 1)),
908 "wraparound for physical address 0x%lx (size %zu)",
909 aligned_phys, aligned_size);
910
911 align_boundary = arch_virt_region_align(aligned_phys, aligned_size);
912
913 key = k_spin_lock(&z_mm_lock);
914
915 if (IS_ENABLED(CONFIG_KERNEL_DIRECT_MAP) &&
916 (flags & K_MEM_DIRECT_MAP)) {
917 dest_addr = (uint8_t *)aligned_phys;
918
919 /* Mark the region of virtual memory bitmap as used
920 * if the region overlaps the virtual memory space.
921 *
922 * Basically if either end of the region is within the
923 * virtual memory space, we need to mark the bits.
924 */
925
926 if (IN_RANGE(aligned_phys,
927 (uintptr_t)K_MEM_VIRT_RAM_START,
928 (uintptr_t)(K_MEM_VIRT_RAM_END - 1)) ||
929 IN_RANGE(aligned_phys + aligned_size - 1,
930 (uintptr_t)K_MEM_VIRT_RAM_START,
931 (uintptr_t)(K_MEM_VIRT_RAM_END - 1))) {
932 uint8_t *adjusted_start = MAX(dest_addr, K_MEM_VIRT_RAM_START);
933 uint8_t *adjusted_end = MIN(dest_addr + aligned_size,
934 K_MEM_VIRT_RAM_END);
935 size_t adjusted_sz = adjusted_end - adjusted_start;
936
937 num_bits = adjusted_sz / CONFIG_MMU_PAGE_SIZE;
938 offset = virt_to_bitmap_offset(adjusted_start, adjusted_sz);
939 if (sys_bitarray_test_and_set_region(
940 &virt_region_bitmap, num_bits, offset, true)) {
941 goto fail;
942 }
943 }
944 } else {
945 /* Obtain an appropriately sized chunk of virtual memory */
946 dest_addr = virt_region_alloc(aligned_size, align_boundary);
947 if (!dest_addr) {
948 goto fail;
949 }
950 }
951
952 /* If this fails there's something amiss with virt_region_alloc() */
953 __ASSERT((uintptr_t)dest_addr <
954 ((uintptr_t)dest_addr + (size - 1)),
955 "wraparound for virtual address %p (size %zu)",
956 dest_addr, size);
957
958 LOG_DBG("arch_mem_map(%p, 0x%lx, %zu, %x) offset %lu", dest_addr,
959 aligned_phys, aligned_size, flags, addr_offset);
960
961 arch_mem_map(dest_addr, aligned_phys, aligned_size, flags);
962 k_spin_unlock(&z_mm_lock, key);
963
964 *virt_ptr = dest_addr + addr_offset;
965 return;
966 fail:
967 /* May re-visit this in the future, but for now running out of
968 * virtual address space or failing the arch_mem_map() call is
969 * an unrecoverable situation.
970 *
971 * Other problems not related to resource exhaustion we leave as
972 * assertions since they are clearly programming mistakes.
973 */
974 LOG_ERR("memory mapping 0x%lx (size %zu, flags 0x%x) failed",
975 phys, size, flags);
976 k_panic();
977 }
978
979 void k_mem_unmap_phys_bare(uint8_t *virt, size_t size)
980 {
981 uintptr_t aligned_virt, addr_offset;
982 size_t aligned_size;
983 k_spinlock_key_t key;
984
985 addr_offset = k_mem_region_align(&aligned_virt, &aligned_size,
986 POINTER_TO_UINT(virt), size,
987 CONFIG_MMU_PAGE_SIZE);
988 __ASSERT(aligned_size != 0U, "0-length mapping at 0x%lx", aligned_virt);
989 __ASSERT(aligned_virt < (aligned_virt + (aligned_size - 1)),
990 "wraparound for virtual address 0x%lx (size %zu)",
991 aligned_virt, aligned_size);
992
993 key = k_spin_lock(&z_mm_lock);
994
995 LOG_DBG("arch_mem_unmap(0x%lx, %zu) offset %lu",
996 aligned_virt, aligned_size, addr_offset);
997
998 arch_mem_unmap(UINT_TO_POINTER(aligned_virt), aligned_size);
999 virt_region_free(UINT_TO_POINTER(aligned_virt), aligned_size);
1000 k_spin_unlock(&z_mm_lock, key);
1001 }
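
/* Sketch of the bare mapping pair above (illustrative): mapping a
 * hypothetical device register block uncached and tearing it down again.
 * The physical address, register offset and size are placeholders.
 *
 *   uint8_t *regs;
 *
 *   k_mem_map_phys_bare(&regs, 0xF8000000UL, 0x1000,
 *                       K_MEM_PERM_RW | K_MEM_CACHE_NONE);
 *   sys_write32(0x1, (mem_addr_t)regs);
 *   k_mem_unmap_phys_bare(regs, 0x1000);
 */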
1002
1003 /*
1004 * Miscellaneous
1005 */
1006
1007 size_t k_mem_region_align(uintptr_t *aligned_addr, size_t *aligned_size,
1008 uintptr_t addr, size_t size, size_t align)
1009 {
1010 size_t addr_offset;
1011
1012 /* The actual mapped region must be page-aligned. Round down the
1013 * physical address and pad the region size appropriately
1014 */
1015 *aligned_addr = ROUND_DOWN(addr, align);
1016 addr_offset = addr - *aligned_addr;
1017 *aligned_size = ROUND_UP(size + addr_offset, align);
1018
1019 return addr_offset;
1020 }
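
/* Worked example: k_mem_region_align(&pa, &sz, 0x1234, 0x100, 0x1000)
 * rounds the address down to pa = 0x1000, returns addr_offset = 0x234,
 * and pads the size to sz = ROUND_UP(0x100 + 0x234, 0x1000) = 0x1000,
 * so the whole [0x1234, 0x1334) request is covered by one aligned page.
 */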
1021
1022 #if defined(CONFIG_LINKER_USE_BOOT_SECTION) || defined(CONFIG_LINKER_USE_PINNED_SECTION)
1023 static void mark_linker_section_pinned(void *start_addr, void *end_addr,
1024 bool pin)
1025 {
1026 struct k_mem_page_frame *pf;
1027 uint8_t *addr;
1028
1029 uintptr_t pinned_start = ROUND_DOWN(POINTER_TO_UINT(start_addr),
1030 CONFIG_MMU_PAGE_SIZE);
1031 uintptr_t pinned_end = ROUND_UP(POINTER_TO_UINT(end_addr),
1032 CONFIG_MMU_PAGE_SIZE);
1033 size_t pinned_size = pinned_end - pinned_start;
1034
1035 VIRT_FOREACH(UINT_TO_POINTER(pinned_start), pinned_size, addr)
1036 {
1037 pf = k_mem_phys_to_page_frame(K_MEM_BOOT_VIRT_TO_PHYS(addr));
1038 frame_mapped_set(pf, addr);
1039
1040 if (pin) {
1041 k_mem_page_frame_set(pf, K_MEM_PAGE_FRAME_PINNED);
1042 } else {
1043 k_mem_page_frame_clear(pf, K_MEM_PAGE_FRAME_PINNED);
1044 #ifdef CONFIG_DEMAND_PAGING
1045 if (IS_ENABLED(CONFIG_EVICTION_TRACKING) &&
1046 k_mem_page_frame_is_evictable(pf)) {
1047 k_mem_paging_eviction_add(pf);
1048 }
1049 #endif
1050 }
1051 }
1052 }
1053 #endif /* CONFIG_LINKER_USE_BOOT_SECTION) || CONFIG_LINKER_USE_PINNED_SECTION */
1054
1055 #ifdef CONFIG_LINKER_USE_ONDEMAND_SECTION
1056 static void z_paging_ondemand_section_map(void)
1057 {
1058 uint8_t *addr;
1059 size_t size;
1060 uintptr_t location;
1061 uint32_t flags;
1062
1063 size = (uintptr_t)lnkr_ondemand_text_size;
1064 flags = K_MEM_MAP_UNPAGED | K_MEM_PERM_EXEC | K_MEM_CACHE_WB;
1065 VIRT_FOREACH(lnkr_ondemand_text_start, size, addr) {
1066 k_mem_paging_backing_store_location_query(addr, &location);
1067 arch_mem_map(addr, location, CONFIG_MMU_PAGE_SIZE, flags);
1068 sys_bitarray_set_region(&virt_region_bitmap, 1,
1069 virt_to_bitmap_offset(addr, CONFIG_MMU_PAGE_SIZE));
1070 }
1071
1072 size = (uintptr_t)lnkr_ondemand_rodata_size;
1073 flags = K_MEM_MAP_UNPAGED | K_MEM_CACHE_WB;
1074 VIRT_FOREACH(lnkr_ondemand_rodata_start, size, addr) {
1075 k_mem_paging_backing_store_location_query(addr, &location);
1076 arch_mem_map(addr, location, CONFIG_MMU_PAGE_SIZE, flags);
1077 sys_bitarray_set_region(&virt_region_bitmap, 1,
1078 virt_to_bitmap_offset(addr, CONFIG_MMU_PAGE_SIZE));
1079 }
1080 }
1081 #endif /* CONFIG_LINKER_USE_ONDEMAND_SECTION */
1082
1083 void z_mem_manage_init(void)
1084 {
1085 uintptr_t phys;
1086 uint8_t *addr;
1087 struct k_mem_page_frame *pf;
1088 k_spinlock_key_t key = k_spin_lock(&z_mm_lock);
1089
1090 free_page_frame_list_init();
1091
1092 ARG_UNUSED(addr);
1093
1094 #ifdef CONFIG_ARCH_HAS_RESERVED_PAGE_FRAMES
1095 /* If some page frames are unavailable for use as memory, arch
1096 * code will mark K_MEM_PAGE_FRAME_RESERVED in their flags
1097 */
1098 arch_reserved_pages_update();
1099 #endif /* CONFIG_ARCH_HAS_RESERVED_PAGE_FRAMES */
1100
1101 #ifdef CONFIG_LINKER_GENERIC_SECTIONS_PRESENT_AT_BOOT
1102 /* All pages composing the Zephyr image are mapped at boot in a
1103 * predictable way. This can change at runtime.
1104 */
1105 VIRT_FOREACH(K_MEM_KERNEL_VIRT_START, K_MEM_KERNEL_VIRT_SIZE, addr)
1106 {
1107 pf = k_mem_phys_to_page_frame(K_MEM_BOOT_VIRT_TO_PHYS(addr));
1108 frame_mapped_set(pf, addr);
1109
1110 /* TODO: for now we pin the whole Zephyr image. Demand paging
1111 * is currently tested with anonymously-mapped pages, which are not
1112 * pinned.
1113 *
1114 * We will need to setup linker regions for a subset of kernel
1115 * code/data pages which are pinned in memory and
1116 * may not be evicted. This will contain critical CPU data
1117 * structures, and any code used to perform page fault
1118 * handling, page-ins, etc.
1119 */
1120 k_mem_page_frame_set(pf, K_MEM_PAGE_FRAME_PINNED);
1121 }
1122 #endif /* CONFIG_LINKER_GENERIC_SECTIONS_PRESENT_AT_BOOT */
1123
1124 #ifdef CONFIG_LINKER_USE_BOOT_SECTION
1125 /* Pin the boot section to prevent it from being swapped out during
1126 * boot process. Will be un-pinned once boot process completes.
1127 */
1128 mark_linker_section_pinned(lnkr_boot_start, lnkr_boot_end, true);
1129 #endif /* CONFIG_LINKER_USE_BOOT_SECTION */
1130
1131 #ifdef CONFIG_LINKER_USE_PINNED_SECTION
1132 /* Pin the page frames corresponding to the pinned symbols */
1133 mark_linker_section_pinned(lnkr_pinned_start, lnkr_pinned_end, true);
1134 #endif /* CONFIG_LINKER_USE_PINNED_SECTION */
1135
1136 /* Any remaining pages that aren't mapped, reserved, or pinned get
1137 * added to the free pages list
1138 */
1139 K_MEM_PAGE_FRAME_FOREACH(phys, pf) {
1140 if (k_mem_page_frame_is_available(pf)) {
1141 free_page_frame_list_put(pf);
1142 }
1143 }
1144 LOG_DBG("free page frames: %zu", z_free_page_count);
1145
1146 #ifdef CONFIG_DEMAND_PAGING
1147 #ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
1148 z_paging_histogram_init();
1149 #endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */
1150 k_mem_paging_backing_store_init();
1151 k_mem_paging_eviction_init();
1152
1153 if (IS_ENABLED(CONFIG_EVICTION_TRACKING)) {
1154 /* start tracking evictable pages installed above, if any */
1155 K_MEM_PAGE_FRAME_FOREACH(phys, pf) {
1156 if (k_mem_page_frame_is_evictable(pf)) {
1157 k_mem_paging_eviction_add(pf);
1158 }
1159 }
1160 }
1161 #endif /* CONFIG_DEMAND_PAGING */
1162
1163 #ifdef CONFIG_LINKER_USE_ONDEMAND_SECTION
1164 z_paging_ondemand_section_map();
1165 #endif
1166
1167 #if __ASSERT_ON
1168 page_frames_initialized = true;
1169 #endif
1170 k_spin_unlock(&z_mm_lock, key);
1171
1172 #ifndef CONFIG_LINKER_GENERIC_SECTIONS_PRESENT_AT_BOOT
1173 /* If the BSS section is not present in memory at boot,
1174 * it would not have been cleared. This needs to be
1175 * done now since the paging mechanism has been initialized
1176 * and the BSS pages can be brought into physical
1177 * memory to be cleared.
1178 */
1179 z_bss_zero();
1180 #endif /* CONFIG_LINKER_GENERIC_SECTIONS_PRESENT_AT_BOOT */
1181 }
1182
1183 void z_mem_manage_boot_finish(void)
1184 {
1185 #ifdef CONFIG_LINKER_USE_BOOT_SECTION
1186 /* At the end of boot process, unpin the boot sections
1187 * as they don't need to be in memory all the time anymore.
1188 */
1189 mark_linker_section_pinned(lnkr_boot_start, lnkr_boot_end, false);
1190 #endif /* CONFIG_LINKER_USE_BOOT_SECTION */
1191 }
1192
1193 #ifdef CONFIG_DEMAND_PAGING
1194
1195 #ifdef CONFIG_DEMAND_PAGING_STATS
1196 struct k_mem_paging_stats_t paging_stats;
1197 extern struct k_mem_paging_histogram_t z_paging_histogram_eviction;
1198 extern struct k_mem_paging_histogram_t z_paging_histogram_backing_store_page_in;
1199 extern struct k_mem_paging_histogram_t z_paging_histogram_backing_store_page_out;
1200 #endif /* CONFIG_DEMAND_PAGING_STATS */
1201
1202 static inline void do_backing_store_page_in(uintptr_t location)
1203 {
1204 #ifdef CONFIG_DEMAND_MAPPING
1205 /* Check for special cases */
1206 switch (location) {
1207 case ARCH_UNPAGED_ANON_ZERO:
1208 memset(K_MEM_SCRATCH_PAGE, 0, CONFIG_MMU_PAGE_SIZE);
1209 __fallthrough;
1210 case ARCH_UNPAGED_ANON_UNINIT:
1211 /* nothing else to do */
1212 return;
1213 default:
1214 break;
1215 }
1216 #endif /* CONFIG_DEMAND_MAPPING */
1217
1218 #ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
1219 uint32_t time_diff;
1220
1221 #ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS
1222 timing_t time_start, time_end;
1223
1224 time_start = timing_counter_get();
1225 #else
1226 uint32_t time_start;
1227
1228 time_start = k_cycle_get_32();
1229 #endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */
1230 #endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */
1231
1232 k_mem_paging_backing_store_page_in(location);
1233
1234 #ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
1235 #ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS
1236 time_end = timing_counter_get();
1237 time_diff = (uint32_t)timing_cycles_get(&time_start, &time_end);
1238 #else
1239 time_diff = k_cycle_get_32() - time_start;
1240 #endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */
1241
1242 z_paging_histogram_inc(&z_paging_histogram_backing_store_page_in,
1243 time_diff);
1244 #endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */
1245 }
1246
1247 static inline void do_backing_store_page_out(uintptr_t location)
1248 {
1249 #ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
1250 uint32_t time_diff;
1251
1252 #ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS
1253 timing_t time_start, time_end;
1254
1255 time_start = timing_counter_get();
1256 #else
1257 uint32_t time_start;
1258
1259 time_start = k_cycle_get_32();
1260 #endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */
1261 #endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */
1262
1263 k_mem_paging_backing_store_page_out(location);
1264
1265 #ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
1266 #ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS
1267 time_end = timing_counter_get();
1268 time_diff = (uint32_t)timing_cycles_get(&time_start, &time_end);
1269 #else
1270 time_diff = k_cycle_get_32() - time_start;
1271 #endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */
1272
1273 z_paging_histogram_inc(&z_paging_histogram_backing_store_page_out,
1274 time_diff);
1275 #endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */
1276 }
1277
1278 #if defined(CONFIG_SMP) && defined(CONFIG_DEMAND_PAGING_ALLOW_IRQ)
1279 /*
1280 * SMP support is very simple. Some resources such as the scratch page could
1281 * be made per CPU, backing store driver execution be confined to the faulting
1282 * CPU, statistics be made to cope with access concurrency, etc. But in the
1283 * end we're dealing with memory transfer to/from some external storage which
1284 * is inherently slow and whose access is most likely serialized anyway.
1285 * So let's simply enforce global demand paging serialization across all CPUs
1286 * with a mutex as there is no real gain from added parallelism here.
1287 */
1288 static K_MUTEX_DEFINE(z_mm_paging_lock);
1289 #endif
1290
1291 static void virt_region_foreach(void *addr, size_t size,
1292 void (*func)(void *))
1293 {
1294 k_mem_assert_virtual_region(addr, size);
1295
1296 for (size_t offset = 0; offset < size; offset += CONFIG_MMU_PAGE_SIZE) {
1297 func((uint8_t *)addr + offset);
1298 }
1299 }
1300
1301 /*
1302 * Perform some preparatory steps before paging out. The provided page frame
1303 * must be evicted to the backing store immediately after this is called
1304 * with a call to k_mem_paging_backing_store_page_out() if it contains
1305 * a data page.
1306 *
1307 * - Map page frame to scratch area if requested. This is always true if we're
1308 * doing a page fault, but is only set on manual evictions if the page is
1309 * dirty.
1310 * - If mapped:
1311 * - obtain backing store location and populate location parameter
1312 * - Update page tables with location
1313 * - Mark page frame as busy
1314 *
1315 * Returns -ENOMEM if the backing store is full
1316 */
1317 static int page_frame_prepare_locked(struct k_mem_page_frame *pf, bool *dirty_ptr,
1318 bool page_fault, uintptr_t *location_ptr)
1319 {
1320 uintptr_t phys;
1321 int ret;
1322 bool dirty = *dirty_ptr;
1323
1324 phys = k_mem_page_frame_to_phys(pf);
1325 __ASSERT(!k_mem_page_frame_is_pinned(pf), "page frame 0x%lx is pinned",
1326 phys);
1327
1328 /* If the backing store doesn't have a copy of the page, even if it
1329 * wasn't modified, treat as dirty. This can happen for a few
1330 * reasons:
1331 * 1) Page has never been swapped out before, and the backing store
1332 * wasn't pre-populated with this data page.
1333 * 2) Page was swapped out before, but the page contents were not
1334 * preserved after swapping back in.
1335 * 3) Page contents were preserved when swapped back in, but were later
1336 * evicted from the backing store to make room for other evicted
1337 * pages.
1338 */
1339 if (k_mem_page_frame_is_mapped(pf)) {
1340 dirty = dirty || !k_mem_page_frame_is_backed(pf);
1341 }
1342
1343 if (dirty || page_fault) {
1344 arch_mem_scratch(phys);
1345 }
1346
1347 if (k_mem_page_frame_is_mapped(pf)) {
1348 ret = k_mem_paging_backing_store_location_get(pf, location_ptr,
1349 page_fault);
1350 if (ret != 0) {
1351 LOG_ERR("out of backing store memory");
1352 return -ENOMEM;
1353 }
1354 arch_mem_page_out(k_mem_page_frame_to_virt(pf), *location_ptr);
1355
1356 if (IS_ENABLED(CONFIG_EVICTION_TRACKING)) {
1357 k_mem_paging_eviction_remove(pf);
1358 }
1359 } else {
1360 /* Shouldn't happen unless this function is mis-used */
1361 __ASSERT(!dirty, "un-mapped page determined to be dirty");
1362 }
1363 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1364 /* Mark as busy so that k_mem_page_frame_is_evictable() returns false */
1365 __ASSERT(!k_mem_page_frame_is_busy(pf), "page frame 0x%lx is already busy",
1366 phys);
1367 k_mem_page_frame_set(pf, K_MEM_PAGE_FRAME_BUSY);
1368 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1369 /* Update dirty parameter, since we set to true if it wasn't backed
1370 * even if otherwise clean
1371 */
1372 *dirty_ptr = dirty;
1373
1374 return 0;
1375 }
1376
1377 static int do_mem_evict(void *addr)
1378 {
1379 bool dirty;
1380 struct k_mem_page_frame *pf;
1381 uintptr_t location;
1382 k_spinlock_key_t key;
1383 uintptr_t flags, phys;
1384 int ret;
1385
1386 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1387 __ASSERT(!k_is_in_isr(),
1388 "%s is unavailable in ISRs with CONFIG_DEMAND_PAGING_ALLOW_IRQ",
1389 __func__);
1390 #ifdef CONFIG_SMP
1391 k_mutex_lock(&z_mm_paging_lock, K_FOREVER);
1392 #else
1393 k_sched_lock();
1394 #endif
1395 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1396 key = k_spin_lock(&z_mm_lock);
1397 flags = arch_page_info_get(addr, &phys, false);
1398 __ASSERT((flags & ARCH_DATA_PAGE_NOT_MAPPED) == 0,
1399 "address %p isn't mapped", addr);
1400 if ((flags & ARCH_DATA_PAGE_LOADED) == 0) {
1401 /* Un-mapped or already evicted. Nothing to do */
1402 ret = 0;
1403 goto out;
1404 }
1405
1406 dirty = (flags & ARCH_DATA_PAGE_DIRTY) != 0;
1407 pf = k_mem_phys_to_page_frame(phys);
1408 __ASSERT(k_mem_page_frame_to_virt(pf) == addr, "page frame address mismatch");
1409 ret = page_frame_prepare_locked(pf, &dirty, false, &location);
1410 if (ret != 0) {
1411 goto out;
1412 }
1413
1414 __ASSERT(ret == 0, "failed to prepare page frame");
1415 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1416 k_spin_unlock(&z_mm_lock, key);
1417 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1418 if (dirty) {
1419 do_backing_store_page_out(location);
1420 }
1421 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1422 key = k_spin_lock(&z_mm_lock);
1423 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1424 page_frame_free_locked(pf);
1425 out:
1426 k_spin_unlock(&z_mm_lock, key);
1427 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1428 #ifdef CONFIG_SMP
1429 k_mutex_unlock(&z_mm_paging_lock);
1430 #else
1431 k_sched_unlock();
1432 #endif
1433 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1434 return ret;
1435 }
1436
1437 int k_mem_page_out(void *addr, size_t size)
1438 {
1439 __ASSERT(page_frames_initialized, "%s called on %p too early", __func__,
1440 addr);
1441 k_mem_assert_virtual_region(addr, size);
1442
1443 for (size_t offset = 0; offset < size; offset += CONFIG_MMU_PAGE_SIZE) {
1444 void *pos = (uint8_t *)addr + offset;
1445 int ret;
1446
1447 ret = do_mem_evict(pos);
1448 if (ret != 0) {
1449 return ret;
1450 }
1451 }
1452
1453 return 0;
1454 }
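
/* Usage sketch (illustrative): proactively evict a rarely-used,
 * page-aligned buffer so its page frames can be reused before demand
 * paging is forced to evict something hotter. `cold_buf` and `cold_sz`
 * are placeholders.
 *
 *   int err = k_mem_page_out(cold_buf, cold_sz);
 *
 *   if (err == -ENOMEM) {
 *       ... backing store is full; the pages remain resident ...
 *   }
 */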
1455
1456 int k_mem_page_frame_evict(uintptr_t phys)
1457 {
1458 k_spinlock_key_t key;
1459 struct k_mem_page_frame *pf;
1460 bool dirty;
1461 uintptr_t flags;
1462 uintptr_t location;
1463 int ret;
1464
1465 __ASSERT(page_frames_initialized, "%s called on 0x%lx too early",
1466 __func__, phys);
1467
1468 /* Implementation is similar to do_page_fault() except there is no
1469 * data page to page-in, see comments in that function.
1470 */
1471
1472 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1473 __ASSERT(!k_is_in_isr(),
1474 "%s is unavailable in ISRs with CONFIG_DEMAND_PAGING_ALLOW_IRQ",
1475 __func__);
1476 #ifdef CONFIG_SMP
1477 k_mutex_lock(&z_mm_paging_lock, K_FOREVER);
1478 #else
1479 k_sched_lock();
1480 #endif
1481 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1482 key = k_spin_lock(&z_mm_lock);
1483 pf = k_mem_phys_to_page_frame(phys);
1484 if (!k_mem_page_frame_is_mapped(pf)) {
1485 /* Nothing to do, free page */
1486 ret = 0;
1487 goto out;
1488 }
1489 flags = arch_page_info_get(k_mem_page_frame_to_virt(pf), NULL, false);
1490 /* Shouldn't ever happen */
1491 __ASSERT((flags & ARCH_DATA_PAGE_LOADED) != 0, "data page not loaded");
1492 dirty = (flags & ARCH_DATA_PAGE_DIRTY) != 0;
1493 ret = page_frame_prepare_locked(pf, &dirty, false, &location);
1494 if (ret != 0) {
1495 goto out;
1496 }
1497
1498 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1499 k_spin_unlock(&z_mm_lock, key);
1500 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1501 if (dirty) {
1502 do_backing_store_page_out(location);
1503 }
1504 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1505 key = k_spin_lock(&z_mm_lock);
1506 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1507 page_frame_free_locked(pf);
1508 out:
1509 k_spin_unlock(&z_mm_lock, key);
1510 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1511 #ifdef CONFIG_SMP
1512 k_mutex_unlock(&z_mm_paging_lock);
1513 #else
1514 k_sched_unlock();
1515 #endif
1516 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1517 return ret;
1518 }
1519
1520 static inline void paging_stats_faults_inc(struct k_thread *faulting_thread,
1521 int key)
1522 {
1523 #ifdef CONFIG_DEMAND_PAGING_STATS
1524 bool is_irq_unlocked = arch_irq_unlocked(key);
1525
1526 paging_stats.pagefaults.cnt++;
1527
1528 if (is_irq_unlocked) {
1529 paging_stats.pagefaults.irq_unlocked++;
1530 } else {
1531 paging_stats.pagefaults.irq_locked++;
1532 }
1533
1534 #ifdef CONFIG_DEMAND_PAGING_THREAD_STATS
1535 faulting_thread->paging_stats.pagefaults.cnt++;
1536
1537 if (is_irq_unlocked) {
1538 faulting_thread->paging_stats.pagefaults.irq_unlocked++;
1539 } else {
1540 faulting_thread->paging_stats.pagefaults.irq_locked++;
1541 }
1542 #else
1543 ARG_UNUSED(faulting_thread);
1544 #endif /* CONFIG_DEMAND_PAGING_THREAD_STATS */
1545
1546 #ifndef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1547 if (k_is_in_isr()) {
1548 paging_stats.pagefaults.in_isr++;
1549
1550 #ifdef CONFIG_DEMAND_PAGING_THREAD_STATS
1551 faulting_thread->paging_stats.pagefaults.in_isr++;
1552 #endif /* CONFIG_DEMAND_PAGING_THREAD_STATS */
1553 }
1554 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1555 #endif /* CONFIG_DEMAND_PAGING_STATS */
1556 }
1557
1558 static inline void paging_stats_eviction_inc(struct k_thread *faulting_thread,
1559 bool dirty)
1560 {
1561 #ifdef CONFIG_DEMAND_PAGING_STATS
1562 if (dirty) {
1563 paging_stats.eviction.dirty++;
1564 } else {
1565 paging_stats.eviction.clean++;
1566 }
1567 #ifdef CONFIG_DEMAND_PAGING_THREAD_STATS
1568 if (dirty) {
1569 faulting_thread->paging_stats.eviction.dirty++;
1570 } else {
1571 faulting_thread->paging_stats.eviction.clean++;
1572 }
1573 #else
1574 ARG_UNUSED(faulting_thread);
1575 #endif /* CONFIG_DEMAND_PAGING_THREAD_STATS */
1576 #endif /* CONFIG_DEMAND_PAGING_STATS */
1577 }
1578
1579 static inline struct k_mem_page_frame *do_eviction_select(bool *dirty)
1580 {
1581 struct k_mem_page_frame *pf;
1582
1583 #ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
1584 uint32_t time_diff;
1585
1586 #ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS
1587 timing_t time_start, time_end;
1588
1589 time_start = timing_counter_get();
1590 #else
1591 uint32_t time_start;
1592
1593 time_start = k_cycle_get_32();
1594 #endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */
1595 #endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */
1596
1597 pf = k_mem_paging_eviction_select(dirty);
1598
1599 #ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
1600 #ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS
1601 time_end = timing_counter_get();
1602 time_diff = (uint32_t)timing_cycles_get(&time_start, &time_end);
1603 #else
1604 time_diff = k_cycle_get_32() - time_start;
1605 #endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */
1606
1607 z_paging_histogram_inc(&z_paging_histogram_eviction, time_diff);
1608 #endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */
1609
1610 return pf;
1611 }
1612
1613 static bool do_page_fault(void *addr, bool pin)
1614 {
1615 struct k_mem_page_frame *pf;
1616 k_spinlock_key_t key;
1617 uintptr_t page_in_location, page_out_location;
1618 enum arch_page_location status;
1619 bool result;
1620 bool dirty = false;
1621 struct k_thread *faulting_thread;
1622 int ret;
1623
1624 __ASSERT(page_frames_initialized, "page fault at %p happened too early",
1625 addr);
1626
1627 LOG_DBG("page fault at %p", addr);
1628
1629 /*
1630 * TODO: Add performance accounting:
1631 * - k_mem_paging_eviction_select() metrics
1632 * * periodic timer execution time histogram (if implemented)
1633 */
1634
1635 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1636 /*
1637 * We do re-enable interrupts during the page-in/page-out operation
1638 * if and only if interrupts were enabled when the exception was
1639 * taken; in this configuration page faults in an ISR are a bug; all
1640 * their code/data must be pinned.
1641 *
1642 * If interrupts were disabled when the exception was taken, the
1643 * arch code is responsible for keeping them that way when entering
1644 * this function.
1645 *
1646 * If this is not enabled, then interrupts are always locked for the
1647 * entire operation. This is far worse for system interrupt latency
1648 * but requires fewer pinned pages, and ISRs may also take page faults.
1649 *
1650 * On UP we lock the scheduler so that other threads are never
1651 * scheduled during the page-in/out operation. Support for
1652 * allowing k_mem_paging_backing_store_page_out() and
1653 * k_mem_paging_backing_store_page_in() to also sleep and allow
1654 * other threads to run (such as in the case where the transfer is
1655 * async DMA) is not supported on UP. Even if limited to thread
1656 * context, arbitrary memory access triggering exceptions that put
1657 * a thread to sleep on a contended page fault operation will break
1658 * scheduling assumptions of cooperative threads or threads that
1659 * implement critical sections with spinlocks or disabling IRQs.
1660 *
1661 * On SMP, though, exclusivity cannot be assumed solely from being
1662 * a cooperative thread. Another thread with any prio may be running
1663 * on another CPU so exclusion must already be enforced by other
1664 * means. Therefore trying to prevent scheduling on SMP is pointless,
1665 * and k_sched_lock() is equivalent to a no-op on SMP anyway.
1666 * As a result, sleeping/rescheduling in the SMP case is fine.
1667 */
1668 __ASSERT(!k_is_in_isr(), "ISR page faults are forbidden");
1669 #ifdef CONFIG_SMP
1670 k_mutex_lock(&z_mm_paging_lock, K_FOREVER);
1671 #else
1672 k_sched_lock();
1673 #endif
1674 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1675
1676 key = k_spin_lock(&z_mm_lock);
1677 faulting_thread = arch_current_thread();
1678
1679 status = arch_page_location_get(addr, &page_in_location);
1680 if (status == ARCH_PAGE_LOCATION_BAD) {
1681 /* Return false to treat as a fatal error */
1682 result = false;
1683 goto out;
1684 }
1685 result = true;
1686
1687 if (status == ARCH_PAGE_LOCATION_PAGED_IN) {
1688 if (pin) {
1689 /* It's a physical memory address */
1690 uintptr_t phys = page_in_location;
1691
1692 pf = k_mem_phys_to_page_frame(phys);
1693 if (!k_mem_page_frame_is_pinned(pf)) {
1694 if (IS_ENABLED(CONFIG_EVICTION_TRACKING)) {
1695 k_mem_paging_eviction_remove(pf);
1696 }
1697 k_mem_page_frame_set(pf, K_MEM_PAGE_FRAME_PINNED);
1698 }
1699 }
1700
1701 /* This if-block is to pin the page if it is
1702 * already present in physical memory. There is
1703 * no need to go through the following code to
1704 * pull in the data pages. So skip to the end.
1705 */
1706 goto out;
1707 }
1708 __ASSERT(status == ARCH_PAGE_LOCATION_PAGED_OUT,
1709 "unexpected status value %d", status);
1710
1711 paging_stats_faults_inc(faulting_thread, key.key);
1712
1713 pf = free_page_frame_list_get();
1714 if (pf == NULL) {
1715 /* Need to evict a page frame */
1716 pf = do_eviction_select(&dirty);
1717 __ASSERT(pf != NULL, "failed to get a page frame");
1718 LOG_DBG("evicting %p at 0x%lx",
1719 k_mem_page_frame_to_virt(pf),
1720 k_mem_page_frame_to_phys(pf));
1721
1722 paging_stats_eviction_inc(faulting_thread, dirty);
1723 }
1724 ret = page_frame_prepare_locked(pf, &dirty, true, &page_out_location);
1725 __ASSERT(ret == 0, "failed to prepare page frame");
1726
1727 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1728 k_spin_unlock(&z_mm_lock, key);
1729 /* Interrupts are now unlocked if they were not locked when we entered
1730 * this function, and we may service ISRs. The scheduler is still
1731 * locked.
1732 */
1733 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1734 if (dirty) {
1735 do_backing_store_page_out(page_out_location);
1736 }
1737 do_backing_store_page_in(page_in_location);
1738
1739 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1740 key = k_spin_lock(&z_mm_lock);
1741 k_mem_page_frame_clear(pf, K_MEM_PAGE_FRAME_BUSY);
1742 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1743 k_mem_page_frame_clear(pf, K_MEM_PAGE_FRAME_MAPPED);
1744 frame_mapped_set(pf, addr);
1745 if (pin) {
1746 k_mem_page_frame_set(pf, K_MEM_PAGE_FRAME_PINNED);
1747 }
1748
1749 arch_mem_page_in(addr, k_mem_page_frame_to_phys(pf));
1750 k_mem_paging_backing_store_page_finalize(pf, page_in_location);
1751 if (IS_ENABLED(CONFIG_EVICTION_TRACKING) && (!pin)) {
1752 k_mem_paging_eviction_add(pf);
1753 }
1754 out:
1755 k_spin_unlock(&z_mm_lock, key);
1756 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1757 #ifdef CONFIG_SMP
1758 k_mutex_unlock(&z_mm_paging_lock);
1759 #else
1760 k_sched_unlock();
1761 #endif
1762 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1763
1764 return result;
1765 }
1766
1767 static void do_page_in(void *addr)
1768 {
1769 bool ret;
1770
1771 ret = do_page_fault(addr, false);
1772 __ASSERT(ret, "unmapped memory address %p", addr);
1773 (void)ret;
1774 }
1775
1776 void k_mem_page_in(void *addr, size_t size)
1777 {
1778 __ASSERT(!IS_ENABLED(CONFIG_DEMAND_PAGING_ALLOW_IRQ) || !k_is_in_isr(),
1779 "%s may not be called in ISRs if CONFIG_DEMAND_PAGING_ALLOW_IRQ is enabled",
1780 __func__);
1781 virt_region_foreach(addr, size, do_page_in);
1782 }
1783
1784 static void do_mem_pin(void *addr)
1785 {
1786 bool ret;
1787
1788 ret = do_page_fault(addr, true);
1789 __ASSERT(ret, "unmapped memory address %p", addr);
1790 (void)ret;
1791 }
1792
1793 void k_mem_pin(void *addr, size_t size)
1794 {
1795 __ASSERT(!IS_ENABLED(CONFIG_DEMAND_PAGING_ALLOW_IRQ) || !k_is_in_isr(),
1796 "%s may not be called in ISRs if CONFIG_DEMAND_PAGING_ALLOW_IRQ is enabled",
1797 __func__);
1798 virt_region_foreach(addr, size, do_mem_pin);
1799 }
1800
1801 bool k_mem_page_fault(void *addr)
1802 {
1803 return do_page_fault(addr, false);
1804 }
1805
1806 static void do_mem_unpin(void *addr)
1807 {
1808 struct k_mem_page_frame *pf;
1809 k_spinlock_key_t key;
1810 uintptr_t flags, phys;
1811
1812 key = k_spin_lock(&z_mm_lock);
1813 flags = arch_page_info_get(addr, &phys, false);
1814 __ASSERT((flags & ARCH_DATA_PAGE_NOT_MAPPED) == 0,
1815 "invalid data page at %p", addr);
1816 if ((flags & ARCH_DATA_PAGE_LOADED) != 0) {
1817 pf = k_mem_phys_to_page_frame(phys);
1818 if (k_mem_page_frame_is_pinned(pf)) {
1819 k_mem_page_frame_clear(pf, K_MEM_PAGE_FRAME_PINNED);
1820
1821 if (IS_ENABLED(CONFIG_EVICTION_TRACKING)) {
1822 k_mem_paging_eviction_add(pf);
1823 }
1824 }
1825 }
1826 k_spin_unlock(&z_mm_lock, key);
1827 }
1828
1829 void k_mem_unpin(void *addr, size_t size)
1830 {
1831 __ASSERT(page_frames_initialized, "%s called on %p too early", __func__,
1832 addr);
1833 virt_region_foreach(addr, size, do_mem_unpin);
1834 }
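
/* Usage sketch (illustrative): pin a page-aligned buffer around a section
 * that must not take page faults, e.g. while a device is accessing it,
 * then release it back to the evictable pool. `dma_buf`, `dma_sz` and
 * start_transfer() are placeholders.
 *
 *   k_mem_pin(dma_buf, dma_sz);     // page in (if needed) and pin frames
 *   start_transfer(dma_buf, dma_sz);
 *   ...wait for completion...
 *   k_mem_unpin(dma_buf, dma_sz);   // frames become evictable again
 */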
1835
1836 #endif /* CONFIG_DEMAND_PAGING */
1837