1 /*
2 * Copyright (c) 2020 Intel Corporation
3 *
4 * SPDX-License-Identifier: Apache-2.0
5 *
6 * Routines for managing virtual address spaces
7 */
8
9 #include <stdint.h>
10 #include <kernel_arch_interface.h>
11 #include <zephyr/spinlock.h>
12 #include <mmu.h>
13 #include <zephyr/init.h>
14 #include <kernel_internal.h>
15 #include <zephyr/internal/syscall_handler.h>
16 #include <zephyr/toolchain.h>
17 #include <zephyr/linker/linker-defs.h>
18 #include <zephyr/sys/bitarray.h>
19 #include <zephyr/sys/check.h>
20 #include <zephyr/sys/math_extras.h>
21 #include <zephyr/timing/timing.h>
22 #include <zephyr/arch/common/init.h>
23 #include <zephyr/logging/log.h>
24 LOG_MODULE_DECLARE(os, CONFIG_KERNEL_LOG_LEVEL);
25
26 #ifdef CONFIG_DEMAND_PAGING
27 #include <zephyr/kernel/mm/demand_paging.h>
28 #endif /* CONFIG_DEMAND_PAGING */
29
30 /*
31 * General terminology:
32 * - A page frame is a page-sized physical memory region in RAM. It is a
33 * container where a data page may be placed. It is always referred to by
34 * physical address. We have a convention of using uintptr_t for physical
35 * addresses. We instantiate a struct k_mem_page_frame to store metadata for
36 * every page frame.
37 *
38 * - A data page is a page-sized region of data. It may exist in a page frame,
39 * or be paged out to some backing store. Its location can always be looked
40 * up in the CPU's page tables (or equivalent) by virtual address.
41 * The data type will always be void * or in some cases uint8_t * when we
42 * want to do pointer arithmetic.
43 */
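/* Illustrative sketch (not part of the build) of how the two concepts above
 * relate through helpers used throughout this file. "phys" is a hypothetical
 * physical address somewhere in RAM:
 *
 *	struct k_mem_page_frame *pf = k_mem_phys_to_page_frame(phys);
 *	uintptr_t frame_phys = k_mem_page_frame_to_phys(pf);
 *	void *data_page = k_mem_page_frame_to_virt(pf);
 *
 * The virtual address is only meaningful while the frame is mapped; a data
 * page that has been paged out has no page frame behind it at all.
 */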
44
45 /* Spinlock to protect any globals in this file and serialize page table
46 * updates in arch code
47 */
48 struct k_spinlock z_mm_lock;
49
50 /*
51 * General page frame management
52 */
53
54 /* Database of all RAM page frames */
55 struct k_mem_page_frame k_mem_page_frames[K_MEM_NUM_PAGE_FRAMES];
56
57 #if __ASSERT_ON
58 /* Indicator that k_mem_page_frames has been initialized; many of these APIs do
59 * not work before POST_KERNEL
60 */
61 static bool page_frames_initialized;
62 #endif
63
64 /* Add colors to page table dumps to indicate mapping type */
65 #define COLOR_PAGE_FRAMES 1
66
67 #if COLOR_PAGE_FRAMES
68 #define ANSI_DEFAULT "\x1B" "[0m"
69 #define ANSI_RED "\x1B" "[1;31m"
70 #define ANSI_GREEN "\x1B" "[1;32m"
71 #define ANSI_YELLOW "\x1B" "[1;33m"
72 #define ANSI_BLUE "\x1B" "[1;34m"
73 #define ANSI_MAGENTA "\x1B" "[1;35m"
74 #define ANSI_CYAN "\x1B" "[1;36m"
75 #define ANSI_GREY "\x1B" "[1;90m"
76
77 #define COLOR(x) printk(_CONCAT(ANSI_, x))
78 #else
79 #define COLOR(x) do { } while (false)
80 #endif /* COLOR_PAGE_FRAMES */
81
82 /* LCOV_EXCL_START */
83 static void page_frame_dump(struct k_mem_page_frame *pf)
84 {
85 if (k_mem_page_frame_is_free(pf)) {
86 COLOR(GREY);
87 printk("-");
88 } else if (k_mem_page_frame_is_reserved(pf)) {
89 COLOR(CYAN);
90 printk("R");
91 } else if (k_mem_page_frame_is_busy(pf)) {
92 COLOR(MAGENTA);
93 printk("B");
94 } else if (k_mem_page_frame_is_pinned(pf)) {
95 COLOR(YELLOW);
96 printk("P");
97 } else if (k_mem_page_frame_is_available(pf)) {
98 COLOR(GREY);
99 printk(".");
100 } else if (k_mem_page_frame_is_mapped(pf)) {
101 COLOR(DEFAULT);
102 printk("M");
103 } else {
104 COLOR(RED);
105 printk("?");
106 }
107 }
108
109 void k_mem_page_frames_dump(void)
110 {
111 int column = 0;
112
113 __ASSERT(page_frames_initialized, "%s called too early", __func__);
114 printk("Physical memory from 0x%lx to 0x%lx\n",
115 K_MEM_PHYS_RAM_START, K_MEM_PHYS_RAM_END);
116
117 for (int i = 0; i < K_MEM_NUM_PAGE_FRAMES; i++) {
118 struct k_mem_page_frame *pf = &k_mem_page_frames[i];
119
120 page_frame_dump(pf);
121
122 column++;
123 if (column == 64) {
124 column = 0;
125 printk("\n");
126 }
127 }
128
129 COLOR(DEFAULT);
130 if (column != 0) {
131 printk("\n");
132 }
133 }
134 /* LCOV_EXCL_STOP */
135
136 #define VIRT_FOREACH(_base, _size, _pos) \
137 for ((_pos) = (_base); \
138 (_pos) < ((uint8_t *)(_base) + (_size)); (_pos) += CONFIG_MMU_PAGE_SIZE)
139
140 #define PHYS_FOREACH(_base, _size, _pos) \
141 for ((_pos) = (_base); \
142 (_pos) < ((uintptr_t)(_base) + (_size)); (_pos) += CONFIG_MMU_PAGE_SIZE)
143
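/* Illustrative usage sketch (not part of the build): VIRT_FOREACH visits a
 * virtual region one page at a time, which is how the mapping loops below
 * use it. "region" and "region_size" are hypothetical names:
 *
 *	uint8_t *pos;
 *
 *	VIRT_FOREACH(region, region_size, pos) {
 *		arch_mem_unmap(pos, CONFIG_MMU_PAGE_SIZE);
 *	}
 */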
144
145 /*
146 * Virtual address space management
147 *
148 * Call all of these functions with z_mm_lock held.
149 *
150 * Overall virtual memory map: When the kernel starts, it resides in
151 * virtual memory in the region K_MEM_KERNEL_VIRT_START to
152 * K_MEM_KERNEL_VIRT_END. Unused virtual memory past this, up to the limit
153 * noted by CONFIG_KERNEL_VM_SIZE may be used for runtime memory mappings.
154 *
155 * If CONFIG_ARCH_MAPS_ALL_RAM is set, we do not just map the kernel image,
156 * but have a mapping for all RAM in place. This is for special architectural
157 * purposes and does not otherwise affect page frame accounting or flags;
158 * the only guarantee is that such RAM mapping outside of the Zephyr image
159 * won't be disturbed by subsequent memory mapping calls.
160 *
161 * +--------------+ <- K_MEM_VIRT_RAM_START
162 * | Undefined VM | <- May contain ancillary regions like x86_64's locore
163 * +--------------+ <- K_MEM_KERNEL_VIRT_START (often == K_MEM_VIRT_RAM_START)
164 * | Mapping for |
165 * | main kernel |
166 * | image |
167 * | |
168 * | |
169 * +--------------+ <- K_MEM_VM_FREE_START
170 * | |
171 * | Unused, |
172 * | Available VM |
173 * | |
174 * |..............| <- mapping_pos (grows downward as more mappings are made)
175 * | Mapping |
176 * +--------------+
177 * | Mapping |
178 * +--------------+
179 * | ... |
180 * +--------------+
181 * | Mapping |
182 * +--------------+ <- mappings start here
183 * | Reserved | <- special purpose virtual page(s) of size K_MEM_VM_RESERVED
184 * +--------------+ <- K_MEM_VIRT_RAM_END
185 */
186
187 /* Bitmap of virtual addresses where one bit corresponds to one page.
188 * This is being used for virt_region_alloc() to figure out which
189 * region of virtual addresses can be used for memory mapping.
190 *
191 * Note that bit #0 is the highest address so that allocation is
192 * done in reverse from highest address.
193 */
194 SYS_BITARRAY_DEFINE_STATIC(virt_region_bitmap,
195 CONFIG_KERNEL_VM_SIZE / CONFIG_MMU_PAGE_SIZE);
196
197 static bool virt_region_inited;
198
199 #define Z_VIRT_REGION_START_ADDR K_MEM_VM_FREE_START
200 #define Z_VIRT_REGION_END_ADDR (K_MEM_VIRT_RAM_END - K_MEM_VM_RESERVED)
201
202 static inline uintptr_t virt_from_bitmap_offset(size_t offset, size_t size)
203 {
204 return POINTER_TO_UINT(K_MEM_VIRT_RAM_END)
205 - (offset * CONFIG_MMU_PAGE_SIZE) - size;
206 }
207
208 static inline size_t virt_to_bitmap_offset(void *vaddr, size_t size)
209 {
210 return (POINTER_TO_UINT(K_MEM_VIRT_RAM_END)
211 - POINTER_TO_UINT(vaddr) - size) / CONFIG_MMU_PAGE_SIZE;
212 }
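/* Worked example (a sketch, assuming CONFIG_MMU_PAGE_SIZE == 0x1000):
 * bit #0 covers the highest page, i.e. the last 0x1000 bytes before
 * K_MEM_VIRT_RAM_END. For that single page:
 *
 *	virt_to_bitmap_offset(K_MEM_VIRT_RAM_END - 0x1000, 0x1000) == 0
 *	virt_from_bitmap_offset(0, 0x1000) == K_MEM_VIRT_RAM_END - 0x1000
 *
 * so for a fixed size the two helpers are inverses of each other.
 */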
213
214 static void virt_region_init(void)
215 {
216 size_t offset, num_bits;
217
218 /* There are regions where we should never map via
219 * k_mem_map() and k_mem_map_phys_bare(). Mark them as
220 * already allocated so they will never be used.
221 */
222
223 if (K_MEM_VM_RESERVED > 0) {
224 /* Mark reserved region at end of virtual address space */
225 num_bits = K_MEM_VM_RESERVED / CONFIG_MMU_PAGE_SIZE;
226 (void)sys_bitarray_set_region(&virt_region_bitmap,
227 num_bits, 0);
228 }
229
230 	/* Mark all bits up to K_MEM_VM_FREE_START as allocated */
231 num_bits = POINTER_TO_UINT(K_MEM_VM_FREE_START)
232 - POINTER_TO_UINT(K_MEM_VIRT_RAM_START);
233 offset = virt_to_bitmap_offset(K_MEM_VIRT_RAM_START, num_bits);
234 num_bits /= CONFIG_MMU_PAGE_SIZE;
235 (void)sys_bitarray_set_region(&virt_region_bitmap,
236 num_bits, offset);
237
238 virt_region_inited = true;
239 }
240
241 static void virt_region_free(void *vaddr, size_t size)
242 {
243 size_t offset, num_bits;
244 uint8_t *vaddr_u8 = (uint8_t *)vaddr;
245
246 if (unlikely(!virt_region_inited)) {
247 virt_region_init();
248 }
249
250 #ifndef CONFIG_KERNEL_DIRECT_MAP
251 	/* Without the need to support K_MEM_DIRECT_MAP, the region must be
252 	 * fully representable in the bitmap, so this case is
253 	 * simple.
254 */
255
256 __ASSERT((vaddr_u8 >= Z_VIRT_REGION_START_ADDR)
257 && ((vaddr_u8 + size - 1) < Z_VIRT_REGION_END_ADDR),
258 "invalid virtual address region %p (%zu)", vaddr_u8, size);
259 if (!((vaddr_u8 >= Z_VIRT_REGION_START_ADDR)
260 && ((vaddr_u8 + size - 1) < Z_VIRT_REGION_END_ADDR))) {
261 return;
262 }
263
264 offset = virt_to_bitmap_offset(vaddr, size);
265 num_bits = size / CONFIG_MMU_PAGE_SIZE;
266 (void)sys_bitarray_free(&virt_region_bitmap, num_bits, offset);
267 #else /* !CONFIG_KERNEL_DIRECT_MAP */
268 /* With K_MEM_DIRECT_MAP, the region can be outside of the virtual
269 * memory space, wholly within it, or overlap partially.
270 * So additional processing is needed to make sure we only
271 * mark the pages within the bitmap.
272 */
273 if (((vaddr_u8 >= Z_VIRT_REGION_START_ADDR) &&
274 (vaddr_u8 < Z_VIRT_REGION_END_ADDR)) ||
275 (((vaddr_u8 + size - 1) >= Z_VIRT_REGION_START_ADDR) &&
276 ((vaddr_u8 + size - 1) < Z_VIRT_REGION_END_ADDR))) {
277 		uint8_t *adjusted_start = MAX(vaddr_u8, Z_VIRT_REGION_START_ADDR);
278 		uint8_t *adjusted_end = MIN(vaddr_u8 + size,
279 Z_VIRT_REGION_END_ADDR);
280 size_t adjusted_sz = adjusted_end - adjusted_start;
281
282 offset = virt_to_bitmap_offset(adjusted_start, adjusted_sz);
283 num_bits = adjusted_sz / CONFIG_MMU_PAGE_SIZE;
284 (void)sys_bitarray_free(&virt_region_bitmap, num_bits, offset);
285 }
286 #endif /* !CONFIG_KERNEL_DIRECT_MAP */
287 }
288
289 static void *virt_region_alloc(size_t size, size_t align)
290 {
291 uintptr_t dest_addr;
292 size_t alloc_size;
293 size_t offset;
294 size_t num_bits;
295 int ret;
296
297 if (unlikely(!virt_region_inited)) {
298 virt_region_init();
299 }
300
301 /* Possibly request more pages to ensure we can get an aligned virtual address */
302 num_bits = (size + align - CONFIG_MMU_PAGE_SIZE) / CONFIG_MMU_PAGE_SIZE;
303 alloc_size = num_bits * CONFIG_MMU_PAGE_SIZE;
304 ret = sys_bitarray_alloc(&virt_region_bitmap, num_bits, &offset);
305 if (ret != 0) {
306 LOG_ERR("insufficient virtual address space (requested %zu)",
307 size);
308 return NULL;
309 }
310
311 /* Remember that bit #0 in bitmap corresponds to the highest
312 * virtual address. So here we need to go downwards (backwards?)
313 * to get the starting address of the allocated region.
314 */
315 dest_addr = virt_from_bitmap_offset(offset, alloc_size);
316
317 if (alloc_size > size) {
318 uintptr_t aligned_dest_addr = ROUND_UP(dest_addr, align);
319
320 /* Here is the memory organization when trying to get an aligned
321 * virtual address:
322 *
323 * +--------------+ <- K_MEM_VIRT_RAM_START
324 * | Undefined VM |
325 * +--------------+ <- K_MEM_KERNEL_VIRT_START (often == K_MEM_VIRT_RAM_START)
326 * | Mapping for |
327 * | main kernel |
328 * | image |
329 * | |
330 * | |
331 * +--------------+ <- K_MEM_VM_FREE_START
332 * | ... |
333 * +==============+ <- dest_addr
334 * | Unused |
335 * |..............| <- aligned_dest_addr
336 * | |
337 * | Aligned |
338 * | Mapping |
339 * | |
340 * |..............| <- aligned_dest_addr + size
341 * | Unused |
342 * +==============+ <- offset from K_MEM_VIRT_RAM_END == dest_addr + alloc_size
343 * | ... |
344 * +--------------+
345 * | Mapping |
346 * +--------------+
347 * | Reserved |
348 * +--------------+ <- K_MEM_VIRT_RAM_END
349 */
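		/* Worked example (a sketch, assuming CONFIG_MMU_PAGE_SIZE == 0x1000):
		 * size == 0x2000 with align == 0x4000 over-allocates
		 * alloc_size == 0x5000. If dest_addr came out as 0x8d000,
		 * aligned_dest_addr == 0x90000; the 0x3000 bytes below it are
		 * freed below, and nothing remains above
		 * aligned_dest_addr + size in this particular case.
		 */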
350
351 /* Free the two unused regions */
352 virt_region_free(UINT_TO_POINTER(dest_addr),
353 aligned_dest_addr - dest_addr);
354 if (((dest_addr + alloc_size) - (aligned_dest_addr + size)) > 0) {
355 virt_region_free(UINT_TO_POINTER(aligned_dest_addr + size),
356 (dest_addr + alloc_size) - (aligned_dest_addr + size));
357 }
358
359 dest_addr = aligned_dest_addr;
360 }
361
362 /* Need to make sure this does not step into kernel memory */
363 if (dest_addr < POINTER_TO_UINT(Z_VIRT_REGION_START_ADDR)) {
364 (void)sys_bitarray_free(&virt_region_bitmap, num_bits, offset);
365 return NULL;
366 }
367
368 return UINT_TO_POINTER(dest_addr);
369 }
370
371 /*
372 * Free page frames management
373 *
374 * Call all of these functions with z_mm_lock held.
375 */
376
377 /* Linked list of unused and available page frames.
378 *
379 * TODO: This is very simple and treats all free page frames as being equal.
380 * However, there are use-cases to consolidate free pages such that entire
381 * SRAM banks can be switched off to save power, and so obtaining free pages
382 * may require a more complex ontology which prefers page frames in RAM banks
383 * which are still active.
384 *
385 * This implies in the future there may be multiple slists managing physical
386 * pages. Each page frame will still just have one snode link.
387 */
388 static sys_sflist_t free_page_frame_list;
389
390 /* Number of unused and available free page frames.
391 * This information may go stale immediately.
392 */
393 static size_t z_free_page_count;
394
395 #define PF_ASSERT(pf, expr, fmt, ...) \
396 __ASSERT(expr, "page frame 0x%lx: " fmt, k_mem_page_frame_to_phys(pf), \
397 ##__VA_ARGS__)
398
399 /* Get an unused page frame; any one will do. Returns NULL if there are none. */
400 static struct k_mem_page_frame *free_page_frame_list_get(void)
401 {
402 sys_sfnode_t *node;
403 struct k_mem_page_frame *pf = NULL;
404
405 node = sys_sflist_get(&free_page_frame_list);
406 if (node != NULL) {
407 z_free_page_count--;
408 pf = CONTAINER_OF(node, struct k_mem_page_frame, node);
409 PF_ASSERT(pf, k_mem_page_frame_is_free(pf),
410 "on free list but not free");
411 pf->va_and_flags = 0;
412 }
413
414 return pf;
415 }
416
417 /* Release a page frame back into the list of free pages */
418 static void free_page_frame_list_put(struct k_mem_page_frame *pf)
419 {
420 PF_ASSERT(pf, k_mem_page_frame_is_available(pf),
421 "unavailable page put on free list");
422
423 sys_sfnode_init(&pf->node, K_MEM_PAGE_FRAME_FREE);
424 sys_sflist_append(&free_page_frame_list, &pf->node);
425 z_free_page_count++;
426 }
427
428 static void free_page_frame_list_init(void)
429 {
430 sys_sflist_init(&free_page_frame_list);
431 }
432
433 static void page_frame_free_locked(struct k_mem_page_frame *pf)
434 {
435 pf->va_and_flags = 0;
436 free_page_frame_list_put(pf);
437 }
438
439 /*
440 * Memory Mapping
441 */
442
443 /* Called after the frame is mapped in the arch layer, to update our
444 * local ontology (and do some assertions while we're at it)
445 */
446 static void frame_mapped_set(struct k_mem_page_frame *pf, void *addr)
447 {
448 PF_ASSERT(pf, !k_mem_page_frame_is_free(pf),
449 "attempted to map a page frame on the free list");
450 PF_ASSERT(pf, !k_mem_page_frame_is_reserved(pf),
451 "attempted to map a reserved page frame");
452
453 /* We do allow multiple mappings for pinned page frames
454 * since we will never need to reverse map them.
455 	 * This is uncommon; use-cases are for things like the
456 	 * Zephyr equivalent of VDSOs.
457 */
458 PF_ASSERT(pf, !k_mem_page_frame_is_mapped(pf) || k_mem_page_frame_is_pinned(pf),
459 "non-pinned and already mapped to %p",
460 k_mem_page_frame_to_virt(pf));
461
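	/* The page-aligned virtual address lives in the upper bits of
	 * va_and_flags; the low bits (below CONFIG_MMU_PAGE_SIZE) hold the
	 * page frame flags and are preserved here.
	 */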
462 uintptr_t flags_mask = CONFIG_MMU_PAGE_SIZE - 1;
463 uintptr_t va = (uintptr_t)addr & ~flags_mask;
464
465 pf->va_and_flags &= flags_mask;
466 pf->va_and_flags |= va | K_MEM_PAGE_FRAME_MAPPED;
467 }
468
469 /* LCOV_EXCL_START */
470 /* Go through page frames to find the physical address mapped
471 * by a virtual address.
472 *
473 * @param[in] virt Virtual Address
474 * @param[out] phys Physical address mapped to the input virtual address
475 * if such mapping exists.
476 *
477 * @retval 0 if mapping is found and valid
478 * @retval -EFAULT if virtual address is not mapped
479 */
480 static int virt_to_page_frame(void *virt, uintptr_t *phys)
481 {
482 uintptr_t paddr;
483 struct k_mem_page_frame *pf;
484 int ret = -EFAULT;
485
486 K_MEM_PAGE_FRAME_FOREACH(paddr, pf) {
487 if (k_mem_page_frame_is_mapped(pf)) {
488 if (virt == k_mem_page_frame_to_virt(pf)) {
489 ret = 0;
490 if (phys != NULL) {
491 *phys = k_mem_page_frame_to_phys(pf);
492 }
493 break;
494 }
495 }
496 }
497
498 return ret;
499 }
500 /* LCOV_EXCL_STOP */
501
502 __weak FUNC_ALIAS(virt_to_page_frame, arch_page_phys_get, int);
503
504 #ifdef CONFIG_DEMAND_PAGING
505 static int page_frame_prepare_locked(struct k_mem_page_frame *pf, bool *dirty_ptr,
506 bool page_in, uintptr_t *location_ptr);
507
508 static inline void do_backing_store_page_in(uintptr_t location);
509 static inline void do_backing_store_page_out(uintptr_t location);
510 #endif /* CONFIG_DEMAND_PAGING */
511
512 /* Allocate a free page frame, and map it to a specified virtual address
513 *
514 * TODO: Add optional support for copy-on-write mappings to a zero page instead
515 * of allocating, in which case page frames will be allocated lazily as
516 * the mappings to the zero page get touched. This will avoid expensive
517 * page-ins as memory is mapped and physical RAM or backing store storage will
518 * not be used if the mapped memory is unused. The cost is an empty physical
519 * page of zeroes.
520 */
521 static int map_anon_page(void *addr, uint32_t flags)
522 {
523 struct k_mem_page_frame *pf;
524 uintptr_t phys;
525 bool lock = (flags & K_MEM_MAP_LOCK) != 0U;
526
527 pf = free_page_frame_list_get();
528 if (pf == NULL) {
529 #ifdef CONFIG_DEMAND_PAGING
530 uintptr_t location;
531 bool dirty;
532 int ret;
533
534 pf = k_mem_paging_eviction_select(&dirty);
535 __ASSERT(pf != NULL, "failed to get a page frame");
536 LOG_DBG("evicting %p at 0x%lx",
537 k_mem_page_frame_to_virt(pf),
538 k_mem_page_frame_to_phys(pf));
539 ret = page_frame_prepare_locked(pf, &dirty, false, &location);
540 if (ret != 0) {
541 return -ENOMEM;
542 }
543 if (dirty) {
544 do_backing_store_page_out(location);
545 }
546 pf->va_and_flags = 0;
547 #else
548 return -ENOMEM;
549 #endif /* CONFIG_DEMAND_PAGING */
550 }
551
552 phys = k_mem_page_frame_to_phys(pf);
553 arch_mem_map(addr, phys, CONFIG_MMU_PAGE_SIZE, flags);
554
555 if (lock) {
556 k_mem_page_frame_set(pf, K_MEM_PAGE_FRAME_PINNED);
557 }
558 frame_mapped_set(pf, addr);
559 #ifdef CONFIG_DEMAND_PAGING
560 if (IS_ENABLED(CONFIG_EVICTION_TRACKING) && (!lock)) {
561 k_mem_paging_eviction_add(pf);
562 }
563 #endif
564
565 LOG_DBG("memory mapping anon page %p -> 0x%lx", addr, phys);
566
567 return 0;
568 }
569
570 void *k_mem_map_phys_guard(uintptr_t phys, size_t size, uint32_t flags, bool is_anon)
571 {
572 uint8_t *dst;
573 size_t total_size;
574 int ret;
575 k_spinlock_key_t key;
576 uint8_t *pos;
577 bool uninit = (flags & K_MEM_MAP_UNINIT) != 0U;
578
579 __ASSERT(!is_anon || (is_anon && page_frames_initialized),
580 "%s called too early", __func__);
581 __ASSERT((flags & K_MEM_CACHE_MASK) == 0U,
582 "%s does not support explicit cache settings", __func__);
583
584 if (((flags & K_MEM_PERM_USER) != 0U) &&
585 ((flags & K_MEM_MAP_UNINIT) != 0U)) {
586 LOG_ERR("user access to anonymous uninitialized pages is forbidden");
587 return NULL;
588 }
589 if ((size % CONFIG_MMU_PAGE_SIZE) != 0U) {
590 LOG_ERR("unaligned size %zu passed to %s", size, __func__);
591 return NULL;
592 }
593 if (size == 0) {
594 LOG_ERR("zero sized memory mapping");
595 return NULL;
596 }
597
598 /* Need extra for the guard pages (before and after) which we
599 * won't map.
600 */
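	/* Resulting virtual layout (illustrative):
	 *
	 *	+------------+--------------------+------------+
	 *	| guard page | size bytes, mapped | guard page |
	 *	+------------+--------------------+------------+
	 *	^            ^
	 *	|            address returned to the caller
	 *	region returned by virt_region_alloc()
	 */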
601 if (size_add_overflow(size, CONFIG_MMU_PAGE_SIZE * 2, &total_size)) {
602 LOG_ERR("too large size %zu passed to %s", size, __func__);
603 return NULL;
604 }
605
606 key = k_spin_lock(&z_mm_lock);
607
608 dst = virt_region_alloc(total_size, CONFIG_MMU_PAGE_SIZE);
609 if (dst == NULL) {
610 /* Address space has no free region */
611 goto out;
612 }
613
614 /* Unmap both guard pages to make sure accessing them
615 	 * will generate a fault.
616 */
617 arch_mem_unmap(dst, CONFIG_MMU_PAGE_SIZE);
618 arch_mem_unmap(dst + CONFIG_MMU_PAGE_SIZE + size,
619 CONFIG_MMU_PAGE_SIZE);
620
621 /* Skip over the "before" guard page in returned address. */
622 dst += CONFIG_MMU_PAGE_SIZE;
623
624 if (is_anon) {
625 /* Mapping from anonymous memory */
626 flags |= K_MEM_CACHE_WB;
627 #ifdef CONFIG_DEMAND_MAPPING
628 if ((flags & K_MEM_MAP_LOCK) == 0) {
629 flags |= K_MEM_MAP_UNPAGED;
630 VIRT_FOREACH(dst, size, pos) {
631 arch_mem_map(pos,
632 uninit ? ARCH_UNPAGED_ANON_UNINIT
633 : ARCH_UNPAGED_ANON_ZERO,
634 CONFIG_MMU_PAGE_SIZE, flags);
635 }
636 LOG_DBG("memory mapping anon pages %p to %p unpaged", dst, pos-1);
637 /* skip the memset() below */
638 uninit = true;
639 } else
640 #endif
641 {
642 VIRT_FOREACH(dst, size, pos) {
643 ret = map_anon_page(pos, flags);
644
645 if (ret != 0) {
646 /* TODO:
647 * call k_mem_unmap(dst, pos - dst)
648 * when implemented in #28990 and
649 * release any guard virtual page as well.
650 */
651 dst = NULL;
652 goto out;
653 }
654 }
655 }
656 } else {
657 /* Mapping known physical memory.
658 *
659 * arch_mem_map() is a void function and does not return
660 * anything. Arch code usually uses ASSERT() to catch
661 * mapping errors. Assume this works correctly for now.
662 */
663 arch_mem_map(dst, phys, size, flags);
664 }
665
666 out:
667 k_spin_unlock(&z_mm_lock, key);
668
669 if (dst != NULL && !uninit) {
670 /* If we later implement mappings to a copy-on-write
671 * zero page, won't need this step
672 */
673 memset(dst, 0, size);
674 }
675
676 return dst;
677 }
678
679 void k_mem_unmap_phys_guard(void *addr, size_t size, bool is_anon)
680 {
681 uintptr_t phys;
682 uint8_t *pos;
683 struct k_mem_page_frame *pf;
684 k_spinlock_key_t key;
685 size_t total_size;
686 int ret;
687
688 /* Need space for the "before" guard page */
689 __ASSERT_NO_MSG(POINTER_TO_UINT(addr) >= CONFIG_MMU_PAGE_SIZE);
690
691 /* Make sure address range is still valid after accounting
692 * for two guard pages.
693 */
694 pos = (uint8_t *)addr - CONFIG_MMU_PAGE_SIZE;
695 k_mem_assert_virtual_region(pos, size + (CONFIG_MMU_PAGE_SIZE * 2));
696
697 key = k_spin_lock(&z_mm_lock);
698
699 /* Check if both guard pages are unmapped.
700 * Bail if not, as this is probably a region not mapped
701 * using k_mem_map().
702 */
703 pos = addr;
704 ret = arch_page_phys_get(pos - CONFIG_MMU_PAGE_SIZE, NULL);
705 if (ret == 0) {
706 		__ASSERT(ret != 0,
707 "%s: cannot find preceding guard page for (%p, %zu)",
708 __func__, addr, size);
709 goto out;
710 }
711
712 ret = arch_page_phys_get(pos + size, NULL);
713 if (ret == 0) {
714 		__ASSERT(ret != 0,
715 "%s: cannot find succeeding guard page for (%p, %zu)",
716 __func__, addr, size);
717 goto out;
718 }
719
720 if (is_anon) {
721 /* Unmapping anonymous memory */
722 VIRT_FOREACH(addr, size, pos) {
723 #ifdef CONFIG_DEMAND_PAGING
724 enum arch_page_location status;
725 uintptr_t location;
726
727 status = arch_page_location_get(pos, &location);
728 switch (status) {
729 case ARCH_PAGE_LOCATION_PAGED_OUT:
730 /*
731 * No pf is associated with this mapping.
732 * Simply get rid of the MMU entry and free
733 * corresponding backing store.
734 */
735 arch_mem_unmap(pos, CONFIG_MMU_PAGE_SIZE);
736 k_mem_paging_backing_store_location_free(location);
737 continue;
738 case ARCH_PAGE_LOCATION_PAGED_IN:
739 /*
740 			 * The page is in memory, but it may have been made
741 			 * inaccessible in order to track the
742 			 * ARCH_DATA_PAGE_ACCESSED flag, meaning
743 			 * arch_page_phys_get() could fail.
744 * Still, we know the actual phys address.
745 */
746 phys = location;
747 ret = 0;
748 break;
749 default:
750 ret = arch_page_phys_get(pos, &phys);
751 break;
752 }
753 #else
754 ret = arch_page_phys_get(pos, &phys);
755 #endif
756 __ASSERT(ret == 0,
757 "%s: cannot unmap an unmapped address %p",
758 __func__, pos);
759 if (ret != 0) {
760 /* Found an address not mapped. Do not continue. */
761 goto out;
762 }
763
764 __ASSERT(k_mem_is_page_frame(phys),
765 "%s: 0x%lx is not a page frame", __func__, phys);
766 if (!k_mem_is_page_frame(phys)) {
767 /* Physical address has no corresponding page frame
768 * description in the page frame array.
769 * This should not happen. Do not continue.
770 */
771 goto out;
772 }
773
774 /* Grab the corresponding page frame from physical address */
775 pf = k_mem_phys_to_page_frame(phys);
776
777 __ASSERT(k_mem_page_frame_is_mapped(pf),
778 "%s: 0x%lx is not a mapped page frame", __func__, phys);
779 if (!k_mem_page_frame_is_mapped(pf)) {
780 /* Page frame is not marked mapped.
781 * This should not happen. Do not continue.
782 */
783 goto out;
784 }
785
786 arch_mem_unmap(pos, CONFIG_MMU_PAGE_SIZE);
787 #ifdef CONFIG_DEMAND_PAGING
788 if (IS_ENABLED(CONFIG_EVICTION_TRACKING) &&
789 (!k_mem_page_frame_is_pinned(pf))) {
790 k_mem_paging_eviction_remove(pf);
791 }
792 #endif
793
794 /* Put the page frame back into free list */
795 page_frame_free_locked(pf);
796 }
797 } else {
798 /*
799 * Unmapping previous mapped memory with specific physical address.
800 *
801 * Note that we don't have to unmap the guard pages, as they should
802 		 * have been unmapped. We just need to unmap the in-between
803 * region [addr, (addr + size)).
804 */
805 arch_mem_unmap(addr, size);
806 }
807
808 /* There are guard pages just before and after the mapped
809 * region. So we also need to free them from the bitmap.
810 */
811 pos = (uint8_t *)addr - CONFIG_MMU_PAGE_SIZE;
812 total_size = size + (CONFIG_MMU_PAGE_SIZE * 2);
813 virt_region_free(pos, total_size);
814
815 out:
816 k_spin_unlock(&z_mm_lock, key);
817 }
818
819 int k_mem_update_flags(void *addr, size_t size, uint32_t flags)
820 {
821 uintptr_t phys;
822 k_spinlock_key_t key;
823 int ret;
824
825 k_mem_assert_virtual_region(addr, size);
826
827 key = k_spin_lock(&z_mm_lock);
828
829 /*
830 * We can achieve desired result without explicit architecture support
831 * by unmapping and remapping the same physical memory using new flags.
832 */
833
834 ret = arch_page_phys_get(addr, &phys);
835 if (ret < 0) {
836 goto out;
837 }
838
839 /* TODO: detect and handle paged-out memory as well */
840
841 arch_mem_unmap(addr, size);
842 arch_mem_map(addr, phys, size, flags);
843
844 out:
845 k_spin_unlock(&z_mm_lock, key);
846 return ret;
847 }
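/* Illustrative usage sketch (not part of the build): changing an existing,
 * page-aligned mapping "buf" (hypothetical) to cached and user-accessible,
 * and, by omitting a writable permission flag, read-only:
 *
 *	int err = k_mem_update_flags(buf, CONFIG_MMU_PAGE_SIZE,
 *				     K_MEM_CACHE_WB | K_MEM_PERM_USER);
 *	if (err != 0) {
 *		// not currently mapped (or paged out, see the TODO above)
 *	}
 */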
848
849 size_t k_mem_free_get(void)
850 {
851 size_t ret;
852 k_spinlock_key_t key;
853
854 __ASSERT(page_frames_initialized, "%s called too early", __func__);
855
856 key = k_spin_lock(&z_mm_lock);
857 #ifdef CONFIG_DEMAND_PAGING
858 if (z_free_page_count > CONFIG_DEMAND_PAGING_PAGE_FRAMES_RESERVE) {
859 ret = z_free_page_count - CONFIG_DEMAND_PAGING_PAGE_FRAMES_RESERVE;
860 } else {
861 ret = 0;
862 }
863 #else
864 ret = z_free_page_count;
865 #endif /* CONFIG_DEMAND_PAGING */
866 k_spin_unlock(&z_mm_lock, key);
867
868 return ret * (size_t)CONFIG_MMU_PAGE_SIZE;
869 }
870
871 /* Get the default virtual region alignment, here the default MMU page size
872 *
873 * @param[in] phys Physical address of region to be mapped, aligned to MMU_PAGE_SIZE
874 * @param[in] size Size of region to be mapped, aligned to MMU_PAGE_SIZE
875 *
876 * @retval alignment to apply on the virtual address of this region
877 */
878 static size_t virt_region_align(uintptr_t phys, size_t size)
879 {
880 ARG_UNUSED(phys);
881 ARG_UNUSED(size);
882
883 return CONFIG_MMU_PAGE_SIZE;
884 }
885
886 __weak FUNC_ALIAS(virt_region_align, arch_virt_region_align, size_t);
887
888 /* This may be called from arch early boot code before z_cstart() is invoked.
889  * Data will be copied and BSS zeroed, but this must not rely on any
890  * initialization functions having been called beforehand in order to work correctly.
891 */
892 void k_mem_map_phys_bare(uint8_t **virt_ptr, uintptr_t phys, size_t size, uint32_t flags)
893 {
894 uintptr_t aligned_phys, addr_offset;
895 size_t aligned_size, align_boundary;
896 k_spinlock_key_t key;
897 uint8_t *dest_addr;
898 size_t num_bits;
899 size_t offset;
900
901 #ifndef CONFIG_KERNEL_DIRECT_MAP
902 __ASSERT(!(flags & K_MEM_DIRECT_MAP), "The direct-map is not enabled");
903 #endif /* CONFIG_KERNEL_DIRECT_MAP */
904 addr_offset = k_mem_region_align(&aligned_phys, &aligned_size,
905 phys, size,
906 CONFIG_MMU_PAGE_SIZE);
907 __ASSERT(aligned_size != 0U, "0-length mapping at 0x%lx", aligned_phys);
908 __ASSERT(aligned_phys < (aligned_phys + (aligned_size - 1)),
909 "wraparound for physical address 0x%lx (size %zu)",
910 aligned_phys, aligned_size);
911
912 align_boundary = arch_virt_region_align(aligned_phys, aligned_size);
913
914 key = k_spin_lock(&z_mm_lock);
915
916 if (IS_ENABLED(CONFIG_KERNEL_DIRECT_MAP) &&
917 (flags & K_MEM_DIRECT_MAP)) {
918 dest_addr = (uint8_t *)aligned_phys;
919
920 /* Mark the region of virtual memory bitmap as used
921 * if the region overlaps the virtual memory space.
922 *
923 * Basically if either end of region is within
924 * virtual memory space, we need to mark the bits.
925 */
926
927 if (IN_RANGE(aligned_phys,
928 (uintptr_t)K_MEM_VIRT_RAM_START,
929 (uintptr_t)(K_MEM_VIRT_RAM_END - 1)) ||
930 IN_RANGE(aligned_phys + aligned_size - 1,
931 (uintptr_t)K_MEM_VIRT_RAM_START,
932 (uintptr_t)(K_MEM_VIRT_RAM_END - 1))) {
933 			uint8_t *adjusted_start = MAX(dest_addr, K_MEM_VIRT_RAM_START);
934 			uint8_t *adjusted_end = MIN(dest_addr + aligned_size,
935 K_MEM_VIRT_RAM_END);
936 size_t adjusted_sz = adjusted_end - adjusted_start;
937
938 num_bits = adjusted_sz / CONFIG_MMU_PAGE_SIZE;
939 offset = virt_to_bitmap_offset(adjusted_start, adjusted_sz);
940 if (sys_bitarray_test_and_set_region(
941 &virt_region_bitmap, num_bits, offset, true)) {
942 goto fail;
943 }
944 }
945 } else {
946 /* Obtain an appropriately sized chunk of virtual memory */
947 dest_addr = virt_region_alloc(aligned_size, align_boundary);
948 if (!dest_addr) {
949 goto fail;
950 }
951 }
952
953 	/* If this fails there's something amiss with virt_region_alloc() */
954 __ASSERT((uintptr_t)dest_addr <
955 ((uintptr_t)dest_addr + (size - 1)),
956 "wraparound for virtual address %p (size %zu)",
957 dest_addr, size);
958
959 LOG_DBG("arch_mem_map(%p, 0x%lx, %zu, %x) offset %lu", (void *)dest_addr,
960 aligned_phys, aligned_size, flags, addr_offset);
961
962 arch_mem_map(dest_addr, aligned_phys, aligned_size, flags);
963 k_spin_unlock(&z_mm_lock, key);
964
965 *virt_ptr = dest_addr + addr_offset;
966 return;
967 fail:
968 /* May re-visit this in the future, but for now running out of
969 * virtual address space or failing the arch_mem_map() call is
970 * an unrecoverable situation.
971 *
972 * Other problems not related to resource exhaustion we leave as
973 * assertions since they are clearly programming mistakes.
974 */
975 LOG_ERR("memory mapping 0x%lx (size %zu, flags 0x%x) failed",
976 phys, size, flags);
977 k_panic();
978 }
979
980 void k_mem_unmap_phys_bare(uint8_t *virt, size_t size)
981 {
982 uintptr_t aligned_virt, addr_offset;
983 size_t aligned_size;
984 k_spinlock_key_t key;
985
986 addr_offset = k_mem_region_align(&aligned_virt, &aligned_size,
987 POINTER_TO_UINT(virt), size,
988 CONFIG_MMU_PAGE_SIZE);
989 __ASSERT(aligned_size != 0U, "0-length mapping at 0x%lx", aligned_virt);
990 __ASSERT(aligned_virt < (aligned_virt + (aligned_size - 1)),
991 "wraparound for virtual address 0x%lx (size %zu)",
992 aligned_virt, aligned_size);
993
994 key = k_spin_lock(&z_mm_lock);
995
996 LOG_DBG("arch_mem_unmap(0x%lx, %zu) offset %lu",
997 aligned_virt, aligned_size, addr_offset);
998
999 arch_mem_unmap(UINT_TO_POINTER(aligned_virt), aligned_size);
1000 virt_region_free(UINT_TO_POINTER(aligned_virt), aligned_size);
1001 k_spin_unlock(&z_mm_lock, key);
1002 }
1003
1004 /*
1005 * Miscellaneous
1006 */
1007
1008 size_t k_mem_region_align(uintptr_t *aligned_addr, size_t *aligned_size,
1009 uintptr_t addr, size_t size, size_t align)
1010 {
1011 size_t addr_offset;
1012
1013 /* The actual mapped region must be page-aligned. Round down the
1014 * physical address and pad the region size appropriately
1015 */
1016 *aligned_addr = ROUND_DOWN(addr, align);
1017 addr_offset = addr - *aligned_addr;
1018 *aligned_size = ROUND_UP(size + addr_offset, align);
1019
1020 return addr_offset;
1021 }
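/* Worked example (a sketch, assuming a 4 KiB page/alignment): for
 * addr == 0x80001234, size == 0x2100, align == 0x1000:
 *
 *	*aligned_addr == 0x80001000
 *	*aligned_size == 0x3000
 *	return value  == 0x234
 *
 * Callers map the padded region and add the returned offset back to the
 * virtual address they obtain, as k_mem_map_phys_bare() does above.
 */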
1022
1023 #if defined(CONFIG_LINKER_USE_BOOT_SECTION) || defined(CONFIG_LINKER_USE_PINNED_SECTION)
1024 static void mark_linker_section_pinned(void *start_addr, void *end_addr,
1025 bool pin)
1026 {
1027 struct k_mem_page_frame *pf;
1028 uint8_t *addr;
1029
1030 uintptr_t pinned_start = ROUND_DOWN(POINTER_TO_UINT(start_addr),
1031 CONFIG_MMU_PAGE_SIZE);
1032 uintptr_t pinned_end = ROUND_UP(POINTER_TO_UINT(end_addr),
1033 CONFIG_MMU_PAGE_SIZE);
1034 size_t pinned_size = pinned_end - pinned_start;
1035
1036 VIRT_FOREACH(UINT_TO_POINTER(pinned_start), pinned_size, addr)
1037 {
1038 pf = k_mem_phys_to_page_frame(K_MEM_BOOT_VIRT_TO_PHYS(addr));
1039 frame_mapped_set(pf, addr);
1040
1041 if (pin) {
1042 k_mem_page_frame_set(pf, K_MEM_PAGE_FRAME_PINNED);
1043 } else {
1044 k_mem_page_frame_clear(pf, K_MEM_PAGE_FRAME_PINNED);
1045 #ifdef CONFIG_DEMAND_PAGING
1046 if (IS_ENABLED(CONFIG_EVICTION_TRACKING) &&
1047 k_mem_page_frame_is_evictable(pf)) {
1048 k_mem_paging_eviction_add(pf);
1049 }
1050 #endif
1051 }
1052 }
1053 }
1054 #endif /* CONFIG_LINKER_USE_BOOT_SECTION) || CONFIG_LINKER_USE_PINNED_SECTION */
1055
1056 #ifdef CONFIG_LINKER_USE_ONDEMAND_SECTION
1057 static void z_paging_ondemand_section_map(void)
1058 {
1059 uint8_t *addr;
1060 size_t size;
1061 uintptr_t location;
1062 uint32_t flags;
1063
1064 size = (uintptr_t)lnkr_ondemand_text_size;
1065 flags = K_MEM_MAP_UNPAGED | K_MEM_PERM_EXEC | K_MEM_CACHE_WB;
1066 VIRT_FOREACH(lnkr_ondemand_text_start, size, addr) {
1067 k_mem_paging_backing_store_location_query(addr, &location);
1068 arch_mem_map(addr, location, CONFIG_MMU_PAGE_SIZE, flags);
1069 sys_bitarray_set_region(&virt_region_bitmap, 1,
1070 virt_to_bitmap_offset(addr, CONFIG_MMU_PAGE_SIZE));
1071 }
1072
1073 size = (uintptr_t)lnkr_ondemand_rodata_size;
1074 flags = K_MEM_MAP_UNPAGED | K_MEM_CACHE_WB;
1075 VIRT_FOREACH(lnkr_ondemand_rodata_start, size, addr) {
1076 k_mem_paging_backing_store_location_query(addr, &location);
1077 arch_mem_map(addr, location, CONFIG_MMU_PAGE_SIZE, flags);
1078 sys_bitarray_set_region(&virt_region_bitmap, 1,
1079 virt_to_bitmap_offset(addr, CONFIG_MMU_PAGE_SIZE));
1080 }
1081 }
1082 #endif /* CONFIG_LINKER_USE_ONDEMAND_SECTION */
1083
1084 void z_mem_manage_init(void)
1085 {
1086 uintptr_t phys;
1087 uint8_t *addr;
1088 struct k_mem_page_frame *pf;
1089 k_spinlock_key_t key = k_spin_lock(&z_mm_lock);
1090
1091 free_page_frame_list_init();
1092
1093 ARG_UNUSED(addr);
1094
1095 #ifdef CONFIG_ARCH_HAS_RESERVED_PAGE_FRAMES
1096 /* If some page frames are unavailable for use as memory, arch
1097 * code will mark K_MEM_PAGE_FRAME_RESERVED in their flags
1098 */
1099 arch_reserved_pages_update();
1100 #endif /* CONFIG_ARCH_HAS_RESERVED_PAGE_FRAMES */
1101
1102 #ifdef CONFIG_LINKER_GENERIC_SECTIONS_PRESENT_AT_BOOT
1103 /* All pages composing the Zephyr image are mapped at boot in a
1104 * predictable way. This can change at runtime.
1105 */
1106 VIRT_FOREACH(K_MEM_KERNEL_VIRT_START, K_MEM_KERNEL_VIRT_SIZE, addr)
1107 {
1108 pf = k_mem_phys_to_page_frame(K_MEM_BOOT_VIRT_TO_PHYS(addr));
1109 frame_mapped_set(pf, addr);
1110
1111 		/* TODO: for now we pin the whole Zephyr image. Demand paging is
1112 		 * currently tested with anonymously-mapped pages which are not
1113 * pinned.
1114 *
1115 * We will need to setup linker regions for a subset of kernel
1116 * code/data pages which are pinned in memory and
1117 * may not be evicted. This will contain critical CPU data
1118 * structures, and any code used to perform page fault
1119 * handling, page-ins, etc.
1120 */
1121 k_mem_page_frame_set(pf, K_MEM_PAGE_FRAME_PINNED);
1122 }
1123 #endif /* CONFIG_LINKER_GENERIC_SECTIONS_PRESENT_AT_BOOT */
1124
1125 #ifdef CONFIG_LINKER_USE_BOOT_SECTION
1126 /* Pin the boot section to prevent it from being swapped out during
1127 	 * the boot process. It will be un-pinned once the boot process completes.
1128 */
1129 mark_linker_section_pinned(lnkr_boot_start, lnkr_boot_end, true);
1130 #endif /* CONFIG_LINKER_USE_BOOT_SECTION */
1131
1132 #ifdef CONFIG_LINKER_USE_PINNED_SECTION
1133 	/* Pin the page frames corresponding to the pinned symbols */
1134 mark_linker_section_pinned(lnkr_pinned_start, lnkr_pinned_end, true);
1135 #endif /* CONFIG_LINKER_USE_PINNED_SECTION */
1136
1137 /* Any remaining pages that aren't mapped, reserved, or pinned get
1138 * added to the free pages list
1139 */
1140 K_MEM_PAGE_FRAME_FOREACH(phys, pf) {
1141 if (k_mem_page_frame_is_available(pf)) {
1142 free_page_frame_list_put(pf);
1143 }
1144 }
1145 LOG_DBG("free page frames: %zu", z_free_page_count);
1146
1147 #ifdef CONFIG_DEMAND_PAGING
1148 #ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
1149 z_paging_histogram_init();
1150 #endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */
1151 k_mem_paging_backing_store_init();
1152 k_mem_paging_eviction_init();
1153
1154 if (IS_ENABLED(CONFIG_EVICTION_TRACKING)) {
1155 		/* start tracking evictable pages installed above, if any */
1156 K_MEM_PAGE_FRAME_FOREACH(phys, pf) {
1157 if (k_mem_page_frame_is_evictable(pf)) {
1158 k_mem_paging_eviction_add(pf);
1159 }
1160 }
1161 }
1162 #endif /* CONFIG_DEMAND_PAGING */
1163
1164 #ifdef CONFIG_LINKER_USE_ONDEMAND_SECTION
1165 z_paging_ondemand_section_map();
1166 #endif
1167
1168 #if __ASSERT_ON
1169 page_frames_initialized = true;
1170 #endif
1171 k_spin_unlock(&z_mm_lock, key);
1172
1173 #ifndef CONFIG_LINKER_GENERIC_SECTIONS_PRESENT_AT_BOOT
1174 	/* If the BSS section is not present in memory at boot,
1175 	 * it will not have been cleared. This needs to be
1176 	 * done now since the paging mechanism has been initialized
1177 * and the BSS pages can be brought into physical
1178 * memory to be cleared.
1179 */
1180 arch_bss_zero();
1181 #endif /* CONFIG_LINKER_GENERIC_SECTIONS_PRESENT_AT_BOOT */
1182 }
1183
1184 void z_mem_manage_boot_finish(void)
1185 {
1186 #ifdef CONFIG_LINKER_USE_BOOT_SECTION
1187 	/* At the end of the boot process, unpin the boot sections
1188 * as they don't need to be in memory all the time anymore.
1189 */
1190 mark_linker_section_pinned(lnkr_boot_start, lnkr_boot_end, false);
1191 #endif /* CONFIG_LINKER_USE_BOOT_SECTION */
1192 }
1193
1194 #ifdef CONFIG_DEMAND_PAGING
1195
1196 #ifdef CONFIG_DEMAND_PAGING_STATS
1197 struct k_mem_paging_stats_t paging_stats;
1198 extern struct k_mem_paging_histogram_t z_paging_histogram_eviction;
1199 extern struct k_mem_paging_histogram_t z_paging_histogram_backing_store_page_in;
1200 extern struct k_mem_paging_histogram_t z_paging_histogram_backing_store_page_out;
1201 #endif /* CONFIG_DEMAND_PAGING_STATS */
1202
1203 static inline void do_backing_store_page_in(uintptr_t location)
1204 {
1205 #ifdef CONFIG_DEMAND_MAPPING
1206 /* Check for special cases */
1207 switch (location) {
1208 case ARCH_UNPAGED_ANON_ZERO:
1209 memset(K_MEM_SCRATCH_PAGE, 0, CONFIG_MMU_PAGE_SIZE);
1210 __fallthrough;
1211 case ARCH_UNPAGED_ANON_UNINIT:
1212 /* nothing else to do */
1213 return;
1214 default:
1215 break;
1216 }
1217 #endif /* CONFIG_DEMAND_MAPPING */
1218
1219 #ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
1220 uint32_t time_diff;
1221
1222 #ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS
1223 timing_t time_start, time_end;
1224
1225 time_start = timing_counter_get();
1226 #else
1227 uint32_t time_start;
1228
1229 time_start = k_cycle_get_32();
1230 #endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */
1231 #endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */
1232
1233 k_mem_paging_backing_store_page_in(location);
1234
1235 #ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
1236 #ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS
1237 time_end = timing_counter_get();
1238 time_diff = (uint32_t)timing_cycles_get(&time_start, &time_end);
1239 #else
1240 time_diff = k_cycle_get_32() - time_start;
1241 #endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */
1242
1243 z_paging_histogram_inc(&z_paging_histogram_backing_store_page_in,
1244 time_diff);
1245 #endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */
1246 }
1247
1248 static inline void do_backing_store_page_out(uintptr_t location)
1249 {
1250 #ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
1251 uint32_t time_diff;
1252
1253 #ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS
1254 timing_t time_start, time_end;
1255
1256 time_start = timing_counter_get();
1257 #else
1258 uint32_t time_start;
1259
1260 time_start = k_cycle_get_32();
1261 #endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */
1262 #endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */
1263
1264 k_mem_paging_backing_store_page_out(location);
1265
1266 #ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
1267 #ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS
1268 time_end = timing_counter_get();
1269 time_diff = (uint32_t)timing_cycles_get(&time_start, &time_end);
1270 #else
1271 time_diff = k_cycle_get_32() - time_start;
1272 #endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */
1273
1274 z_paging_histogram_inc(&z_paging_histogram_backing_store_page_out,
1275 time_diff);
1276 #endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */
1277 }
1278
1279 #if defined(CONFIG_SMP) && defined(CONFIG_DEMAND_PAGING_ALLOW_IRQ)
1280 /*
1281 * SMP support is very simple. Some resources such as the scratch page could
1282 * be made per CPU, backing store driver execution be confined to the faulting
1283 * CPU, statistics be made to cope with access concurrency, etc. But in the
1284 * end we're dealing with memory transfer to/from some external storage which
1285 * is inherently slow and whose access is most likely serialized anyway.
1286 * So let's simply enforce global demand paging serialization across all CPUs
1287 * with a mutex as there is no real gain from added parallelism here.
1288 */
1289 static K_MUTEX_DEFINE(z_mm_paging_lock);
1290 #endif
1291
1292 static void virt_region_foreach(void *addr, size_t size,
1293 void (*func)(void *))
1294 {
1295 k_mem_assert_virtual_region(addr, size);
1296
1297 for (size_t offset = 0; offset < size; offset += CONFIG_MMU_PAGE_SIZE) {
1298 func((uint8_t *)addr + offset);
1299 }
1300 }
1301
1302 /*
1303 * Perform some preparatory steps before paging out. The provided page frame
1304 * must be evicted to the backing store immediately after this is called
1305 * with a call to k_mem_paging_backing_store_page_out() if it contains
1306 * a data page.
1307 *
1308  * - Map page frame to scratch area if requested. This is always true if we're
1309 * doing a page fault, but is only set on manual evictions if the page is
1310 * dirty.
1311 * - If mapped:
1312 * - obtain backing store location and populate location parameter
1313 * - Update page tables with location
1314 * - Mark page frame as busy
1315 *
1316 * Returns -ENOMEM if the backing store is full
1317 */
1318 static int page_frame_prepare_locked(struct k_mem_page_frame *pf, bool *dirty_ptr,
1319 bool page_fault, uintptr_t *location_ptr)
1320 {
1321 uintptr_t phys;
1322 int ret;
1323 bool dirty = *dirty_ptr;
1324
1325 phys = k_mem_page_frame_to_phys(pf);
1326 __ASSERT(!k_mem_page_frame_is_pinned(pf), "page frame 0x%lx is pinned",
1327 phys);
1328
1329 /* If the backing store doesn't have a copy of the page, even if it
1330 * wasn't modified, treat as dirty. This can happen for a few
1331 * reasons:
1332 * 1) Page has never been swapped out before, and the backing store
1333 * wasn't pre-populated with this data page.
1334 * 2) Page was swapped out before, but the page contents were not
1335 * preserved after swapping back in.
1336 * 3) Page contents were preserved when swapped back in, but were later
1337 * evicted from the backing store to make room for other evicted
1338 * pages.
1339 */
1340 if (k_mem_page_frame_is_mapped(pf)) {
1341 dirty = dirty || !k_mem_page_frame_is_backed(pf);
1342 }
1343
1344 if (dirty || page_fault) {
1345 arch_mem_scratch(phys);
1346 }
1347
1348 if (k_mem_page_frame_is_mapped(pf)) {
1349 ret = k_mem_paging_backing_store_location_get(pf, location_ptr,
1350 page_fault);
1351 if (ret != 0) {
1352 LOG_ERR("out of backing store memory");
1353 return -ENOMEM;
1354 }
1355 arch_mem_page_out(k_mem_page_frame_to_virt(pf), *location_ptr);
1356
1357 if (IS_ENABLED(CONFIG_EVICTION_TRACKING)) {
1358 k_mem_paging_eviction_remove(pf);
1359 }
1360 } else {
1361 /* Shouldn't happen unless this function is mis-used */
1362 __ASSERT(!dirty, "un-mapped page determined to be dirty");
1363 }
1364 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1365 /* Mark as busy so that k_mem_page_frame_is_evictable() returns false */
1366 __ASSERT(!k_mem_page_frame_is_busy(pf), "page frame 0x%lx is already busy",
1367 phys);
1368 k_mem_page_frame_set(pf, K_MEM_PAGE_FRAME_BUSY);
1369 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1370 	/* Update dirty parameter, since we set it to true if it wasn't backed
1371 * even if otherwise clean
1372 */
1373 *dirty_ptr = dirty;
1374
1375 return 0;
1376 }
1377
1378 static int do_mem_evict(void *addr)
1379 {
1380 bool dirty;
1381 struct k_mem_page_frame *pf;
1382 uintptr_t location;
1383 k_spinlock_key_t key;
1384 uintptr_t flags, phys;
1385 int ret;
1386
1387 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1388 __ASSERT(!k_is_in_isr(),
1389 "%s is unavailable in ISRs with CONFIG_DEMAND_PAGING_ALLOW_IRQ",
1390 __func__);
1391 #ifdef CONFIG_SMP
1392 k_mutex_lock(&z_mm_paging_lock, K_FOREVER);
1393 #else
1394 k_sched_lock();
1395 #endif
1396 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1397 key = k_spin_lock(&z_mm_lock);
1398 flags = arch_page_info_get(addr, &phys, false);
1399 __ASSERT((flags & ARCH_DATA_PAGE_NOT_MAPPED) == 0,
1400 "address %p isn't mapped", addr);
1401 if ((flags & ARCH_DATA_PAGE_LOADED) == 0) {
1402 /* Un-mapped or already evicted. Nothing to do */
1403 ret = 0;
1404 goto out;
1405 }
1406
1407 dirty = (flags & ARCH_DATA_PAGE_DIRTY) != 0;
1408 pf = k_mem_phys_to_page_frame(phys);
1409 __ASSERT(k_mem_page_frame_to_virt(pf) == addr, "page frame address mismatch");
1410 ret = page_frame_prepare_locked(pf, &dirty, false, &location);
1411 if (ret != 0) {
1412 goto out;
1413 }
1414
1415 __ASSERT(ret == 0, "failed to prepare page frame");
1416 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1417 k_spin_unlock(&z_mm_lock, key);
1418 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1419 if (dirty) {
1420 do_backing_store_page_out(location);
1421 }
1422 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1423 key = k_spin_lock(&z_mm_lock);
1424 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1425 page_frame_free_locked(pf);
1426 out:
1427 k_spin_unlock(&z_mm_lock, key);
1428 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1429 #ifdef CONFIG_SMP
1430 k_mutex_unlock(&z_mm_paging_lock);
1431 #else
1432 k_sched_unlock();
1433 #endif
1434 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1435 return ret;
1436 }
1437
1438 int k_mem_page_out(void *addr, size_t size)
1439 {
1440 __ASSERT(page_frames_initialized, "%s called on %p too early", __func__,
1441 addr);
1442 k_mem_assert_virtual_region(addr, size);
1443
1444 for (size_t offset = 0; offset < size; offset += CONFIG_MMU_PAGE_SIZE) {
1445 void *pos = (uint8_t *)addr + offset;
1446 int ret;
1447
1448 ret = do_mem_evict(pos);
1449 if (ret != 0) {
1450 return ret;
1451 }
1452 }
1453
1454 return 0;
1455 }
1456
1457 int k_mem_page_frame_evict(uintptr_t phys)
1458 {
1459 k_spinlock_key_t key;
1460 struct k_mem_page_frame *pf;
1461 bool dirty;
1462 uintptr_t flags;
1463 uintptr_t location;
1464 int ret;
1465
1466 __ASSERT(page_frames_initialized, "%s called on 0x%lx too early",
1467 __func__, phys);
1468
1469 /* Implementation is similar to do_page_fault() except there is no
1470 * data page to page-in, see comments in that function.
1471 */
1472
1473 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1474 __ASSERT(!k_is_in_isr(),
1475 "%s is unavailable in ISRs with CONFIG_DEMAND_PAGING_ALLOW_IRQ",
1476 __func__);
1477 #ifdef CONFIG_SMP
1478 k_mutex_lock(&z_mm_paging_lock, K_FOREVER);
1479 #else
1480 k_sched_lock();
1481 #endif
1482 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1483 key = k_spin_lock(&z_mm_lock);
1484 pf = k_mem_phys_to_page_frame(phys);
1485 if (!k_mem_page_frame_is_mapped(pf)) {
1486 /* Nothing to do, free page */
1487 ret = 0;
1488 goto out;
1489 }
1490 flags = arch_page_info_get(k_mem_page_frame_to_virt(pf), NULL, false);
1491 /* Shouldn't ever happen */
1492 __ASSERT((flags & ARCH_DATA_PAGE_LOADED) != 0, "data page not loaded");
1493 dirty = (flags & ARCH_DATA_PAGE_DIRTY) != 0;
1494 ret = page_frame_prepare_locked(pf, &dirty, false, &location);
1495 if (ret != 0) {
1496 goto out;
1497 }
1498
1499 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1500 k_spin_unlock(&z_mm_lock, key);
1501 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1502 if (dirty) {
1503 do_backing_store_page_out(location);
1504 }
1505 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1506 key = k_spin_lock(&z_mm_lock);
1507 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1508 page_frame_free_locked(pf);
1509 out:
1510 k_spin_unlock(&z_mm_lock, key);
1511 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1512 #ifdef CONFIG_SMP
1513 k_mutex_unlock(&z_mm_paging_lock);
1514 #else
1515 k_sched_unlock();
1516 #endif
1517 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1518 return ret;
1519 }
1520
1521 static inline void paging_stats_faults_inc(struct k_thread *faulting_thread,
1522 int key)
1523 {
1524 #ifdef CONFIG_DEMAND_PAGING_STATS
1525 bool is_irq_unlocked = arch_irq_unlocked(key);
1526
1527 paging_stats.pagefaults.cnt++;
1528
1529 if (is_irq_unlocked) {
1530 paging_stats.pagefaults.irq_unlocked++;
1531 } else {
1532 paging_stats.pagefaults.irq_locked++;
1533 }
1534
1535 #ifdef CONFIG_DEMAND_PAGING_THREAD_STATS
1536 faulting_thread->paging_stats.pagefaults.cnt++;
1537
1538 if (is_irq_unlocked) {
1539 faulting_thread->paging_stats.pagefaults.irq_unlocked++;
1540 } else {
1541 faulting_thread->paging_stats.pagefaults.irq_locked++;
1542 }
1543 #else
1544 ARG_UNUSED(faulting_thread);
1545 #endif /* CONFIG_DEMAND_PAGING_THREAD_STATS */
1546
1547 #ifndef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1548 if (k_is_in_isr()) {
1549 paging_stats.pagefaults.in_isr++;
1550
1551 #ifdef CONFIG_DEMAND_PAGING_THREAD_STATS
1552 faulting_thread->paging_stats.pagefaults.in_isr++;
1553 #endif /* CONFIG_DEMAND_PAGING_THREAD_STATS */
1554 }
1555 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1556 #endif /* CONFIG_DEMAND_PAGING_STATS */
1557 }
1558
1559 static inline void paging_stats_eviction_inc(struct k_thread *faulting_thread,
1560 bool dirty)
1561 {
1562 #ifdef CONFIG_DEMAND_PAGING_STATS
1563 if (dirty) {
1564 paging_stats.eviction.dirty++;
1565 } else {
1566 paging_stats.eviction.clean++;
1567 }
1568 #ifdef CONFIG_DEMAND_PAGING_THREAD_STATS
1569 if (dirty) {
1570 faulting_thread->paging_stats.eviction.dirty++;
1571 } else {
1572 faulting_thread->paging_stats.eviction.clean++;
1573 }
1574 #else
1575 ARG_UNUSED(faulting_thread);
1576 #endif /* CONFIG_DEMAND_PAGING_THREAD_STATS */
1577 #endif /* CONFIG_DEMAND_PAGING_STATS */
1578 }
1579
1580 static inline struct k_mem_page_frame *do_eviction_select(bool *dirty)
1581 {
1582 struct k_mem_page_frame *pf;
1583
1584 #ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
1585 uint32_t time_diff;
1586
1587 #ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS
1588 timing_t time_start, time_end;
1589
1590 time_start = timing_counter_get();
1591 #else
1592 uint32_t time_start;
1593
1594 time_start = k_cycle_get_32();
1595 #endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */
1596 #endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */
1597
1598 pf = k_mem_paging_eviction_select(dirty);
1599
1600 #ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
1601 #ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS
1602 time_end = timing_counter_get();
1603 time_diff = (uint32_t)timing_cycles_get(&time_start, &time_end);
1604 #else
1605 time_diff = k_cycle_get_32() - time_start;
1606 #endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */
1607
1608 z_paging_histogram_inc(&z_paging_histogram_eviction, time_diff);
1609 #endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */
1610
1611 return pf;
1612 }
1613
1614 static bool do_page_fault(void *addr, bool pin)
1615 {
1616 struct k_mem_page_frame *pf;
1617 k_spinlock_key_t key;
1618 uintptr_t page_in_location, page_out_location;
1619 enum arch_page_location status;
1620 bool result;
1621 bool dirty = false;
1622 struct k_thread *faulting_thread;
1623 int ret;
1624
1625 __ASSERT(page_frames_initialized, "page fault at %p happened too early",
1626 addr);
1627
1628 LOG_DBG("page fault at %p", addr);
1629
1630 /*
1631 * TODO: Add performance accounting:
1632 * - k_mem_paging_eviction_select() metrics
1633 * * periodic timer execution time histogram (if implemented)
1634 */
1635
1636 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1637 /*
1638 * We do re-enable interrupts during the page-in/page-out operation
1639 * if and only if interrupts were enabled when the exception was
1640 * taken; in this configuration page faults in an ISR are a bug; all
1641 * their code/data must be pinned.
1642 *
1643 * If interrupts were disabled when the exception was taken, the
1644 * arch code is responsible for keeping them that way when entering
1645 * this function.
1646 *
1647 * If this is not enabled, then interrupts are always locked for the
1648 * entire operation. This is far worse for system interrupt latency
1649 	 * but requires fewer pinned pages, and ISRs may also take page faults.
1650 *
1651 * On UP we lock the scheduler so that other threads are never
1652 * scheduled during the page-in/out operation. Support for
1653 * allowing k_mem_paging_backing_store_page_out() and
1654 * k_mem_paging_backing_store_page_in() to also sleep and allow
1655 * other threads to run (such as in the case where the transfer is
1656 * async DMA) is not supported on UP. Even if limited to thread
1657 * context, arbitrary memory access triggering exceptions that put
1658 * a thread to sleep on a contended page fault operation will break
1659 * scheduling assumptions of cooperative threads or threads that
1660 * implement critical sections with spinlocks or disabling IRQs.
1661 *
1662 * On SMP, though, exclusivity cannot be assumed solely from being
1663 * a cooperative thread. Another thread with any prio may be running
1664 * on another CPU so exclusion must already be enforced by other
1665 * means. Therefore trying to prevent scheduling on SMP is pointless,
1666 * and k_sched_lock() is equivalent to a no-op on SMP anyway.
1667 * As a result, sleeping/rescheduling in the SMP case is fine.
1668 */
1669 __ASSERT(!k_is_in_isr(), "ISR page faults are forbidden");
1670 #ifdef CONFIG_SMP
1671 k_mutex_lock(&z_mm_paging_lock, K_FOREVER);
1672 #else
1673 k_sched_lock();
1674 #endif
1675 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1676
1677 key = k_spin_lock(&z_mm_lock);
1678 faulting_thread = _current;
1679
1680 status = arch_page_location_get(addr, &page_in_location);
1681 if (status == ARCH_PAGE_LOCATION_BAD) {
1682 /* Return false to treat as a fatal error */
1683 result = false;
1684 goto out;
1685 }
1686 result = true;
1687
1688 if (status == ARCH_PAGE_LOCATION_PAGED_IN) {
1689 if (pin) {
1690 /* It's a physical memory address */
1691 uintptr_t phys = page_in_location;
1692
1693 pf = k_mem_phys_to_page_frame(phys);
1694 if (!k_mem_page_frame_is_pinned(pf)) {
1695 if (IS_ENABLED(CONFIG_EVICTION_TRACKING)) {
1696 k_mem_paging_eviction_remove(pf);
1697 }
1698 k_mem_page_frame_set(pf, K_MEM_PAGE_FRAME_PINNED);
1699 }
1700 }
1701
1702 /* This if-block is to pin the page if it is
1703 * already present in physical memory. There is
1704 * no need to go through the following code to
1705 * pull in the data pages. So skip to the end.
1706 */
1707 goto out;
1708 }
1709 __ASSERT(status == ARCH_PAGE_LOCATION_PAGED_OUT,
1710 "unexpected status value %d", status);
1711
1712 paging_stats_faults_inc(faulting_thread, key.key);
1713
1714 pf = free_page_frame_list_get();
1715 if (pf == NULL) {
1716 /* Need to evict a page frame */
1717 pf = do_eviction_select(&dirty);
1718 __ASSERT(pf != NULL, "failed to get a page frame");
1719 LOG_DBG("evicting %p at 0x%lx",
1720 k_mem_page_frame_to_virt(pf),
1721 k_mem_page_frame_to_phys(pf));
1722
1723 paging_stats_eviction_inc(faulting_thread, dirty);
1724 }
1725 ret = page_frame_prepare_locked(pf, &dirty, true, &page_out_location);
1726 __ASSERT(ret == 0, "failed to prepare page frame");
1727
1728 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1729 k_spin_unlock(&z_mm_lock, key);
1730 /* Interrupts are now unlocked if they were not locked when we entered
1731 * this function, and we may service ISRs. The scheduler is still
1732 * locked.
1733 */
1734 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1735 if (dirty) {
1736 do_backing_store_page_out(page_out_location);
1737 }
1738 do_backing_store_page_in(page_in_location);
1739
1740 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1741 key = k_spin_lock(&z_mm_lock);
1742 k_mem_page_frame_clear(pf, K_MEM_PAGE_FRAME_BUSY);
1743 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1744 k_mem_page_frame_clear(pf, K_MEM_PAGE_FRAME_MAPPED);
1745 frame_mapped_set(pf, addr);
1746 if (pin) {
1747 k_mem_page_frame_set(pf, K_MEM_PAGE_FRAME_PINNED);
1748 }
1749
1750 arch_mem_page_in(addr, k_mem_page_frame_to_phys(pf));
1751 k_mem_paging_backing_store_page_finalize(pf, page_in_location);
1752 if (IS_ENABLED(CONFIG_EVICTION_TRACKING) && (!pin)) {
1753 k_mem_paging_eviction_add(pf);
1754 }
1755 out:
1756 k_spin_unlock(&z_mm_lock, key);
1757 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1758 #ifdef CONFIG_SMP
1759 k_mutex_unlock(&z_mm_paging_lock);
1760 #else
1761 k_sched_unlock();
1762 #endif
1763 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1764
1765 return result;
1766 }
1767
1768 static void do_page_in(void *addr)
1769 {
1770 bool ret;
1771
1772 ret = do_page_fault(addr, false);
1773 __ASSERT(ret, "unmapped memory address %p", addr);
1774 (void)ret;
1775 }
1776
1777 void k_mem_page_in(void *addr, size_t size)
1778 {
1779 __ASSERT(!IS_ENABLED(CONFIG_DEMAND_PAGING_ALLOW_IRQ) || !k_is_in_isr(),
1780 "%s may not be called in ISRs if CONFIG_DEMAND_PAGING_ALLOW_IRQ is enabled",
1781 __func__);
1782 virt_region_foreach(addr, size, do_page_in);
1783 }
1784
1785 static void do_mem_pin(void *addr)
1786 {
1787 bool ret;
1788
1789 ret = do_page_fault(addr, true);
1790 __ASSERT(ret, "unmapped memory address %p", addr);
1791 (void)ret;
1792 }
1793
1794 void k_mem_pin(void *addr, size_t size)
1795 {
1796 __ASSERT(!IS_ENABLED(CONFIG_DEMAND_PAGING_ALLOW_IRQ) || !k_is_in_isr(),
1797 "%s may not be called in ISRs if CONFIG_DEMAND_PAGING_ALLOW_IRQ is enabled",
1798 __func__);
1799 virt_region_foreach(addr, size, do_mem_pin);
1800 }
1801
1802 bool k_mem_page_fault(void *addr)
1803 {
1804 return do_page_fault(addr, false);
1805 }
1806
1807 static void do_mem_unpin(void *addr)
1808 {
1809 struct k_mem_page_frame *pf;
1810 k_spinlock_key_t key;
1811 uintptr_t flags, phys;
1812
1813 key = k_spin_lock(&z_mm_lock);
1814 flags = arch_page_info_get(addr, &phys, false);
1815 __ASSERT((flags & ARCH_DATA_PAGE_NOT_MAPPED) == 0,
1816 "invalid data page at %p", addr);
1817 if ((flags & ARCH_DATA_PAGE_LOADED) != 0) {
1818 pf = k_mem_phys_to_page_frame(phys);
1819 if (k_mem_page_frame_is_pinned(pf)) {
1820 k_mem_page_frame_clear(pf, K_MEM_PAGE_FRAME_PINNED);
1821
1822 if (IS_ENABLED(CONFIG_EVICTION_TRACKING)) {
1823 k_mem_paging_eviction_add(pf);
1824 }
1825 }
1826 }
1827 k_spin_unlock(&z_mm_lock, key);
1828 }
1829
1830 void k_mem_unpin(void *addr, size_t size)
1831 {
1832 __ASSERT(page_frames_initialized, "%s called on %p too early", __func__,
1833 addr);
1834 virt_region_foreach(addr, size, do_mem_unpin);
1835 }
1836
1837 #endif /* CONFIG_DEMAND_PAGING */
1838