1 /*
2  * Copyright (c) 2020 Intel Corporation
3  *
4  * SPDX-License-Identifier: Apache-2.0
5  *
6  * Routines for managing virtual address spaces
7  */
8 
9 #include <stdint.h>
10 #include <kernel_arch_interface.h>
11 #include <zephyr/spinlock.h>
12 #include <mmu.h>
13 #include <zephyr/init.h>
14 #include <kernel_internal.h>
15 #include <zephyr/internal/syscall_handler.h>
16 #include <zephyr/toolchain.h>
17 #include <zephyr/linker/linker-defs.h>
18 #include <zephyr/sys/bitarray.h>
19 #include <zephyr/sys/check.h>
20 #include <zephyr/sys/math_extras.h>
21 #include <zephyr/timing/timing.h>
22 #include <zephyr/logging/log.h>
23 LOG_MODULE_DECLARE(os, CONFIG_KERNEL_LOG_LEVEL);
24 
25 #ifdef CONFIG_DEMAND_PAGING
26 #include <zephyr/kernel/mm/demand_paging.h>
27 #endif /* CONFIG_DEMAND_PAGING */
28 
29 /*
30  * General terminology:
31  * - A page frame is a page-sized physical memory region in RAM. It is a
32  *   container where a data page may be placed. It is always referred to by
33  *   physical address. We have a convention of using uintptr_t for physical
34  *   addresses. We instantiate a struct k_mem_page_frame to store metadata for
35  *   every page frame.
36  *
37  * - A data page is a page-sized region of data. It may exist in a page frame,
38  *   or be paged out to some backing store. Its location can always be looked
39  *   up in the CPU's page tables (or equivalent) by virtual address.
40  *   The data type will always be void * or in some cases uint8_t * when we
41  *   want to do pointer arithmetic.
42  */
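
/* Illustrative sketch of these conventions (not compiled; shown only to make
 * the terminology above concrete), using helpers that appear later in this
 * file:
 *
 *	uintptr_t phys;
 *	struct k_mem_page_frame *pf;
 *
 *	K_MEM_PAGE_FRAME_FOREACH(phys, pf) {
 *		// phys is the page frame's physical address (uintptr_t)
 *		if (k_mem_page_frame_is_mapped(pf)) {
 *			// the data page currently held by this frame,
 *			// identified by its virtual address
 *			void *virt = k_mem_page_frame_to_virt(pf);
 *
 *			ARG_UNUSED(virt);
 *		}
 *	}
 */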
43 
44 /* Spinlock to protect any globals in this file and serialize page table
45  * updates in arch code
46  */
47 struct k_spinlock z_mm_lock;
48 
49 /*
50  * General page frame management
51  */
52 
53 /* Database of all RAM page frames */
54 struct k_mem_page_frame k_mem_page_frames[K_MEM_NUM_PAGE_FRAMES];
55 
56 #if __ASSERT_ON
/* Indicator that k_mem_page_frames has been initialized; many of these APIs do
 * not work before POST_KERNEL
59  */
60 static bool page_frames_initialized;
61 #endif
62 
63 /* Add colors to page table dumps to indicate mapping type */
64 #define COLOR_PAGE_FRAMES	1
65 
66 #if COLOR_PAGE_FRAMES
67 #define ANSI_DEFAULT "\x1B" "[0m"
68 #define ANSI_RED     "\x1B" "[1;31m"
69 #define ANSI_GREEN   "\x1B" "[1;32m"
70 #define ANSI_YELLOW  "\x1B" "[1;33m"
71 #define ANSI_BLUE    "\x1B" "[1;34m"
72 #define ANSI_MAGENTA "\x1B" "[1;35m"
73 #define ANSI_CYAN    "\x1B" "[1;36m"
74 #define ANSI_GREY    "\x1B" "[1;90m"
75 
76 #define COLOR(x)	printk(_CONCAT(ANSI_, x))
77 #else
78 #define COLOR(x)	do { } while (false)
79 #endif /* COLOR_PAGE_FRAMES */
80 
81 /* LCOV_EXCL_START */
static void page_frame_dump(struct k_mem_page_frame *pf)
83 {
84 	if (k_mem_page_frame_is_free(pf)) {
85 		COLOR(GREY);
86 		printk("-");
87 	} else if (k_mem_page_frame_is_reserved(pf)) {
88 		COLOR(CYAN);
89 		printk("R");
90 	} else if (k_mem_page_frame_is_busy(pf)) {
91 		COLOR(MAGENTA);
92 		printk("B");
93 	} else if (k_mem_page_frame_is_pinned(pf)) {
94 		COLOR(YELLOW);
95 		printk("P");
96 	} else if (k_mem_page_frame_is_available(pf)) {
97 		COLOR(GREY);
98 		printk(".");
99 	} else if (k_mem_page_frame_is_mapped(pf)) {
100 		COLOR(DEFAULT);
101 		printk("M");
102 	} else {
103 		COLOR(RED);
104 		printk("?");
105 	}
106 }
107 
void k_mem_page_frames_dump(void)
109 {
110 	int column = 0;
111 
112 	__ASSERT(page_frames_initialized, "%s called too early", __func__);
113 	printk("Physical memory from 0x%lx to 0x%lx\n",
114 	       K_MEM_PHYS_RAM_START, K_MEM_PHYS_RAM_END);
115 
116 	for (int i = 0; i < K_MEM_NUM_PAGE_FRAMES; i++) {
117 		struct k_mem_page_frame *pf = &k_mem_page_frames[i];
118 
119 		page_frame_dump(pf);
120 
121 		column++;
122 		if (column == 64) {
123 			column = 0;
124 			printk("\n");
125 		}
126 	}
127 
128 	COLOR(DEFAULT);
129 	if (column != 0) {
130 		printk("\n");
131 	}
132 }
133 /* LCOV_EXCL_STOP */
134 
135 #define VIRT_FOREACH(_base, _size, _pos) \
136 	for ((_pos) = (_base); \
137 	     (_pos) < ((uint8_t *)(_base) + (_size)); (_pos) += CONFIG_MMU_PAGE_SIZE)
138 
139 #define PHYS_FOREACH(_base, _size, _pos) \
140 	for ((_pos) = (_base); \
141 	     (_pos) < ((uintptr_t)(_base) + (_size)); (_pos) += CONFIG_MMU_PAGE_SIZE)
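
/* Illustrative use of VIRT_FOREACH, mirroring how it is used further below
 * (a sketch only; "region" and "region_size" are hypothetical placeholders):
 *
 *	uint8_t *pos;
 *
 *	VIRT_FOREACH(region, region_size, pos) {
 *		// pos visits the region one CONFIG_MMU_PAGE_SIZE page at a time
 *		arch_mem_unmap(pos, CONFIG_MMU_PAGE_SIZE);
 *	}
 */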
142 
143 
144 /*
145  * Virtual address space management
146  *
147  * Call all of these functions with z_mm_lock held.
148  *
149  * Overall virtual memory map: When the kernel starts, it resides in
150  * virtual memory in the region K_MEM_KERNEL_VIRT_START to
151  * K_MEM_KERNEL_VIRT_END. Unused virtual memory past this, up to the limit
152  * noted by CONFIG_KERNEL_VM_SIZE may be used for runtime memory mappings.
153  *
154  * If CONFIG_ARCH_MAPS_ALL_RAM is set, we do not just map the kernel image,
155  * but have a mapping for all RAM in place. This is for special architectural
156  * purposes and does not otherwise affect page frame accounting or flags;
157  * the only guarantee is that such RAM mapping outside of the Zephyr image
158  * won't be disturbed by subsequent memory mapping calls.
159  *
160  * +--------------+ <- K_MEM_VIRT_RAM_START
161  * | Undefined VM | <- May contain ancillary regions like x86_64's locore
162  * +--------------+ <- K_MEM_KERNEL_VIRT_START (often == K_MEM_VIRT_RAM_START)
163  * | Mapping for  |
164  * | main kernel  |
165  * | image        |
166  * |		  |
167  * |		  |
168  * +--------------+ <- K_MEM_VM_FREE_START
169  * |              |
170  * | Unused,      |
171  * | Available VM |
172  * |              |
173  * |..............| <- mapping_pos (grows downward as more mappings are made)
174  * | Mapping      |
175  * +--------------+
176  * | Mapping      |
177  * +--------------+
178  * | ...          |
179  * +--------------+
180  * | Mapping      |
181  * +--------------+ <- mappings start here
182  * | Reserved     | <- special purpose virtual page(s) of size K_MEM_VM_RESERVED
183  * +--------------+ <- K_MEM_VIRT_RAM_END
184  */
185 
186 /* Bitmap of virtual addresses where one bit corresponds to one page.
187  * This is being used for virt_region_alloc() to figure out which
188  * region of virtual addresses can be used for memory mapping.
189  *
 * Note that bit #0 corresponds to the highest address, so allocation is
 * done in reverse, starting from the highest address.
192  */
193 SYS_BITARRAY_DEFINE_STATIC(virt_region_bitmap,
194 			   CONFIG_KERNEL_VM_SIZE / CONFIG_MMU_PAGE_SIZE);
195 
196 static bool virt_region_inited;
197 
198 #define Z_VIRT_REGION_START_ADDR	K_MEM_VM_FREE_START
199 #define Z_VIRT_REGION_END_ADDR		(K_MEM_VIRT_RAM_END - K_MEM_VM_RESERVED)
200 
static inline uintptr_t virt_from_bitmap_offset(size_t offset, size_t size)
202 {
203 	return POINTER_TO_UINT(K_MEM_VIRT_RAM_END)
204 	       - (offset * CONFIG_MMU_PAGE_SIZE) - size;
205 }
206 
static inline size_t virt_to_bitmap_offset(void *vaddr, size_t size)
208 {
209 	return (POINTER_TO_UINT(K_MEM_VIRT_RAM_END)
210 		- POINTER_TO_UINT(vaddr) - size) / CONFIG_MMU_PAGE_SIZE;
211 }
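
/* Worked example of the reverse indexing (illustrative only; assumes a 4 KiB
 * CONFIG_MMU_PAGE_SIZE and a hypothetical K_MEM_VIRT_RAM_END of 0xC0000000):
 *
 *   - the highest page [0xBFFFF000, 0xC0000000) is bit #0:
 *       virt_to_bitmap_offset((void *)0xBFFFF000, 0x1000) == 0
 *   - a single page starting three pages below the end round-trips:
 *       virt_to_bitmap_offset((void *)0xBFFFD000, 0x1000) == 2
 *       virt_from_bitmap_offset(2, 0x1000) == 0xBFFFD000
 */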
212 
static void virt_region_init(void)
214 {
215 	size_t offset, num_bits;
216 
217 	/* There are regions where we should never map via
218 	 * k_mem_map() and k_mem_map_phys_bare(). Mark them as
219 	 * already allocated so they will never be used.
220 	 */
221 
222 	if (K_MEM_VM_RESERVED > 0) {
223 		/* Mark reserved region at end of virtual address space */
224 		num_bits = K_MEM_VM_RESERVED / CONFIG_MMU_PAGE_SIZE;
225 		(void)sys_bitarray_set_region(&virt_region_bitmap,
226 					      num_bits, 0);
227 	}
228 
	/* Mark all bits up to K_MEM_VM_FREE_START as allocated */
230 	num_bits = POINTER_TO_UINT(K_MEM_VM_FREE_START)
231 		   - POINTER_TO_UINT(K_MEM_VIRT_RAM_START);
232 	offset = virt_to_bitmap_offset(K_MEM_VIRT_RAM_START, num_bits);
233 	num_bits /= CONFIG_MMU_PAGE_SIZE;
234 	(void)sys_bitarray_set_region(&virt_region_bitmap,
235 				      num_bits, offset);
236 
237 	virt_region_inited = true;
238 }
239 
static void virt_region_free(void *vaddr, size_t size)
241 {
242 	size_t offset, num_bits;
243 	uint8_t *vaddr_u8 = (uint8_t *)vaddr;
244 
245 	if (unlikely(!virt_region_inited)) {
246 		virt_region_init();
247 	}
248 
249 #ifndef CONFIG_KERNEL_DIRECT_MAP
250 	/* Without the need to support K_MEM_DIRECT_MAP, the region must be
251 	 * able to be represented in the bitmap. So this case is
252 	 * simple.
253 	 */
254 
255 	__ASSERT((vaddr_u8 >= Z_VIRT_REGION_START_ADDR)
256 		 && ((vaddr_u8 + size - 1) < Z_VIRT_REGION_END_ADDR),
257 		 "invalid virtual address region %p (%zu)", vaddr_u8, size);
258 	if (!((vaddr_u8 >= Z_VIRT_REGION_START_ADDR)
259 	      && ((vaddr_u8 + size - 1) < Z_VIRT_REGION_END_ADDR))) {
260 		return;
261 	}
262 
263 	offset = virt_to_bitmap_offset(vaddr, size);
264 	num_bits = size / CONFIG_MMU_PAGE_SIZE;
265 	(void)sys_bitarray_free(&virt_region_bitmap, num_bits, offset);
266 #else /* !CONFIG_KERNEL_DIRECT_MAP */
267 	/* With K_MEM_DIRECT_MAP, the region can be outside of the virtual
268 	 * memory space, wholly within it, or overlap partially.
269 	 * So additional processing is needed to make sure we only
270 	 * mark the pages within the bitmap.
271 	 */
272 	if (((vaddr_u8 >= Z_VIRT_REGION_START_ADDR) &&
273 	     (vaddr_u8 < Z_VIRT_REGION_END_ADDR)) ||
274 	    (((vaddr_u8 + size - 1) >= Z_VIRT_REGION_START_ADDR) &&
275 	     ((vaddr_u8 + size - 1) < Z_VIRT_REGION_END_ADDR))) {
276 		uint8_t *adjusted_start = MAX(vaddr_u8, Z_VIRT_REGION_START_ADDR);
277 		uint8_t *adjusted_end = MIN(vaddr_u8 + size,
278 					    Z_VIRT_REGION_END_ADDR);
279 		size_t adjusted_sz = adjusted_end - adjusted_start;
280 
281 		offset = virt_to_bitmap_offset(adjusted_start, adjusted_sz);
282 		num_bits = adjusted_sz / CONFIG_MMU_PAGE_SIZE;
283 		(void)sys_bitarray_free(&virt_region_bitmap, num_bits, offset);
284 	}
285 #endif /* !CONFIG_KERNEL_DIRECT_MAP */
286 }
287 
static void *virt_region_alloc(size_t size, size_t align)
289 {
290 	uintptr_t dest_addr;
291 	size_t alloc_size;
292 	size_t offset;
293 	size_t num_bits;
294 	int ret;
295 
296 	if (unlikely(!virt_region_inited)) {
297 		virt_region_init();
298 	}
299 
300 	/* Possibly request more pages to ensure we can get an aligned virtual address */
301 	num_bits = (size + align - CONFIG_MMU_PAGE_SIZE) / CONFIG_MMU_PAGE_SIZE;
302 	alloc_size = num_bits * CONFIG_MMU_PAGE_SIZE;
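	/* Worked example (illustrative, assuming 4 KiB pages): for size = 8 KiB
	 * and align = 16 KiB, num_bits = (8K + 16K - 4K) / 4K = 5, so
	 * alloc_size = 20 KiB. Any 5-page window necessarily contains a
	 * 16 KiB-aligned address with at least 8 KiB after it; the unused head
	 * and tail of the window are released again further below.
	 */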
303 	ret = sys_bitarray_alloc(&virt_region_bitmap, num_bits, &offset);
304 	if (ret != 0) {
305 		LOG_ERR("insufficient virtual address space (requested %zu)",
306 			size);
307 		return NULL;
308 	}
309 
310 	/* Remember that bit #0 in bitmap corresponds to the highest
311 	 * virtual address. So here we need to go downwards (backwards?)
312 	 * to get the starting address of the allocated region.
313 	 */
314 	dest_addr = virt_from_bitmap_offset(offset, alloc_size);
315 
316 	if (alloc_size > size) {
317 		uintptr_t aligned_dest_addr = ROUND_UP(dest_addr, align);
318 
319 		/* Here is the memory organization when trying to get an aligned
320 		 * virtual address:
321 		 *
322 		 * +--------------+ <- K_MEM_VIRT_RAM_START
323 		 * | Undefined VM |
324 		 * +--------------+ <- K_MEM_KERNEL_VIRT_START (often == K_MEM_VIRT_RAM_START)
325 		 * | Mapping for  |
326 		 * | main kernel  |
327 		 * | image        |
328 		 * |		  |
329 		 * |		  |
330 		 * +--------------+ <- K_MEM_VM_FREE_START
331 		 * | ...          |
332 		 * +==============+ <- dest_addr
333 		 * | Unused       |
334 		 * |..............| <- aligned_dest_addr
335 		 * |              |
336 		 * | Aligned      |
337 		 * | Mapping      |
338 		 * |              |
339 		 * |..............| <- aligned_dest_addr + size
340 		 * | Unused       |
341 		 * +==============+ <- offset from K_MEM_VIRT_RAM_END == dest_addr + alloc_size
342 		 * | ...          |
343 		 * +--------------+
344 		 * | Mapping      |
345 		 * +--------------+
346 		 * | Reserved     |
347 		 * +--------------+ <- K_MEM_VIRT_RAM_END
348 		 */
349 
350 		/* Free the two unused regions */
351 		virt_region_free(UINT_TO_POINTER(dest_addr),
352 				 aligned_dest_addr - dest_addr);
353 		if (((dest_addr + alloc_size) - (aligned_dest_addr + size)) > 0) {
354 			virt_region_free(UINT_TO_POINTER(aligned_dest_addr + size),
355 					 (dest_addr + alloc_size) - (aligned_dest_addr + size));
356 		}
357 
358 		dest_addr = aligned_dest_addr;
359 	}
360 
361 	/* Need to make sure this does not step into kernel memory */
362 	if (dest_addr < POINTER_TO_UINT(Z_VIRT_REGION_START_ADDR)) {
363 		(void)sys_bitarray_free(&virt_region_bitmap, size, offset);
364 		return NULL;
365 	}
366 
367 	return UINT_TO_POINTER(dest_addr);
368 }
369 
370 /*
371  * Free page frames management
372  *
373  * Call all of these functions with z_mm_lock held.
374  */
375 
376 /* Linked list of unused and available page frames.
377  *
378  * TODO: This is very simple and treats all free page frames as being equal.
379  * However, there are use-cases to consolidate free pages such that entire
380  * SRAM banks can be switched off to save power, and so obtaining free pages
381  * may require a more complex ontology which prefers page frames in RAM banks
382  * which are still active.
383  *
384  * This implies in the future there may be multiple slists managing physical
385  * pages. Each page frame will still just have one snode link.
386  */
387 static sys_sflist_t free_page_frame_list;
388 
389 /* Number of unused and available free page frames.
390  * This information may go stale immediately.
391  */
392 static size_t z_free_page_count;
393 
394 #define PF_ASSERT(pf, expr, fmt, ...) \
395 	__ASSERT(expr, "page frame 0x%lx: " fmt, k_mem_page_frame_to_phys(pf), \
396 		 ##__VA_ARGS__)
397 
/* Get an unused page frame (don't care which one), or NULL if there are none */
static struct k_mem_page_frame *free_page_frame_list_get(void)
400 {
401 	sys_sfnode_t *node;
402 	struct k_mem_page_frame *pf = NULL;
403 
404 	node = sys_sflist_get(&free_page_frame_list);
405 	if (node != NULL) {
406 		z_free_page_count--;
407 		pf = CONTAINER_OF(node, struct k_mem_page_frame, node);
408 		PF_ASSERT(pf, k_mem_page_frame_is_free(pf),
409 			 "on free list but not free");
410 		pf->va_and_flags = 0;
411 	}
412 
413 	return pf;
414 }
415 
416 /* Release a page frame back into the list of free pages */
static void free_page_frame_list_put(struct k_mem_page_frame *pf)
418 {
419 	PF_ASSERT(pf, k_mem_page_frame_is_available(pf),
420 		 "unavailable page put on free list");
421 
422 	sys_sfnode_init(&pf->node, K_MEM_PAGE_FRAME_FREE);
423 	sys_sflist_append(&free_page_frame_list, &pf->node);
424 	z_free_page_count++;
425 }
426 
static void free_page_frame_list_init(void)
428 {
429 	sys_sflist_init(&free_page_frame_list);
430 }
431 
static void page_frame_free_locked(struct k_mem_page_frame *pf)
433 {
434 	pf->va_and_flags = 0;
435 	free_page_frame_list_put(pf);
436 }
437 
438 /*
439  * Memory Mapping
440  */
441 
442 /* Called after the frame is mapped in the arch layer, to update our
443  * local ontology (and do some assertions while we're at it)
444  */
static void frame_mapped_set(struct k_mem_page_frame *pf, void *addr)
446 {
447 	PF_ASSERT(pf, !k_mem_page_frame_is_free(pf),
448 		  "attempted to map a page frame on the free list");
449 	PF_ASSERT(pf, !k_mem_page_frame_is_reserved(pf),
450 		  "attempted to map a reserved page frame");
451 
452 	/* We do allow multiple mappings for pinned page frames
453 	 * since we will never need to reverse map them.
	 * This is uncommon; use cases are for things like the
	 * Zephyr equivalent of vDSOs.
456 	 */
457 	PF_ASSERT(pf, !k_mem_page_frame_is_mapped(pf) || k_mem_page_frame_is_pinned(pf),
458 		 "non-pinned and already mapped to %p",
459 		 k_mem_page_frame_to_virt(pf));
460 
461 	uintptr_t flags_mask = CONFIG_MMU_PAGE_SIZE - 1;
462 	uintptr_t va = (uintptr_t)addr & ~flags_mask;
463 
464 	pf->va_and_flags &= flags_mask;
465 	pf->va_and_flags |= va | K_MEM_PAGE_FRAME_MAPPED;
466 }
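
/* Worked example of the va_and_flags packing above (illustrative, assuming
 * 4 KiB pages, i.e. flags_mask == 0xfff): mapping a frame at addr == 0x80042abc
 * stores va == 0x80042000 in the upper bits, keeps whatever flag bits were
 * already set in the low 12 bits, and ORs in K_MEM_PAGE_FRAME_MAPPED.
 */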
467 
468 /* LCOV_EXCL_START */
469 /* Go through page frames to find the physical address mapped
470  * by a virtual address.
471  *
472  * @param[in]  virt Virtual Address
473  * @param[out] phys Physical address mapped to the input virtual address
474  *                  if such mapping exists.
475  *
476  * @retval 0 if mapping is found and valid
477  * @retval -EFAULT if virtual address is not mapped
478  */
static int virt_to_page_frame(void *virt, uintptr_t *phys)
480 {
481 	uintptr_t paddr;
482 	struct k_mem_page_frame *pf;
483 	int ret = -EFAULT;
484 
485 	K_MEM_PAGE_FRAME_FOREACH(paddr, pf) {
486 		if (k_mem_page_frame_is_mapped(pf)) {
487 			if (virt == k_mem_page_frame_to_virt(pf)) {
488 				ret = 0;
489 				if (phys != NULL) {
490 					*phys = k_mem_page_frame_to_phys(pf);
491 				}
492 				break;
493 			}
494 		}
495 	}
496 
497 	return ret;
498 }
499 /* LCOV_EXCL_STOP */
500 
501 __weak FUNC_ALIAS(virt_to_page_frame, arch_page_phys_get, int);
502 
503 #ifdef CONFIG_DEMAND_PAGING
504 static int page_frame_prepare_locked(struct k_mem_page_frame *pf, bool *dirty_ptr,
505 				     bool page_in, uintptr_t *location_ptr);
506 
507 static inline void do_backing_store_page_in(uintptr_t location);
508 static inline void do_backing_store_page_out(uintptr_t location);
509 #endif /* CONFIG_DEMAND_PAGING */
510 
511 /* Allocate a free page frame, and map it to a specified virtual address
512  *
513  * TODO: Add optional support for copy-on-write mappings to a zero page instead
514  * of allocating, in which case page frames will be allocated lazily as
515  * the mappings to the zero page get touched. This will avoid expensive
516  * page-ins as memory is mapped and physical RAM or backing store storage will
517  * not be used if the mapped memory is unused. The cost is an empty physical
518  * page of zeroes.
519  */
static int map_anon_page(void *addr, uint32_t flags)
521 {
522 	struct k_mem_page_frame *pf;
523 	uintptr_t phys;
524 	bool lock = (flags & K_MEM_MAP_LOCK) != 0U;
525 
526 	pf = free_page_frame_list_get();
527 	if (pf == NULL) {
528 #ifdef CONFIG_DEMAND_PAGING
529 		uintptr_t location;
530 		bool dirty;
531 		int ret;
532 
533 		pf = k_mem_paging_eviction_select(&dirty);
534 		__ASSERT(pf != NULL, "failed to get a page frame");
535 		LOG_DBG("evicting %p at 0x%lx",
536 			k_mem_page_frame_to_virt(pf),
537 			k_mem_page_frame_to_phys(pf));
538 		ret = page_frame_prepare_locked(pf, &dirty, false, &location);
539 		if (ret != 0) {
540 			return -ENOMEM;
541 		}
542 		if (dirty) {
543 			do_backing_store_page_out(location);
544 		}
545 		pf->va_and_flags = 0;
546 #else
547 		return -ENOMEM;
548 #endif /* CONFIG_DEMAND_PAGING */
549 	}
550 
551 	phys = k_mem_page_frame_to_phys(pf);
552 	arch_mem_map(addr, phys, CONFIG_MMU_PAGE_SIZE, flags);
553 
554 	if (lock) {
555 		k_mem_page_frame_set(pf, K_MEM_PAGE_FRAME_PINNED);
556 	}
557 	frame_mapped_set(pf, addr);
558 #ifdef CONFIG_DEMAND_PAGING
559 	if (IS_ENABLED(CONFIG_EVICTION_TRACKING) && (!lock)) {
560 		k_mem_paging_eviction_add(pf);
561 	}
562 #endif
563 
564 	LOG_DBG("memory mapping anon page %p -> 0x%lx", addr, phys);
565 
566 	return 0;
567 }
568 
void *k_mem_map_phys_guard(uintptr_t phys, size_t size, uint32_t flags, bool is_anon)
570 {
571 	uint8_t *dst;
572 	size_t total_size;
573 	int ret;
574 	k_spinlock_key_t key;
575 	uint8_t *pos;
576 	bool uninit = (flags & K_MEM_MAP_UNINIT) != 0U;
577 
578 	__ASSERT(!is_anon || (is_anon && page_frames_initialized),
579 		 "%s called too early", __func__);
580 	__ASSERT((flags & K_MEM_CACHE_MASK) == 0U,
581 		 "%s does not support explicit cache settings", __func__);
582 
583 	if (((flags & K_MEM_PERM_USER) != 0U) &&
584 	    ((flags & K_MEM_MAP_UNINIT) != 0U)) {
585 		LOG_ERR("user access to anonymous uninitialized pages is forbidden");
586 		return NULL;
587 	}
588 	if ((size % CONFIG_MMU_PAGE_SIZE) != 0U) {
589 		LOG_ERR("unaligned size %zu passed to %s", size, __func__);
590 		return NULL;
591 	}
592 	if (size == 0) {
593 		LOG_ERR("zero sized memory mapping");
594 		return NULL;
595 	}
596 
597 	/* Need extra for the guard pages (before and after) which we
598 	 * won't map.
599 	 */
600 	if (size_add_overflow(size, CONFIG_MMU_PAGE_SIZE * 2, &total_size)) {
601 		LOG_ERR("too large size %zu passed to %s", size, __func__);
602 		return NULL;
603 	}
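
	/* Resulting virtual layout (sketch):
	 *
	 *   +-------------+ <- start of the virt_region_alloc() allocation
	 *   | guard page  |    (left unmapped, faults on access)
	 *   +-------------+ <- address returned to the caller
	 *   | size bytes  |    (mapped below)
	 *   +-------------+
	 *   | guard page  |    (left unmapped, faults on access)
	 *   +-------------+
	 */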
604 
605 	key = k_spin_lock(&z_mm_lock);
606 
607 	dst = virt_region_alloc(total_size, CONFIG_MMU_PAGE_SIZE);
608 	if (dst == NULL) {
609 		/* Address space has no free region */
610 		goto out;
611 	}
612 
613 	/* Unmap both guard pages to make sure accessing them
	 * will generate a fault.
615 	 */
616 	arch_mem_unmap(dst, CONFIG_MMU_PAGE_SIZE);
617 	arch_mem_unmap(dst + CONFIG_MMU_PAGE_SIZE + size,
618 		       CONFIG_MMU_PAGE_SIZE);
619 
620 	/* Skip over the "before" guard page in returned address. */
621 	dst += CONFIG_MMU_PAGE_SIZE;
622 
623 	if (is_anon) {
624 		/* Mapping from anonymous memory */
625 		flags |= K_MEM_CACHE_WB;
626 #ifdef CONFIG_DEMAND_MAPPING
627 		if ((flags & K_MEM_MAP_LOCK) == 0) {
628 			flags |= K_MEM_MAP_UNPAGED;
629 			VIRT_FOREACH(dst, size, pos) {
630 				arch_mem_map(pos,
631 					     uninit ? ARCH_UNPAGED_ANON_UNINIT
632 						    : ARCH_UNPAGED_ANON_ZERO,
633 					     CONFIG_MMU_PAGE_SIZE, flags);
634 			}
635 			LOG_DBG("memory mapping anon pages %p to %p unpaged", dst, pos-1);
636 			/* skip the memset() below */
637 			uninit = true;
638 		} else
639 #endif
640 		{
641 			VIRT_FOREACH(dst, size, pos) {
642 				ret = map_anon_page(pos, flags);
643 
644 				if (ret != 0) {
645 					/* TODO:
646 					 * call k_mem_unmap(dst, pos - dst)
647 					 * when implemented in #28990 and
648 					 * release any guard virtual page as well.
649 					 */
650 					dst = NULL;
651 					goto out;
652 				}
653 			}
654 		}
655 	} else {
656 		/* Mapping known physical memory.
657 		 *
658 		 * arch_mem_map() is a void function and does not return
659 		 * anything. Arch code usually uses ASSERT() to catch
660 		 * mapping errors. Assume this works correctly for now.
661 		 */
662 		arch_mem_map(dst, phys, size, flags);
663 	}
664 
665 out:
666 	k_spin_unlock(&z_mm_lock, key);
667 
668 	if (dst != NULL && !uninit) {
669 		/* If we later implement mappings to a copy-on-write
670 		 * zero page, won't need this step
671 		 */
672 		memset(dst, 0, size);
673 	}
674 
675 	return dst;
676 }
677 
void k_mem_unmap_phys_guard(void *addr, size_t size, bool is_anon)
679 {
680 	uintptr_t phys;
681 	uint8_t *pos;
682 	struct k_mem_page_frame *pf;
683 	k_spinlock_key_t key;
684 	size_t total_size;
685 	int ret;
686 
687 	/* Need space for the "before" guard page */
688 	__ASSERT_NO_MSG(POINTER_TO_UINT(addr) >= CONFIG_MMU_PAGE_SIZE);
689 
690 	/* Make sure address range is still valid after accounting
691 	 * for two guard pages.
692 	 */
693 	pos = (uint8_t *)addr - CONFIG_MMU_PAGE_SIZE;
694 	k_mem_assert_virtual_region(pos, size + (CONFIG_MMU_PAGE_SIZE * 2));
695 
696 	key = k_spin_lock(&z_mm_lock);
697 
698 	/* Check if both guard pages are unmapped.
699 	 * Bail if not, as this is probably a region not mapped
700 	 * using k_mem_map().
701 	 */
702 	pos = addr;
703 	ret = arch_page_phys_get(pos - CONFIG_MMU_PAGE_SIZE, NULL);
704 	if (ret == 0) {
		__ASSERT(ret != 0,
706 			 "%s: cannot find preceding guard page for (%p, %zu)",
707 			 __func__, addr, size);
708 		goto out;
709 	}
710 
711 	ret = arch_page_phys_get(pos + size, NULL);
712 	if (ret == 0) {
		__ASSERT(ret != 0,
714 			 "%s: cannot find succeeding guard page for (%p, %zu)",
715 			 __func__, addr, size);
716 		goto out;
717 	}
718 
719 	if (is_anon) {
720 		/* Unmapping anonymous memory */
721 		VIRT_FOREACH(addr, size, pos) {
722 #ifdef CONFIG_DEMAND_PAGING
723 			enum arch_page_location status;
724 			uintptr_t location;
725 
726 			status = arch_page_location_get(pos, &location);
727 			switch (status) {
728 			case ARCH_PAGE_LOCATION_PAGED_OUT:
729 				/*
730 				 * No pf is associated with this mapping.
731 				 * Simply get rid of the MMU entry and free
732 				 * corresponding backing store.
733 				 */
734 				arch_mem_unmap(pos, CONFIG_MMU_PAGE_SIZE);
735 				k_mem_paging_backing_store_location_free(location);
736 				continue;
737 			case ARCH_PAGE_LOCATION_PAGED_IN:
738 				/*
				 * The page is in memory, but it may have been
				 * marked inaccessible in order to track the
				 * ARCH_DATA_PAGE_ACCESSED flag, which means
				 * arch_page_phys_get() could fail. Still, we
				 * know the actual physical address.
744 				 */
745 				phys = location;
746 				ret = 0;
747 				break;
748 			default:
749 				ret = arch_page_phys_get(pos, &phys);
750 				break;
751 			}
752 #else
753 			ret = arch_page_phys_get(pos, &phys);
754 #endif
755 			__ASSERT(ret == 0,
756 				 "%s: cannot unmap an unmapped address %p",
757 				 __func__, pos);
758 			if (ret != 0) {
759 				/* Found an address not mapped. Do not continue. */
760 				goto out;
761 			}
762 
763 			__ASSERT(k_mem_is_page_frame(phys),
764 				 "%s: 0x%lx is not a page frame", __func__, phys);
765 			if (!k_mem_is_page_frame(phys)) {
766 				/* Physical address has no corresponding page frame
767 				 * description in the page frame array.
768 				 * This should not happen. Do not continue.
769 				 */
770 				goto out;
771 			}
772 
773 			/* Grab the corresponding page frame from physical address */
774 			pf = k_mem_phys_to_page_frame(phys);
775 
776 			__ASSERT(k_mem_page_frame_is_mapped(pf),
777 				 "%s: 0x%lx is not a mapped page frame", __func__, phys);
778 			if (!k_mem_page_frame_is_mapped(pf)) {
779 				/* Page frame is not marked mapped.
780 				 * This should not happen. Do not continue.
781 				 */
782 				goto out;
783 			}
784 
785 			arch_mem_unmap(pos, CONFIG_MMU_PAGE_SIZE);
786 #ifdef CONFIG_DEMAND_PAGING
787 			if (IS_ENABLED(CONFIG_EVICTION_TRACKING) &&
788 			    (!k_mem_page_frame_is_pinned(pf))) {
789 				k_mem_paging_eviction_remove(pf);
790 			}
791 #endif
792 
793 			/* Put the page frame back into free list */
794 			page_frame_free_locked(pf);
795 		}
796 	} else {
797 		/*
		 * Unmapping previously mapped memory with a specific physical address.
		 *
		 * Note that we don't have to unmap the guard pages, as they should
		 * have been unmapped already. We just need to unmap the in-between
		 * region [addr, (addr + size)).
803 		 */
804 		arch_mem_unmap(addr, size);
805 	}
806 
807 	/* There are guard pages just before and after the mapped
808 	 * region. So we also need to free them from the bitmap.
809 	 */
810 	pos = (uint8_t *)addr - CONFIG_MMU_PAGE_SIZE;
811 	total_size = size + (CONFIG_MMU_PAGE_SIZE * 2);
812 	virt_region_free(pos, total_size);
813 
814 out:
815 	k_spin_unlock(&z_mm_lock, key);
816 }
817 
int k_mem_update_flags(void *addr, size_t size, uint32_t flags)
819 {
820 	uintptr_t phys;
821 	k_spinlock_key_t key;
822 	int ret;
823 
824 	k_mem_assert_virtual_region(addr, size);
825 
826 	key = k_spin_lock(&z_mm_lock);
827 
828 	/*
	 * We can achieve the desired result without explicit architecture support
	 * by unmapping and remapping the same physical memory using the new flags.
831 	 */
832 
833 	ret = arch_page_phys_get(addr, &phys);
834 	if (ret < 0) {
835 		goto out;
836 	}
837 
838 	/* TODO: detect and handle paged-out memory as well */
839 
840 	arch_mem_unmap(addr, size);
841 	arch_mem_map(addr, phys, size, flags);
842 
843 out:
844 	k_spin_unlock(&z_mm_lock, key);
845 	return ret;
846 }
847 
size_t k_mem_free_get(void)
849 {
850 	size_t ret;
851 	k_spinlock_key_t key;
852 
853 	__ASSERT(page_frames_initialized, "%s called too early", __func__);
854 
855 	key = k_spin_lock(&z_mm_lock);
856 #ifdef CONFIG_DEMAND_PAGING
857 	if (z_free_page_count > CONFIG_DEMAND_PAGING_PAGE_FRAMES_RESERVE) {
858 		ret = z_free_page_count - CONFIG_DEMAND_PAGING_PAGE_FRAMES_RESERVE;
859 	} else {
860 		ret = 0;
861 	}
862 #else
863 	ret = z_free_page_count;
864 #endif /* CONFIG_DEMAND_PAGING */
865 	k_spin_unlock(&z_mm_lock, key);
866 
867 	return ret * (size_t)CONFIG_MMU_PAGE_SIZE;
868 }
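
/* Illustrative pairing of k_mem_free_get() with k_mem_map() (a sketch, not
 * part of this file's logic; the mapping can still fail and must be checked):
 *
 *	if (k_mem_free_get() >= CONFIG_MMU_PAGE_SIZE) {
 *		void *page = k_mem_map(CONFIG_MMU_PAGE_SIZE, K_MEM_PERM_RW);
 *
 *		__ASSERT(page != NULL, "mapping failed despite free pages");
 *	}
 */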
869 
870 /* Get the default virtual region alignment, here the default MMU page size
871  *
872  * @param[in] phys Physical address of region to be mapped, aligned to MMU_PAGE_SIZE
873  * @param[in] size Size of region to be mapped, aligned to MMU_PAGE_SIZE
874  *
875  * @retval alignment to apply on the virtual address of this region
876  */
static size_t virt_region_align(uintptr_t phys, size_t size)
878 {
879 	ARG_UNUSED(phys);
880 	ARG_UNUSED(size);
881 
882 	return CONFIG_MMU_PAGE_SIZE;
883 }
884 
885 __weak FUNC_ALIAS(virt_region_align, arch_virt_region_align, size_t);
886 
887 /* This may be called from arch early boot code before z_cstart() is invoked.
888  * Data will be copied and BSS zeroed, but this must not rely on any
 * initialization functions having been called beforehand to work correctly.
890  */
void k_mem_map_phys_bare(uint8_t **virt_ptr, uintptr_t phys, size_t size, uint32_t flags)
892 {
893 	uintptr_t aligned_phys, addr_offset;
894 	size_t aligned_size, align_boundary;
895 	k_spinlock_key_t key;
896 	uint8_t *dest_addr;
897 	size_t num_bits;
898 	size_t offset;
899 
900 #ifndef CONFIG_KERNEL_DIRECT_MAP
901 	__ASSERT(!(flags & K_MEM_DIRECT_MAP), "The direct-map is not enabled");
902 #endif /* CONFIG_KERNEL_DIRECT_MAP */
903 	addr_offset = k_mem_region_align(&aligned_phys, &aligned_size,
904 					 phys, size,
905 					 CONFIG_MMU_PAGE_SIZE);
906 	__ASSERT(aligned_size != 0U, "0-length mapping at 0x%lx", aligned_phys);
907 	__ASSERT(aligned_phys < (aligned_phys + (aligned_size - 1)),
908 		 "wraparound for physical address 0x%lx (size %zu)",
909 		 aligned_phys, aligned_size);
910 
911 	align_boundary = arch_virt_region_align(aligned_phys, aligned_size);
912 
913 	key = k_spin_lock(&z_mm_lock);
914 
915 	if (IS_ENABLED(CONFIG_KERNEL_DIRECT_MAP) &&
916 	    (flags & K_MEM_DIRECT_MAP)) {
917 		dest_addr = (uint8_t *)aligned_phys;
918 
919 		/* Mark the region of virtual memory bitmap as used
920 		 * if the region overlaps the virtual memory space.
921 		 *
922 		 * Basically if either end of region is within
923 		 * virtual memory space, we need to mark the bits.
924 		 */
925 
926 		if (IN_RANGE(aligned_phys,
927 			      (uintptr_t)K_MEM_VIRT_RAM_START,
928 			      (uintptr_t)(K_MEM_VIRT_RAM_END - 1)) ||
929 		    IN_RANGE(aligned_phys + aligned_size - 1,
930 			      (uintptr_t)K_MEM_VIRT_RAM_START,
931 			      (uintptr_t)(K_MEM_VIRT_RAM_END - 1))) {
932 			uint8_t *adjusted_start = MAX(dest_addr, K_MEM_VIRT_RAM_START);
933 			uint8_t *adjusted_end = MIN(dest_addr + aligned_size,
934 						    K_MEM_VIRT_RAM_END);
935 			size_t adjusted_sz = adjusted_end - adjusted_start;
936 
937 			num_bits = adjusted_sz / CONFIG_MMU_PAGE_SIZE;
938 			offset = virt_to_bitmap_offset(adjusted_start, adjusted_sz);
939 			if (sys_bitarray_test_and_set_region(
940 			    &virt_region_bitmap, num_bits, offset, true)) {
941 				goto fail;
942 			}
943 		}
944 	} else {
945 		/* Obtain an appropriately sized chunk of virtual memory */
946 		dest_addr = virt_region_alloc(aligned_size, align_boundary);
947 		if (!dest_addr) {
948 			goto fail;
949 		}
950 	}
951 
	/* If this fails there's something amiss with virt_region_alloc() */
953 	__ASSERT((uintptr_t)dest_addr <
954 		 ((uintptr_t)dest_addr + (size - 1)),
955 		 "wraparound for virtual address %p (size %zu)",
956 		 dest_addr, size);
957 
958 	LOG_DBG("arch_mem_map(%p, 0x%lx, %zu, %x) offset %lu", dest_addr,
959 		aligned_phys, aligned_size, flags, addr_offset);
960 
961 	arch_mem_map(dest_addr, aligned_phys, aligned_size, flags);
962 	k_spin_unlock(&z_mm_lock, key);
963 
964 	*virt_ptr = dest_addr + addr_offset;
965 	return;
966 fail:
967 	/* May re-visit this in the future, but for now running out of
968 	 * virtual address space or failing the arch_mem_map() call is
969 	 * an unrecoverable situation.
970 	 *
971 	 * Other problems not related to resource exhaustion we leave as
972 	 * assertions since they are clearly programming mistakes.
973 	 */
974 	LOG_ERR("memory mapping 0x%lx (size %zu, flags 0x%x) failed",
975 		phys, size, flags);
976 	k_panic();
977 }
978 
void k_mem_unmap_phys_bare(uint8_t *virt, size_t size)
980 {
981 	uintptr_t aligned_virt, addr_offset;
982 	size_t aligned_size;
983 	k_spinlock_key_t key;
984 
985 	addr_offset = k_mem_region_align(&aligned_virt, &aligned_size,
986 					 POINTER_TO_UINT(virt), size,
987 					 CONFIG_MMU_PAGE_SIZE);
988 	__ASSERT(aligned_size != 0U, "0-length mapping at 0x%lx", aligned_virt);
989 	__ASSERT(aligned_virt < (aligned_virt + (aligned_size - 1)),
990 		 "wraparound for virtual address 0x%lx (size %zu)",
991 		 aligned_virt, aligned_size);
992 
993 	key = k_spin_lock(&z_mm_lock);
994 
995 	LOG_DBG("arch_mem_unmap(0x%lx, %zu) offset %lu",
996 		aligned_virt, aligned_size, addr_offset);
997 
998 	arch_mem_unmap(UINT_TO_POINTER(aligned_virt), aligned_size);
999 	virt_region_free(UINT_TO_POINTER(aligned_virt), aligned_size);
1000 	k_spin_unlock(&z_mm_lock, key);
1001 }
1002 
1003 /*
1004  * Miscellaneous
1005  */
1006 
size_t k_mem_region_align(uintptr_t *aligned_addr, size_t *aligned_size,
1008 			  uintptr_t addr, size_t size, size_t align)
1009 {
1010 	size_t addr_offset;
1011 
1012 	/* The actual mapped region must be page-aligned. Round down the
1013 	 * physical address and pad the region size appropriately
1014 	 */
1015 	*aligned_addr = ROUND_DOWN(addr, align);
1016 	addr_offset = addr - *aligned_addr;
1017 	*aligned_size = ROUND_UP(size + addr_offset, align);
1018 
1019 	return addr_offset;
1020 }
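
/* Worked example for the rounding above (illustrative, align = 0x1000):
 * addr = 0x80001234 and size = 0x2000 yield *aligned_addr = 0x80001000,
 * addr_offset = 0x234 and *aligned_size = ROUND_UP(0x2234, 0x1000) = 0x3000,
 * so the padded region covers the whole requested range on page boundaries.
 */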
1021 
1022 #if defined(CONFIG_LINKER_USE_BOOT_SECTION) || defined(CONFIG_LINKER_USE_PINNED_SECTION)
static void mark_linker_section_pinned(void *start_addr, void *end_addr,
1024 				       bool pin)
1025 {
1026 	struct k_mem_page_frame *pf;
1027 	uint8_t *addr;
1028 
1029 	uintptr_t pinned_start = ROUND_DOWN(POINTER_TO_UINT(start_addr),
1030 					    CONFIG_MMU_PAGE_SIZE);
1031 	uintptr_t pinned_end = ROUND_UP(POINTER_TO_UINT(end_addr),
1032 					CONFIG_MMU_PAGE_SIZE);
1033 	size_t pinned_size = pinned_end - pinned_start;
1034 
1035 	VIRT_FOREACH(UINT_TO_POINTER(pinned_start), pinned_size, addr)
1036 	{
1037 		pf = k_mem_phys_to_page_frame(K_MEM_BOOT_VIRT_TO_PHYS(addr));
1038 		frame_mapped_set(pf, addr);
1039 
1040 		if (pin) {
1041 			k_mem_page_frame_set(pf, K_MEM_PAGE_FRAME_PINNED);
1042 		} else {
1043 			k_mem_page_frame_clear(pf, K_MEM_PAGE_FRAME_PINNED);
1044 #ifdef CONFIG_DEMAND_PAGING
1045 			if (IS_ENABLED(CONFIG_EVICTION_TRACKING) &&
1046 			    k_mem_page_frame_is_evictable(pf)) {
1047 				k_mem_paging_eviction_add(pf);
1048 			}
1049 #endif
1050 		}
1051 	}
1052 }
#endif /* CONFIG_LINKER_USE_BOOT_SECTION || CONFIG_LINKER_USE_PINNED_SECTION */
1054 
1055 #ifdef CONFIG_LINKER_USE_ONDEMAND_SECTION
static void z_paging_ondemand_section_map(void)
1057 {
1058 	uint8_t *addr;
1059 	size_t size;
1060 	uintptr_t location;
1061 	uint32_t flags;
1062 
1063 	size = (uintptr_t)lnkr_ondemand_text_size;
1064 	flags = K_MEM_MAP_UNPAGED | K_MEM_PERM_EXEC | K_MEM_CACHE_WB;
1065 	VIRT_FOREACH(lnkr_ondemand_text_start, size, addr) {
1066 		k_mem_paging_backing_store_location_query(addr, &location);
1067 		arch_mem_map(addr, location, CONFIG_MMU_PAGE_SIZE, flags);
1068 		sys_bitarray_set_region(&virt_region_bitmap, 1,
1069 					virt_to_bitmap_offset(addr, CONFIG_MMU_PAGE_SIZE));
1070 	}
1071 
1072 	size = (uintptr_t)lnkr_ondemand_rodata_size;
1073 	flags = K_MEM_MAP_UNPAGED | K_MEM_CACHE_WB;
1074 	VIRT_FOREACH(lnkr_ondemand_rodata_start, size, addr) {
1075 		k_mem_paging_backing_store_location_query(addr, &location);
1076 		arch_mem_map(addr, location, CONFIG_MMU_PAGE_SIZE, flags);
1077 		sys_bitarray_set_region(&virt_region_bitmap, 1,
1078 					virt_to_bitmap_offset(addr, CONFIG_MMU_PAGE_SIZE));
1079 	}
1080 }
1081 #endif /* CONFIG_LINKER_USE_ONDEMAND_SECTION */
1082 
void z_mem_manage_init(void)
1084 {
1085 	uintptr_t phys;
1086 	uint8_t *addr;
1087 	struct k_mem_page_frame *pf;
1088 	k_spinlock_key_t key = k_spin_lock(&z_mm_lock);
1089 
1090 	free_page_frame_list_init();
1091 
1092 	ARG_UNUSED(addr);
1093 
1094 #ifdef CONFIG_ARCH_HAS_RESERVED_PAGE_FRAMES
1095 	/* If some page frames are unavailable for use as memory, arch
1096 	 * code will mark K_MEM_PAGE_FRAME_RESERVED in their flags
1097 	 */
1098 	arch_reserved_pages_update();
1099 #endif /* CONFIG_ARCH_HAS_RESERVED_PAGE_FRAMES */
1100 
1101 #ifdef CONFIG_LINKER_GENERIC_SECTIONS_PRESENT_AT_BOOT
1102 	/* All pages composing the Zephyr image are mapped at boot in a
1103 	 * predictable way. This can change at runtime.
1104 	 */
1105 	VIRT_FOREACH(K_MEM_KERNEL_VIRT_START, K_MEM_KERNEL_VIRT_SIZE, addr)
1106 	{
1107 		pf = k_mem_phys_to_page_frame(K_MEM_BOOT_VIRT_TO_PHYS(addr));
1108 		frame_mapped_set(pf, addr);
1109 
		/* TODO: for now we pin the whole Zephyr image. Demand paging is
1111 		 * currently tested with anonymously-mapped pages which are not
1112 		 * pinned.
1113 		 *
		 * We will need to set up linker regions for a subset of kernel
1115 		 * code/data pages which are pinned in memory and
1116 		 * may not be evicted. This will contain critical CPU data
1117 		 * structures, and any code used to perform page fault
1118 		 * handling, page-ins, etc.
1119 		 */
1120 		k_mem_page_frame_set(pf, K_MEM_PAGE_FRAME_PINNED);
1121 	}
1122 #endif /* CONFIG_LINKER_GENERIC_SECTIONS_PRESENT_AT_BOOT */
1123 
1124 #ifdef CONFIG_LINKER_USE_BOOT_SECTION
1125 	/* Pin the boot section to prevent it from being swapped out during
	 * the boot process. It will be un-pinned once the boot process completes.
1127 	 */
1128 	mark_linker_section_pinned(lnkr_boot_start, lnkr_boot_end, true);
1129 #endif /* CONFIG_LINKER_USE_BOOT_SECTION */
1130 
1131 #ifdef CONFIG_LINKER_USE_PINNED_SECTION
	/* Pin the page frames corresponding to the pinned symbols */
1133 	mark_linker_section_pinned(lnkr_pinned_start, lnkr_pinned_end, true);
1134 #endif /* CONFIG_LINKER_USE_PINNED_SECTION */
1135 
1136 	/* Any remaining pages that aren't mapped, reserved, or pinned get
1137 	 * added to the free pages list
1138 	 */
1139 	K_MEM_PAGE_FRAME_FOREACH(phys, pf) {
1140 		if (k_mem_page_frame_is_available(pf)) {
1141 			free_page_frame_list_put(pf);
1142 		}
1143 	}
1144 	LOG_DBG("free page frames: %zu", z_free_page_count);
1145 
1146 #ifdef CONFIG_DEMAND_PAGING
1147 #ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
1148 	z_paging_histogram_init();
1149 #endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */
1150 	k_mem_paging_backing_store_init();
1151 	k_mem_paging_eviction_init();
1152 
1153 	if (IS_ENABLED(CONFIG_EVICTION_TRACKING)) {
		/* start tracking evictable pages installed above, if any */
1155 		K_MEM_PAGE_FRAME_FOREACH(phys, pf) {
1156 			if (k_mem_page_frame_is_evictable(pf)) {
1157 				k_mem_paging_eviction_add(pf);
1158 			}
1159 		}
1160 	}
1161 #endif /* CONFIG_DEMAND_PAGING */
1162 
1163 #ifdef CONFIG_LINKER_USE_ONDEMAND_SECTION
1164 	z_paging_ondemand_section_map();
1165 #endif
1166 
1167 #if __ASSERT_ON
1168 	page_frames_initialized = true;
1169 #endif
1170 	k_spin_unlock(&z_mm_lock, key);
1171 
1172 #ifndef CONFIG_LINKER_GENERIC_SECTIONS_PRESENT_AT_BOOT
	/* If the BSS section is not present in memory at boot,
	 * it will not have been cleared. This needs to be
	 * done now, since the paging mechanism has been initialized
	 * and the BSS pages can be brought into physical
	 * memory to be cleared.
1178 	 */
1179 	z_bss_zero();
1180 #endif /* CONFIG_LINKER_GENERIC_SECTIONS_PRESENT_AT_BOOT */
1181 }
1182 
void z_mem_manage_boot_finish(void)
1184 {
1185 #ifdef CONFIG_LINKER_USE_BOOT_SECTION
	/* At the end of the boot process, unpin the boot sections
1187 	 * as they don't need to be in memory all the time anymore.
1188 	 */
1189 	mark_linker_section_pinned(lnkr_boot_start, lnkr_boot_end, false);
1190 #endif /* CONFIG_LINKER_USE_BOOT_SECTION */
1191 }
1192 
1193 #ifdef CONFIG_DEMAND_PAGING
1194 
1195 #ifdef CONFIG_DEMAND_PAGING_STATS
1196 struct k_mem_paging_stats_t paging_stats;
1197 extern struct k_mem_paging_histogram_t z_paging_histogram_eviction;
1198 extern struct k_mem_paging_histogram_t z_paging_histogram_backing_store_page_in;
1199 extern struct k_mem_paging_histogram_t z_paging_histogram_backing_store_page_out;
1200 #endif /* CONFIG_DEMAND_PAGING_STATS */
1201 
static inline void do_backing_store_page_in(uintptr_t location)
1203 {
1204 #ifdef CONFIG_DEMAND_MAPPING
1205 	/* Check for special cases */
1206 	switch (location) {
1207 	case ARCH_UNPAGED_ANON_ZERO:
1208 		memset(K_MEM_SCRATCH_PAGE, 0, CONFIG_MMU_PAGE_SIZE);
1209 		__fallthrough;
1210 	case ARCH_UNPAGED_ANON_UNINIT:
1211 		/* nothing else to do */
1212 		return;
1213 	default:
1214 		break;
1215 	}
1216 #endif /* CONFIG_DEMAND_MAPPING */
1217 
1218 #ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
1219 	uint32_t time_diff;
1220 
1221 #ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS
1222 	timing_t time_start, time_end;
1223 
1224 	time_start = timing_counter_get();
1225 #else
1226 	uint32_t time_start;
1227 
1228 	time_start = k_cycle_get_32();
1229 #endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */
1230 #endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */
1231 
1232 	k_mem_paging_backing_store_page_in(location);
1233 
1234 #ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
1235 #ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS
1236 	time_end = timing_counter_get();
1237 	time_diff = (uint32_t)timing_cycles_get(&time_start, &time_end);
1238 #else
1239 	time_diff = k_cycle_get_32() - time_start;
1240 #endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */
1241 
1242 	z_paging_histogram_inc(&z_paging_histogram_backing_store_page_in,
1243 			       time_diff);
1244 #endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */
1245 }
1246 
static inline void do_backing_store_page_out(uintptr_t location)
1248 {
1249 #ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
1250 	uint32_t time_diff;
1251 
1252 #ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS
1253 	timing_t time_start, time_end;
1254 
1255 	time_start = timing_counter_get();
1256 #else
1257 	uint32_t time_start;
1258 
1259 	time_start = k_cycle_get_32();
1260 #endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */
1261 #endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */
1262 
1263 	k_mem_paging_backing_store_page_out(location);
1264 
1265 #ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
1266 #ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS
1267 	time_end = timing_counter_get();
1268 	time_diff = (uint32_t)timing_cycles_get(&time_start, &time_end);
1269 #else
1270 	time_diff = k_cycle_get_32() - time_start;
1271 #endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */
1272 
1273 	z_paging_histogram_inc(&z_paging_histogram_backing_store_page_out,
1274 			       time_diff);
1275 #endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */
1276 }
1277 
1278 #if defined(CONFIG_SMP) && defined(CONFIG_DEMAND_PAGING_ALLOW_IRQ)
1279 /*
1280  * SMP support is very simple. Some resources such as the scratch page could
1281  * be made per CPU, backing store driver execution be confined to the faulting
1282  * CPU, statistics be made to cope with access concurrency, etc. But in the
1283  * end we're dealing with memory transfer to/from some external storage which
1284  * is inherently slow and whose access is most likely serialized anyway.
1285  * So let's simply enforce global demand paging serialization across all CPUs
1286  * with a mutex as there is no real gain from added parallelism here.
1287  */
1288 static K_MUTEX_DEFINE(z_mm_paging_lock);
1289 #endif
1290 
static void virt_region_foreach(void *addr, size_t size,
1292 				void (*func)(void *))
1293 {
1294 	k_mem_assert_virtual_region(addr, size);
1295 
1296 	for (size_t offset = 0; offset < size; offset += CONFIG_MMU_PAGE_SIZE) {
1297 		func((uint8_t *)addr + offset);
1298 	}
1299 }
1300 
1301 /*
1302  * Perform some preparatory steps before paging out. The provided page frame
1303  * must be evicted to the backing store immediately after this is called
1304  * with a call to k_mem_paging_backing_store_page_out() if it contains
1305  * a data page.
1306  *
1307  * - Map page frame to scratch area if requested. This always is true if we're
1308  *   doing a page fault, but is only set on manual evictions if the page is
1309  *   dirty.
1310  * - If mapped:
1311  *    - obtain backing store location and populate location parameter
1312  *    - Update page tables with location
1313  * - Mark page frame as busy
1314  *
1315  * Returns -ENOMEM if the backing store is full
1316  */
static int page_frame_prepare_locked(struct k_mem_page_frame *pf, bool *dirty_ptr,
1318 				     bool page_fault, uintptr_t *location_ptr)
1319 {
1320 	uintptr_t phys;
1321 	int ret;
1322 	bool dirty = *dirty_ptr;
1323 
1324 	phys = k_mem_page_frame_to_phys(pf);
1325 	__ASSERT(!k_mem_page_frame_is_pinned(pf), "page frame 0x%lx is pinned",
1326 		 phys);
1327 
1328 	/* If the backing store doesn't have a copy of the page, even if it
1329 	 * wasn't modified, treat as dirty. This can happen for a few
1330 	 * reasons:
1331 	 * 1) Page has never been swapped out before, and the backing store
1332 	 *    wasn't pre-populated with this data page.
1333 	 * 2) Page was swapped out before, but the page contents were not
1334 	 *    preserved after swapping back in.
1335 	 * 3) Page contents were preserved when swapped back in, but were later
1336 	 *    evicted from the backing store to make room for other evicted
1337 	 *    pages.
1338 	 */
1339 	if (k_mem_page_frame_is_mapped(pf)) {
1340 		dirty = dirty || !k_mem_page_frame_is_backed(pf);
1341 	}
1342 
1343 	if (dirty || page_fault) {
1344 		arch_mem_scratch(phys);
1345 	}
1346 
1347 	if (k_mem_page_frame_is_mapped(pf)) {
1348 		ret = k_mem_paging_backing_store_location_get(pf, location_ptr,
1349 							      page_fault);
1350 		if (ret != 0) {
1351 			LOG_ERR("out of backing store memory");
1352 			return -ENOMEM;
1353 		}
1354 		arch_mem_page_out(k_mem_page_frame_to_virt(pf), *location_ptr);
1355 
1356 		if (IS_ENABLED(CONFIG_EVICTION_TRACKING)) {
1357 			k_mem_paging_eviction_remove(pf);
1358 		}
1359 	} else {
1360 		/* Shouldn't happen unless this function is mis-used */
1361 		__ASSERT(!dirty, "un-mapped page determined to be dirty");
1362 	}
1363 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1364 	/* Mark as busy so that k_mem_page_frame_is_evictable() returns false */
1365 	__ASSERT(!k_mem_page_frame_is_busy(pf), "page frame 0x%lx is already busy",
1366 		 phys);
1367 	k_mem_page_frame_set(pf, K_MEM_PAGE_FRAME_BUSY);
1368 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1369 	/* Update dirty parameter, since we set to true if it wasn't backed
1370 	 * even if otherwise clean
1371 	 */
1372 	*dirty_ptr = dirty;
1373 
1374 	return 0;
1375 }
1376 
static int do_mem_evict(void *addr)
1378 {
1379 	bool dirty;
1380 	struct k_mem_page_frame *pf;
1381 	uintptr_t location;
1382 	k_spinlock_key_t key;
1383 	uintptr_t flags, phys;
1384 	int ret;
1385 
1386 #if CONFIG_DEMAND_PAGING_ALLOW_IRQ
1387 	__ASSERT(!k_is_in_isr(),
1388 		 "%s is unavailable in ISRs with CONFIG_DEMAND_PAGING_ALLOW_IRQ",
1389 		 __func__);
1390 #ifdef CONFIG_SMP
1391 	k_mutex_lock(&z_mm_paging_lock, K_FOREVER);
1392 #else
1393 	k_sched_lock();
1394 #endif
1395 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1396 	key = k_spin_lock(&z_mm_lock);
1397 	flags = arch_page_info_get(addr, &phys, false);
1398 	__ASSERT((flags & ARCH_DATA_PAGE_NOT_MAPPED) == 0,
1399 		 "address %p isn't mapped", addr);
1400 	if ((flags & ARCH_DATA_PAGE_LOADED) == 0) {
1401 		/* Un-mapped or already evicted. Nothing to do */
1402 		ret = 0;
1403 		goto out;
1404 	}
1405 
1406 	dirty = (flags & ARCH_DATA_PAGE_DIRTY) != 0;
1407 	pf = k_mem_phys_to_page_frame(phys);
1408 	__ASSERT(k_mem_page_frame_to_virt(pf) == addr, "page frame address mismatch");
1409 	ret = page_frame_prepare_locked(pf, &dirty, false, &location);
1410 	if (ret != 0) {
1411 		goto out;
1412 	}
1413 
1414 	__ASSERT(ret == 0, "failed to prepare page frame");
1415 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1416 	k_spin_unlock(&z_mm_lock, key);
1417 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1418 	if (dirty) {
1419 		do_backing_store_page_out(location);
1420 	}
1421 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1422 	key = k_spin_lock(&z_mm_lock);
1423 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1424 	page_frame_free_locked(pf);
1425 out:
1426 	k_spin_unlock(&z_mm_lock, key);
1427 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1428 #ifdef CONFIG_SMP
1429 	k_mutex_unlock(&z_mm_paging_lock);
1430 #else
1431 	k_sched_unlock();
1432 #endif
1433 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1434 	return ret;
1435 }
1436 
int k_mem_page_out(void *addr, size_t size)
1438 {
1439 	__ASSERT(page_frames_initialized, "%s called on %p too early", __func__,
1440 		 addr);
1441 	k_mem_assert_virtual_region(addr, size);
1442 
1443 	for (size_t offset = 0; offset < size; offset += CONFIG_MMU_PAGE_SIZE) {
1444 		void *pos = (uint8_t *)addr + offset;
1445 		int ret;
1446 
1447 		ret = do_mem_evict(pos);
1448 		if (ret != 0) {
1449 			return ret;
1450 		}
1451 	}
1452 
1453 	return 0;
1454 }
1455 
int k_mem_page_frame_evict(uintptr_t phys)
1457 {
1458 	k_spinlock_key_t key;
1459 	struct k_mem_page_frame *pf;
1460 	bool dirty;
1461 	uintptr_t flags;
1462 	uintptr_t location;
1463 	int ret;
1464 
1465 	__ASSERT(page_frames_initialized, "%s called on 0x%lx too early",
1466 		 __func__, phys);
1467 
1468 	/* Implementation is similar to do_page_fault() except there is no
1469 	 * data page to page-in, see comments in that function.
1470 	 */
1471 
1472 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1473 	__ASSERT(!k_is_in_isr(),
1474 		 "%s is unavailable in ISRs with CONFIG_DEMAND_PAGING_ALLOW_IRQ",
1475 		 __func__);
1476 #ifdef CONFIG_SMP
1477 	k_mutex_lock(&z_mm_paging_lock, K_FOREVER);
1478 #else
1479 	k_sched_lock();
1480 #endif
1481 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1482 	key = k_spin_lock(&z_mm_lock);
1483 	pf = k_mem_phys_to_page_frame(phys);
1484 	if (!k_mem_page_frame_is_mapped(pf)) {
1485 		/* Nothing to do, free page */
1486 		ret = 0;
1487 		goto out;
1488 	}
1489 	flags = arch_page_info_get(k_mem_page_frame_to_virt(pf), NULL, false);
1490 	/* Shouldn't ever happen */
1491 	__ASSERT((flags & ARCH_DATA_PAGE_LOADED) != 0, "data page not loaded");
1492 	dirty = (flags & ARCH_DATA_PAGE_DIRTY) != 0;
1493 	ret = page_frame_prepare_locked(pf, &dirty, false, &location);
1494 	if (ret != 0) {
1495 		goto out;
1496 	}
1497 
1498 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1499 	k_spin_unlock(&z_mm_lock, key);
1500 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1501 	if (dirty) {
1502 		do_backing_store_page_out(location);
1503 	}
1504 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
	key = k_spin_lock(&z_mm_lock);
1506 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1507 	page_frame_free_locked(pf);
1508 out:
1509 	k_spin_unlock(&z_mm_lock, key);
1510 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1511 #ifdef CONFIG_SMP
1512 	k_mutex_unlock(&z_mm_paging_lock);
1513 #else
1514 	k_sched_unlock();
1515 #endif
1516 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1517 	return ret;
1518 }
1519 
static inline void paging_stats_faults_inc(struct k_thread *faulting_thread,
1521 					   int key)
1522 {
1523 #ifdef CONFIG_DEMAND_PAGING_STATS
1524 	bool is_irq_unlocked = arch_irq_unlocked(key);
1525 
1526 	paging_stats.pagefaults.cnt++;
1527 
1528 	if (is_irq_unlocked) {
1529 		paging_stats.pagefaults.irq_unlocked++;
1530 	} else {
1531 		paging_stats.pagefaults.irq_locked++;
1532 	}
1533 
1534 #ifdef CONFIG_DEMAND_PAGING_THREAD_STATS
1535 	faulting_thread->paging_stats.pagefaults.cnt++;
1536 
1537 	if (is_irq_unlocked) {
1538 		faulting_thread->paging_stats.pagefaults.irq_unlocked++;
1539 	} else {
1540 		faulting_thread->paging_stats.pagefaults.irq_locked++;
1541 	}
1542 #else
1543 	ARG_UNUSED(faulting_thread);
1544 #endif /* CONFIG_DEMAND_PAGING_THREAD_STATS */
1545 
1546 #ifndef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1547 	if (k_is_in_isr()) {
1548 		paging_stats.pagefaults.in_isr++;
1549 
1550 #ifdef CONFIG_DEMAND_PAGING_THREAD_STATS
1551 		faulting_thread->paging_stats.pagefaults.in_isr++;
1552 #endif /* CONFIG_DEMAND_PAGING_THREAD_STATS */
1553 	}
1554 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1555 #endif /* CONFIG_DEMAND_PAGING_STATS */
1556 }
1557 
static inline void paging_stats_eviction_inc(struct k_thread *faulting_thread,
1559 					     bool dirty)
1560 {
1561 #ifdef CONFIG_DEMAND_PAGING_STATS
1562 	if (dirty) {
1563 		paging_stats.eviction.dirty++;
1564 	} else {
1565 		paging_stats.eviction.clean++;
1566 	}
1567 #ifdef CONFIG_DEMAND_PAGING_THREAD_STATS
1568 	if (dirty) {
1569 		faulting_thread->paging_stats.eviction.dirty++;
1570 	} else {
1571 		faulting_thread->paging_stats.eviction.clean++;
1572 	}
1573 #else
1574 	ARG_UNUSED(faulting_thread);
1575 #endif /* CONFIG_DEMAND_PAGING_THREAD_STATS */
1576 #endif /* CONFIG_DEMAND_PAGING_STATS */
1577 }
1578 
static inline struct k_mem_page_frame *do_eviction_select(bool *dirty)
1580 {
1581 	struct k_mem_page_frame *pf;
1582 
1583 #ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
1584 	uint32_t time_diff;
1585 
1586 #ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS
1587 	timing_t time_start, time_end;
1588 
1589 	time_start = timing_counter_get();
1590 #else
1591 	uint32_t time_start;
1592 
1593 	time_start = k_cycle_get_32();
1594 #endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */
1595 #endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */
1596 
1597 	pf = k_mem_paging_eviction_select(dirty);
1598 
1599 #ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
1600 #ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS
1601 	time_end = timing_counter_get();
1602 	time_diff = (uint32_t)timing_cycles_get(&time_start, &time_end);
1603 #else
1604 	time_diff = k_cycle_get_32() - time_start;
1605 #endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */
1606 
1607 	z_paging_histogram_inc(&z_paging_histogram_eviction, time_diff);
1608 #endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */
1609 
1610 	return pf;
1611 }
1612 
static bool do_page_fault(void *addr, bool pin)
1614 {
1615 	struct k_mem_page_frame *pf;
1616 	k_spinlock_key_t key;
1617 	uintptr_t page_in_location, page_out_location;
1618 	enum arch_page_location status;
1619 	bool result;
1620 	bool dirty = false;
1621 	struct k_thread *faulting_thread;
1622 	int ret;
1623 
1624 	__ASSERT(page_frames_initialized, "page fault at %p happened too early",
1625 		 addr);
1626 
1627 	LOG_DBG("page fault at %p", addr);
1628 
1629 	/*
1630 	 * TODO: Add performance accounting:
1631 	 * - k_mem_paging_eviction_select() metrics
1632 	 *   * periodic timer execution time histogram (if implemented)
1633 	 */
1634 
1635 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1636 	/*
1637 	 * We do re-enable interrupts during the page-in/page-out operation
1638 	 * if and only if interrupts were enabled when the exception was
1639 	 * taken; in this configuration page faults in an ISR are a bug, and
1640 	 * any code/data an ISR touches must be pinned (see the example below).
1641 	 *
1642 	 * If interrupts were disabled when the exception was taken, the
1643 	 * arch code is responsible for keeping them that way when entering
1644 	 * this function.
1645 	 *
1646 	 * If this option is not enabled, then interrupts remain locked for the
1647 	 * entire operation. This is far worse for system interrupt latency,
1648 	 * but requires fewer pinned pages, and ISRs may also take page faults.
1649 	 *
1650 	 * On UP we lock the scheduler so that other threads are never
1651 	 * scheduled during the page-in/out operation. Allowing
1652 	 * k_mem_paging_backing_store_page_out() and
1653 	 * k_mem_paging_backing_store_page_in() to sleep so that
1654 	 * other threads may run (such as when the transfer is
1655 	 * async DMA) is not supported on UP. Even if limited to thread
1656 	 * context, an arbitrary memory access that triggers an exception
1657 	 * and puts a thread to sleep on a contended page fault operation
1658 	 * would break the scheduling assumptions of cooperative threads, or
1659 	 * of threads that implement critical sections with spinlocks or by disabling IRQs.
1660 	 *
1661 	 * On SMP, though, exclusivity cannot be assumed solely from being
1662 	 * a cooperative thread. Another thread of any priority may be running
1663 	 * on another CPU, so exclusion must already be enforced by other
1664 	 * means. Trying to prevent scheduling on SMP is therefore pointless,
1665 	 * and k_sched_lock() is effectively a no-op there anyway.
1666 	 * As a result, sleeping/rescheduling in the SMP case is fine.
1667 	 */
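	/* Illustrative sketch (not part of the algorithm above): with
	 * CONFIG_DEMAND_PAGING_ALLOW_IRQ enabled, anything an ISR touches
	 * must be pinned ahead of time, typically at driver init. The
	 * driver and buffer names below are hypothetical:
	 *
	 *	static uint8_t rx_ring[1024];
	 *
	 *	static int my_uart_init(const struct device *dev)
	 *	{
	 *		k_mem_pin(rx_ring, sizeof(rx_ring));
	 *		...
	 *		return 0;
	 *	}
	 */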
1668 	__ASSERT(!k_is_in_isr(), "ISR page faults are forbidden");
1669 #ifdef CONFIG_SMP
1670 	k_mutex_lock(&z_mm_paging_lock, K_FOREVER);
1671 #else
1672 	k_sched_lock();
1673 #endif
1674 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1675 
1676 	key = k_spin_lock(&z_mm_lock);
1677 	faulting_thread = arch_current_thread();
1678 
1679 	status = arch_page_location_get(addr, &page_in_location);
1680 	if (status == ARCH_PAGE_LOCATION_BAD) {
1681 		/* Return false to treat as a fatal error */
1682 		result = false;
1683 		goto out;
1684 	}
1685 	result = true;
1686 
1687 	if (status == ARCH_PAGE_LOCATION_PAGED_IN) {
1688 		if (pin) {
1689 			/* It's a physical memory address */
1690 			uintptr_t phys = page_in_location;
1691 
1692 			pf = k_mem_phys_to_page_frame(phys);
1693 			if (!k_mem_page_frame_is_pinned(pf)) {
1694 				if (IS_ENABLED(CONFIG_EVICTION_TRACKING)) {
1695 					k_mem_paging_eviction_remove(pf);
1696 				}
1697 				k_mem_page_frame_set(pf, K_MEM_PAGE_FRAME_PINNED);
1698 			}
1699 		}
1700 
1701 		/* The data page is already resident in physical
1702 		 * memory and, if requested, has been pinned above.
1703 		 * There is no need to run the page-in path below
1704 		 * to pull in the data page, so skip to the end.
1705 		 */
1706 		goto out;
1707 	}
1708 	__ASSERT(status == ARCH_PAGE_LOCATION_PAGED_OUT,
1709 		 "unexpected status value %d", status);
1710 
1711 	paging_stats_faults_inc(faulting_thread, key.key);
1712 
1713 	pf = free_page_frame_list_get();
1714 	if (pf == NULL) {
1715 		/* Need to evict a page frame */
1716 		pf = do_eviction_select(&dirty);
1717 		__ASSERT(pf != NULL, "failed to get a page frame");
1718 		LOG_DBG("evicting %p at 0x%lx",
1719 			k_mem_page_frame_to_virt(pf),
1720 			k_mem_page_frame_to_phys(pf));
1721 
1722 		paging_stats_eviction_inc(faulting_thread, dirty);
1723 	}
1724 	ret = page_frame_prepare_locked(pf, &dirty, true, &page_out_location);
1725 	__ASSERT(ret == 0, "failed to prepare page frame");
1726 
1727 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1728 	k_spin_unlock(&z_mm_lock, key);
1729 	/* Interrupts are now unlocked if they were not locked when we entered
1730 	 * this function, and we may service ISRs. The scheduler is still
1731 	 * locked.
1732 	 */
1733 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1734 	if (dirty) {
1735 		do_backing_store_page_out(page_out_location);
1736 	}
1737 	do_backing_store_page_in(page_in_location);
1738 
1739 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1740 	key = k_spin_lock(&z_mm_lock);
1741 	k_mem_page_frame_clear(pf, K_MEM_PAGE_FRAME_BUSY);
1742 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1743 	k_mem_page_frame_clear(pf, K_MEM_PAGE_FRAME_MAPPED);
1744 	frame_mapped_set(pf, addr);
1745 	if (pin) {
1746 		k_mem_page_frame_set(pf, K_MEM_PAGE_FRAME_PINNED);
1747 	}
1748 
1749 	arch_mem_page_in(addr, k_mem_page_frame_to_phys(pf));
1750 	k_mem_paging_backing_store_page_finalize(pf, page_in_location);
1751 	if (IS_ENABLED(CONFIG_EVICTION_TRACKING) && (!pin)) {
1752 		k_mem_paging_eviction_add(pf);
1753 	}
1754 out:
1755 	k_spin_unlock(&z_mm_lock, key);
1756 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1757 #ifdef CONFIG_SMP
1758 	k_mutex_unlock(&z_mm_paging_lock);
1759 #else
1760 	k_sched_unlock();
1761 #endif
1762 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1763 
1764 	return result;
1765 }
1766 
1767 static void do_page_in(void *addr)
1768 {
1769 	bool ret;
1770 
1771 	ret = do_page_fault(addr, false);
1772 	__ASSERT(ret, "unmapped memory address %p", addr);
1773 	(void)ret;
1774 }
1775 
1776 void k_mem_page_in(void *addr, size_t size)
1777 {
1778 	__ASSERT(!IS_ENABLED(CONFIG_DEMAND_PAGING_ALLOW_IRQ) || !k_is_in_isr(),
1779 		 "%s may not be called in ISRs if CONFIG_DEMAND_PAGING_ALLOW_IRQ is enabled",
1780 		 __func__);
1781 	virt_region_foreach(addr, size, do_page_in);
1782 }
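/* Usage sketch (illustrative only; the buffer and helper names are
 * hypothetical): pre-fault a region before a latency-sensitive section
 * so the first access does not fault. Unlike k_mem_pin(), this does
 * not prevent the pages from being evicted again later.
 *
 *	k_mem_page_in(lookup_table, sizeof(lookup_table));
 *	run_latency_sensitive_path(lookup_table);
 */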
1783 
1784 static void do_mem_pin(void *addr)
1785 {
1786 	bool ret;
1787 
1788 	ret = do_page_fault(addr, true);
1789 	__ASSERT(ret, "unmapped memory address %p", addr);
1790 	(void)ret;
1791 }
1792 
1793 void k_mem_pin(void *addr, size_t size)
1794 {
1795 	__ASSERT(!IS_ENABLED(CONFIG_DEMAND_PAGING_ALLOW_IRQ) || !k_is_in_isr(),
1796 		 "%s may not be called in ISRs if CONFIG_DEMAND_PAGING_ALLOW_IRQ is enabled",
1797 		 __func__);
1798 	virt_region_foreach(addr, size, do_mem_pin);
1799 }
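/* Usage sketch (illustrative only; the buffer and helper names are
 * hypothetical): pin a buffer for the duration of an operation that
 * must not fault, such as a DMA transfer, then release it so the
 * page frames become evictable again:
 *
 *	k_mem_pin(dma_buf, sizeof(dma_buf));
 *	start_dma_and_wait(dma_buf, sizeof(dma_buf));
 *	k_mem_unpin(dma_buf, sizeof(dma_buf));
 */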
1800 
1801 bool k_mem_page_fault(void *addr)
1802 {
1803 	return do_page_fault(addr, false);
1804 }
1805 
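/* Clear the pinned bit for the data page at addr if it is currently
 * resident, and hand the frame back to the eviction algorithm when
 * eviction tracking is enabled. Pages that are paged out have nothing
 * to unpin.
 */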
1806 static void do_mem_unpin(void *addr)
1807 {
1808 	struct k_mem_page_frame *pf;
1809 	k_spinlock_key_t key;
1810 	uintptr_t flags, phys;
1811 
1812 	key = k_spin_lock(&z_mm_lock);
1813 	flags = arch_page_info_get(addr, &phys, false);
1814 	__ASSERT((flags & ARCH_DATA_PAGE_NOT_MAPPED) == 0,
1815 		 "invalid data page at %p", addr);
1816 	if ((flags & ARCH_DATA_PAGE_LOADED) != 0) {
1817 		pf = k_mem_phys_to_page_frame(phys);
1818 		if (k_mem_page_frame_is_pinned(pf)) {
1819 			k_mem_page_frame_clear(pf, K_MEM_PAGE_FRAME_PINNED);
1820 
1821 			if (IS_ENABLED(CONFIG_EVICTION_TRACKING)) {
1822 				k_mem_paging_eviction_add(pf);
1823 			}
1824 		}
1825 	}
1826 	k_spin_unlock(&z_mm_lock, key);
1827 }
1828 
1829 void k_mem_unpin(void *addr, size_t size)
1830 {
1831 	__ASSERT(page_frames_initialized, "%s called on %p too early", __func__,
1832 		 addr);
1833 	virt_region_foreach(addr, size, do_mem_unpin);
1834 }
1835 
1836 #endif /* CONFIG_DEMAND_PAGING */
1837