1 /*
2  * Copyright (c) 2020 Intel Corporation
3  *
4  * SPDX-License-Identifier: Apache-2.0
5  *
6  * Routines for managing virtual address spaces
7  */
8 
9 #include <stdint.h>
10 #include <kernel_arch_interface.h>
11 #include <zephyr/spinlock.h>
12 #include <mmu.h>
13 #include <zephyr/init.h>
14 #include <kernel_internal.h>
15 #include <zephyr/internal/syscall_handler.h>
16 #include <zephyr/toolchain.h>
17 #include <zephyr/linker/linker-defs.h>
18 #include <zephyr/sys/bitarray.h>
19 #include <zephyr/timing/timing.h>
20 #include <zephyr/logging/log.h>
21 LOG_MODULE_DECLARE(os, CONFIG_KERNEL_LOG_LEVEL);
22 
23 #ifdef CONFIG_DEMAND_PAGING
24 #include <zephyr/kernel/mm/demand_paging.h>
25 #endif
26 
27 /*
28  * General terminology:
29  * - A page frame is a page-sized physical memory region in RAM. It is a
30  *   container where a data page may be placed. It is always referred to by
31  *   physical address. We have a convention of using uintptr_t for physical
32  *   addresses. We instantiate a struct z_page_frame to store metadata for
33  *   every page frame.
34  *
35  * - A data page is a page-sized region of data. It may exist in a page frame,
36  *   or be paged out to some backing store. Its location can always be looked
37  *   up in the CPU's page tables (or equivalent) by virtual address.
38  *   The data type will always be void * or in some cases uint8_t * when we
39  *   want to do pointer arithmetic.
40  */
41 
42 /* Spinlock to protect any globals in this file and serialize page table
43  * updates in arch code
44  */
45 struct k_spinlock z_mm_lock;
46 
47 /*
48  * General page frame management
49  */
50 
51 /* Database of all RAM page frames */
52 struct z_page_frame z_page_frames[Z_NUM_PAGE_FRAMES];
53 
54 #if __ASSERT_ON
55 /* Indicator that z_page_frames has been initialized; many of these APIs do
56  * not work before POST_KERNEL
57  */
58 static bool page_frames_initialized;
59 #endif
60 
61 /* Add colors to page table dumps to indicate mapping type */
62 #define COLOR_PAGE_FRAMES	1
63 
64 #if COLOR_PAGE_FRAMES
65 #define ANSI_DEFAULT "\x1B" "[0m"
66 #define ANSI_RED     "\x1B" "[1;31m"
67 #define ANSI_GREEN   "\x1B" "[1;32m"
68 #define ANSI_YELLOW  "\x1B" "[1;33m"
69 #define ANSI_BLUE    "\x1B" "[1;34m"
70 #define ANSI_MAGENTA "\x1B" "[1;35m"
71 #define ANSI_CYAN    "\x1B" "[1;36m"
72 #define ANSI_GREY    "\x1B" "[1;90m"
73 
74 #define COLOR(x)	printk(_CONCAT(ANSI_, x))
75 #else
76 #define COLOR(x)	do { } while (false)
77 #endif
78 
79 /* LCOV_EXCL_START */
80 static void page_frame_dump(struct z_page_frame *pf)
81 {
82 	if (z_page_frame_is_reserved(pf)) {
83 		COLOR(CYAN);
84 		printk("R");
85 	} else if (z_page_frame_is_busy(pf)) {
86 		COLOR(MAGENTA);
87 		printk("B");
88 	} else if (z_page_frame_is_pinned(pf)) {
89 		COLOR(YELLOW);
90 		printk("P");
91 	} else if (z_page_frame_is_available(pf)) {
92 		COLOR(GREY);
93 		printk(".");
94 	} else if (z_page_frame_is_mapped(pf)) {
95 		COLOR(DEFAULT);
96 		printk("M");
97 	} else {
98 		COLOR(RED);
99 		printk("?");
100 	}
101 }
102 
103 void z_page_frames_dump(void)
104 {
105 	int column = 0;
106 
107 	__ASSERT(page_frames_initialized, "%s called too early", __func__);
108 	printk("Physical memory from 0x%lx to 0x%lx\n",
109 	       Z_PHYS_RAM_START, Z_PHYS_RAM_END);
110 
111 	for (int i = 0; i < Z_NUM_PAGE_FRAMES; i++) {
112 		struct z_page_frame *pf = &z_page_frames[i];
113 
114 		page_frame_dump(pf);
115 
116 		column++;
117 		if (column == 64) {
118 			column = 0;
119 			printk("\n");
120 		}
121 	}
122 
123 	COLOR(DEFAULT);
124 	if (column != 0) {
125 		printk("\n");
126 	}
127 }
128 /* LCOV_EXCL_STOP */
129 
130 #define VIRT_FOREACH(_base, _size, _pos) \
131 	for (_pos = _base; \
132 	     _pos < ((uint8_t *)_base + _size); _pos += CONFIG_MMU_PAGE_SIZE)
133 
134 #define PHYS_FOREACH(_base, _size, _pos) \
135 	for (_pos = _base; \
136 	     _pos < ((uintptr_t)_base + _size); _pos += CONFIG_MMU_PAGE_SIZE)
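
/*
 * Usage sketch for the iterators above, mirroring how VIRT_FOREACH is
 * used later in this file (e.g. in k_mem_map() and z_mem_manage_init()):
 *
 *	uint8_t *pos;
 *
 *	VIRT_FOREACH(Z_KERNEL_VIRT_START, Z_KERNEL_VIRT_SIZE, pos) {
 *		// pos visits each page-aligned virtual address in the range
 *	}
 */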
137 
138 
139 /*
140  * Virtual address space management
141  *
142  * Call all of these functions with z_mm_lock held.
143  *
144  * Overall virtual memory map: When the kernel starts, it resides in
145  * virtual memory in the region Z_KERNEL_VIRT_START to
146  * Z_KERNEL_VIRT_END. Unused virtual memory past this, up to the limit
147  * noted by CONFIG_KERNEL_VM_SIZE may be used for runtime memory mappings.
148  *
149  * If CONFIG_ARCH_MAPS_ALL_RAM is set, we do not just map the kernel image,
150  * but have a mapping for all RAM in place. This is for special architectural
151  * purposes and does not otherwise affect page frame accounting or flags;
152  * the only guarantee is that such RAM mapping outside of the Zephyr image
153  * won't be disturbed by subsequent memory mapping calls.
154  *
155  * +--------------+ <- Z_VIRT_RAM_START
156  * | Undefined VM | <- May contain ancillary regions like x86_64's locore
157  * +--------------+ <- Z_KERNEL_VIRT_START (often == Z_VIRT_RAM_START)
158  * | Mapping for  |
159  * | main kernel  |
160  * | image        |
161  * |		  |
162  * |		  |
163  * +--------------+ <- Z_FREE_VM_START
164  * |              |
165  * | Unused,      |
166  * | Available VM |
167  * |              |
168  * |..............| <- mapping_pos (grows downward as more mappings are made)
169  * | Mapping      |
170  * +--------------+
171  * | Mapping      |
172  * +--------------+
173  * | ...          |
174  * +--------------+
175  * | Mapping      |
176  * +--------------+ <- mappings start here
177  * | Reserved     | <- special purpose virtual page(s) of size Z_VM_RESERVED
178  * +--------------+ <- Z_VIRT_RAM_END
179  */
180 
181 /* Bitmap of virtual addresses where one bit corresponds to one page.
182  * This is being used for virt_region_alloc() to figure out which
183  * region of virtual addresses can be used for memory mapping.
184  *
185  * Note that bit #0 is the highest address so that allocation is
186  * done in reverse from highest address.
187  */
188 SYS_BITARRAY_DEFINE_STATIC(virt_region_bitmap,
189 			   CONFIG_KERNEL_VM_SIZE / CONFIG_MMU_PAGE_SIZE);
190 
191 static bool virt_region_inited;
192 
193 #define Z_VIRT_REGION_START_ADDR	Z_FREE_VM_START
194 #define Z_VIRT_REGION_END_ADDR		(Z_VIRT_RAM_END - Z_VM_RESERVED)
195 
196 static inline uintptr_t virt_from_bitmap_offset(size_t offset, size_t size)
197 {
198 	return POINTER_TO_UINT(Z_VIRT_RAM_END)
199 	       - (offset * CONFIG_MMU_PAGE_SIZE) - size;
200 }
201 
202 static inline size_t virt_to_bitmap_offset(void *vaddr, size_t size)
203 {
204 	return (POINTER_TO_UINT(Z_VIRT_RAM_END)
205 		- POINTER_TO_UINT(vaddr) - size) / CONFIG_MMU_PAGE_SIZE;
206 }
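
/*
 * Worked example of the two conversions above, assuming a 4 KiB
 * CONFIG_MMU_PAGE_SIZE: the page-sized region ending at Z_VIRT_RAM_END
 * (vaddr == Z_VIRT_RAM_END - 4096, size == 4096) maps to bit offset 0,
 * and virt_from_bitmap_offset(0, 4096) returns that same vaddr. In other
 * words, the bitmap is indexed from the top of the virtual address space
 * downwards.
 */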
207 
208 static void virt_region_init(void)
209 {
210 	size_t offset, num_bits;
211 
212 	/* There are regions where we should never map via
213 	 * k_mem_map() and z_phys_map(). Mark them as
214 	 * already allocated so they will never be used.
215 	 */
216 
217 	if (Z_VM_RESERVED > 0) {
218 		/* Mark reserved region at end of virtual address space */
219 		num_bits = Z_VM_RESERVED / CONFIG_MMU_PAGE_SIZE;
220 		(void)sys_bitarray_set_region(&virt_region_bitmap,
221 					      num_bits, 0);
222 	}
223 
224 	/* Mark all bits up to Z_FREE_VM_START as allocated */
225 	num_bits = POINTER_TO_UINT(Z_FREE_VM_START)
226 		   - POINTER_TO_UINT(Z_VIRT_RAM_START);
227 	offset = virt_to_bitmap_offset(Z_VIRT_RAM_START, num_bits);
228 	num_bits /= CONFIG_MMU_PAGE_SIZE;
229 	(void)sys_bitarray_set_region(&virt_region_bitmap,
230 				      num_bits, offset);
231 
232 	virt_region_inited = true;
233 }
234 
235 static void virt_region_free(void *vaddr, size_t size)
236 {
237 	size_t offset, num_bits;
238 	uint8_t *vaddr_u8 = (uint8_t *)vaddr;
239 
240 	if (unlikely(!virt_region_inited)) {
241 		virt_region_init();
242 	}
243 
244 #ifndef CONFIG_KERNEL_DIRECT_MAP
245 	/* Without the need to support K_MEM_DIRECT_MAP, the region must
246 	 * always be representable in the bitmap, so this case is
247 	 * simple.
248 	 */
249 
250 	__ASSERT((vaddr_u8 >= Z_VIRT_REGION_START_ADDR)
251 		 && ((vaddr_u8 + size - 1) < Z_VIRT_REGION_END_ADDR),
252 		 "invalid virtual address region %p (%zu)", vaddr_u8, size);
253 	if (!((vaddr_u8 >= Z_VIRT_REGION_START_ADDR)
254 	      && ((vaddr_u8 + size - 1) < Z_VIRT_REGION_END_ADDR))) {
255 		return;
256 	}
257 
258 	offset = virt_to_bitmap_offset(vaddr, size);
259 	num_bits = size / CONFIG_MMU_PAGE_SIZE;
260 	(void)sys_bitarray_free(&virt_region_bitmap, num_bits, offset);
261 #else /* !CONFIG_KERNEL_DIRECT_MAP */
262 	/* With K_MEM_DIRECT_MAP, the region can be outside of the virtual
263 	 * memory space, wholly within it, or overlap partially.
264 	 * So additional processing is needed to make sure we only
265 	 * mark the pages within the bitmap.
266 	 */
267 	if (((vaddr_u8 >= Z_VIRT_REGION_START_ADDR) &&
268 	     (vaddr_u8 < Z_VIRT_REGION_END_ADDR)) ||
269 	    (((vaddr_u8 + size - 1) >= Z_VIRT_REGION_START_ADDR) &&
270 	     ((vaddr_u8 + size - 1) < Z_VIRT_REGION_END_ADDR))) {
271 		uint8_t *adjusted_start = MAX(vaddr_u8, Z_VIRT_REGION_START_ADDR);
272 		uint8_t *adjusted_end = MIN(vaddr_u8 + size,
273 					    Z_VIRT_REGION_END_ADDR);
274 		size_t adjusted_sz = adjusted_end - adjusted_start;
275 
276 		offset = virt_to_bitmap_offset(adjusted_start, adjusted_sz);
277 		num_bits = adjusted_sz / CONFIG_MMU_PAGE_SIZE;
278 		(void)sys_bitarray_free(&virt_region_bitmap, num_bits, offset);
279 	}
280 #endif /* !CONFIG_KERNEL_DIRECT_MAP */
281 }
282 
283 static void *virt_region_alloc(size_t size, size_t align)
284 {
285 	uintptr_t dest_addr;
286 	size_t alloc_size;
287 	size_t offset;
288 	size_t num_bits;
289 	int ret;
290 
291 	if (unlikely(!virt_region_inited)) {
292 		virt_region_init();
293 	}
294 
295 	/* Possibly request more pages to ensure we can get an aligned virtual address */
296 	num_bits = (size + align - CONFIG_MMU_PAGE_SIZE) / CONFIG_MMU_PAGE_SIZE;
297 	alloc_size = num_bits * CONFIG_MMU_PAGE_SIZE;
298 	ret = sys_bitarray_alloc(&virt_region_bitmap, num_bits, &offset);
299 	if (ret != 0) {
300 		LOG_ERR("insufficient virtual address space (requested %zu)",
301 			size);
302 		return NULL;
303 	}
304 
305 	/* Remember that bit #0 in bitmap corresponds to the highest
306 	 * virtual address. So here we need to go downwards (backwards?)
307 	 * to get the starting address of the allocated region.
308 	 */
309 	dest_addr = virt_from_bitmap_offset(offset, alloc_size);
310 
311 	if (alloc_size > size) {
312 		uintptr_t aligned_dest_addr = ROUND_UP(dest_addr, align);
313 
314 		/* Here is the memory organization when trying to get an aligned
315 		 * virtual address:
316 		 *
317 		 * +--------------+ <- Z_VIRT_RAM_START
318 		 * | Undefined VM |
319 		 * +--------------+ <- Z_KERNEL_VIRT_START (often == Z_VIRT_RAM_START)
320 		 * | Mapping for  |
321 		 * | main kernel  |
322 		 * | image        |
323 		 * |		  |
324 		 * |		  |
325 		 * +--------------+ <- Z_FREE_VM_START
326 		 * | ...          |
327 		 * +==============+ <- dest_addr
328 		 * | Unused       |
329 		 * |..............| <- aligned_dest_addr
330 		 * |              |
331 		 * | Aligned      |
332 		 * | Mapping      |
333 		 * |              |
334 		 * |..............| <- aligned_dest_addr + size
335 		 * | Unused       |
336 		 * +==============+ <- offset from Z_VIRT_RAM_END == dest_addr + alloc_size
337 		 * | ...          |
338 		 * +--------------+
339 		 * | Mapping      |
340 		 * +--------------+
341 		 * | Reserved     |
342 		 * +--------------+ <- Z_VIRT_RAM_END
343 		 */
344 
345 		/* Free the two unused regions */
346 		virt_region_free(UINT_TO_POINTER(dest_addr),
347 				 aligned_dest_addr - dest_addr);
348 		if (((dest_addr + alloc_size) - (aligned_dest_addr + size)) > 0) {
349 			virt_region_free(UINT_TO_POINTER(aligned_dest_addr + size),
350 					 (dest_addr + alloc_size) - (aligned_dest_addr + size));
351 		}
352 
353 		dest_addr = aligned_dest_addr;
354 	}
355 
356 	/* Need to make sure this does not step into kernel memory */
357 	if (dest_addr < POINTER_TO_UINT(Z_VIRT_REGION_START_ADDR)) {
358 		(void)sys_bitarray_free(&virt_region_bitmap, size, offset);
359 		return NULL;
360 	}
361 
362 	return UINT_TO_POINTER(dest_addr);
363 }
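
/*
 * Worked example for the alignment path above, assuming a 4 KiB
 * CONFIG_MMU_PAGE_SIZE: virt_region_alloc(0x2000, 0x10000) computes
 * num_bits = (0x2000 + 0x10000 - 0x1000) / 0x1000 = 17 pages, so
 * alloc_size = 0x11000. Wherever dest_addr lands, ROUND_UP() yields an
 * aligned_dest_addr on a 64 KiB boundary inside the over-allocation, and
 * the unused head and tail are handed back via virt_region_free().
 */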
364 
365 /*
366  * Free page frames management
367  *
368  * Call all of these functions with z_mm_lock held.
369  */
370 
371 /* Linked list of unused and available page frames.
372  *
373  * TODO: This is very simple and treats all free page frames as being equal.
374  * However, there are use-cases to consolidate free pages such that entire
375  * SRAM banks can be switched off to save power, and so obtaining free pages
376  * may require a more complex ontology which prefers page frames in RAM banks
377  * which are still active.
378  *
379  * This implies in the future there may be multiple slists managing physical
380  * pages. Each page frame will still just have one snode link.
381  */
382 static sys_slist_t free_page_frame_list;
383 
384 /* Number of unused and available free page frames.
385  * This information may go stale immediately.
386  */
387 static size_t z_free_page_count;
388 
389 #define PF_ASSERT(pf, expr, fmt, ...) \
390 	__ASSERT(expr, "page frame 0x%lx: " fmt, z_page_frame_to_phys(pf), \
391 		 ##__VA_ARGS__)
392 
393 /* Get an unused page frame; we don't care which one. Returns NULL if there are none. */
394 static struct z_page_frame *free_page_frame_list_get(void)
395 {
396 	sys_snode_t *node;
397 	struct z_page_frame *pf = NULL;
398 
399 	node = sys_slist_get(&free_page_frame_list);
400 	if (node != NULL) {
401 		z_free_page_count--;
402 		pf = CONTAINER_OF(node, struct z_page_frame, node);
403 		PF_ASSERT(pf, z_page_frame_is_available(pf),
404 			 "unavailable but somehow on free list");
405 	}
406 
407 	return pf;
408 }
409 
410 /* Release a page frame back into the list of free pages */
411 static void free_page_frame_list_put(struct z_page_frame *pf)
412 {
413 	PF_ASSERT(pf, z_page_frame_is_available(pf),
414 		 "unavailable page put on free list");
415 	/* The structure is packed, which ensures that this is true */
416 	void *node = pf;
417 
418 	sys_slist_append(&free_page_frame_list, node);
419 	z_free_page_count++;
420 }
421 
422 static void free_page_frame_list_init(void)
423 {
424 	sys_slist_init(&free_page_frame_list);
425 }
426 
427 static void page_frame_free_locked(struct z_page_frame *pf)
428 {
429 	pf->flags = 0;
430 	free_page_frame_list_put(pf);
431 }
432 
433 /*
434  * Memory Mapping
435  */
436 
437 /* Called after the frame is mapped in the arch layer, to update our
438  * local ontology (and do some assertions while we're at it)
439  */
440 static void frame_mapped_set(struct z_page_frame *pf, void *addr)
441 {
442 	PF_ASSERT(pf, !z_page_frame_is_reserved(pf),
443 		  "attempted to map a reserved page frame");
444 
445 	/* We do allow multiple mappings for pinned page frames
446 	 * since we will never need to reverse map them.
447 	 * This is uncommon, use-cases are for things like the
448 	 * Zephyr equivalent of VDSOs
449 	 */
450 	PF_ASSERT(pf, !z_page_frame_is_mapped(pf) || z_page_frame_is_pinned(pf),
451 		 "non-pinned and already mapped to %p", pf->addr);
452 
453 	pf->flags |= Z_PAGE_FRAME_MAPPED;
454 	pf->addr = addr;
455 }
456 
457 /* LCOV_EXCL_START */
458 /* Go through page frames to find the physical address mapped
459  * by a virtual address.
460  *
461  * @param[in]  virt Virtual Address
462  * @param[out] phys Physical address mapped to the input virtual address
463  *                  if such mapping exists.
464  *
465  * @retval 0 if mapping is found and valid
466  * @retval -EFAULT if virtual address is not mapped
467  */
468 static int virt_to_page_frame(void *virt, uintptr_t *phys)
469 {
470 	uintptr_t paddr;
471 	struct z_page_frame *pf;
472 	int ret = -EFAULT;
473 
474 	Z_PAGE_FRAME_FOREACH(paddr, pf) {
475 		if (z_page_frame_is_mapped(pf)) {
476 			if (virt == pf->addr) {
477 				ret = 0;
478 				if (phys != NULL) {
479 					*phys = z_page_frame_to_phys(pf);
480 				}
481 				break;
482 			}
483 		}
484 	}
485 
486 	return ret;
487 }
488 /* LCOV_EXCL_STOP */
489 
490 __weak FUNC_ALIAS(virt_to_page_frame, arch_page_phys_get, int);
491 
492 #ifdef CONFIG_DEMAND_PAGING
493 static int page_frame_prepare_locked(struct z_page_frame *pf, bool *dirty_ptr,
494 				     bool page_in, uintptr_t *location_ptr);
495 
496 static inline void do_backing_store_page_in(uintptr_t location);
497 static inline void do_backing_store_page_out(uintptr_t location);
498 #endif /* CONFIG_DEMAND_PAGING */
499 
500 /* Allocate a free page frame, and map it to a specified virtual address
501  *
502  * TODO: Add optional support for copy-on-write mappings to a zero page instead
503  * of allocating, in which case page frames will be allocated lazily as
504  * the mappings to the zero page get touched. This will avoid expensive
505  * page-ins as memory is mapped and physical RAM or backing store space will
506  * not be used if the mapped memory is unused. The cost is an empty physical
507  * page of zeroes.
508  */
509 static int map_anon_page(void *addr, uint32_t flags)
510 {
511 	struct z_page_frame *pf;
512 	uintptr_t phys;
513 	bool lock = (flags & K_MEM_MAP_LOCK) != 0U;
514 	bool uninit = (flags & K_MEM_MAP_UNINIT) != 0U;
515 
516 	pf = free_page_frame_list_get();
517 	if (pf == NULL) {
518 #ifdef CONFIG_DEMAND_PAGING
519 		uintptr_t location;
520 		bool dirty;
521 		int ret;
522 
523 		pf = k_mem_paging_eviction_select(&dirty);
524 		__ASSERT(pf != NULL, "failed to get a page frame");
525 		LOG_DBG("evicting %p at 0x%lx", pf->addr,
526 			z_page_frame_to_phys(pf));
527 		ret = page_frame_prepare_locked(pf, &dirty, false, &location);
528 		if (ret != 0) {
529 			return -ENOMEM;
530 		}
531 		if (dirty) {
532 			do_backing_store_page_out(location);
533 		}
534 		pf->flags = 0;
535 #else
536 		return -ENOMEM;
537 #endif /* CONFIG_DEMAND_PAGING */
538 	}
539 
540 	phys = z_page_frame_to_phys(pf);
541 	arch_mem_map(addr, phys, CONFIG_MMU_PAGE_SIZE, flags | K_MEM_CACHE_WB);
542 
543 	if (lock) {
544 		pf->flags |= Z_PAGE_FRAME_PINNED;
545 	}
546 	frame_mapped_set(pf, addr);
547 
548 	LOG_DBG("memory mapping anon page %p -> 0x%lx", addr, phys);
549 
550 	if (!uninit) {
551 		/* If we later implement mappings to a copy-on-write
552 		 * zero page, won't need this step
553 		 */
554 		memset(addr, 0, CONFIG_MMU_PAGE_SIZE);
555 	}
556 
557 	return 0;
558 }
559 
560 void *k_mem_map(size_t size, uint32_t flags)
561 {
562 	uint8_t *dst;
563 	size_t total_size;
564 	int ret;
565 	k_spinlock_key_t key;
566 	uint8_t *pos;
567 
568 	__ASSERT(!(((flags & K_MEM_PERM_USER) != 0U) &&
569 		   ((flags & K_MEM_MAP_UNINIT) != 0U)),
570 		 "user access to anonymous uninitialized pages is forbidden");
571 	__ASSERT(size % CONFIG_MMU_PAGE_SIZE == 0U,
572 		 "unaligned size %zu passed to %s", size, __func__);
573 	__ASSERT(size != 0, "zero sized memory mapping");
574 	__ASSERT(page_frames_initialized, "%s called too early", __func__);
575 	__ASSERT((flags & K_MEM_CACHE_MASK) == 0U,
576 		 "%s does not support explicit cache settings", __func__);
577 
578 	key = k_spin_lock(&z_mm_lock);
579 
580 	/* Need extra for the guard pages (before and after) which we
581 	 * won't map.
582 	 */
583 	total_size = size + CONFIG_MMU_PAGE_SIZE * 2;
584 
585 	dst = virt_region_alloc(total_size, CONFIG_MMU_PAGE_SIZE);
586 	if (dst == NULL) {
587 		/* Address space has no free region */
588 		goto out;
589 	}
590 
591 	/* Unmap both guard pages to make sure accessing them
592 	 * will generate fault.
593 	 */
594 	arch_mem_unmap(dst, CONFIG_MMU_PAGE_SIZE);
595 	arch_mem_unmap(dst + CONFIG_MMU_PAGE_SIZE + size,
596 		       CONFIG_MMU_PAGE_SIZE);
597 
598 	/* Skip over the "before" guard page in returned address. */
599 	dst += CONFIG_MMU_PAGE_SIZE;
600 
601 	VIRT_FOREACH(dst, size, pos) {
602 		ret = map_anon_page(pos, flags);
603 
604 		if (ret != 0) {
605 			/* TODO: call k_mem_unmap(dst, pos - dst)  when
606 			 * implemented in #28990 and release any guard virtual
607 			 * page as well.
608 			 */
609 			dst = NULL;
610 			goto out;
611 		}
612 	}
613 out:
614 	k_spin_unlock(&z_mm_lock, key);
615 	return dst;
616 }
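
/*
 * Usage sketch for k_mem_map()/k_mem_unmap() (illustrative, not compiled
 * here; error handling is minimal and the sizes are arbitrary):
 *
 *	size_t sz = 4 * CONFIG_MMU_PAGE_SIZE;
 *	uint8_t *buf = k_mem_map(sz, K_MEM_PERM_RW);
 *
 *	if (buf != NULL) {
 *		memset(buf, 0x5a, sz);	// pages were zeroed by map_anon_page()
 *		k_mem_unmap(buf, sz);	// releases pages and both guard pages
 *	}
 */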
617 
618 void k_mem_unmap(void *addr, size_t size)
619 {
620 	uintptr_t phys;
621 	uint8_t *pos;
622 	struct z_page_frame *pf;
623 	k_spinlock_key_t key;
624 	size_t total_size;
625 	int ret;
626 
627 	/* Need space for the "before" guard page */
628 	__ASSERT_NO_MSG(POINTER_TO_UINT(addr) >= CONFIG_MMU_PAGE_SIZE);
629 
630 	/* Make sure address range is still valid after accounting
631 	 * for two guard pages.
632 	 */
633 	pos = (uint8_t *)addr - CONFIG_MMU_PAGE_SIZE;
634 	z_mem_assert_virtual_region(pos, size + (CONFIG_MMU_PAGE_SIZE * 2));
635 
636 	key = k_spin_lock(&z_mm_lock);
637 
638 	/* Check if both guard pages are unmapped.
639 	 * Bail if not, as this is probably a region not mapped
640 	 * using k_mem_map().
641 	 */
642 	pos = addr;
643 	ret = arch_page_phys_get(pos - CONFIG_MMU_PAGE_SIZE, NULL);
644 	if (ret == 0) {
645 		__ASSERT(ret != 0,
646 			 "%s: cannot find preceding guard page for (%p, %zu)",
647 			 __func__, addr, size);
648 		goto out;
649 	}
650 
651 	ret = arch_page_phys_get(pos + size, NULL);
652 	if (ret == 0) {
653 		__ASSERT(ret != 0,
654 			 "%s: cannot find succeeding guard page for (%p, %zu)",
655 			 __func__, addr, size);
656 		goto out;
657 	}
658 
659 	VIRT_FOREACH(addr, size, pos) {
660 		ret = arch_page_phys_get(pos, &phys);
661 
662 		__ASSERT(ret == 0,
663 			 "%s: cannot unmap an unmapped address %p",
664 			 __func__, pos);
665 		if (ret != 0) {
666 			/* Found an address not mapped. Do not continue. */
667 			goto out;
668 		}
669 
670 		__ASSERT(z_is_page_frame(phys),
671 			 "%s: 0x%lx is not a page frame", __func__, phys);
672 		if (!z_is_page_frame(phys)) {
673 			/* Physical address has no corresponding page frame
674 			 * description in the page frame array.
675 			 * This should not happen. Do not continue.
676 			 */
677 			goto out;
678 		}
679 
680 		/* Grab the corresponding page frame from physical address */
681 		pf = z_phys_to_page_frame(phys);
682 
683 		__ASSERT(z_page_frame_is_mapped(pf),
684 			 "%s: 0x%lx is not a mapped page frame", __func__, phys);
685 		if (!z_page_frame_is_mapped(pf)) {
686 			/* Page frame is not marked mapped.
687 			 * This should not happen. Do not continue.
688 			 */
689 			goto out;
690 		}
691 
692 		arch_mem_unmap(pos, CONFIG_MMU_PAGE_SIZE);
693 
694 		/* Put the page frame back into free list */
695 		page_frame_free_locked(pf);
696 	}
697 
698 	/* There are guard pages just before and after the mapped
699 	 * region. So we also need to free them from the bitmap.
700 	 */
701 	pos = (uint8_t *)addr - CONFIG_MMU_PAGE_SIZE;
702 	total_size = size + CONFIG_MMU_PAGE_SIZE * 2;
703 	virt_region_free(pos, total_size);
704 
705 out:
706 	k_spin_unlock(&z_mm_lock, key);
707 }
708 
709 size_t k_mem_free_get(void)
710 {
711 	size_t ret;
712 	k_spinlock_key_t key;
713 
714 	__ASSERT(page_frames_initialized, "%s called too early", __func__);
715 
716 	key = k_spin_lock(&z_mm_lock);
717 #ifdef CONFIG_DEMAND_PAGING
718 	if (z_free_page_count > CONFIG_DEMAND_PAGING_PAGE_FRAMES_RESERVE) {
719 		ret = z_free_page_count - CONFIG_DEMAND_PAGING_PAGE_FRAMES_RESERVE;
720 	} else {
721 		ret = 0;
722 	}
723 #else
724 	ret = z_free_page_count;
725 #endif
726 	k_spin_unlock(&z_mm_lock, key);
727 
728 	return ret * (size_t)CONFIG_MMU_PAGE_SIZE;
729 }
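
/*
 * Example: the value returned is in bytes, so the number of page frames
 * currently available for k_mem_map() can be derived as
 *
 *	size_t free_pages = k_mem_free_get() / CONFIG_MMU_PAGE_SIZE;
 *
 * With CONFIG_DEMAND_PAGING enabled this already excludes the
 * CONFIG_DEMAND_PAGING_PAGE_FRAMES_RESERVE frames held back above.
 */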
730 
731 /* Get the default virtual region alignment, here the default MMU page size
732  *
733  * @param[in] phys Physical address of region to be mapped, aligned to MMU_PAGE_SIZE
734  * @param[in] size Size of region to be mapped, aligned to MMU_PAGE_SIZE
735  *
736  * @retval alignment to apply on the virtual address of this region
737  */
738 static size_t virt_region_align(uintptr_t phys, size_t size)
739 {
740 	ARG_UNUSED(phys);
741 	ARG_UNUSED(size);
742 
743 	return CONFIG_MMU_PAGE_SIZE;
744 }
745 
746 __weak FUNC_ALIAS(virt_region_align, arch_virt_region_align, size_t);
747 
748 /* This may be called from arch early boot code before z_cstart() is invoked.
749  * Data will be copied and BSS zeroed, but this must not rely on any
750  * initialization functions having been called beforehand to work correctly.
751  */
752 void z_phys_map(uint8_t **virt_ptr, uintptr_t phys, size_t size, uint32_t flags)
753 {
754 	uintptr_t aligned_phys, addr_offset;
755 	size_t aligned_size, align_boundary;
756 	k_spinlock_key_t key;
757 	uint8_t *dest_addr;
758 	size_t num_bits;
759 	size_t offset;
760 
761 #ifndef CONFIG_KERNEL_DIRECT_MAP
762 	__ASSERT(!(flags & K_MEM_DIRECT_MAP), "The direct-map is not enabled");
763 #endif
764 	addr_offset = k_mem_region_align(&aligned_phys, &aligned_size,
765 					 phys, size,
766 					 CONFIG_MMU_PAGE_SIZE);
767 	__ASSERT(aligned_size != 0U, "0-length mapping at 0x%lx", aligned_phys);
768 	__ASSERT(aligned_phys < (aligned_phys + (aligned_size - 1)),
769 		 "wraparound for physical address 0x%lx (size %zu)",
770 		 aligned_phys, aligned_size);
771 
772 	align_boundary = arch_virt_region_align(aligned_phys, aligned_size);
773 
774 	key = k_spin_lock(&z_mm_lock);
775 
776 	if (IS_ENABLED(CONFIG_KERNEL_DIRECT_MAP) &&
777 	    (flags & K_MEM_DIRECT_MAP)) {
778 		dest_addr = (uint8_t *)aligned_phys;
779 
780 		/* Mark the region of virtual memory bitmap as used
781 		 * if the region overlaps the virtual memory space.
782 		 *
783 		 * Basically if either end of region is within
784 		 * virtual memory space, we need to mark the bits.
785 		 */
786 
787 		if (IN_RANGE(aligned_phys,
788 			      (uintptr_t)Z_VIRT_RAM_START,
789 			      (uintptr_t)(Z_VIRT_RAM_END - 1)) ||
790 		    IN_RANGE(aligned_phys + aligned_size - 1,
791 			      (uintptr_t)Z_VIRT_RAM_START,
792 			      (uintptr_t)(Z_VIRT_RAM_END - 1))) {
793 			uint8_t *adjusted_start = MAX(dest_addr, Z_VIRT_RAM_START);
794 			uint8_t *adjusted_end = MIN(dest_addr + aligned_size,
795 						    Z_VIRT_RAM_END);
796 			size_t adjusted_sz = adjusted_end - adjusted_start;
797 
798 			num_bits = adjusted_sz / CONFIG_MMU_PAGE_SIZE;
799 			offset = virt_to_bitmap_offset(adjusted_start, adjusted_sz);
800 			if (sys_bitarray_test_and_set_region(
801 			    &virt_region_bitmap, num_bits, offset, true))
802 				goto fail;
803 		}
804 	} else {
805 		/* Obtain an appropriately sized chunk of virtual memory */
806 		dest_addr = virt_region_alloc(aligned_size, align_boundary);
807 		if (!dest_addr) {
808 			goto fail;
809 		}
810 	}
811 
812 	/* If this fails there's something amiss with virt_region_alloc() */
813 	__ASSERT((uintptr_t)dest_addr <
814 		 ((uintptr_t)dest_addr + (size - 1)),
815 		 "wraparound for virtual address %p (size %zu)",
816 		 dest_addr, size);
817 
818 	LOG_DBG("arch_mem_map(%p, 0x%lx, %zu, %x) offset %lu", dest_addr,
819 		aligned_phys, aligned_size, flags, addr_offset);
820 
821 	arch_mem_map(dest_addr, aligned_phys, aligned_size, flags);
822 	k_spin_unlock(&z_mm_lock, key);
823 
824 	*virt_ptr = dest_addr + addr_offset;
825 	return;
826 fail:
827 	/* May re-visit this in the future, but for now running out of
828 	 * virtual address space or failing the arch_mem_map() call is
829 	 * an unrecoverable situation.
830 	 *
831 	 * Other problems not related to resource exhaustion we leave as
832 	 * assertions since they are clearly programming mistakes.
833 	 */
834 	LOG_ERR("memory mapping 0x%lx (size %zu, flags 0x%x) failed",
835 		phys, size, flags);
836 	k_panic();
837 }
838 
839 void z_phys_unmap(uint8_t *virt, size_t size)
840 {
841 	uintptr_t aligned_virt, addr_offset;
842 	size_t aligned_size;
843 	k_spinlock_key_t key;
844 
845 	addr_offset = k_mem_region_align(&aligned_virt, &aligned_size,
846 					 POINTER_TO_UINT(virt), size,
847 					 CONFIG_MMU_PAGE_SIZE);
848 	__ASSERT(aligned_size != 0U, "0-length mapping at 0x%lx", aligned_virt);
849 	__ASSERT(aligned_virt < (aligned_virt + (aligned_size - 1)),
850 		 "wraparound for virtual address 0x%lx (size %zu)",
851 		 aligned_virt, aligned_size);
852 
853 	key = k_spin_lock(&z_mm_lock);
854 
855 	LOG_DBG("arch_mem_unmap(0x%lx, %zu) offset %lu",
856 		aligned_virt, aligned_size, addr_offset);
857 
858 	arch_mem_unmap(UINT_TO_POINTER(aligned_virt), aligned_size);
859 	virt_region_free(UINT_TO_POINTER(aligned_virt), aligned_size);
860 	k_spin_unlock(&z_mm_lock, key);
861 }
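
/*
 * Usage sketch for z_phys_map()/z_phys_unmap(), e.g. to access a
 * memory-mapped peripheral (illustrative: the physical address, size and
 * register offset are made-up values, and drivers would normally go
 * through DEVICE_MMIO/device_map() rather than calling this directly):
 *
 *	uint8_t *regs;
 *
 *	z_phys_map(&regs, 0xABCD0000UL, 0x1000,
 *		   K_MEM_PERM_RW | K_MEM_CACHE_NONE);
 *	sys_write32(0x1, (mem_addr_t)(regs + 0x10));
 *	z_phys_unmap(regs, 0x1000);
 */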
862 
863 /*
864  * Miscellaneous
865  */
866 
867 size_t k_mem_region_align(uintptr_t *aligned_addr, size_t *aligned_size,
868 			  uintptr_t addr, size_t size, size_t align)
869 {
870 	size_t addr_offset;
871 
872 	/* The actual mapped region must be page-aligned. Round down the
873 	 * physical address and pad the region size appropriately
874 	 */
875 	*aligned_addr = ROUND_DOWN(addr, align);
876 	addr_offset = addr - *aligned_addr;
877 	*aligned_size = ROUND_UP(size + addr_offset, align);
878 
879 	return addr_offset;
880 }
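
/*
 * Worked example, assuming a 4 KiB alignment: k_mem_region_align() for
 * addr = 0x80001234, size = 0x2000, align = 0x1000 yields
 * *aligned_addr = 0x80001000, *aligned_size = 0x3000 and returns an
 * addr_offset of 0x234, so the caller maps [0x80001000, 0x80004000) and
 * adds 0x234 back to reach the originally requested address.
 */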
881 
882 #if defined(CONFIG_LINKER_USE_BOOT_SECTION) || defined(CONFIG_LINKER_USE_PINNED_SECTION)
883 static void mark_linker_section_pinned(void *start_addr, void *end_addr,
884 				       bool pin)
885 {
886 	struct z_page_frame *pf;
887 	uint8_t *addr;
888 
889 	uintptr_t pinned_start = ROUND_DOWN(POINTER_TO_UINT(start_addr),
890 					    CONFIG_MMU_PAGE_SIZE);
891 	uintptr_t pinned_end = ROUND_UP(POINTER_TO_UINT(end_addr),
892 					CONFIG_MMU_PAGE_SIZE);
893 	size_t pinned_size = pinned_end - pinned_start;
894 
895 	VIRT_FOREACH(UINT_TO_POINTER(pinned_start), pinned_size, addr)
896 	{
897 		pf = z_phys_to_page_frame(Z_BOOT_VIRT_TO_PHYS(addr));
898 		frame_mapped_set(pf, addr);
899 
900 		if (pin) {
901 			pf->flags |= Z_PAGE_FRAME_PINNED;
902 		} else {
903 			pf->flags &= ~Z_PAGE_FRAME_PINNED;
904 		}
905 	}
906 }
907 #endif /* CONFIG_LINKER_USE_BOOT_SECTION || CONFIG_LINKER_USE_PINNED_SECTION */
908 
909 void z_mem_manage_init(void)
910 {
911 	uintptr_t phys;
912 	uint8_t *addr;
913 	struct z_page_frame *pf;
914 	k_spinlock_key_t key = k_spin_lock(&z_mm_lock);
915 
916 	free_page_frame_list_init();
917 
918 	ARG_UNUSED(addr);
919 
920 #ifdef CONFIG_ARCH_HAS_RESERVED_PAGE_FRAMES
921 	/* If some page frames are unavailable for use as memory, arch
922 	 * code will mark Z_PAGE_FRAME_RESERVED in their flags
923 	 */
924 	arch_reserved_pages_update();
925 #endif /* CONFIG_ARCH_HAS_RESERVED_PAGE_FRAMES */
926 
927 #ifdef CONFIG_LINKER_GENERIC_SECTIONS_PRESENT_AT_BOOT
928 	/* All pages composing the Zephyr image are mapped at boot in a
929 	 * predictable way. This can change at runtime.
930 	 */
931 	VIRT_FOREACH(Z_KERNEL_VIRT_START, Z_KERNEL_VIRT_SIZE, addr)
932 	{
933 		pf = z_phys_to_page_frame(Z_BOOT_VIRT_TO_PHYS(addr));
934 		frame_mapped_set(pf, addr);
935 
936 		/* TODO: for now we pin the whole Zephyr image. Demand paging
937 		 * is currently tested with anonymously-mapped pages, which are not
938 		 * pinned.
939 		 *
940 		 * We will need to setup linker regions for a subset of kernel
941 		 * code/data pages which are pinned in memory and
942 		 * may not be evicted. This will contain critical CPU data
943 		 * structures, and any code used to perform page fault
944 		 * handling, page-ins, etc.
945 		 */
946 		pf->flags |= Z_PAGE_FRAME_PINNED;
947 	}
948 #endif /* CONFIG_LINKER_GENERIC_SECTIONS_PRESENT_AT_BOOT */
949 
950 #ifdef CONFIG_LINKER_USE_BOOT_SECTION
951 	/* Pin the boot section to prevent it from being swapped out during
952 	 * boot process. Will be un-pinned once boot process completes.
953 	 */
954 	mark_linker_section_pinned(lnkr_boot_start, lnkr_boot_end, true);
955 #endif
956 
957 #ifdef CONFIG_LINKER_USE_PINNED_SECTION
958 	/* Pin the page frames corresponding to the pinned symbols */
959 	mark_linker_section_pinned(lnkr_pinned_start, lnkr_pinned_end, true);
960 #endif
961 
962 	/* Any remaining pages that aren't mapped, reserved, or pinned get
963 	 * added to the free pages list
964 	 */
965 	Z_PAGE_FRAME_FOREACH(phys, pf) {
966 		if (z_page_frame_is_available(pf)) {
967 			free_page_frame_list_put(pf);
968 		}
969 	}
970 	LOG_DBG("free page frames: %zu", z_free_page_count);
971 
972 #ifdef CONFIG_DEMAND_PAGING
973 #ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
974 	z_paging_histogram_init();
975 #endif
976 	k_mem_paging_backing_store_init();
977 	k_mem_paging_eviction_init();
978 #endif
979 #if __ASSERT_ON
980 	page_frames_initialized = true;
981 #endif
982 	k_spin_unlock(&z_mm_lock, key);
983 
984 #ifndef CONFIG_LINKER_GENERIC_SECTIONS_PRESENT_AT_BOOT
985 	/* If the BSS section is not present in memory at boot,
986 	 * it will not have been cleared. This needs to be
987 	 * done now, since the paging mechanism has been initialized
988 	 * and the BSS pages can be brought into physical
989 	 * memory to be cleared.
990 	 */
991 	z_bss_zero();
992 #endif
993 }
994 
995 void z_mem_manage_boot_finish(void)
996 {
997 #ifdef CONFIG_LINKER_USE_BOOT_SECTION
998 	/* At the end of boot process, unpin the boot sections
999 	 * as they don't need to be in memory all the time anymore.
1000 	 */
1001 	mark_linker_section_pinned(lnkr_boot_start, lnkr_boot_end, false);
1002 #endif
1003 }
1004 
1005 #ifdef CONFIG_DEMAND_PAGING
1006 
1007 #ifdef CONFIG_DEMAND_PAGING_STATS
1008 struct k_mem_paging_stats_t paging_stats;
1009 extern struct k_mem_paging_histogram_t z_paging_histogram_eviction;
1010 extern struct k_mem_paging_histogram_t z_paging_histogram_backing_store_page_in;
1011 extern struct k_mem_paging_histogram_t z_paging_histogram_backing_store_page_out;
1012 #endif
1013 
1014 static inline void do_backing_store_page_in(uintptr_t location)
1015 {
1016 #ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
1017 	uint32_t time_diff;
1018 
1019 #ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS
1020 	timing_t time_start, time_end;
1021 
1022 	time_start = timing_counter_get();
1023 #else
1024 	uint32_t time_start;
1025 
1026 	time_start = k_cycle_get_32();
1027 #endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */
1028 #endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */
1029 
1030 	k_mem_paging_backing_store_page_in(location);
1031 
1032 #ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
1033 #ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS
1034 	time_end = timing_counter_get();
1035 	time_diff = (uint32_t)timing_cycles_get(&time_start, &time_end);
1036 #else
1037 	time_diff = k_cycle_get_32() - time_start;
1038 #endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */
1039 
1040 	z_paging_histogram_inc(&z_paging_histogram_backing_store_page_in,
1041 			       time_diff);
1042 #endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */
1043 }
1044 
1045 static inline void do_backing_store_page_out(uintptr_t location)
1046 {
1047 #ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
1048 	uint32_t time_diff;
1049 
1050 #ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS
1051 	timing_t time_start, time_end;
1052 
1053 	time_start = timing_counter_get();
1054 #else
1055 	uint32_t time_start;
1056 
1057 	time_start = k_cycle_get_32();
1058 #endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */
1059 #endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */
1060 
1061 	k_mem_paging_backing_store_page_out(location);
1062 
1063 #ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
1064 #ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS
1065 	time_end = timing_counter_get();
1066 	time_diff = (uint32_t)timing_cycles_get(&time_start, &time_end);
1067 #else
1068 	time_diff = k_cycle_get_32() - time_start;
1069 #endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */
1070 
1071 	z_paging_histogram_inc(&z_paging_histogram_backing_store_page_out,
1072 			       time_diff);
1073 #endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */
1074 }
1075 
1076 /* Current implementation relies on interrupt locking to prevent any page table
1077  * access, which falls over if other CPUs are active. Addressing this is not
1078  * as simple as using spinlocks as regular memory reads/writes constitute
1079  * "access" in this sense.
1080  *
1081  * Current needs for demand paging are on uniprocessor systems.
1082  */
1083 BUILD_ASSERT(!IS_ENABLED(CONFIG_SMP));
1084 
1085 static void virt_region_foreach(void *addr, size_t size,
1086 				void (*func)(void *))
1087 {
1088 	z_mem_assert_virtual_region(addr, size);
1089 
1090 	for (size_t offset = 0; offset < size; offset += CONFIG_MMU_PAGE_SIZE) {
1091 		func((uint8_t *)addr + offset);
1092 	}
1093 }
1094 
1095 /*
1096  * Perform some preparatory steps before paging out. The provided page frame
1097  * must be evicted to the backing store immediately after this is called
1098  * with a call to k_mem_paging_backing_store_page_out() if it contains
1099  * a data page.
1100  *
1101  * - Map page frame to scratch area if requested. This is always true if we're
1102  *   doing a page fault, but is only set on manual evictions if the page is
1103  *   dirty.
1104  * - If mapped:
1105  *    - obtain backing store location and populate location parameter
1106  *    - Update page tables with location
1107  * - Mark page frame as busy
1108  *
1109  * Returns -ENOMEM if the backing store is full
1110  */
1111 static int page_frame_prepare_locked(struct z_page_frame *pf, bool *dirty_ptr,
1112 				     bool page_fault, uintptr_t *location_ptr)
1113 {
1114 	uintptr_t phys;
1115 	int ret;
1116 	bool dirty = *dirty_ptr;
1117 
1118 	phys = z_page_frame_to_phys(pf);
1119 	__ASSERT(!z_page_frame_is_pinned(pf), "page frame 0x%lx is pinned",
1120 		 phys);
1121 
1122 	/* If the backing store doesn't have a copy of the page, even if it
1123 	 * wasn't modified, treat as dirty. This can happen for a few
1124 	 * reasons:
1125 	 * 1) Page has never been swapped out before, and the backing store
1126 	 *    wasn't pre-populated with this data page.
1127 	 * 2) Page was swapped out before, but the page contents were not
1128 	 *    preserved after swapping back in.
1129 	 * 3) Page contents were preserved when swapped back in, but were later
1130 	 *    evicted from the backing store to make room for other evicted
1131 	 *    pages.
1132 	 */
1133 	if (z_page_frame_is_mapped(pf)) {
1134 		dirty = dirty || !z_page_frame_is_backed(pf);
1135 	}
1136 
1137 	if (dirty || page_fault) {
1138 		arch_mem_scratch(phys);
1139 	}
1140 
1141 	if (z_page_frame_is_mapped(pf)) {
1142 		ret = k_mem_paging_backing_store_location_get(pf, location_ptr,
1143 							      page_fault);
1144 		if (ret != 0) {
1145 			LOG_ERR("out of backing store memory");
1146 			return -ENOMEM;
1147 		}
1148 		arch_mem_page_out(pf->addr, *location_ptr);
1149 	} else {
1150 		/* Shouldn't happen unless this function is mis-used */
1151 		__ASSERT(!dirty, "un-mapped page determined to be dirty");
1152 	}
1153 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1154 	/* Mark as busy so that z_page_frame_is_evictable() returns false */
1155 	__ASSERT(!z_page_frame_is_busy(pf), "page frame 0x%lx is already busy",
1156 		 phys);
1157 	pf->flags |= Z_PAGE_FRAME_BUSY;
1158 #endif
1159 	/* Update dirty parameter, since we set to true if it wasn't backed
1160 	 * even if otherwise clean
1161 	 */
1162 	*dirty_ptr = dirty;
1163 
1164 	return 0;
1165 }
1166 
1167 static int do_mem_evict(void *addr)
1168 {
1169 	bool dirty;
1170 	struct z_page_frame *pf;
1171 	uintptr_t location;
1172 	int key, ret;
1173 	uintptr_t flags, phys;
1174 
1175 #if CONFIG_DEMAND_PAGING_ALLOW_IRQ
1176 	__ASSERT(!k_is_in_isr(),
1177 		 "%s is unavailable in ISRs with CONFIG_DEMAND_PAGING_ALLOW_IRQ",
1178 		 __func__);
1179 	k_sched_lock();
1180 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1181 	key = irq_lock();
1182 	flags = arch_page_info_get(addr, &phys, false);
1183 	__ASSERT((flags & ARCH_DATA_PAGE_NOT_MAPPED) == 0,
1184 		 "address %p isn't mapped", addr);
1185 	if ((flags & ARCH_DATA_PAGE_LOADED) == 0) {
1186 		/* Un-mapped or already evicted. Nothing to do */
1187 		ret = 0;
1188 		goto out;
1189 	}
1190 
1191 	dirty = (flags & ARCH_DATA_PAGE_DIRTY) != 0;
1192 	pf = z_phys_to_page_frame(phys);
1193 	__ASSERT(pf->addr == addr, "page frame address mismatch");
1194 	ret = page_frame_prepare_locked(pf, &dirty, false, &location);
1195 	if (ret != 0) {
1196 		goto out;
1197 	}
1198 
1199 	__ASSERT(ret == 0, "failed to prepare page frame");
1200 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1201 	irq_unlock(key);
1202 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1203 	if (dirty) {
1204 		do_backing_store_page_out(location);
1205 	}
1206 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1207 	key = irq_lock();
1208 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1209 	page_frame_free_locked(pf);
1210 out:
1211 	irq_unlock(key);
1212 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1213 	k_sched_unlock();
1214 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1215 	return ret;
1216 }
1217 
1218 int k_mem_page_out(void *addr, size_t size)
1219 {
1220 	__ASSERT(page_frames_initialized, "%s called on %p too early", __func__,
1221 		 addr);
1222 	z_mem_assert_virtual_region(addr, size);
1223 
1224 	for (size_t offset = 0; offset < size; offset += CONFIG_MMU_PAGE_SIZE) {
1225 		void *pos = (uint8_t *)addr + offset;
1226 		int ret;
1227 
1228 		ret = do_mem_evict(pos);
1229 		if (ret != 0) {
1230 			return ret;
1231 		}
1232 	}
1233 
1234 	return 0;
1235 }
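
/*
 * Usage sketch (illustrative; 'buf' stands for any page-aligned region
 * of mapped memory, e.g. previously obtained from k_mem_map()):
 *
 *	// proactively evict the region to the backing store...
 *	k_mem_page_out(buf, 4 * CONFIG_MMU_PAGE_SIZE);
 *
 *	// ...and later bring it back in up front, so that the first
 *	// access does not pay the page-in cost
 *	k_mem_page_in(buf, 4 * CONFIG_MMU_PAGE_SIZE);
 */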
1236 
1237 int z_page_frame_evict(uintptr_t phys)
1238 {
1239 	int key, ret;
1240 	struct z_page_frame *pf;
1241 	bool dirty;
1242 	uintptr_t flags;
1243 	uintptr_t location;
1244 
1245 	__ASSERT(page_frames_initialized, "%s called on 0x%lx too early",
1246 		 __func__, phys);
1247 
1248 	/* Implementation is similar to do_page_fault() except there is no
1249 	 * data page to page-in, see comments in that function.
1250 	 */
1251 
1252 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1253 	__ASSERT(!k_is_in_isr(),
1254 		 "%s is unavailable in ISRs with CONFIG_DEMAND_PAGING_ALLOW_IRQ",
1255 		 __func__);
1256 	k_sched_lock();
1257 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1258 	key = irq_lock();
1259 	pf = z_phys_to_page_frame(phys);
1260 	if (!z_page_frame_is_mapped(pf)) {
1261 		/* Nothing to do, free page */
1262 		ret = 0;
1263 		goto out;
1264 	}
1265 	flags = arch_page_info_get(pf->addr, NULL, false);
1266 	/* Shouldn't ever happen */
1267 	__ASSERT((flags & ARCH_DATA_PAGE_LOADED) != 0, "data page not loaded");
1268 	dirty = (flags & ARCH_DATA_PAGE_DIRTY) != 0;
1269 	ret = page_frame_prepare_locked(pf, &dirty, false, &location);
1270 	if (ret != 0) {
1271 		goto out;
1272 	}
1273 
1274 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1275 	irq_unlock(key);
1276 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1277 	if (dirty) {
1278 		do_backing_store_page_out(location);
1279 	}
1280 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1281 	key = irq_lock();
1282 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1283 	page_frame_free_locked(pf);
1284 out:
1285 	irq_unlock(key);
1286 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1287 	k_sched_unlock();
1288 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1289 	return ret;
1290 }
1291 
1292 static inline void paging_stats_faults_inc(struct k_thread *faulting_thread,
1293 					   int key)
1294 {
1295 #ifdef CONFIG_DEMAND_PAGING_STATS
1296 	bool is_irq_unlocked = arch_irq_unlocked(key);
1297 
1298 	paging_stats.pagefaults.cnt++;
1299 
1300 	if (is_irq_unlocked) {
1301 		paging_stats.pagefaults.irq_unlocked++;
1302 	} else {
1303 		paging_stats.pagefaults.irq_locked++;
1304 	}
1305 
1306 #ifdef CONFIG_DEMAND_PAGING_THREAD_STATS
1307 	faulting_thread->paging_stats.pagefaults.cnt++;
1308 
1309 	if (is_irq_unlocked) {
1310 		faulting_thread->paging_stats.pagefaults.irq_unlocked++;
1311 	} else {
1312 		faulting_thread->paging_stats.pagefaults.irq_locked++;
1313 	}
1314 #else
1315 	ARG_UNUSED(faulting_thread);
1316 #endif
1317 
1318 #ifndef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1319 	if (k_is_in_isr()) {
1320 		paging_stats.pagefaults.in_isr++;
1321 
1322 #ifdef CONFIG_DEMAND_PAGING_THREAD_STATS
1323 		faulting_thread->paging_stats.pagefaults.in_isr++;
1324 #endif
1325 	}
1326 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1327 #endif /* CONFIG_DEMAND_PAGING_STATS */
1328 }
1329 
1330 static inline void paging_stats_eviction_inc(struct k_thread *faulting_thread,
1331 					     bool dirty)
1332 {
1333 #ifdef CONFIG_DEMAND_PAGING_STATS
1334 	if (dirty) {
1335 		paging_stats.eviction.dirty++;
1336 	} else {
1337 		paging_stats.eviction.clean++;
1338 	}
1339 #ifdef CONFIG_DEMAND_PAGING_THREAD_STATS
1340 	if (dirty) {
1341 		faulting_thread->paging_stats.eviction.dirty++;
1342 	} else {
1343 		faulting_thread->paging_stats.eviction.clean++;
1344 	}
1345 #else
1346 	ARG_UNUSED(faulting_thread);
1347 #endif /* CONFIG_DEMAND_PAGING_THREAD_STATS */
1348 #endif /* CONFIG_DEMAND_PAGING_STATS */
1349 }
1350 
1351 static inline struct z_page_frame *do_eviction_select(bool *dirty)
1352 {
1353 	struct z_page_frame *pf;
1354 
1355 #ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
1356 	uint32_t time_diff;
1357 
1358 #ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS
1359 	timing_t time_start, time_end;
1360 
1361 	time_start = timing_counter_get();
1362 #else
1363 	uint32_t time_start;
1364 
1365 	time_start = k_cycle_get_32();
1366 #endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */
1367 #endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */
1368 
1369 	pf = k_mem_paging_eviction_select(dirty);
1370 
1371 #ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
1372 #ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS
1373 	time_end = timing_counter_get();
1374 	time_diff = (uint32_t)timing_cycles_get(&time_start, &time_end);
1375 #else
1376 	time_diff = k_cycle_get_32() - time_start;
1377 #endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */
1378 
1379 	z_paging_histogram_inc(&z_paging_histogram_eviction, time_diff);
1380 #endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */
1381 
1382 	return pf;
1383 }
1384 
1385 static bool do_page_fault(void *addr, bool pin)
1386 {
1387 	struct z_page_frame *pf;
1388 	int key, ret;
1389 	uintptr_t page_in_location, page_out_location;
1390 	enum arch_page_location status;
1391 	bool result;
1392 	bool dirty = false;
1393 	struct k_thread *faulting_thread = _current_cpu->current;
1394 
1395 	__ASSERT(page_frames_initialized, "page fault at %p happened too early",
1396 		 addr);
1397 
1398 	LOG_DBG("page fault at %p", addr);
1399 
1400 	/*
1401 	 * TODO: Add performance accounting:
1402 	 * - k_mem_paging_eviction_select() metrics
1403 	 *   * periodic timer execution time histogram (if implemented)
1404 	 */
1405 
1406 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1407 	/* We lock the scheduler so that other threads are never scheduled
1408 	 * during the page-in/out operation.
1409 	 *
1410 	 * We do however re-enable interrupts during the page-in/page-out
1411 	 * operation iff interrupts were enabled when the exception was taken;
1412 	 * in this configuration page faults in an ISR are a bug; all their
1413 	 * code/data must be pinned.
1414 	 *
1415 	 * If interrupts were disabled when the exception was taken, the
1416 	 * arch code is responsible for keeping them that way when entering
1417 	 * this function.
1418 	 *
1419 	 * If this is not enabled, then interrupts are always locked for the
1420 	 * entire operation. This is far worse for system interrupt latency
1421 	 * but requires less pinned pages and ISRs may also take page faults.
1422 	 *
1423 	 * Support for allowing k_mem_paging_backing_store_page_out() and
1424 	 * k_mem_paging_backing_store_page_in() to also sleep and allow
1425 	 * other threads to run (such as in the case where the transfer is
1426 	 * async DMA) is not implemented. Even if limited to thread context,
1427 	 * arbitrary memory access triggering exceptions that put a thread to
1428 	 * sleep on a contended page fault operation will break scheduling
1429 	 * assumptions of cooperative threads or threads that implement
1430 	 * critical sections with spinlocks or disabling IRQs.
1431 	 */
1432 	k_sched_lock();
1433 	__ASSERT(!k_is_in_isr(), "ISR page faults are forbidden");
1434 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1435 
1436 	key = irq_lock();
1437 	status = arch_page_location_get(addr, &page_in_location);
1438 	if (status == ARCH_PAGE_LOCATION_BAD) {
1439 		/* Return false to treat as a fatal error */
1440 		result = false;
1441 		goto out;
1442 	}
1443 	result = true;
1444 
1445 	if (status == ARCH_PAGE_LOCATION_PAGED_IN) {
1446 		if (pin) {
1447 			/* It's a physical memory address */
1448 			uintptr_t phys = page_in_location;
1449 
1450 			pf = z_phys_to_page_frame(phys);
1451 			pf->flags |= Z_PAGE_FRAME_PINNED;
1452 		}
1453 
1454 		/* This if-block is to pin the page if it is
1455 		 * already present in physical memory. There is
1456 		 * no need to go through the following code to
1457 		 * pull in the data pages. So skip to the end.
1458 		 */
1459 		goto out;
1460 	}
1461 	__ASSERT(status == ARCH_PAGE_LOCATION_PAGED_OUT,
1462 		 "unexpected status value %d", status);
1463 
1464 	paging_stats_faults_inc(faulting_thread, key);
1465 
1466 	pf = free_page_frame_list_get();
1467 	if (pf == NULL) {
1468 		/* Need to evict a page frame */
1469 		pf = do_eviction_select(&dirty);
1470 		__ASSERT(pf != NULL, "failed to get a page frame");
1471 		LOG_DBG("evicting %p at 0x%lx", pf->addr,
1472 			z_page_frame_to_phys(pf));
1473 
1474 		paging_stats_eviction_inc(faulting_thread, dirty);
1475 	}
1476 	ret = page_frame_prepare_locked(pf, &dirty, true, &page_out_location);
1477 	__ASSERT(ret == 0, "failed to prepare page frame");
1478 
1479 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1480 	irq_unlock(key);
1481 	/* Interrupts are now unlocked if they were not locked when we entered
1482 	 * this function, and we may service ISRs. The scheduler is still
1483 	 * locked.
1484 	 */
1485 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1486 	if (dirty) {
1487 		do_backing_store_page_out(page_out_location);
1488 	}
1489 	do_backing_store_page_in(page_in_location);
1490 
1491 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1492 	key = irq_lock();
1493 	pf->flags &= ~Z_PAGE_FRAME_BUSY;
1494 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1495 	if (pin) {
1496 		pf->flags |= Z_PAGE_FRAME_PINNED;
1497 	}
1498 	pf->flags |= Z_PAGE_FRAME_MAPPED;
1499 	pf->addr = UINT_TO_POINTER(POINTER_TO_UINT(addr)
1500 				   & ~(CONFIG_MMU_PAGE_SIZE - 1));
1501 
1502 	arch_mem_page_in(addr, z_page_frame_to_phys(pf));
1503 	k_mem_paging_backing_store_page_finalize(pf, page_in_location);
1504 out:
1505 	irq_unlock(key);
1506 #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
1507 	k_sched_unlock();
1508 #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
1509 
1510 	return result;
1511 }
1512 
1513 static void do_page_in(void *addr)
1514 {
1515 	bool ret;
1516 
1517 	ret = do_page_fault(addr, false);
1518 	__ASSERT(ret, "unmapped memory address %p", addr);
1519 	(void)ret;
1520 }
1521 
1522 void k_mem_page_in(void *addr, size_t size)
1523 {
1524 	__ASSERT(!IS_ENABLED(CONFIG_DEMAND_PAGING_ALLOW_IRQ) || !k_is_in_isr(),
1525 		 "%s may not be called in ISRs if CONFIG_DEMAND_PAGING_ALLOW_IRQ is enabled",
1526 		 __func__);
1527 	virt_region_foreach(addr, size, do_page_in);
1528 }
1529 
1530 static void do_mem_pin(void *addr)
1531 {
1532 	bool ret;
1533 
1534 	ret = do_page_fault(addr, true);
1535 	__ASSERT(ret, "unmapped memory address %p", addr);
1536 	(void)ret;
1537 }
1538 
1539 void k_mem_pin(void *addr, size_t size)
1540 {
1541 	__ASSERT(!IS_ENABLED(CONFIG_DEMAND_PAGING_ALLOW_IRQ) || !k_is_in_isr(),
1542 		 "%s may not be called in ISRs if CONFIG_DEMAND_PAGING_ALLOW_IRQ is enabled",
1543 		 __func__);
1544 	virt_region_foreach(addr, size, do_mem_pin);
1545 }
1546 
1547 bool z_page_fault(void *addr)
1548 {
1549 	return do_page_fault(addr, false);
1550 }
1551 
1552 static void do_mem_unpin(void *addr)
1553 {
1554 	struct z_page_frame *pf;
1555 	unsigned int key;
1556 	uintptr_t flags, phys;
1557 
1558 	key = irq_lock();
1559 	flags = arch_page_info_get(addr, &phys, false);
1560 	__ASSERT((flags & ARCH_DATA_PAGE_NOT_MAPPED) == 0,
1561 		 "invalid data page at %p", addr);
1562 	if ((flags & ARCH_DATA_PAGE_LOADED) != 0) {
1563 		pf = z_phys_to_page_frame(phys);
1564 		pf->flags &= ~Z_PAGE_FRAME_PINNED;
1565 	}
1566 	irq_unlock(key);
1567 }
1568 
1569 void k_mem_unpin(void *addr, size_t size)
1570 {
1571 	__ASSERT(page_frames_initialized, "%s called on %p too early", __func__,
1572 		 addr);
1573 	virt_region_foreach(addr, size, do_mem_unpin);
1574 }
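
/*
 * Usage sketch (illustrative; 'buf' is assumed to be a page-aligned
 * region of mapped memory): pin a buffer for the duration of an
 * operation that cannot tolerate page faults, then release it.
 *
 *	k_mem_pin(buf, CONFIG_MMU_PAGE_SIZE);
 *	// ... e.g. hand the buffer to a DMA engine or touch it from
 *	// code that must not fault ...
 *	k_mem_unpin(buf, CONFIG_MMU_PAGE_SIZE);
 */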
1575 
1576 #endif /* CONFIG_DEMAND_PAGING */
1577