/*
 * Copyright (c) 2020 Intel Corporation
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Routines for managing virtual address spaces
 */

#include <stdint.h>
#include <kernel_arch_interface.h>
#include <zephyr/spinlock.h>
#include <mmu.h>
#include <zephyr/init.h>
#include <kernel_internal.h>
#include <zephyr/syscall_handler.h>
#include <zephyr/toolchain.h>
#include <zephyr/linker/linker-defs.h>
#include <zephyr/sys/bitarray.h>
#include <zephyr/timing/timing.h>
#include <zephyr/logging/log.h>
LOG_MODULE_DECLARE(os, CONFIG_KERNEL_LOG_LEVEL);

/*
 * General terminology:
 * - A page frame is a page-sized physical memory region in RAM. It is a
 *   container where a data page may be placed. It is always referred to by
 *   physical address. We have a convention of using uintptr_t for physical
 *   addresses. We instantiate a struct z_page_frame to store metadata for
 *   every page frame.
 *
 * - A data page is a page-sized region of data. It may exist in a page frame,
 *   or be paged out to some backing store. Its location can always be looked
 *   up in the CPU's page tables (or equivalent) by virtual address.
 *   The data type will always be void * or in some cases uint8_t * when we
 *   want to do pointer arithmetic.
 */
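
/* Illustrative sketch of the two domains above, kept out of the build.
 * z_page_frame_to_phys() and the pf->addr field are the conversions used
 * throughout this file; the function below is hypothetical.
 */
#if 0
static void terminology_example(struct z_page_frame *pf)
{
	uintptr_t frame_phys = z_page_frame_to_phys(pf); /* page frame: physical, uintptr_t */
	void *data_page = pf->addr;	/* data page: virtual, void *, if currently mapped */

	ARG_UNUSED(frame_phys);
	ARG_UNUSED(data_page);
}
#endif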

/* Spinlock to protect any globals in this file and serialize page table
 * updates in arch code
 */
struct k_spinlock z_mm_lock;

/*
 * General page frame management
 */

/* Database of all RAM page frames */
struct z_page_frame z_page_frames[Z_NUM_PAGE_FRAMES];

#if __ASSERT_ON
/* Indicator that z_page_frames has been initialized; many of these APIs do
 * not work before POST_KERNEL.
 */
static bool page_frames_initialized;
#endif

/* Add colors to page table dumps to indicate mapping type */
#define COLOR_PAGE_FRAMES	1

#if COLOR_PAGE_FRAMES
#define ANSI_DEFAULT "\x1B" "[0m"
#define ANSI_RED     "\x1B" "[1;31m"
#define ANSI_GREEN   "\x1B" "[1;32m"
#define ANSI_YELLOW  "\x1B" "[1;33m"
#define ANSI_BLUE    "\x1B" "[1;34m"
#define ANSI_MAGENTA "\x1B" "[1;35m"
#define ANSI_CYAN    "\x1B" "[1;36m"
#define ANSI_GREY    "\x1B" "[1;90m"

#define COLOR(x)	printk(_CONCAT(ANSI_, x))
#else
#define COLOR(x)	do { } while (false)
#endif

/* LCOV_EXCL_START */
static void page_frame_dump(struct z_page_frame *pf)
{
	if (z_page_frame_is_reserved(pf)) {
		COLOR(CYAN);
		printk("R");
	} else if (z_page_frame_is_busy(pf)) {
		COLOR(MAGENTA);
		printk("B");
	} else if (z_page_frame_is_pinned(pf)) {
		COLOR(YELLOW);
		printk("P");
	} else if (z_page_frame_is_available(pf)) {
		COLOR(GREY);
		printk(".");
	} else if (z_page_frame_is_mapped(pf)) {
		COLOR(DEFAULT);
		printk("M");
	} else {
		COLOR(RED);
		printk("?");
	}
}

void z_page_frames_dump(void)
{
	int column = 0;

	__ASSERT(page_frames_initialized, "%s called too early", __func__);
	printk("Physical memory from 0x%lx to 0x%lx\n",
	       Z_PHYS_RAM_START, Z_PHYS_RAM_END);

	for (int i = 0; i < Z_NUM_PAGE_FRAMES; i++) {
		struct z_page_frame *pf = &z_page_frames[i];

		page_frame_dump(pf);

		column++;
		if (column == 64) {
			column = 0;
			printk("\n");
		}
	}

	COLOR(DEFAULT);
	if (column != 0) {
		printk("\n");
	}
}
/* LCOV_EXCL_STOP */

#define VIRT_FOREACH(_base, _size, _pos) \
	for (_pos = _base; \
	     _pos < ((uint8_t *)_base + _size); _pos += CONFIG_MMU_PAGE_SIZE)

#define PHYS_FOREACH(_base, _size, _pos) \
	for (_pos = _base; \
	     _pos < ((uintptr_t)_base + _size); _pos += CONFIG_MMU_PAGE_SIZE)


/*
 * Virtual address space management
 *
 * Call all of these functions with z_mm_lock held.
 *
 * Overall virtual memory map: When the kernel starts, it resides in
 * virtual memory in the region Z_KERNEL_VIRT_START to
 * Z_KERNEL_VIRT_END. Unused virtual memory past this, up to the limit
 * noted by CONFIG_KERNEL_VM_SIZE may be used for runtime memory mappings.
 *
 * If CONFIG_ARCH_MAPS_ALL_RAM is set, we do not just map the kernel image,
 * but have a mapping for all RAM in place. This is for special architectural
 * purposes and does not otherwise affect page frame accounting or flags;
 * the only guarantee is that such RAM mapping outside of the Zephyr image
 * won't be disturbed by subsequent memory mapping calls.
 *
 * +--------------+ <- Z_VIRT_RAM_START
 * | Undefined VM | <- May contain ancillary regions like x86_64's locore
 * +--------------+ <- Z_KERNEL_VIRT_START (often == Z_VIRT_RAM_START)
 * | Mapping for  |
 * | main kernel  |
 * | image        |
 * |		  |
 * |		  |
 * +--------------+ <- Z_FREE_VM_START
 * |              |
 * | Unused,      |
 * | Available VM |
 * |              |
 * |..............| <- mapping_pos (grows downward as more mappings are made)
 * | Mapping      |
 * +--------------+
 * | Mapping      |
 * +--------------+
 * | ...          |
 * +--------------+
 * | Mapping      |
 * +--------------+ <- mappings start here
 * | Reserved     | <- special purpose virtual page(s) of size Z_VM_RESERVED
 * +--------------+ <- Z_VIRT_RAM_END
 */

/* Bitmap of virtual addresses where one bit corresponds to one page.
 * This is being used for virt_region_alloc() to figure out which
 * region of virtual addresses can be used for memory mapping.
 *
 * Note that bit #0 is the highest address so that allocation is
 * done in reverse from highest address.
 */
SYS_BITARRAY_DEFINE_STATIC(virt_region_bitmap,
			   CONFIG_KERNEL_VM_SIZE / CONFIG_MMU_PAGE_SIZE);

static bool virt_region_inited;

#define Z_VIRT_REGION_START_ADDR	Z_FREE_VM_START
#define Z_VIRT_REGION_END_ADDR		(Z_VIRT_RAM_END - Z_VM_RESERVED)

static inline uintptr_t virt_from_bitmap_offset(size_t offset, size_t size)
{
	return POINTER_TO_UINT(Z_VIRT_RAM_END)
	       - (offset * CONFIG_MMU_PAGE_SIZE) - size;
}

static inline size_t virt_to_bitmap_offset(void *vaddr, size_t size)
{
	return (POINTER_TO_UINT(Z_VIRT_RAM_END)
		- POINTER_TO_UINT(vaddr) - size) / CONFIG_MMU_PAGE_SIZE;
}
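
/* Worked example of the conversions above (hypothetical numbers, assuming a
 * 4 KiB page size): a 3-page region allocated at bit offset 0 starts at
 *
 *   virt_from_bitmap_offset(0, 3 * 4096) == Z_VIRT_RAM_END - 12288
 *
 * and converting that address back round-trips to the same bit offset:
 *
 *   virt_to_bitmap_offset(Z_VIRT_RAM_END - 12288, 12288) == 0
 *
 * Both helpers measure from the end of virtual RAM, which is what makes the
 * reversed bit ordering described above work.
 */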

static void virt_region_init(void)
{
	size_t offset, num_bits;

	/* There are regions where we should never map via
	 * k_mem_map() and z_phys_map(). Mark them as
	 * already allocated so they will never be used.
	 */

	if (Z_VM_RESERVED > 0) {
		/* Mark reserved region at end of virtual address space */
		num_bits = Z_VM_RESERVED / CONFIG_MMU_PAGE_SIZE;
		(void)sys_bitarray_set_region(&virt_region_bitmap,
					      num_bits, 0);
	}

	/* Mark all bits up to Z_FREE_VM_START as allocated */
	num_bits = POINTER_TO_UINT(Z_FREE_VM_START)
		   - POINTER_TO_UINT(Z_VIRT_RAM_START);
	offset = virt_to_bitmap_offset(Z_VIRT_RAM_START, num_bits);
	num_bits /= CONFIG_MMU_PAGE_SIZE;
	(void)sys_bitarray_set_region(&virt_region_bitmap,
				      num_bits, offset);

	virt_region_inited = true;
}

static void virt_region_free(void *vaddr, size_t size)
{
	size_t offset, num_bits;
	uint8_t *vaddr_u8 = (uint8_t *)vaddr;

	if (unlikely(!virt_region_inited)) {
		virt_region_init();
	}

#ifndef CONFIG_KERNEL_DIRECT_MAP
	/* Without the need to support K_DIRECT_MAP, the region must be
	 * able to be represented in the bitmap. So this case is
	 * simple.
	 */

	__ASSERT((vaddr_u8 >= Z_VIRT_REGION_START_ADDR)
		 && ((vaddr_u8 + size - 1) < Z_VIRT_REGION_END_ADDR),
		 "invalid virtual address region %p (%zu)", vaddr_u8, size);
	if (!((vaddr_u8 >= Z_VIRT_REGION_START_ADDR)
	      && ((vaddr_u8 + size - 1) < Z_VIRT_REGION_END_ADDR))) {
		return;
	}

	offset = virt_to_bitmap_offset(vaddr, size);
	num_bits = size / CONFIG_MMU_PAGE_SIZE;
	(void)sys_bitarray_free(&virt_region_bitmap, num_bits, offset);
#else /* !CONFIG_KERNEL_DIRECT_MAP */
	/* With K_DIRECT_MAP, the region can be outside of the virtual
	 * memory space, wholly within it, or overlap partially.
	 * So additional processing is needed to make sure we only
	 * mark the pages within the bitmap.
	 */
	if (((vaddr_u8 >= Z_VIRT_REGION_START_ADDR) &&
	     (vaddr_u8 < Z_VIRT_REGION_END_ADDR)) ||
	    (((vaddr_u8 + size - 1) >= Z_VIRT_REGION_START_ADDR) &&
	     ((vaddr_u8 + size - 1) < Z_VIRT_REGION_END_ADDR))) {
		uint8_t *adjusted_start = MAX(vaddr_u8, Z_VIRT_REGION_START_ADDR);
		uint8_t *adjusted_end = MIN(vaddr_u8 + size,
					    Z_VIRT_REGION_END_ADDR);
		size_t adjusted_sz = adjusted_end - adjusted_start;

		offset = virt_to_bitmap_offset(adjusted_start, adjusted_sz);
		num_bits = adjusted_sz / CONFIG_MMU_PAGE_SIZE;
		(void)sys_bitarray_free(&virt_region_bitmap, num_bits, offset);
	}
#endif /* !CONFIG_KERNEL_DIRECT_MAP */
}

static void *virt_region_alloc(size_t size, size_t align)
{
	uintptr_t dest_addr;
	size_t alloc_size;
	size_t offset;
	size_t num_bits;
	int ret;

	if (unlikely(!virt_region_inited)) {
		virt_region_init();
	}

	/* Possibly request more pages to ensure we can get an aligned virtual address */
	num_bits = (size + align - CONFIG_MMU_PAGE_SIZE) / CONFIG_MMU_PAGE_SIZE;
	alloc_size = num_bits * CONFIG_MMU_PAGE_SIZE;
	ret = sys_bitarray_alloc(&virt_region_bitmap, num_bits, &offset);
	if (ret != 0) {
		LOG_ERR("insufficient virtual address space (requested %zu)",
			size);
		return NULL;
	}

	/* Remember that bit #0 in bitmap corresponds to the highest
	 * virtual address. So here we need to go downwards (backwards?)
	 * to get the starting address of the allocated region.
	 */
	dest_addr = virt_from_bitmap_offset(offset, alloc_size);

	if (alloc_size > size) {
		uintptr_t aligned_dest_addr = ROUND_UP(dest_addr, align);

		/* Here is the memory organization when trying to get an aligned
		 * virtual address:
		 *
		 * +--------------+ <- Z_VIRT_RAM_START
		 * | Undefined VM |
		 * +--------------+ <- Z_KERNEL_VIRT_START (often == Z_VIRT_RAM_START)
		 * | Mapping for  |
		 * | main kernel  |
		 * | image        |
		 * |		  |
		 * |		  |
		 * +--------------+ <- Z_FREE_VM_START
		 * | ...          |
		 * +==============+ <- dest_addr
		 * | Unused       |
		 * |..............| <- aligned_dest_addr
		 * |              |
		 * | Aligned      |
		 * | Mapping      |
		 * |              |
		 * |..............| <- aligned_dest_addr + size
		 * | Unused       |
		 * +==============+ <- offset from Z_VIRT_RAM_END == dest_addr + alloc_size
		 * | ...          |
		 * +--------------+
		 * | Mapping      |
		 * +--------------+
		 * | Reserved     |
		 * +--------------+ <- Z_VIRT_RAM_END
		 */

		/* Free the two unused regions */
		virt_region_free(UINT_TO_POINTER(dest_addr),
				 aligned_dest_addr - dest_addr);
		if (((dest_addr + alloc_size) - (aligned_dest_addr + size)) > 0) {
			virt_region_free(UINT_TO_POINTER(aligned_dest_addr + size),
					 (dest_addr + alloc_size) - (aligned_dest_addr + size));
		}

		dest_addr = aligned_dest_addr;
	}

	/* Need to make sure this does not step into kernel memory */
	if (dest_addr < POINTER_TO_UINT(Z_VIRT_REGION_START_ADDR)) {
		(void)sys_bitarray_free(&virt_region_bitmap, size, offset);
		return NULL;
	}

	return UINT_TO_POINTER(dest_addr);
}
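
/* Worked example of the over-allocation logic above (hypothetical numbers,
 * assuming 4 KiB pages): virt_region_alloc(0x2000, 0x4000) asks the bitmap
 * for (0x2000 + 0x4000 - 0x1000) / 0x1000 = 5 pages. Any contiguous 5-page
 * window is guaranteed to contain a 0x4000-aligned address with at least
 * 0x2000 bytes after it, so the head and tail pages outside
 * [aligned_dest_addr, aligned_dest_addr + size) are handed back to the
 * bitmap via virt_region_free() and only the aligned two pages stay
 * allocated.
 */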

/*
 * Free page frames management
 *
 * Call all of these functions with z_mm_lock held.
 */

/* Linked list of unused and available page frames.
 *
 * TODO: This is very simple and treats all free page frames as being equal.
 * However, there are use-cases to consolidate free pages such that entire
 * SRAM banks can be switched off to save power, and so obtaining free pages
 * may require a more complex ontology which prefers page frames in RAM banks
 * which are still active.
 *
 * This implies in the future there may be multiple slists managing physical
 * pages. Each page frame will still just have one snode link.
 */
static sys_slist_t free_page_frame_list;

/* Number of unused and available free page frames */
size_t z_free_page_count;

#define PF_ASSERT(pf, expr, fmt, ...) \
	__ASSERT(expr, "page frame 0x%lx: " fmt, z_page_frame_to_phys(pf), \
		 ##__VA_ARGS__)

/* Get an unused page frame; we don't care which one. Returns NULL if there are none. */
static struct z_page_frame *free_page_frame_list_get(void)
{
	sys_snode_t *node;
	struct z_page_frame *pf = NULL;

	node = sys_slist_get(&free_page_frame_list);
	if (node != NULL) {
		z_free_page_count--;
		pf = CONTAINER_OF(node, struct z_page_frame, node);
		PF_ASSERT(pf, z_page_frame_is_available(pf),
			 "unavailable but somehow on free list");
	}

	return pf;
}

/* Release a page frame back into the list of free pages */
static void free_page_frame_list_put(struct z_page_frame *pf)
{
	PF_ASSERT(pf, z_page_frame_is_available(pf),
		 "unavailable page put on free list");
	/* The structure is packed, which ensures that this is true */
	void *node = pf;

	sys_slist_append(&free_page_frame_list, node);
	z_free_page_count++;
}

static void free_page_frame_list_init(void)
{
	sys_slist_init(&free_page_frame_list);
}

static void page_frame_free_locked(struct z_page_frame *pf)
{
	pf->flags = 0;
	free_page_frame_list_put(pf);
}

/*
 * Memory Mapping
 */

/* Called after the frame is mapped in the arch layer, to update our
 * local ontology (and do some assertions while we're at it)
 */
static void frame_mapped_set(struct z_page_frame *pf, void *addr)
{
	PF_ASSERT(pf, !z_page_frame_is_reserved(pf),
		  "attempted to map a reserved page frame");

	/* We do allow multiple mappings for pinned page frames
	 * since we will never need to reverse map them.
	 * This is uncommon, use-cases are for things like the
	 * Zephyr equivalent of vDSOs.
	 */
	PF_ASSERT(pf, !z_page_frame_is_mapped(pf) || z_page_frame_is_pinned(pf),
		 "non-pinned and already mapped to %p", pf->addr);

	pf->flags |= Z_PAGE_FRAME_MAPPED;
	pf->addr = addr;
}

/* LCOV_EXCL_START */
/* Go through page frames to find the physical address mapped
 * by a virtual address.
 *
 * @param[in]  virt Virtual Address
 * @param[out] phys Physical address mapped to the input virtual address
 *                  if such mapping exists.
 *
 * @retval 0 if mapping is found and valid
 * @retval -EFAULT if virtual address is not mapped
 */
static int virt_to_page_frame(void *virt, uintptr_t *phys)
{
	uintptr_t paddr;
	struct z_page_frame *pf;
	int ret = -EFAULT;

	Z_PAGE_FRAME_FOREACH(paddr, pf) {
		if (z_page_frame_is_mapped(pf)) {
			if (virt == pf->addr) {
				ret = 0;
				*phys = z_page_frame_to_phys(pf);
				break;
			}
		}
	}

	return ret;
}
/* LCOV_EXCL_STOP */

__weak FUNC_ALIAS(virt_to_page_frame, arch_page_phys_get, int);

#ifdef CONFIG_DEMAND_PAGING
static int page_frame_prepare_locked(struct z_page_frame *pf, bool *dirty_ptr,
				     bool page_in, uintptr_t *location_ptr);

static inline void do_backing_store_page_in(uintptr_t location);
static inline void do_backing_store_page_out(uintptr_t location);
#endif /* CONFIG_DEMAND_PAGING */

/* Allocate a free page frame, and map it to a specified virtual address
 *
 * TODO: Add optional support for copy-on-write mappings to a zero page instead
 * of allocating, in which case page frames will be allocated lazily as
 * the mappings to the zero page get touched. This will avoid expensive
 * page-ins as memory is mapped and physical RAM or backing store storage will
 * not be used if the mapped memory is unused. The cost is an empty physical
 * page of zeroes.
 */
static int map_anon_page(void *addr, uint32_t flags)
{
	struct z_page_frame *pf;
	uintptr_t phys;
	bool lock = (flags & K_MEM_MAP_LOCK) != 0U;
	bool uninit = (flags & K_MEM_MAP_UNINIT) != 0U;

	pf = free_page_frame_list_get();
	if (pf == NULL) {
#ifdef CONFIG_DEMAND_PAGING
		uintptr_t location;
		bool dirty;
		int ret;

		pf = k_mem_paging_eviction_select(&dirty);
		__ASSERT(pf != NULL, "failed to get a page frame");
		LOG_DBG("evicting %p at 0x%lx", pf->addr,
			z_page_frame_to_phys(pf));
		ret = page_frame_prepare_locked(pf, &dirty, false, &location);
		if (ret != 0) {
			return -ENOMEM;
		}
		if (dirty) {
			do_backing_store_page_out(location);
		}
		pf->flags = 0;
#else
		return -ENOMEM;
#endif /* CONFIG_DEMAND_PAGING */
	}

	phys = z_page_frame_to_phys(pf);
	arch_mem_map(addr, phys, CONFIG_MMU_PAGE_SIZE, flags | K_MEM_CACHE_WB);

	if (lock) {
		pf->flags |= Z_PAGE_FRAME_PINNED;
	}
	frame_mapped_set(pf, addr);

	LOG_DBG("memory mapping anon page %p -> 0x%lx", addr, phys);

	if (!uninit) {
		/* If we later implement mappings to a copy-on-write
		 * zero page, won't need this step
		 */
		memset(addr, 0, CONFIG_MMU_PAGE_SIZE);
	}

	return 0;
}

void *k_mem_map(size_t size, uint32_t flags)
{
	uint8_t *dst;
	size_t total_size;
	int ret;
	k_spinlock_key_t key;
	uint8_t *pos;

	__ASSERT(!(((flags & K_MEM_PERM_USER) != 0U) &&
		   ((flags & K_MEM_MAP_UNINIT) != 0U)),
		 "user access to anonymous uninitialized pages is forbidden");
	__ASSERT(size % CONFIG_MMU_PAGE_SIZE == 0U,
		 "unaligned size %zu passed to %s", size, __func__);
	__ASSERT(size != 0, "zero sized memory mapping");
	__ASSERT(page_frames_initialized, "%s called too early", __func__);
	__ASSERT((flags & K_MEM_CACHE_MASK) == 0U,
		 "%s does not support explicit cache settings", __func__);

	key = k_spin_lock(&z_mm_lock);

	/* Need extra for the guard pages (before and after) which we
	 * won't map.
	 */
	total_size = size + CONFIG_MMU_PAGE_SIZE * 2;

	dst = virt_region_alloc(total_size, CONFIG_MMU_PAGE_SIZE);
	if (dst == NULL) {
		/* Address space has no free region */
		goto out;
	}

	/* Unmap both guard pages to make sure accessing them
	 * will generate fault.
	 */
	arch_mem_unmap(dst, CONFIG_MMU_PAGE_SIZE);
	arch_mem_unmap(dst + CONFIG_MMU_PAGE_SIZE + size,
		       CONFIG_MMU_PAGE_SIZE);

	/* Skip over the "before" guard page in returned address. */
	dst += CONFIG_MMU_PAGE_SIZE;

	VIRT_FOREACH(dst, size, pos) {
		ret = map_anon_page(pos, flags);

		if (ret != 0) {
			/* TODO: call k_mem_unmap(dst, pos - dst) when
			 * implemented in #28990 and release any guard virtual
			 * page as well.
			 */
			dst = NULL;
			goto out;
		}
	}
out:
	k_spin_unlock(&z_mm_lock, key);
	return dst;
}
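
/* Minimal usage sketch for k_mem_map()/k_mem_unmap() (not compiled here; the
 * function below is hypothetical and error handling is trimmed). The returned
 * pointer is page-aligned and sits between two unmapped guard pages, so
 * out-of-bounds accesses fault instead of corrupting neighbours.
 */
#if 0
static void anon_mapping_example(void)
{
	size_t sz = 4 * CONFIG_MMU_PAGE_SIZE;
	uint8_t *buf = k_mem_map(sz, K_MEM_PERM_RW);

	if (buf != NULL) {
		buf[0] = 0xAA;		/* pages are zero-filled unless K_MEM_MAP_UNINIT */
		k_mem_unmap(buf, sz);	/* pass back the same address and size */
	}
}
#endif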

void k_mem_unmap(void *addr, size_t size)
{
	uintptr_t phys;
	uint8_t *pos;
	struct z_page_frame *pf;
	k_spinlock_key_t key;
	size_t total_size;
	int ret;

	/* Need space for the "before" guard page */
	__ASSERT_NO_MSG(POINTER_TO_UINT(addr) >= CONFIG_MMU_PAGE_SIZE);

	/* Make sure address range is still valid after accounting
	 * for two guard pages.
	 */
	pos = (uint8_t *)addr - CONFIG_MMU_PAGE_SIZE;
	z_mem_assert_virtual_region(pos, size + (CONFIG_MMU_PAGE_SIZE * 2));

	key = k_spin_lock(&z_mm_lock);

	/* Check if both guard pages are unmapped.
	 * Bail if not, as this is probably a region not mapped
	 * using k_mem_map().
	 */
	pos = addr;
	ret = arch_page_phys_get(pos - CONFIG_MMU_PAGE_SIZE, NULL);
	if (ret == 0) {
		__ASSERT(ret != 0,
			 "%s: cannot find preceding guard page for (%p, %zu)",
			 __func__, addr, size);
		goto out;
	}

	ret = arch_page_phys_get(pos + size, NULL);
	if (ret == 0) {
		__ASSERT(ret != 0,
			 "%s: cannot find succeeding guard page for (%p, %zu)",
			 __func__, addr, size);
		goto out;
	}

	VIRT_FOREACH(addr, size, pos) {
		ret = arch_page_phys_get(pos, &phys);

		__ASSERT(ret == 0,
			 "%s: cannot unmap an unmapped address %p",
			 __func__, pos);
		if (ret != 0) {
			/* Found an address not mapped. Do not continue. */
			goto out;
		}

		__ASSERT(z_is_page_frame(phys),
			 "%s: 0x%lx is not a page frame", __func__, phys);
		if (!z_is_page_frame(phys)) {
			/* Physical address has no corresponding page frame
			 * description in the page frame array.
			 * This should not happen. Do not continue.
			 */
			goto out;
		}

		/* Grab the corresponding page frame from physical address */
		pf = z_phys_to_page_frame(phys);

		__ASSERT(z_page_frame_is_mapped(pf),
			 "%s: 0x%lx is not a mapped page frame", __func__, phys);
		if (!z_page_frame_is_mapped(pf)) {
			/* Page frame is not marked mapped.
			 * This should not happen. Do not continue.
			 */
			goto out;
		}

		arch_mem_unmap(pos, CONFIG_MMU_PAGE_SIZE);

		/* Put the page frame back into free list */
		page_frame_free_locked(pf);
	}

	/* There are guard pages just before and after the mapped
	 * region. So we also need to free them from the bitmap.
	 */
	pos = (uint8_t *)addr - CONFIG_MMU_PAGE_SIZE;
	total_size = size + CONFIG_MMU_PAGE_SIZE * 2;
	virt_region_free(pos, total_size);

out:
	k_spin_unlock(&z_mm_lock, key);
}

size_t k_mem_free_get(void)
{
	size_t ret;
	k_spinlock_key_t key;

	__ASSERT(page_frames_initialized, "%s called too early", __func__);

	key = k_spin_lock(&z_mm_lock);
#ifdef CONFIG_DEMAND_PAGING
	if (z_free_page_count > CONFIG_DEMAND_PAGING_PAGE_FRAMES_RESERVE) {
		ret = z_free_page_count - CONFIG_DEMAND_PAGING_PAGE_FRAMES_RESERVE;
	} else {
		ret = 0;
	}
#else
	ret = z_free_page_count;
#endif
	k_spin_unlock(&z_mm_lock, key);

	return ret * (size_t)CONFIG_MMU_PAGE_SIZE;
}

/* Get the default virtual region alignment, here the default MMU page size
 *
 * @param[in] phys Physical address of region to be mapped, aligned to MMU_PAGE_SIZE
 * @param[in] size Size of region to be mapped, aligned to MMU_PAGE_SIZE
 *
 * @retval alignment to apply on the virtual address of this region
 */
static size_t virt_region_align(uintptr_t phys, size_t size)
{
	ARG_UNUSED(phys);
	ARG_UNUSED(size);

	return CONFIG_MMU_PAGE_SIZE;
}

__weak FUNC_ALIAS(virt_region_align, arch_virt_region_align, size_t);

/* This may be called from arch early boot code before z_cstart() is invoked.
 * Data will have been copied and BSS zeroed by then, but this function must
 * not rely on any other initialization having run in order to work correctly.
 */
void z_phys_map(uint8_t **virt_ptr, uintptr_t phys, size_t size, uint32_t flags)
{
	uintptr_t aligned_phys, addr_offset;
	size_t aligned_size, align_boundary;
	k_spinlock_key_t key;
	uint8_t *dest_addr;
	size_t num_bits;
	size_t offset;

#ifndef CONFIG_KERNEL_DIRECT_MAP
	__ASSERT(!(flags & K_MEM_DIRECT_MAP), "The direct-map is not enabled");
#endif
	addr_offset = k_mem_region_align(&aligned_phys, &aligned_size,
					 phys, size,
					 CONFIG_MMU_PAGE_SIZE);
	__ASSERT(aligned_size != 0U, "0-length mapping at 0x%lx", aligned_phys);
	__ASSERT(aligned_phys < (aligned_phys + (aligned_size - 1)),
		 "wraparound for physical address 0x%lx (size %zu)",
		 aligned_phys, aligned_size);

	align_boundary = arch_virt_region_align(aligned_phys, aligned_size);

	key = k_spin_lock(&z_mm_lock);

	if (IS_ENABLED(CONFIG_KERNEL_DIRECT_MAP) &&
	    (flags & K_MEM_DIRECT_MAP)) {
		dest_addr = (uint8_t *)aligned_phys;

		/* Mark the region of virtual memory bitmap as used
		 * if the region overlaps the virtual memory space.
		 *
		 * Basically if either end of region is within
		 * virtual memory space, we need to mark the bits.
		 */
		if (((dest_addr >= Z_VIRT_RAM_START) &&
		     (dest_addr < Z_VIRT_RAM_END)) ||
		    (((dest_addr + aligned_size) >= Z_VIRT_RAM_START) &&
		     ((dest_addr + aligned_size) < Z_VIRT_RAM_END))) {
			uint8_t *adjusted_start = MAX(dest_addr, Z_VIRT_RAM_START);
			uint8_t *adjusted_end = MIN(dest_addr + aligned_size,
						    Z_VIRT_RAM_END);
			size_t adjusted_sz = adjusted_end - adjusted_start;

			num_bits = adjusted_sz / CONFIG_MMU_PAGE_SIZE;
			offset = virt_to_bitmap_offset(adjusted_start, adjusted_sz);
			if (sys_bitarray_test_and_set_region(
			    &virt_region_bitmap, num_bits, offset, true)) {
				goto fail;
			}
		}
	} else {
		/* Obtain an appropriately sized chunk of virtual memory */
		dest_addr = virt_region_alloc(aligned_size, align_boundary);
		if (!dest_addr) {
			goto fail;
		}
	}

	/* If this fails there's something amiss with virt_region_alloc() */
	__ASSERT((uintptr_t)dest_addr <
		 ((uintptr_t)dest_addr + (size - 1)),
		 "wraparound for virtual address %p (size %zu)",
		 dest_addr, size);

	LOG_DBG("arch_mem_map(%p, 0x%lx, %zu, %x) offset %lu", dest_addr,
		aligned_phys, aligned_size, flags, addr_offset);

	arch_mem_map(dest_addr, aligned_phys, aligned_size, flags);
	k_spin_unlock(&z_mm_lock, key);

	*virt_ptr = dest_addr + addr_offset;
	return;
fail:
	/* May re-visit this in the future, but for now running out of
	 * virtual address space or failing the arch_mem_map() call is
	 * an unrecoverable situation.
	 *
	 * Other problems not related to resource exhaustion we leave as
	 * assertions since they are clearly programming mistakes.
	 */
	LOG_ERR("memory mapping 0x%lx (size %zu, flags 0x%x) failed",
		phys, size, flags);
	k_panic();
}

void z_phys_unmap(uint8_t *virt, size_t size)
{
	uintptr_t aligned_virt, addr_offset;
	size_t aligned_size;
	k_spinlock_key_t key;

	addr_offset = k_mem_region_align(&aligned_virt, &aligned_size,
					 POINTER_TO_UINT(virt), size,
					 CONFIG_MMU_PAGE_SIZE);
	__ASSERT(aligned_size != 0U, "0-length mapping at 0x%lx", aligned_virt);
	__ASSERT(aligned_virt < (aligned_virt + (aligned_size - 1)),
		 "wraparound for virtual address 0x%lx (size %zu)",
		 aligned_virt, aligned_size);

	key = k_spin_lock(&z_mm_lock);

	LOG_DBG("arch_mem_unmap(0x%lx, %zu) offset %lu",
		aligned_virt, aligned_size, addr_offset);

	arch_mem_unmap(UINT_TO_POINTER(aligned_virt), aligned_size);
	virt_region_free(UINT_TO_POINTER(aligned_virt), aligned_size);
	k_spin_unlock(&z_mm_lock, key);
}
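
/* Minimal usage sketch for z_phys_map()/z_phys_unmap() (not compiled here;
 * the peripheral address is hypothetical). Device registers are mapped
 * uncached, and the returned virtual pointer carries the sub-page offset of
 * the original physical address.
 */
#if 0
static void mmio_mapping_example(void)
{
	uint8_t *regs;

	z_phys_map(&regs, 0xABCD0000UL, 0x1000,
		   K_MEM_PERM_RW | K_MEM_CACHE_NONE);
	*(volatile uint32_t *)regs = 1U;	/* poke a register */
	z_phys_unmap(regs, 0x1000);
}
#endif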

/*
 * Miscellaneous
 */

size_t k_mem_region_align(uintptr_t *aligned_addr, size_t *aligned_size,
			  uintptr_t addr, size_t size, size_t align)
{
	size_t addr_offset;

	/* The actual mapped region must be page-aligned. Round down the
	 * physical address and pad the region size appropriately
	 */
	*aligned_addr = ROUND_DOWN(addr, align);
	addr_offset = addr - *aligned_addr;
	*aligned_size = ROUND_UP(size + addr_offset, align);

	return addr_offset;
}
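
/* Worked example (hypothetical numbers, 4 KiB alignment): aligning the
 * region starting at 0x12345678 with size 0x100 yields
 *
 *   *aligned_addr = 0x12345000, addr_offset = 0x678,
 *   *aligned_size = ROUND_UP(0x100 + 0x678, 0x1000) = 0x1000
 *
 * so the caller maps the single enclosing page and adds addr_offset back to
 * recover the virtual address of the original, unaligned start.
 */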

#if defined(CONFIG_LINKER_USE_BOOT_SECTION) || defined(CONFIG_LINKER_USE_PINNED_SECTION)
static void mark_linker_section_pinned(void *start_addr, void *end_addr,
				       bool pin)
{
	struct z_page_frame *pf;
	uint8_t *addr;

	uintptr_t pinned_start = ROUND_DOWN(POINTER_TO_UINT(start_addr),
					    CONFIG_MMU_PAGE_SIZE);
	uintptr_t pinned_end = ROUND_UP(POINTER_TO_UINT(end_addr),
					CONFIG_MMU_PAGE_SIZE);
	size_t pinned_size = pinned_end - pinned_start;

	VIRT_FOREACH(UINT_TO_POINTER(pinned_start), pinned_size, addr)
	{
		pf = z_phys_to_page_frame(Z_BOOT_VIRT_TO_PHYS(addr));
		frame_mapped_set(pf, addr);

		if (pin) {
			pf->flags |= Z_PAGE_FRAME_PINNED;
		} else {
			pf->flags &= ~Z_PAGE_FRAME_PINNED;
		}
	}
}
#endif /* CONFIG_LINKER_USE_BOOT_SECTION || CONFIG_LINKER_USE_PINNED_SECTION */

void z_mem_manage_init(void)
{
	uintptr_t phys;
	uint8_t *addr;
	struct z_page_frame *pf;
	k_spinlock_key_t key = k_spin_lock(&z_mm_lock);

	free_page_frame_list_init();

	ARG_UNUSED(addr);

#ifdef CONFIG_ARCH_HAS_RESERVED_PAGE_FRAMES
	/* If some page frames are unavailable for use as memory, arch
	 * code will mark Z_PAGE_FRAME_RESERVED in their flags
	 */
	arch_reserved_pages_update();
#endif /* CONFIG_ARCH_HAS_RESERVED_PAGE_FRAMES */

#ifdef CONFIG_LINKER_GENERIC_SECTIONS_PRESENT_AT_BOOT
	/* All pages composing the Zephyr image are mapped at boot in a
	 * predictable way. This can change at runtime.
	 */
	VIRT_FOREACH(Z_KERNEL_VIRT_START, Z_KERNEL_VIRT_SIZE, addr)
	{
		pf = z_phys_to_page_frame(Z_BOOT_VIRT_TO_PHYS(addr));
		frame_mapped_set(pf, addr);

		/* TODO: for now we pin the whole Zephyr image. Demand paging
		 * currently tested with anonymously-mapped pages which are not
		 * pinned.
		 *
		 * We will need to setup linker regions for a subset of kernel
		 * code/data pages which are pinned in memory and
		 * may not be evicted. This will contain critical CPU data
		 * structures, and any code used to perform page fault
		 * handling, page-ins, etc.
		 */
		pf->flags |= Z_PAGE_FRAME_PINNED;
	}
#endif /* CONFIG_LINKER_GENERIC_SECTIONS_PRESENT_AT_BOOT */

#ifdef CONFIG_LINKER_USE_BOOT_SECTION
	/* Pin the boot section to prevent it from being swapped out during
	 * boot process. Will be un-pinned once boot process completes.
	 */
	mark_linker_section_pinned(lnkr_boot_start, lnkr_boot_end, true);
#endif

#ifdef CONFIG_LINKER_USE_PINNED_SECTION
	/* Pin the page frames corresponding to the pinned symbols */
	mark_linker_section_pinned(lnkr_pinned_start, lnkr_pinned_end, true);
#endif

	/* Any remaining pages that aren't mapped, reserved, or pinned get
	 * added to the free pages list
	 */
	Z_PAGE_FRAME_FOREACH(phys, pf) {
		if (z_page_frame_is_available(pf)) {
			free_page_frame_list_put(pf);
		}
	}
	LOG_DBG("free page frames: %zu", z_free_page_count);

#ifdef CONFIG_DEMAND_PAGING
#ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
	z_paging_histogram_init();
#endif
	k_mem_paging_backing_store_init();
	k_mem_paging_eviction_init();
#endif
#if __ASSERT_ON
	page_frames_initialized = true;
#endif
	k_spin_unlock(&z_mm_lock, key);

#ifndef CONFIG_LINKER_GENERIC_SECTIONS_PRESENT_AT_BOOT
	/* If BSS section is not present in memory at boot,
	 * it would not have been cleared. This needs to be
	 * done now since paging mechanism has been initialized
	 * and the BSS pages can be brought into physical
	 * memory to be cleared.
	 */
	z_bss_zero();
#endif
}

void z_mem_manage_boot_finish(void)
{
#ifdef CONFIG_LINKER_USE_BOOT_SECTION
	/* At the end of boot process, unpin the boot sections
	 * as they don't need to be in memory all the time anymore.
	 */
	mark_linker_section_pinned(lnkr_boot_start, lnkr_boot_end, false);
#endif
}

#ifdef CONFIG_DEMAND_PAGING

#ifdef CONFIG_DEMAND_PAGING_STATS
struct k_mem_paging_stats_t paging_stats;
extern struct k_mem_paging_histogram_t z_paging_histogram_eviction;
extern struct k_mem_paging_histogram_t z_paging_histogram_backing_store_page_in;
extern struct k_mem_paging_histogram_t z_paging_histogram_backing_store_page_out;
#endif

static inline void do_backing_store_page_in(uintptr_t location)
{
#ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
	uint32_t time_diff;

#ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS
	timing_t time_start, time_end;

	time_start = timing_counter_get();
#else
	uint32_t time_start;

	time_start = k_cycle_get_32();
#endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */
#endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */

	k_mem_paging_backing_store_page_in(location);

#ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
#ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS
	time_end = timing_counter_get();
	time_diff = (uint32_t)timing_cycles_get(&time_start, &time_end);
#else
	time_diff = k_cycle_get_32() - time_start;
#endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */

	z_paging_histogram_inc(&z_paging_histogram_backing_store_page_in,
			       time_diff);
#endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */
}

static inline void do_backing_store_page_out(uintptr_t location)
{
#ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
	uint32_t time_diff;

#ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS
	timing_t time_start, time_end;

	time_start = timing_counter_get();
#else
	uint32_t time_start;

	time_start = k_cycle_get_32();
#endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */
#endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */

	k_mem_paging_backing_store_page_out(location);

#ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
#ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS
	time_end = timing_counter_get();
	time_diff = (uint32_t)timing_cycles_get(&time_start, &time_end);
#else
	time_diff = k_cycle_get_32() - time_start;
#endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */

	z_paging_histogram_inc(&z_paging_histogram_backing_store_page_out,
			       time_diff);
#endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */
}

/* Current implementation relies on interrupt locking to prevent any page table
 * access, which falls over if other CPUs are active. Addressing this is not
 * as simple as using spinlocks as regular memory reads/writes constitute
 * "access" in this sense.
 *
 * Current needs for demand paging are on uniprocessor systems.
 */
BUILD_ASSERT(!IS_ENABLED(CONFIG_SMP));

static void virt_region_foreach(void *addr, size_t size,
				void (*func)(void *))
{
	z_mem_assert_virtual_region(addr, size);

	for (size_t offset = 0; offset < size; offset += CONFIG_MMU_PAGE_SIZE) {
		func((uint8_t *)addr + offset);
	}
}

/*
 * Perform some preparatory steps before paging out. The provided page frame
 * must be evicted to the backing store immediately after this is called
 * with a call to k_mem_paging_backing_store_page_out() if it contains
 * a data page.
 *
 * - Map page frame to scratch area if requested. This is always true if we're
 *   doing a page fault, but is only set on manual evictions if the page is
 *   dirty.
 * - If mapped:
 *    - obtain backing store location and populate location parameter
 *    - Update page tables with location
 * - Mark page frame as busy
 *
 * Returns -ENOMEM if the backing store is full
 */
static int page_frame_prepare_locked(struct z_page_frame *pf, bool *dirty_ptr,
				     bool page_fault, uintptr_t *location_ptr)
{
	uintptr_t phys;
	int ret;
	bool dirty = *dirty_ptr;

	phys = z_page_frame_to_phys(pf);
	__ASSERT(!z_page_frame_is_pinned(pf), "page frame 0x%lx is pinned",
		 phys);

	/* If the backing store doesn't have a copy of the page, even if it
	 * wasn't modified, treat as dirty. This can happen for a few
	 * reasons:
	 * 1) Page has never been swapped out before, and the backing store
	 *    wasn't pre-populated with this data page.
	 * 2) Page was swapped out before, but the page contents were not
	 *    preserved after swapping back in.
	 * 3) Page contents were preserved when swapped back in, but were later
	 *    evicted from the backing store to make room for other evicted
	 *    pages.
	 */
	if (z_page_frame_is_mapped(pf)) {
		dirty = dirty || !z_page_frame_is_backed(pf);
	}

	if (dirty || page_fault) {
		arch_mem_scratch(phys);
	}

	if (z_page_frame_is_mapped(pf)) {
		ret = k_mem_paging_backing_store_location_get(pf, location_ptr,
							      page_fault);
		if (ret != 0) {
			LOG_ERR("out of backing store memory");
			return -ENOMEM;
		}
		arch_mem_page_out(pf->addr, *location_ptr);
	} else {
		/* Shouldn't happen unless this function is mis-used */
		__ASSERT(!dirty, "un-mapped page determined to be dirty");
	}
#ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
	/* Mark as busy so that z_page_frame_is_evictable() returns false */
	__ASSERT(!z_page_frame_is_busy(pf), "page frame 0x%lx is already busy",
		 phys);
	pf->flags |= Z_PAGE_FRAME_BUSY;
#endif
	/* Update dirty parameter, since we set to true if it wasn't backed
	 * even if otherwise clean
	 */
	*dirty_ptr = dirty;

	return 0;
}

static int do_mem_evict(void *addr)
{
	bool dirty;
	struct z_page_frame *pf;
	uintptr_t location;
	int key, ret;
	uintptr_t flags, phys;

#if CONFIG_DEMAND_PAGING_ALLOW_IRQ
	__ASSERT(!k_is_in_isr(),
		 "%s is unavailable in ISRs with CONFIG_DEMAND_PAGING_ALLOW_IRQ",
		 __func__);
	k_sched_lock();
#endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
	key = irq_lock();
	flags = arch_page_info_get(addr, &phys, false);
	__ASSERT((flags & ARCH_DATA_PAGE_NOT_MAPPED) == 0,
		 "address %p isn't mapped", addr);
	if ((flags & ARCH_DATA_PAGE_LOADED) == 0) {
		/* Un-mapped or already evicted. Nothing to do */
		ret = 0;
		goto out;
	}

	dirty = (flags & ARCH_DATA_PAGE_DIRTY) != 0;
	pf = z_phys_to_page_frame(phys);
	__ASSERT(pf->addr == addr, "page frame address mismatch");
	ret = page_frame_prepare_locked(pf, &dirty, false, &location);
	if (ret != 0) {
		goto out;
	}

	__ASSERT(ret == 0, "failed to prepare page frame");
#ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
	irq_unlock(key);
#endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
	if (dirty) {
		do_backing_store_page_out(location);
	}
#ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
	key = irq_lock();
#endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
	page_frame_free_locked(pf);
out:
	irq_unlock(key);
#ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
	k_sched_unlock();
#endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
	return ret;
}

int k_mem_page_out(void *addr, size_t size)
{
	__ASSERT(page_frames_initialized, "%s called on %p too early", __func__,
		 addr);
	z_mem_assert_virtual_region(addr, size);

	for (size_t offset = 0; offset < size; offset += CONFIG_MMU_PAGE_SIZE) {
		void *pos = (uint8_t *)addr + offset;
		int ret;

		ret = do_mem_evict(pos);
		if (ret != 0) {
			return ret;
		}
	}

	return 0;
}

int z_page_frame_evict(uintptr_t phys)
{
	int key, ret;
	struct z_page_frame *pf;
	bool dirty;
	uintptr_t flags;
	uintptr_t location;

	__ASSERT(page_frames_initialized, "%s called on 0x%lx too early",
		 __func__, phys);

	/* Implementation is similar to do_page_fault() except there is no
	 * data page to page-in, see comments in that function.
	 */

#ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
	__ASSERT(!k_is_in_isr(),
		 "%s is unavailable in ISRs with CONFIG_DEMAND_PAGING_ALLOW_IRQ",
		 __func__);
	k_sched_lock();
#endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
	key = irq_lock();
	pf = z_phys_to_page_frame(phys);
	if (!z_page_frame_is_mapped(pf)) {
		/* Nothing to do, free page */
		ret = 0;
		goto out;
	}
	flags = arch_page_info_get(pf->addr, NULL, false);
	/* Shouldn't ever happen */
	__ASSERT((flags & ARCH_DATA_PAGE_LOADED) != 0, "data page not loaded");
	dirty = (flags & ARCH_DATA_PAGE_DIRTY) != 0;
	ret = page_frame_prepare_locked(pf, &dirty, false, &location);
	if (ret != 0) {
		goto out;
	}

#ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
	irq_unlock(key);
#endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
	if (dirty) {
		do_backing_store_page_out(location);
	}
#ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
	key = irq_lock();
#endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
	page_frame_free_locked(pf);
out:
	irq_unlock(key);
#ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
	k_sched_unlock();
#endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
	return ret;
}

static inline void paging_stats_faults_inc(struct k_thread *faulting_thread,
					   int key)
{
#ifdef CONFIG_DEMAND_PAGING_STATS
	bool is_irq_unlocked = arch_irq_unlocked(key);

	paging_stats.pagefaults.cnt++;

	if (is_irq_unlocked) {
		paging_stats.pagefaults.irq_unlocked++;
	} else {
		paging_stats.pagefaults.irq_locked++;
	}

#ifdef CONFIG_DEMAND_PAGING_THREAD_STATS
	faulting_thread->paging_stats.pagefaults.cnt++;

	if (is_irq_unlocked) {
		faulting_thread->paging_stats.pagefaults.irq_unlocked++;
	} else {
		faulting_thread->paging_stats.pagefaults.irq_locked++;
	}
#else
	ARG_UNUSED(faulting_thread);
#endif

#ifndef CONFIG_DEMAND_PAGING_ALLOW_IRQ
	if (k_is_in_isr()) {
		paging_stats.pagefaults.in_isr++;

#ifdef CONFIG_DEMAND_PAGING_THREAD_STATS
		faulting_thread->paging_stats.pagefaults.in_isr++;
#endif
	}
#endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
#endif /* CONFIG_DEMAND_PAGING_STATS */
}

static inline void paging_stats_eviction_inc(struct k_thread *faulting_thread,
					     bool dirty)
{
#ifdef CONFIG_DEMAND_PAGING_STATS
	if (dirty) {
		paging_stats.eviction.dirty++;
	} else {
		paging_stats.eviction.clean++;
	}
#ifdef CONFIG_DEMAND_PAGING_THREAD_STATS
	if (dirty) {
		faulting_thread->paging_stats.eviction.dirty++;
	} else {
		faulting_thread->paging_stats.eviction.clean++;
	}
#else
	ARG_UNUSED(faulting_thread);
#endif /* CONFIG_DEMAND_PAGING_THREAD_STATS */
#endif /* CONFIG_DEMAND_PAGING_STATS */
}

static inline struct z_page_frame *do_eviction_select(bool *dirty)
{
	struct z_page_frame *pf;

#ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
	uint32_t time_diff;

#ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS
	timing_t time_start, time_end;

	time_start = timing_counter_get();
#else
	uint32_t time_start;

	time_start = k_cycle_get_32();
#endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */
#endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */

	pf = k_mem_paging_eviction_select(dirty);

#ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
#ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS
	time_end = timing_counter_get();
	time_diff = (uint32_t)timing_cycles_get(&time_start, &time_end);
#else
	time_diff = k_cycle_get_32() - time_start;
#endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */

	z_paging_histogram_inc(&z_paging_histogram_eviction, time_diff);
#endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */

	return pf;
}

static bool do_page_fault(void *addr, bool pin)
{
	struct z_page_frame *pf;
	int key, ret;
	uintptr_t page_in_location, page_out_location;
	enum arch_page_location status;
	bool result;
	bool dirty = false;
	struct k_thread *faulting_thread = _current_cpu->current;

	__ASSERT(page_frames_initialized, "page fault at %p happened too early",
		 addr);

	LOG_DBG("page fault at %p", addr);

	/*
	 * TODO: Add performance accounting:
	 * - k_mem_paging_eviction_select() metrics
	 *   * periodic timer execution time histogram (if implemented)
	 */

#ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
	/* We lock the scheduler so that other threads are never scheduled
	 * during the page-in/out operation.
	 *
	 * We do however re-enable interrupts during the page-in/page-out
	 * operation iff interrupts were enabled when the exception was taken;
	 * in this configuration page faults in an ISR are a bug; all their
	 * code/data must be pinned.
	 *
	 * If interrupts were disabled when the exception was taken, the
	 * arch code is responsible for keeping them that way when entering
	 * this function.
	 *
	 * If this is not enabled, then interrupts are always locked for the
	 * entire operation. This is far worse for system interrupt latency
	 * but requires less pinned pages and ISRs may also take page faults.
	 *
	 * Support for allowing k_mem_paging_backing_store_page_out() and
	 * k_mem_paging_backing_store_page_in() to also sleep and allow
	 * other threads to run (such as in the case where the transfer is
	 * async DMA) is not implemented. Even if limited to thread context,
	 * arbitrary memory access triggering exceptions that put a thread to
	 * sleep on a contended page fault operation will break scheduling
	 * assumptions of cooperative threads or threads that implement
	 * critical sections with spinlocks or by disabling IRQs.
	 */
	k_sched_lock();
	__ASSERT(!k_is_in_isr(), "ISR page faults are forbidden");
#endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */

	key = irq_lock();
	status = arch_page_location_get(addr, &page_in_location);
	if (status == ARCH_PAGE_LOCATION_BAD) {
		/* Return false to treat as a fatal error */
		result = false;
		goto out;
	}
	result = true;

	if (status == ARCH_PAGE_LOCATION_PAGED_IN) {
		if (pin) {
			/* It's a physical memory address */
			uintptr_t phys = page_in_location;

			pf = z_phys_to_page_frame(phys);
			pf->flags |= Z_PAGE_FRAME_PINNED;
		}

		/* This if-block is to pin the page if it is
		 * already present in physical memory. There is
		 * no need to go through the following code to
		 * pull in the data pages. So skip to the end.
		 */
		goto out;
	}
	__ASSERT(status == ARCH_PAGE_LOCATION_PAGED_OUT,
		 "unexpected status value %d", status);

	paging_stats_faults_inc(faulting_thread, key);

	pf = free_page_frame_list_get();
	if (pf == NULL) {
		/* Need to evict a page frame */
		pf = do_eviction_select(&dirty);
		__ASSERT(pf != NULL, "failed to get a page frame");
		LOG_DBG("evicting %p at 0x%lx", pf->addr,
			z_page_frame_to_phys(pf));

		paging_stats_eviction_inc(faulting_thread, dirty);
	}
	ret = page_frame_prepare_locked(pf, &dirty, true, &page_out_location);
	__ASSERT(ret == 0, "failed to prepare page frame");

#ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
	irq_unlock(key);
	/* Interrupts are now unlocked if they were not locked when we entered
	 * this function, and we may service ISRs. The scheduler is still
	 * locked.
	 */
#endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
	if (dirty) {
		do_backing_store_page_out(page_out_location);
	}
	do_backing_store_page_in(page_in_location);

#ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
	key = irq_lock();
	pf->flags &= ~Z_PAGE_FRAME_BUSY;
#endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
	if (pin) {
		pf->flags |= Z_PAGE_FRAME_PINNED;
	}
	pf->flags |= Z_PAGE_FRAME_MAPPED;
	pf->addr = UINT_TO_POINTER(POINTER_TO_UINT(addr)
				   & ~(CONFIG_MMU_PAGE_SIZE - 1));

	arch_mem_page_in(addr, z_page_frame_to_phys(pf));
	k_mem_paging_backing_store_page_finalize(pf, page_in_location);
out:
	irq_unlock(key);
#ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
	k_sched_unlock();
#endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */

	return result;
}

static void do_page_in(void *addr)
{
	bool ret;

	ret = do_page_fault(addr, false);
	__ASSERT(ret, "unmapped memory address %p", addr);
	(void)ret;
}

void k_mem_page_in(void *addr, size_t size)
{
	__ASSERT(!IS_ENABLED(CONFIG_DEMAND_PAGING_ALLOW_IRQ) || !k_is_in_isr(),
		 "%s may not be called in ISRs if CONFIG_DEMAND_PAGING_ALLOW_IRQ is enabled",
		 __func__);
	virt_region_foreach(addr, size, do_page_in);
}

static void do_mem_pin(void *addr)
{
	bool ret;

	ret = do_page_fault(addr, true);
	__ASSERT(ret, "unmapped memory address %p", addr);
	(void)ret;
}

void k_mem_pin(void *addr, size_t size)
{
	__ASSERT(!IS_ENABLED(CONFIG_DEMAND_PAGING_ALLOW_IRQ) || !k_is_in_isr(),
		 "%s may not be called in ISRs if CONFIG_DEMAND_PAGING_ALLOW_IRQ is enabled",
		 __func__);
	virt_region_foreach(addr, size, do_mem_pin);
}

bool z_page_fault(void *addr)
{
	return do_page_fault(addr, false);
}

static void do_mem_unpin(void *addr)
{
	struct z_page_frame *pf;
	unsigned int key;
	uintptr_t flags, phys;

	key = irq_lock();
	flags = arch_page_info_get(addr, &phys, false);
	__ASSERT((flags & ARCH_DATA_PAGE_NOT_MAPPED) == 0,
		 "invalid data page at %p", addr);
	if ((flags & ARCH_DATA_PAGE_LOADED) != 0) {
		pf = z_phys_to_page_frame(phys);
		pf->flags &= ~Z_PAGE_FRAME_PINNED;
	}
	irq_unlock(key);
}

void k_mem_unpin(void *addr, size_t size)
{
	__ASSERT(page_frames_initialized, "%s called on %p too early", __func__,
		 addr);
	virt_region_foreach(addr, size, do_mem_unpin);
}
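
/* Minimal usage sketch for the demand paging APIs (not compiled here; the
 * function and buffers below are hypothetical, and both regions must be
 * page-aligned virtual addresses with page-multiple sizes).
 */
#if 0
static void demand_paging_example(uint8_t *cold_buf, uint8_t *hot_buf, size_t sz)
{
	/* Push a rarely-used buffer out to the backing store now,
	 * freeing its page frames for other data pages.
	 */
	(void)k_mem_page_out(cold_buf, sz);

	/* Fault a latency-critical buffer in (if needed) and keep it
	 * resident until we are done with it.
	 */
	k_mem_pin(hot_buf, sz);
	/* ... time-sensitive work on hot_buf ... */
	k_mem_unpin(hot_buf, sz);
}
#endif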

#endif /* CONFIG_DEMAND_PAGING */