/*
 * Copyright (c) 2020 Intel Corporation
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Routines for managing virtual address spaces
 */

#include <stdint.h>
#include <kernel_arch_interface.h>
#include <spinlock.h>
#include <mmu.h>
#include <init.h>
#include <kernel_internal.h>
#include <syscall_handler.h>
#include <toolchain.h>
#include <linker/linker-defs.h>
#include <sys/bitarray.h>
#include <timing/timing.h>
#include <logging/log.h>
LOG_MODULE_DECLARE(os, CONFIG_KERNEL_LOG_LEVEL);

/*
 * General terminology:
 * - A page frame is a page-sized physical memory region in RAM. It is a
 *   container where a data page may be placed. It is always referred to by
 *   physical address. We have a convention of using uintptr_t for physical
 *   addresses. We instantiate a struct z_page_frame to store metadata for
 *   every page frame.
 *
 * - A data page is a page-sized region of data. It may exist in a page frame,
 *   or be paged out to some backing store. Its location can always be looked
 *   up in the CPU's page tables (or equivalent) by virtual address.
 *   The data type will always be void * or in some cases uint8_t * when we
 *   want to do pointer arithmetic.
 */
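
/* Illustrative sketch only: how the two kinds of addresses relate for a
 * page frame that currently holds a mapped data page. The helpers shown
 * (z_phys_to_page_frame(), z_page_frame_to_phys()) are the ones used later
 * in this file; the concrete value is hypothetical.
 *
 *	uintptr_t phys = 0x10000000;		// physical address (example)
 *	struct z_page_frame *pf = z_phys_to_page_frame(phys);
 *	void *virt = pf->addr;			// data page's virtual address
 *	__ASSERT(z_page_frame_to_phys(pf) == phys, "round trip");
 */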

/* Spinlock to protect any globals in this file and serialize page table
 * updates in arch code
 */
struct k_spinlock z_mm_lock;

/*
 * General page frame management
 */

/* Database of all RAM page frames */
struct z_page_frame z_page_frames[Z_NUM_PAGE_FRAMES];

#if __ASSERT_ON
/* Indicator that z_page_frames has been initialized; many of these APIs do
 * not work before POST_KERNEL
 */
static bool page_frames_initialized;
#endif

/* Add colors to page table dumps to indicate mapping type */
#define COLOR_PAGE_FRAMES	1

#if COLOR_PAGE_FRAMES
#define ANSI_DEFAULT "\x1B[0m"
#define ANSI_RED     "\x1B[1;31m"
#define ANSI_GREEN   "\x1B[1;32m"
#define ANSI_YELLOW  "\x1B[1;33m"
#define ANSI_BLUE    "\x1B[1;34m"
#define ANSI_MAGENTA "\x1B[1;35m"
#define ANSI_CYAN    "\x1B[1;36m"
#define ANSI_GREY    "\x1B[1;90m"

#define COLOR(x)	printk(_CONCAT(ANSI_, x))
#else
#define COLOR(x)	do { } while (0)
#endif

static void page_frame_dump(struct z_page_frame *pf)
{
	if (z_page_frame_is_reserved(pf)) {
		COLOR(CYAN);
		printk("R");
	} else if (z_page_frame_is_busy(pf)) {
		COLOR(MAGENTA);
		printk("B");
	} else if (z_page_frame_is_pinned(pf)) {
		COLOR(YELLOW);
		printk("P");
	} else if (z_page_frame_is_available(pf)) {
		COLOR(GREY);
		printk(".");
	} else if (z_page_frame_is_mapped(pf)) {
		COLOR(DEFAULT);
		printk("M");
	} else {
		COLOR(RED);
		printk("?");
	}
}

void z_page_frames_dump(void)
{
	int column = 0;

	__ASSERT(page_frames_initialized, "%s called too early", __func__);
	printk("Physical memory from 0x%lx to 0x%lx\n",
	       Z_PHYS_RAM_START, Z_PHYS_RAM_END);

	for (int i = 0; i < Z_NUM_PAGE_FRAMES; i++) {
		struct z_page_frame *pf = &z_page_frames[i];

		page_frame_dump(pf);

		column++;
		if (column == 64) {
			column = 0;
			printk("\n");
		}
	}

	COLOR(DEFAULT);
	if (column != 0) {
		printk("\n");
	}
}

#define VIRT_FOREACH(_base, _size, _pos) \
	for (_pos = _base; \
	     _pos < ((uint8_t *)_base + _size); _pos += CONFIG_MMU_PAGE_SIZE)

#define PHYS_FOREACH(_base, _size, _pos) \
	for (_pos = _base; \
	     _pos < ((uintptr_t)_base + _size); _pos += CONFIG_MMU_PAGE_SIZE)

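
/* Usage sketch (illustrative only): walk a virtual region one page at a
 * time, e.g. to operate on each page of a buffer. "region", "region_size"
 * and the per-page helper are hypothetical; "region_size" is assumed to be
 * page-aligned.
 *
 *	uint8_t *pos;
 *
 *	VIRT_FOREACH(region, region_size, pos) {
 *		do_something_per_page(pos);
 *	}
 */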

/*
 * Virtual address space management
 *
 * Call all of these functions with z_mm_lock held.
 *
 * Overall virtual memory map: When the kernel starts, it resides in
 * virtual memory in the region Z_KERNEL_VIRT_START to
 * Z_KERNEL_VIRT_END. Unused virtual memory past this, up to the limit
 * noted by CONFIG_KERNEL_VM_SIZE may be used for runtime memory mappings.
 *
 * If CONFIG_ARCH_MAPS_ALL_RAM is set, we do not just map the kernel image,
 * but have a mapping for all RAM in place. This is for special architectural
 * purposes and does not otherwise affect page frame accounting or flags;
 * the only guarantee is that such RAM mapping outside of the Zephyr image
 * won't be disturbed by subsequent memory mapping calls.
 *
 * +--------------+ <- Z_VIRT_RAM_START
 * | Undefined VM | <- May contain ancillary regions like x86_64's locore
 * +--------------+ <- Z_KERNEL_VIRT_START (often == Z_VIRT_RAM_START)
 * | Mapping for  |
 * | main kernel  |
 * | image        |
 * |		  |
 * |		  |
 * +--------------+ <- Z_FREE_VM_START
 * |              |
 * | Unused,      |
 * | Available VM |
 * |              |
 * |..............| <- mapping_pos (grows downward as more mappings are made)
 * | Mapping      |
 * +--------------+
 * | Mapping      |
 * +--------------+
 * | ...          |
 * +--------------+
 * | Mapping      |
 * +--------------+ <- mappings start here
 * | Reserved     | <- special purpose virtual page(s) of size Z_VM_RESERVED
 * +--------------+ <- Z_VIRT_RAM_END
 */
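
/* Worked example (all values are hypothetical, not from any real board):
 * with Z_VIRT_RAM_START == 0x80000000, CONFIG_KERNEL_VM_SIZE == 0x800000
 * (8 MB) and CONFIG_MMU_PAGE_SIZE == 0x1000, Z_VIRT_RAM_END is 0x80800000.
 * If the kernel image occupies 1 MB starting at
 * Z_KERNEL_VIRT_START == 0x80000000, then Z_FREE_VM_START is 0x80100000 and
 * runtime mappings are carved out downward, starting just below
 * Z_VIRT_RAM_END - Z_VM_RESERVED.
 */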

/* Bitmap of virtual addresses where one bit corresponds to one page.
 * This is used by virt_region_alloc() to figure out which
 * region of virtual addresses can be used for memory mapping.
 *
 * Note that bit #0 is the highest address so that allocation is
 * done in reverse from the highest address.
 */
SYS_BITARRAY_DEFINE(virt_region_bitmap,
		    CONFIG_KERNEL_VM_SIZE / CONFIG_MMU_PAGE_SIZE);

static bool virt_region_inited;

#define Z_VIRT_REGION_START_ADDR	Z_FREE_VM_START
#define Z_VIRT_REGION_END_ADDR		(Z_VIRT_RAM_END - Z_VM_RESERVED)

static inline uintptr_t virt_from_bitmap_offset(size_t offset, size_t size)
{
	return POINTER_TO_UINT(Z_VIRT_RAM_END)
	       - (offset * CONFIG_MMU_PAGE_SIZE) - size;
}

static inline size_t virt_to_bitmap_offset(void *vaddr, size_t size)
{
	return (POINTER_TO_UINT(Z_VIRT_RAM_END)
		- POINTER_TO_UINT(vaddr) - size) / CONFIG_MMU_PAGE_SIZE;
}
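
/* Worked example (hypothetical values): with Z_VIRT_RAM_END == 0x80800000
 * and CONFIG_MMU_PAGE_SIZE == 0x1000, a single-page allocation
 * (size == 0x1000) at bitmap offset 0 corresponds to virtual address
 * 0x807ff000, i.e. the highest page; offset 1 maps to 0x807fe000, and so on
 * toward lower addresses. virt_to_bitmap_offset() is the exact inverse:
 * virt_to_bitmap_offset((void *)0x807ff000, 0x1000) == 0.
 */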

static void virt_region_init(void)
{
	size_t offset, num_bits;

	/* There are regions where we should never map via
	 * k_mem_map() and z_phys_map(). Mark them as
	 * already allocated so they will never be used.
	 */

	if (Z_VM_RESERVED > 0) {
		/* Mark reserved region at end of virtual address space */
		num_bits = Z_VM_RESERVED / CONFIG_MMU_PAGE_SIZE;
		(void)sys_bitarray_set_region(&virt_region_bitmap,
					      num_bits, 0);
	}

	/* Mark all bits up to Z_FREE_VM_START as allocated */
	num_bits = POINTER_TO_UINT(Z_FREE_VM_START)
		   - POINTER_TO_UINT(Z_VIRT_RAM_START);
	offset = virt_to_bitmap_offset(Z_VIRT_RAM_START, num_bits);
	num_bits /= CONFIG_MMU_PAGE_SIZE;
	(void)sys_bitarray_set_region(&virt_region_bitmap,
				      num_bits, offset);

	virt_region_inited = true;
}

static void *virt_region_alloc(size_t size)
{
	uintptr_t dest_addr;
	size_t offset;
	size_t num_bits;
	int ret;

	if (unlikely(!virt_region_inited)) {
		virt_region_init();
	}

	num_bits = size / CONFIG_MMU_PAGE_SIZE;
	ret = sys_bitarray_alloc(&virt_region_bitmap, num_bits, &offset);
	if (ret != 0) {
		LOG_ERR("insufficient virtual address space (requested %zu)",
			size);
		return NULL;
	}

	/* Remember that bit #0 in bitmap corresponds to the highest
	 * virtual address. So here we need to go downwards (backwards?)
	 * to get the starting address of the allocated region.
	 */
	dest_addr = virt_from_bitmap_offset(offset, size);

	/* Need to make sure this does not step into kernel memory */
	if (dest_addr < POINTER_TO_UINT(Z_VIRT_REGION_START_ADDR)) {
		(void)sys_bitarray_free(&virt_region_bitmap, num_bits, offset);
		return NULL;
	}

	return UINT_TO_POINTER(dest_addr);
}

static void virt_region_free(void *vaddr, size_t size)
{
	size_t offset, num_bits;
	uint8_t *vaddr_u8 = (uint8_t *)vaddr;

	if (unlikely(!virt_region_inited)) {
		virt_region_init();
	}

	__ASSERT((vaddr_u8 >= Z_VIRT_REGION_START_ADDR)
		 && ((vaddr_u8 + size) < Z_VIRT_REGION_END_ADDR),
		 "invalid virtual address region %p (%zu)", vaddr_u8, size);
	if (!((vaddr_u8 >= Z_VIRT_REGION_START_ADDR)
	      && ((vaddr_u8 + size) < Z_VIRT_REGION_END_ADDR))) {
		return;
	}

	offset = virt_to_bitmap_offset(vaddr, size);
	num_bits = size / CONFIG_MMU_PAGE_SIZE;
	(void)sys_bitarray_free(&virt_region_bitmap, num_bits, offset);
}

/*
 * Free page frames management
 *
 * Call all of these functions with z_mm_lock held.
 */

/* Linked list of unused and available page frames.
 *
 * TODO: This is very simple and treats all free page frames as being equal.
 * However, there are use-cases to consolidate free pages such that entire
 * SRAM banks can be switched off to save power, and so obtaining free pages
 * may require a more complex ontology which prefers page frames in RAM banks
 * which are still active.
 *
 * This implies in the future there may be multiple slists managing physical
 * pages. Each page frame will still just have one snode link.
 */
static sys_slist_t free_page_frame_list;

/* Number of unused and available free page frames */
size_t z_free_page_count;

#define PF_ASSERT(pf, expr, fmt, ...) \
	__ASSERT(expr, "page frame 0x%lx: " fmt, z_page_frame_to_phys(pf), \
		 ##__VA_ARGS__)

/* Get an unused page frame (any one will do), or NULL if there are none */
static struct z_page_frame *free_page_frame_list_get(void)
{
	sys_snode_t *node;
	struct z_page_frame *pf = NULL;

	node = sys_slist_get(&free_page_frame_list);
	if (node != NULL) {
		z_free_page_count--;
		pf = CONTAINER_OF(node, struct z_page_frame, node);
		PF_ASSERT(pf, z_page_frame_is_available(pf),
			 "unavailable but somehow on free list");
	}

	return pf;
}

/* Release a page frame back into the list of free pages */
static void free_page_frame_list_put(struct z_page_frame *pf)
{
	PF_ASSERT(pf, z_page_frame_is_available(pf),
		 "unavailable page put on free list");
	sys_slist_append(&free_page_frame_list, &pf->node);
	z_free_page_count++;
}

static void free_page_frame_list_init(void)
{
	sys_slist_init(&free_page_frame_list);
}

static void page_frame_free_locked(struct z_page_frame *pf)
{
	pf->flags = 0;
	free_page_frame_list_put(pf);
}

/*
 * Memory Mapping
 */

/* Called after the frame is mapped in the arch layer, to update our
 * local ontology (and do some assertions while we're at it)
 */
static void frame_mapped_set(struct z_page_frame *pf, void *addr)
{
	PF_ASSERT(pf, !z_page_frame_is_reserved(pf),
		  "attempted to map a reserved page frame");

	/* We do allow multiple mappings for pinned page frames
	 * since we will never need to reverse map them.
	 * This is uncommon; use-cases are for things like the
	 * Zephyr equivalent of VDSOs
	 */
	PF_ASSERT(pf, !z_page_frame_is_mapped(pf) || z_page_frame_is_pinned(pf),
		 "non-pinned and already mapped to %p", pf->addr);

	pf->flags |= Z_PAGE_FRAME_MAPPED;
	pf->addr = addr;
}

/* Go through page frames to find the physical address mapped
 * by a virtual address.
 *
 * @param[in]  virt Virtual Address
 * @param[out] phys Physical address mapped to the input virtual address
 *                  if such mapping exists.
 *
 * @retval 0 if mapping is found and valid
 * @retval -EFAULT if virtual address is not mapped
 */
static int virt_to_page_frame(void *virt, uintptr_t *phys)
{
	uintptr_t paddr;
	struct z_page_frame *pf;
	int ret = -EFAULT;

	Z_PAGE_FRAME_FOREACH(paddr, pf) {
		if (z_page_frame_is_mapped(pf)) {
			if (virt == pf->addr) {
				ret = 0;
				*phys = z_page_frame_to_phys(pf);
				break;
			}
		}
	}

	return ret;
}
__weak FUNC_ALIAS(virt_to_page_frame, arch_page_phys_get, int);
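
/* Usage sketch (illustrative only): query the physical address behind a
 * mapped virtual address. "some_vaddr" is hypothetical.
 *
 *	uintptr_t phys;
 *
 *	if (arch_page_phys_get(some_vaddr, &phys) == 0) {
 *		// some_vaddr is mapped at physical address 'phys'
 *	} else {
 *		// -EFAULT: not mapped
 *	}
 */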

#ifdef CONFIG_DEMAND_PAGING
static int page_frame_prepare_locked(struct z_page_frame *pf, bool *dirty_ptr,
				     bool page_in, uintptr_t *location_ptr);

static inline void do_backing_store_page_in(uintptr_t location);
static inline void do_backing_store_page_out(uintptr_t location);
#endif /* CONFIG_DEMAND_PAGING */

/* Allocate a free page frame, and map it to a specified virtual address
 *
 * TODO: Add optional support for copy-on-write mappings to a zero page instead
 * of allocating, in which case page frames will be allocated lazily as
 * the mappings to the zero page get touched. This will avoid expensive
 * page-ins as memory is mapped and physical RAM or backing store storage will
 * not be used if the mapped memory is unused. The cost is an empty physical
 * page of zeroes.
 */
static int map_anon_page(void *addr, uint32_t flags)
{
	struct z_page_frame *pf;
	uintptr_t phys;
	bool lock = (flags & K_MEM_MAP_LOCK) != 0U;
	bool uninit = (flags & K_MEM_MAP_UNINIT) != 0U;

	pf = free_page_frame_list_get();
	if (pf == NULL) {
#ifdef CONFIG_DEMAND_PAGING
		uintptr_t location;
		bool dirty;
		int ret;

		pf = k_mem_paging_eviction_select(&dirty);
		__ASSERT(pf != NULL, "failed to get a page frame");
		LOG_DBG("evicting %p at 0x%lx", pf->addr,
			z_page_frame_to_phys(pf));
		ret = page_frame_prepare_locked(pf, &dirty, false, &location);
		if (ret != 0) {
			return -ENOMEM;
		}
		if (dirty) {
			do_backing_store_page_out(location);
		}
		pf->flags = 0;
#else
		return -ENOMEM;
#endif /* CONFIG_DEMAND_PAGING */
	}

	phys = z_page_frame_to_phys(pf);
	arch_mem_map(addr, phys, CONFIG_MMU_PAGE_SIZE, flags | K_MEM_CACHE_WB);

	if (lock) {
		pf->flags |= Z_PAGE_FRAME_PINNED;
	}
	frame_mapped_set(pf, addr);

	LOG_DBG("memory mapping anon page %p -> 0x%lx", addr, phys);

	if (!uninit) {
		/* If we later implement mappings to a copy-on-write
		 * zero page, won't need this step
		 */
		memset(addr, 0, CONFIG_MMU_PAGE_SIZE);
	}

	return 0;
}

void *k_mem_map(size_t size, uint32_t flags)
{
	uint8_t *dst;
	size_t total_size;
	int ret;
	k_spinlock_key_t key;
	uint8_t *pos;

	__ASSERT(!(((flags & K_MEM_PERM_USER) != 0U) &&
		   ((flags & K_MEM_MAP_UNINIT) != 0U)),
		 "user access to anonymous uninitialized pages is forbidden");
	__ASSERT(size % CONFIG_MMU_PAGE_SIZE == 0U,
		 "unaligned size %zu passed to %s", size, __func__);
	__ASSERT(size != 0, "zero sized memory mapping");
	__ASSERT(page_frames_initialized, "%s called too early", __func__);
	__ASSERT((flags & K_MEM_CACHE_MASK) == 0U,
		 "%s does not support explicit cache settings", __func__);

	key = k_spin_lock(&z_mm_lock);

	/* Need extra for the guard pages (before and after) which we
	 * won't map.
	 */
	total_size = size + CONFIG_MMU_PAGE_SIZE * 2;

	dst = virt_region_alloc(total_size);
	if (dst == NULL) {
		/* Address space has no free region */
		goto out;
	}

	/* Unmap both guard pages to make sure accessing them
	 * will generate fault.
	 */
	arch_mem_unmap(dst, CONFIG_MMU_PAGE_SIZE);
	arch_mem_unmap(dst + CONFIG_MMU_PAGE_SIZE + size,
		       CONFIG_MMU_PAGE_SIZE);

	/* Skip over the "before" guard page in returned address. */
	dst += CONFIG_MMU_PAGE_SIZE;

	VIRT_FOREACH(dst, size, pos) {
		ret = map_anon_page(pos, flags);

		if (ret != 0) {
			/* TODO: call k_mem_unmap(dst, pos - dst) when
			 * implemented in #28990 and release any guard virtual
			 * page as well.
			 */
			dst = NULL;
			goto out;
		}
	}
out:
	k_spin_unlock(&z_mm_lock, key);
	return dst;
}
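
/* Usage sketch (illustrative only): map a page-aligned anonymous buffer,
 * use it, then release it. The size and permission flag shown are arbitrary
 * example choices.
 *
 *	void *buf = k_mem_map(4 * CONFIG_MMU_PAGE_SIZE, K_MEM_PERM_RW);
 *
 *	if (buf == NULL) {
 *		// out of free page frames or virtual address space
 *	} else {
 *		// buf is zero-filled unless K_MEM_MAP_UNINIT was passed
 *		...
 *		k_mem_unmap(buf, 4 * CONFIG_MMU_PAGE_SIZE);
 *	}
 */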

void k_mem_unmap(void *addr, size_t size)
{
	uintptr_t phys;
	uint8_t *pos;
	struct z_page_frame *pf;
	k_spinlock_key_t key;
	size_t total_size;
	int ret;

	/* Need space for the "before" guard page */
	__ASSERT_NO_MSG(POINTER_TO_UINT(addr) >= CONFIG_MMU_PAGE_SIZE);

	/* Make sure address range is still valid after accounting
	 * for two guard pages.
	 */
	pos = (uint8_t *)addr - CONFIG_MMU_PAGE_SIZE;
	z_mem_assert_virtual_region(pos, size + (CONFIG_MMU_PAGE_SIZE * 2));

	key = k_spin_lock(&z_mm_lock);

	/* Check if both guard pages are unmapped.
	 * Bail if not, as this is probably a region not mapped
	 * using k_mem_map().
	 */
	pos = addr;
	ret = arch_page_phys_get(pos - CONFIG_MMU_PAGE_SIZE, NULL);
	if (ret == 0) {
		__ASSERT(ret != 0,
			 "%s: cannot find preceding guard page for (%p, %zu)",
			 __func__, addr, size);
		goto out;
	}

	ret = arch_page_phys_get(pos + size, NULL);
	if (ret == 0) {
		__ASSERT(ret != 0,
			 "%s: cannot find succeeding guard page for (%p, %zu)",
			 __func__, addr, size);
		goto out;
	}

	VIRT_FOREACH(addr, size, pos) {
		ret = arch_page_phys_get(pos, &phys);

		__ASSERT(ret == 0,
			 "%s: cannot unmap an unmapped address %p",
			 __func__, pos);
		if (ret != 0) {
			/* Found an address not mapped. Do not continue. */
			goto out;
		}

		__ASSERT(z_is_page_frame(phys),
			 "%s: 0x%lx is not a page frame", __func__, phys);
		if (!z_is_page_frame(phys)) {
			/* Physical address has no corresponding page frame
			 * description in the page frame array.
			 * This should not happen. Do not continue.
			 */
			goto out;
		}

		/* Grab the corresponding page frame from physical address */
		pf = z_phys_to_page_frame(phys);

		__ASSERT(z_page_frame_is_mapped(pf),
			 "%s: 0x%lx is not a mapped page frame", __func__, phys);
		if (!z_page_frame_is_mapped(pf)) {
			/* Page frame is not marked mapped.
			 * This should not happen. Do not continue.
			 */
			goto out;
		}

		arch_mem_unmap(pos, CONFIG_MMU_PAGE_SIZE);

		/* Put the page frame back into free list */
		page_frame_free_locked(pf);
	}

	/* There are guard pages just before and after the mapped
	 * region. So we also need to free them from the bitmap.
	 */
	pos = (uint8_t *)addr - CONFIG_MMU_PAGE_SIZE;
	total_size = size + CONFIG_MMU_PAGE_SIZE * 2;
	virt_region_free(pos, total_size);

out:
	k_spin_unlock(&z_mm_lock, key);
}

size_t k_mem_free_get(void)
{
	size_t ret;
	k_spinlock_key_t key;

	__ASSERT(page_frames_initialized, "%s called too early", __func__);

	key = k_spin_lock(&z_mm_lock);
#ifdef CONFIG_DEMAND_PAGING
	if (z_free_page_count > CONFIG_DEMAND_PAGING_PAGE_FRAMES_RESERVE) {
		ret = z_free_page_count - CONFIG_DEMAND_PAGING_PAGE_FRAMES_RESERVE;
	} else {
		ret = 0;
	}
#else
	ret = z_free_page_count;
#endif
	k_spin_unlock(&z_mm_lock, key);

	return ret * (size_t)CONFIG_MMU_PAGE_SIZE;
}

/* This may be called from arch early boot code before z_cstart() is invoked.
 * Data will have been copied and BSS zeroed, but this function must not rely
 * on any initialization functions having been called beforehand in order to
 * work correctly.
 */
void z_phys_map(uint8_t **virt_ptr, uintptr_t phys, size_t size, uint32_t flags)
{
	uintptr_t aligned_phys, addr_offset;
	size_t aligned_size;
	k_spinlock_key_t key;
	uint8_t *dest_addr;

	addr_offset = k_mem_region_align(&aligned_phys, &aligned_size,
					 phys, size,
					 CONFIG_MMU_PAGE_SIZE);
	__ASSERT(aligned_size != 0U, "0-length mapping at 0x%lx", aligned_phys);
	__ASSERT(aligned_phys < (aligned_phys + (aligned_size - 1)),
		 "wraparound for physical address 0x%lx (size %zu)",
		 aligned_phys, aligned_size);

	key = k_spin_lock(&z_mm_lock);
	/* Obtain an appropriately sized chunk of virtual memory */
	dest_addr = virt_region_alloc(aligned_size);
	if (!dest_addr) {
		goto fail;
	}

	/* If this fails there's something amiss with virt_region_alloc() */
	__ASSERT((uintptr_t)dest_addr <
		 ((uintptr_t)dest_addr + (size - 1)),
		 "wraparound for virtual address %p (size %zu)",
		 dest_addr, size);

	LOG_DBG("arch_mem_map(%p, 0x%lx, %zu, %x) offset %lu", dest_addr,
		aligned_phys, aligned_size, flags, addr_offset);

	arch_mem_map(dest_addr, aligned_phys, aligned_size, flags);
	k_spin_unlock(&z_mm_lock, key);

	*virt_ptr = dest_addr + addr_offset;
	return;
fail:
	/* May re-visit this in the future, but for now running out of
	 * virtual address space or failing the arch_mem_map() call is
	 * an unrecoverable situation.
	 *
	 * Other problems not related to resource exhaustion we leave as
	 * assertions since they are clearly programming mistakes.
	 */
	LOG_ERR("memory mapping 0x%lx (size %zu, flags 0x%x) failed",
		phys, size, flags);
	k_panic();
}
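
/* Usage sketch (illustrative only): a driver mapping a device's MMIO
 * registers into virtual memory. The physical address, size and flag
 * combination are hypothetical.
 *
 *	uint8_t *regs;
 *
 *	z_phys_map(&regs, 0xfe000000, 0x1000,
 *		   K_MEM_PERM_RW | K_MEM_CACHE_NONE);
 *	// the device registers are now accessible through 'regs'
 */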

void z_phys_unmap(uint8_t *virt, size_t size)
{
	uintptr_t aligned_virt, addr_offset;
	size_t aligned_size;
	k_spinlock_key_t key;

	addr_offset = k_mem_region_align(&aligned_virt, &aligned_size,
					 POINTER_TO_UINT(virt), size,
					 CONFIG_MMU_PAGE_SIZE);
	__ASSERT(aligned_size != 0U, "0-length mapping at 0x%lx", aligned_virt);
	__ASSERT(aligned_virt < (aligned_virt + (aligned_size - 1)),
		 "wraparound for virtual address 0x%lx (size %zu)",
		 aligned_virt, aligned_size);

	key = k_spin_lock(&z_mm_lock);
	arch_mem_unmap(UINT_TO_POINTER(aligned_virt), aligned_size);
	virt_region_free(virt, size);
	k_spin_unlock(&z_mm_lock, key);
}

/*
 * Miscellaneous
 */

size_t k_mem_region_align(uintptr_t *aligned_addr, size_t *aligned_size,
			  uintptr_t addr, size_t size, size_t align)
{
	size_t addr_offset;

	/* The actual mapped region must be page-aligned. Round down the
	 * physical address and pad the region size appropriately
	 */
	*aligned_addr = ROUND_DOWN(addr, align);
	addr_offset = addr - *aligned_addr;
	*aligned_size = ROUND_UP(size + addr_offset, align);

	return addr_offset;
}
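
/* Worked example (hypothetical values): aligning a 0x2100-byte region at
 * physical address 0x10000234 to a 0x1000 page size gives
 * aligned_addr == 0x10000000, addr_offset == 0x234 and
 * aligned_size == ROUND_UP(0x2100 + 0x234, 0x1000) == 0x3000, i.e. three
 * pages that fully cover the requested bytes.
 */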

#if defined(CONFIG_LINKER_USE_BOOT_SECTION) || defined(CONFIG_LINKER_USE_PINNED_SECTION)
static void mark_linker_section_pinned(void *start_addr, void *end_addr,
				       bool pin)
{
	struct z_page_frame *pf;
	uint8_t *addr;

	uintptr_t pinned_start = ROUND_DOWN(POINTER_TO_UINT(start_addr),
					    CONFIG_MMU_PAGE_SIZE);
	uintptr_t pinned_end = ROUND_UP(POINTER_TO_UINT(end_addr),
					CONFIG_MMU_PAGE_SIZE);
	size_t pinned_size = pinned_end - pinned_start;

	VIRT_FOREACH(UINT_TO_POINTER(pinned_start), pinned_size, addr)
	{
		pf = z_phys_to_page_frame(Z_BOOT_VIRT_TO_PHYS(addr));
		frame_mapped_set(pf, addr);

		if (pin) {
			pf->flags |= Z_PAGE_FRAME_PINNED;
		} else {
			pf->flags &= ~Z_PAGE_FRAME_PINNED;
		}
	}
}
#endif /* CONFIG_LINKER_USE_BOOT_SECTION || CONFIG_LINKER_USE_PINNED_SECTION */

void z_mem_manage_init(void)
{
	uintptr_t phys;
	uint8_t *addr;
	struct z_page_frame *pf;
	k_spinlock_key_t key = k_spin_lock(&z_mm_lock);

	free_page_frame_list_init();

	ARG_UNUSED(addr);

#ifdef CONFIG_ARCH_HAS_RESERVED_PAGE_FRAMES
	/* If some page frames are unavailable for use as memory, arch
	 * code will mark Z_PAGE_FRAME_RESERVED in their flags
	 */
	arch_reserved_pages_update();
#endif /* CONFIG_ARCH_HAS_RESERVED_PAGE_FRAMES */

#ifdef CONFIG_LINKER_GENERIC_SECTIONS_PRESENT_AT_BOOT
	/* All pages composing the Zephyr image are mapped at boot in a
	 * predictable way. This can change at runtime.
	 */
	VIRT_FOREACH(Z_KERNEL_VIRT_START, Z_KERNEL_VIRT_SIZE, addr)
	{
		pf = z_phys_to_page_frame(Z_BOOT_VIRT_TO_PHYS(addr));
		frame_mapped_set(pf, addr);

		/* TODO: for now we pin the whole Zephyr image. Demand paging
		 * is currently tested with anonymously-mapped pages which are
		 * not pinned.
		 *
		 * We will need to set up linker regions for a subset of kernel
		 * code/data pages which are pinned in memory and
		 * may not be evicted. This will contain critical CPU data
		 * structures, and any code used to perform page fault
		 * handling, page-ins, etc.
		 */
		pf->flags |= Z_PAGE_FRAME_PINNED;
	}
#endif /* CONFIG_LINKER_GENERIC_SECTIONS_PRESENT_AT_BOOT */

#ifdef CONFIG_LINKER_USE_BOOT_SECTION
	/* Pin the boot section to prevent it from being swapped out during
	 * boot process. Will be un-pinned once boot process completes.
	 */
	mark_linker_section_pinned(lnkr_boot_start, lnkr_boot_end, true);
#endif

#ifdef CONFIG_LINKER_USE_PINNED_SECTION
	/* Pin the page frames corresponding to the pinned symbols */
	mark_linker_section_pinned(lnkr_pinned_start, lnkr_pinned_end, true);
#endif

	/* Any remaining pages that aren't mapped, reserved, or pinned get
	 * added to the free pages list
	 */
	Z_PAGE_FRAME_FOREACH(phys, pf) {
		if (z_page_frame_is_available(pf)) {
			free_page_frame_list_put(pf);
		}
	}
	LOG_DBG("free page frames: %zu", z_free_page_count);

#ifdef CONFIG_DEMAND_PAGING
#ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
	z_paging_histogram_init();
#endif
	k_mem_paging_backing_store_init();
	k_mem_paging_eviction_init();
#endif
#if __ASSERT_ON
	page_frames_initialized = true;
#endif
	k_spin_unlock(&z_mm_lock, key);

#ifndef CONFIG_LINKER_GENERIC_SECTIONS_PRESENT_AT_BOOT
	/* If BSS section is not present in memory at boot,
	 * it would not have been cleared. This needs to be
	 * done now since paging mechanism has been initialized
	 * and the BSS pages can be brought into physical
	 * memory to be cleared.
	 */
	z_bss_zero();
#endif
}

void z_mem_manage_boot_finish(void)
{
#ifdef CONFIG_LINKER_USE_BOOT_SECTION
	/* At the end of boot process, unpin the boot sections
	 * as they don't need to be in memory all the time anymore.
	 */
	mark_linker_section_pinned(lnkr_boot_start, lnkr_boot_end, false);
#endif
}

#ifdef CONFIG_DEMAND_PAGING

#ifdef CONFIG_DEMAND_PAGING_STATS
struct k_mem_paging_stats_t paging_stats;
extern struct k_mem_paging_histogram_t z_paging_histogram_eviction;
extern struct k_mem_paging_histogram_t z_paging_histogram_backing_store_page_in;
extern struct k_mem_paging_histogram_t z_paging_histogram_backing_store_page_out;
#endif

static inline void do_backing_store_page_in(uintptr_t location)
{
#ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
	uint32_t time_diff;

#ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS
	timing_t time_start, time_end;

	time_start = timing_counter_get();
#else
	uint32_t time_start;

	time_start = k_cycle_get_32();
#endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */
#endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */

	k_mem_paging_backing_store_page_in(location);

#ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
#ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS
	time_end = timing_counter_get();
	time_diff = (uint32_t)timing_cycles_get(&time_start, &time_end);
#else
	time_diff = k_cycle_get_32() - time_start;
#endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */

	z_paging_histogram_inc(&z_paging_histogram_backing_store_page_in,
			       time_diff);
#endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */
}

static inline void do_backing_store_page_out(uintptr_t location)
{
#ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
	uint32_t time_diff;

#ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS
	timing_t time_start, time_end;

	time_start = timing_counter_get();
#else
	uint32_t time_start;

	time_start = k_cycle_get_32();
#endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */
#endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */

	k_mem_paging_backing_store_page_out(location);

#ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
#ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS
	time_end = timing_counter_get();
	time_diff = (uint32_t)timing_cycles_get(&time_start, &time_end);
#else
	time_diff = k_cycle_get_32() - time_start;
#endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */

	z_paging_histogram_inc(&z_paging_histogram_backing_store_page_out,
			       time_diff);
#endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */
}

/* Current implementation relies on interrupt locking to prevent any page
 * table access, which falls over if other CPUs are active. Addressing this
 * is not as simple as using spinlocks as regular memory reads/writes
 * constitute "access" in this sense.
 *
 * Current needs for demand paging are on uniprocessor systems.
 */
BUILD_ASSERT(!IS_ENABLED(CONFIG_SMP));

static void virt_region_foreach(void *addr, size_t size,
				void (*func)(void *))
{
	z_mem_assert_virtual_region(addr, size);

	for (size_t offset = 0; offset < size; offset += CONFIG_MMU_PAGE_SIZE) {
		func((uint8_t *)addr + offset);
	}
}

/*
 * Perform some preparatory steps before paging out. The provided page frame
 * must be evicted to the backing store immediately after this is called
 * with a call to k_mem_paging_backing_store_page_out() if it contains
 * a data page.
 *
 * - Map page frame to scratch area if requested. This is always true if we're
 *   doing a page fault, but is only set on manual evictions if the page is
 *   dirty.
 * - If mapped:
 *    - obtain backing store location and populate location parameter
 *    - Update page tables with location
 * - Mark page frame as busy
 *
 * Returns -ENOMEM if the backing store is full
 */
static int page_frame_prepare_locked(struct z_page_frame *pf, bool *dirty_ptr,
				     bool page_fault, uintptr_t *location_ptr)
{
	uintptr_t phys;
	int ret;
	bool dirty = *dirty_ptr;

	phys = z_page_frame_to_phys(pf);
	__ASSERT(!z_page_frame_is_pinned(pf), "page frame 0x%lx is pinned",
		 phys);

	/* If the backing store doesn't have a copy of the page, even if it
	 * wasn't modified, treat as dirty. This can happen for a few
	 * reasons:
	 * 1) Page has never been swapped out before, and the backing store
	 *    wasn't pre-populated with this data page.
	 * 2) Page was swapped out before, but the page contents were not
	 *    preserved after swapping back in.
	 * 3) Page contents were preserved when swapped back in, but were later
	 *    evicted from the backing store to make room for other evicted
	 *    pages.
	 */
	if (z_page_frame_is_mapped(pf)) {
		dirty = dirty || !z_page_frame_is_backed(pf);
	}

	if (dirty || page_fault) {
		arch_mem_scratch(phys);
	}

	if (z_page_frame_is_mapped(pf)) {
		ret = k_mem_paging_backing_store_location_get(pf, location_ptr,
							      page_fault);
		if (ret != 0) {
			LOG_ERR("out of backing store memory");
			return -ENOMEM;
		}
		arch_mem_page_out(pf->addr, *location_ptr);
	} else {
		/* Shouldn't happen unless this function is mis-used */
		__ASSERT(!dirty, "un-mapped page determined to be dirty");
	}
#ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
	/* Mark as busy so that z_page_frame_is_evictable() returns false */
	__ASSERT(!z_page_frame_is_busy(pf), "page frame 0x%lx is already busy",
		 phys);
	pf->flags |= Z_PAGE_FRAME_BUSY;
#endif
	/* Update dirty parameter, since we set to true if it wasn't backed
	 * even if otherwise clean
	 */
	*dirty_ptr = dirty;

	return 0;
}
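
/* Call-sequence sketch (illustrative only, mirroring do_mem_evict() below):
 * evicting a mapped page frame with interrupts locked.
 *
 *	uintptr_t location;
 *	bool dirty = true;	// e.g. derived from arch_page_info_get()
 *
 *	if (page_frame_prepare_locked(pf, &dirty, false, &location) == 0) {
 *		if (dirty) {
 *			do_backing_store_page_out(location);
 *		}
 *		page_frame_free_locked(pf);
 *	}
 */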

static int do_mem_evict(void *addr)
{
	bool dirty;
	struct z_page_frame *pf;
	uintptr_t location;
	int key, ret;
	uintptr_t flags, phys;

#ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
	__ASSERT(!k_is_in_isr(),
		 "%s is unavailable in ISRs with CONFIG_DEMAND_PAGING_ALLOW_IRQ",
		 __func__);
	k_sched_lock();
#endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
	key = irq_lock();
	flags = arch_page_info_get(addr, &phys, false);
	__ASSERT((flags & ARCH_DATA_PAGE_NOT_MAPPED) == 0,
		 "address %p isn't mapped", addr);
	if ((flags & ARCH_DATA_PAGE_LOADED) == 0) {
		/* Un-mapped or already evicted. Nothing to do */
		ret = 0;
		goto out;
	}

	dirty = (flags & ARCH_DATA_PAGE_DIRTY) != 0;
	pf = z_phys_to_page_frame(phys);
	__ASSERT(pf->addr == addr, "page frame address mismatch");
	ret = page_frame_prepare_locked(pf, &dirty, false, &location);
	if (ret != 0) {
		goto out;
	}

	__ASSERT(ret == 0, "failed to prepare page frame");
#ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
	irq_unlock(key);
#endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
	if (dirty) {
		do_backing_store_page_out(location);
	}
#ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
	key = irq_lock();
#endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
	page_frame_free_locked(pf);
out:
	irq_unlock(key);
#ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
	k_sched_unlock();
#endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
	return ret;
}

int k_mem_page_out(void *addr, size_t size)
{
	__ASSERT(page_frames_initialized, "%s called on %p too early", __func__,
		 addr);
	z_mem_assert_virtual_region(addr, size);

	for (size_t offset = 0; offset < size; offset += CONFIG_MMU_PAGE_SIZE) {
		void *pos = (uint8_t *)addr + offset;
		int ret;

		ret = do_mem_evict(pos);
		if (ret != 0) {
			return ret;
		}
	}

	return 0;
}
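
/* Usage sketch (illustrative only): proactively evict a rarely-used,
 * page-aligned buffer and bring it back in before the next burst of
 * accesses. "buf" and "buf_size" are hypothetical.
 *
 *	if (k_mem_page_out(buf, buf_size) != 0) {
 *		// backing store was full; pages stay resident
 *	}
 *	...
 *	k_mem_page_in(buf, buf_size);	// fault the pages back in eagerly
 */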

int z_page_frame_evict(uintptr_t phys)
{
	int key, ret;
	struct z_page_frame *pf;
	bool dirty;
	uintptr_t flags;
	uintptr_t location;

	__ASSERT(page_frames_initialized, "%s called on 0x%lx too early",
		 __func__, phys);

	/* Implementation is similar to do_page_fault() except there is no
	 * data page to page-in, see comments in that function.
	 */

#ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
	__ASSERT(!k_is_in_isr(),
		 "%s is unavailable in ISRs with CONFIG_DEMAND_PAGING_ALLOW_IRQ",
		 __func__);
	k_sched_lock();
#endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
	key = irq_lock();
	pf = z_phys_to_page_frame(phys);
	if (!z_page_frame_is_mapped(pf)) {
		/* Nothing to do, free page */
		ret = 0;
		goto out;
	}
	flags = arch_page_info_get(pf->addr, NULL, false);
	/* Shouldn't ever happen */
	__ASSERT((flags & ARCH_DATA_PAGE_LOADED) != 0, "data page not loaded");
	dirty = (flags & ARCH_DATA_PAGE_DIRTY) != 0;
	ret = page_frame_prepare_locked(pf, &dirty, false, &location);
	if (ret != 0) {
		goto out;
	}

#ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
	irq_unlock(key);
#endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
	if (dirty) {
		do_backing_store_page_out(location);
	}
#ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
	key = irq_lock();
#endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
	page_frame_free_locked(pf);
out:
	irq_unlock(key);
#ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
	k_sched_unlock();
#endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
	return ret;
}

static inline void paging_stats_faults_inc(struct k_thread *faulting_thread,
					   int key)
{
#ifdef CONFIG_DEMAND_PAGING_STATS
	bool is_irq_unlocked = arch_irq_unlocked(key);

	paging_stats.pagefaults.cnt++;

	if (is_irq_unlocked) {
		paging_stats.pagefaults.irq_unlocked++;
	} else {
		paging_stats.pagefaults.irq_locked++;
	}

#ifdef CONFIG_DEMAND_PAGING_THREAD_STATS
	faulting_thread->paging_stats.pagefaults.cnt++;

	if (is_irq_unlocked) {
		faulting_thread->paging_stats.pagefaults.irq_unlocked++;
	} else {
		faulting_thread->paging_stats.pagefaults.irq_locked++;
	}
#else
	ARG_UNUSED(faulting_thread);
#endif

#ifndef CONFIG_DEMAND_PAGING_ALLOW_IRQ
	if (k_is_in_isr()) {
		paging_stats.pagefaults.in_isr++;

#ifdef CONFIG_DEMAND_PAGING_THREAD_STATS
		faulting_thread->paging_stats.pagefaults.in_isr++;
#endif
	}
#endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
#endif /* CONFIG_DEMAND_PAGING_STATS */
}

static inline void paging_stats_eviction_inc(struct k_thread *faulting_thread,
					     bool dirty)
{
#ifdef CONFIG_DEMAND_PAGING_STATS
	if (dirty) {
		paging_stats.eviction.dirty++;
	} else {
		paging_stats.eviction.clean++;
	}
#ifdef CONFIG_DEMAND_PAGING_THREAD_STATS
	if (dirty) {
		faulting_thread->paging_stats.eviction.dirty++;
	} else {
		faulting_thread->paging_stats.eviction.clean++;
	}
#else
	ARG_UNUSED(faulting_thread);
#endif /* CONFIG_DEMAND_PAGING_THREAD_STATS */
#endif /* CONFIG_DEMAND_PAGING_STATS */
}

static inline struct z_page_frame *do_eviction_select(bool *dirty)
{
	struct z_page_frame *pf;

#ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
	uint32_t time_diff;

#ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS
	timing_t time_start, time_end;

	time_start = timing_counter_get();
#else
	uint32_t time_start;

	time_start = k_cycle_get_32();
#endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */
#endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */

	pf = k_mem_paging_eviction_select(dirty);

#ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM
#ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS
	time_end = timing_counter_get();
	time_diff = (uint32_t)timing_cycles_get(&time_start, &time_end);
#else
	time_diff = k_cycle_get_32() - time_start;
#endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */

	z_paging_histogram_inc(&z_paging_histogram_eviction, time_diff);
#endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */

	return pf;
}

static bool do_page_fault(void *addr, bool pin)
{
	struct z_page_frame *pf;
	int key, ret;
	uintptr_t page_in_location, page_out_location;
	enum arch_page_location status;
	bool result;
	bool dirty = false;
	struct k_thread *faulting_thread = _current_cpu->current;

	__ASSERT(page_frames_initialized, "page fault at %p happened too early",
		 addr);

	LOG_DBG("page fault at %p", addr);

	/*
	 * TODO: Add performance accounting:
	 * - k_mem_paging_eviction_select() metrics
	 *   * periodic timer execution time histogram (if implemented)
	 */

#ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
	/* We lock the scheduler so that other threads are never scheduled
	 * during the page-in/out operation.
	 *
	 * We do however re-enable interrupts during the page-in/page-out
	 * operation iff interrupts were enabled when the exception was taken;
	 * in this configuration page faults in an ISR are a bug; all their
	 * code/data must be pinned.
	 *
	 * If interrupts were disabled when the exception was taken, the
	 * arch code is responsible for keeping them that way when entering
	 * this function.
	 *
	 * If this is not enabled, then interrupts are always locked for the
	 * entire operation. This is far worse for system interrupt latency
	 * but requires fewer pinned pages and ISRs may also take page faults.
	 *
	 * Support for allowing k_mem_paging_backing_store_page_out() and
	 * k_mem_paging_backing_store_page_in() to also sleep and allow
	 * other threads to run (such as in the case where the transfer is
	 * async DMA) is not implemented. Even if limited to thread context,
	 * arbitrary memory access triggering exceptions that put a thread to
	 * sleep on a contended page fault operation will break scheduling
	 * assumptions of cooperative threads or threads that implement
	 * critical sections with spinlocks or disabling IRQs.
	 */
	k_sched_lock();
	__ASSERT(!k_is_in_isr(), "ISR page faults are forbidden");
#endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */

	key = irq_lock();
	status = arch_page_location_get(addr, &page_in_location);
	if (status == ARCH_PAGE_LOCATION_BAD) {
		/* Return false to treat as a fatal error */
		result = false;
		goto out;
	}
	result = true;

	if (status == ARCH_PAGE_LOCATION_PAGED_IN) {
		if (pin) {
			/* It's a physical memory address */
			uintptr_t phys = page_in_location;

			pf = z_phys_to_page_frame(phys);
			pf->flags |= Z_PAGE_FRAME_PINNED;
		}

		/* This if-block is to pin the page if it is
		 * already present in physical memory. There is
		 * no need to go through the following code to
		 * pull in the data pages. So skip to the end.
		 */
		goto out;
	}
	__ASSERT(status == ARCH_PAGE_LOCATION_PAGED_OUT,
		 "unexpected status value %d", status);

	paging_stats_faults_inc(faulting_thread, key);

	pf = free_page_frame_list_get();
	if (pf == NULL) {
		/* Need to evict a page frame */
		pf = do_eviction_select(&dirty);
		__ASSERT(pf != NULL, "failed to get a page frame");
		LOG_DBG("evicting %p at 0x%lx", pf->addr,
			z_page_frame_to_phys(pf));

		paging_stats_eviction_inc(faulting_thread, dirty);
	}
	ret = page_frame_prepare_locked(pf, &dirty, true, &page_out_location);
	__ASSERT(ret == 0, "failed to prepare page frame");

#ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
	irq_unlock(key);
	/* Interrupts are now unlocked if they were not locked when we entered
	 * this function, and we may service ISRs. The scheduler is still
	 * locked.
	 */
#endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
	if (dirty) {
		do_backing_store_page_out(page_out_location);
	}
	do_backing_store_page_in(page_in_location);

#ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
	key = irq_lock();
	pf->flags &= ~Z_PAGE_FRAME_BUSY;
#endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
	if (pin) {
		pf->flags |= Z_PAGE_FRAME_PINNED;
	}
	pf->flags |= Z_PAGE_FRAME_MAPPED;
	pf->addr = UINT_TO_POINTER(POINTER_TO_UINT(addr)
				   & ~(CONFIG_MMU_PAGE_SIZE - 1));

	arch_mem_page_in(addr, z_page_frame_to_phys(pf));
	k_mem_paging_backing_store_page_finalize(pf, page_in_location);
out:
	irq_unlock(key);
#ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
	k_sched_unlock();
#endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */

	return result;
}

static void do_page_in(void *addr)
{
	bool ret;

	ret = do_page_fault(addr, false);
	__ASSERT(ret, "unmapped memory address %p", addr);
	(void)ret;
}

void k_mem_page_in(void *addr, size_t size)
{
	__ASSERT(!IS_ENABLED(CONFIG_DEMAND_PAGING_ALLOW_IRQ) || !k_is_in_isr(),
		 "%s may not be called in ISRs if CONFIG_DEMAND_PAGING_ALLOW_IRQ is enabled",
		 __func__);
	virt_region_foreach(addr, size, do_page_in);
}

static void do_mem_pin(void *addr)
{
	bool ret;

	ret = do_page_fault(addr, true);
	__ASSERT(ret, "unmapped memory address %p", addr);
	(void)ret;
}

void k_mem_pin(void *addr, size_t size)
{
	__ASSERT(!IS_ENABLED(CONFIG_DEMAND_PAGING_ALLOW_IRQ) || !k_is_in_isr(),
		 "%s may not be called in ISRs if CONFIG_DEMAND_PAGING_ALLOW_IRQ is enabled",
		 __func__);
	virt_region_foreach(addr, size, do_mem_pin);
}

bool z_page_fault(void *addr)
{
	return do_page_fault(addr, false);
}

static void do_mem_unpin(void *addr)
{
	struct z_page_frame *pf;
	int key;
	uintptr_t flags, phys;

	key = irq_lock();
	flags = arch_page_info_get(addr, &phys, false);
	__ASSERT((flags & ARCH_DATA_PAGE_NOT_MAPPED) == 0,
		 "invalid data page at %p", addr);
	if ((flags & ARCH_DATA_PAGE_LOADED) != 0) {
		pf = z_phys_to_page_frame(phys);
		pf->flags &= ~Z_PAGE_FRAME_PINNED;
	}
	irq_unlock(key);
}

void k_mem_unpin(void *addr, size_t size)
{
	__ASSERT(page_frames_initialized, "%s called on %p too early", __func__,
		 addr);
	virt_region_foreach(addr, size, do_mem_unpin);
}
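
/* Usage sketch (illustrative only): keep a latency-critical, page-aligned
 * buffer resident while it is in active use, then allow it to be evicted
 * again. "buf", "buf_size" and the workload function are hypothetical.
 *
 *	k_mem_pin(buf, buf_size);	// fault in and pin every page
 *	run_latency_critical_work(buf);
 *	k_mem_unpin(buf, buf_size);	// pages become evictable again
 */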

#endif /* CONFIG_DEMAND_PAGING */