/*
 * Copyright (c) 2024 BayLibre SAS
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Least Recently Used (LRU) eviction algorithm for demand paging.
 *
 * This is meant to be used with MMUs that need manual tracking of their
 * "accessed" page flag, so that this code can be called at the same time
 * that flag is managed.
 *
 * Theory of Operation:
 *
 * - Page frames made evictable are appended to the end of the LRU queue with
 *   k_mem_paging_eviction_add(). They are presumably made inaccessible in
 *   their corresponding MMU page table initially, although it is not a
 *   problem if they are not.
 *
 * - When accessed, an inaccessible page causes a fault. The architecture
 *   fault handler makes the page accessible, marks it as accessed and calls
 *   k_mem_paging_eviction_accessed() which moves the corresponding page frame
 *   back to the end of the queue.
 *
 * - On page reclamation, the page at the head of the queue is removed for
 *   that purpose. The new head page is marked inaccessible.
 *
 * - If the new head page is actively used, it will cause a fault and be moved
 *   to the end of the queue, preventing it from being the next page
 *   reclamation victim. Then the new head page is made inaccessible.
 *
 * This way, unused pages will migrate toward the head of the queue while
 * used pages will tend to remain toward the end of the queue, and there
 * won't be any fault overhead while the set of accessed pages remains stable.
 * This algorithm's complexity is O(1).
 */
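
/*
 * Illustrative example (not part of the code below): if page frames A, B
 * and C are made evictable in that order, the queue is A (head), B, C
 * (tail). Should A then be accessed, the resulting fault moves it to the
 * tail, giving B (head), C, A, and B becomes the next frame returned by
 * k_mem_paging_eviction_select().
 */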

#include <zephyr/kernel.h>
#include <zephyr/kernel/mm/demand_paging.h>
#include <zephyr/spinlock.h>
#include <zephyr/sys/util.h>
#include <mmu.h>
#include <kernel_arch_interface.h>

/*
 * Page frames are ordered according to their access pattern. Using a regular
 * doubly-linked list with actual pointers would be wasteful as all we need
 * is a previous PF index and a next PF index for each page frame number,
 * which can be compactly represented in an array.
 */

/*
 * Number of bits needed to store a page frame index, rounded up to a byte
 * boundary for the best compromise between code performance and space
 * savings. The extra entry is used to store the head and tail indexes.
 */
#define PF_IDX_BITS ROUND_UP(LOG2CEIL(K_MEM_NUM_PAGE_FRAMES + 1), BITS_PER_BYTE)
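
/*
 * For example (hypothetical configuration): with 1000 page frames,
 * LOG2CEIL(1000 + 1) = 10 bits are needed, rounded up to 16 bits per index,
 * so each queue entry below occupies 4 bytes (16-bit next + 16-bit prev).
 */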

/* For each page frame, track the previous and next page frame in the queue. */
struct lru_pf_idx {
	uint32_t next : PF_IDX_BITS;
	uint32_t prev : PF_IDX_BITS;
} __packed;

static struct lru_pf_idx lru_pf_queue[K_MEM_NUM_PAGE_FRAMES + 1];
static struct k_spinlock lru_lock;

/* Slot 0 is for head and tail indexes (actual indexes are offset by 1) */
#define LRU_PF_HEAD lru_pf_queue[0].next
#define LRU_PF_TAIL lru_pf_queue[0].prev

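/* Convert a page frame pointer to its 1-based index in lru_pf_queue[]. */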
static inline uint32_t pf_to_idx(struct k_mem_page_frame *pf)
{
	return (pf - k_mem_page_frames) + 1;
}

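/* Convert a 1-based queue index back to its page frame pointer. */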
static inline struct k_mem_page_frame *idx_to_pf(uint32_t idx)
{
	return &k_mem_page_frames[idx - 1];
}

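/* Link a page frame at the tail (most recently used) end of the queue. */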
static inline void lru_pf_append(uint32_t pf_idx)
{
	lru_pf_queue[pf_idx].next = 0;
	lru_pf_queue[pf_idx].prev = LRU_PF_TAIL;
	lru_pf_queue[LRU_PF_TAIL].next = pf_idx;
	LRU_PF_TAIL = pf_idx;
}

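/* Detach a page frame from the queue and clear its links. */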
static inline void lru_pf_unlink(uint32_t pf_idx)
{
	uint32_t next = lru_pf_queue[pf_idx].next;
	uint32_t prev = lru_pf_queue[pf_idx].prev;

	lru_pf_queue[prev].next = next;
	lru_pf_queue[next].prev = prev;

	lru_pf_queue[pf_idx].next = 0;
	lru_pf_queue[pf_idx].prev = 0;
}

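/*
 * A page frame is queued if it has a non-zero link or if it is the current
 * head: a lone queued frame has both links cleared yet is still the head.
 */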
static inline bool lru_pf_in_queue(uint32_t pf_idx)
{
	bool unqueued = (lru_pf_queue[pf_idx].next == 0) &&
			(lru_pf_queue[pf_idx].prev == 0) &&
			(LRU_PF_HEAD != pf_idx);

	return !unqueued;
}

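/*
 * Remove a page frame from the queue. If it was the head, the accessed flag
 * of the new head page is cleared so that a subsequent access faults and
 * refreshes that page's position in the queue.
 */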
static void lru_pf_remove(uint32_t pf_idx)
{
	bool was_head = (pf_idx == LRU_PF_HEAD);

	lru_pf_unlink(pf_idx);

	/* make the new head PF inaccessible if it exists and is not alone */
	if (was_head &&
	    (LRU_PF_HEAD != 0) &&
	    (lru_pf_queue[LRU_PF_HEAD].next != 0)) {
		struct k_mem_page_frame *pf = idx_to_pf(LRU_PF_HEAD);
		uintptr_t flags = arch_page_info_get(k_mem_page_frame_to_virt(pf), NULL, true);

		/* clearing the accessed flag is expected only on loaded pages */
		__ASSERT((flags & ARCH_DATA_PAGE_LOADED) != 0, "");
		ARG_UNUSED(flags);
	}
}

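/* Make an evictable page frame a candidate for eviction by queuing it. */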
void k_mem_paging_eviction_add(struct k_mem_page_frame *pf)
{
	uint32_t pf_idx = pf_to_idx(pf);
	k_spinlock_key_t key = k_spin_lock(&lru_lock);

	__ASSERT(k_mem_page_frame_is_evictable(pf), "");
	__ASSERT(!lru_pf_in_queue(pf_idx), "");
	lru_pf_append(pf_idx);
	k_spin_unlock(&lru_lock, key);
}

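/* Withdraw a page frame from eviction consideration by dequeuing it. */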
void k_mem_paging_eviction_remove(struct k_mem_page_frame *pf)
{
	uint32_t pf_idx = pf_to_idx(pf);
	k_spinlock_key_t key = k_spin_lock(&lru_lock);

	__ASSERT(lru_pf_in_queue(pf_idx), "");
	lru_pf_remove(pf_idx);
	k_spin_unlock(&lru_lock, key);
}

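/*
 * Called by the architecture fault handler when a page is accessed:
 * move the corresponding page frame back to the tail of the queue.
 */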
void k_mem_paging_eviction_accessed(uintptr_t phys)
{
	struct k_mem_page_frame *pf = k_mem_phys_to_page_frame(phys);
	uint32_t pf_idx = pf_to_idx(pf);
	k_spinlock_key_t key = k_spin_lock(&lru_lock);

	if (lru_pf_in_queue(pf_idx)) {
		lru_pf_remove(pf_idx);
		lru_pf_append(pf_idx);
	}
	k_spin_unlock(&lru_lock, key);
}

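/*
 * Return the least recently used page frame (the queue head) along with its
 * dirty status, or NULL if the queue is empty. The frame is not dequeued by
 * this call.
 */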
struct k_mem_page_frame *k_mem_paging_eviction_select(bool *dirty_ptr)
{
	uint32_t head_pf_idx = LRU_PF_HEAD;

	if (head_pf_idx == 0) {
		return NULL;
	}

	struct k_mem_page_frame *pf = idx_to_pf(head_pf_idx);
	uintptr_t flags = arch_page_info_get(k_mem_page_frame_to_virt(pf), NULL, false);

	__ASSERT(k_mem_page_frame_is_evictable(pf), "");
	*dirty_ptr = ((flags & ARCH_DATA_PAGE_DIRTY) != 0);
	return pf;
}

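/* Nothing to do here: the queue's static storage is already zero-initialized. */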
void k_mem_paging_eviction_init(void)
{
}