/*
 * Copyright (c) 2024 Intel Corporation
 *
 * SPDX-License-Identifier: Apache-2.0
 */

#ifndef ZEPHYR_KERNEL_INCLUDE_PRIORITY_Q_H_
#define ZEPHYR_KERNEL_INCLUDE_PRIORITY_Q_H_

#include <zephyr/sys/math_extras.h>
#include <zephyr/sys/dlist.h>

/* Simple Scheduling */
#if defined(CONFIG_SCHED_SIMPLE)
#define _priq_run_init		z_priq_simple_init
#define _priq_run_add		z_priq_simple_add
#define _priq_run_remove	z_priq_simple_remove
#define _priq_run_yield		z_priq_simple_yield
# if defined(CONFIG_SCHED_CPU_MASK)
#  define _priq_run_best	z_priq_simple_mask_best
# else
#  define _priq_run_best	z_priq_simple_best
# endif /* CONFIG_SCHED_CPU_MASK */
/* Scalable Scheduling */
#elif defined(CONFIG_SCHED_SCALABLE)
#define _priq_run_init		z_priq_rb_init
#define _priq_run_add		z_priq_rb_add
#define _priq_run_remove	z_priq_rb_remove
#define _priq_run_yield		z_priq_rb_yield
#define _priq_run_best		z_priq_rb_best
/* Multi Queue Scheduling */
#elif defined(CONFIG_SCHED_MULTIQ)
#define _priq_run_init		z_priq_mq_init
#define _priq_run_add		z_priq_mq_add
#define _priq_run_remove	z_priq_mq_remove
#define _priq_run_yield		z_priq_mq_yield
#define _priq_run_best		z_priq_mq_best
#endif

/* Scalable Wait Queue */
#if defined(CONFIG_WAITQ_SCALABLE)
#define _priq_wait_add		z_priq_rb_add
#define _priq_wait_remove	z_priq_rb_remove
#define _priq_wait_best		z_priq_rb_best
/* Simple Wait Queue */
#elif defined(CONFIG_WAITQ_SIMPLE)
#define _priq_wait_add		z_priq_simple_add
#define _priq_wait_remove	z_priq_simple_remove
#define _priq_wait_best		z_priq_simple_best
#endif
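
/*
 * Illustrative sketch (not actual kernel code) of how the scheduler core uses
 * the backend-neutral macros above; the real call sites live in kernel/sched.c
 * and the run queue type depends on the backend selected by Kconfig:
 *
 *	_priq_run_init(&runq);
 *	_priq_run_add(&runq, thread);
 *	struct k_thread *next = _priq_run_best(&runq);
 *	_priq_run_remove(&runq, next);
 */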

#if defined(CONFIG_64BIT)
#define NBITS          64
#define TRAILING_ZEROS u64_count_trailing_zeros
#else
#define NBITS          32
#define TRAILING_ZEROS u32_count_trailing_zeros
#endif /* CONFIG_64BIT */
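
/*
 * TRAILING_ZEROS(x) yields the index of the least significant set bit of x
 * (e.g. TRAILING_ZEROS(0x08) == 3); the multi-queue backend below uses it to
 * locate the highest-priority non-empty queue in its bitmask.
 */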

/* The "simple" backend keeps the ready threads on a single dlist sorted by
 * effective priority; see z_priq_simple_add() below for the ordering rule.
 */
static ALWAYS_INLINE void z_priq_simple_init(sys_dlist_t *pq)
{
	sys_dlist_init(pq);
}

/*
 * Return value same as e.g. memcmp
 * > 0 -> thread 1 priority  > thread 2 priority
 * = 0 -> thread 1 priority == thread 2 priority
 * < 0 -> thread 1 priority  < thread 2 priority
 * Do not rely on the actual value returned aside from the above.
 * (Again, like memcmp.)
 */
static ALWAYS_INLINE int32_t z_sched_prio_cmp(struct k_thread *thread_1, struct k_thread *thread_2)
{
	/* `prio` is <32b, so the below cannot overflow. */
	int32_t b1 = thread_1->base.prio;
	int32_t b2 = thread_2->base.prio;

	if (b1 != b2) {
		return b2 - b1;
	}

#ifdef CONFIG_SCHED_DEADLINE
	/* If we assume all deadlines live within the same "half" of
	 * the 32 bit modulus space (this is a documented API rule),
	 * then the latest deadline in the queue minus the earliest is
	 * guaranteed to be (2's complement) non-negative.  We can
	 * leverage that to compare the values without having to check
	 * the current time.
	 */
	uint32_t d1 = thread_1->base.prio_deadline;
	uint32_t d2 = thread_2->base.prio_deadline;

	if (d1 != d2) {
		/* Sooner deadline means higher effective priority.
		 * Doing the calculation with unsigned types and casting
		 * to signed isn't perfect, but at least reduces this
		 * from UB on overflow to impdef.
		 */
		return (int32_t)(d2 - d1);
	}
#endif /* CONFIG_SCHED_DEADLINE */
	return 0;
}
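
/*
 * Illustrative examples of the comparison above (hypothetical values; only
 * the sign of the result is meaningful).  Numerically lower `prio` means
 * higher priority:
 *
 *	thread_1->base.prio = -2, thread_2->base.prio = 3
 *	=> z_sched_prio_cmp(thread_1, thread_2) > 0 (thread 1 is preferred)
 *
 * With CONFIG_SCHED_DEADLINE and equal `prio`, the sooner deadline wins even
 * across wraparound of the 32 bit deadline space:
 *
 *	d1 = 0xfffffff0, d2 = 0x00000010 (d2 has wrapped past zero)
 *	=> (int32_t)(d2 - d1) == 0x20 > 0, so thread 1 is still preferred
 */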

/* Sorted insert: the new thread goes ahead of the first thread with strictly
 * lower effective priority, so equal-priority threads remain in FIFO order.
 */
static ALWAYS_INLINE void z_priq_simple_add(sys_dlist_t *pq, struct k_thread *thread)
{
	struct k_thread *t;

	SYS_DLIST_FOR_EACH_CONTAINER(pq, t, base.qnode_dlist) {
		if (z_sched_prio_cmp(thread, t) > 0) {
			sys_dlist_insert(&t->base.qnode_dlist, &thread->base.qnode_dlist);
			return;
		}
	}

	sys_dlist_append(pq, &thread->base.qnode_dlist);
}
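
/*
 * Example (hypothetical threads): adding A (prio 0), B (prio -1) and C (prio 0)
 * in that order leaves the queue as B, A, C.  B sorts first because lower
 * numeric priority is higher, and C lands behind A because the comparison
 * above is strict, preserving FIFO order among equal priorities.
 */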

static ALWAYS_INLINE void z_priq_simple_remove(sys_dlist_t *pq, struct k_thread *thread)
{
	ARG_UNUSED(pq);

	sys_dlist_remove(&thread->base.qnode_dlist);
}

static ALWAYS_INLINE void z_priq_simple_yield(sys_dlist_t *pq)
{
#ifndef CONFIG_SMP
	sys_dnode_t *n;

	n = sys_dlist_peek_next_no_check(pq, &_current->base.qnode_dlist);

	sys_dlist_dequeue(&_current->base.qnode_dlist);

	struct k_thread *t;

	/*
	 * As it is possible that the current thread was not at the head of
	 * the run queue, start searching from the present position for where
	 * to re-insert it.
	 */

	while (n != NULL) {
		t = CONTAINER_OF(n, struct k_thread, base.qnode_dlist);
		if (z_sched_prio_cmp(_current, t) > 0) {
			sys_dlist_insert(&t->base.qnode_dlist,
					 &_current->base.qnode_dlist);
			return;
		}
		n = sys_dlist_peek_next_no_check(pq, n);
	}

	sys_dlist_append(pq, &_current->base.qnode_dlist);
#endif
}

static ALWAYS_INLINE struct k_thread *z_priq_simple_best(sys_dlist_t *pq)
{
	struct k_thread *thread = NULL;
	sys_dnode_t *n = sys_dlist_peek_head(pq);

	if (n != NULL) {
		thread = CONTAINER_OF(n, struct k_thread, base.qnode_dlist);
	}
	return thread;
}

#ifdef CONFIG_SCHED_CPU_MASK
static ALWAYS_INLINE struct k_thread *z_priq_simple_mask_best(sys_dlist_t *pq)
{
	/* With masks enabled we need to be prepared to walk the list
	 * looking for one we can run
	 */
	struct k_thread *thread;

	SYS_DLIST_FOR_EACH_CONTAINER(pq, thread, base.qnode_dlist) {
		if ((thread->base.cpu_mask & BIT(_current_cpu->id)) != 0) {
			return thread;
		}
	}
	return NULL;
}
#endif /* CONFIG_SCHED_CPU_MASK */

#if defined(CONFIG_SCHED_SCALABLE) || defined(CONFIG_WAITQ_SCALABLE)
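/* The scalable backend keeps threads in a red/black tree ordered by
 * z_priq_rb_lessthan() (declared below, defined in the scheduler core), which
 * compares effective priority first and falls back to the monotonically
 * increasing order_key so that equal-priority threads dequeue in FIFO order.
 */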
static ALWAYS_INLINE void z_priq_rb_init(struct _priq_rb *pq)
{
	bool z_priq_rb_lessthan(struct rbnode *a, struct rbnode *b);

	*pq = (struct _priq_rb) {
		.tree = {
			.lessthan_fn = z_priq_rb_lessthan,
		}
	};
}

static ALWAYS_INLINE void z_priq_rb_add(struct _priq_rb *pq, struct k_thread *thread)
{
	struct k_thread *t;

	thread->base.order_key = pq->next_order_key;
	++pq->next_order_key;

	/* Renumber at wraparound.  This is tiny code, and in practice
	 * will almost never be hit on real systems.  BUT on very
	 * long-running systems where a priq never completely empties
	 * AND that contains very large numbers of threads, it can be
	 * a latency glitch to loop over all the threads like this.
	 */
	if (!pq->next_order_key) {
		RB_FOR_EACH_CONTAINER(&pq->tree, t, base.qnode_rb) {
			t->base.order_key = pq->next_order_key;
			++pq->next_order_key;
		}
	}

	rb_insert(&pq->tree, &thread->base.qnode_rb);
}
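
/*
 * Note: order_key gives each insertion a monotonically increasing sequence
 * number so the tree comparison can break priority ties in FIFO order.  The
 * renumbering loop above walks the tree in its existing order, so it only
 * compacts the key space and never changes relative ordering.
 */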

static ALWAYS_INLINE void z_priq_rb_remove(struct _priq_rb *pq, struct k_thread *thread)
{
	rb_remove(&pq->tree, &thread->base.qnode_rb);

	if (!pq->tree.root) {
		pq->next_order_key = 0;
	}
}

static ALWAYS_INLINE void z_priq_rb_yield(struct _priq_rb *pq)
{
#ifndef CONFIG_SMP
	z_priq_rb_remove(pq, _current);
	z_priq_rb_add(pq, _current);
#endif
}

static ALWAYS_INLINE struct k_thread *z_priq_rb_best(struct _priq_rb *pq)
{
	struct k_thread *thread = NULL;
	struct rbnode *n = rb_get_min(&pq->tree);

	if (n != NULL) {
		thread = CONTAINER_OF(n, struct k_thread, base.qnode_rb);
	}
	return thread;
}
#endif

struct prio_info {
	uint8_t offset_prio;	/* thread priority offset from K_HIGHEST_THREAD_PRIO */
	uint8_t idx;		/* index of the bitmask word holding this priority */
	uint8_t bit;		/* bit position within that bitmask word */
};

static ALWAYS_INLINE struct prio_info get_prio_info(int8_t old_prio)
{
	struct prio_info ret;

	ret.offset_prio = old_prio - K_HIGHEST_THREAD_PRIO;
	ret.idx = ret.offset_prio / NBITS;
	ret.bit = ret.offset_prio % NBITS;

	return ret;
}
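
/*
 * Worked example (hypothetical values): with NBITS == 32 and
 * old_prio == K_HIGHEST_THREAD_PRIO + 40, offset_prio is 40, so the thread
 * lives in queues[40] and its presence is tracked by bit 8 (40 % 32) of
 * bitmask word 1 (40 / 32).
 */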

static ALWAYS_INLINE unsigned int z_priq_mq_best_queue_index(struct _priq_mq *pq)
{
	unsigned int i = 0;

	do {
		if (likely(pq->bitmask[i])) {
			return i * NBITS + TRAILING_ZEROS(pq->bitmask[i]);
		}
		i++;
	} while (i < PRIQ_BITMAP_SIZE);

	return K_NUM_THREAD_PRIO - 1;
}
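
/*
 * Example (hypothetical bitmap state, 32 bit words): bitmask[0] == 0 and
 * bitmask[1] == 0x104 gives 1 * NBITS + 2 == 34, i.e. queues[34] is the
 * highest-priority non-empty queue.  If every word is zero the function falls
 * back to K_NUM_THREAD_PRIO - 1, the lowest priority index.
 */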

static ALWAYS_INLINE void z_priq_mq_init(struct _priq_mq *q)
{
	for (int i = 0; i < ARRAY_SIZE(q->queues); i++) {
		sys_dlist_init(&q->queues[i]);
	}

#ifndef CONFIG_SMP
	q->cached_queue_index = K_NUM_THREAD_PRIO - 1;
#endif
}
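
/*
 * On uniprocessor builds the index of the best (highest-priority) non-empty
 * queue is cached: z_priq_mq_add() only needs to lower it, and a removal that
 * empties a queue recomputes it via z_priq_mq_best_queue_index().  With
 * CONFIG_SMP the bitmap is scanned on every z_priq_mq_best() call instead.
 */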

static ALWAYS_INLINE void z_priq_mq_add(struct _priq_mq *pq,
					struct k_thread *thread)
{
	struct prio_info pos = get_prio_info(thread->base.prio);

	sys_dlist_append(&pq->queues[pos.offset_prio], &thread->base.qnode_dlist);
	pq->bitmask[pos.idx] |= BIT(pos.bit);

#ifndef CONFIG_SMP
	if (pos.offset_prio < pq->cached_queue_index) {
		pq->cached_queue_index = pos.offset_prio;
	}
#endif
}

static ALWAYS_INLINE void z_priq_mq_remove(struct _priq_mq *pq,
					   struct k_thread *thread)
{
	struct prio_info pos = get_prio_info(thread->base.prio);

	sys_dlist_dequeue(&thread->base.qnode_dlist);
	if (unlikely(sys_dlist_is_empty(&pq->queues[pos.offset_prio]))) {
		pq->bitmask[pos.idx] &= ~BIT(pos.bit);
#ifndef CONFIG_SMP
		pq->cached_queue_index = z_priq_mq_best_queue_index(pq);
#endif
	}
}

static ALWAYS_INLINE void z_priq_mq_yield(struct _priq_mq *pq)
{
#ifndef CONFIG_SMP
	struct prio_info pos = get_prio_info(_current->base.prio);

	sys_dlist_dequeue(&_current->base.qnode_dlist);
	sys_dlist_append(&pq->queues[pos.offset_prio],
			 &_current->base.qnode_dlist);
#endif
}

static ALWAYS_INLINE struct k_thread *z_priq_mq_best(struct _priq_mq *pq)
{
#ifdef CONFIG_SMP
	unsigned int index = z_priq_mq_best_queue_index(pq);
#else
	unsigned int index = pq->cached_queue_index;
#endif

	sys_dnode_t *n = sys_dlist_peek_head(&pq->queues[index]);

	if (likely(n != NULL)) {
		return CONTAINER_OF(n, struct k_thread, base.qnode_dlist);
	}

	return NULL;
}

#endif /* ZEPHYR_KERNEL_INCLUDE_PRIORITY_Q_H_ */