/*
 * Copyright (c) 2020 Intel Corporation
 *
 * SPDX-License-Identifier: Apache-2.0
 */
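
/*
 * "p4wq": a pooled, parallel, preemptible, priority-based work queue.
 * Work items are served by a pool of worker threads which always run
 * the most urgent pending items (highest priority, then earliest
 * deadline), adopting each item's priority and deadline while its
 * handler runs.
 */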
#include <zephyr/logging/log.h>
#include <zephyr/sys/p4wq.h>
#include <zephyr/kernel.h>
#include <zephyr/init.h>
#include <zephyr/sys/iterable_sections.h>
/* private kernel APIs */
#include <ksched.h>
#include <wait_q.h>

LOG_MODULE_REGISTER(p4wq, CONFIG_LOG_DEFAULT_LEVEL);

struct device;

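/* Make a worker thread inherit the scheduling parameters (priority
 * and deadline) of the work item it is about to run.
 */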
static void set_prio(struct k_thread *th, struct k_p4wq_work *item)
{
	__ASSERT_NO_MSG(!IS_ENABLED(CONFIG_SMP) || !z_is_thread_queued(th));
	th->base.prio = item->priority;
	th->base.prio_deadline = item->deadline;
}

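/* Ordering for the tree of pending items: rb_get_max() must yield the
 * most urgent item, so a is "lessthan" b when a has a numerically
 * larger (i.e. less urgent) thread priority, or equal priority with a
 * later deadline (wraparound-safe comparison); the node address breaks
 * exact ties so the ordering is strict.
 */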
static bool rb_lessthan(struct rbnode *a, struct rbnode *b)
{
	struct k_p4wq_work *aw = CONTAINER_OF(a, struct k_p4wq_work, rbnode);
	struct k_p4wq_work *bw = CONTAINER_OF(b, struct k_p4wq_work, rbnode);

	if (aw->priority != bw->priority) {
		return aw->priority > bw->priority;
	}

	if (aw->deadline != bw->deadline) {
		return aw->deadline - bw->deadline > 0;
	}

	return (uintptr_t)a < (uintptr_t)b;
}

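/* The next three helpers track whether the item currently being
 * executed resubmitted itself from within its own handler, so that
 * p4wq_loop() knows not to retire it.  The flag piggybacks on the
 * K_CALLBACK_STATE bit of the thread's user_options rather than
 * adding per-thread storage.
 */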
static void thread_set_requeued(struct k_thread *th)
{
	th->base.user_options |= K_CALLBACK_STATE;
}

static void thread_clear_requeued(struct k_thread *th)
{
	th->base.user_options &= ~K_CALLBACK_STATE;
}

static bool thread_was_requeued(struct k_thread *th)
{
	return !!(th->base.user_options & K_CALLBACK_STATE);
}

/* Slightly different semantics from rb_lessthan above: that comparison
 * must be a strict total order (so a single tree structure results) and
 * uses the pointer value to break ties between items of equal priority
 * and deadline, whereas here equality simply means "not lessthan".
 */
static inline bool item_lessthan(struct k_p4wq_work *a, struct k_p4wq_work *b)
{
	if (a->priority > b->priority) {
		return true;
	} else if ((a->priority == b->priority) &&
		   (a->deadline != b->deadline)) {
		return a->deadline - b->deadline > 0;
	} else {
		;
	}
	return false;
}

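/* Worker thread loop.  Each worker repeatedly pulls the most urgent
 * pending item off the tree, adopts its priority/deadline, moves it to
 * the active list and runs its handler with the queue lock dropped.
 * When the handler returns, the item is retired (via done_handler or
 * done_sem) unless it resubmitted itself; when nothing is pending the
 * worker pends on the queue's wait queue.
 */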
static FUNC_NORETURN void p4wq_loop(void *p0, void *p1, void *p2)
{
	ARG_UNUSED(p1);
	ARG_UNUSED(p2);
	struct k_p4wq *queue = p0;
	k_spinlock_key_t k = k_spin_lock(&queue->lock);

	while (true) {
		struct rbnode *r = rb_get_max(&queue->queue);

		if (r) {
			struct k_p4wq_work *w
				= CONTAINER_OF(r, struct k_p4wq_work, rbnode);

			rb_remove(&queue->queue, r);
			w->thread = _current;
			sys_dlist_append(&queue->active, &w->dlnode);
			set_prio(_current, w);
			thread_clear_requeued(_current);

			k_spin_unlock(&queue->lock, k);

			w->handler(w);

			k = k_spin_lock(&queue->lock);

			/* Remove from the active list only if it
			 * wasn't resubmitted already
			 */
			if (!thread_was_requeued(_current)) {
				sys_dlist_remove(&w->dlnode);
				w->thread = NULL;

				if (queue->done_handler) {
					k_spin_unlock(&queue->lock, k);
					queue->done_handler(w);
					k = k_spin_lock(&queue->lock);
				} else {
					k_sem_give(&w->done_sem);
				}
			}
		} else {
			z_pend_curr(&queue->lock, k, &queue->waitq, K_FOREVER);
			k = k_spin_lock(&queue->lock);
		}
	}
}

/* Must be called to regain ownership of the work item */
int k_p4wq_wait(struct k_p4wq_work *work, k_timeout_t timeout)
{
	if (work->sync) {
		return k_sem_take(&work->done_sem, timeout);
	}

	return k_sem_count_get(&work->done_sem) ? 0 : -EBUSY;
}
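
/* Minimal submit-and-wait sketch (illustrative only: "my_queue" and
 * "my_handler" are hypothetical names, not part of this API).  The
 * deadline is a delta in cycles from submission time (0 = none), and
 * .sync = true lets k_p4wq_wait() block on the item's done_sem rather
 * than just polling it:
 *
 *	static void my_handler(struct k_p4wq_work *work) { ... }
 *
 *	struct k_p4wq_work work = {
 *		.handler  = my_handler,
 *		.priority = 5,
 *		.deadline = 0,
 *		.sync     = true,
 *	};
 *
 *	k_p4wq_submit(&my_queue, &work);
 *	k_p4wq_wait(&work, K_FOREVER);
 */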

void k_p4wq_init(struct k_p4wq *queue)
{
	memset(queue, 0, sizeof(*queue));
	z_waitq_init(&queue->waitq);
	queue->queue.lessthan_fn = rb_lessthan;
	sys_dlist_init(&queue->active);
}

void k_p4wq_add_thread(struct k_p4wq *queue, struct k_thread *thread,
			k_thread_stack_t *stack,
			size_t stack_size)
{
	k_thread_create(thread, stack, stack_size,
			p4wq_loop, queue, NULL, NULL,
			K_HIGHEST_THREAD_PRIO, 0,
			queue->flags & K_P4WQ_DELAYED_START ? K_FOREVER : K_NO_WAIT);
}

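/* Bring up the statically defined queues: walk the k_p4wq_initparam
 * iterable section (populated by the K_P4WQ_DEFINE() family of macros
 * in <zephyr/sys/p4wq.h>), initialize one queue per instance (or one
 * per thread with K_P4WQ_QUEUE_PER_THREAD) and spawn the worker
 * threads, deferring their start when a user CPU mask still has to be
 * applied via k_p4wq_enable_static_thread().
 */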
static int static_init(void)
{
	STRUCT_SECTION_FOREACH(k_p4wq_initparam, pp) {
		for (int i = 0; i < pp->num; i++) {
			uintptr_t ssz = K_THREAD_STACK_LEN(pp->stack_size);
			struct k_p4wq *q = pp->flags & K_P4WQ_QUEUE_PER_THREAD ?
				pp->queue + i : pp->queue;

			if (!i || (pp->flags & K_P4WQ_QUEUE_PER_THREAD)) {
				k_p4wq_init(q);
				q->done_handler = pp->done_handler;
			}

			q->flags = pp->flags;

			/*
			 * If the user wants to specify CPU affinity, we have to
			 * delay starting threads until that has been done
			 */
			if (q->flags & K_P4WQ_USER_CPU_MASK) {
				q->flags |= K_P4WQ_DELAYED_START;
			}

			k_p4wq_add_thread(q, &pp->threads[i],
					  &pp->stacks[ssz * i],
					  pp->stack_size);

#ifdef CONFIG_SCHED_CPU_MASK
			if (pp->flags & K_P4WQ_USER_CPU_MASK) {
				int ret = k_thread_cpu_mask_clear(&pp->threads[i]);

				if (ret < 0) {
					LOG_ERR("Couldn't clear CPU mask: %d", ret);
				}
			}
#endif
		}
	}

	return 0;
}

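/* Second-stage startup for statically defined workers: apply the
 * caller-supplied CPU mask (when K_P4WQ_USER_CPU_MASK was requested)
 * and then actually start the thread if its start was deferred.
 */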
void k_p4wq_enable_static_thread(struct k_p4wq *queue, struct k_thread *thread,
				 uint32_t cpu_mask)
{
#ifdef CONFIG_SCHED_CPU_MASK
	if (queue->flags & K_P4WQ_USER_CPU_MASK) {
		unsigned int i;

		while ((i = find_lsb_set(cpu_mask))) {
			int ret = k_thread_cpu_mask_enable(thread, i - 1);

			if (ret < 0) {
				LOG_ERR("Couldn't set CPU mask for %u: %d", i, ret);
			}
			cpu_mask &= ~BIT(i - 1);
		}
	}
#endif

	if (queue->flags & K_P4WQ_DELAYED_START) {
		k_thread_start(thread);
	}
}

/* We spawn a bunch of high priority threads; use the "SMP" init level
 * so they can initialize in parallel instead of serially on the main
 * CPU.
 */
#if defined(CONFIG_P4WQ_INIT_STAGE_EARLY)
SYS_INIT(static_init, POST_KERNEL, 1);
#else
SYS_INIT(static_init, APPLICATION, 99);
#endif

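/* Submit a work item: convert its deadline to an absolute cycle count,
 * insert it into the pending tree and, if it is now the most urgent
 * pending item and fewer than one-per-CPU of the active items outrank
 * or match it, wake an idle worker to run it immediately.
 */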
void k_p4wq_submit(struct k_p4wq *queue, struct k_p4wq_work *item)
{
	k_spinlock_key_t k = k_spin_lock(&queue->lock);

	/* Input is a delta time from now (to match
	 * k_thread_deadline_set()), but we store and use the absolute
	 * cycle count.
	 */
	item->deadline += k_cycle_get_32();

	/* Resubmission from within handler?  Remove from active list */
	if (item->thread == _current) {
		sys_dlist_remove(&item->dlnode);
		thread_set_requeued(_current);
		item->thread = NULL;
	} else {
		k_sem_init(&item->done_sem, 0, 1);
	}
	__ASSERT_NO_MSG(item->thread == NULL);

	rb_insert(&queue->queue, &item->rbnode);
	item->queue = queue;

	/* If there were other items already ahead of it in the queue,
	 * then we don't need to revisit active thread state and can
	 * return.
	 */
	if (rb_get_max(&queue->queue) != &item->rbnode) {
		goto out;
	}

	/* Check the list of active (running or preempted) items: if at
	 * least an "active target" of them are higher priority than the
	 * new item, then no one needs to be preempted and we can return.
	 */
	struct k_p4wq_work *wi;
	uint32_t n_beaten_by = 0, active_target = arch_num_cpus();

	SYS_DLIST_FOR_EACH_CONTAINER(&queue->active, wi, dlnode) {
		/*
		 * item_lessthan(a, b) == true means a has lower priority
		 * than b; !item_lessthan(a, b) counts all work items with
		 * higher or equal priority
		 */
		if (!item_lessthan(wi, item)) {
			n_beaten_by++;
		}
	}

	if (n_beaten_by >= active_target) {
		/* Too many already have higher priority, not preempting */
		goto out;
	}

	/* Grab a thread, set its priority and queue it.  If there are
	 * no threads available to unpend, this is a soft runtime
	 * error: we are breaking our promise about run order.
	 * Complain.
	 */
	struct k_thread *th = z_unpend_first_thread(&queue->waitq);

	if (th == NULL) {
		LOG_WRN("Out of worker threads, priority guarantee violated");
		goto out;
	}

	set_prio(th, item);
	z_ready_thread(th);
	z_reschedule(&queue->lock, k);

	return;

out:
	k_spin_unlock(&queue->lock, k);
}

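/* Cancel a still-pending item.  Returns true if the item was removed
 * before a worker picked it up, in which case its completion is
 * signaled as usual; returns false (and does nothing) if the item has
 * already started running or was never queued.
 */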
bool k_p4wq_cancel(struct k_p4wq *queue, struct k_p4wq_work *item)
{
	k_spinlock_key_t k = k_spin_lock(&queue->lock);
	bool ret = rb_contains(&queue->queue, &item->rbnode);

	if (ret) {
		rb_remove(&queue->queue, &item->rbnode);

		if (queue->done_handler) {
			k_spin_unlock(&queue->lock, k);
			queue->done_handler(item);
			k = k_spin_lock(&queue->lock);
		} else {
			k_sem_give(&item->done_sem);
		}
	}

	k_spin_unlock(&queue->lock, k);
	return ret;
}