/*
 * Copyright (c) 2021 Nordic Semiconductor ASA
 *
 * SPDX-License-Identifier: Apache-2.0
 */
#include <zephyr/ztress.h>
#include <zephyr/ztest_test.h>
#include <zephyr/sys/printk.h>
#include <zephyr/random/random.h>
#include <string.h>

/* Flag set at startup which determines if the stress test can run on this
 * platform. The stress test should not run on platforms where the system
 * clock frequency is too high compared to the CPU clock. The system clock is
 * sometimes set globally for the test and for some platforms it may be
 * unacceptable.
 */
static bool cpu_sys_clock_ok;

/* Timer used for adjusting the contexts' backoff times to get optimal CPU load. */
static void ctrl_timeout(struct k_timer *timer);
static K_TIMER_DEFINE(ctrl_timer, ctrl_timeout, NULL);

/* Timer used for reporting test progress. */
static void progress_timeout(struct k_timer *timer);
static K_TIMER_DEFINE(progress_timer, progress_timeout, NULL);

/* Timer used for higher priority context. */
static void ztress_timeout(struct k_timer *timer);
static K_TIMER_DEFINE(ztress_timer, ztress_timeout, NULL);

/* Timer handling the test timeout which ends the test prematurely. */
static k_timeout_t timeout;
static void test_timeout(struct k_timer *timer);
static K_TIMER_DEFINE(test_timer, test_timeout, NULL);

static atomic_t active_cnt;
static struct k_thread threads[CONFIG_ZTRESS_MAX_THREADS];
static k_tid_t tids[CONFIG_ZTRESS_MAX_THREADS];

static uint32_t context_cnt;
struct ztress_context_data *tmr_data;

static atomic_t active_mask;
static uint32_t preempt_cnt[CONFIG_ZTRESS_MAX_THREADS];
static uint32_t exec_cnt[CONFIG_ZTRESS_MAX_THREADS];
static k_timeout_t backoff[CONFIG_ZTRESS_MAX_THREADS];
static k_timeout_t init_backoff[CONFIG_ZTRESS_MAX_THREADS];
K_THREAD_STACK_ARRAY_DEFINE(stacks, CONFIG_ZTRESS_MAX_THREADS, CONFIG_ZTRESS_STACK_SIZE);
static k_tid_t idle_tid[CONFIG_MP_MAX_NUM_CPUS];

#define THREAD_NAME(i, _) STRINGIFY(ztress_##i)

static const char * const thread_names[] = {
	LISTIFY(CONFIG_ZTRESS_MAX_THREADS, THREAD_NAME, (,))
};

struct ztress_runtime {
	uint32_t cpu_load;
	uint32_t cpu_load_measurements;
};

static struct ztress_runtime rt;

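/* Expiry handler for the test timeout timer; aborts all stress contexts. */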
static void test_timeout(struct k_timer *timer)
{
	ztress_abort();
}

/* Ratio is expressed in 1/16 units, e.g. using ratio 14 reduces all timeouts
 * by multiplying them by 14/16. A fraction of 16 is used to avoid a division,
 * which may take more time on certain platforms.
 */
static void adjust_load(uint8_t ratio)
{
	for (uint32_t i = 0; i < context_cnt; i++) {
		uint32_t new_ticks = ratio * (uint32_t)backoff[i].ticks / 16;

		backoff[i].ticks = MAX(4, new_ticks);
	}
}

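/* Report test progress. Progress is the lowest completion percentage among
 * all contexts (based on their execution and preemption count limits),
 * printed together with the time remaining until the test timeout expires.
 */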
static void progress_timeout(struct k_timer *timer)
{
	struct ztress_context_data *thread_data = k_timer_user_data_get(timer);
	uint32_t progress = 100;
	uint32_t cnt = context_cnt;
	uint32_t thread_data_start_index = 0;

	if (tmr_data != NULL) {
		thread_data_start_index = 1;
		if (tmr_data->exec_cnt != 0 && exec_cnt[0] != 0) {
			progress = (100 * exec_cnt[0]) / tmr_data->exec_cnt;
		}
	}

	for (uint32_t i = thread_data_start_index; i < cnt; i++) {
		if (thread_data[i].exec_cnt == 0 && thread_data[i].preempt_cnt == 0) {
			continue;
		}

		uint32_t exec_progress = (thread_data[i].exec_cnt) ?
				(100 * exec_cnt[i]) / thread_data[i].exec_cnt : 100;
		uint32_t preempt_progress = (thread_data[i].preempt_cnt) ?
				(100 * preempt_cnt[i]) / thread_data[i].preempt_cnt : 100;
		uint32_t thread_progress = MIN(exec_progress, preempt_progress);

		progress = MIN(progress, thread_progress);
	}

	uint64_t rem = 1000 * (k_timer_expires_ticks(&test_timer) - sys_clock_tick_get()) /
			CONFIG_SYS_CLOCK_TICKS_PER_SEC;

	printk("\r%u%% remaining:%u ms", progress, (uint32_t)rem);
}

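/* Measure CPU load since the previous call using the cycles consumed by the
 * idle threads, accumulate it into the runtime average and adjust the
 * contexts' backoff times to keep the load roughly in the 80-85% range.
 */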
static void control_load(void)
{
	static uint64_t prev_idle_cycles;
	static uint64_t total_cycles;
	uint64_t idle_cycles = 0;
	k_thread_runtime_stats_t rt_stats_all;
	int err = 0;
	unsigned int num_cpus = arch_num_cpus();

	for (int i = 0; i < num_cpus; i++) {
		k_thread_runtime_stats_t thread_stats;

		err = k_thread_runtime_stats_get(idle_tid[i], &thread_stats);
		if (err < 0) {
			return;
		}

		idle_cycles += thread_stats.execution_cycles;
	}

	err = k_thread_runtime_stats_all_get(&rt_stats_all);
	if (err < 0) {
		return;
	}

	int load = 1000 - (1000 * (idle_cycles - prev_idle_cycles) /
			(rt_stats_all.execution_cycles - total_cycles));

	prev_idle_cycles = idle_cycles;
	total_cycles = rt_stats_all.execution_cycles;

	int avg_load = (rt.cpu_load * rt.cpu_load_measurements + load) /
			(rt.cpu_load_measurements + 1);

	rt.cpu_load = avg_load;
	rt.cpu_load_measurements++;

	if (load > 800 && load < 850) {
		/* Expected load */
	} else if (load > 850) {
		/* Slightly reduce load. */
		adjust_load(18);
	} else if (load < 300) {
		adjust_load(8);
	} else if (load < 500) {
		adjust_load(12);
	} else {
		adjust_load(14);
	}
}

static void ctrl_timeout(struct k_timer *timer)
{
	control_load();
}

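/* Count preemptions. Called at the start of each context iteration; any
 * context whose bit is still set in active_mask was preempted while running
 * its handler, so its preemption counter is incremented.
 */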
void preempt_update(void)
{
	uint32_t mask = active_mask;

	while (mask) {
		int idx = 31 - __builtin_clz(mask);

		/* Clear the mask bit to ensure that other contexts do not count the same thread. */
		if ((atomic_and(&active_mask, ~BIT(idx)) & BIT(idx)) != 0) {
			preempt_cnt[idx]++;
		}

		mask &= ~BIT(idx);
	}
}

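/* Check if a context should continue. Returns false (and decrements
 * active_cnt) once the context's preemption or execution count limit is
 * reached; also returns false when no contexts remain active (test aborted
 * or finished).
 */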
static bool cont_check(struct ztress_context_data *context_data, uint32_t priority)
{
	if (context_data->preempt_cnt != 0 && preempt_cnt[priority] >= context_data->preempt_cnt) {
		atomic_dec(&active_cnt);
		return false;
	}

	if (context_data->exec_cnt != 0 && exec_cnt[priority] >= context_data->exec_cnt) {
		atomic_dec(&active_cnt);
		return false;
	}

	return active_cnt > 0;
}

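/* Add pseudo-random jitter (up to approximately the timeout value) so that
 * contexts do not wake up in a fixed pattern. Timeouts of 4 ticks or less are
 * left unchanged.
 */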
static k_timeout_t randomize_t(k_timeout_t t)
{
	if (t.ticks <= 4) {
		return t;
	}

	uint32_t mask = BIT_MASK(31 - __builtin_clz((uint32_t)t.ticks));

	t.ticks += (sys_rand32_get() & mask);

	return t;
}

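/* Busy-loop for a pseudo-random number of iterations to decorrelate context
 * execution times.
 */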
static void microdelay(void)
{
	static volatile int microdelay_cnt;
	uint8_t repeat = sys_rand8_get();

	for (int i = 0; i < repeat; i++) {
		microdelay_cnt++;
	}
}

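/* Timer expiry handler implementing the timer (highest priority) context,
 * which always uses index 0. It calls the user handler and restarts the timer
 * with a randomized backoff as long as the context should continue.
 */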
static void ztress_timeout(struct k_timer *timer)
{
	struct ztress_context_data *context_data = k_timer_user_data_get(timer);
	uint32_t priority = 0;
	bool cont_test, cont;

	preempt_update();
	cont_test = cont_check(context_data, priority);
	cont = context_data->handler(context_data->user_data,
				     exec_cnt[priority],
				     !cont_test,
				     priority);
	exec_cnt[priority]++;

	if (cont == true && cont_test == true) {
		k_timer_start(timer, randomize_t(backoff[priority]), K_NO_WAIT);
	}
}

static void sleep(k_timeout_t t)
{
	if (K_TIMEOUT_EQ(t, K_NO_WAIT) == false) {
		t = randomize_t(t);
		k_sleep(t);
	}
}

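/* Thread context body. In a loop: update preemption counters, mark the
 * context active, call the user handler, then sleep for the (randomized)
 * backoff period. The loop terminates when the handler returns false or the
 * context's limits are reached.
 */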
static void ztress_thread(void *data, void *prio, void *unused)
{
	struct ztress_context_data *context_data = data;
	uint32_t priority = (uint32_t)(uintptr_t)prio;
	bool cont_test, cont;

	do {
		uint32_t cnt = exec_cnt[priority];

		preempt_update();
		exec_cnt[priority] = cnt + 1;
		cont_test = cont_check(context_data, priority);
		microdelay();
		atomic_or(&active_mask, BIT(priority));
		cont = context_data->handler(context_data->user_data, cnt, !cont_test, priority);
		atomic_and(&active_mask, ~BIT(priority));

		sleep(backoff[priority]);
	} while (cont == true && cont_test == true);
}

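/* k_thread_foreach() callback used to find the idle thread(s) so that their
 * runtime statistics can be used for CPU load measurement.
 */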
static void thread_cb(const struct k_thread *cthread, void *user_data)
{
#define GET_IDLE_TID(i, tid) do {\
	if (strcmp(tname, (CONFIG_MP_MAX_NUM_CPUS == 1) ? "idle" : "idle 0" STRINGIFY(i)) == 0) { \
		idle_tid[i] = tid; \
	} \
} while (0)

	const char *tname = k_thread_name_get((struct k_thread *)cthread);

	LISTIFY(CONFIG_MP_MAX_NUM_CPUS, GET_IDLE_TID, (;), (k_tid_t)cthread);
}

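/* Reset counters and statistics, locate the idle threads and start the load
 * control, progress reporting and (optional) test timeout timers.
 */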
static void ztress_init(struct ztress_context_data *thread_data)
{
	memset(exec_cnt, 0, sizeof(exec_cnt));
	memset(preempt_cnt, 0, sizeof(preempt_cnt));
	memset(&rt, 0, sizeof(rt));
	k_thread_foreach(thread_cb, NULL);
	k_msleep(10);

	k_timer_start(&ctrl_timer, K_MSEC(100), K_MSEC(100));
	k_timer_user_data_set(&progress_timer, thread_data);
	k_timer_start(&progress_timer,
		      K_MSEC(CONFIG_ZTRESS_REPORT_PROGRESS_MS),
		      K_MSEC(CONFIG_ZTRESS_REPORT_PROGRESS_MS));
	if (K_TIMEOUT_EQ(timeout, K_NO_WAIT) == false) {
		k_timer_start(&test_timer, timeout, K_NO_WAIT);
	}
}

static void ztress_end(int old_prio)
{
	k_timer_stop(&ctrl_timer);
	k_timer_stop(&progress_timer);
	k_timer_stop(&test_timer);
	k_thread_priority_set(k_current_get(), old_prio);
}

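/* Count a context as active if it has any termination condition (execution or
 * preemption count limit) set.
 */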
static void active_cnt_init(struct ztress_context_data *data)
{
	if (data->preempt_cnt != 0 || data->exec_cnt != 0) {
		active_cnt++;
	}
}

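/* Execute a stress test composed of an optional timer context and a set of
 * thread contexts. Callers typically do not call this function directly but
 * use the helper macros from zephyr/ztress.h. A minimal usage sketch,
 * assuming the ZTRESS_EXECUTE/ZTRESS_TIMER/ZTRESS_THREAD macros from that
 * header; do_stressed_operation() is a placeholder for whatever the test
 * exercises:
 *
 *	static bool handler(void *user_data, uint32_t cnt, bool last, int prio)
 *	{
 *		do_stressed_operation();
 *		return true;
 *	}
 *
 *	ZTRESS_EXECUTE(ZTRESS_TIMER(handler, NULL, 0, Z_TIMEOUT_TICKS(20)),
 *		       ZTRESS_THREAD(handler, NULL, 0, 1000, Z_TIMEOUT_TICKS(20)));
 *
 * Check ztress.h for the exact macro parameters; the snippet above is only an
 * illustration.
 */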
int ztress_execute(struct ztress_context_data *timer_data,
		   struct ztress_context_data *thread_data,
		   size_t cnt)
{
	/* Start control timer. */
	int old_prio = k_thread_priority_get(k_current_get());
	int priority, ztress_prio = 0;

	if ((cnt + (timer_data ? 1 : 0)) > CONFIG_ZTRESS_MAX_THREADS) {
		return -EINVAL;
	}

	if (cnt + 2 > CONFIG_NUM_PREEMPT_PRIORITIES) {
		return -EINVAL;
	}

	/* Skip the test if the system clock frequency is too high compared to
	 * the CPU frequency. This can happen when the system clock is set
	 * globally for a test which is run on various platforms.
	 */
	if (!cpu_sys_clock_ok) {
		ztest_test_skip();
	}

	ztress_init(thread_data);

	context_cnt = cnt + (timer_data ? 1 : 0);
	priority = K_LOWEST_APPLICATION_THREAD_PRIO - cnt - 1;

	k_thread_priority_set(k_current_get(), priority);
	priority++;

	tmr_data = timer_data;

	if (timer_data != NULL) {
		active_cnt_init(timer_data);
		backoff[ztress_prio] = timer_data->t;
		init_backoff[ztress_prio] = timer_data->t;
		k_timer_user_data_set(&ztress_timer, timer_data);
		ztress_prio++;
	}

	for (int i = 0; i < cnt; i++) {
		active_cnt_init(&thread_data[i]);
		backoff[ztress_prio] = thread_data[i].t;
		init_backoff[ztress_prio] = thread_data[i].t;
		tids[i] = k_thread_create(&threads[i], stacks[i], CONFIG_ZTRESS_STACK_SIZE,
					  ztress_thread,
					  &thread_data[i], (void *)(uintptr_t)ztress_prio, NULL,
					  priority, 0, K_MSEC(10));
		(void)k_thread_name_set(tids[i], thread_names[i]);
		priority++;
		ztress_prio++;
	}

	if (timer_data != NULL) {
		k_timer_start(&ztress_timer, K_MSEC(10), K_NO_WAIT);
	}

	/* Wait until all threads complete. */
	for (int i = 0; i < cnt; i++) {
		k_thread_join(tids[i], K_FOREVER);
	}

	/* Abort to stop timer. */
	if (timer_data != NULL) {
		ztress_abort();
		(void)k_timer_status_sync(&ztress_timer);
	}

	/* print report */
	ztress_report();

	ztress_end(old_prio);

	return 0;
}

void ztress_abort(void)
{
	atomic_set(&active_cnt, 0);
}

void ztress_set_timeout(k_timeout_t t)
{
	timeout = t;
}

void ztress_report(void)
{
	printk("\nZtress execution report:\n");
	for (uint32_t i = 0; i < context_cnt; i++) {
		printk("\t context %u:\n\t\t - executed:%u, preempted:%u\n",
			i, exec_cnt[i], preempt_cnt[i]);
		printk("\t\t - ticks initial:%u, optimized:%u\n",
			(uint32_t)init_backoff[i].ticks, (uint32_t)backoff[i].ticks);
	}

	printk("\tAverage CPU load:%u%%, measurements:%u\n",
			rt.cpu_load / 10, rt.cpu_load_measurements);
}

int ztress_exec_count(uint32_t id)
{
	if (id >= context_cnt) {
		return -EINVAL;
	}

	return exec_cnt[id];
}

int ztress_preempt_count(uint32_t id)
{
	if (id >= context_cnt) {
		return -EINVAL;
	}

	return preempt_cnt[id];
}

uint32_t ztress_optimized_ticks(uint32_t id)
{
	if (id >= context_cnt) {
		return -EINVAL;
	}

	return backoff[id].ticks;
}

/* This check is done here, and not before each test, because a test may have
 * some additional CPU load (e.g. a busy simulator) running that would
 * influence the result.
 */
static int ztress_cpu_clock_to_sys_clock_check(void)
{
	static volatile int cnt = 2000;
	uint32_t t = sys_clock_tick_get_32();

	while (cnt-- > 0) {
		/* empty */
	}

	t = sys_clock_tick_get_32() - t;
	/* The threshold is arbitrary. It is derived from the nRF platform
	 * where the CPU runs at 64 MHz and the system clock at 32 kHz (a
	 * system clock interrupt every 1950 cycles). That ratio is acceptable
	 * even when the code is built without optimizations. If some valid
	 * platforms are excluded because of this, the threshold can be
	 * changed.
	 */
	cpu_sys_clock_ok = t <= 12;

	/* Read the first random number. Some generators do not support
	 * reading the first random number from an interrupt context
	 * (initialization is performed at the first read).
	 */
	(void)sys_rand32_get();

	return 0;
}

SYS_INIT(ztress_cpu_clock_to_sys_clock_check, POST_KERNEL, CONFIG_KERNEL_INIT_PRIORITY_DEVICE);