1 /*
2  * Copyright (c) 2024 Intel Corporation
3  *
4  * SPDX-License-Identifier: Apache-2.0
5  */
6 
/**
 * @file
 * This file contains the main testing module that invokes all the tests.
 */
11 
12 #include <zephyr/kernel.h>
13 #include <zephyr/timestamp.h>
14 #include "utils.h"
15 #include <zephyr/tc_util.h>
16 #include <ksched.h>
17 
18 #define TEST_STACK_SIZE (1024 + CONFIG_TEST_EXTRA_STACK_SIZE)
19 #define BUSY_STACK_SIZE (1024 + CONFIG_TEST_EXTRA_STACK_SIZE)
20 
21 uint32_t tm_off;
22 
23 /*
24  * Warning! Most of the created threads in this test use the same stack!
25  * This is done to reduce the memory footprint as having unique stacks
26  * for hundreds or thousands of threads would require substantial memory.
27  * We can get away with this approach as the threads sharing the same
28  * stack will not be executing, even though they will be ready to run.
29  */
30 
31 static K_THREAD_STACK_DEFINE(test_stack, TEST_STACK_SIZE);
32 
33 K_THREAD_STACK_ARRAY_DEFINE(busy_stack, CONFIG_MP_MAX_NUM_CPUS - 1, BUSY_STACK_SIZE);
34 static struct k_thread busy_thread[CONFIG_MP_MAX_NUM_CPUS - 1];
35 
36 static struct k_thread test_thread[CONFIG_BENCHMARK_NUM_THREADS];
37 
38 static uint64_t add_cycles[CONFIG_BENCHMARK_NUM_THREADS];
39 static uint64_t remove_cycles[CONFIG_BENCHMARK_NUM_THREADS];
40 
41 extern void z_unready_thread(struct k_thread *thread);
42 
/**
 * Entry point for the busy threads that occupy the other processors.
 *
 * Spins forever so that no other CPU ever picks up one of the test
 * threads from the ready queue.
 */
static void busy_entry(void *p1, void *p2, void *p3)
{
	ARG_UNUSED(p1);
	ARG_UNUSED(p2);
	ARG_UNUSED(p3);

	for (;;) {
		/* Deliberately spin; this thread must never leave the CPU */
	}
}
52 
/**
 * Entry point for the test threads.
 *
 * The test threads are expected to remain queued without ever running.
 * Should one execute anyway, report it (the benchmark results are then
 * suspect) and spin.
 */
static void test_entry(void *p1, void *p2, void *p3)
{
	unsigned int id = (unsigned int)(uintptr_t)p1;

	ARG_UNUSED(p2);
	ARG_UNUSED(p3);

	printk("Thread %u unexpectedly executed\n", id);

	for (;;) {
	}
}
67 
start_threads(unsigned int num_threads)68 static void start_threads(unsigned int num_threads)
69 {
70 	unsigned int i;
71 	unsigned int bucket_size;
72 
73 	/* Start the busy threads to execute on the other processors */
74 
75 	for (i = 0; i < CONFIG_MP_MAX_NUM_CPUS - 1; i++) {
76 		k_thread_create(&busy_thread[i], busy_stack[i], BUSY_STACK_SIZE,
77 				busy_entry, NULL, NULL, NULL,
78 				-1, 0, K_NO_WAIT);
79 	}
80 
81 	bucket_size = (num_threads / CONFIG_NUM_PREEMPT_PRIORITIES) + 1;
82 
83 	for (i = 0; i < CONFIG_BENCHMARK_NUM_THREADS; i++) {
84 		k_thread_create(&test_thread[i], test_stack, TEST_STACK_SIZE,
85 				test_entry, (void *)(uintptr_t)i, NULL, NULL,
86 				i / bucket_size, 0, K_NO_WAIT);
87 	}
88 }
89 
cycles_reset(unsigned int num_threads)90 static void cycles_reset(unsigned int num_threads)
91 {
92 	unsigned int i;
93 
94 	for (i = 0; i < num_threads; i++) {
95 		add_cycles[i] = 0ULL;
96 		remove_cycles[i] = 0ULL;
97 	}
98 }
99 
/**
 * Time removing every test thread from the ready queue (highest index,
 * i.e. lowest priority, first) and then re-adding them all (lowest index,
 * i.e. highest priority, first) so each add lands behind the already
 * queued, higher-priority threads — matching the "ReadyQ_tail" add /
 * "ReadyQ_head" remove tags used when reporting.
 *
 * Accumulator index k reflects an operation performed while k other
 * threads (k+1 including the victim, for removes) occupy the ready
 * queue; the verbose "waiters" reports rely on this.
 *
 * @param num_threads Number of test threads to cycle through
 */
static void test_decreasing_priority(unsigned int num_threads)
{
	unsigned int i;
	timing_t start;
	timing_t finish;

	/* Remove threads lowest priority (highest index) first */
	for (i = num_threads; i > 0; i--) {
		start = timing_counter_get();
		z_unready_thread(&test_thread[i - 1]);
		finish = timing_counter_get();
		remove_cycles[i - 1] += timing_cycles_get(&start, &finish);
	}

	/* Re-add threads highest priority (lowest index) first */
	for (i = 0; i < num_threads; i++) {
		start = timing_counter_get();
		z_ready_thread(&test_thread[i]);
		finish = timing_counter_get();
		add_cycles[i] += timing_cycles_get(&start, &finish);
	}
}
120 
/**
 * Time removing every test thread from the ready queue (lowest index,
 * i.e. highest priority, first) and then re-adding them all (highest
 * index, i.e. lowest priority, first) so each add lands ahead of the
 * already queued, lower-priority threads — matching the "ReadyQ_head"
 * add / "ReadyQ_tail" remove tags used when reporting.
 *
 * Note the index reversal relative to the loop counter: results are
 * stored so that accumulator index k reflects an operation performed
 * while k other threads (k+1 including the victim, for removes) occupy
 * the ready queue, keeping the statistics comparable with
 * test_decreasing_priority().
 *
 * @param num_threads Number of test threads to cycle through
 */
static void test_increasing_priority(unsigned int num_threads)
{
	unsigned int i;
	timing_t start;
	timing_t finish;

	/* Remove threads highest priority (lowest index) first */
	for (i = num_threads; i > 0; i--) {
		start = timing_counter_get();
		z_unready_thread(&test_thread[num_threads - i]);
		finish = timing_counter_get();
		remove_cycles[i - 1] += timing_cycles_get(&start, &finish);
	}

	/* Re-add threads lowest priority (highest index) first */
	for (i = num_threads; i > 0; i--) {
		start = timing_counter_get();
		z_ready_thread(&test_thread[i - 1]);
		finish = timing_counter_get();
		add_cycles[num_threads - i] += timing_cycles_get(&start, &finish);
	}
}
141 
/**
 * Integer square root: returns floor(sqrt(square)).
 *
 * Iterative bit-by-bit method: probe each candidate result bit from the
 * most significant downwards, keeping the running remainder in <square>.
 *
 * @param square Value whose square root is wanted
 *
 * @return Largest value r such that r * r <= square
 */
static uint64_t sqrt_u64(uint64_t square)
{
	uint64_t root = 0;
	uint64_t bit = (uint64_t)1 << 62; /* Highest even-positioned bit */

	/* Skip past bit pairs above the input's most significant bit */
	while (bit > square) {
		bit >>= 2;
	}

	while (bit != 0) {
		if (square >= root + bit) {
			square -= root + bit;
			root = (root >> 1) + bit;
		} else {
			root >>= 1;
		}
		bit >>= 2;
	}

	return root;
}
153 
/**
 * Compute and report the minimum, maximum, average and standard deviation
 * of the sampled cycle counts.
 *
 * Each cycles[] entry holds a sum accumulated over <num_iterations>
 * iterations, so every reported value is scaled down by num_iterations.
 * The standard deviation is the population std-dev computed across the
 * per-thread (per-iteration-averaged) values.
 *
 * @param num_threads    Number of entries in the cycles[] array
 * @param num_iterations Iterations accumulated into each cycles[] entry
 * @param cycles         Per-thread accumulated cycle counts
 * @param tag            Record tag (used only with CONFIG_BENCHMARK_RECORDING)
 * @param str            Human-readable description of the measurement
 */
static void compute_and_report_stats(unsigned int num_threads, unsigned int num_iterations,
				     uint64_t *cycles, const char *tag, const char *str)
{
	uint64_t minimum = cycles[0];
	uint64_t maximum = cycles[0];
	uint64_t total = cycles[0];
	uint64_t average;
	uint64_t std_dev = 0;
	uint64_t tmp;
	uint64_t diff;
	unsigned int i;

	/* Single pass over the remaining samples for min/max/total */
	for (i = 1; i < num_threads; i++) {
		if (cycles[i] > maximum) {
			maximum = cycles[i];
		}

		if (cycles[i] < minimum) {
			minimum = cycles[i];
		}

		total += cycles[i];
	}

	/* Convert accumulated sums into per-iteration cycle counts */
	minimum /= (uint64_t)num_iterations;
	maximum /= (uint64_t)num_iterations;
	average = total / (num_threads * num_iterations);

	/* Sum of squared deviations of each per-iteration average */
	for (i = 0; i < num_threads; i++) {
		tmp = cycles[i] / num_iterations;
		diff = (average > tmp) ? (average - tmp) : (tmp - average);

		std_dev += (diff * diff);
	}
	std_dev /= num_threads;
	std_dev = sqrt_u64(std_dev);

#ifdef CONFIG_BENCHMARK_RECORDING
	/*
	 * Pad the tag to 40 columns and the description to 50 columns so the
	 * four REC lines (.min/.max/.avg/.stddev) align; ".stddev" and
	 * ", stddev." are the longest suffixes, hence the strlen() baselines.
	 * NOTE(review): relies on strlen() being visible via the kernel
	 * headers — confirm <string.h> is pulled in transitively.
	 */
	int tag_len = strlen(tag);
	int descr_len = strlen(str);
	int stag_len = strlen(".stddev");
	int sdescr_len = strlen(", stddev.");

	stag_len = (tag_len + stag_len < 40) ? 40 - tag_len : stag_len;
	sdescr_len = (descr_len + sdescr_len < 50) ? 50 - descr_len : sdescr_len;

	printk("REC: %s%-*s - %s%-*s : %7llu cycles , %7u ns :\n", tag, stag_len, ".min", str,
	       sdescr_len, ", min.", minimum, (uint32_t)timing_cycles_to_ns(minimum));
	printk("REC: %s%-*s - %s%-*s : %7llu cycles , %7u ns :\n", tag, stag_len, ".max", str,
	       sdescr_len, ", max.", maximum, (uint32_t)timing_cycles_to_ns(maximum));
	printk("REC: %s%-*s - %s%-*s : %7llu cycles , %7u ns :\n", tag, stag_len, ".avg", str,
	       sdescr_len, ", avg.", average, (uint32_t)timing_cycles_to_ns(average));
	printk("REC: %s%-*s - %s%-*s : %7llu cycles , %7u ns :\n", tag, stag_len, ".stddev", str,
	       sdescr_len, ", stddev.", std_dev, (uint32_t)timing_cycles_to_ns(std_dev));
#else
	ARG_UNUSED(tag);

	/* Human-readable report when machine-parsable recording is disabled */
	printk("------------------------------------\n");
	printk("%s\n", str);

	printk("    Minimum : %7llu cycles (%7u nsec)\n", minimum,
	       (uint32_t)timing_cycles_to_ns(minimum));
	printk("    Maximum : %7llu cycles (%7u nsec)\n", maximum,
	       (uint32_t)timing_cycles_to_ns(maximum));
	printk("    Average : %7llu cycles (%7u nsec)\n", average,
	       (uint32_t)timing_cycles_to_ns(average));
	printk("    Std Deviation: %7llu cycles (%7u nsec)\n", std_dev,
	       (uint32_t)timing_cycles_to_ns(std_dev));
#endif
}
224 
/**
 * Benchmark entry point.
 *
 * Starts the helper threads, then runs two measurement passes over the
 * scheduler's ready queue — one adding/removing threads in decreasing
 * priority order, one in increasing priority order — reporting summary
 * statistics (and optionally per-thread averages) after each pass.
 */
int main(void)
{
	unsigned int i;
	unsigned int freq;
#ifdef CONFIG_BENCHMARK_VERBOSE
	char description[120];
	char tag[50];
	struct k_thread *thread;
#endif

	timing_init();

	bench_test_init();

	freq = timing_freq_get_mhz();

	/* Identify which ready-queue implementation is being measured */
	printk("Time Measurements for %s sched queues\n",
	       IS_ENABLED(CONFIG_SCHED_DUMB) ? "dumb" :
	       IS_ENABLED(CONFIG_SCHED_SCALABLE) ? "scalable" : "multiq");
	printk("Timing results: Clock frequency: %u MHz\n", freq);

	/* Busy threads occupy the other CPUs; test threads sit ready */
	start_threads(CONFIG_BENCHMARK_NUM_THREADS);

	timing_start();

	cycles_reset(CONFIG_BENCHMARK_NUM_THREADS);

	/* Pass 1: remove from / add to the queue in decreasing priority */
	for (i = 0; i < CONFIG_BENCHMARK_NUM_ITERATIONS; i++) {
		test_decreasing_priority(CONFIG_BENCHMARK_NUM_THREADS);
	}

	compute_and_report_stats(CONFIG_BENCHMARK_NUM_THREADS, CONFIG_BENCHMARK_NUM_ITERATIONS,
				 add_cycles, "sched.add.thread.ReadyQ_tail",
				 "Add threads of decreasing priority");

#ifdef CONFIG_BENCHMARK_VERBOSE
	/* Per-thread averages: adding behind i queued waiters */
	for (i = 0; i < CONFIG_BENCHMARK_NUM_THREADS; i++) {
		snprintf(tag, sizeof(tag),
			 "ReadyQ.add.to.tail.%04u.waiters", i);
		snprintf(description, sizeof(description), "%-40s - Add thread of priority (%u)",
			 tag, test_thread[i].base.prio);
		PRINT_STATS_AVG(description, (uint32_t)add_cycles[i],
				CONFIG_BENCHMARK_NUM_ITERATIONS);
	}
#endif

	compute_and_report_stats(CONFIG_BENCHMARK_NUM_THREADS, CONFIG_BENCHMARK_NUM_ITERATIONS,
				 remove_cycles, "sched.remove.thread.ReadyQ_head",
				 "Remove threads of decreasing priority");

#ifdef CONFIG_BENCHMARK_VERBOSE
	/* Per-thread averages: removing with i remaining waiters */
	for (i = 0; i < CONFIG_BENCHMARK_NUM_THREADS; i++) {
		snprintf(tag, sizeof(tag),
			 "ReadyQ.remove.from.head.%04u.waiters", i);
		snprintf(description, sizeof(description),
			 "%-40s - Remove thread of priority %u",
			 tag, test_thread[i].base.prio);
		PRINT_STATS_AVG(description, (uint32_t)remove_cycles[i],
				CONFIG_BENCHMARK_NUM_ITERATIONS);
	}
#endif

	cycles_reset(CONFIG_BENCHMARK_NUM_THREADS);

	/* Pass 2: remove from / add to the queue in increasing priority */
	for (i = 0; i < CONFIG_BENCHMARK_NUM_ITERATIONS; i++) {
		test_increasing_priority(CONFIG_BENCHMARK_NUM_THREADS);
	}

	compute_and_report_stats(CONFIG_BENCHMARK_NUM_THREADS, CONFIG_BENCHMARK_NUM_ITERATIONS,
				 add_cycles, "sched.add.thread.ReadyQ_head",
				 "Add threads of increasing priority");

#ifdef CONFIG_BENCHMARK_VERBOSE
	/* Accumulators are index-reversed in pass 2; map back to the thread */
	for (i = 0; i < CONFIG_BENCHMARK_NUM_THREADS; i++) {
		snprintf(tag, sizeof(tag),
			 "ReadyQ.add.to.head.%04u.waiters", i);
		thread = &test_thread[CONFIG_BENCHMARK_NUM_THREADS - i - 1];
		snprintf(description, sizeof(description),
			 "%-40s - Add priority %u to readyq",
			 tag, thread->base.prio);
		PRINT_STATS_AVG(description, (uint32_t)add_cycles[i],
				CONFIG_BENCHMARK_NUM_ITERATIONS);
	}
#endif

	compute_and_report_stats(CONFIG_BENCHMARK_NUM_THREADS, CONFIG_BENCHMARK_NUM_ITERATIONS,
				 remove_cycles, "sched.remove.thread.ReadyQ_tail",
				 "Remove threads of increasing priority");

#ifdef CONFIG_BENCHMARK_VERBOSE
	/* Accumulators are index-reversed in pass 2; map back to the thread */
	for (i = 0; i < CONFIG_BENCHMARK_NUM_THREADS; i++) {
		snprintf(tag, sizeof(tag),
			"ReadyQ.remove.from.tail.%04u.waiters",
			CONFIG_BENCHMARK_NUM_THREADS - i);
		thread = &test_thread[CONFIG_BENCHMARK_NUM_THREADS - i - 1];
		snprintf(description, sizeof(description),
			 "%-40s - Remove lowest priority from readyq (%u)",
			 tag, thread->base.prio);
		PRINT_STATS_AVG(description, (uint32_t)remove_cycles[i],
				CONFIG_BENCHMARK_NUM_ITERATIONS);
	}
#endif

	/* Tear down the test threads before declaring the test complete */
	for (i = 0; i < CONFIG_BENCHMARK_NUM_THREADS; i++) {
		k_thread_abort(&test_thread[i]);
	}

	timing_stop();

	TC_END_REPORT(0);

	return 0;
}
338