1 /*
2  * Copyright (c) 2024 Intel Corporation
3  *
4  * SPDX-License-Identifier: Apache-2.0
5  */
6 
7 /*
8  * @file
9  * This file contains tests that will measure the length of time required
10  * to add and remove threads from a wait queue that holds a varying number of
11  * threads. Each thread added to (and removed from) the wait queue is a dummy
12  * thread. As these dummy threads are inherently non-executable, this helps
13  * prevent the addition/removal of threads to/from the ready queue from being
14  * included in these measurements. Furthermore, the use of dummy threads helps
15  * reduce the memory footprint as not only are thread stacks not required,
16  * but we also do not need the full k_thread structure for each of these
17  * dummy threads.
18  */
19 
20 #include <zephyr/kernel.h>
21 #include <zephyr/timestamp.h>
22 #include <zephyr/timing/timing.h>
23 #include "utils.h"
24 #include <zephyr/tc_util.h>
25 #include <wait_q.h>
26 #include <ksched.h>
27 #include <stdio.h>
28 
29 uint32_t tm_off;
30 
31 static struct _thread_base dummy_thread[CONFIG_BENCHMARK_NUM_THREADS];
32 static _wait_q_t wait_q;
33 
34 uint64_t add_cycles[CONFIG_BENCHMARK_NUM_THREADS];
35 uint64_t remove_cycles[CONFIG_BENCHMARK_NUM_THREADS];
36 
37 /**
38  * Initialize each dummy thread.
39  */
dummy_threads_init(unsigned int num_threads)40 static void dummy_threads_init(unsigned int num_threads)
41 {
42 	unsigned int i;
43 	unsigned int bucket_size;
44 
45 	bucket_size = (num_threads / CONFIG_NUM_PREEMPT_PRIORITIES) + 1;
46 
47 	for (i = 0; i < num_threads; i++) {
48 		z_init_thread_base(&dummy_thread[i], i / bucket_size,
49 				   _THREAD_DUMMY, 0);
50 	}
51 }
52 
cycles_reset(unsigned int num_threads)53 static void cycles_reset(unsigned int num_threads)
54 {
55 	unsigned int i;
56 
57 	for (i = 0; i < num_threads; i++) {
58 		add_cycles[i] = 0ULL;
59 		remove_cycles[i] = 0ULL;
60 	}
61 }
62 
63 /**
64  * Each successive dummy thread added to the wait queue is either of the
65  * same or lower priority. Each dummy thread removed from the wait queue
66  * is of the same or lower priority than the one previous.
67  */
test_decreasing_priority(_wait_q_t * q,unsigned int num_threads)68 static void test_decreasing_priority(_wait_q_t *q, unsigned int num_threads)
69 {
70 	unsigned int i;
71 	timing_t start;
72 	timing_t finish;
73 
74 	/* Add to tail of wait queue */
75 
76 	for (i = 0; i < num_threads; i++) {
77 		start = timing_counter_get();
78 		z_pend_thread((struct k_thread *)&dummy_thread[i],
79 			      q, K_FOREVER);
80 		finish = timing_counter_get();
81 
82 		add_cycles[i] += timing_cycles_get(&start, &finish);
83 	}
84 
85 	/* Remove from head of wait queue */
86 
87 	for (i = 0; i < num_threads; i++) {
88 		start = timing_counter_get();
89 		z_unpend_thread((struct k_thread *)&dummy_thread[i]);
90 		finish = timing_counter_get();
91 
92 		remove_cycles[i] += timing_cycles_get(&start, &finish);
93 	}
94 }
95 
test_increasing_priority(_wait_q_t * q,unsigned int num_threads)96 static void test_increasing_priority(_wait_q_t *q, unsigned int num_threads)
97 {
98 	unsigned int i;
99 	timing_t start;
100 	timing_t finish;
101 	struct k_thread *thread;
102 
103 	/* Add to head of wait queue */
104 
105 	for (i = 0; i < num_threads; i++) {
106 		start = timing_counter_get();
107 		thread = (struct k_thread *)
108 			 &dummy_thread[num_threads - i - 1];
109 		z_pend_thread(thread, q, K_FOREVER);
110 		finish = timing_counter_get();
111 
112 		add_cycles[i] += timing_cycles_get(&start, &finish);
113 	}
114 
115 	/* Remove from tail of wait queue */
116 
117 	for (i = 0; i < num_threads; i++) {
118 		start = timing_counter_get();
119 		thread = (struct k_thread *)
120 			 &dummy_thread[num_threads - i - 1];
121 		z_unpend_thread(thread);
122 		finish = timing_counter_get();
123 
124 		remove_cycles[i] += timing_cycles_get(&start, &finish);
125 	}
126 }
127 
128 
sqrt_u64(uint64_t square)129 static uint64_t sqrt_u64(uint64_t square)
130 {
131 	if (square > 1) {
132 		uint64_t lo = sqrt_u64(square >> 2) << 1;
133 		uint64_t hi = lo + 1;
134 
135 		return ((hi * hi) > square) ? lo : hi;
136 	}
137 
138 	return square;
139 }
140 
compute_and_report_stats(unsigned int num_threads,unsigned int num_iterations,uint64_t * cycles,const char * tag,const char * str)141 static void compute_and_report_stats(unsigned int num_threads, unsigned int num_iterations,
142 				     uint64_t *cycles, const char *tag, const char *str)
143 {
144 	uint64_t minimum = cycles[0];
145 	uint64_t maximum = cycles[0];
146 	uint64_t total = cycles[0];
147 	uint64_t average;
148 	uint64_t std_dev = 0;
149 	uint64_t tmp;
150 	uint64_t diff;
151 	unsigned int i;
152 
153 	for (i = 1; i < num_threads; i++) {
154 		if (cycles[i] > maximum) {
155 			maximum = cycles[i];
156 		}
157 
158 		if (cycles[i] < minimum) {
159 			minimum = cycles[i];
160 		}
161 
162 		total += cycles[i];
163 	}
164 
165 	minimum /= (uint64_t)num_iterations;
166 	maximum /= (uint64_t)num_iterations;
167 	average = total / (num_threads * num_iterations);
168 
169 	/* Calculate standard deviation */
170 
171 	for (i = 0; i < num_threads; i++) {
172 		tmp = cycles[i] / num_iterations;
173 		diff = (average > tmp) ? (average - tmp) : (tmp - average);
174 
175 		std_dev += (diff * diff);
176 	}
177 	std_dev /= num_threads;
178 	std_dev = sqrt_u64(std_dev);
179 
180 #ifdef CONFIG_BENCHMARK_RECORDING
181 	int tag_len = strlen(tag);
182 	int descr_len = strlen(str);
183 	int stag_len = strlen(".stddev");
184 	int sdescr_len = strlen(", stddev.");
185 
186 	stag_len = (tag_len + stag_len < 40) ? 40 - tag_len : stag_len;
187 	sdescr_len = (descr_len + sdescr_len < 50) ? 50 - descr_len : sdescr_len;
188 
189 	printk("REC: %s%-*s - %s%-*s : %7llu cycles , %7u ns :\n", tag, stag_len, ".min", str,
190 	       sdescr_len, ", min.", minimum, (uint32_t)timing_cycles_to_ns(minimum));
191 	printk("REC: %s%-*s - %s%-*s : %7llu cycles , %7u ns :\n", tag, stag_len, ".max", str,
192 	       sdescr_len, ", max.", maximum, (uint32_t)timing_cycles_to_ns(maximum));
193 	printk("REC: %s%-*s - %s%-*s : %7llu cycles , %7u ns :\n", tag, stag_len, ".avg", str,
194 	       sdescr_len, ", avg.", average, (uint32_t)timing_cycles_to_ns(average));
195 	printk("REC: %s%-*s - %s%-*s : %7llu cycles , %7u ns :\n", tag, stag_len, ".stddev", str,
196 	       sdescr_len, ", stddev.", std_dev, (uint32_t)timing_cycles_to_ns(std_dev));
197 #else
198 	ARG_UNUSED(tag);
199 
200 	printk("------------------------------------\n");
201 	printk("%s\n", str);
202 
203 	printk("    Minimum : %7llu cycles (%7u nsec)\n", minimum,
204 	       (uint32_t)timing_cycles_to_ns(minimum));
205 	printk("    Maximum : %7llu cycles (%7u nsec)\n", maximum,
206 	       (uint32_t)timing_cycles_to_ns(maximum));
207 	printk("    Average : %7llu cycles (%7u nsec)\n", average,
208 	       (uint32_t)timing_cycles_to_ns(average));
209 	printk("    Std Deviation: %7llu cycles (%7u nsec)\n", std_dev,
210 	       (uint32_t)timing_cycles_to_ns(std_dev));
211 #endif
212 }
213 
main(void)214 int main(void)
215 {
216 	unsigned int i;
217 	unsigned int freq;
218 #ifdef CONFIG_BENCHMARK_VERBOSE
219 	char description[120];
220 	char tag[50];
221 	struct k_thread *thread;
222 #endif
223 
224 	timing_init();
225 
226 	bench_test_init();
227 
228 	freq = timing_freq_get_mhz();
229 
230 	printk("Time Measurements for %s wait queues\n",
231 	       IS_ENABLED(CONFIG_WAITQ_DUMB) ? "dumb" : "scalable");
232 	printk("Timing results: Clock frequency: %u MHz\n", freq);
233 
234 	z_waitq_init(&wait_q);
235 
236 	dummy_threads_init(CONFIG_BENCHMARK_NUM_THREADS);
237 
238 	timing_start();
239 
240 	cycles_reset(CONFIG_BENCHMARK_NUM_THREADS);
241 
242 	for (i = 0; i < CONFIG_BENCHMARK_NUM_ITERATIONS; i++) {
243 		test_decreasing_priority(&wait_q, CONFIG_BENCHMARK_NUM_THREADS);
244 	}
245 
246 	compute_and_report_stats(CONFIG_BENCHMARK_NUM_THREADS, CONFIG_BENCHMARK_NUM_ITERATIONS,
247 				 add_cycles, "sched.add.thread.WaitQ_tail",
248 				 "Add threads of decreasing priority");
249 
250 #ifdef CONFIG_BENCHMARK_VERBOSE
251 	for (i = 0; i < CONFIG_BENCHMARK_NUM_THREADS; i++) {
252 		snprintf(tag, sizeof(tag),
253 			 "WaitQ.add.to.tail.%04u.waiters", i);
254 		snprintf(description, sizeof(description), "%-40s - Add thread of priority %u", tag,
255 			 dummy_thread[i].prio);
256 		PRINT_STATS_AVG(description, (uint32_t)add_cycles[i],
257 				CONFIG_BENCHMARK_NUM_ITERATIONS);
258 	}
259 #endif
260 
261 	compute_and_report_stats(CONFIG_BENCHMARK_NUM_THREADS, CONFIG_BENCHMARK_NUM_ITERATIONS,
262 				 remove_cycles, "sched.remove.thread.WaitQ_head",
263 				 "Remove threads of decreasing priority");
264 
265 #ifdef CONFIG_BENCHMARK_VERBOSE
266 	for (i = 0; i < CONFIG_BENCHMARK_NUM_THREADS; i++) {
267 		snprintf(tag, sizeof(tag),
268 			 "WaitQ.remove.from.head.%04u.waiters",
269 			 CONFIG_BENCHMARK_NUM_THREADS - i);
270 		snprintf(description, sizeof(description),
271 			 "%-40s - Remove thread of priority %u",
272 			 tag, dummy_thread[i].prio);
273 		PRINT_STATS_AVG(description, (uint32_t)remove_cycles[i],
274 				CONFIG_BENCHMARK_NUM_ITERATIONS);
275 	}
276 #endif
277 
278 	cycles_reset(CONFIG_BENCHMARK_NUM_THREADS);
279 
280 	for (i = 0; i < CONFIG_BENCHMARK_NUM_ITERATIONS; i++) {
281 		test_increasing_priority(&wait_q, CONFIG_BENCHMARK_NUM_THREADS);
282 	}
283 
284 	compute_and_report_stats(CONFIG_BENCHMARK_NUM_THREADS, CONFIG_BENCHMARK_NUM_ITERATIONS,
285 				 add_cycles, "sched.add.thread.WaitQ_head",
286 				 "Add threads of increasing priority");
287 
288 #ifdef CONFIG_BENCHMARK_VERBOSE
289 	for (i = 0; i < CONFIG_BENCHMARK_NUM_THREADS; i++) {
290 		snprintf(tag, sizeof(tag),
291 			 "WaitQ.add.to.head.%04u.waiters", i);
292 		thread = (struct k_thread *)
293 			 &dummy_thread[CONFIG_BENCHMARK_NUM_THREADS - i - 1];
294 		snprintf(description, sizeof(description),
295 			 "%-40s - Add priority %u to waitq",
296 			 tag, thread->base.prio);
297 		PRINT_STATS_AVG(description, (uint32_t)add_cycles[i],
298 				CONFIG_BENCHMARK_NUM_ITERATIONS);
299 	}
300 #endif
301 
302 	compute_and_report_stats(CONFIG_BENCHMARK_NUM_THREADS, CONFIG_BENCHMARK_NUM_ITERATIONS,
303 				 remove_cycles, "sched.remove.thread.WaitQ_tail",
304 				 "Remove threads of increasing priority");
305 
306 #ifdef CONFIG_BENCHMARK_VERBOSE
307 	for (i = 0; i < CONFIG_BENCHMARK_NUM_THREADS; i++) {
308 		snprintf(tag, sizeof(tag),
309 			"WaitQ.remove.from.tail.%04u.waiters",
310 			CONFIG_BENCHMARK_NUM_THREADS - i);
311 		thread = (struct k_thread *)
312 			 &dummy_thread[CONFIG_BENCHMARK_NUM_THREADS - i - 1];
313 		snprintf(description, sizeof(description),
314 			 "%-40s - Remove priority %u from waitq",
315 			 tag, thread->base.prio);
316 		PRINT_STATS_AVG(description, (uint32_t)remove_cycles[i],
317 				CONFIG_BENCHMARK_NUM_ITERATIONS);
318 	}
319 #endif
320 
321 	timing_stop();
322 
323 	TC_END_REPORT(0);
324 
325 	return 0;
326 }
327