/*
 * Copyright (c) 2024 Intel Corporation
 *
 * SPDX-License-Identifier: Apache-2.0
 */

/*
 * @file
 * This file contains tests that measure the time required to add threads to
 * and remove threads from a wait queue holding a varying number of threads.
 * Each thread added to (and removed from) the wait queue is a dummy thread.
 * As dummy threads are inherently non-executable, this keeps the addition
 * and removal of threads to/from the ready queue out of these measurements.
 * Dummy threads also reduce the memory footprint: they require neither
 * thread stacks nor the full k_thread structure.
 */

#include <zephyr/kernel.h>
#include <zephyr/timestamp.h>
#include <zephyr/timing/timing.h>
#include "utils.h"
#include <zephyr/tc_util.h>
#include <wait_q.h>
#include <ksched.h>
#include <stdio.h>

uint32_t tm_off;

static struct _thread_base dummy_thread[CONFIG_BENCHMARK_NUM_THREADS];
static _wait_q_t wait_q;

uint64_t add_cycles[CONFIG_BENCHMARK_NUM_THREADS];
uint64_t remove_cycles[CONFIG_BENCHMARK_NUM_THREADS];

/**
 * Initialize each dummy thread.
 */
static void dummy_threads_init(unsigned int num_threads)
{
	unsigned int i;
	unsigned int bucket_size;

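	/*
	 * Spread the dummy threads as evenly as practical across the
	 * preemptible priority levels: each consecutive group of
	 * 'bucket_size' threads shares one priority value.
	 */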
	bucket_size = (num_threads / CONFIG_NUM_PREEMPT_PRIORITIES) + 1;

	for (i = 0; i < num_threads; i++) {
		z_init_thread_base(&dummy_thread[i], i / bucket_size,
				   _THREAD_DUMMY, 0);
	}
}

static void cycles_reset(unsigned int num_threads)
{
	unsigned int i;

	for (i = 0; i < num_threads; i++) {
		add_cycles[i] = 0ULL;
		remove_cycles[i] = 0ULL;
	}
}

/**
 * Each successive dummy thread added to the wait queue is of the same or
 * lower priority than the one before it, so each is added at (or near) the
 * tail of the wait queue. Each dummy thread removed from the head of the
 * wait queue is of the same or lower priority than the previously removed
 * one.
 */
static void test_decreasing_priority(_wait_q_t *q, unsigned int num_threads)
{
	unsigned int i;
	timing_t start;
	timing_t finish;

	/* Add to tail of wait queue */

	for (i = 0; i < num_threads; i++) {
		start = timing_counter_get();
		z_pend_thread((struct k_thread *)&dummy_thread[i],
			      q, K_FOREVER);
		finish = timing_counter_get();

		add_cycles[i] += timing_cycles_get(&start, &finish);
	}

	/* Remove from head of wait queue */

	for (i = 0; i < num_threads; i++) {
		start = timing_counter_get();
		z_unpend_thread((struct k_thread *)&dummy_thread[i]);
		finish = timing_counter_get();

		remove_cycles[i] += timing_cycles_get(&start, &finish);
	}
}

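/**
 * Each successive dummy thread added to the wait queue is of the same or
 * higher priority than the one before it, so each is added at (or near) the
 * head of the wait queue. Threads are then removed from the tail of the
 * wait queue, lowest priority first.
 */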
static void test_increasing_priority(_wait_q_t *q, unsigned int num_threads)
{
	unsigned int i;
	timing_t start;
	timing_t finish;
	struct k_thread *thread;

	/* Add to head of wait queue */

	for (i = 0; i < num_threads; i++) {
		start = timing_counter_get();
		thread = (struct k_thread *)
			 &dummy_thread[num_threads - i - 1];
		z_pend_thread(thread, q, K_FOREVER);
		finish = timing_counter_get();

		add_cycles[i] += timing_cycles_get(&start, &finish);
	}

	/* Remove from tail of wait queue */

	for (i = 0; i < num_threads; i++) {
		start = timing_counter_get();
		thread = (struct k_thread *)
			 &dummy_thread[num_threads - i - 1];
		z_unpend_thread(thread);
		finish = timing_counter_get();

		remove_cycles[i] += timing_cycles_get(&start, &finish);
	}
}

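/*
 * Integer square root of a 64-bit value, computed recursively two bits at a
 * time: the root of (square >> 2), doubled, is the root of square rounded
 * down to an even value; adjust upward by one if that still fits.
 */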
static uint64_t sqrt_u64(uint64_t square)
{
	if (square > 1) {
		uint64_t lo = sqrt_u64(square >> 2) << 1;
		uint64_t hi = lo + 1;

		return ((hi * hi) > square) ? lo : hi;
	}

	return square;
}

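/**
 * Report the minimum, maximum, average and standard deviation of the
 * per-thread cycle counts in @a cycles, normalized by the number of
 * iterations so that the reported figures are per operation.
 */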
static void compute_and_report_stats(unsigned int num_threads, unsigned int num_iterations,
				     uint64_t *cycles, const char *tag, const char *str)
{
	uint64_t minimum = cycles[0];
	uint64_t maximum = cycles[0];
	uint64_t total = cycles[0];
	uint64_t average;
	uint64_t std_dev = 0;
	uint64_t tmp;
	uint64_t diff;
	unsigned int i;

	for (i = 1; i < num_threads; i++) {
		if (cycles[i] > maximum) {
			maximum = cycles[i];
		}

		if (cycles[i] < minimum) {
			minimum = cycles[i];
		}

		total += cycles[i];
	}

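	/* Normalize to per-iteration (per-operation) cycle counts */
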
	minimum /= (uint64_t)num_iterations;
	maximum /= (uint64_t)num_iterations;
	average = total / (num_threads * num_iterations);

	/* Calculate standard deviation */

	for (i = 0; i < num_threads; i++) {
		tmp = cycles[i] / num_iterations;
		diff = (average > tmp) ? (average - tmp) : (tmp - average);

		std_dev += (diff * diff);
	}
	std_dev /= num_threads;
	std_dev = sqrt_u64(std_dev);

#ifdef CONFIG_BENCHMARK_RECORDING
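	/*
	 * Pad the tag and description with trailing spaces so that the
	 * record fields line up in fixed-width columns when printed.
	 */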
	int tag_len = strlen(tag);
	int descr_len = strlen(str);
	int stag_len = strlen(".stddev");
	int sdescr_len = strlen(", stddev.");

	stag_len = (tag_len + stag_len < 40) ? 40 - tag_len : stag_len;
	sdescr_len = (descr_len + sdescr_len < 50) ? 50 - descr_len : sdescr_len;

	printk("REC: %s%-*s - %s%-*s : %7llu cycles , %7u ns :\n", tag, stag_len, ".min", str,
	       sdescr_len, ", min.", minimum, (uint32_t)timing_cycles_to_ns(minimum));
	printk("REC: %s%-*s - %s%-*s : %7llu cycles , %7u ns :\n", tag, stag_len, ".max", str,
	       sdescr_len, ", max.", maximum, (uint32_t)timing_cycles_to_ns(maximum));
	printk("REC: %s%-*s - %s%-*s : %7llu cycles , %7u ns :\n", tag, stag_len, ".avg", str,
	       sdescr_len, ", avg.", average, (uint32_t)timing_cycles_to_ns(average));
	printk("REC: %s%-*s - %s%-*s : %7llu cycles , %7u ns :\n", tag, stag_len, ".stddev", str,
	       sdescr_len, ", stddev.", std_dev, (uint32_t)timing_cycles_to_ns(std_dev));
#else
	ARG_UNUSED(tag);

	printk("------------------------------------\n");
	printk("%s\n", str);

	printk("  Minimum : %7llu cycles (%7u nsec)\n", minimum,
	       (uint32_t)timing_cycles_to_ns(minimum));
	printk("  Maximum : %7llu cycles (%7u nsec)\n", maximum,
	       (uint32_t)timing_cycles_to_ns(maximum));
	printk("  Average : %7llu cycles (%7u nsec)\n", average,
	       (uint32_t)timing_cycles_to_ns(average));
	printk("  Std Deviation: %7llu cycles (%7u nsec)\n", std_dev,
	       (uint32_t)timing_cycles_to_ns(std_dev));
#endif
}

int main(void)
{
	unsigned int i;
	unsigned int freq;
#ifdef CONFIG_BENCHMARK_VERBOSE
	char description[120];
	char tag[50];
	struct k_thread *thread;
#endif

	timing_init();

	bench_test_init();

	freq = timing_freq_get_mhz();

	printk("Time Measurements for %s wait queues\n",
	       IS_ENABLED(CONFIG_WAITQ_DUMB) ? "dumb" : "scalable");
	printk("Timing results: Clock frequency: %u MHz\n", freq);

	z_waitq_init(&wait_q);

	dummy_threads_init(CONFIG_BENCHMARK_NUM_THREADS);

	timing_start();

	cycles_reset(CONFIG_BENCHMARK_NUM_THREADS);

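	/* Time adding to the tail and removing from the head of the wait queue */
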
	for (i = 0; i < CONFIG_BENCHMARK_NUM_ITERATIONS; i++) {
		test_decreasing_priority(&wait_q, CONFIG_BENCHMARK_NUM_THREADS);
	}

	compute_and_report_stats(CONFIG_BENCHMARK_NUM_THREADS, CONFIG_BENCHMARK_NUM_ITERATIONS,
				 add_cycles, "sched.add.thread.WaitQ_tail",
				 "Add threads of decreasing priority");

#ifdef CONFIG_BENCHMARK_VERBOSE
	for (i = 0; i < CONFIG_BENCHMARK_NUM_THREADS; i++) {
		snprintf(tag, sizeof(tag),
			 "WaitQ.add.to.tail.%04u.waiters", i);
		snprintf(description, sizeof(description), "%-40s - Add thread of priority %u", tag,
			 dummy_thread[i].prio);
		PRINT_STATS_AVG(description, (uint32_t)add_cycles[i],
				CONFIG_BENCHMARK_NUM_ITERATIONS);
	}
#endif

	compute_and_report_stats(CONFIG_BENCHMARK_NUM_THREADS, CONFIG_BENCHMARK_NUM_ITERATIONS,
				 remove_cycles, "sched.remove.thread.WaitQ_head",
				 "Remove threads of decreasing priority");

#ifdef CONFIG_BENCHMARK_VERBOSE
	for (i = 0; i < CONFIG_BENCHMARK_NUM_THREADS; i++) {
		snprintf(tag, sizeof(tag),
			 "WaitQ.remove.from.head.%04u.waiters",
			 CONFIG_BENCHMARK_NUM_THREADS - i);
		snprintf(description, sizeof(description),
			 "%-40s - Remove thread of priority %u",
			 tag, dummy_thread[i].prio);
		PRINT_STATS_AVG(description, (uint32_t)remove_cycles[i],
				CONFIG_BENCHMARK_NUM_ITERATIONS);
	}
#endif

	cycles_reset(CONFIG_BENCHMARK_NUM_THREADS);

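	/* Time adding to the head and removing from the tail of the wait queue */
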
	for (i = 0; i < CONFIG_BENCHMARK_NUM_ITERATIONS; i++) {
		test_increasing_priority(&wait_q, CONFIG_BENCHMARK_NUM_THREADS);
	}

	compute_and_report_stats(CONFIG_BENCHMARK_NUM_THREADS, CONFIG_BENCHMARK_NUM_ITERATIONS,
				 add_cycles, "sched.add.thread.WaitQ_head",
				 "Add threads of increasing priority");

#ifdef CONFIG_BENCHMARK_VERBOSE
	for (i = 0; i < CONFIG_BENCHMARK_NUM_THREADS; i++) {
		snprintf(tag, sizeof(tag),
			 "WaitQ.add.to.head.%04u.waiters", i);
		thread = (struct k_thread *)
			 &dummy_thread[CONFIG_BENCHMARK_NUM_THREADS - i - 1];
		snprintf(description, sizeof(description),
			 "%-40s - Add priority %u to waitq",
			 tag, thread->base.prio);
		PRINT_STATS_AVG(description, (uint32_t)add_cycles[i],
				CONFIG_BENCHMARK_NUM_ITERATIONS);
	}
#endif

	compute_and_report_stats(CONFIG_BENCHMARK_NUM_THREADS, CONFIG_BENCHMARK_NUM_ITERATIONS,
				 remove_cycles, "sched.remove.thread.WaitQ_tail",
				 "Remove threads of increasing priority");

#ifdef CONFIG_BENCHMARK_VERBOSE
	for (i = 0; i < CONFIG_BENCHMARK_NUM_THREADS; i++) {
		snprintf(tag, sizeof(tag),
			 "WaitQ.remove.from.tail.%04u.waiters",
			 CONFIG_BENCHMARK_NUM_THREADS - i);
		thread = (struct k_thread *)
			 &dummy_thread[CONFIG_BENCHMARK_NUM_THREADS - i - 1];
		snprintf(description, sizeof(description),
			 "%-40s - Remove priority %u from waitq",
			 tag, thread->base.prio);
		PRINT_STATS_AVG(description, (uint32_t)remove_cycles[i],
				CONFIG_BENCHMARK_NUM_ITERATIONS);
	}
#endif

	timing_stop();

	TC_END_REPORT(0);

	return 0;
}