1 /*
2 * Copyright (c) 2024 Intel Corporation
3 *
4 * SPDX-License-Identifier: Apache-2.0
5 */
6
7 /*
8 * @file
9 * This file contains the main testing module that invokes all the tests.
10 */
11
12 #include <zephyr/kernel.h>
13 #include <zephyr/timestamp.h>
14 #include "utils.h"
15 #include <zephyr/tc_util.h>
16 #include <ksched.h>
17
18 #define TEST_STACK_SIZE (1024 + CONFIG_TEST_EXTRA_STACK_SIZE)
19 #define BUSY_STACK_SIZE (1024 + CONFIG_TEST_EXTRA_STACK_SIZE)
20
21 uint32_t tm_off;
22
23 /*
24 * Warning! Most of the created threads in this test use the same stack!
25 * This is done to reduce the memory footprint as having unique stacks
26 * for hundreds or thousands of threads would require substantial memory.
27 * We can get away with this approach as the threads sharing the same
28 * stack will not be executing, even though they will be ready to run.
29 */
30
31 static K_THREAD_STACK_DEFINE(test_stack, TEST_STACK_SIZE);
32
33 K_THREAD_STACK_ARRAY_DEFINE(busy_stack, CONFIG_MP_MAX_NUM_CPUS - 1, BUSY_STACK_SIZE);
34 static struct k_thread busy_thread[CONFIG_MP_MAX_NUM_CPUS - 1];
35
36 static struct k_thread test_thread[CONFIG_BENCHMARK_NUM_THREADS];
37
38 static uint64_t add_cycles[CONFIG_BENCHMARK_NUM_THREADS];
39 static uint64_t remove_cycles[CONFIG_BENCHMARK_NUM_THREADS];
40
41 extern void z_unready_thread(struct k_thread *thread);
42
/**
 * @brief Entry point for the busy threads that occupy the other CPUs.
 *
 * Spins forever so that the CPU it runs on never picks up one of the
 * benchmark test threads.
 */
static void busy_entry(void *p1, void *p2, void *p3)
{
	ARG_UNUSED(p1);
	ARG_UNUSED(p2);
	ARG_UNUSED(p3);

	for (;;) {
		/* Deliberately spin; this thread must never yield the CPU */
	}
}
52
53 /**
54 * The test entry routine is not expected to execute.
55 */
test_entry(void * p1,void * p2,void * p3)56 static void test_entry(void *p1, void *p2, void *p3)
57 {
58 ARG_UNUSED(p2);
59 ARG_UNUSED(p3);
60
61 printk("Thread %u unexpectedly executed\n",
62 (unsigned int)(uintptr_t)p1);
63
64 while (1) {
65 }
66 }
67
start_threads(unsigned int num_threads)68 static void start_threads(unsigned int num_threads)
69 {
70 unsigned int i;
71 unsigned int bucket_size;
72
73 /* Start the busy threads to execute on the other processors */
74
75 for (i = 0; i < CONFIG_MP_MAX_NUM_CPUS - 1; i++) {
76 k_thread_create(&busy_thread[i], busy_stack[i], BUSY_STACK_SIZE,
77 busy_entry, NULL, NULL, NULL,
78 -1, 0, K_NO_WAIT);
79 }
80
81 bucket_size = (num_threads / CONFIG_NUM_PREEMPT_PRIORITIES) + 1;
82
83 for (i = 0; i < CONFIG_BENCHMARK_NUM_THREADS; i++) {
84 k_thread_create(&test_thread[i], test_stack, TEST_STACK_SIZE,
85 test_entry, (void *)(uintptr_t)i, NULL, NULL,
86 i / bucket_size, 0, K_NO_WAIT);
87 }
88 }
89
cycles_reset(unsigned int num_threads)90 static void cycles_reset(unsigned int num_threads)
91 {
92 unsigned int i;
93
94 for (i = 0; i < num_threads; i++) {
95 add_cycles[i] = 0ULL;
96 remove_cycles[i] = 0ULL;
97 }
98 }
99
test_decreasing_priority(unsigned int num_threads)100 static void test_decreasing_priority(unsigned int num_threads)
101 {
102 unsigned int i;
103 timing_t start;
104 timing_t finish;
105
106 for (i = num_threads; i > 0; i--) {
107 start = timing_counter_get();
108 z_unready_thread(&test_thread[i - 1]);
109 finish = timing_counter_get();
110 remove_cycles[i - 1] += timing_cycles_get(&start, &finish);
111 }
112
113 for (i = 0; i < num_threads; i++) {
114 start = timing_counter_get();
115 z_ready_thread(&test_thread[i]);
116 finish = timing_counter_get();
117 add_cycles[i] += timing_cycles_get(&start, &finish);
118 }
119 }
120
test_increasing_priority(unsigned int num_threads)121 static void test_increasing_priority(unsigned int num_threads)
122 {
123 unsigned int i;
124 timing_t start;
125 timing_t finish;
126
127 for (i = num_threads; i > 0; i--) {
128 start = timing_counter_get();
129 z_unready_thread(&test_thread[num_threads - i]);
130 finish = timing_counter_get();
131 remove_cycles[i - 1] += timing_cycles_get(&start, &finish);
132 }
133
134 for (i = num_threads; i > 0; i--) {
135 start = timing_counter_get();
136 z_ready_thread(&test_thread[i - 1]);
137 finish = timing_counter_get();
138 add_cycles[num_threads - i] += timing_cycles_get(&start, &finish);
139 }
140 }
141
/**
 * @brief Integer square root of a 64-bit value.
 *
 * Iterative digit-by-digit (base-4) method: examine the input two bits at
 * a time from the most significant end, building the root bit by bit.
 *
 * @param square Value whose root is desired.
 *
 * @return floor(sqrt(square))
 */
static uint64_t sqrt_u64(uint64_t square)
{
	uint64_t root = 0;
	uint64_t remainder = square;
	uint64_t bit = 1ULL << 62;

	/* Find the largest power of four that does not exceed the input */
	while (bit > square) {
		bit >>= 2;
	}

	while (bit != 0) {
		if (remainder >= root + bit) {
			remainder -= root + bit;
			root = (root >> 1) + bit;
		} else {
			root >>= 1;
		}
		bit >>= 2;
	}

	return root;
}
153
/**
 * @brief Compute and report statistics on a set of accumulated cycle counts
 *
 * Calculates the minimum, maximum, average and standard deviation (all in
 * integer arithmetic) of the per-iteration cycle counts and prints them
 * either as machine-parseable "REC:" records (CONFIG_BENCHMARK_RECORDING)
 * or in human-readable form.
 *
 * @param num_threads    Number of entries in @a cycles
 * @param num_iterations Number of iterations accumulated into each entry
 * @param cycles         Per-thread accumulated cycle counts
 * @param tag            Record tag used for the "REC:" output format
 * @param str            Human-readable description of the measurement
 */
static void compute_and_report_stats(unsigned int num_threads, unsigned int num_iterations,
				     uint64_t *cycles, const char *tag, const char *str)
{
	uint64_t minimum = cycles[0];
	uint64_t maximum = cycles[0];
	uint64_t total = cycles[0];
	uint64_t average;
	uint64_t std_dev = 0;
	uint64_t tmp;
	uint64_t diff;
	unsigned int i;

	/* Find the min, max and sum of the accumulated counts */
	for (i = 1; i < num_threads; i++) {
		if (cycles[i] > maximum) {
			maximum = cycles[i];
		}

		if (cycles[i] < minimum) {
			minimum = cycles[i];
		}

		total += cycles[i];
	}

	/* Convert accumulated totals to per-iteration values */
	minimum /= (uint64_t)num_iterations;
	maximum /= (uint64_t)num_iterations;
	average = total / (num_threads * num_iterations);

	/* Sum the squared deviations of each per-iteration value from the mean */
	for (i = 0; i < num_threads; i++) {
		tmp = cycles[i] / num_iterations;
		diff = (average > tmp) ? (average - tmp) : (tmp - average);

		std_dev += (diff * diff);
	}
	std_dev /= num_threads;
	std_dev = sqrt_u64(std_dev);

#ifdef CONFIG_BENCHMARK_RECORDING
	/* Pad the tag and description fields so the REC: columns line up */
	int tag_len = strlen(tag);
	int descr_len = strlen(str);
	int stag_len = strlen(".stddev");
	int sdescr_len = strlen(", stddev.");

	stag_len = (tag_len + stag_len < 40) ? 40 - tag_len : stag_len;
	sdescr_len = (descr_len + sdescr_len < 50) ? 50 - descr_len : sdescr_len;

	printk("REC: %s%-*s - %s%-*s : %7llu cycles , %7u ns :\n", tag, stag_len, ".min", str,
	       sdescr_len, ", min.", minimum, (uint32_t)timing_cycles_to_ns(minimum));
	printk("REC: %s%-*s - %s%-*s : %7llu cycles , %7u ns :\n", tag, stag_len, ".max", str,
	       sdescr_len, ", max.", maximum, (uint32_t)timing_cycles_to_ns(maximum));
	printk("REC: %s%-*s - %s%-*s : %7llu cycles , %7u ns :\n", tag, stag_len, ".avg", str,
	       sdescr_len, ", avg.", average, (uint32_t)timing_cycles_to_ns(average));
	printk("REC: %s%-*s - %s%-*s : %7llu cycles , %7u ns :\n", tag, stag_len, ".stddev", str,
	       sdescr_len, ", stddev.", std_dev, (uint32_t)timing_cycles_to_ns(std_dev));
#else
	ARG_UNUSED(tag);

	printk("------------------------------------\n");
	printk("%s\n", str);

	printk("    Minimum      : %7llu cycles (%7u nsec)\n", minimum,
	       (uint32_t)timing_cycles_to_ns(minimum));
	printk("    Maximum      : %7llu cycles (%7u nsec)\n", maximum,
	       (uint32_t)timing_cycles_to_ns(maximum));
	printk("    Average      : %7llu cycles (%7u nsec)\n", average,
	       (uint32_t)timing_cycles_to_ns(average));
	printk("    Std Deviation: %7llu cycles (%7u nsec)\n", std_dev,
	       (uint32_t)timing_cycles_to_ns(std_dev));
#endif
}
224
/**
 * @brief Benchmark driver
 *
 * Creates the test threads, then runs two timed scenarios over the ready
 * queue — adding/removing threads of decreasing priority (tail operations)
 * and of increasing priority (head operations) — reporting statistics for
 * each and, when CONFIG_BENCHMARK_VERBOSE is enabled, the per-thread
 * averages as well.
 */
int main(void)
{
	unsigned int i;
	unsigned int freq;
#ifdef CONFIG_BENCHMARK_VERBOSE
	char description[120];
	char tag[50];
	struct k_thread *thread;
#endif

	timing_init();

	bench_test_init();

	freq = timing_freq_get_mhz();

	printk("Time Measurements for %s sched queues\n",
	       IS_ENABLED(CONFIG_SCHED_DUMB) ? "dumb" :
	       IS_ENABLED(CONFIG_SCHED_SCALABLE) ? "scalable" : "multiq");
	printk("Timing results: Clock frequency: %u MHz\n", freq);

	/* Busy threads occupy the other CPUs; test threads fill the ready queue */
	start_threads(CONFIG_BENCHMARK_NUM_THREADS);

	timing_start();

	cycles_reset(CONFIG_BENCHMARK_NUM_THREADS);

	/* Scenario 1: remove from the queue head, add at the queue tail */
	for (i = 0; i < CONFIG_BENCHMARK_NUM_ITERATIONS; i++) {
		test_decreasing_priority(CONFIG_BENCHMARK_NUM_THREADS);
	}

	compute_and_report_stats(CONFIG_BENCHMARK_NUM_THREADS, CONFIG_BENCHMARK_NUM_ITERATIONS,
				 add_cycles, "sched.add.thread.ReadyQ_tail",
				 "Add threads of decreasing priority");

#ifdef CONFIG_BENCHMARK_VERBOSE
	for (i = 0; i < CONFIG_BENCHMARK_NUM_THREADS; i++) {
		snprintf(tag, sizeof(tag),
			 "ReadyQ.add.to.tail.%04u.waiters", i);
		snprintf(description, sizeof(description), "%-40s - Add thread of priority (%u)",
			 tag, test_thread[i].base.prio);
		PRINT_STATS_AVG(description, (uint32_t)add_cycles[i],
				CONFIG_BENCHMARK_NUM_ITERATIONS);
	}
#endif

	compute_and_report_stats(CONFIG_BENCHMARK_NUM_THREADS, CONFIG_BENCHMARK_NUM_ITERATIONS,
				 remove_cycles, "sched.remove.thread.ReadyQ_head",
				 "Remove threads of decreasing priority");

#ifdef CONFIG_BENCHMARK_VERBOSE
	for (i = 0; i < CONFIG_BENCHMARK_NUM_THREADS; i++) {
		snprintf(tag, sizeof(tag),
			 "ReadyQ.remove.from.head.%04u.waiters", i);
		snprintf(description, sizeof(description),
			 "%-40s - Remove thread of priority %u",
			 tag, test_thread[i].base.prio);
		PRINT_STATS_AVG(description, (uint32_t)remove_cycles[i],
				CONFIG_BENCHMARK_NUM_ITERATIONS);
	}
#endif

	/* Reset accumulators before the second scenario */
	cycles_reset(CONFIG_BENCHMARK_NUM_THREADS);

	/* Scenario 2: remove from the queue tail, add at the queue head */
	for (i = 0; i < CONFIG_BENCHMARK_NUM_ITERATIONS; i++) {
		test_increasing_priority(CONFIG_BENCHMARK_NUM_THREADS);
	}

	compute_and_report_stats(CONFIG_BENCHMARK_NUM_THREADS, CONFIG_BENCHMARK_NUM_ITERATIONS,
				 add_cycles, "sched.add.thread.ReadyQ_head",
				 "Add threads of increasing priority");

#ifdef CONFIG_BENCHMARK_VERBOSE
	for (i = 0; i < CONFIG_BENCHMARK_NUM_THREADS; i++) {
		snprintf(tag, sizeof(tag),
			 "ReadyQ.add.to.head.%04u.waiters", i);
		thread = &test_thread[CONFIG_BENCHMARK_NUM_THREADS - i - 1];
		snprintf(description, sizeof(description),
			 "%-40s - Add priority %u to readyq",
			 tag, thread->base.prio);
		PRINT_STATS_AVG(description, (uint32_t)add_cycles[i],
				CONFIG_BENCHMARK_NUM_ITERATIONS);
	}
#endif

	compute_and_report_stats(CONFIG_BENCHMARK_NUM_THREADS, CONFIG_BENCHMARK_NUM_ITERATIONS,
				 remove_cycles, "sched.remove.thread.ReadyQ_tail",
				 "Remove threads of increasing priority");

#ifdef CONFIG_BENCHMARK_VERBOSE
	for (i = 0; i < CONFIG_BENCHMARK_NUM_THREADS; i++) {
		snprintf(tag, sizeof(tag),
			 "ReadyQ.remove.from.tail.%04u.waiters",
			 CONFIG_BENCHMARK_NUM_THREADS - i);
		thread = &test_thread[CONFIG_BENCHMARK_NUM_THREADS - i - 1];
		snprintf(description, sizeof(description),
			 "%-40s - Remove lowest priority from readyq (%u)",
			 tag, thread->base.prio);
		PRINT_STATS_AVG(description, (uint32_t)remove_cycles[i],
				CONFIG_BENCHMARK_NUM_ITERATIONS);
	}
#endif

	/* Clean up the test threads (busy threads spin until system reset) */
	for (i = 0; i < CONFIG_BENCHMARK_NUM_THREADS; i++) {
		k_thread_abort(&test_thread[i]);
	}

	timing_stop();

	TC_END_REPORT(0);

	return 0;
}
338