1 /*
2 * Copyright (c) 2019 Intel Corporation
3 *
4 * SPDX-License-Identifier: Apache-2.0
5 */
6
7 #include <zephyr/kernel.h>
8 #include <zephyr/sys/printk.h>
9
10 /* private kernel APIs */
11 #include <wait_q.h>
12 #include <ksched.h>
13
14 #include "app_threads.h"
15 #include "user.h"
16
/* Main thread runs at higher priority (lower number) than the worker
 * threads so it can create/start them all before any of them runs.
 */
#define MAIN_PRIO 8
#define THREADS_PRIO 9

/* Indices into stamps[]: one timestamp before and one after the run. */
enum {
	MEAS_START,
	MEAS_END,
	NUM_STAMP_STATES
};

/* Raw cycle-counter timestamps captured by stamp(). */
uint32_t stamps[NUM_STAMP_STATES];
27
stamp(int state)28 static inline int stamp(int state)
29 {
30 uint32_t t;
31
32 /* In theory the TSC has much lower overhead and higher
33 * precision. In practice it's VERY jittery in recent qemu
34 * versions and frankly too noisy to trust.
35 */
36 #ifdef CONFIG_X86
37 __asm__ volatile("rdtsc" : "=a"(t) : : "edx");
38 #else
39 t = k_cycle_get_32();
40 #endif
41
42 stamps[state] = t;
43 return t;
44 }
45
46 static int yielder_status;
47
yielder_entry(void * _thread,void * _tid,void * _nb_threads)48 void yielder_entry(void *_thread, void *_tid, void *_nb_threads)
49 {
50 struct k_app_thread *thread = (struct k_app_thread *) _thread;
51 int ret;
52
53 struct k_mem_partition *parts[] = {
54 thread->partition,
55 };
56
57 ret = k_mem_domain_init(&thread->domain, ARRAY_SIZE(parts), parts);
58 if (ret != 0) {
59 printk("k_mem_domain_init failed %d\n", ret);
60 yielder_status = 1;
61 return;
62 }
63
64 k_mem_domain_add_thread(&thread->domain, k_current_get());
65
66 k_thread_user_mode_enter(context_switch_yield, _nb_threads, NULL, NULL);
67 }
68
69
70 static k_tid_t threads[MAX_NB_THREADS];
71
/**
 * @brief Run one benchmark round with the given number of threads.
 *
 * Creates nb_threads paused user threads, raises the main thread's
 * priority so all workers start together, releases them, joins them,
 * and prints the measured per-context-switch time.
 *
 * @param nb_threads Number of threads to create (must be <= MAX_NB_THREADS).
 * @return 0 on success, 1 on parameter error or thread setup failure.
 */
static int exec_test(uint8_t nb_threads)
{
	if (nb_threads > MAX_NB_THREADS) {
		printk("Too many threads\n");
		return 1;
	}

	yielder_status = 0;

	for (size_t tid = 0; tid < nb_threads; tid++) {
		app_threads[tid].partition = app_partitions[tid];
		app_threads[tid].stack = &app_thread_stacks[tid];

		/* renamed from "_tid": leading-underscore identifiers are
		 * reserved-adjacent and best avoided
		 */
		void *tid_arg = (void *)(uintptr_t)tid;

		/* Threads are created suspended (K_FOREVER) and started
		 * together below, after the priority change.
		 */
		threads[tid] = k_thread_create(&app_threads[tid].thread,
				app_thread_stacks[tid],
				APP_STACKSIZE, yielder_entry,
				&app_threads[tid], tid_arg, (void *)(uintptr_t)nb_threads,
				THREADS_PRIO, 0, K_FOREVER);
	}

	/* make sure the main thread has a higher priority
	 * this way, user threads all start together
	 * (lower number --> higher prio)
	 */
	k_thread_priority_set(k_current_get(), MAIN_PRIO);

	stamp(MEAS_START);
	for (size_t tid = 0; tid < nb_threads; tid++) {
		k_thread_start(threads[tid]);
	}
	for (size_t tid = 0; tid < nb_threads; tid++) {
		k_thread_join(threads[tid], K_FOREVER);
	}
	stamp(MEAS_END);

	uint32_t full_time = stamps[MEAS_END] - stamps[MEAS_START];
	/* Fix: the old name "time_ms" was misleading —
	 * k_cyc_to_ns_near64() yields nanoseconds, so this is the
	 * average time in ns per context switch.
	 */
	uint64_t ns_per_ctx = k_cyc_to_ns_near64(full_time) / NB_YIELDS;

	printk("Swapping %2u threads: %8" PRIu32 " cyc & %6" PRIu32 " rounds -> %6"
		PRIu64 " ns per ctx\n", nb_threads, full_time,
		NB_YIELDS, ns_per_ctx);

	return yielder_status;
}
118
119
main(void)120 int main(void)
121 {
122 int ret;
123
124 printk("Userspace scheduling benchmark started on board %s\n", CONFIG_BOARD);
125
126 size_t nb_threads_list[] = {2, 8, 16, 32, 0};
127
128 printk("============================\n");
129 printk("user/user^n swapping (yield)\n");
130
131 for (size_t i = 0; nb_threads_list[i] > 0; i++) {
132 ret = exec_test(nb_threads_list[i]);
133 if (ret != 0) {
134 printk("FAIL\n");
135 return 0;
136 }
137 }
138
139 printk("SUCCESS\n");
140 return 0;
141 }
142