/*
 * Copyright (c) 2019 Intel Corporation
 *
 * SPDX-License-Identifier: Apache-2.0
 */

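/*
 * Userspace scheduling benchmark: groups of user-mode threads repeatedly
 * yield to one another while the main thread measures the average
 * context switch time in nanoseconds.
 */
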
#include <zephyr/kernel.h>
#include <zephyr/sys/printk.h>
#include <inttypes.h>	/* PRIu32/PRIu64 used in printk() format strings */

/* private kernel APIs */
#include <wait_q.h>
#include <ksched.h>

#include "app_threads.h"
#include "user.h"

#define MAIN_PRIO 8
#define THREADS_PRIO 9

enum {
	MEAS_START,
	MEAS_END,
	NUM_STAMP_STATES
};

/* Raw cycle counts captured at measurement start and end */
uint32_t stamps[NUM_STAMP_STATES];

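/* Capture the current cycle count for @state and return it */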
static inline uint32_t stamp(int state)
{
	uint32_t t;

	/* In theory the TSC has much lower overhead and higher
	 * precision.  In practice it's VERY jittery in recent qemu
	 * versions and frankly too noisy to trust.
	 */
#ifdef CONFIG_X86
	/* Read the low 32 bits of the TSC; the high half lands in EDX,
	 * which is declared clobbered.
	 */
	__asm__ volatile("rdtsc" : "=a"(t) : : "edx");
#else
	t = k_cycle_get_32();
#endif

	stamps[state] = t;
	return t;
}

/* Set nonzero by a yielder if its memory domain setup fails */
static int yielder_status;

void yielder_entry(void *_thread, void *_tid, void *_nb_threads)
{
	struct k_app_thread *thread = (struct k_app_thread *) _thread;
	int ret;

	ARG_UNUSED(_tid);

	/* Give each yielder its own memory domain containing only its
	 * own partition.
	 */
	struct k_mem_partition *parts[] = {
		thread->partition,
	};

	ret = k_mem_domain_init(&thread->domain, ARRAY_SIZE(parts), parts);
	if (ret != 0) {
		printk("k_mem_domain_init failed %d\n", ret);
		yielder_status = 1;
		return;
	}

	k_mem_domain_add_thread(&thread->domain, k_current_get());

	/* Drop to user mode and run the yield loop; this call does not
	 * return.
	 */
	k_thread_user_mode_enter(context_switch_yield, _nb_threads, NULL, NULL);
}
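
/*
 * context_switch_yield() is declared in user.h and implemented elsewhere
 * in this sample.  For reference, a minimal sketch of what such a
 * user-mode yield loop could look like (hypothetical; the split of
 * NB_YIELDS across threads is an assumption, not necessarily the
 * sample's actual code):
 *
 *	void context_switch_yield(void *_nb_threads, void *unused1, void *unused2)
 *	{
 *		uint32_t nb_threads = (uint32_t)(uintptr_t)_nb_threads;
 *
 *		for (uint32_t i = 0; i < NB_YIELDS / nb_threads; i++) {
 *			k_yield();
 *		}
 *	}
 */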


/* Thread IDs of the yielders created by exec_test() */
static k_tid_t threads[MAX_NB_THREADS];

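/*
 * Spawn @nb_threads yielder threads, release them all at once, and
 * measure the total cycle count for the NB_YIELDS rounds of yields.
 * Returns nonzero if any yielder failed during setup.
 */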
static int exec_test(uint8_t nb_threads)
{
	if (nb_threads > MAX_NB_THREADS) {
		printk("Too many threads\n");
		return 1;
	}

	yielder_status = 0;

	for (size_t tid = 0; tid < nb_threads; tid++) {
		app_threads[tid].partition = app_partitions[tid];
		app_threads[tid].stack = &app_thread_stacks[tid];

		void *_tid = (void *)(uintptr_t)tid;

		/* K_FOREVER delay: create the thread paused; it is
		 * released later with k_thread_start().
		 */
		threads[tid] = k_thread_create(&app_threads[tid].thread,
					app_thread_stacks[tid],
					APP_STACKSIZE, yielder_entry,
					&app_threads[tid], _tid, (void *)(uintptr_t)nb_threads,
					THREADS_PRIO, 0, K_FOREVER);
	}

	/* Make sure the main thread has a higher priority than the
	 * yielders, so that the user threads all start together
	 * (lower number --> higher priority).
	 */
	k_thread_priority_set(k_current_get(), MAIN_PRIO);

	stamp(MEAS_START);
	for (size_t tid = 0; tid < nb_threads; tid++) {
		k_thread_start(threads[tid]);
	}
	for (size_t tid = 0; tid < nb_threads; tid++) {
		k_thread_join(threads[tid], K_FOREVER);
	}
	stamp(MEAS_END);

	uint32_t full_time = stamps[MEAS_END] - stamps[MEAS_START];
	/* k_cyc_to_ns_near64() converts hardware cycles to nanoseconds,
	 * rounded to nearest; dividing by the number of yield rounds
	 * gives the cost of a single context switch.
	 */
	uint64_t ns_per_ctx = k_cyc_to_ns_near64(full_time) / NB_YIELDS;

	printk("Swapping %2u threads: %8" PRIu32 " cyc & %6" PRIu32 " rounds -> %6"
				PRIu64 " ns per ctx\n", nb_threads, full_time,
				NB_YIELDS, ns_per_ctx);

	return yielder_status;
}


int main(void)
{
	int ret;

	printk("Userspace scheduling benchmark started on board %s\n", CONFIG_BOARD);

	/* Zero-terminated list of thread counts to benchmark */
	size_t nb_threads_list[] = {2, 8, 16, 32, 0};

	printk("============================\n");
	printk("user/user^n swapping (yield)\n");

	for (size_t i = 0; nb_threads_list[i] > 0; i++) {
		ret = exec_test(nb_threads_list[i]);
		if (ret != 0) {
			printk("FAIL\n");
			return 0;
		}
	}

	printk("SUCCESS\n");
	return 0;
}