/*
 * Copyright (c) 2020 Intel Corporation
 *
 * SPDX-License-Identifier: Apache-2.0
 */
#include <zephyr/kernel.h>
#include "msgdev.h"

#include <zephyr/logging/log.h>
LOG_MODULE_REGISTER(main, LOG_LEVEL_INF);

#define STACK_SIZE 2048

/* How many messages can be queued for a single thread */
#define QUEUE_DEPTH MAX_EVENTS

/* Array of worker threads, and their stacks */
static struct thread_rec {
	struct k_thread thread;
	struct k_msgq msgq;
	struct msg msgq_buf[QUEUE_DEPTH];
} threads[NUM_THREADS];

K_THREAD_STACK_ARRAY_DEFINE(thread_stacks, NUM_THREADS, STACK_SIZE);

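/* Note: with CONFIG_NUM_METAIRQ_PRIORITIES nonzero (which this
 * sample's project configuration is expected to set),
 * K_HIGHEST_THREAD_PRIO falls in the MetaIRQ band, so the dispatch
 * thread defined below preempts even cooperative-priority threads.
 */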
/* The static MetaIRQ thread we'll use for dispatch */
static void metairq_fn(void *p1, void *p2, void *p3);
K_THREAD_DEFINE(metairq_thread, STACK_SIZE, metairq_fn,
		NULL, NULL, NULL, K_HIGHEST_THREAD_PRIO, 0, 0);

/* Accumulated list of latencies, for a naive variance computation at
 * the end.
 */
struct {
	atomic_t num_mirq;
	uint32_t mirq_latencies[MAX_EVENTS];
	struct {
		uint32_t nevt;
		uint32_t latencies[MAX_EVENTS * 2 / NUM_THREADS];
	} threads[NUM_THREADS];
} stats;

/* A semaphore with an initial count of 1, used to allow only one
 * thread to log the final report.
 */
K_SEM_DEFINE(report_cookie, 1, 1);

static void metairq_fn(void *p1, void *p2, void *p3)
{
	ARG_UNUSED(p1);
	ARG_UNUSED(p2);
	ARG_UNUSED(p3);

	while (true) {
		/* Receive a message, immediately check a timestamp
		 * and compute a latency value, then dispatch it to
		 * the queue for its target thread
		 */
		struct msg m;

		message_dev_fetch(&m);
		m.metairq_latency = k_cycle_get_32() - m.timestamp;

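		/* Forward with K_NO_WAIT so the MetaIRQ never blocks:
		 * a full worker queue drops the message rather than
		 * stalling dispatch.
		 */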
		int ret = k_msgq_put(&threads[m.target].msgq, &m, K_NO_WAIT);

		if (ret) {
			LOG_INF("Thread %d queue full, message %d dropped",
				m.target, m.seq);
		}
	}
}

/* Simple recursive implementation of an integer square root, cribbed
 * from Wikipedia
 */
static uint32_t isqrt(uint64_t n)
{
	if (n > 1) {
		uint64_t lo = isqrt(n >> 2) << 1;
		uint64_t hi = lo + 1;

		return (uint32_t)(((hi * hi) > n) ? lo : hi);
	}
	return (uint32_t) n;
}

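/* Compute the min, max, rounded mean, and (rounded) population
 * standard deviation of an array of cycle counts, using integer
 * math with round-to-nearest division.
 */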
static void calc_stats(const uint32_t *array, uint32_t n,
		       uint32_t *lo, uint32_t *hi, uint32_t *mean, uint32_t *stdev)
{
	uint64_t tot = 0, totsq = 0;

	*lo = UINT32_MAX;
	*hi = 0;
	for (int i = 0; i < n; i++) {
		*lo = MIN(*lo, array[i]);
		*hi = MAX(*hi, array[i]);
		tot += array[i];
	}

	*mean = (uint32_t)((tot + (n / 2)) / n);

	for (int i = 0; i < n; i++) {
		int64_t d = (int32_t) (array[i] - *mean);

		totsq += d * d;
	}

	*stdev = isqrt((totsq + (n / 2)) / n);
}


static void record_latencies(struct msg *m, uint32_t latency)
{
	/* Workaround: qemu emulation shows an erroneously high
	 * MetaIRQ latency of 7-8us for the very first event.  Maybe
	 * it needs to fault in our code pages in the host?
	 */
	if (IS_ENABLED(CONFIG_QEMU_TARGET) && m->seq == 0) {
		return;
	}

	/* Guard against a potential race in this subroutine: check
	 * whether the message sequence has reached MAX_EVENTS first,
	 * to prevent out-of-bounds writes to the stats arrays.
	 */
	if (m->seq >= MAX_EVENTS) {
		return;
	}

	int t = m->target;
	int lidx = stats.threads[t].nevt++;

	if (lidx < ARRAY_SIZE(stats.threads[t].latencies)) {
		stats.threads[t].latencies[lidx] = latency;
	}

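	/* atomic_inc() returns the value prior to the increment, so
	 * each thread reserves a unique slot in the shared array.
	 */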
	stats.mirq_latencies[atomic_inc(&stats.num_mirq)] = m->metairq_latency;

	/* Once we've logged our final event, print a report.  We use
	 * a semaphore with an initial count of 1 to ensure that only
	 * one thread gets to do this.  Also events can be processed
	 * out of order, so add a small sleep to let the queues
	 * finish.
	 */
	if (m->seq == MAX_EVENTS - 1) {
		uint32_t hi, lo, mean, stdev, ret;

		ret = k_sem_take(&report_cookie, K_FOREVER);
		__ASSERT_NO_MSG(ret == 0);
		k_msleep(100);

		calc_stats(stats.mirq_latencies, stats.num_mirq,
			   &lo, &hi, &mean, &stdev);

		LOG_INF(" ---------- Latency (cyc) ----------");
		LOG_INF("            Best    Worst     Mean    Stdev");
		LOG_INF("MetaIRQ %8d %8d %8d %8d", lo, hi, mean, stdev);

		for (int i = 0; i < NUM_THREADS; i++) {
			if (stats.threads[i].nevt == 0) {
				LOG_WRN("No events for thread %d", i);
				continue;
			}

			calc_stats(stats.threads[i].latencies,
				   stats.threads[i].nevt,
				   &lo, &hi, &mean, &stdev);

			LOG_INF("Thread%d %8d %8d %8d %8d",
				i, lo, hi, mean, stdev);
		}

		LOG_INF("MetaIRQ Test Complete");
	}
}

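/* Worker thread: drain this thread's message queue, simulate the
 * requested amount of per-event processing, and record latencies.
 */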
static void thread_fn(void *p1, void *p2, void *p3)
{
	ARG_UNUSED(p2);
	ARG_UNUSED(p3);
	int id = (long)p1;
	struct msg m;

	LOG_INF("Starting Thread%d at priority %d", id,
		k_thread_priority_get(k_current_get()));

	while (true) {
		int ret = k_msgq_get(&threads[id].msgq, &m, K_FOREVER);
		uint32_t start = k_cycle_get_32();

		__ASSERT_NO_MSG(ret == 0);

		/* Spin on the CPU for the requested number of cycles
		 * doing the "work" required to "process" the event.
		 * Note the inner loop: hammering on k_cycle_get_32()
		 * on some platforms requires locking around the timer
		 * driver internals and can affect interrupt latency.
		 * Obviously we may be preempted as new events arrive
		 * and get queued.
		 */
		while (k_cycle_get_32() - start < m.proc_cyc) {
			for (volatile int i = 0; i < 100; i++) {
			}
		}

		uint32_t dur = k_cycle_get_32() - start;

#ifdef LOG_EVERY_EVENT
		/* Log the message, its thread, and the following cycle values:
		 * 1. Receive it from the driver in the MetaIRQ thread
		 * 2. Begin processing it out of the queue in the worker thread
		 * 3. The requested processing time in the message
		 * 4. The actual time taken to process the message
		 *    (may be higher if the thread was preempted)
		 */
		LOG_INF("M%d T%d mirq %d disp %d proc %d real %d",
			m.seq, id, m.metairq_latency,
			start - m.timestamp, m.proc_cyc, dur);
#endif

		/* Collect the latency values in a big statistics array */
		record_latencies(&m, start - m.timestamp);
	}
}

int main(void)
{
	for (long i = 0; i < NUM_THREADS; i++) {
		/* Each thread gets a different priority.  Half should
		 * be at (negative) cooperative priorities.  Lower
		 * thread numbers run at higher priority (a lower
		 * numeric value), e.g. thread 0 will be preempted
		 * only by the MetaIRQ.
		 */
		int prio = (-NUM_THREADS/2) + i;

		k_msgq_init(&threads[i].msgq, (char *)threads[i].msgq_buf,
			    sizeof(struct msg), QUEUE_DEPTH);

		k_thread_create(&threads[i].thread,
				thread_stacks[i], STACK_SIZE,
				thread_fn, (void *)i, NULL, NULL,
				prio, 0, K_NO_WAIT);
	}

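	/* Start the message source: from here on, events arrive and
	 * are dispatched to the workers by the MetaIRQ thread.
	 */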
	message_dev_init();
	return 0;
}