/*
 * Copyright (c) 2023 BayLibre SAS
 * Written by: Nicolas Pitre
 * SPDX-License-Identifier: Apache-2.0
 *
 * The purpose of this test is to exercise and validate the on-demand and
 * preemptive FPU access algorithms implemented in arch/riscv/core/fpu.c.
 */

#include <zephyr/ztest.h>
#include <zephyr/kernel.h>
#include <zephyr/irq_offload.h>

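/*
 * The mstatus.FS field is a 2-bit field encoding the FPU context state:
 * Off (0), Initial (1), Clean (2) and Dirty (3). The helpers below map
 * those states onto the three conditions this test cares about; note that
 * both Initial and Clean mean "enabled with no unsaved modifications".
 */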
static inline unsigned long fpu_state(void)
{
	return csr_read(mstatus) & MSTATUS_FS;
}

static inline bool fpu_is_off(void)
{
	return fpu_state() == MSTATUS_FS_OFF;
}

static inline bool fpu_is_clean(void)
{
	unsigned long state = fpu_state();

	return state == MSTATUS_FS_INIT || state == MSTATUS_FS_CLEAN;
}

static inline bool fpu_is_dirty(void)
{
	return fpu_state() == MSTATUS_FS_DIRTY;
}

/*
 * Test for basic FPU access states.
 */

ZTEST(riscv_fpu_sharing, test_basics)
{
	int32_t val;

	/* write to an FP reg */
	__asm__ volatile ("fcvt.s.w fa0, %0" : : "r" (42) : "fa0");

	/* the FPU should be dirty now */
	zassert_true(fpu_is_dirty());

	/* flush the FPU and disable it */
	zassert_true(k_float_disable(k_current_get()) == 0);
	zassert_true(fpu_is_off());

	/* read the FP reg back, which should re-enable the FPU */
	__asm__ volatile ("fcvt.w.s %0, fa0, rtz" : "=r" (val));

	/* the FPU should be enabled now but not dirty */
	zassert_true(fpu_is_clean());

	/* we should have retrieved the same value */
	zassert_true(val == 42, "got %d instead", val);
}

/*
 * Test for FPU contention between threads.
 */

static void new_thread_check(const char *name)
{
	int32_t val;

	/* threads should start with the FPU disabled */
	zassert_true(fpu_is_off(), "FPU not off when starting thread %s", name);

	/* read one FP reg */
#ifdef CONFIG_CPU_HAS_FPU_DOUBLE_PRECISION
	/*
	 * Registers are initialized with zeroes, but single-precision values
	 * are expected to be "NaN-boxed" to be valid. So don't use the .s
	 * format here as it won't convert to zero. It's not a problem
	 * otherwise, as proper code is not supposed to rely on uninitialized
	 * registers anyway.
	 */
	__asm__ volatile ("fcvt.w.d %0, fa0, rtz" : "=r" (val));
#else
	__asm__ volatile ("fcvt.w.s %0, fa0, rtz" : "=r" (val));
#endif

	/* the FPU should be enabled now and not dirty */
	zassert_true(fpu_is_clean(), "FPU not clean after read");

	/* the FP regs are supposed to be zero initialized */
	zassert_true(val == 0, "got %d instead", val);
}

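/*
 * The two semaphores below implement a strict ping-pong handshake:
 * each thread runs one numbered test step, wakes the other thread and
 * blocks, so both threads observe the FPU state at deterministic points
 * in the sequence.
 */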
static K_SEM_DEFINE(thread1_sem, 0, 1);
static K_SEM_DEFINE(thread2_sem, 0, 1);

#define STACK_SIZE 2048
static K_THREAD_STACK_DEFINE(thread1_stack, STACK_SIZE);
static K_THREAD_STACK_DEFINE(thread2_stack, STACK_SIZE);

static struct k_thread thread1;
static struct k_thread thread2;

static void thread1_entry(void *p1, void *p2, void *p3)
{
	int32_t val;

	/*
	 * Test 1: Wait for thread2 to let us run and make sure we still own
	 * the FPU afterwards.
	 */
	new_thread_check("thread1");
	zassert_true(fpu_is_clean());
	k_sem_take(&thread1_sem, K_FOREVER);
	zassert_true(fpu_is_clean());

	/*
	 * Test 2: Let thread2 do its initial thread checks. When we're
	 * scheduled again, thread2 should be the FPU owner at that point,
	 * meaning the FPU should then be off for us.
	 */
	k_sem_give(&thread2_sem);
	k_sem_take(&thread1_sem, K_FOREVER);
	zassert_true(fpu_is_off());

	/*
	 * Test 3: Let thread2 verify that it still owns the FPU.
	 */
	k_sem_give(&thread2_sem);
	k_sem_take(&thread1_sem, K_FOREVER);
	zassert_true(fpu_is_off());

	/*
	 * Test 4: Dirty the FPU for ourselves. Schedule to thread2, which
	 * won't touch the FPU. Make sure we still own the FPU in dirty
	 * state when we are scheduled back.
	 */
	__asm__ volatile ("fcvt.s.w fa1, %0" : : "r" (42) : "fa1");
	zassert_true(fpu_is_dirty());
	k_sem_give(&thread2_sem);
	k_sem_take(&thread1_sem, K_FOREVER);
	zassert_true(fpu_is_dirty());

	/*
	 * Test 5: Because we currently own a dirty FPU, we are considered
	 * an active user. This means we should still own it after letting
	 * thread2 use it, as our context would be preemptively restored,
	 * albeit in a clean state then.
	 */
	k_sem_give(&thread2_sem);
	k_sem_take(&thread1_sem, K_FOREVER);
	zassert_true(fpu_is_clean());

	/*
	 * Test 6: Avoid dirtying the FPU (we'll just make sure it holds our
	 * previously written value). Because thread2 dirtied it in test 5,
	 * it is considered an active user. Scheduling thread2 will make it
	 * own the FPU right away. However, we won't preemptively own it
	 * anymore afterwards as we didn't actively use it this time.
	 */
	__asm__ volatile ("fcvt.w.s %0, fa1, rtz" : "=r" (val));
	zassert_true(val == 42, "got %d instead", val);
	zassert_true(fpu_is_clean());
	k_sem_give(&thread2_sem);
	k_sem_take(&thread1_sem, K_FOREVER);
	zassert_true(fpu_is_off());

	/*
	 * Test 7: Just let thread2 run again. Even if it is no longer an
	 * active user, it should still own the FPU as it is not contended.
	 */
	k_sem_give(&thread2_sem);
}

static void thread2_entry(void *p1, void *p2, void *p3)
{
	int32_t val;

	/*
	 * Test 1: thread1 waits until we're scheduled.
	 * Let it run again without doing anything else for now.
	 */
	k_sem_give(&thread1_sem);

	/*
	 * Test 2: Perform the initial thread check and return to thread1.
	 */
	k_sem_take(&thread2_sem, K_FOREVER);
	new_thread_check("thread2");
	k_sem_give(&thread1_sem);

	/*
	 * Test 3: Make sure we still own the FPU when scheduled back.
	 */
	k_sem_take(&thread2_sem, K_FOREVER);
	zassert_true(fpu_is_clean());
	k_sem_give(&thread1_sem);

	/*
	 * Test 4: Confirm that thread1 took the FPU from us.
	 */
	k_sem_take(&thread2_sem, K_FOREVER);
	zassert_true(fpu_is_off());
	k_sem_give(&thread1_sem);

	/*
	 * Test 5: Take ownership of the FPU by using it.
	 */
	k_sem_take(&thread2_sem, K_FOREVER);
	zassert_true(fpu_is_off());
	__asm__ volatile ("fcvt.s.w fa1, %0" : : "r" (37) : "fa1");
	zassert_true(fpu_is_dirty());
	k_sem_give(&thread1_sem);

	/*
	 * Test 6: We dirtied the FPU last time, therefore we are an active
	 * user. We should own it right away, but clean this time.
	 */
	k_sem_take(&thread2_sem, K_FOREVER);
	zassert_true(fpu_is_clean());
	__asm__ volatile ("fcvt.w.s %0, fa1" : "=r" (val));
	zassert_true(val == 37, "got %d instead", val);
	zassert_true(fpu_is_clean());
	k_sem_give(&thread1_sem);

	/*
	 * Test 7: thread1 didn't claim the FPU and it wasn't preemptively
	 * assigned to it. This means we should still own it, despite not
	 * having been an active user lately, as the FPU is not contended.
	 */
	k_sem_take(&thread2_sem, K_FOREVER);
	zassert_true(fpu_is_clean());
	__asm__ volatile ("fcvt.w.s %0, fa1" : "=r" (val));
	zassert_true(val == 37, "got %d instead", val);
}

ZTEST(riscv_fpu_sharing, test_multi_thread_interaction)
{
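	/*
	 * Both threads are created at priority -1, i.e. cooperative
	 * scheduling, so on a given CPU they only switch at the explicit
	 * blocking points and the handshake sequence above stays
	 * deterministic.
	 */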
	k_thread_create(&thread1, thread1_stack, STACK_SIZE,
			thread1_entry, NULL, NULL, NULL,
			-1, 0, K_NO_WAIT);
	k_thread_create(&thread2, thread2_stack, STACK_SIZE,
			thread2_entry, NULL, NULL, NULL,
			-1, 0, K_NO_WAIT);
	zassert_true(k_thread_join(&thread1, K_FOREVER) == 0);
	zassert_true(k_thread_join(&thread2, K_FOREVER) == 0);
}

/*
 * Test for thread vs exception interactions.
 *
 * Context switching for userspace threads always happens through an
 * exception. Privileged preemptive threads also get preempted through
 * an exception. Same for ISRs and system calls. This test reproduces
 * the conditions for those cases.
 */

#define NO_FPU NULL
#define WITH_FPU (const void *)1
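/*
 * These two values are passed as the irq_offload() argument to select
 * whether the offloaded handler below touches the FPU or leaves it alone.
 */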

static void exception_context(const void *arg)
{
	/* All exceptions should always have the FPU disabled initially */
	zassert_true(fpu_is_off());

	if (arg == NO_FPU) {
		return;
	}

	/* Simulate a user syscall environment by having IRQs enabled */
	csr_set(mstatus, MSTATUS_IEN);

	/* make sure the FPU is still off */
	zassert_true(fpu_is_off());

	/* write to an FPU register */
	__asm__ volatile ("fcvt.s.w fa1, %0" : : "r" (987) : "fa1");

	/* the FPU state should be dirty now */
	zassert_true(fpu_is_dirty());

	/* IRQs should have been disabled on us to prevent recursive FPU usage */
	zassert_true((csr_read(mstatus) & MSTATUS_IEN) == 0, "IRQs should be disabled");
}

ZTEST(riscv_fpu_sharing, test_thread_vs_exc_interaction)
{
	int32_t val;

	/* Ensure the FPU is ours and dirty. */
	__asm__ volatile ("fcvt.s.w fa1, %0" : : "r" (654) : "fa1");
	zassert_true(fpu_is_dirty());

	/* We're not in an exception, so IRQs should be enabled. */
	zassert_true((csr_read(mstatus) & MSTATUS_IEN) != 0, "IRQs should be enabled");

	/* Exceptions with no FPU usage shouldn't affect our state. */
	irq_offload(exception_context, NO_FPU);
	zassert_true((csr_read(mstatus) & MSTATUS_IEN) != 0, "IRQs should be enabled");
	zassert_true(fpu_is_dirty());
	__asm__ volatile ("fcvt.w.s %0, fa1" : "=r" (val));
	zassert_true(val == 654, "got %d instead", val);

	/*
	 * Exceptions with FPU usage should be trapped to save our context
	 * before letting their accesses go through. Because our FPU state
	 * is dirty at the moment of the trap, we are considered to be an
	 * active user and the FPU context should be preemptively restored
	 * upon leaving the exception, but with a clean state at that point.
	 */
	irq_offload(exception_context, WITH_FPU);
	zassert_true((csr_read(mstatus) & MSTATUS_IEN) != 0, "IRQs should be enabled");
	zassert_true(fpu_is_clean());
	__asm__ volatile ("fcvt.w.s %0, fa1" : "=r" (val));
	zassert_true(val == 654, "got %d instead", val);

	/*
	 * Do the exception with FPU usage again, but this time our current
	 * FPU state is clean, meaning we're no longer an active user.
	 * This means our FPU context should not be preemptively restored.
	 */
	irq_offload(exception_context, WITH_FPU);
	zassert_true((csr_read(mstatus) & MSTATUS_IEN) != 0, "IRQs should be enabled");
	zassert_true(fpu_is_off());

	/* Make sure we still have proper context when accessing the FPU. */
	__asm__ volatile ("fcvt.w.s %0, fa1" : "=r" (val));
	zassert_true(fpu_is_clean());
	zassert_true(val == 654, "got %d instead", val);
}

/*
 * Test for proper FPU instruction trap.
 *
 * There is no dedicated FPU trap flag bit on RISC-V. FPU specific opcodes
 * must be looked for when an illegal instruction exception is raised.
 * This is done in arch/riscv/core/isr.S and explicitly tested here.
 */

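/*
 * TEST_TRAP() turns the FPU off, executes the given instruction with a0
 * wired to the local variable "reg" as both input and output, and then
 * verifies that the resulting illegal instruction trap re-enabled the FPU.
 * PRE_INSN/POST_INSN must be defined beforehand to wrap the instruction
 * with whatever assembler directives the test variant needs.
 */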
#define TEST_TRAP(insn) \
	/* disable the FPU access */ \
	zassert_true(k_float_disable(k_current_get()) == 0); \
	zassert_true(fpu_is_off()); \
	/* execute the instruction */ \
	{ \
		/* use a0 to be universal with all configs */ \
		register unsigned long __r __asm__ ("a0") = reg; \
		PRE_INSN \
		__asm__ volatile (insn : "+r" (__r) : : "fa0", "fa1", "memory"); \
		POST_INSN \
		reg = __r; \
	} \
	/* confirm that the FPU state has changed */ \
	zassert_true(!fpu_is_off())

ZTEST(riscv_fpu_sharing, test_fp_insn_trap)
{
	unsigned long reg;
	uint32_t buf;

	/* Force non-RVC instructions */
#define PRE_INSN __asm__ volatile (".option push; .option norvc");
#define POST_INSN __asm__ volatile (".option pop");
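	/*
	 * The .option norvc directive keeps the assembler from emitting
	 * compressed encodings, so the trap handler's decoding of full-size
	 * FP opcodes is what gets exercised here; the RVC variants are
	 * covered separately below.
	 */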

	/* OP-FP major opcode space */
	reg = 123456;
	TEST_TRAP("fcvt.s.w fa1, %0");
	TEST_TRAP("fadd.s fa0, fa1, fa1");
	TEST_TRAP("fcvt.w.s %0, fa0");
	zassert_true(reg == 246912, "got %ld instead", reg);

	/* LOAD-FP / STORE-FP space */
	buf = 0x40490ff9; /* 3.1416 */
	reg = (unsigned long)&buf;
	TEST_TRAP("flw fa1, 0(%0)");
	TEST_TRAP("fadd.s fa0, fa0, fa1, rtz");
	TEST_TRAP("fsw fa0, 0(%0)");
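	/*
	 * Expected value: fa0 still holds 246912.0 from the previous block,
	 * so the sum is 246912 + 3.1416 = 246915.1416. Float granularity at
	 * this magnitude is 1/64 and rtz truncates toward zero, giving
	 * 246915.140625, i.e. 0x487120c9.
	 */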
	zassert_true(buf == 0x487120c9 /* 246915.140625 */, "got %#x instead", buf);

	/* CSR with fcsr, frm and fflags */
	TEST_TRAP("frcsr %0");
	TEST_TRAP("fscsr %0");
	TEST_TRAP("frrm %0");
	TEST_TRAP("fsrm %0");
	TEST_TRAP("frflags %0");
	TEST_TRAP("fsflags %0");

	/* lift restriction on RVC instructions */
#undef PRE_INSN
#define PRE_INSN
#undef POST_INSN
#define POST_INSN

	/* RVC variants */
#if defined(CONFIG_RISCV_ISA_EXT_C)
#if !defined(CONFIG_64BIT)
	/* only available on RV32 */
	buf = 0x402df8a1; /* 2.7183 */
	reg = (unsigned long)&buf;
	TEST_TRAP("c.flw fa1, 0(%0)");
	TEST_TRAP("fadd.s fa0, fa0, fa1");
	TEST_TRAP("c.fsw fa0, 0(%0)");
	zassert_true(buf == 0x48712177 /* 246917.859375 */, "got %#x instead", buf);
#endif
#if defined(CONFIG_CPU_HAS_FPU_DOUBLE_PRECISION)
	uint64_t buf64;

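	/* 0x400921ff2e48e8a7 is roughly 3.1416 encoded as an IEEE-754 double */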
	buf64 = 0x400921ff2e48e8a7LL;
	reg = (unsigned long)&buf64;
	TEST_TRAP("c.fld fa0, 0(%0)");
	TEST_TRAP("fadd.d fa1, fa0, fa0, rtz");
	TEST_TRAP("fadd.d fa1, fa1, fa0, rtz");
	TEST_TRAP("c.fsd fa1, 0(%0)");
	zassert_true(buf64 == 0x4022d97f62b6ae7dLL /* 9.4248 */,
		     "got %#llx instead", buf64);
#endif
#endif /* CONFIG_RISCV_ISA_EXT_C */

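	/*
	 * For the fused operations below, all values fit well within the
	 * 24-bit significand of a float, so the arithmetic is exact:
	 * fmadd computes x*x + x, fmsub x*x - acc, fnmsub -(x*x) + acc,
	 * and fnmadd -(x*x) - x, where acc is the previous fa0 value.
	 */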
	/* MADD major opcode space */
	reg = 3579;
	TEST_TRAP("fcvt.s.w fa1, %0");
	TEST_TRAP("fmadd.s fa0, fa1, fa1, fa1");
	TEST_TRAP("fcvt.w.s %0, fa0");
	zassert_true(reg == 12812820, "got %ld instead", reg);

	/* MSUB major opcode space */
	reg = 1234;
	TEST_TRAP("fcvt.s.w fa1, %0");
	TEST_TRAP("fmsub.s fa0, fa1, fa1, fa0");
	TEST_TRAP("fcvt.w.s %0, fa0");
	zassert_true(reg == -11290064, "got %ld instead", reg);

	/* NMSUB major opcode space */
	reg = -23;
	TEST_TRAP("fcvt.s.w fa1, %0");
	TEST_TRAP("fnmsub.s fa0, fa1, fa1, fa0");
	TEST_TRAP("fcvt.w.s %0, fa0");
	zassert_true(reg == -11290593, "got %ld instead", reg);

	/* NMADD major opcode space */
	reg = 765;
	TEST_TRAP("fcvt.s.w fa1, %0");
	TEST_TRAP("fnmadd.s fa0, fa1, fa1, fa1");
	TEST_TRAP("fcvt.w.s %0, fa0");
	zassert_true(reg == -585990, "got %ld instead", reg);
}

ZTEST_SUITE(riscv_fpu_sharing, NULL, NULL, NULL, NULL, NULL);