/*
 * Copyright (c) 2023 BayLibre SAS
 * Written by: Nicolas Pitre
 * SPDX-License-Identifier: Apache-2.0
 *
 * The purpose of this test is to exercise and validate the on-demand and
 * preemptive FPU access algorithms implemented in arch/riscv/core/fpu.c.
 */

#include <zephyr/ztest.h>
#include <zephyr/kernel.h>
#include <zephyr/irq_offload.h>

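/*
 * Helpers to decode the FS field of mstatus, which tracks the FPU context
 * state: Off, Initial (registers at their reset value), Clean (matches the
 * saved context) or Dirty (modified since the last save).
 */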
static inline unsigned long fpu_state(void)
{
	return csr_read(mstatus) & MSTATUS_FS;
}

static inline bool fpu_is_off(void)
{
	return fpu_state() == MSTATUS_FS_OFF;
}

static inline bool fpu_is_clean(void)
{
	unsigned long state = fpu_state();

	return state == MSTATUS_FS_INIT || state == MSTATUS_FS_CLEAN;
}

static inline bool fpu_is_dirty(void)
{
	return fpu_state() == MSTATUS_FS_DIRTY;
}

/*
 * Test for basic FPU access states.
 */

ZTEST(riscv_fpu_sharing, test_basics)
{
	int32_t val;

	/* write to an FP reg */
	__asm__ volatile ("fcvt.s.w fa0, %0" : : "r" (42) : "fa0");

	/* the FPU should be dirty now */
	zassert_true(fpu_is_dirty());

	/* flush the FPU and disable it */
	zassert_true(k_float_disable(k_current_get()) == 0);
	zassert_true(fpu_is_off());

	/* read the FP reg back which should re-enable the FPU */
	__asm__ volatile ("fcvt.w.s %0, fa0, rtz" : "=r" (val));

	/* the FPU should be enabled now but not dirty */
	zassert_true(fpu_is_clean());

	/* we should have retrieved the same value */
	zassert_true(val == 42, "got %d instead", val);
}

/*
 * Test for FPU contention between threads.
 */

static void new_thread_check(const char *name)
{
	int32_t val;

	/* threads should start with the FPU disabled */
	zassert_true(fpu_is_off(), "FPU not off when starting thread %s", name);

	/* read one FP reg */
#ifdef CONFIG_CPU_HAS_FPU_DOUBLE_PRECISION
	/*
	 * Registers are initialized with zeroes but single precision values
	 * are expected to be "NaN-boxed" to be valid. So don't use the .s
	 * format here as it won't convert to zero. It's not a problem
	 * otherwise as proper code is not supposed to rely on uninitialized
	 * registers anyway.
	 */
	__asm__ volatile ("fcvt.w.d %0, fa0, rtz" : "=r" (val));
#else
	__asm__ volatile ("fcvt.w.s %0, fa0, rtz" : "=r" (val));
#endif

	/* the FPU should be enabled now and not dirty */
	zassert_true(fpu_is_clean(), "FPU not clean after read");

	/* the FP regs are supposed to be zero initialized */
	zassert_true(val == 0, "got %d instead", val);
}

static K_SEM_DEFINE(thread1_sem, 0, 1);
static K_SEM_DEFINE(thread2_sem, 0, 1);

#define STACK_SIZE 2048
static K_THREAD_STACK_DEFINE(thread1_stack, STACK_SIZE);
static K_THREAD_STACK_DEFINE(thread2_stack, STACK_SIZE);

static struct k_thread thread1;
static struct k_thread thread2;

static void thread1_entry(void *p1, void *p2, void *p3)
{
	int32_t val;

	/*
	 * Test 1: Wait for thread2 to let us run and make sure we still own the
	 * FPU afterwards.
	 */
	new_thread_check("thread1");
	zassert_true(fpu_is_clean());
	k_sem_take(&thread1_sem, K_FOREVER);
	zassert_true(fpu_is_clean());

	/*
	 * Test 2: Let thread2 do its initial thread checks. When we're
	 * scheduled again, thread2 should be the FPU owner at that point
	 * meaning the FPU should then be off for us.
	 */
	k_sem_give(&thread2_sem);
	k_sem_take(&thread1_sem, K_FOREVER);
	zassert_true(fpu_is_off());

	/*
	 * Test 3: Let thread2 verify that it still owns the FPU.
	 */
	k_sem_give(&thread2_sem);
	k_sem_take(&thread1_sem, K_FOREVER);
	zassert_true(fpu_is_off());

	/*
	 * Test 4: Dirty the FPU for ourself. Schedule to thread2 which won't
	 * touch the FPU. Make sure we still own the FPU in dirty state when
	 * we are scheduled back.
	 */
	__asm__ volatile ("fcvt.s.w fa1, %0" : : "r" (42) : "fa1");
	zassert_true(fpu_is_dirty());
	k_sem_give(&thread2_sem);
	k_sem_take(&thread1_sem, K_FOREVER);
	zassert_true(fpu_is_dirty());

	/*
	 * Test 5: Because we currently own a dirty FPU, we are considered
	 * an active user. This means we should still own it after letting
	 * thread2 use it as it would be preemptively restored, but in a
	 * clean state then.
	 */
	k_sem_give(&thread2_sem);
	k_sem_take(&thread1_sem, K_FOREVER);
	zassert_true(fpu_is_clean());

	/*
	 * Test 6: Avoid dirtying the FPU (we'll just make sure it holds our
	 * previously written value). Because thread2 had dirtied it in
	 * test 5, it is considered an active user. Scheduling thread2 will
	 * make it own the FPU right away. However, we won't preemptively own
	 * it anymore afterwards as we didn't actively use it this time.
	 */
	__asm__ volatile ("fcvt.w.s %0, fa1, rtz" : "=r" (val));
	zassert_true(val == 42, "got %d instead", val);
	zassert_true(fpu_is_clean());
	k_sem_give(&thread2_sem);
	k_sem_take(&thread1_sem, K_FOREVER);
	zassert_true(fpu_is_off());

	/*
	 * Test 7: Just let thread2 run again. Even if it is no longer an
	 * active user, it should still own the FPU as it is not contended.
	 */
	k_sem_give(&thread2_sem);
}

static void thread2_entry(void *p1, void *p2, void *p3)
{
	int32_t val;

	/*
	 * Test 1: thread1 waits until we're scheduled.
	 * Let it run again without doing anything else for now.
	 */
	k_sem_give(&thread1_sem);

	/*
	 * Test 2: Perform the initial thread check and return to thread1.
	 */
	k_sem_take(&thread2_sem, K_FOREVER);
	new_thread_check("thread2");
	k_sem_give(&thread1_sem);

	/*
	 * Test 3: Make sure we still own the FPU when scheduled back.
	 */
	k_sem_take(&thread2_sem, K_FOREVER);
	zassert_true(fpu_is_clean());
	k_sem_give(&thread1_sem);

	/*
	 * Test 4: Confirm that thread1 took the FPU from us.
	 */
	k_sem_take(&thread2_sem, K_FOREVER);
	zassert_true(fpu_is_off());
	k_sem_give(&thread1_sem);

	/*
	 * Test 5: Take ownership of the FPU by using it.
	 */
	k_sem_take(&thread2_sem, K_FOREVER);
	zassert_true(fpu_is_off());
	__asm__ volatile ("fcvt.s.w fa1, %0" : : "r" (37) : "fa1");
	zassert_true(fpu_is_dirty());
	k_sem_give(&thread1_sem);

	/*
	 * Test 6: We dirtied the FPU last time therefore we are an active
	 * user. We should own it right away but clean this time.
	 */
	k_sem_take(&thread2_sem, K_FOREVER);
	zassert_true(fpu_is_clean());
	__asm__ volatile ("fcvt.w.s %0, fa1" : "=r" (val));
	zassert_true(val == 37, "got %d instead", val);
	zassert_true(fpu_is_clean());
	k_sem_give(&thread1_sem);

	/*
	 * Test 7: thread1 didn't claim the FPU and it wasn't preemptively
	 * assigned to it. This means we should still own it despite not
	 * having been an active user lately as the FPU is not contended.
	 */
	k_sem_take(&thread2_sem, K_FOREVER);
	zassert_true(fpu_is_clean());
	__asm__ volatile ("fcvt.w.s %0, fa1" : "=r" (val));
	zassert_true(val == 37, "got %d instead", val);
}

ZTEST(riscv_fpu_sharing, test_multi_thread_interaction)
{
	k_thread_create(&thread1, thread1_stack, STACK_SIZE,
			thread1_entry, NULL, NULL, NULL,
			-1, 0, K_NO_WAIT);
	k_thread_create(&thread2, thread2_stack, STACK_SIZE,
			thread2_entry, NULL, NULL, NULL,
			-1, 0, K_NO_WAIT);
	zassert_true(k_thread_join(&thread1, K_FOREVER) == 0);
	zassert_true(k_thread_join(&thread2, K_FOREVER) == 0);
}

/*
 * Test for thread vs exception interactions.
 *
 * Context switching for userspace threads always happens through an
 * exception. Privileged preemptive threads also get preempted through
 * an exception. Same for ISRs and system calls. This test reproduces
 * the conditions for those cases.
 */

#define NO_FPU		NULL
#define WITH_FPU	(const void *)1

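/*
 * Offloaded handler: executed in exception context via irq_offload().
 * The argument (NO_FPU or WITH_FPU) selects whether it touches the FPU.
 */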
static void exception_context(const void *arg)
{
	/* All exceptions should always have the FPU disabled initially */
	zassert_true(fpu_is_off());

	if (arg == NO_FPU) {
		return;
	}

	/* Simulate a user syscall environment by having IRQs enabled */
	csr_set(mstatus, MSTATUS_IEN);

	/* make sure the FPU is still off */
	zassert_true(fpu_is_off());

	/* write to an FPU register */
	__asm__ volatile ("fcvt.s.w fa1, %0" : : "r" (987) : "fa1");

	/* the FPU state should be dirty now */
	zassert_true(fpu_is_dirty());

	/* IRQs should have been disabled on us to prevent recursive FPU usage */
	zassert_true((csr_read(mstatus) & MSTATUS_IEN) == 0, "IRQs should be disabled");
}

ZTEST(riscv_fpu_sharing, test_thread_vs_exc_interaction)
{
	int32_t val;

	/* Ensure the FPU is ours and dirty. */
	__asm__ volatile ("fcvt.s.w fa1, %0" : : "r" (654) : "fa1");
	zassert_true(fpu_is_dirty());

	/* We're not in an exception so IRQs should be enabled. */
	zassert_true((csr_read(mstatus) & MSTATUS_IEN) != 0, "IRQs should be enabled");

	/* Exceptions with no FPU usage shouldn't affect our state. */
	irq_offload(exception_context, NO_FPU);
	zassert_true((csr_read(mstatus) & MSTATUS_IEN) != 0, "IRQs should be enabled");
	zassert_true(fpu_is_dirty());
	__asm__ volatile ("fcvt.w.s %0, fa1" : "=r" (val));
	zassert_true(val == 654, "got %d instead", val);

	/*
	 * Exceptions with FPU usage should be trapped to save our context
	 * before letting their accesses go through. Because our FPU state
	 * is dirty at the moment of the trap, we are considered to be an
	 * active user and the FPU context should be preemptively restored
	 * upon leaving the exception, but with a clean state at that point.
	 */
	irq_offload(exception_context, WITH_FPU);
	zassert_true((csr_read(mstatus) & MSTATUS_IEN) != 0, "IRQs should be enabled");
	zassert_true(fpu_is_clean());
	__asm__ volatile ("fcvt.w.s %0, fa1" : "=r" (val));
	zassert_true(val == 654, "got %d instead", val);

	/*
	 * Do the exception with FPU usage again, but this time our current
	 * FPU state is clean, meaning we're no longer an active user.
	 * This means our FPU context should not be preemptively restored.
	 */
	irq_offload(exception_context, WITH_FPU);
	zassert_true((csr_read(mstatus) & MSTATUS_IEN) != 0, "IRQs should be enabled");
	zassert_true(fpu_is_off());

	/* Make sure we still have proper context when accessing the FPU. */
	__asm__ volatile ("fcvt.w.s %0, fa1" : "=r" (val));
	zassert_true(fpu_is_clean());
	zassert_true(val == 654, "got %d instead", val);
}

/*
 * Test for proper FPU instruction trap.
 *
 * There is no dedicated FPU trap flag bit on RISC-V. FPU specific opcodes
 * must be looked for when an illegal instruction exception is raised.
 * This is done in arch/riscv/core/isr.S and explicitly tested here.
 */

#define TEST_TRAP(insn) \
	/* disable the FPU access */ \
	zassert_true(k_float_disable(k_current_get()) == 0); \
	zassert_true(fpu_is_off()); \
	/* execute the instruction */ \
	{ \
		/* use a0 to be universal with all configs */ \
		register unsigned long __r __asm__ ("a0") = reg; \
		PRE_INSN \
		__asm__ volatile (insn : "+r" (__r) : : "fa0", "fa1", "memory"); \
		POST_INSN \
		reg = __r; \
	} \
	/* confirm that the FPU state has changed */ \
	zassert_true(!fpu_is_off())

ZTEST(riscv_fpu_sharing, test_fp_insn_trap)
{
	unsigned long reg;
	uint32_t buf;

	/* Force non RVC instructions */
	#define PRE_INSN  __asm__ volatile (".option push; .option norvc");
	#define POST_INSN __asm__ volatile (".option pop");

	/* OP-FP major opcode space */
	reg = 123456;
	TEST_TRAP("fcvt.s.w fa1, %0");
	TEST_TRAP("fadd.s fa0, fa1, fa1");
	TEST_TRAP("fcvt.w.s %0, fa0");
	zassert_true(reg == 246912, "got %ld instead", reg);

	/* LOAD-FP / STORE-FP space */
	buf = 0x40490ff9; /* 3.1416 */
	reg = (unsigned long)&buf;
	TEST_TRAP("flw fa1, 0(%0)");
	TEST_TRAP("fadd.s fa0, fa0, fa1, rtz");
	TEST_TRAP("fsw fa0, 0(%0)");
	zassert_true(buf == 0x487120c9 /* 246915.140625 */, "got %#x instead", buf);

	/* CSR with fcsr, frm and fflags */
	TEST_TRAP("frcsr %0");
	TEST_TRAP("fscsr %0");
	TEST_TRAP("frrm %0");
	TEST_TRAP("fsrm %0");
	TEST_TRAP("frflags %0");
	TEST_TRAP("fsflags %0");

	/* lift restriction on RVC instructions */
	#undef PRE_INSN
	#define PRE_INSN
	#undef POST_INSN
	#define POST_INSN

	/* RVC variants */
#if defined(CONFIG_RISCV_ISA_EXT_C)
#if !defined(CONFIG_64BIT)
	/* only available on RV32 */
	buf = 0x402df8a1; /* 2.7183 */
	reg = (unsigned long)&buf;
	TEST_TRAP("c.flw fa1, 0(%0)");
	TEST_TRAP("fadd.s fa0, fa0, fa1");
	TEST_TRAP("c.fsw fa0, 0(%0)");
	zassert_true(buf == 0x48712177 /* 246917.859375 */, "got %#x instead", buf);
#endif
#if defined(CONFIG_CPU_HAS_FPU_DOUBLE_PRECISION)
	uint64_t buf64;

	buf64 = 0x400921ff2e48e8a7LL; /* 3.1416 */
	reg = (unsigned long)&buf64;
	TEST_TRAP("c.fld fa0, 0(%0)");
	TEST_TRAP("fadd.d fa1, fa0, fa0, rtz");
	TEST_TRAP("fadd.d fa1, fa1, fa0, rtz");
	TEST_TRAP("c.fsd fa1, 0(%0)");
	zassert_true(buf64 == 0x4022d97f62b6ae7dLL /* 9.4248 */,
		     "got %#llx instead", buf64);
#endif
#endif /* CONFIG_RISCV_ISA_EXT_C */

	/* MADD major opcode space */
	reg = 3579;
	TEST_TRAP("fcvt.s.w fa1, %0");
	TEST_TRAP("fmadd.s fa0, fa1, fa1, fa1");
	TEST_TRAP("fcvt.w.s %0, fa0");
	zassert_true(reg == 12812820, "got %ld instead", reg);

	/* MSUB major opcode space */
	reg = 1234;
	TEST_TRAP("fcvt.s.w fa1, %0");
	TEST_TRAP("fmsub.s fa0, fa1, fa1, fa0");
	TEST_TRAP("fcvt.w.s %0, fa0");
	zassert_true(reg == -11290064, "got %ld instead", reg);

	/* NMSUB major opcode space */
	reg = -23;
	TEST_TRAP("fcvt.s.w fa1, %0");
	TEST_TRAP("fnmsub.s fa0, fa1, fa1, fa0");
	TEST_TRAP("fcvt.w.s %0, fa0");
	zassert_true(reg == -11290593, "got %ld instead", reg);

	/* NMADD major opcode space */
	reg = 765;
	TEST_TRAP("fcvt.s.w fa1, %0");
	TEST_TRAP("fnmadd.s fa0, fa1, fa1, fa1");
	TEST_TRAP("fcvt.w.s %0, fa0");
	zassert_true(reg == -585990, "got %ld instead", reg);
}

ZTEST_SUITE(riscv_fpu_sharing, NULL, NULL, NULL, NULL, NULL);