1 /*
2 * Copyright (c) 2011-2014 Wind River Systems, Inc.
3 * Copyright (c) 2020 Stephanos Ioannidis <root@stephanos.io>
4 *
5 * SPDX-License-Identifier: Apache-2.0
6 */
7
8 /*
9 * @file
10 * @brief load/store portion of FPU sharing test
11 *
12 * @defgroup kernel_fpsharing_tests FP Sharing Tests
13 *
14 * @ingroup all_tests
15 *
16 * This module implements the load/store portion of the FPU sharing test. This
17 * version of this test utilizes a pair of tasks.
18 *
19 * The load/store test validates the floating point unit context
20 * save/restore mechanism. This test utilizes a pair of threads of different
21 * priorities that each use the floating point registers. The context
22 * switching that occurs exercises the kernel's ability to properly preserve
23 * the floating point registers. The test also exercises the kernel's ability
24 * to automatically enable floating point support for a task, if supported.
25 *
26 * FUTURE IMPROVEMENTS
27 * On architectures where the non-integer capabilities are provided in a
28 * hierarchy, for example on IA-32 the USE_FP and USE_SSE options are provided,
29 * this test should be enhanced to ensure that the architectures' z_swap()
 * routine doesn't context switch more registers than it needs to (which would
31 * represent a performance issue). For example, on the IA-32, the test should
32 * issue a k_fp_disable() from main(), and then indicate that only x87 FPU
33 * registers will be utilized (k_fp_enable()). The thread should continue
34 * to load ALL non-integer registers, but main() should validate that only the
35 * x87 FPU registers are being saved/restored.
36 */
37
38 #include <zephyr/ztest.h>
39 #include <zephyr/debug/gcov.h>
40
41 #if defined(CONFIG_X86)
42 #if defined(__GNUC__)
43 #include "float_regs_x86_gcc.h"
44 #else
45 #include "float_regs_x86_other.h"
46 #endif /* __GNUC__ */
47 #elif defined(CONFIG_ARM)
48 #if defined(CONFIG_ARMV7_M_ARMV8_M_FP) || defined(CONFIG_ARMV7_R_FP) || defined(CONFIG_CPU_HAS_VFP)
49 #if defined(__GNUC__) || defined(__ICCARM__)
50 #include "float_regs_arm_gcc.h"
51 #else
52 #include "float_regs_arm_other.h"
53 #endif /* __GNUC__ */
54 #endif
55 #elif defined(CONFIG_ARM64)
56 #if defined(__GNUC__)
57 #include "float_regs_arm64_gcc.h"
58 #else
59 #include "float_regs_arm64_other.h"
60 #endif /* __GNUC__ */
61 #elif defined(CONFIG_ISA_ARCV2)
62 #if defined(__GNUC__)
63 #include "float_regs_arc_gcc.h"
64 #else
65 #include "float_regs_arc_other.h"
66 #endif /* __GNUC__ */
67 #elif defined(CONFIG_RISCV)
68 #if defined(__GNUC__)
69 #include "float_regs_riscv_gcc.h"
70 #else
71 #include "float_regs_riscv_other.h"
72 #endif /* __GNUC__ */
73 #elif defined(CONFIG_SPARC)
74 #include "float_regs_sparc.h"
75 #elif defined(CONFIG_XTENSA)
76 #include "float_regs_xtensa.h"
77 #endif
78
79 #include "float_context.h"
80 #include "test_common.h"
81
82 /* space for float register load/store area used by low priority task */
83 static struct fp_register_set float_reg_set_load;
84 static struct fp_register_set float_reg_set_store;
85
86 /* space for float register load/store area used by high priority thread */
87 static struct fp_register_set float_reg_set;
88
89 /*
90 * Test counters are "volatile" because GCC may not update them properly
91 * otherwise. (See description of pi calculation test for more details.)
92 */
93 static volatile unsigned int load_store_low_count;
94 static volatile unsigned int load_store_high_count;
95
96 /* Indicates that the load/store test exited */
97 static volatile bool test_exited;
98
99 /* Semaphore for signaling end of test */
100 static K_SEM_DEFINE(test_exit_sem, 0, 1);
101
102 /**
103 * @brief Low priority FPU load/store thread
104 *
105 * @ingroup kernel_fpsharing_tests
106 *
107 * @see k_sched_time_slice_set(), memset(),
108 * _load_all_float_registers(), _store_all_float_registers()
109 */
load_store_low(void)110 static void load_store_low(void)
111 {
112 unsigned int i;
113 bool error = false;
114 unsigned char init_byte;
115 unsigned char *store_ptr = (unsigned char *)&float_reg_set_store;
116 unsigned char *load_ptr = (unsigned char *)&float_reg_set_load;
117
118 volatile char volatile_stack_var = 0;
119
120 /*
121 * Initialize floating point load buffer to known values;
122 * these values must be different than the value used in other threads.
123 */
124 init_byte = MAIN_FLOAT_REG_CHECK_BYTE;
125 for (i = 0; i < SIZEOF_FP_REGISTER_SET; i++) {
126 load_ptr[i] = init_byte++;
127 }
128
129 /* Loop until the test finishes, or an error is detected. */
130 for (load_store_low_count = 0; !test_exited; load_store_low_count++) {
131
132 /*
133 * Clear store buffer to erase all traces of any previous
134 * floating point values that have been saved.
135 */
136 (void)memset(&float_reg_set_store, 0, SIZEOF_FP_REGISTER_SET);
137
138 /*
139 * Utilize an architecture specific function to load all the
140 * floating point registers with known values.
141 */
142 _load_all_float_registers(&float_reg_set_load);
143
144 /*
145 * Waste some cycles to give the high priority load/store
146 * thread an opportunity to run when the low priority thread is
147 * using the floating point registers.
148 *
149 * IMPORTANT: This logic requires that sys_clock_tick_get_32() not
150 * perform any floating point operations!
151 */
152 while ((sys_clock_tick_get_32() % 5) != 0) {
153 /*
154 * Use a volatile variable to prevent compiler
155 * optimizing out the spin loop.
156 */
157 ++volatile_stack_var;
158 }
159
160 /*
161 * Utilize an architecture specific function to dump the
162 * contents of all floating point registers to memory.
163 */
164 _store_all_float_registers(&float_reg_set_store);
165
166 /*
167 * Compare each byte of buffer to ensure the expected value is
168 * present, indicating that the floating point registers weren't
169 * impacted by the operation of the high priority thread(s).
170 *
171 * Display error message and terminate if discrepancies are
172 * detected.
173 */
174 init_byte = MAIN_FLOAT_REG_CHECK_BYTE;
175
176 for (i = 0; i < SIZEOF_FP_REGISTER_SET; i++) {
177 if (store_ptr[i] != init_byte) {
178 TC_ERROR("Found 0x%x instead of 0x%x @ "
179 "offset 0x%x\n",
180 store_ptr[i],
181 init_byte, i);
182 TC_ERROR("Discrepancy found during "
183 "iteration %d\n",
184 load_store_low_count);
185 error = true;
186 }
187 init_byte++;
188 }
189
190 /* Terminate if a test error has been reported */
191 zassert_false(error);
192
193 /*
194 * After every 1000 iterations (arbitrarily chosen), explicitly
195 * disable floating point operations for the task.
196 */
197 #if (defined(CONFIG_X86) && defined(CONFIG_LAZY_FPU_SHARING)) || \
198 defined(CONFIG_ARMV7_M_ARMV8_M_FP) || defined(CONFIG_ARMV7_R_FP)
199 /*
200 * In x86:
201 * The subsequent execution of _load_all_float_registers() will
202 * result in an exception to automatically re-enable
203 * floating point support for the task.
204 *
205 * The purpose of this part of the test is to exercise the
206 * k_float_disable() API, and to also continue exercising
207 * the (exception based) floating enabling mechanism.
208 *
209 * In ARM:
210 *
211 * The routine k_float_disable() allows for thread-level
212 * granularity for disabling floating point. Furthermore, it
213 * is useful for testing automatic thread enabling of floating
214 * point as soon as FP registers are used, again by the thread.
215 */
216 if ((load_store_low_count % 1000) == 0) {
217 k_float_disable(k_current_get());
218 }
219 #endif
220 }
221 }
222
223 /**
224 * @brief High priority FPU load/store thread
225 *
226 * @ingroup kernel_fpsharing_tests
227 *
228 * @see _load_then_store_all_float_registers()
229 */
load_store_high(void)230 static void load_store_high(void)
231 {
232 unsigned int i;
233 unsigned char init_byte;
234 unsigned char *reg_set_ptr = (unsigned char *)&float_reg_set;
235
236 /* Run the test until the specified maximum test count is reached */
237 for (load_store_high_count = 0;
238 load_store_high_count <= MAX_TESTS;
239 load_store_high_count++) {
240
241 /*
242 * Initialize the float_reg_set structure by treating it as
243 * a simple array of bytes (the arrangement and actual number
244 * of registers is not important for this generic C code). The
245 * structure is initialized by using the byte value specified
246 * by the constant FIBER_FLOAT_REG_CHECK_BYTE, and then
247 * incrementing the value for each successive location in the
248 * float_reg_set structure.
249 *
250 * The initial byte value, and thus the contents of the entire
251 * float_reg_set structure, must be different for each
252 * thread to effectively test the kernel's ability to
253 * properly save/restore the floating point values during a
254 * context switch.
255 */
256 init_byte = FIBER_FLOAT_REG_CHECK_BYTE;
257
258 for (i = 0; i < SIZEOF_FP_REGISTER_SET; i++) {
259 reg_set_ptr[i] = init_byte++;
260 }
261
262 /*
263 * Utilize an architecture specific function to load all the
264 * floating point registers with the contents of the
265 * float_reg_set structure.
266 *
267 * The goal of the loading all floating point registers with
268 * values that differ from the values used in other threads is
269 * to help determine whether the floating point register
270 * save/restore mechanism in the kernel's context switcher
271 * is operating correctly.
272 *
273 * When a subsequent k_timer_test() invocation is
274 * performed, a (cooperative) context switch back to the
275 * preempted task will occur. This context switch should result
276 * in restoring the state of the task's floating point
277 * registers when the task was swapped out due to the
278 * occurrence of the timer tick.
279 */
280 _load_then_store_all_float_registers(&float_reg_set);
281
282 /*
283 * Relinquish the processor for the remainder of the current
284 * system clock tick, so that lower priority threads get a
285 * chance to run.
286 *
287 * This exercises the ability of the kernel to restore the
288 * FPU state of a low priority thread _and_ the ability of the
289 * kernel to provide a "clean" FPU state to this thread
290 * once the sleep ends.
291 */
292 k_sleep(K_MSEC(1));
293
294 /* Periodically issue progress report */
295 if ((load_store_high_count % 100) == 0) {
296 PRINT_DATA("Load and store OK after %u (high) "
297 "+ %u (low) tests\n",
298 load_store_high_count,
299 load_store_low_count);
300 }
301 }
302
303 #ifdef CONFIG_COVERAGE_GCOV
304 gcov_coverage_dump();
305 #endif
306
307 /* Signal end of test */
308 test_exited = true;
309 k_sem_give(&test_exit_sem);
310 }
311
/*
 * Statically defined test threads. Both are created with a K_TICKS_FOREVER
 * start delay; the test case launches them explicitly with k_thread_start().
 */
K_THREAD_DEFINE(load_low, THREAD_STACK_SIZE,
		load_store_low, NULL, NULL, NULL,
		THREAD_LOW_PRIORITY, THREAD_FP_FLAGS,
		K_TICKS_FOREVER);

K_THREAD_DEFINE(load_high, THREAD_STACK_SIZE,
		load_store_high, NULL, NULL, NULL,
		THREAD_HIGH_PRIORITY, THREAD_FP_FLAGS,
		K_TICKS_FOREVER);
317
ZTEST(fpu_sharing_generic,test_load_store)318 ZTEST(fpu_sharing_generic, test_load_store)
319 {
320 /* Initialise test states */
321 test_exited = false;
322 k_sem_reset(&test_exit_sem);
323
324 /* Start test threads */
325 k_thread_start(load_low);
326 k_thread_start(load_high);
327
328 /* Wait for test threads to exit */
329 k_sem_take(&test_exit_sem, K_FOREVER);
330 }
331