/*
 * Copyright (c) 2011-2014 Wind River Systems, Inc.
 * Copyright (c) 2020 Stephanos Ioannidis <root@stephanos.io>
 *
 * SPDX-License-Identifier: Apache-2.0
 */

/**
 * @file
 * @brief load/store portion of FPU sharing test
 *
 * @defgroup kernel_fpsharing_tests FP Sharing Tests
 *
 * @ingroup all_tests
 *
 * This module implements the load/store portion of the FPU sharing test. This
 * version of the test utilizes a pair of threads.
 *
 * The load/store test validates the floating point unit context
 * save/restore mechanism. This test utilizes a pair of threads of different
 * priorities that each use the floating point registers. The context
 * switching that occurs exercises the kernel's ability to properly preserve
 * the floating point registers. The test also exercises the kernel's ability
 * to automatically enable floating point support for a thread, if supported.
 *
 * FUTURE IMPROVEMENTS
 * On architectures where the non-integer capabilities are provided in a
 * hierarchy, for example on IA-32 the USE_FP and USE_SSE options are provided,
 * this test should be enhanced to ensure that the architecture's z_swap()
 * routine doesn't context switch more registers than it needs to (which would
 * represent a performance issue).  For example, on the IA-32, the test should
 * issue a k_fp_disable() from main(), and then indicate that only x87 FPU
 * registers will be utilized (k_fp_enable()).  The thread should continue
 * to load ALL non-integer registers, but main() should validate that only the
 * x87 FPU registers are being saved/restored.
 */

#include <zephyr/ztest.h>
#include <zephyr/debug/gcov.h>

#if defined(CONFIG_X86)
#if defined(__GNUC__)
#include "float_regs_x86_gcc.h"
#else
#include "float_regs_x86_other.h"
#endif /* __GNUC__ */
#elif defined(CONFIG_ARM)
#if defined(CONFIG_ARMV7_M_ARMV8_M_FP) || defined(CONFIG_ARMV7_R_FP) || defined(CONFIG_CPU_HAS_VFP)
#if defined(__GNUC__) || defined(__ICCARM__)
#include "float_regs_arm_gcc.h"
#else
#include "float_regs_arm_other.h"
#endif /* __GNUC__ */
#endif
#elif defined(CONFIG_ARM64)
#if defined(__GNUC__)
#include "float_regs_arm64_gcc.h"
#else
#include "float_regs_arm64_other.h"
#endif /* __GNUC__ */
#elif defined(CONFIG_ISA_ARCV2)
#if defined(__GNUC__)
#include "float_regs_arc_gcc.h"
#else
#include "float_regs_arc_other.h"
#endif /* __GNUC__ */
#elif defined(CONFIG_RISCV)
#if defined(__GNUC__)
#include "float_regs_riscv_gcc.h"
#else
#include "float_regs_riscv_other.h"
#endif /* __GNUC__ */
#elif defined(CONFIG_SPARC)
#include "float_regs_sparc.h"
#elif defined(CONFIG_XTENSA)
#include "float_regs_xtensa.h"
#endif

#include "float_context.h"
#include "test_common.h"

/* space for float register load/store area used by low priority thread */
static struct fp_register_set float_reg_set_load;
static struct fp_register_set float_reg_set_store;

/* space for float register load/store area used by high priority thread */
static struct fp_register_set float_reg_set;

/*
 * Test counters are "volatile" because GCC may not update them properly
 * otherwise. (See description of pi calculation test for more details.)
 */
static volatile unsigned int load_store_low_count;
static volatile unsigned int load_store_high_count;

/* Indicates that the load/store test exited */
static volatile bool test_exited;

/* Semaphore for signaling end of test */
static K_SEM_DEFINE(test_exit_sem, 0, 1);

/**
 * @brief Low priority FPU load/store thread
 *
 * @ingroup kernel_fpsharing_tests
 *
 * @see k_sched_time_slice_set(), memset(),
 * _load_all_float_registers(), _store_all_float_registers()
 */
static void load_store_low(void)
{
	unsigned int i;
	bool error = false;
	unsigned char init_byte;
	unsigned char *store_ptr = (unsigned char *)&float_reg_set_store;
	unsigned char *load_ptr = (unsigned char *)&float_reg_set_load;

	volatile char volatile_stack_var = 0;

	/*
	 * Initialize floating point load buffer to known values;
	 * these values must be different than the value used in other threads.
	 */
	init_byte = MAIN_FLOAT_REG_CHECK_BYTE;
	for (i = 0; i < SIZEOF_FP_REGISTER_SET; i++) {
		load_ptr[i] = init_byte++;
	}

	/* Loop until the test finishes, or an error is detected. */
	for (load_store_low_count = 0; !test_exited; load_store_low_count++) {

		/*
		 * Clear store buffer to erase all traces of any previous
		 * floating point values that have been saved.
		 */
		(void)memset(&float_reg_set_store, 0, SIZEOF_FP_REGISTER_SET);

		/*
		 * Utilize an architecture specific function to load all the
		 * floating point registers with known values.
		 */
		_load_all_float_registers(&float_reg_set_load);

		/*
		 * Waste some cycles to give the high priority load/store
		 * thread an opportunity to run when the low priority thread is
		 * using the floating point registers.
		 *
		 * IMPORTANT: This logic requires that sys_clock_tick_get_32() not
		 * perform any floating point operations!
		 */
		while ((sys_clock_tick_get_32() % 5) != 0) {
			/*
			 * Use a volatile variable to prevent compiler
			 * optimizing out the spin loop.
			 */
			++volatile_stack_var;
		}

		/*
		 * Utilize an architecture specific function to dump the
		 * contents of all floating point registers to memory.
		 */
		_store_all_float_registers(&float_reg_set_store);

		/*
		 * Compare each byte of the buffer to ensure the expected value
		 * is present, indicating that the floating point registers
		 * weren't impacted by the operation of the high priority
		 * thread(s).
		 *
		 * Display error message and terminate if discrepancies are
		 * detected.
		 */
		init_byte = MAIN_FLOAT_REG_CHECK_BYTE;

		for (i = 0; i < SIZEOF_FP_REGISTER_SET; i++) {
			if (store_ptr[i] != init_byte) {
				TC_ERROR("Found 0x%x instead of 0x%x @ "
					 "offset 0x%x\n",
					 store_ptr[i],
					 init_byte, i);
				TC_ERROR("Discrepancy found during "
					 "iteration %d\n",
					 load_store_low_count);
				error = true;
			}
			init_byte++;
		}

		/* Terminate if a test error has been reported */
		zassert_false(error);

		/*
		 * After every 1000 iterations (arbitrarily chosen), explicitly
		 * disable floating point operations for the thread.
		 */
#if (defined(CONFIG_X86) && defined(CONFIG_LAZY_FPU_SHARING)) || \
		defined(CONFIG_ARMV7_M_ARMV8_M_FP) || defined(CONFIG_ARMV7_R_FP)
		/*
		 * In x86:
		 * The subsequent execution of _load_all_float_registers() will
		 * result in an exception that automatically re-enables
		 * floating point support for the thread.
		 *
		 * The purpose of this part of the test is to exercise the
		 * k_float_disable() API, and to also continue exercising
		 * the (exception-based) floating point enabling mechanism.
		 *
		 * In ARM:
		 *
		 * The routine k_float_disable() allows for thread-level
		 * granularity for disabling floating point. Furthermore, it
		 * is useful for testing automatic thread enabling of floating
		 * point as soon as FP registers are used, again by the thread.
		 */
		if ((load_store_low_count % 1000) == 0) {
			k_float_disable(k_current_get());
		}
#endif
	}
}

/**
 * @brief High priority FPU load/store thread
 *
 * @ingroup kernel_fpsharing_tests
 *
 * @see _load_then_store_all_float_registers()
 */
static void load_store_high(void)
{
	unsigned int i;
	unsigned char init_byte;
	unsigned char *reg_set_ptr = (unsigned char *)&float_reg_set;

	/* Run the test until the specified maximum test count is reached */
	for (load_store_high_count = 0;
	     load_store_high_count <= MAX_TESTS;
	     load_store_high_count++) {

		/*
		 * Initialize the float_reg_set structure by treating it as
		 * a simple array of bytes (the arrangement and actual number
		 * of registers are not important for this generic C code).  The
		 * structure is initialized by using the byte value specified
		 * by the constant FIBER_FLOAT_REG_CHECK_BYTE, and then
		 * incrementing the value for each successive location in the
		 * float_reg_set structure.
		 *
		 * The initial byte value, and thus the contents of the entire
		 * float_reg_set structure, must be different for each
		 * thread to effectively test the kernel's ability to
		 * properly save/restore the floating point values during a
		 * context switch.
		 */
		init_byte = FIBER_FLOAT_REG_CHECK_BYTE;

		for (i = 0; i < SIZEOF_FP_REGISTER_SET; i++) {
			reg_set_ptr[i] = init_byte++;
		}

		/*
		 * Utilize an architecture specific function to load all the
		 * floating point registers with the contents of the
		 * float_reg_set structure.
		 *
		 * The goal of loading all the floating point registers with
		 * values that differ from the values used in other threads is
		 * to help determine whether the floating point register
		 * save/restore mechanism in the kernel's context switcher
		 * is operating correctly.
		 *
		 * When the subsequent k_sleep() invocation is
		 * performed, a (cooperative) context switch back to the
		 * preempted thread will occur. This context switch should
		 * result in restoring the state of the thread's floating point
		 * registers from when the thread was swapped out due to the
		 * occurrence of the timer tick.
		 */
		_load_then_store_all_float_registers(&float_reg_set);

		/*
		 * Relinquish the processor for the remainder of the current
		 * system clock tick, so that lower priority threads get a
		 * chance to run.
		 *
		 * This exercises the ability of the kernel to restore the
		 * FPU state of a low priority thread _and_ the ability of the
		 * kernel to provide a "clean" FPU state to this thread
		 * once the sleep ends.
		 */
		k_sleep(K_MSEC(1));

		/* Periodically issue progress report */
		if ((load_store_high_count % 100) == 0) {
			PRINT_DATA("Load and store OK after %u (high) "
				   "+ %u (low) tests\n",
				   load_store_high_count,
				   load_store_low_count);
		}
	}

#ifdef CONFIG_COVERAGE_GCOV
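	/* Dump the collected coverage data before the test ends */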
	gcov_coverage_dump();
#endif

	/* Signal end of test */
	test_exited = true;
	k_sem_give(&test_exit_sem);
}

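/*
 * Define the two test threads statically. The start delay of K_TICKS_FOREVER
 * keeps them dormant until they are explicitly launched with k_thread_start()
 * in the test body below.
 */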
K_THREAD_DEFINE(load_low, THREAD_STACK_SIZE, load_store_low, NULL, NULL, NULL,
		THREAD_LOW_PRIORITY, THREAD_FP_FLAGS, K_TICKS_FOREVER);

K_THREAD_DEFINE(load_high, THREAD_STACK_SIZE, load_store_high, NULL, NULL, NULL,
		THREAD_HIGH_PRIORITY, THREAD_FP_FLAGS, K_TICKS_FOREVER);

ZTEST(fpu_sharing_generic, test_load_store)
{
	/* Initialise test states */
	test_exited = false;
	k_sem_reset(&test_exit_sem);

	/* Start test threads */
	k_thread_start(load_low);
	k_thread_start(load_high);

	/* Wait for the high priority thread to signal the end of the test */
	k_sem_take(&test_exit_sem, K_FOREVER);
}