1 /*
2 * Copyright (c) 2010-2014 Wind River Systems, Inc.
3 *
4 * SPDX-License-Identifier: Apache-2.0
5 */
6
7 /**
8 * @file
9 * @brief Floating point register sharing routines
10 *
11 * This module allows multiple preemptible threads to safely share the system's
12 * floating point registers, by allowing the system to save FPU state info
13 * in a thread's stack region when a preemptive context switch occurs.
14 *
15 * Note: If the kernel has been built without floating point register sharing
16 * support (CONFIG_FPU_SHARING), the floating point registers can still be used
17 * safely by one or more cooperative threads OR by a single preemptive thread,
18 * but not by both.
19 *
20 * This code is not necessary for systems with CONFIG_EAGER_FPU_SHARING, as
21 * the floating point context is unconditionally saved/restored with every
22 * context switch.
23 *
24 * The floating point register sharing mechanism is designed for minimal
25 * intrusiveness. Floating point state saving is only performed for threads
26 * that explicitly indicate they are using FPU registers, to avoid impacting
27 * the stack size requirements of all other threads. Also, the SSE registers
28 * are only saved for threads that actually used them. For those threads that
29 * do require floating point state saving, a "lazy save/restore" mechanism
30 * is employed so that the FPU's register sets are only switched in and out
31 * when absolutely necessary; this avoids wasting effort preserving them when
32 * there is no risk that they will be altered, or when there is no need to
33 * preserve their contents.
34 *
35 * WARNING
36 * The use of floating point instructions by ISRs is not supported by the
37 * kernel.
38 *
39 * INTERNAL
40 * The kernel sets CR0[TS] to 0 only for threads that require FP register
41 * sharing. All other threads have CR0[TS] set to 1 so that an attempt
42 * to perform an FP operation will cause an exception, allowing the kernel
43 * to enable FP register sharing on its behalf.
44 */
45
#include <errno.h>

#include <zephyr/kernel.h>
#include <kernel_internal.h>
48
49 /* SSE control/status register default value (used by assembler code) */
50 extern uint32_t _sse_mxcsr_default_value;
51
/**
 * @brief Disallow use of floating point capabilities
 *
 * This routine sets CR0[TS] to 1, which disallows the use of FP instructions
 * by the currently executing thread.
 */
static inline void z_FpAccessDisable(void)
{
	void *tempReg;

	/*
	 * Read-modify-write CR0: bit 3 (mask 0x8) is TS. Once TS is set, the
	 * next x87/MMX/SSEx instruction raises the "device not available"
	 * exception, which _FpNotAvailableExcHandler() uses to lazily grant
	 * FP access. "memory" clobber keeps the compiler from reordering
	 * memory accesses across the CR0 update.
	 */
	__asm__ volatile(
		"movl %%cr0, %0;\n\t"
		"orl $0x8, %0;\n\t"
		"movl %0, %%cr0;\n\t"
		: "=r"(tempReg)
		:
		: "memory");
}
70
71
/**
 * @brief Save x87/MMX (non-SSE) floating point context
 *
 * This routine saves the system's "live" x87/MMX register state into the
 * specified area using FNSAVE. SSEx registers are NOT saved here; threads
 * tagged with K_SSE_REGS are handled by z_do_fp_and_sse_regs_save().
 * Function is invoked by FpCtxSave(struct k_thread *thread).
 *
 * Note: saving via FNSAVE also re-initializes the FPU as a side effect
 * (see the comment block inside z_float_enable()).
 */
static inline void z_do_fp_regs_save(void *preemp_float_reg)
{
	__asm__ volatile("fnsave (%0);\n\t"
			 :
			 : "r"(preemp_float_reg)
			 : "memory");
}
87
/**
 * @brief Save x87/MMX/SSEx floating point context
 *
 * This routine saves the system's "live" x87/MMX/SSEx register state into
 * the specified area using FXSAVE. It is selected by FpCtxSave() only for
 * threads tagged with the K_SSE_REGS option.
 * Function is invoked by FpCtxSave(struct k_thread *thread).
 *
 * NOTE(review): FXSAVE requires a 16-byte-aligned save area — presumably
 * preempFloatReg in the arch thread struct guarantees this; confirm there.
 */
static inline void z_do_fp_and_sse_regs_save(void *preemp_float_reg)
{
	__asm__ volatile("fxsave (%0);\n\t"
			 :
			 : "r"(preemp_float_reg)
			 : "memory");
}
103
/**
 * @brief Initialize floating point register context information.
 *
 * This routine initializes the system's "live" x87 floating point registers
 * by executing FNINIT, giving the new FP owner a clean register set.
 */
static inline void z_do_fp_regs_init(void)
{
	__asm__ volatile("fninit\n\t");
}
113
/**
 * @brief Initialize SSE register context information.
 *
 * This routine initializes the system's "live" SSE control/status state by
 * loading MXCSR from _sse_mxcsr_default_value (defined in assembler code,
 * declared extern at the top of this file).
 */
static inline void z_do_sse_regs_init(void)
{
	__asm__ volatile("ldmxcsr _sse_mxcsr_default_value\n\t");
}
123
124 /*
125 * Save a thread's floating point context information.
126 *
127 * This routine saves the system's "live" floating point context into the
128 * specified thread control block. The SSE registers are saved only if the
129 * thread is actually using them.
130 */
FpCtxSave(struct k_thread * thread)131 static void FpCtxSave(struct k_thread *thread)
132 {
133 #ifdef CONFIG_X86_SSE
134 if ((thread->base.user_options & K_SSE_REGS) != 0) {
135 z_do_fp_and_sse_regs_save(&thread->arch.preempFloatReg);
136 return;
137 }
138 #endif
139 z_do_fp_regs_save(&thread->arch.preempFloatReg);
140 }
141
/*
 * Initialize a thread's floating point context information.
 *
 * Resets the "live" x87 state; additionally resets the SSE control/status
 * register when the thread is tagged as an SSE user.
 */
static inline void FpCtxInit(struct k_thread *thread)
{
	z_do_fp_regs_init();

#ifdef CONFIG_X86_SSE
	if ((thread->base.user_options & K_SSE_REGS) != 0) {
		z_do_sse_regs_init();
	}
#else
	ARG_UNUSED(thread);
#endif
}
157
/*
 * Enable preservation of floating point context information.
 *
 * The transition from "non-FP supporting" to "FP supporting" must be done
 * atomically to avoid confusing the floating point logic used by z_swap(), so
 * this routine locks interrupts to ensure that a context switch does not occur.
 * The locking isn't really needed when the routine is called by a cooperative
 * thread (since context switching can't occur), but it is harmless.
 *
 * @param thread thread to enable FP preservation for; NULL is ignored
 * @param options FP option bits (e.g. K_SSE_REGS) OR'ed into the thread's
 *                user options (note: truncated to uint8_t when stored)
 */
void z_float_enable(struct k_thread *thread, unsigned int options)
{
	unsigned int imask;
	struct k_thread *fp_owner;

	if (!thread) {
		return;
	}

	/* Ensure a preemptive context switch does not occur */

	imask = irq_lock();

	/* Indicate thread requires floating point context saving */

	thread->base.user_options |= (uint8_t)options;
	/*
	 * The current thread might not allow FP instructions, so clear CR0[TS]
	 * so we can use them. (CR0[TS] gets restored later on, if necessary.)
	 */

	__asm__ volatile("clts\n\t");

	/*
	 * Save existing floating point context (since it is about to change),
	 * but only if the FPU is "owned" by an FP-capable task that is
	 * currently handling an interrupt or exception (meaning its FP context
	 * must be preserved).
	 */

	fp_owner = _kernel.current_fp;
	if (fp_owner != NULL) {
		if ((fp_owner->arch.flags & X86_THREAD_FLAG_ALL) != 0) {
			FpCtxSave(fp_owner);
		}
	}

	/* Now create a virgin FP context */

	FpCtxInit(thread);

	/* Associate the new FP context with the specified thread */

	if (thread == _current) {
		/*
		 * When enabling FP support for the current thread, just claim
		 * ownership of the FPU and leave CR0[TS] unset.
		 *
		 * (The FP context is "live" in hardware, not saved in TCS.)
		 */

		_kernel.current_fp = thread;
	} else {
		/*
		 * When enabling FP support for someone else, assign ownership
		 * of the FPU to them (unless we need it ourselves).
		 */

		if ((_current->base.user_options & _FP_USER_MASK) == 0) {
			/*
			 * We are not FP-capable, so mark FPU as owned by the
			 * thread we've just enabled FP support for, then
			 * disable our own FP access by setting CR0[TS] back
			 * to its original state.
			 */

			_kernel.current_fp = thread;
			z_FpAccessDisable();
		} else {
			/*
			 * We are FP-capable (and thus had FPU ownership on
			 * entry), so save the new FP context in their TCS,
			 * leave FPU ownership with self, and leave CR0[TS]
			 * unset.
			 *
			 * The saved FP context is needed in case the thread
			 * we enabled FP support for is currently pre-empted,
			 * since z_swap() uses it to restore FP context when
			 * the thread re-activates.
			 *
			 * Saving the FP context reinits the FPU, and thus
			 * our own FP context, but that's OK since it didn't
			 * need to be preserved. (i.e. We aren't currently
			 * handling an interrupt or exception.)
			 */

			FpCtxSave(thread);
		}
	}

	irq_unlock(imask);
}
259
260 /**
261 * Disable preservation of floating point context information.
262 *
263 * The transition from "FP supporting" to "non-FP supporting" must be done
264 * atomically to avoid confusing the floating point logic used by z_swap(), so
265 * this routine locks interrupts to ensure that a context switch does not occur.
266 * The locking isn't really needed when the routine is called by a cooperative
267 * thread (since context switching can't occur), but it is harmless.
268 */
z_float_disable(struct k_thread * thread)269 int z_float_disable(struct k_thread *thread)
270 {
271 unsigned int imask;
272
273 /* Ensure a preemptive context switch does not occur */
274
275 imask = irq_lock();
276
277 /* Disable all floating point capabilities for the thread */
278
279 thread->base.user_options &= ~_FP_USER_MASK;
280
281 if (thread == _current) {
282 z_FpAccessDisable();
283 _kernel.current_fp = (struct k_thread *)0;
284 } else {
285 if (_kernel.current_fp == thread) {
286 _kernel.current_fp = (struct k_thread *)0;
287 }
288 }
289
290 irq_unlock(imask);
291
292 return 0;
293 }
294
/*
 * Handler for "device not available" exception.
 *
 * This routine is registered to handle the "device not available" exception
 * (vector = 7).
 *
 * The processor will generate this exception if any x87 FPU, MMX, or SSEx
 * instruction is executed while CR0[TS]=1. The handler then enables the
 * current thread to use all supported floating point registers.
 */
void _FpNotAvailableExcHandler(struct arch_esf *pEsf)
{
	ARG_UNUSED(pEsf);

	/*
	 * Assume the exception did not occur in an ISR.
	 * (In other words, CPU cycles will not be consumed to perform
	 * error checking to ensure the exception was not generated in an ISR.)
	 */

	/* Enable highest level of FP capability configured into the kernel */

	k_float_enable(_current, _FP_USER_MASK);
}
/* Wire this handler to vector 7 (#NM), which pushes no error code */
_EXCEPTION_CONNECT_NOCODE(_FpNotAvailableExcHandler,
			  IV_DEVICE_NOT_AVAILABLE, 0);
321