1 /*
2  * Copyright (c) 2010-2014 Wind River Systems, Inc.
3  *
4  * SPDX-License-Identifier: Apache-2.0
5  */
6 
7 /**
8  * @file
9  * @brief Floating point register sharing routines
10  *
11  * This module allows multiple preemptible threads to safely share the system's
12  * floating point registers, by allowing the system to save FPU state info
13  * in a thread's stack region when a preemptive context switch occurs.
14  *
15  * Note: If the kernel has been built without floating point register sharing
16  * support (CONFIG_FPU_SHARING), the floating point registers can still be used
17  * safely by one or more cooperative threads OR by a single preemptive thread,
18  * but not by both.
19  *
20  * This code is not necessary for systems with CONFIG_EAGER_FPU_SHARING, as
21  * the floating point context is unconditionally saved/restored with every
22  * context switch.
23  *
24  * The floating point register sharing mechanism is designed for minimal
25  * intrusiveness.  Floating point state saving is only performed for threads
26  * that explicitly indicate they are using FPU registers, to avoid impacting
27  * the stack size requirements of all other threads. Also, the SSE registers
28  * are only saved for threads that actually used them. For those threads that
29  * do require floating point state saving, a "lazy save/restore" mechanism
30  * is employed so that the FPU's register sets are only switched in and out
31  * when absolutely necessary; this avoids wasting effort preserving them when
32  * there is no risk that they will be altered, or when there is no need to
33  * preserve their contents.
34  *
35  * WARNING
36  * The use of floating point instructions by ISRs is not supported by the
37  * kernel.
38  *
39  * INTERNAL
40  * The kernel sets CR0[TS] to 0 only for threads that require FP register
41  * sharing. All other threads have CR0[TS] set to 1 so that an attempt
42  * to perform an FP operation will cause an exception, allowing the kernel
43  * to enable FP register sharing on its behalf.
44  */
45 
#include <zephyr/kernel.h>
#include <kernel_internal.h>
#include <errno.h>
48 
49 /* SSE control/status register default value (used by assembler code) */
50 extern uint32_t _sse_mxcsr_default_value;
51 
/**
 * @brief Disallow use of floating point capabilities
 *
 * This routine sets CR0[TS] to 1, so that the next FP instruction executed
 * by the currently executing thread raises a "device not available"
 * exception (vector 7), letting the kernel lazily enable FP sharing.
 */
static inline void z_FpAccessDisable(void)
{
	void *cr0_scratch;

	/* CR0 cannot be modified in place: read-modify-write via a register */
	__asm__ volatile("movl %%cr0, %0;\n\t"
			 "orl $0x8, %0;\n\t" /* set TS (bit 3) */
			 "movl %0, %%cr0;\n\t"
			 : "=r"(cr0_scratch)
			 :
			 : "memory");
}
70 
71 
/**
 * @brief Save x87/MMX (non-SSE) register context
 *
 * This routine stores the system's "live" x87/MMX state (via FNSAVE) into
 * the caller-supplied save area; SSEx registers are NOT captured.
 * Invoked by FpCtxSave(struct k_thread *thread) for threads that do not
 * use the SSE registers.
 */
static inline void z_do_fp_regs_save(void *save_area)
{
	/* FNSAVE also re-initializes the FPU after storing its state */
	__asm__ volatile("fnsave (%0);\n\t"
			 :
			 : "r"(save_area)
			 : "memory");
}
87 
/**
 * @brief Save x87/MMX/SSEx register context
 *
 * This routine stores the system's "live" x87/MMX state together with the
 * SSEx registers (via FXSAVE) into the caller-supplied save area.
 * Invoked by FpCtxSave(struct k_thread *thread) for threads that use the
 * SSE registers (K_SSE_REGS).
 */
static inline void z_do_fp_and_sse_regs_save(void *save_area)
{
	__asm__ volatile("fxsave (%0);\n\t"
			 :
			 : "r"(save_area)
			 : "memory");
}
103 
/**
 * @brief Initialize floating point register context information.
 *
 * This routine re-initializes the system's "live" x87/MMX registers to
 * their power-up defaults via FNINIT.
 */
static inline void z_do_fp_regs_init(void)
{
	__asm__ volatile("fninit\n\t");
}
113 
/**
 * @brief Initialize SSE register context information.
 *
 * This routine resets the "live" SSE control/status register (MXCSR) to the
 * kernel's default value, which is defined in assembler code.
 */
static inline void z_do_sse_regs_init(void)
{
	__asm__ volatile("ldmxcsr _sse_mxcsr_default_value\n\t");
}
123 
124 /*
125  * Save a thread's floating point context information.
126  *
127  * This routine saves the system's "live" floating point context into the
128  * specified thread control block. The SSE registers are saved only if the
129  * thread is actually using them.
130  */
FpCtxSave(struct k_thread * thread)131 static void FpCtxSave(struct k_thread *thread)
132 {
133 #ifdef CONFIG_X86_SSE
134 	if ((thread->base.user_options & K_SSE_REGS) != 0) {
135 		z_do_fp_and_sse_regs_save(&thread->arch.preempFloatReg);
136 		return;
137 	}
138 #endif
139 	z_do_fp_regs_save(&thread->arch.preempFloatReg);
140 }
141 
/*
 * Initialize a thread's floating point context information.
 *
 * This routine initializes the system's "live" floating point context.
 * The SSE registers are initialized only if the thread is actually using
 * them.
 */
static inline void FpCtxInit(struct k_thread *thread)
{
	/* Reset x87/MMX state unconditionally */
	z_do_fp_regs_init();
#ifdef CONFIG_X86_SSE
	if ((thread->base.user_options & K_SSE_REGS) != 0) {
		/* Thread uses SSE: also reset MXCSR to its default */
		z_do_sse_regs_init();
	}
#endif
}
157 
/*
 * Enable preservation of floating point context information.
 *
 * The transition from "non-FP supporting" to "FP supporting" must be done
 * atomically to avoid confusing the floating point logic used by z_swap(), so
 * this routine locks interrupts to ensure that a context switch does not occur.
 * The locking isn't really needed when the routine is called by a cooperative
 * thread (since context switching can't occur), but it is harmless.
 *
 * @param thread  thread to enable FP register preservation for; a NULL
 *                thread makes this routine a no-op
 * @param options FP capability bits OR-ed into the thread's user_options;
 *                note only the low 8 bits survive the (uint8_t) cast below
 */
void z_float_enable(struct k_thread *thread, unsigned int options)
{
	unsigned int imask;
	struct k_thread *fp_owner;

	/* Nothing to do when no thread is specified */
	if (!thread) {
		return;
	}

	/* Ensure a preemptive context switch does not occur */

	imask = irq_lock();

	/* Indicate thread requires floating point context saving */

	thread->base.user_options |= (uint8_t)options;
	/*
	 * The current thread might not allow FP instructions, so clear CR0[TS]
	 * so we can use them. (CR0[TS] gets restored later on, if necessary.)
	 */

	__asm__ volatile("clts\n\t");

	/*
	 * Save existing floating point context (since it is about to change),
	 * but only if the FPU is "owned" by an FP-capable task that is
	 * currently handling an interrupt or exception (meaning its FP context
	 * must be preserved).
	 */

	fp_owner = _kernel.current_fp;
	if (fp_owner != NULL) {
		/* flags != 0 means the owner is mid-interrupt/exception */
		if ((fp_owner->arch.flags & X86_THREAD_FLAG_ALL) != 0) {
			FpCtxSave(fp_owner);
		}
	}

	/* Now create a virgin FP context */

	FpCtxInit(thread);

	/* Associate the new FP context with the specified thread */

	if (thread == _current) {
		/*
		 * When enabling FP support for the current thread, just claim
		 * ownership of the FPU and leave CR0[TS] unset.
		 *
		 * (The FP context is "live" in hardware, not saved in TCS.)
		 */

		_kernel.current_fp = thread;
	} else {
		/*
		 * When enabling FP support for someone else, assign ownership
		 * of the FPU to them (unless we need it ourselves).
		 */

		if ((_current->base.user_options & _FP_USER_MASK) == 0) {
			/*
			 * We are not FP-capable, so mark FPU as owned by the
			 * thread we've just enabled FP support for, then
			 * disable our own FP access by setting CR0[TS] back
			 * to its original state.
			 */

			_kernel.current_fp = thread;
			z_FpAccessDisable();
		} else {
			/*
			 * We are FP-capable (and thus had FPU ownership on
			 * entry), so save the new FP context in their TCS,
			 * leave FPU ownership with self, and leave CR0[TS]
			 * unset.
			 *
			 * The saved FP context is needed in case the thread
			 * we enabled FP support for is currently pre-empted,
			 * since z_swap() uses it to restore FP context when
			 * the thread re-activates.
			 *
			 * Saving the FP context reinits the FPU, and thus
			 * our own FP context, but that's OK since it didn't
			 * need to be preserved. (i.e. We aren't currently
			 * handling an interrupt or exception.)
			 */

			FpCtxSave(thread);
		}
	}

	irq_unlock(imask);
}
259 
260 /**
261  * Disable preservation of floating point context information.
262  *
263  * The transition from "FP supporting" to "non-FP supporting" must be done
264  * atomically to avoid confusing the floating point logic used by z_swap(), so
265  * this routine locks interrupts to ensure that a context switch does not occur.
266  * The locking isn't really needed when the routine is called by a cooperative
267  * thread (since context switching can't occur), but it is harmless.
268  */
z_float_disable(struct k_thread * thread)269 int z_float_disable(struct k_thread *thread)
270 {
271 	unsigned int imask;
272 
273 	/* Ensure a preemptive context switch does not occur */
274 
275 	imask = irq_lock();
276 
277 	/* Disable all floating point capabilities for the thread */
278 
279 	thread->base.user_options &= ~_FP_USER_MASK;
280 
281 	if (thread == _current) {
282 		z_FpAccessDisable();
283 		_kernel.current_fp = (struct k_thread *)0;
284 	} else {
285 		if (_kernel.current_fp == thread) {
286 			_kernel.current_fp = (struct k_thread *)0;
287 		}
288 	}
289 
290 	irq_unlock(imask);
291 
292 	return 0;
293 }
294 
295 /*
296  * Handler for "device not available" exception.
297  *
298  * This routine is registered to handle the "device not available" exception
299  * (vector = 7).
300  *
301  * The processor will generate this exception if any x87 FPU, MMX, or SSEx
302  * instruction is executed while CR0[TS]=1. The handler then enables the
303  * current thread to use all supported floating point registers.
304  */
_FpNotAvailableExcHandler(struct arch_esf * pEsf)305 void _FpNotAvailableExcHandler(struct arch_esf *pEsf)
306 {
307 	ARG_UNUSED(pEsf);
308 
309 	/*
310 	 * Assume the exception did not occur in an ISR.
311 	 * (In other words, CPU cycles will not be consumed to perform
312 	 * error checking to ensure the exception was not generated in an ISR.)
313 	 */
314 
315 	/* Enable highest level of FP capability configured into the kernel */
316 
317 	k_float_enable(_current, _FP_USER_MASK);
318 }
319 _EXCEPTION_CONNECT_NOCODE(_FpNotAvailableExcHandler,
320 		IV_DEVICE_NOT_AVAILABLE, 0);
321