/*
 * Copyright (c) 2010-2015 Wind River Systems, Inc.
 *
 * SPDX-License-Identifier: Apache-2.0
 */

/**
 * @file
 * @brief Kernel swapper code for IA-32
 *
 * This module implements the arch_swap() routine for the IA-32 architecture.
 */

#include <zephyr/arch/x86/ia32/asm.h>
#include <zephyr/kernel.h>
#include <zephyr/arch/cpu.h>
#include <kernel_arch_data.h>
#include <offsets_short.h>

	/* exports (internal APIs) */

	GTEXT(arch_swap)
	GTEXT(z_x86_thread_entry_wrapper)
	GTEXT(_x86_user_thread_entry_wrapper)

	/* externs */
#if !defined(CONFIG_X86_KPTI) && defined(CONFIG_X86_USERSPACE)
	GTEXT(z_x86_swap_update_page_tables)
#endif
	GDATA(_k_neg_eagain)
/*
 * Given that arch_swap() is called to effect a cooperative context switch,
 * only the non-volatile integer registers need to be saved in the TCS of the
 * outgoing thread.  The restoration of the integer registers of the incoming
 * thread depends on whether that thread was preemptively context switched out.
 * The X86_THREAD_FLAG_INT and _EXC bits in the k_thread->arch.flags field will
 * signify that the thread was preemptively context switched out, and thus both
 * the volatile and non-volatile integer registers need to be restored.
 *
 * The volatile integer registers need to be scrubbed to ensure they contain
 * no sensitive information that could compromise system security.  This is to
 * make sure that information will not be leaked from one application to
 * another via these registers.
 *
 * Here, the volatile integer registers (EAX, ECX, EDX) have been scrubbed.
 * Any changes to this routine that alter the values of these registers MUST
 * be reviewed for potential security impacts.
 *
 * Floating point registers are handled using a lazy save/restore mechanism
 * since it's expected that relatively few threads will be created with the
 * K_FP_REGS or K_SSE_REGS option bits.  The kernel data structure maintains a
 * 'current_fp' field to keep track of the thread that "owns" the floating
 * point registers.  Floating point registers consist of ST0 -> ST7 (x87 FPU
 * and MMX registers) and XMM0 -> XMM7.
 *
 * All floating point registers are considered 'volatile'; thus they will
 * only be saved/restored when a preemptive context switch occurs.
 *
 * Floating point registers are currently NOT scrubbed, and are subject to
 * potential security leaks.
 *
 * C function prototype:
 *
 * unsigned int arch_swap(unsigned int eflags);
 */
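
/*
 * For illustration only: a minimal sketch (not the kernel's actual code) of
 * how a caller reaches this routine.  Interrupts are locked first, and the
 * resulting lock-out key is handed to arch_swap() as its 'eflags' parameter:
 *
 *     unsigned int key = arch_irq_lock();
 *
 *     // ... scheduler selects the next thread to run ...
 *
 *     // Blocks until this thread is swapped back in; returns -EAGAIN
 *     // unless overridden via arch_thread_return_value_set().
 *     unsigned int ret = arch_swap(key);
 */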

SECTION_FUNC(PINNED_TEXT, arch_swap)
#if defined(CONFIG_INSTRUMENT_THREAD_SWITCHING)
	pushl	%eax
	call	z_thread_mark_switched_out
	popl	%eax
#endif
	/*
	 * Push all non-volatile registers onto the stack; do not copy
	 * any of these registers into the k_thread.  Only the 'esp' register
	 * (after all the pushes have been performed) will be stored in the
	 * k_thread.
	 */

	pushl	%edi

	movl	$_kernel, %edi

	pushl	%esi
	pushl	%ebx
	pushl	%ebp

	/*
	 * Carve space for the return value. Setting it to a default of
	 * -EAGAIN eliminates the need for the timeout code to set it.
	 * If another value is ever needed, it can be modified with
	 * arch_thread_return_value_set().
	 */

	pushl	_k_neg_eagain

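	/*
	 * Illustrative stack layout at this point, derived from the pushes
	 * above (the stack grows down):
	 *
	 *	esp ->	-EAGAIN (default return value slot)
	 *		saved EBP
	 *		saved EBX
	 *		saved ESI
	 *		saved EDI
	 *		return address
	 *		'eflags' parameter
	 */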
	/* save esp into k_thread structure */

	movl	_kernel_offset_to_current(%edi), %edx
	movl	%esp, _thread_offset_to_esp(%edx)
	movl	_kernel_offset_to_ready_q_cache(%edi), %eax

	/*
	 * At this point, the %eax register contains the 'k_thread *' of the
	 * thread to be swapped in, and %edi still contains &_kernel. %edx
	 * has the pointer to the outgoing thread.
	 */
#if defined(CONFIG_X86_USERSPACE) && !defined(CONFIG_X86_KPTI)

	push	%eax
	call	z_x86_swap_update_page_tables
	pop	%eax

	/* Page tables updated. All memory access after this point needs to
	 * be to memory that has the same mappings and access attributes with
	 * respect to supervisor mode!
	 */
#endif

#ifdef CONFIG_EAGER_FPU_SHARING
	/* Eager floating point state restore logic
	 *
	 * Addresses CVE-2018-3665
	 * Used as an alternative to CONFIG_LAZY_FPU_SHARING if there is any
	 * sensitive data in the floating point/SIMD registers in a system
	 * with untrusted threads.
	 *
	 * Unconditionally save/restore floating point registers on context
	 * switch.
	 */
	/* Save outgoing thread context */
#ifdef CONFIG_X86_SSE
	fxsave	_thread_offset_to_preempFloatReg(%edx)
	fninit
#else
	fnsave	_thread_offset_to_preempFloatReg(%edx)
#endif
	/* Restore incoming thread context */
#ifdef CONFIG_X86_SSE
	fxrstor	_thread_offset_to_preempFloatReg(%eax)
#else
	frstor	_thread_offset_to_preempFloatReg(%eax)
#endif /* CONFIG_X86_SSE */
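	/* Note: 'fxsave' captures the x87/MMX state plus the XMM registers
	 * and MXCSR, while 'fnsave' covers only the x87/MMX state, which is
	 * why the save/restore instruction pair above depends on
	 * CONFIG_X86_SSE.
	 */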
#elif defined(CONFIG_LAZY_FPU_SHARING)
	/*
	 * Clear the CR0[TS] bit (in the event the current thread
	 * doesn't have floating point enabled) to prevent the "device not
	 * available" exception when executing the subsequent fxsave/fnsave
	 * and/or fxrstor/frstor instructions.
	 *
	 * Indeed, it's possible that none of the aforementioned instructions
	 * needs to be executed if, for example, the incoming thread doesn't
	 * utilize floating point operations.  However, the code responsible
	 * for setting the CR0[TS] bit appropriately for the incoming thread
	 * (just after the 'restoreContext_NoFloatSwap' label) will leverage
	 * the fact that the following 'clts' was performed already.
	 */

	clts

	/*
	 * Determine whether the incoming thread utilizes floating point regs
	 * _and_ whether the thread was context switched out preemptively.
	 */

	testb	$_FP_USER_MASK, _thread_offset_to_user_options(%eax)
	je	restoreContext_NoFloatSwap

	/*
	 * The incoming thread uses floating point registers:
	 * Was it the last thread to use floating point registers?
	 * If so, there is no need to restore the floating point context.
	 */

	movl	_kernel_offset_to_current_fp(%edi), %ebx
	cmpl	%ebx, %eax
	je	restoreContext_NoFloatSwap

	/*
	 * The incoming thread uses floating point registers and it was _not_
	 * the last thread to use those registers:
	 * Check whether the current FP context actually needs to be saved
	 * before swapping in the context of the incoming thread.
	 */

	testl	%ebx, %ebx
	jz	restoreContext_NoFloatSave

	/*
	 * The incoming thread uses floating point registers and it was _not_
	 * the last thread to use those registers _and_ the current FP context
	 * needs to be saved.
	 *
	 * Given that the ST[0] -> ST[7] and XMM0 -> XMM7 registers are all
	 * 'volatile', only save the registers if the "current FP context"
	 * was preemptively context switched.
	 */

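	/* X86_THREAD_FLAG_ALL is the union of the X86_THREAD_FLAG_INT and
	 * X86_THREAD_FLAG_EXC bits described at the top of this file, i.e.
	 * "was this thread switched out by an interrupt or exception?"
	 */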
	testb	$X86_THREAD_FLAG_ALL, _thread_offset_to_flags(%ebx)
	je	restoreContext_NoFloatSave

#ifdef CONFIG_X86_SSE
	testb	$K_SSE_REGS, _thread_offset_to_user_options(%ebx)
	je	x87FloatSave

	/*
	 * 'fxsave' does NOT perform an implicit 'fninit', therefore issue an
	 * 'fninit' to ensure a "clean" FPU state for the incoming thread
	 * (for the case when the fxrstor is not executed).
	 */

	fxsave	_thread_offset_to_preempFloatReg(%ebx)
	fninit
	jmp	floatSaveDone

x87FloatSave:
#endif /* CONFIG_X86_SSE */

	/* 'fnsave' performs an implicit 'fninit' after saving state! */

	fnsave	_thread_offset_to_preempFloatReg(%ebx)

	/* fall through to 'floatSaveDone' */

floatSaveDone:
restoreContext_NoFloatSave:

	/*********************************************************
	 * Restore floating point context of the incoming thread.
	 *********************************************************/

	/*
	 * Again, given that the ST[0] -> ST[7] and XMM0 -> XMM7 registers are
	 * all 'volatile', only restore the registers if the incoming thread
	 * was previously preemptively context switched out.
	 */

	testb	$X86_THREAD_FLAG_ALL, _thread_offset_to_flags(%eax)
	je	restoreContext_NoFloatRestore

#ifdef CONFIG_X86_SSE
	testb	$K_SSE_REGS, _thread_offset_to_user_options(%eax)
	je	x87FloatRestore

	fxrstor	_thread_offset_to_preempFloatReg(%eax)
	jmp	floatRestoreDone

x87FloatRestore:
#endif /* CONFIG_X86_SSE */

	frstor	_thread_offset_to_preempFloatReg(%eax)

	/* fall through to 'floatRestoreDone' */

floatRestoreDone:
restoreContext_NoFloatRestore:

	/* record that the incoming thread "owns" the floating point registers */

	movl	%eax, _kernel_offset_to_current_fp(%edi)

	/*
	 * Branch point when none of the floating point registers need to be
	 * swapped because: a) the incoming thread does not use them OR
	 * b) the incoming thread is the last thread that used those registers.
	 */

restoreContext_NoFloatSwap:

	/*
	 * Leave CR0[TS] clear if the incoming thread utilizes the floating
	 * point registers.
	 */

	testb	$_FP_USER_MASK, _thread_offset_to_user_options(%eax)
	jne	CR0HandlingDone

	/*
	 * The incoming thread does NOT currently utilize the floating point
	 * registers, so set CR0[TS] to ensure the "device not available"
	 * exception occurs on the first attempt to access an x87 FPU, MMX,
	 * or XMM register.
	 */

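	/* CR0[TS] (Task Switched) is bit 3 of CR0, hence the 0x8 mask. */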
	movl	%cr0, %edx
	orl	$0x8, %edx
	movl	%edx, %cr0

CR0HandlingDone:

#endif /* CONFIG_LAZY_FPU_SHARING */

	/* update _kernel.current to reflect incoming thread */

	movl	%eax, _kernel_offset_to_current(%edi)

#if defined(CONFIG_X86_USE_THREAD_LOCAL_STORAGE)
	pushl	%eax

	call	z_x86_tls_update_gdt

	/* Since the segment descriptor has changed, it needs to be reloaded */
	movw	$GS_TLS_SEG, %ax
	movw	%ax, %gs

	popl	%eax
#endif

	/* recover thread stack pointer from k_thread */

	movl	_thread_offset_to_esp(%eax), %esp

	/* load return value from a possible arch_thread_return_value_set() */

	popl	%eax

	/* pop the non-volatile registers from the stack */

	popl	%ebp
	popl	%ebx
	popl	%esi
	popl	%edi

	/*
	 * %eax may contain one of these values:
	 *
	 * - the return value for arch_swap() that was set up by a call to
	 *   arch_thread_return_value_set()
	 * - the default return value, -EAGAIN, pushed at entry
	 */

	/* Utilize the 'eflags' parameter to arch_swap() */

	pushl	4(%esp)
	popfl

#if defined(CONFIG_INSTRUMENT_THREAD_SWITCHING)
	pushl	%eax
	call	z_thread_mark_switched_in
	popl	%eax
#endif
	ret
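
/*
 * For reference, a minimal sketch (assuming the IA-32 definition in
 * kernel_arch_func.h; check the real source) of how the carved slot above is
 * consumed by arch_thread_return_value_set():
 *
 *     static ALWAYS_INLINE void
 *     arch_thread_return_value_set(struct k_thread *thread,
 *                                  unsigned int value)
 *     {
 *             // The saved esp points at the default -EAGAIN slot, so
 *             // writing through it replaces arch_swap()'s return value.
 *             *(unsigned int *)(thread->callee_saved.esp) = value;
 *     }
 */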

#ifdef _THREAD_WRAPPER_REQUIRED
/**
 *
 * @brief Adjust stack/parameters before invoking thread entry function
 *
 * This function adjusts the initial stack frame created by arch_new_thread()
 * such that the GDB stack frame unwinders recognize it as the outermost frame
 * in the thread's stack.
 *
 * GDB normally stops unwinding a stack when it detects that it has
 * reached a function called main().  Kernel threads, however, do not have
 * a main() function, and there does not appear to be a simple way of stopping
 * the unwinding of the stack.
 *
 * Given the initial thread created by arch_new_thread(), GDB expects to find
 * a return address on the stack immediately above the thread entry routine
 * z_thread_entry, in the location occupied by the initial EFLAGS.  GDB
 * attempts to examine the memory at this return address, which typically
 * results in an invalid access to page 0 of memory.
 *
 * This function overwrites the initial EFLAGS with zero.  When GDB
 * subsequently attempts to examine memory at address zero, the PeekPoke
 * driver detects an invalid access to address zero and returns an error,
 * which causes the GDB stack unwinder to stop somewhat gracefully.
 *
 * The initial EFLAGS cannot be overwritten until after z_swap() has swapped
 * in the new thread for the first time.  This routine is called by z_swap()
 * the first time that the new thread is swapped in, and it jumps to
 * z_thread_entry after it has done its work.
 *
 *       __________________
 *      |      param3      |   <------ Top of the stack
 *      |__________________|
 *      |      param2      |           Stack Grows Down
 *      |__________________|                  |
 *      |      param1      |                  V
 *      |__________________|
 *      |      pEntry      |
 *      |__________________|
 *      | initial EFLAGS   |  <----   ESP when invoked by z_swap()
 *      |__________________|             (Zeroed by this routine)
 *
 * The address of the thread entry function needs to be in %edi when this is
 * invoked. It will either be z_thread_entry, or if userspace is enabled,
 * _arch_drop_to_user_mode if this is a user thread.
 *
 * @return This routine does NOT return.
 */

SECTION_FUNC(PINNED_TEXT, z_x86_thread_entry_wrapper)
	movl	$0, (%esp)	/* zero the initial EFLAGS slot (see above) */
	jmp	*%edi		/* tail-jump to the thread entry routine */
#endif /* _THREAD_WRAPPER_REQUIRED */