/*
 * Copyright (c) 2010-2015 Wind River Systems, Inc.
 *
 * SPDX-License-Identifier: Apache-2.0
 */

/**
 * @file
 * @brief Kernel swapper code for IA-32
 *
 * This module implements the arch_swap() routine for the IA-32 architecture.
 */

#include <zephyr/arch/x86/ia32/asm.h>
#include <zephyr/kernel.h>
#include <zephyr/arch/cpu.h>
#include <kernel_arch_data.h>
#include <offsets_short.h>

	/* exports (internal APIs) */

	GTEXT(arch_swap)
	GTEXT(z_x86_thread_entry_wrapper)
	GTEXT(_x86_user_thread_entry_wrapper)

	/* externs */
#if !defined(CONFIG_X86_KPTI) && defined(CONFIG_X86_USERSPACE)
	GTEXT(z_x86_swap_update_page_tables)
#endif
	GDATA(_k_neg_eagain)

/*
 * Given that arch_swap() is called to effect a cooperative context switch,
 * only the non-volatile integer registers need to be saved in the k_thread
 * of the outgoing thread. The restoration of the integer registers of the
 * incoming thread depends on whether that thread was preemptively context
 * switched out. The X86_THREAD_FLAG_INT and _EXC bits in the
 * k_thread->arch.flags field signify that the thread was preemptively
 * context switched out, and thus both the volatile and non-volatile integer
 * registers need to be restored.
 *
 * The non-volatile registers need not be scrubbed of sensitive information:
 * they are restored to the values that the incoming thread itself saved on
 * its last context switch, so no information can leak from one thread to
 * another through them.
 *
 * The volatile integer registers (EAX, ECX, EDX) are clobbered by this
 * routine and are not preserved across the switch. Any changes to this
 * routine that alter the handling of these registers MUST be reviewed for
 * potential security impacts.
 *
 * Floating point registers are handled using a lazy save/restore mechanism
 * since it's expected relatively few threads will be created with the
 * K_FP_REGS or K_SSE_REGS option bits. The kernel data structure maintains a
 * 'current_fp' field to keep track of the thread that "owns" the floating
 * point registers. Floating point registers consist of ST0 -> ST7 (x87 FPU
 * and MMX registers) and XMM0 -> XMM7.
 *
 * All floating point registers are considered 'volatile'; thus they will
 * only be saved/restored when a preemptive context switch occurs.
 *
 * Floating point registers are currently NOT scrubbed, and are subject to
 * potential security leaks.
 *
 * C function prototype:
 *
 * unsigned int arch_swap(unsigned int eflags);
 */

SECTION_FUNC(PINNED_TEXT, arch_swap)
#if defined(CONFIG_INSTRUMENT_THREAD_SWITCHING)
	pushl %eax
	call z_thread_mark_switched_out
	popl %eax
#endif
	/*
	 * Push all non-volatile registers onto the stack; do not copy
	 * any of these registers into the k_thread. Only the 'esp' register
	 * (after all the pushes have been performed) will be stored in the
	 * k_thread.
	 */

	pushl %edi

	movl $_kernel, %edi

	pushl %esi
	pushl %ebx
	pushl %ebp

	/*
	 * Carve space for the return value. Setting it to a default of
	 * -EAGAIN eliminates the need for the timeout code to set it.
	 * If another value is ever needed, it can be modified with
	 * arch_thread_return_value_set().
	 */
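
/*
 * For illustration only (hedged sketch, not part of this file): the IA-32
 * arch_thread_return_value_set() is expected to overwrite the slot carved
 * out below by writing through the saved stack pointer. Field names such
 * as 'callee_saved.esp' are this sketch's assumptions and should be
 * checked against kernel_arch_func.h:
 *
 *     static inline void
 *     arch_thread_return_value_set(struct k_thread *thread,
 *                                  unsigned int value)
 *     {
 *             // the carved return-value slot sits at the saved ESP
 *             *(unsigned int *)(thread->callee_saved.esp) = value;
 *     }
 */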
	pushl _k_neg_eagain

	/* save esp into k_thread structure */

	movl _kernel_offset_to_current(%edi), %edx
	movl %esp, _thread_offset_to_esp(%edx)
	movl _kernel_offset_to_ready_q_cache(%edi), %eax

	/*
	 * At this point, the %eax register contains the 'k_thread *' of the
	 * thread to be swapped in, and %edi still contains &_kernel. %edx
	 * has the pointer to the outgoing thread.
	 */
#if defined(CONFIG_X86_USERSPACE) && !defined(CONFIG_X86_KPTI)

	push %eax
	call z_x86_swap_update_page_tables
	pop %eax

	/* Page tables updated. All memory access after this point needs to be
	 * to memory that has the same mappings and access attributes with
	 * respect to supervisor mode!
	 */
#endif

#ifdef CONFIG_EAGER_FPU_SHARING
	/* Eager floating point state restore logic
	 *
	 * Addresses CVE-2018-3665
	 * Used as an alternative to CONFIG_LAZY_FPU_SHARING if there is any
	 * sensitive data in the floating point/SIMD registers in a system
	 * with untrusted threads.
	 *
	 * Unconditionally save/restore floating point registers on context
	 * switch.
	 */
	/* Save outgoing thread context */
#ifdef CONFIG_X86_SSE
	fxsave _thread_offset_to_preempFloatReg(%edx)
	fninit
#else
	fnsave _thread_offset_to_preempFloatReg(%edx)
#endif
	/* Restore incoming thread context */
#ifdef CONFIG_X86_SSE
	fxrstor _thread_offset_to_preempFloatReg(%eax)
#else
	frstor _thread_offset_to_preempFloatReg(%eax)
#endif /* CONFIG_X86_SSE */
#elif defined(CONFIG_LAZY_FPU_SHARING)
	/*
	 * Clear the CR0[TS] bit (in the event the current thread
	 * doesn't have floating point enabled) to prevent the "device not
	 * available" exception when executing the subsequent fxsave/fnsave
	 * and/or fxrstor/frstor instructions.
	 *
	 * Indeed, it's possible that none of the aforementioned instructions
	 * need to be executed, for example, if the incoming thread doesn't
	 * utilize floating point operations. However, the code responsible
	 * for setting the CR0[TS] bit appropriately for the incoming thread
	 * (just after the 'restoreContext_NoFloatSwap' label) relies on
	 * this 'clts' having been performed already.
	 */

	clts

	/*
	 * Determine whether the incoming thread utilizes floating point regs
	 * _and_ whether the thread was context switched out preemptively.
	 */

	testb $_FP_USER_MASK, _thread_offset_to_user_options(%eax)
	je restoreContext_NoFloatSwap

	/*
	 * The incoming thread uses floating point registers:
	 * Was it the last thread to use floating point registers?
	 * If so, there is no need to restore the floating point context.
	 */

	movl _kernel_offset_to_current_fp(%edi), %ebx
	cmpl %ebx, %eax
	je restoreContext_NoFloatSwap

	/*
	 * The incoming thread uses floating point registers and it was _not_
	 * the last thread to use those registers:
	 * Check whether the current FP context actually needs to be saved
	 * before swapping in the context of the incoming thread.
	 */

	testl %ebx, %ebx
	jz restoreContext_NoFloatSave

	/*
	 * The incoming thread uses floating point registers and it was _not_
	 * the last thread to use those registers _and_ the current FP context
	 * needs to be saved.
	 */
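
/*
 * Summary of the lazy save/restore decision logic around this point, as an
 * illustrative C sketch (hedged: field paths such as 'base.user_options'
 * and the helper names are assumptions, not kernel API):
 *
 *     struct k_thread *in = incoming, *owner = _kernel.current_fp;
 *
 *     if (in->base.user_options & _FP_USER_MASK) {  // uses FP at all?
 *         if (in != owner) {                        // FPU owner changing?
 *             if (owner != NULL &&
 *                 (owner->arch.flags & X86_THREAD_FLAG_ALL)) {
 *                 save_fp_context(owner);           // fxsave/fnsave
 *             }
 *             if (in->arch.flags & X86_THREAD_FLAG_ALL) {
 *                 restore_fp_context(in);           // fxrstor/frstor
 *             }
 *             _kernel.current_fp = in;
 *         }
 *     }
 */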
	/*
	 * Given that the ST[0] -> ST[7] and XMM0 -> XMM7 registers are all
	 * 'volatile', only save the registers if the "current FP context"
	 * was preemptively context switched.
	 */

	testb $X86_THREAD_FLAG_ALL, _thread_offset_to_flags(%ebx)
	je restoreContext_NoFloatSave

#ifdef CONFIG_X86_SSE
	testb $K_SSE_REGS, _thread_offset_to_user_options(%ebx)
	je x87FloatSave

	/*
	 * 'fxsave' does NOT perform an implicit 'fninit', therefore issue an
	 * 'fninit' to ensure a "clean" FPU state for the incoming thread
	 * (for the case when the fxrstor is not executed).
	 */

	fxsave _thread_offset_to_preempFloatReg(%ebx)
	fninit
	jmp floatSaveDone

x87FloatSave:
#endif /* CONFIG_X86_SSE */

	/* 'fnsave' performs an implicit 'fninit' after saving state! */

	fnsave _thread_offset_to_preempFloatReg(%ebx)

	/* fall through to 'floatSaveDone' */

floatSaveDone:
restoreContext_NoFloatSave:

	/*********************************************************
	 * Restore floating point context of the incoming thread.
	 *********************************************************/

	/*
	 * Again, given that the ST[0] -> ST[7] and XMM0 -> XMM7 registers are
	 * all 'volatile', only restore the registers if the incoming thread
	 * was previously preemptively context switched out.
	 */

	testb $X86_THREAD_FLAG_ALL, _thread_offset_to_flags(%eax)
	je restoreContext_NoFloatRestore

#ifdef CONFIG_X86_SSE
	testb $K_SSE_REGS, _thread_offset_to_user_options(%eax)
	je x87FloatRestore

	fxrstor _thread_offset_to_preempFloatReg(%eax)
	jmp floatRestoreDone

x87FloatRestore:
#endif /* CONFIG_X86_SSE */

	frstor _thread_offset_to_preempFloatReg(%eax)

	/* fall through to 'floatRestoreDone' */

floatRestoreDone:
restoreContext_NoFloatRestore:

	/* record that the incoming thread "owns" the floating point registers */

	movl %eax, _kernel_offset_to_current_fp(%edi)

	/*
	 * Branch point when none of the floating point registers need to be
	 * swapped because: a) the incoming thread does not use them OR
	 * b) the incoming thread is the last thread that used those registers.
	 */

restoreContext_NoFloatSwap:

	/*
	 * Leave CR0[TS] clear if the incoming thread utilizes the floating
	 * point registers.
	 */

	testb $_FP_USER_MASK, _thread_offset_to_user_options(%eax)
	jne CR0HandlingDone

	/*
	 * The incoming thread does NOT currently utilize the floating point
	 * registers, so set CR0[TS] to ensure the "device not available"
	 * exception occurs on the first attempt to access an x87 FPU, MMX,
	 * or XMM register.
	 */
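
/*
 * For reference (illustrative, hedged): the C-level equivalent of the TS
 * manipulation below, assuming hypothetical read_cr0()/write_cr0()
 * helpers; 0x8 is bit 3 of CR0, the Task Switched (TS) flag:
 *
 *     write_cr0(read_cr0() | 0x8);
 *
 * With CR0[TS] set, the thread's first x87/MMX/SSE instruction raises the
 * "device not available" exception (#NM), giving the kernel a hook to
 * lazily grant the thread a floating point context on demand.
 */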
	movl %cr0, %edx
	orl $0x8, %edx		/* set CR0[TS] (bit 3) */
	movl %edx, %cr0

CR0HandlingDone:

#endif /* CONFIG_LAZY_FPU_SHARING */

	/* update _kernel.current to reflect incoming thread */

	movl %eax, _kernel_offset_to_current(%edi)

#if defined(CONFIG_X86_USE_THREAD_LOCAL_STORAGE)
	pushl %eax

	call z_x86_tls_update_gdt

	/* Since the segment descriptor has changed, it needs to be reloaded */
	movw $GS_TLS_SEG, %ax
	movw %ax, %gs

	popl %eax
#endif

	/* recover thread stack pointer from k_thread */

	movl _thread_offset_to_esp(%eax), %esp

	/* load return value from a possible arch_thread_return_value_set() */

	popl %eax

	/* pop the non-volatile registers from the stack */

	popl %ebp
	popl %ebx
	popl %esi
	popl %edi

	/*
	 * %eax may contain one of these values:
	 *
	 * - the return value for arch_swap() that was set up by a call to
	 *   arch_thread_return_value_set()
	 * - the default of -EAGAIN carved out on entry
	 */

	/* Utilize the 'eflags' parameter to arch_swap() */

	pushl 4(%esp)
	popfl

#if defined(CONFIG_INSTRUMENT_THREAD_SWITCHING)
	pushl %eax
	call z_thread_mark_switched_in
	popl %eax
#endif
	ret

#ifdef _THREAD_WRAPPER_REQUIRED
/**
 *
 * @brief Adjust stack/parameters before invoking thread entry function
 *
 * This function adjusts the initial stack frame created by arch_new_thread()
 * such that the GDB stack frame unwinders recognize it as the outermost frame
 * in the thread's stack.
 *
 * GDB normally stops unwinding a stack when it detects that it has
 * reached a function called main(). Kernel threads, however, do not have
 * a main() function, and there does not appear to be a simple way of stopping
 * the unwinding of the stack.
 *
 * Given the initial thread created by arch_new_thread(), GDB expects to find
 * a return address on the stack immediately above the thread entry routine
 * z_thread_entry, in the location occupied by the initial EFLAGS. GDB
 * attempts to examine the memory at this return address, which typically
 * results in an invalid access to page 0 of memory.
 *
 * This function overwrites the initial EFLAGS with zero. When GDB
 * subsequently attempts to examine memory at address zero, the PeekPoke
 * driver detects an invalid access to address zero and returns an error,
 * which causes the GDB stack unwinder to stop somewhat gracefully.
 *
 * The initial EFLAGS cannot be overwritten until after z_swap() has swapped
 * in the new thread for the first time. This routine is called by z_swap()
 * the first time that the new thread is swapped in, and it jumps to
 * z_thread_entry after it has done its work.
 *
 *  __________________
 * |      param3      | <------ Top of the stack
 * |__________________|
 * |      param2      |         Stack Grows Down
 * |__________________|                 |
 * |      param1      |                 V
 * |__________________|
 * |      pEntry      |
 * |__________________|
 * |  initial EFLAGS  | <---- ESP when invoked by z_swap()
 * |__________________|       (zeroed by this routine)
 *
 * The address of the thread entry function needs to be in %edi when this is
 * invoked. It will either be z_thread_entry, or if userspace is enabled,
 * _arch_drop_to_user_mode if this is a user thread.
 *
 * @return this routine does NOT return.
 */
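
/*
 * For orientation (illustrative, hedged): the initial frame diagrammed
 * above, expressed as a C struct that arch_new_thread() can be viewed as
 * having pushed. The struct and field names here are hypothetical, chosen
 * only to mirror the diagram:
 *
 *     struct initial_frame {
 *         uint32_t eflags;     // lowest address: ESP on first swap-in,
 *                              // zeroed by the wrapper below
 *         void    *entry;      // pEntry
 *         void    *param1;
 *         void    *param2;
 *         void    *param3;     // highest address: top of the stack
 *     };
 */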
SECTION_FUNC(PINNED_TEXT, z_x86_thread_entry_wrapper)
	movl $0, (%esp)		/* zero the initial EFLAGS slot */
	jmp *%edi		/* tail-jump to the thread entry routine */
#endif /* _THREAD_WRAPPER_REQUIRED */
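
/*
 * Usage sketch (illustrative, hedged): how a scheduler-level caller is
 * expected to invoke arch_swap(). The locking shown is a simplified
 * assumption, not a copy of the kernel's z_swap() internals:
 *
 *     unsigned int key = arch_irq_lock();  // key holds caller's EFLAGS
 *     // ...select a new thread to run...
 *     int ret = arch_swap(key);            // returns -EAGAIN by default,
 *                                          // or the value installed via
 *                                          // arch_thread_return_value_set()
 */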