/*
 * Copyright (c) 2017 Intel Corporation
 *
 * SPDX-License-Identifier: Apache-2.0
 */

#include <zephyr/arch/x86/ia32/asm.h>
#include <zephyr/arch/cpu.h>
#include <offsets_short.h>
#include <zephyr/syscall.h>
#include <zephyr/kernel/mm.h>
#include <x86_mmu.h>

/* Exports */
GTEXT(z_x86_syscall_entry_stub)
GTEXT(z_x86_userspace_enter)
GTEXT(arch_user_string_nlen)
GTEXT(z_x86_user_string_nlen_fault_start)
GTEXT(z_x86_user_string_nlen_fault_end)
GTEXT(z_x86_user_string_nlen_fixup)

/* Imports */
GDATA(_k_syscall_table)

#ifdef CONFIG_X86_KPTI
/* Switch from the shadow to the kernel page table, switch to the interrupted
 * thread's kernel stack, and copy all context from the trampoline stack.
 *
 * Assumes all registers are callee-saved since this gets called from other
 * ASM code. Assumes a particular stack layout which is correct for
 * _exception_enter and _interrupt_enter when invoked with a call instruction:
 *
 * 28 SS
 * 24 ESP
 * 20 EFLAGS
 * 16 CS
 * 12 EIP
 *  8 isr_param or exc code
 *  4 isr or exc handler
 *  0 return address
 */
SECTION_FUNC(PINNED_TEXT, z_x86_trampoline_to_kernel)
	/* Check the interrupted code segment to see if we came from ring 3
	 * and hence are on the trampoline stack
	 */
	testb $3, 16(%esp)	/* Offset of CS */
	jz 1f

	/* Stash these regs as we need to use them */
	pushl %esi
	pushl %edi

	/* Switch to kernel page table */
	movl $K_MEM_PHYS_ADDR(z_x86_kernel_ptables), %esi
	movl %esi, %cr3

	/* Save old trampoline stack pointer in %edi */
	movl %esp, %edi

	/* Switch to privilege mode stack */
	movl $_kernel, %esi
	movl _kernel_offset_to_current(%esi), %esi
	movl _thread_offset_to_psp(%esi), %esp

	/* Transplant stack context and restore ESI/EDI, taking care to zero
	 * or put uninteresting values where we stashed ESI/EDI since the
	 * trampoline page is insecure and there might be a context switch
	 * on the way out instead of returning to the original thread
	 * immediately.
	 */
	pushl 36(%edi)		/* SS */
	pushl 32(%edi)		/* ESP */
	pushl 28(%edi)		/* EFLAGS */
	pushl 24(%edi)		/* CS */
	pushl 20(%edi)		/* EIP */
	pushl 16(%edi)		/* error code or isr parameter */
	pushl 12(%edi)		/* exception/irq handler */
	pushl 8(%edi)		/* return address */
	movl 4(%edi), %esi	/* restore ESI */
	movl $0, 4(%edi)	/* Zero old ESI storage area */
	xchgl %edi, (%edi)	/* Exchange old EDI to restore it and put
				 * old sp in the storage area
				 */

	/* Trampoline stack should have nothing sensitive in it at this point */
1:
	ret

/* Copy interrupt return stack context to the trampoline stack, switch back
 * to the user page table, and only then 'iret'. We jump to this instead
 * of calling 'iret' if KPTI is turned on.
 *
 * Stack layout is expected to be as follows:
 *
 * 16 SS
 * 12 ESP
 *  8 EFLAGS
 *  4 CS
 *  0 EIP
 *
 * This function is conditionally macroed to KPTI_IRET/KPTI_IRET_USER
 */
SECTION_FUNC(PINNED_TEXT, z_x86_trampoline_to_user)
	/* Check the interrupted code segment to see if we came from ring 3
	 * and hence are on the trampoline stack
	 */
	testb $3, 4(%esp)	/* Offset of CS */
	jz 1f

	/* Otherwise, fall through ... */
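
/* Note: z_x86_trampoline_to_user above is used on return paths that may or
 * may not be headed back to ring 3, so it checks CS first; the "_always"
 * variant below skips that check for paths (such as the system call stub)
 * that are known to be returning to user mode. The KPTI_IRET and
 * KPTI_IRET_USER macros referenced in this file presumably expand to jumps
 * to these two entry points when CONFIG_X86_KPTI is enabled, and to a plain
 * 'iret' otherwise.
 */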
SECTION_FUNC(PINNED_TEXT, z_x86_trampoline_to_user_always)
	/* Stash EDI, we need a free register */
	pushl %edi

	/* Store the old stack pointer and switch to the trampoline stack.
	 * Lock IRQs before changing the stack pointer to the trampoline
	 * stack; we don't want any interrupts also using the trampoline
	 * stack during this time.
	 */
	movl %esp, %edi
	cli
	movl $z_trampoline_stack_end, %esp

	/* Copy context */
	pushl 20(%edi)		/* SS */
	pushl 16(%edi)		/* ESP */
	pushl 12(%edi)		/* EFLAGS */
	pushl 8(%edi)		/* CS */
	pushl 4(%edi)		/* EIP */
	xchgl %edi, (%edi)	/* Exchange old EDI to restore it and put
				 * the trampoline stack address in its old
				 * storage area
				 */

	/* Switch to user page table */
	pushl %eax
	movl $_kernel, %eax
	movl _kernel_offset_to_current(%eax), %eax
	movl _thread_offset_to_ptables(%eax), %eax
	movl %eax, %cr3
	popl %eax
	movl $0, -4(%esp)	/* Delete stashed EAX data */

	/* Trampoline stack should have nothing sensitive in it at this point */
1:
	iret
#endif /* CONFIG_X86_KPTI */

/* Landing site for the syscall SW IRQ. Marshal arguments and call the C
 * function for further processing. We're on the kernel stack for the
 * invoking thread, unless KPTI is enabled, in which case we're on the
 * trampoline stack and need to get off it before enabling interrupts.
 */
SECTION_FUNC(TEXT, z_x86_syscall_entry_stub)
#ifdef CONFIG_X86_KPTI
	/* Stash these regs as we need to use them */
	pushl %esi
	pushl %edi

	/* Switch to kernel page table */
	movl $K_MEM_PHYS_ADDR(z_x86_kernel_ptables), %esi
	movl %esi, %cr3

	/* Save old trampoline stack pointer in %edi */
	movl %esp, %edi

	/* Switch to privilege elevation stack */
	movl $_kernel, %esi
	movl _kernel_offset_to_current(%esi), %esi
	movl _thread_offset_to_psp(%esi), %esp

	/* Transplant context according to the layout above. Variant of the
	 * logic in z_x86_trampoline_to_kernel.
	 */
	pushl 24(%edi)		/* SS */
	pushl 20(%edi)		/* ESP */
	pushl 16(%edi)		/* EFLAGS */
	pushl 12(%edi)		/* CS */
	pushl 8(%edi)		/* EIP */
	movl 4(%edi), %esi	/* restore ESI */
	movl $0, 4(%edi)	/* Zero old ESI storage area */
	xchgl %edi, (%edi)	/* Exchange old EDI to restore it and put
				 * old sp in the storage area
				 */

	/* Trampoline stack should have nothing sensitive in it at this point */
#endif /* CONFIG_X86_KPTI */

	sti			/* re-enable interrupts */
	cld			/* clear direction flag, restored on 'iret' */

	/* The call ID is in ESI. Bounds-check it: it must be less than
	 * K_SYSCALL_LIMIT.
	 */
	cmp $K_SYSCALL_LIMIT, %esi
	jae _bad_syscall

_id_ok:
#ifdef CONFIG_X86_BOUNDS_CHECK_BYPASS_MITIGATION
	/* Prevent speculation with bogus system call IDs */
	lfence
#endif
	/* Marshal arguments per the calling convention to match what is
	 * expected for _k_syscall_handler_t functions
	 */
	push %esp	/* ssf */
	push %ebp	/* arg6 */
	push %edi	/* arg5 */
	push %ebx	/* arg4 */
	push %ecx	/* arg3 */
	push %edx	/* arg2 */
	push %eax	/* arg1 */

	/* From the call ID in ESI, load EBX with the actual function pointer
	 * to call by looking it up in the system call dispatch table
	 */
	xor %edi, %edi
	mov _k_syscall_table(%edi, %esi, 4), %ebx

	/* Run the handler, which is some entry in _k_syscall_table */
	call *%ebx
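
	/* For reference, the frame marshalled above matches a handler
	 * signature along these lines (a sketch inferred from the pushes
	 * above; the authoritative _k_syscall_handler_t definition lives in
	 * the Zephyr syscall headers):
	 *
	 *   uintptr_t handler(uintptr_t arg1, uintptr_t arg2, uintptr_t arg3,
	 *                     uintptr_t arg4, uintptr_t arg5, uintptr_t arg6,
	 *                     void *ssf);
	 *
	 * so the call above is effectively
	 * handler(EAX, EDX, ECX, EBX, EDI, EBP, ssf).
	 */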
	/* EAX now contains the return value. Pop or xor everything else to
	 * prevent an information leak from kernel mode.
	 */
	pop %edx	/* old arg1 value, discard it */
	pop %edx
	pop %ecx
	pop %ebx
	pop %edi
	/* Discard ssf and arg6 */
	add $8, %esp
	KPTI_IRET_USER

_bad_syscall:
	/* ESI had a bogus syscall value in it; replace it with the bad
	 * syscall handler's ID, and put the bad ID in as its first argument.
	 * This clobbers ESI, but the bad syscall handler never returns
	 * anyway; it's going to generate a kernel oops.
	 */
	mov %esi, %eax
	mov $K_SYSCALL_BAD, %esi
	jmp _id_ok


/*
 * size_t arch_user_string_nlen(const char *s, size_t maxsize, int *err_arg)
 */
SECTION_FUNC(TEXT, arch_user_string_nlen)
	push %ebp
	mov %esp, %ebp

	/* Error value, set to -1 initially. This location is -4(%ebp) */
	push $-1

	/* Do the strlen operation, based on a disassembly of the minimal libc */
	xor %eax, %eax		/* EAX = 0, length count */
	mov 0x8(%ebp), %edx	/* EDX = base of string */

	/* This code might page fault */
strlen_loop:
z_x86_user_string_nlen_fault_start:
	cmpb $0x0, (%edx, %eax, 1)	/* *(EDX + EAX) == 0? Could fault. */

z_x86_user_string_nlen_fault_end:
	je strlen_done
	cmp 0xc(%ebp), %eax	/* Max length reached? */
	je strlen_done
	inc %eax		/* EAX++ and loop again */
	jmp strlen_loop

strlen_done:
	/* Set error value to 0 since we succeeded */
	movl $0, -4(%ebp)

z_x86_user_string_nlen_fixup:
	/* Write the error value to the err pointer parameter */
	movl 0x10(%ebp), %ecx
	pop %edx
	movl %edx, (%ecx)

	pop %ebp
	ret


/* FUNC_NORETURN void z_x86_userspace_enter(k_thread_entry_t user_entry,
 *                                          void *p1, void *p2, void *p3,
 *                                          uint32_t stack_end,
 *                                          uint32_t stack_start)
 *
 * A one-way trip to userspace.
 */
SECTION_FUNC(TEXT, z_x86_userspace_enter)
	pop %esi	/* Discard return address on the stack */

	/* Fetch parameters from the stack */
	pop %eax	/* user_entry */
	pop %edx	/* p1 */
	pop %ecx	/* p2 */
	pop %esi	/* p3 */
	pop %ebx	/* stack_end (high address) */
	pop %edi	/* stack_start (low address) */

	/* Move to the kernel stack for this thread, so we can erase the
	 * user stack. The kernel stack is the page immediately before
	 * the user stack.
	 *
	 * For security reasons, we must erase the entire user stack.
	 * We don't know what previous contexts it was used in and do not
	 * want to leak any information.
	 */
	mov %edi, %esp

	/* Erase and enable the US bit in the page tables for the stack buffer */
	push %ecx
	push %eax
	push %edx
	call z_x86_current_stack_perms
	pop %edx
	pop %eax
	pop %ecx

	/* Set the stack pointer to the base of the freshly-erased user stack.
	 * Now that this is set we won't need EBX any more.
	 */
	mov %ebx, %esp

	/* Set segment registers (except CS and SS, which are done in
	 * a special way by 'iret' below)
	 */
	mov $USER_DATA_SEG, %bx
	mov %bx, %ds
	mov %bx, %es

	/* Push arguments to z_thread_entry() */
	push %esi	/* p3 */
	push %ecx	/* p2 */
	push %edx	/* p1 */
	push %eax	/* user_entry */
	/* NULL return address */
	push $0

	/* Save the stack pointer at this position; this is where it will be
	 * when we land in z_thread_entry()
	 */
	mov %esp, %edi
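
	/* At this point, from %edi upward, the user stack holds exactly the
	 * frame a C caller would have produced for a call like (sketch only;
	 * z_thread_entry's real prototype lives in the kernel headers):
	 *
	 *   FUNC_NORETURN void z_thread_entry(k_thread_entry_t entry,
	 *                                     void *p1, void *p2, void *p3);
	 *
	 *   z_thread_entry(user_entry, p1, p2, p3);
	 *
	 * except that the return address slot is a NULL placeholder, since
	 * z_thread_entry() never returns.
	 */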
	/* An inter-privilege 'iret' pops all of these. We need to fake an
	 * interrupt return to enter user mode, as far calls cannot be used
	 * to drop to a lower privilege level.
	 */
	push $USER_DATA_SEG	/* SS */
	push %edi		/* ESP */
	pushfl			/* EFLAGS */
	push $USER_CODE_SEG	/* CS */
	push $z_thread_entry	/* EIP */

	/* We will land in z_thread_entry() in user mode after this */
	KPTI_IRET_USER
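
/* For reference, arch_user_string_nlen() above is roughly equivalent to the
 * following C (a sketch only; the real implementation stays in assembly so
 * the potentially faulting read is precisely bracketed by the fault_start /
 * fault_end labels and can be resumed at the fixup label):
 *
 *   size_t arch_user_string_nlen(const char *s, size_t maxsize, int *err_arg)
 *   {
 *       size_t len = 0;
 *
 *       while (s[len] != '\0' && len != maxsize) {  // this read may fault
 *           len++;
 *       }
 *
 *       *err_arg = 0;  // the fixup path stores -1 here instead if we faulted
 *       return len;
 *   }
 */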