/*
 * Copyright (c) 2017 Intel Corporation
 *
 * SPDX-License-Identifier: Apache-2.0
 */

/* NOTE(review): the operands of the six #include directives below were
 * missing from the reviewed text (most likely stripped as <...> markup).
 * They are kept as found rather than guessed; restore the header names
 * from the project tree before building.
 */
#include
#include
#include
#include
#include
#include

/* Exports */
GTEXT(z_x86_syscall_entry_stub)
GTEXT(z_x86_userspace_enter)
GTEXT(arch_user_string_nlen)
GTEXT(z_x86_user_string_nlen_fault_start)
GTEXT(z_x86_user_string_nlen_fault_end)
GTEXT(z_x86_user_string_nlen_fixup)

/* Imports */
GDATA(_k_syscall_table)

#ifdef CONFIG_X86_KPTI
/* Switch from the shadow to the kernel page table, switch to the interrupted
 * thread's kernel stack, and copy all context from the trampoline stack.
 *
 * Assumes all registers are callee-saved since this gets called from other
 * ASM code. Assumes a particular stack layout which is correct for
 * _exception_enter and _interrupt_enter when invoked with a call instruction:
 *
 *  28 SS
 *  24 ESP
 *  20 EFLAGS
 *  16 CS
 *  12 EIP
 *  8  isr_param or exc code
 *  4  isr or exc handler
 *  0  return address
 */
SECTION_FUNC(PINNED_TEXT, z_x86_trampoline_to_kernel)
	/* Check interrupted code segment to see if we came from ring 3
	 * and hence on the trampoline stack. If the low two bits of the
	 * saved CS are clear there is nothing to do; just return.
	 */
	testb	$3, 16(%esp)	/* Offset of CS */
	jz	1f

	/* Stash these regs as we need to use them. Every offset in the
	 * layout above is 8 bytes larger from here on.
	 */
	pushl	%esi
	pushl	%edi

	/* Switch to kernel page table */
	movl	$K_MEM_PHYS_ADDR(z_x86_kernel_ptables), %esi
	movl	%esi, %cr3

	/* Save old trampoline stack pointer in %edi */
	movl	%esp, %edi

	/* Switch to privilege mode stack */
	movl	$_kernel, %esi
	movl	_kernel_offset_to_current(%esi), %esi
	movl	_thread_offset_to_psp(%esi), %esp

	/* Transplant stack context and restore ESI/EDI. Taking care to zero
	 * or put uninteresting values where we stashed ESI/EDI since the
	 * trampoline page is insecure and there might be a context switch
	 * on the way out instead of returning to the original thread
	 * immediately.
	 */
	pushl	36(%edi)	/* SS */
	pushl	32(%edi)	/* ESP */
	pushl	28(%edi)	/* EFLAGS */
	pushl	24(%edi)	/* CS */
	pushl	20(%edi)	/* EIP */
	pushl	16(%edi)	/* error code or isr parameter */
	pushl	12(%edi)	/* exception/irq handler */
	pushl	8(%edi)		/* return address */
	movl	4(%edi), %esi	/* restore ESI */
	movl	$0, 4(%edi)	/* Zero old esi storage area */
	xchgl	%edi, (%edi)	/* Exchange old edi to restore it and put
				 * old sp in the storage area
				 */

	/* Trampoline stack should have nothing sensitive in it at this point */
1:
	ret

/* Copy interrupt return stack context to the trampoline stack, switch back
 * to the user page table, and only then 'iret'. We jump to this instead
 * of calling 'iret' if KPTI is turned on.
 *
 * Stack layout is expected to be as follows:
 *
 *  16 SS
 *  12 ESP
 *  8  EFLAGS
 *  4  CS
 *  0  EIP
 *
 * This function is conditionally macroed to KPTI_IRET/KPTI_IRET_USER
 */
SECTION_FUNC(PINNED_TEXT, z_x86_trampoline_to_user)
	/* Check the code segment we are returning to. If its low two bits
	 * are clear we are not going back to ring 3, so skip the trampoline
	 * and page table switch and 'iret' directly.
	 */
	testb	$3, 4(%esp)	/* Offset of CS */
	jz	1f

	/* Otherwise, fall through ... */

/* Same as z_x86_trampoline_to_user, minus the CS check: the copy to the
 * trampoline stack and the page table switch are always performed.
 */
SECTION_FUNC(PINNED_TEXT, z_x86_trampoline_to_user_always)
	/* Stash EDI, need a free register. The frame offsets listed above
	 * are 4 bytes larger from here on.
	 */
	pushl	%edi

	/* Store old stack pointer and switch to trampoline stack.
	 * Lock IRQs before changing stack pointer to the trampoline stack,
	 * we don't want any interrupts also using the trampoline stack
	 * during this time.
	 */
	movl	%esp, %edi
	cli
	movl	$z_trampoline_stack_end, %esp

	/* Copy context */
	pushl	20(%edi)	/* SS */
	pushl	16(%edi)	/* ESP */
	pushl	12(%edi)	/* EFLAGS */
	pushl	8(%edi)		/* CS */
	pushl	4(%edi)		/* EIP */
	xchgl	%edi, (%edi)	/* Exchange old edi to restore it and put
				 * trampoline stack address in its old storage
				 * area
				 */

	/* Switch to user page table */
	pushl	%eax
	movl	$_kernel, %eax
	movl	_kernel_offset_to_current(%eax), %eax
	movl	_thread_offset_to_ptables(%eax), %eax
	movl	%eax, %cr3
	popl	%eax
	movl	$0, -4(%esp)	/* Delete stashed EAX data */

	/* Trampoline stack should have nothing sensitive in it at this point */
1:
	iret
#endif /* CONFIG_X86_KPTI */

/* Landing site for syscall SW IRQ. Marshal arguments and call C function for
 * further processing. We're on the kernel stack for the invoking thread,
 * unless KPTI is enabled, in which case we're on the trampoline stack and
 * need to get off it before enabling interrupts.
 *
 * Register usage on entry, as consumed below:
 *   ESI = call ID
 *   EAX, EDX, ECX, EBX, EDI, EBP = arg1 .. arg6
 * The handler's return value is left in EAX.
 */
SECTION_FUNC(TEXT, z_x86_syscall_entry_stub)
#ifdef CONFIG_X86_KPTI
	/* Stash these regs as we need to use them */
	pushl	%esi
	pushl	%edi

	/* Switch to kernel page table */
	movl	$K_MEM_PHYS_ADDR(z_x86_kernel_ptables), %esi
	movl	%esi, %cr3

	/* Save old trampoline stack pointer in %edi */
	movl	%esp, %edi

	/* Switch to privilege elevation stack */
	movl	$_kernel, %esi
	movl	_kernel_offset_to_current(%esi), %esi
	movl	_thread_offset_to_psp(%esi), %esp

	/* Transplant context. Variant of logic in
	 * z_x86_trampoline_to_kernel; the trampoline stack at (%edi) holds:
	 *
	 *  24 SS
	 *  20 ESP
	 *  16 EFLAGS
	 *  12 CS
	 *  8  EIP
	 *  4  stashed ESI
	 *  0  stashed EDI
	 */
	pushl	24(%edi)	/* SS */
	pushl	20(%edi)	/* ESP */
	pushl	16(%edi)	/* EFLAGS */
	pushl	12(%edi)	/* CS */
	pushl	8(%edi)		/* EIP */
	movl	4(%edi), %esi	/* restore ESI */
	movl	$0, 4(%edi)	/* Zero old esi storage area */
	xchgl	%edi, (%edi)	/* Exchange old edi to restore it and put
				 * old sp in the storage area
				 */

	/* Trampoline stack should have nothing sensitive in it at this point */
#endif /* CONFIG_X86_KPTI */

	sti			/* re-enable interrupts */
	cld			/* clear direction flag, restored on 'iret' */

	/* call_id is in ESI. bounds-check it, must be less than
	 * K_SYSCALL_LIMIT. The comparison is unsigned (jae).
	 */
	cmp	$K_SYSCALL_LIMIT, %esi
	jae	_bad_syscall

_id_ok:
#ifdef CONFIG_X86_BOUNDS_CHECK_BYPASS_MITIGATION
	/* Prevent speculation with bogus system call IDs */
	lfence
#endif
	/* Marshal arguments per calling convention to match what is expected
	 * for _k_syscall_handler_t functions
	 */
	push	%esp		/* ssf */
	push	%ebp		/* arg6 */
	push	%edi		/* arg5 */
	push	%ebx		/* arg4 */
	push	%ecx		/* arg3 */
	push	%edx		/* arg2 */
	push	%eax		/* arg1 */

	/* from the call ID in ESI, load EBX with the actual function pointer
	 * to call by looking it up in the system call dispatch table
	 */
	xor	%edi, %edi
	mov	_k_syscall_table(%edi, %esi, 4), %ebx

	/* Run the handler, which is some entry in _k_syscall_table */
	call	*%ebx

	/* EAX now contains return value. Pop or xor everything else to prevent
	 * information leak from kernel mode.
	 */
	pop	%edx		/* old arg1 value, discard it */
	pop	%edx
	pop	%ecx
	pop	%ebx
	pop	%edi

	/* Discard ssf and arg6 */
	add	$8, %esp
	KPTI_IRET_USER

_bad_syscall:
	/* ESI had a bogus syscall value in it, replace with the bad syscall
	 * handler's ID, and put the bad ID as its first argument. This
	 * clobbers ESI but the bad syscall handler never returns
	 * anyway, it's going to generate a kernel oops
	 */
	mov	%esi, %eax
	mov	$K_SYSCALL_BAD, %esi
	jmp	_id_ok


/*
 * size_t arch_user_string_nlen(const char *s, size_t maxsize, int *err_arg)
 *
 * Count the bytes of s that precede the first NUL, looking at no more than
 * maxsize bytes. Arguments are read from the stack:
 *   0x8(%ebp)  = s
 *   0xc(%ebp)  = maxsize
 *   0x10(%ebp) = err_arg
 *
 * Returns the count in EAX. *err_arg is written with 0 when the scan
 * completes, or with -1 when execution arrives at
 * z_x86_user_string_nlen_fixup without passing through strlen_done.
 * Clobbers ECX, EDX and flags.
 *
 * NOTE(review): presumably the page fault handler redirects faults raised
 * between z_x86_user_string_nlen_fault_start and
 * z_x86_user_string_nlen_fault_end to the fixup label; that code is not in
 * this file, confirm against the exception handler.
 */
SECTION_FUNC(TEXT, arch_user_string_nlen)
	push	%ebp
	mov	%esp, %ebp

	/* error value, set to -1 initially. This location is -4(%ebp) */
	push	$-1

	/* Do the strlen operation, based on disassembly of minimal libc */
	xor	%eax, %eax		/* EAX = 0, length count */
	mov	0x8(%ebp), %edx		/* EDX base of string */

strlen_loop:
	/* Check the bound before touching memory so that s[maxsize], which
	 * lies outside the range the caller asked us to examine, is never
	 * read. With maxsize == 0 nothing is dereferenced at all.
	 */
	cmp	0xc(%ebp), %eax		/* Max length reached? */
	je	strlen_done

	/* This code might page fault */
z_x86_user_string_nlen_fault_start:
	cmpb	$0x0, (%edx, %eax, 1)	/* *(EDX + EAX) == 0? Could fault. */

z_x86_user_string_nlen_fault_end:
	je	strlen_done
	inc	%eax			/* EAX++ and loop again */
	jmp	strlen_loop

strlen_done:
	/* Set error value to 0 since we succeeded */
	movl	$0, -4(%ebp)

z_x86_user_string_nlen_fixup:
	/* Write error value to err pointer parameter */
	movl	0x10(%ebp), %ecx
	pop	%edx			/* EDX = error value from -4(%ebp) */
	movl	%edx, (%ecx)

	pop	%ebp
	ret


/* FUNC_NORETURN void z_x86_userspace_enter(k_thread_entry_t user_entry,
 *					   void *p1, void *p2, void *p3,
 *					   uint32_t stack_end,
 *					   uint32_t stack_start)
 *
 * A one-way trip to userspace.
 */
SECTION_FUNC(TEXT, z_x86_userspace_enter)
	pop	%esi	/* Discard return address on stack */

	/* Fetch parameters on the stack */
	pop	%eax	/* user_entry */
	pop	%edx	/* p1 */
	pop	%ecx	/* p2 */
	pop	%esi	/* p3 */
	pop	%ebx	/* stack_end (high address) */
	pop	%edi	/* stack_start (low address) */

	/* Move to the kernel stack for this thread, so we can erase the
	 * user stack. The kernel stack is the page immediately before
	 * the user stack.
	 *
	 * For security reasons, we must erase the entire user stack.
	 * We don't know what previous contexts it was used and do not
	 * want to leak any information.
	 */
	mov	%edi, %esp

	/* Erase and enable US bit in page tables for the stack buffer.
	 * EAX, EDX and ECX are saved around the call; ESI, EBX and EDI are
	 * used afterwards without being saved here.
	 */
	push	%ecx
	push	%eax
	push	%edx
	call	z_x86_current_stack_perms
	pop	%edx
	pop	%eax
	pop	%ecx

	/* Set stack pointer to the base of the freshly-erased user stack.
	 * Now that this is set we won't need EBX any more.
	 */
	mov	%ebx, %esp

	/* Set segment registers (except CS and SS which are done in
	 * a special way by 'iret' below)
	 */
	mov	$USER_DATA_SEG, %bx
	mov	%bx, %ds
	mov	%bx, %es

	/* Push arguments to z_thread_entry() */
	push	%esi	/* p3 */
	push	%ecx	/* p2 */
	push	%edx	/* p1 */
	push	%eax	/* user_entry */
	/* NULL return address */
	push	$0

	/* Save stack pointer at this position, this is where it will be
	 * when we land in z_thread_entry()
	 */
	mov	%esp, %edi

	/* Inter-privilege 'iret' pops all of these. Need to fake an interrupt
	 * return to enter user mode as far calls cannot change privilege
	 * level
	 */
	push	$USER_DATA_SEG	/* SS */
	push	%edi		/* ESP */
	pushfl			/* EFLAGS */
	push	$USER_CODE_SEG	/* CS */
	push	$z_thread_entry	/* EIP */

	/* We will land in z_thread_entry() in user mode after this */
	KPTI_IRET_USER