1 /*
2 * Copyright (c) 2017 Intel Corporation
3 *
4 * SPDX-License-Identifier: Apache-2.0
5 */
6
7 #include <zephyr/kernel.h>
8 #include <zephyr/sys/speculation.h>
9 #include <zephyr/internal/syscall_handler.h>
10 #include <kernel_arch_func.h>
11 #include <ksched.h>
12 #include <x86_mmu.h>
13
14 #ifdef CONFIG_DEMAND_PAGING
15 #include <zephyr/kernel/mm/demand_paging.h>
16 #endif
17
18 #ifndef CONFIG_X86_KPTI
/* Switch to the incoming thread's page tables, and update the location of
 * the privilege elevation stack.
21 *
22 * May be called ONLY during context switch. Hot code path!
23 *
24 * Nothing to do here if KPTI is enabled. We are in supervisor mode, so the
25 * active page tables are the kernel's page tables. If the incoming thread is
26 * in user mode we are going to switch CR3 to the domain-specific tables when
27 * we go through z_x86_trampoline_to_user.
28 *
29 * We don't need to update the privilege mode initial stack pointer either,
30 * privilege elevation always lands on the trampoline stack and the irq/syscall
31 * code has to manually transition off of it to the appropriate stack after
32 * switching page tables.
33 */
34 __pinned_func
z_x86_swap_update_page_tables(struct k_thread * incoming)35 void z_x86_swap_update_page_tables(struct k_thread *incoming)
36 {
37 #ifndef CONFIG_X86_64
38 /* Set initial stack pointer when elevating privileges from Ring 3
39 * to Ring 0.
40 */
41 _main_tss.esp0 = (uintptr_t)incoming->arch.psp;
42 #endif
43
44 #ifdef CONFIG_X86_COMMON_PAGE_TABLE
45 z_x86_swap_update_common_page_table(incoming);
46 #else
47 /* Check first that we actually need to do this, since setting
48 * CR3 involves an expensive full TLB flush.
49 */
50 uintptr_t ptables_phys = incoming->arch.ptables;
51
52 __ASSERT(ptables_phys != 0, "NULL page tables for thread %p\n",
53 incoming);
54
55 if (ptables_phys != z_x86_cr3_get()) {
56 z_x86_cr3_set(ptables_phys);
57 }
58 #endif /* CONFIG_X86_COMMON_PAGE_TABLE */
59 }
60 #endif /* CONFIG_X86_KPTI */
61
62 /* Preparation steps needed for all threads if user mode is turned on.
63 *
64 * Returns the initial entry point to swap into.
65 */
z_x86_userspace_prepare_thread(struct k_thread * thread)66 void *z_x86_userspace_prepare_thread(struct k_thread *thread)
67 {
68 void *initial_entry;
69 struct z_x86_thread_stack_header *header =
70 (struct z_x86_thread_stack_header *)thread->stack_obj;
71
72 thread->arch.psp =
73 header->privilege_stack + sizeof(header->privilege_stack);
74
75 #ifndef CONFIG_X86_COMMON_PAGE_TABLE
76 /* Important this gets cleared, so that arch_mem_domain_* APIs
77 * can distinguish between new threads, and threads migrating
78 * between domains
79 */
80 thread->arch.ptables = (uintptr_t)NULL;
81 #endif /* CONFIG_X86_COMMON_PAGE_TABLE */
82
83 if ((thread->base.user_options & K_USER) != 0U) {
84 initial_entry = arch_user_mode_enter;
85 } else {
86 initial_entry = z_thread_entry;
87 }
88
89 return initial_entry;
90 }
91
arch_user_mode_enter(k_thread_entry_t user_entry,void * p1,void * p2,void * p3)92 FUNC_NORETURN void arch_user_mode_enter(k_thread_entry_t user_entry,
93 void *p1, void *p2, void *p3)
94 {
95 size_t stack_end;
96
97 /* Transition will reset stack pointer to initial, discarding
98 * any old context since this is a one-way operation
99 */
100 stack_end = Z_STACK_PTR_ALIGN(_current->stack_info.start +
101 _current->stack_info.size -
102 _current->stack_info.delta);
103
104 #ifdef CONFIG_X86_64
105 /* x86_64 SysV ABI requires 16 byte stack alignment, which
106 * means that on entry to a C function (which follows a CALL
107 * that pushes 8 bytes) the stack must be MISALIGNED by
108 * exactly 8 bytes.
109 */
110 stack_end -= 8;
111 #endif
112
113 #if defined(CONFIG_DEMAND_PAGING) && \
114 !defined(CONFIG_LINKER_GENERIC_SECTIONS_PRESENT_AT_BOOT)
115 /* If generic section is not present at boot,
116 * the thread stack may not be in physical memory.
117 * Unconditionally page in the stack instead of
118 * relying on page fault to speed up a little bit
119 * on starting the thread.
120 *
121 * Note that this also needs to page in the reserved
122 * portion of the stack (which is usually the page just
123 * before the beginning of stack in
124 * _current->stack_info.start.
125 */
126 uintptr_t stack_start;
127 size_t stack_size;
128 uintptr_t stack_aligned_start;
129 size_t stack_aligned_size;
130
131 stack_start = POINTER_TO_UINT(_current->stack_obj);
132 stack_size = Z_THREAD_STACK_SIZE_ADJUST(_current->stack_info.size);
133
134 #if defined(CONFIG_HW_STACK_PROTECTION)
135 /* With hardware stack protection, the first page of stack
136 * is a guard page. So need to skip it.
137 */
138 stack_start += CONFIG_MMU_PAGE_SIZE;
139 stack_size -= CONFIG_MMU_PAGE_SIZE;
140 #endif
141
142 (void)k_mem_region_align(&stack_aligned_start, &stack_aligned_size,
143 stack_start, stack_size,
144 CONFIG_MMU_PAGE_SIZE);
145 k_mem_page_in(UINT_TO_POINTER(stack_aligned_start),
146 stack_aligned_size);
147 #endif
148
149 z_x86_userspace_enter(user_entry, p1, p2, p3, stack_end,
150 _current->stack_info.start);
151 CODE_UNREACHABLE;
152 }
153