/*
 * Copyright (c) 2017 Intel Corporation
 *
 * SPDX-License-Identifier: Apache-2.0
 */

#include <errno.h>

#include <zephyr/kernel.h>
#include <zephyr/sys/speculation.h>
#include <zephyr/internal/syscall_handler.h>
#include <kernel_arch_func.h>
#include <ksched.h>
#include <x86_mmu.h>

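/* The privilege elevation stack is mapped with page-granular permissions, so
 * its size must be a nonzero multiple of the MMU page size.
 */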
BUILD_ASSERT((CONFIG_PRIVILEGED_STACK_SIZE > 0) &&
	     (CONFIG_PRIVILEGED_STACK_SIZE % CONFIG_MMU_PAGE_SIZE) == 0);

#ifdef CONFIG_DEMAND_PAGING
#include <zephyr/kernel/mm/demand_paging.h>
#endif

#ifndef CONFIG_X86_KPTI
/* Update to the incoming thread's page tables, and update the location of
 * the privilege elevation stack.
 *
 * May be called ONLY during context switch. Hot code path!
 *
 * Nothing to do here if KPTI is enabled. We are in supervisor mode, so the
 * active page tables are the kernel's page tables. If the incoming thread is
 * in user mode, we are going to switch CR3 to the domain-specific tables when
 * we go through z_x86_trampoline_to_user.
 *
 * We don't need to update the privilege mode initial stack pointer either;
 * privilege elevation always lands on the trampoline stack, and the
 * IRQ/syscall code has to manually transition off of it to the appropriate
 * stack after switching page tables.
 */
__pinned_func
void z_x86_swap_update_page_tables(struct k_thread *incoming)
{
#ifndef CONFIG_X86_64
	/* Set initial stack pointer when elevating privileges from Ring 3
	 * to Ring 0.
	 */
	_main_tss.esp0 = (uintptr_t)incoming->arch.psp;
#endif

#ifdef CONFIG_X86_COMMON_PAGE_TABLE
	z_x86_swap_update_common_page_table(incoming);
#else
	/* Check first that we actually need to do this, since setting
	 * CR3 involves an expensive full TLB flush.
	 */
	uintptr_t ptables_phys = incoming->arch.ptables;

	__ASSERT(ptables_phys != 0, "NULL page tables for thread %p\n",
		 incoming);

	if (ptables_phys != z_x86_cr3_get()) {
		z_x86_cr3_set(ptables_phys);
	}
#endif /* CONFIG_X86_COMMON_PAGE_TABLE */
}
#endif /* CONFIG_X86_KPTI */

/* Preparation steps needed for all threads if user mode is turned on.
 *
 * Returns the initial entry point to swap into.
 */
void *z_x86_userspace_prepare_thread(struct k_thread *thread)
{
	void *initial_entry;

	if (z_stack_is_user_capable(thread->stack_obj)) {
		struct z_x86_thread_stack_header *header =
#ifdef CONFIG_THREAD_STACK_MEM_MAPPED
			(struct z_x86_thread_stack_header *)thread->stack_info.mapped.addr;
#else
			(struct z_x86_thread_stack_header *)thread->stack_obj;
#endif /* CONFIG_THREAD_STACK_MEM_MAPPED */

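		/* x86 stacks grow downward, so point arch.psp just past the
		 * end of the privilege elevation stack buffer.
		 */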
		thread->arch.psp = header->privilege_stack + sizeof(header->privilege_stack);
	} else {
		thread->arch.psp = NULL;
	}

#ifndef CONFIG_X86_COMMON_PAGE_TABLE
	/* It is important that this gets cleared, so that the
	 * arch_mem_domain_* APIs can distinguish between new threads and
	 * threads migrating between domains.
	 */
	thread->arch.ptables = (uintptr_t)NULL;
#endif /* CONFIG_X86_COMMON_PAGE_TABLE */

	if ((thread->base.user_options & K_USER) != 0U) {
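		/* Thread was created with K_USER: it starts out in
		 * arch_user_mode_enter(), which drops to user mode before
		 * invoking the user-supplied entry point.
		 */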
		initial_entry = arch_user_mode_enter;

#ifdef CONFIG_INIT_STACKS
		/* setup_thread_stack() does not initialize the
		 * architecture-specific privileged stack, so we need to do it
		 * manually here, as this function is called by
		 * arch_new_thread() via z_setup_new_thread() after
		 * setup_thread_stack() but before the thread starts running.
		 *
		 * Note that only user threads have privileged stacks;
		 * kernel-only threads do not.
		 *
		 * Also note that this needs to be done before calling
		 * z_x86_userspace_enter(), which clears the user stack. That
		 * function requires using the privileged stack for code
		 * execution, so we cannot clear the privileged stack at the
		 * same time.
		 */
		struct z_x86_thread_stack_header *hdr_stack_obj =
			(struct z_x86_thread_stack_header *)thread->stack_obj;

		(void)memset(&hdr_stack_obj->privilege_stack[0], 0xaa,
			     sizeof(hdr_stack_obj->privilege_stack));
#endif

	} else {
		initial_entry = z_thread_entry;
	}

	return initial_entry;
}

FUNC_NORETURN void arch_user_mode_enter(k_thread_entry_t user_entry,
					void *p1, void *p2, void *p3)
{
	size_t stack_end;

	/* The transition will reset the stack pointer to its initial value,
	 * discarding any old context, since this is a one-way operation.
	 */
	stack_end = Z_STACK_PTR_ALIGN(arch_current_thread()->stack_info.start +
				      arch_current_thread()->stack_info.size -
				      arch_current_thread()->stack_info.delta);

#ifdef CONFIG_X86_64
	/* The x86_64 SysV ABI requires 16-byte stack alignment, which means
	 * that on entry to a C function (which follows a CALL that pushes
	 * 8 bytes) the stack must be MISALIGNED by exactly 8 bytes.
	 */
	stack_end -= 8;
#endif

#if defined(CONFIG_DEMAND_PAGING) && \
	!defined(CONFIG_LINKER_GENERIC_SECTIONS_PRESENT_AT_BOOT)
	/* If the generic section is not present at boot, the thread stack may
	 * not be in physical memory. Unconditionally page in the stack,
	 * instead of relying on page faults, to speed up starting the thread
	 * a little bit.
	 *
	 * Note that this also needs to page in the reserved portion of the
	 * stack (which is usually the page just before the beginning of the
	 * stack at arch_current_thread()->stack_info.start).
	 */
	uintptr_t stack_start;
	size_t stack_size;
	uintptr_t stack_aligned_start;
	size_t stack_aligned_size;

	stack_start = POINTER_TO_UINT(arch_current_thread()->stack_obj);
	stack_size = K_THREAD_STACK_LEN(arch_current_thread()->stack_info.size);

#if defined(CONFIG_X86_STACK_PROTECTION)
	/* With hardware stack protection, the first page of the stack is a
	 * guard page, so we need to skip it.
	 */
	stack_start += CONFIG_MMU_PAGE_SIZE;
	stack_size -= CONFIG_MMU_PAGE_SIZE;
#endif

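	/* Expand the region to MMU page boundaries before paging it in, since
	 * demand paging works at page granularity.
	 */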
	(void)k_mem_region_align(&stack_aligned_start, &stack_aligned_size,
				 stack_start, stack_size,
				 CONFIG_MMU_PAGE_SIZE);
	k_mem_page_in(UINT_TO_POINTER(stack_aligned_start),
		      stack_aligned_size);
#endif

	z_x86_userspace_enter(user_entry, p1, p2, p3, stack_end,
			      arch_current_thread()->stack_info.start);
	CODE_UNREACHABLE;
}

int arch_thread_priv_stack_space_get(const struct k_thread *thread, size_t *stack_size,
				     size_t *unused_ptr)
{
	struct z_x86_thread_stack_header *hdr_stack_obj;

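	/* Only threads created with K_USER have a privilege elevation stack
	 * to report on.
	 */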
	if ((thread->base.user_options & K_USER) != K_USER) {
		return -EINVAL;
	}

	hdr_stack_obj = (struct z_x86_thread_stack_header *)thread->stack_obj;

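	/* Measure unused bytes in the privilege elevation stack; this relies
	 * on the 0xaa fill pattern written by z_x86_userspace_prepare_thread()
	 * when CONFIG_INIT_STACKS is enabled.
	 */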
	return z_stack_space_get(&hdr_stack_obj->privilege_stack[0],
				 sizeof(hdr_stack_obj->privilege_stack),
				 unused_ptr);
}