/*
 * Copyright (c) 2017 Intel Corporation
 *
 * SPDX-License-Identifier: Apache-2.0
 */

#include <errno.h>

#include <zephyr/kernel.h>
#include <zephyr/sys/speculation.h>
#include <zephyr/internal/syscall_handler.h>
#include <kernel_arch_func.h>
#include <ksched.h>
#include <x86_mmu.h>

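/* Sanity check: the privilege elevation stack must be sized as a non-zero
 * multiple of the MMU page size.
 */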
BUILD_ASSERT((CONFIG_PRIVILEGED_STACK_SIZE > 0) &&
	     (CONFIG_PRIVILEGED_STACK_SIZE % CONFIG_MMU_PAGE_SIZE) == 0);

#ifdef CONFIG_DEMAND_PAGING
#include <zephyr/kernel/mm/demand_paging.h>
#endif

#ifndef CONFIG_X86_KPTI
/* Switch to the incoming thread's page tables, and update the location of
 * the privilege elevation stack.
 *
 * May be called ONLY during context switch. Hot code path!
 *
 * Nothing to do here if KPTI is enabled. We are in supervisor mode, so the
 * active page tables are the kernel's page tables. If the incoming thread is
 * in user mode we are going to switch CR3 to the domain-specific tables when
 * we go through z_x86_trampoline_to_user.
 *
 * We don't need to update the privilege mode initial stack pointer either;
 * privilege elevation always lands on the trampoline stack and the irq/syscall
 * code has to manually transition off of it to the appropriate stack after
 * switching page tables.
 */
__pinned_func
void z_x86_swap_update_page_tables(struct k_thread *incoming)
{
#ifndef CONFIG_X86_64
	/* Set initial stack pointer when elevating privileges from Ring 3
	 * to Ring 0.
	 */
	_main_tss.esp0 = (uintptr_t)incoming->arch.psp;
#endif

#ifdef CONFIG_X86_COMMON_PAGE_TABLE
	z_x86_swap_update_common_page_table(incoming);
#else
	/* Check first that we actually need to do this, since setting
	 * CR3 involves an expensive full TLB flush.
	 */
	uintptr_t ptables_phys = incoming->arch.ptables;

	__ASSERT(ptables_phys != 0, "NULL page tables for thread %p\n",
		 incoming);

	if (ptables_phys != z_x86_cr3_get()) {
		z_x86_cr3_set(ptables_phys);
	}
#endif /* CONFIG_X86_COMMON_PAGE_TABLE */
}
#endif /* CONFIG_X86_KPTI */

/* Preparation steps needed for all threads if user mode is turned on.
 *
 * Returns the initial entry point to swap into.
 */
void *z_x86_userspace_prepare_thread(struct k_thread *thread)
{
	void *initial_entry;

	if (z_stack_is_user_capable(thread->stack_obj)) {
		struct z_x86_thread_stack_header *header =
#ifdef CONFIG_THREAD_STACK_MEM_MAPPED
			(struct z_x86_thread_stack_header *)thread->stack_info.mapped.addr;
#else
			(struct z_x86_thread_stack_header *)thread->stack_obj;
#endif /* CONFIG_THREAD_STACK_MEM_MAPPED */

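		/* Stacks grow downward on x86, so the initial privilege
		 * stack pointer is the address just past the end of the
		 * privilege_stack buffer in the stack object header.
		 */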
		thread->arch.psp = header->privilege_stack + sizeof(header->privilege_stack);
	} else {
		thread->arch.psp = NULL;
	}

#ifndef CONFIG_X86_COMMON_PAGE_TABLE
	/* It is important that this gets cleared, so that the
	 * arch_mem_domain_* APIs can distinguish between new threads and
	 * threads migrating between domains.
	 */
	thread->arch.ptables = (uintptr_t)NULL;
#endif /* CONFIG_X86_COMMON_PAGE_TABLE */

	if ((thread->base.user_options & K_USER) != 0U) {
		initial_entry = arch_user_mode_enter;

#ifdef CONFIG_INIT_STACKS
		/* setup_thread_stack() does not initialize the architecture-specific
		 * privileged stack, so we need to do it manually here, as this function
		 * is called by arch_new_thread() via z_setup_new_thread() after
		 * setup_thread_stack() but before the thread starts running.
		 *
		 * Note that only user threads have privileged stacks; kernel-only
		 * threads do not.
		 *
		 * Also note that this needs to be done before calling
		 * z_x86_userspace_enter(), which clears the user stack. That
		 * function executes on the privileged stack, so the privileged
		 * stack cannot be cleared at the same time.
		 */
		struct z_x86_thread_stack_header *hdr_stack_obj =
			(struct z_x86_thread_stack_header *)thread->stack_obj;

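		/* 0xaa is the fill pattern used by the kernel's stack usage
		 * accounting to recognize bytes that have never been written.
		 */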
		(void)memset(&hdr_stack_obj->privilege_stack[0], 0xaa,
			     sizeof(hdr_stack_obj->privilege_stack));
#endif

	} else {
		initial_entry = z_thread_entry;
	}

	return initial_entry;
}

FUNC_NORETURN void arch_user_mode_enter(k_thread_entry_t user_entry,
					void *p1, void *p2, void *p3)
{
	size_t stack_end;

	/* Transition will reset stack pointer to initial, discarding
	 * any old context since this is a one-way operation
	 */
	stack_end = Z_STACK_PTR_ALIGN(arch_current_thread()->stack_info.start +
				      arch_current_thread()->stack_info.size -
				      arch_current_thread()->stack_info.delta);

#ifdef CONFIG_X86_64
	/* x86_64 SysV ABI requires 16 byte stack alignment, which
	 * means that on entry to a C function (which follows a CALL
	 * that pushes 8 bytes) the stack must be MISALIGNED by
	 * exactly 8 bytes.
	 */
	stack_end -= 8;
#endif
#if defined(CONFIG_DEMAND_PAGING) && \
	!defined(CONFIG_LINKER_GENERIC_SECTIONS_PRESENT_AT_BOOT)
	/* If the generic sections are not present at boot,
	 * the thread stack may not be in physical memory.
	 * Unconditionally page in the stack instead of
	 * relying on page faults, to speed up starting
	 * the thread a little bit.
	 *
	 * Note that this also needs to page in the reserved
	 * portion of the stack (which is usually the page just
	 * before the beginning of the stack in
	 * arch_current_thread()->stack_info.start).
	 */
	uintptr_t stack_start;
	size_t stack_size;
	uintptr_t stack_aligned_start;
	size_t stack_aligned_size;

	stack_start = POINTER_TO_UINT(arch_current_thread()->stack_obj);
	stack_size = K_THREAD_STACK_LEN(arch_current_thread()->stack_info.size);

#if defined(CONFIG_X86_STACK_PROTECTION)
	/* With hardware stack protection, the first page of the stack
	 * is a guard page, so we need to skip it.
	 */
	stack_start += CONFIG_MMU_PAGE_SIZE;
	stack_size -= CONFIG_MMU_PAGE_SIZE;
#endif

	(void)k_mem_region_align(&stack_aligned_start, &stack_aligned_size,
				 stack_start, stack_size,
				 CONFIG_MMU_PAGE_SIZE);
	k_mem_page_in(UINT_TO_POINTER(stack_aligned_start),
		      stack_aligned_size);
#endif

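	/* Drop to user mode; this is a one-way transition and does not
	 * return.
	 */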
	z_x86_userspace_enter(user_entry, p1, p2, p3, stack_end,
			      arch_current_thread()->stack_info.start);
	CODE_UNREACHABLE;
}

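/* Report how much of a user thread's privilege elevation stack is unused.
 * Kernel-only threads have no privilege elevation stack, so -EINVAL is
 * returned for them.
 */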
int arch_thread_priv_stack_space_get(const struct k_thread *thread, size_t *stack_size,
				     size_t *unused_ptr)
{
	struct z_x86_thread_stack_header *hdr_stack_obj;

	if ((thread->base.user_options & K_USER) != K_USER) {
		return -EINVAL;
	}

	hdr_stack_obj = (struct z_x86_thread_stack_header *)thread->stack_obj;

	return z_stack_space_get(&hdr_stack_obj->privilege_stack[0],
				 sizeof(hdr_stack_obj->privilege_stack),
				 unused_ptr);
}