/*
 * Copyright (c) 2017 Intel Corporation
 *
 * SPDX-License-Identifier: Apache-2.0
 */

#include <zephyr/kernel.h>
#include <zephyr/sys/speculation.h>
#include <zephyr/internal/syscall_handler.h>
#include <kernel_arch_func.h>
#include <ksched.h>
#include <x86_mmu.h>

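/* The privilege elevation stack gets different MMU permissions than the rest
 * of the thread stack, and permissions can only be applied at page
 * granularity, so its size must be a nonzero multiple of the page size.
 */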
BUILD_ASSERT((CONFIG_PRIVILEGED_STACK_SIZE > 0) &&
	     (CONFIG_PRIVILEGED_STACK_SIZE % CONFIG_MMU_PAGE_SIZE) == 0);

#ifdef CONFIG_DEMAND_PAGING
#include <zephyr/kernel/mm/demand_paging.h>
#endif

#ifndef CONFIG_X86_KPTI
/* Switch to the incoming thread's page tables, and update the location of
 * the privilege elevation stack.
 *
 * May be called ONLY during context switch. Hot code path!
 *
 * Nothing to do here if KPTI is enabled. We are in supervisor mode, so the
 * active page tables are the kernel's page tables. If the incoming thread is
 * in user mode we are going to switch CR3 to the domain-specific tables when
 * we go through z_x86_trampoline_to_user.
 *
 * We don't need to update the privilege mode initial stack pointer either:
 * privilege elevation always lands on the trampoline stack and the irq/syscall
 * code has to manually transition off of it to the appropriate stack after
 * switching page tables.
 */
__pinned_func
void z_x86_swap_update_page_tables(struct k_thread *incoming)
{
#ifndef CONFIG_X86_64
	/* Set initial stack pointer when elevating privileges from Ring 3
	 * to Ring 0.
	 */
	_main_tss.esp0 = (uintptr_t)incoming->arch.psp;
#endif

#ifdef CONFIG_X86_COMMON_PAGE_TABLE
	z_x86_swap_update_common_page_table(incoming);
#else
	/* Check first that we actually need to do this, since setting
	 * CR3 involves an expensive full TLB flush.
	 */
	uintptr_t ptables_phys = incoming->arch.ptables;

	__ASSERT(ptables_phys != 0, "NULL page tables for thread %p\n",
		 incoming);

	if (ptables_phys != z_x86_cr3_get()) {
		z_x86_cr3_set(ptables_phys);
	}
#endif /* CONFIG_X86_COMMON_PAGE_TABLE */
}
#endif /* CONFIG_X86_KPTI */

/* Preparation steps needed for all threads if user mode is turned on.
 *
 * Returns the initial entry point to swap into.
 */
void *z_x86_userspace_prepare_thread(struct k_thread *thread)
{
	void *initial_entry;

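	/* The thread stack header, which contains the privilege elevation
	 * stack, lives at the lowest address of the stack object. When the
	 * stack is memory-mapped, it must be accessed through the mapped
	 * address rather than the original stack object.
	 */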
	struct z_x86_thread_stack_header *header =
#ifdef CONFIG_THREAD_STACK_MEM_MAPPED
		(struct z_x86_thread_stack_header *)thread->stack_info.mapped.addr;
#else
		(struct z_x86_thread_stack_header *)thread->stack_obj;
#endif /* CONFIG_THREAD_STACK_MEM_MAPPED */

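	/* x86 stacks grow downward, so the initial supervisor stack pointer
	 * used for privilege elevation points just past the end of the
	 * privilege stack buffer.
	 */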
	thread->arch.psp =
		header->privilege_stack + sizeof(header->privilege_stack);

#ifndef CONFIG_X86_COMMON_PAGE_TABLE
	/* It is important that this gets cleared, so that the
	 * arch_mem_domain_* APIs can distinguish between new threads and
	 * threads migrating between domains.
	 */
	thread->arch.ptables = (uintptr_t)NULL;
#endif /* CONFIG_X86_COMMON_PAGE_TABLE */

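	/* Threads created with K_USER start out in user mode via
	 * arch_user_mode_enter(); all other threads enter directly at
	 * z_thread_entry() in supervisor mode.
	 */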
	if ((thread->base.user_options & K_USER) != 0U) {
		initial_entry = arch_user_mode_enter;
	} else {
		initial_entry = z_thread_entry;
	}

	return initial_entry;
}

FUNC_NORETURN void arch_user_mode_enter(k_thread_entry_t user_entry,
					void *p1, void *p2, void *p3)
{
	size_t stack_end;

	/* Transition will reset stack pointer to initial, discarding
	 * any old context since this is a one-way operation
	 */
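	/* stack_info.delta accounts for space already carved off the top of
	 * the stack (e.g. for TLS data or stack pointer randomization), so
	 * the initial user stack pointer starts below it.
	 */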
	stack_end = Z_STACK_PTR_ALIGN(_current->stack_info.start +
				      _current->stack_info.size -
				      _current->stack_info.delta);

#ifdef CONFIG_X86_64
	/* x86_64 SysV ABI requires 16 byte stack alignment, which
	 * means that on entry to a C function (which follows a CALL
	 * that pushes 8 bytes) the stack must be MISALIGNED by
	 * exactly 8 bytes.
	 */
	stack_end -= 8;
#endif

#if defined(CONFIG_DEMAND_PAGING) && \
	!defined(CONFIG_LINKER_GENERIC_SECTIONS_PRESENT_AT_BOOT)
	/* If the generic section is not present at boot, the thread stack
	 * may not be in physical memory. Unconditionally page in the stack
	 * instead of relying on page faults, to speed up thread startup
	 * a little bit.
	 *
	 * Note that this also needs to page in the reserved portion of
	 * the stack (which is usually the page just before the beginning
	 * of the stack at _current->stack_info.start).
	 */
	uintptr_t stack_start;
	size_t stack_size;
	uintptr_t stack_aligned_start;
	size_t stack_aligned_size;

	stack_start = POINTER_TO_UINT(_current->stack_obj);
	stack_size = K_THREAD_STACK_LEN(_current->stack_info.size);

#if defined(CONFIG_X86_STACK_PROTECTION)
	/* With hardware stack protection, the first page of the stack
	 * object is a guard page, so skip it.
	 */
	stack_start += CONFIG_MMU_PAGE_SIZE;
	stack_size -= CONFIG_MMU_PAGE_SIZE;
#endif

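	/* Expand the region to page boundaries: the stack object may not
	 * start or end exactly on a page, and paging operates on whole
	 * pages.
	 */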
	(void)k_mem_region_align(&stack_aligned_start, &stack_aligned_size,
				 stack_start, stack_size,
				 CONFIG_MMU_PAGE_SIZE);
	k_mem_page_in(UINT_TO_POINTER(stack_aligned_start),
		      stack_aligned_size);
#endif

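	/* Hand off to the arch-specific routine that completes the
	 * transition to Ring 3 on the thread's own stack and page tables;
	 * it does not return.
	 */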
	z_x86_userspace_enter(user_entry, p1, p2, p3, stack_end,
			      _current->stack_info.start);
	CODE_UNREACHABLE;
}