/*
 * Copyright (c) 2017 Intel Corporation
 *
 * SPDX-License-Identifier: Apache-2.0
 */

#include <zephyr/kernel.h>
#include <zephyr/sys/speculation.h>
#include <zephyr/internal/syscall_handler.h>
#include <kernel_arch_func.h>
#include <ksched.h>
#include <x86_mmu.h>

#ifdef CONFIG_DEMAND_PAGING
#include <zephyr/kernel/mm/demand_paging.h>
#endif

#ifndef CONFIG_X86_KPTI
/* Switch to the incoming thread's page tables, and update the location of
 * the privilege elevation stack.
 *
 * May be called ONLY during context switch. Hot code path!
 *
 * Nothing to do here if KPTI is enabled. We are in supervisor mode, so the
 * active page tables are the kernel's page tables. If the incoming thread is
 * in user mode we are going to switch CR3 to the domain-specific tables when
 * we go through z_x86_trampoline_to_user.
 *
 * We don't need to update the privilege mode initial stack pointer either;
 * privilege elevation always lands on the trampoline stack, and the
 * irq/syscall code has to manually transition off of it to the appropriate
 * stack after switching page tables.
 */
__pinned_func
void z_x86_swap_update_page_tables(struct k_thread *incoming)
{
#ifndef CONFIG_X86_64
	/* Set initial stack pointer when elevating privileges from Ring 3
	 * to Ring 0.
	 */
	_main_tss.esp0 = (uintptr_t)incoming->arch.psp;
#endif

#ifdef CONFIG_X86_COMMON_PAGE_TABLE
	z_x86_swap_update_common_page_table(incoming);
#else
	/* Check first that we actually need to do this, since setting
	 * CR3 involves an expensive full TLB flush.
	 */
	uintptr_t ptables_phys = incoming->arch.ptables;

	__ASSERT(ptables_phys != 0, "NULL page tables for thread %p\n",
		 incoming);

	if (ptables_phys != z_x86_cr3_get()) {
		z_x86_cr3_set(ptables_phys);
	}
#endif /* CONFIG_X86_COMMON_PAGE_TABLE */
}
#endif /* CONFIG_X86_KPTI */

/* Preparation steps needed for all threads if user mode is turned on.
 *
 * Returns the initial entry point to swap into.
 */
void *z_x86_userspace_prepare_thread(struct k_thread *thread)
{
	void *initial_entry;
	struct z_x86_thread_stack_header *header =
		(struct z_x86_thread_stack_header *)thread->stack_obj;

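	/* Initial supervisor stack pointer for privilege elevation: the top
	 * of the per-thread privilege elevation stack in the stack object's
	 * reserved header area (x86 stacks grow downward).
	 */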
	thread->arch.psp =
		header->privilege_stack + sizeof(header->privilege_stack);

#ifndef CONFIG_X86_COMMON_PAGE_TABLE
	/* It is important that this gets cleared, so that the
	 * arch_mem_domain_* APIs can distinguish between new threads and
	 * threads migrating between domains.
	 */
	thread->arch.ptables = (uintptr_t)NULL;
#endif /* CONFIG_X86_COMMON_PAGE_TABLE */

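	/* Threads created with K_USER begin at arch_user_mode_enter(), which
	 * drops to user mode before calling the thread entry; all other
	 * threads start in supervisor mode at z_thread_entry().
	 */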
	if ((thread->base.user_options & K_USER) != 0U) {
		initial_entry = arch_user_mode_enter;
	} else {
		initial_entry = z_thread_entry;
	}

	return initial_entry;
}

FUNC_NORETURN void arch_user_mode_enter(k_thread_entry_t user_entry,
					void *p1, void *p2, void *p3)
{
	size_t stack_end;

	/* Transition will reset stack pointer to initial, discarding
	 * any old context since this is a one-way operation
	 */
	stack_end = Z_STACK_PTR_ALIGN(_current->stack_info.start +
				      _current->stack_info.size -
				      _current->stack_info.delta);

#ifdef CONFIG_X86_64
	/* x86_64 SysV ABI requires 16 byte stack alignment, which
	 * means that on entry to a C function (which follows a CALL
	 * that pushes 8 bytes) the stack must be MISALIGNED by
	 * exactly 8 bytes.
	 */
	stack_end -= 8;
#endif

#if defined(CONFIG_DEMAND_PAGING) && \
	!defined(CONFIG_LINKER_GENERIC_SECTIONS_PRESENT_AT_BOOT)
	/* If the generic sections are not present in memory at boot,
	 * the thread stack may not be in physical memory yet.
	 * Unconditionally page in the stack instead of relying on
	 * page faults, to speed up thread startup a little.
	 *
	 * Note that this also needs to page in the reserved
	 * portion of the stack (which is usually the page just
	 * before the beginning of the stack in
	 * _current->stack_info.start).
	 */
	uintptr_t stack_start;
	size_t stack_size;
	uintptr_t stack_aligned_start;
	size_t stack_aligned_size;

	stack_start = POINTER_TO_UINT(_current->stack_obj);
	stack_size = Z_THREAD_STACK_SIZE_ADJUST(_current->stack_info.size);

#if defined(CONFIG_HW_STACK_PROTECTION)
	/* With hardware stack protection, the first page of the stack
	 * is a guard page, so skip it.
	 */
	stack_start += CONFIG_MMU_PAGE_SIZE;
	stack_size -= CONFIG_MMU_PAGE_SIZE;
#endif

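	/* Align the region to page boundaries and page the whole stack
	 * object in, including the reserved portion before
	 * _current->stack_info.start mentioned in the comment above.
	 */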
	(void)k_mem_region_align(&stack_aligned_start, &stack_aligned_size,
				 stack_start, stack_size,
				 CONFIG_MMU_PAGE_SIZE);
	k_mem_page_in(UINT_TO_POINTER(stack_aligned_start),
		      stack_aligned_size);
#endif

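	/* Hand off to the arch-specific entry routine, which switches to the
	 * user stack, drops to Ring 3 and begins executing the thread's
	 * user-mode entry with p1/p2/p3. It does not return.
	 */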
	z_x86_userspace_enter(user_entry, p1, p2, p3, stack_end,
			      _current->stack_info.start);
	CODE_UNREACHABLE;
}