/*
 * Copyright (c) 2017 Intel Corporation
 *
 * SPDX-License-Identifier: Apache-2.0
 */

#include <zephyr/kernel.h>
#include <zephyr/sys/speculation.h>
#include <zephyr/internal/syscall_handler.h>
#include <kernel_arch_func.h>
#include <ksched.h>
#include <x86_mmu.h>

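/* Privilege elevation stacks must be a non-zero multiple of the MMU
 * page size.
 */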
BUILD_ASSERT((CONFIG_PRIVILEGED_STACK_SIZE > 0) &&
	     (CONFIG_PRIVILEGED_STACK_SIZE % CONFIG_MMU_PAGE_SIZE) == 0);

#ifdef CONFIG_DEMAND_PAGING
#include <zephyr/kernel/mm/demand_paging.h>
#endif

#ifndef CONFIG_X86_KPTI
/* Update to the incoming thread's page tables, and update the location of
 * the privilege elevation stack.
 *
 * May be called ONLY during context switch. Hot code path!
 *
 * Nothing to do here if KPTI is enabled. We are in supervisor mode, so the
 * active page tables are the kernel's page tables. If the incoming thread is
 * in user mode we are going to switch CR3 to the domain-specific tables when
 * we go through z_x86_trampoline_to_user.
 *
 * We don't need to update the privilege mode initial stack pointer either:
 * privilege elevation always lands on the trampoline stack, and the
 * irq/syscall code has to manually transition off of it to the appropriate
 * stack after switching page tables.
 */
__pinned_func
void z_x86_swap_update_page_tables(struct k_thread *incoming)
{
#ifndef CONFIG_X86_64
	/* Set initial stack pointer when elevating privileges from Ring 3
	 * to Ring 0.
	 */
	_main_tss.esp0 = (uintptr_t)incoming->arch.psp;
#endif

#ifdef CONFIG_X86_COMMON_PAGE_TABLE
	z_x86_swap_update_common_page_table(incoming);
#else
	/* Check first that we actually need to do this, since setting
	 * CR3 involves an expensive full TLB flush.
	 */
	uintptr_t ptables_phys = incoming->arch.ptables;

	__ASSERT(ptables_phys != 0, "NULL page tables for thread %p\n",
		 incoming);

	if (ptables_phys != z_x86_cr3_get()) {
		z_x86_cr3_set(ptables_phys);
	}
#endif /* CONFIG_X86_COMMON_PAGE_TABLE */
}
#endif /* CONFIG_X86_KPTI */

/* Preparation steps needed for all threads if user mode is turned on.
 *
 * Returns the initial entry point to swap into.
 */
void *z_x86_userspace_prepare_thread(struct k_thread *thread)
{
	void *initial_entry;

	struct z_x86_thread_stack_header *header =
#ifdef CONFIG_THREAD_STACK_MEM_MAPPED
		(struct z_x86_thread_stack_header *)thread->stack_info.mapped.addr;
#else
		(struct z_x86_thread_stack_header *)thread->stack_obj;
#endif /* CONFIG_THREAD_STACK_MEM_MAPPED */

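	/* Stacks grow down, so the initial supervisor stack pointer is the
	 * end of the privilege elevation stack buffer in the stack object
	 * header.
	 */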
	thread->arch.psp =
		header->privilege_stack + sizeof(header->privilege_stack);

#ifndef CONFIG_X86_COMMON_PAGE_TABLE
	/* It is important that this gets cleared, so that the
	 * arch_mem_domain_* APIs can distinguish between new threads and
	 * threads migrating between domains.
	 */
	thread->arch.ptables = (uintptr_t)NULL;
#endif /* CONFIG_X86_COMMON_PAGE_TABLE */

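	/* User threads enter through arch_user_mode_enter() so they can drop
	 * to user mode; supervisor threads start directly in z_thread_entry().
	 */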
	if ((thread->base.user_options & K_USER) != 0U) {
		initial_entry = arch_user_mode_enter;
	} else {
		initial_entry = z_thread_entry;
	}

	return initial_entry;
}

FUNC_NORETURN void arch_user_mode_enter(k_thread_entry_t user_entry,
					void *p1, void *p2, void *p3)
{
	size_t stack_end;

	/* Transition will reset stack pointer to initial, discarding
	 * any old context since this is a one-way operation
	 */
	stack_end = Z_STACK_PTR_ALIGN(_current->stack_info.start +
				      _current->stack_info.size -
				      _current->stack_info.delta);

#ifdef CONFIG_X86_64
	/* x86_64 SysV ABI requires 16 byte stack alignment, which
	 * means that on entry to a C function (which follows a CALL
	 * that pushes 8 bytes) the stack must be MISALIGNED by
	 * exactly 8 bytes.
	 */
	stack_end -= 8;
#endif

#if defined(CONFIG_DEMAND_PAGING) && \
	!defined(CONFIG_LINKER_GENERIC_SECTIONS_PRESENT_AT_BOOT)
	/* If the generic section is not present at boot, the thread stack
	 * may not be in physical memory. Unconditionally page in the stack
	 * instead of relying on page faults, to speed up thread startup a
	 * little bit.
	 *
	 * Note that this also needs to page in the reserved portion of the
	 * stack (which is usually the page just before the beginning of the
	 * stack at _current->stack_info.start).
	 */
	uintptr_t stack_start;
	size_t stack_size;
	uintptr_t stack_aligned_start;
	size_t stack_aligned_size;

	stack_start = POINTER_TO_UINT(_current->stack_obj);
	stack_size = K_THREAD_STACK_LEN(_current->stack_info.size);

#if defined(CONFIG_X86_STACK_PROTECTION)
	/* With hardware stack protection, the first page of the stack
	 * is a guard page, so we need to skip it.
	 */
	stack_start += CONFIG_MMU_PAGE_SIZE;
	stack_size -= CONFIG_MMU_PAGE_SIZE;
#endif

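	/* Expand the region to page boundaries before paging it in, since
	 * demand paging operates on whole pages.
	 */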
	(void)k_mem_region_align(&stack_aligned_start, &stack_aligned_size,
				 stack_start, stack_size,
				 CONFIG_MMU_PAGE_SIZE);
	k_mem_page_in(UINT_TO_POINTER(stack_aligned_start),
		      stack_aligned_size);
#endif

	z_x86_userspace_enter(user_entry, p1, p2, p3, stack_end,
			      _current->stack_info.start);
	CODE_UNREACHABLE;
}