/*
 * Copyright (c) 2017 Intel Corporation
 *
 * SPDX-License-Identifier: Apache-2.0
 */

#include <zephyr/arch/x86/ia32/asm.h>
#include <zephyr/arch/cpu.h>
#include <offsets_short.h>
#include <zephyr/syscall.h>
#include <zephyr/kernel/mm.h>
#include <x86_mmu.h>

/* Exports */
GTEXT(z_x86_syscall_entry_stub)
GTEXT(z_x86_userspace_enter)
GTEXT(arch_user_string_nlen)
GTEXT(z_x86_user_string_nlen_fault_start)
GTEXT(z_x86_user_string_nlen_fault_end)
GTEXT(z_x86_user_string_nlen_fixup)

/* Imports */
GDATA(_k_syscall_table)

#ifdef CONFIG_X86_KPTI
/* Switch from the shadow to the kernel page table, switch to the interrupted
 * thread's kernel stack, and copy all context from the trampoline stack.
 *
 * Assumes all registers are callee-saved since this gets called from other
 * ASM code. Assumes a particular stack layout which is correct for
 * _exception_enter and _interrupt_enter when invoked with a call instruction:
 *
 *  28 SS
 *  24 ESP
 *  20 EFLAGS
 *  16 CS
 *  12 EIP
 *  8  isr_param or exc code
 *  4  isr or exc handler
 *  0  return address
 */
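/* A sketch of how the interrupt/exception entry code is assumed to invoke
 * this, very early in its prologue (illustrative only; see _interrupt_enter
 * and _exception_enter for the authoritative sequence):
 *
 *	#ifdef CONFIG_X86_KPTI
 *		call	z_x86_trampoline_to_kernel
 *	#endif
 */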
SECTION_FUNC(PINNED_TEXT, z_x86_trampoline_to_kernel)
	/* Check interrupted code segment to see if we came from ring 3
	 * and hence on the trampoline stack
	 */
	testb $3, 16(%esp) /* Offset of CS */
	jz 1f

	/* Stash these regs as we need to use them */
	pushl	%esi
	pushl	%edi

	/* Switch to kernel page table */
	movl	$K_MEM_PHYS_ADDR(z_x86_kernel_ptables), %esi
	movl	%esi, %cr3

	/* Save old trampoline stack pointer in %edi */
	movl	%esp, %edi

	/* Switch to privilege mode stack */
	movl	$_kernel, %esi
	movl	_kernel_offset_to_current(%esi), %esi
	movl	_thread_offset_to_psp(%esi), %esp

	/* Transplant stack context and restore ESI/EDI. Taking care to zero
	 * or put uninteresting values where we stashed ESI/EDI since the
	 * trampoline page is insecure and there might be a context switch
	 * on the way out instead of returning to the original thread
	 * immediately.
	 */
	pushl	36(%edi)	/* SS */
	pushl	32(%edi)	/* ESP */
	pushl	28(%edi)	/* EFLAGS */
	pushl	24(%edi)	/* CS */
	pushl	20(%edi)	/* EIP */
	pushl	16(%edi)	/* error code or isr parameter */
	pushl	12(%edi)	/* exception/irq handler */
	pushl   8(%edi)		/* return address */
	movl	4(%edi), %esi	/* restore ESI */
	movl	$0, 4(%edi)	/* Zero old esi storage area */
	xchgl	%edi, (%edi)	/* Exchange old edi to restore it and put
				   old sp in the storage area */

	/* Trampoline stack should have nothing sensitive in it at this point */
1:
	ret

/* Copy interrupt return stack context to the trampoline stack, switch back
 * to the user page table, and only then 'iret'. We jump to this instead
 * of calling 'iret' if KPTI is turned on.
 *
 * Stack layout is expected to be as follows:
 *
 * 16 SS
 * 12 ESP
 * 8 EFLAGS
 * 4 CS
 * 0 EIP
 *
 * This function is conditionally macroed to KPTI_IRET/KPTI_IRET_USER
 */
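/* For reference, the KPTI_IRET/KPTI_IRET_USER macros are assumed to expand
 * roughly as follows (a sketch; the real definitions live in the arch
 * headers):
 *
 *	with CONFIG_X86_KPTI:    KPTI_IRET      -> jmp z_x86_trampoline_to_user
 *	                         KPTI_IRET_USER -> jmp z_x86_trampoline_to_user_always
 *	without CONFIG_X86_KPTI: both reduce to a plain 'iret'
 */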
SECTION_FUNC(PINNED_TEXT, z_x86_trampoline_to_user)
	/* Check interrupted code segment to see if we came from ring 3
	 * and hence on the trampoline stack
	 */
	testb $3, 4(%esp) /* Offset of CS */
	jz 1f

	/* Otherwise, fall through ... */

SECTION_FUNC(PINNED_TEXT, z_x86_trampoline_to_user_always)
	/* Stash EDI, need a free register */
	pushl	%edi

	/* Store old stack pointer and switch to trampoline stack.
	 * Lock IRQs before changing the stack pointer to the trampoline stack;
	 * we don't want any interrupts also using the trampoline stack
	 * during this time.
	 */
	movl	%esp, %edi
	cli
	movl	$z_trampoline_stack_end, %esp

	/* Copy context */
	pushl	20(%edi)	/* SS */
	pushl	16(%edi)	/* ESP */
	pushl	12(%edi)	/* EFLAGS */
	pushl   8(%edi)		/* CS */
	pushl   4(%edi)		/* EIP */
	xchgl	%edi, (%edi)	/* Exchange old edi to restore it and put
				   trampoline stack address in its old storage
				   area */
	/* Switch to user page table */
	pushl	%eax
	movl	$_kernel, %eax
	movl	_kernel_offset_to_current(%eax), %eax
	movl	_thread_offset_to_ptables(%eax), %eax
	movl	%eax, %cr3
	popl	%eax
	movl	$0, -4(%esp)	/* Delete stashed EAX data */

	/* Trampoline stack should have nothing sensitive in it at this point */
1:
	iret
#endif /* CONFIG_X86_KPTI */

/* Landing site for syscall SW IRQ. Marshal arguments and call C function for
 * further processing. We're on the kernel stack for the invoking thread,
 * unless KPTI is enabled, in which case we're on the trampoline stack and
 * need to get off it before enabling interrupts.
 */
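/* How we get here (a sketch based on the marshaling below; the gate setup
 * and the user-side invocation stubs are defined elsewhere): user code issues
 * a software interrupt through a DPL 3 gate, with the registers assumed to be
 * loaded as:
 *
 *	EAX = arg1, EDX = arg2, ECX = arg3, EBX = arg4,
 *	EDI = arg5, EBP = arg6, ESI = call_id
 */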
SECTION_FUNC(TEXT, z_x86_syscall_entry_stub)
#ifdef CONFIG_X86_KPTI
	/* Stash these regs as we need to use them */
	pushl	%esi
	pushl	%edi

	/* Switch to kernel page table */
	movl	$K_MEM_PHYS_ADDR(z_x86_kernel_ptables), %esi
	movl	%esi, %cr3

	/* Save old trampoline stack pointer in %edi */
	movl	%esp, %edi

	/* Switch to privilege elevation stack */
	movl	$_kernel, %esi
	movl	_kernel_offset_to_current(%esi), %esi
	movl	_thread_offset_to_psp(%esi), %esp

	/* Transplant the interrupt stack frame (SS, ESP, EFLAGS, CS, EIP) and
	 * restore ESI/EDI. Variant of the logic in z_x86_trampoline_to_kernel.
	 */
	pushl	24(%edi)	/* SS */
	pushl	20(%edi)	/* ESP */
	pushl	16(%edi)	/* EFLAGS */
	pushl	12(%edi)	/* CS */
	pushl	8(%edi)		/* EIP */
	movl	4(%edi), %esi	/* restore ESI */
	movl	$0, 4(%edi)	/* Zero old esi storage area */
	xchgl	%edi, (%edi)	/* Exchange old edi to restore it and put
				   old sp in the storage area */

	/* Trampoline stack should have nothing sensitive in it at this point */
#endif /* CONFIG_X86_KPTI */

	sti			/* re-enable interrupts */
	cld			/* clear direction flag, restored on 'iret' */

	/* call_id is in ESI. Bounds-check it: it must be less than
	 * K_SYSCALL_LIMIT.
	 */
	cmp	$K_SYSCALL_LIMIT, %esi
	jae	_bad_syscall

_id_ok:
#ifdef CONFIG_X86_BOUNDS_CHECK_BYPASS_MITIGATION
	/* Prevent speculation with bogus system call IDs */
	lfence
#endif
	/* Marshal arguments per calling convention to match what is expected
	 * for _k_syscall_handler_t functions
	 */
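	/* The push order below assumes a handler signature along these lines
	 * (a sketch; the authoritative _k_syscall_handler_t typedef is in the
	 * syscall headers):
	 *
	 *	uintptr_t handler(uintptr_t arg1, ..., uintptr_t arg6,
	 *			  void *ssf);
	 *
	 * cdecl pushes arguments right-to-left, so ssf goes first and arg1
	 * last.
	 */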
	push	%esp		/* ssf */
	push	%ebp		/* arg6 */
	push	%edi		/* arg5 */
	push	%ebx		/* arg4 */
	push	%ecx		/* arg3 */
	push	%edx		/* arg2 */
	push	%eax		/* arg1 */

	/* From the call ID in ESI, load EBX with the actual function pointer
	 * to call by looking it up in the system call dispatch table
	 */
	xor	%edi, %edi
	mov	_k_syscall_table(%edi, %esi, 4), %ebx

	/* Run the handler, which is some entry in _k_syscall_table */
	call	*%ebx

	/* EAX now contains return value. Pop or xor everything else to prevent
	 * information leak from kernel mode.
	 */
	pop	%edx		/* old arg1 value, discard it */
	pop	%edx
	pop	%ecx
	pop	%ebx
	pop	%edi
	/* Discard ssf and arg6 */
	add	$8, %esp
	KPTI_IRET_USER

_bad_syscall:
	/* ESI had a bogus syscall ID in it, so replace it with the bad
	 * syscall handler's ID and pass the bad ID as the first argument.
	 * This clobbers ESI, but the bad syscall handler never returns
	 * anyway; it will generate a kernel oops.
	 */
	mov	%esi, %eax
	mov	$K_SYSCALL_BAD, %esi
	jmp	_id_ok


/*
 * size_t arch_user_string_nlen(const char *s, size_t maxsize, int *err_arg)
 */
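/* The z_x86_user_string_nlen_fault_start/_end/_fixup labels exported above
 * are presumably consulted by the page fault handler: if a fault's EIP lies
 * within [fault_start, fault_end), execution is expected to resume at the
 * fixup label, leaving the initial -1 error value in place so the caller
 * can tell that the string was not accessible.
 */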
SECTION_FUNC(TEXT, arch_user_string_nlen)
	push	%ebp
	mov	%esp, %ebp

	/* error value, set to -1 initially. This location is -4(%ebp) */
	push	$-1

	/* Do the strlen operation, based on disassembly of minimal libc */
	xor	%eax, %eax		/* EAX = 0, length count */
	mov	0x8(%ebp), %edx		/* EDX base of string */

	/* This code might page fault */
strlen_loop:
z_x86_user_string_nlen_fault_start:
	cmpb	$0x0, (%edx, %eax, 1)	/* *(EDX + EAX) == 0? Could fault. */

z_x86_user_string_nlen_fault_end:
	je	strlen_done
	cmp	0xc(%ebp), %eax		/* Max length reached? */
	je	strlen_done
	inc	%eax			/* EAX++ and loop again */
	jmp	strlen_loop

strlen_done:
	/* Set error value to 0 since we succeeded */
	movl	$0, -4(%ebp)

z_x86_user_string_nlen_fixup:
	/* Write error value to err pointer parameter */
	movl	0x10(%ebp), %ecx
	pop	%edx
	movl	%edx, (%ecx)

	pop	%ebp
	ret


/* FUNC_NORETURN void z_x86_userspace_enter(k_thread_entry_t user_entry,
 *					   void *p1, void *p2, void *p3,
 *					   uint32_t stack_end,
 *					   uint32_t stack_start)
 *
 * A one-way trip to userspace.
 */
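/* Presumably reached from the arch's user mode entry path (e.g.
 * arch_user_mode_enter()). Arguments arrive on the stack per cdecl, which is
 * why they are popped below after discarding the return address.
 */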
SECTION_FUNC(TEXT, z_x86_userspace_enter)
	pop	%esi	/* Discard return address on stack */

	/* Fetch parameters on the stack */
	pop	%eax	/* user_entry */
	pop	%edx	/* p1 */
	pop	%ecx	/* p2 */
	pop	%esi	/* p3 */
	pop	%ebx	/* stack_end (high address) */
	pop	%edi	/* stack_start (low address) */

	/* Move to the kernel stack for this thread, so we can erase the
	 * user stack. The kernel stack is the page immediately before
	 * the user stack.
	 *
	 * For security reasons, we must erase the entire user stack.
	 * We don't know in what previous contexts it was used, and we do
	 * not want to leak any information.
	 */
	mov	%edi, %esp

	/* Erase and enable US bit in page tables for the stack buffer */
	push	%ecx
	push	%eax
	push	%edx
	call	z_x86_current_stack_perms
	pop	%edx
	pop	%eax
	pop	%ecx

	/* Set stack pointer to the base of the freshly-erased user stack.
	 * Now that this is set we won't need EBX any more.
	 */
	mov	%ebx, %esp

	/* Set segment registers (except CS and SS which are done in
	 * a special way by 'iret' below)
	 */
	mov	$USER_DATA_SEG, %bx
	mov	%bx, %ds
	mov	%bx, %es

	/* Push arguments to z_thread_entry() */
	push	%esi	/* p3 */
	push	%ecx	/* p2 */
	push	%edx	/* p1 */
	push	%eax	/* user_entry */
	/* NULL return address */
	push	$0

	/* Save the stack pointer at this position; this is where it will be
	 * when we land in z_thread_entry().
	 */
	mov	%esp, %edi

	/* An inter-privilege 'iret' pops all of these. We need to fake an
	 * interrupt return to enter user mode, as far calls cannot be used
	 * to drop the privilege level.
	 */
	push	$USER_DATA_SEG	/* SS */
	push	%edi		/* ESP */
	pushfl			/* EFLAGS */
	push	$USER_CODE_SEG	/* CS */
	push	$z_thread_entry	/* EIP */

	/* We will land in z_thread_entry() in user mode after this */
	KPTI_IRET_USER
