/*
 * Copyright (c) 2019 Intel Corporation
 * SPDX-License-Identifier: Apache-2.0
 */

#include <kernel.h>
#include <ksched.h>
#include <kernel_structs.h>
#include <kernel_internal.h>
#include <exc_handle.h>
#include <logging/log.h>
#include <x86_mmu.h>
#include <mmu.h>
LOG_MODULE_DECLARE(os, CONFIG_KERNEL_LOG_LEVEL);

#if defined(CONFIG_BOARD_QEMU_X86) || defined(CONFIG_BOARD_QEMU_X86_64)
FUNC_NORETURN void arch_system_halt(unsigned int reason)
{
	ARG_UNUSED(reason);

	/* Causes QEMU to exit. We passed the following on the command line:
	 * -device isa-debug-exit,iobase=0xf4,iosize=0x04
	 *
	 * For any value of the first argument X, the return value of the
	 * QEMU process is (X * 2) + 1.
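	 * For example, passing 0 here yields a QEMU exit status of 1.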
	 *
	 * It has been observed that if the emulator exits due to a
	 * triple fault (often caused by bad page tables or other CPU
	 * structures) it will terminate with a 0 error code.
	 */
	sys_out32(reason, 0xf4);
	CODE_UNREACHABLE;
}
#endif

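/* Return the interrupted stack pointer from the exception stack frame;
 * the register is named RSP on x86_64 and ESP on 32-bit x86.
 */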
static inline uintptr_t esf_get_sp(const z_arch_esf_t *esf)
{
#ifdef CONFIG_X86_64
	return esf->rsp;
#else
	return esf->esp;
#endif
}

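/* Return the exception error code recorded in the exception stack frame */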
static inline uintptr_t esf_get_code(const z_arch_esf_t *esf)
{
#ifdef CONFIG_X86_64
	return esf->code;
#else
	return esf->errorCode;
#endif
}

#ifdef CONFIG_THREAD_STACK_INFO
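/* Check whether the span [addr, addr + size) lies outside the stack region
 * the faulting context should have been using: the interrupt stack, the
 * privilege elevation stack, or the thread's stack buffer. Returns true
 * if the span is out of bounds, indicating probable overflow or corruption.
 */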
__pinned_func
bool z_x86_check_stack_bounds(uintptr_t addr, size_t size, uint16_t cs)
{
	uintptr_t start, end;

	if (_current == NULL || arch_is_in_isr()) {
		/* We were servicing an interrupt or in an early boot
		 * environment and are supposed to be on the interrupt stack.
		 */
		int cpu_id;

#ifdef CONFIG_SMP
		cpu_id = arch_curr_cpu()->id;
#else
		cpu_id = 0;
#endif
		start = (uintptr_t)Z_KERNEL_STACK_BUFFER(
			z_interrupt_stacks[cpu_id]);
		end = start + CONFIG_ISR_STACK_SIZE;
#ifdef CONFIG_USERSPACE
	} else if ((cs & 0x3U) == 0U &&
		   (_current->base.user_options & K_USER) != 0) {
		/* The low two bits of the CS register are the privilege
		 * level: 0 in supervisor mode and 3 in user mode,
		 * corresponding to ring 0 / ring 3.
		 *
		 * If we get here, we must have been doing a syscall; check
		 * the privilege elevation stack bounds.
		 */
		start = _current->stack_info.start - CONFIG_MMU_PAGE_SIZE;
		end = _current->stack_info.start;
#endif /* CONFIG_USERSPACE */
	} else {
		/* Normal thread operation, check its stack buffer */
		start = _current->stack_info.start;
		end = Z_STACK_PTR_ALIGN(_current->stack_info.start +
					_current->stack_info.size);
	}

	return (addr <= start) || (addr + size > end);
}
#endif

#ifdef CONFIG_EXCEPTION_DEBUG
#if defined(CONFIG_X86_EXCEPTION_STACK_TRACE)
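/* Layout the unwinder assumes for each frame in the frame-pointer chain:
 * the caller's saved frame pointer, then the return address, and on
 * 32-bit x86 the first stack argument passed to the function.
 */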
struct stack_frame {
	uintptr_t next;
	uintptr_t ret_addr;
#ifndef CONFIG_X86_64
	uintptr_t args;
#endif
};

#define MAX_STACK_FRAMES 8

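/* Walk the frame-pointer chain starting at base_ptr, logging up to
 * MAX_STACK_FRAMES return addresses. Stops early on an unaligned, NULL,
 * or out-of-bounds frame pointer.
 */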
__pinned_func
static void unwind_stack(uintptr_t base_ptr, uint16_t cs)
{
	struct stack_frame *frame;
	int i;

	if (base_ptr == 0U) {
		LOG_ERR("NULL base ptr");
		return;
	}

	for (i = 0; i < MAX_STACK_FRAMES; i++) {
		if (base_ptr % sizeof(base_ptr) != 0U) {
			LOG_ERR("unaligned frame ptr");
			return;
		}

		frame = (struct stack_frame *)base_ptr;
		if (frame == NULL) {
			break;
		}

#ifdef CONFIG_THREAD_STACK_INFO
		/* Ensure the stack frame is within the faulting context's
		 * stack buffer
		 */
		if (z_x86_check_stack_bounds((uintptr_t)frame,
					     sizeof(*frame), cs)) {
			LOG_ERR(" corrupted? (bp=%p)", frame);
			break;
		}
#endif

		if (frame->ret_addr == 0U) {
			break;
		}
#ifdef CONFIG_X86_64
		LOG_ERR(" 0x%016lx", frame->ret_addr);
#else
		LOG_ERR(" 0x%08lx (0x%lx)", frame->ret_addr, frame->args);
#endif
		base_ptr = frame->next;
	}
}
#endif /* CONFIG_X86_EXCEPTION_STACK_TRACE */

static inline uintptr_t get_cr3(const z_arch_esf_t *esf)
{
#if defined(CONFIG_USERSPACE) && defined(CONFIG_X86_KPTI)
	/* If the interrupted thread was in user mode, we did a page table
	 * switch when we took the exception via z_x86_trampoline_to_kernel
	 */
	if ((esf->cs & 0x3) != 0) {
		return _current->arch.ptables;
	}
#else
	ARG_UNUSED(esf);
#endif
	/* Return the current CR3 value; it didn't change when we took
	 * the exception
	 */
	return z_x86_cr3_get();
}

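/* Convert the physical page table address in effect at exception time
 * to a virtual address we can examine
 */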
static inline pentry_t *get_ptables(const z_arch_esf_t *esf)
{
	return z_mem_virt_addr(get_cr3(esf));
}

#ifdef CONFIG_X86_64
__pinned_func
static void dump_regs(const z_arch_esf_t *esf)
{
	LOG_ERR("RAX: 0x%016lx RBX: 0x%016lx RCX: 0x%016lx RDX: 0x%016lx",
		esf->rax, esf->rbx, esf->rcx, esf->rdx);
	LOG_ERR("RSI: 0x%016lx RDI: 0x%016lx RBP: 0x%016lx RSP: 0x%016lx",
		esf->rsi, esf->rdi, esf->rbp, esf->rsp);
	LOG_ERR(" R8: 0x%016lx R9: 0x%016lx R10: 0x%016lx R11: 0x%016lx",
		esf->r8, esf->r9, esf->r10, esf->r11);
	LOG_ERR("R12: 0x%016lx R13: 0x%016lx R14: 0x%016lx R15: 0x%016lx",
		esf->r12, esf->r13, esf->r14, esf->r15);
	LOG_ERR("RSP: 0x%016lx RFLAGS: 0x%016lx CS: 0x%04lx CR3: 0x%016lx",
		esf->rsp, esf->rflags, esf->cs & 0xFFFFU, get_cr3(esf));

#ifdef CONFIG_X86_EXCEPTION_STACK_TRACE
	LOG_ERR("call trace:");
#endif
	LOG_ERR("RIP: 0x%016lx", esf->rip);
#ifdef CONFIG_X86_EXCEPTION_STACK_TRACE
	unwind_stack(esf->rbp, esf->cs);
#endif
}
#else /* 32-bit */
__pinned_func
static void dump_regs(const z_arch_esf_t *esf)
{
	LOG_ERR("EAX: 0x%08x, EBX: 0x%08x, ECX: 0x%08x, EDX: 0x%08x",
		esf->eax, esf->ebx, esf->ecx, esf->edx);
	LOG_ERR("ESI: 0x%08x, EDI: 0x%08x, EBP: 0x%08x, ESP: 0x%08x",
		esf->esi, esf->edi, esf->ebp, esf->esp);
	LOG_ERR("EFLAGS: 0x%08x CS: 0x%04x CR3: 0x%08lx", esf->eflags,
		esf->cs & 0xFFFFU, get_cr3(esf));

#ifdef CONFIG_X86_EXCEPTION_STACK_TRACE
	LOG_ERR("call trace:");
#endif
	LOG_ERR("EIP: 0x%08x", esf->eip);
#ifdef CONFIG_X86_EXCEPTION_STACK_TRACE
	unwind_stack(esf->ebp, esf->cs);
#endif
}
#endif /* CONFIG_X86_64 */

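/* Log a human-readable description of the exception vector, including
 * the error code for vectors where the CPU pushes one
 */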
__pinned_func
static void log_exception(uintptr_t vector, uintptr_t code)
{
	switch (vector) {
	case IV_DIVIDE_ERROR:
		LOG_ERR("Divide by zero");
		break;
	case IV_DEBUG:
		LOG_ERR("Debug");
		break;
	case IV_NON_MASKABLE_INTERRUPT:
		LOG_ERR("Non-maskable interrupt");
		break;
	case IV_BREAKPOINT:
		LOG_ERR("Breakpoint");
		break;
	case IV_OVERFLOW:
		LOG_ERR("Overflow");
		break;
	case IV_BOUND_RANGE:
		LOG_ERR("Bound range exceeded");
		break;
	case IV_INVALID_OPCODE:
		LOG_ERR("Invalid opcode");
		break;
	case IV_DEVICE_NOT_AVAILABLE:
		LOG_ERR("Floating point unit device not available");
		break;
	case IV_DOUBLE_FAULT:
		LOG_ERR("Double fault (code 0x%lx)", code);
		break;
	case IV_COPROC_SEGMENT_OVERRUN:
		LOG_ERR("Co-processor segment overrun");
		break;
	case IV_INVALID_TSS:
		LOG_ERR("Invalid TSS (code 0x%lx)", code);
		break;
	case IV_SEGMENT_NOT_PRESENT:
		LOG_ERR("Segment not present (code 0x%lx)", code);
		break;
	case IV_STACK_FAULT:
		LOG_ERR("Stack segment fault");
		break;
	case IV_GENERAL_PROTECTION:
		LOG_ERR("General protection fault (code 0x%lx)", code);
		break;
	/* IV_PAGE_FAULT skipped, we have a dedicated handler */
	case IV_X87_FPU_FP_ERROR:
		LOG_ERR("x87 floating point exception");
		break;
	case IV_ALIGNMENT_CHECK:
		LOG_ERR("Alignment check (code 0x%lx)", code);
		break;
	case IV_MACHINE_CHECK:
		LOG_ERR("Machine check");
		break;
	case IV_SIMD_FP:
		LOG_ERR("SIMD floating point exception");
		break;
	case IV_VIRT_EXCEPTION:
		LOG_ERR("Virtualization exception");
		break;
	case IV_SECURITY_EXCEPTION:
		LOG_ERR("Security exception");
		break;
	default:
		LOG_ERR("Exception not handled (code 0x%lx)", code);
		break;
	}
}

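/* Decode the page fault error code and log it along with the faulting
 * linear address retrieved from the CR2 register
 */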
__pinned_func
static void dump_page_fault(z_arch_esf_t *esf)
{
	uintptr_t err;
	void *cr2;

	cr2 = z_x86_cr2_get();
	err = esf_get_code(esf);
	LOG_ERR("Page fault at address %p (error code 0x%lx)", cr2, err);

	if ((err & PF_RSVD) != 0) {
		LOG_ERR("Reserved bits set in page tables");
	} else {
		if ((err & PF_P) == 0) {
			LOG_ERR("Linear address not present in page tables");
		}
		LOG_ERR("Access violation: %s thread not allowed to %s",
			(err & PF_US) != 0U ? "user" : "supervisor",
			(err & PF_ID) != 0U ? "execute" : ((err & PF_WR) != 0U ?
							   "write" : "read"));
		if ((err & PF_PK) != 0) {
			LOG_ERR("Protection key disallowed");
		} else if ((err & PF_SGX) != 0) {
			LOG_ERR("SGX access control violation");
		}
	}

#ifdef CONFIG_X86_MMU
	z_x86_dump_mmu_flags(get_ptables(esf), cr2);
#endif /* CONFIG_X86_MMU */
}
#endif /* CONFIG_EXCEPTION_DEBUG */

__pinned_func
FUNC_NORETURN void z_x86_fatal_error(unsigned int reason,
				     const z_arch_esf_t *esf)
{
	if (esf != NULL) {
#ifdef CONFIG_EXCEPTION_DEBUG
		dump_regs(esf);
#endif
#if defined(CONFIG_ASSERT) && defined(CONFIG_X86_64)
		if (esf->rip == 0xb9) {
			/* See implementation of __resume in locore.S. This is
			 * never a valid RIP value. Treat this as a kernel
			 * panic.
			 */
			LOG_ERR("Attempt to resume un-suspended thread object");
			reason = K_ERR_KERNEL_PANIC;
		}
#endif
	}
	z_fatal_error(reason, esf);
	CODE_UNREACHABLE;
}

__pinned_func
FUNC_NORETURN void z_x86_unhandled_cpu_exception(uintptr_t vector,
						 const z_arch_esf_t *esf)
{
#ifdef CONFIG_EXCEPTION_DEBUG
	log_exception(vector, esf_get_code(esf));
#else
	ARG_UNUSED(vector);
#endif
	z_x86_fatal_error(K_ERR_CPU_EXCEPTION, esf);
}

#ifdef CONFIG_USERSPACE
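/* z_x86_user_string_nlen() may fault while probing user memory that could
 * be unmapped; registering it here lets the page fault handler redirect
 * execution to its fixup routine instead of treating the fault as fatal.
 */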
Z_EXC_DECLARE(z_x86_user_string_nlen);

static const struct z_exc_handle exceptions[] = {
	Z_EXC_HANDLE(z_x86_user_string_nlen)
};
#endif

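/* Page fault handler. In order: try to satisfy the fault via demand
 * paging, redirect to a registered fixup routine if the fault was
 * expected, and otherwise escalate to a fatal error, reporting a stack
 * check failure when the stack pointer itself is out of bounds.
 */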
__pinned_func
void z_x86_page_fault_handler(z_arch_esf_t *esf)
{
#ifdef CONFIG_DEMAND_PAGING
	if ((esf->errorCode & PF_P) == 0) {
		/* Page was non-present at the time the exception happened.
		 * Get the faulting virtual address from the CR2 register
		 */
		void *virt = z_x86_cr2_get();
		bool was_valid_access;

#ifdef CONFIG_X86_KPTI
		/* The protection ring is the lowest 2 bits of the
		 * interrupted CS
		 */
		bool was_user = ((esf->cs & 0x3) != 0U);

		/* Need to check if the interrupted context was a user thread
		 * that hit a non-present page that was flipped due to KPTI in
		 * the thread's page tables, in which case this is an access
		 * violation and we should treat this as an error.
		 *
		 * We're probably not locked, but if there is a race, we will
		 * be fine; the kernel page fault code will later detect that
		 * the page is present in the kernel's page tables and the
		 * instruction will just be re-tried, producing another fault.
		 */
		if (was_user &&
		    !z_x86_kpti_is_access_ok(virt, get_ptables(esf))) {
			was_valid_access = false;
		} else
#endif /* CONFIG_X86_KPTI */
		{
			was_valid_access = z_page_fault(virt);
		}
		if (was_valid_access) {
			/* Page fault handled, re-try */
			return;
		}
	}
#endif /* CONFIG_DEMAND_PAGING */

#if !defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_COREDUMP)
	z_x86_exception_vector = IV_PAGE_FAULT;
#endif

#ifdef CONFIG_USERSPACE
	int i;

	for (i = 0; i < ARRAY_SIZE(exceptions); i++) {
#ifdef CONFIG_X86_64
		if ((void *)esf->rip >= exceptions[i].start &&
		    (void *)esf->rip < exceptions[i].end) {
			esf->rip = (uint64_t)(exceptions[i].fixup);
			return;
		}
#else
		if ((void *)esf->eip >= exceptions[i].start &&
		    (void *)esf->eip < exceptions[i].end) {
			esf->eip = (unsigned int)(exceptions[i].fixup);
			return;
		}
#endif /* CONFIG_X86_64 */
	}
#endif
#ifdef CONFIG_EXCEPTION_DEBUG
	dump_page_fault(esf);
#endif
#ifdef CONFIG_THREAD_STACK_INFO
	if (z_x86_check_stack_bounds(esf_get_sp(esf), 0, esf->cs)) {
		z_x86_fatal_error(K_ERR_STACK_CHK_FAIL, esf);
	}
#endif
	z_x86_fatal_error(K_ERR_CPU_EXCEPTION, esf);
	CODE_UNREACHABLE;
}

__pinned_func
void z_x86_do_kernel_oops(const z_arch_esf_t *esf)
{
	uintptr_t reason;

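	/* The oops reason code is passed in RAX on x86_64; on 32-bit x86 it
	 * was pushed onto the thread's stack just before the software
	 * interrupt, so it is read through the interrupted ESP.
	 */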
#ifdef CONFIG_X86_64
	reason = esf->rax;
#else
	uintptr_t *stack_ptr = (uintptr_t *)esf->esp;

	reason = *stack_ptr;
#endif

#ifdef CONFIG_USERSPACE
	/* User mode is only allowed to induce oopses and stack check
	 * failures via this software interrupt
	 */
	if ((esf->cs & 0x3) != 0 && !(reason == K_ERR_KERNEL_OOPS ||
				      reason == K_ERR_STACK_CHK_FAIL)) {
		reason = K_ERR_KERNEL_OOPS;
	}
#endif

	z_x86_fatal_error(reason, esf);
}