/*
 * Copyright (c) 2019 Intel Corporation
 * SPDX-License-Identifier: Apache-2.0
 */

#include <zephyr/kernel.h>
#include <ksched.h>
#include <zephyr/kernel_structs.h>
#include <kernel_internal.h>
#include <zephyr/arch/common/exc_handle.h>
#include <zephyr/logging/log.h>
#include <x86_mmu.h>
#include <mmu.h>
LOG_MODULE_DECLARE(os, CONFIG_KERNEL_LOG_LEVEL);

#if defined(CONFIG_BOARD_QEMU_X86) || defined(CONFIG_BOARD_QEMU_X86_64)
FUNC_NORETURN void arch_system_halt(unsigned int reason)
{
	ARG_UNUSED(reason);

	/* Causes QEMU to exit. We passed the following on the command line:
	 * -device isa-debug-exit,iobase=0xf4,iosize=0x04
	 *
	 * For any value of the first argument X, the return value of the
	 * QEMU process is (X * 2) + 1.
	 *
	 * It has been observed that if the emulator exits due to a triple
	 * fault (often caused by bad page tables or other CPU structures),
	 * it terminates with an exit code of 0.
	 */
	sys_out32(reason, 0xf4);
	CODE_UNREACHABLE;
}
#endif

#ifdef CONFIG_THREAD_STACK_INFO

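/* Fetch the interrupted stack pointer from the exception stack frame */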
static inline uintptr_t esf_get_sp(const struct arch_esf *esf)
{
#ifdef CONFIG_X86_64
	return esf->rsp;
#else
	return esf->esp;
#endif
}

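/**
 * Check whether a memory region lies outside the stack expected for the
 * current context (interrupt stack, privilege elevation stack, or thread
 * stack buffer).
 *
 * @param addr Base address of the region being checked.
 * @param size Size of the region in bytes.
 * @param cs Code segment of the faulting context, used to detect syscalls.
 *
 * @return True if the region is out of bounds, false otherwise.
 */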
__pinned_func
bool z_x86_check_stack_bounds(uintptr_t addr, size_t size, uint16_t cs)
{
	uintptr_t start, end;

	if (arch_current_thread() == NULL || arch_is_in_isr()) {
		/* We were servicing an interrupt or in the early boot
		 * environment and are supposed to be on the interrupt stack.
		 */
		int cpu_id;

#ifdef CONFIG_SMP
		cpu_id = arch_curr_cpu()->id;
#else
		cpu_id = 0;
#endif
		start = (uintptr_t)K_KERNEL_STACK_BUFFER(
			z_interrupt_stacks[cpu_id]);
		end = start + CONFIG_ISR_STACK_SIZE;
#ifdef CONFIG_USERSPACE
	} else if ((cs & 0x3U) == 0U &&
		   (arch_current_thread()->base.user_options & K_USER) != 0) {
		/* The low two bits of the CS register are the privilege
		 * level: 0 in supervisor mode and 3 in user mode,
		 * corresponding to ring 0 / ring 3.
		 *
		 * If we get here, we must have been doing a syscall; check
		 * the privilege elevation stack bounds.
		 */
		start = arch_current_thread()->stack_info.start - CONFIG_PRIVILEGED_STACK_SIZE;
		end = arch_current_thread()->stack_info.start;
#endif /* CONFIG_USERSPACE */
	} else {
		/* Normal thread operation, check its stack buffer */
		start = arch_current_thread()->stack_info.start;
		end = Z_STACK_PTR_ALIGN(arch_current_thread()->stack_info.start +
					arch_current_thread()->stack_info.size);
	}

	return (addr <= start) || (addr + size > end);
}
#endif /* CONFIG_THREAD_STACK_INFO */

#ifdef CONFIG_THREAD_STACK_MEM_MAPPED
/**
 * Check if the fault is in the guard pages.
 *
 * @param addr Address to be tested.
 *
 * @return True if the address is in a guard page, false otherwise.
 */
__pinned_func
bool z_x86_check_guard_page(uintptr_t addr)
{
	struct k_thread *thread = arch_current_thread();
	uintptr_t start, end;

	/* Front guard page - before thread stack area */
	start = (uintptr_t)thread->stack_info.mapped.addr - CONFIG_MMU_PAGE_SIZE;
	end = (uintptr_t)thread->stack_info.mapped.addr;

	if ((addr >= start) && (addr < end)) {
		return true;
	}

	/* Rear guard page - after thread stack area */
	start = (uintptr_t)thread->stack_info.mapped.addr + thread->stack_info.mapped.sz;
	end = start + CONFIG_MMU_PAGE_SIZE;

	if ((addr >= start) && (addr < end)) {
		return true;
	}

	return false;
}
#endif /* CONFIG_THREAD_STACK_MEM_MAPPED */

#if defined(CONFIG_ARCH_STACKWALK)
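/* Layout of a stack frame produced by frame-pointer-based code generation:
 * the saved caller frame pointer followed by the return address.
 */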
struct stack_frame {
	uintptr_t next;
	uintptr_t ret_addr;
};

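/* Walk the call chain by following saved frame pointers starting from the
 * faulting context in the exception stack frame, invoking cb with each
 * return address until cb returns false, max_frames is reached, or the
 * chain looks invalid (unaligned, NULL, or out of stack bounds).
 */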
__pinned_func static void walk_stackframe(stack_trace_callback_fn cb, void *cookie,
					  const struct arch_esf *esf, int max_frames)
{
	uintptr_t base_ptr;
	uint16_t cs;
	struct stack_frame *frame;
	int i;

	if (esf != NULL) {
#ifdef CONFIG_X86_64
		base_ptr = esf->rbp;
#else /* x86 32-bit */
		base_ptr = esf->ebp;
#endif /* CONFIG_X86_64 */
		cs = esf->cs;
	} else {
		return;
	}

	if (base_ptr == 0U) {
		LOG_ERR("NULL base ptr");
		return;
	}

	for (i = 0; i < max_frames; i++) {
		if (base_ptr % sizeof(base_ptr) != 0U) {
			LOG_ERR("unaligned frame ptr");
			return;
		}

		frame = (struct stack_frame *)base_ptr;
		if (frame == NULL) {
			break;
		}

#ifdef CONFIG_THREAD_STACK_INFO
		/* Ensure the stack frame is within the faulting context's
		 * stack buffer
		 */
		if (z_x86_check_stack_bounds((uintptr_t)frame,
					     sizeof(*frame), cs)) {
			LOG_ERR(" corrupted? (bp=%p)", frame);
			break;
		}
#endif

		if (frame->ret_addr == 0U) {
			break;
		}

		if (!cb(cookie, frame->ret_addr)) {
			break;
		}

		base_ptr = frame->next;
	}
}

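/* Architecture hook for the stack walking API; only unwinding of the
 * faulting context captured in the exception stack frame is supported.
 */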
void arch_stack_walk(stack_trace_callback_fn callback_fn, void *cookie,
		     const struct k_thread *thread, const struct arch_esf *esf)
{
	ARG_UNUSED(thread);

	walk_stackframe(callback_fn, cookie, esf,
			CONFIG_ARCH_STACKWALK_MAX_FRAMES);
}
#endif /* CONFIG_ARCH_STACKWALK */

#if defined(CONFIG_EXCEPTION_STACK_TRACE)
static bool print_trace_address(void *arg, unsigned long addr)
{
	int *i = arg;

#ifdef CONFIG_X86_64
	LOG_ERR(" %d: 0x%016lx", (*i)++, addr);
#else
	LOG_ERR(" %d: 0x%08lx", (*i)++, addr);
#endif

	return true;
}

static ALWAYS_INLINE void unwind_stack(const struct arch_esf *esf)
{
	int i = 0;

	walk_stackframe(print_trace_address, &i, esf, CONFIG_ARCH_STACKWALK_MAX_FRAMES);
}
#endif /* CONFIG_EXCEPTION_STACK_TRACE */

#ifdef CONFIG_EXCEPTION_DEBUG
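/* Fetch the hardware-pushed error code from the exception stack frame */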
static inline uintptr_t esf_get_code(const struct arch_esf *esf)
{
#ifdef CONFIG_X86_64
	return esf->code;
#else
	return esf->errorCode;
#endif
}

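/* Determine the page table root (CR3) that was in effect in the
 * interrupted context.
 */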
static inline uintptr_t get_cr3(const struct arch_esf *esf)
{
#if defined(CONFIG_USERSPACE) && defined(CONFIG_X86_KPTI)
	/* If the interrupted thread was in user mode, we did a page table
	 * switch when we took the exception via z_x86_trampoline_to_kernel
	 */
	if ((esf->cs & 0x3) != 0) {
		return arch_current_thread()->arch.ptables;
	}
#else
	ARG_UNUSED(esf);
#endif
	/* Return the current CR3 value; it didn't change when we took
	 * the exception
	 */
	return z_x86_cr3_get();
}

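/* Return a virtual address pointer to the interrupted context's page tables */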
static inline pentry_t *get_ptables(const struct arch_esf *esf)
{
	return k_mem_virt_addr(get_cr3(esf));
}

#ifdef CONFIG_X86_64
__pinned_func
static void dump_regs(const struct arch_esf *esf)
{
	LOG_ERR("RAX: 0x%016lx RBX: 0x%016lx RCX: 0x%016lx RDX: 0x%016lx",
		esf->rax, esf->rbx, esf->rcx, esf->rdx);
	LOG_ERR("RSI: 0x%016lx RDI: 0x%016lx RBP: 0x%016lx RSP: 0x%016lx",
		esf->rsi, esf->rdi, esf->rbp, esf->rsp);
	LOG_ERR(" R8: 0x%016lx R9: 0x%016lx R10: 0x%016lx R11: 0x%016lx",
		esf->r8, esf->r9, esf->r10, esf->r11);
	LOG_ERR("R12: 0x%016lx R13: 0x%016lx R14: 0x%016lx R15: 0x%016lx",
		esf->r12, esf->r13, esf->r14, esf->r15);
	LOG_ERR("RSP: 0x%016lx RFLAGS: 0x%016lx CS: 0x%04lx CR3: 0x%016lx",
		esf->rsp, esf->rflags, esf->cs & 0xFFFFU, get_cr3(esf));

	LOG_ERR("RIP: 0x%016lx", esf->rip);
}
#else /* 32-bit */
__pinned_func
static void dump_regs(const struct arch_esf *esf)
{
	LOG_ERR("EAX: 0x%08x, EBX: 0x%08x, ECX: 0x%08x, EDX: 0x%08x",
		esf->eax, esf->ebx, esf->ecx, esf->edx);
	LOG_ERR("ESI: 0x%08x, EDI: 0x%08x, EBP: 0x%08x, ESP: 0x%08x",
		esf->esi, esf->edi, esf->ebp, esf->esp);
	LOG_ERR("EFLAGS: 0x%08x CS: 0x%04x CR3: 0x%08lx", esf->eflags,
		esf->cs & 0xFFFFU, get_cr3(esf));

	LOG_ERR("EIP: 0x%08x", esf->eip);
}
#endif /* CONFIG_X86_64 */

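/* Log a human-readable description of the CPU exception vector, along with
 * the error code for vectors that push one.
 */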
__pinned_func
static void log_exception(uintptr_t vector, uintptr_t code)
{
	switch (vector) {
	case IV_DIVIDE_ERROR:
		LOG_ERR("Divide by zero");
		break;
	case IV_DEBUG:
		LOG_ERR("Debug");
		break;
	case IV_NON_MASKABLE_INTERRUPT:
		LOG_ERR("Non-maskable interrupt");
		break;
	case IV_BREAKPOINT:
		LOG_ERR("Breakpoint");
		break;
	case IV_OVERFLOW:
		LOG_ERR("Overflow");
		break;
	case IV_BOUND_RANGE:
		LOG_ERR("Bound range exceeded");
		break;
	case IV_INVALID_OPCODE:
		LOG_ERR("Invalid opcode");
		break;
	case IV_DEVICE_NOT_AVAILABLE:
		LOG_ERR("Floating point unit device not available");
		break;
	case IV_DOUBLE_FAULT:
		LOG_ERR("Double fault (code 0x%lx)", code);
		break;
	case IV_COPROC_SEGMENT_OVERRUN:
		LOG_ERR("Co-processor segment overrun");
		break;
	case IV_INVALID_TSS:
		LOG_ERR("Invalid TSS (code 0x%lx)", code);
		break;
	case IV_SEGMENT_NOT_PRESENT:
		LOG_ERR("Segment not present (code 0x%lx)", code);
		break;
	case IV_STACK_FAULT:
		LOG_ERR("Stack segment fault");
		break;
	case IV_GENERAL_PROTECTION:
		LOG_ERR("General protection fault (code 0x%lx)", code);
		break;
	/* IV_PAGE_FAULT skipped, we have a dedicated handler */
	case IV_X87_FPU_FP_ERROR:
		LOG_ERR("x87 floating point exception");
		break;
	case IV_ALIGNMENT_CHECK:
		LOG_ERR("Alignment check (code 0x%lx)", code);
		break;
	case IV_MACHINE_CHECK:
		LOG_ERR("Machine check");
		break;
	case IV_SIMD_FP:
		LOG_ERR("SIMD floating point exception");
		break;
	case IV_VIRT_EXCEPTION:
		LOG_ERR("Virtualization exception");
		break;
	case IV_SECURITY_EXCEPTION:
		LOG_ERR("Security exception");
		break;
	default:
		LOG_ERR("Exception not handled (code 0x%lx)", code);
		break;
	}
}

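/* Decode and log the page fault error code and faulting linear address (CR2),
 * and dump the MMU flags for the faulting address when CONFIG_X86_MMU is set.
 */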
__pinned_func
static void dump_page_fault(struct arch_esf *esf)
{
	uintptr_t err;
	void *cr2;

	cr2 = z_x86_cr2_get();
	err = esf_get_code(esf);
	LOG_ERR("Page fault at address %p (error code 0x%lx)", cr2, err);

	if ((err & PF_RSVD) != 0) {
		LOG_ERR("Reserved bits set in page tables");
	} else {
		if ((err & PF_P) == 0) {
			LOG_ERR("Linear address not present in page tables");
		}
		LOG_ERR("Access violation: %s thread not allowed to %s",
			(err & PF_US) != 0U ? "user" : "supervisor",
			(err & PF_ID) != 0U ? "execute" : ((err & PF_WR) != 0U ?
							   "write" :
							   "read"));
		if ((err & PF_PK) != 0) {
			LOG_ERR("Protection key disallowed");
		} else if ((err & PF_SGX) != 0) {
			LOG_ERR("SGX access control violation");
		}
	}

#ifdef CONFIG_X86_MMU
	z_x86_dump_mmu_flags(get_ptables(esf), cr2);
#endif /* CONFIG_X86_MMU */
}
#endif /* CONFIG_EXCEPTION_DEBUG */

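/* Common fatal error path: dump registers and a stack trace when the relevant
 * debug options are enabled, then hand the error off to the kernel. Never
 * returns.
 */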
__pinned_func
FUNC_NORETURN void z_x86_fatal_error(unsigned int reason,
				     const struct arch_esf *esf)
{
	if (esf != NULL) {
#ifdef CONFIG_EXCEPTION_DEBUG
		dump_regs(esf);
#endif
#ifdef CONFIG_EXCEPTION_STACK_TRACE
		LOG_ERR("call trace:");
		unwind_stack(esf);
#endif /* CONFIG_EXCEPTION_STACK_TRACE */
#if defined(CONFIG_ASSERT) && defined(CONFIG_X86_64)
		if (esf->rip == 0xb9) {
			/* See implementation of __resume in locore.S. This is
			 * never a valid RIP value. Treat this as a kernel
			 * panic.
			 */
			LOG_ERR("Attempt to resume un-suspended thread object");
			reason = K_ERR_KERNEL_PANIC;
		}
#endif
	}
	z_fatal_error(reason, esf);
	CODE_UNREACHABLE;
}

__pinned_func
FUNC_NORETURN void z_x86_unhandled_cpu_exception(uintptr_t vector,
						 const struct arch_esf *esf)
{
#ifdef CONFIG_EXCEPTION_DEBUG
	log_exception(vector, esf_get_code(esf));
#else
	ARG_UNUSED(vector);
#endif
	z_x86_fatal_error(K_ERR_CPU_EXCEPTION, esf);
}

#ifdef CONFIG_USERSPACE
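/* Fixup table for routines that may legitimately fault while probing user
 * memory (currently z_x86_user_string_nlen): if a page fault lands inside one
 * of these ranges, execution resumes at the associated fixup address instead
 * of being treated as fatal.
 */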
Z_EXC_DECLARE(z_x86_user_string_nlen);

static const struct z_exc_handle exceptions[] = {
	Z_EXC_HANDLE(z_x86_user_string_nlen)
};
#endif

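/* Page fault (#PF) handler. Tries demand paging and exception fixups first;
 * anything left over is reported as either a stack check failure (fault in a
 * guard page or out-of-bounds stack pointer) or a fatal CPU exception.
 */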
__pinned_func
void z_x86_page_fault_handler(struct arch_esf *esf)
{
#ifdef CONFIG_DEMAND_PAGING
	if ((esf->errorCode & PF_P) == 0) {
		/* Page was non-present at the time the exception happened.
		 * Get the faulting virtual address from the CR2 register.
		 */
		void *virt = z_x86_cr2_get();
		bool was_valid_access;

#ifdef CONFIG_X86_KPTI
		/* Protection ring is lowest 2 bits in interrupted CS */
		bool was_user = ((esf->cs & 0x3) != 0U);

		/* Need to check if the interrupted context was a user thread
		 * that hit a non-present page that was flipped due to KPTI in
		 * the thread's page tables, in which case this is an access
		 * violation and we should treat this as an error.
		 *
		 * We're probably not locked, but if there is a race, we will
		 * be fine, the kernel page fault code will later detect that
		 * the page is present in the kernel's page tables and the
		 * instruction will just be re-tried, producing another fault.
		 */
		if (was_user &&
		    !z_x86_kpti_is_access_ok(virt, get_ptables(esf))) {
			was_valid_access = false;
		} else
#endif /* CONFIG_X86_KPTI */
		{
			was_valid_access = k_mem_page_fault(virt);
		}
		if (was_valid_access) {
			/* Page fault handled, re-try */
			return;
		}
	}
#endif /* CONFIG_DEMAND_PAGING */

#if !defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_COREDUMP)
	z_x86_exception_vector = IV_PAGE_FAULT;
#endif

#ifdef CONFIG_USERSPACE
	int i;

	for (i = 0; i < ARRAY_SIZE(exceptions); i++) {
#ifdef CONFIG_X86_64
		if ((void *)esf->rip >= exceptions[i].start &&
		    (void *)esf->rip < exceptions[i].end) {
			esf->rip = (uint64_t)(exceptions[i].fixup);
			return;
		}
#else
		if ((void *)esf->eip >= exceptions[i].start &&
		    (void *)esf->eip < exceptions[i].end) {
			esf->eip = (unsigned int)(exceptions[i].fixup);
			return;
		}
#endif /* CONFIG_X86_64 */
	}
#endif
#ifdef CONFIG_EXCEPTION_DEBUG
	dump_page_fault(esf);
#endif
#ifdef CONFIG_THREAD_STACK_INFO
	if (z_x86_check_stack_bounds(esf_get_sp(esf), 0, esf->cs)) {
		z_x86_fatal_error(K_ERR_STACK_CHK_FAIL, esf);
	}
#endif
#ifdef CONFIG_THREAD_STACK_MEM_MAPPED
	void *fault_addr = z_x86_cr2_get();

	if (z_x86_check_guard_page((uintptr_t)fault_addr)) {
		z_x86_fatal_error(K_ERR_STACK_CHK_FAIL, esf);
	}
#endif

	z_x86_fatal_error(K_ERR_CPU_EXCEPTION, esf);
	CODE_UNREACHABLE;
}

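/* Handle the kernel oops software interrupt. The oops reason is passed in RAX
 * on x86-64 and on the stack on 32-bit x86; user mode may only request an
 * oops or a stack check failure.
 */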
__pinned_func
void z_x86_do_kernel_oops(const struct arch_esf *esf)
{
	uintptr_t reason;

#ifdef CONFIG_X86_64
	reason = esf->rax;
#else
	uintptr_t *stack_ptr = (uintptr_t *)esf->esp;

	reason = *stack_ptr;
#endif

#ifdef CONFIG_USERSPACE
	/* User mode is only allowed to induce oopses and stack check
	 * failures via this software interrupt
	 */
	if ((esf->cs & 0x3) != 0 && !(reason == K_ERR_KERNEL_OOPS ||
				      reason == K_ERR_STACK_CHK_FAIL)) {
		reason = K_ERR_KERNEL_OOPS;
	}
#endif

	z_x86_fatal_error(reason, esf);
}