/*
 * Copyright (c) 2019 Intel Corporation
 * SPDX-License-Identifier: Apache-2.0
 */

#include <zephyr/kernel.h>
#include <ksched.h>
#include <zephyr/kernel_structs.h>
#include <kernel_internal.h>
#include <zephyr/arch/common/exc_handle.h>
#include <zephyr/logging/log.h>
#include <x86_mmu.h>
#include <mmu.h>
LOG_MODULE_DECLARE(os, CONFIG_KERNEL_LOG_LEVEL);

#if defined(CONFIG_BOARD_QEMU_X86) || defined(CONFIG_BOARD_QEMU_X86_64)
FUNC_NORETURN void arch_system_halt(unsigned int reason)
{
	ARG_UNUSED(reason);

	/* Causes QEMU to exit. We passed the following on the command line:
	 * -device isa-debug-exit,iobase=0xf4,iosize=0x04
	 *
	 * For any value X written to the device, the exit status of the
	 * QEMU process is (X * 2) + 1.
	 *
	 * It has been observed that if the emulator exits due to a triple
	 * fault (often caused by bad page tables or other CPU structures),
	 * it terminates with an exit status of 0.
	 */
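	/* For example, writing 0 makes QEMU exit with status 1, and
	 * writing 1 makes it exit with status 3.
	 */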
	sys_out32(reason, 0xf4);
	CODE_UNREACHABLE;
}
#endif

#ifdef CONFIG_THREAD_STACK_INFO

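/* Accessor that hides the field-name difference between the 32-bit (esp)
 * and 64-bit (rsp) exception stack frame layouts.
 */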
static inline uintptr_t esf_get_sp(const struct arch_esf *esf)
{
#ifdef CONFIG_X86_64
	return esf->rsp;
#else
	return esf->esp;
#endif
}

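/**
 * Check that a memory access lies within the stack of the faulting context.
 *
 * @param addr Base address of the access
 * @param size Size of the access in bytes (0 to test a bare pointer)
 * @param cs Code segment register value of the faulting context
 *
 * @return true if the access at [addr, addr + size) is out of bounds
 */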
__pinned_func
bool z_x86_check_stack_bounds(uintptr_t addr, size_t size, uint16_t cs)
{
	uintptr_t start, end;

	if (arch_current_thread() == NULL || arch_is_in_isr()) {
		/* We were servicing an interrupt or were in the early boot
		 * environment and are supposed to be on the interrupt stack.
		 */
		int cpu_id;

#ifdef CONFIG_SMP
		cpu_id = arch_curr_cpu()->id;
#else
		cpu_id = 0;
#endif
		start = (uintptr_t)K_KERNEL_STACK_BUFFER(
		    z_interrupt_stacks[cpu_id]);
		end = start + CONFIG_ISR_STACK_SIZE;
#ifdef CONFIG_USERSPACE
	} else if ((cs & 0x3U) == 0U &&
		   (arch_current_thread()->base.user_options & K_USER) != 0) {
		/* The low two bits of the CS register are the privilege
		 * level: 0 in supervisor mode and 3 in user mode,
		 * corresponding to ring 0 / ring 3.
		 *
		 * If we get here, we must have been doing a syscall; check
		 * the privilege elevation stack bounds.
		 */
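		/* For example, a selector value of 0x08 has RPL 0 (ring 0),
		 * while 0x1b has RPL 3 (ring 3); the exact selector values
		 * depend on the GDT layout.
		 */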
		start = arch_current_thread()->stack_info.start - CONFIG_PRIVILEGED_STACK_SIZE;
		end = arch_current_thread()->stack_info.start;
#endif /* CONFIG_USERSPACE */
	} else {
		/* Normal thread operation, check its stack buffer */
		start = arch_current_thread()->stack_info.start;
		end = Z_STACK_PTR_ALIGN(arch_current_thread()->stack_info.start +
					arch_current_thread()->stack_info.size);
	}

	return (addr <= start) || (addr + size > end);
}
#endif /* CONFIG_THREAD_STACK_INFO */

#ifdef CONFIG_THREAD_STACK_MEM_MAPPED
/**
 * Check if the fault is in the guard pages.
 *
 * @param addr Address to be tested.
 *
 * @return true if the address is within the guard pages, false otherwise.
 */
__pinned_func
bool z_x86_check_guard_page(uintptr_t addr)
{
	struct k_thread *thread = arch_current_thread();
	uintptr_t start, end;

	/* Front guard - page before the thread stack area */
	start = (uintptr_t)thread->stack_info.mapped.addr - CONFIG_MMU_PAGE_SIZE;
	end = (uintptr_t)thread->stack_info.mapped.addr;

	if ((addr >= start) && (addr < end)) {
		return true;
	}

	/* Rear guard - page after the thread stack area */
	start = (uintptr_t)thread->stack_info.mapped.addr + thread->stack_info.mapped.sz;
	end = start + CONFIG_MMU_PAGE_SIZE;

	if ((addr >= start) && (addr < end)) {
		return true;
	}

	return false;
}
#endif /* CONFIG_THREAD_STACK_MEM_MAPPED */

#if defined(CONFIG_ARCH_STACKWALK)
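/* With frame pointers enabled, each function prologue pushes the caller's
 * frame pointer, leaving the return address just above it on the stack.
 * This forms a linked list of frames that can be walked via the saved
 * EBP/RBP chain, which is what the struct below describes.
 */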
struct stack_frame {
	uintptr_t next;
	uintptr_t ret_addr;
};

__pinned_func static void walk_stackframe(stack_trace_callback_fn cb, void *cookie,
					  const struct arch_esf *esf, int max_frames)
{
	uintptr_t base_ptr;
	uint16_t cs;
	struct stack_frame *frame;
	int i;

	if (esf != NULL) {
#ifdef CONFIG_X86_64
		base_ptr = esf->rbp;
#else /* x86 32-bit */
		base_ptr = esf->ebp;
#endif /* CONFIG_X86_64 */
		cs = esf->cs;
	} else {
		return;
	}

	if (base_ptr == 0U) {
		LOG_ERR("NULL base ptr");
		return;
	}

	for (i = 0; i < max_frames; i++) {
		if (base_ptr % sizeof(base_ptr) != 0U) {
			LOG_ERR("unaligned frame ptr");
			return;
		}

		frame = (struct stack_frame *)base_ptr;
		if (frame == NULL) {
			break;
		}

#ifdef CONFIG_THREAD_STACK_INFO
		/* Ensure the stack frame is within the faulting context's
		 * stack buffer
		 */
		if (z_x86_check_stack_bounds((uintptr_t)frame,
					     sizeof(*frame), cs)) {
			LOG_ERR("     corrupted? (bp=%p)", frame);
			break;
		}
#endif

		if (frame->ret_addr == 0U) {
			break;
		}

		if (!cb(cookie, frame->ret_addr)) {
			break;
		}
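		/* Follow the saved frame pointer up to the caller's frame */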
		base_ptr = frame->next;
	}
}

void arch_stack_walk(stack_trace_callback_fn callback_fn, void *cookie,
		     const struct k_thread *thread, const struct arch_esf *esf)
{
	ARG_UNUSED(thread);

	walk_stackframe(callback_fn, cookie, esf,
			CONFIG_ARCH_STACKWALK_MAX_FRAMES);
}
#endif /* CONFIG_ARCH_STACKWALK */

#if defined(CONFIG_EXCEPTION_STACK_TRACE)
static bool print_trace_address(void *arg, unsigned long addr)
{
	int *i = arg;

#ifdef CONFIG_X86_64
	LOG_ERR("     %d: 0x%016lx", (*i)++, addr);
#else
	LOG_ERR("     %d: 0x%08lx", (*i)++, addr);
#endif

	return true;
}

static ALWAYS_INLINE void unwind_stack(const struct arch_esf *esf)
{
	int i = 0;

	walk_stackframe(print_trace_address, &i, esf, CONFIG_ARCH_STACKWALK_MAX_FRAMES);
}
#endif /* CONFIG_EXCEPTION_STACK_TRACE */

#ifdef CONFIG_EXCEPTION_DEBUG
static inline uintptr_t esf_get_code(const struct arch_esf *esf)
{
#ifdef CONFIG_X86_64
	return esf->code;
#else
	return esf->errorCode;
#endif
}

static inline uintptr_t get_cr3(const struct arch_esf *esf)
{
#if defined(CONFIG_USERSPACE) && defined(CONFIG_X86_KPTI)
	/* If the interrupted thread was in user mode, we did a page table
	 * switch when we took the exception via z_x86_trampoline_to_kernel
	 */
	if ((esf->cs & 0x3) != 0) {
		return arch_current_thread()->arch.ptables;
	}
#else
	ARG_UNUSED(esf);
#endif
	/* Return the current CR3 value; it didn't change when we took
	 * the exception
	 */
	return z_x86_cr3_get();
}

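/* CR3 holds the physical address of the top-level page table; it must be
 * converted to a virtual address before it can be dereferenced.
 */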
static inline pentry_t *get_ptables(const struct arch_esf *esf)
{
	return k_mem_virt_addr(get_cr3(esf));
}

#ifdef CONFIG_X86_64
__pinned_func
static void dump_regs(const struct arch_esf *esf)
{
	LOG_ERR("RAX: 0x%016lx RBX: 0x%016lx RCX: 0x%016lx RDX: 0x%016lx",
		esf->rax, esf->rbx, esf->rcx, esf->rdx);
	LOG_ERR("RSI: 0x%016lx RDI: 0x%016lx RBP: 0x%016lx RSP: 0x%016lx",
		esf->rsi, esf->rdi, esf->rbp, esf->rsp);
	LOG_ERR(" R8: 0x%016lx  R9: 0x%016lx R10: 0x%016lx R11: 0x%016lx",
		esf->r8, esf->r9, esf->r10, esf->r11);
	LOG_ERR("R12: 0x%016lx R13: 0x%016lx R14: 0x%016lx R15: 0x%016lx",
		esf->r12, esf->r13, esf->r14, esf->r15);
	LOG_ERR("RSP: 0x%016lx RFLAGS: 0x%016lx CS: 0x%04lx CR3: 0x%016lx",
		esf->rsp, esf->rflags, esf->cs & 0xFFFFU, get_cr3(esf));

	LOG_ERR("RIP: 0x%016lx", esf->rip);
}
#else /* 32-bit */
__pinned_func
static void dump_regs(const struct arch_esf *esf)
{
	LOG_ERR("EAX: 0x%08x, EBX: 0x%08x, ECX: 0x%08x, EDX: 0x%08x",
		esf->eax, esf->ebx, esf->ecx, esf->edx);
	LOG_ERR("ESI: 0x%08x, EDI: 0x%08x, EBP: 0x%08x, ESP: 0x%08x",
		esf->esi, esf->edi, esf->ebp, esf->esp);
	LOG_ERR("EFLAGS: 0x%08x CS: 0x%04x CR3: 0x%08lx", esf->eflags,
		esf->cs & 0xFFFFU, get_cr3(esf));

	LOG_ERR("EIP: 0x%08x", esf->eip);
}
#endif /* CONFIG_X86_64 */

__pinned_func
static void log_exception(uintptr_t vector, uintptr_t code)
{
	switch (vector) {
	case IV_DIVIDE_ERROR:
		LOG_ERR("Divide by zero");
		break;
	case IV_DEBUG:
		LOG_ERR("Debug");
		break;
	case IV_NON_MASKABLE_INTERRUPT:
		LOG_ERR("Non-maskable interrupt");
		break;
	case IV_BREAKPOINT:
		LOG_ERR("Breakpoint");
		break;
	case IV_OVERFLOW:
		LOG_ERR("Overflow");
		break;
	case IV_BOUND_RANGE:
		LOG_ERR("Bound range exceeded");
		break;
	case IV_INVALID_OPCODE:
		LOG_ERR("Invalid opcode");
		break;
	case IV_DEVICE_NOT_AVAILABLE:
		LOG_ERR("Floating point unit device not available");
		break;
	case IV_DOUBLE_FAULT:
		LOG_ERR("Double fault (code 0x%lx)", code);
		break;
	case IV_COPROC_SEGMENT_OVERRUN:
		LOG_ERR("Co-processor segment overrun");
		break;
	case IV_INVALID_TSS:
		LOG_ERR("Invalid TSS (code 0x%lx)", code);
		break;
	case IV_SEGMENT_NOT_PRESENT:
		LOG_ERR("Segment not present (code 0x%lx)", code);
		break;
	case IV_STACK_FAULT:
		LOG_ERR("Stack segment fault");
		break;
	case IV_GENERAL_PROTECTION:
		LOG_ERR("General protection fault (code 0x%lx)", code);
		break;
	/* IV_PAGE_FAULT skipped, we have a dedicated handler */
	case IV_X87_FPU_FP_ERROR:
		LOG_ERR("x87 floating point exception");
		break;
	case IV_ALIGNMENT_CHECK:
		LOG_ERR("Alignment check (code 0x%lx)", code);
		break;
	case IV_MACHINE_CHECK:
		LOG_ERR("Machine check");
		break;
	case IV_SIMD_FP:
		LOG_ERR("SIMD floating point exception");
		break;
	case IV_VIRT_EXCEPTION:
		LOG_ERR("Virtualization exception");
		break;
	case IV_SECURITY_EXCEPTION:
		LOG_ERR("Security exception");
		break;
	default:
		LOG_ERR("Exception not handled (code 0x%lx)", code);
		break;
	}
}

__pinned_func
static void dump_page_fault(struct arch_esf *esf)
{
	uintptr_t err;
	void *cr2;

	cr2 = z_x86_cr2_get();
	err = esf_get_code(esf);
	LOG_ERR("Page fault at address %p (error code 0x%lx)", cr2, err);

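	/* Page fault error code bits (Intel SDM Vol. 3): PF_P = page was
	 * present, PF_WR = write access, PF_US = access from user mode,
	 * PF_RSVD = reserved bit set in a paging entry, PF_ID = instruction
	 * fetch, PF_PK = protection key violation, PF_SGX = SGX violation.
	 */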
	if ((err & PF_RSVD) != 0) {
		LOG_ERR("Reserved bits set in page tables");
	} else {
		if ((err & PF_P) == 0) {
			LOG_ERR("Linear address not present in page tables");
		}
		LOG_ERR("Access violation: %s thread not allowed to %s",
			(err & PF_US) != 0U ? "user" : "supervisor",
			(err & PF_ID) != 0U ? "execute" : ((err & PF_WR) != 0U ?
							   "write" :
							   "read"));
		if ((err & PF_PK) != 0) {
			LOG_ERR("Protection key disallowed");
		} else if ((err & PF_SGX) != 0) {
			LOG_ERR("SGX access control violation");
		}
	}

#ifdef CONFIG_X86_MMU
	z_x86_dump_mmu_flags(get_ptables(esf), cr2);
#endif /* CONFIG_X86_MMU */
}
#endif /* CONFIG_EXCEPTION_DEBUG */

__pinned_func
FUNC_NORETURN void z_x86_fatal_error(unsigned int reason,
				     const struct arch_esf *esf)
{
	if (esf != NULL) {
#ifdef CONFIG_EXCEPTION_DEBUG
		dump_regs(esf);
#endif
#ifdef CONFIG_EXCEPTION_STACK_TRACE
		LOG_ERR("call trace:");
		unwind_stack(esf);
#endif /* CONFIG_EXCEPTION_STACK_TRACE */
#if defined(CONFIG_ASSERT) && defined(CONFIG_X86_64)
		if (esf->rip == 0xb9) {
			/* See implementation of __resume in locore.S. This is
			 * never a valid RIP value. Treat this as a kernel
			 * panic.
			 */
			LOG_ERR("Attempt to resume un-suspended thread object");
			reason = K_ERR_KERNEL_PANIC;
		}
#endif
	}
	z_fatal_error(reason, esf);
	CODE_UNREACHABLE;
}

__pinned_func
FUNC_NORETURN void z_x86_unhandled_cpu_exception(uintptr_t vector,
						 const struct arch_esf *esf)
{
#ifdef CONFIG_EXCEPTION_DEBUG
	log_exception(vector, esf_get_code(esf));
#else
	ARG_UNUSED(vector);
#endif
	z_x86_fatal_error(K_ERR_CPU_EXCEPTION, esf);
}

#ifdef CONFIG_USERSPACE
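/* z_x86_user_string_nlen() probes user memory and may legitimately fault;
 * the table below lets the page fault handler detect a fault inside its
 * bounds and resume execution at the corresponding fixup address instead
 * of escalating to a fatal error.
 */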
Z_EXC_DECLARE(z_x86_user_string_nlen);

static const struct z_exc_handle exceptions[] = {
	Z_EXC_HANDLE(z_x86_user_string_nlen)
};
#endif

__pinned_func
void z_x86_page_fault_handler(struct arch_esf *esf)
{
#ifdef CONFIG_DEMAND_PAGING
	if ((esf->errorCode & PF_P) == 0) {
		/* Page was non-present at the time the exception happened.
		 * Get the faulting virtual address from the CR2 register.
		 */
		void *virt = z_x86_cr2_get();
		bool was_valid_access;

#ifdef CONFIG_X86_KPTI
		/* Protection ring is the lowest 2 bits of the interrupted CS */
		bool was_user = ((esf->cs & 0x3) != 0U);

		/* Need to check if the interrupted context was a user thread
		 * that hit a non-present page that was flipped due to KPTI in
		 * the thread's page tables, in which case this is an access
		 * violation and we should treat this as an error.
		 *
		 * We're probably not locked, but if there is a race, we will
		 * be fine; the kernel page fault code will later detect that
		 * the page is present in the kernel's page tables and the
		 * instruction will just be re-tried, producing another fault.
		 */
		if (was_user &&
		    !z_x86_kpti_is_access_ok(virt, get_ptables(esf))) {
			was_valid_access = false;
		} else
#endif /* CONFIG_X86_KPTI */
		{
			was_valid_access = k_mem_page_fault(virt);
		}
		if (was_valid_access) {
			/* Page fault handled, re-try */
			return;
		}
	}
#endif /* CONFIG_DEMAND_PAGING */

#if !defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_COREDUMP)
	z_x86_exception_vector = IV_PAGE_FAULT;
#endif

#ifdef CONFIG_USERSPACE
	int i;

	for (i = 0; i < ARRAY_SIZE(exceptions); i++) {
#ifdef CONFIG_X86_64
		if ((void *)esf->rip >= exceptions[i].start &&
		    (void *)esf->rip < exceptions[i].end) {
			esf->rip = (uint64_t)(exceptions[i].fixup);
			return;
		}
#else
		if ((void *)esf->eip >= exceptions[i].start &&
		    (void *)esf->eip < exceptions[i].end) {
			esf->eip = (unsigned int)(exceptions[i].fixup);
			return;
		}
#endif /* CONFIG_X86_64 */
	}
#endif
#ifdef CONFIG_EXCEPTION_DEBUG
	dump_page_fault(esf);
#endif
#ifdef CONFIG_THREAD_STACK_INFO
	if (z_x86_check_stack_bounds(esf_get_sp(esf), 0, esf->cs)) {
		z_x86_fatal_error(K_ERR_STACK_CHK_FAIL, esf);
	}
#endif
#ifdef CONFIG_THREAD_STACK_MEM_MAPPED
	void *fault_addr = z_x86_cr2_get();

	if (z_x86_check_guard_page((uintptr_t)fault_addr)) {
		z_x86_fatal_error(K_ERR_STACK_CHK_FAIL, esf);
	}
#endif

	z_x86_fatal_error(K_ERR_CPU_EXCEPTION, esf);
	CODE_UNREACHABLE;
}

__pinned_func
void z_x86_do_kernel_oops(const struct arch_esf *esf)
{
	uintptr_t reason;

#ifdef CONFIG_X86_64
	reason = esf->rax;
#else
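	/* On 32-bit, the oops reason code was pushed onto the stack of the
	 * faulting context before the software interrupt was issued, so it
	 * sits at the interrupted stack pointer.
	 */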
	uintptr_t *stack_ptr = (uintptr_t *)esf->esp;

	reason = *stack_ptr;
#endif

#ifdef CONFIG_USERSPACE
	/* User mode is only allowed to induce oopses and stack check
	 * failures via this software interrupt
	 */
	if ((esf->cs & 0x3) != 0 && !(reason == K_ERR_KERNEL_OOPS ||
				      reason == K_ERR_STACK_CHK_FAIL)) {
		reason = K_ERR_KERNEL_OOPS;
	}
#endif

	z_x86_fatal_error(reason, esf);
}