1 /*
2  * Copyright (c) 2019 Intel Corporation
3  * SPDX-License-Identifier: Apache-2.0
4  */
5 
6 #include <kernel.h>
7 #include <ksched.h>
8 #include <kernel_structs.h>
9 #include <kernel_internal.h>
10 #include <exc_handle.h>
11 #include <logging/log.h>
12 #include <x86_mmu.h>
13 #include <mmu.h>
14 LOG_MODULE_DECLARE(os, CONFIG_KERNEL_LOG_LEVEL);
15 
#if defined(CONFIG_BOARD_QEMU_X86) || defined(CONFIG_BOARD_QEMU_X86_64)
/* QEMU-only override of the default system halt: instead of spinning
 * forever, poke the isa-debug-exit device so the emulator process
 * terminates and propagates the fatal reason as its exit status.
 */
FUNC_NORETURN void arch_system_halt(unsigned int reason)
{
	ARG_UNUSED(reason);

	/* Causes QEMU to exit. We passed the following on the command line:
	 * -device isa-debug-exit,iobase=0xf4,iosize=0x04
	 *
	 * For any value of the first argument X, the return value of the
	 * QEMU process is (X * 2) + 1.
	 *
	 * It has been observed that if the emulator exits for a triple-fault
	 * (often due to bad page tables or other CPU structures) it will
	 * terminate with 0 error code.
	 */
	sys_out32(reason, 0xf4);
	CODE_UNREACHABLE;
}
#endif
35 
/* Return the interrupted context's stack pointer from the exception stack
 * frame, abstracting over the 32-bit (esp) vs 64-bit (rsp) field names.
 */
static inline uintptr_t esf_get_sp(const z_arch_esf_t *esf)
{
#ifdef CONFIG_X86_64
	return esf->rsp;
#else
	return esf->esp;
#endif
}
44 
/* Return the CPU-pushed exception error code from the exception stack
 * frame, abstracting over the 32-bit (errorCode) vs 64-bit (code) names.
 */
static inline uintptr_t esf_get_code(const z_arch_esf_t *esf)
{
#ifdef CONFIG_X86_64
	return esf->code;
#else
	return esf->errorCode;
#endif
}
53 
#ifdef CONFIG_THREAD_STACK_INFO
/**
 * Check whether the memory range [addr, addr + size) falls outside the
 * stack region that applies to the faulting context.
 *
 * @param addr Base address of the access being validated
 * @param size Size of the access in bytes (may be 0 to test a bare pointer)
 * @param cs   Code segment selector at the time of the fault; its low two
 *             bits hold the privilege ring (0 = supervisor, 3 = user)
 *
 * @return true if the range is out of bounds (stack overflow/corruption
 *         suspected), false if it lies within the expected stack
 */
__pinned_func
bool z_x86_check_stack_bounds(uintptr_t addr, size_t size, uint16_t cs)
{
	uintptr_t start, end;

	if (_current == NULL || arch_is_in_isr()) {
		/* We were servicing an interrupt or in early boot environment
		 * and are supposed to be on the interrupt stack */
		int cpu_id;

#ifdef CONFIG_SMP
		cpu_id = arch_curr_cpu()->id;
#else
		cpu_id = 0;
#endif
		start = (uintptr_t)Z_KERNEL_STACK_BUFFER(
		    z_interrupt_stacks[cpu_id]);
		end = start + CONFIG_ISR_STACK_SIZE;
#ifdef CONFIG_USERSPACE
	} else if ((cs & 0x3U) == 0U &&
		   (_current->base.user_options & K_USER) != 0) {
		/* The low two bits of the CS register is the privilege
		 * level. It will be 0 in supervisor mode and 3 in user mode
		 * corresponding to ring 0 / ring 3.
		 *
		 * If we get here, we must have been doing a syscall, check
		 * privilege elevation stack bounds
		 */
		start = _current->stack_info.start - CONFIG_MMU_PAGE_SIZE;
		end = _current->stack_info.start;
#endif /* CONFIG_USERSPACE */
	} else {
		/* Normal thread operation, check its stack buffer */
		start = _current->stack_info.start;
		end = Z_STACK_PTR_ALIGN(_current->stack_info.start +
					_current->stack_info.size);
	}

	/* Out of bounds if the access begins at or below the region start
	 * (addr == start is deliberately rejected too), or runs past its end.
	 */
	return (addr <= start) || (addr + size > end);
}
#endif
96 
#ifdef CONFIG_EXCEPTION_DEBUG
#if defined(CONFIG_X86_EXCEPTION_STACK_TRACE)
/* Shape of a frame-pointer-linked stack frame: the saved caller frame
 * pointer followed by the return address. 32-bit frames also expose the
 * word just above the return address (first stack-passed argument slot
 * under cdecl), which is printed for extra context.
 */
struct stack_frame {
	uintptr_t next;     /* saved EBP/RBP of the caller's frame */
	uintptr_t ret_addr; /* return address into the caller */
#ifndef CONFIG_X86_64
	uintptr_t args;     /* word above ret_addr on 32-bit stacks */
#endif
};

/* Cap on printed frames so a corrupted or looping chain can't flood logs */
#define MAX_STACK_FRAMES 8
108 
__pinned_func
/* Walk the frame-pointer chain starting at base_ptr (the faulting EBP/RBP)
 * and log up to MAX_STACK_FRAMES return addresses. The walk stops early on
 * a NULL/unaligned frame pointer, a zero return address, or (when stack
 * info is available) a frame that lies outside the faulting context's
 * stack bounds. cs is forwarded to the bounds check to select which stack
 * region applies.
 */
static void unwind_stack(uintptr_t base_ptr, uint16_t cs)
{
	struct stack_frame *frame;
	int i;

	if (base_ptr == 0U) {
		LOG_ERR("NULL base ptr");
		return;
	}

	for (i = 0; i < MAX_STACK_FRAMES; i++) {
		/* Frame pointers must be naturally aligned; anything else
		 * indicates corruption or an FP-less frame.
		 */
		if (base_ptr % sizeof(base_ptr) != 0U) {
			LOG_ERR("unaligned frame ptr");
			return;
		}

		frame = (struct stack_frame *)base_ptr;
		if (frame == NULL) {
			break;
		}

#ifdef CONFIG_THREAD_STACK_INFO
		/* Ensure the stack frame is within the faulting context's
		 * stack buffer
		 */
		if (z_x86_check_stack_bounds((uintptr_t)frame,
					     sizeof(*frame), cs)) {
			LOG_ERR("     corrupted? (bp=%p)", frame);
			break;
		}
#endif

		/* A zero return address marks the root of the chain */
		if (frame->ret_addr == 0U) {
			break;
		}
#ifdef CONFIG_X86_64
		LOG_ERR("     0x%016lx", frame->ret_addr);
#else
		LOG_ERR("     0x%08lx (0x%lx)", frame->ret_addr, frame->args);
#endif
		base_ptr = frame->next;
	}
}
#endif /* CONFIG_X86_EXCEPTION_STACK_TRACE */
154 
/* Determine the CR3 (page table base) value that was in effect for the
 * interrupted context. With KPTI, entering the kernel from user mode swaps
 * page tables, so the live CR3 no longer matches the faulting context and
 * the thread's own table pointer must be reported instead.
 */
static inline uintptr_t get_cr3(const z_arch_esf_t *esf)
{
#if defined(CONFIG_USERSPACE) && defined(CONFIG_X86_KPTI)
	/* If the interrupted thread was in user mode, we did a page table
	 * switch when we took the exception via z_x86_trampoline_to_kernel
	 */
	if ((esf->cs & 0x3) != 0) {
		return _current->arch.ptables;
	}
#else
	ARG_UNUSED(esf);
#endif
	/* Return the current CR3 value, it didn't change when we took
	 * the exception
	 */
	return z_x86_cr3_get();
}
172 
get_ptables(const z_arch_esf_t * esf)173 static inline pentry_t *get_ptables(const z_arch_esf_t *esf)
174 {
175 	return z_mem_virt_addr(get_cr3(esf));
176 }
177 
#ifdef CONFIG_X86_64
__pinned_func
/* Log the full 64-bit register state captured in the exception stack
 * frame, plus the effective CR3, then (if enabled) a frame-pointer-based
 * call trace rooted at the faulting RBP.
 */
static void dump_regs(const z_arch_esf_t *esf)
{
	LOG_ERR("RAX: 0x%016lx RBX: 0x%016lx RCX: 0x%016lx RDX: 0x%016lx",
		esf->rax, esf->rbx, esf->rcx, esf->rdx);
	LOG_ERR("RSI: 0x%016lx RDI: 0x%016lx RBP: 0x%016lx RSP: 0x%016lx",
		esf->rsi, esf->rdi, esf->rbp, esf->rsp);
	LOG_ERR(" R8: 0x%016lx  R9: 0x%016lx R10: 0x%016lx R11: 0x%016lx",
		esf->r8, esf->r9, esf->r10, esf->r11);
	LOG_ERR("R12: 0x%016lx R13: 0x%016lx R14: 0x%016lx R15: 0x%016lx",
		esf->r12, esf->r13, esf->r14, esf->r15);
	LOG_ERR("RSP: 0x%016lx RFLAGS: 0x%016lx CS: 0x%04lx CR3: 0x%016lx",
		esf->rsp, esf->rflags, esf->cs & 0xFFFFU, get_cr3(esf));

#ifdef CONFIG_X86_EXCEPTION_STACK_TRACE
	LOG_ERR("call trace:");
#endif
	LOG_ERR("RIP: 0x%016lx", esf->rip);
#ifdef CONFIG_X86_EXCEPTION_STACK_TRACE
	unwind_stack(esf->rbp, esf->cs);
#endif
}
#else /* 32-bit */
__pinned_func
/* 32-bit counterpart: log the IA-32 register state from the exception
 * stack frame, the effective CR3, and optionally a call trace from EBP.
 */
static void dump_regs(const z_arch_esf_t *esf)
{
	LOG_ERR("EAX: 0x%08x, EBX: 0x%08x, ECX: 0x%08x, EDX: 0x%08x",
		esf->eax, esf->ebx, esf->ecx, esf->edx);
	LOG_ERR("ESI: 0x%08x, EDI: 0x%08x, EBP: 0x%08x, ESP: 0x%08x",
		esf->esi, esf->edi, esf->ebp, esf->esp);
	LOG_ERR("EFLAGS: 0x%08x CS: 0x%04x CR3: 0x%08lx", esf->eflags,
		esf->cs & 0xFFFFU, get_cr3(esf));

#ifdef CONFIG_X86_EXCEPTION_STACK_TRACE
	LOG_ERR("call trace:");
#endif
	LOG_ERR("EIP: 0x%08x", esf->eip);
#ifdef CONFIG_X86_EXCEPTION_STACK_TRACE
	unwind_stack(esf->ebp, esf->cs);
#endif
}
#endif /* CONFIG_X86_64 */
221 
__pinned_func
/* Log a human-readable description of a CPU exception vector. For vectors
 * where the CPU pushes an error code (double fault, TSS, segment, GP,
 * alignment check), the raw code is included in the message. Page faults
 * are not handled here; they have a dedicated handler with richer output.
 */
static void log_exception(uintptr_t vector, uintptr_t code)
{
	switch (vector) {
	case IV_DIVIDE_ERROR:
		LOG_ERR("Divide by zero");
		break;
	case IV_DEBUG:
		LOG_ERR("Debug");
		break;
	case IV_NON_MASKABLE_INTERRUPT:
		LOG_ERR("Non-maskable interrupt");
		break;
	case IV_BREAKPOINT:
		LOG_ERR("Breakpoint");
		break;
	case IV_OVERFLOW:
		LOG_ERR("Overflow");
		break;
	case IV_BOUND_RANGE:
		LOG_ERR("Bound range exceeded");
		break;
	case IV_INVALID_OPCODE:
		LOG_ERR("Invalid opcode");
		break;
	case IV_DEVICE_NOT_AVAILABLE:
		LOG_ERR("Floating point unit device not available");
		break;
	case IV_DOUBLE_FAULT:
		LOG_ERR("Double fault (code 0x%lx)", code);
		break;
	case IV_COPROC_SEGMENT_OVERRUN:
		LOG_ERR("Co-processor segment overrun");
		break;
	case IV_INVALID_TSS:
		LOG_ERR("Invalid TSS (code 0x%lx)", code);
		break;
	case IV_SEGMENT_NOT_PRESENT:
		LOG_ERR("Segment not present (code 0x%lx)", code);
		break;
	case IV_STACK_FAULT:
		LOG_ERR("Stack segment fault");
		break;
	case IV_GENERAL_PROTECTION:
		LOG_ERR("General protection fault (code 0x%lx)", code);
		break;
	/* IV_PAGE_FAULT skipped, we have a dedicated handler */
	case IV_X87_FPU_FP_ERROR:
		LOG_ERR("x87 floating point exception");
		break;
	case IV_ALIGNMENT_CHECK:
		LOG_ERR("Alignment check (code 0x%lx)", code);
		break;
	case IV_MACHINE_CHECK:
		LOG_ERR("Machine check");
		break;
	case IV_SIMD_FP:
		LOG_ERR("SIMD floating point exception");
		break;
	case IV_VIRT_EXCEPTION:
		LOG_ERR("Virtualization exception");
		break;
	case IV_SECURITY_EXCEPTION:
		LOG_ERR("Security exception");
		break;
	default:
		LOG_ERR("Exception not handled (code 0x%lx)", code);
		break;
	}
}
292 
__pinned_func
/* Decode and log a page fault: the faulting linear address (CR2), the
 * CPU-pushed error code, and a breakdown of the error-code bits (present,
 * user/supervisor, read/write/execute, protection key, SGX). When the MMU
 * driver is built, also dump the page table flags for the faulting address
 * in the context's own page tables.
 */
static void dump_page_fault(z_arch_esf_t *esf)
{
	uintptr_t err;
	void *cr2;

	cr2 = z_x86_cr2_get();
	err = esf_get_code(esf);
	LOG_ERR("Page fault at address %p (error code 0x%lx)", cr2, err);

	/* PF_RSVD set means a reserved bit was set in a paging structure,
	 * which points at corrupt page tables rather than an access issue.
	 */
	if ((err & PF_RSVD) != 0) {
		LOG_ERR("Reserved bits set in page tables");
	} else {
		if ((err & PF_P) == 0) {
			LOG_ERR("Linear address not present in page tables");
		}
		LOG_ERR("Access violation: %s thread not allowed to %s",
			(err & PF_US) != 0U ? "user" : "supervisor",
			(err & PF_ID) != 0U ? "execute" : ((err & PF_WR) != 0U ?
							   "write" :
							   "read"));
		if ((err & PF_PK) != 0) {
			LOG_ERR("Protection key disallowed");
		} else if ((err & PF_SGX) != 0) {
			LOG_ERR("SGX access control violation");
		}
	}

#ifdef CONFIG_X86_MMU
	z_x86_dump_mmu_flags(get_ptables(esf), cr2);
#endif /* CONFIG_X86_MMU */
}
#endif /* CONFIG_EXCEPTION_DEBUG */
326 
__pinned_func
/* Arch-side fatal error funnel: optionally dump register state, detect the
 * "resumed a thread that was never suspended" assertion sentinel, then hand
 * off to the common kernel fatal-error path. Never returns.
 */
FUNC_NORETURN void z_x86_fatal_error(unsigned int reason,
				     const z_arch_esf_t *esf)
{
	if (esf != NULL) {
#ifdef CONFIG_EXCEPTION_DEBUG
		dump_regs(esf);
#endif
#if defined(CONFIG_ASSERT) && defined(CONFIG_X86_64)
		/* 0xb9 is a sentinel RIP planted by locore.S, not a real
		 * return address; seeing it here means __resume ran on a
		 * thread object that was never suspended.
		 */
		if (esf->rip == 0xb9) {
			/* See implementation of __resume in locore.S. This is
			 * never a valid RIP value. Treat this as a kernel
			 * panic.
			 */
			LOG_ERR("Attempt to resume un-suspended thread object");
			reason = K_ERR_KERNEL_PANIC;
		}
#endif
	}
	z_fatal_error(reason, esf);
	CODE_UNREACHABLE;
}
349 
__pinned_func
/* Catch-all handler for CPU exceptions with no dedicated handler: log a
 * description of the vector (when exception debugging is enabled), then
 * escalate as a fatal CPU exception. Never returns.
 */
FUNC_NORETURN void z_x86_unhandled_cpu_exception(uintptr_t vector,
						 const z_arch_esf_t *esf)
{
#ifdef CONFIG_EXCEPTION_DEBUG
	log_exception(vector, esf_get_code(esf));
#else
	ARG_UNUSED(vector);
#endif
	z_x86_fatal_error(K_ERR_CPU_EXCEPTION, esf);
}
361 
#ifdef CONFIG_USERSPACE
/* Exception fixup table. z_x86_user_string_nlen() may legitimately fault
 * while probing user memory; the page fault handler scans this table and,
 * on a hit, redirects execution to the entry's fixup address instead of
 * treating the fault as fatal.
 */
Z_EXC_DECLARE(z_x86_user_string_nlen);

static const struct z_exc_handle exceptions[] = {
	Z_EXC_HANDLE(z_x86_user_string_nlen)
};
#endif
369 
370 __pinned_func
z_x86_page_fault_handler(z_arch_esf_t * esf)371 void z_x86_page_fault_handler(z_arch_esf_t *esf)
372 {
373 #ifdef CONFIG_DEMAND_PAGING
374 	if ((esf->errorCode & PF_P) == 0) {
375 		/* Page was non-present at time exception happened.
376 		 * Get faulting virtual address from CR2 register
377 		 */
378 		void *virt = z_x86_cr2_get();
379 		bool was_valid_access;
380 
381 #ifdef CONFIG_X86_KPTI
382 		/* Protection ring is lowest 2 bits in interrupted CS */
383 		bool was_user = ((esf->cs & 0x3) != 0U);
384 
385 		/* Need to check if the interrupted context was a user thread
386 		 * that hit a non-present page that was flipped due to KPTI in
387 		 * the thread's page tables, in which case this is an access
388 		 * violation and we should treat this as an error.
389 		 *
390 		 * We're probably not locked, but if there is a race, we will
391 		 * be fine, the kernel page fault code will later detect that
392 		 * the page is present in the kernel's page tables and the
393 		 * instruction will just be re-tried, producing another fault.
394 		 */
395 		if (was_user &&
396 		    !z_x86_kpti_is_access_ok(virt, get_ptables(esf))) {
397 			was_valid_access = false;
398 		} else
399 #else
400 		{
401 			was_valid_access = z_page_fault(virt);
402 		}
403 #endif /* CONFIG_X86_KPTI */
404 		if (was_valid_access) {
405 			/* Page fault handled, re-try */
406 			return;
407 		}
408 	}
409 #endif /* CONFIG_DEMAND_PAGING */
410 
411 #if !defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_COREDUMP)
412 	z_x86_exception_vector = IV_PAGE_FAULT;
413 #endif
414 
415 #ifdef CONFIG_USERSPACE
416 	int i;
417 
418 	for (i = 0; i < ARRAY_SIZE(exceptions); i++) {
419 #ifdef CONFIG_X86_64
420 		if ((void *)esf->rip >= exceptions[i].start &&
421 		    (void *)esf->rip < exceptions[i].end) {
422 			esf->rip = (uint64_t)(exceptions[i].fixup);
423 			return;
424 		}
425 #else
426 		if ((void *)esf->eip >= exceptions[i].start &&
427 		    (void *)esf->eip < exceptions[i].end) {
428 			esf->eip = (unsigned int)(exceptions[i].fixup);
429 			return;
430 		}
431 #endif /* CONFIG_X86_64 */
432 	}
433 #endif
434 #ifdef CONFIG_EXCEPTION_DEBUG
435 	dump_page_fault(esf);
436 #endif
437 #ifdef CONFIG_THREAD_STACK_INFO
438 	if (z_x86_check_stack_bounds(esf_get_sp(esf), 0, esf->cs)) {
439 		z_x86_fatal_error(K_ERR_STACK_CHK_FAIL, esf);
440 	}
441 #endif
442 	z_x86_fatal_error(K_ERR_CPU_EXCEPTION, esf);
443 	CODE_UNREACHABLE;
444 }
445 
__pinned_func
/* Handler for the kernel-oops software interrupt. The oops reason code is
 * passed in RAX on x86-64 and on the interrupted stack (at ESP) on IA-32.
 * Never returns: funnels into z_x86_fatal_error().
 */
void z_x86_do_kernel_oops(const z_arch_esf_t *esf)
{
	uintptr_t reason;

#ifdef CONFIG_X86_64
	reason = esf->rax;
#else
	/* On 32-bit the caller pushed the reason before the INT */
	uintptr_t *stack_ptr = (uintptr_t *)esf->esp;

	reason = *stack_ptr;
#endif

#ifdef CONFIG_USERSPACE
	/* User mode is only allowed to induce oopses and stack check
	 * failures via this software interrupt
	 */
	if ((esf->cs & 0x3) != 0 && !(reason == K_ERR_KERNEL_OOPS ||
				      reason == K_ERR_STACK_CHK_FAIL)) {
		reason = K_ERR_KERNEL_OOPS;
	}
#endif

	z_x86_fatal_error(reason, esf);
}
471