/*
 * Copyright (c) 2019 Carlo Caione <ccaione@baylibre.com>
 *
 * SPDX-License-Identifier: Apache-2.0
 */

/**
 * @file
 * @brief Kernel fatal error handler for ARM64 Cortex-A
 *
 * This module provides the z_arm64_fatal_error() routine for ARM64 Cortex-A
 * CPUs and z_arm64_do_kernel_oops() routine to manage software-generated fatal
 * exceptions
 */

#include <zephyr/debug/symtab.h>
#include <zephyr/drivers/pm_cpu_ops.h>
#include <zephyr/arch/common/exc_handle.h>
#include <zephyr/kernel.h>
#include <zephyr/linker/linker-defs.h>
#include <zephyr/logging/log.h>
#include <zephyr/sys/poweroff.h>
#include <kernel_arch_func.h>
#include <kernel_arch_interface.h>

#include "paging.h"

LOG_MODULE_DECLARE(os, CONFIG_KERNEL_LOG_LEVEL);

#ifdef CONFIG_ARM64_SAFE_EXCEPTION_STACK
K_KERNEL_PINNED_STACK_ARRAY_DEFINE(z_arm64_safe_exception_stacks,
				   CONFIG_MP_MAX_NUM_CPUS,
				   CONFIG_ARM64_SAFE_EXCEPTION_STACK_SIZE);

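/*
 * Record the top of this CPU's safe exception stack and publish it through
 * SP_EL0 so the exception entry code can switch to a known-good stack when
 * the regular stack turns out to be corrupted.
 */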
void z_arm64_safe_exception_stack_init(void)
{
	int cpu_id;
	char *safe_exc_sp;

	cpu_id = arch_curr_cpu()->id;
	safe_exc_sp = K_KERNEL_STACK_BUFFER(z_arm64_safe_exception_stacks[cpu_id]) +
		      CONFIG_ARM64_SAFE_EXCEPTION_STACK_SIZE;
	arch_curr_cpu()->arch.safe_exception_stack = (uint64_t)safe_exc_sp;
	write_sp_el0((uint64_t)safe_exc_sp);

	arch_curr_cpu()->arch.current_stack_limit = 0UL;
	arch_curr_cpu()->arch.corrupted_sp = 0UL;
}
#endif /* CONFIG_ARM64_SAFE_EXCEPTION_STACK */

#ifdef CONFIG_USERSPACE
Z_EXC_DECLARE(z_arm64_user_string_nlen);

static const struct z_exc_handle exceptions[] = {
	Z_EXC_HANDLE(z_arm64_user_string_nlen),
};
#endif /* CONFIG_USERSPACE */

#ifdef CONFIG_EXCEPTION_DEBUG
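/*
 * Decode ESR_ELn: log the exception class (EC), instruction length (IL) and
 * syndrome (ISS) fields, and tell the caller whether FAR_ELn carries a
 * meaningful fault address for this exception class.
 */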
static void dump_esr(uint64_t esr, bool *dump_far)
{
	const char *err;

	switch (GET_ESR_EC(esr)) {
	case 0b000000: /* 0x00 */
		err = "Unknown reason";
		break;
	case 0b000001: /* 0x01 */
		err = "Trapped WFI or WFE instruction execution";
		break;
	case 0b000011: /* 0x03 */
		err = "Trapped MCR or MRC access with (coproc==0b1111) that "
		      "is not reported using EC 0b000000";
		break;
	case 0b000100: /* 0x04 */
		err = "Trapped MCRR or MRRC access with (coproc==0b1111) "
		      "that is not reported using EC 0b000000";
		break;
	case 0b000101: /* 0x05 */
		err = "Trapped MCR or MRC access with (coproc==0b1110)";
		break;
	case 0b000110: /* 0x06 */
		err = "Trapped LDC or STC access";
		break;
	case 0b000111: /* 0x07 */
		err = "Trapped access to SVE, Advanced SIMD, or "
		      "floating-point functionality";
		break;
	case 0b001100: /* 0x0c */
		err = "Trapped MRRC access with (coproc==0b1110)";
		break;
	case 0b001101: /* 0x0d */
		err = "Branch Target Exception";
		break;
	case 0b001110: /* 0x0e */
		err = "Illegal Execution state";
		break;
	case 0b010001: /* 0x11 */
		err = "SVC instruction execution in AArch32 state";
		break;
	case 0b011000: /* 0x18 */
		err = "Trapped MSR, MRS or System instruction execution in "
		      "AArch64 state, that is not reported using EC "
		      "0b000000, 0b000001 or 0b000111";
		break;
	case 0b011001: /* 0x19 */
		err = "Trapped access to SVE functionality";
		break;
	case 0b100000: /* 0x20 */
		*dump_far = true;
		err = "Instruction Abort from a lower Exception level, that "
		      "might be using AArch32 or AArch64";
		break;
	case 0b100001: /* 0x21 */
		*dump_far = true;
		err = "Instruction Abort taken without a change in Exception "
		      "level.";
		break;
	case 0b100010: /* 0x22 */
		*dump_far = true;
		err = "PC alignment fault exception.";
		break;
	case 0b100100: /* 0x24 */
		*dump_far = true;
		err = "Data Abort from a lower Exception level, that might "
		      "be using AArch32 or AArch64";
		break;
	case 0b100101: /* 0x25 */
		*dump_far = true;
		err = "Data Abort taken without a change in Exception level";
		break;
	case 0b100110: /* 0x26 */
		err = "SP alignment fault exception";
		break;
	case 0b101000: /* 0x28 */
		err = "Trapped floating-point exception taken from AArch32 "
		      "state";
		break;
	case 0b101100: /* 0x2c */
		err = "Trapped floating-point exception taken from AArch64 "
		      "state.";
		break;
	case 0b101111: /* 0x2f */
		err = "SError interrupt";
		break;
	case 0b110000: /* 0x30 */
		err = "Breakpoint exception from a lower Exception level, "
		      "that might be using AArch32 or AArch64";
		break;
	case 0b110001: /* 0x31 */
		err = "Breakpoint exception taken without a change in "
		      "Exception level";
		break;
	case 0b110010: /* 0x32 */
		err = "Software Step exception from a lower Exception level, "
		      "that might be using AArch32 or AArch64";
		break;
	case 0b110011: /* 0x33 */
		err = "Software Step exception taken without a change in "
		      "Exception level";
		break;
	case 0b110100: /* 0x34 */
		*dump_far = true;
		err = "Watchpoint exception from a lower Exception level, "
		      "that might be using AArch32 or AArch64";
		break;
	case 0b110101: /* 0x35 */
		*dump_far = true;
		err = "Watchpoint exception taken without a change in "
		      "Exception level.";
		break;
	case 0b111000: /* 0x38 */
		err = "BKPT instruction execution in AArch32 state";
		break;
	case 0b111100: /* 0x3c */
		err = "BRK instruction execution in AArch64 state.";
		break;
	default:
		err = "Unknown";
	}

	LOG_ERR("ESR_ELn: 0x%016llx", esr);
	LOG_ERR("  EC:  0x%llx (%s)", GET_ESR_EC(esr), err);
	LOG_ERR("  IL:  0x%llx", GET_ESR_IL(esr));
	LOG_ERR("  ISS: 0x%llx", GET_ESR_ISS(esr));
}

static void esf_dump(const struct arch_esf *esf)
{
	LOG_ERR("x0:  0x%016llx  x1:  0x%016llx", esf->x0, esf->x1);
	LOG_ERR("x2:  0x%016llx  x3:  0x%016llx", esf->x2, esf->x3);
	LOG_ERR("x4:  0x%016llx  x5:  0x%016llx", esf->x4, esf->x5);
	LOG_ERR("x6:  0x%016llx  x7:  0x%016llx", esf->x6, esf->x7);
	LOG_ERR("x8:  0x%016llx  x9:  0x%016llx", esf->x8, esf->x9);
	LOG_ERR("x10: 0x%016llx  x11: 0x%016llx", esf->x10, esf->x11);
	LOG_ERR("x12: 0x%016llx  x13: 0x%016llx", esf->x12, esf->x13);
	LOG_ERR("x14: 0x%016llx  x15: 0x%016llx", esf->x14, esf->x15);
	LOG_ERR("x16: 0x%016llx  x17: 0x%016llx", esf->x16, esf->x17);
	LOG_ERR("x18: 0x%016llx  lr:  0x%016llx", esf->x18, esf->lr);
}
#endif /* CONFIG_EXCEPTION_DEBUG */

#ifdef CONFIG_ARCH_STACKWALK
typedef bool (*arm64_stacktrace_cb)(void *cookie, unsigned long addr, void *fp);

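/*
 * Check that a frame-pointer slot can safely be used for unwinding: the
 * stored value must be non-NULL and 32-bit aligned, and the slot itself
 * must be backed by a mapped page.
 */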
static bool is_address_mapped(uint64_t *addr)
{
	uintptr_t *phys = NULL;

	if (*addr == 0U) {
		return false;
	}

	/* Check alignment. */
	if ((*addr & (sizeof(uint32_t) - 1U)) != 0U) {
		return false;
	}

	return !arch_page_phys_get((void *) addr, phys);
}

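/*
 * A saved link register is a plausible return address only if it is
 * non-NULL, 32-bit aligned and points inside the kernel text region.
 */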
static bool is_valid_jump_address(uint64_t *addr)
{
	if (*addr == 0U) {
		return false;
	}

	/* Check alignment. */
	if ((*addr & (sizeof(uint32_t) - 1U)) != 0U) {
		return false;
	}

	return ((*addr >= (uint64_t)__text_region_start) &&
		(*addr <= (uint64_t)(__text_region_end)));
}

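/*
 * Follow the chain of saved frame pointers starting from the frame recorded
 * in the ESF, invoking cb for every valid (FP, LR) pair until cb asks to
 * stop, the chain ends, or max_frames frames have been visited.
 */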
static void walk_stackframe(arm64_stacktrace_cb cb, void *cookie, const struct arch_esf *esf,
			    int max_frames)
{
	/*
	 * For GCC:
	 *
	 *  ^  +-----------------+
	 *  |  |                 |
	 *  |  |                 |
	 *  |  |                 |
	 *  |  |                 |
	 *  |  | function stack  |
	 *  |  |                 |
	 *  |  |                 |
	 *  |  |                 |
	 *  |  |                 |
	 *  |  +-----------------+
	 *  |  |       LR        |
	 *  |  +-----------------+
	 *  |  |   previous FP   | <---+ FP
	 *  +  +-----------------+
	 */

	uint64_t *fp;
	uint64_t lr;

	if (esf != NULL) {
		fp = (uint64_t *) esf->fp;
	} else {
		return;
	}

	for (int i = 0; (fp != NULL) && (i < max_frames); i++) {
		if (!is_address_mapped(fp)) {
			break;
		}
		lr = fp[1];
		if (!is_valid_jump_address(&lr)) {
			break;
		}
		if (!cb(cookie, lr, fp)) {
			break;
		}
		fp = (uint64_t *) fp[0];
	}
}

void arch_stack_walk(stack_trace_callback_fn callback_fn, void *cookie,
		     const struct k_thread *thread, const struct arch_esf *esf)
{
	ARG_UNUSED(thread);

	walk_stackframe((arm64_stacktrace_cb)callback_fn, cookie, esf,
			CONFIG_ARCH_STACKWALK_MAX_FRAMES);
}
#endif /* CONFIG_ARCH_STACKWALK */

#ifdef CONFIG_EXCEPTION_STACK_TRACE
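/*
 * walk_stackframe() callback: print the frame index, frame pointer and
 * return address and, when CONFIG_SYMTAB is enabled, the nearest symbol
 * name plus offset.
 */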
static bool print_trace_address(void *arg, unsigned long lr, void *fp)
{
	int *i = arg;
#ifdef CONFIG_SYMTAB
	uint32_t offset = 0;
	const char *name = symtab_find_symbol_name(lr, &offset);

	LOG_ERR("     %d: fp: 0x%016llx lr: 0x%016lx [%s+0x%x]", (*i)++, (uint64_t)fp, lr, name,
		offset);
#else
	LOG_ERR("     %d: fp: 0x%016llx lr: 0x%016lx", (*i)++, (uint64_t)fp, lr);
#endif /* CONFIG_SYMTAB */

	return true;
}

static void esf_unwind(const struct arch_esf *esf)
{
	int i = 0;

	LOG_ERR("");
	LOG_ERR("call trace:");
	walk_stackframe(print_trace_address, &i, esf, CONFIG_ARCH_STACKWALK_MAX_FRAMES);
	LOG_ERR("");
}
#endif /* CONFIG_EXCEPTION_STACK_TRACE */

#ifdef CONFIG_ARM64_STACK_PROTECTION
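/*
 * Classify a data abort as a stack overflow: return true when either the
 * recorded SP is at or below the current stack limit, or the faulting
 * address lies in the guard region just below that limit. Covers both the
 * kernel stack and, with CONFIG_USERSPACE, the user thread stack.
 */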
static bool z_arm64_stack_corruption_check(struct arch_esf *esf, uint64_t esr, uint64_t far)
{
	uint64_t sp, sp_limit, guard_start;
	/* 0x25 means data abort from current EL */
	if (GET_ESR_EC(esr) == 0x25) {
		sp_limit = arch_curr_cpu()->arch.current_stack_limit;
		guard_start = sp_limit - Z_ARM64_STACK_GUARD_SIZE;
		sp = arch_curr_cpu()->arch.corrupted_sp;
		if ((sp != 0 && sp <= sp_limit) || (guard_start <= far && far <= sp_limit)) {
#ifdef CONFIG_FPU_SHARING
			/*
			 * We are on the safe exception stack and the thread stack has
			 * definitely overflowed, so flush the FPU context to its owner
			 * and disable FPU trapping to avoid a new nested exception being
			 * triggered by FPU accesses (e.g. while handling variadic
			 * arguments below).
			 */
			arch_flush_local_fpu();
			write_cpacr_el1(read_cpacr_el1() | CPACR_EL1_FPEN_NOTRAP);
#endif
			arch_curr_cpu()->arch.corrupted_sp = 0UL;
			LOG_ERR("STACK OVERFLOW FROM KERNEL, SP: 0x%llx OR FAR: 0x%llx INVALID,"
				" SP LIMIT: 0x%llx", sp, far, sp_limit);
			return true;
		}
	}
#ifdef CONFIG_USERSPACE
	else if ((_current->base.user_options & K_USER) != 0 && GET_ESR_EC(esr) == 0x24) {
		sp_limit = (uint64_t)_current->stack_info.start;
		guard_start = sp_limit - Z_ARM64_STACK_GUARD_SIZE;
		sp = esf->sp;
		if (sp <= sp_limit || (guard_start <= far && far <= sp_limit)) {
			LOG_ERR("STACK OVERFLOW FROM USERSPACE, SP: 0x%llx OR FAR: 0x%llx INVALID,"
				" SP LIMIT: 0x%llx", sp, far, sp_limit);
			return true;
		}
	}
#endif /* CONFIG_USERSPACE */
	return false;
}
#endif /* CONFIG_ARM64_STACK_PROTECTION */

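/*
 * Check whether the faulting instruction lies inside a region covered by an
 * exception fixup entry (currently only z_arm64_user_string_nlen) and, if
 * so, rewrite ELR so execution resumes at the matching fixup handler.
 */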
static bool is_recoverable(struct arch_esf *esf, uint64_t esr, uint64_t far,
			   uint64_t elr)
{
	if (!esf) {
		return false;
	}

#ifdef CONFIG_USERSPACE
	for (int i = 0; i < ARRAY_SIZE(exceptions); i++) {
		/* Is the faulting PC inside this fixup-protected region? */
		uint64_t start = (uint64_t)exceptions[i].start;
		uint64_t end = (uint64_t)exceptions[i].end;

		if (esf->elr >= start && esf->elr < end) {
			esf->elr = (uint64_t)(exceptions[i].fixup);
			return true;
		}
	}
#endif /* CONFIG_USERSPACE */

	return false;
}

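/**
 * @brief Architecture fatal error handler for ARM64
 *
 * Reads the syndrome, fault address and exception link registers for the
 * current exception level, gives the stack-protection, demand-paging and
 * exception-fixup code a chance to resolve the fault, dumps the exception
 * state when debugging is enabled, and finally forwards the error to
 * z_fatal_error().
 *
 * @param reason fatal error reason
 * @param esf exception stack frame, or NULL if not available
 */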
void z_arm64_fatal_error(unsigned int reason, struct arch_esf *esf)
{
	uint64_t esr = 0;
	uint64_t elr = 0;
	uint64_t far = 0;
	uint64_t el;

	if (reason != K_ERR_SPURIOUS_IRQ) {
		el = read_currentel();

		switch (GET_EL(el)) {
		case MODE_EL1:
			esr = read_esr_el1();
			far = read_far_el1();
			elr = read_elr_el1();
			break;
#if !defined(CONFIG_ARMV8_R)
		case MODE_EL3:
			esr = read_esr_el3();
			far = read_far_el3();
			elr = read_elr_el3();
			break;
#endif /* CONFIG_ARMV8_R */
		}

#ifdef CONFIG_ARM64_STACK_PROTECTION
		if (z_arm64_stack_corruption_check(esf, esr, far)) {
			reason = K_ERR_STACK_CHK_FAIL;
		}
#endif /* CONFIG_ARM64_STACK_PROTECTION */

		if (IS_ENABLED(CONFIG_DEMAND_PAGING) &&
		    reason != K_ERR_STACK_CHK_FAIL &&
		    z_arm64_do_demand_paging(esf, esr, far)) {
			return;
		}

		if (GET_EL(el) != MODE_EL0) {
#ifdef CONFIG_EXCEPTION_DEBUG
			bool dump_far = false;

			LOG_ERR("ELR_ELn: 0x%016llx", elr);

			dump_esr(esr, &dump_far);

			if (dump_far) {
				LOG_ERR("FAR_ELn: 0x%016llx", far);
			}

			LOG_ERR("TPIDRRO: 0x%016llx", read_tpidrro_el0());
#endif /* CONFIG_EXCEPTION_DEBUG */

			if (is_recoverable(esf, esr, far, elr) &&
			    reason != K_ERR_STACK_CHK_FAIL) {
				return;
			}
		}
	}

#ifdef CONFIG_EXCEPTION_DEBUG
	if (esf != NULL) {
		esf_dump(esf);
	}

#ifdef CONFIG_EXCEPTION_STACK_TRACE
	esf_unwind(esf);
#endif /* CONFIG_EXCEPTION_STACK_TRACE */
#endif /* CONFIG_EXCEPTION_DEBUG */

	z_fatal_error(reason, esf);
}

/**
 * @brief Handle a software-generated fatal exception
 * (e.g. kernel oops, panic, etc.).
 *
 * @param esf exception frame
 */
void z_arm64_do_kernel_oops(struct arch_esf *esf)
{
	/* x8 holds the exception reason */
	unsigned int reason = esf->x8;

#if defined(CONFIG_USERSPACE)
	/*
	 * User mode is only allowed to induce oopses and stack check
	 * failures via software-triggered system fatal exceptions.
	 */
	if (((_current->base.user_options & K_USER) != 0) &&
		reason != K_ERR_STACK_CHK_FAIL) {
		reason = K_ERR_KERNEL_OOPS;
	}
#endif /* CONFIG_USERSPACE */

	z_arm64_fatal_error(reason, esf);
}

#ifdef CONFIG_USERSPACE
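/*
 * Invoked when a system call has to be aborted (typically after a failed
 * argument check in a syscall verification handler); the syscall stack
 * frame pointer is reported as the exception frame and this function does
 * not return.
 */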
FUNC_NORETURN void arch_syscall_oops(void *ssf_ptr)
{
	z_arm64_fatal_error(K_ERR_KERNEL_OOPS, ssf_ptr);
	CODE_UNREACHABLE;
}
#endif /* CONFIG_USERSPACE */

#if defined(CONFIG_PM_CPU_OPS_PSCI)
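/*
 * With PSCI available, override the kernel's default arch_system_halt() so
 * that, when CONFIG_POWEROFF is enabled, the platform is actually powered
 * off instead of merely spinning with interrupts locked.
 */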
FUNC_NORETURN void arch_system_halt(unsigned int reason)
{
	ARG_UNUSED(reason);

	(void)arch_irq_lock();

#ifdef CONFIG_POWEROFF
	sys_poweroff();
#endif /* CONFIG_POWEROFF */

	for (;;) {
		/* Spin endlessly as fallback */
	}
}
#endif /* CONFIG_PM_CPU_OPS_PSCI */