/*
 * Copyright (c) 2019 Carlo Caione <ccaione@baylibre.com>
 *
 * SPDX-License-Identifier: Apache-2.0
 */

/**
 * @file
 * @brief Kernel fatal error handler for ARM64 Cortex-A
 *
 * This module provides the z_arm64_fatal_error() routine for ARM64 Cortex-A
 * CPUs and the z_arm64_do_kernel_oops() routine to manage software-generated
 * fatal exceptions.
 */

#include <zephyr/debug/symtab.h>
#include <zephyr/drivers/pm_cpu_ops.h>
#include <zephyr/arch/common/exc_handle.h>
#include <zephyr/kernel.h>
#include <zephyr/logging/log.h>
#include <zephyr/sys/poweroff.h>
#include <kernel_arch_func.h>

#include "paging.h"

LOG_MODULE_DECLARE(os, CONFIG_KERNEL_LOG_LEVEL);

#ifdef CONFIG_ARM64_SAFE_EXCEPTION_STACK
K_KERNEL_PINNED_STACK_ARRAY_DEFINE(z_arm64_safe_exception_stacks,
				   CONFIG_MP_MAX_NUM_CPUS,
				   CONFIG_ARM64_SAFE_EXCEPTION_STACK_SIZE);

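/*
 * Point SP_EL0 of the current CPU at the top of its pinned safe-exception
 * stack and remember that top in the per-CPU arch data, so the exception
 * entry code has a known-good stack to switch to when the regular kernel
 * stack is suspected to be corrupted.
 */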
void z_arm64_safe_exception_stack_init(void)
{
	int cpu_id;
	char *safe_exc_sp;

	cpu_id = arch_curr_cpu()->id;
	safe_exc_sp = K_KERNEL_STACK_BUFFER(z_arm64_safe_exception_stacks[cpu_id]) +
		      CONFIG_ARM64_SAFE_EXCEPTION_STACK_SIZE;
	arch_curr_cpu()->arch.safe_exception_stack = (uint64_t)safe_exc_sp;
	write_sp_el0((uint64_t)safe_exc_sp);

	arch_curr_cpu()->arch.current_stack_limit = 0UL;
	arch_curr_cpu()->arch.corrupted_sp = 0UL;
}
#endif

#ifdef CONFIG_USERSPACE
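/*
 * z_arm64_user_string_nlen() may legitimately fault while probing user
 * memory; registering its faulting range and fixup address here lets
 * is_recoverable() resume execution at the fixup instead of panicking.
 */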
Z_EXC_DECLARE(z_arm64_user_string_nlen);

static const struct z_exc_handle exceptions[] = {
	Z_EXC_HANDLE(z_arm64_user_string_nlen),
};
#endif /* CONFIG_USERSPACE */

#ifdef CONFIG_EXCEPTION_DEBUG
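/*
 * Translate the Exception Class (EC) field of ESR_ELx into a
 * human-readable string, following the encodings in the Arm Architecture
 * Reference Manual. For exception classes that report a valid fault
 * address, *dump_far is set so the caller also prints FAR_ELx.
 */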
static void dump_esr(uint64_t esr, bool *dump_far)
{
	const char *err;

	switch (GET_ESR_EC(esr)) {
	case 0b000000: /* 0x00 */
		err = "Unknown reason";
		break;
	case 0b000001: /* 0x01 */
		err = "Trapped WFI or WFE instruction execution";
		break;
	case 0b000011: /* 0x03 */
		err = "Trapped MCR or MRC access with (coproc==0b1111) that "
		      "is not reported using EC 0b000000";
		break;
	case 0b000100: /* 0x04 */
		err = "Trapped MCRR or MRRC access with (coproc==0b1111) "
		      "that is not reported using EC 0b000000";
		break;
	case 0b000101: /* 0x05 */
		err = "Trapped MCR or MRC access with (coproc==0b1110)";
		break;
	case 0b000110: /* 0x06 */
		err = "Trapped LDC or STC access";
		break;
	case 0b000111: /* 0x07 */
		err = "Trapped access to SVE, Advanced SIMD, or "
		      "floating-point functionality";
		break;
	case 0b001100: /* 0x0c */
		err = "Trapped MRRC access with (coproc==0b1110)";
		break;
	case 0b001101: /* 0x0d */
		err = "Branch Target Exception";
		break;
	case 0b001110: /* 0x0e */
		err = "Illegal Execution state";
		break;
	case 0b010001: /* 0x11 */
		err = "SVC instruction execution in AArch32 state";
		break;
	case 0b011000: /* 0x18 */
		err = "Trapped MSR, MRS or System instruction execution in "
		      "AArch64 state, that is not reported using EC "
		      "0b000000, 0b000001 or 0b000111";
		break;
	case 0b011001: /* 0x19 */
		err = "Trapped access to SVE functionality";
		break;
	case 0b100000: /* 0x20 */
		*dump_far = true;
		err = "Instruction Abort from a lower Exception level, that "
		      "might be using AArch32 or AArch64";
		break;
	case 0b100001: /* 0x21 */
		*dump_far = true;
		err = "Instruction Abort taken without a change in Exception "
		      "level";
		break;
	case 0b100010: /* 0x22 */
		*dump_far = true;
		err = "PC alignment fault exception";
		break;
	case 0b100100: /* 0x24 */
		*dump_far = true;
		err = "Data Abort from a lower Exception level, that might "
		      "be using AArch32 or AArch64";
		break;
	case 0b100101: /* 0x25 */
		*dump_far = true;
		err = "Data Abort taken without a change in Exception level";
		break;
	case 0b100110: /* 0x26 */
		err = "SP alignment fault exception";
		break;
	case 0b101000: /* 0x28 */
		err = "Trapped floating-point exception taken from AArch32 "
		      "state";
		break;
	case 0b101100: /* 0x2c */
		err = "Trapped floating-point exception taken from AArch64 "
		      "state";
		break;
	case 0b101111: /* 0x2f */
		err = "SError interrupt";
		break;
	case 0b110000: /* 0x30 */
		err = "Breakpoint exception from a lower Exception level, "
		      "that might be using AArch32 or AArch64";
		break;
	case 0b110001: /* 0x31 */
		err = "Breakpoint exception taken without a change in "
		      "Exception level";
		break;
	case 0b110010: /* 0x32 */
		err = "Software Step exception from a lower Exception level, "
		      "that might be using AArch32 or AArch64";
		break;
	case 0b110011: /* 0x33 */
		err = "Software Step exception taken without a change in "
		      "Exception level";
		break;
	case 0b110100: /* 0x34 */
		*dump_far = true;
		err = "Watchpoint exception from a lower Exception level, "
		      "that might be using AArch32 or AArch64";
		break;
	case 0b110101: /* 0x35 */
		*dump_far = true;
		err = "Watchpoint exception taken without a change in "
		      "Exception level";
		break;
	case 0b111000: /* 0x38 */
		err = "BKPT instruction execution in AArch32 state";
		break;
	case 0b111100: /* 0x3c */
		err = "BRK instruction execution in AArch64 state";
		break;
	default:
		err = "Unknown";
	}

	LOG_ERR("ESR_ELn: 0x%016llx", esr);
	LOG_ERR("  EC:  0x%llx (%s)", GET_ESR_EC(esr), err);
	LOG_ERR("  IL:  0x%llx", GET_ESR_IL(esr));
	LOG_ERR("  ISS: 0x%llx", GET_ESR_ISS(esr));
}

static void esf_dump(const struct arch_esf *esf)
{
	LOG_ERR("x0:  0x%016llx  x1:  0x%016llx", esf->x0, esf->x1);
	LOG_ERR("x2:  0x%016llx  x3:  0x%016llx", esf->x2, esf->x3);
	LOG_ERR("x4:  0x%016llx  x5:  0x%016llx", esf->x4, esf->x5);
	LOG_ERR("x6:  0x%016llx  x7:  0x%016llx", esf->x6, esf->x7);
	LOG_ERR("x8:  0x%016llx  x9:  0x%016llx", esf->x8, esf->x9);
	LOG_ERR("x10: 0x%016llx  x11: 0x%016llx", esf->x10, esf->x11);
	LOG_ERR("x12: 0x%016llx  x13: 0x%016llx", esf->x12, esf->x13);
	LOG_ERR("x14: 0x%016llx  x15: 0x%016llx", esf->x14, esf->x15);
	LOG_ERR("x16: 0x%016llx  x17: 0x%016llx", esf->x16, esf->x17);
	LOG_ERR("x18: 0x%016llx  lr:  0x%016llx", esf->x18, esf->lr);
}
#endif /* CONFIG_EXCEPTION_DEBUG */

#ifdef CONFIG_ARCH_STACKWALK
typedef bool (*arm64_stacktrace_cb)(void *cookie, unsigned long addr, void *fp);

static void walk_stackframe(arm64_stacktrace_cb cb, void *cookie, const struct arch_esf *esf,
			    int max_frames)
{
	/*
	 * For GCC:
	 *
	 *  ^  +-----------------+
	 *  |  |                 |
	 *  |  |                 |
	 *  |  |                 |
	 *  |  |                 |
	 *  |  | function stack  |
	 *  |  |                 |
	 *  |  |                 |
	 *  |  |                 |
	 *  |  |                 |
	 *  |  +-----------------+
	 *  |  |       LR        |
	 *  |  +-----------------+
	 *  |  |   previous FP   | <---+ FP
	 *  +  +-----------------+
	 */
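	/*
	 * In this AAPCS64 frame record layout, fp[0] holds the caller's
	 * frame pointer and fp[1] the saved LR, so the chain can be walked
	 * until a NULL frame pointer (or the frame limit) is reached. Frames
	 * built without a frame pointer will not show up in the trace.
	 */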

	uint64_t *fp;
	uint64_t lr;

	if (esf == NULL) {
		return;
	}

	fp = (uint64_t *)esf->fp;

	for (int i = 0; (fp != NULL) && (i < max_frames); i++) {
		lr = fp[1];
		if (!cb(cookie, lr, fp)) {
			break;
		}
		fp = (uint64_t *)fp[0];
	}
}

void arch_stack_walk(stack_trace_callback_fn callback_fn, void *cookie,
		     const struct k_thread *thread, const struct arch_esf *esf)
{
	ARG_UNUSED(thread);

	walk_stackframe((arm64_stacktrace_cb)callback_fn, cookie, esf,
			CONFIG_ARCH_STACKWALK_MAX_FRAMES);
}
#endif /* CONFIG_ARCH_STACKWALK */

#ifdef CONFIG_EXCEPTION_STACK_TRACE
static bool print_trace_address(void *arg, unsigned long lr, void *fp)
{
	int *i = arg;
#ifdef CONFIG_SYMTAB
	uint32_t offset = 0;
	const char *name = symtab_find_symbol_name(lr, &offset);

	LOG_ERR("     %d: fp: 0x%016llx lr: 0x%016lx [%s+0x%x]", (*i)++, (uint64_t)fp, lr, name,
		offset);
#else
	LOG_ERR("     %d: fp: 0x%016llx lr: 0x%016lx", (*i)++, (uint64_t)fp, lr);
#endif /* CONFIG_SYMTAB */

	return true;
}

static void esf_unwind(const struct arch_esf *esf)
{
	int i = 0;

	LOG_ERR("");
	LOG_ERR("call trace:");
	walk_stackframe(print_trace_address, &i, esf, CONFIG_ARCH_STACKWALK_MAX_FRAMES);
	LOG_ERR("");
}
#endif /* CONFIG_EXCEPTION_STACK_TRACE */

#ifdef CONFIG_ARM64_STACK_PROTECTION
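/*
 * Classify a data abort as a stack overflow when either the recorded
 * corrupted SP or the faulting address (FAR) lands in the stack guard
 * region that sits just below the current stack limit.
 */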
static bool z_arm64_stack_corruption_check(struct arch_esf *esf, uint64_t esr, uint64_t far)
{
	uint64_t sp, sp_limit, guard_start;

	/* 0x25 means a data abort taken from the current EL */
	if (GET_ESR_EC(esr) == 0x25) {
		sp_limit = arch_curr_cpu()->arch.current_stack_limit;
		guard_start = sp_limit - Z_ARM64_STACK_GUARD_SIZE;
		sp = arch_curr_cpu()->arch.corrupted_sp;
		if ((sp != 0 && sp <= sp_limit) || (guard_start <= far && far <= sp_limit)) {
#ifdef CONFIG_FPU_SHARING
			/*
			 * We are on the exception stack and the stack has
			 * definitely overflowed, so flush the FPU context to
			 * its owner and disable the FPU trap to avoid a new
			 * nested exception triggered by FPU access (va_args).
			 */
			arch_flush_local_fpu();
			write_cpacr_el1(read_cpacr_el1() | CPACR_EL1_FPEN_NOTRAP);
#endif
			arch_curr_cpu()->arch.corrupted_sp = 0UL;
			LOG_ERR("STACK OVERFLOW FROM KERNEL, SP: 0x%llx OR FAR: 0x%llx INVALID,"
				" SP LIMIT: 0x%llx", sp, far, sp_limit);
			return true;
		}
	}
#ifdef CONFIG_USERSPACE
	else if ((arch_current_thread()->base.user_options & K_USER) != 0 &&
		 GET_ESR_EC(esr) == 0x24) {
		sp_limit = (uint64_t)arch_current_thread()->stack_info.start;
		guard_start = sp_limit - Z_ARM64_STACK_GUARD_SIZE;
		sp = esf->sp;
		if (sp <= sp_limit || (guard_start <= far && far <= sp_limit)) {
			LOG_ERR("STACK OVERFLOW FROM USERSPACE, SP: 0x%llx OR FAR: 0x%llx INVALID,"
				" SP LIMIT: 0x%llx", sp, far, sp_limit);
			return true;
		}
	}
#endif
	return false;
}
#endif

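/*
 * An exception is recoverable only if it was raised from inside one of
 * the registered fixup ranges above; execution then resumes at the
 * matching fixup handler.
 */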
static bool is_recoverable(struct arch_esf *esf, uint64_t esr, uint64_t far,
			   uint64_t elr)
{
	if (!esf) {
		return false;
	}

#ifdef CONFIG_USERSPACE
	for (int i = 0; i < ARRAY_SIZE(exceptions); i++) {
		uint64_t start = (uint64_t)exceptions[i].start;
		uint64_t end = (uint64_t)exceptions[i].end;

		if (esf->elr >= start && esf->elr < end) {
			esf->elr = (uint64_t)(exceptions[i].fixup);
			return true;
		}
	}
#endif

	return false;
}

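/*
 * Main fatal-error entry point: gather ESR/FAR/ELR from the current EL,
 * give the stack-protection and demand-paging hooks a chance to classify
 * or resolve the fault, then hand off to the common z_fatal_error() path.
 */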
void z_arm64_fatal_error(unsigned int reason, struct arch_esf *esf)
{
	uint64_t esr = 0;
	uint64_t elr = 0;
	uint64_t far = 0;
	uint64_t el;

	if (reason != K_ERR_SPURIOUS_IRQ) {
		el = read_currentel();

		switch (GET_EL(el)) {
		case MODE_EL1:
			esr = read_esr_el1();
			far = read_far_el1();
			elr = read_elr_el1();
			break;
#if !defined(CONFIG_ARMV8_R)
		case MODE_EL3:
			esr = read_esr_el3();
			far = read_far_el3();
			elr = read_elr_el3();
			break;
#endif /* CONFIG_ARMV8_R */
		}

#ifdef CONFIG_ARM64_STACK_PROTECTION
		if (z_arm64_stack_corruption_check(esf, esr, far)) {
			reason = K_ERR_STACK_CHK_FAIL;
		}
#endif

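		/*
		 * With demand paging enabled, a translation fault may simply
		 * mean the page is not resident yet; let the paging code try
		 * to resolve it before treating the exception as fatal.
		 */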
		if (IS_ENABLED(CONFIG_DEMAND_PAGING) &&
		    reason != K_ERR_STACK_CHK_FAIL &&
		    z_arm64_do_demand_paging(esf, esr, far)) {
			return;
		}

		if (GET_EL(el) != MODE_EL0) {
#ifdef CONFIG_EXCEPTION_DEBUG
			bool dump_far = false;

			LOG_ERR("ELR_ELn: 0x%016llx", elr);

			dump_esr(esr, &dump_far);

			if (dump_far) {
				LOG_ERR("FAR_ELn: 0x%016llx", far);
			}

			LOG_ERR("TPIDRRO: 0x%016llx", read_tpidrro_el0());
#endif /* CONFIG_EXCEPTION_DEBUG */

			if (is_recoverable(esf, esr, far, elr) &&
			    reason != K_ERR_STACK_CHK_FAIL) {
				return;
			}
		}
	}

#ifdef CONFIG_EXCEPTION_DEBUG
	if (esf != NULL) {
		esf_dump(esf);
	}

#ifdef CONFIG_EXCEPTION_STACK_TRACE
	esf_unwind(esf);
#endif /* CONFIG_EXCEPTION_STACK_TRACE */
#endif /* CONFIG_EXCEPTION_DEBUG */

	z_fatal_error(reason, esf);

	CODE_UNREACHABLE;
}

/**
 * @brief Handle a software-generated fatal exception
 * (e.g. a kernel oops or panic).
 *
 * @param esf exception frame
 */
void z_arm64_do_kernel_oops(struct arch_esf *esf)
{
	/* x8 holds the exception reason */
	unsigned int reason = esf->x8;

#if defined(CONFIG_USERSPACE)
	/*
	 * User mode is only allowed to induce oopses and stack check
	 * failures via software-triggered system fatal exceptions.
	 */
	if (((arch_current_thread()->base.user_options & K_USER) != 0) &&
		reason != K_ERR_STACK_CHK_FAIL) {
		reason = K_ERR_KERNEL_OOPS;
	}
#endif

	z_arm64_fatal_error(reason, esf);
}

#ifdef CONFIG_USERSPACE
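/*
 * Reached when syscall argument validation fails: the syscall stack
 * frame pointer is passed straight through as the exception frame for
 * the fatal-error report.
 */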
FUNC_NORETURN void arch_syscall_oops(void *ssf_ptr)
{
	z_arm64_fatal_error(K_ERR_KERNEL_OOPS, ssf_ptr);
	CODE_UNREACHABLE;
}
#endif

#if defined(CONFIG_PM_CPU_OPS_PSCI)
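/*
 * With PSCI available, prefer a proper platform power-off over the
 * default busy loop; if CONFIG_POWEROFF is not enabled, fall back to
 * spinning with interrupts locked.
 */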
FUNC_NORETURN void arch_system_halt(unsigned int reason)
{
	ARG_UNUSED(reason);

	(void)arch_irq_lock();

#ifdef CONFIG_POWEROFF
	sys_poweroff();
#endif /* CONFIG_POWEROFF */

	for (;;) {
		/* Spin endlessly as fallback */
	}
}
#endif