/*
 * Copyright (c) 2019 Carlo Caione <ccaione@baylibre.com>
 *
 * SPDX-License-Identifier: Apache-2.0
 */

/**
 * @file
 * @brief Kernel fatal error handler for ARM64 Cortex-A
 *
 * This module provides the z_arm64_fatal_error() routine for ARM64 Cortex-A
 * CPUs and the z_arm64_do_kernel_oops() routine to manage software-generated
 * fatal exceptions.
 */
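
/*
 * Typical entry points into this handler (a sketch, not an exhaustive list):
 *  - synchronous exceptions and SErrors routed from the vector table end up
 *    in z_arm64_fatal_error() with a decoded reason code;
 *  - k_oops()/k_panic() expand to ARCH_EXCEPT(), which traps into
 *    z_arm64_do_kernel_oops() with the reason code in x8.
 */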

#include <zephyr/debug/symtab.h>
#include <zephyr/drivers/pm_cpu_ops.h>
#include <zephyr/arch/common/exc_handle.h>
#include <zephyr/kernel.h>
#include <zephyr/logging/log.h>
#include <zephyr/sys/poweroff.h>
#include <kernel_arch_func.h>

LOG_MODULE_DECLARE(os, CONFIG_KERNEL_LOG_LEVEL);

#ifdef CONFIG_ARM64_SAFE_EXCEPTION_STACK
K_KERNEL_PINNED_STACK_ARRAY_DEFINE(z_arm64_safe_exception_stacks,
				   CONFIG_MP_MAX_NUM_CPUS,
				   CONFIG_ARM64_SAFE_EXCEPTION_STACK_SIZE);

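/*
 * Set up the per-CPU "safe" exception stack: SP_EL0 is pointed at the top of
 * this stack so the exception entry path has a known-good stack to switch to
 * when the current stack is suspected to be corrupted (e.g. a stack overflow
 * caught by the stack guard).
 */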
void z_arm64_safe_exception_stack_init(void)
{
	int cpu_id;
	char *safe_exc_sp;

	cpu_id = arch_curr_cpu()->id;
	safe_exc_sp = K_KERNEL_STACK_BUFFER(z_arm64_safe_exception_stacks[cpu_id]) +
		      CONFIG_ARM64_SAFE_EXCEPTION_STACK_SIZE;
	arch_curr_cpu()->arch.safe_exception_stack = (uint64_t)safe_exc_sp;
	write_sp_el0((uint64_t)safe_exc_sp);

	arch_curr_cpu()->arch.current_stack_limit = 0UL;
	arch_curr_cpu()->arch.corrupted_sp = 0UL;
}
#endif

#ifdef CONFIG_USERSPACE
Z_EXC_DECLARE(z_arm64_user_string_nlen);

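/*
 * Fixup table for faults taken while probing user memory: when a fault's
 * return address (ELR) falls inside one of these ranges, execution resumes
 * at the matching fixup address instead of treating the fault as fatal
 * (see is_recoverable() below).
 */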
static const struct z_exc_handle exceptions[] = {
	Z_EXC_HANDLE(z_arm64_user_string_nlen),
};
#endif /* CONFIG_USERSPACE */

#ifdef CONFIG_EXCEPTION_DEBUG
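/*
 * Decode the Exception Class (EC) field of ESR_ELx into a human-readable
 * string. The EC encodings below follow the ESR_ELx description in the Arm
 * Architecture Reference Manual; *dump_far is set for the classes where
 * FAR_ELx holds a meaningful fault address.
 */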
static void dump_esr(uint64_t esr, bool *dump_far)
{
	const char *err;

	switch (GET_ESR_EC(esr)) {
	case 0b000000: /* 0x00 */
		err = "Unknown reason";
		break;
	case 0b000001: /* 0x01 */
		err = "Trapped WFI or WFE instruction execution";
		break;
	case 0b000011: /* 0x03 */
		err = "Trapped MCR or MRC access with (coproc==0b1111) that "
		      "is not reported using EC 0b000000";
		break;
	case 0b000100: /* 0x04 */
		err = "Trapped MCRR or MRRC access with (coproc==0b1111) "
		      "that is not reported using EC 0b000000";
		break;
	case 0b000101: /* 0x05 */
		err = "Trapped MCR or MRC access with (coproc==0b1110)";
		break;
	case 0b000110: /* 0x06 */
		err = "Trapped LDC or STC access";
		break;
	case 0b000111: /* 0x07 */
		err = "Trapped access to SVE, Advanced SIMD, or "
		      "floating-point functionality";
		break;
	case 0b001100: /* 0x0c */
		err = "Trapped MRRC access with (coproc==0b1110)";
		break;
	case 0b001101: /* 0x0d */
		err = "Branch Target Exception";
		break;
	case 0b001110: /* 0x0e */
		err = "Illegal Execution state";
		break;
	case 0b010001: /* 0x11 */
		err = "SVC instruction execution in AArch32 state";
		break;
	case 0b011000: /* 0x18 */
		err = "Trapped MSR, MRS or System instruction execution in "
		      "AArch64 state, that is not reported using EC "
		      "0b000000, 0b000001 or 0b000111";
		break;
	case 0b011001: /* 0x19 */
		err = "Trapped access to SVE functionality";
		break;
	case 0b100000: /* 0x20 */
		*dump_far = true;
		err = "Instruction Abort from a lower Exception level, that "
		      "might be using AArch32 or AArch64";
		break;
	case 0b100001: /* 0x21 */
		*dump_far = true;
		err = "Instruction Abort taken without a change in Exception "
		      "level.";
		break;
	case 0b100010: /* 0x22 */
		*dump_far = true;
		err = "PC alignment fault exception.";
		break;
	case 0b100100: /* 0x24 */
		*dump_far = true;
		err = "Data Abort from a lower Exception level, that might "
		      "be using AArch32 or AArch64";
		break;
	case 0b100101: /* 0x25 */
		*dump_far = true;
		err = "Data Abort taken without a change in Exception level";
		break;
	case 0b100110: /* 0x26 */
		err = "SP alignment fault exception";
		break;
	case 0b101000: /* 0x28 */
		err = "Trapped floating-point exception taken from AArch32 "
		      "state";
		break;
	case 0b101100: /* 0x2c */
		err = "Trapped floating-point exception taken from AArch64 "
		      "state.";
		break;
	case 0b101111: /* 0x2f */
		err = "SError interrupt";
		break;
	case 0b110000: /* 0x30 */
		err = "Breakpoint exception from a lower Exception level, "
		      "that might be using AArch32 or AArch64";
		break;
	case 0b110001: /* 0x31 */
		err = "Breakpoint exception taken without a change in "
		      "Exception level";
		break;
	case 0b110010: /* 0x32 */
		err = "Software Step exception from a lower Exception level, "
		      "that might be using AArch32 or AArch64";
		break;
	case 0b110011: /* 0x33 */
		err = "Software Step exception taken without a change in "
		      "Exception level";
		break;
	case 0b110100: /* 0x34 */
		*dump_far = true;
		err = "Watchpoint exception from a lower Exception level, "
		      "that might be using AArch32 or AArch64";
		break;
	case 0b110101: /* 0x35 */
		*dump_far = true;
		err = "Watchpoint exception taken without a change in "
		      "Exception level.";
		break;
	case 0b111000: /* 0x38 */
		err = "BKPT instruction execution in AArch32 state";
		break;
	case 0b111100: /* 0x3c */
		err = "BRK instruction execution in AArch64 state.";
		break;
	default:
		err = "Unknown";
	}

	LOG_ERR("ESR_ELn: 0x%016llx", esr);
	LOG_ERR("  EC:  0x%llx (%s)", GET_ESR_EC(esr), err);
	LOG_ERR("  IL:  0x%llx", GET_ESR_IL(esr));
	LOG_ERR("  ISS: 0x%llx", GET_ESR_ISS(esr));
}

static void esf_dump(const struct arch_esf *esf)
{
	LOG_ERR("x0:  0x%016llx  x1:  0x%016llx", esf->x0, esf->x1);
	LOG_ERR("x2:  0x%016llx  x3:  0x%016llx", esf->x2, esf->x3);
	LOG_ERR("x4:  0x%016llx  x5:  0x%016llx", esf->x4, esf->x5);
	LOG_ERR("x6:  0x%016llx  x7:  0x%016llx", esf->x6, esf->x7);
	LOG_ERR("x8:  0x%016llx  x9:  0x%016llx", esf->x8, esf->x9);
	LOG_ERR("x10: 0x%016llx  x11: 0x%016llx", esf->x10, esf->x11);
	LOG_ERR("x12: 0x%016llx  x13: 0x%016llx", esf->x12, esf->x13);
	LOG_ERR("x14: 0x%016llx  x15: 0x%016llx", esf->x14, esf->x15);
	LOG_ERR("x16: 0x%016llx  x17: 0x%016llx", esf->x16, esf->x17);
	LOG_ERR("x18: 0x%016llx  lr:  0x%016llx", esf->x18, esf->lr);
}

#ifdef CONFIG_EXCEPTION_STACK_TRACE
static void esf_unwind(const struct arch_esf *esf)
{
	/*
	 * For GCC:
	 *
	 *  ^  +-----------------+
	 *  |  |                 |
	 *  |  |                 |
	 *  |  |                 |
	 *  |  |                 |
	 *  |  | function stack  |
	 *  |  |                 |
	 *  |  |                 |
	 *  |  |                 |
	 *  |  |                 |
	 *  |  +-----------------+
	 *  |  |       LR        |
	 *  |  +-----------------+
	 *  |  |   previous FP   | <---+ FP
	 *  +  +-----------------+
	 */
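
	/*
	 * Per the AAPCS64 the frame record pointed to by FP is a pair of
	 * 64-bit values: fp[0] holds the caller's frame pointer and fp[1]
	 * the saved LR, so the walk below follows fp[0] until it hits NULL
	 * or the configured frame limit.
	 */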

	uint64_t *fp = (uint64_t *) esf->fp;
	unsigned int count = 0;
	uint64_t lr;

	LOG_ERR("");
	for (int i = 0; (fp != NULL) && (i < CONFIG_EXCEPTION_STACK_TRACE_MAX_FRAMES); i++) {
		lr = fp[1];
#ifdef CONFIG_SYMTAB
		uint32_t offset = 0;
		const char *name = symtab_find_symbol_name(lr, &offset);

		LOG_ERR("backtrace %2d: fp: 0x%016llx lr: 0x%016llx [%s+0x%x]",
			count++, (uint64_t) fp, lr, name, offset);
#else
		LOG_ERR("backtrace %2d: fp: 0x%016llx lr: 0x%016llx",
			count++, (uint64_t) fp, lr);
#endif
		fp = (uint64_t *) fp[0];
	}
	LOG_ERR("");
}
#endif

#endif /* CONFIG_EXCEPTION_DEBUG */

#ifdef CONFIG_ARM64_STACK_PROTECTION
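/*
 * Check whether a data abort is the result of running into the stack guard
 * region. Kernel-mode overflows are detected either through the SP recorded
 * on exception entry or through a fault address inside the guard page just
 * below the current stack limit; user-mode overflows are checked the same
 * way against the current thread's stack.
 */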
static bool z_arm64_stack_corruption_check(struct arch_esf *esf, uint64_t esr, uint64_t far)
{
	uint64_t sp, sp_limit, guard_start;
	/* 0x25 means data abort from current EL */
	if (GET_ESR_EC(esr) == 0x25) {
		sp_limit = arch_curr_cpu()->arch.current_stack_limit;
		guard_start = sp_limit - Z_ARM64_STACK_GUARD_SIZE;
		sp = arch_curr_cpu()->arch.corrupted_sp;
		if ((sp != 0 && sp <= sp_limit) || (guard_start <= far && far <= sp_limit)) {
#ifdef CONFIG_FPU_SHARING
			/*
			 * We are running on the safe exception stack and the
			 * stack has definitely overflowed, so flush the FPU
			 * context back to its owner and disable the FPU trap
			 * to avoid a nested exception being triggered by FPU
			 * accesses (e.g. variadic argument handling).
			 */
			arch_flush_local_fpu();
			write_cpacr_el1(read_cpacr_el1() | CPACR_EL1_FPEN_NOTRAP);
#endif
			arch_curr_cpu()->arch.corrupted_sp = 0UL;
			LOG_ERR("STACK OVERFLOW FROM KERNEL, SP: 0x%llx OR FAR: 0x%llx INVALID,"
				" SP LIMIT: 0x%llx", sp, far, sp_limit);
			return true;
		}
	}
#ifdef CONFIG_USERSPACE
	else if ((_current->base.user_options & K_USER) != 0 && GET_ESR_EC(esr) == 0x24) {
		sp_limit = (uint64_t)_current->stack_info.start;
		guard_start = sp_limit - Z_ARM64_STACK_GUARD_SIZE;
		sp = esf->sp;
		if (sp <= sp_limit || (guard_start <= far && far <= sp_limit)) {
			LOG_ERR("STACK OVERFLOW FROM USERSPACE, SP: 0x%llx OR FAR: 0x%llx INVALID,"
				" SP LIMIT: 0x%llx", sp, far, sp_limit);
			return true;
		}
	}
#endif
	return false;
}
#endif

static bool is_recoverable(struct arch_esf *esf, uint64_t esr, uint64_t far,
			   uint64_t elr)
{
	if (!esf) {
		return false;
	}

#ifdef CONFIG_USERSPACE
	for (int i = 0; i < ARRAY_SIZE(exceptions); i++) {
		/* Mask out instruction mode */
		uint64_t start = (uint64_t)exceptions[i].start;
		uint64_t end = (uint64_t)exceptions[i].end;

		if (esf->elr >= start && esf->elr < end) {
			esf->elr = (uint64_t)(exceptions[i].fixup);
			return true;
		}
	}
#endif

	return false;
}

void z_arm64_fatal_error(unsigned int reason, struct arch_esf *esf)
{
	uint64_t esr = 0;
	uint64_t elr = 0;
	uint64_t far = 0;
	uint64_t el;

	if (reason != K_ERR_SPURIOUS_IRQ) {
		el = read_currentel();

		switch (GET_EL(el)) {
		case MODE_EL1:
			esr = read_esr_el1();
			far = read_far_el1();
			elr = read_elr_el1();
			break;
#if !defined(CONFIG_ARMV8_R)
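		/* Armv8-R does not implement EL3, so this case only exists
		 * on Armv8-A builds.
		 */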
		case MODE_EL3:
			esr = read_esr_el3();
			far = read_far_el3();
			elr = read_elr_el3();
			break;
#endif /* CONFIG_ARMV8_R */
		}

#ifdef CONFIG_ARM64_STACK_PROTECTION
		if (z_arm64_stack_corruption_check(esf, esr, far)) {
			reason = K_ERR_STACK_CHK_FAIL;
		}
#endif

		if (GET_EL(el) != MODE_EL0) {
#ifdef CONFIG_EXCEPTION_DEBUG
			bool dump_far = false;

			LOG_ERR("ELR_ELn: 0x%016llx", elr);

			dump_esr(esr, &dump_far);

			if (dump_far) {
				LOG_ERR("FAR_ELn: 0x%016llx", far);
			}

			LOG_ERR("TPIDRRO: 0x%016llx", read_tpidrro_el0());
#endif /* CONFIG_EXCEPTION_DEBUG */

			if (is_recoverable(esf, esr, far, elr) &&
			    reason != K_ERR_STACK_CHK_FAIL) {
				return;
			}
		}
	}

#ifdef CONFIG_EXCEPTION_DEBUG
	if (esf != NULL) {
		esf_dump(esf);
	}

#ifdef CONFIG_EXCEPTION_STACK_TRACE
	esf_unwind(esf);
#endif /* CONFIG_EXCEPTION_STACK_TRACE */
#endif /* CONFIG_EXCEPTION_DEBUG */

	z_fatal_error(reason, esf);

	CODE_UNREACHABLE;
}

/**
 * @brief Handle a software-generated fatal exception
 * (e.g. kernel oops, panic, etc.).
 *
 * @param esf exception frame
 */
void z_arm64_do_kernel_oops(struct arch_esf *esf)
{
	/* x8 holds the exception reason */
	unsigned int reason = esf->x8;

#if defined(CONFIG_USERSPACE)
	/*
	 * User mode is only allowed to induce oopses and stack check
	 * failures via software-triggered system fatal exceptions.
	 */
	if (((_current->base.user_options & K_USER) != 0) &&
		reason != K_ERR_STACK_CHK_FAIL) {
		reason = K_ERR_KERNEL_OOPS;
	}
#endif

	z_arm64_fatal_error(reason, esf);
}

#ifdef CONFIG_USERSPACE
FUNC_NORETURN void arch_syscall_oops(void *ssf_ptr)
{
	z_arm64_fatal_error(K_ERR_KERNEL_OOPS, ssf_ptr);
	CODE_UNREACHABLE;
}
#endif

#if defined(CONFIG_PM_CPU_OPS_PSCI)
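/*
 * When PSCI-based CPU ops are available, halting the system attempts a real
 * power-off first (with CONFIG_POWEROFF this is expected to end up in a PSCI
 * SYSTEM_OFF call) and only spins as a last resort.
 */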
FUNC_NORETURN void arch_system_halt(unsigned int reason)
{
	ARG_UNUSED(reason);

	(void)arch_irq_lock();

#ifdef CONFIG_POWEROFF
	sys_poweroff();
#endif /* CONFIG_POWEROFF */

	for (;;) {
		/* Spin endlessly as fallback */
	}
}
#endif