/*
 * Copyright (c) 2018 Intel Corporation
 * Copyright (c) 2024 Espressif Systems (Shanghai) Co., Ltd.
 *
 * SPDX-License-Identifier: Apache-2.0
 */

#include <zephyr/device.h>
#include <zephyr/kernel.h>
#include <zephyr/spinlock.h>
#include <zephyr/kernel_structs.h>
#include <zephyr/storage/flash_map.h>
#include <zephyr/drivers/interrupt_controller/intc_esp32.h>

#include <soc.h>
#include <esp_cpu.h>
#include "esp_rom_uart.h"

#include "esp_mcuboot_image.h"
#include "esp_memory_utils.h"

#ifdef CONFIG_SMP

#include <ipi.h>

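/* Parameters handed from the PROCPU to the APPCPU trampoline at
 * startup: the entry function and argument, the stack to switch to,
 * the VECBASE value to inherit, and a flag the APPCPU sets once it
 * is up and running.
 */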
struct cpustart_rec {
	int cpu;
	arch_cpustart_t fn;
	char *stack_top;
	void *arg;
	int vecbase;
	volatile int *alive;
};

volatile struct cpustart_rec *start_rec;
static void *appcpu_top;
static bool cpus_active[CONFIG_MP_MAX_NUM_CPUS];
static struct k_spinlock loglock;

/* Note that the logging done here is ACTUALLY REQUIRED FOR RELIABLE
 * OPERATION!  At least one particular board will experience spurious
 * hangs during initialization (usually the APPCPU fails to start at
 * all) without these calls present.  It's not just timing -- careful
 * use of k_busy_wait() (and even hand-crafted timer loops using the
 * Xtensa timer SRs directly) that duplicates the timing exactly still
 * sees hangs.  Something is happening inside the ROM UART code that
 * magically makes the startup sequence reliable.
 *
 * Leave this in place until the sequence is understood better.
 *
 * (Note that the use of the spinlock is cosmetic only -- if you take
 * it out the messages will interleave across the two CPUs but startup
 * will still be reliable.)
 */
void smp_log(const char *msg)
{
	k_spinlock_key_t key = k_spin_lock(&loglock);

	while (*msg) {
		esp_rom_uart_tx_one_char(*msg++);
	}
	esp_rom_uart_tx_one_char('\r');
	esp_rom_uart_tx_one_char('\n');

	k_spin_unlock(&loglock, key);
}

static void appcpu_entry2(void)
{
	volatile int ps, ie;

	/* Copy over VECBASE from the main CPU for an initial value
	 * (will need to revisit this if we ever allow a user API to
	 * change interrupt vectors at runtime).  Make sure interrupts
	 * are locally disabled, then synthesize a PS value that will
	 * enable them for the user code to pass to irq_unlock()
	 * later.
	 */
	__asm__ volatile("rsr.PS %0" : "=r"(ps));
	ps &= ~(XCHAL_PS_EXCM_MASK | XCHAL_PS_INTLEVEL_MASK);
	__asm__ volatile("wsr.PS %0" : : "r"(ps));

	ie = 0;
	__asm__ volatile("wsr.INTENABLE %0" : : "r"(ie));
	__asm__ volatile("wsr.VECBASE %0" : : "r"(start_rec->vecbase));
	__asm__ volatile("rsync");

	/* Set up the CPU pointer.  Really this should be xtensa arch
	 * code, not in the ESP-32 layer
	 */
	_cpu_t *cpu = &_kernel.cpus[1];

	__asm__ volatile("wsr.MISC0 %0" : : "r"(cpu));

	smp_log("ESP32: APPCPU running");

	*start_rec->alive = 1;
	start_rec->fn(start_rec->arg);
}

/* Defines a locally callable "function" named z_appcpu_stack_switch().
 * The first argument (in register a2 post-ENTRY) is the new stack
 * pointer to go into register a1.  The second (a3) is the entry point.
 * Because this never returns, a0 is used as a scratch register then
 * set to zero for the called function (a null return value is the
 * signal for "top of stack" to the debugger).
 */
void z_appcpu_stack_switch(void *stack, void *entry);
__asm__("\n"
	".align 4"		"\n"
	"z_appcpu_stack_switch:"	"\n\t"

	"entry a1, 16"		"\n\t"

	/* Subtle: we want the stack to be 16 bytes higher than the
	 * top on entry to the called function, because the ABI forces
	 * it to assume that those bytes are for its caller's A0-A3
	 * spill area.  (In fact ENTRY instructions with stack
	 * adjustments less than 16 are a warning condition in the
	 * assembler). But we aren't a caller, have no bit set in
	 * WINDOWSTART and will never be asked to spill anything.
	 * Those 16 bytes would otherwise be wasted on the stack, so
	 * adjust
	 */
	"addi a1, a2, 16"	"\n\t"

	/* Clear WINDOWSTART so called functions never try to spill
	 * our callers' registers into the now-garbage stack pointers
	 * they contain.  No need to set the bit corresponding to
	 * WINDOWBASE, our C callee will do that when it does an
	 * ENTRY.
	 */
	"movi a0, 0"		"\n\t"
	"wsr.WINDOWSTART a0"	"\n\t"

	/* Clear the CALLINC field of PS (you would think ENTRY would
	 * do that itself, but ours doesn't) so the callee's ENTRY
	 * doesn't shift the registers
	 */
	"rsr.PS a0"		"\n\t"
	"movi a2, 0xfffcffff"	"\n\t"
	"and a0, a0, a2"	"\n\t"
	"wsr.PS a0"		"\n\t"

	"rsync"			"\n\t"
	"movi a0, 0"		"\n\t"

	"jx a3"			"\n\t");

/* Carefully constructed to use no stack beyond compiler-generated ABI
 * instructions.  WE DO NOT KNOW WHERE THE STACK FOR THIS FUNCTION IS.
 * The ROM library just picks a spot on its own with no input from our
 * app linkage and tells us nothing about it until we're already
 * running.
 */
static void appcpu_entry1(void)
{
	z_appcpu_stack_switch(appcpu_top, appcpu_entry2);
}

/* The calls and sequencing here were extracted from the ESP-32
 * FreeRTOS integration with just a tiny bit of cleanup.  None of the
 * calls or registers shown are documented, so treat this code with
 * extreme caution.
 */
void esp_appcpu_start(void *entry_point)
{
	ets_printf("ESP32: starting APPCPU");

	/* These two calls are wrapped in a "stall_other_cpu" API in
	 * esp-idf.  But in this context the appcpu is stalled by
	 * definition, so we can skip that complexity and just call
	 * the ROM directly.
	 */
	esp_rom_Cache_Flush(1);
	esp_rom_Cache_Read_Enable(1);

	esp_rom_ets_set_appcpu_boot_addr((void *)0);

	DPORT_SET_PERI_REG_MASK(DPORT_APPCPU_CTRL_B_REG, DPORT_APPCPU_CLKGATE_EN);
	DPORT_CLEAR_PERI_REG_MASK(DPORT_APPCPU_CTRL_C_REG, DPORT_APPCPU_RUNSTALL);
	DPORT_SET_PERI_REG_MASK(DPORT_APPCPU_CTRL_A_REG, DPORT_APPCPU_RESETTING);
	DPORT_CLEAR_PERI_REG_MASK(DPORT_APPCPU_CTRL_A_REG, DPORT_APPCPU_RESETTING);

	/* As with smp_log() above, these dummy UART writes are
	 * ACTUALLY REQUIRED FOR RELIABLE OPERATION in the AMP case as
	 * well -- please don't remove them!  Something happening
	 * inside the ROM UART code makes the startup sequence
	 * reliable in a way that careful busy-wait timing does not
	 * (see the full comment above smp_log()).
	 *
	 * Leave this in place until the sequence is understood better.
	 */
	esp_rom_uart_tx_one_char('\r');
	esp_rom_uart_tx_one_char('\r');
	esp_rom_uart_tx_one_char('\n');

	/* Seems weird that you set the boot address AFTER starting
	 * the CPU, but this is how they do it...
	 */
	esp_rom_ets_set_appcpu_boot_addr((void *)entry_point);

	ets_printf("ESP32: APPCPU start sequence complete");
}

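/* Acknowledges and dispatches a cross-core interrupt.  Writing 0 to
 * this core's FROM_CPU register clears the latched interrupt source.
 */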
IRAM_ATTR static void esp_crosscore_isr(void *arg)
{
	ARG_UNUSED(arg);

	/* Right now this interrupt is only used for IPIs */
	z_sched_ipi();

	const int core_id = esp_core_id();

	if (core_id == 0) {
		DPORT_WRITE_PERI_REG(DPORT_CPU_INTR_FROM_CPU_0_REG, 0);
	} else {
		DPORT_WRITE_PERI_REG(DPORT_CPU_INTR_FROM_CPU_1_REG, 0);
	}
}

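/* Zephyr arch hook: start CPU cpu_num (only CPU 1 exists here) and
 * have it call fn(arg) on the given stack.  We spin until the APPCPU
 * reports in through the alive flag, then install the cross-core
 * interrupt handlers for both IPI sources.
 */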
void arch_cpu_start(int cpu_num, k_thread_stack_t *stack, int sz,
		    arch_cpustart_t fn, void *arg)
{
	volatile struct cpustart_rec sr;
	int vb;
	volatile int alive_flag;

	__ASSERT(cpu_num == 1, "ESP-32 supports only two CPUs");

	__asm__ volatile("rsr.VECBASE %0\n\t" : "=r"(vb));

	alive_flag = 0;

	sr.cpu = cpu_num;
	sr.fn = fn;
	sr.stack_top = K_KERNEL_STACK_BUFFER(stack) + sz;
	sr.arg = arg;
	sr.vecbase = vb;
	sr.alive = &alive_flag;

	appcpu_top = K_KERNEL_STACK_BUFFER(stack) + sz;

	start_rec = &sr;

	esp_appcpu_start(appcpu_entry1);

	while (!alive_flag) {
	}

	cpus_active[0] = true;
	cpus_active[cpu_num] = true;

	esp_intr_alloc(DT_IRQ_BY_IDX(DT_NODELABEL(ipi0), 0, irq),
		ESP_PRIO_TO_FLAGS(DT_IRQ_BY_IDX(DT_NODELABEL(ipi0), 0, priority)) |
		ESP_INT_FLAGS_CHECK(DT_IRQ_BY_IDX(DT_NODELABEL(ipi0), 0, flags)) |
			ESP_INTR_FLAG_IRAM,
		esp_crosscore_isr,
		NULL,
		NULL);

	esp_intr_alloc(DT_IRQ_BY_IDX(DT_NODELABEL(ipi1), 0, irq),
		ESP_PRIO_TO_FLAGS(DT_IRQ_BY_IDX(DT_NODELABEL(ipi1), 0, priority)) |
		ESP_INT_FLAGS_CHECK(DT_IRQ_BY_IDX(DT_NODELABEL(ipi1), 0, flags)) |
			ESP_INTR_FLAG_IRAM,
		esp_crosscore_isr,
		NULL,
		NULL);

	smp_log("ESP32: APPCPU initialized");
}

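/* With only two cores there is exactly one possible IPI target (the
 * peer core), so the bitmap is ignored: each core simply latches its
 * own FROM_CPU interrupt register to signal the other one.
 */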
void arch_sched_directed_ipi(uint32_t cpu_bitmap)
{
	const int core_id = esp_core_id();

	ARG_UNUSED(cpu_bitmap);

	if (core_id == 0) {
		DPORT_WRITE_PERI_REG(DPORT_CPU_INTR_FROM_CPU_0_REG, DPORT_CPU_INTR_FROM_CPU_0);
	} else {
		DPORT_WRITE_PERI_REG(DPORT_CPU_INTR_FROM_CPU_1_REG, DPORT_CPU_INTR_FROM_CPU_1);
	}
}

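/* On a two-core SoC a broadcast is the same thing as a directed IPI
 * to the other core.
 */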
void arch_sched_broadcast_ipi(void)
{
	arch_sched_directed_ipi(IPI_ALL_CPUS_MASK);
}

IRAM_ATTR bool arch_cpu_active(int cpu_num)
{
	return cpus_active[cpu_num];
}
#endif /* CONFIG_SMP */

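/* AMP flavor of the APPCPU bring-up: unstall the core, make sure its
 * clock is ungated and it has been pulsed through reset, then point
 * the ROM at the entry point and reset the core so it starts there.
 */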
void esp_appcpu_start2(void *entry_point)
{
	esp_cpu_unstall(1);

	if (!DPORT_GET_PERI_REG_MASK(DPORT_APPCPU_CTRL_B_REG, DPORT_APPCPU_CLKGATE_EN)) {
		DPORT_SET_PERI_REG_MASK(DPORT_APPCPU_CTRL_B_REG, DPORT_APPCPU_CLKGATE_EN);
		DPORT_CLEAR_PERI_REG_MASK(DPORT_APPCPU_CTRL_C_REG, DPORT_APPCPU_RUNSTALL);
		DPORT_SET_PERI_REG_MASK(DPORT_APPCPU_CTRL_A_REG, DPORT_APPCPU_RESETTING);
		DPORT_CLEAR_PERI_REG_MASK(DPORT_APPCPU_CTRL_A_REG, DPORT_APPCPU_RESETTING);
	}

	esp_rom_ets_set_appcpu_boot_addr((void *)entry_point);

	esp_cpu_reset(1);
}

/* AMP support */
#ifdef CONFIG_SOC_ENABLE_APPCPU

#include "bootloader_flash_priv.h"

#define sys_mmap   bootloader_mmap
#define sys_munmap bootloader_munmap

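/* Copy one image segment from flash into internal RAM, one 32-bit
 * word at a time, through the bootloader's flash mmap window.  Note
 * that src_len is assumed to be a multiple of 4; any remainder bytes
 * are dropped.
 */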
static int load_segment(uint32_t src_addr, uint32_t src_len, uint32_t dst_addr)
{
	const uint32_t *data = (const uint32_t *)sys_mmap(src_addr, src_len);

	if (!data) {
		ets_printf("%s: mmap failed", __func__);
		return -1;
	}

	volatile uint32_t *dst = (volatile uint32_t *)dst_addr;

	for (int i = 0; i < src_len / 4; i++) {
		dst[i] = data[i];
	}

	sys_munmap(data);

	return 0;
}

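/* Locate the APPCPU image in slot0_appcpu_partition, validate the ESP
 * load header that sits hdr_offset bytes past the MCUboot header,
 * copy the IRAM and DRAM segments into place, and return the image
 * entry point through entry_addr.
 */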
int IRAM_ATTR esp_appcpu_image_load(unsigned int hdr_offset, unsigned int *entry_addr)
{
	const uint32_t img_off = FIXED_PARTITION_OFFSET(slot0_appcpu_partition);
	const uint32_t fa_size = FIXED_PARTITION_SIZE(slot0_appcpu_partition);
	const uint8_t fa_id = FIXED_PARTITION_ID(slot0_appcpu_partition);

	if (entry_addr == NULL) {
		ets_printf("Can't return the entry address. Aborting!\n");
		abort();
		return -1;
	}

	uint32_t mcuboot_header[8] = {0};
	esp_image_load_header_t image_header = {0};

	const uint32_t *data = (const uint32_t *)sys_mmap(img_off, 0x40);

	memcpy((void *)&mcuboot_header, data, sizeof(mcuboot_header));
	memcpy((void *)&image_header, data + (hdr_offset / sizeof(uint32_t)),
	       sizeof(esp_image_load_header_t));

	sys_munmap(data);

	if (image_header.header_magic == ESP_LOAD_HEADER_MAGIC) {
		ets_printf("APPCPU image, area id: %d, offset: 0x%x, hdr.off: 0x%x, size: %d kB\n",
			   fa_id, img_off, hdr_offset, fa_size / 1024);
	} else if ((image_header.header_magic & 0xff) == 0xE9) {
		ets_printf("ESP image format is not supported\n");
		abort();
	} else {
		ets_printf("Unknown or empty image detected. Aborting!\n");
		abort();
	}

	if (!esp_ptr_in_iram((void *)image_header.iram_dest_addr) ||
	    !esp_ptr_in_iram((void *)(image_header.iram_dest_addr + image_header.iram_size))) {
		ets_printf("IRAM region in load header is not valid. Aborting");
		abort();
	}

	if (!esp_ptr_in_dram((void *)image_header.dram_dest_addr) ||
	    !esp_ptr_in_dram((void *)(image_header.dram_dest_addr + image_header.dram_size))) {
		ets_printf("DRAM region in load header is not valid. Aborting");
		abort();
	}

	if (!esp_ptr_in_iram((void *)image_header.entry_addr)) {
		ets_printf("Application entry point (%xh) is not in IRAM. Aborting",
			   image_header.entry_addr);
		abort();
	}

	ets_printf("IRAM segment: paddr=%08xh, vaddr=%08xh, size=%05xh (%6d) load\n",
		   (img_off + image_header.iram_flash_offset), image_header.iram_dest_addr,
		   image_header.iram_size, image_header.iram_size);

	load_segment(img_off + image_header.iram_flash_offset, image_header.iram_size,
		     image_header.iram_dest_addr);

	ets_printf("DRAM segment: paddr=%08xh, vaddr=%08xh, size=%05xh (%6d) load\n",
		   (img_off + image_header.dram_flash_offset), image_header.dram_dest_addr,
		   image_header.dram_size, image_header.dram_size);

	load_segment(img_off + image_header.dram_flash_offset, image_header.dram_size,
		     image_header.dram_dest_addr);

	ets_printf("Application start=%xh\n\n", image_header.entry_addr);
	esp_rom_uart_tx_wait_idle(0);

	assert(entry_addr != NULL);
	*entry_addr = image_header.entry_addr;

	return 0;
}

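/* Park the APPCPU by stalling it. */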
void esp_appcpu_image_stop(void)
{
	esp_cpu_stall(1);
}

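/* Load the APPCPU image whose ESP load header sits hdr_offset bytes
 * into the partition, then start the core at its entry point.  A
 * second call is a no-op.
 */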
void esp_appcpu_image_start(unsigned int hdr_offset)
{
	static int started;
	unsigned int entry_addr = 0;

	if (started) {
		printk("APPCPU already started.\r\n");
		return;
	}
	started = 1;

	/* Input image meta header, output appcpu entry point */
	esp_appcpu_image_load(hdr_offset, &entry_addr);

	esp_appcpu_start2((void *)entry_addr);
}

int esp_appcpu_init(void)
{
	/* Load APPCPU image using image header offset
	 * (skipping the MCUboot header)
	 */
	esp_appcpu_image_start(0x20);

	return 0;
}
#endif /* CONFIG_SOC_ENABLE_APPCPU */