/*
 * Copyright (c) 2018 Intel Corporation
 * Copyright (c) 2024 Espressif Systems (Shanghai) Co., Ltd.
 *
 * SPDX-License-Identifier: Apache-2.0
 */

#include <zephyr/device.h>
#include <zephyr/kernel.h>
#include <zephyr/spinlock.h>
#include <zephyr/kernel_structs.h>
#include <zephyr/storage/flash_map.h>
#include <zephyr/drivers/interrupt_controller/intc_esp32.h>

#include <soc.h>
#include <esp_cpu.h>
#include "esp_rom_uart.h"

#include "esp_mcuboot_image.h"
#include "esp_memory_utils.h"

#ifdef CONFIG_SMP

#include <ipi.h>

struct cpustart_rec {
	int cpu;
	arch_cpustart_t fn;
	char *stack_top;
	void *arg;
	int vecbase;
	volatile int *alive;
};

volatile struct cpustart_rec *start_rec;
static void *appcpu_top;
static bool cpus_active[CONFIG_MP_MAX_NUM_CPUS];
static struct k_spinlock loglock;

/* Note that the logging done here is ACTUALLY REQUIRED FOR RELIABLE
 * OPERATION! At least one particular board will experience spurious
 * hangs during initialization (usually the APPCPU fails to start at
 * all) without these calls present. It's not just time -- careful
 * use of k_busy_wait() (and even hand-crafted timer loops using the
 * Xtensa timer SRs directly) that duplicates the timing exactly still
 * sees hangs. Something is happening inside the ROM UART code that
 * magically makes the startup sequence reliable.
 *
 * Leave this in place until the sequence is understood better.
 *
 * (Note that the use of the spinlock is cosmetic only -- if you take
 * it out the messages will interleave across the two CPUs but startup
 * will still be reliable.)
 */
void smp_log(const char *msg)
{
	k_spinlock_key_t key = k_spin_lock(&loglock);

	while (*msg) {
		esp_rom_uart_tx_one_char(*msg++);
	}
	esp_rom_uart_tx_one_char('\r');
	esp_rom_uart_tx_one_char('\n');

	k_spin_unlock(&loglock, key);
}

static void appcpu_entry2(void)
{
	volatile int ps, ie;

	/* Copy over VECBASE from the main CPU for an initial value
	 * (will need to revisit this if we ever allow a user API to
	 * change interrupt vectors at runtime). Make sure interrupts
	 * are locally disabled, then synthesize a PS value that will
	 * enable them for the user code to pass to irq_unlock()
	 * later.
	 */
	__asm__ volatile("rsr.PS %0" : "=r"(ps));
	ps &= ~(XCHAL_PS_EXCM_MASK | XCHAL_PS_INTLEVEL_MASK);
	__asm__ volatile("wsr.PS %0" : : "r"(ps));

	ie = 0;
	__asm__ volatile("wsr.INTENABLE %0" : : "r"(ie));
	__asm__ volatile("wsr.VECBASE %0" : : "r"(start_rec->vecbase));
	__asm__ volatile("rsync");

	/* Set up the CPU pointer. Really this should be xtensa arch
	 * code, not in the ESP-32 layer
	 */
	_cpu_t *cpu = &_kernel.cpus[1];

	__asm__ volatile("wsr.MISC0 %0" : : "r"(cpu));

	smp_log("ESP32: APPCPU running");

	*start_rec->alive = 1;
	start_rec->fn(start_rec->arg);
}

/* Defines a locally callable "function" named z_appcpu_stack_switch().
 * The first argument (in register a2 post-ENTRY) is the new stack
 * pointer to go into register a1. The second (a3) is the entry point.
 * Because this never returns, a0 is used as a scratch register then
 * set to zero for the called function (a null return value is the
 * signal for "top of stack" to the debugger).
 */
void z_appcpu_stack_switch(void *stack, void *entry);
__asm__("\n"
	".align 4"		"\n"
	"z_appcpu_stack_switch:"	"\n\t"

	"entry a1, 16"		"\n\t"

	/* Subtle: we want the stack to be 16 bytes higher than the
	 * top on entry to the called function, because the ABI forces
	 * it to assume that those bytes are for its caller's A0-A3
	 * spill area. (In fact ENTRY instructions with stack
	 * adjustments less than 16 are a warning condition in the
	 * assembler). But we aren't a caller, have no bit set in
	 * WINDOWSTART and will never be asked to spill anything.
	 * Those 16 bytes would otherwise be wasted on the stack, so
	 * adjust
	 */
	"addi a1, a2, 16"	"\n\t"

	/* Clear WINDOWSTART so called functions never try to spill
	 * our callers' registers into the now-garbage stack pointers
	 * they contain. No need to set the bit corresponding to
	 * WINDOWBASE, our C callee will do that when it does an
	 * ENTRY.
	 */
	"movi a0, 0"		"\n\t"
	"wsr.WINDOWSTART a0"	"\n\t"

	/* Clear the CALLINC field of PS (you would think ENTRY would
	 * do that, but it doesn't) so the callee's ENTRY doesn't
	 * shift the registers
	 */
	"rsr.PS a0"		"\n\t"
	"movi a2, 0xfffcffff"	"\n\t"
	"and a0, a0, a2"	"\n\t"
	"wsr.PS a0"		"\n\t"

	"rsync"			"\n\t"
	"movi a0, 0"		"\n\t"

	"jx a3"			"\n\t");
/* Carefully constructed to use no stack beyond compiler-generated ABI
 * instructions. WE DO NOT KNOW WHERE THE STACK FOR THIS FUNCTION IS.
 * The ROM library just picks a spot on its own with no input from our
 * app linkage and tells us nothing about it until we're already
 * running.
 */
static void appcpu_entry1(void)
{
	z_appcpu_stack_switch(appcpu_top, appcpu_entry2);
}

/* The calls and sequencing here were extracted from the ESP-32
 * FreeRTOS integration with just a tiny bit of cleanup. None of the
 * calls or registers shown are documented, so treat this code with
 * extreme caution.
 */
void esp_appcpu_start(void *entry_point)
{
	ets_printf("ESP32: starting APPCPU");

	/* These two calls are wrapped in a "stall_other_cpu" API in
	 * esp-idf. But in this context the appcpu is stalled by
	 * definition, so we can skip that complexity and just call
	 * the ROM directly.
	 */
	esp_rom_Cache_Flush(1);
	esp_rom_Cache_Read_Enable(1);

	esp_rom_ets_set_appcpu_boot_addr((void *)0);

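	/* Ungate the APPCPU clock, release RUNSTALL, then pulse the
	 * reset line so the core restarts from the ROM entry point.
	 */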
	DPORT_SET_PERI_REG_MASK(DPORT_APPCPU_CTRL_B_REG, DPORT_APPCPU_CLKGATE_EN);
	DPORT_CLEAR_PERI_REG_MASK(DPORT_APPCPU_CTRL_C_REG, DPORT_APPCPU_RUNSTALL);
	DPORT_SET_PERI_REG_MASK(DPORT_APPCPU_CTRL_A_REG, DPORT_APPCPU_RESETTING);
	DPORT_CLEAR_PERI_REG_MASK(DPORT_APPCPU_CTRL_A_REG, DPORT_APPCPU_RESETTING);

	/* Extracted from the SMP logging note above: THIS IS REQUIRED
	 * FOR RELIABLE AMP OPERATION AS WELL, so please don't touch
	 * the dummy writes below!
	 *
	 * Note that the logging done here is ACTUALLY REQUIRED FOR
	 * RELIABLE OPERATION! At least one particular board will
	 * experience spurious hangs during initialization (usually
	 * the APPCPU fails to start at all) without these calls
	 * present. It's not just time -- careful use of k_busy_wait()
	 * (and even hand-crafted timer loops using the Xtensa timer
	 * SRs directly) that duplicates the timing exactly still sees
	 * hangs. Something is happening inside the ROM UART code that
	 * magically makes the startup sequence reliable.
	 *
	 * Leave this in place until the sequence is understood better.
	 */
	esp_rom_uart_tx_one_char('\r');
	esp_rom_uart_tx_one_char('\r');
	esp_rom_uart_tx_one_char('\n');

	/* Seems weird that you set the boot address AFTER starting
	 * the CPU, but this is how they do it...
	 */
	esp_rom_ets_set_appcpu_boot_addr((void *)entry_point);

	ets_printf("ESP32: APPCPU start sequence complete");
}

IRAM_ATTR static void esp_crosscore_isr(void *arg)
{
	ARG_UNUSED(arg);

	/* Right now this interrupt is only used for IPIs */
	z_sched_ipi();

	const int core_id = esp_core_id();

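	/* Acknowledge the IPI: writing 0 to this core's FROM_CPU
	 * register clears the pending cross-core interrupt.
	 */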
	if (core_id == 0) {
		DPORT_WRITE_PERI_REG(DPORT_CPU_INTR_FROM_CPU_0_REG, 0);
	} else {
		DPORT_WRITE_PERI_REG(DPORT_CPU_INTR_FROM_CPU_1_REG, 0);
	}
}

void arch_cpu_start(int cpu_num, k_thread_stack_t *stack, int sz,
		    arch_cpustart_t fn, void *arg)
{
	volatile struct cpustart_rec sr;
	int vb;
	volatile int alive_flag;

	__ASSERT(cpu_num == 1, "ESP-32 supports only two CPUs");

	__asm__ volatile("rsr.VECBASE %0\n\t" : "=r"(vb));

	alive_flag = 0;

	sr.cpu = cpu_num;
	sr.fn = fn;
	sr.stack_top = K_KERNEL_STACK_BUFFER(stack) + sz;
	sr.arg = arg;
	sr.vecbase = vb;
	sr.alive = &alive_flag;

	appcpu_top = K_KERNEL_STACK_BUFFER(stack) + sz;

	start_rec = &sr;

	esp_appcpu_start(appcpu_entry1);

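	/* Busy-wait until appcpu_entry2() reports in through the
	 * alive flag in the (stack-resident) start record.
	 */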
	while (!alive_flag) {
	}

	cpus_active[0] = true;
	cpus_active[cpu_num] = true;

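	/* Hook both cross-core interrupt sources (the ipi0 and ipi1
	 * devicetree nodes) to the shared IPI handler. The
	 * ESP_INTR_FLAG_IRAM flag keeps the handler callable while
	 * the flash cache is disabled.
	 */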
	esp_intr_alloc(DT_IRQ_BY_IDX(DT_NODELABEL(ipi0), 0, irq),
		       ESP_PRIO_TO_FLAGS(DT_IRQ_BY_IDX(DT_NODELABEL(ipi0), 0, priority)) |
			       ESP_INT_FLAGS_CHECK(DT_IRQ_BY_IDX(DT_NODELABEL(ipi0), 0, flags)) |
			       ESP_INTR_FLAG_IRAM,
		       esp_crosscore_isr,
		       NULL,
		       NULL);

	esp_intr_alloc(DT_IRQ_BY_IDX(DT_NODELABEL(ipi1), 0, irq),
		       ESP_PRIO_TO_FLAGS(DT_IRQ_BY_IDX(DT_NODELABEL(ipi1), 0, priority)) |
			       ESP_INT_FLAGS_CHECK(DT_IRQ_BY_IDX(DT_NODELABEL(ipi1), 0, flags)) |
			       ESP_INTR_FLAG_IRAM,
		       esp_crosscore_isr,
		       NULL,
		       NULL);

	smp_log("ESP32: APPCPU initialized");
}

void arch_sched_directed_ipi(uint32_t cpu_bitmap)
{
	const int core_id = esp_core_id();

	ARG_UNUSED(cpu_bitmap);

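	/* With only two CPUs the target is always the single peer
	 * core, so the bitmap is unused; raising this core's
	 * FROM_CPU interrupt signals the other CPU.
	 */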
	if (core_id == 0) {
		DPORT_WRITE_PERI_REG(DPORT_CPU_INTR_FROM_CPU_0_REG, DPORT_CPU_INTR_FROM_CPU_0);
	} else {
		DPORT_WRITE_PERI_REG(DPORT_CPU_INTR_FROM_CPU_1_REG, DPORT_CPU_INTR_FROM_CPU_1);
	}
}

void arch_sched_broadcast_ipi(void)
{
	arch_sched_directed_ipi(IPI_ALL_CPUS_MASK);
}

IRAM_ATTR bool arch_cpu_active(int cpu_num)
{
	return cpus_active[cpu_num];
}
#endif /* CONFIG_SMP */

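/* Bring the APPCPU out of reset at the given entry point. Unlike
 * esp_appcpu_start() above, this variant (used on the AMP path below)
 * assumes the image has already been loaded into RAM: it ensures the
 * core is unstalled and clocked, sets the ROM boot address, then
 * resets the core so it restarts into that address.
 */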
void esp_appcpu_start2(void *entry_point)
{
	esp_cpu_unstall(1);

	if (!DPORT_GET_PERI_REG_MASK(DPORT_APPCPU_CTRL_B_REG, DPORT_APPCPU_CLKGATE_EN)) {
		DPORT_SET_PERI_REG_MASK(DPORT_APPCPU_CTRL_B_REG, DPORT_APPCPU_CLKGATE_EN);
		DPORT_CLEAR_PERI_REG_MASK(DPORT_APPCPU_CTRL_C_REG, DPORT_APPCPU_RUNSTALL);
		DPORT_SET_PERI_REG_MASK(DPORT_APPCPU_CTRL_A_REG, DPORT_APPCPU_RESETTING);
		DPORT_CLEAR_PERI_REG_MASK(DPORT_APPCPU_CTRL_A_REG, DPORT_APPCPU_RESETTING);
	}

	esp_rom_ets_set_appcpu_boot_addr((void *)entry_point);

	esp_cpu_reset(1);
}

/* AMP support */
#ifdef CONFIG_SOC_ENABLE_APPCPU

#include "bootloader_flash_priv.h"

#define sys_mmap bootloader_mmap
#define sys_munmap bootloader_munmap

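/* Copy one image segment from (memory-mapped) flash into internal RAM,
 * one 32-bit word at a time; src_len is assumed to be word-aligned.
 */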
static int load_segment(uint32_t src_addr, uint32_t src_len, uint32_t dst_addr)
{
	const uint32_t *data = (const uint32_t *)sys_mmap(src_addr, src_len);

	if (!data) {
		ets_printf("%s: mmap failed", __func__);
		return -1;
	}

	volatile uint32_t *dst = (volatile uint32_t *)dst_addr;

	for (int i = 0; i < src_len / 4; i++) {
		dst[i] = data[i];
	}

	sys_munmap(data);

	return 0;
}


int IRAM_ATTR esp_appcpu_image_load(unsigned int hdr_offset, unsigned int *entry_addr)
{
	const uint32_t img_off = FIXED_PARTITION_OFFSET(slot0_appcpu_partition);
	const uint32_t fa_size = FIXED_PARTITION_SIZE(slot0_appcpu_partition);
	const uint8_t fa_id = FIXED_PARTITION_ID(slot0_appcpu_partition);

	if (entry_addr == NULL) {
		ets_printf("Can't return the entry address. Aborting!\n");
		abort();
		return -1;
	}

	uint32_t mcuboot_header[8] = {0};
	esp_image_load_header_t image_header = {0};

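	/* Map the first 0x40 bytes of the partition: enough to cover
	 * the 32-byte MCUboot header plus the ESP load header that
	 * follows it at hdr_offset.
	 */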
	const uint32_t *data = (const uint32_t *)sys_mmap(img_off, 0x40);

	memcpy((void *)&mcuboot_header, data, sizeof(mcuboot_header));
	memcpy((void *)&image_header, data + (hdr_offset / sizeof(uint32_t)),
	       sizeof(esp_image_load_header_t));

	sys_munmap(data);

	if (image_header.header_magic == ESP_LOAD_HEADER_MAGIC) {
		ets_printf("APPCPU image, area id: %d, offset: 0x%x, hdr.off: 0x%x, size: %d kB\n",
			   fa_id, img_off, hdr_offset, fa_size / 1024);
	} else if ((image_header.header_magic & 0xff) == 0xE9) {
		ets_printf("ESP image format is not supported\n");
		abort();
	} else {
		ets_printf("Unknown or empty image detected. Aborting!\n");
		abort();
	}

	if (!esp_ptr_in_iram((void *)image_header.iram_dest_addr) ||
	    !esp_ptr_in_iram((void *)(image_header.iram_dest_addr + image_header.iram_size))) {
		ets_printf("IRAM region in load header is not valid. Aborting");
		abort();
	}

	if (!esp_ptr_in_dram((void *)image_header.dram_dest_addr) ||
	    !esp_ptr_in_dram((void *)(image_header.dram_dest_addr + image_header.dram_size))) {
		ets_printf("DRAM region in load header is not valid. Aborting");
		abort();
	}

	if (!esp_ptr_in_iram((void *)image_header.entry_addr)) {
		ets_printf("Application entry point (%xh) is not in IRAM. Aborting",
			   image_header.entry_addr);
		abort();
	}

	ets_printf("IRAM segment: paddr=%08xh, vaddr=%08xh, size=%05xh (%6d) load\n",
		   (img_off + image_header.iram_flash_offset), image_header.iram_dest_addr,
		   image_header.iram_size, image_header.iram_size);

	load_segment(img_off + image_header.iram_flash_offset, image_header.iram_size,
		     image_header.iram_dest_addr);

	ets_printf("DRAM segment: paddr=%08xh, vaddr=%08xh, size=%05xh (%6d) load\n",
		   (img_off + image_header.dram_flash_offset), image_header.dram_dest_addr,
		   image_header.dram_size, image_header.dram_size);

	load_segment(img_off + image_header.dram_flash_offset, image_header.dram_size,
		     image_header.dram_dest_addr);

	ets_printf("Application start=%xh\n\n", image_header.entry_addr);
	esp_rom_uart_tx_wait_idle(0);

	assert(entry_addr != NULL);
	*entry_addr = image_header.entry_addr;

	return 0;
}


void esp_appcpu_image_stop(void)
{
	esp_cpu_stall(1);
}

void esp_appcpu_image_start(unsigned int hdr_offset)
{
	static int started;
	unsigned int entry_addr = 0;

	if (started) {
		printk("APPCPU already started.\r\n");
		return;
	}

	/* Input image meta header, output appcpu entry point */
	esp_appcpu_image_load(hdr_offset, &entry_addr);

	esp_appcpu_start2((void *)entry_addr);

	started = 1;
}

int esp_appcpu_init(void)
{
	/* Load the APPCPU image using the image header offset
	 * (skipping the 32-byte MCUboot header)
	 */
	esp_appcpu_image_start(0x20);

	return 0;
}
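
/* Usage sketch (hypothetical): esp_appcpu_init() could equally be run
 * automatically at boot by registering it as a late init hook, e.g.:
 *
 *   SYS_INIT(esp_appcpu_init, APPLICATION, CONFIG_APPLICATION_INIT_PRIORITY);
 *
 * assuming slot0_appcpu_partition has been flashed with a valid
 * MCUboot-wrapped APPCPU image.
 */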
#endif /* CONFIG_SOC_ENABLE_APPCPU */