1 /*
2 * Copyright (c) 2021 BayLibre SAS
3 * Written by: Nicolas Pitre
4 *
5 * SPDX-License-Identifier: Apache-2.0
6 */
7
8 #include <zephyr/kernel.h>
9 #include <zephyr/kernel_structs.h>
10 #include <kernel_arch_interface.h>
11 #include <zephyr/arch/cpu.h>
12 #include <zephyr/sys/barrier.h>
13 #include <zephyr/sys/atomic.h>
14
/* FP register save/restore routines; their implementation is in fpu.S */
void z_arm64_fpu_save(struct z_arm64_fp_context *saved_fp_context);
void z_arm64_fpu_restore(struct z_arm64_fp_context *saved_fp_context);
18
/* Set to a non-zero value to build the tracing version of DBG() below. */
#define FPU_DEBUG 0
20
21 #if FPU_DEBUG
22
/*
 * Debug traces have to be produced without printk() or any other functions
 * using a va_list as va_start() always copies the FPU registers that could be
 * used to pass float arguments, and that triggers an FPU access trap.
 */
28
29 #include <string.h>
30
DBG(char * msg,struct k_thread * th)31 static void DBG(char *msg, struct k_thread *th)
32 {
33 char buf[80], *p;
34 unsigned int v;
35
36 strcpy(buf, "CPU# exc# ");
37 buf[3] = '0' + _current_cpu->id;
38 buf[8] = '0' + arch_exception_depth();
39 strcat(buf, arch_current_thread()->name);
40 strcat(buf, ": ");
41 strcat(buf, msg);
42 strcat(buf, " ");
43 strcat(buf, th->name);
44
45
46 v = *(unsigned char *)&th->arch.saved_fp_context;
47 p = buf + strlen(buf);
48 *p++ = ' ';
49 *p++ = ((v >> 4) < 10) ? ((v >> 4) + '0') : ((v >> 4) - 10 + 'a');
50 *p++ = ((v & 15) < 10) ? ((v & 15) + '0') : ((v & 15) - 10 + 'a');
51 *p++ = '\n';
52 *p = 0;
53
54 k_str_out(buf, p - buf);
55 }
56
57 #else
58
/* Tracing disabled: same signature as the debug version, but does nothing. */
static inline void DBG(char *msg, struct k_thread *t)
{
	(void)msg;
	(void)t;
}
60
61 #endif /* FPU_DEBUG */
62
63 /*
64 * Flush FPU content and disable access.
65 * This is called locally and also from flush_fpu_ipi_handler().
66 */
arch_flush_local_fpu(void)67 void arch_flush_local_fpu(void)
68 {
69 __ASSERT(read_daif() & DAIF_IRQ_BIT, "must be called with IRQs disabled");
70
71 struct k_thread *owner = atomic_ptr_get(&_current_cpu->arch.fpu_owner);
72
73 if (owner != NULL) {
74 uint64_t cpacr = read_cpacr_el1();
75
76 /* turn on FPU access */
77 write_cpacr_el1(cpacr | CPACR_EL1_FPEN_NOTRAP);
78 barrier_isync_fence_full();
79
80 /* save current owner's content */
81 z_arm64_fpu_save(&owner->arch.saved_fp_context);
82 /* make sure content made it to memory before releasing */
83 barrier_dsync_fence_full();
84 /* release ownership */
85 atomic_ptr_clear(&_current_cpu->arch.fpu_owner);
86 DBG("disable", owner);
87
88 /* disable FPU access */
89 write_cpacr_el1(cpacr & ~CPACR_EL1_FPEN_NOTRAP);
90 barrier_isync_fence_full();
91 }
92 }
93
94 #ifdef CONFIG_SMP
flush_owned_fpu(struct k_thread * thread)95 static void flush_owned_fpu(struct k_thread *thread)
96 {
97 __ASSERT(read_daif() & DAIF_IRQ_BIT, "must be called with IRQs disabled");
98
99 int i;
100
101 /* search all CPUs for the owner we want */
102 unsigned int num_cpus = arch_num_cpus();
103
104 for (i = 0; i < num_cpus; i++) {
105 if (atomic_ptr_get(&_kernel.cpus[i].arch.fpu_owner) != thread) {
106 continue;
107 }
108 /* we found it live on CPU i */
109 if (i == _current_cpu->id) {
110 arch_flush_local_fpu();
111 } else {
112 /* the FPU context is live on another CPU */
113 arch_flush_fpu_ipi(i);
114
115 /*
116 * Wait for it only if this is about the thread
117 * currently running on this CPU. Otherwise the
118 * other CPU running some other thread could regain
119 * ownership the moment it is removed from it and
120 * we would be stuck here.
121 *
122 * Also, if this is for the thread running on this
123 * CPU, then we preemptively flush any live context
124 * on this CPU as well since we're likely to
125 * replace it, and this avoids a deadlock where
126 * two CPUs want to pull each other's FPU context.
127 */
128 if (thread == arch_current_thread()) {
129 arch_flush_local_fpu();
130 while (atomic_ptr_get(&_kernel.cpus[i].arch.fpu_owner) == thread) {
131 barrier_dsync_fence_full();
132 }
133 }
134 }
135 break;
136 }
137 }
138 #endif
139
z_arm64_fpu_enter_exc(void)140 void z_arm64_fpu_enter_exc(void)
141 {
142 __ASSERT(read_daif() & DAIF_IRQ_BIT, "must be called with IRQs disabled");
143
144 /* always deny FPU access whenever an exception is entered */
145 write_cpacr_el1(read_cpacr_el1() & ~CPACR_EL1_FPEN_NOTRAP);
146 barrier_isync_fence_full();
147 }
148
149 /*
150 * Simulate some FPU store instructions.
151 *
152 * In many cases, the FPU trap is triggered by va_start() that copies
153 * the content of FP registers used for floating point argument passing
154 * into the va_list object in case there were actual float arguments from
155 * the caller. In practice this is almost never the case, especially if
156 * FPU access is disabled and we're trapped while in exception context.
157 * Rather than flushing the FPU context to its owner and enabling access
158 * just to let the corresponding STR instructions execute, we simply
159 * simulate them and leave the FPU access disabled. This also avoids the
160 * need for disabling interrupts in syscalls and IRQ handlers as well.
161 */
simulate_str_q_insn(struct arch_esf * esf)162 static bool simulate_str_q_insn(struct arch_esf *esf)
163 {
164 /*
165 * Support only the "FP in exception" cases for now.
166 * We know there is no saved FPU context to check nor any
167 * userspace stack memory to validate in that case.
168 */
169 if (arch_exception_depth() <= 1) {
170 return false;
171 }
172
173 uint32_t *pc = (uint32_t *)esf->elr;
174 /* The original (interrupted) sp is the top of the esf structure */
175 uintptr_t sp = (uintptr_t)esf + sizeof(*esf);
176
177 for (;;) {
178 uint32_t insn = *pc;
179
180 /*
181 * We're looking for STR (immediate, SIMD&FP) of the form:
182 *
183 * STR Q<n>, [SP, #<pimm>]
184 *
185 * where 0 <= <n> <= 7 and <pimm> is a 12-bits multiple of 16.
186 */
187 if ((insn & 0xffc003f8) != 0x3d8003e0) {
188 break;
189 }
190
191 uint32_t pimm = (insn >> 10) & 0xfff;
192
193 /* Zero the location as the above STR would have done */
194 *(__int128 *)(sp + pimm * 16) = 0;
195
196 /* move to the next instruction */
197 pc++;
198 }
199
200 /* did we do something? */
201 if (pc != (uint32_t *)esf->elr) {
202 /* resume execution past the simulated instructions */
203 esf->elr = (uintptr_t)pc;
204 return true;
205 }
206
207 return false;
208 }
209
210 /*
211 * Process the FPU trap.
212 *
213 * This usually means that FP regs belong to another thread. Save them
214 * to that thread's save area and restore the current thread's content.
215 *
216 * We also get here when FP regs are used while in exception as FP access
217 * is always disabled by default in that case. If so we save the FPU content
218 * to the owning thread and simply enable FPU access. Exceptions should be
219 * short and don't have persistent register contexts when they're done so
220 * there is nothing to save/restore for that context... as long as we
221 * don't get interrupted that is. To ensure that we mask interrupts to
222 * the triggering exception context.
223 */
z_arm64_fpu_trap(struct arch_esf * esf)224 void z_arm64_fpu_trap(struct arch_esf *esf)
225 {
226 __ASSERT(read_daif() & DAIF_IRQ_BIT, "must be called with IRQs disabled");
227
228 /* check if a quick simulation can do it */
229 if (simulate_str_q_insn(esf)) {
230 return;
231 }
232
233 /* turn on FPU access */
234 write_cpacr_el1(read_cpacr_el1() | CPACR_EL1_FPEN_NOTRAP);
235 barrier_isync_fence_full();
236
237 /* save current owner's content if any */
238 struct k_thread *owner = atomic_ptr_get(&_current_cpu->arch.fpu_owner);
239
240 if (owner) {
241 z_arm64_fpu_save(&owner->arch.saved_fp_context);
242 barrier_dsync_fence_full();
243 atomic_ptr_clear(&_current_cpu->arch.fpu_owner);
244 DBG("save", owner);
245 }
246
247 if (arch_exception_depth() > 1) {
248 /*
249 * We were already in exception when the FPU access trap.
250 * We give it access and prevent any further IRQ recursion
251 * by disabling IRQs as we wouldn't be able to preserve the
252 * interrupted exception's FPU context.
253 */
254 esf->spsr |= DAIF_IRQ_BIT;
255 return;
256 }
257
258 #ifdef CONFIG_SMP
259 /*
260 * Make sure the FPU context we need isn't live on another CPU.
261 * The current CPU's FPU context is NULL at this point.
262 */
263 flush_owned_fpu(arch_current_thread());
264 #endif
265
266 /* become new owner */
267 atomic_ptr_set(&_current_cpu->arch.fpu_owner, arch_current_thread());
268
269 /* restore our content */
270 z_arm64_fpu_restore(&arch_current_thread()->arch.saved_fp_context);
271 DBG("restore", arch_current_thread());
272 }
273
274 /*
275 * Perform lazy FPU context switching by simply granting or denying
276 * access to FP regs based on FPU ownership before leaving the last
277 * exception level in case of exceptions, or during a thread context
278 * switch with the exception level of the new thread being 0.
279 * If current thread doesn't own the FP regs then it will trap on its
280 * first access and then the actual FPU context switching will occur.
281 */
fpu_access_update(unsigned int exc_update_level)282 static void fpu_access_update(unsigned int exc_update_level)
283 {
284 __ASSERT(read_daif() & DAIF_IRQ_BIT, "must be called with IRQs disabled");
285
286 uint64_t cpacr = read_cpacr_el1();
287
288 if (arch_exception_depth() == exc_update_level) {
289 /* We're about to execute non-exception code */
290 if (atomic_ptr_get(&_current_cpu->arch.fpu_owner) == arch_current_thread()) {
291 /* turn on FPU access */
292 write_cpacr_el1(cpacr | CPACR_EL1_FPEN_NOTRAP);
293 } else {
294 /* deny FPU access */
295 write_cpacr_el1(cpacr & ~CPACR_EL1_FPEN_NOTRAP);
296 }
297 } else {
298 /*
299 * Any new exception level should always trap on FPU
300 * access as we want to make sure IRQs are disabled before
301 * granting it access (see z_arm64_fpu_trap() documentation).
302 */
303 write_cpacr_el1(cpacr & ~CPACR_EL1_FPEN_NOTRAP);
304 }
305 barrier_isync_fence_full();
306 }
307
/*
 * Invoked on every exception exit, z_arm64_fpu_trap() excepted.
 * The exception level that matters here is 1 (about to become 0).
 */
void z_arm64_fpu_exit_exc(void)
{
	const unsigned int last_exc_level = 1;

	fpu_access_update(last_exc_level);
}
316
/*
 * Invoked from z_arm64_context_switch(). FPU access may be granted only
 * when the exception level is 0. When switching to a thread that is still
 * in some exception context, FPU access gets re-evaluated when that
 * exception exits, through z_arm64_fpu_exit_exc().
 */
void z_arm64_fpu_thread_context_switch(void)
{
	const unsigned int thread_level = 0;

	fpu_access_update(thread_level);
}
327
arch_float_disable(struct k_thread * thread)328 int arch_float_disable(struct k_thread *thread)
329 {
330 if (thread != NULL) {
331 unsigned int key = arch_irq_lock();
332
333 #ifdef CONFIG_SMP
334 flush_owned_fpu(thread);
335 #else
336 if (thread == atomic_ptr_get(&_current_cpu->arch.fpu_owner)) {
337 arch_flush_local_fpu();
338 }
339 #endif
340
341 arch_irq_unlock(key);
342 }
343
344 return 0;
345 }
346
/*
 * Nothing to do: FPU use always gets enabled automatically at the moment.
 * Both arguments are ignored.
 *
 * Always returns 0.
 */
int arch_float_enable(struct k_thread *thread, unsigned int options)
{
	(void)thread;
	(void)options;

	return 0;
}
352