1 /*
2  * Copyright (c) 2021 BayLibre SAS
3  * Written by: Nicolas Pitre
4  *
5  * SPDX-License-Identifier: Apache-2.0
6  */
7 
8 #include <zephyr/kernel.h>
9 #include <zephyr/kernel_structs.h>
10 #include <kernel_arch_interface.h>
11 #include <zephyr/arch/cpu.h>
12 #include <zephyr/sys/barrier.h>
13 #include <zephyr/sys/atomic.h>
14 
15 /* to be found in fpu.S */
16 extern void z_arm64_fpu_save(struct z_arm64_fp_context *saved_fp_context);
17 extern void z_arm64_fpu_restore(struct z_arm64_fp_context *saved_fp_context);
18 
19 #define FPU_DEBUG 0
20 
21 #if FPU_DEBUG
22 
23 /*
 * Debug traces have to be produced without printk() or any other function
 * using a va_list, as va_start() always copies the FPU registers that could
 * be used to pass float arguments, and that triggers an FPU access trap.
27  */
28 
29 #include <string.h>
30 
DBG(char * msg,struct k_thread * th)31 static void DBG(char *msg, struct k_thread *th)
32 {
33 	char buf[80], *p;
34 	unsigned int v;
35 
36 	strcpy(buf, "CPU# exc# ");
37 	buf[3] = '0' + _current_cpu->id;
38 	buf[8] = '0' + arch_exception_depth();
39 	strcat(buf, arch_current_thread()->name);
40 	strcat(buf, ": ");
41 	strcat(buf, msg);
42 	strcat(buf, " ");
43 	strcat(buf, th->name);
44 
45 
46 	v = *(unsigned char *)&th->arch.saved_fp_context;
47 	p = buf + strlen(buf);
48 	*p++ = ' ';
49 	*p++ = ((v >> 4) < 10) ? ((v >> 4) + '0') : ((v >> 4) - 10 + 'a');
50 	*p++ = ((v & 15) < 10) ? ((v & 15) + '0') : ((v & 15) - 10 + 'a');
51 	*p++ = '\n';
52 	*p = 0;
53 
54 	k_str_out(buf, p - buf);
55 }
56 
57 #else
58 
/* Tracing is compiled out: take the same arguments and do nothing. */
static inline void DBG(char *msg, struct k_thread *t)
{
	(void)msg;
	(void)t;
}
60 
61 #endif /* FPU_DEBUG */
62 
63 /*
64  * Flush FPU content and disable access.
65  * This is called locally and also from flush_fpu_ipi_handler().
66  */
arch_flush_local_fpu(void)67 void arch_flush_local_fpu(void)
68 {
69 	__ASSERT(read_daif() & DAIF_IRQ_BIT, "must be called with IRQs disabled");
70 
71 	struct k_thread *owner = atomic_ptr_get(&_current_cpu->arch.fpu_owner);
72 
73 	if (owner != NULL) {
74 		uint64_t cpacr = read_cpacr_el1();
75 
76 		/* turn on FPU access */
77 		write_cpacr_el1(cpacr | CPACR_EL1_FPEN_NOTRAP);
78 		barrier_isync_fence_full();
79 
80 		/* save current owner's content */
81 		z_arm64_fpu_save(&owner->arch.saved_fp_context);
82 		/* make sure content made it to memory before releasing */
83 		barrier_dsync_fence_full();
84 		/* release ownership */
85 		atomic_ptr_clear(&_current_cpu->arch.fpu_owner);
86 		DBG("disable", owner);
87 
88 		/* disable FPU access */
89 		write_cpacr_el1(cpacr & ~CPACR_EL1_FPEN_NOTRAP);
90 		barrier_isync_fence_full();
91 	}
92 }
93 
94 #ifdef CONFIG_SMP
flush_owned_fpu(struct k_thread * thread)95 static void flush_owned_fpu(struct k_thread *thread)
96 {
97 	__ASSERT(read_daif() & DAIF_IRQ_BIT, "must be called with IRQs disabled");
98 
99 	int i;
100 
101 	/* search all CPUs for the owner we want */
102 	unsigned int num_cpus = arch_num_cpus();
103 
104 	for (i = 0; i < num_cpus; i++) {
105 		if (atomic_ptr_get(&_kernel.cpus[i].arch.fpu_owner) != thread) {
106 			continue;
107 		}
108 		/* we found it live on CPU i */
109 		if (i == _current_cpu->id) {
110 			arch_flush_local_fpu();
111 		} else {
112 			/* the FPU context is live on another CPU */
113 			arch_flush_fpu_ipi(i);
114 
115 			/*
116 			 * Wait for it only if this is about the thread
117 			 * currently running on this CPU. Otherwise the
118 			 * other CPU running some other thread could regain
119 			 * ownership the moment it is removed from it and
120 			 * we would be stuck here.
121 			 *
122 			 * Also, if this is for the thread running on this
123 			 * CPU, then we preemptively flush any live context
124 			 * on this CPU as well since we're likely to
125 			 * replace it, and this avoids a deadlock where
126 			 * two CPUs want to pull each other's FPU context.
127 			 */
128 			if (thread == arch_current_thread()) {
129 				arch_flush_local_fpu();
130 				while (atomic_ptr_get(&_kernel.cpus[i].arch.fpu_owner) == thread) {
131 					barrier_dsync_fence_full();
132 				}
133 			}
134 		}
135 		break;
136 	}
137 }
138 #endif
139 
z_arm64_fpu_enter_exc(void)140 void z_arm64_fpu_enter_exc(void)
141 {
142 	__ASSERT(read_daif() & DAIF_IRQ_BIT, "must be called with IRQs disabled");
143 
144 	/* always deny FPU access whenever an exception is entered */
145 	write_cpacr_el1(read_cpacr_el1() & ~CPACR_EL1_FPEN_NOTRAP);
146 	barrier_isync_fence_full();
147 }
148 
149 /*
150  * Simulate some FPU store instructions.
151  *
152  * In many cases, the FPU trap is triggered by va_start() that copies
153  * the content of FP registers used for floating point argument passing
154  * into the va_list object in case there were actual float arguments from
155  * the caller. In practice this is almost never the case, especially if
156  * FPU access is disabled and we're trapped while in exception context.
157  * Rather than flushing the FPU context to its owner and enabling access
158  * just to let the corresponding STR instructions execute, we simply
159  * simulate them and leave the FPU access disabled. This also avoids the
160  * need for disabling interrupts in syscalls and IRQ handlers as well.
161  */
simulate_str_q_insn(struct arch_esf * esf)162 static bool simulate_str_q_insn(struct arch_esf *esf)
163 {
164 	/*
165 	 * Support only the "FP in exception" cases for now.
166 	 * We know there is no saved FPU context to check nor any
167 	 * userspace stack memory to validate in that case.
168 	 */
169 	if (arch_exception_depth() <= 1) {
170 		return false;
171 	}
172 
173 	uint32_t *pc = (uint32_t *)esf->elr;
174 	/* The original (interrupted) sp is the top of the esf structure */
175 	uintptr_t sp = (uintptr_t)esf + sizeof(*esf);
176 
177 	for (;;) {
178 		uint32_t insn = *pc;
179 
180 		/*
181 		 * We're looking for STR (immediate, SIMD&FP) of the form:
182 		 *
183 		 *  STR Q<n>, [SP, #<pimm>]
184 		 *
185 		 * where 0 <= <n> <= 7 and <pimm> is a 12-bits multiple of 16.
186 		 */
187 		if ((insn & 0xffc003f8) != 0x3d8003e0) {
188 			break;
189 		}
190 
191 		uint32_t pimm = (insn >> 10) & 0xfff;
192 
193 		/* Zero the location as the above STR would have done */
194 		*(__int128 *)(sp + pimm * 16) = 0;
195 
196 		/* move to the next instruction */
197 		pc++;
198 	}
199 
200 	/* did we do something? */
201 	if (pc != (uint32_t *)esf->elr) {
202 		/* resume execution past the simulated instructions */
203 		esf->elr = (uintptr_t)pc;
204 		return true;
205 	}
206 
207 	return false;
208 }
209 
/*
 * Process the FPU trap.
 *
 * This usually means that FP regs belong to another thread. Save them
 * to that thread's save area and restore the current thread's content.
 *
 * We also get here when FP regs are used while in exception, as FP access
 * is always disabled by default in that case. If so, we save the FPU content
 * to the owning thread and simply enable FPU access. Exceptions should be
 * short and don't have persistent register contexts when they're done, so
 * there is nothing to save/restore for that context... as long as we
 * don't get interrupted, that is. To ensure that, we mask IRQs in the
 * saved SPSR of the triggering exception context.
 *
 * esf: exception stack frame of the trapping context. Its ELR may be
 *      advanced past simulated instructions and its SPSR may get the
 *      IRQ mask bit set.
 */
void z_arm64_fpu_trap(struct arch_esf *esf)
{
	__ASSERT(read_daif() & DAIF_IRQ_BIT, "must be called with IRQs disabled");

	/*
	 * Check if a quick simulation can do it. If so, the ELR in esf has
	 * been moved past the simulated instructions and FPU access is left
	 * as it was.
	 */
	if (simulate_str_q_insn(esf)) {
		return;
	}

	/* turn on FPU access */
	write_cpacr_el1(read_cpacr_el1() | CPACR_EL1_FPEN_NOTRAP);
	barrier_isync_fence_full();

	/* save current owner's content, if any */
	struct k_thread *owner = atomic_ptr_get(&_current_cpu->arch.fpu_owner);

	if (owner) {
		z_arm64_fpu_save(&owner->arch.saved_fp_context);
		/* the save is followed by a barrier before ownership is released */
		barrier_dsync_fence_full();
		atomic_ptr_clear(&_current_cpu->arch.fpu_owner);
		DBG("save", owner);
	}

	if (arch_exception_depth() > 1) {
		/*
		 * We were already in exception when the FPU access trapped.
		 * We give it access and prevent any further IRQ recursion
		 * by disabling IRQs as we wouldn't be able to preserve the
		 * interrupted exception's FPU context.
		 *
		 * No context is restored and no new owner is recorded on
		 * this path.
		 */
		esf->spsr |= DAIF_IRQ_BIT;
		return;
	}

#ifdef CONFIG_SMP
	/*
	 * Make sure the FPU context we need isn't live on another CPU.
	 * The current CPU's FPU context is NULL at this point.
	 */
	flush_owned_fpu(arch_current_thread());
#endif

	/* become new owner */
	atomic_ptr_set(&_current_cpu->arch.fpu_owner, arch_current_thread());

	/* restore our content */
	z_arm64_fpu_restore(&arch_current_thread()->arch.saved_fp_context);
	DBG("restore", arch_current_thread());
}
273 
274 /*
275  * Perform lazy FPU context switching by simply granting or denying
276  * access to FP regs based on FPU ownership before leaving the last
277  * exception level in case of exceptions, or during a thread context
278  * switch with the exception level of the new thread being 0.
279  * If current thread doesn't own the FP regs then it will trap on its
280  * first access and then the actual FPU context switching will occur.
281  */
fpu_access_update(unsigned int exc_update_level)282 static void fpu_access_update(unsigned int exc_update_level)
283 {
284 	__ASSERT(read_daif() & DAIF_IRQ_BIT, "must be called with IRQs disabled");
285 
286 	uint64_t cpacr = read_cpacr_el1();
287 
288 	if (arch_exception_depth() == exc_update_level) {
289 		/* We're about to execute non-exception code */
290 		if (atomic_ptr_get(&_current_cpu->arch.fpu_owner) == arch_current_thread()) {
291 			/* turn on FPU access */
292 			write_cpacr_el1(cpacr | CPACR_EL1_FPEN_NOTRAP);
293 		} else {
294 			/* deny FPU access */
295 			write_cpacr_el1(cpacr & ~CPACR_EL1_FPEN_NOTRAP);
296 		}
297 	} else {
298 		/*
299 		 * Any new exception level should always trap on FPU
300 		 * access as we want to make sure IRQs are disabled before
301 		 * granting it access (see z_arm64_fpu_trap() documentation).
302 		 */
303 		write_cpacr_el1(cpacr & ~CPACR_EL1_FPEN_NOTRAP);
304 	}
305 	barrier_isync_fence_full();
306 }
307 
308 /*
309  * This is called on every exception exit except for z_arm64_fpu_trap().
310  * In that case the exception level of interest is 1 (soon to be 0).
311  */
void z_arm64_fpu_exit_exc(void)
{
	/* exception level of interest on exit: 1, soon to be 0 */
	const unsigned int exit_exc_level = 1;

	fpu_access_update(exit_exc_level);
}
316 
317 /*
318  * This is called from z_arm64_context_switch(). FPU access may be granted
319  * only if exception level is 0. If we switch to a thread that is still in
320  * some exception context then FPU access would be re-evaluated at exception
321  * exit time via z_arm64_fpu_exit_exc().
322  */
void z_arm64_fpu_thread_context_switch(void)
{
	/* on a context switch, access may be granted only at exception level 0 */
	const unsigned int thread_exc_level = 0;

	fpu_access_update(thread_exc_level);
}
327 
arch_float_disable(struct k_thread * thread)328 int arch_float_disable(struct k_thread *thread)
329 {
330 	if (thread != NULL) {
331 		unsigned int key = arch_irq_lock();
332 
333 #ifdef CONFIG_SMP
334 		flush_owned_fpu(thread);
335 #else
336 		if (thread == atomic_ptr_get(&_current_cpu->arch.fpu_owner)) {
337 			arch_flush_local_fpu();
338 		}
339 #endif
340 
341 		arch_irq_unlock(key);
342 	}
343 
344 	return 0;
345 }
346 
/*
 * Nothing to do: floats always get enabled automatically at the moment,
 * so both arguments are ignored. Always returns 0.
 */
int arch_float_enable(struct k_thread *thread, unsigned int options)
{
	(void)thread;
	(void)options;

	return 0;
}
352