1 /*
2  * Copyright (c) 2023 BayLibre SAS
3  * Written by: Nicolas Pitre
4  *
5  * SPDX-License-Identifier: Apache-2.0
6  */
7 
8 #include <zephyr/kernel.h>
9 #include <zephyr/kernel_structs.h>
10 #include <kernel_arch_interface.h>
11 #include <zephyr/sys/atomic.h>
12 
/*
 * FPU register save/restore primitives, to be found in fpu.S.
 *
 * Both take a pointer to a thread's in-memory FP context area
 * (struct z_riscv_fp_context). In this file z_riscv_fpu_save() is used to
 * store the live FPU content of the owning thread, and z_riscv_fpu_restore()
 * to reload the current thread's content.
 */
extern void z_riscv_fpu_save(struct z_riscv_fp_context *saved_fp_context);
extern void z_riscv_fpu_restore(struct z_riscv_fp_context *saved_fp_context);

/* Set to 1 to build the tracing DBG() helper instead of the empty stub. */
#define FPU_DEBUG 0
18 
19 #if FPU_DEBUG
20 
21 /*
22  * Debug traces have to be produced without printk() or any other functions
23  * using a va_list as va_start() may copy the FPU registers that could be
24  * used to pass float arguments, and that would trigger an FPU access trap.
25  * Note: Apparently gcc doesn't use float regs with variadic functions on
26  * RISC-V even if -mabi is used with f or d so this precaution might be
27  * unnecessary. But better be safe than sorry especially for debugging code.
28  */
29 
30 #include <string.h>
31 
DBG(char * msg,struct k_thread * th)32 static void DBG(char *msg, struct k_thread *th)
33 {
34 	char buf[80], *p;
35 	unsigned int v;
36 
37 	strcpy(buf, "CPU# exc# ");
38 	buf[3] = '0' + _current_cpu->id;
39 	buf[8] = '0' + arch_current_thread()->arch.exception_depth;
40 	strcat(buf, arch_current_thread()->name);
41 	strcat(buf, ": ");
42 	strcat(buf, msg);
43 	strcat(buf, " ");
44 	strcat(buf, th->name);
45 
46 	v = *(unsigned char *)&th->arch.saved_fp_context;
47 	p = buf + strlen(buf);
48 	*p++ = ' ';
49 	*p++ = ((v >> 4) < 10) ? ((v >> 4) + '0') : ((v >> 4) - 10 + 'a');
50 	*p++ = ((v & 15) < 10) ? ((v & 15) + '0') : ((v & 15) - 10 + 'a');
51 	*p++ = '\n';
52 	*p = 0;
53 
54 	k_str_out(buf, p - buf);
55 }
56 
57 #else
58 
/* Tracing is compiled out: accept the same arguments and do nothing. */
static inline void DBG(char *msg, struct k_thread *t)
{
	(void)msg;
	(void)t;
}
60 
61 #endif /* FPU_DEBUG */
62 
z_riscv_fpu_disable(void)63 static void z_riscv_fpu_disable(void)
64 {
65 	unsigned long status = csr_read(mstatus);
66 
67 	__ASSERT((status & MSTATUS_IEN) == 0, "must be called with IRQs disabled");
68 
69 	if ((status & MSTATUS_FS) != 0) {
70 		csr_clear(mstatus, MSTATUS_FS);
71 
72 		/* remember its clean/dirty state */
73 		_current_cpu->arch.fpu_state = (status & MSTATUS_FS);
74 	}
75 }
76 
z_riscv_fpu_load(void)77 static void z_riscv_fpu_load(void)
78 {
79 	__ASSERT((csr_read(mstatus) & MSTATUS_IEN) == 0,
80 		 "must be called with IRQs disabled");
81 	__ASSERT((csr_read(mstatus) & MSTATUS_FS) == 0,
82 		 "must be called with FPU access disabled");
83 
84 	/* become new owner */
85 	atomic_ptr_set(&_current_cpu->arch.fpu_owner, arch_current_thread());
86 
87 	/* restore our content */
88 	csr_set(mstatus, MSTATUS_FS_INIT);
89 	z_riscv_fpu_restore(&arch_current_thread()->arch.saved_fp_context);
90 	DBG("restore", arch_current_thread());
91 }
92 
93 /*
94  * Flush FPU content and clear ownership. If the saved FPU state is "clean"
95  * then we know the in-memory copy is up to date and skip the FPU content
96  * transfer. The saved FPU state is updated upon disabling FPU access so
97  * we require that this be called only when the FPU is disabled.
98  *
99  * This is called locally and also from flush_fpu_ipi_handler().
100  */
arch_flush_local_fpu(void)101 void arch_flush_local_fpu(void)
102 {
103 	__ASSERT((csr_read(mstatus) & MSTATUS_IEN) == 0,
104 		 "must be called with IRQs disabled");
105 	__ASSERT((csr_read(mstatus) & MSTATUS_FS) == 0,
106 		 "must be called with FPU access disabled");
107 
108 	struct k_thread *owner = atomic_ptr_get(&_current_cpu->arch.fpu_owner);
109 
110 	if (owner != NULL) {
111 		bool dirty = (_current_cpu->arch.fpu_state == MSTATUS_FS_DIRTY);
112 
113 		if (dirty) {
114 			/* turn on FPU access */
115 			csr_set(mstatus, MSTATUS_FS_CLEAN);
116 			/* save current owner's content */
117 			z_riscv_fpu_save(&owner->arch.saved_fp_context);
118 		}
119 
120 		/* dirty means active use */
121 		owner->arch.fpu_recently_used = dirty;
122 
123 		/* disable FPU access */
124 		csr_clear(mstatus, MSTATUS_FS);
125 
126 		/* release ownership */
127 		atomic_ptr_clear(&_current_cpu->arch.fpu_owner);
128 		DBG("disable", owner);
129 	}
130 }
131 
132 #ifdef CONFIG_SMP
flush_owned_fpu(struct k_thread * thread)133 static void flush_owned_fpu(struct k_thread *thread)
134 {
135 	__ASSERT((csr_read(mstatus) & MSTATUS_IEN) == 0,
136 		 "must be called with IRQs disabled");
137 
138 	int i;
139 	atomic_ptr_val_t owner;
140 
141 	/* search all CPUs for the owner we want */
142 	unsigned int num_cpus = arch_num_cpus();
143 
144 	for (i = 0; i < num_cpus; i++) {
145 		owner = atomic_ptr_get(&_kernel.cpus[i].arch.fpu_owner);
146 		if (owner != thread) {
147 			continue;
148 		}
149 		/* we found it live on CPU i */
150 		if (i == _current_cpu->id) {
151 			z_riscv_fpu_disable();
152 			arch_flush_local_fpu();
153 			break;
154 		}
155 		/* the FPU context is live on another CPU */
156 		arch_flush_fpu_ipi(i);
157 
158 		/*
159 		 * Wait for it only if this is about the thread
160 		 * currently running on this CPU. Otherwise the
161 		 * other CPU running some other thread could regain
162 		 * ownership the moment it is removed from it and
163 		 * we would be stuck here.
164 		 *
165 		 * Also, if this is for the thread running on this
166 		 * CPU, then we preemptively flush any live context
167 		 * on this CPU as well since we're likely to
168 		 * replace it, and this avoids a deadlock where
169 		 * two CPUs want to pull each other's FPU context.
170 		 */
171 		if (thread == arch_current_thread()) {
172 			z_riscv_fpu_disable();
173 			arch_flush_local_fpu();
174 			do {
175 				arch_nop();
176 				owner = atomic_ptr_get(&_kernel.cpus[i].arch.fpu_owner);
177 			} while (owner == thread);
178 		}
179 		break;
180 	}
181 }
182 #endif
183 
/*
 * Exception entry hook: turn FPU access off.
 *
 * This simply calls z_riscv_fpu_disable(), which also records the
 * clean/dirty state the FPU had if access was enabled, and which requires
 * IRQs to be disabled.
 */
void z_riscv_fpu_enter_exc(void)
{
	/* always deny FPU access whenever an exception is entered */
	z_riscv_fpu_disable();
}
189 
190 /*
191  * Process the FPU trap.
192  *
193  * This usually means that FP regs belong to another thread. Save them
194  * to that thread's save area and restore the current thread's content.
195  *
196  * We also get here when FP regs are used while in exception as FP access
197  * is always disabled by default in that case. If so we save the FPU content
198  * to the owning thread and simply enable FPU access. Exceptions should be
199  * short and don't have persistent register contexts when they're done so
200  * there is nothing to save/restore for that context... as long as we
201  * don't get interrupted that is. To ensure that we mask interrupts to
202  * the triggering exception context.
203  *
204  * Note that the exception depth count was not incremented before this call
205  * as no further exceptions are expected before returning to normal mode.
206  */
z_riscv_fpu_trap(struct arch_esf * esf)207 void z_riscv_fpu_trap(struct arch_esf *esf)
208 {
209 	__ASSERT((esf->mstatus & MSTATUS_FS) == 0 &&
210 		 (csr_read(mstatus) & MSTATUS_FS) == 0,
211 		 "called despite FPU being accessible");
212 
213 	/* save current owner's content  if any */
214 	arch_flush_local_fpu();
215 
216 	if (arch_current_thread()->arch.exception_depth > 0) {
217 		/*
218 		 * We were already in exception when the FPU access trapped.
219 		 * We give it access and prevent any further IRQ recursion
220 		 * by disabling IRQs as we wouldn't be able to preserve the
221 		 * interrupted exception's FPU context.
222 		 */
223 		esf->mstatus &= ~MSTATUS_MPIE_EN;
224 
225 		/* make it accessible to the returning context */
226 		esf->mstatus |= MSTATUS_FS_INIT;
227 
228 		return;
229 	}
230 
231 #ifdef CONFIG_SMP
232 	/*
233 	 * Make sure the FPU context we need isn't live on another CPU.
234 	 * The current CPU's FPU context is NULL at this point.
235 	 */
236 	flush_owned_fpu(arch_current_thread());
237 #endif
238 
239 	/* make it accessible and clean to the returning context */
240 	esf->mstatus |= MSTATUS_FS_CLEAN;
241 
242 	/* and load it with corresponding content */
243 	z_riscv_fpu_load();
244 }
245 
246 /*
247  * Perform lazy FPU context switching by simply granting or denying
248  * access to FP regs based on FPU ownership before leaving the last
249  * exception level in case of exceptions, or during a thread context
250  * switch with the exception level of the new thread being 0.
251  * If current thread doesn't own the FP regs then it will trap on its
252  * first access and then the actual FPU context switching will occur.
253  */
fpu_access_allowed(unsigned int exc_update_level)254 static bool fpu_access_allowed(unsigned int exc_update_level)
255 {
256 	__ASSERT((csr_read(mstatus) & MSTATUS_IEN) == 0,
257 		 "must be called with IRQs disabled");
258 
259 	if (arch_current_thread()->arch.exception_depth == exc_update_level) {
260 		/* We're about to execute non-exception code */
261 		if (_current_cpu->arch.fpu_owner == arch_current_thread()) {
262 			/* everything is already in place */
263 			return true;
264 		}
265 		if (arch_current_thread()->arch.fpu_recently_used) {
266 			/*
267 			 * Before this thread was context-switched out,
268 			 * it made active use of the FPU, but someone else
269 			 * took it away in the mean time. Let's preemptively
270 			 * claim it back to avoid the likely exception trap
271 			 * to come otherwise.
272 			 */
273 			z_riscv_fpu_disable();
274 			arch_flush_local_fpu();
275 #ifdef CONFIG_SMP
276 			flush_owned_fpu(arch_current_thread());
277 #endif
278 			z_riscv_fpu_load();
279 			_current_cpu->arch.fpu_state = MSTATUS_FS_CLEAN;
280 			return true;
281 		}
282 		return false;
283 	}
284 	/*
285 	 * Any new exception level should always trap on FPU
286 	 * access as we want to make sure IRQs are disabled before
287 	 * granting it access (see z_riscv_fpu_trap() documentation).
288 	 */
289 	return false;
290 }
291 
292 /*
293  * This is called on every exception exit except for z_riscv_fpu_trap().
294  * In that case the exception level of interest is 1 (soon to be 0).
295  */
z_riscv_fpu_exit_exc(struct arch_esf * esf)296 void z_riscv_fpu_exit_exc(struct arch_esf *esf)
297 {
298 	if (fpu_access_allowed(1)) {
299 		esf->mstatus &= ~MSTATUS_FS;
300 		esf->mstatus |= _current_cpu->arch.fpu_state;
301 	} else {
302 		esf->mstatus &= ~MSTATUS_FS;
303 	}
304 }
305 
306 /*
307  * This is called from z_riscv_context_switch(). FPU access may be granted
308  * only if exception level is 0. If we switch to a thread that is still in
309  * some exception context then FPU access would be re-evaluated at exception
310  * exit time via z_riscv_fpu_exit_exc().
311  */
z_riscv_fpu_thread_context_switch(void)312 void z_riscv_fpu_thread_context_switch(void)
313 {
314 	if (fpu_access_allowed(0)) {
315 		csr_clear(mstatus, MSTATUS_FS);
316 		csr_set(mstatus, _current_cpu->arch.fpu_state);
317 	} else {
318 		z_riscv_fpu_disable();
319 	}
320 }
321 
arch_float_disable(struct k_thread * thread)322 int arch_float_disable(struct k_thread *thread)
323 {
324 	if (thread != NULL) {
325 		unsigned int key = arch_irq_lock();
326 
327 #ifdef CONFIG_SMP
328 		flush_owned_fpu(thread);
329 #else
330 		if (thread == _current_cpu->arch.fpu_owner) {
331 			z_riscv_fpu_disable();
332 			arch_flush_local_fpu();
333 		}
334 #endif
335 
336 		arch_irq_unlock(key);
337 	}
338 
339 	return 0;
340 }
341 
/*
 * Nothing to do here: floats always get enabled automatically at the
 * moment, so both arguments are ignored.
 *
 * Returns 0 in all cases.
 */
int arch_float_enable(struct k_thread *thread, unsigned int options)
{
	(void)thread;
	(void)options;

	return 0;
}
347