/*
Copyright (c) 1990 The Regents of the University of California.
All rights reserved.

Redistribution and use in source and binary forms are permitted
provided that the above copyright notice and this paragraph are
duplicated in all such forms and that any documentation,
and/or other materials related to such
distribution and use acknowledge that the software was developed
by the University of California, Berkeley.  The name of the
University may not be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
 */
/* This is a simple version of setjmp and longjmp for the PowerPC.
   Ian Lance Taylor, Cygnus Support, 9 Feb 1994.
   Modified by Jeff Johnston, Red Hat Inc. 2 Oct 2001.
   Modified by Sebastian Huber, embedded brains GmbH. 22 Sep 2022.  */

#include <picolibc.h>

#include "ppc-asm.h"
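
/* For orientation, a minimal sketch of the C-level contract these
   routines implement (illustrative only; the real declarations come
   from <setjmp.h>):

	#include <setjmp.h>

	static jmp_buf env;

	static void bar(void) { longjmp(env, 42); }

	int foo(void)
	{
		int rc = setjmp(env);	// 0 on the direct return
		if (rc == 0)
			bar();		// longjmp makes setjmp return 42
		return rc;		// 42 after the longjmp
	}

   On entry to both routines below, r3 holds the jmp_buf pointer;
   longjmp additionally receives the requested return value in r4.  */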

FUNC_START(setjmp)
#ifdef __ALTIVEC__
	addi	3,3,15		# align Altivec to 16 byte boundary
#if __powerpc64__
	clrrdi	3,3,4
#else
	rlwinm	3,3,0,0,27
#endif
#else
	addi	3,3,7		# align to 8 byte boundary
#if __powerpc64__
	clrrdi	3,3,3
#else
	rlwinm	3,3,0,0,28
#endif
#endif
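	/* The addi/clrrdi (64-bit) or addi/rlwinm (32-bit) pair above
	   computes r3 = (r3 + align - 1) & ~(align - 1): rlwinm with a
	   mask of bits 0..27 clears the low four bits (align 16), and a
	   mask of bits 0..28 clears the low three bits (align 8).  */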
#if __SPE__
	/* If we are E500, then save 64-bit registers.  */
	evstdd	1,0(3)		# offset 0
	evstdd	2,8(3)		# offset 8
	evstdd	13,16(3)	# offset 16
	evstdd	14,24(3)	# offset 24
	evstdd	15,32(3)	# offset 32
	evstdd	16,40(3)	# offset 40
	evstdd	17,48(3)	# offset 48
	evstdd	18,56(3)	# offset 56
	evstdd	19,64(3)	# offset 64
	evstdd	20,72(3)	# offset 72
	evstdd	21,80(3)	# offset 80
	evstdd	22,88(3)	# offset 88
	evstdd	23,96(3)	# offset 96
	evstdd	24,104(3)	# offset 104
	evstdd	25,112(3)	# offset 112
	evstdd	26,120(3)	# offset 120
	evstdd	27,128(3)	# offset 128
	evstdd	28,136(3)	# offset 136
	evstdd	29,144(3)	# offset 144
	evstdd	30,152(3)	# offset 152
	evstdd	31,160(3)	# offset 160

	/* Add 164 to r3 to account for the amount of data we just
	   stored.  Note that we are not adding 168 because the next
	   store instruction uses an offset of 4.  */
	addi	3,3,164
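	/* 21 registers * 8 bytes = 168 bytes were stored above; 164 =
	   168 - 4 compensates for the pre-increment of the next stwu.  */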
#elif __powerpc64__
	/* In the first store, add 8 to r3 so that the subsequent floating
	   point stores are aligned on an 8 byte boundary and the Altivec
	   stores are aligned on a 16 byte boundary.  */
	stdu	1,8(3)		# offset 8
	stdu	2,8(3)		# offset 16
	stdu	13,8(3)		# offset 24
	stdu	14,8(3)		# offset 32
	stdu	15,8(3)		# offset 40
	stdu	16,8(3)		# offset 48
	stdu	17,8(3)		# offset 56
	stdu	18,8(3)		# offset 64
	stdu	19,8(3)		# offset 72
	stdu	20,8(3)		# offset 80
	stdu	21,8(3)		# offset 88
	stdu	22,8(3)		# offset 96
	stdu	23,8(3)		# offset 104
	stdu	24,8(3)		# offset 112
	stdu	25,8(3)		# offset 120
	stdu	26,8(3)		# offset 128
	stdu	27,8(3)		# offset 136
	stdu	28,8(3)		# offset 144
	stdu	29,8(3)		# offset 152
	stdu	30,8(3)		# offset 160
	stdu	31,8(3)		# offset 168
	mflr	4
	stdu	4,8(3)		# offset 176
	mfcr	4
	stwu	4,8(3)		# offset 184
#else
	stw	1,0(3)		# offset 0
	stwu	2,4(3)		# offset 4
	stwu	13,4(3)		# offset 8
	stwu	14,4(3)		# offset 12
	stwu	15,4(3)		# offset 16
	stwu	16,4(3)		# offset 20
	stwu	17,4(3)		# offset 24
	stwu	18,4(3)		# offset 28
	stwu	19,4(3)		# offset 32
	stwu	20,4(3)		# offset 36
	stwu	21,4(3)		# offset 40
	stwu	22,4(3)		# offset 44
	stwu	23,4(3)		# offset 48
	stwu	24,4(3)		# offset 52
	stwu	25,4(3)		# offset 56
	stwu	26,4(3)		# offset 60
	stwu	27,4(3)		# offset 64
	stwu	28,4(3)		# offset 68
	stwu	29,4(3)		# offset 72
	stwu	30,4(3)		# offset 76
	stwu	31,4(3)		# offset 80
#endif
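	/* stdu/stwu/stfdu are update forms: each one stores at
	   r3 + displacement and then writes that effective address back
	   to r3, so the chains above and below walk the buffer without
	   separate pointer arithmetic.  */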

#if !__powerpc64__
	/* If __SPE__, then add 84 to the offset shown from this point on until
	   the end of this function.  This difference comes from the fact that
	   we save 21 64-bit registers instead of 21 32-bit registers above.  */
	mflr	4
	stwu	4,4(3)		# offset 84
	mfcr	4
	stwu	4,4(3)		# offset 88
				# one word pad to get floating point aligned on 8 byte boundary
#endif

	/* Check whether we need to save FPRs.  Checking __NO_FPRS__
	   on its own would be enough for GCC 4.1 and above, but older
	   compilers only define _SOFT_FLOAT, so check both.  */
#if !defined (__NO_FPRS__) && !defined (_SOFT_FLOAT)
#if defined (__rtems__) && !defined (__PPC_CPU_E6500__)
	/* For some RTEMS multilibs, the FPU and Altivec units are disabled
	   during interrupt handling.  Do not save and restore the
	   corresponding registers in this case.  */
	mfmsr	5
	andi.	5,5,0x2000
	beq	1f
#endif
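	/* The RTEMS check above tests MSR[FP] (the 0x2000 bit); when the
	   FPU is disabled, the floating point saves below are skipped.  */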

	/* If __powerpc64__, then add 96 to the offset shown from this point on
	   until the end of this function.  This difference comes from the fact
	   that we save 23 64-bit registers instead of 23 32-bit registers above
	   and we take the alignment requirements of the floating point and
	   Altivec stores into account.  */

	stfdu	14,8(3)		# offset 96
	stfdu	15,8(3)		# offset 104
	stfdu	16,8(3)		# offset 112
	stfdu	17,8(3)		# offset 120
	stfdu	18,8(3)		# offset 128
	stfdu	19,8(3)		# offset 136
	stfdu	20,8(3)		# offset 144
	stfdu	21,8(3)		# offset 152
	stfdu	22,8(3)		# offset 160
	stfdu	23,8(3)		# offset 168
	stfdu	24,8(3)		# offset 176
	stfdu	25,8(3)		# offset 184
	stfdu	26,8(3)		# offset 192
	stfdu	27,8(3)		# offset 200
	stfdu	28,8(3)		# offset 208
	stfdu	29,8(3)		# offset 216
	stfdu	30,8(3)		# offset 224
	stfdu	31,8(3)		# offset 232
1:
#endif

	/* This requires a total of 21 * 4 (GPRs) + 18 * 8 (FPRs) + 4 (LR)
	   + 4 (CR) + 4 (pad) bytes == 60 * 4 bytes == 240 bytes.  */

#ifdef __ALTIVEC__
#if defined (__rtems__) && !defined (__PPC_CPU_E6500__)
	mfmsr	5
	andis.	5,5,0x200
	beq	1f
#endif
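	/* The andis. above tests MSR[VEC] (0x02000000); the Altivec saves
	   are skipped when the vector unit is disabled.  */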
	/* save Altivec vrsave and vr20-vr31 registers */
	mfspr	4,256		# vrsave register
	stwu	4,16(3)		# offset 248
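	/* stvx has no update form, so r3 is advanced explicitly with
	   addi before each 16-byte vector store.  */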
	addi	3,3,8
	stvx	20,0,3		# offset 256
	addi	3,3,16
	stvx	21,0,3		# offset 272
	addi	3,3,16
	stvx	22,0,3		# offset 288
	addi	3,3,16
	stvx	23,0,3		# offset 304
	addi	3,3,16
	stvx	24,0,3		# offset 320
	addi	3,3,16
	stvx	25,0,3		# offset 336
	addi	3,3,16
	stvx	26,0,3		# offset 352
	addi	3,3,16
	stvx	27,0,3		# offset 368
	addi	3,3,16
	stvx	28,0,3		# offset 384
	addi	3,3,16
	stvx	29,0,3		# offset 400
	addi	3,3,16
	stvx	30,0,3		# offset 416
	addi	3,3,16
	stvx	31,0,3		# offset 432
1:

	/* This requires a total of 240 (above) + 8 (pad) + 8 (vrsave plus
	   pad) + 12 * 16 (vr20-vr31) == 448 bytes.  */
#endif
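	/* A direct call to setjmp returns 0; returns via longjmp deliver
	   the (non-zero) value passed to longjmp instead.  */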
	li	3,0
	blr
FUNC_END(setjmp)


FUNC_START(longjmp)
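	/* r3 = jmp_buf pointer, r4 = value to return from setjmp
	   (forced to 1 at the end if the caller passed 0).  */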
#ifdef __ALTIVEC__
	addi	3,3,15		# align Altivec to 16 byte boundary
#if __powerpc64__
	clrrdi	3,3,4
#else
	rlwinm	3,3,0,0,27
#endif
#else
	addi	3,3,7		# align to 8 byte boundary
#if __powerpc64__
	clrrdi	3,3,3
#else
	rlwinm	3,3,0,0,28
#endif
#endif
#if __SPE__
	/* If we are E500, then restore 64-bit registers.  */
	evldd	1,0(3)		# offset 0
	evldd	2,8(3)		# offset 8
	evldd	13,16(3)	# offset 16
	evldd	14,24(3)	# offset 24
	evldd	15,32(3)	# offset 32
	evldd	16,40(3)	# offset 40
	evldd	17,48(3)	# offset 48
	evldd	18,56(3)	# offset 56
	evldd	19,64(3)	# offset 64
	evldd	20,72(3)	# offset 72
	evldd	21,80(3)	# offset 80
	evldd	22,88(3)	# offset 88
	evldd	23,96(3)	# offset 96
	evldd	24,104(3)	# offset 104
	evldd	25,112(3)	# offset 112
	evldd	26,120(3)	# offset 120
	evldd	27,128(3)	# offset 128
	evldd	28,136(3)	# offset 136
	evldd	29,144(3)	# offset 144
	evldd	30,152(3)	# offset 152
	evldd	31,160(3)	# offset 160

	/* Add 164 to r3 to account for the amount of data we just
	   loaded.  Note that we are not adding 168 because the next
	   load instruction uses an offset of 4.  */
	addi	3,3,164
#elif __powerpc64__
	/* In the first load, add 8 to r3 so that the subsequent floating
	   point loads are aligned on an 8 byte boundary and the Altivec
	   loads are aligned on a 16 byte boundary.  */
	ldu	1,8(3)		# offset 8
	ldu	2,8(3)		# offset 16
	ldu	13,8(3)		# offset 24
	ldu	14,8(3)		# offset 32
	ldu	15,8(3)		# offset 40
	ldu	16,8(3)		# offset 48
	ldu	17,8(3)		# offset 56
	ldu	18,8(3)		# offset 64
	ldu	19,8(3)		# offset 72
	ldu	20,8(3)		# offset 80
	ldu	21,8(3)		# offset 88
	ldu	22,8(3)		# offset 96
	ldu	23,8(3)		# offset 104
	ldu	24,8(3)		# offset 112
	ldu	25,8(3)		# offset 120
	ldu	26,8(3)		# offset 128
	ldu	27,8(3)		# offset 136
	ldu	28,8(3)		# offset 144
	ldu	29,8(3)		# offset 152
	ldu	30,8(3)		# offset 160
	ldu	31,8(3)		# offset 168
	ldu	5,8(3)		# offset 176
	mtlr	5
	lwzu	5,8(3)		# offset 184
	mtcrf	255,5
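	/* mtcrf with a field mask of 255 (0xff) restores all eight
	   4-bit CR fields from r5.  */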
#else
	lwz	1,0(3)		# offset 0
	lwzu	2,4(3)		# offset 4
	lwzu	13,4(3)		# offset 8
	lwzu	14,4(3)		# offset 12
	lwzu	15,4(3)		# offset 16
	lwzu	16,4(3)		# offset 20
	lwzu	17,4(3)		# offset 24
	lwzu	18,4(3)		# offset 28
	lwzu	19,4(3)		# offset 32
	lwzu	20,4(3)		# offset 36
	lwzu	21,4(3)		# offset 40
	lwzu	22,4(3)		# offset 44
	lwzu	23,4(3)		# offset 48
	lwzu	24,4(3)		# offset 52
	lwzu	25,4(3)		# offset 56
	lwzu	26,4(3)		# offset 60
	lwzu	27,4(3)		# offset 64
	lwzu	28,4(3)		# offset 68
	lwzu	29,4(3)		# offset 72
	lwzu	30,4(3)		# offset 76
	lwzu	31,4(3)		# offset 80
#endif
	/* If __SPE__, then add 84 to the offset shown from this point on until
	   the end of this function.  This difference comes from the fact that
	   we restore 21 64-bit registers instead of 21 32-bit registers above.  */

#if !__powerpc64__
	lwzu	5,4(3)		# offset 84
	mtlr	5
	lwzu	5,4(3)		# offset 88
	mtcrf	255,5
				# one word pad to get floating point aligned on 8 byte boundary
#endif

	/* Check whether we need to restore FPRs.  Checking
	   __NO_FPRS__ on its own would be enough for GCC 4.1 and
	   above, but older compilers only define _SOFT_FLOAT, so
	   check both.  */
#if !defined (__NO_FPRS__) && !defined (_SOFT_FLOAT)
#if defined (__rtems__) && !defined (__PPC_CPU_E6500__)
	mfmsr	5
	andi.	5,5,0x2000
	beq	1f
#endif

	/* If __powerpc64__, then add 96 to the offset shown from this point on
	   until the end of this function.  This difference comes from the fact
	   that we restore 23 64-bit registers instead of 23 32-bit registers
	   above and we take the alignment requirements of the floating point
	   and Altivec loads into account.  */

	lfdu	14,8(3)		# offset 96
	lfdu	15,8(3)		# offset 104
	lfdu	16,8(3)		# offset 112
	lfdu	17,8(3)		# offset 120
	lfdu	18,8(3)		# offset 128
	lfdu	19,8(3)		# offset 136
	lfdu	20,8(3)		# offset 144
	lfdu	21,8(3)		# offset 152
	lfdu	22,8(3)		# offset 160
	lfdu	23,8(3)		# offset 168
	lfdu	24,8(3)		# offset 176
	lfdu	25,8(3)		# offset 184
	lfdu	26,8(3)		# offset 192
	lfdu	27,8(3)		# offset 200
	lfdu	28,8(3)		# offset 208
	lfdu	29,8(3)		# offset 216
	lfdu	30,8(3)		# offset 224
	lfdu	31,8(3)		# offset 232
1:
#endif

#ifdef __ALTIVEC__
#if defined (__rtems__) && !defined (__PPC_CPU_E6500__)
	mfmsr	5
	andis.	5,5,0x200
	beq	1f
#endif
	/* restore Altivec vrsave and vr20-vr31 registers */
	lwzu	5,16(3)		# offset 248
	mtspr	256,5		# vrsave
	addi	3,3,8
	lvx	20,0,3		# offset 256
	addi	3,3,16
	lvx	21,0,3		# offset 272
	addi	3,3,16
	lvx	22,0,3		# offset 288
	addi	3,3,16
	lvx	23,0,3		# offset 304
	addi	3,3,16
	lvx	24,0,3		# offset 320
	addi	3,3,16
	lvx	25,0,3		# offset 336
	addi	3,3,16
	lvx	26,0,3		# offset 352
	addi	3,3,16
	lvx	27,0,3		# offset 368
	addi	3,3,16
	lvx	28,0,3		# offset 384
	addi	3,3,16
	lvx	29,0,3		# offset 400
	addi	3,3,16
	lvx	30,0,3		# offset 416
	addi	3,3,16
	lvx	31,0,3		# offset 432
1:
#endif

	mr.	3,4		# move val to the return register, setting CR0
	bclr+	4,2		# if val != 0, return it directly
	li	3,1		# a zero val must read as 1 at the setjmp site
	blr
FUNC_END(longjmp)