1//
2// windowspill.S  --  register window spill routine
3//
4// $Id: //depot/rel/Foxhill/dot.8/Xtensa/OS/hal/windowspill_asm.S#1 $
5
6// Copyright (c) 1999-2010 Tensilica Inc.
7//
8// Permission is hereby granted, free of charge, to any person obtaining
9// a copy of this software and associated documentation files (the
10// "Software"), to deal in the Software without restriction, including
11// without limitation the rights to use, copy, modify, merge, publish,
12// distribute, sublicense, and/or sell copies of the Software, and to
13// permit persons to whom the Software is furnished to do so, subject to
14// the following conditions:
15//
16// The above copyright notice and this permission notice shall be included
17// in all copies or substantial portions of the Software.
18//
19// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
22// IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
23// CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25// SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26
27#include <xtensa/coreasm.h>
28
29
30//  xthal_window_spill_nw
31//
32//  Spill live register windows to the stack.
33//
34//  Required entry conditions:
35//	PS.WOE = 0
36//	PS.INTLEVEL >= XCHAL_EXCM_LEVEL
37//	a1 = valid stack pointer (note: some regs may be spilled at a1-16)
38//	a0 = return PC (usually set by call0 or callx0 when calling this function)
39//	a2,a3 undefined
40//	a4 thru a15 valid, if they are part of window(s) to be spilled
41//     (Current window a0..a15 saved if necessary.)
42//	WINDOWSTART[WINDOWBASE] = 1
43//
44//  Exit conditions:
45//	PS.WOE, PS.INTLEVEL = same as on entry
46//	WINDOWBASE = same as on entry
47//	WINDOWSTART updated to reflect spilled windows
48//		(equals 1<<WINDOWBASE if successful return)
49//	a0 = return PC
50//	a1 = same as on entry
51//	a2 = error code:
52//		0 --> successful
53//			(WINDOWSTART = 1<<WINDOWBASE)
54//		1 --> invalid WINDOWSTART (WINDOWBASE bit not set)
55//			(WINDOWSTART unchanged)
56//		2 --> invalid window size (not 4, 8 or 12 regs)
57//			(WINDOWSTART bits of successfully spilled
58//			 windows are cleared, others left intact)
59//	a3 clobbered
60//	a4,a5,a8,a9,a12,a13 = same as on entry
61//	a6,a7,a10,a11,a14,a15 clobbered if they were part of window(s)
62//		to be spilled, otherwise they are the same as on entry
63//	loop registers (LCOUNT,LBEG,LEND) are NOT affected (they were in earlier versions)
64//	SAR clobbered
65//
66//  All non-spilled register windows will be spilled.
67//  Beware that this may include a4..a15 of the current window,
68//  so generally these should not have been clobbered by the
69//  caller if it is at all possible that these registers
70//  are part of an unspilled window (it often is possible)
71//  (otherwise the spilled stack would be invalid).
72//
73//  THIS MEANS: the caller is responsible for saving a0-a15 but
74//  the caller must leave a4-a15 intact when control is transferred
75//  here.
76//
77//  It may be reentrant (but stack pointer is invalid during
78//  execution due to window rotations, so can't take interrupts
79//  and exceptions in the usual manner, so ... what does
80//  reentrancy really mean here?).
81
82
83	//  The xthal_spill_registers_into_stack_nw entry point
84	//  is kept here only for backwards compatibility.
85	//  It will be removed in the very near future.
86	.global	xthal_spill_registers_into_stack_nw
87
88	.text
89	.align 4
90	.global	xthal_window_spill_nw
91xthal_window_spill_nw:
92xthal_spill_registers_into_stack_nw:	// BACKWARD COMPATIBILITY ONLY - see above
93
94#if ! XCHAL_HAVE_WINDOWED
95	//  Nothing to do -- window option was not selected.
96	movi	a2, 0		// always report success
97	ret
98#else /* XCHAL_HAVE_WINDOWED */
99#define WSBITS	(XCHAL_NUM_AREGS / 4)		/* width of WINDOWSTART register in bits */
100#define WBBITS	(XCHAL_NUM_AREGS_LOG2 - 2)	/* width of WINDOWBASE register in bits */
101	/*
102	 * Rearrange (rotate) window start bits relative to the current
103	 * window (WINDOWBASE).  WINDOWSTART currently looks like this:
104	 *
105	 *          a15-a0
106	 * NAREG-1   |  |    0
107	 *    |      vvvv    |
108	 *    xxxxxxxxxx1yyyyy
109	 *              ^
110	 *              |
111	 *              WINDOWBASE
112	 *
113	 * The start bit pointed to by WINDOWBASE must be set
114	 * (we return an error if it isn't), as it corresponds
115	 * to the start of the current window (shown as a0-a15).
116	 *
117	 * We want the window start bits rotated to look like this:
118	 *              1yyyyyxxxxxxxxxx
119	 *
120	 * Note that there is one start bit for every four registers;
121	 * and the total number of registers (NAREG) can be 32 or 64;
122	 * so the number of start bits in WINDOWSTART is NAREG/4,
123	 * and the size of WINDOWSTART can be 8 or 16.
124	 */
125
126	rsr.windowbase	a2
127	addi	a2, a2, 1
128	ssr	a2		// sar = WINDOWBASE + 1
129	rsr.windowstart	a3
130	srl	a2, a3		// a2 is 0... | 000000xxxxxxxxxx = WINDOWSTART >> sar
131	sll	a3, a3		// a3 is 1yyyyy0000000000 | 0... = WINDOWSTART << (32 - sar)
132	bgez	a3, .Linvalid_ws	// verify that msbit is indeed set
133
134	srli	a3, a3, 32-WSBITS	// a3 is 0... | 1yyyyy0000000000 = a3 >> (32-NAREG/4)
135	or	a2, a2, a3		// a2 is 0... | 1yyyyyxxxxxxxxxx
136
137	/*
138	 *	FIND THE FIRST ONE
139	 *
140	 *  Now we have (in a2) the window start bits rotated in order
141	 *  from oldest (closest to lsbit) to current (msbit set).
142	 *  Each start bit (that is set), other than the current one,
143	 *  corresponds to a window frame to spill.
144	 *
145	 *  Now find the first start bit, ie. the first frame to spill,
146	 *  by looking for the first bit set in a2 (from lsbit side).
147	 */
148
149#if XCHAL_HAVE_NSA
150	neg     a3, a2		// keep only the least-significant bit set of a2 ...
151	and     a3, a3, a2	// ... in a3
152	nsau    a3, a3		// get index of that bit, numbered from msbit (32 if absent)
153	ssl	a3		// set sar = 32 - a3 = bit index numbered from lsbit + 1
154#else /* XCHAL_HAVE_NSA */
155	wsr.windowstart	a2	// temporarily save rotated start bits
156				// (we can use WINDOWSTART because WOE=0)
157
158	//  NOTE:  this could be optimized a bit, by explicit coding rather than the macro.
159	find_ls_one	a3, a2	// set a3 to index of lsmost bit set in a2 (a2 clobbered)
160
161	addi	a2, a3, 1	// index+1
162	ssr	a2		// set sar = index + 1
163	rsr.windowstart	a2	// restore a2 (rotated start bits)
164#endif /* XCHAL_HAVE_NSA */
165	srl	a2, a2		// right-justify the rotated start bits (dropping lsbit set)
166	wsr.windowstart	a2	// save rotated + justified window start bits,
167				//  because a2 will disappear when modifying WINDOWBASE
168				// again, we can use WINDOWSTART because WOE=0
169
170	/*
171	 *  Rotate WindowBase so that a0 of the next window to spill is in a4
172	 *  (ie. leaving us with a2 and a3 to play with, because a0 and a1
173	 *  may be those of the original window which we must preserve).
174	 */
175	rsr.windowbase	a2
176#if XCHAL_HAVE_NSA
177	addi	a2, a2, 31
178	sub	a3, a2, a3	// a3 = WINDOWBASE + index = WINDOWBASE + (31 - msbit_index)
179#else /* XCHAL_HAVE_NSA */
180	add	a3, a2, a3	// a3 = WINDOWBASE + index
181#endif /* XCHAL_HAVE_NSA */
182	wsr.windowbase	a3	// effectively do:  rotw index
183	rsync			// wait for write to WINDOWBASE to complete
184	//  Now our registers have changed!
185
186	rsr.windowstart	a2	// restore a2 (rotated + justified window start bits)
187
188	/*
189	 *  We are now ready to start the window spill loop.
190	 *  Relative to the above, a2 and WINDOWBASE are now as follows:
191	 *
192	 *        1yyyyyxxxxxxxxxx = rotated start bits as shown above
193	 *        1yyyyyxxxx100000 = actual rotated start bits (example)
194	 *  0000001yyyyyxxxx ^     = a2 = rotated + justified start bits
195	 *        ^      xxx1^     = window being spilled
196	 *        ^          ^
197	 *        |          |
198	 *    original    current
199	 *   WINDOWBASE  WINDOWBASE
200	 *
201	 *  The first window to spill (save) starts at what is now a4.
202	 *  The spill loop maintains the adjusted start bits in a2,
203	 *  shifting them right as each window is spilled.
204	 */
205
206.Lspill_loop:
207	//  Top of save loop.
208	//  Find the size of this call and branch to the appropriate save routine.
209
210	beqz	a2, .Ldone		// if no start bit remaining, we're done
211	bbsi.l	a2, 0, .Lspill4		// if next start bit is set, it's a call4
212	bbsi.l	a2, 1, .Lspill8		// if 2nd next bit set, it's a call8
213	bbsi.l	a2, 2, .Lspill12	// if 3rd next bit set, it's a call12
214	j	.Linvalid_window	// else it's an invalid window!
215
216
217
218	// SAVE A CALL4
219.Lspill4:
220	addi	a3, a9, -16	// a3 gets call[i+1]'s sp - 16
221	s32i	a4, a3, 0	// store call[i]'s a0
222	s32i	a5, a3, 4	// store call[i]'s a1
223	s32i	a6, a3, 8	// store call[i]'s a2
224	s32i	a7, a3, 12	// store call[i]'s a3
225
226	srli	a6, a2, 1	// move and shift the start bits
227	rotw	1		// rotate the window
228
229	j	.Lspill_loop
230
231	// SAVE A CALL8
232.Lspill8:
233	addi	a3, a13, -16	// a0 gets call[i+1]'s sp - 16
234	s32i	a4, a3, 0	// store call[i]'s a0
235	s32i	a5, a3, 4	// store call[i]'s a1
236	s32i	a6, a3, 8	// store call[i]'s a2
237	s32i	a7, a3, 12	// store call[i]'s a3
238
239	addi	a3, a5, -12	// call[i-1]'s sp address
240	l32i	a3, a3, 0	// a3 is call[i-1]'s sp
241			// (load slot)
242	addi	a3, a3, -32	// a3 points to our spill area
243
244	s32i	a8, a3, 0	// store call[i]'s a4
245	s32i	a9, a3, 4	// store call[i]'s a5
246	s32i	a10, a3, 8	// store call[i]'s a6
247	s32i	a11, a3, 12	// store call[i]'s a7
248
249	srli	a10, a2, 2	// move and shift the start bits
250	rotw	2		// rotate the window
251
252	j	.Lspill_loop
253
254	// SAVE A CALL12
255.Lspill12:
256	rotw	1		// rotate to see call[i+1]'s sp
257
258	addi	a13, a13, -16	// set to the reg save area
259	s32i	a0, a13, 0	// store call[i]'s a0
260	s32i	a1, a13, 4	// store call[i]'s a1
261	s32i	a2, a13, 8	// store call[i]'s a2
262	s32i	a3, a13, 12	// store call[i]'s a3
263
264	addi	a3, a1, -12	// call[i-1]'s sp address
265	l32i	a3, a3, 0	// a3 has call[i-1]'s sp
266	addi	a13, a13, 16	// restore call[i+1]'s sp (here to fill load slot)
267	addi	a3, a3, -48	// a3 points to our save area
268
269	s32i	a4, a3, 0	// store call[i]'s a4
270	s32i	a5, a3, 4	// store call[i]'s a5
271	s32i	a6, a3, 8	// store call[i]'s a6
272	s32i	a7, a3, 12	// store call[i]'s a7
273	s32i	a8, a3, 16	// store call[i]'s a4
274	s32i	a9, a3, 20	// store call[i]'s a5
275	s32i	a10, a3, 24	// store call[i]'s a6
276	s32i	a11, a3, 28	// store call[i]'s a7
277
278	rotw	-1		// rotate to see start bits (a2)
279	srli	a14, a2, 3	// move and shift the start bits
280	rotw	3		// rotate to next window
281
282	j	.Lspill_loop
283
284
285
286.Ldone:
287	rotw	1		// back to the original window
288	rsr.windowbase	a2	// get (original) window base
289	ssl	a2		// setup for shift left by WINDOWBASE
290	movi	a2, 1
291	sll	a2, a2		// compute new WINDOWSTART = 1<<WINDOWBASE
292	wsr.windowstart	a2	// and apply it
293	rsync
294	movi	a2, 0		// done!
295	ret
296	//jx	a0
297
298
299	//  Invalid WINDOWSTART register.
300	//
301.Linvalid_ws:
302	movi	a2, 1		// indicate invalid WINDOWSTART
303	ret			// return from subroutine
304
305
306	//  Invalid window size!
307	//  The three bits following the start bit are all clear, so
308	//  we have an invalid window state (can't determine a window size).
309	//
310	//  So we exit with an error, but to do that we must first restore
311	//  the original WINDOWBASE.  We also compute a sensible
312	//  WINDOWSTART that has the start bits of spilled windows
313	//  cleared, but all other start bits intact, so someone debugging
314	//  the failure can look at WINDOWSTART to see which window
315	//  failed to spill.
316	//
317.Linvalid_window:
318	slli	a2, a2, 1	// space for missing start bit
319	addi	a2, a2, 1	// add missing start bit
320	rsr.windowbase	a3	// get current WINDOWBASE
321	bbsi.l	a2, WSBITS-1, 2f	// branch if current WINDOWBASE==original
3221:	addi	a3, a3, -1	// decrement towards original WINDOWBASE
323	slli	a2, a2, 1	// shift towards original WINDOWSTART alignment
324	bbci.l	a2, WSBITS-1, 1b	// repeat until ms start bit set
325	extui	a3, a3, 0, WBBITS	// mask out upper base bits, in case of carry-over
3262:	//  Here, a3 = original WINDOWBASE;
327	//  and msbit of start bits in a2 is set, and no other bits above it.
328	//  Now rotate a2 to become the correct WINDOWSTART.
329	ssl	a3		// set shift left ... (sar = 32 - orig WB)
330	slli	a3, a2, 32-WSBITS	// left-justify start bits
331	src	a2, a2, a3	// rotate left by original WINDOWBASE
332	extui	a2, a2, 0, WSBITS	// keep only significant start bits
333	wsr.windowstart	a2	// we've cleared only start bits of spilled windows
334	rsr.sar	a3		// retrieve 32 - original WINDOWBASE
335	movi	a2, 32
336	sub	a3, a2, a3	// restore original WINDOWBASE
337	wsr.windowbase	a3	// back to original WINDOWBASE
338	rsync
339
340	movi	a2, 2		// indicate invalid window size
341	ret
342
343#endif /* XCHAL_HAVE_WINDOWED */
344
345	.size	xthal_window_spill_nw, . - xthal_window_spill_nw
346
347
348//  void  xthal_window_spill (void);
349//
350//  Spill live register windows to the stack.
351//
352//  This will spill all register windows except this
353//  function's window, and possibly that of its caller.
354//  (Currently, the caller's window is spilled and reloaded
355//   when this function returns.  This may change with
356//   future optimisations.)
357//
358//  Another, simpler way to implement this might be
359//  to use an appropriate sequence of call/entry/retw
360//  instructions to force overflow of any live windows.
361//
362//  Assumes that PS.INTLEVEL=0 and PS.WOE=1 on entry/exit.
363//
364	.text
365	.align 4
366	.global	xthal_window_spill
367	.type	xthal_window_spill,@function
368xthal_window_spill:
369	abi_entry
370#if XCHAL_HAVE_WINDOWED
371	movi	a6, ~(PS_WOE_MASK|PS_INTLEVEL_MASK)	// (using a6 ensures any window using this a4..a7 is spilled)
372	rsr.ps	a5
373	mov	a4, a0			 // save a0
374	and	a2, a5, a6		 // clear WOE, INTLEVEL
375	addi	a2, a2, XCHAL_EXCM_LEVEL // set INTLEVEL = XCHAL_EXCM_LEVEL
376	wsr.ps	a2			 // apply to PS
377	rsync
378	call0	xthal_window_spill_nw
379	mov	a0, a4		// restore a0
380	wsr.ps	a5		// restore PS
381	rsync
382#endif /* XCHAL_HAVE_WINDOWED */
383	abi_return
384
385	.size	xthal_window_spill, . - xthal_window_spill
386
387