/*
 * xtensa/coreasm.h -- assembler-specific definitions that depend on CORE configuration
 *
 *  Source for configuration-independent binaries (which link in a
 *  configuration-specific HAL library) must NEVER include this file.
 *  It is perfectly normal, however, for the HAL itself to include this file.
 *
 *  This file must NOT include xtensa/config/system.h.  Any assembler
 *  header file that depends on system information should likely go
 *  in a new systemasm.h (or sysasm.h) header file.
 *
 *  NOTE: macro beqi32 is NOT configuration-dependent, and is placed
 *        here until we have a proper configuration-independent header file.
 */

/* $Id: //depot/rel/Foxhill/dot.8/Xtensa/OS/include/xtensa/coreasm.h#1 $ */

/*
 * Copyright (c) 2000-2014 Tensilica Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#ifndef XTENSA_COREASM_H
#define XTENSA_COREASM_H

/*
 *  Tell header files this is assembly source, so they can avoid non-assembler
 *  definitions (eg. C types etc):
 */
#ifndef _ASMLANGUAGE	/* conditionalize to avoid cpp warnings (3rd parties might use same macro) */
#define _ASMLANGUAGE
#endif

#include <xtensa/config/core.h>
#include <xtensa/config/specreg.h>

/*
 *  Assembly-language specific definitions (assembly macros, etc.).
 */

/*----------------------------------------------------------------------
 *  find_ms_setbit
 *
 *  This macro finds the most significant bit that is set in <as>
 *  and returns its index + <base> in <ad>, or <base> - 1 if <as> is zero.
 *  The index counts starting at zero for the lsbit, so the return
 *  value ranges from <base>-1 (no bit set) to <base>+31 (msbit set).
 *
 *  Parameters:
 *	<ad>	destination address register (any register)
 *	<as>	source address register
 *	<at>	temporary address register (must be different than <as>)
 *	<base>	constant value added to result (usually 0 or 1)
 *  On entry:
 *	<ad> = undefined if different than <as>
 *	<as> = value whose most significant set bit is to be found
 *	<at> = undefined
 *	no other registers are used by this macro.
 *  On exit:
 *	<ad> = <base> + index of msbit set in original <as>,
 *	     = <base> - 1 if original <as> was zero.
 *	<as> clobbered (if not <ad>)
 *	<at> clobbered (if not <ad>)
 *  Example:
 *	find_ms_setbit a0, a4, a0, 0		-- return in a0 index of msbit set in a4
 */

	.macro	find_ms_setbit ad, as, at, base
#if XCHAL_HAVE_NSA
	movi	\at, 31+\base
	nsau	\as, \as	// get index of \as, numbered from msbit (32 if absent)
	sub	\ad, \at, \as	// get numbering from lsbit (0..31, -1 if absent)
#else /* XCHAL_HAVE_NSA */
	movi	\at, \base	// start with result of 0 (point to lsbit of 32)

	beqz	\as, 2f		// special case for zero argument: return -1
	bltui	\as, 0x10000, 1f	// is it one of the 16 lsbits? (if so, check lower 16 bits)
	addi	\at, \at, 16	// no, increment result to upper 16 bits (of 32)
	//srli	\as, \as, 16	// check upper half (shift right 16 bits)
	extui	\as, \as, 16, 16	// check upper half (shift right 16 bits)
1:	bltui	\as, 0x100, 1f	// is it one of the 8 lsbits? (if so, check lower 8 bits)
	addi	\at, \at, 8	// no, increment result to upper 8 bits (of 16)
	srli	\as, \as, 8	// shift right to check upper 8 bits
1:	bltui	\as, 0x10, 1f	// is it one of the 4 lsbits? (if so, check lower 4 bits)
	addi	\at, \at, 4	// no, increment result to upper 4 bits (of 8)
	srli	\as, \as, 4	// shift right 4 bits to check upper half
1:	bltui	\as, 0x4, 1f	// is it one of the 2 lsbits? (if so, check lower 2 bits)
	addi	\at, \at, 2	// no, increment result to upper 2 bits (of 4)
	srli	\as, \as, 2	// shift right 2 bits to check upper half
1:	bltui	\as, 0x2, 1f	// is it the lsbit?
	addi	\at, \at, 2	// no, increment result to upper bit (of 2)
2:	addi	\at, \at, -1	// (net effect from just above: add 1;  from beqz: return -1)
	//srli	\as, \as, 1
1:				// done! \at contains index of msbit set (or -1 if none set)
	.if	0x\ad - 0x\at	// destination different than \at ? (works because regs are a0-a15)
	mov	\ad, \at	// then move result to \ad
	.endif
#endif /* XCHAL_HAVE_NSA */
	.endm	// find_ms_setbit

/*----------------------------------------------------------------------
 *  find_ls_setbit
 *
 *  This macro finds the least significant bit that is set in <as>,
 *  and returns its index in <ad>.
 *  Usage is the same as for the find_ms_setbit macro.
 *  Example:
 *	find_ls_setbit a0, a4, a0, 0	-- return in a0 index of lsbit set in a4
 */

	.macro	find_ls_setbit ad, as, at, base
	neg	\at, \as	// keep only the least-significant bit that is set...
	and	\as, \at, \as	// ... in \as
	find_ms_setbit	\ad, \as, \at, \base
	.endm	// find_ls_setbit

/*----------------------------------------------------------------------
 *  find_ls_one
 *
 *  Same as find_ls_setbit with base zero.
 *  Source (as) and destination (ad) registers must be different.
 *  Provided for backward compatibility.
 */

	.macro	find_ls_one ad, as
	find_ls_setbit	\ad, \as, \ad, 0
	.endm	// find_ls_one

/*----------------------------------------------------------------------
 *  floop, floopnez, floopgtz, floopend
 *
 *  These macros are used for fast inner loops that
 *  work whether or not the Loops option is configured.
 *  If the Loops option is configured, they simply use
 *  the zero-overhead LOOP instructions; otherwise
 *  they use explicit decrement and branch instructions.
 *
 *  They are used in pairs, with floop, floopnez or floopgtz
 *  at the beginning of the loop, and floopend at the end.
 *
 *  Each pair of loop macro calls must be given the loop count
 *  address register and a unique label for that loop.
 *
 *  Example:
 *
 *	movi	 a3, 16     // loop 16 times
 *	floop    a3, myloop1
 *	:
 *	bnez     a7, end1	// exit loop if a7 != 0
 *	:
 *	floopend a3, myloop1
 *  end1:
 *
 *  Like the LOOP instructions, these macros cannot be
 *  nested, must include at least one instruction,
 *  cannot call functions inside the loop, etc.
 *  The loop can be exited by jumping to the instruction
 *  following floopend (or elsewhere outside the loop),
 *  or continued by jumping to a NOP instruction placed
 *  immediately before floopend.
 *
 *  Unlike LOOP instructions, the register passed to floop*
 *  cannot be used inside the loop, because it is used as
 *  the loop counter if the Loops option is not configured;
 *  its value is undefined after exiting the loop.
 *  Also, because the loop counter register is active inside
 *  the loop, you can't easily use this construct to loop
 *  across a register file using ROTW as you might with LOOP
 *  instructions, unless you copy the loop register along.
 */

	/*  Named label version of the macros:  */

	.macro	floop		ar, endlabel
	floop_		\ar, .Lfloopstart_\endlabel, .Lfloopend_\endlabel
	.endm

	.macro	floopnez	ar, endlabel
	floopnez_	\ar, .Lfloopstart_\endlabel, .Lfloopend_\endlabel
	.endm

	.macro	floopgtz	ar, endlabel
	floopgtz_	\ar, .Lfloopstart_\endlabel, .Lfloopend_\endlabel
	.endm

	.macro	floopend	ar, endlabel
	floopend_	\ar, .Lfloopstart_\endlabel, .Lfloopend_\endlabel
	.endm

	/*  Numbered local label version of the macros:  */
#if 0 /*UNTESTED*/
	.macro	floop89		ar
	floop_		\ar, 8, 9f
	.endm

	.macro	floopnez89	ar
	floopnez_	\ar, 8, 9f
	.endm

	.macro	floopgtz89	ar
	floopgtz_	\ar, 8, 9f
	.endm

	.macro	floopend89	ar
	floopend_	\ar, 8b, 9
	.endm
#endif /*0*/

	/*  Underlying version of the macros:  */

	.macro	floop_	ar, startlabel, endlabelref
	.ifdef	_infloop_
	.if	_infloop_
	.err	// Error: floop cannot be nested
	.endif
	.endif
	.set	_infloop_, 1
#if XCHAL_HAVE_LOOPS
	loop	\ar, \endlabelref
#else /* XCHAL_HAVE_LOOPS */
\startlabel:
	addi	\ar, \ar, -1
#endif /* XCHAL_HAVE_LOOPS */
	.endm	// floop_

	.macro	floopnez_	ar, startlabel, endlabelref
	.ifdef	_infloop_
	.if	_infloop_
	.err	// Error: floopnez cannot be nested
	.endif
	.endif
	.set	_infloop_, 1
#if XCHAL_HAVE_LOOPS
	loopnez	\ar, \endlabelref
#else /* XCHAL_HAVE_LOOPS */
	beqz	\ar, \endlabelref
\startlabel:
	addi	\ar, \ar, -1
#endif /* XCHAL_HAVE_LOOPS */
	.endm	// floopnez_

	.macro	floopgtz_	ar, startlabel, endlabelref
	.ifdef	_infloop_
	.if	_infloop_
	.err	// Error: floopgtz cannot be nested
	.endif
	.endif
	.set	_infloop_, 1
#if XCHAL_HAVE_LOOPS
	loopgtz	\ar, \endlabelref
#else /* XCHAL_HAVE_LOOPS */
	bltz	\ar, \endlabelref
	beqz	\ar, \endlabelref
\startlabel:
	addi	\ar, \ar, -1
#endif /* XCHAL_HAVE_LOOPS */
	.endm	// floopgtz_


	.macro	floopend_	ar, startlabelref, endlabel
	.ifndef	_infloop_
	.err	// Error: floopend without matching floopXXX
	.endif
	.ifeq	_infloop_
	.err	// Error: floopend without matching floopXXX
	.endif
	.set	_infloop_, 0
#if ! XCHAL_HAVE_LOOPS
	bnez	\ar, \startlabelref
#endif /* XCHAL_HAVE_LOOPS */
\endlabel:
	.endm	// floopend_

/*----------------------------------------------------------------------
 *  crsil  --  conditional RSIL (read/set interrupt level)
 *
 *  Executes the RSIL instruction if it exists, else just reads PS.
 *  The RSIL instruction does not exist in the new exception architecture
 *  if the interrupt option is not selected.
 */

	.macro	crsil	ar, newlevel
#if XCHAL_HAVE_OLD_EXC_ARCH || XCHAL_HAVE_INTERRUPTS
	rsil	\ar, \newlevel
#else
	rsr.ps	\ar
#endif
	.endm	// crsil
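
	/*
	 *  Usage sketch (illustrative, not from the original sources):
	 *  mask interrupts around a short critical section, saving the
	 *  old PS in a2.  On configurations without RSIL this degenerates
	 *  to a plain read of PS:
	 *
	 *	crsil	a2, XCHAL_EXCM_LEVEL	// raise intlevel; old PS -> a2
	 *	...				// critical section
	 *	wsr.ps	a2			// restore previous PS
	 *	rsync
	 */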

/*----------------------------------------------------------------------
 *  safe_movi_a0  --  move constant into a0 when L32R is not safe
 *
 *  This macro is typically used by interrupt/exception handlers.
 *  Loads a 32-bit constant in a0, without using any other register,
 *  and without corrupting the LITBASE register, even when the
 *  value of the LITBASE register is unknown (eg. when application
 *  code and interrupt/exception handling code are built independently,
 *  and thus with independent values of the LITBASE register;
 *  debug monitors are one example of this).
 *
 *  Worst-case size of resulting code:  17 bytes.
 */

	.macro	safe_movi_a0	constant
#if XCHAL_HAVE_ABSOLUTE_LITERALS
	/*  Contort a PC-relative literal load even though we may be in litbase-relative mode: */
	j	1f
	.begin	no-transform			// ensure what follows is assembled exactly as-is
	.align	4				// ensure constant and call0 target ...
	.byte	0				// ... are 4-byte aligned (call0 instruction is 3 bytes long)
1:	call0	2f				// read PC (that follows call0) in a0
	.long	\constant			// 32-bit constant to load into a0
2:
	.end	no-transform
	l32i	a0, a0, 0			// load constant
#else
	movi	a0, \constant			// no LITBASE, can assume PC-relative L32R
#endif
	.endm
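
	/*
	 *  Usage sketch (illustrative, not from the original sources): in a
	 *  debug-monitor style handler where a0 is the only free register
	 *  and the interrupted application's LITBASE value is unknown:
	 *
	 *	safe_movi_a0	0x12345678	// a0 = constant, LITBASE preserved
	 */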


/*----------------------------------------------------------------------
 *  window_spill{4,8,12}
 *
 *  These macros spill callers' register windows to the stack.
 *  They work for both privileged and non-privileged tasks.
 *  Must be called from a windowed ABI context, eg. within
 *  a windowed ABI function (ie. valid stack frame, window
 *  exceptions enabled, not in exception mode, etc).
 *
 *  These macros require a single invocation of the window_spill_common
 *  macro in the same assembly unit and section.
 *
 *  Note that using window_spill{4,8,12} macros is more efficient
 *  than calling a function implemented using window_spill_function,
 *  because the latter needs extra code to figure out the size of
 *  the call to the spilling function.
 *
 *  Example usage:
 *
 *		.text
 *		.align	4
 *		.global	some_function
 *		.type	some_function,@function
 *	some_function:
 *		entry	a1, 16
 *		:
 *		:
 *
 *		window_spill4	// Spill windows of some_function's callers; preserves a0..a3 only;
 *				// to use window_spill{8,12} in this example function we'd have
 *				// to increase space allocated by the entry instruction, because
 *				// 16 bytes only allows call4; 32 or 48 bytes (+locals) are needed
 *				// for call8/window_spill8 or call12/window_spill12 respectively.
 *
 *		:
 *
 *		retw
 *
 *		window_spill_common	// instantiates code used by window_spill4
 *
 *
 *  On entry:
 *	none (if window_spill4)
 *	stack frame has enough space allocated for call8 (if window_spill8)
 *	stack frame has enough space allocated for call12 (if window_spill12)
 *  On exit:
 *	 a4..a15 clobbered (if window_spill4)
 *	 a8..a15 clobbered (if window_spill8)
 *	a12..a15 clobbered (if window_spill12)
 *	no caller windows are in live registers
 */

	.macro	window_spill4
#if XCHAL_HAVE_WINDOWED
# if XCHAL_NUM_AREGS == 16
	movi	a15, 0			// for 16-register files, no need to call to reach the end
# elif XCHAL_NUM_AREGS == 32
	call4	.L__wdwspill_assist28	// call deep enough to clear out any live callers
# elif XCHAL_NUM_AREGS == 64
	call4	.L__wdwspill_assist60	// call deep enough to clear out any live callers
# endif
#endif
	.endm	// window_spill4

	.macro	window_spill8
#if XCHAL_HAVE_WINDOWED
# if XCHAL_NUM_AREGS == 16
	movi	a15, 0			// for 16-register files, no need to call to reach the end
# elif XCHAL_NUM_AREGS == 32
	call8	.L__wdwspill_assist24	// call deep enough to clear out any live callers
# elif XCHAL_NUM_AREGS == 64
	call8	.L__wdwspill_assist56	// call deep enough to clear out any live callers
# endif
#endif
	.endm	// window_spill8

	.macro	window_spill12
#if XCHAL_HAVE_WINDOWED
# if XCHAL_NUM_AREGS == 16
	movi	a15, 0			// for 16-register files, no need to call to reach the end
# elif XCHAL_NUM_AREGS == 32
	call12	.L__wdwspill_assist20	// call deep enough to clear out any live callers
# elif XCHAL_NUM_AREGS == 64
	call12	.L__wdwspill_assist52	// call deep enough to clear out any live callers
# endif
#endif
	.endm	// window_spill12


/*----------------------------------------------------------------------
 *  window_spill_function
 *
 *  This macro outputs a function that will spill its caller's callers'
 *  register windows to the stack.  Eg. it could be used to implement
 *  a version of xthal_window_spill() that works in non-privileged tasks.
 *  This works for both privileged and non-privileged tasks.
 *
 *  Typical usage:
 *
 *		.text
 *		.align	4
 *		.global	my_spill_function
 *		.type	my_spill_function,@function
 *	my_spill_function:
 *		window_spill_function
 *
 *  On entry to resulting function:
 *	none
 *  On exit from resulting function:
 *	none (no caller windows are in live registers)
 */

	.macro	window_spill_function
#if XCHAL_HAVE_WINDOWED
#  if XCHAL_NUM_AREGS == 32
	entry	sp, 48
	bbci.l	a0, 31, 1f		// branch if called with call4
	bbsi.l	a0, 30, 2f		// branch if called with call12
	call8	.L__wdwspill_assist16	// called with call8, only need another 8
	retw
1:	call12	.L__wdwspill_assist16	// called with call4, only need another 12
	retw
2:	call4	.L__wdwspill_assist16	// called with call12, only need another 4
	retw
#  elif XCHAL_NUM_AREGS == 64
	entry	sp, 48
	bbci.l	a0, 31, 1f		// branch if called with call4
	bbsi.l	a0, 30, 2f		// branch if called with call12
	call4	.L__wdwspill_assist52	// called with call8, only need a call4
	retw
1:	call8	.L__wdwspill_assist52	// called with call4, only need a call8
	retw
2:	call12	.L__wdwspill_assist40	// called with call12, can skip a call12
	retw
#  elif XCHAL_NUM_AREGS == 16
	entry	sp, 16
	bbci.l	a0, 31, 1f	// branch if called with call4
	bbsi.l	a0, 30, 2f	// branch if called with call12
	movi	a7, 0		// called with call8
	retw
1:	movi	a11, 0		// called with call4
2:	retw			// if called with call12, everything already spilled

//	movi	a15, 0		// trick to spill all but the direct caller
//	j	1f
//	//  The entry instruction is magical in the assembler (gets auto-aligned)
//	//  so we have to jump to it to avoid falling through the padding.
//	//  We need entry/retw to know where to return.
//1:	entry	sp, 16
//	retw
#  else
#   error "unrecognized address register file size"
#  endif

#endif /* XCHAL_HAVE_WINDOWED */
	window_spill_common
	.endm	// window_spill_function

/*----------------------------------------------------------------------
 *  window_spill_common
 *
 *  Common code used by any number of invocations of the window_spill##
 *  and window_spill_function macros.
 *
 *  Must be instantiated exactly once within a given assembly unit,
 *  within call/j range of and same section as window_spill##
 *  macro invocations for that assembly unit.
 *  (Is automatically instantiated by the window_spill_function macro.)
 */

	.macro	window_spill_common
#if XCHAL_HAVE_WINDOWED && (XCHAL_NUM_AREGS == 32 || XCHAL_NUM_AREGS == 64)
	.ifndef	.L__wdwspill_defined
#  if XCHAL_NUM_AREGS >= 64
.L__wdwspill_assist60:
	entry	sp, 32
	call8	.L__wdwspill_assist52
	retw
.L__wdwspill_assist56:
	entry	sp, 16
	call4	.L__wdwspill_assist52
	retw
.L__wdwspill_assist52:
	entry	sp, 48
	call12	.L__wdwspill_assist40
	retw
.L__wdwspill_assist40:
	entry	sp, 48
	call12	.L__wdwspill_assist28
	retw
#  endif
.L__wdwspill_assist28:
	entry	sp, 48
	call12	.L__wdwspill_assist16
	retw
.L__wdwspill_assist24:
	entry	sp, 32
	call8	.L__wdwspill_assist16
	retw
.L__wdwspill_assist20:
	entry	sp, 16
	call4	.L__wdwspill_assist16
	retw
.L__wdwspill_assist16:
	entry	sp, 16
	movi	a15, 0
	retw
	.set	.L__wdwspill_defined, 1
	.endif
#endif /* XCHAL_HAVE_WINDOWED with 32 or 64 aregs */
	.endm	// window_spill_common


/*----------------------------------------------------------------------
 *  beqi32
 *
 *  This macro implements a version of BEQI for an arbitrary 32-bit
 *  immediate value:
 *
 *     beqi32 ax, ay, imm32, label
 *
 *  Compares the value in register ax with imm32 and jumps to label
 *  if they are equal.  Clobbers register ay if needed.
 */
   .macro beqi32	ax, ay, imm, label
    .ifeq ((\imm-1) & ~7)	// 1..8 ?
		beqi	\ax, \imm, \label
    .else
      .ifeq (\imm+1)		// -1 ?
		beqi	\ax, \imm, \label
      .else
        .ifeq (\imm)		// 0 ?
		beqz	\ax, \label
        .else
		//  We could also handle immediates 10,12,16,32,64,128,256
		//  but it would be a long macro...
		movi	\ay, \imm
		beq	\ax, \ay, \label
        .endif
      .endif
    .endif
   .endm // beqi32
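
	/*
	 *  Usage sketch (illustrative, not from the original sources):
	 *  branch to a local label when a3 holds a value that BEQI cannot
	 *  encode, using a4 as scratch:
	 *
	 *	beqi32	a3, a4, 0x12345678, .Lmatch
	 */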

/*----------------------------------------------------------------------
 *  isync_retw_nop
 *
 *  This macro must be invoked immediately after ISYNC if ISYNC
 *  would otherwise be immediately followed by RETW (or other instruction
 *  modifying WindowBase or WindowStart), in a context where
 *  kernel vector mode may be selected, and level-one interrupts
 *  and window overflows may be enabled, on an XEA1 configuration.
 *
 *  On hardware with erratum "XEA1KWIN" (see <xtensa/core.h> for details),
 *  XEA1 code must have at least one instruction between ISYNC and RETW if
 *  run in kernel vector mode with interrupts and window overflows enabled.
 */
	.macro	isync_retw_nop
#if XCHAL_MAYHAVE_ERRATUM_XEA1KWIN
	nop
#endif
	.endm
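
	/*
	 *  Typical pattern (an illustrative sketch): keep ISYNC and RETW
	 *  separated on configurations where erratum XEA1KWIN may apply:
	 *
	 *	isync
	 *	isync_retw_nop		// emits a NOP only where needed
	 *	retw
	 */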

/*----------------------------------------------------------------------
 *  isync_return_nop
 *
 *  This macro should be used instead of isync_retw_nop in code that is
 *  intended to run on both the windowed and call0 ABIs.
 */
        .macro  isync_return_nop
#ifdef __XTENSA_WINDOWED_ABI__
        isync_retw_nop
#endif
        .endm

/*----------------------------------------------------------------------
 *  isync_erratum453
 *
 *  This macro must be invoked at certain points in the code,
 *  in particular in exception and interrupt vectors, to work
 *  around erratum 453.
 */
	.macro	isync_erratum453
#if XCHAL_ERRATUM_453
	isync
#endif
	.endm


/*----------------------------------------------------------------------
 *  readsr
 *
 *  Wrapper for 'rsr' that constructs register names involving levels
 *  (e.g. EPCn).  Use like so:
 *      readsr epc XCHAL_DEBUGLEVEL a2
 */
	.macro	readsr  reg suf ar
	rsr.\reg\suf	\ar
	.endm

/*----------------------------------------------------------------------
 *  writesr
 *
 *  Wrapper for 'wsr' that constructs register names involving levels
 *  (e.g. EPCn).  Use like so:
 *      writesr epc XCHAL_DEBUGLEVEL a2
 */
	.macro	writesr  reg suf ar
	wsr.\reg\suf	\ar
	.endm

/*----------------------------------------------------------------------
 *  xchgsr
 *
 *  Wrapper for 'xsr' that constructs register names involving levels
 *  (e.g. EPCn).  Use like so:
 *      xchgsr epc XCHAL_DEBUGLEVEL a2
 */
	.macro	xchgsr  reg suf ar
	xsr.\reg\suf	\ar
	.endm

/*----------------------------------------------------------------------
 * INDEX_SR
 *
 * Indexing wrapper for rsr/wsr/xsr that constructs register names from
 * the provided base name and the current value of the _idx symbol.
 * Use like so:
 *     .set _idx, 0
 *     INDEX_SR rsr.ccompare a2
 *
 * This yields:  rsr.ccompare0 a2
 */
	.macro	INDEX_SR  instr ar
.ifeq (_idx)
	&instr&0	\ar
.endif
.ifeq (_idx-1)
	&instr&1	\ar
.endif
.ifeq (_idx-2)
	&instr&2	\ar
.endif
.ifeq (_idx-3)
	&instr&3	\ar
.endif
.ifeq (_idx-4)
	&instr&4	\ar
.endif
.ifeq (_idx-5)
	&instr&5	\ar
.endif
.ifeq (_idx-6)
	&instr&6	\ar
.endif
.ifeq (_idx-7)
	&instr&7	\ar
.endif
	.endm
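
	/*
	 *  Usage sketch (illustrative, not from the original sources):
	 *  clear every configured CCOMPARE register with a repeat block:
	 *
	 *	movi	a2, 0
	 *	.set	_idx, 0
	 *	.rept	XCHAL_NUM_TIMERS
	 *	INDEX_SR wsr.ccompare a2
	 *	.set	_idx, _idx+1
	 *	.endr
	 */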


/*----------------------------------------------------------------------
 *  abs
 *
 *  Implements ABS on machines that do not have it configured.
 */

#if !XCHAL_HAVE_ABS
	.macro abs arr, ars
	.ifc \arr, \ars
	// src == dest is less efficient
	bgez \arr, 1f
	neg \arr, \arr
1:
	.else
	neg \arr, \ars
	movgez \arr, \ars, \ars
	.endif
	.endm
#endif /* !XCHAL_HAVE_ABS */
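
/*
 *  Usage sketch (illustrative): with the macro above, code can use ABS
 *  unconditionally; on configurations without the instruction it expands
 *  to an equivalent neg/movgez (or bgez/neg) sequence:
 *
 *	abs	a2, a3		// a2 = |a3|
 */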


/*----------------------------------------------------------------------
 *  addx2
 *
 *  Implements addx2 on machines that do not have it configured.
 */

#if !XCHAL_HAVE_ADDX
	.macro addx2 arr, ars, art
	.ifc \arr, \art
	.ifc \arr, \ars
	// addx2 a, a, a     (not common)
	.err
	.else
	// addx2 a, b, a:  two adds give a = b + (b + a) = 2*b + a
	add \arr, \ars, \art
	add \arr, \ars, \art
	.endif
	.else
	//addx2 a, b, c
	//addx2 a, a, b
	//addx2 a, b, b
	slli \arr, \ars, 1
	add  \arr, \arr, \art
	.endif
	.endm
#endif /* !XCHAL_HAVE_ADDX */
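
/*
 *  Usage sketch (illustrative): scaled index computation, a5 = 2*a3 + a4;
 *  a single ADDX2 where configured, else SLLI+ADD via the macro above:
 *
 *	addx2	a5, a3, a4
 */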

/*----------------------------------------------------------------------
 *  addx4
 *
 *  Implements addx4 on machines that do not have it configured.
 */

#if !XCHAL_HAVE_ADDX
	.macro addx4 arr, ars, art
	.ifc \arr, \art
	.ifc \arr, \ars
	// addx4 a, a, a     (not common)
	.err
	.else
	// addx4 a, b, a
	add \arr, \ars, \art
	add \arr, \ars, \art
	add \arr, \ars, \art
	add \arr, \ars, \art
	.endif
	.else
	//addx4 a, b, c
	//addx4 a, a, b
	//addx4 a, b, b
	slli \arr, \ars, 2
	add  \arr, \arr, \art
	.endif
	.endm
#endif /* !XCHAL_HAVE_ADDX */

/*----------------------------------------------------------------------
 *  addx8
 *
 *  Implements addx8 on machines that do not have it configured.
 */

#if !XCHAL_HAVE_ADDX
	.macro addx8 arr, ars, art
	.ifc \arr, \art
	.ifc \arr, \ars
	//addx8 a, a, a     (not common)
	.err
	.else
	//addx8 a, b, a
	add \arr, \ars, \art
	add \arr, \ars, \art
	add \arr, \ars, \art
	add \arr, \ars, \art
	add \arr, \ars, \art
	add \arr, \ars, \art
	add \arr, \ars, \art
	add \arr, \ars, \art
	.endif
	.else
	//addx8 a, b, c
	//addx8 a, a, b
	//addx8 a, b, b
	slli \arr, \ars, 3
	add  \arr, \arr, \art
	.endif
	.endm
#endif /* !XCHAL_HAVE_ADDX */


/*----------------------------------------------------------------------
 *  rfe_rfue
 *
 *  Maps to RFUE on XEA1, and RFE on XEA2.  No mapping on XEAX.
 */

#if XCHAL_HAVE_XEA1
	.macro	rfe_rfue
	rfue
	.endm
#elif XCHAL_HAVE_XEA2
	.macro	rfe_rfue
	rfe
	.endm
#endif
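
/*
 *  Usage sketch (illustrative): ending a user-exception handler in code
 *  that must assemble for both XEA1 and XEA2 configurations:
 *
 *	rfe_rfue	// RFUE on XEA1, RFE on XEA2
 */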


/*----------------------------------------------------------------------
 *  abi_entry
 *
 *  Generate proper function entry sequence for the current ABI
 *  (windowed or call0).  Takes care of allocating stack space
 *  and saving the return PC, if necessary.  The corresponding
 *  abi_return macro deallocates the stack and restores the return PC.
 *
 *  Parameters are:
 *
 *	locsize		Number of bytes to allocate on the stack
 *			for local variables (and for args to pass to
 *			callees, if any calls are made).  Defaults to zero.
 *			The macro rounds this up to a multiple of 16.
 *			NOTE:  large values are allowed (e.g. up to 1 GB).
 *
 *	callsize	Maximum call size made by this function.
 *			Leave zero (default) for leaf functions, i.e. if
 *			this function makes no calls to other functions.
 *			Otherwise must be set to 4, 8, or 12 according
 *			to whether the "largest" call made is a call[x]4,
 *			call[x]8, or call[x]12 (for call0 ABI, it makes
 *			no difference whether this is set to 4, 8 or 12,
 *			but it must be set to one of these values).
 *
 *  NOTE:  It is up to the caller to align the entry point, declare the
 *  function symbol, make it global, etc.
 *
 *  NOTE:  This macro relies on assembler relaxation for large values
 *  of locsize.  It might not work with the no-transform directive.
 *  NOTE:  For the call0 ABI, this macro ensures SP is allocated or
 *  de-allocated cleanly, i.e. without temporarily allocating too much
 *  (or allocating negatively!) due to addi relaxation.
 *
 *  NOTE:  Generating the proper sequence and register allocation for
 *  making calls in an ABI independent manner is a separate topic not
 *  covered by this macro.
 *
 *  NOTE:  To access arguments, you can't use a fixed offset from SP.
 *  The offset depends on the ABI, whether the function is leaf, etc.
 *  The simplest method is probably to use the .locsz symbol, which
 *  is set by this macro to the actual number of bytes allocated on
 *  the stack, in other words, to the offset from SP to the arguments.
 *  E.g. for a function whose arguments are all 32-bit integers, you
 *  can get the 7th and 8th arguments (1st and 2nd args stored on stack)
 *  using:
 *	l32i	a2, sp, .locsz
 *	l32i	a3, sp, .locsz+4
 *  (this example works as long as locsize is under L32I's offset limit
 *   of 1020 minus up to 48 bytes of ABI-specific stack usage;
 *   otherwise you might first need to do "addi a?, sp, .locsz"
 *   or similar sequence).
 *
 *  NOTE:  For call0 ABI, this macro (and abi_return) may clobber a9
 *  (a caller-saved register).
 *
 *  Examples:
 *		abi_entry
 *		abi_entry  5
 *		abi_entry  22, 8
 *		abi_entry  0, 4
 */

	/*
	 *  Compute .locsz and .callsz without emitting any instructions.
	 *  Used by both abi_entry and abi_return.
	 *  Assumes locsize >= 0.
	 */
	.macro	abi_entry_size locsize=0, callsize=0
#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__
	.ifeq	\callsize
	 .set	.callsz, 16
	.else
	 .ifeq	\callsize-4
	  .set	.callsz, 16
	 .else
	  .ifeq	\callsize-8
	   .set	.callsz, 32
	  .else
	   .ifeq \callsize-12
	    .set .callsz, 48
	   .else
	    .error	"abi_entry: invalid call size \callsize"
	   .endif
	  .endif
	 .endif
	.endif
	.set	.locsz, .callsz + ((\locsize + 15) & -16)
#else
	.set	.callsz, \callsize
	.if	.callsz		/* if calls, need space for return PC */
	 .set	.locsz, (\locsize + 4 + 15) & -16
	.else
	 .set	.locsz, (\locsize + 15) & -16
	.endif
#endif
	.endm

	.macro abi_entry locsize=0, callsize=0
	.iflt	\locsize
	 .error	"abi_entry: invalid negative size of locals (\locsize)"
	.endif
	abi_entry_size	\locsize, \callsize
#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__
# define ABI_ENTRY_MINSIZE	3	/* size of abi_entry (no arguments) instructions in bytes */
	.ifgt	.locsz - 32760	/* .locsz > 32760 (ENTRY's max range)? */
	/*  Funky computation to try to have assembler use addmi efficiently if possible:  */
	entry	sp, 0x7F00 + (.locsz & 0xF0)
	addi	a12, sp, - ((.locsz & -0x100) - 0x7F00)
	movsp	sp, a12
	.else
	entry	sp, .locsz
	.endif
#else
# define ABI_ENTRY_MINSIZE	0	/* size of abi_entry (no arguments) instructions in bytes */
	.if	.locsz
	 .ifle	.locsz - 128	/* if locsz <= 128 */
	addi	sp, sp, -.locsz
	  .if	.callsz
	s32i	a0, sp, .locsz - 4
	  .endif
	 .elseif  .callsz	/* locsz > 128, with calls: */
	movi	a9, .locsz - 16		/* note: a9 is caller-saved */
	addi	sp, sp, -16
	s32i	a0, sp, 12
	sub	sp, sp, a9
	 .else			/* locsz > 128, no calls: */
	movi	a9, .locsz
	sub	sp, sp, a9
	 .endif			/* end */
	.endif
#endif
	.endm


/*----------------------------------------------------------------------
 *  abi_return
 *
 *  Generate proper function exit sequence for the current ABI
 *  (windowed or call0).  Takes care of freeing stack space and
 *  restoring the return PC, if necessary.
 *  NOTE:  This macro MUST be invoked following a corresponding
 *  abi_entry macro invocation.  For call0 ABI in particular,
 *  all stack and PC restoration are done according to the last
 *  abi_entry macro invoked before this macro in the assembly file.
 *
 *  Normally this macro takes no arguments.  However to allow
 *  for placing abi_return *before* abi_entry (as must be done
 *  for some highly optimized assembly), it optionally takes
 *  exactly the same arguments as abi_entry.
 */

	.macro abi_return	locsize=-1, callsize=0
	.ifge	\locsize
	abi_entry_size	\locsize, \callsize
	.endif
#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__
	retw
#else
	.if	.locsz
	 .iflt	.locsz - 128	/* if locsz < 128 */
	  .if	.callsz
	l32i	a0, sp, .locsz - 4
	  .endif
	addi	sp, sp, .locsz
	 .elseif  .callsz	/* locsz >= 128, with calls: */
	addi	a9, sp, .locsz - 16
	l32i	a0, a9, 12
	addi	sp, a9, 16
	 .else			/* locsz >= 128, no calls: */
	movi	a9, .locsz
	add	sp, sp, a9
	 .endif			/* end */
	.endif
	ret
#endif
	.endm
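
/*
 *  Usage sketch (illustrative, not from the original sources): an
 *  ABI-portable function with 16 bytes of locals that makes call[x]4-sized
 *  calls; the same source assembles for both the windowed and call0 ABIs:
 *
 *		.text
 *		.align	4
 *		.global	my_func
 *		.type	my_func,@function
 *	my_func:
 *		abi_entry	16, 4
 *		:
 *		abi_return
 */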


/*
 * HW erratum fixes.
 */

	.macro hw_erratum_487_fix
#if defined XSHAL_ERRATUM_487_FIX
	isync
#endif
	.endm

/*
 * These macros are internal, subject to change, and should not be used in
 * any new code.
 */

#define _GBL(x)    .global x
#define _TYP(x)    .type x,@function
#define _ALN(x)    .align x
#define _SIZ(x)    .size x, . - x
#define _MKEND(x)  .purgem endfunc ; .macro endfunc ; _SIZ(x) ; .purgem endfunc ; .macro endfunc ; .endm ; .endm
#define _SYMT(x)   _GBL(x); _MKEND(x); _TYP(x); _ALN(4); x:
#define _SYM2(x)   _GBL(x); _TYP(x); x:
#define _SYM(x)   _GBL(x); _MKEND(x); _ALN(4); x:
.macro endfunc ; .endm

/*
 * The DECLFUNC() macro provides a mechanism for implementing both the
 * standard and _nw interface with a single copy of the code.
 *
 * For the call0 ABI there is one function definition, labeled with
 * both the xthal_..._nw and xthal_... symbols.
 *
 * For the windowed ABI, two compilations are involved (one with the
 * __NW_FUNCTION__ symbol defined), resulting in two separate functions
 * (the _nw one without the window adjustments).
 */

#if defined(__NW_FUNCTION__)
# define DECLFUNC(x) _SYMT(x ## _nw)
#else
# if defined (__XTENSA_CALL0_ABI__)
#  define DECLFUNC(x)  _SYMT(x); _SYM2(x ## _nw)
# else
#  define DECLFUNC(x)  _SYMT(x)
# endif
#endif
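
/*
 *  Usage sketch (illustrative; these macros are internal and the function
 *  name below is hypothetical): defining a HAL-style routine once and
 *  getting both the standard and _nw entry points:
 *
 *	DECLFUNC(xthal_example)
 *		abi_entry
 *		:
 *		abi_return
 *	endfunc
 */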

#endif /*XTENSA_COREASM_H*/
