// core-save.S  --  core state save/restore routines (used by PSO)
// $Id: //depot/rel/Foxhill/dot.8/Xtensa/OS/xtos/core-save.S#1 $

// Copyright (c) 2012-2013 Tensilica Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining
// a copy of this software and associated documentation files (the
// "Software"), to deal in the Software without restriction, including
// without limitation the rights to use, copy, modify, merge, publish,
// distribute, sublicense, and/or sell copies of the Software, and to
// permit persons to whom the Software is furnished to do so, subject to
// the following conditions:
//
// The above copyright notice and this permission notice shall be included
// in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
// IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
// CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
// SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.


#include <xtensa/coreasm.h>
#include <xtensa/corebits.h>
#include <xtensa/cacheasm.h>
#include <xtensa/cacheattrasm.h>
#include <xtensa/xdm-regs.h>
#include <xtensa/xtruntime-core-state.h>
#include <xtensa/mpuasm.h>
#include "xtos-internal.h"

.weak	_idma_pso_save
//.type	xtos_C_core_save, @function


	.text


	//  (Place this alternate entry symbol *outside* the _xtos_core_save()
	//   function, to avoid confusing debugging / profiling / etc.)
	.align	4
	.global	_xtos_core_save_entry
	.type _xtos_core_save_entry,@function
_xtos_core_save_entry:
	j	.Lcore_save
	.size _xtos_core_save_entry, . - _xtos_core_save_entry


	//  int  _xtos_core_save(unsigned flags, XtosCoreState *savearea, void *code)
	//
	//  Generic processor state save routine.
	//
	//  On entry (after ENTRY if windowed):
	//	a0 = return PC
	//	a2 = flags argument
	//	a3 = ptr to save area
	//	a4 = ptr to code to jump to after save (just return if 0)
	//  Returns:
	//	0 when first returning from this call (if a4 == 0)
	//	non-zero (passed from restore call) when returning from restore
	//	(if a4 != 0, return behavior if any depends on code at a4)
	//
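	//  Example usage from C (an illustrative sketch only; the flag value 0
	//  and the variable name are assumptions, and the matching restore
	//  routine is only referenced, not defined, in this file):
	//
	//	#include <xtensa/xtruntime-core-state.h>
	//
	//	XtosCoreState save_area;	/* hypothetical, suitably aligned */
	//
	//	if (_xtos_core_save(0, &save_area, 0) == 0) {
	//	    /* First return: state saved; core may now be shut off.  */
	//	} else {
	//	    /* Returned again via the restore path, with its nonzero
	//	       return value.  */
	//	}
	//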
	.align	4
	.global	_xtos_core_save
	.type _xtos_core_save,@function
_xtos_core_save:
	abi_entry

.Lcore_save:
	s32i	a0, a3, CS_SA_areg + 0*4	// save a0 (clobbered below)
	s32i	a1, a3, CS_SA_areg + 1*4	// save a1
	s32i	a2, a3, CS_SA_areg + 2*4	// save a2 (flags arg, for debugging only)
	s32i	a4, a3, CS_SA_areg + 4*4	// save a4 (code to jump to after saving)
#ifdef __XTENSA_CALL0_ABI__
	//  Callee-saved regs:
	s32i	a12, a3, CS_SA_areg + 12*4	// save a12
	s32i	a13, a3, CS_SA_areg + 13*4	// save a13
	s32i	a14, a3, CS_SA_areg + 14*4	// save a14
	s32i	a15, a3, CS_SA_areg + 15*4	// save a15
#else
	call4	xthal_window_spill		// spill live caller windows to stack
#endif
	j	.Ls1

	.align	16
.Ls1:
#if XCHAL_HAVE_INTERRUPTS
	rsil	a4, 15				// disable interrupts before clobbering a0
#elif XCHAL_HAVE_EXCEPTIONS
	rsr.ps	a4
#endif

#if XCHAL_HAVE_EXCEPTIONS
	s32i	a4, a3, CS_SA_ps		// save PS
#endif

#if XCHAL_HAVE_IDMA
	movi	a4, _idma_pso_save
	beqz	a4, .LnoIDMA
#  ifdef __XTENSA_CALL0_ABI__
	mov	a13, a3
	mov	a12, a2
	addi	a3, a3, CS_SA_idmaregs		// location for IDMA regs save
	call0	_idma_pso_save
	mov	a3, a13
	mov	a2, a12
#  else
	mov	a6, a2
	addi	a7, a3, CS_SA_idmaregs		// location for IDMA regs save
	call4	_idma_pso_save
#  endif
.LnoIDMA:
#endif

// not yet implemented
//#  ifdef __XTENSA_CALL0_ABI__
//	mov a13, a3
//	mov a12, a2
//	call0	xtos_C_core_save
//	mov a3, a13
//	mov a2, a12
//#  else
//	mov    a6, a2
//	mov    a7, a3
//	call4  xtos_C_core_save
//#  endif
//#endif

#if XCHAL_HAVE_CCOUNT
	rsr.ccount	a5			// save CCOUNT restore value
#endif
#if XCHAL_HAVE_INTERRUPTS
	rsr.interrupt	a6			// save pending interrupts
	s32i	a6, a3, CS_SA_interrupt
#endif
#if XCHAL_HAVE_CCOUNT
	s32i	a5, a3, CS_SA_ccount
#endif

	call0	_xtos_core_save_common		// save and shut off -- returns after wakeup

	//  a2 now contains return value.
	//  a3 still points to save area.
	//  Interrupts still disabled.

	//  Restore WINDOWSTART to single window.  Leave WINDOWBASE wherever it is.
	//rsr.windowbase	a6
	//movi	a5, 1
	//ssl	a6
	//sll	a5, a5
	//wsr.windowstart	a5
	//rsync

	l32i	a0, a3, CS_SA_areg + 0*4	// restore a0
	l32i	a1, a3, CS_SA_areg + 1*4	// restore a1
#ifdef __XTENSA_CALL0_ABI__
	//  Callee-saved regs:
	l32i	a12, a3, CS_SA_areg + 12*4	// restore a12
	l32i	a13, a3, CS_SA_areg + 13*4	// restore a13
	l32i	a14, a3, CS_SA_areg + 14*4	// restore a14
	l32i	a15, a3, CS_SA_areg + 15*4	// restore a15
#endif

#if XCHAL_HAVE_EXCEPTIONS
	//  Now that we've restored windowed state (a0,a1), we can restore interrupts.
	l32i	a4, a3, CS_SA_ps		// restore PS
	wsr.ps	a4
	rsync
#endif

	abi_return



	//  Generic processor state save routine, callable from assembly level.
	//  (Does not assume a valid stack; saves all ARs; no window spill, etc.)
	//
	//  On entry:
	//	a0 = return PC
	//	a2 = flags argument
	//	a3 = ptr to save area
	//	a4 = ptr to code to jump to after save (just return if 0)
	//  All other registers are saved.
	//  Returns:
	//	0 when first returning from this call (if a4 == 0)
	//	non-zero (passed from restore call) when returning from restore
	//	(if a4 != 0, return behavior if any depends on code at a4)
	//
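	//  Example call sequence from assembly (an illustrative sketch only;
	//  the save_area symbol is an assumption):
	//
	//	movi	a2, 0			// flags
	//	movi	a3, save_area		// ptr to XtosCoreState save area
	//	movi	a4, 0			// no post-save code: just return
	//	call0	_xtos_core_save_nw	// a0 <- return PC; returns 0 here
	//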
	.align	4
	.global	_xtos_core_save_nw
_xtos_core_save_nw:
	s32i	a0, a3, CS_SA_areg + 0*4	// save a0 (clobbered below)
	s32i	a1, a3, CS_SA_areg + 1*4	// save a1
	s32i	a2, a3, CS_SA_areg + 2*4	// save a2 (flags arg, for debugging only)
	s32i	a4, a3, CS_SA_areg + 4*4	// save a4 (code to jump to after saving)
	s32i	a5, a3, CS_SA_areg + 5*4	// save a5
	s32i	a6, a3, CS_SA_areg + 6*4	// save a6
	s32i	a7, a3, CS_SA_areg + 7*4	// save a7
	j	.Ls2

	.align	16
.Ls2:
#if XCHAL_HAVE_INTERRUPTS
	rsil	a4, 15				// disable interrupts before rotating etc.
#elif XCHAL_HAVE_EXCEPTIONS
	rsr.ps	a4
#endif

#if XCHAL_HAVE_EXCEPTIONS
	s32i	a4, a3, CS_SA_ps		// save PS
#endif

#if XCHAL_HAVE_CCOUNT
	rsr.ccount	a5			// save CCOUNT restore value
#endif
#if XCHAL_HAVE_INTERRUPTS
	rsr.interrupt	a6			// save pending interrupts
	s32i	a6, a3, CS_SA_interrupt
#endif
#if XCHAL_HAVE_CCOUNT
	s32i	a5, a3, CS_SA_ccount
#endif

#if XCHAL_HAVE_WINDOWED
	movi	a5, XCHAL_NUM_AREGS / 8 - 1	// number of 8-reg chunks to save (a0-a7 already done)
#endif
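	//  Each rotw 2 below rotates WINDOWBASE by 2 (= 8 ARs), so the a11/a13
	//  written just before the rotate become the new a3/a5 (save-area
	//  pointer / chunk counter) for the next 8-register chunk.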
1:	s32i	a8, a3, CS_SA_areg + 8*4	// save a8
	s32i	a9, a3, CS_SA_areg + 9*4	// save a9
	s32i	a10,a3, CS_SA_areg + 10*4	// save a10
	s32i	a11,a3, CS_SA_areg + 11*4	// save a11
	s32i	a12,a3, CS_SA_areg + 12*4	// save a12
	s32i	a13,a3, CS_SA_areg + 13*4	// save a13
	s32i	a14,a3, CS_SA_areg + 14*4	// save a14
	s32i	a15,a3, CS_SA_areg + 15*4	// save a15
#if XCHAL_HAVE_WINDOWED
	addi	a11, a3, 8*4			// next frame (a11 will become a3, a13 will become a5)
	addi	a13, a5, -1
	rotw	2
	bnez	a5, 1b				// loop until all frames done
	rotw	2				// back to starting windowbase
#endif

	movi	a1, 0				// a1 == 0 signals no stack: don't save caller regs from it
	call0	_xtos_core_save_common

	//  a2 now contains return value.
	//  a3 still points to save area.
	//  Interrupts still disabled.

#if XCHAL_HAVE_WINDOWED
	rotw	-2
	movi	a5, XCHAL_NUM_AREGS / 8 - 1	// 8-reg chunks to restore (a0-a7 already done)
	addi	a3, a11, XCHAL_NUM_AREGS * 4
1:	rotw	-2
	addi	a3, a11, -8*4
	addi	a5, a13, -1
#endif
	l32i	a8, a3, CS_SA_areg + 8*4	// restore a8
	l32i	a9, a3, CS_SA_areg + 9*4	// restore a9
	l32i	a10,a3, CS_SA_areg + 10*4	// restore a10
	l32i	a11,a3, CS_SA_areg + 11*4	// restore a11
	l32i	a12,a3, CS_SA_areg + 12*4	// restore a12
	l32i	a13,a3, CS_SA_areg + 13*4	// restore a13
	l32i	a14,a3, CS_SA_areg + 14*4	// restore a14
	l32i	a15,a3, CS_SA_areg + 15*4	// restore a15
#if XCHAL_HAVE_WINDOWED
	bnez	a5, 1b				// loop until all frames done
	//  We're now back to the starting windowbase, and the original a3.
#endif

	l32i	a0, a3, CS_SA_areg + 0*4	// restore a0
	l32i	a1, a3, CS_SA_areg + 1*4	// restore a1
	//  Don't clobber the return value, so don't restore a2.
	l32i	a4, a3, CS_SA_areg + 4*4	// restore a4
	l32i	a5, a3, CS_SA_areg + 5*4	// restore a5
	l32i	a6, a3, CS_SA_areg + 6*4	// restore a6
#if XCHAL_HAVE_EXCEPTIONS
	//  Now that we've restored windowed state (a0, a1, done rotating), we can restore interrupts.
	l32i	a7, a3, CS_SA_ps		// restore PS
	wsr.ps	a7
	rsync
#endif
	l32i	a7, a3, CS_SA_areg + 7*4	// restore a7
	ret




	//  Common state save / shut-off code.
	//
	//	a0 = return PC within caller shut-off routine
	//	a1 = stack if != 0
	//	a2 = flags argument
	//	a3 = pointer to _xtos_pso_savearea
	//	a4 = PS to save/restore
	//	PS.INTLEVEL = 15  (interrupts disabled, except NMI)
	//	a5-a15 (and other ARs) are available.
	//	NOTE: CCOUNT and INTERRUPT have already been saved in the save area.
	//
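	//  (Called via call0 from _xtos_core_save and _xtos_core_save_nw above,
	//  both of which have already raised PS.INTLEVEL and saved CCOUNT and
	//  INTERRUPT into the save area.)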
	.align	4
	//.global	_xtos_core_save_common
_xtos_core_save_common:
//#if XCHAL_HAVE_EXCEPTIONS
//	s32i	a4, a3, CS_SA_ps		// save PS
//#endif

#if XCHAL_HAVE_CACHE_BLOCKOPS
	pfend.o				// terminate non-essential block-prefetch ops
#endif

#if XCHAL_HAVE_WINDOWED
	// The following discussion applies only if we have a stack:
	// At this point, all non-live register windows have been spilled to the
	// stack. However, we cannot leave any spilled registers in our stack frame
	// or our caller's stack frame, since these frames could change after we
	// return and before restore() is called. So all spilled registers in the
	// current and previous stack frames must be saved to the save area. This
	// means a max of 16 registers: 4 base save registers for our caller, up to
	// 8 extra save registers for our caller, and 4 base save registers for the
	// next function up from our caller. The stack looks like this:
	//
	//	------------------------------- <---- stack ptr of function (i - 2)
	//	  Base save area i - 3
	//	-------------------------------
	//	  Extra save area i - 1
	//	  (0-8 registers depending on call type)
	//	-------------------------------
	//	  Locals i - 1
	//	------------------------------- <---- stack ptr of function (i - 1)
	//	  Base save area i - 2                (our caller)
	//
	//	------------------------------- <---- Our stack ptr (a1)
	//	  Base save area i - 1
	//	-------------------------------
	//
	// We don't have any extra save area or locals in our frame. See the
	// Xtensa Programmer's Guide for more details of the stack layout.
	//
	// NOTE that we are not counting the call0 to _xtos_core_save_common(), since
	// that results in neither a register window rotation nor a stack ptr change.

	s32i	a1, a3, CS_SA_caller_regs_saved	// save flag (nonzero = caller regs saved below)
	beqz	a1, .Lendcr			// skip if no stack

	// Save our caller's a0-a3 from the base save area (a1-16)

	addi	a4, a1, -16
	l32i	a5, a4, 0
	l32i	a6, a4, 4
	s32i	a5, a3, CS_SA_caller_regs	// caller a0
	s32i	a6, a3, CS_SA_caller_regs + 4	// caller a1
	l32i	a5, a4, 8
	l32i	a6, a4, 12
	s32i	a5, a3, CS_SA_caller_regs + 8	// caller a2
	s32i	a6, a3, CS_SA_caller_regs + 12	// caller a3

	// Save our caller's caller's a0-a3 from its base save area (a1+0)

	l32i	a5, a1, 0
	l32i	a6, a1, 4
	s32i	a5, a3, CS_SA_caller_regs + 16	// caller's caller a0
	s32i	a6, a3, CS_SA_caller_regs + 20	// caller's caller a1
	l32i	a5, a1, 8
	l32i	a6, a1, 12
	s32i	a5, a3, CS_SA_caller_regs + 24	// caller's caller a2
	s32i	a6, a3, CS_SA_caller_regs + 28	// caller's caller a3

	// Now save 0-8 registers for our caller from its extra save area.
	// NOTE we can't use a0 directly because we are one level down.

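	//  Worked example (illustrative): for a call8 caller, the top two bits
	//  of the return address are 2, so below:
	//	a4 = 2 << 4 = 32
	//	bottom = sp(i-2) - 32,  top = sp(i-2) - 16
	//  i.e. a 16-byte (4-register) extra save area.  A call12 caller (top
	//  bits == 3) yields 32 bytes (8 registers); call0/call4 callers (top
	//  bits 0/1) have no extra save area.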
	l32i	a4, a3, CS_SA_areg		// pull in the saved return address (a0)
	extui	a4, a4, 30, 2			// top 2 bits of return addr (caller's call size)
	blti	a4, 2, .Lendcr			// no extra regs to save
	l32i	a5, a1, 4			// a5 <- caller's caller a1
	slli	a4, a4, 4
	sub	a4, a5, a4			// a4 <- bottom of extra save area
	addi	a5, a5, -16			// a5 <- top of extra save area
	addi	a6, a3, CS_SA_caller_regs + 32	// location to start saving to
.Lcrloop:
	l32i	a7, a4, 0			// save in groups of 4 registers
	l32i	a8, a4, 4
	s32i	a7, a6, 0
	s32i	a8, a6, 4
	l32i	a7, a4, 8
	l32i	a8, a4, 12
	s32i	a7, a6, 8
	s32i	a8, a6, 12
	addi	a4, a4, 16
	addi	a6, a6, 16
	blt	a4, a5, .Lcrloop
.Lendcr:
#endif

	// We want to save the CCOUNT value as soon as feasible after disabling
	// interrupts, so that the counter does not run past any CCOMPARE value
	// and miss a timer interrupt. The callers of this function have saved
	// the values of CCOUNT and INTERRUPT immediately after disabling interrupts.
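	//  (Illustrative consequence: if CCOUNT were instead read late, it could
	//  pass a CCOMPAREn value after the INTERRUPT snapshot was taken; the
	//  restored core would then see CCOUNT beyond CCOMPAREn with no pending
	//  timer interrupt, and wait a full 2^32-cycle wraparound for the next
	//  match.)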
#if XCHAL_HAVE_CCOUNT
	.set	_idx, 0
	.rept	XCHAL_NUM_TIMERS
	INDEX_SR rsr.ccompare a5
	s32i	a5, a3, CS_SA_ccompare + 4*_idx
	.set	_idx, _idx+1
	.endr
#endif

	s32i	a0, a3, CS_SA_restore_label	// return PC for returning from this function (after restore)
#if XCHAL_HAVE_INTERRUPTS || XCHAL_HAVE_EXCEPTIONS
	rsr.epc1	a5
	s32i	a5, a3, CS_SA_epc1
	rsr.excsave1	a5
	s32i	a5, a3, CS_SA_excsave1
# ifdef XCHAL_DOUBLEEXC_VECTOR_VADDR
	rsr.depc	a5
	s32i	a5, a3, CS_SA_depc
# endif
#endif
#if XCHAL_HAVE_WINDOWED
	rsr.windowbase	a5
	s32i	a5, a3, CS_SA_windowbase	// save windowbase
	rsr.windowstart	a5
	s32i	a5, a3, CS_SA_windowstart	// save windowstart
#endif
	rsr.sar	a5
	s32i	a5, a3, CS_SA_sar		// save sar

#if XCHAL_HAVE_PSO_CDM
	//  Save PWRCTL, and update it according to the flags argument.
	movi	a4, XDM_MISC_PWRCTL
	movi	a6, PWRCTL_MEM_WAKEUP
	rer	a7, a4				// get pwrctl
	s32i	a7, a3, CS_SA_pwrctl		// save pwrctl
	//  Avoid setting power-control bits if not already set, i.e. clear them only.
	bbci.l	a2, XTOS_COREF_PSO_SHIFT, 1f	// if not shutting off, don't touch power bits

	//  Set the PWRCTL MEM_WAKEUP bit according to flags (whether to let mem power off).
	or	a5, a7, a6	// set...
	xor	a5, a5, a6	// ... and clear MEM_WAKEUP bit to write
	and	a6, a2, a6	// isolate MEM_WAKEUP bit from flags
	or	a5, a5, a6	// set MEM_WAKEUP bit to write from flags
	//  Clear the PWRCTL DEBUG_WAKEUP bit if cleared in flags (if letting debug power off).
	movi	a6, ~PWRCTL_DEBUG_WAKEUP
	or	a6, a2, a6	// isolate DEBUG_WAKEUP bit from flags
	and	a6, a5, a6	// clear it if it was clear in flags
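	//  Net effect of the above, in C-like pseudocode (illustrative only):
	//	new = (pwrctl & ~PWRCTL_MEM_WAKEUP)
	//	      | (flags & PWRCTL_MEM_WAKEUP);	// MEM_WAKEUP taken from flags
	//	new &= flags | ~PWRCTL_DEBUG_WAKEUP;	// DEBUG_WAKEUP cleared if clear in flags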
	//  Update PWRCTL
	wer	a6, a4		// write new pwrctl
	//extw			// let the new pwrctl value settle
1:
#endif

	.set	_idx, 2
	.rept	XCHAL_NUM_INTLEVELS+XCHAL_HAVE_NMI-1
	INDEX_SR rsr.epc a5
	s32i	a5, a3, CS_SA_epc + 4*(_idx-2)
	INDEX_SR rsr.eps a5
	s32i	a5, a3, CS_SA_eps + 4*(_idx-2)
	INDEX_SR rsr.excsave a5
	s32i	a5, a3, CS_SA_excsave + 4*(_idx-2)
	.set	_idx, _idx+1
	.endr

#if XCHAL_HAVE_LOOPS
	rsr.lbeg	a5
	s32i	a5, a3, CS_SA_lbeg
	rsr.lend	a5
	s32i	a5, a3, CS_SA_lend
	rsr.lcount	a5
	s32i	a5, a3, CS_SA_lcount
#endif
#if XCHAL_HAVE_ABSOLUTE_LITERALS
	rsr.litbase	a5
	s32i	a5, a3, CS_SA_litbase
#endif
#if XCHAL_HAVE_VECBASE
	rsr.vecbase	a5
	s32i	a5, a3, CS_SA_vecbase
#endif
#if XCHAL_HAVE_S32C1I && (XCHAL_HW_MIN_VERSION >= XTENSA_HWVERSION_RC_2009_0)	/* have ATOMCTL ? */
	rsr.atomctl	a5
	s32i	a5, a3, CS_SA_atomctl
#endif
#if XCHAL_HAVE_PREFETCH
	movi	a5, 0			// disable prefetch during shutoff
	xsr.prefctl	a5
	s32i	a5, a3, CS_SA_prefctl
#endif
#if XCHAL_USE_MEMCTL
	rsr.memctl	a5
	s32i	a5, a3, CS_SA_memctl
#endif
#if XCHAL_HAVE_INTERRUPTS
	rsr.intenable	a5
	s32i	a5, a3, CS_SA_intenable
#endif
#if XCHAL_HAVE_DEBUG
	//  NOTE:  restore of debug state is conditional,
	//  as the power-down and wakeup code might be actively debugged.
	rsr.icount	a5
	s32i	a5, a3, CS_SA_icount
	rsr.icountlevel	a5
	s32i	a5, a3, CS_SA_icountlevel
	rsr.debugcause	a5
	s32i	a5, a3, CS_SA_debugcause	// (won't get restored?)
	//rsr.ddr	a5
	//s32i	a5, a3, CS_SA_ddr
# if XCHAL_NUM_IBREAK
	rsr.ibreakenable	a5
	s32i	a5, a3, CS_SA_ibreakenable
# endif
	.set	_idx, 0
	.rept	XCHAL_NUM_IBREAK
	INDEX_SR rsr.ibreaka a5
	s32i	a5, a3, CS_SA_ibreaka + 4*_idx
	.set	_idx, _idx+1
	.endr
	.set	_idx, 0
	.rept	XCHAL_NUM_DBREAK
	INDEX_SR rsr.dbreakc a5
	s32i	a5, a3, CS_SA_dbreakc + 4*_idx
	INDEX_SR rsr.dbreaka a5
	s32i	a5, a3, CS_SA_dbreaka + 4*_idx
	.set	_idx, _idx+1
	.endr
#endif

	.set	_idx, 0
	.rept	XCHAL_NUM_MISC_REGS
	INDEX_SR rsr.misc a5
	s32i	a5, a3, CS_SA_misc + 4*_idx
	.set	_idx, _idx+1
	.endr

#if XCHAL_HAVE_MEM_ECC_PARITY
	rsr.mepc	a5
	s32i	a5, a3, CS_SA_mepc
	rsr.meps	a5
	s32i	a5, a3, CS_SA_meps
	rsr.mesave	a5
	s32i	a5, a3, CS_SA_mesave
	rsr.mesr	a5
	s32i	a5, a3, CS_SA_mesr
	rsr.mecr	a5
	s32i	a5, a3, CS_SA_mecr
	rsr.mevaddr	a5
	s32i	a5, a3, CS_SA_mevaddr
#endif

	/*  TIE state  */
	addi	a4, a3, CS_SA_ncp
	xchal_ncp_store	a4, a5,a6,a7,a8		// save non-coprocessor state
#if XCHAL_HAVE_CP
	rsr.cpenable	a5
	s32i	a5, a3, CS_SA_cpenable
	movi	a6, -1
	wsr.cpenable	a6			// enable all coprocessors
	rsync
	xchal_cp0_store  a4, a5,a6,a7,a8  continue=1
	xchal_cp1_store  a4, a5,a6,a7,a8  continue=1
	xchal_cp2_store  a4, a5,a6,a7,a8  continue=1
	xchal_cp3_store  a4, a5,a6,a7,a8  continue=1
	xchal_cp4_store  a4, a5,a6,a7,a8  continue=1
	xchal_cp5_store  a4, a5,a6,a7,a8  continue=1
	xchal_cp6_store  a4, a5,a6,a7,a8  continue=1
	xchal_cp7_store  a4, a5,a6,a7,a8  continue=1
	//xchal_cp8_store  a4, a5,a6,a7,a8  continue=1
	//xchal_cp9_store  a4, a5,a6,a7,a8  continue=1
	//xchal_cp10_store a4, a5,a6,a7,a8  continue=1
	//xchal_cp11_store a4, a5,a6,a7,a8  continue=1
	//xchal_cp12_store a4, a5,a6,a7,a8  continue=1
	//xchal_cp13_store a4, a5,a6,a7,a8  continue=1
	//xchal_cp14_store a4, a5,a6,a7,a8  continue=1
	//xchal_cp15_store a4, a5,a6,a7,a8  continue=1
#endif

	/*  TLB state (for known MMU types only, not internal custom)  */
#if XCHAL_HAVE_MIMIC_CACHEATTR || XCHAL_HAVE_XLT_CACHEATTR
	addi	a4, a3, CS_SA_tlbs	// where to start storing TLB entry info
	movi	a5, 0x20000000
	movi	a6, 0
1:	rdtlb1	a7, a6			// read DTLB entry PPN + CA
	s32i	a7, a4, 0
	ritlb1	a7, a6			// read ITLB entry PPN + CA
	s32i	a7, a4, 4
	addi	a4, a4, 8
	add	a6, a6, a5
	bnez	a6, 1b

#elif XCHAL_HAVE_PTP_MMU
	//  Declare a table of TLB ways to save/restore.
	//  Each table entry is two 32-bit words.  The first packs four
	//  log2(page size) values, one byte per possible [DI]TLBCFG page-size
	//  field value for that way.  The second is the number of entries in
	//  the way (low 8 bits); bit 15 set marks a DTLB-only way (one with no
	//  ITLB counterpart to save).
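	//  Example decode (illustrative): the way-4 entry below,
	//	.long	0x1A181614, 4
	//  packs log2 page sizes 0x14 (1MB), 0x16 (4MB), 0x18 (16MB) and
	//  0x1A (64MB), one per byte, with 4 entries in that way.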
	.section .rodata, "a"
	.global _xtos_pso_tlbmap
	.global _xtos_pso_tlbmap_end
	.type _xtos_pso_tlbmap, @object
_xtos_pso_tlbmap:
	.long	0x0C0C0C0C, ARF_ENTRIES	// *TLB way 0, 4/8 entries of 4KB
	.long	0x0C0C0C0C, ARF_ENTRIES	// *TLB way 1, 4/8 entries of 4KB
	.long	0x0C0C0C0C, ARF_ENTRIES	// *TLB way 2, 4/8 entries of 4KB
	.long	0x0C0C0C0C, ARF_ENTRIES	// *TLB way 3, 4/8 entries of 4KB
	.long	0x1A181614, 4		// *TLB way 4, 4 entries of 1MB/4MB/16MB/64MB
# if XCHAL_HAVE_SPANNING_WAY	/* MMU v3 */
	.long	0x1C1B1C1B, 4		// *TLB way 5, 4 entries of 128MB/256MB
	.long	0x1B1D1B1D, 8		// *TLB way 6, 8 entries of 512MB/128MB
# endif
	.long	0x0C0C0C0C, 0x8001	// DTLB way 7, 1 entry of 4KB
	.long	0x0C0C0C0C, 0x8001	// DTLB way 8, 1 entry of 4KB
	.long	0x0C0C0C0C, 0x8001	// DTLB way 9, 1 entry of 4KB
_xtos_pso_tlbmap_end:
	.size _xtos_pso_tlbmap, . - _xtos_pso_tlbmap

	.text
	addi	a4, a3, CS_SA_tlbs	// where to start storing TLB entry info
	movi	a10, _xtos_pso_tlbmap
	movi	a11, _xtos_pso_tlbmap_end
	rsr.dtlbcfg	a14		// page size index (0..3) for each DTLB way
	rsr.itlbcfg	a15		// page size index (0..3) for each ITLB way
	s32i	a14, a3, CS_SA_dtlbcfg
	s32i	a15, a3, CS_SA_itlbcfg
	rsr.ptevaddr	a5
	s32i	a5, a3, CS_SA_ptevaddr
	rsr.rasid	a5
	s32i	a5, a3, CS_SA_rasid
	//  Loop from last way to first (less register pressure that way).
.Loop_tlbmap:
	addi	a11, a11, -8		// next way
	l32i	a8, a11, 0		// map of four (page size log2) per index for this way
	// DTLB page size:
	extui	a12, a14, 0, 4		// page size index for this DTLB way
	srli	a14, a14, 4		// (for next way)
	ssa8l	a12			// prepare to shift right by 8*a12
	srl	a12, a8			// page size log2 for this DTLB way
	ssl	a12			// prepare to shift left by a12
	movi	a12, 1			// (to compute 1 << (page size log2))
	sll	a12, a12		// page size for this DTLB way
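	//  (Worked example, illustrative: if this way's map is 0x1A181614 and
	//  its DTLBCFG field is 1, byte 1 of the map is 0x16, so the page size
	//  is 1 << 0x16 = 4MB -- also the stride between entry indices below.)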
	//  Save all entries of this DTLB way:
	l32i	a9, a11, 4		// number of entries for this way
	sub	a5, a11, a10		// way number * 8
	srli	a5, a5, 3		// way number
	extui	a9, a9, 0, 8
1:	rdtlb0	a6, a5			// read DTLB entry VPN + ASID ...
	rdtlb1	a7, a5			// read DTLB entry PPN + CA ...
	add	a5, a5, a12		// next entry of this DTLB way
	s32i	a6, a4, 0		// save entry ...
	s32i	a7, a4, 4
	addi	a4, a4, 8
	addi	a9, a9, -1
	bnez	a9, 1b

	// ITLB page size:
	extui	a12, a15, 0, 4		// page size index for this ITLB way
	srli	a15, a15, 4		// (for next way)
	ssa8l	a12			// prepare to shift right by 8*a12
	srl	a12, a8			// page size log2 for this ITLB way
	ssl	a12			// prepare to shift left by a12
	movi	a12, 1			// (to compute 1 << (page size log2))
	sll	a12, a12		// page size for this ITLB way

	//  Save all entries of this ITLB way:
	l32i	a9, a11, 4		// number of entries for this way
	sub	a5, a11, a10		// way number * 8
	srli	a5, a5, 3		// way number
	bbsi.l	a9, 15, 2f		// skip ITLB if this is a DTLB-only way
	extui	a9, a9, 0, 8
1:	ritlb0	a6, a5			// read ITLB entry VPN + ASID ...
	ritlb1	a7, a5			// read ITLB entry PPN + CA ...
	add	a5, a5, a12		// next entry of this ITLB way
	s32i	a6, a4, 0		// save entry ...
	s32i	a7, a4, 4
	addi	a4, a4, 8
	addi	a9, a9, -1
	bnez	a9, 1b
2:
	bne	a11, a10, .Loop_tlbmap	// loop for next TLB way
	//  Done saving TLBs.
#endif

#if XCHAL_HAVE_CACHE_BLOCKOPS
	pfwait.a			// wait for any remaining block-prefetch ops
#endif

#if XCHAL_HAVE_MPU
	addi	a4, a3, CS_SA_mpuentry	// location for MPU save
	mpu_read_map  a4, a5, a6
	rsr.cacheadrdis a4
	addi	a5, a3, CS_SA_cacheadrdis
	s32i	a4, a5, 0

#if XCHAL_DCACHE_IS_WRITEBACK
	//  Must write this piece back to memory, because if it stays
	//  in the cache and we try to restore with caches bypassed,
	//  the wrong (stale) values will be fetched from memory.
	//  TODO: See if possible to replace with call to xthal_dcache_region_writeback
	//  TODO: If going to write back full dcache below, skip this step
	addi	a4, a3, CS_SA_mpuentry
	movi	a5, CS_SA_ncp - CS_SA_mpuentry
	dcache_writeback_region a4, a5, a7, a8
#endif
#endif

	//  With data cache coherency enabled, we need a full data cache
	//  writeback and invalidate, and then coherency disabled, before shut-off.
	//  Otherwise, if we'll let the dcache power off, write back its contents.
	//
	//  We make sure the signature only gets written after everything
	//  else is written back (if we write back at all), and only gets
	//  written back if the rest gets written back.
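	//  Decision logic below, in pseudocode (illustrative):
	//	if (shutting off && snoops enabled)	// coherent dcache case
	//	    writeback+invalidate all; write signature; disable snoops;
	//	else if (memories will power off)	// MEM_WAKEUP clear in flags
	//	    writeback all; write signature;
	//	else
	//	    writeback just the save area; write signature;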
	movi	a6, CORE_STATE_SIGNATURE
#if XCHAL_DCACHE_IS_WRITEBACK
# if XCHAL_HAVE_PSO_CDM && XCHAL_DCACHE_IS_COHERENT && XCHAL_HW_MIN_VERSION >= XTENSA_HWVERSION_RE_2012_0
	rsr.memctl	a4
	bbci.l	a2, XTOS_COREF_PSO_SHIFT, 1f	// if not shutting off, leave snoops as is
	bbci.l	a4, MEMCTL_SNOOP_EN_SHIFT, 1f	// snoops (coherence) enabled?
	dcache_writeback_inv_all a4, a5, a7, 0	// yes: writeback-invalidate
	memw					// wait for writeback to complete
	s32i	a6, a3, CS_SA_signature
	dhwbi	a3, CS_SA_signature
	//  Now that the dcache is empty, make sure snoops are off during shut-off.
	addi	a4, a4, -MEMCTL_SNOOP_EN
	wsr.memctl	a4
	j	9f
1:
# endif
	bbsi.l	a2, PWRCTL_MEM_WAKEUP_SHIFT, 7f	// memories staying powered?  then writeback only the save area
	dcache_writeback_all	a4, a5, a7, 0	// caches lose power: writeback everything
	memw					// wait for writeback to complete
	j	8f

	// The signature and the cache/TLB state must be written out to
	// main memory even though the caches stay on, because on restart
	// we will come up with caches bypassed and need to read the state
	// back before the cache/TLB is set up.
7:
	mov	a4, a3
	movi	a5, CS_SA_ncp
	dcache_writeback_region a4, a5, a7, a8
	memw
8:
	s32i	a6, a3, CS_SA_signature
	dhwb	a3, CS_SA_signature		// needed even if caches stay on
#else
	s32i	a6, a3, CS_SA_signature
#endif

9:	l32i	a4, a3, CS_SA_areg + 4*4	// restore a4 (code to jump to after saving)
	memw					// wait for signature to be in memory

	beqz	a4, 1f				// code to jump to?
	jx	a4				// yes, jump to it
1:	l32i	a0, a3, CS_SA_restore_label	// no, return:  restore return PC
	movi	a2, 0				// return 0
	ret


	.size	_xtos_core_save, . - _xtos_core_save
