1/* SPDX-License-Identifier: GPL-2.0 */
2/*
3 * arch/ia64/kernel/entry.S
4 *
5 * Kernel entry points.
6 *
7 * Copyright (C) 1998-2003, 2005 Hewlett-Packard Co
8 *	David Mosberger-Tang <davidm@hpl.hp.com>
9 * Copyright (C) 1999, 2002-2003
10 *	Asit Mallick <Asit.K.Mallick@intel.com>
11 * 	Don Dugger <Don.Dugger@intel.com>
12 *	Suresh Siddha <suresh.b.siddha@intel.com>
13 *	Fenghua Yu <fenghua.yu@intel.com>
14 * Copyright (C) 1999 VA Linux Systems
15 * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
16 */
17/*
18 * ia64_switch_to now places the correct virtual mapping in TR2 for
19 * kernel stack. This allows us to handle interrupts without changing
20 * to physical mode.
21 *
22 * Jonathan Nicklin	<nicklin@missioncriticallinux.com>
23 * Patrick O'Rourke	<orourke@missioncriticallinux.com>
24 * 11/07/2000
25 */
26/*
27 * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
28 *                    VA Linux Systems Japan K.K.
29 *                    pv_ops.
30 */
31/*
32 * Global (preserved) predicate usage on syscall entry/exit path:
33 *
34 *	pKStk:		See entry.h.
35 *	pUStk:		See entry.h.
36 *	pSys:		See entry.h.
37 *	pNonSys:	!pSys
38 */
39
40#include <linux/export.h>
41#include <linux/pgtable.h>
42#include <asm/asmmacro.h>
43#include <asm/cache.h>
44#include <asm/errno.h>
45#include <asm/kregs.h>
46#include <asm/asm-offsets.h>
47#include <asm/percpu.h>
48#include <asm/processor.h>
49#include <asm/thread_info.h>
50#include <asm/unistd.h>
51#include <asm/ftrace.h>
52
53#include "minstate.h"
54
55	/*
56	 * execve() is special because in case of success, we need to
57	 * setup a null register window frame.
58	 */
59ENTRY(ia64_execve)
	/*
	 * Forwards in0-in2 to sys_execve() and returns its result in r8,
	 * sign-extended from 32 bits.  loc0 keeps the caller's rp and loc1 the
	 * caller's ar.pfs across the call.  A negative result (p7) returns
	 * immediately; otherwise (p6) ar.pfs is cleared and the register state
	 * zapped at the bottom of this routine is wiped before returning.
	 */
60	/*
61	 * Allocate 8 input registers since ptrace() may clobber them
62	 */
63	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8)
64	alloc loc1=ar.pfs,8,2,3,0
65	mov loc0=rp
66	.body
67	mov out0=in0			// filename
68	;;				// stop bit between alloc and call
69	mov out1=in1			// argv
70	mov out2=in2			// envp
71	br.call.sptk.many rp=sys_execve
72.ret0:
73	cmp4.ge p6,p7=r8,r0		// p6 <- (r8 >= 0), p7 <- (r8 < 0); 32-bit compare
74	mov ar.pfs=loc1			// restore ar.pfs
75	sxt4 r8=r8			// return 64-bit result
76	;;
77	stf.spill [sp]=f0		// write f0 to [sp]; reloaded by the ldf.fill's below
78	mov rp=loc0
79(p6)	mov ar.pfs=r0			// clear ar.pfs on success
80(p7)	br.ret.sptk.many rp		// error: return without zapping any state
81
82	/*
83	 * In theory, we'd have to zap this state only to prevent leaking of
84	 * security sensitive state (e.g., if current->mm->dumpable is zero).  However,
85	 * this executes in less than 20 cycles even on Itanium, so it's not worth
86	 * optimizing for...).
87	 */
	// Cleared here: ar.unat, ar.lc, r4-r7, f2-f5, f12-f31 and b1-b5.
88	mov ar.unat=0; 		mov ar.lc=0
89	mov r4=0;		mov f2=f0;		mov b1=r0
90	mov r5=0;		mov f3=f0;		mov b2=r0
91	mov r6=0;		mov f4=f0;		mov b3=r0
92	mov r7=0;		mov f5=f0;		mov b4=r0
93	ldf.fill f12=[sp];	mov f13=f0;		mov b5=r0
94	ldf.fill f14=[sp];	ldf.fill f15=[sp];	mov f16=f0
95	ldf.fill f17=[sp];	ldf.fill f18=[sp];	mov f19=f0
96	ldf.fill f20=[sp];	ldf.fill f21=[sp];	mov f22=f0
97	ldf.fill f23=[sp];	ldf.fill f24=[sp];	mov f25=f0
98	ldf.fill f26=[sp];	ldf.fill f27=[sp];	mov f28=f0
99	ldf.fill f29=[sp];	ldf.fill f30=[sp];	mov f31=f0
100	br.ret.sptk.many rp
101END(ia64_execve)
102
103/*
104 * sys_clone2(u64 flags, u64 ustack_base, u64 ustack_size, u64 parent_tidptr, u64 child_tidptr,
105 *	      u64 tls)
106 */
107GLOBAL_ENTRY(sys_clone2)
	/*
	 * Saves a switch_stack (DO_SAVE_SWITCH_STACK), forwards in0-in5
	 * unchanged to ia64_clone() as out0-out5, then pops the switch stack
	 * and returns.  loc0 keeps rp and loc1 keeps ar.pfs (read into r16 by
	 * the alloc) across the call.
	 */
108	/*
109	 * Allocate 8 input registers since ptrace() may clobber them
110	 */
111	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8)
112	alloc r16=ar.pfs,8,2,6,0
113	DO_SAVE_SWITCH_STACK
114	mov loc0=rp
115	mov loc1=r16                             // save ar.pfs across ia64_clone
116	.body
117	mov out0=in0				// flags
118	mov out1=in1				// ustack_base
119	mov out2=in2				// ustack_size
120	mov out3=in3				// parent_tidptr
121	mov out4=in4				// child_tidptr
122	mov out5=in5				// tls
123	br.call.sptk.many rp=ia64_clone
124.ret1:	.restore sp
125	adds sp=IA64_SWITCH_STACK_SIZE,sp	// pop the switch stack
126	mov ar.pfs=loc1
127	mov rp=loc0
128	br.ret.sptk.many rp
129END(sys_clone2)
130
131/*
132 * sys_clone(u64 flags, u64 ustack_base, u64 parent_tidptr, u64 child_tidptr, u64 tls)
133 *	Deprecated.  Use sys_clone2() instead.
134 */
135GLOBAL_ENTRY(sys_clone)
	/*
	 * Legacy clone entry point.  Saves a switch_stack, calls ia64_clone()
	 * with a fixed stack size of 16, then pops the switch stack and
	 * returns.  loc0 keeps rp and loc1 keeps ar.pfs across the call.
	 *
	 * ia64_clone() is called with the argument order documented for
	 * sys_clone2(): (flags, ustack_base, ustack_size, parent_tidptr,
	 * child_tidptr, tls).  sys_clone() has no ustack_size argument, so its
	 * parent_tidptr, child_tidptr and tls arrive in in2, in3 and in4 and
	 * have to be moved up by one slot into out3, out4 and out5.
	 */
136	/*
137	 * Allocate 8 input registers since ptrace() may clobber them
138	 */
139	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8)
140	alloc r16=ar.pfs,8,2,6,0
141	DO_SAVE_SWITCH_STACK
142	mov loc0=rp
143	mov loc1=r16                             // save ar.pfs across ia64_clone
144	.body
145	mov out0=in0				// flags
146	mov out1=in1				// ustack_base
147	mov out2=16				// stacksize (compensates for 16-byte scratch area)
148	mov out3=in2				// parent_tidptr (3rd sys_clone argument)
149	mov out4=in3				// child_tidptr (4th sys_clone argument)
150	mov out5=in4				// tls (5th sys_clone argument)
151	br.call.sptk.many rp=ia64_clone
152.ret2:	.restore sp
153	adds sp=IA64_SWITCH_STACK_SIZE,sp	// pop the switch stack
154	mov ar.pfs=loc1
155	mov rp=loc0
156	br.ret.sptk.many rp
157END(sys_clone)
158
159/*
160 * prev_task <- ia64_switch_to(struct task_struct *next)
161 *	With Ingo's new scheduler, interrupts are disabled when this routine gets
162 *	called.  The code starting at .map relies on this.  The rest of the code
163 *	doesn't care about the interrupt masking status.
164 */
165GLOBAL_ENTRY(ia64_switch_to)
	/*
	 * in0 = next task; r13 = current (outgoing) task on entry.
	 *
	 * Saves a switch_stack, stores sp into the outgoing task's ksp slot,
	 * loads sp from the incoming task's ksp slot, updates the CURRENT
	 * kernel register and r13, restores the incoming switch_stack and
	 * returns the outgoing task pointer in r8.
	 *
	 * The branch to .map is taken only when next is not init_task and its
	 * granule number differs from the CURRENT_STACK kernel register.
	 */
166	.prologue
167	alloc r16=ar.pfs,1,0,0,0
168	DO_SAVE_SWITCH_STACK
169	.body
170
171	adds r22=IA64_TASK_THREAD_KSP_OFFSET,r13	// r22 = &current ksp slot
172	movl r25=init_task
173	mov r27=IA64_KR(CURRENT_STACK)		// r27 = granule number mapped last time
174	adds r21=IA64_TASK_THREAD_KSP_OFFSET,in0	// r21 = &next ksp slot
175	dep r20=0,in0,61,3		// physical address of "next"
176	;;
177	st8 [r22]=sp			// save kernel stack pointer of old task
178	shr.u r26=r20,IA64_GRANULE_SHIFT	// r26 = granule number of "next"
179	cmp.eq p7,p6=r25,in0		// p7 <- next is init_task, p6 <- it is not
180	;;
181	/*
182	 * If we've already mapped this task's page, we can skip doing it again.
183	 */
184(p6)	cmp.eq p7,p6=r26,r27
185(p6)	br.cond.dpnt .map
186	;;
187.done:
188	ld8 sp=[r21]			// load kernel stack pointer of new task
189	MOV_TO_KR(CURRENT, in0, r8, r9)		// update "current" application register
190	mov r8=r13			// return pointer to previously running task
191	mov r13=in0			// set "current" pointer
192	;;
193	DO_LOAD_SWITCH_STACK
194
195#ifdef CONFIG_SMP
196	sync.i				// ensure "fc"s done by this CPU are visible on other CPUs
197#endif
198	br.ret.sptk.many rp		// boogie on out in new context
199
	// Insert a data translation register entry (slot IA64_TR_CURRENT_STACK)
	// covering the granule of "next", then resume at .done.
200.map:
201	RSM_PSR_IC(r25)			// interrupts (psr.i) are already disabled here
202	movl r25=PAGE_KERNEL
203	;;
204	srlz.d
205	or r23=r25,r20			// construct PA | page properties
206	mov r25=IA64_GRANULE_SHIFT<<2
207	;;
208	MOV_TO_ITIR(p0, r25, r8)
209	MOV_TO_IFA(in0, r8)		// VA of next task...
210	;;
211	mov r25=IA64_TR_CURRENT_STACK
212	MOV_TO_KR(CURRENT_STACK, r26, r8, r9)	// remember last page we mapped...
213	;;
214	itr.d dtr[r25]=r23		// wire in new mapping...
215	SSM_PSR_IC_AND_SRLZ_D(r8, r9)	// reenable the psr.ic bit
216	br.cond.sptk .done
217END(ia64_switch_to)
218
219/*
220 * Note that interrupts are enabled during save_switch_stack and load_switch_stack.  This
221 * means that we may get an interrupt with "sp" pointing to the new kernel stack while
222 * ar.bspstore is still pointing to the old kernel backing store area.  Since ar.rsc,
223 * ar.rnat, ar.bsp, and ar.bspstore are all preserved by interrupts, this is not a
224 * problem.  Also, we don't need to specify unwind information for preserved registers
225 * that are not modified in save_switch_stack as the right unwind information is already
226 * specified at the call-site of save_switch_stack.
227 */
228
229/*
230 * save_switch_stack:
231 *	- r16 holds ar.pfs
232 *	- b7 holds address to return to
233 *	- rp (b0) holds return address to save
234 */
235GLOBAL_ENTRY(save_switch_stack)
	/*
	 * Stores r4-r7, f2-f5, f12-f31, b0-b5, ar.pfs (passed in r16), ar.lc,
	 * ar.unat, the caller's ar.unat, ar.rnat, ar.bspstore, ar.fpsr and pr
	 * into the switch_stack slots addressed as SW(x)+16 from sp, then
	 * branches back through b7.
	 *
	 * r2, r3, r14 and r15 serve as store pointers; r17-r26 and r29 hold
	 * values read from application/branch registers until they are stored.
	 */
236	.prologue
237	.altrp b7
238	flushrs			// flush dirty regs to backing store (must be first in insn group)
239	.save @priunat,r17
240	mov r17=ar.unat		// preserve caller's
241	.body
242#ifdef CONFIG_ITANIUM
243	adds r2=16+128,sp
244	adds r3=16+64,sp
245	adds r14=SW(R4)+16,sp
246	;;
247	st8.spill [r14]=r4,16		// spill r4
248	lfetch.fault.excl.nt1 [r3],128
249	;;
250	lfetch.fault.excl.nt1 [r2],128
251	lfetch.fault.excl.nt1 [r3],128
252	;;
253	lfetch.fault.excl [r2]
254	lfetch.fault.excl [r3]
255	adds r15=SW(R5)+16,sp
256#else
257	add r2=16+3*128,sp
258	add r3=16,sp
259	add r14=SW(R4)+16,sp
260	;;
261	st8.spill [r14]=r4,SW(R6)-SW(R4)	// spill r4 and prefetch offset 0x1c0
262	lfetch.fault.excl.nt1 [r3],128	//		prefetch offset 0x010
263	;;
264	lfetch.fault.excl.nt1 [r3],128	//		prefetch offset 0x090
265	lfetch.fault.excl.nt1 [r2],128	//		prefetch offset 0x190
266	;;
267	lfetch.fault.excl.nt1 [r3]	//		prefetch offset 0x110
268	lfetch.fault.excl.nt1 [r2]	//		prefetch offset 0x210
269	adds r15=SW(R5)+16,sp
270#endif
271	;;
272	st8.spill [r15]=r5,SW(R7)-SW(R5)	// spill r5
273	mov.m ar.rsc=0			// put RSE in mode: enforced lazy, little endian, pl 0
274	add r2=SW(F2)+16,sp		// r2 = &sw->f2
275	;;
276	st8.spill [r14]=r6,SW(B0)-SW(R6)	// spill r6
277	mov.m r18=ar.fpsr		// preserve fpsr
278	add r3=SW(F3)+16,sp		// r3 = &sw->f3
279	;;
280	stf.spill [r2]=f2,32
281	mov.m r19=ar.rnat
282	mov r21=b0
283
284	stf.spill [r3]=f3,32
285	st8.spill [r15]=r7,SW(B2)-SW(R7)	// spill r7
286	mov r22=b1
287	;;
288	// since we're done with the spills, read and save ar.unat:
289	mov.m r29=ar.unat
290	mov.m r20=ar.bspstore
291	mov r23=b2
292	stf.spill [r2]=f4,32
293	stf.spill [r3]=f5,32
294	mov r24=b3
295	;;
296	st8 [r14]=r21,SW(B1)-SW(B0)		// save b0
297	st8 [r15]=r23,SW(B3)-SW(B2)		// save b2
298	mov r25=b4
299	mov r26=b5
300	;;
301	st8 [r14]=r22,SW(B4)-SW(B1)		// save b1
302	st8 [r15]=r24,SW(AR_PFS)-SW(B3)		// save b3
303	mov r21=ar.lc		// I-unit
304	stf.spill [r2]=f12,32
305	stf.spill [r3]=f13,32
306	;;
307	st8 [r14]=r25,SW(B5)-SW(B4)		// save b4
308	st8 [r15]=r16,SW(AR_LC)-SW(AR_PFS)	// save ar.pfs
309	stf.spill [r2]=f14,32
310	stf.spill [r3]=f15,32
311	;;
312	st8 [r14]=r26				// save b5
313	st8 [r15]=r21				// save ar.lc
314	stf.spill [r2]=f16,32
315	stf.spill [r3]=f17,32
316	;;
317	stf.spill [r2]=f18,32
318	stf.spill [r3]=f19,32
319	;;
320	stf.spill [r2]=f20,32
321	stf.spill [r3]=f21,32
322	;;
323	stf.spill [r2]=f22,32
324	stf.spill [r3]=f23,32
325	;;
326	stf.spill [r2]=f24,32
327	stf.spill [r3]=f25,32
328	;;
329	stf.spill [r2]=f26,32
330	stf.spill [r3]=f27,32
331	;;
332	stf.spill [r2]=f28,32
333	stf.spill [r3]=f29,32
334	;;
335	stf.spill [r2]=f30,SW(AR_UNAT)-SW(F30)	// r2 now points at sw->ar_unat
336	stf.spill [r3]=f31,SW(PR)-SW(F31)	// r3 now points at sw->pr
337	add r14=SW(CALLER_UNAT)+16,sp
338	;;
339	st8 [r2]=r29,SW(AR_RNAT)-SW(AR_UNAT)	// save ar.unat
340	st8 [r14]=r17,SW(AR_FPSR)-SW(CALLER_UNAT) // save caller_unat
341	mov r21=pr
342	;;
343	st8 [r2]=r19,SW(AR_BSPSTORE)-SW(AR_RNAT) // save ar.rnat
344	st8 [r3]=r21				// save predicate registers
345	;;
346	st8 [r2]=r20				// save ar.bspstore
347	st8 [r14]=r18				// save fpsr
348	mov ar.rsc=3		// put RSE back into eager mode, pl 0
349	br.cond.sptk.many b7	// return through b7, not rp
350END(save_switch_stack)
351
352/*
353 * load_switch_stack:
354 *	- "invala" MUST be done at call site (normally in DO_LOAD_SWITCH_STACK)
355 *	- b7 holds address to return to
356 *	- must not touch r8-r11
357 */
358GLOBAL_ENTRY(load_switch_stack)
	/*
	 * Reloads b0-b5, ar.pfs, ar.lc, pr, ar.bspstore, ar.rnat, ar.unat,
	 * ar.fpsr, f2-f5, f12-f31 and r4-r7 from the switch_stack slots
	 * addressed as SW(x)+16 from sp, then branches back through b7.
	 *
	 * r2, r3, r14 and r15 serve as load pointers; r16-r19 and r21-r30
	 * hold loaded values until they are moved into their registers.
	 */
359	.prologue
360	.altrp b7
361
362	.body
363	lfetch.fault.nt1 [sp]
364	adds r2=SW(AR_BSPSTORE)+16,sp
365	adds r3=SW(AR_UNAT)+16,sp
366	mov ar.rsc=0						// put RSE into enforced lazy mode
367	adds r14=SW(CALLER_UNAT)+16,sp
368	adds r15=SW(AR_FPSR)+16,sp
369	;;
370	ld8 r27=[r2],(SW(B0)-SW(AR_BSPSTORE))	// bspstore
371	ld8 r29=[r3],(SW(B1)-SW(AR_UNAT))	// unat
372	;;
373	ld8 r21=[r2],16		// restore b0
374	ld8 r22=[r3],16		// restore b1
375	;;
376	ld8 r23=[r2],16		// restore b2
377	ld8 r24=[r3],16		// restore b3
378	;;
379	ld8 r25=[r2],16		// restore b4
380	ld8 r26=[r3],16		// restore b5
381	;;
382	ld8 r16=[r2],(SW(PR)-SW(AR_PFS))	// ar.pfs
383	ld8 r17=[r3],(SW(AR_RNAT)-SW(AR_LC))	// ar.lc
384	;;
385	ld8 r28=[r2]		// restore pr
386	ld8 r30=[r3]		// restore rnat
387	;;
388	ld8 r18=[r14],16	// restore caller's unat
389	ld8 r19=[r15],24	// restore fpsr
390	;;
391	ldf.fill f2=[r14],32
392	ldf.fill f3=[r15],32
393	;;
394	ldf.fill f4=[r14],32
395	ldf.fill f5=[r15],32
396	;;
397	ldf.fill f12=[r14],32
398	ldf.fill f13=[r15],32
399	;;
400	ldf.fill f14=[r14],32
401	ldf.fill f15=[r15],32
402	;;
403	ldf.fill f16=[r14],32
404	ldf.fill f17=[r15],32
405	;;
406	ldf.fill f18=[r14],32
407	ldf.fill f19=[r15],32
408	mov b0=r21
409	;;
410	ldf.fill f20=[r14],32
411	ldf.fill f21=[r15],32
412	mov b1=r22
413	;;
414	ldf.fill f22=[r14],32
415	ldf.fill f23=[r15],32
416	mov b2=r23
417	;;
418	mov ar.bspstore=r27
419	mov ar.unat=r29		// establish unat holding the NaT bits for r4-r7
420	mov b3=r24
421	;;
422	ldf.fill f24=[r14],32
423	ldf.fill f25=[r15],32
424	mov b4=r25
425	;;
426	ldf.fill f26=[r14],32
427	ldf.fill f27=[r15],32
428	mov b5=r26
429	;;
430	ldf.fill f28=[r14],32
431	ldf.fill f29=[r15],32
432	mov ar.pfs=r16
433	;;
434	ldf.fill f30=[r14],32
435	ldf.fill f31=[r15],24
436	mov ar.lc=r17
437	;;
438	ld8.fill r4=[r14],16
439	ld8.fill r5=[r15],16
440	mov pr=r28,-1
441	;;
442	ld8.fill r6=[r14],16
443	ld8.fill r7=[r15],16
444
445	mov ar.unat=r18				// restore caller's unat
446	mov ar.rnat=r30				// must restore after bspstore but before rsc!
447	mov ar.fpsr=r19				// restore fpsr
448	mov ar.rsc=3				// put RSE back into eager mode, pl 0
449	br.cond.sptk.many b7			// return through b7, not rp
450END(load_switch_stack)
451
452	/*
453	 * Invoke a system call, but do some tracing before and after the call.
454	 * We MUST preserve the current register frame throughout this routine
455	 * because some system calls (such as ia64_execve) directly
456	 * manipulate ar.pfs.
457	 */
458GLOBAL_ENTRY(ia64_trace_syscall)
	/*
	 * Flow: spill f6-f11 into pt_regs, call syscall_trace_enter (a
	 * negative result in r8 goes to strace_error), reload f6-f11, re-read
	 * the syscall number from pt_regs.r15, look up and call the handler,
	 * store r8/r10 into pt_regs, call syscall_trace_leave and branch to
	 * ia64_work_pending_syscall_end.
	 */
459	PT_REGS_UNWIND_INFO(0)
460	/*
461	 * We need to preserve the scratch registers f6-f11 in case the system
462	 * call is sigreturn.
463	 */
464	adds r16=PT(F6)+16,sp
465	adds r17=PT(F7)+16,sp
466	;;
467 	stf.spill [r16]=f6,32
468 	stf.spill [r17]=f7,32
469	;;
470 	stf.spill [r16]=f8,32
471 	stf.spill [r17]=f9,32
472	;;
473 	stf.spill [r16]=f10
474 	stf.spill [r17]=f11
475	br.call.sptk.many rp=syscall_trace_enter // give parent a chance to catch syscall args
476	cmp.lt p6,p0=r8,r0			// check tracehook
477	adds r2=PT(R8)+16,sp			// r2 = &pt_regs.r8
478	adds r3=PT(R10)+16,sp			// r3 = &pt_regs.r10
479	mov r10=0
480(p6)	br.cond.sptk strace_error		// syscall failed ->
481	adds r16=PT(F6)+16,sp
482	adds r17=PT(F7)+16,sp
483	;;
484	ldf.fill f6=[r16],32
485	ldf.fill f7=[r17],32
486	;;
487	ldf.fill f8=[r16],32
488	ldf.fill f9=[r17],32
489	;;
490	ldf.fill f10=[r16]
491	ldf.fill f11=[r17]
492	// the syscall number may have changed, so re-load it and re-calculate the
493	// syscall entry-point:
494	adds r15=PT(R15)+16,sp			// r15 = &pt_regs.r15 (syscall #)
495	;;
496	ld8 r15=[r15]
497	mov r3=NR_syscalls - 1
498	;;
499	adds r15=-1024,r15			// r15 = syscall # - 1024 (table index)
500	movl r16=sys_call_table
501	;;
502	shladd r20=r15,3,r16			// r20 = sys_call_table + 8*(syscall-1024)
503	cmp.leu p6,p7=r15,r3			// p6 <- index in range (unsigned), p7 <- out of range
504	;;
505(p6)	ld8 r20=[r20]				// load address of syscall entry point
506(p7)	movl r20=sys_ni_syscall
507	;;
508	mov b6=r20
509	br.call.sptk.many rp=b6			// do the syscall
510.strace_check_retval:
511	cmp.lt p6,p0=r8,r0			// syscall failed?
512	adds r2=PT(R8)+16,sp			// r2 = &pt_regs.r8
513	adds r3=PT(R10)+16,sp			// r3 = &pt_regs.r10
514	mov r10=0
515(p6)	br.cond.sptk strace_error		// syscall failed ->
516	;;					// avoid RAW on r10
517.strace_save_retval:
518.mem.offset 0,0; st8.spill [r2]=r8		// store return value in slot for r8
519.mem.offset 8,0; st8.spill [r3]=r10		// clear error indication in slot for r10
520	br.call.sptk.many rp=syscall_trace_leave // give parent a chance to catch return value
521.ret3:
522(pUStk)	cmp.eq.unc p6,p0=r0,r0			// p6 <- pUStk
523(pUStk)	rsm psr.i				// disable interrupts
524	br.cond.sptk ia64_work_pending_syscall_end
525
	// Reached with r8 < 0 and r2 = &pt_regs.r8.  If the saved pt_regs.r8 is
	// non-zero, report the error as r10 = -1 with r8 negated; otherwise
	// r8/r10 are stored unchanged.
526strace_error:
527	ld8 r3=[r2]				// load pt_regs.r8
528	sub r9=0,r8				// negate return value to get errno value
529	;;
530	cmp.ne p6,p0=r3,r0			// is pt_regs.r8!=0?
531	adds r3=16,r2				// r3=&pt_regs.r10
532	;;
533(p6)	mov r10=-1
534(p6)	mov r8=r9
535	br.cond.sptk .strace_save_retval
536END(ia64_trace_syscall)
537
538	/*
539	 * When traced and returning from sigreturn, we invoke syscall_trace but then
540	 * go straight to ia64_leave_kernel rather than ia64_leave_syscall.
541	 */
542GLOBAL_ENTRY(ia64_strace_leave_kernel)
	// Calls syscall_trace_leave, then branches to ia64_leave_kernel.
543	PT_REGS_UNWIND_INFO(0)
544{	/*
545	 * Some versions of gas generate bad unwind info if the first instruction of a
546	 * procedure doesn't go into the first slot of a bundle.  This is a workaround.
547	 */
548	nop.m 0
549	nop.i 0
550	br.call.sptk.many rp=syscall_trace_leave // give parent a chance to catch return value
551}
552.ret4:	br.cond.sptk ia64_leave_kernel
553END(ia64_strace_leave_kernel)
554
555ENTRY(call_payload)
	/*
	 * r4 points at two consecutive 8-byte words: the entry address of fn
	 * followed by fn's gp.  r5 is passed to fn as its only argument.
	 * loc0, loc1 and loc2 keep rp, ar.pfs and gp across the call.  After
	 * fn returns, pKStk is cleared and pUStk is set before returning.
	 */
556	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(0)
557	/* call the kernel_thread payload; fn is in r4, arg - in r5 */
558	alloc loc1=ar.pfs,0,3,1,0
559	mov loc0=rp
560	mov loc2=gp
561	mov out0=r5		// arg
562	ld8 r14 = [r4], 8	// fn.address
563	;;
564	mov b6 = r14
565	ld8 gp = [r4]		// fn.gp
566	;;
567	br.call.sptk.many rp=b6	// fn(arg)
568.ret12:	mov gp=loc2
569	mov rp=loc0
570	mov ar.pfs=loc1
571	/* ... and if it has returned, we are going to userland */
572	cmp.ne pKStk,pUStk=r0,r0	// r0 != r0 is false: pKStk <- 0, pUStk <- 1
573	br.ret.sptk.many rp
574END(call_payload)
575
576GLOBAL_ENTRY(ia64_ret_from_clone)
	/*
	 * Calls ia64_invoke_schedule_tail, then call_payload when pKStk is
	 * set.  Afterwards r8 is set to 0 and the thread flags are tested
	 * against _TIF_SYSCALL_TRACEAUDIT: if any of those bits is set,
	 * control goes to .strace_check_retval, otherwise execution falls
	 * through into ia64_ret_from_syscall.
	 */
577	PT_REGS_UNWIND_INFO(0)
578{	/*
579	 * Some versions of gas generate bad unwind info if the first instruction of a
580	 * procedure doesn't go into the first slot of a bundle.  This is a workaround.
581	 */
582	nop.m 0
583	nop.i 0
584	/*
585	 * We need to call schedule_tail() to complete the scheduling process.
586	 * Called by ia64_switch_to() after ia64_clone()->copy_thread().  r8 contains the
587	 * address of the previously executing task.
588	 */
589	br.call.sptk.many rp=ia64_invoke_schedule_tail
590}
591.ret8:
592(pKStk)	br.call.sptk.many rp=call_payload
593	adds r2=TI_FLAGS+IA64_TASK_SIZE,r13	// r2 = address of the thread flags word
594	;;
595	ld4 r2=[r2]				// r2 = thread flags
596	;;
597	mov r8=0				// return value 0
598	and r2=_TIF_SYSCALL_TRACEAUDIT,r2
599	;;
600	cmp.ne p6,p0=r2,r0			// p6 <- a trace/audit flag is set
601(p6)	br.cond.spnt .strace_check_retval
602	;;					// added stop bits to prevent r8 dependency
603END(ia64_ret_from_clone)
604	// fall through
605GLOBAL_ENTRY(ia64_ret_from_syscall)
	// r8 holds the syscall result.  A negative r8 (p7) branches to
	// handle_syscall_error; otherwise execution falls through into
	// ia64_leave_syscall with r10 = 0 and r2 = &pt_regs.r8.
606	PT_REGS_UNWIND_INFO(0)
607	cmp.ge p6,p7=r8,r0			// syscall executed successfully?
608	adds r2=PT(R8)+16,sp			// r2 = &pt_regs.r8
609	mov r10=r0				// clear error indication in r10
610(p7)	br.cond.spnt handle_syscall_error	// handle potential syscall failure
611END(ia64_ret_from_syscall)
612	// fall through
613
614/*
615 * ia64_leave_syscall(): Same as ia64_leave_kernel, except that it doesn't
616 *	need to switch to bank 0 and doesn't restore the scratch registers.
617 *	To avoid leaking kernel bits, the scratch registers are set to
618 *	the following known-to-be-safe values:
619 *
620 *		  r1: restored (global pointer)
621 *		  r2: cleared
622 *		  r3: 1 (when returning to user-level)
623 *	      r8-r11: restored (syscall return value(s))
624 *		 r12: restored (user-level stack pointer)
625 *		 r13: restored (user-level thread pointer)
626 *		 r14: set to __kernel_syscall_via_epc
627 *		 r15: restored (syscall #)
628 *	     r16-r17: cleared
629 *		 r18: user-level b6
630 *		 r19: cleared
631 *		 r20: user-level ar.fpsr
632 *		 r21: user-level b0
633 *		 r22: cleared
634 *		 r23: user-level ar.bspstore
635 *		 r24: user-level ar.rnat
636 *		 r25: user-level ar.unat
637 *		 r26: user-level ar.pfs
638 *		 r27: user-level ar.rsc
639 *		 r28: user-level ip
640 *		 r29: user-level psr
641 *		 r30: user-level cfm
642 *		 r31: user-level pr
643 *	      f6-f11: cleared
644 *		  pr: restored (user-level pr)
645 *		  b0: restored (user-level rp)
646 *	          b6: restored
647 *		  b7: set to __kernel_syscall_via_epc
648 *	     ar.unat: restored (user-level ar.unat)
649 *	      ar.pfs: restored (user-level ar.pfs)
650 *	      ar.rsc: restored (user-level ar.rsc)
651 *	     ar.rnat: restored (user-level ar.rnat)
652 *	 ar.bspstore: restored (user-level ar.bspstore)
653 *	     ar.fpsr: restored (user-level ar.fpsr)
654 *	      ar.ccv: cleared
655 *	      ar.csd: cleared
656 *	      ar.ssd: cleared
657 */
658GLOBAL_ENTRY(ia64_leave_syscall)
	/*
	 * Outline: disable interrupts and set pLvSys; if p6 is set, test the
	 * thread flags against TIF_WORK_MASK and branch to
	 * .work_pending_syscall when any bit is set; otherwise reload state
	 * from pt_regs (addressed as PT(x)+16 from r12), clear f6-f11 and
	 * ar.csd/ar.ccv/ar.ssd, and branch to skip_rbs_switch (pKStk) or
	 * rbs_switch.
	 */
659	PT_REGS_UNWIND_INFO(0)
660	/*
661	 * work.need_resched etc. mustn't get changed by this CPU before it returns to
662	 * user- or fsys-mode, hence we disable interrupts early on.
663	 *
664	 * p6 controls whether current_thread_info()->flags needs to be checked for
665	 * extra work.  We always check for extra work when returning to user-level.
666	 * With CONFIG_PREEMPTION, we also check for extra work when the preempt_count
667	 * is 0.  After extra work processing has been completed, execution
668	 * resumes at ia64_work_processed_syscall with p6 set to 1 if the extra-work-check
669	 * needs to be redone.
670	 */
671#ifdef CONFIG_PREEMPTION
672	RSM_PSR_I(p0, r2, r18)			// disable interrupts
673	cmp.eq pLvSys,p0=r0,r0			// pLvSys=1: leave from syscall
674(pKStk) adds r20=TI_PRE_COUNT+IA64_TASK_SIZE,r13
675	;;
676	.pred.rel.mutex pUStk,pKStk
677(pKStk) ld4 r21=[r20]			// r21 <- preempt_count
678(pUStk)	mov r21=0			// r21 <- 0
679	;;
680	cmp.eq p6,p0=r21,r0		// p6 <- pUStk || (preempt_count == 0)
681#else /* !CONFIG_PREEMPTION */
682	RSM_PSR_I(pUStk, r2, r18)
683	cmp.eq pLvSys,p0=r0,r0		// pLvSys=1: leave from syscall
684(pUStk)	cmp.eq.unc p6,p0=r0,r0		// p6 <- pUStk
685#endif
686.global ia64_work_processed_syscall;
687ia64_work_processed_syscall:
688#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
689	adds r2=PT(LOADRS)+16,r12
690	MOV_FROM_ITC(pUStk, p9, r22, r19)	// fetch time at leave
691	adds r18=TI_FLAGS+IA64_TASK_SIZE,r13
692	;;
693(p6)	ld4 r31=[r18]				// load current_thread_info()->flags
694	ld8 r19=[r2],PT(B6)-PT(LOADRS)		// load ar.rsc value for "loadrs"
695	adds r3=PT(AR_BSPSTORE)+16,r12		// deferred
696	;;
697#else
698	adds r2=PT(LOADRS)+16,r12
699	adds r3=PT(AR_BSPSTORE)+16,r12
700	adds r18=TI_FLAGS+IA64_TASK_SIZE,r13
701	;;
702(p6)	ld4 r31=[r18]				// load current_thread_info()->flags
703	ld8 r19=[r2],PT(B6)-PT(LOADRS)		// load ar.rsc value for "loadrs"
704	nop.i 0
705	;;
706#endif
707	mov r16=ar.bsp				// M2  get existing backing store pointer
708	ld8 r18=[r2],PT(R9)-PT(B6)		// load b6
709(p6)	and r15=TIF_WORK_MASK,r31		// any work other than TIF_SYSCALL_TRACE?
710	;;
711	ld8 r23=[r3],PT(R11)-PT(AR_BSPSTORE)	// load ar.bspstore (may be garbage)
712(p6)	cmp4.ne.unc p6,p0=r15, r0		// any special work pending?
713(p6)	br.cond.spnt .work_pending_syscall
714	;;
715	// start restoring the state saved on the kernel stack (struct pt_regs):
716	ld8 r9=[r2],PT(CR_IPSR)-PT(R9)
717	ld8 r11=[r3],PT(CR_IIP)-PT(R11)
718(pNonSys) break 0		//      bug check: we shouldn't be here if pNonSys is TRUE!
719	;;
720	invala			// M0|1 invalidate ALAT
721	RSM_PSR_I_IC(r28, r29, r30)	// M2   turn off interrupts and interruption collection
722	cmp.eq p9,p0=r0,r0	// A    set p9 to indicate that we should restore cr.ifs
723
724	ld8 r29=[r2],16		// M0|1 load cr.ipsr
725	ld8 r28=[r3],16		// M0|1 load cr.iip
726#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
727(pUStk) add r14=TI_AC_LEAVE+IA64_TASK_SIZE,r13
728	;;
729	ld8 r30=[r2],16		// M0|1 load cr.ifs
730	ld8 r25=[r3],16		// M0|1 load ar.unat
731(pUStk) add r15=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13
732	;;
733#else
734	mov r22=r0		// A    clear r22
735	;;
736	ld8 r30=[r2],16		// M0|1 load cr.ifs
737	ld8 r25=[r3],16		// M0|1 load ar.unat
738(pUStk) add r14=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13
739	;;
740#endif
741	ld8 r26=[r2],PT(B0)-PT(AR_PFS)	// M0|1 load ar.pfs
742	MOV_FROM_PSR(pKStk, r22, r21)	// M2   read PSR now that interrupts are disabled
743	nop 0
744	;;
745	ld8 r21=[r2],PT(AR_RNAT)-PT(B0) // M0|1 load b0
746	ld8 r27=[r3],PT(PR)-PT(AR_RSC)	// M0|1 load ar.rsc
747	mov f6=f0			// F    clear f6
748	;;
749	ld8 r24=[r2],PT(AR_FPSR)-PT(AR_RNAT)	// M0|1 load ar.rnat (may be garbage)
750	ld8 r31=[r3],PT(R1)-PT(PR)		// M0|1 load predicates
751	mov f7=f0				// F    clear f7
752	;;
753	ld8 r20=[r2],PT(R12)-PT(AR_FPSR)	// M0|1 load ar.fpsr
754	ld8.fill r1=[r3],16			// M0|1 load r1
755(pUStk) mov r17=1				// A
756	;;
	// Store 1 through the IA64_TASK_THREAD_ON_USTACK_OFFSET pointer
	// computed above (r15 or r14, depending on the configuration).
757#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
758(pUStk) st1 [r15]=r17				// M2|3
759#else
760(pUStk) st1 [r14]=r17				// M2|3
761#endif
762	ld8.fill r13=[r3],16			// M0|1
763	mov f8=f0				// F    clear f8
764	;;
765	ld8.fill r12=[r2]			// M0|1 restore r12 (sp)
766	ld8.fill r15=[r3]			// M0|1 restore r15
767	mov b6=r18				// I0   restore b6
768
769	LOAD_PHYS_STACK_REG_SIZE(r17)
770	mov f9=f0					// F    clear f9
771(pKStk) br.cond.dpnt.many skip_rbs_switch		// B
772
773	srlz.d				// M0   ensure interruption collection is off (for cover)
774	shr.u r18=r19,16		// I0|1 get byte size of existing "dirty" partition
775	COVER				// B    add current frame into dirty partition & set cr.ifs
776	;;
777#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
778	mov r19=ar.bsp			// M2   get new backing store pointer
779	st8 [r14]=r22			// M	save time at leave
780	mov f10=f0			// F    clear f10
781
782	mov r22=r0			// A	clear r22
783	movl r14=__kernel_syscall_via_epc // X
784	;;
785#else
786	mov r19=ar.bsp			// M2   get new backing store pointer
787	mov f10=f0			// F    clear f10
788
789	nop.m 0
790	movl r14=__kernel_syscall_via_epc // X
791	;;
792#endif
793	mov.m ar.csd=r0			// M2   clear ar.csd
794	mov.m ar.ccv=r0			// M2   clear ar.ccv
795	mov b7=r14			// I0   clear b7 (hint with __kernel_syscall_via_epc)
796
797	mov.m ar.ssd=r0			// M2   clear ar.ssd
798	mov f11=f0			// F    clear f11
799	br.cond.sptk.many rbs_switch	// B
800END(ia64_leave_syscall)
801
802GLOBAL_ENTRY(ia64_leave_kernel)
803	PT_REGS_UNWIND_INFO(0)
804	/*
805	 * work.need_resched etc. mustn't get changed by this CPU before it returns to
806	 * user- or fsys-mode, hence we disable interrupts early on.
807	 *
808	 * p6 controls whether current_thread_info()->flags needs to be checked for
809	 * extra work.  We always check for extra work when returning to user-level.
810	 * With CONFIG_PREEMPTION, we also check for extra work when the preempt_count
811	 * is 0.  After extra work processing has been completed, execution
812	 * resumes at .work_processed_kernel with p6 set to 1 if the extra-work-check
813	 * needs to be redone.
814	 */
815#ifdef CONFIG_PREEMPTION
816	RSM_PSR_I(p0, r17, r31)			// disable interrupts
817	cmp.eq p0,pLvSys=r0,r0			// pLvSys=0: leave from kernel
818(pKStk)	adds r20=TI_PRE_COUNT+IA64_TASK_SIZE,r13
819	;;
820	.pred.rel.mutex pUStk,pKStk
821(pKStk)	ld4 r21=[r20]			// r21 <- preempt_count
822(pUStk)	mov r21=0			// r21 <- 0
823	;;
824	cmp.eq p6,p0=r21,r0		// p6 <- pUStk || (preempt_count == 0)
825#else
826	RSM_PSR_I(pUStk, r17, r31)
827	cmp.eq p0,pLvSys=r0,r0		// pLvSys=0: leave from kernel
828(pUStk)	cmp.eq.unc p6,p0=r0,r0		// p6 <- pUStk
829#endif
830.work_processed_kernel:
831	adds r17=TI_FLAGS+IA64_TASK_SIZE,r13
832	;;
833(p6)	ld4 r31=[r17]				// load current_thread_info()->flags
834	adds r21=PT(PR)+16,r12
835	;;
836
837	lfetch [r21],PT(CR_IPSR)-PT(PR)
838	adds r2=PT(B6)+16,r12
839	adds r3=PT(R16)+16,r12
840	;;
841	lfetch [r21]
842	ld8 r28=[r2],8		// load b6
843	adds r29=PT(R24)+16,r12
844
845	ld8.fill r16=[r3],PT(AR_CSD)-PT(R16)
846	adds r30=PT(AR_CCV)+16,r12
847(p6)	and r19=TIF_WORK_MASK,r31		// any work other than TIF_SYSCALL_TRACE?
848	;;
849	ld8.fill r24=[r29]
850	ld8 r15=[r30]		// load ar.ccv
851(p6)	cmp4.ne.unc p6,p0=r19, r0		// any special work pending?
852	;;
853	ld8 r29=[r2],16		// load b7
854	ld8 r30=[r3],16		// load ar.csd
855(p6)	br.cond.spnt .work_pending
856	;;
857	ld8 r31=[r2],16		// load ar.ssd
858	ld8.fill r8=[r3],16
859	;;
860	ld8.fill r9=[r2],16
861	ld8.fill r10=[r3],PT(R17)-PT(R10)
862	;;
863	ld8.fill r11=[r2],PT(R18)-PT(R11)
864	ld8.fill r17=[r3],16
865	;;
866	ld8.fill r18=[r2],16
867	ld8.fill r19=[r3],16
868	;;
869	ld8.fill r20=[r2],16
870	ld8.fill r21=[r3],16
871	mov ar.csd=r30
872	mov ar.ssd=r31
873	;;
874	RSM_PSR_I_IC(r23, r22, r25)	// initiate turning off of interrupt and interruption collection
875	invala			// invalidate ALAT
876	;;
877	ld8.fill r22=[r2],24
878	ld8.fill r23=[r3],24
879	mov b6=r28
880	;;
881	ld8.fill r25=[r2],16
882	ld8.fill r26=[r3],16
883	mov b7=r29
884	;;
885	ld8.fill r27=[r2],16
886	ld8.fill r28=[r3],16
887	;;
888	ld8.fill r29=[r2],16
889	ld8.fill r30=[r3],24
890	;;
891	ld8.fill r31=[r2],PT(F9)-PT(R31)
892	adds r3=PT(F10)-PT(F6),r3
893	;;
894	ldf.fill f9=[r2],PT(F6)-PT(F9)
895	ldf.fill f10=[r3],PT(F8)-PT(F10)
896	;;
897	ldf.fill f6=[r2],PT(F7)-PT(F6)
898	;;
899	ldf.fill f7=[r2],PT(F11)-PT(F7)
900	ldf.fill f8=[r3],32
901	;;
902	srlz.d	// ensure that inter. collection is off (VHPT is don't care, since text is pinned)
903	mov ar.ccv=r15
904	;;
905	ldf.fill f11=[r2]
906	BSW_0(r2, r3, r15)	// switch back to bank 0 (no stop bit required beforehand...)
907	;;
908(pUStk)	mov r18=IA64_KR(CURRENT)// M2 (12 cycle read latency)
909	adds r16=PT(CR_IPSR)+16,r12
910	adds r17=PT(CR_IIP)+16,r12
911
912#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
913	.pred.rel.mutex pUStk,pKStk
914	MOV_FROM_PSR(pKStk, r22, r29)	// M2 read PSR now that interrupts are disabled
915	MOV_FROM_ITC(pUStk, p9, r22, r29)	// M  fetch time at leave
916	nop.i 0
917	;;
918#else
919	MOV_FROM_PSR(pKStk, r22, r29)	// M2 read PSR now that interrupts are disabled
920	nop.i 0
921	nop.i 0
922	;;
923#endif
924	ld8 r29=[r16],16	// load cr.ipsr
925	ld8 r28=[r17],16	// load cr.iip
926	;;
927	ld8 r30=[r16],16	// load cr.ifs
928	ld8 r25=[r17],16	// load ar.unat
929	;;
930	ld8 r26=[r16],16	// load ar.pfs
931	ld8 r27=[r17],16	// load ar.rsc
932	cmp.eq p9,p0=r0,r0	// set p9 to indicate that we should restore cr.ifs
933	;;
934	ld8 r24=[r16],16	// load ar.rnat (may be garbage)
935	ld8 r23=[r17],16	// load ar.bspstore (may be garbage)
936	;;
937	ld8 r31=[r16],16	// load predicates
938	ld8 r21=[r17],16	// load b0
939	;;
940	ld8 r19=[r16],16	// load ar.rsc value for "loadrs"
941	ld8.fill r1=[r17],16	// load r1
942	;;
943	ld8.fill r12=[r16],16
944	ld8.fill r13=[r17],16
945#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
946(pUStk)	adds r3=TI_AC_LEAVE+IA64_TASK_SIZE,r18
947#else
948(pUStk)	adds r18=IA64_TASK_THREAD_ON_USTACK_OFFSET,r18
949#endif
950	;;
951	ld8 r20=[r16],16	// ar.fpsr
952	ld8.fill r15=[r17],16
953#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
954(pUStk)	adds r18=IA64_TASK_THREAD_ON_USTACK_OFFSET,r18	// deferred
955#endif
956	;;
957	ld8.fill r14=[r16],16
958	ld8.fill r2=[r17]
959(pUStk)	mov r17=1
960	;;
961#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
962	//  mmi_ :  ld8 st1 shr;;         mmi_ : st8 st1 shr;;
963	//  mib  :  mov add br        ->  mib  : ld8 add br
964	//  bbb_ :  br  nop cover;;       mbb_ : mov br  cover;;
965	//
966	//  no one require bsp in r16 if (pKStk) branch is selected.
967(pUStk)	st8 [r3]=r22		// save time at leave
968(pUStk)	st1 [r18]=r17		// restore current->thread.on_ustack
969	shr.u r18=r19,16	// get byte size of existing "dirty" partition
970	;;
971	ld8.fill r3=[r16]	// deferred
972	LOAD_PHYS_STACK_REG_SIZE(r17)
973(pKStk)	br.cond.dpnt skip_rbs_switch
974	mov r16=ar.bsp		// get existing backing store pointer
975#else
976	ld8.fill r3=[r16]
977(pUStk)	st1 [r18]=r17		// restore current->thread.on_ustack
978	shr.u r18=r19,16	// get byte size of existing "dirty" partition
979	;;
980	mov r16=ar.bsp		// get existing backing store pointer
981	LOAD_PHYS_STACK_REG_SIZE(r17)
982(pKStk)	br.cond.dpnt skip_rbs_switch
983#endif
984
985	/*
986	 * Restore user backing store.
987	 *
988	 * NOTE: alloc, loadrs, and cover can't be predicated.
989	 */
990(pNonSys) br.cond.dpnt dont_preserve_current_frame
991	COVER				// add current frame into dirty partition and set cr.ifs
992	;;
993	mov r19=ar.bsp			// get new backing store pointer
994rbs_switch:
995	sub r16=r16,r18			// krbs = old bsp - size of dirty partition
996	cmp.ne p9,p0=r0,r0		// clear p9 to skip restore of cr.ifs
997	;;
998	sub r19=r19,r16			// calculate total byte size of dirty partition
999	add r18=64,r18			// don't force in0-in7 into memory...
1000	;;
1001	shl r19=r19,16			// shift size of dirty partition into loadrs position
1002	;;
1003dont_preserve_current_frame:
1004	/*
1005	 * To prevent leaking bits between the kernel and user-space,
1006	 * we must clear the stacked registers in the "invalid" partition here.
1007	 * Not pretty, but at least it's fast (3.34 registers/cycle on Itanium,
1008	 * 5 registers/cycle on McKinley).
1009	 */
1010#	define pRecurse	p6
1011#	define pReturn	p7
1012#ifdef CONFIG_ITANIUM
1013#	define Nregs	10
1014#else
1015#	define Nregs	14
1016#endif
1017	alloc loc0=ar.pfs,2,Nregs-2,2,0
1018	shr.u loc1=r18,9		// RNaTslots <= floor(dirtySize / (64*8))
1019	sub r17=r17,r18			// r17 = (physStackedSize + 8) - dirtySize
1020	;;
1021	mov ar.rsc=r19			// load ar.rsc to be used for "loadrs"
1022	shladd in0=loc1,3,r17
1023	mov in1=0
1024	;;
1025	TEXT_ALIGN(32)
1026rse_clear_invalid:
1027#ifdef CONFIG_ITANIUM
1028	// cycle 0
1029 { .mii
1030	alloc loc0=ar.pfs,2,Nregs-2,2,0
1031	cmp.lt pRecurse,p0=Nregs*8,in0	// if more than Nregs regs left to clear, (re)curse
1032	add out0=-Nregs*8,in0
1033}{ .mfb
1034	add out1=1,in1			// increment recursion count
1035	nop.f 0
1036	nop.b 0				// can't do br.call here because of alloc (WAW on CFM)
1037	;;
1038}{ .mfi	// cycle 1
1039	mov loc1=0
1040	nop.f 0
1041	mov loc2=0
1042}{ .mib
1043	mov loc3=0
1044	mov loc4=0
1045(pRecurse) br.call.sptk.many b0=rse_clear_invalid
1046
1047}{ .mfi	// cycle 2
1048	mov loc5=0
1049	nop.f 0
1050	cmp.ne pReturn,p0=r0,in1	// if recursion count != 0, we need to do a br.ret
1051}{ .mib
1052	mov loc6=0
1053	mov loc7=0
1054(pReturn) br.ret.sptk.many b0
1055}
1056#else /* !CONFIG_ITANIUM */
1057	alloc loc0=ar.pfs,2,Nregs-2,2,0
1058	cmp.lt pRecurse,p0=Nregs*8,in0	// if more than Nregs regs left to clear, (re)curse
1059	add out0=-Nregs*8,in0
1060	add out1=1,in1			// increment recursion count
1061	mov loc1=0
1062	mov loc2=0
1063	;;
1064	mov loc3=0
1065	mov loc4=0
1066	mov loc5=0
1067	mov loc6=0
1068	mov loc7=0
1069(pRecurse) br.call.dptk.few b0=rse_clear_invalid
1070	;;
1071	mov loc8=0
1072	mov loc9=0
1073	cmp.ne pReturn,p0=r0,in1	// if recursion count != 0, we need to do a br.ret
1074	mov loc10=0
1075	mov loc11=0
1076(pReturn) br.ret.dptk.many b0
1077#endif /* !CONFIG_ITANIUM */
1078#	undef pRecurse
1079#	undef pReturn
1080	;;
1081	alloc r17=ar.pfs,0,0,0,0	// drop current register frame
1082	;;
1083	loadrs
1084	;;
1085skip_rbs_switch:
1086	mov ar.unat=r25		// M2
1087(pKStk)	extr.u r22=r22,21,1	// I0 extract current value of psr.pp from r22
1088(pLvSys)mov r19=r0		// A  clear r19 for leave_syscall, no-op otherwise
1089	;;
1090(pUStk)	mov ar.bspstore=r23	// M2
1091(pKStk)	dep r29=r22,r29,21,1	// I0 update ipsr.pp with psr.pp
1092(pLvSys)mov r16=r0		// A  clear r16 for leave_syscall, no-op otherwise
1093	;;
1094	MOV_TO_IPSR(p0, r29, r25)	// M2
1095	mov ar.pfs=r26		// I0
1096(pLvSys)mov r17=r0		// A  clear r17 for leave_syscall, no-op otherwise
1097
1098	MOV_TO_IFS(p9, r30, r25)// M2
1099	mov b0=r21		// I0
1100(pLvSys)mov r18=r0		// A  clear r18 for leave_syscall, no-op otherwise
1101
1102	mov ar.fpsr=r20		// M2
1103	MOV_TO_IIP(r28, r25)	// M2
1104	nop 0
1105	;;
1106(pUStk)	mov ar.rnat=r24		// M2 must happen with RSE in lazy mode
1107	nop 0
1108(pLvSys)mov r2=r0
1109
1110	mov ar.rsc=r27		// M2
1111	mov pr=r31,-1		// I0
1112	RFI			// B
1113
1114	/*
1115	 * On entry:
1116	 *	r20 = &current->thread_info->preempt_count (if CONFIG_PREEMPTION)
1117	 *	r31 = current->thread_info->flags
1118	 * On exit:
1119	 *	p6 = TRUE if work-pending-check needs to be redone
1120	 *
1121	 * Interrupts are disabled on entry, re-enabled depending on the pending
1122	 * work, and disabled again on exit.
1123	 */
1124.work_pending_syscall:
1125	add r2=-8,r2
1126	add r3=-8,r3
1127	;;
1128	st8 [r2]=r8
1129	st8 [r3]=r10
1130.work_pending:
1131	tbit.z p6,p0=r31,TIF_NEED_RESCHED	// is resched not needed?
1132(p6)	br.cond.sptk.few .notify
1133	br.call.spnt.many rp=preempt_schedule_irq
1134.ret9:	cmp.eq p6,p0=r0,r0	// p6 <- 1 (re-check)
1135(pLvSys)br.cond.sptk.few  ia64_work_pending_syscall_end
1136	br.cond.sptk.many .work_processed_kernel
1137
1138.notify:
1139(pUStk)	br.call.spnt.many rp=notify_resume_user
1140.ret10:	cmp.ne p6,p0=r0,r0	// p6 <- 0 (don't re-check)
1141(pLvSys)br.cond.sptk.few  ia64_work_pending_syscall_end
1142	br.cond.sptk.many .work_processed_kernel
1143
1144.global ia64_work_pending_syscall_end;
1145ia64_work_pending_syscall_end:
1146	adds r2=PT(R8)+16,r12
1147	adds r3=PT(R10)+16,r12
1148	;;
1149	ld8 r8=[r2]
1150	ld8 r10=[r3]
1151	br.cond.sptk.many ia64_work_processed_syscall
1152END(ia64_leave_kernel)
1153
1154ENTRY(handle_syscall_error)
1155	/*
1156	 * Some system calls (e.g., ptrace, mmap) can return arbitrary values which could
1157	 * lead us to mistake a negative return value for a failed syscall.  Those syscalls
1158	 * must deposit a non-zero value in pt_regs.r8 to indicate an error.  If
1159	 * pt_regs.r8 is zero, we assume that the call completed successfully.
	 *
	 * On entry:
	 *	r2 = address from which pt_regs.r8 is loaded
	 *	r8 = value returned by the system call
	 * On exit:
	 *	pt_regs.r8 != 0: r8 = -r8 (the errno) and r10 = -1
	 *	pt_regs.r8 == 0: r8 and r10 are left untouched
	 * Either way, control continues at ia64_leave_syscall.
1160	 */
1161	PT_REGS_UNWIND_INFO(0)
1162	ld8 r3=[r2]		// load pt_regs.r8
1163	;;
1164	cmp.eq p6,p7=r3,r0	// is pt_regs.r8==0?  (p7 = error flagged; p6 is not used in this routine)
1165	;;
1166(p7)	mov r10=-1		// error flagged: set r10 to -1
1167(p7)	sub r8=0,r8		// negate return value to get errno
1168	br.cond.sptk ia64_leave_syscall	// taken on both the error and the success path
1169END(handle_syscall_error)
1170
1171	/*
1172	 * Invoke schedule_tail(task) while preserving in0-in7, which may be needed
1173	 * in case a system call gets restarted.
	 *
	 * On entry:
	 *	r8 = address of the previous task, handed to schedule_tail() in out0
	 *
	 * The frame declares 8 inputs so that in0-in7 stay part of the current
	 * register frame across the call.  rp and ar.pfs are parked in loc0/loc1
	 * and restored before returning.
1174	 */
1175GLOBAL_ENTRY(ia64_invoke_schedule_tail)
1176	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8)
1177	alloc loc1=ar.pfs,8,2,1,0		// 8 inputs, 2 locals, 1 output; loc1 = ar.pfs
1178	mov loc0=rp				// save return address
1179	mov out0=r8				// Address of previous task
1180	;;
1181	br.call.sptk.many rp=schedule_tail
1182.ret11:	mov ar.pfs=loc1			// restore ar.pfs
1183	mov rp=loc0				// restore return address
1184	br.ret.sptk.many rp
1185END(ia64_invoke_schedule_tail)
1186
1187	/*
1188	 * Set up the stack and call do_notify_resume_user(), keeping interrupts
1189	 * disabled.
1190	 *
1191	 * Note that pSys and pNonSys need to be set up by the caller.
1192	 * We declare 8 input registers so the system call args get preserved,
1193	 * in case we need to restart a system call.
	 *
	 * Arguments passed to do_notify_resume_user():
	 *	out0 = 0 (there is no "oldset")
	 *	out1 = &sigscratch, which is the value sp has on entry; ar.unat is
	 *	       stored at offset 0 and ar.pfs at offset 8
	 *	out2 = 1 if pSys is set, 0 if pNonSys is set
	 *
	 * After the call, ar.unat is reloaded from the first sigscratch word, and
	 * rp and ar.pfs are restored from loc0/loc1.
1194	 */
1195GLOBAL_ENTRY(notify_resume_user)
1196	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8)
1197	alloc loc1=ar.pfs,8,2,3,0 // preserve all eight input regs in case of syscall restart!
1198	mov r9=ar.unat				// r9 = current ar.unat, stored into sigscratch below
1199	mov loc0=rp				// save return address
1200	mov out0=0				// there is no "oldset"
1201	adds out1=8,sp				// out1=&sigscratch->ar_pfs
1202(pSys)	mov out2=1				// out2==1 => we're in a syscall
1203	;;
1204(pNonSys) mov out2=0				// out2==0 => not a syscall
1205	.fframe 16
1206	.spillsp ar.unat, 16
1207	st8 [sp]=r9,-16				// store ar.unat at sp (first sigscratch word), then lower sp by 16
1208	st8 [out1]=loc1,-8			// save ar.pfs, out1=&sigscratch
1209	.body
1210	br.call.sptk.many rp=do_notify_resume_user
1211.ret15:	.restore sp
1212	adds sp=16,sp				// pop scratch stack space
1213	;;
1214	ld8 r9=[sp]				// load new unat from sigscratch->scratch_unat
1215	mov rp=loc0				// restore return address
1216	;;
1217	mov ar.unat=r9				// install the (possibly updated) ar.unat
1218	mov ar.pfs=loc1				// restore ar.pfs
1219	br.ret.sptk.many rp
1220END(notify_resume_user)
1221
1222ENTRY(sys_rt_sigreturn)
	/*
	 * Assembly wrapper around ia64_rt_sigreturn().
	 *
	 * Sets pNonSys and clears pSys, spills f6-f11 into pt_regs, and calls
	 * ia64_rt_sigreturn() with out0 = &sigscratch.  Afterwards ar.unat is
	 * reloaded from the first sigscratch word and control branches to the
	 * address that ia64_rt_sigreturn() returned in r8.
	 */
1223	PT_REGS_UNWIND_INFO(0)
1224	/*
1225	 * Allocate 8 input registers since ptrace() may clobber them
1226	 */
1227	alloc r2=ar.pfs,8,0,1,0
1228	.prologue
1229	PT_REGS_SAVES(16)
1230	adds sp=-16,sp				// lower sp by 16; undone after the call
1231	.body
1232	cmp.eq pNonSys,pSys=r0,r0		// sigreturn isn't a normal syscall...
1233	;;
1234	/*
1235	 * leave_kernel() restores f6-f11 from pt_regs, but since the streamlined
1236	 * syscall-entry path does not save them we save them here instead.  Note: we
1237	 * don't need to save any other registers that are not saved by the stream-lined
1238	 * syscall path, because restore_sigcontext() restores them.
	 *
	 * r16 walks the f6, f8, f10 slots and r17 the f7, f9, f11 slots, each
	 * advancing by 32 bytes per store.  The +32 is relative to the sp that
	 * was just lowered by 16.
1239	 */
1240	adds r16=PT(F6)+32,sp
1241	adds r17=PT(F7)+32,sp
1242	;;
1243 	stf.spill [r16]=f6,32
1244 	stf.spill [r17]=f7,32
1245	;;
1246 	stf.spill [r16]=f8,32
1247 	stf.spill [r17]=f9,32
1248	;;
1249 	stf.spill [r16]=f10
1250 	stf.spill [r17]=f11
1251	adds out0=16,sp				// out0 = &sigscratch
1252	br.call.sptk.many rp=ia64_rt_sigreturn
1253.ret19:	.restore sp,0
1254	adds sp=16,sp				// undo the sp adjustment made before the call
1255	;;
1256	ld8 r9=[sp]				// load new ar.unat
1257	mov.sptk b7=r8,ia64_leave_kernel	// b7 = address returned in r8; NOTE(review): second operand looks like a branch hint only -- confirm
1258	;;
1259	mov ar.unat=r9
1260	br.many b7				// continue at the address returned by ia64_rt_sigreturn
1261END(sys_rt_sigreturn)
1262
1263GLOBAL_ENTRY(ia64_prepare_handle_unaligned)
	/*
	 * Wraps the call to ia64_handle_unaligned() between
	 * DO_SAVE_SWITCH_STACK and DO_LOAD_SWITCH_STACK, then branches to rp.
	 * No arguments are set up here; the stack frame comes from the ivt.
	 */
1264	.prologue
1265	/*
1266	 * r16 = fake ar.pfs, we simply need to make sure privilege is still 0
1267	 */
1268	mov r16=r0
1269	DO_SAVE_SWITCH_STACK
1270	br.call.sptk.many rp=ia64_handle_unaligned	// stack frame setup in ivt
1271.ret21:	.body
1272	DO_LOAD_SWITCH_STACK			// NOTE(review): presumably also reloads rp, which the br.call above overwrote -- confirm in the macro
1273	br.cond.sptk.many rp				// goes to ia64_leave_kernel
1274END(ia64_prepare_handle_unaligned)
1275
1276	//
1277	// unw_init_running(void (*callback)(info, arg), void *arg)
1278	//
	// Saves a switch_stack, reserves EXTRA_FRAME_SIZE bytes below it for the
	// frame info, initializes that info with unw_init_frame_info(), and then
	// invokes callback(&info, arg).
	//
	//	in0 = callback; read as two 8-byte words: the entry address (moved
	//	      to b6) and the gp value installed for the call
	//	in1 = arg, passed through as the second callback argument
	//
	// EXTRA_FRAME_SIZE is UNW_FRAME_INFO_SIZE rounded up to a multiple of 16.
	//
1279#	define EXTRA_FRAME_SIZE	((UNW_FRAME_INFO_SIZE+15)&~15)
1280
1281GLOBAL_ENTRY(unw_init_running)
1282	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(2)
1283	alloc loc1=ar.pfs,2,3,3,0		// 2 inputs, 3 locals, 3 outputs; loc1 = ar.pfs
1284	;;
1285	ld8 loc2=[in0],8			// loc2 = first word at in0 (callback entry); in0 += 8
1286	mov loc0=rp				// save return address
1287	mov r16=loc1				// r16 = ar.pfs value, set up before DO_SAVE_SWITCH_STACK
1288	DO_SAVE_SWITCH_STACK
1289	.body
1290
1291	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(2)
1292	.fframe IA64_SWITCH_STACK_SIZE+EXTRA_FRAME_SIZE
1293	SWITCH_STACK_SAVES(EXTRA_FRAME_SIZE)
1294	adds sp=-EXTRA_FRAME_SIZE,sp		// reserve room for the frame info
1295	.body
1296	;;
1297	adds out0=16,sp				// &info
1298	mov out1=r13				// current
1299	adds out2=16+EXTRA_FRAME_SIZE,sp	// &switch_stack
1300	br.call.sptk.many rp=unw_init_frame_info
13011:	adds out0=16,sp				// &info
1302	mov b6=loc2				// b6 = callback entry address
1303	mov loc2=gp				// save gp across indirect function call
1304	;;
1305	ld8 gp=[in0]				// gp = second word at the original in0
1306	mov out1=in1				// arg
1307	br.call.sptk.many rp=b6			// invoke the callback function
13081:	mov gp=loc2				// restore gp
1309
1310	// For now, we don't allow changing registers from within
1311	// unw_init_running; if we ever want to allow that, we'd
1312	// have to do a load_switch_stack here:
1313	.restore sp
1314	adds sp=IA64_SWITCH_STACK_SIZE+EXTRA_FRAME_SIZE,sp	// drop the info area and the switch_stack
1315
1316	mov ar.pfs=loc1				// restore ar.pfs
1317	mov rp=loc0				// restore return address
1318	br.ret.sptk.many rp
1319END(unw_init_running)
1320EXPORT_SYMBOL(unw_init_running)
1321
1322#ifdef CONFIG_FUNCTION_TRACER
1323#ifdef CONFIG_DYNAMIC_FTRACE
1324GLOBAL_ENTRY(_mcount)
	// CONFIG_DYNAMIC_FTRACE variant: no tracing work is done here, control
	// goes straight to ftrace_stub.
1325	br ftrace_stub
1326END(_mcount)
1327EXPORT_SYMBOL(_mcount)
1328
1329.here:
1330	br.ret.sptk.many b0		// bare return; its address is what ftrace_call loads into r3
1331
1332GLOBAL_ENTRY(ftrace_caller)
	/*
	 * On entry r3 is supplied by the caller; r3+0x20 becomes b0 below.
	 * NOTE(review): r3 presumably comes from the patched call site -- confirm.
	 *
	 * out0-out3 are filled with ar.pfs, r1 (gp), b0 and 0.  The br.call to
	 * the label that immediately follows turns them into in0-in3 of the
	 * frame allocated at ftrace_call.
	 */
1333	alloc out0 = ar.pfs, 8, 0, 4, 0
1334	mov out3 = r0
1335	;;
1336	mov out2 = b0
1337	add r3 = 0x20, r3
1338	mov out1 = r1;
1339	br.call.sptk.many b0 = ftrace_patch_gp
1340	//this might be called from module, so we must patch gp
1341ftrace_patch_gp:
1342	movl gp=__gp
1343	mov b0 = r3			// b0 = r3 as adjusted above
1344	;;
1345.global ftrace_call;
1346ftrace_call:
	// r3 = address of the routine called below; .here (a bare return) as
	// assembled.  NOTE(review): this bundle is presumably rewritten at run
	// time to install a tracer -- confirm against the ftrace patching code.
1347{
1348	.mlx
1349	nop.m 0x0
1350	movl r3 = .here;;
1351}
1352	alloc loc0 = ar.pfs, 4, 4, 2, 0
1353	;;
1354	mov loc1 = b0			// keep b0 across the call
1355	mov out0 = b0
1356	mov loc2 = r8			// keep r8 across the call
1357	mov loc3 = r15			// keep r15 across the call
1358	;;
1359	adds out0 = -MCOUNT_INSN_SIZE, out0	// out0 = b0 - MCOUNT_INSN_SIZE
1360	mov out1 = in2			// out1 = b0 value captured in out2 at ftrace_caller entry
1361	mov b6 = r3
1362
1363	br.call.sptk.many b0 = b6	// call the routine whose address is in r3
1364	;;
1365	mov ar.pfs = loc0
1366	mov b0 = loc1
1367	mov r8 = loc2
1368	mov r15 = loc3
1369	br ftrace_stub			// common exit
1370	;;
1371END(ftrace_caller)
1372
1373#else
1374GLOBAL_ENTRY(_mcount)
	/*
	 * Variant built when CONFIG_DYNAMIC_FTRACE is not set.
	 *
	 * Loads through ftrace_trace_function twice and compares the result
	 * with the address of ftrace_stub.  If they are equal, control goes
	 * straight to ftrace_stub.  Otherwise the loaded address is called with
	 *	out0 = b0 - MCOUNT_INSN_SIZE
	 *	out1 = in2
	 * and r8, r15, b0 and ar.pfs are preserved around that call.
	 *
	 * NOTE(review): the second load presumably reads the entry address out
	 * of a function descriptor -- confirm.
	 */
1375	movl r2 = ftrace_stub
1376	movl r3 = ftrace_trace_function;;
1377	ld8 r3 = [r3];;
1378	ld8 r3 = [r3];;
1379	cmp.eq p7,p0 = r2, r3		// p7 = loaded address equals ftrace_stub
1380(p7)	br.sptk.many ftrace_stub
1381	;;
1382
1383	alloc loc0 = ar.pfs, 4, 4, 2, 0
1384	;;
1385	mov loc1 = b0			// keep b0 across the call
1386	mov out0 = b0
1387	mov loc2 = r8			// keep r8 across the call
1388	mov loc3 = r15			// keep r15 across the call
1389	;;
1390	adds out0 = -MCOUNT_INSN_SIZE, out0	// out0 = b0 - MCOUNT_INSN_SIZE
1391	mov out1 = in2
1392	mov b6 = r3
1393
1394	br.call.sptk.many b0 = b6	// call the address loaded above
1395	;;
1396	mov ar.pfs = loc0
1397	mov b0 = loc1
1398	mov r8 = loc2
1399	mov r15 = loc3
1400	br ftrace_stub			// common exit
1401	;;
1402END(_mcount)
1403#endif
1404
1405GLOBAL_ENTRY(ftrace_stub)
	/*
	 * Common exit of the mcount paths.  The current b0 is moved to b7, and
	 * br.ret is issued through b6 so that execution resumes at
	 * _mcount_ret_helper.  The helper then reloads b0, r1 and ar.pfs from
	 * r42, r41 and r40 and branches to b7.
	 *
	 * NOTE(review): r40-r42 line up with out0-out2 of a frame allocated with
	 * "alloc out0 = ar.pfs, 8, 0, 4, 0" (as in ftrace_caller); confirm that
	 * every caller of _mcount uses that sequence.
	 */
1406	mov r3 = b0
1407	movl r2 = _mcount_ret_helper
1408	;;
1409	mov b6 = r2			// b6 = address of _mcount_ret_helper
1410	mov b7 = r3			// b7 = b0 as it was on entry
1411	br.ret.sptk.many b6
1412
1413_mcount_ret_helper:
1414	mov b0 = r42
1415	mov r1 = r41
1416	mov ar.pfs = r40
1417	br b7				// branch to the address saved from b0 on entry
1418END(ftrace_stub)
1419
1420#endif /* CONFIG_FUNCTION_TRACER */
1421
1422#define __SYSCALL(nr, entry) data8 entry
	/*
	 * System call table.  Each __SYSCALL(nr, entry) line pulled in from
	 * <asm/syscall_table.h> expands to one "data8 entry" word; nr is not
	 * used by the macro.  The table is placed in .rodata, aligned to
	 * 8 bytes, and exported as the global symbol sys_call_table.
	 */
1423	.rodata
1424	.align 8
1425	.globl sys_call_table
1426sys_call_table:
1427#include <asm/syscall_table.h>
1428