/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 *  PARISC TLB and cache flushing support
 *  Copyright (C) 2000-2001 Hewlett-Packard (John Marvin)
 *  Copyright (C) 2001 Matthew Wilcox (willy at parisc-linux.org)
 *  Copyright (C) 2002 Richard Hirst (rhirst with parisc-linux.org)
 */

/*
 * NOTE: fdc, fic, and pdc instructions that use base register modification
 *       should only use index and base registers that are not shadowed,
 *       so that the fast path emulation in the non access miss handler
 *       can be used.
 */

/*
 * Assembler level: wide PA-RISC 2.0 (2.0w) when CONFIG_64BIT is set,
 * narrow PA-RISC 2.0 otherwise.
 */
#ifdef CONFIG_64BIT
	.level	2.0w
#else
	.level	2.0
#endif

#include <asm/psw.h>
#include <asm/assembly.h>
#include <asm/cache.h>
#include <asm/ldcw.h>
#include <asm/alternative.h>
#include <linux/linkage.h>
#include <linux/init.h>
#include <linux/pgtable.h>

	/* Everything below is placed in .text.hot, 16-byte aligned. */
	.section .text.hot
	.align	16

/*
 * flush_tlb_all_local
 *
 * Purge the instruction TLB and then the data TLB of the executing CPU
 * with pitlbe/pdtlbe, driven by the ITLB_* / DTLB_* parameters read from
 * cache_info.  The purge runs in real mode with interruptions disabled;
 * on the way out the routine returns to virtual mode with KERNEL_PSW
 * or'ed with the I-bit state that was saved on entry.
 *
 * Each purge is a three-level loop:
 *   outer  - SID_COUNT space ids, starting at SID_BASE, step SID_STRIDE
 *   middle - OFF_COUNT offsets,   starting at OFF_BASE, step OFF_STRIDE
 *   inner  - LOOP purges at the same address
 * LOOP == 1 takes the simpler "one" loops; LOOP < 1 skips the purge.
 *
 * In:       %r2 = return address
 * Clobbers: %r1, %r19-%r22, %r28, %r29, %r31, %arg0-%arg3, %sr1,
 *           %cr17, %cr18, %ipsw
 */
ENTRY_CFI(flush_tlb_all_local)
	/*
	 * The pitlbe and pdtlbe instructions should only be used to
	 * flush the entire tlb. Also, there needs to be no intervening
	 * tlb operations, e.g. tlb misses, so the operation needs
	 * to happen in real mode with all interruptions disabled.
	 */

	/* pcxt_ssm_bug	- relied upon translation! PA 2.0 Arch. F-4 and F-5 */
	rsm		PSW_SM_I, %r19		/* save I-bit state */
	load32		PA(1f), %r1		/* physical address of 1: below */
	nop
	nop
	nop
	nop
	nop

	/* Queue 1: as the next instruction address and rfi into real mode. */
	rsm		PSW_SM_Q, %r0		/* prep to load iia queue */
	mtctl		%r0, %cr17		/* Clear IIASQ tail */
	mtctl		%r0, %cr17		/* Clear IIASQ head */
	mtctl		%r1, %cr18		/* IIAOQ head */
	ldo		4(%r1), %r1
	mtctl		%r1, %cr18		/* IIAOQ tail */
	load32		REAL_MODE_PSW, %r1
	mtctl		%r1, %ipsw
	rfi
	nop

1:	load32		PA(cache_info), %r1

	/* Flush Instruction Tlb */

88:	LDREG		ITLB_SID_BASE(%r1), %r20
	LDREG		ITLB_SID_STRIDE(%r1), %r21
	LDREG		ITLB_SID_COUNT(%r1), %r22
	LDREG		ITLB_OFF_BASE(%r1), %arg0
	LDREG		ITLB_OFF_STRIDE(%r1), %arg1
	LDREG		ITLB_OFF_COUNT(%r1), %arg2
	LDREG		ITLB_LOOP(%r1), %arg3

	addib,COND(=)		-1, %arg3, fitoneloop	/* Preadjust and test */
	movb,<,n	%arg3, %r31, fitdone	/* If loop < 0, skip */
	copy		%arg0, %r28		/* Init base addr */

fitmanyloop:					/* Loop if LOOP >= 2 */
	mtsp		%r20, %sr1
	add		%r21, %r20, %r20	/* increment space */
	copy		%arg2, %r29		/* Init middle loop count */

fitmanymiddle:					/* Loop if LOOP >= 2 */
	addib,COND(>)		-1, %r31, fitmanymiddle	/* Adjusted inner loop decr */
	pitlbe		%r0(%sr1, %r28)
	pitlbe,m	%arg1(%sr1, %r28)	/* Last pitlbe and addr adjust */
	addib,COND(>)		-1, %r29, fitmanymiddle	/* Middle loop decr */
	copy		%arg3, %r31		/* Re-init inner loop count */

	movb,tr		%arg0, %r28, fitmanyloop /* Re-init base addr */
	addib,COND(<=),n	-1, %r22, fitdone	/* Outer loop count decr */

fitoneloop:					/* Loop if LOOP = 1 */
	mtsp		%r20, %sr1
	copy		%arg0, %r28		/* init base addr */
	copy		%arg2, %r29		/* init middle loop count */

fitonemiddle:					/* Loop if LOOP = 1 */
	addib,COND(>)		-1, %r29, fitonemiddle	/* Middle loop count decr */
	pitlbe,m	%arg1(%sr1, %r28)	/* pitlbe for one loop */

	addib,COND(>)		-1, %r22, fitoneloop	/* Outer loop count decr */
	add		%r21, %r20, %r20		/* increment space */

fitdone:
	/*
	 * NOTE(review): presumably lets the alternatives patcher replace
	 * the whole ITLB purge (88: .. fitdone) with NOPs on CPUs without
	 * a split TLB -- confirm against <asm/alternative.h>.
	 */
	ALTERNATIVE(88b, fitdone, ALT_COND_NO_SPLIT_TLB, INSN_NOP)

	/* Flush Data Tlb */

	LDREG		DTLB_SID_BASE(%r1), %r20
	LDREG		DTLB_SID_STRIDE(%r1), %r21
	LDREG		DTLB_SID_COUNT(%r1), %r22
	LDREG		DTLB_OFF_BASE(%r1), %arg0
	LDREG		DTLB_OFF_STRIDE(%r1), %arg1
	LDREG		DTLB_OFF_COUNT(%r1), %arg2
	LDREG		DTLB_LOOP(%r1), %arg3

	addib,COND(=)		-1, %arg3, fdtoneloop	/* Preadjust and test */
	movb,<,n	%arg3, %r31, fdtdone	/* If loop < 0, skip */
	copy		%arg0, %r28		/* Init base addr */

fdtmanyloop:					/* Loop if LOOP >= 2 */
	mtsp		%r20, %sr1
	add		%r21, %r20, %r20	/* increment space */
	copy		%arg2, %r29		/* Init middle loop count */

fdtmanymiddle:					/* Loop if LOOP >= 2 */
	addib,COND(>)		-1, %r31, fdtmanymiddle	/* Adjusted inner loop decr */
	pdtlbe		%r0(%sr1, %r28)
	pdtlbe,m	%arg1(%sr1, %r28)	/* Last pdtlbe and addr adjust */
	addib,COND(>)		-1, %r29, fdtmanymiddle	/* Middle loop decr */
	copy		%arg3, %r31		/* Re-init inner loop count */

	movb,tr		%arg0, %r28, fdtmanyloop /* Re-init base addr */
	addib,COND(<=),n	-1, %r22,fdtdone	/* Outer loop count decr */

fdtoneloop:					/* Loop if LOOP = 1 */
	mtsp		%r20, %sr1
	copy		%arg0, %r28		/* init base addr */
	copy		%arg2, %r29		/* init middle loop count */

fdtonemiddle:					/* Loop if LOOP = 1 */
	addib,COND(>)		-1, %r29, fdtonemiddle	/* Middle loop count decr */
	pdtlbe,m	%arg1(%sr1, %r28)	/* pdtlbe for one loop */

	addib,COND(>)		-1, %r22, fdtoneloop	/* Outer loop count decr */
	add		%r21, %r20, %r20	/* increment space */


fdtdone:
	/*
	 * Switch back to virtual mode
	 */
	/* pcxt_ssm_bug */
	rsm		PSW_SM_I, %r0
	load32		2f, %r1			/* virtual address of 2: below */
	nop
	nop
	nop
	nop
	nop

	rsm		PSW_SM_Q, %r0		/* prep to load iia queue */
	mtctl		%r0, %cr17		/* Clear IIASQ tail */
	mtctl		%r0, %cr17		/* Clear IIASQ head */
	mtctl		%r1, %cr18		/* IIAOQ head */
	ldo		4(%r1), %r1
	mtctl		%r1, %cr18		/* IIAOQ tail */
	load32		KERNEL_PSW, %r1
	or		%r1, %r19, %r1	/* I-bit to state on entry */
	mtctl		%r1, %ipsw	/* restore I-bit (entire PSW) */
	rfi
	nop

2:	bv		%r0(%r2)		/* return to caller */
	nop

	/*
	 * When running in qemu, drop whole flush_tlb_all_local function and
	 * replace by one pdtlbe instruction, for which QEMU will drop all
	 * local TLB entries.
	 */
3:	pdtlbe		%r0(%sr1,%r0)
	bv,n		%r0(%r2)
	ALTERNATIVE_CODE(flush_tlb_all_local, 2, ALT_COND_RUN_ON_QEMU, 3b)
ENDPROC_CFI(flush_tlb_all_local)

/*
 * flush_instruction_cache_local
 *
 * Flush the instruction cache of the executing CPU with fice, using the
 * ICACHE_BASE / ICACHE_STRIDE / ICACHE_COUNT / ICACHE_LOOP values read
 * from cache_info.  The I-bit is cleared for the duration of the loop
 * and the saved system mask is restored with mtsm before returning.
 *
 *   LOOP >= 2: LOOP fice per address, COUNT addresses
 *   LOOP == 1: one fice per address, unrolled 16x while more than 15
 *              remain, then one at a time
 *   LOOP <  1: no fice at all, only the sync
 *
 * In:       %r2 = return address
 * Clobbers: %r1, %r22, %r31, %arg0-%arg3, %sr1
 */
	.import cache_info,data

ENTRY_CFI(flush_instruction_cache_local)
88:	load32		cache_info, %r1

	/* Flush Instruction Cache */

	LDREG		ICACHE_BASE(%r1), %arg0
	LDREG		ICACHE_STRIDE(%r1), %arg1
	LDREG		ICACHE_COUNT(%r1), %arg2
	LDREG		ICACHE_LOOP(%r1), %arg3
	rsm		PSW_SM_I, %r22		/* No mmgt ops during loop*/
	mtsp		%r0, %sr1
	addib,COND(=)		-1, %arg3, fioneloop	/* Preadjust and test */
	movb,<,n	%arg3, %r31, fisync	/* If loop < 0, do sync */

fimanyloop:					/* Loop if LOOP >= 2 */
	addib,COND(>)		-1, %r31, fimanyloop	/* Adjusted inner loop decr */
	fice		%r0(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)	/* Last fice and addr adjust */
	movb,tr		%arg3, %r31, fimanyloop	/* Re-init inner loop count */
	addib,COND(<=),n	-1, %arg2, fisync	/* Outer loop decr */

fioneloop:					/* Loop if LOOP = 1 */
	/* Some implementations may flush with a single fice instruction */
	cmpib,COND(>>=),n	15, %arg2, fioneloop2

fioneloop1:
	/* 15 here plus the one in the addib delay slot: 16 per pass. */
	.rept	15
	fice,m		%arg1(%sr1, %arg0)
	.endr
	addib,COND(>)	-16, %arg2, fioneloop1
	fice,m		%arg1(%sr1, %arg0)

	/* Check if done */
	cmpb,COND(=),n	%arg2, %r0, fisync	/* Predict branch taken */

fioneloop2:
	addib,COND(>)	-1, %arg2, fioneloop2	/* Outer loop count decr */
	fice,m		%arg1(%sr1, %arg0)	/* Fice for one loop */

fisync:
	sync
	mtsm		%r22			/* restore I-bit */
	/*
	 * NOTE(review): presumably lets the alternatives patcher NOP out
	 * 88: .. 89: on machines without an I-cache -- confirm against
	 * <asm/alternative.h>.
	 */
89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_ICACHE, INSN_NOP)
	bv		%r0(%r2)		/* return to caller */
	nop
ENDPROC_CFI(flush_instruction_cache_local)


/*
 * flush_data_cache_local
 *
 * Flush the data cache of the executing CPU with fdce, using the
 * DCACHE_BASE / DCACHE_STRIDE / DCACHE_COUNT / DCACHE_LOOP values read
 * from cache_info.  Same loop structure as flush_instruction_cache_local:
 * the I-bit is cleared during the loop, then syncdma + sync are issued
 * and the saved system mask is restored with mtsm.
 *
 * In:       %r2 = return address
 * Clobbers: %r1, %r22, %r31, %arg0-%arg3, %sr1
 */
	.import cache_info, data
ENTRY_CFI(flush_data_cache_local)
88:	load32		cache_info, %r1

	/* Flush Data Cache */

	LDREG		DCACHE_BASE(%r1), %arg0
	LDREG		DCACHE_STRIDE(%r1), %arg1
	LDREG		DCACHE_COUNT(%r1), %arg2
	LDREG		DCACHE_LOOP(%r1), %arg3
	rsm		PSW_SM_I, %r22		/* No mmgt ops during loop*/
	mtsp		%r0, %sr1
	addib,COND(=)		-1, %arg3, fdoneloop	/* Preadjust and test */
	movb,<,n	%arg3, %r31, fdsync	/* If loop < 0, do sync */

fdmanyloop:					/* Loop if LOOP >= 2 */
	addib,COND(>)		-1, %r31, fdmanyloop	/* Adjusted inner loop decr */
	fdce		%r0(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)	/* Last fdce and addr adjust */
	movb,tr		%arg3, %r31, fdmanyloop	/* Re-init inner loop count */
	addib,COND(<=),n	-1, %arg2, fdsync	/* Outer loop decr */

fdoneloop:					/* Loop if LOOP = 1 */
	/* Some implementations may flush with a single fdce instruction */
	cmpib,COND(>>=),n	15, %arg2, fdoneloop2

fdoneloop1:
	/* 15 here plus the one in the addib delay slot: 16 per pass. */
	.rept	15
	fdce,m		%arg1(%sr1, %arg0)
	.endr
	addib,COND(>)	-16, %arg2, fdoneloop1
	fdce,m		%arg1(%sr1, %arg0)

	/* Check if done */
	cmpb,COND(=),n	%arg2, %r0, fdsync	/* Predict branch taken */

fdoneloop2:
	addib,COND(>)	-1, %arg2, fdoneloop2	/* Outer loop count decr */
	fdce,m		%arg1(%sr1, %arg0)	/* Fdce for one loop */

fdsync:
	syncdma
	sync
	mtsm		%r22			/* restore I-bit */
	/*
	 * NOTE(review): presumably lets the alternatives patcher NOP out
	 * 88: .. 89: on machines without a D-cache -- confirm against
	 * <asm/alternative.h>.
	 */
89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP)
	bv		%r0(%r2)		/* return to caller */
	nop
ENDPROC_CFI(flush_data_cache_local)

/*
 * clear_page_asm
 *
 * Clear page using kernel mapping: store zeroes over PAGE_SIZE bytes
 * starting at the address in %r26.
 *
 * In:       %r26 = address of the page (accessed directly)
 *           %r2  = return address
 * Clobbers: %r1 (loop counter), %r26 (advanced past the page)
 */

ENTRY_CFI(clear_page_asm)
#ifdef CONFIG_64BIT

	/* Unroll the loop: 16 doubleword stores = 128 bytes per pass. */
	ldi		(PAGE_SIZE / 128), %r1

1:
	std		%r0, 0(%r26)
	std		%r0, 8(%r26)
	std		%r0, 16(%r26)
	std		%r0, 24(%r26)
	std		%r0, 32(%r26)
	std		%r0, 40(%r26)
	std		%r0, 48(%r26)
	std		%r0, 56(%r26)
	std		%r0, 64(%r26)
	std		%r0, 72(%r26)
	std		%r0, 80(%r26)
	std		%r0, 88(%r26)
	std		%r0, 96(%r26)
	std		%r0, 104(%r26)
	std		%r0, 112(%r26)
	std		%r0, 120(%r26)

	/* Note reverse branch hint for addib is taken.  */
	addib,COND(>),n	-1, %r1, 1b
	ldo		128(%r26), %r26		/* delay slot of the ,n branch */

#else

	/*
	 * Note that until (if) we start saving the full 64-bit register
	 * values on interrupt, we can't use std on a 32 bit kernel.
	 */
	/* 16 word stores = 64 bytes per pass. */
	ldi		(PAGE_SIZE / 64), %r1

1:
	stw		%r0, 0(%r26)
	stw		%r0, 4(%r26)
	stw		%r0, 8(%r26)
	stw		%r0, 12(%r26)
	stw		%r0, 16(%r26)
	stw		%r0, 20(%r26)
	stw		%r0, 24(%r26)
	stw		%r0, 28(%r26)
	stw		%r0, 32(%r26)
	stw		%r0, 36(%r26)
	stw		%r0, 40(%r26)
	stw		%r0, 44(%r26)
	stw		%r0, 48(%r26)
	stw		%r0, 52(%r26)
	stw		%r0, 56(%r26)
	stw		%r0, 60(%r26)

	addib,COND(>),n	-1, %r1, 1b
	ldo		64(%r26), %r26		/* delay slot of the ,n branch */
#endif
	bv		%r0(%r2)		/* return to caller */
	nop
ENDPROC_CFI(clear_page_asm)

/*
 * copy_page_asm
 *
 * Copy page using kernel mapping: copy PAGE_SIZE bytes from the address
 * in %r25 to the address in %r26.
 *
 * In:       %r26 = destination page
 *           %r25 = source page
 *           %r2  = return address
 * Clobbers: %r1 (loop counter), %r19-%r22 (data), %r25, %r26
 */

ENTRY_CFI(copy_page_asm)
#ifdef CONFIG_64BIT
	/* PA8x00 CPUs can consume 2 loads or 1 store per cycle.
	 * Unroll the loop by hand and arrange insn appropriately.
	 * Prefetch doesn't improve performance on rp3440.
	 * GCC probably can do this just as well...
	 */

	ldi		(PAGE_SIZE / 128), %r1

1:	ldd		0(%r25), %r19
	ldd		8(%r25), %r20

	ldd		16(%r25), %r21
	ldd		24(%r25), %r22
	std		%r19, 0(%r26)
	std		%r20, 8(%r26)

	ldd		32(%r25), %r19
	ldd		40(%r25), %r20
	std		%r21, 16(%r26)
	std		%r22, 24(%r26)

	ldd		48(%r25), %r21
	ldd		56(%r25), %r22
	std		%r19, 32(%r26)
	std		%r20, 40(%r26)

	ldd		64(%r25), %r19
	ldd		72(%r25), %r20
	std		%r21, 48(%r26)
	std		%r22, 56(%r26)

	ldd		80(%r25), %r21
	ldd		88(%r25), %r22
	std		%r19, 64(%r26)
	std		%r20, 72(%r26)

	ldd		 96(%r25), %r19
	ldd		104(%r25), %r20
	std		%r21, 80(%r26)
	std		%r22, 88(%r26)

	ldd		112(%r25), %r21
	ldd		120(%r25), %r22
	ldo		128(%r25), %r25		/* advance source */
	std		%r19, 96(%r26)
	std		%r20, 104(%r26)

	std		%r21, 112(%r26)
	std		%r22, 120(%r26)

	/* Note reverse branch hint for addib is taken.  */
	addib,COND(>),n	-1, %r1, 1b
	ldo		128(%r26), %r26		/* advance destination */

#else

	/*
	 * This loop is optimized for PCXL/PCXL2 ldw/ldw and stw/stw
	 * bundles (very restricted rules for bundling).
	 * Note that until (if) we start saving
	 * the full 64 bit register values on interrupt, we can't
	 * use ldd/std on a 32 bit kernel.
	 */
	ldw		0(%r25), %r19		/* first word, loaded ahead of the loop */
	ldi		(PAGE_SIZE / 64), %r1

1:
	ldw		4(%r25), %r20
	ldw		8(%r25), %r21
	ldw		12(%r25), %r22
	stw		%r19, 0(%r26)
	stw		%r20, 4(%r26)
	stw		%r21, 8(%r26)
	stw		%r22, 12(%r26)
	ldw		16(%r25), %r19
	ldw		20(%r25), %r20
	ldw		24(%r25), %r21
	ldw		28(%r25), %r22
	stw		%r19, 16(%r26)
	stw		%r20, 20(%r26)
	stw		%r21, 24(%r26)
	stw		%r22, 28(%r26)
	ldw		32(%r25), %r19
	ldw		36(%r25), %r20
	ldw		40(%r25), %r21
	ldw		44(%r25), %r22
	stw		%r19, 32(%r26)
	stw		%r20, 36(%r26)
	stw		%r21, 40(%r26)
	stw		%r22, 44(%r26)
	ldw		48(%r25), %r19
	ldw		52(%r25), %r20
	ldw		56(%r25), %r21
	ldw		60(%r25), %r22
	stw		%r19, 48(%r26)
	stw		%r20, 52(%r26)
	ldo		64(%r25), %r25
	stw		%r21, 56(%r26)
	stw		%r22, 60(%r26)
	ldo		64(%r26), %r26
	addib,COND(>),n	-1, %r1, 1b
	ldw		0(%r25), %r19		/* first word of the next pass */
#endif
	bv		%r0(%r2)		/* return to caller */
	nop
ENDPROC_CFI(copy_page_asm)

/*
 * NOTE: Code in clear_user_page has a hard coded dependency on the
 *       maximum alias boundary being 4 MB. We've been assured by the
 *       parisc chip designers that there will not ever be a parisc
 *       chip with a larger alias boundary (Never say never :-) ).
 *
 *       Subtle: the dtlb miss handlers support the temp alias region by
 *       "knowing" that if a dtlb miss happens within the temp alias
 *       region it must have occurred while in clear_user_page. Since
 *       this routine makes use of processor local translations, we
 *       don't want to insert them into the kernel page table. Instead,
 *       we load up some general registers (they need to be registers
 *       which aren't shadowed) with the physical page numbers (preshifted
 *       for tlb insertion) needed to insert the translations. When we
 *       miss on the translation, the dtlb miss handler inserts the
 *       translation into the tlb using these values:
 *
 *          %r26 physical page (shifted for tlb insert) of "to" translation
 *          %r23 physical page (shifted for tlb insert) of "from" translation
 */

/*
 * convert_phys_for_tlb_insert20 phys
 *
 * Drop prot bits and convert to page addr for iitlbt and idtlbt.
 * Rewrites the register \phys in place: an unsigned field extract
 * scaled by PAGE_ADD_SHIFT, followed (only when
 * _PAGE_SIZE_ENCODING_DEFAULT is non-zero) by depositing that page
 * size encoding into the low-order bits.  Uses 64-bit extrd/depdi.
 */
#define PAGE_ADD_SHIFT	(PAGE_SHIFT-12)
	.macro		convert_phys_for_tlb_insert20  phys
	extrd,u		\phys, 56-PAGE_ADD_SHIFT, 32-PAGE_ADD_SHIFT, \phys
#if _PAGE_SIZE_ENCODING_DEFAULT
	depdi		_PAGE_SIZE_ENCODING_DEFAULT, 63, (63-58), \phys
#endif
	.endm

	/*
	 * copy_user_page_asm() performs a page copy using mappings
	 * equivalent to the user page mappings.  It can be used to
	 * implement copy_user_page() but unfortunately both the `from'
	 * and `to' pages need to be flushed through mappings equivalent
	 * to the user mappings after the copy because the kernel accesses
	 * the `from' page through the kmap kernel mapping and the `to'
	 * page needs to be flushed since code can be copied.  As a
	 * result, this implementation is less efficient than the simpler
	 * copy using the kernel mapping.  It only needs the `from' page
	 * to be flushed via the user mapping.  The kunmap routines handle
	 * the flushes needed for the kernel mapping.
	 *
	 * I'm still keeping this around because it may be possible to
	 * use it if more information is passed into copy_user_page().
	 * Have to do some measurements to see if it is worthwhile to
	 * lobby for such a change.
	 *
	 */

/*
 * copy_user_page_asm
 *
 * Copy one page through two temporary alias mappings built from
 * TMPALIAS_MAP_START and the low 22 bits of the address in %r24.
 *
 * In:       %r26 = kernel virtual address of the `to' page
 *           %r25 = kernel virtual address of the `from' page
 *           %r24 = address whose low 22 bits select the alias
 *                  (presumably the user virtual address -- confirm
 *                  against the C caller)
 *           %r2  = return address
 * Clobbers: %r1, %r19-%r23, %r26, %r28, %r29
 *
 * While the copy runs, %r26 and %r23 hold the `to' and `from' physical
 * pages in tlb-insert format; %r28 and %r29 are the aliased `to' and
 * `from' addresses actually used for the loads and stores.
 */
ENTRY_CFI(copy_user_page_asm)
	/* Convert virtual `to' and `from' addresses to physical addresses.
	   Move `from' physical address to non shadowed register.  */
	ldil		L%(__PAGE_OFFSET), %r1
	sub		%r26, %r1, %r26
	sub		%r25, %r1, %r23

	ldil		L%(TMPALIAS_MAP_START), %r28
#ifdef CONFIG_64BIT
#if (TMPALIAS_MAP_START >= 0x80000000)
	depdi		0, 31,32, %r28		/* clear any sign extension */
#endif
	convert_phys_for_tlb_insert20 %r26	/* convert phys addr to tlb insert format */
	convert_phys_for_tlb_insert20 %r23	/* convert phys addr to tlb insert format */
	depd		%r24,63,22, %r28	/* Form aliased virtual address 'to' */
	depdi		0, 63,PAGE_SHIFT, %r28	/* Clear any offset bits */
	copy		%r28, %r29
	depdi		1, 41,1, %r29		/* Form aliased virtual address 'from' */
#else
	extrw,u		%r26, 24,25, %r26	/* convert phys addr to tlb insert format */
	extrw,u		%r23, 24,25, %r23	/* convert phys addr to tlb insert format */
	depw		%r24, 31,22, %r28	/* Form aliased virtual address 'to' */
	depwi		0, 31,PAGE_SHIFT, %r28	/* Clear any offset bits */
	copy		%r28, %r29
	depwi		1, 9,1, %r29		/* Form aliased virtual address 'from' */
#endif

	/* Purge any old translations */

#ifdef CONFIG_PA20
	pdtlb,l		%r0(%r28)
	pdtlb,l		%r0(%r29)
#else
0:	pdtlb		%r0(%r28)
1:	pdtlb		%r0(%r29)
	ALTERNATIVE(0b, 0b+4, ALT_COND_NO_SMP, INSN_PxTLB)
	ALTERNATIVE(1b, 1b+4, ALT_COND_NO_SMP, INSN_PxTLB)
#endif

#ifdef CONFIG_64BIT
	/* PA8x00 CPUs can consume 2 loads or 1 store per cycle.
	 * Unroll the loop by hand and arrange insn appropriately.
	 * GCC probably can do this just as well.
	 */

	ldd		0(%r29), %r19		/* first doubleword, ahead of the loop */
	ldi		(PAGE_SIZE / 128), %r1

1:	ldd		8(%r29), %r20

	ldd		16(%r29), %r21
	ldd		24(%r29), %r22
	std		%r19, 0(%r28)
	std		%r20, 8(%r28)

	ldd		32(%r29), %r19
	ldd		40(%r29), %r20
	std		%r21, 16(%r28)
	std		%r22, 24(%r28)

	ldd		48(%r29), %r21
	ldd		56(%r29), %r22
	std		%r19, 32(%r28)
	std		%r20, 40(%r28)

	ldd		64(%r29), %r19
	ldd		72(%r29), %r20
	std		%r21, 48(%r28)
	std		%r22, 56(%r28)

	ldd		80(%r29), %r21
	ldd		88(%r29), %r22
	std		%r19, 64(%r28)
	std		%r20, 72(%r28)

	ldd		 96(%r29), %r19
	ldd		104(%r29), %r20
	std		%r21, 80(%r28)
	std		%r22, 88(%r28)

	ldd		112(%r29), %r21
	ldd		120(%r29), %r22
	std		%r19, 96(%r28)
	std		%r20, 104(%r28)

	ldo		128(%r29), %r29
	std		%r21, 112(%r28)
	std		%r22, 120(%r28)
	ldo		128(%r28), %r28

	/* conditional branches nullify on forward taken branch, and on
	 * non-taken backward branch. Note that .+4 is a backwards branch.
	 * The ldd should only get executed if the branch is taken.
	 */
	addib,COND(>),n	-1, %r1, 1b		/* bundle 10 */
	ldd		0(%r29), %r19		/* start next loads */

#else
	ldi		(PAGE_SIZE / 64), %r1

	/*
	 * This loop is optimized for PCXL/PCXL2 ldw/ldw and stw/stw
	 * bundles (very restricted rules for bundling). It probably
	 * does OK on PCXU and better, but we could do better with
	 * ldd/std instructions. Note that until (if) we start saving
	 * the full 64 bit register values on interrupt, we can't
	 * use ldd/std on a 32 bit kernel.
	 */

1:	ldw		0(%r29), %r19
	ldw		4(%r29), %r20
	ldw		8(%r29), %r21
	ldw		12(%r29), %r22
	stw		%r19, 0(%r28)
	stw		%r20, 4(%r28)
	stw		%r21, 8(%r28)
	stw		%r22, 12(%r28)
	ldw		16(%r29), %r19
	ldw		20(%r29), %r20
	ldw		24(%r29), %r21
	ldw		28(%r29), %r22
	stw		%r19, 16(%r28)
	stw		%r20, 20(%r28)
	stw		%r21, 24(%r28)
	stw		%r22, 28(%r28)
	ldw		32(%r29), %r19
	ldw		36(%r29), %r20
	ldw		40(%r29), %r21
	ldw		44(%r29), %r22
	stw		%r19, 32(%r28)
	stw		%r20, 36(%r28)
	stw		%r21, 40(%r28)
	stw		%r22, 44(%r28)
	ldw		48(%r29), %r19
	ldw		52(%r29), %r20
	ldw		56(%r29), %r21
	ldw		60(%r29), %r22
	stw		%r19, 48(%r28)
	stw		%r20, 52(%r28)
	stw		%r21, 56(%r28)
	stw		%r22, 60(%r28)
	ldo		64(%r28), %r28

	addib,COND(>)		-1, %r1,1b
	ldo		64(%r29), %r29		/* delay slot: runs on every pass */
#endif

	bv		%r0(%r2)		/* return to caller */
	nop
ENDPROC_CFI(copy_user_page_asm)

/*
 * clear_user_page_asm
 *
 * Zero one page through a temporary alias mapping built from
 * TMPALIAS_MAP_START and the low 22 bits of the address in %r25.
 *
 * In:       %r26 = kernel virtual address of the page (converted with
 *                  tophys_r1, then to tlb-insert format)
 *           %r25 = address whose low 22 bits select the alias
 *                  (presumably the user virtual address -- confirm
 *                  against the C caller)
 *           %r2  = return address
 * Clobbers: %r1, %r26, %r28
 */
ENTRY_CFI(clear_user_page_asm)
	tophys_r1	%r26

	ldil		L%(TMPALIAS_MAP_START), %r28
#ifdef CONFIG_64BIT
#if (TMPALIAS_MAP_START >= 0x80000000)
	depdi		0, 31,32, %r28		/* clear any sign extension */
#endif
	convert_phys_for_tlb_insert20 %r26	/* convert phys addr to tlb insert format */
	depd		%r25, 63,22, %r28	/* Form aliased virtual address 'to' */
	depdi		0, 63,PAGE_SHIFT, %r28	/* Clear any offset bits */
#else
	extrw,u		%r26, 24,25, %r26	/* convert phys addr to tlb insert format */
	depw		%r25, 31,22, %r28	/* Form aliased virtual address 'to' */
	depwi		0, 31,PAGE_SHIFT, %r28	/* Clear any offset bits */
#endif

	/* Purge any old translation */

#ifdef CONFIG_PA20
	pdtlb,l		%r0(%r28)
#else
0:	pdtlb		%r0(%r28)
	ALTERNATIVE(0b, 0b+4, ALT_COND_NO_SMP, INSN_PxTLB)
#endif

#ifdef CONFIG_64BIT
	ldi		(PAGE_SIZE / 128), %r1

	/* PREFETCH (Write) has not (yet) been proven to help here */
	/* #define	PREFETCHW_OP	ldd		256(%0), %r0 */

1:	std		%r0, 0(%r28)
	std		%r0, 8(%r28)
	std		%r0, 16(%r28)
	std		%r0, 24(%r28)
	std		%r0, 32(%r28)
	std		%r0, 40(%r28)
	std		%r0, 48(%r28)
	std		%r0, 56(%r28)
	std		%r0, 64(%r28)
	std		%r0, 72(%r28)
	std		%r0, 80(%r28)
	std		%r0, 88(%r28)
	std		%r0, 96(%r28)
	std		%r0, 104(%r28)
	std		%r0, 112(%r28)
	std		%r0, 120(%r28)
	addib,COND(>)		-1, %r1, 1b
	ldo		128(%r28), %r28		/* delay slot: runs on every pass */

#else	/* ! CONFIG_64BIT */
	ldi		(PAGE_SIZE / 64), %r1

1:	stw		%r0, 0(%r28)
	stw		%r0, 4(%r28)
	stw		%r0, 8(%r28)
	stw		%r0, 12(%r28)
	stw		%r0, 16(%r28)
	stw		%r0, 20(%r28)
	stw		%r0, 24(%r28)
	stw		%r0, 28(%r28)
	stw		%r0, 32(%r28)
	stw		%r0, 36(%r28)
	stw		%r0, 40(%r28)
	stw		%r0, 44(%r28)
	stw		%r0, 48(%r28)
	stw		%r0, 52(%r28)
	stw		%r0, 56(%r28)
	stw		%r0, 60(%r28)
	addib,COND(>)		-1, %r1, 1b
	ldo		64(%r28), %r28		/* delay slot: runs on every pass */
#endif	/* CONFIG_64BIT */

	bv		%r0(%r2)		/* return to caller */
	nop
ENDPROC_CFI(clear_user_page_asm)

/*
 * flush_dcache_page_asm
 *
 * Flush (fdc) every data cache line of one page through a temporary
 * alias mapping built from TMPALIAS_MAP_START and the low 22 bits of
 * the address in %r25.  Lines are dcache_stride bytes apart.
 *
 * In:       %r26 = physical address of the page (converted here to
 *                  tlb-insert format)
 *           %r25 = address whose low 22 bits select the alias
 *                  (presumably the user virtual address -- confirm
 *                  against the C caller)
 *           %r2  = return address
 * Clobbers: %r1, %r25, %r26, %r28, %r31
 */
ENTRY_CFI(flush_dcache_page_asm)
	ldil		L%(TMPALIAS_MAP_START), %r28
#ifdef CONFIG_64BIT
#if (TMPALIAS_MAP_START >= 0x80000000)
	depdi		0, 31,32, %r28		/* clear any sign extension */
#endif
	convert_phys_for_tlb_insert20 %r26	/* convert phys addr to tlb insert format */
	depd		%r25, 63,22, %r28	/* Form aliased virtual address 'to' */
	depdi		0, 63,PAGE_SHIFT, %r28	/* Clear any offset bits */
#else
	extrw,u		%r26, 24,25, %r26	/* convert phys addr to tlb insert format */
	depw		%r25, 31,22, %r28	/* Form aliased virtual address 'to' */
	depwi		0, 31,PAGE_SHIFT, %r28	/* Clear any offset bits */
#endif

	/* Purge any old translation */

#ifdef CONFIG_PA20
	pdtlb,l		%r0(%r28)
#else
0:	pdtlb		%r0(%r28)
	ALTERNATIVE(0b, 0b+4, ALT_COND_NO_SMP, INSN_PxTLB)
#endif

88:	ldil		L%dcache_stride, %r1
	ldw		R%dcache_stride(%r1), %r31

	/* %r25 = alias address + PAGE_SIZE - stride, i.e. the last line */
#ifdef CONFIG_64BIT
	depdi,z		1, 63-PAGE_SHIFT,1, %r25
#else
	depwi,z		1, 31-PAGE_SHIFT,1, %r25
#endif
	add		%r28, %r25, %r25
	sub		%r25, %r31, %r25

	/* 15 here plus the one in the cmpb delay slot: 16 lines per pass. */
1:
	.rept	15
	fdc,m		%r31(%r28)
	.endr
	cmpb,COND(>>)	%r25, %r28, 1b /* predict taken */
	fdc,m		%r31(%r28)

	/*
	 * NOTE(review): presumably lets the alternatives patcher NOP out
	 * 88: .. 89: on machines without a D-cache -- confirm against
	 * <asm/alternative.h>.
	 */
89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP)
	sync
	bv		%r0(%r2)		/* return to caller */
	nop
ENDPROC_CFI(flush_dcache_page_asm)

/*
 * purge_dcache_page_asm
 *
 * Same walk as flush_dcache_page_asm, but issues pdc (purge) instead of
 * fdc (flush) on every data cache line of the page, through a temporary
 * alias mapping built from TMPALIAS_MAP_START and the low 22 bits of
 * the address in %r25.  Lines are dcache_stride bytes apart.
 *
 * In:       %r26 = physical address of the page (converted here to
 *                  tlb-insert format)
 *           %r25 = address whose low 22 bits select the alias
 *                  (presumably the user virtual address -- confirm
 *                  against the C caller)
 *           %r2  = return address
 * Clobbers: %r1, %r25, %r26, %r28, %r31
 */
ENTRY_CFI(purge_dcache_page_asm)
	ldil		L%(TMPALIAS_MAP_START), %r28
#ifdef CONFIG_64BIT
#if (TMPALIAS_MAP_START >= 0x80000000)
	depdi		0, 31,32, %r28		/* clear any sign extension */
#endif
	convert_phys_for_tlb_insert20 %r26	/* convert phys addr to tlb insert format */
	depd		%r25, 63,22, %r28	/* Form aliased virtual address 'to' */
	depdi		0, 63,PAGE_SHIFT, %r28	/* Clear any offset bits */
#else
	extrw,u		%r26, 24,25, %r26	/* convert phys addr to tlb insert format */
	depw		%r25, 31,22, %r28	/* Form aliased virtual address 'to' */
	depwi		0, 31,PAGE_SHIFT, %r28	/* Clear any offset bits */
#endif

	/* Purge any old translation */

#ifdef CONFIG_PA20
	pdtlb,l		%r0(%r28)
#else
0:	pdtlb		%r0(%r28)
	ALTERNATIVE(0b, 0b+4, ALT_COND_NO_SMP, INSN_PxTLB)
#endif

88:	ldil		L%dcache_stride, %r1
	ldw		R%dcache_stride(%r1), %r31

	/* %r25 = alias address + PAGE_SIZE - stride, i.e. the last line */
#ifdef CONFIG_64BIT
	depdi,z		1, 63-PAGE_SHIFT,1, %r25
#else
	depwi,z		1, 31-PAGE_SHIFT,1, %r25
#endif
	add		%r28, %r25, %r25
	sub		%r25, %r31, %r25

	/* 15 here plus the one in the cmpb delay slot: 16 lines per pass. */
1:
	.rept	15
	pdc,m		%r31(%r28)
	.endr
	cmpb,COND(>>)	%r25, %r28, 1b /* predict taken */
	pdc,m		%r31(%r28)

	/*
	 * NOTE(review): presumably lets the alternatives patcher NOP out
	 * 88: .. 89: on machines without a D-cache -- confirm against
	 * <asm/alternative.h>.
	 */
89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP)
	sync
	bv		%r0(%r2)		/* return to caller */
	nop
ENDPROC_CFI(purge_dcache_page_asm)

/*
 * flush_icache_page_asm
 *
 * Flush (fic) every instruction cache line of one page through a
 * temporary alias mapping built from TMPALIAS_MAP_START and the low
 * 22 bits of the address in %r25.  Lines are icache_stride bytes apart.
 *
 * In:       %r26 = physical address of the page (converted here to
 *                  tlb-insert format)
 *           %r25 = address whose low 22 bits select the alias
 *                  (presumably the user virtual address -- confirm
 *                  against the C caller)
 *           %r2  = return address
 * Clobbers: %r1, %r25, %r26, %r28, %r31
 */
ENTRY_CFI(flush_icache_page_asm)
	ldil		L%(TMPALIAS_MAP_START), %r28
#ifdef CONFIG_64BIT
#if (TMPALIAS_MAP_START >= 0x80000000)
	depdi		0, 31,32, %r28		/* clear any sign extension */
#endif
	convert_phys_for_tlb_insert20 %r26	/* convert phys addr to tlb insert format */
	depd		%r25, 63,22, %r28	/* Form aliased virtual address 'to' */
	depdi		0, 63,PAGE_SHIFT, %r28	/* Clear any offset bits */
#else
	extrw,u		%r26, 24,25, %r26	/* convert phys addr to tlb insert format */
	depw		%r25, 31,22, %r28	/* Form aliased virtual address 'to' */
	depwi		0, 31,PAGE_SHIFT, %r28	/* Clear any offset bits */
#endif

	/* Purge any old translation.  Note that the FIC instruction
	 * may use either the instruction or data TLB.  Given that we
	 * have a flat address space, it's not clear which TLB will be
	 * used.  So, we purge both entries.  */

#ifdef CONFIG_PA20
	pdtlb,l		%r0(%r28)
1:	pitlb,l		%r0(%sr4,%r28)
	ALTERNATIVE(1b, 1b+4, ALT_COND_NO_SPLIT_TLB, INSN_NOP)
#else
0:	pdtlb		%r0(%r28)
1:	pitlb		%r0(%sr4,%r28)
	ALTERNATIVE(0b, 0b+4, ALT_COND_NO_SMP, INSN_PxTLB)
	ALTERNATIVE(1b, 1b+4, ALT_COND_NO_SMP, INSN_PxTLB)
	ALTERNATIVE(1b, 1b+4, ALT_COND_NO_SPLIT_TLB, INSN_NOP)
#endif

88:	ldil		L%icache_stride, %r1
	ldw		R%icache_stride(%r1), %r31

	/* %r25 = alias address + PAGE_SIZE - stride, i.e. the last line */
#ifdef CONFIG_64BIT
	depdi,z		1, 63-PAGE_SHIFT,1, %r25
#else
	depwi,z		1, 31-PAGE_SHIFT,1, %r25
#endif
	add		%r28, %r25, %r25
	sub		%r25, %r31, %r25

	/* fic only has the type 26 form on PA1.1, requiring an
	 * explicit space specification, so use %sr4 */
	/* 15 here plus the one in the cmpb delay slot: 16 lines per pass. */
1:
	.rept	15
	fic,m		%r31(%sr4,%r28)
	.endr
	cmpb,COND(>>)	%r25, %r28, 1b /* predict taken */
	fic,m		%r31(%sr4,%r28)

	/*
	 * NOTE(review): presumably lets the alternatives patcher NOP out
	 * 88: .. 89: on machines without an I-cache -- confirm against
	 * <asm/alternative.h>.
	 */
89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_ICACHE, INSN_NOP)
	sync
	bv		%r0(%r2)		/* return to caller */
	nop
ENDPROC_CFI(flush_icache_page_asm)

/*
 * flush_kernel_dcache_page_asm
 *
 * Flush (fdc) every data cache line of the page at the address in
 * %r26, accessed directly rather than through a temporary alias.
 * Lines are dcache_stride bytes apart.
 *
 * In:       %r26 = address of the page
 *           %r2  = return address
 * Clobbers: %r1, %r23 (stride), %r25 (last line), %r26 (advanced)
 */
ENTRY_CFI(flush_kernel_dcache_page_asm)
88:	ldil		L%dcache_stride, %r1
	ldw		R%dcache_stride(%r1), %r23

	/* %r25 = page address + PAGE_SIZE - stride, i.e. the last line */
#ifdef CONFIG_64BIT
	depdi,z		1, 63-PAGE_SHIFT,1, %r25
#else
	depwi,z		1, 31-PAGE_SHIFT,1, %r25
#endif
	add		%r26, %r25, %r25
	sub		%r25, %r23, %r25

	/* 15 here plus the one in the cmpb delay slot: 16 lines per pass. */
1:
	.rept	15
	fdc,m		%r23(%r26)
	.endr
	cmpb,COND(>>)	%r25, %r26, 1b /* predict taken */
	fdc,m		%r23(%r26)

	/*
	 * NOTE(review): presumably lets the alternatives patcher NOP out
	 * 88: .. 89: on machines without a D-cache -- confirm against
	 * <asm/alternative.h>.
	 */
89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP)
	sync
	bv		%r0(%r2)		/* return to caller */
	nop
ENDPROC_CFI(flush_kernel_dcache_page_asm)

/*
 * purge_kernel_dcache_page_asm
 *
 * Same walk as flush_kernel_dcache_page_asm, but issues pdc (purge)
 * instead of fdc (flush) on every data cache line of the page at the
 * address in %r26.  Lines are dcache_stride bytes apart.
 *
 * In:       %r26 = address of the page
 *           %r2  = return address
 * Clobbers: %r1, %r23 (stride), %r25 (last line), %r26 (advanced)
 */
ENTRY_CFI(purge_kernel_dcache_page_asm)
88:	ldil		L%dcache_stride, %r1
	ldw		R%dcache_stride(%r1), %r23

	/* %r25 = page address + PAGE_SIZE - stride, i.e. the last line */
#ifdef CONFIG_64BIT
	depdi,z		1, 63-PAGE_SHIFT,1, %r25
#else
	depwi,z		1, 31-PAGE_SHIFT,1, %r25
#endif
	add		%r26, %r25, %r25
	sub		%r25, %r23, %r25

	/* 15 here plus the one in the cmpb delay slot: 16 lines per pass. */
1:
	.rept	15
	pdc,m		%r23(%r26)
	.endr
	cmpb,COND(>>)	%r25, %r26, 1b /* predict taken */
	pdc,m		%r23(%r26)

	/*
	 * NOTE(review): presumably lets the alternatives patcher NOP out
	 * 88: .. 89: on machines without a D-cache -- confirm against
	 * <asm/alternative.h>.
	 */
89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP)
	sync
	bv		%r0(%r2)		/* return to caller */
	nop
ENDPROC_CFI(purge_kernel_dcache_page_asm)

/*
 * flush_user_dcache_range_asm
 *
 * Flush (fdc) the data cache lines covering [%r26, %r25) using space
 * register %sr3.  The start is rounded down to a dcache_stride
 * boundary; whole groups of 16 lines are flushed by the unrolled loop
 * and the remainder one line at a time.
 *
 * In:       %r26 = start address
 *           %r25 = end address (exclusive)
 *           %r2  = return address
 * Clobbers: %r1, %r21 (16 * stride), %r22 (end of current group),
 *           %r23 (stride), %r26 (advanced)
 */
ENTRY_CFI(flush_user_dcache_range_asm)
88:	ldil		L%dcache_stride, %r1
	ldw		R%dcache_stride(%r1), %r23
	ldo		-1(%r23), %r21
	ANDCM		%r26, %r21, %r26	/* align start down to a line */

	/* %r21 = stride << 4: bytes covered by one unrolled pass */
#ifdef CONFIG_64BIT
	depd,z		%r23, 59, 60, %r21
#else
	depw,z		%r23, 27, 28, %r21
#endif
	add		%r26, %r21, %r22
	cmpb,COND(>>),n	%r22, %r25, 2f /* predict not taken */
1:	add		%r22, %r21, %r22
	/* 15 here plus the one in the cmpb delay slot: 16 lines per pass. */
	.rept	15
	fdc,m		%r23(%sr3, %r26)
	.endr
	cmpb,COND(<<=)	%r22, %r25, 1b /* predict taken */
	fdc,m		%r23(%sr3, %r26)

	/* Tail: one line per pass while the end is still above %r26. */
2:	cmpb,COND(>>),n	%r25, %r26, 2b
	fdc,m		%r23(%sr3, %r26)

	/*
	 * NOTE(review): presumably lets the alternatives patcher NOP out
	 * 88: .. 89: on machines without a D-cache -- confirm against
	 * <asm/alternative.h>.
	 */
89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP)
	sync
	bv		%r0(%r2)		/* return to caller */
	nop
ENDPROC_CFI(flush_user_dcache_range_asm)

/*
 * flush_kernel_dcache_range_asm
 *
 * Flush (fdc) the data cache lines covering [%r26, %r25), with no
 * explicit space register.  The start is rounded down to a
 * dcache_stride boundary; whole groups of 16 lines are flushed by the
 * unrolled loop and the remainder one line at a time.  Ends with sync
 * and syncdma.
 *
 * In:       %r26 = start address
 *           %r25 = end address (exclusive)
 *           %r2  = return address
 * Clobbers: %r1, %r21 (16 * stride), %r22 (end of current group),
 *           %r23 (stride), %r26 (advanced)
 */
ENTRY_CFI(flush_kernel_dcache_range_asm)
88:	ldil		L%dcache_stride, %r1
	ldw		R%dcache_stride(%r1), %r23
	ldo		-1(%r23), %r21
	ANDCM		%r26, %r21, %r26	/* align start down to a line */

	/* %r21 = stride << 4: bytes covered by one unrolled pass */
#ifdef CONFIG_64BIT
	depd,z		%r23, 59, 60, %r21
#else
	depw,z		%r23, 27, 28, %r21
#endif
	add		%r26, %r21, %r22
	cmpb,COND(>>),n	%r22, %r25, 2f /* predict not taken */
1:	add		%r22, %r21, %r22
	/* 15 here plus the one in the cmpb delay slot: 16 lines per pass. */
	.rept	15
	fdc,m		%r23(%r26)
	.endr
	cmpb,COND(<<=)	%r22, %r25, 1b /* predict taken */
	fdc,m		%r23(%r26)

	/* Tail: one line per pass while the end is still above %r26. */
2:	cmpb,COND(>>),n	%r25, %r26, 2b /* predict taken */
	fdc,m		%r23(%r26)

	/*
	 * NOTE(review): here the sync sits inside the 88: .. 89: range
	 * named by ALTERNATIVE and syncdma follows it, whereas
	 * flush_user_dcache_range_asm places its sync after 89: --
	 * confirm the difference is intentional.
	 */
	sync
89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP)
	syncdma
	bv		%r0(%r2)		/* return to caller */
	nop
ENDPROC_CFI(flush_kernel_dcache_range_asm)

ENTRY_CFI(purge_kernel_dcache_range_asm)
	/*
	 * Purge (pdc) the data cache over an address range, without an
	 * explicit space register on the purge instructions.
	 *
	 * Register roles, as used below:
	 *   %r26  start address on entry; low bits are masked off with
	 *         (stride - 1) and every pdc,m then advances it by %r23
	 *   %r25  end address; lines are purged while %r25 >> %r26
	 *         (unsigned compare)
	 *   %r23  dcache_stride, loaded from memory
	 *   %r21  first stride - 1, afterwards stride shifted left by 4
	 *         (the distance covered by 16 purges)
	 *   %r22  look-ahead address tested by the 16-way unrolled loop
	 *   %r1   scratch for addressing dcache_stride
	 *   %r2   return pointer
	 *
	 * Everything between labels 88 and 89 (including the sync) is
	 * registered with ALTERNATIVE() under ALT_COND_NO_DCACHE/INSN_NOP.
	 */
88:	ldil		L%dcache_stride, %r1
	ldw		R%dcache_stride(%r1), %r23
	ldo		-1(%r23), %r21			/* mask = stride - 1 */
	ANDCM		%r26, %r21, %r26		/* drop the mask bits from start */

	/* %r21 = stride << 4 */
#ifndef CONFIG_64BIT
	depw,z		%r23, 27, 28, %r21
#else
	depd,z		%r23, 59, 60, %r21
#endif
	add		%r26, %r21, %r22
	/* Fewer than 16 strides in range: go straight to the tail loop. */
	cmpb,COND(>>),n	%r22, %r25, 2f
1:	add		%r22, %r21, %r22
	/* 15 purges here, the 16th sits in the branch delay slot. */
	.rept		15
	pdc,m		%r23(%r26)
	.endr
	cmpb,COND(<<=)	%r22, %r25, 1b
	pdc,m		%r23(%r26)

	/* Tail: one purge per iteration; ,n nullifies the slot on exit. */
2:	cmpb,COND(>>),n	%r25, %r26, 2b
	pdc,m		%r23(%r26)

	sync
89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP)
	syncdma
	bv		%r0(%r2)
	nop
ENDPROC_CFI(purge_kernel_dcache_range_asm)
1147
ENTRY_CFI(flush_user_icache_range_asm)
	/*
	 * Flush (fic) the instruction cache over an address range, with
	 * every flush addressed through space register %sr3.
	 *
	 * Register roles, as used below:
	 *   %r26  start address on entry; low bits are masked off with
	 *         (stride - 1) and every fic,m then advances it by %r23
	 *   %r25  end address; lines are flushed while %r25 >> %r26
	 *         (unsigned compare)
	 *   %r23  icache_stride, loaded from memory
	 *   %r21  first stride - 1, afterwards stride shifted left by 4
	 *         (the distance covered by 16 flushes)
	 *   %r22  look-ahead address tested by the 16-way unrolled loop
	 *   %r1   scratch for addressing icache_stride
	 *   %r2   return pointer
	 *
	 * The range between labels 88 and 89 is registered with
	 * ALTERNATIVE() under ALT_COND_NO_ICACHE/INSN_NOP; the trailing
	 * sync lies outside that range.
	 */
88:	ldil		L%icache_stride, %r1
	ldw		R%icache_stride(%r1), %r23
	ldo		-1(%r23), %r21			/* mask = stride - 1 */
	ANDCM		%r26, %r21, %r26		/* drop the mask bits from start */

	/* %r21 = stride << 4 */
#ifndef CONFIG_64BIT
	depw,z		%r23, 27, 28, %r21
#else
	depd,z		%r23, 59, 60, %r21
#endif
	add		%r26, %r21, %r22
	/* Fewer than 16 strides in range: go straight to the tail loop. */
	cmpb,COND(>>),n	%r22, %r25, 2f
1:	add		%r22, %r21, %r22
	/* 15 flushes here, the 16th sits in the branch delay slot. */
	.rept		15
	fic,m		%r23(%sr3, %r26)
	.endr
	cmpb,COND(<<=)	%r22, %r25, 1b
	fic,m		%r23(%sr3, %r26)

	/* Tail: one flush per iteration; ,n nullifies the slot on exit. */
2:	cmpb,COND(>>),n	%r25, %r26, 2b
	fic,m		%r23(%sr3, %r26)

89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_ICACHE, INSN_NOP)
	sync
	bv		%r0(%r2)
	nop
ENDPROC_CFI(flush_user_icache_range_asm)
1188
ENTRY_CFI(flush_kernel_icache_page)
	/*
	 * Flush (fic) one page worth of instruction cache lines, addressed
	 * through space register %sr4.
	 *
	 * Register roles, as used below:
	 *   %r26  page address on entry; advanced by %r23 on every fic,m
	 *   %r25  computed here as %r26 + (1 << PAGE_SHIFT) - stride, i.e.
	 *         the address of the last line to flush
	 *   %r23  icache_stride, loaded from memory
	 *   %r1   scratch for addressing icache_stride
	 *   %r2   return pointer
	 *
	 * The range between labels 88 and 89 is registered with
	 * ALTERNATIVE() under ALT_COND_NO_ICACHE/INSN_NOP; the trailing
	 * sync lies outside that range.
	 */
88:	ldil		L%icache_stride, %r1
	ldw		R%icache_stride(%r1), %r23

	/* %r25 = 1 << PAGE_SHIFT */
#ifndef CONFIG_64BIT
	depwi,z		1, 31-PAGE_SHIFT,1, %r25
#else
	depdi,z		1, 63-PAGE_SHIFT,1, %r25
#endif
	add		%r26, %r25, %r25
	sub		%r25, %r23, %r25

	/*
	 * 16 flushes per pass: 15 in the loop body plus one in the delay
	 * slot of the branch, which is not nullified and so always runs.
	 */
1:
	.rept		15
	fic,m		%r23(%sr4, %r26)
	.endr
	cmpb,COND(>>)	%r25, %r26, 1b
	fic,m		%r23(%sr4, %r26)

89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_ICACHE, INSN_NOP)
	sync
	bv		%r0(%r2)
	nop
ENDPROC_CFI(flush_kernel_icache_page)
1225
ENTRY_CFI(flush_kernel_icache_range_asm)
	/*
	 * Flush (fic) the instruction cache over an address range, with
	 * every flush addressed through space register %sr4.
	 *
	 * Register roles, as used below:
	 *   %r26  start address on entry; low bits are masked off with
	 *         (stride - 1) and every fic,m then advances it by %r23
	 *   %r25  end address; lines are flushed while %r25 >> %r26
	 *         (unsigned compare)
	 *   %r23  icache_stride, loaded from memory
	 *   %r21  first stride - 1, afterwards stride shifted left by 4
	 *         (the distance covered by 16 flushes)
	 *   %r22  look-ahead address tested by the 16-way unrolled loop
	 *   %r1   scratch for addressing icache_stride
	 *   %r2   return pointer
	 *
	 * The range between labels 88 and 89 is registered with
	 * ALTERNATIVE() under ALT_COND_NO_ICACHE/INSN_NOP; the trailing
	 * sync lies outside that range.
	 */
88:	ldil		L%icache_stride, %r1
	ldw		R%icache_stride(%r1), %r23
	ldo		-1(%r23), %r21			/* mask = stride - 1 */
	ANDCM		%r26, %r21, %r26		/* drop the mask bits from start */

	/* %r21 = stride << 4 */
#ifndef CONFIG_64BIT
	depw,z		%r23, 27, 28, %r21
#else
	depd,z		%r23, 59, 60, %r21
#endif
	add		%r26, %r21, %r22
	/* Fewer than 16 strides in range: go straight to the tail loop. */
	cmpb,COND(>>),n	%r22, %r25, 2f
1:	add		%r22, %r21, %r22
	/* 15 flushes here, the 16th sits in the branch delay slot. */
	.rept		15
	fic,m		%r23(%sr4, %r26)
	.endr
	cmpb,COND(<<=)	%r22, %r25, 1b
	fic,m		%r23(%sr4, %r26)

	/* Tail: one flush per iteration; ,n nullifies the slot on exit. */
2:	cmpb,COND(>>),n	%r25, %r26, 2b
	fic,m		%r23(%sr4, %r26)

89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_ICACHE, INSN_NOP)
	sync
	bv		%r0(%r2)
	nop
ENDPROC_CFI(flush_kernel_icache_range_asm)
1266
1267	__INIT
1268
1269	/* align should cover use of rfi in disable_sr_hashing_asm and
1270	 * srdis_done.
1271	 */
1272	.align	256
ENTRY_CFI(disable_sr_hashing_asm)
	/*
	 * Turn off space-register hashing for the CPU family selected by
	 * %r26 (compared against SRHASH_PCXST, SRHASH_PCXL and SRHASH_PA20;
	 * any other value changes nothing).
	 *
	 * The diagnose-register accesses are done between two rfi-based
	 * mode switches: first to REAL_MODE_PSW, then back to KERNEL_PSW.
	 *
	 * Uses %r1 and %r28 as scratch; returns through %r2.
	 */

	/* ---- enter real mode (pcxt_ssm_bug sequence) ---- */
	rsm		PSW_SM_I, %r0		/* clear PSW I-bit, old mask discarded */
	load32		PA(1f), %r1		/* physical address to resume at */
	.rept		5
	nop
	.endr

	rsm		PSW_SM_Q, %r0		/* clear Q so the iia queue can be loaded */
	mtctl		%r0, %cr17		/* IIASQ tail = 0 */
	mtctl		%r0, %cr17		/* IIASQ head = 0 */
	mtctl		%r1, %cr18		/* IIAOQ head = 1f */
	ldo		4(%r1), %r1
	mtctl		%r1, %cr18		/* IIAOQ tail = 1f + 4 */
	load32		REAL_MODE_PSW, %r1
	mtctl		%r1, %ipsw
	rfi
	nop

	/* ---- dispatch on the CPU family in %r26 ---- */
1:	cmpib,=,n	SRHASH_PCXST, %r26,srdis_pcxs
	cmpib,=,n	SRHASH_PCXL, %r26,srdis_pcxl
	cmpib,=,n	SRHASH_PA20, %r26,srdis_pa20
	b,n		srdis_done

srdis_pcxs:
	/* PCXS, PCXT, PCXT': each diagnose access is issued twice. */
	.word		0x141c1a00		/* mfdiag %dr0, %r28 */
	.word		0x141c1a00		/* must issue twice */
	depwi		0,18,1, %r28		/* DHE (dcache hash enable) = 0 */
	depwi		0,20,1, %r28		/* IHE (icache hash enable) = 0 */
	.word		0x141c1600		/* mtdiag %r28, %dr0 */
	.word		0x141c1600		/* must issue twice */
	b,n		srdis_done

srdis_pcxl:
	/* PCXL */
	.word		0x141c0600		/* mfdiag %dr0, %r28 */
	depwi           0,28,2, %r28		/* DHASH_EN and IHASH_EN = 0 */
	.word		0x141c0240		/* mtdiag %r28, %dr0 */
	b,n		srdis_done

srdis_pa20:
	/* PCXU, PCXU+, PCXW, PCXW+, PCXW2; falls through to srdis_done. */
	.word		0x144008bc		/* mfdiag %dr2, %r28 */
	depdi		0, 54,1, %r28		/* DIAG_SPHASH_ENAB (bit 54) = 0 */
	.word		0x145c1840		/* mtdiag %r28, %dr2 */

srdis_done:
	/* ---- return to virtual mode ---- */
	rsm		PSW_SM_I, %r0		/* clear PSW I-bit, old mask discarded */
	load32 	   	2f, %r1			/* virtual address to resume at */
	.rept		5
	nop
	.endr

	rsm		PSW_SM_Q, %r0		/* clear Q so the iia queue can be loaded */
	mtctl		%r0, %cr17		/* IIASQ tail = 0 */
	mtctl		%r0, %cr17		/* IIASQ head = 0 */
	mtctl		%r1, %cr18		/* IIAOQ head = 2f */
	ldo		4(%r1), %r1
	mtctl		%r1, %cr18		/* IIAOQ tail = 2f + 4 */
	load32		KERNEL_PSW, %r1
	mtctl		%r1, %ipsw
	rfi
	nop

2:	bv		%r0(%r2)
	nop
ENDPROC_CFI(disable_sr_hashing_asm)
1356
1357	.end
1358