1/*
2 * Copyright (C) 2007-2009 Michal Simek <monstr@monstr.eu>
3 * Copyright (C) 2007-2009 PetaLogix
4 * Copyright (C) 2006 Atmark Techno, Inc.
5 *
6 * MMU code derived from arch/ppc/kernel/head_4xx.S:
7 *    Copyright (c) 1995-1996 Gary Thomas <gdt@linuxppc.org>
8 *      Initial PowerPC version.
9 *    Copyright (c) 1996 Cort Dougan <cort@cs.nmt.edu>
10 *      Rewritten for PReP
11 *    Copyright (c) 1996 Paul Mackerras <paulus@cs.anu.edu.au>
12 *      Low-level exception handlers, MMU support, and rewrite.
13 *    Copyright (c) 1997 Dan Malek <dmalek@jlc.net>
14 *      PowerPC 8xx modifications.
15 *    Copyright (c) 1998-1999 TiVo, Inc.
16 *      PowerPC 403GCX modifications.
17 *    Copyright (c) 1999 Grant Erickson <grant@lcse.umn.edu>
18 *      PowerPC 403GCX/405GP modifications.
19 *    Copyright 2000 MontaVista Software Inc.
20 *	PPC405 modifications
21 *      PowerPC 403GCX/405GP modifications.
22 * 	Author: MontaVista Software, Inc.
23 *         	frank_rowand@mvista.com or source@mvista.com
24 * 	   	debbie_chu@mvista.com
25 *
26 * This file is subject to the terms and conditions of the GNU General Public
27 * License. See the file "COPYING" in the main directory of this archive
28 * for more details.
29 */
30
31#include <linux/init.h>
32#include <linux/linkage.h>
33#include <asm/thread_info.h>
34#include <asm/page.h>
35#include <linux/of_fdt.h>		/* for OF_DT_HEADER */
36
37#ifdef CONFIG_MMU
38#include <asm/setup.h> /* COMMAND_LINE_SIZE */
39#include <asm/mmu.h>
40#include <asm/processor.h>
41
/*
 * Statically reserved pages for the MMU kernel (this block sits inside the
 * CONFIG_MMU guard):
 *   empty_zero_page - PAGE_SIZE bytes reserved with .space
 *   swapper_pg_dir  - PAGE_SIZE bytes placed directly after empty_zero_page
 * Only empty_zero_page has an alignment directive of its own; swapper_pg_dir
 * relies on following it immediately.
 * NOTE(review): ".align 12" is presumably a power-of-two alignment (4 KiB)
 * with this assembler - confirm against the toolchain documentation.
 */
42.section .data
43.global empty_zero_page
44.align 12
45empty_zero_page:
46	.space	PAGE_SIZE	/* no fill value given */
47.global swapper_pg_dir
48swapper_pg_dir:
49	.space	PAGE_SIZE	/* no fill value given */
50
51#endif /* CONFIG_MMU */
52
/*
 * Run-time endianness probe: a word holding the value 1.
 * _start loads only the first byte of this word (lbui) and treats
 * 0 as big endian and 1 as little endian.
 */
53.section .rodata
54.align 4
55endian_check:
56	.word	1
57
/*
 * _start - kernel entry point.
 *
 * This first stage:
 *   1. (only when CONFIG_KERNEL_BASE_ADDR == 0) jumps over the first 0x100
 *      bytes of the image to real_start,
 *   2. writes r0 to MSR,
 *   3. disables stack protection left enabled by a bootloader
 *      (rslr = 0, rshr = 0xFFFFFFFF),
 *   4. probes whether the msrclr instruction is implemented and leaves the
 *      result in r8 (0 = implemented, non-zero = not implemented).
 *
 * Registers written here: r1 (scratch copy of MSR), r8 (probe result).
 * r8 is not read again in this file.
 * NOTE(review): r8 is presumably consumed by machine_early_init, which is
 * called later - confirm against its prototype.
 */
58	__HEAD
59ENTRY(_start)
60#if CONFIG_KERNEL_BASE_ADDR == 0
61	brai	TOPHYS(real_start)	/* absolute jump to the physical address of real_start */
62	.org	0x100	/* real_start is placed at offset 0x100 */
63real_start:
64#endif
65
66	mts	rmsr, r0	/* MSR = r0 */
67/* Disable stack protection from bootloader */
68	mts	rslr, r0	/* rslr = r0 */
69	addi	r8, r0, 0xFFFFFFFF
70	mts	rshr, r8	/* rshr = 0xFFFFFFFF */
71/*
72 * According to Xilinx, msrclr instruction behaves like 'mfs rX,rpc'
73 * if the msrclr instruction is not enabled. We use this to detect
74 * if the opcode is available, by issuing msrclr and then testing the result.
75 * r8 == 0 - msr instructions are implemented
76 * r8 != 0 - msr instructions are not implemented
77 */
78	mfs	r1, rmsr	/* r1 = reference copy of MSR; r1 is reloaded as stack pointer in start_here */
79	msrclr	r8, 0 /* clear nothing - just read msr for test */
80	cmpu	r8, r8, r1 /* r1 must contain msr reg content */
81
/*
 * Validate the flattened device tree pointer passed in r7 and, if it is
 * valid, copy the blob to TOPHYS(_fdt_start).
 *
 *   r7 == 0                        -> nothing to copy, go to no_fdt_arg
 *   first word != OF_DT_HEADER     -> r7 is cleared, go to no_fdt_arg
 *   first word == OF_DT_HEADER     -> copy a fixed 0x10000 bytes (64 KiB),
 *                                     one word at a time; the size field of
 *                                     the blob itself is not consulted
 *
 * The magic word is read with lw on big-endian and re-read with lwr on
 * little-endian systems; the choice is made at run time from the first byte
 * of endian_check.
 *
 * Registers written: r3 (remaining-byte counter), r4 (destination base),
 * r11 (scratch / byte offset), r12 (data word), r7 (cleared when invalid).
 */
82/* r7 may point to an FDT, or there may be one linked in.
83   if it's in r7, we've got to save it away ASAP.
84   We ensure r7 points to a valid FDT, just in case the bootloader
85   is broken or non-existent */
86	beqi	r7, no_fdt_arg			/* NULL pointer?  don't copy */
87/* Does r7 point to a valid FDT? Load HEADER magic number */
88	/* Run time Big/Little endian platform */
89	/* Save 1 as word and load byte - 0 - BIG, 1 - LITTLE */
90	lbui	r11, r0, TOPHYS(endian_check)
91	beqid	r11, big_endian /* DO NOT break the delay slot dependency */
92	lw	r11, r0, r7 /* Big endian load in delay slot (executed on both paths) */
93	lwr	r11, r0, r7 /* Little endian load (only reached when the branch is not taken) */
94big_endian:
95	rsubi	r11, r11, OF_DT_HEADER	/* Check FDT header: r11 = OF_DT_HEADER - r11 */
96	beqi	r11, _prepare_copy_fdt
97	or	r7, r0, r0		/* clear R7 when not valid DTB */
98	bnei	r11, no_fdt_arg			/* No - get out of here (r11 is non-zero on this path) */
99_prepare_copy_fdt:
100	or	r11, r0, r0 /* r11 = 0: byte offset into source and destination */
101	ori	r4, r0, TOPHYS(_fdt_start)	/* r4 = destination base */
102	ori	r3, r0, (0x10000 - 4)	/* r3 = bytes remaining after the current word */
103_copy_fdt:
104	lw	r12, r7, r11 /* r12 = word at [r7 + r11] */
105	sw	r12, r4, r11 /* addr[r4 + r11] = r12 */
106	addik	r11, r11, 4 /* advance the byte offset */
107	bgtid	r3, _copy_fdt /* loop while r3 > 0 */
108	addik	r3, r3, -4 /* delay slot: decrement the loop counter */
109no_fdt_arg:
110
/*
 * Copy the command line passed in r5 into cmd_line (assembled only when
 * CONFIG_MMU is set and CONFIG_CMDLINE_BOOL is not).
 *
 *   r5 == 0  -> nothing is copied.
 *   Otherwise bytes are copied one at a time until either a NUL byte has
 *   been copied or COMMAND_LINE_SIZE bytes have been copied.
 *
 * Registers written: r2 (current byte), r3 (loop counter), r4 (physical
 * address of cmd_line), r11 (byte offset), r5 (see note).
 *
 * NOTE(review): r5 is redirected to the virtual address of cmd_line only
 * when the loop runs to the size limit. When a NUL byte ends the copy, the
 * branch goes straight to skip and r5 still holds the pointer supplied by
 * the caller. Also, in the size-limit case this loop does not itself write a
 * terminating NUL. Confirm that later consumers of r5 expect both.
 */
111#ifdef CONFIG_MMU
112
113#ifndef CONFIG_CMDLINE_BOOL
114/*
115 * handling command line
116 * copy command line directly to cmd_line placed in data section.
117 */
118	beqid	r5, skip	/* Skip if NULL pointer */
119	or	r11, r0, r0		/* delay slot: r11 = 0, the byte offset */
120	ori	r4, r0, cmd_line	/* load address of command line */
121	tophys(r4,r4)			/* convert to phys address */
122	ori	r3, r0, COMMAND_LINE_SIZE - 1 /* number of loops */
123_copy_command_line:
124	/* r2 = byte at [r5 + r11] - r5 contains the pointer to the command line */
125	lbu	r2, r5, r11
126	beqid	r2, skip		/* Skip if no data */
127	sb	r2, r4, r11		/* delay slot: addr[r4+r11] = r2 (the NUL byte is stored too) */
128	addik	r11, r11, 1		/* advance the byte offset */
129	bgtid	r3, _copy_command_line	/* loop for all entries       */
130	addik	r3, r3, -1		/* delay slot: decrement loop counter */
131	addik	r5, r4, 0		/* r5 = physical address of the copy */
132	tovirt(r5,r5)			/* convert r5 to a virtual address */
133skip:
134#endif /* CONFIG_CMDLINE_BOOL */
135
/*
 * Optional BRAM snapshot, assembled only when NOT_COMPILE is defined.
 * Copies LMB_SIZE bytes, one word at a time, from address 0 upwards to
 * TOPHYS(_bram_load_start).
 *
 * NOTE(review): this loop uses r7 as its data register, so enabling it would
 * overwrite the FDT pointer that the preceding code keeps in r7.
 */
136#ifdef NOT_COMPILE
137/* save bram context */
138	or	r11, r0, r0				/* r11 = 0: byte offset */
139	ori	r4, r0, TOPHYS(_bram_load_start)	/* save bram context */
140	ori	r3, r0, (LMB_SIZE - 4)
141_copy_bram:
142	lw	r7, r0, r11		/* r7 = word at [r0 + r11] */
143	sw	r7, r4, r11		/* addr[r4 + r11] = r7 */
144	addik	r11, r11, 4		/* advance the byte offset */
145	bgtid	r3, _copy_bram		/* loop for all entries */
146	addik	r3, r3, -4		/* delay slot: decrement loop counter */
147#endif
/*
 * Clear every TLB entry and load the kernel PID.
 *
 * The loop walks the index from MICROBLAZE_TLB_SIZE - 1 down to 0; for each
 * index it selects the entry through rtlbx and writes r0 to both rtlbhi and
 * rtlblo. Afterwards rpid is written with r0.
 *
 * Registers written: r3 (TLB index).
 *
 * NOTE(review): the "first 16 MBytes" wording in the comment below does not
 * match the code that follows, which sizes the mapping from _end using up to
 * two TLB entries.
 */
148	/* We have to turn on the MMU right away. */
149
150	/*
151	 * Set up the initial MMU state so we can do the first level of
152	 * kernel initialization.  This maps the first 16 MBytes of memory 1:1
153	 * virtual to physical.
154	 */
155	nop
156	addik	r3, r0, MICROBLAZE_TLB_SIZE -1	/* Invalidate all TLB entries */
157_invalidate:
158	mts	rtlbx, r3			/* select TLB entry r3 */
159	mts	rtlbhi, r0			/* flush: ensure V is clear   */
160	mts	rtlblo, r0
161	bgtid	r3, _invalidate		/* loop for all entries       */
162	addik	r3, r3, -1			/* delay slot: next lower index */
163	/* sync */
164
165	/* Setup the kernel PID */
166	mts	rpid,r0			/* Load the kernel PID */
167	nop
168	bri	4			/* branch to the next instruction; presumably a pipeline flush after the PID write - confirm */
169
/*
 * Choose the sizes of the (up to two) TLB entries that map the kernel.
 *
 * Inputs computed here:
 *   r3  = CONFIG_KERNEL_START (virtual)
 *   r4  = tophys(r3)          (physical)
 *   r12 = _end - r3 + (CONFIG_LOWMEM_SIZE >> PTE_SHIFT)   size to cover
 *
 * Outputs at tlb_end (sizes in bytes, 0 = entry unused):
 *
 *   r12 range            r9 (TLB0)   r10 (TLB1)
 *   < 1MB                1MB         0
 *   1MB  .. < 2MB        1MB         1MB
 *   2MB  .. < 4MB        0           4MB
 *   4MB  .. < 5MB        4MB         1MB
 *   5MB  .. < 8MB        4MB         4MB
 *   8MB  .. < 16MB       0           16MB
 *   16MB .. < 17MB       16MB        1MB
 *   17MB .. < 20MB       16MB        4MB
 *   >= 20MB              16MB        16MB
 *
 * All bgei tests are "greater than or equal to zero" on a difference, so
 * each "greater than" in the comments below includes equality.
 * Scratch registers: r2, r11.
 */
170	/*
171	 * We should still be executing code at physical address area
172	 * RAM_BASEADDR at this point. However, kernel code is at
173	 * a virtual address. So, set up a TLB mapping to cover this once
174	 * translation is enabled.
175	 */
176
177	addik	r3,r0, CONFIG_KERNEL_START /* Load the kernel virtual address */
178	tophys(r4,r3)			/* Load the kernel physical address */
179
180	/* start to do TLB calculation */
181	addik	r12, r0, _end
182	rsub	r12, r3, r12		/* r12 = _end - r3 */
183	addik	r12, r12, CONFIG_LOWMEM_SIZE >> PTE_SHIFT /* that's the pad */
184
185	or r9, r0, r0 /* TLB0 = 0 */
186	or r10, r0, r0 /* TLB1 = 0 */
187
188	addik	r11, r12, -0x1000000
189	bgei	r11, GT16 /* size is greater than 16MB */
190	addik	r11, r12, -0x0800000
191	bgei	r11, GT8 /* size is greater than 8MB */
192	addik	r11, r12, -0x0400000
193	bgei	r11, GT4 /* size is greater than 4MB */
194	/* size is less than 4MB */
195	addik	r11, r12, -0x0200000
196	bgei	r11, GT2 /* size is greater than 2MB */
197	addik	r9, r0, 0x0100000 /* TLB0 must be 1MB */
198	addik	r11, r12, -0x0100000
199	bgei	r11, GT1 /* size is greater than 1MB */
200	/* TLB1 is 0 which is setup above */
201	bri tlb_end
202GT4: /* r11 contains the rest (size - 4MB); TLB1 will be either 1MB or 4MB */
203	ori r9, r0, 0x400000 /* TLB0 is 4MB */
204	bri TLB1
205GT16: /* TLB0 is 16MB */
206	addik	r9, r0, 0x1000000 /* means TLB0 is 16MB */
207TLB1:
208	/* r2 is used for the trial subtraction so r11 survives if the test fails */
209	addik	r2, r11, -0x0400000
210	bgei	r2, GT20 /* remainder is at least 4MB */
211	/* remainder is less than 4MB */
212	addik	r11, r11, -0x0100000
213	bgei	r11, GT17 /* remainder is at least 1MB */
214	/* remainder is less than 1MB */
215GT1:
216	addik	r10, r0, 0x0100000 /* means TLB1 is 1MB */
217	bri tlb_end
218GT2: /* TLB0 is 0 and TLB1 will be 4MB */
219GT17: /* TLB1 is 4MB - remainder is between 1MB and 4MB */
220	addik	r10, r0, 0x0400000 /* means TLB1 is 4MB */
221	bri tlb_end
222GT8: /* TLB0 is still zero that's why I can use only TLB1 */
223GT20: /* TLB1 is 16MB - remainder is at least 4MB */
224	addik	r10, r0, 0x1000000 /* means TLB1 is 16MB */
225tlb_end:
226
/*
 * Program the first kernel TLB entry.
 *
 * On entry: r3 = kernel virtual base, r4 = kernel physical base,
 *           r9 / r10 = sizes chosen for TLB0 / TLB1 (either may be 0).
 *
 * Steps:
 *   - r4 becomes the data word: page number bits kept, TLB_WR | TLB_EX set.
 *   - If r9 is 0 the single mapping lives in r10, so r10 is moved to r9 and
 *     r10 is cleared; after this r9 describes the entry written here.
 *   - r30 is built as the size field of the tag word. It starts at 0x200 and
 *     0x80 is added in each branch delay slot, which executes whether or not
 *     the branch is taken, giving 0x280 for 1MB, 0x300 for 4MB and 0x380 for
 *     16MB. NOTE(review): presumably these equal TLB_PAGESZ(PAGESZ_1M/4M/16M)
 *     - confirm against asm/mmu.h.
 *   - r3 becomes the tag word: page number bits kept, TLB_VALID and r30 set.
 *   - The entry index is read from the tlb_skip variable, the entry is
 *     written, and tlb_skip is incremented and stored back.
 *
 * Scratch registers: r11, r29, r30.
 */
227	/*
228	 * Configure and load two entries into TLB slots 0 and 1.
229	 * In case we are pinning TLBs, these are reserved in by the
230	 * other TLB functions.  If not reserving, then it doesn't
231	 * matter where they are loaded.
232	 */
233	andi	r4,r4,0xfffffc00	/* Mask off the real page number */
234	ori	r4,r4,(TLB_WR | TLB_EX)	/* Set the write and execute bits */
235
236	/*
237	 * TLB0 is always used - check if is not zero (r9 stores TLB0 value)
238	 * if is use TLB1 value and clear it (r10 stores TLB1 value)
239	 */
240	bnei	r9, tlb0_not_zero
241	add	r9, r10, r0		/* r9 = r10 */
242	add	r10, r0, r0		/* r10 = 0 */
243tlb0_not_zero:
244
245	/* encode the size in r9 as the page-size field, accumulated in r30 */
246	ori	r30, r0, 0x200
247	andi	r29, r9, 0x100000	/* 1MB? */
248	bneid	r29, 1f
249	addik	r30, r30, 0x80		/* delay slot: r30 = 0x280 */
250	andi	r29, r9, 0x400000	/* 4MB? */
251	bneid	r29, 1f
252	addik	r30, r30, 0x80		/* delay slot: r30 = 0x300 */
253	andi	r29, r9, 0x1000000	/* 16MB? */
254	bneid	r29, 1f
255	addik	r30, r30, 0x80		/* delay slot: r30 = 0x380 */
2561:
257	andi	r3,r3,0xfffffc00	/* Mask off the effective page number */
258	ori	r3,r3,(TLB_VALID)
259	or	r3, r3, r30
260
261	/* Load tlb_skip size value which is index to first unused TLB entry */
262	lwi	r11, r0, TOPHYS(tlb_skip)
263	mts     rtlbx,r11		/* select the TLB entry indexed by tlb_skip */
264
265	mts	rtlblo,r4		/* Load the data portion of the entry */
266	mts	rtlbhi,r3		/* Load the tag portion of the entry */
267
268	/* Increase tlb_skip size */
269	addik	r11, r11, 1
270	swi	r11, r0, TOPHYS(tlb_skip)
271
/*
 * Program the optional second kernel TLB entry.
 *
 * Skipped entirely (branch to jump_over2) when r10 is 0.
 * Otherwise the entry starts where the first one ends: r9, the size of the
 * first entry, is added to both the data word r4 and the tag word r3. The
 * low 10 bits of r3 still hold the first entry's valid and size bits at
 * that point, so they are masked off before TLB_VALID and the new size
 * field r30 are OR-ed in. r4 keeps its TLB_WR | TLB_EX bits.
 * The size field is derived from r10 in the same way as for the first entry
 * (0x280 for 1MB, 0x300 for 4MB, 0x380 for 16MB). The entry index again comes
 * from the tlb_skip variable, which is incremented and stored back.
 *
 * Scratch registers: r11, r29, r30.
 */
272	/* TLB1 can be zeroes that's why we not setup it */
273	beqi	r10, jump_over2
274
275	/* encode the size in r10 as the page-size field, accumulated in r30 */
276	ori	r30, r0, 0x200
277	andi	r29, r10, 0x100000	/* 1MB? */
278	bneid	r29, 1f
279	addik	r30, r30, 0x80		/* delay slot: r30 = 0x280 */
280	andi	r29, r10, 0x400000	/* 4MB? */
281	bneid	r29, 1f
282	addik	r30, r30, 0x80		/* delay slot: r30 = 0x300 */
283	andi	r29, r10, 0x1000000	/* 16MB? */
284	bneid	r29, 1f
285	addik	r30, r30, 0x80		/* delay slot: r30 = 0x380 */
2861:
287	addk	r4, r4, r9	/* previous addr + TLB0 size */
288	addk	r3, r3, r9
289
290	andi	r3,r3,0xfffffc00	/* Mask off the effective page number */
291	ori	r3,r3,(TLB_VALID)
292	or	r3, r3, r30
293
294	lwi	r11, r0, TOPHYS(tlb_skip)
295	mts     rtlbx, r11		/* index reloaded from tlb_skip, already incremented after TLB0 */
296
297	mts	rtlblo,r4		/* Load the data portion of the entry */
298	mts	rtlbhi,r3		/* Load the tag portion of the entry */
299
300	/* Increase tlb_skip size */
301	addik	r11, r11, 1
302	swi	r11, r0, TOPHYS(tlb_skip)
303
/*
 * Install a temporary TLB entry for the LMB region in the entry
 * MICROBLAZE_LMB_TLB_ID.
 *
 * Both words are built from r0, so the page number bits are zero in the data
 * word and in the tag word: page 0 is mapped onto itself with
 * TLB_WR | TLB_EX, tagged TLB_VALID with a 4K page size.
 * This entry is cleared again in kernel_load_context.
 *
 * Registers written: r3, r4, r11.
 */
304jump_over2:
305	/*
306	 * Load a TLB entry for LMB, since we need access to
307	 * the exception vectors, using a 4k real==virtual mapping.
308	 */
309	/* Use temporary TLB_ID for LMB - clear this temporary mapping later */
310	ori	r11, r0, MICROBLAZE_LMB_TLB_ID
311	mts     rtlbx,r11
312
313	ori	r4,r0,(TLB_WR | TLB_EX)
314	ori	r3,r0,(TLB_VALID | TLB_PAGESZ(PAGESZ_4K))
315
316	mts	rtlblo,r4		/* Load the data portion of the entry */
317	mts	rtlbhi,r3		/* Load the tag portion of the entry */
318
/*
 * Switch to virtual addressing.
 *
 * r15 is loaded with the link-time address of start_here (no TOPHYS
 * conversion), MSR is loaded with MSR_KERNEL_VMS, and rted jumps to r15 + 0.
 * Per the comment on the rted line this is the point where the MMU becomes
 * active; the nop after rted fills its delay slot.
 * NOTE(review): presumably rted copies the "saved" VM bit set through
 * MSR_KERNEL_VMS into the live VM bit - confirm in the processor manual.
 *
 * NOTE(review): the "lower 16 Meg" wording below does not match the
 * preceding code, which maps a size derived from _end.
 *
 * Registers written: r4, r15.
 */
319	/*
320	 * We now have the lower 16 Meg of RAM mapped into TLB entries, and the
321	 * caches ready to work.
322	 */
323turn_on_mmu:
324	ori	r15,r0,start_here
325	ori	r4,r0,MSR_KERNEL_VMS
326	mts	rmsr,r4
327	nop
328	rted	r15,0			/* enables MMU */
329	nop
330
331start_here:
332#endif /* CONFIG_MMU */
333
/*
 * Common early setup, executed with and without CONFIG_MMU:
 *   r13 = _KERNEL_SDA_BASE_, r2 = _KERNEL_SDA2_BASE_  (small data anchors)
 *   r1  = init_thread_union + THREAD_SIZE - 4         (initial stack pointer)
 *   r31 = init_task                                   (current task)
 * then machine_early_init is called through r11 with the return address
 * linked in r15.
 *
 * NOTE(review): r5-r10 are not reloaded here, so machine_early_init
 * presumably receives the values the preceding code left in them (command
 * line, FDT pointer, msr probe result, TLB sizes) as arguments - confirm
 * against its prototype.
 */
334	/* Initialize small data anchors */
335	addik	r13, r0, _KERNEL_SDA_BASE_
336	addik	r2, r0, _KERNEL_SDA2_BASE_
337
338	/* Initialize stack pointer */
339	addik	r1, r0, init_thread_union + THREAD_SIZE - 4
340
341	/* Initialize r31 with current task address */
342	addik	r31, r0, init_task
343
344	addik	r11, r0, machine_early_init
345	brald	r15, r11		/* call machine_early_init, link in r15 */
346	nop				/* delay slot */
347
/*
 * Final hand-off to start_kernel.
 *
 * Without CONFIG_MMU: r15 is loaded with machine_halt and control branches
 * to start_kernel.
 *
 * With CONFIG_MMU:
 *   1. mmu_init is called (link in r15).
 *   2. MSR is loaded with MSR_KERNEL and rted jumps to the physical address
 *      of kernel_load_context.
 *   3. kernel_load_context selects the temporary LMB TLB entry
 *      (MICROBLAZE_LMB_TLB_ID) and writes r0 to its tag word.
 *   4. r15 is loaded with machine_halt, MSR with MSR_KERNEL_VMS, and rted
 *      jumps to start_kernel through r17.
 *
 * NOTE(review): r15 is the link register used by the calls above, so
 * preloading it with machine_halt presumably sends a return from
 * start_kernel into machine_halt - confirm.
 */
348#ifndef CONFIG_MMU
349	addik	r15, r0, machine_halt
350	braid	start_kernel
351	nop				/* delay slot */
352#else
353	/*
354	 * Initialize the MMU.
355	 */
356	bralid	r15, mmu_init
357	nop				/* delay slot */
358
359	/* Go back to running unmapped so we can load up new values
360	 * and change to using our exception vectors.
361	 * On the MicroBlaze, the code below only clears the temporary LMB
362	 * TLB entry before translation is switched back on.
363	 */
364	ori	r15,r0,TOPHYS(kernel_load_context)
365	ori	r4,r0,MSR_KERNEL
366	mts	rmsr,r4
367	nop
368	bri	4			/* branch to the next instruction */
369	rted	r15,0			/* jump to the physical address in r15 */
370	nop				/* delay slot */
371
372	/* Load up the kernel context */
373kernel_load_context:
374	ori	r5, r0, MICROBLAZE_LMB_TLB_ID
375	mts     rtlbx,r5		/* select the temporary LMB entry */
376	nop
377	mts	rtlbhi,r0		/* write r0 to its tag word */
378	nop
379	addi	r15, r0, machine_halt
380	ori	r17, r0, start_kernel
381	ori	r4, r0, MSR_KERNEL_VMS
382	mts	rmsr, r4
383	nop
384	rted	r17, 0		/* enable MMU and jump to start_kernel */
385	nop			/* delay slot */
386#endif /* CONFIG_MMU */
387