// SPDX-License-Identifier: GPL-2.0
// Copyright (C) 2005-2017 Andes Technology Corporation

#include <linux/extable.h>
#include <linux/module.h>
#include <linux/signal.h>
#include <linux/ptrace.h>
#include <linux/mm.h>
#include <linux/init.h>
#include <linux/hardirq.h>
#include <linux/uaccess.h>
#include <linux/perf_event.h>

#include <asm/tlbflush.h>

extern void die(const char *str, struct pt_regs *regs, long err);

/*
 * This is useful to dump out the page tables associated with
 * 'addr' in mm 'mm'.
 */
void show_pte(struct mm_struct *mm, unsigned long addr)
{
	pgd_t *pgd;
	if (!mm)
		mm = &init_mm;

	pr_alert("pgd = %p\n", mm->pgd);
	pgd = pgd_offset(mm, addr);
	pr_alert("[%08lx] *pgd=%08lx", addr, pgd_val(*pgd));

	do {
		p4d_t *p4d;
		pud_t *pud;
		pmd_t *pmd;

		if (pgd_none(*pgd))
			break;

		if (pgd_bad(*pgd)) {
			pr_alert("(bad)");
			break;
		}

		p4d = p4d_offset(pgd, addr);
		pud = pud_offset(p4d, addr);
		pmd = pmd_offset(pud, addr);
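		/*
		 * With a two-level page table (PTRS_PER_PMD == 1) the pmd is
		 * folded into the pgd, so its value would merely repeat the
		 * pgd entry printed above.
		 */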
#if PTRS_PER_PMD != 1
		pr_alert(", *pmd=%08lx", pmd_val(*pmd));
#endif

		if (pmd_none(*pmd))
			break;

		if (pmd_bad(*pmd)) {
			pr_alert("(bad)");
			break;
		}

		if (IS_ENABLED(CONFIG_HIGHMEM))
		{
			pte_t *pte;
			/* We must not map this if we have highmem enabled */
			pte = pte_offset_map(pmd, addr);
			pr_alert(", *pte=%08lx", pte_val(*pte));
			pte_unmap(pte);
		}
	} while (0);

	pr_alert("\n");
}

void do_page_fault(unsigned long entry, unsigned long addr,
		   unsigned int error_code, struct pt_regs *regs)
{
	struct task_struct *tsk;
	struct mm_struct *mm;
	struct vm_area_struct *vma;
	int si_code;
	vm_fault_t fault;
	unsigned int mask = VM_ACCESS_FLAGS;
	unsigned int flags = FAULT_FLAG_DEFAULT;

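	/*
	 * Keep only the bits we care about: the instruction/data flag
	 * (ITYPE_mskINST) and the exception-type field (ITYPE_mskETYPE).
	 */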
	error_code = error_code & (ITYPE_mskINST | ITYPE_mskETYPE);
	tsk = current;
	mm = tsk->mm;
	si_code = SEGV_MAPERR;
	/*
	 * We fault-in kernel-space virtual memory on-demand. The
	 * 'reference' page table is init_mm.pgd.
	 *
	 * NOTE! We MUST NOT take any locks for this case. We may
	 * be in an interrupt or a critical region, and should
	 * only copy the information from the master page table,
	 * nothing more.
	 */
	if (addr >= TASK_SIZE) {
		if (user_mode(regs))
			goto bad_area_nosemaphore;

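		/*
		 * Only a missing translation can be repaired by syncing with
		 * init_mm's page table; any other kernel-space fault is a
		 * genuine error.
		 */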
		if (addr >= TASK_SIZE && addr < VMALLOC_END
		    && (entry == ENTRY_PTE_NOT_PRESENT))
			goto vmalloc_fault;
		else
			goto no_context;
	}

	/* Send a signal to the task for handling the unaligned access. */
	if (entry == ENTRY_GENERAL_EXCPETION
	    && error_code == ETYPE_ALIGNMENT_CHECK) {
		if (user_mode(regs))
			goto bad_area_nosemaphore;
		else
			goto no_context;
	}

	/*
	 * If we're in an interrupt or have no user
	 * context, we must not take the fault..
	 */
	if (unlikely(faulthandler_disabled() || !mm))
		goto no_context;

	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);

	/*
	 * As per x86, we may deadlock here. However, since the kernel only
	 * validly references user space from well defined areas of the code,
	 * we can bug out early if this is from code which shouldn't.
	 */
	if (unlikely(!mmap_read_trylock(mm))) {
		if (!user_mode(regs) &&
		    !search_exception_tables(instruction_pointer(regs)))
			goto no_context;
retry:
		mmap_read_lock(mm);
	} else {
		/*
		 * The above mmap_read_trylock() might have succeeded, in
		 * which case we'll have missed the might_sleep() from
		 * mmap_read_lock().
		 */
		might_sleep();
		if (IS_ENABLED(CONFIG_DEBUG_VM)) {
			if (!user_mode(regs) &&
			    !search_exception_tables(instruction_pointer(regs)))
				goto no_context;
		}
	}

	vma = find_vma(mm, addr);

	if (unlikely(!vma))
		goto bad_area;

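	/*
	 * find_vma() returns the first vma that ends above addr; if addr
	 * lies below its start, the access is valid only for a
	 * downward-growing stack vma that we can expand to cover it.
	 */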
	if (vma->vm_start <= addr)
		goto good_area;

	if (unlikely(!(vma->vm_flags & VM_GROWSDOWN)))
		goto bad_area;

	if (unlikely(expand_stack(vma, addr)))
		goto bad_area;

	/*
	 * Ok, we have a good vm_area for this memory access, so
	 * we can handle it..
	 */

good_area:
	si_code = SEGV_ACCERR;

	/* first do some preliminary protection checks */
	if (entry == ENTRY_PTE_NOT_PRESENT) {
		if (error_code & ITYPE_mskINST)
			mask = VM_EXEC;
		else {
			mask = VM_READ | VM_WRITE;
		}
	} else if (entry == ENTRY_TLB_MISC) {
		switch (error_code & ITYPE_mskETYPE) {
		case RD_PROT:
			mask = VM_READ;
			break;
		case WRT_PROT:
			mask = VM_WRITE;
			flags |= FAULT_FLAG_WRITE;
			break;
		case NOEXEC:
			mask = VM_EXEC;
			break;
		case PAGE_MODIFY:
			mask = VM_WRITE;
			flags |= FAULT_FLAG_WRITE;
			break;
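		/*
		 * The accessed bit is presumably maintained without
		 * trapping to software, so reaching this case would
		 * indicate a bug.
		 */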
		case ACC_BIT:
			BUG();
		default:
			break;
		}

	}
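	/*
	 * The vma must grant the access type implied by the exception;
	 * otherwise report a protection failure (SEGV_ACCERR).
	 */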
	if (!(vma->vm_flags & mask))
		goto bad_area;

	/*
	 * If for any reason at all we couldn't handle the fault,
	 * make sure we exit gracefully rather than endlessly redo
	 * the fault.
	 */

	fault = handle_mm_fault(vma, addr, flags, regs);

	/*
	 * If we need to retry but a fatal signal is pending, handle the
	 * signal first. We do not need to release the mmap_lock because it
	 * would already be released in __lock_page_or_retry in mm/filemap.c.
	 */
	if (fault_signal_pending(fault, regs)) {
		if (!user_mode(regs))
			goto no_context;
		return;
	}

	if (unlikely(fault & VM_FAULT_ERROR)) {
		if (fault & VM_FAULT_OOM)
			goto out_of_memory;
		else if (fault & VM_FAULT_SIGBUS)
			goto do_sigbus;
		else
			goto bad_area;
	}

	if (flags & FAULT_FLAG_ALLOW_RETRY) {
		if (fault & VM_FAULT_RETRY) {
			flags |= FAULT_FLAG_TRIED;

			/* No need to mmap_read_unlock(mm) as we would
			 * have already released it in __lock_page_or_retry
			 * in mm/filemap.c.
			 */
			goto retry;
		}
	}

	mmap_read_unlock(mm);
	return;

	/*
	 * Something tried to access memory that isn't in our memory map..
	 * Fix it, but check if it's kernel or user first..
	 */
bad_area:
	mmap_read_unlock(mm);

bad_area_nosemaphore:

	/* User mode accesses just cause a SIGSEGV */

	if (user_mode(regs)) {
		tsk->thread.address = addr;
		tsk->thread.error_code = error_code;
		tsk->thread.trap_no = entry;
		force_sig_fault(SIGSEGV, si_code, (void __user *)addr);
		return;
	}

no_context:

	/* Are we prepared to handle this kernel fault?
	 *
	 * (The kernel has valid exception-points in the source
	 *  when it accesses user memory. When it fails in one
	 *  of those points, we find it in a table and do a jump
	 *  to some fixup code that loads an appropriate error
	 *  code)
	 */

	{
		const struct exception_table_entry *entry;

		if ((entry =
		     search_exception_tables(instruction_pointer(regs))) !=
		    NULL) {
			/* Adjust the instruction pointer in the stackframe */
			instruction_pointer(regs) = entry->fixup;
			return;
		}
	}

	/*
	 * Oops. The kernel tried to access some bad page. We'll have to
	 * terminate things with extreme prejudice.
	 */

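	/*
	 * bust_spinlocks(1) marks an oops in progress so the messages
	 * below can reach the console even if console locks are held.
	 */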
	bust_spinlocks(1);
	pr_alert("Unable to handle kernel %s at virtual address %08lx\n",
		 (addr < PAGE_SIZE) ? "NULL pointer dereference" :
		 "paging request", addr);

	show_pte(mm, addr);
	die("Oops", regs, error_code);
	bust_spinlocks(0);
	do_exit(SIGKILL);

	return;

	/*
	 * We ran out of memory, or some other thing happened to us that made
	 * us unable to handle the page fault gracefully.
	 */

out_of_memory:
	mmap_read_unlock(mm);
	if (!user_mode(regs))
		goto no_context;
	pagefault_out_of_memory();
	return;

do_sigbus:
	mmap_read_unlock(mm);

	/* Kernel mode? Handle exceptions or die */
	if (!user_mode(regs))
		goto no_context;

	/*
	 * Send a sigbus
	 */
	tsk->thread.address = addr;
	tsk->thread.error_code = error_code;
	tsk->thread.trap_no = entry;
	force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)addr);

	return;

vmalloc_fault:
	{
		/*
		 * Synchronize this task's top level page-table
		 * with the 'reference' page table.
		 *
		 * Use the page-table base held in the L1_PPTB register
		 * instead of tsk->active_mm->pgd, since the latter might
		 * be unavailable if this code is executed from an
		 * unfortunately timed irq (for instance inside schedule(),
		 * between switch_mm and switch_to...).
		 */

		unsigned int index = pgd_index(addr);
		pgd_t *pgd, *pgd_k;
		p4d_t *p4d, *p4d_k;
		pud_t *pud, *pud_k;
		pmd_t *pmd, *pmd_k;
		pte_t *pte_k;

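		/*
		 * NDS32_SR_L1_PPTB is assumed to hold the physical base of
		 * the currently active first-level page table; __va() turns
		 * it back into a kernel virtual pointer we can index.
		 */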
		pgd = (pgd_t *) __va(__nds32__mfsr(NDS32_SR_L1_PPTB)) + index;
		pgd_k = init_mm.pgd + index;

		if (!pgd_present(*pgd_k))
			goto no_context;

		p4d = p4d_offset(pgd, addr);
		p4d_k = p4d_offset(pgd_k, addr);
		if (!p4d_present(*p4d_k))
			goto no_context;

		pud = pud_offset(p4d, addr);
		pud_k = pud_offset(p4d_k, addr);
		if (!pud_present(*pud_k))
			goto no_context;

		pmd = pmd_offset(pud, addr);
		pmd_k = pmd_offset(pud_k, addr);
		if (!pmd_present(*pmd_k))
			goto no_context;

		if (!pmd_present(*pmd))
			set_pmd(pmd, *pmd_k);
		else
			BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k));

		/*
		 * Since the vmalloc area is global, we don't
		 * need to copy individual PTE's, it is enough to
		 * copy the pgd pointer into the pte page of the
		 * root task. If that is there, we'll find our pte if
		 * it exists.
		 */

		/* Make sure the actual PTE exists as well to
		 * catch kernel vmalloc-area accesses to non-mapped
		 * addresses. If we don't do this, this will just
		 * silently loop forever.
		 */

		pte_k = pte_offset_kernel(pmd_k, addr);
		if (!pte_present(*pte_k))
			goto no_context;

		return;
	}
}