1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * Machine check exception handling CPU-side for power7 and power8
4 *
5 * Copyright 2013 IBM Corporation
6 * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
7 */
8
9 #undef DEBUG
10 #define pr_fmt(fmt) "mce_power: " fmt
11
12 #include <linux/types.h>
13 #include <linux/ptrace.h>
14 #include <linux/extable.h>
15 #include <asm/mmu.h>
16 #include <asm/mce.h>
17 #include <asm/machdep.h>
18 #include <asm/pgtable.h>
19 #include <asm/pte-walk.h>
20 #include <asm/sstep.h>
21 #include <asm/exception-64s.h>
22 #include <asm/extable.h>
23
24 /*
25 * Convert an address related to an mm to a PFN. NOTE: we are in real
26 * mode, we could potentially race with page table updates.
27 */
addr_to_pfn(struct pt_regs * regs,unsigned long addr)28 unsigned long addr_to_pfn(struct pt_regs *regs, unsigned long addr)
29 {
30 pte_t *ptep;
31 unsigned int shift;
32 unsigned long pfn, flags;
33 struct mm_struct *mm;
34
35 if (user_mode(regs))
36 mm = current->mm;
37 else
38 mm = &init_mm;
39
40 local_irq_save(flags);
41 ptep = __find_linux_pte(mm->pgd, addr, NULL, &shift);
42
43 if (!ptep || pte_special(*ptep)) {
44 pfn = ULONG_MAX;
45 goto out;
46 }
47
48 if (shift <= PAGE_SHIFT)
49 pfn = pte_pfn(*ptep);
50 else {
51 unsigned long rpnmask = (1ul << shift) - PAGE_SIZE;
52 pfn = pte_pfn(__pte(pte_val(*ptep) | (addr & rpnmask)));
53 }
54
55 out:
56 local_irq_restore(flags);
57 return pfn;
58 }
59
60 /* flush SLBs and reload */
61 #ifdef CONFIG_PPC_BOOK3S_64
/*
 * flush_and_reload_slb - invalidate every SLB entry and, when possible,
 * restore the bolted entries.
 *
 * The invalidate is unconditional. The reload is skipped when the machine
 * check hit while in a guest (or in transition), when radix is enabled,
 * or when no SLB shadow is available.
 */
void flush_and_reload_slb(void)
{
	/* Invalidate all SLBs */
	slb_flush_all_realmode();

#ifdef CONFIG_KVM_BOOK3S_HANDLER
	/*
	 * If machine check is hit when in guest or in transition, we will
	 * only flush the SLBs and continue.
	 */
	if (get_paca()->kvm_hstate.in_guest)
		return;
#endif

	/*
	 * Nothing to reload under radix. A missing SLB shadow probably
	 * shouldn't happen, but it may be possible when called in early
	 * boot before the SLB shadows are allocated.
	 */
	if (!early_radix_enabled() && get_slb_shadow())
		slb_restore_bolted_realmode();
}
87 #endif
88
flush_erat(void)89 static void flush_erat(void)
90 {
91 #ifdef CONFIG_PPC_BOOK3S_64
92 if (!early_cpu_has_feature(CPU_FTR_ARCH_300)) {
93 flush_and_reload_slb();
94 return;
95 }
96 #endif
97 asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT : : :"memory");
98 }
99
/* Selectors passed to mce_flush() to pick what gets flushed. */
#define MCE_FLUSH_SLB	1	/* SLB: flush_and_reload_slb() */
#define MCE_FLUSH_TLB	2	/* TLB: tlbiel_all() */
#define MCE_FLUSH_ERAT	3	/* ERAT: flush_erat() */
103
mce_flush(int what)104 static int mce_flush(int what)
105 {
106 #ifdef CONFIG_PPC_BOOK3S_64
107 if (what == MCE_FLUSH_SLB) {
108 flush_and_reload_slb();
109 return 1;
110 }
111 #endif
112 if (what == MCE_FLUSH_ERAT) {
113 flush_erat();
114 return 1;
115 }
116 if (what == MCE_FLUSH_TLB) {
117 tlbiel_all();
118 return 1;
119 }
120
121 return 0;
122 }
123
/*
 * Non-zero when PPC_BIT(42) is set in @srr1. mce_handle_error() uses it
 * to choose the DSISR-based tables over the SRR1-based ones.
 */
#define SRR1_MC_LOADSTORE(srr1)	((srr1) & PPC_BIT(42))
125
/*
 * One row of an SRR1-keyed machine check table.
 *
 * A row matches when (srr1 & srr1_mask) == srr1_value; a row with a zero
 * srr1_mask terminates the table. The field order is relied upon by
 * positional initializers and must not change.
 */
struct mce_ierror_table {
	unsigned long srr1_mask;	/* SRR1 bits to compare */
	unsigned long srr1_value;	/* required value of the masked bits */
	bool nip_valid;			/* nip is a valid indicator of faulting address */
	unsigned int error_type;	/* copied to mce_error_info.error_type */
	unsigned int error_subtype;	/* copied to the union member picked by error_type */
	unsigned int error_class;	/* copied to mce_error_info.error_class */
	unsigned int initiator;		/* copied to mce_error_info.initiator */
	unsigned int severity;		/* copied to mce_error_info.severity */
	bool sync_error;		/* copied to mce_error_info.sync_error */
};
137
138 static const struct mce_ierror_table mce_p7_ierror_table[] = {
139 { 0x00000000001c0000, 0x0000000000040000, true,
140 MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH, MCE_ECLASS_HARDWARE,
141 MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
142 { 0x00000000001c0000, 0x0000000000080000, true,
143 MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, MCE_ECLASS_HARD_INDETERMINATE,
144 MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
145 { 0x00000000001c0000, 0x00000000000c0000, true,
146 MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
147 MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
148 { 0x00000000001c0000, 0x0000000000100000, true,
149 MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_INDETERMINATE, /* BOTH */
150 MCE_ECLASS_SOFT_INDETERMINATE,
151 MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
152 { 0x00000000001c0000, 0x0000000000140000, true,
153 MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
154 MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
155 { 0x00000000001c0000, 0x0000000000180000, true,
156 MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH, MCE_ECLASS_HARDWARE,
157 MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
158 { 0x00000000001c0000, 0x00000000001c0000, true,
159 MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH, MCE_ECLASS_HARDWARE,
160 MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
161 { 0, 0, 0, 0, 0, 0, 0 } };
162
163 static const struct mce_ierror_table mce_p8_ierror_table[] = {
164 { 0x00000000081c0000, 0x0000000000040000, true,
165 MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH, MCE_ECLASS_HARDWARE,
166 MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
167 { 0x00000000081c0000, 0x0000000000080000, true,
168 MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, MCE_ECLASS_HARD_INDETERMINATE,
169 MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
170 { 0x00000000081c0000, 0x00000000000c0000, true,
171 MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
172 MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
173 { 0x00000000081c0000, 0x0000000000100000, true,
174 MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
175 MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
176 { 0x00000000081c0000, 0x0000000000140000, true,
177 MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
178 MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
179 { 0x00000000081c0000, 0x0000000000180000, true,
180 MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH,
181 MCE_ECLASS_HARDWARE,
182 MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
183 { 0x00000000081c0000, 0x00000000001c0000, true,
184 MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH, MCE_ECLASS_HARDWARE,
185 MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
186 { 0x00000000081c0000, 0x0000000008000000, true,
187 MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_IFETCH_TIMEOUT, MCE_ECLASS_HARDWARE,
188 MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
189 { 0x00000000081c0000, 0x0000000008040000, true,
190 MCE_ERROR_TYPE_LINK,MCE_LINK_ERROR_PAGE_TABLE_WALK_IFETCH_TIMEOUT,
191 MCE_ECLASS_HARDWARE,
192 MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
193 { 0, 0, 0, 0, 0, 0, 0 } };
194
195 static const struct mce_ierror_table mce_p9_ierror_table[] = {
196 { 0x00000000081c0000, 0x0000000000040000, true,
197 MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH, MCE_ECLASS_HARDWARE,
198 MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
199 { 0x00000000081c0000, 0x0000000000080000, true,
200 MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, MCE_ECLASS_HARD_INDETERMINATE,
201 MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
202 { 0x00000000081c0000, 0x00000000000c0000, true,
203 MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
204 MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
205 { 0x00000000081c0000, 0x0000000000100000, true,
206 MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
207 MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
208 { 0x00000000081c0000, 0x0000000000140000, true,
209 MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
210 MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
211 { 0x00000000081c0000, 0x0000000000180000, true,
212 MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH, MCE_ECLASS_HARDWARE,
213 MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
214 { 0x00000000081c0000, 0x00000000001c0000, true,
215 MCE_ERROR_TYPE_RA, MCE_RA_ERROR_IFETCH_FOREIGN, MCE_ECLASS_SOFTWARE,
216 MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
217 { 0x00000000081c0000, 0x0000000008000000, true,
218 MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_IFETCH_TIMEOUT, MCE_ECLASS_HARDWARE,
219 MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
220 { 0x00000000081c0000, 0x0000000008040000, true,
221 MCE_ERROR_TYPE_LINK,MCE_LINK_ERROR_PAGE_TABLE_WALK_IFETCH_TIMEOUT,
222 MCE_ECLASS_HARDWARE,
223 MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
224 { 0x00000000081c0000, 0x00000000080c0000, true,
225 MCE_ERROR_TYPE_RA, MCE_RA_ERROR_IFETCH, MCE_ECLASS_SOFTWARE,
226 MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
227 { 0x00000000081c0000, 0x0000000008100000, true,
228 MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH, MCE_ECLASS_SOFTWARE,
229 MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
230 { 0x00000000081c0000, 0x0000000008140000, false,
231 MCE_ERROR_TYPE_RA, MCE_RA_ERROR_STORE, MCE_ECLASS_HARDWARE,
232 MCE_INITIATOR_CPU, MCE_SEV_FATAL, false }, /* ASYNC is fatal */
233 { 0x00000000081c0000, 0x0000000008180000, false,
234 MCE_ERROR_TYPE_LINK,MCE_LINK_ERROR_STORE_TIMEOUT,
235 MCE_INITIATOR_CPU, MCE_SEV_FATAL, false }, /* ASYNC is fatal */
236 { 0x00000000081c0000, 0x00000000081c0000, true, MCE_ECLASS_HARDWARE,
237 MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH_FOREIGN,
238 MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
239 { 0, 0, 0, 0, 0, 0, 0 } };
240
/*
 * One row of a DSISR-keyed machine check table.
 *
 * A row matches when any bit of dsisr_value is set in DSISR; a row with a
 * zero dsisr_value terminates the table. The field order is relied upon
 * by positional initializers and must not change.
 */
struct mce_derror_table {
	unsigned long dsisr_value;	/* DSISR bit(s) selecting this row */
	bool dar_valid;			/* dar is a valid indicator of faulting address */
	unsigned int error_type;	/* copied to mce_error_info.error_type */
	unsigned int error_subtype;	/* copied to the union member picked by error_type */
	unsigned int error_class;	/* copied to mce_error_info.error_class */
	unsigned int initiator;		/* copied to mce_error_info.initiator */
	unsigned int severity;		/* copied to mce_error_info.severity */
	bool sync_error;		/* copied to mce_error_info.sync_error */
};
251
252 static const struct mce_derror_table mce_p7_derror_table[] = {
253 { 0x00008000, false,
254 MCE_ERROR_TYPE_UE, MCE_UE_ERROR_LOAD_STORE, MCE_ECLASS_HARDWARE,
255 MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
256 { 0x00004000, true,
257 MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE,
258 MCE_ECLASS_HARDWARE,
259 MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
260 { 0x00000800, true,
261 MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
262 MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
263 { 0x00000400, true,
264 MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
265 MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
266 { 0x00000080, true,
267 MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
268 MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
269 { 0x00000100, true,
270 MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, MCE_ECLASS_HARD_INDETERMINATE,
271 MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
272 { 0x00000040, true,
273 MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_INDETERMINATE, /* BOTH */
274 MCE_ECLASS_HARD_INDETERMINATE,
275 MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
276 { 0, false, 0, 0, 0, 0, 0 } };
277
278 static const struct mce_derror_table mce_p8_derror_table[] = {
279 { 0x00008000, false,
280 MCE_ERROR_TYPE_UE, MCE_UE_ERROR_LOAD_STORE, MCE_ECLASS_HARDWARE,
281 MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
282 { 0x00004000, true,
283 MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE,
284 MCE_ECLASS_HARDWARE,
285 MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
286 { 0x00002000, true,
287 MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_LOAD_TIMEOUT, MCE_ECLASS_HARDWARE,
288 MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
289 { 0x00001000, true,
290 MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_PAGE_TABLE_WALK_LOAD_STORE_TIMEOUT,
291 MCE_ECLASS_HARDWARE,
292 MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
293 { 0x00000800, true,
294 MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
295 MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
296 { 0x00000400, true,
297 MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
298 MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
299 { 0x00000200, true,
300 MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, /* SECONDARY ERAT */
301 MCE_ECLASS_SOFT_INDETERMINATE,
302 MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
303 { 0x00000080, true,
304 MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, /* Before PARITY */
305 MCE_ECLASS_SOFT_INDETERMINATE,
306 MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
307 { 0x00000100, true,
308 MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, MCE_ECLASS_HARD_INDETERMINATE,
309 MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
310 { 0, false, 0, 0, 0, 0, 0 } };
311
312 static const struct mce_derror_table mce_p9_derror_table[] = {
313 { 0x00008000, false,
314 MCE_ERROR_TYPE_UE, MCE_UE_ERROR_LOAD_STORE, MCE_ECLASS_HARDWARE,
315 MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
316 { 0x00004000, true,
317 MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE,
318 MCE_ECLASS_HARDWARE,
319 MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
320 { 0x00002000, true,
321 MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_LOAD_TIMEOUT, MCE_ECLASS_HARDWARE,
322 MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
323 { 0x00001000, true,
324 MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_PAGE_TABLE_WALK_LOAD_STORE_TIMEOUT,
325 MCE_ECLASS_HARDWARE,
326 MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
327 { 0x00000800, true,
328 MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
329 MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
330 { 0x00000400, true,
331 MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
332 MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
333 { 0x00000200, false,
334 MCE_ERROR_TYPE_USER, MCE_USER_ERROR_TLBIE, MCE_ECLASS_SOFTWARE,
335 MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
336 { 0x00000080, true,
337 MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, /* Before PARITY */
338 MCE_ECLASS_SOFT_INDETERMINATE,
339 MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
340 { 0x00000100, true,
341 MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, MCE_ECLASS_HARD_INDETERMINATE,
342 MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
343 { 0x00000040, true,
344 MCE_ERROR_TYPE_RA, MCE_RA_ERROR_LOAD, MCE_ECLASS_HARDWARE,
345 MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
346 { 0x00000020, false,
347 MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE,
348 MCE_ECLASS_HARDWARE,
349 MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
350 { 0x00000010, false,
351 MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE_FOREIGN,
352 MCE_ECLASS_HARDWARE,
353 MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
354 { 0x00000008, false,
355 MCE_ERROR_TYPE_RA, MCE_RA_ERROR_LOAD_STORE_FOREIGN, MCE_ECLASS_HARDWARE,
356 MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
357 { 0, false, 0, 0, 0, 0, 0 } };
358
mce_find_instr_ea_and_phys(struct pt_regs * regs,uint64_t * addr,uint64_t * phys_addr)359 static int mce_find_instr_ea_and_phys(struct pt_regs *regs, uint64_t *addr,
360 uint64_t *phys_addr)
361 {
362 /*
363 * Carefully look at the NIP to determine
364 * the instruction to analyse. Reading the NIP
365 * in real-mode is tricky and can lead to recursive
366 * faults
367 */
368 int instr;
369 unsigned long pfn, instr_addr;
370 struct instruction_op op;
371 struct pt_regs tmp = *regs;
372
373 pfn = addr_to_pfn(regs, regs->nip);
374 if (pfn != ULONG_MAX) {
375 instr_addr = (pfn << PAGE_SHIFT) + (regs->nip & ~PAGE_MASK);
376 instr = *(unsigned int *)(instr_addr);
377 if (!analyse_instr(&op, &tmp, instr)) {
378 pfn = addr_to_pfn(regs, op.ea);
379 *addr = op.ea;
380 *phys_addr = (pfn << PAGE_SHIFT);
381 return 0;
382 }
383 /*
384 * analyse_instr() might fail if the instruction
385 * is not a load/store, although this is unexpected
386 * for load/store errors or if we got the NIP
387 * wrong
388 */
389 }
390 *addr = 0;
391 return -1;
392 }
393
mce_handle_ierror(struct pt_regs * regs,const struct mce_ierror_table table[],struct mce_error_info * mce_err,uint64_t * addr,uint64_t * phys_addr)394 static int mce_handle_ierror(struct pt_regs *regs,
395 const struct mce_ierror_table table[],
396 struct mce_error_info *mce_err, uint64_t *addr,
397 uint64_t *phys_addr)
398 {
399 uint64_t srr1 = regs->msr;
400 int handled = 0;
401 int i;
402
403 *addr = 0;
404
405 for (i = 0; table[i].srr1_mask; i++) {
406 if ((srr1 & table[i].srr1_mask) != table[i].srr1_value)
407 continue;
408
409 /* attempt to correct the error */
410 switch (table[i].error_type) {
411 case MCE_ERROR_TYPE_SLB:
412 if (local_paca->in_mce == 1)
413 slb_save_contents(local_paca->mce_faulty_slbs);
414 handled = mce_flush(MCE_FLUSH_SLB);
415 break;
416 case MCE_ERROR_TYPE_ERAT:
417 handled = mce_flush(MCE_FLUSH_ERAT);
418 break;
419 case MCE_ERROR_TYPE_TLB:
420 handled = mce_flush(MCE_FLUSH_TLB);
421 break;
422 }
423
424 /* now fill in mce_error_info */
425 mce_err->error_type = table[i].error_type;
426 mce_err->error_class = table[i].error_class;
427 switch (table[i].error_type) {
428 case MCE_ERROR_TYPE_UE:
429 mce_err->u.ue_error_type = table[i].error_subtype;
430 break;
431 case MCE_ERROR_TYPE_SLB:
432 mce_err->u.slb_error_type = table[i].error_subtype;
433 break;
434 case MCE_ERROR_TYPE_ERAT:
435 mce_err->u.erat_error_type = table[i].error_subtype;
436 break;
437 case MCE_ERROR_TYPE_TLB:
438 mce_err->u.tlb_error_type = table[i].error_subtype;
439 break;
440 case MCE_ERROR_TYPE_USER:
441 mce_err->u.user_error_type = table[i].error_subtype;
442 break;
443 case MCE_ERROR_TYPE_RA:
444 mce_err->u.ra_error_type = table[i].error_subtype;
445 break;
446 case MCE_ERROR_TYPE_LINK:
447 mce_err->u.link_error_type = table[i].error_subtype;
448 break;
449 }
450 mce_err->sync_error = table[i].sync_error;
451 mce_err->severity = table[i].severity;
452 mce_err->initiator = table[i].initiator;
453 if (table[i].nip_valid) {
454 *addr = regs->nip;
455 if (mce_err->sync_error &&
456 table[i].error_type == MCE_ERROR_TYPE_UE) {
457 unsigned long pfn;
458
459 if (get_paca()->in_mce < MAX_MCE_DEPTH) {
460 pfn = addr_to_pfn(regs, regs->nip);
461 if (pfn != ULONG_MAX) {
462 *phys_addr =
463 (pfn << PAGE_SHIFT);
464 }
465 }
466 }
467 }
468 return handled;
469 }
470
471 mce_err->error_type = MCE_ERROR_TYPE_UNKNOWN;
472 mce_err->error_class = MCE_ECLASS_UNKNOWN;
473 mce_err->severity = MCE_SEV_SEVERE;
474 mce_err->initiator = MCE_INITIATOR_CPU;
475 mce_err->sync_error = true;
476
477 return 0;
478 }
479
mce_handle_derror(struct pt_regs * regs,const struct mce_derror_table table[],struct mce_error_info * mce_err,uint64_t * addr,uint64_t * phys_addr)480 static int mce_handle_derror(struct pt_regs *regs,
481 const struct mce_derror_table table[],
482 struct mce_error_info *mce_err, uint64_t *addr,
483 uint64_t *phys_addr)
484 {
485 uint64_t dsisr = regs->dsisr;
486 int handled = 0;
487 int found = 0;
488 int i;
489
490 *addr = 0;
491
492 for (i = 0; table[i].dsisr_value; i++) {
493 if (!(dsisr & table[i].dsisr_value))
494 continue;
495
496 /* attempt to correct the error */
497 switch (table[i].error_type) {
498 case MCE_ERROR_TYPE_SLB:
499 if (local_paca->in_mce == 1)
500 slb_save_contents(local_paca->mce_faulty_slbs);
501 if (mce_flush(MCE_FLUSH_SLB))
502 handled = 1;
503 break;
504 case MCE_ERROR_TYPE_ERAT:
505 if (mce_flush(MCE_FLUSH_ERAT))
506 handled = 1;
507 break;
508 case MCE_ERROR_TYPE_TLB:
509 if (mce_flush(MCE_FLUSH_TLB))
510 handled = 1;
511 break;
512 }
513
514 /*
515 * Attempt to handle multiple conditions, but only return
516 * one. Ensure uncorrectable errors are first in the table
517 * to match.
518 */
519 if (found)
520 continue;
521
522 /* now fill in mce_error_info */
523 mce_err->error_type = table[i].error_type;
524 mce_err->error_class = table[i].error_class;
525 switch (table[i].error_type) {
526 case MCE_ERROR_TYPE_UE:
527 mce_err->u.ue_error_type = table[i].error_subtype;
528 break;
529 case MCE_ERROR_TYPE_SLB:
530 mce_err->u.slb_error_type = table[i].error_subtype;
531 break;
532 case MCE_ERROR_TYPE_ERAT:
533 mce_err->u.erat_error_type = table[i].error_subtype;
534 break;
535 case MCE_ERROR_TYPE_TLB:
536 mce_err->u.tlb_error_type = table[i].error_subtype;
537 break;
538 case MCE_ERROR_TYPE_USER:
539 mce_err->u.user_error_type = table[i].error_subtype;
540 break;
541 case MCE_ERROR_TYPE_RA:
542 mce_err->u.ra_error_type = table[i].error_subtype;
543 break;
544 case MCE_ERROR_TYPE_LINK:
545 mce_err->u.link_error_type = table[i].error_subtype;
546 break;
547 }
548 mce_err->sync_error = table[i].sync_error;
549 mce_err->severity = table[i].severity;
550 mce_err->initiator = table[i].initiator;
551 if (table[i].dar_valid)
552 *addr = regs->dar;
553 else if (mce_err->sync_error &&
554 table[i].error_type == MCE_ERROR_TYPE_UE) {
555 /*
556 * We do a maximum of 4 nested MCE calls, see
557 * kernel/exception-64s.h
558 */
559 if (get_paca()->in_mce < MAX_MCE_DEPTH)
560 mce_find_instr_ea_and_phys(regs, addr,
561 phys_addr);
562 }
563 found = 1;
564 }
565
566 if (found)
567 return handled;
568
569 mce_err->error_type = MCE_ERROR_TYPE_UNKNOWN;
570 mce_err->error_class = MCE_ECLASS_UNKNOWN;
571 mce_err->severity = MCE_SEV_SEVERE;
572 mce_err->initiator = MCE_INITIATOR_CPU;
573 mce_err->sync_error = true;
574
575 return 0;
576 }
577
mce_handle_ue_error(struct pt_regs * regs,struct mce_error_info * mce_err)578 static long mce_handle_ue_error(struct pt_regs *regs,
579 struct mce_error_info *mce_err)
580 {
581 long handled = 0;
582 const struct exception_table_entry *entry;
583
584 entry = search_kernel_exception_table(regs->nip);
585 if (entry) {
586 mce_err->ignore_event = true;
587 regs->nip = extable_fixup(entry);
588 return 1;
589 }
590
591 /*
592 * On specific SCOM read via MMIO we may get a machine check
593 * exception with SRR0 pointing inside opal. If that is the
594 * case OPAL may have recovery address to re-read SCOM data in
595 * different way and hence we can recover from this MC.
596 */
597
598 if (ppc_md.mce_check_early_recovery) {
599 if (ppc_md.mce_check_early_recovery(regs))
600 handled = 1;
601 }
602 return handled;
603 }
604
mce_handle_error(struct pt_regs * regs,const struct mce_derror_table dtable[],const struct mce_ierror_table itable[])605 static long mce_handle_error(struct pt_regs *regs,
606 const struct mce_derror_table dtable[],
607 const struct mce_ierror_table itable[])
608 {
609 struct mce_error_info mce_err = { 0 };
610 uint64_t addr, phys_addr = ULONG_MAX;
611 uint64_t srr1 = regs->msr;
612 long handled;
613
614 if (SRR1_MC_LOADSTORE(srr1))
615 handled = mce_handle_derror(regs, dtable, &mce_err, &addr,
616 &phys_addr);
617 else
618 handled = mce_handle_ierror(regs, itable, &mce_err, &addr,
619 &phys_addr);
620
621 if (!handled && mce_err.error_type == MCE_ERROR_TYPE_UE)
622 handled = mce_handle_ue_error(regs, &mce_err);
623
624 save_mce_event(regs, handled, &mce_err, regs->nip, addr, phys_addr);
625
626 return handled;
627 }
628
__machine_check_early_realmode_p7(struct pt_regs * regs)629 long __machine_check_early_realmode_p7(struct pt_regs *regs)
630 {
631 /* P7 DD1 leaves top bits of DSISR undefined */
632 regs->dsisr &= 0x0000ffff;
633
634 return mce_handle_error(regs, mce_p7_derror_table, mce_p7_ierror_table);
635 }
636
__machine_check_early_realmode_p8(struct pt_regs * regs)637 long __machine_check_early_realmode_p8(struct pt_regs *regs)
638 {
639 return mce_handle_error(regs, mce_p8_derror_table, mce_p8_ierror_table);
640 }
641
__machine_check_early_realmode_p9(struct pt_regs * regs)642 long __machine_check_early_realmode_p9(struct pt_regs *regs)
643 {
644 /*
645 * On POWER9 DD2.1 and below, it's possible to get a machine check
646 * caused by a paste instruction where only DSISR bit 25 is set. This
647 * will result in the MCE handler seeing an unknown event and the kernel
648 * crashing. An MCE that occurs like this is spurious, so we don't need
649 * to do anything in terms of servicing it. If there is something that
650 * needs to be serviced, the CPU will raise the MCE again with the
651 * correct DSISR so that it can be serviced properly. So detect this
652 * case and mark it as handled.
653 */
654 if (SRR1_MC_LOADSTORE(regs->msr) && regs->dsisr == 0x02000000)
655 return 1;
656
657 return mce_handle_error(regs, mce_p9_derror_table, mce_p9_ierror_table);
658 }
659