1 /*
2 * Machine check exception handling.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 *
18 * Copyright 2013 IBM Corporation
19 * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
20 */
21
22 #undef DEBUG
23 #define pr_fmt(fmt) "mce: " fmt
24
#include <linux/hardirq.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/percpu.h>
#include <linux/export.h>
#include <linux/irq_work.h>
#include <linux/string.h>

#include <asm/machdep.h>
#include <asm/mce.h>
34
/*
 * Per-CPU buffer of saved machine check events. mce_nest_count is bumped
 * by save_mce_event() and dropped again when get_mce_event() releases a
 * slot; (mce_nest_count - 1) indexes the most recently saved event.
 */
static DEFINE_PER_CPU(int, mce_nest_count);
static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event);

/* Queue for delayed MCE events. */
static DEFINE_PER_CPU(int, mce_queue_count);
static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event_queue);

/* Queue for delayed MCE UE events. */
static DEFINE_PER_CPU(int, mce_ue_count);
static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT],
					mce_ue_event_queue);

/* Forward declarations for the handlers referenced by the work items below. */
static void machine_check_process_queued_event(struct irq_work *work);
void machine_check_ue_event(struct machine_check_event *evt);
static void machine_process_ue_event(struct work_struct *work);

/* irq_work queued by machine_check_queue_event() to drain mce_event_queue. */
static struct irq_work mce_event_process_work = {
	.func = machine_check_process_queued_event,
};

/* Work item whose handler, machine_process_ue_event(), drains mce_ue_event_queue. */
DECLARE_WORK(mce_ue_event_work, machine_process_ue_event);
56
mce_set_error_info(struct machine_check_event * mce,struct mce_error_info * mce_err)57 static void mce_set_error_info(struct machine_check_event *mce,
58 struct mce_error_info *mce_err)
59 {
60 mce->error_type = mce_err->error_type;
61 switch (mce_err->error_type) {
62 case MCE_ERROR_TYPE_UE:
63 mce->u.ue_error.ue_error_type = mce_err->u.ue_error_type;
64 break;
65 case MCE_ERROR_TYPE_SLB:
66 mce->u.slb_error.slb_error_type = mce_err->u.slb_error_type;
67 break;
68 case MCE_ERROR_TYPE_ERAT:
69 mce->u.erat_error.erat_error_type = mce_err->u.erat_error_type;
70 break;
71 case MCE_ERROR_TYPE_TLB:
72 mce->u.tlb_error.tlb_error_type = mce_err->u.tlb_error_type;
73 break;
74 case MCE_ERROR_TYPE_USER:
75 mce->u.user_error.user_error_type = mce_err->u.user_error_type;
76 break;
77 case MCE_ERROR_TYPE_RA:
78 mce->u.ra_error.ra_error_type = mce_err->u.ra_error_type;
79 break;
80 case MCE_ERROR_TYPE_LINK:
81 mce->u.link_error.link_error_type = mce_err->u.link_error_type;
82 break;
83 case MCE_ERROR_TYPE_UNKNOWN:
84 default:
85 break;
86 }
87 }
88
89 /*
90 * Decode and save high level MCE information into per cpu buffer which
91 * is an array of machine_check_event structure.
92 */
save_mce_event(struct pt_regs * regs,long handled,struct mce_error_info * mce_err,uint64_t nip,uint64_t addr,uint64_t phys_addr)93 void save_mce_event(struct pt_regs *regs, long handled,
94 struct mce_error_info *mce_err,
95 uint64_t nip, uint64_t addr, uint64_t phys_addr)
96 {
97 int index = __this_cpu_inc_return(mce_nest_count) - 1;
98 struct machine_check_event *mce = this_cpu_ptr(&mce_event[index]);
99
100 /*
101 * Return if we don't have enough space to log mce event.
102 * mce_nest_count may go beyond MAX_MC_EVT but that's ok,
103 * the check below will stop buffer overrun.
104 */
105 if (index >= MAX_MC_EVT)
106 return;
107
108 /* Populate generic machine check info */
109 mce->version = MCE_V1;
110 mce->srr0 = nip;
111 mce->srr1 = regs->msr;
112 mce->gpr3 = regs->gpr[3];
113 mce->in_use = 1;
114
115 /* Mark it recovered if we have handled it and MSR(RI=1). */
116 if (handled && (regs->msr & MSR_RI))
117 mce->disposition = MCE_DISPOSITION_RECOVERED;
118 else
119 mce->disposition = MCE_DISPOSITION_NOT_RECOVERED;
120
121 mce->initiator = mce_err->initiator;
122 mce->severity = mce_err->severity;
123
124 /*
125 * Populate the mce error_type and type-specific error_type.
126 */
127 mce_set_error_info(mce, mce_err);
128
129 if (!addr)
130 return;
131
132 if (mce->error_type == MCE_ERROR_TYPE_TLB) {
133 mce->u.tlb_error.effective_address_provided = true;
134 mce->u.tlb_error.effective_address = addr;
135 } else if (mce->error_type == MCE_ERROR_TYPE_SLB) {
136 mce->u.slb_error.effective_address_provided = true;
137 mce->u.slb_error.effective_address = addr;
138 } else if (mce->error_type == MCE_ERROR_TYPE_ERAT) {
139 mce->u.erat_error.effective_address_provided = true;
140 mce->u.erat_error.effective_address = addr;
141 } else if (mce->error_type == MCE_ERROR_TYPE_USER) {
142 mce->u.user_error.effective_address_provided = true;
143 mce->u.user_error.effective_address = addr;
144 } else if (mce->error_type == MCE_ERROR_TYPE_RA) {
145 mce->u.ra_error.effective_address_provided = true;
146 mce->u.ra_error.effective_address = addr;
147 } else if (mce->error_type == MCE_ERROR_TYPE_LINK) {
148 mce->u.link_error.effective_address_provided = true;
149 mce->u.link_error.effective_address = addr;
150 } else if (mce->error_type == MCE_ERROR_TYPE_UE) {
151 mce->u.ue_error.effective_address_provided = true;
152 mce->u.ue_error.effective_address = addr;
153 if (phys_addr != ULONG_MAX) {
154 mce->u.ue_error.physical_address_provided = true;
155 mce->u.ue_error.physical_address = phys_addr;
156 machine_check_ue_event(mce);
157 }
158 }
159 return;
160 }
161
162 /*
163 * get_mce_event:
164 * mce Pointer to machine_check_event structure to be filled.
165 * release Flag to indicate whether to free the event slot or not.
166 * 0 <= do not release the mce event. Caller will invoke
167 * release_mce_event() once event has been consumed.
168 * 1 <= release the slot.
169 *
170 * return 1 = success
171 * 0 = failure
172 *
173 * get_mce_event() will be called by platform specific machine check
174 * handle routine and in KVM.
175 * When we call get_mce_event(), we are still in interrupt context and
176 * preemption will not be scheduled until ret_from_expect() routine
177 * is called.
178 */
get_mce_event(struct machine_check_event * mce,bool release)179 int get_mce_event(struct machine_check_event *mce, bool release)
180 {
181 int index = __this_cpu_read(mce_nest_count) - 1;
182 struct machine_check_event *mc_evt;
183 int ret = 0;
184
185 /* Sanity check */
186 if (index < 0)
187 return ret;
188
189 /* Check if we have MCE info to process. */
190 if (index < MAX_MC_EVT) {
191 mc_evt = this_cpu_ptr(&mce_event[index]);
192 /* Copy the event structure and release the original */
193 if (mce)
194 *mce = *mc_evt;
195 if (release)
196 mc_evt->in_use = 0;
197 ret = 1;
198 }
199 /* Decrement the count to free the slot. */
200 if (release)
201 __this_cpu_dec(mce_nest_count);
202
203 return ret;
204 }
205
release_mce_event(void)206 void release_mce_event(void)
207 {
208 get_mce_event(NULL, true);
209 }
210
211
212 /*
213 * Queue up the MCE event which then can be handled later.
214 */
machine_check_ue_event(struct machine_check_event * evt)215 void machine_check_ue_event(struct machine_check_event *evt)
216 {
217 int index;
218
219 index = __this_cpu_inc_return(mce_ue_count) - 1;
220 /* If queue is full, just return for now. */
221 if (index >= MAX_MC_EVT) {
222 __this_cpu_dec(mce_ue_count);
223 return;
224 }
225 memcpy(this_cpu_ptr(&mce_ue_event_queue[index]), evt, sizeof(*evt));
226
227 /* Queue work to process this event later. */
228 schedule_work(&mce_ue_event_work);
229 }
230
231 /*
232 * Queue up the MCE event which then can be handled later.
233 */
machine_check_queue_event(void)234 void machine_check_queue_event(void)
235 {
236 int index;
237 struct machine_check_event evt;
238
239 if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
240 return;
241
242 index = __this_cpu_inc_return(mce_queue_count) - 1;
243 /* If queue is full, just return for now. */
244 if (index >= MAX_MC_EVT) {
245 __this_cpu_dec(mce_queue_count);
246 return;
247 }
248 memcpy(this_cpu_ptr(&mce_event_queue[index]), &evt, sizeof(evt));
249
250 /* Queue irq work to process this event later. */
251 irq_work_queue(&mce_event_process_work);
252 }
253 /*
254 * process pending MCE event from the mce event queue. This function will be
255 * called during syscall exit.
256 */
machine_process_ue_event(struct work_struct * work)257 static void machine_process_ue_event(struct work_struct *work)
258 {
259 int index;
260 struct machine_check_event *evt;
261
262 while (__this_cpu_read(mce_ue_count) > 0) {
263 index = __this_cpu_read(mce_ue_count) - 1;
264 evt = this_cpu_ptr(&mce_ue_event_queue[index]);
265 #ifdef CONFIG_MEMORY_FAILURE
266 /*
267 * This should probably queued elsewhere, but
268 * oh! well
269 */
270 if (evt->error_type == MCE_ERROR_TYPE_UE) {
271 if (evt->u.ue_error.physical_address_provided) {
272 unsigned long pfn;
273
274 pfn = evt->u.ue_error.physical_address >>
275 PAGE_SHIFT;
276 memory_failure(pfn, 0);
277 } else
278 pr_warn("Failed to identify bad address from "
279 "where the uncorrectable error (UE) "
280 "was generated\n");
281 }
282 #endif
283 __this_cpu_dec(mce_ue_count);
284 }
285 }
286 /*
287 * process pending MCE event from the mce event queue. This function will be
288 * called during syscall exit.
289 */
machine_check_process_queued_event(struct irq_work * work)290 static void machine_check_process_queued_event(struct irq_work *work)
291 {
292 int index;
293 struct machine_check_event *evt;
294
295 add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
296
297 /*
298 * For now just print it to console.
299 * TODO: log this error event to FSP or nvram.
300 */
301 while (__this_cpu_read(mce_queue_count) > 0) {
302 index = __this_cpu_read(mce_queue_count) - 1;
303 evt = this_cpu_ptr(&mce_event_queue[index]);
304 machine_check_print_event_info(evt, false);
305 __this_cpu_dec(mce_queue_count);
306 }
307 }
308
machine_check_print_event_info(struct machine_check_event * evt,bool user_mode)309 void machine_check_print_event_info(struct machine_check_event *evt,
310 bool user_mode)
311 {
312 const char *level, *sevstr, *subtype;
313 static const char *mc_ue_types[] = {
314 "Indeterminate",
315 "Instruction fetch",
316 "Page table walk ifetch",
317 "Load/Store",
318 "Page table walk Load/Store",
319 };
320 static const char *mc_slb_types[] = {
321 "Indeterminate",
322 "Parity",
323 "Multihit",
324 };
325 static const char *mc_erat_types[] = {
326 "Indeterminate",
327 "Parity",
328 "Multihit",
329 };
330 static const char *mc_tlb_types[] = {
331 "Indeterminate",
332 "Parity",
333 "Multihit",
334 };
335 static const char *mc_user_types[] = {
336 "Indeterminate",
337 "tlbie(l) invalid",
338 };
339 static const char *mc_ra_types[] = {
340 "Indeterminate",
341 "Instruction fetch (bad)",
342 "Instruction fetch (foreign)",
343 "Page table walk ifetch (bad)",
344 "Page table walk ifetch (foreign)",
345 "Load (bad)",
346 "Store (bad)",
347 "Page table walk Load/Store (bad)",
348 "Page table walk Load/Store (foreign)",
349 "Load/Store (foreign)",
350 };
351 static const char *mc_link_types[] = {
352 "Indeterminate",
353 "Instruction fetch (timeout)",
354 "Page table walk ifetch (timeout)",
355 "Load (timeout)",
356 "Store (timeout)",
357 "Page table walk Load/Store (timeout)",
358 };
359
360 /* Print things out */
361 if (evt->version != MCE_V1) {
362 pr_err("Machine Check Exception, Unknown event version %d !\n",
363 evt->version);
364 return;
365 }
366 switch (evt->severity) {
367 case MCE_SEV_NO_ERROR:
368 level = KERN_INFO;
369 sevstr = "Harmless";
370 break;
371 case MCE_SEV_WARNING:
372 level = KERN_WARNING;
373 sevstr = "";
374 break;
375 case MCE_SEV_ERROR_SYNC:
376 level = KERN_ERR;
377 sevstr = "Severe";
378 break;
379 case MCE_SEV_FATAL:
380 default:
381 level = KERN_ERR;
382 sevstr = "Fatal";
383 break;
384 }
385
386 printk("%s%s Machine check interrupt [%s]\n", level, sevstr,
387 evt->disposition == MCE_DISPOSITION_RECOVERED ?
388 "Recovered" : "Not recovered");
389
390 if (user_mode) {
391 printk("%s NIP: [%016llx] PID: %d Comm: %s\n", level,
392 evt->srr0, current->pid, current->comm);
393 } else {
394 printk("%s NIP [%016llx]: %pS\n", level, evt->srr0,
395 (void *)evt->srr0);
396 }
397
398 printk("%s Initiator: %s\n", level,
399 evt->initiator == MCE_INITIATOR_CPU ? "CPU" : "Unknown");
400 switch (evt->error_type) {
401 case MCE_ERROR_TYPE_UE:
402 subtype = evt->u.ue_error.ue_error_type <
403 ARRAY_SIZE(mc_ue_types) ?
404 mc_ue_types[evt->u.ue_error.ue_error_type]
405 : "Unknown";
406 printk("%s Error type: UE [%s]\n", level, subtype);
407 if (evt->u.ue_error.effective_address_provided)
408 printk("%s Effective address: %016llx\n",
409 level, evt->u.ue_error.effective_address);
410 if (evt->u.ue_error.physical_address_provided)
411 printk("%s Physical address: %016llx\n",
412 level, evt->u.ue_error.physical_address);
413 break;
414 case MCE_ERROR_TYPE_SLB:
415 subtype = evt->u.slb_error.slb_error_type <
416 ARRAY_SIZE(mc_slb_types) ?
417 mc_slb_types[evt->u.slb_error.slb_error_type]
418 : "Unknown";
419 printk("%s Error type: SLB [%s]\n", level, subtype);
420 if (evt->u.slb_error.effective_address_provided)
421 printk("%s Effective address: %016llx\n",
422 level, evt->u.slb_error.effective_address);
423 break;
424 case MCE_ERROR_TYPE_ERAT:
425 subtype = evt->u.erat_error.erat_error_type <
426 ARRAY_SIZE(mc_erat_types) ?
427 mc_erat_types[evt->u.erat_error.erat_error_type]
428 : "Unknown";
429 printk("%s Error type: ERAT [%s]\n", level, subtype);
430 if (evt->u.erat_error.effective_address_provided)
431 printk("%s Effective address: %016llx\n",
432 level, evt->u.erat_error.effective_address);
433 break;
434 case MCE_ERROR_TYPE_TLB:
435 subtype = evt->u.tlb_error.tlb_error_type <
436 ARRAY_SIZE(mc_tlb_types) ?
437 mc_tlb_types[evt->u.tlb_error.tlb_error_type]
438 : "Unknown";
439 printk("%s Error type: TLB [%s]\n", level, subtype);
440 if (evt->u.tlb_error.effective_address_provided)
441 printk("%s Effective address: %016llx\n",
442 level, evt->u.tlb_error.effective_address);
443 break;
444 case MCE_ERROR_TYPE_USER:
445 subtype = evt->u.user_error.user_error_type <
446 ARRAY_SIZE(mc_user_types) ?
447 mc_user_types[evt->u.user_error.user_error_type]
448 : "Unknown";
449 printk("%s Error type: User [%s]\n", level, subtype);
450 if (evt->u.user_error.effective_address_provided)
451 printk("%s Effective address: %016llx\n",
452 level, evt->u.user_error.effective_address);
453 break;
454 case MCE_ERROR_TYPE_RA:
455 subtype = evt->u.ra_error.ra_error_type <
456 ARRAY_SIZE(mc_ra_types) ?
457 mc_ra_types[evt->u.ra_error.ra_error_type]
458 : "Unknown";
459 printk("%s Error type: Real address [%s]\n", level, subtype);
460 if (evt->u.ra_error.effective_address_provided)
461 printk("%s Effective address: %016llx\n",
462 level, evt->u.ra_error.effective_address);
463 break;
464 case MCE_ERROR_TYPE_LINK:
465 subtype = evt->u.link_error.link_error_type <
466 ARRAY_SIZE(mc_link_types) ?
467 mc_link_types[evt->u.link_error.link_error_type]
468 : "Unknown";
469 printk("%s Error type: Link [%s]\n", level, subtype);
470 if (evt->u.link_error.effective_address_provided)
471 printk("%s Effective address: %016llx\n",
472 level, evt->u.link_error.effective_address);
473 break;
474 default:
475 case MCE_ERROR_TYPE_UNKNOWN:
476 printk("%s Error type: Unknown\n", level);
477 break;
478 }
479 }
480 EXPORT_SYMBOL_GPL(machine_check_print_event_info);
481
482 /*
483 * This function is called in real mode. Strictly no printk's please.
484 *
485 * regs->nip and regs->msr contains srr0 and ssr1.
486 */
machine_check_early(struct pt_regs * regs)487 long machine_check_early(struct pt_regs *regs)
488 {
489 long handled = 0;
490
491 __this_cpu_inc(irq_stat.mce_exceptions);
492
493 if (cur_cpu_spec && cur_cpu_spec->machine_check_early)
494 handled = cur_cpu_spec->machine_check_early(regs);
495 return handled;
496 }
497
/*
 * Possible meanings for HMER_DEBUG_TRIG bit being set on POWER9.
 *
 * Set once by init_debug_trig_function() and consulted by
 * hmi_handle_debugtrig(). Stays DTRIG_UNKNOWN (the zero value) when
 * neither the device tree nor the PVR identifies a use for the bit.
 */
static enum {
	DTRIG_UNKNOWN,
	DTRIG_VECTOR_CI,	/* need to emulate vector CI load instr */
	DTRIG_SUSPEND_ESCAPE,	/* need to escape from TM suspend mode */
} hmer_debug_trig_function;
504
init_debug_trig_function(void)505 static int init_debug_trig_function(void)
506 {
507 int pvr;
508 struct device_node *cpun;
509 struct property *prop = NULL;
510 const char *str;
511
512 /* First look in the device tree */
513 preempt_disable();
514 cpun = of_get_cpu_node(smp_processor_id(), NULL);
515 if (cpun) {
516 of_property_for_each_string(cpun, "ibm,hmi-special-triggers",
517 prop, str) {
518 if (strcmp(str, "bit17-vector-ci-load") == 0)
519 hmer_debug_trig_function = DTRIG_VECTOR_CI;
520 else if (strcmp(str, "bit17-tm-suspend-escape") == 0)
521 hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE;
522 }
523 of_node_put(cpun);
524 }
525 preempt_enable();
526
527 /* If we found the property, don't look at PVR */
528 if (prop)
529 goto out;
530
531 pvr = mfspr(SPRN_PVR);
532 /* Check for POWER9 Nimbus (scale-out) */
533 if ((PVR_VER(pvr) == PVR_POWER9) && (pvr & 0xe000) == 0) {
534 /* DD2.2 and later */
535 if ((pvr & 0xfff) >= 0x202)
536 hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE;
537 /* DD2.0 and DD2.1 - used for vector CI load emulation */
538 else if ((pvr & 0xfff) >= 0x200)
539 hmer_debug_trig_function = DTRIG_VECTOR_CI;
540 }
541
542 out:
543 switch (hmer_debug_trig_function) {
544 case DTRIG_VECTOR_CI:
545 pr_debug("HMI debug trigger used for vector CI load\n");
546 break;
547 case DTRIG_SUSPEND_ESCAPE:
548 pr_debug("HMI debug trigger used for TM suspend escape\n");
549 break;
550 default:
551 break;
552 }
553 return 0;
554 }
555 __initcall(init_debug_trig_function);
556
557 /*
558 * Handle HMIs that occur as a result of a debug trigger.
559 * Return values:
560 * -1 means this is not a HMI cause that we know about
561 * 0 means no further handling is required
562 * 1 means further handling is required
563 */
hmi_handle_debugtrig(struct pt_regs * regs)564 long hmi_handle_debugtrig(struct pt_regs *regs)
565 {
566 unsigned long hmer = mfspr(SPRN_HMER);
567 long ret = 0;
568
569 /* HMER_DEBUG_TRIG bit is used for various workarounds on P9 */
570 if (!((hmer & HMER_DEBUG_TRIG)
571 && hmer_debug_trig_function != DTRIG_UNKNOWN))
572 return -1;
573
574 hmer &= ~HMER_DEBUG_TRIG;
575 /* HMER is a write-AND register */
576 mtspr(SPRN_HMER, ~HMER_DEBUG_TRIG);
577
578 switch (hmer_debug_trig_function) {
579 case DTRIG_VECTOR_CI:
580 /*
581 * Now to avoid problems with soft-disable we
582 * only do the emulation if we are coming from
583 * host user space
584 */
585 if (regs && user_mode(regs))
586 ret = local_paca->hmi_p9_special_emu = 1;
587
588 break;
589
590 default:
591 break;
592 }
593
594 /*
595 * See if any other HMI causes remain to be handled
596 */
597 if (hmer & mfspr(SPRN_HMEER))
598 return -1;
599
600 return ret;
601 }
602
603 /*
604 * Return values:
605 */
hmi_exception_realmode(struct pt_regs * regs)606 long hmi_exception_realmode(struct pt_regs *regs)
607 {
608 int ret;
609
610 __this_cpu_inc(irq_stat.hmi_exceptions);
611
612 ret = hmi_handle_debugtrig(regs);
613 if (ret >= 0)
614 return ret;
615
616 wait_for_subcore_guest_exit();
617
618 if (ppc_md.hmi_exception_early)
619 ppc_md.hmi_exception_early(regs);
620
621 wait_for_tb_resync();
622
623 return 1;
624 }
625