/* arch/powerpc/kernel/mce.c */
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Machine check exception handling.
 *
 * Copyright 2013 IBM Corporation
 * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
 */

#undef DEBUG
#define pr_fmt(fmt) "mce: " fmt

#include <linux/hardirq.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/percpu.h>
#include <linux/export.h>
#include <linux/irq_work.h>
#include <linux/extable.h>
#include <linux/ftrace.h>
#include <linux/memblock.h>
#include <linux/of.h>

#include <asm/interrupt.h>
#include <asm/machdep.h>
#include <asm/mce.h>
#include <asm/nmi.h>

#include "setup.h"
  25. static void machine_check_ue_event(struct machine_check_event *evt);
  26. static void machine_process_ue_event(struct work_struct *work);
  27. static DECLARE_WORK(mce_ue_event_work, machine_process_ue_event);
  28. static BLOCKING_NOTIFIER_HEAD(mce_notifier_list);
  29. int mce_register_notifier(struct notifier_block *nb)
  30. {
  31. return blocking_notifier_chain_register(&mce_notifier_list, nb);
  32. }
  33. EXPORT_SYMBOL_GPL(mce_register_notifier);
  34. int mce_unregister_notifier(struct notifier_block *nb)
  35. {
  36. return blocking_notifier_chain_unregister(&mce_notifier_list, nb);
  37. }
  38. EXPORT_SYMBOL_GPL(mce_unregister_notifier);
  39. static void mce_set_error_info(struct machine_check_event *mce,
  40. struct mce_error_info *mce_err)
  41. {
  42. mce->error_type = mce_err->error_type;
  43. switch (mce_err->error_type) {
  44. case MCE_ERROR_TYPE_UE:
  45. mce->u.ue_error.ue_error_type = mce_err->u.ue_error_type;
  46. break;
  47. case MCE_ERROR_TYPE_SLB:
  48. mce->u.slb_error.slb_error_type = mce_err->u.slb_error_type;
  49. break;
  50. case MCE_ERROR_TYPE_ERAT:
  51. mce->u.erat_error.erat_error_type = mce_err->u.erat_error_type;
  52. break;
  53. case MCE_ERROR_TYPE_TLB:
  54. mce->u.tlb_error.tlb_error_type = mce_err->u.tlb_error_type;
  55. break;
  56. case MCE_ERROR_TYPE_USER:
  57. mce->u.user_error.user_error_type = mce_err->u.user_error_type;
  58. break;
  59. case MCE_ERROR_TYPE_RA:
  60. mce->u.ra_error.ra_error_type = mce_err->u.ra_error_type;
  61. break;
  62. case MCE_ERROR_TYPE_LINK:
  63. mce->u.link_error.link_error_type = mce_err->u.link_error_type;
  64. break;
  65. case MCE_ERROR_TYPE_UNKNOWN:
  66. default:
  67. break;
  68. }
  69. }
  70. void mce_irq_work_queue(void)
  71. {
  72. /* Raise decrementer interrupt */
  73. arch_irq_work_raise();
  74. set_mce_pending_irq_work();
  75. }
  76. /*
  77. * Decode and save high level MCE information into per cpu buffer which
  78. * is an array of machine_check_event structure.
  79. */
  80. void save_mce_event(struct pt_regs *regs, long handled,
  81. struct mce_error_info *mce_err,
  82. uint64_t nip, uint64_t addr, uint64_t phys_addr)
  83. {
  84. int index = local_paca->mce_info->mce_nest_count++;
  85. struct machine_check_event *mce;
  86. mce = &local_paca->mce_info->mce_event[index];
  87. /*
  88. * Return if we don't have enough space to log mce event.
  89. * mce_nest_count may go beyond MAX_MC_EVT but that's ok,
  90. * the check below will stop buffer overrun.
  91. */
  92. if (index >= MAX_MC_EVT)
  93. return;
  94. /* Populate generic machine check info */
  95. mce->version = MCE_V1;
  96. mce->srr0 = nip;
  97. mce->srr1 = regs->msr;
  98. mce->gpr3 = regs->gpr[3];
  99. mce->in_use = 1;
  100. mce->cpu = get_paca()->paca_index;
  101. /* Mark it recovered if we have handled it and MSR(RI=1). */
  102. if (handled && (regs->msr & MSR_RI))
  103. mce->disposition = MCE_DISPOSITION_RECOVERED;
  104. else
  105. mce->disposition = MCE_DISPOSITION_NOT_RECOVERED;
  106. mce->initiator = mce_err->initiator;
  107. mce->severity = mce_err->severity;
  108. mce->sync_error = mce_err->sync_error;
  109. mce->error_class = mce_err->error_class;
  110. /*
  111. * Populate the mce error_type and type-specific error_type.
  112. */
  113. mce_set_error_info(mce, mce_err);
  114. if (mce->error_type == MCE_ERROR_TYPE_UE)
  115. mce->u.ue_error.ignore_event = mce_err->ignore_event;
  116. /*
  117. * Raise irq work, So that we don't miss to log the error for
  118. * unrecoverable errors.
  119. */
  120. if (mce->disposition == MCE_DISPOSITION_NOT_RECOVERED)
  121. mce_irq_work_queue();
  122. if (!addr)
  123. return;
  124. if (mce->error_type == MCE_ERROR_TYPE_TLB) {
  125. mce->u.tlb_error.effective_address_provided = true;
  126. mce->u.tlb_error.effective_address = addr;
  127. } else if (mce->error_type == MCE_ERROR_TYPE_SLB) {
  128. mce->u.slb_error.effective_address_provided = true;
  129. mce->u.slb_error.effective_address = addr;
  130. } else if (mce->error_type == MCE_ERROR_TYPE_ERAT) {
  131. mce->u.erat_error.effective_address_provided = true;
  132. mce->u.erat_error.effective_address = addr;
  133. } else if (mce->error_type == MCE_ERROR_TYPE_USER) {
  134. mce->u.user_error.effective_address_provided = true;
  135. mce->u.user_error.effective_address = addr;
  136. } else if (mce->error_type == MCE_ERROR_TYPE_RA) {
  137. mce->u.ra_error.effective_address_provided = true;
  138. mce->u.ra_error.effective_address = addr;
  139. } else if (mce->error_type == MCE_ERROR_TYPE_LINK) {
  140. mce->u.link_error.effective_address_provided = true;
  141. mce->u.link_error.effective_address = addr;
  142. } else if (mce->error_type == MCE_ERROR_TYPE_UE) {
  143. mce->u.ue_error.effective_address_provided = true;
  144. mce->u.ue_error.effective_address = addr;
  145. if (phys_addr != ULONG_MAX) {
  146. mce->u.ue_error.physical_address_provided = true;
  147. mce->u.ue_error.physical_address = phys_addr;
  148. machine_check_ue_event(mce);
  149. }
  150. }
  151. return;
  152. }
  153. /*
  154. * get_mce_event:
  155. * mce Pointer to machine_check_event structure to be filled.
  156. * release Flag to indicate whether to free the event slot or not.
  157. * 0 <= do not release the mce event. Caller will invoke
  158. * release_mce_event() once event has been consumed.
  159. * 1 <= release the slot.
  160. *
  161. * return 1 = success
  162. * 0 = failure
  163. *
  164. * get_mce_event() will be called by platform specific machine check
  165. * handle routine and in KVM.
  166. * When we call get_mce_event(), we are still in interrupt context and
  167. * preemption will not be scheduled until ret_from_expect() routine
  168. * is called.
  169. */
  170. int get_mce_event(struct machine_check_event *mce, bool release)
  171. {
  172. int index = local_paca->mce_info->mce_nest_count - 1;
  173. struct machine_check_event *mc_evt;
  174. int ret = 0;
  175. /* Sanity check */
  176. if (index < 0)
  177. return ret;
  178. /* Check if we have MCE info to process. */
  179. if (index < MAX_MC_EVT) {
  180. mc_evt = &local_paca->mce_info->mce_event[index];
  181. /* Copy the event structure and release the original */
  182. if (mce)
  183. *mce = *mc_evt;
  184. if (release)
  185. mc_evt->in_use = 0;
  186. ret = 1;
  187. }
  188. /* Decrement the count to free the slot. */
  189. if (release)
  190. local_paca->mce_info->mce_nest_count--;
  191. return ret;
  192. }
  193. void release_mce_event(void)
  194. {
  195. get_mce_event(NULL, true);
  196. }
  197. static void machine_check_ue_work(void)
  198. {
  199. schedule_work(&mce_ue_event_work);
  200. }
  201. /*
  202. * Queue up the MCE event which then can be handled later.
  203. */
  204. static void machine_check_ue_event(struct machine_check_event *evt)
  205. {
  206. int index;
  207. index = local_paca->mce_info->mce_ue_count++;
  208. /* If queue is full, just return for now. */
  209. if (index >= MAX_MC_EVT) {
  210. local_paca->mce_info->mce_ue_count--;
  211. return;
  212. }
  213. memcpy(&local_paca->mce_info->mce_ue_event_queue[index],
  214. evt, sizeof(*evt));
  215. }
  216. /*
  217. * Queue up the MCE event which then can be handled later.
  218. */
  219. void machine_check_queue_event(void)
  220. {
  221. int index;
  222. struct machine_check_event evt;
  223. if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
  224. return;
  225. index = local_paca->mce_info->mce_queue_count++;
  226. /* If queue is full, just return for now. */
  227. if (index >= MAX_MC_EVT) {
  228. local_paca->mce_info->mce_queue_count--;
  229. return;
  230. }
  231. memcpy(&local_paca->mce_info->mce_event_queue[index],
  232. &evt, sizeof(evt));
  233. mce_irq_work_queue();
  234. }
  235. void mce_common_process_ue(struct pt_regs *regs,
  236. struct mce_error_info *mce_err)
  237. {
  238. const struct exception_table_entry *entry;
  239. entry = search_kernel_exception_table(regs->nip);
  240. if (entry) {
  241. mce_err->ignore_event = true;
  242. regs_set_return_ip(regs, extable_fixup(entry));
  243. }
  244. }
  245. /*
  246. * process pending MCE event from the mce event queue. This function will be
  247. * called during syscall exit.
  248. */
  249. static void machine_process_ue_event(struct work_struct *work)
  250. {
  251. int index;
  252. struct machine_check_event *evt;
  253. while (local_paca->mce_info->mce_ue_count > 0) {
  254. index = local_paca->mce_info->mce_ue_count - 1;
  255. evt = &local_paca->mce_info->mce_ue_event_queue[index];
  256. blocking_notifier_call_chain(&mce_notifier_list, 0, evt);
  257. #ifdef CONFIG_MEMORY_FAILURE
  258. /*
  259. * This should probably queued elsewhere, but
  260. * oh! well
  261. *
  262. * Don't report this machine check because the caller has a
  263. * asked us to ignore the event, it has a fixup handler which
  264. * will do the appropriate error handling and reporting.
  265. */
  266. if (evt->error_type == MCE_ERROR_TYPE_UE) {
  267. if (evt->u.ue_error.ignore_event) {
  268. local_paca->mce_info->mce_ue_count--;
  269. continue;
  270. }
  271. if (evt->u.ue_error.physical_address_provided) {
  272. unsigned long pfn;
  273. pfn = evt->u.ue_error.physical_address >>
  274. PAGE_SHIFT;
  275. memory_failure(pfn, 0);
  276. } else
  277. pr_warn("Failed to identify bad address from "
  278. "where the uncorrectable error (UE) "
  279. "was generated\n");
  280. }
  281. #endif
  282. local_paca->mce_info->mce_ue_count--;
  283. }
  284. }
  285. /*
  286. * process pending MCE event from the mce event queue. This function will be
  287. * called during syscall exit.
  288. */
  289. static void machine_check_process_queued_event(void)
  290. {
  291. int index;
  292. struct machine_check_event *evt;
  293. add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
  294. /*
  295. * For now just print it to console.
  296. * TODO: log this error event to FSP or nvram.
  297. */
  298. while (local_paca->mce_info->mce_queue_count > 0) {
  299. index = local_paca->mce_info->mce_queue_count - 1;
  300. evt = &local_paca->mce_info->mce_event_queue[index];
  301. if (evt->error_type == MCE_ERROR_TYPE_UE &&
  302. evt->u.ue_error.ignore_event) {
  303. local_paca->mce_info->mce_queue_count--;
  304. continue;
  305. }
  306. machine_check_print_event_info(evt, false, false);
  307. local_paca->mce_info->mce_queue_count--;
  308. }
  309. }
  310. void set_mce_pending_irq_work(void)
  311. {
  312. local_paca->mce_pending_irq_work = 1;
  313. }
  314. void clear_mce_pending_irq_work(void)
  315. {
  316. local_paca->mce_pending_irq_work = 0;
  317. }
  318. void mce_run_irq_context_handlers(void)
  319. {
  320. if (unlikely(local_paca->mce_pending_irq_work)) {
  321. if (ppc_md.machine_check_log_err)
  322. ppc_md.machine_check_log_err();
  323. machine_check_process_queued_event();
  324. machine_check_ue_work();
  325. clear_mce_pending_irq_work();
  326. }
  327. }
  328. void machine_check_print_event_info(struct machine_check_event *evt,
  329. bool user_mode, bool in_guest)
  330. {
  331. const char *level, *sevstr, *subtype, *err_type, *initiator;
  332. uint64_t ea = 0, pa = 0;
  333. int n = 0;
  334. char dar_str[50];
  335. char pa_str[50];
  336. static const char *mc_ue_types[] = {
  337. "Indeterminate",
  338. "Instruction fetch",
  339. "Page table walk ifetch",
  340. "Load/Store",
  341. "Page table walk Load/Store",
  342. };
  343. static const char *mc_slb_types[] = {
  344. "Indeterminate",
  345. "Parity",
  346. "Multihit",
  347. };
  348. static const char *mc_erat_types[] = {
  349. "Indeterminate",
  350. "Parity",
  351. "Multihit",
  352. };
  353. static const char *mc_tlb_types[] = {
  354. "Indeterminate",
  355. "Parity",
  356. "Multihit",
  357. };
  358. static const char *mc_user_types[] = {
  359. "Indeterminate",
  360. "tlbie(l) invalid",
  361. "scv invalid",
  362. };
  363. static const char *mc_ra_types[] = {
  364. "Indeterminate",
  365. "Instruction fetch (bad)",
  366. "Instruction fetch (foreign/control memory)",
  367. "Page table walk ifetch (bad)",
  368. "Page table walk ifetch (foreign/control memory)",
  369. "Load (bad)",
  370. "Store (bad)",
  371. "Page table walk Load/Store (bad)",
  372. "Page table walk Load/Store (foreign/control memory)",
  373. "Load/Store (foreign/control memory)",
  374. };
  375. static const char *mc_link_types[] = {
  376. "Indeterminate",
  377. "Instruction fetch (timeout)",
  378. "Page table walk ifetch (timeout)",
  379. "Load (timeout)",
  380. "Store (timeout)",
  381. "Page table walk Load/Store (timeout)",
  382. };
  383. static const char *mc_error_class[] = {
  384. "Unknown",
  385. "Hardware error",
  386. "Probable Hardware error (some chance of software cause)",
  387. "Software error",
  388. "Probable Software error (some chance of hardware cause)",
  389. };
  390. /* Print things out */
  391. if (evt->version != MCE_V1) {
  392. pr_err("Machine Check Exception, Unknown event version %d !\n",
  393. evt->version);
  394. return;
  395. }
  396. switch (evt->severity) {
  397. case MCE_SEV_NO_ERROR:
  398. level = KERN_INFO;
  399. sevstr = "Harmless";
  400. break;
  401. case MCE_SEV_WARNING:
  402. level = KERN_WARNING;
  403. sevstr = "Warning";
  404. break;
  405. case MCE_SEV_SEVERE:
  406. level = KERN_ERR;
  407. sevstr = "Severe";
  408. break;
  409. case MCE_SEV_FATAL:
  410. default:
  411. level = KERN_ERR;
  412. sevstr = "Fatal";
  413. break;
  414. }
  415. switch(evt->initiator) {
  416. case MCE_INITIATOR_CPU:
  417. initiator = "CPU";
  418. break;
  419. case MCE_INITIATOR_PCI:
  420. initiator = "PCI";
  421. break;
  422. case MCE_INITIATOR_ISA:
  423. initiator = "ISA";
  424. break;
  425. case MCE_INITIATOR_MEMORY:
  426. initiator = "Memory";
  427. break;
  428. case MCE_INITIATOR_POWERMGM:
  429. initiator = "Power Management";
  430. break;
  431. case MCE_INITIATOR_UNKNOWN:
  432. default:
  433. initiator = "Unknown";
  434. break;
  435. }
  436. switch (evt->error_type) {
  437. case MCE_ERROR_TYPE_UE:
  438. err_type = "UE";
  439. subtype = evt->u.ue_error.ue_error_type <
  440. ARRAY_SIZE(mc_ue_types) ?
  441. mc_ue_types[evt->u.ue_error.ue_error_type]
  442. : "Unknown";
  443. if (evt->u.ue_error.effective_address_provided)
  444. ea = evt->u.ue_error.effective_address;
  445. if (evt->u.ue_error.physical_address_provided)
  446. pa = evt->u.ue_error.physical_address;
  447. break;
  448. case MCE_ERROR_TYPE_SLB:
  449. err_type = "SLB";
  450. subtype = evt->u.slb_error.slb_error_type <
  451. ARRAY_SIZE(mc_slb_types) ?
  452. mc_slb_types[evt->u.slb_error.slb_error_type]
  453. : "Unknown";
  454. if (evt->u.slb_error.effective_address_provided)
  455. ea = evt->u.slb_error.effective_address;
  456. break;
  457. case MCE_ERROR_TYPE_ERAT:
  458. err_type = "ERAT";
  459. subtype = evt->u.erat_error.erat_error_type <
  460. ARRAY_SIZE(mc_erat_types) ?
  461. mc_erat_types[evt->u.erat_error.erat_error_type]
  462. : "Unknown";
  463. if (evt->u.erat_error.effective_address_provided)
  464. ea = evt->u.erat_error.effective_address;
  465. break;
  466. case MCE_ERROR_TYPE_TLB:
  467. err_type = "TLB";
  468. subtype = evt->u.tlb_error.tlb_error_type <
  469. ARRAY_SIZE(mc_tlb_types) ?
  470. mc_tlb_types[evt->u.tlb_error.tlb_error_type]
  471. : "Unknown";
  472. if (evt->u.tlb_error.effective_address_provided)
  473. ea = evt->u.tlb_error.effective_address;
  474. break;
  475. case MCE_ERROR_TYPE_USER:
  476. err_type = "User";
  477. subtype = evt->u.user_error.user_error_type <
  478. ARRAY_SIZE(mc_user_types) ?
  479. mc_user_types[evt->u.user_error.user_error_type]
  480. : "Unknown";
  481. if (evt->u.user_error.effective_address_provided)
  482. ea = evt->u.user_error.effective_address;
  483. break;
  484. case MCE_ERROR_TYPE_RA:
  485. err_type = "Real address";
  486. subtype = evt->u.ra_error.ra_error_type <
  487. ARRAY_SIZE(mc_ra_types) ?
  488. mc_ra_types[evt->u.ra_error.ra_error_type]
  489. : "Unknown";
  490. if (evt->u.ra_error.effective_address_provided)
  491. ea = evt->u.ra_error.effective_address;
  492. break;
  493. case MCE_ERROR_TYPE_LINK:
  494. err_type = "Link";
  495. subtype = evt->u.link_error.link_error_type <
  496. ARRAY_SIZE(mc_link_types) ?
  497. mc_link_types[evt->u.link_error.link_error_type]
  498. : "Unknown";
  499. if (evt->u.link_error.effective_address_provided)
  500. ea = evt->u.link_error.effective_address;
  501. break;
  502. case MCE_ERROR_TYPE_DCACHE:
  503. err_type = "D-Cache";
  504. subtype = "Unknown";
  505. break;
  506. case MCE_ERROR_TYPE_ICACHE:
  507. err_type = "I-Cache";
  508. subtype = "Unknown";
  509. break;
  510. default:
  511. case MCE_ERROR_TYPE_UNKNOWN:
  512. err_type = "Unknown";
  513. subtype = "";
  514. break;
  515. }
  516. dar_str[0] = pa_str[0] = '\0';
  517. if (ea && evt->srr0 != ea) {
  518. /* Load/Store address */
  519. n = sprintf(dar_str, "DAR: %016llx ", ea);
  520. if (pa)
  521. sprintf(dar_str + n, "paddr: %016llx ", pa);
  522. } else if (pa) {
  523. sprintf(pa_str, " paddr: %016llx", pa);
  524. }
  525. printk("%sMCE: CPU%d: machine check (%s) %s %s %s %s[%s]\n",
  526. level, evt->cpu, sevstr, in_guest ? "Guest" : "",
  527. err_type, subtype, dar_str,
  528. evt->disposition == MCE_DISPOSITION_RECOVERED ?
  529. "Recovered" : "Not recovered");
  530. if (in_guest || user_mode) {
  531. printk("%sMCE: CPU%d: PID: %d Comm: %s %sNIP: [%016llx]%s\n",
  532. level, evt->cpu, current->pid, current->comm,
  533. in_guest ? "Guest " : "", evt->srr0, pa_str);
  534. } else {
  535. printk("%sMCE: CPU%d: NIP: [%016llx] %pS%s\n",
  536. level, evt->cpu, evt->srr0, (void *)evt->srr0, pa_str);
  537. }
  538. printk("%sMCE: CPU%d: Initiator %s\n", level, evt->cpu, initiator);
  539. subtype = evt->error_class < ARRAY_SIZE(mc_error_class) ?
  540. mc_error_class[evt->error_class] : "Unknown";
  541. printk("%sMCE: CPU%d: %s\n", level, evt->cpu, subtype);
  542. #ifdef CONFIG_PPC_64S_HASH_MMU
  543. /* Display faulty slb contents for SLB errors. */
  544. if (evt->error_type == MCE_ERROR_TYPE_SLB && !in_guest)
  545. slb_dump_contents(local_paca->mce_faulty_slbs);
  546. #endif
  547. }
  548. EXPORT_SYMBOL_GPL(machine_check_print_event_info);
  549. /*
  550. * This function is called in real mode. Strictly no printk's please.
  551. *
  552. * regs->nip and regs->msr contains srr0 and ssr1.
  553. */
  554. DEFINE_INTERRUPT_HANDLER_NMI(machine_check_early)
  555. {
  556. long handled = 0;
  557. hv_nmi_check_nonrecoverable(regs);
  558. /*
  559. * See if platform is capable of handling machine check.
  560. */
  561. if (ppc_md.machine_check_early)
  562. handled = ppc_md.machine_check_early(regs);
  563. return handled;
  564. }
  565. /* Possible meanings for HMER_DEBUG_TRIG bit being set on POWER9 */
  566. static enum {
  567. DTRIG_UNKNOWN,
  568. DTRIG_VECTOR_CI, /* need to emulate vector CI load instr */
  569. DTRIG_SUSPEND_ESCAPE, /* need to escape from TM suspend mode */
  570. } hmer_debug_trig_function;
  571. static int init_debug_trig_function(void)
  572. {
  573. int pvr;
  574. struct device_node *cpun;
  575. struct property *prop = NULL;
  576. const char *str;
  577. /* First look in the device tree */
  578. preempt_disable();
  579. cpun = of_get_cpu_node(smp_processor_id(), NULL);
  580. if (cpun) {
  581. of_property_for_each_string(cpun, "ibm,hmi-special-triggers",
  582. prop, str) {
  583. if (strcmp(str, "bit17-vector-ci-load") == 0)
  584. hmer_debug_trig_function = DTRIG_VECTOR_CI;
  585. else if (strcmp(str, "bit17-tm-suspend-escape") == 0)
  586. hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE;
  587. }
  588. of_node_put(cpun);
  589. }
  590. preempt_enable();
  591. /* If we found the property, don't look at PVR */
  592. if (prop)
  593. goto out;
  594. pvr = mfspr(SPRN_PVR);
  595. /* Check for POWER9 Nimbus (scale-out) */
  596. if ((PVR_VER(pvr) == PVR_POWER9) && (pvr & 0xe000) == 0) {
  597. /* DD2.2 and later */
  598. if ((pvr & 0xfff) >= 0x202)
  599. hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE;
  600. /* DD2.0 and DD2.1 - used for vector CI load emulation */
  601. else if ((pvr & 0xfff) >= 0x200)
  602. hmer_debug_trig_function = DTRIG_VECTOR_CI;
  603. }
  604. out:
  605. switch (hmer_debug_trig_function) {
  606. case DTRIG_VECTOR_CI:
  607. pr_debug("HMI debug trigger used for vector CI load\n");
  608. break;
  609. case DTRIG_SUSPEND_ESCAPE:
  610. pr_debug("HMI debug trigger used for TM suspend escape\n");
  611. break;
  612. default:
  613. break;
  614. }
  615. return 0;
  616. }
  617. __initcall(init_debug_trig_function);
  618. /*
  619. * Handle HMIs that occur as a result of a debug trigger.
  620. * Return values:
  621. * -1 means this is not a HMI cause that we know about
  622. * 0 means no further handling is required
  623. * 1 means further handling is required
  624. */
  625. long hmi_handle_debugtrig(struct pt_regs *regs)
  626. {
  627. unsigned long hmer = mfspr(SPRN_HMER);
  628. long ret = 0;
  629. /* HMER_DEBUG_TRIG bit is used for various workarounds on P9 */
  630. if (!((hmer & HMER_DEBUG_TRIG)
  631. && hmer_debug_trig_function != DTRIG_UNKNOWN))
  632. return -1;
  633. hmer &= ~HMER_DEBUG_TRIG;
  634. /* HMER is a write-AND register */
  635. mtspr(SPRN_HMER, ~HMER_DEBUG_TRIG);
  636. switch (hmer_debug_trig_function) {
  637. case DTRIG_VECTOR_CI:
  638. /*
  639. * Now to avoid problems with soft-disable we
  640. * only do the emulation if we are coming from
  641. * host user space
  642. */
  643. if (regs && user_mode(regs))
  644. ret = local_paca->hmi_p9_special_emu = 1;
  645. break;
  646. default:
  647. break;
  648. }
  649. /*
  650. * See if any other HMI causes remain to be handled
  651. */
  652. if (hmer & mfspr(SPRN_HMEER))
  653. return -1;
  654. return ret;
  655. }
  656. /*
  657. * Return values:
  658. */
  659. DEFINE_INTERRUPT_HANDLER_NMI(hmi_exception_realmode)
  660. {
  661. int ret;
  662. local_paca->hmi_irqs++;
  663. ret = hmi_handle_debugtrig(regs);
  664. if (ret >= 0)
  665. return ret;
  666. wait_for_subcore_guest_exit();
  667. if (ppc_md.hmi_exception_early)
  668. ppc_md.hmi_exception_early(regs);
  669. wait_for_tb_resync();
  670. return 1;
  671. }
  672. void __init mce_init(void)
  673. {
  674. struct mce_info *mce_info;
  675. u64 limit;
  676. int i;
  677. limit = min(ppc64_bolted_size(), ppc64_rma_size);
  678. for_each_possible_cpu(i) {
  679. mce_info = memblock_alloc_try_nid(sizeof(*mce_info),
  680. __alignof__(*mce_info),
  681. MEMBLOCK_LOW_LIMIT,
  682. limit, early_cpu_to_node(i));
  683. if (!mce_info)
  684. goto err;
  685. paca_ptrs[i]->mce_info = mce_info;
  686. }
  687. return;
  688. err:
  689. panic("Failed to allocate memory for MCE event data\n");
  690. }