mce.c 3.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107
  1. /*
  2. * NFIT - Machine Check Handler
  3. *
  4. * Copyright(c) 2013-2016 Intel Corporation. All rights reserved.
  5. *
  6. * This program is free software; you can redistribute it and/or modify
  7. * it under the terms of version 2 of the GNU General Public License as
  8. * published by the Free Software Foundation.
  9. *
  10. * This program is distributed in the hope that it will be useful, but
  11. * WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  13. * General Public License for more details.
  14. */
  15. #include <linux/notifier.h>
  16. #include <linux/acpi.h>
  17. #include <linux/nd.h>
  18. #include <asm/mce.h>
  19. #include "nfit.h"
  20. static int nfit_handle_mce(struct notifier_block *nb, unsigned long val,
  21. void *data)
  22. {
  23. struct mce *mce = (struct mce *)data;
  24. struct acpi_nfit_desc *acpi_desc;
  25. struct nfit_spa *nfit_spa;
  26. /* We only care about uncorrectable memory errors */
  27. if (!mce_is_memory_error(mce) || mce_is_correctable(mce))
  28. return NOTIFY_DONE;
  29. /* Verify the address reported in the MCE is valid. */
  30. if (!mce_usable_address(mce))
  31. return NOTIFY_DONE;
  32. /*
  33. * mce->addr contains the physical addr accessed that caused the
  34. * machine check. We need to walk through the list of NFITs, and see
  35. * if any of them matches that address, and only then start a scrub.
  36. */
  37. mutex_lock(&acpi_desc_lock);
  38. list_for_each_entry(acpi_desc, &acpi_descs, list) {
  39. struct device *dev = acpi_desc->dev;
  40. int found_match = 0;
  41. mutex_lock(&acpi_desc->init_mutex);
  42. list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
  43. struct acpi_nfit_system_address *spa = nfit_spa->spa;
  44. if (nfit_spa_type(spa) != NFIT_SPA_PM)
  45. continue;
  46. /* find the spa that covers the mce addr */
  47. if (spa->address > mce->addr)
  48. continue;
  49. if ((spa->address + spa->length - 1) < mce->addr)
  50. continue;
  51. found_match = 1;
  52. dev_dbg(dev, "addr in SPA %d (0x%llx, 0x%llx)\n",
  53. spa->range_index, spa->address, spa->length);
  54. /*
  55. * We can break at the first match because we're going
  56. * to rescan all the SPA ranges. There shouldn't be any
  57. * aliasing anyway.
  58. */
  59. break;
  60. }
  61. mutex_unlock(&acpi_desc->init_mutex);
  62. if (!found_match)
  63. continue;
  64. /* If this fails due to an -ENOMEM, there is little we can do */
  65. nvdimm_bus_add_badrange(acpi_desc->nvdimm_bus,
  66. ALIGN(mce->addr, L1_CACHE_BYTES),
  67. L1_CACHE_BYTES);
  68. nvdimm_region_notify(nfit_spa->nd_region,
  69. NVDIMM_REVALIDATE_POISON);
  70. if (acpi_desc->scrub_mode == HW_ERROR_SCRUB_ON) {
  71. /*
  72. * We can ignore an -EBUSY here because if an ARS is
  73. * already in progress, just let that be the last
  74. * authoritative one
  75. */
  76. acpi_nfit_ars_rescan(acpi_desc, 0);
  77. }
  78. break;
  79. }
  80. mutex_unlock(&acpi_desc_lock);
  81. return NOTIFY_DONE;
  82. }
  83. static struct notifier_block nfit_mce_dec = {
  84. .notifier_call = nfit_handle_mce,
  85. .priority = MCE_PRIO_NFIT,
  86. };
  87. void nfit_mce_register(void)
  88. {
  89. mce_register_decode_chain(&nfit_mce_dec);
  90. }
  91. void nfit_mce_unregister(void)
  92. {
  93. mce_unregister_decode_chain(&nfit_mce_dec);
  94. }