/*
 * APEI Generic Hardware Error Source support
 *
 * Generic Hardware Error Source provides a way to report platform
 * hardware errors (such as those from the chipset). It works in the
 * so-called "Firmware First" mode: hardware errors are reported to
 * the firmware first, then to Linux by the firmware. This way, some
 * non-standard hardware error registers or non-standard hardware
 * links can be checked by the firmware to produce more hardware
 * error information for Linux.
 *
 * For more information about Generic Hardware Error Source, please
 * refer to ACPI Specification version 4.0, section 17.3.2.6
 *
 * Copyright 2010,2011 Intel Corp.
 * Author: Huang Ying <ying.huang@intel.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version
 * 2 as published by the Free Software Foundation;
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 */

#include <linux/kernel.h>
#include <linux/moduleparam.h>
#include <linux/init.h>
#include <linux/acpi.h>
#include <linux/io.h>
#include <linux/interrupt.h>
#include <linux/timer.h>
#include <linux/cper.h>
#include <linux/platform_device.h>
#include <linux/mutex.h>
#include <linux/ratelimit.h>
#include <linux/vmalloc.h>
#include <linux/irq_work.h>
#include <linux/llist.h>
#include <linux/genalloc.h>
#include <linux/pci.h>
#include <linux/aer.h>
#include <linux/nmi.h>
#include <linux/sched/clock.h>
#include <linux/uuid.h>
#include <linux/ras.h>

#include <acpi/actbl1.h>
#include <acpi/ghes.h>
#include <acpi/apei.h>
#include <asm/fixmap.h>
#include <asm/tlbflush.h>
#include <ras/ras_event.h>

#include "apei-internal.h"

#define GHES_PFX	"GHES: "

#define GHES_ESTATUS_MAX_SIZE		65536
#define GHES_ESOURCE_PREALLOC_MAX_SIZE	65536

#define GHES_ESTATUS_POOL_MIN_ALLOC_ORDER 3

/* This is just an estimation for memory pool allocation */
#define GHES_ESTATUS_CACHE_AVG_SIZE	512

#define GHES_ESTATUS_CACHES_SIZE	4

#define GHES_ESTATUS_IN_CACHE_MAX_NSEC	10000000000ULL
/* Prevent too many caches from being allocated, since RCU may delay freeing */
#define GHES_ESTATUS_CACHE_ALLOCED_MAX	(GHES_ESTATUS_CACHES_SIZE * 3 / 2)

#define GHES_ESTATUS_CACHE_LEN(estatus_len)			\
	(sizeof(struct ghes_estatus_cache) + (estatus_len))
#define GHES_ESTATUS_FROM_CACHE(estatus_cache)			\
	((struct acpi_hest_generic_status *)			\
	 ((struct ghes_estatus_cache *)(estatus_cache) + 1))

#define GHES_ESTATUS_NODE_LEN(estatus_len)			\
	(sizeof(struct ghes_estatus_node) + (estatus_len))
#define GHES_ESTATUS_FROM_NODE(estatus_node)			\
	((struct acpi_hest_generic_status *)			\
	 ((struct ghes_estatus_node *)(estatus_node) + 1))

static inline bool is_hest_type_generic_v2(struct ghes *ghes)
{
	return ghes->generic->header.type == ACPI_HEST_TYPE_GENERIC_ERROR_V2;
}

/*
 * This driver isn't really modular, however for the time being,
 * continuing to use module_param is the easiest way to remain
 * compatible with existing boot arg use cases.
 */
bool ghes_disable;
module_param_named(disable, ghes_disable, bool, 0);

/*
 * All error sources notified with HED (Hardware Error Device) share a
 * single notifier callback, so they need to be linked and checked one
 * by one. This holds true for NMI too.
 *
 * RCU is used for these lists, so ghes_list_mutex is only used for
 * list changing, not for traversing.
 */
static LIST_HEAD(ghes_hed);
static DEFINE_MUTEX(ghes_list_mutex);

/*
 * Because the memory area used to transfer hardware error information
 * from BIOS to Linux can be determined only in an NMI, IRQ or timer
 * handler, and general ioremap cannot be used in atomic context, the
 * fixmap is used instead.
 *
 * These two spinlocks are used to prevent the fixmap entries from
 * being used simultaneously.
 */
static DEFINE_RAW_SPINLOCK(ghes_ioremap_lock_nmi);
static DEFINE_SPINLOCK(ghes_ioremap_lock_irq);

static struct gen_pool *ghes_estatus_pool;
static unsigned long ghes_estatus_pool_size_request;

static struct ghes_estatus_cache *ghes_estatus_caches[GHES_ESTATUS_CACHES_SIZE];
static atomic_t ghes_estatus_cache_alloced;

static int ghes_panic_timeout __read_mostly = 30;

static void __iomem *ghes_ioremap_pfn_nmi(u64 pfn)
{
	phys_addr_t paddr;
	pgprot_t prot;

	paddr = pfn << PAGE_SHIFT;
	prot = arch_apei_get_mem_attribute(paddr);
	__set_fixmap(FIX_APEI_GHES_NMI, paddr, prot);

	return (void __iomem *) fix_to_virt(FIX_APEI_GHES_NMI);
}

static void __iomem *ghes_ioremap_pfn_irq(u64 pfn)
{
	phys_addr_t paddr;
	pgprot_t prot;

	paddr = pfn << PAGE_SHIFT;
	prot = arch_apei_get_mem_attribute(paddr);
	__set_fixmap(FIX_APEI_GHES_IRQ, paddr, prot);

	return (void __iomem *) fix_to_virt(FIX_APEI_GHES_IRQ);
}

static void ghes_iounmap_nmi(void)
{
	clear_fixmap(FIX_APEI_GHES_NMI);
}

static void ghes_iounmap_irq(void)
{
	clear_fixmap(FIX_APEI_GHES_IRQ);
}

static int ghes_estatus_pool_init(void)
{
	ghes_estatus_pool = gen_pool_create(GHES_ESTATUS_POOL_MIN_ALLOC_ORDER, -1);
	if (!ghes_estatus_pool)
		return -ENOMEM;
	return 0;
}

static void ghes_estatus_pool_free_chunk(struct gen_pool *pool,
					 struct gen_pool_chunk *chunk,
					 void *data)
{
	vfree((void *)chunk->start_addr);
}

static void ghes_estatus_pool_exit(void)
{
	gen_pool_for_each_chunk(ghes_estatus_pool,
				ghes_estatus_pool_free_chunk, NULL);
	gen_pool_destroy(ghes_estatus_pool);
}

static int ghes_estatus_pool_expand(unsigned long len)
{
	unsigned long size, addr;

	ghes_estatus_pool_size_request += PAGE_ALIGN(len);
	size = gen_pool_size(ghes_estatus_pool);
	if (size >= ghes_estatus_pool_size_request)
		return 0;

	addr = (unsigned long)vmalloc(PAGE_ALIGN(len));
	if (!addr)
		return -ENOMEM;

	/*
	 * The new allocation must be visible in all page tables (pgd)
	 * before it can be found by an NMI allocating from the pool.
	 */
	vmalloc_sync_mappings();

	return gen_pool_add(ghes_estatus_pool, addr, PAGE_ALIGN(len), -1);
}

static int map_gen_v2(struct ghes *ghes)
{
	return apei_map_generic_address(&ghes->generic_v2->read_ack_register);
}

static void unmap_gen_v2(struct ghes *ghes)
{
	apei_unmap_generic_address(&ghes->generic_v2->read_ack_register);
}

static struct ghes *ghes_new(struct acpi_hest_generic *generic)
{
	struct ghes *ghes;
	unsigned int error_block_length;
	int rc;

	ghes = kzalloc(sizeof(*ghes), GFP_KERNEL);
	if (!ghes)
		return ERR_PTR(-ENOMEM);

	ghes->generic = generic;
	if (is_hest_type_generic_v2(ghes)) {
		rc = map_gen_v2(ghes);
		if (rc)
			goto err_free;
	}

	rc = apei_map_generic_address(&generic->error_status_address);
	if (rc)
		goto err_unmap_read_ack_addr;
	error_block_length = generic->error_block_length;
	if (error_block_length > GHES_ESTATUS_MAX_SIZE) {
		pr_warning(FW_WARN GHES_PFX
			   "Error status block length is too long: %u for "
			   "generic hardware error source: %d.\n",
			   error_block_length, generic->header.source_id);
		error_block_length = GHES_ESTATUS_MAX_SIZE;
	}
	ghes->estatus = kmalloc(error_block_length, GFP_KERNEL);
	if (!ghes->estatus) {
		rc = -ENOMEM;
		goto err_unmap_status_addr;
	}

	return ghes;

err_unmap_status_addr:
	apei_unmap_generic_address(&generic->error_status_address);
err_unmap_read_ack_addr:
	if (is_hest_type_generic_v2(ghes))
		unmap_gen_v2(ghes);
err_free:
	kfree(ghes);
	return ERR_PTR(rc);
}

static void ghes_fini(struct ghes *ghes)
{
	kfree(ghes->estatus);
	apei_unmap_generic_address(&ghes->generic->error_status_address);
	if (is_hest_type_generic_v2(ghes))
		unmap_gen_v2(ghes);
}
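
/* Map a CPER severity value to the corresponding GHES severity. */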
static inline int ghes_severity(int severity)
{
	switch (severity) {
	case CPER_SEV_INFORMATIONAL:
		return GHES_SEV_NO;
	case CPER_SEV_CORRECTED:
		return GHES_SEV_CORRECTED;
	case CPER_SEV_RECOVERABLE:
		return GHES_SEV_RECOVERABLE;
	case CPER_SEV_FATAL:
		return GHES_SEV_PANIC;
	default:
		/* Unknown, go panic */
		return GHES_SEV_PANIC;
	}
}
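
/*
 * Copy between a kernel buffer and physical memory, one page at a
 * time: each page of the physical range is temporarily mapped through
 * the NMI or IRQ fixmap slot (depending on context), copied, and
 * unmapped again. @from_phys selects the copy direction.
 */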
static void ghes_copy_tofrom_phys(void *buffer, u64 paddr, u32 len,
				  int from_phys)
{
	void __iomem *vaddr;
	unsigned long flags = 0;
	int in_nmi = in_nmi();
	u64 offset;
	u32 trunk;

	while (len > 0) {
		offset = paddr - (paddr & PAGE_MASK);
		if (in_nmi) {
			raw_spin_lock(&ghes_ioremap_lock_nmi);
			vaddr = ghes_ioremap_pfn_nmi(paddr >> PAGE_SHIFT);
		} else {
			spin_lock_irqsave(&ghes_ioremap_lock_irq, flags);
			vaddr = ghes_ioremap_pfn_irq(paddr >> PAGE_SHIFT);
		}
		trunk = PAGE_SIZE - offset;
		trunk = min(trunk, len);
		if (from_phys)
			memcpy_fromio(buffer, vaddr + offset, trunk);
		else
			memcpy_toio(vaddr + offset, buffer, trunk);
		len -= trunk;
		paddr += trunk;
		buffer += trunk;
		if (in_nmi) {
			ghes_iounmap_nmi();
			raw_spin_unlock(&ghes_ioremap_lock_nmi);
		} else {
			ghes_iounmap_irq();
			spin_unlock_irqrestore(&ghes_ioremap_lock_irq, flags);
		}
	}
}
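
/*
 * Read the error status block for this error source from firmware:
 * first the fixed-size header, then (after validating the reported
 * length against the header and the HEST entry) the remaining data.
 * Returns -ENOENT if no error is pending, -EIO on a malformed block.
 */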
static int ghes_read_estatus(struct ghes *ghes, int silent)
{
	struct acpi_hest_generic *g = ghes->generic;
	u64 buf_paddr;
	u32 len;
	int rc;

	rc = apei_read(&buf_paddr, &g->error_status_address);
	if (rc) {
		if (!silent && printk_ratelimit())
			pr_warning(FW_WARN GHES_PFX
				   "Failed to read error status block address for hardware error source: %d.\n",
				   g->header.source_id);
		return -EIO;
	}
	if (!buf_paddr)
		return -ENOENT;

	ghes_copy_tofrom_phys(ghes->estatus, buf_paddr,
			      sizeof(*ghes->estatus), 1);
	if (!ghes->estatus->block_status)
		return -ENOENT;

	ghes->buffer_paddr = buf_paddr;
	ghes->flags |= GHES_TO_CLEAR;

	rc = -EIO;
	len = cper_estatus_len(ghes->estatus);
	if (len < sizeof(*ghes->estatus))
		goto err_read_block;
	if (len > ghes->generic->error_block_length)
		goto err_read_block;
	if (cper_estatus_check_header(ghes->estatus))
		goto err_read_block;
	ghes_copy_tofrom_phys(ghes->estatus + 1,
			      buf_paddr + sizeof(*ghes->estatus),
			      len - sizeof(*ghes->estatus), 1);
	if (cper_estatus_check(ghes->estatus))
		goto err_read_block;
	rc = 0;

err_read_block:
	if (rc && !silent && printk_ratelimit())
		pr_warning(FW_WARN GHES_PFX
			   "Failed to read error status block!\n");
	return rc;
}

static void ghes_clear_estatus(struct ghes *ghes)
{
	ghes->estatus->block_status = 0;
	if (!(ghes->flags & GHES_TO_CLEAR))
		return;
	ghes_copy_tofrom_phys(ghes->estatus, ghes->buffer_paddr,
			      sizeof(ghes->estatus->block_status), 0);
	ghes->flags &= ~GHES_TO_CLEAR;
}

static void ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata, int sev)
{
#ifdef CONFIG_ACPI_APEI_MEMORY_FAILURE
	unsigned long pfn;
	int flags = -1;
	int sec_sev = ghes_severity(gdata->error_severity);
	struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata);

	if (!(mem_err->validation_bits & CPER_MEM_VALID_PA))
		return;

	pfn = mem_err->physical_addr >> PAGE_SHIFT;
	if (!pfn_valid(pfn)) {
		pr_warn_ratelimited(FW_WARN GHES_PFX
				    "Invalid address in generic error data: %#llx\n",
				    mem_err->physical_addr);
		return;
	}

	/* Only the following two cases can be handled properly for now */
	if (sec_sev == GHES_SEV_CORRECTED &&
	    (gdata->flags & CPER_SEC_ERROR_THRESHOLD_EXCEEDED))
		flags = MF_SOFT_OFFLINE;
	if (sev == GHES_SEV_RECOVERABLE && sec_sev == GHES_SEV_RECOVERABLE)
		flags = 0;

	if (flags != -1)
		memory_failure_queue(pfn, flags);
#endif
}

/*
 * PCIe AER errors need to be sent to the AER driver for reporting and
 * recovery. The GHES severities map to the following AER severities and
 * require the following handling:
 *
 * GHES_SEV_CORRECTED -> AER_CORRECTABLE
 *     These need to be reported by the AER driver but no recovery is
 *     necessary.
 * GHES_SEV_RECOVERABLE -> AER_NONFATAL
 * GHES_SEV_RECOVERABLE && CPER_SEC_RESET -> AER_FATAL
 *     These both need to be reported and recovered from by the AER driver.
 * GHES_SEV_PANIC does not make it to this handling since the kernel must
 *     panic.
 */
static void ghes_handle_aer(struct acpi_hest_generic_data *gdata)
{
#ifdef CONFIG_ACPI_APEI_PCIEAER
	struct cper_sec_pcie *pcie_err = acpi_hest_get_payload(gdata);

	if (pcie_err->validation_bits & CPER_PCIE_VALID_DEVICE_ID &&
	    pcie_err->validation_bits & CPER_PCIE_VALID_AER_INFO) {
		unsigned int devfn;
		int aer_severity;

		devfn = PCI_DEVFN(pcie_err->device_id.device,
				  pcie_err->device_id.function);
		aer_severity = cper_severity_to_aer(gdata->error_severity);

		/*
		 * If firmware reset the component to contain
		 * the error, we must reinitialize it before
		 * use, so treat it as a fatal AER error.
		 */
		if (gdata->flags & CPER_SEC_RESET)
			aer_severity = AER_FATAL;

		aer_recover_queue(pcie_err->device_id.segment,
				  pcie_err->device_id.bus,
				  devfn, aer_severity,
				  (struct aer_capability_regs *)
				  pcie_err->aer_info);
	}
#endif
}
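
/*
 * Walk every generic data section in the status block and dispatch it
 * by section-type GUID: memory errors, PCIe AER errors and ARM
 * processor errors get dedicated handlers; anything else is logged as
 * a non-standard event.
 */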
static void ghes_do_proc(struct ghes *ghes,
			 const struct acpi_hest_generic_status *estatus)
{
	int sev, sec_sev;
	struct acpi_hest_generic_data *gdata;
	guid_t *sec_type;
	guid_t *fru_id = &NULL_UUID_LE;
	char *fru_text = "";

	sev = ghes_severity(estatus->error_severity);
	apei_estatus_for_each_section(estatus, gdata) {
		sec_type = (guid_t *)gdata->section_type;
		sec_sev = ghes_severity(gdata->error_severity);
		if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID)
			fru_id = (guid_t *)gdata->fru_id;

		if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT)
			fru_text = gdata->fru_text;

		if (guid_equal(sec_type, &CPER_SEC_PLATFORM_MEM)) {
			struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata);

			ghes_edac_report_mem_error(sev, mem_err);
			arch_apei_report_mem_error(sev, mem_err);
			ghes_handle_memory_failure(gdata, sev);
		}
		else if (guid_equal(sec_type, &CPER_SEC_PCIE)) {
			ghes_handle_aer(gdata);
		}
		else if (guid_equal(sec_type, &CPER_SEC_PROC_ARM)) {
			struct cper_sec_proc_arm *err = acpi_hest_get_payload(gdata);

			log_arm_hw_error(err);
		} else {
			void *err = acpi_hest_get_payload(gdata);

			log_non_standard_event(sec_type, fru_id, fru_text,
					       sec_sev, err,
					       gdata->error_data_length);
		}
	}
}

static void __ghes_print_estatus(const char *pfx,
				 const struct acpi_hest_generic *generic,
				 const struct acpi_hest_generic_status *estatus)
{
	static atomic_t seqno;
	unsigned int curr_seqno;
	char pfx_seq[64];

	if (pfx == NULL) {
		if (ghes_severity(estatus->error_severity) <=
		    GHES_SEV_CORRECTED)
			pfx = KERN_WARNING;
		else
			pfx = KERN_ERR;
	}
	curr_seqno = atomic_inc_return(&seqno);
	snprintf(pfx_seq, sizeof(pfx_seq), "%s{%u}" HW_ERR, pfx, curr_seqno);
	printk("%s""Hardware error from APEI Generic Hardware Error Source: %d\n",
	       pfx_seq, generic->header.source_id);
	cper_estatus_print(pfx_seq, estatus);
}

static int ghes_print_estatus(const char *pfx,
			      const struct acpi_hest_generic *generic,
			      const struct acpi_hest_generic_status *estatus)
{
	/* Not more than 2 messages every 5 seconds */
	static DEFINE_RATELIMIT_STATE(ratelimit_corrected, 5*HZ, 2);
	static DEFINE_RATELIMIT_STATE(ratelimit_uncorrected, 5*HZ, 2);
	struct ratelimit_state *ratelimit;

	if (ghes_severity(estatus->error_severity) <= GHES_SEV_CORRECTED)
		ratelimit = &ratelimit_corrected;
	else
		ratelimit = &ratelimit_uncorrected;
	if (__ratelimit(ratelimit)) {
		__ghes_print_estatus(pfx, generic, estatus);
		return 1;
	}
	return 0;
}

/*
 * GHES error status reporting throttle: cache recently-seen status
 * blocks so that more kinds of errors get reported, instead of only
 * the most frequently occurring ones.
 */
static int ghes_estatus_cached(struct acpi_hest_generic_status *estatus)
{
	u32 len;
	int i, cached = 0;
	unsigned long long now;
	struct ghes_estatus_cache *cache;
	struct acpi_hest_generic_status *cache_estatus;

	len = cper_estatus_len(estatus);
	rcu_read_lock();
	for (i = 0; i < GHES_ESTATUS_CACHES_SIZE; i++) {
		cache = rcu_dereference(ghes_estatus_caches[i]);
		if (cache == NULL)
			continue;
		if (len != cache->estatus_len)
			continue;
		cache_estatus = GHES_ESTATUS_FROM_CACHE(cache);
		if (memcmp(estatus, cache_estatus, len))
			continue;
		atomic_inc(&cache->count);
		now = sched_clock();
		if (now - cache->time_in < GHES_ESTATUS_IN_CACHE_MAX_NSEC)
			cached = 1;
		break;
	}
	rcu_read_unlock();
	return cached;
}

static struct ghes_estatus_cache *ghes_estatus_cache_alloc(
	struct acpi_hest_generic *generic,
	struct acpi_hest_generic_status *estatus)
{
	int alloced;
	u32 len, cache_len;
	struct ghes_estatus_cache *cache;
	struct acpi_hest_generic_status *cache_estatus;

	alloced = atomic_add_return(1, &ghes_estatus_cache_alloced);
	if (alloced > GHES_ESTATUS_CACHE_ALLOCED_MAX) {
		atomic_dec(&ghes_estatus_cache_alloced);
		return NULL;
	}
	len = cper_estatus_len(estatus);
	cache_len = GHES_ESTATUS_CACHE_LEN(len);
	cache = (void *)gen_pool_alloc(ghes_estatus_pool, cache_len);
	if (!cache) {
		atomic_dec(&ghes_estatus_cache_alloced);
		return NULL;
	}
	cache_estatus = GHES_ESTATUS_FROM_CACHE(cache);
	memcpy(cache_estatus, estatus, len);
	cache->estatus_len = len;
	atomic_set(&cache->count, 0);
	cache->generic = generic;
	cache->time_in = sched_clock();
	return cache;
}

static void ghes_estatus_cache_free(struct ghes_estatus_cache *cache)
{
	u32 len;

	len = cper_estatus_len(GHES_ESTATUS_FROM_CACHE(cache));
	len = GHES_ESTATUS_CACHE_LEN(len);
	gen_pool_free(ghes_estatus_pool, (unsigned long)cache, len);
	atomic_dec(&ghes_estatus_cache_alloced);
}

static void ghes_estatus_cache_rcu_free(struct rcu_head *head)
{
	struct ghes_estatus_cache *cache;

	cache = container_of(head, struct ghes_estatus_cache, rcu);
	ghes_estatus_cache_free(cache);
}
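
/*
 * Insert a new entry into the cache array, picking the victim slot in
 * this order: an empty slot, then an expired entry, then the entry
 * with the longest average period between hits. The swap is done with
 * cmpxchg() so that a concurrent insert into the same slot loses
 * cleanly, and the victim is freed via RCU.
 */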
static void ghes_estatus_cache_add(
	struct acpi_hest_generic *generic,
	struct acpi_hest_generic_status *estatus)
{
	int i, slot = -1, count;
	unsigned long long now, duration, period, max_period = 0;
	struct ghes_estatus_cache *cache, *slot_cache = NULL, *new_cache;

	new_cache = ghes_estatus_cache_alloc(generic, estatus);
	if (new_cache == NULL)
		return;
	rcu_read_lock();
	now = sched_clock();
	for (i = 0; i < GHES_ESTATUS_CACHES_SIZE; i++) {
		cache = rcu_dereference(ghes_estatus_caches[i]);
		if (cache == NULL) {
			slot = i;
			slot_cache = NULL;
			break;
		}
		duration = now - cache->time_in;
		if (duration >= GHES_ESTATUS_IN_CACHE_MAX_NSEC) {
			slot = i;
			slot_cache = cache;
			break;
		}
		count = atomic_read(&cache->count);
		period = duration;
		do_div(period, (count + 1));
		if (period > max_period) {
			max_period = period;
			slot = i;
			slot_cache = cache;
		}
	}
	/* new_cache must be put into array after its contents are written */
	smp_wmb();
	if (slot != -1 && cmpxchg(ghes_estatus_caches + slot,
				  slot_cache, new_cache) == slot_cache) {
		if (slot_cache)
			call_rcu(&slot_cache->rcu, ghes_estatus_cache_rcu_free);
	} else
		ghes_estatus_cache_free(new_cache);
	rcu_read_unlock();
}
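
/*
 * GHESv2 sources require an explicit acknowledgment: read the
 * read-ack register, keep the preserved bits and set the write bits,
 * then write the value back so the firmware knows the error status
 * block has been consumed and may reuse it.
 */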
static int ghes_ack_error(struct acpi_hest_generic_v2 *gv2)
{
	int rc;
	u64 val = 0;

	rc = apei_read(&val, &gv2->read_ack_register);
	if (rc)
		return rc;

	val &= gv2->read_ack_preserve << gv2->read_ack_register.bit_offset;
	val |= gv2->read_ack_write    << gv2->read_ack_register.bit_offset;

	return apei_write(val, &gv2->read_ack_register);
}

static void __ghes_panic(struct ghes *ghes)
{
	__ghes_print_estatus(KERN_EMERG, ghes->generic, ghes->estatus);

	ghes_clear_estatus(ghes);

	/* reboot to log the error! */
	if (!panic_timeout)
		panic_timeout = ghes_panic_timeout;
	panic("Fatal hardware error!");
}
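
/*
 * Main processing path for a single error source: read and validate
 * the status block, panic on fatal severity, print (throttled by the
 * estatus cache), handle each section, then clear the block and, for
 * GHESv2, acknowledge it.
 */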
static int ghes_proc(struct ghes *ghes)
{
	int rc;

	rc = ghes_read_estatus(ghes, 0);
	if (rc)
		goto out;

	if (ghes_severity(ghes->estatus->error_severity) >= GHES_SEV_PANIC) {
		__ghes_panic(ghes);
	}

	if (!ghes_estatus_cached(ghes->estatus)) {
		if (ghes_print_estatus(NULL, ghes->generic, ghes->estatus))
			ghes_estatus_cache_add(ghes->generic, ghes->estatus);
	}
	ghes_do_proc(ghes, ghes->estatus);

out:
	ghes_clear_estatus(ghes);

	if (rc == -ENOENT)
		return rc;

	/*
	 * GHESv2 type HEST entries introduce support for error acknowledgment,
	 * so only acknowledge the error if this support is present.
	 */
	if (is_hest_type_generic_v2(ghes))
		return ghes_ack_error(ghes->generic_v2);

	return rc;
}

static void ghes_add_timer(struct ghes *ghes)
{
	struct acpi_hest_generic *g = ghes->generic;
	unsigned long expire;

	if (!g->notify.poll_interval) {
		pr_warning(FW_WARN GHES_PFX "Poll interval is 0 for generic hardware error source: %d, disabled.\n",
			   g->header.source_id);
		return;
	}
	expire = jiffies + msecs_to_jiffies(g->notify.poll_interval);
	ghes->timer.expires = round_jiffies_relative(expire);
	add_timer(&ghes->timer);
}

static void ghes_poll_func(struct timer_list *t)
{
	struct ghes *ghes = from_timer(ghes, t, timer);

	ghes_proc(ghes);
	if (!(ghes->flags & GHES_EXITING))
		ghes_add_timer(ghes);
}

static irqreturn_t ghes_irq_func(int irq, void *data)
{
	struct ghes *ghes = data;
	int rc;

	rc = ghes_proc(ghes);
	if (rc)
		return IRQ_NONE;

	return IRQ_HANDLED;
}

static int ghes_notify_hed(struct notifier_block *this, unsigned long event,
			   void *data)
{
	struct ghes *ghes;
	int ret = NOTIFY_DONE;

	rcu_read_lock();
	list_for_each_entry_rcu(ghes, &ghes_hed, list) {
		if (!ghes_proc(ghes))
			ret = NOTIFY_OK;
	}
	rcu_read_unlock();

	return ret;
}

static struct notifier_block ghes_notifier_hed = {
	.notifier_call = ghes_notify_hed,
};

#ifdef CONFIG_ACPI_APEI_SEA
static LIST_HEAD(ghes_sea);

/*
 * Return 0 only if one of the SEA error sources successfully reported an error
 * record sent from the firmware.
 */
int ghes_notify_sea(void)
{
	struct ghes *ghes;
	int ret = -ENOENT;

	rcu_read_lock();
	list_for_each_entry_rcu(ghes, &ghes_sea, list) {
		if (!ghes_proc(ghes))
			ret = 0;
	}
	rcu_read_unlock();
	return ret;
}

static void ghes_sea_add(struct ghes *ghes)
{
	mutex_lock(&ghes_list_mutex);
	list_add_rcu(&ghes->list, &ghes_sea);
	mutex_unlock(&ghes_list_mutex);
}

static void ghes_sea_remove(struct ghes *ghes)
{
	mutex_lock(&ghes_list_mutex);
	list_del_rcu(&ghes->list);
	mutex_unlock(&ghes_list_mutex);
	synchronize_rcu();
}
#else /* CONFIG_ACPI_APEI_SEA */
static inline void ghes_sea_add(struct ghes *ghes) { }
static inline void ghes_sea_remove(struct ghes *ghes) { }
#endif /* CONFIG_ACPI_APEI_SEA */

#ifdef CONFIG_HAVE_ACPI_APEI_NMI
/*
 * printk is not safe in NMI context. So in an NMI handler, we
 * allocate the required memory from the lock-less memory allocator
 * (ghes_estatus_pool), save the estatus into it, put it onto the
 * lock-less list (ghes_estatus_llist), then delay printing into IRQ
 * context via irq_work (ghes_proc_irq_work).
 * ghes_estatus_pool_size_request records the pool size required by
 * all NMI error sources.
 */
static struct llist_head ghes_estatus_llist;
static struct irq_work ghes_proc_irq_work;

/*
 * NMI may be triggered on any CPU, so ghes_in_nmi is used to ensure
 * that there is only one concurrent reader.
 */
static atomic_t ghes_in_nmi = ATOMIC_INIT(0);

static LIST_HEAD(ghes_nmi);

static void ghes_proc_in_irq(struct irq_work *irq_work)
{
	struct llist_node *llnode, *next;
	struct ghes_estatus_node *estatus_node;
	struct acpi_hest_generic *generic;
	struct acpi_hest_generic_status *estatus;
	u32 len, node_len;

	llnode = llist_del_all(&ghes_estatus_llist);
	/*
	 * Because the list is built in reverse time order, reverse it
	 * back into the proper order.
	 */
	llnode = llist_reverse_order(llnode);
	while (llnode) {
		next = llnode->next;
		estatus_node = llist_entry(llnode, struct ghes_estatus_node,
					   llnode);
		estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
		len = cper_estatus_len(estatus);
		node_len = GHES_ESTATUS_NODE_LEN(len);
		ghes_do_proc(estatus_node->ghes, estatus);
		if (!ghes_estatus_cached(estatus)) {
			generic = estatus_node->generic;
			if (ghes_print_estatus(NULL, generic, estatus))
				ghes_estatus_cache_add(generic, estatus);
		}
		gen_pool_free(ghes_estatus_pool, (unsigned long)estatus_node,
			      node_len);
		llnode = next;
	}
}

static void ghes_print_queued_estatus(void)
{
	struct llist_node *llnode;
	struct ghes_estatus_node *estatus_node;
	struct acpi_hest_generic *generic;
	struct acpi_hest_generic_status *estatus;

	llnode = llist_del_all(&ghes_estatus_llist);
	/*
	 * Because the list is built in reverse time order, reverse it
	 * back into the proper order.
	 */
	llnode = llist_reverse_order(llnode);
	while (llnode) {
		estatus_node = llist_entry(llnode, struct ghes_estatus_node,
					   llnode);
		estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
		generic = estatus_node->generic;
		ghes_print_estatus(NULL, generic, estatus);
		llnode = llnode->next;
	}
}

/* Save estatus for further processing in IRQ context */
static void __process_error(struct ghes *ghes)
{
#ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
	u32 len, node_len;
	struct ghes_estatus_node *estatus_node;
	struct acpi_hest_generic_status *estatus;

	if (ghes_estatus_cached(ghes->estatus))
		return;

	len = cper_estatus_len(ghes->estatus);
	node_len = GHES_ESTATUS_NODE_LEN(len);

	estatus_node = (void *)gen_pool_alloc(ghes_estatus_pool, node_len);
	if (!estatus_node)
		return;

	estatus_node->ghes = ghes;
	estatus_node->generic = ghes->generic;
	estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
	memcpy(estatus, ghes->estatus, len);
	llist_add(&estatus_node->llnode, &ghes_estatus_llist);
#endif
}
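
/*
 * NMI entry point: allow only one concurrent reader, walk all NMI
 * error sources, panic immediately on a fatal error, and queue
 * everything else for printing and handling in IRQ context.
 */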
static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs)
{
	struct ghes *ghes;
	int sev, ret = NMI_DONE;

	if (!atomic_add_unless(&ghes_in_nmi, 1, 1))
		return ret;

	list_for_each_entry_rcu(ghes, &ghes_nmi, list) {
		if (ghes_read_estatus(ghes, 1)) {
			ghes_clear_estatus(ghes);
			continue;
		} else {
			ret = NMI_HANDLED;
		}

		sev = ghes_severity(ghes->estatus->error_severity);
		if (sev >= GHES_SEV_PANIC) {
			ghes_print_queued_estatus();
			__ghes_panic(ghes);
		}

		if (!(ghes->flags & GHES_TO_CLEAR))
			continue;

		__process_error(ghes);
		ghes_clear_estatus(ghes);
	}

#ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
	if (ret == NMI_HANDLED)
		irq_work_queue(&ghes_proc_irq_work);
#endif
	atomic_dec(&ghes_in_nmi);
	return ret;
}
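
/*
 * Estimate how many pool bytes this error source needs:
 * min(error_block_length, GHES_ESTATUS_MAX_SIZE) multiplied by
 * max(records_to_preallocate, 1), capped at
 * GHES_ESOURCE_PREALLOC_MAX_SIZE.
 */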
static unsigned long ghes_esource_prealloc_size(
	const struct acpi_hest_generic *generic)
{
	unsigned long block_length, prealloc_records, prealloc_size;

	block_length = min_t(unsigned long, generic->error_block_length,
			     GHES_ESTATUS_MAX_SIZE);
	prealloc_records = max_t(unsigned long,
				 generic->records_to_preallocate, 1);
	prealloc_size = min_t(unsigned long, block_length * prealloc_records,
			      GHES_ESOURCE_PREALLOC_MAX_SIZE);

	return prealloc_size;
}

static void ghes_estatus_pool_shrink(unsigned long len)
{
	ghes_estatus_pool_size_request -= PAGE_ALIGN(len);
}

static void ghes_nmi_add(struct ghes *ghes)
{
	unsigned long len;

	len = ghes_esource_prealloc_size(ghes->generic);
	ghes_estatus_pool_expand(len);
	mutex_lock(&ghes_list_mutex);
	if (list_empty(&ghes_nmi))
		register_nmi_handler(NMI_LOCAL, ghes_notify_nmi, 0, "ghes");
	list_add_rcu(&ghes->list, &ghes_nmi);
	mutex_unlock(&ghes_list_mutex);
}

static void ghes_nmi_remove(struct ghes *ghes)
{
	unsigned long len;

	mutex_lock(&ghes_list_mutex);
	list_del_rcu(&ghes->list);
	if (list_empty(&ghes_nmi))
		unregister_nmi_handler(NMI_LOCAL, "ghes");
	mutex_unlock(&ghes_list_mutex);
	/*
	 * To synchronize with NMI handler, ghes can only be
	 * freed after NMI handler finishes.
	 */
	synchronize_rcu();
	len = ghes_esource_prealloc_size(ghes->generic);
	ghes_estatus_pool_shrink(len);
}

static void ghes_nmi_init_cxt(void)
{
	init_irq_work(&ghes_proc_irq_work, ghes_proc_in_irq);
}
#else /* CONFIG_HAVE_ACPI_APEI_NMI */
static inline void ghes_nmi_add(struct ghes *ghes) { }
static inline void ghes_nmi_remove(struct ghes *ghes) { }
static inline void ghes_nmi_init_cxt(void) { }
#endif /* CONFIG_HAVE_ACPI_APEI_NMI */
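
/*
 * Probe one GHES platform device: validate the HEST entry, allocate
 * and map the ghes instance, then register it with the mechanism
 * named by its notification type (polled timer, external IRQ, HED
 * notifier for SCI/GSIV/GPIO, SEA or NMI).
 */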
static int ghes_probe(struct platform_device *ghes_dev)
{
	struct acpi_hest_generic *generic;
	struct ghes *ghes = NULL;
	int rc = -EINVAL;

	generic = *(struct acpi_hest_generic **)ghes_dev->dev.platform_data;
	if (!generic->enabled)
		return -ENODEV;

	switch (generic->notify.type) {
	case ACPI_HEST_NOTIFY_POLLED:
	case ACPI_HEST_NOTIFY_EXTERNAL:
	case ACPI_HEST_NOTIFY_SCI:
	case ACPI_HEST_NOTIFY_GSIV:
	case ACPI_HEST_NOTIFY_GPIO:
		break;
	case ACPI_HEST_NOTIFY_SEA:
		if (!IS_ENABLED(CONFIG_ACPI_APEI_SEA)) {
			pr_warn(GHES_PFX "Generic hardware error source: %d notified via SEA is not supported\n",
				generic->header.source_id);
			rc = -ENOTSUPP;
			goto err;
		}
		break;
	case ACPI_HEST_NOTIFY_NMI:
		if (!IS_ENABLED(CONFIG_HAVE_ACPI_APEI_NMI)) {
			pr_warn(GHES_PFX "Generic hardware error source: %d notified via NMI interrupt is not supported!\n",
				generic->header.source_id);
			goto err;
		}
		break;
	case ACPI_HEST_NOTIFY_LOCAL:
		pr_warning(GHES_PFX "Generic hardware error source: %d notified via local interrupt is not supported!\n",
			   generic->header.source_id);
		goto err;
	default:
		pr_warning(FW_WARN GHES_PFX "Unknown notification type: %u for generic hardware error source: %d\n",
			   generic->notify.type, generic->header.source_id);
		goto err;
	}

	rc = -EIO;
	if (generic->error_block_length <
	    sizeof(struct acpi_hest_generic_status)) {
		pr_warning(FW_BUG GHES_PFX "Invalid error block length: %u for generic hardware error source: %d\n",
			   generic->error_block_length,
			   generic->header.source_id);
		goto err;
	}
	ghes = ghes_new(generic);
	if (IS_ERR(ghes)) {
		rc = PTR_ERR(ghes);
		ghes = NULL;
		goto err;
	}

	switch (generic->notify.type) {
	case ACPI_HEST_NOTIFY_POLLED:
		timer_setup(&ghes->timer, ghes_poll_func, TIMER_DEFERRABLE);
		ghes_add_timer(ghes);
		break;
	case ACPI_HEST_NOTIFY_EXTERNAL:
		/* External interrupt vector is GSI */
		rc = acpi_gsi_to_irq(generic->notify.vector, &ghes->irq);
		if (rc) {
			pr_err(GHES_PFX "Failed to map GSI to IRQ for generic hardware error source: %d\n",
			       generic->header.source_id);
			goto err;
		}
		rc = request_irq(ghes->irq, ghes_irq_func, IRQF_SHARED,
				 "GHES IRQ", ghes);
		if (rc) {
			pr_err(GHES_PFX "Failed to register IRQ for generic hardware error source: %d\n",
			       generic->header.source_id);
			goto err;
		}
		break;

	case ACPI_HEST_NOTIFY_SCI:
	case ACPI_HEST_NOTIFY_GSIV:
	case ACPI_HEST_NOTIFY_GPIO:
		mutex_lock(&ghes_list_mutex);
		if (list_empty(&ghes_hed))
			register_acpi_hed_notifier(&ghes_notifier_hed);
		list_add_rcu(&ghes->list, &ghes_hed);
		mutex_unlock(&ghes_list_mutex);
		break;

	case ACPI_HEST_NOTIFY_SEA:
		ghes_sea_add(ghes);
		break;
	case ACPI_HEST_NOTIFY_NMI:
		ghes_nmi_add(ghes);
		break;
	default:
		BUG();
	}

	platform_set_drvdata(ghes_dev, ghes);

	ghes_edac_register(ghes, &ghes_dev->dev);

	/* Handle any pending errors right away */
	ghes_proc(ghes);

	return 0;

err:
	if (ghes) {
		ghes_fini(ghes);
		kfree(ghes);
	}
	return rc;
}

static int ghes_remove(struct platform_device *ghes_dev)
{
	struct ghes *ghes;
	struct acpi_hest_generic *generic;

	ghes = platform_get_drvdata(ghes_dev);
	generic = ghes->generic;
	ghes->flags |= GHES_EXITING;
	switch (generic->notify.type) {
	case ACPI_HEST_NOTIFY_POLLED:
		del_timer_sync(&ghes->timer);
		break;
	case ACPI_HEST_NOTIFY_EXTERNAL:
		free_irq(ghes->irq, ghes);
		break;
	case ACPI_HEST_NOTIFY_SCI:
	case ACPI_HEST_NOTIFY_GSIV:
	case ACPI_HEST_NOTIFY_GPIO:
		mutex_lock(&ghes_list_mutex);
		list_del_rcu(&ghes->list);
		if (list_empty(&ghes_hed))
			unregister_acpi_hed_notifier(&ghes_notifier_hed);
		mutex_unlock(&ghes_list_mutex);
		synchronize_rcu();
		break;
	case ACPI_HEST_NOTIFY_SEA:
		ghes_sea_remove(ghes);
		break;
	case ACPI_HEST_NOTIFY_NMI:
		ghes_nmi_remove(ghes);
		break;
	default:
		BUG();
		break;
	}

	ghes_fini(ghes);

	ghes_edac_unregister(ghes);

	kfree(ghes);

	platform_set_drvdata(ghes_dev, NULL);

	return 0;
}

static struct platform_driver ghes_platform_driver = {
	.driver		= {
		.name	= "GHES",
	},
	.probe		= ghes_probe,
	.remove		= ghes_remove,
};
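
/*
 * Module init: bail out if ACPI, HEST or GHES is disabled, set up the
 * NMI irq_work context and the estatus memory pool, register the
 * platform driver, then report whether APEI firmware first mode was
 * enabled via the APEI bit and/or the WHEA _OSC method.
 */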
static int __init ghes_init(void)
{
	int rc;

	if (acpi_disabled)
		return -ENODEV;

	switch (hest_disable) {
	case HEST_NOT_FOUND:
		return -ENODEV;
	case HEST_DISABLED:
		pr_info(GHES_PFX "HEST is not enabled!\n");
		return -EINVAL;
	default:
		break;
	}

	if (ghes_disable) {
		pr_info(GHES_PFX "GHES is not enabled!\n");
		return -EINVAL;
	}

	ghes_nmi_init_cxt();

	rc = ghes_estatus_pool_init();
	if (rc)
		goto err;

	rc = ghes_estatus_pool_expand(GHES_ESTATUS_CACHE_AVG_SIZE *
				      GHES_ESTATUS_CACHE_ALLOCED_MAX);
	if (rc)
		goto err_pool_exit;

	rc = platform_driver_register(&ghes_platform_driver);
	if (rc)
		goto err_pool_exit;

	rc = apei_osc_setup();
	if (rc == 0 && osc_sb_apei_support_acked)
		pr_info(GHES_PFX "APEI firmware first mode is enabled by APEI bit and WHEA _OSC.\n");
	else if (rc == 0 && !osc_sb_apei_support_acked)
		pr_info(GHES_PFX "APEI firmware first mode is enabled by WHEA _OSC.\n");
	else if (rc && osc_sb_apei_support_acked)
		pr_info(GHES_PFX "APEI firmware first mode is enabled by APEI bit.\n");
	else
		pr_info(GHES_PFX "Failed to enable APEI firmware first mode.\n");

	return 0;

err_pool_exit:
	ghes_estatus_pool_exit();
err:
	return rc;
}
device_initcall(ghes_init);