// SPDX-License-Identifier: GPL-2.0
/*
 * Implement the AER root port service driver. The driver registers an IRQ
 * handler. When a root port triggers an AER interrupt, the IRQ handler
 * collects root port status and schedules work.
 *
 * Copyright (C) 2006 Intel Corp.
 *	Tom Long Nguyen (tom.l.nguyen@intel.com)
 *	Zhang Yanmin (yanmin.zhang@intel.com)
 *
 * (C) Copyright 2009 Hewlett-Packard Development Company, L.P.
 *	Andrew Patterson <andrew.patterson@hp.com>
 */

#include <linux/cper.h>
#include <linux/pci.h>
#include <linux/pci-acpi.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/pm.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/delay.h>
#include <linux/kfifo.h>
#include <linux/slab.h>
#include <acpi/apei.h>
#include <ras/ras_event.h>

#include "../pci.h"
#include "portdrv.h"

#define AER_ERROR_SOURCES_MAX		100

#define AER_MAX_TYPEOF_COR_ERRS		16	/* as per PCI_ERR_COR_STATUS */
#define AER_MAX_TYPEOF_UNCOR_ERRS	26	/* as per PCI_ERR_UNCOR_STATUS */
struct aer_err_source {
	unsigned int status;
	unsigned int id;
};

struct aer_rpc {
	struct pci_dev *rpd;		/* Root Port device */
	struct work_struct dpc_handler;
	struct aer_err_source e_sources[AER_ERROR_SOURCES_MAX];
	struct aer_err_info e_info;
	unsigned short prod_idx;	/* Error Producer Index */
	unsigned short cons_idx;	/* Error Consumer Index */
	int isr;
	spinlock_t e_lock;		/*
					 * Lock access to Error Status/ID Regs
					 * and error producer/consumer index
					 */
	struct mutex rpc_mutex;		/*
					 * only one thread could do
					 * recovery on the same
					 * root port hierarchy
					 */
};
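
/*
 * The e_sources[] array is used as a single-producer/single-consumer ring:
 * aer_irq() produces entries at prod_idx and the aer_isr() work item
 * consumes them at cons_idx, with e_lock serializing the index updates and
 * rpc_mutex ensuring only one thread runs recovery on a given hierarchy.
 */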

/* AER stats for the device */
struct aer_stats {

	/*
	 * Fields for all AER capable devices. They indicate the errors
	 * "as seen by this device". Note that this may mean that if an
	 * end point is causing problems, the AER counters may increment
	 * at its link partner (e.g. root port) because the errors will be
	 * "seen" by the link partner and not the problematic end point
	 * itself (which may report all counters as 0 as it never saw any
	 * problems).
	 */
	/* Counters for different type of correctable errors */
	u64 dev_cor_errs[AER_MAX_TYPEOF_COR_ERRS];
	/* Counters for different type of fatal uncorrectable errors */
	u64 dev_fatal_errs[AER_MAX_TYPEOF_UNCOR_ERRS];
	/* Counters for different type of nonfatal uncorrectable errors */
	u64 dev_nonfatal_errs[AER_MAX_TYPEOF_UNCOR_ERRS];
	/* Total number of ERR_COR sent by this device */
	u64 dev_total_cor_errs;
	/* Total number of ERR_FATAL sent by this device */
	u64 dev_total_fatal_errs;
	/* Total number of ERR_NONFATAL sent by this device */
	u64 dev_total_nonfatal_errs;

	/*
	 * Fields for Root Ports and root complex event collectors only;
	 * these indicate the total number of ERR_COR, ERR_FATAL, and
	 * ERR_NONFATAL messages received by the root port / event
	 * collector, INCLUDING the ones that are generated internally
	 * (by the root port itself).
	 */
	u64 rootport_total_cor_errs;
	u64 rootport_total_fatal_errs;
	u64 rootport_total_nonfatal_errs;
};

#define AER_LOG_TLP_MASKS		(PCI_ERR_UNC_POISON_TLP|	\
					PCI_ERR_UNC_ECRC|		\
					PCI_ERR_UNC_UNSUP|		\
					PCI_ERR_UNC_COMP_ABORT|		\
					PCI_ERR_UNC_UNX_COMP|		\
					PCI_ERR_UNC_MALF_TLP)

#define SYSTEM_ERROR_INTR_ON_MESG_MASK	(PCI_EXP_RTCTL_SECEE|	\
					PCI_EXP_RTCTL_SENFEE|	\
					PCI_EXP_RTCTL_SEFEE)
#define ROOT_PORT_INTR_ON_MESG_MASK	(PCI_ERR_ROOT_CMD_COR_EN|	\
					PCI_ERR_ROOT_CMD_NONFATAL_EN|	\
					PCI_ERR_ROOT_CMD_FATAL_EN)
#define ERR_COR_ID(d)			(d & 0xffff)
#define ERR_UNCOR_ID(d)			(d >> 16)
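
/*
 * The Root Error Source Identification register packs two requester IDs:
 * the ERR_COR source in the low 16 bits and the ERR_FATAL/ERR_NONFATAL
 * source in the high 16 bits, which is what ERR_COR_ID()/ERR_UNCOR_ID()
 * extract from the value read at PCI_ERR_ROOT_ERR_SRC.
 */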

static int pcie_aer_disable;

void pci_no_aer(void)
{
	pcie_aer_disable = 1;
}

bool pci_aer_available(void)
{
	return !pcie_aer_disable && pci_msi_enabled();
}

#ifdef CONFIG_PCIE_ECRC

#define ECRC_POLICY_DEFAULT	0	/* ECRC set by BIOS */
#define ECRC_POLICY_OFF		1	/* ECRC off for performance */
#define ECRC_POLICY_ON		2	/* ECRC on for data integrity */

static int ecrc_policy = ECRC_POLICY_DEFAULT;

static const char *ecrc_policy_str[] = {
	[ECRC_POLICY_DEFAULT] = "bios",
	[ECRC_POLICY_OFF] = "off",
	[ECRC_POLICY_ON] = "on"
};

/**
 * enable_ecrc_checking - enable PCIe ECRC checking for a device
 * @dev: the PCI device
 *
 * Returns 0 on success, or negative on failure.
 */
static int enable_ecrc_checking(struct pci_dev *dev)
{
	int pos;
	u32 reg32;

	if (!pci_is_pcie(dev))
		return -ENODEV;

	pos = dev->aer_cap;
	if (!pos)
		return -ENODEV;

	pci_read_config_dword(dev, pos + PCI_ERR_CAP, &reg32);
	if (reg32 & PCI_ERR_CAP_ECRC_GENC)
		reg32 |= PCI_ERR_CAP_ECRC_GENE;
	if (reg32 & PCI_ERR_CAP_ECRC_CHKC)
		reg32 |= PCI_ERR_CAP_ECRC_CHKE;
	pci_write_config_dword(dev, pos + PCI_ERR_CAP, reg32);

	return 0;
}

/**
 * disable_ecrc_checking - disables PCIe ECRC checking for a device
 * @dev: the PCI device
 *
 * Returns 0 on success, or negative on failure.
 */
static int disable_ecrc_checking(struct pci_dev *dev)
{
	int pos;
	u32 reg32;

	if (!pci_is_pcie(dev))
		return -ENODEV;

	pos = dev->aer_cap;
	if (!pos)
		return -ENODEV;

	pci_read_config_dword(dev, pos + PCI_ERR_CAP, &reg32);
	reg32 &= ~(PCI_ERR_CAP_ECRC_GENE | PCI_ERR_CAP_ECRC_CHKE);
	pci_write_config_dword(dev, pos + PCI_ERR_CAP, reg32);

	return 0;
}

/**
 * pcie_set_ecrc_checking - set/unset PCIe ECRC checking for a device based on global policy
 * @dev: the PCI device
 */
void pcie_set_ecrc_checking(struct pci_dev *dev)
{
	switch (ecrc_policy) {
	case ECRC_POLICY_DEFAULT:
		return;
	case ECRC_POLICY_OFF:
		disable_ecrc_checking(dev);
		break;
	case ECRC_POLICY_ON:
		enable_ecrc_checking(dev);
		break;
	default:
		return;
	}
}

/**
 * pcie_ecrc_get_policy - parse kernel command-line ecrc option
 */
void pcie_ecrc_get_policy(char *str)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(ecrc_policy_str); i++)
		if (!strncmp(str, ecrc_policy_str[i],
			     strlen(ecrc_policy_str[i])))
			break;
	if (i >= ARRAY_SIZE(ecrc_policy_str))
		return;
	ecrc_policy = i;
}
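
/*
 * Illustrative usage (assumption: the "ecrc=" kernel parameter, which the
 * PCI core's command-line parsing hands to pcie_ecrc_get_policy()):
 *
 *	ecrc=bios	leave ECRC generation/checking as set by firmware
 *	ecrc=off	disable ECRC for performance
 *	ecrc=on		enable ECRC on capable devices for data integrity
 */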
#endif /* CONFIG_PCIE_ECRC */

#ifdef CONFIG_ACPI_APEI
static inline int hest_match_pci(struct acpi_hest_aer_common *p,
				 struct pci_dev *pci)
{
	return ACPI_HEST_SEGMENT(p->bus) == pci_domain_nr(pci->bus) &&
	       ACPI_HEST_BUS(p->bus) == pci->bus->number &&
	       p->device == PCI_SLOT(pci->devfn) &&
	       p->function == PCI_FUNC(pci->devfn);
}

static inline bool hest_match_type(struct acpi_hest_header *hest_hdr,
				   struct pci_dev *dev)
{
	u16 hest_type = hest_hdr->type;
	u8 pcie_type = pci_pcie_type(dev);

	if ((hest_type == ACPI_HEST_TYPE_AER_ROOT_PORT &&
	     pcie_type == PCI_EXP_TYPE_ROOT_PORT) ||
	    (hest_type == ACPI_HEST_TYPE_AER_ENDPOINT &&
	     pcie_type == PCI_EXP_TYPE_ENDPOINT) ||
	    (hest_type == ACPI_HEST_TYPE_AER_BRIDGE &&
	     (dev->class >> 16) == PCI_BASE_CLASS_BRIDGE))
		return true;
	return false;
}

struct aer_hest_parse_info {
	struct pci_dev *pci_dev;
	int firmware_first;
};

static int hest_source_is_pcie_aer(struct acpi_hest_header *hest_hdr)
{
	if (hest_hdr->type == ACPI_HEST_TYPE_AER_ROOT_PORT ||
	    hest_hdr->type == ACPI_HEST_TYPE_AER_ENDPOINT ||
	    hest_hdr->type == ACPI_HEST_TYPE_AER_BRIDGE)
		return 1;
	return 0;
}

static int aer_hest_parse(struct acpi_hest_header *hest_hdr, void *data)
{
	struct aer_hest_parse_info *info = data;
	struct acpi_hest_aer_common *p;
	int ff;

	if (!hest_source_is_pcie_aer(hest_hdr))
		return 0;

	p = (struct acpi_hest_aer_common *)(hest_hdr + 1);
	ff = !!(p->flags & ACPI_HEST_FIRMWARE_FIRST);

	/*
	 * If no specific device is supplied, determine whether
	 * FIRMWARE_FIRST is set for *any* PCIe device.
	 */
	if (!info->pci_dev) {
		info->firmware_first |= ff;
		return 0;
	}

	/* Otherwise, check the specific device */
	if (p->flags & ACPI_HEST_GLOBAL) {
		if (hest_match_type(hest_hdr, info->pci_dev))
			info->firmware_first = ff;
	} else
		if (hest_match_pci(p, info->pci_dev))
			info->firmware_first = ff;

	return 0;
}

static void aer_set_firmware_first(struct pci_dev *pci_dev)
{
	int rc;
	struct aer_hest_parse_info info = {
		.pci_dev	= pci_dev,
		.firmware_first	= 0,
	};

	rc = apei_hest_parse(aer_hest_parse, &info);

	if (rc)
		pci_dev->__aer_firmware_first = 0;
	else
		pci_dev->__aer_firmware_first = info.firmware_first;
	pci_dev->__aer_firmware_first_valid = 1;
}

int pcie_aer_get_firmware_first(struct pci_dev *dev)
{
	if (!pci_is_pcie(dev))
		return 0;

	if (pcie_ports_native)
		return 0;

	if (!dev->__aer_firmware_first_valid)
		aer_set_firmware_first(dev);
	return dev->__aer_firmware_first;
}

static bool aer_firmware_first;

/**
 * aer_acpi_firmware_first - Check if APEI should control AER.
 */
bool aer_acpi_firmware_first(void)
{
	static bool parsed = false;
	struct aer_hest_parse_info info = {
		.pci_dev	= NULL,	/* Check all PCIe devices */
		.firmware_first	= 0,
	};

	if (pcie_ports_native)
		return false;

	if (!parsed) {
		apei_hest_parse(aer_hest_parse, &info);
		aer_firmware_first = info.firmware_first;
		parsed = true;
	}
	return aer_firmware_first;
}
#endif
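
/*
 * When firmware-first handling is in effect, the platform firmware owns the
 * AER registers: it fields the errors and hands processed records to the OS
 * via APEI/GHES. That is why the register-touching entry points below bail
 * out with -EIO (or silently) whenever pcie_aer_get_firmware_first() says
 * the device is firmware-first.
 */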

#define PCI_EXP_AER_FLAGS	(PCI_EXP_DEVCTL_CERE | PCI_EXP_DEVCTL_NFERE | \
				 PCI_EXP_DEVCTL_FERE | PCI_EXP_DEVCTL_URRE)

int pci_enable_pcie_error_reporting(struct pci_dev *dev)
{
	if (pcie_aer_get_firmware_first(dev))
		return -EIO;

	if (!dev->aer_cap)
		return -EIO;

	return pcie_capability_set_word(dev, PCI_EXP_DEVCTL, PCI_EXP_AER_FLAGS);
}
EXPORT_SYMBOL_GPL(pci_enable_pcie_error_reporting);

int pci_disable_pcie_error_reporting(struct pci_dev *dev)
{
	if (pcie_aer_get_firmware_first(dev))
		return -EIO;

	return pcie_capability_clear_word(dev, PCI_EXP_DEVCTL,
					  PCI_EXP_AER_FLAGS);
}
EXPORT_SYMBOL_GPL(pci_disable_pcie_error_reporting);
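
/*
 * Minimal usage sketch of the exported pair above (foo_probe/foo_remove are
 * hypothetical; the calls are best effort, so failures are usually ignored):
 *
 *	static int foo_probe(struct pci_dev *pdev,
 *			     const struct pci_device_id *id)
 *	{
 *		...
 *		pci_enable_pcie_error_reporting(pdev);
 *		...
 *	}
 *
 *	static void foo_remove(struct pci_dev *pdev)
 *	{
 *		pci_disable_pcie_error_reporting(pdev);
 *		...
 *	}
 */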

void pci_aer_clear_device_status(struct pci_dev *dev)
{
	u16 sta;

	pcie_capability_read_word(dev, PCI_EXP_DEVSTA, &sta);
	pcie_capability_write_word(dev, PCI_EXP_DEVSTA, sta);
}
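
/*
 * Note on the read-then-write-back pattern above and below: PCIe status
 * registers are RW1C (write-1-to-clear), so writing back the value just
 * read clears exactly the bits that were found set.
 */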

int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev)
{
	int pos;
	u32 status, sev;

	pos = dev->aer_cap;
	if (!pos)
		return -EIO;

	if (pcie_aer_get_firmware_first(dev))
		return -EIO;

	/* Clear status bits for ERR_NONFATAL errors only */
	pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status);
	pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_SEVER, &sev);
	status &= ~sev;
	if (status)
		pci_write_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, status);

	return 0;
}
EXPORT_SYMBOL_GPL(pci_cleanup_aer_uncorrect_error_status);

void pci_aer_clear_fatal_status(struct pci_dev *dev)
{
	int pos;
	u32 status, sev;

	pos = dev->aer_cap;
	if (!pos)
		return;

	if (pcie_aer_get_firmware_first(dev))
		return;

	/* Clear status bits for ERR_FATAL errors only */
	pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status);
	pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_SEVER, &sev);
	status &= sev;
	if (status)
		pci_write_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, status);
}

int pci_cleanup_aer_error_status_regs(struct pci_dev *dev)
{
	int pos;
	u32 status;
	int port_type;

	if (!pci_is_pcie(dev))
		return -ENODEV;

	pos = dev->aer_cap;
	if (!pos)
		return -EIO;

	if (pcie_aer_get_firmware_first(dev))
		return -EIO;

	port_type = pci_pcie_type(dev);
	if (port_type == PCI_EXP_TYPE_ROOT_PORT) {
		pci_read_config_dword(dev, pos + PCI_ERR_ROOT_STATUS, &status);
		pci_write_config_dword(dev, pos + PCI_ERR_ROOT_STATUS, status);
	}

	pci_read_config_dword(dev, pos + PCI_ERR_COR_STATUS, &status);
	pci_write_config_dword(dev, pos + PCI_ERR_COR_STATUS, status);

	pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status);
	pci_write_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, status);

	return 0;
}

void pci_aer_init(struct pci_dev *dev)
{
	dev->aer_cap = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);

	if (dev->aer_cap)
		dev->aer_stats = kzalloc(sizeof(struct aer_stats), GFP_KERNEL);

	pci_cleanup_aer_error_status_regs(dev);
}

void pci_aer_exit(struct pci_dev *dev)
{
	kfree(dev->aer_stats);
	dev->aer_stats = NULL;
}

#define AER_AGENT_RECEIVER		0
#define AER_AGENT_REQUESTER		1
#define AER_AGENT_COMPLETER		2
#define AER_AGENT_TRANSMITTER		3

#define AER_AGENT_REQUESTER_MASK(t)	((t == AER_CORRECTABLE) ?	\
	0 : (PCI_ERR_UNC_COMP_TIME|PCI_ERR_UNC_UNSUP))
#define AER_AGENT_COMPLETER_MASK(t)	((t == AER_CORRECTABLE) ?	\
	0 : PCI_ERR_UNC_COMP_ABORT)
#define AER_AGENT_TRANSMITTER_MASK(t)	((t == AER_CORRECTABLE) ?	\
	(PCI_ERR_COR_REP_ROLL|PCI_ERR_COR_REP_TIMER) : 0)

#define AER_GET_AGENT(t, e)						\
	((e & AER_AGENT_COMPLETER_MASK(t)) ? AER_AGENT_COMPLETER :	\
	(e & AER_AGENT_REQUESTER_MASK(t)) ? AER_AGENT_REQUESTER :	\
	(e & AER_AGENT_TRANSMITTER_MASK(t)) ? AER_AGENT_TRANSMITTER :	\
	AER_AGENT_RECEIVER)

#define AER_PHYSICAL_LAYER_ERROR	0
#define AER_DATA_LINK_LAYER_ERROR	1
#define AER_TRANSACTION_LAYER_ERROR	2

#define AER_PHYSICAL_LAYER_ERROR_MASK(t) ((t == AER_CORRECTABLE) ?	\
	PCI_ERR_COR_RCVR : 0)
#define AER_DATA_LINK_LAYER_ERROR_MASK(t) ((t == AER_CORRECTABLE) ?	\
	(PCI_ERR_COR_BAD_TLP|						\
	PCI_ERR_COR_BAD_DLLP|						\
	PCI_ERR_COR_REP_ROLL|						\
	PCI_ERR_COR_REP_TIMER) : PCI_ERR_UNC_DLP)

#define AER_GET_LAYER_ERROR(t, e)					\
	((e & AER_PHYSICAL_LAYER_ERROR_MASK(t)) ? AER_PHYSICAL_LAYER_ERROR : \
	(e & AER_DATA_LINK_LAYER_ERROR_MASK(t)) ? AER_DATA_LINK_LAYER_ERROR : \
	AER_TRANSACTION_LAYER_ERROR)
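
/*
 * Worked example of the classification macros above: for an uncorrectable
 * Malformed TLP (status bit PCI_ERR_UNC_MALF_TLP, t != AER_CORRECTABLE),
 * AER_GET_LAYER_ERROR() returns AER_TRANSACTION_LAYER_ERROR (the bit is
 * neither PCI_ERR_COR_RCVR nor PCI_ERR_UNC_DLP), and AER_GET_AGENT()
 * returns AER_AGENT_RECEIVER (the bit matches none of the completer,
 * requester, or transmitter masks), so the log line reads
 * "type=Transaction Layer, (Receiver ID)".
 */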

/*
 * AER error strings
 */
static const char *aer_error_severity_string[] = {
	"Uncorrected (Non-Fatal)",
	"Uncorrected (Fatal)",
	"Corrected"
};

static const char *aer_error_layer[] = {
	"Physical Layer",
	"Data Link Layer",
	"Transaction Layer"
};

static const char *aer_correctable_error_string[AER_MAX_TYPEOF_COR_ERRS] = {
	"RxErr",		/* Bit Position 0 */
	NULL,
	NULL,
	NULL,
	NULL,
	NULL,
	"BadTLP",		/* Bit Position 6 */
	"BadDLLP",		/* Bit Position 7 */
	"Rollover",		/* Bit Position 8 */
	NULL,
	NULL,
	NULL,
	"Timeout",		/* Bit Position 12 */
	"NonFatalErr",		/* Bit Position 13 */
	"CorrIntErr",		/* Bit Position 14 */
	"HeaderOF",		/* Bit Position 15 */
};

static const char *aer_uncorrectable_error_string[AER_MAX_TYPEOF_UNCOR_ERRS] = {
	"Undefined",		/* Bit Position 0 */
	NULL,
	NULL,
	NULL,
	"DLP",			/* Bit Position 4 */
	"SDES",			/* Bit Position 5 */
	NULL,
	NULL,
	NULL,
	NULL,
	NULL,
	NULL,
	"TLP",			/* Bit Position 12 */
	"FCP",			/* Bit Position 13 */
	"CmpltTO",		/* Bit Position 14 */
	"CmpltAbrt",		/* Bit Position 15 */
	"UnxCmplt",		/* Bit Position 16 */
	"RxOF",			/* Bit Position 17 */
	"MalfTLP",		/* Bit Position 18 */
	"ECRC",			/* Bit Position 19 */
	"UnsupReq",		/* Bit Position 20 */
	"ACSViol",		/* Bit Position 21 */
	"UncorrIntErr",		/* Bit Position 22 */
	"BlockedTLP",		/* Bit Position 23 */
	"AtomicOpBlocked",	/* Bit Position 24 */
	"TLPBlockedErr",	/* Bit Position 25 */
};

static const char *aer_agent_string[] = {
	"Receiver ID",
	"Requester ID",
	"Completer ID",
	"Transmitter ID"
};

#define aer_stats_dev_attr(name, stats_array, strings_array,		\
			   total_string, total_field)			\
	static ssize_t							\
	name##_show(struct device *dev, struct device_attribute *attr,	\
		    char *buf)						\
{									\
	unsigned int i;							\
	char *str = buf;						\
	struct pci_dev *pdev = to_pci_dev(dev);				\
	u64 *stats = pdev->aer_stats->stats_array;			\
									\
	for (i = 0; i < ARRAY_SIZE(strings_array); i++) {		\
		if (strings_array[i])					\
			str += sprintf(str, "%s %llu\n",		\
				       strings_array[i], stats[i]);	\
		else if (stats[i])					\
			str += sprintf(str, #stats_array "_bit[%d] %llu\n",\
				       i, stats[i]);			\
	}								\
	str += sprintf(str, "TOTAL_%s %llu\n", total_string,		\
		       pdev->aer_stats->total_field);			\
	return str-buf;							\
}									\
static DEVICE_ATTR_RO(name)

aer_stats_dev_attr(aer_dev_correctable, dev_cor_errs,
		   aer_correctable_error_string, "ERR_COR",
		   dev_total_cor_errs);
aer_stats_dev_attr(aer_dev_fatal, dev_fatal_errs,
		   aer_uncorrectable_error_string, "ERR_FATAL",
		   dev_total_fatal_errs);
aer_stats_dev_attr(aer_dev_nonfatal, dev_nonfatal_errs,
		   aer_uncorrectable_error_string, "ERR_NONFATAL",
		   dev_total_nonfatal_errs);
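
/*
 * Illustrative output (counts are made up): reading
 * /sys/bus/pci/devices/<BDF>/aer_dev_correctable yields one line per named
 * error bit plus a running total, e.g.:
 *
 *	RxErr 0
 *	BadTLP 2
 *	BadDLLP 0
 *	Rollover 0
 *	Timeout 0
 *	NonFatalErr 0
 *	CorrIntErr 0
 *	HeaderOF 0
 *	TOTAL_ERR_COR 2
 */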

#define aer_stats_rootport_attr(name, field)				\
	static ssize_t							\
	name##_show(struct device *dev, struct device_attribute *attr,	\
		    char *buf)						\
{									\
	struct pci_dev *pdev = to_pci_dev(dev);				\
	return sprintf(buf, "%llu\n", pdev->aer_stats->field);		\
}									\
static DEVICE_ATTR_RO(name)

aer_stats_rootport_attr(aer_rootport_total_err_cor,
			rootport_total_cor_errs);
aer_stats_rootport_attr(aer_rootport_total_err_fatal,
			rootport_total_fatal_errs);
aer_stats_rootport_attr(aer_rootport_total_err_nonfatal,
			rootport_total_nonfatal_errs);

static struct attribute *aer_stats_attrs[] __ro_after_init = {
	&dev_attr_aer_dev_correctable.attr,
	&dev_attr_aer_dev_fatal.attr,
	&dev_attr_aer_dev_nonfatal.attr,
	&dev_attr_aer_rootport_total_err_cor.attr,
	&dev_attr_aer_rootport_total_err_fatal.attr,
	&dev_attr_aer_rootport_total_err_nonfatal.attr,
	NULL
};

static umode_t aer_stats_attrs_are_visible(struct kobject *kobj,
					   struct attribute *a, int n)
{
	struct device *dev = kobj_to_dev(kobj);
	struct pci_dev *pdev = to_pci_dev(dev);

	if (!pdev->aer_stats)
		return 0;

	if ((a == &dev_attr_aer_rootport_total_err_cor.attr ||
	     a == &dev_attr_aer_rootport_total_err_fatal.attr ||
	     a == &dev_attr_aer_rootport_total_err_nonfatal.attr) &&
	    pci_pcie_type(pdev) != PCI_EXP_TYPE_ROOT_PORT)
		return 0;

	return a->mode;
}

const struct attribute_group aer_stats_attr_group = {
	.attrs		= aer_stats_attrs,
	.is_visible	= aer_stats_attrs_are_visible,
};

static void pci_dev_aer_stats_incr(struct pci_dev *pdev,
				   struct aer_err_info *info)
{
	int status, i, max = -1;
	u64 *counter = NULL;
	struct aer_stats *aer_stats = pdev->aer_stats;

	if (!aer_stats)
		return;

	switch (info->severity) {
	case AER_CORRECTABLE:
		aer_stats->dev_total_cor_errs++;
		counter = &aer_stats->dev_cor_errs[0];
		max = AER_MAX_TYPEOF_COR_ERRS;
		break;
	case AER_NONFATAL:
		aer_stats->dev_total_nonfatal_errs++;
		counter = &aer_stats->dev_nonfatal_errs[0];
		max = AER_MAX_TYPEOF_UNCOR_ERRS;
		break;
	case AER_FATAL:
		aer_stats->dev_total_fatal_errs++;
		counter = &aer_stats->dev_fatal_errs[0];
		max = AER_MAX_TYPEOF_UNCOR_ERRS;
		break;
	}

	status = (info->status & ~info->mask);
	for (i = 0; i < max; i++)
		if (status & (1 << i))
			counter[i]++;
}

static void pci_rootport_aer_stats_incr(struct pci_dev *pdev,
					struct aer_err_source *e_src)
{
	struct aer_stats *aer_stats = pdev->aer_stats;

	if (!aer_stats)
		return;

	if (e_src->status & PCI_ERR_ROOT_COR_RCV)
		aer_stats->rootport_total_cor_errs++;

	if (e_src->status & PCI_ERR_ROOT_UNCOR_RCV) {
		if (e_src->status & PCI_ERR_ROOT_FATAL_RCV)
			aer_stats->rootport_total_fatal_errs++;
		else
			aer_stats->rootport_total_nonfatal_errs++;
	}
}
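
/*
 * Note that only unmasked bits (info->status & ~info->mask) are counted in
 * the per-bit arrays above, so error types masked via PCI_ERR_COR_MASK or
 * PCI_ERR_UNCOR_MASK never appear in the per-bit sysfs statistics.
 */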

static void __print_tlp_header(struct pci_dev *dev,
			       struct aer_header_log_regs *t)
{
	pci_err(dev, " TLP Header: %08x %08x %08x %08x\n",
		t->dw0, t->dw1, t->dw2, t->dw3);
}

static void __aer_print_error(struct pci_dev *dev,
			      struct aer_err_info *info)
{
	int i, status;
	const char *errmsg = NULL;

	status = (info->status & ~info->mask);
	for (i = 0; i < 32; i++) {
		if (!(status & (1 << i)))
			continue;

		if (info->severity == AER_CORRECTABLE)
			errmsg = i < ARRAY_SIZE(aer_correctable_error_string) ?
				aer_correctable_error_string[i] : NULL;
		else
			errmsg = i < ARRAY_SIZE(aer_uncorrectable_error_string) ?
				aer_uncorrectable_error_string[i] : NULL;

		if (errmsg)
			pci_err(dev, " [%2d] %-22s%s\n", i, errmsg,
				info->first_error == i ? " (First)" : "");
		else
			pci_err(dev, " [%2d] Unknown Error Bit%s\n",
				i, info->first_error == i ? " (First)" : "");
	}
	pci_dev_aer_stats_incr(dev, info);
}

void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
{
	int layer, agent;
	int id = ((dev->bus->number << 8) | dev->devfn);

	if (!info->status) {
		pci_err(dev, "PCIe Bus Error: severity=%s, type=Inaccessible, (Unregistered Agent ID)\n",
			aer_error_severity_string[info->severity]);
		goto out;
	}

	layer = AER_GET_LAYER_ERROR(info->severity, info->status);
	agent = AER_GET_AGENT(info->severity, info->status);

	pci_err(dev, "PCIe Bus Error: severity=%s, type=%s, (%s)\n",
		aer_error_severity_string[info->severity],
		aer_error_layer[layer], aer_agent_string[agent]);

	pci_err(dev, " device [%04x:%04x] error status/mask=%08x/%08x\n",
		dev->vendor, dev->device,
		info->status, info->mask);

	__aer_print_error(dev, info);

	if (info->tlp_header_valid)
		__print_tlp_header(dev, &info->tlp);

out:
	if (info->id && info->error_dev_num > 1 && info->id == id)
		pci_err(dev, " Error of this Agent is reported first\n");

	trace_aer_event(dev_name(&dev->dev), (info->status & ~info->mask),
			info->severity, info->tlp_header_valid, &info->tlp);
}

static void aer_print_port_info(struct pci_dev *dev, struct aer_err_info *info)
{
	u8 bus = info->id >> 8;
	u8 devfn = info->id & 0xff;

	pci_info(dev, "AER: %s%s error received: %04x:%02x:%02x.%d\n",
		 info->multi_error_valid ? "Multiple " : "",
		 aer_error_severity_string[info->severity],
		 pci_domain_nr(dev->bus), bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
}
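
/*
 * Illustrative dmesg output for a single non-fatal completion timeout
 * (device names, vendor/device IDs, and addresses are made up):
 *
 *	pcieport 0000:00:1c.0: AER: Uncorrected (Non-Fatal) error received: 0000:01:00.0
 *	foo 0000:01:00.0: PCIe Bus Error: severity=Uncorrected (Non-Fatal), type=Transaction Layer, (Requester ID)
 *	foo 0000:01:00.0:  device [1234:5678] error status/mask=00004000/00000000
 *	foo 0000:01:00.0:  [14] CmpltTO                (First)
 */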

#ifdef CONFIG_ACPI_APEI_PCIEAER
int cper_severity_to_aer(int cper_severity)
{
	switch (cper_severity) {
	case CPER_SEV_RECOVERABLE:
		return AER_NONFATAL;
	case CPER_SEV_FATAL:
		return AER_FATAL;
	default:
		return AER_CORRECTABLE;
	}
}
EXPORT_SYMBOL_GPL(cper_severity_to_aer);

void cper_print_aer(struct pci_dev *dev, int aer_severity,
		    struct aer_capability_regs *aer)
{
	int layer, agent, tlp_header_valid = 0;
	u32 status, mask;
	struct aer_err_info info;

	if (aer_severity == AER_CORRECTABLE) {
		status = aer->cor_status;
		mask = aer->cor_mask;
	} else {
		status = aer->uncor_status;
		mask = aer->uncor_mask;
		tlp_header_valid = status & AER_LOG_TLP_MASKS;
	}

	layer = AER_GET_LAYER_ERROR(aer_severity, status);
	agent = AER_GET_AGENT(aer_severity, status);

	memset(&info, 0, sizeof(info));
	info.severity = aer_severity;
	info.status = status;
	info.mask = mask;
	info.first_error = PCI_ERR_CAP_FEP(aer->cap_control);

	pci_err(dev, "aer_status: 0x%08x, aer_mask: 0x%08x\n", status, mask);
	__aer_print_error(dev, &info);
	pci_err(dev, "aer_layer=%s, aer_agent=%s\n",
		aer_error_layer[layer], aer_agent_string[agent]);

	if (aer_severity != AER_CORRECTABLE)
		pci_err(dev, "aer_uncor_severity: 0x%08x\n",
			aer->uncor_severity);

	if (tlp_header_valid)
		__print_tlp_header(dev, &aer->header_log);

	trace_aer_event(dev_name(&dev->dev), (status & ~mask),
			aer_severity, tlp_header_valid, &aer->header_log);
}
#endif

/**
 * add_error_device - list device to be handled
 * @e_info: pointer to error info
 * @dev: pointer to pci_dev to be added
 */
static int add_error_device(struct aer_err_info *e_info, struct pci_dev *dev)
{
	if (e_info->error_dev_num < AER_MAX_MULTI_ERR_DEVICES) {
		e_info->dev[e_info->error_dev_num] = pci_dev_get(dev);
		e_info->error_dev_num++;
		return 0;
	}
	return -ENOSPC;
}

/**
 * is_error_source - check whether the device is source of reported error
 * @dev: pointer to pci_dev to be checked
 * @e_info: pointer to reported error info
 */
static bool is_error_source(struct pci_dev *dev, struct aer_err_info *e_info)
{
	int pos;
	u32 status, mask;
	u16 reg16;

	/*
	 * When bus id is equal to 0, it might be a bad id
	 * reported by root port.
	 */
	if ((PCI_BUS_NUM(e_info->id) != 0) &&
	    !(dev->bus->bus_flags & PCI_BUS_FLAGS_NO_AERSID)) {
		/* Device ID match? */
		if (e_info->id == ((dev->bus->number << 8) | dev->devfn))
			return true;

		/* Continue id comparing if there is no multiple error */
		if (!e_info->multi_error_valid)
			return false;
	}

	/*
	 * We check the AER status registers to find the possible reporter
	 * when any of the following holds:
	 * 1) the bus id is 0, since some ports lose the bus id of the
	 *    error source;
	 * 2) the bus flag PCI_BUS_FLAGS_NO_AERSID is set;
	 * 3) there are multiple errors and the prior ID comparison failed.
	 */
	if (atomic_read(&dev->enable_cnt) == 0)
		return false;

	/* Check if AER is enabled */
	pcie_capability_read_word(dev, PCI_EXP_DEVCTL, &reg16);
	if (!(reg16 & PCI_EXP_AER_FLAGS))
		return false;

	pos = dev->aer_cap;
	if (!pos)
		return false;

	/* Check if error is recorded */
	if (e_info->severity == AER_CORRECTABLE) {
		pci_read_config_dword(dev, pos + PCI_ERR_COR_STATUS, &status);
		pci_read_config_dword(dev, pos + PCI_ERR_COR_MASK, &mask);
	} else {
		pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status);
		pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_MASK, &mask);
	}
	if (status & ~mask)
		return true;

	return false;
}

static int find_device_iter(struct pci_dev *dev, void *data)
{
	struct aer_err_info *e_info = (struct aer_err_info *)data;

	if (is_error_source(dev, e_info)) {
		/* List this device */
		if (add_error_device(e_info, dev)) {
			/* We cannot handle more... Stop iteration */
			/* TODO: Should print error message here? */
			return 1;
		}

		/* If there is only a single error, stop iteration */
		if (!e_info->multi_error_valid)
			return 1;
	}
	return 0;
}

/**
 * find_source_device - search through device hierarchy for source device
 * @parent: pointer to Root Port pci_dev data structure
 * @e_info: pointer to error info, including the id of the error source
 *
 * Return true if found.
 *
 * Invoked by the DPC handler when an error is detected at the Root Port.
 * The caller must set the id, severity, and multi_error_valid fields of
 * the struct aer_err_info pointed to by @e_info; this function fills in
 * e_info->error_dev_num and e_info->dev[] based on that information.
 */
static bool find_source_device(struct pci_dev *parent,
			       struct aer_err_info *e_info)
{
	struct pci_dev *dev = parent;
	int result;

	/* Must reset in this function */
	e_info->error_dev_num = 0;

	/* Is Root Port an agent that sends error message? */
	result = find_device_iter(dev, e_info);
	if (result)
		return true;

	pci_walk_bus(parent->subordinate, find_device_iter, e_info);

	if (!e_info->error_dev_num) {
		pci_printk(KERN_DEBUG, parent, "can't find device of ID%04x\n",
			   e_info->id);
		return false;
	}
	return true;
}

/**
 * handle_error_source - handle logging error into an event log
 * @dev: pointer to pci_dev data structure of error source device
 * @info: comprehensive error information
 *
 * Invoked when an error is detected by the Root Port.
 */
static void handle_error_source(struct pci_dev *dev, struct aer_err_info *info)
{
	int pos;

	if (info->severity == AER_CORRECTABLE) {
		/*
		 * Correctable error does not need software intervention.
		 * No need to go through error recovery process.
		 */
		pos = dev->aer_cap;
		if (pos)
			pci_write_config_dword(dev, pos + PCI_ERR_COR_STATUS,
					       info->status);
		pci_aer_clear_device_status(dev);
	} else if (info->severity == AER_NONFATAL)
		pcie_do_nonfatal_recovery(dev);
	else if (info->severity == AER_FATAL)
		pcie_do_fatal_recovery(dev, PCIE_PORT_SERVICE_AER);
	pci_dev_put(dev);
}

#ifdef CONFIG_ACPI_APEI_PCIEAER

#define AER_RECOVER_RING_ORDER		4
#define AER_RECOVER_RING_SIZE		(1 << AER_RECOVER_RING_ORDER)

struct aer_recover_entry {
	u8	bus;
	u8	devfn;
	u16	domain;
	int	severity;
	struct aer_capability_regs *regs;
};

static DEFINE_KFIFO(aer_recover_ring, struct aer_recover_entry,
		    AER_RECOVER_RING_SIZE);

static void aer_recover_work_func(struct work_struct *work)
{
	struct aer_recover_entry entry;
	struct pci_dev *pdev;

	while (kfifo_get(&aer_recover_ring, &entry)) {
		pdev = pci_get_domain_bus_and_slot(entry.domain, entry.bus,
						   entry.devfn);
		if (!pdev) {
			pr_err("AER recover: Can not find pci_dev for %04x:%02x:%02x:%x\n",
			       entry.domain, entry.bus,
			       PCI_SLOT(entry.devfn), PCI_FUNC(entry.devfn));
			continue;
		}
		cper_print_aer(pdev, entry.severity, entry.regs);
		if (entry.severity == AER_NONFATAL)
			pcie_do_nonfatal_recovery(pdev);
		else if (entry.severity == AER_FATAL)
			pcie_do_fatal_recovery(pdev, PCIE_PORT_SERVICE_AER);
		pci_dev_put(pdev);
	}
}

/*
 * Mutual exclusion is needed only for the writers of aer_recover_ring; the
 * reader side needs no lock because there is only one reader, and no lock
 * is needed between the reader and the writers.
 */
static DEFINE_SPINLOCK(aer_recover_ring_lock);
static DECLARE_WORK(aer_recover_work, aer_recover_work_func);

void aer_recover_queue(int domain, unsigned int bus, unsigned int devfn,
		       int severity, struct aer_capability_regs *aer_regs)
{
	unsigned long flags;
	struct aer_recover_entry entry = {
		.bus		= bus,
		.devfn		= devfn,
		.domain		= domain,
		.severity	= severity,
		.regs		= aer_regs,
	};

	spin_lock_irqsave(&aer_recover_ring_lock, flags);
	if (kfifo_put(&aer_recover_ring, entry))
		schedule_work(&aer_recover_work);
	else
		pr_err("AER recover: Buffer overflow when recovering AER for %04x:%02x:%02x:%x\n",
		       domain, bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
	spin_unlock_irqrestore(&aer_recover_ring_lock, flags);
}
EXPORT_SYMBOL_GPL(aer_recover_queue);
#endif
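
/*
 * Sketch of a caller (an assumption for illustration; in practice the APEI
 * GHES code feeds firmware-reported PCIe errors into this path):
 *
 *	aer_recover_queue(domain, bus, PCI_DEVFN(slot, func),
 *			  cper_severity_to_aer(cper_sev), aer_regs);
 *
 * aer_recover_work_func() then looks up the pci_dev, prints the record via
 * cper_print_aer(), and kicks off the usual recovery.
 */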

/**
 * aer_get_device_error_info - read error status from dev and store it to info
 * @dev: pointer to the device expected to have an error record
 * @info: pointer to structure to store the error record
 *
 * Return 1 on success, 0 on error.
 *
 * Note that @info is reused among all error devices. Clear fields properly.
 */
int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info)
{
	int pos, temp;

	/* Must reset in this function */
	info->status = 0;
	info->tlp_header_valid = 0;

	pos = dev->aer_cap;

	/* The device might not support AER */
	if (!pos)
		return 0;

	if (info->severity == AER_CORRECTABLE) {
		pci_read_config_dword(dev, pos + PCI_ERR_COR_STATUS,
			&info->status);
		pci_read_config_dword(dev, pos + PCI_ERR_COR_MASK,
			&info->mask);
		if (!(info->status & ~info->mask))
			return 0;
	} else if (pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT ||
		   pci_pcie_type(dev) == PCI_EXP_TYPE_DOWNSTREAM ||
		   info->severity == AER_NONFATAL) {

		/* Link is still healthy for IO reads */
		pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS,
			&info->status);
		pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_MASK,
			&info->mask);
		if (!(info->status & ~info->mask))
			return 0;

		/* Get First Error Pointer */
		pci_read_config_dword(dev, pos + PCI_ERR_CAP, &temp);
		info->first_error = PCI_ERR_CAP_FEP(temp);

		if (info->status & AER_LOG_TLP_MASKS) {
			info->tlp_header_valid = 1;
			pci_read_config_dword(dev,
				pos + PCI_ERR_HEADER_LOG, &info->tlp.dw0);
			pci_read_config_dword(dev,
				pos + PCI_ERR_HEADER_LOG + 4, &info->tlp.dw1);
			pci_read_config_dword(dev,
				pos + PCI_ERR_HEADER_LOG + 8, &info->tlp.dw2);
			pci_read_config_dword(dev,
				pos + PCI_ERR_HEADER_LOG + 12, &info->tlp.dw3);
		}
	}

	return 1;
}

static inline void aer_process_err_devices(struct aer_err_info *e_info)
{
	int i;

	/* Report all errors before handling them, so no record is lost to a reset etc. */
	for (i = 0; i < e_info->error_dev_num && e_info->dev[i]; i++) {
		if (aer_get_device_error_info(e_info->dev[i], e_info))
			aer_print_error(e_info->dev[i], e_info);
	}
	for (i = 0; i < e_info->error_dev_num && e_info->dev[i]; i++) {
		if (aer_get_device_error_info(e_info->dev[i], e_info))
			handle_error_source(e_info->dev[i], e_info);
	}
}

/**
 * aer_isr_one_error - consume an error detected by root port
 * @rpc: pointer to the root port which holds an error
 * @e_src: pointer to an error source
 */
static void aer_isr_one_error(struct aer_rpc *rpc,
			      struct aer_err_source *e_src)
{
	struct pci_dev *pdev = rpc->rpd;
	struct aer_err_info *e_info = &rpc->e_info;

	pci_rootport_aer_stats_incr(pdev, e_src);

	/*
	 * There is a possibility that both a correctable error and an
	 * uncorrectable error are logged. Report the correctable error first.
	 */
	if (e_src->status & PCI_ERR_ROOT_COR_RCV) {
		e_info->id = ERR_COR_ID(e_src->id);
		e_info->severity = AER_CORRECTABLE;

		if (e_src->status & PCI_ERR_ROOT_MULTI_COR_RCV)
			e_info->multi_error_valid = 1;
		else
			e_info->multi_error_valid = 0;
		aer_print_port_info(pdev, e_info);

		if (find_source_device(pdev, e_info))
			aer_process_err_devices(e_info);
	}

	if (e_src->status & PCI_ERR_ROOT_UNCOR_RCV) {
		e_info->id = ERR_UNCOR_ID(e_src->id);

		if (e_src->status & PCI_ERR_ROOT_FATAL_RCV)
			e_info->severity = AER_FATAL;
		else
			e_info->severity = AER_NONFATAL;

		if (e_src->status & PCI_ERR_ROOT_MULTI_UNCOR_RCV)
			e_info->multi_error_valid = 1;
		else
			e_info->multi_error_valid = 0;

		aer_print_port_info(pdev, e_info);

		if (find_source_device(pdev, e_info))
			aer_process_err_devices(e_info);
	}
}

/**
 * get_e_source - retrieve an error source
 * @rpc: pointer to the root port which holds an error
 * @e_src: pointer to store retrieved error source
 *
 * Return 1 if an error source is retrieved, otherwise 0.
 *
 * Invoked by the DPC handler to consume an error.
 */
static int get_e_source(struct aer_rpc *rpc, struct aer_err_source *e_src)
{
	unsigned long flags;

	/* Lock access to Root error producer/consumer index */
	spin_lock_irqsave(&rpc->e_lock, flags);
	if (rpc->prod_idx == rpc->cons_idx) {
		spin_unlock_irqrestore(&rpc->e_lock, flags);
		return 0;
	}

	*e_src = rpc->e_sources[rpc->cons_idx];
	rpc->cons_idx++;
	if (rpc->cons_idx == AER_ERROR_SOURCES_MAX)
		rpc->cons_idx = 0;
	spin_unlock_irqrestore(&rpc->e_lock, flags);

	return 1;
}

/**
 * aer_isr - consume errors detected by root port
 * @work: definition of this work item
 *
 * Invoked, as deferred work (DPC), when the root port records a newly
 * detected error.
 */
static void aer_isr(struct work_struct *work)
{
	struct aer_rpc *rpc = container_of(work, struct aer_rpc, dpc_handler);
	struct aer_err_source uninitialized_var(e_src);

	mutex_lock(&rpc->rpc_mutex);
	while (get_e_source(rpc, &e_src))
		aer_isr_one_error(rpc, &e_src);
	mutex_unlock(&rpc->rpc_mutex);
}

/**
 * aer_irq - Root Port's ISR
 * @irq: IRQ assigned to Root Port
 * @context: pointer to Root Port data structure
 *
 * Invoked when Root Port detects AER messages.
 */
irqreturn_t aer_irq(int irq, void *context)
{
	unsigned int status, id;
	struct pcie_device *pdev = (struct pcie_device *)context;
	struct aer_rpc *rpc = get_service_data(pdev);
	int next_prod_idx;
	unsigned long flags;
	int pos;

	pos = pdev->port->aer_cap;
	/*
	 * Must lock access to Root Error Status Reg, Root Error ID Reg,
	 * and Root error producer/consumer index
	 */
	spin_lock_irqsave(&rpc->e_lock, flags);

	/* Read error status */
	pci_read_config_dword(pdev->port, pos + PCI_ERR_ROOT_STATUS, &status);
	if (!(status & (PCI_ERR_ROOT_UNCOR_RCV|PCI_ERR_ROOT_COR_RCV))) {
		spin_unlock_irqrestore(&rpc->e_lock, flags);
		return IRQ_NONE;
	}

	/* Read error source and clear error status */
	pci_read_config_dword(pdev->port, pos + PCI_ERR_ROOT_ERR_SRC, &id);
	pci_write_config_dword(pdev->port, pos + PCI_ERR_ROOT_STATUS, status);

	/* Store error source for later DPC handler */
	next_prod_idx = rpc->prod_idx + 1;
	if (next_prod_idx == AER_ERROR_SOURCES_MAX)
		next_prod_idx = 0;
	if (next_prod_idx == rpc->cons_idx) {
		/*
		 * Error Storm Condition - possibly the same error occurred.
		 * Drop the error.
		 */
		spin_unlock_irqrestore(&rpc->e_lock, flags);
		return IRQ_HANDLED;
	}
	rpc->e_sources[rpc->prod_idx].status = status;
	rpc->e_sources[rpc->prod_idx].id = id;
	rpc->prod_idx = next_prod_idx;
	spin_unlock_irqrestore(&rpc->e_lock, flags);

	/* Invoke DPC handler */
	schedule_work(&rpc->dpc_handler);

	return IRQ_HANDLED;
}
EXPORT_SYMBOL_GPL(aer_irq);
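
/*
 * Note on the ring above: one slot is always left unused, so a full ring is
 * detected when advancing prod_idx would make it equal to cons_idx. With
 * AER_ERROR_SOURCES_MAX == 100, at most 99 error sources can be queued
 * before new ones are dropped as an error storm.
 */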

static int set_device_error_reporting(struct pci_dev *dev, void *data)
{
	bool enable = *((bool *)data);
	int type = pci_pcie_type(dev);

	if ((type == PCI_EXP_TYPE_ROOT_PORT) ||
	    (type == PCI_EXP_TYPE_UPSTREAM) ||
	    (type == PCI_EXP_TYPE_DOWNSTREAM)) {
		if (enable)
			pci_enable_pcie_error_reporting(dev);
		else
			pci_disable_pcie_error_reporting(dev);
	}

	if (enable)
		pcie_set_ecrc_checking(dev);

	return 0;
}

/**
 * set_downstream_devices_error_reporting - enable/disable the error reporting bits on the root port and its downstream ports.
 * @dev: pointer to root port's pci_dev data structure
 * @enable: true = enable error reporting, false = disable error reporting.
 */
static void set_downstream_devices_error_reporting(struct pci_dev *dev,
						   bool enable)
{
	set_device_error_reporting(dev, &enable);

	if (!dev->subordinate)
		return;
	pci_walk_bus(dev->subordinate, set_device_error_reporting, &enable);
}

/**
 * aer_enable_rootport - enable Root Port's interrupts when receiving messages
 * @rpc: pointer to a Root Port data structure
 *
 * Invoked when PCIe bus loads AER service driver.
 */
static void aer_enable_rootport(struct aer_rpc *rpc)
{
	struct pci_dev *pdev = rpc->rpd;
	int aer_pos;
	u16 reg16;
	u32 reg32;

	/* Clear PCIe Capability's Device Status */
	pcie_capability_read_word(pdev, PCI_EXP_DEVSTA, &reg16);
	pcie_capability_write_word(pdev, PCI_EXP_DEVSTA, reg16);

	/* Disable system error generation in response to error messages */
	pcie_capability_clear_word(pdev, PCI_EXP_RTCTL,
				   SYSTEM_ERROR_INTR_ON_MESG_MASK);

	aer_pos = pdev->aer_cap;
	/* Clear error status */
	pci_read_config_dword(pdev, aer_pos + PCI_ERR_ROOT_STATUS, &reg32);
	pci_write_config_dword(pdev, aer_pos + PCI_ERR_ROOT_STATUS, reg32);
	pci_read_config_dword(pdev, aer_pos + PCI_ERR_COR_STATUS, &reg32);
	pci_write_config_dword(pdev, aer_pos + PCI_ERR_COR_STATUS, reg32);
	pci_read_config_dword(pdev, aer_pos + PCI_ERR_UNCOR_STATUS, &reg32);
	pci_write_config_dword(pdev, aer_pos + PCI_ERR_UNCOR_STATUS, reg32);

	/*
	 * Enable error reporting for the root port device and downstream port
	 * devices.
	 */
	set_downstream_devices_error_reporting(pdev, true);

	/* Enable Root Port's interrupt in response to error messages */
	pci_read_config_dword(pdev, aer_pos + PCI_ERR_ROOT_COMMAND, &reg32);
	reg32 |= ROOT_PORT_INTR_ON_MESG_MASK;
	pci_write_config_dword(pdev, aer_pos + PCI_ERR_ROOT_COMMAND, reg32);
}

/**
 * aer_disable_rootport - disable Root Port's interrupts when receiving messages
 * @rpc: pointer to a Root Port data structure
 *
 * Invoked when PCIe bus unloads AER service driver.
 */
static void aer_disable_rootport(struct aer_rpc *rpc)
{
	struct pci_dev *pdev = rpc->rpd;
	u32 reg32;
	int pos;

	/*
	 * Disable error reporting for the root port device and downstream port
	 * devices.
	 */
	set_downstream_devices_error_reporting(pdev, false);

	pos = pdev->aer_cap;
	/* Disable Root's interrupt in response to error messages */
	pci_read_config_dword(pdev, pos + PCI_ERR_ROOT_COMMAND, &reg32);
	reg32 &= ~ROOT_PORT_INTR_ON_MESG_MASK;
	pci_write_config_dword(pdev, pos + PCI_ERR_ROOT_COMMAND, reg32);

	/* Clear Root's error status reg */
	pci_read_config_dword(pdev, pos + PCI_ERR_ROOT_STATUS, &reg32);
	pci_write_config_dword(pdev, pos + PCI_ERR_ROOT_STATUS, reg32);
}

/**
 * aer_alloc_rpc - allocate Root Port data structure
 * @dev: pointer to the pcie_dev data structure
 *
 * Invoked when Root Port's AER service is loaded.
 */
static struct aer_rpc *aer_alloc_rpc(struct pcie_device *dev)
{
	struct aer_rpc *rpc;

	rpc = kzalloc(sizeof(struct aer_rpc), GFP_KERNEL);
	if (!rpc)
		return NULL;

	/* Initialize Root lock access, e_lock, to Root Error Status Reg */
	spin_lock_init(&rpc->e_lock);

	rpc->rpd = dev->port;
	INIT_WORK(&rpc->dpc_handler, aer_isr);
	mutex_init(&rpc->rpc_mutex);

	/* Use PCIe bus function to store rpc into PCIe device */
	set_service_data(dev, rpc);

	return rpc;
}

/**
 * aer_remove - clean up resources
 * @dev: pointer to the pcie_dev data structure
 *
 * Invoked when PCI Express bus unloads or AER probe fails.
 */
static void aer_remove(struct pcie_device *dev)
{
	struct aer_rpc *rpc = get_service_data(dev);

	if (rpc) {
		/* If the interrupt service was registered, free it */
		if (rpc->isr)
			free_irq(dev->irq, dev);

		flush_work(&rpc->dpc_handler);
		aer_disable_rootport(rpc);
		kfree(rpc);
		set_service_data(dev, NULL);
	}
}

/**
 * aer_probe - initialize resources
 * @dev: pointer to the pcie_dev data structure
 *
 * Invoked when PCI Express bus loads AER service driver.
 */
static int aer_probe(struct pcie_device *dev)
{
	int status;
	struct aer_rpc *rpc;
	struct device *device = &dev->port->dev;

	/* Alloc rpc data structure */
	rpc = aer_alloc_rpc(dev);
	if (!rpc) {
		dev_printk(KERN_DEBUG, device, "alloc AER rpc failed\n");
		aer_remove(dev);
		return -ENOMEM;
	}

	/* Request IRQ ISR */
	status = request_irq(dev->irq, aer_irq, IRQF_SHARED, "aerdrv", dev);
	if (status) {
		dev_printk(KERN_DEBUG, device, "request AER IRQ %d failed\n",
			   dev->irq);
		aer_remove(dev);
		return status;
	}

	rpc->isr = 1;
	aer_enable_rootport(rpc);
	dev_info(device, "AER enabled with IRQ %d\n", dev->irq);
	return 0;
}

/**
 * aer_root_reset - reset link on Root Port
 * @dev: pointer to Root Port's pci_dev data structure
 *
 * Invoked by Port Bus driver when performing link reset at Root Port.
 */
static pci_ers_result_t aer_root_reset(struct pci_dev *dev)
{
	u32 reg32;
	int pos;
	int rc;

	pos = dev->aer_cap;

	/* Disable Root's interrupt in response to error messages */
	pci_read_config_dword(dev, pos + PCI_ERR_ROOT_COMMAND, &reg32);
	reg32 &= ~ROOT_PORT_INTR_ON_MESG_MASK;
	pci_write_config_dword(dev, pos + PCI_ERR_ROOT_COMMAND, reg32);

	rc = pci_bus_error_reset(dev);
	pci_printk(KERN_DEBUG, dev, "Root Port link has been reset\n");

	/* Clear Root Error Status */
	pci_read_config_dword(dev, pos + PCI_ERR_ROOT_STATUS, &reg32);
	pci_write_config_dword(dev, pos + PCI_ERR_ROOT_STATUS, reg32);

	/* Enable Root Port's interrupt in response to error messages */
	pci_read_config_dword(dev, pos + PCI_ERR_ROOT_COMMAND, &reg32);
	reg32 |= ROOT_PORT_INTR_ON_MESG_MASK;
	pci_write_config_dword(dev, pos + PCI_ERR_ROOT_COMMAND, reg32);

	return rc ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
}

/**
 * aer_error_resume - clean up corresponding error status bits
 * @dev: pointer to Root Port's pci_dev data structure
 *
 * Invoked by Port Bus driver during nonfatal recovery.
 */
static void aer_error_resume(struct pci_dev *dev)
{
	pci_aer_clear_device_status(dev);
	pci_cleanup_aer_uncorrect_error_status(dev);
}

static struct pcie_port_service_driver aerdriver = {
	.name		= "aer",
	.port_type	= PCI_EXP_TYPE_ROOT_PORT,
	.service	= PCIE_PORT_SERVICE_AER,

	.probe		= aer_probe,
	.remove		= aer_remove,
	.error_resume	= aer_error_resume,
	.reset_link	= aer_root_reset,
};

/**
 * pcie_aer_init - register AER root service driver
 *
 * Invoked when AER root service driver is loaded.
 */
int __init pcie_aer_init(void)
{
	if (!pci_aer_available() || aer_acpi_firmware_first())
		return -ENXIO;
	return pcie_port_service_register(&aerdriver);
}