zynqmp_edac.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * Xilinx ZynqMP OCM ECC Driver
  4. *
  5. * Copyright (C) 2022 Advanced Micro Devices, Inc.
  6. */
  7. #include <linux/edac.h>
  8. #include <linux/interrupt.h>
  9. #include <linux/module.h>
  10. #include <linux/of.h>
  11. #include <linux/of_platform.h>
  12. #include <linux/platform_device.h>
  13. #include "edac_module.h"
  /* Size in bytes of the per-device buffer used to format error reports */
  #define ZYNQMP_OCM_EDAC_MSG_SIZE    256
  /* Name handed to the EDAC core for the control and block info */
  #define ZYNQMP_OCM_EDAC_STRING      "zynqmp_ocm"
  /* Tag used as the prefix of edac_printk() messages */
  #define EDAC_DEVICE                 "ZynqMP-OCM"

  /* Value OR-ed into the fault address register contents when logging */
  #define OCM_BASEVAL                 0xFFFC0000

  /*
   * Register offsets, relative to the controller base address
   */

  /* Error/Interrupt registers */
  #define ERR_CTRL_OFST               0x0
  #define OCM_ISR_OFST                0x04
  #define OCM_IMR_OFST                0x08
  #define OCM_IEN_OFST                0x0C
  #define OCM_IDS_OFST                0x10

  /* ECC control register */
  #define ECC_CTRL_OFST               0x14

  /* Correctable error info registers */
  #define CE_FFA_OFST                 0x1C
  #define CE_FFD0_OFST                0x20
  #define CE_FFD1_OFST                0x24
  #define CE_FFD2_OFST                0x28
  #define CE_FFD3_OFST                0x2C
  #define CE_FFE_OFST                 0x30

  /* Uncorrectable error info registers */
  #define UE_FFA_OFST                 0x34
  #define UE_FFD0_OFST                0x38
  #define UE_FFD1_OFST                0x3C
  #define UE_FFD2_OFST                0x40
  #define UE_FFD3_OFST                0x44
  #define UE_FFE_OFST                 0x48

  /* Fault injection data and count registers */
  #define OCM_FID0_OFST               0x4C
  #define OCM_FID1_OFST               0x50
  #define OCM_FID2_OFST               0x54
  #define OCM_FID3_OFST               0x58
  #define OCM_FIC_OFST                0x74

  /*
   * Bit field definitions
   */

  /*
   * Error-clear values.
   * NOTE(review): named after the ECC control register, but get_error_info()
   * writes them to OCM_ISR_OFST; confirm the intended register.
   */
  #define ECC_CTRL_CLR_CE_ERR         0x40
  #define ECC_CTRL_CLR_UE_ERR         0x80

  /* Interrupt masks */
  #define OCM_CEINTR_MASK             BIT(6)
  #define OCM_UEINTR_MASK             BIT(7)

  /* ECC enable flag in the ECC control register */
  #define OCM_ECC_ENABLE_MASK         BIT(0)

  /* Valid bits of the fault injection count register */
  #define OCM_FICOUNT_MASK            GENMASK(23, 0)

  /*
   * Fault injection bit positions: 0..31 select a bit of the lower data word,
   * 32..63 a bit of the upper data word.
   */
  #define UE_MAX_BITPOS_LOWER         31
  #define UE_MIN_BITPOS_UPPER         32
  #define UE_MAX_BITPOS_UPPER         63

  /* Number of bit positions supplied for an uncorrectable error injection */
  #define OCM_NUM_UE_BITPOS           2
  58. /**
  59. * struct ecc_error_info - ECC error log information
  60. * @addr: Fault generated at this address
  61. * @fault_lo: Generated fault data (lower 32-bit)
  62. * @fault_hi: Generated fault data (upper 32-bit)
  63. */
  64. struct ecc_error_info {
  65. u32 addr;
  66. u32 fault_lo;
  67. u32 fault_hi;
  68. };
  69. /**
  70. * struct ecc_status - ECC status information to report
  71. * @ce_cnt: Correctable error count
  72. * @ue_cnt: Uncorrectable error count
  73. * @ceinfo: Correctable error log information
  74. * @ueinfo: Uncorrectable error log information
  75. */
  76. struct ecc_status {
  77. u32 ce_cnt;
  78. u32 ue_cnt;
  79. struct ecc_error_info ceinfo;
  80. struct ecc_error_info ueinfo;
  81. };
  82. /**
  83. * struct edac_priv - OCM private instance data
  84. * @baseaddr: Base address of the OCM
  85. * @message: Buffer for framing the event specific info
  86. * @stat: ECC status information
  87. * @ce_cnt: Correctable Error count
  88. * @ue_cnt: Uncorrectable Error count
  89. * @debugfs_dir: Directory entry for debugfs
  90. * @ce_bitpos: Bit position for Correctable Error
  91. * @ue_bitpos: Array to store UnCorrectable Error bit positions
  92. * @fault_injection_cnt: Fault Injection Counter value
  93. */
  94. struct edac_priv {
  95. void __iomem *baseaddr;
  96. char message[ZYNQMP_OCM_EDAC_MSG_SIZE];
  97. struct ecc_status stat;
  98. u32 ce_cnt;
  99. u32 ue_cnt;
  100. #ifdef CONFIG_EDAC_DEBUG
  101. struct dentry *debugfs_dir;
  102. u8 ce_bitpos;
  103. u8 ue_bitpos[OCM_NUM_UE_BITPOS];
  104. u32 fault_injection_cnt;
  105. #endif
  106. };
  107. /**
  108. * get_error_info - Get the current ECC error info
  109. * @base: Pointer to the base address of the OCM
  110. * @p: Pointer to the OCM ECC status structure
  111. * @mask: Status register mask value
  112. *
  113. * Determines there is any ECC error or not
  114. *
  115. */
  116. static void get_error_info(void __iomem *base, struct ecc_status *p, int mask)
  117. {
  118. if (mask & OCM_CEINTR_MASK) {
  119. p->ce_cnt++;
  120. p->ceinfo.fault_lo = readl(base + CE_FFD0_OFST);
  121. p->ceinfo.fault_hi = readl(base + CE_FFD1_OFST);
  122. p->ceinfo.addr = (OCM_BASEVAL | readl(base + CE_FFA_OFST));
  123. writel(ECC_CTRL_CLR_CE_ERR, base + OCM_ISR_OFST);
  124. } else if (mask & OCM_UEINTR_MASK) {
  125. p->ue_cnt++;
  126. p->ueinfo.fault_lo = readl(base + UE_FFD0_OFST);
  127. p->ueinfo.fault_hi = readl(base + UE_FFD1_OFST);
  128. p->ueinfo.addr = (OCM_BASEVAL | readl(base + UE_FFA_OFST));
  129. writel(ECC_CTRL_CLR_UE_ERR, base + OCM_ISR_OFST);
  130. }
  131. }
  132. /**
  133. * handle_error - Handle error types CE and UE
  134. * @dci: Pointer to the EDAC device instance
  135. * @p: Pointer to the OCM ECC status structure
  136. *
  137. * Handles correctable and uncorrectable errors.
  138. */
  139. static void handle_error(struct edac_device_ctl_info *dci, struct ecc_status *p)
  140. {
  141. struct edac_priv *priv = dci->pvt_info;
  142. struct ecc_error_info *pinf;
  143. if (p->ce_cnt) {
  144. pinf = &p->ceinfo;
  145. snprintf(priv->message, ZYNQMP_OCM_EDAC_MSG_SIZE,
  146. "\nOCM ECC error type :%s\nAddr: [0x%x]\nFault Data[0x%08x%08x]",
  147. "CE", pinf->addr, pinf->fault_hi, pinf->fault_lo);
  148. edac_device_handle_ce(dci, 0, 0, priv->message);
  149. }
  150. if (p->ue_cnt) {
  151. pinf = &p->ueinfo;
  152. snprintf(priv->message, ZYNQMP_OCM_EDAC_MSG_SIZE,
  153. "\nOCM ECC error type :%s\nAddr: [0x%x]\nFault Data[0x%08x%08x]",
  154. "UE", pinf->addr, pinf->fault_hi, pinf->fault_lo);
  155. edac_device_handle_ue(dci, 0, 0, priv->message);
  156. }
  157. memset(p, 0, sizeof(*p));
  158. }
  159. /**
  160. * intr_handler - ISR routine
  161. * @irq: irq number
  162. * @dev_id: device id pointer
  163. *
  164. * Return: IRQ_NONE, if CE/UE interrupt not set or IRQ_HANDLED otherwise
  165. */
  166. static irqreturn_t intr_handler(int irq, void *dev_id)
  167. {
  168. struct edac_device_ctl_info *dci = dev_id;
  169. struct edac_priv *priv = dci->pvt_info;
  170. int regval;
  171. regval = readl(priv->baseaddr + OCM_ISR_OFST);
  172. if (!(regval & (OCM_CEINTR_MASK | OCM_UEINTR_MASK))) {
  173. WARN_ONCE(1, "Unhandled IRQ%d, ISR: 0x%x", irq, regval);
  174. return IRQ_NONE;
  175. }
  176. get_error_info(priv->baseaddr, &priv->stat, regval);
  177. priv->ce_cnt += priv->stat.ce_cnt;
  178. priv->ue_cnt += priv->stat.ue_cnt;
  179. handle_error(dci, &priv->stat);
  180. return IRQ_HANDLED;
  181. }
  182. /**
  183. * get_eccstate - Return the ECC status
  184. * @base: Pointer to the OCM base address
  185. *
  186. * Get the ECC enable/disable status
  187. *
  188. * Return: ECC status 0/1.
  189. */
  190. static bool get_eccstate(void __iomem *base)
  191. {
  192. return readl(base + ECC_CTRL_OFST) & OCM_ECC_ENABLE_MASK;
  193. }
  194. #ifdef CONFIG_EDAC_DEBUG
  195. /**
  196. * write_fault_count - write fault injection count
  197. * @priv: Pointer to the EDAC private struct
  198. *
  199. * Update the fault injection count register, once the counter reaches
  200. * zero, it injects errors
  201. */
  202. static void write_fault_count(struct edac_priv *priv)
  203. {
  204. u32 ficount = priv->fault_injection_cnt;
  205. if (ficount & ~OCM_FICOUNT_MASK) {
  206. ficount &= OCM_FICOUNT_MASK;
  207. edac_printk(KERN_INFO, EDAC_DEVICE,
  208. "Fault injection count value truncated to %d\n", ficount);
  209. }
  210. writel(ficount, priv->baseaddr + OCM_FIC_OFST);
  211. }
  212. /*
  213. * To get the Correctable Error injected, the following steps are needed:
  214. * - Setup the optional Fault Injection Count:
  215. * echo <fault_count val> > /sys/kernel/debug/edac/ocm/inject_fault_count
  216. * - Write the Correctable Error bit position value:
  217. * echo <bit_pos val> > /sys/kernel/debug/edac/ocm/inject_ce_bitpos
  218. */
  219. static ssize_t inject_ce_write(struct file *file, const char __user *data,
  220. size_t count, loff_t *ppos)
  221. {
  222. struct edac_device_ctl_info *edac_dev = file->private_data;
  223. struct edac_priv *priv = edac_dev->pvt_info;
  224. int ret;
  225. if (!data)
  226. return -EFAULT;
  227. ret = kstrtou8_from_user(data, count, 0, &priv->ce_bitpos);
  228. if (ret)
  229. return ret;
  230. if (priv->ce_bitpos > UE_MAX_BITPOS_UPPER)
  231. return -EINVAL;
  232. if (priv->ce_bitpos <= UE_MAX_BITPOS_LOWER) {
  233. writel(BIT(priv->ce_bitpos), priv->baseaddr + OCM_FID0_OFST);
  234. writel(0, priv->baseaddr + OCM_FID1_OFST);
  235. } else {
  236. writel(BIT(priv->ce_bitpos - UE_MIN_BITPOS_UPPER),
  237. priv->baseaddr + OCM_FID1_OFST);
  238. writel(0, priv->baseaddr + OCM_FID0_OFST);
  239. }
  240. write_fault_count(priv);
  241. return count;
  242. }
  243. static const struct file_operations inject_ce_fops = {
  244. .open = simple_open,
  245. .write = inject_ce_write,
  246. .llseek = generic_file_llseek,
  247. };
  248. /*
  249. * To get the Uncorrectable Error injected, the following steps are needed:
  250. * - Setup the optional Fault Injection Count:
  251. * echo <fault_count val> > /sys/kernel/debug/edac/ocm/inject_fault_count
  252. * - Write the Uncorrectable Error bit position values:
  253. * echo <bit_pos0 val>,<bit_pos1 val> > /sys/kernel/debug/edac/ocm/inject_ue_bitpos
  254. */
  255. static ssize_t inject_ue_write(struct file *file, const char __user *data,
  256. size_t count, loff_t *ppos)
  257. {
  258. struct edac_device_ctl_info *edac_dev = file->private_data;
  259. struct edac_priv *priv = edac_dev->pvt_info;
  260. char buf[6], *pbuf, *token[2];
  261. u64 ue_bitpos;
  262. int i, ret;
  263. u8 len;
  264. if (!data)
  265. return -EFAULT;
  266. len = min_t(size_t, count, sizeof(buf));
  267. if (copy_from_user(buf, data, len))
  268. return -EFAULT;
  269. buf[len] = '\0';
  270. pbuf = &buf[0];
  271. for (i = 0; i < OCM_NUM_UE_BITPOS; i++)
  272. token[i] = strsep(&pbuf, ",");
  273. ret = kstrtou8(token[0], 0, &priv->ue_bitpos[0]);
  274. if (ret)
  275. return ret;
  276. ret = kstrtou8(token[1], 0, &priv->ue_bitpos[1]);
  277. if (ret)
  278. return ret;
  279. if (priv->ue_bitpos[0] > UE_MAX_BITPOS_UPPER ||
  280. priv->ue_bitpos[1] > UE_MAX_BITPOS_UPPER)
  281. return -EINVAL;
  282. if (priv->ue_bitpos[0] == priv->ue_bitpos[1]) {
  283. edac_printk(KERN_ERR, EDAC_DEVICE, "Bit positions should not be equal\n");
  284. return -EINVAL;
  285. }
  286. ue_bitpos = BIT(priv->ue_bitpos[0]) | BIT(priv->ue_bitpos[1]);
  287. writel((u32)ue_bitpos, priv->baseaddr + OCM_FID0_OFST);
  288. writel((u32)(ue_bitpos >> 32), priv->baseaddr + OCM_FID1_OFST);
  289. write_fault_count(priv);
  290. return count;
  291. }
  292. static const struct file_operations inject_ue_fops = {
  293. .open = simple_open,
  294. .write = inject_ue_write,
  295. .llseek = generic_file_llseek,
  296. };
  297. static void setup_debugfs(struct edac_device_ctl_info *edac_dev)
  298. {
  299. struct edac_priv *priv = edac_dev->pvt_info;
  300. priv->debugfs_dir = edac_debugfs_create_dir("ocm");
  301. if (!priv->debugfs_dir)
  302. return;
  303. edac_debugfs_create_x32("inject_fault_count", 0644, priv->debugfs_dir,
  304. &priv->fault_injection_cnt);
  305. edac_debugfs_create_file("inject_ue_bitpos", 0644, priv->debugfs_dir,
  306. edac_dev, &inject_ue_fops);
  307. edac_debugfs_create_file("inject_ce_bitpos", 0644, priv->debugfs_dir,
  308. edac_dev, &inject_ce_fops);
  309. }
  310. #endif
  311. static int edac_probe(struct platform_device *pdev)
  312. {
  313. struct edac_device_ctl_info *dci;
  314. struct edac_priv *priv;
  315. void __iomem *baseaddr;
  316. struct resource *res;
  317. int irq, ret;
  318. baseaddr = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
  319. if (IS_ERR(baseaddr))
  320. return PTR_ERR(baseaddr);
  321. if (!get_eccstate(baseaddr)) {
  322. edac_printk(KERN_INFO, EDAC_DEVICE, "ECC not enabled\n");
  323. return -ENXIO;
  324. }
  325. dci = edac_device_alloc_ctl_info(sizeof(*priv), ZYNQMP_OCM_EDAC_STRING,
  326. 1, ZYNQMP_OCM_EDAC_STRING, 1, 0,
  327. edac_device_alloc_index());
  328. if (!dci)
  329. return -ENOMEM;
  330. priv = dci->pvt_info;
  331. platform_set_drvdata(pdev, dci);
  332. dci->dev = &pdev->dev;
  333. priv->baseaddr = baseaddr;
  334. dci->mod_name = pdev->dev.driver->name;
  335. dci->ctl_name = ZYNQMP_OCM_EDAC_STRING;
  336. dci->dev_name = dev_name(&pdev->dev);
  337. irq = platform_get_irq(pdev, 0);
  338. if (irq < 0) {
  339. ret = irq;
  340. goto free_dev_ctl;
  341. }
  342. ret = devm_request_irq(&pdev->dev, irq, intr_handler, 0,
  343. dev_name(&pdev->dev), dci);
  344. if (ret) {
  345. edac_printk(KERN_ERR, EDAC_DEVICE, "Failed to request Irq\n");
  346. goto free_dev_ctl;
  347. }
  348. /* Enable UE, CE interrupts */
  349. writel((OCM_CEINTR_MASK | OCM_UEINTR_MASK), priv->baseaddr + OCM_IEN_OFST);
  350. #ifdef CONFIG_EDAC_DEBUG
  351. setup_debugfs(dci);
  352. #endif
  353. ret = edac_device_add_device(dci);
  354. if (ret)
  355. goto free_dev_ctl;
  356. return 0;
  357. free_dev_ctl:
  358. edac_device_free_ctl_info(dci);
  359. return ret;
  360. }
  361. static void edac_remove(struct platform_device *pdev)
  362. {
  363. struct edac_device_ctl_info *dci = platform_get_drvdata(pdev);
  364. struct edac_priv *priv = dci->pvt_info;
  365. /* Disable UE, CE interrupts */
  366. writel((OCM_CEINTR_MASK | OCM_UEINTR_MASK), priv->baseaddr + OCM_IDS_OFST);
  367. #ifdef CONFIG_EDAC_DEBUG
  368. debugfs_remove_recursive(priv->debugfs_dir);
  369. #endif
  370. edac_device_del_device(&pdev->dev);
  371. edac_device_free_ctl_info(dci);
  372. }
  373. static const struct of_device_id zynqmp_ocm_edac_match[] = {
  374. { .compatible = "xlnx,zynqmp-ocmc-1.0"},
  375. { /* end of table */ }
  376. };
  377. MODULE_DEVICE_TABLE(of, zynqmp_ocm_edac_match);
  378. static struct platform_driver zynqmp_ocm_edac_driver = {
  379. .driver = {
  380. .name = "zynqmp-ocm-edac",
  381. .of_match_table = zynqmp_ocm_edac_match,
  382. },
  383. .probe = edac_probe,
  384. .remove_new = edac_remove,
  385. };
  386. module_platform_driver(zynqmp_ocm_edac_driver);
  387. MODULE_AUTHOR("Advanced Micro Devices, Inc");
  388. MODULE_DESCRIPTION("Xilinx ZynqMP OCM ECC driver");
  389. MODULE_LICENSE("GPL");