/* SPDX note: scraped viewer header and line-number runs removed — not part of the source. */
  1. /*
  2. * Cavium ThunderX memory controller kernel module
  3. *
  4. * This file is subject to the terms and conditions of the GNU General Public
  5. * License. See the file "COPYING" in the main directory of this archive
  6. * for more details.
  7. *
  8. * Copyright Cavium, Inc. (C) 2015-2017. All rights reserved.
  9. *
  10. */
  11. #include <linux/module.h>
  12. #include <linux/pci.h>
  13. #include <linux/edac.h>
  14. #include <linux/interrupt.h>
  15. #include <linux/string.h>
  16. #include <linux/stop_machine.h>
  17. #include <linux/delay.h>
  18. #include <linux/sizes.h>
  19. #include <linux/atomic.h>
  20. #include <linux/bitfield.h>
  21. #include <linux/circ_buf.h>
  22. #include <asm/page.h>
  23. #include "edac_module.h"
/* Convert a physical address to its page frame number. */
#define phys_to_pfn(phys)	(PFN_DOWN(phys))

/* Bits [45:44] of a CCPI physical address select the node. */
#define THUNDERX_NODE		GENMASK(45, 44)

/* Severity classes used by the error descriptor tables below. */
enum {
	ERR_CORRECTED	= 1,
	ERR_UNCORRECTED	= 2,
	ERR_UNKNOWN	= 3,
};

/* One decodable error bit: severity, register mask, message text. */
struct error_descr {
	int type;	/* ERR_CORRECTED / ERR_UNCORRECTED / ERR_UNKNOWN */
	u64 mask;	/* bit(s) in the interrupt/status register */
	char *descr;	/* human-readable description */
};
  36. static void decode_register(char *str, size_t size,
  37. const struct error_descr *descr,
  38. const uint64_t reg)
  39. {
  40. int ret = 0;
  41. while (descr->type && descr->mask && descr->descr) {
  42. if (reg & descr->mask) {
  43. ret = snprintf(str, size, "\n\t%s, %s",
  44. descr->type == ERR_CORRECTED ?
  45. "Corrected" : "Uncorrected",
  46. descr->descr);
  47. str += ret;
  48. size -= ret;
  49. }
  50. descr++;
  51. }
  52. }
  53. static unsigned long get_bits(unsigned long data, int pos, int width)
  54. {
  55. return (data >> pos) & ((1 << width) - 1);
  56. }
/*---------------------- LMC (memory controller) CSRs ---------------------*/

/* L2C control CSR base; DISIDXALIAS disables L2 index aliasing. */
#define L2C_CTL			0x87E080800000
#define L2C_CTL_DISIDXALIAS	BIT(0)

#define PCI_DEVICE_ID_THUNDER_LMC	0xa022

/* Failing-address register and its DIMM/rank/bank/row/column fields. */
#define LMC_FADR		0x20
#define LMC_FADR_FDIMM(x)	((x >> 37) & 0x1)
#define LMC_FADR_FBUNK(x)	((x >> 36) & 0x1)
#define LMC_FADR_FBANK(x)	((x >> 32) & 0xf)
#define LMC_FADR_FROW(x)	((x >> 14) & 0xffff)
#define LMC_FADR_FCOL(x)	((x >> 0) & 0x1fff)

#define LMC_NXM_FADR		0x28	/* non-existent-memory failing address */
#define LMC_ECC_SYND		0x38	/* ECC syndrome */

#define LMC_ECC_PARITY_TEST	0x108	/* ECC corruption/parity test control */

/* Interrupt registers (W1S = write-1-to-set, W1C = write-1-to-clear). */
#define LMC_INT_W1S		0x150

#define LMC_INT_ENA_W1C		0x158
#define LMC_INT_ENA_W1S		0x160

/* Configuration register fields used for address reconstruction. */
#define LMC_CONFIG		0x188

#define LMC_CONFIG_BG2		BIT(62)
#define LMC_CONFIG_RANK_ENA	BIT(42)
#define LMC_CONFIG_PBANK_LSB(x)	(((x) >> 5) & 0xF)
#define LMC_CONFIG_ROW_LSB(x)	(((x) >> 2) & 0x7)

#define LMC_CONTROL		0x190
#define LMC_CONTROL_XOR_BANK	BIT(16)

/* Interrupt status bits. */
#define LMC_INT			0x1F0

#define LMC_INT_DDR_ERR		BIT(11)
#define LMC_INT_DED_ERR		(0xFUL << 5)	/* double-bit, per ECC lane */
#define LMC_INT_SEC_ERR		(0xFUL << 1)	/* single-bit, per ECC lane */
#define LMC_INT_NXM_WR_MASK	BIT(0)

#define LMC_DDR_PLL_CTL		0x258
#define LMC_DDR_PLL_CTL_DDR4	BIT(29)

#define LMC_FADR_SCRAMBLED	0x330	/* scrambled failing address */

/* Uncorrected vs. corrected summary masks used by the threaded ISR. */
#define LMC_INT_UE              (LMC_INT_DDR_ERR | LMC_INT_DED_ERR | \
				 LMC_INT_NXM_WR_MASK)

#define LMC_INT_CE		(LMC_INT_SEC_ERR)
/* Decode table for LMC_INT; terminated by the all-zero sentinel entry. */
static const struct error_descr lmc_errors[] = {
	{
		.type  = ERR_CORRECTED,
		.mask  = LMC_INT_SEC_ERR,
		.descr = "Single-bit ECC error",
	},
	{
		.type  = ERR_UNCORRECTED,
		.mask  = LMC_INT_DDR_ERR,
		.descr = "DDR chip error",
	},
	{
		.type  = ERR_UNCORRECTED,
		.mask  = LMC_INT_DED_ERR,
		.descr = "Double-bit ECC error",
	},
	{
		.type  = ERR_UNCORRECTED,
		.mask  = LMC_INT_NXM_WR_MASK,
		.descr = "Non-existent memory write",
	},
	{0, 0, NULL},
};
/* LMC interrupt-enable bit positions. */
#define LMC_INT_EN_DDR_ERROR_ALERT_ENA	BIT(5)
#define LMC_INT_EN_DLCRAM_DED_ERR	BIT(4)
#define LMC_INT_EN_DLCRAM_SEC_ERR	BIT(3)
#define LMC_INT_INTR_DED_ENA		BIT(2)
#define LMC_INT_INTR_SEC_ENA		BIT(1)
#define LMC_INT_INTR_NXM_WR_ENA		BIT(0)

#define LMC_INT_ENA_ALL			GENMASK(5, 0)

/*
 * NOTE(review): LMC_DDR_PLL_CTL, LMC_DDR_PLL_CTL_DDR4 and LMC_CONTROL are
 * re-defined here with the same values as above — harmless duplicates,
 * candidates for removal.
 */
#define LMC_DDR_PLL_CTL		0x258
#define LMC_DDR_PLL_CTL_DDR4	BIT(29)

#define LMC_CONTROL		0x190
#define LMC_CONTROL_RDIMM	BIT(0)

#define LMC_SCRAM_FADR		0x330	/* same CSR as LMC_FADR_SCRAMBLED */

/* ECC characterization (injection) mask registers. */
#define LMC_CHAR_MASK0		0x228
#define LMC_CHAR_MASK2		0x238

/* Error-context ring depth; must stay a power of two for ring_pos(). */
#define RING_ENTRIES	8
/* One debugfs attribute: file name, permissions and file_operations. */
struct debugfs_entry {
	const char *name;
	umode_t mode;
	const struct file_operations fops;
};

/* Snapshot of the LMC error CSRs captured in hard-IRQ context. */
struct lmc_err_ctx {
	u64 reg_int;		/* LMC_INT at interrupt time */
	u64 reg_fadr;		/* failing address */
	u64 reg_nxm_fadr;	/* non-existent-memory failing address */
	u64 reg_scram_fadr;	/* scrambled failing address */
	u64 reg_ecc_synd;	/* ECC syndrome */
};

/* Per-LMC driver state (lives in mci->pvt_info). */
struct thunderx_lmc {
	void __iomem *regs;	/* BAR0 CSR mapping */
	struct pci_dev *pdev;
	struct msix_entry msix_ent;

	atomic_t ecc_int;	/* set by ISR; polled by the ECC injector */

	u64 mask0;		/* debugfs-settable LMC_CHAR_MASK0 value */
	u64 mask2;		/* debugfs-settable LMC_CHAR_MASK2 value */
	u64 parity_test;	/* debugfs-settable LMC_ECC_PARITY_TEST value */
	u64 node;		/* node this controller belongs to */

	/* Geometry used to rebuild a physical address from LMC_FADR. */
	int xbits;
	int bank_width;
	int pbank_lsb;
	int dimm_lsb;
	int rank_lsb;
	int bank_lsb;
	int row_lsb;
	int col_hi_lsb;

	int xor_bank;		/* LMC_CONTROL[XOR_BANK] set */
	int l2c_alias;		/* L2 index aliasing enabled */

	struct page *mem;	/* page used as ECC-injection target */

	/* Single-producer (hard ISR) / single-consumer (thread) ring. */
	struct lmc_err_ctx err_ctx[RING_ENTRIES];
	unsigned long ring_head;
	unsigned long ring_tail;
};
  164. #define ring_pos(pos, size) ((pos) & (size - 1))
/*
 * Define a struct debugfs_entry named debugfs_<_name> using simple_open()
 * and the given read/write handlers.
 */
#define DEBUGFS_STRUCT(_name, _mode, _write, _read)			    \
static struct debugfs_entry debugfs_##_name = {				    \
	.name = __stringify(_name),					    \
	.mode = VERIFY_OCTAL_PERMISSIONS(_mode),			    \
	.fops = {							    \
		.open = simple_open,					    \
		.write = _write,					    \
		.read = _read,						    \
		.llseek = generic_file_llseek,				    \
	},								    \
}
  176. #define DEBUGFS_FIELD_ATTR(_type, _field) \
  177. static ssize_t thunderx_##_type##_##_field##_read(struct file *file, \
  178. char __user *data, \
  179. size_t count, loff_t *ppos) \
  180. { \
  181. struct thunderx_##_type *pdata = file->private_data; \
  182. char buf[20]; \
  183. \
  184. snprintf(buf, count, "0x%016llx", pdata->_field); \
  185. return simple_read_from_buffer(data, count, ppos, \
  186. buf, sizeof(buf)); \
  187. } \
  188. \
  189. static ssize_t thunderx_##_type##_##_field##_write(struct file *file, \
  190. const char __user *data, \
  191. size_t count, loff_t *ppos) \
  192. { \
  193. struct thunderx_##_type *pdata = file->private_data; \
  194. int res; \
  195. \
  196. res = kstrtoull_from_user(data, count, 0, &pdata->_field); \
  197. \
  198. return res ? res : count; \
  199. } \
  200. \
  201. DEBUGFS_STRUCT(_field, 0600, \
  202. thunderx_##_type##_##_field##_write, \
  203. thunderx_##_type##_##_field##_read) \
/*
 * Generate debugfs read/write fops for a raw CSR at offset _reg.
 * Reads print the live register value; writes store a parsed u64
 * straight into the register. The fixed "0x%016llx" output is 18 chars
 * plus NUL, so sprintf() into buf[20] cannot overflow.
 */
#define DEBUGFS_REG_ATTR(_type, _name, _reg)				    \
static ssize_t thunderx_##_type##_##_name##_read(struct file *file,	    \
					   char __user *data,		    \
					   size_t count, loff_t *ppos)	    \
{									    \
	struct thunderx_##_type *pdata = file->private_data;		    \
	char buf[20];							    \
									    \
	sprintf(buf, "0x%016llx", readq(pdata->regs + _reg));		    \
	return simple_read_from_buffer(data, count, ppos,		    \
				       buf, sizeof(buf));		    \
}									    \
									    \
static ssize_t thunderx_##_type##_##_name##_write(struct file *file,	    \
					    const char __user *data,	    \
					    size_t count, loff_t *ppos)	    \
{									    \
	struct thunderx_##_type *pdata = file->private_data;		    \
	u64 val;							    \
	int res;							    \
									    \
	res = kstrtoull_from_user(data, count, 0, &val);		    \
									    \
	if (!res) {							    \
		writeq(val, pdata->regs + _reg);			    \
		res = count;						    \
	}								    \
									    \
	return res;							    \
}									    \
									    \
DEBUGFS_STRUCT(_name, 0600,						    \
	       thunderx_##_type##_##_name##_write,			    \
	       thunderx_##_type##_##_name##_read)

/* Shorthand for LMC u64-field attributes. */
#define LMC_DEBUGFS_ENT(_field)	DEBUGFS_FIELD_ATTR(lmc, _field)
  239. /*
  240. * To get an ECC error injected, the following steps are needed:
  241. * - Setup the ECC injection by writing the appropriate parameters:
  242. * echo <bit mask value> > /sys/kernel/debug/<device number>/ecc_mask0
  243. * echo <bit mask value> > /sys/kernel/debug/<device number>/ecc_mask2
  244. * echo 0x802 > /sys/kernel/debug/<device number>/ecc_parity_test
  245. * - Do the actual injection:
  246. * echo 1 > /sys/kernel/debug/<device number>/inject_ecc
  247. */
  248. static ssize_t thunderx_lmc_inject_int_write(struct file *file,
  249. const char __user *data,
  250. size_t count, loff_t *ppos)
  251. {
  252. struct thunderx_lmc *lmc = file->private_data;
  253. u64 val;
  254. int res;
  255. res = kstrtoull_from_user(data, count, 0, &val);
  256. if (!res) {
  257. /* Trigger the interrupt */
  258. writeq(val, lmc->regs + LMC_INT_W1S);
  259. res = count;
  260. }
  261. return res;
  262. }
  263. static ssize_t thunderx_lmc_int_read(struct file *file,
  264. char __user *data,
  265. size_t count, loff_t *ppos)
  266. {
  267. struct thunderx_lmc *lmc = file->private_data;
  268. char buf[20];
  269. u64 lmc_int = readq(lmc->regs + LMC_INT);
  270. snprintf(buf, sizeof(buf), "0x%016llx", lmc_int);
  271. return simple_read_from_buffer(data, count, ppos, buf, sizeof(buf));
  272. }
#define TEST_PATTERN 0xa5

/*
 * Runs under stop_machine(): fill the target page with TEST_PATTERN and
 * walk it through L1 -> L2 -> DRAM with the ECC corruption masks armed,
 * so a subsequent read from DRAM raises an ECC error interrupt.
 */
static int inject_ecc_fn(void *arg)
{
	struct thunderx_lmc *lmc = arg;
	uintptr_t addr, phys;
	unsigned int cline_size = cache_line_size();
	const unsigned int lines = PAGE_SIZE / cline_size;
	unsigned int i, cl_idx;

	addr = (uintptr_t)page_address(lmc->mem);
	phys = (uintptr_t)page_to_phys(lmc->mem);

	/* Encode the target's cacheline index into bits [10:8]. */
	cl_idx = (phys & 0x7f) >> 4;
	lmc->parity_test &= ~(7ULL << 8);
	lmc->parity_test |= (cl_idx << 8);

	writeq(lmc->mask0, lmc->regs + LMC_CHAR_MASK0);
	writeq(lmc->mask2, lmc->regs + LMC_CHAR_MASK2);
	writeq(lmc->parity_test, lmc->regs + LMC_ECC_PARITY_TEST);

	/* Read the registers back so the writes are known to have posted. */
	readq(lmc->regs + LMC_CHAR_MASK0);
	readq(lmc->regs + LMC_CHAR_MASK2);
	readq(lmc->regs + LMC_ECC_PARITY_TEST);

	for (i = 0; i < lines; i++) {
		memset((void *)addr, TEST_PATTERN, cline_size);
		barrier();

		/*
		 * Flush L1 cachelines to the PoC (L2).
		 * This will cause cacheline eviction to the L2.
		 */
		asm volatile("dc civac, %0\n"
			     "dsb sy\n"
			     : : "r"(addr + i * cline_size));
	}

	for (i = 0; i < lines; i++) {
		/*
		 * Flush L2 cachelines to the DRAM.
		 * This will cause cacheline eviction to the DRAM
		 * and ECC corruption according to the masks set.
		 */
		__asm__ volatile("sys #0,c11,C1,#2, %0\n"
				 : : "r"(phys + i * cline_size));
	}

	for (i = 0; i < lines; i++) {
		/*
		 * Invalidate L2 cachelines.
		 * The subsequent load will cause cacheline fetch
		 * from the DRAM and an error interrupt
		 */
		__asm__ volatile("sys #0,c11,C1,#1, %0"
				 : : "r"(phys + i * cline_size));
	}

	for (i = 0; i < lines; i++) {
		/*
		 * Invalidate L1 cachelines.
		 * The subsequent load will cause cacheline fetch
		 * from the L2 and/or DRAM
		 */
		asm volatile("dc ivac, %0\n"
			     "dsb sy\n"
			     : : "r"(addr + i * cline_size));
	}

	return 0;
}
/*
 * debugfs "inject_ecc" write handler: allocate a page on the LMC's node,
 * run the injector under stop_machine(), then read the page back until
 * the hard ISR reports the expected ECC interrupt via lmc->ecc_int (or
 * the retry budget is exhausted).
 */
static ssize_t thunderx_lmc_inject_ecc_write(struct file *file,
					     const char __user *data,
					     size_t count, loff_t *ppos)
{
	struct thunderx_lmc *lmc = file->private_data;

	unsigned int cline_size = cache_line_size();

	u8 *tmp;
	void __iomem *addr;
	unsigned int offs, timeout = 100000;

	atomic_set(&lmc->ecc_int, 0);

	lmc->mem = alloc_pages_node(lmc->node, GFP_KERNEL, 0);

	if (!lmc->mem)
		return -ENOMEM;

	/* Bounce buffer for the verification reads below. */
	tmp = kmalloc(cline_size, GFP_KERNEL);
	if (!tmp) {
		__free_pages(lmc->mem, 0);
		return -ENOMEM;
	}

	addr = page_address(lmc->mem);

	while (!atomic_read(&lmc->ecc_int) && timeout--) {
		stop_machine(inject_ecc_fn, lmc, NULL);

		for (offs = 0; offs < PAGE_SIZE; offs += cline_size) {
			/*
			 * Do a load from the previously rigged location
			 * This should generate an error interrupt.
			 */
			memcpy(tmp, addr + offs, cline_size);
			asm volatile("dsb ld\n");
		}
	}

	kfree(tmp);
	__free_pages(lmc->mem, 0);

	return count;
}
/* Per-field and per-action debugfs attributes for the LMC. */
LMC_DEBUGFS_ENT(mask0);
LMC_DEBUGFS_ENT(mask2);
LMC_DEBUGFS_ENT(parity_test);

DEBUGFS_STRUCT(inject_int, 0200, thunderx_lmc_inject_int_write, NULL);
DEBUGFS_STRUCT(inject_ecc, 0200, thunderx_lmc_inject_ecc_write, NULL);
DEBUGFS_STRUCT(int_w1c, 0400, NULL, thunderx_lmc_int_read);

/* Nodes created under the MC's debugfs directory at probe time. */
static struct debugfs_entry *lmc_dfs_ents[] = {
	&debugfs_mask0,
	&debugfs_mask2,
	&debugfs_parity_test,
	&debugfs_inject_ecc,
	&debugfs_inject_int,
	&debugfs_int_w1c,
};
  381. static int thunderx_create_debugfs_nodes(struct dentry *parent,
  382. struct debugfs_entry *attrs[],
  383. void *data,
  384. size_t num)
  385. {
  386. int i;
  387. struct dentry *ent;
  388. if (!IS_ENABLED(CONFIG_EDAC_DEBUG))
  389. return 0;
  390. if (!parent)
  391. return -ENOENT;
  392. for (i = 0; i < num; i++) {
  393. ent = edac_debugfs_create_file(attrs[i]->name, attrs[i]->mode,
  394. parent, data, &attrs[i]->fops);
  395. if (IS_ERR(ent))
  396. break;
  397. }
  398. return i;
  399. }
/*
 * Reconstruct the physical address of a failure from the LMC_FADR fields
 * using the geometry (LSB positions, xor/alias settings) captured at
 * probe time.
 */
static phys_addr_t thunderx_faddr_to_phys(u64 faddr, struct thunderx_lmc *lmc)
{
	phys_addr_t addr = 0;
	int bank, xbits;

	/* Node number occupies bit 40 upward of the physical address. */
	addr |= lmc->node << 40;
	addr |= LMC_FADR_FDIMM(faddr) << lmc->dimm_lsb;
	addr |= LMC_FADR_FBUNK(faddr) << lmc->rank_lsb;
	addr |= LMC_FADR_FROW(faddr) << lmc->row_lsb;
	addr |= (LMC_FADR_FCOL(faddr) >> 4) << lmc->col_hi_lsb;

	/*
	 * NOTE(review): FBANK is shifted by bank_lsb here and the result
	 * is shifted by bank_lsb again when OR-ed into addr below, which
	 * looks like a double shift — confirm against the HRM / upstream.
	 */
	bank = LMC_FADR_FBANK(faddr) << lmc->bank_lsb;

	if (lmc->xor_bank)
		bank ^= get_bits(addr, 12 + lmc->xbits, lmc->bank_width);

	addr |= bank << lmc->bank_lsb;

	xbits = PCI_FUNC(lmc->pdev->devfn);

	if (lmc->l2c_alias)
		xbits ^= get_bits(addr, 20, lmc->xbits) ^
			 get_bits(addr, 12, lmc->xbits);

	addr |= xbits << 7;

	return addr;
}
  420. static unsigned int thunderx_get_num_lmcs(unsigned int node)
  421. {
  422. unsigned int number = 0;
  423. struct pci_dev *pdev = NULL;
  424. do {
  425. pdev = pci_get_device(PCI_VENDOR_ID_CAVIUM,
  426. PCI_DEVICE_ID_THUNDER_LMC,
  427. pdev);
  428. if (pdev) {
  429. #ifdef CONFIG_NUMA
  430. if (pdev->dev.numa_node == node)
  431. number++;
  432. #else
  433. number++;
  434. #endif
  435. }
  436. } while (pdev);
  437. return number;
  438. }
#define LMC_MESSAGE_SIZE	120
/* One decode_register() entry is well under 50 bytes per descriptor. */
#define LMC_OTHER_SIZE		(50 * ARRAY_SIZE(lmc_errors))

/*
 * Hard-IRQ half: snapshot the error CSRs into the next ring slot, stop
 * any in-progress ECC injection, ack the interrupt and wake the
 * threaded half.
 */
static irqreturn_t thunderx_lmc_err_isr(int irq, void *dev_id)
{
	struct mem_ctl_info *mci = dev_id;
	struct thunderx_lmc *lmc = mci->pvt_info;

	unsigned long head = ring_pos(lmc->ring_head, ARRAY_SIZE(lmc->err_ctx));
	struct lmc_err_ctx *ctx = &lmc->err_ctx[head];

	/* Disarm the injection masks so the error does not keep recurring. */
	writeq(0, lmc->regs + LMC_CHAR_MASK0);
	writeq(0, lmc->regs + LMC_CHAR_MASK2);
	writeq(0x2, lmc->regs + LMC_ECC_PARITY_TEST);

	ctx->reg_int = readq(lmc->regs + LMC_INT);
	ctx->reg_fadr = readq(lmc->regs + LMC_FADR);
	ctx->reg_nxm_fadr = readq(lmc->regs + LMC_NXM_FADR);
	ctx->reg_scram_fadr = readq(lmc->regs + LMC_SCRAM_FADR);
	ctx->reg_ecc_synd = readq(lmc->regs + LMC_ECC_SYND);

	lmc->ring_head++;

	/* Tell the injector loop that the expected interrupt arrived. */
	atomic_set(&lmc->ecc_int, 1);

	/* Clear the interrupt */
	writeq(ctx->reg_int, lmc->regs + LMC_INT);

	return IRQ_WAKE_THREAD;
}
/*
 * Threaded half: drain the error ring filled by thunderx_lmc_err_isr(),
 * decode each snapshot and report it to the EDAC core with the
 * reconstructed physical address.
 */
static irqreturn_t thunderx_lmc_threaded_isr(int irq, void *dev_id)
{
	struct mem_ctl_info *mci = dev_id;
	struct thunderx_lmc *lmc = mci->pvt_info;
	phys_addr_t phys_addr;

	unsigned long tail;
	struct lmc_err_ctx *ctx;

	irqreturn_t ret = IRQ_NONE;

	char *msg;
	char *other;

	msg = kmalloc(LMC_MESSAGE_SIZE, GFP_KERNEL);
	other = kmalloc(LMC_OTHER_SIZE, GFP_KERNEL);

	if (!msg || !other)
		goto err_free;

	while (CIRC_CNT(lmc->ring_head, lmc->ring_tail,
			ARRAY_SIZE(lmc->err_ctx))) {
		tail = ring_pos(lmc->ring_tail, ARRAY_SIZE(lmc->err_ctx));

		ctx = &lmc->err_ctx[tail];

		dev_dbg(&lmc->pdev->dev, "LMC_INT: %016llx\n",
			ctx->reg_int);
		dev_dbg(&lmc->pdev->dev, "LMC_FADR: %016llx\n",
			ctx->reg_fadr);
		dev_dbg(&lmc->pdev->dev, "LMC_NXM_FADR: %016llx\n",
			ctx->reg_nxm_fadr);
		dev_dbg(&lmc->pdev->dev, "LMC_SCRAM_FADR: %016llx\n",
			ctx->reg_scram_fadr);
		dev_dbg(&lmc->pdev->dev, "LMC_ECC_SYND: %016llx\n",
			ctx->reg_ecc_synd);

		/* DIMM location decoded from the scrambled failing address. */
		snprintf(msg, LMC_MESSAGE_SIZE,
			 "DIMM %lld rank %lld bank %lld row %lld col %lld",
			 LMC_FADR_FDIMM(ctx->reg_scram_fadr),
			 LMC_FADR_FBUNK(ctx->reg_scram_fadr),
			 LMC_FADR_FBANK(ctx->reg_scram_fadr),
			 LMC_FADR_FROW(ctx->reg_scram_fadr),
			 LMC_FADR_FCOL(ctx->reg_scram_fadr));

		decode_register(other, LMC_OTHER_SIZE, lmc_errors,
				ctx->reg_int);

		phys_addr = thunderx_faddr_to_phys(ctx->reg_fadr, lmc);

		if (ctx->reg_int & LMC_INT_UE)
			edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
					     phys_to_pfn(phys_addr),
					     offset_in_page(phys_addr),
					     0, -1, -1, -1, msg, other);
		else if (ctx->reg_int & LMC_INT_CE)
			edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
					     phys_to_pfn(phys_addr),
					     offset_in_page(phys_addr),
					     0, -1, -1, -1, msg, other);

		lmc->ring_tail++;
	}

	ret = IRQ_HANDLED;

err_free:
	kfree(msg);
	kfree(other);

	return ret;
}
/* PCI IDs served by this driver (one entry per LMC function). */
static const struct pci_device_id thunderx_lmc_pci_tbl[] = {
	{ PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_LMC) },
	{ 0, },
};
  521. static inline int pci_dev_to_mc_idx(struct pci_dev *pdev)
  522. {
  523. int node = dev_to_node(&pdev->dev);
  524. int ret = PCI_FUNC(pdev->devfn);
  525. ret += max(node, 0) << 3;
  526. return ret;
  527. }
/*
 * Probe one LMC PCI function: map its CSRs, register an EDAC memory
 * controller, hook up the MSI-X error interrupt, capture the DRAM
 * geometry needed for address decode and (optionally) create the
 * debugfs injection nodes.
 */
static int thunderx_lmc_probe(struct pci_dev *pdev,
				const struct pci_device_id *id)
{
	struct thunderx_lmc *lmc;
	struct edac_mc_layer layer;
	struct mem_ctl_info *mci;
	u64 lmc_control, lmc_ddr_pll_ctl, lmc_config;
	int ret;
	u64 lmc_int;
	void *l2c_ioaddr;

	/* One layer, two slots (DIMMs) per controller. */
	layer.type = EDAC_MC_LAYER_SLOT;
	layer.size = 2;
	layer.is_virt_csrow = false;

	ret = pcim_enable_device(pdev);
	if (ret) {
		dev_err(&pdev->dev, "Cannot enable PCI device: %d\n", ret);
		return ret;
	}

	ret = pcim_iomap_regions(pdev, BIT(0), "thunderx_lmc");
	if (ret) {
		dev_err(&pdev->dev, "Cannot map PCI resources: %d\n", ret);
		return ret;
	}

	mci = edac_mc_alloc(pci_dev_to_mc_idx(pdev), 1, &layer,
			    sizeof(struct thunderx_lmc));
	if (!mci)
		return -ENOMEM;

	mci->pdev = &pdev->dev;
	lmc = mci->pvt_info;

	pci_set_drvdata(pdev, mci);

	lmc->regs = pcim_iomap_table(pdev)[0];

	lmc_control = readq(lmc->regs + LMC_CONTROL);
	lmc_ddr_pll_ctl = readq(lmc->regs + LMC_DDR_PLL_CTL);
	lmc_config = readq(lmc->regs + LMC_CONFIG);

	/* RDIMM vs UDIMM and DDR3 vs DDR4 select the reported memory type. */
	if (lmc_control & LMC_CONTROL_RDIMM) {
		mci->mtype_cap = FIELD_GET(LMC_DDR_PLL_CTL_DDR4,
					   lmc_ddr_pll_ctl) ?
				MEM_RDDR4 : MEM_RDDR3;
	} else {
		mci->mtype_cap = FIELD_GET(LMC_DDR_PLL_CTL_DDR4,
					   lmc_ddr_pll_ctl) ?
				MEM_DDR4 : MEM_DDR3;
	}

	mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED;
	mci->edac_cap = EDAC_FLAG_SECDED;

	mci->mod_name = "thunderx-lmc";
	mci->ctl_name = "thunderx-lmc";
	mci->dev_name = dev_name(&pdev->dev);
	mci->scrub_mode = SCRUB_NONE;

	lmc->pdev = pdev;
	lmc->msix_ent.entry = 0;

	lmc->ring_head = 0;
	lmc->ring_tail = 0;

	ret = pci_enable_msix_exact(pdev, &lmc->msix_ent, 1);
	if (ret) {
		dev_err(&pdev->dev, "Cannot enable interrupt: %d\n", ret);
		goto err_free;
	}

	ret = devm_request_threaded_irq(&pdev->dev, lmc->msix_ent.vector,
					thunderx_lmc_err_isr,
					thunderx_lmc_threaded_isr, 0,
					"[EDAC] ThunderX LMC", mci);
	if (ret) {
		dev_err(&pdev->dev, "Cannot set ISR: %d\n", ret);
		goto err_free;
	}

	/* The node number is encoded in the CSR base address. */
	lmc->node = FIELD_GET(THUNDERX_NODE, pci_resource_start(pdev, 0));

	lmc->xbits = thunderx_get_num_lmcs(lmc->node) >> 1;
	lmc->bank_width = (FIELD_GET(LMC_DDR_PLL_CTL_DDR4, lmc_ddr_pll_ctl) &&
			   FIELD_GET(LMC_CONFIG_BG2, lmc_config)) ? 4 : 3;

	/* Bit positions used later by thunderx_faddr_to_phys(). */
	lmc->pbank_lsb = (lmc_config >> 5) & 0xf;
	lmc->dimm_lsb = 28 + lmc->pbank_lsb + lmc->xbits;
	lmc->rank_lsb = lmc->dimm_lsb;
	lmc->rank_lsb -= FIELD_GET(LMC_CONFIG_RANK_ENA, lmc_config) ? 1 : 0;
	lmc->bank_lsb = 7 + lmc->xbits;
	lmc->row_lsb = 14 + LMC_CONFIG_ROW_LSB(lmc_config) + lmc->xbits;

	lmc->col_hi_lsb = lmc->bank_lsb + lmc->bank_width;

	lmc->xor_bank = lmc_control & LMC_CONTROL_XOR_BANK;

	/* Peek at the L2C to learn whether index aliasing is enabled. */
	l2c_ioaddr = ioremap(L2C_CTL | FIELD_PREP(THUNDERX_NODE, lmc->node), PAGE_SIZE);
	if (!l2c_ioaddr) {
		dev_err(&pdev->dev, "Cannot map L2C_CTL\n");
		ret = -ENOMEM;
		goto err_free;
	}

	lmc->l2c_alias = !(readq(l2c_ioaddr) & L2C_CTL_DISIDXALIAS);

	iounmap(l2c_ioaddr);

	ret = edac_mc_add_mc(mci);
	if (ret) {
		dev_err(&pdev->dev, "Cannot add the MC: %d\n", ret);
		goto err_free;
	}

	/* Ack anything pending, then enable all LMC error interrupts. */
	lmc_int = readq(lmc->regs + LMC_INT);
	writeq(lmc_int, lmc->regs + LMC_INT);

	writeq(LMC_INT_ENA_ALL, lmc->regs + LMC_INT_ENA_W1S);

	if (IS_ENABLED(CONFIG_EDAC_DEBUG)) {
		ret = thunderx_create_debugfs_nodes(mci->debugfs,
						    lmc_dfs_ents,
						    lmc,
						    ARRAY_SIZE(lmc_dfs_ents));

		if (ret != ARRAY_SIZE(lmc_dfs_ents)) {
			dev_warn(&pdev->dev, "Error creating debugfs entries: %d%s\n",
				 ret, ret >= 0 ? " created" : "");
		}
	}

	return 0;

err_free:
	pci_set_drvdata(pdev, NULL);
	edac_mc_free(mci);

	return ret;
}
/* Tear down one LMC: mask its interrupts, then unregister and free the MC. */
static void thunderx_lmc_remove(struct pci_dev *pdev)
{
	struct mem_ctl_info *mci = pci_get_drvdata(pdev);
	struct thunderx_lmc *lmc = mci->pvt_info;

	/* W1C: disable every LMC error interrupt source first. */
	writeq(LMC_INT_ENA_ALL, lmc->regs + LMC_INT_ENA_W1C);

	edac_mc_del_mc(&pdev->dev);
	edac_mc_free(mci);
}
MODULE_DEVICE_TABLE(pci, thunderx_lmc_pci_tbl);

/* PCI driver glue for the LMC EDAC part of this module. */
static struct pci_driver thunderx_lmc_driver = {
	.name     = "thunderx_lmc_edac",
	.probe    = thunderx_lmc_probe,
	.remove   = thunderx_lmc_remove,
	.id_table = thunderx_lmc_pci_tbl,
};
/*---------------------- OCX driver ---------------------------------*/

#define PCI_DEVICE_ID_THUNDER_OCX 0xa013

#define OCX_LINK_INTS		3
#define OCX_INTS		(OCX_LINK_INTS + 1)
#define OCX_RX_LANES		24
#define OCX_RX_LANE_STATS	15

/* OCX common interrupt CSRs (W1S/ENA variants) and their error bits. */
#define OCX_COM_INT		0x100
#define OCX_COM_INT_W1S		0x108
#define OCX_COM_INT_ENA_W1S	0x110
#define OCX_COM_INT_ENA_W1C	0x118

#define OCX_COM_IO_BADID	BIT(54)
#define OCX_COM_MEM_BADID	BIT(53)
#define OCX_COM_COPR_BADID	BIT(52)
#define OCX_COM_WIN_REQ_BADID	BIT(51)
#define OCX_COM_WIN_REQ_TOUT	BIT(50)
#define OCX_COM_RX_LANE		GENMASK(23, 0)

/* All of the "common" errors are reported as corrected (informational). */
#define OCX_COM_INT_CE		(OCX_COM_IO_BADID | \
				 OCX_COM_MEM_BADID | \
				 OCX_COM_COPR_BADID | \
				 OCX_COM_WIN_REQ_BADID | \
				 OCX_COM_WIN_REQ_TOUT)

/* Decode table for OCX_COM_INT; terminated by the all-zero sentinel. */
static const struct error_descr ocx_com_errors[] = {
	{
		.type  = ERR_CORRECTED,
		.mask  = OCX_COM_IO_BADID,
		.descr = "Invalid IO transaction node ID",
	},
	{
		.type  = ERR_CORRECTED,
		.mask  = OCX_COM_MEM_BADID,
		.descr = "Invalid memory transaction node ID",
	},
	{
		.type  = ERR_CORRECTED,
		.mask  = OCX_COM_COPR_BADID,
		.descr = "Invalid coprocessor transaction node ID",
	},
	{
		.type  = ERR_CORRECTED,
		.mask  = OCX_COM_WIN_REQ_BADID,
		.descr = "Invalid SLI transaction node ID",
	},
	{
		.type  = ERR_CORRECTED,
		.mask  = OCX_COM_WIN_REQ_TOUT,
		.descr = "Window/core request timeout",
	},
	{0, 0, NULL},
};
/* Per-link interrupt registers, one set of four per CCPI link x. */
#define OCX_COM_LINKX_INT(x)		(0x120 + (x) * 8)
#define OCX_COM_LINKX_INT_W1S(x)	(0x140 + (x) * 8)
#define OCX_COM_LINKX_INT_ENA_W1S(x)	(0x160 + (x) * 8)
#define OCX_COM_LINKX_INT_ENA_W1C(x)	(0x180 + (x) * 8)

/* OCX_COM_LINKX_INT status bits */
#define OCX_COM_LINK_BAD_WORD		BIT(13)
#define OCX_COM_LINK_ALIGN_FAIL		BIT(12)
#define OCX_COM_LINK_ALIGN_DONE		BIT(11)
#define OCX_COM_LINK_UP			BIT(10)
#define OCX_COM_LINK_STOP		BIT(9)
#define OCX_COM_LINK_BLK_ERR		BIT(8)
#define OCX_COM_LINK_REINIT		BIT(7)
#define OCX_COM_LINK_LNK_DATA		BIT(6)
#define OCX_COM_LINK_RXFIFO_DBE		BIT(5)
#define OCX_COM_LINK_RXFIFO_SBE		BIT(4)
#define OCX_COM_LINK_TXFIFO_DBE		BIT(3)
#define OCX_COM_LINK_TXFIFO_SBE		BIT(2)
#define OCX_COM_LINK_REPLAY_DBE		BIT(1)
#define OCX_COM_LINK_REPLAY_SBE		BIT(0)
/*
 * Decode table for OCX_COM_LINKX_INT bits. Single-bit (SBE) conditions are
 * corrected errors; double-bit (DBE) conditions and a stopped link are
 * uncorrected. Terminated by an all-zero sentinel entry.
 */
static const struct error_descr ocx_com_link_errors[] = {
	{
		.type  = ERR_CORRECTED,
		.mask  = OCX_COM_LINK_REPLAY_SBE,
		.descr = "Replay buffer single-bit error",
	},
	{
		.type  = ERR_CORRECTED,
		.mask  = OCX_COM_LINK_TXFIFO_SBE,
		.descr = "TX FIFO single-bit error",
	},
	{
		.type  = ERR_CORRECTED,
		.mask  = OCX_COM_LINK_RXFIFO_SBE,
		.descr = "RX FIFO single-bit error",
	},
	{
		.type  = ERR_CORRECTED,
		.mask  = OCX_COM_LINK_BLK_ERR,
		.descr = "Block code error",
	},
	{
		.type  = ERR_CORRECTED,
		.mask  = OCX_COM_LINK_ALIGN_FAIL,
		.descr = "Link alignment failure",
	},
	{
		.type  = ERR_CORRECTED,
		.mask  = OCX_COM_LINK_BAD_WORD,
		.descr = "Bad code word",
	},
	{
		.type  = ERR_UNCORRECTED,
		.mask  = OCX_COM_LINK_REPLAY_DBE,
		.descr = "Replay buffer double-bit error",
	},
	{
		.type  = ERR_UNCORRECTED,
		.mask  = OCX_COM_LINK_TXFIFO_DBE,
		.descr = "TX FIFO double-bit error",
	},
	{
		.type  = ERR_UNCORRECTED,
		.mask  = OCX_COM_LINK_RXFIFO_DBE,
		.descr = "RX FIFO double-bit error",
	},
	{
		.type  = ERR_UNCORRECTED,
		.mask  = OCX_COM_LINK_STOP,
		.descr = "Link stopped",
	},
	{0, 0, NULL},
};
/* Per-link conditions reported as uncorrected errors */
#define OCX_COM_LINK_INT_UE	(OCX_COM_LINK_REPLAY_DBE | \
				 OCX_COM_LINK_TXFIFO_DBE | \
				 OCX_COM_LINK_RXFIFO_DBE | \
				 OCX_COM_LINK_STOP)

/* Per-link conditions reported as corrected errors */
#define OCX_COM_LINK_INT_CE	(OCX_COM_LINK_REPLAY_SBE | \
				 OCX_COM_LINK_TXFIFO_SBE | \
				 OCX_COM_LINK_RXFIFO_SBE | \
				 OCX_COM_LINK_BLK_ERR	 | \
				 OCX_COM_LINK_ALIGN_FAIL | \
				 OCX_COM_LINK_BAD_WORD)

/* Per-RX-lane registers, one set per lane x (0..OCX_RX_LANES-1). */
#define OCX_LNE_INT(x)		(0x8018 + (x) * 0x100)
#define OCX_LNE_INT_EN(x)	(0x8020 + (x) * 0x100)
#define OCX_LNE_BAD_CNT(x)	(0x8028 + (x) * 0x100)
#define OCX_LNE_CFG(x)		(0x8000 + (x) * 0x100)
#define OCX_LNE_STAT(x, y)	(0x8040 + (x) * 0x100 + (y) * 8)

/* OCX_LNE_CFG control bits */
#define OCX_LNE_CFG_RX_BDRY_LOCK_DIS	BIT(8)
#define OCX_LNE_CFG_RX_STAT_WRAP_DIS	BIT(2)
#define OCX_LNE_CFG_RX_STAT_RDCLR	BIT(1)	/* stats clear on read */
#define OCX_LNE_CFG_RX_STAT_ENA		BIT(0)

/* OCX_LNE_INT per-lane status bits */
#define OCX_LANE_BAD_64B67B		BIT(8)
#define OCX_LANE_DSKEW_FIFO_OVFL	BIT(5)
#define OCX_LANE_SCRM_SYNC_LOSS		BIT(4)
#define OCX_LANE_UKWN_CNTL_WORD		BIT(3)
#define OCX_LANE_CRC32_ERR		BIT(2)
#define OCX_LANE_BDRY_SYNC_LOSS		BIT(1)
#define OCX_LANE_SERDES_LOCK_LOSS	BIT(0)

/* All lane-level conditions are treated as corrected; none are fatal. */
#define OCX_COM_LANE_INT_UE	(0)
#define OCX_COM_LANE_INT_CE	(OCX_LANE_SERDES_LOCK_LOSS | \
				 OCX_LANE_BDRY_SYNC_LOSS   | \
				 OCX_LANE_CRC32_ERR	   | \
				 OCX_LANE_UKWN_CNTL_WORD   | \
				 OCX_LANE_SCRM_SYNC_LOSS   | \
				 OCX_LANE_DSKEW_FIFO_OVFL  | \
				 OCX_LANE_BAD_64B67B)
/*
 * Decode table for per-lane OCX_LNE_INT bits; all are corrected-error
 * severity (see OCX_COM_LANE_INT_CE). Terminated by a sentinel entry.
 */
static const struct error_descr ocx_lane_errors[] = {
	{
		.type  = ERR_CORRECTED,
		.mask  = OCX_LANE_SERDES_LOCK_LOSS,
		.descr = "RX SerDes lock lost",
	},
	{
		.type  = ERR_CORRECTED,
		.mask  = OCX_LANE_BDRY_SYNC_LOSS,
		.descr = "RX word boundary lost",
	},
	{
		.type  = ERR_CORRECTED,
		.mask  = OCX_LANE_CRC32_ERR,
		.descr = "CRC32 error",
	},
	{
		.type  = ERR_CORRECTED,
		.mask  = OCX_LANE_UKWN_CNTL_WORD,
		.descr = "Unknown control word",
	},
	{
		.type  = ERR_CORRECTED,
		.mask  = OCX_LANE_SCRM_SYNC_LOSS,
		.descr = "Scrambler synchronization lost",
	},
	{
		.type  = ERR_CORRECTED,
		.mask  = OCX_LANE_DSKEW_FIFO_OVFL,
		.descr = "RX deskew FIFO overflow",
	},
	{
		.type  = ERR_CORRECTED,
		.mask  = OCX_LANE_BAD_64B67B,
		.descr = "Bad 64B/67B codeword",
	},
	{0, 0, NULL},
};
/* Bit sets written to the respective interrupt-enable registers */
#define OCX_LNE_INT_ENA_ALL		(GENMASK(9, 8) | GENMASK(6, 0))
#define OCX_COM_INT_ENA_ALL		(GENMASK(54, 50) | GENMASK(23, 0))
#define OCX_COM_LINKX_INT_ENA_ALL	(GENMASK(13, 12) | \
					 GENMASK(9, 7) | GENMASK(5, 0))

/* Per-link TX/RX ECC control registers (exposed via debugfs for injection) */
#define OCX_TLKX_ECC_CTL(x)	(0x10018 + (x) * 0x2000)
#define OCX_RLKX_ECC_CTL(x)	(0x18018 + (x) * 0x2000)

/*
 * Snapshot of COM-block error state, captured in hard-IRQ context and
 * decoded later by the threaded handler.
 */
struct ocx_com_err_ctx {
	u64 reg_com_int;			/* OCX_COM_INT at IRQ time */
	u64 reg_lane_int[OCX_RX_LANES];		/* per-lane OCX_LNE_INT */
	u64 reg_lane_stat11[OCX_RX_LANES];	/* per-lane OCX_LNE_STAT(x, 11) */
};

/* Snapshot of a single link's error state (see ocx_com_err_ctx). */
struct ocx_link_err_ctx {
	u64 reg_com_link_int;	/* OCX_COM_LINKX_INT(link) at IRQ time */
	int link;		/* link index == MSI-X entry number */
};

/* Driver state for one OCX (CCPI interconnect) device. */
struct thunderx_ocx {
	void __iomem *regs;
	int com_link;
	struct pci_dev *pdev;
	struct edac_device_ctl_info *edac_dev;

	struct dentry *debugfs;
	struct msix_entry msix_ent[OCX_INTS];

	/*
	 * Lockless single-producer/single-consumer rings: the hard-IRQ
	 * halves advance *_ring_head, the threaded halves advance
	 * *_ring_tail (CIRC_CNT semantics).
	 */
	struct ocx_com_err_ctx com_err_ctx[RING_ENTRIES];
	struct ocx_link_err_ctx link_err_ctx[RING_ENTRIES];

	unsigned long com_ring_head;
	unsigned long com_ring_tail;

	unsigned long link_ring_head;
	unsigned long link_ring_tail;
};

/* Scratch-buffer sizes for building the EDAC report strings */
#define OCX_MESSAGE_SIZE	SZ_1K
#define OCX_OTHER_SIZE		(50 * ARRAY_SIZE(ocx_com_link_errors))
/* This handler is threaded */
/*
 * Hard-IRQ half of the OCX COM interrupt: snapshot OCX_COM_INT plus all
 * per-lane status/stat11 registers into the next ring slot, acknowledge
 * (write-one-to-clear) everything that was read, then defer decoding to
 * the threaded half. Order matters: registers are captured before the
 * W1C acknowledge so no state is lost.
 */
static irqreturn_t thunderx_ocx_com_isr(int irq, void *irq_id)
{
	struct msix_entry *msix = irq_id;
	struct thunderx_ocx *ocx = container_of(msix, struct thunderx_ocx,
						msix_ent[msix->entry]);
	int lane;
	/* Producer side of the com ring: claim the slot at ring_head. */
	unsigned long head = ring_pos(ocx->com_ring_head,
				      ARRAY_SIZE(ocx->com_err_ctx));
	struct ocx_com_err_ctx *ctx = &ocx->com_err_ctx[head];

	ctx->reg_com_int = readq(ocx->regs + OCX_COM_INT);

	for (lane = 0; lane < OCX_RX_LANES; lane++) {
		ctx->reg_lane_int[lane] =
			readq(ocx->regs + OCX_LNE_INT(lane));
		ctx->reg_lane_stat11[lane] =
			readq(ocx->regs + OCX_LNE_STAT(lane, 11));

		/* Ack exactly the lane bits we captured (W1C). */
		writeq(ctx->reg_lane_int[lane], ocx->regs + OCX_LNE_INT(lane));
	}

	/* Ack the COM summary bits we captured (W1C). */
	writeq(ctx->reg_com_int, ocx->regs + OCX_COM_INT);

	ocx->com_ring_head++;

	return IRQ_WAKE_THREAD;
}
/*
 * Threaded half of the OCX COM interrupt: drain every snapshot queued by
 * thunderx_ocx_com_isr(), format a report (COM summary plus per-lane
 * detail for each lane flagged in the low 24 bits), and forward corrected
 * errors to the EDAC core.
 *
 * Returns IRQ_HANDLED, or IRQ_NONE if the scratch buffers could not be
 * allocated (the ring entries then remain pending).
 */
static irqreturn_t thunderx_ocx_com_threaded_isr(int irq, void *irq_id)
{
	struct msix_entry *msix = irq_id;
	struct thunderx_ocx *ocx = container_of(msix, struct thunderx_ocx,
						msix_ent[msix->entry]);
	irqreturn_t ret = IRQ_NONE;
	unsigned long tail;
	struct ocx_com_err_ctx *ctx;
	int lane;
	char *msg;
	char *other;

	msg = kmalloc(OCX_MESSAGE_SIZE, GFP_KERNEL);
	other = kmalloc(OCX_OTHER_SIZE, GFP_KERNEL);
	if (!msg || !other)
		goto err_free;

	/* Consumer side: process entries between ring_tail and ring_head. */
	while (CIRC_CNT(ocx->com_ring_head, ocx->com_ring_tail,
			ARRAY_SIZE(ocx->com_err_ctx))) {
		tail = ring_pos(ocx->com_ring_tail,
				ARRAY_SIZE(ocx->com_err_ctx));
		ctx = &ocx->com_err_ctx[tail];

		snprintf(msg, OCX_MESSAGE_SIZE, "%s: OCX_COM_INT: %016llx",
			 ocx->edac_dev->ctl_name, ctx->reg_com_int);

		decode_register(other, OCX_OTHER_SIZE,
				ocx_com_errors, ctx->reg_com_int);

		strlcat(msg, other, OCX_MESSAGE_SIZE);

		/* Bits 23..0 of OCX_COM_INT flag individual RX lanes. */
		for (lane = 0; lane < OCX_RX_LANES; lane++)
			if (ctx->reg_com_int & BIT(lane)) {
				snprintf(other, OCX_OTHER_SIZE,
					 "\n\tOCX_LNE_INT[%02d]: %016llx OCX_LNE_STAT11[%02d]: %016llx",
					 lane, ctx->reg_lane_int[lane],
					 lane, ctx->reg_lane_stat11[lane]);

				strlcat(msg, other, OCX_MESSAGE_SIZE);

				decode_register(other, OCX_OTHER_SIZE,
						ocx_lane_errors,
						ctx->reg_lane_int[lane]);
				strlcat(msg, other, OCX_MESSAGE_SIZE);
			}

		/* COM-block conditions are all corrected-severity. */
		if (ctx->reg_com_int & OCX_COM_INT_CE)
			edac_device_handle_ce(ocx->edac_dev, 0, 0, msg);

		ocx->com_ring_tail++;
	}

	ret = IRQ_HANDLED;

err_free:
	kfree(other);
	kfree(msg);

	return ret;
}
/*
 * Hard-IRQ half of a per-link interrupt: snapshot OCX_COM_LINKX_INT for
 * the link identified by the MSI-X entry number, acknowledge it (W1C),
 * and queue the snapshot for the threaded half.
 */
static irqreturn_t thunderx_ocx_lnk_isr(int irq, void *irq_id)
{
	struct msix_entry *msix = irq_id;
	struct thunderx_ocx *ocx = container_of(msix, struct thunderx_ocx,
						msix_ent[msix->entry]);
	/* Producer side of the link ring: claim the slot at ring_head. */
	unsigned long head = ring_pos(ocx->link_ring_head,
				      ARRAY_SIZE(ocx->link_err_ctx));
	struct ocx_link_err_ctx *ctx = &ocx->link_err_ctx[head];

	/* MSI-X vectors 0..OCX_LINK_INTS-1 map 1:1 to CCPI links. */
	ctx->link = msix->entry;
	ctx->reg_com_link_int = readq(ocx->regs + OCX_COM_LINKX_INT(ctx->link));

	/* Ack the captured bits (W1C). */
	writeq(ctx->reg_com_link_int, ocx->regs + OCX_COM_LINKX_INT(ctx->link));

	ocx->link_ring_head++;

	return IRQ_WAKE_THREAD;
}
  959. static irqreturn_t thunderx_ocx_lnk_threaded_isr(int irq, void *irq_id)
  960. {
  961. struct msix_entry *msix = irq_id;
  962. struct thunderx_ocx *ocx = container_of(msix, struct thunderx_ocx,
  963. msix_ent[msix->entry]);
  964. irqreturn_t ret = IRQ_NONE;
  965. unsigned long tail;
  966. struct ocx_link_err_ctx *ctx;
  967. char *msg;
  968. char *other;
  969. msg = kmalloc(OCX_MESSAGE_SIZE, GFP_KERNEL);
  970. other = kmalloc(OCX_OTHER_SIZE, GFP_KERNEL);
  971. if (!msg || !other)
  972. goto err_free;
  973. while (CIRC_CNT(ocx->link_ring_head, ocx->link_ring_tail,
  974. ARRAY_SIZE(ocx->link_err_ctx))) {
  975. tail = ring_pos(ocx->link_ring_head,
  976. ARRAY_SIZE(ocx->link_err_ctx));
  977. ctx = &ocx->link_err_ctx[tail];
  978. snprintf(msg, OCX_MESSAGE_SIZE,
  979. "%s: OCX_COM_LINK_INT[%d]: %016llx",
  980. ocx->edac_dev->ctl_name,
  981. ctx->link, ctx->reg_com_link_int);
  982. decode_register(other, OCX_OTHER_SIZE,
  983. ocx_com_link_errors, ctx->reg_com_link_int);
  984. strlcat(msg, other, OCX_MESSAGE_SIZE);
  985. if (ctx->reg_com_link_int & OCX_COM_LINK_INT_UE)
  986. edac_device_handle_ue(ocx->edac_dev, 0, 0, msg);
  987. else if (ctx->reg_com_link_int & OCX_COM_LINK_INT_CE)
  988. edac_device_handle_ce(ocx->edac_dev, 0, 0, msg);
  989. ocx->link_ring_tail++;
  990. }
  991. ret = IRQ_HANDLED;
  992. err_free:
  993. kfree(other);
  994. kfree(msg);
  995. return ret;
  996. }
/*
 * Debugfs write-hooks for error injection: each attribute writes a value
 * into the named OCX register (ECC control, W1S interrupt-set, or per-lane
 * bad-count). Expanded via the file's DEBUGFS_REG_ATTR helper.
 */
#define OCX_DEBUGFS_ATTR(_name, _reg)	DEBUGFS_REG_ATTR(ocx, _name, _reg)

OCX_DEBUGFS_ATTR(tlk0_ecc_ctl, OCX_TLKX_ECC_CTL(0));
OCX_DEBUGFS_ATTR(tlk1_ecc_ctl, OCX_TLKX_ECC_CTL(1));
OCX_DEBUGFS_ATTR(tlk2_ecc_ctl, OCX_TLKX_ECC_CTL(2));

OCX_DEBUGFS_ATTR(rlk0_ecc_ctl, OCX_RLKX_ECC_CTL(0));
OCX_DEBUGFS_ATTR(rlk1_ecc_ctl, OCX_RLKX_ECC_CTL(1));
OCX_DEBUGFS_ATTR(rlk2_ecc_ctl, OCX_RLKX_ECC_CTL(2));

OCX_DEBUGFS_ATTR(com_link0_int, OCX_COM_LINKX_INT_W1S(0));
OCX_DEBUGFS_ATTR(com_link1_int, OCX_COM_LINKX_INT_W1S(1));
OCX_DEBUGFS_ATTR(com_link2_int, OCX_COM_LINKX_INT_W1S(2));

OCX_DEBUGFS_ATTR(lne00_badcnt, OCX_LNE_BAD_CNT(0));
OCX_DEBUGFS_ATTR(lne01_badcnt, OCX_LNE_BAD_CNT(1));
OCX_DEBUGFS_ATTR(lne02_badcnt, OCX_LNE_BAD_CNT(2));
OCX_DEBUGFS_ATTR(lne03_badcnt, OCX_LNE_BAD_CNT(3));
OCX_DEBUGFS_ATTR(lne04_badcnt, OCX_LNE_BAD_CNT(4));
OCX_DEBUGFS_ATTR(lne05_badcnt, OCX_LNE_BAD_CNT(5));
OCX_DEBUGFS_ATTR(lne06_badcnt, OCX_LNE_BAD_CNT(6));
OCX_DEBUGFS_ATTR(lne07_badcnt, OCX_LNE_BAD_CNT(7));
OCX_DEBUGFS_ATTR(lne08_badcnt, OCX_LNE_BAD_CNT(8));
OCX_DEBUGFS_ATTR(lne09_badcnt, OCX_LNE_BAD_CNT(9));
OCX_DEBUGFS_ATTR(lne10_badcnt, OCX_LNE_BAD_CNT(10));
OCX_DEBUGFS_ATTR(lne11_badcnt, OCX_LNE_BAD_CNT(11));
OCX_DEBUGFS_ATTR(lne12_badcnt, OCX_LNE_BAD_CNT(12));
OCX_DEBUGFS_ATTR(lne13_badcnt, OCX_LNE_BAD_CNT(13));
OCX_DEBUGFS_ATTR(lne14_badcnt, OCX_LNE_BAD_CNT(14));
OCX_DEBUGFS_ATTR(lne15_badcnt, OCX_LNE_BAD_CNT(15));
OCX_DEBUGFS_ATTR(lne16_badcnt, OCX_LNE_BAD_CNT(16));
OCX_DEBUGFS_ATTR(lne17_badcnt, OCX_LNE_BAD_CNT(17));
OCX_DEBUGFS_ATTR(lne18_badcnt, OCX_LNE_BAD_CNT(18));
OCX_DEBUGFS_ATTR(lne19_badcnt, OCX_LNE_BAD_CNT(19));
OCX_DEBUGFS_ATTR(lne20_badcnt, OCX_LNE_BAD_CNT(20));
OCX_DEBUGFS_ATTR(lne21_badcnt, OCX_LNE_BAD_CNT(21));
OCX_DEBUGFS_ATTR(lne22_badcnt, OCX_LNE_BAD_CNT(22));
OCX_DEBUGFS_ATTR(lne23_badcnt, OCX_LNE_BAD_CNT(23));

OCX_DEBUGFS_ATTR(com_int, OCX_COM_INT_W1S);
/* All OCX debugfs nodes, registered together in thunderx_ocx_probe(). */
static struct debugfs_entry *ocx_dfs_ents[] = {
	&debugfs_tlk0_ecc_ctl,
	&debugfs_tlk1_ecc_ctl,
	&debugfs_tlk2_ecc_ctl,
	&debugfs_rlk0_ecc_ctl,
	&debugfs_rlk1_ecc_ctl,
	&debugfs_rlk2_ecc_ctl,
	&debugfs_com_link0_int,
	&debugfs_com_link1_int,
	&debugfs_com_link2_int,
	&debugfs_lne00_badcnt,
	&debugfs_lne01_badcnt,
	&debugfs_lne02_badcnt,
	&debugfs_lne03_badcnt,
	&debugfs_lne04_badcnt,
	&debugfs_lne05_badcnt,
	&debugfs_lne06_badcnt,
	&debugfs_lne07_badcnt,
	&debugfs_lne08_badcnt,
	&debugfs_lne09_badcnt,
	&debugfs_lne10_badcnt,
	&debugfs_lne11_badcnt,
	&debugfs_lne12_badcnt,
	&debugfs_lne13_badcnt,
	&debugfs_lne14_badcnt,
	&debugfs_lne15_badcnt,
	&debugfs_lne16_badcnt,
	&debugfs_lne17_badcnt,
	&debugfs_lne18_badcnt,
	&debugfs_lne19_badcnt,
	&debugfs_lne20_badcnt,
	&debugfs_lne21_badcnt,
	&debugfs_lne22_badcnt,
	&debugfs_lne23_badcnt,
	&debugfs_com_int,
};
/* PCI IDs this driver binds to (Cavium ThunderX OCX block). */
static const struct pci_device_id thunderx_ocx_pci_tbl[] = {
	{ PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_OCX) },
	{ 0, },
};
  1072. static void thunderx_ocx_clearstats(struct thunderx_ocx *ocx)
  1073. {
  1074. int lane, stat, cfg;
  1075. for (lane = 0; lane < OCX_RX_LANES; lane++) {
  1076. cfg = readq(ocx->regs + OCX_LNE_CFG(lane));
  1077. cfg |= OCX_LNE_CFG_RX_STAT_RDCLR;
  1078. cfg &= ~OCX_LNE_CFG_RX_STAT_ENA;
  1079. writeq(cfg, ocx->regs + OCX_LNE_CFG(lane));
  1080. for (stat = 0; stat < OCX_RX_LANE_STATS; stat++)
  1081. readq(ocx->regs + OCX_LNE_STAT(lane, stat));
  1082. }
  1083. }
  1084. static int thunderx_ocx_probe(struct pci_dev *pdev,
  1085. const struct pci_device_id *id)
  1086. {
  1087. struct thunderx_ocx *ocx;
  1088. struct edac_device_ctl_info *edac_dev;
  1089. char name[32];
  1090. int idx;
  1091. int i;
  1092. int ret;
  1093. u64 reg;
  1094. ret = pcim_enable_device(pdev);
  1095. if (ret) {
  1096. dev_err(&pdev->dev, "Cannot enable PCI device: %d\n", ret);
  1097. return ret;
  1098. }
  1099. ret = pcim_iomap_regions(pdev, BIT(0), "thunderx_ocx");
  1100. if (ret) {
  1101. dev_err(&pdev->dev, "Cannot map PCI resources: %d\n", ret);
  1102. return ret;
  1103. }
  1104. idx = edac_device_alloc_index();
  1105. snprintf(name, sizeof(name), "OCX%d", idx);
  1106. edac_dev = edac_device_alloc_ctl_info(sizeof(struct thunderx_ocx),
  1107. name, 1, "CCPI", 1, 0, idx);
  1108. if (!edac_dev) {
  1109. dev_err(&pdev->dev, "Cannot allocate EDAC device\n");
  1110. return -ENOMEM;
  1111. }
  1112. ocx = edac_dev->pvt_info;
  1113. ocx->edac_dev = edac_dev;
  1114. ocx->com_ring_head = 0;
  1115. ocx->com_ring_tail = 0;
  1116. ocx->link_ring_head = 0;
  1117. ocx->link_ring_tail = 0;
  1118. ocx->regs = pcim_iomap_table(pdev)[0];
  1119. if (!ocx->regs) {
  1120. dev_err(&pdev->dev, "Cannot map PCI resources\n");
  1121. ret = -ENODEV;
  1122. goto err_free;
  1123. }
  1124. ocx->pdev = pdev;
  1125. for (i = 0; i < OCX_INTS; i++) {
  1126. ocx->msix_ent[i].entry = i;
  1127. ocx->msix_ent[i].vector = 0;
  1128. }
  1129. ret = pci_enable_msix_exact(pdev, ocx->msix_ent, OCX_INTS);
  1130. if (ret) {
  1131. dev_err(&pdev->dev, "Cannot enable interrupt: %d\n", ret);
  1132. goto err_free;
  1133. }
  1134. for (i = 0; i < OCX_INTS; i++) {
  1135. ret = devm_request_threaded_irq(&pdev->dev,
  1136. ocx->msix_ent[i].vector,
  1137. (i == 3) ?
  1138. thunderx_ocx_com_isr :
  1139. thunderx_ocx_lnk_isr,
  1140. (i == 3) ?
  1141. thunderx_ocx_com_threaded_isr :
  1142. thunderx_ocx_lnk_threaded_isr,
  1143. 0, "[EDAC] ThunderX OCX",
  1144. &ocx->msix_ent[i]);
  1145. if (ret)
  1146. goto err_free;
  1147. }
  1148. edac_dev->dev = &pdev->dev;
  1149. edac_dev->dev_name = dev_name(&pdev->dev);
  1150. edac_dev->mod_name = "thunderx-ocx";
  1151. edac_dev->ctl_name = "thunderx-ocx";
  1152. ret = edac_device_add_device(edac_dev);
  1153. if (ret) {
  1154. dev_err(&pdev->dev, "Cannot add EDAC device: %d\n", ret);
  1155. goto err_free;
  1156. }
  1157. if (IS_ENABLED(CONFIG_EDAC_DEBUG)) {
  1158. ocx->debugfs = edac_debugfs_create_dir(pdev->dev.kobj.name);
  1159. ret = thunderx_create_debugfs_nodes(ocx->debugfs,
  1160. ocx_dfs_ents,
  1161. ocx,
  1162. ARRAY_SIZE(ocx_dfs_ents));
  1163. if (ret != ARRAY_SIZE(ocx_dfs_ents)) {
  1164. dev_warn(&pdev->dev, "Error creating debugfs entries: %d%s\n",
  1165. ret, ret >= 0 ? " created" : "");
  1166. }
  1167. }
  1168. pci_set_drvdata(pdev, edac_dev);
  1169. thunderx_ocx_clearstats(ocx);
  1170. for (i = 0; i < OCX_RX_LANES; i++) {
  1171. writeq(OCX_LNE_INT_ENA_ALL,
  1172. ocx->regs + OCX_LNE_INT_EN(i));
  1173. reg = readq(ocx->regs + OCX_LNE_INT(i));
  1174. writeq(reg, ocx->regs + OCX_LNE_INT(i));
  1175. }
  1176. for (i = 0; i < OCX_LINK_INTS; i++) {
  1177. reg = readq(ocx->regs + OCX_COM_LINKX_INT(i));
  1178. writeq(reg, ocx->regs + OCX_COM_LINKX_INT(i));
  1179. writeq(OCX_COM_LINKX_INT_ENA_ALL,
  1180. ocx->regs + OCX_COM_LINKX_INT_ENA_W1S(i));
  1181. }
  1182. reg = readq(ocx->regs + OCX_COM_INT);
  1183. writeq(reg, ocx->regs + OCX_COM_INT);
  1184. writeq(OCX_COM_INT_ENA_ALL, ocx->regs + OCX_COM_INT_ENA_W1S);
  1185. return 0;
  1186. err_free:
  1187. edac_device_free_ctl_info(edac_dev);
  1188. return ret;
  1189. }
  1190. static void thunderx_ocx_remove(struct pci_dev *pdev)
  1191. {
  1192. struct edac_device_ctl_info *edac_dev = pci_get_drvdata(pdev);
  1193. struct thunderx_ocx *ocx = edac_dev->pvt_info;
  1194. int i;
  1195. writeq(OCX_COM_INT_ENA_ALL, ocx->regs + OCX_COM_INT_ENA_W1C);
  1196. for (i = 0; i < OCX_INTS; i++) {
  1197. writeq(OCX_COM_LINKX_INT_ENA_ALL,
  1198. ocx->regs + OCX_COM_LINKX_INT_ENA_W1C(i));
  1199. }
  1200. edac_debugfs_remove_recursive(ocx->debugfs);
  1201. edac_device_del_device(&pdev->dev);
  1202. edac_device_free_ctl_info(edac_dev);
  1203. }
MODULE_DEVICE_TABLE(pci, thunderx_ocx_pci_tbl);

/* PCI driver glue for the OCX EDAC device. */
static struct pci_driver thunderx_ocx_driver = {
	.name     = "thunderx_ocx_edac",
	.probe    = thunderx_ocx_probe,
	.remove   = thunderx_ocx_remove,
	.id_table = thunderx_ocx_pci_tbl,
};
/*---------------------- L2C driver ---------------------------------*/

#define PCI_DEVICE_ID_THUNDER_L2C_TAD	0xa02e
#define PCI_DEVICE_ID_THUNDER_L2C_CBC	0xa02f
#define PCI_DEVICE_ID_THUNDER_L2C_MCI	0xa030

/* L2C TAD interrupt registers (W1C status, W1S set, enable set/clr) */
#define L2C_TAD_INT_W1C		0x40000
#define L2C_TAD_INT_W1S		0x40008
#define L2C_TAD_INT_ENA_W1C	0x40020
#define L2C_TAD_INT_ENA_W1S	0x40028

/* L2C_TAD_INT status bits */
#define L2C_TAD_INT_L2DDBE	BIT(1)
#define L2C_TAD_INT_SBFSBE	BIT(2)
#define L2C_TAD_INT_SBFDBE	BIT(3)
#define L2C_TAD_INT_FBFSBE	BIT(4)
#define L2C_TAD_INT_FBFDBE	BIT(5)
#define L2C_TAD_INT_TAGDBE	BIT(9)
#define L2C_TAD_INT_RDDISLMC	BIT(15)
#define L2C_TAD_INT_WRDISLMC	BIT(16)
#define L2C_TAD_INT_LFBTO	BIT(17)
#define L2C_TAD_INT_GSYNCTO	BIT(18)
#define L2C_TAD_INT_RTGSBE	BIT(32)
#define L2C_TAD_INT_RTGDBE	BIT(33)
#define L2C_TAD_INT_RDDISOCI	BIT(34)
#define L2C_TAD_INT_WRDISOCI	BIT(35)

/* ECC-related conditions (select TQD_ERR capture in the ISR) */
#define L2C_TAD_INT_ECC		(L2C_TAD_INT_L2DDBE | \
				 L2C_TAD_INT_SBFSBE | L2C_TAD_INT_SBFDBE | \
				 L2C_TAD_INT_FBFSBE | L2C_TAD_INT_FBFDBE)

/* Corrected vs. uncorrected severity partitions of L2C_TAD_INT */
#define L2C_TAD_INT_CE		(L2C_TAD_INT_SBFSBE | \
				 L2C_TAD_INT_FBFSBE)

#define L2C_TAD_INT_UE		(L2C_TAD_INT_L2DDBE   | \
				 L2C_TAD_INT_SBFDBE   | \
				 L2C_TAD_INT_FBFDBE   | \
				 L2C_TAD_INT_TAGDBE   | \
				 L2C_TAD_INT_RTGDBE   | \
				 L2C_TAD_INT_WRDISOCI | \
				 L2C_TAD_INT_RDDISOCI | \
				 L2C_TAD_INT_WRDISLMC | \
				 L2C_TAD_INT_RDDISLMC | \
				 L2C_TAD_INT_LFBTO    | \
				 L2C_TAD_INT_GSYNCTO)
/*
 * Decode table for L2C_TAD_INT bits. Single-bit ECC conditions are
 * corrected; everything else is uncorrected. Sentinel-terminated.
 */
static const struct error_descr l2_tad_errors[] = {
	{
		.type  = ERR_CORRECTED,
		.mask  = L2C_TAD_INT_SBFSBE,
		.descr = "SBF single-bit error",
	},
	{
		.type  = ERR_CORRECTED,
		.mask  = L2C_TAD_INT_FBFSBE,
		.descr = "FBF single-bit error",
	},
	{
		.type  = ERR_UNCORRECTED,
		.mask  = L2C_TAD_INT_L2DDBE,
		.descr = "L2D double-bit error",
	},
	{
		.type  = ERR_UNCORRECTED,
		.mask  = L2C_TAD_INT_SBFDBE,
		.descr = "SBF double-bit error",
	},
	{
		.type  = ERR_UNCORRECTED,
		.mask  = L2C_TAD_INT_FBFDBE,
		.descr = "FBF double-bit error",
	},
	{
		.type  = ERR_UNCORRECTED,
		.mask  = L2C_TAD_INT_TAGDBE,
		.descr = "TAG double-bit error",
	},
	{
		.type  = ERR_UNCORRECTED,
		.mask  = L2C_TAD_INT_RTGDBE,
		.descr = "RTG double-bit error",
	},
	{
		.type  = ERR_UNCORRECTED,
		.mask  = L2C_TAD_INT_WRDISOCI,
		.descr = "Write to a disabled CCPI",
	},
	{
		.type  = ERR_UNCORRECTED,
		.mask  = L2C_TAD_INT_RDDISOCI,
		.descr = "Read from a disabled CCPI",
	},
	{
		.type  = ERR_UNCORRECTED,
		.mask  = L2C_TAD_INT_WRDISLMC,
		.descr = "Write to a disabled LMC",
	},
	{
		.type  = ERR_UNCORRECTED,
		.mask  = L2C_TAD_INT_RDDISLMC,
		.descr = "Read from a disabled LMC",
	},
	{
		.type  = ERR_UNCORRECTED,
		.mask  = L2C_TAD_INT_LFBTO,
		.descr = "LFB entry timeout",
	},
	{
		.type  = ERR_UNCORRECTED,
		.mask  = L2C_TAD_INT_GSYNCTO,
		.descr = "Global sync CCPI timeout",
	},
	{0, 0, NULL},
};
/* Groupings used by the TAD ISR to pick which extra register to capture */
#define L2C_TAD_INT_TAG		(L2C_TAD_INT_TAGDBE)
#define L2C_TAD_INT_RTG		(L2C_TAD_INT_RTGDBE)
#define L2C_TAD_INT_DISLMC	(L2C_TAD_INT_WRDISLMC | L2C_TAD_INT_RDDISLMC)
#define L2C_TAD_INT_DISOCI	(L2C_TAD_INT_WRDISOCI | L2C_TAD_INT_RDDISOCI)

/* Everything enabled at probe time for the TAD block */
#define L2C_TAD_INT_ENA_ALL	(L2C_TAD_INT_ECC | L2C_TAD_INT_TAG | \
				 L2C_TAD_INT_RTG | \
				 L2C_TAD_INT_DISLMC | L2C_TAD_INT_DISOCI | \
				 L2C_TAD_INT_LFBTO)

/* TAD diagnostic/capture registers */
#define L2C_TAD_TIMETWO		0x50000
#define L2C_TAD_TIMEOUT		0x50100
#define L2C_TAD_ERR		0x60000
#define L2C_TAD_TQD_ERR		0x60100
#define L2C_TAD_TTG_ERR		0x60200

/* L2C CBC interrupt status (W1C) and its bits */
#define L2C_CBC_INT_W1C		0x60000

#define L2C_CBC_INT_RSDSBE	BIT(0)
#define L2C_CBC_INT_RSDDBE	BIT(1)

#define L2C_CBC_INT_RSD		(L2C_CBC_INT_RSDSBE | L2C_CBC_INT_RSDDBE)

#define L2C_CBC_INT_MIBSBE	BIT(4)
#define L2C_CBC_INT_MIBDBE	BIT(5)

#define L2C_CBC_INT_MIB		(L2C_CBC_INT_MIBSBE | L2C_CBC_INT_MIBDBE)

#define L2C_CBC_INT_IORDDISOCI	BIT(6)
#define L2C_CBC_INT_IOWRDISOCI	BIT(7)

#define L2C_CBC_INT_IODISOCI	(L2C_CBC_INT_IORDDISOCI | \
				 L2C_CBC_INT_IOWRDISOCI)

/* Corrected vs. uncorrected severity partitions of L2C_CBC_INT */
#define L2C_CBC_INT_CE		(L2C_CBC_INT_RSDSBE | L2C_CBC_INT_MIBSBE)
#define L2C_CBC_INT_UE		(L2C_CBC_INT_RSDDBE | L2C_CBC_INT_MIBDBE)
/*
 * Decode table for L2C_CBC_INT bits: SBE conditions corrected, DBE and
 * disabled-CCPI accesses uncorrected. Sentinel-terminated.
 */
static const struct error_descr l2_cbc_errors[] = {
	{
		.type  = ERR_CORRECTED,
		.mask  = L2C_CBC_INT_RSDSBE,
		.descr = "RSD single-bit error",
	},
	{
		.type  = ERR_CORRECTED,
		.mask  = L2C_CBC_INT_MIBSBE,
		.descr = "MIB single-bit error",
	},
	{
		.type  = ERR_UNCORRECTED,
		.mask  = L2C_CBC_INT_RSDDBE,
		.descr = "RSD double-bit error",
	},
	{
		.type  = ERR_UNCORRECTED,
		.mask  = L2C_CBC_INT_MIBDBE,
		.descr = "MIB double-bit error",
	},
	{
		.type  = ERR_UNCORRECTED,
		.mask  = L2C_CBC_INT_IORDDISOCI,
		.descr = "Read from a disabled CCPI",
	},
	{
		.type  = ERR_UNCORRECTED,
		.mask  = L2C_CBC_INT_IOWRDISOCI,
		.descr = "Write to a disabled CCPI",
	},
	{0, 0, NULL},
};
/* CBC interrupt set/enable and error-capture registers */
#define L2C_CBC_INT_W1S		0x60008
#define L2C_CBC_INT_ENA_W1C	0x60020

#define L2C_CBC_INT_ENA_ALL	(L2C_CBC_INT_RSD | L2C_CBC_INT_MIB | \
				 L2C_CBC_INT_IODISOCI)

#define L2C_CBC_INT_ENA_W1S	0x60028

#define L2C_CBC_IODISOCIERR	0x80008
#define L2C_CBC_IOCERR		0x80010
#define L2C_CBC_RSDERR		0x80018
#define L2C_CBC_MIBERR		0x80020

/* MCI interrupt status (W1C) and its bits */
#define L2C_MCI_INT_W1C		0x0

#define L2C_MCI_INT_VBFSBE	BIT(0)
#define L2C_MCI_INT_VBFDBE	BIT(1)

/*
 * Decode table for L2C_MCI_INT: VBF single-bit corrected, double-bit
 * uncorrected. Sentinel-terminated.
 */
static const struct error_descr l2_mci_errors[] = {
	{
		.type  = ERR_CORRECTED,
		.mask  = L2C_MCI_INT_VBFSBE,
		.descr = "VBF single-bit error",
	},
	{
		.type  = ERR_UNCORRECTED,
		.mask  = L2C_MCI_INT_VBFDBE,
		.descr = "VBF double-bit error",
	},
	{0, 0, NULL},
};

/* MCI interrupt set/enable and error-capture registers */
#define L2C_MCI_INT_W1S		0x8
#define L2C_MCI_INT_ENA_W1C	0x20

#define L2C_MCI_INT_ENA_ALL	(L2C_MCI_INT_VBFSBE | L2C_MCI_INT_VBFDBE)

#define L2C_MCI_INT_ENA_W1S	0x28

#define L2C_MCI_ERR		0x10000

/* Scratch-buffer sizes for building the L2C EDAC report strings */
#define L2C_MESSAGE_SIZE	SZ_1K
#define L2C_OTHER_SIZE		(50 * ARRAY_SIZE(l2_tad_errors))
/*
 * Error snapshot captured in hard-IRQ context: the interrupt status plus
 * one block-specific extended capture register (named by reg_ext_name).
 */
struct l2c_err_ctx {
	char *reg_ext_name;	/* points at a string literal; never freed */
	u64  reg_int;		/* interrupt status (W1C register) */
	u64  reg_ext;		/* extended error-capture register */
};

/* Driver state for one L2C device (TAD, CBC, or MCI block). */
struct thunderx_l2c {
	void __iomem *regs;
	struct pci_dev *pdev;
	struct edac_device_ctl_info *edac_dev;

	struct dentry *debugfs;

	int index;

	struct msix_entry msix_ent;

	/*
	 * Lockless single-producer/single-consumer ring: the hard-IRQ half
	 * advances ring_head, the threaded half advances ring_tail
	 * (CIRC_CNT semantics).
	 */
	struct l2c_err_ctx err_ctx[RING_ENTRIES];
	unsigned long ring_head;
	unsigned long ring_tail;
};
/*
 * Hard-IRQ half for the TAD block: snapshot L2C_TAD_INT, capture the one
 * extended register that corresponds to the highest-priority condition
 * present (ECC > TAG > LFBTO > DISOCI), acknowledge (W1C), and defer
 * decoding to the shared threaded handler.
 */
static irqreturn_t thunderx_l2c_tad_isr(int irq, void *irq_id)
{
	struct msix_entry *msix = irq_id;
	struct thunderx_l2c *tad = container_of(msix, struct thunderx_l2c,
						msix_ent);
	/* Producer side of the ring: claim the slot at ring_head. */
	unsigned long head = ring_pos(tad->ring_head, ARRAY_SIZE(tad->err_ctx));
	struct l2c_err_ctx *ctx = &tad->err_ctx[head];

	ctx->reg_int = readq(tad->regs + L2C_TAD_INT_W1C);

	if (ctx->reg_int & L2C_TAD_INT_ECC) {
		ctx->reg_ext_name = "TQD_ERR";
		ctx->reg_ext = readq(tad->regs + L2C_TAD_TQD_ERR);
	} else if (ctx->reg_int & L2C_TAD_INT_TAG) {
		ctx->reg_ext_name = "TTG_ERR";
		ctx->reg_ext = readq(tad->regs + L2C_TAD_TTG_ERR);
	} else if (ctx->reg_int & L2C_TAD_INT_LFBTO) {
		ctx->reg_ext_name = "TIMEOUT";
		ctx->reg_ext = readq(tad->regs + L2C_TAD_TIMEOUT);
	} else if (ctx->reg_int & L2C_TAD_INT_DISOCI) {
		ctx->reg_ext_name = "ERR";
		ctx->reg_ext = readq(tad->regs + L2C_TAD_ERR);
	}

	/* Ack exactly the bits we captured (W1C). */
	writeq(ctx->reg_int, tad->regs + L2C_TAD_INT_W1C);

	tad->ring_head++;

	return IRQ_WAKE_THREAD;
}
/*
 * Hard-IRQ half for the CBC block: snapshot L2C_CBC_INT, capture the
 * extended register matching the condition present (RSD > MIB >
 * IODISOCI), acknowledge (W1C), and defer to the threaded handler.
 */
static irqreturn_t thunderx_l2c_cbc_isr(int irq, void *irq_id)
{
	struct msix_entry *msix = irq_id;
	struct thunderx_l2c *cbc = container_of(msix, struct thunderx_l2c,
						msix_ent);
	/* Producer side of the ring: claim the slot at ring_head. */
	unsigned long head = ring_pos(cbc->ring_head, ARRAY_SIZE(cbc->err_ctx));
	struct l2c_err_ctx *ctx = &cbc->err_ctx[head];

	ctx->reg_int = readq(cbc->regs + L2C_CBC_INT_W1C);

	if (ctx->reg_int & L2C_CBC_INT_RSD) {
		ctx->reg_ext_name = "RSDERR";
		ctx->reg_ext = readq(cbc->regs + L2C_CBC_RSDERR);
	} else if (ctx->reg_int & L2C_CBC_INT_MIB) {
		ctx->reg_ext_name = "MIBERR";
		ctx->reg_ext = readq(cbc->regs + L2C_CBC_MIBERR);
	} else if (ctx->reg_int & L2C_CBC_INT_IODISOCI) {
		ctx->reg_ext_name = "IODISOCIERR";
		ctx->reg_ext = readq(cbc->regs + L2C_CBC_IODISOCIERR);
	}

	/* Ack exactly the bits we captured (W1C). */
	writeq(ctx->reg_int, cbc->regs + L2C_CBC_INT_W1C);

	cbc->ring_head++;

	return IRQ_WAKE_THREAD;
}
/*
 * Hard-IRQ half for the MCI block: snapshot L2C_MCI_INT and L2C_MCI_ERR,
 * acknowledge (W1C), and defer to the threaded handler. MCI has a single
 * capture register, so no condition-based selection is needed.
 */
static irqreturn_t thunderx_l2c_mci_isr(int irq, void *irq_id)
{
	struct msix_entry *msix = irq_id;
	struct thunderx_l2c *mci = container_of(msix, struct thunderx_l2c,
						msix_ent);
	/* Producer side of the ring: claim the slot at ring_head. */
	unsigned long head = ring_pos(mci->ring_head, ARRAY_SIZE(mci->err_ctx));
	struct l2c_err_ctx *ctx = &mci->err_ctx[head];

	ctx->reg_int = readq(mci->regs + L2C_MCI_INT_W1C);
	ctx->reg_ext = readq(mci->regs + L2C_MCI_ERR);

	/* Ack exactly the bits we captured (W1C). */
	writeq(ctx->reg_int, mci->regs + L2C_MCI_INT_W1C);

	ctx->reg_ext_name = "ERR";
	mci->ring_head++;

	return IRQ_WAKE_THREAD;
}
  1485. static irqreturn_t thunderx_l2c_threaded_isr(int irq, void *irq_id)
  1486. {
  1487. struct msix_entry *msix = irq_id;
  1488. struct thunderx_l2c *l2c = container_of(msix, struct thunderx_l2c,
  1489. msix_ent);
  1490. unsigned long tail = ring_pos(l2c->ring_tail, ARRAY_SIZE(l2c->err_ctx));
  1491. struct l2c_err_ctx *ctx = &l2c->err_ctx[tail];
  1492. irqreturn_t ret = IRQ_NONE;
  1493. u64 mask_ue, mask_ce;
  1494. const struct error_descr *l2_errors;
  1495. char *reg_int_name;
  1496. char *msg;
  1497. char *other;
  1498. msg = kmalloc(OCX_MESSAGE_SIZE, GFP_KERNEL);
  1499. other = kmalloc(OCX_OTHER_SIZE, GFP_KERNEL);
  1500. if (!msg || !other)
  1501. goto err_free;
  1502. switch (l2c->pdev->device) {
  1503. case PCI_DEVICE_ID_THUNDER_L2C_TAD:
  1504. reg_int_name = "L2C_TAD_INT";
  1505. mask_ue = L2C_TAD_INT_UE;
  1506. mask_ce = L2C_TAD_INT_CE;
  1507. l2_errors = l2_tad_errors;
  1508. break;
  1509. case PCI_DEVICE_ID_THUNDER_L2C_CBC:
  1510. reg_int_name = "L2C_CBC_INT";
  1511. mask_ue = L2C_CBC_INT_UE;
  1512. mask_ce = L2C_CBC_INT_CE;
  1513. l2_errors = l2_cbc_errors;
  1514. break;
  1515. case PCI_DEVICE_ID_THUNDER_L2C_MCI:
  1516. reg_int_name = "L2C_MCI_INT";
  1517. mask_ue = L2C_MCI_INT_VBFDBE;
  1518. mask_ce = L2C_MCI_INT_VBFSBE;
  1519. l2_errors = l2_mci_errors;
  1520. break;
  1521. default:
  1522. dev_err(&l2c->pdev->dev, "Unsupported device: %04x\n",
  1523. l2c->pdev->device);
  1524. goto err_free;
  1525. }
  1526. while (CIRC_CNT(l2c->ring_head, l2c->ring_tail,
  1527. ARRAY_SIZE(l2c->err_ctx))) {
  1528. snprintf(msg, L2C_MESSAGE_SIZE,
  1529. "%s: %s: %016llx, %s: %016llx",
  1530. l2c->edac_dev->ctl_name, reg_int_name, ctx->reg_int,
  1531. ctx->reg_ext_name, ctx->reg_ext);
  1532. decode_register(other, L2C_OTHER_SIZE, l2_errors, ctx->reg_int);
  1533. strlcat(msg, other, L2C_MESSAGE_SIZE);
  1534. if (ctx->reg_int & mask_ue)
  1535. edac_device_handle_ue(l2c->edac_dev, 0, 0, msg);
  1536. else if (ctx->reg_int & mask_ce)
  1537. edac_device_handle_ce(l2c->edac_dev, 0, 0, msg);
  1538. l2c->ring_tail++;
  1539. }
  1540. ret = IRQ_HANDLED;
  1541. err_free:
  1542. kfree(other);
  1543. kfree(msg);
  1544. return ret;
  1545. }
/* Debugfs error-injection hooks: write to each block's W1S register. */
#define L2C_DEBUGFS_ATTR(_name, _reg)	DEBUGFS_REG_ATTR(l2c, _name, _reg)

L2C_DEBUGFS_ATTR(tad_int, L2C_TAD_INT_W1S);

static struct debugfs_entry *l2c_tad_dfs_ents[] = {
	&debugfs_tad_int,
};

L2C_DEBUGFS_ATTR(cbc_int, L2C_CBC_INT_W1S);

static struct debugfs_entry *l2c_cbc_dfs_ents[] = {
	&debugfs_cbc_int,
};

L2C_DEBUGFS_ATTR(mci_int, L2C_MCI_INT_W1S);

static struct debugfs_entry *l2c_mci_dfs_ents[] = {
	&debugfs_mci_int,
};
/* PCI IDs this driver binds to: the L2C TAD, CBC, and MCI blocks. */
static const struct pci_device_id thunderx_l2c_pci_tbl[] = {
	{ PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_L2C_TAD), },
	{ PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_L2C_CBC), },
	{ PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_L2C_MCI), },
	{ 0, },
};
/*
 * Probe one L2C sub-block (TAD, CBC or MCI PCI function): map BAR0,
 * allocate and register an EDAC device, hook up the MSI-X error
 * interrupt, and finally unmask the block's error interrupts.
 *
 * Returns 0 on success or a negative errno.
 */
static int thunderx_l2c_probe(struct pci_dev *pdev,
			      const struct pci_device_id *id)
{
	struct thunderx_l2c *l2c;
	struct edac_device_ctl_info *edac_dev;
	struct debugfs_entry **l2c_devattr;
	size_t dfs_entries;
	irqreturn_t (*thunderx_l2c_isr)(int, void *) = NULL;
	char name[32];
	const char *fmt;
	u64 reg_en_offs, reg_en_mask;
	int idx;
	int ret;

	/* Managed enable + BAR0 mapping: released automatically on unbind. */
	ret = pcim_enable_device(pdev);
	if (ret) {
		dev_err(&pdev->dev, "Cannot enable PCI device: %d\n", ret);
		return ret;
	}

	ret = pcim_iomap_regions(pdev, BIT(0), "thunderx_l2c");
	if (ret) {
		dev_err(&pdev->dev, "Cannot map PCI resources: %d\n", ret);
		return ret;
	}

	/*
	 * Pick the per-flavor hard ISR, debugfs table, device-name format
	 * and interrupt-enable register/mask from the PCI device ID.
	 */
	switch (pdev->device) {
	case PCI_DEVICE_ID_THUNDER_L2C_TAD:
		thunderx_l2c_isr = thunderx_l2c_tad_isr;
		l2c_devattr = l2c_tad_dfs_ents;
		dfs_entries = ARRAY_SIZE(l2c_tad_dfs_ents);
		fmt = "L2C-TAD%d";
		reg_en_offs = L2C_TAD_INT_ENA_W1S;
		reg_en_mask = L2C_TAD_INT_ENA_ALL;
		break;
	case PCI_DEVICE_ID_THUNDER_L2C_CBC:
		thunderx_l2c_isr = thunderx_l2c_cbc_isr;
		l2c_devattr = l2c_cbc_dfs_ents;
		dfs_entries = ARRAY_SIZE(l2c_cbc_dfs_ents);
		fmt = "L2C-CBC%d";
		reg_en_offs = L2C_CBC_INT_ENA_W1S;
		reg_en_mask = L2C_CBC_INT_ENA_ALL;
		break;
	case PCI_DEVICE_ID_THUNDER_L2C_MCI:
		thunderx_l2c_isr = thunderx_l2c_mci_isr;
		l2c_devattr = l2c_mci_dfs_ents;
		dfs_entries = ARRAY_SIZE(l2c_mci_dfs_ents);
		fmt = "L2C-MCI%d";
		reg_en_offs = L2C_MCI_INT_ENA_W1S;
		reg_en_mask = L2C_MCI_INT_ENA_ALL;
		break;
	default:
		/* Unreachable: the ID table only matches the three cases above. */
		dev_err(&pdev->dev, "Unsupported PCI device: %04x\n",
			pdev->device);
		return -EINVAL;
	}

	/* Unique EDAC index makes the name distinct across multiple instances. */
	idx = edac_device_alloc_index();
	snprintf(name, sizeof(name), fmt, idx);

	/* pvt_info is a struct thunderx_l2c allocated along with the ctl info. */
	edac_dev = edac_device_alloc_ctl_info(sizeof(struct thunderx_l2c),
					      name, 1, "L2C", 1, 0, idx);
	if (!edac_dev) {
		dev_err(&pdev->dev, "Cannot allocate EDAC device\n");
		return -ENOMEM;
	}

	l2c = edac_dev->pvt_info;
	l2c->edac_dev = edac_dev;

	l2c->regs = pcim_iomap_table(pdev)[0];
	if (!l2c->regs) {
		dev_err(&pdev->dev, "Cannot map PCI resources\n");
		ret = -ENODEV;
		goto err_free;
	}

	l2c->pdev = pdev;

	/* Empty error-context ring shared between hard and threaded ISRs. */
	l2c->ring_head = 0;
	l2c->ring_tail = 0;

	/* Single MSI-X vector for the block's error interrupt. */
	l2c->msix_ent.entry = 0;
	l2c->msix_ent.vector = 0;

	ret = pci_enable_msix_exact(pdev, &l2c->msix_ent, 1);
	if (ret) {
		dev_err(&pdev->dev, "Cannot enable interrupt: %d\n", ret);
		goto err_free;
	}

	/*
	 * NOTE(review): on the failure paths below, MSI-X is left enabled
	 * (no pci_disable_msix() before err_free) — looks like a leak on
	 * probe failure; confirm against PCI/MSI teardown expectations.
	 */
	ret = devm_request_threaded_irq(&pdev->dev, l2c->msix_ent.vector,
					thunderx_l2c_isr,
					thunderx_l2c_threaded_isr,
					0, "[EDAC] ThunderX L2C",
					&l2c->msix_ent);
	if (ret)
		goto err_free;

	edac_dev->dev = &pdev->dev;
	edac_dev->dev_name = dev_name(&pdev->dev);
	edac_dev->mod_name = "thunderx-l2c";
	edac_dev->ctl_name = "thunderx-l2c";

	ret = edac_device_add_device(edac_dev);
	if (ret) {
		dev_err(&pdev->dev, "Cannot add EDAC device: %d\n", ret);
		goto err_free;
	}

	if (IS_ENABLED(CONFIG_EDAC_DEBUG)) {
		l2c->debugfs = edac_debugfs_create_dir(pdev->dev.kobj.name);

		/* Returns number of nodes created; negative on hard failure. */
		ret = thunderx_create_debugfs_nodes(l2c->debugfs, l2c_devattr,
						    l2c, dfs_entries);

		if (ret != dfs_entries) {
			dev_warn(&pdev->dev, "Error creating debugfs entries: %d%s\n",
				 ret, ret >= 0 ? " created" : "");
		}
	}

	pci_set_drvdata(pdev, edac_dev);

	/* Last step: unmask the error interrupts, now that everything is set up. */
	writeq(reg_en_mask, l2c->regs + reg_en_offs);

	return 0;

err_free:
	edac_device_free_ctl_info(edac_dev);

	return ret;
}
  1677. static void thunderx_l2c_remove(struct pci_dev *pdev)
  1678. {
  1679. struct edac_device_ctl_info *edac_dev = pci_get_drvdata(pdev);
  1680. struct thunderx_l2c *l2c = edac_dev->pvt_info;
  1681. switch (pdev->device) {
  1682. case PCI_DEVICE_ID_THUNDER_L2C_TAD:
  1683. writeq(L2C_TAD_INT_ENA_ALL, l2c->regs + L2C_TAD_INT_ENA_W1C);
  1684. break;
  1685. case PCI_DEVICE_ID_THUNDER_L2C_CBC:
  1686. writeq(L2C_CBC_INT_ENA_ALL, l2c->regs + L2C_CBC_INT_ENA_W1C);
  1687. break;
  1688. case PCI_DEVICE_ID_THUNDER_L2C_MCI:
  1689. writeq(L2C_MCI_INT_ENA_ALL, l2c->regs + L2C_MCI_INT_ENA_W1C);
  1690. break;
  1691. }
  1692. edac_debugfs_remove_recursive(l2c->debugfs);
  1693. edac_device_del_device(&pdev->dev);
  1694. edac_device_free_ctl_info(edac_dev);
  1695. }
/* Export the match table so udev/modprobe can autoload this driver. */
MODULE_DEVICE_TABLE(pci, thunderx_l2c_pci_tbl);

/* PCI driver covering all three L2C sub-block functions. */
static struct pci_driver thunderx_l2c_driver = {
	.name = "thunderx_l2c_edac",
	.probe = thunderx_l2c_probe,
	.remove = thunderx_l2c_remove,
	.id_table = thunderx_l2c_pci_tbl,
};
  1703. static int __init thunderx_edac_init(void)
  1704. {
  1705. int rc = 0;
  1706. if (ghes_get_devices())
  1707. return -EBUSY;
  1708. rc = pci_register_driver(&thunderx_lmc_driver);
  1709. if (rc)
  1710. return rc;
  1711. rc = pci_register_driver(&thunderx_ocx_driver);
  1712. if (rc)
  1713. goto err_lmc;
  1714. rc = pci_register_driver(&thunderx_l2c_driver);
  1715. if (rc)
  1716. goto err_ocx;
  1717. return rc;
  1718. err_ocx:
  1719. pci_unregister_driver(&thunderx_ocx_driver);
  1720. err_lmc:
  1721. pci_unregister_driver(&thunderx_lmc_driver);
  1722. return rc;
  1723. }
  1724. static void __exit thunderx_edac_exit(void)
  1725. {
  1726. pci_unregister_driver(&thunderx_l2c_driver);
  1727. pci_unregister_driver(&thunderx_ocx_driver);
  1728. pci_unregister_driver(&thunderx_lmc_driver);
  1729. }
/* Standard module plumbing and metadata. */
module_init(thunderx_edac_init);
module_exit(thunderx_edac_exit);

MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Cavium, Inc.");
MODULE_DESCRIPTION("EDAC Driver for Cavium ThunderX");