igen6_edac.c 37 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * Driver for Intel client SoC with integrated memory controller using IBECC
  4. *
  5. * Copyright (C) 2020 Intel Corporation
  6. *
  7. * The In-Band ECC (IBECC) IP provides ECC protection to all or specific
  8. * regions of the physical memory space. It's used for memory controllers
  9. * that don't support the out-of-band ECC which often needs an additional
  10. * storage device to each channel for storing ECC data.
  11. */
  12. #include <linux/module.h>
  13. #include <linux/init.h>
  14. #include <linux/pci.h>
  15. #include <linux/slab.h>
  16. #include <linux/irq_work.h>
  17. #include <linux/llist.h>
  18. #include <linux/genalloc.h>
  19. #include <linux/edac.h>
  20. #include <linux/bits.h>
  21. #include <linux/io.h>
  22. #include <asm/mach_traps.h>
  23. #include <asm/nmi.h>
  24. #include <asm/mce.h>
  25. #include "edac_mc.h"
  26. #include "edac_module.h"
/* Driver identification strings */
#define IGEN6_REVISION "v2.5.1"
#define EDAC_MOD_STR "igen6_edac"
#define IGEN6_NMI_NAME "igen6_ibecc"
/* Debug macros: thin wrappers that tag EDAC messages with "igen6" */
#define igen6_printk(level, fmt, arg...) \
	edac_printk(level, "igen6", fmt, ##arg)
#define igen6_mc_printk(mci, level, fmt, arg...) \
	edac_mc_chipset_printk(mci, level, "igen6", fmt, ##arg)
/* Extract bits lo..hi (inclusive) of v and shift them down to bit 0 */
#define GET_BITFIELD(v, lo, hi) (((v) & GENMASK_ULL(hi, lo)) >> (lo))
#define NUM_IMC 2 /* Max memory controllers */
#define NUM_CHANNELS 2 /* Max channels */
#define NUM_DIMMS 2 /* Max DIMMs per channel */
#define _4GB BIT_ULL(32)
/* Size of physical memory */
#define TOM_OFFSET 0xa0
/* Top of low usable DRAM */
#define TOLUD_OFFSET 0xbc
/* Capability register C */
#define CAPID_C_OFFSET 0xec
#define CAPID_C_IBECC BIT(15)
/* Capability register E */
#define CAPID_E_OFFSET 0xf0
#define CAPID_E_IBECC BIT(12)
#define CAPID_E_IBECC_BIT18 BIT(18)
/* Error Status */
#define ERRSTS_OFFSET 0xc8
#define ERRSTS_CE BIT_ULL(6)
#define ERRSTS_UE BIT_ULL(7)
/* Error Command */
#define ERRCMD_OFFSET 0xca
#define ERRCMD_CE BIT_ULL(6)
#define ERRCMD_UE BIT_ULL(7)
/* IBECC MMIO base address (taken from the active res_cfg) */
#define IBECC_BASE (res_cfg->ibecc_base)
#define IBECC_ACTIVATE_OFFSET IBECC_BASE
#define IBECC_ACTIVATE_EN BIT(0)
/* IBECC error log */
#define ECC_ERROR_LOG_OFFSET (IBECC_BASE + res_cfg->ibecc_error_log_offset)
#define ECC_ERROR_LOG_CE BIT_ULL(62)
#define ECC_ERROR_LOG_UE BIT_ULL(63)
#define ECC_ERROR_LOG_ADDR_SHIFT 5
#define ECC_ERROR_LOG_ADDR(v) GET_BITFIELD(v, 5, 38)
#define ECC_ERROR_LOG_ADDR45(v) GET_BITFIELD(v, 5, 45)
#define ECC_ERROR_LOG_SYND(v) GET_BITFIELD(v, 46, 61)
/* Host MMIO base address */
#define MCHBAR_OFFSET 0x48
#define MCHBAR_EN BIT_ULL(0)
#define MCHBAR_BASE(v) (GET_BITFIELD(v, 16, 38) << 16)
#define MCHBAR_SIZE 0x10000
/* Parameters for the channel decode stage (base taken from res_cfg) */
#define IMC_BASE (res_cfg->imc_base)
#define MAD_INTER_CHANNEL_OFFSET IMC_BASE
#define MAD_INTER_CHANNEL_DDR_TYPE(v) GET_BITFIELD(v, 0, 2)
#define MAD_INTER_CHANNEL_ECHM(v) GET_BITFIELD(v, 3, 3)
#define MAD_INTER_CHANNEL_CH_L_MAP(v) GET_BITFIELD(v, 4, 4)
#define MAD_INTER_CHANNEL_CH_S_SIZE(v) ((u64)GET_BITFIELD(v, 12, 19) << 29)
/* Parameters for DRAM decode stage */
#define MAD_INTRA_CH0_OFFSET (IMC_BASE + 4)
#define MAD_INTRA_CH_DIMM_L_MAP(v) GET_BITFIELD(v, 0, 0)
/* DIMM characteristics */
#define MAD_DIMM_CH0_OFFSET (IMC_BASE + 0xc)
#define MAD_DIMM_CH_DIMM_L_SIZE(v) ((u64)GET_BITFIELD(v, 0, 6) << 29)
#define MAD_DIMM_CH_DLW(v) GET_BITFIELD(v, 7, 8)
#define MAD_DIMM_CH_DIMM_S_SIZE(v) ((u64)GET_BITFIELD(v, 16, 22) << 29)
#define MAD_DIMM_CH_DSW(v) GET_BITFIELD(v, 24, 25)
/*
 * Hash for memory controller selection.
 * NOTE(review): MAC_MC_HASH_LSB uses a "MAC_" prefix while its neighbours
 * use "MAD_" - presumably a typo; the name is kept because it is referenced
 * by adl_err_addr_to_imc_addr().
 */
#define MAD_MC_HASH_OFFSET (IMC_BASE + 0x1b8)
#define MAC_MC_HASH_LSB(v) GET_BITFIELD(v, 1, 3)
/* Hash for channel selection */
#define CHANNEL_HASH_OFFSET (IMC_BASE + 0x24)
/* Hash for enhanced channel selection */
#define CHANNEL_EHASH_OFFSET (IMC_BASE + 0x28)
#define CHANNEL_HASH_MASK(v) (GET_BITFIELD(v, 6, 19) << 6)
#define CHANNEL_HASH_LSB_MASK_BIT(v) GET_BITFIELD(v, 24, 26)
#define CHANNEL_HASH_MODE(v) GET_BITFIELD(v, 28, 28)
/* Parameters for memory slice decode stage */
#define MEM_SLICE_HASH_MASK(v) (GET_BITFIELD(v, 6, 19) << 6)
#define MEM_SLICE_HASH_LSB_MASK_BIT(v) GET_BITFIELD(v, 24, 26)
/*
 * Per-platform resource description. One instance exists per supported SoC
 * family; the global res_cfg pointer refers to the one in use and is read
 * by the IBECC_BASE/IMC_BASE/ECC_ERROR_LOG_OFFSET macros.
 */
static struct res_config {
	/* NOTE(review): consumer not visible in this part of the file */
	bool machine_check;
	/* Number of memory controllers iterated by the error handlers */
	int num_imc;
	/* Offset of the IMC register block (see IMC_BASE) */
	u32 imc_base;
	/* NOTE(review): cmf_base, cmf_size, ms_hash_offset are only set for TGL here */
	u32 cmf_base;
	u32 cmf_size;
	u32 ms_hash_offset;
	/* Offset of the IBECC register block (see IBECC_BASE) */
	u32 ibecc_base;
	/* Offset of the error log register relative to ibecc_base */
	u32 ibecc_error_log_offset;
	/* Platform-specific check for IBECC availability */
	bool (*ibecc_available)(struct pci_dev *pdev);
	/* Extract error address logged in IBECC (optional; may be NULL) */
	u64 (*err_addr)(u64 ecclog);
	/* Convert error address logged in IBECC to system physical address */
	u64 (*err_addr_to_sys_addr)(u64 eaddr, int mc);
	/* Convert error address logged in IBECC to integrated memory controller address */
	u64 (*err_addr_to_imc_addr)(u64 eaddr, int mc);
} *res_cfg;
/* State kept for one integrated memory controller */
struct igen6_imc {
	int mc;
	/* EDAC memory controller instance used when reporting errors */
	struct mem_ctl_info *mci;
	/* PCI device whose config space holds ERRSTS/ERRCMD */
	struct pci_dev *pdev;
	struct device dev;
	/* MMIO mapping used for the readl()/readq()/writeq() register accesses */
	void __iomem *window;
	u64 size;
	/* s_size and l_map inputs of the channel decode stage */
	u64 ch_s_size;
	int ch_l_map;
	/* Per-channel inputs of the sub-channel/DIMM decode stage */
	u64 dimm_s_size[NUM_CHANNELS];
	u64 dimm_l_size[NUM_CHANNELS];
	int dimm_l_map[NUM_CHANNELS];
};
/* Driver-wide private data: all memory controllers plus memory slice info */
static struct igen6_pvt {
	struct igen6_imc imc[NUM_IMC];
	/* Memory slice hash value consumed by tgl_err_addr_to_mem_addr() */
	u64 ms_hash;
	/* Memory slice size threshold used by the TGL/ADL address conversions */
	u64 ms_s_size;
	int ms_l_map;
} *igen6_pvt;
/* The top of low usable DRAM (used by the address conversion helpers) */
static u32 igen6_tolud;
/* The size of physical memory (upper bound checked in igen6_decode()) */
static u64 igen6_tom;
/* Result of decoding one logged error address */
struct decoded_addr {
	/* Index of the memory controller that logged the error */
	int mc;
	/* Address as seen by the memory controller (input of igen6_decode()) */
	u64 imc_addr;
	/* System physical address reported to the EDAC core */
	u64 sys_addr;
	/* Outputs of the channel decode stage */
	int channel_idx;
	u64 channel_addr;
	/* Outputs of the sub-channel/DIMM decode stage */
	int sub_channel_idx;
	u64 sub_channel_addr;
};
/* One captured error log entry, linked on ecclog_llist */
struct ecclog_node {
	struct llist_node llnode;
	/* Index of the memory controller the log was read from */
	int mc;
	/* Raw ECC_ERROR_LOG register value */
	u64 ecclog;
};
/*
 * In the NMI handler, the driver uses the lock-less memory allocator
 * to allocate memory to store the IBECC error logs and links the logs
 * to the lock-less list. Delay printk() and the work of error reporting
 * to EDAC core in a worker.
 */
#define ECCLOG_POOL_SIZE PAGE_SIZE
/* Pending error logs, consumed by ecclog_work_cb() */
static LLIST_HEAD(ecclog_llist);
/* Allocator for struct ecclog_node entries */
static struct gen_pool *ecclog_pool;
/* Static backing storage handed to ecclog_pool */
static char ecclog_buf[ECCLOG_POOL_SIZE];
/* IRQ-context stage: clears ERRSTS and schedules ecclog_work */
static struct irq_work ecclog_irq_work;
/* Worker that performs the error reporting */
static struct work_struct ecclog_work;
/*
 * PCI device IDs of the supported compute dies. Each ID is bound to a
 * struct res_config in igen6_pci_tbl.
 */
/* Compute die IDs for Elkhart Lake with IBECC */
#define DID_EHL_SKU5 0x4514
#define DID_EHL_SKU6 0x4528
#define DID_EHL_SKU7 0x452a
#define DID_EHL_SKU8 0x4516
#define DID_EHL_SKU9 0x452c
#define DID_EHL_SKU10 0x452e
#define DID_EHL_SKU11 0x4532
#define DID_EHL_SKU12 0x4518
#define DID_EHL_SKU13 0x451a
#define DID_EHL_SKU14 0x4534
#define DID_EHL_SKU15 0x4536
/* Compute die IDs for ICL-NNPI with IBECC */
#define DID_ICL_SKU8 0x4581
#define DID_ICL_SKU10 0x4585
#define DID_ICL_SKU11 0x4589
#define DID_ICL_SKU12 0x458d
/* Compute die IDs for Tiger Lake with IBECC */
#define DID_TGL_SKU 0x9a14
/* Compute die IDs for Alder Lake with IBECC */
#define DID_ADL_SKU1 0x4601
#define DID_ADL_SKU2 0x4602
#define DID_ADL_SKU3 0x4621
#define DID_ADL_SKU4 0x4641
/* Compute die IDs for Alder Lake-N with IBECC */
#define DID_ADL_N_SKU1 0x4614
#define DID_ADL_N_SKU2 0x4617
#define DID_ADL_N_SKU3 0x461b
#define DID_ADL_N_SKU4 0x461c
#define DID_ADL_N_SKU5 0x4673
#define DID_ADL_N_SKU6 0x4674
#define DID_ADL_N_SKU7 0x4675
#define DID_ADL_N_SKU8 0x4677
#define DID_ADL_N_SKU9 0x4678
#define DID_ADL_N_SKU10 0x4679
#define DID_ADL_N_SKU11 0x467c
#define DID_ADL_N_SKU12 0x4632
/* Compute die IDs for Raptor Lake-P with IBECC */
#define DID_RPL_P_SKU1 0xa706
#define DID_RPL_P_SKU2 0xa707
#define DID_RPL_P_SKU3 0xa708
#define DID_RPL_P_SKU4 0xa716
#define DID_RPL_P_SKU5 0xa718
/* Compute die IDs for Meteor Lake-PS with IBECC */
#define DID_MTL_PS_SKU1 0x7d21
#define DID_MTL_PS_SKU2 0x7d22
#define DID_MTL_PS_SKU3 0x7d23
#define DID_MTL_PS_SKU4 0x7d24
/* Compute die IDs for Meteor Lake-P with IBECC */
#define DID_MTL_P_SKU1 0x7d01
#define DID_MTL_P_SKU2 0x7d02
#define DID_MTL_P_SKU3 0x7d14
/* Compute die IDs for Arrow Lake-UH with IBECC */
#define DID_ARL_UH_SKU1 0x7d06
#define DID_ARL_UH_SKU2 0x7d20
#define DID_ARL_UH_SKU3 0x7d30
  227. static int get_mchbar(struct pci_dev *pdev, u64 *mchbar)
  228. {
  229. union {
  230. u64 v;
  231. struct {
  232. u32 v_lo;
  233. u32 v_hi;
  234. };
  235. } u;
  236. if (pci_read_config_dword(pdev, MCHBAR_OFFSET, &u.v_lo)) {
  237. igen6_printk(KERN_ERR, "Failed to read lower MCHBAR\n");
  238. return -ENODEV;
  239. }
  240. if (pci_read_config_dword(pdev, MCHBAR_OFFSET + 4, &u.v_hi)) {
  241. igen6_printk(KERN_ERR, "Failed to read upper MCHBAR\n");
  242. return -ENODEV;
  243. }
  244. if (!(u.v & MCHBAR_EN)) {
  245. igen6_printk(KERN_ERR, "MCHBAR is disabled\n");
  246. return -ENODEV;
  247. }
  248. *mchbar = MCHBAR_BASE(u.v);
  249. return 0;
  250. }
  251. static bool ehl_ibecc_available(struct pci_dev *pdev)
  252. {
  253. u32 v;
  254. if (pci_read_config_dword(pdev, CAPID_C_OFFSET, &v))
  255. return false;
  256. return !!(CAPID_C_IBECC & v);
  257. }
  258. static u64 ehl_err_addr_to_sys_addr(u64 eaddr, int mc)
  259. {
  260. return eaddr;
  261. }
  262. static u64 ehl_err_addr_to_imc_addr(u64 eaddr, int mc)
  263. {
  264. if (eaddr < igen6_tolud)
  265. return eaddr;
  266. if (igen6_tom <= _4GB)
  267. return eaddr + igen6_tolud - _4GB;
  268. if (eaddr >= igen6_tom)
  269. return eaddr + igen6_tolud - igen6_tom;
  270. return eaddr;
  271. }
  272. static bool icl_ibecc_available(struct pci_dev *pdev)
  273. {
  274. u32 v;
  275. if (pci_read_config_dword(pdev, CAPID_C_OFFSET, &v))
  276. return false;
  277. return !(CAPID_C_IBECC & v) &&
  278. (boot_cpu_data.x86_stepping >= 1);
  279. }
  280. static bool tgl_ibecc_available(struct pci_dev *pdev)
  281. {
  282. u32 v;
  283. if (pci_read_config_dword(pdev, CAPID_E_OFFSET, &v))
  284. return false;
  285. return !(CAPID_E_IBECC & v);
  286. }
  287. static bool mtl_p_ibecc_available(struct pci_dev *pdev)
  288. {
  289. u32 v;
  290. if (pci_read_config_dword(pdev, CAPID_E_OFFSET, &v))
  291. return false;
  292. return !(CAPID_E_IBECC_BIT18 & v);
  293. }
  294. static bool mtl_ps_ibecc_available(struct pci_dev *pdev)
  295. {
  296. #define MCHBAR_MEMSS_IBECCDIS 0x13c00
  297. void __iomem *window;
  298. u64 mchbar;
  299. u32 val;
  300. if (get_mchbar(pdev, &mchbar))
  301. return false;
  302. window = ioremap(mchbar, MCHBAR_SIZE * 2);
  303. if (!window) {
  304. igen6_printk(KERN_ERR, "Failed to ioremap 0x%llx\n", mchbar);
  305. return false;
  306. }
  307. val = readl(window + MCHBAR_MEMSS_IBECCDIS);
  308. iounmap(window);
  309. /* Bit6: 1 - IBECC is disabled, 0 - IBECC isn't disabled */
  310. return !GET_BITFIELD(val, 6, 6);
  311. }
  312. static u64 mem_addr_to_sys_addr(u64 maddr)
  313. {
  314. if (maddr < igen6_tolud)
  315. return maddr;
  316. if (igen6_tom <= _4GB)
  317. return maddr - igen6_tolud + _4GB;
  318. if (maddr < _4GB)
  319. return maddr - igen6_tolud + igen6_tom;
  320. return maddr;
  321. }
  322. static u64 mem_slice_hash(u64 addr, u64 mask, u64 hash_init, int intlv_bit)
  323. {
  324. u64 hash_addr = addr & mask, hash = hash_init;
  325. u64 intlv = (addr >> intlv_bit) & 1;
  326. int i;
  327. for (i = 6; i < 20; i++)
  328. hash ^= (hash_addr >> i) & 1;
  329. return hash ^ intlv;
  330. }
  331. static u64 tgl_err_addr_to_mem_addr(u64 eaddr, int mc)
  332. {
  333. u64 maddr, hash, mask, ms_s_size;
  334. int intlv_bit;
  335. u32 ms_hash;
  336. ms_s_size = igen6_pvt->ms_s_size;
  337. if (eaddr >= ms_s_size)
  338. return eaddr + ms_s_size;
  339. ms_hash = igen6_pvt->ms_hash;
  340. mask = MEM_SLICE_HASH_MASK(ms_hash);
  341. intlv_bit = MEM_SLICE_HASH_LSB_MASK_BIT(ms_hash) + 6;
  342. maddr = GET_BITFIELD(eaddr, intlv_bit, 63) << (intlv_bit + 1) |
  343. GET_BITFIELD(eaddr, 0, intlv_bit - 1);
  344. hash = mem_slice_hash(maddr, mask, mc, intlv_bit);
  345. return maddr | (hash << intlv_bit);
  346. }
  347. static u64 tgl_err_addr_to_sys_addr(u64 eaddr, int mc)
  348. {
  349. u64 maddr = tgl_err_addr_to_mem_addr(eaddr, mc);
  350. return mem_addr_to_sys_addr(maddr);
  351. }
  352. static u64 tgl_err_addr_to_imc_addr(u64 eaddr, int mc)
  353. {
  354. return eaddr;
  355. }
  356. static u64 adl_err_addr_to_sys_addr(u64 eaddr, int mc)
  357. {
  358. return mem_addr_to_sys_addr(eaddr);
  359. }
  360. static u64 adl_err_addr_to_imc_addr(u64 eaddr, int mc)
  361. {
  362. u64 imc_addr, ms_s_size = igen6_pvt->ms_s_size;
  363. struct igen6_imc *imc = &igen6_pvt->imc[mc];
  364. int intlv_bit;
  365. u32 mc_hash;
  366. if (eaddr >= 2 * ms_s_size)
  367. return eaddr - ms_s_size;
  368. mc_hash = readl(imc->window + MAD_MC_HASH_OFFSET);
  369. intlv_bit = MAC_MC_HASH_LSB(mc_hash) + 6;
  370. imc_addr = GET_BITFIELD(eaddr, intlv_bit + 1, 63) << intlv_bit |
  371. GET_BITFIELD(eaddr, 0, intlv_bit - 1);
  372. return imc_addr;
  373. }
  374. static u64 rpl_p_err_addr(u64 ecclog)
  375. {
  376. return ECC_ERROR_LOG_ADDR45(ecclog);
  377. }
/*
 * Per-platform resource configurations, referenced from igen6_pci_tbl.
 * Configs that leave .err_addr unset use the default address extraction
 * in ecclog_work_cb().
 */

/* Elkhart Lake */
static struct res_config ehl_cfg = {
	.num_imc = 1,
	.imc_base = 0x5000,
	.ibecc_base = 0xdc00,
	.ibecc_available = ehl_ibecc_available,
	.ibecc_error_log_offset = 0x170,
	.err_addr_to_sys_addr = ehl_err_addr_to_sys_addr,
	.err_addr_to_imc_addr = ehl_err_addr_to_imc_addr,
};
/* ICL-NNPI: shares the EHL address conversions */
static struct res_config icl_cfg = {
	.num_imc = 1,
	.imc_base = 0x5000,
	.ibecc_base = 0xd800,
	.ibecc_error_log_offset = 0x170,
	.ibecc_available = icl_ibecc_available,
	.err_addr_to_sys_addr = ehl_err_addr_to_sys_addr,
	.err_addr_to_imc_addr = ehl_err_addr_to_imc_addr,
};
/* Tiger Lake: the only config here that sets the cmf and ms_hash fields */
static struct res_config tgl_cfg = {
	.machine_check = true,
	.num_imc = 2,
	.imc_base = 0x5000,
	.cmf_base = 0x11000,
	.cmf_size = 0x800,
	.ms_hash_offset = 0xac,
	.ibecc_base = 0xd400,
	.ibecc_error_log_offset = 0x170,
	.ibecc_available = tgl_ibecc_available,
	.err_addr_to_sys_addr = tgl_err_addr_to_sys_addr,
	.err_addr_to_imc_addr = tgl_err_addr_to_imc_addr,
};
/* Alder Lake */
static struct res_config adl_cfg = {
	.machine_check = true,
	.num_imc = 2,
	.imc_base = 0xd800,
	.ibecc_base = 0xd400,
	.ibecc_error_log_offset = 0x68,
	.ibecc_available = tgl_ibecc_available,
	.err_addr_to_sys_addr = adl_err_addr_to_sys_addr,
	.err_addr_to_imc_addr = adl_err_addr_to_imc_addr,
};
/* Alder Lake-N: same as adl_cfg but with a single memory controller */
static struct res_config adl_n_cfg = {
	.machine_check = true,
	.num_imc = 1,
	.imc_base = 0xd800,
	.ibecc_base = 0xd400,
	.ibecc_error_log_offset = 0x68,
	.ibecc_available = tgl_ibecc_available,
	.err_addr_to_sys_addr = adl_err_addr_to_sys_addr,
	.err_addr_to_imc_addr = adl_err_addr_to_imc_addr,
};
/* Raptor Lake-P: same as adl_cfg plus a custom error address extractor */
static struct res_config rpl_p_cfg = {
	.machine_check = true,
	.num_imc = 2,
	.imc_base = 0xd800,
	.ibecc_base = 0xd400,
	.ibecc_error_log_offset = 0x68,
	.ibecc_available = tgl_ibecc_available,
	.err_addr = rpl_p_err_addr,
	.err_addr_to_sys_addr = adl_err_addr_to_sys_addr,
	.err_addr_to_imc_addr = adl_err_addr_to_imc_addr,
};
/* Meteor Lake-PS: availability is probed through MCHBAR MMIO */
static struct res_config mtl_ps_cfg = {
	.machine_check = true,
	.num_imc = 2,
	.imc_base = 0xd800,
	.ibecc_base = 0xd400,
	.ibecc_error_log_offset = 0x170,
	.ibecc_available = mtl_ps_ibecc_available,
	.err_addr_to_sys_addr = adl_err_addr_to_sys_addr,
	.err_addr_to_imc_addr = adl_err_addr_to_imc_addr,
};
/* Meteor Lake-P (also used for the Arrow Lake-UH device IDs) */
static struct res_config mtl_p_cfg = {
	.machine_check = true,
	.num_imc = 2,
	.imc_base = 0xd800,
	.ibecc_base = 0xd400,
	.ibecc_error_log_offset = 0x170,
	.ibecc_available = mtl_p_ibecc_available,
	.err_addr_to_sys_addr = adl_err_addr_to_sys_addr,
	.err_addr_to_imc_addr = adl_err_addr_to_imc_addr,
};
/*
 * Supported devices. The driver_data of each entry carries a pointer to
 * the struct res_config describing that platform. The table is terminated
 * by an empty entry.
 */
static const struct pci_device_id igen6_pci_tbl[] = {
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU5), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU6), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU7), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU8), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU9), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU10), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU11), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU12), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU13), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU14), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU15), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_ICL_SKU8), (kernel_ulong_t)&icl_cfg },
	{ PCI_VDEVICE(INTEL, DID_ICL_SKU10), (kernel_ulong_t)&icl_cfg },
	{ PCI_VDEVICE(INTEL, DID_ICL_SKU11), (kernel_ulong_t)&icl_cfg },
	{ PCI_VDEVICE(INTEL, DID_ICL_SKU12), (kernel_ulong_t)&icl_cfg },
	{ PCI_VDEVICE(INTEL, DID_TGL_SKU), (kernel_ulong_t)&tgl_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_SKU1), (kernel_ulong_t)&adl_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_SKU2), (kernel_ulong_t)&adl_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_SKU3), (kernel_ulong_t)&adl_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_SKU4), (kernel_ulong_t)&adl_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU1), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU2), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU3), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU4), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU5), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU6), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU7), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU8), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU9), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU10), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU11), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU12), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_RPL_P_SKU1), (kernel_ulong_t)&rpl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_RPL_P_SKU2), (kernel_ulong_t)&rpl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_RPL_P_SKU3), (kernel_ulong_t)&rpl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_RPL_P_SKU4), (kernel_ulong_t)&rpl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_RPL_P_SKU5), (kernel_ulong_t)&rpl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_MTL_PS_SKU1), (kernel_ulong_t)&mtl_ps_cfg },
	{ PCI_VDEVICE(INTEL, DID_MTL_PS_SKU2), (kernel_ulong_t)&mtl_ps_cfg },
	{ PCI_VDEVICE(INTEL, DID_MTL_PS_SKU3), (kernel_ulong_t)&mtl_ps_cfg },
	{ PCI_VDEVICE(INTEL, DID_MTL_PS_SKU4), (kernel_ulong_t)&mtl_ps_cfg },
	{ PCI_VDEVICE(INTEL, DID_MTL_P_SKU1), (kernel_ulong_t)&mtl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_MTL_P_SKU2), (kernel_ulong_t)&mtl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_MTL_P_SKU3), (kernel_ulong_t)&mtl_p_cfg },
	/* Arrow Lake-UH reuses the Meteor Lake-P configuration */
	{ PCI_VDEVICE(INTEL, DID_ARL_UH_SKU1), (kernel_ulong_t)&mtl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_ARL_UH_SKU2), (kernel_ulong_t)&mtl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_ARL_UH_SKU3), (kernel_ulong_t)&mtl_p_cfg },
	{ },
};
MODULE_DEVICE_TABLE(pci, igen6_pci_tbl);
  511. static enum dev_type get_width(int dimm_l, u32 mad_dimm)
  512. {
  513. u32 w = dimm_l ? MAD_DIMM_CH_DLW(mad_dimm) :
  514. MAD_DIMM_CH_DSW(mad_dimm);
  515. switch (w) {
  516. case 0:
  517. return DEV_X8;
  518. case 1:
  519. return DEV_X16;
  520. case 2:
  521. return DEV_X32;
  522. default:
  523. return DEV_UNKNOWN;
  524. }
  525. }
  526. static enum mem_type get_memory_type(u32 mad_inter)
  527. {
  528. u32 t = MAD_INTER_CHANNEL_DDR_TYPE(mad_inter);
  529. switch (t) {
  530. case 0:
  531. return MEM_DDR4;
  532. case 1:
  533. return MEM_DDR3;
  534. case 2:
  535. return MEM_LPDDR3;
  536. case 3:
  537. return MEM_LPDDR4;
  538. case 4:
  539. return MEM_WIO2;
  540. default:
  541. return MEM_UNKNOWN;
  542. }
  543. }
  544. static int decode_chan_idx(u64 addr, u64 mask, int intlv_bit)
  545. {
  546. u64 hash_addr = addr & mask, hash = 0;
  547. u64 intlv = (addr >> intlv_bit) & 1;
  548. int i;
  549. for (i = 6; i < 20; i++)
  550. hash ^= (hash_addr >> i) & 1;
  551. return (int)hash ^ intlv;
  552. }
  553. static u64 decode_channel_addr(u64 addr, int intlv_bit)
  554. {
  555. u64 channel_addr;
  556. /* Remove the interleave bit and shift upper part down to fill gap */
  557. channel_addr = GET_BITFIELD(addr, intlv_bit + 1, 63) << intlv_bit;
  558. channel_addr |= GET_BITFIELD(addr, 0, intlv_bit - 1);
  559. return channel_addr;
  560. }
  561. static void decode_addr(u64 addr, u32 hash, u64 s_size, int l_map,
  562. int *idx, u64 *sub_addr)
  563. {
  564. int intlv_bit = CHANNEL_HASH_LSB_MASK_BIT(hash) + 6;
  565. if (addr > 2 * s_size) {
  566. *sub_addr = addr - s_size;
  567. *idx = l_map;
  568. return;
  569. }
  570. if (CHANNEL_HASH_MODE(hash)) {
  571. *sub_addr = decode_channel_addr(addr, intlv_bit);
  572. *idx = decode_chan_idx(addr, CHANNEL_HASH_MASK(hash), intlv_bit);
  573. } else {
  574. *sub_addr = decode_channel_addr(addr, 6);
  575. *idx = GET_BITFIELD(addr, 6, 6);
  576. }
  577. }
  578. static int igen6_decode(struct decoded_addr *res)
  579. {
  580. struct igen6_imc *imc = &igen6_pvt->imc[res->mc];
  581. u64 addr = res->imc_addr, sub_addr, s_size;
  582. int idx, l_map;
  583. u32 hash;
  584. if (addr >= igen6_tom) {
  585. edac_dbg(0, "Address 0x%llx out of range\n", addr);
  586. return -EINVAL;
  587. }
  588. /* Decode channel */
  589. hash = readl(imc->window + CHANNEL_HASH_OFFSET);
  590. s_size = imc->ch_s_size;
  591. l_map = imc->ch_l_map;
  592. decode_addr(addr, hash, s_size, l_map, &idx, &sub_addr);
  593. res->channel_idx = idx;
  594. res->channel_addr = sub_addr;
  595. /* Decode sub-channel/DIMM */
  596. hash = readl(imc->window + CHANNEL_EHASH_OFFSET);
  597. s_size = imc->dimm_s_size[idx];
  598. l_map = imc->dimm_l_map[idx];
  599. decode_addr(res->channel_addr, hash, s_size, l_map, &idx, &sub_addr);
  600. res->sub_channel_idx = idx;
  601. res->sub_channel_addr = sub_addr;
  602. return 0;
  603. }
  604. static void igen6_output_error(struct decoded_addr *res,
  605. struct mem_ctl_info *mci, u64 ecclog)
  606. {
  607. enum hw_event_mc_err_type type = ecclog & ECC_ERROR_LOG_UE ?
  608. HW_EVENT_ERR_UNCORRECTED :
  609. HW_EVENT_ERR_CORRECTED;
  610. edac_mc_handle_error(type, mci, 1,
  611. res->sys_addr >> PAGE_SHIFT,
  612. res->sys_addr & ~PAGE_MASK,
  613. ECC_ERROR_LOG_SYND(ecclog),
  614. res->channel_idx, res->sub_channel_idx,
  615. -1, "", "");
  616. }
  617. static struct gen_pool *ecclog_gen_pool_create(void)
  618. {
  619. struct gen_pool *pool;
  620. pool = gen_pool_create(ilog2(sizeof(struct ecclog_node)), -1);
  621. if (!pool)
  622. return NULL;
  623. if (gen_pool_add(pool, (unsigned long)ecclog_buf, ECCLOG_POOL_SIZE, -1)) {
  624. gen_pool_destroy(pool);
  625. return NULL;
  626. }
  627. return pool;
  628. }
  629. static int ecclog_gen_pool_add(int mc, u64 ecclog)
  630. {
  631. struct ecclog_node *node;
  632. node = (void *)gen_pool_alloc(ecclog_pool, sizeof(*node));
  633. if (!node)
  634. return -ENOMEM;
  635. node->mc = mc;
  636. node->ecclog = ecclog;
  637. llist_add(&node->llnode, &ecclog_llist);
  638. return 0;
  639. }
  640. /*
  641. * Either the memory-mapped I/O status register ECC_ERROR_LOG or the PCI
  642. * configuration space status register ERRSTS can indicate whether a
  643. * correctable error or an uncorrectable error occurred. We only use the
  644. * ECC_ERROR_LOG register to check error type, but need to clear both
  645. * registers to enable future error events.
  646. */
  647. static u64 ecclog_read_and_clear(struct igen6_imc *imc)
  648. {
  649. u64 ecclog = readq(imc->window + ECC_ERROR_LOG_OFFSET);
  650. if (ecclog & (ECC_ERROR_LOG_CE | ECC_ERROR_LOG_UE)) {
  651. /* Clear CE/UE bits by writing 1s */
  652. writeq(ecclog, imc->window + ECC_ERROR_LOG_OFFSET);
  653. return ecclog;
  654. }
  655. return 0;
  656. }
  657. static void errsts_clear(struct igen6_imc *imc)
  658. {
  659. u16 errsts;
  660. if (pci_read_config_word(imc->pdev, ERRSTS_OFFSET, &errsts)) {
  661. igen6_printk(KERN_ERR, "Failed to read ERRSTS\n");
  662. return;
  663. }
  664. /* Clear CE/UE bits by writing 1s */
  665. if (errsts & (ERRSTS_CE | ERRSTS_UE))
  666. pci_write_config_word(imc->pdev, ERRSTS_OFFSET, errsts);
  667. }
  668. static int errcmd_enable_error_reporting(bool enable)
  669. {
  670. struct igen6_imc *imc = &igen6_pvt->imc[0];
  671. u16 errcmd;
  672. int rc;
  673. rc = pci_read_config_word(imc->pdev, ERRCMD_OFFSET, &errcmd);
  674. if (rc)
  675. return pcibios_err_to_errno(rc);
  676. if (enable)
  677. errcmd |= ERRCMD_CE | ERRSTS_UE;
  678. else
  679. errcmd &= ~(ERRCMD_CE | ERRSTS_UE);
  680. rc = pci_write_config_word(imc->pdev, ERRCMD_OFFSET, errcmd);
  681. if (rc)
  682. return pcibios_err_to_errno(rc);
  683. return 0;
  684. }
  685. static int ecclog_handler(void)
  686. {
  687. struct igen6_imc *imc;
  688. int i, n = 0;
  689. u64 ecclog;
  690. for (i = 0; i < res_cfg->num_imc; i++) {
  691. imc = &igen6_pvt->imc[i];
  692. /* errsts_clear() isn't NMI-safe. Delay it in the IRQ context */
  693. ecclog = ecclog_read_and_clear(imc);
  694. if (!ecclog)
  695. continue;
  696. if (!ecclog_gen_pool_add(i, ecclog))
  697. irq_work_queue(&ecclog_irq_work);
  698. n++;
  699. }
  700. return n;
  701. }
  702. static void ecclog_work_cb(struct work_struct *work)
  703. {
  704. struct ecclog_node *node, *tmp;
  705. struct mem_ctl_info *mci;
  706. struct llist_node *head;
  707. struct decoded_addr res;
  708. u64 eaddr;
  709. head = llist_del_all(&ecclog_llist);
  710. if (!head)
  711. return;
  712. llist_for_each_entry_safe(node, tmp, head, llnode) {
  713. memset(&res, 0, sizeof(res));
  714. if (res_cfg->err_addr)
  715. eaddr = res_cfg->err_addr(node->ecclog);
  716. else
  717. eaddr = ECC_ERROR_LOG_ADDR(node->ecclog) <<
  718. ECC_ERROR_LOG_ADDR_SHIFT;
  719. res.mc = node->mc;
  720. res.sys_addr = res_cfg->err_addr_to_sys_addr(eaddr, res.mc);
  721. res.imc_addr = res_cfg->err_addr_to_imc_addr(eaddr, res.mc);
  722. mci = igen6_pvt->imc[res.mc].mci;
  723. edac_dbg(2, "MC %d, ecclog = 0x%llx\n", node->mc, node->ecclog);
  724. igen6_mc_printk(mci, KERN_DEBUG, "HANDLING IBECC MEMORY ERROR\n");
  725. igen6_mc_printk(mci, KERN_DEBUG, "ADDR 0x%llx ", res.sys_addr);
  726. if (!igen6_decode(&res))
  727. igen6_output_error(&res, mci, node->ecclog);
  728. gen_pool_free(ecclog_pool, (unsigned long)node, sizeof(*node));
  729. }
  730. }
  731. static void ecclog_irq_work_cb(struct irq_work *irq_work)
  732. {
  733. int i;
  734. for (i = 0; i < res_cfg->num_imc; i++)
  735. errsts_clear(&igen6_pvt->imc[i]);
  736. if (!llist_empty(&ecclog_llist))
  737. schedule_work(&ecclog_work);
  738. }
  739. static int ecclog_nmi_handler(unsigned int cmd, struct pt_regs *regs)
  740. {
  741. unsigned char reason;
  742. if (!ecclog_handler())
  743. return NMI_DONE;
  744. /*
  745. * Both In-Band ECC correctable error and uncorrectable error are
  746. * reported by SERR# NMI. The NMI generic code (see pci_serr_error())
  747. * doesn't clear the bit NMI_REASON_CLEAR_SERR (in port 0x61) to
  748. * re-enable the SERR# NMI after NMI handling. So clear this bit here
  749. * to re-enable SERR# NMI for receiving future In-Band ECC errors.
  750. */
  751. reason = x86_platform.get_nmi_reason() & NMI_REASON_CLEAR_MASK;
  752. reason |= NMI_REASON_CLEAR_SERR;
  753. outb(reason, NMI_REASON_PORT);
  754. reason &= ~NMI_REASON_CLEAR_SERR;
  755. outb(reason, NMI_REASON_PORT);
  756. return NMI_HANDLED;
  757. }
  758. static int ecclog_mce_handler(struct notifier_block *nb, unsigned long val,
  759. void *data)
  760. {
  761. struct mce *mce = (struct mce *)data;
  762. char *type;
  763. if (mce->kflags & MCE_HANDLED_CEC)
  764. return NOTIFY_DONE;
  765. /*
  766. * Ignore unless this is a memory related error.
  767. * We don't check the bit MCI_STATUS_ADDRV of MCi_STATUS here,
  768. * since this bit isn't set on some CPU (e.g., Tiger Lake UP3).
  769. */
  770. if ((mce->status & 0xefff) >> 7 != 1)
  771. return NOTIFY_DONE;
  772. if (mce->mcgstatus & MCG_STATUS_MCIP)
  773. type = "Exception";
  774. else
  775. type = "Event";
  776. edac_dbg(0, "CPU %d: Machine Check %s: 0x%llx Bank %d: 0x%llx\n",
  777. mce->extcpu, type, mce->mcgstatus,
  778. mce->bank, mce->status);
  779. edac_dbg(0, "TSC 0x%llx\n", mce->tsc);
  780. edac_dbg(0, "ADDR 0x%llx\n", mce->addr);
  781. edac_dbg(0, "MISC 0x%llx\n", mce->misc);
  782. edac_dbg(0, "PROCESSOR %u:0x%x TIME %llu SOCKET %u APIC 0x%x\n",
  783. mce->cpuvendor, mce->cpuid, mce->time,
  784. mce->socketid, mce->apicid);
  785. /*
  786. * We just use the Machine Check for the memory error notification.
  787. * Each memory controller is associated with an IBECC instance.
  788. * Directly read and clear the error information(error address and
  789. * error type) on all the IBECC instances so that we know on which
  790. * memory controller the memory error(s) occurred.
  791. */
  792. if (!ecclog_handler())
  793. return NOTIFY_DONE;
  794. mce->kflags |= MCE_HANDLED_EDAC;
  795. return NOTIFY_DONE;
  796. }
  797. static struct notifier_block ecclog_mce_dec = {
  798. .notifier_call = ecclog_mce_handler,
  799. .priority = MCE_PRIO_EDAC,
  800. };
  801. static bool igen6_check_ecc(struct igen6_imc *imc)
  802. {
  803. u32 activate = readl(imc->window + IBECC_ACTIVATE_OFFSET);
  804. return !!(activate & IBECC_ACTIVATE_EN);
  805. }
  806. static int igen6_get_dimm_config(struct mem_ctl_info *mci)
  807. {
  808. struct igen6_imc *imc = mci->pvt_info;
  809. u32 mad_inter, mad_intra, mad_dimm;
  810. int i, j, ndimms, mc = imc->mc;
  811. struct dimm_info *dimm;
  812. enum mem_type mtype;
  813. enum dev_type dtype;
  814. u64 dsize;
  815. bool ecc;
  816. edac_dbg(2, "\n");
  817. mad_inter = readl(imc->window + MAD_INTER_CHANNEL_OFFSET);
  818. mtype = get_memory_type(mad_inter);
  819. ecc = igen6_check_ecc(imc);
  820. imc->ch_s_size = MAD_INTER_CHANNEL_CH_S_SIZE(mad_inter);
  821. imc->ch_l_map = MAD_INTER_CHANNEL_CH_L_MAP(mad_inter);
  822. for (i = 0; i < NUM_CHANNELS; i++) {
  823. mad_intra = readl(imc->window + MAD_INTRA_CH0_OFFSET + i * 4);
  824. mad_dimm = readl(imc->window + MAD_DIMM_CH0_OFFSET + i * 4);
  825. imc->dimm_l_size[i] = MAD_DIMM_CH_DIMM_L_SIZE(mad_dimm);
  826. imc->dimm_s_size[i] = MAD_DIMM_CH_DIMM_S_SIZE(mad_dimm);
  827. imc->dimm_l_map[i] = MAD_INTRA_CH_DIMM_L_MAP(mad_intra);
  828. imc->size += imc->dimm_s_size[i];
  829. imc->size += imc->dimm_l_size[i];
  830. ndimms = 0;
  831. for (j = 0; j < NUM_DIMMS; j++) {
  832. dimm = edac_get_dimm(mci, i, j, 0);
  833. if (j ^ imc->dimm_l_map[i]) {
  834. dtype = get_width(0, mad_dimm);
  835. dsize = imc->dimm_s_size[i];
  836. } else {
  837. dtype = get_width(1, mad_dimm);
  838. dsize = imc->dimm_l_size[i];
  839. }
  840. if (!dsize)
  841. continue;
  842. dimm->grain = 64;
  843. dimm->mtype = mtype;
  844. dimm->dtype = dtype;
  845. dimm->nr_pages = MiB_TO_PAGES(dsize >> 20);
  846. dimm->edac_mode = EDAC_SECDED;
  847. snprintf(dimm->label, sizeof(dimm->label),
  848. "MC#%d_Chan#%d_DIMM#%d", mc, i, j);
  849. edac_dbg(0, "MC %d, Channel %d, DIMM %d, Size %llu MiB (%u pages)\n",
  850. mc, i, j, dsize >> 20, dimm->nr_pages);
  851. ndimms++;
  852. }
  853. if (ndimms && !ecc) {
  854. igen6_printk(KERN_ERR, "MC%d In-Band ECC is disabled\n", mc);
  855. return -ENODEV;
  856. }
  857. }
  858. edac_dbg(0, "MC %d, total size %llu MiB\n", mc, imc->size >> 20);
  859. return 0;
  860. }
  861. #ifdef CONFIG_EDAC_DEBUG
  862. /* Top of upper usable DRAM */
  863. static u64 igen6_touud;
  864. #define TOUUD_OFFSET 0xa8
  865. static void igen6_reg_dump(struct igen6_imc *imc)
  866. {
  867. int i;
  868. edac_dbg(2, "CHANNEL_HASH : 0x%x\n",
  869. readl(imc->window + CHANNEL_HASH_OFFSET));
  870. edac_dbg(2, "CHANNEL_EHASH : 0x%x\n",
  871. readl(imc->window + CHANNEL_EHASH_OFFSET));
  872. edac_dbg(2, "MAD_INTER_CHANNEL: 0x%x\n",
  873. readl(imc->window + MAD_INTER_CHANNEL_OFFSET));
  874. edac_dbg(2, "ECC_ERROR_LOG : 0x%llx\n",
  875. readq(imc->window + ECC_ERROR_LOG_OFFSET));
  876. for (i = 0; i < NUM_CHANNELS; i++) {
  877. edac_dbg(2, "MAD_INTRA_CH%d : 0x%x\n", i,
  878. readl(imc->window + MAD_INTRA_CH0_OFFSET + i * 4));
  879. edac_dbg(2, "MAD_DIMM_CH%d : 0x%x\n", i,
  880. readl(imc->window + MAD_DIMM_CH0_OFFSET + i * 4));
  881. }
  882. edac_dbg(2, "TOLUD : 0x%x", igen6_tolud);
  883. edac_dbg(2, "TOUUD : 0x%llx", igen6_touud);
  884. edac_dbg(2, "TOM : 0x%llx", igen6_tom);
  885. }
  886. static struct dentry *igen6_test;
  887. static int debugfs_u64_set(void *data, u64 val)
  888. {
  889. u64 ecclog;
  890. if ((val >= igen6_tolud && val < _4GB) || val >= igen6_touud) {
  891. edac_dbg(0, "Address 0x%llx out of range\n", val);
  892. return 0;
  893. }
  894. pr_warn_once("Fake error to 0x%llx injected via debugfs\n", val);
  895. val >>= ECC_ERROR_LOG_ADDR_SHIFT;
  896. ecclog = (val << ECC_ERROR_LOG_ADDR_SHIFT) | ECC_ERROR_LOG_CE;
  897. if (!ecclog_gen_pool_add(0, ecclog))
  898. irq_work_queue(&ecclog_irq_work);
  899. return 0;
  900. }
  901. DEFINE_SIMPLE_ATTRIBUTE(fops_u64_wo, NULL, debugfs_u64_set, "%llu\n");
  902. static void igen6_debug_setup(void)
  903. {
  904. igen6_test = edac_debugfs_create_dir("igen6_test");
  905. if (!igen6_test)
  906. return;
  907. if (!edac_debugfs_create_file("addr", 0200, igen6_test,
  908. NULL, &fops_u64_wo)) {
  909. debugfs_remove(igen6_test);
  910. igen6_test = NULL;
  911. }
  912. }
  913. static void igen6_debug_teardown(void)
  914. {
  915. debugfs_remove_recursive(igen6_test);
  916. }
  917. #else
  918. static void igen6_reg_dump(struct igen6_imc *imc) {}
  919. static void igen6_debug_setup(void) {}
  920. static void igen6_debug_teardown(void) {}
  921. #endif
  922. static int igen6_pci_setup(struct pci_dev *pdev, u64 *mchbar)
  923. {
  924. union {
  925. u64 v;
  926. struct {
  927. u32 v_lo;
  928. u32 v_hi;
  929. };
  930. } u;
  931. edac_dbg(2, "\n");
  932. if (!res_cfg->ibecc_available(pdev)) {
  933. edac_dbg(2, "No In-Band ECC IP\n");
  934. goto fail;
  935. }
  936. if (pci_read_config_dword(pdev, TOLUD_OFFSET, &igen6_tolud)) {
  937. igen6_printk(KERN_ERR, "Failed to read TOLUD\n");
  938. goto fail;
  939. }
  940. igen6_tolud &= GENMASK(31, 20);
  941. if (pci_read_config_dword(pdev, TOM_OFFSET, &u.v_lo)) {
  942. igen6_printk(KERN_ERR, "Failed to read lower TOM\n");
  943. goto fail;
  944. }
  945. if (pci_read_config_dword(pdev, TOM_OFFSET + 4, &u.v_hi)) {
  946. igen6_printk(KERN_ERR, "Failed to read upper TOM\n");
  947. goto fail;
  948. }
  949. igen6_tom = u.v & GENMASK_ULL(38, 20);
  950. if (get_mchbar(pdev, mchbar))
  951. goto fail;
  952. #ifdef CONFIG_EDAC_DEBUG
  953. if (pci_read_config_dword(pdev, TOUUD_OFFSET, &u.v_lo))
  954. edac_dbg(2, "Failed to read lower TOUUD\n");
  955. else if (pci_read_config_dword(pdev, TOUUD_OFFSET + 4, &u.v_hi))
  956. edac_dbg(2, "Failed to read upper TOUUD\n");
  957. else
  958. igen6_touud = u.v & GENMASK_ULL(38, 20);
  959. #endif
  960. return 0;
  961. fail:
  962. return -ENODEV;
  963. }
  964. static int igen6_register_mci(int mc, u64 mchbar, struct pci_dev *pdev)
  965. {
  966. struct edac_mc_layer layers[2];
  967. struct mem_ctl_info *mci;
  968. struct igen6_imc *imc;
  969. void __iomem *window;
  970. int rc;
  971. edac_dbg(2, "\n");
  972. mchbar += mc * MCHBAR_SIZE;
  973. window = ioremap(mchbar, MCHBAR_SIZE);
  974. if (!window) {
  975. igen6_printk(KERN_ERR, "Failed to ioremap 0x%llx\n", mchbar);
  976. return -ENODEV;
  977. }
  978. layers[0].type = EDAC_MC_LAYER_CHANNEL;
  979. layers[0].size = NUM_CHANNELS;
  980. layers[0].is_virt_csrow = false;
  981. layers[1].type = EDAC_MC_LAYER_SLOT;
  982. layers[1].size = NUM_DIMMS;
  983. layers[1].is_virt_csrow = true;
  984. mci = edac_mc_alloc(mc, ARRAY_SIZE(layers), layers, 0);
  985. if (!mci) {
  986. rc = -ENOMEM;
  987. goto fail;
  988. }
  989. mci->ctl_name = kasprintf(GFP_KERNEL, "Intel_client_SoC MC#%d", mc);
  990. if (!mci->ctl_name) {
  991. rc = -ENOMEM;
  992. goto fail2;
  993. }
  994. mci->mtype_cap = MEM_FLAG_LPDDR4 | MEM_FLAG_DDR4;
  995. mci->edac_ctl_cap = EDAC_FLAG_SECDED;
  996. mci->edac_cap = EDAC_FLAG_SECDED;
  997. mci->mod_name = EDAC_MOD_STR;
  998. mci->dev_name = pci_name(pdev);
  999. mci->pvt_info = &igen6_pvt->imc[mc];
  1000. imc = mci->pvt_info;
  1001. device_initialize(&imc->dev);
  1002. /*
  1003. * EDAC core uses mci->pdev(pointer of structure device) as
  1004. * memory controller ID. The client SoCs attach one or more
  1005. * memory controllers to single pci_dev (single pci_dev->dev
  1006. * can be for multiple memory controllers).
  1007. *
  1008. * To make mci->pdev unique, assign pci_dev->dev to mci->pdev
  1009. * for the first memory controller and assign a unique imc->dev
  1010. * to mci->pdev for each non-first memory controller.
  1011. */
  1012. mci->pdev = mc ? &imc->dev : &pdev->dev;
  1013. imc->mc = mc;
  1014. imc->pdev = pdev;
  1015. imc->window = window;
  1016. igen6_reg_dump(imc);
  1017. rc = igen6_get_dimm_config(mci);
  1018. if (rc)
  1019. goto fail3;
  1020. rc = edac_mc_add_mc(mci);
  1021. if (rc) {
  1022. igen6_printk(KERN_ERR, "Failed to register mci#%d\n", mc);
  1023. goto fail3;
  1024. }
  1025. imc->mci = mci;
  1026. return 0;
  1027. fail3:
  1028. mci->pvt_info = NULL;
  1029. kfree(mci->ctl_name);
  1030. fail2:
  1031. edac_mc_free(mci);
  1032. fail:
  1033. iounmap(window);
  1034. return rc;
  1035. }
  1036. static void igen6_unregister_mcis(void)
  1037. {
  1038. struct mem_ctl_info *mci;
  1039. struct igen6_imc *imc;
  1040. int i;
  1041. edac_dbg(2, "\n");
  1042. for (i = 0; i < res_cfg->num_imc; i++) {
  1043. imc = &igen6_pvt->imc[i];
  1044. mci = imc->mci;
  1045. if (!mci)
  1046. continue;
  1047. edac_mc_del_mc(mci->pdev);
  1048. kfree(mci->ctl_name);
  1049. mci->pvt_info = NULL;
  1050. edac_mc_free(mci);
  1051. iounmap(imc->window);
  1052. }
  1053. }
  1054. static int igen6_mem_slice_setup(u64 mchbar)
  1055. {
  1056. struct igen6_imc *imc = &igen6_pvt->imc[0];
  1057. u64 base = mchbar + res_cfg->cmf_base;
  1058. u32 offset = res_cfg->ms_hash_offset;
  1059. u32 size = res_cfg->cmf_size;
  1060. u64 ms_s_size, ms_hash;
  1061. void __iomem *cmf;
  1062. int ms_l_map;
  1063. edac_dbg(2, "\n");
  1064. if (imc[0].size < imc[1].size) {
  1065. ms_s_size = imc[0].size;
  1066. ms_l_map = 1;
  1067. } else {
  1068. ms_s_size = imc[1].size;
  1069. ms_l_map = 0;
  1070. }
  1071. igen6_pvt->ms_s_size = ms_s_size;
  1072. igen6_pvt->ms_l_map = ms_l_map;
  1073. edac_dbg(0, "ms_s_size: %llu MiB, ms_l_map %d\n",
  1074. ms_s_size >> 20, ms_l_map);
  1075. if (!size)
  1076. return 0;
  1077. cmf = ioremap(base, size);
  1078. if (!cmf) {
  1079. igen6_printk(KERN_ERR, "Failed to ioremap cmf 0x%llx\n", base);
  1080. return -ENODEV;
  1081. }
  1082. ms_hash = readq(cmf + offset);
  1083. igen6_pvt->ms_hash = ms_hash;
  1084. edac_dbg(0, "MEM_SLICE_HASH: 0x%llx\n", ms_hash);
  1085. iounmap(cmf);
  1086. return 0;
  1087. }
  1088. static int register_err_handler(void)
  1089. {
  1090. int rc;
  1091. if (res_cfg->machine_check) {
  1092. mce_register_decode_chain(&ecclog_mce_dec);
  1093. return 0;
  1094. }
  1095. rc = register_nmi_handler(NMI_SERR, ecclog_nmi_handler,
  1096. 0, IGEN6_NMI_NAME);
  1097. if (rc) {
  1098. igen6_printk(KERN_ERR, "Failed to register NMI handler\n");
  1099. return rc;
  1100. }
  1101. return 0;
  1102. }
  1103. static void unregister_err_handler(void)
  1104. {
  1105. if (res_cfg->machine_check) {
  1106. mce_unregister_decode_chain(&ecclog_mce_dec);
  1107. return;
  1108. }
  1109. unregister_nmi_handler(NMI_SERR, IGEN6_NMI_NAME);
  1110. }
  1111. static int igen6_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
  1112. {
  1113. u64 mchbar;
  1114. int i, rc;
  1115. edac_dbg(2, "\n");
  1116. igen6_pvt = kzalloc(sizeof(*igen6_pvt), GFP_KERNEL);
  1117. if (!igen6_pvt)
  1118. return -ENOMEM;
  1119. res_cfg = (struct res_config *)ent->driver_data;
  1120. rc = igen6_pci_setup(pdev, &mchbar);
  1121. if (rc)
  1122. goto fail;
  1123. for (i = 0; i < res_cfg->num_imc; i++) {
  1124. rc = igen6_register_mci(i, mchbar, pdev);
  1125. if (rc)
  1126. goto fail2;
  1127. }
  1128. if (res_cfg->num_imc > 1) {
  1129. rc = igen6_mem_slice_setup(mchbar);
  1130. if (rc)
  1131. goto fail2;
  1132. }
  1133. ecclog_pool = ecclog_gen_pool_create();
  1134. if (!ecclog_pool) {
  1135. rc = -ENOMEM;
  1136. goto fail2;
  1137. }
  1138. INIT_WORK(&ecclog_work, ecclog_work_cb);
  1139. init_irq_work(&ecclog_irq_work, ecclog_irq_work_cb);
  1140. rc = register_err_handler();
  1141. if (rc)
  1142. goto fail3;
  1143. /* Enable error reporting */
  1144. rc = errcmd_enable_error_reporting(true);
  1145. if (rc) {
  1146. igen6_printk(KERN_ERR, "Failed to enable error reporting\n");
  1147. goto fail4;
  1148. }
  1149. /* Check if any pending errors before/during the registration of the error handler */
  1150. ecclog_handler();
  1151. igen6_debug_setup();
  1152. return 0;
  1153. fail4:
  1154. unregister_nmi_handler(NMI_SERR, IGEN6_NMI_NAME);
  1155. fail3:
  1156. gen_pool_destroy(ecclog_pool);
  1157. fail2:
  1158. igen6_unregister_mcis();
  1159. fail:
  1160. kfree(igen6_pvt);
  1161. return rc;
  1162. }
  1163. static void igen6_remove(struct pci_dev *pdev)
  1164. {
  1165. edac_dbg(2, "\n");
  1166. igen6_debug_teardown();
  1167. errcmd_enable_error_reporting(false);
  1168. unregister_err_handler();
  1169. irq_work_sync(&ecclog_irq_work);
  1170. flush_work(&ecclog_work);
  1171. gen_pool_destroy(ecclog_pool);
  1172. igen6_unregister_mcis();
  1173. kfree(igen6_pvt);
  1174. }
  1175. static struct pci_driver igen6_driver = {
  1176. .name = EDAC_MOD_STR,
  1177. .probe = igen6_probe,
  1178. .remove = igen6_remove,
  1179. .id_table = igen6_pci_tbl,
  1180. };
  1181. static int __init igen6_init(void)
  1182. {
  1183. const char *owner;
  1184. int rc;
  1185. edac_dbg(2, "\n");
  1186. if (ghes_get_devices())
  1187. return -EBUSY;
  1188. owner = edac_get_owner();
  1189. if (owner && strncmp(owner, EDAC_MOD_STR, sizeof(EDAC_MOD_STR)))
  1190. return -EBUSY;
  1191. edac_op_state = EDAC_OPSTATE_NMI;
  1192. rc = pci_register_driver(&igen6_driver);
  1193. if (rc)
  1194. return rc;
  1195. igen6_printk(KERN_INFO, "%s\n", IGEN6_REVISION);
  1196. return 0;
  1197. }
  1198. static void __exit igen6_exit(void)
  1199. {
  1200. edac_dbg(2, "\n");
  1201. pci_unregister_driver(&igen6_driver);
  1202. }
  1203. module_init(igen6_init);
  1204. module_exit(igen6_exit);
  1205. MODULE_LICENSE("GPL v2");
  1206. MODULE_AUTHOR("Qiuxu Zhuo");
  1207. MODULE_DESCRIPTION("MC Driver for Intel client SoC using In-Band ECC");