skx_common.h 7.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278
  1. /* SPDX-License-Identifier: GPL-2.0 */
  2. /*
  3. * Common codes for both the skx_edac driver and Intel 10nm server EDAC driver.
  4. * Originally split out from the skx_edac driver.
  5. *
  6. * Copyright (c) 2018, Intel Corporation.
  7. */
  8. #ifndef _SKX_COMM_EDAC_H
  9. #define _SKX_COMM_EDAC_H
  10. #include <linux/bits.h>
  11. #include <asm/mce.h>
  /*
   * NOTE(review): presumably the byte size of a message buffer; its users
   * are outside this header, so confirm before relying on that.
   */
  #define MSG_SIZE    1024
  /*
   * Logging wrappers.
   *
   * Both forward to the corresponding EDAC print helper and pass the fixed
   * string "skx" immediately before the caller's format string.
   */
  #define skx_printk(level, fmt, ...) \
      edac_printk(level, "skx", fmt, ##__VA_ARGS__)

  #define skx_mc_printk(mci, level, fmt, ...) \
      edac_mc_chipset_printk(mci, level, "skx", fmt, ##__VA_ARGS__)
  /*
   * Extract bits <lo>..<hi> (both inclusive) of register value <v>.
   * The selected bits are shifted down so that bit <lo> ends up at bit 0.
   */
  #define GET_BITFIELD(v, lo, hi) \
      ((GENMASK_ULL((hi), (lo)) & (v)) >> (lo))
  /* SKX limits */
  #define SKX_NUM_IMC             2   /* Memory controllers per socket */
  #define SKX_NUM_CHANNELS        3   /* Channels per memory controller */
  #define SKX_NUM_DIMMS           2   /* Max DIMMS per channel */

  /* I10NM limits, DDR side */
  #define I10NM_NUM_DDR_IMC       12
  #define I10NM_NUM_DDR_CHANNELS  2
  #define I10NM_NUM_DDR_DIMMS     2

  /* I10NM limits, HBM side */
  #define I10NM_NUM_HBM_IMC       16
  #define I10NM_NUM_HBM_CHANNELS  2
  #define I10NM_NUM_HBM_DIMMS     1

  /* I10NM totals: controllers add up, channels/DIMMs take the larger side */
  #define I10NM_NUM_IMC           (I10NM_NUM_DDR_IMC + I10NM_NUM_HBM_IMC)
  #define I10NM_NUM_CHANNELS      MAX(I10NM_NUM_DDR_CHANNELS, I10NM_NUM_HBM_CHANNELS)
  #define I10NM_NUM_DIMMS         MAX(I10NM_NUM_DDR_DIMMS, I10NM_NUM_HBM_DIMMS)

  /* Worst case over SKX and I10NM; these size the arrays in struct skx_dev */
  #define NUM_IMC                 MAX(SKX_NUM_IMC, I10NM_NUM_IMC)
  #define NUM_CHANNELS            MAX(SKX_NUM_CHANNELS, I10NM_NUM_CHANNELS)
  #define NUM_DIMMS               MAX(SKX_NUM_DIMMS, I10NM_NUM_DIMMS)
  /*
   * Presence tests on a register value <r>; each yields the selected bit
   * as 0 or 1. Which register <r> is read from is up to the caller.
   */
  #define IS_DIMM_PRESENT(r)          GET_BITFIELD((r), 15, 15)   /* bit 15 */
  #define IS_NVDIMM_PRESENT(r, i)     GET_BITFIELD((r), (i), (i)) /* bit <i> */
  /* ECC mode: the 4-bit field occupying bits 62:59 of value <m> */
  #define MCI_MISC_ECC_MODE(m)    (((m) >> 59) & 0xf)
  #define MCI_MISC_ECC_DDRT       8   /* read from DDRT */
  /*
   * Intel Architecture spec vol 3B, Table 15-10 "IA32_MCi_Status [15:0]
   * Compound Error Code Encoding" gives two patterns for memory errors:
   *
   *   000f 0000 1mmm cccc (binary)
   *   000f 0010 1mmm cccc (binary) [RAM used as cache]
   *
   *   f    = Correction Report Filtering Bit. If 1, subsequent errors
   *          won't be shown
   *   mmm  = error type
   *   cccc = channel
   *
   * The mask keeps every bit except f (bit 12) and the mmm/cccc fields
   * (bits 6:0), so what remains can be compared with the two codes below.
   */
  #define MCACOD_MEM_ERR_MASK     0xef80

  /*
   * Error in the memory of a 1-level memory system, or in the 2nd level
   * memory (the slow "far" memory) of a 2-level memory system.
   */
  #define MCACOD_MEM_CTL_ERR      0x0080

  /*
   * Error in the 1st level memory (the fast "near" memory used as cache)
   * of a 2-level memory system.
   */
  #define MCACOD_EXT_MEM_ERR      0x0280
  67. /*
  68. * Each cpu socket contains some pci devices that provide global
  69. * information, and also some that are local to each of the two
  70. * memory controllers on the die.
  71. */
  72. struct skx_dev {
  73. struct list_head list;
  74. u8 bus[4];
  75. int seg;
  76. struct pci_dev *sad_all;
  77. struct pci_dev *util_all;
  78. struct pci_dev *uracu; /* for i10nm CPU */
  79. struct pci_dev *pcu_cr3; /* for HBM memory detection */
  80. u32 mcroute;
  81. struct skx_imc {
  82. struct mem_ctl_info *mci;
  83. struct pci_dev *mdev; /* for i10nm CPU */
  84. void __iomem *mbase; /* for i10nm CPU */
  85. int chan_mmio_sz; /* for i10nm CPU */
  86. int num_channels; /* channels per memory controller */
  87. int num_dimms; /* dimms per channel */
  88. bool hbm_mc;
  89. u8 mc; /* system wide mc# */
  90. u8 lmc; /* socket relative mc# */
  91. u8 src_id, node_id;
  92. struct skx_channel {
  93. struct pci_dev *cdev;
  94. struct pci_dev *edev;
  95. u32 retry_rd_err_log_s;
  96. u32 retry_rd_err_log_d;
  97. u32 retry_rd_err_log_d2;
  98. struct skx_dimm {
  99. u8 close_pg;
  100. u8 bank_xor_enable;
  101. u8 fine_grain_bank;
  102. u8 rowbits;
  103. u8 colbits;
  104. } dimms[NUM_DIMMS];
  105. } chan[NUM_CHANNELS];
  106. } imc[NUM_IMC];
  107. };
  /*
   * Holds a single pointer to a struct skx_imc.
   * NOTE(review): presumably installed as per-mci private data; confirm
   * against the code that allocates it.
   */
  struct skx_pvt {
      struct skx_imc *imc;
  };
  /* Platform identifier; stored in the `type` member of struct res_config. */
  enum type {
      SKX   = 0,
      I10NM = 1,
      SPR   = 2,
      GNR   = 3
  };
  /*
   * Component indices. The INDEX_NM_* entries form a second group that
   * starts at INDEX_NM_FIRST; INDEX_MAX is the number of indices.
   * NOTE(review): "NM" presumably stands for near memory; confirm.
   */
  enum {
      INDEX_SOCKET     = 0,
      INDEX_MEMCTRL    = 1,
      INDEX_CHANNEL    = 2,
      INDEX_DIMM       = 3,
      INDEX_CS         = 4,
      INDEX_NM_FIRST   = 5,
      INDEX_NM_MEMCTRL = INDEX_NM_FIRST,
      INDEX_NM_CHANNEL = 6,
      INDEX_NM_DIMM    = 7,
      INDEX_NM_CS      = 8,
      INDEX_MAX        = 9
  };
  /*
   * Origin of a reported error.
   * NOTE(review): 1LM/2LM and NM/FM presumably mean 1-/2-level memory and
   * near/far memory, matching the MCACOD comments in this header; confirm.
   */
  enum error_source {
      ERR_SRC_1LM        = 0,
      ERR_SRC_2LM_NM     = 1,
      ERR_SRC_2LM_FM     = 2,
      ERR_SRC_NOT_MEMORY = 3,
  };
  /* Single-bit 64-bit masks, one per INDEX_NM_* component index */
  #define BIT_NM_MEMCTRL  BIT_ULL(INDEX_NM_MEMCTRL)
  #define BIT_NM_CHANNEL  BIT_ULL(INDEX_NM_CHANNEL)
  #define BIT_NM_DIMM     BIT_ULL(INDEX_NM_DIMM)
  #define BIT_NM_CS       BIT_ULL(INDEX_NM_CS)
  140. struct decoded_addr {
  141. struct mce *mce;
  142. struct skx_dev *dev;
  143. u64 addr;
  144. int socket;
  145. int imc;
  146. int channel;
  147. u64 chan_addr;
  148. int sktways;
  149. int chanways;
  150. int dimm;
  151. int cs;
  152. int rank;
  153. int channel_rank;
  154. u64 rank_address;
  155. int row;
  156. int column;
  157. int bank_address;
  158. int bank_group;
  159. bool decoded_by_adxl;
  160. };
  161. struct pci_bdf {
  162. u32 bus : 8;
  163. u32 dev : 5;
  164. u32 fun : 3;
  165. };
  166. struct res_config {
  167. enum type type;
  168. /* Configuration agent device ID */
  169. unsigned int decs_did;
  170. /* Default bus number configuration register offset */
  171. int busno_cfg_offset;
  172. /* DDR memory controllers per socket */
  173. int ddr_imc_num;
  174. /* DDR channels per DDR memory controller */
  175. int ddr_chan_num;
  176. /* DDR DIMMs per DDR memory channel */
  177. int ddr_dimm_num;
  178. /* Per DDR channel memory-mapped I/O size */
  179. int ddr_chan_mmio_sz;
  180. /* HBM memory controllers per socket */
  181. int hbm_imc_num;
  182. /* HBM channels per HBM memory controller */
  183. int hbm_chan_num;
  184. /* HBM DIMMs per HBM memory channel */
  185. int hbm_dimm_num;
  186. /* Per HBM channel memory-mapped I/O size */
  187. int hbm_chan_mmio_sz;
  188. bool support_ddr5;
  189. /* SAD device BDF */
  190. struct pci_bdf sad_all_bdf;
  191. /* PCU device BDF */
  192. struct pci_bdf pcu_cr3_bdf;
  193. /* UTIL device BDF */
  194. struct pci_bdf util_all_bdf;
  195. /* URACU device BDF */
  196. struct pci_bdf uracu_bdf;
  197. /* DDR mdev device BDF */
  198. struct pci_bdf ddr_mdev_bdf;
  199. /* HBM mdev device BDF */
  200. struct pci_bdf hbm_mdev_bdf;
  201. int sad_all_offset;
  202. /* Offsets of retry_rd_err_log registers */
  203. u32 *offsets_scrub;
  204. u32 *offsets_scrub_hbm0;
  205. u32 *offsets_scrub_hbm1;
  206. u32 *offsets_demand;
  207. u32 *offsets_demand2;
  208. u32 *offsets_demand_hbm0;
  209. u32 *offsets_demand_hbm1;
  210. };
  211. typedef int (*get_dimm_config_f)(struct mem_ctl_info *mci,
  212. struct res_config *cfg);
  213. typedef bool (*skx_decode_f)(struct decoded_addr *res);
  214. typedef void (*skx_show_retry_log_f)(struct decoded_addr *res, char *msg, int len, bool scrub_err);
  215. int skx_adxl_get(void);
  216. void skx_adxl_put(void);
  217. void skx_set_decode(skx_decode_f decode, skx_show_retry_log_f show_retry_log);
  218. void skx_set_mem_cfg(bool mem_cfg_2lm);
  219. void skx_set_res_cfg(struct res_config *cfg);
  220. int skx_get_src_id(struct skx_dev *d, int off, u8 *id);
  221. int skx_get_node_id(struct skx_dev *d, u8 *id);
  222. int skx_get_all_bus_mappings(struct res_config *cfg, struct list_head **list);
  223. int skx_get_hi_lo(unsigned int did, int off[], u64 *tolm, u64 *tohm);
  224. int skx_get_dimm_info(u32 mtr, u32 mcmtr, u32 amap, struct dimm_info *dimm,
  225. struct skx_imc *imc, int chan, int dimmno,
  226. struct res_config *cfg);
  227. int skx_get_nvdimm_info(struct dimm_info *dimm, struct skx_imc *imc,
  228. int chan, int dimmno, const char *mod_str);
  229. int skx_register_mci(struct skx_imc *imc, struct pci_dev *pdev,
  230. const char *ctl_name, const char *mod_str,
  231. get_dimm_config_f get_dimm_config,
  232. struct res_config *cfg);
  233. int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
  234. void *data);
  235. void skx_remove(void);
  /*
   * Debug hooks: real functions when CONFIG_EDAC_DEBUG is defined,
   * otherwise empty inline stubs so callers need no #ifdef of their own.
   */
  #ifdef CONFIG_EDAC_DEBUG
  void skx_setup_debug(const char *name);
  void skx_teardown_debug(void);
  #else
  static inline void skx_setup_debug(const char *name)
  {
  }

  static inline void skx_teardown_debug(void)
  {
  }
  #endif
  243. #endif /* _SKX_COMM_EDAC_H */