edac_device.h 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349
  1. /*
  2. * Defines, structures, APIs for edac_device
  3. *
  4. * (C) 2007 Linux Networx (http://lnxi.com)
  5. * This file may be distributed under the terms of the
  6. * GNU General Public License.
  7. *
  8. * Written by Thayne Harbaugh
  9. * Based on work by Dan Hollis <goemon at anime dot net> and others.
  10. * http://www.anime.net/~goemon/linux-ecc/
  11. *
  12. * NMI handling support added by
  13. * Dave Peterson <dsp@llnl.gov> <dave_peterson@pobox.com>
  14. *
  15. * Refactored for multi-source files:
  16. * Doug Thompson <norsk5@xmission.com>
  17. *
  18. * Please look at Documentation/driver-api/edac.rst for more info about
  19. * EDAC core structs and functions.
  20. */
  21. #ifndef _EDAC_DEVICE_H_
  22. #define _EDAC_DEVICE_H_
  23. #include <linux/device.h>
  24. #include <linux/edac.h>
  25. #include <linux/kobject.h>
  26. #include <linux/list.h>
  27. #include <linux/types.h>
  28. #include <linux/sysfs.h>
  29. #include <linux/workqueue.h>
  30. /*
  31. * The following are the structures to provide for a generic
  32. * or abstract 'edac_device'. This set of structures and the
  33. * code that implements the APIs for the same, provide for
  34. * registering EDAC type devices which are NOT standard memory.
  35. *
  36. * CPU caches (L1 and L2)
  37. * DMA engines
  38. * Core CPU switches
  39. * Fabric switch units
  40. * PCIe interface controllers
  41. * other EDAC/ECC type devices that can be monitored for
  42. * errors, etc.
  43. *
  44. * It allows for a 2 level set of hierarchy. For example:
  45. *
  46. * cache could be composed of L1, L2 and L3 levels of cache.
  47. * Each CPU core would have its own L1 cache, while sharing
  48. * L2 and maybe L3 caches.
  49. *
 * View them arranged, via the sysfs presentation:
 *
 *	/sys/devices/system/edac/..
 *
 *	mc/			<existing memory device directory>
 *	cpu/cpu0/..		<L1 and L2 block directory>
 *		/L1-cache/ce_count
 *			 /ue_count
 *		/L2-cache/ce_count
 *			 /ue_count
 *	cpu/cpu1/..		<L1 and L2 block directory>
 *		/L1-cache/ce_count
 *			 /ue_count
 *		/L2-cache/ce_count
 *			 /ue_count
 *	...
 *
 * The L1 and L2 directories would be "edac_device_block"s.
  67. */
  68. struct edac_device_counter {
  69. u32 ue_count;
  70. u32 ce_count;
  71. };
  72. /* forward reference */
  73. struct edac_device_ctl_info;
  74. struct edac_device_block;
  75. /* edac_dev_sysfs_attribute structure
  76. * used for driver sysfs attributes in mem_ctl_info
  77. * for extra controls and attributes:
  78. * like high level error Injection controls
  79. */
  80. struct edac_dev_sysfs_attribute {
  81. struct attribute attr;
  82. ssize_t (*show)(struct edac_device_ctl_info *, char *);
  83. ssize_t (*store)(struct edac_device_ctl_info *, const char *, size_t);
  84. };
  85. /* edac_dev_sysfs_block_attribute structure
  86. *
  87. * used in leaf 'block' nodes for adding controls/attributes
  88. *
  89. * each block in each instance of the containing control structure can
  90. * have an array of the following. The show function will be filled in
  91. * with the show function in the low level driver.
  92. */
  93. struct edac_dev_sysfs_block_attribute {
  94. struct attribute attr;
  95. ssize_t (*show)(struct kobject *, struct attribute *, char *);
  96. };
  97. /* device block control structure */
  98. struct edac_device_block {
  99. struct edac_device_instance *instance; /* Up Pointer */
  100. char name[EDAC_DEVICE_NAME_LEN + 1];
  101. struct edac_device_counter counters; /* basic UE and CE counters */
  102. int nr_attribs; /* how many attributes */
  103. /* this block's attributes, could be NULL */
  104. struct edac_dev_sysfs_block_attribute *block_attributes;
  105. /* edac sysfs device control */
  106. struct kobject kobj;
  107. };
  108. /* device instance control structure */
  109. struct edac_device_instance {
  110. struct edac_device_ctl_info *ctl; /* Up pointer */
  111. char name[EDAC_DEVICE_NAME_LEN + 4];
  112. struct edac_device_counter counters; /* instance counters */
  113. u32 nr_blocks; /* how many blocks */
  114. struct edac_device_block *blocks; /* block array */
  115. /* edac sysfs device control */
  116. struct kobject kobj;
  117. };
  118. /*
  119. * Abstract edac_device control info structure
  120. *
  121. */
  122. struct edac_device_ctl_info {
  123. /* for global list of edac_device_ctl_info structs */
  124. struct list_head link;
  125. struct module *owner; /* Module owner of this control struct */
  126. int dev_idx;
  127. /* Per instance controls for this edac_device */
  128. int log_ue; /* boolean for logging UEs */
  129. int log_ce; /* boolean for logging CEs */
  130. int panic_on_ue; /* boolean for panic'ing on an UE */
  131. unsigned poll_msec; /* number of milliseconds to poll interval */
  132. unsigned long delay; /* number of jiffies for poll_msec */
  133. /* Additional top controller level attributes, but specified
  134. * by the low level driver.
  135. *
  136. * Set by the low level driver to provide attributes at the
  137. * controller level, same level as 'ue_count' and 'ce_count' above.
  138. * An array of structures, NULL terminated
  139. *
  140. * If attributes are desired, then set to array of attributes
  141. * If no attributes are desired, leave NULL
  142. */
  143. struct edac_dev_sysfs_attribute *sysfs_attributes;
  144. /* pointer to main 'edac' subsys in sysfs */
  145. const struct bus_type *edac_subsys;
  146. /* the internal state of this controller instance */
  147. int op_state;
  148. /* work struct for this instance */
  149. struct delayed_work work;
  150. /* pointer to edac polling checking routine:
  151. * If NOT NULL: points to polling check routine
  152. * If NULL: Then assumes INTERRUPT operation, where
  153. * MC driver will receive events
  154. */
  155. void (*edac_check) (struct edac_device_ctl_info * edac_dev);
  156. struct device *dev; /* pointer to device structure */
  157. const char *mod_name; /* module name */
  158. const char *ctl_name; /* edac controller name */
  159. const char *dev_name; /* pci/platform/etc... name */
  160. void *pvt_info; /* pointer to 'private driver' info */
  161. unsigned long start_time; /* edac_device load start time (jiffies) */
  162. /* sysfs top name under 'edac' directory
  163. * and instance name:
  164. * cpu/cpu0/...
  165. * cpu/cpu1/...
  166. * cpu/cpu2/...
  167. * ...
  168. */
  169. char name[EDAC_DEVICE_NAME_LEN + 1];
  170. /* Number of instances supported on this control structure
  171. * and the array of those instances
  172. */
  173. u32 nr_instances;
  174. struct edac_device_instance *instances;
  175. struct edac_device_block *blocks;
  176. /* Event counters for the this whole EDAC Device */
  177. struct edac_device_counter counters;
  178. /* edac sysfs device control for the 'name'
  179. * device this structure controls
  180. */
  181. struct kobject kobj;
  182. };
  183. /* To get from the instance's wq to the beginning of the ctl structure */
  184. #define to_edac_mem_ctl_work(w) \
  185. container_of(w, struct mem_ctl_info, work)
  186. #define to_edac_device_ctl_work(w) \
  187. container_of(w,struct edac_device_ctl_info,work)
  188. /*
  189. * The alloc() and free() functions for the 'edac_device' control info
  190. * structure. A MC driver will allocate one of these for each edac_device
  191. * it is going to control/register with the EDAC CORE.
  192. */
  193. extern struct edac_device_ctl_info *edac_device_alloc_ctl_info(
  194. unsigned sizeof_private,
  195. char *edac_device_name, unsigned nr_instances,
  196. char *edac_block_name, unsigned nr_blocks,
  197. unsigned offset_value,
  198. int device_index);
  199. /* The offset value can be:
  200. * -1 indicating no offset value
  201. * 0 for zero-based block numbers
  202. * 1 for 1-based block number
  203. * other for other-based block number
  204. */
  205. #define BLOCK_OFFSET_VALUE_OFF ((unsigned) -1)
  206. extern void edac_device_free_ctl_info(struct edac_device_ctl_info *ctl_info);
  207. /**
  208. * edac_device_add_device - Insert the 'edac_dev' structure into the
  209. * edac_device global list and create sysfs entries associated with
  210. * edac_device structure.
  211. *
  212. * @edac_dev: pointer to edac_device structure to be added to the list
  213. * 'edac_device' structure.
  214. *
  215. * Returns:
  216. * 0 on Success, or an error code on failure
  217. */
  218. extern int edac_device_add_device(struct edac_device_ctl_info *edac_dev);
  219. /**
  220. * edac_device_del_device - Remove sysfs entries for specified edac_device
  221. * structure and then remove edac_device structure from global list
  222. *
  223. * @dev:
  224. * Pointer to struct &device representing the edac device
  225. * structure to remove.
  226. *
  227. * Returns:
  228. * Pointer to removed edac_device structure,
  229. * or %NULL if device not found.
  230. */
  231. extern struct edac_device_ctl_info *edac_device_del_device(struct device *dev);
  232. /**
  233. * edac_device_handle_ce_count - Log correctable errors.
  234. *
  235. * @edac_dev: pointer to struct &edac_device_ctl_info
  236. * @inst_nr: number of the instance where the CE error happened
  237. * @count: Number of errors to log.
  238. * @block_nr: number of the block where the CE error happened
  239. * @msg: message to be printed
  240. */
  241. void edac_device_handle_ce_count(struct edac_device_ctl_info *edac_dev,
  242. unsigned int count, int inst_nr, int block_nr,
  243. const char *msg);
  244. /**
  245. * edac_device_handle_ue_count - Log uncorrectable errors.
  246. *
  247. * @edac_dev: pointer to struct &edac_device_ctl_info
  248. * @inst_nr: number of the instance where the CE error happened
  249. * @count: Number of errors to log.
  250. * @block_nr: number of the block where the CE error happened
  251. * @msg: message to be printed
  252. */
  253. void edac_device_handle_ue_count(struct edac_device_ctl_info *edac_dev,
  254. unsigned int count, int inst_nr, int block_nr,
  255. const char *msg);
  256. /**
  257. * edac_device_handle_ce(): Log a single correctable error
  258. *
  259. * @edac_dev: pointer to struct &edac_device_ctl_info
  260. * @inst_nr: number of the instance where the CE error happened
  261. * @block_nr: number of the block where the CE error happened
  262. * @msg: message to be printed
  263. */
  264. static inline void
  265. edac_device_handle_ce(struct edac_device_ctl_info *edac_dev, int inst_nr,
  266. int block_nr, const char *msg)
  267. {
  268. edac_device_handle_ce_count(edac_dev, 1, inst_nr, block_nr, msg);
  269. }
  270. /**
  271. * edac_device_handle_ue(): Log a single uncorrectable error
  272. *
  273. * @edac_dev: pointer to struct &edac_device_ctl_info
  274. * @inst_nr: number of the instance where the UE error happened
  275. * @block_nr: number of the block where the UE error happened
  276. * @msg: message to be printed
  277. */
  278. static inline void
  279. edac_device_handle_ue(struct edac_device_ctl_info *edac_dev, int inst_nr,
  280. int block_nr, const char *msg)
  281. {
  282. edac_device_handle_ue_count(edac_dev, 1, inst_nr, block_nr, msg);
  283. }
  284. /**
  285. * edac_device_alloc_index: Allocate a unique device index number
  286. *
  287. * Returns:
  288. * allocated index number
  289. */
  290. extern int edac_device_alloc_index(void);
  291. extern const char *edac_layer_name[];
  292. /* Free the actual struct */
  293. static inline void __edac_device_free_ctl_info(struct edac_device_ctl_info *ci)
  294. {
  295. if (ci) {
  296. kfree(ci->pvt_info);
  297. kfree(ci->blocks);
  298. kfree(ci->instances);
  299. kfree(ci);
  300. }
  301. }
  302. #endif