mcelog.c

/******************************************************************************
 * mcelog.c
 * Driver for receiving and transferring machine check error information
 *
 * Copyright (c) 2012 Intel Corporation
 * Author: Liu, Jinsong <jinsong.liu@intel.com>
 * Author: Jiang, Yunhong <yunhong.jiang@intel.com>
 * Author: Ke, Liping <liping.ke@intel.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation; or, when distributed
 * separately from the Linux kernel or incorporated into other
 * software packages, subject to the following license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#define pr_fmt(fmt) "xen_mcelog: " fmt

#include <linux/init.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/fs.h>
#include <linux/device.h>
#include <linux/miscdevice.h>
#include <linux/uaccess.h>
#include <linux/capability.h>
#include <linux/poll.h>
#include <linux/sched.h>

#include <xen/interface/xen.h>
#include <xen/events.h>
#include <xen/interface/vcpu.h>
#include <xen/xen.h>
#include <asm/xen/hypercall.h>
#include <asm/xen/hypervisor.h>
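
/*
 * Module-wide state: g_mi receives the machine check telemetry fetched
 * from the hypervisor; g_physinfo/ncpus cache per-physical-CPU info used
 * to map APIC ids to socket/cpu numbers; xen_mcelog buffers the converted
 * records for user space. mcelog_lock serializes fetching and converting
 * against /dev/mcelog reads.
 */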

static struct mc_info g_mi;
static struct mcinfo_logical_cpu *g_physinfo;
static uint32_t ncpus;

static DEFINE_MUTEX(mcelog_lock);

static struct xen_mce_log xen_mcelog = {
        .signature = XEN_MCE_LOG_SIGNATURE,
        .len       = XEN_MCE_LOG_LEN,
        .recordlen = sizeof(struct xen_mce),
};

static DEFINE_SPINLOCK(xen_mce_chrdev_state_lock);
static int xen_mce_chrdev_open_count;  /* #times opened */
static int xen_mce_chrdev_open_exclu;  /* already open exclusive? */

static DECLARE_WAIT_QUEUE_HEAD(xen_mce_chrdev_wait);
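
/*
 * /dev/mcelog open()/release(): any number of plain opens are allowed,
 * but an O_EXCL open fails with -EBUSY while the device is already open,
 * and any open fails while an exclusive open is active.
 */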

static int xen_mce_chrdev_open(struct inode *inode, struct file *file)
{
        spin_lock(&xen_mce_chrdev_state_lock);

        if (xen_mce_chrdev_open_exclu ||
            (xen_mce_chrdev_open_count && (file->f_flags & O_EXCL))) {
                spin_unlock(&xen_mce_chrdev_state_lock);

                return -EBUSY;
        }

        if (file->f_flags & O_EXCL)
                xen_mce_chrdev_open_exclu = 1;
        xen_mce_chrdev_open_count++;

        spin_unlock(&xen_mce_chrdev_state_lock);

        return nonseekable_open(inode, file);
}

static int xen_mce_chrdev_release(struct inode *inode, struct file *file)
{
        spin_lock(&xen_mce_chrdev_state_lock);

        xen_mce_chrdev_open_count--;
        xen_mce_chrdev_open_exclu = 0;

        spin_unlock(&xen_mce_chrdev_state_lock);

        return 0;
}

static ssize_t xen_mce_chrdev_read(struct file *filp, char __user *ubuf,
                                   size_t usize, loff_t *off)
{
        char __user *buf = ubuf;
        unsigned num;
        int i, err;

        mutex_lock(&mcelog_lock);

        num = xen_mcelog.next;

        /* Only supports full reads right now */
        err = -EINVAL;
        if (*off != 0 || usize < XEN_MCE_LOG_LEN*sizeof(struct xen_mce))
                goto out;

        err = 0;
        for (i = 0; i < num; i++) {
                struct xen_mce *m = &xen_mcelog.entry[i];

                err |= copy_to_user(buf, m, sizeof(*m));
                buf += sizeof(*m);
        }

        memset(xen_mcelog.entry, 0, num * sizeof(struct xen_mce));
        xen_mcelog.next = 0;

        if (err)
                err = -EFAULT;

out:
        mutex_unlock(&mcelog_lock);

        return err ? err : buf - ubuf;
}

static __poll_t xen_mce_chrdev_poll(struct file *file, poll_table *wait)
{
        poll_wait(file, &xen_mce_chrdev_wait, wait);

        if (xen_mcelog.next)
                return EPOLLIN | EPOLLRDNORM;

        return 0;
}

static long xen_mce_chrdev_ioctl(struct file *f, unsigned int cmd,
                                 unsigned long arg)
{
        int __user *p = (int __user *)arg;

        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;

        switch (cmd) {
        case MCE_GET_RECORD_LEN:
                return put_user(sizeof(struct xen_mce), p);
        case MCE_GET_LOG_LEN:
                return put_user(XEN_MCE_LOG_LEN, p);
        case MCE_GETCLEAR_FLAGS: {
                unsigned flags;

                do {
                        flags = xen_mcelog.flags;
                } while (cmpxchg(&xen_mcelog.flags, flags, 0) != flags);

                return put_user(flags, p);
        }
        default:
                return -ENOTTY;
        }
}

static const struct file_operations xen_mce_chrdev_ops = {
        .open           = xen_mce_chrdev_open,
        .release        = xen_mce_chrdev_release,
        .read           = xen_mce_chrdev_read,
        .poll           = xen_mce_chrdev_poll,
        .unlocked_ioctl = xen_mce_chrdev_ioctl,
};

static struct miscdevice xen_mce_chrdev_device = {
        MISC_MCELOG_MINOR,
        "mcelog",
        &xen_mce_chrdev_ops,
};
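
/*
 * For reference, a user-space consumer (such as the mcelog daemon) would
 * interact with this device roughly as follows; this is a hypothetical
 * sketch, not part of this driver:
 *
 *      int fd = open("/dev/mcelog", O_RDONLY);
 *      int reclen, loglen;
 *      ioctl(fd, MCE_GET_RECORD_LEN, &reclen);
 *      ioctl(fd, MCE_GET_LOG_LEN, &loglen);
 *      void *buf = malloc((size_t)reclen * loglen);
 *      poll() the fd for POLLIN, then read() with the full-sized buffer,
 *      since xen_mce_chrdev_read() rejects partial reads with -EINVAL.
 */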

/*
 * Caller should hold the mcelog_lock
 */
static void xen_mce_log(struct xen_mce *mce)
{
        unsigned entry;

        entry = xen_mcelog.next;

        /*
         * When the buffer fills up discard new entries.
         * Assume that the earlier errors are the more
         * interesting ones:
         */
        if (entry >= XEN_MCE_LOG_LEN) {
                set_bit(XEN_MCE_OVERFLOW,
                        (unsigned long *)&xen_mcelog.flags);
                return;
        }

        memcpy(xen_mcelog.entry + entry, mce, sizeof(struct xen_mce));

        xen_mcelog.next++;
}
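
/*
 * convert_log - translate one fetched mc_info blob into struct xen_mce
 * records. The global section supplies mcgstatus and the APIC id, which
 * is matched against g_physinfo to fill in socket, cpu, vendor and the
 * MCG_CAP/PPIN MSR values; each bank section is then logged as a
 * separate record via xen_mce_log().
 */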

static int convert_log(struct mc_info *mi)
{
        struct mcinfo_common *mic;
        struct mcinfo_global *mc_global;
        struct mcinfo_bank *mc_bank;
        struct xen_mce m;
        unsigned int i, j;

        mic = NULL;
        x86_mcinfo_lookup(&mic, mi, MC_TYPE_GLOBAL);
        if (unlikely(!mic)) {
                pr_warn("Failed to find global error info\n");
                return -ENODEV;
        }

        memset(&m, 0, sizeof(struct xen_mce));

        mc_global = (struct mcinfo_global *)mic;
        m.mcgstatus = mc_global->mc_gstatus;
        m.apicid = mc_global->mc_apicid;

        for (i = 0; i < ncpus; i++)
                if (g_physinfo[i].mc_apicid == m.apicid)
                        break;
        if (unlikely(i == ncpus)) {
                pr_warn("Failed to match cpu with apicid %d\n", m.apicid);
                return -ENODEV;
        }

        m.socketid = g_physinfo[i].mc_chipid;
        m.cpu = m.extcpu = g_physinfo[i].mc_cpunr;
        m.cpuvendor = (__u8)g_physinfo[i].mc_vendor;

        for (j = 0; j < g_physinfo[i].mc_nmsrvals; ++j)
                switch (g_physinfo[i].mc_msrvalues[j].reg) {
                case MSR_IA32_MCG_CAP:
                        m.mcgcap = g_physinfo[i].mc_msrvalues[j].value;
                        break;

                case MSR_PPIN:
                case MSR_AMD_PPIN:
                        m.ppin = g_physinfo[i].mc_msrvalues[j].value;
                        break;
                }

        mic = NULL;
        x86_mcinfo_lookup(&mic, mi, MC_TYPE_BANK);
        if (unlikely(!mic)) {
                pr_warn("Failed to find bank error info\n");
                return -ENODEV;
        }

        do {
                if ((!mic) || (mic->size == 0) ||
                    (mic->type != MC_TYPE_GLOBAL &&
                     mic->type != MC_TYPE_BANK &&
                     mic->type != MC_TYPE_EXTENDED &&
                     mic->type != MC_TYPE_RECOVERY))
                        break;

                if (mic->type == MC_TYPE_BANK) {
                        mc_bank = (struct mcinfo_bank *)mic;
                        m.misc = mc_bank->mc_misc;
                        m.status = mc_bank->mc_status;
                        m.addr = mc_bank->mc_addr;
                        m.tsc = mc_bank->mc_tsc;
                        m.bank = mc_bank->mc_bank;
                        m.finished = 1;

                        /* log this record */
                        xen_mce_log(&m);
                }
                mic = x86_mcinfo_next(mic);
        } while (1);

        return 0;
}
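
/*
 * mc_queue_handle - drain one hypervisor error queue (XEN_MC_URGENT or
 * XEN_MC_NONURGENT): fetch a record, convert it into the mcelog buffer,
 * then ACK it, and repeat until the queue reports no data or a
 * fetch/ack hypercall fails.
 */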

static int mc_queue_handle(uint32_t flags)
{
        struct xen_mc mc_op;
        int ret = 0;

        mc_op.cmd = XEN_MC_fetch;
        set_xen_guest_handle(mc_op.u.mc_fetch.data, &g_mi);
        do {
                mc_op.u.mc_fetch.flags = flags;
                ret = HYPERVISOR_mca(&mc_op);
                if (ret) {
                        pr_err("Failed to fetch %surgent error log\n",
                               flags == XEN_MC_URGENT ? "" : "non");
                        break;
                }

                if (mc_op.u.mc_fetch.flags & XEN_MC_NODATA ||
                    mc_op.u.mc_fetch.flags & XEN_MC_FETCHFAILED)
                        break;
                else {
                        ret = convert_log(&g_mi);
                        if (ret)
                                pr_warn("Failed to convert this error log, continue acking it anyway\n");

                        mc_op.u.mc_fetch.flags = flags | XEN_MC_ACK;
                        ret = HYPERVISOR_mca(&mc_op);
                        if (ret) {
                                pr_err("Failed to ack previous error log\n");
                                break;
                        }
                }
        } while (1);

        return ret;
}

/* Deferred work for the machine check error info virq */
static void xen_mce_work_fn(struct work_struct *work)
{
        int err;

        mutex_lock(&mcelog_lock);

        /* urgent mc_info */
        err = mc_queue_handle(XEN_MC_URGENT);
        if (err)
                pr_err("Failed to handle urgent mc_info queue, continue handling nonurgent mc_info queue anyway\n");

        /* nonurgent mc_info */
        err = mc_queue_handle(XEN_MC_NONURGENT);
        if (err)
                pr_err("Failed to handle nonurgent mc_info queue\n");

        /* wake processes polling /dev/mcelog */
        wake_up_interruptible(&xen_mce_chrdev_wait);

        mutex_unlock(&mcelog_lock);
}
static DECLARE_WORK(xen_mce_work, xen_mce_work_fn);

/* virq handler: defer the actual queue draining to process context */
static irqreturn_t xen_mce_interrupt(int irq, void *dev_id)
{
        schedule_work(&xen_mce_work);
        return IRQ_HANDLED;
}
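
/*
 * bind_virq_for_mce - query the number of physical CPUs, allocate and
 * fill g_physinfo via the XEN_MC_physcpuinfo hypercall, then bind
 * VIRQ_MCA so the hypervisor can notify dom0 of new machine check
 * events.
 */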

static int bind_virq_for_mce(void)
{
        int ret;
        struct xen_mc mc_op;

        memset(&mc_op, 0, sizeof(struct xen_mc));

        /* Fetch physical CPU Numbers */
        mc_op.cmd = XEN_MC_physcpuinfo;
        set_xen_guest_handle(mc_op.u.mc_physcpuinfo.info, g_physinfo);
        ret = HYPERVISOR_mca(&mc_op);
        if (ret) {
                pr_err("Failed to get CPU numbers\n");
                return ret;
        }

        /* Fetch each CPU Physical Info for later reference */
        ncpus = mc_op.u.mc_physcpuinfo.ncpus;
        g_physinfo = kcalloc(ncpus, sizeof(struct mcinfo_logical_cpu),
                             GFP_KERNEL);
        if (!g_physinfo)
                return -ENOMEM;
        set_xen_guest_handle(mc_op.u.mc_physcpuinfo.info, g_physinfo);
        ret = HYPERVISOR_mca(&mc_op);
        if (ret) {
                pr_err("Failed to get CPU info\n");
                kfree(g_physinfo);
                return ret;
        }

        ret = bind_virq_to_irqhandler(VIRQ_MCA, 0,
                                      xen_mce_interrupt, 0, "mce", NULL);
        if (ret < 0) {
                pr_err("Failed to bind virq\n");
                kfree(g_physinfo);
                return ret;
        }

        return 0;
}

static int __init xen_late_init_mcelog(void)
{
        int ret;

        /* Only DOM0 is responsible for MCE logging */
        if (!xen_initial_domain())
                return -ENODEV;

        /* register character device /dev/mcelog for xen mcelog */
        ret = misc_register(&xen_mce_chrdev_device);
        if (ret)
                return ret;

        ret = bind_virq_for_mce();
        if (ret)
                goto deregister;

        pr_info("/dev/mcelog registered by Xen\n");

        return 0;

deregister:
        misc_deregister(&xen_mce_chrdev_device);
        return ret;
}
device_initcall(xen_late_init_mcelog);