  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * Copyright(c) 2014 Intel Mobile Communications GmbH
  4. * Copyright(c) 2015 Intel Deutschland GmbH
  5. *
  6. * Author: Johannes Berg <johannes@sipsolutions.net>
  7. */
#include <linux/devcoredump.h>
#include <linux/device.h>
#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/workqueue.h>
/* forward declaration; the class itself is defined further down */
static struct class devcd_class;

/* global disable flag, for security purposes */
static bool devcd_disabled;
/*
 * One pending device coredump: a "devcdN" pseudo-device that exposes the
 * dump data in sysfs until it is read/discarded or its timer expires.
 */
struct devcd_entry {
	struct device devcd_dev;	/* the devcdN pseudo-device itself */
	void *data;			/* dump payload, freed via ->free() */
	size_t datalen;			/* length of @data in bytes */
	/*
	 * There are 2 races for which mutex is required.
	 *
	 * The first race is between device creation and userspace writing to
	 * schedule immediately destruction.
	 *
	 * This race is handled by arming the timer before device creation, but
	 * when device creation fails the timer still exists.
	 *
	 * To solve this, hold the mutex during device_add(), and set
	 * init_completed on success before releasing the mutex.
	 *
	 * That way the timer will never fire until device_add() is called,
	 * it will do nothing if init_completed is not set. The timer is also
	 * cancelled in that case.
	 *
	 * The second race involves multiple parallel invocations of devcd_free(),
	 * add a deleted flag so only 1 can call the destructor.
	 */
	struct mutex mutex;
	bool init_completed, deleted;
	struct module *owner;		/* module providing read/free; refcounted */
	/* copies data out of @data for a sysfs read at @offset */
	ssize_t (*read)(char *buffer, loff_t offset, size_t count,
			void *data, size_t datalen);
	/* releases @data when the entry is destroyed */
	void (*free)(void *data);
	/*
	 * If nothing interferes and device_add() returns success,
	 * del_wk will destroy the device after the timer fires.
	 *
	 * Multiple userspace processes can interfere in the working of the timer:
	 * - Writing to the coredump will reschedule the timer to run immediately,
	 *   if still armed.
	 *
	 *   This is handled by using "if (cancel_delayed_work()) {
	 *   schedule_delayed_work() }", to prevent re-arming after having
	 *   been previously fired.
	 * - Writing to /sys/class/devcoredump/disabled will destroy the
	 *   coredump synchronously.
	 *   This is handled by using disable_delayed_work_sync(), and then
	 *   checking if deleted flag is set with &devcd->mutex held.
	 */
	struct delayed_work del_wk;
	struct device *failing_dev;	/* the crashed device; refcounted */
};
  66. static struct devcd_entry *dev_to_devcd(struct device *dev)
  67. {
  68. return container_of(dev, struct devcd_entry, devcd_dev);
  69. }
  70. static void devcd_dev_release(struct device *dev)
  71. {
  72. struct devcd_entry *devcd = dev_to_devcd(dev);
  73. devcd->free(devcd->data);
  74. module_put(devcd->owner);
  75. /*
  76. * this seems racy, but I don't see a notifier or such on
  77. * a struct device to know when it goes away?
  78. */
  79. if (devcd->failing_dev->kobj.sd)
  80. sysfs_delete_link(&devcd->failing_dev->kobj, &dev->kobj,
  81. "devcoredump");
  82. put_device(devcd->failing_dev);
  83. kfree(devcd);
  84. }
  85. static void __devcd_del(struct devcd_entry *devcd)
  86. {
  87. devcd->deleted = true;
  88. device_del(&devcd->devcd_dev);
  89. put_device(&devcd->devcd_dev);
  90. }
  91. static void devcd_del(struct work_struct *wk)
  92. {
  93. struct devcd_entry *devcd;
  94. bool init_completed;
  95. devcd = container_of(wk, struct devcd_entry, del_wk.work);
  96. /* devcd->mutex serializes against dev_coredumpm_timeout */
  97. mutex_lock(&devcd->mutex);
  98. init_completed = devcd->init_completed;
  99. mutex_unlock(&devcd->mutex);
  100. if (init_completed)
  101. __devcd_del(devcd);
  102. }
  103. static ssize_t devcd_data_read(struct file *filp, struct kobject *kobj,
  104. struct bin_attribute *bin_attr,
  105. char *buffer, loff_t offset, size_t count)
  106. {
  107. struct device *dev = kobj_to_dev(kobj);
  108. struct devcd_entry *devcd = dev_to_devcd(dev);
  109. return devcd->read(buffer, offset, count, devcd->data, devcd->datalen);
  110. }
  111. static ssize_t devcd_data_write(struct file *filp, struct kobject *kobj,
  112. struct bin_attribute *bin_attr,
  113. char *buffer, loff_t offset, size_t count)
  114. {
  115. struct device *dev = kobj_to_dev(kobj);
  116. struct devcd_entry *devcd = dev_to_devcd(dev);
  117. /*
  118. * Although it's tempting to use mod_delayed work here,
  119. * that will cause a reschedule if the timer already fired.
  120. */
  121. if (cancel_delayed_work(&devcd->del_wk))
  122. schedule_delayed_work(&devcd->del_wk, 0);
  123. return count;
  124. }
/*
 * The "data" binary attribute: userspace reads the dump contents and
 * writes anything to discard the dump.
 */
static struct bin_attribute devcd_attr_data = {
	.attr = { .name = "data", .mode = S_IRUSR | S_IWUSR, },
	.size = 0,	/* size unknown up front; reads bounded by read() callback */
	.read = devcd_data_read,
	.write = devcd_data_write,
};

static struct bin_attribute *devcd_dev_bin_attrs[] = {
	&devcd_attr_data, NULL,
};

static const struct attribute_group devcd_dev_group = {
	.bin_attrs = devcd_dev_bin_attrs,
};

/* attribute groups attached to every devcdN device */
static const struct attribute_group *devcd_dev_groups[] = {
	&devcd_dev_group, NULL,
};
  140. static int devcd_free(struct device *dev, void *data)
  141. {
  142. struct devcd_entry *devcd = dev_to_devcd(dev);
  143. /*
  144. * To prevent a race with devcd_data_write(), disable work and
  145. * complete manually instead.
  146. *
  147. * We cannot rely on the return value of
  148. * disable_delayed_work_sync() here, because it might be in the
  149. * middle of a cancel_delayed_work + schedule_delayed_work pair.
  150. *
  151. * devcd->mutex here guards against multiple parallel invocations
  152. * of devcd_free().
  153. */
  154. disable_delayed_work_sync(&devcd->del_wk);
  155. mutex_lock(&devcd->mutex);
  156. if (!devcd->deleted)
  157. __devcd_del(devcd);
  158. mutex_unlock(&devcd->mutex);
  159. return 0;
  160. }
  161. static ssize_t disabled_show(const struct class *class, const struct class_attribute *attr,
  162. char *buf)
  163. {
  164. return sysfs_emit(buf, "%d\n", devcd_disabled);
  165. }
  166. /*
  167. *
  168. * disabled_store() worker()
  169. * class_for_each_device(&devcd_class,
  170. * NULL, NULL, devcd_free)
  171. * ...
  172. * ...
  173. * while ((dev = class_dev_iter_next(&iter))
  174. * devcd_del()
  175. * device_del()
  176. * put_device() <- last reference
  177. * error = fn(dev, data) devcd_dev_release()
  178. * devcd_free(dev, data) kfree(devcd)
  179. *
  180. *
 * In the diagram above it may look as if disabled_store() races with a
 * concurrently running devcd_del() and could touch freed memory after
 * put_device() drops what appears to be the last reference. This does not
 * happen, because class_dev_iter_next() holds its own reference to each
 * device via the klist_node, so fn(dev, data) never runs on the device's
 * last reference.
  186. */
  187. static ssize_t disabled_store(const struct class *class, const struct class_attribute *attr,
  188. const char *buf, size_t count)
  189. {
  190. long tmp = simple_strtol(buf, NULL, 10);
  191. /*
  192. * This essentially makes the attribute write-once, since you can't
  193. * go back to not having it disabled. This is intentional, it serves
  194. * as a system lockdown feature.
  195. */
  196. if (tmp != 1)
  197. return -EINVAL;
  198. devcd_disabled = true;
  199. class_for_each_device(&devcd_class, NULL, NULL, devcd_free);
  200. return count;
  201. }
/* /sys/class/devcoredump/disabled, backed by disabled_show/disabled_store */
static CLASS_ATTR_RW(disabled);

static struct attribute *devcd_class_attrs[] = {
	&class_attr_disabled.attr,
	NULL,
};
ATTRIBUTE_GROUPS(devcd_class);

/* the "devcoredump" class; all devcdN pseudo-devices belong to it */
static struct class devcd_class = {
	.name		= "devcoredump",
	.dev_release	= devcd_dev_release,
	.dev_groups	= devcd_dev_groups,
	.class_groups	= devcd_class_groups,
};
  214. static ssize_t devcd_readv(char *buffer, loff_t offset, size_t count,
  215. void *data, size_t datalen)
  216. {
  217. return memory_read_from_buffer(buffer, count, &offset, data, datalen);
  218. }
  219. static void devcd_freev(void *data)
  220. {
  221. vfree(data);
  222. }
  223. /**
  224. * dev_coredumpv - create device coredump with vmalloc data
  225. * @dev: the struct device for the crashed device
  226. * @data: vmalloc data containing the device coredump
  227. * @datalen: length of the data
  228. * @gfp: allocation flags
  229. *
  230. * This function takes ownership of the vmalloc'ed data and will free
  231. * it when it is no longer used. See dev_coredumpm() for more information.
  232. */
  233. void dev_coredumpv(struct device *dev, void *data, size_t datalen,
  234. gfp_t gfp)
  235. {
  236. dev_coredumpm(dev, NULL, data, datalen, gfp, devcd_readv, devcd_freev);
  237. }
  238. EXPORT_SYMBOL_GPL(dev_coredumpv);
  239. static int devcd_match_failing(struct device *dev, const void *failing)
  240. {
  241. struct devcd_entry *devcd = dev_to_devcd(dev);
  242. return devcd->failing_dev == failing;
  243. }
  244. /**
  245. * devcd_free_sgtable - free all the memory of the given scatterlist table
  246. * (i.e. both pages and scatterlist instances)
  247. * NOTE: if two tables allocated with devcd_alloc_sgtable and then chained
  248. * using the sg_chain function then that function should be called only once
  249. * on the chained table
  250. * @data: pointer to sg_table to free
  251. */
  252. static void devcd_free_sgtable(void *data)
  253. {
  254. _devcd_free_sgtable(data);
  255. }
/**
 * devcd_read_from_sgtable - copy data from sg_table to a given buffer
 * and return the number of bytes read
 * @buffer: the buffer to copy the data to it
 * @offset: start copy from @offset@ bytes from the head of the data
 *	in the given scatterlist
 * @buf_len: the length of the buffer
 * @data: the scatterlist table to copy from
 * @data_len: the length of the data in the sg_table
 */
static ssize_t devcd_read_from_sgtable(char *buffer, loff_t offset,
				       size_t buf_len, void *data,
				       size_t data_len)
{
	struct scatterlist *table = data;

	/* reading past the end of the data is an error */
	if (offset > data_len)
		return -EINVAL;

	/* clamp the copy length to what remains after @offset */
	if (offset + buf_len > data_len)
		buf_len = data_len - offset;

	return sg_pcopy_to_buffer(table, sg_nents(table), buffer, buf_len,
				  offset);
}
  278. /**
  279. * dev_coredump_put - remove device coredump
  280. * @dev: the struct device for the crashed device
  281. *
  282. * dev_coredump_put() removes coredump, if exists, for a given device from
  283. * the file system and free its associated data otherwise, does nothing.
  284. *
  285. * It is useful for modules that do not want to keep coredump
  286. * available after its unload.
  287. */
  288. void dev_coredump_put(struct device *dev)
  289. {
  290. struct device *existing;
  291. existing = class_find_device(&devcd_class, NULL, dev,
  292. devcd_match_failing);
  293. if (existing) {
  294. devcd_free(existing, NULL);
  295. put_device(existing);
  296. }
  297. }
  298. EXPORT_SYMBOL_GPL(dev_coredump_put);
/**
 * dev_coredumpm_timeout - create device coredump with read/free methods with a
 * custom timeout.
 * @dev: the struct device for the crashed device
 * @owner: the module that contains the read/free functions, use %THIS_MODULE
 * @data: data cookie for the @read/@free functions
 * @datalen: length of the data
 * @gfp: allocation flags
 * @read: function to read from the given buffer
 * @free: function to free the given buffer
 * @timeout: time in jiffies to remove coredump
 *
 * Creates a new device coredump for the given device. If a previous one hasn't
 * been read yet, the new coredump is discarded. The data lifetime is determined
 * by the device coredump framework and when it is no longer needed the @free
 * function will be called to free the data.
 */
void dev_coredumpm_timeout(struct device *dev, struct module *owner,
			   void *data, size_t datalen, gfp_t gfp,
			   ssize_t (*read)(char *buffer, loff_t offset,
					   size_t count, void *data,
					   size_t datalen),
			   void (*free)(void *data),
			   unsigned long timeout)
{
	/* monotonically increasing suffix for the "devcd%d" name */
	static atomic_t devcd_count = ATOMIC_INIT(0);
	struct devcd_entry *devcd;
	struct device *existing;

	/* globally disabled via /sys/class/devcoredump/disabled */
	if (devcd_disabled)
		goto free;

	/* only one unread coredump per failing device at a time */
	existing = class_find_device(&devcd_class, NULL, dev,
				     devcd_match_failing);
	if (existing) {
		put_device(existing);
		goto free;
	}

	/* pin the module providing @read/@free for the dump's lifetime */
	if (!try_module_get(owner))
		goto free;

	devcd = kzalloc(sizeof(*devcd), gfp);
	if (!devcd)
		goto put_module;

	devcd->owner = owner;
	devcd->data = data;
	devcd->datalen = datalen;
	devcd->read = read;
	devcd->free = free;
	devcd->failing_dev = get_device(dev);
	devcd->deleted = false;

	mutex_init(&devcd->mutex);
	device_initialize(&devcd->devcd_dev);

	dev_set_name(&devcd->devcd_dev, "devcd%d",
		     atomic_inc_return(&devcd_count));
	devcd->devcd_dev.class = &devcd_class;

	/* suppress uevents until the sysfs links below are in place */
	dev_set_uevent_suppress(&devcd->devcd_dev, true);

	/* devcd->mutex prevents devcd_del() completing until init finishes */
	mutex_lock(&devcd->mutex);
	devcd->init_completed = false;
	/* arm the deletion timer before device_add(); see devcd_entry */
	INIT_DELAYED_WORK(&devcd->del_wk, devcd_del);
	schedule_delayed_work(&devcd->del_wk, timeout);

	if (device_add(&devcd->devcd_dev))
		goto put_device;

	/*
	 * These should normally not fail, but there is no problem
	 * continuing without the links, so just warn instead of
	 * failing.
	 */
	if (sysfs_create_link(&devcd->devcd_dev.kobj, &dev->kobj,
			      "failing_device") ||
	    sysfs_create_link(&dev->kobj, &devcd->devcd_dev.kobj,
			      "devcoredump"))
		dev_warn(dev, "devcoredump create_link failed\n");

	dev_set_uevent_suppress(&devcd->devcd_dev, false);
	kobject_uevent(&devcd->devcd_dev.kobj, KOBJ_ADD);

	/*
	 * Safe to run devcd_del() now that we are done with devcd_dev.
	 * Alternatively we could have taken a ref on devcd_dev before
	 * dropping the lock.
	 */
	devcd->init_completed = true;
	mutex_unlock(&devcd->mutex);
	return;
put_device:
	/* device_add() failed: disarm the timer and drop our reference */
	mutex_unlock(&devcd->mutex);
	cancel_delayed_work_sync(&devcd->del_wk);
	put_device(&devcd->devcd_dev);
put_module:
	module_put(owner);
free:
	/* we never took ownership, or failed to; release the caller's data */
	free(data);
}
EXPORT_SYMBOL_GPL(dev_coredumpm_timeout);
  390. /**
  391. * dev_coredumpsg - create device coredump that uses scatterlist as data
  392. * parameter
  393. * @dev: the struct device for the crashed device
  394. * @table: the dump data
  395. * @datalen: length of the data
  396. * @gfp: allocation flags
  397. *
  398. * Creates a new device coredump for the given device. If a previous one hasn't
  399. * been read yet, the new coredump is discarded. The data lifetime is determined
  400. * by the device coredump framework and when it is no longer needed
  401. * it will free the data.
  402. */
  403. void dev_coredumpsg(struct device *dev, struct scatterlist *table,
  404. size_t datalen, gfp_t gfp)
  405. {
  406. dev_coredumpm(dev, NULL, table, datalen, gfp, devcd_read_from_sgtable,
  407. devcd_free_sgtable);
  408. }
  409. EXPORT_SYMBOL_GPL(dev_coredumpsg);
  410. static int __init devcoredump_init(void)
  411. {
  412. return class_register(&devcd_class);
  413. }
  414. __initcall(devcoredump_init);
  415. static void __exit devcoredump_exit(void)
  416. {
  417. class_for_each_device(&devcd_class, NULL, NULL, devcd_free);
  418. class_unregister(&devcd_class);
  419. }
  420. __exitcall(devcoredump_exit);