io-pgfault.c

// SPDX-License-Identifier: GPL-2.0
/*
 * Handle device page faults
 *
 * Copyright (C) 2020 ARM Ltd.
 */

#include <linux/iommu.h>
#include <linux/list.h>
#include <linux/sched/mm.h>
#include <linux/slab.h>
#include <linux/workqueue.h>

#include "iommu-priv.h"
/*
 * Return the fault parameter of a device if it exists. Otherwise, return NULL.
 * On a successful return, the caller takes a reference of this parameter and
 * should put it after use by calling iopf_put_dev_fault_param().
 */
static struct iommu_fault_param *iopf_get_dev_fault_param(struct device *dev)
{
        struct dev_iommu *param = dev->iommu;
        struct iommu_fault_param *fault_param;

        rcu_read_lock();
        fault_param = rcu_dereference(param->fault_param);
        if (fault_param && !refcount_inc_not_zero(&fault_param->users))
                fault_param = NULL;
        rcu_read_unlock();

        return fault_param;
}

/* Caller must hold a reference of the fault parameter. */
static void iopf_put_dev_fault_param(struct iommu_fault_param *fault_param)
{
        if (refcount_dec_and_test(&fault_param->users))
                kfree_rcu(fault_param, rcu);
}
static void __iopf_free_group(struct iopf_group *group)
{
        struct iopf_fault *iopf, *next;

        list_for_each_entry_safe(iopf, next, &group->faults, list) {
                if (!(iopf->fault.prm.flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE))
                        kfree(iopf);
        }

        /* Pair with iommu_report_device_fault(). */
        iopf_put_dev_fault_param(group->fault_param);
}

void iopf_free_group(struct iopf_group *group)
{
        __iopf_free_group(group);
        kfree(group);
}
EXPORT_SYMBOL_GPL(iopf_free_group);
/* Non-last request of a group. Postpone until the last one. */
static int report_partial_fault(struct iommu_fault_param *fault_param,
                                struct iommu_fault *fault)
{
        struct iopf_fault *iopf;

        iopf = kzalloc(sizeof(*iopf), GFP_KERNEL);
        if (!iopf)
                return -ENOMEM;

        iopf->fault = *fault;

        mutex_lock(&fault_param->lock);
        list_add(&iopf->list, &fault_param->partial);
        mutex_unlock(&fault_param->lock);

        return 0;
}
static struct iopf_group *iopf_group_alloc(struct iommu_fault_param *iopf_param,
                                           struct iopf_fault *evt,
                                           struct iopf_group *abort_group)
{
        struct iopf_fault *iopf, *next;
        struct iopf_group *group;

        group = kzalloc(sizeof(*group), GFP_KERNEL);
        if (!group) {
                /*
                 * We always need to construct the group as we need it to abort
                 * the request at the driver if it can't be handled.
                 */
                group = abort_group;
        }

        group->fault_param = iopf_param;
        group->last_fault.fault = evt->fault;
        INIT_LIST_HEAD(&group->faults);
        INIT_LIST_HEAD(&group->pending_node);
        list_add(&group->last_fault.list, &group->faults);

        /* See if we have partial faults for this group */
        mutex_lock(&iopf_param->lock);
        list_for_each_entry_safe(iopf, next, &iopf_param->partial, list) {
                if (iopf->fault.prm.grpid == evt->fault.prm.grpid)
                        /* Insert *before* the last fault */
                        list_move(&iopf->list, &group->faults);
        }
        list_add(&group->pending_node, &iopf_param->faults);
        mutex_unlock(&iopf_param->lock);

        group->fault_count = list_count_nodes(&group->faults);

        return group;
}
static struct iommu_attach_handle *find_fault_handler(struct device *dev,
                                                      struct iopf_fault *evt)
{
        struct iommu_fault *fault = &evt->fault;
        struct iommu_attach_handle *attach_handle;

        if (fault->prm.flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID) {
                attach_handle = iommu_attach_handle_get(dev->iommu_group,
                                                        fault->prm.pasid, 0);
                if (IS_ERR(attach_handle)) {
                        const struct iommu_ops *ops = dev_iommu_ops(dev);

                        if (!ops->user_pasid_table)
                                return NULL;
                        /*
                         * The iommu driver for this device supports user-
                         * managed PASID table. Therefore page faults for
                         * any PASID should go through the NESTING domain
                         * attached to the device RID.
                         */
                        attach_handle = iommu_attach_handle_get(
                                        dev->iommu_group, IOMMU_NO_PASID,
                                        IOMMU_DOMAIN_NESTED);
                        if (IS_ERR(attach_handle))
                                return NULL;
                }
        } else {
                attach_handle = iommu_attach_handle_get(dev->iommu_group,
                                                        IOMMU_NO_PASID, 0);
                if (IS_ERR(attach_handle))
                        return NULL;
        }

        if (!attach_handle->domain->iopf_handler)
                return NULL;

        return attach_handle;
}
static void iopf_error_response(struct device *dev, struct iopf_fault *evt)
{
        const struct iommu_ops *ops = dev_iommu_ops(dev);
        struct iommu_fault *fault = &evt->fault;
        struct iommu_page_response resp = {
                .pasid = fault->prm.pasid,
                .grpid = fault->prm.grpid,
                .code = IOMMU_PAGE_RESP_INVALID
        };

        ops->page_response(dev, evt, &resp);
}
/**
 * iommu_report_device_fault() - Report fault event to device driver
 * @dev: the device
 * @evt: fault event data
 *
 * Called by IOMMU drivers when a fault is detected, typically in a threaded IRQ
 * handler. If this function fails then ops->page_response() was called to
 * complete evt if required.
 *
 * This module doesn't handle PCI PASID Stop Marker; IOMMU drivers must discard
 * them before reporting faults. A PASID Stop Marker (LRW = 0b100) doesn't
 * expect a response. It may be generated when disabling a PASID (issuing a
 * PASID stop request) by some PCI devices.
 *
 * The PASID stop request is issued by the device driver before unbind(). Once
 * it completes, no page request is generated for this PASID anymore and
 * outstanding ones have been pushed to the IOMMU (as per PCIe 4.0r1.0 - 6.20.1
 * and 10.4.1.2 - Managing PASID TLP Prefix Usage). Some PCI devices will wait
 * for all outstanding page requests to come back with a response before
 * completing the PASID stop request. Others do not wait for page responses, and
 * instead issue this Stop Marker that tells us when the PASID can be
 * reallocated.
 *
 * It is safe to discard the Stop Marker because it is an optimization.
 * a. Page requests, which are posted requests, have been flushed to the IOMMU
 *    when the stop request completes.
 * b. The IOMMU driver flushes all fault queues on unbind() before freeing the
 *    PASID.
 *
 * So even though the Stop Marker might be issued by the device *after* the stop
 * request completes, outstanding faults will have been dealt with by the time
 * the PASID is freed.
 *
 * Any valid page fault will eventually be routed to an iommu domain, and the
 * page fault handler installed there will get called. The users of this
 * handling framework should guarantee that the iommu domain can only be freed
 * after the device has stopped generating page faults (or the iommu hardware
 * has been set to block the page faults) and the pending page faults have been
 * flushed. If no page fault handler is attached or no iopf params are set up,
 * ops->page_response() is called to complete the evt.
 *
 * Return: 0 on success, or an error in case of a bad/failed iopf setup.
 */
int iommu_report_device_fault(struct device *dev, struct iopf_fault *evt)
{
        struct iommu_attach_handle *attach_handle;
        struct iommu_fault *fault = &evt->fault;
        struct iommu_fault_param *iopf_param;
        struct iopf_group abort_group = {};
        struct iopf_group *group;

        attach_handle = find_fault_handler(dev, evt);
        if (!attach_handle)
                goto err_bad_iopf;

        /*
         * Something has gone wrong if a fault capable domain is attached but no
         * iopf_param is setup
         */
        iopf_param = iopf_get_dev_fault_param(dev);
        if (WARN_ON(!iopf_param))
                goto err_bad_iopf;

        if (!(fault->prm.flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE)) {
                int ret;

                ret = report_partial_fault(iopf_param, fault);
                iopf_put_dev_fault_param(iopf_param);
                /* A request that is not the last does not need to be ack'd */

                return ret;
        }

        /*
         * This is the last page fault of a group. Allocate an iopf group and
         * pass it to domain's page fault handler. The group holds a reference
         * count of the fault parameter. It will be released after response or
         * error path of this function. If an error is returned, the caller
         * will send a response to the hardware. We need to clean up before
         * leaving, otherwise partial faults will be stuck.
         */
        group = iopf_group_alloc(iopf_param, evt, &abort_group);
        if (group == &abort_group)
                goto err_abort;

        group->attach_handle = attach_handle;

        /*
         * On success iopf_handler must call iopf_group_response() and
         * iopf_free_group()
         */
        if (group->attach_handle->domain->iopf_handler(group))
                goto err_abort;

        return 0;

err_abort:
        dev_warn_ratelimited(dev, "iopf with pasid %d aborted\n",
                             fault->prm.pasid);
        iopf_group_response(group, IOMMU_PAGE_RESP_FAILURE);
        if (group == &abort_group)
                __iopf_free_group(group);
        else
                iopf_free_group(group);

        return 0;

err_bad_iopf:
        if (fault->type == IOMMU_FAULT_PAGE_REQ)
                iopf_error_response(dev, evt);

        return -EINVAL;
}
EXPORT_SYMBOL_GPL(iommu_report_device_fault);
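
/*
 * Illustrative only (not part of the original file): a minimal sketch of how
 * an IOMMU driver's threaded PRI/PRQ handler might hand a decoded page request
 * to this framework. The prq_entry layout and the to_iopf_fault() and
 * is_pasid_stop_marker() helpers are hypothetical; real drivers decode their
 * own hardware queue format.
 *
 *      static void hypothetical_handle_prq_entry(struct device *dev,
 *                                                struct prq_entry *e)
 *      {
 *              struct iopf_fault evt = {};
 *
 *              // Stop Markers must be dropped by the driver, never reported.
 *              if (is_pasid_stop_marker(e))
 *                      return;
 *
 *              to_iopf_fault(e, &evt.fault);   // fill evt.fault.prm from hw
 *
 *              // On failure the framework has already sent a page response
 *              // if one was required, so the driver just moves on.
 *              if (iommu_report_device_fault(dev, &evt))
 *                      dev_dbg(dev, "unhandled page request discarded\n");
 *      }
 */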
/**
 * iopf_queue_flush_dev - Ensure that all queued faults have been processed
 * @dev: the endpoint whose faults need to be flushed.
 *
 * The IOMMU driver calls this before releasing a PASID, to ensure that all
 * pending faults for this PASID have been handled, and won't hit the address
 * space of the next process that uses this PASID. The driver must make sure
 * that no new fault is added to the queue. In particular it must flush its
 * low-level queue before calling this function.
 *
 * Return: 0 on success and <0 on error.
 */
int iopf_queue_flush_dev(struct device *dev)
{
        struct iommu_fault_param *iopf_param;

        /*
         * It's a driver bug to be here after iopf_queue_remove_device().
         * Therefore, it's safe to dereference the fault parameter without
         * holding the lock.
         */
        iopf_param = rcu_dereference_check(dev->iommu->fault_param, true);
        if (WARN_ON(!iopf_param))
                return -ENODEV;

        flush_workqueue(iopf_param->queue->wq);

        return 0;
}
EXPORT_SYMBOL_GPL(iopf_queue_flush_dev);
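
/*
 * Illustrative only: the ordering the kernel-doc above describes, sketched as
 * a hypothetical PASID release path. stop_pasid_in_hw() and drain_hw_prq()
 * stand in for driver-specific operations and are not real APIs.
 *
 *      static void hypothetical_release_pasid(struct device *dev, u32 pasid)
 *      {
 *              stop_pasid_in_hw(dev, pasid);   // no new page requests
 *              drain_hw_prq(dev);              // flush the low-level queue
 *              iopf_queue_flush_dev(dev);      // wait for queued work items
 *              // The PASID can now be reused without stale faults hitting
 *              // the next address space bound to it.
 *      }
 */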
/**
 * iopf_group_response - Respond to a group of page faults
 * @group: the group of faults with the same group id
 * @status: the response code
 */
void iopf_group_response(struct iopf_group *group,
                         enum iommu_page_response_code status)
{
        struct iommu_fault_param *fault_param = group->fault_param;
        struct iopf_fault *iopf = &group->last_fault;
        struct device *dev = group->fault_param->dev;
        const struct iommu_ops *ops = dev_iommu_ops(dev);
        struct iommu_page_response resp = {
                .pasid = iopf->fault.prm.pasid,
                .grpid = iopf->fault.prm.grpid,
                .code = status,
        };

        /* Only send response if there is a fault report pending */
        mutex_lock(&fault_param->lock);
        if (!list_empty(&group->pending_node)) {
                ops->page_response(dev, &group->last_fault, &resp);
                list_del_init(&group->pending_node);
        }
        mutex_unlock(&fault_param->lock);
}
EXPORT_SYMBOL_GPL(iopf_group_response);
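
/*
 * Illustrative only: a minimal sketch of the contract a domain's
 * ->iopf_handler must honour, per the comment in iommu_report_device_fault():
 * on success it owns the group and must both respond and free it.
 * resolve_one_fault() is hypothetical; the in-tree SVA path instead defers the
 * group to the iopf queue's workqueue and resolves the faults there.
 *
 *      static int hypothetical_domain_iopf_handler(struct iopf_group *group)
 *      {
 *              struct iopf_fault *iopf;
 *              enum iommu_page_response_code code = IOMMU_PAGE_RESP_SUCCESS;
 *
 *              list_for_each_entry(iopf, &group->faults, list) {
 *                      if (resolve_one_fault(group->attach_handle->domain,
 *                                            &iopf->fault)) {
 *                              code = IOMMU_PAGE_RESP_INVALID;
 *                              break;
 *                      }
 *              }
 *
 *              iopf_group_response(group, code);
 *              iopf_free_group(group);
 *              return 0;
 *      }
 */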
/**
 * iopf_queue_discard_partial - Remove all pending partial faults
 * @queue: the queue whose partial faults need to be discarded
 *
 * When the hardware queue overflows, the last page faults in a group may have
 * been lost and the IOMMU driver calls this to discard all partial faults. The
 * driver shouldn't be adding new faults to this queue concurrently.
 *
 * Return: 0 on success and <0 on error.
 */
int iopf_queue_discard_partial(struct iopf_queue *queue)
{
        struct iopf_fault *iopf, *next;
        struct iommu_fault_param *iopf_param;

        if (!queue)
                return -EINVAL;

        mutex_lock(&queue->lock);
        list_for_each_entry(iopf_param, &queue->devices, queue_list) {
                mutex_lock(&iopf_param->lock);
                list_for_each_entry_safe(iopf, next, &iopf_param->partial,
                                         list) {
                        list_del(&iopf->list);
                        kfree(iopf);
                }
                mutex_unlock(&iopf_param->lock);
        }
        mutex_unlock(&queue->lock);

        return 0;
}
EXPORT_SYMBOL_GPL(iopf_queue_discard_partial);
/**
 * iopf_queue_add_device - Add producer to the fault queue
 * @queue: IOPF queue
 * @dev: device to add
 *
 * Return: 0 on success and <0 on error.
 */
int iopf_queue_add_device(struct iopf_queue *queue, struct device *dev)
{
        int ret = 0;
        struct dev_iommu *param = dev->iommu;
        struct iommu_fault_param *fault_param;
        const struct iommu_ops *ops = dev_iommu_ops(dev);

        if (!ops->page_response)
                return -ENODEV;

        mutex_lock(&queue->lock);
        mutex_lock(&param->lock);
        if (rcu_dereference_check(param->fault_param,
                                  lockdep_is_held(&param->lock))) {
                ret = -EBUSY;
                goto done_unlock;
        }

        fault_param = kzalloc(sizeof(*fault_param), GFP_KERNEL);
        if (!fault_param) {
                ret = -ENOMEM;
                goto done_unlock;
        }

        mutex_init(&fault_param->lock);
        INIT_LIST_HEAD(&fault_param->faults);
        INIT_LIST_HEAD(&fault_param->partial);
        fault_param->dev = dev;
        refcount_set(&fault_param->users, 1);
        list_add(&fault_param->queue_list, &queue->devices);
        fault_param->queue = queue;

        rcu_assign_pointer(param->fault_param, fault_param);

done_unlock:
        mutex_unlock(&param->lock);
        mutex_unlock(&queue->lock);

        return ret;
}
EXPORT_SYMBOL_GPL(iopf_queue_add_device);
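
/*
 * Illustrative only: how a driver might wire a device into an IOPF queue when
 * enabling I/O page faults. The struct my_iommu, its evtq_iopf member (a queue
 * allocated earlier with iopf_queue_alloc()) and enable_pri_in_hw() are
 * assumptions for the sketch.
 *
 *      static int hypothetical_enable_iopf(struct my_iommu *smmu,
 *                                          struct device *dev)
 *      {
 *              int ret;
 *
 *              ret = iopf_queue_add_device(smmu->evtq_iopf, dev);
 *              if (ret)
 *                      return ret;     // -EBUSY if already added
 *
 *              ret = enable_pri_in_hw(dev);
 *              if (ret)
 *                      iopf_queue_remove_device(smmu->evtq_iopf, dev);
 *              return ret;
 *      }
 */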
/**
 * iopf_queue_remove_device - Remove producer from fault queue
 * @queue: IOPF queue
 * @dev: device to remove
 *
 * Removes a device from an iopf_queue. It's recommended to follow these steps
 * when removing a device:
 *
 * - Disable new PRI reception: Turn off PRI generation in the IOMMU hardware
 *   and flush any hardware page request queues. This should be done before
 *   calling into this helper.
 * - Acknowledge all outstanding PRQs to the device: Respond to all outstanding
 *   page requests with IOMMU_PAGE_RESP_INVALID, indicating the device should
 *   not retry. This helper function handles this.
 * - Disable PRI on the device: After calling this helper, the caller could
 *   then disable PRI on the device.
 *
 * Calling iopf_queue_remove_device() essentially disassociates the device.
 * The fault_param might still exist, but iommu_page_response() will do
 * nothing. The device fault parameter reference count has been properly
 * passed from iommu_report_device_fault() to the fault handling work, and
 * will eventually be released after iommu_page_response().
 */
void iopf_queue_remove_device(struct iopf_queue *queue, struct device *dev)
{
        struct iopf_fault *partial_iopf;
        struct iopf_fault *next;
        struct iopf_group *group, *temp;
        struct dev_iommu *param = dev->iommu;
        struct iommu_fault_param *fault_param;
        const struct iommu_ops *ops = dev_iommu_ops(dev);

        mutex_lock(&queue->lock);
        mutex_lock(&param->lock);
        fault_param = rcu_dereference_check(param->fault_param,
                                            lockdep_is_held(&param->lock));
        if (WARN_ON(!fault_param || fault_param->queue != queue))
                goto unlock;

        mutex_lock(&fault_param->lock);
        list_for_each_entry_safe(partial_iopf, next, &fault_param->partial, list)
                kfree(partial_iopf);

        list_for_each_entry_safe(group, temp, &fault_param->faults, pending_node) {
                struct iopf_fault *iopf = &group->last_fault;
                struct iommu_page_response resp = {
                        .pasid = iopf->fault.prm.pasid,
                        .grpid = iopf->fault.prm.grpid,
                        .code = IOMMU_PAGE_RESP_INVALID
                };

                ops->page_response(dev, iopf, &resp);
                list_del_init(&group->pending_node);
                iopf_free_group(group);
        }
        mutex_unlock(&fault_param->lock);

        list_del(&fault_param->queue_list);

        /* dec the ref owned by iopf_queue_add_device() */
        rcu_assign_pointer(param->fault_param, NULL);
        iopf_put_dev_fault_param(fault_param);
unlock:
        mutex_unlock(&param->lock);
        mutex_unlock(&queue->lock);
}
EXPORT_SYMBOL_GPL(iopf_queue_remove_device);
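
/*
 * Illustrative only: the teardown order recommended in the kernel-doc above,
 * as a hypothetical driver helper. disable_pri_in_hw() and
 * disable_pri_on_device() are placeholders for driver- and PCI-specific steps.
 *
 *      static void hypothetical_disable_iopf(struct my_iommu *smmu,
 *                                            struct device *dev)
 *      {
 *              disable_pri_in_hw(smmu, dev);   // stop new PRQs, drain hw queue
 *              iopf_queue_remove_device(smmu->evtq_iopf, dev); // NAK leftovers
 *              disable_pri_on_device(dev);     // e.g. pci_disable_pri()
 *      }
 */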
/**
 * iopf_queue_alloc - Allocate and initialize a fault queue
 * @name: a unique string identifying the queue (for workqueue)
 *
 * Return: the queue on success and NULL on error.
 */
struct iopf_queue *iopf_queue_alloc(const char *name)
{
        struct iopf_queue *queue;

        queue = kzalloc(sizeof(*queue), GFP_KERNEL);
        if (!queue)
                return NULL;

        /*
         * The WQ is unordered because the low-level handler enqueues faults by
         * group. PRI requests within a group have to be ordered, but once
         * that's dealt with, the high-level function can handle groups out of
         * order.
         */
        queue->wq = alloc_workqueue("iopf_queue/%s", WQ_UNBOUND, 0, name);
        if (!queue->wq) {
                kfree(queue);
                return NULL;
        }

        INIT_LIST_HEAD(&queue->devices);
        mutex_init(&queue->lock);

        return queue;
}
EXPORT_SYMBOL_GPL(iopf_queue_alloc);
/**
 * iopf_queue_free - Free IOPF queue
 * @queue: queue to free
 *
 * Counterpart to iopf_queue_alloc(). The driver must not be queuing faults or
 * adding/removing devices on this queue anymore.
 */
void iopf_queue_free(struct iopf_queue *queue)
{
        struct iommu_fault_param *iopf_param, *next;

        if (!queue)
                return;

        list_for_each_entry_safe(iopf_param, next, &queue->devices, queue_list)
                iopf_queue_remove_device(queue, iopf_param->dev);

        destroy_workqueue(queue->wq);
        kfree(queue);
}
EXPORT_SYMBOL_GPL(iopf_queue_free);
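
/*
 * Illustrative only: a hypothetical IOMMU driver's probe/remove pairing for
 * the queue itself. The struct my_iommu and its evtq_iopf and dev members are
 * assumptions for the sketch.
 *
 *      static int hypothetical_iommu_probe(struct my_iommu *smmu)
 *      {
 *              smmu->evtq_iopf = iopf_queue_alloc(dev_name(smmu->dev));
 *              if (!smmu->evtq_iopf)
 *                      return -ENOMEM;
 *              return 0;
 *      }
 *
 *      static void hypothetical_iommu_remove(struct my_iommu *smmu)
 *      {
 *              // Also removes any devices still attached to the queue.
 *              iopf_queue_free(smmu->evtq_iopf);
 *      }
 */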