scif_dma.c 52 KB


  1. /*
  2. * Intel MIC Platform Software Stack (MPSS)
  3. *
  4. * Copyright(c) 2015 Intel Corporation.
  5. *
  6. * This program is free software; you can redistribute it and/or modify
  7. * it under the terms of the GNU General Public License, version 2, as
  8. * published by the Free Software Foundation.
  9. *
  10. * This program is distributed in the hope that it will be useful, but
  11. * WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  13. * General Public License for more details.
  14. *
  15. * Intel SCIF driver.
  16. *
  17. */
  18. #include "scif_main.h"
  19. #include "scif_map.h"
  /**
   * struct scif_dma_comp_cb - SCIF DMA completion callback
   *
   * Context handed to scif_rma_completion_cb() once a transfer that was staged
   * through a temporary bounce buffer has been programmed.
   *
   * @dma_completion_func: DMA completion callback
   * @cb_cookie: DMA completion callback cookie
   * @temp_buf: Temporary buffer; the payload starts @header_padding bytes in
   * @temp_buf_to_free: Temporary buffer to be freed on completion
   * @is_cache: true if @temp_buf_to_free came from a kmem_cache (it is then
   *            released with kmem_cache_free() instead of kfree())
   * @dst_offset: Destination registration offset
   * @dst_window: Destination registration window; when non-NULL the completion
   *              callback copies @len bytes from the temporary buffer into it
   * @len: Length of the temp buffer
   * @temp_phys: DMA address of the temp buffer (unmapped on completion)
   * @sdev: The SCIF device
   * @header_padding: padding for cache line alignment; byte offset of the
   *                  payload inside @temp_buf
   */
  struct scif_dma_comp_cb {
      void (*dma_completion_func)(void *cookie);
      void *cb_cookie;
      u8 *temp_buf;
      u8 *temp_buf_to_free;
      bool is_cache;
      s64 dst_offset;
      struct scif_window *dst_window;
      size_t len;
      dma_addr_t temp_phys;
      struct scif_dev *sdev;
      int header_padding;
  };
  /**
   * struct scif_copy_work - Work for DMA copy
   *
   * Describes one RMA copy request: where to read from, where to write to and
   * how completion/ordering has to be handled.
   *
   * @src_offset: Starting source offset
   * @dst_offset: Starting destination offset
   * @src_window: Starting src registered window
   * @dst_window: Starting dst registered window
   * @loopback: non-zero if this is a loopback DMA transfer
   * @len: Length of the transfer in bytes
   * @comp_cb: DMA copy completion callback context
   * @remote_dev: The remote SCIF peer device
   * @fence_type: polling or interrupt based
   *              (presumably SCIF_DMA_POLL / SCIF_DMA_INTR - TODO confirm)
   * @ordered: is this a tail byte ordered DMA transfer
   */
  struct scif_copy_work {
      s64 src_offset;
      s64 dst_offset;
      struct scif_window *src_window;
      struct scif_window *dst_window;
      int loopback;
      size_t len;
      struct scif_dma_comp_cb *comp_cb;
      struct scif_dev *remote_dev;
      int fence_type;
      bool ordered;
  };
  74. /**
  75. * scif_reserve_dma_chan:
  76. * @ep: Endpoint Descriptor.
  77. *
  78. * This routine reserves a DMA channel for a particular
  79. * endpoint. All DMA transfers for an endpoint are always
  80. * programmed on the same DMA channel.
  81. */
  82. int scif_reserve_dma_chan(struct scif_endpt *ep)
  83. {
  84. int err = 0;
  85. struct scif_dev *scifdev;
  86. struct scif_hw_dev *sdev;
  87. struct dma_chan *chan;
  88. /* Loopback DMAs are not supported on the management node */
  89. if (!scif_info.nodeid && scifdev_self(ep->remote_dev))
  90. return 0;
  91. if (scif_info.nodeid)
  92. scifdev = &scif_dev[0];
  93. else
  94. scifdev = ep->remote_dev;
  95. sdev = scifdev->sdev;
  96. if (!sdev->num_dma_ch)
  97. return -ENODEV;
  98. chan = sdev->dma_ch[scifdev->dma_ch_idx];
  99. scifdev->dma_ch_idx = (scifdev->dma_ch_idx + 1) % sdev->num_dma_ch;
  100. mutex_lock(&ep->rma_info.rma_lock);
  101. ep->rma_info.dma_chan = chan;
  102. mutex_unlock(&ep->rma_info.rma_lock);
  103. return err;
  104. }
  105. #ifdef CONFIG_MMU_NOTIFIER
  106. /**
  107. * scif_rma_destroy_tcw:
  108. *
  109. * This routine destroys temporary cached windows
  110. */
  111. static
  112. void __scif_rma_destroy_tcw(struct scif_mmu_notif *mmn,
  113. u64 start, u64 len)
  114. {
  115. struct list_head *item, *tmp;
  116. struct scif_window *window;
  117. u64 start_va, end_va;
  118. u64 end = start + len;
  119. if (end <= start)
  120. return;
  121. list_for_each_safe(item, tmp, &mmn->tc_reg_list) {
  122. window = list_entry(item, struct scif_window, list);
  123. if (!len)
  124. break;
  125. start_va = window->va_for_temp;
  126. end_va = start_va + (window->nr_pages << PAGE_SHIFT);
  127. if (start < start_va && end <= start_va)
  128. break;
  129. if (start >= end_va)
  130. continue;
  131. __scif_rma_destroy_tcw_helper(window);
  132. }
  133. }
  134. static void scif_rma_destroy_tcw(struct scif_mmu_notif *mmn, u64 start, u64 len)
  135. {
  136. struct scif_endpt *ep = mmn->ep;
  137. spin_lock(&ep->rma_info.tc_lock);
  138. __scif_rma_destroy_tcw(mmn, start, len);
  139. spin_unlock(&ep->rma_info.tc_lock);
  140. }
  141. static void scif_rma_destroy_tcw_ep(struct scif_endpt *ep)
  142. {
  143. struct list_head *item, *tmp;
  144. struct scif_mmu_notif *mmn;
  145. list_for_each_safe(item, tmp, &ep->rma_info.mmn_list) {
  146. mmn = list_entry(item, struct scif_mmu_notif, list);
  147. scif_rma_destroy_tcw(mmn, 0, ULONG_MAX);
  148. }
  149. }
  150. static void __scif_rma_destroy_tcw_ep(struct scif_endpt *ep)
  151. {
  152. struct list_head *item, *tmp;
  153. struct scif_mmu_notif *mmn;
  154. spin_lock(&ep->rma_info.tc_lock);
  155. list_for_each_safe(item, tmp, &ep->rma_info.mmn_list) {
  156. mmn = list_entry(item, struct scif_mmu_notif, list);
  157. __scif_rma_destroy_tcw(mmn, 0, ULONG_MAX);
  158. }
  159. spin_unlock(&ep->rma_info.tc_lock);
  160. }
  161. static bool scif_rma_tc_can_cache(struct scif_endpt *ep, size_t cur_bytes)
  162. {
  163. if ((cur_bytes >> PAGE_SHIFT) > scif_info.rma_tc_limit)
  164. return false;
  165. if ((atomic_read(&ep->rma_info.tcw_total_pages)
  166. + (cur_bytes >> PAGE_SHIFT)) >
  167. scif_info.rma_tc_limit) {
  168. dev_info(scif_info.mdev.this_device,
  169. "%s %d total=%d, current=%zu reached max\n",
  170. __func__, __LINE__,
  171. atomic_read(&ep->rma_info.tcw_total_pages),
  172. (1 + (cur_bytes >> PAGE_SHIFT)));
  173. scif_rma_destroy_tcw_invalid();
  174. __scif_rma_destroy_tcw_ep(ep);
  175. }
  176. return true;
  177. }
  178. static void scif_mmu_notifier_release(struct mmu_notifier *mn,
  179. struct mm_struct *mm)
  180. {
  181. struct scif_mmu_notif *mmn;
  182. mmn = container_of(mn, struct scif_mmu_notif, ep_mmu_notifier);
  183. scif_rma_destroy_tcw(mmn, 0, ULONG_MAX);
  184. schedule_work(&scif_info.misc_work);
  185. }
  186. static int scif_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
  187. struct mm_struct *mm,
  188. unsigned long start,
  189. unsigned long end,
  190. bool blockable)
  191. {
  192. struct scif_mmu_notif *mmn;
  193. mmn = container_of(mn, struct scif_mmu_notif, ep_mmu_notifier);
  194. scif_rma_destroy_tcw(mmn, start, end - start);
  195. return 0;
  196. }
  /*
   * mmu_notifier ->invalidate_range_end hook. Intentionally empty: all the
   * work is performed by scif_mmu_notifier_invalidate_range_start().
   */
  static void scif_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
                                                     struct mm_struct *mm,
                                                     unsigned long start,
                                                     unsigned long end)
  {
  }
  207. static const struct mmu_notifier_ops scif_mmu_notifier_ops = {
  208. .release = scif_mmu_notifier_release,
  209. .clear_flush_young = NULL,
  210. .invalidate_range_start = scif_mmu_notifier_invalidate_range_start,
  211. .invalidate_range_end = scif_mmu_notifier_invalidate_range_end};
  212. static void scif_ep_unregister_mmu_notifier(struct scif_endpt *ep)
  213. {
  214. struct scif_endpt_rma_info *rma = &ep->rma_info;
  215. struct scif_mmu_notif *mmn = NULL;
  216. struct list_head *item, *tmp;
  217. mutex_lock(&ep->rma_info.mmn_lock);
  218. list_for_each_safe(item, tmp, &rma->mmn_list) {
  219. mmn = list_entry(item, struct scif_mmu_notif, list);
  220. mmu_notifier_unregister(&mmn->ep_mmu_notifier, mmn->mm);
  221. list_del(item);
  222. kfree(mmn);
  223. }
  224. mutex_unlock(&ep->rma_info.mmn_lock);
  225. }
  226. static void scif_init_mmu_notifier(struct scif_mmu_notif *mmn,
  227. struct mm_struct *mm, struct scif_endpt *ep)
  228. {
  229. mmn->ep = ep;
  230. mmn->mm = mm;
  231. mmn->ep_mmu_notifier.ops = &scif_mmu_notifier_ops;
  232. INIT_LIST_HEAD(&mmn->list);
  233. INIT_LIST_HEAD(&mmn->tc_reg_list);
  234. }
  235. static struct scif_mmu_notif *
  236. scif_find_mmu_notifier(struct mm_struct *mm, struct scif_endpt_rma_info *rma)
  237. {
  238. struct scif_mmu_notif *mmn;
  239. list_for_each_entry(mmn, &rma->mmn_list, list)
  240. if (mmn->mm == mm)
  241. return mmn;
  242. return NULL;
  243. }
  244. static struct scif_mmu_notif *
  245. scif_add_mmu_notifier(struct mm_struct *mm, struct scif_endpt *ep)
  246. {
  247. struct scif_mmu_notif *mmn
  248. = kzalloc(sizeof(*mmn), GFP_KERNEL);
  249. if (!mmn)
  250. return ERR_PTR(-ENOMEM);
  251. scif_init_mmu_notifier(mmn, current->mm, ep);
  252. if (mmu_notifier_register(&mmn->ep_mmu_notifier, current->mm)) {
  253. kfree(mmn);
  254. return ERR_PTR(-EBUSY);
  255. }
  256. list_add(&mmn->list, &ep->rma_info.mmn_list);
  257. return mmn;
  258. }
  259. /*
  260. * Called from the misc thread to destroy temporary cached windows and
  261. * unregister the MMU notifier for the SCIF endpoint.
  262. */
  263. void scif_mmu_notif_handler(struct work_struct *work)
  264. {
  265. struct list_head *pos, *tmpq;
  266. struct scif_endpt *ep;
  267. restart:
  268. scif_rma_destroy_tcw_invalid();
  269. spin_lock(&scif_info.rmalock);
  270. list_for_each_safe(pos, tmpq, &scif_info.mmu_notif_cleanup) {
  271. ep = list_entry(pos, struct scif_endpt, mmu_list);
  272. list_del(&ep->mmu_list);
  273. spin_unlock(&scif_info.rmalock);
  274. scif_rma_destroy_tcw_ep(ep);
  275. scif_ep_unregister_mmu_notifier(ep);
  276. goto restart;
  277. }
  278. spin_unlock(&scif_info.rmalock);
  279. }
  280. static bool scif_is_set_reg_cache(int flags)
  281. {
  282. return !!(flags & SCIF_RMA_USECACHE);
  283. }
  284. #else
  /*
   * Stubs used when CONFIG_MMU_NOTIFIER is not set: no notifier is ever
   * found or created, and the registration cache is reported as unavailable.
   */

  /* No notifier tracking without CONFIG_MMU_NOTIFIER: nothing to find. */
  static struct scif_mmu_notif *
  scif_find_mmu_notifier(struct mm_struct *mm,
                         struct scif_endpt_rma_info *rma)
  {
      return NULL;
  }

  /* No notifier can be added without CONFIG_MMU_NOTIFIER. */
  static struct scif_mmu_notif *
  scif_add_mmu_notifier(struct mm_struct *mm, struct scif_endpt *ep)
  {
      return NULL;
  }

  /* Nothing to clean up without CONFIG_MMU_NOTIFIER. */
  void scif_mmu_notif_handler(struct work_struct *work)
  {
  }

  /* The registration cache is never requested in this configuration. */
  static bool scif_is_set_reg_cache(int flags)
  {
      return false;
  }

  /* Temporary window caching is never allowed in this configuration. */
  static bool scif_rma_tc_can_cache(struct scif_endpt *ep, size_t cur_bytes)
  {
      return false;
  }
  307. #endif
  308. /**
  309. * scif_register_temp:
  310. * @epd: End Point Descriptor.
  311. * @addr: virtual address to/from which to copy
  312. * @len: length of range to copy
  313. * @out_offset: computed offset returned by reference.
  314. * @out_window: allocated registered window returned by reference.
  315. *
  316. * Create a temporary registered window. The peer will not know about this
  317. * window. This API is used for scif_vreadfrom()/scif_vwriteto() API's.
  318. */
  319. static int
  320. scif_register_temp(scif_epd_t epd, unsigned long addr, size_t len, int prot,
  321. off_t *out_offset, struct scif_window **out_window)
  322. {
  323. struct scif_endpt *ep = (struct scif_endpt *)epd;
  324. int err;
  325. scif_pinned_pages_t pinned_pages;
  326. size_t aligned_len;
  327. aligned_len = ALIGN(len, PAGE_SIZE);
  328. err = __scif_pin_pages((void *)(addr & PAGE_MASK),
  329. aligned_len, &prot, 0, &pinned_pages);
  330. if (err)
  331. return err;
  332. pinned_pages->prot = prot;
  333. /* Compute the offset for this registration */
  334. err = scif_get_window_offset(ep, 0, 0,
  335. aligned_len >> PAGE_SHIFT,
  336. (s64 *)out_offset);
  337. if (err)
  338. goto error_unpin;
  339. /* Allocate and prepare self registration window */
  340. *out_window = scif_create_window(ep, aligned_len >> PAGE_SHIFT,
  341. *out_offset, true);
  342. if (!*out_window) {
  343. scif_free_window_offset(ep, NULL, *out_offset);
  344. err = -ENOMEM;
  345. goto error_unpin;
  346. }
  347. (*out_window)->pinned_pages = pinned_pages;
  348. (*out_window)->nr_pages = pinned_pages->nr_pages;
  349. (*out_window)->prot = pinned_pages->prot;
  350. (*out_window)->va_for_temp = addr & PAGE_MASK;
  351. err = scif_map_window(ep->remote_dev, *out_window);
  352. if (err) {
  353. /* Something went wrong! Rollback */
  354. scif_destroy_window(ep, *out_window);
  355. *out_window = NULL;
  356. } else {
  357. *out_offset |= (addr - (*out_window)->va_for_temp);
  358. }
  359. return err;
  360. error_unpin:
  361. if (err)
  362. dev_err(&ep->remote_dev->sdev->dev,
  363. "%s %d err %d\n", __func__, __LINE__, err);
  364. scif_unpin_pages(pinned_pages);
  365. return err;
  366. }
  367. #define SCIF_DMA_TO (3 * HZ)
  368. /*
  369. * scif_sync_dma - Program a DMA without an interrupt descriptor
  370. *
  371. * @dev - The address of the pointer to the device instance used
  372. * for DMA registration.
  373. * @chan - DMA channel to be used.
  374. * @sync_wait: Wait for DMA to complete?
  375. *
  376. * Return 0 on success and -errno on error.
  377. */
  378. static int scif_sync_dma(struct scif_hw_dev *sdev, struct dma_chan *chan,
  379. bool sync_wait)
  380. {
  381. int err = 0;
  382. struct dma_async_tx_descriptor *tx = NULL;
  383. enum dma_ctrl_flags flags = DMA_PREP_FENCE;
  384. dma_cookie_t cookie;
  385. struct dma_device *ddev;
  386. if (!chan) {
  387. err = -EIO;
  388. dev_err(&sdev->dev, "%s %d err %d\n",
  389. __func__, __LINE__, err);
  390. return err;
  391. }
  392. ddev = chan->device;
  393. tx = ddev->device_prep_dma_memcpy(chan, 0, 0, 0, flags);
  394. if (!tx) {
  395. err = -ENOMEM;
  396. dev_err(&sdev->dev, "%s %d err %d\n",
  397. __func__, __LINE__, err);
  398. goto release;
  399. }
  400. cookie = tx->tx_submit(tx);
  401. if (dma_submit_error(cookie)) {
  402. err = -ENOMEM;
  403. dev_err(&sdev->dev, "%s %d err %d\n",
  404. __func__, __LINE__, err);
  405. goto release;
  406. }
  407. if (!sync_wait) {
  408. dma_async_issue_pending(chan);
  409. } else {
  410. if (dma_sync_wait(chan, cookie) == DMA_COMPLETE) {
  411. err = 0;
  412. } else {
  413. err = -EIO;
  414. dev_err(&sdev->dev, "%s %d err %d\n",
  415. __func__, __LINE__, err);
  416. }
  417. }
  418. release:
  419. return err;
  420. }
  /* DMA completion callback: @arg is the struct completion to signal. */
  static void scif_dma_callback(void *arg)
  {
      struct completion *waiter = arg;

      complete(waiter);
  }
  426. #define SCIF_DMA_SYNC_WAIT true
  427. #define SCIF_DMA_POLL BIT(0)
  428. #define SCIF_DMA_INTR BIT(1)
  429. /*
  430. * scif_async_dma - Program a DMA with an interrupt descriptor
  431. *
  432. * @dev - The address of the pointer to the device instance used
  433. * for DMA registration.
  434. * @chan - DMA channel to be used.
  435. * Return 0 on success and -errno on error.
  436. */
  437. static int scif_async_dma(struct scif_hw_dev *sdev, struct dma_chan *chan)
  438. {
  439. int err = 0;
  440. struct dma_device *ddev;
  441. struct dma_async_tx_descriptor *tx = NULL;
  442. enum dma_ctrl_flags flags = DMA_PREP_INTERRUPT | DMA_PREP_FENCE;
  443. DECLARE_COMPLETION_ONSTACK(done_wait);
  444. dma_cookie_t cookie;
  445. enum dma_status status;
  446. if (!chan) {
  447. err = -EIO;
  448. dev_err(&sdev->dev, "%s %d err %d\n",
  449. __func__, __LINE__, err);
  450. return err;
  451. }
  452. ddev = chan->device;
  453. tx = ddev->device_prep_dma_memcpy(chan, 0, 0, 0, flags);
  454. if (!tx) {
  455. err = -ENOMEM;
  456. dev_err(&sdev->dev, "%s %d err %d\n",
  457. __func__, __LINE__, err);
  458. goto release;
  459. }
  460. reinit_completion(&done_wait);
  461. tx->callback = scif_dma_callback;
  462. tx->callback_param = &done_wait;
  463. cookie = tx->tx_submit(tx);
  464. if (dma_submit_error(cookie)) {
  465. err = -ENOMEM;
  466. dev_err(&sdev->dev, "%s %d err %d\n",
  467. __func__, __LINE__, err);
  468. goto release;
  469. }
  470. dma_async_issue_pending(chan);
  471. err = wait_for_completion_timeout(&done_wait, SCIF_DMA_TO);
  472. if (!err) {
  473. err = -EIO;
  474. dev_err(&sdev->dev, "%s %d err %d\n",
  475. __func__, __LINE__, err);
  476. goto release;
  477. }
  478. err = 0;
  479. status = dma_async_is_tx_complete(chan, cookie, NULL, NULL);
  480. if (status != DMA_COMPLETE) {
  481. err = -EIO;
  482. dev_err(&sdev->dev, "%s %d err %d\n",
  483. __func__, __LINE__, err);
  484. goto release;
  485. }
  486. release:
  487. return err;
  488. }
  489. /*
  490. * scif_drain_dma_poll - Drain all outstanding DMA operations for a particular
  491. * DMA channel via polling.
  492. *
  493. * @sdev - The SCIF device
  494. * @chan - DMA channel
  495. * Return 0 on success and -errno on error.
  496. */
  497. static int scif_drain_dma_poll(struct scif_hw_dev *sdev, struct dma_chan *chan)
  498. {
  499. if (!chan)
  500. return -EINVAL;
  501. return scif_sync_dma(sdev, chan, SCIF_DMA_SYNC_WAIT);
  502. }
  503. /*
  504. * scif_drain_dma_intr - Drain all outstanding DMA operations for a particular
  505. * DMA channel via interrupt based blocking wait.
  506. *
  507. * @sdev - The SCIF device
  508. * @chan - DMA channel
  509. * Return 0 on success and -errno on error.
  510. */
  511. int scif_drain_dma_intr(struct scif_hw_dev *sdev, struct dma_chan *chan)
  512. {
  513. if (!chan)
  514. return -EINVAL;
  515. return scif_async_dma(sdev, chan);
  516. }
  517. /**
  518. * scif_rma_destroy_windows:
  519. *
  520. * This routine destroys all windows queued for cleanup
  521. */
  522. void scif_rma_destroy_windows(void)
  523. {
  524. struct list_head *item, *tmp;
  525. struct scif_window *window;
  526. struct scif_endpt *ep;
  527. struct dma_chan *chan;
  528. might_sleep();
  529. restart:
  530. spin_lock(&scif_info.rmalock);
  531. list_for_each_safe(item, tmp, &scif_info.rma) {
  532. window = list_entry(item, struct scif_window,
  533. list);
  534. ep = (struct scif_endpt *)window->ep;
  535. chan = ep->rma_info.dma_chan;
  536. list_del_init(&window->list);
  537. spin_unlock(&scif_info.rmalock);
  538. if (!chan || !scifdev_alive(ep) ||
  539. !scif_drain_dma_intr(ep->remote_dev->sdev,
  540. ep->rma_info.dma_chan))
  541. /* Remove window from global list */
  542. window->unreg_state = OP_COMPLETED;
  543. else
  544. dev_warn(&ep->remote_dev->sdev->dev,
  545. "DMA engine hung?\n");
  546. if (window->unreg_state == OP_COMPLETED) {
  547. if (window->type == SCIF_WINDOW_SELF)
  548. scif_destroy_window(ep, window);
  549. else
  550. scif_destroy_remote_window(window);
  551. atomic_dec(&ep->rma_info.tw_refcount);
  552. }
  553. goto restart;
  554. }
  555. spin_unlock(&scif_info.rmalock);
  556. }
  557. /**
  558. * scif_rma_destroy_tcw:
  559. *
  560. * This routine destroys temporary cached registered windows
  561. * which have been queued for cleanup.
  562. */
  563. void scif_rma_destroy_tcw_invalid(void)
  564. {
  565. struct list_head *item, *tmp;
  566. struct scif_window *window;
  567. struct scif_endpt *ep;
  568. struct dma_chan *chan;
  569. might_sleep();
  570. restart:
  571. spin_lock(&scif_info.rmalock);
  572. list_for_each_safe(item, tmp, &scif_info.rma_tc) {
  573. window = list_entry(item, struct scif_window, list);
  574. ep = (struct scif_endpt *)window->ep;
  575. chan = ep->rma_info.dma_chan;
  576. list_del_init(&window->list);
  577. spin_unlock(&scif_info.rmalock);
  578. mutex_lock(&ep->rma_info.rma_lock);
  579. if (!chan || !scifdev_alive(ep) ||
  580. !scif_drain_dma_intr(ep->remote_dev->sdev,
  581. ep->rma_info.dma_chan)) {
  582. atomic_sub(window->nr_pages,
  583. &ep->rma_info.tcw_total_pages);
  584. scif_destroy_window(ep, window);
  585. atomic_dec(&ep->rma_info.tcw_refcount);
  586. } else {
  587. dev_warn(&ep->remote_dev->sdev->dev,
  588. "DMA engine hung?\n");
  589. }
  590. mutex_unlock(&ep->rma_info.rma_lock);
  591. goto restart;
  592. }
  593. spin_unlock(&scif_info.rmalock);
  594. }
  595. static inline
  596. void *_get_local_va(off_t off, struct scif_window *window, size_t len)
  597. {
  598. int page_nr = (off - window->offset) >> PAGE_SHIFT;
  599. off_t page_off = off & ~PAGE_MASK;
  600. void *va = NULL;
  601. if (window->type == SCIF_WINDOW_SELF) {
  602. struct page **pages = window->pinned_pages->pages;
  603. va = page_address(pages[page_nr]) + page_off;
  604. }
  605. return va;
  606. }
  607. static inline
  608. void *ioremap_remote(off_t off, struct scif_window *window,
  609. size_t len, struct scif_dev *dev,
  610. struct scif_window_iter *iter)
  611. {
  612. dma_addr_t phys = scif_off_to_dma_addr(window, off, NULL, iter);
  613. /*
  614. * If the DMA address is not card relative then we need the DMA
  615. * addresses to be an offset into the bar. The aperture base was already
  616. * added so subtract it here since scif_ioremap is going to add it again
  617. */
  618. if (!scifdev_self(dev) && window->type == SCIF_WINDOW_PEER &&
  619. dev->sdev->aper && !dev->sdev->card_rel_da)
  620. phys = phys - dev->sdev->aper->pa;
  621. return scif_ioremap(phys, len, dev);
  622. }
  623. static inline void
  624. iounmap_remote(void *virt, size_t size, struct scif_copy_work *work)
  625. {
  626. scif_iounmap(virt, size, work->remote_dev);
  627. }
  628. /*
  629. * Takes care of ordering issue caused by
  630. * 1. Hardware: Only in the case of cpu copy from mgmt node to card
  631. * because of WC memory.
  632. * 2. Software: If memcpy reorders copy instructions for optimization.
  633. * This could happen at both mgmt node and card.
  634. */
  635. static inline void
  636. scif_ordered_memcpy_toio(char *dst, const char *src, size_t count)
  637. {
  638. if (!count)
  639. return;
  640. memcpy_toio((void __iomem __force *)dst, src, --count);
  641. /* Order the last byte with the previous stores */
  642. wmb();
  643. *(dst + count) = *(src + count);
  644. }
  645. static inline void scif_unaligned_cpy_toio(char *dst, const char *src,
  646. size_t count, bool ordered)
  647. {
  648. if (ordered)
  649. scif_ordered_memcpy_toio(dst, src, count);
  650. else
  651. memcpy_toio((void __iomem __force *)dst, src, count);
  652. }
  653. static inline
  654. void scif_ordered_memcpy_fromio(char *dst, const char *src, size_t count)
  655. {
  656. if (!count)
  657. return;
  658. memcpy_fromio(dst, (void __iomem __force *)src, --count);
  659. /* Order the last byte with the previous loads */
  660. rmb();
  661. *(dst + count) = *(src + count);
  662. }
  663. static inline void scif_unaligned_cpy_fromio(char *dst, const char *src,
  664. size_t count, bool ordered)
  665. {
  666. if (ordered)
  667. scif_ordered_memcpy_fromio(dst, src, count);
  668. else
  669. memcpy_fromio(dst, (void __iomem __force *)src, count);
  670. }
  671. #define SCIF_RMA_ERROR_CODE (~(dma_addr_t)0x0)
  672. /*
  673. * scif_off_to_dma_addr:
  674. * Obtain the dma_addr given the window and the offset.
  675. * @window: Registered window.
  676. * @off: Window offset.
  677. * @nr_bytes: Return the number of contiguous bytes till next DMA addr index.
  678. * @index: Return the index of the dma_addr array found.
  679. * @start_off: start offset of index of the dma addr array found.
  680. * The nr_bytes provides the callee an estimate of the maximum possible
  681. * DMA xfer possible while the index/start_off provide faster lookups
  682. * for the next iteration.
  683. */
  684. dma_addr_t scif_off_to_dma_addr(struct scif_window *window, s64 off,
  685. size_t *nr_bytes, struct scif_window_iter *iter)
  686. {
  687. int i, page_nr;
  688. s64 start, end;
  689. off_t page_off;
  690. if (window->nr_pages == window->nr_contig_chunks) {
  691. page_nr = (off - window->offset) >> PAGE_SHIFT;
  692. page_off = off & ~PAGE_MASK;
  693. if (nr_bytes)
  694. *nr_bytes = PAGE_SIZE - page_off;
  695. return window->dma_addr[page_nr] | page_off;
  696. }
  697. if (iter) {
  698. i = iter->index;
  699. start = iter->offset;
  700. } else {
  701. i = 0;
  702. start = window->offset;
  703. }
  704. for (; i < window->nr_contig_chunks; i++) {
  705. end = start + (window->num_pages[i] << PAGE_SHIFT);
  706. if (off >= start && off < end) {
  707. if (iter) {
  708. iter->index = i;
  709. iter->offset = start;
  710. }
  711. if (nr_bytes)
  712. *nr_bytes = end - off;
  713. return (window->dma_addr[i] + (off - start));
  714. }
  715. start += (window->num_pages[i] << PAGE_SHIFT);
  716. }
  717. dev_err(scif_info.mdev.this_device,
  718. "%s %d BUG. Addr not found? window %p off 0x%llx\n",
  719. __func__, __LINE__, window, off);
  720. return SCIF_RMA_ERROR_CODE;
  721. }
  722. /*
  723. * Copy between rma window and temporary buffer
  724. */
  725. static void scif_rma_local_cpu_copy(s64 offset, struct scif_window *window,
  726. u8 *temp, size_t rem_len, bool to_temp)
  727. {
  728. void *window_virt;
  729. size_t loop_len;
  730. int offset_in_page;
  731. s64 end_offset;
  732. offset_in_page = offset & ~PAGE_MASK;
  733. loop_len = PAGE_SIZE - offset_in_page;
  734. if (rem_len < loop_len)
  735. loop_len = rem_len;
  736. window_virt = _get_local_va(offset, window, loop_len);
  737. if (!window_virt)
  738. return;
  739. if (to_temp)
  740. memcpy(temp, window_virt, loop_len);
  741. else
  742. memcpy(window_virt, temp, loop_len);
  743. offset += loop_len;
  744. temp += loop_len;
  745. rem_len -= loop_len;
  746. end_offset = window->offset +
  747. (window->nr_pages << PAGE_SHIFT);
  748. while (rem_len) {
  749. if (offset == end_offset) {
  750. window = list_next_entry(window, list);
  751. end_offset = window->offset +
  752. (window->nr_pages << PAGE_SHIFT);
  753. }
  754. loop_len = min(PAGE_SIZE, rem_len);
  755. window_virt = _get_local_va(offset, window, loop_len);
  756. if (!window_virt)
  757. return;
  758. if (to_temp)
  759. memcpy(temp, window_virt, loop_len);
  760. else
  761. memcpy(window_virt, temp, loop_len);
  762. offset += loop_len;
  763. temp += loop_len;
  764. rem_len -= loop_len;
  765. }
  766. }
  767. /**
  768. * scif_rma_completion_cb:
  769. * @data: RMA cookie
  770. *
  771. * RMA interrupt completion callback.
  772. */
  773. static void scif_rma_completion_cb(void *data)
  774. {
  775. struct scif_dma_comp_cb *comp_cb = data;
  776. /* Free DMA Completion CB. */
  777. if (comp_cb->dst_window)
  778. scif_rma_local_cpu_copy(comp_cb->dst_offset,
  779. comp_cb->dst_window,
  780. comp_cb->temp_buf +
  781. comp_cb->header_padding,
  782. comp_cb->len, false);
  783. scif_unmap_single(comp_cb->temp_phys, comp_cb->sdev,
  784. SCIF_KMEM_UNALIGNED_BUF_SIZE);
  785. if (comp_cb->is_cache)
  786. kmem_cache_free(unaligned_cache,
  787. comp_cb->temp_buf_to_free);
  788. else
  789. kfree(comp_cb->temp_buf_to_free);
  790. }
  791. /* Copies between temporary buffer and offsets provided in work */
  792. static int
  793. scif_rma_list_dma_copy_unaligned(struct scif_copy_work *work,
  794. u8 *temp, struct dma_chan *chan,
  795. bool src_local)
  796. {
  797. struct scif_dma_comp_cb *comp_cb = work->comp_cb;
  798. dma_addr_t window_dma_addr, temp_dma_addr;
  799. dma_addr_t temp_phys = comp_cb->temp_phys;
  800. size_t loop_len, nr_contig_bytes = 0, remaining_len = work->len;
  801. int offset_in_ca, ret = 0;
  802. s64 end_offset, offset;
  803. struct scif_window *window;
  804. void *window_virt_addr;
  805. size_t tail_len;
  806. struct dma_async_tx_descriptor *tx;
  807. struct dma_device *dev = chan->device;
  808. dma_cookie_t cookie;
  809. if (src_local) {
  810. offset = work->dst_offset;
  811. window = work->dst_window;
  812. } else {
  813. offset = work->src_offset;
  814. window = work->src_window;
  815. }
  816. offset_in_ca = offset & (L1_CACHE_BYTES - 1);
  817. if (offset_in_ca) {
  818. loop_len = L1_CACHE_BYTES - offset_in_ca;
  819. loop_len = min(loop_len, remaining_len);
  820. window_virt_addr = ioremap_remote(offset, window,
  821. loop_len,
  822. work->remote_dev,
  823. NULL);
  824. if (!window_virt_addr)
  825. return -ENOMEM;
  826. if (src_local)
  827. scif_unaligned_cpy_toio(window_virt_addr, temp,
  828. loop_len,
  829. work->ordered &&
  830. !(remaining_len - loop_len));
  831. else
  832. scif_unaligned_cpy_fromio(temp, window_virt_addr,
  833. loop_len, work->ordered &&
  834. !(remaining_len - loop_len));
  835. iounmap_remote(window_virt_addr, loop_len, work);
  836. offset += loop_len;
  837. temp += loop_len;
  838. temp_phys += loop_len;
  839. remaining_len -= loop_len;
  840. }
  841. offset_in_ca = offset & ~PAGE_MASK;
  842. end_offset = window->offset +
  843. (window->nr_pages << PAGE_SHIFT);
  844. tail_len = remaining_len & (L1_CACHE_BYTES - 1);
  845. remaining_len -= tail_len;
  846. while (remaining_len) {
  847. if (offset == end_offset) {
  848. window = list_next_entry(window, list);
  849. end_offset = window->offset +
  850. (window->nr_pages << PAGE_SHIFT);
  851. }
  852. if (scif_is_mgmt_node())
  853. temp_dma_addr = temp_phys;
  854. else
  855. /* Fix if we ever enable IOMMU on the card */
  856. temp_dma_addr = (dma_addr_t)virt_to_phys(temp);
  857. window_dma_addr = scif_off_to_dma_addr(window, offset,
  858. &nr_contig_bytes,
  859. NULL);
  860. loop_len = min(nr_contig_bytes, remaining_len);
  861. if (src_local) {
  862. if (work->ordered && !tail_len &&
  863. !(remaining_len - loop_len) &&
  864. loop_len != L1_CACHE_BYTES) {
  865. /*
  866. * Break up the last chunk of the transfer into
  867. * two steps. if there is no tail to guarantee
  868. * DMA ordering. SCIF_DMA_POLLING inserts
  869. * a status update descriptor in step 1 which
  870. * acts as a double sided synchronization fence
  871. * for the DMA engine to ensure that the last
  872. * cache line in step 2 is updated last.
  873. */
  874. /* Step 1) DMA: Body Length - L1_CACHE_BYTES. */
  875. tx =
  876. dev->device_prep_dma_memcpy(chan,
  877. window_dma_addr,
  878. temp_dma_addr,
  879. loop_len -
  880. L1_CACHE_BYTES,
  881. DMA_PREP_FENCE);
  882. if (!tx) {
  883. ret = -ENOMEM;
  884. goto err;
  885. }
  886. cookie = tx->tx_submit(tx);
  887. if (dma_submit_error(cookie)) {
  888. ret = -ENOMEM;
  889. goto err;
  890. }
  891. dma_async_issue_pending(chan);
  892. offset += (loop_len - L1_CACHE_BYTES);
  893. temp_dma_addr += (loop_len - L1_CACHE_BYTES);
  894. window_dma_addr += (loop_len - L1_CACHE_BYTES);
  895. remaining_len -= (loop_len - L1_CACHE_BYTES);
  896. loop_len = remaining_len;
  897. /* Step 2) DMA: L1_CACHE_BYTES */
  898. tx =
  899. dev->device_prep_dma_memcpy(chan,
  900. window_dma_addr,
  901. temp_dma_addr,
  902. loop_len, 0);
  903. if (!tx) {
  904. ret = -ENOMEM;
  905. goto err;
  906. }
  907. cookie = tx->tx_submit(tx);
  908. if (dma_submit_error(cookie)) {
  909. ret = -ENOMEM;
  910. goto err;
  911. }
  912. dma_async_issue_pending(chan);
  913. } else {
  914. tx =
  915. dev->device_prep_dma_memcpy(chan,
  916. window_dma_addr,
  917. temp_dma_addr,
  918. loop_len, 0);
  919. if (!tx) {
  920. ret = -ENOMEM;
  921. goto err;
  922. }
  923. cookie = tx->tx_submit(tx);
  924. if (dma_submit_error(cookie)) {
  925. ret = -ENOMEM;
  926. goto err;
  927. }
  928. dma_async_issue_pending(chan);
  929. }
  930. } else {
  931. tx = dev->device_prep_dma_memcpy(chan, temp_dma_addr,
  932. window_dma_addr, loop_len, 0);
  933. if (!tx) {
  934. ret = -ENOMEM;
  935. goto err;
  936. }
  937. cookie = tx->tx_submit(tx);
  938. if (dma_submit_error(cookie)) {
  939. ret = -ENOMEM;
  940. goto err;
  941. }
  942. dma_async_issue_pending(chan);
  943. }
  944. if (ret < 0)
  945. goto err;
  946. offset += loop_len;
  947. temp += loop_len;
  948. temp_phys += loop_len;
  949. remaining_len -= loop_len;
  950. offset_in_ca = 0;
  951. }
  952. if (tail_len) {
  953. if (offset == end_offset) {
  954. window = list_next_entry(window, list);
  955. end_offset = window->offset +
  956. (window->nr_pages << PAGE_SHIFT);
  957. }
  958. window_virt_addr = ioremap_remote(offset, window, tail_len,
  959. work->remote_dev,
  960. NULL);
  961. if (!window_virt_addr)
  962. return -ENOMEM;
  963. /*
  964. * The CPU copy for the tail bytes must be initiated only once
  965. * previous DMA transfers for this endpoint have completed
  966. * to guarantee ordering.
  967. */
  968. if (work->ordered) {
  969. struct scif_dev *rdev = work->remote_dev;
  970. ret = scif_drain_dma_intr(rdev->sdev, chan);
  971. if (ret)
  972. return ret;
  973. }
  974. if (src_local)
  975. scif_unaligned_cpy_toio(window_virt_addr, temp,
  976. tail_len, work->ordered);
  977. else
  978. scif_unaligned_cpy_fromio(temp, window_virt_addr,
  979. tail_len, work->ordered);
  980. iounmap_remote(window_virt_addr, tail_len, work);
  981. }
  982. tx = dev->device_prep_dma_memcpy(chan, 0, 0, 0, DMA_PREP_INTERRUPT);
  983. if (!tx) {
  984. ret = -ENOMEM;
  985. return ret;
  986. }
  987. tx->callback = &scif_rma_completion_cb;
  988. tx->callback_param = comp_cb;
  989. cookie = tx->tx_submit(tx);
  990. if (dma_submit_error(cookie)) {
  991. ret = -ENOMEM;
  992. return ret;
  993. }
  994. dma_async_issue_pending(chan);
  995. return 0;
  996. err:
  997. dev_err(scif_info.mdev.this_device,
  998. "%s %d Desc Prog Failed ret %d\n",
  999. __func__, __LINE__, ret);
  1000. return ret;
  1001. }
  1002. /*
  1003. * _scif_rma_list_dma_copy_aligned:
  1004. *
  1005. * Traverse all the windows and perform DMA copy.
  1006. */
  1007. static int _scif_rma_list_dma_copy_aligned(struct scif_copy_work *work,
  1008. struct dma_chan *chan)
  1009. {
  1010. dma_addr_t src_dma_addr, dst_dma_addr;
  1011. size_t loop_len, remaining_len, src_contig_bytes = 0;
  1012. size_t dst_contig_bytes = 0;
  1013. struct scif_window_iter src_win_iter;
  1014. struct scif_window_iter dst_win_iter;
  1015. s64 end_src_offset, end_dst_offset;
  1016. struct scif_window *src_window = work->src_window;
  1017. struct scif_window *dst_window = work->dst_window;
  1018. s64 src_offset = work->src_offset, dst_offset = work->dst_offset;
  1019. int ret = 0;
  1020. struct dma_async_tx_descriptor *tx;
  1021. struct dma_device *dev = chan->device;
  1022. dma_cookie_t cookie;
  1023. remaining_len = work->len;
  1024. scif_init_window_iter(src_window, &src_win_iter);
  1025. scif_init_window_iter(dst_window, &dst_win_iter);
  1026. end_src_offset = src_window->offset +
  1027. (src_window->nr_pages << PAGE_SHIFT);
  1028. end_dst_offset = dst_window->offset +
  1029. (dst_window->nr_pages << PAGE_SHIFT);
  1030. while (remaining_len) {
  1031. if (src_offset == end_src_offset) {
  1032. src_window = list_next_entry(src_window, list);
  1033. end_src_offset = src_window->offset +
  1034. (src_window->nr_pages << PAGE_SHIFT);
  1035. scif_init_window_iter(src_window, &src_win_iter);
  1036. }
  1037. if (dst_offset == end_dst_offset) {
  1038. dst_window = list_next_entry(dst_window, list);
  1039. end_dst_offset = dst_window->offset +
  1040. (dst_window->nr_pages << PAGE_SHIFT);
  1041. scif_init_window_iter(dst_window, &dst_win_iter);
  1042. }
  1043. /* compute dma addresses for transfer */
  1044. src_dma_addr = scif_off_to_dma_addr(src_window, src_offset,
  1045. &src_contig_bytes,
  1046. &src_win_iter);
  1047. dst_dma_addr = scif_off_to_dma_addr(dst_window, dst_offset,
  1048. &dst_contig_bytes,
  1049. &dst_win_iter);
  1050. loop_len = min(src_contig_bytes, dst_contig_bytes);
  1051. loop_len = min(loop_len, remaining_len);
  1052. if (work->ordered && !(remaining_len - loop_len)) {
  1053. /*
  1054. * Break up the last chunk of the transfer into two
  1055. * steps to ensure that the last byte in step 2 is
  1056. * updated last.
  1057. */
  1058. /* Step 1) DMA: Body Length - 1 */
  1059. tx = dev->device_prep_dma_memcpy(chan, dst_dma_addr,
  1060. src_dma_addr,
  1061. loop_len - 1,
  1062. DMA_PREP_FENCE);
  1063. if (!tx) {
  1064. ret = -ENOMEM;
  1065. goto err;
  1066. }
  1067. cookie = tx->tx_submit(tx);
  1068. if (dma_submit_error(cookie)) {
  1069. ret = -ENOMEM;
  1070. goto err;
  1071. }
  1072. src_offset += (loop_len - 1);
  1073. dst_offset += (loop_len - 1);
  1074. src_dma_addr += (loop_len - 1);
  1075. dst_dma_addr += (loop_len - 1);
  1076. remaining_len -= (loop_len - 1);
  1077. loop_len = remaining_len;
  1078. /* Step 2) DMA: 1 BYTES */
  1079. tx = dev->device_prep_dma_memcpy(chan, dst_dma_addr,
  1080. src_dma_addr, loop_len, 0);
  1081. if (!tx) {
  1082. ret = -ENOMEM;
  1083. goto err;
  1084. }
  1085. cookie = tx->tx_submit(tx);
  1086. if (dma_submit_error(cookie)) {
  1087. ret = -ENOMEM;
  1088. goto err;
  1089. }
  1090. dma_async_issue_pending(chan);
  1091. } else {
  1092. tx = dev->device_prep_dma_memcpy(chan, dst_dma_addr,
  1093. src_dma_addr, loop_len, 0);
  1094. if (!tx) {
  1095. ret = -ENOMEM;
  1096. goto err;
  1097. }
  1098. cookie = tx->tx_submit(tx);
  1099. if (dma_submit_error(cookie)) {
  1100. ret = -ENOMEM;
  1101. goto err;
  1102. }
  1103. }
  1104. src_offset += loop_len;
  1105. dst_offset += loop_len;
  1106. remaining_len -= loop_len;
  1107. }
  1108. return ret;
  1109. err:
  1110. dev_err(scif_info.mdev.this_device,
  1111. "%s %d Desc Prog Failed ret %d\n",
  1112. __func__, __LINE__, ret);
  1113. return ret;
  1114. }
  1115. /*
  1116. * scif_rma_list_dma_copy_aligned:
  1117. *
  1118. * Traverse all the windows and perform DMA copy.
  1119. */
  1120. static int scif_rma_list_dma_copy_aligned(struct scif_copy_work *work,
  1121. struct dma_chan *chan)
  1122. {
  1123. dma_addr_t src_dma_addr, dst_dma_addr;
  1124. size_t loop_len, remaining_len, tail_len, src_contig_bytes = 0;
  1125. size_t dst_contig_bytes = 0;
  1126. int src_cache_off;
  1127. s64 end_src_offset, end_dst_offset;
  1128. struct scif_window_iter src_win_iter;
  1129. struct scif_window_iter dst_win_iter;
  1130. void *src_virt, *dst_virt;
  1131. struct scif_window *src_window = work->src_window;
  1132. struct scif_window *dst_window = work->dst_window;
  1133. s64 src_offset = work->src_offset, dst_offset = work->dst_offset;
  1134. int ret = 0;
  1135. struct dma_async_tx_descriptor *tx;
  1136. struct dma_device *dev = chan->device;
  1137. dma_cookie_t cookie;
  1138. remaining_len = work->len;
  1139. scif_init_window_iter(src_window, &src_win_iter);
  1140. scif_init_window_iter(dst_window, &dst_win_iter);
  1141. src_cache_off = src_offset & (L1_CACHE_BYTES - 1);
  1142. if (src_cache_off != 0) {
  1143. /* Head */
  1144. loop_len = L1_CACHE_BYTES - src_cache_off;
  1145. loop_len = min(loop_len, remaining_len);
  1146. src_dma_addr = __scif_off_to_dma_addr(src_window, src_offset);
  1147. dst_dma_addr = __scif_off_to_dma_addr(dst_window, dst_offset);
  1148. if (src_window->type == SCIF_WINDOW_SELF)
  1149. src_virt = _get_local_va(src_offset, src_window,
  1150. loop_len);
  1151. else
  1152. src_virt = ioremap_remote(src_offset, src_window,
  1153. loop_len,
  1154. work->remote_dev, NULL);
  1155. if (!src_virt)
  1156. return -ENOMEM;
  1157. if (dst_window->type == SCIF_WINDOW_SELF)
  1158. dst_virt = _get_local_va(dst_offset, dst_window,
  1159. loop_len);
  1160. else
  1161. dst_virt = ioremap_remote(dst_offset, dst_window,
  1162. loop_len,
  1163. work->remote_dev, NULL);
  1164. if (!dst_virt) {
  1165. if (src_window->type != SCIF_WINDOW_SELF)
  1166. iounmap_remote(src_virt, loop_len, work);
  1167. return -ENOMEM;
  1168. }
  1169. if (src_window->type == SCIF_WINDOW_SELF)
  1170. scif_unaligned_cpy_toio(dst_virt, src_virt, loop_len,
  1171. remaining_len == loop_len ?
  1172. work->ordered : false);
  1173. else
  1174. scif_unaligned_cpy_fromio(dst_virt, src_virt, loop_len,
  1175. remaining_len == loop_len ?
  1176. work->ordered : false);
  1177. if (src_window->type != SCIF_WINDOW_SELF)
  1178. iounmap_remote(src_virt, loop_len, work);
  1179. if (dst_window->type != SCIF_WINDOW_SELF)
  1180. iounmap_remote(dst_virt, loop_len, work);
  1181. src_offset += loop_len;
  1182. dst_offset += loop_len;
  1183. remaining_len -= loop_len;
  1184. }
  1185. end_src_offset = src_window->offset +
  1186. (src_window->nr_pages << PAGE_SHIFT);
  1187. end_dst_offset = dst_window->offset +
  1188. (dst_window->nr_pages << PAGE_SHIFT);
  1189. tail_len = remaining_len & (L1_CACHE_BYTES - 1);
  1190. remaining_len -= tail_len;
  1191. while (remaining_len) {
  1192. if (src_offset == end_src_offset) {
  1193. src_window = list_next_entry(src_window, list);
  1194. end_src_offset = src_window->offset +
  1195. (src_window->nr_pages << PAGE_SHIFT);
  1196. scif_init_window_iter(src_window, &src_win_iter);
  1197. }
  1198. if (dst_offset == end_dst_offset) {
  1199. dst_window = list_next_entry(dst_window, list);
  1200. end_dst_offset = dst_window->offset +
  1201. (dst_window->nr_pages << PAGE_SHIFT);
  1202. scif_init_window_iter(dst_window, &dst_win_iter);
  1203. }
  1204. /* compute dma addresses for transfer */
  1205. src_dma_addr = scif_off_to_dma_addr(src_window, src_offset,
  1206. &src_contig_bytes,
  1207. &src_win_iter);
  1208. dst_dma_addr = scif_off_to_dma_addr(dst_window, dst_offset,
  1209. &dst_contig_bytes,
  1210. &dst_win_iter);
  1211. loop_len = min(src_contig_bytes, dst_contig_bytes);
  1212. loop_len = min(loop_len, remaining_len);
  1213. if (work->ordered && !tail_len &&
  1214. !(remaining_len - loop_len)) {
  1215. /*
  1216. * Break up the last chunk of the transfer into two
  1217. * steps. if there is no tail to gurantee DMA ordering.
  1218. * Passing SCIF_DMA_POLLING inserts a status update
  1219. * descriptor in step 1 which acts as a double sided
  1220. * synchronization fence for the DMA engine to ensure
  1221. * that the last cache line in step 2 is updated last.
  1222. */
  1223. /* Step 1) DMA: Body Length - L1_CACHE_BYTES. */
  1224. tx = dev->device_prep_dma_memcpy(chan, dst_dma_addr,
  1225. src_dma_addr,
  1226. loop_len -
  1227. L1_CACHE_BYTES,
  1228. DMA_PREP_FENCE);
  1229. if (!tx) {
  1230. ret = -ENOMEM;
  1231. goto err;
  1232. }
  1233. cookie = tx->tx_submit(tx);
  1234. if (dma_submit_error(cookie)) {
  1235. ret = -ENOMEM;
  1236. goto err;
  1237. }
  1238. dma_async_issue_pending(chan);
  1239. src_offset += (loop_len - L1_CACHE_BYTES);
  1240. dst_offset += (loop_len - L1_CACHE_BYTES);
  1241. src_dma_addr += (loop_len - L1_CACHE_BYTES);
  1242. dst_dma_addr += (loop_len - L1_CACHE_BYTES);
  1243. remaining_len -= (loop_len - L1_CACHE_BYTES);
  1244. loop_len = remaining_len;
  1245. /* Step 2) DMA: L1_CACHE_BYTES */
  1246. tx = dev->device_prep_dma_memcpy(chan, dst_dma_addr,
  1247. src_dma_addr,
  1248. loop_len, 0);
  1249. if (!tx) {
  1250. ret = -ENOMEM;
  1251. goto err;
  1252. }
  1253. cookie = tx->tx_submit(tx);
  1254. if (dma_submit_error(cookie)) {
  1255. ret = -ENOMEM;
  1256. goto err;
  1257. }
  1258. dma_async_issue_pending(chan);
  1259. } else {
  1260. tx = dev->device_prep_dma_memcpy(chan, dst_dma_addr,
  1261. src_dma_addr,
  1262. loop_len, 0);
  1263. if (!tx) {
  1264. ret = -ENOMEM;
  1265. goto err;
  1266. }
  1267. cookie = tx->tx_submit(tx);
  1268. if (dma_submit_error(cookie)) {
  1269. ret = -ENOMEM;
  1270. goto err;
  1271. }
  1272. dma_async_issue_pending(chan);
  1273. }
  1274. src_offset += loop_len;
  1275. dst_offset += loop_len;
  1276. remaining_len -= loop_len;
  1277. }
  1278. remaining_len = tail_len;
  1279. if (remaining_len) {
  1280. loop_len = remaining_len;
  1281. if (src_offset == end_src_offset)
  1282. src_window = list_next_entry(src_window, list);
  1283. if (dst_offset == end_dst_offset)
  1284. dst_window = list_next_entry(dst_window, list);
  1285. src_dma_addr = __scif_off_to_dma_addr(src_window, src_offset);
  1286. dst_dma_addr = __scif_off_to_dma_addr(dst_window, dst_offset);
  1287. /*
  1288. * The CPU copy for the tail bytes must be initiated only once
  1289. * previous DMA transfers for this endpoint have completed to
  1290. * guarantee ordering.
  1291. */
  1292. if (work->ordered) {
  1293. struct scif_dev *rdev = work->remote_dev;
  1294. ret = scif_drain_dma_poll(rdev->sdev, chan);
  1295. if (ret)
  1296. return ret;
  1297. }
  1298. if (src_window->type == SCIF_WINDOW_SELF)
  1299. src_virt = _get_local_va(src_offset, src_window,
  1300. loop_len);
  1301. else
  1302. src_virt = ioremap_remote(src_offset, src_window,
  1303. loop_len,
  1304. work->remote_dev, NULL);
  1305. if (!src_virt)
  1306. return -ENOMEM;
  1307. if (dst_window->type == SCIF_WINDOW_SELF)
  1308. dst_virt = _get_local_va(dst_offset, dst_window,
  1309. loop_len);
  1310. else
  1311. dst_virt = ioremap_remote(dst_offset, dst_window,
  1312. loop_len,
  1313. work->remote_dev, NULL);
  1314. if (!dst_virt) {
  1315. if (src_window->type != SCIF_WINDOW_SELF)
  1316. iounmap_remote(src_virt, loop_len, work);
  1317. return -ENOMEM;
  1318. }
  1319. if (src_window->type == SCIF_WINDOW_SELF)
  1320. scif_unaligned_cpy_toio(dst_virt, src_virt, loop_len,
  1321. work->ordered);
  1322. else
  1323. scif_unaligned_cpy_fromio(dst_virt, src_virt,
  1324. loop_len, work->ordered);
  1325. if (src_window->type != SCIF_WINDOW_SELF)
  1326. iounmap_remote(src_virt, loop_len, work);
  1327. if (dst_window->type != SCIF_WINDOW_SELF)
  1328. iounmap_remote(dst_virt, loop_len, work);
  1329. remaining_len -= loop_len;
  1330. }
  1331. return ret;
  1332. err:
  1333. dev_err(scif_info.mdev.this_device,
  1334. "%s %d Desc Prog Failed ret %d\n",
  1335. __func__, __LINE__, ret);
  1336. return ret;
  1337. }
  1338. /*
  1339. * scif_rma_list_cpu_copy:
  1340. *
  1341. * Traverse all the windows and perform CPU copy.
  1342. */
  1343. static int scif_rma_list_cpu_copy(struct scif_copy_work *work)
  1344. {
  1345. void *src_virt, *dst_virt;
  1346. size_t loop_len, remaining_len;
  1347. int src_page_off, dst_page_off;
  1348. s64 src_offset = work->src_offset, dst_offset = work->dst_offset;
  1349. struct scif_window *src_window = work->src_window;
  1350. struct scif_window *dst_window = work->dst_window;
  1351. s64 end_src_offset, end_dst_offset;
  1352. int ret = 0;
  1353. struct scif_window_iter src_win_iter;
  1354. struct scif_window_iter dst_win_iter;
  1355. remaining_len = work->len;
  1356. scif_init_window_iter(src_window, &src_win_iter);
  1357. scif_init_window_iter(dst_window, &dst_win_iter);
  1358. while (remaining_len) {
  1359. src_page_off = src_offset & ~PAGE_MASK;
  1360. dst_page_off = dst_offset & ~PAGE_MASK;
  1361. loop_len = min(PAGE_SIZE -
  1362. max(src_page_off, dst_page_off),
  1363. remaining_len);
  1364. if (src_window->type == SCIF_WINDOW_SELF)
  1365. src_virt = _get_local_va(src_offset, src_window,
  1366. loop_len);
  1367. else
  1368. src_virt = ioremap_remote(src_offset, src_window,
  1369. loop_len,
  1370. work->remote_dev,
  1371. &src_win_iter);
  1372. if (!src_virt) {
  1373. ret = -ENOMEM;
  1374. goto error;
  1375. }
  1376. if (dst_window->type == SCIF_WINDOW_SELF)
  1377. dst_virt = _get_local_va(dst_offset, dst_window,
  1378. loop_len);
  1379. else
  1380. dst_virt = ioremap_remote(dst_offset, dst_window,
  1381. loop_len,
  1382. work->remote_dev,
  1383. &dst_win_iter);
  1384. if (!dst_virt) {
  1385. if (src_window->type == SCIF_WINDOW_PEER)
  1386. iounmap_remote(src_virt, loop_len, work);
  1387. ret = -ENOMEM;
  1388. goto error;
  1389. }
  1390. if (work->loopback) {
  1391. memcpy(dst_virt, src_virt, loop_len);
  1392. } else {
  1393. if (src_window->type == SCIF_WINDOW_SELF)
  1394. memcpy_toio((void __iomem __force *)dst_virt,
  1395. src_virt, loop_len);
  1396. else
  1397. memcpy_fromio(dst_virt,
  1398. (void __iomem __force *)src_virt,
  1399. loop_len);
  1400. }
  1401. if (src_window->type == SCIF_WINDOW_PEER)
  1402. iounmap_remote(src_virt, loop_len, work);
  1403. if (dst_window->type == SCIF_WINDOW_PEER)
  1404. iounmap_remote(dst_virt, loop_len, work);
  1405. src_offset += loop_len;
  1406. dst_offset += loop_len;
  1407. remaining_len -= loop_len;
  1408. if (remaining_len) {
  1409. end_src_offset = src_window->offset +
  1410. (src_window->nr_pages << PAGE_SHIFT);
  1411. end_dst_offset = dst_window->offset +
  1412. (dst_window->nr_pages << PAGE_SHIFT);
  1413. if (src_offset == end_src_offset) {
  1414. src_window = list_next_entry(src_window, list);
  1415. scif_init_window_iter(src_window,
  1416. &src_win_iter);
  1417. }
  1418. if (dst_offset == end_dst_offset) {
  1419. dst_window = list_next_entry(dst_window, list);
  1420. scif_init_window_iter(dst_window,
  1421. &dst_win_iter);
  1422. }
  1423. }
  1424. }
  1425. error:
  1426. return ret;
  1427. }
  1428. static int scif_rma_list_dma_copy_wrapper(struct scif_endpt *epd,
  1429. struct scif_copy_work *work,
  1430. struct dma_chan *chan, off_t loffset)
  1431. {
  1432. int src_cache_off, dst_cache_off;
  1433. s64 src_offset = work->src_offset, dst_offset = work->dst_offset;
  1434. u8 *temp = NULL;
  1435. bool src_local = true, dst_local = false;
  1436. struct scif_dma_comp_cb *comp_cb;
  1437. dma_addr_t src_dma_addr, dst_dma_addr;
  1438. int err;
  1439. if (is_dma_copy_aligned(chan->device, 1, 1, 1))
  1440. return _scif_rma_list_dma_copy_aligned(work, chan);
  1441. src_cache_off = src_offset & (L1_CACHE_BYTES - 1);
  1442. dst_cache_off = dst_offset & (L1_CACHE_BYTES - 1);
  1443. if (dst_cache_off == src_cache_off)
  1444. return scif_rma_list_dma_copy_aligned(work, chan);
  1445. if (work->loopback)
  1446. return scif_rma_list_cpu_copy(work);
  1447. src_dma_addr = __scif_off_to_dma_addr(work->src_window, src_offset);
  1448. dst_dma_addr = __scif_off_to_dma_addr(work->dst_window, dst_offset);
  1449. src_local = work->src_window->type == SCIF_WINDOW_SELF;
  1450. dst_local = work->dst_window->type == SCIF_WINDOW_SELF;
  1451. dst_local = dst_local;
  1452. /* Allocate dma_completion cb */
  1453. comp_cb = kzalloc(sizeof(*comp_cb), GFP_KERNEL);
  1454. if (!comp_cb)
  1455. goto error;
  1456. work->comp_cb = comp_cb;
  1457. comp_cb->cb_cookie = comp_cb;
  1458. comp_cb->dma_completion_func = &scif_rma_completion_cb;
  1459. if (work->len + (L1_CACHE_BYTES << 1) < SCIF_KMEM_UNALIGNED_BUF_SIZE) {
  1460. comp_cb->is_cache = false;
  1461. /* Allocate padding bytes to align to a cache line */
  1462. temp = kmalloc(work->len + (L1_CACHE_BYTES << 1),
  1463. GFP_KERNEL);
  1464. if (!temp)
  1465. goto free_comp_cb;
  1466. comp_cb->temp_buf_to_free = temp;
  1467. /* kmalloc(..) does not guarantee cache line alignment */
  1468. if (!IS_ALIGNED((u64)temp, L1_CACHE_BYTES))
  1469. temp = PTR_ALIGN(temp, L1_CACHE_BYTES);
  1470. } else {
  1471. comp_cb->is_cache = true;
  1472. temp = kmem_cache_alloc(unaligned_cache, GFP_KERNEL);
  1473. if (!temp)
  1474. goto free_comp_cb;
  1475. comp_cb->temp_buf_to_free = temp;
  1476. }
  1477. if (src_local) {
  1478. temp += dst_cache_off;
  1479. scif_rma_local_cpu_copy(work->src_offset, work->src_window,
  1480. temp, work->len, true);
  1481. } else {
  1482. comp_cb->dst_window = work->dst_window;
  1483. comp_cb->dst_offset = work->dst_offset;
  1484. work->src_offset = work->src_offset - src_cache_off;
  1485. comp_cb->len = work->len;
  1486. work->len = ALIGN(work->len + src_cache_off, L1_CACHE_BYTES);
  1487. comp_cb->header_padding = src_cache_off;
  1488. }
  1489. comp_cb->temp_buf = temp;
  1490. err = scif_map_single(&comp_cb->temp_phys, temp,
  1491. work->remote_dev, SCIF_KMEM_UNALIGNED_BUF_SIZE);
  1492. if (err)
  1493. goto free_temp_buf;
  1494. comp_cb->sdev = work->remote_dev;
  1495. if (scif_rma_list_dma_copy_unaligned(work, temp, chan, src_local) < 0)
  1496. goto free_temp_buf;
  1497. if (!src_local)
  1498. work->fence_type = SCIF_DMA_INTR;
  1499. return 0;
  1500. free_temp_buf:
  1501. if (comp_cb->is_cache)
  1502. kmem_cache_free(unaligned_cache, comp_cb->temp_buf_to_free);
  1503. else
  1504. kfree(comp_cb->temp_buf_to_free);
  1505. free_comp_cb:
  1506. kfree(comp_cb);
  1507. error:
  1508. return -ENOMEM;
  1509. }
  1510. /**
  1511. * scif_rma_copy:
  1512. * @epd: end point descriptor.
  1513. * @loffset: offset in local registered address space to/from which to copy
  1514. * @addr: user virtual address to/from which to copy
  1515. * @len: length of range to copy
  1516. * @roffset: offset in remote registered address space to/from which to copy
  1517. * @flags: flags
  1518. * @dir: LOCAL->REMOTE or vice versa.
  1519. * @last_chunk: true if this is the last chunk of a larger transfer
  1520. *
  1521. * Validate parameters, check if src/dst registered ranges requested for copy
  1522. * are valid and initiate either CPU or DMA copy.
  1523. */
  1524. static int scif_rma_copy(scif_epd_t epd, off_t loffset, unsigned long addr,
  1525. size_t len, off_t roffset, int flags,
  1526. enum scif_rma_dir dir, bool last_chunk)
  1527. {
  1528. struct scif_endpt *ep = (struct scif_endpt *)epd;
  1529. struct scif_rma_req remote_req;
  1530. struct scif_rma_req req;
  1531. struct scif_window *local_window = NULL;
  1532. struct scif_window *remote_window = NULL;
  1533. struct scif_copy_work copy_work;
  1534. bool loopback;
  1535. int err = 0;
  1536. struct dma_chan *chan;
  1537. struct scif_mmu_notif *mmn = NULL;
  1538. bool cache = false;
  1539. struct device *spdev;
  1540. err = scif_verify_epd(ep);
  1541. if (err)
  1542. return err;
  1543. if (flags && !(flags & (SCIF_RMA_USECPU | SCIF_RMA_USECACHE |
  1544. SCIF_RMA_SYNC | SCIF_RMA_ORDERED)))
  1545. return -EINVAL;
  1546. loopback = scifdev_self(ep->remote_dev) ? true : false;
  1547. copy_work.fence_type = ((flags & SCIF_RMA_SYNC) && last_chunk) ?
  1548. SCIF_DMA_POLL : 0;
  1549. copy_work.ordered = !!((flags & SCIF_RMA_ORDERED) && last_chunk);
  1550. /* Use CPU for Mgmt node <-> Mgmt node copies */
  1551. if (loopback && scif_is_mgmt_node()) {
  1552. flags |= SCIF_RMA_USECPU;
  1553. copy_work.fence_type = 0x0;
  1554. }
  1555. cache = scif_is_set_reg_cache(flags);
  1556. remote_req.out_window = &remote_window;
  1557. remote_req.offset = roffset;
  1558. remote_req.nr_bytes = len;
  1559. /*
  1560. * If transfer is from local to remote then the remote window
  1561. * must be writeable and vice versa.
  1562. */
  1563. remote_req.prot = dir == SCIF_LOCAL_TO_REMOTE ? VM_WRITE : VM_READ;
  1564. remote_req.type = SCIF_WINDOW_PARTIAL;
  1565. remote_req.head = &ep->rma_info.remote_reg_list;
  1566. spdev = scif_get_peer_dev(ep->remote_dev);
  1567. if (IS_ERR(spdev)) {
  1568. err = PTR_ERR(spdev);
  1569. return err;
  1570. }
  1571. if (addr && cache) {
  1572. mutex_lock(&ep->rma_info.mmn_lock);
  1573. mmn = scif_find_mmu_notifier(current->mm, &ep->rma_info);
  1574. if (!mmn)
  1575. mmn = scif_add_mmu_notifier(current->mm, ep);
  1576. mutex_unlock(&ep->rma_info.mmn_lock);
  1577. if (IS_ERR(mmn)) {
  1578. scif_put_peer_dev(spdev);
  1579. return PTR_ERR(mmn);
  1580. }
  1581. cache = cache && !scif_rma_tc_can_cache(ep, len);
  1582. }
  1583. mutex_lock(&ep->rma_info.rma_lock);
  1584. if (addr) {
  1585. req.out_window = &local_window;
  1586. req.nr_bytes = ALIGN(len + (addr & ~PAGE_MASK),
  1587. PAGE_SIZE);
  1588. req.va_for_temp = addr & PAGE_MASK;
  1589. req.prot = (dir == SCIF_LOCAL_TO_REMOTE ?
  1590. VM_READ : VM_WRITE | VM_READ);
  1591. /* Does a valid local window exist? */
  1592. if (mmn) {
  1593. spin_lock(&ep->rma_info.tc_lock);
  1594. req.head = &mmn->tc_reg_list;
  1595. err = scif_query_tcw(ep, &req);
  1596. spin_unlock(&ep->rma_info.tc_lock);
  1597. }
  1598. if (!mmn || err) {
  1599. err = scif_register_temp(epd, req.va_for_temp,
  1600. req.nr_bytes, req.prot,
  1601. &loffset, &local_window);
  1602. if (err) {
  1603. mutex_unlock(&ep->rma_info.rma_lock);
  1604. goto error;
  1605. }
  1606. if (!cache)
  1607. goto skip_cache;
  1608. atomic_inc(&ep->rma_info.tcw_refcount);
  1609. atomic_add_return(local_window->nr_pages,
  1610. &ep->rma_info.tcw_total_pages);
  1611. if (mmn) {
  1612. spin_lock(&ep->rma_info.tc_lock);
  1613. scif_insert_tcw(local_window,
  1614. &mmn->tc_reg_list);
  1615. spin_unlock(&ep->rma_info.tc_lock);
  1616. }
  1617. }
  1618. skip_cache:
  1619. loffset = local_window->offset +
  1620. (addr - local_window->va_for_temp);
  1621. } else {
  1622. req.out_window = &local_window;
  1623. req.offset = loffset;
  1624. /*
  1625. * If transfer is from local to remote then the self window
  1626. * must be readable and vice versa.
  1627. */
  1628. req.prot = dir == SCIF_LOCAL_TO_REMOTE ? VM_READ : VM_WRITE;
  1629. req.nr_bytes = len;
  1630. req.type = SCIF_WINDOW_PARTIAL;
  1631. req.head = &ep->rma_info.reg_list;
  1632. /* Does a valid local window exist? */
  1633. err = scif_query_window(&req);
  1634. if (err) {
  1635. mutex_unlock(&ep->rma_info.rma_lock);
  1636. goto error;
  1637. }
  1638. }
  1639. /* Does a valid remote window exist? */
  1640. err = scif_query_window(&remote_req);
  1641. if (err) {
  1642. mutex_unlock(&ep->rma_info.rma_lock);
  1643. goto error;
  1644. }
  1645. /*
  1646. * Prepare copy_work for submitting work to the DMA kernel thread
  1647. * or CPU copy routine.
  1648. */
  1649. copy_work.len = len;
  1650. copy_work.loopback = loopback;
  1651. copy_work.remote_dev = ep->remote_dev;
  1652. if (dir == SCIF_LOCAL_TO_REMOTE) {
  1653. copy_work.src_offset = loffset;
  1654. copy_work.src_window = local_window;
  1655. copy_work.dst_offset = roffset;
  1656. copy_work.dst_window = remote_window;
  1657. } else {
  1658. copy_work.src_offset = roffset;
  1659. copy_work.src_window = remote_window;
  1660. copy_work.dst_offset = loffset;
  1661. copy_work.dst_window = local_window;
  1662. }
  1663. if (flags & SCIF_RMA_USECPU) {
  1664. scif_rma_list_cpu_copy(&copy_work);
  1665. } else {
  1666. chan = ep->rma_info.dma_chan;
  1667. err = scif_rma_list_dma_copy_wrapper(epd, &copy_work,
  1668. chan, loffset);
  1669. }
  1670. if (addr && !cache)
  1671. atomic_inc(&ep->rma_info.tw_refcount);
  1672. mutex_unlock(&ep->rma_info.rma_lock);
  1673. if (last_chunk) {
  1674. struct scif_dev *rdev = ep->remote_dev;
  1675. if (copy_work.fence_type == SCIF_DMA_POLL)
  1676. err = scif_drain_dma_poll(rdev->sdev,
  1677. ep->rma_info.dma_chan);
  1678. else if (copy_work.fence_type == SCIF_DMA_INTR)
  1679. err = scif_drain_dma_intr(rdev->sdev,
  1680. ep->rma_info.dma_chan);
  1681. }
  1682. if (addr && !cache)
  1683. scif_queue_for_cleanup(local_window, &scif_info.rma);
  1684. scif_put_peer_dev(spdev);
  1685. return err;
  1686. error:
  1687. if (err) {
  1688. if (addr && local_window && !cache)
  1689. scif_destroy_window(ep, local_window);
  1690. dev_err(scif_info.mdev.this_device,
  1691. "%s %d err %d len 0x%lx\n",
  1692. __func__, __LINE__, err, len);
  1693. }
  1694. scif_put_peer_dev(spdev);
  1695. return err;
  1696. }
  1697. int scif_readfrom(scif_epd_t epd, off_t loffset, size_t len,
  1698. off_t roffset, int flags)
  1699. {
  1700. int err;
  1701. dev_dbg(scif_info.mdev.this_device,
  1702. "SCIFAPI readfrom: ep %p loffset 0x%lx len 0x%lx offset 0x%lx flags 0x%x\n",
  1703. epd, loffset, len, roffset, flags);
  1704. if (scif_unaligned(loffset, roffset)) {
  1705. while (len > SCIF_MAX_UNALIGNED_BUF_SIZE) {
  1706. err = scif_rma_copy(epd, loffset, 0x0,
  1707. SCIF_MAX_UNALIGNED_BUF_SIZE,
  1708. roffset, flags,
  1709. SCIF_REMOTE_TO_LOCAL, false);
  1710. if (err)
  1711. goto readfrom_err;
  1712. loffset += SCIF_MAX_UNALIGNED_BUF_SIZE;
  1713. roffset += SCIF_MAX_UNALIGNED_BUF_SIZE;
  1714. len -= SCIF_MAX_UNALIGNED_BUF_SIZE;
  1715. }
  1716. }
  1717. err = scif_rma_copy(epd, loffset, 0x0, len,
  1718. roffset, flags, SCIF_REMOTE_TO_LOCAL, true);
  1719. readfrom_err:
  1720. return err;
  1721. }
  1722. EXPORT_SYMBOL_GPL(scif_readfrom);
  1723. int scif_writeto(scif_epd_t epd, off_t loffset, size_t len,
  1724. off_t roffset, int flags)
  1725. {
  1726. int err;
  1727. dev_dbg(scif_info.mdev.this_device,
  1728. "SCIFAPI writeto: ep %p loffset 0x%lx len 0x%lx roffset 0x%lx flags 0x%x\n",
  1729. epd, loffset, len, roffset, flags);
  1730. if (scif_unaligned(loffset, roffset)) {
  1731. while (len > SCIF_MAX_UNALIGNED_BUF_SIZE) {
  1732. err = scif_rma_copy(epd, loffset, 0x0,
  1733. SCIF_MAX_UNALIGNED_BUF_SIZE,
  1734. roffset, flags,
  1735. SCIF_LOCAL_TO_REMOTE, false);
  1736. if (err)
  1737. goto writeto_err;
  1738. loffset += SCIF_MAX_UNALIGNED_BUF_SIZE;
  1739. roffset += SCIF_MAX_UNALIGNED_BUF_SIZE;
  1740. len -= SCIF_MAX_UNALIGNED_BUF_SIZE;
  1741. }
  1742. }
  1743. err = scif_rma_copy(epd, loffset, 0x0, len,
  1744. roffset, flags, SCIF_LOCAL_TO_REMOTE, true);
  1745. writeto_err:
  1746. return err;
  1747. }
  1748. EXPORT_SYMBOL_GPL(scif_writeto);
  1749. int scif_vreadfrom(scif_epd_t epd, void *addr, size_t len,
  1750. off_t roffset, int flags)
  1751. {
  1752. int err;
  1753. dev_dbg(scif_info.mdev.this_device,
  1754. "SCIFAPI vreadfrom: ep %p addr %p len 0x%lx roffset 0x%lx flags 0x%x\n",
  1755. epd, addr, len, roffset, flags);
  1756. if (scif_unaligned((off_t __force)addr, roffset)) {
  1757. if (len > SCIF_MAX_UNALIGNED_BUF_SIZE)
  1758. flags &= ~SCIF_RMA_USECACHE;
  1759. while (len > SCIF_MAX_UNALIGNED_BUF_SIZE) {
  1760. err = scif_rma_copy(epd, 0, (u64)addr,
  1761. SCIF_MAX_UNALIGNED_BUF_SIZE,
  1762. roffset, flags,
  1763. SCIF_REMOTE_TO_LOCAL, false);
  1764. if (err)
  1765. goto vreadfrom_err;
  1766. addr += SCIF_MAX_UNALIGNED_BUF_SIZE;
  1767. roffset += SCIF_MAX_UNALIGNED_BUF_SIZE;
  1768. len -= SCIF_MAX_UNALIGNED_BUF_SIZE;
  1769. }
  1770. }
  1771. err = scif_rma_copy(epd, 0, (u64)addr, len,
  1772. roffset, flags, SCIF_REMOTE_TO_LOCAL, true);
  1773. vreadfrom_err:
  1774. return err;
  1775. }
  1776. EXPORT_SYMBOL_GPL(scif_vreadfrom);
  1777. int scif_vwriteto(scif_epd_t epd, void *addr, size_t len,
  1778. off_t roffset, int flags)
  1779. {
  1780. int err;
  1781. dev_dbg(scif_info.mdev.this_device,
  1782. "SCIFAPI vwriteto: ep %p addr %p len 0x%lx roffset 0x%lx flags 0x%x\n",
  1783. epd, addr, len, roffset, flags);
  1784. if (scif_unaligned((off_t __force)addr, roffset)) {
  1785. if (len > SCIF_MAX_UNALIGNED_BUF_SIZE)
  1786. flags &= ~SCIF_RMA_USECACHE;
  1787. while (len > SCIF_MAX_UNALIGNED_BUF_SIZE) {
  1788. err = scif_rma_copy(epd, 0, (u64)addr,
  1789. SCIF_MAX_UNALIGNED_BUF_SIZE,
  1790. roffset, flags,
  1791. SCIF_LOCAL_TO_REMOTE, false);
  1792. if (err)
  1793. goto vwriteto_err;
  1794. addr += SCIF_MAX_UNALIGNED_BUF_SIZE;
  1795. roffset += SCIF_MAX_UNALIGNED_BUF_SIZE;
  1796. len -= SCIF_MAX_UNALIGNED_BUF_SIZE;
  1797. }
  1798. }
  1799. err = scif_rma_copy(epd, 0, (u64)addr, len,
  1800. roffset, flags, SCIF_LOCAL_TO_REMOTE, true);
  1801. vwriteto_err:
  1802. return err;
  1803. }
  1804. EXPORT_SYMBOL_GPL(scif_vwriteto);