  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * channel program interfaces
  4. *
  5. * Copyright IBM Corp. 2017
  6. *
  7. * Author(s): Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
  8. * Xiao Feng Ren <renxiaof@linux.vnet.ibm.com>
  9. */
  10. #include <linux/ratelimit.h>
  11. #include <linux/mm.h>
  12. #include <linux/slab.h>
  13. #include <linux/highmem.h>
  14. #include <linux/iommu.h>
  15. #include <linux/vfio.h>
  16. #include <asm/idals.h>
  17. #include "vfio_ccw_cp.h"
  18. #include "vfio_ccw_private.h"
/*
 * struct page_array - bookkeeping for a set of guest pages to pin.
 *
 * pa_iova and pa_page are parallel arrays of pa_nr entries each,
 * allocated together by page_array_alloc().
 */
struct page_array {
	/* Array of guest iovas (one per page) that need to be pinned. */
	dma_addr_t *pa_iova;
	/* Array that receives the pinned pages. */
	struct page **pa_page;
	/* Number of pages pinned from @pa_iova. */
	int pa_nr;
};
/*
 * struct ccwchain - one contiguous, translated segment of a guest
 * channel program, linked on channel_program->ccwchain_list.
 */
struct ccwchain {
	struct list_head next;
	/* Host copy of the CCWs; handed to real hardware after translation. */
	struct ccw1 *ch_ccw;
	/* Guest physical address of the current chain. */
	u64 ch_iova;
	/* Count of the valid ccws in chain. */
	int ch_len;
	/* Pinned PAGEs for the original data (one page_array per CCW). */
	struct page_array *ch_pa;
};
  37. /*
  38. * page_array_alloc() - alloc memory for page array
  39. * @pa: page_array on which to perform the operation
  40. * @len: number of pages that should be pinned from @iova
  41. *
  42. * Attempt to allocate memory for page array.
  43. *
  44. * Usage of page_array:
  45. * We expect (pa_nr == 0) and (pa_iova == NULL), any field in
  46. * this structure will be filled in by this function.
  47. *
  48. * Returns:
  49. * 0 if page array is allocated
  50. * -EINVAL if pa->pa_nr is not initially zero, or pa->pa_iova is not NULL
  51. * -ENOMEM if alloc failed
  52. */
  53. static int page_array_alloc(struct page_array *pa, unsigned int len)
  54. {
  55. if (pa->pa_nr || pa->pa_iova)
  56. return -EINVAL;
  57. if (len == 0)
  58. return -EINVAL;
  59. pa->pa_nr = len;
  60. pa->pa_iova = kcalloc(len, sizeof(*pa->pa_iova), GFP_KERNEL);
  61. if (!pa->pa_iova)
  62. return -ENOMEM;
  63. pa->pa_page = kcalloc(len, sizeof(*pa->pa_page), GFP_KERNEL);
  64. if (!pa->pa_page) {
  65. kfree(pa->pa_iova);
  66. return -ENOMEM;
  67. }
  68. return 0;
  69. }
/*
 * page_array_unpin() - Unpin user pages in memory
 * @pa: page_array on which to perform the operation
 * @vdev: the vfio device to perform the operation
 * @pa_nr: number of user pages to unpin
 * @unaligned: were pages unaligned on the pin request
 *
 * Only unpin if any pages were pinned to begin with, i.e. pa_nr > 0,
 * otherwise only clear pa->pa_nr
 */
static void page_array_unpin(struct page_array *pa,
			     struct vfio_device *vdev, int pa_nr, bool unaligned)
{
	int unpinned = 0, npage = 1;

	while (unpinned < pa_nr) {
		dma_addr_t *first = &pa->pa_iova[unpinned];
		dma_addr_t *last = &first[npage];

		/*
		 * Grow the batch while the next iova is contiguous with the
		 * current run, mirroring the coalescing done at pin time.
		 * Unaligned (2K) requests are never coalesced.
		 */
		if (unpinned + npage < pa_nr &&
		    *first + npage * PAGE_SIZE == *last &&
		    !unaligned) {
			npage++;
			continue;
		}

		/* Flush the accumulated run in a single unpin call. */
		vfio_unpin_pages(vdev, *first, npage);
		unpinned += npage;
		npage = 1;
	}

	pa->pa_nr = 0;
}
/*
 * page_array_pin() - Pin user pages in memory
 * @pa: page_array on which to perform the operation
 * @vdev: the vfio device to perform pin operations
 * @unaligned: true if the iovas are NOT guaranteed to be 4K-aligned
 *	       (2K IDAWs); such requests are never coalesced
 *
 * Returns a non-negative value upon success.
 * If the pin request partially succeeds, or fails completely,
 * all pages are left unpinned and a negative error value is returned.
 *
 * Requests to pin "aligned" pages can be coalesced into a single
 * vfio_pin_pages request for the sake of efficiency, based on the
 * expectation of 4K page requests. Unaligned requests are probably
 * dealing with 2K "pages", and cannot be coalesced without
 * reworking this logic to incorporate that math.
 */
static int page_array_pin(struct page_array *pa, struct vfio_device *vdev, bool unaligned)
{
	int pinned = 0, npage = 1;
	int ret = 0;

	while (pinned < pa->pa_nr) {
		dma_addr_t *first = &pa->pa_iova[pinned];
		dma_addr_t *last = &first[npage];

		/* Extend the batch while the next iova is contiguous. */
		if (pinned + npage < pa->pa_nr &&
		    *first + npage * PAGE_SIZE == *last &&
		    !unaligned) {
			npage++;
			continue;
		}

		ret = vfio_pin_pages(vdev, *first, npage,
				     IOMMU_READ | IOMMU_WRITE,
				     &pa->pa_page[pinned]);
		if (ret < 0) {
			goto err_out;
		} else if (ret > 0 && ret != npage) {
			/* Partial pin: record what stuck so it gets undone. */
			pinned += ret;
			ret = -EINVAL;
			goto err_out;
		}
		pinned += npage;
		npage = 1;
	}

	/*
	 * NOTE(review): on success this returns the result of the *last*
	 * vfio_pin_pages() call (final batch size), not the total number of
	 * pages pinned; callers only test for negative values.
	 */
	return ret;

err_out:
	page_array_unpin(pa, vdev, pinned, unaligned);
	return ret;
}
  146. /* Unpin the pages before releasing the memory. */
  147. static void page_array_unpin_free(struct page_array *pa, struct vfio_device *vdev, bool unaligned)
  148. {
  149. page_array_unpin(pa, vdev, pa->pa_nr, unaligned);
  150. kfree(pa->pa_page);
  151. kfree(pa->pa_iova);
  152. }
  153. static bool page_array_iova_pinned(struct page_array *pa, u64 iova, u64 length)
  154. {
  155. u64 iova_pfn_start = iova >> PAGE_SHIFT;
  156. u64 iova_pfn_end = (iova + length - 1) >> PAGE_SHIFT;
  157. u64 pfn;
  158. int i;
  159. for (i = 0; i < pa->pa_nr; i++) {
  160. pfn = pa->pa_iova[i] >> PAGE_SHIFT;
  161. if (pfn >= iova_pfn_start && pfn <= iova_pfn_end)
  162. return true;
  163. }
  164. return false;
  165. }
/* Create the list of IDAL words for a page_array. */
static inline void page_array_idal_create_words(struct page_array *pa,
						dma64_t *idaws)
{
	int i;

	/*
	 * Idal words (except the first one) rely on the memory being 4k
	 * aligned. If a user virtual address is 4K aligned, then its
	 * corresponding kernel physical address will also be 4K aligned. Thus
	 * there will be no problem here to simply use the phys to create an
	 * idaw.
	 */
	for (i = 0; i < pa->pa_nr; i++) {
		idaws[i] = virt_to_dma64(page_to_virt(pa->pa_page[i]));

		/* Incorporate any offset from each starting address */
		idaws[i] = dma64_add(idaws[i], pa->pa_iova[i] & ~PAGE_MASK);
	}
}
  184. static void convert_ccw0_to_ccw1(struct ccw1 *source, unsigned long len)
  185. {
  186. struct ccw0 ccw0;
  187. struct ccw1 *pccw1 = source;
  188. int i;
  189. for (i = 0; i < len; i++) {
  190. ccw0 = *(struct ccw0 *)pccw1;
  191. if ((pccw1->cmd_code & 0x0f) == CCW_CMD_TIC) {
  192. pccw1->cmd_code = CCW_CMD_TIC;
  193. pccw1->flags = 0;
  194. pccw1->count = 0;
  195. } else {
  196. pccw1->cmd_code = ccw0.cmd_code;
  197. pccw1->flags = ccw0.flags;
  198. pccw1->count = ccw0.count;
  199. }
  200. pccw1->cda = u32_to_dma32(ccw0.cda);
  201. pccw1++;
  202. }
  203. }
/* True when the program uses 2K IDAWs: Format-1 (!c64) or Format-2-2K (i2k). */
#define idal_is_2k(_cp) (!(_cp)->orb.cmd.c64 || (_cp)->orb.cmd.i2k)

/*
 * Helpers to operate ccwchain.
 */
/* Command-code tests; the low-order bits encode the command class. */
#define ccw_is_read(_ccw) (((_ccw)->cmd_code & 0x03) == 0x02)
#define ccw_is_read_backward(_ccw) (((_ccw)->cmd_code & 0x0F) == 0x0C)
#define ccw_is_sense(_ccw) (((_ccw)->cmd_code & 0x0F) == CCW_CMD_BASIC_SENSE)
#define ccw_is_noop(_ccw) ((_ccw)->cmd_code == CCW_CMD_NOOP)
#define ccw_is_tic(_ccw) ((_ccw)->cmd_code == CCW_CMD_TIC)

/* Flag tests on the ccw1 flags byte. */
#define ccw_is_idal(_ccw) ((_ccw)->flags & CCW_FLAG_IDA)
#define ccw_is_skip(_ccw) ((_ccw)->flags & CCW_FLAG_SKIP)

/* Chaining continues when either command- or data-chaining is set. */
#define ccw_is_chain(_ccw) ((_ccw)->flags & (CCW_FLAG_CC | CCW_FLAG_DC))
  216. /*
  217. * ccw_does_data_transfer()
  218. *
  219. * Determine whether a CCW will move any data, such that the guest pages
  220. * would need to be pinned before performing the I/O.
  221. *
  222. * Returns 1 if yes, 0 if no.
  223. */
  224. static inline int ccw_does_data_transfer(struct ccw1 *ccw)
  225. {
  226. /* If the count field is zero, then no data will be transferred */
  227. if (ccw->count == 0)
  228. return 0;
  229. /* If the command is a NOP, then no data will be transferred */
  230. if (ccw_is_noop(ccw))
  231. return 0;
  232. /* If the skip flag is off, then data will be transferred */
  233. if (!ccw_is_skip(ccw))
  234. return 1;
  235. /*
  236. * If the skip flag is on, it is only meaningful if the command
  237. * code is a read, read backward, sense, or sense ID. In those
  238. * cases, no data will be transferred.
  239. */
  240. if (ccw_is_read(ccw) || ccw_is_read_backward(ccw))
  241. return 0;
  242. if (ccw_is_sense(ccw))
  243. return 0;
  244. /* The skip flag is on, but it is ignored for this command code. */
  245. return 1;
  246. }
  247. /*
  248. * is_cpa_within_range()
  249. *
  250. * @cpa: channel program address being questioned
  251. * @head: address of the beginning of a CCW chain
  252. * @len: number of CCWs within the chain
  253. *
  254. * Determine whether the address of a CCW (whether a new chain,
  255. * or the target of a TIC) falls within a range (including the end points).
  256. *
  257. * Returns 1 if yes, 0 if no.
  258. */
  259. static inline int is_cpa_within_range(dma32_t cpa, u32 head, int len)
  260. {
  261. u32 tail = head + (len - 1) * sizeof(struct ccw1);
  262. u32 gcpa = dma32_to_u32(cpa);
  263. return head <= gcpa && gcpa <= tail;
  264. }
  265. static inline int is_tic_within_range(struct ccw1 *ccw, u32 head, int len)
  266. {
  267. if (!ccw_is_tic(ccw))
  268. return 0;
  269. return is_cpa_within_range(ccw->cda, head, len);
  270. }
  271. static struct ccwchain *ccwchain_alloc(struct channel_program *cp, int len)
  272. {
  273. struct ccwchain *chain;
  274. chain = kzalloc(sizeof(*chain), GFP_KERNEL);
  275. if (!chain)
  276. return NULL;
  277. chain->ch_ccw = kcalloc(len, sizeof(*chain->ch_ccw), GFP_DMA | GFP_KERNEL);
  278. if (!chain->ch_ccw)
  279. goto out_err;
  280. chain->ch_pa = kcalloc(len, sizeof(*chain->ch_pa), GFP_KERNEL);
  281. if (!chain->ch_pa)
  282. goto out_err;
  283. list_add_tail(&chain->next, &cp->ccwchain_list);
  284. return chain;
  285. out_err:
  286. kfree(chain->ch_ccw);
  287. kfree(chain);
  288. return NULL;
  289. }
  290. static void ccwchain_free(struct ccwchain *chain)
  291. {
  292. list_del(&chain->next);
  293. kfree(chain->ch_pa);
  294. kfree(chain->ch_ccw);
  295. kfree(chain);
  296. }
  297. /* Free resource for a ccw that allocated memory for its cda. */
  298. static void ccwchain_cda_free(struct ccwchain *chain, int idx)
  299. {
  300. struct ccw1 *ccw = &chain->ch_ccw[idx];
  301. if (ccw_is_tic(ccw))
  302. return;
  303. kfree(dma32_to_virt(ccw->cda));
  304. }
  305. /**
  306. * ccwchain_calc_length - calculate the length of the ccw chain.
  307. * @iova: guest physical address of the target ccw chain
  308. * @cp: channel_program on which to perform the operation
  309. *
  310. * This is the chain length not considering any TICs.
  311. * You need to do a new round for each TIC target.
  312. *
  313. * The program is also validated for absence of not yet supported
  314. * indirect data addressing scenarios.
  315. *
  316. * Returns: the length of the ccw chain or -errno.
  317. */
  318. static int ccwchain_calc_length(u64 iova, struct channel_program *cp)
  319. {
  320. struct ccw1 *ccw = cp->guest_cp;
  321. int cnt = 0;
  322. do {
  323. cnt++;
  324. /*
  325. * We want to keep counting if the current CCW has the
  326. * command-chaining flag enabled, or if it is a TIC CCW
  327. * that loops back into the current chain. The latter
  328. * is used for device orientation, where the CCW PRIOR to
  329. * the TIC can either jump to the TIC or a CCW immediately
  330. * after the TIC, depending on the results of its operation.
  331. */
  332. if (!ccw_is_chain(ccw) && !is_tic_within_range(ccw, iova, cnt))
  333. break;
  334. ccw++;
  335. } while (cnt < CCWCHAIN_LEN_MAX + 1);
  336. if (cnt == CCWCHAIN_LEN_MAX + 1)
  337. cnt = -EINVAL;
  338. return cnt;
  339. }
  340. static int tic_target_chain_exists(struct ccw1 *tic, struct channel_program *cp)
  341. {
  342. struct ccwchain *chain;
  343. u32 ccw_head;
  344. list_for_each_entry(chain, &cp->ccwchain_list, next) {
  345. ccw_head = chain->ch_iova;
  346. if (is_cpa_within_range(tic->cda, ccw_head, chain->ch_len))
  347. return 1;
  348. }
  349. return 0;
  350. }
  351. static int ccwchain_loop_tic(struct ccwchain *chain,
  352. struct channel_program *cp);
/*
 * Read one CCW segment from guest memory at @cda, wrap it in a new
 * ccwchain on cp->ccwchain_list, and recursively translate any TICs
 * branching out of it.  Returns 0 or a negative errno.
 */
static int ccwchain_handle_ccw(dma32_t cda, struct channel_program *cp)
{
	struct vfio_device *vdev =
		&container_of(cp, struct vfio_ccw_private, cp)->vdev;
	struct ccwchain *chain;
	int len, ret;
	u32 gcda;

	gcda = dma32_to_u32(cda);

	/* Copy 2K (the most we support today) of possible CCWs */
	ret = vfio_dma_rw(vdev, gcda, cp->guest_cp, CCWCHAIN_LEN_MAX * sizeof(struct ccw1), false);
	if (ret)
		return ret;

	/* Convert any Format-0 CCWs to Format-1 */
	if (!cp->orb.cmd.fmt)
		convert_ccw0_to_ccw1(cp->guest_cp, CCWCHAIN_LEN_MAX);

	/* Count the CCWs in the current chain */
	len = ccwchain_calc_length(gcda, cp);
	if (len < 0)
		return len;

	/* Need alloc a new chain for this one. */
	chain = ccwchain_alloc(cp, len);
	if (!chain)
		return -ENOMEM;

	chain->ch_len = len;
	chain->ch_iova = gcda;

	/* Copy the actual CCWs into the new chain */
	memcpy(chain->ch_ccw, cp->guest_cp, len * sizeof(struct ccw1));

	/* Loop for tics on this new chain. */
	ret = ccwchain_loop_tic(chain, cp);

	/*
	 * On failure only this chain is freed here; chains already created
	 * for nested TIC targets remain on the list and are released later
	 * by cp_free().
	 */
	if (ret)
		ccwchain_free(chain);

	return ret;
}
  386. /* Loop for TICs. */
  387. static int ccwchain_loop_tic(struct ccwchain *chain, struct channel_program *cp)
  388. {
  389. struct ccw1 *tic;
  390. int i, ret;
  391. for (i = 0; i < chain->ch_len; i++) {
  392. tic = &chain->ch_ccw[i];
  393. if (!ccw_is_tic(tic))
  394. continue;
  395. /* May transfer to an existing chain. */
  396. if (tic_target_chain_exists(tic, cp))
  397. continue;
  398. /* Build a ccwchain for the next segment */
  399. ret = ccwchain_handle_ccw(tic->cda, cp);
  400. if (ret)
  401. return ret;
  402. }
  403. return 0;
  404. }
  405. static int ccwchain_fetch_tic(struct ccw1 *ccw,
  406. struct channel_program *cp)
  407. {
  408. struct ccwchain *iter;
  409. u32 offset, ccw_head;
  410. list_for_each_entry(iter, &cp->ccwchain_list, next) {
  411. ccw_head = iter->ch_iova;
  412. if (is_cpa_within_range(ccw->cda, ccw_head, iter->ch_len)) {
  413. /* Calculate offset of TIC target */
  414. offset = dma32_to_u32(ccw->cda) - ccw_head;
  415. ccw->cda = virt_to_dma32((void *)iter->ch_ccw + offset);
  416. return 0;
  417. }
  418. }
  419. return -EFAULT;
  420. }
/*
 * Obtain a host copy of the IDAL for @ccw: either copy the guest's own
 * IDAL (IDA flag set), or fabricate one covering the contiguous area
 * starting at ccw->cda.  The returned buffer holds @idaw_nr IDAWs and
 * must be kfree()d by the caller; an ERR_PTR is returned on failure.
 */
static dma64_t *get_guest_idal(struct ccw1 *ccw, struct channel_program *cp, int idaw_nr)
{
	struct vfio_device *vdev =
		&container_of(cp, struct vfio_ccw_private, cp)->vdev;
	dma64_t *idaws;
	dma32_t *idaws_f1;
	int idal_len = idaw_nr * sizeof(*idaws);
	/* Format-1 and 2K-Format-2 IDAWs address 2K blocks; else 4K. */
	int idaw_size = idal_is_2k(cp) ? PAGE_SIZE / 2 : PAGE_SIZE;
	int idaw_mask = ~(idaw_size - 1);
	int i, ret;

	/* GFP_DMA: this list may be handed to the channel subsystem. */
	idaws = kcalloc(idaw_nr, sizeof(*idaws), GFP_DMA | GFP_KERNEL);
	if (!idaws)
		return ERR_PTR(-ENOMEM);

	if (ccw_is_idal(ccw)) {
		/*
		 * Copy IDAL from guest.
		 * NOTE(review): idal_len assumes 8 bytes per IDAW even for
		 * Format-1 IDALs whose guest entries are 4 bytes, so this
		 * copies past the guest IDAL's end; the Format-1 consumers
		 * only read the first idaw_nr 32-bit slots.  Confirm intended.
		 */
		ret = vfio_dma_rw(vdev, dma32_to_u32(ccw->cda), idaws, idal_len, false);
		if (ret) {
			kfree(idaws);
			return ERR_PTR(ret);
		}
	} else {
		/* Fabricate an IDAL based off CCW data address */
		if (cp->orb.cmd.c64) {
			idaws[0] = u64_to_dma64(dma32_to_u32(ccw->cda));
			/* Each following IDAW starts on the next block. */
			for (i = 1; i < idaw_nr; i++) {
				idaws[i] = dma64_add(idaws[i - 1], idaw_size);
				idaws[i] = dma64_and(idaws[i], idaw_mask);
			}
		} else {
			/* Format-1: 32-bit IDAWs overlaying the same buffer. */
			idaws_f1 = (dma32_t *)idaws;
			idaws_f1[0] = ccw->cda;
			for (i = 1; i < idaw_nr; i++) {
				idaws_f1[i] = dma32_add(idaws_f1[i - 1], idaw_size);
				idaws_f1[i] = dma32_and(idaws_f1[i], idaw_mask);
			}
		}
	}

	return idaws;
}
/*
 * ccw_count_idaws() - Calculate the number of IDAWs needed to transfer
 * a specified amount of data
 *
 * @ccw: The Channel Command Word being translated
 * @cp: Channel Program being processed
 *
 * The ORB is examined, since it specifies what IDAWs could actually be
 * used by any CCW in the channel program, regardless of whether or not
 * the CCW actually does. An ORB that does not specify Format-2-IDAW
 * Control could still contain a CCW with an IDAL, which would be
 * Format-1 and thus only move 2K with each IDAW. Thus all CCWs within
 * the channel program must follow the same size requirements.
 *
 * Returns the (positive) IDAW count, or a negative errno from reading
 * the first guest IDAW.
 */
static int ccw_count_idaws(struct ccw1 *ccw,
			   struct channel_program *cp)
{
	struct vfio_device *vdev =
		&container_of(cp, struct vfio_ccw_private, cp)->vdev;
	u64 iova;
	/* A Format-2 IDAW is 8 bytes; a Format-1 IDAW is 4 bytes. */
	int size = cp->orb.cmd.c64 ? sizeof(u64) : sizeof(u32);
	int ret;
	int bytes = 1;

	/* A zero count still needs one IDAW, hence the floor of 1 byte. */
	if (ccw->count)
		bytes = ccw->count;

	if (ccw_is_idal(ccw)) {
		/* Read first IDAW to check its starting address. */
		/* All subsequent IDAWs will be 2K- or 4K-aligned. */
		ret = vfio_dma_rw(vdev, dma32_to_u32(ccw->cda), &iova, size, false);
		if (ret)
			return ret;

		/*
		 * Format-1 IDAWs only occupy the first 32 bits,
		 * and bit 0 is always off.
		 */
		if (!cp->orb.cmd.c64)
			iova = iova >> 32;
	} else {
		iova = dma32_to_u32(ccw->cda);
	}

	/* Format-1 IDAWs operate on 2K each */
	if (!cp->orb.cmd.c64)
		return idal_2k_nr_words((void *)iova, bytes);

	/* Using the 2K variant of Format-2 IDAWs? */
	if (cp->orb.cmd.i2k)
		return idal_2k_nr_words((void *)iova, bytes);

	/* The 'usual' case is 4K Format-2 IDAWs */
	return idal_nr_words((void *)iova, bytes);
}
/*
 * Translate one data-moving CCW: build a host IDAL for it, pin the
 * guest pages it touches, and point ccw->cda at the new IDAL.  On
 * success the kmalloc'd IDAL is owned by the CCW (freed later via
 * ccwchain_cda_free()); on failure everything is unwound and ccw->cda
 * is cleared.
 */
static int ccwchain_fetch_ccw(struct ccw1 *ccw,
			      struct page_array *pa,
			      struct channel_program *cp)
{
	struct vfio_device *vdev =
		&container_of(cp, struct vfio_ccw_private, cp)->vdev;
	dma64_t *idaws;
	dma32_t *idaws_f1;
	int ret;
	int idaw_nr;
	int i;

	/* Calculate size of IDAL */
	idaw_nr = ccw_count_idaws(ccw, cp);
	if (idaw_nr < 0)
		return idaw_nr;

	/* Allocate an IDAL from host storage */
	idaws = get_guest_idal(ccw, cp, idaw_nr);
	if (IS_ERR(idaws)) {
		ret = PTR_ERR(idaws);
		goto out_init;
	}

	/*
	 * Allocate an array of pages to pin/translate.
	 * The number of pages is actually the count of the idaws
	 * required for the data transfer (one per 2K or 4K block).
	 */
	ret = page_array_alloc(pa, idaw_nr);
	if (ret < 0)
		goto out_free_idaws;

	/*
	 * Copy guest IDAWs into page_array, in case the memory they
	 * occupy is not contiguous.
	 */
	idaws_f1 = (dma32_t *)idaws;
	for (i = 0; i < idaw_nr; i++) {
		if (cp->orb.cmd.c64)
			pa->pa_iova[i] = dma64_to_u64(idaws[i]);
		else
			pa->pa_iova[i] = dma32_to_u32(idaws_f1[i]);
	}

	if (ccw_does_data_transfer(ccw)) {
		ret = page_array_pin(pa, vdev, idal_is_2k(cp));
		if (ret < 0)
			goto out_unpin;
	} else {
		/* No transfer (e.g. skip): nothing pinned, nothing to undo. */
		pa->pa_nr = 0;
	}

	ccw->cda = virt_to_dma32(idaws);
	ccw->flags |= CCW_FLAG_IDA;

	/* Populate the IDAL with pinned/translated addresses from page */
	page_array_idal_create_words(pa, idaws);

	return 0;

out_unpin:
	page_array_unpin_free(pa, vdev, idal_is_2k(cp));
out_free_idaws:
	kfree(idaws);
out_init:
	ccw->cda = 0;
	return ret;
}
  570. /*
  571. * Fetch one ccw.
  572. * To reduce memory copy, we'll pin the cda page in memory,
  573. * and to get rid of the cda 2G limitation of ccw1, we'll translate
  574. * direct ccws to idal ccws.
  575. */
  576. static int ccwchain_fetch_one(struct ccw1 *ccw,
  577. struct page_array *pa,
  578. struct channel_program *cp)
  579. {
  580. if (ccw_is_tic(ccw))
  581. return ccwchain_fetch_tic(ccw, cp);
  582. return ccwchain_fetch_ccw(ccw, pa, cp);
  583. }
  584. /**
  585. * cp_init() - allocate ccwchains for a channel program.
  586. * @cp: channel_program on which to perform the operation
  587. * @orb: control block for the channel program from the guest
  588. *
  589. * This creates one or more ccwchain(s), and copies the raw data of
  590. * the target channel program from @orb->cmd.iova to the new ccwchain(s).
  591. *
  592. * Limitations:
  593. * 1. Supports idal(c64) ccw chaining.
  594. * 2. Supports 4k idaw.
  595. *
  596. * Returns:
  597. * %0 on success and a negative error value on failure.
  598. */
  599. int cp_init(struct channel_program *cp, union orb *orb)
  600. {
  601. struct vfio_device *vdev =
  602. &container_of(cp, struct vfio_ccw_private, cp)->vdev;
  603. /* custom ratelimit used to avoid flood during guest IPL */
  604. static DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 1);
  605. int ret;
  606. /* this is an error in the caller */
  607. if (cp->initialized)
  608. return -EBUSY;
  609. /*
  610. * We only support prefetching the channel program. We assume all channel
  611. * programs executed by supported guests likewise support prefetching.
  612. * Executing a channel program that does not specify prefetching will
  613. * typically not cause an error, but a warning is issued to help identify
  614. * the problem if something does break.
  615. */
  616. if (!orb->cmd.pfch && __ratelimit(&ratelimit_state))
  617. dev_warn(
  618. vdev->dev,
  619. "Prefetching channel program even though prefetch not specified in ORB");
  620. INIT_LIST_HEAD(&cp->ccwchain_list);
  621. memcpy(&cp->orb, orb, sizeof(*orb));
  622. /* Build a ccwchain for the first CCW segment */
  623. ret = ccwchain_handle_ccw(orb->cmd.cpa, cp);
  624. if (!ret)
  625. cp->initialized = true;
  626. return ret;
  627. }
  628. /**
  629. * cp_free() - free resources for channel program.
  630. * @cp: channel_program on which to perform the operation
  631. *
  632. * This unpins the memory pages and frees the memory space occupied by
  633. * @cp, which must have been returned by a previous call to cp_init().
  634. * Otherwise, undefined behavior occurs.
  635. */
  636. void cp_free(struct channel_program *cp)
  637. {
  638. struct vfio_device *vdev =
  639. &container_of(cp, struct vfio_ccw_private, cp)->vdev;
  640. struct ccwchain *chain, *temp;
  641. int i;
  642. if (!cp->initialized)
  643. return;
  644. cp->initialized = false;
  645. list_for_each_entry_safe(chain, temp, &cp->ccwchain_list, next) {
  646. for (i = 0; i < chain->ch_len; i++) {
  647. page_array_unpin_free(&chain->ch_pa[i], vdev, idal_is_2k(cp));
  648. ccwchain_cda_free(chain, i);
  649. }
  650. ccwchain_free(chain);
  651. }
  652. }
  653. /**
  654. * cp_prefetch() - translate a guest physical address channel program to
  655. * a real-device runnable channel program.
  656. * @cp: channel_program on which to perform the operation
  657. *
  658. * This function translates the guest-physical-address channel program
  659. * and stores the result to ccwchain list. @cp must have been
  660. * initialized by a previous call with cp_init(). Otherwise, undefined
  661. * behavior occurs.
  662. * For each chain composing the channel program:
  663. * - On entry ch_len holds the count of CCWs to be translated.
  664. * - On exit ch_len is adjusted to the count of successfully translated CCWs.
  665. * This allows cp_free to find in ch_len the count of CCWs to free in a chain.
  666. *
  667. * The S/390 CCW Translation APIS (prefixed by 'cp_') are introduced
  668. * as helpers to do ccw chain translation inside the kernel. Basically
  669. * they accept a channel program issued by a virtual machine, and
  670. * translate the channel program to a real-device runnable channel
  671. * program.
  672. *
  673. * These APIs will copy the ccws into kernel-space buffers, and update
  674. * the guest physical addresses with their corresponding host physical
  675. * addresses. Then channel I/O device drivers could issue the
  676. * translated channel program to real devices to perform an I/O
  677. * operation.
  678. *
  679. * These interfaces are designed to support translation only for
  680. * channel programs, which are generated and formatted by a
  681. * guest. Thus this will make it possible for things like VFIO to
  682. * leverage the interfaces to passthrough a channel I/O mediated
  683. * device in QEMU.
  684. *
  685. * We support direct ccw chaining by translating them to idal ccws.
  686. *
  687. * Returns:
  688. * %0 on success and a negative error value on failure.
  689. */
  690. int cp_prefetch(struct channel_program *cp)
  691. {
  692. struct ccwchain *chain;
  693. struct ccw1 *ccw;
  694. struct page_array *pa;
  695. int len, idx, ret;
  696. /* this is an error in the caller */
  697. if (!cp->initialized)
  698. return -EINVAL;
  699. list_for_each_entry(chain, &cp->ccwchain_list, next) {
  700. len = chain->ch_len;
  701. for (idx = 0; idx < len; idx++) {
  702. ccw = &chain->ch_ccw[idx];
  703. pa = &chain->ch_pa[idx];
  704. ret = ccwchain_fetch_one(ccw, pa, cp);
  705. if (ret)
  706. goto out_err;
  707. }
  708. }
  709. return 0;
  710. out_err:
  711. /* Only cleanup the chain elements that were actually translated. */
  712. chain->ch_len = idx;
  713. list_for_each_entry_continue(chain, &cp->ccwchain_list, next) {
  714. chain->ch_len = 0;
  715. }
  716. return ret;
  717. }
  718. /**
  719. * cp_get_orb() - get the orb of the channel program
  720. * @cp: channel_program on which to perform the operation
  721. * @sch: subchannel the operation will be performed against
  722. *
  723. * This function returns the address of the updated orb of the channel
  724. * program. Channel I/O device drivers could use this orb to issue a
  725. * ssch.
  726. */
  727. union orb *cp_get_orb(struct channel_program *cp, struct subchannel *sch)
  728. {
  729. union orb *orb;
  730. struct ccwchain *chain;
  731. struct ccw1 *cpa;
  732. /* this is an error in the caller */
  733. if (!cp->initialized)
  734. return NULL;
  735. orb = &cp->orb;
  736. orb->cmd.intparm = (u32)virt_to_phys(sch);
  737. orb->cmd.fmt = 1;
  738. /*
  739. * Everything built by vfio-ccw is a Format-2 IDAL.
  740. * If the input was a Format-1 IDAL, indicate that
  741. * 2K Format-2 IDAWs were created here.
  742. */
  743. if (!orb->cmd.c64)
  744. orb->cmd.i2k = 1;
  745. orb->cmd.c64 = 1;
  746. if (orb->cmd.lpm == 0)
  747. orb->cmd.lpm = sch->lpm;
  748. chain = list_first_entry(&cp->ccwchain_list, struct ccwchain, next);
  749. cpa = chain->ch_ccw;
  750. orb->cmd.cpa = virt_to_dma32(cpa);
  751. return orb;
  752. }
/**
 * cp_update_scsw() - update scsw for a channel program.
 * @cp: channel_program on which to perform the operation
 * @scsw: I/O results of the channel program and also the target to be
 * updated
 *
 * @scsw contains the I/O results of the channel program that pointed
 * to by @cp. However what @scsw->cpa stores is a host physical
 * address, which is meaningless for the guest, which is waiting for
 * the I/O results.
 *
 * This function updates @scsw->cpa to its corresponding guest physical
 * address.
 */
void cp_update_scsw(struct channel_program *cp, union scsw *scsw)
{
	struct ccwchain *chain;
	dma32_t cpa = scsw->cmd.cpa;
	u32 ccw_head;

	if (!cp->initialized)
		return;

	/*
	 * LATER:
	 * For now, only update the cmd.cpa part. We may need to deal with
	 * other portions of the schib as well, even if we don't return them
	 * in the ioctl directly. Path status changes etc.
	 */
	list_for_each_entry(chain, &cp->ccwchain_list, next) {
		ccw_head = dma32_to_u32(virt_to_dma32(chain->ch_ccw));
		/*
		 * On successful execution, cpa points just beyond the end
		 * of the chain, hence the ch_len + 1 window.
		 */
		if (is_cpa_within_range(cpa, ccw_head, chain->ch_len + 1)) {
			/*
			 * (cpa - ccw_head) is the offset value of the host
			 * physical ccw to its chain head.
			 * Adding this value to the guest physical ccw chain
			 * head gets us the guest cpa:
			 * cpa = chain->ch_iova + (cpa - ccw_head)
			 * (u32 arithmetic; wraparound is intentional.)
			 */
			cpa = dma32_add(cpa, chain->ch_iova - ccw_head);
			break;
		}
	}

	scsw->cmd.cpa = cpa;
}
  800. /**
  801. * cp_iova_pinned() - check if an iova is pinned for a ccw chain.
  802. * @cp: channel_program on which to perform the operation
  803. * @iova: the iova to check
  804. * @length: the length to check from @iova
  805. *
  806. * If the @iova is currently pinned for the ccw chain, return true;
  807. * else return false.
  808. */
  809. bool cp_iova_pinned(struct channel_program *cp, u64 iova, u64 length)
  810. {
  811. struct ccwchain *chain;
  812. int i;
  813. if (!cp->initialized)
  814. return false;
  815. list_for_each_entry(chain, &cp->ccwchain_list, next) {
  816. for (i = 0; i < chain->ch_len; i++)
  817. if (page_array_iova_pinned(&chain->ch_pa[i], iova, length))
  818. return true;
  819. }
  820. return false;
  821. }