vringh.c

/*
 * Helpers for the host side of a virtio ring.
 *
 * Since these may be in userspace, we use (inline) accessors.
 */
#include <linux/compiler.h>
#include <linux/module.h>
#include <linux/vringh.h>
#include <linux/virtio_ring.h>
#include <linux/kernel.h>
#include <linux/ratelimit.h>
#include <linux/uaccess.h>
#include <linux/slab.h>
#include <linux/export.h>
#include <uapi/linux/virtio_config.h>

static __printf(1,2) __cold void vringh_bad(const char *fmt, ...)
{
	static DEFINE_RATELIMIT_STATE(vringh_rs,
				      DEFAULT_RATELIMIT_INTERVAL,
				      DEFAULT_RATELIMIT_BURST);
	if (__ratelimit(&vringh_rs)) {
		va_list ap;
		va_start(ap, fmt);
		printk(KERN_NOTICE "vringh:");
		vprintk(fmt, ap);
		va_end(ap);
	}
}
/* Returns vring->num if empty, -ve on error. */
static inline int __vringh_get_head(const struct vringh *vrh,
				    int (*getu16)(const struct vringh *vrh,
						  u16 *val, const __virtio16 *p),
				    u16 *last_avail_idx)
{
	u16 avail_idx, i, head;
	int err;

	err = getu16(vrh, &avail_idx, &vrh->vring.avail->idx);
	if (err) {
		vringh_bad("Failed to access avail idx at %p",
			   &vrh->vring.avail->idx);
		return err;
	}

	if (*last_avail_idx == avail_idx)
		return vrh->vring.num;

	/* Only get avail ring entries after they have been exposed by guest. */
	virtio_rmb(vrh->weak_barriers);

	i = *last_avail_idx & (vrh->vring.num - 1);

	err = getu16(vrh, &head, &vrh->vring.avail->ring[i]);
	if (err) {
		vringh_bad("Failed to read head: idx %d address %p",
			   *last_avail_idx, &vrh->vring.avail->ring[i]);
		return err;
	}

	if (head >= vrh->vring.num) {
		vringh_bad("Guest says index %u > %u is available",
			   head, vrh->vring.num);
		return -EINVAL;
	}

	(*last_avail_idx)++;
	return head;
}
/* Copy some bytes to/from the iovec. Returns num copied. */
static inline ssize_t vringh_iov_xfer(struct vringh_kiov *iov,
				      void *ptr, size_t len,
				      int (*xfer)(void *addr, void *ptr,
						  size_t len))
{
	int err, done = 0;

	while (len && iov->i < iov->used) {
		size_t partlen;

		partlen = min(iov->iov[iov->i].iov_len, len);
		err = xfer(iov->iov[iov->i].iov_base, ptr, partlen);
		if (err)
			return err;
		done += partlen;
		len -= partlen;
		ptr += partlen;
		iov->consumed += partlen;
		iov->iov[iov->i].iov_len -= partlen;
		iov->iov[iov->i].iov_base += partlen;

		if (!iov->iov[iov->i].iov_len) {
			/* Fix up old iov element then increment. */
			iov->iov[iov->i].iov_len = iov->consumed;
			iov->iov[iov->i].iov_base -= iov->consumed;

			iov->consumed = 0;
			iov->i++;
		}
	}
	return done;
}
/* May reduce *len if range is shorter. */
static inline bool range_check(struct vringh *vrh, u64 addr, size_t *len,
			       struct vringh_range *range,
			       bool (*getrange)(struct vringh *,
						u64, struct vringh_range *))
{
	if (addr < range->start || addr > range->end_incl) {
		if (!getrange(vrh, addr, range))
			return false;
	}
	BUG_ON(addr < range->start || addr > range->end_incl);

	/* To end of memory? */
	if (unlikely(addr + *len == 0)) {
		if (range->end_incl == -1ULL)
			return true;
		goto truncate;
	}

	/* Otherwise, don't wrap. */
	if (addr + *len < addr) {
		vringh_bad("Wrapping descriptor %zu@0x%llx",
			   *len, (unsigned long long)addr);
		return false;
	}

	if (unlikely(addr + *len - 1 > range->end_incl))
		goto truncate;

	return true;

truncate:
	*len = range->end_incl + 1 - addr;
	return true;
}

static inline bool no_range_check(struct vringh *vrh, u64 addr, size_t *len,
				  struct vringh_range *range,
				  bool (*getrange)(struct vringh *,
						   u64, struct vringh_range *))
{
	return true;
}
/* No reason for this code to be inline. */
static int move_to_indirect(const struct vringh *vrh,
			    int *up_next, u16 *i, void *addr,
			    const struct vring_desc *desc,
			    struct vring_desc **descs, int *desc_max)
{
	u32 len;

	/* Indirect tables can't have indirect. */
	if (*up_next != -1) {
		vringh_bad("Multilevel indirect %u->%u", *up_next, *i);
		return -EINVAL;
	}

	len = vringh32_to_cpu(vrh, desc->len);
	if (unlikely(len % sizeof(struct vring_desc))) {
		vringh_bad("Strange indirect len %u", desc->len);
		return -EINVAL;
	}

	/* We will check this when we follow it! */
	if (desc->flags & cpu_to_vringh16(vrh, VRING_DESC_F_NEXT))
		*up_next = vringh16_to_cpu(vrh, desc->next);
	else
		*up_next = -2;
	*descs = addr;
	*desc_max = len / sizeof(struct vring_desc);

	/* Now, start at the first indirect. */
	*i = 0;
	return 0;
}
static int resize_iovec(struct vringh_kiov *iov, gfp_t gfp)
{
	struct kvec *new;
	unsigned int flag, new_num = (iov->max_num & ~VRINGH_IOV_ALLOCATED) * 2;

	if (new_num < 8)
		new_num = 8;

	flag = (iov->max_num & VRINGH_IOV_ALLOCATED);
	if (flag)
		new = krealloc(iov->iov, new_num * sizeof(struct iovec), gfp);
	else {
		new = kmalloc_array(new_num, sizeof(struct iovec), gfp);
		if (new) {
			memcpy(new, iov->iov,
			       iov->max_num * sizeof(struct iovec));
			flag = VRINGH_IOV_ALLOCATED;
		}
	}
	if (!new)
		return -ENOMEM;
	iov->iov = new;
	iov->max_num = (new_num | flag);
	return 0;
}
static u16 __cold return_from_indirect(const struct vringh *vrh, int *up_next,
				       struct vring_desc **descs, int *desc_max)
{
	u16 i = *up_next;

	*up_next = -1;
	*descs = vrh->vring.desc;
	*desc_max = vrh->vring.num;
	return i;
}
static int slow_copy(struct vringh *vrh, void *dst, const void *src,
		     bool (*rcheck)(struct vringh *vrh, u64 addr, size_t *len,
				    struct vringh_range *range,
				    bool (*getrange)(struct vringh *vrh,
						     u64,
						     struct vringh_range *)),
		     bool (*getrange)(struct vringh *vrh,
				      u64 addr,
				      struct vringh_range *r),
		     struct vringh_range *range,
		     int (*copy)(void *dst, const void *src, size_t len))
{
	size_t part, len = sizeof(struct vring_desc);

	do {
		u64 addr;
		int err;

		part = len;
		addr = (u64)(unsigned long)src - range->offset;

		if (!rcheck(vrh, addr, &part, range, getrange))
			return -EINVAL;

		err = copy(dst, src, part);
		if (err)
			return err;

		dst += part;
		src += part;
		len -= part;
	} while (len);
	return 0;
}
static inline int
__vringh_iov(struct vringh *vrh, u16 i,
	     struct vringh_kiov *riov,
	     struct vringh_kiov *wiov,
	     bool (*rcheck)(struct vringh *vrh, u64 addr, size_t *len,
			    struct vringh_range *range,
			    bool (*getrange)(struct vringh *, u64,
					     struct vringh_range *)),
	     bool (*getrange)(struct vringh *, u64, struct vringh_range *),
	     gfp_t gfp,
	     int (*copy)(void *dst, const void *src, size_t len))
{
	int err, count = 0, up_next, desc_max;
	struct vring_desc desc, *descs;
	struct vringh_range range = { -1ULL, 0 }, slowrange;
	bool slow = false;

	/* We start traversing vring's descriptor table. */
	descs = vrh->vring.desc;
	desc_max = vrh->vring.num;
	up_next = -1;

	/* You must want something! */
	if (WARN_ON(!riov && !wiov))
		return -EINVAL;

	if (riov)
		riov->i = riov->used = 0;
	if (wiov)
		wiov->i = wiov->used = 0;

	for (;;) {
		void *addr;
		struct vringh_kiov *iov;
		size_t len;

		if (unlikely(slow))
			err = slow_copy(vrh, &desc, &descs[i], rcheck, getrange,
					&slowrange, copy);
		else
			err = copy(&desc, &descs[i], sizeof(desc));
		if (unlikely(err))
			goto fail;

		if (unlikely(desc.flags &
			     cpu_to_vringh16(vrh, VRING_DESC_F_INDIRECT))) {
			u64 a = vringh64_to_cpu(vrh, desc.addr);

			/* Make sure it's OK, and get offset. */
			len = vringh32_to_cpu(vrh, desc.len);
			if (!rcheck(vrh, a, &len, &range, getrange)) {
				err = -EINVAL;
				goto fail;
			}

			if (unlikely(len != vringh32_to_cpu(vrh, desc.len))) {
				slow = true;
				/* We need to save this range to use offset */
				slowrange = range;
			}

			addr = (void *)(long)(a + range.offset);
			err = move_to_indirect(vrh, &up_next, &i, addr, &desc,
					       &descs, &desc_max);
			if (err)
				goto fail;
			continue;
		}

		if (count++ == vrh->vring.num) {
			vringh_bad("Descriptor loop in %p", descs);
			err = -ELOOP;
			goto fail;
		}

		if (desc.flags & cpu_to_vringh16(vrh, VRING_DESC_F_WRITE))
			iov = wiov;
		else {
			iov = riov;
			if (unlikely(wiov && wiov->i)) {
				vringh_bad("Readable desc %p after writable",
					   &descs[i]);
				err = -EINVAL;
				goto fail;
			}
		}

		if (!iov) {
			vringh_bad("Unexpected %s desc",
				   !wiov ? "writable" : "readable");
			err = -EPROTO;
			goto fail;
		}

	again:
		/* Make sure it's OK, and get offset. */
		len = vringh32_to_cpu(vrh, desc.len);
		if (!rcheck(vrh, vringh64_to_cpu(vrh, desc.addr), &len, &range,
			    getrange)) {
			err = -EINVAL;
			goto fail;
		}
		addr = (void *)(unsigned long)(vringh64_to_cpu(vrh, desc.addr) +
					       range.offset);

		if (unlikely(iov->used == (iov->max_num & ~VRINGH_IOV_ALLOCATED))) {
			err = resize_iovec(iov, gfp);
			if (err)
				goto fail;
		}

		iov->iov[iov->used].iov_base = addr;
		iov->iov[iov->used].iov_len = len;
		iov->used++;

		if (unlikely(len != vringh32_to_cpu(vrh, desc.len))) {
			desc.len = cpu_to_vringh32(vrh,
				   vringh32_to_cpu(vrh, desc.len) - len);
			desc.addr = cpu_to_vringh64(vrh,
				    vringh64_to_cpu(vrh, desc.addr) + len);
			goto again;
		}

		if (desc.flags & cpu_to_vringh16(vrh, VRING_DESC_F_NEXT)) {
			i = vringh16_to_cpu(vrh, desc.next);
		} else {
			/* Just in case we need to finish traversing above. */
			if (unlikely(up_next > 0)) {
				i = return_from_indirect(vrh, &up_next,
							 &descs, &desc_max);
				slow = false;
			} else
				break;
		}

		if (i >= desc_max) {
			vringh_bad("Chained index %u > %u", i, desc_max);
			err = -EINVAL;
			goto fail;
		}
	}

	return 0;

fail:
	return err;
}
static inline int __vringh_complete(struct vringh *vrh,
				    const struct vring_used_elem *used,
				    unsigned int num_used,
				    int (*putu16)(const struct vringh *vrh,
						  __virtio16 *p, u16 val),
				    int (*putused)(struct vring_used_elem *dst,
						   const struct vring_used_elem
						   *src, unsigned num))
{
	struct vring_used *used_ring;
	int err;
	u16 used_idx, off;

	used_ring = vrh->vring.used;
	used_idx = vrh->last_used_idx + vrh->completed;

	off = used_idx % vrh->vring.num;

	/* Compiler knows num_used == 1 sometimes, hence extra check */
	if (num_used > 1 && unlikely(off + num_used >= vrh->vring.num)) {
		u16 part = vrh->vring.num - off;
		err = putused(&used_ring->ring[off], used, part);
		if (!err)
			err = putused(&used_ring->ring[0], used + part,
				      num_used - part);
	} else
		err = putused(&used_ring->ring[off], used, num_used);

	if (err) {
		vringh_bad("Failed to write %u used entries %u at %p",
			   num_used, off, &used_ring->ring[off]);
		return err;
	}

	/* Make sure buffer is written before we update index. */
	virtio_wmb(vrh->weak_barriers);

	err = putu16(vrh, &vrh->vring.used->idx, used_idx + num_used);
	if (err) {
		vringh_bad("Failed to update used index at %p",
			   &vrh->vring.used->idx);
		return err;
	}

	vrh->completed += num_used;
	return 0;
}
static inline int __vringh_need_notify(struct vringh *vrh,
				       int (*getu16)(const struct vringh *vrh,
						     u16 *val,
						     const __virtio16 *p))
{
	bool notify;
	u16 used_event;
	int err;

	/* Flush out used index update. This is paired with the
	 * barrier that the Guest executes when enabling
	 * interrupts. */
	virtio_mb(vrh->weak_barriers);

	/* Old-style, without event indices. */
	if (!vrh->event_indices) {
		u16 flags;
		err = getu16(vrh, &flags, &vrh->vring.avail->flags);
		if (err) {
			vringh_bad("Failed to get flags at %p",
				   &vrh->vring.avail->flags);
			return err;
		}
		return (!(flags & VRING_AVAIL_F_NO_INTERRUPT));
	}

	/* Modern: we know when other side wants to know. */
	err = getu16(vrh, &used_event, &vring_used_event(&vrh->vring));
	if (err) {
		vringh_bad("Failed to get used event idx at %p",
			   &vring_used_event(&vrh->vring));
		return err;
	}

	/* Just in case we added so many that we wrap. */
	if (unlikely(vrh->completed > 0xffff))
		notify = true;
	else
		notify = vring_need_event(used_event,
					  vrh->last_used_idx + vrh->completed,
					  vrh->last_used_idx);

	vrh->last_used_idx += vrh->completed;
	vrh->completed = 0;
	return notify;
}
static inline bool __vringh_notify_enable(struct vringh *vrh,
					  int (*getu16)(const struct vringh *vrh,
							u16 *val, const __virtio16 *p),
					  int (*putu16)(const struct vringh *vrh,
							__virtio16 *p, u16 val))
{
	u16 avail;

	if (!vrh->event_indices) {
		/* Old-school; update flags. */
		if (putu16(vrh, &vrh->vring.used->flags, 0) != 0) {
			vringh_bad("Clearing used flags %p",
				   &vrh->vring.used->flags);
			return true;
		}
	} else {
		if (putu16(vrh, &vring_avail_event(&vrh->vring),
			   vrh->last_avail_idx) != 0) {
			vringh_bad("Updating avail event index %p",
				   &vring_avail_event(&vrh->vring));
			return true;
		}
	}

	/* They could have slipped one in as we were doing that: make
	 * sure it's written, then check again. */
	virtio_mb(vrh->weak_barriers);

	if (getu16(vrh, &avail, &vrh->vring.avail->idx) != 0) {
		vringh_bad("Failed to check avail idx at %p",
			   &vrh->vring.avail->idx);
		return true;
	}

	/* This is unlikely, so we just leave notifications enabled
	 * (if we're using event_indices, we'll only get one
	 * notification anyway). */
	return avail == vrh->last_avail_idx;
}
static inline void __vringh_notify_disable(struct vringh *vrh,
					   int (*putu16)(const struct vringh *vrh,
							 __virtio16 *p, u16 val))
{
	if (!vrh->event_indices) {
		/* Old-school; update flags. */
		if (putu16(vrh, &vrh->vring.used->flags,
			   VRING_USED_F_NO_NOTIFY)) {
			vringh_bad("Setting used flags %p",
				   &vrh->vring.used->flags);
		}
	}
}
/* Userspace access helpers: in this case, addresses are really userspace. */
static inline int getu16_user(const struct vringh *vrh, u16 *val, const __virtio16 *p)
{
	__virtio16 v = 0;
	int rc = get_user(v, (__force __virtio16 __user *)p);
	*val = vringh16_to_cpu(vrh, v);
	return rc;
}

static inline int putu16_user(const struct vringh *vrh, __virtio16 *p, u16 val)
{
	__virtio16 v = cpu_to_vringh16(vrh, val);
	return put_user(v, (__force __virtio16 __user *)p);
}

static inline int copydesc_user(void *dst, const void *src, size_t len)
{
	return copy_from_user(dst, (__force void __user *)src, len) ?
		-EFAULT : 0;
}

static inline int putused_user(struct vring_used_elem *dst,
			       const struct vring_used_elem *src,
			       unsigned int num)
{
	return copy_to_user((__force void __user *)dst, src,
			    sizeof(*dst) * num) ? -EFAULT : 0;
}

static inline int xfer_from_user(void *src, void *dst, size_t len)
{
	return copy_from_user(dst, (__force void __user *)src, len) ?
		-EFAULT : 0;
}

static inline int xfer_to_user(void *dst, void *src, size_t len)
{
	return copy_to_user((__force void __user *)dst, src, len) ?
		-EFAULT : 0;
}
/**
 * vringh_init_user - initialize a vringh for a userspace vring.
 * @vrh: the vringh to initialize.
 * @features: the feature bits for this ring.
 * @num: the number of elements.
 * @weak_barriers: true if we only need memory barriers, not I/O.
 * @desc: the userspace descriptor pointer.
 * @avail: the userspace avail pointer.
 * @used: the userspace used pointer.
 *
 * Returns an error if num is invalid: you should check pointers
 * yourself!
 */
int vringh_init_user(struct vringh *vrh, u64 features,
		     unsigned int num, bool weak_barriers,
		     struct vring_desc __user *desc,
		     struct vring_avail __user *avail,
		     struct vring_used __user *used)
{
	/* Sane power of 2 please! */
	if (!num || num > 0xffff || (num & (num - 1))) {
		vringh_bad("Bad ring size %u", num);
		return -EINVAL;
	}

	vrh->little_endian = (features & (1ULL << VIRTIO_F_VERSION_1));
	vrh->event_indices = (features & (1 << VIRTIO_RING_F_EVENT_IDX));
	vrh->weak_barriers = weak_barriers;
	vrh->completed = 0;
	vrh->last_avail_idx = 0;
	vrh->last_used_idx = 0;
	vrh->vring.num = num;
	/* vring expects kernel addresses, but only used via accessors. */
	vrh->vring.desc = (__force struct vring_desc *)desc;
	vrh->vring.avail = (__force struct vring_avail *)avail;
	vrh->vring.used = (__force struct vring_used *)used;
	return 0;
}
EXPORT_SYMBOL(vringh_init_user);
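
/*
 * Example (editorial sketch, not part of the original file): a host might
 * set up a userspace ring roughly as below.  The single-region callback,
 * map_base/map_size and the u_desc/u_avail/u_used pointers are hypothetical;
 * the only contract is that getrange() fills in a vringh_range covering
 * @addr, where range->offset converts a guest address into a usable
 * userspace pointer.
 *
 *	static bool my_getrange(struct vringh *vrh, u64 addr,
 *				struct vringh_range *r)
 *	{
 *		if (addr >= map_size)
 *			return false;
 *		r->start = 0;
 *		r->end_incl = map_size - 1;
 *		r->offset = map_base;	// userspace base of the guest mapping
 *		return true;
 *	}
 *
 *	err = vringh_init_user(&vrh, features, num, true,
 *			       u_desc, u_avail, u_used);
 *	if (err)
 *		return err;
 */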
/**
 * vringh_getdesc_user - get next available descriptor from userspace ring.
 * @vrh: the userspace vring.
 * @riov: where to put the readable descriptors (or NULL)
 * @wiov: where to put the writable descriptors (or NULL)
 * @getrange: function to call to check ranges.
 * @head: head index we received, for passing to vringh_complete_user().
 *
 * Returns 0 if there was no descriptor, 1 if there was, or -errno.
 *
 * Note that on error return, you can tell the difference between an
 * invalid ring and a single invalid descriptor: in the former case,
 * *head will be vrh->vring.num. You may be able to ignore an invalid
 * descriptor, but there's not much you can do with an invalid ring.
 *
 * Note that you may need to clean up riov and wiov, even on error!
 */
int vringh_getdesc_user(struct vringh *vrh,
			struct vringh_iov *riov,
			struct vringh_iov *wiov,
			bool (*getrange)(struct vringh *vrh,
					 u64 addr, struct vringh_range *r),
			u16 *head)
{
	int err;

	*head = vrh->vring.num;
	err = __vringh_get_head(vrh, getu16_user, &vrh->last_avail_idx);
	if (err < 0)
		return err;

	/* Empty... */
	if (err == vrh->vring.num)
		return 0;

	/* We need the layouts to be identical for this to work */
	BUILD_BUG_ON(sizeof(struct vringh_kiov) != sizeof(struct vringh_iov));
	BUILD_BUG_ON(offsetof(struct vringh_kiov, iov) !=
		     offsetof(struct vringh_iov, iov));
	BUILD_BUG_ON(offsetof(struct vringh_kiov, i) !=
		     offsetof(struct vringh_iov, i));
	BUILD_BUG_ON(offsetof(struct vringh_kiov, used) !=
		     offsetof(struct vringh_iov, used));
	BUILD_BUG_ON(offsetof(struct vringh_kiov, max_num) !=
		     offsetof(struct vringh_iov, max_num));
	BUILD_BUG_ON(sizeof(struct iovec) != sizeof(struct kvec));
	BUILD_BUG_ON(offsetof(struct iovec, iov_base) !=
		     offsetof(struct kvec, iov_base));
	BUILD_BUG_ON(offsetof(struct iovec, iov_len) !=
		     offsetof(struct kvec, iov_len));
	BUILD_BUG_ON(sizeof(((struct iovec *)NULL)->iov_base)
		     != sizeof(((struct kvec *)NULL)->iov_base));
	BUILD_BUG_ON(sizeof(((struct iovec *)NULL)->iov_len)
		     != sizeof(((struct kvec *)NULL)->iov_len));

	*head = err;
	err = __vringh_iov(vrh, *head, (struct vringh_kiov *)riov,
			   (struct vringh_kiov *)wiov,
			   range_check, getrange, GFP_KERNEL, copydesc_user);
	if (err)
		return err;

	return 1;
}
EXPORT_SYMBOL(vringh_getdesc_user);
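
/*
 * Example (editorial sketch, not part of the original file): a typical
 * service loop over a userspace ring.  my_getrange() is the hypothetical
 * range callback sketched above; handle_request() is a hypothetical handler
 * that pulls the request with vringh_iov_pull_user() and pushes its reply
 * with vringh_iov_push_user(), returning the number of bytes written.  The
 * init/cleanup helpers come from <linux/vringh.h>.
 *
 *	struct vringh_iov riov, wiov;
 *	u32 written;
 *	u16 head;
 *	int err;
 *
 *	vringh_iov_init(&riov, NULL, 0);
 *	vringh_iov_init(&wiov, NULL, 0);
 *
 *	for (;;) {
 *		err = vringh_getdesc_user(&vrh, &riov, &wiov,
 *					  my_getrange, &head);
 *		if (err != 1)
 *			break;		// 0 means ring empty, <0 an error
 *		written = handle_request(&riov, &wiov);
 *		vringh_complete_user(&vrh, head, written);
 *	}
 *	vringh_iov_cleanup(&riov);
 *	vringh_iov_cleanup(&wiov);
 */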
/**
 * vringh_iov_pull_user - copy bytes from vringh_iov.
 * @riov: the riov as passed to vringh_getdesc_user() (updated as we consume)
 * @dst: the place to copy.
 * @len: the maximum length to copy.
 *
 * Returns the bytes copied <= len or a negative errno.
 */
ssize_t vringh_iov_pull_user(struct vringh_iov *riov, void *dst, size_t len)
{
	return vringh_iov_xfer((struct vringh_kiov *)riov,
			       dst, len, xfer_from_user);
}
EXPORT_SYMBOL(vringh_iov_pull_user);

/**
 * vringh_iov_push_user - copy bytes into vringh_iov.
 * @wiov: the wiov as passed to vringh_getdesc_user() (updated as we consume)
 * @src: the place to copy from.
 * @len: the maximum length to copy.
 *
 * Returns the bytes copied <= len or a negative errno.
 */
ssize_t vringh_iov_push_user(struct vringh_iov *wiov,
			     const void *src, size_t len)
{
	return vringh_iov_xfer((struct vringh_kiov *)wiov,
			       (void *)src, len, xfer_to_user);
}
EXPORT_SYMBOL(vringh_iov_push_user);
/**
 * vringh_abandon_user - we've decided not to handle the descriptor(s).
 * @vrh: the vring.
 * @num: the number of descriptors to put back (ie. num
 *	 vringh_getdesc_user() calls to undo).
 *
 * The next vringh_getdesc_user() will return the old descriptor(s) again.
 */
void vringh_abandon_user(struct vringh *vrh, unsigned int num)
{
	/* We only update vring_avail_event(vr) when we want to be notified,
	 * so we haven't changed that yet. */
	vrh->last_avail_idx -= num;
}
EXPORT_SYMBOL(vringh_abandon_user);
/**
 * vringh_complete_user - we've finished with descriptor, publish it.
 * @vrh: the vring.
 * @head: the head as filled in by vringh_getdesc_user.
 * @len: the length of data we have written.
 *
 * You should check vringh_need_notify_user() after one or more calls
 * to this function.
 */
int vringh_complete_user(struct vringh *vrh, u16 head, u32 len)
{
	struct vring_used_elem used;

	used.id = cpu_to_vringh32(vrh, head);
	used.len = cpu_to_vringh32(vrh, len);

	return __vringh_complete(vrh, &used, 1, putu16_user, putused_user);
}
EXPORT_SYMBOL(vringh_complete_user);

/**
 * vringh_complete_multi_user - we've finished with many descriptors.
 * @vrh: the vring.
 * @used: the head, length pairs.
 * @num_used: the number of used elements.
 *
 * You should check vringh_need_notify_user() after one or more calls
 * to this function.
 */
int vringh_complete_multi_user(struct vringh *vrh,
			       const struct vring_used_elem used[],
			       unsigned num_used)
{
	return __vringh_complete(vrh, used, num_used,
				 putu16_user, putused_user);
}
EXPORT_SYMBOL(vringh_complete_multi_user);
/**
 * vringh_notify_enable_user - we want to know if something changes.
 * @vrh: the vring.
 *
 * This always enables notifications, but returns false if there are
 * now more buffers available in the vring.
 */
bool vringh_notify_enable_user(struct vringh *vrh)
{
	return __vringh_notify_enable(vrh, getu16_user, putu16_user);
}
EXPORT_SYMBOL(vringh_notify_enable_user);

/**
 * vringh_notify_disable_user - don't tell us if something changes.
 * @vrh: the vring.
 *
 * This is our normal running state: we disable and then only enable when
 * we're going to sleep.
 */
void vringh_notify_disable_user(struct vringh *vrh)
{
	__vringh_notify_disable(vrh, putu16_user);
}
EXPORT_SYMBOL(vringh_notify_disable_user);

/**
 * vringh_need_notify_user - must we tell the other side about used buffers?
 * @vrh: the vring we've called vringh_complete_user() on.
 *
 * Returns -errno or 0 if we don't need to tell the other side, 1 if we do.
 */
int vringh_need_notify_user(struct vringh *vrh)
{
	return __vringh_need_notify(vrh, getu16_user);
}
EXPORT_SYMBOL(vringh_need_notify_user);
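
/*
 * Example (editorial sketch, not part of the original file): the usual
 * completion pattern pairs vringh_complete_user() with the notification
 * helpers; kick_guest() is a stand-in for however the host actually signals
 * the guest (an eventfd, an irqfd, ...).
 *
 *	vringh_complete_user(&vrh, head, written);
 *	if (vringh_need_notify_user(&vrh) > 0)
 *		kick_guest();
 *
 *	// Before sleeping, re-enable notifications; a false return means
 *	// more buffers arrived meanwhile and must be processed first.
 *	if (!vringh_notify_enable_user(&vrh))
 *		goto process_more;	// hypothetical label
 */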
/* Kernelspace access helpers. */
static inline int getu16_kern(const struct vringh *vrh,
			      u16 *val, const __virtio16 *p)
{
	*val = vringh16_to_cpu(vrh, READ_ONCE(*p));
	return 0;
}

static inline int putu16_kern(const struct vringh *vrh, __virtio16 *p, u16 val)
{
	WRITE_ONCE(*p, cpu_to_vringh16(vrh, val));
	return 0;
}

static inline int copydesc_kern(void *dst, const void *src, size_t len)
{
	memcpy(dst, src, len);
	return 0;
}

static inline int putused_kern(struct vring_used_elem *dst,
			       const struct vring_used_elem *src,
			       unsigned int num)
{
	memcpy(dst, src, num * sizeof(*dst));
	return 0;
}

static inline int xfer_kern(void *src, void *dst, size_t len)
{
	memcpy(dst, src, len);
	return 0;
}
/**
 * vringh_init_kern - initialize a vringh for a kernelspace vring.
 * @vrh: the vringh to initialize.
 * @features: the feature bits for this ring.
 * @num: the number of elements.
 * @weak_barriers: true if we only need memory barriers, not I/O.
 * @desc: the kernelspace descriptor pointer.
 * @avail: the kernelspace avail pointer.
 * @used: the kernelspace used pointer.
 *
 * Returns an error if num is invalid.
 */
int vringh_init_kern(struct vringh *vrh, u64 features,
		     unsigned int num, bool weak_barriers,
		     struct vring_desc *desc,
		     struct vring_avail *avail,
		     struct vring_used *used)
{
	/* Sane power of 2 please! */
	if (!num || num > 0xffff || (num & (num - 1))) {
		vringh_bad("Bad ring size %u", num);
		return -EINVAL;
	}

	vrh->little_endian = (features & (1ULL << VIRTIO_F_VERSION_1));
	vrh->event_indices = (features & (1 << VIRTIO_RING_F_EVENT_IDX));
	vrh->weak_barriers = weak_barriers;
	vrh->completed = 0;
	vrh->last_avail_idx = 0;
	vrh->last_used_idx = 0;
	vrh->vring.num = num;
	vrh->vring.desc = desc;
	vrh->vring.avail = avail;
	vrh->vring.used = used;
	return 0;
}
EXPORT_SYMBOL(vringh_init_kern);
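
/*
 * Example (editorial sketch, not part of the original file): a kernelspace
 * ring can reuse the standard layout computed by vring_init() from
 * <uapi/linux/virtio_ring.h> over one contiguous buffer; queue_mem, num and
 * features here are assumptions of this sketch.
 *
 *	struct vring vring;
 *	struct vringh vrh;
 *	int err;
 *
 *	vring_init(&vring, num, queue_mem, PAGE_SIZE);
 *	err = vringh_init_kern(&vrh, features, num, true,
 *			       vring.desc, vring.avail, vring.used);
 *	if (err)
 *		return err;
 */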
/**
 * vringh_getdesc_kern - get next available descriptor from kernelspace ring.
 * @vrh: the kernelspace vring.
 * @riov: where to put the readable descriptors (or NULL)
 * @wiov: where to put the writable descriptors (or NULL)
 * @head: head index we received, for passing to vringh_complete_kern().
 * @gfp: flags for allocating larger riov/wiov.
 *
 * Returns 0 if there was no descriptor, 1 if there was, or -errno.
 *
 * Note that on error return, you can tell the difference between an
 * invalid ring and a single invalid descriptor: in the former case,
 * *head will be vrh->vring.num. You may be able to ignore an invalid
 * descriptor, but there's not much you can do with an invalid ring.
 *
 * Note that you may need to clean up riov and wiov, even on error!
 */
int vringh_getdesc_kern(struct vringh *vrh,
			struct vringh_kiov *riov,
			struct vringh_kiov *wiov,
			u16 *head,
			gfp_t gfp)
{
	int err;

	err = __vringh_get_head(vrh, getu16_kern, &vrh->last_avail_idx);
	if (err < 0)
		return err;

	/* Empty... */
	if (err == vrh->vring.num)
		return 0;

	*head = err;
	err = __vringh_iov(vrh, *head, riov, wiov, no_range_check, NULL,
			   gfp, copydesc_kern);
	if (err)
		return err;

	return 1;
}
EXPORT_SYMBOL(vringh_getdesc_kern);
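
/*
 * Example (editorial sketch, not part of the original file): consuming one
 * request from a kernelspace ring, using the vringh_kiov_init() and
 * vringh_kiov_cleanup() helpers declared in <linux/vringh.h>.
 * handle_request() is a hypothetical handler that reads the request via
 * vringh_iov_pull_kern() and writes its reply via vringh_iov_push_kern(),
 * returning the number of bytes written.
 *
 *	struct vringh_kiov riov, wiov;
 *	u16 head;
 *	int err;
 *
 *	vringh_kiov_init(&riov, NULL, 0);
 *	vringh_kiov_init(&wiov, NULL, 0);
 *
 *	err = vringh_getdesc_kern(&vrh, &riov, &wiov, &head, GFP_KERNEL);
 *	if (err == 1) {
 *		u32 written = handle_request(&riov, &wiov);
 *
 *		vringh_complete_kern(&vrh, head, written);
 *		if (vringh_need_notify_kern(&vrh) > 0)
 *			kick_guest();	// hypothetical notification
 *	}
 *	vringh_kiov_cleanup(&riov);
 *	vringh_kiov_cleanup(&wiov);
 */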
/**
 * vringh_iov_pull_kern - copy bytes from vringh_kiov.
 * @riov: the riov as passed to vringh_getdesc_kern() (updated as we consume)
 * @dst: the place to copy.
 * @len: the maximum length to copy.
 *
 * Returns the bytes copied <= len or a negative errno.
 */
ssize_t vringh_iov_pull_kern(struct vringh_kiov *riov, void *dst, size_t len)
{
	return vringh_iov_xfer(riov, dst, len, xfer_kern);
}
EXPORT_SYMBOL(vringh_iov_pull_kern);

/**
 * vringh_iov_push_kern - copy bytes into vringh_kiov.
 * @wiov: the wiov as passed to vringh_getdesc_kern() (updated as we consume)
 * @src: the place to copy from.
 * @len: the maximum length to copy.
 *
 * Returns the bytes copied <= len or a negative errno.
 */
ssize_t vringh_iov_push_kern(struct vringh_kiov *wiov,
			     const void *src, size_t len)
{
	return vringh_iov_xfer(wiov, (void *)src, len, xfer_kern);
}
EXPORT_SYMBOL(vringh_iov_push_kern);
/**
 * vringh_abandon_kern - we've decided not to handle the descriptor(s).
 * @vrh: the vring.
 * @num: the number of descriptors to put back (ie. num
 *	 vringh_getdesc_kern() calls to undo).
 *
 * The next vringh_getdesc_kern() will return the old descriptor(s) again.
 */
void vringh_abandon_kern(struct vringh *vrh, unsigned int num)
{
	/* We only update vring_avail_event(vr) when we want to be notified,
	 * so we haven't changed that yet. */
	vrh->last_avail_idx -= num;
}
EXPORT_SYMBOL(vringh_abandon_kern);
/**
 * vringh_complete_kern - we've finished with descriptor, publish it.
 * @vrh: the vring.
 * @head: the head as filled in by vringh_getdesc_kern.
 * @len: the length of data we have written.
 *
 * You should check vringh_need_notify_kern() after one or more calls
 * to this function.
 */
int vringh_complete_kern(struct vringh *vrh, u16 head, u32 len)
{
	struct vring_used_elem used;

	used.id = cpu_to_vringh32(vrh, head);
	used.len = cpu_to_vringh32(vrh, len);

	return __vringh_complete(vrh, &used, 1, putu16_kern, putused_kern);
}
EXPORT_SYMBOL(vringh_complete_kern);

/**
 * vringh_notify_enable_kern - we want to know if something changes.
 * @vrh: the vring.
 *
 * This always enables notifications, but returns false if there are
 * now more buffers available in the vring.
 */
bool vringh_notify_enable_kern(struct vringh *vrh)
{
	return __vringh_notify_enable(vrh, getu16_kern, putu16_kern);
}
EXPORT_SYMBOL(vringh_notify_enable_kern);

/**
 * vringh_notify_disable_kern - don't tell us if something changes.
 * @vrh: the vring.
 *
 * This is our normal running state: we disable and then only enable when
 * we're going to sleep.
 */
void vringh_notify_disable_kern(struct vringh *vrh)
{
	__vringh_notify_disable(vrh, putu16_kern);
}
EXPORT_SYMBOL(vringh_notify_disable_kern);

/**
 * vringh_need_notify_kern - must we tell the other side about used buffers?
 * @vrh: the vring we've called vringh_complete_kern() on.
 *
 * Returns -errno or 0 if we don't need to tell the other side, 1 if we do.
 */
int vringh_need_notify_kern(struct vringh *vrh)
{
	return __vringh_need_notify(vrh, getu16_kern);
}
EXPORT_SYMBOL(vringh_need_notify_kern);

MODULE_LICENSE("GPL");