vsock.c 21 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863
  1. /*
  2. * vhost transport for vsock
  3. *
  4. * Copyright (C) 2013-2015 Red Hat, Inc.
  5. * Author: Asias He <asias@redhat.com>
  6. * Stefan Hajnoczi <stefanha@redhat.com>
  7. *
  8. * This work is licensed under the terms of the GNU GPL, version 2.
  9. */
  10. #include <linux/miscdevice.h>
  11. #include <linux/atomic.h>
  12. #include <linux/module.h>
  13. #include <linux/mutex.h>
  14. #include <linux/vmalloc.h>
  15. #include <net/sock.h>
  16. #include <linux/virtio_vsock.h>
  17. #include <linux/vhost.h>
  18. #include <linux/hashtable.h>
  19. #include <net/af_vsock.h>
  20. #include "vhost.h"
  21. #define VHOST_VSOCK_DEFAULT_HOST_CID 2
  22. /* Max number of bytes transferred before requeueing the job.
  23. * Using this limit prevents one virtqueue from starving others. */
  24. #define VHOST_VSOCK_WEIGHT 0x80000
  25. /* Max number of packets transferred before requeueing the job.
  26. * Using this limit prevents one virtqueue from starving others with
  27. * small pkts.
  28. */
  29. #define VHOST_VSOCK_PKT_WEIGHT 256
  30. enum {
  31. VHOST_VSOCK_FEATURES = VHOST_FEATURES,
  32. };
  33. /* Used to track all the vhost_vsock instances on the system. */
  34. static DEFINE_SPINLOCK(vhost_vsock_lock);
  35. static DEFINE_READ_MOSTLY_HASHTABLE(vhost_vsock_hash, 8);
  36. struct vhost_vsock {
  37. struct vhost_dev dev;
  38. struct vhost_virtqueue vqs[2];
  39. /* Link to global vhost_vsock_hash, writes use vhost_vsock_lock */
  40. struct hlist_node hash;
  41. struct vhost_work send_pkt_work;
  42. spinlock_t send_pkt_list_lock;
  43. struct list_head send_pkt_list; /* host->guest pending packets */
  44. atomic_t queued_replies;
  45. u32 guest_cid;
  46. };
  47. static u32 vhost_transport_get_local_cid(void)
  48. {
  49. return VHOST_VSOCK_DEFAULT_HOST_CID;
  50. }
  51. /* Callers that dereference the return value must hold vhost_vsock_lock or the
  52. * RCU read lock.
  53. */
  54. static struct vhost_vsock *vhost_vsock_get(u32 guest_cid)
  55. {
  56. struct vhost_vsock *vsock;
  57. hash_for_each_possible_rcu(vhost_vsock_hash, vsock, hash, guest_cid) {
  58. u32 other_cid = vsock->guest_cid;
  59. /* Skip instances that have no CID yet */
  60. if (other_cid == 0)
  61. continue;
  62. if (other_cid == guest_cid)
  63. return vsock;
  64. }
  65. return NULL;
  66. }
  67. static void
  68. vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
  69. struct vhost_virtqueue *vq)
  70. {
  71. struct vhost_virtqueue *tx_vq = &vsock->vqs[VSOCK_VQ_TX];
  72. int pkts = 0, total_len = 0;
  73. bool added = false;
  74. bool restart_tx = false;
  75. mutex_lock(&vq->mutex);
  76. if (!vq->private_data)
  77. goto out;
  78. /* Avoid further vmexits, we're already processing the virtqueue */
  79. vhost_disable_notify(&vsock->dev, vq);
  80. do {
  81. struct virtio_vsock_pkt *pkt;
  82. struct iov_iter iov_iter;
  83. unsigned out, in;
  84. size_t nbytes;
  85. size_t iov_len, payload_len;
  86. int head;
  87. spin_lock_bh(&vsock->send_pkt_list_lock);
  88. if (list_empty(&vsock->send_pkt_list)) {
  89. spin_unlock_bh(&vsock->send_pkt_list_lock);
  90. vhost_enable_notify(&vsock->dev, vq);
  91. break;
  92. }
  93. pkt = list_first_entry(&vsock->send_pkt_list,
  94. struct virtio_vsock_pkt, list);
  95. list_del_init(&pkt->list);
  96. spin_unlock_bh(&vsock->send_pkt_list_lock);
  97. head = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
  98. &out, &in, NULL, NULL);
  99. if (head < 0) {
  100. spin_lock_bh(&vsock->send_pkt_list_lock);
  101. list_add(&pkt->list, &vsock->send_pkt_list);
  102. spin_unlock_bh(&vsock->send_pkt_list_lock);
  103. break;
  104. }
  105. if (head == vq->num) {
  106. spin_lock_bh(&vsock->send_pkt_list_lock);
  107. list_add(&pkt->list, &vsock->send_pkt_list);
  108. spin_unlock_bh(&vsock->send_pkt_list_lock);
  109. /* We cannot finish yet if more buffers snuck in while
  110. * re-enabling notify.
  111. */
  112. if (unlikely(vhost_enable_notify(&vsock->dev, vq))) {
  113. vhost_disable_notify(&vsock->dev, vq);
  114. continue;
  115. }
  116. break;
  117. }
  118. if (out) {
  119. virtio_transport_free_pkt(pkt);
  120. vq_err(vq, "Expected 0 output buffers, got %u\n", out);
  121. break;
  122. }
  123. iov_len = iov_length(&vq->iov[out], in);
  124. if (iov_len < sizeof(pkt->hdr)) {
  125. virtio_transport_free_pkt(pkt);
  126. vq_err(vq, "Buffer len [%zu] too small\n", iov_len);
  127. break;
  128. }
  129. iov_iter_init(&iov_iter, READ, &vq->iov[out], in, iov_len);
  130. payload_len = pkt->len - pkt->off;
  131. /* If the packet is greater than the space available in the
  132. * buffer, we split it using multiple buffers.
  133. */
  134. if (payload_len > iov_len - sizeof(pkt->hdr))
  135. payload_len = iov_len - sizeof(pkt->hdr);
  136. /* Set the correct length in the header */
  137. pkt->hdr.len = cpu_to_le32(payload_len);
  138. nbytes = copy_to_iter(&pkt->hdr, sizeof(pkt->hdr), &iov_iter);
  139. if (nbytes != sizeof(pkt->hdr)) {
  140. virtio_transport_free_pkt(pkt);
  141. vq_err(vq, "Faulted on copying pkt hdr\n");
  142. break;
  143. }
  144. nbytes = copy_to_iter(pkt->buf + pkt->off, payload_len,
  145. &iov_iter);
  146. if (nbytes != payload_len) {
  147. virtio_transport_free_pkt(pkt);
  148. vq_err(vq, "Faulted on copying pkt buf\n");
  149. break;
  150. }
  151. /* Deliver to monitoring devices all packets that we
  152. * will transmit.
  153. */
  154. virtio_transport_deliver_tap_pkt(pkt);
  155. vhost_add_used(vq, head, sizeof(pkt->hdr) + payload_len);
  156. added = true;
  157. pkt->off += payload_len;
  158. total_len += payload_len;
  159. /* If we didn't send all the payload we can requeue the packet
  160. * to send it with the next available buffer.
  161. */
  162. if (pkt->off < pkt->len) {
  163. spin_lock_bh(&vsock->send_pkt_list_lock);
  164. list_add(&pkt->list, &vsock->send_pkt_list);
  165. spin_unlock_bh(&vsock->send_pkt_list_lock);
  166. } else {
  167. if (pkt->reply) {
  168. int val;
  169. val = atomic_dec_return(&vsock->queued_replies);
  170. /* Do we have resources to resume tx
  171. * processing?
  172. */
  173. if (val + 1 == tx_vq->num)
  174. restart_tx = true;
  175. }
  176. virtio_transport_free_pkt(pkt);
  177. }
  178. } while(likely(!vhost_exceeds_weight(vq, ++pkts, total_len)));
  179. if (added)
  180. vhost_signal(&vsock->dev, vq);
  181. out:
  182. mutex_unlock(&vq->mutex);
  183. if (restart_tx)
  184. vhost_poll_queue(&tx_vq->poll);
  185. }
  186. static void vhost_transport_send_pkt_work(struct vhost_work *work)
  187. {
  188. struct vhost_virtqueue *vq;
  189. struct vhost_vsock *vsock;
  190. vsock = container_of(work, struct vhost_vsock, send_pkt_work);
  191. vq = &vsock->vqs[VSOCK_VQ_RX];
  192. vhost_transport_do_send_pkt(vsock, vq);
  193. }
  194. static int
  195. vhost_transport_send_pkt(struct virtio_vsock_pkt *pkt)
  196. {
  197. struct vhost_vsock *vsock;
  198. int len = pkt->len;
  199. rcu_read_lock();
  200. /* Find the vhost_vsock according to guest context id */
  201. vsock = vhost_vsock_get(le64_to_cpu(pkt->hdr.dst_cid));
  202. if (!vsock) {
  203. rcu_read_unlock();
  204. virtio_transport_free_pkt(pkt);
  205. return -ENODEV;
  206. }
  207. if (pkt->reply)
  208. atomic_inc(&vsock->queued_replies);
  209. spin_lock_bh(&vsock->send_pkt_list_lock);
  210. list_add_tail(&pkt->list, &vsock->send_pkt_list);
  211. spin_unlock_bh(&vsock->send_pkt_list_lock);
  212. vhost_work_queue(&vsock->dev, &vsock->send_pkt_work);
  213. rcu_read_unlock();
  214. return len;
  215. }
  216. static int
  217. vhost_transport_cancel_pkt(struct vsock_sock *vsk)
  218. {
  219. struct vhost_vsock *vsock;
  220. struct virtio_vsock_pkt *pkt, *n;
  221. int cnt = 0;
  222. int ret = -ENODEV;
  223. LIST_HEAD(freeme);
  224. rcu_read_lock();
  225. /* Find the vhost_vsock according to guest context id */
  226. vsock = vhost_vsock_get(vsk->remote_addr.svm_cid);
  227. if (!vsock)
  228. goto out;
  229. spin_lock_bh(&vsock->send_pkt_list_lock);
  230. list_for_each_entry_safe(pkt, n, &vsock->send_pkt_list, list) {
  231. if (pkt->vsk != vsk)
  232. continue;
  233. list_move(&pkt->list, &freeme);
  234. }
  235. spin_unlock_bh(&vsock->send_pkt_list_lock);
  236. list_for_each_entry_safe(pkt, n, &freeme, list) {
  237. if (pkt->reply)
  238. cnt++;
  239. list_del(&pkt->list);
  240. virtio_transport_free_pkt(pkt);
  241. }
  242. if (cnt) {
  243. struct vhost_virtqueue *tx_vq = &vsock->vqs[VSOCK_VQ_TX];
  244. int new_cnt;
  245. new_cnt = atomic_sub_return(cnt, &vsock->queued_replies);
  246. if (new_cnt + cnt >= tx_vq->num && new_cnt < tx_vq->num)
  247. vhost_poll_queue(&tx_vq->poll);
  248. }
  249. ret = 0;
  250. out:
  251. rcu_read_unlock();
  252. return ret;
  253. }
  254. static struct virtio_vsock_pkt *
  255. vhost_vsock_alloc_pkt(struct vhost_virtqueue *vq,
  256. unsigned int out, unsigned int in)
  257. {
  258. struct virtio_vsock_pkt *pkt;
  259. struct iov_iter iov_iter;
  260. size_t nbytes;
  261. size_t len;
  262. if (in != 0) {
  263. vq_err(vq, "Expected 0 input buffers, got %u\n", in);
  264. return NULL;
  265. }
  266. pkt = kzalloc(sizeof(*pkt), GFP_KERNEL);
  267. if (!pkt)
  268. return NULL;
  269. len = iov_length(vq->iov, out);
  270. iov_iter_init(&iov_iter, WRITE, vq->iov, out, len);
  271. nbytes = copy_from_iter(&pkt->hdr, sizeof(pkt->hdr), &iov_iter);
  272. if (nbytes != sizeof(pkt->hdr)) {
  273. vq_err(vq, "Expected %zu bytes for pkt->hdr, got %zu bytes\n",
  274. sizeof(pkt->hdr), nbytes);
  275. kfree(pkt);
  276. return NULL;
  277. }
  278. if (le16_to_cpu(pkt->hdr.type) == VIRTIO_VSOCK_TYPE_STREAM)
  279. pkt->len = le32_to_cpu(pkt->hdr.len);
  280. /* No payload */
  281. if (!pkt->len)
  282. return pkt;
  283. /* The pkt is too big */
  284. if (pkt->len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE) {
  285. kfree(pkt);
  286. return NULL;
  287. }
  288. pkt->buf = kmalloc(pkt->len, GFP_KERNEL);
  289. if (!pkt->buf) {
  290. kfree(pkt);
  291. return NULL;
  292. }
  293. nbytes = copy_from_iter(pkt->buf, pkt->len, &iov_iter);
  294. if (nbytes != pkt->len) {
  295. vq_err(vq, "Expected %u byte payload, got %zu bytes\n",
  296. pkt->len, nbytes);
  297. virtio_transport_free_pkt(pkt);
  298. return NULL;
  299. }
  300. return pkt;
  301. }
  302. /* Is there space left for replies to rx packets? */
  303. static bool vhost_vsock_more_replies(struct vhost_vsock *vsock)
  304. {
  305. struct vhost_virtqueue *vq = &vsock->vqs[VSOCK_VQ_TX];
  306. int val;
  307. smp_rmb(); /* paired with atomic_inc() and atomic_dec_return() */
  308. val = atomic_read(&vsock->queued_replies);
  309. return val < vq->num;
  310. }
  311. static struct virtio_transport vhost_transport = {
  312. .transport = {
  313. .get_local_cid = vhost_transport_get_local_cid,
  314. .init = virtio_transport_do_socket_init,
  315. .destruct = virtio_transport_destruct,
  316. .release = virtio_transport_release,
  317. .connect = virtio_transport_connect,
  318. .shutdown = virtio_transport_shutdown,
  319. .cancel_pkt = vhost_transport_cancel_pkt,
  320. .dgram_enqueue = virtio_transport_dgram_enqueue,
  321. .dgram_dequeue = virtio_transport_dgram_dequeue,
  322. .dgram_bind = virtio_transport_dgram_bind,
  323. .dgram_allow = virtio_transport_dgram_allow,
  324. .stream_enqueue = virtio_transport_stream_enqueue,
  325. .stream_dequeue = virtio_transport_stream_dequeue,
  326. .stream_has_data = virtio_transport_stream_has_data,
  327. .stream_has_space = virtio_transport_stream_has_space,
  328. .stream_rcvhiwat = virtio_transport_stream_rcvhiwat,
  329. .stream_is_active = virtio_transport_stream_is_active,
  330. .stream_allow = virtio_transport_stream_allow,
  331. .notify_poll_in = virtio_transport_notify_poll_in,
  332. .notify_poll_out = virtio_transport_notify_poll_out,
  333. .notify_recv_init = virtio_transport_notify_recv_init,
  334. .notify_recv_pre_block = virtio_transport_notify_recv_pre_block,
  335. .notify_recv_pre_dequeue = virtio_transport_notify_recv_pre_dequeue,
  336. .notify_recv_post_dequeue = virtio_transport_notify_recv_post_dequeue,
  337. .notify_send_init = virtio_transport_notify_send_init,
  338. .notify_send_pre_block = virtio_transport_notify_send_pre_block,
  339. .notify_send_pre_enqueue = virtio_transport_notify_send_pre_enqueue,
  340. .notify_send_post_enqueue = virtio_transport_notify_send_post_enqueue,
  341. .set_buffer_size = virtio_transport_set_buffer_size,
  342. .set_min_buffer_size = virtio_transport_set_min_buffer_size,
  343. .set_max_buffer_size = virtio_transport_set_max_buffer_size,
  344. .get_buffer_size = virtio_transport_get_buffer_size,
  345. .get_min_buffer_size = virtio_transport_get_min_buffer_size,
  346. .get_max_buffer_size = virtio_transport_get_max_buffer_size,
  347. },
  348. .send_pkt = vhost_transport_send_pkt,
  349. };
  350. static void vhost_vsock_handle_tx_kick(struct vhost_work *work)
  351. {
  352. struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
  353. poll.work);
  354. struct vhost_vsock *vsock = container_of(vq->dev, struct vhost_vsock,
  355. dev);
  356. struct virtio_vsock_pkt *pkt;
  357. int head, pkts = 0, total_len = 0;
  358. unsigned int out, in;
  359. bool added = false;
  360. mutex_lock(&vq->mutex);
  361. if (!vq->private_data)
  362. goto out;
  363. vhost_disable_notify(&vsock->dev, vq);
  364. do {
  365. u32 len;
  366. if (!vhost_vsock_more_replies(vsock)) {
  367. /* Stop tx until the device processes already
  368. * pending replies. Leave tx virtqueue
  369. * callbacks disabled.
  370. */
  371. goto no_more_replies;
  372. }
  373. head = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
  374. &out, &in, NULL, NULL);
  375. if (head < 0)
  376. break;
  377. if (head == vq->num) {
  378. if (unlikely(vhost_enable_notify(&vsock->dev, vq))) {
  379. vhost_disable_notify(&vsock->dev, vq);
  380. continue;
  381. }
  382. break;
  383. }
  384. pkt = vhost_vsock_alloc_pkt(vq, out, in);
  385. if (!pkt) {
  386. vq_err(vq, "Faulted on pkt\n");
  387. continue;
  388. }
  389. len = pkt->len;
  390. /* Deliver to monitoring devices all received packets */
  391. virtio_transport_deliver_tap_pkt(pkt);
  392. /* Only accept correctly addressed packets */
  393. if (le64_to_cpu(pkt->hdr.src_cid) == vsock->guest_cid &&
  394. le64_to_cpu(pkt->hdr.dst_cid) ==
  395. vhost_transport_get_local_cid())
  396. virtio_transport_recv_pkt(&vhost_transport, pkt);
  397. else
  398. virtio_transport_free_pkt(pkt);
  399. len += sizeof(pkt->hdr);
  400. vhost_add_used(vq, head, len);
  401. total_len += len;
  402. added = true;
  403. } while(likely(!vhost_exceeds_weight(vq, ++pkts, total_len)));
  404. no_more_replies:
  405. if (added)
  406. vhost_signal(&vsock->dev, vq);
  407. out:
  408. mutex_unlock(&vq->mutex);
  409. }
  410. static void vhost_vsock_handle_rx_kick(struct vhost_work *work)
  411. {
  412. struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
  413. poll.work);
  414. struct vhost_vsock *vsock = container_of(vq->dev, struct vhost_vsock,
  415. dev);
  416. vhost_transport_do_send_pkt(vsock, vq);
  417. }
  418. static int vhost_vsock_start(struct vhost_vsock *vsock)
  419. {
  420. struct vhost_virtqueue *vq;
  421. size_t i;
  422. int ret;
  423. mutex_lock(&vsock->dev.mutex);
  424. ret = vhost_dev_check_owner(&vsock->dev);
  425. if (ret)
  426. goto err;
  427. for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) {
  428. vq = &vsock->vqs[i];
  429. mutex_lock(&vq->mutex);
  430. if (!vhost_vq_access_ok(vq)) {
  431. ret = -EFAULT;
  432. goto err_vq;
  433. }
  434. if (!vq->private_data) {
  435. vq->private_data = vsock;
  436. ret = vhost_vq_init_access(vq);
  437. if (ret)
  438. goto err_vq;
  439. }
  440. mutex_unlock(&vq->mutex);
  441. }
  442. /* Some packets may have been queued before the device was started,
  443. * let's kick the send worker to send them.
  444. */
  445. vhost_work_queue(&vsock->dev, &vsock->send_pkt_work);
  446. mutex_unlock(&vsock->dev.mutex);
  447. return 0;
  448. err_vq:
  449. vq->private_data = NULL;
  450. mutex_unlock(&vq->mutex);
  451. for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) {
  452. vq = &vsock->vqs[i];
  453. mutex_lock(&vq->mutex);
  454. vq->private_data = NULL;
  455. mutex_unlock(&vq->mutex);
  456. }
  457. err:
  458. mutex_unlock(&vsock->dev.mutex);
  459. return ret;
  460. }
  461. static int vhost_vsock_stop(struct vhost_vsock *vsock)
  462. {
  463. size_t i;
  464. int ret;
  465. mutex_lock(&vsock->dev.mutex);
  466. ret = vhost_dev_check_owner(&vsock->dev);
  467. if (ret)
  468. goto err;
  469. for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) {
  470. struct vhost_virtqueue *vq = &vsock->vqs[i];
  471. mutex_lock(&vq->mutex);
  472. vq->private_data = NULL;
  473. mutex_unlock(&vq->mutex);
  474. }
  475. err:
  476. mutex_unlock(&vsock->dev.mutex);
  477. return ret;
  478. }
  /*
   * Release the memory of @vsock.  kvfree() is used because the structure is
   * allocated with kvmalloc() in vhost_vsock_dev_open().
   */
  static void vhost_vsock_free(struct vhost_vsock *vsock)
  {
  	kvfree(vsock);
  }
  483. static int vhost_vsock_dev_open(struct inode *inode, struct file *file)
  484. {
  485. struct vhost_virtqueue **vqs;
  486. struct vhost_vsock *vsock;
  487. int ret;
  488. /* This struct is large and allocation could fail, fall back to vmalloc
  489. * if there is no other way.
  490. */
  491. vsock = kvmalloc(sizeof(*vsock), GFP_KERNEL | __GFP_RETRY_MAYFAIL);
  492. if (!vsock)
  493. return -ENOMEM;
  494. vqs = kmalloc_array(ARRAY_SIZE(vsock->vqs), sizeof(*vqs), GFP_KERNEL);
  495. if (!vqs) {
  496. ret = -ENOMEM;
  497. goto out;
  498. }
  499. vsock->guest_cid = 0; /* no CID assigned yet */
  500. atomic_set(&vsock->queued_replies, 0);
  501. vqs[VSOCK_VQ_TX] = &vsock->vqs[VSOCK_VQ_TX];
  502. vqs[VSOCK_VQ_RX] = &vsock->vqs[VSOCK_VQ_RX];
  503. vsock->vqs[VSOCK_VQ_TX].handle_kick = vhost_vsock_handle_tx_kick;
  504. vsock->vqs[VSOCK_VQ_RX].handle_kick = vhost_vsock_handle_rx_kick;
  505. vhost_dev_init(&vsock->dev, vqs, ARRAY_SIZE(vsock->vqs),
  506. UIO_MAXIOV, VHOST_VSOCK_PKT_WEIGHT,
  507. VHOST_VSOCK_WEIGHT);
  508. file->private_data = vsock;
  509. spin_lock_init(&vsock->send_pkt_list_lock);
  510. INIT_LIST_HEAD(&vsock->send_pkt_list);
  511. vhost_work_init(&vsock->send_pkt_work, vhost_transport_send_pkt_work);
  512. return 0;
  513. out:
  514. vhost_vsock_free(vsock);
  515. return ret;
  516. }
  517. static void vhost_vsock_flush(struct vhost_vsock *vsock)
  518. {
  519. int i;
  520. for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++)
  521. if (vsock->vqs[i].handle_kick)
  522. vhost_poll_flush(&vsock->vqs[i].poll);
  523. vhost_work_flush(&vsock->dev, &vsock->send_pkt_work);
  524. }
  525. static void vhost_vsock_reset_orphans(struct sock *sk)
  526. {
  527. struct vsock_sock *vsk = vsock_sk(sk);
  528. /* vmci_transport.c doesn't take sk_lock here either. At least we're
  529. * under vsock_table_lock so the sock cannot disappear while we're
  530. * executing.
  531. */
  532. /* If the peer is still valid, no need to reset connection */
  533. if (vhost_vsock_get(vsk->remote_addr.svm_cid))
  534. return;
  535. /* If the close timeout is pending, let it expire. This avoids races
  536. * with the timeout callback.
  537. */
  538. if (vsk->close_work_scheduled)
  539. return;
  540. sock_set_flag(sk, SOCK_DONE);
  541. vsk->peer_shutdown = SHUTDOWN_MASK;
  542. sk->sk_state = SS_UNCONNECTED;
  543. sk->sk_err = ECONNRESET;
  544. sk->sk_error_report(sk);
  545. }
  546. static int vhost_vsock_dev_release(struct inode *inode, struct file *file)
  547. {
  548. struct vhost_vsock *vsock = file->private_data;
  549. spin_lock_bh(&vhost_vsock_lock);
  550. if (vsock->guest_cid)
  551. hash_del_rcu(&vsock->hash);
  552. spin_unlock_bh(&vhost_vsock_lock);
  553. /* Wait for other CPUs to finish using vsock */
  554. synchronize_rcu();
  555. /* Iterating over all connections for all CIDs to find orphans is
  556. * inefficient. Room for improvement here. */
  557. vsock_for_each_connected_socket(vhost_vsock_reset_orphans);
  558. vhost_vsock_stop(vsock);
  559. vhost_vsock_flush(vsock);
  560. vhost_dev_stop(&vsock->dev);
  561. spin_lock_bh(&vsock->send_pkt_list_lock);
  562. while (!list_empty(&vsock->send_pkt_list)) {
  563. struct virtio_vsock_pkt *pkt;
  564. pkt = list_first_entry(&vsock->send_pkt_list,
  565. struct virtio_vsock_pkt, list);
  566. list_del_init(&pkt->list);
  567. virtio_transport_free_pkt(pkt);
  568. }
  569. spin_unlock_bh(&vsock->send_pkt_list_lock);
  570. vhost_dev_cleanup(&vsock->dev);
  571. kfree(vsock->dev.vqs);
  572. vhost_vsock_free(vsock);
  573. return 0;
  574. }
  575. static int vhost_vsock_set_cid(struct vhost_vsock *vsock, u64 guest_cid)
  576. {
  577. struct vhost_vsock *other;
  578. /* Refuse reserved CIDs */
  579. if (guest_cid <= VMADDR_CID_HOST ||
  580. guest_cid == U32_MAX)
  581. return -EINVAL;
  582. /* 64-bit CIDs are not yet supported */
  583. if (guest_cid > U32_MAX)
  584. return -EINVAL;
  585. /* Refuse if CID is already in use */
  586. spin_lock_bh(&vhost_vsock_lock);
  587. other = vhost_vsock_get(guest_cid);
  588. if (other && other != vsock) {
  589. spin_unlock_bh(&vhost_vsock_lock);
  590. return -EADDRINUSE;
  591. }
  592. if (vsock->guest_cid)
  593. hash_del_rcu(&vsock->hash);
  594. vsock->guest_cid = guest_cid;
  595. hash_add_rcu(vhost_vsock_hash, &vsock->hash, vsock->guest_cid);
  596. spin_unlock_bh(&vhost_vsock_lock);
  597. return 0;
  598. }
  599. static int vhost_vsock_set_features(struct vhost_vsock *vsock, u64 features)
  600. {
  601. struct vhost_virtqueue *vq;
  602. int i;
  603. if (features & ~VHOST_VSOCK_FEATURES)
  604. return -EOPNOTSUPP;
  605. mutex_lock(&vsock->dev.mutex);
  606. if ((features & (1 << VHOST_F_LOG_ALL)) &&
  607. !vhost_log_access_ok(&vsock->dev)) {
  608. mutex_unlock(&vsock->dev.mutex);
  609. return -EFAULT;
  610. }
  611. for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) {
  612. vq = &vsock->vqs[i];
  613. mutex_lock(&vq->mutex);
  614. vq->acked_features = features;
  615. mutex_unlock(&vq->mutex);
  616. }
  617. mutex_unlock(&vsock->dev.mutex);
  618. return 0;
  619. }
  620. static long vhost_vsock_dev_ioctl(struct file *f, unsigned int ioctl,
  621. unsigned long arg)
  622. {
  623. struct vhost_vsock *vsock = f->private_data;
  624. void __user *argp = (void __user *)arg;
  625. u64 guest_cid;
  626. u64 features;
  627. int start;
  628. int r;
  629. switch (ioctl) {
  630. case VHOST_VSOCK_SET_GUEST_CID:
  631. if (copy_from_user(&guest_cid, argp, sizeof(guest_cid)))
  632. return -EFAULT;
  633. return vhost_vsock_set_cid(vsock, guest_cid);
  634. case VHOST_VSOCK_SET_RUNNING:
  635. if (copy_from_user(&start, argp, sizeof(start)))
  636. return -EFAULT;
  637. if (start)
  638. return vhost_vsock_start(vsock);
  639. else
  640. return vhost_vsock_stop(vsock);
  641. case VHOST_GET_FEATURES:
  642. features = VHOST_VSOCK_FEATURES;
  643. if (copy_to_user(argp, &features, sizeof(features)))
  644. return -EFAULT;
  645. return 0;
  646. case VHOST_SET_FEATURES:
  647. if (copy_from_user(&features, argp, sizeof(features)))
  648. return -EFAULT;
  649. return vhost_vsock_set_features(vsock, features);
  650. default:
  651. mutex_lock(&vsock->dev.mutex);
  652. r = vhost_dev_ioctl(&vsock->dev, ioctl, argp);
  653. if (r == -ENOIOCTLCMD)
  654. r = vhost_vring_ioctl(&vsock->dev, ioctl, argp);
  655. else
  656. vhost_vsock_flush(vsock);
  657. mutex_unlock(&vsock->dev.mutex);
  658. return r;
  659. }
  660. }
  #ifdef CONFIG_COMPAT
  /* Compat ioctl entry: convert the user pointer, then use the native path. */
  static long vhost_vsock_dev_compat_ioctl(struct file *f, unsigned int ioctl,
  					 unsigned long arg)
  {
  	unsigned long native_arg = (unsigned long)compat_ptr(arg);

  	return vhost_vsock_dev_ioctl(f, ioctl, native_arg);
  }
  #endif
  668. static const struct file_operations vhost_vsock_fops = {
  669. .owner = THIS_MODULE,
  670. .open = vhost_vsock_dev_open,
  671. .release = vhost_vsock_dev_release,
  672. .llseek = noop_llseek,
  673. .unlocked_ioctl = vhost_vsock_dev_ioctl,
  674. #ifdef CONFIG_COMPAT
  675. .compat_ioctl = vhost_vsock_dev_compat_ioctl,
  676. #endif
  677. };
  678. static struct miscdevice vhost_vsock_misc = {
  679. .minor = VHOST_VSOCK_MINOR,
  680. .name = "vhost-vsock",
  681. .fops = &vhost_vsock_fops,
  682. };
  683. static int __init vhost_vsock_init(void)
  684. {
  685. int ret;
  686. ret = vsock_core_init(&vhost_transport.transport);
  687. if (ret < 0)
  688. return ret;
  689. return misc_register(&vhost_vsock_misc);
  690. };
  691. static void __exit vhost_vsock_exit(void)
  692. {
  693. misc_deregister(&vhost_vsock_misc);
  694. vsock_core_exit();
  695. };
  696. module_init(vhost_vsock_init);
  697. module_exit(vhost_vsock_exit);
  698. MODULE_LICENSE("GPL v2");
  699. MODULE_AUTHOR("Asias He");
  700. MODULE_DESCRIPTION("vhost transport for vsock ");
  701. MODULE_ALIAS_MISCDEV(VHOST_VSOCK_MINOR);
  702. MODULE_ALIAS("devname:vhost-vsock");