// SPDX-License-Identifier: GPL-2.0-only
/*
 * common code for virtio vsock
 *
 * Copyright (C) 2013-2015 Red Hat, Inc.
 * Author: Asias He <asias@redhat.com>
 *         Stefan Hajnoczi <stefanha@redhat.com>
 */
#include <linux/spinlock.h>
#include <linux/module.h>
#include <linux/sched/signal.h>
#include <linux/ctype.h>
#include <linux/list.h>
#include <linux/virtio_vsock.h>
#include <uapi/linux/vsockmon.h>

#include <net/sock.h>
#include <net/af_vsock.h>

#define CREATE_TRACE_POINTS
#include <trace/events/vsock_virtio_transport_common.h>

/* How long to wait for graceful shutdown of a connection */
#define VSOCK_CLOSE_TIMEOUT (8 * HZ)

/* Threshold for detecting small packets to copy */
#define GOOD_COPY_LEN  128

static void virtio_transport_cancel_close_work(struct vsock_sock *vsk,
                                               bool cancel_timeout);

static const struct virtio_transport *
virtio_transport_get_ops(struct vsock_sock *vsk)
{
        const struct vsock_transport *t = vsock_core_get_transport(vsk);

        if (WARN_ON(!t))
                return NULL;

        return container_of(t, struct virtio_transport, transport);
}

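/* MSG_ZEROCOPY is honoured only when the whole iov fits in a single
 * packet, the iterator starts at offset zero, and the transport's
 * optional ->can_msgzerocopy() callback (if present) accepts the
 * number of pages to be sent, plus one more for the packet header.
 */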
static bool virtio_transport_can_zcopy(const struct virtio_transport *t_ops,
                                       struct virtio_vsock_pkt_info *info,
                                       size_t pkt_len)
{
        struct iov_iter *iov_iter;

        if (!info->msg)
                return false;

        iov_iter = &info->msg->msg_iter;

        if (iov_iter->iov_offset)
                return false;

        /* We can't send whole iov. */
        if (iov_iter->count > pkt_len)
                return false;

        /* Check that transport can send data in zerocopy mode. */
        t_ops = virtio_transport_get_ops(info->vsk);

        if (t_ops->can_msgzerocopy) {
                int pages_to_send = iov_iter_npages(iov_iter, MAX_SKB_FRAGS);

                /* +1 is for packet header. */
                return t_ops->can_msgzerocopy(pages_to_send + 1);
        }

        return true;
}

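/* Attach a zerocopy completion context to 'skb': either reuse the
 * ubuf_info already supplied in msg->msg_ubuf, or allocate a new
 * msg_zerocopy context so the sender is notified on the socket's
 * error queue once transmission completes.
 */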
static int virtio_transport_init_zcopy_skb(struct vsock_sock *vsk,
                                           struct sk_buff *skb,
                                           struct msghdr *msg,
                                           bool zerocopy)
{
        struct ubuf_info *uarg;

        if (msg->msg_ubuf) {
                uarg = msg->msg_ubuf;
                net_zcopy_get(uarg);
        } else {
                struct iov_iter *iter = &msg->msg_iter;
                struct ubuf_info_msgzc *uarg_zc;

                uarg = msg_zerocopy_realloc(sk_vsock(vsk),
                                            iter->count,
                                            NULL);
                if (!uarg)
                        return -1;

                uarg_zc = uarg_to_msgzc(uarg);
                uarg_zc->zerocopy = zerocopy ? 1 : 0;
        }

        skb_zcopy_init(skb, uarg);

        return 0;
}

static int virtio_transport_fill_skb(struct sk_buff *skb,
                                     struct virtio_vsock_pkt_info *info,
                                     size_t len,
                                     bool zcopy)
{
        if (zcopy)
                return __zerocopy_sg_from_iter(info->msg, NULL, skb,
                                               &info->msg->msg_iter,
                                               len);

        return memcpy_from_msg(skb_put(skb, len), info->msg, len);
}

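/* Fill in the virtio_vsock_hdr that lives in the skb's headroom.  All
 * multi-byte fields are little-endian, as required by the virtio spec.
 * buf_alloc and fwd_cnt are zeroed here; they are stamped with current
 * values by virtio_transport_inc_tx_pkt() just before transmission.
 */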
static void virtio_transport_init_hdr(struct sk_buff *skb,
                                      struct virtio_vsock_pkt_info *info,
                                      size_t payload_len,
                                      u32 src_cid,
                                      u32 src_port,
                                      u32 dst_cid,
                                      u32 dst_port)
{
        struct virtio_vsock_hdr *hdr;

        hdr = virtio_vsock_hdr(skb);
        hdr->type = cpu_to_le16(info->type);
        hdr->op = cpu_to_le16(info->op);
        hdr->src_cid = cpu_to_le64(src_cid);
        hdr->dst_cid = cpu_to_le64(dst_cid);
        hdr->src_port = cpu_to_le32(src_port);
        hdr->dst_port = cpu_to_le32(dst_port);
        hdr->flags = cpu_to_le32(info->flags);
        hdr->len = cpu_to_le32(payload_len);
        hdr->buf_alloc = cpu_to_le32(0);
        hdr->fwd_cnt = cpu_to_le32(0);
}

static void virtio_transport_copy_nonlinear_skb(const struct sk_buff *skb,
                                                void *dst,
                                                size_t len)
{
        struct iov_iter iov_iter = { 0 };
        struct kvec kvec;
        size_t to_copy;

        kvec.iov_base = dst;
        kvec.iov_len = len;

        iov_iter.iter_type = ITER_KVEC;
        iov_iter.kvec = &kvec;
        iov_iter.nr_segs = 1;

        to_copy = min_t(size_t, len, skb->len);

        skb_copy_datagram_iter(skb, VIRTIO_VSOCK_SKB_CB(skb)->offset,
                               &iov_iter, to_copy);
}

/* Packet capture */
static struct sk_buff *virtio_transport_build_skb(void *opaque)
{
        struct virtio_vsock_hdr *pkt_hdr;
        struct sk_buff *pkt = opaque;
        struct af_vsockmon_hdr *hdr;
        struct sk_buff *skb;
        size_t payload_len;

        /* A packet could be split to fit the RX buffer, so we can retrieve
         * the payload length from the header and the buffer pointer taking
         * care of the offset in the original packet.
         */
        pkt_hdr = virtio_vsock_hdr(pkt);
        payload_len = pkt->len;

        skb = alloc_skb(sizeof(*hdr) + sizeof(*pkt_hdr) + payload_len,
                        GFP_ATOMIC);
        if (!skb)
                return NULL;

        hdr = skb_put(skb, sizeof(*hdr));

        /* pkt->hdr is little-endian so no need to byteswap here */
        hdr->src_cid = pkt_hdr->src_cid;
        hdr->src_port = pkt_hdr->src_port;
        hdr->dst_cid = pkt_hdr->dst_cid;
        hdr->dst_port = pkt_hdr->dst_port;

        hdr->transport = cpu_to_le16(AF_VSOCK_TRANSPORT_VIRTIO);
        hdr->len = cpu_to_le16(sizeof(*pkt_hdr));
        memset(hdr->reserved, 0, sizeof(hdr->reserved));

        switch (le16_to_cpu(pkt_hdr->op)) {
        case VIRTIO_VSOCK_OP_REQUEST:
        case VIRTIO_VSOCK_OP_RESPONSE:
                hdr->op = cpu_to_le16(AF_VSOCK_OP_CONNECT);
                break;
        case VIRTIO_VSOCK_OP_RST:
        case VIRTIO_VSOCK_OP_SHUTDOWN:
                hdr->op = cpu_to_le16(AF_VSOCK_OP_DISCONNECT);
                break;
        case VIRTIO_VSOCK_OP_RW:
                hdr->op = cpu_to_le16(AF_VSOCK_OP_PAYLOAD);
                break;
        case VIRTIO_VSOCK_OP_CREDIT_UPDATE:
        case VIRTIO_VSOCK_OP_CREDIT_REQUEST:
                hdr->op = cpu_to_le16(AF_VSOCK_OP_CONTROL);
                break;
        default:
                hdr->op = cpu_to_le16(AF_VSOCK_OP_UNKNOWN);
                break;
        }

        skb_put_data(skb, pkt_hdr, sizeof(*pkt_hdr));

        if (payload_len) {
                if (skb_is_nonlinear(pkt)) {
                        void *data = skb_put(skb, payload_len);

                        virtio_transport_copy_nonlinear_skb(pkt, data, payload_len);
                } else {
                        skb_put_data(skb, pkt->data, payload_len);
                }
        }

        return skb;
}

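/* Mirror a packet to any attached vsock tap (see af_vsockmon_hdr and
 * the vsockmon device); the copy built above prepends an
 * af_vsockmon_hdr so userspace capture tools can decode it.  The
 * tap-delivered flag presumably guards against mirroring the same skb
 * twice when it passes through this function on more than one path.
 */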
void virtio_transport_deliver_tap_pkt(struct sk_buff *skb)
{
        if (virtio_vsock_skb_tap_delivered(skb))
                return;

        vsock_deliver_tap(virtio_transport_build_skb, skb);
        virtio_vsock_skb_set_tap_delivered(skb);
}
EXPORT_SYMBOL_GPL(virtio_transport_deliver_tap_pkt);

static u16 virtio_transport_get_type(struct sock *sk)
{
        if (sk->sk_type == SOCK_STREAM)
                return VIRTIO_VSOCK_TYPE_STREAM;
        else
                return VIRTIO_VSOCK_TYPE_SEQPACKET;
}

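/* Allocate an skb for one packet.  In copy mode the payload lands in
 * the linear area right after the header; in zerocopy mode only header
 * room is reserved and the payload pages are attached later as
 * fragments by __zerocopy_sg_from_iter().
 */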
/* Returns new sk_buff on success, otherwise returns NULL. */
static struct sk_buff *virtio_transport_alloc_skb(struct virtio_vsock_pkt_info *info,
                                                  size_t payload_len,
                                                  bool zcopy,
                                                  u32 src_cid,
                                                  u32 src_port,
                                                  u32 dst_cid,
                                                  u32 dst_port)
{
        struct vsock_sock *vsk;
        struct sk_buff *skb;
        size_t skb_len;

        skb_len = VIRTIO_VSOCK_SKB_HEADROOM;

        if (!zcopy)
                skb_len += payload_len;

        skb = virtio_vsock_alloc_skb(skb_len, GFP_KERNEL);
        if (!skb)
                return NULL;

        virtio_transport_init_hdr(skb, info, payload_len, src_cid, src_port,
                                  dst_cid, dst_port);

        vsk = info->vsk;

        /* If 'vsk' != NULL then payload is always present, so we
         * will never call '__zerocopy_sg_from_iter()' below without
         * setting skb owner in 'skb_set_owner_w()'. The only case
         * when 'vsk' == NULL is VIRTIO_VSOCK_OP_RST control message
         * without payload.
         */
        WARN_ON_ONCE(!(vsk && (info->msg && payload_len)) && zcopy);

        /* Set owner here, because '__zerocopy_sg_from_iter()' uses
         * owner of skb without check to update 'sk_wmem_alloc'.
         */
        if (vsk)
                skb_set_owner_w(skb, sk_vsock(vsk));

        if (info->msg && payload_len > 0) {
                int err;

                err = virtio_transport_fill_skb(skb, info, payload_len, zcopy);
                if (err)
                        goto out;

                if (msg_data_left(info->msg) == 0 &&
                    info->type == VIRTIO_VSOCK_TYPE_SEQPACKET) {
                        struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);

                        hdr->flags |= cpu_to_le32(VIRTIO_VSOCK_SEQ_EOM);

                        if (info->msg->msg_flags & MSG_EOR)
                                hdr->flags |= cpu_to_le32(VIRTIO_VSOCK_SEQ_EOR);
                }
        }

        if (info->reply)
                virtio_vsock_skb_set_reply(skb);

        trace_virtio_transport_alloc_pkt(src_cid, src_port,
                                         dst_cid, dst_port,
                                         payload_len,
                                         info->type,
                                         info->op,
                                         info->flags,
                                         zcopy);

        return skb;

out:
        kfree_skb(skb);
        return NULL;
}

/* This function can only be used on connecting/connected sockets,
 * since a socket assigned to a transport is required.
 *
 * Do not use on listener sockets!
 */
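/* The buffer is sent in a loop: the requested length is first clamped
 * to the available peer credit, then split into skbs of at most
 * VIRTIO_VSOCK_MAX_PKT_BUF_SIZE bytes (or MAX_SKB_FRAGS pages when
 * zerocopy is used).  Credit taken for bytes that were ultimately not
 * transmitted is handed back via virtio_transport_put_credit().
 */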
static int virtio_transport_send_pkt_info(struct vsock_sock *vsk,
                                          struct virtio_vsock_pkt_info *info)
{
        u32 max_skb_len = VIRTIO_VSOCK_MAX_PKT_BUF_SIZE;
        u32 src_cid, src_port, dst_cid, dst_port;
        const struct virtio_transport *t_ops;
        struct virtio_vsock_sock *vvs;
        u32 pkt_len = info->pkt_len;
        bool can_zcopy = false;
        u32 rest_len;
        int ret;

        info->type = virtio_transport_get_type(sk_vsock(vsk));

        t_ops = virtio_transport_get_ops(vsk);
        if (unlikely(!t_ops))
                return -EFAULT;

        src_cid = t_ops->transport.get_local_cid();
        src_port = vsk->local_addr.svm_port;
        if (!info->remote_cid) {
                dst_cid = vsk->remote_addr.svm_cid;
                dst_port = vsk->remote_addr.svm_port;
        } else {
                dst_cid = info->remote_cid;
                dst_port = info->remote_port;
        }

        vvs = vsk->trans;

        /* virtio_transport_get_credit might return less than pkt_len credit */
        pkt_len = virtio_transport_get_credit(vvs, pkt_len);

        /* Do not send zero length OP_RW pkt */
        if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW)
                return pkt_len;

        if (info->msg) {
                /* If zerocopy is not enabled by 'setsockopt()', we behave as
                 * there is no MSG_ZEROCOPY flag set.
                 */
                if (!sock_flag(sk_vsock(vsk), SOCK_ZEROCOPY))
                        info->msg->msg_flags &= ~MSG_ZEROCOPY;

                if (info->msg->msg_flags & MSG_ZEROCOPY)
                        can_zcopy = virtio_transport_can_zcopy(t_ops, info, pkt_len);

                if (can_zcopy)
                        max_skb_len = min_t(u32, VIRTIO_VSOCK_MAX_PKT_BUF_SIZE,
                                            (MAX_SKB_FRAGS * PAGE_SIZE));
        }

        rest_len = pkt_len;

        do {
                struct sk_buff *skb;
                size_t skb_len;

                skb_len = min(max_skb_len, rest_len);

                skb = virtio_transport_alloc_skb(info, skb_len, can_zcopy,
                                                 src_cid, src_port,
                                                 dst_cid, dst_port);
                if (!skb) {
                        ret = -ENOMEM;
                        break;
                }

                /* We process buffer part by part, allocating skb on
                 * each iteration. If this is last skb for this buffer
                 * and MSG_ZEROCOPY mode is in use - we must allocate
                 * completion for the current syscall.
                 */
                if (info->msg && info->msg->msg_flags & MSG_ZEROCOPY &&
                    skb_len == rest_len && info->op == VIRTIO_VSOCK_OP_RW) {
                        if (virtio_transport_init_zcopy_skb(vsk, skb,
                                                            info->msg,
                                                            can_zcopy)) {
                                kfree_skb(skb);
                                ret = -ENOMEM;
                                break;
                        }
                }

                virtio_transport_inc_tx_pkt(vvs, skb);

                ret = t_ops->send_pkt(skb);
                if (ret < 0)
                        break;

                /* Both virtio and vhost 'send_pkt()' return 'skb_len',
                 * but for reliability use 'ret' instead of 'skb_len'.
                 * Also if a partial send happens (e.g. 'ret' != 'skb_len')
                 * somehow, we break this loop, but account the returned
                 * value in 'virtio_transport_put_credit()'.
                 */
                rest_len -= ret;

                if (WARN_ONCE(ret != skb_len,
                              "'send_pkt()' returns %i, but %zu expected\n",
                              ret, skb_len))
                        break;
        } while (rest_len);

        virtio_transport_put_credit(vvs, rest_len);

        /* Return the number of bytes, if any data has been sent. */
        if (rest_len != pkt_len)
                ret = pkt_len - rest_len;

        return ret;
}

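/* RX accounting: rx_bytes tracks bytes queued on the socket but not
 * yet consumed, bounded by buf_alloc; fwd_cnt counts bytes handed to
 * the application and is advertised back to the peer in every
 * transmitted header so the peer can replenish its send credit.
 */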
static bool virtio_transport_inc_rx_pkt(struct virtio_vsock_sock *vvs,
                                        u32 len)
{
        if (vvs->rx_bytes + len > vvs->buf_alloc)
                return false;

        vvs->rx_bytes += len;
        return true;
}

static void virtio_transport_dec_rx_pkt(struct virtio_vsock_sock *vvs,
                                        u32 len)
{
        vvs->rx_bytes -= len;
        vvs->fwd_cnt += len;
}

void virtio_transport_inc_tx_pkt(struct virtio_vsock_sock *vvs, struct sk_buff *skb)
{
        struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);

        spin_lock_bh(&vvs->rx_lock);
        vvs->last_fwd_cnt = vvs->fwd_cnt;
        hdr->fwd_cnt = cpu_to_le32(vvs->fwd_cnt);
        hdr->buf_alloc = cpu_to_le32(vvs->buf_alloc);
        spin_unlock_bh(&vvs->rx_lock);
}
EXPORT_SYMBOL_GPL(virtio_transport_inc_tx_pkt);

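/* Called by the transport once a packet has actually been sent: drop
 * the sent bytes from bytes_unsent (reported to userspace via
 * virtio_transport_unsent_bytes()) and optionally free the skb.
 */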
void virtio_transport_consume_skb_sent(struct sk_buff *skb, bool consume)
{
        struct sock *s = skb->sk;

        if (s && skb->len) {
                struct vsock_sock *vs = vsock_sk(s);
                struct virtio_vsock_sock *vvs;

                vvs = vs->trans;

                spin_lock_bh(&vvs->tx_lock);
                vvs->bytes_unsent -= skb->len;
                spin_unlock_bh(&vvs->tx_lock);
        }

        if (consume)
                consume_skb(skb);
}
EXPORT_SYMBOL_GPL(virtio_transport_consume_skb_sent);

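/* Credit-based flow control: the peer may buffer at most
 * peer_buf_alloc bytes, of which (tx_cnt - peer_fwd_cnt) are already
 * in flight or still queued there.  The difference is the credit we
 * may consume now; it is clamped to the amount requested.
 */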
u32 virtio_transport_get_credit(struct virtio_vsock_sock *vvs, u32 credit)
{
        u32 ret;

        if (!credit)
                return 0;

        spin_lock_bh(&vvs->tx_lock);
        ret = vvs->peer_buf_alloc - (vvs->tx_cnt - vvs->peer_fwd_cnt);
        if (ret > credit)
                ret = credit;
        vvs->tx_cnt += ret;
        vvs->bytes_unsent += ret;
        spin_unlock_bh(&vvs->tx_lock);

        return ret;
}
EXPORT_SYMBOL_GPL(virtio_transport_get_credit);

void virtio_transport_put_credit(struct virtio_vsock_sock *vvs, u32 credit)
{
        if (!credit)
                return;

        spin_lock_bh(&vvs->tx_lock);
        vvs->tx_cnt -= credit;
        vvs->bytes_unsent -= credit;
        spin_unlock_bh(&vvs->tx_lock);
}
EXPORT_SYMBOL_GPL(virtio_transport_put_credit);

static int virtio_transport_send_credit_update(struct vsock_sock *vsk)
{
        struct virtio_vsock_pkt_info info = {
                .op = VIRTIO_VSOCK_OP_CREDIT_UPDATE,
                .vsk = vsk,
        };

        return virtio_transport_send_pkt_info(vsk, &info);
}

static ssize_t
virtio_transport_stream_do_peek(struct vsock_sock *vsk,
                                struct msghdr *msg,
                                size_t len)
{
        struct virtio_vsock_sock *vvs = vsk->trans;
        struct sk_buff *skb;
        size_t total = 0;
        int err;

        spin_lock_bh(&vvs->rx_lock);

        skb_queue_walk(&vvs->rx_queue, skb) {
                size_t bytes;

                bytes = len - total;
                if (bytes > skb->len)
                        bytes = skb->len;

                spin_unlock_bh(&vvs->rx_lock);

                /* sk_lock is held by caller so no one else can dequeue.
                 * Unlock rx_lock since skb_copy_datagram_iter() may sleep.
                 */
                err = skb_copy_datagram_iter(skb, VIRTIO_VSOCK_SKB_CB(skb)->offset,
                                             &msg->msg_iter, bytes);
                if (err)
                        goto out;

                total += bytes;

                spin_lock_bh(&vvs->rx_lock);

                if (total == len)
                        break;
        }

        spin_unlock_bh(&vvs->rx_lock);

        return total;

out:
        if (total)
                err = total;
        return err;
}

static ssize_t
virtio_transport_stream_do_dequeue(struct vsock_sock *vsk,
                                   struct msghdr *msg,
                                   size_t len)
{
        struct virtio_vsock_sock *vvs = vsk->trans;
        size_t bytes, total = 0;
        struct sk_buff *skb;
        u32 fwd_cnt_delta;
        bool low_rx_bytes;
        int err = -EFAULT;
        u32 free_space;

        spin_lock_bh(&vvs->rx_lock);

        if (WARN_ONCE(skb_queue_empty(&vvs->rx_queue) && vvs->rx_bytes,
                      "rx_queue is empty, but rx_bytes is non-zero\n")) {
                spin_unlock_bh(&vvs->rx_lock);
                return err;
        }

        while (total < len && !skb_queue_empty(&vvs->rx_queue)) {
                skb = skb_peek(&vvs->rx_queue);

                bytes = min_t(size_t, len - total,
                              skb->len - VIRTIO_VSOCK_SKB_CB(skb)->offset);

                /* sk_lock is held by caller so no one else can dequeue.
                 * Unlock rx_lock since skb_copy_datagram_iter() may sleep.
                 */
                spin_unlock_bh(&vvs->rx_lock);

                err = skb_copy_datagram_iter(skb,
                                             VIRTIO_VSOCK_SKB_CB(skb)->offset,
                                             &msg->msg_iter, bytes);
                if (err)
                        goto out;

                spin_lock_bh(&vvs->rx_lock);

                total += bytes;

                VIRTIO_VSOCK_SKB_CB(skb)->offset += bytes;

                if (skb->len == VIRTIO_VSOCK_SKB_CB(skb)->offset) {
                        u32 pkt_len = le32_to_cpu(virtio_vsock_hdr(skb)->len);

                        virtio_transport_dec_rx_pkt(vvs, pkt_len);
                        __skb_unlink(skb, &vvs->rx_queue);
                        consume_skb(skb);
                }
        }

        fwd_cnt_delta = vvs->fwd_cnt - vvs->last_fwd_cnt;
        free_space = vvs->buf_alloc - fwd_cnt_delta;
        low_rx_bytes = (vvs->rx_bytes <
                        sock_rcvlowat(sk_vsock(vsk), 0, INT_MAX));

        spin_unlock_bh(&vvs->rx_lock);

        /* To reduce the number of credit update messages,
         * don't update credits as long as lots of space is available.
         * Note: the limit chosen here is arbitrary. Setting the limit
         * too high causes extra messages. Too low causes transmitter
         * stalls. As stalls are in theory more expensive than extra
         * messages, we set the limit to a high value. TODO: experiment
         * with different values. Also send credit update message when
         * number of bytes in rx queue is not enough to wake up reader.
         */
        if (fwd_cnt_delta &&
            (free_space < VIRTIO_VSOCK_MAX_PKT_BUF_SIZE || low_rx_bytes))
                virtio_transport_send_credit_update(vsk);

        return total;

out:
        if (total)
                err = total;
        return err;
}

static ssize_t
virtio_transport_seqpacket_do_peek(struct vsock_sock *vsk,
                                   struct msghdr *msg)
{
        struct virtio_vsock_sock *vvs = vsk->trans;
        struct sk_buff *skb;
        size_t total, len;

        spin_lock_bh(&vvs->rx_lock);

        if (!vvs->msg_count) {
                spin_unlock_bh(&vvs->rx_lock);
                return 0;
        }

        total = 0;
        len = msg_data_left(msg);

        skb_queue_walk(&vvs->rx_queue, skb) {
                struct virtio_vsock_hdr *hdr;

                if (total < len) {
                        size_t bytes;
                        int err;

                        bytes = len - total;
                        if (bytes > skb->len)
                                bytes = skb->len;

                        spin_unlock_bh(&vvs->rx_lock);

                        /* sk_lock is held by caller so no one else can dequeue.
                         * Unlock rx_lock since skb_copy_datagram_iter() may sleep.
                         */
                        err = skb_copy_datagram_iter(skb, VIRTIO_VSOCK_SKB_CB(skb)->offset,
                                                     &msg->msg_iter, bytes);
                        if (err)
                                return err;

                        spin_lock_bh(&vvs->rx_lock);
                }

                total += skb->len;
                hdr = virtio_vsock_hdr(skb);

                if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SEQ_EOM) {
                        if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SEQ_EOR)
                                msg->msg_flags |= MSG_EOR;

                        break;
                }
        }

        spin_unlock_bh(&vvs->rx_lock);

        return total;
}

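/* Dequeue one complete SEQPACKET message, which may span several
 * fragments: EOM on a fragment's header marks the end of the message
 * and EOR is propagated to userspace as MSG_EOR.  On a copy error the
 * remaining fragments of the message are still drained and freed, and
 * the error is returned in place of the length.
 */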
static int virtio_transport_seqpacket_do_dequeue(struct vsock_sock *vsk,
                                                 struct msghdr *msg,
                                                 int flags)
{
        struct virtio_vsock_sock *vvs = vsk->trans;
        int dequeued_len = 0;
        size_t user_buf_len = msg_data_left(msg);
        bool msg_ready = false;
        struct sk_buff *skb;

        spin_lock_bh(&vvs->rx_lock);

        if (vvs->msg_count == 0) {
                spin_unlock_bh(&vvs->rx_lock);
                return 0;
        }

        while (!msg_ready) {
                struct virtio_vsock_hdr *hdr;
                size_t pkt_len;

                skb = __skb_dequeue(&vvs->rx_queue);
                if (!skb)
                        break;

                hdr = virtio_vsock_hdr(skb);
                pkt_len = (size_t)le32_to_cpu(hdr->len);

                if (dequeued_len >= 0) {
                        size_t bytes_to_copy;

                        bytes_to_copy = min(user_buf_len, pkt_len);

                        if (bytes_to_copy) {
                                int err;

                                /* sk_lock is held by caller so no one else can dequeue.
                                 * Unlock rx_lock since skb_copy_datagram_iter() may sleep.
                                 */
                                spin_unlock_bh(&vvs->rx_lock);

                                err = skb_copy_datagram_iter(skb, 0,
                                                             &msg->msg_iter,
                                                             bytes_to_copy);
                                if (err) {
                                        /* Copy of message failed. Rest of
                                         * fragments will be freed without copy.
                                         */
                                        dequeued_len = err;
                                } else {
                                        user_buf_len -= bytes_to_copy;
                                }

                                spin_lock_bh(&vvs->rx_lock);
                        }

                        if (dequeued_len >= 0)
                                dequeued_len += pkt_len;
                }

                if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SEQ_EOM) {
                        msg_ready = true;
                        vvs->msg_count--;

                        if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SEQ_EOR)
                                msg->msg_flags |= MSG_EOR;
                }

                virtio_transport_dec_rx_pkt(vvs, pkt_len);
                kfree_skb(skb);
        }

        spin_unlock_bh(&vvs->rx_lock);

        virtio_transport_send_credit_update(vsk);

        return dequeued_len;
}

ssize_t
virtio_transport_stream_dequeue(struct vsock_sock *vsk,
                                struct msghdr *msg,
                                size_t len, int flags)
{
        if (flags & MSG_PEEK)
                return virtio_transport_stream_do_peek(vsk, msg, len);
        else
                return virtio_transport_stream_do_dequeue(vsk, msg, len);
}
EXPORT_SYMBOL_GPL(virtio_transport_stream_dequeue);

ssize_t
virtio_transport_seqpacket_dequeue(struct vsock_sock *vsk,
                                   struct msghdr *msg,
                                   int flags)
{
        if (flags & MSG_PEEK)
                return virtio_transport_seqpacket_do_peek(vsk, msg);
        else
                return virtio_transport_seqpacket_do_dequeue(vsk, msg, flags);
}
EXPORT_SYMBOL_GPL(virtio_transport_seqpacket_dequeue);

int
virtio_transport_seqpacket_enqueue(struct vsock_sock *vsk,
                                   struct msghdr *msg,
                                   size_t len)
{
        struct virtio_vsock_sock *vvs = vsk->trans;

        spin_lock_bh(&vvs->tx_lock);

        if (len > vvs->peer_buf_alloc) {
                spin_unlock_bh(&vvs->tx_lock);
                return -EMSGSIZE;
        }

        spin_unlock_bh(&vvs->tx_lock);

        return virtio_transport_stream_enqueue(vsk, msg, len);
}
EXPORT_SYMBOL_GPL(virtio_transport_seqpacket_enqueue);

int
virtio_transport_dgram_dequeue(struct vsock_sock *vsk,
                               struct msghdr *msg,
                               size_t len, int flags)
{
        return -EOPNOTSUPP;
}
EXPORT_SYMBOL_GPL(virtio_transport_dgram_dequeue);

s64 virtio_transport_stream_has_data(struct vsock_sock *vsk)
{
        struct virtio_vsock_sock *vvs = vsk->trans;
        s64 bytes;

        spin_lock_bh(&vvs->rx_lock);
        bytes = vvs->rx_bytes;
        spin_unlock_bh(&vvs->rx_lock);

        return bytes;
}
EXPORT_SYMBOL_GPL(virtio_transport_stream_has_data);

u32 virtio_transport_seqpacket_has_data(struct vsock_sock *vsk)
{
        struct virtio_vsock_sock *vvs = vsk->trans;
        u32 msg_count;

        spin_lock_bh(&vvs->rx_lock);
        msg_count = vvs->msg_count;
        spin_unlock_bh(&vvs->rx_lock);

        return msg_count;
}
EXPORT_SYMBOL_GPL(virtio_transport_seqpacket_has_data);

static s64 virtio_transport_has_space(struct vsock_sock *vsk)
{
        struct virtio_vsock_sock *vvs = vsk->trans;
        s64 bytes;

        bytes = (s64)vvs->peer_buf_alloc - (vvs->tx_cnt - vvs->peer_fwd_cnt);
        if (bytes < 0)
                bytes = 0;

        return bytes;
}

s64 virtio_transport_stream_has_space(struct vsock_sock *vsk)
{
        struct virtio_vsock_sock *vvs = vsk->trans;
        s64 bytes;

        spin_lock_bh(&vvs->tx_lock);
        bytes = virtio_transport_has_space(vsk);
        spin_unlock_bh(&vvs->tx_lock);

        return bytes;
}
EXPORT_SYMBOL_GPL(virtio_transport_stream_has_space);

int virtio_transport_do_socket_init(struct vsock_sock *vsk,
                                    struct vsock_sock *psk)
{
        struct virtio_vsock_sock *vvs;

        vvs = kzalloc(sizeof(*vvs), GFP_KERNEL);
        if (!vvs)
                return -ENOMEM;

        vsk->trans = vvs;
        vvs->vsk = vsk;
        if (psk && psk->trans) {
                struct virtio_vsock_sock *ptrans = psk->trans;

                vvs->peer_buf_alloc = ptrans->peer_buf_alloc;
        }

        if (vsk->buffer_size > VIRTIO_VSOCK_MAX_BUF_SIZE)
                vsk->buffer_size = VIRTIO_VSOCK_MAX_BUF_SIZE;

        vvs->buf_alloc = vsk->buffer_size;

        spin_lock_init(&vvs->rx_lock);
        spin_lock_init(&vvs->tx_lock);
        skb_queue_head_init(&vvs->rx_queue);

        return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_do_socket_init);

/* sk_lock held by the caller */
void virtio_transport_notify_buffer_size(struct vsock_sock *vsk, u64 *val)
{
        struct virtio_vsock_sock *vvs = vsk->trans;

        if (*val > VIRTIO_VSOCK_MAX_BUF_SIZE)
                *val = VIRTIO_VSOCK_MAX_BUF_SIZE;

        vvs->buf_alloc = *val;

        virtio_transport_send_credit_update(vsk);
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_buffer_size);

int
virtio_transport_notify_poll_in(struct vsock_sock *vsk,
                                size_t target,
                                bool *data_ready_now)
{
        *data_ready_now = vsock_stream_has_data(vsk) >= target;

        return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_poll_in);

int
virtio_transport_notify_poll_out(struct vsock_sock *vsk,
                                 size_t target,
                                 bool *space_avail_now)
{
        s64 free_space;

        free_space = vsock_stream_has_space(vsk);
        if (free_space > 0)
                *space_avail_now = true;
        else if (free_space == 0)
                *space_avail_now = false;

        return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_poll_out);

int virtio_transport_notify_recv_init(struct vsock_sock *vsk,
        size_t target, struct vsock_transport_recv_notify_data *data)
{
        return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_init);

int virtio_transport_notify_recv_pre_block(struct vsock_sock *vsk,
        size_t target, struct vsock_transport_recv_notify_data *data)
{
        return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_pre_block);

int virtio_transport_notify_recv_pre_dequeue(struct vsock_sock *vsk,
        size_t target, struct vsock_transport_recv_notify_data *data)
{
        return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_pre_dequeue);

int virtio_transport_notify_recv_post_dequeue(struct vsock_sock *vsk,
        size_t target, ssize_t copied, bool data_read,
        struct vsock_transport_recv_notify_data *data)
{
        return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_post_dequeue);

int virtio_transport_notify_send_init(struct vsock_sock *vsk,
        struct vsock_transport_send_notify_data *data)
{
        return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_send_init);

int virtio_transport_notify_send_pre_block(struct vsock_sock *vsk,
        struct vsock_transport_send_notify_data *data)
{
        return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_send_pre_block);

int virtio_transport_notify_send_pre_enqueue(struct vsock_sock *vsk,
        struct vsock_transport_send_notify_data *data)
{
        return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_send_pre_enqueue);

int virtio_transport_notify_send_post_enqueue(struct vsock_sock *vsk,
        ssize_t written, struct vsock_transport_send_notify_data *data)
{
        return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_send_post_enqueue);

u64 virtio_transport_stream_rcvhiwat(struct vsock_sock *vsk)
{
        return vsk->buffer_size;
}
EXPORT_SYMBOL_GPL(virtio_transport_stream_rcvhiwat);

bool virtio_transport_stream_is_active(struct vsock_sock *vsk)
{
        return true;
}
EXPORT_SYMBOL_GPL(virtio_transport_stream_is_active);

bool virtio_transport_stream_allow(u32 cid, u32 port)
{
        return true;
}
EXPORT_SYMBOL_GPL(virtio_transport_stream_allow);

int virtio_transport_dgram_bind(struct vsock_sock *vsk,
                                struct sockaddr_vm *addr)
{
        return -EOPNOTSUPP;
}
EXPORT_SYMBOL_GPL(virtio_transport_dgram_bind);

bool virtio_transport_dgram_allow(u32 cid, u32 port)
{
        return false;
}
EXPORT_SYMBOL_GPL(virtio_transport_dgram_allow);

int virtio_transport_connect(struct vsock_sock *vsk)
{
        struct virtio_vsock_pkt_info info = {
                .op = VIRTIO_VSOCK_OP_REQUEST,
                .vsk = vsk,
        };

        return virtio_transport_send_pkt_info(vsk, &info);
}
EXPORT_SYMBOL_GPL(virtio_transport_connect);

int virtio_transport_shutdown(struct vsock_sock *vsk, int mode)
{
        struct virtio_vsock_pkt_info info = {
                .op = VIRTIO_VSOCK_OP_SHUTDOWN,
                .flags = (mode & RCV_SHUTDOWN ?
                          VIRTIO_VSOCK_SHUTDOWN_RCV : 0) |
                         (mode & SEND_SHUTDOWN ?
                          VIRTIO_VSOCK_SHUTDOWN_SEND : 0),
                .vsk = vsk,
        };

        return virtio_transport_send_pkt_info(vsk, &info);
}
EXPORT_SYMBOL_GPL(virtio_transport_shutdown);

int
virtio_transport_dgram_enqueue(struct vsock_sock *vsk,
                               struct sockaddr_vm *remote_addr,
                               struct msghdr *msg,
                               size_t dgram_len)
{
        return -EOPNOTSUPP;
}
EXPORT_SYMBOL_GPL(virtio_transport_dgram_enqueue);

ssize_t
virtio_transport_stream_enqueue(struct vsock_sock *vsk,
                                struct msghdr *msg,
                                size_t len)
{
        struct virtio_vsock_pkt_info info = {
                .op = VIRTIO_VSOCK_OP_RW,
                .msg = msg,
                .pkt_len = len,
                .vsk = vsk,
        };

        return virtio_transport_send_pkt_info(vsk, &info);
}
EXPORT_SYMBOL_GPL(virtio_transport_stream_enqueue);

void virtio_transport_destruct(struct vsock_sock *vsk)
{
        struct virtio_vsock_sock *vvs = vsk->trans;

        virtio_transport_cancel_close_work(vsk, true);

        kfree(vvs);
        vsk->trans = NULL;
}
EXPORT_SYMBOL_GPL(virtio_transport_destruct);

ssize_t virtio_transport_unsent_bytes(struct vsock_sock *vsk)
{
        struct virtio_vsock_sock *vvs = vsk->trans;
        size_t ret;

        spin_lock_bh(&vvs->tx_lock);
        ret = vvs->bytes_unsent;
        spin_unlock_bh(&vvs->tx_lock);

        return ret;
}
EXPORT_SYMBOL_GPL(virtio_transport_unsent_bytes);

static int virtio_transport_reset(struct vsock_sock *vsk,
                                  struct sk_buff *skb)
{
        struct virtio_vsock_pkt_info info = {
                .op = VIRTIO_VSOCK_OP_RST,
                .reply = !!skb,
                .vsk = vsk,
        };

        /* Send RST only if the original pkt is not a RST pkt */
        if (skb && le16_to_cpu(virtio_vsock_hdr(skb)->op) == VIRTIO_VSOCK_OP_RST)
                return 0;

        return virtio_transport_send_pkt_info(vsk, &info);
}

/* Normally packets are associated with a socket. There may be no socket if an
 * attempt was made to connect to a socket that does not exist.
 */
static int virtio_transport_reset_no_sock(const struct virtio_transport *t,
                                          struct sk_buff *skb)
{
        struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);
        struct virtio_vsock_pkt_info info = {
                .op = VIRTIO_VSOCK_OP_RST,
                .type = le16_to_cpu(hdr->type),
                .reply = true,
        };
        struct sk_buff *reply;

        /* Send RST only if the original pkt is not a RST pkt */
        if (le16_to_cpu(hdr->op) == VIRTIO_VSOCK_OP_RST)
                return 0;

        if (!t)
                return -ENOTCONN;

        reply = virtio_transport_alloc_skb(&info, 0, false,
                                           le64_to_cpu(hdr->dst_cid),
                                           le32_to_cpu(hdr->dst_port),
                                           le64_to_cpu(hdr->src_cid),
                                           le32_to_cpu(hdr->src_port));
        if (!reply)
                return -ENOMEM;

        return t->send_pkt(reply);
}

/* This function should be called with sk_lock held and SOCK_DONE set */
static void virtio_transport_remove_sock(struct vsock_sock *vsk)
{
        struct virtio_vsock_sock *vvs = vsk->trans;

        /* We don't need to take rx_lock, as the socket is closing and we are
         * removing it.
         */
        __skb_queue_purge(&vvs->rx_queue);
        vsock_remove_sock(vsk);
}

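/* Linger on close: with SO_LINGER set, wait (interruptibly, up to the
 * linger timeout) for SOCK_DONE, which virtio_transport_do_close()
 * sets once the peer's RST or full shutdown has been processed.
 */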
static void virtio_transport_wait_close(struct sock *sk, long timeout)
{
        if (timeout) {
                DEFINE_WAIT_FUNC(wait, woken_wake_function);

                add_wait_queue(sk_sleep(sk), &wait);

                do {
                        if (sk_wait_event(sk, &timeout,
                                          sock_flag(sk, SOCK_DONE), &wait))
                                break;
                } while (!signal_pending(current) && timeout);

                remove_wait_queue(sk_sleep(sk), &wait);
        }
}

static void virtio_transport_cancel_close_work(struct vsock_sock *vsk,
                                               bool cancel_timeout)
{
        struct sock *sk = sk_vsock(vsk);

        if (vsk->close_work_scheduled &&
            (!cancel_timeout || cancel_delayed_work(&vsk->close_work))) {
                vsk->close_work_scheduled = false;

                virtio_transport_remove_sock(vsk);

                /* Release refcnt obtained when we scheduled the timeout */
                sock_put(sk);
        }
}

static void virtio_transport_do_close(struct vsock_sock *vsk,
                                      bool cancel_timeout)
{
        struct sock *sk = sk_vsock(vsk);

        sock_set_flag(sk, SOCK_DONE);
        vsk->peer_shutdown = SHUTDOWN_MASK;
        if (vsock_stream_has_data(vsk) <= 0)
                sk->sk_state = TCP_CLOSING;
        sk->sk_state_change(sk);

        virtio_transport_cancel_close_work(vsk, cancel_timeout);
}

static void virtio_transport_close_timeout(struct work_struct *work)
{
        struct vsock_sock *vsk =
                container_of(work, struct vsock_sock, close_work.work);
        struct sock *sk = sk_vsock(vsk);

        sock_hold(sk);
        lock_sock(sk);

        if (!sock_flag(sk, SOCK_DONE)) {
                (void)virtio_transport_reset(vsk, NULL);

                virtio_transport_do_close(vsk, false);
        }

        vsk->close_work_scheduled = false;

        release_sock(sk);
        sock_put(sk);
}

/* User context, vsk->sk is locked */
static bool virtio_transport_close(struct vsock_sock *vsk)
{
        struct sock *sk = &vsk->sk;

        if (!(sk->sk_state == TCP_ESTABLISHED ||
              sk->sk_state == TCP_CLOSING))
                return true;

        /* Already received SHUTDOWN from peer, reply with RST */
        if ((vsk->peer_shutdown & SHUTDOWN_MASK) == SHUTDOWN_MASK) {
                (void)virtio_transport_reset(vsk, NULL);
                return true;
        }

        if ((sk->sk_shutdown & SHUTDOWN_MASK) != SHUTDOWN_MASK)
                (void)virtio_transport_shutdown(vsk, SHUTDOWN_MASK);

        if (sock_flag(sk, SOCK_LINGER) && !(current->flags & PF_EXITING))
                virtio_transport_wait_close(sk, sk->sk_lingertime);

        if (sock_flag(sk, SOCK_DONE))
                return true;

        sock_hold(sk);
        INIT_DELAYED_WORK(&vsk->close_work,
                          virtio_transport_close_timeout);
        vsk->close_work_scheduled = true;
        schedule_delayed_work(&vsk->close_work, VSOCK_CLOSE_TIMEOUT);
        return false;
}

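/* Release path: for stream/seqpacket sockets try a graceful close
 * first.  virtio_transport_close() returns false while the shutdown
 * handshake is still pending, in which case removal is deferred to
 * close_work, which fires after VSOCK_CLOSE_TIMEOUT at the latest and
 * forces a reset if the peer never acknowledged the shutdown.
 */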
void virtio_transport_release(struct vsock_sock *vsk)
{
        struct sock *sk = &vsk->sk;
        bool remove_sock = true;

        if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)
                remove_sock = virtio_transport_close(vsk);

        if (remove_sock) {
                sock_set_flag(sk, SOCK_DONE);
                virtio_transport_remove_sock(vsk);
        }
}
EXPORT_SYMBOL_GPL(virtio_transport_release);

static int
virtio_transport_recv_connecting(struct sock *sk,
                                 struct sk_buff *skb)
{
        struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);
        struct vsock_sock *vsk = vsock_sk(sk);
        int skerr;
        int err;

        switch (le16_to_cpu(hdr->op)) {
        case VIRTIO_VSOCK_OP_RESPONSE:
                sk->sk_state = TCP_ESTABLISHED;
                sk->sk_socket->state = SS_CONNECTED;
                vsock_insert_connected(vsk);
                sk->sk_state_change(sk);
                break;
        case VIRTIO_VSOCK_OP_INVALID:
                break;
        case VIRTIO_VSOCK_OP_RST:
                skerr = ECONNRESET;
                err = 0;
                goto destroy;
        default:
                skerr = EPROTO;
                err = -EINVAL;
                goto destroy;
        }

        return 0;

destroy:
        virtio_transport_reset(vsk, skb);
        sk->sk_state = TCP_CLOSE;
        sk->sk_err = skerr;
        sk_error_report(sk);
        return err;
}

static void
virtio_transport_recv_enqueue(struct vsock_sock *vsk,
                              struct sk_buff *skb)
{
        struct virtio_vsock_sock *vvs = vsk->trans;
        bool can_enqueue, free_pkt = false;
        struct virtio_vsock_hdr *hdr;
        u32 len;

        hdr = virtio_vsock_hdr(skb);
        len = le32_to_cpu(hdr->len);

        spin_lock_bh(&vvs->rx_lock);

        can_enqueue = virtio_transport_inc_rx_pkt(vvs, len);
        if (!can_enqueue) {
                free_pkt = true;
                goto out;
        }

        if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SEQ_EOM)
                vvs->msg_count++;

        /* Try to copy small packets into the buffer of last packet queued,
         * to avoid wasting memory queueing the entire buffer with a small
         * payload.
         */
        if (len <= GOOD_COPY_LEN && !skb_queue_empty(&vvs->rx_queue)) {
                struct virtio_vsock_hdr *last_hdr;
                struct sk_buff *last_skb;

                last_skb = skb_peek_tail(&vvs->rx_queue);
                last_hdr = virtio_vsock_hdr(last_skb);

                /* If there is space in the last packet queued, we copy the
                 * new packet in its buffer. We avoid this if the last packet
                 * queued has VIRTIO_VSOCK_SEQ_EOM set, because this is the
                 * delimiter of a SEQPACKET message, so 'skb' is the first
                 * packet of a new message.
                 */
                if (skb->len < skb_tailroom(last_skb) &&
                    !(le32_to_cpu(last_hdr->flags) & VIRTIO_VSOCK_SEQ_EOM)) {
                        memcpy(skb_put(last_skb, skb->len), skb->data, skb->len);
                        free_pkt = true;
                        last_hdr->flags |= hdr->flags;
                        le32_add_cpu(&last_hdr->len, len);
                        goto out;
                }
        }

        __skb_queue_tail(&vvs->rx_queue, skb);

out:
        spin_unlock_bh(&vvs->rx_lock);
        if (free_pkt)
                kfree_skb(skb);
}

static int
virtio_transport_recv_connected(struct sock *sk,
                                struct sk_buff *skb)
{
        struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);
        struct vsock_sock *vsk = vsock_sk(sk);
        int err = 0;

        switch (le16_to_cpu(hdr->op)) {
        case VIRTIO_VSOCK_OP_RW:
                virtio_transport_recv_enqueue(vsk, skb);
                vsock_data_ready(sk);
                return err;
        case VIRTIO_VSOCK_OP_CREDIT_REQUEST:
                virtio_transport_send_credit_update(vsk);
                break;
        case VIRTIO_VSOCK_OP_CREDIT_UPDATE:
                sk->sk_write_space(sk);
                break;
        case VIRTIO_VSOCK_OP_SHUTDOWN:
                if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SHUTDOWN_RCV)
                        vsk->peer_shutdown |= RCV_SHUTDOWN;
                if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SHUTDOWN_SEND)
                        vsk->peer_shutdown |= SEND_SHUTDOWN;
                if (vsk->peer_shutdown == SHUTDOWN_MASK) {
                        if (vsock_stream_has_data(vsk) <= 0 && !sock_flag(sk, SOCK_DONE)) {
                                (void)virtio_transport_reset(vsk, NULL);
                                virtio_transport_do_close(vsk, true);
                        }
                        /* Remove this socket anyway because the remote peer sent
                         * the shutdown. This way a new connection will succeed
                         * if the remote peer uses the same source port,
                         * even if the old socket is still unreleased, but now
                         * disconnected.
                         */
                        vsock_remove_sock(vsk);
                }
                if (le32_to_cpu(virtio_vsock_hdr(skb)->flags))
                        sk->sk_state_change(sk);
                break;
        case VIRTIO_VSOCK_OP_RST:
                virtio_transport_do_close(vsk, true);
                break;
        default:
                err = -EINVAL;
                break;
        }

        kfree_skb(skb);
        return err;
}

static void
virtio_transport_recv_disconnecting(struct sock *sk,
                                    struct sk_buff *skb)
{
        struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);
        struct vsock_sock *vsk = vsock_sk(sk);

        if (le16_to_cpu(hdr->op) == VIRTIO_VSOCK_OP_RST)
                virtio_transport_do_close(vsk, true);
}

static int
virtio_transport_send_response(struct vsock_sock *vsk,
                               struct sk_buff *skb)
{
        struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);
        struct virtio_vsock_pkt_info info = {
                .op = VIRTIO_VSOCK_OP_RESPONSE,
                .remote_cid = le64_to_cpu(hdr->src_cid),
                .remote_port = le32_to_cpu(hdr->src_port),
                .reply = true,
                .vsk = vsk,
        };

        return virtio_transport_send_pkt_info(vsk, &info);
}

static bool virtio_transport_space_update(struct sock *sk,
                                          struct sk_buff *skb)
{
        struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);
        struct vsock_sock *vsk = vsock_sk(sk);
        struct virtio_vsock_sock *vvs = vsk->trans;
        bool space_available;

        /* Listener sockets are not associated with any transport, so we are
         * not able to take the state to see if there is space available in the
         * remote peer, but since they are only used to receive requests, we
         * can assume that there is always space available in the other peer.
         */
        if (!vvs)
                return true;

        /* buf_alloc and fwd_cnt are always included in the hdr */
        spin_lock_bh(&vvs->tx_lock);
        vvs->peer_buf_alloc = le32_to_cpu(hdr->buf_alloc);
        vvs->peer_fwd_cnt = le32_to_cpu(hdr->fwd_cnt);
        space_available = virtio_transport_has_space(vsk);
        spin_unlock_bh(&vvs->tx_lock);
        return space_available;
}

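/* Connection request handling: a REQUEST packet arriving on a listener
 * creates a child socket that is moved straight to TCP_ESTABLISHED,
 * queued on the accept queue, and answered with a RESPONSE packet.
 * Anything else arriving on a listener is answered with a RST.
 */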
/* Handle server socket */
static int
virtio_transport_recv_listen(struct sock *sk, struct sk_buff *skb,
                             struct virtio_transport *t)
{
        struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);
        struct vsock_sock *vsk = vsock_sk(sk);
        struct vsock_sock *vchild;
        struct sock *child;
        int ret;

        if (le16_to_cpu(hdr->op) != VIRTIO_VSOCK_OP_REQUEST) {
                virtio_transport_reset_no_sock(t, skb);
                return -EINVAL;
        }

        if (sk_acceptq_is_full(sk)) {
                virtio_transport_reset_no_sock(t, skb);
                return -ENOMEM;
        }

        /* __vsock_release() might have already flushed accept_queue.
         * Subsequent enqueues would lead to a memory leak.
         */
        if (sk->sk_shutdown == SHUTDOWN_MASK) {
                virtio_transport_reset_no_sock(t, skb);
                return -ESHUTDOWN;
        }

        child = vsock_create_connected(sk);
        if (!child) {
                virtio_transport_reset_no_sock(t, skb);
                return -ENOMEM;
        }

        sk_acceptq_added(sk);

        lock_sock_nested(child, SINGLE_DEPTH_NESTING);

        child->sk_state = TCP_ESTABLISHED;

        vchild = vsock_sk(child);
        vsock_addr_init(&vchild->local_addr, le64_to_cpu(hdr->dst_cid),
                        le32_to_cpu(hdr->dst_port));
        vsock_addr_init(&vchild->remote_addr, le64_to_cpu(hdr->src_cid),
                        le32_to_cpu(hdr->src_port));

        ret = vsock_assign_transport(vchild, vsk);
        /* The transport assigned (based on remote_addr) must be the same
         * one on which we received the request.
         */
        if (ret || vchild->transport != &t->transport) {
                release_sock(child);
                virtio_transport_reset_no_sock(t, skb);
                sock_put(child);
                return ret;
        }

        if (virtio_transport_space_update(child, skb))
                child->sk_write_space(child);

        vsock_insert_connected(vchild);
        vsock_enqueue_accept(sk, child);
        virtio_transport_send_response(vchild, skb);

        release_sock(child);

        sk->sk_data_ready(sk);
        return 0;
}

static bool virtio_transport_valid_type(u16 type)
{
        return (type == VIRTIO_VSOCK_TYPE_STREAM) ||
               (type == VIRTIO_VSOCK_TYPE_SEQPACKET);
}

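/* Main RX entry point: the destination is looked up in the connected
 * table first, then in the bound table, and the packet is dispatched
 * on sk_state (vsock reuses the TCP_* state constants).  Packets that
 * match no socket, or a socket in the wrong state, are answered with
 * a RST.
 */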
/* We are under the virtio-vsock's vsock->rx_lock or vhost-vsock's vq->mutex
 * lock.
 */
void virtio_transport_recv_pkt(struct virtio_transport *t,
                               struct sk_buff *skb)
{
        struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);
        struct sockaddr_vm src, dst;
        struct vsock_sock *vsk;
        struct sock *sk;
        bool space_available;

        vsock_addr_init(&src, le64_to_cpu(hdr->src_cid),
                        le32_to_cpu(hdr->src_port));
        vsock_addr_init(&dst, le64_to_cpu(hdr->dst_cid),
                        le32_to_cpu(hdr->dst_port));

        trace_virtio_transport_recv_pkt(src.svm_cid, src.svm_port,
                                        dst.svm_cid, dst.svm_port,
                                        le32_to_cpu(hdr->len),
                                        le16_to_cpu(hdr->type),
                                        le16_to_cpu(hdr->op),
                                        le32_to_cpu(hdr->flags),
                                        le32_to_cpu(hdr->buf_alloc),
                                        le32_to_cpu(hdr->fwd_cnt));

        if (!virtio_transport_valid_type(le16_to_cpu(hdr->type))) {
                (void)virtio_transport_reset_no_sock(t, skb);
                goto free_pkt;
        }

        /* The socket must be in connected or bound table
         * otherwise send reset back
         */
        sk = vsock_find_connected_socket(&src, &dst);
        if (!sk) {
                sk = vsock_find_bound_socket(&dst);
                if (!sk) {
                        (void)virtio_transport_reset_no_sock(t, skb);
                        goto free_pkt;
                }
        }

        if (virtio_transport_get_type(sk) != le16_to_cpu(hdr->type)) {
                (void)virtio_transport_reset_no_sock(t, skb);
                sock_put(sk);
                goto free_pkt;
        }

        if (!skb_set_owner_sk_safe(skb, sk)) {
                WARN_ONCE(1, "receiving vsock socket has sk_refcnt == 0\n");
                goto free_pkt;
        }

        vsk = vsock_sk(sk);

        lock_sock(sk);

        /* Check if sk has been closed or assigned to another transport before
         * lock_sock (note: listener sockets are not assigned to any transport)
         */
        if (sock_flag(sk, SOCK_DONE) ||
            (sk->sk_state != TCP_LISTEN && vsk->transport != &t->transport)) {
                (void)virtio_transport_reset_no_sock(t, skb);
                release_sock(sk);
                sock_put(sk);
                goto free_pkt;
        }

        space_available = virtio_transport_space_update(sk, skb);

        /* Update CID in case it has changed after a transport reset event */
        if (vsk->local_addr.svm_cid != VMADDR_CID_ANY)
                vsk->local_addr.svm_cid = dst.svm_cid;

        if (space_available)
                sk->sk_write_space(sk);

        switch (sk->sk_state) {
        case TCP_LISTEN:
                virtio_transport_recv_listen(sk, skb, t);
                kfree_skb(skb);
                break;
        case TCP_SYN_SENT:
                virtio_transport_recv_connecting(sk, skb);
                kfree_skb(skb);
                break;
        case TCP_ESTABLISHED:
                virtio_transport_recv_connected(sk, skb);
                break;
        case TCP_CLOSING:
                virtio_transport_recv_disconnecting(sk, skb);
                kfree_skb(skb);
                break;
        default:
                (void)virtio_transport_reset_no_sock(t, skb);
                kfree_skb(skb);
                break;
        }

        release_sock(sk);

        /* Release refcnt obtained when we fetched this socket out of the
         * bound or connected list.
         */
        sock_put(sk);
        return;

free_pkt:
        kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(virtio_transport_recv_pkt);

/* Remove skbs found in a queue that have a vsk that matches.
 *
 * Each skb is freed.
 *
 * Returns the count of skbs that were reply packets.
 */
int virtio_transport_purge_skbs(void *vsk, struct sk_buff_head *queue)
{
        struct sk_buff_head freeme;
        struct sk_buff *skb, *tmp;
        int cnt = 0;

        skb_queue_head_init(&freeme);

        spin_lock_bh(&queue->lock);
        skb_queue_walk_safe(queue, skb, tmp) {
                if (vsock_sk(skb->sk) != vsk)
                        continue;

                __skb_unlink(skb, queue);
                __skb_queue_tail(&freeme, skb);

                if (virtio_vsock_skb_reply(skb))
                        cnt++;
        }
        spin_unlock_bh(&queue->lock);

        __skb_queue_purge(&freeme);

        return cnt;
}
EXPORT_SYMBOL_GPL(virtio_transport_purge_skbs);

int virtio_transport_read_skb(struct vsock_sock *vsk, skb_read_actor_t recv_actor)
{
        struct virtio_vsock_sock *vvs = vsk->trans;
        struct sock *sk = sk_vsock(vsk);
        struct virtio_vsock_hdr *hdr;
        struct sk_buff *skb;
        int off = 0;
        int err;

        spin_lock_bh(&vvs->rx_lock);
        /* Use __skb_recv_datagram() for race-free handling of the receive. It
         * works for types other than dgrams.
         */
        skb = __skb_recv_datagram(sk, &vvs->rx_queue, MSG_DONTWAIT, &off, &err);
        if (!skb) {
                spin_unlock_bh(&vvs->rx_lock);
                return err;
        }

        hdr = virtio_vsock_hdr(skb);
        if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SEQ_EOM)
                vvs->msg_count--;

        virtio_transport_dec_rx_pkt(vvs, le32_to_cpu(hdr->len));
        spin_unlock_bh(&vvs->rx_lock);

        virtio_transport_send_credit_update(vsk);

        return recv_actor(sk, skb);
}
EXPORT_SYMBOL_GPL(virtio_transport_read_skb);

int virtio_transport_notify_set_rcvlowat(struct vsock_sock *vsk, int val)
{
        struct virtio_vsock_sock *vvs = vsk->trans;
        bool send_update;

        spin_lock_bh(&vvs->rx_lock);

        /* If number of available bytes is less than new SO_RCVLOWAT value,
         * kick sender to send more data, because sender may sleep in its
         * 'send()' syscall waiting for enough space at our side. Also
         * don't send credit update when peer already knows actual value -
         * such transmission will be useless.
         */
        send_update = (vvs->rx_bytes < val) &&
                      (vvs->fwd_cnt != vvs->last_fwd_cnt);

        spin_unlock_bh(&vvs->rx_lock);

        if (send_update) {
                int err;

                err = virtio_transport_send_credit_update(vsk);
                if (err < 0)
                        return err;
        }

        return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_set_rcvlowat);

MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Asias He");
MODULE_DESCRIPTION("common code for virtio vsock");