af_vsock.c 64 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315231623172318231923202321232223232324232523262327232823292330233123322333233423352336233723382339234023412342234323442345234623472348234923502351235223532354235523562357235823592360236123622363236423652366236723682369237023712372237323742375237623772378237923802381238223832384238523862387238823892390239123922393239423952396239723982399240024012402240324042405240624072408240924102411241224132414241524162417241824192420242124222423242424252426242724282429243024312432243324342435243624372438243924402441244224432444244524462447244824492450245124522453245424552456245724582459246024612462246324642465246624672468246924702471247224732474247524762477247824792480248124822483248424852486248724882489249024912492249324942495249624972498249925002501250225032504250525062507250825092510251125122513251425152516251725182519252025212522252325242525252625272528252925302531253225332534253525362537253825392540254125422543254425452546254725482549255025512552255325542555255625572558255925602561256225632564256525662567256825692570257125722573257425752576257725782579258025812582258325842585258625872588258925902591259225932594259525962597259825992600260126022603260426052606260726082609261026112612261326142615261626172618261926202621262226232624262526262627262826292630263126322633263426352636263726382639264026412642264326442645264626472648264926502651265226532654265526562657265826592660266126622663266426652666266726682669267026712672267326742675267626772678267926802681268226832684
  1. // SPDX-License-Identifier: GPL-2.0-only
  2. /*
  3. * VMware vSockets Driver
  4. *
  5. * Copyright (C) 2007-2013 VMware, Inc. All rights reserved.
  6. */
  7. /* Implementation notes:
  8. *
  9. * - There are two kinds of sockets: those created by user action (such as
  10. * calling socket(2)) and those created by incoming connection request packets.
  11. *
  12. * - There are two "global" tables, one for bound sockets (sockets that have
  13. * specified an address that they are responsible for) and one for connected
  14. * sockets (sockets that have established a connection with another socket).
  15. * These tables are "global" in that all sockets on the system are placed
  16. * within them. Note, though, that the bound table contains an extra entry
  17. * for a list of unbound sockets and SOCK_DGRAM sockets will always remain in
  18. * that list. The bound table is used solely for lookup of sockets when packets
  19. * are received and that's not necessary for SOCK_DGRAM sockets since we create
  20. * a datagram handle for each and need not perform a lookup. Keeping SOCK_DGRAM
  21. * sockets out of the bound hash buckets will reduce the chance of collisions
  22. * when looking for SOCK_STREAM sockets and prevents us from having to check the
  23. * socket type in the hash table lookups.
  24. *
  25. * - Sockets created by user action will either be "client" sockets that
  26. * initiate a connection or "server" sockets that listen for connections; we do
  27. * not support simultaneous connects (two "client" sockets connecting).
  28. *
  29. * - "Server" sockets are referred to as listener sockets throughout this
  30. * implementation because they are in the TCP_LISTEN state. When a
  31. * connection request is received (the second kind of socket mentioned above),
  32. * we create a new socket and refer to it as a pending socket. These pending
  33. * sockets are placed on the pending connection list of the listener socket.
  34. * When future packets are received for the address the listener socket is
  35. * bound to, we check if the source of the packet is from one that has an
  36. * existing pending connection. If it does, we process the packet for the
  37. * pending socket. When that socket reaches the connected state, it is removed
  38. * from the listener socket's pending list and enqueued in the listener
  39. * socket's accept queue. Callers of accept(2) will accept connected sockets
  40. * from the listener socket's accept queue. If the socket cannot be accepted
  41. * for some reason then it is marked rejected. Once the connection is
  42. * accepted, it is owned by the user process and the responsibility for cleanup
  43. * falls to that user process.
  44. *
  45. * - It is possible that these pending sockets will never reach the connected
  46. * state; in fact, we may never receive another packet after the connection
  47. * request. Because of this, we must schedule a cleanup function to run in the
  48. * future, after some amount of time passes where a connection should have been
  49. * established. This function ensures that the socket is off all lists so it
  50. * cannot be retrieved, then drops all references to the socket so it is cleaned
  51. * up (sock_put() -> sk_free() -> our sk_destruct implementation). Note this
  52. * function will also clean up rejected sockets, those that reach the connected
  53. * state but leave it before they have been accepted.
  54. *
  55. * - Lock ordering for pending or accept queue sockets is:
  56. *
  57. * lock_sock(listener);
  58. * lock_sock_nested(pending, SINGLE_DEPTH_NESTING);
  59. *
  60. * Using explicit nested locking keeps lockdep happy since normally only one
  61. * lock of a given class may be taken at a time.
  62. *
  63. * - Sockets created by user action will be cleaned up when the user process
  64. * calls close(2), causing our release implementation to be called. Our release
  65. * implementation will perform some cleanup then drop the last reference so our
  66. * sk_destruct implementation is invoked. Our sk_destruct implementation will
  67. * perform additional cleanup that's common for both types of sockets.
  68. *
  69. * - A socket's reference count is what ensures that the structure won't be
  70. * freed. Each entry in a list (such as the "global" bound and connected tables
  71. * and the listener socket's pending list and connected queue) ensures a
  72. * reference. When we defer work until process context and pass a socket as our
  73. * argument, we must ensure the reference count is increased to ensure the
  74. * socket isn't freed before the function is run; the deferred function will
  75. * then drop the reference.
  76. *
  77. * - sk->sk_state uses the TCP state constants because they are widely used by
  78. * other address families and exposed to userspace tools like ss(8):
  79. *
  80. * TCP_CLOSE - unconnected
  81. * TCP_SYN_SENT - connecting
  82. * TCP_ESTABLISHED - connected
  83. * TCP_CLOSING - disconnecting
  84. * TCP_LISTEN - listening
  85. */
  86. #include <linux/compat.h>
  87. #include <linux/types.h>
  88. #include <linux/bitops.h>
  89. #include <linux/cred.h>
  90. #include <linux/errqueue.h>
  91. #include <linux/init.h>
  92. #include <linux/io.h>
  93. #include <linux/kernel.h>
  94. #include <linux/sched/signal.h>
  95. #include <linux/kmod.h>
  96. #include <linux/list.h>
  97. #include <linux/miscdevice.h>
  98. #include <linux/module.h>
  99. #include <linux/mutex.h>
  100. #include <linux/net.h>
  101. #include <linux/poll.h>
  102. #include <linux/random.h>
  103. #include <linux/skbuff.h>
  104. #include <linux/smp.h>
  105. #include <linux/socket.h>
  106. #include <linux/stddef.h>
  107. #include <linux/unistd.h>
  108. #include <linux/wait.h>
  109. #include <linux/workqueue.h>
  110. #include <net/sock.h>
  111. #include <net/af_vsock.h>
  112. #include <uapi/linux/vm_sockets.h>
  113. #include <uapi/asm-generic/ioctls.h>
  /* Forward declarations.  vsock_close() and __vsock_bind() are referenced
   * below before their definitions appear.
   */
  static void vsock_close(struct sock *sk, long timeout);
  static int vsock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
  static void vsock_sk_destruct(struct sock *sk);
  static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr);
  118. /* Protocol family. */
  119. struct proto vsock_proto = {
  120. .name = "AF_VSOCK",
  121. .owner = THIS_MODULE,
  122. .obj_size = sizeof(struct vsock_sock),
  123. .close = vsock_close,
  124. #ifdef CONFIG_BPF_SYSCALL
  125. .psock_update_sk_prot = vsock_bpf_update_proto,
  126. #endif
  127. };
  /* Default peer timeout: how long we wait for the peer to respond to a
   * control message, expressed in jiffies (two seconds' worth of HZ ticks).
   */
  #define VSOCK_DEFAULT_CONNECT_TIMEOUT (HZ * 2)

  /* Default, upper-bound and lower-bound values for the socket buffer size. */
  #define VSOCK_DEFAULT_BUFFER_SIZE (256 * 1024)
  #define VSOCK_DEFAULT_BUFFER_MAX_SIZE (256 * 1024)
  #define VSOCK_DEFAULT_BUFFER_MIN_SIZE 128
  /* Currently registered transports, one slot per traffic direction/kind.
   * A NULL slot means no transport of that kind is available.
   */
  static const struct vsock_transport *transport_h2g;   /* host->guest */
  static const struct vsock_transport *transport_g2h;   /* guest->host */
  static const struct vsock_transport *transport_dgram; /* SOCK_DGRAM */
  static const struct vsock_transport *transport_local; /* local (loopback) */

  /* NOTE(review): presumably serializes updates of the slots above during
   * transport (un)registration; the users are outside this part of the file.
   */
  static DEFINE_MUTEX(vsock_register_mutex);
  /**** UTILS ****/

  /* Bound sockets live in the bind hash table, connected sockets in the
   * connected hash table.
   *
   * vsock_bind_table has VSOCK_HASH_SIZE + 1 entries:
   *  - entries [0, VSOCK_HASH_SIZE - 1] are hash buckets for bound sockets,
   *    selected by the local port (vsock_bound_sockets(addr));
   *  - the extra entry [VSOCK_HASH_SIZE] is a single list holding every
   *    unbound socket (vsock_unbound_sockets).
   *
   * Both hash functions reduce modulo VSOCK_HASH_SIZE, so they can never
   * select the extra "unbound" entry.
   */

  /* Upper bound on attempts when searching for a free automatic port. */
  #define MAX_PORT_RETRIES 24

  /* Bucket index for a local address: port modulo table size. */
  #define VSOCK_HASH(addr) ((addr)->svm_port % VSOCK_HASH_SIZE)

  /* Bucket index for a connection: remote CID xor local port. */
  /* XXX This can probably be implemented in a better way. */
  #define VSOCK_CONN_HASH(src, dst) \
      (((src)->svm_cid ^ (dst)->svm_port) % VSOCK_HASH_SIZE)

  /* List heads selected by the hashes above. */
  #define vsock_unbound_sockets (&vsock_bind_table[VSOCK_HASH_SIZE])
  #define vsock_bound_sockets(addr) (&vsock_bind_table[VSOCK_HASH(addr)])
  #define vsock_connected_sockets(src, dst) \
      (&vsock_connected_table[VSOCK_CONN_HASH(src, dst)])

  /* Connected-table bucket for a socket, keyed by (remote, local) address. */
  #define vsock_connected_sockets_vsk(vsk) \
      vsock_connected_sockets(&(vsk)->remote_addr, &(vsk)->local_addr)
  170. struct list_head vsock_bind_table[VSOCK_HASH_SIZE + 1];
  171. EXPORT_SYMBOL_GPL(vsock_bind_table);
  172. struct list_head vsock_connected_table[VSOCK_HASH_SIZE];
  173. EXPORT_SYMBOL_GPL(vsock_connected_table);
  174. DEFINE_SPINLOCK(vsock_table_lock);
  175. EXPORT_SYMBOL_GPL(vsock_table_lock);
  176. /* Autobind this socket to the local address if necessary. */
  177. static int vsock_auto_bind(struct vsock_sock *vsk)
  178. {
  179. struct sock *sk = sk_vsock(vsk);
  180. struct sockaddr_vm local_addr;
  181. if (vsock_addr_bound(&vsk->local_addr))
  182. return 0;
  183. vsock_addr_init(&local_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY);
  184. return __vsock_bind(sk, &local_addr);
  185. }
  186. static void vsock_init_tables(void)
  187. {
  188. int i;
  189. for (i = 0; i < ARRAY_SIZE(vsock_bind_table); i++)
  190. INIT_LIST_HEAD(&vsock_bind_table[i]);
  191. for (i = 0; i < ARRAY_SIZE(vsock_connected_table); i++)
  192. INIT_LIST_HEAD(&vsock_connected_table[i]);
  193. }
  194. static void __vsock_insert_bound(struct list_head *list,
  195. struct vsock_sock *vsk)
  196. {
  197. sock_hold(&vsk->sk);
  198. list_add(&vsk->bound_table, list);
  199. }
  200. static void __vsock_insert_connected(struct list_head *list,
  201. struct vsock_sock *vsk)
  202. {
  203. sock_hold(&vsk->sk);
  204. list_add(&vsk->connected_table, list);
  205. }
  206. static void __vsock_remove_bound(struct vsock_sock *vsk)
  207. {
  208. list_del_init(&vsk->bound_table);
  209. sock_put(&vsk->sk);
  210. }
  211. static void __vsock_remove_connected(struct vsock_sock *vsk)
  212. {
  213. list_del_init(&vsk->connected_table);
  214. sock_put(&vsk->sk);
  215. }
  216. static struct sock *__vsock_find_bound_socket(struct sockaddr_vm *addr)
  217. {
  218. struct vsock_sock *vsk;
  219. list_for_each_entry(vsk, vsock_bound_sockets(addr), bound_table) {
  220. if (vsock_addr_equals_addr(addr, &vsk->local_addr))
  221. return sk_vsock(vsk);
  222. if (addr->svm_port == vsk->local_addr.svm_port &&
  223. (vsk->local_addr.svm_cid == VMADDR_CID_ANY ||
  224. addr->svm_cid == VMADDR_CID_ANY))
  225. return sk_vsock(vsk);
  226. }
  227. return NULL;
  228. }
  229. static struct sock *__vsock_find_connected_socket(struct sockaddr_vm *src,
  230. struct sockaddr_vm *dst)
  231. {
  232. struct vsock_sock *vsk;
  233. list_for_each_entry(vsk, vsock_connected_sockets(src, dst),
  234. connected_table) {
  235. if (vsock_addr_equals_addr(src, &vsk->remote_addr) &&
  236. dst->svm_port == vsk->local_addr.svm_port) {
  237. return sk_vsock(vsk);
  238. }
  239. }
  240. return NULL;
  241. }
  242. static void vsock_insert_unbound(struct vsock_sock *vsk)
  243. {
  244. spin_lock_bh(&vsock_table_lock);
  245. __vsock_insert_bound(vsock_unbound_sockets, vsk);
  246. spin_unlock_bh(&vsock_table_lock);
  247. }
  248. void vsock_insert_connected(struct vsock_sock *vsk)
  249. {
  250. struct list_head *list = vsock_connected_sockets(
  251. &vsk->remote_addr, &vsk->local_addr);
  252. spin_lock_bh(&vsock_table_lock);
  253. __vsock_insert_connected(list, vsk);
  254. spin_unlock_bh(&vsock_table_lock);
  255. }
  256. EXPORT_SYMBOL_GPL(vsock_insert_connected);
  257. void vsock_remove_bound(struct vsock_sock *vsk)
  258. {
  259. spin_lock_bh(&vsock_table_lock);
  260. if (__vsock_in_bound_table(vsk))
  261. __vsock_remove_bound(vsk);
  262. spin_unlock_bh(&vsock_table_lock);
  263. }
  264. EXPORT_SYMBOL_GPL(vsock_remove_bound);
  265. void vsock_remove_connected(struct vsock_sock *vsk)
  266. {
  267. spin_lock_bh(&vsock_table_lock);
  268. if (__vsock_in_connected_table(vsk))
  269. __vsock_remove_connected(vsk);
  270. spin_unlock_bh(&vsock_table_lock);
  271. }
  272. EXPORT_SYMBOL_GPL(vsock_remove_connected);
  273. struct sock *vsock_find_bound_socket(struct sockaddr_vm *addr)
  274. {
  275. struct sock *sk;
  276. spin_lock_bh(&vsock_table_lock);
  277. sk = __vsock_find_bound_socket(addr);
  278. if (sk)
  279. sock_hold(sk);
  280. spin_unlock_bh(&vsock_table_lock);
  281. return sk;
  282. }
  283. EXPORT_SYMBOL_GPL(vsock_find_bound_socket);
  284. struct sock *vsock_find_connected_socket(struct sockaddr_vm *src,
  285. struct sockaddr_vm *dst)
  286. {
  287. struct sock *sk;
  288. spin_lock_bh(&vsock_table_lock);
  289. sk = __vsock_find_connected_socket(src, dst);
  290. if (sk)
  291. sock_hold(sk);
  292. spin_unlock_bh(&vsock_table_lock);
  293. return sk;
  294. }
  295. EXPORT_SYMBOL_GPL(vsock_find_connected_socket);
  296. void vsock_remove_sock(struct vsock_sock *vsk)
  297. {
  298. /* Transport reassignment must not remove the binding. */
  299. if (sock_flag(sk_vsock(vsk), SOCK_DEAD))
  300. vsock_remove_bound(vsk);
  301. vsock_remove_connected(vsk);
  302. }
  303. EXPORT_SYMBOL_GPL(vsock_remove_sock);
  304. void vsock_for_each_connected_socket(struct vsock_transport *transport,
  305. void (*fn)(struct sock *sk))
  306. {
  307. int i;
  308. spin_lock_bh(&vsock_table_lock);
  309. for (i = 0; i < ARRAY_SIZE(vsock_connected_table); i++) {
  310. struct vsock_sock *vsk;
  311. list_for_each_entry(vsk, &vsock_connected_table[i],
  312. connected_table) {
  313. if (vsk->transport != transport)
  314. continue;
  315. fn(sk_vsock(vsk));
  316. }
  317. }
  318. spin_unlock_bh(&vsock_table_lock);
  319. }
  320. EXPORT_SYMBOL_GPL(vsock_for_each_connected_socket);
  321. void vsock_add_pending(struct sock *listener, struct sock *pending)
  322. {
  323. struct vsock_sock *vlistener;
  324. struct vsock_sock *vpending;
  325. vlistener = vsock_sk(listener);
  326. vpending = vsock_sk(pending);
  327. sock_hold(pending);
  328. sock_hold(listener);
  329. list_add_tail(&vpending->pending_links, &vlistener->pending_links);
  330. }
  331. EXPORT_SYMBOL_GPL(vsock_add_pending);
  332. void vsock_remove_pending(struct sock *listener, struct sock *pending)
  333. {
  334. struct vsock_sock *vpending = vsock_sk(pending);
  335. list_del_init(&vpending->pending_links);
  336. sock_put(listener);
  337. sock_put(pending);
  338. }
  339. EXPORT_SYMBOL_GPL(vsock_remove_pending);
  340. void vsock_enqueue_accept(struct sock *listener, struct sock *connected)
  341. {
  342. struct vsock_sock *vlistener;
  343. struct vsock_sock *vconnected;
  344. vlistener = vsock_sk(listener);
  345. vconnected = vsock_sk(connected);
  346. sock_hold(connected);
  347. sock_hold(listener);
  348. list_add_tail(&vconnected->accept_queue, &vlistener->accept_queue);
  349. }
  350. EXPORT_SYMBOL_GPL(vsock_enqueue_accept);
  351. static bool vsock_use_local_transport(unsigned int remote_cid)
  352. {
  353. if (!transport_local)
  354. return false;
  355. if (remote_cid == VMADDR_CID_LOCAL)
  356. return true;
  357. if (transport_g2h) {
  358. return remote_cid == transport_g2h->get_local_cid();
  359. } else {
  360. return remote_cid == VMADDR_CID_HOST;
  361. }
  362. }
  363. static void vsock_deassign_transport(struct vsock_sock *vsk)
  364. {
  365. if (!vsk->transport)
  366. return;
  367. vsk->transport->destruct(vsk);
  368. module_put(vsk->transport->module);
  369. vsk->transport = NULL;
  370. }
  371. /* Assign a transport to a socket and call the .init transport callback.
  372. *
  373. * Note: for connection oriented socket this must be called when vsk->remote_addr
  374. * is set (e.g. during the connect() or when a connection request on a listener
  375. * socket is received).
  376. * The vsk->remote_addr is used to decide which transport to use:
  377. * - remote CID == VMADDR_CID_LOCAL or g2h->local_cid or VMADDR_CID_HOST if
  378. * g2h is not loaded, will use local transport;
  379. * - remote CID <= VMADDR_CID_HOST or h2g is not loaded or remote flags field
  380. * includes VMADDR_FLAG_TO_HOST flag value, will use guest->host transport;
  381. * - remote CID > VMADDR_CID_HOST will use host->guest transport;
  382. */
  383. int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk)
  384. {
  385. const struct vsock_transport *new_transport;
  386. struct sock *sk = sk_vsock(vsk);
  387. unsigned int remote_cid = vsk->remote_addr.svm_cid;
  388. __u8 remote_flags;
  389. int ret;
  390. /* If the packet is coming with the source and destination CIDs higher
  391. * than VMADDR_CID_HOST, then a vsock channel where all the packets are
  392. * forwarded to the host should be established. Then the host will
  393. * need to forward the packets to the guest.
  394. *
  395. * The flag is set on the (listen) receive path (psk is not NULL). On
  396. * the connect path the flag can be set by the user space application.
  397. */
  398. if (psk && vsk->local_addr.svm_cid > VMADDR_CID_HOST &&
  399. vsk->remote_addr.svm_cid > VMADDR_CID_HOST)
  400. vsk->remote_addr.svm_flags |= VMADDR_FLAG_TO_HOST;
  401. remote_flags = vsk->remote_addr.svm_flags;
  402. switch (sk->sk_type) {
  403. case SOCK_DGRAM:
  404. new_transport = transport_dgram;
  405. break;
  406. case SOCK_STREAM:
  407. case SOCK_SEQPACKET:
  408. if (vsock_use_local_transport(remote_cid))
  409. new_transport = transport_local;
  410. else if (remote_cid <= VMADDR_CID_HOST || !transport_h2g ||
  411. (remote_flags & VMADDR_FLAG_TO_HOST))
  412. new_transport = transport_g2h;
  413. else
  414. new_transport = transport_h2g;
  415. break;
  416. default:
  417. return -ESOCKTNOSUPPORT;
  418. }
  419. if (vsk->transport) {
  420. if (vsk->transport == new_transport)
  421. return 0;
  422. /* transport->release() must be called with sock lock acquired.
  423. * This path can only be taken during vsock_connect(), where we
  424. * have already held the sock lock. In the other cases, this
  425. * function is called on a new socket which is not assigned to
  426. * any transport.
  427. */
  428. vsk->transport->release(vsk);
  429. vsock_deassign_transport(vsk);
  430. /* transport's release() and destruct() can touch some socket
  431. * state, since we are reassigning the socket to a new transport
  432. * during vsock_connect(), let's reset these fields to have a
  433. * clean state.
  434. */
  435. sock_reset_flag(sk, SOCK_DONE);
  436. sk->sk_state = TCP_CLOSE;
  437. vsk->peer_shutdown = 0;
  438. }
  439. /* We increase the module refcnt to prevent the transport unloading
  440. * while there are open sockets assigned to it.
  441. */
  442. if (!new_transport || !try_module_get(new_transport->module))
  443. return -ENODEV;
  444. if (sk->sk_type == SOCK_SEQPACKET) {
  445. if (!new_transport->seqpacket_allow ||
  446. !new_transport->seqpacket_allow(remote_cid)) {
  447. module_put(new_transport->module);
  448. return -ESOCKTNOSUPPORT;
  449. }
  450. }
  451. ret = new_transport->init(vsk, psk);
  452. if (ret) {
  453. module_put(new_transport->module);
  454. return ret;
  455. }
  456. vsk->transport = new_transport;
  457. return 0;
  458. }
  459. EXPORT_SYMBOL_GPL(vsock_assign_transport);
  460. bool vsock_find_cid(unsigned int cid)
  461. {
  462. if (transport_g2h && cid == transport_g2h->get_local_cid())
  463. return true;
  464. if (transport_h2g && cid == VMADDR_CID_HOST)
  465. return true;
  466. if (transport_local && cid == VMADDR_CID_LOCAL)
  467. return true;
  468. return false;
  469. }
  470. EXPORT_SYMBOL_GPL(vsock_find_cid);
  471. static struct sock *vsock_dequeue_accept(struct sock *listener)
  472. {
  473. struct vsock_sock *vlistener;
  474. struct vsock_sock *vconnected;
  475. vlistener = vsock_sk(listener);
  476. if (list_empty(&vlistener->accept_queue))
  477. return NULL;
  478. vconnected = list_entry(vlistener->accept_queue.next,
  479. struct vsock_sock, accept_queue);
  480. list_del_init(&vconnected->accept_queue);
  481. sock_put(listener);
  482. /* The caller will need a reference on the connected socket so we let
  483. * it call sock_put().
  484. */
  485. return sk_vsock(vconnected);
  486. }
  487. static bool vsock_is_accept_queue_empty(struct sock *sk)
  488. {
  489. struct vsock_sock *vsk = vsock_sk(sk);
  490. return list_empty(&vsk->accept_queue);
  491. }
  492. static bool vsock_is_pending(struct sock *sk)
  493. {
  494. struct vsock_sock *vsk = vsock_sk(sk);
  495. return !list_empty(&vsk->pending_links);
  496. }
  497. static int vsock_send_shutdown(struct sock *sk, int mode)
  498. {
  499. struct vsock_sock *vsk = vsock_sk(sk);
  500. if (!vsk->transport)
  501. return -ENODEV;
  502. return vsk->transport->shutdown(vsk, mode);
  503. }
  504. static void vsock_pending_work(struct work_struct *work)
  505. {
  506. struct sock *sk;
  507. struct sock *listener;
  508. struct vsock_sock *vsk;
  509. bool cleanup;
  510. vsk = container_of(work, struct vsock_sock, pending_work.work);
  511. sk = sk_vsock(vsk);
  512. listener = vsk->listener;
  513. cleanup = true;
  514. lock_sock(listener);
  515. lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
  516. if (vsock_is_pending(sk)) {
  517. vsock_remove_pending(listener, sk);
  518. sk_acceptq_removed(listener);
  519. } else if (!vsk->rejected) {
  520. /* We are not on the pending list and accept() did not reject
  521. * us, so we must have been accepted by our user process. We
  522. * just need to drop our references to the sockets and be on
  523. * our way.
  524. */
  525. cleanup = false;
  526. goto out;
  527. }
  528. /* We need to remove ourself from the global connected sockets list so
  529. * incoming packets can't find this socket, and to reduce the reference
  530. * count.
  531. */
  532. vsock_remove_connected(vsk);
  533. sk->sk_state = TCP_CLOSE;
  534. out:
  535. release_sock(sk);
  536. release_sock(listener);
  537. if (cleanup)
  538. sock_put(sk);
  539. sock_put(sk);
  540. sock_put(listener);
  541. }
  542. /**** SOCKET OPERATIONS ****/
  543. static int __vsock_bind_connectible(struct vsock_sock *vsk,
  544. struct sockaddr_vm *addr)
  545. {
  546. static u32 port;
  547. struct sockaddr_vm new_addr;
  548. if (!port)
  549. port = get_random_u32_above(LAST_RESERVED_PORT);
  550. vsock_addr_init(&new_addr, addr->svm_cid, addr->svm_port);
  551. if (addr->svm_port == VMADDR_PORT_ANY) {
  552. bool found = false;
  553. unsigned int i;
  554. for (i = 0; i < MAX_PORT_RETRIES; i++) {
  555. if (port <= LAST_RESERVED_PORT)
  556. port = LAST_RESERVED_PORT + 1;
  557. new_addr.svm_port = port++;
  558. if (!__vsock_find_bound_socket(&new_addr)) {
  559. found = true;
  560. break;
  561. }
  562. }
  563. if (!found)
  564. return -EADDRNOTAVAIL;
  565. } else {
  566. /* If port is in reserved range, ensure caller
  567. * has necessary privileges.
  568. */
  569. if (addr->svm_port <= LAST_RESERVED_PORT &&
  570. !capable(CAP_NET_BIND_SERVICE)) {
  571. return -EACCES;
  572. }
  573. if (__vsock_find_bound_socket(&new_addr))
  574. return -EADDRINUSE;
  575. }
  576. vsock_addr_init(&vsk->local_addr, new_addr.svm_cid, new_addr.svm_port);
  577. /* Remove connection oriented sockets from the unbound list and add them
  578. * to the hash table for easy lookup by its address. The unbound list
  579. * is simply an extra entry at the end of the hash table, a trick used
  580. * by AF_UNIX.
  581. */
  582. __vsock_remove_bound(vsk);
  583. __vsock_insert_bound(vsock_bound_sockets(&vsk->local_addr), vsk);
  584. return 0;
  585. }
  586. static int __vsock_bind_dgram(struct vsock_sock *vsk,
  587. struct sockaddr_vm *addr)
  588. {
  589. return vsk->transport->dgram_bind(vsk, addr);
  590. }
  591. static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr)
  592. {
  593. struct vsock_sock *vsk = vsock_sk(sk);
  594. int retval;
  595. /* First ensure this socket isn't already bound. */
  596. if (vsock_addr_bound(&vsk->local_addr))
  597. return -EINVAL;
  598. /* Now bind to the provided address or select appropriate values if
  599. * none are provided (VMADDR_CID_ANY and VMADDR_PORT_ANY). Note that
  600. * like AF_INET prevents binding to a non-local IP address (in most
  601. * cases), we only allow binding to a local CID.
  602. */
  603. if (addr->svm_cid != VMADDR_CID_ANY && !vsock_find_cid(addr->svm_cid))
  604. return -EADDRNOTAVAIL;
  605. switch (sk->sk_socket->type) {
  606. case SOCK_STREAM:
  607. case SOCK_SEQPACKET:
  608. spin_lock_bh(&vsock_table_lock);
  609. retval = __vsock_bind_connectible(vsk, addr);
  610. spin_unlock_bh(&vsock_table_lock);
  611. break;
  612. case SOCK_DGRAM:
  613. retval = __vsock_bind_dgram(vsk, addr);
  614. break;
  615. default:
  616. retval = -EINVAL;
  617. break;
  618. }
  619. return retval;
  620. }
  621. static void vsock_connect_timeout(struct work_struct *work);
  622. static struct sock *__vsock_create(struct net *net,
  623. struct socket *sock,
  624. struct sock *parent,
  625. gfp_t priority,
  626. unsigned short type,
  627. int kern)
  628. {
  629. struct sock *sk;
  630. struct vsock_sock *psk;
  631. struct vsock_sock *vsk;
  632. sk = sk_alloc(net, AF_VSOCK, priority, &vsock_proto, kern);
  633. if (!sk)
  634. return NULL;
  635. sock_init_data(sock, sk);
  636. /* sk->sk_type is normally set in sock_init_data, but only if sock is
  637. * non-NULL. We make sure that our sockets always have a type by
  638. * setting it here if needed.
  639. */
  640. if (!sock)
  641. sk->sk_type = type;
  642. vsk = vsock_sk(sk);
  643. vsock_addr_init(&vsk->local_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY);
  644. vsock_addr_init(&vsk->remote_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY);
  645. sk->sk_destruct = vsock_sk_destruct;
  646. sk->sk_backlog_rcv = vsock_queue_rcv_skb;
  647. sock_reset_flag(sk, SOCK_DONE);
  648. INIT_LIST_HEAD(&vsk->bound_table);
  649. INIT_LIST_HEAD(&vsk->connected_table);
  650. vsk->listener = NULL;
  651. INIT_LIST_HEAD(&vsk->pending_links);
  652. INIT_LIST_HEAD(&vsk->accept_queue);
  653. vsk->rejected = false;
  654. vsk->sent_request = false;
  655. vsk->ignore_connecting_rst = false;
  656. vsk->peer_shutdown = 0;
  657. INIT_DELAYED_WORK(&vsk->connect_work, vsock_connect_timeout);
  658. INIT_DELAYED_WORK(&vsk->pending_work, vsock_pending_work);
  659. psk = parent ? vsock_sk(parent) : NULL;
  660. if (parent) {
  661. vsk->trusted = psk->trusted;
  662. vsk->owner = get_cred(psk->owner);
  663. vsk->connect_timeout = psk->connect_timeout;
  664. vsk->buffer_size = psk->buffer_size;
  665. vsk->buffer_min_size = psk->buffer_min_size;
  666. vsk->buffer_max_size = psk->buffer_max_size;
  667. security_sk_clone(parent, sk);
  668. } else {
  669. vsk->trusted = ns_capable_noaudit(&init_user_ns, CAP_NET_ADMIN);
  670. vsk->owner = get_current_cred();
  671. vsk->connect_timeout = VSOCK_DEFAULT_CONNECT_TIMEOUT;
  672. vsk->buffer_size = VSOCK_DEFAULT_BUFFER_SIZE;
  673. vsk->buffer_min_size = VSOCK_DEFAULT_BUFFER_MIN_SIZE;
  674. vsk->buffer_max_size = VSOCK_DEFAULT_BUFFER_MAX_SIZE;
  675. }
  676. return sk;
  677. }
  678. static bool sock_type_connectible(u16 type)
  679. {
  680. return (type == SOCK_STREAM) || (type == SOCK_SEQPACKET);
  681. }
  682. static void __vsock_release(struct sock *sk, int level)
  683. {
  684. struct vsock_sock *vsk;
  685. struct sock *pending;
  686. vsk = vsock_sk(sk);
  687. pending = NULL; /* Compiler warning. */
  688. /* When "level" is SINGLE_DEPTH_NESTING, use the nested
  689. * version to avoid the warning "possible recursive locking
  690. * detected". When "level" is 0, lock_sock_nested(sk, level)
  691. * is the same as lock_sock(sk).
  692. */
  693. lock_sock_nested(sk, level);
  694. /* Indicate to vsock_remove_sock() that the socket is being released and
  695. * can be removed from the bound_table. Unlike transport reassignment
  696. * case, where the socket must remain bound despite vsock_remove_sock()
  697. * being called from the transport release() callback.
  698. */
  699. sock_set_flag(sk, SOCK_DEAD);
  700. if (vsk->transport)
  701. vsk->transport->release(vsk);
  702. else if (sock_type_connectible(sk->sk_type))
  703. vsock_remove_sock(vsk);
  704. sock_orphan(sk);
  705. sk->sk_shutdown = SHUTDOWN_MASK;
  706. skb_queue_purge(&sk->sk_receive_queue);
  707. /* Clean up any sockets that never were accepted. */
  708. while ((pending = vsock_dequeue_accept(sk)) != NULL) {
  709. __vsock_release(pending, SINGLE_DEPTH_NESTING);
  710. sock_put(pending);
  711. }
  712. release_sock(sk);
  713. sock_put(sk);
  714. }
  715. static void vsock_sk_destruct(struct sock *sk)
  716. {
  717. struct vsock_sock *vsk = vsock_sk(sk);
  718. /* Flush MSG_ZEROCOPY leftovers. */
  719. __skb_queue_purge(&sk->sk_error_queue);
  720. vsock_deassign_transport(vsk);
  721. /* When clearing these addresses, there's no need to set the family and
  722. * possibly register the address family with the kernel.
  723. */
  724. vsock_addr_init(&vsk->local_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY);
  725. vsock_addr_init(&vsk->remote_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY);
  726. put_cred(vsk->owner);
  727. }
  728. static int vsock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
  729. {
  730. int err;
  731. err = sock_queue_rcv_skb(sk, skb);
  732. if (err)
  733. kfree_skb(skb);
  734. return err;
  735. }
  736. struct sock *vsock_create_connected(struct sock *parent)
  737. {
  738. return __vsock_create(sock_net(parent), NULL, parent, GFP_KERNEL,
  739. parent->sk_type, 0);
  740. }
  741. EXPORT_SYMBOL_GPL(vsock_create_connected);
  742. s64 vsock_stream_has_data(struct vsock_sock *vsk)
  743. {
  744. if (WARN_ON(!vsk->transport))
  745. return 0;
  746. return vsk->transport->stream_has_data(vsk);
  747. }
  748. EXPORT_SYMBOL_GPL(vsock_stream_has_data);
  749. s64 vsock_connectible_has_data(struct vsock_sock *vsk)
  750. {
  751. struct sock *sk = sk_vsock(vsk);
  752. if (WARN_ON(!vsk->transport))
  753. return 0;
  754. if (sk->sk_type == SOCK_SEQPACKET)
  755. return vsk->transport->seqpacket_has_data(vsk);
  756. else
  757. return vsock_stream_has_data(vsk);
  758. }
  759. EXPORT_SYMBOL_GPL(vsock_connectible_has_data);
  760. s64 vsock_stream_has_space(struct vsock_sock *vsk)
  761. {
  762. if (WARN_ON(!vsk->transport))
  763. return 0;
  764. return vsk->transport->stream_has_space(vsk);
  765. }
  766. EXPORT_SYMBOL_GPL(vsock_stream_has_space);
  767. void vsock_data_ready(struct sock *sk)
  768. {
  769. struct vsock_sock *vsk = vsock_sk(sk);
  770. if (vsock_stream_has_data(vsk) >= sk->sk_rcvlowat ||
  771. sock_flag(sk, SOCK_DONE))
  772. sk->sk_data_ready(sk);
  773. }
  774. EXPORT_SYMBOL_GPL(vsock_data_ready);
  775. /* Dummy callback required by sockmap.
  776. * See unconditional call of saved_close() in sock_map_close().
  777. */
  778. static void vsock_close(struct sock *sk, long timeout)
  779. {
  780. }
  781. static int vsock_release(struct socket *sock)
  782. {
  783. struct sock *sk = sock->sk;
  784. if (!sk)
  785. return 0;
  786. sk->sk_prot->close(sk, 0);
  787. __vsock_release(sk, 0);
  788. sock->sk = NULL;
  789. sock->state = SS_FREE;
  790. return 0;
  791. }
  792. static int
  793. vsock_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
  794. {
  795. int err;
  796. struct sock *sk;
  797. struct sockaddr_vm *vm_addr;
  798. sk = sock->sk;
  799. if (vsock_addr_cast(addr, addr_len, &vm_addr) != 0)
  800. return -EINVAL;
  801. lock_sock(sk);
  802. err = __vsock_bind(sk, vm_addr);
  803. release_sock(sk);
  804. return err;
  805. }
  806. static int vsock_getname(struct socket *sock,
  807. struct sockaddr *addr, int peer)
  808. {
  809. int err;
  810. struct sock *sk;
  811. struct vsock_sock *vsk;
  812. struct sockaddr_vm *vm_addr;
  813. sk = sock->sk;
  814. vsk = vsock_sk(sk);
  815. err = 0;
  816. lock_sock(sk);
  817. if (peer) {
  818. if (sock->state != SS_CONNECTED) {
  819. err = -ENOTCONN;
  820. goto out;
  821. }
  822. vm_addr = &vsk->remote_addr;
  823. } else {
  824. vm_addr = &vsk->local_addr;
  825. }
  826. if (!vm_addr) {
  827. err = -EINVAL;
  828. goto out;
  829. }
  830. /* sys_getsockname() and sys_getpeername() pass us a
  831. * MAX_SOCK_ADDR-sized buffer and don't set addr_len. Unfortunately
  832. * that macro is defined in socket.c instead of .h, so we hardcode its
  833. * value here.
  834. */
  835. BUILD_BUG_ON(sizeof(*vm_addr) > 128);
  836. memcpy(addr, vm_addr, sizeof(*vm_addr));
  837. err = sizeof(*vm_addr);
  838. out:
  839. release_sock(sk);
  840. return err;
  841. }
  842. static int vsock_shutdown(struct socket *sock, int mode)
  843. {
  844. int err;
  845. struct sock *sk;
  846. /* User level uses SHUT_RD (0) and SHUT_WR (1), but the kernel uses
  847. * RCV_SHUTDOWN (1) and SEND_SHUTDOWN (2), so we must increment mode
  848. * here like the other address families do. Note also that the
  849. * increment makes SHUT_RDWR (2) into RCV_SHUTDOWN | SEND_SHUTDOWN (3),
  850. * which is what we want.
  851. */
  852. mode++;
  853. if ((mode & ~SHUTDOWN_MASK) || !mode)
  854. return -EINVAL;
  855. /* If this is a connection oriented socket and it is not connected then
  856. * bail out immediately. If it is a DGRAM socket then we must first
  857. * kick the socket so that it wakes up from any sleeping calls, for
  858. * example recv(), and then afterwards return the error.
  859. */
  860. sk = sock->sk;
  861. lock_sock(sk);
  862. if (sock->state == SS_UNCONNECTED) {
  863. err = -ENOTCONN;
  864. if (sock_type_connectible(sk->sk_type))
  865. goto out;
  866. } else {
  867. sock->state = SS_DISCONNECTING;
  868. err = 0;
  869. }
  870. /* Receive and send shutdowns are treated alike. */
  871. mode = mode & (RCV_SHUTDOWN | SEND_SHUTDOWN);
  872. if (mode) {
  873. sk->sk_shutdown |= mode;
  874. sk->sk_state_change(sk);
  875. if (sock_type_connectible(sk->sk_type)) {
  876. sock_reset_flag(sk, SOCK_DONE);
  877. vsock_send_shutdown(sk, mode);
  878. }
  879. }
  880. out:
  881. release_sock(sk);
  882. return err;
  883. }
  884. static __poll_t vsock_poll(struct file *file, struct socket *sock,
  885. poll_table *wait)
  886. {
  887. struct sock *sk;
  888. __poll_t mask;
  889. struct vsock_sock *vsk;
  890. sk = sock->sk;
  891. vsk = vsock_sk(sk);
  892. poll_wait(file, sk_sleep(sk), wait);
  893. mask = 0;
  894. if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue))
  895. /* Signify that there has been an error on this socket. */
  896. mask |= EPOLLERR;
  897. /* INET sockets treat local write shutdown and peer write shutdown as a
  898. * case of EPOLLHUP set.
  899. */
  900. if ((sk->sk_shutdown == SHUTDOWN_MASK) ||
  901. ((sk->sk_shutdown & SEND_SHUTDOWN) &&
  902. (vsk->peer_shutdown & SEND_SHUTDOWN))) {
  903. mask |= EPOLLHUP;
  904. }
  905. if (sk->sk_shutdown & RCV_SHUTDOWN ||
  906. vsk->peer_shutdown & SEND_SHUTDOWN) {
  907. mask |= EPOLLRDHUP;
  908. }
  909. if (sk_is_readable(sk))
  910. mask |= EPOLLIN | EPOLLRDNORM;
  911. if (sock->type == SOCK_DGRAM) {
  912. /* For datagram sockets we can read if there is something in
  913. * the queue and write as long as the socket isn't shutdown for
  914. * sending.
  915. */
  916. if (!skb_queue_empty_lockless(&sk->sk_receive_queue) ||
  917. (sk->sk_shutdown & RCV_SHUTDOWN)) {
  918. mask |= EPOLLIN | EPOLLRDNORM;
  919. }
  920. if (!(sk->sk_shutdown & SEND_SHUTDOWN))
  921. mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
  922. } else if (sock_type_connectible(sk->sk_type)) {
  923. const struct vsock_transport *transport;
  924. lock_sock(sk);
  925. transport = vsk->transport;
  926. /* Listening sockets that have connections in their accept
  927. * queue can be read.
  928. */
  929. if (sk->sk_state == TCP_LISTEN
  930. && !vsock_is_accept_queue_empty(sk))
  931. mask |= EPOLLIN | EPOLLRDNORM;
  932. /* If there is something in the queue then we can read. */
  933. if (transport && transport->stream_is_active(vsk) &&
  934. !(sk->sk_shutdown & RCV_SHUTDOWN)) {
  935. bool data_ready_now = false;
  936. int target = sock_rcvlowat(sk, 0, INT_MAX);
  937. int ret = transport->notify_poll_in(
  938. vsk, target, &data_ready_now);
  939. if (ret < 0) {
  940. mask |= EPOLLERR;
  941. } else {
  942. if (data_ready_now)
  943. mask |= EPOLLIN | EPOLLRDNORM;
  944. }
  945. }
  946. /* Sockets whose connections have been closed, reset, or
  947. * terminated should also be considered read, and we check the
  948. * shutdown flag for that.
  949. */
  950. if (sk->sk_shutdown & RCV_SHUTDOWN ||
  951. vsk->peer_shutdown & SEND_SHUTDOWN) {
  952. mask |= EPOLLIN | EPOLLRDNORM;
  953. }
  954. /* Connected sockets that can produce data can be written. */
  955. if (transport && sk->sk_state == TCP_ESTABLISHED) {
  956. if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
  957. bool space_avail_now = false;
  958. int ret = transport->notify_poll_out(
  959. vsk, 1, &space_avail_now);
  960. if (ret < 0) {
  961. mask |= EPOLLERR;
  962. } else {
  963. if (space_avail_now)
  964. /* Remove EPOLLWRBAND since INET
  965. * sockets are not setting it.
  966. */
  967. mask |= EPOLLOUT | EPOLLWRNORM;
  968. }
  969. }
  970. }
  971. /* Simulate INET socket poll behaviors, which sets
  972. * EPOLLOUT|EPOLLWRNORM when peer is closed and nothing to read,
  973. * but local send is not shutdown.
  974. */
  975. if (sk->sk_state == TCP_CLOSE || sk->sk_state == TCP_CLOSING) {
  976. if (!(sk->sk_shutdown & SEND_SHUTDOWN))
  977. mask |= EPOLLOUT | EPOLLWRNORM;
  978. }
  979. release_sock(sk);
  980. }
  981. return mask;
  982. }
  983. static int vsock_read_skb(struct sock *sk, skb_read_actor_t read_actor)
  984. {
  985. struct vsock_sock *vsk = vsock_sk(sk);
  986. return vsk->transport->read_skb(vsk, read_actor);
  987. }
  988. static int vsock_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
  989. size_t len)
  990. {
  991. int err;
  992. struct sock *sk;
  993. struct vsock_sock *vsk;
  994. struct sockaddr_vm *remote_addr;
  995. const struct vsock_transport *transport;
  996. if (msg->msg_flags & MSG_OOB)
  997. return -EOPNOTSUPP;
  998. /* For now, MSG_DONTWAIT is always assumed... */
  999. err = 0;
  1000. sk = sock->sk;
  1001. vsk = vsock_sk(sk);
  1002. lock_sock(sk);
  1003. transport = vsk->transport;
  1004. err = vsock_auto_bind(vsk);
  1005. if (err)
  1006. goto out;
  1007. /* If the provided message contains an address, use that. Otherwise
  1008. * fall back on the socket's remote handle (if it has been connected).
  1009. */
  1010. if (msg->msg_name &&
  1011. vsock_addr_cast(msg->msg_name, msg->msg_namelen,
  1012. &remote_addr) == 0) {
  1013. /* Ensure this address is of the right type and is a valid
  1014. * destination.
  1015. */
  1016. if (remote_addr->svm_cid == VMADDR_CID_ANY)
  1017. remote_addr->svm_cid = transport->get_local_cid();
  1018. if (!vsock_addr_bound(remote_addr)) {
  1019. err = -EINVAL;
  1020. goto out;
  1021. }
  1022. } else if (sock->state == SS_CONNECTED) {
  1023. remote_addr = &vsk->remote_addr;
  1024. if (remote_addr->svm_cid == VMADDR_CID_ANY)
  1025. remote_addr->svm_cid = transport->get_local_cid();
  1026. /* XXX Should connect() or this function ensure remote_addr is
  1027. * bound?
  1028. */
  1029. if (!vsock_addr_bound(&vsk->remote_addr)) {
  1030. err = -EINVAL;
  1031. goto out;
  1032. }
  1033. } else {
  1034. err = -EINVAL;
  1035. goto out;
  1036. }
  1037. if (!transport->dgram_allow(remote_addr->svm_cid,
  1038. remote_addr->svm_port)) {
  1039. err = -EINVAL;
  1040. goto out;
  1041. }
  1042. err = transport->dgram_enqueue(vsk, remote_addr, msg, len);
  1043. out:
  1044. release_sock(sk);
  1045. return err;
  1046. }
  1047. static int vsock_dgram_connect(struct socket *sock,
  1048. struct sockaddr *addr, int addr_len, int flags)
  1049. {
  1050. int err;
  1051. struct sock *sk;
  1052. struct vsock_sock *vsk;
  1053. struct sockaddr_vm *remote_addr;
  1054. sk = sock->sk;
  1055. vsk = vsock_sk(sk);
  1056. err = vsock_addr_cast(addr, addr_len, &remote_addr);
  1057. if (err == -EAFNOSUPPORT && remote_addr->svm_family == AF_UNSPEC) {
  1058. lock_sock(sk);
  1059. vsock_addr_init(&vsk->remote_addr, VMADDR_CID_ANY,
  1060. VMADDR_PORT_ANY);
  1061. sock->state = SS_UNCONNECTED;
  1062. release_sock(sk);
  1063. return 0;
  1064. } else if (err != 0)
  1065. return -EINVAL;
  1066. lock_sock(sk);
  1067. err = vsock_auto_bind(vsk);
  1068. if (err)
  1069. goto out;
  1070. if (!vsk->transport->dgram_allow(remote_addr->svm_cid,
  1071. remote_addr->svm_port)) {
  1072. err = -EINVAL;
  1073. goto out;
  1074. }
  1075. memcpy(&vsk->remote_addr, remote_addr, sizeof(vsk->remote_addr));
  1076. sock->state = SS_CONNECTED;
  1077. /* sock map disallows redirection of non-TCP sockets with sk_state !=
  1078. * TCP_ESTABLISHED (see sock_map_redirect_allowed()), so we set
  1079. * TCP_ESTABLISHED here to allow redirection of connected vsock dgrams.
  1080. *
  1081. * This doesn't seem to be abnormal state for datagram sockets, as the
  1082. * same approach can be see in other datagram socket types as well
  1083. * (such as unix sockets).
  1084. */
  1085. sk->sk_state = TCP_ESTABLISHED;
  1086. out:
  1087. release_sock(sk);
  1088. return err;
  1089. }
  1090. int __vsock_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
  1091. size_t len, int flags)
  1092. {
  1093. struct sock *sk = sock->sk;
  1094. struct vsock_sock *vsk = vsock_sk(sk);
  1095. return vsk->transport->dgram_dequeue(vsk, msg, len, flags);
  1096. }
  1097. int vsock_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
  1098. size_t len, int flags)
  1099. {
  1100. #ifdef CONFIG_BPF_SYSCALL
  1101. struct sock *sk = sock->sk;
  1102. const struct proto *prot;
  1103. prot = READ_ONCE(sk->sk_prot);
  1104. if (prot != &vsock_proto)
  1105. return prot->recvmsg(sk, msg, len, flags, NULL);
  1106. #endif
  1107. return __vsock_dgram_recvmsg(sock, msg, len, flags);
  1108. }
  1109. EXPORT_SYMBOL_GPL(vsock_dgram_recvmsg);
  1110. static int vsock_do_ioctl(struct socket *sock, unsigned int cmd,
  1111. int __user *arg)
  1112. {
  1113. struct sock *sk = sock->sk;
  1114. struct vsock_sock *vsk;
  1115. int ret;
  1116. vsk = vsock_sk(sk);
  1117. switch (cmd) {
  1118. case SIOCOUTQ: {
  1119. ssize_t n_bytes;
  1120. if (!vsk->transport || !vsk->transport->unsent_bytes) {
  1121. ret = -EOPNOTSUPP;
  1122. break;
  1123. }
  1124. if (sock_type_connectible(sk->sk_type) && sk->sk_state == TCP_LISTEN) {
  1125. ret = -EINVAL;
  1126. break;
  1127. }
  1128. n_bytes = vsk->transport->unsent_bytes(vsk);
  1129. if (n_bytes < 0) {
  1130. ret = n_bytes;
  1131. break;
  1132. }
  1133. ret = put_user(n_bytes, arg);
  1134. break;
  1135. }
  1136. default:
  1137. ret = -ENOIOCTLCMD;
  1138. }
  1139. return ret;
  1140. }
  1141. static int vsock_ioctl(struct socket *sock, unsigned int cmd,
  1142. unsigned long arg)
  1143. {
  1144. int ret;
  1145. lock_sock(sock->sk);
  1146. ret = vsock_do_ioctl(sock, cmd, (int __user *)arg);
  1147. release_sock(sock->sk);
  1148. return ret;
  1149. }
  1150. static const struct proto_ops vsock_dgram_ops = {
  1151. .family = PF_VSOCK,
  1152. .owner = THIS_MODULE,
  1153. .release = vsock_release,
  1154. .bind = vsock_bind,
  1155. .connect = vsock_dgram_connect,
  1156. .socketpair = sock_no_socketpair,
  1157. .accept = sock_no_accept,
  1158. .getname = vsock_getname,
  1159. .poll = vsock_poll,
  1160. .ioctl = vsock_ioctl,
  1161. .listen = sock_no_listen,
  1162. .shutdown = vsock_shutdown,
  1163. .sendmsg = vsock_dgram_sendmsg,
  1164. .recvmsg = vsock_dgram_recvmsg,
  1165. .mmap = sock_no_mmap,
  1166. .read_skb = vsock_read_skb,
  1167. };
  1168. static int vsock_transport_cancel_pkt(struct vsock_sock *vsk)
  1169. {
  1170. const struct vsock_transport *transport = vsk->transport;
  1171. if (!transport || !transport->cancel_pkt)
  1172. return -EOPNOTSUPP;
  1173. return transport->cancel_pkt(vsk);
  1174. }
  1175. static void vsock_connect_timeout(struct work_struct *work)
  1176. {
  1177. struct sock *sk;
  1178. struct vsock_sock *vsk;
  1179. vsk = container_of(work, struct vsock_sock, connect_work.work);
  1180. sk = sk_vsock(vsk);
  1181. lock_sock(sk);
  1182. if (sk->sk_state == TCP_SYN_SENT &&
  1183. (sk->sk_shutdown != SHUTDOWN_MASK)) {
  1184. sk->sk_state = TCP_CLOSE;
  1185. sk->sk_socket->state = SS_UNCONNECTED;
  1186. sk->sk_err = ETIMEDOUT;
  1187. sk_error_report(sk);
  1188. vsock_transport_cancel_pkt(vsk);
  1189. }
  1190. release_sock(sk);
  1191. sock_put(sk);
  1192. }
  1193. static int vsock_connect(struct socket *sock, struct sockaddr *addr,
  1194. int addr_len, int flags)
  1195. {
  1196. int err;
  1197. struct sock *sk;
  1198. struct vsock_sock *vsk;
  1199. const struct vsock_transport *transport;
  1200. struct sockaddr_vm *remote_addr;
  1201. long timeout;
  1202. DEFINE_WAIT(wait);
  1203. err = 0;
  1204. sk = sock->sk;
  1205. vsk = vsock_sk(sk);
  1206. lock_sock(sk);
  1207. /* XXX AF_UNSPEC should make us disconnect like AF_INET. */
  1208. switch (sock->state) {
  1209. case SS_CONNECTED:
  1210. err = -EISCONN;
  1211. goto out;
  1212. case SS_DISCONNECTING:
  1213. err = -EINVAL;
  1214. goto out;
  1215. case SS_CONNECTING:
  1216. /* This continues on so we can move sock into the SS_CONNECTED
  1217. * state once the connection has completed (at which point err
  1218. * will be set to zero also). Otherwise, we will either wait
  1219. * for the connection or return -EALREADY should this be a
  1220. * non-blocking call.
  1221. */
  1222. err = -EALREADY;
  1223. if (flags & O_NONBLOCK)
  1224. goto out;
  1225. break;
  1226. default:
  1227. if ((sk->sk_state == TCP_LISTEN) ||
  1228. vsock_addr_cast(addr, addr_len, &remote_addr) != 0) {
  1229. err = -EINVAL;
  1230. goto out;
  1231. }
  1232. /* Set the remote address that we are connecting to. */
  1233. memcpy(&vsk->remote_addr, remote_addr,
  1234. sizeof(vsk->remote_addr));
  1235. err = vsock_assign_transport(vsk, NULL);
  1236. if (err)
  1237. goto out;
  1238. transport = vsk->transport;
  1239. /* The hypervisor and well-known contexts do not have socket
  1240. * endpoints.
  1241. */
  1242. if (!transport ||
  1243. !transport->stream_allow(remote_addr->svm_cid,
  1244. remote_addr->svm_port)) {
  1245. err = -ENETUNREACH;
  1246. goto out;
  1247. }
  1248. if (vsock_msgzerocopy_allow(transport)) {
  1249. set_bit(SOCK_SUPPORT_ZC, &sk->sk_socket->flags);
  1250. } else if (sock_flag(sk, SOCK_ZEROCOPY)) {
  1251. /* If this option was set before 'connect()',
  1252. * when transport was unknown, check that this
  1253. * feature is supported here.
  1254. */
  1255. err = -EOPNOTSUPP;
  1256. goto out;
  1257. }
  1258. err = vsock_auto_bind(vsk);
  1259. if (err)
  1260. goto out;
  1261. sk->sk_state = TCP_SYN_SENT;
  1262. err = transport->connect(vsk);
  1263. if (err < 0)
  1264. goto out;
  1265. /* sk_err might have been set as a result of an earlier
  1266. * (failed) connect attempt.
  1267. */
  1268. sk->sk_err = 0;
  1269. /* Mark sock as connecting and set the error code to in
  1270. * progress in case this is a non-blocking connect.
  1271. */
  1272. sock->state = SS_CONNECTING;
  1273. err = -EINPROGRESS;
  1274. }
  1275. /* The receive path will handle all communication until we are able to
  1276. * enter the connected state. Here we wait for the connection to be
  1277. * completed or a notification of an error.
  1278. */
  1279. timeout = vsk->connect_timeout;
  1280. prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
  1281. while (sk->sk_state != TCP_ESTABLISHED && sk->sk_err == 0) {
  1282. if (flags & O_NONBLOCK) {
  1283. /* If we're not going to block, we schedule a timeout
  1284. * function to generate a timeout on the connection
  1285. * attempt, in case the peer doesn't respond in a
  1286. * timely manner. We hold on to the socket until the
  1287. * timeout fires.
  1288. */
  1289. sock_hold(sk);
  1290. /* If the timeout function is already scheduled,
  1291. * reschedule it, then ungrab the socket refcount to
  1292. * keep it balanced.
  1293. */
  1294. if (mod_delayed_work(system_wq, &vsk->connect_work,
  1295. timeout))
  1296. sock_put(sk);
  1297. /* Skip ahead to preserve error code set above. */
  1298. goto out_wait;
  1299. }
  1300. release_sock(sk);
  1301. timeout = schedule_timeout(timeout);
  1302. lock_sock(sk);
  1303. if (signal_pending(current)) {
  1304. err = sock_intr_errno(timeout);
  1305. sk->sk_state = sk->sk_state == TCP_ESTABLISHED ? TCP_CLOSING : TCP_CLOSE;
  1306. sock->state = SS_UNCONNECTED;
  1307. vsock_transport_cancel_pkt(vsk);
  1308. vsock_remove_connected(vsk);
  1309. goto out_wait;
  1310. } else if ((sk->sk_state != TCP_ESTABLISHED) && (timeout == 0)) {
  1311. err = -ETIMEDOUT;
  1312. sk->sk_state = TCP_CLOSE;
  1313. sock->state = SS_UNCONNECTED;
  1314. vsock_transport_cancel_pkt(vsk);
  1315. goto out_wait;
  1316. }
  1317. prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
  1318. }
  1319. if (sk->sk_err) {
  1320. err = -sk->sk_err;
  1321. sk->sk_state = TCP_CLOSE;
  1322. sock->state = SS_UNCONNECTED;
  1323. } else {
  1324. err = 0;
  1325. }
  1326. out_wait:
  1327. finish_wait(sk_sleep(sk), &wait);
  1328. out:
  1329. release_sock(sk);
  1330. return err;
  1331. }
  1332. static int vsock_accept(struct socket *sock, struct socket *newsock,
  1333. struct proto_accept_arg *arg)
  1334. {
  1335. struct sock *listener;
  1336. int err;
  1337. struct sock *connected;
  1338. struct vsock_sock *vconnected;
  1339. long timeout;
  1340. DEFINE_WAIT(wait);
  1341. err = 0;
  1342. listener = sock->sk;
  1343. lock_sock(listener);
  1344. if (!sock_type_connectible(sock->type)) {
  1345. err = -EOPNOTSUPP;
  1346. goto out;
  1347. }
  1348. if (listener->sk_state != TCP_LISTEN) {
  1349. err = -EINVAL;
  1350. goto out;
  1351. }
  1352. /* Wait for children sockets to appear; these are the new sockets
  1353. * created upon connection establishment.
  1354. */
  1355. timeout = sock_rcvtimeo(listener, arg->flags & O_NONBLOCK);
  1356. prepare_to_wait(sk_sleep(listener), &wait, TASK_INTERRUPTIBLE);
  1357. while ((connected = vsock_dequeue_accept(listener)) == NULL &&
  1358. listener->sk_err == 0) {
  1359. release_sock(listener);
  1360. timeout = schedule_timeout(timeout);
  1361. finish_wait(sk_sleep(listener), &wait);
  1362. lock_sock(listener);
  1363. if (signal_pending(current)) {
  1364. err = sock_intr_errno(timeout);
  1365. goto out;
  1366. } else if (timeout == 0) {
  1367. err = -EAGAIN;
  1368. goto out;
  1369. }
  1370. prepare_to_wait(sk_sleep(listener), &wait, TASK_INTERRUPTIBLE);
  1371. }
  1372. finish_wait(sk_sleep(listener), &wait);
  1373. if (listener->sk_err)
  1374. err = -listener->sk_err;
  1375. if (connected) {
  1376. sk_acceptq_removed(listener);
  1377. lock_sock_nested(connected, SINGLE_DEPTH_NESTING);
  1378. vconnected = vsock_sk(connected);
  1379. /* If the listener socket has received an error, then we should
  1380. * reject this socket and return. Note that we simply mark the
  1381. * socket rejected, drop our reference, and let the cleanup
  1382. * function handle the cleanup; the fact that we found it in
  1383. * the listener's accept queue guarantees that the cleanup
  1384. * function hasn't run yet.
  1385. */
  1386. if (err) {
  1387. vconnected->rejected = true;
  1388. } else {
  1389. newsock->state = SS_CONNECTED;
  1390. sock_graft(connected, newsock);
  1391. if (vsock_msgzerocopy_allow(vconnected->transport))
  1392. set_bit(SOCK_SUPPORT_ZC,
  1393. &connected->sk_socket->flags);
  1394. }
  1395. release_sock(connected);
  1396. sock_put(connected);
  1397. }
  1398. out:
  1399. release_sock(listener);
  1400. return err;
  1401. }
  1402. static int vsock_listen(struct socket *sock, int backlog)
  1403. {
  1404. int err;
  1405. struct sock *sk;
  1406. struct vsock_sock *vsk;
  1407. sk = sock->sk;
  1408. lock_sock(sk);
  1409. if (!sock_type_connectible(sk->sk_type)) {
  1410. err = -EOPNOTSUPP;
  1411. goto out;
  1412. }
  1413. if (sock->state != SS_UNCONNECTED) {
  1414. err = -EINVAL;
  1415. goto out;
  1416. }
  1417. vsk = vsock_sk(sk);
  1418. if (!vsock_addr_bound(&vsk->local_addr)) {
  1419. err = -EINVAL;
  1420. goto out;
  1421. }
  1422. sk->sk_max_ack_backlog = backlog;
  1423. sk->sk_state = TCP_LISTEN;
  1424. err = 0;
  1425. out:
  1426. release_sock(sk);
  1427. return err;
  1428. }
  1429. static void vsock_update_buffer_size(struct vsock_sock *vsk,
  1430. const struct vsock_transport *transport,
  1431. u64 val)
  1432. {
  1433. if (val > vsk->buffer_max_size)
  1434. val = vsk->buffer_max_size;
  1435. if (val < vsk->buffer_min_size)
  1436. val = vsk->buffer_min_size;
  1437. if (val != vsk->buffer_size &&
  1438. transport && transport->notify_buffer_size)
  1439. transport->notify_buffer_size(vsk, &val);
  1440. vsk->buffer_size = val;
  1441. }
  1442. static int vsock_connectible_setsockopt(struct socket *sock,
  1443. int level,
  1444. int optname,
  1445. sockptr_t optval,
  1446. unsigned int optlen)
  1447. {
  1448. int err;
  1449. struct sock *sk;
  1450. struct vsock_sock *vsk;
  1451. const struct vsock_transport *transport;
  1452. u64 val;
  1453. if (level != AF_VSOCK && level != SOL_SOCKET)
  1454. return -ENOPROTOOPT;
  1455. #define COPY_IN(_v) \
  1456. do { \
  1457. if (optlen < sizeof(_v)) { \
  1458. err = -EINVAL; \
  1459. goto exit; \
  1460. } \
  1461. if (copy_from_sockptr(&_v, optval, sizeof(_v)) != 0) { \
  1462. err = -EFAULT; \
  1463. goto exit; \
  1464. } \
  1465. } while (0)
  1466. err = 0;
  1467. sk = sock->sk;
  1468. vsk = vsock_sk(sk);
  1469. lock_sock(sk);
  1470. transport = vsk->transport;
  1471. if (level == SOL_SOCKET) {
  1472. int zerocopy;
  1473. if (optname != SO_ZEROCOPY) {
  1474. release_sock(sk);
  1475. return sock_setsockopt(sock, level, optname, optval, optlen);
  1476. }
  1477. /* Use 'int' type here, because variable to
  1478. * set this option usually has this type.
  1479. */
  1480. COPY_IN(zerocopy);
  1481. if (zerocopy < 0 || zerocopy > 1) {
  1482. err = -EINVAL;
  1483. goto exit;
  1484. }
  1485. if (transport && !vsock_msgzerocopy_allow(transport)) {
  1486. err = -EOPNOTSUPP;
  1487. goto exit;
  1488. }
  1489. sock_valbool_flag(sk, SOCK_ZEROCOPY, zerocopy);
  1490. goto exit;
  1491. }
  1492. switch (optname) {
  1493. case SO_VM_SOCKETS_BUFFER_SIZE:
  1494. COPY_IN(val);
  1495. vsock_update_buffer_size(vsk, transport, val);
  1496. break;
  1497. case SO_VM_SOCKETS_BUFFER_MAX_SIZE:
  1498. COPY_IN(val);
  1499. vsk->buffer_max_size = val;
  1500. vsock_update_buffer_size(vsk, transport, vsk->buffer_size);
  1501. break;
  1502. case SO_VM_SOCKETS_BUFFER_MIN_SIZE:
  1503. COPY_IN(val);
  1504. vsk->buffer_min_size = val;
  1505. vsock_update_buffer_size(vsk, transport, vsk->buffer_size);
  1506. break;
  1507. case SO_VM_SOCKETS_CONNECT_TIMEOUT_NEW:
  1508. case SO_VM_SOCKETS_CONNECT_TIMEOUT_OLD: {
  1509. struct __kernel_sock_timeval tv;
  1510. err = sock_copy_user_timeval(&tv, optval, optlen,
  1511. optname == SO_VM_SOCKETS_CONNECT_TIMEOUT_OLD);
  1512. if (err)
  1513. break;
  1514. if (tv.tv_sec >= 0 && tv.tv_usec < USEC_PER_SEC &&
  1515. tv.tv_sec < (MAX_SCHEDULE_TIMEOUT / HZ - 1)) {
  1516. vsk->connect_timeout = tv.tv_sec * HZ +
  1517. DIV_ROUND_UP((unsigned long)tv.tv_usec, (USEC_PER_SEC / HZ));
  1518. if (vsk->connect_timeout == 0)
  1519. vsk->connect_timeout =
  1520. VSOCK_DEFAULT_CONNECT_TIMEOUT;
  1521. } else {
  1522. err = -ERANGE;
  1523. }
  1524. break;
  1525. }
  1526. default:
  1527. err = -ENOPROTOOPT;
  1528. break;
  1529. }
  1530. #undef COPY_IN
  1531. exit:
  1532. release_sock(sk);
  1533. return err;
  1534. }
  1535. static int vsock_connectible_getsockopt(struct socket *sock,
  1536. int level, int optname,
  1537. char __user *optval,
  1538. int __user *optlen)
  1539. {
  1540. struct sock *sk = sock->sk;
  1541. struct vsock_sock *vsk = vsock_sk(sk);
  1542. union {
  1543. u64 val64;
  1544. struct old_timeval32 tm32;
  1545. struct __kernel_old_timeval tm;
  1546. struct __kernel_sock_timeval stm;
  1547. } v;
  1548. int lv = sizeof(v.val64);
  1549. int len;
  1550. if (level != AF_VSOCK)
  1551. return -ENOPROTOOPT;
  1552. if (get_user(len, optlen))
  1553. return -EFAULT;
  1554. memset(&v, 0, sizeof(v));
  1555. switch (optname) {
  1556. case SO_VM_SOCKETS_BUFFER_SIZE:
  1557. v.val64 = vsk->buffer_size;
  1558. break;
  1559. case SO_VM_SOCKETS_BUFFER_MAX_SIZE:
  1560. v.val64 = vsk->buffer_max_size;
  1561. break;
  1562. case SO_VM_SOCKETS_BUFFER_MIN_SIZE:
  1563. v.val64 = vsk->buffer_min_size;
  1564. break;
  1565. case SO_VM_SOCKETS_CONNECT_TIMEOUT_NEW:
  1566. case SO_VM_SOCKETS_CONNECT_TIMEOUT_OLD:
  1567. lv = sock_get_timeout(vsk->connect_timeout, &v,
  1568. optname == SO_VM_SOCKETS_CONNECT_TIMEOUT_OLD);
  1569. break;
  1570. default:
  1571. return -ENOPROTOOPT;
  1572. }
  1573. if (len < lv)
  1574. return -EINVAL;
  1575. if (len > lv)
  1576. len = lv;
  1577. if (copy_to_user(optval, &v, len))
  1578. return -EFAULT;
  1579. if (put_user(len, optlen))
  1580. return -EFAULT;
  1581. return 0;
  1582. }
  1583. static int vsock_connectible_sendmsg(struct socket *sock, struct msghdr *msg,
  1584. size_t len)
  1585. {
  1586. struct sock *sk;
  1587. struct vsock_sock *vsk;
  1588. const struct vsock_transport *transport;
  1589. ssize_t total_written;
  1590. long timeout;
  1591. int err;
  1592. struct vsock_transport_send_notify_data send_data;
  1593. DEFINE_WAIT_FUNC(wait, woken_wake_function);
  1594. sk = sock->sk;
  1595. vsk = vsock_sk(sk);
  1596. total_written = 0;
  1597. err = 0;
  1598. if (msg->msg_flags & MSG_OOB)
  1599. return -EOPNOTSUPP;
  1600. lock_sock(sk);
  1601. transport = vsk->transport;
  1602. /* Callers should not provide a destination with connection oriented
  1603. * sockets.
  1604. */
  1605. if (msg->msg_namelen) {
  1606. err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
  1607. goto out;
  1608. }
  1609. /* Send data only if both sides are not shutdown in the direction. */
  1610. if (sk->sk_shutdown & SEND_SHUTDOWN ||
  1611. vsk->peer_shutdown & RCV_SHUTDOWN) {
  1612. err = -EPIPE;
  1613. goto out;
  1614. }
  1615. if (!transport || sk->sk_state != TCP_ESTABLISHED ||
  1616. !vsock_addr_bound(&vsk->local_addr)) {
  1617. err = -ENOTCONN;
  1618. goto out;
  1619. }
  1620. if (!vsock_addr_bound(&vsk->remote_addr)) {
  1621. err = -EDESTADDRREQ;
  1622. goto out;
  1623. }
  1624. if (msg->msg_flags & MSG_ZEROCOPY &&
  1625. !vsock_msgzerocopy_allow(transport)) {
  1626. err = -EOPNOTSUPP;
  1627. goto out;
  1628. }
  1629. /* Wait for room in the produce queue to enqueue our user's data. */
  1630. timeout = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
  1631. err = transport->notify_send_init(vsk, &send_data);
  1632. if (err < 0)
  1633. goto out;
  1634. while (total_written < len) {
  1635. ssize_t written;
  1636. add_wait_queue(sk_sleep(sk), &wait);
  1637. while (vsock_stream_has_space(vsk) == 0 &&
  1638. sk->sk_err == 0 &&
  1639. !(sk->sk_shutdown & SEND_SHUTDOWN) &&
  1640. !(vsk->peer_shutdown & RCV_SHUTDOWN)) {
  1641. /* Don't wait for non-blocking sockets. */
  1642. if (timeout == 0) {
  1643. err = -EAGAIN;
  1644. remove_wait_queue(sk_sleep(sk), &wait);
  1645. goto out_err;
  1646. }
  1647. err = transport->notify_send_pre_block(vsk, &send_data);
  1648. if (err < 0) {
  1649. remove_wait_queue(sk_sleep(sk), &wait);
  1650. goto out_err;
  1651. }
  1652. release_sock(sk);
  1653. timeout = wait_woken(&wait, TASK_INTERRUPTIBLE, timeout);
  1654. lock_sock(sk);
  1655. if (signal_pending(current)) {
  1656. err = sock_intr_errno(timeout);
  1657. remove_wait_queue(sk_sleep(sk), &wait);
  1658. goto out_err;
  1659. } else if (timeout == 0) {
  1660. err = -EAGAIN;
  1661. remove_wait_queue(sk_sleep(sk), &wait);
  1662. goto out_err;
  1663. }
  1664. }
  1665. remove_wait_queue(sk_sleep(sk), &wait);
  1666. /* These checks occur both as part of and after the loop
  1667. * conditional since we need to check before and after
  1668. * sleeping.
  1669. */
  1670. if (sk->sk_err) {
  1671. err = -sk->sk_err;
  1672. goto out_err;
  1673. } else if ((sk->sk_shutdown & SEND_SHUTDOWN) ||
  1674. (vsk->peer_shutdown & RCV_SHUTDOWN)) {
  1675. err = -EPIPE;
  1676. goto out_err;
  1677. }
  1678. err = transport->notify_send_pre_enqueue(vsk, &send_data);
  1679. if (err < 0)
  1680. goto out_err;
  1681. /* Note that enqueue will only write as many bytes as are free
  1682. * in the produce queue, so we don't need to ensure len is
  1683. * smaller than the queue size. It is the caller's
  1684. * responsibility to check how many bytes we were able to send.
  1685. */
  1686. if (sk->sk_type == SOCK_SEQPACKET) {
  1687. written = transport->seqpacket_enqueue(vsk,
  1688. msg, len - total_written);
  1689. } else {
  1690. written = transport->stream_enqueue(vsk,
  1691. msg, len - total_written);
  1692. }
  1693. if (written < 0) {
  1694. err = written;
  1695. goto out_err;
  1696. }
  1697. total_written += written;
  1698. err = transport->notify_send_post_enqueue(
  1699. vsk, written, &send_data);
  1700. if (err < 0)
  1701. goto out_err;
  1702. }
  1703. out_err:
  1704. if (total_written > 0) {
  1705. /* Return number of written bytes only if:
  1706. * 1) SOCK_STREAM socket.
  1707. * 2) SOCK_SEQPACKET socket when whole buffer is sent.
  1708. */
  1709. if (sk->sk_type == SOCK_STREAM || total_written == len)
  1710. err = total_written;
  1711. }
  1712. out:
  1713. if (sk->sk_type == SOCK_STREAM)
  1714. err = sk_stream_error(sk, msg->msg_flags, err);
  1715. release_sock(sk);
  1716. return err;
  1717. }
  1718. static int vsock_connectible_wait_data(struct sock *sk,
  1719. struct wait_queue_entry *wait,
  1720. long timeout,
  1721. struct vsock_transport_recv_notify_data *recv_data,
  1722. size_t target)
  1723. {
  1724. const struct vsock_transport *transport;
  1725. struct vsock_sock *vsk;
  1726. s64 data;
  1727. int err;
  1728. vsk = vsock_sk(sk);
  1729. err = 0;
  1730. transport = vsk->transport;
  1731. while (1) {
  1732. prepare_to_wait(sk_sleep(sk), wait, TASK_INTERRUPTIBLE);
  1733. data = vsock_connectible_has_data(vsk);
  1734. if (data != 0)
  1735. break;
  1736. if (sk->sk_err != 0 ||
  1737. (sk->sk_shutdown & RCV_SHUTDOWN) ||
  1738. (vsk->peer_shutdown & SEND_SHUTDOWN)) {
  1739. break;
  1740. }
  1741. /* Don't wait for non-blocking sockets. */
  1742. if (timeout == 0) {
  1743. err = -EAGAIN;
  1744. break;
  1745. }
  1746. if (recv_data) {
  1747. err = transport->notify_recv_pre_block(vsk, target, recv_data);
  1748. if (err < 0)
  1749. break;
  1750. }
  1751. release_sock(sk);
  1752. timeout = schedule_timeout(timeout);
  1753. lock_sock(sk);
  1754. if (signal_pending(current)) {
  1755. err = sock_intr_errno(timeout);
  1756. break;
  1757. } else if (timeout == 0) {
  1758. err = -EAGAIN;
  1759. break;
  1760. }
  1761. }
  1762. finish_wait(sk_sleep(sk), wait);
  1763. if (err)
  1764. return err;
  1765. /* Internal transport error when checking for available
  1766. * data. XXX This should be changed to a connection
  1767. * reset in a later change.
  1768. */
  1769. if (data < 0)
  1770. return -ENOMEM;
  1771. return data;
  1772. }
  1773. static int __vsock_stream_recvmsg(struct sock *sk, struct msghdr *msg,
  1774. size_t len, int flags)
  1775. {
  1776. struct vsock_transport_recv_notify_data recv_data;
  1777. const struct vsock_transport *transport;
  1778. struct vsock_sock *vsk;
  1779. ssize_t copied;
  1780. size_t target;
  1781. long timeout;
  1782. int err;
  1783. DEFINE_WAIT(wait);
  1784. vsk = vsock_sk(sk);
  1785. transport = vsk->transport;
  1786. /* We must not copy less than target bytes into the user's buffer
  1787. * before returning successfully, so we wait for the consume queue to
  1788. * have that much data to consume before dequeueing. Note that this
  1789. * makes it impossible to handle cases where target is greater than the
  1790. * queue size.
  1791. */
  1792. target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
  1793. if (target >= transport->stream_rcvhiwat(vsk)) {
  1794. err = -ENOMEM;
  1795. goto out;
  1796. }
  1797. timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
  1798. copied = 0;
  1799. err = transport->notify_recv_init(vsk, target, &recv_data);
  1800. if (err < 0)
  1801. goto out;
  1802. while (1) {
  1803. ssize_t read;
  1804. err = vsock_connectible_wait_data(sk, &wait, timeout,
  1805. &recv_data, target);
  1806. if (err <= 0)
  1807. break;
  1808. err = transport->notify_recv_pre_dequeue(vsk, target,
  1809. &recv_data);
  1810. if (err < 0)
  1811. break;
  1812. read = transport->stream_dequeue(vsk, msg, len - copied, flags);
  1813. if (read < 0) {
  1814. err = read;
  1815. break;
  1816. }
  1817. copied += read;
  1818. err = transport->notify_recv_post_dequeue(vsk, target, read,
  1819. !(flags & MSG_PEEK), &recv_data);
  1820. if (err < 0)
  1821. goto out;
  1822. if (read >= target || flags & MSG_PEEK)
  1823. break;
  1824. target -= read;
  1825. }
  1826. if (sk->sk_err)
  1827. err = -sk->sk_err;
  1828. else if (sk->sk_shutdown & RCV_SHUTDOWN)
  1829. err = 0;
  1830. if (copied > 0)
  1831. err = copied;
  1832. out:
  1833. return err;
  1834. }
  1835. static int __vsock_seqpacket_recvmsg(struct sock *sk, struct msghdr *msg,
  1836. size_t len, int flags)
  1837. {
  1838. const struct vsock_transport *transport;
  1839. struct vsock_sock *vsk;
  1840. ssize_t msg_len;
  1841. long timeout;
  1842. int err = 0;
  1843. DEFINE_WAIT(wait);
  1844. vsk = vsock_sk(sk);
  1845. transport = vsk->transport;
  1846. timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
  1847. err = vsock_connectible_wait_data(sk, &wait, timeout, NULL, 0);
  1848. if (err <= 0)
  1849. goto out;
  1850. msg_len = transport->seqpacket_dequeue(vsk, msg, flags);
  1851. if (msg_len < 0) {
  1852. err = msg_len;
  1853. goto out;
  1854. }
  1855. if (sk->sk_err) {
  1856. err = -sk->sk_err;
  1857. } else if (sk->sk_shutdown & RCV_SHUTDOWN) {
  1858. err = 0;
  1859. } else {
  1860. /* User sets MSG_TRUNC, so return real length of
  1861. * packet.
  1862. */
  1863. if (flags & MSG_TRUNC)
  1864. err = msg_len;
  1865. else
  1866. err = len - msg_data_left(msg);
  1867. /* Always set MSG_TRUNC if real length of packet is
  1868. * bigger than user's buffer.
  1869. */
  1870. if (msg_len > len)
  1871. msg->msg_flags |= MSG_TRUNC;
  1872. }
  1873. out:
  1874. return err;
  1875. }
  1876. int
  1877. __vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
  1878. int flags)
  1879. {
  1880. struct sock *sk;
  1881. struct vsock_sock *vsk;
  1882. const struct vsock_transport *transport;
  1883. int err;
  1884. sk = sock->sk;
  1885. if (unlikely(flags & MSG_ERRQUEUE))
  1886. return sock_recv_errqueue(sk, msg, len, SOL_VSOCK, VSOCK_RECVERR);
  1887. vsk = vsock_sk(sk);
  1888. err = 0;
  1889. lock_sock(sk);
  1890. transport = vsk->transport;
  1891. if (!transport || sk->sk_state != TCP_ESTABLISHED) {
  1892. /* Recvmsg is supposed to return 0 if a peer performs an
  1893. * orderly shutdown. Differentiate between that case and when a
  1894. * peer has not connected or a local shutdown occurred with the
  1895. * SOCK_DONE flag.
  1896. */
  1897. if (sock_flag(sk, SOCK_DONE))
  1898. err = 0;
  1899. else
  1900. err = -ENOTCONN;
  1901. goto out;
  1902. }
  1903. if (flags & MSG_OOB) {
  1904. err = -EOPNOTSUPP;
  1905. goto out;
  1906. }
  1907. /* We don't check peer_shutdown flag here since peer may actually shut
  1908. * down, but there can be data in the queue that a local socket can
  1909. * receive.
  1910. */
  1911. if (sk->sk_shutdown & RCV_SHUTDOWN) {
  1912. err = 0;
  1913. goto out;
  1914. }
  1915. /* It is valid on Linux to pass in a zero-length receive buffer. This
  1916. * is not an error. We may as well bail out now.
  1917. */
  1918. if (!len) {
  1919. err = 0;
  1920. goto out;
  1921. }
  1922. if (sk->sk_type == SOCK_STREAM)
  1923. err = __vsock_stream_recvmsg(sk, msg, len, flags);
  1924. else
  1925. err = __vsock_seqpacket_recvmsg(sk, msg, len, flags);
  1926. out:
  1927. release_sock(sk);
  1928. return err;
  1929. }
  1930. int
  1931. vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
  1932. int flags)
  1933. {
  1934. #ifdef CONFIG_BPF_SYSCALL
  1935. struct sock *sk = sock->sk;
  1936. const struct proto *prot;
  1937. prot = READ_ONCE(sk->sk_prot);
  1938. if (prot != &vsock_proto)
  1939. return prot->recvmsg(sk, msg, len, flags, NULL);
  1940. #endif
  1941. return __vsock_connectible_recvmsg(sock, msg, len, flags);
  1942. }
  1943. EXPORT_SYMBOL_GPL(vsock_connectible_recvmsg);
  1944. static int vsock_set_rcvlowat(struct sock *sk, int val)
  1945. {
  1946. const struct vsock_transport *transport;
  1947. struct vsock_sock *vsk;
  1948. vsk = vsock_sk(sk);
  1949. if (val > vsk->buffer_size)
  1950. return -EINVAL;
  1951. transport = vsk->transport;
  1952. if (transport && transport->notify_set_rcvlowat) {
  1953. int err;
  1954. err = transport->notify_set_rcvlowat(vsk, val);
  1955. if (err)
  1956. return err;
  1957. }
  1958. WRITE_ONCE(sk->sk_rcvlowat, val ? : 1);
  1959. return 0;
  1960. }
  1961. static const struct proto_ops vsock_stream_ops = {
  1962. .family = PF_VSOCK,
  1963. .owner = THIS_MODULE,
  1964. .release = vsock_release,
  1965. .bind = vsock_bind,
  1966. .connect = vsock_connect,
  1967. .socketpair = sock_no_socketpair,
  1968. .accept = vsock_accept,
  1969. .getname = vsock_getname,
  1970. .poll = vsock_poll,
  1971. .ioctl = vsock_ioctl,
  1972. .listen = vsock_listen,
  1973. .shutdown = vsock_shutdown,
  1974. .setsockopt = vsock_connectible_setsockopt,
  1975. .getsockopt = vsock_connectible_getsockopt,
  1976. .sendmsg = vsock_connectible_sendmsg,
  1977. .recvmsg = vsock_connectible_recvmsg,
  1978. .mmap = sock_no_mmap,
  1979. .set_rcvlowat = vsock_set_rcvlowat,
  1980. .read_skb = vsock_read_skb,
  1981. };
  1982. static const struct proto_ops vsock_seqpacket_ops = {
  1983. .family = PF_VSOCK,
  1984. .owner = THIS_MODULE,
  1985. .release = vsock_release,
  1986. .bind = vsock_bind,
  1987. .connect = vsock_connect,
  1988. .socketpair = sock_no_socketpair,
  1989. .accept = vsock_accept,
  1990. .getname = vsock_getname,
  1991. .poll = vsock_poll,
  1992. .ioctl = vsock_ioctl,
  1993. .listen = vsock_listen,
  1994. .shutdown = vsock_shutdown,
  1995. .setsockopt = vsock_connectible_setsockopt,
  1996. .getsockopt = vsock_connectible_getsockopt,
  1997. .sendmsg = vsock_connectible_sendmsg,
  1998. .recvmsg = vsock_connectible_recvmsg,
  1999. .mmap = sock_no_mmap,
  2000. .read_skb = vsock_read_skb,
  2001. };
  2002. static int vsock_create(struct net *net, struct socket *sock,
  2003. int protocol, int kern)
  2004. {
  2005. struct vsock_sock *vsk;
  2006. struct sock *sk;
  2007. int ret;
  2008. if (!sock)
  2009. return -EINVAL;
  2010. if (protocol && protocol != PF_VSOCK)
  2011. return -EPROTONOSUPPORT;
  2012. switch (sock->type) {
  2013. case SOCK_DGRAM:
  2014. sock->ops = &vsock_dgram_ops;
  2015. break;
  2016. case SOCK_STREAM:
  2017. sock->ops = &vsock_stream_ops;
  2018. break;
  2019. case SOCK_SEQPACKET:
  2020. sock->ops = &vsock_seqpacket_ops;
  2021. break;
  2022. default:
  2023. return -ESOCKTNOSUPPORT;
  2024. }
  2025. sock->state = SS_UNCONNECTED;
  2026. sk = __vsock_create(net, sock, NULL, GFP_KERNEL, 0, kern);
  2027. if (!sk)
  2028. return -ENOMEM;
  2029. vsk = vsock_sk(sk);
  2030. if (sock->type == SOCK_DGRAM) {
  2031. ret = vsock_assign_transport(vsk, NULL);
  2032. if (ret < 0) {
  2033. sock_put(sk);
  2034. return ret;
  2035. }
  2036. }
  2037. /* SOCK_DGRAM doesn't have 'setsockopt' callback set in its
  2038. * proto_ops, so there is no handler for custom logic.
  2039. */
  2040. if (sock_type_connectible(sock->type))
  2041. set_bit(SOCK_CUSTOM_SOCKOPT, &sk->sk_socket->flags);
  2042. vsock_insert_unbound(vsk);
  2043. return 0;
  2044. }
  2045. static const struct net_proto_family vsock_family_ops = {
  2046. .family = AF_VSOCK,
  2047. .create = vsock_create,
  2048. .owner = THIS_MODULE,
  2049. };
  2050. static long vsock_dev_do_ioctl(struct file *filp,
  2051. unsigned int cmd, void __user *ptr)
  2052. {
  2053. u32 __user *p = ptr;
  2054. u32 cid = VMADDR_CID_ANY;
  2055. int retval = 0;
  2056. switch (cmd) {
  2057. case IOCTL_VM_SOCKETS_GET_LOCAL_CID:
  2058. /* To be compatible with the VMCI behavior, we prioritize the
  2059. * guest CID instead of well-know host CID (VMADDR_CID_HOST).
  2060. */
  2061. if (transport_g2h)
  2062. cid = transport_g2h->get_local_cid();
  2063. else if (transport_h2g)
  2064. cid = transport_h2g->get_local_cid();
  2065. if (put_user(cid, p) != 0)
  2066. retval = -EFAULT;
  2067. break;
  2068. default:
  2069. retval = -ENOIOCTLCMD;
  2070. }
  2071. return retval;
  2072. }
  2073. static long vsock_dev_ioctl(struct file *filp,
  2074. unsigned int cmd, unsigned long arg)
  2075. {
  2076. return vsock_dev_do_ioctl(filp, cmd, (void __user *)arg);
  2077. }
  #ifdef CONFIG_COMPAT
  /* compat_ioctl handler: convert @arg with compat_ptr() and hand it to the
   * shared implementation.
   */
  static long vsock_dev_compat_ioctl(struct file *filp,
  				   unsigned int cmd, unsigned long arg)
  {
  	void __user *uptr = compat_ptr(arg);

  	return vsock_dev_do_ioctl(filp, cmd, uptr);
  }
  #endif
  2085. static const struct file_operations vsock_device_ops = {
  2086. .owner = THIS_MODULE,
  2087. .unlocked_ioctl = vsock_dev_ioctl,
  2088. #ifdef CONFIG_COMPAT
  2089. .compat_ioctl = vsock_dev_compat_ioctl,
  2090. #endif
  2091. .open = nonseekable_open,
  2092. };
  2093. static struct miscdevice vsock_device = {
  2094. .name = "vsock",
  2095. .fops = &vsock_device_ops,
  2096. };
  2097. static int __init vsock_init(void)
  2098. {
  2099. int err = 0;
  2100. vsock_init_tables();
  2101. vsock_proto.owner = THIS_MODULE;
  2102. vsock_device.minor = MISC_DYNAMIC_MINOR;
  2103. err = misc_register(&vsock_device);
  2104. if (err) {
  2105. pr_err("Failed to register misc device\n");
  2106. goto err_reset_transport;
  2107. }
  2108. err = proto_register(&vsock_proto, 1); /* we want our slab */
  2109. if (err) {
  2110. pr_err("Cannot register vsock protocol\n");
  2111. goto err_deregister_misc;
  2112. }
  2113. err = sock_register(&vsock_family_ops);
  2114. if (err) {
  2115. pr_err("could not register af_vsock (%d) address family: %d\n",
  2116. AF_VSOCK, err);
  2117. goto err_unregister_proto;
  2118. }
  2119. vsock_bpf_build_proto();
  2120. return 0;
  2121. err_unregister_proto:
  2122. proto_unregister(&vsock_proto);
  2123. err_deregister_misc:
  2124. misc_deregister(&vsock_device);
  2125. err_reset_transport:
  2126. return err;
  2127. }
  2128. static void __exit vsock_exit(void)
  2129. {
  2130. misc_deregister(&vsock_device);
  2131. sock_unregister(AF_VSOCK);
  2132. proto_unregister(&vsock_proto);
  2133. }
  2134. const struct vsock_transport *vsock_core_get_transport(struct vsock_sock *vsk)
  2135. {
  2136. return vsk->transport;
  2137. }
  2138. EXPORT_SYMBOL_GPL(vsock_core_get_transport);
  2139. int vsock_core_register(const struct vsock_transport *t, int features)
  2140. {
  2141. const struct vsock_transport *t_h2g, *t_g2h, *t_dgram, *t_local;
  2142. int err = mutex_lock_interruptible(&vsock_register_mutex);
  2143. if (err)
  2144. return err;
  2145. t_h2g = transport_h2g;
  2146. t_g2h = transport_g2h;
  2147. t_dgram = transport_dgram;
  2148. t_local = transport_local;
  2149. if (features & VSOCK_TRANSPORT_F_H2G) {
  2150. if (t_h2g) {
  2151. err = -EBUSY;
  2152. goto err_busy;
  2153. }
  2154. t_h2g = t;
  2155. }
  2156. if (features & VSOCK_TRANSPORT_F_G2H) {
  2157. if (t_g2h) {
  2158. err = -EBUSY;
  2159. goto err_busy;
  2160. }
  2161. t_g2h = t;
  2162. }
  2163. if (features & VSOCK_TRANSPORT_F_DGRAM) {
  2164. if (t_dgram) {
  2165. err = -EBUSY;
  2166. goto err_busy;
  2167. }
  2168. t_dgram = t;
  2169. }
  2170. if (features & VSOCK_TRANSPORT_F_LOCAL) {
  2171. if (t_local) {
  2172. err = -EBUSY;
  2173. goto err_busy;
  2174. }
  2175. t_local = t;
  2176. }
  2177. transport_h2g = t_h2g;
  2178. transport_g2h = t_g2h;
  2179. transport_dgram = t_dgram;
  2180. transport_local = t_local;
  2181. err_busy:
  2182. mutex_unlock(&vsock_register_mutex);
  2183. return err;
  2184. }
  2185. EXPORT_SYMBOL_GPL(vsock_core_register);
  2186. void vsock_core_unregister(const struct vsock_transport *t)
  2187. {
  2188. mutex_lock(&vsock_register_mutex);
  2189. if (transport_h2g == t)
  2190. transport_h2g = NULL;
  2191. if (transport_g2h == t)
  2192. transport_g2h = NULL;
  2193. if (transport_dgram == t)
  2194. transport_dgram = NULL;
  2195. if (transport_local == t)
  2196. transport_local = NULL;
  2197. mutex_unlock(&vsock_register_mutex);
  2198. }
  2199. EXPORT_SYMBOL_GPL(vsock_core_unregister);
  2200. module_init(vsock_init);
  2201. module_exit(vsock_exit);
  2202. MODULE_AUTHOR("VMware, Inc.");
  2203. MODULE_DESCRIPTION("VMware Virtual Socket Family");
  2204. MODULE_VERSION("1.0.2.0-k");
  2205. MODULE_LICENSE("GPL v2");